From 6bf0a5cb5034a7e684dcc3500e841785237ce2dd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 19:32:43 +0200 Subject: Adding upstream version 1:115.7.0. Signed-off-by: Daniel Baumann --- netwerk/streamconv/converters/moz.build | 31 + netwerk/streamconv/converters/mozTXTToHTMLConv.cpp | 1285 ++++++++++++++++++++ netwerk/streamconv/converters/mozTXTToHTMLConv.h | 284 +++++ netwerk/streamconv/converters/nsDirIndex.cpp | 86 ++ netwerk/streamconv/converters/nsDirIndex.h | 32 + netwerk/streamconv/converters/nsDirIndexParser.cpp | 440 +++++++ netwerk/streamconv/converters/nsDirIndexParser.h | 72 ++ .../streamconv/converters/nsHTTPCompressConv.cpp | 746 ++++++++++++ netwerk/streamconv/converters/nsHTTPCompressConv.h | 110 ++ .../streamconv/converters/nsICompressConvStats.idl | 17 + netwerk/streamconv/converters/nsIndexedToHTML.cpp | 826 +++++++++++++ netwerk/streamconv/converters/nsIndexedToHTML.h | 59 + netwerk/streamconv/converters/nsMultiMixedConv.cpp | 1042 ++++++++++++++++ netwerk/streamconv/converters/nsMultiMixedConv.h | 258 ++++ netwerk/streamconv/converters/nsUnknownDecoder.cpp | 852 +++++++++++++ netwerk/streamconv/converters/nsUnknownDecoder.h | 152 +++ 16 files changed, 6292 insertions(+) create mode 100644 netwerk/streamconv/converters/moz.build create mode 100644 netwerk/streamconv/converters/mozTXTToHTMLConv.cpp create mode 100644 netwerk/streamconv/converters/mozTXTToHTMLConv.h create mode 100644 netwerk/streamconv/converters/nsDirIndex.cpp create mode 100644 netwerk/streamconv/converters/nsDirIndex.h create mode 100644 netwerk/streamconv/converters/nsDirIndexParser.cpp create mode 100644 netwerk/streamconv/converters/nsDirIndexParser.h create mode 100644 netwerk/streamconv/converters/nsHTTPCompressConv.cpp create mode 100644 netwerk/streamconv/converters/nsHTTPCompressConv.h create mode 100644 netwerk/streamconv/converters/nsICompressConvStats.idl create mode 100644 netwerk/streamconv/converters/nsIndexedToHTML.cpp create mode 100644 
netwerk/streamconv/converters/nsIndexedToHTML.h create mode 100644 netwerk/streamconv/converters/nsMultiMixedConv.cpp create mode 100644 netwerk/streamconv/converters/nsMultiMixedConv.h create mode 100644 netwerk/streamconv/converters/nsUnknownDecoder.cpp create mode 100644 netwerk/streamconv/converters/nsUnknownDecoder.h (limited to 'netwerk/streamconv/converters') diff --git a/netwerk/streamconv/converters/moz.build b/netwerk/streamconv/converters/moz.build new file mode 100644 index 0000000000..d985eb363a --- /dev/null +++ b/netwerk/streamconv/converters/moz.build @@ -0,0 +1,31 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +XPIDL_SOURCES += ["nsICompressConvStats.idl"] + +EXPORTS += [ + "nsHTTPCompressConv.h", + "nsUnknownDecoder.h", +] + +XPIDL_MODULE = "necko_http" + +UNIFIED_SOURCES += [ + "mozTXTToHTMLConv.cpp", + "nsDirIndex.cpp", + "nsDirIndexParser.cpp", + "nsHTTPCompressConv.cpp", + "nsIndexedToHTML.cpp", + "nsMultiMixedConv.cpp", + "nsUnknownDecoder.cpp", +] + +FINAL_LIBRARY = "xul" + +LOCAL_INCLUDES += [ + "/modules/brotli/dec", + "/netwerk/base", +] diff --git a/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp b/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp new file mode 100644 index 0000000000..20e2fabd80 --- /dev/null +++ b/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp @@ -0,0 +1,1285 @@ +/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mozilla/TextUtils.h" +#include "mozTXTToHTMLConv.h" +#include "mozilla/intl/Segmenter.h" +#include "mozilla/Maybe.h" +#include "nsNetUtil.h" +#include "nsUnicharUtils.h" +#include "nsUnicodeProperties.h" +#include "nsCRT.h" +#include "nsIExternalProtocolHandler.h" +#include "nsIURI.h" + +#include <algorithm> + +#ifdef DEBUG_BenB_Perf +# include "prtime.h" +# include "prinrval.h" +#endif + +using mozilla::IsAscii; +using mozilla::IsAsciiAlpha; +using mozilla::IsAsciiDigit; +using mozilla::Maybe; +using mozilla::Some; +using mozilla::Span; +using mozilla::intl::GraphemeClusterBreakIteratorUtf16; +using mozilla::intl::GraphemeClusterBreakReverseIteratorUtf16; + +const double growthRate = 1.2; + +// Bug 183111, editor now replaces multiple spaces with leading +// 0xA0's and a single ending space, so need to treat 0xA0's as spaces. +// 0xA0 is the Latin1/Unicode character for "non-breaking space (nbsp)" +// Also recognize the Japanese ideographic space 0x3000 as a space. +static inline bool IsSpace(const char16_t aChar) { + return (nsCRT::IsAsciiSpace(aChar) || aChar == 0xA0 || aChar == 0x3000); +} + +// EscapeChar takes ch, escapes it and appends the result to +// aStringToAppendTo. '<', '>' and '&' are always replaced by their HTML +// entity; '"' is replaced only when inAttribute is true. Every other +// character is appended unchanged. +void mozTXTToHTMLConv::EscapeChar(const char16_t ch, + nsAString& aStringToAppendTo, + bool inAttribute) { + switch (ch) { + case '<': + aStringToAppendTo.AppendLiteral("&lt;"); + break; + case '>': + aStringToAppendTo.AppendLiteral("&gt;"); + break; + case '&': + aStringToAppendTo.AppendLiteral("&amp;"); + break; + case '"': + if (inAttribute) { + aStringToAppendTo.AppendLiteral("&quot;"); + break; + } + // else fall through + [[fallthrough]]; + default: + aStringToAppendTo += ch; + } +} + +// EscapeStr takes the passed in string and +// escapes it IN PLACE. '"' is escaped only when inAttribute is true. +void mozTXTToHTMLConv::EscapeStr(nsString& aInString, bool inAttribute) { + // the replace substring routines + // don't seem to work if you have a character + // in the in string that is also in the replacement + // string!
=( + // aInString.ReplaceSubstring("&", "&amp;"); + // aInString.ReplaceSubstring("<", "&lt;"); + // aInString.ReplaceSubstring(">", "&gt;"); + for (uint32_t i = 0; i < aInString.Length();) { + switch (aInString[i]) { + case '<': + aInString.Cut(i, 1); + aInString.InsertLiteral(u"&lt;", i); + i += 4; // skip past the 4 characters we just added + break; + case '>': + aInString.Cut(i, 1); + aInString.InsertLiteral(u"&gt;", i); + i += 4; // skip past the 4 characters we just added + break; + case '&': + aInString.Cut(i, 1); + aInString.InsertLiteral(u"&amp;", i); + i += 5; // skip past the 5 characters we just added + break; + case '"': + if (inAttribute) { + aInString.Cut(i, 1); + aInString.InsertLiteral(u"&quot;", i); + i += 6; // skip past the 6 characters we just added + break; + } + // else fall through + [[fallthrough]]; + default: + i++; + } + } +} + +// UnescapeStr walks the aLength characters of aInString that start at +// aStartPos and appends them to aOutString, turning the entities &lt; &gt; +// &amp; and &quot; back into the characters they stand for. Everything else +// is copied unchanged. +void mozTXTToHTMLConv::UnescapeStr(const char16_t* aInString, int32_t aStartPos, + int32_t aLength, nsString& aOutString) { + const char16_t* subString = nullptr; + for (uint32_t i = aStartPos; int32_t(i) - aStartPos < aLength;) { + // Characters left in the range, counting the one at |i|. + int32_t remainingChars = aLength - (int32_t(i) - aStartPos); + if (aInString[i] == '&') { + subString = &aInString[i]; + // Only decode an entity that lies completely inside the range. Comparing + // just a truncated prefix (as the code used to do via std::min) decoded + // e.g. a lone trailing "&" as '<'. + if (remainingChars >= 4 && !NS_strncmp(subString, u"&lt;", 4)) { + aOutString.Append(char16_t('<')); + i += 4; + } else if (remainingChars >= 4 && + !NS_strncmp(subString, u"&gt;", 4)) { + aOutString.Append(char16_t('>')); + i += 4; + } else if (remainingChars >= 5 && + !NS_strncmp(subString, u"&amp;", 5)) { + aOutString.Append(char16_t('&')); + i += 5; + } else if (remainingChars >= 6 && + !NS_strncmp(subString, u"&quot;", 6)) { + aOutString.Append(char16_t('"')); + i += 6; + } else { + aOutString += aInString[i]; + i++; + } + } else { + aOutString += aInString[i]; + i++; + } + } +} + +void mozTXTToHTMLConv::CompleteAbbreviatedURL(const char16_t* aInString, + int32_t aInLength, + const uint32_t pos, + nsString& aOutString) { + NS_ASSERTION(int32_t(pos) < aInLength, + "bad args to CompleteAbbreviatedURL, see bug #190851"); + if
(int32_t(pos) >= aInLength) return; + + if (aInString[pos] == '@') { + // only pre-pend a mailto url if the string contains a .domain in it.. + // i.e. we want to linkify johndoe@foo.com but not "let's meet @8pm" + nsDependentString inString(aInString, aInLength); + if (inString.FindChar('.', pos) != + kNotFound) // if we have a '.' after the @ sign.... + { + aOutString.AssignLiteral("mailto:"); + aOutString += aInString; + } + } else if (aInString[pos] == '.') { + if (ItMatchesDelimited(aInString, aInLength, u"www.", 4, LT_IGNORE, + LT_IGNORE)) { + aOutString.AssignLiteral("http://"); + aOutString += aInString; + } else if (ItMatchesDelimited(aInString, aInLength, u"ftp.", 4, LT_IGNORE, + LT_IGNORE)) { + aOutString.AssignLiteral("ftp://"); + aOutString += aInString; + } + } +} + +// FindURLStart looks backwards from |pos| for the first character of a URL +// of kind |check|. On success the index is stored in |start| and true is +// returned; otherwise |start| is left untouched and false is returned. +// NOTE(review): the RFC1738 and RFC2396E cases below were reconstructed after +// the "<URL:" literal and the text following it had been stripped from this +// copy of the patch - confirm against the upstream file. +bool mozTXTToHTMLConv::FindURLStart(const char16_t* aInString, + int32_t aInLength, const uint32_t pos, + const modetype check, uint32_t& start) { + switch (check) { // no breaks, because end of blocks is never reached + case RFC1738: { + // |pos| is the ':' of "<URL:", so the URL itself starts right after it. + if (!NS_strncmp(&aInString[std::max(int32_t(pos - 4), 0)], u"<URL:", 5)) { + start = pos + 1; + return true; + } + return false; + } + case RFC2396E: { + nsDependentSubstring temp(aInString, aInLength); + // Nearest delimiter before |pos|; only an opening '<' or '"' starts a URL. + int32_t i = pos <= 0 ? kNotFound : temp.RFindCharInSet(u"<>\"", pos - 1); + if (i != kNotFound && + (temp[uint32_t(i)] == '<' || temp[uint32_t(i)] == '"')) { + start = uint32_t(++i); + return start < pos; + } + return false; + } + case freetext: { + int32_t i = pos - 1; + for (; i >= 0 && + (IsAsciiAlpha(aInString[uint32_t(i)]) || + IsAsciiDigit(aInString[uint32_t(i)]) || + aInString[uint32_t(i)] == '+' || aInString[uint32_t(i)] == '-' || + aInString[uint32_t(i)] == '.'); + i--) { + ; + } + if (++i >= 0 && uint32_t(i) < pos && + IsAsciiAlpha(aInString[uint32_t(i)])) { + start = uint32_t(i); + return true; + } + return false; + } + case abbreviated: { + int32_t i = pos - 1; + // This disallows non-ascii-characters for email. + // Currently correct, but revisit later after standards changed.
+ bool isEmail = aInString[pos] == (char16_t)'@'; + // These chars mark the start of the URL + for (; i >= 0 && aInString[uint32_t(i)] != '>' && + aInString[uint32_t(i)] != '<' && aInString[uint32_t(i)] != '"' && + aInString[uint32_t(i)] != '\'' && aInString[uint32_t(i)] != '`' && + aInString[uint32_t(i)] != ',' && aInString[uint32_t(i)] != '{' && + aInString[uint32_t(i)] != '[' && aInString[uint32_t(i)] != '(' && + aInString[uint32_t(i)] != '|' && aInString[uint32_t(i)] != '\\' && + !IsSpace(aInString[uint32_t(i)]) && + (!isEmail || IsAscii(aInString[uint32_t(i)])) && + (!isEmail || aInString[uint32_t(i)] != ')'); + i--) { + ; + } + if (++i >= 0 && uint32_t(i) < pos && + (IsAsciiAlpha(aInString[uint32_t(i)]) || + IsAsciiDigit(aInString[uint32_t(i)]))) { + start = uint32_t(i); + return true; + } + return false; + } + default: + return false; + } // switch +} + +bool mozTXTToHTMLConv::FindURLEnd(const char16_t* aInString, + int32_t aInStringLength, const uint32_t pos, + const modetype check, const uint32_t start, + uint32_t& end) { + switch (check) { // no breaks, because end of blocks is never reached + case RFC1738: + case RFC2396E: { + nsDependentSubstring temp(aInString, aInStringLength); + + int32_t i = temp.FindCharInSet(u"<>\"", pos + 1); + if (i != kNotFound && + temp[uint32_t(i--)] == + (check == RFC1738 || temp[start - 1] == '<' ? 
'>' : '"')) { + end = uint32_t(i); + return end > pos; + } + return false; + } + case freetext: + case abbreviated: { + uint32_t i = pos + 1; + bool isEmail = aInString[pos] == (char16_t)'@'; + bool seenOpeningParenthesis = false; // there is a '(' earlier in the URL + bool seenOpeningSquareBracket = + false; // there is a '[' earlier in the URL + for (; int32_t(i) < aInStringLength; i++) { + // These chars mark the end of the URL + if (aInString[i] == '>' || aInString[i] == '<' || aInString[i] == '"' || + aInString[i] == '`' || aInString[i] == '}' || aInString[i] == '{' || + (aInString[i] == ')' && !seenOpeningParenthesis) || + (aInString[i] == ']' && !seenOpeningSquareBracket) || + // Allow IPv6 adresses like http://[1080::8:800:200C:417A]/foo. + (aInString[i] == '[' && i > 2 && + (aInString[i - 1] != '/' || aInString[i - 2] != '/')) || + IsSpace(aInString[i])) { + break; + } + // Disallow non-ascii-characters for email. + // Currently correct, but revisit later after standards changed. + if (isEmail && (aInString[i] == '(' || aInString[i] == '\'' || + !IsAscii(aInString[i]))) { + break; + } + if (aInString[i] == '(') seenOpeningParenthesis = true; + if (aInString[i] == '[') seenOpeningSquareBracket = true; + } + // These chars are allowed in the middle of the URL, but not at end. + // Technically they are, but are used in normal text after the URL. + while (--i > pos && (aInString[i] == '.' || aInString[i] == ',' || + aInString[i] == ';' || aInString[i] == '!' || + aInString[i] == '?' 
|| aInString[i] == '-' || + aInString[i] == ':' || aInString[i] == '\'')) { + ; + } + if (i > pos) { + end = i; + return true; + } + return false; + } + default: + return false; + } // switch +} + +void mozTXTToHTMLConv::CalculateURLBoundaries( + const char16_t* aInString, int32_t aInStringLength, const uint32_t pos, + const uint32_t whathasbeendone, const modetype check, const uint32_t start, + const uint32_t end, nsString& txtURL, nsString& desc, + int32_t& replaceBefore, int32_t& replaceAfter) { + uint32_t descstart = start; + switch (check) { + case RFC1738: { + descstart = start - 5; + desc.Append(&aInString[descstart], + end - descstart + 2); // include "<URL:>" + replaceAfter = end - pos + 1; + } break; + case RFC2396E: { + descstart = start - 1; + desc.Append(&aInString[descstart], + end - descstart + 2); // include brackets + replaceAfter = end - pos + 1; + } break; + case freetext: + case abbreviated: { + descstart = start; + desc.Append(&aInString[descstart], + end - start + 1); // don't include brackets + replaceAfter = end - pos; + } break; + default: + break; + } // switch + + EscapeStr(desc, false); + + txtURL.Append(&aInString[start], end - start + 1); + txtURL.StripWhitespace(); + + // FIX ME + nsAutoString temp2; + ScanTXT(nsDependentSubstring(&aInString[descstart], pos - descstart), + ~kURLs /*prevents loop*/ & whathasbeendone, temp2); + replaceBefore = temp2.Length(); +} + +// ShouldLinkify returns true when aURL's scheme is http, https or mailto, +// when the scheme's protocol handler is not an external one, or when an +// external application exists for the scheme. +bool mozTXTToHTMLConv::ShouldLinkify(const nsCString& aURL) { + if (!mIOService) return false; + + nsAutoCString scheme; + nsresult rv = mIOService->ExtractScheme(aURL, scheme); + if (NS_FAILED(rv)) return false; + + if (scheme == "http" || scheme == "https" || scheme == "mailto") { + return true; + } + + // Get the handler for this scheme. + nsCOMPtr<nsIProtocolHandler> handler; + rv = mIOService->GetProtocolHandler(scheme.get(), getter_AddRefs(handler)); + if (NS_FAILED(rv)) return false; + + // Is it an external protocol handler? If not, linkify it.
+ nsCOMPtr<nsIExternalProtocolHandler> externalHandler = + do_QueryInterface(handler); + if (!externalHandler) return true; // handler is built-in, linkify it! + + // If external app exists for the scheme then linkify it. + bool exists = false; + rv = externalHandler->ExternalAppExistsForScheme(scheme, &exists); + return (NS_SUCCEEDED(rv) && exists); +} + +// CheckURLAndCreateHTML validates txtURL and, if it should be linkified, +// stores an anchor element wrapping |desc| in outputHTML and returns true. +// |desc| is inserted as-is, so it must already be escaped by the caller. +// NOTE(review): the anchor markup below was reconstructed after it had been +// stripped from this copy of the patch - confirm the class names against the +// upstream file. +bool mozTXTToHTMLConv::CheckURLAndCreateHTML(const nsString& txtURL, + const nsString& desc, + const modetype mode, + nsString& outputHTML) { + // Create *uri from txtURL + nsCOMPtr<nsIURI> uri; + nsresult rv; + // Lazily initialize mIOService + if (!mIOService) { + mIOService = do_GetIOService(); + + if (!mIOService) return false; + } + + // See if the url should be linkified. + NS_ConvertUTF16toUTF8 utf8URL(txtURL); + if (!ShouldLinkify(utf8URL)) return false; + + // it would be faster if we could just check to see if there is a protocol + // handler for the url and return instead of actually trying to create a + // url... + rv = mIOService->NewURI(utf8URL, nullptr, nullptr, getter_AddRefs(uri)); + + // Real work + if (NS_SUCCEEDED(rv) && uri) { + outputHTML.AssignLiteral("<a class=\"moz-txt-link-"); + switch (mode) { + case RFC1738: + outputHTML.AppendLiteral("rfc1738"); + break; + case RFC2396E: + outputHTML.AppendLiteral("rfc2396E"); + break; + case freetext: + outputHTML.AppendLiteral("freetext"); + break; + case abbreviated: + outputHTML.AppendLiteral("abbreviated"); + break; + default: + break; + } + // The URL ends up inside an attribute value, so '"' must be escaped too. + nsAutoString escapedURL(txtURL); + EscapeStr(escapedURL, true); + + outputHTML.AppendLiteral("\" href=\""); + outputHTML += escapedURL; + outputHTML.AppendLiteral("\">"); + outputHTML += desc; + outputHTML.AppendLiteral("</a>"); + return true; + } + return false; +} + +NS_IMETHODIMP mozTXTToHTMLConv::FindURLInPlaintext(const char16_t* aInString, + int32_t aInLength, + int32_t aPos, + int32_t* aStartPos, + int32_t* aEndPos) { + // call FindURL on the passed in string + nsAutoString outputHTML; // we'll ignore the generated output HTML + + *aStartPos = -1; + *aEndPos = -1; + + FindURL(aInString, aInLength, aPos, kURLs, outputHTML, *aStartPos, *aEndPos); + + return NS_OK; +} + +bool mozTXTToHTMLConv::FindURL(const char16_t* aInString, int32_t aInLength, + const uint32_t pos, + const uint32_t whathasbeendone, + nsString& outputHTML, int32_t& replaceBefore, + int32_t& replaceAfter) { + enum statetype { unchecked, invalid, startok, endok, success }; + static const modetype ranking[] = {RFC1738, RFC2396E, freetext, abbreviated}; + + statetype
state[mozTXTToHTMLConv_lastMode + 1]; // 0(=unknown)..lastMode + /* I don't like this abuse of enums as index for the array, + but I don't know a better method */ + + // Define, which modes to check + /* all modes but abbreviated are checked for text[pos] == ':', + only abbreviated for '.', RFC2396E and abbreviated for '@' */ + for (modetype iState = unknown; iState <= mozTXTToHTMLConv_lastMode; + iState = modetype(iState + 1)) { + state[iState] = aInString[pos] == ':' ? unchecked : invalid; + } + switch (aInString[pos]) { + case '@': + state[RFC2396E] = unchecked; + [[fallthrough]]; + case '.': + state[abbreviated] = unchecked; + break; + case ':': + state[abbreviated] = invalid; + break; + default: + break; + } + + // Test, first successful mode wins, sequence defined by |ranking| + int32_t iCheck = 0; // the currently tested modetype + modetype check = ranking[iCheck]; + for (; iCheck < mozTXTToHTMLConv_numberOfModes && state[check] != success; + iCheck++) + /* check state from last run. 
+ If this is the first, check this one, which isn't = success yet */ + { + check = ranking[iCheck]; + + uint32_t start, end; + + if (state[check] == unchecked) { + if (FindURLStart(aInString, aInLength, pos, check, start)) { + state[check] = startok; + } + } + + if (state[check] == startok) { + if (FindURLEnd(aInString, aInLength, pos, check, start, end)) { + state[check] = endok; + } + } + + if (state[check] == endok) { + nsAutoString txtURL, desc; + int32_t resultReplaceBefore, resultReplaceAfter; + + CalculateURLBoundaries(aInString, aInLength, pos, whathasbeendone, check, + start, end, txtURL, desc, resultReplaceBefore, + resultReplaceAfter); + + if (aInString[pos] != ':') { + nsAutoString temp = txtURL; + txtURL.SetLength(0); + CompleteAbbreviatedURL(temp.get(), temp.Length(), pos - start, txtURL); + } + + if (!txtURL.IsEmpty() && + CheckURLAndCreateHTML(txtURL, desc, check, outputHTML)) { + replaceBefore = resultReplaceBefore; + replaceAfter = resultReplaceAfter; + state[check] = success; + } + } // if + } // for + return state[check] == success; +} + +static inline bool IsAlpha(const uint32_t aChar) { + return mozilla::unicode::GetGenCategory(aChar) == nsUGenCategory::kLetter; +} + +static inline bool IsDigit(const uint32_t aChar) { + return mozilla::unicode::GetGenCategory(aChar) == nsUGenCategory::kNumber; +} + +bool mozTXTToHTMLConv::ItMatchesDelimited(const char16_t* aInString, + int32_t aInLength, + const char16_t* rep, int32_t aRepLen, + LIMTYPE before, LIMTYPE after) { + // this little method gets called a LOT. I found we were spending a + // lot of time just calculating the length of the variable "rep" + // over and over again every time we called it. So we're now passing + // an integer in here. 
+ int32_t textLen = aInLength; + + if (((before == LT_IGNORE && (after == LT_IGNORE || after == LT_DELIMITER)) && + textLen < aRepLen) || + ((before != LT_IGNORE || (after != LT_IGNORE && after != LT_DELIMITER)) && + textLen < aRepLen + 1) || + (before != LT_IGNORE && after != LT_IGNORE && after != LT_DELIMITER && + textLen < aRepLen + 2)) { + return false; + } + + uint32_t text0 = aInString[0]; + if (aInLength > 1 && NS_IS_SURROGATE_PAIR(text0, aInString[1])) { + text0 = SURROGATE_TO_UCS4(text0, aInString[1]); + } + // find length of the char/cluster to be ignored + int32_t ignoreLen = before == LT_IGNORE ? 0 : 1; + if (ignoreLen) { + GraphemeClusterBreakIteratorUtf16 ci( + Span(aInString, aInLength)); + ignoreLen = *ci.Next(); + } + + int32_t afterIndex = aRepLen + ignoreLen; + uint32_t textAfterPos = aInString[afterIndex]; + if (aInLength > afterIndex + 1 && + NS_IS_SURROGATE_PAIR(textAfterPos, aInString[afterIndex + 1])) { + textAfterPos = SURROGATE_TO_UCS4(textAfterPos, aInString[afterIndex + 1]); + } + + return !((before == LT_ALPHA && !IsAlpha(text0)) || + (before == LT_DIGIT && !IsDigit(text0)) || + (before == LT_DELIMITER && + (IsAlpha(text0) || IsDigit(text0) || text0 == *rep)) || + (after == LT_ALPHA && !IsAlpha(textAfterPos)) || + (after == LT_DIGIT && !IsDigit(textAfterPos)) || + (after == LT_DELIMITER && + (IsAlpha(textAfterPos) || IsDigit(textAfterPos) || + textAfterPos == *rep)) || + !Substring(Substring(aInString, aInString + aInLength), ignoreLen, + aRepLen) + .Equals(Substring(rep, rep + aRepLen), + nsCaseInsensitiveStringComparator)); +} + +uint32_t mozTXTToHTMLConv::NumberOfMatches(const char16_t* aInString, + int32_t aInStringLength, + const char16_t* rep, int32_t aRepLen, + LIMTYPE before, LIMTYPE after) { + uint32_t result = 0; + + // Limit lookahead length to avoid pathological O(n^2) behavior; looking so + // far ahead is unlikely to be important for cases where styling marked-up + // fragments is actually useful anyhow. 
+ const uint32_t len = + std::min(2000u, mozilla::AssertedCast<uint32_t>(aInStringLength)); + GraphemeClusterBreakIteratorUtf16 ci(Span<const char16_t>(aInString, len)); + for (uint32_t pos = 0; pos < len; pos = *ci.Next()) { + if (ItMatchesDelimited(aInString + pos, aInStringLength - pos, rep, aRepLen, + before, after)) { + result++; + } + } + return result; +} + +// NOTE: the converted html for the phrase is appended to aOutString +// tagHTML and attributeHTML are plain ASCII (literal strings, in fact) +// Returns true when an opening or closing tag was emitted; openTags is the +// caller's count of currently open tags of this kind and is updated here. +// NOTE(review): the moz-txt-tag span markup below was reconstructed after it +// had been stripped from this copy of the patch - confirm against upstream. +bool mozTXTToHTMLConv::StructPhraseHit( + const char16_t* aInString, int32_t aInStringLength, bool col0, + const char16_t* tagTXT, int32_t aTagTXTLen, const char* tagHTML, + const char* attributeHTML, nsAString& aOutString, uint32_t& openTags) { + /* We're searching for the following pattern: + LT_DELIMITER - "*" - ALPHA - + [ some text (maybe more "*"-pairs) - ALPHA ] "*" - LT_DELIMITER. + <strong> is only inserted, if existence of a pair could be verified + We use the first opening/closing tag, if we can choose */ + + const char16_t* newOffset = aInString; + int32_t newLength = aInStringLength; + if (!col0) // skip the first element? + { + newOffset = &aInString[1]; + newLength = aInStringLength - 1; + } + + // opening tag + if (ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, + (col0 ?
LT_IGNORE : LT_DELIMITER), + LT_ALPHA) // is opening tag + && NumberOfMatches(newOffset, newLength, tagTXT, aTagTXTLen, LT_ALPHA, + LT_DELIMITER) // remaining closing tags + > openTags) { + openTags++; + aOutString.Append('<'); + aOutString.AppendASCII(tagHTML); + aOutString.Append(char16_t(' ')); + aOutString.AppendASCII(attributeHTML); + // Keep the plain-text marker in the output, wrapped in its own span. + aOutString.AppendLiteral("><span class=\"moz-txt-tag\">"); + aOutString.Append(tagTXT); + aOutString.AppendLiteral("</span>"); + return true; + } + + // closing tag + if (openTags > 0 && ItMatchesDelimited(aInString, aInStringLength, tagTXT, + aTagTXTLen, LT_ALPHA, LT_DELIMITER)) { + openTags--; + aOutString.AppendLiteral("<span class=\"moz-txt-tag\">"); + aOutString.Append(tagTXT); + aOutString.AppendLiteral("</span></"); + aOutString.AppendASCII(tagHTML); + aOutString.Append(char16_t('>')); + return true; + } + + return false; +} + +bool mozTXTToHTMLConv::SmilyHit(const char16_t* aInString, int32_t aLength, + bool col0, const char* tagTXT, + const nsString& imageName, nsString& outputHTML, + int32_t& glyphTextLen) { + if (!aInString || !tagTXT || imageName.IsEmpty()) return false; + + int32_t tagLen = strlen(tagTXT); + + uint32_t delim = (col0 ? 0 : 1) + tagLen; + + if ((col0 || IsSpace(aInString[0])) && + (aLength <= int32_t(delim) || IsSpace(aInString[delim]) || + (aLength > int32_t(delim + 1) && + (aInString[delim] == '.' || aInString[delim] == ',' || + aInString[delim] == ';' || aInString[delim] == '8' || + aInString[delim] == '>' || aInString[delim] == '!' || + aInString[delim] == '?') && + IsSpace(aInString[delim + 1]))) && + ItMatchesDelimited(aInString, aLength, + NS_ConvertASCIItoUTF16(tagTXT).get(), tagLen, + col0 ? LT_IGNORE : LT_DELIMITER, LT_IGNORE) + // Note: tests at different pos for LT_IGNORE and LT_DELIMITER + ) { + if (!col0) { + outputHTML.Truncate(); + outputHTML.Append(char16_t(' ')); + } + + outputHTML.Append(imageName); // emoji unicode + glyphTextLen = (col0 ? 0 : 1) + tagLen; + return true; + } + + return false; +} + +// the glyph is appended to aOutputString instead of the original string...
+bool mozTXTToHTMLConv::GlyphHit(const char16_t* aInString, int32_t aInLength, + bool col0, nsAString& aOutputString, + int32_t& glyphTextLen) { + // Tries, in order: smileys, form feed, "+/-" and "x^2"-style powers. On a + // hit the replacement is appended to aOutputString, glyphTextLen is set to + // the number of input characters consumed, and true is returned. + char16_t text0 = aInString[0]; + // Do not read past the end when only one character is left. + char16_t text1 = aInLength > 1 ? aInString[1] : char16_t(0); + char16_t firstChar = (col0 ? text0 : text1); + + // temporary variable used to store the glyph html text + nsAutoString outputHTML; + bool bTestSmilie; + bool bArg = false; + int i; + + // refactor some of this mess to avoid code duplication and speed execution a + // bit there are two cases that need to be tried one after another. To avoid a + // lot of duplicate code, rolling into a loop + + i = 0; + while (i < 2) { + bTestSmilie = false; + if (!i && (firstChar == ':' || firstChar == ';' || firstChar == '=' || + firstChar == '>' || firstChar == '8' || firstChar == 'O')) { + // first test passed + + bTestSmilie = true; + bArg = col0; + } + if (i && col0 && + (text1 == ':' || text1 == ';' || text1 == '=' || text1 == '>' || + text1 == '8' || text1 == 'O')) { + // second test passed + + bTestSmilie = true; + bArg = false; + } + if (bTestSmilie && (SmilyHit(aInString, aInLength, bArg, ":-)", + u"🙂"_ns, // smile, U+1F642 + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":)", + u"🙂"_ns, // smile, U+1F642 + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":-D", + u"😂"_ns, // laughing, U+1F602 + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":-(", + u"🙁"_ns, // frown, U+1F641 + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":(", + u"🙁"_ns, // frown, U+1F641 + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":$", + u"😳"_ns, // embarrassed, U+1F633 + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ";-)", + u"😉"_ns, // wink, U+1F609 + outputHTML, glyphTextLen) || + + // Use bArg like every other smiley: passing col0 here made the + // second pass probe column 0 again, so " ;)" at the very start + // of the text was never converted. + SmilyHit(aInString, aInLength, bArg, ";)", + u"😉"_ns, // wink, U+1F609 + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":-\\", + u"😕"_ns, //
undecided, U+1F615 + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":-P", + u"😛"_ns, // tongue, U+1F61B + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ";-P", + u"😜"_ns, // winking face with tongue, U+1F61C + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, "=-O", + u"😮"_ns, // surprise, U+1F62E + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":-*", + u"😘"_ns, // kiss, U+1F618 + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ">:o", + u"🤬"_ns, // swearing, U+1F92C + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ">:-o", + u"🤬"_ns, // swearing, U+1F92C + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ">:(", + u"😠"_ns, // angry, U+1F620 + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ">:-(", + u"😠"_ns, // angry, U+1F620 + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, "8-)", + u"😎"_ns, // cool, U+1F60E + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":-$", + u"🤑"_ns, // money, U+1F911 + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":-!", + u"😬"_ns, // foot, U+1F62C + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, "O:-)", + u"😇"_ns, // innocent, U+1F607 + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":'(", + u"😭"_ns, // cry, U+1F62D + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":-X", + u"🤐"_ns, // sealed, U+1F910 + outputHTML, glyphTextLen))) { + aOutputString.Append(outputHTML); + return true; + } + i++; + } + if (text0 == '\f') { + // NOTE(review): markup reconstructed after being stripped from this copy + // of the patch - confirm against upstream. + aOutputString.AppendLiteral("<span class='moz-txt-formfeed'></span>"); + glyphTextLen = 1; + return true; + } + if (text0 == '+' || text1 == '+') { + if (ItMatchesDelimited(aInString, aInLength, u" +/-", 4, LT_IGNORE, + LT_IGNORE)) { + aOutputString.AppendLiteral(" &plusmn;"); + glyphTextLen = 4; + return true; + } + if (col0 &&
ItMatchesDelimited(aInString, aInLength, u"+/-", 3, LT_IGNORE, + LT_IGNORE)) { + aOutputString.AppendLiteral("&plusmn;"); + glyphTextLen = 3; + return true; + } + } + + // x^2 => x<sup>2</sup>, also handle powers x^-2, x^0.5 + // implement regular expression /[\dA-Za-z\)\]}]\^-?\d+(\.\d+)*[^\dA-Za-z]/ + if (text1 == '^' && + (IsAsciiDigit(text0) || IsAsciiAlpha(text0) || text0 == ')' || + text0 == ']' || text0 == '}') && + ((2 < aInLength && IsAsciiDigit(aInString[2])) || + (3 < aInLength && aInString[2] == '-' && IsAsciiDigit(aInString[3])))) { + // Find first non-digit + int32_t delimPos = 3; // skip "^" and first digit (or '-') + for (; delimPos < aInLength && + (IsAsciiDigit(aInString[delimPos]) || + (aInString[delimPos] == '.' && delimPos + 1 < aInLength && + IsAsciiDigit(aInString[delimPos + 1]))); + delimPos++) { + ; + } + + if (delimPos < aInLength && IsAsciiAlpha(aInString[delimPos])) { + return false; + } + + // NOTE(review): markup reconstructed after being stripped from this copy + // of the patch - confirm against upstream. The "^" is kept in a + // zero-sized span, presumably so that it survives copy and paste. + outputHTML.Truncate(); + outputHTML += text0; + outputHTML.AppendLiteral( + "<sup class=\"moz-txt-sup\">" + "<span style=\"display:inline-block;width:0;height:0;overflow:hidden\">" + "^</span>"); + + aOutputString.Append(outputHTML); + aOutputString.Append(&aInString[2], delimPos - 2); + aOutputString.AppendLiteral("</sup>"); + + glyphTextLen = delimPos /* - 1 + 1 */; + return true; + } + /* + The following strings are not substituted: + |TXT |HTML |Reason + +------+---------+---------- + -> &larr; Bug #454 + => &lArr; ditto + <- &rarr; ditto + <= &rArr; ditto + (tm) &trade; ditto + 1/4 &frac14; is triggered by 1/4 Part 1, 2/4 Part 2, ... + 3/4 &frac34; ditto + 1/2 &frac12; similar + */ + return false; +} + +/*************************************************************************** + Library-internal Interface +****************************************************************************/ + +NS_IMPL_ISUPPORTS(mozTXTToHTMLConv, mozITXTToHTMLConv, nsIStreamConverter, + nsIStreamListener, nsIRequestObserver) + +int32_t mozTXTToHTMLConv::CiteLevelTXT(const char16_t* line, + uint32_t& logLineStart) { + int32_t result = 0; + int32_t lineLength = NS_strlen(line); + + bool moreCites = true; + while (moreCites) { + /* E.g.
the following lines count as quote: + + > text + //#ifdef QUOTE_RECOGNITION_AGGRESSIVE + >text + //#ifdef QUOTE_RECOGNITION_AGGRESSIVE + > text + ] text + USER> text + USER] text + //#endif + + logLineStart is the position of "t" in this example + */ + uint32_t i = logLineStart; + +#ifdef QUOTE_RECOGNITION_AGGRESSIVE + for (; int32_t(i) < lineLength && IsSpace(line[i]); i++) + ; + for (; int32_t(i) < lineLength && IsAsciiAlpha(line[i]) && + nsCRT::IsUpper(line[i]); + i++) + ; + if (int32_t(i) < lineLength && (line[i] == '>' || line[i] == ']')) +#else + if (int32_t(i) < lineLength && line[i] == '>') +#endif + { + i++; + if (int32_t(i) < lineLength && line[i] == ' ') i++; + // sendmail/mbox + // Placed here for performance increase + const char16_t* indexString = &line[logLineStart]; + // here, |logLineStart < lineLength| is always true + uint32_t minlength = std::min(uint32_t(6), NS_strlen(indexString)); + if (Substring(indexString, indexString + minlength) + .Equals(Substring(u">From "_ns, 0, minlength), + nsCaseInsensitiveStringComparator)) { + // XXX RFC2646 + moreCites = false; + } else { + result++; + logLineStart = i; + } + } else { + moreCites = false; + } + } + + return result; +} + +NS_IMETHODIMP +mozTXTToHTMLConv::ScanTXT(const nsAString& aInString, uint32_t whattodo, + nsAString& aOutString) { + if (aInString.Length() == 0) { + aOutString.Truncate(); + return NS_OK; + } + + if (!aOutString.SetCapacity(uint32_t(aInString.Length() * growthRate), + mozilla::fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + + bool doURLs = 0 != (whattodo & kURLs); + bool doGlyphSubstitution = 0 != (whattodo & kGlyphSubstitution); + bool doStructPhrase = 0 != (whattodo & kStructPhrase); + + uint32_t structPhrase_strong = 0; // Number of currently open tags + uint32_t structPhrase_underline = 0; + uint32_t structPhrase_italic = 0; + uint32_t structPhrase_code = 0; + + uint32_t endOfLastURLOutput = 0; + + nsAutoString outputHTML; // moved here for performance increase + + 
const char16_t* rawInputString = aInString.BeginReading(); + uint32_t inLength = aInString.Length(); + + const Span inString(aInString); + GraphemeClusterBreakIteratorUtf16 ci(inString); + uint32_t i = 0; + while (i < inLength) { + if (doGlyphSubstitution) { + int32_t glyphTextLen; + if (GlyphHit(&rawInputString[i], inLength - i, i == 0, aOutString, + glyphTextLen)) { + i = *ci.Seek(i + glyphTextLen - 1); + continue; + } + } + + if (doStructPhrase) { + const char16_t* newOffset = rawInputString; + int32_t newLength = aInString.Length(); + if (i > 0) // skip the first element? + { + GraphemeClusterBreakReverseIteratorUtf16 ri( + Span(rawInputString, i)); + Maybe nextPos = ri.Next(); + newOffset += *nextPos; + newLength -= *nextPos; + } + + switch (aInString[i]) // Performance increase + { + case '*': + if (StructPhraseHit(newOffset, newLength, i == 0, u"*", 1, "b", + "class=\"moz-txt-star\"", aOutString, + structPhrase_strong)) { + i = *ci.Next(); + continue; + } + break; + case '/': + if (StructPhraseHit(newOffset, newLength, i == 0, u"/", 1, "i", + "class=\"moz-txt-slash\"", aOutString, + structPhrase_italic)) { + i = *ci.Next(); + continue; + } + break; + case '_': + if (StructPhraseHit(newOffset, newLength, i == 0, u"_", 1, + "span" /* is deprecated */, + "class=\"moz-txt-underscore\"", aOutString, + structPhrase_underline)) { + i = *ci.Next(); + continue; + } + break; + case '|': + if (StructPhraseHit(newOffset, newLength, i == 0, u"|", 1, "code", + "class=\"moz-txt-verticalline\"", aOutString, + structPhrase_code)) { + i = *ci.Next(); + continue; + } + break; + } + } + + if (doURLs) { + switch (aInString[i]) { + case ':': + case '@': + case '.': + if ((i == 0 || ((i > 0) && aInString[i - 1] != ' ')) && + ((i == aInString.Length() - 1) || + (aInString[i + 1] != ' '))) // Performance increase + { + int32_t replaceBefore; + int32_t replaceAfter; + if (FindURL(rawInputString, aInString.Length(), i, whattodo, + outputHTML, replaceBefore, replaceAfter) && + 
structPhrase_strong + structPhrase_italic + + structPhrase_underline + structPhrase_code == + 0 + /* workaround for bug #19445 */) { + // Don't cut into previously inserted HTML (bug 1509493) + if (aOutString.Length() - replaceBefore < endOfLastURLOutput) { + break; + } + aOutString.Cut(aOutString.Length() - replaceBefore, + replaceBefore); + aOutString += outputHTML; + endOfLastURLOutput = aOutString.Length(); + i = *ci.Seek(i + replaceAfter); + continue; + } + } + break; + } // switch + } + + switch (aInString[i]) { + // Special symbols + case '<': + case '>': + case '&': + EscapeChar(aInString[i], aOutString, false); + i = *ci.Next(); + break; + // Normal characters + default: { + const uint32_t oldIdx = i; + i = *ci.Next(); + aOutString.Append(inString.FromTo(oldIdx, i)); + break; + } + } + } + return NS_OK; +} + +NS_IMETHODIMP +mozTXTToHTMLConv::ScanHTML(const nsAString& input, uint32_t whattodo, + nsAString& aOutString) { + const nsPromiseFlatString& aInString = PromiseFlatString(input); + if (!aOutString.SetCapacity(uint32_t(aInString.Length() * growthRate), + mozilla::fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + + // some common variables we were recalculating + // every time inside the for loop... + int32_t lengthOfInString = aInString.Length(); + const char16_t* uniBuffer = aInString.get(); + +#ifdef DEBUG_BenB_Perf + PRTime parsing_start = PR_IntervalNow(); +#endif + + // Look for simple entities not included in a tags and scan them. + // Skip all tags ("<[...]>") and content in an a link tag (""), + // comment tag (""), style tag, script tag or head tag. + // Unescape the rest (text between tags) and pass it to ScanTXT. + nsAutoCString canFollow(" \f\n\r\t>"); + for (int32_t i = 0; i < lengthOfInString;) { + if (aInString[i] == '<') // html tag + { + int32_t start = i; + if (i + 2 < lengthOfInString && nsCRT::ToLower(aInString[i + 1]) == 'a' && + canFollow.FindChar(aInString[i + 2]) != kNotFound) + // if a tag, skip until . 
+ // Make sure there's a white-space character after, not to match "abbr". + { + i = aInString.LowerCaseFindASCII("", i); + if (i == kNotFound) { + i = lengthOfInString; + } else { + i += 4; + } + } else if (Substring(aInString, i + 1, 3).LowerCaseEqualsASCII("!--")) + // if out-commended code, skip until --> + { + i = aInString.Find(u"-->", i); + if (i == kNotFound) { + i = lengthOfInString; + } else { + i += 3; + } + } else if (i + 6 < lengthOfInString && + Substring(aInString, i + 1, 5).LowerCaseEqualsASCII("style") && + canFollow.FindChar(aInString[i + 6]) != kNotFound) + // if style tag, skip until + { + i = aInString.LowerCaseFindASCII("", i); + if (i == kNotFound) { + i = lengthOfInString; + } else { + i += 8; + } + } else if (i + 7 < lengthOfInString && + Substring(aInString, i + 1, 6) + .LowerCaseEqualsASCII("script") && + canFollow.FindChar(aInString[i + 7]) != kNotFound) + // if script tag, skip until + { + i = aInString.LowerCaseFindASCII("", i); + if (i == kNotFound) { + i = lengthOfInString; + } else { + i += 9; + } + } else if (i + 5 < lengthOfInString && + Substring(aInString, i + 1, 4).LowerCaseEqualsASCII("head") && + canFollow.FindChar(aInString[i + 5]) != kNotFound) + // if head tag, skip until + // Make sure not to match
. + { + i = aInString.LowerCaseFindASCII("", i); + if (i == kNotFound) { + i = lengthOfInString; + } else { + i += 7; + } + } else // just skip tag (attributes etc.) + { + i = aInString.FindChar('>', i); + if (i == kNotFound) { + i = lengthOfInString; + } else { + i++; + } + } + aOutString.Append(&uniBuffer[start], i - start); + } else { + uint32_t start = uint32_t(i); + i = aInString.FindChar('<', i); + if (i == kNotFound) i = lengthOfInString; + + nsAutoStringN<256> tempString; + tempString.SetCapacity(uint32_t((uint32_t(i) - start) * growthRate)); + UnescapeStr(uniBuffer, start, uint32_t(i) - start, tempString); + ScanTXT(tempString, whattodo, aOutString); + } + } + +#ifdef DEBUG_BenB_Perf + printf("ScanHTML time: %d ms\n", + PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start)); +#endif + return NS_OK; +} + +/**************************************************************************** + XPCOM Interface +*****************************************************************************/ + +NS_IMETHODIMP +mozTXTToHTMLConv::Convert(nsIInputStream* aFromStream, const char* aFromType, + const char* aToType, nsISupports* aCtxt, + nsIInputStream** _retval) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +mozTXTToHTMLConv::AsyncConvertData(const char* aFromType, const char* aToType, + nsIStreamListener* aListener, + nsISupports* aCtxt) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +mozTXTToHTMLConv::GetConvertedType(const nsACString& aFromType, + nsIChannel* aChannel, nsACString& aToType) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +mozTXTToHTMLConv::OnDataAvailable(nsIRequest* request, nsIInputStream* inStr, + uint64_t sourceOffset, uint32_t count) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +mozTXTToHTMLConv::OnStartRequest(nsIRequest* request) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +mozTXTToHTMLConv::OnStopRequest(nsIRequest* request, nsresult aStatus) { + return NS_ERROR_NOT_IMPLEMENTED; +} + 
+NS_IMETHODIMP +mozTXTToHTMLConv::CiteLevelTXT(const char16_t* line, uint32_t* logLineStart, + uint32_t* _retval) { + if (!logLineStart || !_retval || !line) return NS_ERROR_NULL_POINTER; + *_retval = CiteLevelTXT(line, *logLineStart); + return NS_OK; +} + +nsresult MOZ_NewTXTToHTMLConv(mozTXTToHTMLConv** aConv) { + MOZ_ASSERT(aConv != nullptr, "null ptr"); + if (!aConv) return NS_ERROR_NULL_POINTER; + + RefPtr conv = new mozTXTToHTMLConv(); + conv.forget(aConv); + // return (*aConv)->Init(); + return NS_OK; +} diff --git a/netwerk/streamconv/converters/mozTXTToHTMLConv.h b/netwerk/streamconv/converters/mozTXTToHTMLConv.h new file mode 100644 index 0000000000..a9b8888450 --- /dev/null +++ b/netwerk/streamconv/converters/mozTXTToHTMLConv.h @@ -0,0 +1,284 @@ +/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + Description: Currently only functions to enhance plain text with HTML tags. + See mozITXTToHTMLConv. Stream conversion is defunct. 
+*/ + +#ifndef _mozTXTToHTMLConv_h__ +#define _mozTXTToHTMLConv_h__ + +#include "mozITXTToHTMLConv.h" +#include "nsString.h" +#include "nsCOMPtr.h" + +class nsIIOService; + +class mozTXTToHTMLConv : public mozITXTToHTMLConv { + virtual ~mozTXTToHTMLConv() = default; + + ////////////////////////////////////////////////////////// + public: + ////////////////////////////////////////////////////////// + + mozTXTToHTMLConv() = default; + NS_DECL_ISUPPORTS + + NS_DECL_MOZITXTTOHTMLCONV + NS_DECL_NSIREQUESTOBSERVER + NS_DECL_NSISTREAMLISTENER + NS_DECL_NSISTREAMCONVERTER + + /** + see mozITXTToHTMLConv::CiteLevelTXT + */ + int32_t CiteLevelTXT(const char16_t* line, uint32_t& logLineStart); + + ////////////////////////////////////////////////////////// + protected: + ////////////////////////////////////////////////////////// + nsCOMPtr + mIOService; // for performance reasons, cache the netwerk service... + /** + Completes
    +
  • Case 1: mailto: "mozilla@bucksch.org" -> "mailto:mozilla@bucksch.org" +
  • Case 2: http: "www.mozilla.org" -> "http://www.mozilla.org" +
  • Case 3: ftp: "ftp.mozilla.org" -> "ftp://ftp.mozilla.org" +
+ It does no check, if the resulting URL is valid. + @param text (in): abbreviated URL + @param pos (in): position of "@" (case 1) or first "." (case 2 and 3) + @return Completed URL at success and empty string at failure + */ + void CompleteAbbreviatedURL(const char16_t* aInString, int32_t aInLength, + const uint32_t pos, nsString& aOutString); + + ////////////////////////////////////////////////////////// + private: + ////////////////////////////////////////////////////////// + + enum LIMTYPE { + LT_IGNORE, // limitation not checked + LT_DELIMITER, // not alphanumeric and not rep[0]. End of text is also ok. + LT_ALPHA, // alpha char + LT_DIGIT + }; + + /** + @param text (in): the string to search through.

+ If before = IGNORE,
+ rep is compared starting at 1. char of text (text[0]),
+ else starting at 2. char of text (text[1]). + Chars after "after"-delimiter are ignored. + @param rep (in): the string to look for + @param aRepLen (in): the number of bytes in the string to look for + @param before (in): limitation before rep + @param after (in): limitation after rep + @return true, if rep is found and limitation spec is met or rep is empty + */ + bool ItMatchesDelimited(const char16_t* aInString, int32_t aInLength, + const char16_t* rep, int32_t aRepLen, LIMTYPE before, + LIMTYPE after); + + /** + @param see ItMatchesDelimited + @return Number of ItMatchesDelimited in text + */ + uint32_t NumberOfMatches(const char16_t* aInString, int32_t aInStringLength, + const char16_t* rep, int32_t aRepLen, LIMTYPE before, + LIMTYPE after); + + /** + Currently only changes "<", ">" and "&". All others stay as they are.

+ "Char" in function name to avoid side effects with nsString(ch) + constructors. + @param ch (in) + @param aStringToAppendto (out) - the string to append the escaped + string to. + @param inAttribute (in) - will escape quotes, too (which is + only needed for attribute values) + */ + void EscapeChar(const char16_t ch, nsAString& aStringToAppendto, + bool inAttribute); + + /** + See EscapeChar. Escapes the string in place. + */ + void EscapeStr(nsString& aInString, bool inAttribute); + + /** + Currently only reverts "<", ">" and "&". All others stay as they are.

+ @param aInString (in) HTML string + @param aStartPos (in) start index into the buffer + @param aLength (in) length of the buffer + @param aOutString (out) unescaped buffer + */ + void UnescapeStr(const char16_t* aInString, int32_t aStartPos, + int32_t aLength, nsString& aOutString); + + /** + Note: I use different strategies to pass context between the + functions (full text and pos vs. cutted text and col0, glphyTextLen vs. + replaceBefore/-After). It makes some sense, but is hard to understand + (maintain) :-(. + */ + + /** +

Note: replaceBefore + replaceAfter + 1 (for char at pos) chars + in text should be replaced by outputHTML.

+

Note: This function should be able to process a URL on multiple + lines, but currently, ScanForURLs is called for every line, so it can't.

+ @param text (in): includes possibly a URL + @param pos (in): position in text, where either ":", "." or "@" are found + @param whathasbeendone (in): What the calling ScanTXT did/has to do with the + (not-linkified) text, i.e. usually the "whattodo" parameter. + (Needed to calculate replaceBefore.) NOT what will be done with + the content of the link. + @param outputHTML (out): URL with HTML-a tag + @param replaceBefore (out): Number of chars of URL before pos + @param replaceAfter (out): Number of chars of URL after pos + @return URL found + */ + bool FindURL(const char16_t* aInString, int32_t aInLength, const uint32_t pos, + const uint32_t whathasbeendone, nsString& outputHTML, + int32_t& replaceBefore, int32_t& replaceAfter); + + enum modetype { + unknown, + RFC1738, /* Check, if RFC1738, APPENDIX compliant, + like "". */ + RFC2396E, /* RFC2396, APPENDIX E allows anglebrackets (like + "") (without "URL:") or + quotation marks(like ""http://www.mozilla.org""). + Also allow email addresses without scheme, + e.g. "" */ + freetext, /* assume heading scheme + with "[a-zA-Z][a-zA-Z0-9+\-\.]*:" like "news:" + (see RFC2396, Section 3.1). + Certain characters (see code) or any whitespace + (including linebreaks) end the URL. + Other certain (punctation) characters (see code) + at the end are stripped off. */ + abbreviated /* Similar to freetext, but without scheme, e.g. + "www.mozilla.org", "ftp.mozilla.org" and + "mozilla@bucksch.org". */ + /* RFC1738 and RFC2396E type URLs may use multiple lines, + whitespace is stripped. 
Special characters like ")" stay intact.*/ + }; + + /** + * @param text (in), pos (in): see FindURL + * @param check (in): Start must be conform with this mode + * @param start (out): Position in text, where URL (including brackets or + * similar) starts + * @return |check|-conform start has been found + */ + bool FindURLStart(const char16_t* aInString, int32_t aInLength, + const uint32_t pos, const modetype check, uint32_t& start); + + /** + * @param text (in), pos (in): see FindURL + * @param check (in): End must be conform with this mode + * @param start (in): see FindURLStart + * @param end (out): Similar to |start| param of FindURLStart + * @return |check|-conform end has been found + */ + bool FindURLEnd(const char16_t* aInString, int32_t aInStringLength, + const uint32_t pos, const modetype check, + const uint32_t start, uint32_t& end); + + /** + * @param text (in), pos (in), whathasbeendone (in): see FindURL + * @param check (in): Current mode + * @param start (in), end (in): see FindURLEnd + * @param txtURL (out): Guessed (raw) URL. + * Without whitespace, but not completed. + * @param desc (out): Link as shown to the user, but already escaped. + * Should be placed between the and tags. + * @param replaceBefore(out), replaceAfter (out): see FindURL + */ + void CalculateURLBoundaries(const char16_t* aInString, + int32_t aInStringLength, const uint32_t pos, + const uint32_t whathasbeendone, + const modetype check, const uint32_t start, + const uint32_t end, nsString& txtURL, + nsString& desc, int32_t& replaceBefore, + int32_t& replaceAfter); + + /** + * @param txtURL (in), desc (in): see CalculateURLBoundaries + * @param outputHTML (out): see FindURL + * @return A valid URL could be found (and creation of HTML successful) + */ + bool CheckURLAndCreateHTML(const nsString& txtURL, const nsString& desc, + const modetype mode, nsString& outputHTML); + + /** + @param text (in): line of text possibly with tagTXT.

+ if col0 is true, + starting with tagTXT
+ else + starting one char before tagTXT + @param col0 (in): tagTXT is on the beginning of the line (or paragraph). + open must be 0 then. + @param tagTXT (in): Tag in plaintext to search for, e.g. "*" + @param aTagTxtLen (in): length of tagTXT. + @param tagHTML (in): HTML-Tag to replace tagTXT with, + without "<" and ">", e.g. "strong" + @param attributeHTML (in): HTML-attribute to add to opening tagHTML, + e.g. "class=txt_star" + @param aOutString: string to APPEND the converted html into + @param open (in/out): Number of currently open tags of type tagHTML + @return Conversion succeeded + */ + bool StructPhraseHit(const char16_t* aInString, int32_t aInStringLength, + bool col0, const char16_t* tagTXT, int32_t aTagTxtLen, + const char* tagHTML, const char* attributeHTML, + nsAString& aOutString, uint32_t& openTags); + + /** + @param text (in), col0 (in): see GlyphHit + @param tagTXT (in): Smily, see also StructPhraseHit + @param imageName (in): the basename of the file that contains the image for + this smilie + @param outputHTML (out): new string containing the html for the smily + @param glyphTextLen (out): see GlyphHit + */ + bool SmilyHit(const char16_t* aInString, int32_t aLength, bool col0, + const char* tagTXT, const nsString& imageName, + nsString& outputHTML, int32_t& glyphTextLen); + + /** + Checks, if we can replace some chars at the start of line with prettier HTML + code.

+ If success is reported, replace the first glyphTextLen chars with outputHTML + + @param text (in): line of text possibly with Glyph.

+ If col0 is true, + starting with Glyph
+ else + starting one char before Glyph + @param col0 (in): text starts at the beginning of the line (or paragraph) + @param aOutString (out): APPENDS html for the glyph to this string + @param glyphTextLen (out): Length of original text to replace + @return see StructPhraseHit + */ + bool GlyphHit(const char16_t* aInString, int32_t aInLength, bool col0, + nsAString& aOutputString, int32_t& glyphTextLen); + + /** + Check if a given url should be linkified. + @param aURL (in): url to be checked on. + */ + bool ShouldLinkify(const nsCString& aURL); +}; + +// It's said, that Win32 and Mac don't like static const members +const int32_t mozTXTToHTMLConv_lastMode = 4; +// Needed (only) by mozTXTToHTMLConv::FindURL +const int32_t mozTXTToHTMLConv_numberOfModes = 4; // dito; unknown not counted + +#endif diff --git a/netwerk/streamconv/converters/nsDirIndex.cpp b/netwerk/streamconv/converters/nsDirIndex.cpp new file mode 100644 index 0000000000..6d7f19c11e --- /dev/null +++ b/netwerk/streamconv/converters/nsDirIndex.cpp @@ -0,0 +1,86 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsDirIndex.h" +/* Supplies the nsISupports implementation for nsDirIndex, listing nsIDirIndex as its interface. Every accessor below reads or writes a single member and returns NS_OK. */ +NS_IMPL_ISUPPORTS(nsDirIndex, nsIDirIndex) +/* Copies the stored entry type (mType) into *aType; NS_ENSURE_ARG_POINTER checks the out-pointer before it is written. */ +NS_IMETHODIMP +nsDirIndex::GetType(uint32_t* aType) { + NS_ENSURE_ARG_POINTER(aType); + + *aType = mType; + return NS_OK; +} +/* Replaces the stored entry type with aType. */ +NS_IMETHODIMP +nsDirIndex::SetType(uint32_t aType) { + mType = aType; + return NS_OK; +} +/* Assigns the stored content type (mContentType) to aContentType. */ +NS_IMETHODIMP +nsDirIndex::GetContentType(nsACString& aContentType) { + aContentType = mContentType; + return NS_OK; +} +/* Replaces the stored content type with a copy of aContentType. */ +NS_IMETHODIMP +nsDirIndex::SetContentType(const nsACString& aContentType) { + mContentType = aContentType; + return NS_OK; +} +/* Assigns the stored location (mLocation) to aLocation. */ +NS_IMETHODIMP +nsDirIndex::GetLocation(nsACString& aLocation) { + aLocation = mLocation; + return NS_OK; +} +/* Replaces the stored location with a copy of aLocation. */ +NS_IMETHODIMP +nsDirIndex::SetLocation(const nsACString& aLocation) { + mLocation = aLocation; + return NS_OK; +} +/* Assigns the stored description (mDescription, a UTF-16 string) to aDescription. */ +NS_IMETHODIMP +nsDirIndex::GetDescription(nsAString& aDescription) { + aDescription = mDescription; + return NS_OK; +} +/* Replaces the stored description with a copy of aDescription. */ +NS_IMETHODIMP +nsDirIndex::SetDescription(const nsAString& aDescription) { + mDescription = aDescription; + return NS_OK; +} +/* Copies the stored size (mSize) into *aSize; NS_ENSURE_ARG_POINTER checks the out-pointer before it is written. */ +NS_IMETHODIMP +nsDirIndex::GetSize(int64_t* aSize) { + NS_ENSURE_ARG_POINTER(aSize); + + *aSize = mSize; + return NS_OK; +} +/* Replaces the stored size with aSize; the value is stored as given, without validation. */ +NS_IMETHODIMP +nsDirIndex::SetSize(int64_t aSize) { + mSize = aSize; + return NS_OK; +} +/* Copies the stored modification time (mLastModified) into *aLastModified; NS_ENSURE_ARG_POINTER checks the out-pointer before it is written. */ +NS_IMETHODIMP +nsDirIndex::GetLastModified(PRTime* aLastModified) { + NS_ENSURE_ARG_POINTER(aLastModified); + + *aLastModified = mLastModified; + return NS_OK; +} +/* Replaces the stored modification time with aLastModified. */ +NS_IMETHODIMP +nsDirIndex::SetLastModified(PRTime aLastModified) { + mLastModified = aLastModified; + return NS_OK; +} diff --git a/netwerk/streamconv/converters/nsDirIndex.h b/netwerk/streamconv/converters/nsDirIndex.h new file mode 100644 index 0000000000..eb6009295c --- /dev/null +++ b/netwerk/streamconv/converters/nsDirIndex.h @@ -0,0 +1,32 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsDirIndex_h__ +#define nsDirIndex_h__ + +#include "nsIDirIndex.h" +#include "nsString.h" +#include "mozilla/Attributes.h" +/* Value holder for one directory-index entry: it implements nsIDirIndex by storing each attribute in a member and handing it back from the matching getter (see nsDirIndex.cpp). The destructor is private, so code outside the class cannot destroy an instance directly. */ +class nsDirIndex final : public nsIDirIndex { + private: + ~nsDirIndex() = default; + + public: + nsDirIndex() = default; +/* Declarations of the nsISupports and nsIDirIndex methods; their definitions are in nsDirIndex.cpp. */ + NS_DECL_ISUPPORTS + NS_DECL_NSIDIRINDEX +/* Backing fields for the nsIDirIndex attributes. A default-constructed entry has mType == TYPE_UNKNOWN, empty strings, mSize == INT64_MAX and mLastModified == -1. NOTE(review): nsDirIndexParser::ParseData calls SetSize(UINT64_MAX) when Content-Length cannot be parsed, which is not the same value as the INT64_MAX default once converted to int64_t -- confirm which value consumers treat as "unknown". */ + protected: + uint32_t mType{TYPE_UNKNOWN}; + nsCString mContentType; + nsCString mLocation; + nsString mDescription; + int64_t mSize{INT64_MAX}; + PRTime mLastModified{-1LL}; +}; + +#endif diff --git a/netwerk/streamconv/converters/nsDirIndexParser.cpp b/netwerk/streamconv/converters/nsDirIndexParser.cpp new file mode 100644 index 0000000000..6501cfac1f --- /dev/null +++ b/netwerk/streamconv/converters/nsDirIndexParser.cpp @@ -0,0 +1,440 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* This parsing code originally lived in xpfe/components/directory/ - bbaetz */ + +#include "nsDirIndexParser.h" + +#include "mozilla/ArrayUtils.h" +#include "mozilla/ClearOnShutdown.h" +#include "mozilla/Encoding.h" +#include "mozilla/StaticPtr.h" +#include "prprf.h" +#include "nsCRT.h" +#include "nsDirIndex.h" +#include "nsEscape.h" +#include "nsIDirIndex.h" +#include "nsIInputStream.h" +#include "nsITextToSubURI.h" +#include "nsServiceManagerUtils.h" +#include "mozilla/intl/LocaleService.h" + +using namespace mozilla; + +struct EncodingProp { + const char* const mKey; + NotNull mValue; +}; + +static StaticRefPtr gTextToSubURI; + +static const EncodingProp localesFallbacks[] = { + {"ar", WINDOWS_1256_ENCODING}, {"ba", WINDOWS_1251_ENCODING}, + {"be", WINDOWS_1251_ENCODING}, {"bg", WINDOWS_1251_ENCODING}, + {"cs", WINDOWS_1250_ENCODING}, {"el", ISO_8859_7_ENCODING}, + {"et", WINDOWS_1257_ENCODING}, {"fa", WINDOWS_1256_ENCODING}, + {"he", WINDOWS_1255_ENCODING}, {"hr", WINDOWS_1250_ENCODING}, + {"hu", ISO_8859_2_ENCODING}, {"ja", SHIFT_JIS_ENCODING}, + {"kk", WINDOWS_1251_ENCODING}, {"ko", EUC_KR_ENCODING}, + {"ku", WINDOWS_1254_ENCODING}, {"ky", WINDOWS_1251_ENCODING}, + {"lt", WINDOWS_1257_ENCODING}, {"lv", WINDOWS_1257_ENCODING}, + {"mk", WINDOWS_1251_ENCODING}, {"pl", ISO_8859_2_ENCODING}, + {"ru", WINDOWS_1251_ENCODING}, {"sah", WINDOWS_1251_ENCODING}, + {"sk", WINDOWS_1250_ENCODING}, {"sl", ISO_8859_2_ENCODING}, + {"sr", WINDOWS_1251_ENCODING}, {"tg", WINDOWS_1251_ENCODING}, + {"th", WINDOWS_874_ENCODING}, {"tr", WINDOWS_1254_ENCODING}, + {"tt", WINDOWS_1251_ENCODING}, {"uk", WINDOWS_1251_ENCODING}, + {"vi", WINDOWS_1258_ENCODING}, {"zh", GBK_ENCODING}}; + +static NotNull +GetFTPFallbackEncodingDoNotAddNewCallersToThisFunction() { + nsAutoCString locale; + mozilla::intl::LocaleService::GetInstance()->GetAppLocaleAsBCP47(locale); + + // Let's lower case the string just in case unofficial language packs + // don't stick to conventions. 
+ ToLowerCase(locale); // ASCII lowercasing with CString input! + + // Special case Traditional Chinese before throwing away stuff after the + // language itself. Today we only ship zh-TW, but be defensive about + // possible future values. + if (locale.EqualsLiteral("zh-tw") || locale.EqualsLiteral("zh-hk") || + locale.EqualsLiteral("zh-mo") || locale.EqualsLiteral("zh-hant")) { + return BIG5_ENCODING; + } + + // Throw away regions and other variants to accommodate weird stuff seen + // in telemetry--apparently unofficial language packs. + int32_t hyphenIndex = locale.FindChar('-'); + if (hyphenIndex >= 0) { + locale.Truncate(hyphenIndex); + } + + size_t index; + if (BinarySearchIf( + localesFallbacks, 0, ArrayLength(localesFallbacks), + [&locale](const EncodingProp& aProperty) { + return Compare(locale, nsDependentCString(aProperty.mKey)); + }, + &index)) { + return localesFallbacks[index].mValue; + } + return WINDOWS_1252_ENCODING; +} + +NS_IMPL_ISUPPORTS(nsDirIndexParser, nsIRequestObserver, nsIStreamListener, + nsIDirIndexParser) + +nsresult nsDirIndexParser::Init() { + mLineStart = 0; + mHasDescription = false; + mFormat[0] = -1; + auto encoding = GetFTPFallbackEncodingDoNotAddNewCallersToThisFunction(); + encoding->Name(mEncoding); + + nsresult rv = NS_OK; + if (!gTextToSubURI) { + nsCOMPtr service = + do_GetService(NS_ITEXTTOSUBURI_CONTRACTID, &rv); + if (NS_SUCCEEDED(rv)) { + gTextToSubURI = service; + ClearOnShutdown(&gTextToSubURI); + } + } + + return rv; +} + +NS_IMETHODIMP +nsDirIndexParser::SetListener(nsIDirIndexListener* aListener) { + mListener = aListener; + return NS_OK; +} + +NS_IMETHODIMP +nsDirIndexParser::GetListener(nsIDirIndexListener** aListener) { + *aListener = do_AddRef(mListener).take(); + return NS_OK; +} + +NS_IMETHODIMP +nsDirIndexParser::GetComment(char** aComment) { + *aComment = ToNewCString(mComment, mozilla::fallible); + + if (!*aComment) return NS_ERROR_OUT_OF_MEMORY; + + return NS_OK; +} + +NS_IMETHODIMP 
+nsDirIndexParser::SetEncoding(const char* aEncoding) { + mEncoding.Assign(aEncoding); + return NS_OK; +} + +NS_IMETHODIMP +nsDirIndexParser::GetEncoding(char** aEncoding) { + *aEncoding = ToNewCString(mEncoding, mozilla::fallible); + + if (!*aEncoding) return NS_ERROR_OUT_OF_MEMORY; + + return NS_OK; +} + +NS_IMETHODIMP +nsDirIndexParser::OnStartRequest(nsIRequest* aRequest) { return NS_OK; } + +NS_IMETHODIMP +nsDirIndexParser::OnStopRequest(nsIRequest* aRequest, nsresult aStatusCode) { + // Finish up + if (mBuf.Length() > (uint32_t)mLineStart) { + ProcessData(aRequest); + } + + return NS_OK; +} + +nsDirIndexParser::Field nsDirIndexParser::gFieldTable[] = { + {"Filename", FIELD_FILENAME}, + {"Description", FIELD_DESCRIPTION}, + {"Content-Length", FIELD_CONTENTLENGTH}, + {"Last-Modified", FIELD_LASTMODIFIED}, + {"Content-Type", FIELD_CONTENTTYPE}, + {"File-Type", FIELD_FILETYPE}, + {nullptr, FIELD_UNKNOWN}}; + +void nsDirIndexParser::ParseFormat(const char* aFormatStr) { + // Parse a "200" format line, and remember the fields and their + // ordering in mFormat. Multiple 200 lines stomp on each other. + unsigned int formatNum = 0; + mFormat[0] = -1; + + do { + while (*aFormatStr && nsCRT::IsAsciiSpace(char16_t(*aFormatStr))) { + ++aFormatStr; + } + + if (!*aFormatStr) break; + + nsAutoCString name; + int32_t len = 0; + while (aFormatStr[len] && !nsCRT::IsAsciiSpace(char16_t(aFormatStr[len]))) { + ++len; + } + name.Append(aFormatStr, len); + aFormatStr += len; + + // Okay, we're gonna monkey with the nsStr. Bold! 
+ name.SetLength(nsUnescapeCount(name.BeginWriting())); + + // All tokens are case-insensitive - + // http://www.mozilla.org/projects/netlib/dirindexformat.html + if (name.LowerCaseEqualsLiteral("description")) mHasDescription = true; + + for (Field* i = gFieldTable; i->mName; ++i) { + if (name.EqualsIgnoreCase(i->mName)) { + mFormat[formatNum] = i->mType; + mFormat[++formatNum] = -1; + break; + } + } + + } while (*aFormatStr && (formatNum < (ArrayLength(mFormat) - 1))); +} + +void nsDirIndexParser::ParseData(nsIDirIndex* aIdx, char* aDataStr, + int32_t aLineLen) { + // Parse a "201" data line, using the field ordering specified in + // mFormat. + + if (mFormat[0] == -1) { + // Ignore if we haven't seen a format yet. + return; + } + + nsAutoCString filename; + int32_t lineLen = aLineLen; + + for (int32_t i = 0; mFormat[i] != -1; ++i) { + // If we've exhausted the data before we run out of fields, just bail. + if (!*aDataStr || (lineLen < 1)) { + return; + } + + while ((lineLen > 0) && nsCRT::IsAsciiSpace(*aDataStr)) { + ++aDataStr; + --lineLen; + } + + if (lineLen < 1) { + // invalid format, bail + return; + } + + char* value = aDataStr; + if (*aDataStr == '"' || *aDataStr == '\'') { + // it's a quoted string. snarf everything up to the next quote character + const char quotechar = *(aDataStr++); + lineLen--; + ++value; + while ((lineLen > 0) && *aDataStr != quotechar) { + ++aDataStr; + --lineLen; + } + if (lineLen > 0) { + *aDataStr++ = '\0'; + --lineLen; + } + + if (!lineLen) { + // invalid format, bail + return; + } + } else { + // it's unquoted. snarf until we see whitespace. 
+ value = aDataStr; + while ((lineLen > 0) && (!nsCRT::IsAsciiSpace(*aDataStr))) { + ++aDataStr; + --lineLen; + } + if (lineLen > 0) { + *aDataStr++ = '\0'; + --lineLen; + } + // even if we ran out of line length here, there's still a trailing zero + // byte afterwards + } + + fieldType t = fieldType(mFormat[i]); + switch (t) { + case FIELD_FILENAME: { + // don't unescape at this point, so that UnEscapeAndConvert() can + filename = value; + + bool success = false; + + nsAutoString entryuri; + + if (RefPtr textToSub = gTextToSubURI) { + nsAutoString result; + if (NS_SUCCEEDED( + textToSub->UnEscapeAndConvert(mEncoding, filename, result))) { + if (!result.IsEmpty()) { + aIdx->SetLocation(filename); + if (!mHasDescription) aIdx->SetDescription(result); + success = true; + } + } else { + NS_WARNING("UnEscapeAndConvert error"); + } + } + + if (!success) { + // if unsuccessfully at charset conversion, then + // just fallback to unescape'ing in-place + // XXX - this shouldn't be using UTF8, should it? + // when can we fail to get the service, anyway? 
- bbaetz + aIdx->SetLocation(filename); + if (!mHasDescription) { + aIdx->SetDescription(NS_ConvertUTF8toUTF16(value)); + } + } + } break; + case FIELD_DESCRIPTION: + nsUnescape(value); + aIdx->SetDescription(NS_ConvertUTF8toUTF16(value)); + break; + case FIELD_CONTENTLENGTH: { + int64_t len; + int32_t status = PR_sscanf(value, "%lld", &len); + if (status == 1) { + aIdx->SetSize(len); + } else { + aIdx->SetSize(UINT64_MAX); // UINT64_MAX means unknown + } + } break; + case FIELD_LASTMODIFIED: { + PRTime tm; + nsUnescape(value); + if (PR_ParseTimeString(value, false, &tm) == PR_SUCCESS) { + aIdx->SetLastModified(tm); + } + } break; + case FIELD_CONTENTTYPE: + aIdx->SetContentType(nsDependentCString(value)); + break; + case FIELD_FILETYPE: + // unescape in-place + nsUnescape(value); + if (!nsCRT::strcasecmp(value, "directory")) { + aIdx->SetType(nsIDirIndex::TYPE_DIRECTORY); + } else if (!nsCRT::strcasecmp(value, "file")) { + aIdx->SetType(nsIDirIndex::TYPE_FILE); + } else if (!nsCRT::strcasecmp(value, "symbolic-link")) { + aIdx->SetType(nsIDirIndex::TYPE_SYMLINK); + } else { + aIdx->SetType(nsIDirIndex::TYPE_UNKNOWN); + } + break; + case FIELD_UNKNOWN: + // ignore + break; + } + } +} + +NS_IMETHODIMP +nsDirIndexParser::OnDataAvailable(nsIRequest* aRequest, nsIInputStream* aStream, + uint64_t aSourceOffset, uint32_t aCount) { + if (aCount < 1) return NS_OK; + + uint32_t len = mBuf.Length(); + + // Ensure that our mBuf has capacity to hold the data we're about to + // read. + // Before adjusting the capacity, guard against any potential overflow + // resulting from the addition of aCount with len. See Bug 1823551. + NS_ENSURE_TRUE((UINT32_MAX - aCount) >= len, NS_ERROR_FAILURE); + if (!mBuf.SetLength(len + aCount, fallible)) return NS_ERROR_OUT_OF_MEMORY; + + // Now read the data into our buffer. 
+ nsresult rv; + uint32_t count; + rv = aStream->Read(mBuf.BeginWriting() + len, aCount, &count); + if (NS_FAILED(rv)) return rv; + + // Set the string's length according to the amount of data we've read. + // Note: we know this to work on nsCString. This isn't guaranteed to + // work on other strings. + mBuf.SetLength(len + count); + + return ProcessData(aRequest); +} + +nsresult nsDirIndexParser::ProcessData(nsIRequest* aRequest) { + if (!mListener) return NS_ERROR_FAILURE; + + while (true) { + int32_t eol = mBuf.FindCharInSet("\n\r", mLineStart); + if (eol < 0) break; + mBuf.SetCharAt(char16_t('\0'), eol); + + const char* line = mBuf.get() + mLineStart; + + int32_t lineLen = eol - mLineStart; + mLineStart = eol + 1; + + if (lineLen >= 4) { + const char* buf = line; + + if (buf[0] == '1') { + if (buf[1] == '0') { + if (buf[2] == '0' && buf[3] == ':') { + // 100. Human-readable comment line. Ignore + } else if (buf[2] == '1' && buf[3] == ':') { + // 101. Human-readable information line. + mComment.Append(buf + 4); + + char* value = ((char*)buf) + 4; + nsUnescape(value); + mListener->OnInformationAvailable(aRequest, + NS_ConvertUTF8toUTF16(value)); + + } else if (buf[2] == '2' && buf[3] == ':') { + // 102. Human-readable information line, HTML. + mComment.Append(buf + 4); + } + } + } else if (buf[0] == '2') { + if (buf[1] == '0') { + if (buf[2] == '0' && buf[3] == ':') { + // 200. Define field names + ParseFormat(buf + 4); + } else if (buf[2] == '1' && buf[3] == ':') { + // 201. Field data + nsCOMPtr idx = new nsDirIndex(); + + ParseData(idx, ((char*)buf) + 4, lineLen - 4); + mListener->OnIndexAvailable(aRequest, idx); + } + } + } else if (buf[0] == '3') { + if (buf[1] == '0') { + if (buf[2] == '0' && buf[3] == ':') { + // 300. Self-referring URL + } else if (buf[2] == '1' && buf[3] == ':') { + // 301. 
OUR EXTENSION - encoding + int i = 4; + while (buf[i] && nsCRT::IsAsciiSpace(buf[i])) ++i; + + if (buf[i]) SetEncoding(buf + i); + } + } + } + } + } + + return NS_OK; +} diff --git a/netwerk/streamconv/converters/nsDirIndexParser.h b/netwerk/streamconv/converters/nsDirIndexParser.h new file mode 100644 index 0000000000..597a946a3c --- /dev/null +++ b/netwerk/streamconv/converters/nsDirIndexParser.h @@ -0,0 +1,72 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __NSDIRINDEX_H_ +#define __NSDIRINDEX_H_ + +#include "nsString.h" +#include "nsCOMPtr.h" +#include "nsIDirIndexListener.h" +#include "mozilla/RefPtr.h" + +class nsIDirIndex; +class nsITextToSubURI; + +/* CID: {a0d6ad32-1dd1-11b2-aa55-a40187b54036} */ + +class nsDirIndexParser : public nsIDirIndexParser { + private: + virtual ~nsDirIndexParser() = default; + + nsDirIndexParser() = default; + nsresult Init(); + + public: + NS_DECL_ISUPPORTS + NS_DECL_NSISTREAMLISTENER + NS_DECL_NSIREQUESTOBSERVER + NS_DECL_NSIDIRINDEXPARSER + + static already_AddRefed CreateInstance() { + RefPtr parser = new nsDirIndexParser(); + if (NS_FAILED(parser->Init())) { + return nullptr; + } + return parser.forget(); + } + + enum fieldType { + FIELD_UNKNOWN = 0, // MUST be 0 + FIELD_FILENAME, + FIELD_DESCRIPTION, + FIELD_CONTENTLENGTH, + FIELD_LASTMODIFIED, + FIELD_CONTENTTYPE, + FIELD_FILETYPE + }; + + protected: + nsCOMPtr mListener; + + nsCString mEncoding; + nsCString mComment; + nsCString mBuf; + int32_t mLineStart{0}; + bool mHasDescription{false}; + int mFormat[8]{-1}; + + nsresult ProcessData(nsIRequest* aRequest); + void ParseFormat(const char* aFormatStr); + void ParseData(nsIDirIndex* aIdx, char* aDataStr, int32_t lineLen); + + struct Field { + const char* mName; + fieldType 
mType; + }; + + static Field gFieldTable[]; +}; + +#endif diff --git a/netwerk/streamconv/converters/nsHTTPCompressConv.cpp b/netwerk/streamconv/converters/nsHTTPCompressConv.cpp new file mode 100644 index 0000000000..b5c58b9754 --- /dev/null +++ b/netwerk/streamconv/converters/nsHTTPCompressConv.cpp @@ -0,0 +1,746 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set sw=2 ts=8 et tw=80 : */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsHTTPCompressConv.h" +#include "nsCOMPtr.h" +#include "nsCRT.h" +#include "nsError.h" +#include "nsStreamUtils.h" +#include "nsStringStream.h" +#include "nsComponentManagerUtils.h" +#include "nsThreadUtils.h" +#include "mozilla/Preferences.h" +#include "mozilla/Logging.h" +#include "nsIForcePendingChannel.h" +#include "nsIRequest.h" +#include "mozilla/UniquePtrExtensions.h" + +// brotli headers +#undef assert +#include "assert.h" +#include "state.h" +#include "brotli/decode.h" + +namespace mozilla { +namespace net { + +extern LazyLogModule gHttpLog; +#define LOG(args) \ + MOZ_LOG(mozilla::net::gHttpLog, mozilla::LogLevel::Debug, args) + +class BrotliWrapper { + public: + BrotliWrapper() { + BrotliDecoderStateInit(&mState, nullptr, nullptr, nullptr); + } + ~BrotliWrapper() { BrotliDecoderStateCleanup(&mState); } + + BrotliDecoderState mState{}; + Atomic mTotalOut{0}; + nsresult mStatus = NS_OK; + Atomic mBrotliStateIsStreamEnd{false}; + + nsIRequest* mRequest{nullptr}; + nsISupports* mContext{nullptr}; + uint64_t mSourceOffset{0}; +}; + +// nsISupports implementation +NS_IMPL_ISUPPORTS(nsHTTPCompressConv, nsIStreamConverter, nsIStreamListener, + nsIRequestObserver, nsICompressConvStats, + nsIThreadRetargetableStreamListener) + +// nsFTPDirListingConv methods +nsHTTPCompressConv::nsHTTPCompressConv() { + 
LOG(("nsHttpCompresssConv %p ctor\n", this)); + if (NS_IsMainThread()) { + mFailUncleanStops = + Preferences::GetBool("network.http.enforce-framing.http", false); + } else { + mFailUncleanStops = false; + } +} + +nsHTTPCompressConv::~nsHTTPCompressConv() { + LOG(("nsHttpCompresssConv %p dtor\n", this)); + if (mInpBuffer) { + free(mInpBuffer); + } + + if (mOutBuffer) { + free(mOutBuffer); + } + + // For some reason we are not getting Z_STREAM_END. But this was also seen + // for mozilla bug 198133. Need to handle this case. + if (mStreamInitialized && !mStreamEnded) { + inflateEnd(&d_stream); + } +} + +NS_IMETHODIMP +nsHTTPCompressConv::GetDecodedDataLength(uint64_t* aDecodedDataLength) { + *aDecodedDataLength = mDecodedDataLength; + return NS_OK; +} + +NS_IMETHODIMP +nsHTTPCompressConv::AsyncConvertData(const char* aFromType, const char* aToType, + nsIStreamListener* aListener, + nsISupports* aCtxt) { + if (!nsCRT::strncasecmp(aFromType, HTTP_COMPRESS_TYPE, + sizeof(HTTP_COMPRESS_TYPE) - 1) || + !nsCRT::strncasecmp(aFromType, HTTP_X_COMPRESS_TYPE, + sizeof(HTTP_X_COMPRESS_TYPE) - 1)) { + mMode = HTTP_COMPRESS_COMPRESS; + } else if (!nsCRT::strncasecmp(aFromType, HTTP_GZIP_TYPE, + sizeof(HTTP_GZIP_TYPE) - 1) || + !nsCRT::strncasecmp(aFromType, HTTP_X_GZIP_TYPE, + sizeof(HTTP_X_GZIP_TYPE) - 1)) { + mMode = HTTP_COMPRESS_GZIP; + } else if (!nsCRT::strncasecmp(aFromType, HTTP_DEFLATE_TYPE, + sizeof(HTTP_DEFLATE_TYPE) - 1)) { + mMode = HTTP_COMPRESS_DEFLATE; + } else if (!nsCRT::strncasecmp(aFromType, HTTP_BROTLI_TYPE, + sizeof(HTTP_BROTLI_TYPE) - 1)) { + mMode = HTTP_COMPRESS_BROTLI; + } + LOG(("nsHttpCompresssConv %p AsyncConvertData %s %s mode %d\n", this, + aFromType, aToType, (CompressMode)mMode)); + + MutexAutoLock lock(mMutex); + // hook ourself up with the receiving listener. 
+ mListener = aListener; + + return NS_OK; +} + +NS_IMETHODIMP +nsHTTPCompressConv::GetConvertedType(const nsACString& aFromType, + nsIChannel* aChannel, + nsACString& aToType) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +nsHTTPCompressConv::OnStartRequest(nsIRequest* request) { + LOG(("nsHttpCompresssConv %p onstart\n", this)); + nsCOMPtr listener; + { + MutexAutoLock lock(mMutex); + listener = mListener; + } + return listener->OnStartRequest(request); +} + +NS_IMETHODIMP +nsHTTPCompressConv::OnStopRequest(nsIRequest* request, nsresult aStatus) { + nsresult status = aStatus; + LOG(("nsHttpCompresssConv %p onstop %" PRIx32 "\n", this, + static_cast(aStatus))); + + // Framing integrity is enforced for content-encoding: gzip, but not for + // content-encoding: deflate. Note that gzip vs deflate is NOT determined + // by content sniffing but only via header. + if (!mStreamEnded && NS_SUCCEEDED(status) && + (mFailUncleanStops && (mMode == HTTP_COMPRESS_GZIP))) { + // This is not a clean end of gzip stream: the transfer is incomplete. 
+ status = NS_ERROR_NET_PARTIAL_TRANSFER; + LOG(("nsHttpCompresssConv %p onstop partial gzip\n", this)); + } + if (NS_SUCCEEDED(status) && mMode == HTTP_COMPRESS_BROTLI) { + nsCOMPtr fpChannel = do_QueryInterface(request); + bool isPending = false; + if (request) { + request->IsPending(&isPending); + } + if (fpChannel && !isPending) { + fpChannel->ForcePending(true); + } + if (mBrotli && (mBrotli->mTotalOut == 0) && + !mBrotli->mBrotliStateIsStreamEnd) { + status = NS_ERROR_INVALID_CONTENT_ENCODING; + } + LOG(("nsHttpCompresssConv %p onstop brotlihandler rv %" PRIx32 "\n", this, + static_cast(status))); + if (fpChannel && !isPending) { + fpChannel->ForcePending(false); + } + } + + nsCOMPtr listener; + { + MutexAutoLock lock(mMutex); + listener = mListener; + } + return listener->OnStopRequest(request, status); +} + +/* static */ +nsresult nsHTTPCompressConv::BrotliHandler(nsIInputStream* stream, + void* closure, const char* dataIn, + uint32_t, uint32_t aAvail, + uint32_t* countRead) { + MOZ_ASSERT(stream); + nsHTTPCompressConv* self = static_cast(closure); + *countRead = 0; + + const size_t kOutSize = 128 * 1024; // just a chunk size, we call in a loop + uint8_t* outPtr; + size_t outSize; + size_t avail = aAvail; + BrotliDecoderResult res; + + if (!self->mBrotli) { + *countRead = aAvail; + return NS_OK; + } + + auto outBuffer = MakeUniqueFallible(kOutSize); + if (outBuffer == nullptr) { + self->mBrotli->mStatus = NS_ERROR_OUT_OF_MEMORY; + return self->mBrotli->mStatus; + } + do { + outSize = kOutSize; + outPtr = outBuffer.get(); + + // brotli api is documented in brotli/dec/decode.h and brotli/dec/decode.c + LOG(("nsHttpCompresssConv %p brotlihandler decompress %zu\n", self, avail)); + size_t totalOut = self->mBrotli->mTotalOut; + res = ::BrotliDecoderDecompressStream( + &self->mBrotli->mState, &avail, + reinterpret_cast(&dataIn), &outSize, &outPtr, + &totalOut); + outSize = kOutSize - outSize; + self->mBrotli->mTotalOut = totalOut; + 
self->mBrotli->mBrotliStateIsStreamEnd = + BrotliDecoderIsFinished(&self->mBrotli->mState); + LOG(("nsHttpCompresssConv %p brotlihandler decompress rv=%" PRIx32 + " out=%zu\n", + self, static_cast(res), outSize)); + + if (res == BROTLI_DECODER_RESULT_ERROR) { + LOG(("nsHttpCompressConv %p marking invalid encoding", self)); + self->mBrotli->mStatus = NS_ERROR_INVALID_CONTENT_ENCODING; + return self->mBrotli->mStatus; + } + + // in 'the current implementation' brotli must consume everything before + // asking for more input + if (res == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) { + MOZ_ASSERT(!avail); + if (avail) { + LOG(("nsHttpCompressConv %p did not consume all input", self)); + self->mBrotli->mStatus = NS_ERROR_UNEXPECTED; + return self->mBrotli->mStatus; + } + } + + auto callOnDataAvailable = [&](uint64_t aSourceOffset, const char* aBuffer, + uint32_t aCount) { + nsresult rv = self->do_OnDataAvailable(self->mBrotli->mRequest, + aSourceOffset, aBuffer, aCount); + LOG(("nsHttpCompressConv %p BrotliHandler ODA rv=%" PRIx32, self, + static_cast(rv))); + if (NS_FAILED(rv)) { + self->mBrotli->mStatus = rv; + } + + return rv; + }; + + if (outSize > 0) { + if (NS_FAILED(callOnDataAvailable( + self->mBrotli->mSourceOffset, + reinterpret_cast(outBuffer.get()), outSize))) { + return self->mBrotli->mStatus; + } + } + + // See bug 1759745. If the decoder has more output data, take it. 
+ while (::BrotliDecoderHasMoreOutput(&self->mBrotli->mState)) { + outSize = kOutSize; + const uint8_t* buffer = + ::BrotliDecoderTakeOutput(&self->mBrotli->mState, &outSize); + if (NS_FAILED(callOnDataAvailable(self->mBrotli->mSourceOffset, + reinterpret_cast(buffer), + outSize))) { + return self->mBrotli->mStatus; + } + } + + if (res == BROTLI_DECODER_RESULT_SUCCESS || + res == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) { + *countRead = aAvail; + return NS_OK; + } + MOZ_ASSERT(res == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT); + } while (res == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT); + + self->mBrotli->mStatus = NS_ERROR_UNEXPECTED; + return self->mBrotli->mStatus; +} + +NS_IMETHODIMP +nsHTTPCompressConv::OnDataAvailable(nsIRequest* request, nsIInputStream* iStr, + uint64_t aSourceOffset, uint32_t aCount) { + nsresult rv = NS_ERROR_INVALID_CONTENT_ENCODING; + uint32_t streamLen = aCount; + LOG(("nsHttpCompressConv %p OnDataAvailable %d", this, aCount)); + + if (streamLen == 0) { + NS_ERROR("count of zero passed to OnDataAvailable"); + return NS_ERROR_UNEXPECTED; + } + + if (mStreamEnded) { + // Hmm... this may just indicate that the data stream is done and that + // what's left is either metadata or padding of some sort.... throwing + // it out is probably the safe thing to do. 
+ uint32_t n; + return iStr->ReadSegments(NS_DiscardSegment, nullptr, streamLen, &n); + } + + switch (mMode) { + case HTTP_COMPRESS_GZIP: + streamLen = check_header(iStr, streamLen, &rv); + + if (rv != NS_OK) { + return rv; + } + + if (streamLen == 0) { + return NS_OK; + } + + [[fallthrough]]; + + case HTTP_COMPRESS_DEFLATE: + + if (mInpBuffer != nullptr && streamLen > mInpBufferLen) { + unsigned char* originalInpBuffer = mInpBuffer; + if (!(mInpBuffer = (unsigned char*)realloc( + originalInpBuffer, mInpBufferLen = streamLen))) { + free(originalInpBuffer); + } + + if (mOutBufferLen < streamLen * 2) { + unsigned char* originalOutBuffer = mOutBuffer; + if (!(mOutBuffer = (unsigned char*)realloc( + mOutBuffer, mOutBufferLen = streamLen * 3))) { + free(originalOutBuffer); + } + } + + if (mInpBuffer == nullptr || mOutBuffer == nullptr) { + return NS_ERROR_OUT_OF_MEMORY; + } + } + + if (mInpBuffer == nullptr) { + mInpBuffer = (unsigned char*)malloc(mInpBufferLen = streamLen); + } + + if (mOutBuffer == nullptr) { + mOutBuffer = (unsigned char*)malloc(mOutBufferLen = streamLen * 3); + } + + if (mInpBuffer == nullptr || mOutBuffer == nullptr) { + return NS_ERROR_OUT_OF_MEMORY; + } + + uint32_t unused; + iStr->Read((char*)mInpBuffer, streamLen, &unused); + + if (mMode == HTTP_COMPRESS_DEFLATE) { + if (!mStreamInitialized) { + memset(&d_stream, 0, sizeof(d_stream)); + + if (inflateInit(&d_stream) != Z_OK) { + return NS_ERROR_FAILURE; + } + + mStreamInitialized = true; + } + d_stream.next_in = mInpBuffer; + d_stream.avail_in = (uInt)streamLen; + + mDummyStreamInitialised = false; + for (;;) { + d_stream.next_out = mOutBuffer; + d_stream.avail_out = (uInt)mOutBufferLen; + + int code = inflate(&d_stream, Z_NO_FLUSH); + unsigned bytesWritten = (uInt)mOutBufferLen - d_stream.avail_out; + + if (code == Z_STREAM_END) { + if (bytesWritten) { + rv = do_OnDataAvailable(request, aSourceOffset, (char*)mOutBuffer, + bytesWritten); + if (NS_FAILED(rv)) { + return rv; + } + } + + 
inflateEnd(&d_stream); + mStreamEnded = true; + break; + } + if (code == Z_OK) { + if (bytesWritten) { + rv = do_OnDataAvailable(request, aSourceOffset, (char*)mOutBuffer, + bytesWritten); + if (NS_FAILED(rv)) { + return rv; + } + } + } else if (code == Z_BUF_ERROR) { + if (bytesWritten) { + rv = do_OnDataAvailable(request, aSourceOffset, (char*)mOutBuffer, + bytesWritten); + if (NS_FAILED(rv)) { + return rv; + } + } + break; + } else if (code == Z_DATA_ERROR) { + // some servers (notably Apache with mod_deflate) don't generate + // zlib headers insert a dummy header and try again + static char dummy_head[2] = { + 0x8 + 0x7 * 0x10, + (((0x8 + 0x7 * 0x10) * 0x100 + 30) / 31 * 31) & 0xFF, + }; + inflateReset(&d_stream); + d_stream.next_in = (Bytef*)dummy_head; + d_stream.avail_in = sizeof(dummy_head); + + code = inflate(&d_stream, Z_NO_FLUSH); + if (code != Z_OK) { + return NS_ERROR_FAILURE; + } + + // stop an endless loop caused by non-deflate data being labelled as + // deflate + if (mDummyStreamInitialised) { + NS_WARNING( + "endless loop detected" + " - invalid deflate"); + return NS_ERROR_INVALID_CONTENT_ENCODING; + } + mDummyStreamInitialised = true; + // reset stream pointers to our original data + d_stream.next_in = mInpBuffer; + d_stream.avail_in = (uInt)streamLen; + } else { + return NS_ERROR_INVALID_CONTENT_ENCODING; + } + } /* for */ + } else { + if (!mStreamInitialized) { + memset(&d_stream, 0, sizeof(d_stream)); + + if (inflateInit2(&d_stream, -MAX_WBITS) != Z_OK) { + return NS_ERROR_FAILURE; + } + + mStreamInitialized = true; + } + + d_stream.next_in = mInpBuffer; + d_stream.avail_in = (uInt)streamLen; + + for (;;) { + d_stream.next_out = mOutBuffer; + d_stream.avail_out = (uInt)mOutBufferLen; + + int code = inflate(&d_stream, Z_NO_FLUSH); + unsigned bytesWritten = (uInt)mOutBufferLen - d_stream.avail_out; + + if (code == Z_STREAM_END) { + if (bytesWritten) { + rv = do_OnDataAvailable(request, aSourceOffset, (char*)mOutBuffer, + bytesWritten); + if 
(NS_FAILED(rv)) { + return rv; + } + } + + inflateEnd(&d_stream); + mStreamEnded = true; + break; + } + if (code == Z_OK) { + if (bytesWritten) { + rv = do_OnDataAvailable(request, aSourceOffset, (char*)mOutBuffer, + bytesWritten); + if (NS_FAILED(rv)) { + return rv; + } + } + } else if (code == Z_BUF_ERROR) { + if (bytesWritten) { + rv = do_OnDataAvailable(request, aSourceOffset, (char*)mOutBuffer, + bytesWritten); + if (NS_FAILED(rv)) { + return rv; + } + } + break; + } else { + return NS_ERROR_INVALID_CONTENT_ENCODING; + } + } /* for */ + } /* gzip */ + break; + + case HTTP_COMPRESS_BROTLI: { + if (!mBrotli) { + mBrotli = MakeUnique(); + } + + mBrotli->mRequest = request; + mBrotli->mContext = nullptr; + mBrotli->mSourceOffset = aSourceOffset; + + uint32_t countRead; + rv = iStr->ReadSegments(BrotliHandler, this, streamLen, &countRead); + if (NS_SUCCEEDED(rv)) { + rv = mBrotli->mStatus; + } + if (NS_FAILED(rv)) { + return rv; + } + } break; + + default: + nsCOMPtr listener; + { + MutexAutoLock lock(mMutex); + listener = mListener; + } + rv = listener->OnDataAvailable(request, iStr, aSourceOffset, aCount); + if (NS_FAILED(rv)) { + return rv; + } + } /* switch */ + + return NS_OK; +} /* OnDataAvailable */ + +// XXX/ruslan: need to implement this too + +NS_IMETHODIMP +nsHTTPCompressConv::Convert(nsIInputStream* aFromStream, const char* aFromType, + const char* aToType, nsISupports* aCtxt, + nsIInputStream** _retval) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +nsresult nsHTTPCompressConv::do_OnDataAvailable(nsIRequest* request, + uint64_t offset, + const char* buffer, + uint32_t count) { + if (!mStream) { + mStream = do_CreateInstance(NS_STRINGINPUTSTREAM_CONTRACTID); + NS_ENSURE_STATE(mStream); + } + + mStream->ShareData(buffer, count); + + nsCOMPtr listener; + { + MutexAutoLock lock(mMutex); + listener = mListener; + } + nsresult rv = listener->OnDataAvailable(request, mStream, offset, count); + + // Make sure the stream no longer references |buffer| in case our 
listener + // is crazy enough to try to read from |mStream| after ODA. + mStream->ShareData("", 0); + mDecodedDataLength += count; + + return rv; +} + +#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ +#define HEAD_CRC 0x02 /* bit 1 set: header CRC present */ +#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ +#define ORIG_NAME 0x08 /* bit 3 set: original file name present */ +#define COMMENT 0x10 /* bit 4 set: file comment present */ +#define RESERVED 0xE0 /* bits 5..7: reserved */ + +static unsigned gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */ + +uint32_t nsHTTPCompressConv::check_header(nsIInputStream* iStr, + uint32_t streamLen, nsresult* rs) { + enum { + GZIP_INIT = 0, + GZIP_OS, + GZIP_EXTRA0, + GZIP_EXTRA1, + GZIP_EXTRA2, + GZIP_ORIG, + GZIP_COMMENT, + GZIP_CRC + }; + char c; + + *rs = NS_OK; + + if (mCheckHeaderDone) { + return streamLen; + } + + while (streamLen) { + switch (hMode) { + case GZIP_INIT: + uint32_t unused; + iStr->Read(&c, 1, &unused); + streamLen--; + + if (mSkipCount == 0 && ((unsigned)c & 0377) != gz_magic[0]) { + *rs = NS_ERROR_INVALID_CONTENT_ENCODING; + return 0; + } + + if (mSkipCount == 1 && ((unsigned)c & 0377) != gz_magic[1]) { + *rs = NS_ERROR_INVALID_CONTENT_ENCODING; + return 0; + } + + if (mSkipCount == 2 && ((unsigned)c & 0377) != Z_DEFLATED) { + *rs = NS_ERROR_INVALID_CONTENT_ENCODING; + return 0; + } + + mSkipCount++; + if (mSkipCount == 4) { + mFlags = (unsigned)c & 0377; + if (mFlags & RESERVED) { + *rs = NS_ERROR_INVALID_CONTENT_ENCODING; + return 0; + } + hMode = GZIP_OS; + mSkipCount = 0; + } + break; + + case GZIP_OS: + iStr->Read(&c, 1, &unused); + streamLen--; + mSkipCount++; + + if (mSkipCount == 6) { + hMode = GZIP_EXTRA0; + } + break; + + case GZIP_EXTRA0: + if (mFlags & EXTRA_FIELD) { + iStr->Read(&c, 1, &unused); + streamLen--; + mLen = (uInt)c & 0377; + hMode = GZIP_EXTRA1; + } else { + hMode = GZIP_ORIG; + } + break; + + case GZIP_EXTRA1: + iStr->Read(&c, 1, &unused); + 
streamLen--; + mLen |= ((uInt)c & 0377) << 8; + mSkipCount = 0; + hMode = GZIP_EXTRA2; + break; + + case GZIP_EXTRA2: + if (mSkipCount == mLen) { + hMode = GZIP_ORIG; + } else { + iStr->Read(&c, 1, &unused); + streamLen--; + mSkipCount++; + } + break; + + case GZIP_ORIG: + if (mFlags & ORIG_NAME) { + iStr->Read(&c, 1, &unused); + streamLen--; + if (c == 0) hMode = GZIP_COMMENT; + } else { + hMode = GZIP_COMMENT; + } + break; + + case GZIP_COMMENT: + if (mFlags & COMMENT) { + iStr->Read(&c, 1, &unused); + streamLen--; + if (c == 0) { + hMode = GZIP_CRC; + mSkipCount = 0; + } + } else { + hMode = GZIP_CRC; + mSkipCount = 0; + } + break; + + case GZIP_CRC: + if (mFlags & HEAD_CRC) { + iStr->Read(&c, 1, &unused); + streamLen--; + mSkipCount++; + if (mSkipCount == 2) { + mCheckHeaderDone = true; + return streamLen; + } + } else { + mCheckHeaderDone = true; + return streamLen; + } + break; + } + } + return streamLen; +} + +NS_IMETHODIMP +nsHTTPCompressConv::CheckListenerChain() { + nsCOMPtr listener; + { + MutexAutoLock lock(mMutex); + listener = do_QueryInterface(mListener); + } + + if (!listener) { + return NS_ERROR_NO_INTERFACE; + } + + return listener->CheckListenerChain(); +} + +} // namespace net +} // namespace mozilla + +nsresult NS_NewHTTPCompressConv( + mozilla::net::nsHTTPCompressConv** aHTTPCompressConv) { + MOZ_ASSERT(aHTTPCompressConv != nullptr, "null ptr"); + if (!aHTTPCompressConv) { + return NS_ERROR_NULL_POINTER; + } + + RefPtr outVal = + new mozilla::net::nsHTTPCompressConv(); + if (!outVal) { + return NS_ERROR_OUT_OF_MEMORY; + } + outVal.forget(aHTTPCompressConv); + return NS_OK; +} diff --git a/netwerk/streamconv/converters/nsHTTPCompressConv.h b/netwerk/streamconv/converters/nsHTTPCompressConv.h new file mode 100644 index 0000000000..c21cb3ebde --- /dev/null +++ b/netwerk/streamconv/converters/nsHTTPCompressConv.h @@ -0,0 +1,110 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set sw=2 ts=8 et tw=80 : */ 
+/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#if !defined(__nsHTTPCompressConv__h__) +# define __nsHTTPCompressConv__h__ 1 + +# include "nsIStreamConverter.h" +# include "nsICompressConvStats.h" +# include "nsIThreadRetargetableStreamListener.h" +# include "nsCOMPtr.h" +# include "mozilla/Atomics.h" +# include "mozilla/Mutex.h" + +# include "zlib.h" + +class nsIStringInputStream; + +# define NS_HTTPCOMPRESSCONVERTER_CID \ + { \ + /* 66230b2b-17fa-4bd3-abf4-07986151022d */ \ + 0x66230b2b, 0x17fa, 0x4bd3, { \ + 0xab, 0xf4, 0x07, 0x98, 0x61, 0x51, 0x02, 0x2d \ + } \ + } + +# define HTTP_DEFLATE_TYPE "deflate" +# define HTTP_GZIP_TYPE "gzip" +# define HTTP_X_GZIP_TYPE "x-gzip" +# define HTTP_COMPRESS_TYPE "compress" +# define HTTP_X_COMPRESS_TYPE "x-compress" +# define HTTP_BROTLI_TYPE "br" +# define HTTP_IDENTITY_TYPE "identity" +# define HTTP_UNCOMPRESSED_TYPE "uncompressed" + +namespace mozilla { +namespace net { + +class BrotliWrapper; + +class nsHTTPCompressConv : public nsIStreamConverter, + public nsICompressConvStats, + public nsIThreadRetargetableStreamListener { + public: + // nsISupports methods + NS_DECL_THREADSAFE_ISUPPORTS + NS_DECL_NSIREQUESTOBSERVER + NS_DECL_NSISTREAMLISTENER + NS_DECL_NSICOMPRESSCONVSTATS + NS_DECL_NSITHREADRETARGETABLESTREAMLISTENER + + // nsIStreamConverter methods + NS_DECL_NSISTREAMCONVERTER + + nsHTTPCompressConv(); + + using CompressMode = enum { + HTTP_COMPRESS_GZIP, + HTTP_COMPRESS_DEFLATE, + HTTP_COMPRESS_COMPRESS, + HTTP_COMPRESS_BROTLI, + HTTP_COMPRESS_IDENTITY + }; + + private: + virtual ~nsHTTPCompressConv(); + + nsCOMPtr + mListener; // this guy gets the converted data via his OnDataAvailable () + Atomic mMode{HTTP_COMPRESS_IDENTITY}; + + unsigned char* mOutBuffer{nullptr}; + unsigned char* mInpBuffer{nullptr}; + + uint32_t mOutBufferLen{0}; + uint32_t 
mInpBufferLen{0}; + + UniquePtr mBrotli; + + nsCOMPtr mStream; + + static nsresult BrotliHandler(nsIInputStream* stream, void* closure, + const char* dataIn, uint32_t, uint32_t avail, + uint32_t* countRead); + + nsresult do_OnDataAvailable(nsIRequest* request, uint64_t aSourceOffset, + const char* buffer, uint32_t aCount); + + bool mCheckHeaderDone{false}; + Atomic mStreamEnded{false}; + bool mStreamInitialized{false}; + bool mDummyStreamInitialised{false}; + bool mFailUncleanStops; + + z_stream d_stream{}; + unsigned mLen{0}, hMode{0}, mSkipCount{0}, mFlags{0}; + + uint32_t check_header(nsIInputStream* iStr, uint32_t streamLen, nsresult* rs); + + Atomic mDecodedDataLength{0}; + + mutable mozilla::Mutex mMutex MOZ_UNANNOTATED{"nsHTTPCompressConv"}; +}; + +} // namespace net +} // namespace mozilla + +#endif diff --git a/netwerk/streamconv/converters/nsICompressConvStats.idl b/netwerk/streamconv/converters/nsICompressConvStats.idl new file mode 100644 index 0000000000..a8837563ed --- /dev/null +++ b/netwerk/streamconv/converters/nsICompressConvStats.idl @@ -0,0 +1,17 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsISupports.idl" + +/** + * nsICompressConvStats + * + * This interface allows for the observation of decoded resource sizes + */ +[builtinclass, scriptable, uuid(58172ad0-46a9-4893-8fde-cd909c10792a)] +interface nsICompressConvStats : nsISupports +{ + readonly attribute uint64_t decodedDataLength; +}; diff --git a/netwerk/streamconv/converters/nsIndexedToHTML.cpp b/netwerk/streamconv/converters/nsIndexedToHTML.cpp new file mode 100644 index 0000000000..00d83f6574 --- /dev/null +++ b/netwerk/streamconv/converters/nsIndexedToHTML.cpp @@ -0,0 +1,826 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsIndexedToHTML.h" + +#include "mozilla/Encoding.h" +#include "mozilla/intl/AppDateTimeFormat.h" +#include "mozilla/intl/LocaleService.h" +#include "nsNetUtil.h" +#include "netCore.h" +#include "nsStringStream.h" +#include "nsIFile.h" +#include "nsIFileURL.h" +#include "nsEscape.h" +#include "nsIDirIndex.h" +#include "nsURLHelper.h" +#include "nsIStringBundle.h" +#include "nsDirIndexParser.h" +#include "nsNativeCharsetUtils.h" +#include "nsString.h" +#include "nsContentUtils.h" +#include +#include "nsIChannel.h" +#include "mozilla/Unused.h" +#include "nsIURIMutator.h" +#include "nsITextToSubURI.h" + +using mozilla::intl::LocaleService; +using namespace mozilla; + +NS_IMPL_ISUPPORTS(nsIndexedToHTML, nsIDirIndexListener, nsIStreamConverter, + nsIRequestObserver, nsIStreamListener) + +static void AppendNonAsciiToNCR(const nsAString& in, nsCString& out) { + nsAString::const_iterator start, end; + + in.BeginReading(start); + in.EndReading(end); + + while (start != end) { + if (*start < 128) { + out.Append(*start++); + } else { + out.AppendLiteral("&#x"); + out.AppendInt(*start++, 16); + out.Append(';'); + } + } 
+} + +nsresult nsIndexedToHTML::Create(REFNSIID aIID, void** aResult) { + nsresult rv; + + nsIndexedToHTML* _s = new nsIndexedToHTML(); + if (_s == nullptr) return NS_ERROR_OUT_OF_MEMORY; + + rv = _s->QueryInterface(aIID, aResult); + return rv; +} + +nsresult nsIndexedToHTML::Init(nsIStreamListener* aListener) { + nsresult rv = NS_OK; + + mListener = aListener; + + nsCOMPtr sbs = + do_GetService(NS_STRINGBUNDLE_CONTRACTID, &rv); + if (NS_FAILED(rv)) return rv; + rv = sbs->CreateBundle(NECKO_MSGS_URL, getter_AddRefs(mBundle)); + + mExpectAbsLoc = false; + + return rv; +} + +NS_IMETHODIMP +nsIndexedToHTML::Convert(nsIInputStream* aFromStream, const char* aFromType, + const char* aToType, nsISupports* aCtxt, + nsIInputStream** res) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +nsIndexedToHTML::AsyncConvertData(const char* aFromType, const char* aToType, + nsIStreamListener* aListener, + nsISupports* aCtxt) { + return Init(aListener); +} + +NS_IMETHODIMP +nsIndexedToHTML::GetConvertedType(const nsACString& aFromType, + nsIChannel* aChannel, nsACString& aToType) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +nsIndexedToHTML::OnStartRequest(nsIRequest* request) { + nsCString buffer; + nsresult rv = DoOnStartRequest(request, buffer); + if (NS_FAILED(rv)) { + request->Cancel(rv); + } + + rv = mListener->OnStartRequest(request); + if (NS_FAILED(rv)) return rv; + + // The request may have been canceled, and if that happens, we want to + // suppress calls to OnDataAvailable. + request->GetStatus(&rv); + if (NS_FAILED(rv)) return rv; + + // Push our buffer to the listener. 
+ + rv = SendToListener(request, buffer); + return rv; +} + +nsresult nsIndexedToHTML::DoOnStartRequest(nsIRequest* request, + nsCString& aBuffer) { + nsresult rv; + + nsCOMPtr channel = do_QueryInterface(request); + nsCOMPtr uri; + rv = channel->GetOriginalURI(getter_AddRefs(uri)); + if (NS_FAILED(rv)) return rv; + + // We use the original URI for the title and parent link when it's a + // resource:// url, instead of the jar:file:// url it resolves to. + if (!uri->SchemeIs("resource")) { + rv = channel->GetURI(getter_AddRefs(uri)); + if (NS_FAILED(rv)) return rv; + } + + channel->SetContentType("text/html"_ns); + + mParser = nsDirIndexParser::CreateInstance(); + if (!mParser) return NS_ERROR_FAILURE; + + rv = mParser->SetListener(this); + if (NS_FAILED(rv)) return rv; + + rv = mParser->OnStartRequest(request); + if (NS_FAILED(rv)) return rv; + + nsAutoCString baseUri, titleUri; + rv = uri->GetAsciiSpec(baseUri); + if (NS_FAILED(rv)) return rv; + + nsCOMPtr titleURL; + rv = NS_MutateURI(uri).SetQuery(""_ns).SetRef(""_ns).Finalize(titleURL); + if (NS_FAILED(rv)) { + titleURL = uri; + } + + nsCString parentStr; + + nsCString buffer; + buffer.AppendLiteral("\n\n\n"); + + // XXX - should be using the 300: line from the parser. + // We can't guarantee that that comes before any entry, so we'd have to + // buffer, and do other painful stuff. + // I'll deal with this when I make the changes to handle welcome messages + // The .. 
stuff should also come from the lower level protocols, but that + // would muck up the XUL display + // - bbaetz + + if (uri->SchemeIs("file")) { + nsCOMPtr fileUrl = do_QueryInterface(uri); + nsCOMPtr file; + rv = fileUrl->GetFile(getter_AddRefs(file)); + if (NS_FAILED(rv)) return rv; + + nsAutoCString url; + rv = net_GetURLSpecFromFile(file, url); + if (NS_FAILED(rv)) return rv; + baseUri.Assign(url); + + nsCOMPtr parent; + rv = file->GetParent(getter_AddRefs(parent)); + + if (parent && NS_SUCCEEDED(rv)) { + net_GetURLSpecFromDir(parent, url); + if (NS_FAILED(rv)) return rv; + parentStr.Assign(url); + } + + // Directory index will be always encoded in UTF-8 if this is file url + buffer.AppendLiteral("\n"); + + } else if (uri->SchemeIs("jar")) { + nsAutoCString path; + rv = uri->GetPathQueryRef(path); + if (NS_FAILED(rv)) return rv; + + // a top-level jar directory URL is of the form jar:foo.zip!/ + // path will be of the form foo.zip!/, and its last two characters + // will be "!/" + // XXX this won't work correctly when the name of the directory being + // XXX displayed ends with "!", but then again, jar: URIs don't deal + // XXX particularly well with such directories anyway + if (!StringEndsWith(path, "!/"_ns)) { + rv = uri->Resolve(".."_ns, parentStr); + if (NS_FAILED(rv)) return rv; + } + } else { + // default behavior for other protocols is to assume the channel's + // URL references a directory ending in '/' -- fixup if necessary. 
+ nsAutoCString path; + rv = uri->GetPathQueryRef(path); + if (NS_FAILED(rv)) return rv; + if (baseUri.Last() != '/') { + baseUri.Append('/'); + path.Append('/'); + mozilla::Unused << NS_MutateURI(uri).SetPathQueryRef(path).Finalize(uri); + } + if (!path.EqualsLiteral("/")) { + rv = uri->Resolve(".."_ns, parentStr); + if (NS_FAILED(rv)) return rv; + } + } + + rv = titleURL->GetAsciiSpec(titleUri); + if (NS_FAILED(rv)) { + return rv; + } + + buffer.AppendLiteral( + "\n" + "\n" + "\n"); + + buffer.AppendLiteral(R"( innerUri = NS_GetInnermostURI(uri); + if (!innerUri) return NS_ERROR_UNEXPECTED; + nsCOMPtr fileURL(do_QueryInterface(innerUri)); + // XXX bug 388553: can't use skinnable icons here due to security restrictions + if (fileURL) { + buffer.AppendLiteral( + "" + "AAAAAQCAYAAAAf8%2F9hAAAAGXRFWHRTb2Z0d2FyZQBBZG9i" + "ZSBJbWFnZVJlYWR5ccllPAAAAjFJREFUeNqsU8uOElEQPffR" + "3XQ3ONASdBJCSBxHos5%2B3Bg3rvkCv8PElS78gPkO%2FATj" + "QoUdO2ftrJiRh6aneTb9sOpC4weMN6lcuFV16pxDIfI8x12O" + "YIDhcPiu2Wx%2B%2FHF5CW1Z6Jyegt%2FTNEWSJIjjGFEUIQ" + "xDrFYrWFSzXC4%2FdLvd95pRKpXKy%2BpRFZ7nwaWo1%2BsG" + "nQG2260BKJfLKJVKGI1GEEJw7ateryd0v993W63WEwjgxfn5" + "obGYzgCbzcaEbdsIggDj8Riu6z6iUk9SYZMSx8W0LMsM%2FS" + "KK75xnJlIq80anQXdbEp0OhcPJ0eiaJnGRMEyyPDsAKKUM9c" + "lkYoDo3SZJzzSdp0VSKYmfV1co%2Bz580kw5KDIM8RbRfEnU" + "f1HzxtQyMAGcaGruTKczMzEIaqhKifV6jd%2BzGQQB5llunF" + "%2FM52BizC2K5sYPYvZcu653tjOM9O93wnYc08gmkgg4VAxi" + "xfqFUJT36AYBZGd6PJkFCZnnlBxMp38gqIgLpZB0y4Nph18l" + "yWh5FFbrOSxbl3V4G%2BVB7T4ajYYxTyuLtO%2BCvWGgJE1M" + "c7JNsJEhvgw%2FQV4fo%2F24nbEsX2u1d5sVyn8sJO0ZAQiI" + "YnFh%2BxrfLz%2Fj29cBS%2FO14zg3i8XigW3ZkErDtmKoeM" + "%2BAJGRMnXeEPGKf0nCD1ydvkDzU9Jbc6OpR7WIw6L8lQ%2B" + "4pQ1%2FlPF0RGM9Ns91Wmptk0GfB4EJkt77vXYj%2F8m%2B8" + "y%2FkrwABHbz2H9V68DQAAAABJRU5ErkJggg%3D%3D"); + } else { + buffer.AppendLiteral( + "" + "AAAAAQCAYAAAAf8%2F9hAAAAGXRFWHRTb2Z0d2FyZQBBZG9i" + "ZSBJbWFnZVJlYWR5ccllPAAAAeBJREFUeNqcU81O20AQ%2Ft" + "Z2AgQSYQRqL1UPVG2hAUQkxLEStz4DrXpLpD5Drz31Cajax%" + 
"2Bghhx6qHIJURBTxIwQRwopCBbZjHMcOTrzermPipsSt1Iw0" + "3p3ZmW%2B%2B2R0TxhgOD34wjCHZlQ0iDYz9yvEfhxMTCYhE" + "QDIZhkxKd2sqzX2TOD2vBQCQhpPefng1ZP2dVPlLLdpL8SEM" + "cxng%2Fbs0RIHhtgs4twxOh%2BHjZxvzDx%2F3GQQiDFISiR" + "BLFMPKTRMollzcWECrDVhtxtdRVsL9youPxGj%2FbdfFlUZh" + "tDyYbYqWRUdai1oQRZ5oHeHl2gNM%2B01Uqio8RlH%2Bnsaz" + "JzNwXcq1B%2BiXPHprlEEymeBfXs1w8XxxihfyuXqoHqpoGj" + "ZM04bddgG%2F9%2B8WGj87qDdsrK9m%2BoA%2BpbhQTDh2l1" + "%2Bi2weNbSHMZyjvNXmVbqh9Fj5Oz27uEoP%2BSTxANruJs9" + "L%2FT6P0ewqPx5nmiAG5f6AoCtN1PbJzuRyJAyDBzzSQYvEr" + "f06yYxhGXlEa8H2KVGoasjwLx3Ewk858opQWXm%2B%2Fib9E" + "QrBzclLLLy89xYvlpchvtixcX6uo1y%2FzsiwHrkIsgKbp%2" + "BYWFOWicuqppoNTnStHzPFCPQhBEBOyGAX4JMADFetubi4BS" + "YAAAAABJRU5ErkJggg%3D%3D"); + } + buffer.AppendLiteral("\">\n"); + + // Everything needs to end in a /, + // otherwise we end up linking to file:///foo/dirfile + + if (!mTextToSubURI) { + mTextToSubURI = do_GetService(NS_ITEXTTOSUBURI_CONTRACTID, &rv); + if (NS_FAILED(rv)) return rv; + } + + nsAutoString unEscapeSpec; + rv = mTextToSubURI->UnEscapeAndConvert("UTF-8"_ns, titleUri, unEscapeSpec); + if (NS_FAILED(rv)) { + return rv; + } + + nsCString htmlEscSpecUtf8; + nsAppendEscapedHTML(NS_ConvertUTF16toUTF8(unEscapeSpec), htmlEscSpecUtf8); + AutoTArray<nsString, 1> formatTitle; + CopyUTF8toUTF16(htmlEscSpecUtf8, *formatTitle.AppendElement()); + + nsAutoString title; + rv = mBundle->FormatStringFromName("DirTitle", formatTitle, title); + if (NS_FAILED(rv)) return rv; + + // we want to convert string bundle to NCR + // to ensure they're shown in any charsets + AppendNonAsciiToNCR(title, buffer); + + buffer.AppendLiteral("\n"); + + // If there is a quote character in the baseUri, then + // lets not add a base URL. The reason for this is that + // if we stick baseUri containing a quote into a quoted + // string, the quote character will prematurely close + // the base href string. 
This is a fall-back check; + // that's why it is OK to not use a base rather than + // trying to play nice and escaping the quotes. See bug + // 358128. + + if (!baseUri.Contains('"')) { + // Great, the baseUri does not contain a char that + // will prematurely close the string. Go ahead an + // add a base href, but only do so if we're not + // dealing with a resource URI. + if (!uri->SchemeIs("resource")) { + buffer.AppendLiteral("\n"); + } + } else { + NS_ERROR("broken protocol handler didn't escape double-quote."); + } + + nsCString direction("ltr"_ns); + if (LocaleService::GetInstance()->IsAppLocaleRTL()) { + direction.AssignLiteral("rtl"); + } + + buffer.AppendLiteral("\n\n

"); + AppendNonAsciiToNCR(title, buffer); + buffer.AppendLiteral("

\n"); + + if (!parentStr.IsEmpty()) { + nsAutoString parentText; + rv = mBundle->GetStringFromName("DirGoUp", parentText); + if (NS_FAILED(rv)) return rv; + + buffer.AppendLiteral(R"(

"); + AppendNonAsciiToNCR(parentText, buffer); + buffer.AppendLiteral("

\n"); + } + + if (uri->SchemeIs("file")) { + nsAutoString showHiddenText; + rv = mBundle->GetStringFromName("ShowHidden", showHiddenText); + if (NS_FAILED(rv)) return rv; + + buffer.AppendLiteral( + "

\n"); + } + + buffer.AppendLiteral( + "\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n"); + buffer.AppendLiteral(" \n"); + + aBuffer = buffer; + return rv; +} + +NS_IMETHODIMP +nsIndexedToHTML::OnStopRequest(nsIRequest* request, nsresult aStatus) { + if (NS_SUCCEEDED(aStatus)) { + nsCString buffer; + buffer.AssignLiteral("
"); + + nsAutoString columnText; + rv = mBundle->GetStringFromName("DirColName", columnText); + if (NS_FAILED(rv)) return rv; + AppendNonAsciiToNCR(columnText, buffer); + buffer.AppendLiteral( + ""); + + rv = mBundle->GetStringFromName("DirColSize", columnText); + if (NS_FAILED(rv)) return rv; + AppendNonAsciiToNCR(columnText, buffer); + buffer.AppendLiteral( + ""); + + rv = mBundle->GetStringFromName("DirColMTime", columnText); + if (NS_FAILED(rv)) return rv; + AppendNonAsciiToNCR(columnText, buffer); + buffer.AppendLiteral( + "
\n"); + + aStatus = SendToListener(request, buffer); + } + + mParser->OnStopRequest(request, aStatus); + mParser = nullptr; + + return mListener->OnStopRequest(request, aStatus); +} + +nsresult nsIndexedToHTML::SendToListener(nsIRequest* aRequest, + const nsACString& aBuffer) { + nsCOMPtr inputData; + nsresult rv = NS_NewCStringInputStream(getter_AddRefs(inputData), aBuffer); + NS_ENSURE_SUCCESS(rv, rv); + return mListener->OnDataAvailable(aRequest, inputData, 0, aBuffer.Length()); +} + +NS_IMETHODIMP +nsIndexedToHTML::OnDataAvailable(nsIRequest* aRequest, nsIInputStream* aInput, + uint64_t aOffset, uint32_t aCount) { + return mParser->OnDataAvailable(aRequest, aInput, aOffset, aCount); +} + +static nsresult FormatTime( + const mozilla::intl::DateTimeFormat::StyleBag& aStyleBag, + const PRTime aPrTime, nsAString& aStringOut) { + // FormatPRExplodedTime will use GMT based formatted string (e.g. GMT+1) + // instead of local time zone name (e.g. CEST). + // To avoid this case when ResistFingerprinting is disabled, use + // |FormatPRTime| to show exact time zone name. + if (!nsContentUtils::ShouldResistFingerprinting()) { + return mozilla::intl::AppDateTimeFormat::Format(aStyleBag, aPrTime, + aStringOut); + } + + PRExplodedTime prExplodedTime; + PR_ExplodeTime(aPrTime, PR_GMTParameters, &prExplodedTime); + return mozilla::intl::AppDateTimeFormat::Format(aStyleBag, &prExplodedTime, + aStringOut); +} + +NS_IMETHODIMP +nsIndexedToHTML::OnIndexAvailable(nsIRequest* aRequest, nsIDirIndex* aIndex) { + nsresult rv; + if (!aIndex) return NS_ERROR_NULL_POINTER; + + nsCString pushBuffer; + pushBuffer.AppendLiteral("GetLocation(loc); + + // Adjust the length in case unescaping shortened the string. 
+ loc.Truncate(nsUnescapeCount(loc.BeginWriting())); + + if (loc.IsEmpty()) { + return NS_ERROR_ILLEGAL_VALUE; + } + if (loc.First() == char16_t('.')) { + pushBuffer.AppendLiteral(" class=\"hidden-object\""); + } + + pushBuffer.AppendLiteral(">\n GetType(&type); + switch (type) { + case nsIDirIndex::TYPE_SYMLINK: + pushBuffer.Append('0'); + break; + case nsIDirIndex::TYPE_DIRECTORY: + pushBuffer.Append('1'); + break; + default: + pushBuffer.Append('2'); + break; + } + nsCString escaped; + nsAppendEscapedHTML(loc, escaped); + pushBuffer.Append(escaped); + + pushBuffer.AppendLiteral( + R"(">
"); + + if (type == nsIDirIndex::TYPE_FILE || type == nsIDirIndex::TYPE_UNKNOWN) { + pushBuffer.AppendLiteral("\"");GetStringFromName("DirFileLabel", altText); + if (NS_FAILED(rv)) return rv; + AppendNonAsciiToNCR(altText, pushBuffer); + pushBuffer.AppendLiteral("\">"); + } + + pushBuffer.Append(escaped); + pushBuffer.AppendLiteral("
\n '); + } else { + int64_t size; + aIndex->GetSize(&size); + + if (uint64_t(size) != UINT64_MAX) { + pushBuffer.AppendLiteral(" sortable-data=\""); + pushBuffer.AppendInt(size); + pushBuffer.AppendLiteral("\">"); + nsAutoCString sizeString; + FormatSizeString(size, sizeString); + pushBuffer.Append(sizeString); + } else { + pushBuffer.Append('>'); + } + } + pushBuffer.AppendLiteral("\n GetLastModified(&t); + + if (t == -1LL) { + pushBuffer.AppendLiteral(">\n "); + } else { + pushBuffer.AppendLiteral(" sortable-data=\""); + pushBuffer.AppendInt(static_cast(t)); + pushBuffer.AppendLiteral("\">"); + // Add date string + nsAutoString formatted; + mozilla::intl::DateTimeFormat::StyleBag dateBag; + dateBag.date = Some(mozilla::intl::DateTimeFormat::Style::Short); + FormatTime(dateBag, t, formatted); + AppendNonAsciiToNCR(formatted, pushBuffer); + pushBuffer.AppendLiteral("\n "); + // Add time string + mozilla::intl::DateTimeFormat::StyleBag timeBag; + timeBag.time = Some(mozilla::intl::DateTimeFormat::Style::Long); + FormatTime(timeBag, t, formatted); + // use NCR to show date in any doc charset + AppendNonAsciiToNCR(formatted, pushBuffer); + } + + pushBuffer.AppendLiteral("\n"); + + return SendToListener(aRequest, pushBuffer); +} + +NS_IMETHODIMP +nsIndexedToHTML::OnInformationAvailable(nsIRequest* aRequest, + const nsAString& aInfo) { + nsAutoCString pushBuffer; + nsAutoCString escapedUtf8; + nsAppendEscapedHTML(NS_ConvertUTF16toUTF8(aInfo), escapedUtf8); + pushBuffer.AppendLiteral("\n "); + // escaped is provided in Unicode, so write hex NCRs as necessary + // to prevent the HTML parser from applying a character set. 
+ AppendNonAsciiToNCR(NS_ConvertUTF8toUTF16(escapedUtf8), pushBuffer); + pushBuffer.AppendLiteral( + "\n \n \n \n\n"); + + return SendToListener(aRequest, pushBuffer); +} + +void nsIndexedToHTML::FormatSizeString(int64_t inSize, + nsCString& outSizeString) { + outSizeString.Truncate(); + if (inSize > int64_t(0)) { + // round up to the nearest Kilobyte + int64_t upperSize = (inSize + int64_t(1023)) / int64_t(1024); + outSizeString.AppendInt(upperSize); + outSizeString.AppendLiteral(" KB"); + } +} diff --git a/netwerk/streamconv/converters/nsIndexedToHTML.h b/netwerk/streamconv/converters/nsIndexedToHTML.h new file mode 100644 index 0000000000..614793e434 --- /dev/null +++ b/netwerk/streamconv/converters/nsIndexedToHTML.h @@ -0,0 +1,59 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef ____nsindexedtohtml___h___ +#define ____nsindexedtohtml___h___ + +#include "nsCOMPtr.h" +#include "nsString.h" +#include "nsIStreamConverter.h" +#include "nsIDirIndexListener.h" + +#define NS_NSINDEXEDTOHTMLCONVERTER_CID \ + { \ + 0xcf0f71fd, 0xfafd, 0x4e2b, { \ + 0x9f, 0xdc, 0x13, 0x4d, 0x97, 0x2e, 0x16, 0xe2 \ + } \ + } + +class nsIStringBundle; +class nsITextToSubURI; + +class nsIndexedToHTML : public nsIStreamConverter, public nsIDirIndexListener { + public: + NS_DECL_ISUPPORTS + NS_DECL_NSISTREAMCONVERTER + NS_DECL_NSIREQUESTOBSERVER + NS_DECL_NSISTREAMLISTENER + NS_DECL_NSIDIRINDEXLISTENER + + nsIndexedToHTML() = default; + + nsresult Init(nsIStreamListener* aListener); + + static nsresult Create(REFNSIID aIID, void** aResult); + + protected: + void FormatSizeString(int64_t inSize, nsCString& outSizeString); + nsresult SendToListener(nsIRequest* aRequest, const nsACString& aBuffer); + // Helper to properly implement OnStartRequest + nsresult DoOnStartRequest(nsIRequest* request, nsCString& aBuffer); + + protected: + nsCOMPtr mParser; + nsCOMPtr mListener; // final listener (consumer) + + nsCOMPtr mBundle; + + nsCOMPtr mTextToSubURI; + + private: + // Expecting absolute locations, given by 201 lines. + bool mExpectAbsLoc{false}; + + virtual ~nsIndexedToHTML() = default; +}; + +#endif diff --git a/netwerk/streamconv/converters/nsMultiMixedConv.cpp b/netwerk/streamconv/converters/nsMultiMixedConv.cpp new file mode 100644 index 0000000000..54e57e41b4 --- /dev/null +++ b/netwerk/streamconv/converters/nsMultiMixedConv.cpp @@ -0,0 +1,1042 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsMultiMixedConv.h" +#include "nsIHttpChannel.h" +#include "nsNetCID.h" +#include "nsMimeTypes.h" +#include "nsIStringStream.h" +#include "nsCRT.h" +#include "nsIHttpChannelInternal.h" +#include "nsURLHelper.h" +#include "nsIStreamConverterService.h" +#include +#include "nsContentSecurityManager.h" +#include "nsHttp.h" +#include "nsNetUtil.h" +#include "nsIURI.h" +#include "nsHttpHeaderArray.h" +#include "mozilla/AutoRestore.h" +#include "mozilla/Tokenizer.h" +#include "nsComponentManagerUtils.h" + +using namespace mozilla; + +nsPartChannel::nsPartChannel(nsIChannel* aMultipartChannel, uint32_t aPartID, + bool aIsFirstPart, nsIStreamListener* aListener) + : mMultipartChannel(aMultipartChannel), + mListener(aListener), + mPartID(aPartID), + mIsFirstPart(aIsFirstPart) { + // Inherit the load flags from the original channel... + mMultipartChannel->GetLoadFlags(&mLoadFlags); + + mMultipartChannel->GetLoadGroup(getter_AddRefs(mLoadGroup)); +} + +void nsPartChannel::InitializeByteRange(int64_t aStart, int64_t aEnd) { + mIsByteRangeRequest = true; + + mByteRangeStart = aStart; + mByteRangeEnd = aEnd; +} + +nsresult nsPartChannel::SendOnStartRequest(nsISupports* aContext) { + return mListener->OnStartRequest(this); +} + +nsresult nsPartChannel::SendOnDataAvailable(nsISupports* aContext, + nsIInputStream* aStream, + uint64_t aOffset, uint32_t aLen) { + return mListener->OnDataAvailable(this, aStream, aOffset, aLen); +} + +nsresult nsPartChannel::SendOnStopRequest(nsISupports* aContext, + nsresult aStatus) { + // Drop the listener + nsCOMPtr listener; + listener.swap(mListener); + return listener->OnStopRequest(this, aStatus); +} + +void nsPartChannel::SetContentDisposition( + const nsACString& aContentDispositionHeader) { + mContentDispositionHeader = aContentDispositionHeader; + nsCOMPtr uri; + GetURI(getter_AddRefs(uri)); + NS_GetFilenameFromDisposition(mContentDispositionFilename, + mContentDispositionHeader); + mContentDisposition = + 
NS_GetContentDispositionFromHeader(mContentDispositionHeader, this); +} + +// +// nsISupports implementation... +// + +NS_IMPL_ADDREF(nsPartChannel) +NS_IMPL_RELEASE(nsPartChannel) + +NS_INTERFACE_MAP_BEGIN(nsPartChannel) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIChannel) + NS_INTERFACE_MAP_ENTRY(nsIRequest) + NS_INTERFACE_MAP_ENTRY(nsIChannel) + NS_INTERFACE_MAP_ENTRY(nsIByteRangeRequest) + NS_INTERFACE_MAP_ENTRY(nsIMultiPartChannel) +NS_INTERFACE_MAP_END + +// +// nsIRequest implementation... +// + +NS_IMETHODIMP +nsPartChannel::GetName(nsACString& aResult) { + return mMultipartChannel->GetName(aResult); +} + +NS_IMETHODIMP +nsPartChannel::IsPending(bool* aResult) { + // For now, consider the active lifetime of each part the same as + // the underlying multipart channel... This is not exactly right, + // but it is good enough :-) + return mMultipartChannel->IsPending(aResult); +} + +NS_IMETHODIMP +nsPartChannel::GetStatus(nsresult* aResult) { + nsresult rv = NS_OK; + + if (NS_FAILED(mStatus)) { + *aResult = mStatus; + } else { + rv = mMultipartChannel->GetStatus(aResult); + } + + return rv; +} + +NS_IMETHODIMP nsPartChannel::SetCanceledReason(const nsACString& aReason) { + return SetCanceledReasonImpl(aReason); +} + +NS_IMETHODIMP nsPartChannel::GetCanceledReason(nsACString& aReason) { + return GetCanceledReasonImpl(aReason); +} + +NS_IMETHODIMP nsPartChannel::CancelWithReason(nsresult aStatus, + const nsACString& aReason) { + return CancelWithReasonImpl(aStatus, aReason); +} + +NS_IMETHODIMP +nsPartChannel::Cancel(nsresult aStatus) { + // Cancelling an individual part must not cancel the underlying + // multipart channel... + // XXX but we should stop sending data for _this_ part channel! 
+ mStatus = aStatus; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetCanceled(bool* aCanceled) { + *aCanceled = NS_FAILED(mStatus); + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::Suspend(void) { + // Suspending an individual part must not suspend the underlying + // multipart channel... + // XXX why not? + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::Resume(void) { + // Resuming an individual part must not resume the underlying + // multipart channel... + // XXX why not? + return NS_OK; +} + +// +// nsIChannel implementation +// + +NS_IMETHODIMP +nsPartChannel::GetOriginalURI(nsIURI** aURI) { + return mMultipartChannel->GetOriginalURI(aURI); +} + +NS_IMETHODIMP +nsPartChannel::SetOriginalURI(nsIURI* aURI) { + return mMultipartChannel->SetOriginalURI(aURI); +} + +NS_IMETHODIMP +nsPartChannel::GetURI(nsIURI** aURI) { return mMultipartChannel->GetURI(aURI); } + +NS_IMETHODIMP +nsPartChannel::Open(nsIInputStream** aStream) { + nsCOMPtr listener; + nsresult rv = + nsContentSecurityManager::doContentSecurityCheck(this, listener); + NS_ENSURE_SUCCESS(rv, rv); + + // This channel cannot be opened! + return NS_ERROR_FAILURE; +} + +NS_IMETHODIMP +nsPartChannel::AsyncOpen(nsIStreamListener* aListener) { + nsCOMPtr listener = aListener; + nsresult rv = + nsContentSecurityManager::doContentSecurityCheck(this, listener); + NS_ENSURE_SUCCESS(rv, rv); + + // This channel cannot be opened! 
+ return NS_ERROR_FAILURE; +} + +NS_IMETHODIMP +nsPartChannel::GetLoadFlags(nsLoadFlags* aLoadFlags) { + *aLoadFlags = mLoadFlags; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::SetLoadFlags(nsLoadFlags aLoadFlags) { + mLoadFlags = aLoadFlags; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetTRRMode(nsIRequest::TRRMode* aTRRMode) { + return GetTRRModeImpl(aTRRMode); +} + +NS_IMETHODIMP +nsPartChannel::SetTRRMode(nsIRequest::TRRMode aTRRMode) { + return SetTRRModeImpl(aTRRMode); +} + +NS_IMETHODIMP +nsPartChannel::GetIsDocument(bool* aIsDocument) { + return NS_GetIsDocumentChannel(this, aIsDocument); +} + +NS_IMETHODIMP +nsPartChannel::GetLoadGroup(nsILoadGroup** aLoadGroup) { + *aLoadGroup = do_AddRef(mLoadGroup).take(); + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::SetLoadGroup(nsILoadGroup* aLoadGroup) { + mLoadGroup = aLoadGroup; + + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetOwner(nsISupports** aOwner) { + return mMultipartChannel->GetOwner(aOwner); +} + +NS_IMETHODIMP +nsPartChannel::SetOwner(nsISupports* aOwner) { + return mMultipartChannel->SetOwner(aOwner); +} + +NS_IMETHODIMP +nsPartChannel::GetLoadInfo(nsILoadInfo** aLoadInfo) { + return mMultipartChannel->GetLoadInfo(aLoadInfo); +} + +NS_IMETHODIMP +nsPartChannel::SetLoadInfo(nsILoadInfo* aLoadInfo) { + MOZ_RELEASE_ASSERT(aLoadInfo, "loadinfo can't be null"); + return mMultipartChannel->SetLoadInfo(aLoadInfo); +} + +NS_IMETHODIMP +nsPartChannel::GetNotificationCallbacks(nsIInterfaceRequestor** aCallbacks) { + return mMultipartChannel->GetNotificationCallbacks(aCallbacks); +} + +NS_IMETHODIMP +nsPartChannel::SetNotificationCallbacks(nsIInterfaceRequestor* aCallbacks) { + return mMultipartChannel->SetNotificationCallbacks(aCallbacks); +} + +NS_IMETHODIMP +nsPartChannel::GetSecurityInfo(nsITransportSecurityInfo** aSecurityInfo) { + return mMultipartChannel->GetSecurityInfo(aSecurityInfo); +} + +NS_IMETHODIMP +nsPartChannel::GetContentType(nsACString& aContentType) { + aContentType 
= mContentType; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::SetContentType(const nsACString& aContentType) { + bool dummy; + net_ParseContentType(aContentType, mContentType, mContentCharset, &dummy); + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetContentCharset(nsACString& aContentCharset) { + aContentCharset = mContentCharset; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::SetContentCharset(const nsACString& aContentCharset) { + mContentCharset = aContentCharset; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetContentLength(int64_t* aContentLength) { + *aContentLength = mContentLength; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::SetContentLength(int64_t aContentLength) { + mContentLength = aContentLength; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetContentDisposition(uint32_t* aContentDisposition) { + if (mContentDispositionHeader.IsEmpty()) return NS_ERROR_NOT_AVAILABLE; + + *aContentDisposition = mContentDisposition; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::SetContentDisposition(uint32_t aContentDisposition) { + return NS_ERROR_NOT_AVAILABLE; +} + +NS_IMETHODIMP +nsPartChannel::GetContentDispositionFilename( + nsAString& aContentDispositionFilename) { + if (mContentDispositionFilename.IsEmpty()) return NS_ERROR_NOT_AVAILABLE; + + aContentDispositionFilename = mContentDispositionFilename; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::SetContentDispositionFilename( + const nsAString& aContentDispositionFilename) { + return NS_ERROR_NOT_AVAILABLE; +} + +NS_IMETHODIMP +nsPartChannel::GetContentDispositionHeader( + nsACString& aContentDispositionHeader) { + if (mContentDispositionHeader.IsEmpty()) return NS_ERROR_NOT_AVAILABLE; + + aContentDispositionHeader = mContentDispositionHeader; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetPartID(uint32_t* aPartID) { + *aPartID = mPartID; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetIsFirstPart(bool* aIsFirstPart) { + *aIsFirstPart = 
mIsFirstPart; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetIsLastPart(bool* aIsLastPart) { + *aIsLastPart = mIsLastPart; + return NS_OK; +} + +// +// nsIByteRangeRequest implementation... +// + +NS_IMETHODIMP +nsPartChannel::GetIsByteRangeRequest(bool* aIsByteRangeRequest) { + *aIsByteRangeRequest = mIsByteRangeRequest; + + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetStartRange(int64_t* aStartRange) { + *aStartRange = mByteRangeStart; + + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetEndRange(int64_t* aEndRange) { + *aEndRange = mByteRangeEnd; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetBaseChannel(nsIChannel** aReturn) { + NS_ENSURE_ARG_POINTER(aReturn); + + *aReturn = do_AddRef(mMultipartChannel).take(); + return NS_OK; +} + +// nsISupports implementation +NS_IMPL_ISUPPORTS(nsMultiMixedConv, nsIStreamConverter, nsIStreamListener, + nsIRequestObserver) + +// nsIStreamConverter implementation + +// No syncronous conversion at this time. +NS_IMETHODIMP +nsMultiMixedConv::Convert(nsIInputStream* aFromStream, const char* aFromType, + const char* aToType, nsISupports* aCtxt, + nsIInputStream** _retval) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +// Stream converter service calls this to initialize the actual stream converter +// (us). +NS_IMETHODIMP +nsMultiMixedConv::AsyncConvertData(const char* aFromType, const char* aToType, + nsIStreamListener* aListener, + nsISupports* aCtxt) { + NS_ASSERTION(aListener && aFromType && aToType, + "null pointer passed into multi mixed converter"); + + // hook up our final listener. this guy gets the various On*() calls we want + // to throw at him. + // + // WARNING: this listener must be able to handle multiple OnStartRequest, + // OnDataAvail() and OnStopRequest() call combinations. We call of series + // of these for each sub-part in the raw stream. 
+ mFinalListener = aListener; + + return NS_OK; +} + +NS_IMETHODIMP +nsMultiMixedConv::GetConvertedType(const nsACString& aFromType, + nsIChannel* aChannel, nsACString& aToType) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +// nsIRequestObserver implementation +NS_IMETHODIMP +nsMultiMixedConv::OnStartRequest(nsIRequest* request) { + // we're assuming the content-type is available at this stage + NS_ASSERTION(mBoundary.IsEmpty(), "a second on start???"); + + nsresult rv; + + mTotalSent = 0; + mChannel = do_QueryInterface(request, &rv); + if (NS_FAILED(rv)) return rv; + + nsAutoCString contentType; + + // ask the HTTP channel for the content-type and extract the boundary from it. + nsCOMPtr httpChannel = do_QueryInterface(mChannel, &rv); + if (NS_SUCCEEDED(rv)) { + rv = httpChannel->GetResponseHeader("content-type"_ns, contentType); + if (NS_FAILED(rv)) { + return rv; + } + nsCString csp; + rv = httpChannel->GetResponseHeader("content-security-policy"_ns, csp); + if (NS_SUCCEEDED(rv)) { + mRootContentSecurityPolicy = csp; + } + } else { + // try asking the channel directly + rv = mChannel->GetContentType(contentType); + if (NS_FAILED(rv)) { + return NS_ERROR_FAILURE; + } + } + + Tokenizer p(contentType); + p.SkipUntil(Token::Char(';')); + if (!p.CheckChar(';')) { + return NS_ERROR_CORRUPTED_CONTENT; + } + p.SkipWhites(); + if (!p.CheckWord("boundary")) { + return NS_ERROR_CORRUPTED_CONTENT; + } + p.SkipWhites(); + if (!p.CheckChar('=')) { + return NS_ERROR_CORRUPTED_CONTENT; + } + p.SkipWhites(); + Unused << p.ReadUntil(Token::Char(';'), mBoundary); + mBoundary.Trim( + " \""); // ignoring potential quoted string formatting violations + if (mBoundary.IsEmpty()) { + return NS_ERROR_CORRUPTED_CONTENT; + } + + mHeaderTokens[HEADER_CONTENT_TYPE] = mTokenizer.AddCustomToken( + "content-type", mTokenizer.CASE_INSENSITIVE, false); + mHeaderTokens[HEADER_CONTENT_LENGTH] = mTokenizer.AddCustomToken( + "content-length", mTokenizer.CASE_INSENSITIVE, false); + 
mHeaderTokens[HEADER_CONTENT_DISPOSITION] = mTokenizer.AddCustomToken( + "content-disposition", mTokenizer.CASE_INSENSITIVE, false); + mHeaderTokens[HEADER_SET_COOKIE] = mTokenizer.AddCustomToken( + "set-cookie", mTokenizer.CASE_INSENSITIVE, false); + mHeaderTokens[HEADER_CONTENT_RANGE] = mTokenizer.AddCustomToken( + "content-range", mTokenizer.CASE_INSENSITIVE, false); + mHeaderTokens[HEADER_RANGE] = + mTokenizer.AddCustomToken("range", mTokenizer.CASE_INSENSITIVE, false); + mHeaderTokens[HEADER_CONTENT_SECURITY_POLICY] = mTokenizer.AddCustomToken( + "content-security-policy", mTokenizer.CASE_INSENSITIVE, false); + + mLFToken = mTokenizer.AddCustomToken("\n", mTokenizer.CASE_SENSITIVE, false); + mCRLFToken = + mTokenizer.AddCustomToken("\r\n", mTokenizer.CASE_SENSITIVE, false); + + SwitchToControlParsing(); + + mBoundaryToken = + mTokenizer.AddCustomToken(mBoundary, mTokenizer.CASE_SENSITIVE); + mBoundaryTokenWithDashes = + mTokenizer.AddCustomToken("--"_ns + mBoundary, mTokenizer.CASE_SENSITIVE); + + return NS_OK; +} + +// nsIStreamListener implementation +NS_IMETHODIMP +nsMultiMixedConv::OnDataAvailable(nsIRequest* request, nsIInputStream* inStr, + uint64_t sourceOffset, uint32_t count) { + // Failing these assertions may indicate that some of the target listeners of + // this converter is looping the thead queue, which is harmful to how we + // collect the raw (content) data. + MOZ_DIAGNOSTIC_ASSERT(!mInOnDataAvailable, + "nsMultiMixedConv::OnDataAvailable reentered!"); + MOZ_DIAGNOSTIC_ASSERT( + !mRawData, "There are unsent data from the previous tokenizer feed!"); + + if (mInOnDataAvailable) { + // The multipart logic is incapable of being reentered. + return NS_ERROR_UNEXPECTED; + } + + mozilla::AutoRestore restore(mInOnDataAvailable); + mInOnDataAvailable = true; + + nsresult rv_feed = mTokenizer.FeedInput(inStr, count); + // We must do this every time. Regardless if something has failed during the + // parsing process. 
Otherwise the raw data reference would not be thrown + // away. + nsresult rv_send = SendData(); + + return NS_FAILED(rv_send) ? rv_send : rv_feed; +} + +NS_IMETHODIMP +nsMultiMixedConv::OnStopRequest(nsIRequest* request, nsresult aStatus) { + nsresult rv; + + if (mPartChannel) { + mPartChannel->SetIsLastPart(); + + MOZ_DIAGNOSTIC_ASSERT( + !mRawData, "There are unsent data from the previous tokenizer feed!"); + + rv = mTokenizer.FinishInput(); + if (NS_SUCCEEDED(aStatus)) { + aStatus = rv; + } + rv = SendData(); + if (NS_SUCCEEDED(aStatus)) { + aStatus = rv; + } + + (void)SendStop(aStatus); + } else if (NS_FAILED(aStatus) && !mRequestListenerNotified) { + // underlying data production problem. we should not be in + // the middle of sending data. if we were, mPartChannel, + // above, would have been non-null. + + (void)mFinalListener->OnStartRequest(request); + (void)mFinalListener->OnStopRequest(request, aStatus); + } + + nsCOMPtr multiListener = + do_QueryInterface(mFinalListener); + if (multiListener) { + multiListener->OnAfterLastPart(aStatus); + } + + return NS_OK; +} + +nsresult nsMultiMixedConv::ConsumeToken(Token const& token) { + nsresult rv; + + switch (mParserState) { + case PREAMBLE: + if (token.Equals(mBoundaryTokenWithDashes)) { + // The server first used boundary '--boundary'. Hence, we no longer + // accept plain 'boundary' token as a delimiter. + mTokenizer.RemoveCustomToken(mBoundaryToken); + mParserState = BOUNDARY_CRLF; + break; + } + if (token.Equals(mBoundaryToken)) { + // And here the opposite from the just above block... + mTokenizer.RemoveCustomToken(mBoundaryTokenWithDashes); + mParserState = BOUNDARY_CRLF; + break; + } + + // This is a preamble, just ignore it and wait for the boundary. 
+ break; + + case BOUNDARY_CRLF: + if (token.Equals(Token::NewLine())) { + mParserState = HEADER_NAME; + mResponseHeader = HEADER_UNKNOWN; + HeadersToDefault(); + SetHeaderTokensEnabled(true); + break; + } + return NS_ERROR_CORRUPTED_CONTENT; + + case HEADER_NAME: + SetHeaderTokensEnabled(false); + if (token.Equals(Token::NewLine())) { + mParserState = BODY_INIT; + SwitchToBodyParsing(); + break; + } + for (uint32_t h = HEADER_CONTENT_TYPE; h < HEADER_UNKNOWN; ++h) { + if (token.Equals(mHeaderTokens[h])) { + mResponseHeader = static_cast(h); + break; + } + } + mParserState = HEADER_SEP; + break; + + case HEADER_SEP: + if (token.Equals(Token::Char(':'))) { + mParserState = HEADER_VALUE; + mResponseHeaderValue.Truncate(); + break; + } + if (mResponseHeader == HEADER_UNKNOWN) { + // If the header is not of any we understand, just pass everything till + // ':' + break; + } + if (token.Equals(Token::Whitespace())) { + // Accept only header-name traling whitespaces after known headers + break; + } + return NS_ERROR_CORRUPTED_CONTENT; + + case HEADER_VALUE: + if (token.Equals(Token::Whitespace()) && mResponseHeaderValue.IsEmpty()) { + // Eat leading whitespaces + break; + } + if (token.Equals(Token::NewLine())) { + nsresult rv = ProcessHeader(); + if (NS_FAILED(rv)) { + return rv; + } + mParserState = HEADER_NAME; + mResponseHeader = HEADER_UNKNOWN; + SetHeaderTokensEnabled(true); + } else { + mResponseHeaderValue.Append(token.Fragment()); + } + break; + + case BODY_INIT: + rv = SendStart(); + if (NS_FAILED(rv)) { + return rv; + } + mParserState = BODY; + [[fallthrough]]; + + case BODY: { + if (!token.Equals(mLFToken) && !token.Equals(mCRLFToken)) { + if (token.Equals(mBoundaryTokenWithDashes) || + token.Equals(mBoundaryToken)) { + // Allow CRLF to NOT be part of the boundary as well + SwitchToControlParsing(); + mParserState = TRAIL_DASH1; + break; + } + AccumulateData(token); + break; + } + + // After CRLF we must explicitly check for boundary. 
If found, + // that CRLF is part of the boundary and must not be send to the + // data listener. + Token token2; + if (!mTokenizer.Next(token2)) { + // Note: this will give us the CRLF token again when more data + // or OnStopRequest arrive. I.e. we will enter BODY case in + // the very same state as we are now and start this block over. + mTokenizer.NeedMoreInput(); + break; + } + if (token2.Equals(mBoundaryTokenWithDashes) || + token2.Equals(mBoundaryToken)) { + SwitchToControlParsing(); + mParserState = TRAIL_DASH1; + break; + } + + AccumulateData(token); + AccumulateData(token2); + break; + } + + case TRAIL_DASH1: + if (token.Equals(Token::NewLine())) { + rv = SendStop(NS_OK); + if (NS_FAILED(rv)) { + return rv; + } + mParserState = BOUNDARY_CRLF; + mTokenizer.Rollback(); + break; + } + if (token.Equals(Token::Char('-'))) { + mParserState = TRAIL_DASH2; + break; + } + return NS_ERROR_CORRUPTED_CONTENT; + + case TRAIL_DASH2: + if (token.Equals(Token::Char('-'))) { + mPartChannel->SetIsLastPart(); + // SendStop calls SendData first. 
+ rv = SendStop(NS_OK); + if (NS_FAILED(rv)) { + return rv; + } + mParserState = EPILOGUE; + break; + } + return NS_ERROR_CORRUPTED_CONTENT; + + case EPILOGUE: + // Just ignore + break; + + default: + MOZ_ASSERT(false, "Missing parser state handling branch"); + break; + } // switch + + return NS_OK; +} + +void nsMultiMixedConv::SetHeaderTokensEnabled(bool aEnable) { + for (uint32_t h = HEADER_FIRST; h < HEADER_UNKNOWN; ++h) { + mTokenizer.EnableCustomToken(mHeaderTokens[h], aEnable); + } +} + +void nsMultiMixedConv::SwitchToBodyParsing() { + mTokenizer.SetTokenizingMode(Tokenizer::Mode::CUSTOM_ONLY); + mTokenizer.EnableCustomToken(mLFToken, true); + mTokenizer.EnableCustomToken(mCRLFToken, true); + mTokenizer.EnableCustomToken(mBoundaryTokenWithDashes, true); + mTokenizer.EnableCustomToken(mBoundaryToken, true); +} + +void nsMultiMixedConv::SwitchToControlParsing() { + mTokenizer.SetTokenizingMode(Tokenizer::Mode::FULL); + mTokenizer.EnableCustomToken(mLFToken, false); + mTokenizer.EnableCustomToken(mCRLFToken, false); + mTokenizer.EnableCustomToken(mBoundaryTokenWithDashes, false); + mTokenizer.EnableCustomToken(mBoundaryToken, false); +} + +// nsMultiMixedConv methods +nsMultiMixedConv::nsMultiMixedConv() + // XXX: This is a hack to bypass the raw pointer to refcounted object in + // lambda analysis. It should be removed and replaced when the + // IncrementalTokenizer API is improved to avoid the need for such + // workarounds. + // + // This is safe because `mTokenizer` will not outlive `this`, meaning + // that this std::bind object will be destroyed before `this` dies. 
+ : mTokenizer(std::bind(&nsMultiMixedConv::ConsumeToken, this, + std::placeholders::_1)) {} + +nsresult nsMultiMixedConv::SendStart() { + nsresult rv = NS_OK; + + nsCOMPtr partListener(mFinalListener); + if (mContentType.IsEmpty()) { + mContentType.AssignLiteral(UNKNOWN_CONTENT_TYPE); + nsCOMPtr serv = + do_GetService(NS_STREAMCONVERTERSERVICE_CONTRACTID, &rv); + if (NS_SUCCEEDED(rv)) { + nsCOMPtr converter; + rv = serv->AsyncConvertData(UNKNOWN_CONTENT_TYPE, "*/*", mFinalListener, + mContext, getter_AddRefs(converter)); + if (NS_SUCCEEDED(rv)) { + partListener = converter; + } + } + } + + // if we already have an mPartChannel, that means we never sent a Stop() + // before starting up another "part." that would be bad. + MOZ_ASSERT(!mPartChannel, "tisk tisk, shouldn't be overwriting a channel"); + + nsPartChannel* newChannel; + newChannel = new nsPartChannel(mChannel, mCurrentPartID, mCurrentPartID == 0, + partListener); + + ++mCurrentPartID; + + if (mIsByteRangeRequest) { + newChannel->InitializeByteRange(mByteRangeStart, mByteRangeEnd); + } + + mTotalSent = 0; + + // Set up the new part channel... + mPartChannel = newChannel; + + rv = mPartChannel->SetContentType(mContentType); + if (NS_FAILED(rv)) return rv; + + rv = mPartChannel->SetContentLength(mContentLength); + if (NS_FAILED(rv)) return rv; + + mPartChannel->SetContentDisposition(mContentDisposition); + + // Each part of a multipart/replace response can be used + // for the top level document. We must inform upper layers + // about this by setting the LOAD_REPLACE flag so that certain + // state assertions are evaluated as positive. 
+ nsLoadFlags loadFlags = 0; + mPartChannel->GetLoadFlags(&loadFlags); + loadFlags |= nsIChannel::LOAD_REPLACE; + mPartChannel->SetLoadFlags(loadFlags); + + nsCOMPtr loadGroup; + (void)mPartChannel->GetLoadGroup(getter_AddRefs(loadGroup)); + + // Add the new channel to the load group (if any) + if (loadGroup) { + rv = loadGroup->AddRequest(mPartChannel, nullptr); + if (NS_FAILED(rv)) return rv; + } + + // This prevents artificial call to OnStart/StopRequest when the root + // channel fails. Since now it's ensured to keep with the nsIStreamListener + // contract every time. + mRequestListenerNotified = true; + + // Let's start off the load. NOTE: we don't forward on the channel passed + // into our OnDataAvailable() as it's the root channel for the raw stream. + return mPartChannel->SendOnStartRequest(mContext); +} + +nsresult nsMultiMixedConv::SendStop(nsresult aStatus) { + // Make sure we send out all accumulcated data prior call to OnStopRequest. + // If there is no data, this is a no-op. + nsresult rv = SendData(); + if (NS_SUCCEEDED(aStatus)) { + aStatus = rv; + } + if (mPartChannel) { + rv = mPartChannel->SendOnStopRequest(mContext, aStatus); + // don't check for failure here, we need to remove the channel from + // the loadgroup. + + // Remove the channel from its load group (if any) + nsCOMPtr loadGroup; + (void)mPartChannel->GetLoadGroup(getter_AddRefs(loadGroup)); + if (loadGroup) { + (void)loadGroup->RemoveRequest(mPartChannel, mContext, aStatus); + } + } + + mPartChannel = nullptr; + return rv; +} + +void nsMultiMixedConv::AccumulateData(Token const& aToken) { + if (!mRawData) { + // This is the first read of raw data during this FeedInput loop + // of the incremental tokenizer. All 'raw' tokens are coming from + // the same linear buffer, hence begining of this loop raw data + // is begining of the first raw token. Length of this loop raw + // data is just sum of all 'raw' tokens we collect during this loop. 
+ // + // It's ensured we flush (send to to the listener via OnDataAvailable) + // and nullify the collected raw data right after FeedInput call. + // Hence, the reference can't outlive the actual buffer. + mRawData = aToken.Fragment().BeginReading(); + mRawDataLength = 0; + } + + mRawDataLength += aToken.Fragment().Length(); +} + +nsresult nsMultiMixedConv::SendData() { + nsresult rv; + + if (!mRawData) { + return NS_OK; + } + + nsACString::const_char_iterator rawData = mRawData; + mRawData = nullptr; + + if (!mPartChannel) { + return NS_ERROR_FAILURE; // something went wrong w/ processing + } + + if (mContentLength != UINT64_MAX) { + // make sure that we don't send more than the mContentLength + // XXX why? perhaps the Content-Length header was actually wrong!! + if ((uint64_t(mRawDataLength) + mTotalSent) > mContentLength) { + mRawDataLength = static_cast(mContentLength - mTotalSent); + } + + if (mRawDataLength == 0) return NS_OK; + } + + uint64_t offset = mTotalSent; + mTotalSent += mRawDataLength; + + nsCOMPtr ss( + do_CreateInstance("@mozilla.org/io/string-input-stream;1", &rv)); + if (NS_FAILED(rv)) return rv; + + rv = ss->ShareData(rawData, mRawDataLength); + mRawData = nullptr; + if (NS_FAILED(rv)) return rv; + + return mPartChannel->SendOnDataAvailable(mContext, ss, offset, + mRawDataLength); +} + +void nsMultiMixedConv::HeadersToDefault() { + mContentLength = UINT64_MAX; + mContentType.Truncate(); + mContentDisposition.Truncate(); + mContentSecurityPolicy.Truncate(); + mIsByteRangeRequest = false; +} + +nsresult nsMultiMixedConv::ProcessHeader() { + mozilla::Tokenizer p(mResponseHeaderValue); + + switch (mResponseHeader) { + case HEADER_CONTENT_TYPE: + mContentType = mResponseHeaderValue; + mContentType.CompressWhitespace(); + break; + case HEADER_CONTENT_LENGTH: + p.SkipWhites(); + if (!p.ReadInteger(&mContentLength)) { + return NS_ERROR_CORRUPTED_CONTENT; + } + break; + case HEADER_CONTENT_DISPOSITION: + mContentDisposition = mResponseHeaderValue; + 
mContentDisposition.CompressWhitespace(); + break; + case HEADER_SET_COOKIE: { + nsCOMPtr httpInternal = + do_QueryInterface(mChannel); + mResponseHeaderValue.CompressWhitespace(); + if (httpInternal) { + DebugOnly rv = httpInternal->SetCookie(mResponseHeaderValue); + MOZ_ASSERT(NS_SUCCEEDED(rv)); + } + break; + } + case HEADER_RANGE: + case HEADER_CONTENT_RANGE: { + if (!p.CheckWord("bytes") || !p.CheckWhite()) { + return NS_ERROR_CORRUPTED_CONTENT; + } + p.SkipWhites(); + if (p.CheckChar('*')) { + mByteRangeStart = mByteRangeEnd = 0; + } else if (!p.ReadInteger(&mByteRangeStart) || !p.CheckChar('-') || + !p.ReadInteger(&mByteRangeEnd)) { + return NS_ERROR_CORRUPTED_CONTENT; + } + mIsByteRangeRequest = true; + if (mContentLength == UINT64_MAX) { + mContentLength = uint64_t(mByteRangeEnd - mByteRangeStart + 1); + } + break; + } + case HEADER_CONTENT_SECURITY_POLICY: { + mContentSecurityPolicy = mResponseHeaderValue; + mContentSecurityPolicy.CompressWhitespace(); + nsCOMPtr httpChannel = do_QueryInterface(mChannel); + if (httpChannel) { + nsCString resultCSP = mRootContentSecurityPolicy; + if (!mContentSecurityPolicy.IsEmpty()) { + // We are updating the root channel CSP header respectively for + // each part as: CSP-root + CSP-partN, where N is the part number. + // Here we append current part's CSP to root CSP and reset CSP + // header for each part. + if (!resultCSP.IsEmpty()) { + resultCSP.Append(";"); + } + resultCSP.Append(mContentSecurityPolicy); + } + nsresult rv = httpChannel->SetResponseHeader( + "Content-Security-Policy"_ns, resultCSP, false); + if (NS_FAILED(rv)) { + return NS_ERROR_CORRUPTED_CONTENT; + } + } + break; + } + case HEADER_UNKNOWN: + // We ignore anything else... 
+ break; + } + + return NS_OK; +} + +nsresult NS_NewMultiMixedConv(nsMultiMixedConv** aMultiMixedConv) { + MOZ_ASSERT(aMultiMixedConv != nullptr, "null ptr"); + + RefPtr conv = new nsMultiMixedConv(); + conv.forget(aMultiMixedConv); + return NS_OK; +} diff --git a/netwerk/streamconv/converters/nsMultiMixedConv.h b/netwerk/streamconv/converters/nsMultiMixedConv.h new file mode 100644 index 0000000000..9bdaaabf19 --- /dev/null +++ b/netwerk/streamconv/converters/nsMultiMixedConv.h @@ -0,0 +1,258 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef __nsmultimixedconv__h__ +#define __nsmultimixedconv__h__ + +#include "nsIStreamConverter.h" +#include "nsIChannel.h" +#include "nsString.h" +#include "nsCOMPtr.h" +#include "nsIByteRangeRequest.h" +#include "nsIMultiPartChannel.h" +#include "mozilla/Attributes.h" +#include "mozilla/IncrementalTokenizer.h" +#include "nsHttpResponseHead.h" +#include "mozilla/UniquePtr.h" + +#define NS_MULTIMIXEDCONVERTER_CID \ + { /* 7584CE90-5B25-11d3-A175-0050041CAF44 */ \ + 0x7584ce90, 0x5b25, 0x11d3, { \ + 0xa1, 0x75, 0x0, 0x50, 0x4, 0x1c, 0xaf, 0x44 \ + } \ + } + +// +// nsPartChannel is a "dummy" channel which represents an individual part of +// a multipart/mixed stream... +// +// Instances on this channel are passed out to the consumer through the +// nsIStreamListener interface. 
+// +class nsPartChannel final : public nsIChannel, + public nsIByteRangeRequest, + public nsIMultiPartChannel { + public: + nsPartChannel(nsIChannel* aMultipartChannel, uint32_t aPartID, + bool aIsFirstPart, nsIStreamListener* aListener); + + void InitializeByteRange(int64_t aStart, int64_t aEnd); + void SetIsLastPart() { mIsLastPart = true; } + nsresult SendOnStartRequest(nsISupports* aContext); + nsresult SendOnDataAvailable(nsISupports* aContext, nsIInputStream* aStream, + uint64_t aOffset, uint32_t aLen); + nsresult SendOnStopRequest(nsISupports* aContext, nsresult aStatus); + /* SetContentDisposition expects the full value of the Content-Disposition + * header */ + void SetContentDisposition(const nsACString& aContentDispositionHeader); + // TODO(ER): This appears to be dead code + void SetResponseHead(mozilla::net::nsHttpResponseHead* head) { + mResponseHead.reset(head); + } + + NS_DECL_ISUPPORTS + NS_DECL_NSIREQUEST + NS_DECL_NSICHANNEL + NS_DECL_NSIBYTERANGEREQUEST + NS_DECL_NSIMULTIPARTCHANNEL + + protected: + ~nsPartChannel() = default; + + protected: + nsCOMPtr mMultipartChannel; + nsCOMPtr mListener; + mozilla::UniquePtr mResponseHead; + + nsresult mStatus{NS_OK}; + nsLoadFlags mLoadFlags{0}; + + nsCOMPtr mLoadGroup; + + nsCString mContentType; + nsCString mContentCharset; + uint32_t mContentDisposition{0}; + nsString mContentDispositionFilename; + nsCString mContentDispositionHeader; + uint64_t mContentLength{UINT64_MAX}; + + bool mIsByteRangeRequest{false}; + int64_t mByteRangeStart{0}; + int64_t mByteRangeEnd{0}; + + uint32_t mPartID; // unique ID that can be used to identify + // this part of the multipart document + bool mIsFirstPart; + bool mIsLastPart{false}; +}; + +// The nsMultiMixedConv stream converter converts a stream of type +// "multipart/x-mixed-replace" to it's subparts. There was some debate as to +// whether or not the functionality desired when HTTP confronted this type +// required a stream converter. 
After all, this type really prompts various +// viewer-related actions rather than stream conversion. There simply needs to +// be a piece in place that can strip out the multiple parts of a stream of this +// type, and "display" them accordingly. +// +// With that said, this "stream converter" spends more time packaging up the sub +// parts of the main stream and sending them off to the destination stream +// listener than doing any real stream parsing/converting. +// +// WARNING: This converter requires that its destination stream listener be +// able to handle multiple OnStartRequest(), OnDataAvailable(), and +// OnStopRequest() call combinations. Each series represents the beginning, +// data production, and ending phase of each sub-part of the original +// stream. +// +// NOTE: this MIME-type is used by HTTP, *not* SMTP, or IMAP. +// +// NOTE: For a general description of how this MIME type should be +// handled via HTTP, see +// http://home.netscape.com/assist/net_sites/pushpull.html . Note that real +// world server content deviates considerably from this overview. +// +// Implementation assumptions: +// Assumed structure: +// --BoundaryToken[\r]\n +// content-type: foo/bar[\r]\n +// ... (other headers if any) +// [\r]\n (second line feed to delimit end of headers) +// data +// --BoundaryToken-- (end delimited by final "--") +// +// Linebreaks can be either CRLF or LF. Linebreaks preceding +// boundary tokens are NOT considered part of the data. BoundaryToken +// is any opaque string.
+// +// + +class nsMultiMixedConv : public nsIStreamConverter { + public: + NS_DECL_ISUPPORTS + NS_DECL_NSISTREAMCONVERTER + NS_DECL_NSISTREAMLISTENER + NS_DECL_NSIREQUESTOBSERVER + + explicit nsMultiMixedConv(); + + protected: + using Token = mozilla::IncrementalTokenizer::Token; + + virtual ~nsMultiMixedConv() = default; + + nsresult SendStart(); + void AccumulateData(Token const& aToken); + nsresult SendData(); + nsresult SendStop(nsresult aStatus); + + // member data + nsCOMPtr mFinalListener; // this guy gets the converted + // data via his OnDataAvailable() + + nsCOMPtr + mChannel; // The channel as we get in in OnStartRequest call + RefPtr mPartChannel; // the channel for the given part we're + // processing. one channel per part. + nsCOMPtr mContext; + nsCString mContentType; + nsCString mContentDisposition; + nsCString mContentSecurityPolicy; + nsCString mRootContentSecurityPolicy; + uint64_t mContentLength{UINT64_MAX}; + uint64_t mTotalSent{0}; + + // The following members are for tracking the byte ranges in + // multipart/mixed content which specified the 'Content-Range:' + // header... + int64_t mByteRangeStart{0}; + int64_t mByteRangeEnd{0}; + bool mIsByteRangeRequest{false}; + // This flag is set first time we create a part channel. + // We use it to prevent duplicated OnStopRequest call on the listener + // when we fail from some reason to ever create a part channel that + // ensures correct notifications. + bool mRequestListenerNotified{false}; + + uint32_t mCurrentPartID{0}; + + // Flag preventing reenter of OnDataAvailable in case the target listener + // ends up spinning the event loop. 
+ bool mInOnDataAvailable{false}; + + // Current state of the incremental parser + enum EParserState { + PREAMBLE, + BOUNDARY_CRLF, + HEADER_NAME, + HEADER_SEP, + HEADER_VALUE, + BODY_INIT, + BODY, + TRAIL_DASH1, + TRAIL_DASH2, + EPILOGUE, + + INIT = PREAMBLE + } mParserState{INIT}; + + // Response part header value, valid when we find a header name + // we recognize. + enum EHeader : uint32_t { + HEADER_FIRST, + HEADER_CONTENT_TYPE = HEADER_FIRST, + HEADER_CONTENT_LENGTH, + HEADER_CONTENT_DISPOSITION, + HEADER_SET_COOKIE, + HEADER_CONTENT_RANGE, + HEADER_RANGE, + HEADER_CONTENT_SECURITY_POLICY, + HEADER_UNKNOWN + } mResponseHeader{HEADER_UNKNOWN}; + // Cumulated value of a response header. + nsCString mResponseHeaderValue; + + nsCString mBoundary; + mozilla::IncrementalTokenizer mTokenizer; + + // When in the "body parsing" mode, see below, we cumulate raw data + // incrementally to mainly avoid any unnecessary granularity. + // mRawData points to the first byte in the tokenizer buffer where part + // body data begins or continues. mRawDataLength is a cumulated length + // of that data during a single tokenizer input feed. This is always + // flushed right after we fed the tokenizer. + nsACString::const_char_iterator mRawData{nullptr}; + nsACString::size_type mRawDataLength{0}; + + // At the start we don't know if the server will be sending boundary with + // or without the leading dashes. + Token mBoundaryToken; + Token mBoundaryTokenWithDashes; + // We need these custom tokens to allow finding CRLF when in the binary mode. + // CRLF before boundary is considered part of the boundary and not part of + // the data. + Token mLFToken; + Token mCRLFToken; + // Custom tokens for each of the response headers we recognize. + Token mHeaderTokens[HEADER_UNKNOWN]; + + // Resets values driven by part headers, like content type, to their defaults, + // called at the start of every part processing. 
+ void HeadersToDefault(); + // Processes captured value of mResponseHeader header. + nsresult ProcessHeader(); + // Switches the parser and tokenizer state to "binary mode" which only + // searches for the 'CRLF boundary' delimiter. + void SwitchToBodyParsing(); + // Switches to the default mode, we are in this mode when parsing headers and + // control data around the boundary delimiters. + void SwitchToControlParsing(); + // Turns on or off recognition of the headers we recognize in part heads. + void SetHeaderTokensEnabled(bool aEnable); + + // The main parser callback called by the IncrementalTokenizer + // instance from OnDataAvailable or OnStopRequest. + nsresult ConsumeToken(Token const& token); +}; + +#endif /* __nsmultimixedconv__h__ */ diff --git a/netwerk/streamconv/converters/nsUnknownDecoder.cpp b/netwerk/streamconv/converters/nsUnknownDecoder.cpp new file mode 100644 index 0000000000..a9d919456a --- /dev/null +++ b/netwerk/streamconv/converters/nsUnknownDecoder.cpp @@ -0,0 +1,852 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsUnknownDecoder.h" +#include "nsIPipe.h" +#include "nsIInputStream.h" +#include "nsIOutputStream.h" +#include "nsMimeTypes.h" + +#include "nsCRT.h" + +#include "nsIMIMEService.h" + +#include "nsIViewSourceChannel.h" +#include "nsIHttpChannel.h" +#include "nsIForcePendingChannel.h" +#include "nsIEncodedChannel.h" +#include "nsIURI.h" +#include "nsStringStream.h" +#include "nsNetCID.h" +#include "nsNetUtil.h" +#include "nsQueryObject.h" +#include "nsComponentManagerUtils.h" +#include "nsServiceManagerUtils.h" + +#include + +#define MAX_BUFFER_SIZE 512u + +using namespace mozilla; + +NS_IMPL_ISUPPORTS(nsUnknownDecoder::ConvertedStreamListener, nsIStreamListener, + nsIRequestObserver) + +nsUnknownDecoder::ConvertedStreamListener::ConvertedStreamListener( + nsUnknownDecoder* aDecoder) { + mDecoder = aDecoder; +} + +nsresult nsUnknownDecoder::ConvertedStreamListener::AppendDataToString( + nsIInputStream* inputStream, void* closure, const char* rawSegment, + uint32_t toOffset, uint32_t count, uint32_t* writeCount) { + nsCString* decodedData = static_cast(closure); + decodedData->Append(rawSegment, count); + *writeCount = count; + return NS_OK; +} + +NS_IMETHODIMP +nsUnknownDecoder::ConvertedStreamListener::OnStartRequest(nsIRequest* request) { + return NS_OK; +} + +NS_IMETHODIMP +nsUnknownDecoder::ConvertedStreamListener::OnDataAvailable( + nsIRequest* request, nsIInputStream* stream, uint64_t offset, + uint32_t count) { + uint32_t read; + nsAutoCString decodedData; + { + MutexAutoLock lock(mDecoder->mMutex); + decodedData = mDecoder->mDecodedData; + } + nsresult rv = + stream->ReadSegments(AppendDataToString, &decodedData, count, &read); + if (NS_FAILED(rv)) { + return rv; + } + MutexAutoLock lock(mDecoder->mMutex); + mDecoder->mDecodedData = decodedData; + return NS_OK; +} + +NS_IMETHODIMP +nsUnknownDecoder::ConvertedStreamListener::OnStopRequest(nsIRequest* request, + nsresult status) { + return NS_OK; +} + 
+nsUnknownDecoder::nsUnknownDecoder(nsIStreamListener* aListener) + : mNextListener(aListener), + mBuffer(nullptr), + mBufferLen(0), + mMutex("nsUnknownDecoder"), + mDecodedData("") {} + +nsUnknownDecoder::~nsUnknownDecoder() { + if (mBuffer) { + delete[] mBuffer; + mBuffer = nullptr; + } +} + +// ---- +// +// nsISupports implementation... +// +// ---- + +NS_IMPL_ADDREF(nsUnknownDecoder) +NS_IMPL_RELEASE(nsUnknownDecoder) + +NS_INTERFACE_MAP_BEGIN(nsUnknownDecoder) + NS_INTERFACE_MAP_ENTRY(nsIStreamConverter) + NS_INTERFACE_MAP_ENTRY(nsIStreamListener) + NS_INTERFACE_MAP_ENTRY(nsIRequestObserver) + NS_INTERFACE_MAP_ENTRY(nsIContentSniffer) + NS_INTERFACE_MAP_ENTRY(nsIThreadRetargetableStreamListener) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIStreamListener) +NS_INTERFACE_MAP_END + +// ---- +// +// nsIStreamConverter methods... +// +// ---- + +NS_IMETHODIMP +nsUnknownDecoder::Convert(nsIInputStream* aFromStream, const char* aFromType, + const char* aToType, nsISupports* aCtxt, + nsIInputStream** aResultStream) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +nsUnknownDecoder::AsyncConvertData(const char* aFromType, const char* aToType, + nsIStreamListener* aListener, + nsISupports* aCtxt) { + NS_ASSERTION(aListener && aFromType && aToType, + "null pointer passed into multi mixed converter"); + // hook up our final listener. this guy gets the various On*() calls we want + // to throw at him. + // + + MutexAutoLock lock(mMutex); + mNextListener = aListener; + return (aListener) ? NS_OK : NS_ERROR_FAILURE; +} + +NS_IMETHODIMP +nsUnknownDecoder::GetConvertedType(const nsACString& aFromType, + nsIChannel* aChannel, nsACString& aToType) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +// ---- +// +// nsIStreamListener methods... 
+// +// ---- + +NS_IMETHODIMP +nsUnknownDecoder::OnDataAvailable(nsIRequest* request, nsIInputStream* aStream, + uint64_t aSourceOffset, uint32_t aCount) { + nsresult rv = NS_OK; + + bool contentTypeEmpty; + { + MutexAutoLock lock(mMutex); + if (!mNextListener) return NS_ERROR_FAILURE; + + contentTypeEmpty = mContentType.IsEmpty(); + } + + if (contentTypeEmpty) { + uint32_t count, len; + + // If the buffer has not been allocated by now, just fail... + if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY; + + // + // Determine how much of the stream should be read to fill up the + // sniffer buffer... + // + if (mBufferLen + aCount >= MAX_BUFFER_SIZE) { + count = MAX_BUFFER_SIZE - mBufferLen; + } else { + count = aCount; + } + + // Read the data into the buffer... + rv = aStream->Read((mBuffer + mBufferLen), count, &len); + if (NS_FAILED(rv)) return rv; + + mBufferLen += len; + aCount -= len; + + if (aCount) { + // + // Adjust the source offset... The call to FireListenerNotifications(...) + // will make the first OnDataAvailable(...) call with an offset of 0. + // So, this offset needs to be adjusted to reflect that... + // + aSourceOffset += mBufferLen; + + DetermineContentType(request); + + rv = FireListenerNotifications(request, nullptr); + } + } + + // Must not fire ODA again if it failed once + if (aCount && NS_SUCCEEDED(rv)) { +#ifdef DEBUG + { + MutexAutoLock lock(mMutex); + NS_ASSERTION(!mContentType.IsEmpty(), + "Content type should be known by now."); + } +#endif + + nsCOMPtr listener; + { + MutexAutoLock lock(mMutex); + listener = mNextListener; + } + rv = listener->OnDataAvailable(request, aStream, aSourceOffset, aCount); + } + + return rv; +} + +// ---- +// +// nsIRequestObserver methods... +// +// ---- + +NS_IMETHODIMP +nsUnknownDecoder::OnStartRequest(nsIRequest* request) { + nsresult rv = NS_OK; + + { + MutexAutoLock lock(mMutex); + if (!mNextListener) return NS_ERROR_FAILURE; + } + + // Allocate the sniffer buffer... 
+ if (NS_SUCCEEDED(rv) && !mBuffer) { + mBuffer = new char[MAX_BUFFER_SIZE]; + + if (!mBuffer) { + rv = NS_ERROR_OUT_OF_MEMORY; + } + } + + // Do not pass the OnStartRequest on to the next listener (yet)... + return rv; +} + +NS_IMETHODIMP +nsUnknownDecoder::OnStopRequest(nsIRequest* request, nsresult aStatus) { + nsresult rv = NS_OK; + + bool contentTypeEmpty; + { + MutexAutoLock lock(mMutex); + if (!mNextListener) return NS_ERROR_FAILURE; + + contentTypeEmpty = mContentType.IsEmpty(); + } + + // + // The total amount of data is less than the size of the sniffer buffer. + // Analyze the buffer now... + // + if (contentTypeEmpty) { + DetermineContentType(request); + + // Make sure channel listeners see channel as pending while we call + // OnStartRequest/OnDataAvailable, even though the underlying channel + // has already hit OnStopRequest. + nsCOMPtr forcePendingChannel = + do_QueryInterface(request); + if (forcePendingChannel) { + forcePendingChannel->ForcePending(true); + } + + rv = FireListenerNotifications(request, nullptr); + + if (NS_FAILED(rv)) { + aStatus = rv; + } + + // now we need to set pending state to false before calling OnStopRequest + if (forcePendingChannel) { + forcePendingChannel->ForcePending(false); + } + } + + nsCOMPtr listener; + { + MutexAutoLock lock(mMutex); + listener = mNextListener; + mNextListener = nullptr; + } + rv = listener->OnStopRequest(request, aStatus); + + return rv; +} + +// ---- +// +// nsIContentSniffer methods... +// +// ---- +NS_IMETHODIMP +nsUnknownDecoder::GetMIMETypeFromContent(nsIRequest* aRequest, + const uint8_t* aData, uint32_t aLength, + nsACString& type) { + // This is only used by sniffer, therefore we do not need to lock anything + // here. 
+ nsCOMPtr channel(do_QueryInterface(aRequest)); + if (channel) { + nsCOMPtr loadInfo = channel->LoadInfo(); + if (loadInfo->GetSkipContentSniffing()) { + return NS_ERROR_NOT_AVAILABLE; + } + } + + mBuffer = const_cast(reinterpret_cast(aData)); + mBufferLen = aLength; + DetermineContentType(aRequest); + mBuffer = nullptr; + mBufferLen = 0; + type.Assign(mContentType); + mContentType.Truncate(); + return type.IsEmpty() ? NS_ERROR_NOT_AVAILABLE : NS_OK; +} + +// Actual sniffing code + +/** + * This is the array of sniffer entries that depend on "magic numbers" + * in the file. Each entry has either a type associated with it (set + * these with the SNIFFER_ENTRY macro) or a function to be executed + * (set these with the SNIFFER_ENTRY_WITH_FUNC macro). The function + * should take a single nsIRequest* and returns bool -- true if + * it sets mContentType, false otherwise + */ +nsUnknownDecoder::nsSnifferEntry nsUnknownDecoder::sSnifferEntries[] = { + SNIFFER_ENTRY("%PDF-", APPLICATION_PDF), + + SNIFFER_ENTRY("%!PS-Adobe-", APPLICATION_POSTSCRIPT), + + // Files that start with mailbox delimiters let's provisionally call + // text/plain + SNIFFER_ENTRY("From", TEXT_PLAIN), SNIFFER_ENTRY(">From", TEXT_PLAIN), + + // If the buffer begins with "#!" or "%!" then it is a script of + // some sort... "Scripts" can include arbitrary data to be passed + // to an interpreter, so we need to decide whether we can call this + // text or whether it's data. 
+ SNIFFER_ENTRY_WITH_FUNC("#!", &nsUnknownDecoder::LastDitchSniff), + + // XXXbz should (and can) we also include the various ways that channel(do_QueryInterface(aRequest)); + if (channel) { + nsCOMPtr loadInfo = channel->LoadInfo(); + if (loadInfo->GetSkipContentSniffing()) { + /* + * If we did not get a useful Content-Type from the server + * but also have sniffing disabled, just determine whether + * to use text/plain or octetstream and log an error to the Console + */ + LastDitchSniff(aRequest); + + nsCOMPtr httpChannel(do_QueryInterface(aRequest)); + if (httpChannel) { + nsAutoCString type; + httpChannel->GetContentType(type); + nsCOMPtr requestUri; + httpChannel->GetURI(getter_AddRefs(requestUri)); + nsAutoCString spec; + requestUri->GetSpec(spec); + if (spec.Length() > 50) { + spec.Truncate(50); + spec.AppendLiteral("..."); + } + httpChannel->LogMimeTypeMismatch( + "XTCOWithMIMEValueMissing"_ns, false, NS_ConvertUTF8toUTF16(spec), + // Type is not used in the Error Message but required + NS_ConvertUTF8toUTF16(type)); + } + return; + } + } + + const char* testData = mBuffer; + uint32_t testDataLen = mBufferLen; + // Check if data are compressed. + nsAutoCString decodedData; + + if (channel) { + // ConvertEncodedData is always called only on a single thread for each + // instance of an object. 
+ nsresult rv = ConvertEncodedData(aRequest, mBuffer, mBufferLen); + if (NS_SUCCEEDED(rv)) { + MutexAutoLock lock(mMutex); + decodedData = mDecodedData; + } + if (!decodedData.IsEmpty()) { + testData = decodedData.get(); + testDataLen = std::min(decodedData.Length(), MAX_BUFFER_SIZE); + } + } + + // First, run through all the types we can detect reliably based on + // magic numbers + uint32_t i; + for (i = 0; i < sSnifferEntryNum; ++i) { + if (testDataLen >= sSnifferEntries[i].mByteLen && // enough data + memcmp(testData, sSnifferEntries[i].mBytes, + sSnifferEntries[i].mByteLen) == 0) { // and type matches + NS_ASSERTION( + sSnifferEntries[i].mMimeType || + sSnifferEntries[i].mContentTypeSniffer, + "Must have either a type string or a function to set the type"); + NS_ASSERTION(!sSnifferEntries[i].mMimeType || + !sSnifferEntries[i].mContentTypeSniffer, + "Both a type string and a type sniffing function set;" + " using type string"); + if (sSnifferEntries[i].mMimeType) { + MutexAutoLock lock(mMutex); + mContentType = sSnifferEntries[i].mMimeType; + NS_ASSERTION(!mContentType.IsEmpty(), + "Content type should be known by now."); + return; + } + if ((this->*(sSnifferEntries[i].mContentTypeSniffer))(aRequest)) { +#ifdef DEBUG + MutexAutoLock lock(mMutex); + NS_ASSERTION(!mContentType.IsEmpty(), + "Content type should be known by now."); +#endif + return; + } + } + } + + nsAutoCString sniffedType; + NS_SniffContent(NS_DATA_SNIFFER_CATEGORY, aRequest, (const uint8_t*)testData, + testDataLen, sniffedType); + { + MutexAutoLock lock(mMutex); + mContentType = sniffedType; + if (!mContentType.IsEmpty()) { + return; + } + } + + if (SniffForHTML(aRequest)) { +#ifdef DEBUG + MutexAutoLock lock(mMutex); + NS_ASSERTION(!mContentType.IsEmpty(), + "Content type should be known by now."); +#endif + return; + } + + // We don't know what this is yet. Before we just give up, try + // the URI from the request. 
+ if (SniffURI(aRequest)) { +#ifdef DEBUG + MutexAutoLock lock(mMutex); + NS_ASSERTION(!mContentType.IsEmpty(), + "Content type should be known by now."); +#endif + return; + } + + LastDitchSniff(aRequest); +#ifdef DEBUG + MutexAutoLock lock(mMutex); + NS_ASSERTION(!mContentType.IsEmpty(), "Content type should be known by now."); +#endif +} + +bool nsUnknownDecoder::SniffForHTML(nsIRequest* aRequest) { + MutexAutoLock lock(mMutex); + + // Now look for HTML. + const char* str; + const char* end; + if (mDecodedData.IsEmpty()) { + str = mBuffer; + end = mBuffer + mBufferLen; + } else { + str = mDecodedData.get(); + end = mDecodedData.get() + + std::min(mDecodedData.Length(), MAX_BUFFER_SIZE); + } + + // skip leading whitespace + while (str != end && nsCRT::IsAsciiSpace(*str)) { + ++str; + } + + // did we find something like a start tag? + if (str == end || *str != '<' || ++str == end) { + return false; + } + + // If we seem to be SGML or XML and we got down here, just pretend we're HTML + if (*str == '!' || *str == '?') { + mContentType = TEXT_HTML; + return true; + } + + uint32_t bufSize = end - str; + // We use sizeof(_tagstr) below because that's the length of _tagstr + // with the one char " " or ">" appended. 
+#define MATCHES_TAG(_tagstr) \ + (bufSize >= sizeof(_tagstr) && \ + (nsCRT::strncasecmp(str, _tagstr " ", sizeof(_tagstr)) == 0 || \ + nsCRT::strncasecmp(str, _tagstr ">", sizeof(_tagstr)) == 0)) + + if (MATCHES_TAG("html") || MATCHES_TAG("frameset") || MATCHES_TAG("body") || + MATCHES_TAG("head") || MATCHES_TAG("script") || MATCHES_TAG("iframe") || + MATCHES_TAG("a") || MATCHES_TAG("img") || MATCHES_TAG("table") || + MATCHES_TAG("title") || MATCHES_TAG("link") || MATCHES_TAG("base") || + MATCHES_TAG("style") || MATCHES_TAG("div") || MATCHES_TAG("p") || + MATCHES_TAG("font") || MATCHES_TAG("applet") || MATCHES_TAG("meta") || + MATCHES_TAG("center") || MATCHES_TAG("form") || MATCHES_TAG("isindex") || + MATCHES_TAG("h1") || MATCHES_TAG("h2") || MATCHES_TAG("h3") || + MATCHES_TAG("h4") || MATCHES_TAG("h5") || MATCHES_TAG("h6") || + MATCHES_TAG("b") || MATCHES_TAG("pre")) { + mContentType = TEXT_HTML; + return true; + } + +#undef MATCHES_TAG + + return false; +} + +bool nsUnknownDecoder::SniffForXML(nsIRequest* aRequest) { + // First see whether we can glean anything from the uri... + if (!SniffURI(aRequest)) { + // Oh well; just generic XML will have to do + MutexAutoLock lock(mMutex); + mContentType = TEXT_XML; + } + + return true; +} + +bool nsUnknownDecoder::SniffURI(nsIRequest* aRequest) { + nsCOMPtr channel(do_QueryInterface(aRequest)); + nsCOMPtr loadInfo = channel->LoadInfo(); + if (loadInfo->GetSkipContentSniffing()) { + return false; + } + nsCOMPtr mimeService(do_GetService("@mozilla.org/mime;1")); + if (mimeService) { + nsCOMPtr channel = do_QueryInterface(aRequest); + if (channel) { + nsCOMPtr uri; + nsresult result = channel->GetURI(getter_AddRefs(uri)); + if (NS_SUCCEEDED(result) && uri) { + nsAutoCString type; + result = mimeService->GetTypeFromURI(uri, type); + if (NS_SUCCEEDED(result)) { + MutexAutoLock lock(mMutex); + mContentType = type; + return true; + } + } + } + } + + return false; +} + +// This macro is based on RFC 2046 Section 4.1.2. 
Treat any char 0-31 +// except the 9-13 range (\t, \n, \v, \f, \r) and char 27 (used by +// encodings like Shift_JIS) as non-text +#define IS_TEXT_CHAR(ch) \ + (((unsigned char)(ch)) > 31 || (9 <= (ch) && (ch) <= 13) || (ch) == 27) + +bool nsUnknownDecoder::LastDitchSniff(nsIRequest* aRequest) { + // All we can do now is try to guess whether this is text/plain or + // application/octet-stream + + MutexAutoLock lock(mMutex); + + const char* testData; + uint32_t testDataLen; + if (mDecodedData.IsEmpty()) { + testData = mBuffer; + // Since some legacy text files end with 0x1A, reading the entire buffer + // will lead misdetection. + testDataLen = std::min(mBufferLen, MAX_BUFFER_SIZE); + } else { + testData = mDecodedData.get(); + testDataLen = std::min(mDecodedData.Length(), MAX_BUFFER_SIZE); + } + + // First, check for a BOM. If we see one, assume this is text/plain + // in whatever encoding. If there is a BOM _and_ text we will + // always have at least 4 bytes in the buffer (since the 2-byte BOMs + // are for 2-byte encodings and the UTF-8 BOM is 3 bytes). + if (testDataLen >= 4) { + const unsigned char* buf = (const unsigned char*)testData; + if ((buf[0] == 0xFE && buf[1] == 0xFF) || // UTF-16, Big Endian + (buf[0] == 0xFF && buf[1] == 0xFE) || // UTF-16 or UCS-4, Little Endian + (buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF) || // UTF-8 + (buf[0] == 0 && buf[1] == 0 && buf[2] == 0xFE && + buf[3] == 0xFF)) { // UCS-4, Big Endian + + mContentType = TEXT_PLAIN; + return true; + } + } + + // Now see whether the buffer has any non-text chars. If not, then let's + // just call it text/plain... 
+ // + uint32_t i; + for (i = 0; i < testDataLen && IS_TEXT_CHAR(testData[i]); i++) { + } + + if (i == testDataLen) { + mContentType = TEXT_PLAIN; + } else { + mContentType = APPLICATION_OCTET_STREAM; + } + + return true; +} + +nsresult nsUnknownDecoder::FireListenerNotifications(nsIRequest* request, + nsISupports* aCtxt) { + nsresult rv = NS_OK; + + nsCOMPtr listener; + nsAutoCString contentType; + { + MutexAutoLock lock(mMutex); + if (!mNextListener) return NS_ERROR_FAILURE; + + listener = mNextListener; + contentType = mContentType; + } + + if (!contentType.IsEmpty()) { + nsCOMPtr viewSourceChannel = + do_QueryInterface(request); + if (viewSourceChannel) { + rv = viewSourceChannel->SetOriginalContentType(contentType); + } else { + nsCOMPtr channel = do_QueryInterface(request, &rv); + if (NS_SUCCEEDED(rv)) { + // Set the new content type on the channel... + rv = channel->SetContentType(contentType); + } + } + + NS_ASSERTION(NS_SUCCEEDED(rv), "Unable to set content type on channel!"); + + if (NS_FAILED(rv)) { + // Cancel the request to make sure it has the correct status if + // mNextListener looks at it. + request->Cancel(rv); + listener->OnStartRequest(request); + return rv; + } + } + + // Fire the OnStartRequest(...) + rv = listener->OnStartRequest(request); + + if (NS_SUCCEEDED(rv)) { + // install stream converter if required + nsCOMPtr encodedChannel = do_QueryInterface(request); + if (encodedChannel) { + nsCOMPtr listenerNew; + rv = encodedChannel->DoApplyContentConversions( + listener, getter_AddRefs(listenerNew), aCtxt); + if (NS_SUCCEEDED(rv) && listenerNew) { + MutexAutoLock lock(mMutex); + mNextListener = listenerNew; + listener = listenerNew; + } + } + } + + if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY; + + // If the request was canceled, then we need to treat that equivalently + // to an error returned by OnStartRequest. 
+ if (NS_SUCCEEDED(rv)) request->GetStatus(&rv); + + // Fire the first OnDataAvailable for the data that was read from the + // stream into the sniffer buffer... + if (NS_SUCCEEDED(rv) && (mBufferLen > 0)) { + uint32_t len = 0; + nsCOMPtr in; + nsCOMPtr out; + + // Create a pipe and fill it with the data from the sniffer buffer. + NS_NewPipe(getter_AddRefs(in), getter_AddRefs(out), MAX_BUFFER_SIZE, + MAX_BUFFER_SIZE); + + rv = out->Write(mBuffer, mBufferLen, &len); + if (NS_SUCCEEDED(rv)) { + if (len == mBufferLen) { + rv = listener->OnDataAvailable(request, in, 0, len); + } else { + NS_ERROR("Unable to write all the data into the pipe."); + rv = NS_ERROR_FAILURE; + } + } + } + + delete[] mBuffer; + mBuffer = nullptr; + mBufferLen = 0; + + return rv; +} + +nsresult nsUnknownDecoder::ConvertEncodedData(nsIRequest* request, + const char* data, + uint32_t length) { + nsresult rv = NS_OK; + + { + MutexAutoLock lock(mMutex); + mDecodedData = ""; + } + nsCOMPtr encodedChannel(do_QueryInterface(request)); + if (encodedChannel) { + RefPtr strListener = + new ConvertedStreamListener(this); + + nsCOMPtr listener; + rv = encodedChannel->DoApplyContentConversions( + strListener, getter_AddRefs(listener), nullptr); + + if (NS_FAILED(rv)) { + return rv; + } + + if (listener) { + listener->OnStartRequest(request); + + if (length) { + nsCOMPtr rawStream = + do_CreateInstance(NS_STRINGINPUTSTREAM_CONTRACTID); + if (!rawStream) return NS_ERROR_FAILURE; + + rv = rawStream->SetData((const char*)data, length); + NS_ENSURE_SUCCESS(rv, rv); + + rv = listener->OnDataAvailable(request, rawStream, 0, length); + NS_ENSURE_SUCCESS(rv, rv); + } + + listener->OnStopRequest(request, NS_OK); + } + } + return rv; +} + +// +// nsIThreadRetargetableStreamListener methods +// +NS_IMETHODIMP +nsUnknownDecoder::CheckListenerChain() { + nsCOMPtr listener; + { + MutexAutoLock lock(mMutex); + listener = do_QueryInterface(mNextListener); + } + if (!listener) { + return NS_ERROR_NO_INTERFACE; + } + + return 
listener->CheckListenerChain(); +} + +void nsBinaryDetector::DetermineContentType(nsIRequest* aRequest) { + nsCOMPtr httpChannel = do_QueryInterface(aRequest); + if (!httpChannel) { + return; + } + + nsCOMPtr loadInfo = httpChannel->LoadInfo(); + if (loadInfo->GetSkipContentSniffing()) { + LastDitchSniff(aRequest); + return; + } + // It's an HTTP channel. Check for the text/plain mess + nsAutoCString contentTypeHdr; + Unused << httpChannel->GetResponseHeader("Content-Type"_ns, contentTypeHdr); + nsAutoCString contentType; + httpChannel->GetContentType(contentType); + + // Make sure to do a case-sensitive exact match comparison here. Apache + // 1.x just sends text/plain for "unknown", while Apache 2.x sends + // text/plain with a ISO-8859-1 charset. Debian's Apache version, just to + // be different, sends text/plain with iso-8859-1 charset. For extra fun, + // FC7, RHEL4, and Ubuntu Feisty send charset=UTF-8. Don't do general + // case-insensitive comparison, since we really want to apply this crap as + // rarely as we can. + if (!contentType.EqualsLiteral("text/plain") || + (!contentTypeHdr.EqualsLiteral("text/plain") && + !contentTypeHdr.EqualsLiteral("text/plain; charset=ISO-8859-1") && + !contentTypeHdr.EqualsLiteral("text/plain; charset=iso-8859-1") && + !contentTypeHdr.EqualsLiteral("text/plain; charset=UTF-8"))) { + return; + } + + // Check whether we have content-encoding. If we do, don't try to + // detect the type. + // XXXbz we could improve this by doing a local decompress if we + // wanted, I'm sure. 
+ nsAutoCString contentEncoding; + Unused << httpChannel->GetResponseHeader("Content-Encoding"_ns, + contentEncoding); + if (!contentEncoding.IsEmpty()) { + return; + } + + LastDitchSniff(aRequest); + MutexAutoLock lock(mMutex); + if (mContentType.EqualsLiteral(APPLICATION_OCTET_STREAM)) { + // We want to guess at it instead + mContentType = APPLICATION_GUESS_FROM_EXT; + } else { + // Let the text/plain type we already have be, so that other content + // sniffers can also get a shot at this data. + mContentType.Truncate(); + } +} diff --git a/netwerk/streamconv/converters/nsUnknownDecoder.h b/netwerk/streamconv/converters/nsUnknownDecoder.h new file mode 100644 index 0000000000..0df9df93ad --- /dev/null +++ b/netwerk/streamconv/converters/nsUnknownDecoder.h @@ -0,0 +1,152 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef nsUnknownDecoder_h__ +#define nsUnknownDecoder_h__ + +#include "nsIStreamConverter.h" +#include "nsIThreadRetargetableStreamListener.h" +#include "nsIContentSniffer.h" +#include "mozilla/Mutex.h" +#include "mozilla/Atomics.h" + +#include "nsCOMPtr.h" +#include "nsString.h" + +#define NS_UNKNOWNDECODER_CID \ + { /* 7d7008a0-c49a-11d3-9b22-0080c7cb1080 */ \ + 0x7d7008a0, 0xc49a, 0x11d3, { \ + 0x9b, 0x22, 0x00, 0x80, 0xc7, 0xcb, 0x10, 0x80 \ + } \ + } + +class nsUnknownDecoder : public nsIStreamConverter, + public nsIContentSniffer, + public nsIThreadRetargetableStreamListener { + public: + // nsISupports methods + NS_DECL_ISUPPORTS + + // nsIStreamConverter methods + NS_DECL_NSISTREAMCONVERTER + + // nsIStreamListener methods + NS_DECL_NSISTREAMLISTENER + + // nsIRequestObserver methods + NS_DECL_NSIREQUESTOBSERVER + + // nsIContentSniffer methods + NS_DECL_NSICONTENTSNIFFER + + // nsIThreadRetargetableStreamListener methods + NS_DECL_NSITHREADRETARGETABLESTREAMLISTENER + + explicit nsUnknownDecoder(nsIStreamListener* aListener = nullptr); + + protected: + virtual ~nsUnknownDecoder(); + + virtual void DetermineContentType(nsIRequest* aRequest); + nsresult FireListenerNotifications(nsIRequest* request, nsISupports* aCtxt); + + class ConvertedStreamListener : public nsIStreamListener { + public: + explicit ConvertedStreamListener(nsUnknownDecoder* aDecoder); + + NS_DECL_ISUPPORTS + NS_DECL_NSIREQUESTOBSERVER + NS_DECL_NSISTREAMLISTENER + + private: + virtual ~ConvertedStreamListener() = default; + static nsresult AppendDataToString(nsIInputStream* inputStream, + void* closure, const char* rawSegment, + uint32_t toOffset, uint32_t count, + uint32_t* writeCount); + nsUnknownDecoder* mDecoder; + }; + + protected: + nsCOMPtr mNextListener; + + // Various sniffer functions. Returning true means that a type + // was determined; false means no luck. 
+ bool SniffForHTML(nsIRequest* aRequest); + bool SniffForXML(nsIRequest* aRequest); + + // SniffURI guesses at the content type based on the URI (typically + // using the extentsion) + bool SniffURI(nsIRequest* aRequest); + + // LastDitchSniff guesses at text/plain vs. application/octet-stream + // by just looking at whether the data contains null bytes, and + // maybe at the fraction of chars with high bit set. Use this only + // as a last-ditch attempt to decide a content type! + bool LastDitchSniff(nsIRequest* aRequest); + + /** + * An entry struct for our array of sniffers. Each entry has either + * a type associated with it (set these with the SNIFFER_ENTRY macro) + * or a function to be executed (set these with the + * SNIFFER_ENTRY_WITH_FUNC macro). The function should take a single + * nsIRequest* and returns bool -- true if it sets mContentType, + * false otherwise + */ + struct nsSnifferEntry { + using TypeSniffFunc = bool (nsUnknownDecoder::*)(nsIRequest*); + + const char* mBytes; + uint32_t mByteLen; + + // Exactly one of mMimeType and mContentTypeSniffer should be set non-null + const char* mMimeType; + TypeSniffFunc mContentTypeSniffer; + }; + +#define SNIFFER_ENTRY(_bytes, _type) \ + { _bytes, sizeof(_bytes) - 1, _type, nullptr } + +#define SNIFFER_ENTRY_WITH_FUNC(_bytes, _func) \ + { _bytes, sizeof(_bytes) - 1, nullptr, _func } + + static nsSnifferEntry sSnifferEntries[]; + static uint32_t sSnifferEntryNum; + + // We guarantee in order delivery of OnStart, OnStop and OnData, therefore + // we do not need proper locking for mBuffer. + mozilla::Atomic mBuffer; + mozilla::Atomic mBufferLen; + + nsCString mContentType; + + // This mutex syncs: mContentType, mDecodedData and mNextListener. + mutable mozilla::Mutex mMutex MOZ_UNANNOTATED; + + protected: + nsresult ConvertEncodedData(nsIRequest* request, const char* data, + uint32_t length); + nsCString mDecodedData; // If data are encoded this will be uncompress data. 
+}; + +#define NS_BINARYDETECTOR_CID \ + { /* a2027ec6-ba0d-4c72-805d-148233f5f33c */ \ + 0xa2027ec6, 0xba0d, 0x4c72, { \ + 0x80, 0x5d, 0x14, 0x82, 0x33, 0xf5, 0xf3, 0x3c \ + } \ + } + +/** + * Class that detects whether a data stream is text or binary. This reuses + * most of nsUnknownDecoder except the actual content-type determination logic + * -- our overridden DetermineContentType simply calls LastDitchSniff and sets + * the type to APPLICATION_GUESS_FROM_EXT if the data is detected as binary. + */ +class nsBinaryDetector : public nsUnknownDecoder { + protected: + virtual void DetermineContentType(nsIRequest* aRequest) override; +}; + +#endif /* nsUnknownDecoder_h__ */ -- cgit v1.2.3