summaryrefslogtreecommitdiffstats
path: root/netwerk/streamconv/converters
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 14:29:10 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 14:29:10 +0000
commit2aa4a82499d4becd2284cdb482213d541b8804dd (patch)
treeb80bf8bf13c3766139fbacc530efd0dd9d54394c /netwerk/streamconv/converters
parentInitial commit. (diff)
downloadfirefox-2aa4a82499d4becd2284cdb482213d541b8804dd.tar.xz
firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.zip
Adding upstream version 86.0.1.upstream/86.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'netwerk/streamconv/converters')
-rw-r--r--netwerk/streamconv/converters/ParseFTPList.cpp1493
-rw-r--r--netwerk/streamconv/converters/ParseFTPList.h102
-rw-r--r--netwerk/streamconv/converters/moz.build32
-rw-r--r--netwerk/streamconv/converters/mozTXTToHTMLConv.cpp1260
-rw-r--r--netwerk/streamconv/converters/mozTXTToHTMLConv.h284
-rw-r--r--netwerk/streamconv/converters/nsDirIndex.cpp89
-rw-r--r--netwerk/streamconv/converters/nsDirIndex.h32
-rw-r--r--netwerk/streamconv/converters/nsDirIndexParser.cpp444
-rw-r--r--netwerk/streamconv/converters/nsDirIndexParser.h75
-rw-r--r--netwerk/streamconv/converters/nsFTPDirListingConv.cpp342
-rw-r--r--netwerk/streamconv/converters/nsFTPDirListingConv.h52
-rw-r--r--netwerk/streamconv/converters/nsHTTPCompressConv.cpp722
-rw-r--r--netwerk/streamconv/converters/nsHTTPCompressConv.h137
-rw-r--r--netwerk/streamconv/converters/nsICompressConvStats.idl17
-rw-r--r--netwerk/streamconv/converters/nsIndexedToHTML.cpp847
-rw-r--r--netwerk/streamconv/converters/nsIndexedToHTML.h61
-rw-r--r--netwerk/streamconv/converters/nsMultiMixedConv.cpp1038
-rw-r--r--netwerk/streamconv/converters/nsMultiMixedConv.h256
-rw-r--r--netwerk/streamconv/converters/nsUnknownDecoder.cpp894
-rw-r--r--netwerk/streamconv/converters/nsUnknownDecoder.h166
20 files changed, 8343 insertions, 0 deletions
diff --git a/netwerk/streamconv/converters/ParseFTPList.cpp b/netwerk/streamconv/converters/ParseFTPList.cpp
new file mode 100644
index 0000000000..892ca91e6e
--- /dev/null
+++ b/netwerk/streamconv/converters/ParseFTPList.cpp
@@ -0,0 +1,1493 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ParseFTPList.h"
+#include <algorithm>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "plstr.h"
+#include "nsDebug.h"
+#include "prprf.h"
+#include "nsUnicharUtils.h"
+#include "mozilla/CheckedInt.h"
+#include "mozilla/IntegerPrintfMacros.h"
+#include "mozilla/TextUtils.h"
+#include "mozilla/Sprintf.h"
+
+/* ==================================================================== */
+
+using mozilla::CheckedInt;
+using mozilla::IsAsciiAlpha;
+using mozilla::IsAsciiAlphanumeric;
+using mozilla::IsAsciiDigit;
+using mozilla::IsAsciiLowercaseAlpha;
+
+static const int kMaxFTPListLen = 32768;
+
+static inline int ParsingFailed(struct list_state* state) {
+  /* '?' (junk): this line failed to parse although an earlier one parsed or
+   * a list style was detected; '"': it's part of a comment or error message */
+  return (state->parsed_one || state->lstyle) ? '?' : '"';
+}
+
+void FixupYear(PRExplodedTime* aTime) {
+  /* Expand a two-digit year in place, pivoting at 80:
+   *   00-79 is taken as 2000-2079
+   *   80-99 is taken as 1980-1999
+   * A year of 100 or more is left exactly as it was given.
+   */
+  if (aTime->tm_year < 100) {
+    aTime->tm_year += (aTime->tm_year < 80) ? 2000 : 1900;
+  }
+}
+
+int ParseFTPList(const char* line, struct list_state* state,
+ struct list_result* result, PRTimeParamFn timeParam,
+ NowTimeFn nowTimeFn) {
+ unsigned int carry_buf_len; /* copy of state->carry_buf_len */
+ unsigned int pos;
+ const char* p;
+
+ if (!line || !state || !result) return 0;
+
+ memset(result, 0, sizeof(*result));
+ state->numlines++;
+
+ /* carry buffer is only valid from one line to the next */
+ carry_buf_len = state->carry_buf_len;
+ state->carry_buf_len = 0;
+
+ /* strip leading whitespace */
+ while (*line == ' ' || *line == '\t') line++;
+
+ /* line is terminated at first '\0' or '\n' */
+ p = line;
+ while (*p && *p != '\n') p++;
+ unsigned int linelen = p - line;
+
+ if (linelen > 0 && *p == '\n' && *(p - 1) == '\r') linelen--;
+
+ /* DON'T strip trailing whitespace. */
+
+ if (linelen > kMaxFTPListLen) {
+ return ParsingFailed(state);
+ }
+
+ if (linelen > 0) {
+ static const char* month_names = "JanFebMarAprMayJunJulAugSepOctNovDec";
+ const char* tokens[16]; /* 16 is more than enough */
+ unsigned int toklen[(sizeof(tokens) / sizeof(tokens[0]))];
+ unsigned int linelen_sans_wsp; // line length sans whitespace
+ unsigned int numtoks = 0;
+ unsigned int tokmarker = 0; /* extra info for lstyle handler */
+ unsigned int month_num = 0;
+ char tbuf[4];
+ int lstyle = 0;
+
+ if (carry_buf_len) /* VMS long filename carryover buffer */
+ {
+ tokens[0] = state->carry_buf;
+ toklen[0] = carry_buf_len;
+ numtoks++;
+ }
+
+ pos = 0;
+ while (pos < linelen && numtoks < (sizeof(tokens) / sizeof(tokens[0]))) {
+ while (pos < linelen &&
+ (line[pos] == ' ' || line[pos] == '\t' || line[pos] == '\r'))
+ pos++;
+ if (pos < linelen) {
+ tokens[numtoks] = &line[pos];
+ while (pos < linelen &&
+ (line[pos] != ' ' && line[pos] != '\t' && line[pos] != '\r'))
+ pos++;
+ if (tokens[numtoks] != &line[pos]) {
+ toklen[numtoks] = (&line[pos] - tokens[numtoks]);
+ numtoks++;
+ }
+ }
+ }
+
+ if (!numtoks) return ParsingFailed(state);
+
+ linelen_sans_wsp = &(tokens[numtoks - 1][toklen[numtoks - 1]]) - tokens[0];
+ if (numtoks == (sizeof(tokens) / sizeof(tokens[0]))) {
+ pos = linelen;
+ while (pos > 0 && (line[pos - 1] == ' ' || line[pos - 1] == '\t')) pos--;
+ linelen_sans_wsp = pos;
+ }
+
+ /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+
+#if defined(SUPPORT_EPLF)
+ /* EPLF handling must come somewhere before /bin/dls handling. */
+ if (!lstyle && (!state->lstyle || state->lstyle == 'E')) {
+ if (*line == '+' && linelen > 4 && numtoks >= 2) {
+ pos = 1;
+ while (pos < (linelen - 1)) {
+ p = &line[pos++];
+ if (*p == '/')
+ result->fe_type = 'd'; /* its a dir */
+ else if (*p == 'r')
+ result->fe_type = 'f'; /* its a file */
+ else if (*p == 'm') {
+ if (IsAsciiDigit(line[pos])) {
+ while (pos < linelen && IsAsciiDigit(line[pos])) pos++;
+ if (pos < linelen && line[pos] == ',') {
+ PRTime t;
+ PRTime seconds;
+ PR_sscanf(p + 1, "%llu", &seconds);
+ t = seconds * PR_USEC_PER_SEC;
+ PR_ExplodeTime(t, timeParam, &(result->fe_time));
+ }
+ }
+ } else if (*p == 's') {
+ if (IsAsciiDigit(line[pos])) {
+ while (pos < linelen && IsAsciiDigit(line[pos])) pos++;
+ if (pos < linelen && line[pos] == ',' &&
+ ((&line[pos]) - (p + 1)) < int(sizeof(result->fe_size) - 1)) {
+ memcpy(result->fe_size, p + 1,
+ (unsigned)(&line[pos] - (p + 1)));
+ result->fe_size[(&line[pos] - (p + 1))] = '\0';
+ }
+ }
+ } else if (IsAsciiAlpha(
+ *p)) /* 'i'/'up' or unknown "fact" (property) */
+ {
+ while (pos < linelen && *++p != ',') pos++;
+ } else if (*p != '\t' || (p + 1) != tokens[1]) {
+ break; /* its not EPLF after all */
+ } else {
+ state->parsed_one = 1;
+ state->lstyle = lstyle = 'E';
+
+ p = &(line[linelen_sans_wsp]);
+ result->fe_fname = tokens[1];
+ result->fe_fnlen = p - tokens[1];
+
+ if (!result->fe_type) /* access denied */
+ {
+ result->fe_type = 'f'; /* is assuming 'f'ile correct? */
+ return '?'; /* NO! junk it. */
+ }
+ return result->fe_type;
+ }
+ if (pos >= (linelen - 1) || line[pos] != ',') break;
+ pos++;
+ } /* while (pos < linelen) */
+ memset(result, 0, sizeof(*result));
+ } /* if (*line == '+' && linelen > 4 && numtoks >= 2) */
+ } /* if (!lstyle && (!state->lstyle || state->lstyle == 'E')) */
+#endif /* SUPPORT_EPLF */
+
+ /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+
+#if defined(SUPPORT_VMS)
+ if (!lstyle &&
+ (!state->lstyle ||
+ state->lstyle == 'V')) { /* try VMS Multinet/UCX/CMS server */
+ /*
+ * Legal characters in a VMS file/dir spec are [A-Z0-9$.-_~].
+ * '$' cannot begin a filename and `-' cannot be used as the first
+ * or last character. '.' is only valid as a directory separator
+ * and <file>.<type> separator. A canonical filename spec might look
+ * like this: DISK$VOL:[DIR1.DIR2.DIR3]FILE.TYPE;123
+ * All VMS FTP servers LIST in uppercase.
+ *
+ * We need to be picky about this in order to support
+ * multi-line listings correctly.
+ */
+ if (!state->parsed_one &&
+ (numtoks == 1 || (numtoks == 2 && toklen[0] == 9 &&
+ memcmp(tokens[0], "Directory", 9) == 0))) {
+ /* If no dirstyle has been detected yet, and this line is a
+ * VMS list's dirname, then turn on VMS dirstyle.
+ * eg "ACA:[ANONYMOUS]", "DISK$FTP:[ANONYMOUS]", "SYS$ANONFTP:"
+ */
+ p = tokens[0];
+ pos = toklen[0];
+ if (numtoks == 2) {
+ p = tokens[1];
+ pos = toklen[1];
+ }
+ pos--;
+ if (pos >= 3) {
+ while (pos > 0 && p[pos] != '[') {
+ pos--;
+ if (p[pos] == '-' || p[pos] == '$') {
+ if (pos == 0 || p[pos - 1] == '[' || p[pos - 1] == '.' ||
+ (p[pos] == '-' && (p[pos + 1] == ']' || p[pos + 1] == '.')))
+ break;
+ } else if (p[pos] != '.' && p[pos] != '~' &&
+ !IsAsciiAlphanumeric(p[pos]))
+ break;
+ else if (IsAsciiLowercaseAlpha(p[pos]))
+ break;
+ }
+ if (pos > 0) {
+ pos--;
+ if (p[pos] != ':' || p[pos + 1] != '[') pos = 0;
+ }
+ }
+ if (pos > 0 && p[pos] == ':') {
+ while (pos > 0) {
+ pos--;
+ if (p[pos] != '$' && p[pos] != '_' && p[pos] != '-' &&
+ p[pos] != '~' && !IsAsciiAlphanumeric(p[pos]))
+ break;
+ else if (IsAsciiLowercaseAlpha(p[pos]))
+ break;
+ }
+ if (pos == 0) {
+ state->lstyle = 'V';
+ return '?'; /* its junk */
+ }
+ }
+ /* fallthrough */
+ } else if ((tokens[0][toklen[0] - 1]) != ';') {
+ if (numtoks == 1 && (state->lstyle == 'V' && !carry_buf_len))
+ lstyle = 'V';
+ else if (numtoks < 4)
+ ;
+ else if (toklen[1] >= 10 && memcmp(tokens[1], "%RMS-E-PRV", 10) == 0)
+ lstyle = 'V';
+ else if ((&line[linelen] - tokens[1]) >= 22 &&
+ memcmp(tokens[1], "insufficient privilege", 22) == 0)
+ lstyle = 'V';
+ else if (numtoks != 4 && numtoks != 6)
+ ;
+ else if (numtoks == 6 &&
+ (toklen[5] < 4 || *tokens[5] != '(' || /* perms */
+ (tokens[5][toklen[5] - 1]) != ')'))
+ ;
+ else if ((toklen[2] == 10 || toklen[2] == 11) &&
+ (tokens[2][toklen[2] - 5]) == '-' &&
+ (tokens[2][toklen[2] - 9]) == '-' &&
+ (((toklen[3] == 4 || toklen[3] == 5 || toklen[3] == 7 ||
+ toklen[3] == 8) &&
+ (tokens[3][toklen[3] - 3]) == ':') ||
+ ((toklen[3] == 10 || toklen[3] == 11) &&
+ (tokens[3][toklen[3] - 3]) ==
+ '.')) && /* time in [H]H:MM[:SS[.CC]] format */
+ IsAsciiDigit(*tokens[1]) && /* size */
+ IsAsciiDigit(*tokens[2]) && /* date */
+ IsAsciiDigit(*tokens[3]) /* time */
+ ) {
+ lstyle = 'V';
+ }
+ if (lstyle == 'V') {
+ // clang-format off
+ /*
+ * MultiNet FTP:
+ * LOGIN.COM;2 1 4-NOV-1994 04:09 [ANONYMOUS] (RWE,RWE,,)
+ * PUB.DIR;1 1 27-JAN-1994 14:46 [ANONYMOUS] (RWE,RWE,RE,RWE)
+ * README.FTP;1 %RMS-E-PRV, insufficient privilege or file protection violation
+ * ROUSSOS.DIR;1 1 27-JAN-1994 14:48 [CS,ROUSSOS] (RWE,RWE,RE,R)
+ * S67-50903.JPG;1 328 22-SEP-1998 16:19 [ANONYMOUS] (RWED,RWED,,)
+ * UCX FTP:
+ * CII-MANUAL.TEX;1 213/216 29-JAN-1996 03:33:12 [ANONYMOU,ANONYMOUS] (RWED,RWED,,)
+ * CMU/VMS-IP FTP
+ * [VMSSERV.FILES]ALARM.DIR;1 1/3 5-MAR-1993 18:09
+ * TCPware FTP
+ * FOO.BAR;1 4 5-MAR-1993 18:09:01.12
+ * Long filename example:
+ * THIS-IS-A-LONG-VMS-FILENAME.AND-THIS-IS-A-LONG-VMS-FILETYPE\r\n
+ * 213[/nnn] 29-JAN-1996 03:33[:nn] [ANONYMOU,ANONYMOUS] (RWED,RWED,,)
+ */
+ // clang-format on
+ tokmarker = 0;
+ p = tokens[0];
+ pos = 0;
+ if (*p == '[' && toklen[0] >= 4) /* CMU style */
+ {
+ if (p[1] != ']') {
+ p++;
+ pos++;
+ }
+ while (lstyle && pos < toklen[0] && *p != ']') {
+ if (*p != '$' && *p != '.' && *p != '_' && *p != '-' &&
+ *p != '~' && !IsAsciiAlphanumeric(*p))
+ lstyle = 0;
+ pos++;
+ p++;
+ }
+ if (lstyle && pos < (toklen[0] - 1)) {
+ /* ']' was found and there is at least one character after it */
+ NS_ASSERTION(*p == ']', "unexpected state");
+ pos++;
+ p++;
+ tokmarker = pos; /* length of leading "[DIR1.DIR2.etc]" */
+ } else {
+ /* not a CMU style listing */
+ lstyle = 0;
+ }
+ }
+ while (lstyle && pos < toklen[0] && *p != ';') {
+ if (*p != '$' && *p != '.' && *p != '_' && *p != '-' && *p != '~' &&
+ !IsAsciiAlphanumeric(*p))
+ lstyle = 0;
+ else if (IsAsciiLowercaseAlpha(*p))
+ lstyle = 0;
+ p++;
+ pos++;
+ }
+ if (lstyle && *p == ';') {
+ if (pos == 0 || pos == (toklen[0] - 1)) lstyle = 0;
+ for (pos++; lstyle && pos < toklen[0]; pos++) {
+ if (!IsAsciiDigit(tokens[0][pos])) lstyle = 0;
+ }
+ }
+ pos = (p - tokens[0]); /* => fnlength sans ";####" */
+ pos -= tokmarker; /* => fnlength sans "[DIR1.DIR2.etc]" */
+ p = &(tokens[0][tokmarker]); /* offset of basename */
+
+ if (!lstyle || pos == 0 ||
+ pos > 80) /* VMS filenames can't be longer than that */
+ {
+ lstyle = 0;
+ } else if (numtoks == 1) {
+ /* if VMS has been detected and there is only one token and that
+ * token was a VMS filename then this is a multiline VMS LIST entry.
+ */
+ if (pos >= (sizeof(state->carry_buf) - 1))
+ pos = (sizeof(state->carry_buf) - 1); /* shouldn't happen */
+ memcpy(state->carry_buf, p, pos);
+ state->carry_buf_len = pos;
+ return '?'; /* tell caller to treat as junk */
+ } else if (IsAsciiDigit(*tokens[1])) /* not no-privs message */
+ {
+ for (pos = 0; lstyle && pos < (toklen[1]); pos++) {
+ if (!IsAsciiDigit((tokens[1][pos])) && (tokens[1][pos]) != '/')
+ lstyle = 0;
+ }
+ if (lstyle && numtoks > 4) /* Multinet or UCX but not CMU */
+ {
+ for (pos = 1; lstyle && pos < (toklen[5] - 1); pos++) {
+ p = &(tokens[5][pos]);
+ if (*p != 'R' && *p != 'W' && *p != 'E' && *p != 'D' &&
+ *p != ',')
+ lstyle = 0;
+ }
+ }
+ }
+ } /* passed initial tests */
+ } /* else if ((tokens[0][toklen[0]-1]) != ';') */
+
+ if (lstyle == 'V') {
+ state->parsed_one = 1;
+ state->lstyle = lstyle;
+
+ if (IsAsciiDigit(*tokens[1])) /* not permission denied etc */
+ {
+ /* strip leading directory name */
+ if (*tokens[0] == '[') /* CMU server */
+ {
+ pos = toklen[0] - 1;
+ p = tokens[0] + 1;
+ while (*p != ']') {
+ p++;
+ pos--;
+ }
+ toklen[0] = --pos;
+ tokens[0] = ++p;
+ }
+ pos = 0;
+ while (pos < toklen[0] && (tokens[0][pos]) != ';') pos++;
+
+ result->fe_cinfs = 1;
+ result->fe_type = 'f';
+ result->fe_fname = tokens[0];
+ result->fe_fnlen = pos;
+
+ if (pos > 4) {
+ p = &(tokens[0][pos - 4]);
+ if (p[0] == '.' && p[1] == 'D' && p[2] == 'I' && p[3] == 'R') {
+ result->fe_fnlen -= 4;
+ result->fe_type = 'd';
+ }
+ }
+
+ if (result->fe_type != 'd') {
+ /* #### or used/allocated form. If used/allocated form, then
+ * 'used' is the size in bytes if and only if 'used'<=allocated.
+ * If 'used' is size in bytes then it can be > 2^32
+ * If 'used' is not size in bytes then it is size in blocks.
+ */
+ pos = 0;
+ while (pos < toklen[1] && (tokens[1][pos]) != '/') pos++;
+
+ /*
+ * On OpenVMS, the size is given in blocks. A block is 512
+ * bytes. This can only approximate the size of the file,
+ * but that's better than not showing a size at all.
+ * numBlocks is clamped to UINT32_MAX to make 32-bit and
+ * 64-bit builds return consistent results.
+ */
+ uint64_t numBlocks = strtoul(tokens[1], nullptr, 10);
+ numBlocks = std::min(numBlocks, (uint64_t)UINT32_MAX);
+ uint64_t fileSize = numBlocks * 512;
+ SprintfLiteral(result->fe_size, "%" PRIu64, fileSize);
+ } /* if (result->fe_type != 'd') */
+
+ p = tokens[2] + 2;
+ if (*p == '-') p++;
+ tbuf[0] = p[0];
+ tbuf[1] = ToLowerCaseASCII(p[1]);
+ tbuf[2] = ToLowerCaseASCII(p[2]);
+ month_num = 0;
+ for (pos = 0; pos < (12 * 3); pos += 3) {
+ if (tbuf[0] == month_names[pos + 0] &&
+ tbuf[1] == month_names[pos + 1] &&
+ tbuf[2] == month_names[pos + 2])
+ break;
+ month_num++;
+ }
+ if (month_num >= 12) month_num = 0;
+ result->fe_time.tm_month = month_num;
+ result->fe_time.tm_mday = atoi(tokens[2]);
+ result->fe_time.tm_year = atoi(p + 4); // NSPR wants year as XXXX
+
+ p = tokens[3] + 2;
+ if (*p == ':') p++;
+ if (p[2] == ':') result->fe_time.tm_sec = atoi(p + 3);
+ result->fe_time.tm_hour = atoi(tokens[3]);
+ result->fe_time.tm_min = atoi(p);
+
+ return result->fe_type;
+
+ } /* if (IsAsciiDigit(*tokens[1])) */
+
+ return '?'; /* junk */
+
+ } /* if (lstyle == 'V') */
+ } /* if (!lstyle && (!state->lstyle || state->lstyle == 'V')) */
+#endif
+
+ /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+
+#if defined(SUPPORT_CMS)
+ /* Virtual Machine/Conversational Monitor System (IBM Mainframe) */
+ if (!lstyle && (!state->lstyle || state->lstyle == 'C')) /* VM/CMS */
+ {
+ /* LISTing according to mirror.pl
+ * Filename FileType Fm Format Lrecl Records Blocks Date Time
+ * LASTING GLOBALV A1 V 41 21 1 9/16/91 15:10:32
+ * J43401 NETLOG A0 V 77 1 1 9/12/91 12:36:04
+ * PROFILE EXEC A1 V 17 3 1 9/12/91 12:39:07
+ * DIRUNIX SCRIPT A1 V 77 1216 17 1/04/93 20:30:47
+ * MAIL PROFILE A2 F 80 1 1 10/14/92 16:12:27
+ * BADY2K TEXT A0 V 1 1 1 1/03/102 10:11:12
+ * AUTHORS A1 DIR - - - 9/20/99 10:31:11
+ *
+ * LISTing from vm.marist.edu and vm.sc.edu
+ * 220-FTPSERVE IBM VM Level 420 at VM.MARIST.EDU, 04:58:12 EDT WEDNESDAY
+ * 2002-07-10 AUTHORS DIR - - -
+ * 1999-09-20 10:31:11 - HARRINGTON DIR - - -
+ * 1997-02-12 15:33:28 - PICS DIR - - -
+ * 2000-10-12 15:43:23 - SYSFILE DIR - - -
+ * 2000-07-20 17:48:01 - WELCNVT EXEC V 72 9 1
+ * 1999-09-20 17:16:18 - WELCOME EREADME F 80 21 1
+ * 1999-12-27 16:19:00 - WELCOME README V 82 21 1
+ * 1999-12-27 16:19:04 - README ANONYMOU V 71 26 1
+ * 1997-04-02 12:33:20 TCP291 README ANONYOLD V 71 15 1
+ * 1995-08-25 16:04:27 TCP291
+ */
+ if (numtoks >= 7 && (toklen[0] + toklen[1]) <= 16) {
+ for (pos = 1; !lstyle && (pos + 5) < numtoks; pos++) {
+ p = tokens[pos];
+ if ((toklen[pos] == 1 && (*p == 'F' || *p == 'V')) ||
+ (toklen[pos] == 3 && *p == 'D' && p[1] == 'I' && p[2] == 'R')) {
+ if (toklen[pos + 5] == 8 && (tokens[pos + 5][2]) == ':' &&
+ (tokens[pos + 5][5]) == ':') {
+ p = tokens[pos + 4];
+ if ((toklen[pos + 4] == 10 && p[4] == '-' && p[7] == '-') ||
+ (toklen[pos + 4] >= 7 && toklen[pos + 4] <= 9 &&
+ p[((p[1] != '/') ? (2) : (1))] == '/' &&
+ p[((p[1] != '/') ? (5) : (4))] == '/'))
+ /* Y2K bugs possible ("7/06/102" or "13/02/101") */
+ {
+ if ((*tokens[pos + 1] == '-' && *tokens[pos + 2] == '-' &&
+ *tokens[pos + 3] == '-') ||
+ (IsAsciiDigit(*tokens[pos + 1]) &&
+ IsAsciiDigit(*tokens[pos + 2]) &&
+ IsAsciiDigit(*tokens[pos + 3]))) {
+ lstyle = 'C';
+ tokmarker = pos;
+ }
+ }
+ }
+ }
+ } /* for (pos = 1; !lstyle && (pos+5) < numtoks; pos++) */
+ } /* if (numtoks >= 7) */
+
+ /* extra checking if first pass */
+ if (lstyle && !state->lstyle) {
+ for (pos = 0, p = tokens[0]; lstyle && pos < toklen[0]; pos++, p++) {
+ if (IsAsciiLowercaseAlpha(*p)) lstyle = 0;
+ }
+ for (pos = tokmarker + 1; pos <= tokmarker + 3; pos++) {
+ if (!(toklen[pos] == 1 && *tokens[pos] == '-')) {
+ for (p = tokens[pos]; lstyle && p < (tokens[pos] + toklen[pos]);
+ p++) {
+ if (!IsAsciiDigit(*p)) lstyle = 0;
+ }
+ }
+ }
+ for (pos = 0, p = tokens[tokmarker + 4];
+ lstyle && pos < toklen[tokmarker + 4]; pos++, p++) {
+ if (*p == '/') {
+ /* There may be Y2K bugs in the date. Don't simplify to
+ * pos != (len-3) && pos != (len-6) like time is done.
+ */
+ if ((tokens[tokmarker + 4][1]) == '/') {
+ if (pos != 1 && pos != 4) lstyle = 0;
+ } else if (pos != 2 && pos != 5)
+ lstyle = 0;
+ } else if (*p != '-' && !IsAsciiDigit(*p))
+ lstyle = 0;
+ else if (*p == '-' && pos != 4 && pos != 7)
+ lstyle = 0;
+ }
+ for (pos = 0, p = tokens[tokmarker + 5];
+ lstyle && pos < toklen[tokmarker + 5]; pos++, p++) {
+ if (*p != ':' && !IsAsciiDigit(*p))
+ lstyle = 0;
+ else if (*p == ':' && pos != (toklen[tokmarker + 5] - 3) &&
+ pos != (toklen[tokmarker + 5] - 6))
+ lstyle = 0;
+ }
+ } /* initial if() */
+
+ if (lstyle == 'C') {
+ state->parsed_one = 1;
+ state->lstyle = lstyle;
+
+ p = tokens[tokmarker + 4];
+ if (toklen[tokmarker + 4] == 10) /* newstyle: YYYY-MM-DD format */
+ {
+ result->fe_time.tm_year = atoi(p + 0);
+ result->fe_time.tm_month = atoi(p + 5) - 1;
+ result->fe_time.tm_mday = atoi(p + 8);
+ } else /* oldstyle: [M]M/DD/YY format */
+ {
+ pos = toklen[tokmarker + 4];
+ result->fe_time.tm_month = atoi(p) - 1;
+ result->fe_time.tm_mday = atoi((p + pos) - 5);
+ result->fe_time.tm_year = atoi((p + pos) - 2);
+ FixupYear(&result->fe_time);
+ }
+
+ p = tokens[tokmarker + 5];
+ pos = toklen[tokmarker + 5];
+ result->fe_time.tm_hour = atoi(p);
+ result->fe_time.tm_min = atoi((p + pos) - 5);
+ result->fe_time.tm_sec = atoi((p + pos) - 2);
+
+ result->fe_cinfs = 1;
+ result->fe_fname = tokens[0];
+ result->fe_fnlen = toklen[0];
+ result->fe_type = 'f';
+
+ p = tokens[tokmarker];
+ if (toklen[tokmarker] == 3 && *p == 'D' && p[1] == 'I' && p[2] == 'R')
+ result->fe_type = 'd';
+
+ if ((/*newstyle*/ toklen[tokmarker + 4] == 10 && tokmarker > 1) ||
+ (/*oldstyle*/ toklen[tokmarker + 4] != 10 &&
+ tokmarker > 2)) { /* have a filetype column */
+ char* dot;
+ p = &(tokens[0][toklen[0]]);
+ memcpy(&dot, &p, sizeof(dot)); /* NASTY! */
+ *dot++ = '.';
+ p = tokens[1];
+ for (pos = 0; pos < toklen[1]; pos++) *dot++ = *p++;
+ result->fe_fnlen += 1 + toklen[1];
+ }
+
+ /* oldstyle LISTING:
+ * files/dirs not on the 'A' minidisk are not RETRievable/CHDIRable
+ if (toklen[tokmarker+4] != 10 && *tokens[tokmarker-1] != 'A')
+ return '?';
+ */
+
+ /* VM/CMS LISTings have no usable filesize field.
+ * Have to use the 'SIZE' command for that.
+ */
+ return result->fe_type;
+
+ } /* if (lstyle == 'C' && (!state->lstyle || state->lstyle == lstyle)) */
+ } /* VM/CMS */
+#endif
+
+ /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+
+#if defined(SUPPORT_DOS) /* WinNT DOS dirstyle */
+ if (!lstyle && (!state->lstyle || state->lstyle == 'W')) {
+ // clang-format off
+ /*
+ * "10-23-00 01:27PM <DIR> veronist"
+ * "06-15-00 07:37AM <DIR> zoe"
+ * "07-14-00 01:35PM 2094926 canprankdesk.tif"
+ * "07-21-00 01:19PM 95077 Jon Kauffman Enjoys the Good Life.jpg"
+ * "07-21-00 01:19PM 52275 Name Plate.jpg"
+ * "07-14-00 01:38PM 2250540 Valentineoffprank-HiRes.jpg"
+ */
+ // Microsoft FTP server with FtpDirBrowseShowLongDate set returns year
+ // in 4-digit format:
+ // "10-10-2014 10:10AM <DIR> FTP"
+ // Windows CE FTP server returns time in 24-hour format:
+ // "05-03-13 22:01 <DIR> APPS"
+ // clang-format on
+ if ((numtoks >= 4) && (toklen[0] == 8 || toklen[0] == 10) &&
+ (toklen[1] == 5 || toklen[1] == 7) &&
+ (*tokens[2] == '<' || IsAsciiDigit(*tokens[2]))) {
+ p = tokens[0];
+ if (IsAsciiDigit(p[0]) && IsAsciiDigit(p[1]) && p[2] == '-' &&
+ IsAsciiDigit(p[3]) && IsAsciiDigit(p[4]) && p[5] == '-' &&
+ IsAsciiDigit(p[6]) && IsAsciiDigit(p[7])) {
+ p = tokens[1];
+ if (IsAsciiDigit(p[0]) && IsAsciiDigit(p[1]) && p[2] == ':' &&
+ IsAsciiDigit(p[3]) && IsAsciiDigit(p[4]) &&
+ (toklen[1] == 5 ||
+ (toklen[1] == 7 && (p[5] == 'A' || p[5] == 'P') &&
+ p[6] == 'M'))) {
+ lstyle = 'W';
+ if (!state->lstyle) {
+ p = tokens[2];
+ /* <DIR> or <JUNCTION> */
+ if (*p != '<' || p[toklen[2] - 1] != '>') {
+ for (pos = 1; (lstyle && pos < toklen[2]); pos++) {
+ if (!IsAsciiDigit(*++p)) lstyle = 0;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (lstyle == 'W') {
+ state->parsed_one = 1;
+ state->lstyle = lstyle;
+
+ p = &(line[linelen]); /* line end */
+ result->fe_cinfs = 1;
+ result->fe_fname = tokens[3];
+ result->fe_fnlen = p - tokens[3];
+ result->fe_type = 'd';
+
+ if (*tokens[2] != '<') /* not <DIR> or <JUNCTION> */
+ {
+ // try to handle correctly spaces at the beginning of the filename
+ // filesize (token[2]) must end at offset 38
+ if (tokens[2] + toklen[2] - line == 38) {
+ result->fe_fname = &(line[39]);
+ result->fe_fnlen = p - result->fe_fname;
+ }
+ result->fe_type = 'f';
+ pos = toklen[2];
+ if (pos > (sizeof(result->fe_size) - 1)) {
+ pos = (sizeof(result->fe_size) - 1);
+ }
+ memcpy(result->fe_size, tokens[2], pos);
+ result->fe_size[pos] = '\0';
+ } else {
+ // try to handle correctly spaces at the beginning of the filename
+ // token[2] must begin at offset 24, the length is 5 or 10
+ // token[3] must begin at offset 39 or higher
+ if (tokens[2] - line == 24 && (toklen[2] == 5 || toklen[2] == 10) &&
+ tokens[3] - line >= 39) {
+ result->fe_fname = &(line[39]);
+ result->fe_fnlen = p - result->fe_fname;
+ }
+
+ if ((tokens[2][1]) != 'D') /* not <DIR> */
+ {
+ result->fe_type = '?'; /* unknown until junc for sure */
+ if (result->fe_fnlen > 4) {
+ p = result->fe_fname;
+ for (pos = result->fe_fnlen - 4; pos > 0; pos--) {
+ if (p[0] == ' ' && p[3] == ' ' && p[2] == '>' &&
+ (p[1] == '=' || p[1] == '-')) {
+ result->fe_type = 'l';
+ result->fe_fnlen = p - result->fe_fname;
+ result->fe_lname = p + 4;
+ result->fe_lnlen = &(line[linelen]) - result->fe_lname;
+ break;
+ }
+ p++;
+ }
+ }
+ }
+ }
+
+ result->fe_time.tm_month = atoi(tokens[0] + 0);
+ if (result->fe_time.tm_month != 0) {
+ result->fe_time.tm_month--;
+ result->fe_time.tm_mday = atoi(tokens[0] + 3);
+ result->fe_time.tm_year = atoi(tokens[0] + 6);
+ FixupYear(&result->fe_time);
+ }
+
+ result->fe_time.tm_hour = atoi(tokens[1] + 0);
+ result->fe_time.tm_min = atoi(tokens[1] + 3);
+ if (toklen[1] == 7) {
+ if ((tokens[1][5]) == 'P' && result->fe_time.tm_hour < 12)
+ result->fe_time.tm_hour += 12;
+ else if ((tokens[1][5]) == 'A' && result->fe_time.tm_hour == 12)
+ result->fe_time.tm_hour = 0;
+ }
+
+ /* the caller should do this (if dropping "." and ".." is desired)
+ if (result->fe_type == 'd' && result->fe_fname[0] == '.' &&
+ (result->fe_fnlen == 1 || (result->fe_fnlen == 2 &&
+ result->fe_fname[1] == '.')))
+ return '?';
+ */
+
+ return result->fe_type;
+ } /* if (lstyle == 'W' && (!state->lstyle || state->lstyle == lstyle)) */
+ } /* if (!lstyle && (!state->lstyle || state->lstyle == 'W')) */
+#endif
+
+ /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+
+#if defined(SUPPORT_OS2)
+ if (!lstyle && (!state->lstyle || state->lstyle == 'O')) /* OS/2 test */
+ {
+ /* 220 server IBM TCP/IP for OS/2 - FTP Server ver 23:04:36 on Jan 15 1997
+ *ready. fixed position, space padded columns. I have only a vague idea of
+ *what the contents between col 18 and 34 might be: All I can infer is
+ *that there may be attribute flags in there and there may be a " DIR" in
+ *there.
+ *
+ * 1 2 3 4 5 6
+ *0123456789012345678901234567890123456789012345678901234567890123456789
+ *----- size -------|??????????????? MM-DD-YY| HH:MM| nnnnnnnnn....
+ * 0 DIR 04-11-95 16:26 .
+ * 0 DIR 04-11-95 16:26 ..
+ * 0 DIR 04-11-95 16:26 ADDRESS
+ * 612 RHSA 07-28-95 16:45 air_tra1.bag
+ * 195 A 08-09-95 10:23 Alfa1.bag
+ * 0 RHS DIR 04-11-95 16:26 ATTACH
+ * 372 A 08-09-95 10:26 Aussie_1.bag
+ * 310992 06-28-94 09:56 INSTALL.EXE
+ * 1 2 3 4
+ * 01234567890123456789012345678901234567890123456789
+ * dirlist from the mirror.pl project, col positions from Mozilla.
+ */
+ p = &(line[toklen[0]]);
+ /* \s(\d\d-\d\d-\d\d)\s+(\d\d:\d\d)\s */
+ if (numtoks >= 4 && toklen[0] <= 18 && IsAsciiDigit(*tokens[0]) &&
+ (linelen - toklen[0]) >= (54 - 18) && p[18 - 18] == ' ' &&
+ p[34 - 18] == ' ' && p[37 - 18] == '-' && p[40 - 18] == '-' &&
+ p[43 - 18] == ' ' && p[45 - 18] == ' ' && p[48 - 18] == ':' &&
+ p[51 - 18] == ' ' && IsAsciiDigit(p[35 - 18]) &&
+ IsAsciiDigit(p[36 - 18]) && IsAsciiDigit(p[38 - 18]) &&
+ IsAsciiDigit(p[39 - 18]) && IsAsciiDigit(p[41 - 18]) &&
+ IsAsciiDigit(p[42 - 18]) && IsAsciiDigit(p[46 - 18]) &&
+ IsAsciiDigit(p[47 - 18]) && IsAsciiDigit(p[49 - 18]) &&
+ IsAsciiDigit(p[50 - 18]) &&
+ (linelen_sans_wsp - toklen[0]) > (53 - 18)) {
+ lstyle = 'O'; /* OS/2 */
+ if (!state->lstyle) {
+ for (pos = 1; lstyle && pos < toklen[0]; pos++) {
+ if (!IsAsciiDigit(tokens[0][pos])) lstyle = 0;
+ }
+ }
+ }
+
+ if (lstyle == 'O') {
+ state->parsed_one = 1;
+ state->lstyle = lstyle;
+
+ p = &(line[toklen[0]]);
+
+ result->fe_cinfs = 1;
+ result->fe_fname = &p[53 - 18];
+ result->fe_fnlen = (&(line[linelen_sans_wsp])) - (result->fe_fname);
+ result->fe_type = 'f';
+
+ /* I don't have a real listing to determine exact pos, so scan. */
+ for (pos = (18 - 18); pos < ((35 - 18) - 4); pos++) {
+ if (p[pos + 0] == ' ' && p[pos + 1] == 'D' && p[pos + 2] == 'I' &&
+ p[pos + 3] == 'R') {
+ result->fe_type = 'd';
+ break;
+ }
+ }
+
+ if (result->fe_type != 'd') {
+ pos = toklen[0];
+ if (pos > (sizeof(result->fe_size) - 1))
+ pos = (sizeof(result->fe_size) - 1);
+ memcpy(result->fe_size, tokens[0], pos);
+ result->fe_size[pos] = '\0';
+ }
+
+ result->fe_time.tm_month = atoi(&p[35 - 18]) - 1;
+ result->fe_time.tm_mday = atoi(&p[38 - 18]);
+ result->fe_time.tm_year = atoi(&p[41 - 18]);
+ FixupYear(&result->fe_time);
+ result->fe_time.tm_hour = atoi(&p[46 - 18]);
+ result->fe_time.tm_min = atoi(&p[49 - 18]);
+
+ /* the caller should do this (if dropping "." and ".." is desired)
+ if (result->fe_type == 'd' && result->fe_fname[0] == '.' &&
+ (result->fe_fnlen == 1 || (result->fe_fnlen == 2 &&
+ result->fe_fname[1] == '.')))
+ return '?';
+ */
+
+ return result->fe_type;
+ } /* if (lstyle == 'O') */
+
+ } /* if (!lstyle && (!state->lstyle || state->lstyle == 'O')) */
+#endif
+
+ /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+
+#if defined(SUPPORT_LSL)
+ if (!lstyle && (!state->lstyle || state->lstyle == 'U')) /* /bin/ls & co. */
+ {
+ /* UNIX-style listing, without inum and without blocks
+ * "-rw-r--r-- 1 root other 531 Jan 29 03:26 README"
+ * "dr-xr-xr-x 2 root other 512 Apr 8 1994 etc"
+ * "dr-xr-xr-x 2 root 512 Apr 8 1994 etc"
+ * "lrwxrwxrwx 1 root other 7 Jan 25 00:17 bin -> usr/bin"
+ * Also produced by Microsoft's FTP servers for Windows:
+ * "---------- 1 owner group 1803128 Jul 10 10:18 ls-lR.Z"
+ * "d--------- 1 owner group 0 May 9 19:45 Softlib"
+ * Also WFTPD for MSDOS:
+ * "-rwxrwxrwx 1 noone nogroup 322 Aug 19 1996 message.ftp"
+ * Hellsoft for NetWare:
+ * "d[RWCEMFA] supervisor 512 Jan 16 18:53 login"
+ * "-[RWCEMFA] rhesus 214059 Oct 20 15:27 cx.exe"
+ * Newer Hellsoft for NetWare: (netlab2.usu.edu)
+ * - [RWCEAFMS] NFAUUser 192 Apr 27 15:21 HEADER.html
+ * d [RWCEAFMS] jrd 512 Jul 11 03:01 allupdates
+ * Also NetPresenz for the Mac:
+ * "-------r-- 326 1391972 1392298 Nov 22 1995 MegaPhone.sit"
+ * "drwxrwxr-x folder 2 May 10 1996 network"
+ * Protected directory:
+ * "drwx-wx-wt 2 root wheel 512 Jul 1 02:15 incoming"
+ * uid/gid instead of username/groupname:
+ * "drwxr-xr-x 2 0 0 512 May 28 22:17 etc"
+ */
+
+ bool is_old_Hellsoft = false;
+
+ if (numtoks >= 6) {
+ /* there are two perm formats (Hellsoft/NetWare and *IX strmode(3)).
+ * Scan for size column only if the perm format is one or the other.
+ */
+ if (toklen[0] == 1 || (tokens[0][1]) == '[') {
+ if (*tokens[0] == 'd' || *tokens[0] == '-') {
+ pos = toklen[0] - 1;
+ p = tokens[0] + 1;
+ if (pos == 0) {
+ p = tokens[1];
+ pos = toklen[1];
+ }
+ if ((pos == 9 || pos == 10) && (*p == '[' && p[pos - 1] == ']') &&
+ (p[1] == 'R' || p[1] == '-') && (p[2] == 'W' || p[2] == '-') &&
+ (p[3] == 'C' || p[3] == '-') && (p[4] == 'E' || p[4] == '-')) {
+ /* rest is FMA[S] or AFM[S] */
+ lstyle = 'U'; /* very likely one of the NetWare servers */
+ if (toklen[0] == 10) is_old_Hellsoft = true;
+ }
+ }
+ } else if ((toklen[0] == 10 || toklen[0] == 11) &&
+ strchr("-bcdlpsw?DFam", *tokens[0])) {
+ p = &(tokens[0][1]);
+ if ((p[0] == 'r' || p[0] == '-') && (p[1] == 'w' || p[1] == '-') &&
+ (p[3] == 'r' || p[3] == '-') && (p[4] == 'w' || p[4] == '-') &&
+ (p[6] == 'r' || p[6] == '-') && (p[7] == 'w' || p[7] == '-'))
+ /* 'x'/p[9] can be S|s|x|-|T|t or implementation specific */
+ {
+ lstyle = 'U'; /* very likely /bin/ls */
+ }
+ }
+ }
+ if (lstyle == 'U') /* first token checks out */
+ {
+ lstyle = 0;
+ for (pos = (numtoks - 5); !lstyle && pos > 1; pos--) {
+ /* scan for: (\d+)\s+([A-Z][a-z][a-z])\s+
+ * (\d\d\d\d|\d\:\d\d|\d\d\:\d\d|\d\:\d\d\:\d\d|\d\d\:\d\d\:\d\d)
+ * \s+(.+)$
+ */
+ if (IsAsciiDigit(*tokens[pos]) /* size */
+ /* (\w\w\w) */
+ && toklen[pos + 1] == 3 && IsAsciiAlpha(*tokens[pos + 1]) &&
+ IsAsciiAlpha(tokens[pos + 1][1]) &&
+ IsAsciiAlpha(tokens[pos + 1][2])
+ /* (\d|\d\d) */
+ && IsAsciiDigit(*tokens[pos + 2]) &&
+ (toklen[pos + 2] == 1 ||
+ (toklen[pos + 2] == 2 && IsAsciiDigit(tokens[pos + 2][1]))) &&
+ toklen[pos + 3] >= 4 &&
+ IsAsciiDigit(*tokens[pos + 3])
+ /* (\d\:\d\d\:\d\d|\d\d\:\d\d\:\d\d) */
+ && (toklen[pos + 3] <= 5 ||
+ ((toklen[pos + 3] == 7 || toklen[pos + 3] == 8) &&
+ (tokens[pos + 3][toklen[pos + 3] - 3]) == ':')) &&
+ IsAsciiDigit(tokens[pos + 3][toklen[pos + 3] - 2]) &&
+ IsAsciiDigit(tokens[pos + 3][toklen[pos + 3] - 1]) &&
+ (
+ /* (\d\d\d\d) */
+ ((toklen[pos + 3] == 4 || toklen[pos + 3] == 5) &&
+ IsAsciiDigit(tokens[pos + 3][1]) &&
+ IsAsciiDigit(tokens[pos + 3][2]))
+ /* (\d\:\d\d|\d\:\d\d\:\d\d) */
+ || ((toklen[pos + 3] == 4 || toklen[pos + 3] == 7) &&
+ (tokens[pos + 3][1]) == ':' &&
+ IsAsciiDigit(tokens[pos + 3][2]) &&
+ IsAsciiDigit(tokens[pos + 3][3]))
+ /* (\d\d\:\d\d|\d\d\:\d\d\:\d\d) */
+ || ((toklen[pos + 3] == 5 || toklen[pos + 3] == 8) &&
+ IsAsciiDigit(tokens[pos + 3][1]) &&
+ (tokens[pos + 3][2]) == ':' &&
+ IsAsciiDigit(tokens[pos + 3][3]) &&
+ IsAsciiDigit(tokens[pos + 3][4])))) {
+ lstyle = 'U'; /* assume /bin/ls or variant format */
+ tokmarker = pos;
+
+ /* check that size is numeric */
+ p = tokens[tokmarker];
+ unsigned int i;
+ for (i = 0; i < toklen[tokmarker]; i++) {
+ if (!IsAsciiDigit(*p++)) {
+ lstyle = 0;
+ break;
+ }
+ }
+ if (lstyle) {
+ month_num = 0;
+ p = tokens[tokmarker + 1];
+ for (i = 0; i < (12 * 3); i += 3) {
+ if (p[0] == month_names[i + 0] && p[1] == month_names[i + 1] &&
+ p[2] == month_names[i + 2])
+ break;
+ month_num++;
+ }
+ if (month_num >= 12) lstyle = 0;
+ }
+ } /* relative position test */
+ } /* for (pos = (numtoks-5); !lstyle && pos > 1; pos--) */
+ } /* if (lstyle == 'U') */
+
+ if (lstyle == 'U') {
+ state->parsed_one = 1;
+ state->lstyle = lstyle;
+
+ result->fe_cinfs = 0;
+ result->fe_type = '?';
+ if (*tokens[0] == 'd' || *tokens[0] == 'l')
+ result->fe_type = *tokens[0];
+ else if (*tokens[0] == 'D')
+ result->fe_type = 'd';
+ else if (*tokens[0] == '-' || *tokens[0] == 'F')
+ result->fe_type = 'f'; /* (hopefully a regular file) */
+
+ if (result->fe_type != 'd') {
+ pos = toklen[tokmarker];
+ if (pos > (sizeof(result->fe_size) - 1))
+ pos = (sizeof(result->fe_size) - 1);
+ memcpy(result->fe_size, tokens[tokmarker], pos);
+ result->fe_size[pos] = '\0';
+ }
+
+ result->fe_time.tm_month = month_num;
+ result->fe_time.tm_mday = atoi(tokens[tokmarker + 2]);
+ if (result->fe_time.tm_mday == 0) result->fe_time.tm_mday++;
+
+ p = tokens[tokmarker + 3];
+ pos = (unsigned int)atoi(p);
+ if (p[1] == ':') /* one digit hour */
+ p--;
+ if (p[2] != ':') /* year */
+ {
+ result->fe_time.tm_year = pos;
+ } else {
+ result->fe_time.tm_hour = pos;
+ result->fe_time.tm_min = atoi(p + 3);
+ if (p[5] == ':') result->fe_time.tm_sec = atoi(p + 6);
+
+ if (!state->now_time) {
+ state->now_time = nowTimeFn();
+ PR_ExplodeTime((state->now_time), timeParam, &(state->now_tm));
+ }
+
+ result->fe_time.tm_year = state->now_tm.tm_year;
+ if (((state->now_tm.tm_month << 5) + state->now_tm.tm_mday) <
+ ((result->fe_time.tm_month << 5) + result->fe_time.tm_mday))
+ result->fe_time.tm_year--;
+
+ } /* time/year */
+
+ // The length of the whole date string should be 12. On AIX the length
+ // is only 11 when the year is present in the date string and there is
+ // 1 padding space at the end of the string. In both cases the filename
+ // starts at offset 13 from the start of the date string.
+ // Don't care about leading spaces when the date string has different
+ // format or when old Hellsoft output was detected.
+ {
+ const char* date_start = tokens[tokmarker + 1];
+ const char* date_end = tokens[tokmarker + 3] + toklen[tokmarker + 3];
+ if (!is_old_Hellsoft &&
+ ((date_end - date_start) == 12 ||
+ ((date_end - date_start) == 11 && date_end[1] == ' ')))
+ result->fe_fname = date_start + 13;
+ else
+ result->fe_fname = tokens[tokmarker + 4];
+ }
+
+ result->fe_fnlen = (&(line[linelen])) - (result->fe_fname);
+
+ if (result->fe_type == 'l' && result->fe_fnlen > 4) {
+ /* First try to use result->fe_size to find " -> " sequence.
+ This can give proper result for cases like "aaa -> bbb -> ccc". */
+ uintptr_t fe_size = atoi(result->fe_size);
+ CheckedInt<uintptr_t> arrow_start(result->fe_fnlen);
+ arrow_start -= fe_size;
+ arrow_start -= 4;
+
+ if (arrow_start.isValid() &&
+ PL_strncmp(result->fe_fname + arrow_start.value(), " -> ", 4) ==
+ 0) {
+ result->fe_lname = result->fe_fname + (result->fe_fnlen - fe_size);
+ result->fe_lnlen = (&(line[linelen])) - (result->fe_lname);
+ result->fe_fnlen = arrow_start.value();
+ } else {
+ /* Search for sequence " -> " from the end for case when there are
+ more occurrences. F.e. if ftpd returns "a -> b -> c" assume
+ "a -> b" as a name. Powerusers can remove unnecessary parts
+ manually but there is no way to follow the link when some
+ essential part is missing. */
+ p = result->fe_fname + (result->fe_fnlen - 5);
+ for (pos = (result->fe_fnlen - 5); pos > 0; pos--) {
+ if (PL_strncmp(p, " -> ", 4) == 0) {
+ result->fe_lname = p + 4;
+ result->fe_lnlen = (&(line[linelen])) - (result->fe_lname);
+ result->fe_fnlen = pos;
+ break;
+ }
+ p--;
+ }
+ }
+ }
+
+# if defined(SUPPORT_LSLF) /* some (very rare) servers return ls -lF */
+ if (result->fe_fnlen > 1) {
+ p = result->fe_fname[result->fe_fnlen - 1];
+ pos = result->fe_type;
+ if (pos == 'd') {
+ if (*p == '/') result->fe_fnlen--; /* directory */
+ } else if (pos == 'l') {
+ if (*p == '@') result->fe_fnlen--; /* symlink */
+ } else if (pos == 'f') {
+ if (*p == '*') result->fe_fnlen--; /* executable */
+ } else if (*p == '=' || *p == '%' || *p == '|') {
+ result->fe_fnlen--; /* socket, whiteout, fifo */
+ }
+ }
+# endif
+
+ /* the caller should do this (if dropping "." and ".." is desired)
+ if (result->fe_type == 'd' && result->fe_fname[0] == '.' &&
+ (result->fe_fnlen == 1 || (result->fe_fnlen == 2 &&
+ result->fe_fname[1] == '.')))
+ return '?';
+ */
+
+ return result->fe_type;
+
+ } /* if (lstyle == 'U') */
+
+ } /* if (!lstyle && (!state->lstyle || state->lstyle == 'U')) */
+#endif
+
+ /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+
+#if defined(SUPPORT_W16) /* 16bit Windows */
+ if (!lstyle &&
+ (!state->lstyle ||
+ state->lstyle == 'w')) { /* old SuperTCP suite FTP server for Win3.1 */
+ /* old NetManage Chameleon TCP/IP suite FTP server for Win3.1 */
+ /*
+ * SuperTCP dirlist from the mirror.pl project
+ * mon/day/year separator may be '/' or '-'.
+ * . <DIR> 11-16-94 17:16
+ * .. <DIR> 11-16-94 17:16
+ * INSTALL <DIR> 11-16-94 17:17
+ * CMT <DIR> 11-21-94 10:17
+ * DESIGN1.DOC 11264 05-11-95 14:20
+ * README.TXT 1045 05-10-95 11:01
+ * WPKIT1.EXE 960338 06-21-95 17:01
+ * CMT.CSV 0 07-06-95 14:56
+ *
+ * Chameleon dirlist guessed from lynx
+ * . <DIR> Nov 16 1994 17:16
+ * .. <DIR> Nov 16 1994 17:16
+ * INSTALL <DIR> Nov 16 1994 17:17
+ * CMT <DIR> Nov 21 1994 10:17
+ * DESIGN1.DOC 11264 May 11 1995 14:20 A
+ * README.TXT 1045 May 10 1995 11:01
+ * WPKIT1.EXE 960338 Jun 21 1995 17:01 R
+ * CMT.CSV 0 Jul 06 1995 14:56 RHA
+ */
+ if (numtoks >= 4 && toklen[0] < 13 &&
+ ((toklen[1] == 5 && *tokens[1] == '<') || IsAsciiDigit(*tokens[1]))) {
+ if (numtoks == 4 && (toklen[2] == 8 || toklen[2] == 9) &&
+ (((tokens[2][2]) == '/' && (tokens[2][5]) == '/') ||
+ ((tokens[2][2]) == '-' && (tokens[2][5]) == '-')) &&
+ (toklen[3] == 4 || toklen[3] == 5) &&
+ (tokens[3][toklen[3] - 3]) == ':' && IsAsciiDigit(tokens[2][0]) &&
+ IsAsciiDigit(tokens[2][1]) && IsAsciiDigit(tokens[2][3]) &&
+ IsAsciiDigit(tokens[2][4]) && IsAsciiDigit(tokens[2][6]) &&
+ IsAsciiDigit(tokens[2][7]) &&
+ (toklen[2] < 9 || IsAsciiDigit(tokens[2][8])) &&
+ IsAsciiDigit(tokens[3][toklen[3] - 1]) &&
+ IsAsciiDigit(tokens[3][toklen[3] - 2]) &&
+ IsAsciiDigit(tokens[3][toklen[3] - 4]) &&
+ IsAsciiDigit(*tokens[3])) {
+ lstyle = 'w';
+ } else if ((numtoks == 6 || numtoks == 7) && toklen[2] == 3 &&
+ toklen[3] == 2 && toklen[4] == 4 && toklen[5] == 5 &&
+ (tokens[5][2]) == ':' && IsAsciiAlpha(tokens[2][0]) &&
+ IsAsciiAlpha(tokens[2][1]) && IsAsciiAlpha(tokens[2][2]) &&
+ IsAsciiDigit(tokens[3][0]) && IsAsciiDigit(tokens[3][1]) &&
+ IsAsciiDigit(tokens[4][0]) && IsAsciiDigit(tokens[4][1]) &&
+ IsAsciiDigit(tokens[4][2]) && IsAsciiDigit(tokens[4][3]) &&
+ IsAsciiDigit(tokens[5][0]) && IsAsciiDigit(tokens[5][1]) &&
+ IsAsciiDigit(tokens[5][3]) && IsAsciiDigit(tokens[5][4])
+ /* could also check that (&(tokens[5][5]) - tokens[2]) == 17
+ */
+ ) {
+ lstyle = 'w';
+ }
+ if (lstyle && state->lstyle != lstyle) /* first time */
+ {
+ p = tokens[1];
+ if (toklen[1] != 5 || p[0] != '<' || p[1] != 'D' || p[2] != 'I' ||
+ p[3] != 'R' || p[4] != '>') {
+ for (pos = 0; lstyle && pos < toklen[1]; pos++) {
+ if (!IsAsciiDigit(*p++)) lstyle = 0;
+ }
+ } /* not <DIR> */
+ } /* if (first time) */
+ } /* if (numtoks == ...) */
+
+ if (lstyle == 'w') {
+ state->parsed_one = 1;
+ state->lstyle = lstyle;
+
+ result->fe_cinfs = 1;
+ result->fe_fname = tokens[0];
+ result->fe_fnlen = toklen[0];
+ result->fe_type = 'd';
+
+ p = tokens[1];
+ if (IsAsciiDigit(*p)) {
+ result->fe_type = 'f';
+ pos = toklen[1];
+ if (pos > (sizeof(result->fe_size) - 1))
+ pos = sizeof(result->fe_size) - 1;
+ memcpy(result->fe_size, p, pos);
+ result->fe_size[pos] = '\0';
+ }
+
+ p = tokens[2];
+ if (toklen[2] == 3) /* Chameleon */
+ {
+ tbuf[0] = ToUpperCaseASCII(p[0]);
+ tbuf[1] = ToLowerCaseASCII(p[1]);
+ tbuf[2] = ToLowerCaseASCII(p[2]);
+ for (pos = 0; pos < (12 * 3); pos += 3) {
+ if (tbuf[0] == month_names[pos + 0] &&
+ tbuf[1] == month_names[pos + 1] &&
+ tbuf[2] == month_names[pos + 2]) {
+ result->fe_time.tm_month = pos / 3;
+ result->fe_time.tm_mday = atoi(tokens[3]);
+ result->fe_time.tm_year = atoi(tokens[4]);
+ break;
+ }
+ }
+ pos = 5; /* Chameleon toknum of date field */
+ } else {
+ result->fe_time.tm_month = atoi(p + 0) - 1;
+ result->fe_time.tm_mday = atoi(p + 3);
+ result->fe_time.tm_year = atoi(p + 6);
+ FixupYear(&result->fe_time); /* SuperTCP */
+
+ pos = 3; /* SuperTCP toknum of date field */
+ }
+
+ result->fe_time.tm_hour = atoi(tokens[pos]);
+ result->fe_time.tm_min = atoi(&(tokens[pos][toklen[pos] - 2]));
+
+ /* the caller should do this (if dropping "." and ".." is desired)
+ if (result->fe_type == 'd' && result->fe_fname[0] == '.' &&
+ (result->fe_fnlen == 1 || (result->fe_fnlen == 2 &&
+ result->fe_fname[1] == '.')))
+ return '?';
+ */
+
+ return result->fe_type;
+ } /* (lstyle == 'w') */
+
+ } /* if (!lstyle && (!state->lstyle || state->lstyle == 'w')) */
+#endif
+
+ /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+
+#if defined(SUPPORT_DLS) /* dls -dtR */
+ if (!lstyle &&
+ (state->lstyle == 'D' || (!state->lstyle && state->numlines == 1)))
+ /* /bin/dls lines have to be immediately recognizable (first line) */
+ {
+ /* I haven't seen an FTP server that delivers a /bin/dls listing,
+ * but can infer the format from the lynx and mirror.pl projects.
+ * Both formats are supported.
+ *
+ * Lynx says:
+ * README 763 Information about this server\0
+ * bin/ - \0
+ * etc/ = \0
+ * ls-lR 0 \0
+ * ls-lR.Z 3 \0
+ * pub/ = Public area\0
+ * usr/ - \0
+ * morgan 14 -> ../real/morgan\0
+ * TIMIT.mostlikely.Z\0
+ * 79215 \0
+ *
+ * mirror.pl says:
+ * filename: ^(\S*)\s+
+ * size: (\-|\=|\d+)\s+
+ * month/day: ((\w\w\w\s+\d+|\d+\s+\w\w\w)\s+
+ * time/year: (\d+:\d+|\d\d\d\d))\s+
+ * rest: (.+)
+ *
+ * README 763 Jul 11 21:05 Information about this server
+ * bin/ - Apr 28 1994
+ * etc/ = 11 Jul 21:04
+ * ls-lR 0 6 Aug 17:14
+ * ls-lR.Z 3 05 Sep 1994
+ * pub/ = Jul 11 21:04 Public area
+ * usr/ - Sep 7 09:39
+ * morgan 14 Apr 18 09:39 -> ../real/morgan
+ * TIMIT.mostlikely.Z
+ * 79215 Jul 11 21:04
+ */
+ if (!state->lstyle && line[linelen - 1] == ':' && linelen >= 2 &&
+ toklen[numtoks - 1] != 1) {
+ /* code in mirror.pl suggests that a listing may be preceded
+ * by a PWD line in the form "/some/dir/names/here:"
+ * but does not necessarily begin with '/'. *sigh*
+ */
+ pos = 0;
+ p = line;
+ while (pos < (linelen - 1)) {
+ /* illegal (or extremely unusual) chars in a dirspec */
+ if (*p == '<' || *p == '|' || *p == '>' || *p == '?' || *p == '*' ||
+ *p == '\\')
+ break;
+ if (*p == '/' && pos < (linelen - 2) && p[1] == '/') break;
+ pos++;
+ p++;
+ }
+ if (pos == (linelen - 1)) {
+ state->lstyle = 'D';
+ return '?';
+ }
+ }
+
+ if (!lstyle && numtoks >= 2) {
+ pos = 22; /* pos of (\d+|-|=) if this is not part of a multiline */
+ if (state->lstyle && carry_buf_len) /* first is from previous line */
+ pos = toklen[1] - 1; /* and is 'as-is' (may contain whitespace) */
+
+ if (linelen > pos) {
+ p = &line[pos];
+ if ((*p == '-' || *p == '=' || IsAsciiDigit(*p)) &&
+ ((linelen == (pos + 1)) ||
+ (linelen >= (pos + 3) && p[1] == ' ' && p[2] == ' '))) {
+ tokmarker = 1;
+ if (!carry_buf_len) {
+ pos = 1;
+ while (pos < numtoks && (tokens[pos] + toklen[pos]) < (&line[23]))
+ pos++;
+ tokmarker = 0;
+ if ((tokens[pos] + toklen[pos]) == (&line[23])) tokmarker = pos;
+ }
+ if (tokmarker) {
+ lstyle = 'D';
+ if (*tokens[tokmarker] == '-' || *tokens[tokmarker] == '=') {
+ if (toklen[tokmarker] != 1 ||
+ (tokens[tokmarker - 1][toklen[tokmarker - 1] - 1]) != '/')
+ lstyle = 0;
+ } else {
+ for (pos = 0; lstyle && pos < toklen[tokmarker]; pos++) {
+ if (!IsAsciiDigit(tokens[tokmarker][pos])) lstyle = 0;
+ }
+ }
+ if (lstyle && !state->lstyle) /* first time */
+ {
+ /* scan for illegal (or incredibly unusual) chars in fname */
+ for (p = tokens[0];
+ lstyle &&
+ p < &(tokens[tokmarker - 1][toklen[tokmarker - 1]]);
+ p++) {
+ if (*p == '<' || *p == '|' || *p == '>' || *p == '?' ||
+ *p == '*' || *p == '/' || *p == '\\')
+ lstyle = 0;
+ }
+ }
+
+ } /* size token found */
+ } /* expected chars behind expected size token */
+ } /* if (linelen > pos) */
+ } /* if (!lstyle && numtoks >= 2) */
+
+ if (!lstyle && state->lstyle == 'D' && !carry_buf_len) {
+ /* the filename of a multi-line entry can be identified
+ * correctly only if dls format had been previously established.
+ * This should always be true because there should be entries
+ * for '.' and/or '..' and/or CWD that precede the rest of the
+ * listing.
+ */
+ pos = linelen;
+ if (pos > (sizeof(state->carry_buf) - 1))
+ pos = sizeof(state->carry_buf) - 1;
+ memcpy(state->carry_buf, line, pos);
+ state->carry_buf_len = pos;
+ return '?';
+ }
+
+ if (lstyle == 'D') {
+ state->parsed_one = 1;
+ state->lstyle = lstyle;
+
+ p = &(tokens[tokmarker - 1][toklen[tokmarker - 1]]);
+ result->fe_fname = tokens[0];
+ result->fe_fnlen = p - tokens[0];
+ result->fe_type = 'f';
+
+ if (result->fe_fname[result->fe_fnlen - 1] == '/') {
+ if (result->fe_lnlen == 1)
+ result->fe_type = '?';
+ else {
+ result->fe_fnlen--;
+ result->fe_type = 'd';
+ }
+ } else if (IsAsciiDigit(*tokens[tokmarker])) {
+ pos = toklen[tokmarker];
+ if (pos > (sizeof(result->fe_size) - 1))
+ pos = sizeof(result->fe_size) - 1;
+ memcpy(result->fe_size, tokens[tokmarker], pos);
+ result->fe_size[pos] = '\0';
+ }
+
+ if ((tokmarker + 3) < numtoks &&
+ (&(tokens[numtoks - 1][toklen[numtoks - 1]]) -
+ tokens[tokmarker + 1]) >= (1 + 1 + 3 + 1 + 4)) {
+ pos = (tokmarker + 3);
+ p = tokens[pos];
+ pos = toklen[pos];
+
+ if ((pos == 4 || pos == 5) && IsAsciiDigit(*p) &&
+ IsAsciiDigit(p[pos - 1]) && IsAsciiDigit(p[pos - 2]) &&
+ ((pos == 5 && p[2] == ':') ||
+ (pos == 4 && (IsAsciiDigit(p[1]) || p[1] == ':')))) {
+ month_num = tokmarker + 1; /* assumed position of month field */
+ pos = tokmarker + 2; /* assumed position of mday field */
+ if (IsAsciiDigit(*tokens[month_num])) /* positions are reversed */
+ {
+ month_num++;
+ pos--;
+ }
+ p = tokens[month_num];
+ if (IsAsciiDigit(*tokens[pos]) &&
+ (toklen[pos] == 1 ||
+ (toklen[pos] == 2 && IsAsciiDigit(tokens[pos][1]))) &&
+ toklen[month_num] == 3 && IsAsciiAlpha(*p) &&
+ IsAsciiAlpha(p[1]) && IsAsciiAlpha(p[2])) {
+ pos = atoi(tokens[pos]);
+ if (pos > 0 && pos <= 31) {
+ result->fe_time.tm_mday = pos;
+ month_num = 1;
+ for (pos = 0; pos < (12 * 3); pos += 3) {
+ if (p[0] == month_names[pos + 0] &&
+ p[1] == month_names[pos + 1] &&
+ p[2] == month_names[pos + 2])
+ break;
+ month_num++;
+ }
+ if (month_num > 12)
+ result->fe_time.tm_mday = 0;
+ else
+ result->fe_time.tm_month = month_num - 1;
+ }
+ }
+ if (result->fe_time.tm_mday) {
+ tokmarker += 3; /* skip mday/mon/yrtime (to find " -> ") */
+ p = tokens[tokmarker];
+
+ pos = atoi(p);
+ if (pos > 24)
+ result->fe_time.tm_year = pos;
+ else {
+ if (p[1] == ':') p--;
+ result->fe_time.tm_hour = pos;
+ result->fe_time.tm_min = atoi(p + 3);
+ if (!state->now_time) {
+ state->now_time = nowTimeFn();
+ PR_ExplodeTime((state->now_time), timeParam,
+ &(state->now_tm));
+ }
+ result->fe_time.tm_year = state->now_tm.tm_year;
+ if (((state->now_tm.tm_month << 4) + state->now_tm.tm_mday) <
+ ((result->fe_time.tm_month << 4) + result->fe_time.tm_mday))
+ result->fe_time.tm_year--;
+ } /* got year or time */
+ } /* got month/mday */
+ } /* may have year or time */
+ } /* enough remaining to possibly have date/time */
+
+ if (numtoks > (tokmarker + 2)) {
+ pos = tokmarker + 1;
+ p = tokens[pos];
+ if (toklen[pos] == 2 && *p == '-' && p[1] == '>') {
+ p = &(tokens[numtoks - 1][toklen[numtoks - 1]]);
+ result->fe_type = 'l';
+ result->fe_lname = tokens[pos + 1];
+ result->fe_lnlen = p - result->fe_lname;
+ if (result->fe_lnlen > 1 &&
+ result->fe_lname[result->fe_lnlen - 1] == '/')
+ result->fe_lnlen--;
+ }
+ } /* if (numtoks > (tokmarker+2)) */
+
+ /* the caller should do this (if dropping "." and ".." is desired)
+ if (result->fe_type == 'd' && result->fe_fname[0] == '.' &&
+ (result->fe_fnlen == 1 || (result->fe_fnlen == 2 &&
+ result->fe_fname[1] == '.')))
+ return '?';
+ */
+
+ return result->fe_type;
+
+ } /* if (lstyle == 'D') */
+ } /* if (!lstyle && (!state->lstyle || state->lstyle == 'D')) */
+#endif
+
+ /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+
+ } /* if (linelen > 0) */
+
+ return ParsingFailed(state);
+}
diff --git a/netwerk/streamconv/converters/ParseFTPList.h b/netwerk/streamconv/converters/ParseFTPList.h
new file mode 100644
index 0000000000..610db04d31
--- /dev/null
+++ b/netwerk/streamconv/converters/ParseFTPList.h
@@ -0,0 +1,102 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ParseRTPList_h___
+#define ParseRTPList_h___
+
+#include <stdint.h>
+#include <string.h>
+#include "prtime.h"
+
+/* ParseFTPList() parses lines from an FTP LIST command.
+**
+** Written July 2002 by Cyrus Patel <cyp@fb14.uni-mainz.de>
+** with acknowledgements to squid, lynx, wget and ftpmirror.
+**
+** Arguments:
+** 'line': line of FTP data connection output. The line is assumed
+** to end at the first '\0' or '\n' or '\r\n'.
+** 'state': a structure used internally to track state between
+** lines. Needs to be bzero()'d at LIST begin.
+** 'result': where ParseFTPList will store the results of the parse
+** if 'line' is not a comment and is not junk.
+**
+** Returns one of the following:
+** 'd' - LIST line is a directory entry ('result' is valid)
+** 'f' - LIST line is a file's entry ('result' is valid)
+** 'l' - LIST line is a symlink's entry ('result' is valid)
+** '?' - LIST line is junk. (cwd, non-file/dir/link, etc)
+** '"' - it's not a LIST line (it's a "comment")
+**
+** It may be advisable to let the end-user see "comments" (particularly when
+** the listing results in ONLY such lines) because such a listing may be:
+** - an unknown LIST format (NLST or "custom" format for example)
+** - an error msg (EPERM,ENOENT,ENFILE,EMFILE,ENOTDIR,ENOTBLK,EEXDEV etc).
+** - an empty directory and the 'comment' is a "total 0" line or similar.
+** (warning: a "total 0" can also mean the total size is unknown).
+**
+** ParseFTPList() supports all known FTP LISTing formats:
+** - '/bin/ls -l' and all variants (including Hellsoft FTP for NetWare);
+** - EPLF (Easily Parsable List Format);
+** - Windows NT's default "DOS-dirstyle";
+** - OS/2 basic server format LIST format;
+** - VMS (MultiNet, UCX, and CMU) LIST format (including multi-line format);
+** - IBM VM/CMS, VM/ESA LIST format (two known variants);
+** - SuperTCP FTP Server for Win16 LIST format;
+** - NetManage Chameleon (NEWT) for Win16 LIST format;
+** - '/bin/dls' (two known variants, plus multi-line) LIST format;
+** If there are others, then I'd like to hear about them (send me a sample).
+**
+** NLSTings are not supported explicitly because they cannot be machine
+** parsed consistently: NLSTings do not have unique characteristics - even
+** the assumption that there won't be whitespace on the line does not hold
+** because some nlistings have more than one filename per line and/or
+** may have filenames that have spaces in them. Moreover, distinguishing
+** between an error message and an NLST line would require ParseFTPList() to
+** recognize all the possible strerror() messages in the world.
+*/
+
+/* #undef anything you don't want to support */
+#define SUPPORT_LSL /* /bin/ls -l and dozens of variations thereof */
+#define SUPPORT_DLS /* /bin/dls format (very, Very, VERY rare) */
+#define SUPPORT_EPLF /* Extraordinarily Pathetic List Format */
+#define SUPPORT_DOS /* WinNT server in 'site dirstyle' dos */
+#define SUPPORT_VMS /* VMS (all: MultiNet, UCX, CMU-IP) */
+#define SUPPORT_CMS /* IBM VM/CMS,VM/ESA (z/VM and LISTING forms) */
+#define SUPPORT_OS2 /* IBM TCP/IP for OS/2 - FTP Server */
+#define SUPPORT_W16 /* win16 hosts: SuperTCP or NetManage Chameleon */
+
+/* Parser state that ParseFTPList() carries from one line of a listing to
+ * the next. A default-constructed instance is all zeroes. */
+struct list_state {
+ /* zero-fill every member, so lstyle == 0 ("style not yet determined") */
+ list_state() { memset(this, 0, sizeof(*this)); }
+
+ PRTime now_time; /* needed for year determination; 0 until first needed */
+ PRExplodedTime now_tm; /* now_time broken down; needed for year determination */
+ int32_t lstyle; /* LISTing style detected so far (e.g. 'U', 'w', 'D'); 0 = none */
+ int32_t parsed_one; /* returned anything yet? */
+ char carry_buf[84]; /* first line of a multi-line entry (VMS, dls) */
+ uint32_t carry_buf_len; /* length of name in carry_buf */
+ uint32_t numlines; /* number of lines seen */
+};
+
+/* One parsed directory entry, filled in by ParseFTPList(). */
+struct list_result {
+ int32_t fe_type; /* 'd'(dir) or 'l'(link) or 'f'(file) */
+ const char* fe_fname; /* pointer to filename; use fe_fnlen, other text */
+ /* (such as a symlink target) may follow the name */
+ uint32_t fe_fnlen; /* length of filename */
+ const char* fe_lname; /* pointer to symlink name */
+ uint32_t fe_lnlen; /* length of symlink name */
+ char fe_size[40]; /* size of file in bytes (<= (2^128 - 1)), stored as */
+ /* a NUL-terminated string of decimal digits */
+ PRExplodedTime fe_time; /* last-modified time */
+ int32_t fe_cinfs; /* file system is definitely case insensitive */
+ /* (converting all-upcase names may be desirable) */
+};
+
+/* Callback that returns the current time. ParseFTPList() calls it while
+ * list_state::now_time is still zero and caches the result there. */
+typedef PRTime (*NowTimeFn)();
+
+/* Parses one line of LIST output.
+ * 'timeParam' is handed to PR_ExplodeTime() when the current time has to be
+ * broken down into list_state::now_tm.
+ * 'nowTimeFn' supplies the current time, which is needed to pick a year for
+ * entries whose date shows a time of day instead of a year.
+ * Both defaults use the real clock and local time parameters.
+ */
+int ParseFTPList(const char* line, struct list_state* state,
+ struct list_result* result,
+ PRTimeParamFn timeParam = PR_LocalTimeParameters,
+ NowTimeFn nowTimeFn = PR_Now);
+
+#endif /* !ParseRTPList_h___ */
diff --git a/netwerk/streamconv/converters/moz.build b/netwerk/streamconv/converters/moz.build
new file mode 100644
index 0000000000..46b1d4a4dc
--- /dev/null
+++ b/netwerk/streamconv/converters/moz.build
@@ -0,0 +1,32 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+XPIDL_SOURCES += ["nsICompressConvStats.idl"]
+
+EXPORTS += [
+ "nsUnknownDecoder.h",
+]
+
+XPIDL_MODULE = "necko_http"
+
+UNIFIED_SOURCES += [
+ "mozTXTToHTMLConv.cpp",
+ "nsDirIndex.cpp",
+ "nsDirIndexParser.cpp",
+ "nsFTPDirListingConv.cpp",
+ "nsHTTPCompressConv.cpp",
+ "nsIndexedToHTML.cpp",
+ "nsMultiMixedConv.cpp",
+ "nsUnknownDecoder.cpp",
+ "ParseFTPList.cpp",
+]
+
+FINAL_LIBRARY = "xul"
+
+LOCAL_INCLUDES += [
+ "/modules/brotli/dec",
+ "/netwerk/base",
+]
diff --git a/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp b/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp
new file mode 100644
index 0000000000..1ab51adb82
--- /dev/null
+++ b/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp
@@ -0,0 +1,1260 @@
+/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/TextUtils.h"
+#include "mozTXTToHTMLConv.h"
+#include "nsNetUtil.h"
+#include "nsUnicharUtils.h"
+#include "nsUnicodeProperties.h"
+#include "nsCRT.h"
+#include "nsIExternalProtocolHandler.h"
+#include "nsIURI.h"
+
+#include <algorithm>
+
+#ifdef DEBUG_BenB_Perf
+# include "prtime.h"
+# include "prinrval.h"
+#endif
+
+using mozilla::IsAscii;
+using mozilla::IsAsciiAlpha;
+using mozilla::IsAsciiDigit;
+
+const double growthRate = 1.2;
+
+// Whitespace test used by the text scanner.
+// Besides ASCII whitespace it accepts 0xA0 (Latin1/Unicode non-breaking
+// space): since bug 183111 the editor emits runs of spaces as leading 0xA0's
+// followed by one ordinary space. The Japanese ideographic space 0x3000 is
+// accepted as well.
+static inline bool IsSpace(const char16_t aChar) {
+  if (aChar == 0xA0 || aChar == 0x3000) {
+    return true;
+  }
+  return nsCRT::IsAsciiSpace(aChar);
+}
+
+// Appends ch to aStringToAppendTo, writing the HTML entity instead of the
+// character for '<', '>' and '&'. A double quote is written as "&quot;" only
+// when inAttribute is true; any other character is appended as it is.
+void mozTXTToHTMLConv::EscapeChar(const char16_t ch,
+                                  nsAString& aStringToAppendTo,
+                                  bool inAttribute) {
+  if (ch == '<') {
+    aStringToAppendTo.AppendLiteral("&lt;");
+    return;
+  }
+  if (ch == '>') {
+    aStringToAppendTo.AppendLiteral("&gt;");
+    return;
+  }
+  if (ch == '&') {
+    aStringToAppendTo.AppendLiteral("&amp;");
+    return;
+  }
+  if (ch == '"' && inAttribute) {
+    aStringToAppendTo.AppendLiteral("&quot;");
+    return;
+  }
+  // Nothing to escape (this includes '"' outside of an attribute).
+  aStringToAppendTo += ch;
+}
+
+// EscapeStr rewrites aInString IN PLACE: every '<', '>' and '&' is replaced
+// by its HTML entity, and '"' is replaced by "&quot;" when inAttribute is
+// true.
+void mozTXTToHTMLConv::EscapeStr(nsString& aInString, bool inAttribute) {
+  // The substitution is done by hand with Cut/InsertLiteral because the
+  // replace-substring routines don't seem to work when a character of the
+  // input also occurs in the replacement string (as '&' does in "&amp;").
+  uint32_t idx = 0;
+  while (idx < aInString.Length()) {
+    const char16_t current = aInString[idx];
+    if (current == '<') {
+      aInString.Cut(idx, 1);
+      aInString.InsertLiteral(u"&lt;", idx);
+      idx += 4;  // continue after the entity just inserted
+    } else if (current == '>') {
+      aInString.Cut(idx, 1);
+      aInString.InsertLiteral(u"&gt;", idx);
+      idx += 4;  // continue after the entity just inserted
+    } else if (current == '&') {
+      aInString.Cut(idx, 1);
+      aInString.InsertLiteral(u"&amp;", idx);
+      idx += 5;  // continue after the entity just inserted
+    } else if (current == '"' && inAttribute) {
+      aInString.Cut(idx, 1);
+      aInString.InsertLiteral(u"&quot;", idx);
+      idx += 6;  // continue after the entity just inserted
+    } else {
+      // Ordinary character, or a quote outside of an attribute.
+      ++idx;
+    }
+  }
+}
+
+// Appends to aOutString the aLength characters of aInString that start at
+// aStartPos, turning the entities "&lt;", "&gt;", "&amp;" and "&quot;" back
+// into '<', '>', '&' and '"'.
+//
+// An entity is only recognised when all of it lies inside the range. The
+// previous code clamped the compare length to the characters left, so a
+// truncated entity at the end of the range (even a lone '&') matched the
+// prefix of "&lt;", was emitted as '<', and moved the index past the range.
+// Such a tail is now copied through unchanged.
+void mozTXTToHTMLConv::UnescapeStr(const char16_t* aInString, int32_t aStartPos,
+                                   int32_t aLength, nsString& aOutString) {
+  for (uint32_t i = aStartPos; int32_t(i) - aStartPos < aLength;) {
+    // Characters of the range that have not been consumed yet (always >= 1).
+    const int32_t charsLeft = aLength - (int32_t(i) - aStartPos);
+    const char16_t* subString = &aInString[i];
+
+    if (*subString != '&') {
+      aOutString += *subString;
+      i++;
+      continue;
+    }
+
+    if (charsLeft >= 4 && !NS_strncmp(subString, u"&lt;", 4)) {
+      aOutString.Append(char16_t('<'));
+      i += 4;
+    } else if (charsLeft >= 4 && !NS_strncmp(subString, u"&gt;", 4)) {
+      aOutString.Append(char16_t('>'));
+      i += 4;
+    } else if (charsLeft >= 5 && !NS_strncmp(subString, u"&amp;", 5)) {
+      aOutString.Append(char16_t('&'));
+      i += 5;
+    } else if (charsLeft >= 6 && !NS_strncmp(subString, u"&quot;", 6)) {
+      aOutString.Append(char16_t('"'));
+      i += 6;
+    } else {
+      // A bare '&', an unknown entity, or an entity cut off by the range end.
+      aOutString += *subString;
+      i++;
+    }
+  }
+}
+
+// Turns an abbreviated URL into a full one by prepending a scheme.
+// aInString[pos] is the character that triggered recognition:
+//   '@' -> "mailto:" is prepended, provided a '.' occurs at or after pos;
+//   '.' -> "http://" is prepended when the text matches "www.", otherwise
+//          "ftp://" when it matches "ftp.".
+// aOutString is left untouched when none of these apply.
+void mozTXTToHTMLConv::CompleteAbbreviatedURL(const char16_t* aInString,
+                                              int32_t aInLength,
+                                              const uint32_t pos,
+                                              nsString& aOutString) {
+  NS_ASSERTION(int32_t(pos) < aInLength,
+               "bad args to CompleteAbbreviatedURL, see bug #190851");
+  if (int32_t(pos) >= aInLength) return;
+
+  const char16_t trigger = aInString[pos];
+
+  if (trigger == '@') {
+    // Only build a mailto: URL if a ".domain" follows, i.e. linkify
+    // johndoe@foo.com but not "let's meet @8pm".
+    nsDependentString wholeInput(aInString, aInLength);
+    if (wholeInput.FindChar('.', pos) == kNotFound) {
+      return;
+    }
+    aOutString.AssignLiteral("mailto:");
+    aOutString += aInString;
+    return;
+  }
+
+  if (trigger != '.') {
+    return;
+  }
+
+  if (ItMatchesDelimited(aInString, aInLength, u"www.", 4, LT_IGNORE,
+                         LT_IGNORE)) {
+    aOutString.AssignLiteral("http://");
+    aOutString += aInString;
+    return;
+  }
+  if (ItMatchesDelimited(aInString, aInLength, u"ftp.", 4, LT_IGNORE,
+                         LT_IGNORE)) {
+    aOutString.AssignLiteral("ftp://");
+    aOutString += aInString;
+  }
+}
+
+// Searches backwards from pos for the first character of a URL of the kind
+// given by check. On success the index is stored in start and true is
+// returned; otherwise false is returned. Note that in the RFC2396E case
+// start may already have been written when false is returned.
+bool mozTXTToHTMLConv::FindURLStart(const char16_t* aInString,
+ int32_t aInLength, const uint32_t pos,
+ const modetype check, uint32_t& start) {
+ switch (check) { // no breaks, because end of blocks is never reached
+ case RFC1738: {
+ // Accept only if the five characters ending at pos are "<URL:". For
+ // pos < 4 the cast yields a negative value, so the compare starts at 0.
+ if (!NS_strncmp(&aInString[std::max(int32_t(pos - 4), 0)], u"<URL:", 5)) {
+ start = pos + 1;
+ return true;
+ }
+ return false;
+ }
+ case RFC2396E: {
+ // Nearest '<', '>' or '"' before pos; only '<' or '"' opens a URL.
+ nsString temp(aInString, aInLength);
+ int32_t i = pos <= 0 ? kNotFound : temp.RFindCharInSet(u"<>\"", pos - 1);
+ if (i != kNotFound &&
+ (temp[uint32_t(i)] == '<' || temp[uint32_t(i)] == '"')) {
+ // The URL begins right after the delimiter and must not be empty.
+ start = uint32_t(++i);
+ return start < pos;
+ }
+ return false;
+ }
+ case freetext: {
+ // Walk back over ASCII letters, digits, '+', '-' and '.'.
+ int32_t i = pos - 1;
+ for (; i >= 0 &&
+ (IsAsciiAlpha(aInString[uint32_t(i)]) ||
+ IsAsciiDigit(aInString[uint32_t(i)]) ||
+ aInString[uint32_t(i)] == '+' || aInString[uint32_t(i)] == '-' ||
+ aInString[uint32_t(i)] == '.');
+ i--)
+ ;
+ // i stopped one before the run; the run must be non-empty and begin
+ // with a letter.
+ if (++i >= 0 && uint32_t(i) < pos &&
+ IsAsciiAlpha(aInString[uint32_t(i)])) {
+ start = uint32_t(i);
+ return true;
+ }
+ return false;
+ }
+ case abbreviated: {
+ int32_t i = pos - 1;
+ // This disallows non-ascii-characters for email.
+ // Currently correct, but revisit later after standards changed.
+ bool isEmail = aInString[pos] == (char16_t)'@';
+ // These chars mark the start of the URL
+ // (for email, a non-ASCII character or ')' stops the scan as well)
+ for (; i >= 0 && aInString[uint32_t(i)] != '>' &&
+ aInString[uint32_t(i)] != '<' && aInString[uint32_t(i)] != '"' &&
+ aInString[uint32_t(i)] != '\'' && aInString[uint32_t(i)] != '`' &&
+ aInString[uint32_t(i)] != ',' && aInString[uint32_t(i)] != '{' &&
+ aInString[uint32_t(i)] != '[' && aInString[uint32_t(i)] != '(' &&
+ aInString[uint32_t(i)] != '|' && aInString[uint32_t(i)] != '\\' &&
+ !IsSpace(aInString[uint32_t(i)]) &&
+ (!isEmail || IsAscii(aInString[uint32_t(i)])) &&
+ (!isEmail || aInString[uint32_t(i)] != ')');
+ i--)
+ ;
+ // The candidate must be non-empty and begin with an ASCII letter or
+ // digit.
+ if (++i >= 0 && uint32_t(i) < pos &&
+ (IsAsciiAlpha(aInString[uint32_t(i)]) ||
+ IsAsciiDigit(aInString[uint32_t(i)]))) {
+ start = uint32_t(i);
+ return true;
+ }
+ return false;
+ }
+ default:
+ return false;
+ } // switch
+}
+
+// Searches forwards from pos for the last character of a URL of the kind
+// given by check, whose first character is at start. On success the index
+// of the last character is stored in end and true is returned; otherwise
+// false is returned. Note that in the RFC1738/RFC2396E case end may already
+// have been written when false is returned.
+bool mozTXTToHTMLConv::FindURLEnd(const char16_t* aInString,
+ int32_t aInStringLength, const uint32_t pos,
+ const modetype check, const uint32_t start,
+ uint32_t& end) {
+ switch (check) { // no breaks, because end of blocks is never reached
+ case RFC1738:
+ case RFC2396E: {
+ nsString temp(aInString, aInStringLength);
+
+ // Next '<', '>' or '"' after pos. It has to be '>' for RFC1738 or when
+ // the URL was opened with '<', and '"' otherwise. The post-decrement
+ // leaves i on the last character of the URL.
+ int32_t i = temp.FindCharInSet(u"<>\"", pos + 1);
+ if (i != kNotFound &&
+ temp[uint32_t(i--)] ==
+ (check == RFC1738 || temp[start - 1] == '<' ? '>' : '"')) {
+ end = uint32_t(i);
+ return end > pos;
+ }
+ return false;
+ }
+ case freetext:
+ case abbreviated: {
+ uint32_t i = pos + 1;
+ bool isEmail = aInString[pos] == (char16_t)'@';
+ bool seenOpeningParenthesis = false; // there is a '(' earlier in the URL
+ bool seenOpeningSquareBracket =
+ false; // there is a '[' earlier in the URL
+ for (; int32_t(i) < aInStringLength; i++) {
+ // These chars mark the end of the URL
+ if (aInString[i] == '>' || aInString[i] == '<' || aInString[i] == '"' ||
+ aInString[i] == '`' || aInString[i] == '}' || aInString[i] == '{' ||
+ (aInString[i] == ')' && !seenOpeningParenthesis) ||
+ (aInString[i] == ']' && !seenOpeningSquareBracket) ||
+ // Allow IPv6 addresses like http://[1080::8:800:200C:417A]/foo.
+ (aInString[i] == '[' && i > 2 &&
+ (aInString[i - 1] != '/' || aInString[i - 2] != '/')) ||
+ IsSpace(aInString[i]))
+ break;
+ // Disallow non-ascii-characters for email.
+ // Currently correct, but revisit later after standards changed.
+ if (isEmail && (aInString[i] == '(' || aInString[i] == '\'' ||
+ !IsAscii(aInString[i])))
+ break;
+ if (aInString[i] == '(') seenOpeningParenthesis = true;
+ if (aInString[i] == '[') seenOpeningSquareBracket = true;
+ }
+ // These chars are allowed in the middle of the URL, but not at end.
+ // Technically they are, but are used in normal text after the URL.
+ // (i is one past the URL here; step back over trailing punctuation.)
+ while (--i > pos && (aInString[i] == '.' || aInString[i] == ',' ||
+ aInString[i] == ';' || aInString[i] == '!' ||
+ aInString[i] == '?' || aInString[i] == '-' ||
+ aInString[i] == ':' || aInString[i] == '\''))
+ ;
+ // Succeed only if something remains after pos.
+ if (i > pos) {
+ end = i;
+ return true;
+ }
+ return false;
+ }
+ default:
+ return false;
+ } // switch
+}
+
+/**
+ * Derives the pieces needed to replace a recognised URL in the output:
+ * the URL itself (txtURL, whitespace stripped), its escaped description
+ * (desc, including the surrounding markers for the RFC notations), and the
+ * amount of output/input to replace before and after |pos|.
+ */
+void mozTXTToHTMLConv::CalculateURLBoundaries(
+    const char16_t* aInString, int32_t aInStringLength, const uint32_t pos,
+    const uint32_t whathasbeendone, const modetype check, const uint32_t start,
+    const uint32_t end, nsString& txtURL, nsString& desc,
+    int32_t& replaceBefore, int32_t& replaceAfter) {
+  // Number of marker characters that belong to the description in front of
+  // and behind the URL proper.
+  uint32_t markerBefore = 0;
+  uint32_t markerAfter = 0;
+  bool knownMode = true;
+  switch (check) {
+    case RFC1738:  // include "<URL:" and ">"
+      markerBefore = 5;
+      markerAfter = 1;
+      break;
+    case RFC2396E:  // include brackets
+      markerBefore = 1;
+      markerAfter = 1;
+      break;
+    case freetext:
+    case abbreviated:  // don't include brackets
+      break;
+    default:
+      knownMode = false;
+      break;
+  }  // switch
+
+  const uint32_t descstart = start - markerBefore;
+  if (knownMode) {
+    desc.Append(&aInString[descstart], end - descstart + 1 + markerAfter);
+    replaceAfter = end - pos + markerAfter;
+  }
+
+  EscapeStr(desc, false);
+
+  txtURL.Append(&aInString[start], end - start + 1);
+  txtURL.StripWhitespace();
+
+  // FIX ME
+  // The text in front of |pos| has already been emitted; convert it again
+  // (without URL detection, which prevents a loop) to learn how long it is.
+  nsAutoString alreadyEmitted;
+  ScanTXT(nsDependentSubstring(&aInString[descstart], pos - descstart),
+          ~kURLs /*prevents loop*/ & whathasbeendone, alreadyEmitted);
+  replaceBefore = alreadyEmitted.Length();
+}
+
+/**
+ * Decides whether aURL is worth turning into a link: http, https and mailto
+ * always are; other schemes only if their protocol handler is not an
+ * external one, or if an external application exists for the scheme.
+ * Returns false when mIOService is unset or any lookup fails.
+ */
+bool mozTXTToHTMLConv::ShouldLinkify(const nsCString& aURL) {
+  if (!mIOService) {
+    return false;
+  }
+
+  nsAutoCString scheme;
+  if (NS_FAILED(mIOService->ExtractScheme(aURL, scheme))) {
+    return false;
+  }
+
+  const bool alwaysLinked =
+      scheme == "http" || scheme == "https" || scheme == "mailto";
+  if (alwaysLinked) {
+    return true;
+  }
+
+  // Get the handler for this scheme.
+  nsCOMPtr<nsIProtocolHandler> handler;
+  if (NS_FAILED(mIOService->GetProtocolHandler(scheme.get(),
+                                               getter_AddRefs(handler)))) {
+    return false;
+  }
+
+  // A handler that is not an external protocol handler is built-in:
+  // linkify it.
+  nsCOMPtr<nsIExternalProtocolHandler> externalHandler =
+      do_QueryInterface(handler);
+  if (!externalHandler) {
+    return true;
+  }
+
+  // External handler: linkify only if an application exists for the scheme.
+  bool exists;
+  const nsresult rv =
+      externalHandler->ExternalAppExistsForScheme(scheme, &exists);
+  if (NS_FAILED(rv)) {
+    return false;
+  }
+  return exists;
+}
+
+/**
+ * Validates txtURL and, if it is acceptable, writes an anchor element for it
+ * to outputHTML. The anchor's class name reflects |mode|, its href is the
+ * escaped URL and its content is |desc|.
+ * Returns false (leaving outputHTML alone) if the IO service is unavailable,
+ * the URL should not be linkified, or no URI can be created from it.
+ */
+bool mozTXTToHTMLConv::CheckURLAndCreateHTML(const nsString& txtURL,
+                                             const nsString& desc,
+                                             const modetype mode,
+                                             nsString& outputHTML) {
+  // Lazily initialize mIOService
+  if (!mIOService) {
+    mIOService = do_GetIOService();
+  }
+  if (!mIOService) {
+    return false;
+  }
+
+  // See if the url should be linkified.
+  NS_ConvertUTF16toUTF8 utf8URL(txtURL);
+  if (!ShouldLinkify(utf8URL)) {
+    return false;
+  }
+
+  // it would be faster if we could just check to see if there is a protocol
+  // handler for the url and return instead of actually trying to create a
+  // url...
+  nsCOMPtr<nsIURI> uri;
+  const nsresult rv =
+      mIOService->NewURI(utf8URL, nullptr, nullptr, getter_AddRefs(uri));
+  if (NS_FAILED(rv) || !uri) {
+    return false;
+  }
+
+  // Real work: pick the class-name suffix for the notation that matched.
+  const char* classSuffix = "";
+  switch (mode) {
+    case RFC1738:
+      classSuffix = "rfc1738";
+      break;
+    case RFC2396E:
+      classSuffix = "rfc2396E";
+      break;
+    case freetext:
+      classSuffix = "freetext";
+      break;
+    case abbreviated:
+      classSuffix = "abbreviated";
+      break;
+    default:
+      break;
+  }
+
+  nsAutoString escapedURL(txtURL);
+  EscapeStr(escapedURL, true);
+
+  outputHTML.AssignLiteral("<a class=\"moz-txt-link-");
+  outputHTML.AppendASCII(classSuffix);
+  outputHTML.AppendLiteral("\" href=\"");
+  outputHTML += escapedURL;
+  outputHTML.AppendLiteral("\">");
+  outputHTML += desc;
+  outputHTML.AppendLiteral("</a>");
+  return true;
+}
+
+/**
+ * Runs FindURL on the given text at position aPos and discards the generated
+ * HTML.
+ *
+ * @param aInString  text to inspect
+ * @param aInLength  number of char16_t units in aInString
+ * @param aPos       index of the character to inspect
+ * @param aStartPos  (out) receives FindURL's replaceBefore value, or -1 if no
+ *                   URL was found
+ * @param aEndPos    (out) receives FindURL's replaceAfter value, or -1 if no
+ *                   URL was found
+ * @return NS_ERROR_NULL_POINTER if a pointer argument is null, else NS_OK
+ */
+NS_IMETHODIMP mozTXTToHTMLConv::FindURLInPlaintext(const char16_t* aInString,
+                                                   int32_t aInLength,
+                                                   int32_t aPos,
+                                                   int32_t* aStartPos,
+                                                   int32_t* aEndPos) {
+  if (!aInString || !aStartPos || !aEndPos) {
+    return NS_ERROR_NULL_POINTER;
+  }
+
+  *aStartPos = -1;
+  *aEndPos = -1;
+
+  // FindURL indexes aInString[aPos] unconditionally; a position outside the
+  // text cannot contain a URL, so report "nothing found" instead.
+  if (aPos < 0 || aPos >= aInLength) {
+    return NS_OK;
+  }
+
+  // call FindURL on the passed in string
+  nsAutoString outputHTML;  // we'll ignore the generated output HTML
+  FindURL(aInString, aInLength, aPos, kURLs, outputHTML, *aStartPos, *aEndPos);
+
+  return NS_OK;
+}
+
+/* Tries to recognise a URL around the character at aInString[pos].
+ * The notations listed in |ranking| are tried in order. For each one,
+ * FindURLStart and FindURLEnd delimit the candidate, CalculateURLBoundaries
+ * extracts the URL text and its description, and CheckURLAndCreateHTML
+ * validates the URL and builds the link markup. The first notation that
+ * succeeds wins.
+ * On success outputHTML holds the link markup and replaceBefore/replaceAfter
+ * take the values computed by CalculateURLBoundaries; otherwise
+ * replaceBefore and replaceAfter are left untouched.
+ * Returns true if a URL was found and converted.
+ */ bool mozTXTToHTMLConv::FindURL(const char16_t* aInString, int32_t aInLength,
+ const uint32_t pos,
+ const uint32_t whathasbeendone,
+ nsString& outputHTML, int32_t& replaceBefore,
+ int32_t& replaceAfter) {
+ enum statetype { unchecked, invalid, startok, endok, success };
+ static const modetype ranking[] = {RFC1738, RFC2396E, freetext, abbreviated};
+
+ statetype state[mozTXTToHTMLConv_lastMode + 1]; // 0(=unknown)..lastMode
+ /* I don't like this abuse of enums as index for the array,
+ but I don't know a better method */
+
+ // Define, which modes to check
+ /* all modes but abbreviated are checked for text[pos] == ':',
+ only abbreviated for '.', RFC2396E and abbreviated for '@' */
+ for (modetype iState = unknown; iState <= mozTXTToHTMLConv_lastMode;
+ iState = modetype(iState + 1))
+ state[iState] = aInString[pos] == ':' ? unchecked : invalid;  // default for every mode
+ switch (aInString[pos]) {  // per-character exceptions to the default above
+ case '@':
+ state[RFC2396E] = unchecked;
+ [[fallthrough]];
+ case '.':
+ state[abbreviated] = unchecked;
+ break;
+ case ':':
+ state[abbreviated] = invalid;
+ break;
+ default:
+ break;
+ }
+
+ // Test, first successful mode wins, sequence defined by |ranking|
+ int32_t iCheck = 0; // the currently tested modetype
+ modetype check = ranking[iCheck];
+ for (; iCheck < mozTXTToHTMLConv_numberOfModes && state[check] != success;
+ iCheck++)
+ /* check state from last run.
+ If this is the first, check this one, which isn't = success yet */
+ {
+ check = ranking[iCheck];
+
+ uint32_t start, end;  // only read after the corresponding Find* call succeeded
+
+ if (state[check] == unchecked)
+ if (FindURLStart(aInString, aInLength, pos, check, start))
+ state[check] = startok;
+
+ if (state[check] == startok)
+ if (FindURLEnd(aInString, aInLength, pos, check, start, end))
+ state[check] = endok;
+
+ if (state[check] == endok) {
+ nsAutoString txtURL, desc;
+ int32_t resultReplaceBefore, resultReplaceAfter;
+
+ CalculateURLBoundaries(aInString, aInLength, pos, whathasbeendone, check,
+ start, end, txtURL, desc, resultReplaceBefore,
+ resultReplaceAfter);
+
+ if (aInString[pos] != ':') {  // triggered by '@' or '.': let CompleteAbbreviatedURL rebuild txtURL
+ nsAutoString temp = txtURL;
+ txtURL.SetLength(0);
+ CompleteAbbreviatedURL(temp.get(), temp.Length(), pos - start, txtURL);
+ }
+
+ if (!txtURL.IsEmpty() &&
+ CheckURLAndCreateHTML(txtURL, desc, check, outputHTML)) {
+ replaceBefore = resultReplaceBefore;  // publish results only on success
+ replaceAfter = resultReplaceAfter;
+ state[check] = success;
+ }
+ } // if
+ } // for
+ return state[check] == success;
+}
+
+// True if aChar's Unicode general category is "letter".
+static inline bool IsAlpha(const uint32_t aChar) {
+  const auto category = mozilla::unicode::GetGenCategory(aChar);
+  return category == nsUGenCategory::kLetter;
+}
+
+// True if aChar's Unicode general category is "number".
+static inline bool IsDigit(const uint32_t aChar) {
+  const auto category = mozilla::unicode::GetGenCategory(aChar);
+  return category == nsUGenCategory::kNumber;
+}
+
+/**
+ * Tests whether |rep| occurs at the beginning of aInString (case-insensitive),
+ * optionally preceded and/or followed by a character of a given class.
+ *
+ * @param aInString  text to test
+ * @param aInLength  number of char16_t units in aInString
+ * @param rep        text to look for
+ * @param aRepLen    length of |rep| in char16_t units
+ * @param before     class required of the character (cluster) in front of
+ *                   |rep|; with LT_IGNORE |rep| must start at aInString[0]
+ * @param after      class required of the character behind |rep|. The end of
+ *                   the text counts as a delimiter.
+ * @return true if the text matches
+ */
+bool mozTXTToHTMLConv::ItMatchesDelimited(const char16_t* aInString,
+                                          int32_t aInLength,
+                                          const char16_t* rep, int32_t aRepLen,
+                                          LIMTYPE before, LIMTYPE after) {
+  // this little method gets called a LOT. I found we were spending a
+  // lot of time just calculating the length of the variable "rep"
+  // over and over again every time we called it. So we're now passing
+  // an integer in here.
+
+  // A character in front is needed unless |before| is ignored; a character
+  // behind is needed only for LT_ALPHA / LT_DIGIT (a delimiter may be the end
+  // of the text).
+  const bool needsCharBefore = before != LT_IGNORE;
+  const bool needsCharAfter = after != LT_IGNORE && after != LT_DELIMITER;
+  const int32_t minLength =
+      aRepLen + (needsCharBefore ? 1 : 0) + (needsCharAfter ? 1 : 0);
+  if (aInLength < minLength) {
+    return false;
+  }
+
+  uint32_t text0 = aInString[0];
+  if (aInLength > 1 && NS_IS_SURROGATE_PAIR(text0, aInString[1])) {
+    text0 = SURROGATE_TO_UCS4(text0, aInString[1]);
+  }
+  // find length of the char/cluster to be ignored
+  int32_t ignoreLen = before == LT_IGNORE ? 0 : 1;
+  if (ignoreLen) {
+    mozilla::unicode::ClusterIterator ci(aInString, aInLength);
+    ci.Next();
+    ignoreLen = ci - aInString;
+  }
+
+  // The character behind |rep| may not exist: the match can end exactly at
+  // the end of the text, or the leading cluster can be longer than one unit.
+  // Never read past the buffer; use U+0000, which is neither a letter nor a
+  // digit.
+  const int32_t afterIndex = aRepLen + ignoreLen;
+  uint32_t textAfterPos = afterIndex < aInLength ? aInString[afterIndex] : 0;
+  if (aInLength > afterIndex + 1 &&
+      NS_IS_SURROGATE_PAIR(textAfterPos, aInString[afterIndex + 1])) {
+    textAfterPos = SURROGATE_TO_UCS4(textAfterPos, aInString[afterIndex + 1]);
+  }
+
+  // Character class in front of |rep|.
+  if ((before == LT_ALPHA && !IsAlpha(text0)) ||
+      (before == LT_DIGIT && !IsDigit(text0)) ||
+      (before == LT_DELIMITER &&
+       (IsAlpha(text0) || IsDigit(text0) || text0 == *rep))) {
+    return false;
+  }
+
+  // Character class behind |rep|.
+  if ((after == LT_ALPHA && !IsAlpha(textAfterPos)) ||
+      (after == LT_DIGIT && !IsDigit(textAfterPos)) ||
+      (after == LT_DELIMITER &&
+       (IsAlpha(textAfterPos) || IsDigit(textAfterPos) ||
+        textAfterPos == *rep))) {
+    return false;
+  }
+
+  // Finally compare the text itself.
+  return Substring(Substring(aInString, aInString + aInLength), ignoreLen,
+                   aRepLen)
+      .Equals(Substring(rep, rep + aRepLen),
+              nsCaseInsensitiveStringComparator);
+}
+
+/**
+ * Counts the cluster positions in aInString at which ItMatchesDelimited
+ * succeeds for |rep| with the given before/after character classes.
+ */
+uint32_t mozTXTToHTMLConv::NumberOfMatches(const char16_t* aInString,
+                                           int32_t aInStringLength,
+                                           const char16_t* rep, int32_t aRepLen,
+                                           LIMTYPE before, LIMTYPE after) {
+  const char16_t* const limit = aInString + aInStringLength;
+  uint32_t hits = 0;
+
+  mozilla::unicode::ClusterIterator cluster(aInString, aInStringLength);
+  while (!cluster.AtEnd()) {
+    // Test the remainder of the text, starting at the current cluster.
+    if (ItMatchesDelimited(cluster, limit - cluster, rep, aRepLen, before,
+                           after)) {
+      ++hits;
+    }
+    cluster.Next();
+  }
+  return hits;
+}
+
+// Checks whether the text at aInString is an opening or closing
+// structured-phrase tag (e.g. "*") and, if so, emits the matching HTML.
+// NOTE: the converted html for the phrase is appended to aOutString
+// tagHTML and attributeHTML are plain ASCII (literal strings, in fact)
+// openTags is incremented for an opening tag and decremented for a closing
+// one. Returns true if a tag was converted.
+bool mozTXTToHTMLConv::StructPhraseHit(
+    const char16_t* aInString, int32_t aInStringLength, bool col0,
+    const char16_t* tagTXT, int32_t aTagTXTLen, const char* tagHTML,
+    const char* attributeHTML, nsAString& aOutString, uint32_t& openTags) {
+  /* We're searching for the following pattern:
+     LT_DELIMITER - "*" - ALPHA -
+     [ some text (maybe more "*"-pairs) - ALPHA ] "*" - LT_DELIMITER.
+     <strong> is only inserted, if existence of a pair could be verified
+     We use the first opening/closing tag, if we can choose */
+
+  // Unless we are at column 0, the first element is the character in front
+  // of the tag; skip it when counting the remaining closing tags.
+  const char16_t* const remainder = col0 ? aInString : &aInString[1];
+  const int32_t remainderLength = col0 ? aInStringLength : aInStringLength - 1;
+
+  // opening tag: must look like one, and more closing tags must remain than
+  // tags are currently open
+  const bool isOpeningTag =
+      ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen,
+                         (col0 ? LT_IGNORE : LT_DELIMITER), LT_ALPHA) &&
+      NumberOfMatches(remainder, remainderLength, tagTXT, aTagTXTLen, LT_ALPHA,
+                      LT_DELIMITER) > openTags;
+  if (isOpeningTag) {
+    openTags++;
+    aOutString.Append('<');
+    aOutString.AppendASCII(tagHTML);
+    aOutString.Append(char16_t(' '));
+    aOutString.AppendASCII(attributeHTML);
+    aOutString.AppendLiteral("><span class=\"moz-txt-tag\">");
+    aOutString.Append(tagTXT);
+    aOutString.AppendLiteral("</span>");
+    return true;
+  }
+
+  // closing tag: only possible while a tag is open
+  if (openTags == 0 ||
+      !ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen,
+                          LT_ALPHA, LT_DELIMITER)) {
+    return false;
+  }
+
+  openTags--;
+  aOutString.AppendLiteral("<span class=\"moz-txt-tag\">");
+  aOutString.Append(tagTXT);
+  aOutString.AppendLiteral("</span></");
+  aOutString.AppendASCII(tagHTML);
+  aOutString.Append(char16_t('>'));
+  return true;
+}
+
+/**
+ * Tests for one specific smiley (tagTXT) at the start of aInString and, on a
+ * hit, writes its replacement markup to outputHTML.
+ *
+ * With col0 the smiley must begin at aInString[0]; otherwise aInString[0]
+ * must be whitespace and the smiley follows it. The smiley must be followed
+ * by the end of the text, by whitespace, or by one of ". , ; 8 > ! ?" and
+ * then whitespace.
+ *
+ * @param imageName     CSS class for the outer span
+ * @param glyphTextLen  (out) on a hit, number of input units consumed
+ * @return true on a hit
+ */
+bool mozTXTToHTMLConv::SmilyHit(const char16_t* aInString, int32_t aLength,
+                                bool col0, const char* tagTXT,
+                                const char* imageName, nsString& outputHTML,
+                                int32_t& glyphTextLen) {
+  if (!aInString || !tagTXT || !imageName) {
+    return false;
+  }
+
+  const int32_t tagLen = strlen(tagTXT);
+  // Index of the first character behind the smiley.
+  const uint32_t delim = (col0 ? 0 : 1) + tagLen;
+
+  // What precedes the smiley?
+  if (!col0 && !IsSpace(aInString[0])) {
+    return false;
+  }
+
+  // What follows the smiley?
+  bool terminated = aLength <= int32_t(delim) || IsSpace(aInString[delim]);
+  if (!terminated && aLength > int32_t(delim + 1)) {
+    const char16_t follower = aInString[delim];
+    const bool isPunctuation = follower == '.' || follower == ',' ||
+                               follower == ';' || follower == '8' ||
+                               follower == '>' || follower == '!' ||
+                               follower == '?';
+    terminated = isPunctuation && IsSpace(aInString[delim + 1]);
+  }
+  if (!terminated) {
+    return false;
+  }
+
+  // Is it really this smiley?
+  // Note: tests at different pos for LT_IGNORE and LT_DELIMITER
+  NS_ConvertASCIItoUTF16 wideTag(tagTXT);
+  if (!ItMatchesDelimited(aInString, aLength, wideTag.get(), tagLen,
+                          col0 ? LT_IGNORE : LT_DELIMITER, LT_IGNORE)) {
+    return false;
+  }
+
+  if (!col0) {
+    // Re-emit the whitespace that preceded the smiley.
+    outputHTML.Truncate();
+    outputHTML.Append(char16_t(' '));
+  }
+
+  outputHTML.AppendLiteral("<span class=\"");   // <span class="
+  outputHTML.AppendASCII(imageName);            // e.g. smiley-frown
+  outputHTML.AppendLiteral("\" title=\"");      // " title="
+  outputHTML.AppendASCII(tagTXT);               // smiley tooltip
+  outputHTML.AppendLiteral("\"><span>");        // "><span>
+  outputHTML.AppendASCII(tagTXT);               // original text
+  outputHTML.AppendLiteral("</span></span>");   // </span></span>
+  glyphTextLen = (col0 ? 0 : 1) + tagLen;
+  return true;
+}
+
+// Tries to replace the text at aInString by a glyph: a smiley, a form feed
+// marker, "+/-" or a superscript ("x^2").
+// the glyph is appended to aOutputString instead of the original string...
+// On a hit, glyphTextLen receives the number of input units consumed and
+// true is returned; otherwise nothing is appended and false is returned.
+bool mozTXTToHTMLConv::GlyphHit(const char16_t* aInString, int32_t aInLength,
+                                bool col0, nsAString& aOutputString,
+                                int32_t& glyphTextLen) {
+  // Smiley text and the CSS class of its replacement, in test order.
+  struct SmileyEntry {
+    const char* text;
+    const char* cssClass;
+  };
+  static const SmileyEntry kSmileys[] = {
+      {":-)", "moz-smiley-s1"},    // smile
+      {":)", "moz-smiley-s1"},     // smile
+      {":-D", "moz-smiley-s5"},    // laughing
+      {":-(", "moz-smiley-s2"},    // frown
+      {":(", "moz-smiley-s2"},     // frown
+      {":-[", "moz-smiley-s6"},    // embarrassed
+      {";-)", "moz-smiley-s3"},    // wink
+      {";)", "moz-smiley-s3"},     // wink
+      {":-\\", "moz-smiley-s7"},   // undecided
+      {":-P", "moz-smiley-s4"},    // tongue
+      {";-P", "moz-smiley-s4"},    // tongue
+      {"=-O", "moz-smiley-s8"},    // surprise
+      {":-*", "moz-smiley-s9"},    // kiss
+      {">:o", "moz-smiley-s10"},   // yell
+      {">:-o", "moz-smiley-s10"},  // yell
+      {"8-)", "moz-smiley-s11"},   // cool
+      {":-$", "moz-smiley-s12"},   // money
+      {":-!", "moz-smiley-s13"},   // foot
+      {"O:-)", "moz-smiley-s14"},  // innocent
+      {":'(", "moz-smiley-s15"},   // cry
+      {":-X", "moz-smiley-s16"},   // sealed
+  };
+
+  if (aInLength < 1) {
+    return false;
+  }
+
+  const char16_t text0 = aInString[0];
+  // Do not read past a one-character text.
+  const char16_t text1 = aInLength > 1 ? aInString[1] : char16_t(0);
+  const char16_t firstChar = (col0 ? text0 : text1);
+
+  const auto canStartSmiley = [](char16_t aChar) {
+    return aChar == ':' || aChar == ';' || aChar == '=' || aChar == '>' ||
+           aChar == '8' || aChar == 'O';
+  };
+
+  // temporary variable used to store the glyph html text
+  nsAutoString outputHTML;
+
+  // Two cases are tried one after another:
+  //   pass 0: the smiley starts at the expected position (aInString[0] at
+  //           column 0, else behind the preceding character);
+  //   pass 1: at column 0 only, the smiley starts at aInString[1], i.e. it is
+  //           preceded by one character.
+  for (int pass = 0; pass < 2; pass++) {
+    bool smileyAtCol0;
+    if (pass == 0) {
+      if (!canStartSmiley(firstChar)) {
+        continue;
+      }
+      smileyAtCol0 = col0;
+    } else {
+      if (!col0 || !canStartSmiley(text1)) {
+        continue;
+      }
+      smileyAtCol0 = false;
+    }
+
+    // Every smiley is tested with the same column flag. (The ";)" test used
+    // to receive |col0| unconditionally, so it could never match in pass 1.)
+    for (const SmileyEntry& smiley : kSmileys) {
+      if (SmilyHit(aInString, aInLength, smileyAtCol0, smiley.text,
+                   smiley.cssClass, outputHTML, glyphTextLen)) {
+        aOutputString.Append(outputHTML);
+        return true;
+      }
+    }
+  }
+
+  if (text0 == '\f') {
+    aOutputString.AppendLiteral("<span class='moz-txt-formfeed'></span>");
+    glyphTextLen = 1;
+    return true;
+  }
+  if (text0 == '+' || text1 == '+') {
+    if (ItMatchesDelimited(aInString, aInLength, u" +/-", 4, LT_IGNORE,
+                           LT_IGNORE)) {
+      aOutputString.AppendLiteral(" &plusmn;");
+      glyphTextLen = 4;
+      return true;
+    }
+    if (col0 && ItMatchesDelimited(aInString, aInLength, u"+/-", 3, LT_IGNORE,
+                                   LT_IGNORE)) {
+      aOutputString.AppendLiteral("&plusmn;");
+      glyphTextLen = 3;
+      return true;
+    }
+  }
+
+  // x^2  =>  x<sup>2</sup>,   also handle powers x^-2,  x^0.5
+  // implement regular expression /[\dA-Za-z\)\]}]\^-?\d+(\.\d+)*[^\dA-Za-z]/
+  if (text1 == '^' &&
+      (IsAsciiDigit(text0) || IsAsciiAlpha(text0) || text0 == ')' ||
+       text0 == ']' || text0 == '}') &&
+      ((2 < aInLength && IsAsciiDigit(aInString[2])) ||
+       (3 < aInLength && aInString[2] == '-' && IsAsciiDigit(aInString[3])))) {
+    // Find first non-digit
+    int32_t delimPos = 3;  // skip "^" and first digit (or '-')
+    for (; delimPos < aInLength &&
+           (IsAsciiDigit(aInString[delimPos]) ||
+            (aInString[delimPos] == '.' && delimPos + 1 < aInLength &&
+             IsAsciiDigit(aInString[delimPos + 1])));
+         delimPos++)
+      ;
+
+    // An exponent directly followed by a letter is not a power.
+    if (delimPos < aInLength && IsAsciiAlpha(aInString[delimPos])) {
+      return false;
+    }
+
+    outputHTML.Truncate();
+    outputHTML += text0;
+    outputHTML.AppendLiteral(
+        "<sup class=\"moz-txt-sup\">"
+        "<span style=\"display:inline-block;width:0;height:0;overflow:hidden\">"
+        "^</span>");
+
+    aOutputString.Append(outputHTML);
+    aOutputString.Append(&aInString[2], delimPos - 2);
+    aOutputString.AppendLiteral("</sup>");
+
+    glyphTextLen = delimPos /* - 1 + 1 */;
+    return true;
+  }
+  /*
+   The following strings are not substituted:
+   |TXT   |HTML     |Reason
+   +------+---------+----------
+    ->     &larr;    Bug #454
+    =>     &lArr;    ditto
+    <-     &rarr;    ditto
+    <=     &rArr;    ditto
+    (tm)   &trade;   ditto
+    1/4    &frac14;  is triggered by 1/4 Part 1, 2/4 Part 2, ...
+    3/4    &frac34;  ditto
+    1/2    &frac12;  similar
+  */
+  return false;
+}
+
+/***************************************************************************
+ Library-internal Interface
+****************************************************************************/
+
+NS_IMPL_ISUPPORTS(mozTXTToHTMLConv, mozITXTToHTMLConv, nsIStreamConverter,
+ nsIStreamListener, nsIRequestObserver)  // nsISupports implementation for the four interfaces listed here
+
+/* Counts the quote markers at the logical start of |line|.
+ * |line| must be null-terminated (its length is taken with NS_strlen).
+ * Scanning starts at index logLineStart. For every marker found,
+ * logLineStart is advanced past the marker and past one following space, if
+ * present. Scanning stops at the first position that is not a marker, or
+ * where the text starts with ">From " (compared case-insensitively); that
+ * marker is not counted.
+ * Returns the number of markers counted.
+ */ int32_t mozTXTToHTMLConv::CiteLevelTXT(const char16_t* line,
+ uint32_t& logLineStart) {
+ int32_t result = 0;
+ int32_t lineLength = NS_strlen(line);
+
+ bool moreCites = true;
+ while (moreCites) {
+ /* E.g. the following lines count as quote:
+
+ > text
+ //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
+ >text
+ //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
+ > text
+ ] text
+ USER> text
+ USER] text
+ //#endif
+
+ logLineStart is the position of "t" in this example
+ */
+ uint32_t i = logLineStart;
+
+#ifdef QUOTE_RECOGNITION_AGGRESSIVE
+ for (; int32_t(i) < lineLength && IsSpace(line[i]); i++)
+ ;
+ for (; int32_t(i) < lineLength && IsAsciiAlpha(line[i]) &&
+ nsCRT::IsUpper(line[i]);
+ i++)
+ ;
+ if (int32_t(i) < lineLength && (line[i] == '>' || line[i] == ']'))
+#else
+ if (int32_t(i) < lineLength && line[i] == '>')
+#endif
+ {
+ i++;  // step over the marker itself
+ if (int32_t(i) < lineLength && line[i] == ' ') i++;  // and over one optional space
+ // sendmail/mbox
+ // Placed here for performance increase
+ const char16_t* indexString = &line[logLineStart];
+ // here, |logLineStart < lineLength| is always true
+ uint32_t minlength = std::min(uint32_t(6), NS_strlen(indexString));  // 6 == length of ">From "
+ if (Substring(indexString, indexString + minlength)
+ .Equals(Substring(u">From "_ns, 0, minlength),
+ nsCaseInsensitiveStringComparator))
+ // XXX RFC2646
+ moreCites = false;
+ else {
+ result++;
+ logLineStart = i;
+ }
+ } else
+ moreCites = false;
+ }
+
+ return result;
+}
+
+/* Converts plain text to HTML.
+ * |whattodo| selects the optional conversions: kURLs (turn URLs into links),
+ * kGlyphSubstitution (smileys and other glyphs, see GlyphHit) and
+ * kStructPhrase (*bold*, /italic/, _underline_, |code|). The characters
+ * '<', '>' and '&' are always passed through EscapeChar; everything else is
+ * copied cluster by cluster.
+ * For non-empty input the result is appended to aOutString; for empty input
+ * aOutString is truncated instead.
+ * Returns NS_ERROR_OUT_OF_MEMORY if reserving the output capacity fails,
+ * NS_OK otherwise.
+ */ NS_IMETHODIMP
+mozTXTToHTMLConv::ScanTXT(const nsAString& aInString, uint32_t whattodo,
+ nsAString& aOutString) {
+ if (aInString.Length() == 0) {
+ aOutString.Truncate();
+ return NS_OK;
+ }
+
+ if (!aOutString.SetCapacity(uint32_t(aInString.Length() * growthRate),
+ mozilla::fallible)) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+
+ bool doURLs = 0 != (whattodo & kURLs);
+ bool doGlyphSubstitution = 0 != (whattodo & kGlyphSubstitution);
+ bool doStructPhrase = 0 != (whattodo & kStructPhrase);
+
+ uint32_t structPhrase_strong = 0; // Number of currently open tags
+ uint32_t structPhrase_underline = 0;
+ uint32_t structPhrase_italic = 0;
+ uint32_t structPhrase_code = 0;
+
+ uint32_t endOfLastURLOutput = 0;  // output length right after the most recently inserted link
+
+ nsAutoString outputHTML; // moved here for performance increase
+
+ const char16_t* rawInputString = aInString.BeginReading();
+ uint32_t inLength = aInString.Length();
+
+ for (mozilla::unicode::ClusterIterator ci(rawInputString, inLength);
+ !ci.AtEnd();) {
+ uint32_t i = ci - rawInputString;  // index of the current cluster
+ if (doGlyphSubstitution) {
+ int32_t glyphTextLen;
+ if (GlyphHit(&rawInputString[i], inLength - i, i == 0, aOutString,
+ glyphTextLen)) {
+ i += glyphTextLen;
+ while (ci < rawInputString + i) {  // move the iterator past the consumed text
+ ci.Next();
+ }
+ continue;
+ }
+ }
+
+ if (doStructPhrase) {
+ const char16_t* newOffset = rawInputString;
+ int32_t newLength = aInString.Length();
+ if (i > 0) // skip the first element?
+ {
+ mozilla::unicode::ClusterReverseIterator ri(rawInputString, i);
+ ri.Next();
+ newOffset = ri;  // start at the cluster preceding the tag
+ newLength = aInString.Length() - (ri - rawInputString);
+ }
+
+ switch (aInString[i]) // Performance increase
+ {
+ case '*':
+ if (StructPhraseHit(newOffset, newLength, i == 0, u"*", 1, "b",
+ "class=\"moz-txt-star\"", aOutString,
+ structPhrase_strong)) {
+ ci.Next();
+ continue;
+ }
+ break;
+ case '/':
+ if (StructPhraseHit(newOffset, newLength, i == 0, u"/", 1, "i",
+ "class=\"moz-txt-slash\"", aOutString,
+ structPhrase_italic)) {
+ ci.Next();
+ continue;
+ }
+ break;
+ case '_':
+ if (StructPhraseHit(newOffset, newLength, i == 0, u"_", 1,
+ "span" /* <u> is deprecated */,
+ "class=\"moz-txt-underscore\"", aOutString,
+ structPhrase_underline)) {
+ ci.Next();
+ continue;
+ }
+ break;
+ case '|':
+ if (StructPhraseHit(newOffset, newLength, i == 0, u"|", 1, "code",
+ "class=\"moz-txt-verticalline\"", aOutString,
+ structPhrase_code)) {
+ ci.Next();
+ continue;
+ }
+ break;
+ }
+ }
+
+ if (doURLs) {
+ switch (aInString[i]) {
+ case ':':
+ case '@':
+ case '.':
+ if ((i == 0 || ((i > 0) && aInString[i - 1] != ' ')) &&
+ ((i == aInString.Length() - 1) ||
+ (aInString[i + 1] != ' '))) // Performance increase
+ {
+ int32_t replaceBefore;
+ int32_t replaceAfter;
+ if (FindURL(rawInputString, aInString.Length(), i, whattodo,
+ outputHTML, replaceBefore, replaceAfter) &&
+ structPhrase_strong + structPhrase_italic +
+ structPhrase_underline + structPhrase_code ==
+ 0
+ /* workaround for bug #19445 */) {
+ // Don't cut into previously inserted HTML (bug 1509493)
+ if (aOutString.Length() - replaceBefore < endOfLastURLOutput) {
+ break;
+ }
+ aOutString.Cut(aOutString.Length() - replaceBefore,
+ replaceBefore);  // drop the already emitted start of the URL
+ aOutString += outputHTML;
+ endOfLastURLOutput = aOutString.Length();
+ i += replaceAfter + 1;
+ while (ci < rawInputString + i) {  // move the iterator past the URL
+ ci.Next();
+ }
+ continue;
+ }
+ }
+ break;
+ } // switch
+ }
+
+ switch (aInString[i]) {
+ // Special symbols
+ case '<':
+ case '>':
+ case '&':
+ EscapeChar(aInString[i], aOutString, false);
+ ci.Next();
+ break;
+ // Normal characters
+ default: {
+ const char16_t* start = ci;
+ ci.Next();
+ aOutString += Substring(start, (const char16_t*)ci);  // copy the whole cluster
+ break;
+ }
+ }
+ }
+ return NS_OK;
+}
+
+/**
+ * Applies ScanTXT to the text portions of an HTML fragment.
+ *
+ * Tags are copied to the output unchanged. The content of <a>, <style>,
+ * <script> and <head> elements and of comments is copied unchanged as well.
+ * Text between tags is unescaped and handed to ScanTXT with |whattodo|.
+ * The result is appended to aOutString.
+ *
+ * @return NS_ERROR_OUT_OF_MEMORY if reserving the output capacity fails, the
+ *         error reported by ScanTXT if converting a text run fails, NS_OK
+ *         otherwise
+ */
+NS_IMETHODIMP
+mozTXTToHTMLConv::ScanHTML(const nsAString& input, uint32_t whattodo,
+                           nsAString& aOutString) {
+  const nsPromiseFlatString& aInString = PromiseFlatString(input);
+  if (!aOutString.SetCapacity(uint32_t(aInString.Length() * growthRate),
+                              mozilla::fallible)) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+
+  // some common variables we were recalculating
+  // every time inside the for loop...
+  int32_t lengthOfInString = aInString.Length();
+  const char16_t* uniBuffer = aInString.get();
+
+#ifdef DEBUG_BenB_Perf
+  PRTime parsing_start = PR_IntervalNow();
+#endif
+
+  // Look for simple entities not included in a tags and scan them.
+  // Skip all tags ("<[...]>") and content in an a link tag ("<a [...]</a>"),
+  // comment tag ("<!--[...]-->"), style tag, script tag or head tag.
+  // Unescape the rest (text between tags) and pass it to ScanTXT.
+  nsAutoCString canFollow(" \f\n\r\t>");
+  for (int32_t i = 0; i < lengthOfInString;) {
+    if (aInString[i] == '<')  // html tag
+    {
+      int32_t start = i;
+      if (i + 2 < lengthOfInString && nsCRT::ToLower(aInString[i + 1]) == 'a' &&
+          canFollow.FindChar(aInString[i + 2]) != kNotFound)
+      // if a tag, skip until </a>.
+      // Make sure there's a white-space character after, not to match "abbr".
+      {
+        i = aInString.Find("</a>", true, i);
+        if (i == kNotFound)
+          i = lengthOfInString;
+        else
+          i += 4;
+      } else if (Substring(aInString, i + 1, 3).LowerCaseEqualsASCII("!--"))
+      // if commented-out code, skip until -->
+      {
+        i = aInString.Find("-->", false, i);
+        if (i == kNotFound)
+          i = lengthOfInString;
+        else
+          i += 3;
+      } else if (i + 6 < lengthOfInString &&
+                 Substring(aInString, i + 1, 5).LowerCaseEqualsASCII("style") &&
+                 canFollow.FindChar(aInString[i + 6]) != kNotFound)
+      // if style tag, skip until </style>
+      {
+        i = aInString.Find("</style>", true, i);
+        if (i == kNotFound)
+          i = lengthOfInString;
+        else
+          i += 8;
+      } else if (i + 7 < lengthOfInString &&
+                 Substring(aInString, i + 1, 6)
+                     .LowerCaseEqualsASCII("script") &&
+                 canFollow.FindChar(aInString[i + 7]) != kNotFound)
+      // if script tag, skip until </script>
+      {
+        i = aInString.Find("</script>", true, i);
+        if (i == kNotFound)
+          i = lengthOfInString;
+        else
+          i += 9;
+      } else if (i + 5 < lengthOfInString &&
+                 Substring(aInString, i + 1, 4).LowerCaseEqualsASCII("head") &&
+                 canFollow.FindChar(aInString[i + 5]) != kNotFound)
+      // if head tag, skip until </head>
+      // Make sure not to match <header>.
+      {
+        i = aInString.Find("</head>", true, i);
+        if (i == kNotFound)
+          i = lengthOfInString;
+        else
+          i += 7;
+      } else  // just skip tag (attributes etc.)
+      {
+        i = aInString.FindChar('>', i);
+        if (i == kNotFound)
+          i = lengthOfInString;
+        else
+          i++;
+      }
+      // Copy the skipped markup to the output unchanged.
+      aOutString.Append(&uniBuffer[start], i - start);
+    } else {
+      // Text run: everything up to the next tag (or the end of the input).
+      uint32_t start = uint32_t(i);
+      i = aInString.FindChar('<', i);
+      if (i == kNotFound) i = lengthOfInString;
+
+      nsString tempString;
+      tempString.SetCapacity(uint32_t((uint32_t(i) - start) * growthRate));
+      UnescapeStr(uniBuffer, start, uint32_t(i) - start, tempString);
+      // Do not silently drop a text run if its conversion fails.
+      nsresult rv = ScanTXT(tempString, whattodo, aOutString);
+      if (NS_FAILED(rv)) {
+        return rv;
+      }
+    }
+  }
+
+#ifdef DEBUG_BenB_Perf
+  printf("ScanHTML time: %d ms\n",
+         PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start));
+#endif
+  return NS_OK;
+}
+
+/****************************************************************************
+ XPCOM Interface
+*****************************************************************************/
+
+NS_IMETHODIMP  // Stream-converter entry point; stream conversion is not implemented by this class.
+mozTXTToHTMLConv::Convert(nsIInputStream* aFromStream, const char* aFromType,
+ const char* aToType, nsISupports* aCtxt,
+ nsIInputStream** _retval) {
+ return NS_ERROR_NOT_IMPLEMENTED;
+}
+
+NS_IMETHODIMP  // Asynchronous stream conversion is not implemented by this class.
+mozTXTToHTMLConv::AsyncConvertData(const char* aFromType, const char* aToType,
+ nsIStreamListener* aListener,
+ nsISupports* aCtxt) {
+ return NS_ERROR_NOT_IMPLEMENTED;
+}
+
+NS_IMETHODIMP  // Not implemented: aToType is never written.
+mozTXTToHTMLConv::GetConvertedType(const nsACString& aFromType,
+ nsIChannel* aChannel, nsACString& aToType) {
+ return NS_ERROR_NOT_IMPLEMENTED;
+}
+
+NS_IMETHODIMP  // Stream-listener callback; not implemented, the stream is never read.
+mozTXTToHTMLConv::OnDataAvailable(nsIRequest* request, nsIInputStream* inStr,
+ uint64_t sourceOffset, uint32_t count) {
+ return NS_ERROR_NOT_IMPLEMENTED;
+}
+
+NS_IMETHODIMP  // Request-observer callback; not implemented.
+mozTXTToHTMLConv::OnStartRequest(nsIRequest* request) {
+ return NS_ERROR_NOT_IMPLEMENTED;
+}
+
+NS_IMETHODIMP  // Request-observer callback; not implemented.
+mozTXTToHTMLConv::OnStopRequest(nsIRequest* request, nsresult aStatus) {
+ return NS_ERROR_NOT_IMPLEMENTED;
+}
+
+/**
+ * XPCOM wrapper around the reference-taking CiteLevelTXT overload.
+ * Stores the cite level in *_retval and updates *logLineStart.
+ * Returns NS_ERROR_NULL_POINTER if any argument is null.
+ */
+NS_IMETHODIMP
+mozTXTToHTMLConv::CiteLevelTXT(const char16_t* line, uint32_t* logLineStart,
+                               uint32_t* _retval) {
+  const bool haveAllArguments = logLineStart && _retval && line;
+  if (!haveAllArguments) {
+    return NS_ERROR_NULL_POINTER;
+  }
+
+  uint32_t& lineStart = *logLineStart;
+  *_retval = CiteLevelTXT(line, lineStart);
+  return NS_OK;
+}
+
+/**
+ * Creates a new mozTXTToHTMLConv and hands an owning reference to *aConv.
+ * Returns NS_ERROR_NULL_POINTER if aConv is null, NS_OK otherwise.
+ */
+nsresult MOZ_NewTXTToHTMLConv(mozTXTToHTMLConv** aConv) {
+  MOZ_ASSERT(aConv != nullptr, "null ptr");
+  if (aConv == nullptr) {
+    return NS_ERROR_NULL_POINTER;
+  }
+
+  RefPtr<mozTXTToHTMLConv> converter(new mozTXTToHTMLConv());
+  converter.forget(aConv);  // transfers the reference to the caller
+  // return (*aConv)->Init();
+  return NS_OK;
+}
diff --git a/netwerk/streamconv/converters/mozTXTToHTMLConv.h b/netwerk/streamconv/converters/mozTXTToHTMLConv.h
new file mode 100644
index 0000000000..23c3dac30f
--- /dev/null
+++ b/netwerk/streamconv/converters/mozTXTToHTMLConv.h
@@ -0,0 +1,284 @@
+/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ Description: Currently only functions to enhance plain text with HTML tags.
+ See mozITXTToHTMLConv. Stream conversion is defunct.
+*/
+
+#ifndef _mozTXTToHTMLConv_h__
+#define _mozTXTToHTMLConv_h__
+
+#include "mozITXTToHTMLConv.h"
+#include "nsString.h"
+#include "nsCOMPtr.h"
+
+class nsIIOService;
+
+class mozTXTToHTMLConv : public mozITXTToHTMLConv {
+ virtual ~mozTXTToHTMLConv() = default;  // private: not destructible from outside the class
+
+ //////////////////////////////////////////////////////////
+ public:
+ //////////////////////////////////////////////////////////
+
+ mozTXTToHTMLConv() = default;
+ NS_DECL_ISUPPORTS
+
+ NS_DECL_MOZITXTTOHTMLCONV
+ NS_DECL_NSIREQUESTOBSERVER
+ NS_DECL_NSISTREAMLISTENER
+ NS_DECL_NSISTREAMCONVERTER
+
+ /**
+ see mozITXTToHTMLConv::CiteLevelTXT
+ */
+ int32_t CiteLevelTXT(const char16_t* line, uint32_t& logLineStart);
+
+ //////////////////////////////////////////////////////////
+ protected:
+ //////////////////////////////////////////////////////////
+ nsCOMPtr<nsIIOService>
+ mIOService; // for performance reasons, cache the netwerk service...
+ /**
+ Completes<ul>
+ <li>Case 1: mailto: "mozilla@bucksch.org" -> "mailto:mozilla@bucksch.org"
+ <li>Case 2: http: "www.mozilla.org" -> "http://www.mozilla.org"
+ <li>Case 3: ftp: "ftp.mozilla.org" -> "ftp://ftp.mozilla.org"
+ </ul>
+ It does not check whether the resulting URL is valid.
+ @param aInString (in): abbreviated URL
+ @param pos (in): position of "@" (case 1) or first "." (case 2 and 3)
+ @param aOutString (out): completed URL on success, empty string on failure
+ */
+ void CompleteAbbreviatedURL(const char16_t* aInString, int32_t aInLength,
+ const uint32_t pos, nsString& aOutString);
+
+ //////////////////////////////////////////////////////////
+ private:
+ //////////////////////////////////////////////////////////
+
+ enum LIMTYPE {
+ LT_IGNORE, // limitation not checked
+ LT_DELIMITER, // not alphanumeric and not rep[0]. End of text is also ok.
+ LT_ALPHA, // alpha char
+ LT_DIGIT
+ };
+
+ /**
+ @param text (in): the string to search through.<p>
+ If before = IGNORE,<br>
+ rep is compared starting at 1. char of text (text[0]),<br>
+ else starting at 2. char of text (text[1]).
+ Chars after "after"-delimiter are ignored.
+ @param rep (in): the string to look for
+ @param aRepLen (in): the number of bytes in the string to look for
+ @param before (in): limitation before rep
+ @param after (in): limitation after rep
+ @return true, if rep is found and limitation spec is met or rep is empty
+ */
+ bool ItMatchesDelimited(const char16_t* aInString, int32_t aInLength,
+ const char16_t* rep, int32_t aRepLen, LIMTYPE before,
+ LIMTYPE after);
+
+ /**
+ @param see ItMatchesDelimited
+ @return Number of ItMatchesDelimited in text
+ */
+ uint32_t NumberOfMatches(const char16_t* aInString, int32_t aInStringLength,
+ const char16_t* rep, int32_t aRepLen, LIMTYPE before,
+ LIMTYPE after);
+
+ /**
+ Currently only changes "<", ">" and "&". All others stay as they are.<p>
+ "Char" in function name to avoid side effects with nsString(ch)
+ constructors.
+ @param ch (in)
+ @param aStringToAppendto (out) - the string to append the escaped
+ string to.
+ @param inAttribute (in) - will escape quotes, too (which is
+ only needed for attribute values)
+ */
+ void EscapeChar(const char16_t ch, nsAString& aStringToAppendto,
+ bool inAttribute);
+
+ /**
+ See EscapeChar. Escapes the string in place.
+ */
+ void EscapeStr(nsString& aInString, bool inAttribute);
+
+ /**
+ Currently only reverts "<", ">" and "&". All others stay as they are.<p>
+ @param aInString (in) HTML string
+ @param aStartPos (in) start index into the buffer
+ @param aLength (in) length of the buffer
+ @param aOutString (out) unescaped buffer
+ */
+ void UnescapeStr(const char16_t* aInString, int32_t aStartPos,
+ int32_t aLength, nsString& aOutString);
+
+ /**
+ <em>Note</em>: I use different strategies to pass context between the
+ functions (full text and pos vs. cut text and col0, glyphTextLen vs.
+ replaceBefore/-After). It makes some sense, but is hard to understand
+ (maintain) :-(.
+ */
+
+ /**
+ <p><em>Note:</em> replaceBefore + replaceAfter + 1 (for char at pos) chars
+ in text should be replaced by outputHTML.</p>
+ <p><em>Note:</em> This function should be able to process a URL on multiple
+ lines, but currently, ScanForURLs is called for every line, so it can't.</p>
+ @param text (in): includes possibly a URL
+ @param pos (in): position in text, where either ":", "." or "@" are found
+ @param whathasbeendone (in): What the calling ScanTXT did/has to do with the
+ (not-linkified) text, i.e. usually the "whattodo" parameter.
+ (Needed to calculate replaceBefore.) NOT what will be done with
+ the content of the link.
+ @param outputHTML (out): URL with HTML-a tag
+ @param replaceBefore (out): Number of chars of URL before pos
+ @param replaceAfter (out): Number of chars of URL after pos
+ @return URL found
+ */
+ bool FindURL(const char16_t* aInString, int32_t aInLength, const uint32_t pos,
+ const uint32_t whathasbeendone, nsString& outputHTML,
+ int32_t& replaceBefore, int32_t& replaceAfter);
+
+ enum modetype {
+ unknown,
+ RFC1738, /* Check, if RFC1738, APPENDIX compliant,
+ like "<URL:http://www.mozilla.org>". */
+ RFC2396E, /* RFC2396, APPENDIX E allows anglebrackets (like
+ "<http://www.mozilla.org>") (without "URL:") or
+ quotation marks(like ""http://www.mozilla.org"").
+ Also allow email addresses without scheme,
+ e.g. "<mozilla@bucksch.org>" */
+ freetext, /* assume heading scheme
+ with "[a-zA-Z][a-zA-Z0-9+\-\.]*:" like "news:"
+ (see RFC2396, Section 3.1).
+ Certain characters (see code) or any whitespace
+ (including linebreaks) end the URL.
+ Other certain (punctuation) characters (see code)
+ at the end are stripped off. */
+ abbreviated /* Similar to freetext, but without scheme, e.g.
+ "www.mozilla.org", "ftp.mozilla.org" and
+ "mozilla@bucksch.org". */
+ /* RFC1738 and RFC2396E type URLs may use multiple lines,
+ whitespace is stripped. Special characters like ")" stay intact.*/
+ };
+
+ /**
+ * @param text (in), pos (in): see FindURL
+ * @param check (in): Start must conform to this mode
+ * @param start (out): Position in text, where URL (including brackets or
+ * similar) starts
+ * @return |check|-conform start has been found
+ */
+ bool FindURLStart(const char16_t* aInString, int32_t aInLength,
+ const uint32_t pos, const modetype check, uint32_t& start);
+
+ /**
+ * @param text (in), pos (in): see FindURL
+ * @param check (in): End must conform to this mode
+ * @param start (in): see FindURLStart
+ * @param end (out): Similar to |start| param of FindURLStart
+ * @return |check|-conform end has been found
+ */
+ bool FindURLEnd(const char16_t* aInString, int32_t aInStringLength,
+ const uint32_t pos, const modetype check,
+ const uint32_t start, uint32_t& end);
+
+ /**
+ * @param text (in), pos (in), whathasbeendone (in): see FindURL
+ * @param check (in): Current mode
+ * @param start (in), end (in): see FindURLEnd
+ * @param txtURL (out): Guessed (raw) URL.
+ * Without whitespace, but not completed.
+ * @param desc (out): Link as shown to the user, but already escaped.
+ * Should be placed between the <a> and </a> tags.
+ * @param replaceBefore(out), replaceAfter (out): see FindURL
+ */
+ void CalculateURLBoundaries(const char16_t* aInString,
+ int32_t aInStringLength, const uint32_t pos,
+ const uint32_t whathasbeendone,
+ const modetype check, const uint32_t start,
+ const uint32_t end, nsString& txtURL,
+ nsString& desc, int32_t& replaceBefore,
+ int32_t& replaceAfter);
+
+ /**
+ * @param txtURL (in), desc (in): see CalculateURLBoundaries
+ * @param outputHTML (out): see FindURL
+ * @return A valid URL could be found (and creation of HTML successful)
+ */
+ bool CheckURLAndCreateHTML(const nsString& txtURL, const nsString& desc,
+ const modetype mode, nsString& outputHTML);
+
+ /**
+ @param text (in): line of text possibly with tagTXT.<p>
+ if col0 is true,
+ starting with tagTXT<br>
+ else
+ starting one char before tagTXT
+ @param col0 (in): tagTXT is on the beginning of the line (or paragraph).
+ openTags must be 0 then.
+ @param tagTXT (in): Tag in plaintext to search for, e.g. "*"
+ @param aTagTxtLen (in): length of tagTXT.
+ @param tagHTML (in): HTML-Tag to replace tagTXT with,
+ without "<" and ">", e.g. "strong"
+ @param attributeHTML (in): HTML-attribute to add to opening tagHTML,
+ e.g. "class=txt_star"
+ @param aOutputString: string to APPEND the converted html into
+ @param openTags (in/out): Number of currently open tags of type tagHTML
+ @return Conversion succeeded
+ */
+ bool StructPhraseHit(const char16_t* aInString, int32_t aInStringLength,
+ bool col0, const char16_t* tagTXT, int32_t aTagTxtLen,
+ const char* tagHTML, const char* attributeHTML,
+ nsAString& aOutputString, uint32_t& openTags);
+
+ /**
+ @param text (in), col0 (in): see GlyphHit
+ @param tagTXT (in): Smiley, see also StructPhraseHit
+ @param imageName (in): the basename of the file that contains the image for
+ this smiley
+ @param outputHTML (out): new string containing the html for the smiley
+ @param glyphTextLen (out): see GlyphHit
+ */
+ bool SmilyHit(const char16_t* aInString, int32_t aLength, bool col0,
+ const char* tagTXT, const char* imageName, nsString& outputHTML,
+ int32_t& glyphTextLen);
+
+ /**
+ Checks, if we can replace some chars at the start of line with prettier HTML
+ code.<p>
+ If success is reported, replace the first glyphTextLen chars with outputHTML
+
+ @param text (in): line of text possibly with Glyph.<p>
+ If col0 is true,
+ starting with Glyph <br><!-- (br not part of text) -->
+ else
+ starting one char before Glyph
+ @param col0 (in): text starts at the beginning of the line (or paragraph)
+ @param aOutString (out): APPENDS html for the glyph to this string
+ @param glyphTextLen (out): Length of original text to replace
+ @return see StructPhraseHit
+ */
+ bool GlyphHit(const char16_t* aInString, int32_t aInLength, bool col0,
+ nsAString& aOutString, int32_t& glyphTextLen);
+
+ /**
+ Check if a given url should be linkified.
+ @param aURL (in): url to be checked on.
+ */
+ bool ShouldLinkify(const nsCString& aURL);
+};
+
+// It's said that Win32 and Mac don't like static const members
+const int32_t mozTXTToHTMLConv_lastMode = 4;  // numeric value of the last modetype enumerator (abbreviated)
+// Needed (only) by mozTXTToHTMLConv::FindURL
+const int32_t mozTXTToHTMLConv_numberOfModes = 4; // ditto; unknown not counted
+
+#endif
diff --git a/netwerk/streamconv/converters/nsDirIndex.cpp b/netwerk/streamconv/converters/nsDirIndex.cpp
new file mode 100644
index 0000000000..c6e89fd7c5
--- /dev/null
+++ b/netwerk/streamconv/converters/nsDirIndex.cpp
@@ -0,0 +1,89 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsDirIndex.h"
+
+NS_IMPL_ISUPPORTS(nsDirIndex, nsIDirIndex)
+
+// A fresh entry has unknown type, unknown size (UINT64_MAX stored into the
+// signed mSize member) and last-modified time -1; the string members start
+// out empty.
+nsDirIndex::nsDirIndex()
+    : mType(TYPE_UNKNOWN),
+      mSize(UINT64_MAX),
+      mLastModified(-1LL) {}
+
+// Copies the stored entry type into *aType. A null out-pointer is rejected by
+// NS_ENSURE_ARG_POINTER.
+NS_IMETHODIMP nsDirIndex::GetType(uint32_t* aType) {
+  NS_ENSURE_ARG_POINTER(aType);
+  *aType = mType;
+  return NS_OK;
+}
+
+// Stores the entry type; the value is not validated.
+NS_IMETHODIMP nsDirIndex::SetType(uint32_t aType) {
+  mType = aType;
+  return NS_OK;
+}
+
+// Copies the stored content type into aContentType.
+NS_IMETHODIMP nsDirIndex::GetContentType(nsACString& aContentType) {
+  aContentType.Assign(mContentType);
+  return NS_OK;
+}
+
+// Replaces the stored content type with a copy of aContentType.
+NS_IMETHODIMP nsDirIndex::SetContentType(const nsACString& aContentType) {
+  mContentType.Assign(aContentType);
+  return NS_OK;
+}
+
+// Copies the stored location into aLocation.
+NS_IMETHODIMP nsDirIndex::GetLocation(nsACString& aLocation) {
+  aLocation.Assign(mLocation);
+  return NS_OK;
+}
+
+// Replaces the stored location with a copy of aLocation.
+NS_IMETHODIMP nsDirIndex::SetLocation(const nsACString& aLocation) {
+  mLocation.Assign(aLocation);
+  return NS_OK;
+}
+
+// Copies the stored (UTF-16) description into aDescription.
+NS_IMETHODIMP nsDirIndex::GetDescription(nsAString& aDescription) {
+  aDescription.Assign(mDescription);
+  return NS_OK;
+}
+
+// Replaces the stored (UTF-16) description with a copy of aDescription.
+NS_IMETHODIMP nsDirIndex::SetDescription(const nsAString& aDescription) {
+  mDescription.Assign(aDescription);
+  return NS_OK;
+}
+
+// Copies the stored size into *aSize. A null out-pointer is rejected by
+// NS_ENSURE_ARG_POINTER.
+NS_IMETHODIMP nsDirIndex::GetSize(int64_t* aSize) {
+  NS_ENSURE_ARG_POINTER(aSize);
+  *aSize = mSize;
+  return NS_OK;
+}
+
+// Stores the entry size; the value is not validated.
+NS_IMETHODIMP nsDirIndex::SetSize(int64_t aSize) {
+  mSize = aSize;
+  return NS_OK;
+}
+
+// Copies the stored modification time into *aLastModified. A null out-pointer
+// is rejected by NS_ENSURE_ARG_POINTER.
+NS_IMETHODIMP nsDirIndex::GetLastModified(PRTime* aLastModified) {
+  NS_ENSURE_ARG_POINTER(aLastModified);
+  *aLastModified = mLastModified;
+  return NS_OK;
+}
+
+// Stores the modification time; the value is not validated.
+NS_IMETHODIMP nsDirIndex::SetLastModified(PRTime aLastModified) {
+  mLastModified = aLastModified;
+  return NS_OK;
+}
diff --git a/netwerk/streamconv/converters/nsDirIndex.h b/netwerk/streamconv/converters/nsDirIndex.h
new file mode 100644
index 0000000000..2ff411de54
--- /dev/null
+++ b/netwerk/streamconv/converters/nsDirIndex.h
@@ -0,0 +1,32 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsDirIndex_h__
+#define nsDirIndex_h__
+
+#include "nsIDirIndex.h"
+#include "nsString.h"
+#include "mozilla/Attributes.h"
+
+class nsDirIndex final : public nsIDirIndex {  // value holder for one directory-listing entry
+ private:
+ ~nsDirIndex() = default;  // private: not destructible from outside the class
+
+ public:
+ nsDirIndex();  // starts with TYPE_UNKNOWN, size UINT64_MAX (as int64_t) and last-modified -1
+
+ NS_DECL_ISUPPORTS
+ NS_DECL_NSIDIRINDEX
+
+ protected:
+ uint32_t mType;  // entry type; TYPE_UNKNOWN until SetType() is called
+ nsCString mContentType;
+ nsCString mLocation;
+ nsString mDescription;
+ int64_t mSize;  // initialized from UINT64_MAX, which the parser uses to mean "unknown"
+ PRTime mLastModified;  // -1 until SetLastModified() is called
+};
+
+#endif
diff --git a/netwerk/streamconv/converters/nsDirIndexParser.cpp b/netwerk/streamconv/converters/nsDirIndexParser.cpp
new file mode 100644
index 0000000000..ed53cc45fb
--- /dev/null
+++ b/netwerk/streamconv/converters/nsDirIndexParser.cpp
@@ -0,0 +1,444 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This parsing code originally lived in xpfe/components/directory/ - bbaetz */
+
+#include "nsDirIndexParser.h"
+
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/Encoding.h"
+#include "prprf.h"
+#include "nsCRT.h"
+#include "nsDirIndex.h"
+#include "nsEscape.h"
+#include "nsIDirIndex.h"
+#include "nsIInputStream.h"
+#include "nsITextToSubURI.h"
+#include "nsServiceManagerUtils.h"
+#include "mozilla/intl/LocaleService.h"
+
+using namespace mozilla;
+
+struct EncodingProp {  // one row of the locale -> fallback encoding table
+ const char* const mKey;  // lower-case language code the app locale is compared against
+ NotNull<const Encoding*> mValue;  // encoding returned when mKey matches
+};
+
+static const EncodingProp localesFallbacks[] = {  // keep sorted by mKey: searched with BinarySearchIf
+ {"ar", WINDOWS_1256_ENCODING}, {"ba", WINDOWS_1251_ENCODING},
+ {"be", WINDOWS_1251_ENCODING}, {"bg", WINDOWS_1251_ENCODING},
+ {"cs", WINDOWS_1250_ENCODING}, {"el", ISO_8859_7_ENCODING},
+ {"et", WINDOWS_1257_ENCODING}, {"fa", WINDOWS_1256_ENCODING},
+ {"he", WINDOWS_1255_ENCODING}, {"hr", WINDOWS_1250_ENCODING},
+ {"hu", ISO_8859_2_ENCODING}, {"ja", SHIFT_JIS_ENCODING},
+ {"kk", WINDOWS_1251_ENCODING}, {"ko", EUC_KR_ENCODING},
+ {"ku", WINDOWS_1254_ENCODING}, {"ky", WINDOWS_1251_ENCODING},
+ {"lt", WINDOWS_1257_ENCODING}, {"lv", WINDOWS_1257_ENCODING},
+ {"mk", WINDOWS_1251_ENCODING}, {"pl", ISO_8859_2_ENCODING},
+ {"ru", WINDOWS_1251_ENCODING}, {"sah", WINDOWS_1251_ENCODING},
+ {"sk", WINDOWS_1250_ENCODING}, {"sl", ISO_8859_2_ENCODING},
+ {"sr", WINDOWS_1251_ENCODING}, {"tg", WINDOWS_1251_ENCODING},
+ {"th", WINDOWS_874_ENCODING}, {"tr", WINDOWS_1254_ENCODING},
+ {"tt", WINDOWS_1251_ENCODING}, {"uk", WINDOWS_1251_ENCODING},
+ {"vi", WINDOWS_1258_ENCODING}, {"zh", GBK_ENCODING}};
+
+// Chooses the legacy encoding to assume for directory listings from the
+// application locale:
+//   1. the Traditional Chinese locales map to Big5,
+//   2. otherwise the bare language code is looked up in localesFallbacks,
+//   3. otherwise windows-1252 is returned.
+static NotNull<const Encoding*>
+GetFTPFallbackEncodingDoNotAddNewCallersToThisFunction() {
+  nsAutoCString appLocale;
+  mozilla::intl::LocaleService::GetInstance()->GetAppLocaleAsBCP47(appLocale);
+
+  // Unofficial language packs may not follow casing conventions, so compare
+  // in lower case (ASCII lowercasing, since the input is a CString).
+  ToLowerCase(appLocale);
+
+  // Traditional Chinese has to be recognized while the region/script subtag
+  // is still present. Only zh-TW ships today; the others are defensive.
+  const bool isTraditionalChinese =
+      appLocale.EqualsLiteral("zh-tw") || appLocale.EqualsLiteral("zh-hk") ||
+      appLocale.EqualsLiteral("zh-mo") || appLocale.EqualsLiteral("zh-hant");
+  if (isTraditionalChinese) {
+    return BIG5_ENCODING;
+  }
+
+  // Strip regions and other variants (odd values show up in telemetry,
+  // apparently from unofficial language packs) and keep only the language.
+  const int32_t firstHyphen = appLocale.FindChar('-');
+  if (firstHyphen >= 0) {
+    appLocale.Truncate(firstHyphen);
+  }
+
+  // Three-way comparison of the wanted language against a table row.
+  auto compareToRow = [&appLocale](const EncodingProp& aRow) {
+    return appLocale.Compare(aRow.mKey);
+  };
+
+  size_t matchIndex;
+  const bool found =
+      BinarySearchIf(localesFallbacks, 0, ArrayLength(localesFallbacks),
+                     compareToRow, &matchIndex);
+  if (!found) {
+    return WINDOWS_1252_ENCODING;
+  }
+  return localesFallbacks[matchIndex].mValue;
+}
+
+NS_IMPL_ISUPPORTS(nsDirIndexParser, nsIRequestObserver, nsIStreamListener,
+                  nsIDirIndexParser)
+
+// Only zeroes the line cursor and the description flag; the remaining setup
+// (format table, encoding, shared service) happens in Init().
+nsDirIndexParser::nsDirIndexParser()
+    : mLineStart(0),
+      mHasDescription(false) {}
+
+// Resets the parser state, seeds mEncoding with the locale-based fallback
+// encoding name, and registers this parser in the shared instance count.
+// The first registered parser also fetches the shared nsITextToSubURI
+// service; the result of that fetch is what Init() returns in that case.
+nsresult nsDirIndexParser::Init() {
+  mLineStart = 0;
+  mHasDescription = false;
+  mFormat[0] = -1;  // no 200 format line seen yet
+
+  auto fallbackEncoding =
+      GetFTPFallbackEncodingDoNotAddNewCallersToThisFunction();
+  fallbackEncoding->Name(mEncoding);
+
+  // XXX not threadsafe
+  const bool isFirstParser = (gRefCntParser++ == 0);
+  if (!isFirstParser) {
+    return NS_OK;
+  }
+  return CallGetService(NS_ITEXTTOSUBURI_CONTRACTID, &gTextToSubURI);
+}
+
+// Unregisters this parser from the shared instance count; the last parser to
+// go away releases the shared nsITextToSubURI service.
+nsDirIndexParser::~nsDirIndexParser() {
+  // XXX not threadsafe
+  --gRefCntParser;
+  if (gRefCntParser != 0) {
+    return;
+  }
+  NS_IF_RELEASE(gTextToSubURI);
+}
+
+// Installs the listener that receives parsed entries and information lines.
+NS_IMETHODIMP nsDirIndexParser::SetListener(nsIDirIndexListener* aListener) {
+  mListener = aListener;
+  return NS_OK;
+}
+
+// Returns the current listener (possibly null) in *aListener, with an added
+// reference when non-null.
+NS_IMETHODIMP nsDirIndexParser::GetListener(nsIDirIndexListener** aListener) {
+  NS_IF_ADDREF(*aListener = mListener.get());
+  return NS_OK;
+}
+
+// Returns a newly allocated copy of the accumulated comment text in
+// *aComment, or NS_ERROR_OUT_OF_MEMORY (with *aComment null) if the fallible
+// copy fails.
+NS_IMETHODIMP nsDirIndexParser::GetComment(char** aComment) {
+  char* commentCopy = ToNewCString(mComment, mozilla::fallible);
+  *aComment = commentCopy;
+  return commentCopy ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
+}
+
+// Replaces the encoding name used when converting file names.
+NS_IMETHODIMP nsDirIndexParser::SetEncoding(const char* aEncoding) {
+  mEncoding.Assign(aEncoding);
+  return NS_OK;
+}
+
+// Returns a newly allocated copy of the current encoding name in *aEncoding,
+// or NS_ERROR_OUT_OF_MEMORY (with *aEncoding null) if the fallible copy
+// fails.
+NS_IMETHODIMP nsDirIndexParser::GetEncoding(char** aEncoding) {
+  char* encodingCopy = ToNewCString(mEncoding, mozilla::fallible);
+  *aEncoding = encodingCopy;
+  return encodingCopy ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
+}
+
+// Nothing to prepare when the request starts.
+NS_IMETHODIMP nsDirIndexParser::OnStartRequest(nsIRequest* aRequest) {
+  return NS_OK;
+}
+
+// On request completion, gives ProcessData() one more pass if the buffer
+// still holds bytes past the line cursor. The result of that pass is ignored
+// and NS_OK is always returned.
+NS_IMETHODIMP nsDirIndexParser::OnStopRequest(nsIRequest* aRequest,
+                                              nsresult aStatusCode) {
+  const bool hasUnprocessedBytes =
+      mBuf.Length() > static_cast<uint32_t>(mLineStart);
+  if (hasUnprocessedBytes) {
+    ProcessData(aRequest, nullptr);
+  }
+  return NS_OK;
+}
+
+nsDirIndexParser::Field nsDirIndexParser::gFieldTable[] = {  // 200-line field names -> fieldType; ParseFormat matches them case-insensitively
+ {"Filename", FIELD_FILENAME},
+ {"Description", FIELD_DESCRIPTION},
+ {"Content-Length", FIELD_CONTENTLENGTH},
+ {"Last-Modified", FIELD_LASTMODIFIED},
+ {"Content-Type", FIELD_CONTENTTYPE},
+ {"File-Type", FIELD_FILETYPE},
+ {nullptr, FIELD_UNKNOWN}};  // sentinel: the lookup loop stops at the null name
+
+nsrefcnt nsDirIndexParser::gRefCntParser = 0;  // incremented by Init(), decremented by the destructor
+nsITextToSubURI* nsDirIndexParser::gTextToSubURI;  // fetched by the first Init(), released when the count drops to 0
+
+/**
+ * Parses the payload of a "200" format line and records the field order in
+ * mFormat (terminated by -1).
+ *
+ * Tokens are whitespace-separated and URL-unescaped before being matched
+ * case-insensitively against gFieldTable; unrecognized tokens are skipped
+ * without occupying a slot. At most ArrayLength(mFormat) - 1 fields are
+ * recorded so that the -1 terminator always fits.
+ *
+ * Multiple 200 lines stomp on each other: both mFormat and mHasDescription
+ * are reset here, so a later format line without a Description field no
+ * longer inherits a stale mHasDescription from an earlier one.
+ *
+ * @param aFormatStr NUL-terminated text following the "200:" prefix.
+ */
+void nsDirIndexParser::ParseFormat(const char* aFormatStr) {
+  unsigned int formatNum = 0;
+  mFormat[0] = -1;
+  mHasDescription = false;
+
+  do {
+    // Skip the whitespace separating tokens.
+    while (*aFormatStr && nsCRT::IsAsciiSpace(char16_t(*aFormatStr)))
+      ++aFormatStr;
+
+    if (!*aFormatStr) break;
+
+    // Collect the next token.
+    nsAutoCString name;
+    int32_t len = 0;
+    while (aFormatStr[len] && !nsCRT::IsAsciiSpace(char16_t(aFormatStr[len])))
+      ++len;
+    name.Append(aFormatStr, len);
+    aFormatStr += len;
+
+    // Unescape in place inside the string's own buffer, then shrink the
+    // string to the unescaped length.
+    name.SetLength(nsUnescapeCount(name.BeginWriting()));
+
+    // All tokens are case-insensitive -
+    // http://www.mozilla.org/projects/netlib/dirindexformat.html
+    if (name.LowerCaseEqualsLiteral("description")) mHasDescription = true;
+
+    for (Field* i = gFieldTable; i->mName; ++i) {
+      if (name.EqualsIgnoreCase(i->mName)) {
+        mFormat[formatNum] = i->mType;
+        mFormat[++formatNum] = -1;  // keep the list terminated at all times
+        break;
+      }
+    }
+
+  } while (*aFormatStr && (formatNum < (ArrayLength(mFormat) - 1)));
+}
+
+/**
+ * Parses the payload of a "201" data line into aIdx, using the field
+ * ordering recorded in mFormat by the last "200" line.
+ *
+ * The line is tokenized in place: each field is terminated by overwriting
+ * the delimiter that follows it (whitespace or the closing quote) with NUL,
+ * and several field types are additionally URL-unescaped in place.
+ *
+ * Parsing stops silently (leaving aIdx partially filled) when no format has
+ * been seen, when the data runs out before the fields do, or when a quoted
+ * field is malformed.
+ *
+ * @param aIdx      entry object that receives the parsed values.
+ * @param aDataStr  writable text following the "201:" prefix.
+ * @param aLineLen  number of bytes available at aDataStr.
+ */
+void nsDirIndexParser::ParseData(nsIDirIndex* aIdx, char* aDataStr,
+                                 int32_t aLineLen) {
+  if (mFormat[0] == -1) {
+    // Ignore if we haven't seen a format yet.
+    return;
+  }
+
+  nsAutoCString filename;
+  int32_t lineLen = aLineLen;
+
+  for (int32_t i = 0; mFormat[i] != -1; ++i) {
+    // If we've exhausted the data before we run out of fields, just bail.
+    if (!*aDataStr || (lineLen < 1)) {
+      return;
+    }
+
+    // Skip the whitespace in front of the field.
+    while ((lineLen > 0) && nsCRT::IsAsciiSpace(*aDataStr)) {
+      ++aDataStr;
+      --lineLen;
+    }
+
+    if (lineLen < 1) {
+      // invalid format, bail
+      return;
+    }
+
+    char* value = aDataStr;
+    if (*aDataStr == '"' || *aDataStr == '\'') {
+      // it's a quoted string. snarf everything up to the next quote character
+      const char quotechar = *(aDataStr++);
+      lineLen--;
+      ++value;
+      while ((lineLen > 0) && *aDataStr != quotechar) {
+        ++aDataStr;
+        --lineLen;
+      }
+      if (lineLen > 0) {
+        *aDataStr++ = '\0';
+        --lineLen;
+      }
+
+      // NOTE(review): this bails both when the closing quote is missing and
+      // when the closing quote is the very last byte of the line, so a quoted
+      // field that ends the line is dropped. Behavior kept as-is; confirm
+      // whether the second case is intended.
+      if (!lineLen) {
+        // invalid format, bail
+        return;
+      }
+    } else {
+      // it's unquoted. snarf until we see whitespace.
+      value = aDataStr;
+      while ((lineLen > 0) && (!nsCRT::IsAsciiSpace(*aDataStr))) {
+        ++aDataStr;
+        --lineLen;
+      }
+      if (lineLen > 0) {
+        *aDataStr++ = '\0';
+        --lineLen;
+      }
+      // even if we ran out of line length here, there's still a trailing zero
+      // byte afterwards
+    }
+
+    fieldType t = fieldType(mFormat[i]);
+    switch (t) {
+      case FIELD_FILENAME: {
+        // don't unescape at this point, so that UnEscapeAndConvert() can
+        filename = value;
+
+        bool success = false;
+
+        if (gTextToSubURI) {
+          nsAutoString result;
+          if (NS_SUCCEEDED(gTextToSubURI->UnEscapeAndConvert(
+                  mEncoding, filename, result))) {
+            if (!result.IsEmpty()) {
+              aIdx->SetLocation(filename);
+              // The converted name doubles as the description unless the
+              // format supplies a dedicated Description field.
+              if (!mHasDescription) aIdx->SetDescription(result);
+              success = true;
+            }
+          } else {
+            NS_WARNING("UnEscapeAndConvert error");
+          }
+        }
+
+        if (!success) {
+          // if unsuccessfully at charset conversion, then
+          // just fallback to unescape'ing in-place
+          // XXX - this shouldn't be using UTF8, should it?
+          // when can we fail to get the service, anyway? - bbaetz
+          aIdx->SetLocation(filename);
+          if (!mHasDescription) {
+            aIdx->SetDescription(NS_ConvertUTF8toUTF16(value));
+          }
+        }
+      } break;
+      case FIELD_DESCRIPTION:
+        nsUnescape(value);
+        aIdx->SetDescription(NS_ConvertUTF8toUTF16(value));
+        break;
+      case FIELD_CONTENTLENGTH: {
+        int64_t len;
+        int32_t status = PR_sscanf(value, "%lld", &len);
+        if (status == 1)
+          aIdx->SetSize(len);
+        else
+          aIdx->SetSize(UINT64_MAX);  // UINT64_MAX means unknown
+      } break;
+      case FIELD_LASTMODIFIED: {
+        PRTime tm;
+        nsUnescape(value);
+        // An unparsable date leaves the entry's last-modified time untouched.
+        if (PR_ParseTimeString(value, false, &tm) == PR_SUCCESS) {
+          aIdx->SetLastModified(tm);
+        }
+      } break;
+      case FIELD_CONTENTTYPE:
+        aIdx->SetContentType(nsDependentCString(value));
+        break;
+      case FIELD_FILETYPE:
+        // unescape in-place
+        nsUnescape(value);
+        if (!nsCRT::strcasecmp(value, "directory")) {
+          aIdx->SetType(nsIDirIndex::TYPE_DIRECTORY);
+        } else if (!nsCRT::strcasecmp(value, "file")) {
+          aIdx->SetType(nsIDirIndex::TYPE_FILE);
+        } else if (!nsCRT::strcasecmp(value, "symbolic-link")) {
+          aIdx->SetType(nsIDirIndex::TYPE_SYMLINK);
+        } else {
+          aIdx->SetType(nsIDirIndex::TYPE_UNKNOWN);
+        }
+        break;
+      case FIELD_UNKNOWN:
+        // ignore
+        break;
+    }
+  }
+}
+
+/**
+ * Appends up to aCount bytes from aStream to the internal buffer and parses
+ * every complete line that is now available.
+ *
+ * If the read fails, the buffer is restored to its previous length before
+ * returning. Without that, the aCount uninitialized bytes reserved for the
+ * read would stay in mBuf and be handed to ProcessData() by a later
+ * OnDataAvailable()/OnStopRequest() call.
+ *
+ * @return NS_OK when aCount is 0; NS_ERROR_OUT_OF_MEMORY if the buffer
+ *         cannot grow; the stream's error if Read() fails; otherwise the
+ *         result of ProcessData().
+ */
+NS_IMETHODIMP
+nsDirIndexParser::OnDataAvailable(nsIRequest* aRequest, nsIInputStream* aStream,
+                                  uint64_t aSourceOffset, uint32_t aCount) {
+  if (aCount < 1) return NS_OK;
+
+  const uint32_t oldLen = mBuf.Length();
+
+  // Ensure that our mBuf has capacity to hold the data we're about to
+  // read.
+  if (!mBuf.SetLength(oldLen + aCount, fallible)) return NS_ERROR_OUT_OF_MEMORY;
+
+  // Now read the data into our buffer.
+  uint32_t bytesRead = 0;
+  nsresult rv = aStream->Read(mBuf.BeginWriting() + oldLen, aCount, &bytesRead);
+  if (NS_FAILED(rv)) {
+    // Drop the reserved-but-unwritten tail so it is never parsed.
+    mBuf.SetLength(oldLen);
+    return rv;
+  }
+
+  // Set the string's length according to the amount of data we've read.
+  // Note: we know this to work on nsCString. This isn't guaranteed to
+  // work on other strings.
+  mBuf.SetLength(oldLen + bytesRead);
+
+  return ProcessData(aRequest, nullptr);
+}
+
+/**
+ * Consumes every complete line currently in mBuf, starting at mLineStart,
+ * and dispatches it by its "NNN:" prefix:
+ *
+ *   100:  human-readable comment, ignored
+ *   101:  information line; appended to mComment, then unescaped and sent
+ *         to the listener via OnInformationAvailable()
+ *   102:  HTML information line; appended to mComment only
+ *   200:  field-name definition, handled by ParseFormat()
+ *   201:  field data; parsed into a new nsDirIndex by ParseData() and sent
+ *         to the listener via OnIndexAvailable()
+ *   300:  self-referring URL, ignored
+ *   301:  encoding extension; the trimmed payload is passed to SetEncoding()
+ *
+ * Lines are terminated in place by overwriting the '\n' or '\r' with NUL.
+ * Lines shorter than four bytes or with any other prefix are skipped. Bytes
+ * after the last line terminator stay in the buffer for a later call.
+ *
+ * Compared with the previous version this drops an unused line counter and
+ * takes a writable pointer from BeginWriting() instead of casting away the
+ * const of mBuf.get(); behavior is unchanged.
+ *
+ * @return NS_ERROR_FAILURE if no listener is set, NS_OK otherwise.
+ */
+nsresult nsDirIndexParser::ProcessData(nsIRequest* aRequest,
+                                       nsISupports* aCtxt) {
+  if (!mListener) return NS_ERROR_FAILURE;
+
+  while (true) {
+    const int32_t eol = mBuf.FindCharInSet("\n\r", mLineStart);
+    if (eol < 0) break;
+    mBuf.SetCharAt(char16_t('\0'), eol);
+
+    char* line = mBuf.BeginWriting() + mLineStart;
+    const int32_t lineLen = eol - mLineStart;
+    mLineStart = eol + 1;
+
+    // Every recognized prefix has the shape "<digit>0<digit>:".
+    if (lineLen < 4 || line[1] != '0' || line[3] != ':') continue;
+
+    char* payload = line + 4;
+    const char minor = line[2];
+
+    switch (line[0]) {
+      case '1':
+        if (minor == '1') {
+          // 101. Human-readable information line. The raw (still escaped)
+          // text goes into mComment; the listener gets the unescaped text.
+          mComment.Append(payload);
+          nsUnescape(payload);
+          mListener->OnInformationAvailable(aRequest, aCtxt,
+                                            NS_ConvertUTF8toUTF16(payload));
+        } else if (minor == '2') {
+          // 102. Human-readable information line, HTML.
+          mComment.Append(payload);
+        }
+        // 100. Human-readable comment line. Ignore
+        break;
+
+      case '2':
+        if (minor == '0') {
+          // 200. Define field names
+          ParseFormat(payload);
+        } else if (minor == '1') {
+          // 201. Field data
+          nsCOMPtr<nsIDirIndex> idx = new nsDirIndex();
+
+          ParseData(idx, payload, lineLen - 4);
+          mListener->OnIndexAvailable(aRequest, aCtxt, idx);
+        }
+        break;
+
+      case '3':
+        if (minor == '1') {
+          // 301. OUR EXTENSION - encoding
+          const char* encoding = payload;
+          while (*encoding && nsCRT::IsAsciiSpace(*encoding)) ++encoding;
+
+          if (*encoding) SetEncoding(encoding);
+        }
+        // 300. Self-referring URL. Ignore
+        break;
+
+      default:
+        break;
+    }
+  }
+
+  return NS_OK;
+}
diff --git a/netwerk/streamconv/converters/nsDirIndexParser.h b/netwerk/streamconv/converters/nsDirIndexParser.h
new file mode 100644
index 0000000000..c2dfe3c160
--- /dev/null
+++ b/netwerk/streamconv/converters/nsDirIndexParser.h
@@ -0,0 +1,75 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __NSDIRINDEX_H_
+#define __NSDIRINDEX_H_
+
+#include "nsString.h"
+#include "nsCOMPtr.h"
+#include "nsIDirIndexListener.h"
+#include "mozilla/RefPtr.h"
+
+class nsIDirIndex;
+class nsITextToSubURI;
+
+/* CID: {a0d6ad32-1dd1-11b2-aa55-a40187b54036} */
+
+class nsDirIndexParser : public nsIDirIndexParser {  // stream listener that parses "NNN:"-prefixed directory index lines
+ private:
+ virtual ~nsDirIndexParser();
+
+ nsDirIndexParser();  // private: instances are created through CreateInstance()
+ nsresult Init();
+
+ public:
+ NS_DECL_ISUPPORTS
+ NS_DECL_NSISTREAMLISTENER
+ NS_DECL_NSIREQUESTOBSERVER
+ NS_DECL_NSIDIRINDEXPARSER
+
+ static already_AddRefed<nsIDirIndexParser> CreateInstance() {  // returns nullptr when Init() fails
+ RefPtr<nsDirIndexParser> parser = new nsDirIndexParser();
+ if (NS_FAILED(parser->Init())) {
+ return nullptr;
+ }
+ return parser.forget();
+ }
+
+ enum fieldType {
+ FIELD_UNKNOWN = 0, // MUST be 0
+ FIELD_FILENAME,
+ FIELD_DESCRIPTION,
+ FIELD_CONTENTLENGTH,
+ FIELD_LASTMODIFIED,
+ FIELD_CONTENTTYPE,
+ FIELD_FILETYPE
+ };
+
+ protected:
+ nsCOMPtr<nsIDirIndexListener> mListener;  // receives parsed entries and information lines
+
+ nsCString mEncoding;  // encoding name used to convert file names
+ nsCString mComment;  // accumulated text of 101/102 lines
+ nsCString mBuf;  // raw bytes received so far
+ int32_t mLineStart;  // offset in mBuf of the first unprocessed line
+ bool mHasDescription;  // set when a 200 line names a Description field
+ int mFormat[8];  // fieldType order from the last 200 line, terminated by -1
+
+ nsresult ProcessData(nsIRequest* aRequest, nsISupports* aCtxt);
+ void ParseFormat(const char* buf);
+ void ParseData(nsIDirIndex* aIdx, char* aDataStr, int32_t lineLen);
+
+ struct Field {  // one row of gFieldTable
+ const char* mName;
+ fieldType mType;
+ };
+
+ static Field gFieldTable[];
+
+ static nsrefcnt gRefCntParser;  // number of initialized parsers sharing gTextToSubURI
+ static nsITextToSubURI* gTextToSubURI;
+};
+
+#endif
diff --git a/netwerk/streamconv/converters/nsFTPDirListingConv.cpp b/netwerk/streamconv/converters/nsFTPDirListingConv.cpp
new file mode 100644
index 0000000000..d155481f55
--- /dev/null
+++ b/netwerk/streamconv/converters/nsFTPDirListingConv.cpp
@@ -0,0 +1,342 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsFTPDirListingConv.h"
+#include "nsMemory.h"
+#include "plstr.h"
+#include "mozilla/Logging.h"
+#include "nsCOMPtr.h"
+#include "nsEscape.h"
+#include "nsStringStream.h"
+#include "nsIStreamListener.h"
+#include "nsCRT.h"
+#include "nsIChannel.h"
+#include "nsIURI.h"
+#include "nsIURIMutator.h"
+
+#include "ParseFTPList.h"
+#include <algorithm>
+
+#include "mozilla/UniquePtrExtensions.h"
+#include "mozilla/Unused.h"
+
+//
+// Log module for FTP dir listing stream converter logging...
+//
+// To enable logging (see mozilla/Logging.h for full details):
+//
+// set MOZ_LOG=nsFTPDirListingConv:5
+// set MOZ_LOG_FILE=network.log
+//
+// This enables LogLevel::Debug level information and places all output in
+// the file network.log.
+//
+static mozilla::LazyLogModule gFTPDirListConvLog("nsFTPDirListingConv");
+using namespace mozilla;
+
+// nsISupports implementation
+NS_IMPL_ISUPPORTS(nsFTPDirListingConv, nsIStreamConverter, nsIStreamListener,
+ nsIRequestObserver)
+
+// nsIStreamConverter implementation
+// Synchronous conversion is not implemented by this converter; this always
+// returns NS_ERROR_NOT_IMPLEMENTED and leaves |_retval| untouched.
+NS_IMETHODIMP
+nsFTPDirListingConv::Convert(nsIInputStream* aFromStream, const char* aFromType,
+ const char* aToType, nsISupports* aCtxt,
+ nsIInputStream** _retval) {
+ return NS_ERROR_NOT_IMPLEMENTED;
+}
+
+// Stream converter service calls this to initialize the actual stream converter
+// (us).
+//
+// Stores |aListener| as the final listener that receives the converted
+// On*() notifications. |aFromType|, |aToType| and |aCtxt| are not otherwise
+// used here.
+//
+// Returns NS_ERROR_NULL_POINTER when |aListener| is null, NS_OK otherwise.
+NS_IMETHODIMP
+nsFTPDirListingConv::AsyncConvertData(const char* aFromType,
+                                      const char* aToType,
+                                      nsIStreamListener* aListener,
+                                      nsISupports* aCtxt) {
+  NS_ASSERTION(aListener && aFromType && aToType,
+               "null pointer passed into FTP dir listing converter");
+
+  // NS_ASSERTION does not stop execution, so reject a null listener
+  // explicitly instead of dereferencing it in NS_ADDREF below.
+  if (!aListener) {
+    return NS_ERROR_NULL_POINTER;
+  }
+
+  // hook up our final listener. this guy gets the various On*() calls we want
+  // to throw at him. Take the new reference before dropping any previous one
+  // so that a repeated call (even with the same listener) neither leaks nor
+  // releases the object prematurely.
+  NS_ADDREF(aListener);
+  NS_IF_RELEASE(mFinalListener);
+  mFinalListener = aListener;
+
+  MOZ_LOG(gFTPDirListConvLog, LogLevel::Debug,
+          ("nsFTPDirListingConv::AsyncConvertData() converting FROM raw, TO "
+           "application/http-index-format\n"));
+
+  return NS_OK;
+}
+
+// Not implemented for this converter: always returns NS_ERROR_NOT_IMPLEMENTED
+// and leaves |aToType| untouched.
+NS_IMETHODIMP
+nsFTPDirListingConv::GetConvertedType(const nsACString& aFromType,
+ nsIChannel* aChannel,
+ nsACString& aToType) {
+ return NS_ERROR_NOT_IMPLEMENTED;
+}
+
+// nsIStreamListener implementation
+// Converts one chunk of a raw FTP directory listing and forwards the result
+// to mFinalListener.
+//
+// The available bytes of |inStr| are read into a NUL-terminated buffer,
+// prefixed with any partial line left over from the previous call, and every
+// complete line is turned into a "201:" entry by DigestBufferLines(). On the
+// first call the "300:"/"200:" heading from GetHeaders() is emitted first.
+// An incomplete trailing line is kept in mBuffer for the next call.
+//
+// Returns the first failure encountered, otherwise the result of the final
+// listener's OnDataAvailable().
+NS_IMETHODIMP
+nsFTPDirListingConv::OnDataAvailable(nsIRequest* request, nsIInputStream* inStr,
+                                     uint64_t sourceOffset, uint32_t count) {
+  NS_ASSERTION(request, "FTP dir listing stream converter needs a request");
+
+  nsresult rv;
+
+  nsCOMPtr<nsIChannel> channel = do_QueryInterface(request, &rv);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  uint32_t read = 0, streamLen;
+
+  uint64_t streamLen64;
+  rv = inStr->Available(&streamLen64);
+  NS_ENSURE_SUCCESS(rv, rv);
+  // Clamp so that |streamLen + 1| below cannot overflow.
+  streamLen = (uint32_t)std::min(streamLen64, uint64_t(UINT32_MAX - 1));
+
+  auto buffer = MakeUniqueFallible<char[]>(streamLen + 1);
+  NS_ENSURE_TRUE(buffer, NS_ERROR_OUT_OF_MEMORY);
+
+  rv = inStr->Read(buffer.get(), streamLen, &read);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // the dir listings are ascii text, null terminate this sucker.
+  // Terminate after the bytes that were actually read: Read() may deliver
+  // fewer than |streamLen| bytes, and anything beyond |read| is uninitialized.
+  buffer[read] = '\0';
+
+  MOZ_LOG(gFTPDirListConvLog, LogLevel::Debug,
+          ("nsFTPDirListingConv::OnData(request = %p, inStr = %p, "
+           "sourceOffset = %" PRIu64 ", count = %u)\n",
+           request, inStr, sourceOffset, count));
+
+  if (!mBuffer.IsEmpty()) {
+    // we have data left over from a previous OnDataAvailable() call.
+    // combine the buffers so we don't lose any data.
+    mBuffer.Append(buffer.get());
+
+    buffer = MakeUniqueFallible<char[]>(mBuffer.Length() + 1);
+    NS_ENSURE_TRUE(buffer, NS_ERROR_OUT_OF_MEMORY);
+
+    strncpy(buffer.get(), mBuffer.get(), mBuffer.Length() + 1);
+    mBuffer.Truncate();
+  }
+
+  MOZ_LOG(gFTPDirListConvLog, LogLevel::Debug,
+          ("::OnData() received the following %d bytes...\n\n%s\n\n", streamLen,
+           buffer.get()));
+
+  nsAutoCString indexFormat;
+  if (!mSentHeading) {
+    // build up the 300: line
+    nsCOMPtr<nsIURI> uri;
+    rv = channel->GetURI(getter_AddRefs(uri));
+    NS_ENSURE_SUCCESS(rv, rv);
+
+    rv = GetHeaders(indexFormat, uri);
+    NS_ENSURE_SUCCESS(rv, rv);
+
+    mSentHeading = true;
+  }
+
+  char* line = buffer.get();
+  line = DigestBufferLines(line, indexFormat);
+
+  MOZ_LOG(gFTPDirListConvLog, LogLevel::Debug,
+          ("::OnData() sending the following %d bytes...\n\n%s\n\n",
+           indexFormat.Length(), indexFormat.get()));
+
+  // if there's any data left over, buffer it.
+  if (line && *line) {
+    mBuffer.Append(line);
+    MOZ_LOG(gFTPDirListConvLog, LogLevel::Debug,
+            ("::OnData() buffering the following %zu bytes...\n\n%s\n\n",
+             strlen(line), line));
+  }
+
+  // send the converted data out.
+  nsCOMPtr<nsIInputStream> inputData;
+
+  rv = NS_NewCStringInputStream(getter_AddRefs(inputData), indexFormat);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  rv = mFinalListener->OnDataAvailable(request, inputData, 0,
+                                       indexFormat.Length());
+
+  return rv;
+}
+
+// nsIRequestObserver implementation
+// Forwards the start notification unchanged to the final listener and
+// returns its result.
+NS_IMETHODIMP
+nsFTPDirListingConv::OnStartRequest(nsIRequest* request) {
+ // we don't care about start. move along... but start masquerading
+ // as the http-index channel now.
+ return mFinalListener->OnStartRequest(request);
+}
+
+// Forwards the stop notification and |aStatus| unchanged to the final
+// listener and returns its result.
+NS_IMETHODIMP
+nsFTPDirListingConv::OnStopRequest(nsIRequest* request, nsresult aStatus) {
+ // we don't care about stop. move along...
+
+ return mFinalListener->OnStopRequest(request, aStatus);
+}
+
+// nsFTPDirListingConv methods
+// Starts with no final listener attached and the heading not yet sent.
+nsFTPDirListingConv::nsFTPDirListingConv()
+    : mSentHeading(false), mFinalListener(nullptr) {}
+
+// Drops the reference taken on the final listener in AsyncConvertData(), if
+// one was ever set.
+nsFTPDirListingConv::~nsFTPDirListingConv() { NS_IF_RELEASE(mFinalListener); }
+
+// Appends the listing heading to |headers|: a "300: <ascii spec>" line for
+// |uri| (with any password removed) followed by the "200:" column line.
+// Returns the first failure from building or serializing the URI, else NS_OK.
+nsresult nsFTPDirListingConv::GetHeaders(nsACString& headers, nsIURI* uri) {
+  nsresult rv = NS_OK;
+
+  // 300: line -- the location being listed.
+  headers.AppendLiteral("300: ");
+
+  // Bug 111117 - don't print the password. When the URI carries one,
+  // serialize a copy with the password blanked out instead.
+  nsAutoCString password;
+  uri->GetPassword(password);
+
+  nsCOMPtr<nsIURI> displayURI = uri;
+  if (!password.IsEmpty()) {
+    nsCOMPtr<nsIURI> stripped;
+    rv = NS_MutateURI(uri).SetPassword(""_ns).Finalize(stripped);
+    if (NS_FAILED(rv)) {
+      return rv;
+    }
+    displayURI = stripped;
+  }
+
+  nsAutoCString spec;
+  rv = displayURI->GetAsciiSpec(spec);
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+  headers.Append(spec);
+  headers.Append(char(nsCRT::LF));
+
+  // 200: line -- the column heading used by the 201: entries.
+  headers.AppendLiteral(
+      "200: filename content-length last-modified file-type\n");
+
+  return rv;
+}
+
+// Consumes every complete (LF-terminated) line of the NUL-terminated
+// |aBuffer|, parses it with ParseFTPList() and appends one "201:" entry per
+// file, directory or link to |aString|. Lines of any other type, and the
+// directories "." and "..", are skipped. |aBuffer| is modified in place (line
+// ends are overwritten with NUL).
+//
+// Returns a pointer to the first character after the last complete line,
+// i.e. the start of any unterminated remainder.
+char* nsFTPDirListingConv::DigestBufferLines(char* aBuffer,
+                                             nsCString& aString) {
+  list_state state;
+  char* cursor = aBuffer;
+
+  // while we have new lines, parse 'em into application/http-index-format.
+  while (cursor) {
+    char* lineEnd = PL_strchr(cursor, nsCRT::LF);
+    if (!lineEnd) {
+      break;
+    }
+
+    // The next line always begins right after the LF; remember that before
+    // |lineEnd| is possibly moved back onto a preceding CR.
+    char* const nextLine = lineEnd + 1;
+
+    // yank any carriage returns too.
+    if (lineEnd > cursor && *(lineEnd - 1) == nsCRT::CR) {
+      lineEnd--;
+    }
+    *lineEnd = '\0';
+
+    list_result result;
+    const int type = ParseFTPList(cursor, &state, &result);
+
+    // Only directories, files and links are listed, and the directories
+    // named . or .. are left out.
+    const bool listable = (type == 'd' || type == 'f' || type == 'l');
+    const bool dotDirectory =
+        listable && result.fe_type == 'd' && result.fe_fname[0] == '.' &&
+        (result.fe_fnlen == 1 ||
+         (result.fe_fnlen == 2 && result.fe_fname[1] == '.'));
+    if (!listable || dotDirectory) {
+      cursor = nextLine;
+      continue;
+    }
+
+    // blast the index entry into the indexFormat buffer as a 201: line.
+    aString.AppendLiteral("201: ");
+
+    // FILENAME
+    // parsers for styles 'U' and 'W' handle sequence " -> " themself; for
+    // every other style cut the name off in front of it.
+    if (state.lstyle != 'U' && state.lstyle != 'W') {
+      if (const char* arrow = strstr(result.fe_fname, " -> ")) {
+        result.fe_fnlen = arrow - result.fe_fname;
+      }
+    }
+
+    nsAutoCString escapedName;
+    aString.Append('\"');
+    aString.Append(NS_EscapeURL(
+        Substring(result.fe_fname, result.fe_fname + result.fe_fnlen),
+        esc_Minimal | esc_OnlyASCII | esc_Forced, escapedName));
+    aString.AppendLiteral("\" ");
+
+    // CONTENT LENGTH
+    if (type == 'd') {
+      aString.AppendLiteral("0 ");
+    } else {
+      // Copy every non-NUL character of the size field.
+      for (char& sizeChar : result.fe_size) {
+        if (sizeChar != '\0') {
+          aString.Append(sizeChar);
+        }
+      }
+      aString.Append(' ');
+    }
+
+    // MODIFIED DATE
+    // ParseFTPList can return time structure with invalid values.
+    // PR_NormalizeTime will set all values into valid limits.
+    result.fe_time.tm_params.tp_gmt_offset = 0;
+    result.fe_time.tm_params.tp_dst_offset = 0;
+    PR_NormalizeTime(&result.fe_time, PR_GMTParameters);
+
+    // Note: The below is the RFC822/1123 format, as required by
+    // the application/http-index-format specs
+    // viewers of such a format can then reformat this into the
+    // current locale (or anything else they choose)
+    char timeText[256] = "";
+    PR_FormatTimeUSEnglish(timeText, sizeof(timeText),
+                           "%a, %d %b %Y %H:%M:%S GMT", &result.fe_time);
+
+    nsAutoCString escapedTime;
+    Unused << NS_WARN_IF(
+        !NS_Escape(nsDependentCString(timeText), escapedTime, url_Path));
+    aString.Append(escapedTime);
+    aString.Append(' ');
+
+    // ENTRY TYPE
+    switch (type) {
+      case 'd':
+        aString.AppendLiteral("DIRECTORY");
+        break;
+      case 'l':
+        aString.AppendLiteral("SYMBOLIC-LINK");
+        break;
+      default:
+        aString.AppendLiteral("FILE");
+        break;
+    }
+
+    aString.Append(' ');
+
+    aString.Append(char(nsCRT::LF));  // complete this line
+    // END 201:
+
+    cursor = nextLine;
+  }  // end while
+
+  return cursor;
+}
+
+// Creates a new nsFTPDirListingConv and hands an owning reference to the
+// caller through |aFTPDirListingConv|. Returns NS_ERROR_NULL_POINTER when the
+// out-parameter itself is null, NS_OK otherwise.
+nsresult NS_NewFTPDirListingConv(nsFTPDirListingConv** aFTPDirListingConv) {
+  MOZ_ASSERT(aFTPDirListingConv != nullptr, "null ptr");
+  if (aFTPDirListingConv == nullptr) {
+    return NS_ERROR_NULL_POINTER;
+  }
+
+  RefPtr<nsFTPDirListingConv> converter(new nsFTPDirListingConv());
+  converter.forget(aFTPDirListingConv);
+  return NS_OK;
+}
diff --git a/netwerk/streamconv/converters/nsFTPDirListingConv.h b/netwerk/streamconv/converters/nsFTPDirListingConv.h
new file mode 100644
index 0000000000..57ca806d11
--- /dev/null
+++ b/netwerk/streamconv/converters/nsFTPDirListingConv.h
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef __nsftpdirlistingdconv__h__
+#define __nsftpdirlistingdconv__h__
+
+#include "nsIStreamConverter.h"
+#include "nsString.h"
+
+class nsIURI;
+
+#define NS_FTPDIRLISTINGCONVERTER_CID \
+ { /* 14C0E880-623E-11d3-A178-0050041CAF44 */ \
+ 0x14c0e880, 0x623e, 0x11d3, { \
+ 0xa1, 0x78, 0x00, 0x50, 0x04, 0x1c, 0xaf, 0x44 \
+ } \
+ }
+
+// Stream converter that turns a raw FTP directory listing into
+// application/http-index-format and passes it on to a final listener.
+class nsFTPDirListingConv : public nsIStreamConverter {
+ public:
+ // nsISupports methods
+ NS_DECL_ISUPPORTS
+
+ // nsIStreamConverter methods
+ NS_DECL_NSISTREAMCONVERTER
+
+ // nsIStreamListener methods
+ NS_DECL_NSISTREAMLISTENER
+
+ // nsIRequestObserver methods
+ NS_DECL_NSIREQUESTOBSERVER
+
+ // nsFTPDirListingConv methods
+ nsFTPDirListingConv();
+
+ private:
+ virtual ~nsFTPDirListingConv();
+
+ // Get the application/http-index-format headers
+ nsresult GetHeaders(nsACString& str, nsIURI* uri);
+ // Converts the complete lines of aBuffer into 201: entries appended to
+ // aString; returns a pointer to the unconsumed remainder.
+ char* DigestBufferLines(char* aBuffer, nsCString& aString);
+
+ // member data
+ nsCString mBuffer; // buffered data.
+ bool mSentHeading; // have we sent the 300: and 200: heading lines yet?
+
+ // Raw pointer that is reference-counted by hand: AddRef'd in
+ // AsyncConvertData() and released in the destructor.
+ nsIStreamListener* mFinalListener; // this guy gets the converted data via
+ // his OnDataAvailable()
+};
+
+#endif /* __nsftpdirlistingdconv__h__ */
diff --git a/netwerk/streamconv/converters/nsHTTPCompressConv.cpp b/netwerk/streamconv/converters/nsHTTPCompressConv.cpp
new file mode 100644
index 0000000000..1de4737866
--- /dev/null
+++ b/netwerk/streamconv/converters/nsHTTPCompressConv.cpp
@@ -0,0 +1,722 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set sw=2 ts=8 et tw=80 : */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsHTTPCompressConv.h"
+#include "nsMemory.h"
+#include "plstr.h"
+#include "nsCOMPtr.h"
+#include "nsError.h"
+#include "nsStreamUtils.h"
+#include "nsStringStream.h"
+#include "nsComponentManagerUtils.h"
+#include "nsThreadUtils.h"
+#include "mozilla/Preferences.h"
+#include "mozilla/Logging.h"
+#include "nsIForcePendingChannel.h"
+#include "nsIRequest.h"
+
+// brotli headers
+#include "state.h"
+#include "brotli/decode.h"
+
+namespace mozilla {
+namespace net {
+
+extern LazyLogModule gHttpLog;
+#define LOG(args) \
+ MOZ_LOG(mozilla::net::gHttpLog, mozilla::LogLevel::Debug, args)
+
+// nsISupports implementation
+NS_IMPL_ISUPPORTS(nsHTTPCompressConv, nsIStreamConverter, nsIStreamListener,
+ nsIRequestObserver, nsICompressConvStats,
+ nsIThreadRetargetableStreamListener)
+
+// nsHTTPCompressConv methods
+
+// Starts in identity mode with no buffers allocated and no decoder state.
+// The framing-enforcement pref is only consulted when constructed on the
+// main thread; on any other thread mFailUncleanStops is false.
+nsHTTPCompressConv::nsHTTPCompressConv()
+    : mMode(HTTP_COMPRESS_IDENTITY),
+      mOutBuffer(nullptr),
+      mInpBuffer(nullptr),
+      mOutBufferLen(0),
+      mInpBufferLen(0),
+      mCheckHeaderDone(false),
+      mStreamEnded(false),
+      mStreamInitialized(false),
+      mDummyStreamInitialised(false),
+      d_stream{},
+      mLen(0),
+      hMode(0),
+      mSkipCount(0),
+      mFlags(0),
+      mDecodedDataLength(0),
+      mMutex("nsHTTPCompressConv") {
+  LOG(("nsHttpCompresssConv %p ctor\n", this));
+  mFailUncleanStops =
+      NS_IsMainThread() &&
+      Preferences::GetBool("network.http.enforce-framing.http", false);
+}
+
+// Releases the input/output scratch buffers and, if the zlib stream was
+// initialized but never reached its end, the zlib decoder state.
+nsHTTPCompressConv::~nsHTTPCompressConv() {
+  LOG(("nsHttpCompresssConv %p dtor\n", this));
+
+  // free() accepts a null pointer, so no guards are needed.
+  free(mInpBuffer);
+  free(mOutBuffer);
+
+  // For some reason we are not getting Z_STREAM_END. But this was also seen
+  // for mozilla bug 198133. Need to handle this case.
+  const bool streamStillOpen = mStreamInitialized && !mStreamEnded;
+  if (streamStillOpen) {
+    inflateEnd(&d_stream);
+  }
+}
+
+// Reports the running total of decoded bytes, which do_OnDataAvailable()
+// adds to on every call. Always returns NS_OK.
+NS_IMETHODIMP
+nsHTTPCompressConv::GetDecodedDataLength(uint64_t* aDecodedDataLength) {
+ *aDecodedDataLength = mDecodedDataLength;
+ return NS_OK;
+}
+
+// Selects the decoding mode from |aFromType| (matched case-insensitively as
+// a prefix against the known content-coding names) and stores |aListener| as
+// the receiver of the decoded data. An unrecognized type leaves mMode
+// unchanged. Always returns NS_OK.
+NS_IMETHODIMP
+nsHTTPCompressConv::AsyncConvertData(const char* aFromType, const char* aToType,
+                                     nsIStreamListener* aListener,
+                                     nsISupports* aCtxt) {
+  // True when |aFromType| starts with the first |aLen| characters of |aName|,
+  // ignoring case.
+  auto fromTypeHasPrefix = [aFromType](const char* aName, uint32_t aLen) {
+    return !PL_strncasecmp(aFromType, aName, aLen);
+  };
+
+  if (fromTypeHasPrefix(HTTP_COMPRESS_TYPE, sizeof(HTTP_COMPRESS_TYPE) - 1) ||
+      fromTypeHasPrefix(HTTP_X_COMPRESS_TYPE,
+                        sizeof(HTTP_X_COMPRESS_TYPE) - 1)) {
+    mMode = HTTP_COMPRESS_COMPRESS;
+  } else if (fromTypeHasPrefix(HTTP_GZIP_TYPE, sizeof(HTTP_GZIP_TYPE) - 1) ||
+             fromTypeHasPrefix(HTTP_X_GZIP_TYPE,
+                               sizeof(HTTP_X_GZIP_TYPE) - 1)) {
+    mMode = HTTP_COMPRESS_GZIP;
+  } else if (fromTypeHasPrefix(HTTP_DEFLATE_TYPE,
+                               sizeof(HTTP_DEFLATE_TYPE) - 1)) {
+    mMode = HTTP_COMPRESS_DEFLATE;
+  } else if (fromTypeHasPrefix(HTTP_BROTLI_TYPE,
+                               sizeof(HTTP_BROTLI_TYPE) - 1)) {
+    mMode = HTTP_COMPRESS_BROTLI;
+  }
+  LOG(("nsHttpCompresssConv %p AsyncConvertData %s %s mode %d\n", this,
+       aFromType, aToType, (CompressMode)mMode));
+
+  // hook ourself up with the receiving listener; mListener is only touched
+  // with mMutex held.
+  MutexAutoLock lock(mMutex);
+  mListener = aListener;
+
+  return NS_OK;
+}
+
+// Not implemented for this converter: always returns NS_ERROR_NOT_IMPLEMENTED
+// and leaves |aToType| untouched.
+NS_IMETHODIMP
+nsHTTPCompressConv::GetConvertedType(const nsACString& aFromType,
+ nsIChannel* aChannel,
+ nsACString& aToType) {
+ return NS_ERROR_NOT_IMPLEMENTED;
+}
+
+// Forwards the start notification to the listener and returns its result.
+NS_IMETHODIMP
+nsHTTPCompressConv::OnStartRequest(nsIRequest* request) {
+ LOG(("nsHttpCompresssConv %p onstart\n", this));
+ // Copy the listener under the mutex, then call it with the lock released.
+ nsCOMPtr<nsIStreamListener> listener;
+ {
+ MutexAutoLock lock(mMutex);
+ listener = mListener;
+ }
+ return listener->OnStartRequest(request);
+}
+
+// Forwards the stop notification to the listener, possibly replacing a
+// successful |aStatus| with an error:
+//  - NS_ERROR_NET_PARTIAL_TRANSFER for a gzip stream that never reached its
+//    end while mFailUncleanStops is set;
+//  - NS_ERROR_INVALID_CONTENT_ENCODING for a brotli stream that produced no
+//    output and is not finished.
+// Returns the listener's result.
+NS_IMETHODIMP
+nsHTTPCompressConv::OnStopRequest(nsIRequest* request, nsresult aStatus) {
+ nsresult status = aStatus;
+ LOG(("nsHttpCompresssConv %p onstop %" PRIx32 "\n", this,
+ static_cast<uint32_t>(aStatus)));
+
+ // Framing integrity is enforced for content-encoding: gzip, but not for
+ // content-encoding: deflate. Note that gzip vs deflate is NOT determined
+ // by content sniffing but only via header.
+ if (!mStreamEnded && NS_SUCCEEDED(status) &&
+ (mFailUncleanStops && (mMode == HTTP_COMPRESS_GZIP))) {
+ // This is not a clean end of gzip stream: the transfer is incomplete.
+ status = NS_ERROR_NET_PARTIAL_TRANSFER;
+ LOG(("nsHttpCompresssConv %p onstop partial gzip\n", this));
+ }
+ if (NS_SUCCEEDED(status) && mMode == HTTP_COMPRESS_BROTLI) {
+ nsCOMPtr<nsIForcePendingChannel> fpChannel = do_QueryInterface(request);
+ bool isPending = false;
+ if (request) {
+ request->IsPending(&isPending);
+ }
+ // The channel is forced into the pending state for the duration of the
+ // brotli check below and restored afterwards.
+ if (fpChannel && !isPending) {
+ fpChannel->ForcePending(true);
+ }
+ // No decoded output at all and no end-of-stream marker: treat the body as
+ // not being valid brotli.
+ if (mBrotli && (mBrotli->mTotalOut == 0) &&
+ !mBrotli->mBrotliStateIsStreamEnd) {
+ status = NS_ERROR_INVALID_CONTENT_ENCODING;
+ }
+ LOG(("nsHttpCompresssConv %p onstop brotlihandler rv %" PRIx32 "\n", this,
+ static_cast<uint32_t>(status)));
+ if (fpChannel && !isPending) {
+ fpChannel->ForcePending(false);
+ }
+ }
+
+ // Copy the listener under the mutex, then call it with the lock released.
+ nsCOMPtr<nsIStreamListener> listener;
+ {
+ MutexAutoLock lock(mMutex);
+ listener = mListener;
+ }
+ return listener->OnStopRequest(request, status);
+}
+
+// Segment callback handed to nsIInputStream::ReadSegments() by
+// OnDataAvailable() for brotli content.
+//
+// |closure| is the nsHTTPCompressConv; |dataIn|/|aAvail| describe one segment
+// of compressed input. The segment is decompressed in chunks of kOutSize
+// bytes, each non-empty chunk being delivered through do_OnDataAvailable().
+// On success *countRead is set to aAvail (the whole segment is consumed). On
+// failure the error is also recorded in mBrotli->mStatus and returned.
+/* static */
+nsresult nsHTTPCompressConv::BrotliHandler(nsIInputStream* stream,
+ void* closure, const char* dataIn,
+ uint32_t, uint32_t aAvail,
+ uint32_t* countRead) {
+ MOZ_ASSERT(stream);
+ nsHTTPCompressConv* self = static_cast<nsHTTPCompressConv*>(closure);
+ *countRead = 0;
+
+ const size_t kOutSize = 128 * 1024; // just a chunk size, we call in a loop
+ uint8_t* outPtr;
+ size_t outSize;
+ size_t avail = aAvail;
+ BrotliDecoderResult res;
+
+ // Without decoder state there is nothing to decode with; report the segment
+ // as consumed.
+ if (!self->mBrotli) {
+ *countRead = aAvail;
+ return NS_OK;
+ }
+
+ auto outBuffer = MakeUniqueFallible<uint8_t[]>(kOutSize);
+ if (outBuffer == nullptr) {
+ self->mBrotli->mStatus = NS_ERROR_OUT_OF_MEMORY;
+ return self->mBrotli->mStatus;
+ }
+
+ do {
+ // Each iteration decodes into the start of the (reused) output buffer.
+ outSize = kOutSize;
+ outPtr = outBuffer.get();
+
+ // brotli api is documented in brotli/dec/decode.h and brotli/dec/decode.c
+ LOG(("nsHttpCompresssConv %p brotlihandler decompress %zu\n", self, avail));
+ size_t totalOut = self->mBrotli->mTotalOut;
+ res = ::BrotliDecoderDecompressStream(
+ &self->mBrotli->mState, &avail,
+ reinterpret_cast<const unsigned char**>(&dataIn), &outSize, &outPtr,
+ &totalOut);
+ // Turn the remaining output space into the number of bytes produced.
+ outSize = kOutSize - outSize;
+ self->mBrotli->mTotalOut = totalOut;
+ self->mBrotli->mBrotliStateIsStreamEnd =
+ BrotliDecoderIsFinished(&self->mBrotli->mState);
+ LOG(("nsHttpCompresssConv %p brotlihandler decompress rv=%" PRIx32
+ " out=%zu\n",
+ self, static_cast<uint32_t>(res), outSize));
+
+ if (res == BROTLI_DECODER_RESULT_ERROR) {
+ LOG(("nsHttpCompressConv %p marking invalid encoding", self));
+ self->mBrotli->mStatus = NS_ERROR_INVALID_CONTENT_ENCODING;
+ return self->mBrotli->mStatus;
+ }
+
+ // in 'the current implementation' brotli must consume everything before
+ // asking for more input
+ if (res == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) {
+ MOZ_ASSERT(!avail);
+ if (avail) {
+ LOG(("nsHttpCompressConv %p did not consume all input", self));
+ self->mBrotli->mStatus = NS_ERROR_UNEXPECTED;
+ return self->mBrotli->mStatus;
+ }
+ }
+ // Deliver whatever was produced before deciding whether to stop.
+ if (outSize > 0) {
+ nsresult rv = self->do_OnDataAvailable(
+ self->mBrotli->mRequest, self->mBrotli->mContext,
+ self->mBrotli->mSourceOffset,
+ reinterpret_cast<const char*>(outBuffer.get()), outSize);
+ LOG(("nsHttpCompressConv %p BrotliHandler ODA rv=%" PRIx32, self,
+ static_cast<uint32_t>(rv)));
+ if (NS_FAILED(rv)) {
+ self->mBrotli->mStatus = rv;
+ return self->mBrotli->mStatus;
+ }
+ }
+
+ // Stream finished or segment exhausted: the whole segment was consumed.
+ if (res == BROTLI_DECODER_RESULT_SUCCESS ||
+ res == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) {
+ *countRead = aAvail;
+ return NS_OK;
+ }
+ MOZ_ASSERT(res == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT);
+ } while (res == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT);
+
+ // Only reached for a decoder result not handled above.
+ self->mBrotli->mStatus = NS_ERROR_UNEXPECTED;
+ return self->mBrotli->mStatus;
+}
+
+// Decodes |aCount| bytes from |iStr| according to mMode and passes the
+// decoded data on via do_OnDataAvailable().
+//
+//  - gzip: the header is consumed by check_header(), the remainder is
+//    inflated as a raw deflate stream.
+//  - deflate: inflated as a zlib stream; if the data lacks the zlib header a
+//    dummy header is injected once and decoding is retried.
+//  - brotli: the stream is fed segment by segment to BrotliHandler().
+//  - anything else: the data is passed through to the listener untouched.
+//
+// Once the compressed stream has ended, further data is read and discarded.
+// Returns NS_OK on success, NS_ERROR_UNEXPECTED for a zero count,
+// NS_ERROR_OUT_OF_MEMORY when the scratch buffers cannot be allocated,
+// NS_ERROR_INVALID_CONTENT_ENCODING / NS_ERROR_FAILURE for undecodable data,
+// or the failure of reading the input or of the listener.
+NS_IMETHODIMP
+nsHTTPCompressConv::OnDataAvailable(nsIRequest* request, nsIInputStream* iStr,
+                                    uint64_t aSourceOffset, uint32_t aCount) {
+  nsresult rv = NS_ERROR_INVALID_CONTENT_ENCODING;
+  uint32_t streamLen = aCount;
+  LOG(("nsHttpCompressConv %p OnDataAvailable %d", this, aCount));
+
+  if (streamLen == 0) {
+    NS_ERROR("count of zero passed to OnDataAvailable");
+    return NS_ERROR_UNEXPECTED;
+  }
+
+  if (mStreamEnded) {
+    // Hmm... this may just indicate that the data stream is done and that
+    // what's left is either metadata or padding of some sort.... throwing
+    // it out is probably the safe thing to do.
+    uint32_t n;
+    return iStr->ReadSegments(NS_DiscardSegment, nullptr, streamLen, &n);
+  }
+
+  switch (mMode) {
+    case HTTP_COMPRESS_GZIP:
+      // Strip (the rest of) the gzip header; |streamLen| becomes the number
+      // of payload bytes still unread.
+      streamLen = check_header(iStr, streamLen, &rv);
+
+      if (rv != NS_OK) {
+        return rv;
+      }
+
+      if (streamLen == 0) {
+        return NS_OK;
+      }
+
+      [[fallthrough]];
+
+    case HTTP_COMPRESS_DEFLATE: {
+      // Grow the scratch buffers when this chunk is larger than any before.
+      if (mInpBuffer != nullptr && streamLen > mInpBufferLen) {
+        unsigned char* originalInpBuffer = mInpBuffer;
+        if (!(mInpBuffer = (unsigned char*)realloc(
+                  originalInpBuffer, mInpBufferLen = streamLen))) {
+          free(originalInpBuffer);
+        }
+
+        if (mOutBufferLen < streamLen * 2) {
+          unsigned char* originalOutBuffer = mOutBuffer;
+          if (!(mOutBuffer = (unsigned char*)realloc(
+                    mOutBuffer, mOutBufferLen = streamLen * 3))) {
+            free(originalOutBuffer);
+          }
+        }
+
+        if (mInpBuffer == nullptr || mOutBuffer == nullptr) {
+          return NS_ERROR_OUT_OF_MEMORY;
+        }
+      }
+
+      // First use (or recovery after a failed realloc): allocate from scratch.
+      if (mInpBuffer == nullptr) {
+        mInpBuffer = (unsigned char*)malloc(mInpBufferLen = streamLen);
+      }
+
+      if (mOutBuffer == nullptr) {
+        mOutBuffer = (unsigned char*)malloc(mOutBufferLen = streamLen * 3);
+      }
+
+      if (mInpBuffer == nullptr || mOutBuffer == nullptr) {
+        return NS_ERROR_OUT_OF_MEMORY;
+      }
+
+      // Do not hand inflate() bytes that were never read: propagate a read
+      // failure and only decode the number of bytes actually delivered.
+      uint32_t bytesRead = 0;
+      rv = iStr->Read((char*)mInpBuffer, streamLen, &bytesRead);
+      if (NS_FAILED(rv)) {
+        return rv;
+      }
+      streamLen = bytesRead;
+
+      if (mMode == HTTP_COMPRESS_DEFLATE) {
+        if (!mStreamInitialized) {
+          memset(&d_stream, 0, sizeof(d_stream));
+
+          if (inflateInit(&d_stream) != Z_OK) {
+            return NS_ERROR_FAILURE;
+          }
+
+          mStreamInitialized = true;
+        }
+        d_stream.next_in = mInpBuffer;
+        d_stream.avail_in = (uInt)streamLen;
+
+        mDummyStreamInitialised = false;
+        for (;;) {
+          d_stream.next_out = mOutBuffer;
+          d_stream.avail_out = (uInt)mOutBufferLen;
+
+          int code = inflate(&d_stream, Z_NO_FLUSH);
+          unsigned bytesWritten = (uInt)mOutBufferLen - d_stream.avail_out;
+
+          if (code == Z_STREAM_END) {
+            if (bytesWritten) {
+              rv = do_OnDataAvailable(request, nullptr, aSourceOffset,
+                                      (char*)mOutBuffer, bytesWritten);
+              if (NS_FAILED(rv)) {
+                return rv;
+              }
+            }
+
+            inflateEnd(&d_stream);
+            mStreamEnded = true;
+            break;
+          } else if (code == Z_OK) {
+            // More output may be pending; deliver this chunk and loop.
+            if (bytesWritten) {
+              rv = do_OnDataAvailable(request, nullptr, aSourceOffset,
+                                      (char*)mOutBuffer, bytesWritten);
+              if (NS_FAILED(rv)) {
+                return rv;
+              }
+            }
+          } else if (code == Z_BUF_ERROR) {
+            // No further progress possible with this input; wait for more.
+            if (bytesWritten) {
+              rv = do_OnDataAvailable(request, nullptr, aSourceOffset,
+                                      (char*)mOutBuffer, bytesWritten);
+              if (NS_FAILED(rv)) {
+                return rv;
+              }
+            }
+            break;
+          } else if (code == Z_DATA_ERROR) {
+            // some servers (notably Apache with mod_deflate) don't generate
+            // zlib headers insert a dummy header and try again
+            static char dummy_head[2] = {
+                0x8 + 0x7 * 0x10,
+                (((0x8 + 0x7 * 0x10) * 0x100 + 30) / 31 * 31) & 0xFF,
+            };
+            inflateReset(&d_stream);
+            d_stream.next_in = (Bytef*)dummy_head;
+            d_stream.avail_in = sizeof(dummy_head);
+
+            code = inflate(&d_stream, Z_NO_FLUSH);
+            if (code != Z_OK) {
+              return NS_ERROR_FAILURE;
+            }
+
+            // stop an endless loop caused by non-deflate data being labelled as
+            // deflate
+            if (mDummyStreamInitialised) {
+              NS_WARNING(
+                  "endless loop detected"
+                  " - invalid deflate");
+              return NS_ERROR_INVALID_CONTENT_ENCODING;
+            }
+            mDummyStreamInitialised = true;
+            // reset stream pointers to our original data
+            d_stream.next_in = mInpBuffer;
+            d_stream.avail_in = (uInt)streamLen;
+          } else {
+            return NS_ERROR_INVALID_CONTENT_ENCODING;
+          }
+        } /* for */
+      } else {
+        if (!mStreamInitialized) {
+          memset(&d_stream, 0, sizeof(d_stream));
+
+          // Negative window bits: the gzip payload is a raw deflate stream
+          // (the gzip header was already consumed by check_header()).
+          if (inflateInit2(&d_stream, -MAX_WBITS) != Z_OK) {
+            return NS_ERROR_FAILURE;
+          }
+
+          mStreamInitialized = true;
+        }
+
+        d_stream.next_in = mInpBuffer;
+        d_stream.avail_in = (uInt)streamLen;
+
+        for (;;) {
+          d_stream.next_out = mOutBuffer;
+          d_stream.avail_out = (uInt)mOutBufferLen;
+
+          int code = inflate(&d_stream, Z_NO_FLUSH);
+          unsigned bytesWritten = (uInt)mOutBufferLen - d_stream.avail_out;
+
+          if (code == Z_STREAM_END) {
+            if (bytesWritten) {
+              rv = do_OnDataAvailable(request, nullptr, aSourceOffset,
+                                      (char*)mOutBuffer, bytesWritten);
+              if (NS_FAILED(rv)) {
+                return rv;
+              }
+            }
+
+            inflateEnd(&d_stream);
+            mStreamEnded = true;
+            break;
+          } else if (code == Z_OK) {
+            if (bytesWritten) {
+              rv = do_OnDataAvailable(request, nullptr, aSourceOffset,
+                                      (char*)mOutBuffer, bytesWritten);
+              if (NS_FAILED(rv)) {
+                return rv;
+              }
+            }
+          } else if (code == Z_BUF_ERROR) {
+            if (bytesWritten) {
+              rv = do_OnDataAvailable(request, nullptr, aSourceOffset,
+                                      (char*)mOutBuffer, bytesWritten);
+              if (NS_FAILED(rv)) {
+                return rv;
+              }
+            }
+            break;
+          } else {
+            return NS_ERROR_INVALID_CONTENT_ENCODING;
+          }
+        } /* for */
+      }   /* gzip */
+    } break;
+
+    case HTTP_COMPRESS_BROTLI: {
+      if (!mBrotli) {
+        mBrotli = MakeUnique<BrotliWrapper>();
+      }
+
+      // Context BrotliHandler() needs to forward decoded data.
+      mBrotli->mRequest = request;
+      mBrotli->mContext = nullptr;
+      mBrotli->mSourceOffset = aSourceOffset;
+
+      uint32_t countRead;
+      rv = iStr->ReadSegments(BrotliHandler, this, streamLen, &countRead);
+      // An error recorded by the handler takes effect even when ReadSegments
+      // itself reports success.
+      if (NS_SUCCEEDED(rv)) {
+        rv = mBrotli->mStatus;
+      }
+      if (NS_FAILED(rv)) {
+        return rv;
+      }
+    } break;
+
+    default:
+      // No decoding for this mode: pass the data straight through.
+      nsCOMPtr<nsIStreamListener> listener;
+      {
+        MutexAutoLock lock(mMutex);
+        listener = mListener;
+      }
+      rv = listener->OnDataAvailable(request, iStr, aSourceOffset, aCount);
+      if (NS_FAILED(rv)) {
+        return rv;
+      }
+  } /* switch */
+
+  return NS_OK;
+} /* OnDataAvailable */
+
+// XXX/ruslan: need to implement this too
+
+// Synchronous conversion is not implemented; this always returns
+// NS_ERROR_NOT_IMPLEMENTED and leaves |_retval| untouched.
+NS_IMETHODIMP
+nsHTTPCompressConv::Convert(nsIInputStream* aFromStream, const char* aFromType,
+ const char* aToType, nsISupports* aCtxt,
+ nsIInputStream** _retval) {
+ return NS_ERROR_NOT_IMPLEMENTED;
+}
+
+// Delivers |count| decoded bytes at |buffer| to the listener by wrapping them
+// (without copying) in the reusable string stream mStream. |context| is not
+// used. mDecodedDataLength is increased by |count| regardless of the
+// listener's result, which is returned.
+nsresult nsHTTPCompressConv::do_OnDataAvailable(nsIRequest* request,
+ nsISupports* context,
+ uint64_t offset,
+ const char* buffer,
+ uint32_t count) {
+ // The string stream is created on first use and reused afterwards.
+ if (!mStream) {
+ mStream = do_CreateInstance(NS_STRINGINPUTSTREAM_CONTRACTID);
+ NS_ENSURE_STATE(mStream);
+ }
+
+ mStream->ShareData(buffer, count);
+
+ // Copy the listener under the mutex, then call it with the lock released.
+ nsCOMPtr<nsIStreamListener> listener;
+ {
+ MutexAutoLock lock(mMutex);
+ listener = mListener;
+ }
+ nsresult rv = listener->OnDataAvailable(request, mStream, offset, count);
+
+ // Make sure the stream no longer references |buffer| in case our listener
+ // is crazy enough to try to read from |mStream| after ODA.
+ mStream->ShareData("", 0);
+ mDecodedDataLength += count;
+
+ return rv;
+}
+
+// Bit masks for the gzip header flags byte, tested against mFlags in
+// check_header().
+#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */
+#define HEAD_CRC 0x02 /* bit 1 set: header CRC present */
+#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */
+#define ORIG_NAME 0x08 /* bit 3 set: original file name present */
+#define COMMENT 0x10 /* bit 4 set: file comment present */
+#define RESERVED 0xE0 /* bits 5..7: reserved */
+
+// The first two bytes every gzip stream must start with.
+static unsigned gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */
+
+// Consumes the gzip header from |iStr|, one byte at a time.
+//
+// The parser is a state machine whose state (hMode, mSkipCount, mFlags, mLen)
+// lives in members, so a header split across several calls is handled: each
+// call continues where the previous one ran out of input.
+//
+// |streamLen| is the number of bytes available in |iStr|. Returns the number
+// of bytes left unread after the header (0 if the header is not complete
+// yet). |*rs| is NS_OK, or NS_ERROR_INVALID_CONTENT_ENCODING when the magic
+// bytes, the compression method or the reserved flag bits are wrong. Once the
+// header has been fully parsed, later calls return |streamLen| unchanged.
+uint32_t nsHTTPCompressConv::check_header(nsIInputStream* iStr,
+ uint32_t streamLen, nsresult* rs) {
+ enum {
+ GZIP_INIT = 0,
+ GZIP_OS,
+ GZIP_EXTRA0,
+ GZIP_EXTRA1,
+ GZIP_EXTRA2,
+ GZIP_ORIG,
+ GZIP_COMMENT,
+ GZIP_CRC
+ };
+ char c;
+
+ *rs = NS_OK;
+
+ if (mCheckHeaderDone) {
+ return streamLen;
+ }
+
+ while (streamLen) {
+ switch (hMode) {
+ // First four bytes: two magic bytes, the compression method and the
+ // flags byte. mSkipCount counts the bytes seen so far.
+ case GZIP_INIT:
+ // NOTE: |unused| is declared here but shared by all cases below.
+ uint32_t unused;
+ iStr->Read(&c, 1, &unused);
+ streamLen--;
+
+ if (mSkipCount == 0 && ((unsigned)c & 0377) != gz_magic[0]) {
+ *rs = NS_ERROR_INVALID_CONTENT_ENCODING;
+ return 0;
+ }
+
+ if (mSkipCount == 1 && ((unsigned)c & 0377) != gz_magic[1]) {
+ *rs = NS_ERROR_INVALID_CONTENT_ENCODING;
+ return 0;
+ }
+
+ if (mSkipCount == 2 && ((unsigned)c & 0377) != Z_DEFLATED) {
+ *rs = NS_ERROR_INVALID_CONTENT_ENCODING;
+ return 0;
+ }
+
+ mSkipCount++;
+ if (mSkipCount == 4) {
+ mFlags = (unsigned)c & 0377;
+ if (mFlags & RESERVED) {
+ *rs = NS_ERROR_INVALID_CONTENT_ENCODING;
+ return 0;
+ }
+ hMode = GZIP_OS;
+ mSkipCount = 0;
+ }
+ break;
+
+ // Skip the next six header bytes (in RFC 1952 these are MTIME, XFL and
+ // OS); their values are ignored.
+ case GZIP_OS:
+ iStr->Read(&c, 1, &unused);
+ streamLen--;
+ mSkipCount++;
+
+ if (mSkipCount == 6) {
+ hMode = GZIP_EXTRA0;
+ }
+ break;
+
+ // Optional extra field: low byte of its 16-bit length.
+ case GZIP_EXTRA0:
+ if (mFlags & EXTRA_FIELD) {
+ iStr->Read(&c, 1, &unused);
+ streamLen--;
+ mLen = (uInt)c & 0377;
+ hMode = GZIP_EXTRA1;
+ } else {
+ hMode = GZIP_ORIG;
+ }
+ break;
+
+ // High byte of the extra-field length.
+ case GZIP_EXTRA1:
+ iStr->Read(&c, 1, &unused);
+ streamLen--;
+ mLen |= ((uInt)c & 0377) << 8;
+ mSkipCount = 0;
+ hMode = GZIP_EXTRA2;
+ break;
+
+ // Skip the mLen bytes of extra-field data.
+ case GZIP_EXTRA2:
+ if (mSkipCount == mLen) {
+ hMode = GZIP_ORIG;
+ } else {
+ iStr->Read(&c, 1, &unused);
+ streamLen--;
+ mSkipCount++;
+ }
+ break;
+
+ // Optional NUL-terminated original file name.
+ case GZIP_ORIG:
+ if (mFlags & ORIG_NAME) {
+ iStr->Read(&c, 1, &unused);
+ streamLen--;
+ if (c == 0) hMode = GZIP_COMMENT;
+ } else {
+ hMode = GZIP_COMMENT;
+ }
+ break;
+
+ // Optional NUL-terminated comment.
+ case GZIP_COMMENT:
+ if (mFlags & COMMENT) {
+ iStr->Read(&c, 1, &unused);
+ streamLen--;
+ if (c == 0) {
+ hMode = GZIP_CRC;
+ mSkipCount = 0;
+ }
+ } else {
+ hMode = GZIP_CRC;
+ mSkipCount = 0;
+ }
+ break;
+
+ // Optional two-byte header CRC (skipped, not verified); afterwards the
+ // header is complete.
+ case GZIP_CRC:
+ if (mFlags & HEAD_CRC) {
+ iStr->Read(&c, 1, &unused);
+ streamLen--;
+ mSkipCount++;
+ if (mSkipCount == 2) {
+ mCheckHeaderDone = true;
+ return streamLen;
+ }
+ } else {
+ mCheckHeaderDone = true;
+ return streamLen;
+ }
+ break;
+ }
+ }
+ return streamLen;
+}
+
+// Delegates to the listener's CheckListenerChain(). Returns
+// NS_ERROR_NO_INTERFACE when the listener does not implement
+// nsIThreadRetargetableStreamListener (or no listener is set).
+NS_IMETHODIMP
+nsHTTPCompressConv::CheckListenerChain() {
+ nsCOMPtr<nsIThreadRetargetableStreamListener> listener;
+ {
+ // mListener is only read with mMutex held.
+ MutexAutoLock lock(mMutex);
+ listener = do_QueryInterface(mListener);
+ }
+
+ if (!listener) {
+ return NS_ERROR_NO_INTERFACE;
+ }
+
+ return listener->CheckListenerChain();
+}
+
+} // namespace net
+} // namespace mozilla
+
+// Creates a new nsHTTPCompressConv and hands an owning reference to the
+// caller through |aHTTPCompressConv|. Returns NS_ERROR_NULL_POINTER when the
+// out-parameter itself is null, NS_OK otherwise.
+nsresult NS_NewHTTPCompressConv(
+    mozilla::net::nsHTTPCompressConv** aHTTPCompressConv) {
+  MOZ_ASSERT(aHTTPCompressConv != nullptr, "null ptr");
+  if (!aHTTPCompressConv) {
+    return NS_ERROR_NULL_POINTER;
+  }
+
+  // operator new is infallible in Gecko (it aborts rather than returning
+  // null), so no out-of-memory check is needed here; this matches
+  // NS_NewFTPDirListingConv.
+  RefPtr<mozilla::net::nsHTTPCompressConv> outVal =
+      new mozilla::net::nsHTTPCompressConv();
+  outVal.forget(aHTTPCompressConv);
+  return NS_OK;
+}
diff --git a/netwerk/streamconv/converters/nsHTTPCompressConv.h b/netwerk/streamconv/converters/nsHTTPCompressConv.h
new file mode 100644
index 0000000000..1ad34bbfab
--- /dev/null
+++ b/netwerk/streamconv/converters/nsHTTPCompressConv.h
@@ -0,0 +1,137 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set sw=2 ts=8 et tw=80 : */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#if !defined(__nsHTTPCompressConv__h__)
+# define __nsHTTPCompressConv__h__ 1
+
+# include "nsIStreamConverter.h"
+# include "nsICompressConvStats.h"
+# include "nsIThreadRetargetableStreamListener.h"
+# include "nsCOMPtr.h"
+# include "mozilla/Atomics.h"
+# include "mozilla/Mutex.h"
+
+# include "zlib.h"
+
+// brotli includes
+# undef assert
+# include "assert.h"
+# include "state.h"
+
+class nsIStringInputStream;
+
+# define NS_HTTPCOMPRESSCONVERTER_CID \
+ { \
+ /* 66230b2b-17fa-4bd3-abf4-07986151022d */ \
+ 0x66230b2b, 0x17fa, 0x4bd3, { \
+ 0xab, 0xf4, 0x07, 0x98, 0x61, 0x51, 0x02, 0x2d \
+ } \
+ }
+
+# define HTTP_DEFLATE_TYPE "deflate"
+# define HTTP_GZIP_TYPE "gzip"
+# define HTTP_X_GZIP_TYPE "x-gzip"
+# define HTTP_COMPRESS_TYPE "compress"
+# define HTTP_X_COMPRESS_TYPE "x-compress"
+# define HTTP_BROTLI_TYPE "br"
+# define HTTP_IDENTITY_TYPE "identity"
+# define HTTP_UNCOMPRESSED_TYPE "uncompressed"
+
+namespace mozilla {
+namespace net {
+
+ // Compression schemes distinguished by nsHTTPCompressConv::mMode.
+ enum CompressMode {
+   HTTP_COMPRESS_GZIP,
+   HTTP_COMPRESS_DEFLATE,
+   HTTP_COMPRESS_COMPRESS,
+   HTTP_COMPRESS_BROTLI,
+   HTTP_COMPRESS_IDENTITY
+ };
+
+ // Bundles a brotli decoder state with the bookkeeping fields that accompany
+ // it. The decoder state is initialised on construction and cleaned up on
+ // destruction.
+ class BrotliWrapper {
+  public:
+   BrotliWrapper() { BrotliDecoderStateInit(&mState, 0, 0, 0); }
+   ~BrotliWrapper() { BrotliDecoderStateCleanup(&mState); }
+
+   BrotliDecoderState mState;
+   Atomic<size_t, Relaxed> mTotalOut{0};
+   nsresult mStatus{NS_OK};
+   Atomic<bool, Relaxed> mBrotliStateIsStreamEnd{false};
+
+   // Stored as raw pointers: this class holds no reference on either object.
+   nsIRequest* mRequest{nullptr};
+   nsISupports* mContext{nullptr};
+   uint64_t mSourceOffset{0};
+ };
+
+ // Stream converter that also implements nsICompressConvStats and
+ // nsIThreadRetargetableStreamListener. Reference counting is thread-safe
+ // (NS_DECL_THREADSAFE_ISUPPORTS); instances are destroyed only through
+ // Release(), hence the private destructor.
+ class nsHTTPCompressConv : public nsIStreamConverter,
+ public nsICompressConvStats,
+ public nsIThreadRetargetableStreamListener {
+ public:
+ // nsISupports methods
+ NS_DECL_THREADSAFE_ISUPPORTS
+ NS_DECL_NSIREQUESTOBSERVER
+ NS_DECL_NSISTREAMLISTENER
+ NS_DECL_NSICOMPRESSCONVSTATS
+ NS_DECL_NSITHREADRETARGETABLESTREAMLISTENER
+
+ // nsIStreamConverter methods
+ NS_DECL_NSISTREAMCONVERTER
+
+ nsHTTPCompressConv();
+
+ private:
+ virtual ~nsHTTPCompressConv();
+
+ nsCOMPtr<nsIStreamListener>
+ mListener; // receives the converted data via its OnDataAvailable()
+ Atomic<CompressMode, Relaxed> mMode;
+
+ unsigned char* mOutBuffer;
+ unsigned char* mInpBuffer;
+
+ uint32_t mOutBufferLen;
+ uint32_t mInpBufferLen;
+
+ UniquePtr<BrotliWrapper> mBrotli;
+
+ nsCOMPtr<nsIStringInputStream> mStream;
+
+ static nsresult BrotliHandler(nsIInputStream* stream, void* closure,
+ const char* dataIn, uint32_t, uint32_t avail,
+ uint32_t* countRead);
+
+ nsresult do_OnDataAvailable(nsIRequest* request, nsISupports* aContext,
+ uint64_t aSourceOffset, const char* buffer,
+ uint32_t aCount);
+
+ // Set by check_header() once the gzip header has been consumed.
+ bool mCheckHeaderDone;
+ Atomic<bool> mStreamEnded;
+ bool mStreamInitialized;
+ bool mDummyStreamInitialised;
+ bool mFailUncleanStops;
+
+ z_stream d_stream;
+ // Scratch state of the gzip header parser in check_header(): hMode is the
+ // current parser state, mFlags the header flag bits it tests, and
+ // mLen/mSkipCount track how many bytes remain to be skipped.
+ unsigned mLen, hMode, mSkipCount, mFlags;
+
+ // Consumes gzip header bytes from iStr and returns the stream length that
+ // remains afterwards.
+ uint32_t check_header(nsIInputStream* iStr, uint32_t streamLen, nsresult* rv);
+
+ // NOTE(review): nsICompressConvStats exposes decodedDataLength as uint64_t,
+ // but this counter is only 32 bits wide -- confirm that wrap-around for
+ // very large decoded bodies is acceptable.
+ Atomic<uint32_t, Relaxed> mDecodedDataLength;
+
+ // Held while reading mListener in CheckListenerChain().
+ mutable mozilla::Mutex mMutex;
+ };
+
+} // namespace net
+} // namespace mozilla
+
+#endif
diff --git a/netwerk/streamconv/converters/nsICompressConvStats.idl b/netwerk/streamconv/converters/nsICompressConvStats.idl
new file mode 100644
index 0000000000..a8837563ed
--- /dev/null
+++ b/netwerk/streamconv/converters/nsICompressConvStats.idl
@@ -0,0 +1,17 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.idl"
+
+/**
+ * nsICompressConvStats
+ *
+ * This interface allows for the observation of decoded resource sizes
+ */
+ [builtinclass, scriptable, uuid(58172ad0-46a9-4893-8fde-cd909c10792a)]
+ interface nsICompressConvStats : nsISupports
+ {
+ /**
+  * Size of the data decoded so far, exposed read-only as a 64-bit value
+  * (presumably a byte count -- confirm against the implementation).
+  */
+ readonly attribute uint64_t decodedDataLength;
+ };
diff --git a/netwerk/streamconv/converters/nsIndexedToHTML.cpp b/netwerk/streamconv/converters/nsIndexedToHTML.cpp
new file mode 100644
index 0000000000..53e85970fa
--- /dev/null
+++ b/netwerk/streamconv/converters/nsIndexedToHTML.cpp
@@ -0,0 +1,847 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsIndexedToHTML.h"
+
+#include "DateTimeFormat.h"
+#include "mozilla/Encoding.h"
+#include "mozilla/intl/LocaleService.h"
+#include "nsNetUtil.h"
+#include "netCore.h"
+#include "nsStringStream.h"
+#include "nsIFile.h"
+#include "nsIFileURL.h"
+#include "nsEscape.h"
+#include "nsIDirIndex.h"
+#include "nsURLHelper.h"
+#include "nsIStringBundle.h"
+#include "nsDirIndexParser.h"
+#include "nsNativeCharsetUtils.h"
+#include "nsString.h"
+#include "nsContentUtils.h"
+#include <algorithm>
+#include "nsIChannel.h"
+#include "mozilla/Unused.h"
+
+using mozilla::intl::LocaleService;
+
+NS_IMPL_ISUPPORTS(nsIndexedToHTML, nsIDirIndexListener, nsIStreamConverter,
+ nsIRequestObserver, nsIStreamListener)
+
+ // Appends |in| to |out|. Code units below 128 are copied unchanged; every
+ // other UTF-16 code unit is written individually as a hexadecimal numeric
+ // character reference of the form "&#x...;".
+ static void AppendNonAsciiToNCR(const nsAString& in, nsCString& out) {
+   nsAString::const_iterator cursor, stop;
+   in.BeginReading(cursor);
+   in.EndReading(stop);
+
+   for (; cursor != stop; ++cursor) {
+     if (*cursor >= 128) {
+       out.AppendLiteral("&#x");
+       out.AppendInt(*cursor, 16);
+       out.Append(';');
+       continue;
+     }
+     out.Append(*cursor);
+   }
+ }
+
+ // Only mExpectAbsLoc is initialised explicitly here (to false).
+ nsIndexedToHTML::nsIndexedToHTML() : mExpectAbsLoc(false) {}
+
+ // Factory entry point: creates an nsIndexedToHTML and returns the interface
+ // requested by aIID through aResult.
+ //
+ // Returns NS_ERROR_NO_AGGREGATION when aOuter is non-null; otherwise the
+ // result of QueryInterface().
+ nsresult nsIndexedToHTML::Create(nsISupports* aOuter, REFNSIID aIID,
+                                  void** aResult) {
+   if (aOuter) return NS_ERROR_NO_AGGREGATION;
+
+   // Own the new object through a RefPtr so that it is released (and thereby
+   // destroyed) when QueryInterface() fails. Holding it in a raw pointer
+   // leaked the object in that case. operator new is infallible in Gecko, so
+   // no out-of-memory check is needed.
+   RefPtr<nsIndexedToHTML> converter = new nsIndexedToHTML();
+   return converter->QueryInterface(aIID, aResult);
+ }
+
+ // Remembers the downstream listener and loads the NECKO_MSGS_URL string
+ // bundle into mBundle. Returns the failure code if the string bundle
+ // service is unavailable, otherwise the result of CreateBundle().
+ nsresult nsIndexedToHTML::Init(nsIStreamListener* aListener) {
+   mListener = aListener;
+
+   nsresult status = NS_OK;
+   nsCOMPtr<nsIStringBundleService> bundleService =
+       do_GetService(NS_STRINGBUNDLE_CONTRACTID, &status);
+   if (NS_FAILED(status)) {
+     return status;
+   }
+
+   status =
+       bundleService->CreateBundle(NECKO_MSGS_URL, getter_AddRefs(mBundle));
+   mExpectAbsLoc = false;
+   return status;
+ }
+
+ // Synchronous conversion is not supported: always returns
+ // NS_ERROR_NOT_IMPLEMENTED and leaves |res| untouched.
+ NS_IMETHODIMP
+ nsIndexedToHTML::Convert(nsIInputStream* aFromStream, const char* aFromType,
+ const char* aToType, nsISupports* aCtxt,
+ nsIInputStream** res) {
+ return NS_ERROR_NOT_IMPLEMENTED;
+ }
+
+ // Starts an asynchronous conversion. The type strings and context are
+ // ignored; this simply forwards aListener to Init() and returns its result.
+ NS_IMETHODIMP
+ nsIndexedToHTML::AsyncConvertData(const char* aFromType, const char* aToType,
+ nsIStreamListener* aListener,
+ nsISupports* aCtxt) {
+ return Init(aListener);
+ }
+
+ // Not supported: always returns NS_ERROR_NOT_IMPLEMENTED and leaves aToType
+ // untouched.
+ NS_IMETHODIMP
+ nsIndexedToHTML::GetConvertedType(const nsACString& aFromType,
+ nsIChannel* aChannel, nsACString& aToType) {
+ return NS_ERROR_NOT_IMPLEMENTED;
+ }
+
+ // Builds the HTML preamble, notifies the downstream listener that the
+ // request has started and, unless the request ended up canceled, pushes the
+ // preamble to the listener.
+ NS_IMETHODIMP
+ nsIndexedToHTML::OnStartRequest(nsIRequest* request) {
+   nsCString preamble;
+   nsresult rv = DoOnStartRequest(request, nullptr, preamble);
+   if (NS_FAILED(rv)) {
+     // Cancel the request, but still tell the listener that it started.
+     request->Cancel(rv);
+   }
+
+   rv = mListener->OnStartRequest(request);
+   if (NS_FAILED(rv)) {
+     return rv;
+   }
+
+   // A canceled request reports a failure status here; in that case no data
+   // must reach the listener's OnDataAvailable.
+   request->GetStatus(&rv);
+   if (NS_FAILED(rv)) {
+     return rv;
+   }
+
+   return SendToListener(request, nullptr, preamble);
+ }
+
+ // Prepares the conversion for |request| and builds the HTML that precedes
+ // the directory entries.
+ //
+ // Sets the channel's content type to text/html, creates the directory index
+ // parser (mParser) with this object as its listener and forwards
+ // OnStartRequest to it. The generated markup consists of the document head
+ // (style sheet, sorting script, favicon, title and optional <base>), the
+ // page heading, an optional "go up" link, for file: URLs the "show hidden"
+ // checkbox, and the opening of the listing table.
+ //
+ // request   the request being started; queried for nsIChannel.
+ // aContext  unused.
+ // aBuffer   receives the generated markup; assigned only when every step
+ //           succeeded.
+ //
+ // Returns the first failing nsresult, or the result of the last string
+ // bundle lookup on success.
+ nsresult nsIndexedToHTML::DoOnStartRequest(nsIRequest* request,
+                                            nsISupports* aContext,
+                                            nsCString& aBuffer) {
+   nsresult rv;
+
+   nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);
+   nsCOMPtr<nsIURI> uri;
+   rv = channel->GetOriginalURI(getter_AddRefs(uri));
+   if (NS_FAILED(rv)) return rv;
+
+   // We use the original URI for the title and parent link when it's a
+   // resource:// url, instead of the jar:file:// url it resolves to.
+   if (!uri->SchemeIs("resource")) {
+     rv = channel->GetURI(getter_AddRefs(uri));
+     if (NS_FAILED(rv)) return rv;
+   }
+
+   channel->SetContentType("text/html"_ns);
+
+   mParser = nsDirIndexParser::CreateInstance();
+   if (!mParser) return NS_ERROR_FAILURE;
+
+   rv = mParser->SetListener(this);
+   if (NS_FAILED(rv)) return rv;
+
+   rv = mParser->OnStartRequest(request);
+   if (NS_FAILED(rv)) return rv;
+
+   nsAutoCString baseUri, titleUri;
+   rv = uri->GetAsciiSpec(baseUri);
+   if (NS_FAILED(rv)) return rv;
+
+   // The title shows the URI without query and ref; fall back to the full
+   // URI if they cannot be stripped.
+   nsCOMPtr<nsIURI> titleURL;
+   rv = NS_MutateURI(uri).SetQuery(""_ns).SetRef(""_ns).Finalize(titleURL);
+   if (NS_FAILED(rv)) {
+     titleURL = uri;
+   }
+
+   nsCString parentStr;
+
+   nsCString buffer;
+   buffer.AppendLiteral("<!DOCTYPE html>\n<html>\n<head>\n");
+
+   // XXX - should be using the 300: line from the parser.
+   // We can't guarantee that that comes before any entry, so we'd have to
+   // buffer, and do other painful stuff.
+   // I'll deal with this when I make the changes to handle welcome messages
+   // The .. stuff should also come from the lower level protocols, but that
+   // would muck up the XUL display
+   // - bbaetz
+
+   if (uri->SchemeIs("ftp")) {
+     // strip out the password here, so it doesn't show in the page title
+     // This is done by the 300: line generation in ftp, but we don't use
+     // that - see above
+
+     nsAutoCString pw;
+     rv = titleURL->GetPassword(pw);
+     if (NS_FAILED(rv)) return rv;
+     if (!pw.IsEmpty()) {
+       rv = NS_MutateURI(titleURL).SetPassword(""_ns).Finalize(titleURL);
+       if (NS_FAILED(rv)) return rv;
+     }
+
+     nsAutoCString path;
+     rv = uri->GetPathQueryRef(path);
+     if (NS_FAILED(rv)) return rv;
+
+     if (!path.EqualsLiteral("//") && !path.LowerCaseEqualsLiteral("/%2f")) {
+       rv = uri->Resolve(".."_ns, parentStr);
+       if (NS_FAILED(rv)) return rv;
+     }
+   } else if (uri->SchemeIs("file")) {
+     nsCOMPtr<nsIFileURL> fileUrl = do_QueryInterface(uri);
+     nsCOMPtr<nsIFile> file;
+     rv = fileUrl->GetFile(getter_AddRefs(file));
+     if (NS_FAILED(rv)) return rv;
+
+     nsAutoCString url;
+     rv = net_GetURLSpecFromFile(file, url);
+     if (NS_FAILED(rv)) return rv;
+     baseUri.Assign(url);
+
+     nsCOMPtr<nsIFile> parent;
+     rv = file->GetParent(getter_AddRefs(parent));
+
+     if (parent && NS_SUCCEEDED(rv)) {
+       // Capture the result: without the assignment the check below tested
+       // the stale rv from GetParent() and could never fire.
+       rv = net_GetURLSpecFromDir(parent, url);
+       if (NS_FAILED(rv)) return rv;
+       parentStr.Assign(url);
+     }
+
+     // Directory index will be always encoded in UTF-8 if this is file url
+     buffer.AppendLiteral("<meta charset=\"UTF-8\">\n");
+
+   } else if (uri->SchemeIs("jar")) {
+     nsAutoCString path;
+     rv = uri->GetPathQueryRef(path);
+     if (NS_FAILED(rv)) return rv;
+
+     // a top-level jar directory URL is of the form jar:foo.zip!/
+     // path will be of the form foo.zip!/, and its last two characters
+     // will be "!/"
+     // XXX this won't work correctly when the name of the directory being
+     // XXX displayed ends with "!", but then again, jar: URIs don't deal
+     // XXX particularly well with such directories anyway
+     if (!StringEndsWith(path, "!/"_ns)) {
+       rv = uri->Resolve(".."_ns, parentStr);
+       if (NS_FAILED(rv)) return rv;
+     }
+   } else {
+     // default behavior for other protocols is to assume the channel's
+     // URL references a directory ending in '/' -- fixup if necessary.
+     nsAutoCString path;
+     rv = uri->GetPathQueryRef(path);
+     if (NS_FAILED(rv)) return rv;
+     if (baseUri.Last() != '/') {
+       baseUri.Append('/');
+       path.Append('/');
+       mozilla::Unused << NS_MutateURI(uri).SetPathQueryRef(path).Finalize(uri);
+     }
+     if (!path.EqualsLiteral("/")) {
+       rv = uri->Resolve(".."_ns, parentStr);
+       if (NS_FAILED(rv)) return rv;
+     }
+   }
+
+   rv = titleURL->GetAsciiSpec(titleUri);
+   if (NS_FAILED(rv)) {
+     return rv;
+   }
+
+   // Inline style sheet and the client-side sorting / "show hidden" script.
+   buffer.AppendLiteral(
+       "<style type=\"text/css\">\n"
+       ":root {\n"
+       " font-family: sans-serif;\n"
+       "}\n"
+       "img {\n"
+       " border: 0;\n"
+       "}\n"
+       "th {\n"
+       " text-align: start;\n"
+       " white-space: nowrap;\n"
+       "}\n"
+       "th > a {\n"
+       " color: inherit;\n"
+       "}\n"
+       "table[order] > thead > tr > th {\n"
+       " cursor: pointer;\n"
+       "}\n"
+       "table[order] > thead > tr > th::after {\n"
+       " display: none;\n"
+       " width: .8em;\n"
+       " margin-inline-end: -.8em;\n"
+       " text-align: end;\n"
+       "}\n"
+       "table[order=\"asc\"] > thead > tr > th::after {\n"
+       " content: \"\\2193\"; /* DOWNWARDS ARROW (U+2193) */\n"
+       "}\n"
+       "table[order=\"desc\"] > thead > tr > th::after {\n"
+       " content: \"\\2191\"; /* UPWARDS ARROW (U+2191) */\n"
+       "}\n"
+       "table[order][order-by=\"0\"] > thead > tr > th:first-child > a ,\n"
+       "table[order][order-by=\"1\"] > thead > tr > th:first-child + th > a ,\n"
+       "table[order][order-by=\"2\"] > thead > tr > th:first-child + th + th > "
+       "a {\n"
+       " text-decoration: underline;\n"
+       "}\n"
+       "table[order][order-by=\"0\"] > thead > tr > th:first-child::after ,\n"
+       "table[order][order-by=\"1\"] > thead > tr > th:first-child + th::after "
+       ",\n"
+       "table[order][order-by=\"2\"] > thead > tr > th:first-child + th + "
+       "th::after {\n"
+       " display: inline-block;\n"
+       "}\n"
+       "table.remove-hidden > tbody > tr.hidden-object {\n"
+       " display: none;\n"
+       "}\n"
+       "td {\n"
+       " white-space: nowrap;\n"
+       "}\n"
+       "table.ellipsis {\n"
+       " width: 100%;\n"
+       " table-layout: fixed;\n"
+       " border-spacing: 0;\n"
+       "}\n"
+       "table.ellipsis > tbody > tr > td {\n"
+       " padding: 0;\n"
+       " overflow: hidden;\n"
+       " text-overflow: ellipsis;\n"
+       "}\n"
+       "/* name */\n"
+       "/* name */\n"
+       "th:first-child {\n"
+       " padding-inline-end: 2em;\n"
+       "}\n"
+       "/* size */\n"
+       "th:first-child + th {\n"
+       " padding-inline-end: 1em;\n"
+       "}\n"
+       "td:first-child + td {\n"
+       " text-align: end;\n"
+       " padding-inline-end: 1em;\n"
+       "}\n"
+       "/* date */\n"
+       "td:first-child + td + td {\n"
+       " padding-inline-start: 1em;\n"
+       " padding-inline-end: .5em;\n"
+       "}\n"
+       "/* time */\n"
+       "td:first-child + td + td + td {\n"
+       " padding-inline-start: .5em;\n"
+       "}\n"
+       ".symlink {\n"
+       " font-style: italic;\n"
+       "}\n"
+       ".dir ,\n"
+       ".symlink ,\n"
+       ".file {\n"
+       " margin-inline-start: 20px;\n"
+       "}\n"
+       ".dir::before ,\n"
+       ".file > img {\n"
+       " margin-inline-end: 4px;\n"
+       " margin-inline-start: -20px;\n"
+       " max-width: 16px;\n"
+       " max-height: 16px;\n"
+       " vertical-align: middle;\n"
+       "}\n"
+       ".dir::before {\n"
+       " content: url(resource://content-accessible/html/folder.png);\n"
+       "}\n"
+       "</style>\n"
+       "<link rel=\"stylesheet\" media=\"screen, projection\" type=\"text/css\""
+       " href=\"chrome://global/skin/dirListing/dirListing.css\">\n"
+       "<script type=\"application/javascript\">\n"
+       "'use strict';\n"
+       "var gTable, gOrderBy, gTBody, gRows, gUI_showHidden;\n"
+       "document.addEventListener(\"DOMContentLoaded\", function() {\n"
+       " gTable = document.getElementsByTagName(\"table\")[0];\n"
+       " gTBody = gTable.tBodies[0];\n"
+       " if (gTBody.rows.length < 2)\n"
+       " return;\n"
+       " gUI_showHidden = document.getElementById(\"UI_showHidden\");\n"
+       " var headCells = gTable.tHead.rows[0].cells,\n"
+       " hiddenObjects = false;\n"
+       " function rowAction(i) {\n"
+       " return function(event) {\n"
+       " event.preventDefault();\n"
+       " orderBy(i);\n"
+       " }\n"
+       " }\n"
+       " for (var i = headCells.length - 1; i >= 0; i--) {\n"
+       " var anchor = document.createElement(\"a\");\n"
+       " anchor.href = \"\";\n"
+       " anchor.appendChild(headCells[i].firstChild);\n"
+       " headCells[i].appendChild(anchor);\n"
+       " headCells[i].addEventListener(\"click\", rowAction(i), true);\n"
+       " }\n"
+       " if (gUI_showHidden) {\n"
+       " gRows = Array.from(gTBody.rows);\n"
+       " hiddenObjects = gRows.some(row => row.className == "
+       "\"hidden-object\");\n"
+       " }\n"
+       " gTable.setAttribute(\"order\", \"\");\n"
+       " if (hiddenObjects) {\n"
+       " gUI_showHidden.style.display = \"block\";\n"
+       " updateHidden();\n"
+       " }\n"
+       "}, \"false\");\n"
+       "function compareRows(rowA, rowB) {\n"
+       " var a = rowA.cells[gOrderBy].getAttribute(\"sortable-data\") || "
+       "\"\";\n"
+       " var b = rowB.cells[gOrderBy].getAttribute(\"sortable-data\") || "
+       "\"\";\n"
+       " var intA = +a;\n"
+       " var intB = +b;\n"
+       " if (a == intA && b == intB) {\n"
+       " a = intA;\n"
+       " b = intB;\n"
+       " } else {\n"
+       " a = a.toLowerCase();\n"
+       " b = b.toLowerCase();\n"
+       " }\n"
+       " if (a < b)\n"
+       " return -1;\n"
+       " if (a > b)\n"
+       " return 1;\n"
+       " return 0;\n"
+       "}\n"
+       "function orderBy(column) {\n"
+       " if (!gRows)\n"
+       " gRows = Array.from(gTBody.rows);\n"
+       " var order;\n"
+       " if (gOrderBy == column) {\n"
+       " order = gTable.getAttribute(\"order\") == \"asc\" ? \"desc\" : "
+       "\"asc\";\n"
+       " } else {\n"
+       " order = \"asc\";\n"
+       " gOrderBy = column;\n"
+       " gTable.setAttribute(\"order-by\", column);\n"
+       " gRows.sort(compareRows);\n"
+       " }\n"
+       " gTable.removeChild(gTBody);\n"
+       " gTable.setAttribute(\"order\", order);\n"
+       " if (order == \"asc\")\n"
+       " for (var i = 0; i < gRows.length; i++)\n"
+       " gTBody.appendChild(gRows[i]);\n"
+       " else\n"
+       " for (var i = gRows.length - 1; i >= 0; i--)\n"
+       " gTBody.appendChild(gRows[i]);\n"
+       " gTable.appendChild(gTBody);\n"
+       "}\n"
+       "function updateHidden() {\n"
+       " gTable.className = "
+       "gUI_showHidden.getElementsByTagName(\"input\")[0].checked ?\n"
+       " \"\" :\n"
+       " \"remove-hidden\";\n"
+       "}\n"
+       "</script>\n");
+
+   // Favicon: one embedded PNG when the innermost URI is a file URL, a
+   // different one otherwise.
+   buffer.AppendLiteral(R"(<link rel="icon" type="image/png" href=")");
+   nsCOMPtr<nsIURI> innerUri = NS_GetInnermostURI(uri);
+   if (!innerUri) return NS_ERROR_UNEXPECTED;
+   nsCOMPtr<nsIFileURL> fileURL(do_QueryInterface(innerUri));
+   // XXX bug 388553: can't use skinnable icons here due to security restrictions
+   if (fileURL) {
+     buffer.AppendLiteral(
+         "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAB"
+         "AAAAAQCAYAAAAf8%2F9hAAAAGXRFWHRTb2Z0d2FyZQBBZG9i"
+         "ZSBJbWFnZVJlYWR5ccllPAAAAjFJREFUeNqsU8uOElEQPffR"
+         "3XQ3ONASdBJCSBxHos5%2B3Bg3rvkCv8PElS78gPkO%2FATj"
+         "QoUdO2ftrJiRh6aneTb9sOpC4weMN6lcuFV16pxDIfI8x12O"
+         "YIDhcPiu2Wx%2B%2FHF5CW1Z6Jyegt%2FTNEWSJIjjGFEUIQ"
+         "xDrFYrWFSzXC4%2FdLvd95pRKpXKy%2BpRFZ7nwaWo1%2BsG"
+         "nQG2260BKJfLKJVKGI1GEEJw7ateryd0v993W63WEwjgxfn5"
+         "obGYzgCbzcaEbdsIggDj8Riu6z6iUk9SYZMSx8W0LMsM%2FS"
+         "KK75xnJlIq80anQXdbEp0OhcPJ0eiaJnGRMEyyPDsAKKUM9c"
+         "lkYoDo3SZJzzSdp0VSKYmfV1co%2Bz580kw5KDIM8RbRfEnU"
+         "f1HzxtQyMAGcaGruTKczMzEIaqhKifV6jd%2BzGQQB5llunF"
+         "%2FM52BizC2K5sYPYvZcu653tjOM9O93wnYc08gmkgg4VAxi"
+         "xfqFUJT36AYBZGd6PJkFCZnnlBxMp38gqIgLpZB0y4Nph18l"
+         "yWh5FFbrOSxbl3V4G%2BVB7T4ajYYxTyuLtO%2BCvWGgJE1M"
+         "c7JNsJEhvgw%2FQV4fo%2F24nbEsX2u1d5sVyn8sJO0ZAQiI"
+         "YnFh%2BxrfLz%2Fj29cBS%2FO14zg3i8XigW3ZkErDtmKoeM"
+         "%2BAJGRMnXeEPGKf0nCD1ydvkDzU9Jbc6OpR7WIw6L8lQ%2B"
+         "4pQ1%2FlPF0RGM9Ns91Wmptk0GfB4EJkt77vXYj%2F8m%2B8"
+         "y%2FkrwABHbz2H9V68DQAAAABJRU5ErkJggg%3D%3D");
+   } else {
+     buffer.AppendLiteral(
+         "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAB"
+         "AAAAAQCAYAAAAf8%2F9hAAAAGXRFWHRTb2Z0d2FyZQBBZG9i"
+         "ZSBJbWFnZVJlYWR5ccllPAAAAeBJREFUeNqcU81O20AQ%2Ft"
+         "Z2AgQSYQRqL1UPVG2hAUQkxLEStz4DrXpLpD5Drz31Cajax%"
+         "2Bghhx6qHIJURBTxIwQRwopCBbZjHMcOTrzermPipsSt1Iw0"
+         "3p3ZmW%2B%2B2R0TxhgOD34wjCHZlQ0iDYz9yvEfhxMTCYhE"
+         "QDIZhkxKd2sqzX2TOD2vBQCQhpPefng1ZP2dVPlLLdpL8SEM"
+         "cxng%2Fbs0RIHhtgs4twxOh%2BHjZxvzDx%2F3GQQiDFISiR"
+         "BLFMPKTRMollzcWECrDVhtxtdRVsL9youPxGj%2FbdfFlUZh"
+         "tDyYbYqWRUdai1oQRZ5oHeHl2gNM%2B01Uqio8RlH%2Bnsaz"
+         "JzNwXcq1B%2BiXPHprlEEymeBfXs1w8XxxihfyuXqoHqpoGj"
+         "ZM04bddgG%2F9%2B8WGj87qDdsrK9m%2BoA%2BpbhQTDh2l1"
+         "%2Bi2weNbSHMZyjvNXmVbqh9Fj5Oz27uEoP%2BSTxANruJs9"
+         "L%2FT6P0ewqPx5nmiAG5f6AoCtN1PbJzuRyJAyDBzzSQYvEr"
+         "f06yYxhGXlEa8H2KVGoasjwLx3Ewk858opQWXm%2B%2Fib9E"
+         "QrBzclLLLy89xYvlpchvtixcX6uo1y%2FzsiwHrkIsgKbp%2"
+         "BYWFOWicuqppoNTnStHzPFCPQhBEBOyGAX4JMADFetubi4BS"
+         "YAAAAABJRU5ErkJggg%3D%3D");
+   }
+   buffer.AppendLiteral("\">\n<title>");
+
+   // Everything needs to end in a /,
+   // otherwise we end up linking to file:///foo/dirfile
+
+   if (!mTextToSubURI) {
+     mTextToSubURI = do_GetService(NS_ITEXTTOSUBURI_CONTRACTID, &rv);
+     if (NS_FAILED(rv)) return rv;
+   }
+
+   nsAutoString unEscapeSpec;
+   rv = mTextToSubURI->UnEscapeAndConvert("UTF-8"_ns, titleUri, unEscapeSpec);
+   if (NS_FAILED(rv)) {
+     return rv;
+   }
+
+   // The unescaped URI is HTML-escaped before it is substituted into the
+   // localized title string.
+   nsCString htmlEscSpecUtf8;
+   nsAppendEscapedHTML(NS_ConvertUTF16toUTF8(unEscapeSpec), htmlEscSpecUtf8);
+   AutoTArray<nsString, 1> formatTitle;
+   CopyUTF8toUTF16(htmlEscSpecUtf8, *formatTitle.AppendElement());
+
+   nsAutoString title;
+   rv = mBundle->FormatStringFromName("DirTitle", formatTitle, title);
+   if (NS_FAILED(rv)) return rv;
+
+   // we want to convert string bundle to NCR
+   // to ensure they're shown in any charsets
+   AppendNonAsciiToNCR(title, buffer);
+
+   buffer.AppendLiteral("</title>\n");
+
+   // If there is a quote character in the baseUri, then
+   // lets not add a base URL. The reason for this is that
+   // if we stick baseUri containing a quote into a quoted
+   // string, the quote character will prematurely close
+   // the base href string. This is a fall-back check;
+   // that's why it is OK to not use a base rather than
+   // trying to play nice and escaping the quotes. See bug
+   // 358128.
+
+   if (!baseUri.Contains('"')) {
+     // Great, the baseUri does not contain a char that
+     // will prematurely close the string. Go ahead an
+     // add a base href, but only do so if we're not
+     // dealing with a resource URI.
+     if (!uri->SchemeIs("resource")) {
+       buffer.AppendLiteral("<base href=\"");
+       nsAppendEscapedHTML(baseUri, buffer);
+       buffer.AppendLiteral("\" />\n");
+     }
+   } else {
+     NS_ERROR("broken protocol handler didn't escape double-quote.");
+   }
+
+   nsCString direction("ltr"_ns);
+   if (LocaleService::GetInstance()->IsAppLocaleRTL()) {
+     direction.AssignLiteral("rtl");
+   }
+
+   buffer.AppendLiteral("</head>\n<body dir=\"");
+   buffer.Append(direction);
+   buffer.AppendLiteral("\">\n<h1>");
+   AppendNonAsciiToNCR(title, buffer);
+   buffer.AppendLiteral("</h1>\n");
+
+   // "Go up" link, only when a parent location was determined above.
+   if (!parentStr.IsEmpty()) {
+     nsAutoString parentText;
+     rv = mBundle->GetStringFromName("DirGoUp", parentText);
+     if (NS_FAILED(rv)) return rv;
+
+     buffer.AppendLiteral(R"(<p id="UI_goUp"><a class="up" href=")");
+     nsAppendEscapedHTML(parentStr, buffer);
+     buffer.AppendLiteral("\">");
+     AppendNonAsciiToNCR(parentText, buffer);
+     buffer.AppendLiteral("</a></p>\n");
+   }
+
+   if (uri->SchemeIs("file")) {
+     nsAutoString showHiddenText;
+     rv = mBundle->GetStringFromName("ShowHidden", showHiddenText);
+     if (NS_FAILED(rv)) return rv;
+
+     buffer.AppendLiteral(
+         "<p id=\"UI_showHidden\" style=\"display:none\"><label><input "
+         "type=\"checkbox\" checked onchange=\"updateHidden()\">");
+     AppendNonAsciiToNCR(showHiddenText, buffer);
+     buffer.AppendLiteral("</label></p>\n");
+   }
+
+   // Table header: name, size and (two-column) modification time.
+   buffer.AppendLiteral(
+       "<table>\n"
+       " <thead>\n"
+       " <tr>\n"
+       " <th>");
+
+   nsAutoString columnText;
+   rv = mBundle->GetStringFromName("DirColName", columnText);
+   if (NS_FAILED(rv)) return rv;
+   AppendNonAsciiToNCR(columnText, buffer);
+   buffer.AppendLiteral(
+       "</th>\n"
+       " <th>");
+
+   rv = mBundle->GetStringFromName("DirColSize", columnText);
+   if (NS_FAILED(rv)) return rv;
+   AppendNonAsciiToNCR(columnText, buffer);
+   buffer.AppendLiteral(
+       "</th>\n"
+       " <th colspan=\"2\">");
+
+   rv = mBundle->GetStringFromName("DirColMTime", columnText);
+   if (NS_FAILED(rv)) return rv;
+   AppendNonAsciiToNCR(columnText, buffer);
+   buffer.AppendLiteral(
+       "</th>\n"
+       " </tr>\n"
+       " </thead>\n");
+   buffer.AppendLiteral(" <tbody>\n");
+
+   aBuffer = buffer;
+   return rv;
+ }
+
+ // Finishes the conversion: on success the closing markup is sent to the
+ // listener, then the parser and the downstream listener are told that the
+ // request stopped.
+ //
+ // aStatus is replaced by the result of sending the closing markup, so a
+ // failure there is what the parser and listener observe.
+ NS_IMETHODIMP
+ nsIndexedToHTML::OnStopRequest(nsIRequest* request, nsresult aStatus) {
+   if (NS_SUCCEEDED(aStatus)) {
+     nsCString buffer;
+     buffer.AssignLiteral("</tbody></table></body></html>\n");
+
+     aStatus = SendToListener(request, nullptr, buffer);
+   }
+
+   // mParser is still null when DoOnStartRequest() failed before creating
+   // it: OnStartRequest() only cancels the request in that case, so this
+   // method is still reached. Guard the dereference.
+   if (mParser) {
+     mParser->OnStopRequest(request, aStatus);
+     mParser = nullptr;
+   }
+
+   return mListener->OnStopRequest(request, aStatus);
+ }
+
+ // Wraps aBuffer in a string input stream and hands it to the downstream
+ // listener's OnDataAvailable with offset 0 and the buffer's length.
+ // aContext is unused. Returns the stream-creation error, or whatever the
+ // listener returns.
+ nsresult nsIndexedToHTML::SendToListener(nsIRequest* aRequest,
+                                          nsISupports* aContext,
+                                          const nsACString& aBuffer) {
+   nsCOMPtr<nsIInputStream> stream;
+   const nsresult rv =
+       NS_NewCStringInputStream(getter_AddRefs(stream), aBuffer);
+   NS_ENSURE_SUCCESS(rv, rv);
+
+   const uint32_t byteCount = aBuffer.Length();
+   return mListener->OnDataAvailable(aRequest, stream, 0, byteCount);
+ }
+
+ // Passes incoming data straight to the directory index parser.
+ // NOTE(review): mParser is dereferenced unchecked; this relies on
+ // DoOnStartRequest() having created it -- confirm no data can arrive after a
+ // failed start.
+ NS_IMETHODIMP
+ nsIndexedToHTML::OnDataAvailable(nsIRequest* aRequest, nsIInputStream* aInput,
+ uint64_t aOffset, uint32_t aCount) {
+ return mParser->OnDataAvailable(aRequest, aInput, aOffset, aCount);
+ }
+
+ // Formats aPrTime into aStringOut using the given date and time selectors.
+ //
+ // When fingerprinting resistance is enabled the time is exploded as GMT and
+ // formatted from that; otherwise FormatPRTime is used so the exact time
+ // zone name (e.g. CEST) is shown rather than a GMT-based one (e.g. GMT+1).
+ static nsresult FormatTime(const nsDateFormatSelector aDateFormatSelector,
+                            const nsTimeFormatSelector aTimeFormatSelector,
+                            const PRTime aPrTime, nsAString& aStringOut) {
+   if (nsContentUtils::ShouldResistFingerprinting()) {
+     PRExplodedTime gmtTime;
+     PR_ExplodeTime(aPrTime, PR_GMTParameters, &gmtTime);
+     return mozilla::DateTimeFormat::FormatPRExplodedTime(
+         aDateFormatSelector, aTimeFormatSelector, &gmtTime, aStringOut);
+   }
+
+   return mozilla::DateTimeFormat::FormatPRTime(
+       aDateFormatSelector, aTimeFormatSelector, aPrTime, aStringOut);
+ }
+
+ // Emits one table row for a directory entry and sends it to the listener.
+ // The row has four cells: name (link, plus an icon for files), size, date
+ // and time. Returns NS_ERROR_NULL_POINTER for a null aIndex and
+ // NS_ERROR_ILLEGAL_VALUE when the unescaped location is empty.
+ NS_IMETHODIMP
+ nsIndexedToHTML::OnIndexAvailable(nsIRequest* aRequest, nsISupports* aCtxt,
+ nsIDirIndex* aIndex) {
+ nsresult rv;
+ if (!aIndex) return NS_ERROR_NULL_POINTER;
+
+ nsCString pushBuffer;
+ pushBuffer.AppendLiteral("<tr");
+
+ // We don't know the file's character set yet, so retrieve the raw bytes
+ // which will be decoded by the HTML parser.
+ nsCString loc;
+ aIndex->GetLocation(loc);
+
+ // Adjust the length in case unescaping shortened the string.
+ loc.Truncate(nsUnescapeCount(loc.BeginWriting()));
+
+ if (loc.IsEmpty()) {
+ return NS_ERROR_ILLEGAL_VALUE;
+ }
+ // Entries whose name starts with '.' get the "hidden-object" class, which
+ // the page's style sheet and script use to hide them.
+ if (loc.First() == char16_t('.'))
+ pushBuffer.AppendLiteral(" class=\"hidden-object\"");
+
+ pushBuffer.AppendLiteral(">\n <td sortable-data=\"");
+
+ // The sort key is the name of the item, prepended by either 0, 1 or 2
+ // in order to group items.
+ uint32_t type;
+ aIndex->GetType(&type);
+ switch (type) {
+ case nsIDirIndex::TYPE_SYMLINK:
+ pushBuffer.Append('0');
+ break;
+ case nsIDirIndex::TYPE_DIRECTORY:
+ pushBuffer.Append('1');
+ break;
+ default:
+ pushBuffer.Append('2');
+ break;
+ }
+ // HTML-escaped name, used for the sort key here and again below as the
+ // visible link text. It is taken before a trailing '/' may be appended to
+ // loc, so the displayed name never gains that slash.
+ nsCString escaped;
+ nsAppendEscapedHTML(loc, escaped);
+ pushBuffer.Append(escaped);
+
+ pushBuffer.AppendLiteral(
+ R"("><table class="ellipsis"><tbody><tr><td><a class=")");
+ switch (type) {
+ case nsIDirIndex::TYPE_DIRECTORY:
+ pushBuffer.AppendLiteral("dir");
+ break;
+ case nsIDirIndex::TYPE_SYMLINK:
+ pushBuffer.AppendLiteral("symlink");
+ break;
+ default:
+ pushBuffer.AppendLiteral("file");
+ break;
+ }
+
+ pushBuffer.AppendLiteral("\" href=\"");
+
+ // need to escape links
+ nsAutoCString locEscaped;
+
+ // Adding trailing slash helps to recognize whether the URL points to a file
+ // or a directory (bug #214405).
+ if ((type == nsIDirIndex::TYPE_DIRECTORY) && (loc.Last() != '/')) {
+ loc.Append('/');
+ }
+
+ // now minimally re-escape the location...
+ uint32_t escFlags;
+ // for some protocols, we expect the location to be absolute.
+ // if so, and if the location indeed appears to be a valid URI, then go
+ // ahead and treat it like one.
+
+ nsAutoCString scheme;
+ if (mExpectAbsLoc && NS_SUCCEEDED(net_ExtractURLScheme(loc, scheme))) {
+ // escape as absolute
+ escFlags = esc_Forced | esc_AlwaysCopy | esc_Minimal;
+ } else {
+ // escape as relative
+ // esc_Directory is needed because directories have a trailing slash.
+ // Without it, the trailing '/' will be escaped, and links from within
+ // that directory will be incorrect
+ escFlags = esc_Forced | esc_AlwaysCopy | esc_FileBaseName | esc_Colon |
+ esc_Directory;
+ }
+ NS_EscapeURL(loc.get(), loc.Length(), escFlags, locEscaped);
+ // esc_Directory does not escape the semicolons, so if a filename
+ // contains semicolons we need to manually escape them.
+ // This replacement should be removed in bug #473280
+ locEscaped.ReplaceSubstring(";", "%3b");
+ nsAppendEscapedHTML(locEscaped, pushBuffer);
+ pushBuffer.AppendLiteral("\">");
+
+ if (type == nsIDirIndex::TYPE_FILE || type == nsIDirIndex::TYPE_UNKNOWN) {
+ pushBuffer.AppendLiteral("<img src=\"moz-icon://");
+ int32_t lastDot = locEscaped.RFindChar('.');
+ if (lastDot != kNotFound) {
+ // Keep only the part from the last '.' onwards for the icon URL.
+ // locEscaped has already been written into the href above, so
+ // truncating it here is safe.
+ locEscaped.Cut(0, lastDot);
+ nsAppendEscapedHTML(locEscaped, pushBuffer);
+ } else {
+ pushBuffer.AppendLiteral("unknown");
+ }
+ pushBuffer.AppendLiteral("?size=16\" alt=\"");
+
+ nsAutoString altText;
+ rv = mBundle->GetStringFromName("DirFileLabel", altText);
+ if (NS_FAILED(rv)) return rv;
+ AppendNonAsciiToNCR(altText, pushBuffer);
+ pushBuffer.AppendLiteral("\">");
+ }
+
+ pushBuffer.Append(escaped);
+ pushBuffer.AppendLiteral("</a></td></tr></tbody></table></td>\n <td");
+
+ // Size cell: left empty for directories and symlinks.
+ if (type == nsIDirIndex::TYPE_DIRECTORY ||
+ type == nsIDirIndex::TYPE_SYMLINK) {
+ pushBuffer.Append('>');
+ } else {
+ int64_t size;
+ aIndex->GetSize(&size);
+
+ // A size of -1 (all bits set when viewed as uint64_t) leaves the cell
+ // empty and without a sort key.
+ if (uint64_t(size) != UINT64_MAX) {
+ pushBuffer.AppendLiteral(" sortable-data=\"");
+ pushBuffer.AppendInt(size);
+ pushBuffer.AppendLiteral("\">");
+ nsAutoCString sizeString;
+ FormatSizeString(size, sizeString);
+ pushBuffer.Append(sizeString);
+ } else {
+ pushBuffer.Append('>');
+ }
+ }
+ pushBuffer.AppendLiteral("</td>\n <td");
+
+ PRTime t;
+ aIndex->GetLastModified(&t);
+
+ // A modification time of -1 produces empty date and time cells.
+ if (t == -1LL) {
+ pushBuffer.AppendLiteral("></td>\n <td>");
+ } else {
+ pushBuffer.AppendLiteral(" sortable-data=\"");
+ pushBuffer.AppendInt(static_cast<int64_t>(t));
+ pushBuffer.AppendLiteral("\">");
+ // NOTE(review): the results of both FormatTime() calls are ignored, so a
+ // formatting failure is not reported -- confirm this is intended.
+ nsAutoString formatted;
+ FormatTime(kDateFormatShort, kTimeFormatNone, t, formatted);
+ AppendNonAsciiToNCR(formatted, pushBuffer);
+ pushBuffer.AppendLiteral("</td>\n <td>");
+ FormatTime(kDateFormatNone, kTimeFormatLong, t, formatted);
+ // use NCR to show date in any doc charset
+ AppendNonAsciiToNCR(formatted, pushBuffer);
+ }
+
+ pushBuffer.AppendLiteral("</td>\n</tr>");
+
+ return SendToListener(aRequest, aCtxt, pushBuffer);
+ }
+
+ // Emits a table row whose first cell carries aInfo and whose remaining
+ // three cells are empty, then sends it to the listener.
+ NS_IMETHODIMP
+ nsIndexedToHTML::OnInformationAvailable(nsIRequest* aRequest,
+                                         nsISupports* aCtxt,
+                                         const nsAString& aInfo) {
+   // HTML-escape the text first (done on its UTF-8 form).
+   nsAutoCString htmlSafeInfo;
+   nsAppendEscapedHTML(NS_ConvertUTF16toUTF8(aInfo), htmlSafeInfo);
+
+   nsAutoCString row;
+   row.AppendLiteral("<tr>\n <td>");
+   // Non-ASCII characters are written as hex NCRs so the result does not
+   // depend on which character set the HTML parser applies.
+   AppendNonAsciiToNCR(NS_ConvertUTF8toUTF16(htmlSafeInfo), row);
+   row.AppendLiteral("</td>\n <td></td>\n <td></td>\n <td></td>\n</tr>\n");
+
+   return SendToListener(aRequest, aCtxt, row);
+ }
+
+ // Writes inSize, rounded up to whole kilobytes, into outSizeString as
+ // "<n> KB". Sizes of zero or less produce an empty string.
+ void nsIndexedToHTML::FormatSizeString(int64_t inSize,
+                                        nsCString& outSizeString) {
+   outSizeString.Truncate();
+   if (inSize <= int64_t(0)) {
+     return;
+   }
+
+   const int64_t bytesPerKB = 1024;
+   // Adding (bytesPerKB - 1) before dividing rounds up to the next kilobyte.
+   const int64_t sizeInKB = (inSize + (bytesPerKB - 1)) / bytesPerKB;
+   outSizeString.AppendInt(sizeInKB);
+   outSizeString.AppendLiteral(" KB");
+ }
diff --git a/netwerk/streamconv/converters/nsIndexedToHTML.h b/netwerk/streamconv/converters/nsIndexedToHTML.h
new file mode 100644
index 0000000000..6173ecb523
--- /dev/null
+++ b/netwerk/streamconv/converters/nsIndexedToHTML.h
@@ -0,0 +1,61 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ____nsindexedtohtml___h___
+#define ____nsindexedtohtml___h___
+
+#include "nsCOMPtr.h"
+#include "nsString.h"
+#include "nsIStreamConverter.h"
+#include "nsIDirIndexListener.h"
+
+#define NS_NSINDEXEDTOHTMLCONVERTER_CID \
+ { \
+ 0xcf0f71fd, 0xfafd, 0x4e2b, { \
+ 0x9f, 0xdc, 0x13, 0x4d, 0x97, 0x2e, 0x16, 0xe2 \
+ } \
+ }
+
+class nsIStringBundle;
+class nsITextToSubURI;
+
+// Stream converter that is also a directory-index listener; the entries and
+// informational lines it receives are written out as HTML table rows to the
+// final listener.
+class nsIndexedToHTML : public nsIStreamConverter, public nsIDirIndexListener {
+ public:
+  NS_DECL_ISUPPORTS
+  NS_DECL_NSISTREAMCONVERTER
+  NS_DECL_NSIREQUESTOBSERVER
+  NS_DECL_NSISTREAMLISTENER
+  NS_DECL_NSIDIRINDEXLISTENER
+
+  nsIndexedToHTML();
+
+  // XPCOM-style factory entry point.
+  static nsresult Create(nsISupports* aOuter, REFNSIID aIID, void** aResult);
+
+  nsresult Init(nsIStreamListener* aListener);
+
+ protected:
+  // Writes the size in kilobytes (rounded up) followed by " KB"; the output
+  // is left empty for sizes that are not positive.
+  void FormatSizeString(int64_t inSize, nsCString& outSizeString);
+
+  // Hands a chunk of generated markup to the final listener.
+  nsresult SendToListener(nsIRequest* aRequest, nsISupports* aContext,
+                          const nsACString& aBuffer);
+
+  // Helper to properly implement OnStartRequest
+  nsresult DoOnStartRequest(nsIRequest* request, nsISupports* aContext,
+                            nsCString& aBuffer);
+
+  nsCOMPtr<nsIDirIndexParser> mParser;
+  nsCOMPtr<nsIStreamListener> mListener;  // final listener (consumer)
+
+  nsCOMPtr<nsIStringBundle> mBundle;
+
+  nsCOMPtr<nsITextToSubURI> mTextToSubURI;
+
+ private:
+  // Expecting absolute locations, given by 201 lines.
+  bool mExpectAbsLoc;
+
+  // Private destructor: instances are reference counted.
+  virtual ~nsIndexedToHTML() = default;
+};
+
+#endif
diff --git a/netwerk/streamconv/converters/nsMultiMixedConv.cpp b/netwerk/streamconv/converters/nsMultiMixedConv.cpp
new file mode 100644
index 0000000000..8acc130421
--- /dev/null
+++ b/netwerk/streamconv/converters/nsMultiMixedConv.cpp
@@ -0,0 +1,1038 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsMultiMixedConv.h"
+#include "plstr.h"
+#include "nsIHttpChannel.h"
+#include "nsNetCID.h"
+#include "nsMimeTypes.h"
+#include "nsIStringStream.h"
+#include "nsCRT.h"
+#include "nsIHttpChannelInternal.h"
+#include "nsURLHelper.h"
+#include "nsIStreamConverterService.h"
+#include <algorithm>
+#include "nsContentSecurityManager.h"
+#include "nsHttp.h"
+#include "nsNetUtil.h"
+#include "nsIURI.h"
+#include "nsHttpHeaderArray.h"
+#include "mozilla/AutoRestore.h"
+
+// Builds the channel object that represents a single part of a multipart
+// response.
+//
+// @param aMultipartChannel  channel delivering the whole multipart stream.
+// @param aPartID            identifier reported through GetPartID().
+// @param aListener          listener that receives this part's notifications.
+nsPartChannel::nsPartChannel(nsIChannel* aMultipartChannel, uint32_t aPartID,
+                             nsIStreamListener* aListener)
+    : mMultipartChannel(aMultipartChannel),
+      mListener(aListener),
+      mStatus(NS_OK),
+      mLoadFlags(0),
+      mContentDisposition(0),
+      mContentLength(UINT64_MAX),
+      mIsByteRangeRequest(false),
+      mByteRangeStart(0),
+      mByteRangeEnd(0),
+      mPartID(aPartID),
+      mIsLastPart(false) {
+  // A part starts out with the load flags and the load group of the channel
+  // it belongs to. Failures of these getters are deliberately ignored.
+  (void)mMultipartChannel->GetLoadFlags(&mLoadFlags);
+  (void)mMultipartChannel->GetLoadGroup(getter_AddRefs(mLoadGroup));
+}
+
+// Marks this part as a byte-range response and remembers the range bounds
+// later reported by GetStartRange()/GetEndRange().
+void nsPartChannel::InitializeByteRange(int64_t aStart, int64_t aEnd) {
+  mByteRangeStart = aStart;
+  mByteRangeEnd = aEnd;
+  mIsByteRangeRequest = true;
+}
+
+// Notifies the part listener that this part begins. aContext is not used.
+nsresult nsPartChannel::SendOnStartRequest(nsISupports* aContext) {
+  const nsresult rv = mListener->OnStartRequest(this);
+  return rv;
+}
+
+// Passes a chunk of this part's body on to the part listener, with this
+// channel as the request. aContext is not used.
+nsresult nsPartChannel::SendOnDataAvailable(nsISupports* aContext,
+                                            nsIInputStream* aStream,
+                                            uint64_t aOffset, uint32_t aLen) {
+  const nsresult rv = mListener->OnDataAvailable(this, aStream, aOffset, aLen);
+  return rv;
+}
+
+// Notifies the part listener that this part is finished. The listener
+// reference is moved out of the member first, so the channel no longer holds
+// it once the notification has been delivered. aContext is not used.
+nsresult nsPartChannel::SendOnStopRequest(nsISupports* aContext,
+                                          nsresult aStatus) {
+  nsCOMPtr<nsIStreamListener> droppedListener;
+  droppedListener.swap(mListener);
+
+  const nsresult rv = droppedListener->OnStopRequest(this, aStatus);
+  return rv;
+}
+
+// Stores the raw Content-Disposition header of this part and derives the
+// file name and the disposition value from it.
+//
+// @param aContentDispositionHeader  full value of the Content-Disposition
+//                                   header.
+void nsPartChannel::SetContentDisposition(
+    const nsACString& aContentDispositionHeader) {
+  mContentDispositionHeader = aContentDispositionHeader;
+  // The channel URI used to be fetched here but was never passed on to
+  // anything; the unused lookup has been removed.
+  NS_GetFilenameFromDisposition(mContentDispositionFilename,
+                                mContentDispositionHeader);
+  mContentDisposition =
+      NS_GetContentDispositionFromHeader(mContentDispositionHeader, this);
+}
+
+// nsPartChannel reference counting and QueryInterface.
+// nsISupports implementation... (AddRef/Release plus the interface map below)
+// nsISupports is ambiguous through the bases, so it is resolved via nsIChannel.
+
+NS_IMPL_ADDREF(nsPartChannel)
+NS_IMPL_RELEASE(nsPartChannel)
+
+NS_INTERFACE_MAP_BEGIN(nsPartChannel)
+ NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIChannel)
+ NS_INTERFACE_MAP_ENTRY(nsIRequest)
+ NS_INTERFACE_MAP_ENTRY(nsIChannel)
+ NS_INTERFACE_MAP_ENTRY(nsIByteRangeRequest)
+ NS_INTERFACE_MAP_ENTRY(nsIMultiPartChannel)
+NS_INTERFACE_MAP_END
+
+//
+// nsIRequest implementation...
+//
+
+// A part has no name of its own; the name of the multipart channel is
+// reported instead.
+NS_IMETHODIMP
+nsPartChannel::GetName(nsACString& aResult) {
+  const nsresult rv = mMultipartChannel->GetName(aResult);
+  return rv;
+}
+
+// Reports the pending state of the underlying multipart channel.
+NS_IMETHODIMP
+nsPartChannel::IsPending(bool* aResult) {
+  // The active lifetime of a part is treated as identical to that of the
+  // multipart channel. That is not exactly right, but good enough for now.
+  const nsresult rv = mMultipartChannel->IsPending(aResult);
+  return rv;
+}
+
+// Returns this part's own failure status if it has one (see Cancel());
+// otherwise the status of the underlying multipart channel.
+NS_IMETHODIMP
+nsPartChannel::GetStatus(nsresult* aResult) {
+  if (NS_FAILED(mStatus)) {
+    *aResult = mStatus;
+    return NS_OK;
+  }
+
+  return mMultipartChannel->GetStatus(aResult);
+}
+
+// Records the cancellation status on this part only.
+NS_IMETHODIMP
+nsPartChannel::Cancel(nsresult aStatus) {
+  // The underlying multipart channel must keep running when a single part is
+  // cancelled, so the call is not forwarded to it.
+  // XXX but we should stop sending data for _this_ part channel!
+  mStatus = aStatus;
+  return NS_OK;
+}
+
+// A part counts as canceled as soon as its own status is a failure code.
+NS_IMETHODIMP
+nsPartChannel::GetCanceled(bool* aCanceled) {
+  const bool hasFailed = NS_FAILED(mStatus);
+  *aCanceled = hasFailed;
+  return NS_OK;
+}
+
+// No-op that always succeeds.
+NS_IMETHODIMP
+nsPartChannel::Suspend(void) {
+  // A suspend request on one part is intentionally not passed on to the
+  // underlying multipart channel...
+  // XXX why not?
+  return NS_OK;
+}
+
+// No-op that always succeeds.
+NS_IMETHODIMP
+nsPartChannel::Resume(void) {
+  // A resume request on one part is intentionally not passed on to the
+  // underlying multipart channel...
+  // XXX why not?
+  return NS_OK;
+}
+
+//
+// nsIChannel implementation
+//
+
+// Forwarded to the underlying multipart channel.
+NS_IMETHODIMP
+nsPartChannel::GetOriginalURI(nsIURI** aURI) {
+  const nsresult rv = mMultipartChannel->GetOriginalURI(aURI);
+  return rv;
+}
+
+// Forwarded to the underlying multipart channel, i.e. the change is applied
+// to that channel.
+NS_IMETHODIMP
+nsPartChannel::SetOriginalURI(nsIURI* aURI) {
+  const nsresult rv = mMultipartChannel->SetOriginalURI(aURI);
+  return rv;
+}
+
+// Forwarded to the underlying multipart channel.
+NS_IMETHODIMP
+nsPartChannel::GetURI(nsIURI** aURI) {
+  const nsresult rv = mMultipartChannel->GetURI(aURI);
+  return rv;
+}
+
+// Part channels cannot be opened: after the content security check has run,
+// the call always fails.
+NS_IMETHODIMP
+nsPartChannel::Open(nsIInputStream** aStream) {
+  nsCOMPtr<nsIStreamListener> unusedListener;
+  nsresult checkRv =
+      nsContentSecurityManager::doContentSecurityCheck(this, unusedListener);
+  NS_ENSURE_SUCCESS(checkRv, checkRv);
+
+  // This channel cannot be opened!
+  return NS_ERROR_FAILURE;
+}
+
+// Part channels cannot be opened: after the content security check has run,
+// the call always fails.
+NS_IMETHODIMP
+nsPartChannel::AsyncOpen(nsIStreamListener* aListener) {
+  nsCOMPtr<nsIStreamListener> checkedListener(aListener);
+  nsresult checkRv =
+      nsContentSecurityManager::doContentSecurityCheck(this, checkedListener);
+  NS_ENSURE_SUCCESS(checkRv, checkRv);
+
+  // This channel cannot be opened!
+  return NS_ERROR_FAILURE;
+}
+
+// Returns the part's own copy of the load flags.
+NS_IMETHODIMP
+nsPartChannel::GetLoadFlags(nsLoadFlags* aLoadFlags) {
+  const nsLoadFlags currentFlags = mLoadFlags;
+  *aLoadFlags = currentFlags;
+  return NS_OK;
+}
+
+// Replaces the part's own copy of the load flags; nothing is forwarded to
+// the underlying multipart channel.
+NS_IMETHODIMP
+nsPartChannel::SetLoadFlags(nsLoadFlags aLoadFlags) {
+  mLoadFlags = aLoadFlags;
+  return NS_OK;
+}
+
+// Delegates to the shared GetTRRModeImpl helper.
+NS_IMETHODIMP
+nsPartChannel::GetTRRMode(nsIRequest::TRRMode* aTRRMode) {
+  const nsresult rv = GetTRRModeImpl(aTRRMode);
+  return rv;
+}
+
+// Delegates to the shared SetTRRModeImpl helper.
+NS_IMETHODIMP
+nsPartChannel::SetTRRMode(nsIRequest::TRRMode aTRRMode) {
+  const nsresult rv = SetTRRModeImpl(aTRRMode);
+  return rv;
+}
+
+// Delegates the decision to NS_GetIsDocumentChannel, evaluated on this part.
+NS_IMETHODIMP
+nsPartChannel::GetIsDocument(bool* aIsDocument) {
+  const nsresult rv = NS_GetIsDocumentChannel(this, aIsDocument);
+  return rv;
+}
+
+// Returns an owning reference to the part's load group, or null when the
+// part has none.
+NS_IMETHODIMP
+nsPartChannel::GetLoadGroup(nsILoadGroup** aLoadGroup) {
+  nsCOMPtr<nsILoadGroup> group(mLoadGroup);
+  group.forget(aLoadGroup);
+  return NS_OK;
+}
+
+// Replaces the load group recorded on this part; nothing is forwarded to the
+// underlying multipart channel.
+NS_IMETHODIMP
+nsPartChannel::SetLoadGroup(nsILoadGroup* aLoadGroup) {
+  mLoadGroup = aLoadGroup;
+  return NS_OK;
+}
+
+// Forwarded to the underlying multipart channel.
+NS_IMETHODIMP
+nsPartChannel::GetOwner(nsISupports** aOwner) {
+  const nsresult rv = mMultipartChannel->GetOwner(aOwner);
+  return rv;
+}
+
+// Forwarded to the underlying multipart channel, i.e. the change is applied
+// to that channel.
+NS_IMETHODIMP
+nsPartChannel::SetOwner(nsISupports* aOwner) {
+  const nsresult rv = mMultipartChannel->SetOwner(aOwner);
+  return rv;
+}
+
+// Forwarded to the underlying multipart channel.
+NS_IMETHODIMP
+nsPartChannel::GetLoadInfo(nsILoadInfo** aLoadInfo) {
+  const nsresult rv = mMultipartChannel->GetLoadInfo(aLoadInfo);
+  return rv;
+}
+
+// Forwarded to the underlying multipart channel. A null load info is a
+// programming error and aborts via release assertion.
+NS_IMETHODIMP
+nsPartChannel::SetLoadInfo(nsILoadInfo* aLoadInfo) {
+  MOZ_RELEASE_ASSERT(aLoadInfo, "loadinfo can't be null");
+  const nsresult rv = mMultipartChannel->SetLoadInfo(aLoadInfo);
+  return rv;
+}
+
+// Forwarded to the underlying multipart channel.
+NS_IMETHODIMP
+nsPartChannel::GetNotificationCallbacks(nsIInterfaceRequestor** aCallbacks) {
+  const nsresult rv = mMultipartChannel->GetNotificationCallbacks(aCallbacks);
+  return rv;
+}
+
+// Forwarded to the underlying multipart channel, i.e. the change is applied
+// to that channel.
+NS_IMETHODIMP
+nsPartChannel::SetNotificationCallbacks(nsIInterfaceRequestor* aCallbacks) {
+  const nsresult rv = mMultipartChannel->SetNotificationCallbacks(aCallbacks);
+  return rv;
+}
+
+// Forwarded to the underlying multipart channel.
+NS_IMETHODIMP
+nsPartChannel::GetSecurityInfo(nsISupports** aSecurityInfo) {
+  const nsresult rv = mMultipartChannel->GetSecurityInfo(aSecurityInfo);
+  return rv;
+}
+
+// Returns the content type stored for this part.
+NS_IMETHODIMP
+nsPartChannel::GetContentType(nsACString& aContentType) {
+  aContentType.Assign(mContentType);
+  return NS_OK;
+}
+
+// Parses a Content-Type header value with net_ParseContentType, which fills
+// in both the stored content type and the stored charset.
+NS_IMETHODIMP
+nsPartChannel::SetContentType(const nsACString& aContentType) {
+  // The flag reported through the last argument is not needed here.
+  bool ignored;
+  net_ParseContentType(aContentType, mContentType, mContentCharset, &ignored);
+  return NS_OK;
+}
+
+// Returns the charset stored for this part.
+NS_IMETHODIMP
+nsPartChannel::GetContentCharset(nsACString& aContentCharset) {
+  aContentCharset.Assign(mContentCharset);
+  return NS_OK;
+}
+
+// Overrides the charset stored for this part.
+NS_IMETHODIMP
+nsPartChannel::SetContentCharset(const nsACString& aContentCharset) {
+  mContentCharset.Assign(aContentCharset);
+  return NS_OK;
+}
+
+// Returns the stored content length. The constructor initialises it to
+// UINT64_MAX, which is handed out here converted to int64_t.
+NS_IMETHODIMP
+nsPartChannel::GetContentLength(int64_t* aContentLength) {
+  *aContentLength = mContentLength;
+  return NS_OK;
+}
+
+// Overrides the content length stored for this part.
+NS_IMETHODIMP
+nsPartChannel::SetContentLength(int64_t aContentLength) {
+  mContentLength = aContentLength;
+  return NS_OK;
+}
+
+// Returns the parsed disposition value; not available when the part carried
+// no Content-Disposition header.
+NS_IMETHODIMP
+nsPartChannel::GetContentDisposition(uint32_t* aContentDisposition) {
+  if (mContentDispositionHeader.IsEmpty()) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+
+  *aContentDisposition = mContentDisposition;
+  return NS_OK;
+}
+
+// Not supported: this setter always fails with NS_ERROR_NOT_AVAILABLE.
+NS_IMETHODIMP
+nsPartChannel::SetContentDisposition(uint32_t aContentDisposition) {
+  return NS_ERROR_NOT_AVAILABLE;
+}
+
+// Returns the stored disposition file name; not available while that name
+// is empty.
+NS_IMETHODIMP
+nsPartChannel::GetContentDispositionFilename(
+    nsAString& aContentDispositionFilename) {
+  if (mContentDispositionFilename.IsEmpty()) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+
+  aContentDispositionFilename.Assign(mContentDispositionFilename);
+  return NS_OK;
+}
+
+// Not supported: this setter always fails with NS_ERROR_NOT_AVAILABLE.
+NS_IMETHODIMP
+nsPartChannel::SetContentDispositionFilename(
+    const nsAString& aContentDispositionFilename) {
+  return NS_ERROR_NOT_AVAILABLE;
+}
+
+// Returns the raw Content-Disposition header of this part; not available
+// when the part carried none.
+NS_IMETHODIMP
+nsPartChannel::GetContentDispositionHeader(
+    nsACString& aContentDispositionHeader) {
+  if (mContentDispositionHeader.IsEmpty()) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+
+  aContentDispositionHeader.Assign(mContentDispositionHeader);
+  return NS_OK;
+}
+
+// Returns the identifier this part was constructed with.
+NS_IMETHODIMP
+nsPartChannel::GetPartID(uint32_t* aPartID) {
+  const uint32_t id = mPartID;
+  *aPartID = id;
+  return NS_OK;
+}
+
+// Reports whether this part has been flagged as the last one.
+NS_IMETHODIMP
+nsPartChannel::GetIsLastPart(bool* aIsLastPart) {
+  const bool isLast = mIsLastPart;
+  *aIsLastPart = isLast;
+  return NS_OK;
+}
+
+//
+// nsIByteRangeRequest implementation...
+//
+
+// True once InitializeByteRange() has been called for this part.
+NS_IMETHODIMP
+nsPartChannel::GetIsByteRangeRequest(bool* aIsByteRangeRequest) {
+  const bool isRange = mIsByteRangeRequest;
+  *aIsByteRangeRequest = isRange;
+  return NS_OK;
+}
+
+// Returns the stored start of the byte range (0 unless set).
+NS_IMETHODIMP
+nsPartChannel::GetStartRange(int64_t* aStartRange) {
+  const int64_t start = mByteRangeStart;
+  *aStartRange = start;
+  return NS_OK;
+}
+
+// Returns the stored end of the byte range (0 unless set).
+NS_IMETHODIMP
+nsPartChannel::GetEndRange(int64_t* aEndRange) {
+  const int64_t end = mByteRangeEnd;
+  *aEndRange = end;
+  return NS_OK;
+}
+
+// Returns an owning reference to the multipart channel this part belongs to.
+NS_IMETHODIMP
+nsPartChannel::GetBaseChannel(nsIChannel** aReturn) {
+  NS_ENSURE_ARG_POINTER(aReturn);
+
+  nsCOMPtr<nsIChannel> base(mMultipartChannel);
+  base.forget(aReturn);
+  return NS_OK;
+}
+
+// nsISupports implementation: refcounting and QueryInterface for the three interfaces listed below
+NS_IMPL_ISUPPORTS(nsMultiMixedConv, nsIStreamConverter, nsIStreamListener,
+ nsIRequestObserver)
+
+// nsIStreamConverter implementation
+
+// No synchronous conversion at this time: the call always fails with
+// NS_ERROR_NOT_IMPLEMENTED and none of the arguments are used.
+NS_IMETHODIMP
+nsMultiMixedConv::Convert(nsIInputStream* aFromStream, const char* aFromType,
+                          const char* aToType, nsISupports* aCtxt,
+                          nsIInputStream** _retval) {
+  return NS_ERROR_NOT_IMPLEMENTED;
+}
+
+// Stream converter service calls this to initialize the actual stream
+// converter (us). Only the listener is stored; the type strings and the
+// context are not used beyond the assertion.
+NS_IMETHODIMP
+nsMultiMixedConv::AsyncConvertData(const char* aFromType, const char* aToType,
+                                   nsIStreamListener* aListener,
+                                   nsISupports* aCtxt) {
+  NS_ASSERTION(aListener && aFromType && aToType,
+               "null pointer passed into multi mixed converter");
+
+  // Remember the final listener; it receives the On*() notifications that
+  // are generated for the individual parts.
+  //
+  // WARNING: this listener must be able to handle multiple OnStartRequest,
+  // OnDataAvailable() and OnStopRequest() call combinations. One such series
+  // of calls is made for each sub-part found in the raw stream.
+  mFinalListener = aListener;
+
+  return NS_OK;
+}
+
+// Not supported: always fails with NS_ERROR_NOT_IMPLEMENTED.
+NS_IMETHODIMP
+nsMultiMixedConv::GetConvertedType(const nsACString& aFromType,
+                                   nsIChannel* aChannel, nsACString& aToType) {
+  return NS_ERROR_NOT_IMPLEMENTED;
+}
+
+// nsIRequestObserver implementation
+// Start of the raw multipart stream: remembers the channel, extracts the
+// boundary from the Content-Type and registers the custom tokens used by the
+// incremental tokenizer.
+//
+// Returns NS_ERROR_CORRUPTED_CONTENT when the Content-Type does not have the
+// form "<type>; boundary=<value>" with a non-empty value.
+NS_IMETHODIMP
+nsMultiMixedConv::OnStartRequest(nsIRequest* request) {
+  // we're assuming the content-type is available at this stage
+  NS_ASSERTION(mBoundary.IsEmpty(), "a second on start???");
+
+  nsresult rv;
+
+  mTotalSent = 0;
+  mChannel = do_QueryInterface(request, &rv);
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+
+  nsAutoCString contentType;
+
+  nsCOMPtr<nsIHttpChannel> httpChannel = do_QueryInterface(mChannel, &rv);
+  if (NS_FAILED(rv)) {
+    // Not an HTTP channel: ask the channel itself for the content type.
+    rv = mChannel->GetContentType(contentType);
+    if (NS_FAILED(rv)) {
+      return NS_ERROR_FAILURE;
+    }
+  } else {
+    // HTTP: the content type comes from the response header.
+    rv = httpChannel->GetResponseHeader("content-type"_ns, contentType);
+    if (NS_FAILED(rv)) {
+      return rv;
+    }
+    // Keep the CSP of the root response, if there is one, so that it can
+    // later be combined with per-part policies.
+    nsCString rootCsp;
+    rv = httpChannel->GetResponseHeader("content-security-policy"_ns, rootCsp);
+    if (NS_SUCCEEDED(rv)) {
+      mRootContentSecurityPolicy = rootCsp;
+    }
+  }
+
+  // Expect "...; boundary = value", with optional whitespace around the
+  // individual pieces.
+  Tokenizer p(contentType);
+  p.SkipUntil(Token::Char(';'));
+  bool wellFormed = p.CheckChar(';');
+  if (wellFormed) {
+    p.SkipWhites();
+    wellFormed = p.CheckWord("boundary");
+  }
+  if (wellFormed) {
+    p.SkipWhites();
+    wellFormed = p.CheckChar('=');
+  }
+  if (!wellFormed) {
+    return NS_ERROR_CORRUPTED_CONTENT;
+  }
+  p.SkipWhites();
+  Unused << p.ReadUntil(Token::Char(';'), mBoundary);
+  // ignoring potential quoted string formatting violations
+  mBoundary.Trim(" \"");
+  if (mBoundary.IsEmpty()) {
+    return NS_ERROR_CORRUPTED_CONTENT;
+  }
+
+  // Header names we understand. They are registered disabled and only get
+  // switched on while a header name is expected.
+  mHeaderTokens[HEADER_CONTENT_TYPE] = mTokenizer.AddCustomToken(
+      "content-type", mTokenizer.CASE_INSENSITIVE, false);
+  mHeaderTokens[HEADER_CONTENT_LENGTH] = mTokenizer.AddCustomToken(
+      "content-length", mTokenizer.CASE_INSENSITIVE, false);
+  mHeaderTokens[HEADER_CONTENT_DISPOSITION] = mTokenizer.AddCustomToken(
+      "content-disposition", mTokenizer.CASE_INSENSITIVE, false);
+  mHeaderTokens[HEADER_SET_COOKIE] = mTokenizer.AddCustomToken(
+      "set-cookie", mTokenizer.CASE_INSENSITIVE, false);
+  mHeaderTokens[HEADER_CONTENT_RANGE] = mTokenizer.AddCustomToken(
+      "content-range", mTokenizer.CASE_INSENSITIVE, false);
+  mHeaderTokens[HEADER_RANGE] =
+      mTokenizer.AddCustomToken("range", mTokenizer.CASE_INSENSITIVE, false);
+  mHeaderTokens[HEADER_CONTENT_SECURITY_POLICY] = mTokenizer.AddCustomToken(
+      "content-security-policy", mTokenizer.CASE_INSENSITIVE, false);
+
+  // Line-break tokens used while parsing a part body.
+  mLFToken = mTokenizer.AddCustomToken("\n", mTokenizer.CASE_SENSITIVE, false);
+  mCRLFToken =
+      mTokenizer.AddCustomToken("\r\n", mTokenizer.CASE_SENSITIVE, false);
+
+  SwitchToControlParsing();
+
+  // Both boundary spellings are registered after the switch above and without
+  // the explicit 'false' argument used for the other tokens.
+  mBoundaryToken =
+      mTokenizer.AddCustomToken(mBoundary, mTokenizer.CASE_SENSITIVE);
+  mBoundaryTokenWithDashes =
+      mTokenizer.AddCustomToken("--"_ns + mBoundary, mTokenizer.CASE_SENSITIVE);
+
+  return NS_OK;
+}
+
+// nsIStreamListener implementation
+// Feeds a chunk of the raw multipart stream to the tokenizer and then
+// flushes whatever body data the tokenizer callbacks collected.
+//
+// Returns NS_ERROR_UNEXPECTED on reentry; otherwise the failure from sending
+// the data if there is one, else the result of feeding the tokenizer.
+NS_IMETHODIMP
+nsMultiMixedConv::OnDataAvailable(nsIRequest* request, nsIInputStream* inStr,
+                                  uint64_t sourceOffset, uint32_t count) {
+  // Failing these assertions may indicate that some of the target listeners
+  // of this converter is looping the thread queue, which is harmful to how
+  // we collect the raw (content) data.
+  MOZ_DIAGNOSTIC_ASSERT(!mInOnDataAvailable,
+                        "nsMultiMixedConv::OnDataAvailable reentered!");
+  MOZ_DIAGNOSTIC_ASSERT(
+      !mRawData, "There are unsent data from the previous tokenizer feed!");
+
+  // The multipart logic is incapable of being reentered.
+  if (mInOnDataAvailable) {
+    return NS_ERROR_UNEXPECTED;
+  }
+
+  mozilla::AutoRestore<bool> restoreFlag(mInOnDataAvailable);
+  mInOnDataAvailable = true;
+
+  const nsresult feedResult = mTokenizer.FeedInput(inStr, count);
+  // The send must happen every time, regardless of whether parsing failed;
+  // otherwise the raw data reference would not be thrown away.
+  const nsresult sendResult = SendData();
+
+  if (NS_FAILED(sendResult)) {
+    return sendResult;
+  }
+  return feedResult;
+}
+
+// End of the raw multipart stream: finishes the part that is still open (if
+// any), or reports a failure that occurred before any part was started, and
+// finally tells a multipart-aware listener that no further parts follow.
+// Always returns NS_OK.
+NS_IMETHODIMP
+nsMultiMixedConv::OnStopRequest(nsIRequest* request, nsresult aStatus) {
+  if (mPartChannel) {
+    mPartChannel->SetIsLastPart();
+
+    MOZ_DIAGNOSTIC_ASSERT(
+        !mRawData, "There are unsent data from the previous tokenizer feed!");
+
+    // The first failure wins: a failing aStatus is never overwritten.
+    const nsresult finishResult = mTokenizer.FinishInput();
+    if (NS_SUCCEEDED(aStatus)) {
+      aStatus = finishResult;
+    }
+    const nsresult sendResult = SendData();
+    if (NS_SUCCEEDED(aStatus)) {
+      aStatus = sendResult;
+    }
+
+    (void)SendStop(aStatus);
+  } else if (NS_FAILED(aStatus) && !mRequestListenerNotified) {
+    // underlying data production problem. we should not be in
+    // the middle of sending data. if we were, mPartChannel,
+    // above, would have been non-null.
+
+    (void)mFinalListener->OnStartRequest(request);
+    (void)mFinalListener->OnStopRequest(request, aStatus);
+  }
+
+  nsCOMPtr<nsIMultiPartChannelListener> multipartListener =
+      do_QueryInterface(mFinalListener);
+  if (multipartListener) {
+    multipartListener->OnAfterLastPart(aStatus);
+  }
+
+  return NS_OK;
+}
+
+// Tokenizer callback driving the multipart parser state machine. It is bound
+// to mTokenizer in the constructor and receives one token per call.
+//
+// States, in the order a well-formed stream passes through them:
+//   PREAMBLE       everything before the first boundary is ignored
+//   BOUNDARY_CRLF  a line break must follow a boundary
+//   HEADER_NAME    a header name, or an empty line ending the headers
+//   HEADER_SEP     the ':' after a header name
+//   HEADER_VALUE   the header value up to the end of the line
+//   BODY_INIT      first body token: the part is started, then falls through
+//   BODY           body data until the next boundary
+//   TRAIL_DASH1/2  what follows a boundary: a new line (another part) or
+//                  "--" (end of the multipart document)
+//   EPILOGUE       everything after the closing boundary is ignored
+//
+// @param token  the token to process.
+// @return NS_OK to continue, NS_ERROR_CORRUPTED_CONTENT on malformed input,
+//         or the failure of ProcessHeader(), SendStart() or SendStop().
+nsresult nsMultiMixedConv::ConsumeToken(Token const& token) {
+  nsresult rv;
+
+  switch (mParserState) {
+    case PREAMBLE:
+      if (token.Equals(mBoundaryTokenWithDashes)) {
+        // The server first used boundary '--boundary'.  Hence, we no longer
+        // accept plain 'boundary' token as a delimiter.
+        mTokenizer.RemoveCustomToken(mBoundaryToken);
+        mParserState = BOUNDARY_CRLF;
+        break;
+      }
+      if (token.Equals(mBoundaryToken)) {
+        // And here the opposite from the just above block...
+        mTokenizer.RemoveCustomToken(mBoundaryTokenWithDashes);
+        mParserState = BOUNDARY_CRLF;
+        break;
+      }
+
+      // This is a preamble, just ignore it and wait for the boundary.
+      break;
+
+    case BOUNDARY_CRLF:
+      if (token.Equals(Token::NewLine())) {
+        mParserState = HEADER_NAME;
+        mResponseHeader = HEADER_UNKNOWN;
+        HeadersToDefault();
+        SetHeaderTokensEnabled(true);
+        break;
+      }
+      return NS_ERROR_CORRUPTED_CONTENT;
+
+    case HEADER_NAME:
+      SetHeaderTokensEnabled(false);
+      if (token.Equals(Token::NewLine())) {
+        // Empty line: the headers are done, the body follows.
+        mParserState = BODY_INIT;
+        SwitchToBodyParsing();
+        break;
+      }
+      for (uint32_t h = HEADER_CONTENT_TYPE; h < HEADER_UNKNOWN; ++h) {
+        if (token.Equals(mHeaderTokens[h])) {
+          mResponseHeader = static_cast<EHeader>(h);
+          break;
+        }
+      }
+      mParserState = HEADER_SEP;
+      break;
+
+    case HEADER_SEP:
+      if (token.Equals(Token::Char(':'))) {
+        mParserState = HEADER_VALUE;
+        mResponseHeaderValue.Truncate();
+        break;
+      }
+      if (mResponseHeader == HEADER_UNKNOWN) {
+        // If the header is not of any we understand, just pass everything till
+        // ':'
+        break;
+      }
+      if (token.Equals(Token::Whitespace())) {
+        // Accept only header-name trailing whitespaces after known headers
+        break;
+      }
+      return NS_ERROR_CORRUPTED_CONTENT;
+
+    case HEADER_VALUE:
+      if (token.Equals(Token::Whitespace()) && mResponseHeaderValue.IsEmpty()) {
+        // Eat leading whitespaces
+        break;
+      }
+      if (token.Equals(Token::NewLine())) {
+        // Reuse the function-scope rv; a second local declaration here would
+        // shadow it.
+        rv = ProcessHeader();
+        if (NS_FAILED(rv)) {
+          return rv;
+        }
+        mParserState = HEADER_NAME;
+        mResponseHeader = HEADER_UNKNOWN;
+        SetHeaderTokensEnabled(true);
+      } else {
+        mResponseHeaderValue.Append(token.Fragment());
+      }
+      break;
+
+    case BODY_INIT:
+      rv = SendStart();
+      if (NS_FAILED(rv)) {
+        return rv;
+      }
+      mParserState = BODY;
+      [[fallthrough]];
+
+    case BODY: {
+      if (!token.Equals(mLFToken) && !token.Equals(mCRLFToken)) {
+        if (token.Equals(mBoundaryTokenWithDashes) ||
+            token.Equals(mBoundaryToken)) {
+          // Allow CRLF to NOT be part of the boundary as well
+          SwitchToControlParsing();
+          mParserState = TRAIL_DASH1;
+          break;
+        }
+        AccumulateData(token);
+        break;
+      }
+
+      // After CRLF we must explicitly check for boundary.  If found,
+      // that CRLF is part of the boundary and must not be sent to the
+      // data listener.
+      Token token2;
+      if (!mTokenizer.Next(token2)) {
+        // Note: this will give us the CRLF token again when more data
+        // or OnStopRequest arrive.  I.e. we will enter BODY case in
+        // the very same state as we are now and start this block over.
+        mTokenizer.NeedMoreInput();
+        break;
+      }
+      if (token2.Equals(mBoundaryTokenWithDashes) ||
+          token2.Equals(mBoundaryToken)) {
+        SwitchToControlParsing();
+        mParserState = TRAIL_DASH1;
+        break;
+      }
+
+      // Not a boundary: both the line break and the following token are body
+      // data.
+      AccumulateData(token);
+      AccumulateData(token2);
+      break;
+    }
+
+    case TRAIL_DASH1:
+      if (token.Equals(Token::NewLine())) {
+        // Boundary followed by a line break: the current part ends here and
+        // another one follows. The new line is rolled back so that
+        // BOUNDARY_CRLF gets to consume it.
+        rv = SendStop(NS_OK);
+        if (NS_FAILED(rv)) {
+          return rv;
+        }
+        mParserState = BOUNDARY_CRLF;
+        mTokenizer.Rollback();
+        break;
+      }
+      if (token.Equals(Token::Char('-'))) {
+        mParserState = TRAIL_DASH2;
+        break;
+      }
+      return NS_ERROR_CORRUPTED_CONTENT;
+
+    case TRAIL_DASH2:
+      if (token.Equals(Token::Char('-'))) {
+        mPartChannel->SetIsLastPart();
+        // SendStop calls SendData first.
+        rv = SendStop(NS_OK);
+        if (NS_FAILED(rv)) {
+          return rv;
+        }
+        mParserState = EPILOGUE;
+        break;
+      }
+      return NS_ERROR_CORRUPTED_CONTENT;
+
+    case EPILOGUE:
+      // Just ignore
+      break;
+
+    default:
+      MOZ_ASSERT(false, "Missing parser state handling branch");
+      break;
+  }  // switch
+
+  return NS_OK;
+}
+
+// Switches every header-name token in [HEADER_FIRST, HEADER_UNKNOWN) on or
+// off in the tokenizer.
+void nsMultiMixedConv::SetHeaderTokensEnabled(bool aEnable) {
+  uint32_t header = HEADER_FIRST;
+  while (header < HEADER_UNKNOWN) {
+    mTokenizer.EnableCustomToken(mHeaderTokens[header], aEnable);
+    ++header;
+  }
+}
+
+// Body mode: the tokenizer is put into CUSTOM_ONLY mode and the line-break
+// and boundary tokens are enabled.
+void nsMultiMixedConv::SwitchToBodyParsing() {
+  const bool kEnabled = true;
+
+  mTokenizer.SetTokenizingMode(Tokenizer::Mode::CUSTOM_ONLY);
+  mTokenizer.EnableCustomToken(mLFToken, kEnabled);
+  mTokenizer.EnableCustomToken(mCRLFToken, kEnabled);
+  mTokenizer.EnableCustomToken(mBoundaryTokenWithDashes, kEnabled);
+  mTokenizer.EnableCustomToken(mBoundaryToken, kEnabled);
+}
+
+// Control mode: the tokenizer is put into FULL mode and the line-break and
+// boundary tokens are disabled.
+void nsMultiMixedConv::SwitchToControlParsing() {
+  const bool kEnabled = false;
+
+  mTokenizer.SetTokenizingMode(Tokenizer::Mode::FULL);
+  mTokenizer.EnableCustomToken(mLFToken, kEnabled);
+  mTokenizer.EnableCustomToken(mCRLFToken, kEnabled);
+  mTokenizer.EnableCustomToken(mBoundaryTokenWithDashes, kEnabled);
+  mTokenizer.EnableCustomToken(mBoundaryToken, kEnabled);
+}
+
+// nsMultiMixedConv methods
+// Puts the converter into its initial state and binds ConsumeToken() as the
+// callback of the incremental tokenizer.
+nsMultiMixedConv::nsMultiMixedConv()
+    : mCurrentPartID(0),
+      mInOnDataAvailable(false),
+      mResponseHeader(HEADER_UNKNOWN),
+      // XXX: This is a hack to bypass the raw pointer to refcounted object in
+      // lambda analysis. It should be removed and replaced when the
+      // IncrementalTokenizer API is improved to avoid the need for such
+      // workarounds.
+      //
+      // This is safe because `mTokenizer` will not outlive `this`, meaning that
+      // this std::bind object will be destroyed before `this` dies.
+      mTokenizer(std::bind(&nsMultiMixedConv::ConsumeToken, this,
+                           std::placeholders::_1)) {
+  mContentLength = UINT64_MAX;
+  mByteRangeStart = 0;
+  mByteRangeEnd = 0;
+  mTotalSent = 0;
+  mIsByteRangeRequest = false;
+  mParserState = INIT;
+  mRawData = nullptr;
+  // Not read before AccumulateData() resets it, but give it a defined value
+  // anyway so that no member is ever left indeterminate.
+  mRawDataLength = 0;
+  mRequestListenerNotified = false;
+}
+
+// Starts a new part: creates its nsPartChannel, applies the headers parsed
+// for the part, adds the channel to its load group and sends OnStartRequest
+// to the part listener.
+//
+// When the part did not declare a content type, the type is set to
+// UNKNOWN_CONTENT_TYPE and, if the stream converter service provides one, a
+// converter to "*/*" is put in front of the final listener.
+//
+// @return the result of the listener's OnStartRequest, or the first failure
+//         encountered while setting the part up.
+nsresult nsMultiMixedConv::SendStart() {
+  nsresult rv = NS_OK;
+
+  nsCOMPtr<nsIStreamListener> partListener(mFinalListener);
+  if (mContentType.IsEmpty()) {
+    mContentType.AssignLiteral(UNKNOWN_CONTENT_TYPE);
+    nsCOMPtr<nsIStreamConverterService> serv =
+        do_GetService(NS_STREAMCONVERTERSERVICE_CONTRACTID, &rv);
+    if (NS_SUCCEEDED(rv)) {
+      nsCOMPtr<nsIStreamListener> converter;
+      rv = serv->AsyncConvertData(UNKNOWN_CONTENT_TYPE, "*/*", mFinalListener,
+                                  mContext, getter_AddRefs(converter));
+      if (NS_SUCCEEDED(rv)) {
+        partListener = converter;
+      }
+    }
+  }
+
+  // if we already have an mPartChannel, that means we never sent a Stop()
+  // before starting up another "part."  that would be bad.
+  MOZ_ASSERT(!mPartChannel, "tisk tisk, shouldn't be overwriting a channel");
+
+  // Set up the new part channel. It is stored in the owning member right
+  // away instead of travelling through a raw pointer, and the former null
+  // check on the result of `new` is gone since it could never trigger.
+  mPartChannel = new nsPartChannel(mChannel, mCurrentPartID++, partListener);
+
+  if (mIsByteRangeRequest) {
+    mPartChannel->InitializeByteRange(mByteRangeStart, mByteRangeEnd);
+  }
+
+  mTotalSent = 0;
+
+  rv = mPartChannel->SetContentType(mContentType);
+  if (NS_FAILED(rv)) return rv;
+
+  rv = mPartChannel->SetContentLength(mContentLength);
+  if (NS_FAILED(rv)) return rv;
+
+  mPartChannel->SetContentDisposition(mContentDisposition);
+
+  // Each part of a multipart/replace response can be used
+  // for the top level document.  We must inform upper layers
+  // about this by setting the LOAD_REPLACE flag so that certain
+  // state assertions are evaluated as positive.
+  nsLoadFlags loadFlags = 0;
+  mPartChannel->GetLoadFlags(&loadFlags);
+  loadFlags |= nsIChannel::LOAD_REPLACE;
+  mPartChannel->SetLoadFlags(loadFlags);
+
+  nsCOMPtr<nsILoadGroup> loadGroup;
+  (void)mPartChannel->GetLoadGroup(getter_AddRefs(loadGroup));
+
+  // Add the new channel to the load group (if any)
+  if (loadGroup) {
+    rv = loadGroup->AddRequest(mPartChannel, nullptr);
+    if (NS_FAILED(rv)) return rv;
+  }
+
+  // This prevents artificial call to OnStart/StopRequest when the root
+  // channel fails.  Since now it's ensured to keep with the nsIStreamListener
+  // contract every time.
+  mRequestListenerNotified = true;
+
+  // Let's start off the load. NOTE: we don't forward on the channel passed
+  // into our OnDataAvailable() as it's the root channel for the raw stream.
+  return mPartChannel->SendOnStartRequest(mContext);
+}
+
+// Finishes the current part: flushes pending data, sends OnStopRequest to
+// the part listener, removes the part channel from its load group and drops
+// the channel.
+//
+// @param aStatus  status to report; when it is a success code, a failure
+//                 from flushing the data is reported instead.
+// @return the result of OnStopRequest, or of the data flush when there is no
+//         part channel.
+nsresult nsMultiMixedConv::SendStop(nsresult aStatus) {
+  // Make sure all accumulated data goes out prior to the call to
+  // OnStopRequest. If there is no data, this is a no-op.
+  nsresult rv = SendData();
+  const nsresult stopStatus = NS_SUCCEEDED(aStatus) ? rv : aStatus;
+
+  if (mPartChannel) {
+    // The result is not checked here: the channel has to be removed from the
+    // load group in any case.
+    rv = mPartChannel->SendOnStopRequest(mContext, stopStatus);
+
+    // Remove the channel from its load group (if any)
+    nsCOMPtr<nsILoadGroup> group;
+    (void)mPartChannel->GetLoadGroup(getter_AddRefs(group));
+    if (group) {
+      (void)group->RemoveRequest(mPartChannel, mContext, stopStatus);
+    }
+  }
+
+  mPartChannel = nullptr;
+  return rv;
+}
+
+// Adds a token to the run of raw body data collected during the current
+// tokenizer feed. Nothing is copied; only the start pointer and the total
+// length are tracked.
+void nsMultiMixedConv::AccumulateData(Token const& aToken) {
+  const auto& fragment = aToken.Fragment();
+
+  if (!mRawData) {
+    // First raw token of this FeedInput loop of the incremental tokenizer.
+    // All 'raw' tokens come from the same linear buffer, so the raw data of
+    // this loop begins where the first raw token begins, and its length is
+    // simply the sum of the lengths of all 'raw' tokens collected during the
+    // loop.
+    //
+    // The collected data is flushed (sent to the listener via
+    // OnDataAvailable) and the pointer reset right after the FeedInput call,
+    // hence the reference can't outlive the actual buffer.
+    mRawData = fragment.BeginReading();
+    mRawDataLength = 0;
+  }
+
+  mRawDataLength += fragment.Length();
+}
+
+// Sends the raw body data collected by AccumulateData() to the current part
+// as one OnDataAvailable call and forgets the collected range.
+//
+// @return NS_OK when there is nothing to send (including when the declared
+//         content length has already been delivered), NS_ERROR_FAILURE when
+//         data is pending but no part channel exists, otherwise the result
+//         of creating/filling the stream or of the listener call.
+nsresult nsMultiMixedConv::SendData() {
+  if (!mRawData) {
+    return NS_OK;
+  }
+
+  // Take the pointer over and clear the member right away, so that the
+  // reference into the tokenizer buffer is dropped on every path below.
+  nsACString::const_char_iterator rawData = mRawData;
+  mRawData = nullptr;
+
+  if (!mPartChannel) {
+    return NS_ERROR_FAILURE;  // something went wrong w/ processing
+  }
+
+  if (mContentLength != UINT64_MAX) {
+    // make sure that we don't send more than the mContentLength
+    // XXX why? perhaps the Content-Length header was actually wrong!!
+    if ((uint64_t(mRawDataLength) + mTotalSent) > mContentLength) {
+      mRawDataLength = static_cast<uint32_t>(mContentLength - mTotalSent);
+    }
+
+    if (mRawDataLength == 0) {
+      return NS_OK;
+    }
+  }
+
+  uint64_t offset = mTotalSent;
+  mTotalSent += mRawDataLength;
+
+  nsresult rv;
+  nsCOMPtr<nsIStringInputStream> ss(
+      do_CreateInstance("@mozilla.org/io/string-input-stream;1", &rv));
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+
+  // The stream shares the buffer rather than copying it.
+  rv = ss->ShareData(rawData, mRawDataLength);
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+
+  return mPartChannel->SendOnDataAvailable(mContext, ss, offset,
+                                           mRawDataLength);
+}
+
+// Resets the per-part header state: unknown content length, no content
+// type, disposition or CSP, and no byte range.
+void nsMultiMixedConv::HeadersToDefault() {
+  mContentType.Truncate();
+  mContentDisposition.Truncate();
+  mContentSecurityPolicy.Truncate();
+  mContentLength = UINT64_MAX;
+  mIsByteRangeRequest = false;
+}
+
+// Applies one complete part header. The header is identified by
+// mResponseHeader and its value is taken from mResponseHeaderValue.
+//
+// @return NS_OK, or NS_ERROR_CORRUPTED_CONTENT when a Content-Length or
+//         (Content-)Range value cannot be parsed or the combined CSP cannot
+//         be set on the channel.
+nsresult nsMultiMixedConv::ProcessHeader() {
+  mozilla::Tokenizer p(mResponseHeaderValue);
+
+  switch (mResponseHeader) {
+    case HEADER_CONTENT_TYPE:
+      mContentType = mResponseHeaderValue;
+      mContentType.CompressWhitespace();
+      break;
+    case HEADER_CONTENT_LENGTH:
+      p.SkipWhites();
+      if (!p.ReadInteger(&mContentLength)) {
+        return NS_ERROR_CORRUPTED_CONTENT;
+      }
+      break;
+    case HEADER_CONTENT_DISPOSITION:
+      mContentDisposition = mResponseHeaderValue;
+      mContentDisposition.CompressWhitespace();
+      break;
+    case HEADER_SET_COOKIE: {
+      // Cookies set by a part are handed to the root channel.
+      nsCOMPtr<nsIHttpChannelInternal> httpInternal =
+          do_QueryInterface(mChannel);
+      mResponseHeaderValue.CompressWhitespace();
+      if (httpInternal) {
+        DebugOnly<nsresult> rv = httpInternal->SetCookie(mResponseHeaderValue);
+        MOZ_ASSERT(NS_SUCCEEDED(rv));
+      }
+      break;
+    }
+    case HEADER_RANGE:
+    case HEADER_CONTENT_RANGE: {
+      // Accepted forms: "bytes *..." and "bytes <start>-<end>...".
+      if (!p.CheckWord("bytes") || !p.CheckWhite()) {
+        return NS_ERROR_CORRUPTED_CONTENT;
+      }
+      p.SkipWhites();
+      if (p.CheckChar('*')) {
+        mByteRangeStart = mByteRangeEnd = 0;
+      } else if (!p.ReadInteger(&mByteRangeStart) || !p.CheckChar('-') ||
+                 !p.ReadInteger(&mByteRangeEnd)) {
+        return NS_ERROR_CORRUPTED_CONTENT;
+      }
+      mIsByteRangeRequest = true;
+      if (mContentLength == UINT64_MAX) {
+        // Derive the length from the range when none is known yet. The
+        // arithmetic is done on unsigned values: the bounds come straight
+        // from the wire, and the signed form (end - start + 1) would be
+        // undefined behaviour on overflow. For every input where the signed
+        // form is defined the result is the same.
+        mContentLength =
+            uint64_t(mByteRangeEnd) - uint64_t(mByteRangeStart) + 1;
+      }
+      break;
+    }
+    case HEADER_CONTENT_SECURITY_POLICY: {
+      mContentSecurityPolicy = mResponseHeaderValue;
+      mContentSecurityPolicy.CompressWhitespace();
+      nsCOMPtr<nsIHttpChannel> httpChannel = do_QueryInterface(mChannel);
+      if (httpChannel) {
+        nsCString resultCSP = mRootContentSecurityPolicy;
+        if (!mContentSecurityPolicy.IsEmpty()) {
+          // We are updating the root channel CSP header respectively for
+          // each part as: CSP-root + CSP-partN, where N is the part number.
+          // Here we append current part's CSP to root CSP and reset CSP
+          // header for each part.
+          if (!resultCSP.IsEmpty()) {
+            resultCSP.Append(";");
+          }
+          resultCSP.Append(mContentSecurityPolicy);
+        }
+        nsresult rv = httpChannel->SetResponseHeader(
+            "Content-Security-Policy"_ns, resultCSP, false);
+        if (NS_FAILED(rv)) {
+          return NS_ERROR_CORRUPTED_CONTENT;
+        }
+      }
+      break;
+    }
+    case HEADER_UNKNOWN:
+      // We ignore anything else...
+      break;
+  }
+
+  return NS_OK;
+}
+
+// Creates a new converter and hands an owning reference to the caller
+// through aMultiMixedConv, which must not be null. Always returns NS_OK.
+nsresult NS_NewMultiMixedConv(nsMultiMixedConv** aMultiMixedConv) {
+  MOZ_ASSERT(aMultiMixedConv != nullptr, "null ptr");
+
+  RefPtr<nsMultiMixedConv> instance(new nsMultiMixedConv());
+  instance.forget(aMultiMixedConv);
+  return NS_OK;
+}
diff --git a/netwerk/streamconv/converters/nsMultiMixedConv.h b/netwerk/streamconv/converters/nsMultiMixedConv.h
new file mode 100644
index 0000000000..feb93ff360
--- /dev/null
+++ b/netwerk/streamconv/converters/nsMultiMixedConv.h
@@ -0,0 +1,256 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef __nsmultimixedconv__h__
+#define __nsmultimixedconv__h__
+
+#include "nsIStreamConverter.h"
+#include "nsIChannel.h"
+#include "nsString.h"
+#include "nsCOMPtr.h"
+#include "nsIByteRangeRequest.h"
+#include "nsIMultiPartChannel.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/IncrementalTokenizer.h"
+#include "nsHttpResponseHead.h"
+
+// Brace initializer for the 128-bit class ID named in the inline comment
+// (7584CE90-5B25-11d3-A175-0050041CAF44).
+#define NS_MULTIMIXEDCONVERTER_CID \
+ { /* 7584CE90-5B25-11d3-A175-0050041CAF44 */ \
+ 0x7584ce90, 0x5b25, 0x11d3, { \
+ 0xa1, 0x75, 0x0, 0x50, 0x4, 0x1c, 0xaf, 0x44 \
+ } \
+ }
+
+//
+// nsPartChannel is a "dummy" channel which represents an individual part of
+// a multipart/mixed stream...
+//
+// Instances of this channel are passed out to the consumer through the
+// nsIStreamListener interface.
+//
+class nsPartChannel final : public nsIChannel,
+ public nsIByteRangeRequest,
+ public nsIMultiPartChannel {
+ public:
+ // Defined outside this header. NOTE(review): the arguments presumably
+ // initialize mMultipartChannel, mPartID and mListener - confirm.
+ nsPartChannel(nsIChannel* aMultipartChannel, uint32_t aPartID,
+ nsIStreamListener* aListener);
+
+ // NOTE(review): presumably records the range in mByteRangeStart /
+ // mByteRangeEnd and sets mIsByteRangeRequest - confirm in the .cpp.
+ void InitializeByteRange(int64_t aStart, int64_t aEnd);
+ // Sets mIsLastPart.
+ void SetIsLastPart() { mIsLastPart = true; }
+ // Helpers for sending the three listener notifications for this part;
+ // their definitions are not in this header.
+ nsresult SendOnStartRequest(nsISupports* aContext);
+ nsresult SendOnDataAvailable(nsISupports* aContext, nsIInputStream* aStream,
+ uint64_t aOffset, uint32_t aLen);
+ nsresult SendOnStopRequest(nsISupports* aContext, nsresult aStatus);
+ /* SetContentDisposition expects the full value of the Content-Disposition
+ * header */
+ void SetContentDisposition(const nsACString& aContentDispositionHeader);
+ // TODO(ER): This appears to be dead code
+ // Takes ownership of |head|: it is stored in the mResponseHead UniquePtr,
+ // which releases any previously held object.
+ void SetResponseHead(mozilla::net::nsHttpResponseHead* head) {
+ mResponseHead.reset(head);
+ }
+
+ NS_DECL_ISUPPORTS
+ NS_DECL_NSIREQUEST
+ NS_DECL_NSICHANNEL
+ NS_DECL_NSIBYTERANGEREQUEST
+ NS_DECL_NSIMULTIPARTCHANNEL
+
+ protected:
+ // Non-public destructor: the class is reference counted (NS_DECL_ISUPPORTS).
+ ~nsPartChannel() = default;
+
+ protected:
+ nsCOMPtr<nsIChannel> mMultipartChannel;
+ nsCOMPtr<nsIStreamListener> mListener;
+ // Owned response head; only assigned through SetResponseHead().
+ UniquePtr<mozilla::net::nsHttpResponseHead> mResponseHead;
+
+ nsresult mStatus;
+ nsLoadFlags mLoadFlags;
+
+ nsCOMPtr<nsILoadGroup> mLoadGroup;
+
+ // Per-part content metadata.
+ nsCString mContentType;
+ nsCString mContentCharset;
+ uint32_t mContentDisposition;
+ nsString mContentDispositionFilename;
+ nsCString mContentDispositionHeader;
+ uint64_t mContentLength;
+
+ // Byte-range state backing nsIByteRangeRequest.
+ bool mIsByteRangeRequest;
+ int64_t mByteRangeStart;
+ int64_t mByteRangeEnd;
+
+ uint32_t mPartID; // unique ID that can be used to identify
+ // this part of the multipart document
+ // Set to true by SetIsLastPart().
+ bool mIsLastPart;
+};
+
+// The nsMultiMixedConv stream converter converts a stream of type
+// "multipart/x-mixed-replace" to its subparts. There was some debate as to
+// whether or not the functionality desired when HTTP confronted this type
+// required a stream converter. After all, this type really prompts various
+// viewer related actions rather than stream conversion. There simply needs to
+// be a piece in place that can strip out the multiple parts of a stream of this
+// type, and "display" them accordingly.
+//
+// With that said, this "stream converter" spends more time packaging up the sub
+// parts of the main stream and sending them off the destination stream
+// listener, than doing any real stream parsing/converting.
+//
+// WARNING: This converter requires that its destination stream listener be
+// able to handle multiple OnStartRequest(), OnDataAvailable(), and
+// OnStopRequest() call combinations. Each series represents the beginning,
+// data production, and ending phase of each sub-part of the original
+// stream.
+//
+// NOTE: this MIME-type is used by HTTP, *not* SMTP, or IMAP.
+//
+// NOTE: For reference, a general description of how this MIME type should be
+// handled via HTTP, see
+// http://home.netscape.com/assist/net_sites/pushpull.html . Note that real
+// world server content deviates considerably from this overview.
+//
+// Implementation assumptions:
+// Assumed structure:
+// --BoundaryToken[\r]\n
+// content-type: foo/bar[\r]\n
+// ... (other headers if any)
+// [\r]\n (second line feed to delimit end of headers)
+// data
+// --BoundaryToken-- (end delimited by final "--")
+//
+// linebreaks can be either CRLF or LFLF. linebreaks preceding
+// boundary tokens are NOT considered part of the data. BoundaryToken
+// is any opaque string.
+//
+//
+
+// Stream converter that splits a multipart stream into its parts, driven by
+// an incremental tokenizer (mTokenizer) and the parser state machine below.
+class nsMultiMixedConv : public nsIStreamConverter {
+ public:
+ NS_DECL_ISUPPORTS
+ NS_DECL_NSISTREAMCONVERTER
+ NS_DECL_NSISTREAMLISTENER
+ NS_DECL_NSIREQUESTOBSERVER
+
+ explicit nsMultiMixedConv();
+
+ protected:
+ typedef mozilla::IncrementalTokenizer::Token Token;
+
+ virtual ~nsMultiMixedConv() = default;
+
+ // Part-delivery helpers; their definitions are not in this header.
+ // NOTE(review): presumably they drive the OnStartRequest / OnDataAvailable /
+ // OnStopRequest sequence of the current part channel - confirm in the .cpp.
+ nsresult SendStart();
+ void AccumulateData(Token const& aToken);
+ nsresult SendData();
+ nsresult SendStop(nsresult aStatus);
+
+ // member data
+ nsCOMPtr<nsIStreamListener> mFinalListener; // this guy gets the converted
+ // data via his OnDataAvailable()
+
+ nsCOMPtr<nsIChannel>
+ mChannel; // The channel as we get it in the OnStartRequest call
+ RefPtr<nsPartChannel> mPartChannel; // the channel for the given part we're
+ // processing. one channel per part.
+ nsCOMPtr<nsISupports> mContext;
+ // Values captured from the headers of the part being processed (see
+ // HeadersToDefault() and ProcessHeader() below).
+ nsCString mContentType;
+ nsCString mContentDisposition;
+ nsCString mContentSecurityPolicy;
+ nsCString mRootContentSecurityPolicy;
+ uint64_t mContentLength;
+ uint64_t mTotalSent;
+
+ // The following members are for tracking the byte ranges in
+ // multipart/mixed content which specified the 'Content-Range:'
+ // header...
+ int64_t mByteRangeStart;
+ int64_t mByteRangeEnd;
+ bool mIsByteRangeRequest;
+ // This flag is set first time we create a part channel.
+ // We use it to prevent duplicated OnStopRequest call on the listener
+ // when we fail from some reason to ever create a part channel that
+ // ensures correct notifications.
+ bool mRequestListenerNotified;
+
+ uint32_t mCurrentPartID;
+
+ // Flag preventing reenter of OnDataAvailable in case the target listener
+ // ends up spinning the event loop.
+ bool mInOnDataAvailable;
+
+ // Current state of the incremental parser
+ enum EParserState {
+ PREAMBLE,
+ BOUNDARY_CRLF,
+ HEADER_NAME,
+ HEADER_SEP,
+ HEADER_VALUE,
+ BODY_INIT,
+ BODY,
+ TRAIL_DASH1,
+ TRAIL_DASH2,
+ EPILOGUE,
+
+ INIT = PREAMBLE
+ } mParserState;
+
+ // Response part header value, valid when we find a header name
+ // we recognize.
+ // HEADER_UNKNOWN is the last enumerator and doubles as the count of
+ // recognized headers (it sizes mHeaderTokens below).
+ enum EHeader : uint32_t {
+ HEADER_FIRST,
+ HEADER_CONTENT_TYPE = HEADER_FIRST,
+ HEADER_CONTENT_LENGTH,
+ HEADER_CONTENT_DISPOSITION,
+ HEADER_SET_COOKIE,
+ HEADER_CONTENT_RANGE,
+ HEADER_RANGE,
+ HEADER_CONTENT_SECURITY_POLICY,
+ HEADER_UNKNOWN
+ } mResponseHeader;
+ // Cumulated value of a response header.
+ nsCString mResponseHeaderValue;
+
+ nsCString mBoundary;
+ mozilla::IncrementalTokenizer mTokenizer;
+
+ // When in the "body parsing" mode, see below, we cumulate raw data
+ // incrementally to mainly avoid any unnecessary granularity.
+ // mRawData points to the first byte in the tokenizer buffer where part
+ // body data begins or continues. mRawDataLength is a cumulated length
+ // of that data during a single tokenizer input feed. This is always
+ // flushed right after we fed the tokenizer.
+ nsACString::const_char_iterator mRawData;
+ nsACString::size_type mRawDataLength;
+
+ // At the start we don't know if the server will be sending boundary with
+ // or without the leading dashes.
+ Token mBoundaryToken;
+ Token mBoundaryTokenWithDashes;
+ // We need these custom tokens to allow finding CRLF when in the binary mode.
+ // CRLF before boundary is considered part of the boundary and not part of
+ // the data.
+ Token mLFToken;
+ Token mCRLFToken;
+ // Custom tokens for each of the response headers we recognize.
+ Token mHeaderTokens[HEADER_UNKNOWN];
+
+ // Resets values driven by part headers, like content type, to their defaults,
+ // called at the start of every part processing.
+ void HeadersToDefault();
+ // Processes captured value of mResponseHeader header.
+ nsresult ProcessHeader();
+ // Switches the parser and tokenizer state to "binary mode" which only
+ // searches for the 'CRLF boundary' delimiter.
+ void SwitchToBodyParsing();
+ // Switches to the default mode, we are in this mode when parsing headers and
+ // control data around the boundary delimiters.
+ void SwitchToControlParsing();
+ // Turns on or off recognition of the headers we recognize in part heads.
+ void SetHeaderTokensEnabled(bool aEnable);
+
+ // The main parser callback called by the IncrementalTokenizer
+ // instance from OnDataAvailable or OnStopRequest.
+ nsresult ConsumeToken(Token const& token);
+};
+
+#endif /* __nsmultimixedconv__h__ */
diff --git a/netwerk/streamconv/converters/nsUnknownDecoder.cpp b/netwerk/streamconv/converters/nsUnknownDecoder.cpp
new file mode 100644
index 0000000000..3ba51c8dc4
--- /dev/null
+++ b/netwerk/streamconv/converters/nsUnknownDecoder.cpp
@@ -0,0 +1,894 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsUnknownDecoder.h"
+#include "nsIPipe.h"
+#include "nsIInputStream.h"
+#include "nsIOutputStream.h"
+#include "nsMimeTypes.h"
+#include "nsIPrefBranch.h"
+
+#include "nsCRT.h"
+
+#include "nsIMIMEService.h"
+
+#include "nsIViewSourceChannel.h"
+#include "nsIHttpChannel.h"
+#include "nsIForcePendingChannel.h"
+#include "nsIEncodedChannel.h"
+#include "nsIURI.h"
+#include "nsStringStream.h"
+#include "nsNetCID.h"
+#include "nsNetUtil.h"
+
+#include <algorithm>
+
+// Capacity, in bytes, of the sniffer buffer allocated in OnStartRequest; the
+// sniffing code below also uses it to cap how much decoded data it inspects.
+#define MAX_BUFFER_SIZE 512u
+
+// ConvertedStreamListener answers QueryInterface for nsIStreamListener and
+// nsIRequestObserver.
+NS_IMPL_ISUPPORTS(nsUnknownDecoder::ConvertedStreamListener, nsIStreamListener,
+ nsIRequestObserver)
+
+// Remembers the decoder whose mDecodedData this listener appends to. The
+// pointer is stored as-is (raw); no reference is taken here.
+nsUnknownDecoder::ConvertedStreamListener::ConvertedStreamListener(
+    nsUnknownDecoder* aDecoder)
+    : mDecoder(aDecoder) {}
+
+// Segment callback passed to nsIInputStream::ReadSegments by
+// OnDataAvailable: appends |count| bytes from |rawSegment| to the nsCString
+// passed as |closure| and reports the whole segment as consumed.
+// |inputStream| and |toOffset| are not used.
+nsresult nsUnknownDecoder::ConvertedStreamListener::AppendDataToString(
+    nsIInputStream* inputStream, void* closure, const char* rawSegment,
+    uint32_t toOffset, uint32_t count, uint32_t* writeCount) {
+  auto* accumulator = static_cast<nsCString*>(closure);
+  accumulator->Append(rawSegment, count);
+
+  *writeCount = count;
+  return NS_OK;
+}
+
+// Nothing to prepare at request start; always succeeds.
+NS_IMETHODIMP
+nsUnknownDecoder::ConvertedStreamListener::OnStartRequest(nsIRequest* request) {
+ return NS_OK;
+}
+
+// Appends |count| bytes read from |stream| to the owning decoder's
+// mDecodedData. The append is done on a local copy so that the decoder's
+// mutex is only held while copying the string in and out, not across the
+// ReadSegments call. |request| and |offset| are not used.
+NS_IMETHODIMP
+nsUnknownDecoder::ConvertedStreamListener::OnDataAvailable(
+    nsIRequest* request, nsIInputStream* stream, uint64_t offset,
+    uint32_t count) {
+  nsAutoCString accumulated;
+  {
+    MutexAutoLock lock(mDecoder->mMutex);
+    accumulated = mDecoder->mDecodedData;
+  }
+
+  uint32_t bytesRead;
+  nsresult rv =
+      stream->ReadSegments(AppendDataToString, &accumulated, count, &bytesRead);
+  if (NS_FAILED(rv)) {
+    // Leave mDecodedData untouched on failure.
+    return rv;
+  }
+
+  {
+    MutexAutoLock lock(mDecoder->mMutex);
+    mDecoder->mDecodedData = accumulated;
+  }
+  return NS_OK;
+}
+
+// Nothing to finalize at request end; |status| is ignored and the call
+// always succeeds.
+NS_IMETHODIMP
+nsUnknownDecoder::ConvertedStreamListener::OnStopRequest(nsIRequest* request,
+ nsresult status) {
+ return NS_OK;
+}
+
+// Starts with no sniffer buffer and caches the boolean pref
+// "security.requireHTMLsuffix" in mRequireHTMLsuffix. The flag stays false
+// when the pref service is unavailable or the pref cannot be read.
+nsUnknownDecoder::nsUnknownDecoder()
+    : mBuffer(nullptr),
+      mBufferLen(0),
+      mRequireHTMLsuffix(false),
+      mMutex("nsUnknownDecoder"),
+      mDecodedData("") {
+  nsCOMPtr<nsIPrefBranch> prefBranch = do_GetService(NS_PREFSERVICE_CONTRACTID);
+  if (!prefBranch) {
+    return;
+  }
+
+  bool requireSuffix;
+  nsresult prefRv =
+      prefBranch->GetBoolPref("security.requireHTMLsuffix", &requireSuffix);
+  if (NS_SUCCEEDED(prefRv)) {
+    mRequireHTMLsuffix = requireSuffix;
+  }
+}
+
+// Releases the sniffer buffer if it is still allocated.
+nsUnknownDecoder::~nsUnknownDecoder() {
+  // delete[] on a null pointer is a no-op, so no explicit check is needed.
+  delete[] mBuffer;
+  mBuffer = nullptr;
+}
+
+// ----
+//
+// nsISupports implementation...
+//
+// ----
+
+// Reference counting and QueryInterface table for nsUnknownDecoder. The
+// nsISupports entry is declared ambiguous and resolved explicitly through
+// nsIStreamListener.
+NS_IMPL_ADDREF(nsUnknownDecoder)
+NS_IMPL_RELEASE(nsUnknownDecoder)
+
+NS_INTERFACE_MAP_BEGIN(nsUnknownDecoder)
+ NS_INTERFACE_MAP_ENTRY(nsIStreamConverter)
+ NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
+ NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
+ NS_INTERFACE_MAP_ENTRY(nsIContentSniffer)
+ NS_INTERFACE_MAP_ENTRY(nsIThreadRetargetableStreamListener)
+ NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIStreamListener)
+NS_INTERFACE_MAP_END
+
+// ----
+//
+// nsIStreamConverter methods...
+//
+// ----
+
+// Synchronous conversion is not supported by this converter; every call
+// returns NS_ERROR_NOT_IMPLEMENTED and none of the arguments are used.
+NS_IMETHODIMP
+nsUnknownDecoder::Convert(nsIInputStream* aFromStream, const char* aFromType,
+ const char* aToType, nsISupports* aCtxt,
+ nsIInputStream** aResultStream) {
+ return NS_ERROR_NOT_IMPLEMENTED;
+}
+
+// Registers |aListener| as the downstream listener that receives the
+// OnStartRequest / OnDataAvailable / OnStopRequest notifications this
+// decoder forwards once the content type is known.
+//
+// aFromType and aToType are only checked by the debug assertion; aCtxt is
+// not used. Returns NS_OK when a listener was supplied and NS_ERROR_FAILURE
+// when |aListener| is null (mNextListener is then left null).
+NS_IMETHODIMP
+nsUnknownDecoder::AsyncConvertData(const char* aFromType, const char* aToType,
+                                   nsIStreamListener* aListener,
+                                   nsISupports* aCtxt) {
+  // The message used to name the multi mixed converter, which is a
+  // different class; it now names this one.
+  NS_ASSERTION(aListener && aFromType && aToType,
+               "null pointer passed into unknown decoder");
+
+  // mNextListener is read under mMutex by the other methods of this class,
+  // so it is also written under the lock.
+  MutexAutoLock lock(mMutex);
+  mNextListener = aListener;
+  return aListener ? NS_OK : NS_ERROR_FAILURE;
+}
+
+// Not supported by this converter: always returns NS_ERROR_NOT_IMPLEMENTED
+// and leaves |aToType| untouched.
+NS_IMETHODIMP
+nsUnknownDecoder::GetConvertedType(const nsACString& aFromType,
+ nsIChannel* aChannel, nsACString& aToType) {
+ return NS_ERROR_NOT_IMPLEMENTED;
+}
+
+// ----
+//
+// nsIStreamListener methods...
+//
+// ----
+
+// While the content type is still unknown, incoming bytes are copied into
+// the sniffer buffer (never more than MAX_BUFFER_SIZE in total). As soon as
+// data is left over after such a read, the content type is determined, the
+// buffered bytes are replayed downstream through FireListenerNotifications()
+// and the remainder of |aStream| is forwarded to the next listener.
+// Once the content type is known, data is forwarded directly.
+//
+// Returns NS_ERROR_FAILURE when no downstream listener is set,
+// NS_ERROR_OUT_OF_MEMORY when the sniffer buffer is missing, or the failure
+// code of the stream read / downstream notifications.
+NS_IMETHODIMP
+nsUnknownDecoder::OnDataAvailable(nsIRequest* request, nsIInputStream* aStream,
+ uint64_t aSourceOffset, uint32_t aCount) {
+ nsresult rv = NS_OK;
+
+ // Snapshot the state guarded by mMutex; the lock is not held while reading
+ // from the stream or calling listeners.
+ bool contentTypeEmpty;
+ {
+ MutexAutoLock lock(mMutex);
+ if (!mNextListener) return NS_ERROR_FAILURE;
+
+ contentTypeEmpty = mContentType.IsEmpty();
+ }
+
+ if (contentTypeEmpty) {
+ uint32_t count, len;
+
+ // If the buffer has not been allocated by now, just fail...
+ if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY;
+
+ //
+ // Determine how much of the stream should be read to fill up the
+ // sniffer buffer...
+ //
+ if (mBufferLen + aCount >= MAX_BUFFER_SIZE) {
+ count = MAX_BUFFER_SIZE - mBufferLen;
+ } else {
+ count = aCount;
+ }
+
+ // Read the data into the buffer...
+ rv = aStream->Read((mBuffer + mBufferLen), count, &len);
+ if (NS_FAILED(rv)) return rv;
+
+ mBufferLen += len;
+ aCount -= len;
+
+ // Data is left in the stream after the read above: stop buffering and
+ // start notifying the downstream listener.
+ if (aCount) {
+ //
+ // Adjust the source offset... The call to FireListenerNotifications(...)
+ // will make the first OnDataAvailable(...) call with an offset of 0.
+ // So, this offset needs to be adjusted to reflect that...
+ //
+ aSourceOffset += mBufferLen;
+
+ DetermineContentType(request);
+
+ rv = FireListenerNotifications(request, nullptr);
+ }
+ }
+
+ // Must not fire ODA again if it failed once
+ if (aCount && NS_SUCCEEDED(rv)) {
+#ifdef DEBUG
+ {
+ MutexAutoLock lock(mMutex);
+ NS_ASSERTION(!mContentType.IsEmpty(),
+ "Content type should be known by now.");
+ }
+#endif
+
+ // Re-read mNextListener: FireListenerNotifications() may have replaced it
+ // with a content-conversion listener.
+ nsCOMPtr<nsIStreamListener> listener;
+ {
+ MutexAutoLock lock(mMutex);
+ listener = mNextListener;
+ }
+ rv = listener->OnDataAvailable(request, aStream, aSourceOffset, aCount);
+ }
+
+ return rv;
+}
+
+// ----
+//
+// nsIRequestObserver methods...
+//
+// ----
+
+// Checks that a downstream listener has been registered and allocates the
+// MAX_BUFFER_SIZE sniffer buffer if it does not exist yet. The notification
+// is not forwarded to the downstream listener from here.
+//
+// Returns NS_ERROR_FAILURE without a listener and NS_ERROR_OUT_OF_MEMORY
+// when the buffer pointer is null after allocation.
+NS_IMETHODIMP
+nsUnknownDecoder::OnStartRequest(nsIRequest* request) {
+  {
+    MutexAutoLock lock(mMutex);
+    if (!mNextListener) {
+      return NS_ERROR_FAILURE;
+    }
+  }
+
+  // Allocate the sniffer buffer on first use.
+  if (!mBuffer) {
+    mBuffer = new char[MAX_BUFFER_SIZE];
+    if (!mBuffer) {
+      return NS_ERROR_OUT_OF_MEMORY;
+    }
+  }
+
+  // Do not pass the OnStartRequest on to the next listener (yet)...
+  return NS_OK;
+}
+
+// End-of-request handling. If the content type was never determined (the
+// stream ended while data was still only being buffered), the buffered data
+// is sniffed and replayed downstream first. The downstream listener then
+// receives OnStopRequest and is released (mNextListener is cleared).
+//
+// Returns NS_ERROR_FAILURE when no downstream listener is set, otherwise the
+// result of the downstream OnStopRequest call.
+NS_IMETHODIMP
+nsUnknownDecoder::OnStopRequest(nsIRequest* request, nsresult aStatus) {
+ nsresult rv = NS_OK;
+
+ bool contentTypeEmpty;
+ {
+ MutexAutoLock lock(mMutex);
+ if (!mNextListener) return NS_ERROR_FAILURE;
+
+ contentTypeEmpty = mContentType.IsEmpty();
+ }
+
+ //
+ // The total amount of data is less than the size of the sniffer buffer.
+ // Analyze the buffer now...
+ //
+ if (contentTypeEmpty) {
+ DetermineContentType(request);
+
+ // Make sure channel listeners see channel as pending while we call
+ // OnStartRequest/OnDataAvailable, even though the underlying channel
+ // has already hit OnStopRequest.
+ nsCOMPtr<nsIForcePendingChannel> forcePendingChannel =
+ do_QueryInterface(request);
+ if (forcePendingChannel) {
+ forcePendingChannel->ForcePending(true);
+ }
+
+ rv = FireListenerNotifications(request, nullptr);
+
+ // A failure while replaying the buffered data replaces the status that is
+ // reported to the downstream OnStopRequest below.
+ if (NS_FAILED(rv)) {
+ aStatus = rv;
+ }
+
+ // now we need to set pending state to false before calling OnStopRequest
+ if (forcePendingChannel) {
+ forcePendingChannel->ForcePending(false);
+ }
+ }
+
+ // Take the listener out of the member under the lock, then notify it
+ // without holding the lock.
+ nsCOMPtr<nsIStreamListener> listener;
+ {
+ MutexAutoLock lock(mMutex);
+ listener = mNextListener;
+ mNextListener = nullptr;
+ }
+ rv = listener->OnStopRequest(request, aStatus);
+
+ return rv;
+}
+
+// ----
+//
+// nsIContentSniffer methods...
+//
+// ----
+// Sniffs a MIME type from |aLength| bytes at |aData| by running
+// DetermineContentType() over them and returns the result in |type|.
+// Returns NS_ERROR_NOT_AVAILABLE when the request's load info asks to skip
+// content sniffing or when no type could be determined.
+NS_IMETHODIMP
+nsUnknownDecoder::GetMIMETypeFromContent(nsIRequest* aRequest,
+ const uint8_t* aData, uint32_t aLength,
+ nsACString& type) {
+ // This is only used by sniffer, therefore we do not need to lock anything
+ // here.
+ nsCOMPtr<nsIChannel> channel(do_QueryInterface(aRequest));
+ if (channel) {
+ nsCOMPtr<nsILoadInfo> loadInfo = channel->LoadInfo();
+ if (loadInfo->GetSkipContentSniffing()) {
+ return NS_ERROR_NOT_AVAILABLE;
+ }
+ }
+
+ // Temporarily point the sniffer buffer at the caller's data. The memory is
+ // borrowed, so mBuffer is reset to nullptr right after sniffing; otherwise
+ // the destructor would delete[] memory this object does not own.
+ mBuffer = const_cast<char*>(reinterpret_cast<const char*>(aData));
+ mBufferLen = aLength;
+ DetermineContentType(aRequest);
+ mBuffer = nullptr;
+ mBufferLen = 0;
+ // Hand the result to the caller and clear the member again, leaving
+ // mContentType empty for the next call.
+ type.Assign(mContentType);
+ mContentType.Truncate();
+ return type.IsEmpty() ? NS_ERROR_NOT_AVAILABLE : NS_OK;
+}
+
+// Actual sniffing code
+
+// Decides whether HTML/XML sniffing may be applied to |aRequest|.
+//
+// With the "security.requireHTMLsuffix" pref off (mRequireHTMLsuffix false)
+// sniffing is always allowed. With it on, sniffing is refused when the
+// request is not a channel, has no URI, asks to skip content sniffing via
+// its load info, or uses the "file" scheme.
+bool nsUnknownDecoder::AllowSniffing(nsIRequest* aRequest) {
+  if (mRequireHTMLsuffix) {
+    nsCOMPtr<nsIChannel> chan = do_QueryInterface(aRequest);
+    if (!chan) {
+      NS_ERROR("QI failed");
+      return false;
+    }
+
+    nsCOMPtr<nsIURI> channelURI;
+    nsresult uriRv = chan->GetURI(getter_AddRefs(channelURI));
+    if (NS_FAILED(uriRv) || !channelURI) {
+      return false;
+    }
+
+    nsCOMPtr<nsILoadInfo> info = chan->LoadInfo();
+    if (info->GetSkipContentSniffing()) {
+      return false;
+    }
+
+    // Local files are the one scheme that is not sniffed in this mode.
+    const bool isLocalFile = channelURI->SchemeIs("file");
+    return !isLocalFile;
+  }
+
+  return true;
+}
+
+/**
+ * This is the array of sniffer entries that depend on "magic numbers"
+ * in the file. Each entry has either a type associated with it (set
+ * these with the SNIFFER_ENTRY macro) or a function to be executed
+ * (set these with the SNIFFER_ENTRY_WITH_FUNC macro). The function
+ * should take a single nsIRequest* and returns bool -- true if
+ * it sets mContentType, false otherwise
+ */
+// Entries are tried in array order by DetermineContentType(); an entry
+// matches when the data starts with its byte prefix.
+nsUnknownDecoder::nsSnifferEntry nsUnknownDecoder::sSnifferEntries[] = {
+ SNIFFER_ENTRY("%PDF-", APPLICATION_PDF),
+
+ SNIFFER_ENTRY("%!PS-Adobe-", APPLICATION_POSTSCRIPT),
+
+ // Files that start with mailbox delimiters let's provisionally call
+ // text/plain
+ SNIFFER_ENTRY("From", TEXT_PLAIN), SNIFFER_ENTRY(">From", TEXT_PLAIN),
+
+ // If the buffer begins with "#!" or "%!" then it is a script of
+ // some sort... "Scripts" can include arbitrary data to be passed
+ // to an interpreter, so we need to decide whether we can call this
+ // text or whether it's data.
+ // NOTE(review): only the "#!" prefix is registered below; "%!" has no entry
+ // of its own apart from the "%!PS-Adobe-" one above.
+ SNIFFER_ENTRY_WITH_FUNC("#!", &nsUnknownDecoder::LastDitchSniff),
+
+ // XXXbz should (and can) we also include the various ways that <?xml can
+ // appear as UTF-16 and such? See http://www.w3.org/TR/REC-xml#sec-guessing
+ SNIFFER_ENTRY_WITH_FUNC("<?xml", &nsUnknownDecoder::SniffForXML)};
+
+// Number of elements in sSnifferEntries, computed from the array size.
+uint32_t nsUnknownDecoder::sSnifferEntryNum =
+ sizeof(nsUnknownDecoder::sSnifferEntries) /
+ sizeof(nsUnknownDecoder::nsSnifferEntry);
+
+// Fills in mContentType for the data currently in the sniffer buffer.
+// The steps, in order, stopping at the first one that yields a type:
+//  1. if the load info asks to skip content sniffing: LastDitchSniff() only,
+//     plus a console message for HTTP channels;
+//  2. the magic-number table sSnifferEntries;
+//  3. the sniffers registered under NS_DATA_SNIFFER_CATEGORY;
+//  4. SniffForHTML();
+//  5. SniffURI();
+//  6. LastDitchSniff().
+// Does nothing if mContentType is already set.
+void nsUnknownDecoder::DetermineContentType(nsIRequest* aRequest) {
+ {
+ MutexAutoLock lock(mMutex);
+ NS_ASSERTION(mContentType.IsEmpty(), "Content type is already known.");
+ if (!mContentType.IsEmpty()) return;
+ }
+
+ nsCOMPtr<nsIChannel> channel(do_QueryInterface(aRequest));
+ if (channel) {
+ nsCOMPtr<nsILoadInfo> loadInfo = channel->LoadInfo();
+ if (loadInfo->GetSkipContentSniffing()) {
+ /*
+ * If we did not get a useful Content-Type from the server
+ * but also have sniffing disabled, just determine whether
+ * to use text/plain or octetstream and log an error to the Console
+ */
+ LastDitchSniff(aRequest);
+
+ nsCOMPtr<nsIHttpChannel> httpChannel(do_QueryInterface(aRequest));
+ if (httpChannel) {
+ nsAutoCString type;
+ httpChannel->GetContentType(type);
+ nsCOMPtr<nsIURI> requestUri;
+ httpChannel->GetURI(getter_AddRefs(requestUri));
+ // NOTE(review): requestUri is dereferenced without a null check; this
+ // relies on GetURI() always producing a URI for an HTTP channel.
+ nsAutoCString spec;
+ requestUri->GetSpec(spec);
+ // Keep the logged URL short: at most 50 characters plus an ellipsis.
+ if (spec.Length() > 50) {
+ spec.Truncate(50);
+ spec.AppendLiteral("...");
+ }
+ httpChannel->LogMimeTypeMismatch(
+ "XTCOWithMIMEValueMissing"_ns, false, NS_ConvertUTF8toUTF16(spec),
+ // Type is not used in the Error Message but required
+ NS_ConvertUTF8toUTF16(type));
+ }
+ return;
+ }
+ }
+
+ // By default the raw sniffer buffer is examined.
+ const char* testData = mBuffer;
+ uint32_t testDataLen = mBufferLen;
+ // Check if data are compressed.
+ nsAutoCString decodedData;
+
+ if (channel) {
+ // ConvertEncodedData is always called only on a single thread for each
+ // instance of an object.
+ nsresult rv = ConvertEncodedData(aRequest, mBuffer, mBufferLen);
+ if (NS_SUCCEEDED(rv)) {
+ MutexAutoLock lock(mMutex);
+ decodedData = mDecodedData;
+ }
+ // If decoding produced output, examine that instead of the raw buffer,
+ // limited to MAX_BUFFER_SIZE bytes.
+ if (!decodedData.IsEmpty()) {
+ testData = decodedData.get();
+ testDataLen = std::min(decodedData.Length(), MAX_BUFFER_SIZE);
+ }
+ }
+
+ // First, run through all the types we can detect reliably based on
+ // magic numbers
+ uint32_t i;
+ for (i = 0; i < sSnifferEntryNum; ++i) {
+ if (testDataLen >= sSnifferEntries[i].mByteLen && // enough data
+ memcmp(testData, sSnifferEntries[i].mBytes,
+ sSnifferEntries[i].mByteLen) == 0) { // and type matches
+ NS_ASSERTION(
+ sSnifferEntries[i].mMimeType ||
+ sSnifferEntries[i].mContentTypeSniffer,
+ "Must have either a type string or a function to set the type");
+ NS_ASSERTION(!sSnifferEntries[i].mMimeType ||
+ !sSnifferEntries[i].mContentTypeSniffer,
+ "Both a type string and a type sniffing function set;"
+ " using type string");
+ if (sSnifferEntries[i].mMimeType) {
+ MutexAutoLock lock(mMutex);
+ mContentType = sSnifferEntries[i].mMimeType;
+ NS_ASSERTION(!mContentType.IsEmpty(),
+ "Content type should be known by now.");
+ return;
+ }
+ // Function entry: the member function sets mContentType itself and
+ // returns true on success; on false the remaining entries are tried.
+ if ((this->*(sSnifferEntries[i].mContentTypeSniffer))(aRequest)) {
+#ifdef DEBUG
+ MutexAutoLock lock(mMutex);
+ NS_ASSERTION(!mContentType.IsEmpty(),
+ "Content type should be known by now.");
+#endif
+ return;
+ }
+ }
+ }
+
+ // Next, ask the content sniffers registered in the data-sniffer category.
+ nsAutoCString sniffedType;
+ NS_SniffContent(NS_DATA_SNIFFER_CATEGORY, aRequest, (const uint8_t*)testData,
+ testDataLen, sniffedType);
+ {
+ MutexAutoLock lock(mMutex);
+ mContentType = sniffedType;
+ if (!mContentType.IsEmpty()) {
+ return;
+ }
+ }
+
+ if (SniffForHTML(aRequest)) {
+#ifdef DEBUG
+ MutexAutoLock lock(mMutex);
+ NS_ASSERTION(!mContentType.IsEmpty(),
+ "Content type should be known by now.");
+#endif
+ return;
+ }
+
+ // We don't know what this is yet. Before we just give up, try
+ // the URI from the request.
+ if (SniffURI(aRequest)) {
+#ifdef DEBUG
+ MutexAutoLock lock(mMutex);
+ NS_ASSERTION(!mContentType.IsEmpty(),
+ "Content type should be known by now.");
+#endif
+ return;
+ }
+
+ // Final fallback: LastDitchSniff() always sets a type (text/plain or
+ // application/octet-stream).
+ LastDitchSniff(aRequest);
+#ifdef DEBUG
+ MutexAutoLock lock(mMutex);
+ NS_ASSERTION(!mContentType.IsEmpty(), "Content type should be known by now.");
+#endif
+}
+
+// Looks for something that resembles the start of an HTML document in the
+// decoded data (or in the raw sniffer buffer when nothing was decoded).
+// After optional leading whitespace the data must begin with '<' followed by
+// '!', '?', or one of a fixed list of tag names terminated by ' ' or '>'.
+// On a match mContentType is set to TEXT_HTML and true is returned.
+// Returns false without touching mContentType when AllowSniffing() refuses
+// or nothing matches.
+bool nsUnknownDecoder::SniffForHTML(nsIRequest* aRequest) {
+ /*
+ * To prevent a possible attack, we will not consider this to be
+ * html content if it comes from the local file system and our prefs
+ * are set right
+ */
+ if (!AllowSniffing(aRequest)) {
+ return false;
+ }
+
+ // Held for the rest of the function: mDecodedData is read and mContentType
+ // is written below.
+ MutexAutoLock lock(mMutex);
+
+ // Now look for HTML.
+ const char* str;
+ const char* end;
+ if (mDecodedData.IsEmpty()) {
+ str = mBuffer;
+ end = mBuffer + mBufferLen;
+ } else {
+ str = mDecodedData.get();
+ end = mDecodedData.get() + std::min(mDecodedData.Length(), MAX_BUFFER_SIZE);
+ }
+
+ // skip leading whitespace
+ while (str != end && nsCRT::IsAsciiSpace(*str)) {
+ ++str;
+ }
+
+ // did we find something like a start tag?
+ // (the ++str in the last operand also steps past the '<')
+ if (str == end || *str != '<' || ++str == end) {
+ return false;
+ }
+
+ // If we seem to be SGML or XML and we got down here, just pretend we're HTML
+ if (*str == '!' || *str == '?') {
+ mContentType = TEXT_HTML;
+ return true;
+ }
+
+ // Number of bytes available after the '<'.
+ uint32_t bufSize = end - str;
+ // We use sizeof(_tagstr) below because that's the length of _tagstr
+ // with the one char " " or ">" appended.
+#define MATCHES_TAG(_tagstr) \
+ (bufSize >= sizeof(_tagstr) && \
+ (PL_strncasecmp(str, _tagstr " ", sizeof(_tagstr)) == 0 || \
+ PL_strncasecmp(str, _tagstr ">", sizeof(_tagstr)) == 0))
+
+ if (MATCHES_TAG("html") || MATCHES_TAG("frameset") || MATCHES_TAG("body") ||
+ MATCHES_TAG("head") || MATCHES_TAG("script") || MATCHES_TAG("iframe") ||
+ MATCHES_TAG("a") || MATCHES_TAG("img") || MATCHES_TAG("table") ||
+ MATCHES_TAG("title") || MATCHES_TAG("link") || MATCHES_TAG("base") ||
+ MATCHES_TAG("style") || MATCHES_TAG("div") || MATCHES_TAG("p") ||
+ MATCHES_TAG("font") || MATCHES_TAG("applet") || MATCHES_TAG("meta") ||
+ MATCHES_TAG("center") || MATCHES_TAG("form") || MATCHES_TAG("isindex") ||
+ MATCHES_TAG("h1") || MATCHES_TAG("h2") || MATCHES_TAG("h3") ||
+ MATCHES_TAG("h4") || MATCHES_TAG("h5") || MATCHES_TAG("h6") ||
+ MATCHES_TAG("b") || MATCHES_TAG("pre")) {
+ mContentType = TEXT_HTML;
+ return true;
+ }
+
+#undef MATCHES_TAG
+
+ return false;
+}
+
+// Sniffer for data starting with "<?xml" (see sSnifferEntries). When
+// sniffing is allowed, the type is taken from the URI if SniffURI() finds
+// one and falls back to TEXT_XML otherwise; in both cases true is returned.
+// Returns false, leaving mContentType alone, when AllowSniffing() refuses.
+bool nsUnknownDecoder::SniffForXML(nsIRequest* aRequest) {
+  // Just like HTML, this should be able to be shut off.
+  if (AllowSniffing(aRequest)) {
+    // First see whether the URI tells us something more specific.
+    const bool typedFromURI = SniffURI(aRequest);
+    if (!typedFromURI) {
+      // Generic XML will have to do.
+      MutexAutoLock lock(mMutex);
+      mContentType = TEXT_XML;
+    }
+    return true;
+  }
+
+  return false;
+}
+
+// Tries to derive the content type from the request's URI using the MIME
+// service. On success mContentType is set (under mMutex) and true is
+// returned.
+//
+// Returns false, leaving mContentType untouched, when:
+//  - |aRequest| is not an nsIChannel,
+//  - the channel's load info asks to skip content sniffing,
+//  - the MIME service or the channel URI is unavailable, or
+//  - the MIME service cannot map the URI to a type.
+bool nsUnknownDecoder::SniffURI(nsIRequest* aRequest) {
+  nsCOMPtr<nsIChannel> channel(do_QueryInterface(aRequest));
+  if (!channel) {
+    // DetermineContentType() calls this even when the request is not a
+    // channel, so a failed QueryInterface must not be dereferenced. There
+    // is no URI to inspect in that case.
+    return false;
+  }
+
+  nsCOMPtr<nsILoadInfo> loadInfo = channel->LoadInfo();
+  if (loadInfo->GetSkipContentSniffing()) {
+    return false;
+  }
+
+  nsCOMPtr<nsIMIMEService> mimeService(do_GetService("@mozilla.org/mime;1"));
+  if (!mimeService) {
+    return false;
+  }
+
+  nsCOMPtr<nsIURI> uri;
+  nsresult result = channel->GetURI(getter_AddRefs(uri));
+  if (NS_FAILED(result) || !uri) {
+    return false;
+  }
+
+  nsAutoCString type;
+  result = mimeService->GetTypeFromURI(uri, type);
+  if (NS_FAILED(result)) {
+    return false;
+  }
+
+  MutexAutoLock lock(mMutex);
+  mContentType = type;
+  return true;
+}
+
+// This macro is based on RFC 2046 Section 4.1.2. Treat any char 0-31
+// except the 9-13 range (\t, \n, \v, \f, \r) and char 27 (used by
+// encodings like Shift_JIS) as non-text
+//
+// The unsigned char cast in the first test makes every byte value above 31
+// count as text, including bytes >= 0x80 whatever the signedness of plain
+// char. Note that |ch| is evaluated more than once, so the argument must
+// not have side effects.
+#define IS_TEXT_CHAR(ch) \
+ (((unsigned char)(ch)) > 31 || (9 <= (ch) && (ch) <= 13) || (ch) == 27)
+
+// Fallback classifier: decides between text/plain and
+// application/octet-stream and stores the result in mContentType. It looks
+// at the decoded data when there is any, otherwise at the raw sniffer
+// buffer, and never at more than MAX_BUFFER_SIZE bytes. |aRequest| is not
+// used. Always returns true.
+bool nsUnknownDecoder::LastDitchSniff(nsIRequest* aRequest) {
+  MutexAutoLock lock(mMutex);
+
+  const char* bytes;
+  uint32_t byteCount;
+  if (!mDecodedData.IsEmpty()) {
+    bytes = mDecodedData.get();
+    byteCount = std::min(mDecodedData.Length(), MAX_BUFFER_SIZE);
+  } else {
+    bytes = mBuffer;
+    // Since some legacy text files end with 0x1A, reading the entire buffer
+    // will lead misdetection.
+    byteCount = std::min<uint32_t>(mBufferLen, MAX_BUFFER_SIZE);
+  }
+
+  // A byte-order mark means text/plain in whatever encoding. If there is a
+  // BOM _and_ text there are always at least 4 bytes (the 2-byte BOMs belong
+  // to 2-byte encodings and the UTF-8 BOM is 3 bytes long).
+  if (byteCount >= 4) {
+    const unsigned char* u = (const unsigned char*)bytes;
+    const bool utf16BigEndian = u[0] == 0xFE && u[1] == 0xFF;
+    // Also the first two bytes of a little-endian UCS-4 BOM.
+    const bool utf16LittleEndian = u[0] == 0xFF && u[1] == 0xFE;
+    const bool utf8 = u[0] == 0xEF && u[1] == 0xBB && u[2] == 0xBF;
+    const bool ucs4BigEndian =
+        u[0] == 0 && u[1] == 0 && u[2] == 0xFE && u[3] == 0xFF;
+
+    if (utf16BigEndian || utf16LittleEndian || utf8 || ucs4BigEndian) {
+      mContentType = TEXT_PLAIN;
+      return true;
+    }
+  }
+
+  // Otherwise scan for the first non-text byte; data without any is called
+  // text/plain.
+  uint32_t pos = 0;
+  while (pos < byteCount && IS_TEXT_CHAR(bytes[pos])) {
+    ++pos;
+  }
+
+  if (pos < byteCount) {
+    // Stopped early on a non-text byte.
+    mContentType = APPLICATION_OCTET_STREAM;
+  } else {
+    mContentType = TEXT_PLAIN;
+  }
+
+  return true;
+}
+
+// Delivers the deferred start of the request to the downstream listener:
+//  1. publishes the sniffed content type on the channel (or, for a
+//     view-source channel, as its original content type);
+//  2. calls the listener's OnStartRequest;
+//  3. lets an nsIEncodedChannel install its content-conversion listener,
+//     which then replaces mNextListener;
+//  4. replays the bytes held in the sniffer buffer through a pipe as the
+//     first OnDataAvailable (offset 0);
+//  5. frees the sniffer buffer.
+//
+// Returns NS_ERROR_FAILURE without a downstream listener,
+// NS_ERROR_OUT_OF_MEMORY when there is no sniffer buffer, or the first
+// failure from the steps above.
+nsresult nsUnknownDecoder::FireListenerNotifications(nsIRequest* request,
+ nsISupports* aCtxt) {
+ nsresult rv = NS_OK;
+
+ // Snapshot listener and content type under the lock; listeners are called
+ // without holding it.
+ nsCOMPtr<nsIStreamListener> listener;
+ nsAutoCString contentType;
+ {
+ MutexAutoLock lock(mMutex);
+ if (!mNextListener) return NS_ERROR_FAILURE;
+
+ listener = mNextListener;
+ contentType = mContentType;
+ }
+
+ if (!contentType.IsEmpty()) {
+ nsCOMPtr<nsIViewSourceChannel> viewSourceChannel =
+ do_QueryInterface(request);
+ if (viewSourceChannel) {
+ rv = viewSourceChannel->SetOriginalContentType(contentType);
+ } else {
+ nsCOMPtr<nsIChannel> channel = do_QueryInterface(request, &rv);
+ if (NS_SUCCEEDED(rv)) {
+ // Set the new content type on the channel...
+ rv = channel->SetContentType(contentType);
+ }
+ }
+
+ NS_ASSERTION(NS_SUCCEEDED(rv), "Unable to set content type on channel!");
+
+ if (NS_FAILED(rv)) {
+ // Cancel the request to make sure it has the correct status if
+ // mNextListener looks at it.
+ request->Cancel(rv);
+ listener->OnStartRequest(request);
+ return rv;
+ }
+ }
+
+ // Fire the OnStartRequest(...)
+ rv = listener->OnStartRequest(request);
+
+ if (NS_SUCCEEDED(rv)) {
+ // install stream converter if required
+ nsCOMPtr<nsIEncodedChannel> encodedChannel = do_QueryInterface(request);
+ if (encodedChannel) {
+ nsCOMPtr<nsIStreamListener> listenerNew;
+ rv = encodedChannel->DoApplyContentConversions(
+ listener, getter_AddRefs(listenerNew), aCtxt);
+ if (NS_SUCCEEDED(rv) && listenerNew) {
+ MutexAutoLock lock(mMutex);
+ mNextListener = listenerNew;
+ listener = listenerNew;
+ }
+ }
+ }
+
+ // NOTE(review): this early return happens after the downstream
+ // OnStartRequest has already been delivered.
+ if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY;
+
+ // If the request was canceled, then we need to treat that equivalently
+ // to an error returned by OnStartRequest.
+ if (NS_SUCCEEDED(rv)) request->GetStatus(&rv);
+
+ // Fire the first OnDataAvailable for the data that was read from the
+ // stream into the sniffer buffer...
+ if (NS_SUCCEEDED(rv) && (mBufferLen > 0)) {
+ uint32_t len = 0;
+ nsCOMPtr<nsIInputStream> in;
+ nsCOMPtr<nsIOutputStream> out;
+
+ // Create a pipe and fill it with the data from the sniffer buffer.
+ rv = NS_NewPipe(getter_AddRefs(in), getter_AddRefs(out), MAX_BUFFER_SIZE,
+ MAX_BUFFER_SIZE);
+
+ if (NS_SUCCEEDED(rv)) {
+ rv = out->Write(mBuffer, mBufferLen, &len);
+ if (NS_SUCCEEDED(rv)) {
+ if (len == mBufferLen) {
+ rv = listener->OnDataAvailable(request, in, 0, len);
+ } else {
+ NS_ERROR("Unable to write all the data into the pipe.");
+ rv = NS_ERROR_FAILURE;
+ }
+ }
+ }
+ }
+
+ // The buffered data has been handed over (or delivery failed); either way
+ // the sniffer buffer is no longer needed.
+ delete[] mBuffer;
+ mBuffer = nullptr;
+ mBufferLen = 0;
+
+ return rv;
+}
+
+// Runs |length| bytes at |data| through the request's content-conversion
+// chain so that the sniffing code can look at decoded content.
+// mDecodedData is cleared first. If |request| is an nsIEncodedChannel, its
+// conversion listeners are set up with a ConvertedStreamListener as the
+// final sink, which accumulates the output into mDecodedData. For other
+// requests mDecodedData simply stays empty and NS_OK is returned.
+//
+// Returns the first failure from setting up the conversions, creating the
+// input stream, or feeding the data.
+nsresult nsUnknownDecoder::ConvertEncodedData(nsIRequest* request,
+ const char* data,
+ uint32_t length) {
+ nsresult rv = NS_OK;
+
+ {
+ MutexAutoLock lock(mMutex);
+ mDecodedData = "";
+ }
+ nsCOMPtr<nsIEncodedChannel> encodedChannel(do_QueryInterface(request));
+ if (encodedChannel) {
+ RefPtr<ConvertedStreamListener> strListener =
+ new ConvertedStreamListener(this);
+
+ nsCOMPtr<nsIStreamListener> listener;
+ rv = encodedChannel->DoApplyContentConversions(
+ strListener, getter_AddRefs(listener), nullptr);
+
+ if (NS_FAILED(rv)) {
+ return rv;
+ }
+
+ // A null |listener| is treated as "nothing to convert".
+ if (listener) {
+ listener->OnStartRequest(request);
+
+ if (length) {
+ // Wrap the raw bytes in a string input stream for the converter chain.
+ // NOTE(review): the early returns below skip the OnStopRequest call
+ // although OnStartRequest was already sent to |listener|.
+ nsCOMPtr<nsIStringInputStream> rawStream =
+ do_CreateInstance(NS_STRINGINPUTSTREAM_CONTRACTID);
+ if (!rawStream) return NS_ERROR_FAILURE;
+
+ rv = rawStream->SetData((const char*)data, length);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ rv = listener->OnDataAvailable(request, rawStream, 0, length);
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+
+ listener->OnStopRequest(request, NS_OK);
+ }
+ }
+ return rv;
+}
+
+//
+// nsIThreadRetargetableStreamListener methods
+//
+// Forwards the check to the downstream listener. Returns
+// NS_ERROR_NO_INTERFACE when there is no downstream listener or it does not
+// implement nsIThreadRetargetableStreamListener.
+NS_IMETHODIMP
+nsUnknownDecoder::CheckListenerChain() {
+  nsCOMPtr<nsIThreadRetargetableStreamListener> retargetable;
+  {
+    // Only the QueryInterface on mNextListener happens under the lock.
+    MutexAutoLock lock(mMutex);
+    retargetable = do_QueryInterface(mNextListener);
+  }
+
+  if (retargetable) {
+    return retargetable->CheckListenerChain();
+  }
+  return NS_ERROR_NO_INTERFACE;
+}
+
+// Re-examines HTTP responses that were labelled text/plain by a server
+// default. Nothing happens for non-HTTP requests, for responses whose
+// Content-Type is not one of the exact default strings below, or for
+// responses with a Content-Encoding header.
+//
+// When the load info asks to skip content sniffing, only LastDitchSniff()
+// runs. Otherwise LastDitchSniff() runs and its verdict is mapped:
+// application/octet-stream becomes APPLICATION_GUESS_FROM_EXT, and anything
+// else clears mContentType.
+void nsBinaryDetector::DetermineContentType(nsIRequest* aRequest) {
+  nsCOMPtr<nsIHttpChannel> http = do_QueryInterface(aRequest);
+  if (!http) {
+    return;
+  }
+
+  nsCOMPtr<nsILoadInfo> info = http->LoadInfo();
+  if (info->GetSkipContentSniffing()) {
+    LastDitchSniff(aRequest);
+    return;
+  }
+
+  // It's an HTTP channel. Look at both the raw header and the parsed type.
+  nsAutoCString rawHeader;
+  Unused << http->GetResponseHeader("Content-Type"_ns, rawHeader);
+  nsAutoCString parsedType;
+  http->GetContentType(parsedType);
+
+  if (!parsedType.EqualsLiteral("text/plain")) {
+    return;
+  }
+
+  // Deliberately a case-sensitive exact match. Apache 1.x sends plain
+  // text/plain for "unknown", Apache 2.x adds an ISO-8859-1 charset,
+  // Debian's Apache spells that charset in lower case, and FC7, RHEL4 and
+  // Ubuntu Feisty send charset=UTF-8. No general case-insensitive
+  // comparison is done, so that this heuristic applies as rarely as
+  // possible.
+  const bool isServerDefault =
+      rawHeader.EqualsLiteral("text/plain") ||
+      rawHeader.EqualsLiteral("text/plain; charset=ISO-8859-1") ||
+      rawHeader.EqualsLiteral("text/plain; charset=iso-8859-1") ||
+      rawHeader.EqualsLiteral("text/plain; charset=UTF-8");
+  if (!isServerDefault) {
+    return;
+  }
+
+  // Check whether we have content-encoding. If we do, don't try to
+  // detect the type.
+  // XXXbz we could improve this by doing a local decompress if we
+  // wanted, I'm sure.
+  nsAutoCString encoding;
+  Unused << http->GetResponseHeader("Content-Encoding"_ns, encoding);
+  if (!encoding.IsEmpty()) {
+    return;
+  }
+
+  LastDitchSniff(aRequest);
+  MutexAutoLock lock(mMutex);
+  if (!mContentType.EqualsLiteral(APPLICATION_OCTET_STREAM)) {
+    // Let the text/plain type we already have be, so that other content
+    // sniffers can also get a shot at this data.
+    mContentType.Truncate();
+  } else {
+    // We want to guess at it instead
+    mContentType = APPLICATION_GUESS_FROM_EXT;
+  }
+}
diff --git a/netwerk/streamconv/converters/nsUnknownDecoder.h b/netwerk/streamconv/converters/nsUnknownDecoder.h
new file mode 100644
index 0000000000..3c46d52414
--- /dev/null
+++ b/netwerk/streamconv/converters/nsUnknownDecoder.h
@@ -0,0 +1,166 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsUnknownDecoder_h__
+#define nsUnknownDecoder_h__
+
+#include "nsIStreamConverter.h"
+#include "nsIThreadRetargetableStreamListener.h"
+#include "nsIContentSniffer.h"
+#include "mozilla/Mutex.h"
+#include "mozilla/Atomics.h"
+
+#include "nsCOMPtr.h"
+#include "nsString.h"
+
+#define NS_UNKNOWNDECODER_CID \
+ { /* 7d7008a0-c49a-11d3-9b22-0080c7cb1080 */ \
+ 0x7d7008a0, 0xc49a, 0x11d3, { \
+ 0x9b, 0x22, 0x00, 0x80, 0xc7, 0xcb, 0x10, 0x80 \
+ } \
+ }
+
+class nsUnknownDecoder : public nsIStreamConverter,
+ public nsIContentSniffer,
+ public nsIThreadRetargetableStreamListener {
+ public:
+ // nsISupports methods
+ NS_DECL_ISUPPORTS
+
+ // nsIStreamConverter methods
+ NS_DECL_NSISTREAMCONVERTER
+
+ // nsIStreamListener methods
+ NS_DECL_NSISTREAMLISTENER
+
+ // nsIRequestObserver methods
+ NS_DECL_NSIREQUESTOBSERVER
+
+ // nsIContentSniffer methods
+ NS_DECL_NSICONTENTSNIFFER
+
+ // nsIThreadRetargetableStreamListener methods
+ NS_DECL_NSITHREADRETARGETABLESTREAMLISTENER
+
+ nsUnknownDecoder();
+
+ protected:
+ virtual ~nsUnknownDecoder();
+
+ virtual void DetermineContentType(nsIRequest* aRequest);  // overridden by nsBinaryDetector
+ nsresult FireListenerNotifications(nsIRequest* request, nsISupports* aCtxt);
+
+ class ConvertedStreamListener : public nsIStreamListener {
+ public:
+ explicit ConvertedStreamListener(nsUnknownDecoder* aDecoder);
+
+ NS_DECL_ISUPPORTS
+ NS_DECL_NSIREQUESTOBSERVER
+ NS_DECL_NSISTREAMLISTENER
+
+ private:
+ virtual ~ConvertedStreamListener() = default;
+ static nsresult AppendDataToString(nsIInputStream* inputStream,
+ void* closure, const char* rawSegment,
+ uint32_t toOffset, uint32_t count,
+ uint32_t* writeCount);
+ nsUnknownDecoder* mDecoder;  // raw pointer; presumably the aDecoder given to the constructor -- TODO confirm
+ };
+
+ protected:
+ nsCOMPtr<nsIStreamListener> mNextListener;
+
+ // Function to use to check whether sniffing some potentially
+ // dangerous types (eg HTML) is ok for this request. We can disable
+ // sniffing for local files if needed using this. Just a security
+ // precaution thingy... who knows when we suddenly need to flip this
+ // pref?
+ bool AllowSniffing(nsIRequest* aRequest);
+
+ // Various sniffer functions. Returning true means that a type
+ // was determined; false means no luck.
+ bool SniffForHTML(nsIRequest* aRequest);
+ bool SniffForXML(nsIRequest* aRequest);
+
+ // SniffURI guesses at the content type based on the URI (typically
+ // using the extension)
+ bool SniffURI(nsIRequest* aRequest);
+
+ // LastDitchSniff guesses at text/plain vs. application/octet-stream
+ // by just looking at whether the data contains null bytes, and
+ // maybe at the fraction of chars with high bit set. Use this only
+ // as a last-ditch attempt to decide a content type!
+ bool LastDitchSniff(nsIRequest* aRequest);
+
+ /**
+ * An entry struct for our array of sniffers. Each entry has either
+ * a type associated with it (set these with the SNIFFER_ENTRY macro)
+ * or a function to be executed (set these with the
+ * SNIFFER_ENTRY_WITH_FUNC macro). The function should take a single
+ * nsIRequest* and return bool -- true if it sets mContentType,
+ * false otherwise
+ */
+ struct nsSnifferEntry {
+ typedef bool (nsUnknownDecoder::*TypeSniffFunc)(nsIRequest* aRequest);
+
+ const char* mBytes;
+ uint32_t mByteLen;  // both macros below pass sizeof(_bytes) - 1, i.e. no trailing NUL
+
+ // Exactly one of mMimeType and mContentTypeSniffer should be set non-null
+ const char* mMimeType;
+ TypeSniffFunc mContentTypeSniffer;
+ };
+
+#define SNIFFER_ENTRY(_bytes, _type) \
+ { _bytes, sizeof(_bytes) - 1, _type, nullptr }
+
+#define SNIFFER_ENTRY_WITH_FUNC(_bytes, _func) \
+ { _bytes, sizeof(_bytes) - 1, nullptr, _func }
+
+ static nsSnifferEntry sSnifferEntries[];
+ static uint32_t sSnifferEntryNum;  // presumably the element count of sSnifferEntries -- TODO confirm
+
+ // We guarantee in order delivery of OnStart, OnStop and OnData, therefore
+ // we do not need proper locking for mBuffer.
+ mozilla::Atomic<char*> mBuffer;
+ mozilla::Atomic<uint32_t> mBufferLen;
+ mozilla::Atomic<bool> mRequireHTMLsuffix;
+
+ nsCString mContentType;
+
+ // This mutex syncs: mContentType, mDecodedData and mNextListener.
+ mutable mozilla::Mutex mMutex;
+
+ protected:
+ nsresult ConvertEncodedData(nsIRequest* request, const char* data,
+ uint32_t length);
+ nsCString mDecodedData; // If the data are encoded, this will be the uncompressed data.
+};
+
+#define NS_BINARYDETECTOR_CID \
+ { /* a2027ec6-ba0d-4c72-805d-148233f5f33c */ \
+ 0xa2027ec6, 0xba0d, 0x4c72, { \
+ 0x80, 0x5d, 0x14, 0x82, 0x33, 0xf5, 0xf3, 0x3c \
+ } \
+ }
+
+/**
+ * Detector that classifies a data stream as either text or binary. Nearly all
+ * of nsUnknownDecoder is inherited as-is; only the content-type decision is
+ * replaced -- the DetermineContentType override just calls LastDitchSniff and
+ * sets the type to APPLICATION_GUESS_FROM_EXT when the data looks binary.
+ */
+class nsBinaryDetector : public nsUnknownDecoder {
+ protected:
+ void DetermineContentType(nsIRequest* aRequest) override;
+};
+
+#define NS_BINARYDETECTOR_CATEGORYENTRY \
+ { \
+ NS_CONTENT_SNIFFER_CATEGORY, "Binary Detector", \
+ NS_BINARYDETECTOR_CONTRACTID \
+ }
+
+#endif /* nsUnknownDecoder_h__ */