diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /netwerk/streamconv/converters | |
parent | Initial commit. (diff) | |
download | firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.tar.xz firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.zip |
Adding upstream version 86.0.1. upstream/86.0.1 upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'netwerk/streamconv/converters')
20 files changed, 8343 insertions, 0 deletions
diff --git a/netwerk/streamconv/converters/ParseFTPList.cpp b/netwerk/streamconv/converters/ParseFTPList.cpp new file mode 100644 index 0000000000..892ca91e6e --- /dev/null +++ b/netwerk/streamconv/converters/ParseFTPList.cpp @@ -0,0 +1,1493 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ParseFTPList.h" +#include <algorithm> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include "plstr.h" +#include "nsDebug.h" +#include "prprf.h" +#include "nsUnicharUtils.h" +#include "mozilla/CheckedInt.h" +#include "mozilla/IntegerPrintfMacros.h" +#include "mozilla/TextUtils.h" +#include "mozilla/Sprintf.h" + +/* ==================================================================== */ + +using mozilla::CheckedInt; +using mozilla::IsAsciiAlpha; +using mozilla::IsAsciiAlphanumeric; +using mozilla::IsAsciiDigit; +using mozilla::IsAsciiLowercaseAlpha; + +/* Longest line (measured after leading whitespace is skipped) that ParseFTPList will try to parse; longer lines are answered with ParsingFailed(). */ static const int kMaxFTPListLen = 32768; + +/* Picks the return code for a line that could not be parsed: '?' when state->parsed_one or state->lstyle is already set, '"' otherwise. Only reads from state. */ static inline int ParsingFailed(struct list_state* state) { + if (state->parsed_one || state->lstyle) /* junk if we fail to parse */ + return '?'; /* this time but had previously parsed successfully */ + return '"'; /* it's part of a comment or error message */ +} + +/* Expands a two-digit year stored in aTime->tm_year, in place. Values below 80 get 2000 added, values from 80 to 99 get 1900 added, and values of 100 or more are left untouched. */ void FixupYear(PRExplodedTime* aTime) { + /* if year has only two digits then assume that + 00-79 is 2000-2079 + 80-99 is 1980-1999 */ + if (aTime->tm_year < 80) { + aTime->tm_year += 2000; + } else if (aTime->tm_year < 100) { + aTime->tm_year += 1900; + } +} + +int ParseFTPList(const char* line, struct list_state* state, + struct list_result* result, PRTimeParamFn timeParam, + NowTimeFn nowTimeFn) { + unsigned int carry_buf_len; /* copy of state->carry_buf_len */ + unsigned int pos; + const char* p; + + if (!line || !state || !result) return 0; + + 
memset(result, 0, sizeof(*result)); + state->numlines++; + + /* carry buffer is only valid from one line to the next */ + carry_buf_len = state->carry_buf_len; + state->carry_buf_len = 0; + + /* strip leading whitespace */ + while (*line == ' ' || *line == '\t') line++; + + /* line is terminated at first '\0' or '\n' */ + p = line; + while (*p && *p != '\n') p++; + unsigned int linelen = p - line; + + if (linelen > 0 && *p == '\n' && *(p - 1) == '\r') linelen--; + + /* DON'T strip trailing whitespace. */ + + if (linelen > kMaxFTPListLen) { + return ParsingFailed(state); + } + + if (linelen > 0) { + static const char* month_names = "JanFebMarAprMayJunJulAugSepOctNovDec"; + const char* tokens[16]; /* 16 is more than enough */ + unsigned int toklen[(sizeof(tokens) / sizeof(tokens[0]))]; + unsigned int linelen_sans_wsp; // line length sans whitespace + unsigned int numtoks = 0; + unsigned int tokmarker = 0; /* extra info for lstyle handler */ + unsigned int month_num = 0; + char tbuf[4]; + int lstyle = 0; + + if (carry_buf_len) /* VMS long filename carryover buffer */ + { + tokens[0] = state->carry_buf; + toklen[0] = carry_buf_len; + numtoks++; + } + + pos = 0; + while (pos < linelen && numtoks < (sizeof(tokens) / sizeof(tokens[0]))) { + while (pos < linelen && + (line[pos] == ' ' || line[pos] == '\t' || line[pos] == '\r')) + pos++; + if (pos < linelen) { + tokens[numtoks] = &line[pos]; + while (pos < linelen && + (line[pos] != ' ' && line[pos] != '\t' && line[pos] != '\r')) + pos++; + if (tokens[numtoks] != &line[pos]) { + toklen[numtoks] = (&line[pos] - tokens[numtoks]); + numtoks++; + } + } + } + + if (!numtoks) return ParsingFailed(state); + + linelen_sans_wsp = &(tokens[numtoks - 1][toklen[numtoks - 1]]) - tokens[0]; + if (numtoks == (sizeof(tokens) / sizeof(tokens[0]))) { + pos = linelen; + while (pos > 0 && (line[pos - 1] == ' ' || line[pos - 1] == '\t')) pos--; + linelen_sans_wsp = pos; + } + + /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ + 
+#if defined(SUPPORT_EPLF) + /* EPLF handling must come somewhere before /bin/dls handling. */ + if (!lstyle && (!state->lstyle || state->lstyle == 'E')) { + if (*line == '+' && linelen > 4 && numtoks >= 2) { + pos = 1; + while (pos < (linelen - 1)) { + p = &line[pos++]; + if (*p == '/') + result->fe_type = 'd'; /* its a dir */ + else if (*p == 'r') + result->fe_type = 'f'; /* its a file */ + else if (*p == 'm') { + if (IsAsciiDigit(line[pos])) { + while (pos < linelen && IsAsciiDigit(line[pos])) pos++; + if (pos < linelen && line[pos] == ',') { + PRTime t; + PRTime seconds; + PR_sscanf(p + 1, "%llu", &seconds); + t = seconds * PR_USEC_PER_SEC; + PR_ExplodeTime(t, timeParam, &(result->fe_time)); + } + } + } else if (*p == 's') { + if (IsAsciiDigit(line[pos])) { + while (pos < linelen && IsAsciiDigit(line[pos])) pos++; + if (pos < linelen && line[pos] == ',' && + ((&line[pos]) - (p + 1)) < int(sizeof(result->fe_size) - 1)) { + memcpy(result->fe_size, p + 1, + (unsigned)(&line[pos] - (p + 1))); + result->fe_size[(&line[pos] - (p + 1))] = '\0'; + } + } + } else if (IsAsciiAlpha( + *p)) /* 'i'/'up' or unknown "fact" (property) */ + { + while (pos < linelen && *++p != ',') pos++; + } else if (*p != '\t' || (p + 1) != tokens[1]) { + break; /* its not EPLF after all */ + } else { + state->parsed_one = 1; + state->lstyle = lstyle = 'E'; + + p = &(line[linelen_sans_wsp]); + result->fe_fname = tokens[1]; + result->fe_fnlen = p - tokens[1]; + + if (!result->fe_type) /* access denied */ + { + result->fe_type = 'f'; /* is assuming 'f'ile correct? */ + return '?'; /* NO! junk it. 
*/ + } + return result->fe_type; + } + if (pos >= (linelen - 1) || line[pos] != ',') break; + pos++; + } /* while (pos < linelen) */ + memset(result, 0, sizeof(*result)); + } /* if (*line == '+' && linelen > 4 && numtoks >= 2) */ + } /* if (!lstyle && (!state->lstyle || state->lstyle == 'E')) */ +#endif /* SUPPORT_EPLF */ + + /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ + +#if defined(SUPPORT_VMS) + if (!lstyle && + (!state->lstyle || + state->lstyle == 'V')) { /* try VMS Multinet/UCX/CMS server */ + /* + * Legal characters in a VMS file/dir spec are [A-Z0-9$.-_~]. + * '$' cannot begin a filename and `-' cannot be used as the first + * or last character. '.' is only valid as a directory separator + * and <file>.<type> separator. A canonical filename spec might look + * like this: DISK$VOL:[DIR1.DIR2.DIR3]FILE.TYPE;123 + * All VMS FTP servers LIST in uppercase. + * + * We need to be picky about this in order to support + * multi-line listings correctly. + */ + if (!state->parsed_one && + (numtoks == 1 || (numtoks == 2 && toklen[0] == 9 && + memcmp(tokens[0], "Directory", 9) == 0))) { + /* If no dirstyle has been detected yet, and this line is a + * VMS list's dirname, then turn on VMS dirstyle. + * eg "ACA:[ANONYMOUS]", "DISK$FTP:[ANONYMOUS]", "SYS$ANONFTP:" + */ + p = tokens[0]; + pos = toklen[0]; + if (numtoks == 2) { + p = tokens[1]; + pos = toklen[1]; + } + pos--; + if (pos >= 3) { + while (pos > 0 && p[pos] != '[') { + pos--; + if (p[pos] == '-' || p[pos] == '$') { + if (pos == 0 || p[pos - 1] == '[' || p[pos - 1] == '.' || + (p[pos] == '-' && (p[pos + 1] == ']' || p[pos + 1] == '.'))) + break; + } else if (p[pos] != '.' 
&& p[pos] != '~' && + !IsAsciiAlphanumeric(p[pos])) + break; + else if (IsAsciiLowercaseAlpha(p[pos])) + break; + } + if (pos > 0) { + pos--; + if (p[pos] != ':' || p[pos + 1] != '[') pos = 0; + } + } + if (pos > 0 && p[pos] == ':') { + while (pos > 0) { + pos--; + if (p[pos] != '$' && p[pos] != '_' && p[pos] != '-' && + p[pos] != '~' && !IsAsciiAlphanumeric(p[pos])) + break; + else if (IsAsciiLowercaseAlpha(p[pos])) + break; + } + if (pos == 0) { + state->lstyle = 'V'; + return '?'; /* its junk */ + } + } + /* fallthrough */ + } else if ((tokens[0][toklen[0] - 1]) != ';') { + if (numtoks == 1 && (state->lstyle == 'V' && !carry_buf_len)) + lstyle = 'V'; + else if (numtoks < 4) + ; + else if (toklen[1] >= 10 && memcmp(tokens[1], "%RMS-E-PRV", 10) == 0) + lstyle = 'V'; + else if ((&line[linelen] - tokens[1]) >= 22 && + memcmp(tokens[1], "insufficient privilege", 22) == 0) + lstyle = 'V'; + else if (numtoks != 4 && numtoks != 6) + ; + else if (numtoks == 6 && + (toklen[5] < 4 || *tokens[5] != '(' || /* perms */ + (tokens[5][toklen[5] - 1]) != ')')) + ; + else if ((toklen[2] == 10 || toklen[2] == 11) && + (tokens[2][toklen[2] - 5]) == '-' && + (tokens[2][toklen[2] - 9]) == '-' && + (((toklen[3] == 4 || toklen[3] == 5 || toklen[3] == 7 || + toklen[3] == 8) && + (tokens[3][toklen[3] - 3]) == ':') || + ((toklen[3] == 10 || toklen[3] == 11) && + (tokens[3][toklen[3] - 3]) == + '.')) && /* time in [H]H:MM[:SS[.CC]] format */ + IsAsciiDigit(*tokens[1]) && /* size */ + IsAsciiDigit(*tokens[2]) && /* date */ + IsAsciiDigit(*tokens[3]) /* time */ + ) { + lstyle = 'V'; + } + if (lstyle == 'V') { + // clang-format off + /* + * MultiNet FTP: + * LOGIN.COM;2 1 4-NOV-1994 04:09 [ANONYMOUS] (RWE,RWE,,) + * PUB.DIR;1 1 27-JAN-1994 14:46 [ANONYMOUS] (RWE,RWE,RE,RWE) + * README.FTP;1 %RMS-E-PRV, insufficient privilege or file protection violation + * ROUSSOS.DIR;1 1 27-JAN-1994 14:48 [CS,ROUSSOS] (RWE,RWE,RE,R) + * S67-50903.JPG;1 328 22-SEP-1998 16:19 [ANONYMOUS] (RWED,RWED,,) + * UCX 
FTP: + * CII-MANUAL.TEX;1 213/216 29-JAN-1996 03:33:12 [ANONYMOU,ANONYMOUS] (RWED,RWED,,) + * CMU/VMS-IP FTP + * [VMSSERV.FILES]ALARM.DIR;1 1/3 5-MAR-1993 18:09 + * TCPware FTP + * FOO.BAR;1 4 5-MAR-1993 18:09:01.12 + * Long filename example: + * THIS-IS-A-LONG-VMS-FILENAME.AND-THIS-IS-A-LONG-VMS-FILETYPE\r\n + * 213[/nnn] 29-JAN-1996 03:33[:nn] [ANONYMOU,ANONYMOUS] (RWED,RWED,,) + */ + // clang-format on + tokmarker = 0; + p = tokens[0]; + pos = 0; + if (*p == '[' && toklen[0] >= 4) /* CMU style */ + { + if (p[1] != ']') { + p++; + pos++; + } + while (lstyle && pos < toklen[0] && *p != ']') { + if (*p != '$' && *p != '.' && *p != '_' && *p != '-' && + *p != '~' && !IsAsciiAlphanumeric(*p)) + lstyle = 0; + pos++; + p++; + } + if (lstyle && pos < (toklen[0] - 1)) { + /* ']' was found and there is at least one character after it */ + NS_ASSERTION(*p == ']', "unexpected state"); + pos++; + p++; + tokmarker = pos; /* length of leading "[DIR1.DIR2.etc]" */ + } else { + /* not a CMU style listing */ + lstyle = 0; + } + } + while (lstyle && pos < toklen[0] && *p != ';') { + if (*p != '$' && *p != '.' && *p != '_' && *p != '-' && *p != '~' && + !IsAsciiAlphanumeric(*p)) + lstyle = 0; + else if (IsAsciiLowercaseAlpha(*p)) + lstyle = 0; + p++; + pos++; + } + if (lstyle && *p == ';') { + if (pos == 0 || pos == (toklen[0] - 1)) lstyle = 0; + for (pos++; lstyle && pos < toklen[0]; pos++) { + if (!IsAsciiDigit(tokens[0][pos])) lstyle = 0; + } + } + pos = (p - tokens[0]); /* => fnlength sans ";####" */ + pos -= tokmarker; /* => fnlength sans "[DIR1.DIR2.etc]" */ + p = &(tokens[0][tokmarker]); /* offset of basename */ + + if (!lstyle || pos == 0 || + pos > 80) /* VMS filenames can't be longer than that */ + { + lstyle = 0; + } else if (numtoks == 1) { + /* if VMS has been detected and there is only one token and that + * token was a VMS filename then this is a multiline VMS LIST entry. 
+ */ + if (pos >= (sizeof(state->carry_buf) - 1)) + pos = (sizeof(state->carry_buf) - 1); /* shouldn't happen */ + memcpy(state->carry_buf, p, pos); + state->carry_buf_len = pos; + return '?'; /* tell caller to treat as junk */ + } else if (IsAsciiDigit(*tokens[1])) /* not no-privs message */ + { + for (pos = 0; lstyle && pos < (toklen[1]); pos++) { + if (!IsAsciiDigit((tokens[1][pos])) && (tokens[1][pos]) != '/') + lstyle = 0; + } + if (lstyle && numtoks > 4) /* Multinet or UCX but not CMU */ + { + for (pos = 1; lstyle && pos < (toklen[5] - 1); pos++) { + p = &(tokens[5][pos]); + if (*p != 'R' && *p != 'W' && *p != 'E' && *p != 'D' && + *p != ',') + lstyle = 0; + } + } + } + } /* passed initial tests */ + } /* else if ((tokens[0][toklen[0]-1]) != ';') */ + + if (lstyle == 'V') { + state->parsed_one = 1; + state->lstyle = lstyle; + + if (IsAsciiDigit(*tokens[1])) /* not permission denied etc */ + { + /* strip leading directory name */ + if (*tokens[0] == '[') /* CMU server */ + { + pos = toklen[0] - 1; + p = tokens[0] + 1; + while (*p != ']') { + p++; + pos--; + } + toklen[0] = --pos; + tokens[0] = ++p; + } + pos = 0; + while (pos < toklen[0] && (tokens[0][pos]) != ';') pos++; + + result->fe_cinfs = 1; + result->fe_type = 'f'; + result->fe_fname = tokens[0]; + result->fe_fnlen = pos; + + if (pos > 4) { + p = &(tokens[0][pos - 4]); + if (p[0] == '.' && p[1] == 'D' && p[2] == 'I' && p[3] == 'R') { + result->fe_fnlen -= 4; + result->fe_type = 'd'; + } + } + + if (result->fe_type != 'd') { + /* #### or used/allocated form. If used/allocated form, then + * 'used' is the size in bytes if and only if 'used'<=allocated. + * If 'used' is size in bytes then it can be > 2^32 + * If 'used' is not size in bytes then it is size in blocks. + */ + pos = 0; + while (pos < toklen[1] && (tokens[1][pos]) != '/') pos++; + + /* + * On OpenVMS, the size is given in blocks. A block is 512 + * bytes. 
This can only approximate the size of the file, + * but that's better than not showing a size at all. + * numBlocks is clamped to UINT32_MAX to make 32-bit and + * 64-bit builds return consistent results. + */ + uint64_t numBlocks = strtoul(tokens[1], nullptr, 10); + numBlocks = std::min(numBlocks, (uint64_t)UINT32_MAX); + uint64_t fileSize = numBlocks * 512; + SprintfLiteral(result->fe_size, "%" PRIu64, fileSize); + } /* if (result->fe_type != 'd') */ + + p = tokens[2] + 2; + if (*p == '-') p++; + tbuf[0] = p[0]; + tbuf[1] = ToLowerCaseASCII(p[1]); + tbuf[2] = ToLowerCaseASCII(p[2]); + month_num = 0; + for (pos = 0; pos < (12 * 3); pos += 3) { + if (tbuf[0] == month_names[pos + 0] && + tbuf[1] == month_names[pos + 1] && + tbuf[2] == month_names[pos + 2]) + break; + month_num++; + } + if (month_num >= 12) month_num = 0; + result->fe_time.tm_month = month_num; + result->fe_time.tm_mday = atoi(tokens[2]); + result->fe_time.tm_year = atoi(p + 4); // NSPR wants year as XXXX + + p = tokens[3] + 2; + if (*p == ':') p++; + if (p[2] == ':') result->fe_time.tm_sec = atoi(p + 3); + result->fe_time.tm_hour = atoi(tokens[3]); + result->fe_time.tm_min = atoi(p); + + return result->fe_type; + + } /* if (IsAsciiDigit(*tokens[1])) */ + + return '?'; /* junk */ + + } /* if (lstyle == 'V') */ + } /* if (!lstyle && (!state->lstyle || state->lstyle == 'V')) */ +#endif + + /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ + +#if defined(SUPPORT_CMS) + /* Virtual Machine/Conversational Monitor System (IBM Mainframe) */ + if (!lstyle && (!state->lstyle || state->lstyle == 'C')) /* VM/CMS */ + { + /* LISTing according to mirror.pl + * Filename FileType Fm Format Lrecl Records Blocks Date Time + * LASTING GLOBALV A1 V 41 21 1 9/16/91 15:10:32 + * J43401 NETLOG A0 V 77 1 1 9/12/91 12:36:04 + * PROFILE EXEC A1 V 17 3 1 9/12/91 12:39:07 + * DIRUNIX SCRIPT A1 V 77 1216 17 1/04/93 20:30:47 + * MAIL PROFILE A2 F 80 1 1 10/14/92 16:12:27 + * BADY2K TEXT A0 V 1 1 1 1/03/102 
10:11:12 + * AUTHORS A1 DIR - - - 9/20/99 10:31:11 + * + * LISTing from vm.marist.edu and vm.sc.edu + * 220-FTPSERVE IBM VM Level 420 at VM.MARIST.EDU, 04:58:12 EDT WEDNESDAY + * 2002-07-10 AUTHORS DIR - - - + * 1999-09-20 10:31:11 - HARRINGTON DIR - - - + * 1997-02-12 15:33:28 - PICS DIR - - - + * 2000-10-12 15:43:23 - SYSFILE DIR - - - + * 2000-07-20 17:48:01 - WELCNVT EXEC V 72 9 1 + * 1999-09-20 17:16:18 - WELCOME EREADME F 80 21 1 + * 1999-12-27 16:19:00 - WELCOME README V 82 21 1 + * 1999-12-27 16:19:04 - README ANONYMOU V 71 26 1 + * 1997-04-02 12:33:20 TCP291 README ANONYOLD V 71 15 1 + * 1995-08-25 16:04:27 TCP291 + */ + if (numtoks >= 7 && (toklen[0] + toklen[1]) <= 16) { + for (pos = 1; !lstyle && (pos + 5) < numtoks; pos++) { + p = tokens[pos]; + if ((toklen[pos] == 1 && (*p == 'F' || *p == 'V')) || + (toklen[pos] == 3 && *p == 'D' && p[1] == 'I' && p[2] == 'R')) { + if (toklen[pos + 5] == 8 && (tokens[pos + 5][2]) == ':' && + (tokens[pos + 5][5]) == ':') { + p = tokens[pos + 4]; + if ((toklen[pos + 4] == 10 && p[4] == '-' && p[7] == '-') || + (toklen[pos + 4] >= 7 && toklen[pos + 4] <= 9 && + p[((p[1] != '/') ? (2) : (1))] == '/' && + p[((p[1] != '/') ? 
(5) : (4))] == '/')) + /* Y2K bugs possible ("7/06/102" or "13/02/101") */ + { + if ((*tokens[pos + 1] == '-' && *tokens[pos + 2] == '-' && + *tokens[pos + 3] == '-') || + (IsAsciiDigit(*tokens[pos + 1]) && + IsAsciiDigit(*tokens[pos + 2]) && + IsAsciiDigit(*tokens[pos + 3]))) { + lstyle = 'C'; + tokmarker = pos; + } + } + } + } + } /* for (pos = 1; !lstyle && (pos+5) < numtoks; pos++) */ + } /* if (numtoks >= 7) */ + + /* extra checking if first pass */ + if (lstyle && !state->lstyle) { + for (pos = 0, p = tokens[0]; lstyle && pos < toklen[0]; pos++, p++) { + if (IsAsciiLowercaseAlpha(*p)) lstyle = 0; + } + for (pos = tokmarker + 1; pos <= tokmarker + 3; pos++) { + if (!(toklen[pos] == 1 && *tokens[pos] == '-')) { + for (p = tokens[pos]; lstyle && p < (tokens[pos] + toklen[pos]); + p++) { + if (!IsAsciiDigit(*p)) lstyle = 0; + } + } + } + for (pos = 0, p = tokens[tokmarker + 4]; + lstyle && pos < toklen[tokmarker + 4]; pos++, p++) { + if (*p == '/') { + /* There may be Y2K bugs in the date. Don't simplify to + * pos != (len-3) && pos != (len-6) like time is done. 
+ */ + if ((tokens[tokmarker + 4][1]) == '/') { + if (pos != 1 && pos != 4) lstyle = 0; + } else if (pos != 2 && pos != 5) + lstyle = 0; + } else if (*p != '-' && !IsAsciiDigit(*p)) + lstyle = 0; + else if (*p == '-' && pos != 4 && pos != 7) + lstyle = 0; + } + for (pos = 0, p = tokens[tokmarker + 5]; + lstyle && pos < toklen[tokmarker + 5]; pos++, p++) { + if (*p != ':' && !IsAsciiDigit(*p)) + lstyle = 0; + else if (*p == ':' && pos != (toklen[tokmarker + 5] - 3) && + pos != (toklen[tokmarker + 5] - 6)) + lstyle = 0; + } + } /* initial if() */ + + if (lstyle == 'C') { + state->parsed_one = 1; + state->lstyle = lstyle; + + p = tokens[tokmarker + 4]; + if (toklen[tokmarker + 4] == 10) /* newstyle: YYYY-MM-DD format */ + { + result->fe_time.tm_year = atoi(p + 0); + result->fe_time.tm_month = atoi(p + 5) - 1; + result->fe_time.tm_mday = atoi(p + 8); + } else /* oldstyle: [M]M/DD/YY format */ + { + pos = toklen[tokmarker + 4]; + result->fe_time.tm_month = atoi(p) - 1; + result->fe_time.tm_mday = atoi((p + pos) - 5); + result->fe_time.tm_year = atoi((p + pos) - 2); + FixupYear(&result->fe_time); + } + + p = tokens[tokmarker + 5]; + pos = toklen[tokmarker + 5]; + result->fe_time.tm_hour = atoi(p); + result->fe_time.tm_min = atoi((p + pos) - 5); + result->fe_time.tm_sec = atoi((p + pos) - 2); + + result->fe_cinfs = 1; + result->fe_fname = tokens[0]; + result->fe_fnlen = toklen[0]; + result->fe_type = 'f'; + + p = tokens[tokmarker]; + if (toklen[tokmarker] == 3 && *p == 'D' && p[1] == 'I' && p[2] == 'R') + result->fe_type = 'd'; + + if ((/*newstyle*/ toklen[tokmarker + 4] == 10 && tokmarker > 1) || + (/*oldstyle*/ toklen[tokmarker + 4] != 10 && + tokmarker > 2)) { /* have a filetype column */ + char* dot; + p = &(tokens[0][toklen[0]]); + memcpy(&dot, &p, sizeof(dot)); /* NASTY! 
*/ + *dot++ = '.'; + p = tokens[1]; + for (pos = 0; pos < toklen[1]; pos++) *dot++ = *p++; + result->fe_fnlen += 1 + toklen[1]; + } + + /* oldstyle LISTING: + * files/dirs not on the 'A' minidisk are not RETRievable/CHDIRable + if (toklen[tokmarker+4] != 10 && *tokens[tokmarker-1] != 'A') + return '?'; + */ + + /* VM/CMS LISTings have no usable filesize field. + * Have to use the 'SIZE' command for that. + */ + return result->fe_type; + + } /* if (lstyle == 'C' && (!state->lstyle || state->lstyle == lstyle)) */ + } /* VM/CMS */ +#endif + + /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ + +#if defined(SUPPORT_DOS) /* WinNT DOS dirstyle */ + if (!lstyle && (!state->lstyle || state->lstyle == 'W')) { + // clang-format off + /* + * "10-23-00 01:27PM <DIR> veronist" + * "06-15-00 07:37AM <DIR> zoe" + * "07-14-00 01:35PM 2094926 canprankdesk.tif" + * "07-21-00 01:19PM 95077 Jon Kauffman Enjoys the Good Life.jpg" + * "07-21-00 01:19PM 52275 Name Plate.jpg" + * "07-14-00 01:38PM 2250540 Valentineoffprank-HiRes.jpg" + */ + // Microsoft FTP server with FtpDirBrowseShowLongDate set returns year + // in 4-digit format: + // "10-10-2014 10:10AM <DIR> FTP" + // Windows CE FTP server returns time in 24-hour format: + // "05-03-13 22:01 <DIR> APPS" + // clang-format on + if ((numtoks >= 4) && (toklen[0] == 8 || toklen[0] == 10) && + (toklen[1] == 5 || toklen[1] == 7) && + (*tokens[2] == '<' || IsAsciiDigit(*tokens[2]))) { + p = tokens[0]; + if (IsAsciiDigit(p[0]) && IsAsciiDigit(p[1]) && p[2] == '-' && + IsAsciiDigit(p[3]) && IsAsciiDigit(p[4]) && p[5] == '-' && + IsAsciiDigit(p[6]) && IsAsciiDigit(p[7])) { + p = tokens[1]; + if (IsAsciiDigit(p[0]) && IsAsciiDigit(p[1]) && p[2] == ':' && + IsAsciiDigit(p[3]) && IsAsciiDigit(p[4]) && + (toklen[1] == 5 || + (toklen[1] == 7 && (p[5] == 'A' || p[5] == 'P') && + p[6] == 'M'))) { + lstyle = 'W'; + if (!state->lstyle) { + p = tokens[2]; + /* <DIR> or <JUNCTION> */ + if (*p != '<' || p[toklen[2] - 1] != '>') { + for 
(pos = 1; (lstyle && pos < toklen[2]); pos++) { + if (!IsAsciiDigit(*++p)) lstyle = 0; + } + } + } + } + } + } + + if (lstyle == 'W') { + state->parsed_one = 1; + state->lstyle = lstyle; + + p = &(line[linelen]); /* line end */ + result->fe_cinfs = 1; + result->fe_fname = tokens[3]; + result->fe_fnlen = p - tokens[3]; + result->fe_type = 'd'; + + if (*tokens[2] != '<') /* not <DIR> or <JUNCTION> */ + { + // try to handle correctly spaces at the beginning of the filename + // filesize (token[2]) must end at offset 38 + if (tokens[2] + toklen[2] - line == 38) { + result->fe_fname = &(line[39]); + result->fe_fnlen = p - result->fe_fname; + } + result->fe_type = 'f'; + pos = toklen[2]; + if (pos > (sizeof(result->fe_size) - 1)) { + pos = (sizeof(result->fe_size) - 1); + } + memcpy(result->fe_size, tokens[2], pos); + result->fe_size[pos] = '\0'; + } else { + // try to handle correctly spaces at the beginning of the filename + // token[2] must begin at offset 24, the length is 5 or 10 + // token[3] must begin at offset 39 or higher + if (tokens[2] - line == 24 && (toklen[2] == 5 || toklen[2] == 10) && + tokens[3] - line >= 39) { + result->fe_fname = &(line[39]); + result->fe_fnlen = p - result->fe_fname; + } + + if ((tokens[2][1]) != 'D') /* not <DIR> */ + { + result->fe_type = '?'; /* unknown until junc for sure */ + if (result->fe_fnlen > 4) { + p = result->fe_fname; + for (pos = result->fe_fnlen - 4; pos > 0; pos--) { + if (p[0] == ' ' && p[3] == ' ' && p[2] == '>' && + (p[1] == '=' || p[1] == '-')) { + result->fe_type = 'l'; + result->fe_fnlen = p - result->fe_fname; + result->fe_lname = p + 4; + result->fe_lnlen = &(line[linelen]) - result->fe_lname; + break; + } + p++; + } + } + } + } + + result->fe_time.tm_month = atoi(tokens[0] + 0); + if (result->fe_time.tm_month != 0) { + result->fe_time.tm_month--; + result->fe_time.tm_mday = atoi(tokens[0] + 3); + result->fe_time.tm_year = atoi(tokens[0] + 6); + FixupYear(&result->fe_time); + } + + result->fe_time.tm_hour = 
atoi(tokens[1] + 0); + result->fe_time.tm_min = atoi(tokens[1] + 3); + if (toklen[1] == 7) { + if ((tokens[1][5]) == 'P' && result->fe_time.tm_hour < 12) + result->fe_time.tm_hour += 12; + else if ((tokens[1][5]) == 'A' && result->fe_time.tm_hour == 12) + result->fe_time.tm_hour = 0; + } + + /* the caller should do this (if dropping "." and ".." is desired) + if (result->fe_type == 'd' && result->fe_fname[0] == '.' && + (result->fe_fnlen == 1 || (result->fe_fnlen == 2 && + result->fe_fname[1] == '.'))) + return '?'; + */ + + return result->fe_type; + } /* if (lstyle == 'W' && (!state->lstyle || state->lstyle == lstyle)) */ + } /* if (!lstyle && (!state->lstyle || state->lstyle == 'W')) */ +#endif + + /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ + +#if defined(SUPPORT_OS2) + if (!lstyle && (!state->lstyle || state->lstyle == 'O')) /* OS/2 test */ + { + /* 220 server IBM TCP/IP for OS/2 - FTP Server ver 23:04:36 on Jan 15 1997 + *ready. fixed position, space padded columns. I have only a vague idea of + *what the contents between col 18 and 34 might be: All I can infer is + *that there may be attribute flags in there and there may be a " DIR" in + *there. + * + * 1 2 3 4 5 6 + *0123456789012345678901234567890123456789012345678901234567890123456789 + *----- size -------|??????????????? MM-DD-YY| HH:MM| nnnnnnnnn.... + * 0 DIR 04-11-95 16:26 . + * 0 DIR 04-11-95 16:26 .. + * 0 DIR 04-11-95 16:26 ADDRESS + * 612 RHSA 07-28-95 16:45 air_tra1.bag + * 195 A 08-09-95 10:23 Alfa1.bag + * 0 RHS DIR 04-11-95 16:26 ATTACH + * 372 A 08-09-95 10:26 Aussie_1.bag + * 310992 06-28-94 09:56 INSTALL.EXE + * 1 2 3 4 + * 01234567890123456789012345678901234567890123456789 + * dirlist from the mirror.pl project, col positions from Mozilla. 
+ */ + p = &(line[toklen[0]]); + /* \s(\d\d-\d\d-\d\d)\s+(\d\d:\d\d)\s */ + if (numtoks >= 4 && toklen[0] <= 18 && IsAsciiDigit(*tokens[0]) && + (linelen - toklen[0]) >= (54 - 18) && p[18 - 18] == ' ' && + p[34 - 18] == ' ' && p[37 - 18] == '-' && p[40 - 18] == '-' && + p[43 - 18] == ' ' && p[45 - 18] == ' ' && p[48 - 18] == ':' && + p[51 - 18] == ' ' && IsAsciiDigit(p[35 - 18]) && + IsAsciiDigit(p[36 - 18]) && IsAsciiDigit(p[38 - 18]) && + IsAsciiDigit(p[39 - 18]) && IsAsciiDigit(p[41 - 18]) && + IsAsciiDigit(p[42 - 18]) && IsAsciiDigit(p[46 - 18]) && + IsAsciiDigit(p[47 - 18]) && IsAsciiDigit(p[49 - 18]) && + IsAsciiDigit(p[50 - 18]) && + (linelen_sans_wsp - toklen[0]) > (53 - 18)) { + lstyle = 'O'; /* OS/2 */ + if (!state->lstyle) { + for (pos = 1; lstyle && pos < toklen[0]; pos++) { + if (!IsAsciiDigit(tokens[0][pos])) lstyle = 0; + } + } + } + + if (lstyle == 'O') { + state->parsed_one = 1; + state->lstyle = lstyle; + + p = &(line[toklen[0]]); + + result->fe_cinfs = 1; + result->fe_fname = &p[53 - 18]; + result->fe_fnlen = (&(line[linelen_sans_wsp])) - (result->fe_fname); + result->fe_type = 'f'; + + /* I don't have a real listing to determine exact pos, so scan. */ + for (pos = (18 - 18); pos < ((35 - 18) - 4); pos++) { + if (p[pos + 0] == ' ' && p[pos + 1] == 'D' && p[pos + 2] == 'I' && + p[pos + 3] == 'R') { + result->fe_type = 'd'; + break; + } + } + + if (result->fe_type != 'd') { + pos = toklen[0]; + if (pos > (sizeof(result->fe_size) - 1)) + pos = (sizeof(result->fe_size) - 1); + memcpy(result->fe_size, tokens[0], pos); + result->fe_size[pos] = '\0'; + } + + result->fe_time.tm_month = atoi(&p[35 - 18]) - 1; + result->fe_time.tm_mday = atoi(&p[38 - 18]); + result->fe_time.tm_year = atoi(&p[41 - 18]); + FixupYear(&result->fe_time); + result->fe_time.tm_hour = atoi(&p[46 - 18]); + result->fe_time.tm_min = atoi(&p[49 - 18]); + + /* the caller should do this (if dropping "." and ".." is desired) + if (result->fe_type == 'd' && result->fe_fname[0] == '.' 
&& + (result->fe_fnlen == 1 || (result->fe_fnlen == 2 && + result->fe_fname[1] == '.'))) + return '?'; + */ + + return result->fe_type; + } /* if (lstyle == 'O') */ + + } /* if (!lstyle && (!state->lstyle || state->lstyle == 'O')) */ +#endif + + /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ + +#if defined(SUPPORT_LSL) + if (!lstyle && (!state->lstyle || state->lstyle == 'U')) /* /bin/ls & co. */ + { + /* UNIX-style listing, without inum and without blocks + * "-rw-r--r-- 1 root other 531 Jan 29 03:26 README" + * "dr-xr-xr-x 2 root other 512 Apr 8 1994 etc" + * "dr-xr-xr-x 2 root 512 Apr 8 1994 etc" + * "lrwxrwxrwx 1 root other 7 Jan 25 00:17 bin -> usr/bin" + * Also produced by Microsoft's FTP servers for Windows: + * "---------- 1 owner group 1803128 Jul 10 10:18 ls-lR.Z" + * "d--------- 1 owner group 0 May 9 19:45 Softlib" + * Also WFTPD for MSDOS: + * "-rwxrwxrwx 1 noone nogroup 322 Aug 19 1996 message.ftp" + * Hellsoft for NetWare: + * "d[RWCEMFA] supervisor 512 Jan 16 18:53 login" + * "-[RWCEMFA] rhesus 214059 Oct 20 15:27 cx.exe" + * Newer Hellsoft for NetWare: (netlab2.usu.edu) + * - [RWCEAFMS] NFAUUser 192 Apr 27 15:21 HEADER.html + * d [RWCEAFMS] jrd 512 Jul 11 03:01 allupdates + * Also NetPresenz for the Mac: + * "-------r-- 326 1391972 1392298 Nov 22 1995 MegaPhone.sit" + * "drwxrwxr-x folder 2 May 10 1996 network" + * Protected directory: + * "drwx-wx-wt 2 root wheel 512 Jul 1 02:15 incoming" + * uid/gid instead of username/groupname: + * "drwxr-xr-x 2 0 0 512 May 28 22:17 etc" + */ + + bool is_old_Hellsoft = false; + + if (numtoks >= 6) { + /* there are two perm formats (Hellsoft/NetWare and *IX strmode(3)). + * Scan for size column only if the perm format is one or the other. 
+ */ + if (toklen[0] == 1 || (tokens[0][1]) == '[') { + if (*tokens[0] == 'd' || *tokens[0] == '-') { + pos = toklen[0] - 1; + p = tokens[0] + 1; + if (pos == 0) { + p = tokens[1]; + pos = toklen[1]; + } + if ((pos == 9 || pos == 10) && (*p == '[' && p[pos - 1] == ']') && + (p[1] == 'R' || p[1] == '-') && (p[2] == 'W' || p[2] == '-') && + (p[3] == 'C' || p[3] == '-') && (p[4] == 'E' || p[4] == '-')) { + /* rest is FMA[S] or AFM[S] */ + lstyle = 'U'; /* very likely one of the NetWare servers */ + if (toklen[0] == 10) is_old_Hellsoft = true; + } + } + } else if ((toklen[0] == 10 || toklen[0] == 11) && + strchr("-bcdlpsw?DFam", *tokens[0])) { + p = &(tokens[0][1]); + if ((p[0] == 'r' || p[0] == '-') && (p[1] == 'w' || p[1] == '-') && + (p[3] == 'r' || p[3] == '-') && (p[4] == 'w' || p[4] == '-') && + (p[6] == 'r' || p[6] == '-') && (p[7] == 'w' || p[7] == '-')) + /* 'x'/p[9] can be S|s|x|-|T|t or implementation specific */ + { + lstyle = 'U'; /* very likely /bin/ls */ + } + } + } + if (lstyle == 'U') /* first token checks out */ + { + lstyle = 0; + for (pos = (numtoks - 5); !lstyle && pos > 1; pos--) { + /* scan for: (\d+)\s+([A-Z][a-z][a-z])\s+ + * (\d\d\d\d|\d\:\d\d|\d\d\:\d\d|\d\:\d\d\:\d\d|\d\d\:\d\d\:\d\d) + * \s+(.+)$ + */ + if (IsAsciiDigit(*tokens[pos]) /* size */ + /* (\w\w\w) */ + && toklen[pos + 1] == 3 && IsAsciiAlpha(*tokens[pos + 1]) && + IsAsciiAlpha(tokens[pos + 1][1]) && + IsAsciiAlpha(tokens[pos + 1][2]) + /* (\d|\d\d) */ + && IsAsciiDigit(*tokens[pos + 2]) && + (toklen[pos + 2] == 1 || + (toklen[pos + 2] == 2 && IsAsciiDigit(tokens[pos + 2][1]))) && + toklen[pos + 3] >= 4 && + IsAsciiDigit(*tokens[pos + 3]) + /* (\d\:\d\d\:\d\d|\d\d\:\d\d\:\d\d) */ + && (toklen[pos + 3] <= 5 || + ((toklen[pos + 3] == 7 || toklen[pos + 3] == 8) && + (tokens[pos + 3][toklen[pos + 3] - 3]) == ':')) && + IsAsciiDigit(tokens[pos + 3][toklen[pos + 3] - 2]) && + IsAsciiDigit(tokens[pos + 3][toklen[pos + 3] - 1]) && + ( + /* (\d\d\d\d) */ + ((toklen[pos + 3] == 4 || 
toklen[pos + 3] == 5) && + IsAsciiDigit(tokens[pos + 3][1]) && + IsAsciiDigit(tokens[pos + 3][2])) + /* (\d\:\d\d|\d\:\d\d\:\d\d) */ + || ((toklen[pos + 3] == 4 || toklen[pos + 3] == 7) && + (tokens[pos + 3][1]) == ':' && + IsAsciiDigit(tokens[pos + 3][2]) && + IsAsciiDigit(tokens[pos + 3][3])) + /* (\d\d\:\d\d|\d\d\:\d\d\:\d\d) */ + || ((toklen[pos + 3] == 5 || toklen[pos + 3] == 8) && + IsAsciiDigit(tokens[pos + 3][1]) && + (tokens[pos + 3][2]) == ':' && + IsAsciiDigit(tokens[pos + 3][3]) && + IsAsciiDigit(tokens[pos + 3][4])))) { + lstyle = 'U'; /* assume /bin/ls or variant format */ + tokmarker = pos; + + /* check that size is numeric */ + p = tokens[tokmarker]; + unsigned int i; + for (i = 0; i < toklen[tokmarker]; i++) { + if (!IsAsciiDigit(*p++)) { + lstyle = 0; + break; + } + } + if (lstyle) { + month_num = 0; + p = tokens[tokmarker + 1]; + for (i = 0; i < (12 * 3); i += 3) { + if (p[0] == month_names[i + 0] && p[1] == month_names[i + 1] && + p[2] == month_names[i + 2]) + break; + month_num++; + } + if (month_num >= 12) lstyle = 0; + } + } /* relative position test */ + } /* for (pos = (numtoks-5); !lstyle && pos > 1; pos--) */ + } /* if (lstyle == 'U') */ + + if (lstyle == 'U') { + state->parsed_one = 1; + state->lstyle = lstyle; + + result->fe_cinfs = 0; + result->fe_type = '?'; + if (*tokens[0] == 'd' || *tokens[0] == 'l') + result->fe_type = *tokens[0]; + else if (*tokens[0] == 'D') + result->fe_type = 'd'; + else if (*tokens[0] == '-' || *tokens[0] == 'F') + result->fe_type = 'f'; /* (hopefully a regular file) */ + + if (result->fe_type != 'd') { + pos = toklen[tokmarker]; + if (pos > (sizeof(result->fe_size) - 1)) + pos = (sizeof(result->fe_size) - 1); + memcpy(result->fe_size, tokens[tokmarker], pos); + result->fe_size[pos] = '\0'; + } + + result->fe_time.tm_month = month_num; + result->fe_time.tm_mday = atoi(tokens[tokmarker + 2]); + if (result->fe_time.tm_mday == 0) result->fe_time.tm_mday++; + + p = tokens[tokmarker + 3]; + pos = (unsigned 
int)atoi(p); + if (p[1] == ':') /* one digit hour */ + p--; + if (p[2] != ':') /* year */ + { + result->fe_time.tm_year = pos; + } else { + result->fe_time.tm_hour = pos; + result->fe_time.tm_min = atoi(p + 3); + if (p[5] == ':') result->fe_time.tm_sec = atoi(p + 6); + + if (!state->now_time) { + state->now_time = nowTimeFn(); + PR_ExplodeTime((state->now_time), timeParam, &(state->now_tm)); + } + + result->fe_time.tm_year = state->now_tm.tm_year; + if (((state->now_tm.tm_month << 5) + state->now_tm.tm_mday) < + ((result->fe_time.tm_month << 5) + result->fe_time.tm_mday)) + result->fe_time.tm_year--; + + } /* time/year */ + + // The length of the whole date string should be 12. On AIX the length + // is only 11 when the year is present in the date string and there is + // 1 padding space at the end of the string. In both cases the filename + // starts at offset 13 from the start of the date string. + // Don't care about leading spaces when the date string has different + // format or when old Hellsoft output was detected. + { + const char* date_start = tokens[tokmarker + 1]; + const char* date_end = tokens[tokmarker + 3] + toklen[tokmarker + 3]; + if (!is_old_Hellsoft && + ((date_end - date_start) == 12 || + ((date_end - date_start) == 11 && date_end[1] == ' '))) + result->fe_fname = date_start + 13; + else + result->fe_fname = tokens[tokmarker + 4]; + } + + result->fe_fnlen = (&(line[linelen])) - (result->fe_fname); + + if (result->fe_type == 'l' && result->fe_fnlen > 4) { + /* First try to use result->fe_size to find " -> " sequence. + This can give proper result for cases like "aaa -> bbb -> ccc". 
*/ + uintptr_t fe_size = atoi(result->fe_size); + CheckedInt<uintptr_t> arrow_start(result->fe_fnlen); + arrow_start -= fe_size; + arrow_start -= 4; + + if (arrow_start.isValid() && + PL_strncmp(result->fe_fname + arrow_start.value(), " -> ", 4) == + 0) { + result->fe_lname = result->fe_fname + (result->fe_fnlen - fe_size); + result->fe_lnlen = (&(line[linelen])) - (result->fe_lname); + result->fe_fnlen = arrow_start.value(); + } else { + /* Search for sequence " -> " from the end for case when there are + more occurrences. F.e. if ftpd returns "a -> b -> c" assume + "a -> b" as a name. Powerusers can remove unnecessary parts + manually but there is no way to follow the link when some + essential part is missing. */ + p = result->fe_fname + (result->fe_fnlen - 5); + for (pos = (result->fe_fnlen - 5); pos > 0; pos--) { + if (PL_strncmp(p, " -> ", 4) == 0) { + result->fe_lname = p + 4; + result->fe_lnlen = (&(line[linelen])) - (result->fe_lname); + result->fe_fnlen = pos; + break; + } + p--; + } + } + } + +# if defined(SUPPORT_LSLF) /* some (very rare) servers return ls -lF */ + if (result->fe_fnlen > 1) { + p = result->fe_fname[result->fe_fnlen - 1]; + pos = result->fe_type; + if (pos == 'd') { + if (*p == '/') result->fe_fnlen--; /* directory */ + } else if (pos == 'l') { + if (*p == '@') result->fe_fnlen--; /* symlink */ + } else if (pos == 'f') { + if (*p == '*') result->fe_fnlen--; /* executable */ + } else if (*p == '=' || *p == '%' || *p == '|') { + result->fe_fnlen--; /* socket, whiteout, fifo */ + } + } +# endif + + /* the caller should do this (if dropping "." and ".." is desired) + if (result->fe_type == 'd' && result->fe_fname[0] == '.' 
&& + (result->fe_fnlen == 1 || (result->fe_fnlen == 2 && + result->fe_fname[1] == '.'))) + return '?'; + */ + + return result->fe_type; + + } /* if (lstyle == 'U') */ + + } /* if (!lstyle && (!state->lstyle || state->lstyle == 'U')) */ +#endif + + /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ + +#if defined(SUPPORT_W16) /* 16bit Windows */ + if (!lstyle && + (!state->lstyle || + state->lstyle == 'w')) { /* old SuperTCP suite FTP server for Win3.1 */ + /* old NetManage Chameleon TCP/IP suite FTP server for Win3.1 */ + /* + * SuperTCP dirlist from the mirror.pl project + * mon/day/year separator may be '/' or '-'. + * . <DIR> 11-16-94 17:16 + * .. <DIR> 11-16-94 17:16 + * INSTALL <DIR> 11-16-94 17:17 + * CMT <DIR> 11-21-94 10:17 + * DESIGN1.DOC 11264 05-11-95 14:20 + * README.TXT 1045 05-10-95 11:01 + * WPKIT1.EXE 960338 06-21-95 17:01 + * CMT.CSV 0 07-06-95 14:56 + * + * Chameleon dirlist guessed from lynx + * . <DIR> Nov 16 1994 17:16 + * .. <DIR> Nov 16 1994 17:16 + * INSTALL <DIR> Nov 16 1994 17:17 + * CMT <DIR> Nov 21 1994 10:17 + * DESIGN1.DOC 11264 May 11 1995 14:20 A + * README.TXT 1045 May 10 1995 11:01 + * WPKIT1.EXE 960338 Jun 21 1995 17:01 R + * CMT.CSV 0 Jul 06 1995 14:56 RHA + */ + if (numtoks >= 4 && toklen[0] < 13 && + ((toklen[1] == 5 && *tokens[1] == '<') || IsAsciiDigit(*tokens[1]))) { + if (numtoks == 4 && (toklen[2] == 8 || toklen[2] == 9) && + (((tokens[2][2]) == '/' && (tokens[2][5]) == '/') || + ((tokens[2][2]) == '-' && (tokens[2][5]) == '-')) && + (toklen[3] == 4 || toklen[3] == 5) && + (tokens[3][toklen[3] - 3]) == ':' && IsAsciiDigit(tokens[2][0]) && + IsAsciiDigit(tokens[2][1]) && IsAsciiDigit(tokens[2][3]) && + IsAsciiDigit(tokens[2][4]) && IsAsciiDigit(tokens[2][6]) && + IsAsciiDigit(tokens[2][7]) && + (toklen[2] < 9 || IsAsciiDigit(tokens[2][8])) && + IsAsciiDigit(tokens[3][toklen[3] - 1]) && + IsAsciiDigit(tokens[3][toklen[3] - 2]) && + IsAsciiDigit(tokens[3][toklen[3] - 4]) && + IsAsciiDigit(*tokens[3])) { + 
lstyle = 'w'; + } else if ((numtoks == 6 || numtoks == 7) && toklen[2] == 3 && + toklen[3] == 2 && toklen[4] == 4 && toklen[5] == 5 && + (tokens[5][2]) == ':' && IsAsciiAlpha(tokens[2][0]) && + IsAsciiAlpha(tokens[2][1]) && IsAsciiAlpha(tokens[2][2]) && + IsAsciiDigit(tokens[3][0]) && IsAsciiDigit(tokens[3][1]) && + IsAsciiDigit(tokens[4][0]) && IsAsciiDigit(tokens[4][1]) && + IsAsciiDigit(tokens[4][2]) && IsAsciiDigit(tokens[4][3]) && + IsAsciiDigit(tokens[5][0]) && IsAsciiDigit(tokens[5][1]) && + IsAsciiDigit(tokens[5][3]) && IsAsciiDigit(tokens[5][4]) + /* could also check that (&(tokens[5][5]) - tokens[2]) == 17 + */ + ) { + lstyle = 'w'; + } + if (lstyle && state->lstyle != lstyle) /* first time */ + { + p = tokens[1]; + if (toklen[1] != 5 || p[0] != '<' || p[1] != 'D' || p[2] != 'I' || + p[3] != 'R' || p[4] != '>') { + for (pos = 0; lstyle && pos < toklen[1]; pos++) { + if (!IsAsciiDigit(*p++)) lstyle = 0; + } + } /* not <DIR> */ + } /* if (first time) */ + } /* if (numtoks == ...) 
*/ + + if (lstyle == 'w') { + state->parsed_one = 1; + state->lstyle = lstyle; + + result->fe_cinfs = 1; + result->fe_fname = tokens[0]; + result->fe_fnlen = toklen[0]; + result->fe_type = 'd'; + + p = tokens[1]; + if (IsAsciiDigit(*p)) { + result->fe_type = 'f'; + pos = toklen[1]; + if (pos > (sizeof(result->fe_size) - 1)) + pos = sizeof(result->fe_size) - 1; + memcpy(result->fe_size, p, pos); + result->fe_size[pos] = '\0'; + } + + p = tokens[2]; + if (toklen[2] == 3) /* Chameleon */ + { + tbuf[0] = ToUpperCaseASCII(p[0]); + tbuf[1] = ToLowerCaseASCII(p[1]); + tbuf[2] = ToLowerCaseASCII(p[2]); + for (pos = 0; pos < (12 * 3); pos += 3) { + if (tbuf[0] == month_names[pos + 0] && + tbuf[1] == month_names[pos + 1] && + tbuf[2] == month_names[pos + 2]) { + result->fe_time.tm_month = pos / 3; + result->fe_time.tm_mday = atoi(tokens[3]); + result->fe_time.tm_year = atoi(tokens[4]); + break; + } + } + pos = 5; /* Chameleon toknum of date field */ + } else { + result->fe_time.tm_month = atoi(p + 0) - 1; + result->fe_time.tm_mday = atoi(p + 3); + result->fe_time.tm_year = atoi(p + 6); + FixupYear(&result->fe_time); /* SuperTCP */ + + pos = 3; /* SuperTCP toknum of date field */ + } + + result->fe_time.tm_hour = atoi(tokens[pos]); + result->fe_time.tm_min = atoi(&(tokens[pos][toklen[pos] - 2])); + + /* the caller should do this (if dropping "." and ".." is desired) + if (result->fe_type == 'd' && result->fe_fname[0] == '.' 
&& + (result->fe_fnlen == 1 || (result->fe_fnlen == 2 && + result->fe_fname[1] == '.'))) + return '?'; + */ + + return result->fe_type; + } /* (lstyle == 'w') */ + + } /* if (!lstyle && (!state->lstyle || state->lstyle == 'w')) */ +#endif + + /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ + +#if defined(SUPPORT_DLS) /* dls -dtR */ + if (!lstyle && + (state->lstyle == 'D' || (!state->lstyle && state->numlines == 1))) + /* /bin/dls lines have to be immediately recognizable (first line) */ + { + /* I haven't seen an FTP server that delivers a /bin/dls listing, + * but can infer the format from the lynx and mirror.pl projects. + * Both formats are supported. + * + * Lynx says: + * README 763 Information about this server\0 + * bin/ - \0 + * etc/ = \0 + * ls-lR 0 \0 + * ls-lR.Z 3 \0 + * pub/ = Public area\0 + * usr/ - \0 + * morgan 14 -> ../real/morgan\0 + * TIMIT.mostlikely.Z\0 + * 79215 \0 + * + * mirror.pl says: + * filename: ^(\S*)\s+ + * size: (\-|\=|\d+)\s+ + * month/day: ((\w\w\w\s+\d+|\d+\s+\w\w\w)\s+ + * time/year: (\d+:\d+|\d\d\d\d))\s+ + * rest: (.+) + * + * README 763 Jul 11 21:05 Information about this server + * bin/ - Apr 28 1994 + * etc/ = 11 Jul 21:04 + * ls-lR 0 6 Aug 17:14 + * ls-lR.Z 3 05 Sep 1994 + * pub/ = Jul 11 21:04 Public area + * usr/ - Sep 7 09:39 + * morgan 14 Apr 18 09:39 -> ../real/morgan + * TIMIT.mostlikely.Z + * 79215 Jul 11 21:04 + */ + if (!state->lstyle && line[linelen - 1] == ':' && linelen >= 2 && + toklen[numtoks - 1] != 1) { + /* code in mirror.pl suggests that a listing may be preceded + * by a PWD line in the form "/some/dir/names/here:" + * but does not necessarily begin with '/'. *sigh* + */ + pos = 0; + p = line; + while (pos < (linelen - 1)) { + /* illegal (or extremely unusual) chars in a dirspec */ + if (*p == '<' || *p == '|' || *p == '>' || *p == '?' 
|| *p == '*' || + *p == '\\') + break; + if (*p == '/' && pos < (linelen - 2) && p[1] == '/') break; + pos++; + p++; + } + if (pos == (linelen - 1)) { + state->lstyle = 'D'; + return '?'; + } + } + + if (!lstyle && numtoks >= 2) { + pos = 22; /* pos of (\d+|-|=) if this is not part of a multiline */ + if (state->lstyle && carry_buf_len) /* first is from previous line */ + pos = toklen[1] - 1; /* and is 'as-is' (may contain whitespace) */ + + if (linelen > pos) { + p = &line[pos]; + if ((*p == '-' || *p == '=' || IsAsciiDigit(*p)) && + ((linelen == (pos + 1)) || + (linelen >= (pos + 3) && p[1] == ' ' && p[2] == ' '))) { + tokmarker = 1; + if (!carry_buf_len) { + pos = 1; + while (pos < numtoks && (tokens[pos] + toklen[pos]) < (&line[23])) + pos++; + tokmarker = 0; + if ((tokens[pos] + toklen[pos]) == (&line[23])) tokmarker = pos; + } + if (tokmarker) { + lstyle = 'D'; + if (*tokens[tokmarker] == '-' || *tokens[tokmarker] == '=') { + if (toklen[tokmarker] != 1 || + (tokens[tokmarker - 1][toklen[tokmarker - 1] - 1]) != '/') + lstyle = 0; + } else { + for (pos = 0; lstyle && pos < toklen[tokmarker]; pos++) { + if (!IsAsciiDigit(tokens[tokmarker][pos])) lstyle = 0; + } + } + if (lstyle && !state->lstyle) /* first time */ + { + /* scan for illegal (or incredibly unusual) chars in fname */ + for (p = tokens[0]; + lstyle && + p < &(tokens[tokmarker - 1][toklen[tokmarker - 1]]); + p++) { + if (*p == '<' || *p == '|' || *p == '>' || *p == '?' || + *p == '*' || *p == '/' || *p == '\\') + lstyle = 0; + } + } + + } /* size token found */ + } /* expected chars behind expected size token */ + } /* if (linelen > pos) */ + } /* if (!lstyle && numtoks >= 2) */ + + if (!lstyle && state->lstyle == 'D' && !carry_buf_len) { + /* the filename of a multi-line entry can be identified + * correctly only if dls format had been previously established. + * This should always be true because there should be entries + * for '.' and/or '..' and/or CWD that precede the rest of the + * listing. 
+ */ + pos = linelen; + if (pos > (sizeof(state->carry_buf) - 1)) + pos = sizeof(state->carry_buf) - 1; + memcpy(state->carry_buf, line, pos); + state->carry_buf_len = pos; + return '?'; + } + + if (lstyle == 'D') { + state->parsed_one = 1; + state->lstyle = lstyle; + + p = &(tokens[tokmarker - 1][toklen[tokmarker - 1]]); + result->fe_fname = tokens[0]; + result->fe_fnlen = p - tokens[0]; + result->fe_type = 'f'; + + if (result->fe_fname[result->fe_fnlen - 1] == '/') { + if (result->fe_lnlen == 1) + result->fe_type = '?'; + else { + result->fe_fnlen--; + result->fe_type = 'd'; + } + } else if (IsAsciiDigit(*tokens[tokmarker])) { + pos = toklen[tokmarker]; + if (pos > (sizeof(result->fe_size) - 1)) + pos = sizeof(result->fe_size) - 1; + memcpy(result->fe_size, tokens[tokmarker], pos); + result->fe_size[pos] = '\0'; + } + + if ((tokmarker + 3) < numtoks && + (&(tokens[numtoks - 1][toklen[numtoks - 1]]) - + tokens[tokmarker + 1]) >= (1 + 1 + 3 + 1 + 4)) { + pos = (tokmarker + 3); + p = tokens[pos]; + pos = toklen[pos]; + + if ((pos == 4 || pos == 5) && IsAsciiDigit(*p) && + IsAsciiDigit(p[pos - 1]) && IsAsciiDigit(p[pos - 2]) && + ((pos == 5 && p[2] == ':') || + (pos == 4 && (IsAsciiDigit(p[1]) || p[1] == ':')))) { + month_num = tokmarker + 1; /* assumed position of month field */ + pos = tokmarker + 2; /* assumed position of mday field */ + if (IsAsciiDigit(*tokens[month_num])) /* positions are reversed */ + { + month_num++; + pos--; + } + p = tokens[month_num]; + if (IsAsciiDigit(*tokens[pos]) && + (toklen[pos] == 1 || + (toklen[pos] == 2 && IsAsciiDigit(tokens[pos][1]))) && + toklen[month_num] == 3 && IsAsciiAlpha(*p) && + IsAsciiAlpha(p[1]) && IsAsciiAlpha(p[2])) { + pos = atoi(tokens[pos]); + if (pos > 0 && pos <= 31) { + result->fe_time.tm_mday = pos; + month_num = 1; + for (pos = 0; pos < (12 * 3); pos += 3) { + if (p[0] == month_names[pos + 0] && + p[1] == month_names[pos + 1] && + p[2] == month_names[pos + 2]) + break; + month_num++; + } + if (month_num > 12) 
+ result->fe_time.tm_mday = 0; + else + result->fe_time.tm_month = month_num - 1; + } + } + if (result->fe_time.tm_mday) { + tokmarker += 3; /* skip mday/mon/yrtime (to find " -> ") */ + p = tokens[tokmarker]; + + pos = atoi(p); + if (pos > 24) + result->fe_time.tm_year = pos; + else { + if (p[1] == ':') p--; + result->fe_time.tm_hour = pos; + result->fe_time.tm_min = atoi(p + 3); + if (!state->now_time) { + state->now_time = nowTimeFn(); + PR_ExplodeTime((state->now_time), timeParam, + &(state->now_tm)); + } + result->fe_time.tm_year = state->now_tm.tm_year; + if (((state->now_tm.tm_month << 4) + state->now_tm.tm_mday) < + ((result->fe_time.tm_month << 4) + result->fe_time.tm_mday)) + result->fe_time.tm_year--; + } /* got year or time */ + } /* got month/mday */ + } /* may have year or time */ + } /* enough remaining to possibly have date/time */ + + if (numtoks > (tokmarker + 2)) { + pos = tokmarker + 1; + p = tokens[pos]; + if (toklen[pos] == 2 && *p == '-' && p[1] == '>') { + p = &(tokens[numtoks - 1][toklen[numtoks - 1]]); + result->fe_type = 'l'; + result->fe_lname = tokens[pos + 1]; + result->fe_lnlen = p - result->fe_lname; + if (result->fe_lnlen > 1 && + result->fe_lname[result->fe_lnlen - 1] == '/') + result->fe_lnlen--; + } + } /* if (numtoks > (tokmarker+2)) */ + + /* the caller should do this (if dropping "." and ".." is desired) + if (result->fe_type == 'd' && result->fe_fname[0] == '.' 
&& + (result->fe_fnlen == 1 || (result->fe_fnlen == 2 && + result->fe_fname[1] == '.'))) + return '?'; + */ + + return result->fe_type; + + } /* if (lstyle == 'D') */ + } /* if (!lstyle && (!state->lstyle || state->lstyle == 'D')) */ +#endif + + /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ + + } /* if (linelen > 0) */ + + return ParsingFailed(state); +} diff --git a/netwerk/streamconv/converters/ParseFTPList.h b/netwerk/streamconv/converters/ParseFTPList.h new file mode 100644 index 0000000000..610db04d31 --- /dev/null +++ b/netwerk/streamconv/converters/ParseFTPList.h @@ -0,0 +1,102 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ParseRTPList_h___ +#define ParseRTPList_h___ + +#include <stdint.h> +#include <string.h> +#include "prtime.h" + +/* ParseFTPList() parses lines from an FTP LIST command. +** +** Written July 2002 by Cyrus Patel <cyp@fb14.uni-mainz.de> +** with acknowledgements to squid, lynx, wget and ftpmirror. +** +** Arguments: +** 'line': line of FTP data connection output. The line is assumed +** to end at the first '\0' or '\n' or '\r\n'. +** 'state': a structure used internally to track state between +** lines. Needs to be bzero()'d at LIST begin. +** 'result': where ParseFTPList will store the results of the parse +** if 'line' is not a comment and is not junk. +** +** Returns one of the following: +** 'd' - LIST line is a directory entry ('result' is valid) +** 'f' - LIST line is a file's entry ('result' is valid) +** 'l' - LIST line is a symlink's entry ('result' is valid) +** '?' - LIST line is junk. 
(cwd, non-file/dir/link, etc) +** '"' - it's not a LIST line (it's a "comment") +** +** It may be advisable to let the end-user see "comments" (particularly when +** the listing results in ONLY such lines) because such a listing may be: +** - an unknown LIST format (NLST or "custom" format for example) +** - an error msg (EPERM,ENOENT,ENFILE,EMFILE,ENOTDIR,ENOTBLK,EEXDEV etc). +** - an empty directory and the 'comment' is a "total 0" line or similar. +** (warning: a "total 0" can also mean the total size is unknown). +** +** ParseFTPList() supports all known FTP LISTing formats: +** - '/bin/ls -l' and all variants (including Hellsoft FTP for NetWare); +** - EPLF (Easily Parsable List Format); +** - Windows NT's default "DOS-dirstyle"; +** - OS/2 basic server format LIST format; +** - VMS (MultiNet, UCX, and CMU) LIST format (including multi-line format); +** - IBM VM/CMS, VM/ESA LIST format (two known variants); +** - SuperTCP FTP Server for Win16 LIST format; +** - NetManage Chameleon (NEWT) for Win16 LIST format; +** - '/bin/dls' (two known variants, plus multi-line) LIST format; +** If there are others, then I'd like to hear about them (send me a sample). +** +** NLSTings are not supported explicitly because they cannot be machine +** parsed consistently: NLSTings do not have unique characteristics - even +** the assumption that there won't be whitespace on the line does not hold +** because some nlistings have more than one filename per line and/or +** may have filenames that have spaces in them. Moreover, distinguishing +** between an error message and an NLST line would require ParseFTPList() to +** recognize all the possible strerror() messages in the world. 
*/

/* #undef anything you don't want to support */
/* Each macro guards one format-specific section of ParseFTPList() with
 * #if defined(SUPPORT_xxx). */
#define SUPPORT_LSL  /* /bin/ls -l and dozens of variations thereof */
#define SUPPORT_DLS  /* /bin/dls format (very, Very, VERY rare) */
#define SUPPORT_EPLF /* Extraordinarily Pathetic List Format */
#define SUPPORT_DOS  /* WinNT server in 'site dirstyle' dos */
#define SUPPORT_VMS  /* VMS (all: MultiNet, UCX, CMU-IP) */
#define SUPPORT_CMS  /* IBM VM/CMS,VM/ESA (z/VM and LISTING forms) */
#define SUPPORT_OS2  /* IBM TCP/IP for OS/2 - FTP Server */
#define SUPPORT_W16  /* win16 hosts: SuperTCP or NetManage Chameleon */

/* Parser state that ParseFTPList() carries from one line of a listing to the
 * next. One instance is used per LIST response. */
struct list_state {
  /* Zero-fill every field, so a freshly constructed state has no detected
   * style, no cached time and an empty carry buffer. */
  list_state() { memset(this, 0, sizeof(*this)); }

  PRTime now_time;        /* needed for year determination */
                          /* (filled from nowTimeFn() on first use) */
  PRExplodedTime now_tm;  /* needed for year determination */
                          /* (now_time exploded with timeParam) */
  int32_t lstyle;         /* LISTing style */
                          /* (0 until recognized, then a letter such as */
                          /* 'U', 'w' or 'D') */
  int32_t parsed_one;     /* returned anything yet? */
                          /* (set to 1 when a line parses successfully) */
  char carry_buf[84];     /* for VMS multiline */
                          /* (the /bin/dls multi-line case stores the */
                          /* carried-over line here as well) */
  uint32_t carry_buf_len; /* length of name in carry_buf */
  uint32_t numlines;      /* number of lines seen */
};

/* Output of ParseFTPList() for one listing line. fe_fname and fe_lname are
 * pointer/length pairs and are not NUL-terminated at fe_fnlen / fe_lnlen. */
struct list_result {
  int32_t fe_type;        /* 'd'(dir) or 'l'(link) or 'f'(file) */
  const char* fe_fname;   /* pointer to filename */
  uint32_t fe_fnlen;      /* length of filename */
  const char* fe_lname;   /* pointer to symlink name */
  uint32_t fe_lnlen;      /* length of symlink name */
  char fe_size[40];       /* size of file in bytes (<= (2^128 - 1)) */
                          /* (decimal digits as text, NUL-terminated) */
  PRExplodedTime fe_time; /* last-modified time */
  int32_t fe_cinfs;       /* file system is definitely case insensitive */
                          /* (converting all-upcase names may be desirable) */
};

/* Callback that supplies the current time. ParseFTPList() calls it only when
 * state->now_time is still zero and an entry gives a time of day instead of a
 * year. */
typedef PRTime (*NowTimeFn)();

/* Parses one line of LIST output. The return codes are described in the
 * comment at the top of this header. timeParam is handed to PR_ExplodeTime()
 * when the current time has to be broken down; both trailing parameters have
 * defaults, so callers may pass only the first three arguments. */
int ParseFTPList(const char* line, struct list_state* state,
                 struct list_result* result,
                 PRTimeParamFn timeParam = PR_LocalTimeParameters,
                 NowTimeFn nowTimeFn = PR_Now);

#endif /* !ParseRTPList_h___ */
diff --git a/netwerk/streamconv/converters/moz.build b/netwerk/streamconv/converters/moz.build new file mode 100644 index 0000000000..46b1d4a4dc --- /dev/null +++ 
b/netwerk/streamconv/converters/moz.build @@ -0,0 +1,32 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +XPIDL_SOURCES += ["nsICompressConvStats.idl"] + +EXPORTS += [ + "nsUnknownDecoder.h", +] + +XPIDL_MODULE = "necko_http" + +UNIFIED_SOURCES += [ + "mozTXTToHTMLConv.cpp", + "nsDirIndex.cpp", + "nsDirIndexParser.cpp", + "nsFTPDirListingConv.cpp", + "nsHTTPCompressConv.cpp", + "nsIndexedToHTML.cpp", + "nsMultiMixedConv.cpp", + "nsUnknownDecoder.cpp", + "ParseFTPList.cpp", +] + +FINAL_LIBRARY = "xul" + +LOCAL_INCLUDES += [ + "/modules/brotli/dec", + "/netwerk/base", +] diff --git a/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp b/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp new file mode 100644 index 0000000000..1ab51adb82 --- /dev/null +++ b/netwerk/streamconv/converters/mozTXTToHTMLConv.cpp @@ -0,0 +1,1260 @@ +/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/TextUtils.h" +#include "mozTXTToHTMLConv.h" +#include "nsNetUtil.h" +#include "nsUnicharUtils.h" +#include "nsUnicodeProperties.h" +#include "nsCRT.h" +#include "nsIExternalProtocolHandler.h" +#include "nsIURI.h" + +#include <algorithm> + +#ifdef DEBUG_BenB_Perf +# include "prtime.h" +# include "prinrval.h" +#endif + +using mozilla::IsAscii; +using mozilla::IsAsciiAlpha; +using mozilla::IsAsciiDigit; + +const double growthRate = 1.2; + +// Bug 183111, editor now replaces multiple spaces with leading +// 0xA0's and a single ending space, so need to treat 0xA0's as spaces. 
+// 0xA0 is the Latin1/Unicode character for "non-breaking space (nbsp)" +// Also recognize the Japanese ideographic space 0x3000 as a space. +static inline bool IsSpace(const char16_t aChar) { + return (nsCRT::IsAsciiSpace(aChar) || aChar == 0xA0 || aChar == 0x3000); +} + +// Escape Char will take ch, escape it and append the result to +// aStringToAppendTo +void mozTXTToHTMLConv::EscapeChar(const char16_t ch, + nsAString& aStringToAppendTo, + bool inAttribute) { + switch (ch) { + case '<': + aStringToAppendTo.AppendLiteral("<"); + break; + case '>': + aStringToAppendTo.AppendLiteral(">"); + break; + case '&': + aStringToAppendTo.AppendLiteral("&"); + break; + case '"': + if (inAttribute) { + aStringToAppendTo.AppendLiteral("""); + break; + } + // else fall through + [[fallthrough]]; + default: + aStringToAppendTo += ch; + } +} + +// EscapeStr takes the passed in string and +// escapes it IN PLACE. +void mozTXTToHTMLConv::EscapeStr(nsString& aInString, bool inAttribute) { + // the replace substring routines + // don't seem to work if you have a character + // in the in string that is also in the replacement + // string! 
=( + // aInString.ReplaceSubstring("&", "&"); + // aInString.ReplaceSubstring("<", "<"); + // aInString.ReplaceSubstring(">", ">"); + for (uint32_t i = 0; i < aInString.Length();) { + switch (aInString[i]) { + case '<': + aInString.Cut(i, 1); + aInString.InsertLiteral(u"<", i); + i += 4; // skip past the integers we just added + break; + case '>': + aInString.Cut(i, 1); + aInString.InsertLiteral(u">", i); + i += 4; // skip past the integers we just added + break; + case '&': + aInString.Cut(i, 1); + aInString.InsertLiteral(u"&", i); + i += 5; // skip past the integers we just added + break; + case '"': + if (inAttribute) { + aInString.Cut(i, 1); + aInString.InsertLiteral(u""", i); + i += 6; + break; + } + // else fall through + [[fallthrough]]; + default: + i++; + } + } +} + +void mozTXTToHTMLConv::UnescapeStr(const char16_t* aInString, int32_t aStartPos, + int32_t aLength, nsString& aOutString) { + const char16_t* subString = nullptr; + for (uint32_t i = aStartPos; int32_t(i) - aStartPos < aLength;) { + int32_t remainingChars = i - aStartPos; + if (aInString[i] == '&') { + subString = &aInString[i]; + if (!NS_strncmp(subString, u"<", + std::min(4, aLength - remainingChars))) { + aOutString.Append(char16_t('<')); + i += 4; + } else if (!NS_strncmp(subString, u">", + std::min(4, aLength - remainingChars))) { + aOutString.Append(char16_t('>')); + i += 4; + } else if (!NS_strncmp(subString, u"&", + std::min(5, aLength - remainingChars))) { + aOutString.Append(char16_t('&')); + i += 5; + } else if (!NS_strncmp(subString, u""", + std::min(6, aLength - remainingChars))) { + aOutString.Append(char16_t('"')); + i += 6; + } else { + aOutString += aInString[i]; + i++; + } + } else { + aOutString += aInString[i]; + i++; + } + } +} + +void mozTXTToHTMLConv::CompleteAbbreviatedURL(const char16_t* aInString, + int32_t aInLength, + const uint32_t pos, + nsString& aOutString) { + NS_ASSERTION(int32_t(pos) < aInLength, + "bad args to CompleteAbbreviatedURL, see bug #190851"); + if 
(int32_t(pos) >= aInLength) return;

  if (aInString[pos] == '@') {
    // only prepend a mailto url if the string contains a .domain in it..
    // i.e. we want to linkify johndoe@foo.com but not "let's meet @8pm"
    nsDependentString inString(aInString, aInLength);
    if (inString.FindChar('.', pos) !=
        kNotFound)  // if we have a '.' after the @ sign....
    {
      aOutString.AssignLiteral("mailto:");
      aOutString += aInString;
    }
  } else if (aInString[pos] == '.') {
    // Only strings recognized as "www." or "ftp." hosts get a scheme.
    if (ItMatchesDelimited(aInString, aInLength, u"www.", 4, LT_IGNORE,
                           LT_IGNORE)) {
      aOutString.AssignLiteral("http://");
      aOutString += aInString;
    } else if (ItMatchesDelimited(aInString, aInLength, u"ftp.", 4, LT_IGNORE,
                                  LT_IGNORE)) {
      aOutString.AssignLiteral("ftp://");
      aOutString += aInString;
    }
  }
}

// Finds where a URL candidate begins, given the trigger character at
// aInString[pos] and the recognition mode |check|.
//
// On success |start| receives the index of the first character of the URL and
// the function returns true; otherwise it returns false.
//   RFC1738     - the five characters ending at |pos| must be "<URL:"; the URL
//                 starts right after |pos|.
//   RFC2396E    - searches backwards from pos - 1 for '<', '>' or '"'; only
//                 '<' and '"' open a URL, which starts right after them.
//   freetext    - walks back over letters, digits, '+', '-' and '.'; the
//                 first character of that run must be a letter.
//   abbreviated - walks back to the nearest delimiter (see the loop below);
//                 the first character must be a letter or a digit.
bool mozTXTToHTMLConv::FindURLStart(const char16_t* aInString,
                                    int32_t aInLength, const uint32_t pos,
                                    const modetype check, uint32_t& start) {
  switch (check) {  // no breaks, because end of blocks is never reached
    case RFC1738: {
      // std::max keeps the index from going negative when pos < 4.
      if (!NS_strncmp(&aInString[std::max(int32_t(pos - 4), 0)], u"<URL:", 5)) {
        start = pos + 1;
        return true;
      }
      return false;
    }
    case RFC2396E: {
      nsString temp(aInString, aInLength);
      int32_t i = pos <= 0 ? kNotFound : temp.RFindCharInSet(u"<>\"", pos - 1);
      if (i != kNotFound &&
          (temp[uint32_t(i)] == '<' || temp[uint32_t(i)] == '"')) {
        start = uint32_t(++i);
        // An opening delimiter directly before |pos| leaves no URL text.
        return start < pos;
      }
      return false;
    }
    case freetext: {
      int32_t i = pos - 1;
      for (; i >= 0 &&
             (IsAsciiAlpha(aInString[uint32_t(i)]) ||
              IsAsciiDigit(aInString[uint32_t(i)]) ||
              aInString[uint32_t(i)] == '+' || aInString[uint32_t(i)] == '-' ||
              aInString[uint32_t(i)] == '.');
           i--)
        ;
      // The loop stops one position before the run; ++i steps back onto it.
      if (++i >= 0 && uint32_t(i) < pos &&
          IsAsciiAlpha(aInString[uint32_t(i)])) {
        start = uint32_t(i);
        return true;
      }
      return false;
    }
    case abbreviated: {
      int32_t i = pos - 1;
      // This disallows non-ascii-characters for email.
      // Currently correct, but revisit later after standards changed.
      bool isEmail = aInString[pos] == (char16_t)'@';
      // These chars mark the start of the URL
      for (; i >= 0 && aInString[uint32_t(i)] != '>' &&
             aInString[uint32_t(i)] != '<' && aInString[uint32_t(i)] != '"' &&
             aInString[uint32_t(i)] != '\'' && aInString[uint32_t(i)] != '`' &&
             aInString[uint32_t(i)] != ',' && aInString[uint32_t(i)] != '{' &&
             aInString[uint32_t(i)] != '[' && aInString[uint32_t(i)] != '(' &&
             aInString[uint32_t(i)] != '|' && aInString[uint32_t(i)] != '\\' &&
             !IsSpace(aInString[uint32_t(i)]) &&
             (!isEmail || IsAscii(aInString[uint32_t(i)])) &&
             (!isEmail || aInString[uint32_t(i)] != ')');
           i--)
        ;
      if (++i >= 0 && uint32_t(i) < pos &&
          (IsAsciiAlpha(aInString[uint32_t(i)]) ||
           IsAsciiDigit(aInString[uint32_t(i)]))) {
        start = uint32_t(i);
        return true;
      }
      return false;
    }
    default:
      return false;
  }  // switch
}

// Finds where a URL candidate ends, given the trigger character at
// aInString[pos], the mode |check| and the |start| found by FindURLStart.
//
// On success |end| receives the index of the last character of the URL and
// the function returns true; otherwise it returns false.
//   RFC1738 / RFC2396E     - the next '<', '>' or '"' after |pos| must be the
//                            matching closing delimiter; the URL ends just
//                            before it.
//   freetext / abbreviated - scans forward to the first terminating character,
//                            then drops trailing punctuation.
bool mozTXTToHTMLConv::FindURLEnd(const char16_t* aInString,
                                  int32_t aInStringLength, const uint32_t pos,
                                  const modetype check, const uint32_t start,
                                  uint32_t& end) {
  switch (check) {  // no breaks, because end of blocks is never reached
    case RFC1738:
    case RFC2396E: {
      nsString temp(aInString, aInStringLength);

      int32_t i = temp.FindCharInSet(u"<>\"", pos + 1);
      // Expected closer: '>' for RFC1738 or when the opener at start - 1 is
      // '<', otherwise '"'. The post-decrement leaves |i| on the last
      // character before the closer.
      if (i != kNotFound &&
          temp[uint32_t(i--)] ==
              (check == RFC1738 || temp[start - 1] == '<' ? '>' : '"')) {
        end = uint32_t(i);
        return end > pos;
      }
      return false;
    }
    case freetext:
    case abbreviated: {
      uint32_t i = pos + 1;
      bool isEmail = aInString[pos] == (char16_t)'@';
      bool seenOpeningParenthesis = false;  // there is a '(' earlier in the URL
      bool seenOpeningSquareBracket =
          false;  // there is a '[' earlier in the URL
      for (; int32_t(i) < aInStringLength; i++) {
        // These chars mark the end of the URL
        if (aInString[i] == '>' || aInString[i] == '<' || aInString[i] == '"' ||
            aInString[i] == '`' || aInString[i] == '}' || aInString[i] == '{' ||
            (aInString[i] == ')' && !seenOpeningParenthesis) ||
            (aInString[i] == ']' && !seenOpeningSquareBracket) ||
            // Allow IPv6 addresses like http://[1080::8:800:200C:417A]/foo.
            (aInString[i] == '[' && i > 2 &&
             (aInString[i - 1] != '/' || aInString[i - 2] != '/')) ||
            IsSpace(aInString[i]))
          break;
        // Disallow non-ascii-characters for email.
        // Currently correct, but revisit later after standards changed.
        if (isEmail && (aInString[i] == '(' || aInString[i] == '\'' ||
                        !IsAscii(aInString[i])))
          break;
        if (aInString[i] == '(') seenOpeningParenthesis = true;
        if (aInString[i] == '[') seenOpeningSquareBracket = true;
      }
      // These chars are allowed in the middle of the URL, but not at end.
      // Technically they are, but are used in normal text after the URL.
      while (--i > pos && (aInString[i] == '.' || aInString[i] == ',' ||
                           aInString[i] == ';' || aInString[i] == '!' ||
                           aInString[i] == '?' || aInString[i] == '-' ||
                           aInString[i] == ':' || aInString[i] == '\''))
        ;
      // Nothing usable is left if trimming ran back to the trigger character.
      if (i > pos) {
        end = i;
        return true;
      }
      return false;
    }
    default:
      return false;
  }  // switch
}

// Derives everything needed to replace a recognized URL with a link.
//
//   txtURL        - receives aInString[start..end] with whitespace stripped.
//   desc          - receives the link text, HTML-escaped; for the delimited
//                   modes it includes the surrounding delimiters.
//   replaceAfter  - number of characters after |pos| covered by the match.
//   replaceBefore - length of the output ScanTXT produces for the text
//                   between the start of the description and |pos|.
void mozTXTToHTMLConv::CalculateURLBoundaries(
    const char16_t* aInString, int32_t aInStringLength, const uint32_t pos,
    const uint32_t whathasbeendone, const modetype check, const uint32_t start,
    const uint32_t end, nsString& txtURL, nsString& desc,
    int32_t& replaceBefore, int32_t& replaceAfter) {
  uint32_t descstart = start;
  switch (check) {
    case RFC1738: {
      descstart = start - 5;
      desc.Append(&aInString[descstart],
                  end - descstart + 2);  // include "<URL:" and ">"
      replaceAfter = end - pos + 1;
    } break;
    case RFC2396E: {
      descstart = start - 1;
      desc.Append(&aInString[descstart],
                  end - descstart + 2);  // include brackets
      replaceAfter = end - pos + 1;
    } break;
    case freetext:
    case abbreviated: {
      descstart = start;
      desc.Append(&aInString[descstart],
                  end - start + 1);  // don't include brackets
      replaceAfter = end - pos;
    } break;
    default:
      break;
  }  // switch

  EscapeStr(desc, false);

  txtURL.Append(&aInString[start], end - start + 1);
  txtURL.StripWhitespace();

  // FIX ME
  // The text before |pos| is scanned again with URL recognition masked out;
  // only the length of that output is kept.
  nsAutoString temp2;
  ScanTXT(nsDependentSubstring(&aInString[descstart], pos - descstart),
          ~kURLs /*prevents loop*/ & whathasbeendone, temp2);
  replaceBefore = temp2.Length();
}

// Decides whether |aURL| should be turned into a link.
//
// Returns false when there is no IO service or no scheme can be extracted.
// "http", "https" and "mailto" are always linkified; any other scheme needs a
// protocol handler.
bool mozTXTToHTMLConv::ShouldLinkify(const nsCString& aURL) {
  if (!mIOService) return false;

  nsAutoCString scheme;
  nsresult rv = mIOService->ExtractScheme(aURL, scheme);
  if (NS_FAILED(rv)) return false;

  if (scheme == "http" || scheme == "https" || scheme == "mailto") {
    return true;
  }

  // Get the handler for this scheme.
  nsCOMPtr<nsIProtocolHandler> handler;
  rv = mIOService->GetProtocolHandler(scheme.get(), getter_AddRefs(handler));
  if (NS_FAILED(rv)) return false;

  // Is it an external protocol handler? If not, linkify it.
+ nsCOMPtr<nsIExternalProtocolHandler> externalHandler = + do_QueryInterface(handler); + if (!externalHandler) return true; // handler is built-in, linkify it! + + // If external app exists for the scheme then linkify it. + bool exists; + rv = externalHandler->ExternalAppExistsForScheme(scheme, &exists); + return (NS_SUCCEEDED(rv) && exists); +} + +bool mozTXTToHTMLConv::CheckURLAndCreateHTML(const nsString& txtURL, + const nsString& desc, + const modetype mode, + nsString& outputHTML) { + // Create *uri from txtURL + nsCOMPtr<nsIURI> uri; + nsresult rv; + // Lazily initialize mIOService + if (!mIOService) { + mIOService = do_GetIOService(); + + if (!mIOService) return false; + } + + // See if the url should be linkified. + NS_ConvertUTF16toUTF8 utf8URL(txtURL); + if (!ShouldLinkify(utf8URL)) return false; + + // it would be faster if we could just check to see if there is a protocol + // handler for the url and return instead of actually trying to create a + // url... + rv = mIOService->NewURI(utf8URL, nullptr, nullptr, getter_AddRefs(uri)); + + // Real work + if (NS_SUCCEEDED(rv) && uri) { + outputHTML.AssignLiteral("<a class=\"moz-txt-link-"); + switch (mode) { + case RFC1738: + outputHTML.AppendLiteral("rfc1738"); + break; + case RFC2396E: + outputHTML.AppendLiteral("rfc2396E"); + break; + case freetext: + outputHTML.AppendLiteral("freetext"); + break; + case abbreviated: + outputHTML.AppendLiteral("abbreviated"); + break; + default: + break; + } + nsAutoString escapedURL(txtURL); + EscapeStr(escapedURL, true); + + outputHTML.AppendLiteral("\" href=\""); + outputHTML += escapedURL; + outputHTML.AppendLiteral("\">"); + outputHTML += desc; + outputHTML.AppendLiteral("</a>"); + return true; + } + return false; +} + +NS_IMETHODIMP mozTXTToHTMLConv::FindURLInPlaintext(const char16_t* aInString, + int32_t aInLength, + int32_t aPos, + int32_t* aStartPos, + int32_t* aEndPos) { + // call FindURL on the passed in string + nsAutoString outputHTML; // we'll ignore the 
generated output HTML + + *aStartPos = -1; + *aEndPos = -1; + + FindURL(aInString, aInLength, aPos, kURLs, outputHTML, *aStartPos, *aEndPos); + + return NS_OK; +} + +bool mozTXTToHTMLConv::FindURL(const char16_t* aInString, int32_t aInLength, + const uint32_t pos, + const uint32_t whathasbeendone, + nsString& outputHTML, int32_t& replaceBefore, + int32_t& replaceAfter) { + enum statetype { unchecked, invalid, startok, endok, success }; + static const modetype ranking[] = {RFC1738, RFC2396E, freetext, abbreviated}; + + statetype state[mozTXTToHTMLConv_lastMode + 1]; // 0(=unknown)..lastMode + /* I don't like this abuse of enums as index for the array, + but I don't know a better method */ + + // Define, which modes to check + /* all modes but abbreviated are checked for text[pos] == ':', + only abbreviated for '.', RFC2396E and abbreviated for '@' */ + for (modetype iState = unknown; iState <= mozTXTToHTMLConv_lastMode; + iState = modetype(iState + 1)) + state[iState] = aInString[pos] == ':' ? unchecked : invalid; + switch (aInString[pos]) { + case '@': + state[RFC2396E] = unchecked; + [[fallthrough]]; + case '.': + state[abbreviated] = unchecked; + break; + case ':': + state[abbreviated] = invalid; + break; + default: + break; + } + + // Test, first successful mode wins, sequence defined by |ranking| + int32_t iCheck = 0; // the currently tested modetype + modetype check = ranking[iCheck]; + for (; iCheck < mozTXTToHTMLConv_numberOfModes && state[check] != success; + iCheck++) + /* check state from last run. 
+ If this is the first, check this one, which isn't = success yet */ + { + check = ranking[iCheck]; + + uint32_t start, end; + + if (state[check] == unchecked) + if (FindURLStart(aInString, aInLength, pos, check, start)) + state[check] = startok; + + if (state[check] == startok) + if (FindURLEnd(aInString, aInLength, pos, check, start, end)) + state[check] = endok; + + if (state[check] == endok) { + nsAutoString txtURL, desc; + int32_t resultReplaceBefore, resultReplaceAfter; + + CalculateURLBoundaries(aInString, aInLength, pos, whathasbeendone, check, + start, end, txtURL, desc, resultReplaceBefore, + resultReplaceAfter); + + if (aInString[pos] != ':') { + nsAutoString temp = txtURL; + txtURL.SetLength(0); + CompleteAbbreviatedURL(temp.get(), temp.Length(), pos - start, txtURL); + } + + if (!txtURL.IsEmpty() && + CheckURLAndCreateHTML(txtURL, desc, check, outputHTML)) { + replaceBefore = resultReplaceBefore; + replaceAfter = resultReplaceAfter; + state[check] = success; + } + } // if + } // for + return state[check] == success; +} + +static inline bool IsAlpha(const uint32_t aChar) { + return mozilla::unicode::GetGenCategory(aChar) == nsUGenCategory::kLetter; +} + +static inline bool IsDigit(const uint32_t aChar) { + return mozilla::unicode::GetGenCategory(aChar) == nsUGenCategory::kNumber; +} + +bool mozTXTToHTMLConv::ItMatchesDelimited(const char16_t* aInString, + int32_t aInLength, + const char16_t* rep, int32_t aRepLen, + LIMTYPE before, LIMTYPE after) { + // this little method gets called a LOT. I found we were spending a + // lot of time just calculating the length of the variable "rep" + // over and over again every time we called it. So we're now passing + // an integer in here. 
+ int32_t textLen = aInLength; + + if (((before == LT_IGNORE && (after == LT_IGNORE || after == LT_DELIMITER)) && + textLen < aRepLen) || + ((before != LT_IGNORE || (after != LT_IGNORE && after != LT_DELIMITER)) && + textLen < aRepLen + 1) || + (before != LT_IGNORE && after != LT_IGNORE && after != LT_DELIMITER && + textLen < aRepLen + 2)) + return false; + + uint32_t text0 = aInString[0]; + if (aInLength > 1 && NS_IS_SURROGATE_PAIR(text0, aInString[1])) { + text0 = SURROGATE_TO_UCS4(text0, aInString[1]); + } + // find length of the char/cluster to be ignored + int32_t ignoreLen = before == LT_IGNORE ? 0 : 1; + if (ignoreLen) { + mozilla::unicode::ClusterIterator ci(aInString, aInLength); + ci.Next(); + ignoreLen = ci - aInString; + } + + int32_t afterIndex = aRepLen + ignoreLen; + uint32_t textAfterPos = aInString[afterIndex]; + if (aInLength > afterIndex + 1 && + NS_IS_SURROGATE_PAIR(textAfterPos, aInString[afterIndex + 1])) { + textAfterPos = SURROGATE_TO_UCS4(textAfterPos, aInString[afterIndex + 1]); + } + + if ((before == LT_ALPHA && !IsAlpha(text0)) || + (before == LT_DIGIT && !IsDigit(text0)) || + (before == LT_DELIMITER && + (IsAlpha(text0) || IsDigit(text0) || text0 == *rep)) || + (after == LT_ALPHA && !IsAlpha(textAfterPos)) || + (after == LT_DIGIT && !IsDigit(textAfterPos)) || + (after == LT_DELIMITER && + (IsAlpha(textAfterPos) || IsDigit(textAfterPos) || + textAfterPos == *rep)) || + !Substring(Substring(aInString, aInString + aInLength), ignoreLen, + aRepLen) + .Equals(Substring(rep, rep + aRepLen), + nsCaseInsensitiveStringComparator)) + return false; + + return true; +} + +uint32_t mozTXTToHTMLConv::NumberOfMatches(const char16_t* aInString, + int32_t aInStringLength, + const char16_t* rep, int32_t aRepLen, + LIMTYPE before, LIMTYPE after) { + uint32_t result = 0; + + const char16_t* end = aInString + aInStringLength; + for (mozilla::unicode::ClusterIterator ci(aInString, aInStringLength); + !ci.AtEnd(); ci.Next()) { + if (ItMatchesDelimited(ci, end 
- ci, rep, aRepLen, before, after)) { + result++; + } + } + return result; +} + +// NOTE: the converted html for the phrase is appended to aOutString +// tagHTML and attributeHTML are plain ASCII (literal strings, in fact) +bool mozTXTToHTMLConv::StructPhraseHit( + const char16_t* aInString, int32_t aInStringLength, bool col0, + const char16_t* tagTXT, int32_t aTagTXTLen, const char* tagHTML, + const char* attributeHTML, nsAString& aOutString, uint32_t& openTags) { + /* We're searching for the following pattern: + LT_DELIMITER - "*" - ALPHA - + [ some text (maybe more "*"-pairs) - ALPHA ] "*" - LT_DELIMITER. + <strong> is only inserted, if existence of a pair could be verified + We use the first opening/closing tag, if we can choose */ + + const char16_t* newOffset = aInString; + int32_t newLength = aInStringLength; + if (!col0) // skip the first element? + { + newOffset = &aInString[1]; + newLength = aInStringLength - 1; + } + + // opening tag + if (ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, + (col0 ? 
LT_IGNORE : LT_DELIMITER), + LT_ALPHA) // is opening tag + && NumberOfMatches(newOffset, newLength, tagTXT, aTagTXTLen, LT_ALPHA, + LT_DELIMITER) // remaining closing tags + > openTags) { + openTags++; + aOutString.Append('<'); + aOutString.AppendASCII(tagHTML); + aOutString.Append(char16_t(' ')); + aOutString.AppendASCII(attributeHTML); + aOutString.AppendLiteral("><span class=\"moz-txt-tag\">"); + aOutString.Append(tagTXT); + aOutString.AppendLiteral("</span>"); + return true; + } + + // closing tag + else if (openTags > 0 && + ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, + LT_ALPHA, LT_DELIMITER)) { + openTags--; + aOutString.AppendLiteral("<span class=\"moz-txt-tag\">"); + aOutString.Append(tagTXT); + aOutString.AppendLiteral("</span></"); + aOutString.AppendASCII(tagHTML); + aOutString.Append(char16_t('>')); + return true; + } + + return false; +} + +bool mozTXTToHTMLConv::SmilyHit(const char16_t* aInString, int32_t aLength, + bool col0, const char* tagTXT, + const char* imageName, nsString& outputHTML, + int32_t& glyphTextLen) { + if (!aInString || !tagTXT || !imageName) return false; + + int32_t tagLen = strlen(tagTXT); + + uint32_t delim = (col0 ? 0 : 1) + tagLen; + + if ((col0 || IsSpace(aInString[0])) && + (aLength <= int32_t(delim) || IsSpace(aInString[delim]) || + (aLength > int32_t(delim + 1) && + (aInString[delim] == '.' || aInString[delim] == ',' || + aInString[delim] == ';' || aInString[delim] == '8' || + aInString[delim] == '>' || aInString[delim] == '!' || + aInString[delim] == '?') && + IsSpace(aInString[delim + 1]))) && + ItMatchesDelimited(aInString, aLength, + NS_ConvertASCIItoUTF16(tagTXT).get(), tagLen, + col0 ? LT_IGNORE : LT_DELIMITER, LT_IGNORE) + // Note: tests at different pos for LT_IGNORE and LT_DELIMITER + ) { + if (!col0) { + outputHTML.Truncate(); + outputHTML.Append(char16_t(' ')); + } + + outputHTML.AppendLiteral("<span class=\""); // <span class=" + outputHTML.AppendASCII(imageName); // e.g. 
smiley-frown + outputHTML.AppendLiteral("\" title=\""); // " title=" + outputHTML.AppendASCII(tagTXT); // smiley tooltip + outputHTML.AppendLiteral("\"><span>"); // "><span> + outputHTML.AppendASCII(tagTXT); // original text + outputHTML.AppendLiteral("</span></span>"); // </span></span> + glyphTextLen = (col0 ? 0 : 1) + tagLen; + return true; + } + + return false; +} + +// the glyph is appended to aOutputString instead of the original string... +bool mozTXTToHTMLConv::GlyphHit(const char16_t* aInString, int32_t aInLength, + bool col0, nsAString& aOutputString, + int32_t& glyphTextLen) { + char16_t text0 = aInString[0]; + char16_t text1 = aInString[1]; + char16_t firstChar = (col0 ? text0 : text1); + + // temporary variable used to store the glyph html text + nsAutoString outputHTML; + bool bTestSmilie; + bool bArg = false; + int i; + + // refactor some of this mess to avoid code duplication and speed execution a + // bit there are two cases that need to be tried one after another. To avoid a + // lot of duplicate code, rolling into a loop + + i = 0; + while (i < 2) { + bTestSmilie = false; + if (!i && (firstChar == ':' || firstChar == ';' || firstChar == '=' || + firstChar == '>' || firstChar == '8' || firstChar == 'O')) { + // first test passed + + bTestSmilie = true; + bArg = col0; + } + if (i && col0 && + (text1 == ':' || text1 == ';' || text1 == '=' || text1 == '>' || + text1 == '8' || text1 == 'O')) { + // second test passed + + bTestSmilie = true; + bArg = false; + } + if (bTestSmilie && (SmilyHit(aInString, aInLength, bArg, ":-)", + "moz-smiley-s1", // smile + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":)", + "moz-smiley-s1", // smile + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":-D", + "moz-smiley-s5", // laughing + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":-(", + "moz-smiley-s2", // frown + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":(", + 
"moz-smiley-s2", // frown + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":-[", + "moz-smiley-s6", // embarassed + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ";-)", + "moz-smiley-s3", // wink + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, col0, ";)", + "moz-smiley-s3", // wink + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":-\\", + "moz-smiley-s7", // undecided + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":-P", + "moz-smiley-s4", // tongue + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ";-P", + "moz-smiley-s4", // tongue + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, "=-O", + "moz-smiley-s8", // surprise + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":-*", + "moz-smiley-s9", // kiss + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ">:o", + "moz-smiley-s10", // yell + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ">:-o", + "moz-smiley-s10", // yell + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, "8-)", + "moz-smiley-s11", // cool + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":-$", + "moz-smiley-s12", // money + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":-!", + "moz-smiley-s13", // foot + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, "O:-)", + "moz-smiley-s14", // innocent + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":'(", + "moz-smiley-s15", // cry + outputHTML, glyphTextLen) || + + SmilyHit(aInString, aInLength, bArg, ":-X", + "moz-smiley-s16", // sealed + outputHTML, glyphTextLen))) { + aOutputString.Append(outputHTML); + return true; + } + i++; + } + if (text0 == '\f') { + aOutputString.AppendLiteral("<span class='moz-txt-formfeed'></span>"); + glyphTextLen = 1; + return true; + } + if 
(text0 == '+' || text1 == '+') { + if (ItMatchesDelimited(aInString, aInLength, u" +/-", 4, LT_IGNORE, + LT_IGNORE)) { + aOutputString.AppendLiteral(" ±"); + glyphTextLen = 4; + return true; + } + if (col0 && ItMatchesDelimited(aInString, aInLength, u"+/-", 3, LT_IGNORE, + LT_IGNORE)) { + aOutputString.AppendLiteral("±"); + glyphTextLen = 3; + return true; + } + } + + // x^2 => x<sup>2</sup>, also handle powers x^-2, x^0.5 + // implement regular expression /[\dA-Za-z\)\]}]\^-?\d+(\.\d+)*[^\dA-Za-z]/ + if (text1 == '^' && + (IsAsciiDigit(text0) || IsAsciiAlpha(text0) || text0 == ')' || + text0 == ']' || text0 == '}') && + ((2 < aInLength && IsAsciiDigit(aInString[2])) || + (3 < aInLength && aInString[2] == '-' && IsAsciiDigit(aInString[3])))) { + // Find first non-digit + int32_t delimPos = 3; // skip "^" and first digit (or '-') + for (; delimPos < aInLength && + (IsAsciiDigit(aInString[delimPos]) || + (aInString[delimPos] == '.' && delimPos + 1 < aInLength && + IsAsciiDigit(aInString[delimPos + 1]))); + delimPos++) + ; + + if (delimPos < aInLength && IsAsciiAlpha(aInString[delimPos])) { + return false; + } + + outputHTML.Truncate(); + outputHTML += text0; + outputHTML.AppendLiteral( + "<sup class=\"moz-txt-sup\">" + "<span style=\"display:inline-block;width:0;height:0;overflow:hidden\">" + "^</span>"); + + aOutputString.Append(outputHTML); + aOutputString.Append(&aInString[2], delimPos - 2); + aOutputString.AppendLiteral("</sup>"); + + glyphTextLen = delimPos /* - 1 + 1 */; + return true; + } + /* + The following strings are not substituted: + |TXT |HTML |Reason + +------+---------+---------- + -> ← Bug #454 + => ⇐ dito + <- → dito + <= ⇒ dito + (tm) ™ dito + 1/4 ¼ is triggered by 1/4 Part 1, 2/4 Part 2, ... 
+ 3/4 ¾ dito + 1/2 ½ similar + */ + return false; +} + +/*************************************************************************** + Library-internal Interface +****************************************************************************/ + +NS_IMPL_ISUPPORTS(mozTXTToHTMLConv, mozITXTToHTMLConv, nsIStreamConverter, + nsIStreamListener, nsIRequestObserver) + +int32_t mozTXTToHTMLConv::CiteLevelTXT(const char16_t* line, + uint32_t& logLineStart) { + int32_t result = 0; + int32_t lineLength = NS_strlen(line); + + bool moreCites = true; + while (moreCites) { + /* E.g. the following lines count as quote: + + > text + //#ifdef QUOTE_RECOGNITION_AGGRESSIVE + >text + //#ifdef QUOTE_RECOGNITION_AGGRESSIVE + > text + ] text + USER> text + USER] text + //#endif + + logLineStart is the position of "t" in this example + */ + uint32_t i = logLineStart; + +#ifdef QUOTE_RECOGNITION_AGGRESSIVE + for (; int32_t(i) < lineLength && IsSpace(line[i]); i++) + ; + for (; int32_t(i) < lineLength && IsAsciiAlpha(line[i]) && + nsCRT::IsUpper(line[i]); + i++) + ; + if (int32_t(i) < lineLength && (line[i] == '>' || line[i] == ']')) +#else + if (int32_t(i) < lineLength && line[i] == '>') +#endif + { + i++; + if (int32_t(i) < lineLength && line[i] == ' ') i++; + // sendmail/mbox + // Placed here for performance increase + const char16_t* indexString = &line[logLineStart]; + // here, |logLineStart < lineLength| is always true + uint32_t minlength = std::min(uint32_t(6), NS_strlen(indexString)); + if (Substring(indexString, indexString + minlength) + .Equals(Substring(u">From "_ns, 0, minlength), + nsCaseInsensitiveStringComparator)) + // XXX RFC2646 + moreCites = false; + else { + result++; + logLineStart = i; + } + } else + moreCites = false; + } + + return result; +} + +NS_IMETHODIMP +mozTXTToHTMLConv::ScanTXT(const nsAString& aInString, uint32_t whattodo, + nsAString& aOutString) { + if (aInString.Length() == 0) { + aOutString.Truncate(); + return NS_OK; + } + + if 
(!aOutString.SetCapacity(uint32_t(aInString.Length() * growthRate), + mozilla::fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + + bool doURLs = 0 != (whattodo & kURLs); + bool doGlyphSubstitution = 0 != (whattodo & kGlyphSubstitution); + bool doStructPhrase = 0 != (whattodo & kStructPhrase); + + uint32_t structPhrase_strong = 0; // Number of currently open tags + uint32_t structPhrase_underline = 0; + uint32_t structPhrase_italic = 0; + uint32_t structPhrase_code = 0; + + uint32_t endOfLastURLOutput = 0; + + nsAutoString outputHTML; // moved here for performance increase + + const char16_t* rawInputString = aInString.BeginReading(); + uint32_t inLength = aInString.Length(); + + for (mozilla::unicode::ClusterIterator ci(rawInputString, inLength); + !ci.AtEnd();) { + uint32_t i = ci - rawInputString; + if (doGlyphSubstitution) { + int32_t glyphTextLen; + if (GlyphHit(&rawInputString[i], inLength - i, i == 0, aOutString, + glyphTextLen)) { + i += glyphTextLen; + while (ci < rawInputString + i) { + ci.Next(); + } + continue; + } + } + + if (doStructPhrase) { + const char16_t* newOffset = rawInputString; + int32_t newLength = aInString.Length(); + if (i > 0) // skip the first element? 
+ { + mozilla::unicode::ClusterReverseIterator ri(rawInputString, i); + ri.Next(); + newOffset = ri; + newLength = aInString.Length() - (ri - rawInputString); + } + + switch (aInString[i]) // Performance increase + { + case '*': + if (StructPhraseHit(newOffset, newLength, i == 0, u"*", 1, "b", + "class=\"moz-txt-star\"", aOutString, + structPhrase_strong)) { + ci.Next(); + continue; + } + break; + case '/': + if (StructPhraseHit(newOffset, newLength, i == 0, u"/", 1, "i", + "class=\"moz-txt-slash\"", aOutString, + structPhrase_italic)) { + ci.Next(); + continue; + } + break; + case '_': + if (StructPhraseHit(newOffset, newLength, i == 0, u"_", 1, + "span" /* <u> is deprecated */, + "class=\"moz-txt-underscore\"", aOutString, + structPhrase_underline)) { + ci.Next(); + continue; + } + break; + case '|': + if (StructPhraseHit(newOffset, newLength, i == 0, u"|", 1, "code", + "class=\"moz-txt-verticalline\"", aOutString, + structPhrase_code)) { + ci.Next(); + continue; + } + break; + } + } + + if (doURLs) { + switch (aInString[i]) { + case ':': + case '@': + case '.': + if ((i == 0 || ((i > 0) && aInString[i - 1] != ' ')) && + ((i == aInString.Length() - 1) || + (aInString[i + 1] != ' '))) // Performance increase + { + int32_t replaceBefore; + int32_t replaceAfter; + if (FindURL(rawInputString, aInString.Length(), i, whattodo, + outputHTML, replaceBefore, replaceAfter) && + structPhrase_strong + structPhrase_italic + + structPhrase_underline + structPhrase_code == + 0 + /* workaround for bug #19445 */) { + // Don't cut into previously inserted HTML (bug 1509493) + if (aOutString.Length() - replaceBefore < endOfLastURLOutput) { + break; + } + aOutString.Cut(aOutString.Length() - replaceBefore, + replaceBefore); + aOutString += outputHTML; + endOfLastURLOutput = aOutString.Length(); + i += replaceAfter + 1; + while (ci < rawInputString + i) { + ci.Next(); + } + continue; + } + } + break; + } // switch + } + + switch (aInString[i]) { + // Special symbols + case '<': + 
case '>': + case '&': + EscapeChar(aInString[i], aOutString, false); + ci.Next(); + break; + // Normal characters + default: { + const char16_t* start = ci; + ci.Next(); + aOutString += Substring(start, (const char16_t*)ci); + break; + } + } + } + return NS_OK; +} + +NS_IMETHODIMP +mozTXTToHTMLConv::ScanHTML(const nsAString& input, uint32_t whattodo, + nsAString& aOutString) { + const nsPromiseFlatString& aInString = PromiseFlatString(input); + if (!aOutString.SetCapacity(uint32_t(aInString.Length() * growthRate), + mozilla::fallible)) { + return NS_ERROR_OUT_OF_MEMORY; + } + + // some common variables we were recalculating + // every time inside the for loop... + int32_t lengthOfInString = aInString.Length(); + const char16_t* uniBuffer = aInString.get(); + +#ifdef DEBUG_BenB_Perf + PRTime parsing_start = PR_IntervalNow(); +#endif + + // Look for simple entities not included in a tags and scan them. + // Skip all tags ("<[...]>") and content in an a link tag ("<a [...]</a>"), + // comment tag ("<!--[...]-->"), style tag, script tag or head tag. + // Unescape the rest (text between tags) and pass it to ScanTXT. + nsAutoCString canFollow(" \f\n\r\t>"); + for (int32_t i = 0; i < lengthOfInString;) { + if (aInString[i] == '<') // html tag + { + int32_t start = i; + if (i + 2 < lengthOfInString && nsCRT::ToLower(aInString[i + 1]) == 'a' && + canFollow.FindChar(aInString[i + 2]) != kNotFound) + // if a tag, skip until </a>. + // Make sure there's a white-space character after, not to match "abbr". 
+ { + i = aInString.Find("</a>", true, i); + if (i == kNotFound) + i = lengthOfInString; + else + i += 4; + } else if (Substring(aInString, i + 1, 3).LowerCaseEqualsASCII("!--")) + // if out-commended code, skip until --> + { + i = aInString.Find("-->", false, i); + if (i == kNotFound) + i = lengthOfInString; + else + i += 3; + } else if (i + 6 < lengthOfInString && + Substring(aInString, i + 1, 5).LowerCaseEqualsASCII("style") && + canFollow.FindChar(aInString[i + 6]) != kNotFound) + // if style tag, skip until </style> + { + i = aInString.Find("</style>", true, i); + if (i == kNotFound) + i = lengthOfInString; + else + i += 8; + } else if (i + 7 < lengthOfInString && + Substring(aInString, i + 1, 6) + .LowerCaseEqualsASCII("script") && + canFollow.FindChar(aInString[i + 7]) != kNotFound) + // if script tag, skip until </script> + { + i = aInString.Find("</script>", true, i); + if (i == kNotFound) + i = lengthOfInString; + else + i += 9; + } else if (i + 5 < lengthOfInString && + Substring(aInString, i + 1, 4).LowerCaseEqualsASCII("head") && + canFollow.FindChar(aInString[i + 5]) != kNotFound) + // if head tag, skip until </head> + // Make sure not to match <header>. + { + i = aInString.Find("</head>", true, i); + if (i == kNotFound) + i = lengthOfInString; + else + i += 7; + } else // just skip tag (attributes etc.) 
+ { + i = aInString.FindChar('>', i); + if (i == kNotFound) + i = lengthOfInString; + else + i++; + } + aOutString.Append(&uniBuffer[start], i - start); + } else { + uint32_t start = uint32_t(i); + i = aInString.FindChar('<', i); + if (i == kNotFound) i = lengthOfInString; + + nsString tempString; + tempString.SetCapacity(uint32_t((uint32_t(i) - start) * growthRate)); + UnescapeStr(uniBuffer, start, uint32_t(i) - start, tempString); + ScanTXT(tempString, whattodo, aOutString); + } + } + +#ifdef DEBUG_BenB_Perf + printf("ScanHTML time: %d ms\n", + PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start)); +#endif + return NS_OK; +} + +/**************************************************************************** + XPCOM Interface +*****************************************************************************/ + +NS_IMETHODIMP +mozTXTToHTMLConv::Convert(nsIInputStream* aFromStream, const char* aFromType, + const char* aToType, nsISupports* aCtxt, + nsIInputStream** _retval) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +mozTXTToHTMLConv::AsyncConvertData(const char* aFromType, const char* aToType, + nsIStreamListener* aListener, + nsISupports* aCtxt) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +mozTXTToHTMLConv::GetConvertedType(const nsACString& aFromType, + nsIChannel* aChannel, nsACString& aToType) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +mozTXTToHTMLConv::OnDataAvailable(nsIRequest* request, nsIInputStream* inStr, + uint64_t sourceOffset, uint32_t count) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +mozTXTToHTMLConv::OnStartRequest(nsIRequest* request) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +mozTXTToHTMLConv::OnStopRequest(nsIRequest* request, nsresult aStatus) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +mozTXTToHTMLConv::CiteLevelTXT(const char16_t* line, uint32_t* logLineStart, + uint32_t* _retval) { + if (!logLineStart || !_retval || !line) return NS_ERROR_NULL_POINTER; + 
*_retval = CiteLevelTXT(line, *logLineStart); + return NS_OK; +} + +nsresult MOZ_NewTXTToHTMLConv(mozTXTToHTMLConv** aConv) { + MOZ_ASSERT(aConv != nullptr, "null ptr"); + if (!aConv) return NS_ERROR_NULL_POINTER; + + RefPtr<mozTXTToHTMLConv> conv = new mozTXTToHTMLConv(); + conv.forget(aConv); + // return (*aConv)->Init(); + return NS_OK; +} diff --git a/netwerk/streamconv/converters/mozTXTToHTMLConv.h b/netwerk/streamconv/converters/mozTXTToHTMLConv.h new file mode 100644 index 0000000000..23c3dac30f --- /dev/null +++ b/netwerk/streamconv/converters/mozTXTToHTMLConv.h @@ -0,0 +1,284 @@ +/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + Description: Currently only functions to enhance plain text with HTML tags. + See mozITXTToHTMLConv. Stream conversion is defunct. +*/ + +#ifndef _mozTXTToHTMLConv_h__ +#define _mozTXTToHTMLConv_h__ + +#include "mozITXTToHTMLConv.h" +#include "nsString.h" +#include "nsCOMPtr.h" + +class nsIIOService; + +class mozTXTToHTMLConv : public mozITXTToHTMLConv { + virtual ~mozTXTToHTMLConv() = default; + + ////////////////////////////////////////////////////////// + public: + ////////////////////////////////////////////////////////// + + mozTXTToHTMLConv() = default; + NS_DECL_ISUPPORTS + + NS_DECL_MOZITXTTOHTMLCONV + NS_DECL_NSIREQUESTOBSERVER + NS_DECL_NSISTREAMLISTENER + NS_DECL_NSISTREAMCONVERTER + + /** + see mozITXTToHTMLConv::CiteLevelTXT + */ + int32_t CiteLevelTXT(const char16_t* line, uint32_t& logLineStart); + + ////////////////////////////////////////////////////////// + protected: + ////////////////////////////////////////////////////////// + nsCOMPtr<nsIIOService> + mIOService; // for performance reasons, cache the netwerk service... 
+ /** + Completes<ul> + <li>Case 1: mailto: "mozilla@bucksch.org" -> "mailto:mozilla@bucksch.org" + <li>Case 2: http: "www.mozilla.org" -> "http://www.mozilla.org" + <li>Case 3: ftp: "ftp.mozilla.org" -> "ftp://ftp.mozilla.org" + </ul> + It does not check whether the resulting URL is valid. + @param text (in): abbreviated URL + @param pos (in): position of "@" (case 1) or first "." (case 2 and 3) + @return Completed URL at success and empty string at failure + */ + void CompleteAbbreviatedURL(const char16_t* aInString, int32_t aInLength, + const uint32_t pos, nsString& aOutString); + + ////////////////////////////////////////////////////////// + private: + ////////////////////////////////////////////////////////// + + enum LIMTYPE { + LT_IGNORE, // limitation not checked + LT_DELIMITER, // not alphanumeric and not rep[0]. End of text is also ok. + LT_ALPHA, // alpha char + LT_DIGIT + }; + + /** + @param text (in): the string to search through.<p> + If before = IGNORE,<br> + rep is compared starting at 1. char of text (text[0]),<br> + else starting at 2. char of text (text[1]). + Chars after "after"-delimiter are ignored. + @param rep (in): the string to look for + @param aRepLen (in): the number of char16_t code units in the string to look for + @param before (in): limitation before rep + @param after (in): limitation after rep + @return true, if rep is found and limitation spec is met or rep is empty + */ + bool ItMatchesDelimited(const char16_t* aInString, int32_t aInLength, + const char16_t* rep, int32_t aRepLen, LIMTYPE before, + LIMTYPE after); + + /** + @param see ItMatchesDelimited + @return Number of ItMatchesDelimited in text + */ + uint32_t NumberOfMatches(const char16_t* aInString, int32_t aInStringLength, + const char16_t* rep, int32_t aRepLen, LIMTYPE before, + LIMTYPE after); + + /** + Currently only changes "<", ">" and "&". All others stay as they are.<p> + "Char" in function name to avoid side effects with nsString(ch) + constructors. 
@param ch (in)
    @param aStringToAppendto (out) - the string to append the escaped
                                     string to.
    @param inAttribute (in) - will escape quotes, too (which is
                              only needed for attribute values)
  */
  void EscapeChar(const char16_t ch, nsAString& aStringToAppendto,
                  bool inAttribute);

  /**
    See EscapeChar. Escapes the string in place.
  */
  void EscapeStr(nsString& aInString, bool inAttribute);

  /**
    Currently only reverts "<", ">" and "&". All others stay as they are.<p>
    @param aInString (in) HTML string
    @param aStartPos (in) start index into the buffer
    @param aLength (in) length of the buffer
    @param aOutString (out) unescaped buffer
  */
  void UnescapeStr(const char16_t* aInString, int32_t aStartPos,
                   int32_t aLength, nsString& aOutString);

  /**
    <em>Note</em>: I use different strategies to pass context between the
    functions (full text and pos vs. cut text and col0, glyphTextLen vs.
    replaceBefore/-After). It makes some sense, but is hard to understand
    (maintain) :-(.
  */

  /**
    <p><em>Note:</em> replaceBefore + replaceAfter + 1 (for char at pos) chars
    in text should be replaced by outputHTML.</p>
    <p><em>Note:</em> This function should be able to process a URL on multiple
    lines, but currently, ScanForURLs is called for every line, so it can't.</p>
    @param text (in): possibly includes a URL
    @param pos (in): position in text, where either ":", "." or "@" are found
    @param whathasbeendone (in): What the calling ScanTXT did/has to do with the
                (not-linkified) text, i.e. usually the "whattodo" parameter.
                (Needed to calculate replaceBefore.) NOT what will be done with
                the content of the link.
    @param outputHTML (out): URL with HTML-a tag
    @param replaceBefore (out): Number of chars of URL before pos
    @param replaceAfter (out): Number of chars of URL after pos
    @return URL found
  */
  bool FindURL(const char16_t* aInString, int32_t aInLength, const uint32_t pos,
               const uint32_t whathasbeendone, nsString& outputHTML,
               int32_t& replaceBefore, int32_t& replaceAfter);

  enum modetype {
    unknown,
    RFC1738,    /* Check, if RFC1738, APPENDIX compliant,
                   like "<URL:http://www.mozilla.org>". */
    RFC2396E,   /* RFC2396, APPENDIX E allows angle brackets (like
                   "<http://www.mozilla.org>") (without "URL:") or
                   quotation marks (like ""http://www.mozilla.org"").
                   Also allow email addresses without scheme,
                   e.g. "<mozilla@bucksch.org>" */
    freetext,   /* assume heading scheme
                   with "[a-zA-Z][a-zA-Z0-9+\-\.]*:" like "news:"
                   (see RFC2396, Section 3.1).
                   Certain characters (see code) or any whitespace
                   (including linebreaks) end the URL.
                   Other certain (punctuation) characters (see code)
                   at the end are stripped off. */
    abbreviated /* Similar to freetext, but without scheme, e.g.
                   "www.mozilla.org", "ftp.mozilla.org" and
                   "mozilla@bucksch.org". */
    /* RFC1738 and RFC2396E type URLs may use multiple lines,
       whitespace is stripped. Special characters like ")" stay intact.*/
  };

  /**
   * @param text (in), pos (in): see FindURL
   * @param check (in): Start must conform to this mode
   * @param start (out): Position in text, where URL (including brackets or
   *             similar) starts
   * @return |check|-conforming start has been found
   */
  bool FindURLStart(const char16_t* aInString, int32_t aInLength,
                    const uint32_t pos, const modetype check, uint32_t& start);

  /**
   * @param text (in), pos (in): see FindURL
   * @param check (in): End must conform to this mode
   * @param start (in): see FindURLStart
   * @param end (out): Similar to |start| param of FindURLStart
   * @return |check|-conforming end has been found
   */
  bool FindURLEnd(const char16_t* aInString, int32_t aInStringLength,
                  const uint32_t pos, const modetype check,
                  const uint32_t start, uint32_t& end);

  /**
   * @param text (in), pos (in), whathasbeendone (in): see FindURL
   * @param check (in): Current mode
   * @param start (in), end (in): see FindURLEnd
   * @param txtURL (out): Guessed (raw) URL.
   *             Without whitespace, but not completed.
   * @param desc (out): Link as shown to the user, but already escaped.
   *             Should be placed between the <a> and </a> tags.
   * @param replaceBefore (out), replaceAfter (out): see FindURL
   */
  void CalculateURLBoundaries(const char16_t* aInString,
                              int32_t aInStringLength, const uint32_t pos,
                              const uint32_t whathasbeendone,
                              const modetype check, const uint32_t start,
                              const uint32_t end, nsString& txtURL,
                              nsString& desc, int32_t& replaceBefore,
                              int32_t& replaceAfter);

  /**
   * @param txtURL (in), desc (in): see CalculateURLBoundaries
   * @param outputHTML (out): see FindURL
   * @return A valid URL could be found (and creation of HTML successful)
   */
  bool CheckURLAndCreateHTML(const nsString& txtURL, const nsString& desc,
                             const modetype mode, nsString& outputHTML);

  /**
    @param text (in): line of text possibly with tagTXT.<p>
                if col0 is true,
                  starting with tagTXT<br>
                else
                  starting one char before tagTXT
    @param col0 (in): tagTXT is on the beginning of the line (or paragraph).
                open must be 0 then.
    @param tagTXT (in): Tag in plaintext to search for, e.g. "*"
    @param aTagTxtLen (in): length of tagTXT.
    @param tagHTML (in): HTML-Tag to replace tagTXT with,
                without "<" and ">", e.g. "strong"
    @param attributeHTML (in): HTML-attribute to add to opening tagHTML,
                e.g. "class=txt_star"
    @param aOutputString: string to APPEND the converted html into
    @param openTags (in/out): Number of currently open tags of type tagHTML
    @return Conversion succeeded
  */
  bool StructPhraseHit(const char16_t* aInString, int32_t aInStringLength,
                       bool col0, const char16_t* tagTXT, int32_t aTagTxtLen,
                       const char* tagHTML, const char* attributeHTML,
                       nsAString& aOutputString, uint32_t& openTags);

  /**
    @param text (in), col0 (in): see GlyphHit
    @param tagTXT (in): Smiley, see also StructPhraseHit
    @param imageName (in): the basename of the file that contains the image for
                           this smiley
    @param outputHTML (out): new string containing the html for the smiley
    @param glyphTextLen (out): see GlyphHit
  */
  bool SmilyHit(const char16_t* aInString, int32_t aLength, bool col0,
                const char* tagTXT, const char* imageName, nsString& outputHTML,
                int32_t& glyphTextLen);

  /**
    Checks, if we can replace some chars at the start of line with prettier HTML
    code.<p>
    If success is reported, replace the first glyphTextLen chars with outputHTML

    @param text (in): line of text possibly with Glyph.<p>
                If col0 is true,
                  starting with Glyph <br><!-- (br not part of text) -->
                else
                  starting one char before Glyph
    @param col0 (in): text starts at the beginning of the line (or paragraph)
    @param aOutString (out): APPENDS html for the glyph to this string
    @param glyphTextLen (out): Length of original text to replace
    @return see StructPhraseHit
  */
  bool GlyphHit(const char16_t* aInString, int32_t aInLength, bool col0,
                nsAString& aOutString, int32_t& glyphTextLen);

  /**
    Check if a given url should be linkified.
    @param aURL (in): url to be checked on.
*/
  bool ShouldLinkify(const nsCString& aURL);
};

// It's said, that Win32 and Mac don't like static const members
const int32_t mozTXTToHTMLConv_lastMode = 4;
// Needed (only) by mozTXTToHTMLConv::FindURL
const int32_t mozTXTToHTMLConv_numberOfModes = 4;  // ditto; unknown not counted

#endif
// diff --git a/netwerk/streamconv/converters/nsDirIndex.cpp b/netwerk/streamconv/converters/nsDirIndex.cpp
// new file mode 100644
// index 0000000000..c6e89fd7c5
// --- /dev/null
// +++ b/netwerk/streamconv/converters/nsDirIndex.cpp
// @@ -0,0 +1,89 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsDirIndex.h"

NS_IMPL_ISUPPORTS(nsDirIndex, nsIDirIndex)

// A fresh entry has an unknown type, and "unknown" sentinels for size and
// modification time. Note that mSize is an int64_t member initialised from
// UINT64_MAX, so the stored value is the result of that unsigned-to-signed
// conversion.
nsDirIndex::nsDirIndex()
    : mType(TYPE_UNKNOWN), mSize(UINT64_MAX), mLastModified(-1LL) {}

// --- nsIDirIndex attribute accessors ---------------------------------------
// Each getter copies the member out; each setter overwrites the member.
// Getters that write through a raw pointer reject a null pointer via
// NS_ENSURE_ARG_POINTER before dereferencing it.

NS_IMETHODIMP
nsDirIndex::GetType(uint32_t* aType) {
  NS_ENSURE_ARG_POINTER(aType);

  *aType = mType;
  return NS_OK;
}

NS_IMETHODIMP
nsDirIndex::SetType(uint32_t aType) {
  mType = aType;
  return NS_OK;
}

NS_IMETHODIMP
nsDirIndex::GetContentType(nsACString& aContentType) {
  aContentType = mContentType;
  return NS_OK;
}

NS_IMETHODIMP
nsDirIndex::SetContentType(const nsACString& aContentType) {
  mContentType = aContentType;
  return NS_OK;
}

NS_IMETHODIMP
nsDirIndex::GetLocation(nsACString& aLocation) {
  aLocation = mLocation;
  return NS_OK;
}

NS_IMETHODIMP
nsDirIndex::SetLocation(const nsACString& aLocation) {
  mLocation = aLocation;
  return NS_OK;
}

NS_IMETHODIMP
nsDirIndex::GetDescription(nsAString& aDescription) {
  aDescription = mDescription;
  return NS_OK;
}

NS_IMETHODIMP
nsDirIndex::SetDescription(const nsAString& aDescription) {
  mDescription = aDescription;
  return NS_OK;
}

NS_IMETHODIMP
nsDirIndex::GetSize(int64_t* aSize) {
  NS_ENSURE_ARG_POINTER(aSize);

  *aSize = mSize;
  return NS_OK;
}

NS_IMETHODIMP
nsDirIndex::SetSize(int64_t aSize) {
  mSize = aSize;
  return NS_OK;
}

NS_IMETHODIMP
nsDirIndex::GetLastModified(PRTime* aLastModified) {
  NS_ENSURE_ARG_POINTER(aLastModified);

  *aLastModified = mLastModified;
  return NS_OK;
}

NS_IMETHODIMP
nsDirIndex::SetLastModified(PRTime aLastModified) {
  mLastModified = aLastModified;
  return NS_OK;
}
// diff --git a/netwerk/streamconv/converters/nsDirIndex.h b/netwerk/streamconv/converters/nsDirIndex.h
// new file mode 100644
// index 0000000000..2ff411de54
// --- /dev/null
// +++ b/netwerk/streamconv/converters/nsDirIndex.h
// @@ -0,0 +1,32 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef nsDirIndex_h__
#define nsDirIndex_h__

#include "nsIDirIndex.h"
#include "nsString.h"
#include "mozilla/Attributes.h"

// Plain value holder implementing nsIDirIndex: one directory-listing entry
// (type, content type, location, description, size, last-modified time).
class nsDirIndex final : public nsIDirIndex {
 private:
  // Private destructor: instances are reference counted (NS_DECL_ISUPPORTS).
  ~nsDirIndex() = default;

 public:
  nsDirIndex();

  NS_DECL_ISUPPORTS
  NS_DECL_NSIDIRINDEX

 protected:
  uint32_t mType;          // initialised to TYPE_UNKNOWN by the constructor
  nsCString mContentType;
  nsCString mLocation;
  nsString mDescription;
  int64_t mSize;           // initialised from UINT64_MAX by the constructor
  PRTime mLastModified;    // initialised to -1 by the constructor
};

#endif
// diff --git a/netwerk/streamconv/converters/nsDirIndexParser.cpp b/netwerk/streamconv/converters/nsDirIndexParser.cpp
// new file mode 100644
// index 0000000000..ed53cc45fb
// --- /dev/null
// +++ b/netwerk/streamconv/converters/nsDirIndexParser.cpp
// @@ -0,0 +1,444 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +/* This parsing code originally lived in xpfe/components/directory/ - bbaetz */ + +#include "nsDirIndexParser.h" + +#include "mozilla/ArrayUtils.h" +#include "mozilla/Encoding.h" +#include "prprf.h" +#include "nsCRT.h" +#include "nsDirIndex.h" +#include "nsEscape.h" +#include "nsIDirIndex.h" +#include "nsIInputStream.h" +#include "nsITextToSubURI.h" +#include "nsServiceManagerUtils.h" +#include "mozilla/intl/LocaleService.h" + +using namespace mozilla; + +struct EncodingProp { + const char* const mKey; + NotNull<const Encoding*> mValue; +}; + +static const EncodingProp localesFallbacks[] = { + {"ar", WINDOWS_1256_ENCODING}, {"ba", WINDOWS_1251_ENCODING}, + {"be", WINDOWS_1251_ENCODING}, {"bg", WINDOWS_1251_ENCODING}, + {"cs", WINDOWS_1250_ENCODING}, {"el", ISO_8859_7_ENCODING}, + {"et", WINDOWS_1257_ENCODING}, {"fa", WINDOWS_1256_ENCODING}, + {"he", WINDOWS_1255_ENCODING}, {"hr", WINDOWS_1250_ENCODING}, + {"hu", ISO_8859_2_ENCODING}, {"ja", SHIFT_JIS_ENCODING}, + {"kk", WINDOWS_1251_ENCODING}, {"ko", EUC_KR_ENCODING}, + {"ku", WINDOWS_1254_ENCODING}, {"ky", WINDOWS_1251_ENCODING}, + {"lt", WINDOWS_1257_ENCODING}, {"lv", WINDOWS_1257_ENCODING}, + {"mk", WINDOWS_1251_ENCODING}, {"pl", ISO_8859_2_ENCODING}, + {"ru", WINDOWS_1251_ENCODING}, {"sah", WINDOWS_1251_ENCODING}, + {"sk", WINDOWS_1250_ENCODING}, {"sl", ISO_8859_2_ENCODING}, + {"sr", WINDOWS_1251_ENCODING}, {"tg", WINDOWS_1251_ENCODING}, + {"th", WINDOWS_874_ENCODING}, {"tr", WINDOWS_1254_ENCODING}, + {"tt", WINDOWS_1251_ENCODING}, {"uk", WINDOWS_1251_ENCODING}, + {"vi", WINDOWS_1258_ENCODING}, {"zh", GBK_ENCODING}}; + +static NotNull<const Encoding*> +GetFTPFallbackEncodingDoNotAddNewCallersToThisFunction() { + nsAutoCString locale; + mozilla::intl::LocaleService::GetInstance()->GetAppLocaleAsBCP47(locale); + + // Let's lower case the string just in case unofficial language packs + // don't stick to conventions. + ToLowerCase(locale); // ASCII lowercasing with CString input! 
+ + // Special case Traditional Chinese before throwing away stuff after the + // language itself. Today we only ship zh-TW, but be defensive about + // possible future values. + if (locale.EqualsLiteral("zh-tw") || locale.EqualsLiteral("zh-hk") || + locale.EqualsLiteral("zh-mo") || locale.EqualsLiteral("zh-hant")) { + return BIG5_ENCODING; + } + + // Throw away regions and other variants to accommodate weird stuff seen + // in telemetry--apparently unofficial language packs. + int32_t hyphenIndex = locale.FindChar('-'); + if (hyphenIndex >= 0) { + locale.Truncate(hyphenIndex); + } + + size_t index; + if (BinarySearchIf( + localesFallbacks, 0, ArrayLength(localesFallbacks), + [&locale](const EncodingProp& aProperty) { + return locale.Compare(aProperty.mKey); + }, + &index)) { + return localesFallbacks[index].mValue; + } + return WINDOWS_1252_ENCODING; +} + +NS_IMPL_ISUPPORTS(nsDirIndexParser, nsIRequestObserver, nsIStreamListener, + nsIDirIndexParser) + +nsDirIndexParser::nsDirIndexParser() : mLineStart(0), mHasDescription(false) {} + +nsresult nsDirIndexParser::Init() { + mLineStart = 0; + mHasDescription = false; + mFormat[0] = -1; + auto encoding = GetFTPFallbackEncodingDoNotAddNewCallersToThisFunction(); + encoding->Name(mEncoding); + + nsresult rv; + // XXX not threadsafe + if (gRefCntParser++ == 0) + rv = CallGetService(NS_ITEXTTOSUBURI_CONTRACTID, &gTextToSubURI); + else + rv = NS_OK; + + return rv; +} + +nsDirIndexParser::~nsDirIndexParser() { + // XXX not threadsafe + if (--gRefCntParser == 0) { + NS_IF_RELEASE(gTextToSubURI); + } +} + +NS_IMETHODIMP +nsDirIndexParser::SetListener(nsIDirIndexListener* aListener) { + mListener = aListener; + return NS_OK; +} + +NS_IMETHODIMP +nsDirIndexParser::GetListener(nsIDirIndexListener** aListener) { + NS_IF_ADDREF(*aListener = mListener.get()); + return NS_OK; +} + +NS_IMETHODIMP +nsDirIndexParser::GetComment(char** aComment) { + *aComment = ToNewCString(mComment, mozilla::fallible); + + if (!*aComment) return 
NS_ERROR_OUT_OF_MEMORY; + + return NS_OK; +} + +NS_IMETHODIMP +nsDirIndexParser::SetEncoding(const char* aEncoding) { + mEncoding.Assign(aEncoding); + return NS_OK; +} + +NS_IMETHODIMP +nsDirIndexParser::GetEncoding(char** aEncoding) { + *aEncoding = ToNewCString(mEncoding, mozilla::fallible); + + if (!*aEncoding) return NS_ERROR_OUT_OF_MEMORY; + + return NS_OK; +} + +NS_IMETHODIMP +nsDirIndexParser::OnStartRequest(nsIRequest* aRequest) { return NS_OK; } + +NS_IMETHODIMP +nsDirIndexParser::OnStopRequest(nsIRequest* aRequest, nsresult aStatusCode) { + // Finish up + if (mBuf.Length() > (uint32_t)mLineStart) { + ProcessData(aRequest, nullptr); + } + + return NS_OK; +} + +nsDirIndexParser::Field nsDirIndexParser::gFieldTable[] = { + {"Filename", FIELD_FILENAME}, + {"Description", FIELD_DESCRIPTION}, + {"Content-Length", FIELD_CONTENTLENGTH}, + {"Last-Modified", FIELD_LASTMODIFIED}, + {"Content-Type", FIELD_CONTENTTYPE}, + {"File-Type", FIELD_FILETYPE}, + {nullptr, FIELD_UNKNOWN}}; + +nsrefcnt nsDirIndexParser::gRefCntParser = 0; +nsITextToSubURI* nsDirIndexParser::gTextToSubURI; + +void nsDirIndexParser::ParseFormat(const char* aFormatStr) { + // Parse a "200" format line, and remember the fields and their + // ordering in mFormat. Multiple 200 lines stomp on each other. + unsigned int formatNum = 0; + mFormat[0] = -1; + + do { + while (*aFormatStr && nsCRT::IsAsciiSpace(char16_t(*aFormatStr))) + ++aFormatStr; + + if (!*aFormatStr) break; + + nsAutoCString name; + int32_t len = 0; + while (aFormatStr[len] && !nsCRT::IsAsciiSpace(char16_t(aFormatStr[len]))) + ++len; + name.Append(aFormatStr, len); + aFormatStr += len; + + // Okay, we're gonna monkey with the nsStr. Bold! 
+ name.SetLength(nsUnescapeCount(name.BeginWriting())); + + // All tokens are case-insensitive - + // http://www.mozilla.org/projects/netlib/dirindexformat.html + if (name.LowerCaseEqualsLiteral("description")) mHasDescription = true; + + for (Field* i = gFieldTable; i->mName; ++i) { + if (name.EqualsIgnoreCase(i->mName)) { + mFormat[formatNum] = i->mType; + mFormat[++formatNum] = -1; + break; + } + } + + } while (*aFormatStr && (formatNum < (ArrayLength(mFormat) - 1))); +} + +void nsDirIndexParser::ParseData(nsIDirIndex* aIdx, char* aDataStr, + int32_t aLineLen) { + // Parse a "201" data line, using the field ordering specified in + // mFormat. + + if (mFormat[0] == -1) { + // Ignore if we haven't seen a format yet. + return; + } + + nsAutoCString filename; + int32_t lineLen = aLineLen; + + for (int32_t i = 0; mFormat[i] != -1; ++i) { + // If we've exhausted the data before we run out of fields, just bail. + if (!*aDataStr || (lineLen < 1)) { + return; + } + + while ((lineLen > 0) && nsCRT::IsAsciiSpace(*aDataStr)) { + ++aDataStr; + --lineLen; + } + + if (lineLen < 1) { + // invalid format, bail + return; + } + + char* value = aDataStr; + if (*aDataStr == '"' || *aDataStr == '\'') { + // it's a quoted string. snarf everything up to the next quote character + const char quotechar = *(aDataStr++); + lineLen--; + ++value; + while ((lineLen > 0) && *aDataStr != quotechar) { + ++aDataStr; + --lineLen; + } + if (lineLen > 0) { + *aDataStr++ = '\0'; + --lineLen; + } + + if (!lineLen) { + // invalid format, bail + return; + } + } else { + // it's unquoted. snarf until we see whitespace. 
+ value = aDataStr; + while ((lineLen > 0) && (!nsCRT::IsAsciiSpace(*aDataStr))) { + ++aDataStr; + --lineLen; + } + if (lineLen > 0) { + *aDataStr++ = '\0'; + --lineLen; + } + // even if we ran out of line length here, there's still a trailing zero + // byte afterwards + } + + fieldType t = fieldType(mFormat[i]); + switch (t) { + case FIELD_FILENAME: { + // don't unescape at this point, so that UnEscapeAndConvert() can + filename = value; + + bool success = false; + + nsAutoString entryuri; + + if (gTextToSubURI) { + nsAutoString result; + if (NS_SUCCEEDED(gTextToSubURI->UnEscapeAndConvert( + mEncoding, filename, result))) { + if (!result.IsEmpty()) { + aIdx->SetLocation(filename); + if (!mHasDescription) aIdx->SetDescription(result); + success = true; + } + } else { + NS_WARNING("UnEscapeAndConvert error"); + } + } + + if (!success) { + // if unsuccessfully at charset conversion, then + // just fallback to unescape'ing in-place + // XXX - this shouldn't be using UTF8, should it? + // when can we fail to get the service, anyway? 
- bbaetz + aIdx->SetLocation(filename); + if (!mHasDescription) { + aIdx->SetDescription(NS_ConvertUTF8toUTF16(value)); + } + } + } break; + case FIELD_DESCRIPTION: + nsUnescape(value); + aIdx->SetDescription(NS_ConvertUTF8toUTF16(value)); + break; + case FIELD_CONTENTLENGTH: { + int64_t len; + int32_t status = PR_sscanf(value, "%lld", &len); + if (status == 1) + aIdx->SetSize(len); + else + aIdx->SetSize(UINT64_MAX); // UINT64_MAX means unknown + } break; + case FIELD_LASTMODIFIED: { + PRTime tm; + nsUnescape(value); + if (PR_ParseTimeString(value, false, &tm) == PR_SUCCESS) { + aIdx->SetLastModified(tm); + } + } break; + case FIELD_CONTENTTYPE: + aIdx->SetContentType(nsDependentCString(value)); + break; + case FIELD_FILETYPE: + // unescape in-place + nsUnescape(value); + if (!nsCRT::strcasecmp(value, "directory")) { + aIdx->SetType(nsIDirIndex::TYPE_DIRECTORY); + } else if (!nsCRT::strcasecmp(value, "file")) { + aIdx->SetType(nsIDirIndex::TYPE_FILE); + } else if (!nsCRT::strcasecmp(value, "symbolic-link")) { + aIdx->SetType(nsIDirIndex::TYPE_SYMLINK); + } else { + aIdx->SetType(nsIDirIndex::TYPE_UNKNOWN); + } + break; + case FIELD_UNKNOWN: + // ignore + break; + } + } +} + +NS_IMETHODIMP +nsDirIndexParser::OnDataAvailable(nsIRequest* aRequest, nsIInputStream* aStream, + uint64_t aSourceOffset, uint32_t aCount) { + if (aCount < 1) return NS_OK; + + int32_t len = mBuf.Length(); + + // Ensure that our mBuf has capacity to hold the data we're about to + // read. + if (!mBuf.SetLength(len + aCount, fallible)) return NS_ERROR_OUT_OF_MEMORY; + + // Now read the data into our buffer. + nsresult rv; + uint32_t count; + rv = aStream->Read(mBuf.BeginWriting() + len, aCount, &count); + if (NS_FAILED(rv)) return rv; + + // Set the string's length according to the amount of data we've read. + // Note: we know this to work on nsCString. This isn't guaranteed to + // work on other strings. 
+ mBuf.SetLength(len + count); + + return ProcessData(aRequest, nullptr); +} + +nsresult nsDirIndexParser::ProcessData(nsIRequest* aRequest, + nsISupports* aCtxt) { + if (!mListener) return NS_ERROR_FAILURE; + + int32_t numItems = 0; + + while (true) { + ++numItems; + + int32_t eol = mBuf.FindCharInSet("\n\r", mLineStart); + if (eol < 0) break; + mBuf.SetCharAt(char16_t('\0'), eol); + + const char* line = mBuf.get() + mLineStart; + + int32_t lineLen = eol - mLineStart; + mLineStart = eol + 1; + + if (lineLen >= 4) { + const char* buf = line; + + if (buf[0] == '1') { + if (buf[1] == '0') { + if (buf[2] == '0' && buf[3] == ':') { + // 100. Human-readable comment line. Ignore + } else if (buf[2] == '1' && buf[3] == ':') { + // 101. Human-readable information line. + mComment.Append(buf + 4); + + char* value = ((char*)buf) + 4; + nsUnescape(value); + mListener->OnInformationAvailable(aRequest, aCtxt, + NS_ConvertUTF8toUTF16(value)); + + } else if (buf[2] == '2' && buf[3] == ':') { + // 102. Human-readable information line, HTML. + mComment.Append(buf + 4); + } + } + } else if (buf[0] == '2') { + if (buf[1] == '0') { + if (buf[2] == '0' && buf[3] == ':') { + // 200. Define field names + ParseFormat(buf + 4); + } else if (buf[2] == '1' && buf[3] == ':') { + // 201. Field data + nsCOMPtr<nsIDirIndex> idx = new nsDirIndex(); + + ParseData(idx, ((char*)buf) + 4, lineLen - 4); + mListener->OnIndexAvailable(aRequest, aCtxt, idx); + } + } + } else if (buf[0] == '3') { + if (buf[1] == '0') { + if (buf[2] == '0' && buf[3] == ':') { + // 300. Self-referring URL + } else if (buf[2] == '1' && buf[3] == ':') { + // 301. 
OUR EXTENSION - encoding + int i = 4; + while (buf[i] && nsCRT::IsAsciiSpace(buf[i])) ++i; + + if (buf[i]) SetEncoding(buf + i); + } + } + } + } + } + + return NS_OK; +} diff --git a/netwerk/streamconv/converters/nsDirIndexParser.h b/netwerk/streamconv/converters/nsDirIndexParser.h new file mode 100644 index 0000000000..c2dfe3c160 --- /dev/null +++ b/netwerk/streamconv/converters/nsDirIndexParser.h @@ -0,0 +1,75 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __NSDIRINDEX_H_ +#define __NSDIRINDEX_H_ + +#include "nsString.h" +#include "nsCOMPtr.h" +#include "nsIDirIndexListener.h" +#include "mozilla/RefPtr.h" + +class nsIDirIndex; +class nsITextToSubURI; + +/* CID: {a0d6ad32-1dd1-11b2-aa55-a40187b54036} */ + +class nsDirIndexParser : public nsIDirIndexParser { + private: + virtual ~nsDirIndexParser(); + + nsDirIndexParser(); + nsresult Init(); + + public: + NS_DECL_ISUPPORTS + NS_DECL_NSISTREAMLISTENER + NS_DECL_NSIREQUESTOBSERVER + NS_DECL_NSIDIRINDEXPARSER + + static already_AddRefed<nsIDirIndexParser> CreateInstance() { + RefPtr<nsDirIndexParser> parser = new nsDirIndexParser(); + if (NS_FAILED(parser->Init())) { + return nullptr; + } + return parser.forget(); + } + + enum fieldType { + FIELD_UNKNOWN = 0, // MUST be 0 + FIELD_FILENAME, + FIELD_DESCRIPTION, + FIELD_CONTENTLENGTH, + FIELD_LASTMODIFIED, + FIELD_CONTENTTYPE, + FIELD_FILETYPE + }; + + protected: + nsCOMPtr<nsIDirIndexListener> mListener; + + nsCString mEncoding; + nsCString mComment; + nsCString mBuf; + int32_t mLineStart; + bool mHasDescription; + int mFormat[8]; + + nsresult ProcessData(nsIRequest* aRequest, nsISupports* aCtxt); + void ParseFormat(const char* buf); + void ParseData(nsIDirIndex* aIdx, char* aDataStr, int32_t lineLen); + + struct 
Field { + const char* mName; + fieldType mType; + }; + + static Field gFieldTable[]; + + static nsrefcnt gRefCntParser; + static nsITextToSubURI* gTextToSubURI; +}; + +#endif diff --git a/netwerk/streamconv/converters/nsFTPDirListingConv.cpp b/netwerk/streamconv/converters/nsFTPDirListingConv.cpp new file mode 100644 index 0000000000..d155481f55 --- /dev/null +++ b/netwerk/streamconv/converters/nsFTPDirListingConv.cpp @@ -0,0 +1,342 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsFTPDirListingConv.h" +#include "nsMemory.h" +#include "plstr.h" +#include "mozilla/Logging.h" +#include "nsCOMPtr.h" +#include "nsEscape.h" +#include "nsStringStream.h" +#include "nsIStreamListener.h" +#include "nsCRT.h" +#include "nsIChannel.h" +#include "nsIURI.h" +#include "nsIURIMutator.h" + +#include "ParseFTPList.h" +#include <algorithm> + +#include "mozilla/UniquePtrExtensions.h" +#include "mozilla/Unused.h" + +// +// Log module for FTP dir listing stream converter logging... +// +// To enable logging (see prlog.h for full details): +// +// set MOZ_LOG=nsFTPDirListConv:5 +// set MOZ_LOG_FILE=network.log +// +// This enables LogLevel::Debug level information and places all output in +// the file network.log. 
+// +static mozilla::LazyLogModule gFTPDirListConvLog("nsFTPDirListingConv"); +using namespace mozilla; + +// nsISupports implementation +NS_IMPL_ISUPPORTS(nsFTPDirListingConv, nsIStreamConverter, nsIStreamListener, + nsIRequestObserver) + +// nsIStreamConverter implementation +NS_IMETHODIMP +nsFTPDirListingConv::Convert(nsIInputStream* aFromStream, const char* aFromType, + const char* aToType, nsISupports* aCtxt, + nsIInputStream** _retval) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +// Stream converter service calls this to initialize the actual stream converter +// (us). +NS_IMETHODIMP +nsFTPDirListingConv::AsyncConvertData(const char* aFromType, + const char* aToType, + nsIStreamListener* aListener, + nsISupports* aCtxt) { + NS_ASSERTION(aListener && aFromType && aToType, + "null pointer passed into FTP dir listing converter"); + + // hook up our final listener. this guy gets the various On*() calls we want + // to throw at him. + mFinalListener = aListener; + NS_ADDREF(mFinalListener); + + MOZ_LOG(gFTPDirListConvLog, LogLevel::Debug, + ("nsFTPDirListingConv::AsyncConvertData() converting FROM raw, TO " + "application/http-index-format\n")); + + return NS_OK; +} + +NS_IMETHODIMP +nsFTPDirListingConv::GetConvertedType(const nsACString& aFromType, + nsIChannel* aChannel, + nsACString& aToType) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +// nsIStreamListener implementation +NS_IMETHODIMP +nsFTPDirListingConv::OnDataAvailable(nsIRequest* request, nsIInputStream* inStr, + uint64_t sourceOffset, uint32_t count) { + NS_ASSERTION(request, "FTP dir listing stream converter needs a request"); + + nsresult rv; + + nsCOMPtr<nsIChannel> channel = do_QueryInterface(request, &rv); + NS_ENSURE_SUCCESS(rv, rv); + + uint32_t read, streamLen; + + uint64_t streamLen64; + rv = inStr->Available(&streamLen64); + NS_ENSURE_SUCCESS(rv, rv); + streamLen = (uint32_t)std::min(streamLen64, uint64_t(UINT32_MAX - 1)); + + auto buffer = MakeUniqueFallible<char[]>(streamLen + 1); + 
NS_ENSURE_TRUE(buffer, NS_ERROR_OUT_OF_MEMORY); + + rv = inStr->Read(buffer.get(), streamLen, &read); + NS_ENSURE_SUCCESS(rv, rv); + + // the dir listings are ascii text, null terminate this sucker. + buffer[streamLen] = '\0'; + + MOZ_LOG(gFTPDirListConvLog, LogLevel::Debug, + ("nsFTPDirListingConv::OnData(request = %p, inStr = %p, " + "sourceOffset = %" PRIu64 ", count = %u)\n", + request, inStr, sourceOffset, count)); + + if (!mBuffer.IsEmpty()) { + // we have data left over from a previous OnDataAvailable() call. + // combine the buffers so we don't lose any data. + mBuffer.Append(buffer.get()); + + buffer = MakeUniqueFallible<char[]>(mBuffer.Length() + 1); + NS_ENSURE_TRUE(buffer, NS_ERROR_OUT_OF_MEMORY); + + strncpy(buffer.get(), mBuffer.get(), mBuffer.Length() + 1); + mBuffer.Truncate(); + } + + MOZ_LOG(gFTPDirListConvLog, LogLevel::Debug, + ("::OnData() received the following %d bytes...\n\n%s\n\n", streamLen, + buffer.get())); + + nsAutoCString indexFormat; + if (!mSentHeading) { + // build up the 300: line + nsCOMPtr<nsIURI> uri; + rv = channel->GetURI(getter_AddRefs(uri)); + NS_ENSURE_SUCCESS(rv, rv); + + rv = GetHeaders(indexFormat, uri); + NS_ENSURE_SUCCESS(rv, rv); + + mSentHeading = true; + } + + char* line = buffer.get(); + line = DigestBufferLines(line, indexFormat); + + MOZ_LOG(gFTPDirListConvLog, LogLevel::Debug, + ("::OnData() sending the following %d bytes...\n\n%s\n\n", + indexFormat.Length(), indexFormat.get())); + + // if there's any data left over, buffer it. + if (line && *line) { + mBuffer.Append(line); + MOZ_LOG(gFTPDirListConvLog, LogLevel::Debug, + ("::OnData() buffering the following %zu bytes...\n\n%s\n\n", + strlen(line), line)); + } + + // send the converted data out. 
+ nsCOMPtr<nsIInputStream> inputData; + + rv = NS_NewCStringInputStream(getter_AddRefs(inputData), indexFormat); + NS_ENSURE_SUCCESS(rv, rv); + + rv = mFinalListener->OnDataAvailable(request, inputData, 0, + indexFormat.Length()); + + return rv; +} + +// nsIRequestObserver implementation +NS_IMETHODIMP +nsFTPDirListingConv::OnStartRequest(nsIRequest* request) { + // we don't care about start. move along... but start masqeurading + // as the http-index channel now. + return mFinalListener->OnStartRequest(request); +} + +NS_IMETHODIMP +nsFTPDirListingConv::OnStopRequest(nsIRequest* request, nsresult aStatus) { + // we don't care about stop. move along... + + return mFinalListener->OnStopRequest(request, aStatus); +} + +// nsFTPDirListingConv methods +nsFTPDirListingConv::nsFTPDirListingConv() { + mFinalListener = nullptr; + mSentHeading = false; +} + +nsFTPDirListingConv::~nsFTPDirListingConv() { NS_IF_RELEASE(mFinalListener); } + +nsresult nsFTPDirListingConv::GetHeaders(nsACString& headers, nsIURI* uri) { + nsresult rv = NS_OK; + // build up 300 line + headers.AppendLiteral("300: "); + + // Bug 111117 - don't print the password + nsAutoCString pw; + nsAutoCString spec; + uri->GetPassword(pw); + if (!pw.IsEmpty()) { + nsCOMPtr<nsIURI> noPassURI; + rv = NS_MutateURI(uri).SetPassword(""_ns).Finalize(noPassURI); + if (NS_FAILED(rv)) return rv; + rv = noPassURI->GetAsciiSpec(spec); + if (NS_FAILED(rv)) return rv; + headers.Append(spec); + } else { + rv = uri->GetAsciiSpec(spec); + if (NS_FAILED(rv)) return rv; + + headers.Append(spec); + } + headers.Append(char(nsCRT::LF)); + // END 300: + + // build up the column heading; 200: + headers.AppendLiteral( + "200: filename content-length last-modified file-type\n"); + // END 200: + return rv; +} + +char* nsFTPDirListingConv::DigestBufferLines(char* aBuffer, + nsCString& aString) { + char* line = aBuffer; + char* eol; + bool cr = false; + + list_state state; + + // while we have new lines, parse 'em into 
application/http-index-format. + while (line && (eol = PL_strchr(line, nsCRT::LF))) { + // yank any carriage returns too. + if (eol > line && *(eol - 1) == nsCRT::CR) { + eol--; + *eol = '\0'; + cr = true; + } else { + *eol = '\0'; + cr = false; + } + + list_result result; + + int type = ParseFTPList(line, &state, &result); + + // if it is other than a directory, file, or link -OR- if it is a + // directory named . or .., skip over this line. + if ((type != 'd' && type != 'f' && type != 'l') || + (result.fe_type == 'd' && result.fe_fname[0] == '.' && + (result.fe_fnlen == 1 || + (result.fe_fnlen == 2 && result.fe_fname[1] == '.')))) { + if (cr) + line = eol + 2; + else + line = eol + 1; + + continue; + } + + // blast the index entry into the indexFormat buffer as a 201: line. + aString.AppendLiteral("201: "); + // FILENAME + + // parsers for styles 'U' and 'W' handle sequence " -> " themself + if (state.lstyle != 'U' && state.lstyle != 'W') { + const char* offset = strstr(result.fe_fname, " -> "); + if (offset) { + result.fe_fnlen = offset - result.fe_fname; + } + } + + nsAutoCString buf; + aString.Append('\"'); + aString.Append(NS_EscapeURL( + Substring(result.fe_fname, result.fe_fname + result.fe_fnlen), + esc_Minimal | esc_OnlyASCII | esc_Forced, buf)); + aString.AppendLiteral("\" "); + + // CONTENT LENGTH + + if (type != 'd') { + for (char& fe : result.fe_size) { + if (fe != '\0') aString.Append((const char*)&fe, 1); + } + + aString.Append(' '); + } else + aString.AppendLiteral("0 "); + + // MODIFIED DATE + char buffer[256] = ""; + + // ParseFTPList can return time structure with invalid values. + // PR_NormalizeTime will set all values into valid limits. 
+ result.fe_time.tm_params.tp_gmt_offset = 0; + result.fe_time.tm_params.tp_dst_offset = 0; + PR_NormalizeTime(&result.fe_time, PR_GMTParameters); + + // Note: The below is the RFC822/1123 format, as required by + // the application/http-index-format specs + // viewers of such a format can then reformat this into the + // current locale (or anything else they choose) + PR_FormatTimeUSEnglish(buffer, sizeof(buffer), "%a, %d %b %Y %H:%M:%S GMT", + &result.fe_time); + + nsAutoCString escaped; + Unused << NS_WARN_IF( + !NS_Escape(nsDependentCString(buffer), escaped, url_Path)); + aString.Append(escaped); + aString.Append(' '); + + // ENTRY TYPE + if (type == 'd') + aString.AppendLiteral("DIRECTORY"); + else if (type == 'l') + aString.AppendLiteral("SYMBOLIC-LINK"); + else + aString.AppendLiteral("FILE"); + + aString.Append(' '); + + aString.Append(char(nsCRT::LF)); // complete this line + // END 201: + + if (cr) + line = eol + 2; + else + line = eol + 1; + } // end while(eol) + + return line; +} + +nsresult NS_NewFTPDirListingConv(nsFTPDirListingConv** aFTPDirListingConv) { + MOZ_ASSERT(aFTPDirListingConv != nullptr, "null ptr"); + if (!aFTPDirListingConv) return NS_ERROR_NULL_POINTER; + + RefPtr<nsFTPDirListingConv> conv = new nsFTPDirListingConv(); + conv.forget(aFTPDirListingConv); + return NS_OK; +} diff --git a/netwerk/streamconv/converters/nsFTPDirListingConv.h b/netwerk/streamconv/converters/nsFTPDirListingConv.h new file mode 100644 index 0000000000..57ca806d11 --- /dev/null +++ b/netwerk/streamconv/converters/nsFTPDirListingConv.h @@ -0,0 +1,52 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#ifndef __nsftpdirlistingdconv__h__ +#define __nsftpdirlistingdconv__h__ + +#include "nsIStreamConverter.h" +#include "nsString.h" + +class nsIURI; + +#define NS_FTPDIRLISTINGCONVERTER_CID \ + { /* 14C0E880-623E-11d3-A178-0050041CAF44 */ \ + 0x14c0e880, 0x623e, 0x11d3, { \ + 0xa1, 0x78, 0x00, 0x50, 0x04, 0x1c, 0xaf, 0x44 \ + } \ + } + +class nsFTPDirListingConv : public nsIStreamConverter { + public: + // nsISupports methods + NS_DECL_ISUPPORTS + + // nsIStreamConverter methods + NS_DECL_NSISTREAMCONVERTER + + // nsIStreamListener methods + NS_DECL_NSISTREAMLISTENER + + // nsIRequestObserver methods + NS_DECL_NSIREQUESTOBSERVER + + // nsFTPDirListingConv methods + nsFTPDirListingConv(); + + private: + virtual ~nsFTPDirListingConv(); + + // Get the application/http-index-format headers + nsresult GetHeaders(nsACString& str, nsIURI* uri); + char* DigestBufferLines(char* aBuffer, nsCString& aString); + + // member data + nsCString mBuffer; // buffered data. + bool mSentHeading; // have we sent 100, 101, 200, and 300 lines yet? + + nsIStreamListener* mFinalListener; // this guy gets the converted data via + // his OnDataAvailable() +}; + +#endif /* __nsftpdirlistingdconv__h__ */ diff --git a/netwerk/streamconv/converters/nsHTTPCompressConv.cpp b/netwerk/streamconv/converters/nsHTTPCompressConv.cpp new file mode 100644 index 0000000000..1de4737866 --- /dev/null +++ b/netwerk/streamconv/converters/nsHTTPCompressConv.cpp @@ -0,0 +1,722 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set sw=2 ts=8 et tw=80 : */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsHTTPCompressConv.h" +#include "nsMemory.h" +#include "plstr.h" +#include "nsCOMPtr.h" +#include "nsError.h" +#include "nsStreamUtils.h" +#include "nsStringStream.h" +#include "nsComponentManagerUtils.h" +#include "nsThreadUtils.h" +#include "mozilla/Preferences.h" +#include "mozilla/Logging.h" +#include "nsIForcePendingChannel.h" +#include "nsIRequest.h" + +// brotli headers +#include "state.h" +#include "brotli/decode.h" + +namespace mozilla { +namespace net { + +extern LazyLogModule gHttpLog; +#define LOG(args) \ + MOZ_LOG(mozilla::net::gHttpLog, mozilla::LogLevel::Debug, args) + +// nsISupports implementation +NS_IMPL_ISUPPORTS(nsHTTPCompressConv, nsIStreamConverter, nsIStreamListener, + nsIRequestObserver, nsICompressConvStats, + nsIThreadRetargetableStreamListener) + +// nsFTPDirListingConv methods +nsHTTPCompressConv::nsHTTPCompressConv() + : mMode(HTTP_COMPRESS_IDENTITY), + mOutBuffer(nullptr), + mInpBuffer(nullptr), + mOutBufferLen(0), + mInpBufferLen(0), + mCheckHeaderDone(false), + mStreamEnded(false), + mStreamInitialized(false), + mDummyStreamInitialised(false), + d_stream{}, + mLen(0), + hMode(0), + mSkipCount(0), + mFlags(0), + mDecodedDataLength(0), + mMutex("nsHTTPCompressConv") { + LOG(("nsHttpCompresssConv %p ctor\n", this)); + if (NS_IsMainThread()) { + mFailUncleanStops = + Preferences::GetBool("network.http.enforce-framing.http", false); + } else { + mFailUncleanStops = false; + } +} + +nsHTTPCompressConv::~nsHTTPCompressConv() { + LOG(("nsHttpCompresssConv %p dtor\n", this)); + if (mInpBuffer) { + free(mInpBuffer); + } + + if (mOutBuffer) { + free(mOutBuffer); + } + + // For some reason we are not getting Z_STREAM_END. But this was also seen + // for mozilla bug 198133. Need to handle this case. 
+ if (mStreamInitialized && !mStreamEnded) { + inflateEnd(&d_stream); + } +} + +NS_IMETHODIMP +nsHTTPCompressConv::GetDecodedDataLength(uint64_t* aDecodedDataLength) { + *aDecodedDataLength = mDecodedDataLength; + return NS_OK; +} + +NS_IMETHODIMP +nsHTTPCompressConv::AsyncConvertData(const char* aFromType, const char* aToType, + nsIStreamListener* aListener, + nsISupports* aCtxt) { + if (!PL_strncasecmp(aFromType, HTTP_COMPRESS_TYPE, + sizeof(HTTP_COMPRESS_TYPE) - 1) || + !PL_strncasecmp(aFromType, HTTP_X_COMPRESS_TYPE, + sizeof(HTTP_X_COMPRESS_TYPE) - 1)) { + mMode = HTTP_COMPRESS_COMPRESS; + } else if (!PL_strncasecmp(aFromType, HTTP_GZIP_TYPE, + sizeof(HTTP_GZIP_TYPE) - 1) || + !PL_strncasecmp(aFromType, HTTP_X_GZIP_TYPE, + sizeof(HTTP_X_GZIP_TYPE) - 1)) { + mMode = HTTP_COMPRESS_GZIP; + } else if (!PL_strncasecmp(aFromType, HTTP_DEFLATE_TYPE, + sizeof(HTTP_DEFLATE_TYPE) - 1)) { + mMode = HTTP_COMPRESS_DEFLATE; + } else if (!PL_strncasecmp(aFromType, HTTP_BROTLI_TYPE, + sizeof(HTTP_BROTLI_TYPE) - 1)) { + mMode = HTTP_COMPRESS_BROTLI; + } + LOG(("nsHttpCompresssConv %p AsyncConvertData %s %s mode %d\n", this, + aFromType, aToType, (CompressMode)mMode)); + + MutexAutoLock lock(mMutex); + // hook ourself up with the receiving listener. 
+ mListener = aListener; + + return NS_OK; +} + +NS_IMETHODIMP +nsHTTPCompressConv::GetConvertedType(const nsACString& aFromType, + nsIChannel* aChannel, + nsACString& aToType) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +nsHTTPCompressConv::OnStartRequest(nsIRequest* request) { + LOG(("nsHttpCompresssConv %p onstart\n", this)); + nsCOMPtr<nsIStreamListener> listener; + { + MutexAutoLock lock(mMutex); + listener = mListener; + } + return listener->OnStartRequest(request); +} + +NS_IMETHODIMP +nsHTTPCompressConv::OnStopRequest(nsIRequest* request, nsresult aStatus) { + nsresult status = aStatus; + LOG(("nsHttpCompresssConv %p onstop %" PRIx32 "\n", this, + static_cast<uint32_t>(aStatus))); + + // Framing integrity is enforced for content-encoding: gzip, but not for + // content-encoding: deflate. Note that gzip vs deflate is NOT determined + // by content sniffing but only via header. + if (!mStreamEnded && NS_SUCCEEDED(status) && + (mFailUncleanStops && (mMode == HTTP_COMPRESS_GZIP))) { + // This is not a clean end of gzip stream: the transfer is incomplete. 
+ status = NS_ERROR_NET_PARTIAL_TRANSFER; + LOG(("nsHttpCompresssConv %p onstop partial gzip\n", this)); + } + if (NS_SUCCEEDED(status) && mMode == HTTP_COMPRESS_BROTLI) { + nsCOMPtr<nsIForcePendingChannel> fpChannel = do_QueryInterface(request); + bool isPending = false; + if (request) { + request->IsPending(&isPending); + } + if (fpChannel && !isPending) { + fpChannel->ForcePending(true); + } + if (mBrotli && (mBrotli->mTotalOut == 0) && + !mBrotli->mBrotliStateIsStreamEnd) { + status = NS_ERROR_INVALID_CONTENT_ENCODING; + } + LOG(("nsHttpCompresssConv %p onstop brotlihandler rv %" PRIx32 "\n", this, + static_cast<uint32_t>(status))); + if (fpChannel && !isPending) { + fpChannel->ForcePending(false); + } + } + + nsCOMPtr<nsIStreamListener> listener; + { + MutexAutoLock lock(mMutex); + listener = mListener; + } + return listener->OnStopRequest(request, status); +} + +/* static */ +nsresult nsHTTPCompressConv::BrotliHandler(nsIInputStream* stream, + void* closure, const char* dataIn, + uint32_t, uint32_t aAvail, + uint32_t* countRead) { + MOZ_ASSERT(stream); + nsHTTPCompressConv* self = static_cast<nsHTTPCompressConv*>(closure); + *countRead = 0; + + const size_t kOutSize = 128 * 1024; // just a chunk size, we call in a loop + uint8_t* outPtr; + size_t outSize; + size_t avail = aAvail; + BrotliDecoderResult res; + + if (!self->mBrotli) { + *countRead = aAvail; + return NS_OK; + } + + auto outBuffer = MakeUniqueFallible<uint8_t[]>(kOutSize); + if (outBuffer == nullptr) { + self->mBrotli->mStatus = NS_ERROR_OUT_OF_MEMORY; + return self->mBrotli->mStatus; + } + + do { + outSize = kOutSize; + outPtr = outBuffer.get(); + + // brotli api is documented in brotli/dec/decode.h and brotli/dec/decode.c + LOG(("nsHttpCompresssConv %p brotlihandler decompress %zu\n", self, avail)); + size_t totalOut = self->mBrotli->mTotalOut; + res = ::BrotliDecoderDecompressStream( + &self->mBrotli->mState, &avail, + reinterpret_cast<const unsigned char**>(&dataIn), &outSize, &outPtr, + 
&totalOut); + outSize = kOutSize - outSize; + self->mBrotli->mTotalOut = totalOut; + self->mBrotli->mBrotliStateIsStreamEnd = + BrotliDecoderIsFinished(&self->mBrotli->mState); + LOG(("nsHttpCompresssConv %p brotlihandler decompress rv=%" PRIx32 + " out=%zu\n", + self, static_cast<uint32_t>(res), outSize)); + + if (res == BROTLI_DECODER_RESULT_ERROR) { + LOG(("nsHttpCompressConv %p marking invalid encoding", self)); + self->mBrotli->mStatus = NS_ERROR_INVALID_CONTENT_ENCODING; + return self->mBrotli->mStatus; + } + + // in 'the current implementation' brotli must consume everything before + // asking for more input + if (res == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) { + MOZ_ASSERT(!avail); + if (avail) { + LOG(("nsHttpCompressConv %p did not consume all input", self)); + self->mBrotli->mStatus = NS_ERROR_UNEXPECTED; + return self->mBrotli->mStatus; + } + } + if (outSize > 0) { + nsresult rv = self->do_OnDataAvailable( + self->mBrotli->mRequest, self->mBrotli->mContext, + self->mBrotli->mSourceOffset, + reinterpret_cast<const char*>(outBuffer.get()), outSize); + LOG(("nsHttpCompressConv %p BrotliHandler ODA rv=%" PRIx32, self, + static_cast<uint32_t>(rv))); + if (NS_FAILED(rv)) { + self->mBrotli->mStatus = rv; + return self->mBrotli->mStatus; + } + } + + if (res == BROTLI_DECODER_RESULT_SUCCESS || + res == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) { + *countRead = aAvail; + return NS_OK; + } + MOZ_ASSERT(res == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT); + } while (res == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT); + + self->mBrotli->mStatus = NS_ERROR_UNEXPECTED; + return self->mBrotli->mStatus; +} + +NS_IMETHODIMP +nsHTTPCompressConv::OnDataAvailable(nsIRequest* request, nsIInputStream* iStr, + uint64_t aSourceOffset, uint32_t aCount) { + nsresult rv = NS_ERROR_INVALID_CONTENT_ENCODING; + uint32_t streamLen = aCount; + LOG(("nsHttpCompressConv %p OnDataAvailable %d", this, aCount)); + + if (streamLen == 0) { + NS_ERROR("count of zero passed to OnDataAvailable"); + 
return NS_ERROR_UNEXPECTED; + } + + if (mStreamEnded) { + // Hmm... this may just indicate that the data stream is done and that + // what's left is either metadata or padding of some sort.... throwing + // it out is probably the safe thing to do. + uint32_t n; + return iStr->ReadSegments(NS_DiscardSegment, nullptr, streamLen, &n); + } + + switch (mMode) { + case HTTP_COMPRESS_GZIP: + streamLen = check_header(iStr, streamLen, &rv); + + if (rv != NS_OK) { + return rv; + } + + if (streamLen == 0) { + return NS_OK; + } + + [[fallthrough]]; + + case HTTP_COMPRESS_DEFLATE: + + if (mInpBuffer != nullptr && streamLen > mInpBufferLen) { + unsigned char* originalInpBuffer = mInpBuffer; + if (!(mInpBuffer = (unsigned char*)realloc( + originalInpBuffer, mInpBufferLen = streamLen))) { + free(originalInpBuffer); + } + + if (mOutBufferLen < streamLen * 2) { + unsigned char* originalOutBuffer = mOutBuffer; + if (!(mOutBuffer = (unsigned char*)realloc( + mOutBuffer, mOutBufferLen = streamLen * 3))) { + free(originalOutBuffer); + } + } + + if (mInpBuffer == nullptr || mOutBuffer == nullptr) { + return NS_ERROR_OUT_OF_MEMORY; + } + } + + if (mInpBuffer == nullptr) { + mInpBuffer = (unsigned char*)malloc(mInpBufferLen = streamLen); + } + + if (mOutBuffer == nullptr) { + mOutBuffer = (unsigned char*)malloc(mOutBufferLen = streamLen * 3); + } + + if (mInpBuffer == nullptr || mOutBuffer == nullptr) { + return NS_ERROR_OUT_OF_MEMORY; + } + + uint32_t unused; + iStr->Read((char*)mInpBuffer, streamLen, &unused); + + if (mMode == HTTP_COMPRESS_DEFLATE) { + if (!mStreamInitialized) { + memset(&d_stream, 0, sizeof(d_stream)); + + if (inflateInit(&d_stream) != Z_OK) { + return NS_ERROR_FAILURE; + } + + mStreamInitialized = true; + } + d_stream.next_in = mInpBuffer; + d_stream.avail_in = (uInt)streamLen; + + mDummyStreamInitialised = false; + for (;;) { + d_stream.next_out = mOutBuffer; + d_stream.avail_out = (uInt)mOutBufferLen; + + int code = inflate(&d_stream, Z_NO_FLUSH); + unsigned 
bytesWritten = (uInt)mOutBufferLen - d_stream.avail_out; + + if (code == Z_STREAM_END) { + if (bytesWritten) { + rv = do_OnDataAvailable(request, nullptr, aSourceOffset, + (char*)mOutBuffer, bytesWritten); + if (NS_FAILED(rv)) { + return rv; + } + } + + inflateEnd(&d_stream); + mStreamEnded = true; + break; + } else if (code == Z_OK) { + if (bytesWritten) { + rv = do_OnDataAvailable(request, nullptr, aSourceOffset, + (char*)mOutBuffer, bytesWritten); + if (NS_FAILED(rv)) { + return rv; + } + } + } else if (code == Z_BUF_ERROR) { + if (bytesWritten) { + rv = do_OnDataAvailable(request, nullptr, aSourceOffset, + (char*)mOutBuffer, bytesWritten); + if (NS_FAILED(rv)) { + return rv; + } + } + break; + } else if (code == Z_DATA_ERROR) { + // some servers (notably Apache with mod_deflate) don't generate + // zlib headers insert a dummy header and try again + static char dummy_head[2] = { + 0x8 + 0x7 * 0x10, + (((0x8 + 0x7 * 0x10) * 0x100 + 30) / 31 * 31) & 0xFF, + }; + inflateReset(&d_stream); + d_stream.next_in = (Bytef*)dummy_head; + d_stream.avail_in = sizeof(dummy_head); + + code = inflate(&d_stream, Z_NO_FLUSH); + if (code != Z_OK) { + return NS_ERROR_FAILURE; + } + + // stop an endless loop caused by non-deflate data being labelled as + // deflate + if (mDummyStreamInitialised) { + NS_WARNING( + "endless loop detected" + " - invalid deflate"); + return NS_ERROR_INVALID_CONTENT_ENCODING; + } + mDummyStreamInitialised = true; + // reset stream pointers to our original data + d_stream.next_in = mInpBuffer; + d_stream.avail_in = (uInt)streamLen; + } else { + return NS_ERROR_INVALID_CONTENT_ENCODING; + } + } /* for */ + } else { + if (!mStreamInitialized) { + memset(&d_stream, 0, sizeof(d_stream)); + + if (inflateInit2(&d_stream, -MAX_WBITS) != Z_OK) { + return NS_ERROR_FAILURE; + } + + mStreamInitialized = true; + } + + d_stream.next_in = mInpBuffer; + d_stream.avail_in = (uInt)streamLen; + + for (;;) { + d_stream.next_out = mOutBuffer; + d_stream.avail_out = 
(uInt)mOutBufferLen; + + int code = inflate(&d_stream, Z_NO_FLUSH); + unsigned bytesWritten = (uInt)mOutBufferLen - d_stream.avail_out; + + if (code == Z_STREAM_END) { + if (bytesWritten) { + rv = do_OnDataAvailable(request, nullptr, aSourceOffset, + (char*)mOutBuffer, bytesWritten); + if (NS_FAILED(rv)) { + return rv; + } + } + + inflateEnd(&d_stream); + mStreamEnded = true; + break; + } else if (code == Z_OK) { + if (bytesWritten) { + rv = do_OnDataAvailable(request, nullptr, aSourceOffset, + (char*)mOutBuffer, bytesWritten); + if (NS_FAILED(rv)) { + return rv; + } + } + } else if (code == Z_BUF_ERROR) { + if (bytesWritten) { + rv = do_OnDataAvailable(request, nullptr, aSourceOffset, + (char*)mOutBuffer, bytesWritten); + if (NS_FAILED(rv)) { + return rv; + } + } + break; + } else { + return NS_ERROR_INVALID_CONTENT_ENCODING; + } + } /* for */ + } /* gzip */ + break; + + case HTTP_COMPRESS_BROTLI: { + if (!mBrotli) { + mBrotli = MakeUnique<BrotliWrapper>(); + } + + mBrotli->mRequest = request; + mBrotli->mContext = nullptr; + mBrotli->mSourceOffset = aSourceOffset; + + uint32_t countRead; + rv = iStr->ReadSegments(BrotliHandler, this, streamLen, &countRead); + if (NS_SUCCEEDED(rv)) { + rv = mBrotli->mStatus; + } + if (NS_FAILED(rv)) { + return rv; + } + } break; + + default: + nsCOMPtr<nsIStreamListener> listener; + { + MutexAutoLock lock(mMutex); + listener = mListener; + } + rv = listener->OnDataAvailable(request, iStr, aSourceOffset, aCount); + if (NS_FAILED(rv)) { + return rv; + } + } /* switch */ + + return NS_OK; +} /* OnDataAvailable */ + +// XXX/ruslan: need to implement this too + +NS_IMETHODIMP +nsHTTPCompressConv::Convert(nsIInputStream* aFromStream, const char* aFromType, + const char* aToType, nsISupports* aCtxt, + nsIInputStream** _retval) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +nsresult nsHTTPCompressConv::do_OnDataAvailable(nsIRequest* request, + nsISupports* context, + uint64_t offset, + const char* buffer, + uint32_t count) { + if (!mStream) { 
+ mStream = do_CreateInstance(NS_STRINGINPUTSTREAM_CONTRACTID); + NS_ENSURE_STATE(mStream); + } + + mStream->ShareData(buffer, count); + + nsCOMPtr<nsIStreamListener> listener; + { + MutexAutoLock lock(mMutex); + listener = mListener; + } + nsresult rv = listener->OnDataAvailable(request, mStream, offset, count); + + // Make sure the stream no longer references |buffer| in case our listener + // is crazy enough to try to read from |mStream| after ODA. + mStream->ShareData("", 0); + mDecodedDataLength += count; + + return rv; +} + +#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ +#define HEAD_CRC 0x02 /* bit 1 set: header CRC present */ +#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ +#define ORIG_NAME 0x08 /* bit 3 set: original file name present */ +#define COMMENT 0x10 /* bit 4 set: file comment present */ +#define RESERVED 0xE0 /* bits 5..7: reserved */ + +static unsigned gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */ + +uint32_t nsHTTPCompressConv::check_header(nsIInputStream* iStr, + uint32_t streamLen, nsresult* rs) { + enum { + GZIP_INIT = 0, + GZIP_OS, + GZIP_EXTRA0, + GZIP_EXTRA1, + GZIP_EXTRA2, + GZIP_ORIG, + GZIP_COMMENT, + GZIP_CRC + }; + char c; + + *rs = NS_OK; + + if (mCheckHeaderDone) { + return streamLen; + } + + while (streamLen) { + switch (hMode) { + case GZIP_INIT: + uint32_t unused; + iStr->Read(&c, 1, &unused); + streamLen--; + + if (mSkipCount == 0 && ((unsigned)c & 0377) != gz_magic[0]) { + *rs = NS_ERROR_INVALID_CONTENT_ENCODING; + return 0; + } + + if (mSkipCount == 1 && ((unsigned)c & 0377) != gz_magic[1]) { + *rs = NS_ERROR_INVALID_CONTENT_ENCODING; + return 0; + } + + if (mSkipCount == 2 && ((unsigned)c & 0377) != Z_DEFLATED) { + *rs = NS_ERROR_INVALID_CONTENT_ENCODING; + return 0; + } + + mSkipCount++; + if (mSkipCount == 4) { + mFlags = (unsigned)c & 0377; + if (mFlags & RESERVED) { + *rs = NS_ERROR_INVALID_CONTENT_ENCODING; + return 0; + } + hMode = GZIP_OS; + mSkipCount = 0; + } + break; + + 
case GZIP_OS: + iStr->Read(&c, 1, &unused); + streamLen--; + mSkipCount++; + + if (mSkipCount == 6) { + hMode = GZIP_EXTRA0; + } + break; + + case GZIP_EXTRA0: + if (mFlags & EXTRA_FIELD) { + iStr->Read(&c, 1, &unused); + streamLen--; + mLen = (uInt)c & 0377; + hMode = GZIP_EXTRA1; + } else { + hMode = GZIP_ORIG; + } + break; + + case GZIP_EXTRA1: + iStr->Read(&c, 1, &unused); + streamLen--; + mLen |= ((uInt)c & 0377) << 8; + mSkipCount = 0; + hMode = GZIP_EXTRA2; + break; + + case GZIP_EXTRA2: + if (mSkipCount == mLen) { + hMode = GZIP_ORIG; + } else { + iStr->Read(&c, 1, &unused); + streamLen--; + mSkipCount++; + } + break; + + case GZIP_ORIG: + if (mFlags & ORIG_NAME) { + iStr->Read(&c, 1, &unused); + streamLen--; + if (c == 0) hMode = GZIP_COMMENT; + } else { + hMode = GZIP_COMMENT; + } + break; + + case GZIP_COMMENT: + if (mFlags & COMMENT) { + iStr->Read(&c, 1, &unused); + streamLen--; + if (c == 0) { + hMode = GZIP_CRC; + mSkipCount = 0; + } + } else { + hMode = GZIP_CRC; + mSkipCount = 0; + } + break; + + case GZIP_CRC: + if (mFlags & HEAD_CRC) { + iStr->Read(&c, 1, &unused); + streamLen--; + mSkipCount++; + if (mSkipCount == 2) { + mCheckHeaderDone = true; + return streamLen; + } + } else { + mCheckHeaderDone = true; + return streamLen; + } + break; + } + } + return streamLen; +} + +NS_IMETHODIMP +nsHTTPCompressConv::CheckListenerChain() { + nsCOMPtr<nsIThreadRetargetableStreamListener> listener; + { + MutexAutoLock lock(mMutex); + listener = do_QueryInterface(mListener); + } + + if (!listener) { + return NS_ERROR_NO_INTERFACE; + } + + return listener->CheckListenerChain(); +} + +} // namespace net +} // namespace mozilla + +nsresult NS_NewHTTPCompressConv( + mozilla::net::nsHTTPCompressConv** aHTTPCompressConv) { + MOZ_ASSERT(aHTTPCompressConv != nullptr, "null ptr"); + if (!aHTTPCompressConv) { + return NS_ERROR_NULL_POINTER; + } + + RefPtr<mozilla::net::nsHTTPCompressConv> outVal = + new mozilla::net::nsHTTPCompressConv(); + if (!outVal) { + return 
NS_ERROR_OUT_OF_MEMORY; + } + outVal.forget(aHTTPCompressConv); + return NS_OK; +} diff --git a/netwerk/streamconv/converters/nsHTTPCompressConv.h b/netwerk/streamconv/converters/nsHTTPCompressConv.h new file mode 100644 index 0000000000..1ad34bbfab --- /dev/null +++ b/netwerk/streamconv/converters/nsHTTPCompressConv.h @@ -0,0 +1,137 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set sw=2 ts=8 et tw=80 : */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#if !defined(__nsHTTPCompressConv__h__) +# define __nsHTTPCompressConv__h__ 1 + +# include "nsIStreamConverter.h" +# include "nsICompressConvStats.h" +# include "nsIThreadRetargetableStreamListener.h" +# include "nsCOMPtr.h" +# include "mozilla/Atomics.h" +# include "mozilla/Mutex.h" + +# include "zlib.h" + +// brotli includes +# undef assert +# include "assert.h" +# include "state.h" + +class nsIStringInputStream; + +# define NS_HTTPCOMPRESSCONVERTER_CID \ + { \ + /* 66230b2b-17fa-4bd3-abf4-07986151022d */ \ + 0x66230b2b, 0x17fa, 0x4bd3, { \ + 0xab, 0xf4, 0x07, 0x98, 0x61, 0x51, 0x02, 0x2d \ + } \ + } + +# define HTTP_DEFLATE_TYPE "deflate" +# define HTTP_GZIP_TYPE "gzip" +# define HTTP_X_GZIP_TYPE "x-gzip" +# define HTTP_COMPRESS_TYPE "compress" +# define HTTP_X_COMPRESS_TYPE "x-compress" +# define HTTP_BROTLI_TYPE "br" +# define HTTP_IDENTITY_TYPE "identity" +# define HTTP_UNCOMPRESSED_TYPE "uncompressed" + +namespace mozilla { +namespace net { + +typedef enum { + HTTP_COMPRESS_GZIP, + HTTP_COMPRESS_DEFLATE, + HTTP_COMPRESS_COMPRESS, + HTTP_COMPRESS_BROTLI, + HTTP_COMPRESS_IDENTITY +} CompressMode; + +class BrotliWrapper { + public: + BrotliWrapper() + : mTotalOut(0), + mStatus(NS_OK), + mBrotliStateIsStreamEnd(false), + mRequest(nullptr), + mContext(nullptr), + mSourceOffset(0) { + 
BrotliDecoderStateInit(&mState, 0, 0, 0); + } + ~BrotliWrapper() { BrotliDecoderStateCleanup(&mState); } + + BrotliDecoderState mState; + Atomic<size_t, Relaxed> mTotalOut; + nsresult mStatus; + Atomic<bool, Relaxed> mBrotliStateIsStreamEnd; + + nsIRequest* mRequest; + nsISupports* mContext; + uint64_t mSourceOffset; +}; + +class nsHTTPCompressConv : public nsIStreamConverter, + public nsICompressConvStats, + public nsIThreadRetargetableStreamListener { + public: + // nsISupports methods + NS_DECL_THREADSAFE_ISUPPORTS + NS_DECL_NSIREQUESTOBSERVER + NS_DECL_NSISTREAMLISTENER + NS_DECL_NSICOMPRESSCONVSTATS + NS_DECL_NSITHREADRETARGETABLESTREAMLISTENER + + // nsIStreamConverter methods + NS_DECL_NSISTREAMCONVERTER + + nsHTTPCompressConv(); + + private: + virtual ~nsHTTPCompressConv(); + + nsCOMPtr<nsIStreamListener> + mListener; // this guy gets the converted data via his OnDataAvailable () + Atomic<CompressMode, Relaxed> mMode; + + unsigned char* mOutBuffer; + unsigned char* mInpBuffer; + + uint32_t mOutBufferLen; + uint32_t mInpBufferLen; + + UniquePtr<BrotliWrapper> mBrotli; + + nsCOMPtr<nsIStringInputStream> mStream; + + static nsresult BrotliHandler(nsIInputStream* stream, void* closure, + const char* dataIn, uint32_t, uint32_t avail, + uint32_t* countRead); + + nsresult do_OnDataAvailable(nsIRequest* request, nsISupports* aContext, + uint64_t aSourceOffset, const char* buffer, + uint32_t aCount); + + bool mCheckHeaderDone; + Atomic<bool> mStreamEnded; + bool mStreamInitialized; + bool mDummyStreamInitialised; + bool mFailUncleanStops; + + z_stream d_stream; + unsigned mLen, hMode, mSkipCount, mFlags; + + uint32_t check_header(nsIInputStream* iStr, uint32_t streamLen, nsresult* rv); + + Atomic<uint32_t, Relaxed> mDecodedDataLength; + + mutable mozilla::Mutex mMutex; +}; + +} // namespace net +} // namespace mozilla + +#endif diff --git a/netwerk/streamconv/converters/nsICompressConvStats.idl b/netwerk/streamconv/converters/nsICompressConvStats.idl new file mode 
100644 index 0000000000..a8837563ed --- /dev/null +++ b/netwerk/streamconv/converters/nsICompressConvStats.idl @@ -0,0 +1,17 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + +/** + * nsICompressConvStats + * + * This interface allows for the observation of decoded resource sizes + */ +[builtinclass, scriptable, uuid(58172ad0-46a9-4893-8fde-cd909c10792a)] +interface nsICompressConvStats : nsISupports +{ + readonly attribute uint64_t decodedDataLength; +}; diff --git a/netwerk/streamconv/converters/nsIndexedToHTML.cpp b/netwerk/streamconv/converters/nsIndexedToHTML.cpp new file mode 100644 index 0000000000..53e85970fa --- /dev/null +++ b/netwerk/streamconv/converters/nsIndexedToHTML.cpp @@ -0,0 +1,847 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsIndexedToHTML.h" + +#include "DateTimeFormat.h" +#include "mozilla/Encoding.h" +#include "mozilla/intl/LocaleService.h" +#include "nsNetUtil.h" +#include "netCore.h" +#include "nsStringStream.h" +#include "nsIFile.h" +#include "nsIFileURL.h" +#include "nsEscape.h" +#include "nsIDirIndex.h" +#include "nsURLHelper.h" +#include "nsIStringBundle.h" +#include "nsDirIndexParser.h" +#include "nsNativeCharsetUtils.h" +#include "nsString.h" +#include "nsContentUtils.h" +#include <algorithm> +#include "nsIChannel.h" +#include "mozilla/Unused.h" + +using mozilla::intl::LocaleService; + +NS_IMPL_ISUPPORTS(nsIndexedToHTML, nsIDirIndexListener, nsIStreamConverter, + nsIRequestObserver, nsIStreamListener) + +static void AppendNonAsciiToNCR(const nsAString& in, nsCString& out) { + nsAString::const_iterator start, end; + + in.BeginReading(start); + in.EndReading(end); + + while (start != end) { + if (*start < 128) { + out.Append(*start++); + } else { + out.AppendLiteral("&#x"); + out.AppendInt(*start++, 16); + out.Append(';'); + } + } +} + +nsIndexedToHTML::nsIndexedToHTML() : mExpectAbsLoc(false) {} + +nsresult nsIndexedToHTML::Create(nsISupports* aOuter, REFNSIID aIID, + void** aResult) { + nsresult rv; + if (aOuter) return NS_ERROR_NO_AGGREGATION; + + nsIndexedToHTML* _s = new nsIndexedToHTML(); + if (_s == nullptr) return NS_ERROR_OUT_OF_MEMORY; + + rv = _s->QueryInterface(aIID, aResult); + return rv; +} + +nsresult nsIndexedToHTML::Init(nsIStreamListener* aListener) { + nsresult rv = NS_OK; + + mListener = aListener; + + nsCOMPtr<nsIStringBundleService> sbs = + do_GetService(NS_STRINGBUNDLE_CONTRACTID, &rv); + if (NS_FAILED(rv)) return rv; + rv = sbs->CreateBundle(NECKO_MSGS_URL, getter_AddRefs(mBundle)); + + mExpectAbsLoc = false; + + return rv; +} + +NS_IMETHODIMP +nsIndexedToHTML::Convert(nsIInputStream* aFromStream, const char* aFromType, + const char* aToType, nsISupports* aCtxt, + nsIInputStream** res) { + return NS_ERROR_NOT_IMPLEMENTED; +} + 
+NS_IMETHODIMP +nsIndexedToHTML::AsyncConvertData(const char* aFromType, const char* aToType, + nsIStreamListener* aListener, + nsISupports* aCtxt) { + return Init(aListener); +} + +NS_IMETHODIMP +nsIndexedToHTML::GetConvertedType(const nsACString& aFromType, + nsIChannel* aChannel, nsACString& aToType) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +nsIndexedToHTML::OnStartRequest(nsIRequest* request) { + nsCString buffer; + nsresult rv = DoOnStartRequest(request, nullptr, buffer); + if (NS_FAILED(rv)) { + request->Cancel(rv); + } + + rv = mListener->OnStartRequest(request); + if (NS_FAILED(rv)) return rv; + + // The request may have been canceled, and if that happens, we want to + // suppress calls to OnDataAvailable. + request->GetStatus(&rv); + if (NS_FAILED(rv)) return rv; + + // Push our buffer to the listener. + + rv = SendToListener(request, nullptr, buffer); + return rv; +} + +nsresult nsIndexedToHTML::DoOnStartRequest(nsIRequest* request, + nsISupports* aContext, + nsCString& aBuffer) { + nsresult rv; + + nsCOMPtr<nsIChannel> channel = do_QueryInterface(request); + nsCOMPtr<nsIURI> uri; + rv = channel->GetOriginalURI(getter_AddRefs(uri)); + if (NS_FAILED(rv)) return rv; + + // We use the original URI for the title and parent link when it's a + // resource:// url, instead of the jar:file:// url it resolves to. 
+ if (!uri->SchemeIs("resource")) { + rv = channel->GetURI(getter_AddRefs(uri)); + if (NS_FAILED(rv)) return rv; + } + + channel->SetContentType("text/html"_ns); + + mParser = nsDirIndexParser::CreateInstance(); + if (!mParser) return NS_ERROR_FAILURE; + + rv = mParser->SetListener(this); + if (NS_FAILED(rv)) return rv; + + rv = mParser->OnStartRequest(request); + if (NS_FAILED(rv)) return rv; + + nsAutoCString baseUri, titleUri; + rv = uri->GetAsciiSpec(baseUri); + if (NS_FAILED(rv)) return rv; + + nsCOMPtr<nsIURI> titleURL; + rv = NS_MutateURI(uri).SetQuery(""_ns).SetRef(""_ns).Finalize(titleURL); + if (NS_FAILED(rv)) { + titleURL = uri; + } + + nsCString parentStr; + + nsCString buffer; + buffer.AppendLiteral("<!DOCTYPE html>\n<html>\n<head>\n"); + + // XXX - should be using the 300: line from the parser. + // We can't guarantee that that comes before any entry, so we'd have to + // buffer, and do other painful stuff. + // I'll deal with this when I make the changes to handle welcome messages + // The .. 
stuff should also come from the lower level protocols, but that + // would muck up the XUL display + // - bbaetz + + if (uri->SchemeIs("ftp")) { + // strip out the password here, so it doesn't show in the page title + // This is done by the 300: line generation in ftp, but we don't use + // that - see above + + nsAutoCString pw; + rv = titleURL->GetPassword(pw); + if (NS_FAILED(rv)) return rv; + if (!pw.IsEmpty()) { + nsCOMPtr<nsIURI> newUri; + rv = NS_MutateURI(titleURL).SetPassword(""_ns).Finalize(titleURL); + if (NS_FAILED(rv)) return rv; + } + + nsAutoCString path; + rv = uri->GetPathQueryRef(path); + if (NS_FAILED(rv)) return rv; + + if (!path.EqualsLiteral("//") && !path.LowerCaseEqualsLiteral("/%2f")) { + rv = uri->Resolve(".."_ns, parentStr); + if (NS_FAILED(rv)) return rv; + } + } else if (uri->SchemeIs("file")) { + nsCOMPtr<nsIFileURL> fileUrl = do_QueryInterface(uri); + nsCOMPtr<nsIFile> file; + rv = fileUrl->GetFile(getter_AddRefs(file)); + if (NS_FAILED(rv)) return rv; + + nsAutoCString url; + rv = net_GetURLSpecFromFile(file, url); + if (NS_FAILED(rv)) return rv; + baseUri.Assign(url); + + nsCOMPtr<nsIFile> parent; + rv = file->GetParent(getter_AddRefs(parent)); + + if (parent && NS_SUCCEEDED(rv)) { + net_GetURLSpecFromDir(parent, url); + if (NS_FAILED(rv)) return rv; + parentStr.Assign(url); + } + + // Directory index will be always encoded in UTF-8 if this is file url + buffer.AppendLiteral("<meta charset=\"UTF-8\">\n"); + + } else if (uri->SchemeIs("jar")) { + nsAutoCString path; + rv = uri->GetPathQueryRef(path); + if (NS_FAILED(rv)) return rv; + + // a top-level jar directory URL is of the form jar:foo.zip!/ + // path will be of the form foo.zip!/, and its last two characters + // will be "!/" + // XXX this won't work correctly when the name of the directory being + // XXX displayed ends with "!", but then again, jar: URIs don't deal + // XXX particularly well with such directories anyway + if (!StringEndsWith(path, "!/"_ns)) { + rv = 
uri->Resolve(".."_ns, parentStr); + if (NS_FAILED(rv)) return rv; + } + } else { + // default behavior for other protocols is to assume the channel's + // URL references a directory ending in '/' -- fixup if necessary. + nsAutoCString path; + rv = uri->GetPathQueryRef(path); + if (NS_FAILED(rv)) return rv; + if (baseUri.Last() != '/') { + baseUri.Append('/'); + path.Append('/'); + mozilla::Unused << NS_MutateURI(uri).SetPathQueryRef(path).Finalize(uri); + } + if (!path.EqualsLiteral("/")) { + rv = uri->Resolve(".."_ns, parentStr); + if (NS_FAILED(rv)) return rv; + } + } + + rv = titleURL->GetAsciiSpec(titleUri); + if (NS_FAILED(rv)) { + return rv; + } + + buffer.AppendLiteral( + "<style type=\"text/css\">\n" + ":root {\n" + " font-family: sans-serif;\n" + "}\n" + "img {\n" + " border: 0;\n" + "}\n" + "th {\n" + " text-align: start;\n" + " white-space: nowrap;\n" + "}\n" + "th > a {\n" + " color: inherit;\n" + "}\n" + "table[order] > thead > tr > th {\n" + " cursor: pointer;\n" + "}\n" + "table[order] > thead > tr > th::after {\n" + " display: none;\n" + " width: .8em;\n" + " margin-inline-end: -.8em;\n" + " text-align: end;\n" + "}\n" + "table[order=\"asc\"] > thead > tr > th::after {\n" + " content: \"\\2193\"; /* DOWNWARDS ARROW (U+2193) */\n" + "}\n" + "table[order=\"desc\"] > thead > tr > th::after {\n" + " content: \"\\2191\"; /* UPWARDS ARROW (U+2191) */\n" + "}\n" + "table[order][order-by=\"0\"] > thead > tr > th:first-child > a ,\n" + "table[order][order-by=\"1\"] > thead > tr > th:first-child + th > a ,\n" + "table[order][order-by=\"2\"] > thead > tr > th:first-child + th + th > " + "a {\n" + " text-decoration: underline;\n" + "}\n" + "table[order][order-by=\"0\"] > thead > tr > th:first-child::after ,\n" + "table[order][order-by=\"1\"] > thead > tr > th:first-child + th::after " + ",\n" + "table[order][order-by=\"2\"] > thead > tr > th:first-child + th + " + "th::after {\n" + " display: inline-block;\n" + "}\n" + "table.remove-hidden > tbody > 
tr.hidden-object {\n" + " display: none;\n" + "}\n" + "td {\n" + " white-space: nowrap;\n" + "}\n" + "table.ellipsis {\n" + " width: 100%;\n" + " table-layout: fixed;\n" + " border-spacing: 0;\n" + "}\n" + "table.ellipsis > tbody > tr > td {\n" + " padding: 0;\n" + " overflow: hidden;\n" + " text-overflow: ellipsis;\n" + "}\n" + "/* name */\n" + "/* name */\n" + "th:first-child {\n" + " padding-inline-end: 2em;\n" + "}\n" + "/* size */\n" + "th:first-child + th {\n" + " padding-inline-end: 1em;\n" + "}\n" + "td:first-child + td {\n" + " text-align: end;\n" + " padding-inline-end: 1em;\n" + "}\n" + "/* date */\n" + "td:first-child + td + td {\n" + " padding-inline-start: 1em;\n" + " padding-inline-end: .5em;\n" + "}\n" + "/* time */\n" + "td:first-child + td + td + td {\n" + " padding-inline-start: .5em;\n" + "}\n" + ".symlink {\n" + " font-style: italic;\n" + "}\n" + ".dir ,\n" + ".symlink ,\n" + ".file {\n" + " margin-inline-start: 20px;\n" + "}\n" + ".dir::before ,\n" + ".file > img {\n" + " margin-inline-end: 4px;\n" + " margin-inline-start: -20px;\n" + " max-width: 16px;\n" + " max-height: 16px;\n" + " vertical-align: middle;\n" + "}\n" + ".dir::before {\n" + " content: url(resource://content-accessible/html/folder.png);\n" + "}\n" + "</style>\n" + "<link rel=\"stylesheet\" media=\"screen, projection\" type=\"text/css\"" + " href=\"chrome://global/skin/dirListing/dirListing.css\">\n" + "<script type=\"application/javascript\">\n" + "'use strict';\n" + "var gTable, gOrderBy, gTBody, gRows, gUI_showHidden;\n" + "document.addEventListener(\"DOMContentLoaded\", function() {\n" + " gTable = document.getElementsByTagName(\"table\")[0];\n" + " gTBody = gTable.tBodies[0];\n" + " if (gTBody.rows.length < 2)\n" + " return;\n" + " gUI_showHidden = document.getElementById(\"UI_showHidden\");\n" + " var headCells = gTable.tHead.rows[0].cells,\n" + " hiddenObjects = false;\n" + " function rowAction(i) {\n" + " return function(event) {\n" + " event.preventDefault();\n" + " 
orderBy(i);\n" + " }\n" + " }\n" + " for (var i = headCells.length - 1; i >= 0; i--) {\n" + " var anchor = document.createElement(\"a\");\n" + " anchor.href = \"\";\n" + " anchor.appendChild(headCells[i].firstChild);\n" + " headCells[i].appendChild(anchor);\n" + " headCells[i].addEventListener(\"click\", rowAction(i), true);\n" + " }\n" + " if (gUI_showHidden) {\n" + " gRows = Array.from(gTBody.rows);\n" + " hiddenObjects = gRows.some(row => row.className == " + "\"hidden-object\");\n" + " }\n" + " gTable.setAttribute(\"order\", \"\");\n" + " if (hiddenObjects) {\n" + " gUI_showHidden.style.display = \"block\";\n" + " updateHidden();\n" + " }\n" + "}, \"false\");\n" + "function compareRows(rowA, rowB) {\n" + " var a = rowA.cells[gOrderBy].getAttribute(\"sortable-data\") || " + "\"\";\n" + " var b = rowB.cells[gOrderBy].getAttribute(\"sortable-data\") || " + "\"\";\n" + " var intA = +a;\n" + " var intB = +b;\n" + " if (a == intA && b == intB) {\n" + " a = intA;\n" + " b = intB;\n" + " } else {\n" + " a = a.toLowerCase();\n" + " b = b.toLowerCase();\n" + " }\n" + " if (a < b)\n" + " return -1;\n" + " if (a > b)\n" + " return 1;\n" + " return 0;\n" + "}\n" + "function orderBy(column) {\n" + " if (!gRows)\n" + " gRows = Array.from(gTBody.rows);\n" + " var order;\n" + " if (gOrderBy == column) {\n" + " order = gTable.getAttribute(\"order\") == \"asc\" ? 
\"desc\" : " + "\"asc\";\n" + " } else {\n" + " order = \"asc\";\n" + " gOrderBy = column;\n" + " gTable.setAttribute(\"order-by\", column);\n" + " gRows.sort(compareRows);\n" + " }\n" + " gTable.removeChild(gTBody);\n" + " gTable.setAttribute(\"order\", order);\n" + " if (order == \"asc\")\n" + " for (var i = 0; i < gRows.length; i++)\n" + " gTBody.appendChild(gRows[i]);\n" + " else\n" + " for (var i = gRows.length - 1; i >= 0; i--)\n" + " gTBody.appendChild(gRows[i]);\n" + " gTable.appendChild(gTBody);\n" + "}\n" + "function updateHidden() {\n" + " gTable.className = " + "gUI_showHidden.getElementsByTagName(\"input\")[0].checked ?\n" + " \"\" :\n" + " \"remove-hidden\";\n" + "}\n" + "</script>\n"); + + buffer.AppendLiteral(R"(<link rel="icon" type="image/png" href=")"); + nsCOMPtr<nsIURI> innerUri = NS_GetInnermostURI(uri); + if (!innerUri) return NS_ERROR_UNEXPECTED; + nsCOMPtr<nsIFileURL> fileURL(do_QueryInterface(innerUri)); + // XXX bug 388553: can't use skinnable icons here due to security restrictions + if (fileURL) { + buffer.AppendLiteral( + "" + "AAAAAQCAYAAAAf8%2F9hAAAAGXRFWHRTb2Z0d2FyZQBBZG9i" + "ZSBJbWFnZVJlYWR5ccllPAAAAjFJREFUeNqsU8uOElEQPffR" + "3XQ3ONASdBJCSBxHos5%2B3Bg3rvkCv8PElS78gPkO%2FATj" + "QoUdO2ftrJiRh6aneTb9sOpC4weMN6lcuFV16pxDIfI8x12O" + "YIDhcPiu2Wx%2B%2FHF5CW1Z6Jyegt%2FTNEWSJIjjGFEUIQ" + "xDrFYrWFSzXC4%2FdLvd95pRKpXKy%2BpRFZ7nwaWo1%2BsG" + "nQG2260BKJfLKJVKGI1GEEJw7ateryd0v993W63WEwjgxfn5" + "obGYzgCbzcaEbdsIggDj8Riu6z6iUk9SYZMSx8W0LMsM%2FS" + "KK75xnJlIq80anQXdbEp0OhcPJ0eiaJnGRMEyyPDsAKKUM9c" + "lkYoDo3SZJzzSdp0VSKYmfV1co%2Bz580kw5KDIM8RbRfEnU" + "f1HzxtQyMAGcaGruTKczMzEIaqhKifV6jd%2BzGQQB5llunF" + "%2FM52BizC2K5sYPYvZcu653tjOM9O93wnYc08gmkgg4VAxi" + "xfqFUJT36AYBZGd6PJkFCZnnlBxMp38gqIgLpZB0y4Nph18l" + "yWh5FFbrOSxbl3V4G%2BVB7T4ajYYxTyuLtO%2BCvWGgJE1M" + "c7JNsJEhvgw%2FQV4fo%2F24nbEsX2u1d5sVyn8sJO0ZAQiI" + "YnFh%2BxrfLz%2Fj29cBS%2FO14zg3i8XigW3ZkErDtmKoeM" + "%2BAJGRMnXeEPGKf0nCD1ydvkDzU9Jbc6OpR7WIw6L8lQ%2B" + 
"4pQ1%2FlPF0RGM9Ns91Wmptk0GfB4EJkt77vXYj%2F8m%2B8" + "y%2FkrwABHbz2H9V68DQAAAABJRU5ErkJggg%3D%3D"); + } else { + buffer.AppendLiteral( + "" + "AAAAAQCAYAAAAf8%2F9hAAAAGXRFWHRTb2Z0d2FyZQBBZG9i" + "ZSBJbWFnZVJlYWR5ccllPAAAAeBJREFUeNqcU81O20AQ%2Ft" + "Z2AgQSYQRqL1UPVG2hAUQkxLEStz4DrXpLpD5Drz31Cajax%" + "2Bghhx6qHIJURBTxIwQRwopCBbZjHMcOTrzermPipsSt1Iw0" + "3p3ZmW%2B%2B2R0TxhgOD34wjCHZlQ0iDYz9yvEfhxMTCYhE" + "QDIZhkxKd2sqzX2TOD2vBQCQhpPefng1ZP2dVPlLLdpL8SEM" + "cxng%2Fbs0RIHhtgs4twxOh%2BHjZxvzDx%2F3GQQiDFISiR" + "BLFMPKTRMollzcWECrDVhtxtdRVsL9youPxGj%2FbdfFlUZh" + "tDyYbYqWRUdai1oQRZ5oHeHl2gNM%2B01Uqio8RlH%2Bnsaz" + "JzNwXcq1B%2BiXPHprlEEymeBfXs1w8XxxihfyuXqoHqpoGj" + "ZM04bddgG%2F9%2B8WGj87qDdsrK9m%2BoA%2BpbhQTDh2l1" + "%2Bi2weNbSHMZyjvNXmVbqh9Fj5Oz27uEoP%2BSTxANruJs9" + "L%2FT6P0ewqPx5nmiAG5f6AoCtN1PbJzuRyJAyDBzzSQYvEr" + "f06yYxhGXlEa8H2KVGoasjwLx3Ewk858opQWXm%2B%2Fib9E" + "QrBzclLLLy89xYvlpchvtixcX6uo1y%2FzsiwHrkIsgKbp%2" + "BYWFOWicuqppoNTnStHzPFCPQhBEBOyGAX4JMADFetubi4BS" + "YAAAAABJRU5ErkJggg%3D%3D"); + } + buffer.AppendLiteral("\">\n<title>"); + + // Everything needs to end in a /, + // otherwise we end up linking to file:///foo/dirfile + + if (!mTextToSubURI) { + mTextToSubURI = do_GetService(NS_ITEXTTOSUBURI_CONTRACTID, &rv); + if (NS_FAILED(rv)) return rv; + } + + nsAutoString unEscapeSpec; + rv = mTextToSubURI->UnEscapeAndConvert("UTF-8"_ns, titleUri, unEscapeSpec); + if (NS_FAILED(rv)) { + return rv; + } + + nsCString htmlEscSpecUtf8; + nsAppendEscapedHTML(NS_ConvertUTF16toUTF8(unEscapeSpec), htmlEscSpecUtf8); + AutoTArray<nsString, 1> formatTitle; + CopyUTF8toUTF16(htmlEscSpecUtf8, *formatTitle.AppendElement()); + + nsAutoString title; + rv = mBundle->FormatStringFromName("DirTitle", formatTitle, title); + if (NS_FAILED(rv)) return rv; + + // we want to convert string bundle to NCR + // to ensure they're shown in any charsets + AppendNonAsciiToNCR(title, buffer); + + buffer.AppendLiteral("</title>\n"); + + // If there is a quote character in the baseUri, 
then + // lets not add a base URL. The reason for this is that + // if we stick baseUri containing a quote into a quoted + // string, the quote character will prematurely close + // the base href string. This is a fall-back check; + // that's why it is OK to not use a base rather than + // trying to play nice and escaping the quotes. See bug + // 358128. + + if (!baseUri.Contains('"')) { + // Great, the baseUri does not contain a char that + // will prematurely close the string. Go ahead an + // add a base href, but only do so if we're not + // dealing with a resource URI. + if (!uri->SchemeIs("resource")) { + buffer.AppendLiteral("<base href=\""); + nsAppendEscapedHTML(baseUri, buffer); + buffer.AppendLiteral("\" />\n"); + } + } else { + NS_ERROR("broken protocol handler didn't escape double-quote."); + } + + nsCString direction("ltr"_ns); + if (LocaleService::GetInstance()->IsAppLocaleRTL()) { + direction.AssignLiteral("rtl"); + } + + buffer.AppendLiteral("</head>\n<body dir=\""); + buffer.Append(direction); + buffer.AppendLiteral("\">\n<h1>"); + AppendNonAsciiToNCR(title, buffer); + buffer.AppendLiteral("</h1>\n"); + + if (!parentStr.IsEmpty()) { + nsAutoString parentText; + rv = mBundle->GetStringFromName("DirGoUp", parentText); + if (NS_FAILED(rv)) return rv; + + buffer.AppendLiteral(R"(<p id="UI_goUp"><a class="up" href=")"); + nsAppendEscapedHTML(parentStr, buffer); + buffer.AppendLiteral("\">"); + AppendNonAsciiToNCR(parentText, buffer); + buffer.AppendLiteral("</a></p>\n"); + } + + if (uri->SchemeIs("file")) { + nsAutoString showHiddenText; + rv = mBundle->GetStringFromName("ShowHidden", showHiddenText); + if (NS_FAILED(rv)) return rv; + + buffer.AppendLiteral( + "<p id=\"UI_showHidden\" style=\"display:none\"><label><input " + "type=\"checkbox\" checked onchange=\"updateHidden()\">"); + AppendNonAsciiToNCR(showHiddenText, buffer); + buffer.AppendLiteral("</label></p>\n"); + } + + buffer.AppendLiteral( + "<table>\n" + " <thead>\n" + " <tr>\n" + " <th>"); + 
+ nsAutoString columnText; + rv = mBundle->GetStringFromName("DirColName", columnText); + if (NS_FAILED(rv)) return rv; + AppendNonAsciiToNCR(columnText, buffer); + buffer.AppendLiteral( + "</th>\n" + " <th>"); + + rv = mBundle->GetStringFromName("DirColSize", columnText); + if (NS_FAILED(rv)) return rv; + AppendNonAsciiToNCR(columnText, buffer); + buffer.AppendLiteral( + "</th>\n" + " <th colspan=\"2\">"); + + rv = mBundle->GetStringFromName("DirColMTime", columnText); + if (NS_FAILED(rv)) return rv; + AppendNonAsciiToNCR(columnText, buffer); + buffer.AppendLiteral( + "</th>\n" + " </tr>\n" + " </thead>\n"); + buffer.AppendLiteral(" <tbody>\n"); + + aBuffer = buffer; + return rv; +} + +NS_IMETHODIMP +nsIndexedToHTML::OnStopRequest(nsIRequest* request, nsresult aStatus) { + if (NS_SUCCEEDED(aStatus)) { + nsCString buffer; + buffer.AssignLiteral("</tbody></table></body></html>\n"); + + aStatus = SendToListener(request, nullptr, buffer); + } + + mParser->OnStopRequest(request, aStatus); + mParser = nullptr; + + return mListener->OnStopRequest(request, aStatus); +} + +nsresult nsIndexedToHTML::SendToListener(nsIRequest* aRequest, + nsISupports* aContext, + const nsACString& aBuffer) { + nsCOMPtr<nsIInputStream> inputData; + nsresult rv = NS_NewCStringInputStream(getter_AddRefs(inputData), aBuffer); + NS_ENSURE_SUCCESS(rv, rv); + return mListener->OnDataAvailable(aRequest, inputData, 0, aBuffer.Length()); +} + +NS_IMETHODIMP +nsIndexedToHTML::OnDataAvailable(nsIRequest* aRequest, nsIInputStream* aInput, + uint64_t aOffset, uint32_t aCount) { + return mParser->OnDataAvailable(aRequest, aInput, aOffset, aCount); +} + +static nsresult FormatTime(const nsDateFormatSelector aDateFormatSelector, + const nsTimeFormatSelector aTimeFormatSelector, + const PRTime aPrTime, nsAString& aStringOut) { + // FormatPRExplodedTime will use GMT based formatted string (e.g. GMT+1) + // instead of local time zone name (e.g. CEST). 
+ // To avoid this case when ResistFingerprinting is disabled, use + // |FormatPRTime| to show exact time zone name. + if (!nsContentUtils::ShouldResistFingerprinting()) { + return mozilla::DateTimeFormat::FormatPRTime( + aDateFormatSelector, aTimeFormatSelector, aPrTime, aStringOut); + } + + PRExplodedTime prExplodedTime; + PR_ExplodeTime(aPrTime, PR_GMTParameters, &prExplodedTime); + return mozilla::DateTimeFormat::FormatPRExplodedTime( + aDateFormatSelector, aTimeFormatSelector, &prExplodedTime, aStringOut); +} + +NS_IMETHODIMP +nsIndexedToHTML::OnIndexAvailable(nsIRequest* aRequest, nsISupports* aCtxt, + nsIDirIndex* aIndex) { + nsresult rv; + if (!aIndex) return NS_ERROR_NULL_POINTER; + + nsCString pushBuffer; + pushBuffer.AppendLiteral("<tr"); + + // We don't know the file's character set yet, so retrieve the raw bytes + // which will be decoded by the HTML parser. + nsCString loc; + aIndex->GetLocation(loc); + + // Adjust the length in case unescaping shortened the string. + loc.Truncate(nsUnescapeCount(loc.BeginWriting())); + + if (loc.IsEmpty()) { + return NS_ERROR_ILLEGAL_VALUE; + } + if (loc.First() == char16_t('.')) + pushBuffer.AppendLiteral(" class=\"hidden-object\""); + + pushBuffer.AppendLiteral(">\n <td sortable-data=\""); + + // The sort key is the name of the item, prepended by either 0, 1 or 2 + // in order to group items. 
+ uint32_t type; + aIndex->GetType(&type); + switch (type) { + case nsIDirIndex::TYPE_SYMLINK: + pushBuffer.Append('0'); + break; + case nsIDirIndex::TYPE_DIRECTORY: + pushBuffer.Append('1'); + break; + default: + pushBuffer.Append('2'); + break; + } + nsCString escaped; + nsAppendEscapedHTML(loc, escaped); + pushBuffer.Append(escaped); + + pushBuffer.AppendLiteral( + R"("><table class="ellipsis"><tbody><tr><td><a class=")"); + switch (type) { + case nsIDirIndex::TYPE_DIRECTORY: + pushBuffer.AppendLiteral("dir"); + break; + case nsIDirIndex::TYPE_SYMLINK: + pushBuffer.AppendLiteral("symlink"); + break; + default: + pushBuffer.AppendLiteral("file"); + break; + } + + pushBuffer.AppendLiteral("\" href=\""); + + // need to escape links + nsAutoCString locEscaped; + + // Adding trailing slash helps to recognize whether the URL points to a file + // or a directory (bug #214405). + if ((type == nsIDirIndex::TYPE_DIRECTORY) && (loc.Last() != '/')) { + loc.Append('/'); + } + + // now minimally re-escape the location... + uint32_t escFlags; + // for some protocols, we expect the location to be absolute. + // if so, and if the location indeed appears to be a valid URI, then go + // ahead and treat it like one. + + nsAutoCString scheme; + if (mExpectAbsLoc && NS_SUCCEEDED(net_ExtractURLScheme(loc, scheme))) { + // escape as absolute + escFlags = esc_Forced | esc_AlwaysCopy | esc_Minimal; + } else { + // escape as relative + // esc_Directory is needed because directories have a trailing slash. + // Without it, the trailing '/' will be escaped, and links from within + // that directory will be incorrect + escFlags = esc_Forced | esc_AlwaysCopy | esc_FileBaseName | esc_Colon | + esc_Directory; + } + NS_EscapeURL(loc.get(), loc.Length(), escFlags, locEscaped); + // esc_Directory does not escape the semicolons, so if a filename + // contains semicolons we need to manually escape them. 
+ // This replacement should be removed in bug #473280 + locEscaped.ReplaceSubstring(";", "%3b"); + nsAppendEscapedHTML(locEscaped, pushBuffer); + pushBuffer.AppendLiteral("\">"); + + if (type == nsIDirIndex::TYPE_FILE || type == nsIDirIndex::TYPE_UNKNOWN) { + pushBuffer.AppendLiteral("<img src=\"moz-icon://"); + int32_t lastDot = locEscaped.RFindChar('.'); + if (lastDot != kNotFound) { + locEscaped.Cut(0, lastDot); + nsAppendEscapedHTML(locEscaped, pushBuffer); + } else { + pushBuffer.AppendLiteral("unknown"); + } + pushBuffer.AppendLiteral("?size=16\" alt=\""); + + nsAutoString altText; + rv = mBundle->GetStringFromName("DirFileLabel", altText); + if (NS_FAILED(rv)) return rv; + AppendNonAsciiToNCR(altText, pushBuffer); + pushBuffer.AppendLiteral("\">"); + } + + pushBuffer.Append(escaped); + pushBuffer.AppendLiteral("</a></td></tr></tbody></table></td>\n <td"); + + if (type == nsIDirIndex::TYPE_DIRECTORY || + type == nsIDirIndex::TYPE_SYMLINK) { + pushBuffer.Append('>'); + } else { + int64_t size; + aIndex->GetSize(&size); + + if (uint64_t(size) != UINT64_MAX) { + pushBuffer.AppendLiteral(" sortable-data=\""); + pushBuffer.AppendInt(size); + pushBuffer.AppendLiteral("\">"); + nsAutoCString sizeString; + FormatSizeString(size, sizeString); + pushBuffer.Append(sizeString); + } else { + pushBuffer.Append('>'); + } + } + pushBuffer.AppendLiteral("</td>\n <td"); + + PRTime t; + aIndex->GetLastModified(&t); + + if (t == -1LL) { + pushBuffer.AppendLiteral("></td>\n <td>"); + } else { + pushBuffer.AppendLiteral(" sortable-data=\""); + pushBuffer.AppendInt(static_cast<int64_t>(t)); + pushBuffer.AppendLiteral("\">"); + nsAutoString formatted; + FormatTime(kDateFormatShort, kTimeFormatNone, t, formatted); + AppendNonAsciiToNCR(formatted, pushBuffer); + pushBuffer.AppendLiteral("</td>\n <td>"); + FormatTime(kDateFormatNone, kTimeFormatLong, t, formatted); + // use NCR to show date in any doc charset + AppendNonAsciiToNCR(formatted, pushBuffer); + } + + 
pushBuffer.AppendLiteral("</td>\n</tr>"); + + return SendToListener(aRequest, aCtxt, pushBuffer); +} + +NS_IMETHODIMP +nsIndexedToHTML::OnInformationAvailable(nsIRequest* aRequest, + nsISupports* aCtxt, + const nsAString& aInfo) { + nsAutoCString pushBuffer; + nsAutoCString escapedUtf8; + nsAppendEscapedHTML(NS_ConvertUTF16toUTF8(aInfo), escapedUtf8); + pushBuffer.AppendLiteral("<tr>\n <td>"); + // escaped is provided in Unicode, so write hex NCRs as necessary + // to prevent the HTML parser from applying a character set. + AppendNonAsciiToNCR(NS_ConvertUTF8toUTF16(escapedUtf8), pushBuffer); + pushBuffer.AppendLiteral( + "</td>\n <td></td>\n <td></td>\n <td></td>\n</tr>\n"); + + return SendToListener(aRequest, aCtxt, pushBuffer); +} + +void nsIndexedToHTML::FormatSizeString(int64_t inSize, + nsCString& outSizeString) { + outSizeString.Truncate(); + if (inSize > int64_t(0)) { + // round up to the nearest Kilobyte + int64_t upperSize = (inSize + int64_t(1023)) / int64_t(1024); + outSizeString.AppendInt(upperSize); + outSizeString.AppendLiteral(" KB"); + } +} diff --git a/netwerk/streamconv/converters/nsIndexedToHTML.h b/netwerk/streamconv/converters/nsIndexedToHTML.h new file mode 100644 index 0000000000..6173ecb523 --- /dev/null +++ b/netwerk/streamconv/converters/nsIndexedToHTML.h @@ -0,0 +1,61 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef ____nsindexedtohtml___h___ +#define ____nsindexedtohtml___h___ + +#include "nsCOMPtr.h" +#include "nsString.h" +#include "nsIStreamConverter.h" +#include "nsIDirIndexListener.h" + +#define NS_NSINDEXEDTOHTMLCONVERTER_CID \ + { \ + 0xcf0f71fd, 0xfafd, 0x4e2b, { \ + 0x9f, 0xdc, 0x13, 0x4d, 0x97, 0x2e, 0x16, 0xe2 \ + } \ + } + +class nsIStringBundle; +class nsITextToSubURI; + +class nsIndexedToHTML : public nsIStreamConverter, public nsIDirIndexListener { + public: + NS_DECL_ISUPPORTS + NS_DECL_NSISTREAMCONVERTER + NS_DECL_NSIREQUESTOBSERVER + NS_DECL_NSISTREAMLISTENER + NS_DECL_NSIDIRINDEXLISTENER + + nsIndexedToHTML(); + + nsresult Init(nsIStreamListener* aListener); + + static nsresult Create(nsISupports* aOuter, REFNSIID aIID, void** aResult); + + protected: + void FormatSizeString(int64_t inSize, nsCString& outSizeString); + nsresult SendToListener(nsIRequest* aRequest, nsISupports* aContext, + const nsACString& aBuffer); + // Helper to properly implement OnStartRequest + nsresult DoOnStartRequest(nsIRequest* request, nsISupports* aContext, + nsCString& aBuffer); + + protected: + nsCOMPtr<nsIDirIndexParser> mParser; + nsCOMPtr<nsIStreamListener> mListener; // final listener (consumer) + + nsCOMPtr<nsIStringBundle> mBundle; + + nsCOMPtr<nsITextToSubURI> mTextToSubURI; + + private: + // Expecting absolute locations, given by 201 lines. + bool mExpectAbsLoc; + + virtual ~nsIndexedToHTML() = default; +}; + +#endif diff --git a/netwerk/streamconv/converters/nsMultiMixedConv.cpp b/netwerk/streamconv/converters/nsMultiMixedConv.cpp new file mode 100644 index 0000000000..8acc130421 --- /dev/null +++ b/netwerk/streamconv/converters/nsMultiMixedConv.cpp @@ -0,0 +1,1038 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsMultiMixedConv.h" +#include "plstr.h" +#include "nsIHttpChannel.h" +#include "nsNetCID.h" +#include "nsMimeTypes.h" +#include "nsIStringStream.h" +#include "nsCRT.h" +#include "nsIHttpChannelInternal.h" +#include "nsURLHelper.h" +#include "nsIStreamConverterService.h" +#include <algorithm> +#include "nsContentSecurityManager.h" +#include "nsHttp.h" +#include "nsNetUtil.h" +#include "nsIURI.h" +#include "nsHttpHeaderArray.h" +#include "mozilla/AutoRestore.h" + +nsPartChannel::nsPartChannel(nsIChannel* aMultipartChannel, uint32_t aPartID, + nsIStreamListener* aListener) + : mMultipartChannel(aMultipartChannel), + mListener(aListener), + mStatus(NS_OK), + mLoadFlags(0), + mContentDisposition(0), + mContentLength(UINT64_MAX), + mIsByteRangeRequest(false), + mByteRangeStart(0), + mByteRangeEnd(0), + mPartID(aPartID), + mIsLastPart(false) { + // Inherit the load flags from the original channel... + mMultipartChannel->GetLoadFlags(&mLoadFlags); + + mMultipartChannel->GetLoadGroup(getter_AddRefs(mLoadGroup)); +} + +void nsPartChannel::InitializeByteRange(int64_t aStart, int64_t aEnd) { + mIsByteRangeRequest = true; + + mByteRangeStart = aStart; + mByteRangeEnd = aEnd; +} + +nsresult nsPartChannel::SendOnStartRequest(nsISupports* aContext) { + return mListener->OnStartRequest(this); +} + +nsresult nsPartChannel::SendOnDataAvailable(nsISupports* aContext, + nsIInputStream* aStream, + uint64_t aOffset, uint32_t aLen) { + return mListener->OnDataAvailable(this, aStream, aOffset, aLen); +} + +nsresult nsPartChannel::SendOnStopRequest(nsISupports* aContext, + nsresult aStatus) { + // Drop the listener + nsCOMPtr<nsIStreamListener> listener; + listener.swap(mListener); + return listener->OnStopRequest(this, aStatus); +} + +void nsPartChannel::SetContentDisposition( + const nsACString& aContentDispositionHeader) { + mContentDispositionHeader = aContentDispositionHeader; + nsCOMPtr<nsIURI> uri; + GetURI(getter_AddRefs(uri)); + 
NS_GetFilenameFromDisposition(mContentDispositionFilename, + mContentDispositionHeader); + mContentDisposition = + NS_GetContentDispositionFromHeader(mContentDispositionHeader, this); +} + +// +// nsISupports implementation... +// + +NS_IMPL_ADDREF(nsPartChannel) +NS_IMPL_RELEASE(nsPartChannel) + +NS_INTERFACE_MAP_BEGIN(nsPartChannel) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIChannel) + NS_INTERFACE_MAP_ENTRY(nsIRequest) + NS_INTERFACE_MAP_ENTRY(nsIChannel) + NS_INTERFACE_MAP_ENTRY(nsIByteRangeRequest) + NS_INTERFACE_MAP_ENTRY(nsIMultiPartChannel) +NS_INTERFACE_MAP_END + +// +// nsIRequest implementation... +// + +NS_IMETHODIMP +nsPartChannel::GetName(nsACString& aResult) { + return mMultipartChannel->GetName(aResult); +} + +NS_IMETHODIMP +nsPartChannel::IsPending(bool* aResult) { + // For now, consider the active lifetime of each part the same as + // the underlying multipart channel... This is not exactly right, + // but it is good enough :-) + return mMultipartChannel->IsPending(aResult); +} + +NS_IMETHODIMP +nsPartChannel::GetStatus(nsresult* aResult) { + nsresult rv = NS_OK; + + if (NS_FAILED(mStatus)) { + *aResult = mStatus; + } else { + rv = mMultipartChannel->GetStatus(aResult); + } + + return rv; +} + +NS_IMETHODIMP +nsPartChannel::Cancel(nsresult aStatus) { + // Cancelling an individual part must not cancel the underlying + // multipart channel... + // XXX but we should stop sending data for _this_ part channel! + mStatus = aStatus; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetCanceled(bool* aCanceled) { + *aCanceled = NS_FAILED(mStatus); + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::Suspend(void) { + // Suspending an individual part must not suspend the underlying + // multipart channel... + // XXX why not? + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::Resume(void) { + // Resuming an individual part must not resume the underlying + // multipart channel... + // XXX why not? 
+ return NS_OK; +} + +// +// nsIChannel implementation +// + +NS_IMETHODIMP +nsPartChannel::GetOriginalURI(nsIURI** aURI) { + return mMultipartChannel->GetOriginalURI(aURI); +} + +NS_IMETHODIMP +nsPartChannel::SetOriginalURI(nsIURI* aURI) { + return mMultipartChannel->SetOriginalURI(aURI); +} + +NS_IMETHODIMP +nsPartChannel::GetURI(nsIURI** aURI) { return mMultipartChannel->GetURI(aURI); } + +NS_IMETHODIMP +nsPartChannel::Open(nsIInputStream** aStream) { + nsCOMPtr<nsIStreamListener> listener; + nsresult rv = + nsContentSecurityManager::doContentSecurityCheck(this, listener); + NS_ENSURE_SUCCESS(rv, rv); + + // This channel cannot be opened! + return NS_ERROR_FAILURE; +} + +NS_IMETHODIMP +nsPartChannel::AsyncOpen(nsIStreamListener* aListener) { + nsCOMPtr<nsIStreamListener> listener = aListener; + nsresult rv = + nsContentSecurityManager::doContentSecurityCheck(this, listener); + NS_ENSURE_SUCCESS(rv, rv); + + // This channel cannot be opened! + return NS_ERROR_FAILURE; +} + +NS_IMETHODIMP +nsPartChannel::GetLoadFlags(nsLoadFlags* aLoadFlags) { + *aLoadFlags = mLoadFlags; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::SetLoadFlags(nsLoadFlags aLoadFlags) { + mLoadFlags = aLoadFlags; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetTRRMode(nsIRequest::TRRMode* aTRRMode) { + return GetTRRModeImpl(aTRRMode); +} + +NS_IMETHODIMP +nsPartChannel::SetTRRMode(nsIRequest::TRRMode aTRRMode) { + return SetTRRModeImpl(aTRRMode); +} + +NS_IMETHODIMP +nsPartChannel::GetIsDocument(bool* aIsDocument) { + return NS_GetIsDocumentChannel(this, aIsDocument); +} + +NS_IMETHODIMP +nsPartChannel::GetLoadGroup(nsILoadGroup** aLoadGroup) { + *aLoadGroup = mLoadGroup; + NS_IF_ADDREF(*aLoadGroup); + + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::SetLoadGroup(nsILoadGroup* aLoadGroup) { + mLoadGroup = aLoadGroup; + + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetOwner(nsISupports** aOwner) { + return mMultipartChannel->GetOwner(aOwner); +} + +NS_IMETHODIMP 
+nsPartChannel::SetOwner(nsISupports* aOwner) { + return mMultipartChannel->SetOwner(aOwner); +} + +NS_IMETHODIMP +nsPartChannel::GetLoadInfo(nsILoadInfo** aLoadInfo) { + return mMultipartChannel->GetLoadInfo(aLoadInfo); +} + +NS_IMETHODIMP +nsPartChannel::SetLoadInfo(nsILoadInfo* aLoadInfo) { + MOZ_RELEASE_ASSERT(aLoadInfo, "loadinfo can't be null"); + return mMultipartChannel->SetLoadInfo(aLoadInfo); +} + +NS_IMETHODIMP +nsPartChannel::GetNotificationCallbacks(nsIInterfaceRequestor** aCallbacks) { + return mMultipartChannel->GetNotificationCallbacks(aCallbacks); +} + +NS_IMETHODIMP +nsPartChannel::SetNotificationCallbacks(nsIInterfaceRequestor* aCallbacks) { + return mMultipartChannel->SetNotificationCallbacks(aCallbacks); +} + +NS_IMETHODIMP +nsPartChannel::GetSecurityInfo(nsISupports** aSecurityInfo) { + return mMultipartChannel->GetSecurityInfo(aSecurityInfo); +} + +NS_IMETHODIMP +nsPartChannel::GetContentType(nsACString& aContentType) { + aContentType = mContentType; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::SetContentType(const nsACString& aContentType) { + bool dummy; + net_ParseContentType(aContentType, mContentType, mContentCharset, &dummy); + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetContentCharset(nsACString& aContentCharset) { + aContentCharset = mContentCharset; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::SetContentCharset(const nsACString& aContentCharset) { + mContentCharset = aContentCharset; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetContentLength(int64_t* aContentLength) { + *aContentLength = mContentLength; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::SetContentLength(int64_t aContentLength) { + mContentLength = aContentLength; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetContentDisposition(uint32_t* aContentDisposition) { + if (mContentDispositionHeader.IsEmpty()) return NS_ERROR_NOT_AVAILABLE; + + *aContentDisposition = mContentDisposition; + return NS_OK; +} + +NS_IMETHODIMP 
+nsPartChannel::SetContentDisposition(uint32_t aContentDisposition) { + return NS_ERROR_NOT_AVAILABLE; +} + +NS_IMETHODIMP +nsPartChannel::GetContentDispositionFilename( + nsAString& aContentDispositionFilename) { + if (mContentDispositionFilename.IsEmpty()) return NS_ERROR_NOT_AVAILABLE; + + aContentDispositionFilename = mContentDispositionFilename; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::SetContentDispositionFilename( + const nsAString& aContentDispositionFilename) { + return NS_ERROR_NOT_AVAILABLE; +} + +NS_IMETHODIMP +nsPartChannel::GetContentDispositionHeader( + nsACString& aContentDispositionHeader) { + if (mContentDispositionHeader.IsEmpty()) return NS_ERROR_NOT_AVAILABLE; + + aContentDispositionHeader = mContentDispositionHeader; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetPartID(uint32_t* aPartID) { + *aPartID = mPartID; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetIsLastPart(bool* aIsLastPart) { + *aIsLastPart = mIsLastPart; + return NS_OK; +} + +// +// nsIByteRangeRequest implementation... +// + +NS_IMETHODIMP +nsPartChannel::GetIsByteRangeRequest(bool* aIsByteRangeRequest) { + *aIsByteRangeRequest = mIsByteRangeRequest; + + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetStartRange(int64_t* aStartRange) { + *aStartRange = mByteRangeStart; + + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetEndRange(int64_t* aEndRange) { + *aEndRange = mByteRangeEnd; + return NS_OK; +} + +NS_IMETHODIMP +nsPartChannel::GetBaseChannel(nsIChannel** aReturn) { + NS_ENSURE_ARG_POINTER(aReturn); + + *aReturn = mMultipartChannel; + NS_IF_ADDREF(*aReturn); + return NS_OK; +} + +// nsISupports implementation +NS_IMPL_ISUPPORTS(nsMultiMixedConv, nsIStreamConverter, nsIStreamListener, + nsIRequestObserver) + +// nsIStreamConverter implementation + +// No syncronous conversion at this time. 
+NS_IMETHODIMP +nsMultiMixedConv::Convert(nsIInputStream* aFromStream, const char* aFromType, + const char* aToType, nsISupports* aCtxt, + nsIInputStream** _retval) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +// Stream converter service calls this to initialize the actual stream converter +// (us). +NS_IMETHODIMP +nsMultiMixedConv::AsyncConvertData(const char* aFromType, const char* aToType, + nsIStreamListener* aListener, + nsISupports* aCtxt) { + NS_ASSERTION(aListener && aFromType && aToType, + "null pointer passed into multi mixed converter"); + + // hook up our final listener. this guy gets the various On*() calls we want + // to throw at him. + // + // WARNING: this listener must be able to handle multiple OnStartRequest, + // OnDataAvail() and OnStopRequest() call combinations. We call of series + // of these for each sub-part in the raw stream. + mFinalListener = aListener; + + return NS_OK; +} + +NS_IMETHODIMP +nsMultiMixedConv::GetConvertedType(const nsACString& aFromType, + nsIChannel* aChannel, nsACString& aToType) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +// nsIRequestObserver implementation +NS_IMETHODIMP +nsMultiMixedConv::OnStartRequest(nsIRequest* request) { + // we're assuming the content-type is available at this stage + NS_ASSERTION(mBoundary.IsEmpty(), "a second on start???"); + + nsresult rv; + + mTotalSent = 0; + mChannel = do_QueryInterface(request, &rv); + if (NS_FAILED(rv)) return rv; + + nsAutoCString contentType; + + // ask the HTTP channel for the content-type and extract the boundary from it. 
+ nsCOMPtr<nsIHttpChannel> httpChannel = do_QueryInterface(mChannel, &rv); + if (NS_SUCCEEDED(rv)) { + rv = httpChannel->GetResponseHeader("content-type"_ns, contentType); + if (NS_FAILED(rv)) { + return rv; + } + nsCString csp; + rv = httpChannel->GetResponseHeader("content-security-policy"_ns, csp); + if (NS_SUCCEEDED(rv)) { + mRootContentSecurityPolicy = csp; + } + } else { + // try asking the channel directly + rv = mChannel->GetContentType(contentType); + if (NS_FAILED(rv)) { + return NS_ERROR_FAILURE; + } + } + + Tokenizer p(contentType); + p.SkipUntil(Token::Char(';')); + if (!p.CheckChar(';')) { + return NS_ERROR_CORRUPTED_CONTENT; + } + p.SkipWhites(); + if (!p.CheckWord("boundary")) { + return NS_ERROR_CORRUPTED_CONTENT; + } + p.SkipWhites(); + if (!p.CheckChar('=')) { + return NS_ERROR_CORRUPTED_CONTENT; + } + p.SkipWhites(); + Unused << p.ReadUntil(Token::Char(';'), mBoundary); + mBoundary.Trim( + " \""); // ignoring potential quoted string formatting violations + if (mBoundary.IsEmpty()) { + return NS_ERROR_CORRUPTED_CONTENT; + } + + mHeaderTokens[HEADER_CONTENT_TYPE] = mTokenizer.AddCustomToken( + "content-type", mTokenizer.CASE_INSENSITIVE, false); + mHeaderTokens[HEADER_CONTENT_LENGTH] = mTokenizer.AddCustomToken( + "content-length", mTokenizer.CASE_INSENSITIVE, false); + mHeaderTokens[HEADER_CONTENT_DISPOSITION] = mTokenizer.AddCustomToken( + "content-disposition", mTokenizer.CASE_INSENSITIVE, false); + mHeaderTokens[HEADER_SET_COOKIE] = mTokenizer.AddCustomToken( + "set-cookie", mTokenizer.CASE_INSENSITIVE, false); + mHeaderTokens[HEADER_CONTENT_RANGE] = mTokenizer.AddCustomToken( + "content-range", mTokenizer.CASE_INSENSITIVE, false); + mHeaderTokens[HEADER_RANGE] = + mTokenizer.AddCustomToken("range", mTokenizer.CASE_INSENSITIVE, false); + mHeaderTokens[HEADER_CONTENT_SECURITY_POLICY] = mTokenizer.AddCustomToken( + "content-security-policy", mTokenizer.CASE_INSENSITIVE, false); + + mLFToken = mTokenizer.AddCustomToken("\n", 
mTokenizer.CASE_SENSITIVE, false); + mCRLFToken = + mTokenizer.AddCustomToken("\r\n", mTokenizer.CASE_SENSITIVE, false); + + SwitchToControlParsing(); + + mBoundaryToken = + mTokenizer.AddCustomToken(mBoundary, mTokenizer.CASE_SENSITIVE); + mBoundaryTokenWithDashes = + mTokenizer.AddCustomToken("--"_ns + mBoundary, mTokenizer.CASE_SENSITIVE); + + return NS_OK; +} + +// nsIStreamListener implementation +NS_IMETHODIMP +nsMultiMixedConv::OnDataAvailable(nsIRequest* request, nsIInputStream* inStr, + uint64_t sourceOffset, uint32_t count) { + // Failing these assertions may indicate that some of the target listeners of + // this converter is looping the thead queue, which is harmful to how we + // collect the raw (content) data. + MOZ_DIAGNOSTIC_ASSERT(!mInOnDataAvailable, + "nsMultiMixedConv::OnDataAvailable reentered!"); + MOZ_DIAGNOSTIC_ASSERT( + !mRawData, "There are unsent data from the previous tokenizer feed!"); + + if (mInOnDataAvailable) { + // The multipart logic is incapable of being reentered. + return NS_ERROR_UNEXPECTED; + } + + mozilla::AutoRestore<bool> restore(mInOnDataAvailable); + mInOnDataAvailable = true; + + nsresult rv_feed = mTokenizer.FeedInput(inStr, count); + // We must do this every time. Regardless if something has failed during the + // parsing process. Otherwise the raw data reference would not be thrown + // away. + nsresult rv_send = SendData(); + + return NS_FAILED(rv_send) ? 
rv_send : rv_feed; +} + +NS_IMETHODIMP +nsMultiMixedConv::OnStopRequest(nsIRequest* request, nsresult aStatus) { + nsresult rv; + + if (mPartChannel) { + mPartChannel->SetIsLastPart(); + + MOZ_DIAGNOSTIC_ASSERT( + !mRawData, "There are unsent data from the previous tokenizer feed!"); + + rv = mTokenizer.FinishInput(); + if (NS_SUCCEEDED(aStatus)) { + aStatus = rv; + } + rv = SendData(); + if (NS_SUCCEEDED(aStatus)) { + aStatus = rv; + } + + (void)SendStop(aStatus); + } else if (NS_FAILED(aStatus) && !mRequestListenerNotified) { + // underlying data production problem. we should not be in + // the middle of sending data. if we were, mPartChannel, + // above, would have been non-null. + + (void)mFinalListener->OnStartRequest(request); + (void)mFinalListener->OnStopRequest(request, aStatus); + } + + nsCOMPtr<nsIMultiPartChannelListener> multiListener = + do_QueryInterface(mFinalListener); + if (multiListener) { + multiListener->OnAfterLastPart(aStatus); + } + + return NS_OK; +} + +nsresult nsMultiMixedConv::ConsumeToken(Token const& token) { + nsresult rv; + + switch (mParserState) { + case PREAMBLE: + if (token.Equals(mBoundaryTokenWithDashes)) { + // The server first used boundary '--boundary'. Hence, we no longer + // accept plain 'boundary' token as a delimiter. + mTokenizer.RemoveCustomToken(mBoundaryToken); + mParserState = BOUNDARY_CRLF; + break; + } + if (token.Equals(mBoundaryToken)) { + // And here the opposite from the just above block... + mTokenizer.RemoveCustomToken(mBoundaryTokenWithDashes); + mParserState = BOUNDARY_CRLF; + break; + } + + // This is a preamble, just ignore it and wait for the boundary. 
+ break; + + case BOUNDARY_CRLF: + if (token.Equals(Token::NewLine())) { + mParserState = HEADER_NAME; + mResponseHeader = HEADER_UNKNOWN; + HeadersToDefault(); + SetHeaderTokensEnabled(true); + break; + } + return NS_ERROR_CORRUPTED_CONTENT; + + case HEADER_NAME: + SetHeaderTokensEnabled(false); + if (token.Equals(Token::NewLine())) { + mParserState = BODY_INIT; + SwitchToBodyParsing(); + break; + } + for (uint32_t h = HEADER_CONTENT_TYPE; h < HEADER_UNKNOWN; ++h) { + if (token.Equals(mHeaderTokens[h])) { + mResponseHeader = static_cast<EHeader>(h); + break; + } + } + mParserState = HEADER_SEP; + break; + + case HEADER_SEP: + if (token.Equals(Token::Char(':'))) { + mParserState = HEADER_VALUE; + mResponseHeaderValue.Truncate(); + break; + } + if (mResponseHeader == HEADER_UNKNOWN) { + // If the header is not of any we understand, just pass everything till + // ':' + break; + } + if (token.Equals(Token::Whitespace())) { + // Accept only header-name traling whitespaces after known headers + break; + } + return NS_ERROR_CORRUPTED_CONTENT; + + case HEADER_VALUE: + if (token.Equals(Token::Whitespace()) && mResponseHeaderValue.IsEmpty()) { + // Eat leading whitespaces + break; + } + if (token.Equals(Token::NewLine())) { + nsresult rv = ProcessHeader(); + if (NS_FAILED(rv)) { + return rv; + } + mParserState = HEADER_NAME; + mResponseHeader = HEADER_UNKNOWN; + SetHeaderTokensEnabled(true); + } else { + mResponseHeaderValue.Append(token.Fragment()); + } + break; + + case BODY_INIT: + rv = SendStart(); + if (NS_FAILED(rv)) { + return rv; + } + mParserState = BODY; + [[fallthrough]]; + + case BODY: { + if (!token.Equals(mLFToken) && !token.Equals(mCRLFToken)) { + if (token.Equals(mBoundaryTokenWithDashes) || + token.Equals(mBoundaryToken)) { + // Allow CRLF to NOT be part of the boundary as well + SwitchToControlParsing(); + mParserState = TRAIL_DASH1; + break; + } + AccumulateData(token); + break; + } + + // After CRLF we must explicitly check for boundary. 
If found, + // that CRLF is part of the boundary and must not be send to the + // data listener. + Token token2; + if (!mTokenizer.Next(token2)) { + // Note: this will give us the CRLF token again when more data + // or OnStopRequest arrive. I.e. we will enter BODY case in + // the very same state as we are now and start this block over. + mTokenizer.NeedMoreInput(); + break; + } + if (token2.Equals(mBoundaryTokenWithDashes) || + token2.Equals(mBoundaryToken)) { + SwitchToControlParsing(); + mParserState = TRAIL_DASH1; + break; + } + + AccumulateData(token); + AccumulateData(token2); + break; + } + + case TRAIL_DASH1: + if (token.Equals(Token::NewLine())) { + rv = SendStop(NS_OK); + if (NS_FAILED(rv)) { + return rv; + } + mParserState = BOUNDARY_CRLF; + mTokenizer.Rollback(); + break; + } + if (token.Equals(Token::Char('-'))) { + mParserState = TRAIL_DASH2; + break; + } + return NS_ERROR_CORRUPTED_CONTENT; + + case TRAIL_DASH2: + if (token.Equals(Token::Char('-'))) { + mPartChannel->SetIsLastPart(); + // SendStop calls SendData first. 
+ rv = SendStop(NS_OK); + if (NS_FAILED(rv)) { + return rv; + } + mParserState = EPILOGUE; + break; + } + return NS_ERROR_CORRUPTED_CONTENT; + + case EPILOGUE: + // Just ignore + break; + + default: + MOZ_ASSERT(false, "Missing parser state handling branch"); + break; + } // switch + + return NS_OK; +} + +void nsMultiMixedConv::SetHeaderTokensEnabled(bool aEnable) { + for (uint32_t h = HEADER_FIRST; h < HEADER_UNKNOWN; ++h) { + mTokenizer.EnableCustomToken(mHeaderTokens[h], aEnable); + } +} + +void nsMultiMixedConv::SwitchToBodyParsing() { + mTokenizer.SetTokenizingMode(Tokenizer::Mode::CUSTOM_ONLY); + mTokenizer.EnableCustomToken(mLFToken, true); + mTokenizer.EnableCustomToken(mCRLFToken, true); + mTokenizer.EnableCustomToken(mBoundaryTokenWithDashes, true); + mTokenizer.EnableCustomToken(mBoundaryToken, true); +} + +void nsMultiMixedConv::SwitchToControlParsing() { + mTokenizer.SetTokenizingMode(Tokenizer::Mode::FULL); + mTokenizer.EnableCustomToken(mLFToken, false); + mTokenizer.EnableCustomToken(mCRLFToken, false); + mTokenizer.EnableCustomToken(mBoundaryTokenWithDashes, false); + mTokenizer.EnableCustomToken(mBoundaryToken, false); +} + +// nsMultiMixedConv methods +nsMultiMixedConv::nsMultiMixedConv() + : mCurrentPartID(0), + mInOnDataAvailable(false), + mResponseHeader(HEADER_UNKNOWN), + // XXX: This is a hack to bypass the raw pointer to refcounted object in + // lambda analysis. It should be removed and replaced when the + // IncrementalTokenizer API is improved to avoid the need for such + // workarounds. + // + // This is safe because `mTokenizer` will not outlive `this`, meaning that + // this std::bind object will be destroyed before `this` dies. 
+ mTokenizer(std::bind(&nsMultiMixedConv::ConsumeToken, this, + std::placeholders::_1)) { + mContentLength = UINT64_MAX; + mByteRangeStart = 0; + mByteRangeEnd = 0; + mTotalSent = 0; + mIsByteRangeRequest = false; + mParserState = INIT; + mRawData = nullptr; + mRequestListenerNotified = false; +} + +nsresult nsMultiMixedConv::SendStart() { + nsresult rv = NS_OK; + + nsCOMPtr<nsIStreamListener> partListener(mFinalListener); + if (mContentType.IsEmpty()) { + mContentType.AssignLiteral(UNKNOWN_CONTENT_TYPE); + nsCOMPtr<nsIStreamConverterService> serv = + do_GetService(NS_STREAMCONVERTERSERVICE_CONTRACTID, &rv); + if (NS_SUCCEEDED(rv)) { + nsCOMPtr<nsIStreamListener> converter; + rv = serv->AsyncConvertData(UNKNOWN_CONTENT_TYPE, "*/*", mFinalListener, + mContext, getter_AddRefs(converter)); + if (NS_SUCCEEDED(rv)) { + partListener = converter; + } + } + } + + // if we already have an mPartChannel, that means we never sent a Stop() + // before starting up another "part." that would be bad. + MOZ_ASSERT(!mPartChannel, "tisk tisk, shouldn't be overwriting a channel"); + + nsPartChannel* newChannel; + newChannel = new nsPartChannel(mChannel, mCurrentPartID++, partListener); + if (!newChannel) return NS_ERROR_OUT_OF_MEMORY; + + if (mIsByteRangeRequest) { + newChannel->InitializeByteRange(mByteRangeStart, mByteRangeEnd); + } + + mTotalSent = 0; + + // Set up the new part channel... + mPartChannel = newChannel; + + rv = mPartChannel->SetContentType(mContentType); + if (NS_FAILED(rv)) return rv; + + rv = mPartChannel->SetContentLength(mContentLength); + if (NS_FAILED(rv)) return rv; + + mPartChannel->SetContentDisposition(mContentDisposition); + + // Each part of a multipart/replace response can be used + // for the top level document. We must inform upper layers + // about this by setting the LOAD_REPLACE flag so that certain + // state assertions are evaluated as positive. 
+ nsLoadFlags loadFlags = 0; + mPartChannel->GetLoadFlags(&loadFlags); + loadFlags |= nsIChannel::LOAD_REPLACE; + mPartChannel->SetLoadFlags(loadFlags); + + nsCOMPtr<nsILoadGroup> loadGroup; + (void)mPartChannel->GetLoadGroup(getter_AddRefs(loadGroup)); + + // Add the new channel to the load group (if any) + if (loadGroup) { + rv = loadGroup->AddRequest(mPartChannel, nullptr); + if (NS_FAILED(rv)) return rv; + } + + // This prevents artificial call to OnStart/StopRequest when the root + // channel fails. Since now it's ensured to keep with the nsIStreamListener + // contract every time. + mRequestListenerNotified = true; + + // Let's start off the load. NOTE: we don't forward on the channel passed + // into our OnDataAvailable() as it's the root channel for the raw stream. + return mPartChannel->SendOnStartRequest(mContext); +} + +nsresult nsMultiMixedConv::SendStop(nsresult aStatus) { + // Make sure we send out all accumulcated data prior call to OnStopRequest. + // If there is no data, this is a no-op. + nsresult rv = SendData(); + if (NS_SUCCEEDED(aStatus)) { + aStatus = rv; + } + if (mPartChannel) { + rv = mPartChannel->SendOnStopRequest(mContext, aStatus); + // don't check for failure here, we need to remove the channel from + // the loadgroup. + + // Remove the channel from its load group (if any) + nsCOMPtr<nsILoadGroup> loadGroup; + (void)mPartChannel->GetLoadGroup(getter_AddRefs(loadGroup)); + if (loadGroup) + (void)loadGroup->RemoveRequest(mPartChannel, mContext, aStatus); + } + + mPartChannel = nullptr; + return rv; +} + +void nsMultiMixedConv::AccumulateData(Token const& aToken) { + if (!mRawData) { + // This is the first read of raw data during this FeedInput loop + // of the incremental tokenizer. All 'raw' tokens are coming from + // the same linear buffer, hence begining of this loop raw data + // is begining of the first raw token. Length of this loop raw + // data is just sum of all 'raw' tokens we collect during this loop. 
+ // + // It's ensured we flush (send to to the listener via OnDataAvailable) + // and nullify the collected raw data right after FeedInput call. + // Hence, the reference can't outlive the actual buffer. + mRawData = aToken.Fragment().BeginReading(); + mRawDataLength = 0; + } + + mRawDataLength += aToken.Fragment().Length(); +} + +nsresult nsMultiMixedConv::SendData() { + nsresult rv; + + if (!mRawData) { + return NS_OK; + } + + nsACString::const_char_iterator rawData = mRawData; + mRawData = nullptr; + + if (!mPartChannel) { + return NS_ERROR_FAILURE; // something went wrong w/ processing + } + + if (mContentLength != UINT64_MAX) { + // make sure that we don't send more than the mContentLength + // XXX why? perhaps the Content-Length header was actually wrong!! + if ((uint64_t(mRawDataLength) + mTotalSent) > mContentLength) + mRawDataLength = static_cast<uint32_t>(mContentLength - mTotalSent); + + if (mRawDataLength == 0) return NS_OK; + } + + uint64_t offset = mTotalSent; + mTotalSent += mRawDataLength; + + nsCOMPtr<nsIStringInputStream> ss( + do_CreateInstance("@mozilla.org/io/string-input-stream;1", &rv)); + if (NS_FAILED(rv)) return rv; + + rv = ss->ShareData(rawData, mRawDataLength); + mRawData = nullptr; + if (NS_FAILED(rv)) return rv; + + return mPartChannel->SendOnDataAvailable(mContext, ss, offset, + mRawDataLength); +} + +void nsMultiMixedConv::HeadersToDefault() { + mContentLength = UINT64_MAX; + mContentType.Truncate(); + mContentDisposition.Truncate(); + mContentSecurityPolicy.Truncate(); + mIsByteRangeRequest = false; +} + +nsresult nsMultiMixedConv::ProcessHeader() { + mozilla::Tokenizer p(mResponseHeaderValue); + + switch (mResponseHeader) { + case HEADER_CONTENT_TYPE: + mContentType = mResponseHeaderValue; + mContentType.CompressWhitespace(); + break; + case HEADER_CONTENT_LENGTH: + p.SkipWhites(); + if (!p.ReadInteger(&mContentLength)) { + return NS_ERROR_CORRUPTED_CONTENT; + } + break; + case HEADER_CONTENT_DISPOSITION: + mContentDisposition = 
mResponseHeaderValue; + mContentDisposition.CompressWhitespace(); + break; + case HEADER_SET_COOKIE: { + nsCOMPtr<nsIHttpChannelInternal> httpInternal = + do_QueryInterface(mChannel); + mResponseHeaderValue.CompressWhitespace(); + if (httpInternal) { + DebugOnly<nsresult> rv = httpInternal->SetCookie(mResponseHeaderValue); + MOZ_ASSERT(NS_SUCCEEDED(rv)); + } + break; + } + case HEADER_RANGE: + case HEADER_CONTENT_RANGE: { + if (!p.CheckWord("bytes") || !p.CheckWhite()) { + return NS_ERROR_CORRUPTED_CONTENT; + } + p.SkipWhites(); + if (p.CheckChar('*')) { + mByteRangeStart = mByteRangeEnd = 0; + } else if (!p.ReadInteger(&mByteRangeStart) || !p.CheckChar('-') || + !p.ReadInteger(&mByteRangeEnd)) { + return NS_ERROR_CORRUPTED_CONTENT; + } + mIsByteRangeRequest = true; + if (mContentLength == UINT64_MAX) { + mContentLength = uint64_t(mByteRangeEnd - mByteRangeStart + 1); + } + break; + } + case HEADER_CONTENT_SECURITY_POLICY: { + mContentSecurityPolicy = mResponseHeaderValue; + mContentSecurityPolicy.CompressWhitespace(); + nsCOMPtr<nsIHttpChannel> httpChannel = do_QueryInterface(mChannel); + if (httpChannel) { + nsCString resultCSP = mRootContentSecurityPolicy; + if (!mContentSecurityPolicy.IsEmpty()) { + // We are updating the root channel CSP header respectively for + // each part as: CSP-root + CSP-partN, where N is the part number. + // Here we append current part's CSP to root CSP and reset CSP + // header for each part. + if (!resultCSP.IsEmpty()) { + resultCSP.Append(";"); + } + resultCSP.Append(mContentSecurityPolicy); + } + nsresult rv = httpChannel->SetResponseHeader( + "Content-Security-Policy"_ns, resultCSP, false); + if (NS_FAILED(rv)) { + return NS_ERROR_CORRUPTED_CONTENT; + } + } + break; + } + case HEADER_UNKNOWN: + // We ignore anything else... 
+ break; + } + + return NS_OK; +} + +nsresult NS_NewMultiMixedConv(nsMultiMixedConv** aMultiMixedConv) { + MOZ_ASSERT(aMultiMixedConv != nullptr, "null ptr"); + + RefPtr<nsMultiMixedConv> conv = new nsMultiMixedConv(); + conv.forget(aMultiMixedConv); + return NS_OK; +} diff --git a/netwerk/streamconv/converters/nsMultiMixedConv.h b/netwerk/streamconv/converters/nsMultiMixedConv.h new file mode 100644 index 0000000000..feb93ff360 --- /dev/null +++ b/netwerk/streamconv/converters/nsMultiMixedConv.h @@ -0,0 +1,256 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef __nsmultimixedconv__h__ +#define __nsmultimixedconv__h__ + +#include "nsIStreamConverter.h" +#include "nsIChannel.h" +#include "nsString.h" +#include "nsCOMPtr.h" +#include "nsIByteRangeRequest.h" +#include "nsIMultiPartChannel.h" +#include "mozilla/Attributes.h" +#include "mozilla/IncrementalTokenizer.h" +#include "nsHttpResponseHead.h" + +#define NS_MULTIMIXEDCONVERTER_CID \ + { /* 7584CE90-5B25-11d3-A175-0050041CAF44 */ \ + 0x7584ce90, 0x5b25, 0x11d3, { \ + 0xa1, 0x75, 0x0, 0x50, 0x4, 0x1c, 0xaf, 0x44 \ + } \ + } + +// +// nsPartChannel is a "dummy" channel which represents an individual part of +// a multipart/mixed stream... +// +// Instances on this channel are passed out to the consumer through the +// nsIStreamListener interface. 
+// +class nsPartChannel final : public nsIChannel, + public nsIByteRangeRequest, + public nsIMultiPartChannel { + public: + nsPartChannel(nsIChannel* aMultipartChannel, uint32_t aPartID, + nsIStreamListener* aListener); + + void InitializeByteRange(int64_t aStart, int64_t aEnd); + void SetIsLastPart() { mIsLastPart = true; } + nsresult SendOnStartRequest(nsISupports* aContext); + nsresult SendOnDataAvailable(nsISupports* aContext, nsIInputStream* aStream, + uint64_t aOffset, uint32_t aLen); + nsresult SendOnStopRequest(nsISupports* aContext, nsresult aStatus); + /* SetContentDisposition expects the full value of the Content-Disposition + * header */ + void SetContentDisposition(const nsACString& aContentDispositionHeader); + // TODO(ER): This appears to be dead code + void SetResponseHead(mozilla::net::nsHttpResponseHead* head) { + mResponseHead.reset(head); + } + + NS_DECL_ISUPPORTS + NS_DECL_NSIREQUEST + NS_DECL_NSICHANNEL + NS_DECL_NSIBYTERANGEREQUEST + NS_DECL_NSIMULTIPARTCHANNEL + + protected: + ~nsPartChannel() = default; + + protected: + nsCOMPtr<nsIChannel> mMultipartChannel; + nsCOMPtr<nsIStreamListener> mListener; + UniquePtr<mozilla::net::nsHttpResponseHead> mResponseHead; + + nsresult mStatus; + nsLoadFlags mLoadFlags; + + nsCOMPtr<nsILoadGroup> mLoadGroup; + + nsCString mContentType; + nsCString mContentCharset; + uint32_t mContentDisposition; + nsString mContentDispositionFilename; + nsCString mContentDispositionHeader; + uint64_t mContentLength; + + bool mIsByteRangeRequest; + int64_t mByteRangeStart; + int64_t mByteRangeEnd; + + uint32_t mPartID; // unique ID that can be used to identify + // this part of the multipart document + bool mIsLastPart; +}; + +// The nsMultiMixedConv stream converter converts a stream of type +// "multipart/x-mixed-replace" to it's subparts. There was some debate as to +// whether or not the functionality desired when HTTP confronted this type +// required a stream converter. 
After all, this type really prompts various +// viewer related actions rather than stream conversion. There simply needs to +// be a piece in place that can strip out the multiple parts of a stream of this +// type, and "display" them accordingly. +// +// With that said, this "stream converter" spends more time packaging up the sub +// parts of the main stream and sending them off the destination stream +// listener, than doing any real stream parsing/converting. +// +// WARNING: This converter requires that it's destination stream listener be +// able to handle multiple OnStartRequest(), OnDataAvailable(), and +// OnStopRequest() call combinations. Each series represents the beginning, +// data production, and ending phase of each sub- part of the original +// stream. +// +// NOTE: this MIME-type is used by HTTP, *not* SMTP, or IMAP. +// +// NOTE: For reference, a general description of how this MIME type should be +// handled via HTTP, see +// http://home.netscape.com/assist/net_sites/pushpull.html . Note that real +// world server content deviates considerably from this overview. +// +// Implementation assumptions: +// Assumed structue: +// --BoundaryToken[\r]\n +// content-type: foo/bar[\r]\n +// ... (other headers if any) +// [\r]\n (second line feed to delimit end of headers) +// data +// --BoundaryToken-- (end delimited by final "--") +// +// linebreaks can be either CRLF or LFLF. linebreaks preceding +// boundary tokens are NOT considered part of the data. BoundaryToken +// is any opaque string. 
+// +// + +class nsMultiMixedConv : public nsIStreamConverter { + public: + NS_DECL_ISUPPORTS + NS_DECL_NSISTREAMCONVERTER + NS_DECL_NSISTREAMLISTENER + NS_DECL_NSIREQUESTOBSERVER + + explicit nsMultiMixedConv(); + + protected: + typedef mozilla::IncrementalTokenizer::Token Token; + + virtual ~nsMultiMixedConv() = default; + + nsresult SendStart(); + void AccumulateData(Token const& aToken); + nsresult SendData(); + nsresult SendStop(nsresult aStatus); + + // member data + nsCOMPtr<nsIStreamListener> mFinalListener; // this guy gets the converted + // data via his OnDataAvailable() + + nsCOMPtr<nsIChannel> + mChannel; // The channel as we get in in OnStartRequest call + RefPtr<nsPartChannel> mPartChannel; // the channel for the given part we're + // processing. one channel per part. + nsCOMPtr<nsISupports> mContext; + nsCString mContentType; + nsCString mContentDisposition; + nsCString mContentSecurityPolicy; + nsCString mRootContentSecurityPolicy; + uint64_t mContentLength; + uint64_t mTotalSent; + + // The following members are for tracking the byte ranges in + // multipart/mixed content which specified the 'Content-Range:' + // header... + int64_t mByteRangeStart; + int64_t mByteRangeEnd; + bool mIsByteRangeRequest; + // This flag is set first time we create a part channel. + // We use it to prevent duplicated OnStopRequest call on the listener + // when we fail from some reason to ever create a part channel that + // ensures correct notifications. + bool mRequestListenerNotified; + + uint32_t mCurrentPartID; + + // Flag preventing reenter of OnDataAvailable in case the target listener + // ends up spinning the event loop. 
+ bool mInOnDataAvailable; + + // Current state of the incremental parser + enum EParserState { + PREAMBLE, + BOUNDARY_CRLF, + HEADER_NAME, + HEADER_SEP, + HEADER_VALUE, + BODY_INIT, + BODY, + TRAIL_DASH1, + TRAIL_DASH2, + EPILOGUE, + + INIT = PREAMBLE + } mParserState; + + // Response part header value, valid when we find a header name + // we recognize. + enum EHeader : uint32_t { + HEADER_FIRST, + HEADER_CONTENT_TYPE = HEADER_FIRST, + HEADER_CONTENT_LENGTH, + HEADER_CONTENT_DISPOSITION, + HEADER_SET_COOKIE, + HEADER_CONTENT_RANGE, + HEADER_RANGE, + HEADER_CONTENT_SECURITY_POLICY, + HEADER_UNKNOWN + } mResponseHeader; + // Cumulated value of a response header. + nsCString mResponseHeaderValue; + + nsCString mBoundary; + mozilla::IncrementalTokenizer mTokenizer; + + // When in the "body parsing" mode, see below, we cumulate raw data + // incrementally to mainly avoid any unnecessary granularity. + // mRawData points to the first byte in the tokenizer buffer where part + // body data begins or continues. mRawDataLength is a cumulated length + // of that data during a single tokenizer input feed. This is always + // flushed right after we fed the tokenizer. + nsACString::const_char_iterator mRawData; + nsACString::size_type mRawDataLength; + + // At the start we don't know if the server will be sending boundary with + // or without the leading dashes. + Token mBoundaryToken; + Token mBoundaryTokenWithDashes; + // We need these custom tokens to allow finding CRLF when in the binary mode. + // CRLF before boundary is considered part of the boundary and not part of + // the data. + Token mLFToken; + Token mCRLFToken; + // Custom tokens for each of the response headers we recognize. + Token mHeaderTokens[HEADER_UNKNOWN]; + + // Resets values driven by part headers, like content type, to their defaults, + // called at the start of every part processing. + void HeadersToDefault(); + // Processes captured value of mResponseHeader header. 
+ nsresult ProcessHeader(); + // Switches the parser and tokenizer state to "binary mode" which only + // searches for the 'CRLF boundary' delimiter. + void SwitchToBodyParsing(); + // Switches to the default mode, we are in this mode when parsing headers and + // control data around the boundary delimiters. + void SwitchToControlParsing(); + // Turns on or off recognition of the headers we recognize in part heads. + void SetHeaderTokensEnabled(bool aEnable); + + // The main parser callback called by the IncrementalTokenizer + // instance from OnDataAvailable or OnStopRequest. + nsresult ConsumeToken(Token const& token); +}; + +#endif /* __nsmultimixedconv__h__ */ diff --git a/netwerk/streamconv/converters/nsUnknownDecoder.cpp b/netwerk/streamconv/converters/nsUnknownDecoder.cpp new file mode 100644 index 0000000000..3ba51c8dc4 --- /dev/null +++ b/netwerk/streamconv/converters/nsUnknownDecoder.cpp @@ -0,0 +1,894 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsUnknownDecoder.h" +#include "nsIPipe.h" +#include "nsIInputStream.h" +#include "nsIOutputStream.h" +#include "nsMimeTypes.h" +#include "nsIPrefBranch.h" + +#include "nsCRT.h" + +#include "nsIMIMEService.h" + +#include "nsIViewSourceChannel.h" +#include "nsIHttpChannel.h" +#include "nsIForcePendingChannel.h" +#include "nsIEncodedChannel.h" +#include "nsIURI.h" +#include "nsStringStream.h" +#include "nsNetCID.h" +#include "nsNetUtil.h" + +#include <algorithm> + +#define MAX_BUFFER_SIZE 512u + +NS_IMPL_ISUPPORTS(nsUnknownDecoder::ConvertedStreamListener, nsIStreamListener, + nsIRequestObserver) + +nsUnknownDecoder::ConvertedStreamListener::ConvertedStreamListener( + nsUnknownDecoder* aDecoder) { + mDecoder = aDecoder; +} + +nsresult nsUnknownDecoder::ConvertedStreamListener::AppendDataToString( + nsIInputStream* inputStream, void* closure, const char* rawSegment, + uint32_t toOffset, uint32_t count, uint32_t* writeCount) { + nsCString* decodedData = static_cast<nsCString*>(closure); + decodedData->Append(rawSegment, count); + *writeCount = count; + return NS_OK; +} + +NS_IMETHODIMP +nsUnknownDecoder::ConvertedStreamListener::OnStartRequest(nsIRequest* request) { + return NS_OK; +} + +NS_IMETHODIMP +nsUnknownDecoder::ConvertedStreamListener::OnDataAvailable( + nsIRequest* request, nsIInputStream* stream, uint64_t offset, + uint32_t count) { + uint32_t read; + nsAutoCString decodedData; + { + MutexAutoLock lock(mDecoder->mMutex); + decodedData = mDecoder->mDecodedData; + } + nsresult rv = + stream->ReadSegments(AppendDataToString, &decodedData, count, &read); + if (NS_FAILED(rv)) { + return rv; + } + MutexAutoLock lock(mDecoder->mMutex); + mDecoder->mDecodedData = decodedData; + return NS_OK; +} + +NS_IMETHODIMP +nsUnknownDecoder::ConvertedStreamListener::OnStopRequest(nsIRequest* request, + nsresult status) { + return NS_OK; +} + +nsUnknownDecoder::nsUnknownDecoder() + : mBuffer(nullptr), + mBufferLen(0), + mRequireHTMLsuffix(false), + 
mMutex("nsUnknownDecoder"), + mDecodedData("") { + nsCOMPtr<nsIPrefBranch> prefs = do_GetService(NS_PREFSERVICE_CONTRACTID); + if (prefs) { + bool val; + if (NS_SUCCEEDED(prefs->GetBoolPref("security.requireHTMLsuffix", &val))) + mRequireHTMLsuffix = val; + } +} + +nsUnknownDecoder::~nsUnknownDecoder() { + if (mBuffer) { + delete[] mBuffer; + mBuffer = nullptr; + } +} + +// ---- +// +// nsISupports implementation... +// +// ---- + +NS_IMPL_ADDREF(nsUnknownDecoder) +NS_IMPL_RELEASE(nsUnknownDecoder) + +NS_INTERFACE_MAP_BEGIN(nsUnknownDecoder) + NS_INTERFACE_MAP_ENTRY(nsIStreamConverter) + NS_INTERFACE_MAP_ENTRY(nsIStreamListener) + NS_INTERFACE_MAP_ENTRY(nsIRequestObserver) + NS_INTERFACE_MAP_ENTRY(nsIContentSniffer) + NS_INTERFACE_MAP_ENTRY(nsIThreadRetargetableStreamListener) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIStreamListener) +NS_INTERFACE_MAP_END + +// ---- +// +// nsIStreamConverter methods... +// +// ---- + +NS_IMETHODIMP +nsUnknownDecoder::Convert(nsIInputStream* aFromStream, const char* aFromType, + const char* aToType, nsISupports* aCtxt, + nsIInputStream** aResultStream) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +NS_IMETHODIMP +nsUnknownDecoder::AsyncConvertData(const char* aFromType, const char* aToType, + nsIStreamListener* aListener, + nsISupports* aCtxt) { + NS_ASSERTION(aListener && aFromType && aToType, + "null pointer passed into multi mixed converter"); + // hook up our final listener. this guy gets the various On*() calls we want + // to throw at him. + // + + MutexAutoLock lock(mMutex); + mNextListener = aListener; + return (aListener) ? NS_OK : NS_ERROR_FAILURE; +} + +NS_IMETHODIMP +nsUnknownDecoder::GetConvertedType(const nsACString& aFromType, + nsIChannel* aChannel, nsACString& aToType) { + return NS_ERROR_NOT_IMPLEMENTED; +} + +// ---- +// +// nsIStreamListener methods... 
+// +// ---- + +NS_IMETHODIMP +nsUnknownDecoder::OnDataAvailable(nsIRequest* request, nsIInputStream* aStream, + uint64_t aSourceOffset, uint32_t aCount) { + nsresult rv = NS_OK; + + bool contentTypeEmpty; + { + MutexAutoLock lock(mMutex); + if (!mNextListener) return NS_ERROR_FAILURE; + + contentTypeEmpty = mContentType.IsEmpty(); + } + + if (contentTypeEmpty) { + uint32_t count, len; + + // If the buffer has not been allocated by now, just fail... + if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY; + + // + // Determine how much of the stream should be read to fill up the + // sniffer buffer... + // + if (mBufferLen + aCount >= MAX_BUFFER_SIZE) { + count = MAX_BUFFER_SIZE - mBufferLen; + } else { + count = aCount; + } + + // Read the data into the buffer... + rv = aStream->Read((mBuffer + mBufferLen), count, &len); + if (NS_FAILED(rv)) return rv; + + mBufferLen += len; + aCount -= len; + + if (aCount) { + // + // Adjust the source offset... The call to FireListenerNotifications(...) + // will make the first OnDataAvailable(...) call with an offset of 0. + // So, this offset needs to be adjusted to reflect that... + // + aSourceOffset += mBufferLen; + + DetermineContentType(request); + + rv = FireListenerNotifications(request, nullptr); + } + } + + // Must not fire ODA again if it failed once + if (aCount && NS_SUCCEEDED(rv)) { +#ifdef DEBUG + { + MutexAutoLock lock(mMutex); + NS_ASSERTION(!mContentType.IsEmpty(), + "Content type should be known by now."); + } +#endif + + nsCOMPtr<nsIStreamListener> listener; + { + MutexAutoLock lock(mMutex); + listener = mNextListener; + } + rv = listener->OnDataAvailable(request, aStream, aSourceOffset, aCount); + } + + return rv; +} + +// ---- +// +// nsIRequestObserver methods... +// +// ---- + +NS_IMETHODIMP +nsUnknownDecoder::OnStartRequest(nsIRequest* request) { + nsresult rv = NS_OK; + + { + MutexAutoLock lock(mMutex); + if (!mNextListener) return NS_ERROR_FAILURE; + } + + // Allocate the sniffer buffer... 
+ if (NS_SUCCEEDED(rv) && !mBuffer) { + mBuffer = new char[MAX_BUFFER_SIZE]; + + if (!mBuffer) { + rv = NS_ERROR_OUT_OF_MEMORY; + } + } + + // Do not pass the OnStartRequest on to the next listener (yet)... + return rv; +} + +NS_IMETHODIMP +nsUnknownDecoder::OnStopRequest(nsIRequest* request, nsresult aStatus) { + nsresult rv = NS_OK; + + bool contentTypeEmpty; + { + MutexAutoLock lock(mMutex); + if (!mNextListener) return NS_ERROR_FAILURE; + + contentTypeEmpty = mContentType.IsEmpty(); + } + + // + // The total amount of data is less than the size of the sniffer buffer. + // Analyze the buffer now... + // + if (contentTypeEmpty) { + DetermineContentType(request); + + // Make sure channel listeners see channel as pending while we call + // OnStartRequest/OnDataAvailable, even though the underlying channel + // has already hit OnStopRequest. + nsCOMPtr<nsIForcePendingChannel> forcePendingChannel = + do_QueryInterface(request); + if (forcePendingChannel) { + forcePendingChannel->ForcePending(true); + } + + rv = FireListenerNotifications(request, nullptr); + + if (NS_FAILED(rv)) { + aStatus = rv; + } + + // now we need to set pending state to false before calling OnStopRequest + if (forcePendingChannel) { + forcePendingChannel->ForcePending(false); + } + } + + nsCOMPtr<nsIStreamListener> listener; + { + MutexAutoLock lock(mMutex); + listener = mNextListener; + mNextListener = nullptr; + } + rv = listener->OnStopRequest(request, aStatus); + + return rv; +} + +// ---- +// +// nsIContentSniffer methods... +// +// ---- +NS_IMETHODIMP +nsUnknownDecoder::GetMIMETypeFromContent(nsIRequest* aRequest, + const uint8_t* aData, uint32_t aLength, + nsACString& type) { + // This is only used by sniffer, therefore we do not need to lock anything + // here. 
+ nsCOMPtr<nsIChannel> channel(do_QueryInterface(aRequest)); + if (channel) { + nsCOMPtr<nsILoadInfo> loadInfo = channel->LoadInfo(); + if (loadInfo->GetSkipContentSniffing()) { + return NS_ERROR_NOT_AVAILABLE; + } + } + + mBuffer = const_cast<char*>(reinterpret_cast<const char*>(aData)); + mBufferLen = aLength; + DetermineContentType(aRequest); + mBuffer = nullptr; + mBufferLen = 0; + type.Assign(mContentType); + mContentType.Truncate(); + return type.IsEmpty() ? NS_ERROR_NOT_AVAILABLE : NS_OK; +} + +// Actual sniffing code + +bool nsUnknownDecoder::AllowSniffing(nsIRequest* aRequest) { + if (!mRequireHTMLsuffix) { + return true; + } + + nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest); + if (!channel) { + NS_ERROR("QI failed"); + return false; + } + + nsCOMPtr<nsIURI> uri; + if (NS_FAILED(channel->GetURI(getter_AddRefs(uri))) || !uri) { + return false; + } + + nsCOMPtr<nsILoadInfo> loadInfo = channel->LoadInfo(); + if (loadInfo->GetSkipContentSniffing()) { + return false; + } + + return !uri->SchemeIs("file"); +} + +/** + * This is the array of sniffer entries that depend on "magic numbers" + * in the file. Each entry has either a type associated with it (set + * these with the SNIFFER_ENTRY macro) or a function to be executed + * (set these with the SNIFFER_ENTRY_WITH_FUNC macro). The function + * should take a single nsIRequest* and returns bool -- true if + * it sets mContentType, false otherwise + */ +nsUnknownDecoder::nsSnifferEntry nsUnknownDecoder::sSnifferEntries[] = { + SNIFFER_ENTRY("%PDF-", APPLICATION_PDF), + + SNIFFER_ENTRY("%!PS-Adobe-", APPLICATION_POSTSCRIPT), + + // Files that start with mailbox delimiters let's provisionally call + // text/plain + SNIFFER_ENTRY("From", TEXT_PLAIN), SNIFFER_ENTRY(">From", TEXT_PLAIN), + + // If the buffer begins with "#!" or "%!" then it is a script of + // some sort... 
"Scripts" can include arbitrary data to be passed + // to an interpreter, so we need to decide whether we can call this + // text or whether it's data. + SNIFFER_ENTRY_WITH_FUNC("#!", &nsUnknownDecoder::LastDitchSniff), + + // XXXbz should (and can) we also include the various ways that <?xml can + // appear as UTF-16 and such? See http://www.w3.org/TR/REC-xml#sec-guessing + SNIFFER_ENTRY_WITH_FUNC("<?xml", &nsUnknownDecoder::SniffForXML)}; + +uint32_t nsUnknownDecoder::sSnifferEntryNum = + sizeof(nsUnknownDecoder::sSnifferEntries) / + sizeof(nsUnknownDecoder::nsSnifferEntry); + +void nsUnknownDecoder::DetermineContentType(nsIRequest* aRequest) { + { + MutexAutoLock lock(mMutex); + NS_ASSERTION(mContentType.IsEmpty(), "Content type is already known."); + if (!mContentType.IsEmpty()) return; + } + + nsCOMPtr<nsIChannel> channel(do_QueryInterface(aRequest)); + if (channel) { + nsCOMPtr<nsILoadInfo> loadInfo = channel->LoadInfo(); + if (loadInfo->GetSkipContentSniffing()) { + /* + * If we did not get a useful Content-Type from the server + * but also have sniffing disabled, just determine whether + * to use text/plain or octetstream and log an error to the Console + */ + LastDitchSniff(aRequest); + + nsCOMPtr<nsIHttpChannel> httpChannel(do_QueryInterface(aRequest)); + if (httpChannel) { + nsAutoCString type; + httpChannel->GetContentType(type); + nsCOMPtr<nsIURI> requestUri; + httpChannel->GetURI(getter_AddRefs(requestUri)); + nsAutoCString spec; + requestUri->GetSpec(spec); + if (spec.Length() > 50) { + spec.Truncate(50); + spec.AppendLiteral("..."); + } + httpChannel->LogMimeTypeMismatch( + "XTCOWithMIMEValueMissing"_ns, false, NS_ConvertUTF8toUTF16(spec), + // Type is not used in the Error Message but required + NS_ConvertUTF8toUTF16(type)); + } + return; + } + } + + const char* testData = mBuffer; + uint32_t testDataLen = mBufferLen; + // Check if data are compressed. 
+ nsAutoCString decodedData; + + if (channel) { + // ConvertEncodedData is always called only on a single thread for each + // instance of an object. + nsresult rv = ConvertEncodedData(aRequest, mBuffer, mBufferLen); + if (NS_SUCCEEDED(rv)) { + MutexAutoLock lock(mMutex); + decodedData = mDecodedData; + } + if (!decodedData.IsEmpty()) { + testData = decodedData.get(); + testDataLen = std::min(decodedData.Length(), MAX_BUFFER_SIZE); + } + } + + // First, run through all the types we can detect reliably based on + // magic numbers + uint32_t i; + for (i = 0; i < sSnifferEntryNum; ++i) { + if (testDataLen >= sSnifferEntries[i].mByteLen && // enough data + memcmp(testData, sSnifferEntries[i].mBytes, + sSnifferEntries[i].mByteLen) == 0) { // and type matches + NS_ASSERTION( + sSnifferEntries[i].mMimeType || + sSnifferEntries[i].mContentTypeSniffer, + "Must have either a type string or a function to set the type"); + NS_ASSERTION(!sSnifferEntries[i].mMimeType || + !sSnifferEntries[i].mContentTypeSniffer, + "Both a type string and a type sniffing function set;" + " using type string"); + if (sSnifferEntries[i].mMimeType) { + MutexAutoLock lock(mMutex); + mContentType = sSnifferEntries[i].mMimeType; + NS_ASSERTION(!mContentType.IsEmpty(), + "Content type should be known by now."); + return; + } + if ((this->*(sSnifferEntries[i].mContentTypeSniffer))(aRequest)) { +#ifdef DEBUG + MutexAutoLock lock(mMutex); + NS_ASSERTION(!mContentType.IsEmpty(), + "Content type should be known by now."); +#endif + return; + } + } + } + + nsAutoCString sniffedType; + NS_SniffContent(NS_DATA_SNIFFER_CATEGORY, aRequest, (const uint8_t*)testData, + testDataLen, sniffedType); + { + MutexAutoLock lock(mMutex); + mContentType = sniffedType; + if (!mContentType.IsEmpty()) { + return; + } + } + + if (SniffForHTML(aRequest)) { +#ifdef DEBUG + MutexAutoLock lock(mMutex); + NS_ASSERTION(!mContentType.IsEmpty(), + "Content type should be known by now."); +#endif + return; + } + + // We don't know what 
this is yet. Before we just give up, try + // the URI from the request. + if (SniffURI(aRequest)) { +#ifdef DEBUG + MutexAutoLock lock(mMutex); + NS_ASSERTION(!mContentType.IsEmpty(), + "Content type should be known by now."); +#endif + return; + } + + LastDitchSniff(aRequest); +#ifdef DEBUG + MutexAutoLock lock(mMutex); + NS_ASSERTION(!mContentType.IsEmpty(), "Content type should be known by now."); +#endif +} + +bool nsUnknownDecoder::SniffForHTML(nsIRequest* aRequest) { + /* + * To prevent a possible attack, we will not consider this to be + * html content if it comes from the local file system and our prefs + * are set right + */ + if (!AllowSniffing(aRequest)) { + return false; + } + + MutexAutoLock lock(mMutex); + + // Now look for HTML. + const char* str; + const char* end; + if (mDecodedData.IsEmpty()) { + str = mBuffer; + end = mBuffer + mBufferLen; + } else { + str = mDecodedData.get(); + end = mDecodedData.get() + std::min(mDecodedData.Length(), MAX_BUFFER_SIZE); + } + + // skip leading whitespace + while (str != end && nsCRT::IsAsciiSpace(*str)) { + ++str; + } + + // did we find something like a start tag? + if (str == end || *str != '<' || ++str == end) { + return false; + } + + // If we seem to be SGML or XML and we got down here, just pretend we're HTML + if (*str == '!' || *str == '?') { + mContentType = TEXT_HTML; + return true; + } + + uint32_t bufSize = end - str; + // We use sizeof(_tagstr) below because that's the length of _tagstr + // with the one char " " or ">" appended. 
+#define MATCHES_TAG(_tagstr) \ + (bufSize >= sizeof(_tagstr) && \ + (PL_strncasecmp(str, _tagstr " ", sizeof(_tagstr)) == 0 || \ + PL_strncasecmp(str, _tagstr ">", sizeof(_tagstr)) == 0)) + + if (MATCHES_TAG("html") || MATCHES_TAG("frameset") || MATCHES_TAG("body") || + MATCHES_TAG("head") || MATCHES_TAG("script") || MATCHES_TAG("iframe") || + MATCHES_TAG("a") || MATCHES_TAG("img") || MATCHES_TAG("table") || + MATCHES_TAG("title") || MATCHES_TAG("link") || MATCHES_TAG("base") || + MATCHES_TAG("style") || MATCHES_TAG("div") || MATCHES_TAG("p") || + MATCHES_TAG("font") || MATCHES_TAG("applet") || MATCHES_TAG("meta") || + MATCHES_TAG("center") || MATCHES_TAG("form") || MATCHES_TAG("isindex") || + MATCHES_TAG("h1") || MATCHES_TAG("h2") || MATCHES_TAG("h3") || + MATCHES_TAG("h4") || MATCHES_TAG("h5") || MATCHES_TAG("h6") || + MATCHES_TAG("b") || MATCHES_TAG("pre")) { + mContentType = TEXT_HTML; + return true; + } + +#undef MATCHES_TAG + + return false; +} + +bool nsUnknownDecoder::SniffForXML(nsIRequest* aRequest) { + // Just like HTML, this should be able to be shut off. + if (!AllowSniffing(aRequest)) { + return false; + } + + // First see whether we can glean anything from the uri... 
+ if (!SniffURI(aRequest)) { + // Oh well; just generic XML will have to do + MutexAutoLock lock(mMutex); + mContentType = TEXT_XML; + } + + return true; +} + +bool nsUnknownDecoder::SniffURI(nsIRequest* aRequest) { + nsCOMPtr<nsIChannel> channel(do_QueryInterface(aRequest)); + nsCOMPtr<nsILoadInfo> loadInfo = channel->LoadInfo(); + if (loadInfo->GetSkipContentSniffing()) { + return false; + } + nsCOMPtr<nsIMIMEService> mimeService(do_GetService("@mozilla.org/mime;1")); + if (mimeService) { + nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest); + if (channel) { + nsCOMPtr<nsIURI> uri; + nsresult result = channel->GetURI(getter_AddRefs(uri)); + if (NS_SUCCEEDED(result) && uri) { + nsAutoCString type; + result = mimeService->GetTypeFromURI(uri, type); + if (NS_SUCCEEDED(result)) { + MutexAutoLock lock(mMutex); + mContentType = type; + return true; + } + } + } + } + + return false; +} + +// This macro is based on RFC 2046 Section 4.1.2. Treat any char 0-31 +// except the 9-13 range (\t, \n, \v, \f, \r) and char 27 (used by +// encodings like Shift_JIS) as non-text +#define IS_TEXT_CHAR(ch) \ + (((unsigned char)(ch)) > 31 || (9 <= (ch) && (ch) <= 13) || (ch) == 27) + +bool nsUnknownDecoder::LastDitchSniff(nsIRequest* aRequest) { + // All we can do now is try to guess whether this is text/plain or + // application/octet-stream + + MutexAutoLock lock(mMutex); + + const char* testData; + uint32_t testDataLen; + if (mDecodedData.IsEmpty()) { + testData = mBuffer; + // Since some legacy text files end with 0x1A, reading the entire buffer + // will lead misdetection. + testDataLen = std::min<uint32_t>(mBufferLen, MAX_BUFFER_SIZE); + } else { + testData = mDecodedData.get(); + testDataLen = std::min(mDecodedData.Length(), MAX_BUFFER_SIZE); + } + + // First, check for a BOM. If we see one, assume this is text/plain + // in whatever encoding. 
If there is a BOM _and_ text we will + // always have at least 4 bytes in the buffer (since the 2-byte BOMs + // are for 2-byte encodings and the UTF-8 BOM is 3 bytes). + if (testDataLen >= 4) { + const unsigned char* buf = (const unsigned char*)testData; + if ((buf[0] == 0xFE && buf[1] == 0xFF) || // UTF-16, Big Endian + (buf[0] == 0xFF && buf[1] == 0xFE) || // UTF-16 or UCS-4, Little Endian + (buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF) || // UTF-8 + (buf[0] == 0 && buf[1] == 0 && buf[2] == 0xFE && + buf[3] == 0xFF)) { // UCS-4, Big Endian + + mContentType = TEXT_PLAIN; + return true; + } + } + + // Now see whether the buffer has any non-text chars. If not, then let's + // just call it text/plain... + // + uint32_t i; + for (i = 0; i < testDataLen && IS_TEXT_CHAR(testData[i]); i++) { + } + + if (i == testDataLen) { + mContentType = TEXT_PLAIN; + } else { + mContentType = APPLICATION_OCTET_STREAM; + } + + return true; +} + +nsresult nsUnknownDecoder::FireListenerNotifications(nsIRequest* request, + nsISupports* aCtxt) { + nsresult rv = NS_OK; + + nsCOMPtr<nsIStreamListener> listener; + nsAutoCString contentType; + { + MutexAutoLock lock(mMutex); + if (!mNextListener) return NS_ERROR_FAILURE; + + listener = mNextListener; + contentType = mContentType; + } + + if (!contentType.IsEmpty()) { + nsCOMPtr<nsIViewSourceChannel> viewSourceChannel = + do_QueryInterface(request); + if (viewSourceChannel) { + rv = viewSourceChannel->SetOriginalContentType(contentType); + } else { + nsCOMPtr<nsIChannel> channel = do_QueryInterface(request, &rv); + if (NS_SUCCEEDED(rv)) { + // Set the new content type on the channel... + rv = channel->SetContentType(contentType); + } + } + + NS_ASSERTION(NS_SUCCEEDED(rv), "Unable to set content type on channel!"); + + if (NS_FAILED(rv)) { + // Cancel the request to make sure it has the correct status if + // mNextListener looks at it. 
+ request->Cancel(rv); + listener->OnStartRequest(request); + return rv; + } + } + + // Fire the OnStartRequest(...) + rv = listener->OnStartRequest(request); + + if (NS_SUCCEEDED(rv)) { + // install stream converter if required + nsCOMPtr<nsIEncodedChannel> encodedChannel = do_QueryInterface(request); + if (encodedChannel) { + nsCOMPtr<nsIStreamListener> listenerNew; + rv = encodedChannel->DoApplyContentConversions( + listener, getter_AddRefs(listenerNew), aCtxt); + if (NS_SUCCEEDED(rv) && listenerNew) { + MutexAutoLock lock(mMutex); + mNextListener = listenerNew; + listener = listenerNew; + } + } + } + + if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY; + + // If the request was canceled, then we need to treat that equivalently + // to an error returned by OnStartRequest. + if (NS_SUCCEEDED(rv)) request->GetStatus(&rv); + + // Fire the first OnDataAvailable for the data that was read from the + // stream into the sniffer buffer... + if (NS_SUCCEEDED(rv) && (mBufferLen > 0)) { + uint32_t len = 0; + nsCOMPtr<nsIInputStream> in; + nsCOMPtr<nsIOutputStream> out; + + // Create a pipe and fill it with the data from the sniffer buffer. 
+ rv = NS_NewPipe(getter_AddRefs(in), getter_AddRefs(out), MAX_BUFFER_SIZE, + MAX_BUFFER_SIZE); + + if (NS_SUCCEEDED(rv)) { + rv = out->Write(mBuffer, mBufferLen, &len); + if (NS_SUCCEEDED(rv)) { + if (len == mBufferLen) { + rv = listener->OnDataAvailable(request, in, 0, len); + } else { + NS_ERROR("Unable to write all the data into the pipe."); + rv = NS_ERROR_FAILURE; + } + } + } + } + + delete[] mBuffer; + mBuffer = nullptr; + mBufferLen = 0; + + return rv; +} + +nsresult nsUnknownDecoder::ConvertEncodedData(nsIRequest* request, + const char* data, + uint32_t length) { + nsresult rv = NS_OK; + + { + MutexAutoLock lock(mMutex); + mDecodedData = ""; + } + nsCOMPtr<nsIEncodedChannel> encodedChannel(do_QueryInterface(request)); + if (encodedChannel) { + RefPtr<ConvertedStreamListener> strListener = + new ConvertedStreamListener(this); + + nsCOMPtr<nsIStreamListener> listener; + rv = encodedChannel->DoApplyContentConversions( + strListener, getter_AddRefs(listener), nullptr); + + if (NS_FAILED(rv)) { + return rv; + } + + if (listener) { + listener->OnStartRequest(request); + + if (length) { + nsCOMPtr<nsIStringInputStream> rawStream = + do_CreateInstance(NS_STRINGINPUTSTREAM_CONTRACTID); + if (!rawStream) return NS_ERROR_FAILURE; + + rv = rawStream->SetData((const char*)data, length); + NS_ENSURE_SUCCESS(rv, rv); + + rv = listener->OnDataAvailable(request, rawStream, 0, length); + NS_ENSURE_SUCCESS(rv, rv); + } + + listener->OnStopRequest(request, NS_OK); + } + } + return rv; +} + +// +// nsIThreadRetargetableStreamListener methods +// +NS_IMETHODIMP +nsUnknownDecoder::CheckListenerChain() { + nsCOMPtr<nsIThreadRetargetableStreamListener> listener; + { + MutexAutoLock lock(mMutex); + listener = do_QueryInterface(mNextListener); + } + if (!listener) { + return NS_ERROR_NO_INTERFACE; + } + + return listener->CheckListenerChain(); +} + +void nsBinaryDetector::DetermineContentType(nsIRequest* aRequest) { + nsCOMPtr<nsIHttpChannel> httpChannel = 
do_QueryInterface(aRequest); + if (!httpChannel) { + return; + } + + nsCOMPtr<nsILoadInfo> loadInfo = httpChannel->LoadInfo(); + if (loadInfo->GetSkipContentSniffing()) { + LastDitchSniff(aRequest); + return; + } + // It's an HTTP channel. Check for the text/plain mess + nsAutoCString contentTypeHdr; + Unused << httpChannel->GetResponseHeader("Content-Type"_ns, contentTypeHdr); + nsAutoCString contentType; + httpChannel->GetContentType(contentType); + + // Make sure to do a case-sensitive exact match comparison here. Apache + // 1.x just sends text/plain for "unknown", while Apache 2.x sends + // text/plain with a ISO-8859-1 charset. Debian's Apache version, just to + // be different, sends text/plain with iso-8859-1 charset. For extra fun, + // FC7, RHEL4, and Ubuntu Feisty send charset=UTF-8. Don't do general + // case-insensitive comparison, since we really want to apply this crap as + // rarely as we can. + if (!contentType.EqualsLiteral("text/plain") || + (!contentTypeHdr.EqualsLiteral("text/plain") && + !contentTypeHdr.EqualsLiteral("text/plain; charset=ISO-8859-1") && + !contentTypeHdr.EqualsLiteral("text/plain; charset=iso-8859-1") && + !contentTypeHdr.EqualsLiteral("text/plain; charset=UTF-8"))) { + return; + } + + // Check whether we have content-encoding. If we do, don't try to + // detect the type. + // XXXbz we could improve this by doing a local decompress if we + // wanted, I'm sure. + nsAutoCString contentEncoding; + Unused << httpChannel->GetResponseHeader("Content-Encoding"_ns, + contentEncoding); + if (!contentEncoding.IsEmpty()) { + return; + } + + LastDitchSniff(aRequest); + MutexAutoLock lock(mMutex); + if (mContentType.EqualsLiteral(APPLICATION_OCTET_STREAM)) { + // We want to guess at it instead + mContentType = APPLICATION_GUESS_FROM_EXT; + } else { + // Let the text/plain type we already have be, so that other content + // sniffers can also get a shot at this data. 
+ mContentType.Truncate(); + } +} diff --git a/netwerk/streamconv/converters/nsUnknownDecoder.h b/netwerk/streamconv/converters/nsUnknownDecoder.h new file mode 100644 index 0000000000..3c46d52414 --- /dev/null +++ b/netwerk/streamconv/converters/nsUnknownDecoder.h @@ -0,0 +1,166 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsUnknownDecoder_h__ +#define nsUnknownDecoder_h__ + +#include "nsIStreamConverter.h" +#include "nsIThreadRetargetableStreamListener.h" +#include "nsIContentSniffer.h" +#include "mozilla/Mutex.h" +#include "mozilla/Atomics.h" + +#include "nsCOMPtr.h" +#include "nsString.h" + +#define NS_UNKNOWNDECODER_CID \ + { /* 7d7008a0-c49a-11d3-9b22-0080c7cb1080 */ \ + 0x7d7008a0, 0xc49a, 0x11d3, { \ + 0x9b, 0x22, 0x00, 0x80, 0xc7, 0xcb, 0x10, 0x80 \ + } \ + } + +class nsUnknownDecoder : public nsIStreamConverter, + public nsIContentSniffer, + public nsIThreadRetargetableStreamListener { + public: + // nsISupports methods + NS_DECL_ISUPPORTS + + // nsIStreamConverter methods + NS_DECL_NSISTREAMCONVERTER + + // nsIStreamListener methods + NS_DECL_NSISTREAMLISTENER + + // nsIRequestObserver methods + NS_DECL_NSIREQUESTOBSERVER + + // nsIContentSniffer methods + NS_DECL_NSICONTENTSNIFFER + + // nsIThreadRetargetableStreamListener methods + NS_DECL_NSITHREADRETARGETABLESTREAMLISTENER + + nsUnknownDecoder(); + + protected: + virtual ~nsUnknownDecoder(); + + virtual void DetermineContentType(nsIRequest* aRequest); + nsresult FireListenerNotifications(nsIRequest* request, nsISupports* aCtxt); + + class ConvertedStreamListener : public nsIStreamListener { + public: + explicit ConvertedStreamListener(nsUnknownDecoder* aDecoder); + + NS_DECL_ISUPPORTS + NS_DECL_NSIREQUESTOBSERVER + NS_DECL_NSISTREAMLISTENER + + 
private: + virtual ~ConvertedStreamListener() = default; + static nsresult AppendDataToString(nsIInputStream* inputStream, + void* closure, const char* rawSegment, + uint32_t toOffset, uint32_t count, + uint32_t* writeCount); + nsUnknownDecoder* mDecoder; + }; + + protected: + nsCOMPtr<nsIStreamListener> mNextListener; + + // Function to use to check whether sniffing some potentially + // dangerous types (eg HTML) is ok for this request. We can disable + // sniffing for local files if needed using this. Just a security + // precation thingy... who knows when we suddenly need to flip this + // pref? + bool AllowSniffing(nsIRequest* aRequest); + + // Various sniffer functions. Returning true means that a type + // was determined; false means no luck. + bool SniffForHTML(nsIRequest* aRequest); + bool SniffForXML(nsIRequest* aRequest); + + // SniffURI guesses at the content type based on the URI (typically + // using the extentsion) + bool SniffURI(nsIRequest* aRequest); + + // LastDitchSniff guesses at text/plain vs. application/octet-stream + // by just looking at whether the data contains null bytes, and + // maybe at the fraction of chars with high bit set. Use this only + // as a last-ditch attempt to decide a content type! + bool LastDitchSniff(nsIRequest* aRequest); + + /** + * An entry struct for our array of sniffers. Each entry has either + * a type associated with it (set these with the SNIFFER_ENTRY macro) + * or a function to be executed (set these with the + * SNIFFER_ENTRY_WITH_FUNC macro). 
The function should take a single + * nsIRequest* and returns bool -- true if it sets mContentType, + * false otherwise + */ + struct nsSnifferEntry { + typedef bool (nsUnknownDecoder::*TypeSniffFunc)(nsIRequest* aRequest); + + const char* mBytes; + uint32_t mByteLen; + + // Exactly one of mMimeType and mContentTypeSniffer should be set non-null + const char* mMimeType; + TypeSniffFunc mContentTypeSniffer; + }; + +#define SNIFFER_ENTRY(_bytes, _type) \ + { _bytes, sizeof(_bytes) - 1, _type, nullptr } + +#define SNIFFER_ENTRY_WITH_FUNC(_bytes, _func) \ + { _bytes, sizeof(_bytes) - 1, nullptr, _func } + + static nsSnifferEntry sSnifferEntries[]; + static uint32_t sSnifferEntryNum; + + // We guarantee in order delivery of OnStart, OnStop and OnData, therefore + // we do not need proper locking for mBuffer. + mozilla::Atomic<char*> mBuffer; + mozilla::Atomic<uint32_t> mBufferLen; + mozilla::Atomic<bool> mRequireHTMLsuffix; + + nsCString mContentType; + + // This mutex syncs: mContentType, mDecodedData and mNextListener. + mutable mozilla::Mutex mMutex; + + protected: + nsresult ConvertEncodedData(nsIRequest* request, const char* data, + uint32_t length); + nsCString mDecodedData; // If data are encoded this will be uncompress data. +}; + +#define NS_BINARYDETECTOR_CID \ + { /* a2027ec6-ba0d-4c72-805d-148233f5f33c */ \ + 0xa2027ec6, 0xba0d, 0x4c72, { \ + 0x80, 0x5d, 0x14, 0x82, 0x33, 0xf5, 0xf3, 0x3c \ + } \ + } + +/** + * Class that detects whether a data stream is text or binary. This reuses + * most of nsUnknownDecoder except the actual content-type determination logic + * -- our overridden DetermineContentType simply calls LastDitchSniff and sets + * the type to APPLICATION_GUESS_FROM_EXT if the data is detected as binary. 
+ */ +class nsBinaryDetector : public nsUnknownDecoder { + protected: + virtual void DetermineContentType(nsIRequest* aRequest) override; +}; + +#define NS_BINARYDETECTOR_CATEGORYENTRY \ + { \ + NS_CONTENT_SNIFFER_CATEGORY, "Binary Detector", \ + NS_BINARYDETECTOR_CONTRACTID \ + } + +#endif /* nsUnknownDecoder_h__ */ |