From 6bf0a5cb5034a7e684dcc3500e841785237ce2dd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 19:32:43 +0200 Subject: Adding upstream version 1:115.7.0. Signed-off-by: Daniel Baumann --- comm/mailnews/imap/src/nsImapGenericParser.cpp | 407 +++++++++++++++++++++++++ 1 file changed, 407 insertions(+) create mode 100644 comm/mailnews/imap/src/nsImapGenericParser.cpp (limited to 'comm/mailnews/imap/src/nsImapGenericParser.cpp') diff --git a/comm/mailnews/imap/src/nsImapGenericParser.cpp b/comm/mailnews/imap/src/nsImapGenericParser.cpp new file mode 100644 index 0000000000..009c7c1e5a --- /dev/null +++ b/comm/mailnews/imap/src/nsImapGenericParser.cpp @@ -0,0 +1,407 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "msgCore.h" // for pre-compiled headers + +#include "nsImapGenericParser.h" +#include "nsString.h" + +////////////////// nsImapGenericParser ///////////////////////// + +nsImapGenericParser::nsImapGenericParser() + : fNextToken(nullptr), + fCurrentLine(nullptr), + fLineOfTokens(nullptr), + fStartOfLineOfTokens(nullptr), + fCurrentTokenPlaceHolder(nullptr), + fAtEndOfLine(false), + fParserState(stateOK) {} + +nsImapGenericParser::~nsImapGenericParser() { + PR_FREEIF(fCurrentLine); + PR_FREEIF(fStartOfLineOfTokens); +} + +void nsImapGenericParser::HandleMemoryFailure() { SetConnected(false); } + +void nsImapGenericParser::ResetLexAnalyzer() { + PR_FREEIF(fCurrentLine); + PR_FREEIF(fStartOfLineOfTokens); + + fNextToken = fCurrentLine = fLineOfTokens = fStartOfLineOfTokens = + fCurrentTokenPlaceHolder = nullptr; + fAtEndOfLine = false; +} + +bool nsImapGenericParser::LastCommandSuccessful() { + return fParserState == stateOK; +} + +void nsImapGenericParser::SetSyntaxError(bool error, const char* msg) { + if (error) + fParserState |= stateSyntaxErrorFlag; + else + fParserState &= ~stateSyntaxErrorFlag; + NS_ASSERTION(!error, "syntax error in generic parser"); +} + +void nsImapGenericParser::SetConnected(bool connected) { + if (connected) + fParserState &= ~stateDisconnectedFlag; + else + fParserState |= stateDisconnectedFlag; +} + +void nsImapGenericParser::skip_to_CRLF() { + while (Connected() && !fAtEndOfLine) AdvanceToNextToken(); +} + +// fNextToken initially should point to +// a string after the initial open paren ("(") +// After this call, fNextToken points to the +// first character after the matching close +// paren. Only call AdvanceToNextToken() to get the NEXT +// token after the one returned in fNextToken. +void nsImapGenericParser::skip_to_close_paren() { + int numberOfCloseParensNeeded = 1; + while (ContinueParse()) { + // go through fNextToken, account for nested parens + const char* loc; + for (loc = fNextToken; loc && *loc; loc++) { + if (*loc == '(') + numberOfCloseParensNeeded++; + else if (*loc == ')') { + numberOfCloseParensNeeded--; + if (numberOfCloseParensNeeded == 0) { + fNextToken = loc + 1; + if (!fNextToken || !*fNextToken) AdvanceToNextToken(); + return; + } + } else if (*loc == '{' || *loc == '"') { + // quoted or literal + fNextToken = loc; + char* a = CreateString(); + PR_FREEIF(a); + break; // move to next token + } + } + if (ContinueParse()) AdvanceToNextToken(); + } +} + +void nsImapGenericParser::AdvanceToNextToken() { + if (!fCurrentLine || fAtEndOfLine) AdvanceToNextLine(); + if (Connected()) { + if (!fStartOfLineOfTokens) { + // this is the first token of the line; setup tokenizer now + fStartOfLineOfTokens = PL_strdup(fCurrentLine); + if (!fStartOfLineOfTokens) { + HandleMemoryFailure(); + return; + } + fLineOfTokens = fStartOfLineOfTokens; + fCurrentTokenPlaceHolder = fStartOfLineOfTokens; + } + fNextToken = NS_strtok(WHITESPACE, &fCurrentTokenPlaceHolder); + if (!fNextToken) { + fAtEndOfLine = true; + fNextToken = CRLF; + } + } +} + +void nsImapGenericParser::AdvanceToNextLine() { + PR_FREEIF(fCurrentLine); + PR_FREEIF(fStartOfLineOfTokens); + + bool ok = GetNextLineForParser(&fCurrentLine); + if (!ok) { + SetConnected(false); + fStartOfLineOfTokens = nullptr; + fLineOfTokens = nullptr; + fCurrentTokenPlaceHolder = nullptr; + fAtEndOfLine = true; + fNextToken = CRLF; + } else if (!fCurrentLine) { + HandleMemoryFailure(); + } else { + fNextToken = nullptr; + // determine if there are any tokens (without calling AdvanceToNextToken); + // otherwise we are already at end of line + NS_ASSERTION(strlen(WHITESPACE) == 3, "assume 3 chars of whitespace"); + char* firstToken = fCurrentLine; + while (*firstToken && + (*firstToken == WHITESPACE[0] || *firstToken == WHITESPACE[1] || + *firstToken == WHITESPACE[2])) + firstToken++; + fAtEndOfLine = (*firstToken == '\0'); + } +} + +// advances |fLineOfTokens| by |bytesToAdvance| bytes +void nsImapGenericParser::AdvanceTokenizerStartingPoint( + int32_t bytesToAdvance) { + NS_ASSERTION(bytesToAdvance >= 0, "bytesToAdvance must not be negative"); + if (!fStartOfLineOfTokens) { + AdvanceToNextToken(); // the tokenizer was not yet initialized, do it now + if (!fStartOfLineOfTokens) return; + } + + if (!fStartOfLineOfTokens) return; + // The last call to AdvanceToNextToken() cleared the token separator to '\0' + // iff |fCurrentTokenPlaceHolder|. We must recover this token separator now. + if (fCurrentTokenPlaceHolder) { + int endTokenOffset = fCurrentTokenPlaceHolder - fStartOfLineOfTokens - 1; + if (endTokenOffset >= 0) + fStartOfLineOfTokens[endTokenOffset] = fCurrentLine[endTokenOffset]; + } + + NS_ASSERTION(bytesToAdvance + (fLineOfTokens - fStartOfLineOfTokens) <= + (int32_t)strlen(fCurrentLine), + "cannot advance beyond end of fLineOfTokens"); + fLineOfTokens += bytesToAdvance; + fCurrentTokenPlaceHolder = fLineOfTokens; +} + +// RFC3501: astring = 1*ASTRING-CHAR / string +// string = quoted / literal +// This function leaves us off with fCurrentTokenPlaceHolder immediately after +// the end of the Astring. Call AdvanceToNextToken() to get the token after it. +char* nsImapGenericParser::CreateAstring() { + if (*fNextToken == '{') return CreateLiteral(); // literal + if (*fNextToken == '"') return CreateQuoted(); // quoted + return CreateAtom(true); // atom +} + +// Create an atom +// This function does not advance the parser. +// Call AdvanceToNextToken() to get the next token after the atom. +// RFC3501: atom = 1*ATOM-CHAR +// ASTRING-CHAR = ATOM-CHAR / resp-specials +// ATOM-CHAR = +// atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards / +// quoted-specials / resp-specials +// list-wildcards = "%" / "*" +// quoted-specials = DQUOTE / "\" +// resp-specials = "]" +// "Characters are 7-bit US-ASCII unless otherwise specified." [RFC3501, 1.2.] +char* nsImapGenericParser::CreateAtom(bool isAstring) { + char* rv = PL_strdup(fNextToken); + if (!rv) { + HandleMemoryFailure(); + return nullptr; + } + // We wish to stop at the following characters (in decimal ascii) + // 1-31 (CTL), 32 (SP), 34 '"', 37 '%', 40-42 "()*", 92 '\\', 123 '{' + // also, ']' is only allowed in astrings + char* last = rv; + char c = *last; + while ((c > 42 || c == 33 || c == 35 || c == 36 || c == 38 || c == 39) && + c != '\\' && c != '{' && (isAstring || c != ']')) + c = *++last; + if (rv == last) { + SetSyntaxError(true, "no atom characters found"); + PL_strfree(rv); + return nullptr; + } + if (*last) { + // not the whole token was consumed + *last = '\0'; + AdvanceTokenizerStartingPoint((fNextToken - fLineOfTokens) + (last - rv)); + } + return rv; +} + +// CreateNilString return either NULL (for "NIL") or a string +// Call with fNextToken pointing to the thing which we think is the nilstring. +// This function leaves us off with fCurrentTokenPlaceHolder immediately after +// the end of the string. +// Regardless of type, call AdvanceToNextToken() to get the token after it. +// RFC3501: nstring = string / nil +// nil = "NIL" +char* nsImapGenericParser::CreateNilString() { + if (!PL_strncasecmp(fNextToken, "NIL", 3)) { + // check if there is text after "NIL" in fNextToken, + // equivalent handling as in CreateQuoted + if (fNextToken[3]) + AdvanceTokenizerStartingPoint((fNextToken - fLineOfTokens) + 3); + return NULL; + } + return CreateString(); +} + +// Create a string, which can either be quoted or literal, +// but not an atom. +// This function leaves us off with fCurrentTokenPlaceHolder immediately after +// the end of the String. Call AdvanceToNextToken() to get the token after it. +char* nsImapGenericParser::CreateString() { + if (*fNextToken == '{') { + char* rv = CreateLiteral(); // literal + return (rv); + } + if (*fNextToken == '"') { + char* rv = CreateQuoted(); // quoted + return (rv); + } + SetSyntaxError(true, "string does not start with '{' or '\"'"); + return NULL; +} + +// This function sets fCurrentTokenPlaceHolder immediately after the end of the +// closing quote. Call AdvanceToNextToken() to get the token after it. +// QUOTED_CHAR ::= / +// "\" quoted_specials +// TEXT_CHAR ::= +// quoted_specials ::= <"> / "\" +// Note that according to RFC 1064 and RFC 2060, CRs and LFs are not allowed +// inside a quoted string. It is sufficient to read from the current line only. +char* nsImapGenericParser::CreateQuoted(bool /*skipToEnd*/) { + // one char past opening '"' + char* currentChar = fCurrentLine + (fNextToken - fStartOfLineOfTokens) + 1; + + int escapeCharsCut = 0; + nsCString returnString(currentChar); + int charIndex; + for (charIndex = 0; returnString.CharAt(charIndex) != '"'; charIndex++) { + if (!returnString.CharAt(charIndex)) { + SetSyntaxError(true, "no closing '\"' found in quoted"); + return nullptr; + } + if (returnString.CharAt(charIndex) == '\\') { + // eat the escape character, but keep the escaped character + returnString.Cut(charIndex, 1); + escapeCharsCut++; + } + } + // +2 because of the start and end quotes + AdvanceTokenizerStartingPoint((fNextToken - fLineOfTokens) + charIndex + + escapeCharsCut + 2); + + returnString.SetLength(charIndex); + return ToNewCString(returnString); +} + +// This function leaves us off with fCurrentTokenPlaceHolder immediately after +// the end of the literal string. Call AdvanceToNextToken() to get the token +// after the literal string. +// RFC3501: literal = "{" number "}" CRLF *CHAR8 +// ; Number represents the number of CHAR8s +// CHAR8 = %x01-ff +// ; any OCTET except NUL, %x00 +char* nsImapGenericParser::CreateLiteral() { + int32_t numberOfCharsInMessage = atoi(fNextToken + 1); + uint32_t numBytes = numberOfCharsInMessage + 1; + NS_ASSERTION(numBytes, "overflow!"); + if (!numBytes) return nullptr; + char* returnString = (char*)PR_Malloc(numBytes); + if (!returnString) { + HandleMemoryFailure(); + return nullptr; + } + + int32_t currentLineLength = 0; + int32_t charsReadSoFar = 0; + int32_t bytesToCopy = 0; + while (charsReadSoFar < numberOfCharsInMessage) { + AdvanceToNextLine(); + if (!ContinueParse()) break; + + currentLineLength = strlen(fCurrentLine); + bytesToCopy = (currentLineLength > numberOfCharsInMessage - charsReadSoFar + ? numberOfCharsInMessage - charsReadSoFar + : currentLineLength); + NS_ASSERTION(bytesToCopy, "zero-length line?"); + memcpy(returnString + charsReadSoFar, fCurrentLine, bytesToCopy); + charsReadSoFar += bytesToCopy; + } + + if (ContinueParse()) { + if (currentLineLength == bytesToCopy) { + // We have consumed the entire line. + // Consider the input "{4}\r\n" "L1\r\n" " A2\r\n" which is read + // line-by-line. Reading an Astring, this should result in "L1\r\n". + // Note that the second line is "L1\r\n", where the "\r\n" is part of + // the literal. Hence, we now read the next line to ensure that the + // next call to AdvanceToNextToken() leads to fNextToken=="A2" in our + // example. + AdvanceToNextLine(); + } else + AdvanceTokenizerStartingPoint(bytesToCopy); + } + + returnString[charsReadSoFar] = 0; + return returnString; +} + +// Call this to create a buffer containing all characters within +// a given set of parentheses. +// Call this with fNextToken[0]=='(', that is, the open paren +// of the group. +// It will allocate and return all characters up to and including the +// corresponding closing paren, and leave the parser in the right place +// afterwards. +char* nsImapGenericParser::CreateParenGroup() { + NS_ASSERTION(fNextToken[0] == '(', "we don't have a paren group!"); + + int numOpenParens = 0; + AdvanceTokenizerStartingPoint(fNextToken - fLineOfTokens); + + // Build up a buffer containing the paren group. + nsCString returnString; + char* parenGroupStart = fCurrentTokenPlaceHolder; + NS_ASSERTION(parenGroupStart[0] == '(', "we don't have a paren group (2)!"); + while (*fCurrentTokenPlaceHolder) { + if (*fCurrentTokenPlaceHolder == '{') // literal + { + // Ensure it is a properly formatted literal. + NS_ASSERTION(!strcmp("}\r\n", fCurrentTokenPlaceHolder + + strlen(fCurrentTokenPlaceHolder) - 3), + "not a literal"); + + // Append previous characters and the "{xx}\r\n" to buffer. + returnString.Append(parenGroupStart); + + // Append literal itself. + AdvanceToNextToken(); + if (!ContinueParse()) break; + char* lit = CreateLiteral(); + NS_ASSERTION(lit, "syntax error or out of memory"); + if (!lit) break; + returnString.Append(lit); + PR_Free(lit); + if (!ContinueParse()) break; + parenGroupStart = fCurrentTokenPlaceHolder; + } else if (*fCurrentTokenPlaceHolder == '"') // quoted + { + // Append the _escaped_ version of the quoted string: + // just skip it (because the quoted string must be on the same line). + AdvanceToNextToken(); + if (!ContinueParse()) break; + char* q = CreateQuoted(); + if (!q) break; + PR_Free(q); + if (!ContinueParse()) break; + } else { + // Append this character to the buffer. + char c = *fCurrentTokenPlaceHolder++; + if (c == '(') + numOpenParens++; + else if (c == ')') { + numOpenParens--; + if (numOpenParens == 0) break; + } + } + } + + if (numOpenParens != 0 || !ContinueParse()) { + SetSyntaxError(true, "closing ')' not found in paren group"); + return nullptr; + } + + returnString.Append(parenGroupStart, + fCurrentTokenPlaceHolder - parenGroupStart); + AdvanceToNextToken(); + return ToNewCString(returnString); +} -- cgit v1.2.3