summary refs log tree commit diff stats
path: root/comm/mailnews/imap/src/nsImapGenericParser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'comm/mailnews/imap/src/nsImapGenericParser.cpp')
-rw-r--r-- comm/mailnews/imap/src/nsImapGenericParser.cpp 407
1 files changed, 407 insertions, 0 deletions
diff --git a/comm/mailnews/imap/src/nsImapGenericParser.cpp b/comm/mailnews/imap/src/nsImapGenericParser.cpp
new file mode 100644
index 0000000000..009c7c1e5a
--- /dev/null
+++ b/comm/mailnews/imap/src/nsImapGenericParser.cpp
@@ -0,0 +1,407 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "msgCore.h" // for pre-compiled headers
+
+#include "nsImapGenericParser.h"
+#include "nsString.h"
+
+////////////////// nsImapGenericParser /////////////////////////
+
+nsImapGenericParser::nsImapGenericParser()
+ : fNextToken(nullptr),
+ fCurrentLine(nullptr),
+ fLineOfTokens(nullptr),
+ fStartOfLineOfTokens(nullptr),
+ fCurrentTokenPlaceHolder(nullptr),
+ fAtEndOfLine(false),
+ fParserState(stateOK) {}
+
+// Frees the tokenizer's working copy of the line and the current line buffer,
+// if either is still allocated.
+nsImapGenericParser::~nsImapGenericParser() {
+  PR_FREEIF(fStartOfLineOfTokens);
+  PR_FREEIF(fCurrentLine);
+}
+
+// Called when an allocation fails: marks the parser as disconnected.
+void nsImapGenericParser::HandleMemoryFailure() {
+  SetConnected(false);
+}
+
+// Returns the tokenizer to its initial state: frees both line buffers, clears
+// every pointer into them and resets the end-of-line flag. The parser state
+// flags (fParserState) are not touched.
+void nsImapGenericParser::ResetLexAnalyzer() {
+  PR_FREEIF(fCurrentLine);
+  PR_FREEIF(fStartOfLineOfTokens);
+
+  fAtEndOfLine = false;
+  fCurrentTokenPlaceHolder = nullptr;
+  fStartOfLineOfTokens = nullptr;
+  fLineOfTokens = nullptr;
+  fCurrentLine = nullptr;
+  fNextToken = nullptr;
+}
+
+// Returns true when the parser state equals stateOK, i.e. neither the
+// syntax-error flag nor the disconnected flag is set.
+bool nsImapGenericParser::LastCommandSuccessful() {
+  const bool noErrorFlagsSet = (fParserState == stateOK);
+  return noErrorFlagsSet;
+}
+
+void nsImapGenericParser::SetSyntaxError(bool error, const char* msg) {
+ if (error)
+ fParserState |= stateSyntaxErrorFlag;
+ else
+ fParserState &= ~stateSyntaxErrorFlag;
+ NS_ASSERTION(!error, "syntax error in generic parser");
+}
+
+// Records the connection status in fParserState: the disconnected flag is
+// set when |connected| is false and cleared when it is true.
+void nsImapGenericParser::SetConnected(bool connected) {
+  if (!connected) {
+    fParserState |= stateDisconnectedFlag;
+  } else {
+    fParserState &= ~stateDisconnectedFlag;
+  }
+}
+
+// Consumes tokens until the end of the current line has been reached or the
+// connection is lost.
+void nsImapGenericParser::skip_to_CRLF() {
+  for (;;) {
+    if (!Connected() || fAtEndOfLine) return;
+    AdvanceToNextToken();
+  }
+}
+
+// Skips the remainder of a parenthesised group.
+// On entry fNextToken should point just past the group's opening "(".
+// Nested parentheses are counted; quoted strings and literals encountered on
+// the way are parsed with CreateString() and discarded, so parentheses inside
+// them are not counted.
+// On return fNextToken points to the first character after the matching ")";
+// if nothing follows it within the current token, the parser has already
+// advanced to the next token. Only call AdvanceToNextToken() to get the token
+// AFTER the one left in fNextToken.
+void nsImapGenericParser::skip_to_close_paren() {
+  int unmatchedOpenParens = 1;
+  while (ContinueParse()) {
+    // Scan the current token character by character.
+    for (const char* cursor = fNextToken; cursor && *cursor; ++cursor) {
+      const char ch = *cursor;
+      if (ch == '(') {
+        ++unmatchedOpenParens;
+        continue;
+      }
+      if (ch == ')') {
+        if (--unmatchedOpenParens == 0) {
+          // Found the matching close paren; |cursor| is non-null here, so
+          // only the "nothing left in this token" case needs checking.
+          fNextToken = cursor + 1;
+          if (!*fNextToken) AdvanceToNextToken();
+          return;
+        }
+        continue;
+      }
+      if (ch == '{' || ch == '"') {
+        // Quoted string or literal: let the string parser consume it, throw
+        // the result away, and resume with the following token.
+        fNextToken = cursor;
+        char* skipped = CreateString();
+        PR_FREEIF(skipped);
+        break;
+      }
+    }
+    if (ContinueParse()) AdvanceToNextToken();
+  }
+}
+
+// Moves fNextToken to the next whitespace-delimited token.
+// A new line is fetched first when there is no current line or the current
+// one is exhausted. The tokenizer works on a private copy of the line
+// (fStartOfLineOfTokens), created lazily on the first token of each line,
+// because NS_strtok writes terminators into the buffer it scans.
+// When the line has no more tokens, fAtEndOfLine is set and fNextToken points
+// at CRLF. Nothing is tokenized if the parser is disconnected.
+void nsImapGenericParser::AdvanceToNextToken() {
+  const bool needNewLine = !fCurrentLine || fAtEndOfLine;
+  if (needNewLine) AdvanceToNextLine();
+  if (!Connected()) return;
+
+  if (!fStartOfLineOfTokens) {
+    // First token of this line: set up the tokenizer's working copy.
+    fStartOfLineOfTokens = PL_strdup(fCurrentLine);
+    if (!fStartOfLineOfTokens) {
+      HandleMemoryFailure();
+      return;
+    }
+    fCurrentTokenPlaceHolder = fStartOfLineOfTokens;
+    fLineOfTokens = fStartOfLineOfTokens;
+  }
+
+  fNextToken = NS_strtok(WHITESPACE, &fCurrentTokenPlaceHolder);
+  if (fNextToken) return;
+
+  // No token left on this line.
+  fAtEndOfLine = true;
+  fNextToken = CRLF;
+}
+
+// Discards the current line (and the tokenizer's copy of it) and fetches the
+// next one through GetNextLineForParser().
+//  - If fetching fails, the parser is marked disconnected, the tokenizer
+//    pointers are cleared, and the parser is left "at end of line" with
+//    fNextToken pointing at CRLF.
+//  - If fetching succeeds but yields no buffer, it is treated as a memory
+//    failure.
+//  - Otherwise fNextToken is cleared and fAtEndOfLine is set iff the new line
+//    consists only of whitespace (i.e. contains no tokens).
+void nsImapGenericParser::AdvanceToNextLine() {
+  PR_FREEIF(fCurrentLine);
+  PR_FREEIF(fStartOfLineOfTokens);
+
+  if (!GetNextLineForParser(&fCurrentLine)) {
+    SetConnected(false);
+    fStartOfLineOfTokens = nullptr;
+    fLineOfTokens = nullptr;
+    fCurrentTokenPlaceHolder = nullptr;
+    fAtEndOfLine = true;
+    fNextToken = CRLF;
+    return;
+  }
+
+  if (!fCurrentLine) {
+    HandleMemoryFailure();
+    return;
+  }
+
+  fNextToken = nullptr;
+  // Look for a first token without invoking the tokenizer; if there is none
+  // we are already at the end of the line.
+  NS_ASSERTION(strlen(WHITESPACE) == 3, "assume 3 chars of whitespace");
+  const char* scan = fCurrentLine;
+  while (*scan && (*scan == WHITESPACE[0] || *scan == WHITESPACE[1] ||
+                   *scan == WHITESPACE[2])) {
+    ++scan;
+  }
+  fAtEndOfLine = (*scan == '\0');
+}
+
+// Advances |fLineOfTokens| by |bytesToAdvance| bytes and restarts
+// tokenization from that position.
+// If the tokenizer has not been set up for the current line yet, it is
+// initialized first via AdvanceToNextToken(); if that fails, nothing happens.
+void nsImapGenericParser::AdvanceTokenizerStartingPoint(
+    int32_t bytesToAdvance) {
+  NS_ASSERTION(bytesToAdvance >= 0, "bytesToAdvance must not be negative");
+  if (!fStartOfLineOfTokens) {
+    // The tokenizer was not yet initialized; do it now.
+    AdvanceToNextToken();
+  }
+  if (!fStartOfLineOfTokens) return;
+
+  // The last call to AdvanceToNextToken() overwrote the token separator with
+  // '\0' iff |fCurrentTokenPlaceHolder| is non-null. Put the original
+  // character back, taking it from the untouched line in |fCurrentLine|.
+  if (fCurrentTokenPlaceHolder) {
+    const int separatorIndex =
+        fCurrentTokenPlaceHolder - fStartOfLineOfTokens - 1;
+    if (separatorIndex >= 0) {
+      fStartOfLineOfTokens[separatorIndex] = fCurrentLine[separatorIndex];
+    }
+  }
+
+  NS_ASSERTION(bytesToAdvance + (fLineOfTokens - fStartOfLineOfTokens) <=
+                   (int32_t)strlen(fCurrentLine),
+               "cannot advance beyond end of fLineOfTokens");
+  fLineOfTokens += bytesToAdvance;
+  fCurrentTokenPlaceHolder = fLineOfTokens;
+}
+
+// Parses an astring at fNextToken and returns it as a newly allocated string.
+// RFC3501: astring = 1*ASTRING-CHAR / string
+//          string  = quoted / literal
+// The first character selects the parser: '{' -> literal, '"' -> quoted,
+// anything else -> atom (with astring rules, so ']' is accepted).
+// Call AdvanceToNextToken() afterwards to get the token following the astring.
+char* nsImapGenericParser::CreateAstring() {
+  switch (*fNextToken) {
+    case '{':
+      return CreateLiteral();
+    case '"':
+      return CreateQuoted();
+    default:
+      return CreateAtom(true);
+  }
+}
+
+// Creates an atom from the leading atom characters of fNextToken and returns
+// it as a newly allocated string (nullptr on allocation failure or when the
+// token starts with a non-atom character, which is flagged as a syntax error).
+// If only part of the token is an atom, the tokenizer is repositioned right
+// after the atom so that the rest of the token is seen next.
+// Call AdvanceToNextToken() to get the next token after the atom.
+// RFC3501: atom            = 1*ATOM-CHAR
+//          ASTRING-CHAR    = ATOM-CHAR / resp-specials
+//          ATOM-CHAR       = <any CHAR except atom-specials>
+//          atom-specials   = "(" / ")" / "{" / SP / CTL / list-wildcards /
+//                            quoted-specials / resp-specials
+//          list-wildcards  = "%" / "*"
+//          quoted-specials = DQUOTE / "\"
+//          resp-specials   = "]"
+// "Characters are 7-bit US-ASCII unless otherwise specified." [RFC3501, 1.2.]
+char* nsImapGenericParser::CreateAtom(bool isAstring) {
+  char* atom = PL_strdup(fNextToken);
+  if (!atom) {
+    HandleMemoryFailure();
+    return nullptr;
+  }
+
+  // Scanning stops at the following characters (decimal ASCII):
+  //   0 (NUL), 1-31 (CTL), 32 (SP), 34 '"', 37 '%', 40-42 "()*", 92 '\\',
+  //   123 '{'
+  // and additionally at ']' unless we are parsing an astring.
+  auto isAtomChar = [isAstring](char ch) {
+    if (ch == '\\' || ch == '{') return false;
+    if (!isAstring && ch == ']') return false;
+    return ch > 42 || ch == 33 || ch == 35 || ch == 36 || ch == 38 || ch == 39;
+  };
+
+  char* end = atom;
+  while (isAtomChar(*end)) ++end;
+
+  if (end == atom) {
+    SetSyntaxError(true, "no atom characters found");
+    PL_strfree(atom);
+    return nullptr;
+  }
+
+  if (*end) {
+    // Only a prefix of the token was consumed: cut the copy there and make
+    // the tokenizer resume right behind the atom.
+    *end = '\0';
+    AdvanceTokenizerStartingPoint((fNextToken - fLineOfTokens) + (end - atom));
+  }
+  return atom;
+}
+
+// Parses an nstring at fNextToken.
+// RFC3501: nstring = string / nil
+//          nil     = "NIL"
+// Returns nullptr when the token starts with "NIL" (compared
+// case-insensitively); otherwise returns whatever CreateString() produces.
+// If more text follows "NIL" inside the same token, the tokenizer is
+// repositioned right after the "NIL" (same handling as in CreateQuoted).
+// Regardless of the outcome, call AdvanceToNextToken() to get the token after
+// the nstring.
+char* nsImapGenericParser::CreateNilString() {
+  const bool isNil = PL_strncasecmp(fNextToken, "NIL", 3) == 0;
+  if (!isNil) return CreateString();
+
+  if (fNextToken[3] != '\0') {
+    AdvanceTokenizerStartingPoint((fNextToken - fLineOfTokens) + 3);
+  }
+  return nullptr;
+}
+
+// Parses a string at fNextToken, which must be either quoted (starts with
+// '"') or a literal (starts with '{'); an atom is not accepted here and is
+// flagged as a syntax error, in which case nullptr is returned.
+// Call AdvanceToNextToken() afterwards to get the token following the string.
+char* nsImapGenericParser::CreateString() {
+  const char lead = *fNextToken;
+  if (lead == '"') return CreateQuoted();
+  if (lead == '{') return CreateLiteral();
+
+  SetSyntaxError(true, "string does not start with '{' or '\"'");
+  return nullptr;
+}
+
+// Parses the quoted string whose opening '"' is at fNextToken and returns its
+// unescaped content as a newly allocated string.
+// On success the tokenizer is positioned immediately after the closing quote;
+// call AdvanceToNextToken() to get the token after it.
+// Returns nullptr and flags a syntax error when no closing quote is found on
+// the current line, including the case where the line ends right after a
+// backslash.
+//  QUOTED_CHAR     ::= <any TEXT_CHAR except quoted_specials> /
+//                      "\" quoted_specials
+//  TEXT_CHAR       ::= <any CHAR except CR and LF>
+//  quoted_specials ::= <"> / "\"
+// Note that according to RFC 1064 and RFC 2060, CRs and LFs are not allowed
+// inside a quoted string. It is sufficient to read from the current line only.
+char* nsImapGenericParser::CreateQuoted(bool /*skipToEnd*/) {
+  // One char past the opening '"'. The text is read from the untouched
+  // fCurrentLine because the tokenizer's copy has separators overwritten.
+  const char* currentChar =
+      fCurrentLine + (fNextToken - fStartOfLineOfTokens) + 1;
+
+  int escapeCharsCut = 0;
+  nsCString returnString(currentChar);
+  int charIndex;
+  for (charIndex = 0; returnString.CharAt(charIndex) != '"'; charIndex++) {
+    if (!returnString.CharAt(charIndex)) {
+      SetSyntaxError(true, "no closing '\"' found in quoted");
+      return nullptr;
+    }
+    if (returnString.CharAt(charIndex) == '\\') {
+      // Eat the escape character, but keep the escaped character; the loop
+      // increment then steps over the escaped character.
+      returnString.Cut(charIndex, 1);
+      escapeCharsCut++;
+      // If the backslash was the last character, the "escaped character" is
+      // the terminating NUL. Stop here, otherwise the loop increment would
+      // step past the end of the string.
+      if (!returnString.CharAt(charIndex)) {
+        SetSyntaxError(true, "no closing '\"' found in quoted");
+        return nullptr;
+      }
+    }
+  }
+  // +2 because of the start and end quotes; the removed escape characters
+  // are still present in the line and must be skipped as well.
+  AdvanceTokenizerStartingPoint((fNextToken - fLineOfTokens) + charIndex +
+                                escapeCharsCut + 2);
+
+  returnString.SetLength(charIndex);
+  return ToNewCString(returnString);
+}
+
+// Parses the literal whose "{number}" prefix is at fNextToken and returns its
+// content as a newly allocated, NUL-terminated string.
+// RFC3501: literal = "{" number "}" CRLF *CHAR8
+//                    ; Number represents the number of CHAR8s
+//          CHAR8   = %x01-ff
+//                    ; any OCTET except NUL, %x00
+// The literal's bytes are read from the line(s) following the current one.
+// Afterwards the tokenizer is positioned immediately after the end of the
+// literal; call AdvanceToNextToken() to get the token after it.
+// Returns nullptr when the announced size is negative (flagged as a syntax
+// error) or when the buffer cannot be allocated. If parsing stops before all
+// bytes have arrived, the bytes read so far are returned.
+char* nsImapGenericParser::CreateLiteral() {
+  const int32_t numberOfCharsInMessage = atoi(fNextToken + 1);
+  if (numberOfCharsInMessage < 0) {
+    // The size comes from the server; a negative value must not be turned
+    // into a huge unsigned allocation size.
+    SetSyntaxError(true, "negative literal size");
+    return nullptr;
+  }
+  // One extra byte for the terminating NUL. Computed in unsigned arithmetic
+  // so that even INT32_MAX cannot overflow.
+  const uint32_t numBytes = static_cast<uint32_t>(numberOfCharsInMessage) + 1;
+  char* returnString = static_cast<char*>(PR_Malloc(numBytes));
+  if (!returnString) {
+    HandleMemoryFailure();
+    return nullptr;
+  }
+
+  int32_t currentLineLength = 0;
+  int32_t charsReadSoFar = 0;
+  int32_t bytesToCopy = 0;
+  while (charsReadSoFar < numberOfCharsInMessage) {
+    AdvanceToNextLine();
+    if (!ContinueParse()) break;
+
+    // Copy the whole line, or only as much of it as still belongs to the
+    // literal.
+    currentLineLength = strlen(fCurrentLine);
+    bytesToCopy = (currentLineLength > numberOfCharsInMessage - charsReadSoFar
+                       ? numberOfCharsInMessage - charsReadSoFar
+                       : currentLineLength);
+    NS_ASSERTION(bytesToCopy, "zero-length line?");
+    memcpy(returnString + charsReadSoFar, fCurrentLine, bytesToCopy);
+    charsReadSoFar += bytesToCopy;
+  }
+
+  if (ContinueParse()) {
+    if (currentLineLength == bytesToCopy) {
+      // We have consumed the entire line.
+      // Consider the input "{4}\r\n" "L1\r\n" " A2\r\n" which is read
+      // line-by-line. Reading an Astring, this should result in "L1\r\n".
+      // Note that the second line is "L1\r\n", where the "\r\n" is part of
+      // the literal. Hence, we now read the next line to ensure that the
+      // next call to AdvanceToNextToken() leads to fNextToken=="A2" in our
+      // example.
+      AdvanceToNextLine();
+    } else {
+      // The literal ended in the middle of the line; continue tokenizing
+      // right behind it.
+      AdvanceTokenizerStartingPoint(bytesToCopy);
+    }
+  }
+
+  returnString[charsReadSoFar] = 0;
+  return returnString;
+}
+
+// Creates a buffer containing all characters within a given set of
+// parentheses.
+// Call this with fNextToken[0]=='(', that is, the open paren of the group.
+// It allocates and returns all characters up to and including the
+// corresponding closing paren, and leaves the parser positioned on the token
+// after the group. Parentheses inside quoted strings and literals are not
+// counted. Literals are appended with their "{n}" prefix followed by their
+// content; quoted strings are kept in their escaped form.
+// Returns nullptr and flags a syntax error when the closing paren is not
+// found or parsing cannot continue.
+char* nsImapGenericParser::CreateParenGroup() {
+  NS_ASSERTION(fNextToken[0] == '(', "we don't have a paren group!");
+
+  int numOpenParens = 0;
+  // Restart the tokenizer at the open paren so that the group can be scanned
+  // character by character through fCurrentTokenPlaceHolder.
+  AdvanceTokenizerStartingPoint(fNextToken - fLineOfTokens);
+
+  // Build up a buffer containing the paren group.
+  nsCString returnString;
+  char* parenGroupStart = fCurrentTokenPlaceHolder;
+  NS_ASSERTION(parenGroupStart[0] == '(', "we don't have a paren group (2)!");
+  while (*fCurrentTokenPlaceHolder) {
+    if (*fCurrentTokenPlaceHolder == '{')  // literal
+    {
+      // Ensure it is a properly formatted literal.
+      NS_ASSERTION(!strcmp("}\r\n", fCurrentTokenPlaceHolder +
+                                        strlen(fCurrentTokenPlaceHolder) - 3),
+                   "not a literal");
+
+      // Append previous characters and the "{xx}\r\n" to buffer.
+      returnString.Append(parenGroupStart);
+
+      // Append literal itself.
+      AdvanceToNextToken();
+      if (!ContinueParse()) break;
+      char* lit = CreateLiteral();
+      NS_ASSERTION(lit, "syntax error or out of memory");
+      if (!lit) break;
+      returnString.Append(lit);
+      PR_Free(lit);
+      if (!ContinueParse()) break;
+      if (!fStartOfLineOfTokens) {
+        // The literal ended exactly at a line boundary, so CreateLiteral()
+        // moved on to a fresh line and the tokenizer buffer that
+        // fCurrentTokenPlaceHolder pointed into has been freed. Set the
+        // tokenizer up on the new line (advancing by 0 bytes leaves it at the
+        // start of that line) before reading through the pointer again.
+        AdvanceTokenizerStartingPoint(0);
+        if (!fStartOfLineOfTokens || !ContinueParse()) break;
+      }
+      parenGroupStart = fCurrentTokenPlaceHolder;
+    } else if (*fCurrentTokenPlaceHolder == '"')  // quoted
+    {
+      // Append the _escaped_ version of the quoted string:
+      // just skip it (because the quoted string must be on the same line).
+      AdvanceToNextToken();
+      if (!ContinueParse()) break;
+      char* q = CreateQuoted();
+      if (!q) break;
+      PR_Free(q);
+      if (!ContinueParse()) break;
+    } else {
+      // Step over this character; it is appended later as part of the
+      // [parenGroupStart, fCurrentTokenPlaceHolder) range.
+      char c = *fCurrentTokenPlaceHolder++;
+      if (c == '(')
+        numOpenParens++;
+      else if (c == ')') {
+        numOpenParens--;
+        if (numOpenParens == 0) break;
+      }
+    }
+  }
+
+  if (numOpenParens != 0 || !ContinueParse()) {
+    SetSyntaxError(true, "closing ')' not found in paren group");
+    return nullptr;
+  }
+
+  // Append the tail of the group, up to and including the closing paren.
+  returnString.Append(parenGroupStart,
+                      fCurrentTokenPlaceHolder - parenGroupStart);
+  AdvanceToNextToken();
+  return ToNewCString(returnString);
+}