summaryrefslogtreecommitdiffstats
path: root/comm/mailnews/search/src/nsMsgBodyHandler.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'comm/mailnews/search/src/nsMsgBodyHandler.cpp')
-rw-r--r--comm/mailnews/search/src/nsMsgBodyHandler.cpp464
1 files changed, 464 insertions, 0 deletions
diff --git a/comm/mailnews/search/src/nsMsgBodyHandler.cpp b/comm/mailnews/search/src/nsMsgBodyHandler.cpp
new file mode 100644
index 0000000000..5b77750f63
--- /dev/null
+++ b/comm/mailnews/search/src/nsMsgBodyHandler.cpp
@@ -0,0 +1,464 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "msgCore.h"
+#include "nsMsgSearchCore.h"
+#include "nsMsgUtils.h"
+#include "nsMsgBodyHandler.h"
+#include "nsMsgSearchTerm.h"
+#include "nsIMsgHdr.h"
+#include "nsMsgMessageFlags.h"
+#include "nsISeekableStream.h"
+#include "nsIInputStream.h"
+#include "nsIFile.h"
+#include "plbase64.h"
+#include "prmem.h"
+#include "nsMimeTypes.h"
+
+// Constructor used when no filter header buffer is supplied: the message is
+// read through the input stream obtained from the search scope.
+nsMsgBodyHandler::nsMsgBodyHandler(nsIMsgSearchScopeTerm* scope,
+                                   uint32_t numLines, nsIMsgDBHdr* msg,
+                                   nsIMsgDatabase* db) {
+  m_scope = scope;
+  m_msgHdr = msg;
+  m_db = db;
+  m_numLocalLines = numLines;
+
+  // The line count is taken to be in body lines unless the header flags could
+  // be read and have the Offline bit set.
+  uint32_t msgFlags;
+  if (NS_FAILED(msg->GetFlags(&msgFlags))) {
+    m_lineCountInBodyLines = true;
+  } else {
+    m_lineCountInBodyLines = !(msgFlags & nsMsgMessageFlags::Offline);
+  }
+
+  // Account for the added x-mozilla-status lines and the envelope line.
+  if (!m_lineCountInBodyLines) {
+    m_numLocalLines += 3;
+  }
+
+  // The filter-related members are not used on this path, so they are
+  // cleared. m_Filtering is set before Initialize() runs.
+  m_Filtering = false;
+  m_headers = nullptr;
+  m_headersSize = 0;
+
+  Initialize();  // state shared by both constructors
+  OpenLocalFolder();
+}
+
+// Constructor that can serve filters: when Filtering is true the lines come
+// from the supplied header buffer, otherwise the message stream is opened
+// through the search scope.
+nsMsgBodyHandler::nsMsgBodyHandler(nsIMsgSearchScopeTerm* scope,
+                                   uint32_t numLines, nsIMsgDBHdr* msg,
+                                   nsIMsgDatabase* db, const char* headers,
+                                   uint32_t headersSize, bool Filtering) {
+  m_scope = scope;
+  m_msgHdr = msg;
+  m_db = db;
+  m_numLocalLines = numLines;
+
+  // The line count is taken to be in body lines unless the header flags could
+  // be read and have the Offline bit set.
+  uint32_t msgFlags;
+  if (NS_FAILED(msg->GetFlags(&msgFlags))) {
+    m_lineCountInBodyLines = true;
+  } else {
+    m_lineCountInBodyLines = !(msgFlags & nsMsgMessageFlags::Offline);
+  }
+
+  // Account for the added x-mozilla-status lines and the envelope line.
+  if (!m_lineCountInBodyLines) {
+    m_numLocalLines += 3;
+  }
+
+  m_Filtering = Filtering;
+  m_headers = nullptr;
+  m_headersSize = 0;
+
+  Initialize();
+
+  if (!m_Filtering) {
+    OpenLocalFolder();
+    return;
+  }
+
+  // Filtering: walk the caller-provided header list instead of a stream.
+  m_headers = headers;
+  m_headersSize = headersSize;
+}
+
+// Resets the parsing state shared by both constructors, whichever line
+// source is used afterwards.
+void nsMsgBodyHandler::Initialize() {
+  // Default transformations for local message search and MAPI access.
+  m_stripHeaders = true;
+
+  // Progress through the message.
+  m_pastMsgHeaders = false;
+  m_pastPartHeaders = false;
+  m_inMessageAttachment = false;
+  m_headerBytesRead = 0;
+
+  // Properties of the current part. Text is assumed (text/plain) until a
+  // header proves otherwise.
+  m_isMultipart = false;
+  m_partIsText = true;
+  m_partIsHtml = false;
+  m_partIsQP = false;
+  m_base64part = false;
+}
+
+// Nothing to release explicitly here.
+nsMsgBodyHandler::~nsMsgBodyHandler() = default;
+
+// Produces the next transformed line in |buf| and reports the charset of the
+// current part in |charset|. Returns the length of the produced line, or -1
+// once no more output is available.
+int32_t nsMsgBodyHandler::GetNextLine(nsCString& buf, nsCString& charset) {
+  int32_t inLength = -1;   // length of the incoming line, -1 at eof
+  int32_t outLength = -1;  // length of the outgoing line, -1 at eof
+  bool skipLine = true;
+  nsAutoCString rawLine;
+
+  // Keep pulling raw lines until one survives the transformations or the
+  // input is exhausted.
+  do {
+    if (m_Filtering) {
+      // Filtering reads from the in-memory header list.
+      inLength = GetNextFilterLine(rawLine);
+    } else if (m_db) {
+      // Offline IMAP, POP and news are all read like local mail, since
+      // offline messages are stored in berkeley format folders.
+      inLength = GetNextLocalLine(rawLine);
+    }
+
+    if (inLength < 0) break;  // eof in
+
+    outLength = ApplyTransformations(rawLine, inLength, skipLine, buf);
+  } while (skipLine);
+
+  if (outLength < 0) return -1;  // eof out
+
+  // For non-multipart messages the entire message minus headers is encoded,
+  // and ApplyTransformations can only decode a part.
+  if (!m_isMultipart && m_base64part) {
+    Base64Decode(buf);
+    m_base64part = false;
+    // And reapply our transformations...
+    outLength = ApplyTransformations(buf, buf.Length(), skipLine, buf);
+  }
+
+  // Process aggregated HTML.
+  if (!m_isMultipart && m_partIsHtml) {
+    StripHtml(buf);
+    outLength = buf.Length();
+  }
+
+  charset = m_partCharset;
+  return outLength;
+}
+
+// Asks the search scope for the message's input stream and stores it as the
+// line stream that GetNextLocalLine() reads from.
+void nsMsgBodyHandler::OpenLocalFolder() {
+  nsCOMPtr<nsIInputStream> msgStream;
+  const nsresult rv =
+      m_scope->GetInputStream(m_msgHdr, getter_AddRefs(msgStream));
+  // Warn and bail out if the stream could not be obtained; m_fileLineStream
+  // is then left unchanged.
+  NS_ENSURE_SUCCESS_VOID(rv);
+
+  m_fileLineStream = do_QueryInterface(msgStream);
+}
+
+/**
+ * Copies the next header out of the filter header list into |buf|.
+ *
+ * m_headers points at the unread remainder of a list of NUL-delimited header
+ * strings and m_headersSize is the number of bytes left in it. Both are
+ * advanced past the header that is returned.
+ *
+ * @param buf (out) receives the header text without its terminator. It is
+ *            truncated when the list was already empty on entry, and left
+ *            untouched when only separator bytes remained.
+ * @return the number of bytes consumed for the header, counting its
+ *         terminator, or -1 when no header is left.
+ */
+int32_t nsMsgBodyHandler::GetNextFilterLine(nsCString& buf) {
+  if (m_headersSize == 0) {
+    buf.Truncate();
+    return -1;
+  }
+
+  // #mscott. Ugly hack! Filter header lists have CRs & LFs inside the NUL
+  // delimited list of header strings. It is possible to have: To NUL CR LF
+  // From. Skip these separators when they sit where the next header should
+  // start.
+  while (m_headersSize > 0 && (m_headers[0] == '\r' || m_headers[0] == '\n' ||
+                               m_headers[0] == ' ' || m_headers[0] == '\0')) {
+    m_headers++;
+    m_headersSize--;
+  }
+
+  if (m_headersSize == 0) return -1;
+
+  // Look for the terminator only within the bytes that are known to belong
+  // to the list. An unbounded strlen() would read past the buffer if the last
+  // header is not NUL-terminated inside m_headersSize.
+  const void* terminator = memchr(m_headers, '\0', m_headersSize);
+  const uint32_t headerLength =
+      terminator ? static_cast<uint32_t>(
+                       static_cast<const char*>(terminator) - m_headers)
+                 : m_headersSize;
+  buf.Assign(m_headers, headerLength);
+
+  // Consume the terminator as well when it is part of the list. Never step
+  // beyond the end, and never let the unsigned size wrap around.
+  const uint32_t consumed = terminator ? headerLength + 1 : headerLength;
+  m_headers += consumed;
+  m_headersSize -= consumed;
+
+  return static_cast<int32_t>(headerLength + 1);
+}
+
+// Reads the next line of the message from the line stream into |buf|.
+// Returns the length of that line, or -1 when the line budget is used up, no
+// stream is available, or the read fails.
+int32_t nsMsgBodyHandler::GetNextLocalLine(nsCString& buf) {
+  if (!m_numLocalLines) return -1;
+
+  // If the line count is in body lines, only decrement once all the message
+  // headers have been processed. Otherwise the count covers every line and
+  // is decremented each time.
+  const bool countThisLine = m_pastMsgHeaders || !m_lineCountInBodyLines;
+  if (countThisLine) m_numLocalLines--;
+
+  if (!m_fileLineStream) return -1;
+
+  // The "more data" flag reported by ReadLine is not consulted; only the
+  // status decides whether a line is returned.
+  bool hasMore = false;
+  const nsresult rv = m_fileLineStream->ReadLine(buf, &hasMore);
+  if (NS_FAILED(rv)) return -1;
+
+  return buf.Length();
+}
+
+/**
+ * This method applies a sequence of transformations to the line.
+ *
+ * It applies the following sequences in order
+ * * Removes headers if the searcher doesn't want them
+ * (sets m_past*Headers)
+ * * Determines the current MIME type.
+ * (via SniffPossibleMIMEHeader)
+ * * Strips any HTML if the searcher doesn't want it
+ * * Strips non-text parts
+ * * Decodes any base64 part
+ * (resetting part variables: m_base64part, m_pastPartHeaders, m_partIsHtml,
+ * m_partIsText)
+ *
+ * The line is handled by the first of these cases that applies:
+ * * Still inside the part headers: the line is copied to buf and sniffed.
+ * An empty line, or one starting with CR or LF, ends the headers, except
+ * that inside a message attachment it only clears m_inMessageAttachment.
+ * The incoming length is returned; the line is eaten if m_stripHeaders.
+ * * The line starts with a known boundary: later boundaries are dropped,
+ * the accumulated part in buf is base64-decoded and/or HTML-stripped as
+ * applicable, the per-part state is reset and buf's length is returned.
+ * * The part is not text: buf is truncated, the line is eaten, 0 is
+ * returned.
+ * * The part is base64 or HTML: the line is appended to buf and eaten, so
+ * the part can be decoded or stripped as a whole later.
+ * * Otherwise the line is copied to buf unchanged.
+ *
+ * @param line (in) the current line
+ * @param length (in) the length of said line
+ * @param eatThisLine (out) whether or not to ignore this line
+ * @param buf (inout) if m_base64part or m_partIsHtml, the current part as
+ * accumulated so far, needed for decoding or tag
+ * stripping; else, it is treated as an out param (a
+ * redundant version of line).
+ * @return the length of the line after applying transformations; while in
+ * the part headers this is the unchanged incoming length
+ */
+int32_t nsMsgBodyHandler::ApplyTransformations(const nsCString& line,
+ int32_t length,
+ bool& eatThisLine,
+ nsCString& buf) {
+ eatThisLine = false;
+
+ if (!m_pastPartHeaders) // line is a line from the part headers
+ {
+ if (m_stripHeaders) eatThisLine = true;
+
+ // We have already grabbed all worthwhile information from the headers,
+ // so there is no need to keep track of the current lines
+ buf.Assign(line);
+
+ SniffPossibleMIMEHeader(buf);
+
+ if (buf.IsEmpty() || buf.First() == '\r' || buf.First() == '\n') {  // blank line: end of this header block
+ if (!m_inMessageAttachment) {
+ m_pastPartHeaders = true;
+ } else {
+ // We're in a message attachment and have just read past the
+ // part header for the attached message. We now need to read
+ // the message headers and any part headers.
+ // We can now forget about the special handling of attached messages.
+ m_inMessageAttachment = false;
+ }
+ }
+
+ // We set m_pastMsgHeaders to 'true' only once.
+ if (m_pastPartHeaders) m_pastMsgHeaders = true;
+
+ return length;  // header lines report the incoming length unchanged
+ }
+
+ // Check to see if this is one of our boundary strings. The list is scanned
+ // from the most recently collected boundary back to the first one.
+ bool matchedBoundary = false;
+ if (m_isMultipart && m_boundaries.Length() > 0) {
+ for (int32_t i = (int32_t)m_boundaries.Length() - 1; i >= 0; i--) {
+ if (StringBeginsWith(line, m_boundaries[i])) {
+ matchedBoundary = true;
+ // If we matched a boundary, we won't need the nested/later ones any
+ // more.
+ m_boundaries.SetLength(i + 1);  // keeps entries 0..i
+ break;
+ }
+ }
+ }
+ if (matchedBoundary) {
+ if (m_base64part && m_partIsText) {
+ Base64Decode(buf);  // buf holds the lines accumulated for this part
+ // Work on the parsed string
+ if (!buf.Length()) {
+ NS_WARNING("Trying to transform an empty buffer");
+ eatThisLine = true;
+ } else {
+ // It is wrong to call ApplyTransformations() here since this will
+ // lead to the buffer being doubled-up at |buf.Append(line);|
+ // below. ApplyTransformations(buf, buf.Length(), eatThisLine, buf);
+ // Avoid spurious failures
+ eatThisLine = false;
+ }
+ } else if (!m_partIsHtml) {
+ buf.Truncate();
+ eatThisLine = true; // We have no content...
+ }
+
+ if (m_partIsHtml) {
+ StripHtml(buf);
+ }
+
+ // Reset all assumed headers
+ m_base64part = false;
+ // Get ready to sniff new part headers, but do not reset m_pastMsgHeaders
+ // since it will screw the body line count.
+ m_pastPartHeaders = false;
+ m_partIsHtml = false;
+ // If we ever see a multipart message, each part needs to set
+ // 'm_partIsText', so no more defaulting to 'true' when the part is done.
+ m_partIsText = false;
+
+ // Note: we cannot reset 'm_partIsQP' yet since we still need it to process
+ // the last buffer returned here. Parsing the next part will set a new
+ // value.
+ return buf.Length();
+ }
+
+ if (!m_partIsText) {
+ // Ignore non-text parts
+ buf.Truncate();
+ eatThisLine = true;
+ return 0;
+ }
+
+ // Accumulate base64 parts and HTML parts for later decoding or tag stripping.
+ if (m_base64part || m_partIsHtml) {
+ if (m_partIsHtml && !m_base64part) {
+ size_t bufLength = buf.Length();
+ if (!m_partIsQP || bufLength == 0 || !StringEndsWith(buf, "="_ns)) {
+ // Replace newline in HTML with a space.
+ buf.Append(' ');
+ } else {
+ // Strip the soft line break.
+ buf.SetLength(bufLength - 1);  // drops the trailing '='
+ }
+ }
+ buf.Append(line);
+ eatThisLine = true;  // nothing is handed out until the part is complete
+ return buf.Length();
+ }
+
+ buf.Assign(line);  // plain text line: pass it through unchanged
+ return buf.Length();
+}
+
+// Removes everything between '<' and '>' (inclusive) from |pBufInOut|.
+// The result is built in a freshly allocated buffer which the string then
+// adopts; if the allocation fails the string is left as it was.
+void nsMsgBodyHandler::StripHtml(nsCString& pBufInOut) {
+  char* stripped = (char*)PR_Malloc(pBufInOut.Length() + 1);
+  if (!stripped) return;
+
+  char* out = stripped;
+  bool insideTag = false;
+  // Scanning stops at the first NUL character.
+  for (const char* in = pBufInOut.get(); *in; ++in) {
+    const char ch = *in;
+    if (insideTag) {
+      // Throw away tag content; only '>' changes anything.
+      if (ch == '>') insideTag = false;
+    } else if (ch == '<') {
+      insideTag = true;
+    } else {
+      *out++ = ch;
+    }
+  }
+  *out = 0;  // null terminator
+
+  pBufInOut.Adopt(stripped);
+}
+
+/**
+ * Determines the MIME information, if present, from the current line.
+ *
+ * Depending on the line this updates m_partIsQP, m_base64part, m_partIsHtml,
+ * m_partIsText, m_isMultipart, m_inMessageAttachment, m_pastPartHeaders,
+ * m_partCharset and the list of collected boundaries (m_boundaries).
+ *
+ * @param line (in) a header line that may contain a MIME header
+ */
+void nsMsgBodyHandler::SniffPossibleMIMEHeader(const nsCString& line) {
+  // Some parts of MIME are case-sensitive and other parts are
+  // case-insensitive; specifically, the headers are all case-insensitive and
+  // the values we care about are also case-insensitive, with the sole
+  // exception of the boundary string. Matching is therefore done on a
+  // lower-cased copy, while values are cut out of the original line using the
+  // offsets found in the copy.
+  nsCString lowerCaseLine(line);
+  ToLowerCase(lowerCaseLine);
+  const int32_t lineLength = static_cast<int32_t>(line.Length());
+
+  const bool isTransferEncoding =
+      StringBeginsWith(lowerCaseLine, "content-transfer-encoding:"_ns);
+
+  if (isTransferEncoding)
+    m_partIsQP = lowerCaseLine.Find("quoted-printable") != kNotFound;
+
+  if (StringBeginsWith(lowerCaseLine, "content-type:"_ns)) {
+    if (lowerCaseLine.LowerCaseFindASCII("text/html") != kNotFound) {
+      m_partIsText = true;
+      m_partIsHtml = true;
+    } else if (lowerCaseLine.Find("multipart/") != kNotFound) {
+      if (m_isMultipart) {
+        // Nested multipart, get ready for new headers.
+        m_base64part = false;
+        m_partIsQP = false;
+        m_pastPartHeaders = false;
+        m_partIsHtml = false;
+        m_partIsText = false;
+      }
+      m_isMultipart = true;
+      m_partCharset.Truncate();
+    } else if (lowerCaseLine.Find("message/") != kNotFound) {
+      // Initialise again.
+      m_base64part = false;
+      m_partIsQP = false;
+      m_pastPartHeaders = false;
+      m_partIsHtml = false;
+      // Default is text/plain, maybe proven otherwise later.
+      m_partIsText = true;
+      m_inMessageAttachment = true;
+    } else if (lowerCaseLine.Find("text/") != kNotFound) {
+      m_partIsText = true;
+    } else {
+      m_partIsText = false;  // We have disproven our assumption.
+    }
+  }
+
+  int32_t start;
+  if (m_isMultipart && (start = lowerCaseLine.Find("boundary=")) != kNotFound) {
+    start += 9;  // strlen("boundary=")
+    // "boundary=" may be the very last thing on the line, so check the index
+    // before looking at the character.
+    bool foundQuote = false;
+    if (start < lineLength && line[start] == '\"') {
+      start++;
+      foundQuote = true;
+    }
+    // A quoted value ends at its closing quote, an unquoted one at the next
+    // parameter separator. Searching from |start| keeps other parameters on
+    // the same line from leaking into the boundary.
+    int32_t end = kNotFound;
+    if (start < lineLength) end = line.FindChar(foundQuote ? '\"' : ';', start);
+    if (end == kNotFound) end = lineLength;
+    if (!foundQuote) {
+      // Trailing whitespace or line terminators are not part of the value.
+      while (end > start &&
+             (line[end - 1] == ' ' || line[end - 1] == '\t' ||
+              line[end - 1] == '\r' || line[end - 1] == '\n')) {
+        end--;
+      }
+    }
+
+    // Collect all boundaries. Since we only react to crossing a boundary,
+    // we can simply collect the boundaries instead of forming a tree
+    // structure from the message. Keep it simple ;-)
+    // An empty value is skipped: it would register a bare "--" as a
+    // delimiter.
+    if (end > start) {
+      nsCString boundary;
+      boundary.AssignLiteral("--");
+      boundary.Append(Substring(line, start, end - start));
+      if (!m_boundaries.Contains(boundary))
+        m_boundaries.AppendElement(boundary);
+    }
+  }
+
+  if (m_isMultipart && (start = lowerCaseLine.Find("charset=")) != kNotFound) {
+    start += 8;  // strlen("charset=")
+    bool foundQuote = false;
+    if (start < lineLength && line[start] == '\"') {
+      start++;
+      foundQuote = true;
+    }
+    int32_t end = kNotFound;
+    if (start < lineLength) end = line.FindChar(foundQuote ? '\"' : ';', start);
+    if (end == kNotFound) end = lineLength;
+
+    m_partCharset.Assign(Substring(line, start, end - start));
+  }
+
+  if (isTransferEncoding &&
+      lowerCaseLine.LowerCaseFindASCII(ENCODING_BASE64) != kNotFound)
+    m_base64part = true;
+}
+
+/**
+ * Base64-decodes a string in place.
+ *
+ * The decoded text replaces the input, with every CR and LF turned into a
+ * space. If decoding yields no buffer the input is left unchanged.
+ *
+ * @param pBufInOut (inout) the encoded text on entry, the decoded text on
+ *                  return
+ */
+void nsMsgBodyHandler::Base64Decode(nsCString& pBufInOut) {
+  char* decoded =
+      PL_Base64Decode(pBufInOut.get(), pBufInOut.Length(), nullptr);
+  if (!decoded) return;
+
+  // Replace CR LF with spaces, up to the first NUL character.
+  for (char* cursor = decoded; *cursor; ++cursor) {
+    if (*cursor == '\r' || *cursor == '\n') *cursor = ' ';
+  }
+
+  pBufInOut.Adopt(decoded);
+}