summaryrefslogtreecommitdiffstats
path: root/comm/mailnews/base/src/HeaderReader.h
diff options
context:
space:
mode:
Diffstat (limited to 'comm/mailnews/base/src/HeaderReader.h')
-rw-r--r--comm/mailnews/base/src/HeaderReader.h305
1 files changed, 305 insertions, 0 deletions
diff --git a/comm/mailnews/base/src/HeaderReader.h b/comm/mailnews/base/src/HeaderReader.h
new file mode 100644
index 0000000000..da8defbbe0
--- /dev/null
+++ b/comm/mailnews/base/src/HeaderReader.h
@@ -0,0 +1,305 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef HeaderReader_h__
+#define HeaderReader_h__
+
+#include <algorithm>
+#include "LineReader.h"
+#include "nsMsgUtils.h"
+#include "nsString.h"
+#include "mozilla/Span.h"
+
+/**
+ * HeaderReader parses mail headers from a buffer.
+ * The input is fed in via Parse(), and a callback function is invoked for
+ * each header encountered.
+ *
+ * General goals:
+ *
+ * - Incremental. Parse() can be called multiple times as a buffer grows.
+ * - Works in-place. Headers are returned as byte ranges within the data.
+ * - Works with a partial header block (e.g. sniffing the first N bytes
+ * of a message file). It won't mistakenly emit an incomplete header.
+ * - Track exact byte offsets for values, to support rewriting headers in
+ * place. This is needed to support X-Mozilla-Status et al.
+ * - Avoids copying data where possible.
+ * - Callback is inlined.
+ * - Callback can halt processing (by returning false).
+ * - Tolerant of real-world oddness in input data (for now, we just skip
+ * lines which don't make sense).
+ *
+ * Example usage:
+ * nsCString raw = "To: Alice\r\nFrom: Bob\r\n\r\n...Message body..."_ns;
+ * auto cb = [&](HeaderReader::Header const& hdr) {
+ * printf("-> '%s':'%s'\n", hdr.Name(raw), hdr.Value(raw));
+ * return true;
+ * };
+ *
+ * HeaderReader rdr;
+ * rdr.Parse(raw, cb);
+ * // -> 'To':'Alice'
+ * // -> 'From':'Bob'
+ *
+ * See TestHeaderReader.cpp for more examples.
+ */
+class HeaderReader {
+ public:
+ /**
+ * Parse() scans an input buffer and invokes a callback for each complete
+ * header found.
+ *
+ * It can be called any number of times - it'll pick up where it left off.
+ * The idea is that the caller can accumulate data in multiple chunks and
+ * call Parse() to extract headers incrementally as they come in.
+ * It does rely on data being a single contiguous allocation, but it
+ * doesn't require the data being located in the same memory location
+ * each time. So can it can be safely used on a growable buffer.
+ *
+ * Signature of callback is:
+ * bool hdrCallback(HeaderReader::Hdr const& hdr);
+ *
+ * The callback should return true to continue parsing, or false to halt.
+ * This allows, for example, an early-out if you're scanning for one
+ * specific header and don't care about the rest.
+ *
+ * Parse() stops when one of these conditions is true:
+ * 1. The end of the header block is reached (the final blank line marker
+ * is consumed). Subsequent calls to IsComplete() will return true.
+ * 2. The callback returns false. If Parse() is called again, it will
+ * safely pick up where it left off.
+ * 3. No more headers can be read. There may be some unconsumed data
+ * returned (eg a partial line). Parse() can be safely called again
+ * when more data becomes available. It will resume from the point it
+ * reached previously.
+ *
+ * It is safe to call Parse() on a truncated header block. It will only
+ * invoke the callback for headers which are unambiguously complete.
+ *
+ * @param data - bytes containing the header block to parse.
+ * @param hdrCallback - callback to invoke for each header found
+ *
+ * @returns a span containing the unconsumed (leftover) data.
+ */
+ template <typename HeaderFn>
+ mozilla::Span<const char> Parse(mozilla::Span<const char> data,
+ HeaderFn hdrCallback);
+
+ /**
+ * Complete() returns true if the header block has been fully parsed.
+ * Further calls to Parse() will consume no more data.
+ * The blank line which separates the header block from the body is consumed.
+ */
+ bool IsComplete() const { return mFinished; }
+
+ /**
+ * Hdr holds offsets to a name/value pair within a header block.
+ * The name starts at pos.
+ * The value starts at pos+rawValOffset.
+ */
+ struct Hdr {
+ uint32_t pos{0}; // Start position of header within the block.
+ uint32_t len{0}; // Length of entire header, including final EOL.
+ uint32_t nameLen{0}; // Length of name.
+ uint32_t rawValOffset{0}; // Where the value starts, relative to pos.
+ uint32_t rawValLen{0}; // Excludes final EOL.
+ bool IsEmpty() const { return len == 0; }
+
+ /**
+ * Access the header name as a string.
+ *
+ * @param data - the data originally passed into Parse().
+ * @returns the name within data, wrapped for string access (so it is
+ * valid only as long as data is valid).
+ */
+ nsDependentCSubstring Name(mozilla::Span<const char> data) const {
+ return nsDependentCSubstring(data.Elements() + pos, nameLen);
+ }
+ /**
+ * Access the raw value as a string.
+ *
+ * @param data - the data originally passed into Parse().
+ * @returns the raw data, EOLs and all, wrapped for string access (so it
+ * is valid only as long as data is valid).
+ */
+ nsDependentCSubstring RawValue(mozilla::Span<const char> data) const {
+ return nsDependentCSubstring(data.Elements() + pos + rawValOffset,
+ rawValLen);
+ }
+ /**
+ * Decode the 'cooked' value into a string.
+ * NOTE: handles unfolding multi-line values. No attempt (yet) at dealing
+ * with comments or quoted strings...
+ *
+ * @param data - the data originally passed into Parse().
+ * @returns a new string containing the value.
+ */
+ nsCString Value(mozilla::Span<const char> data) const {
+ nsCString val(RawValue(data));
+ val.ReplaceSubstring("\r\n"_ns, ""_ns);
+ val.ReplaceSubstring("\n"_ns, ""_ns);
+ return val;
+ }
+
+ /**
+ * EOL() returns a string containing the eol characters at the end of the
+ * header. It will be "\n" or "\r\n".
+ * Calling this on an empty hdr struct is unsupported.
+ */
+ nsDependentCSubstring EOL(mozilla::Span<const char> data) const {
+ MOZ_ASSERT(len >= 2); // Empty or malformed?
+
+ uint32_t i = pos + len;
+ int n = 0;
+ if (data[i - 1] == '\n') {
+ ++n;
+ if (data[i - 2] == '\r') {
+ ++n;
+ }
+ }
+ return nsDependentCSubstring(data.Elements() + pos + len - n, n);
+ }
+ };
+
+ private:
+ // How far Parse() has gone so far.
+ uint32_t mPos{0};
+
+ // The current header we're accumulating.
+ Hdr mHdr;
+
+ // Number of EOL chars at the end of previous line (so we can strip it if the
+ // next line is folded).
+ int mEOLSize{0};
+
+ // Set when end of header block detected.
+ bool mFinished{false};
+
+ template <typename HeaderFn>
+ bool HandleLine(mozilla::Span<const char> line, HeaderFn hdrCallback);
+};
+
+// Parse() implementation.
+template <typename HeaderFn>
+mozilla::Span<const char> HeaderReader::Parse(mozilla::Span<const char> data,
+ HeaderFn hdrCallback) {
+ // If were're resuming, skip what we've already scanned.
+ auto remaining = mozilla::Span<const char>(data.cbegin() + mPos, data.cend());
+ if (mFinished) {
+ return remaining;
+ }
+ // Iterate over all the lines of our input.
+ remaining = SplitLines(remaining,
+ [this, hdrCallback](mozilla::Span<const char> line) {
+ return HandleLine(line, hdrCallback);
+ });
+
+ if (!mFinished) {
+ // We didn't get to the end of the header block, but we may still be
+ // able to finalise a previously-started header...
+ if (!mHdr.IsEmpty()) {
+ if (remaining.Length() > 0 && remaining[0] != ' ' &&
+ remaining[0] != '\t') {
+ // Next line isn't folded, so we know the header is complete.
+ mHdr.rawValLen -= mEOLSize;
+ hdrCallback(mHdr);
+ } else {
+ // Can't tell if header is complete. Rewind and try again next time.
+ mPos = mHdr.pos;
+ remaining =
+ mozilla::Span<const char>(data.cbegin() + mPos, data.cend());
+ }
+ mHdr = Hdr();
+ }
+ }
+ return remaining;
+}
+
+// Helper function - we call this on each complete line we encounter.
+template <typename HeaderFn>
+bool HeaderReader::HandleLine(mozilla::Span<const char> line,
+ HeaderFn hdrCallback) {
+ // Should never be here if we've finished.
+ MOZ_ASSERT(!mFinished);
+ // we should _never_ see empty strings.
+ MOZ_ASSERT(!line.IsEmpty());
+
+ // Find the EOL sequence (CRLF or LF).
+ auto eol = line.cend();
+ auto p = eol;
+ if (p > line.cbegin() && *(p - 1) == '\n') {
+ --eol;
+ if ((p - 1) > line.cbegin() && *(p - 2) == '\r') {
+ --eol;
+ }
+ }
+ // We should never have been called with a non-terminated line.
+ MOZ_ASSERT(eol != line.cend());
+
+ // Blank line indicates end of header block.
+ if (eol == line.cbegin()) {
+ if (!mHdr.IsEmpty()) {
+ // Emit the completed header.
+ mHdr.rawValLen -= mEOLSize;
+ hdrCallback(mHdr);
+ mHdr = Hdr();
+ }
+ mFinished = true;
+ mPos += line.Length();
+ return false; // Stop.
+ }
+
+ // A folded line?
+ // Leading space or tab indicates continuation of previous value.
+ if (line[0] == ' ' || line[0] == '\t') {
+ if (!mHdr.IsEmpty()) {
+ // Grow the existing header.
+ mHdr.len += line.Length();
+ mHdr.rawValLen += line.Length();
+ mEOLSize = line.cend() - eol;
+ } else {
+ // UHOH - a folded value but we haven't started a header...
+ // Not much we can do, so we'll just ignore the line.
+ NS_WARNING("Malformed header (bare continuation)");
+ }
+ mPos += line.Length();
+ return true; // Next line, please.
+ }
+
+ bool keepGoing = true;
+ // By now, we're expecting a "name: value" line, to start a fresh header.
+ if (!mHdr.IsEmpty()) {
+ // Flush previous header now we know it's complete.
+ mHdr.rawValLen -= mEOLSize;
+ keepGoing = hdrCallback(mHdr);
+ mHdr = Hdr();
+ }
+
+ auto colon = std::find(line.cbegin(), line.cend(), ':');
+ if (colon == line.cend()) {
+ // UHOH. We were expecting a "name: value" line, but didn't find one.
+ // Just ignore this line.
+ NS_WARNING("Malformed header (expected 'name: value')");
+ mPos += line.Length();
+ return keepGoing;
+ }
+ auto val = colon + 1;
+ if (*val == ' ' || *val == '\t') {
+ // Skip single leading whitespace.
+ ++val;
+ }
+
+ // Start filling out the new header (it may grow if folded lines come next).
+ mHdr.pos = mPos;
+ mHdr.len = line.Length();
+ mHdr.nameLen = colon - line.cbegin();
+
+ mHdr.rawValOffset = val - line.cbegin();
+ mHdr.rawValLen = line.cend() - val;
+ mEOLSize = line.cend() - eol;
+ mPos += line.Length();
+ return keepGoing;
+}
+
+#endif