diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
commit | 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch) | |
tree | a68f146d7fa01f0134297619fbe7e33db084e0aa /comm/mailnews/base/src/HeaderReader.h | |
parent | Initial commit. (diff) | |
download | thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip |
Adding upstream version 1:115.7.0.upstream/1%115.7.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'comm/mailnews/base/src/HeaderReader.h')
-rw-r--r-- | comm/mailnews/base/src/HeaderReader.h | 305 |
1 files changed, 305 insertions, 0 deletions
diff --git a/comm/mailnews/base/src/HeaderReader.h b/comm/mailnews/base/src/HeaderReader.h new file mode 100644 index 0000000000..da8defbbe0 --- /dev/null +++ b/comm/mailnews/base/src/HeaderReader.h @@ -0,0 +1,305 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef HeaderReader_h__ +#define HeaderReader_h__ + +#include <algorithm> +#include "LineReader.h" +#include "nsMsgUtils.h" +#include "nsString.h" +#include "mozilla/Span.h" + +/** + * HeaderReader parses mail headers from a buffer. + * The input is fed in via Parse(), and a callback function is invoked for + * each header encountered. + * + * General goals: + * + * - Incremental. Parse() can be called multiple times as a buffer grows. + * - Works in-place. Headers are returned as byte ranges within the data. + * - Works with a partial header block (e.g. sniffing the first N bytes + * of a message file). It won't mistakenly emit an incomplete header. + * - Track exact byte offsets for values, to support rewriting headers in + * place. This is needed to support X-Mozilla-Status et al. + * - Avoids copying data where possible. + * - Callback is inlined. + * - Callback can halt processing (by returning false). + * - Tolerant of real-world oddness in input data (for now, we just skip + * lines which don't make sense). + * + * Example usage: + * nsCString raw = "To: Alice\r\nFrom: Bob\r\n\r\n...Message body..."_ns; + * auto cb = [&](HeaderReader::Header const& hdr) { + * printf("-> '%s':'%s'\n", hdr.Name(raw), hdr.Value(raw)); + * return true; + * }; + * + * HeaderReader rdr; + * rdr.Parse(raw, cb); + * // -> 'To':'Alice' + * // -> 'From':'Bob' + * + * See TestHeaderReader.cpp for more examples. + */ +class HeaderReader { + public: + /** + * Parse() scans an input buffer and invokes a callback for each complete + * header found. + * + * It can be called any number of times - it'll pick up where it left off. + * The idea is that the caller can accumulate data in multiple chunks and + * call Parse() to extract headers incrementally as they come in. + * It does rely on data being a single contiguous allocation, but it + * doesn't require the data being located in the same memory location + * each time. So can it can be safely used on a growable buffer. + * + * Signature of callback is: + * bool hdrCallback(HeaderReader::Hdr const& hdr); + * + * The callback should return true to continue parsing, or false to halt. + * This allows, for example, an early-out if you're scanning for one + * specific header and don't care about the rest. + * + * Parse() stops when one of these conditions is true: + * 1. The end of the header block is reached (the final blank line marker + * is consumed). Subsequent calls to IsComplete() will return true. + * 2. The callback returns false. If Parse() is called again, it will + * safely pick up where it left off. + * 3. No more headers can be read. There may be some unconsumed data + * returned (eg a partial line). Parse() can be safely called again + * when more data becomes available. It will resume from the point it + * reached previously. + * + * It is safe to call Parse() on a truncated header block. It will only + * invoke the callback for headers which are unambiguously complete. + * + * @param data - bytes containing the header block to parse. + * @param hdrCallback - callback to invoke for each header found + * + * @returns a span containing the unconsumed (leftover) data. + */ + template <typename HeaderFn> + mozilla::Span<const char> Parse(mozilla::Span<const char> data, + HeaderFn hdrCallback); + + /** + * Complete() returns true if the header block has been fully parsed. + * Further calls to Parse() will consume no more data. + * The blank line which separates the header block from the body is consumed. + */ + bool IsComplete() const { return mFinished; } + + /** + * Hdr holds offsets to a name/value pair within a header block. + * The name starts at pos. + * The value starts at pos+rawValOffset. + */ + struct Hdr { + uint32_t pos{0}; // Start position of header within the block. + uint32_t len{0}; // Length of entire header, including final EOL. + uint32_t nameLen{0}; // Length of name. + uint32_t rawValOffset{0}; // Where the value starts, relative to pos. + uint32_t rawValLen{0}; // Excludes final EOL. + bool IsEmpty() const { return len == 0; } + + /** + * Access the header name as a string. + * + * @param data - the data originally passed into Parse(). + * @returns the name within data, wrapped for string access (so it is + * valid only as long as data is valid). + */ + nsDependentCSubstring Name(mozilla::Span<const char> data) const { + return nsDependentCSubstring(data.Elements() + pos, nameLen); + } + /** + * Access the raw value as a string. + * + * @param data - the data originally passed into Parse(). + * @returns the raw data, EOLs and all, wrapped for string access (so it + * is valid only as long as data is valid). + */ + nsDependentCSubstring RawValue(mozilla::Span<const char> data) const { + return nsDependentCSubstring(data.Elements() + pos + rawValOffset, + rawValLen); + } + /** + * Decode the 'cooked' value into a string. + * NOTE: handles unfolding multi-line values. No attempt (yet) at dealing + * with comments or quoted strings... + * + * @param data - the data originally passed into Parse(). + * @returns a new string containing the value. + */ + nsCString Value(mozilla::Span<const char> data) const { + nsCString val(RawValue(data)); + val.ReplaceSubstring("\r\n"_ns, ""_ns); + val.ReplaceSubstring("\n"_ns, ""_ns); + return val; + } + + /** + * EOL() returns a string containing the eol characters at the end of the + * header. It will be "\n" or "\r\n". + * Calling this on an empty hdr struct is unsupported. + */ + nsDependentCSubstring EOL(mozilla::Span<const char> data) const { + MOZ_ASSERT(len >= 2); // Empty or malformed? + + uint32_t i = pos + len; + int n = 0; + if (data[i - 1] == '\n') { + ++n; + if (data[i - 2] == '\r') { + ++n; + } + } + return nsDependentCSubstring(data.Elements() + pos + len - n, n); + } + }; + + private: + // How far Parse() has gone so far. + uint32_t mPos{0}; + + // The current header we're accumulating. + Hdr mHdr; + + // Number of EOL chars at the end of previous line (so we can strip it if the + // next line is folded). + int mEOLSize{0}; + + // Set when end of header block detected. + bool mFinished{false}; + + template <typename HeaderFn> + bool HandleLine(mozilla::Span<const char> line, HeaderFn hdrCallback); +}; + +// Parse() implementation. +template <typename HeaderFn> +mozilla::Span<const char> HeaderReader::Parse(mozilla::Span<const char> data, + HeaderFn hdrCallback) { + // If were're resuming, skip what we've already scanned. + auto remaining = mozilla::Span<const char>(data.cbegin() + mPos, data.cend()); + if (mFinished) { + return remaining; + } + // Iterate over all the lines of our input. + remaining = SplitLines(remaining, + [this, hdrCallback](mozilla::Span<const char> line) { + return HandleLine(line, hdrCallback); + }); + + if (!mFinished) { + // We didn't get to the end of the header block, but we may still be + // able to finalise a previously-started header... + if (!mHdr.IsEmpty()) { + if (remaining.Length() > 0 && remaining[0] != ' ' && + remaining[0] != '\t') { + // Next line isn't folded, so we know the header is complete. + mHdr.rawValLen -= mEOLSize; + hdrCallback(mHdr); + } else { + // Can't tell if header is complete. Rewind and try again next time. + mPos = mHdr.pos; + remaining = + mozilla::Span<const char>(data.cbegin() + mPos, data.cend()); + } + mHdr = Hdr(); + } + } + return remaining; +} + +// Helper function - we call this on each complete line we encounter. +template <typename HeaderFn> +bool HeaderReader::HandleLine(mozilla::Span<const char> line, + HeaderFn hdrCallback) { + // Should never be here if we've finished. + MOZ_ASSERT(!mFinished); + // we should _never_ see empty strings. + MOZ_ASSERT(!line.IsEmpty()); + + // Find the EOL sequence (CRLF or LF). + auto eol = line.cend(); + auto p = eol; + if (p > line.cbegin() && *(p - 1) == '\n') { + --eol; + if ((p - 1) > line.cbegin() && *(p - 2) == '\r') { + --eol; + } + } + // We should never have been called with a non-terminated line. + MOZ_ASSERT(eol != line.cend()); + + // Blank line indicates end of header block. + if (eol == line.cbegin()) { + if (!mHdr.IsEmpty()) { + // Emit the completed header. + mHdr.rawValLen -= mEOLSize; + hdrCallback(mHdr); + mHdr = Hdr(); + } + mFinished = true; + mPos += line.Length(); + return false; // Stop. + } + + // A folded line? + // Leading space or tab indicates continuation of previous value. + if (line[0] == ' ' || line[0] == '\t') { + if (!mHdr.IsEmpty()) { + // Grow the existing header. + mHdr.len += line.Length(); + mHdr.rawValLen += line.Length(); + mEOLSize = line.cend() - eol; + } else { + // UHOH - a folded value but we haven't started a header... + // Not much we can do, so we'll just ignore the line. + NS_WARNING("Malformed header (bare continuation)"); + } + mPos += line.Length(); + return true; // Next line, please. + } + + bool keepGoing = true; + // By now, we're expecting a "name: value" line, to start a fresh header. + if (!mHdr.IsEmpty()) { + // Flush previous header now we know it's complete. + mHdr.rawValLen -= mEOLSize; + keepGoing = hdrCallback(mHdr); + mHdr = Hdr(); + } + + auto colon = std::find(line.cbegin(), line.cend(), ':'); + if (colon == line.cend()) { + // UHOH. We were expecting a "name: value" line, but didn't find one. + // Just ignore this line. + NS_WARNING("Malformed header (expected 'name: value')"); + mPos += line.Length(); + return keepGoing; + } + auto val = colon + 1; + if (*val == ' ' || *val == '\t') { + // Skip single leading whitespace. + ++val; + } + + // Start filling out the new header (it may grow if folded lines come next). + mHdr.pos = mPos; + mHdr.len = line.Length(); + mHdr.nameLen = colon - line.cbegin(); + + mHdr.rawValOffset = val - line.cbegin(); + mHdr.rawValLen = line.cend() - val; + mEOLSize = line.cend() - eol; + mPos += line.Length(); + return keepGoing; +} + +#endif |