summaryrefslogtreecommitdiffstats
path: root/comm/mailnews/base/src/LineReader.h
diff options
context:
space:
mode:
Diffstat (limited to 'comm/mailnews/base/src/LineReader.h')
-rw-r--r--comm/mailnews/base/src/LineReader.h188
1 files changed, 188 insertions, 0 deletions
diff --git a/comm/mailnews/base/src/LineReader.h b/comm/mailnews/base/src/LineReader.h
new file mode 100644
index 0000000000..292c7ced7c
--- /dev/null
+++ b/comm/mailnews/base/src/LineReader.h
@@ -0,0 +1,188 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef LineReader_h__
+#define LineReader_h__
+
+#include <algorithm>
+#include "mozilla/Span.h"
+#include "mozilla/Vector.h"
+
+/**
+ * FirstLine() returns the first line of a span.
+ * The EOL sequence (CRLF or LF) is included in the returned line.
+ * If no lines are found an empty span is returned.
+ */
+inline mozilla::Span<const char> FirstLine(
+ mozilla::Span<const char> const& data) {
+ auto eol = std::find(data.cbegin(), data.cend(), '\n');
+ if (eol == data.cend()) {
+ // no line ending found - return empty span.
+ return data.First(0);
+ }
+ ++eol;
+ return mozilla::Span<const char>(data.cbegin(), eol);
+}
+
+/**
+ * LineReader breaks up continuous character streams into lines.
+ * Data is fed in by calling Feed() as often as required, and a
+ * callback function is invoked to handle each resulting line.
+ *
+ * The resulting lines include the end-of-line char(s), except for any
+ * non-terminated final line.
+ * LF ('\n') is used as the line terminator. CRLF-terminated lines will
+ * be handled correctly - the resultant lines will include the line
+ * terminators exactly as they appear in the input data.
+ *
+ * Goals for LineReader:
+ * - Byte exact. The bytes fed in will appear _exactly_ in the callback fn.
+ * - Callback can be inlined (due to templating).
+ * - Avoid copying data if possible. The internal buffer is only used when
+ * lines are split across incoming chunks of data.
+ * - Tries to avoid heap allocation. If the internal buffer is used, it'll
+ * only allocate memory for long lines (>80 chars).
+ *
+ * Example usage:
+ *
+ * auto callback = [](mozilla::Span<const char> line) {
+ * printf("%s\n", nsCString(line).get());
+ * return true;
+ * };
+ *
+ * LineReader c;
+ * c.Feed("Line 1\r\nLine 2\r\nLine 3", callback);
+ * // -> "Line 1\r\n"
+ * // -> "Line 2\r\n"
+ * c.Feed("\r\nLeftovers.", callback);
+ * // -> "Line 3\r\n"
+ * c.Flush(callback);
+ * // -> "Leftovers."
+ *
+ * See TestLineReader.cpp for more examples.
+ */
+class LineReader {
+ public:
+ /*
+ * Feed() takes in a chunk of data to be split up into lines. You can call
+ * this as often as required to feed in all your data. Don't forget to call
+ * Flush() after the last Feed(), in case the last line has no line endings!
+ *
+ * The callback will be invoked once for each full line extracted.
+ * It should have the form:
+ * The callback is of the form:
+ * bool callback(mozilla::Span<const char> line);
+ *
+ * The data in `line` should be considered valid only until the callback
+ * returns. So if the callback wants to retain data it needs to copy it.
+ * `line` will include any EOL character(s).
+ * The callback should return true to continue processing.
+ * If the callback returns false, processing will stop, even if there is
+ * more data available.
+ */
+ template <typename LineFn>
+ void Feed(mozilla::Span<const char> data, LineFn callback) {
+ bool keepGoing = true;
+ while (!data.IsEmpty() && keepGoing) {
+ auto eol = std::find(data.cbegin(), data.cend(), '\n');
+ if (eol == data.cend()) {
+ // No LF. Just collect and wait for more.
+ // TODO: limit maximum mBuf size, to stop maliciously-crafted input
+ // OOMing us?
+ if (!mBuf.append(data.data(), data.size())) {
+ NS_ERROR("OOM!");
+ }
+ return;
+ }
+
+ // Consume everything up to and including the LF.
+ ++eol;
+ mozilla::Span<const char> line(data.cbegin(), eol);
+ data = mozilla::Span<const char>(eol, data.cend());
+
+ if (mBuf.empty()) {
+ // Pass the data through directly, no copying.
+ keepGoing = callback(line);
+ } else {
+ // Complete the line we previously started.
+ if (!mBuf.append(line.data(), line.size())) {
+ NS_ERROR("OOM!");
+ }
+ keepGoing = callback(mBuf);
+ mBuf.clear();
+ }
+ }
+ }
+
+ /*
+ * Flush() will invoke the callback with any leftover data, after the last
+ * Feed() call has completed.
+ * The line passed to the callback will be a partial line, without a final
+ * LF. If the input data has a final LF, there will be nothing to flush,
+ * and the callback will not be invoked.
+ */
+ template <typename LineFn>
+ void Flush(LineFn callback) {
+ if (!mBuf.empty()) {
+ callback(mBuf);
+ mBuf.clear();
+ }
+ }
+
+ private:
+ // Growable buffer, to collect lines which come in as multiple parts.
+ // Can handle lines up to 80 chars before needing to reallocate.
+ mozilla::Vector<char, 80> mBuf;
+};
+
+/**
+ * SplitLines() invokes a callback for every complete line it finds in the
+ * input data.
+ *
+ * The callback is of the form:
+ * bool callback(mozilla::Span<const char> line);
+ * where line is a span pointing to the range of bytes in the input data
+ * which comprises the line.
+ *
+ * If the callback returns false, processing is halted.
+ *
+ * The lines passed to the callback include end-of-line (EOL) character(s).
+ *
+ * Lines are considered terminated by '\n' (LF) but this means CRLF-delimited
+ * data is also handled correctly.
+ *
+ * This function is byte-exact: if you concatenate all the line spans, along
+ * with the unconsumed data returned at the end, you'll end up with the exact
+ * same byte sequence as the original input data.
+ *
+ * @param data - The input bytes.
+ * @param callback - The callback to invoke for each line.
+ *
+ * @returns the unconsumed data. Usually this will be empty, or an incomplete
+ * line at the end (with no EOL). However if the callback returned
+ * false, all the unused data will be returned.
+ */
+template <typename LineFn>
+mozilla::Span<const char> SplitLines(mozilla::Span<const char> data,
+ LineFn callback) {
+ while (!data.IsEmpty()) {
+ auto eol = std::find(data.cbegin(), data.cend(), '\n');
+ if (eol == data.cend()) {
+ // No LF - we're done. May or may not be some leftover data.
+ break;
+ }
+
+ // Consume everything up to and including the LF.
+ ++eol;
+ mozilla::Span<const char> line(data.cbegin(), eol);
+ data = mozilla::Span<const char>(eol, data.cend());
+
+ if (callback(line) == false) {
+ break;
+ }
+ }
+ return data;
+}
+
+#endif