Adding upstream version 1:115.7.0.upstream/1%115.7.0 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 17:32:43 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 17:32:43 +0000
commit: 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch)
tree: a68f146d7fa01f0134297619fbe7e33db084e0aa /comm/mailnews/base/src/LineReader.h
parent: Initial commit. (diff)
download: thunderbird-upstream.tar.xz
thunderbird-upstream.zip
1 files changed, 188 insertions, 0 deletions
diff --git a/comm/mailnews/base/src/LineReader.h b/comm/mailnews/base/src/LineReader.h
new file mode 100644
index 0000000000..292c7ced7c
--- /dev/null
+++ b/comm/mailnews/base/src/LineReader.h
@@ -0,0 +1,188 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef LineReader_h__
+#define LineReader_h__
+
+#include <algorithm>
+#include "mozilla/Span.h"
+#include "mozilla/Vector.h"
+
+/**
+ * FirstLine() returns the first line of a span.
+ * The EOL sequence (CRLF or LF) is included in the returned line.
+ * If no lines are found an empty span is returned.
+ */
+inline mozilla::Span<const char> FirstLine(
+    mozilla::Span<const char> const& data) {
+  auto eol = std::find(data.cbegin(), data.cend(), '\n');
+  if (eol == data.cend()) {
+    // no line ending found - return empty span.
+    return data.First(0);
+  }
+  ++eol;
+  return mozilla::Span<const char>(data.cbegin(), eol);
+}
+
+/**
+ * LineReader breaks up continuous character streams into lines.
+ * Data is fed in by calling Feed() as often as required, and a
+ * callback function is invoked to handle each resulting line.
+ *
+ * The resulting lines include the end-of-line char(s), except for any
+ * non-terminated final line.
+ * LF ('\n') is used as the line terminator. CRLF-terminated lines will
+ * be handled correctly - the resultant lines will include the line
+ * terminators exactly as they appear in the input data.
+ *
+ * Goals for LineReader:
+ * - Byte exact. The bytes fed in will appear _exactly_ in the callback fn.
+ * - Callback can be inlined (due to templating).
+ * - Avoid copying data if possible. The internal buffer is only used when
+ *   lines are split across incoming chunks of data.
+ * - Tries to avoid heap allocation. If the internal buffer is used, it'll
+ *   only allocate memory for long lines (>80 chars).
+ *
+ * Example usage:
+ *
+ *    auto callback = [](mozilla::Span<const char> line) {
+ *      printf("%s\n", nsCString(line).get());
+ *      return true;
+ *    };
+ *
+ *    LineReader c;
+ *    c.Feed("Line 1\r\nLine 2\r\nLine 3", callback);
+ *    // -> "Line 1\r\n"
+ *    // -> "Line 2\r\n"
+ *    c.Feed("\r\nLeftovers.", callback);
+ *    // -> "Line 3\r\n"
+ *    c.Flush(callback);
+ *    // -> "Leftovers."
+ *
+ * See TestLineReader.cpp for more examples.
+ */
+class LineReader {
+ public:
+  /*
+   * Feed() takes in a chunk of data to be split up into lines. You can call
+   * this as often as required to feed in all your data. Don't forget to call
+   * Flush() after the last Feed(), in case the last line has no line endings!
+   *
+   * The callback will be invoked once for each full line extracted.
+   * It should have the form:
+   * The callback is of the form:
+   *   bool callback(mozilla::Span<const char> line);
+   *
+   * The data in `line` should be considered valid only until the callback
+   * returns. So if the callback wants to retain data it needs to copy it.
+   * `line` will include any EOL character(s).
+   * The callback should return true to continue processing.
+   * If the callback returns false, processing will stop, even if there is
+   * more data available.
+   */
+  template <typename LineFn>
+  void Feed(mozilla::Span<const char> data, LineFn callback) {
+    bool keepGoing = true;
+    while (!data.IsEmpty() && keepGoing) {
+      auto eol = std::find(data.cbegin(), data.cend(), '\n');
+      if (eol == data.cend()) {
+        // No LF. Just collect and wait for more.
+        // TODO: limit maximum mBuf size, to stop maliciously-crafted input
+        // OOMing us?
+        if (!mBuf.append(data.data(), data.size())) {
+          NS_ERROR("OOM!");
+        }
+        return;
+      }
+
+      // Consume everything up to and including the LF.
+      ++eol;
+      mozilla::Span<const char> line(data.cbegin(), eol);
+      data = mozilla::Span<const char>(eol, data.cend());
+
+      if (mBuf.empty()) {
+        // Pass the data through directly, no copying.
+        keepGoing = callback(line);
+      } else {
+        // Complete the line we previously started.
+        if (!mBuf.append(line.data(), line.size())) {
+          NS_ERROR("OOM!");
+        }
+        keepGoing = callback(mBuf);
+        mBuf.clear();
+      }
+    }
+  }
+
+  /*
+   * Flush() will invoke the callback with any leftover data, after the last
+   * Feed() call has completed.
+   * The line passed to the callback will be a partial line, without a final
+   * LF. If the input data has a final LF, there will be nothing to flush,
+   * and the callback will not be invoked.
+   */
+  template <typename LineFn>
+  void Flush(LineFn callback) {
+    if (!mBuf.empty()) {
+      callback(mBuf);
+      mBuf.clear();
+    }
+  }
+
+ private:
+  // Growable buffer, to collect lines which come in as multiple parts.
+  // Can handle lines up to 80 chars before needing to reallocate.
+  mozilla::Vector<char, 80> mBuf;
+};
+
+/**
+ * SplitLines() invokes a callback for every complete line it finds in the
+ * input data.
+ *
+ * The callback is of the form:
+ *   bool callback(mozilla::Span<const char> line);
+ * where line is a span pointing to the range of bytes in the input data
+ * which comprises the line.
+ *
+ * If the callback returns false, processing is halted.
+ *
+ * The lines passed to the callback include end-of-line (EOL) character(s).
+ *
+ * Lines are considered terminated by '\n' (LF) but this means CRLF-delimited
+ * data is also handled correctly.
+ *
+ * This function is byte-exact: if you concatenate all the line spans, along
+ * with the unconsumed data returned at the end, you'll end up with the exact
+ * same byte sequence as the original input data.
+ *
+ * @param data - The input bytes.
+ * @param callback - The callback to invoke for each line.
+ *
+ * @returns the unconsumed data. Usually this will be empty, or an incomplete
+ *          line at the end (with no EOL). However if the callback returned
+ *          false, all the unused data will be returned.
+ */
+template <typename LineFn>
+mozilla::Span<const char> SplitLines(mozilla::Span<const char> data,
+                                     LineFn callback) {
+  while (!data.IsEmpty()) {
+    auto eol = std::find(data.cbegin(), data.cend(), '\n');
+    if (eol == data.cend()) {
+      // No LF - we're done. May or may not be some leftover data.
+      break;
+    }
+
+    // Consume everything up to and including the LF.
+    ++eol;
+    mozilla::Span<const char> line(data.cbegin(), eol);
+    data = mozilla::Span<const char>(eol, data.cend());
+
+    if (callback(line) == false) {
+      break;
+    }
+  }
+  return data;
+}
+
+#endif
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 17:32:43 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 17:32:43 +0000
commit	6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch)
tree	a68f146d7fa01f0134297619fbe7e33db084e0aa /comm/mailnews/base/src/LineReader.h
parent	Initial commit. (diff)
download	thunderbird-upstream.tar.xz thunderbird-upstream.zip