From 6bf0a5cb5034a7e684dcc3500e841785237ce2dd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 19:32:43 +0200 Subject: Adding upstream version 1:115.7.0. Signed-off-by: Daniel Baumann --- comm/mailnews/base/src/LineReader.h | 188 ++++++++++++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) create mode 100644 comm/mailnews/base/src/LineReader.h (limited to 'comm/mailnews/base/src/LineReader.h') diff --git a/comm/mailnews/base/src/LineReader.h b/comm/mailnews/base/src/LineReader.h new file mode 100644 index 0000000000..292c7ced7c --- /dev/null +++ b/comm/mailnews/base/src/LineReader.h @@ -0,0 +1,188 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef LineReader_h__ +#define LineReader_h__ + +#include +#include "mozilla/Span.h" +#include "mozilla/Vector.h" + +/** + * FirstLine() returns the first line of a span. + * The EOL sequence (CRLF or LF) is included in the returned line. + * If no lines are found an empty span is returned. + */ +inline mozilla::Span FirstLine( + mozilla::Span const& data) { + auto eol = std::find(data.cbegin(), data.cend(), '\n'); + if (eol == data.cend()) { + // no line ending found - return empty span. + return data.First(0); + } + ++eol; + return mozilla::Span(data.cbegin(), eol); +} + +/** + * LineReader breaks up continuous character streams into lines. + * Data is fed in by calling Feed() as often as required, and a + * callback function is invoked to handle each resulting line. + * + * The resulting lines include the end-of-line char(s), except for any + * non-terminated final line. + * LF ('\n') is used as the line terminator. CRLF-terminated lines will + * be handled correctly - the resultant lines will include the line + * terminators exactly as they appear in the input data. + * + * Goals for LineReader: + * - Byte exact. The bytes fed in will appear _exactly_ in the callback fn. + * - Callback can be inlined (due to templating). + * - Avoid copying data if possible. The internal buffer is only used when + * lines are split across incoming chunks of data. + * - Tries to avoid heap allocation. If the internal buffer is used, it'll + * only allocate memory for long lines (>80 chars). + * + * Example usage: + * + * auto callback = [](mozilla::Span line) { + * printf("%s\n", nsCString(line).get()); + * return true; + * }; + * + * LineReader c; + * c.Feed("Line 1\r\nLine 2\r\nLine 3", callback); + * // -> "Line 1\r\n" + * // -> "Line 2\r\n" + * c.Feed("\r\nLeftovers.", callback); + * // -> "Line 3\r\n" + * c.Flush(callback); + * // -> "Leftovers." + * + * See TestLineReader.cpp for more examples. + */ +class LineReader { + public: + /* + * Feed() takes in a chunk of data to be split up into lines. You can call + * this as often as required to feed in all your data. Don't forget to call + * Flush() after the last Feed(), in case the last line has no line endings! + * + * The callback will be invoked once for each full line extracted. + * It should have the form: + * The callback is of the form: + * bool callback(mozilla::Span line); + * + * The data in `line` should be considered valid only until the callback + * returns. So if the callback wants to retain data it needs to copy it. + * `line` will include any EOL character(s). + * The callback should return true to continue processing. + * If the callback returns false, processing will stop, even if there is + * more data available. + */ + template + void Feed(mozilla::Span data, LineFn callback) { + bool keepGoing = true; + while (!data.IsEmpty() && keepGoing) { + auto eol = std::find(data.cbegin(), data.cend(), '\n'); + if (eol == data.cend()) { + // No LF. Just collect and wait for more. + // TODO: limit maximum mBuf size, to stop maliciously-crafted input + // OOMing us? + if (!mBuf.append(data.data(), data.size())) { + NS_ERROR("OOM!"); + } + return; + } + + // Consume everything up to and including the LF. + ++eol; + mozilla::Span line(data.cbegin(), eol); + data = mozilla::Span(eol, data.cend()); + + if (mBuf.empty()) { + // Pass the data through directly, no copying. + keepGoing = callback(line); + } else { + // Complete the line we previously started. + if (!mBuf.append(line.data(), line.size())) { + NS_ERROR("OOM!"); + } + keepGoing = callback(mBuf); + mBuf.clear(); + } + } + } + + /* + * Flush() will invoke the callback with any leftover data, after the last + * Feed() call has completed. + * The line passed to the callback will be a partial line, without a final + * LF. If the input data has a final LF, there will be nothing to flush, + * and the callback will not be invoked. + */ + template + void Flush(LineFn callback) { + if (!mBuf.empty()) { + callback(mBuf); + mBuf.clear(); + } + } + + private: + // Growable buffer, to collect lines which come in as multiple parts. + // Can handle lines up to 80 chars before needing to reallocate. + mozilla::Vector mBuf; +}; + +/** + * SplitLines() invokes a callback for every complete line it finds in the + * input data. + * + * The callback is of the form: + * bool callback(mozilla::Span line); + * where line is a span pointing to the range of bytes in the input data + * which comprises the line. + * + * If the callback returns false, processing is halted. + * + * The lines passed to the callback include end-of-line (EOL) character(s). + * + * Lines are considered terminated by '\n' (LF) but this means CRLF-delimited + * data is also handled correctly. + * + * This function is byte-exact: if you concatenate all the line spans, along + * with the unconsumed data returned at the end, you'll end up with the exact + * same byte sequence as the original input data. + * + * @param data - The input bytes. + * @param callback - The callback to invoke for each line. + * + * @returns the unconsumed data. Usually this will be empty, or an incomplete + * line at the end (with no EOL). However if the callback returned + * false, all the unused data will be returned. + */ +template +mozilla::Span SplitLines(mozilla::Span data, + LineFn callback) { + while (!data.IsEmpty()) { + auto eol = std::find(data.cbegin(), data.cend(), '\n'); + if (eol == data.cend()) { + // No LF - we're done. May or may not be some leftover data. + break; + } + + // Consume everything up to and including the LF. + ++eol; + mozilla::Span line(data.cbegin(), eol); + data = mozilla::Span(eol, data.cend()); + + if (callback(line) == false) { + break; + } + } + return data; +} + +#endif -- cgit v1.2.3