diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
commit | 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch) | |
tree | a68f146d7fa01f0134297619fbe7e33db084e0aa /comm/mailnews/base/src/LineReader.h | |
parent | Initial commit. (diff) | |
download | thunderbird-upstream.tar.xz thunderbird-upstream.zip |
Adding upstream version 1:115.7.0. (refs: upstream/1%115.7.0, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | comm/mailnews/base/src/LineReader.h | 188 |
1 files changed, 188 insertions, 0 deletions
diff --git a/comm/mailnews/base/src/LineReader.h b/comm/mailnews/base/src/LineReader.h new file mode 100644 index 0000000000..292c7ced7c --- /dev/null +++ b/comm/mailnews/base/src/LineReader.h @@ -0,0 +1,188 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef LineReader_h__ +#define LineReader_h__ + +#include <algorithm> +#include "mozilla/Span.h" +#include "mozilla/Vector.h" + +/** + * FirstLine() returns the first line of a span. + * The EOL sequence (CRLF or LF) is included in the returned line. + * If no lines are found an empty span is returned. + */ +inline mozilla::Span<const char> FirstLine( + mozilla::Span<const char> const& data) { + auto eol = std::find(data.cbegin(), data.cend(), '\n'); + if (eol == data.cend()) { + // no line ending found - return empty span. + return data.First(0); + } + ++eol; + return mozilla::Span<const char>(data.cbegin(), eol); +} + +/** + * LineReader breaks up continuous character streams into lines. + * Data is fed in by calling Feed() as often as required, and a + * callback function is invoked to handle each resulting line. + * + * The resulting lines include the end-of-line char(s), except for any + * non-terminated final line. + * LF ('\n') is used as the line terminator. CRLF-terminated lines will + * be handled correctly - the resultant lines will include the line + * terminators exactly as they appear in the input data. + * + * Goals for LineReader: + * - Byte exact. The bytes fed in will appear _exactly_ in the callback fn. + * - Callback can be inlined (due to templating). + * - Avoid copying data if possible. The internal buffer is only used when + * lines are split across incoming chunks of data. + * - Tries to avoid heap allocation. 
If the internal buffer is used, it'll + * only allocate memory for long lines (>80 chars). + * + * Example usage: + * + * auto callback = [](mozilla::Span<const char> line) { + * printf("%s\n", nsCString(line).get()); + * return true; + * }; + * + * LineReader c; + * c.Feed("Line 1\r\nLine 2\r\nLine 3", callback); + * // -> "Line 1\r\n" + * // -> "Line 2\r\n" + * c.Feed("\r\nLeftovers.", callback); + * // -> "Line 3\r\n" + * c.Flush(callback); + * // -> "Leftovers." + * + * See TestLineReader.cpp for more examples. + */ +class LineReader { + public: + /* + * Feed() takes in a chunk of data to be split up into lines. You can call + * this as often as required to feed in all your data. Don't forget to call + * Flush() after the last Feed(), in case the last line has no line endings! + * + * The callback will be invoked once for each full line extracted. + * It should have the form: + * The callback is of the form: + * bool callback(mozilla::Span<const char> line); + * + * The data in `line` should be considered valid only until the callback + * returns. So if the callback wants to retain data it needs to copy it. + * `line` will include any EOL character(s). + * The callback should return true to continue processing. + * If the callback returns false, processing will stop, even if there is + * more data available. + */ + template <typename LineFn> + void Feed(mozilla::Span<const char> data, LineFn callback) { + bool keepGoing = true; + while (!data.IsEmpty() && keepGoing) { + auto eol = std::find(data.cbegin(), data.cend(), '\n'); + if (eol == data.cend()) { + // No LF. Just collect and wait for more. + // TODO: limit maximum mBuf size, to stop maliciously-crafted input + // OOMing us? + if (!mBuf.append(data.data(), data.size())) { + NS_ERROR("OOM!"); + } + return; + } + + // Consume everything up to and including the LF. 
+ ++eol; + mozilla::Span<const char> line(data.cbegin(), eol); + data = mozilla::Span<const char>(eol, data.cend()); + + if (mBuf.empty()) { + // Pass the data through directly, no copying. + keepGoing = callback(line); + } else { + // Complete the line we previously started. + if (!mBuf.append(line.data(), line.size())) { + NS_ERROR("OOM!"); + } + keepGoing = callback(mBuf); + mBuf.clear(); + } + } + } + + /* + * Flush() will invoke the callback with any leftover data, after the last + * Feed() call has completed. + * The line passed to the callback will be a partial line, without a final + * LF. If the input data has a final LF, there will be nothing to flush, + * and the callback will not be invoked. + */ + template <typename LineFn> + void Flush(LineFn callback) { + if (!mBuf.empty()) { + callback(mBuf); + mBuf.clear(); + } + } + + private: + // Growable buffer, to collect lines which come in as multiple parts. + // Can handle lines up to 80 chars before needing to reallocate. + mozilla::Vector<char, 80> mBuf; +}; + +/** + * SplitLines() invokes a callback for every complete line it finds in the + * input data. + * + * The callback is of the form: + * bool callback(mozilla::Span<const char> line); + * where line is a span pointing to the range of bytes in the input data + * which comprises the line. + * + * If the callback returns false, processing is halted. + * + * The lines passed to the callback include end-of-line (EOL) character(s). + * + * Lines are considered terminated by '\n' (LF) but this means CRLF-delimited + * data is also handled correctly. + * + * This function is byte-exact: if you concatenate all the line spans, along + * with the unconsumed data returned at the end, you'll end up with the exact + * same byte sequence as the original input data. + * + * @param data - The input bytes. + * @param callback - The callback to invoke for each line. + * + * @returns the unconsumed data. 
Usually this will be empty, or an incomplete + * line at the end (with no EOL). However if the callback returned + * false, all the unused data will be returned. + */ +template <typename LineFn> +mozilla::Span<const char> SplitLines(mozilla::Span<const char> data, + LineFn callback) { + while (!data.IsEmpty()) { + auto eol = std::find(data.cbegin(), data.cend(), '\n'); + if (eol == data.cend()) { + // No LF - we're done. May or may not be some leftover data. + break; + } + + // Consume everything up to and including the LF. + ++eol; + mozilla::Span<const char> line(data.cbegin(), eol); + data = mozilla::Span<const char>(eol, data.cend()); + + if (callback(line) == false) { + break; + } + } + return data; +} + +#endif |