/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #ifndef LineReader_h__ #define LineReader_h__ #include #include "mozilla/Span.h" #include "mozilla/Vector.h" /** * FirstLine() returns the first line of a span. * The EOL sequence (CRLF or LF) is included in the returned line. * If no lines are found an empty span is returned. */ inline mozilla::Span FirstLine( mozilla::Span const& data) { auto eol = std::find(data.cbegin(), data.cend(), '\n'); if (eol == data.cend()) { // no line ending found - return empty span. return data.First(0); } ++eol; return mozilla::Span(data.cbegin(), eol); } /** * LineReader breaks up continuous character streams into lines. * Data is fed in by calling Feed() as often as required, and a * callback function is invoked to handle each resulting line. * * The resulting lines include the end-of-line char(s), except for any * non-terminated final line. * LF ('\n') is used as the line terminator. CRLF-terminated lines will * be handled correctly - the resultant lines will include the line * terminators exactly as they appear in the input data. * * Goals for LineReader: * - Byte exact. The bytes fed in will appear _exactly_ in the callback fn. * - Callback can be inlined (due to templating). * - Avoid copying data if possible. The internal buffer is only used when * lines are split across incoming chunks of data. * - Tries to avoid heap allocation. If the internal buffer is used, it'll * only allocate memory for long lines (>80 chars). * * Example usage: * * auto callback = [](mozilla::Span line) { * printf("%s\n", nsCString(line).get()); * return true; * }; * * LineReader c; * c.Feed("Line 1\r\nLine 2\r\nLine 3", callback); * // -> "Line 1\r\n" * // -> "Line 2\r\n" * c.Feed("\r\nLeftovers.", callback); * // -> "Line 3\r\n" * c.Flush(callback); * // -> "Leftovers." * * See TestLineReader.cpp for more examples. */ class LineReader { public: /* * Feed() takes in a chunk of data to be split up into lines. You can call * this as often as required to feed in all your data. Don't forget to call * Flush() after the last Feed(), in case the last line has no line endings! * * The callback will be invoked once for each full line extracted. * It should have the form: * The callback is of the form: * bool callback(mozilla::Span line); * * The data in `line` should be considered valid only until the callback * returns. So if the callback wants to retain data it needs to copy it. * `line` will include any EOL character(s). * The callback should return true to continue processing. * If the callback returns false, processing will stop, even if there is * more data available. */ template void Feed(mozilla::Span data, LineFn callback) { bool keepGoing = true; while (!data.IsEmpty() && keepGoing) { auto eol = std::find(data.cbegin(), data.cend(), '\n'); if (eol == data.cend()) { // No LF. Just collect and wait for more. // TODO: limit maximum mBuf size, to stop maliciously-crafted input // OOMing us? if (!mBuf.append(data.data(), data.size())) { NS_ERROR("OOM!"); } return; } // Consume everything up to and including the LF. ++eol; mozilla::Span line(data.cbegin(), eol); data = mozilla::Span(eol, data.cend()); if (mBuf.empty()) { // Pass the data through directly, no copying. keepGoing = callback(line); } else { // Complete the line we previously started. if (!mBuf.append(line.data(), line.size())) { NS_ERROR("OOM!"); } keepGoing = callback(mBuf); mBuf.clear(); } } } /* * Flush() will invoke the callback with any leftover data, after the last * Feed() call has completed. * The line passed to the callback will be a partial line, without a final * LF. If the input data has a final LF, there will be nothing to flush, * and the callback will not be invoked. */ template void Flush(LineFn callback) { if (!mBuf.empty()) { callback(mBuf); mBuf.clear(); } } private: // Growable buffer, to collect lines which come in as multiple parts. // Can handle lines up to 80 chars before needing to reallocate. mozilla::Vector mBuf; }; /** * SplitLines() invokes a callback for every complete line it finds in the * input data. * * The callback is of the form: * bool callback(mozilla::Span line); * where line is a span pointing to the range of bytes in the input data * which comprises the line. * * If the callback returns false, processing is halted. * * The lines passed to the callback include end-of-line (EOL) character(s). * * Lines are considered terminated by '\n' (LF) but this means CRLF-delimited * data is also handled correctly. * * This function is byte-exact: if you concatenate all the line spans, along * with the unconsumed data returned at the end, you'll end up with the exact * same byte sequence as the original input data. * * @param data - The input bytes. * @param callback - The callback to invoke for each line. * * @returns the unconsumed data. Usually this will be empty, or an incomplete * line at the end (with no EOL). However if the callback returned * false, all the unused data will be returned. */ template mozilla::Span SplitLines(mozilla::Span data, LineFn callback) { while (!data.IsEmpty()) { auto eol = std::find(data.cbegin(), data.cend(), '\n'); if (eol == data.cend()) { // No LF - we're done. May or may not be some leftover data. break; } // Consume everything up to and including the LF. ++eol; mozilla::Span line(data.cbegin(), eol); data = mozilla::Span(eol, data.cend()); if (callback(line) == false) { break; } } return data; } #endif