diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:48:59 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:48:59 +0000 |
commit | c484829272cd13a738e35412498e12f2c9a194ac (patch) | |
tree | a1f5ec09629ee895bd3963fa8820b45f2f4c574b /include/orcus/stream.hpp | |
parent | Initial commit. (diff) | |
download | liborcus-upstream/0.19.2.tar.xz liborcus-upstream/0.19.2.zip |
Adding upstream version 0.19.2.upstream/0.19.2upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | include/orcus/stream.hpp | 188 |
1 files changed, 188 insertions, 0 deletions
diff --git a/include/orcus/stream.hpp b/include/orcus/stream.hpp new file mode 100644 index 0000000..dd094bb --- /dev/null +++ b/include/orcus/stream.hpp @@ -0,0 +1,188 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#ifndef INCLUDED_ORCUS_STREAM_HPP +#define INCLUDED_ORCUS_STREAM_HPP + +#include "env.hpp" + +#include <memory> +#include <string> + +namespace orcus { + +/** + * Represents the content of a file. + * + * The file content is memory-mapped initially, but may later become in-memory + * if the non-utf-8 content gets converted to utf-8. + */ +class ORCUS_PSR_DLLPUBLIC file_content +{ + struct impl; + std::unique_ptr<impl> mp_impl; +public: + file_content(const file_content&) = delete; + file_content& operator= (const file_content&) = delete; + + file_content(); + file_content(file_content&& other); + file_content(std::string_view filepath); + ~file_content(); + + /** + * Obtain the memory address to the first character in the content buffer. + * + * @return pointer to the first character in the buffer. + */ + const char* data() const; + + /** + * Return the size of the content i.e. the number of characters in the + * content buffer. + * + * @return size of the content. + */ + size_t size() const; + + /** + * Query whether or not the content is empty. + * + * @return true if the content is empty, otherwise false. + */ + bool empty() const; + + /** + * Swap content with another instance. + * + * @param other another instance to swap content with. + */ + void swap(file_content& other); + + /** + * Load from a new file. This will invalidate the pointer returned from the + * data() method prior to the call. + * + * @param filepath path of the file to load from. + */ + void load(std::string_view filepath); + + /** + * Convert a non-utf-8 stream to a utf-8 one if the source stream contains + * a byte order mark. If not, it does nothing. When the conversion + * happens, the converted content will be stored in-memory. + */ + void convert_to_utf8(); + + std::string_view str() const; +}; + +/** + * Represents the content of an in-memory buffer. Note that this class will + * NOT own the content of the source buffer but simply will reference it, + * except when the original buffer is a non-utf-8 stream and the caller + * chooses to convert it to utf-8 by calling its convert_to_utf8() method. + */ +class ORCUS_PSR_DLLPUBLIC memory_content +{ + struct impl; + std::unique_ptr<impl> mp_impl; +public: + memory_content(const file_content&) = delete; + memory_content& operator= (const file_content&) = delete; + + memory_content(); + memory_content(std::string_view s); + memory_content(memory_content&& other); + ~memory_content(); + + const char* data() const; + size_t size() const; + bool empty() const; + + void swap(memory_content& other); + + /** + * Convert a non-utf-8 stream to a utf-8 one if the source stream contains + * a byte order mark. If not, it does nothing. When the conversion + * happens, the converted content will be owned by the object. + */ + void convert_to_utf8(); + + std::string_view str() const; +}; + +struct ORCUS_PSR_DLLPUBLIC line_with_offset +{ + /** content of the entire line. */ + std::string line; + /** 0-based line number. */ + std::size_t line_number; + /** 0-based offset within the line. */ + std::size_t offset_on_line; + + line_with_offset(std::string _line, std::size_t _line_number, std::size_t _offset_on_line); + line_with_offset(const line_with_offset& other); + line_with_offset(line_with_offset&& other); + ~line_with_offset(); + + bool operator== (const line_with_offset& other) const; + bool operator!= (const line_with_offset& other) const; +}; + +/** + * Generate a sensible error output for parse error including the line where + * the error occurred and the offset of the error position on that line. + * + * @param strm entire character stream where the error occurred. + * @param offset offset of the error position within the stream. + * + * @return string formatted to be usable as an error message for stdout. + */ +ORCUS_PSR_DLLPUBLIC std::string create_parse_error_output(std::string_view strm, std::ptrdiff_t offset); + +/** + * Given a string consisting of multiple lines i.e. multiple line breaks, + * find the line that contains the specified offset position. + * + * @param strm string stream containing multiple lines to search. + * @param offset offset position. + * + * @return structure containing information about the line containing the + * offset position. + * + * @exception std::invalid_argument if the offset value equals or exceeds the + * length of the string stream being searched. + */ +ORCUS_PSR_DLLPUBLIC line_with_offset locate_line_with_offset(std::string_view strm, std::ptrdiff_t offset); + +/** + * Given two strings, locate the position of the first character that is + * different between the two strings. Note that if one of the strings is + * empty (or both of them are empty), it returns 0. + * + * @param left one of the strings to compare. + * @param right one of the strings to compare. + * + * @return position of the first character that is different between the two + * compared strings. + */ +ORCUS_PSR_DLLPUBLIC size_t locate_first_different_char(std::string_view left, std::string_view right); + +/** + * Calculate the logical length of a UTF-8 encoded string. + * + * @param s string to calculate the logical length of. + * @return logical length of the UTF-8 encoded string. + */ +ORCUS_PSR_DLLPUBLIC std::size_t calc_logical_string_length(std::string_view s); + +} // namespace orcus + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |