diff options
Diffstat (limited to 'include/orcus/parser_global.hpp')
-rw-r--r-- | include/orcus/parser_global.hpp | 153 |
1 files changed, 153 insertions, 0 deletions
diff --git a/include/orcus/parser_global.hpp b/include/orcus/parser_global.hpp new file mode 100644 index 0000000..bf5971b --- /dev/null +++ b/include/orcus/parser_global.hpp @@ -0,0 +1,153 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#ifndef ORCUS_PARSER_GLOBAL_HPP +#define ORCUS_PARSER_GLOBAL_HPP + +#include "env.hpp" + +#include <sstream> + +namespace orcus { + +class cell_buffer; + +enum class string_escape_char_t +{ + invalid, + valid, + control_char +}; + +/** + * Stores the state of quoted-string parsing. Upon successful parsing, str + * points to the first character of the string and length stores the size + * of the string. When the parsing fails, str becomes nullptr and length + * stores the error code (presumably one of the error_* constants declared + * in this struct - confirm against the implementation). + */ +struct parse_quoted_string_state +{ + ORCUS_PSR_DLLPUBLIC static const size_t error_no_closing_quote; + ORCUS_PSR_DLLPUBLIC static const size_t error_illegal_escape_char; + + const char* str; + size_t length; + + /** + * When true, the str pointer points to the temporary buffer storage + * provided by the caller instead of the original character stream. The + * caller must allocate memory and copy the value to it before the buffer + * content changes if the parsed string value needs to be stored. + * + * When false, str points to a position in the original stream, and the + * caller doesn't need to allocate memory to store the string value as + * long as the original character stream is alive. + */ + bool transient; + + /** + * When true, the string contains at least one control character - a + * character whose value ranges between 0x00 and 0x1F. 
+ */ + bool has_control_character; +}; + +ORCUS_PSR_DLLPUBLIC bool is_blank(char c); +ORCUS_PSR_DLLPUBLIC bool is_alpha(char c); +ORCUS_PSR_DLLPUBLIC bool is_numeric(char c); + +/** + * Check if the character is one of the allowed characters. Note that you + * can only specify up to 16 allowed characters. + * + * @param c character to check. + * @param allowed string containing all allowed characters. + * + * @return true if the character is one of the allowed characters, false + * otherwise. + */ +ORCUS_PSR_DLLPUBLIC bool is_in(char c, std::string_view allowed); + +/** + * Parse a sequence of characters into a double-precision numeric value. + * + * @param p pointer to the first character to start parsing from. + * @param p_end pointer to the first character that must not be parsed, + * i.e. the end of the parsable range. + * @param value output parameter to assign the matched value to. + * + * @return pointer to the first non-matching character. + */ +ORCUS_PSR_DLLPUBLIC const char* parse_numeric(const char* p, const char* p_end, double& value); + +/** + * Parse a sequence of characters into an integer value. + * + * @param p pointer to the first character to start parsing from. + * @param p_end pointer to the first character that must not be parsed, + * i.e. the end of the parsable range. + * @param value output parameter to assign the matched value to. + * + * @return pointer to the first non-matching character. + * + * @note Use of this function should eventually be replaced with + * std::from_chars() once it becomes available. + */ +ORCUS_PSR_DLLPUBLIC const char* parse_integer(const char* p, const char* p_end, long& value); + +/** + * Two consecutive single-quote characters ('') represent one single-quote + * character. + * + * @return parsing result as a parse_quoted_string_state. + */ +ORCUS_PSR_DLLPUBLIC parse_quoted_string_state parse_single_quoted_string( + const char*& p, size_t max_length, cell_buffer& buffer); + +/** + * Starting from the opening single quote position, parse string all the way + * to the closing quote. Two consecutive single-quote characters ('') will + * be interpreted as one encoded single-quote character. 
+ * + * @param p it should point to the opening single quote character. + * @param max_length maximum length to parse. + * + * @return address of the character immediately after the closing quote, or + * nullptr in case no closing quote is found. + */ +ORCUS_PSR_DLLPUBLIC const char* parse_to_closing_single_quote( + const char* p, size_t max_length); + +ORCUS_PSR_DLLPUBLIC parse_quoted_string_state parse_double_quoted_string( + const char*& p, size_t max_length, cell_buffer& buffer); + +/** + * Starting from the opening double quote position, parse string all the way + * to the closing quote. + * + * NOTE(review): this comment previously stated that two single-quote + * characters ('') are interpreted as one encoded single-quote character, + * which looks copied from parse_to_closing_single_quote. The escape rules + * applied to double-quoted strings are not visible in this header - + * confirm against the implementation. + * + * @param p it should point to the opening double quote character. + * @param max_length maximum length to parse. + * + * @return address of the character immediately after the closing quote, or + * nullptr in case no closing quote is found. + */ +ORCUS_PSR_DLLPUBLIC const char* parse_to_closing_double_quote( + const char* p, size_t max_length); + +/** + * Given a character that occurs immediately after the escape character '\', + * return what type this character is. + * + * @param c character that occurs immediately after the escape character + * '\'. + * + * @return enum value representing the type of the escaped character. + */ +ORCUS_PSR_DLLPUBLIC string_escape_char_t get_string_escape_char_type(char c); + +ORCUS_PSR_DLLPUBLIC std::string_view trim(std::string_view str); + +} + +#endif +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |