summaryrefslogtreecommitdiffstats
path: root/include/orcus/parser_global.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'include/orcus/parser_global.hpp')
-rw-r--r--include/orcus/parser_global.hpp153
1 files changed, 153 insertions, 0 deletions
diff --git a/include/orcus/parser_global.hpp b/include/orcus/parser_global.hpp
new file mode 100644
index 0000000..bf5971b
--- /dev/null
+++ b/include/orcus/parser_global.hpp
@@ -0,0 +1,153 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef ORCUS_PARSER_GLOBAL_HPP
+#define ORCUS_PARSER_GLOBAL_HPP
+
+#include "env.hpp"
+
+#include <sstream>
+
+namespace orcus {
+
+class cell_buffer;
+
+enum class string_escape_char_t // type of a character that follows the escape character '\'
+{
+ invalid, // NOTE(review): presumably not a legal escape character - confirm against the implementation
+ valid, // NOTE(review): presumably a legal, non-control escape character - confirm
+ control_char // NOTE(review): presumably an escape that denotes a control character - confirm
+};
+
+/**
+ * Stores the state of string parsing. Upon successful parsing, str points
+ * to the first character of the string and length stores the size of
+ * the string. When the parsing fails, str becomes nullptr and length
+ * stores the error code.
+ */
+struct parse_quoted_string_state
+{
+ ORCUS_PSR_DLLPUBLIC static const size_t error_no_closing_quote; // NOTE(review): presumably a value length takes on failure - confirm
+ ORCUS_PSR_DLLPUBLIC static const size_t error_illegal_escape_char; // NOTE(review): presumably a value length takes on failure - confirm
+
+ const char* str; // first character of the parsed string, or nullptr on failure
+ size_t length; // size of the parsed string, or the error code on failure
+
+ /**
+ * When true, the str pointer points to the temporary buffer storage
+ * provided by the caller instead of the original character stream. The
+ * caller must allocate memory and copy the value to it before the buffer
+ * content changes if the parsed string value needs to be stored.
+ *
+ * When false, str points to a position in the original stream, and the
+ * caller doesn't need to allocate memory to store the string value as
+ * long as the original character stream is alive.
+ */
+ bool transient;
+
+ /**
+ * When true, the string contains at least one control character - a
+ * character whose value ranges between 0x00 and 0x1F.
+ */
+ bool has_control_character;
+};
+
+ORCUS_PSR_DLLPUBLIC bool is_blank(char c);
+ORCUS_PSR_DLLPUBLIC bool is_alpha(char c);
+ORCUS_PSR_DLLPUBLIC bool is_numeric(char c);
+
+/**
+ * Check if the character is one of the allowed characters. Note that you can
+ * only specify up to 16 allowed characters.
+ *
+ * @param c character to check.
+ * @param allowed string containing all allowed characters.
+ *
+ * @return true if the character is one of the allowed characters, false
+ * otherwise.
+ */
+ORCUS_PSR_DLLPUBLIC bool is_in(char c, std::string_view allowed);
+
+/**
+ * Parse a sequence of characters into a double-precision numeric value.
+ *
+ * @param p pointer to the first character to start parsing from.
+ * @param p_end pointer to the first character not allowed to parse.
+ * @param value output parameter to assign the matched value to.
+ *
+ * @return pointer to the first non-matching character.
+ */
+ORCUS_PSR_DLLPUBLIC const char* parse_numeric(const char* p, const char* p_end, double& value);
+
+/**
+ * Parse a sequence of characters into an integer value.
+ *
+ * @param p pointer to the first character to start parsing from.
+ * @param p_end pointer to the first character not allowed to parse.
+ * @param value output parameter to assign the matched value to.
+ *
+ * @return pointer to the first non-matching character.
+ *
+ * @note Use of this function should be eventually replaced with
+ * std::from_chars() once it becomes available.
+ */
+ORCUS_PSR_DLLPUBLIC const char* parse_integer(const char* p, const char* p_end, long& value);
+
+/**
+ * Two single-quote characters ('') represent one single-quote character.
+ */
+ORCUS_PSR_DLLPUBLIC parse_quoted_string_state parse_single_quoted_string(
+ const char*& p, size_t max_length, cell_buffer& buffer);
+
+/**
+ * Starting from the opening single quote position, parse string all the way
+ * to the closing quote. Two consecutive single-quote characters ('') will
+ * be interpreted as one escaped single-quote character.
+ *
+ * @param p it should point to the opening single quote character.
+ * @param max_length maximum length to parse.
+ *
+ * @return address of the character immediately after the closing quote, or
+ * nullptr in case no closing quote is found.
+ */
+ORCUS_PSR_DLLPUBLIC const char* parse_to_closing_single_quote(
+ const char* p, size_t max_length);
+
+ORCUS_PSR_DLLPUBLIC parse_quoted_string_state parse_double_quoted_string(
+ const char*& p, size_t max_length, cell_buffer& buffer);
+
+/**
+ * Starting from the opening double quote position, parse string all the way
+ * to the closing quote. Two single-quote characters ('') will be
+ * interpreted as encoded one single-quote character.
+ *
+ * @param p it should point to the opening double quote character.
+ * @param max_length maximum length to parse.
+ *
+ * @return address of the character immediately after the closing quote, or
+ * nullptr in case no closing quote is found.
+ */
+ORCUS_PSR_DLLPUBLIC const char* parse_to_closing_double_quote(
+ const char* p, size_t max_length);
+
+/**
+ * Given a character that occurs immediately after the escape character '\',
+ * return what type this character is.
+ *
+ * @param c character that occurs immediately after the escape character
+ * '\'.
+ *
+ * @return enum value representing the type of escape character.
+ */
+ORCUS_PSR_DLLPUBLIC string_escape_char_t get_string_escape_char_type(char c);
+
+ORCUS_PSR_DLLPUBLIC std::string_view trim(std::string_view str);
+
+}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */