summaryrefslogtreecommitdiffstats
path: root/include/orcus/sax_token_parser.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'include/orcus/sax_token_parser.hpp')
-rw-r--r--include/orcus/sax_token_parser.hpp186
1 files changed, 186 insertions, 0 deletions
diff --git a/include/orcus/sax_token_parser.hpp b/include/orcus/sax_token_parser.hpp
new file mode 100644
index 0000000..867c8b5
--- /dev/null
+++ b/include/orcus/sax_token_parser.hpp
@@ -0,0 +1,186 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_SAX_TOKEN_PARSER_HPP
+#define INCLUDED_ORCUS_SAX_TOKEN_PARSER_HPP
+
+#include "sax_ns_parser.hpp"
+#include "types.hpp"
+
+#include <vector>
+#include <algorithm>
+#include <functional>
+
+namespace orcus {
+
+class tokens;
+
+class ORCUS_PSR_DLLPUBLIC sax_token_handler_wrapper_base
+{
+protected:
+ xml_declaration_t m_declaration;
+ xml_token_element_t m_elem;
+ const tokens& m_tokens;
+
+ xml_token_t tokenize(std::string_view name) const;
+ void set_element(const sax_ns_parser_element& elem);
+
+public:
+ sax_token_handler_wrapper_base(const tokens& _tokens);
+
+ void attribute(std::string_view name, std::string_view val);
+ void attribute(const sax_ns_parser_attribute& attr);
+};
+
+class sax_token_handler
+{
+public:
+
+ /**
+ * Called immediately after the entire XML declaration has been parsed.
+ *
+ * @param decl struct containing the attributes of the XML declaration.
+ */
+ void declaration(const orcus::xml_declaration_t& decl)
+ {
+ (void)decl;
+ }
+
+ /**
+ * Called at the start of each element.
+ *
+ * @param elem struct containing the element's information as well as all
+ * the attributes that belong to the element.
+ */
+ void start_element(const orcus::xml_token_element_t& elem)
+ {
+ (void)elem;
+ }
+
+ /**
+ * Called at the end of each element.
+ *
+ * @param elem struct containing the element's information as well as all
+ * the attributes that belong to the element.
+ */
+ void end_element(const orcus::xml_token_element_t& elem)
+ {
+ (void)elem;
+ }
+
+ /**
+ * Called when a segment of a text content is parsed. Each text content
+ * is a direct child of an element, which may have multiple child contents
+ * when the element also has a child element that are direct sibling to
+ * the text contents or the text contents are splitted by a comment.
+ *
+ * @param val value of the text content.
+ * @param transient when true, the text content has been converted and is
+ * stored in a temporary buffer due to presence of one or
+ * more encoded characters, in which case <em>the passed
+ * text value needs to be either immediately converted to
+ * a non-text value or be interned within the scope of
+ * the callback</em>.
+ */
+ void characters(std::string_view val, bool transient)
+ {
+ (void)val; (void)transient;
+ }
+};
+
+/**
+ * SAX parser that tokenizes element and attribute names while parsing. All
+ * pre-defined elements and attribute names are translated into integral
+ * identifiers via use of @ref tokens. The user of this class needs to
+ * provide a pre-defined set of element and attribute names at construction
+ * time.
+ *
+ * This parser internally uses @ref sax_ns_parser.
+ *
+ * @tparam HandlerT Handler type with member functions for event callbacks.
+ * Refer to @ref sax_token_handler.
+ */
+template<typename HandlerT>
+class sax_token_parser
+{
+public:
+ typedef HandlerT handler_type;
+
+ sax_token_parser(
+ std::string_view content, const tokens& _tokens,
+ xmlns_context& ns_cxt, handler_type& handler);
+
+ ~sax_token_parser() = default;
+
+ void parse();
+
+private:
+
+ /**
+ * Re-route callbacks from the internal sax_ns_parser into the
+ * sax_token_parser callbacks.
+ */
+ class handler_wrapper : public sax_token_handler_wrapper_base
+ {
+ handler_type& m_handler;
+
+ public:
+ handler_wrapper(const tokens& _tokens, handler_type& handler) :
+ sax_token_handler_wrapper_base(_tokens), m_handler(handler) {}
+
+ void doctype(const sax::doctype_declaration&) {}
+
+ void start_declaration(std::string_view) {}
+
+ void end_declaration(std::string_view)
+ {
+ m_handler.declaration(m_declaration);
+ m_elem.attrs.clear();
+ }
+
+ void start_element(const sax_ns_parser_element& elem)
+ {
+ set_element(elem);
+ m_handler.start_element(m_elem);
+ m_elem.attrs.clear();
+ }
+
+ void end_element(const sax_ns_parser_element& elem)
+ {
+ set_element(elem);
+ m_handler.end_element(m_elem);
+ }
+
+ void characters(std::string_view val, bool transient)
+ {
+ m_handler.characters(val, transient);
+ }
+ };
+
+private:
+ handler_wrapper m_wrapper;
+ sax_ns_parser<handler_wrapper> m_parser;
+};
+
+template<typename HandlerT>
+sax_token_parser<HandlerT>::sax_token_parser(
+ std::string_view content, const tokens& _tokens, xmlns_context& ns_cxt, handler_type& handler) :
+ m_wrapper(_tokens, handler),
+ m_parser(content, ns_cxt, m_wrapper)
+{
+}
+
+template<typename HandlerT>
+void sax_token_parser<HandlerT>::parse()
+{
+ m_parser.parse();
+}
+
+} // namespace orcus
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */