summaryrefslogtreecommitdiffstats
path: root/src/parser/sax_token_parser_thread.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/parser/sax_token_parser_thread.cpp')
-rw-r--r--src/parser/sax_token_parser_thread.cpp204
1 files changed, 204 insertions, 0 deletions
diff --git a/src/parser/sax_token_parser_thread.cpp b/src/parser/sax_token_parser_thread.cpp
new file mode 100644
index 0000000..3d7b16b
--- /dev/null
+++ b/src/parser/sax_token_parser_thread.cpp
@@ -0,0 +1,204 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "orcus/sax_token_parser_thread.hpp"
+#include "orcus/sax_token_parser.hpp"
+#include "orcus/string_pool.hpp"
+#include "orcus/detail/parser_token_buffer.hpp"
+#include "orcus/tokens.hpp"
+#include "orcus/xml_namespace.hpp"
+
+#include <limits>
+#include <iostream>
+
+namespace orcus { namespace sax {
+
+parse_token::parse_token() : type(parse_token_t::unknown) {}
+
+parse_token::parse_token(std::string_view _characters) :
+ type(parse_token_t::characters),
+ value(_characters)
+{
+}
+
+parse_token::parse_token(parse_token_t _type, const xml_token_element_t* _element) :
+ type(_type), value(_element)
+{
+}
+
+parse_token::parse_token(std::string_view msg, std::ptrdiff_t offset) :
+ type(parse_token_t::parse_error),
+ value(parse_error_value_t{msg, offset})
+{
+}
+
+parse_token::parse_token(const parse_token& other) :
+ type(other.type), value(other.value)
+{
+}
+
+bool parse_token::operator== (const parse_token& other) const
+{
+ return type == other.type && value == other.value;
+}
+
+bool parse_token::operator!= (const parse_token& other) const
+{
+ return !operator==(other);
+}
+
+struct parser_thread::impl
+{
+ orcus::detail::thread::parser_token_buffer<parse_tokens_t> m_token_buffer;
+ string_pool m_pool;
+ std::vector<std::unique_ptr<xml_token_element_t>> m_element_store;
+
+ parse_tokens_t m_parser_tokens; // token buffer for the parser thread.
+
+ const char* mp_char;
+ size_t m_size;
+ const tokens& m_tokens;
+ xmlns_context& m_ns_cxt;
+
+ impl(const char* p, size_t n, const tokens& tks, xmlns_context& ns_cxt, size_t min_token_size, size_t max_token_size) :
+ m_token_buffer(min_token_size, max_token_size),
+ mp_char(p), m_size(n), m_tokens(tks), m_ns_cxt(ns_cxt)
+ {
+ }
+
+ void check_and_notify()
+ {
+ m_token_buffer.check_and_notify(m_parser_tokens);
+ }
+
+ void notify_and_finish()
+ {
+ m_token_buffer.notify_and_finish(m_parser_tokens);
+ }
+
+ void abort()
+ {
+ m_token_buffer.abort();
+ }
+
+ void declaration(const orcus::xml_declaration_t& /*decl*/)
+ {
+ }
+
+ void start_element(const orcus::xml_token_element_t& elem)
+ {
+ m_element_store.emplace_back(std::make_unique<orcus::xml_token_element_t>(elem));
+ orcus::xml_token_element_t& this_elem = *m_element_store.back();
+
+ // Go through all attributes and intern transient strings.
+ std::for_each(this_elem.attrs.begin(), this_elem.attrs.end(),
+ [&](xml_token_attr_t& attr)
+ {
+ if (attr.transient)
+ {
+ attr.value = m_pool.intern(attr.value).first;
+ attr.transient = false;
+ }
+ }
+ );
+
+ m_parser_tokens.emplace_back(parse_token_t::start_element, m_element_store.back().get());
+ check_and_notify();
+ }
+
+ void end_element(const orcus::xml_token_element_t& elem)
+ {
+ assert(elem.attrs.empty());
+
+ m_element_store.emplace_back(std::make_unique<orcus::xml_token_element_t>(elem));
+ m_parser_tokens.emplace_back(parse_token_t::end_element, m_element_store.back().get());
+ check_and_notify();
+ }
+
+ void characters(std::string_view val, bool transient)
+ {
+ if (transient)
+ m_parser_tokens.emplace_back(m_pool.intern(val).first);
+ else
+ m_parser_tokens.emplace_back(val);
+
+ check_and_notify();
+ }
+
+ void start()
+ {
+ try
+ {
+ try
+ {
+ orcus::sax_token_parser<impl> parser({mp_char, m_size}, m_tokens, m_ns_cxt, *this);
+ parser.parse();
+ }
+ catch (const malformed_xml_error& e)
+ {
+ std::string_view s = m_pool.intern(e.what()).first;
+ m_parser_tokens.emplace_back(s, e.offset());
+ }
+
+ // TODO : add more exceptions that need to be tokenized and processed by the client thread.
+
+ notify_and_finish();
+ }
+ catch (const orcus::detail::parsing_aborted_error&)
+ {
+ // This is used only to abort the parsing thread prematurely.
+ }
+ }
+
+ bool next_tokens(parse_tokens_t& tokens)
+ {
+ return m_token_buffer.next_tokens(tokens);
+ }
+
+ void swap_string_pool(string_pool& pool)
+ {
+ m_pool.swap(pool);
+ }
+};
+
+parser_thread::parser_thread(
+ const char* p, size_t n, const orcus::tokens& tks, xmlns_context& ns_cxt, size_t min_token_size) :
+ mp_impl(std::make_unique<parser_thread::impl>(
+ p, n, tks, ns_cxt, min_token_size, std::numeric_limits<size_t>::max()/2)) {}
+
+parser_thread::parser_thread(
+ const char* p, size_t n, const orcus::tokens& tks, xmlns_context& ns_cxt, size_t min_token_size, size_t max_token_size) :
+ mp_impl(std::make_unique<parser_thread::impl>(
+ p, n, tks, ns_cxt, min_token_size, max_token_size)) {}
+
+parser_thread::~parser_thread()
+{
+}
+
+void parser_thread::start()
+{
+ mp_impl->start();
+}
+
+bool parser_thread::next_tokens(parse_tokens_t& tokens)
+{
+ return mp_impl->next_tokens(tokens);
+}
+
+void parser_thread::swap_string_pool(string_pool& pool)
+{
+ mp_impl->swap_string_pool(pool);
+}
+
+void parser_thread::abort()
+{
+ mp_impl->abort();
+}
+
+}}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */