summaryrefslogtreecommitdiffstats
path: root/src/liborcus/opc_context.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/liborcus/opc_context.cpp')
-rw-r--r--src/liborcus/opc_context.cpp310
1 files changed, 310 insertions, 0 deletions
diff --git a/src/liborcus/opc_context.cpp b/src/liborcus/opc_context.cpp
new file mode 100644
index 0000000..e3b1bb5
--- /dev/null
+++ b/src/liborcus/opc_context.cpp
@@ -0,0 +1,310 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include "opc_context.hpp"
+#include "opc_token_constants.hpp"
+#include "ooxml_content_types.hpp"
+#include "ooxml_namespace_types.hpp"
+#include "ooxml_schemas.hpp"
+#include "session_context.hpp"
+
+#include "orcus/exception.hpp"
+
+#include <cassert>
+#include <iostream>
+#include <algorithm>
+
+namespace orcus {
+
+namespace {
+
+class part_ext_attr_parser
+{
+public:
+ part_ext_attr_parser(
+ opc_content_types_context::ct_cache_type* p_ct_cache, xml_token_t attr_name, const config* conf) :
+ mp_ct_cache(p_ct_cache),
+ m_attr_name(attr_name),
+ m_config(conf),
+ m_content_type(nullptr) {}
+
+ part_ext_attr_parser(const part_ext_attr_parser& r) :
+ mp_ct_cache(r.mp_ct_cache),
+ m_attr_name(r.m_attr_name),
+ m_config(r.m_config),
+ m_name(r.m_name),
+ m_content_type(r.m_content_type) {}
+
+ void operator() (const xml_token_attr_t& attr)
+ {
+ if (attr.name == m_attr_name)
+ m_name = attr.value;
+ else if (attr.name == XML_ContentType)
+ m_content_type = to_content_type(attr.value);
+ }
+
+ const std::string_view& get_name() const { return m_name; }
+ content_type_t get_content_type() const { return m_content_type; }
+
+private:
+ content_type_t to_content_type(const std::string_view& p) const
+ {
+ opc_content_types_context::ct_cache_type::const_iterator itr =
+ mp_ct_cache->find(p);
+ if (itr == mp_ct_cache->end())
+ {
+ if (m_config->debug)
+ std::cout << "unknown content type: " << p << std::endl;
+ return nullptr;
+ }
+ std::string_view val = *itr;
+ return val.data();
+ }
+
+private:
+ const opc_content_types_context::ct_cache_type* mp_ct_cache;
+ xml_token_t m_attr_name;
+ const config* m_config;
+ std::string_view m_name;
+ content_type_t m_content_type;
+};
+
+}
+
+opc_content_types_context::opc_content_types_context(session_context& session_cxt, const tokens& _tokens) :
+ xml_context_base(session_cxt, _tokens)
+{
+ // build content type cache.
+ for (const content_type_t* p = CT_all; *p; ++p)
+ m_ct_cache.insert(std::string_view(*p));
+}
+
+opc_content_types_context::~opc_content_types_context()
+{
+}
+
+xml_context_base* opc_content_types_context::create_child_context(xmlns_id_t /*ns*/, xml_token_t /*name*/)
+{
+ return nullptr;
+}
+
+void opc_content_types_context::end_child_context(xmlns_id_t /*ns*/, xml_token_t /*name*/, xml_context_base* /*child*/)
+{
+}
+
+void opc_content_types_context::start_element(xmlns_id_t ns, xml_token_t name, const::std::vector<xml_token_attr_t> &attrs)
+{
+ xml_token_pair_t parent = push_stack(ns, name);
+ switch (name)
+ {
+ case XML_Types:
+ {
+ xml_element_expected(parent, XMLNS_UNKNOWN_ID, XML_UNKNOWN_TOKEN);
+ if (get_config().debug)
+ print_attrs(get_tokens(), attrs);
+ }
+ break;
+ case XML_Override:
+ {
+ xml_element_expected(parent, NS_opc_ct, XML_Types);
+ part_ext_attr_parser func(&m_ct_cache, XML_PartName, &get_config());
+ func = for_each(attrs.begin(), attrs.end(), func);
+
+ // We need to use allocated strings for part names here because
+ // the part names need to survive after the [Content_Types].xml
+ // stream is destroyed.
+ std::string_view part_name = get_session_context().spool.intern(func.get_name()).first;
+ m_parts.push_back(
+ xml_part_t(part_name, func.get_content_type()));
+ }
+ break;
+ case XML_Default:
+ {
+ xml_element_expected(parent, NS_opc_ct, XML_Types);
+ part_ext_attr_parser func(&m_ct_cache, XML_Extension, &get_config());
+ func = for_each(attrs.begin(), attrs.end(), func);
+
+ // Like the part names, we need to use allocated strings for
+ // extension names.
+ std::string_view ext_name = get_session_context().spool.intern(func.get_name()).first;
+ m_ext_defaults.push_back(
+ xml_part_t(ext_name, func.get_content_type()));
+ }
+ break;
+ default:
+ warn_unhandled();
+ }
+}
+
+bool opc_content_types_context::end_element(xmlns_id_t ns, xml_token_t name)
+{
+ return pop_stack(ns, name);
+}
+
+void opc_content_types_context::characters(std::string_view /*str*/, bool /*transient*/)
+{
+}
+
+void opc_content_types_context::pop_parts(std::vector<xml_part_t>& parts)
+{
+ m_parts.swap(parts);
+}
+
+void opc_content_types_context::pop_ext_defaults(std::vector<xml_part_t>& ext_defaults)
+{
+ m_ext_defaults.swap(ext_defaults);
+}
+
+// ============================================================================
+
+namespace {
+
+class rel_attr_parser
+{
+public:
+ rel_attr_parser(session_context* cxt, const opc_relations_context::schema_cache_type* cache, const config* conf) :
+ m_cxt(cxt), mp_schema_cache(cache), mp_config(conf) {}
+
+ void operator() (const xml_token_attr_t& attr)
+ {
+ // Target and rId strings must be interned as they must survive after
+ // the rels part gets destroyed.
+
+ switch (attr.name)
+ {
+ case XML_Target:
+ m_rel.target = m_cxt->spool.intern(attr.value).first;
+ break;
+ case XML_Type:
+ m_rel.type = to_schema(attr.value);
+ break;
+ case XML_Id:
+ m_rel.rid = m_cxt->spool.intern(attr.value).first;
+ break;
+ }
+ }
+
+ const opc_rel_t& get_rel() const { return m_rel; }
+
+private:
+ schema_t to_schema(const std::string_view& p) const
+ {
+ opc_relations_context::schema_cache_type::const_iterator itr =
+ mp_schema_cache->find(p);
+ if (itr == mp_schema_cache->end())
+ {
+ if (mp_config->debug)
+ std::cout << "unknown schema: " << p << std::endl;
+ return nullptr;
+ }
+ std::string_view val = *itr;
+ return val.data();
+ }
+
+private:
+ session_context* m_cxt;
+ const opc_relations_context::schema_cache_type* mp_schema_cache;
+ const config* mp_config;
+ opc_rel_t m_rel;
+};
+
+/**
+ * Compare relations by the rId.
+ */
+struct compare_rels
+{
+ bool operator() (const opc_rel_t& r1, const opc_rel_t& r2) const
+ {
+ std::size_t n1 = r1.rid.size(), n2 = r2.rid.size();
+ std::size_t n = std::min(n1, n2);
+ const char *p1 = r1.rid.data(), *p2 = r2.rid.data();
+ for (std::size_t i = 0; i < n; ++i, ++p1, ++p2)
+ {
+ if (*p1 < *p2)
+ return true;
+ if (*p1 > *p2)
+ return false;
+ assert(*p1 == *p2);
+ }
+ return n1 < n2;
+ }
+};
+
+}
+
+opc_relations_context::opc_relations_context(session_context& session_cxt, const tokens &_tokens) :
+ xml_context_base(session_cxt, _tokens)
+{
+ // build content type cache.
+ for (schema_t* p = SCH_all; *p; ++p)
+ m_schema_cache.insert(std::string_view(*p));
+}
+
+opc_relations_context::~opc_relations_context()
+{
+}
+
+xml_context_base* opc_relations_context::create_child_context(xmlns_id_t /*ns*/, xml_token_t /*name*/)
+{
+ return nullptr;
+}
+
+void opc_relations_context::end_child_context(xmlns_id_t /*ns*/, xml_token_t /*name*/, xml_context_base* /*child*/)
+{
+}
+
+void opc_relations_context::start_element(
+ xmlns_id_t ns, xml_token_t name, const std::vector<xml_token_attr_t> &attrs)
+{
+ xml_token_pair_t parent = push_stack(ns, name);
+ switch (name)
+ {
+ case XML_Relationships:
+ {
+ xml_element_expected(parent, XMLNS_UNKNOWN_ID, XML_UNKNOWN_TOKEN);
+ if (get_config().debug)
+ print_attrs(get_tokens(), attrs);
+ }
+ break;
+ case XML_Relationship:
+ {
+ rel_attr_parser func(&get_session_context(), &m_schema_cache, &get_config());
+ xml_element_expected(parent, NS_opc_rel, XML_Relationships);
+ func = for_each(attrs.begin(), attrs.end(), func);
+ const opc_rel_t& rel = func.get_rel();
+ if (rel.type)
+ m_rels.push_back(rel);
+ }
+ break;
+ default:
+ warn_unhandled();
+ }
+}
+
+bool opc_relations_context::end_element(xmlns_id_t ns, xml_token_t name)
+{
+ return pop_stack(ns, name);
+}
+
+void opc_relations_context::characters(std::string_view /*str*/, bool /*transient*/)
+{
+}
+
+void opc_relations_context::init()
+{
+ m_rels.clear();
+}
+
+void opc_relations_context::pop_rels(std::vector<opc_rel_t>& rels)
+{
+ // Sort by the rId.
+ sort(m_rels.begin(), m_rels.end(), compare_rels());
+ m_rels.swap(rels);
+}
+
+}
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */