summaryrefslogtreecommitdiffstats
path: root/include/orcus/xml_structure_tree.hpp
blob: 423ede43eb81ea5f09366be6ab13bb4c1287a18d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

#ifndef INCLUDED_ORCUS_XML_STRUCTURE_TREE_HPP
#define INCLUDED_ORCUS_XML_STRUCTURE_TREE_HPP

#include "env.hpp"
#include "types.hpp"

#include <ostream>
#include <memory>
#include <functional>

namespace orcus {

class xmlns_context;

/**
 * Plain data record describing one table range.  Instances of this type are
 * handed to the callback registered via
 * <code>xml_structure_tree::process_ranges</code>.
 *
 * NOTE(review): the exact meaning of the two path lists is defined by the
 * implementation, which is not visible in this header; the member comments
 * below are assumptions to be confirmed.
 */
struct ORCUS_DLLPUBLIC xml_table_range_t
{
    /** Path expressions stored as strings; presumably one per field (column) in the range - TODO confirm. */
    std::vector<std::string> paths;
    /** Path expressions stored as strings; presumably the elements that delimit row groups - TODO confirm. */
    std::vector<std::string> row_groups;

    // Constructor and destructor are declared here and defined out of line.
    xml_table_range_t();
    ~xml_table_range_t();
};

/**
 * Tree representing the structure of elements in XML content.  Recurring
 * elements under the same parent are represented by a single element
 * instance.  This tree only includes elements; no attributes and content
 * nodes appear in this tree.
 */
class ORCUS_DLLPUBLIC xml_structure_tree
{
    // Implementation type is only forward-declared here; all state lives
    // behind this pointer.
    struct impl;
    std::unique_ptr<impl> mp_impl;

public:
    // Not default-constructible and not copyable.  Construct from an
    // xmlns_context, or move from another instance (see below).
    xml_structure_tree() = delete;
    xml_structure_tree(const xml_structure_tree&) = delete;
    xml_structure_tree& operator= (const xml_structure_tree&) = delete;

    /**
     * Name of an entity (element or attribute) paired with the ID of the XML
     * namespace it belongs to.
     *
     * NOTE(review): <code>name</code> is a non-owning view.  What owns the
     * underlying characters is not visible in this header; confirm the
     * lifetime before storing an instance beyond the life of the tree.
     */
    struct ORCUS_DLLPUBLIC entity_name
    {
        xmlns_id_t ns;
        std::string_view name;

        entity_name();
        entity_name(xmlns_id_t _ns, std::string_view _name);

        /** Ordering comparison; allows use as a key in ordered containers. */
        bool operator< (const entity_name& r) const;
        /** Equality comparison. */
        bool operator== (const entity_name& r) const;

        /**
         * Hash function object for entity_name.
         */
        struct ORCUS_DLLPUBLIC hash
        {
            size_t operator ()(const entity_name& val) const;
        };
    };

    /** List of entity names, as returned by the walker's query methods. */
    using entity_names_type = std::vector<entity_name>;

    /**
     * Description of a single element in the structure tree.
     */
    struct ORCUS_DLLPUBLIC element
    {
        /** Namespace-qualified name of the element. */
        entity_name name;
        /** Presumably true when the element recurs under its parent - TODO confirm. */
        bool repeat;
        /** Presumably true when the element carries content - TODO confirm. */
        bool has_content;

        element();
        element(const entity_name& _name, bool _repeat, bool _has_content);
    };

    // Implementation type of walker; only forward-declared in this header.
    struct walker_impl;

    /**
     * This class allows clients to traverse the tree.  A walker cannot be
     * constructed directly by client code; obtain one from
     * <code>xml_structure_tree::get_walker</code>.  Walkers are copyable.
     */
    class ORCUS_DLLPUBLIC walker
    {
        friend class xml_structure_tree;

        std::unique_ptr<walker_impl> mp_impl;

        // Private: only xml_structure_tree (a friend) can create a walker
        // from the tree's implementation object.
        walker(const xml_structure_tree::impl& parent_impl);
    public:
        walker() = delete;
        walker(const walker& r);
        ~walker();
        walker& operator= (const walker& r);

        /**
         * Set current position to the root element, and return the root
         * element.
         *
         * @return root element.
         */
        element root();

        /**
         * Descend into a specified child element.
         *
         * @param name name of a child element.
         *
         * @return child element
         *
         * @throw general_error if no child elements exist for the specified
         *        name.
         */
        element descend(const entity_name& name);

        /**
         * Move up to the parent element.
         *
         * @return element at the new position; presumably the parent
         *         element - TODO confirm against the implementation.
         */
        element ascend();

        /**
         * Move to the element specified by a path expression. The path
         * expression may be generated by
         * <code>xml_structure_tree::walker::get_path</code>.
         *
         * @param path a simple XPath-like expression
         *
         * @return element pointed to by the path.
         */
        element move_to(const std::string& path);

        /**
         * Get a list of names of all child elements at the current element
         * position.  The list of names is in order of appearance.
         *
         * @return list of child element names in order of appearance.
         */
        entity_names_type get_children();

        /**
         * Get a list of names of all attributes that belong to current
         * element.  The list of names is in order of appearance.
         *
         * @return list of attribute names in order of appearance.
         */
        entity_names_type get_attributes();

        /**
         * Get a numerical, 0-based index of given XML namespace.
         *
         * @param ns XML namespace ID.
         *
         * @return numeric, 0-based index of XML namespace if found, or
         *         <code>index_not_found</code> if the namespace is not found
         *         in this structure.
         *
         * NOTE(review): this class declares no <code>index_not_found</code>
         * member; the constant is presumably declared elsewhere (e.g. in
         * types.hpp) - confirm and reference its qualified name here.
         */
        size_t get_xmlns_index(xmlns_id_t ns) const;

        /**
         * Get a short name for the given XML namespace.
         *
         * @param ns XML namespace ID.
         *
         * @return short name as a string.  NOTE(review): the format of the
         *         short name, and the result for an unknown namespace, are
         *         defined by the implementation and not visible here.
         */
        std::string get_xmlns_short_name(xmlns_id_t ns) const;

        /**
         * Convert an entity name to its proper string representation.
         *
         * @param name entity name to convert to string.
         *
         * @return string representation of the entity name, including the
         *         namespace.
         */
        std::string to_string(const entity_name& name) const;

        /**
         * Get an XPath-like ID for the element inside of the XML tree.  The
         * result may be passed to
         * <code>xml_structure_tree::walker::move_to</code>.
         *
         * @return path expression as a string.
         */
        std::string get_path() const;
    };

    /**
     * Construct a tree associated with an XML namespace context.
     *
     * @param xmlns_cxt namespace context, taken by non-const reference.
     *                  NOTE(review): presumably it must outlive this tree -
     *                  confirm against the implementation.
     */
    xml_structure_tree(xmlns_context& xmlns_cxt);

    /** Move-construct from another tree. */
    xml_structure_tree(xml_structure_tree&& other);
    ~xml_structure_tree();

    /**
     * Parse the given content.
     *
     * @param s content to parse; presumably an XML stream from which the
     *          structure tree is built - TODO confirm.
     */
    void parse(std::string_view s);

    /**
     * Dump the tree to an output stream.  The "compact" output format is
     * defined by the implementation.
     *
     * @param os output stream to write to.
     */
    void dump_compact(std::ostream& os) const;

    /**
     * Get a walker for traversing this tree.
     *
     * @return walker instance.
     */
    walker get_walker() const;

    /** Callback type that receives each table range by rvalue reference. */
    using range_handler_type = std::function<void(xml_table_range_t&&)>;

    /**
     * Process table ranges and pass them to the supplied handler.
     *
     * @param rh handler to receive the ranges.  NOTE(review): how ranges are
     *           detected, and how many times the handler is invoked, are
     *           defined by the implementation and not visible here.
     */
    void process_ranges(range_handler_type rh) const;

    /**
     * Swap the content of this tree with that of another.
     *
     * @param other tree to swap content with.
     */
    void swap(xml_structure_tree& other);
};

}



#endif
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */