1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
|
diff --git a/include/orcus/xml_structure_tree.hpp b/include/orcus/xml_structure_tree.hpp
index 58cabfd116fa24e35ff27cf8d7512b6e73df33f4..c88808d24bd74c175fa4017328d3e54b4c588c5e 100644
--- a/include/orcus/xml_structure_tree.hpp
+++ b/include/orcus/xml_structure_tree.hpp
@@ -127,6 +127,20 @@ public:
size_t get_xmlns_index(xmlns_id_t ns) const;
std::string get_xmlns_short_name(xmlns_id_t ns) const;
+
+ /**
+ * Get an XPath-like ID for the element inside of the XML tree, built
+ * from the walker's current scopes.
+ *
+ * The ID has one "/"-prefixed segment per scope. A segment is the
+ * element name, preceded by the namespace short name and ":" when the
+ * namespace context reports an index for the element's namespace.
+ *
+ * @return the path string; it is empty when the walker has no scopes.
+ */
+ std::string get_path() const;
+
+ /**
+ * Select an element by a path expression. The path expression may be
+ * generated by <code>xml_structure_tree::walker::get_path</code>.
+ *
+ * On success the walker's scopes are replaced by the chain of elements
+ * from the root down to the selected element. When an exception is
+ * thrown the walker's scopes are left as they were.
+ *
+ * @param path a simple XPath-like expression: a leading "/" followed by
+ * "/"-separated element names, the first of which must name the root
+ * element.
+ *
+ * @return the selected element.
+ *
+ * @throws general_error if the path is malformed or does not match any
+ * element.
+ */
+ element select_by_path(const std::string& path);
};
xml_structure_tree(xmlns_context& xmlns_cxt);
diff --git a/src/liborcus/xml_structure_tree.cpp b/src/liborcus/xml_structure_tree.cpp
index 2778bc05f32841a9441bf471913872e119256895..6622bc57cd2595f12bba80d4bbdb5c24cd6e7bc6 100644
--- a/src/liborcus/xml_structure_tree.cpp
+++ b/src/liborcus/xml_structure_tree.cpp
@@ -12,6 +12,7 @@
#include "orcus/exception.hpp"
#include "orcus/string_pool.hpp"
+#include "string_helper.hpp"
#include <iostream>
#include <sstream>
@@ -275,6 +276,15 @@ struct xml_structure_tree_impl
{
delete mp_root;
}
+
+    /**
+     * Render an element name as text for use in path expressions.
+     *
+     * @param name entity name (namespace ID plus local name) to render.
+     *
+     * @return "<short-name>:<name>" when the namespace context reports an
+     *         index for the namespace, otherwise just "<name>".
+     */
+    std::string get_element_str(const xml_structure_tree::entity_name& name) const
+    {
+        const bool has_ns_index = m_xmlns_cxt.get_index(name.ns) != index_not_found;
+
+        ostringstream os;
+        if (has_ns_index)
+        {
+            // Qualify the local name with the namespace's short name.
+            os << m_xmlns_cxt.get_short_name(name.ns) << ":";
+        }
+        os << name.name;
+
+        return os.str();
+    }
};
struct xml_structure_tree::walker_impl
@@ -423,6 +433,66 @@ string xml_structure_tree::walker::get_xmlns_short_name(xmlns_id_t ns) const
return mp_impl->m_parent_impl.m_xmlns_cxt.get_short_name(ns);
}
+// Build the XPath-like ID of the walker's current scopes: one "/"-prefixed
+// element string per scope, in stored order.  Yields an empty string when
+// there are no scopes.
+string xml_structure_tree::walker::get_path() const
+{
+    const auto& tree = mp_impl->m_parent_impl;
+
+    ostringstream path;
+    for (const auto& scope : mp_impl->m_scopes)
+        path << "/" << tree.get_element_str(scope.name);
+
+    return path.str();
+}
+
+// Move the walker to the element addressed by an XPath-like path such as the
+// one produced by get_path(): a leading '/', then '/'-separated element
+// strings (optionally "ns-short-name:" qualified), the first of which must
+// name the root element.
+//
+// Returns the selected element.  Throws general_error when the path is
+// malformed, when the tree has no root, or when a segment matches no element.
+// The walker's scopes are only replaced once the whole path has been
+// resolved, so a failed lookup leaves the walker where it was.
+xml_structure_tree::element xml_structure_tree::walker::select_by_path(const std::string& path)
+{
+    pstring p(path);
+    std::vector<pstring> parts = string_helper::split_string(p, '/');
+
+    // string_helper::split_string will create an empty first element due to
+    // the leading '/'; a valid path therefore has that empty token plus at
+    // least one element name after it.
+    if (parts.size() < 2 || parts[0] != "")
+        throw general_error("invalid format for path");
+
+    // NOTE(review): mp_root is dereferenced below; presumably it is null
+    // while the tree is still empty - confirm.  Fail with an exception
+    // rather than dereferencing a null pointer.
+    if (!mp_impl->mp_root)
+        throw general_error("tree is empty");
+
+    const auto& tree = mp_impl->m_parent_impl;
+
+    // The first named segment has to be the root element itself.
+    element_ref root_ref(mp_impl->mp_root->name, &mp_impl->mp_root->prop);
+    if (pstring(tree.get_element_str(root_ref.name)) != parts[1])
+        throw general_error("path does not match any element");
+
+    // Resolve into a local scope chain first; it is committed only on success.
+    std::vector<element_ref> scopes;
+    scopes.push_back(root_ref);
+
+    for (size_t i = 2; i < parts.size(); ++i)
+    {
+        // Descend one level: look for the child of the innermost scope whose
+        // element string equals the current path segment.
+        const elem_prop& prop = *scopes.back().prop;
+        bool found = false;
+        for (const auto& child : prop.child_elements)
+        {
+            if (pstring(tree.get_element_str(child.first)) == parts[i])
+            {
+                scopes.emplace_back(child.first, child.second);
+                found = true;
+                break;
+            }
+        }
+
+        if (!found)
+            throw general_error("path does not match any element");
+    }
+
+    std::swap(mp_impl->m_scopes, scopes);
+    const element_ref& ref = mp_impl->m_scopes.back();
+    return element(ref.name, ref.prop->repeat);
+}
+
xml_structure_tree::xml_structure_tree(xmlns_context& xmlns_cxt) :
mp_impl(new xml_structure_tree_impl(xmlns_cxt)) {}
|