summaryrefslogtreecommitdiffstats
path: root/src/xpath_vtab.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/xpath_vtab.cc393
1 files changed, 393 insertions, 0 deletions
diff --git a/src/xpath_vtab.cc b/src/xpath_vtab.cc
new file mode 100644
index 0000000..f44bde4
--- /dev/null
+++ b/src/xpath_vtab.cc
@@ -0,0 +1,393 @@
+/**
+ * Copyright (c) 2020, Timothy Stack
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * * Neither the name of Timothy Stack nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sstream>
+#include <unordered_map>
+
+#include "base/lnav_log.hh"
+#include "config.h"
+#include "pugixml/pugixml.hpp"
+#include "sql_help.hh"
+#include "sql_util.hh"
+#include "vtab_module.hh"
+#include "xml_util.hh"
+#include "yajlpp/yajlpp.hh"
+
+/**
+ * Column indexes for the xpath() table.  The enumerators are listed in the
+ * same order as the columns declared in xpath_vtab::CREATE_STMT: the four
+ * result columns first, followed by the two HIDDEN columns.
+ */
+enum {
+    XP_COL_RESULT = 0,  // "result"
+    XP_COL_NODE_PATH,  // "node_path"
+    XP_COL_NODE_ATTR,  // "node_attr"
+    XP_COL_NODE_TEXT,  // "node_text"
+    XP_COL_XPATH,  // hidden "xpath" column
+    XP_COL_VALUE,  // hidden "value" column
+};
+
+// Per-thread cache of compiled XPath queries, keyed by the expression text.
+// checkout_query() moves an entry out of this map (erasing it) and
+// checkin_query() moves a query back in.
+static thread_local std::unordered_map<std::string, pugi::xpath_query>
+ QUERY_CACHE;
+
+/**
+ * Obtain a compiled XPath query for the given expression, taking it out of
+ * the thread-local cache when one is available.
+ *
+ * A cached entry is removed from QUERY_CACHE before being returned, so the
+ * caller holds the only copy of the query.  Pass the query to
+ * checkin_query() when finished to make it available for reuse.
+ *
+ * @param query The XPath expression text.
+ * @return The compiled query.  If the expression could not be compiled, the
+ *   returned object tests false and nothing is cached.
+ */
+static pugi::xpath_query
+checkout_query(const std::string& query)
+{
+    auto iter = QUERY_CACHE.find(query);
+    if (iter == QUERY_CACHE.end()) {
+        // Cache miss: compile and hand the query straight to the caller.
+        // Inserting it into the map only to move it out and erase the
+        // entry again would be wasted work.
+        return pugi::xpath_query(query.c_str());
+    }
+
+    auto retval = std::move(iter->second);
+
+    // The moved-from entry must not be handed out again.
+    QUERY_CACHE.erase(iter);
+
+    return retval;
+}
+
+/**
+ * Hand a compiled query back to the thread-local cache so that a later
+ * checkout_query() call with the same expression can reuse it.
+ *
+ * @param query_str The XPath expression text used as the cache key.
+ * @param query The compiled query.  Queries that test false (for example,
+ *   ones that failed to compile or were already moved from) are discarded.
+ */
+static void
+checkin_query(const std::string& query_str, pugi::xpath_query query)
+{
+    if (query) {
+        QUERY_CACHE[query_str] = std::move(query);
+    }
+}
+
+/**
+ * Implementation of the xpath() table-valued function.  NAME and
+ * CREATE_STMT describe the table, cursor holds the state of one scan, and
+ * get_column() produces the column values for the cursor's current row.
+ */
+struct xpath_vtab {
+    static constexpr const char* NAME = "xpath";
+    static constexpr const char* CREATE_STMT = R"(
+-- The xpath() table-valued function allows you to execute an xpath expression
+CREATE TABLE xpath (
+ result text, -- The result of the xpath expression
+ node_path text, -- The absolute path to the node selected by the expression
+ node_attr text, -- The node attributes stored in a JSON object
+ node_text text, -- The text portion of the node selected by the expression
+
+ xpath text HIDDEN,
+ value text HIDDEN
+);
+)";
+
+    /**
+     * State for a single scan: the arguments, the parsed document, the
+     * compiled query, and the set of selected nodes.  c_rowid is the index
+     * of the current row within c_results.
+     */
+    struct cursor {
+        sqlite3_vtab_cursor base;
+        sqlite3_int64 c_rowid{0};
+        std::string c_xpath;
+        std::string c_value;
+        bool c_value_as_blob{false};
+        pugi::xpath_query c_query;
+        pugi::xml_document c_doc;
+        pugi::xpath_node_set c_results;
+
+        cursor(sqlite3_vtab* vt) : base({vt}) {}
+
+        // Make sure a checked-out query goes back to the cache.
+        ~cursor() { this->reset(); }
+
+        /** Return the query to the cache and rewind to the first row. */
+        int reset()
+        {
+            checkin_query(this->c_xpath, std::move(this->c_query));
+            this->c_rowid = 0;
+
+            return SQLITE_OK;
+        }
+
+        /** Advance to the next row of the result set. */
+        int next()
+        {
+            ++this->c_rowid;
+
+            return SQLITE_OK;
+        }
+
+        /** @return Non-zero once the cursor is past the last result. */
+        int eof()
+        {
+            const auto row_count = (int64_t) this->c_results.size();
+
+            return this->c_rowid >= row_count;
+        }
+
+        /** Store the current row index in rowid_out. */
+        int get_rowid(sqlite3_int64& rowid_out)
+        {
+            rowid_out = this->c_rowid;
+
+            return SQLITE_OK;
+        }
+    };
+
+    /**
+     * Produce the value of a column for the cursor's current row.
+     *
+     * @param vc The cursor positioned on the row to read.
+     * @param ctx The SQLite context that receives the value.
+     * @param col One of the XP_COL_* column indexes.  Any other value
+     *   leaves the context untouched.
+     * @return Always SQLITE_OK.
+     */
+    int get_column(const cursor& vc, sqlite3_context* ctx, int col)
+    {
+        switch (col) {
+            case XP_COL_RESULT: {
+                auto& hit = vc.c_results[vc.c_rowid];
+
+                if (hit.node()) {
+                    // Element results are serialized back to XML text.
+                    std::ostringstream xml_out;
+
+                    // XXX avoid the extra allocs
+                    hit.node().print(xml_out);
+                    auto xml_text = xml_out.str();
+                    sqlite3_result_text(ctx,
+                                        xml_text.c_str(),
+                                        xml_text.length(),
+                                        SQLITE_TRANSIENT);
+                } else if (hit.attribute()) {
+                    // Attribute results yield the attribute's value.
+                    sqlite3_result_text(
+                        ctx, hit.attribute().value(), -1, SQLITE_TRANSIENT);
+                } else {
+                    sqlite3_result_null(ctx);
+                }
+                break;
+            }
+            case XP_COL_NODE_PATH: {
+                auto& hit = vc.c_results[vc.c_rowid];
+                auto owner = hit.node();
+                auto attr = hit.attribute();
+
+                if (!owner && !attr) {
+                    sqlite3_result_null(ctx);
+                    break;
+                }
+                if (!owner) {
+                    // An attribute result: report the path of its parent.
+                    owner = hit.parent();
+                }
+
+                auto path = lnav::pugixml::get_actual_path(owner);
+                if (attr) {
+                    path += "/@" + std::string(attr.name());
+                }
+                sqlite3_result_text(
+                    ctx, path.c_str(), path.length(), SQLITE_TRANSIENT);
+                break;
+            }
+            case XP_COL_NODE_ATTR: {
+                auto& hit = vc.c_results[vc.c_rowid];
+                auto owner = hit.node();
+                auto attr = hit.attribute();
+
+                if (!owner && !attr) {
+                    sqlite3_result_null(ctx);
+                    break;
+                }
+                if (!owner) {
+                    // An attribute result: list its parent's attributes.
+                    owner = hit.parent();
+                }
+
+                yajlpp_gen gen;
+
+                yajl_gen_config(gen, yajl_gen_beautify, false);
+
+                {
+                    // Scoped so the map is finished before the generated
+                    // text is read back below.
+                    yajlpp_map attr_map(gen);
+
+                    for (const auto& node_attr : owner.attributes()) {
+                        attr_map.gen(node_attr.name());
+                        attr_map.gen(node_attr.value());
+                    }
+                }
+
+                auto json = gen.to_string_fragment();
+
+                sqlite3_result_text(
+                    ctx, json.data(), json.length(), SQLITE_TRANSIENT);
+                // 'J' is the subtype SQLite's JSON functions use to mark
+                // text that is already JSON.
+                sqlite3_result_subtype(ctx, 'J');
+                break;
+            }
+            case XP_COL_NODE_TEXT: {
+                auto& hit = vc.c_results[vc.c_rowid];
+                auto owner = hit.node();
+                auto attr = hit.attribute();
+
+                if (!owner && !attr) {
+                    sqlite3_result_null(ctx);
+                    break;
+                }
+                if (!owner) {
+                    // An attribute result: use the text of its parent.
+                    owner = hit.parent();
+                }
+
+                sqlite3_result_text(
+                    ctx, owner.text().get(), -1, SQLITE_TRANSIENT);
+                break;
+            }
+            case XP_COL_XPATH: {
+                // The hidden columns echo the arguments held by the cursor.
+                sqlite3_result_text(ctx,
+                                    vc.c_xpath.c_str(),
+                                    vc.c_xpath.length(),
+                                    SQLITE_STATIC);
+                break;
+            }
+            case XP_COL_VALUE: {
+                // Preserve the type the document was originally passed as.
+                if (vc.c_value_as_blob) {
+                    sqlite3_result_blob64(ctx,
+                                          vc.c_value.c_str(),
+                                          vc.c_value.length(),
+                                          SQLITE_STATIC);
+                } else {
+                    sqlite3_result_text(ctx,
+                                        vc.c_value.c_str(),
+                                        vc.c_value.length(),
+                                        SQLITE_STATIC);
+                }
+                break;
+            }
+        }
+
+        return SQLITE_OK;
+    }
+};
+
+/**
+ * xBestIndex implementation for the xpath() table.  Equality constraints on
+ * the hidden "xpath" and "value" columns are marked as used, and the usage
+ * helper is then asked to allocate the two arguments.
+ *
+ * @param tab The virtual table (unused).
+ * @param pIdxInfo The index information supplied by SQLite.
+ * @return Always SQLITE_OK.
+ */
+static int
+rcBestIndex(sqlite3_vtab* tab, sqlite3_index_info* pIdxInfo)
+{
+    vtab_index_constraints constraints(pIdxInfo);
+    vtab_index_usage usage(pIdxInfo);
+
+    for (auto iter = constraints.begin(); iter != constraints.end(); ++iter) {
+        const bool is_equality = (iter->op == SQLITE_INDEX_CONSTRAINT_EQ);
+        const bool is_argument = (iter->iColumn == XP_COL_VALUE
+                                  || iter->iColumn == XP_COL_XPATH);
+
+        if (is_equality && is_argument) {
+            usage.column_used(iter);
+        }
+    }
+
+    usage.allocate_args(XP_COL_XPATH, XP_COL_VALUE, 2);
+    return SQLITE_OK;
+}
+
+/**
+ * xFilter implementation for the xpath() table: parse the XML document,
+ * compile (or fetch from the cache) the XPath expression, and evaluate it so
+ * that the cursor can iterate over the selected nodes.
+ *
+ * @param pVtabCursor The cursor to (re)initialize; really an
+ *   xpath_vtab::cursor.
+ * @param idxNum Unused.
+ * @param idxStr Unused.
+ * @param argc The number of arguments; anything other than two yields an
+ *   empty result set.
+ * @param argv argv[0] is the XPath expression and argv[1] is the XML
+ *   document.
+ * @return SQLITE_OK on success (including the empty-result cases) or
+ *   SQLITE_ERROR, with the table's zErrMsg set, when the document or the
+ *   expression is invalid.
+ */
+static int
+rcFilter(sqlite3_vtab_cursor* pVtabCursor,
+         int idxNum,
+         const char* idxStr,
+         int argc,
+         sqlite3_value** argv)
+{
+    auto* pCur = (xpath_vtab::cursor*) pVtabCursor;
+
+    // A cursor can be filtered more than once (e.g. on the inner side of a
+    // join).  Return the previous query to the cache while c_xpath still
+    // names it, and drop the previous results so that none of the early
+    // returns below can replay rows from the last scan.  The old results
+    // also refer to nodes in c_doc, which is about to be reloaded.
+    pCur->reset();
+    pCur->c_results = pugi::xpath_node_set();
+
+    if (argc != 2) {
+        pCur->c_xpath.clear();
+        pCur->c_value.clear();
+        return SQLITE_OK;
+    }
+
+    // Capture the type before any of the calls below convert the value.
+    pCur->c_value_as_blob = (sqlite3_value_type(argv[1]) == SQLITE_BLOB);
+
+    // Fetch the pointer first and the length second, the order the SQLite
+    // documentation recommends.
+    auto blob = (const char*) sqlite3_value_blob(argv[1]);
+    auto byte_count = sqlite3_value_bytes(argv[1]);
+
+    if (blob == nullptr || byte_count == 0) {
+        // NULL or empty document, no rows.
+        pCur->c_rowid = 0;
+        return SQLITE_OK;
+    }
+
+    pCur->c_value.assign(blob, byte_count);
+    auto parse_res = pCur->c_doc.load_string(pCur->c_value.c_str());
+    if (!parse_res) {
+        // Any earlier message must be released before it is replaced.
+        sqlite3_free(pVtabCursor->pVtab->zErrMsg);
+        // The offset is a ptrdiff_t, so widen it explicitly to match %lld.
+        pVtabCursor->pVtab->zErrMsg
+            = sqlite3_mprintf("Invalid XML document at offset %lld: %s",
+                              (sqlite3_int64) parse_res.offset,
+                              parse_res.description());
+        return SQLITE_ERROR;
+    }
+
+    auto xpath_text = (const char*) sqlite3_value_text(argv[0]);
+    if (xpath_text == nullptr) {
+        // A NULL expression selects nothing.  Assigning a null pointer to
+        // a std::string would be undefined behavior.
+        pCur->c_xpath.clear();
+        pCur->c_rowid = 0;
+        return SQLITE_OK;
+    }
+
+    pCur->c_xpath = xpath_text;
+    pCur->c_query = checkout_query(pCur->c_xpath);
+    if (!pCur->c_query) {
+        auto& res = pCur->c_query.result();
+        sqlite3_free(pVtabCursor->pVtab->zErrMsg);
+        pVtabCursor->pVtab->zErrMsg
+            = sqlite3_mprintf("Invalid XPATH expression at offset %lld: %s",
+                              (sqlite3_int64) res.offset,
+                              res.description());
+        return SQLITE_ERROR;
+    }
+
+    pCur->c_rowid = 0;
+    pCur->c_results = pCur->c_doc.select_nodes(pCur->c_query);
+
+    return SQLITE_OK;
+}
+
+/**
+ * Register the xpath() table-valued function with the given database and
+ * add its help text to sqlite_function_help.
+ *
+ * @param db The database connection to register the module with.
+ * @return The result of creating the module; SQLITE_OK is enforced with
+ *   ensure().
+ */
+int
+register_xpath_vtab(sqlite3* db)
+{
+    static vtab_module<tvt_no_update<xpath_vtab>> XPATH_MODULE;
+    static help_text xpath_help
+        = help_text("xpath",
+                    "A table-valued function that executes an xpath "
+                    "expression over an XML string and returns the "
+                    "selected values.")
+              .sql_table_valued_function()
+              .with_parameter({
+                  "xpath",
+                  "The XPATH expression to evaluate over the XML document.",
+              })
+              .with_parameter({"xmldoc", "The XML document as a string."})
+              .with_result({"result", "The result of the XPATH expression."})
+              .with_result({
+                  "node_path",
+                  "The absolute path to the node containing the result.",
+              })
+              .with_result({
+                  "node_attr",
+                  "The node's attributes stored in JSON object.",
+              })
+              .with_result({"node_text", "The node's text value."})
+              .with_tags({"string", "xml"})
+              .with_example({
+                  "To select the XML nodes on the path '/abc/def'",
+                  "SELECT * FROM xpath('/abc/def', "
+                  "'<abc><def a=\"b\">Hello</def><def>Bye</def></abc>')",
+              })
+              .with_example({
+                  "To select all 'a' attributes on the path '/abc/def'",
+                  "SELECT * FROM xpath('/abc/def/@a', "
+                  "'<abc><def a=\"b\">Hello</def><def>Bye</def></abc>')",
+              })
+              .with_example({
+                  "To select the text nodes on the path '/abc/def'",
+                  "SELECT * FROM xpath('/abc/def/text()', "
+                  "'<abc><def a=\"b\">Hello &#x2605;</def></abc>')",
+              });
+
+    // Install the xpath-specific callbacks before the module is created.
+    XPATH_MODULE.vm_module.xBestIndex = rcBestIndex;
+    XPATH_MODULE.vm_module.xFilter = rcFilter;
+
+    const int rc = XPATH_MODULE.create(db, "xpath");
+
+    sqlite_function_help.insert(std::make_pair("xpath", &xpath_help));
+    xpath_help.index_tags();
+
+    ensure(rc == SQLITE_OK);
+
+    return rc;
+}