/** * Copyright (c) 2020, Timothy Stack * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * Neither the name of Timothy Stack nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include "base/lnav_log.hh" #include "config.h" #include "pugixml/pugixml.hpp" #include "sql_help.hh" #include "vtab_module.hh" #include "xml_util.hh" #include "yajlpp/yajlpp.hh" enum { XP_COL_RESULT, XP_COL_NODE_PATH, XP_COL_NODE_ATTR, XP_COL_NODE_TEXT, XP_COL_XPATH, XP_COL_VALUE, }; static thread_local std::unordered_map QUERY_CACHE; static pugi::xpath_query checkout_query(const std::string& query) { auto iter = QUERY_CACHE.find(query); if (iter == QUERY_CACHE.end()) { auto xquery = pugi::xpath_query(query.c_str()); if (!xquery) { return xquery; } auto pair = QUERY_CACHE.emplace(query, std::move(xquery)); iter = pair.first; } auto retval = std::move(iter->second); QUERY_CACHE.erase(iter); return retval; } static void checkin_query(const std::string& query_str, pugi::xpath_query query) { if (!query) { return; } QUERY_CACHE[query_str] = std::move(query); } struct xpath_vtab { static constexpr const char* NAME = "xpath"; static constexpr const char* CREATE_STMT = R"( -- The xpath() table-valued function allows you to execute an xpath expression CREATE TABLE xpath ( result text, -- The result of the xpath expression node_path text, -- The absolute path to the node selected by the expression node_attr text, -- The node attributes stored in a JSON object node_text text, -- The text portion of the node selected by the expression xpath text HIDDEN, value text HIDDEN ); )"; struct cursor { sqlite3_vtab_cursor base; sqlite3_int64 c_rowid{0}; std::string c_xpath; std::string c_value; bool c_value_as_blob{false}; pugi::xpath_query c_query; pugi::xml_document c_doc; pugi::xpath_node_set c_results; cursor(sqlite3_vtab* vt) : base({vt}) {} ~cursor() { this->reset(); } int reset() { this->c_rowid = 0; checkin_query(this->c_xpath, std::move(this->c_query)); return SQLITE_OK; } int next() { this->c_rowid += 1; return SQLITE_OK; } int eof() { return this->c_rowid >= (int64_t) this->c_results.size(); } int get_rowid(sqlite3_int64& rowid_out) { rowid_out = this->c_rowid; return SQLITE_OK; } }; int get_column(const cursor& vc, sqlite3_context* ctx, int col) { switch (col) { case XP_COL_RESULT: { const auto& xpath_node = vc.c_results[vc.c_rowid]; if (xpath_node.node()) { std::ostringstream oss; // XXX avoid the extra allocs xpath_node.node().print(oss); auto node_xml = oss.str(); sqlite3_result_text(ctx, node_xml.c_str(), node_xml.length(), SQLITE_TRANSIENT); } else if (xpath_node.attribute()) { sqlite3_result_text(ctx, xpath_node.attribute().value(), -1, SQLITE_TRANSIENT); } else { sqlite3_result_null(ctx); } break; } case XP_COL_NODE_PATH: { auto& xpath_node = vc.c_results[vc.c_rowid]; auto x_node = xpath_node.node(); auto x_attr = xpath_node.attribute(); if (x_node || x_attr) { if (!x_node) { x_node = xpath_node.parent(); } auto node_path = lnav::pugixml::get_actual_path(x_node); if (x_attr) { node_path += "/@" + std::string(x_attr.name()); } sqlite3_result_text(ctx, node_path.c_str(), node_path.length(), SQLITE_TRANSIENT); } else { sqlite3_result_null(ctx); } break; } case XP_COL_NODE_ATTR: { auto& xpath_node = vc.c_results[vc.c_rowid]; auto x_node = xpath_node.node(); auto x_attr = xpath_node.attribute(); if (x_node || x_attr) { if (!x_node) { x_node = xpath_node.parent(); } yajlpp_gen gen; yajl_gen_config(gen, yajl_gen_beautify, false); { yajlpp_map attrs(gen); for (const auto& attr : x_node.attributes()) { attrs.gen(attr.name()); attrs.gen(attr.value()); } } auto sf = gen.to_string_fragment(); sqlite3_result_text( ctx, sf.data(), sf.length(), SQLITE_TRANSIENT); sqlite3_result_subtype(ctx, 'J'); } else { sqlite3_result_null(ctx); } break; } case XP_COL_NODE_TEXT: { auto& xpath_node = vc.c_results[vc.c_rowid]; auto x_node = xpath_node.node(); auto x_attr = xpath_node.attribute(); if (x_node || x_attr) { if (!x_node) { x_node = xpath_node.parent(); } auto node_text = x_node.text(); sqlite3_result_text( ctx, node_text.get(), -1, SQLITE_TRANSIENT); } else { sqlite3_result_null(ctx); } break; } case XP_COL_XPATH: sqlite3_result_text(ctx, vc.c_xpath.c_str(), vc.c_xpath.length(), SQLITE_STATIC); break; case XP_COL_VALUE: if (vc.c_value_as_blob) { sqlite3_result_blob64(ctx, vc.c_value.c_str(), vc.c_value.length(), SQLITE_STATIC); } else { sqlite3_result_text(ctx, vc.c_value.c_str(), vc.c_value.length(), SQLITE_STATIC); } break; } return SQLITE_OK; } }; static int rcBestIndex(sqlite3_vtab* tab, sqlite3_index_info* pIdxInfo) { vtab_index_constraints vic(pIdxInfo); vtab_index_usage viu(pIdxInfo); for (auto iter = vic.begin(); iter != vic.end(); ++iter) { if (iter->op != SQLITE_INDEX_CONSTRAINT_EQ) { continue; } switch (iter->iColumn) { case XP_COL_VALUE: case XP_COL_XPATH: viu.column_used(iter); break; } } viu.allocate_args(XP_COL_XPATH, XP_COL_VALUE, 2); return SQLITE_OK; } static int rcFilter(sqlite3_vtab_cursor* pVtabCursor, int idxNum, const char* idxStr, int argc, sqlite3_value** argv) { auto* pCur = (xpath_vtab::cursor*) pVtabCursor; if (argc != 2) { pCur->c_xpath.clear(); pCur->c_value.clear(); return SQLITE_OK; } pCur->c_value_as_blob = (sqlite3_value_type(argv[1]) == SQLITE_BLOB); auto byte_count = sqlite3_value_bytes(argv[1]); if (byte_count == 0) { pCur->c_rowid = 0; return SQLITE_OK; } auto blob = (const char*) sqlite3_value_blob(argv[1]); pCur->c_value.assign(blob, byte_count); auto parse_res = pCur->c_doc.load_string(pCur->c_value.c_str()); if (!parse_res) { static const intern_string_t ARG1 = intern_string::lookup("xmldoc"); auto attr_xmldoc = attr_line_t(pCur->c_value) .with_attr_for_all(VC_ROLE.value(role_t::VCR_QUOTED_CODE)); auto um = lnav::console::user_message::error("Invalid XML document") .with_reason(parse_res.description()) .with_snippet( lnav::console::snippet::from_content_with_offset( ARG1, attr_xmldoc, parse_res.offset, parse_res.description())); set_vtable_errmsg(pVtabCursor->pVtab, um); return SQLITE_ERROR; } pCur->c_xpath = (const char*) sqlite3_value_text(argv[0]); pCur->c_query = checkout_query(pCur->c_xpath); if (!pCur->c_query) { static const intern_string_t ARG0 = intern_string::lookup("xpath"); const auto& res = pCur->c_query.result(); auto attr_xpath = attr_line_t(pCur->c_xpath) .with_attr_for_all(VC_ROLE.value(role_t::VCR_QUOTED_CODE)); auto um = lnav::console::user_message::error("Invalid XPath expression") .with_reason(res.description()) .with_snippet( lnav::console::snippet::from_content_with_offset( ARG0, attr_xpath, res.offset, res.description())); set_vtable_errmsg(pVtabCursor->pVtab, um); return SQLITE_ERROR; } pCur->c_rowid = 0; pCur->c_results = pCur->c_doc.select_nodes(pCur->c_query); return SQLITE_OK; } int register_xpath_vtab(sqlite3* db) { static vtab_module> XPATH_MODULE; static help_text xpath_help = help_text("xpath", "A table-valued function that executes an xpath expression " "over an XML " "string and returns the selected values.") .sql_table_valued_function() .with_parameter({ "xpath", "The XPATH expression to evaluate over the XML document.", }) .with_parameter({"xmldoc", "The XML document as a string."}) .with_result({"result", "The result of the XPATH expression."}) .with_result({ "node_path", "The absolute path to the node containing the result.", }) .with_result( {"node_attr", "The node's attributes stored in JSON object."}) .with_result({"node_text", "The node's text value."}) .with_tags({"string", "xml"}) .with_example({ "To select the XML nodes on the path '/abc/def'", "SELECT * FROM xpath('/abc/def', 'HelloBye')", }) .with_example({ "To select all 'a' attributes on the path '/abc/def'", "SELECT * FROM xpath('/abc/def/@a', 'HelloBye')", }) .with_example({ "To select the text nodes on the path '/abc/def'", "SELECT * FROM xpath('/abc/def/text()', 'Hello ★')", }); int rc; XPATH_MODULE.vm_module.xBestIndex = rcBestIndex; XPATH_MODULE.vm_module.xFilter = rcFilter; rc = XPATH_MODULE.create(db, "xpath"); sqlite_function_help.insert(std::make_pair("xpath", &xpath_help)); xpath_help.index_tags(); ensure(rc == SQLITE_OK); return rc; }