diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 17:44:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 17:44:55 +0000 |
commit | 5068d34c08f951a7ea6257d305a1627b09a95817 (patch) | |
tree | 08213e2be853396a3b07ce15dbe222644dcd9a89 /src/string-extension-functions.cc | |
parent | Initial commit. (diff) | |
download | lnav-5068d34c08f951a7ea6257d305a1627b09a95817.tar.xz lnav-5068d34c08f951a7ea6257d305a1627b09a95817.zip |
Adding upstream version 0.11.1.upstream/0.11.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | src/string-extension-functions.cc | 1251 |
1 files changed, 1251 insertions, 0 deletions
diff --git a/src/string-extension-functions.cc b/src/string-extension-functions.cc new file mode 100644 index 0000000..e20840e --- /dev/null +++ b/src/string-extension-functions.cc @@ -0,0 +1,1251 @@ +/* + * Written by Alexey Tourbin <at@altlinux.org>. + * + * The author has dedicated the code to the public domain. Anyone is free + * to copy, modify, publish, use, compile, sell, or distribute the original + * code, either in source code form or as a compiled binary, for any purpose, + * commercial or non-commercial, and by any means. + */ + +#ifdef __CYGWIN__ +# include <alloca.h> +#endif + +#include <unordered_map> + +#include <sqlite3.h> +#include <stdlib.h> +#include <string.h> + +#include "base/humanize.hh" +#include "base/lnav.gzip.hh" +#include "base/string_util.hh" +#include "column_namer.hh" +#include "config.h" +#include "data_parser.hh" +#include "data_scanner.hh" +#include "elem_to_json.hh" +#include "formats/logfmt/logfmt.parser.hh" +#include "libbase64.h" +#include "mapbox/variant.hpp" +#include "optional.hpp" +#include "pcrepp/pcre2pp.hh" +#include "safe/safe.h" +#include "scn/scn.h" +#include "spookyhash/SpookyV2.h" +#include "sqlite-extension-func.hh" +#include "text_anonymizer.hh" +#include "vtab_module.hh" +#include "vtab_module_json.hh" +#include "yajl/api/yajl_gen.h" +#include "yajlpp/json_op.hh" +#include "yajlpp/yajlpp.hh" +#include "yajlpp/yajlpp_def.hh" + +#if defined(HAVE_LIBCURL) +# include <curl/curl.h> +#endif + +using namespace mapbox; + +struct cache_entry { + std::shared_ptr<lnav::pcre2pp::code> re2; + std::shared_ptr<column_namer> cn{ + std::make_shared<column_namer>(column_namer::language::JSON)}; +}; + +static cache_entry* +find_re(string_fragment re) +{ + using re_cache_t + = std::unordered_map<string_fragment, cache_entry, frag_hasher>; + static thread_local re_cache_t cache; + + auto iter = cache.find(re); + if (iter == cache.end()) { + auto compile_res = lnav::pcre2pp::code::from(re); + if (compile_res.isErr()) { + const static intern_string_t SRC = intern_string::lookup("arg"); + + throw lnav::console::to_user_message(SRC, compile_res.unwrapErr()); + } + + cache_entry c; + + c.re2 = compile_res.unwrap().to_shared(); + auto pair = cache.insert( + std::make_pair(string_fragment::from_str(c.re2->get_pattern()), c)); + + for (int lpc = 0; lpc < c.re2->get_capture_count(); lpc++) { + c.cn->add_column(string_fragment::from_c_str( + c.re2->get_name_for_capture(lpc + 1))); + } + + iter = pair.first; + } + + return &iter->second; +} + +static bool +regexp(string_fragment re, string_fragment str) +{ + auto* reobj = find_re(re); + + return reobj->re2->find_in(str).ignore_error().has_value(); +} + +static util::variant<int64_t, double, const char*, string_fragment, json_string> +regexp_match(string_fragment re, string_fragment str) +{ + auto* reobj = find_re(re); + auto& extractor = *reobj->re2; + + if (extractor.get_capture_count() == 0) { + throw std::runtime_error( + "regular expression does not have any captures"); + } + + auto md = extractor.create_match_data(); + auto match_res = extractor.capture_from(str).into(md).matches(); + if (match_res.is<lnav::pcre2pp::matcher::not_found>()) { + return static_cast<const char*>(nullptr); + } + if (match_res.is<lnav::pcre2pp::matcher::error>()) { + auto err = match_res.get<lnav::pcre2pp::matcher::error>(); + + throw std::runtime_error(err.get_message()); + } + + yajlpp_gen gen; + yajl_gen_config(gen, yajl_gen_beautify, false); + + if (extractor.get_capture_count() == 1) { + auto cap = md[1]; + + if (!cap) { + return static_cast<const char*>(nullptr); + } + + auto scan_int_res = scn::scan_value<int64_t>(cap->to_string_view()); + if (scan_int_res && scan_int_res.empty()) { + return scan_int_res.value(); + } + + auto scan_float_res = scn::scan_value<double>(cap->to_string_view()); + if (scan_float_res && scan_float_res.empty()) { + return scan_float_res.value(); + } + + return cap.value(); + } else { + yajlpp_map root_map(gen); + + for (int lpc = 0; lpc < extractor.get_capture_count(); lpc++) { + const auto& colname = reobj->cn->cn_names[lpc]; + const auto cap = md[lpc + 1]; + + yajl_gen_pstring(gen, colname.data(), colname.length()); + + if (!cap) { + yajl_gen_null(gen); + } else { + auto scan_int_res + = scn::scan_value<int64_t>(cap->to_string_view()); + if (scan_int_res && scan_int_res.empty()) { + yajl_gen_integer(gen, scan_int_res.value()); + } else { + auto scan_float_res + = scn::scan_value<double>(cap->to_string_view()); + if (scan_float_res && scan_float_res.empty()) { + yajl_gen_number(gen, cap->data(), cap->length()); + } else { + yajl_gen_pstring(gen, cap->data(), cap->length()); + } + } + } + } + } + + return json_string(gen); +#if 0 + sqlite3_result_text(ctx, (const char *) buf, len, SQLITE_TRANSIENT); +# ifdef HAVE_SQLITE3_VALUE_SUBTYPE + sqlite3_result_subtype(ctx, JSON_SUBTYPE); +# endif +#endif +} + +json_string +extract(const char* str) +{ + data_scanner ds(str); + data_parser dp(&ds); + + dp.parse(); + // dp.print(stderr, dp.dp_pairs); + + yajlpp_gen gen; + yajl_gen_config(gen, yajl_gen_beautify, false); + + elements_to_json(gen, dp, &dp.dp_pairs); + + return json_string(gen); +} + +json_string +logfmt2json(string_fragment line) +{ + logfmt::parser p(line); + yajlpp_gen gen; + yajl_gen_config(gen, yajl_gen_beautify, false); + + { + yajlpp_map root(gen); + bool done = false; + + while (!done) { + auto pair = p.step(); + + done = pair.match( + [](const logfmt::parser::end_of_input& eoi) { return true; }, + [&root, &gen](const logfmt::parser::kvpair& kvp) { + root.gen(kvp.first); + + kvp.second.match( + [&root](const logfmt::parser::bool_value& bv) { + root.gen(bv.bv_value); + }, + [&root](const logfmt::parser::int_value& iv) { + root.gen(iv.iv_value); + }, + [&root](const logfmt::parser::float_value& fv) { + root.gen(fv.fv_value); + }, + [&root, &gen](const logfmt::parser::quoted_value& qv) { + auto_mem<yajl_handle_t> parse_handle(yajl_free); + json_ptr jp(""); + json_op jo(jp); + + jo.jo_ptr_callbacks = json_op::gen_callbacks; + jo.jo_ptr_data = gen; + parse_handle.reset(yajl_alloc( + &json_op::ptr_callbacks, nullptr, &jo)); + + const auto* json_in + = (const unsigned char*) qv.qv_value.data(); + auto json_len = qv.qv_value.length(); + + if (yajl_parse(parse_handle.in(), json_in, json_len) + != yajl_status_ok + || yajl_complete_parse(parse_handle.in()) + != yajl_status_ok) + { + root.gen(qv.qv_value); + } + }, + [&root](const logfmt::parser::unquoted_value& uv) { + root.gen(uv.uv_value); + }); + + return false; + }, + [](const logfmt::parser::error& e) -> bool { + throw sqlite_func_error("Invalid logfmt: {}", e.e_msg); + }); + } + } + + return json_string(gen); +} + +static std::string +regexp_replace(string_fragment str, string_fragment re, const char* repl) +{ + auto* reobj = find_re(re); + + return reobj->re2->replace(str, repl); +} + +static std::string +spooky_hash(const std::vector<const char*>& args) +{ + byte_array<2, uint64> hash; + SpookyHash context; + + context.Init(0, 0); + for (const auto* const arg : args) { + int64_t len = arg != nullptr ? strlen(arg) : 0; + + context.Update(&len, sizeof(len)); + if (arg == nullptr) { + continue; + } + context.Update(arg, len); + } + context.Final(hash.out(0), hash.out(1)); + + return hash.to_string(); +} + +static void +sql_spooky_hash_step(sqlite3_context* context, int argc, sqlite3_value** argv) +{ + auto* hasher + = (SpookyHash*) sqlite3_aggregate_context(context, sizeof(SpookyHash)); + + for (int lpc = 0; lpc < argc; lpc++) { + const auto* value = sqlite3_value_text(argv[lpc]); + int64_t len = value != nullptr ? strlen((const char*) value) : 0; + + hasher->Update(&len, sizeof(len)); + if (value == nullptr) { + continue; + } + hasher->Update(value, len); + } +} + +static void +sql_spooky_hash_final(sqlite3_context* context) +{ + auto* hasher + = (SpookyHash*) sqlite3_aggregate_context(context, sizeof(SpookyHash)); + + if (hasher == nullptr) { + sqlite3_result_null(context); + } else { + byte_array<2, uint64> hash; + + hasher->Final(hash.out(0), hash.out(1)); + + auto hex = hash.to_string(); + sqlite3_result_text( + context, hex.c_str(), hex.length(), SQLITE_TRANSIENT); + } +} + +struct sparkline_context { + bool sc_initialized{true}; + double sc_max_value{0.0}; + std::vector<double> sc_values; +}; + +static void +sparkline_step(sqlite3_context* context, int argc, sqlite3_value** argv) +{ + auto* sc = (sparkline_context*) sqlite3_aggregate_context( + context, sizeof(sparkline_context)); + + if (!sc->sc_initialized) { + new (sc) sparkline_context; + } + + if (argc == 0) { + return; + } + + sc->sc_values.push_back(sqlite3_value_double(argv[0])); + sc->sc_max_value = std::max(sc->sc_max_value, sc->sc_values.back()); + + if (argc >= 2) { + sc->sc_max_value + = std::max(sc->sc_max_value, sqlite3_value_double(argv[1])); + } +} + +static void +sparkline_final(sqlite3_context* context) +{ + auto* sc = (sparkline_context*) sqlite3_aggregate_context( + context, sizeof(sparkline_context)); + + if (!sc->sc_initialized) { + sqlite3_result_text(context, "", 0, SQLITE_STATIC); + return; + } + + auto* retval = (char*) malloc(sc->sc_values.size() * 3 + 1); + auto* start = retval; + + for (const auto& value : sc->sc_values) { + auto bar = humanize::sparkline(value, sc->sc_max_value); + + strcpy(start, bar.c_str()); + start += bar.length(); + } + *start = '\0'; + + sqlite3_result_text(context, retval, -1, free); + + sc->~sparkline_context(); +} + +nonstd::optional<util::variant<blob_auto_buffer, sqlite3_int64, double>> +sql_gunzip(sqlite3_value* val) +{ + switch (sqlite3_value_type(val)) { + case SQLITE3_TEXT: + case SQLITE_BLOB: { + const auto* buffer = sqlite3_value_blob(val); + auto len = sqlite3_value_bytes(val); + + if (!lnav::gzip::is_gzipped((const char*) buffer, len)) { + return blob_auto_buffer{ + auto_buffer::from((const char*) buffer, len)}; + } + + auto res = lnav::gzip::uncompress("", buffer, len); + + if (res.isErr()) { + throw sqlite_func_error("unable to uncompress -- {}", + res.unwrapErr()); + } + + return blob_auto_buffer{res.unwrap()}; + } + case SQLITE_INTEGER: + return sqlite3_value_int64(val); + case SQLITE_FLOAT: + return sqlite3_value_double(val); + } + + return nonstd::nullopt; +} + +nonstd::optional<blob_auto_buffer> +sql_gzip(sqlite3_value* val) +{ + switch (sqlite3_value_type(val)) { + case SQLITE3_TEXT: + case SQLITE_BLOB: { + const auto* buffer = sqlite3_value_blob(val); + auto len = sqlite3_value_bytes(val); + auto res = lnav::gzip::compress(buffer, len); + + if (res.isErr()) { + throw sqlite_func_error("unable to compress -- {}", + res.unwrapErr()); + } + + return blob_auto_buffer{res.unwrap()}; + } + case SQLITE_INTEGER: + case SQLITE_FLOAT: { + const auto* buffer = sqlite3_value_text(val); + auto res + = lnav::gzip::compress(buffer, strlen((const char*) buffer)); + + if (res.isErr()) { + throw sqlite_func_error("unable to compress -- {}", + res.unwrapErr()); + } + + return blob_auto_buffer{res.unwrap()}; + } + } + + return nonstd::nullopt; +} + +enum class encode_algo { + base64, + hex, + uri, +}; + +template<> +struct from_sqlite<encode_algo> { + inline encode_algo operator()(int argc, sqlite3_value** val, int argi) + { + const char* algo_name = (const char*) sqlite3_value_text(val[argi]); + + if (strcasecmp(algo_name, "base64") == 0) { + return encode_algo::base64; + } + if (strcasecmp(algo_name, "hex") == 0) { + return encode_algo::hex; + } + if (strcasecmp(algo_name, "uri") == 0) { + return encode_algo::uri; + } + + throw from_sqlite_conversion_error("value of 'base64', 'hex', or 'uri'", + argi); + } +}; + +#if defined(HAVE_LIBCURL) +static CURL* +get_curl_easy() +{ + static struct curl_wrapper { + curl_wrapper() { this->cw_value = curl_easy_init(); } + + auto_mem<CURL> cw_value{curl_easy_cleanup}; + } retval; + + return retval.cw_value.in(); +} +#endif + +static mapbox::util::variant<text_auto_buffer, auto_mem<char>, null_value_t> +sql_encode(sqlite3_value* value, encode_algo algo) +{ + switch (sqlite3_value_type(value)) { + case SQLITE_NULL: { + return null_value_t{}; + } + case SQLITE_BLOB: { + const auto* blob + = static_cast<const char*>(sqlite3_value_blob(value)); + auto blob_len = sqlite3_value_bytes(value); + + switch (algo) { + case encode_algo::base64: { + auto buf = auto_buffer::alloc((blob_len * 5) / 3); + auto outlen = buf.capacity(); + + base64_encode(blob, blob_len, buf.in(), &outlen, 0); + buf.resize(outlen); + return text_auto_buffer{std::move(buf)}; + } + case encode_algo::hex: { + auto buf = auto_buffer::alloc(blob_len * 2 + 1); + + for (int lpc = 0; lpc < blob_len; lpc++) { + fmt::format_to(std::back_inserter(buf), + FMT_STRING("{:x}"), + blob[lpc]); + } + + return text_auto_buffer{std::move(buf)}; + } +#if defined(HAVE_LIBCURL) + case encode_algo::uri: { + auto_mem<char> retval(curl_free); + + retval = curl_easy_escape(get_curl_easy(), blob, blob_len); + return std::move(retval); + } +#endif + } + } + default: { + const auto* text = (const char*) sqlite3_value_text(value); + auto text_len = sqlite3_value_bytes(value); + + switch (algo) { + case encode_algo::base64: { + auto buf = auto_buffer::alloc((text_len * 5) / 3); + size_t outlen = buf.capacity(); + + base64_encode(text, text_len, buf.in(), &outlen, 0); + buf.resize(outlen); + return text_auto_buffer{std::move(buf)}; + } + case encode_algo::hex: { + auto buf = auto_buffer::alloc(text_len * 2 + 1); + + for (int lpc = 0; lpc < text_len; lpc++) { + fmt::format_to(std::back_inserter(buf), + FMT_STRING("{:x}"), + text[lpc]); + } + + return text_auto_buffer{std::move(buf)}; + } +#if defined(HAVE_LIBCURL) + case encode_algo::uri: { + auto_mem<char> retval(curl_free); + + retval = curl_easy_escape(get_curl_easy(), text, text_len); + return std::move(retval); + } +#endif + } + } + } + ensure(false); +} + +static mapbox::util::variant<blob_auto_buffer, auto_mem<char>> +sql_decode(string_fragment str, encode_algo algo) +{ + switch (algo) { + case encode_algo::base64: { + auto buf = auto_buffer::alloc(str.length()); + auto outlen = buf.capacity(); + base64_decode(str.data(), str.length(), buf.in(), &outlen, 0); + buf.resize(outlen); + + return blob_auto_buffer{std::move(buf)}; + } + case encode_algo::hex: { + auto buf = auto_buffer::alloc(str.length() / 2); + auto sv = str.to_string_view(); + + while (!sv.empty()) { + int32_t value; + auto scan_res = scn::scan(sv, "{:2x}", value); + if (!scan_res) { + throw sqlite_func_error( + "invalid hex input at: {}", + std::distance(str.begin(), sv.begin())); + } + buf.push_back((char) (value & 0xff)); + sv = scan_res.range_as_string_view(); + } + + return blob_auto_buffer{std::move(buf)}; + } +#if defined(HAVE_LIBCURL) + case encode_algo::uri: { + auto_mem<char> retval(curl_free); + + retval = curl_easy_unescape( + get_curl_easy(), str.data(), str.length(), nullptr); + + return std::move(retval); + } +#endif + } + ensure(false); +} + +std::string +sql_humanize_file_size(file_ssize_t value) +{ + return humanize::file_size(value, humanize::alignment::columnar); +} + +static std::string +sql_anonymize(string_fragment frag) +{ + static safe::Safe<lnav::text_anonymizer> ta; + + return ta.writeAccess()->next(frag); +} + +#if !CURL_AT_LEAST_VERSION(7, 80, 0) +extern "C" +{ +const char* curl_url_strerror(CURLUcode error); +} +#endif + +static json_string +sql_parse_url(string_fragment url_frag) +{ + static auto* CURL_HANDLE = get_curl_easy(); + + auto_mem<CURLU> cu(curl_url_cleanup); + cu = curl_url(); + + auto rc = curl_url_set(cu, CURLUPART_URL, url_frag.data(), 0); + if (rc != CURLUE_OK) { + throw lnav::console::user_message::error( + attr_line_t("invalid URL: ").append_quoted(url_frag.to_string())) + .with_reason(curl_url_strerror(rc)); + } + + auto_mem<char> url_part(curl_free); + yajlpp_gen gen; + yajl_gen_config(gen, yajl_gen_beautify, false); + + { + yajlpp_map root(gen); + + root.gen("scheme"); + rc = curl_url_get(cu, CURLUPART_SCHEME, url_part.out(), 0); + if (rc == CURLUE_OK) { + root.gen(string_fragment::from_c_str(url_part.in())); + } else { + root.gen(); + } + root.gen("user"); + rc = curl_url_get(cu, CURLUPART_USER, url_part.out(), CURLU_URLDECODE); + if (rc == CURLUE_OK) { + root.gen(string_fragment::from_c_str(url_part.in())); + } else { + root.gen(); + } + root.gen("password"); + rc = curl_url_get( + cu, CURLUPART_PASSWORD, url_part.out(), CURLU_URLDECODE); + if (rc == CURLUE_OK) { + root.gen(string_fragment::from_c_str(url_part.in())); + } else { + root.gen(); + } + root.gen("host"); + rc = curl_url_get(cu, CURLUPART_HOST, url_part.out(), CURLU_URLDECODE); + if (rc == CURLUE_OK) { + root.gen(string_fragment::from_c_str(url_part.in())); + } else { + root.gen(); + } + root.gen("port"); + rc = curl_url_get(cu, CURLUPART_PORT, url_part.out(), 0); + if (rc == CURLUE_OK) { + root.gen(string_fragment::from_c_str(url_part.in())); + } else { + root.gen(); + } + root.gen("path"); + rc = curl_url_get(cu, CURLUPART_PATH, url_part.out(), CURLU_URLDECODE); + if (rc == CURLUE_OK) { + root.gen(string_fragment::from_c_str(url_part.in())); + } else { + root.gen(); + } + rc = curl_url_get(cu, CURLUPART_QUERY, url_part.out(), 0); + if (rc == CURLUE_OK) { + root.gen("query"); + root.gen(string_fragment::from_c_str(url_part.in())); + + root.gen("parameters"); + robin_hood::unordered_set<std::string> seen_keys; + yajlpp_map query_map(gen); + + auto query_frag = string_fragment::from_c_str(url_part.in()); + auto remaining = query_frag; + + while (true) { + auto split_res + = remaining.split_when(string_fragment::tag1{'&'}); + + if (!split_res) { + break; + } + + auto_mem<char> kv_pair(curl_free); + auto kv_pair_encoded = split_res->first; + int out_len = 0; + + kv_pair = curl_easy_unescape(CURL_HANDLE, + kv_pair_encoded.data(), + kv_pair_encoded.length(), + &out_len); + auto kv_pair_frag + = string_fragment::from_bytes(kv_pair.in(), out_len); + auto eq_index_opt = kv_pair_frag.find('='); + if (eq_index_opt) { + auto key = kv_pair_frag.sub_range(0, eq_index_opt.value()); + auto val = kv_pair_frag.substr(eq_index_opt.value() + 1); + auto key_str = key.to_string(); + + if (seen_keys.count(key_str) == 0) { + seen_keys.emplace(key_str); + query_map.gen(key); + query_map.gen(val); + } + } else { + auto val_str = split_res->first.to_string(); + + if (seen_keys.count(val_str) == 0) { + seen_keys.insert(val_str); + query_map.gen(split_res->first); + query_map.gen(); + } + } + + if (split_res->second.empty()) { + break; + } + + remaining = split_res->second; + } + } else { + root.gen("query"); + root.gen(); + root.gen("parameters"); + root.gen(); + } + root.gen("fragment"); + rc = curl_url_get( + cu, CURLUPART_FRAGMENT, url_part.out(), CURLU_URLDECODE); + if (rc == CURLUE_OK) { + root.gen(string_fragment::from_c_str(url_part.in())); + } else { + root.gen(); + } + } + + return json_string(gen); +} + +struct url_parts { + nonstd::optional<std::string> up_scheme; + nonstd::optional<std::string> up_username; + nonstd::optional<std::string> up_password; + nonstd::optional<std::string> up_host; + nonstd::optional<std::string> up_port; + nonstd::optional<std::string> up_path; + nonstd::optional<std::string> up_query; + std::map<std::string, nonstd::optional<std::string>> up_parameters; + nonstd::optional<std::string> up_fragment; +}; + +static const json_path_container url_params_handlers = { + yajlpp::pattern_property_handler("(?<param>.+)") + .for_field(&url_parts::up_parameters), +}; + +static const typed_json_path_container<url_parts> url_parts_handlers = { + yajlpp::property_handler("scheme").for_field(&url_parts::up_scheme), + yajlpp::property_handler("username").for_field(&url_parts::up_username), + yajlpp::property_handler("password").for_field(&url_parts::up_password), + yajlpp::property_handler("host").for_field(&url_parts::up_host), + yajlpp::property_handler("port").for_field(&url_parts::up_port), + yajlpp::property_handler("path").for_field(&url_parts::up_path), + yajlpp::property_handler("query").for_field(&url_parts::up_query), + yajlpp::property_handler("parameters").with_children(url_params_handlers), + yajlpp::property_handler("fragment").for_field(&url_parts::up_fragment), +}; + +static auto_mem<char> +sql_unparse_url(string_fragment in) +{ + static auto* CURL_HANDLE = get_curl_easy(); + static intern_string_t SRC = intern_string::lookup("arg"); + + auto parse_res = url_parts_handlers.parser_for(SRC).of(in); + if (parse_res.isErr()) { + throw parse_res.unwrapErr(); + } + + auto up = parse_res.unwrap(); + auto_mem<CURLU> cu(curl_url_cleanup); + cu = curl_url(); + + if (up.up_scheme) { + curl_url_set( + cu, CURLUPART_SCHEME, up.up_scheme->c_str(), CURLU_URLENCODE); + } + if (up.up_username) { + curl_url_set( + cu, CURLUPART_USER, up.up_username->c_str(), CURLU_URLENCODE); + } + if (up.up_password) { + curl_url_set( + cu, CURLUPART_PASSWORD, up.up_password->c_str(), CURLU_URLENCODE); + } + if (up.up_host) { + curl_url_set(cu, CURLUPART_HOST, up.up_host->c_str(), CURLU_URLENCODE); + } + if (up.up_port) { + curl_url_set(cu, CURLUPART_PORT, up.up_port->c_str(), 0); + } + if (up.up_path) { + curl_url_set(cu, CURLUPART_PATH, up.up_path->c_str(), CURLU_URLENCODE); + } + if (up.up_query) { + curl_url_set(cu, CURLUPART_QUERY, up.up_query->c_str(), 0); + } else if (!up.up_parameters.empty()) { + for (const auto& pair : up.up_parameters) { + auto_mem<char> key(curl_free); + auto_mem<char> value(curl_free); + std::string qparam; + + key = curl_easy_escape( + CURL_HANDLE, pair.first.c_str(), pair.first.length()); + if (pair.second) { + value = curl_easy_escape( + CURL_HANDLE, pair.second->c_str(), pair.second->length()); + qparam = fmt::format(FMT_STRING("{}={}"), key.in(), value.in()); + } else { + qparam = key.in(); + } + + curl_url_set( + cu, CURLUPART_QUERY, qparam.c_str(), CURLU_APPENDQUERY); + } + } + if (up.up_fragment) { + curl_url_set( + cu, CURLUPART_FRAGMENT, up.up_fragment->c_str(), CURLU_URLENCODE); + } + + auto_mem<char> retval(curl_free); + + curl_url_get(cu, CURLUPART_URL, retval.out(), 0); + return retval; +} + +int +string_extension_functions(struct FuncDef** basic_funcs, + struct FuncDefAgg** agg_funcs) +{ + static struct FuncDef string_funcs[] = { + sqlite_func_adapter<decltype(®exp), regexp>::builder( + help_text("regexp", "Test if a string matches a regular expression") + .sql_function() + .with_parameter({"re", "The regular expression to use"}) + .with_parameter({ + "str", + "The string to test against the regular expression", + })), + + sqlite_func_adapter<decltype(®exp_match), regexp_match>::builder( + help_text("regexp_match", + "Match a string against a regular expression and return " + "the capture groups as JSON.") + .sql_function() + .with_parameter({"re", "The regular expression to use"}) + .with_parameter({ + "str", + "The string to test against the regular expression", + }) + .with_tags({"string", "regex"}) + .with_example({ + "To capture the digits from the string '123'", + "SELECT regexp_match('(\\d+)', '123')", + }) + .with_example({ + "To capture a number and word into a JSON object with the " + "properties 'col_0' and 'col_1'", + "SELECT regexp_match('(\\d+) (\\w+)', '123 four')", + }) + .with_example({ + "To capture a number and word into a JSON object with the " + "named properties 'num' and 'str'", + "SELECT regexp_match('(?<num>\\d+) (?<str>\\w+)', '123 " + "four')", + })), + + sqlite_func_adapter<decltype(®exp_replace), regexp_replace>::builder( + help_text("regexp_replace", + "Replace the parts of a string that match a regular " + "expression.") + .sql_function() + .with_parameter( + {"str", "The string to perform replacements on"}) + .with_parameter({"re", "The regular expression to match"}) + .with_parameter({ + "repl", + "The replacement string. " + "You can reference capture groups with a " + "backslash followed by the number of the " + "group, starting with 1.", + }) + .with_tags({"string", "regex"}) + .with_example({ + "To replace the word at the start of the string " + "'Hello, World!' with 'Goodbye'", + "SELECT regexp_replace('Hello, World!', " + "'^(\\w+)', 'Goodbye')", + }) + .with_example({ + "To wrap alphanumeric words with angle brackets", + "SELECT regexp_replace('123 abc', '(\\w+)', '<\\1>')", + })), + + sqlite_func_adapter<decltype(&sql_humanize_file_size), + sql_humanize_file_size>:: + builder(help_text( + "humanize_file_size", + "Format the given file size as a human-friendly string") + .sql_function() + .with_parameter({"value", "The file size to format"}) + .with_tags({"string"}) + .with_example({ + "To format an amount", + "SELECT humanize_file_size(10 * 1024 * 1024)", + })), + + sqlite_func_adapter<decltype(&humanize::sparkline), + humanize::sparkline>:: + builder( + help_text("sparkline", + "Function used to generate a sparkline bar chart. " + "The non-aggregate version converts a single numeric " + "value on a range to a bar chart character. The " + "aggregate version returns a string with a bar " + "character for every numeric input") + .sql_function() + .with_parameter({"value", "The numeric value to convert"}) + .with_parameter(help_text("upper", + "The upper bound of the numeric " + "range. The non-aggregate " + "version defaults to 100. The " + "aggregate version uses the " + "largest value in the inputs.") + .optional()) + .with_tags({"string"}) + .with_example({ + "To get the unicode block element for the " + "value 32 in the " + "range of 0-128", + "SELECT sparkline(32, 128)", + }) + .with_example({ + "To chart the values in a JSON array", + "SELECT sparkline(value) FROM json_each('[0, 1, 2, 3, " + "4, 5, 6, 7, 8]')", + })), + + sqlite_func_adapter<decltype(&sql_anonymize), sql_anonymize>::builder( + help_text("anonymize", + "Replace identifying information with random values.") + .sql_function() + .with_parameter({"value", "The text to anonymize"}) + .with_tags({"string"}) + .with_example({ + "To anonymize an IP address", + "SELECT anonymize('Hello, 192.168.1.2')", + })), + + sqlite_func_adapter<decltype(&extract), extract>::builder( + help_text("extract", + "Automatically Parse and extract data from a string") + .sql_function() + .with_parameter({"str", "The string to parse"}) + .with_tags({"string"}) + .with_example({ + "To extract key/value pairs from a string", + "SELECT extract('foo=1 bar=2 name=\"Rolo Tomassi\"')", + }) + .with_example({ + "To extract columnar data from a string", + "SELECT extract('1.0 abc 2.0')", + })), + + sqlite_func_adapter<decltype(&logfmt2json), logfmt2json>::builder( + help_text("logfmt2json", + "Convert a logfmt-encoded string into JSON") + .sql_function() + .with_parameter({"str", "The logfmt message to parse"}) + .with_tags({"string"}) + .with_example({ + "To extract key/value pairs from a log message", + "SELECT logfmt2json('foo=1 bar=2 name=\"Rolo Tomassi\"')", + })), + + sqlite_func_adapter< + decltype(static_cast<bool (*)(const char*, const char*)>( + &startswith)), + startswith>:: + builder(help_text("startswith", + "Test if a string begins with the given prefix") + .sql_function() + .with_parameter({"str", "The string to test"}) + .with_parameter( + {"prefix", "The prefix to check in the string"}) + .with_tags({"string"}) + .with_example({ + "To test if the string 'foobar' starts with 'foo'", + "SELECT startswith('foobar', 'foo')", + }) + .with_example({ + "To test if the string 'foobar' starts with 'bar'", + "SELECT startswith('foobar', 'bar')", + })), + + sqlite_func_adapter<decltype(static_cast<bool (*)( + const char*, const char*)>(&endswith)), + endswith>:: + builder( + help_text("endswith", + "Test if a string ends with the given suffix") + .sql_function() + .with_parameter({"str", "The string to test"}) + .with_parameter( + {"suffix", "The suffix to check in the string"}) + .with_tags({"string"}) + .with_example({ + "To test if the string 'notbad.jpg' ends with '.jpg'", + "SELECT endswith('notbad.jpg', '.jpg')", + }) + .with_example({ + "To test if the string 'notbad.png' starts with '.jpg'", + "SELECT endswith('notbad.png', '.jpg')", + })), + + sqlite_func_adapter<decltype(&spooky_hash), spooky_hash>::builder( + help_text("spooky_hash", + "Compute the hash value for the given arguments.") + .sql_function() + .with_parameter( + help_text("str", "The string to hash").one_or_more()) + .with_tags({"string"}) + .with_example({ + "To produce a hash for the string 'Hello, World!'", + "SELECT spooky_hash('Hello, World!')", + }) + .with_example({ + "To produce a hash for the parameters where one is NULL", + "SELECT spooky_hash('Hello, World!', NULL)", + }) + .with_example({ + "To produce a hash for the parameters where one " + "is an empty string", + "SELECT spooky_hash('Hello, World!', '')", + }) + .with_example({ + "To produce a hash for the parameters where one " + "is a number", + "SELECT spooky_hash('Hello, World!', 123)", + })), + + sqlite_func_adapter<decltype(&sql_gunzip), sql_gunzip>::builder( + help_text("gunzip", "Decompress a gzip file") + .sql_function() + .with_parameter( + help_text("b", "The blob to decompress").one_or_more()) + .with_tags({"string"})), + + sqlite_func_adapter<decltype(&sql_gzip), sql_gzip>::builder( + help_text("gzip", "Compress a string into a gzip file") + .sql_function() + .with_parameter( + help_text("value", "The value to compress").one_or_more()) + .with_tags({"string"})), + + sqlite_func_adapter<decltype(&sql_encode), sql_encode>::builder( + help_text("encode", "Encode the value using the given algorithm") + .sql_function() + .with_parameter(help_text("value", "The value to encode")) + .with_parameter(help_text("algorithm", + "One of the following encoding " + "algorithms: base64, hex, uri")) + .with_tags({"string"}) + .with_example({ + "To base64-encode 'Hello, World!'", + "SELECT encode('Hello, World!', 'base64')", + }) + .with_example({ + "To hex-encode 'Hello, World!'", + "SELECT encode('Hello, World!', 'hex')", + }) + .with_example({ + "To URI-encode 'Hello, World!'", + "SELECT encode('Hello, World!', 'uri')", + })), + + sqlite_func_adapter<decltype(&sql_decode), sql_decode>::builder( + help_text("decode", "Decode the value using the given algorithm") + .sql_function() + .with_parameter(help_text("value", "The value to decode")) + .with_parameter(help_text("algorithm", + "One of the following encoding " + "algorithms: base64, hex, uri")) + .with_tags({"string"}) + .with_example({ + "To decode the URI-encoded string '%63%75%72%6c'", + "SELECT decode('%63%75%72%6c', 'uri')", + })), + + sqlite_func_adapter<decltype(&sql_parse_url), sql_parse_url>::builder( + help_text("parse_url", + "Parse a URL and return the components in a JSON object. " + "Limitations: not all URL schemes are supported and " + "repeated query parameters are not captured.") + .sql_function() + .with_parameter(help_text("url", "The URL to parse")) + .with_result({ + "scheme", + "The URL's scheme", + }) + .with_result({ + "username", + "The name of the user specified in the URL", + }) + .with_result({ + "password", + "The password specified in the URL", + }) + .with_result({ + "host", + "The host name / IP specified in the URL", + }) + .with_result({ + "port", + "The port specified in the URL", + }) + .with_result({ + "path", + "The path specified in the URL", + }) + .with_result({ + "query", + "The query string in the URL", + }) + .with_result({ + "parameters", + "An object containing the query parameters", + }) + .with_result({ + "fragment", + "The fragment specified in the URL", + }) + .with_tags({"string", "url"}) + .with_example({ + "To parse the URL " + "'https://example.com/search?q=hello%20world'", + "SELECT " + "parse_url('https://example.com/search?q=hello%20world')", + }) + .with_example({ + "To parse the URL " + "'https://alice@[fe80::14ff:4ee5:1215:2fb2]'", + "SELECT " + "parse_url('https://alice@[fe80::14ff:4ee5:1215:2fb2]')", + })), + + sqlite_func_adapter<decltype(&sql_unparse_url), sql_unparse_url>:: + builder( + help_text("unparse_url", + "Convert a JSON object containing the parts of a " + "URL into a URL string") + .sql_function() + .with_parameter(help_text( + "obj", "The JSON object containing the URL parts")) + .with_tags({"string", "url"}) + .with_example({ + "To unparse the object " + "'{\"scheme\": \"https\", \"host\": \"example.com\"}'", + "SELECT " + "unparse_url('{\"scheme\": \"https\", \"host\": " + "\"example.com\"}')", + })), + + {nullptr}, + }; + + static struct FuncDefAgg str_agg_funcs[] = { + { + "group_spooky_hash", + -1, + 0, + sql_spooky_hash_step, + sql_spooky_hash_final, + help_text("group_spooky_hash", + "Compute the hash value for the given arguments") + .sql_agg_function() + .with_parameter( + help_text("str", "The string to hash").one_or_more()) + .with_tags({"string"}) + .with_example({ + "To produce a hash of all of the values of 'column1'", + "SELECT group_spooky_hash(column1) FROM (VALUES ('abc'), " + "('123'))", + }), + }, + + { + "sparkline", + -1, + 0, + sparkline_step, + sparkline_final, + }, + + {nullptr}, + }; + + *basic_funcs = string_funcs; + *agg_funcs = str_agg_funcs; + + return SQLITE_OK; +} |