diff options
Diffstat (limited to '')
-rw-r--r-- | src/sql_util.cc | 396 |
1 files changed, 372 insertions, 24 deletions
diff --git a/src/sql_util.cc b/src/sql_util.cc index 97a5344..ce17d9b 100644 --- a/src/sql_util.cc +++ b/src/sql_util.cc @@ -29,30 +29,28 @@ * @file sql_util.cc */ -#include <algorithm> #include <regex> #include <vector> #include "sql_util.hh" #include <ctype.h> -#include <stdio.h> +#include <stdarg.h> #include <string.h> #include "base/auto_mem.hh" #include "base/injector.hh" #include "base/lnav_log.hh" #include "base/string_util.hh" -#include "base/time_util.hh" #include "bound_tags.hh" #include "config.h" #include "lnav_util.hh" #include "pcrepp/pcre2pp.hh" #include "readline_context.hh" #include "readline_highlighters.hh" -#include "shlex.resolver.hh" +#include "sql_execute.hh" #include "sql_help.hh" -#include "sqlite-extension-func.hh" +#include "sqlitepp.hh" using namespace lnav::roles::literals; @@ -287,7 +285,7 @@ const std::unordered_map<unsigned char, const char*> sql_constraint_names = { #endif }; -std::multimap<std::string, help_text*> sqlite_function_help; +std::multimap<std::string, const help_text*> sqlite_function_help; static int handle_db_list(void* ptr, int ncols, char** colvalues, char** colnames) @@ -463,14 +461,15 @@ schema_foreign_key_list(void* ptr, int ncols, char** colvalues, char** colnames) void dump_sqlite_schema(sqlite3* db, std::string& schema_out) { - struct sqlite_metadata_callbacks schema_sql_meta_callbacks - = {schema_collation_list, - schema_db_list, - schema_table_list, - schema_table_info, - schema_foreign_key_list, - &schema_out, - {}}; + struct sqlite_metadata_callbacks schema_sql_meta_callbacks = { + schema_collation_list, + schema_db_list, + schema_table_list, + schema_table_info, + schema_foreign_key_list, + &schema_out, + {}, + }; walk_sqlite_metadata(db, schema_sql_meta_callbacks); } @@ -575,12 +574,12 @@ sql_ident_needs_quote(const char* ident) return false; } -char* +auto_mem<char, sqlite3_free> sql_quote_ident(const char* ident) { bool needs_quote = false; size_t quote_count = 0, alloc_size; - char* retval; + auto_mem<char, sqlite3_free> retval; for (int lpc = 0; ident[lpc]; lpc++) { if ((lpc == 0 && isdigit(ident[lpc])) @@ -594,8 +593,8 @@ sql_quote_ident(const char* ident) } alloc_size = strlen(ident) + quote_count * 2 + (needs_quote ? 2 : 0) + 1; - if ((retval = (char*) sqlite3_malloc(alloc_size)) == NULL) { - retval = NULL; + if ((retval = (char*) sqlite3_malloc(alloc_size)) == nullptr) { + retval = nullptr; } else { char* curr = retval; @@ -675,7 +674,7 @@ annotate_sql_with_error(sqlite3* db, const char* sql, const char* tail) if (erroff != -1) { auto line_with_error - = string_fragment(retval.get_string()) + = string_fragment::from_str(retval.get_string()) .find_boundaries_around(erroff, string_fragment::tag1{'\n'}); auto erroff_in_line = erroff - line_with_error.sf_begin; @@ -1024,8 +1023,12 @@ annotate_sql_statement(attr_line_t& al) &SQL_STRING_ATTR, }, { + lnav::pcre2pp::code::from_const(R"(\A0x[0-9a-fA-F]+)"), + &SQL_NUMBER_ATTR, + }, + { lnav::pcre2pp::code::from_const( - R"(\A-?\d+(?:\.\d*(?:[eE][\-\+]?\d+)?)?|0x[0-9a-fA-F]+$)"), + R"(\A-?\d+(?:\.\d+)?(?:[eE][\-\+]?\d+)?)"), &SQL_NUMBER_ATTR, }, { @@ -1039,7 +1042,8 @@ annotate_sql_statement(attr_line_t& al) &SQL_COMMENT_ATTR, }, { - lnav::pcre2pp::code::from_const(R"(\A(\*|<|>|=|!|\-|\+|\|\|))"), + lnav::pcre2pp::code::from_const( + R"(\A(\*|\->{1,2}|<|>|=|!|\-|\+|\|\|))"), &SQL_OPERATOR_ATTR, }, { @@ -1055,6 +1059,11 @@ annotate_sql_statement(attr_line_t& al) auto& line = al.get_string(); auto& sa = al.get_attrs(); + if (lnav::sql::is_prql(line)) { + lnav::sql::annotate_prql_statement(al); + return; + } + auto cmd_find_res = cmd_pattern.find_in(line, PCRE2_ANCHORED).ignore_error(); if (cmd_find_res) { @@ -1136,6 +1145,9 @@ annotate_sql_statement(attr_line_t& al) std::vector<const help_text*> find_sql_help_for_line(const attr_line_t& al, size_t x) { + static const auto* sql_cmd_map + = injector::get<readline_context::command_map_t*, sql_cmd_map_tag>(); + std::vector<const help_text*> retval; const auto& sa = al.get_attrs(); std::string name; @@ -1144,10 +1156,7 @@ find_sql_help_for_line(const attr_line_t& al, size_t x) { auto sa_opt = get_string_attr(al.get_attrs(), &SQL_COMMAND_ATTR); - if (sa_opt) { - auto* sql_cmd_map = injector::get<readline_context::command_map_t*, - sql_cmd_map_tag>(); auto cmd_name = al.get_substring((*sa_opt)->sa_range); auto cmd_iter = sql_cmd_map->find(cmd_name); @@ -1155,6 +1164,36 @@ find_sql_help_for_line(const attr_line_t& al, size_t x) return {&cmd_iter->second->c_help}; } } + + auto prql_trans_iter = find_string_attr_containing( + al.get_attrs(), &lnav::sql::PRQL_TRANSFORM_ATTR, x); + if (prql_trans_iter != al.get_attrs().end()) { + auto cmd_name = al.get_substring(prql_trans_iter->sa_range); + auto cmd_iter = sql_cmd_map->find(cmd_name); + + if (cmd_iter != sql_cmd_map->end()) { + return {&cmd_iter->second->c_help}; + } + } + } + + auto prql_fqid_iter = find_string_attr_containing( + al.get_attrs(), &lnav::sql ::PRQL_FQID_ATTR, x); + if (prql_fqid_iter != al.get_attrs().end()) { + auto fqid = al.get_substring(prql_fqid_iter->sa_range); + auto cmd_iter = sql_cmd_map->find(fqid); + if (cmd_iter != sql_cmd_map->end()) { + return {&cmd_iter->second->c_help}; + } + + auto func_pair = lnav::sql::prql_functions.equal_range(fqid); + + for (auto func_iter = func_pair.first; func_iter != func_pair.second; + ++func_iter) + { + retval.emplace_back(func_iter->second); + return retval; + } } std::vector<std::string> kw; @@ -1218,3 +1257,312 @@ find_sql_help_for_line(const attr_line_t& al, size_t x) return retval; } + +namespace lnav { +namespace sql { + +auto_mem<char, sqlite3_free> +mprintf(const char* fmt, ...) +{ + auto_mem<char, sqlite3_free> retval; + va_list args; + + va_start(args, fmt); + retval = sqlite3_vmprintf(fmt, args); + va_end(args); + + return retval; +} + +bool +is_prql(const string_fragment& sf) +{ + auto trimmed = sf.trim().skip(string_fragment::tag1{';'}); + + return (trimmed.startswith("let ") || trimmed.startswith("from")); +} + +const char* prql_transforms[] = { + "aggregate", + "append", + "derive", + "filter", + "from", + "group", + "join", + "loop", + "select", + "sort", + "take", + "window", + + nullptr, +}; + +const char* prql_keywords[] = { + "average", "avg", "case", "count", "count_distinct", "false", "func", + "into", "let", "max", "min", "module", "null", "prql", + "stddev", "sum", "true", "type", + + nullptr, +}; + +std::string +prql_keyword_re() +{ + std::string retval = "(?:"; + bool first = true; + + for (const char* kw : prql_keywords) { + if (kw == nullptr) { + break; + } + if (!first) { + retval.append("|"); + } else { + first = false; + } + retval.append("\\b"); + retval.append(kw); + retval.append("\\b"); + } + retval += ")"; + + return retval; +} + +std::string +prql_transform_re() +{ + std::string retval = "(?:"; + bool first = true; + + for (const char* kw : prql_transforms) { + if (kw == nullptr) { + break; + } + if (!first) { + retval.append("|"); + } else { + first = false; + } + retval.append("\\b"); + retval.append(kw); + retval.append("\\b"); + } + retval += ")"; + + return retval; +} + +string_attr_type<void> PRQL_STAGE_ATTR("prql_stage"); +string_attr_type<void> PRQL_TRANSFORM_ATTR("prql_transform"); +string_attr_type<void> PRQL_KEYWORD_ATTR("prql_keyword"); +string_attr_type<void> PRQL_IDENTIFIER_ATTR("prql_ident"); +string_attr_type<void> PRQL_FQID_ATTR("prql_fqid"); +string_attr_type<void> PRQL_DOT_ATTR("prql_dot"); +string_attr_type<void> PRQL_PIPE_ATTR("prql_pipe"); +string_attr_type<void> PRQL_STRING_ATTR("prql_string"); +string_attr_type<void> PRQL_NUMBER_ATTR("prql_number"); +string_attr_type<void> PRQL_OPERATOR_ATTR("prql_oper"); +string_attr_type<void> PRQL_PAREN_ATTR("prql_paren"); +string_attr_type<void> PRQL_UNTERMINATED_PAREN_ATTR("prql_unterminated_paren"); +string_attr_type<void> PRQL_GARBAGE_ATTR("prql_garbage"); +string_attr_type<void> PRQL_COMMENT_ATTR("prql_comment"); + +void +annotate_prql_statement(attr_line_t& al) +{ + static const std::string keyword_re_str = R"(\A)" + prql_keyword_re(); + static const std::string transform_re_str = R"(\A)" + prql_transform_re(); + + static const struct { + lnav::pcre2pp::code re; + string_attr_type<void>* type; + } PATTERNS[] = { + { + lnav::pcre2pp::code::from_const(R"(\A(?:\[|\]|\{|\}|\(|\)))"), + &PRQL_PAREN_ATTR, + }, + { + lnav::pcre2pp::code::from(transform_re_str).unwrap(), + &PRQL_TRANSFORM_ATTR, + }, + { + lnav::pcre2pp::code::from(keyword_re_str).unwrap(), + &PRQL_KEYWORD_ATTR, + }, + { + lnav::pcre2pp::code::from_const(R"(\A(?:f|r|s)?'([^']|\\.)*')"), + &PRQL_STRING_ATTR, + }, + { + lnav::pcre2pp::code::from_const(R"(\A(?:f|r|s)?"([^\"]|\\.)*")"), + &PRQL_STRING_ATTR, + }, + { + lnav::pcre2pp::code::from_const(R"(\A0x[0-9a-fA-F]+)"), + &PRQL_NUMBER_ATTR, + }, + { + lnav::pcre2pp::code::from_const( + R"(\A-?\d+(?:\.\d+)?(?:[eE][\-\+]?\d+)?)"), + &PRQL_NUMBER_ATTR, + }, + { + lnav::pcre2pp::code::from_const( + R"(\A(?:(?:(?:\$)?\b[a-z_]\w*)|`([^`]+)`))", PCRE2_CASELESS), + &PRQL_IDENTIFIER_ATTR, + }, + { + lnav::pcre2pp::code::from_const(R"(\A#.*)"), + &PRQL_COMMENT_ATTR, + }, + { + lnav::pcre2pp::code::from_const( + R"(\A(\*|\->{1,2}|<|>|=>|={1,2}|\|\||&&|!|\-|\+|~=|\.\.|,|\?\?))"), + &PRQL_OPERATOR_ATTR, + }, + { + lnav::pcre2pp::code::from_const(R"(\A\|)"), + &PRQL_PIPE_ATTR, + }, + { + lnav::pcre2pp::code::from_const(R"(\A\.)"), + &PRQL_DOT_ATTR, + }, + { + lnav::pcre2pp::code::from_const(R"(\A.)"), + &PRQL_GARBAGE_ATTR, + }, + }; + + static const auto ws_pattern = lnav::pcre2pp::code::from_const(R"(\A\s+)"); + + const auto& line = al.get_string(); + auto& sa = al.get_attrs(); + auto remaining = string_fragment::from_str(line); + while (!remaining.empty()) { + auto ws_find_res = ws_pattern.find_in(remaining).ignore_error(); + if (ws_find_res) { + remaining = ws_find_res->f_remaining; + continue; + } + for (const auto& pat : PATTERNS) { + auto pat_find_res = pat.re.find_in(remaining).ignore_error(); + if (pat_find_res) { + sa.emplace_back(to_line_range(pat_find_res->f_all), + pat.type->value()); + remaining = pat_find_res->f_remaining; + break; + } + } + } + + auto stages = std::vector<int>{}; + std::vector<std::pair<char, int>> groups; + std::vector<line_range> fqids; + nonstd::optional<line_range> id_start; + bool saw_id_dot = false; + for (const auto& attr : sa) { + if (groups.empty() && attr.sa_type == &PRQL_PIPE_ATTR) { + stages.push_back(attr.sa_range.lr_start); + } + if (!id_start) { + if (attr.sa_type == &PRQL_IDENTIFIER_ATTR) { + id_start = attr.sa_range; + saw_id_dot = false; + } + } else if (!saw_id_dot) { + if (attr.sa_type == &PRQL_DOT_ATTR) { + saw_id_dot = true; + } else { + fqids.emplace_back(id_start.value()); + id_start = nonstd::nullopt; + saw_id_dot = false; + } + } else { + if (attr.sa_type == &PRQL_IDENTIFIER_ATTR) { + id_start = line_range{ + id_start.value().lr_start, + attr.sa_range.lr_end, + }; + } else { + id_start = nonstd::nullopt; + } + saw_id_dot = false; + } + if (attr.sa_type != &PRQL_PAREN_ATTR) { + continue; + } + + auto ch = line[attr.sa_range.lr_start]; + switch (ch) { + case '(': + case '{': + case '[': + groups.emplace_back(ch, attr.sa_range.lr_start); + break; + case ')': + if (!groups.empty() && groups.back().first == '(') { + groups.pop_back(); + } + break; + case '}': + if (!groups.empty() && groups.back().first == '{') { + groups.pop_back(); + } + break; + case ']': + if (!groups.empty() && groups.back().first == '[') { + groups.pop_back(); + } + break; + } + } + if (id_start) { + fqids.emplace_back(id_start.value()); + } + int prev_stage_index = 0; + for (auto stage_index : stages) { + sa.emplace_back(line_range{prev_stage_index, stage_index}, + PRQL_STAGE_ATTR.value()); + prev_stage_index = stage_index; + } + sa.emplace_back( + line_range{prev_stage_index, (int) al.get_string().length()}, + PRQL_STAGE_ATTR.value()); + for (const auto& group : groups) { + sa.emplace_back(line_range{group.second, group.second + 1}, + PRQL_UNTERMINATED_PAREN_ATTR.value()); + } + for (const auto& fqid_range : fqids) { + sa.emplace_back(fqid_range, PRQL_FQID_ATTR.value()); + } + + stable_sort(sa.begin(), sa.end()); +} + +} // namespace sql + +namespace prql { + +std::string +quote_ident(std::string id) +{ + static const auto PLAIN_NAME + = pcre2pp::code::from_const("^[a-zA-Z_][a-zA-Z_0-9]*$"); + + if (PLAIN_NAME.find_in(id).ignore_error()) { + return id; + } + + auto buf = auto_buffer::alloc(id.length() + 8); + quote_content(buf, id, '`'); + + return fmt::format(FMT_STRING("`{}`"), buf.in()); +} + +} // namespace prql + +} // namespace lnav |