diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 17:44:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 17:44:55 +0000 |
commit | 5068d34c08f951a7ea6257d305a1627b09a95817 (patch) | |
tree | 08213e2be853396a3b07ce15dbe222644dcd9a89 /src/log_format.cc | |
parent | Initial commit. (diff) | |
download | lnav-5068d34c08f951a7ea6257d305a1627b09a95817.tar.xz lnav-5068d34c08f951a7ea6257d305a1627b09a95817.zip |
Adding upstream version 0.11.1.upstream/0.11.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/log_format.cc')
-rw-r--r-- | src/log_format.cc | 3076 |
1 files changed, 3076 insertions, 0 deletions
diff --git a/src/log_format.cc b/src/log_format.cc new file mode 100644 index 0000000..3068b30 --- /dev/null +++ b/src/log_format.cc @@ -0,0 +1,3076 @@ +/** + * Copyright (c) 2007-2015, Timothy Stack + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <memory> + +#include <fnmatch.h> +#include <stdio.h> +#include <string.h> + +#include "base/snippet_highlighters.hh" +#include "base/string_util.hh" +#include "command_executor.hh" +#include "config.h" +#include "fmt/format.h" +#include "lnav_util.hh" +#include "log_format_ext.hh" +#include "log_search_table.hh" +#include "log_vtab_impl.hh" +#include "ptimec.hh" +#include "scn/scn.h" +#include "sql_util.hh" +#include "yajlpp/yajlpp.hh" +#include "yajlpp/yajlpp_def.hh" + +using namespace lnav::roles::literals; + +static auto intern_lifetime = intern_string::get_table_lifetime(); + +string_attr_type<void> logline::L_PREFIX("prefix"); +string_attr_type<void> logline::L_TIMESTAMP("timestamp"); +string_attr_type<std::shared_ptr<logfile>> logline::L_FILE("file"); +string_attr_type<bookmark_metadata*> logline::L_PARTITION("partition"); +string_attr_type<void> logline::L_MODULE("module"); +string_attr_type<void> logline::L_OPID("opid"); +string_attr_type<bookmark_metadata*> logline::L_META("meta"); + +external_log_format::mod_map_t external_log_format::MODULE_FORMATS; +std::vector<std::shared_ptr<external_log_format>> + external_log_format::GRAPH_ORDERED_FORMATS; + +struct line_range +logline_value::origin_in_full_msg(const char* msg, ssize_t len) const +{ + if (this->lv_sub_offset == 0) { + return this->lv_origin; + } + + if (len == -1) { + len = strlen(msg); + } + + struct line_range retval = this->lv_origin; + const char *last = msg, *msg_end = msg + len; + + for (int lpc = 0; lpc < this->lv_sub_offset; lpc++) { + const auto* next = (const char*) memchr(last, '\n', msg_end - last); + require(next != nullptr); + + next += 1; + int amount = (next - last); + + retval.lr_start += amount; + if (retval.lr_end != -1) { + retval.lr_end += amount; + } + + last = next + 1; + } + + if (retval.lr_end == -1) { + const auto* eol = (const char*) memchr(last, '\n', msg_end - last); + + if (eol == nullptr) { + retval.lr_end = len; + } else { + retval.lr_end = eol - msg; + } + } + + return retval; +} + +logline_value::logline_value(logline_value_meta lvm, + shared_buffer_ref& sbr, + struct line_range origin) + : lv_meta(std::move(lvm)), lv_origin(origin) +{ + if (sbr.get_data() == nullptr) { + this->lv_meta.lvm_kind = value_kind_t::VALUE_NULL; + } + + switch (this->lv_meta.lvm_kind) { + case value_kind_t::VALUE_JSON: + case value_kind_t::VALUE_XML: + case value_kind_t::VALUE_STRUCT: + case value_kind_t::VALUE_TEXT: + case value_kind_t::VALUE_QUOTED: + case value_kind_t::VALUE_W3C_QUOTED: + case value_kind_t::VALUE_TIMESTAMP: + require(origin.lr_end != -1); + this->lv_frag = string_fragment::from_byte_range( + sbr.get_data(), origin.lr_start, origin.lr_end); + break; + + case value_kind_t::VALUE_NULL: + break; + + case value_kind_t::VALUE_INTEGER: { + auto scan_res + = scn::scan_value<int64_t>(sbr.to_string_view(origin)); + if (scan_res) { + this->lv_value.i = scan_res.value(); + } else { + this->lv_value.i = 0; + } + break; + } + + case value_kind_t::VALUE_FLOAT: { + auto scan_res = scn::scan_value<double>(sbr.to_string_view(origin)); + if (scan_res) { + this->lv_value.d = scan_res.value(); + } else { + this->lv_value.d = 0; + } + break; + } + + case value_kind_t::VALUE_BOOLEAN: + if (strncmp( + sbr.get_data_at(origin.lr_start), "true", origin.length()) + == 0 + || strncmp( + sbr.get_data_at(origin.lr_start), "yes", origin.length()) + == 0) + { + this->lv_value.i = 1; + } else { + this->lv_value.i = 0; + } + break; + + case value_kind_t::VALUE_UNKNOWN: + case value_kind_t::VALUE__MAX: + ensure(0); + break; + } +} + +std::string +logline_value::to_string() const +{ + char buffer[128]; + + switch (this->lv_meta.lvm_kind) { + case value_kind_t::VALUE_NULL: + return "null"; + + case value_kind_t::VALUE_JSON: + case value_kind_t::VALUE_XML: + case value_kind_t::VALUE_STRUCT: + case value_kind_t::VALUE_TEXT: + case value_kind_t::VALUE_TIMESTAMP: + if (this->lv_str) { + return this->lv_str.value(); + } + if (this->lv_frag.empty()) { + return this->lv_intern_string.to_string(); + } + return this->lv_frag.to_string(); + + case value_kind_t::VALUE_QUOTED: + case value_kind_t::VALUE_W3C_QUOTED: + if (this->lv_frag.empty()) { + return ""; + } else { + switch (this->lv_frag.data()[0]) { + case '\'': + case '"': { + auto unquote_func = this->lv_meta.lvm_kind + == value_kind_t::VALUE_W3C_QUOTED + ? unquote_w3c + : unquote; + char unquoted_str[this->lv_frag.length()]; + size_t unquoted_len; + + unquoted_len = unquote_func(unquoted_str, + this->lv_frag.data(), + this->lv_frag.length()); + return {unquoted_str, unquoted_len}; + } + default: + return this->lv_frag.to_string(); + } + } + break; + + case value_kind_t::VALUE_INTEGER: + snprintf(buffer, sizeof(buffer), "%" PRId64, this->lv_value.i); + break; + + case value_kind_t::VALUE_FLOAT: + snprintf(buffer, sizeof(buffer), "%lf", this->lv_value.d); + break; + + case value_kind_t::VALUE_BOOLEAN: + if (this->lv_value.i) { + return "true"; + } else { + return "false"; + } + break; + case value_kind_t::VALUE_UNKNOWN: + case value_kind_t::VALUE__MAX: + ensure(0); + break; + } + + return {buffer}; +} + +std::vector<std::shared_ptr<log_format>> log_format::lf_root_formats; + +std::vector<std::shared_ptr<log_format>>& +log_format::get_root_formats() +{ + return lf_root_formats; +} + +static bool +next_format( + const std::vector<std::shared_ptr<external_log_format::pattern>>& patterns, + int& index, + int& locked_index) +{ + bool retval = true; + + if (locked_index == -1) { + index += 1; + if (index >= (int) patterns.size()) { + retval = false; + } + } else if (index == locked_index) { + retval = false; + } else { + index = locked_index; + } + + return retval; +} + +bool +log_format::next_format(pcre_format* fmt, int& index, int& locked_index) +{ + bool retval = true; + + if (locked_index == -1) { + index += 1; + if (fmt[index].name == nullptr) { + retval = false; + } + } else if (index == locked_index) { + retval = false; + } else { + index = locked_index; + } + + return retval; +} + +const char* +log_format::log_scanf(uint32_t line_number, + string_fragment line, + pcre_format* fmt, + const char* time_fmt[], + struct exttm* tm_out, + struct timeval* tv_out, + + string_fragment* ts_out, + nonstd::optional<string_fragment>* level_out) +{ + int curr_fmt = -1; + const char* retval = nullptr; + bool done = false; + int pat_index = this->last_pattern_index(); + + while (!done && next_format(fmt, curr_fmt, pat_index)) { + static thread_local auto md = lnav::pcre2pp::match_data::unitialized(); + + auto match_res = fmt[curr_fmt] + .pcre->capture_from(line) + .into(md) + .matches(PCRE2_NO_UTF_CHECK) + .ignore_error(); + if (!match_res) { + retval = nullptr; + } else { + auto ts = md[fmt[curr_fmt].pf_timestamp_index]; + + retval = this->lf_date_time.scan( + ts->data(), ts->length(), nullptr, tm_out, *tv_out); + + if (retval) { + *ts_out = ts.value(); + *level_out = md[2]; + if (curr_fmt != pat_index) { + uint32_t lock_line; + + if (this->lf_pattern_locks.empty()) { + lock_line = 0; + } else { + lock_line = line_number; + } + + this->lf_pattern_locks.emplace_back(lock_line, curr_fmt); + } + this->lf_timestamp_flags = tm_out->et_flags; + done = true; + } + } + } + + return retval; +} + +void +log_format::check_for_new_year(std::vector<logline>& dst, + exttm etm, + struct timeval log_tv) +{ + if (dst.empty()) { + return; + } + + time_t diff = dst.back().get_time() - log_tv.tv_sec; + int off_year = 0, off_month = 0, off_day = 0, off_hour = 0; + bool do_change = true; + + if (diff <= 0) { + return; + } + if ((etm.et_flags & ETF_MONTH_SET) && diff >= (24 * 60 * 60)) { + off_year = 1; + } else if (diff >= (24 * 60 * 60)) { + off_month = 1; + } else if (!(etm.et_flags & ETF_DAY_SET) && (diff >= (60 * 60))) { + off_day = 1; + } else if (!(etm.et_flags & ETF_DAY_SET)) { + off_hour = 1; + } else { + do_change = false; + } + + if (!do_change) { + return; + } + log_debug("%d:detected time rollover; offsets=%d %d %d %d", + dst.size(), + off_year, + off_month, + off_day, + off_hour); + for (auto& ll : dst) { + time_t ot = ll.get_time(); + struct tm otm; + + gmtime_r(&ot, &otm); + if (otm.tm_year < off_year) { + otm.tm_year = 0; + } else { + otm.tm_year -= off_year; + } + otm.tm_mon -= off_month; + if (otm.tm_mon < 0) { + otm.tm_mon += 12; + } + auto new_time = tm2sec(&otm); + if (new_time == -1) { + continue; + } + new_time -= (off_day * 24 * 60 * 60) + (off_hour * 60 * 60); + ll.set_time(new_time); + } +} + +/* + * XXX This needs some cleanup. + */ +struct json_log_userdata { + json_log_userdata(shared_buffer_ref& sbr, scan_batch_context* sbc) + : jlu_shared_buffer(sbr), jlu_batch_context(sbc) + { + } + + external_log_format* jlu_format{nullptr}; + const logline* jlu_line{nullptr}; + logline* jlu_base_line{nullptr}; + int jlu_sub_line_count{1}; + yajl_handle jlu_handle{nullptr}; + const char* jlu_line_value{nullptr}; + size_t jlu_line_size{0}; + size_t jlu_sub_start{0}; + shared_buffer_ref& jlu_shared_buffer; + scan_batch_context* jlu_batch_context; +}; + +static int read_json_field(yajlpp_parse_context* ypc, + const unsigned char* str, + size_t len); + +static int +read_json_null(yajlpp_parse_context* ypc) +{ + json_log_userdata* jlu = (json_log_userdata*) ypc->ypc_userdata; + const intern_string_t field_name = ypc->get_path(); + + jlu->jlu_sub_line_count + += jlu->jlu_format->value_line_count(field_name, ypc->is_level(1)); + + return 1; +} + +static int +read_json_bool(yajlpp_parse_context* ypc, int val) +{ + json_log_userdata* jlu = (json_log_userdata*) ypc->ypc_userdata; + const intern_string_t field_name = ypc->get_path(); + + jlu->jlu_sub_line_count + += jlu->jlu_format->value_line_count(field_name, ypc->is_level(1)); + + return 1; +} + +static int +read_json_int(yajlpp_parse_context* ypc, long long val) +{ + json_log_userdata* jlu = (json_log_userdata*) ypc->ypc_userdata; + const intern_string_t field_name = ypc->get_path(); + + if (jlu->jlu_format->lf_timestamp_field == field_name) { + long long divisor = jlu->jlu_format->elf_timestamp_divisor; + struct timeval tv; + + tv.tv_sec = val / divisor; + tv.tv_usec = (val % divisor) * (1000000.0 / divisor); + if (jlu->jlu_format->lf_date_time.dts_local_time) { + struct tm ltm; + localtime_r(&tv.tv_sec, <m); +#ifdef HAVE_STRUCT_TM_TM_ZONE + ltm.tm_zone = nullptr; +#endif + ltm.tm_isdst = 0; + tv.tv_sec = tm2sec(<m); + } + jlu->jlu_base_line->set_time(tv); + } else if (jlu->jlu_format->lf_subsecond_field == field_name) { + uint64_t millis = 0; + switch (jlu->jlu_format->lf_subsecond_unit.value()) { + case log_format::subsecond_unit::milli: + millis = val; + break; + case log_format::subsecond_unit::micro: + millis = std::chrono::duration_cast<std::chrono::milliseconds>( + std::chrono::microseconds(val)) + .count(); + break; + case log_format::subsecond_unit::nano: + millis = std::chrono::duration_cast<std::chrono::milliseconds>( + std::chrono::nanoseconds(val)) + .count(); + break; + } + jlu->jlu_base_line->set_millis(millis); + } else if (jlu->jlu_format->elf_level_field == field_name) { + if (jlu->jlu_format->elf_level_pairs.empty()) { + char level_buf[128]; + + snprintf(level_buf, sizeof(level_buf), "%lld", val); + + jlu->jlu_base_line->set_level(jlu->jlu_format->convert_level( + string_fragment::from_c_str(level_buf), + jlu->jlu_batch_context)); + } else { + std::vector<std::pair<int64_t, log_level_t>>::iterator iter; + + for (iter = jlu->jlu_format->elf_level_pairs.begin(); + iter != jlu->jlu_format->elf_level_pairs.end(); + ++iter) + { + if (iter->first == val) { + jlu->jlu_base_line->set_level(iter->second); + break; + } + } + } + } + + jlu->jlu_sub_line_count + += jlu->jlu_format->value_line_count(field_name, ypc->is_level(1)); + + return 1; +} + +static int +read_json_double(yajlpp_parse_context* ypc, double val) +{ + json_log_userdata* jlu = (json_log_userdata*) ypc->ypc_userdata; + const intern_string_t field_name = ypc->get_path(); + + if (jlu->jlu_format->lf_timestamp_field == field_name) { + double divisor = jlu->jlu_format->elf_timestamp_divisor; + struct timeval tv; + + tv.tv_sec = val / divisor; + tv.tv_usec = fmod(val, divisor) * (1000000.0 / divisor); + if (jlu->jlu_format->lf_date_time.dts_local_time) { + struct tm ltm; + localtime_r(&tv.tv_sec, <m); +#ifdef HAVE_STRUCT_TM_TM_ZONE + ltm.tm_zone = nullptr; +#endif + ltm.tm_isdst = 0; + tv.tv_sec = tm2sec(<m); + } + jlu->jlu_base_line->set_time(tv); + } + + jlu->jlu_sub_line_count + += jlu->jlu_format->value_line_count(field_name, ypc->is_level(1)); + + return 1; +} + +static int +json_array_start(void* ctx) +{ + yajlpp_parse_context* ypc = (yajlpp_parse_context*) ctx; + json_log_userdata* jlu = (json_log_userdata*) ypc->ypc_userdata; + + if (ypc->ypc_path_index_stack.size() == 2) { + const intern_string_t field_name = ypc->get_path_fragment_i(0); + + jlu->jlu_sub_line_count + += jlu->jlu_format->value_line_count(field_name, true); + jlu->jlu_sub_start = yajl_get_bytes_consumed(jlu->jlu_handle) - 1; + } + + return 1; +} + +static int +json_array_end(void* ctx) +{ + yajlpp_parse_context* ypc = (yajlpp_parse_context*) ctx; + json_log_userdata* jlu = (json_log_userdata*) ypc->ypc_userdata; + + if (ypc->ypc_path_index_stack.size() == 1) { + const intern_string_t field_name = ypc->get_path_fragment_i(0); + size_t sub_end = yajl_get_bytes_consumed(jlu->jlu_handle); + jlu->jlu_format->jlf_line_values.lvv_values.emplace_back( + jlu->jlu_format->get_value_meta(field_name, + value_kind_t::VALUE_JSON), + string_fragment::from_byte_range(jlu->jlu_shared_buffer.get_data(), + jlu->jlu_sub_start, + sub_end)); + } + + return 1; +} + +static const struct json_path_container json_log_handlers = { + yajlpp::pattern_property_handler("\\w+") + .add_cb(read_json_null) + .add_cb(read_json_bool) + .add_cb(read_json_int) + .add_cb(read_json_double) + .add_cb(read_json_field), +}; + +static int rewrite_json_field(yajlpp_parse_context* ypc, + const unsigned char* str, + size_t len); + +static int +rewrite_json_null(yajlpp_parse_context* ypc) +{ + json_log_userdata* jlu = (json_log_userdata*) ypc->ypc_userdata; + const intern_string_t field_name = ypc->get_path(); + + if (!ypc->is_level(1) && !jlu->jlu_format->has_value_def(field_name)) { + return 1; + } + jlu->jlu_format->jlf_line_values.lvv_values.emplace_back( + jlu->jlu_format->get_value_meta(field_name, value_kind_t::VALUE_NULL)); + + return 1; +} + +static int +rewrite_json_bool(yajlpp_parse_context* ypc, int val) +{ + json_log_userdata* jlu = (json_log_userdata*) ypc->ypc_userdata; + const intern_string_t field_name = ypc->get_path(); + + if (!ypc->is_level(1) && !jlu->jlu_format->has_value_def(field_name)) { + return 1; + } + jlu->jlu_format->jlf_line_values.lvv_values.emplace_back( + jlu->jlu_format->get_value_meta(field_name, + value_kind_t::VALUE_BOOLEAN), + (bool) val); + return 1; +} + +static int +rewrite_json_int(yajlpp_parse_context* ypc, long long val) +{ + json_log_userdata* jlu = (json_log_userdata*) ypc->ypc_userdata; + const intern_string_t field_name = ypc->get_path(); + + if (!ypc->is_level(1) && !jlu->jlu_format->has_value_def(field_name)) { + return 1; + } + jlu->jlu_format->jlf_line_values.lvv_values.emplace_back( + jlu->jlu_format->get_value_meta(field_name, + value_kind_t::VALUE_INTEGER), + (int64_t) val); + return 1; +} + +static int +rewrite_json_double(yajlpp_parse_context* ypc, double val) +{ + json_log_userdata* jlu = (json_log_userdata*) ypc->ypc_userdata; + const intern_string_t field_name = ypc->get_path(); + + if (!ypc->is_level(1) && !jlu->jlu_format->has_value_def(field_name)) { + return 1; + } + jlu->jlu_format->jlf_line_values.lvv_values.emplace_back( + jlu->jlu_format->get_value_meta(field_name, value_kind_t::VALUE_FLOAT), + val); + + return 1; +} + +static const struct json_path_container json_log_rewrite_handlers = { + yajlpp::pattern_property_handler("\\w+") + .add_cb(rewrite_json_null) + .add_cb(rewrite_json_bool) + .add_cb(rewrite_json_int) + .add_cb(rewrite_json_double) + .add_cb(rewrite_json_field), +}; + +bool +external_log_format::scan_for_partial(shared_buffer_ref& sbr, + size_t& len_out) const +{ + if (this->elf_type != elf_type_t::ELF_TYPE_TEXT) { + return false; + } + + const auto& pat = this->elf_pattern_order[this->last_pattern_index()]; + if (!this->lf_multiline) { + len_out = pat->p_pcre.pp_value->match_partial(sbr.to_string_fragment()); + return true; + } + + if (pat->p_timestamp_end == -1 || pat->p_timestamp_end > (int) sbr.length()) + { + len_out = 0; + return false; + } + + len_out = pat->p_pcre.pp_value->match_partial(sbr.to_string_fragment()); + return (int) len_out > pat->p_timestamp_end; +} + +std::vector<lnav::console::snippet> +external_log_format::get_snippets() const +{ + std::vector<lnav::console::snippet> retval; + + for (const auto& src_pair : this->elf_format_sources) { + retval.emplace_back(lnav::console::snippet::from(src_pair.first, "") + .with_line(src_pair.second)); + } + + return retval; +} + +log_format::scan_result_t +external_log_format::scan(logfile& lf, + std::vector<logline>& dst, + const line_info& li, + shared_buffer_ref& sbr, + scan_batch_context& sbc) +{ + if (this->elf_type == elf_type_t::ELF_TYPE_JSON) { + logline ll(li.li_file_range.fr_offset, 0, 0, LEVEL_INFO); + auto line_frag = sbr.to_string_fragment(); + + if (!line_frag.startswith("{")) { + if (!this->lf_specialized) { + return log_format::SCAN_NO_MATCH; + } + + ll.set_time(dst.back().get_timeval()); + ll.set_level(LEVEL_INVALID); + dst.emplace_back(ll); + return log_format::SCAN_MATCH; + } + + auto& ypc = *(this->jlf_parse_context); + yajl_handle handle = this->jlf_yajl_handle.get(); + json_log_userdata jlu(sbr, &sbc); + + if (!this->lf_specialized && dst.size() >= 3) { + return log_format::SCAN_NO_MATCH; + } + + if (li.li_partial) { + log_debug("skipping partial line at offset %d", + li.li_file_range.fr_offset); + if (this->lf_specialized) { + ll.set_level(LEVEL_INVALID); + dst.emplace_back(ll); + } + return log_format::SCAN_INCOMPLETE; + } + + const auto* line_data = (const unsigned char*) sbr.get_data(); + + yajl_reset(handle); + ypc.set_static_handler(json_log_handlers.jpc_children[0]); + ypc.ypc_userdata = &jlu; + ypc.ypc_ignore_unused = true; + ypc.ypc_alt_callbacks.yajl_start_array = json_array_start; + ypc.ypc_alt_callbacks.yajl_start_map = json_array_start; + ypc.ypc_alt_callbacks.yajl_end_array = nullptr; + ypc.ypc_alt_callbacks.yajl_end_map = nullptr; + jlu.jlu_format = this; + jlu.jlu_base_line = ≪ + jlu.jlu_line_value = sbr.get_data(); + jlu.jlu_line_size = sbr.length(); + jlu.jlu_handle = handle; + if (yajl_parse(handle, line_data, sbr.length()) == yajl_status_ok + && yajl_complete_parse(handle) == yajl_status_ok) + { + if (ll.get_time() == 0) { + if (this->lf_specialized) { + ll.set_ignore(true); + dst.emplace_back(ll); + return log_format::SCAN_MATCH; + } + + log_debug("no match! %.*s", sbr.length(), line_data); + return log_format::SCAN_NO_MATCH; + } + + jlu.jlu_sub_line_count += this->jlf_line_format_init_count; + for (int lpc = 0; lpc < jlu.jlu_sub_line_count; lpc++) { + ll.set_sub_offset(lpc); + if (lpc > 0) { + ll.set_level((log_level_t) (ll.get_level_and_flags() + | LEVEL_CONTINUED)); + } + dst.emplace_back(ll); + } + } else { + unsigned char* msg; + int line_count = 2; + + msg = yajl_get_error( + handle, 1, (const unsigned char*) sbr.get_data(), sbr.length()); + if (msg != nullptr) { + auto msg_frag = string_fragment::from_c_str(msg); + log_debug("Unable to parse line at offset %d: %s", + li.li_file_range.fr_offset, + msg); + line_count = msg_frag.count('\n') + 1; + yajl_free_error(handle, msg); + } + if (!this->lf_specialized) { + return log_format::SCAN_NO_MATCH; + } + for (int lpc = 0; lpc < line_count; lpc++) { + log_level_t level = LEVEL_INVALID; + + ll.set_time(dst.back().get_timeval()); + if (lpc > 0) { + level = (log_level_t) (level | LEVEL_CONTINUED); + } + ll.set_level(level); + ll.set_sub_offset(lpc); + dst.emplace_back(ll); + } + } + + return log_format::SCAN_MATCH; + } + + int curr_fmt = -1, orig_lock = this->last_pattern_index(); + int pat_index = orig_lock; + auto line_sf = sbr.to_string_fragment(); + + while (::next_format(this->elf_pattern_order, curr_fmt, pat_index)) { + static thread_local auto md = lnav::pcre2pp::match_data::unitialized(); + + auto* fpat = this->elf_pattern_order[curr_fmt].get(); + auto* pat = fpat->p_pcre.pp_value.get(); + + if (fpat->p_module_format) { + continue; + } + + auto match_res = pat->capture_from(line_sf) + .into(md) + .matches(PCRE2_NO_UTF_CHECK) + .ignore_error(); + if (!match_res) { + if (!this->lf_pattern_locks.empty() && pat_index != -1) { + curr_fmt = -1; + pat_index = -1; + } + continue; + } + + auto ts = md[fpat->p_timestamp_field_index]; + auto time_cap = md[fpat->p_time_field_index]; + auto level_cap = md[fpat->p_level_field_index]; + auto mod_cap = md[fpat->p_module_field_index]; + auto opid_cap = md[fpat->p_opid_field_index]; + auto body_cap = md[fpat->p_body_field_index]; + const char* last; + struct exttm log_time_tm; + struct timeval log_tv; + uint8_t mod_index = 0, opid = 0; + char combined_datetime_buf[512]; + + if (ts && time_cap) { + auto ts_str_len = snprintf(combined_datetime_buf, + sizeof(combined_datetime_buf), + "%.*sT%.*s", + ts->length(), + ts->data(), + time_cap->length(), + time_cap->data()); + ts = string_fragment::from_bytes(combined_datetime_buf, ts_str_len); + } + + if ((last = this->lf_date_time.scan(ts->data(), + ts->length(), + this->get_timestamp_formats(), + &log_time_tm, + log_tv)) + == nullptr) + { + this->lf_date_time.unlock(); + if ((last = this->lf_date_time.scan(ts->data(), + ts->length(), + this->get_timestamp_formats(), + &log_time_tm, + log_tv)) + == nullptr) + { + continue; + } + } + + auto level = this->convert_level( + level_cap.value_or(string_fragment::invalid()), &sbc); + + this->lf_timestamp_flags = log_time_tm.et_flags; + + if (!((log_time_tm.et_flags & ETF_DAY_SET) + && (log_time_tm.et_flags & ETF_MONTH_SET) + && (log_time_tm.et_flags & ETF_YEAR_SET))) + { + this->check_for_new_year(dst, log_time_tm, log_tv); + } + + if (opid_cap && !opid_cap->empty()) { + { + auto opid_iter = sbc.sbc_opids.find(opid_cap.value()); + + if (opid_iter == sbc.sbc_opids.end()) { + auto opid_copy = opid_cap->to_owned(sbc.sbc_allocator); + auto otr = opid_time_range{log_tv, log_tv}; + sbc.sbc_opids.emplace(opid_copy, otr); + } else { + opid_iter->second.otr_end = log_tv; + } + } + opid = hash_str(opid_cap->data(), opid_cap->length()); + } + + if (mod_cap) { + intern_string_t mod_name = intern_string::lookup(mod_cap.value()); + auto mod_iter = MODULE_FORMATS.find(mod_name); + + if (mod_iter == MODULE_FORMATS.end()) { + mod_index = this->module_scan(body_cap.value(), mod_name); + mod_iter = MODULE_FORMATS.find(mod_name); + } else if (mod_iter->second.mf_mod_format) { + mod_index = mod_iter->second.mf_mod_format->lf_mod_index; + } + + if (mod_index && level_cap && body_cap) { + auto mod_elf = std::dynamic_pointer_cast<external_log_format>( + mod_iter->second.mf_mod_format); + + if (mod_elf) { + static thread_local auto mod_md + = lnav::pcre2pp::match_data::unitialized(); + + shared_buffer_ref body_ref; + + body_cap->trim(); + + int mod_pat_index = mod_elf->last_pattern_index(); + auto& mod_pat = *mod_elf->elf_pattern_order[mod_pat_index]; + auto match_res = mod_pat.p_pcre.pp_value + ->capture_from(body_cap.value()) + .into(mod_md) + .matches(PCRE2_NO_UTF_CHECK) + .ignore_error(); + if (match_res) { + auto mod_level_cap + = mod_md[mod_pat.p_level_field_index]; + + level = mod_elf->convert_level( + mod_level_cap.value_or(string_fragment::invalid()), + &sbc); + } + } + } + } + + for (auto value_index : fpat->p_numeric_value_indexes) { + const indexed_value_def& ivd = fpat->p_value_by_index[value_index]; + const value_def& vd = *ivd.ivd_value_def; + auto num_cap = md[ivd.ivd_index]; + + if (num_cap && num_cap->is_valid()) { + const struct scaling_factor* scaling = nullptr; + + if (ivd.ivd_unit_field_index >= 0) { + auto unit_cap = md[ivd.ivd_unit_field_index]; + + if (unit_cap && unit_cap->is_valid()) { + intern_string_t unit_val + = intern_string::lookup(unit_cap.value()); + + auto unit_iter = vd.vd_unit_scaling.find(unit_val); + if (unit_iter != vd.vd_unit_scaling.end()) { + const auto& sf = unit_iter->second; + + scaling = &sf; + } + } + } + + auto scan_res + = scn::scan_value<double>(num_cap->to_string_view()); + if (scan_res) { + auto dvalue = scan_res.value(); + if (scaling != nullptr) { + scaling->scale(dvalue); + } + this->lf_value_stats[vd.vd_values_index].add_value(dvalue); + } + } + } + + dst.emplace_back( + li.li_file_range.fr_offset, log_tv, level, mod_index, opid); + + if (orig_lock != curr_fmt) { + uint32_t lock_line; + + log_debug("%zu: changing pattern lock %d -> %d", + dst.size() - 1, + orig_lock, + curr_fmt); + if (this->lf_pattern_locks.empty()) { + lock_line = 0; + } else { + lock_line = dst.size() - 1; + } + this->lf_pattern_locks.emplace_back(lock_line, curr_fmt); + } + return log_format::SCAN_MATCH; + } + + if (this->lf_specialized && !this->lf_multiline) { + auto& last_line = dst.back(); + + log_debug("invalid line %d %d", dst.size(), li.li_file_range.fr_offset); + dst.emplace_back(li.li_file_range.fr_offset, + last_line.get_timeval(), + log_level_t::LEVEL_INVALID); + + return log_format::SCAN_MATCH; + } + + return log_format::SCAN_NO_MATCH; +} + +uint8_t +external_log_format::module_scan(string_fragment body_cap, + const intern_string_t& mod_name) +{ + uint8_t mod_index; + body_cap.trim(); + auto& ext_fmts = GRAPH_ORDERED_FORMATS; + module_format mf; + + for (auto& elf : ext_fmts) { + int curr_fmt = -1, fmt_lock = -1; + + while (::next_format(elf->elf_pattern_order, curr_fmt, fmt_lock)) { + static thread_local auto md + = lnav::pcre2pp::match_data::unitialized(); + + auto& fpat = elf->elf_pattern_order[curr_fmt]; + auto& pat = fpat->p_pcre; + + if (!fpat->p_module_format) { + continue; + } + + auto match_res = pat.pp_value->capture_from(body_cap) + .into(md) + .matches(PCRE2_NO_UTF_CHECK) + .ignore_error(); + if (!match_res) { + continue; + } + + log_debug("%s:module format found -- %s (%d)", + mod_name.get(), + elf->get_name().get(), + elf->lf_mod_index); + + mod_index = elf->lf_mod_index; + mf.mf_mod_format = elf->specialized(curr_fmt); + MODULE_FORMATS[mod_name] = mf; + + return mod_index; + } + } + + MODULE_FORMATS[mod_name] = mf; + + return 0; +} + +void +external_log_format::annotate(uint64_t line_number, + string_attrs_t& sa, + logline_value_vector& values, + bool annotate_module) const +{ + static thread_local auto md = lnav::pcre2pp::match_data::unitialized(); + + auto& line = values.lvv_sbr; + struct line_range lr; + + line.erase_ansi(); + if (this->elf_type != elf_type_t::ELF_TYPE_TEXT) { + values = this->jlf_line_values; + sa = this->jlf_line_attrs; + return; + } + + if (line.empty()) { + return; + } + + values.lvv_values.reserve(this->elf_value_defs.size()); + + int pat_index = this->pattern_index_for_line(line_number); + auto& pat = *this->elf_pattern_order[pat_index]; + + sa.reserve(pat.p_pcre.pp_value->get_capture_count()); + auto match_res + = pat.p_pcre.pp_value->capture_from(line.to_string_fragment()) + .into(md) + .matches(PCRE2_NO_UTF_CHECK) + .ignore_error(); + if (!match_res) { + // A continued line still needs a body. + lr.lr_start = 0; + lr.lr_end = line.length(); + sa.emplace_back(lr, SA_BODY.value()); + if (!this->lf_multiline) { + auto len + = pat.p_pcre.pp_value->match_partial(line.to_string_fragment()); + sa.emplace_back( + line_range{(int) len, -1}, + SA_INVALID.value("Log line does not match any pattern")); + } + return; + } + + nonstd::optional<string_fragment> module_cap; + if (!pat.p_module_format) { + auto ts_cap = md[pat.p_timestamp_field_index]; + if (ts_cap) { + sa.emplace_back(to_line_range(ts_cap.value()), + logline::L_TIMESTAMP.value()); + } + + if (pat.p_module_field_index != -1) { + module_cap = md[pat.p_module_field_index]; + if (module_cap) { + sa.emplace_back(to_line_range(module_cap.value()), + logline::L_MODULE.value()); + } + } + + auto opid_cap = md[pat.p_opid_field_index]; + if (opid_cap) { + sa.emplace_back(to_line_range(opid_cap.value()), + logline::L_OPID.value()); + } + } + + auto body_cap = md[pat.p_body_field_index]; + + for (size_t lpc = 0; lpc < pat.p_value_by_index.size(); lpc++) { + const indexed_value_def& ivd = pat.p_value_by_index[lpc]; + const struct scaling_factor* scaling = nullptr; + auto cap = md[ivd.ivd_index]; + const auto& vd = *ivd.ivd_value_def; + + if (ivd.ivd_unit_field_index >= 0) { + auto unit_cap = md[ivd.ivd_unit_field_index]; + + if (unit_cap) { + intern_string_t unit_val + = intern_string::lookup(unit_cap.value()); + auto unit_iter = vd.vd_unit_scaling.find(unit_val); + if (unit_iter != vd.vd_unit_scaling.end()) { + const struct scaling_factor& sf = unit_iter->second; + + scaling = &sf; + } + } + } + + if (cap) { + values.lvv_values.emplace_back( + vd.vd_meta, line, to_line_range(cap.value())); + values.lvv_values.back().apply_scaling(scaling); + } else { + values.lvv_values.emplace_back(vd.vd_meta); + } + if (pat.p_module_format) { + values.lvv_values.back().lv_meta.lvm_from_module = true; + } + } + + bool did_mod_annotate_body = false; + if (annotate_module && module_cap && body_cap && body_cap->is_valid()) { + intern_string_t mod_name = intern_string::lookup(module_cap.value()); + auto mod_iter = MODULE_FORMATS.find(mod_name); + + if (mod_iter != MODULE_FORMATS.end() + && mod_iter->second.mf_mod_format != nullptr) + { + auto& mf = mod_iter->second; + + body_cap->trim(); + auto narrow_res + = line.narrow(body_cap->sf_begin, body_cap->length()); + auto pre_mod_values_size = values.lvv_values.size(); + auto pre_mod_sa_size = sa.size(); + mf.mf_mod_format->annotate(line_number, sa, values, false); + for (size_t lpc = pre_mod_values_size; + lpc < values.lvv_values.size(); + lpc++) + { + values.lvv_values[lpc].lv_origin.shift(0, body_cap->sf_begin); + } + for (size_t lpc = pre_mod_sa_size; lpc < sa.size(); lpc++) { + sa[lpc].sa_range.shift(0, body_cap->sf_begin); + } + line.widen(narrow_res); + did_mod_annotate_body = true; + } + } + if (!did_mod_annotate_body) { + if (body_cap && body_cap->is_valid()) { + lr = to_line_range(body_cap.value()); + } else { + lr.lr_start = line.length(); + lr.lr_end = line.length(); + } + sa.emplace_back(lr, SA_BODY.value()); + } +} + +void +external_log_format::rewrite(exec_context& ec, + shared_buffer_ref& line, + string_attrs_t& sa, + std::string& value_out) +{ + std::vector<logline_value>::iterator shift_iter; + auto& values = *ec.ec_line_values; + + value_out.assign(line.get_data(), line.length()); + + for (auto iter = values.lvv_values.begin(); iter != values.lvv_values.end(); + ++iter) + { + if (!iter->lv_origin.is_valid()) { + log_debug("not rewriting value with invalid origin -- %s", + iter->lv_meta.lvm_name.get()); + continue; + } + + auto vd_iter = this->elf_value_defs.find(iter->lv_meta.lvm_name); + if (vd_iter == this->elf_value_defs.end()) { + log_debug("not rewriting undefined value -- %s", + iter->lv_meta.lvm_name.get()); + continue; + } + + const auto& vd = *vd_iter->second; + + if (vd.vd_rewriter.empty()) { + continue; + } + + auto _sg = ec.enter_source( + vd_iter->second->vd_rewrite_src_name, 1, vd.vd_rewriter); + std::string field_value; + + auto exec_res = execute_any(ec, vd.vd_rewriter); + if (exec_res.isOk()) { + field_value = exec_res.unwrap(); + } else { + field_value = exec_res.unwrapErr().to_attr_line().get_string(); + } + auto adj_origin + = iter->origin_in_full_msg(value_out.c_str(), value_out.length()); + + value_out.erase(adj_origin.lr_start, adj_origin.length()); + + int32_t shift_amount + = ((int32_t) field_value.length()) - adj_origin.length(); + value_out.insert(adj_origin.lr_start, field_value); + for (shift_iter = values.lvv_values.begin(); + shift_iter != values.lvv_values.end(); + ++shift_iter) + { + shift_iter->lv_origin.shift(adj_origin.lr_start, shift_amount); + } + shift_string_attrs(sa, adj_origin.lr_start, shift_amount); + } +} + +static int +read_json_field(yajlpp_parse_context* ypc, const unsigned char* str, size_t len) +{ + json_log_userdata* jlu = (json_log_userdata*) ypc->ypc_userdata; + const intern_string_t field_name = ypc->get_path(); + struct exttm tm_out; + struct timeval tv_out; + + if (jlu->jlu_format->lf_timestamp_field == field_name) { + jlu->jlu_format->lf_date_time.scan( + (const char*) str, + len, + jlu->jlu_format->get_timestamp_formats(), + &tm_out, + tv_out); + // Leave off the machine oriented flag since we convert it anyhow + jlu->jlu_format->lf_timestamp_flags + = tm_out.et_flags & ~ETF_MACHINE_ORIENTED; + jlu->jlu_base_line->set_time(tv_out); + } else if (jlu->jlu_format->elf_level_pointer.pp_value != nullptr) { + if (jlu->jlu_format->elf_level_pointer.pp_value + ->find_in(field_name.to_string_fragment(), PCRE2_NO_UTF_CHECK) + .ignore_error() + .has_value()) + { + jlu->jlu_base_line->set_level(jlu->jlu_format->convert_level( + string_fragment::from_bytes(str, len), jlu->jlu_batch_context)); + } + } + if (jlu->jlu_format->elf_level_field == field_name) { + jlu->jlu_base_line->set_level(jlu->jlu_format->convert_level( + string_fragment::from_bytes(str, len), jlu->jlu_batch_context)); + } + if (jlu->jlu_format->elf_opid_field == field_name) { + uint8_t opid = hash_str((const char*) str, len); + jlu->jlu_base_line->set_opid(opid); + } + + jlu->jlu_sub_line_count += jlu->jlu_format->value_line_count( + field_name, ypc->is_level(1), str, len); + + return 1; +} + +static int +rewrite_json_field(yajlpp_parse_context* ypc, + const unsigned char* str, + size_t len) +{ + static const intern_string_t body_name = intern_string::lookup("body", -1); + json_log_userdata* jlu = (json_log_userdata*) ypc->ypc_userdata; + const intern_string_t field_name = ypc->get_path(); + + if (jlu->jlu_format->lf_timestamp_field == field_name) { + char time_buf[64]; + + // TODO add a timeval kind to logline_value + if (jlu->jlu_line->is_time_skewed()) { + struct timeval tv; + struct exttm tm; + + jlu->jlu_format->lf_date_time.scan( + (const char*) str, + len, + jlu->jlu_format->get_timestamp_formats(), + &tm, + tv); + sql_strftime(time_buf, sizeof(time_buf), tv, 'T'); + } else { + sql_strftime( + time_buf, sizeof(time_buf), jlu->jlu_line->get_timeval(), 'T'); + } + jlu->jlu_format->jlf_line_values.lvv_values.emplace_back( + jlu->jlu_format->get_value_meta(field_name, + value_kind_t::VALUE_TEXT), + std::string{time_buf}); + } else if (jlu->jlu_shared_buffer.contains((const char*) str)) { + auto str_offset = (int) ((const char*) str - jlu->jlu_line_value); + if (field_name == jlu->jlu_format->elf_body_field) { + jlu->jlu_format->jlf_line_values.lvv_values.emplace_back( + jlu->jlu_format->get_value_meta(body_name, + value_kind_t::VALUE_TEXT), + string_fragment::from_byte_range( + jlu->jlu_shared_buffer.get_data(), + str_offset, + str_offset + len)); + } + if (!ypc->is_level(1) && !jlu->jlu_format->has_value_def(field_name)) { + return 1; + } + + jlu->jlu_format->jlf_line_values.lvv_values.emplace_back( + jlu->jlu_format->get_value_meta(field_name, + value_kind_t::VALUE_TEXT), + string_fragment::from_byte_range(jlu->jlu_shared_buffer.get_data(), + str_offset, + str_offset + len)); + } else { + if (field_name == jlu->jlu_format->elf_body_field) { + jlu->jlu_format->jlf_line_values.lvv_values.emplace_back( + jlu->jlu_format->get_value_meta(body_name, + value_kind_t::VALUE_TEXT), + std::string{(const char*) str, len}); + } + if (!ypc->is_level(1) && !jlu->jlu_format->has_value_def(field_name)) { + return 1; + } + + jlu->jlu_format->jlf_line_values.lvv_values.emplace_back( + jlu->jlu_format->get_value_meta(field_name, + value_kind_t::VALUE_TEXT), + std::string{(const char*) str, len}); + } + + return 1; +} + +void +external_log_format::get_subline(const logline& ll, + shared_buffer_ref& sbr, + bool full_message) +{ + if (this->elf_type == elf_type_t::ELF_TYPE_TEXT) { + return; + } + + if (this->jlf_cached_offset != ll.get_offset() + || this->jlf_cached_full != full_message) + { + auto& ypc = *(this->jlf_parse_context); + yajl_handle handle = this->jlf_yajl_handle.get(); + json_log_userdata jlu(sbr, nullptr); + + this->jlf_share_manager.invalidate_refs(); + this->jlf_cached_line.clear(); + this->jlf_line_values.clear(); + this->jlf_line_offsets.clear(); + this->jlf_line_attrs.clear(); + + auto line_frag = sbr.to_string_fragment(); + + if (!line_frag.startswith("{")) { + this->jlf_cached_line.resize(line_frag.length()); + memcpy(this->jlf_cached_line.data(), + line_frag.data(), + line_frag.length()); + this->jlf_line_values.clear(); + sbr.share(this->jlf_share_manager, + &this->jlf_cached_line[0], + this->jlf_cached_line.size()); + this->jlf_line_values.lvv_sbr = sbr; + this->jlf_line_attrs.emplace_back( + line_range{0, -1}, + SA_INVALID.value(fmt::format( + FMT_STRING("line at offset {} is not a JSON-line"), + ll.get_offset()))); + return; + } + + yajl_reset(handle); + ypc.set_static_handler(json_log_rewrite_handlers.jpc_children[0]); + ypc.ypc_userdata = &jlu; + ypc.ypc_ignore_unused = true; + ypc.ypc_alt_callbacks.yajl_start_array = json_array_start; + ypc.ypc_alt_callbacks.yajl_end_array = json_array_end; + ypc.ypc_alt_callbacks.yajl_start_map = json_array_start; + ypc.ypc_alt_callbacks.yajl_end_map = json_array_end; + jlu.jlu_format = this; + jlu.jlu_line = ≪ + jlu.jlu_handle = handle; + jlu.jlu_line_value = sbr.get_data(); + + yajl_status parse_status = yajl_parse( + handle, (const unsigned char*) sbr.get_data(), sbr.length()); + if (parse_status != yajl_status_ok + || yajl_complete_parse(handle) != yajl_status_ok) + { + unsigned char* msg; + std::string full_msg; + + msg = yajl_get_error( + handle, 1, (const unsigned char*) sbr.get_data(), sbr.length()); + if (msg != nullptr) { + full_msg = fmt::format( + FMT_STRING("[offset: {}] {}\n{}"), + ll.get_offset(), + fmt::string_view{sbr.get_data(), sbr.length()}, + reinterpret_cast<char*>(msg)); + yajl_free_error(handle, msg); + } + + this->jlf_cached_line.resize(full_msg.size()); + memcpy( + this->jlf_cached_line.data(), full_msg.data(), full_msg.size()); + this->jlf_line_values.clear(); + this->jlf_line_attrs.emplace_back( + line_range{0, -1}, + SA_INVALID.value("JSON line failed to parse")); + } else { + std::vector<logline_value>::iterator lv_iter; + bool used_values[this->jlf_line_values.lvv_values.size()]; + struct line_range lr; + + memset(used_values, 0, sizeof(used_values)); + + for (lv_iter = this->jlf_line_values.lvv_values.begin(); + lv_iter != this->jlf_line_values.lvv_values.end(); + ++lv_iter) + { + lv_iter->lv_meta.lvm_format = this; + } + + int sub_offset = 1 + this->jlf_line_format_init_count; + for (const auto& jfe : this->jlf_line_format) { + static const intern_string_t ts_field + = intern_string::lookup("__timestamp__", -1); + static const intern_string_t level_field + = intern_string::lookup("__level__"); + size_t begin_size = this->jlf_cached_line.size(); + + switch (jfe.jfe_type) { + case json_log_field::CONSTANT: + this->json_append_to_cache( + jfe.jfe_default_value.c_str(), + jfe.jfe_default_value.size()); + break; + case json_log_field::VARIABLE: + lv_iter = find_if( + this->jlf_line_values.lvv_values.begin(), + this->jlf_line_values.lvv_values.end(), + logline_value_cmp(&jfe.jfe_value.pp_value)); + if (lv_iter != this->jlf_line_values.lvv_values.end()) { + auto str = lv_iter->to_string(); + size_t nl_pos = str.find('\n'); + + lr.lr_start = this->jlf_cached_line.size(); + + lv_iter->lv_meta.lvm_hidden + = lv_iter->lv_meta.lvm_user_hidden; + if ((int) str.size() > jfe.jfe_max_width) { + switch (jfe.jfe_overflow) { + case json_format_element::overflow_t:: + ABBREV: { + this->json_append_to_cache(str.c_str(), + str.size()); + size_t new_size = abbreviate_str( + &this->jlf_cached_line[lr.lr_start], + str.size(), + jfe.jfe_max_width); + + this->jlf_cached_line.resize( + lr.lr_start + new_size); + break; + } + case json_format_element::overflow_t:: + TRUNCATE: { + this->json_append_to_cache( + str.c_str(), jfe.jfe_max_width); + break; + } + case json_format_element::overflow_t:: + DOTDOT: { + size_t middle + = (jfe.jfe_max_width / 2) - 1; + this->json_append_to_cache(str.c_str(), + middle); + this->json_append_to_cache("..", 2); + size_t rest + = (jfe.jfe_max_width - middle - 2); + this->json_append_to_cache( + str.c_str() + str.size() - rest, + rest); + break; + } + } + } else { + sub_offset + += count(str.begin(), str.end(), '\n'); + this->json_append(jfe, str.c_str(), str.size()); + } + + if (nl_pos == std::string::npos || full_message) { + lr.lr_end = this->jlf_cached_line.size(); + } else { + lr.lr_end = lr.lr_start + nl_pos; + } + + if (lv_iter->lv_meta.lvm_name + == this->lf_timestamp_field) + { + this->jlf_line_attrs.emplace_back( + lr, logline::L_TIMESTAMP.value()); + } else if (lv_iter->lv_meta.lvm_name + == this->elf_body_field) + { + this->jlf_line_attrs.emplace_back( + lr, SA_BODY.value()); + } else if (lv_iter->lv_meta.lvm_name + == this->elf_opid_field) + { + this->jlf_line_attrs.emplace_back( + lr, logline::L_OPID.value()); + } + lv_iter->lv_origin = lr; + used_values[distance( + this->jlf_line_values.lvv_values.begin(), + lv_iter)] + = true; + } else if (jfe.jfe_value.pp_value == ts_field) { + struct line_range lr; + ssize_t ts_len; + char ts[64]; + + if (jfe.jfe_ts_format.empty()) { + ts_len = sql_strftime( + ts, sizeof(ts), ll.get_timeval(), 'T'); + } else { + struct exttm et; + + ll.to_exttm(et); + ts_len = ftime_fmt(ts, + sizeof(ts), + jfe.jfe_ts_format.c_str(), + et); + } + lr.lr_start = this->jlf_cached_line.size(); + this->json_append_to_cache(ts, ts_len); + lr.lr_end = this->jlf_cached_line.size(); + this->jlf_line_attrs.emplace_back( + lr, logline::L_TIMESTAMP.value()); + + lv_iter = find_if( + this->jlf_line_values.lvv_values.begin(), + this->jlf_line_values.lvv_values.end(), + logline_value_cmp(&this->lf_timestamp_field)); + if (lv_iter + != this->jlf_line_values.lvv_values.end()) + { + used_values[distance( + this->jlf_line_values.lvv_values.begin(), + lv_iter)] + = true; + } + } else if (jfe.jfe_value.pp_value == level_field + || jfe.jfe_value.pp_value + == this->elf_level_field) + { + this->json_append(jfe, ll.get_level_name(), -1); + } else { + this->json_append(jfe, + jfe.jfe_default_value.c_str(), + jfe.jfe_default_value.size()); + } + + switch (jfe.jfe_text_transform) { + case external_log_format::json_format_element:: + transform_t::NONE: + break; + case external_log_format::json_format_element:: + transform_t::UPPERCASE: + for (size_t cindex = begin_size; + cindex < this->jlf_cached_line.size(); + cindex++) + { + this->jlf_cached_line[cindex] = toupper( + this->jlf_cached_line[cindex]); + } + break; + case external_log_format::json_format_element:: + transform_t::LOWERCASE: + for (size_t cindex = begin_size; + cindex < this->jlf_cached_line.size(); + cindex++) + { + this->jlf_cached_line[cindex] = tolower( + this->jlf_cached_line[cindex]); + } + break; + case external_log_format::json_format_element:: + transform_t::CAPITALIZE: + for (size_t cindex = begin_size; + cindex < begin_size + 1; + cindex++) + { + this->jlf_cached_line[cindex] = toupper( + this->jlf_cached_line[cindex]); + } + for (size_t cindex = begin_size + 1; + cindex < this->jlf_cached_line.size(); + cindex++) + { + this->jlf_cached_line[cindex] = tolower( + this->jlf_cached_line[cindex]); + } + break; + } + break; + } + } + this->json_append_to_cache("\n", 1); + + for (size_t lpc = 0; lpc < this->jlf_line_values.lvv_values.size(); + lpc++) + { + static const intern_string_t body_name + = intern_string::lookup("body", -1); + auto& lv = this->jlf_line_values.lvv_values[lpc]; + + if (lv.lv_meta.lvm_hidden || used_values[lpc] + || body_name == lv.lv_meta.lvm_name) + { + continue; + } + + const std::string str = lv.to_string(); + size_t curr_pos = 0, nl_pos, line_len = -1; + + lv.lv_sub_offset = sub_offset; + lv.lv_origin.lr_start = 2 + lv.lv_meta.lvm_name.size() + 2; + do { + nl_pos = str.find('\n', curr_pos); + if (nl_pos != std::string::npos) { + line_len = nl_pos - curr_pos; + } else { + line_len = str.size() - curr_pos; + } + this->json_append_to_cache(" ", 2); + this->json_append_to_cache(lv.lv_meta.lvm_name.get(), + lv.lv_meta.lvm_name.size()); + this->json_append_to_cache(": ", 2); + this->json_append_to_cache(&str.c_str()[curr_pos], + line_len); + this->json_append_to_cache("\n", 1); + curr_pos = nl_pos + 1; + sub_offset += 1; + } while (nl_pos != std::string::npos && nl_pos < str.size()); + } + } + + this->jlf_line_offsets.push_back(0); + for (size_t lpc = 0; lpc < this->jlf_cached_line.size(); lpc++) { + if (this->jlf_cached_line[lpc] == '\n') { + this->jlf_line_offsets.push_back(lpc + 1); + } + } + this->jlf_line_offsets.push_back(this->jlf_cached_line.size()); + this->jlf_cached_offset = ll.get_offset(); + this->jlf_cached_full = full_message; + } + + off_t this_off = 0, next_off = 0; + + if (!this->jlf_line_offsets.empty() + && ll.get_sub_offset() < this->jlf_line_offsets.size()) + { + require(ll.get_sub_offset() < this->jlf_line_offsets.size()); + + this_off = this->jlf_line_offsets[ll.get_sub_offset()]; + if ((ll.get_sub_offset() + 1) < (int) this->jlf_line_offsets.size()) { + next_off = this->jlf_line_offsets[ll.get_sub_offset() + 1]; + } else { + next_off = this->jlf_cached_line.size(); + } + if (next_off > 0 && this->jlf_cached_line[next_off - 1] == '\n' + && this_off != next_off) + { + next_off -= 1; + } + } + + if (full_message) { + sbr.share(this->jlf_share_manager, + &this->jlf_cached_line[0], + this->jlf_cached_line.size()); + } else { + sbr.share(this->jlf_share_manager, + this->jlf_cached_line.data() + this_off, + next_off - this_off); + } + this->jlf_line_values.lvv_sbr = sbr; +} + +void +external_log_format::build(std::vector<lnav::console::user_message>& errors) +{ + if (!this->lf_timestamp_field.empty()) { + auto& vd = this->elf_value_defs[this->lf_timestamp_field]; + if (vd.get() == nullptr) { + vd = std::make_shared<external_log_format::value_def>( + this->lf_timestamp_field, value_kind_t::VALUE_TEXT, -1, this); + } + vd->vd_meta.lvm_name = this->lf_timestamp_field; + vd->vd_meta.lvm_kind = value_kind_t::VALUE_TEXT; + vd->vd_internal = true; + } + if (startswith(this->elf_level_field.get(), "/")) { + this->elf_level_field + = intern_string::lookup(this->elf_level_field.get() + 1); + } + if (!this->elf_level_field.empty() + && this->elf_value_defs.find(this->elf_level_field) + == this->elf_value_defs.end()) + { + auto& vd = this->elf_value_defs[this->elf_level_field]; + if (vd.get() == nullptr) { + vd = std::make_shared<external_log_format::value_def>( + this->elf_level_field, value_kind_t::VALUE_TEXT, -1, this); + } + vd->vd_meta.lvm_name = this->elf_level_field; + vd->vd_meta.lvm_kind = value_kind_t::VALUE_TEXT; + vd->vd_internal = true; + } + if (!this->elf_body_field.empty()) { + auto& vd = this->elf_value_defs[this->elf_body_field]; + if (vd.get() == nullptr) { + vd = std::make_shared<external_log_format::value_def>( + this->elf_body_field, value_kind_t::VALUE_TEXT, -1, this); + } + vd->vd_meta.lvm_name = this->elf_body_field; + vd->vd_meta.lvm_kind = value_kind_t::VALUE_TEXT; + vd->vd_internal = true; + } + + if (!this->lf_timestamp_format.empty()) { + this->lf_timestamp_format.push_back(nullptr); + } + for (auto iter = this->elf_patterns.begin(); + iter != this->elf_patterns.end(); + ++iter) + { + pattern& pat = *iter->second; + + if (pat.p_pcre.pp_value == nullptr) { + continue; + } + + if (pat.p_module_format) { + this->elf_has_module_format = true; + } + + for (auto named_cap : pat.p_pcre.pp_value->get_named_captures()) { + const intern_string_t name + = intern_string::lookup(named_cap.get_name()); + + if (name == this->lf_timestamp_field) { + pat.p_timestamp_field_index = named_cap.get_index(); + } + if (name == this->lf_time_field) { + pat.p_time_field_index = named_cap.get_index(); + } + if (name == this->elf_level_field) { + pat.p_level_field_index = named_cap.get_index(); + } + if (name == this->elf_module_id_field) { + pat.p_module_field_index = named_cap.get_index(); + } + if (name == this->elf_opid_field) { + pat.p_opid_field_index = named_cap.get_index(); + } + if (name == this->elf_body_field) { + pat.p_body_field_index = named_cap.get_index(); + } + + auto value_iter = this->elf_value_defs.find(name); + if (value_iter != this->elf_value_defs.end()) { + auto vd = value_iter->second; + indexed_value_def ivd; + + ivd.ivd_index = named_cap.get_index(); + if (!vd->vd_unit_field.empty()) { + ivd.ivd_unit_field_index = pat.p_pcre.pp_value->name_index( + vd->vd_unit_field.get()); + } else { + ivd.ivd_unit_field_index = -1; + } + if (!vd->vd_internal && vd->vd_meta.lvm_column == -1) { + vd->vd_meta.lvm_column = this->elf_column_count++; + } + ivd.ivd_value_def = vd; + pat.p_value_by_index.push_back(ivd); + } + } + + stable_sort(pat.p_value_by_index.begin(), pat.p_value_by_index.end()); + + for (int lpc = 0; lpc < (int) pat.p_value_by_index.size(); lpc++) { + auto& ivd = pat.p_value_by_index[lpc]; + auto vd = ivd.ivd_value_def; + + if (!vd->vd_foreign_key && !vd->vd_meta.lvm_identifier) { + switch (vd->vd_meta.lvm_kind) { + case value_kind_t::VALUE_INTEGER: + case value_kind_t::VALUE_FLOAT: + pat.p_numeric_value_indexes.push_back(lpc); + break; + default: + break; + } + } + } + + if (!this->elf_level_field.empty() && pat.p_level_field_index == -1) { + log_warning("%s:level field '%s' not found in pattern", + pat.p_config_path.c_str(), + this->elf_level_field.get()); + } + if (!this->elf_module_id_field.empty() + && pat.p_module_field_index == -1) + { + log_warning("%s:module field '%s' not found in pattern", + pat.p_config_path.c_str(), + this->elf_module_id_field.get()); + } + if (!this->elf_body_field.empty() && pat.p_body_field_index == -1) { + log_warning("%s:body field '%s' not found in pattern", + pat.p_config_path.c_str(), + this->elf_body_field.get()); + } + + this->elf_pattern_order.push_back(iter->second); + } + + if (this->elf_type != elf_type_t::ELF_TYPE_TEXT) { + if (!this->elf_patterns.empty()) { + errors.emplace_back( + lnav::console::user_message::error( + attr_line_t() + .append_quoted( + lnav::roles::symbol(this->elf_name.to_string())) + .append(" is not a valid log format")) + .with_reason("structured logs cannot have regexes") + .with_snippets(this->get_snippets())); + } + if (this->elf_type == elf_type_t::ELF_TYPE_JSON) { + this->jlf_parse_context + = std::make_shared<yajlpp_parse_context>(this->elf_name); + this->jlf_yajl_handle.reset( + yajl_alloc(&this->jlf_parse_context->ypc_callbacks, + nullptr, + this->jlf_parse_context.get()), + yajl_handle_deleter()); + yajl_config( + this->jlf_yajl_handle.get(), yajl_dont_validate_strings, 1); + } + + } else { + if (this->elf_patterns.empty()) { + errors.emplace_back(lnav::console::user_message::error( + attr_line_t() + .append_quoted(lnav::roles::symbol( + this->elf_name.to_string())) + .append(" is not a valid log format")) + .with_reason("no regexes specified") + .with_snippets(this->get_snippets())); + } + } + + stable_sort(this->elf_level_pairs.begin(), this->elf_level_pairs.end()); + + for (auto& vd : this->elf_value_def_order) { + std::vector<std::string>::iterator act_iter; + + vd->vd_meta.lvm_format = this; + if (!vd->vd_internal && vd->vd_meta.lvm_column == -1) { + vd->vd_meta.lvm_column = this->elf_column_count++; + } + + if (vd->vd_meta.lvm_kind == value_kind_t::VALUE_UNKNOWN) { + vd->vd_meta.lvm_kind = value_kind_t::VALUE_TEXT; + } + + if (this->elf_type == elf_type_t::ELF_TYPE_TEXT) { + std::set<std::string> available_captures; + + bool found_in_pattern = false; + for (const auto& pat : this->elf_patterns) { + if (pat.second->p_pcre.pp_value == nullptr) { + continue; + } + + auto cap_index = pat.second->p_pcre.pp_value->name_index( + vd->vd_meta.lvm_name.get()); + if (cap_index >= 0) { + found_in_pattern = true; + break; + } + + for (auto named_cap : + pat.second->p_pcre.pp_value->get_named_captures()) + { + available_captures.insert(named_cap.get_name().to_string()); + } + } + if (!found_in_pattern) { + auto notes + = attr_line_t("the following captures are available:\n ") + .join(available_captures, + VC_ROLE.value(role_t::VCR_SYMBOL), + ", "); + errors.emplace_back( + lnav::console::user_message::warning( + attr_line_t("invalid value ") + .append_quoted(lnav::roles::symbol( + fmt::format(FMT_STRING("/{}/value/{}"), + this->elf_name, + vd->vd_meta.lvm_name.get())))) + .with_reason( + attr_line_t("no patterns have a capture named ") + .append_quoted(vd->vd_meta.lvm_name.get())) + .with_note(notes) + .with_snippets(this->get_snippets()) + .with_help("values are populated from captures in " + "patterns, so at least one pattern must " + "have a capture with this value name")); + } + } + + for (act_iter = vd->vd_action_list.begin(); + act_iter != vd->vd_action_list.end(); + ++act_iter) + { + if (this->lf_action_defs.find(*act_iter) + == this->lf_action_defs.end()) + { +#if 0 + errors.push_back("error:" + this->elf_name.to_string() + ":" + + vd->vd_meta.lvm_name.get() + + ": cannot find action -- " + (*act_iter)); +#endif + } + } + + vd->set_rewrite_src_name(); + } + + for (const auto& td_pair : this->lf_tag_defs) { + const auto& td = td_pair.second; + + if (td->ftd_pattern.pp_value == nullptr + || td->ftd_pattern.pp_value->get_pattern().empty()) + { + errors.emplace_back( + lnav::console::user_message::error( + attr_line_t("invalid tag definition ") + .append_quoted(lnav::roles::symbol( + fmt::format(FMT_STRING("/{}/tags/{}"), + this->elf_name, + td_pair.first)))) + .with_reason( + "tag definitions must have a non-empty pattern") + .with_snippets(this->get_snippets())); + } + } + + if (this->elf_type == elf_type_t::ELF_TYPE_TEXT + && this->elf_samples.empty()) + { + errors.emplace_back( + lnav::console::user_message::error( + attr_line_t() + .append_quoted( + lnav::roles::symbol(this->elf_name.to_string())) + .append(" is not a valid log format")) + .with_reason("log message samples must be included in a format " + "definition") + .with_snippets(this->get_snippets())); + } + + if (!this->lf_subsecond_field.empty() + && !this->lf_subsecond_unit.has_value()) + { + errors.emplace_back( + lnav::console::user_message::error( + attr_line_t() + .append_quoted( + lnav::roles::symbol(this->elf_name.to_string())) + .append(" is not a valid log format")) + .with_reason(attr_line_t() + .append_quoted("subsecond-unit"_symbol) + .append(" must be set when ") + .append_quoted("subsecond-field"_symbol) + .append(" is used")) + .with_snippets(this->get_snippets())); + } + + for (size_t sample_index = 0; sample_index < this->elf_samples.size(); + sample_index += 1) + { + auto& elf_sample = this->elf_samples[sample_index]; + auto sample_lines + = string_fragment(elf_sample.s_line.pp_value).split_lines(); + bool found = false; + + for (auto pat_iter = this->elf_pattern_order.begin(); + pat_iter != this->elf_pattern_order.end(); + ++pat_iter) + { + auto& pat = *(*pat_iter); + + if (!pat.p_pcre.pp_value) { + continue; + } + + auto md = pat.p_pcre.pp_value->create_match_data(); + auto match_res = pat.p_pcre.pp_value->capture_from(sample_lines[0]) + .into(md) + .matches() + .ignore_error(); + if (!match_res) { + continue; + } + found = true; + + if (pat.p_module_format) { + continue; + } + + elf_sample.s_matched_regexes.insert(pat.p_name.to_string()); + pat.p_matched_samples.insert(sample_index); + + if (pat.p_pcre.pp_value->name_index(this->lf_timestamp_field.get()) + < 0) + { + attr_line_t notes; + bool first_note = true; + + if (pat.p_pcre.pp_value->get_capture_count() > 0) { + notes.append("the following captures are available:\n "); + } + for (auto named_cap : pat.p_pcre.pp_value->get_named_captures()) + { + if (!first_note) { + notes.append(", "); + } + notes.append( + lnav::roles::symbol(named_cap.get_name().to_string())); + first_note = false; + } + errors.emplace_back( + lnav::console::user_message::error( + attr_line_t("invalid value for property ") + .append_quoted(lnav::roles::symbol( + fmt::format(FMT_STRING("/{}/timestamp-field"), + this->elf_name)))) + .with_reason( + attr_line_t() + .append_quoted(this->lf_timestamp_field) + .append(" was not found in the pattern at ") + .append(lnav::roles::symbol(pat.p_config_path))) + .with_note(notes) + .with_snippets(this->get_snippets())); + continue; + } + + const auto ts_cap = md[pat.p_timestamp_field_index]; + const auto level_cap = md[pat.p_level_field_index]; + const char* const* custom_formats = this->get_timestamp_formats(); + date_time_scanner dts; + struct timeval tv; + struct exttm tm; + + if (ts_cap && ts_cap->sf_begin == 0) { + pat.p_timestamp_end = ts_cap->sf_end; + } + if (ts_cap + && dts.scan(ts_cap->data(), + ts_cap->length(), + custom_formats, + &tm, + tv) + == nullptr) + { + attr_line_t notes; + + if (custom_formats == nullptr) { + notes.append("the following built-in formats were tried:"); + for (int lpc = 0; PTIMEC_FORMATS[lpc].pf_fmt != nullptr; + lpc++) + { + off_t off = 0; + + PTIMEC_FORMATS[lpc].pf_func( + &tm, ts_cap->data(), off, ts_cap->length()); + notes.append("\n ") + .append(ts_cap.value()) + .append("\n") + .append(2 + off, ' ') + .append("^ "_snippet_border) + .append_quoted( + lnav::roles::symbol(PTIMEC_FORMATS[lpc].pf_fmt)) + .append(" matched up to here"_snippet_border); + } + } else { + notes.append("the following custom formats were tried:"); + for (int lpc = 0; custom_formats[lpc] != nullptr; lpc++) { + off_t off = 0; + + ptime_fmt(custom_formats[lpc], + &tm, + ts_cap->data(), + off, + ts_cap->length()); + notes.append("\n ") + .append(ts_cap.value()) + .append("\n") + .append(2 + off, ' ') + .append("^ "_snippet_border) + .append_quoted( + lnav::roles::symbol(custom_formats[lpc])) + .append(" matched up to here"_snippet_border); + } + } + + errors.emplace_back( + lnav::console::user_message::error( + attr_line_t("invalid sample log message: ") + .append(lnav::to_json(elf_sample.s_line.pp_value))) + .with_reason(attr_line_t("unrecognized timestamp -- ") + .append(ts_cap.value())) + .with_snippet(elf_sample.s_line.to_snippet()) + .with_note(notes) + .with_help(attr_line_t("If the timestamp format is not " + "supported by default, you can " + "add a custom format with the ") + .append_quoted("timestamp-format"_symbol) + .append(" property"))); + } + + auto level = this->convert_level( + level_cap.value_or(string_fragment::invalid()), nullptr); + + if (elf_sample.s_level != LEVEL_UNKNOWN + && elf_sample.s_level != level) + { + attr_line_t note_al; + + note_al.append("matched regex = ") + .append(lnav::roles::symbol(pat.p_name.to_string())) + .append("\n") + .append("captured level = ") + .append_quoted(level_cap->to_string()); + errors.emplace_back( + lnav::console::user_message::error( + attr_line_t("invalid sample log message: ") + .append(lnav::to_json(elf_sample.s_line.pp_value))) + .with_reason(attr_line_t() + .append_quoted(lnav::roles::symbol( + level_names[level])) + .append(" does not match the expected " + "level of ") + .append_quoted(lnav::roles::symbol( + level_names[elf_sample.s_level]))) + .with_snippet(elf_sample.s_line.to_snippet()) + .with_note(note_al)); + } + + { + auto full_match_res + = pat.p_pcre.pp_value + ->capture_from(elf_sample.s_line.pp_value) + .into(md) + .matches() + .ignore_error(); + if (!full_match_res) { + attr_line_t regex_al = pat.p_pcre.pp_value->get_pattern(); + lnav::snippets::regex_highlighter( + regex_al, -1, line_range{0, (int) regex_al.length()}); + errors.emplace_back( + lnav::console::user_message::error( + attr_line_t("invalid pattern: ") + .append_quoted(lnav::roles::symbol( + pat.p_name.to_string()))) + .with_reason("pattern does not match entire " + "multiline sample message") + .with_snippet(elf_sample.s_line.to_snippet()) + .with_note(attr_line_t() + .append(lnav::roles::symbol( + pat.p_name.to_string())) + .append(" = ") + .append(regex_al)) + .with_help( + attr_line_t("use ").append_quoted(".*").append( + " to match new-lines"))); + } else if (static_cast<size_t>(full_match_res->f_all.length()) + != elf_sample.s_line.pp_value.length()) + { + attr_line_t regex_al = pat.p_pcre.pp_value->get_pattern(); + lnav::snippets::regex_highlighter( + regex_al, -1, line_range{0, (int) regex_al.length()}); + auto match_length + = static_cast<size_t>(full_match_res->f_all.length()); + attr_line_t sample_al = elf_sample.s_line.pp_value; + sample_al.append("\n") + .append(match_length, ' ') + .append("^ matched up to here"_error) + .with_attr_for_all( + VC_ROLE.value(role_t::VCR_QUOTED_CODE)); + auto sample_snippet = lnav::console::snippet::from( + elf_sample.s_line.pp_location, sample_al); + errors.emplace_back( + lnav::console::user_message::error( + attr_line_t("invalid pattern: ") + .append_quoted(lnav::roles::symbol( + pat.p_name.to_string()))) + .with_reason("pattern does not match entire " + "message") + .with_snippet(sample_snippet) + .with_note(attr_line_t() + .append(lnav::roles::symbol( + pat.p_name.to_string())) + .append(" = ") + .append(regex_al)) + .with_help("update the regular expression to fully " + "capture the sample message")); + } + } + } + + if (!found && !this->elf_pattern_order.empty()) { + std::vector<std::pair<ssize_t, intern_string_t>> partial_indexes; + attr_line_t notes; + size_t max_name_width = 0; + + for (const auto& pat_iter : this->elf_pattern_order) { + auto& pat = *pat_iter; + + if (!pat.p_pcre.pp_value) { + continue; + } + + partial_indexes.emplace_back( + pat.p_pcre.pp_value->match_partial(sample_lines[0]), + pat.p_name); + max_name_width = std::max(max_name_width, pat.p_name.size()); + } + for (const auto& line_frag : sample_lines) { + auto src_line = attr_line_t(line_frag.to_string()); + if (!line_frag.endswith("\n")) { + src_line.append("\n"); + } + src_line.with_attr_for_all( + VC_ROLE.value(role_t::VCR_QUOTED_CODE)); + notes.append(" ").append(src_line); + for (auto& part_pair : partial_indexes) { + if (part_pair.first >= 0 + && part_pair.first < line_frag.length()) + { + notes.append(" ") + .append(part_pair.first, ' ') + .append("^ "_snippet_border) + .append(lnav::roles::symbol( + part_pair.second.to_string())) + .append(" matched up to here"_snippet_border) + .append("\n"); + } + part_pair.first -= line_frag.length(); + } + } + notes.add_header( + "the following shows how each pattern matched this sample:\n"); + + attr_line_t regex_note; + for (const auto& pat_iter : this->elf_pattern_order) { + if (!pat_iter->p_pcre.pp_value) { + regex_note + .append( + lnav::roles::symbol(fmt::format(FMT_STRING("{:{}}"), + pat_iter->p_name, + max_name_width))) + .append(" is invalid"); + continue; + } + + attr_line_t regex_al = pat_iter->p_pcre.pp_value->get_pattern(); + lnav::snippets::regex_highlighter( + regex_al, -1, line_range{0, (int) regex_al.length()}); + + regex_note + .append(lnav::roles::symbol(fmt::format( + FMT_STRING("{:{}}"), pat_iter->p_name, max_name_width))) + .append(" = ") + .append_quoted(regex_al) + .append("\n"); + } + + errors.emplace_back( + lnav::console::user_message::error( + attr_line_t("invalid sample log message: ") + .append(lnav::to_json(elf_sample.s_line.pp_value))) + .with_reason("sample does not match any patterns") + .with_snippet(elf_sample.s_line.to_snippet()) + .with_note(notes.rtrim()) + .with_note(regex_note)); + } + } + + if (!this->elf_samples.empty()) { + for (const auto& elf_sample : this->elf_samples) { + if (elf_sample.s_matched_regexes.size() <= 1) { + continue; + } + + errors.emplace_back( + lnav::console::user_message::warning( + attr_line_t("invalid log format: ") + .append_quoted( + lnav::roles::symbol(this->elf_name.to_string()))) + .with_reason( + attr_line_t( + "sample is matched by more than one regex: ") + .join(elf_sample.s_matched_regexes, + VC_ROLE.value(role_t::VCR_SYMBOL), + ", ")) + .with_snippet(lnav::console::snippet::from( + elf_sample.s_line.pp_location, + attr_line_t().append(lnav::roles::quoted_code( + elf_sample.s_line.pp_value)))) + .with_help("log format regexes must match a single type " + "of log message")); + } + + for (const auto& pat : this->elf_pattern_order) { + if (pat->p_module_format) { + continue; + } + + if (pat->p_matched_samples.empty()) { + errors.emplace_back( + lnav::console::user_message::warning( + attr_line_t("invalid pattern: ") + .append_quoted( + lnav::roles::symbol(pat->p_config_path))) + .with_reason("pattern does not match any samples") + .with_snippet(lnav::console::snippet::from( + pat->p_pcre.pp_location, "")) + .with_help( + "every pattern should have at least one sample " + "that it matches")); + } + } + } + + for (auto& elf_value_def : this->elf_value_defs) { + if (elf_value_def.second->vd_foreign_key + || elf_value_def.second->vd_meta.lvm_identifier) + { + continue; + } + + switch (elf_value_def.second->vd_meta.lvm_kind) { + case value_kind_t::VALUE_INTEGER: + case value_kind_t::VALUE_FLOAT: + elf_value_def.second->vd_values_index + = this->elf_numeric_value_defs.size(); + this->elf_numeric_value_defs.push_back(elf_value_def.second); + break; + default: + break; + } + } + + this->lf_value_stats.resize(this->elf_numeric_value_defs.size()); + + int format_index = 0; + for (auto iter = this->jlf_line_format.begin(); + iter != this->jlf_line_format.end(); + ++iter, format_index++) + { + static const intern_string_t ts + = intern_string::lookup("__timestamp__"); + static const intern_string_t level_field + = intern_string::lookup("__level__"); + json_format_element& jfe = *iter; + + if (startswith(jfe.jfe_value.pp_value.get(), "/")) { + jfe.jfe_value.pp_value + = intern_string::lookup(jfe.jfe_value.pp_value.get() + 1); + } + if (!jfe.jfe_ts_format.empty()) { + if (!jfe.jfe_value.pp_value.empty() && jfe.jfe_value.pp_value != ts) + { + log_warning( + "%s:line-format[%d]:ignoring field '%s' since " + "timestamp-format was used", + this->elf_name.get(), + format_index, + jfe.jfe_value.pp_value.get()); + } + jfe.jfe_value.pp_value = ts; + } + + switch (jfe.jfe_type) { + case json_log_field::VARIABLE: { + auto vd_iter + = this->elf_value_defs.find(jfe.jfe_value.pp_value); + if (jfe.jfe_value.pp_value == ts) { + this->elf_value_defs[this->lf_timestamp_field] + ->vd_meta.lvm_hidden + = true; + } else if (jfe.jfe_value.pp_value == level_field) { + this->elf_value_defs[this->elf_level_field] + ->vd_meta.lvm_hidden + = true; + } else if (vd_iter == this->elf_value_defs.end()) { + errors.emplace_back( + lnav::console::user_message::error( + attr_line_t("invalid line format element ") + .append_quoted(lnav::roles::symbol(fmt::format( + FMT_STRING("/{}/line-format/{}/field"), + this->elf_name, + format_index)))) + .with_reason( + attr_line_t() + .append_quoted(jfe.jfe_value.pp_value) + .append(" is not a defined value")) + .with_snippet(jfe.jfe_value.to_snippet())); + } + break; + } + case json_log_field::CONSTANT: + this->jlf_line_format_init_count + += std::count(jfe.jfe_default_value.begin(), + jfe.jfe_default_value.end(), + '\n'); + break; + default: + break; + } + } + + for (auto& hd_pair : this->elf_highlighter_patterns) { + external_log_format::highlighter_def& hd = hd_pair.second; + auto fg = styling::color_unit::make_empty(); + auto bg = styling::color_unit::make_empty(); + text_attrs attrs; + + if (!hd.hd_color.pp_value.empty()) { + fg = styling::color_unit::from_str(hd.hd_color.pp_value) + .unwrapOrElse([&](const auto& msg) { + errors.emplace_back( + lnav::console::user_message::error( + attr_line_t() + .append_quoted(hd.hd_color.pp_value) + .append(" is not a valid color value for " + "property ") + .append_quoted(lnav::roles::symbol( + hd.hd_color.pp_path.to_string()))) + .with_reason(msg) + .with_snippet(hd.hd_color.to_snippet())); + return styling::color_unit::make_empty(); + }); + } + + if (!hd.hd_background_color.pp_value.empty()) { + bg = styling::color_unit::from_str(hd.hd_background_color.pp_value) + .unwrapOrElse([&](const auto& msg) { + errors.emplace_back( + lnav::console::user_message::error( + attr_line_t() + .append_quoted( + hd.hd_background_color.pp_value) + .append(" is not a valid color value for " + "property ") + .append_quoted(lnav::roles::symbol( + hd.hd_background_color.pp_path + .to_string()))) + .with_reason(msg) + .with_snippet( + hd.hd_background_color.to_snippet())); + return styling::color_unit::make_empty(); + }); + } + + if (hd.hd_underline) { + attrs.ta_attrs |= A_UNDERLINE; + } + if (hd.hd_blink) { + attrs.ta_attrs |= A_BLINK; + } + + if (hd.hd_pattern.pp_value != nullptr) { + this->lf_highlighters.emplace_back(hd.hd_pattern.pp_value); + this->lf_highlighters.back() + .with_name(hd_pair.first.to_string()) + .with_format_name(this->elf_name) + .with_color(fg, bg) + .with_attrs(attrs); + } + } +} + +void +external_log_format::register_vtabs( + log_vtab_manager* vtab_manager, + std::vector<lnav::console::user_message>& errors) +{ + for (auto& elf_search_table : this->elf_search_tables) { + if (elf_search_table.second.std_pattern.pp_value == nullptr) { + continue; + } + + auto lst = std::make_shared<log_search_table>( + elf_search_table.second.std_pattern.pp_value, + elf_search_table.first); + lst->lst_format = this; + lst->lst_log_path_glob = elf_search_table.second.std_glob; + if (elf_search_table.second.std_level != LEVEL_UNKNOWN) { + lst->lst_log_level = elf_search_table.second.std_level; + } + auto errmsg = vtab_manager->register_vtab(lst); + if (!errmsg.empty()) { +#if 0 + errors.push_back("error:" + this->elf_name.to_string() + ":" + + search_iter->first.to_string() + + ":unable to register table -- " + errmsg); +#endif + } + } +} + +bool +external_log_format::match_samples(const std::vector<sample>& samples) const +{ + for (const auto& sample_iter : samples) { + for (const auto& pat_iter : this->elf_pattern_order) { + auto& pat = *pat_iter; + + if (!pat.p_pcre.pp_value) { + continue; + } + + if (pat.p_pcre.pp_value->find_in(sample_iter.s_line.pp_value) + .ignore_error()) + { + return true; + } + } + } + + return false; +} + +class external_log_table : public log_format_vtab_impl { +public: + explicit external_log_table(const external_log_format& elf) + : log_format_vtab_impl(elf), elt_format(elf) + { + } + + void get_columns(std::vector<vtab_column>& cols) const override + { + const auto& elf = this->elt_format; + + cols.resize(elf.elf_column_count); + for (const auto& vd : elf.elf_value_def_order) { + auto type_pair = log_vtab_impl::logline_value_to_sqlite_type( + vd->vd_meta.lvm_kind); + + if (vd->vd_meta.lvm_column == -1) { + continue; + } + + require(0 <= vd->vd_meta.lvm_column + && vd->vd_meta.lvm_column < elf.elf_column_count); + + cols[vd->vd_meta.lvm_column].vc_name = vd->vd_meta.lvm_name.get(); + cols[vd->vd_meta.lvm_column].vc_type = type_pair.first; + cols[vd->vd_meta.lvm_column].vc_subtype = type_pair.second; + cols[vd->vd_meta.lvm_column].vc_collator = vd->vd_collate; + cols[vd->vd_meta.lvm_column].vc_comment = vd->vd_description; + } + } + + void get_foreign_keys(std::vector<std::string>& keys_inout) const override + { + log_vtab_impl::get_foreign_keys(keys_inout); + + for (const auto& elf_value_def : this->elt_format.elf_value_defs) { + if (elf_value_def.second->vd_foreign_key) { + keys_inout.emplace_back(elf_value_def.first.to_string()); + } + } + } + + bool next(log_cursor& lc, logfile_sub_source& lss) override + { + if (lc.is_eof()) { + return true; + } + + content_line_t cl(lss.at(lc.lc_curr_line)); + auto* lf = lss.find_file_ptr(cl); + auto lf_iter = lf->begin() + cl; + uint8_t mod_id = lf_iter->get_module_id(); + + if (lf_iter->is_continued()) { + return false; + } + + this->elt_module_format.mf_mod_format = nullptr; + if (lf->get_format_name() == this->lfvi_format.get_name()) { + return true; + } else if (mod_id && mod_id == this->lfvi_format.lf_mod_index) { + auto format = lf->get_format(); + + return lf->read_line(lf_iter) + .map([this, format, cl](auto line) { + logline_value_vector values; + struct line_range mod_name_range; + intern_string_t mod_name; + + this->vi_attrs.clear(); + values.lvv_sbr = line; + format->annotate(cl, this->vi_attrs, values, false); + this->elt_container_body + = find_string_attr_range(this->vi_attrs, &SA_BODY); + if (!this->elt_container_body.is_valid()) { + return false; + } + this->elt_container_body.ltrim(line.get_data()); + mod_name_range = find_string_attr_range(this->vi_attrs, + &logline::L_MODULE); + if (!mod_name_range.is_valid()) { + return false; + } + mod_name = intern_string::lookup( + &line.get_data()[mod_name_range.lr_start], + mod_name_range.length()); + this->vi_attrs.clear(); + this->elt_module_format + = external_log_format::MODULE_FORMATS[mod_name]; + if (!this->elt_module_format.mf_mod_format) { + return false; + } + return this->elt_module_format.mf_mod_format->get_name() + == this->lfvi_format.get_name(); + }) + .unwrapOr(false); + } + + return false; + } + + void extract(logfile* lf, + uint64_t line_number, + logline_value_vector& values) override + { + auto& line = values.lvv_sbr; + auto format = lf->get_format(); + + if (this->elt_module_format.mf_mod_format != nullptr) { + shared_buffer_ref body_ref; + + body_ref.subset(line, + this->elt_container_body.lr_start, + this->elt_container_body.length()); + this->vi_attrs.clear(); + auto narrow_res + = values.lvv_sbr.narrow(this->elt_container_body.lr_start, + this->elt_container_body.length()); + values.lvv_values.clear(); + this->elt_module_format.mf_mod_format->annotate( + line_number, this->vi_attrs, values, false); + values.lvv_sbr.widen(narrow_res); + } else { + this->vi_attrs.clear(); + format->annotate(line_number, this->vi_attrs, values, false); + } + } + + const external_log_format& elt_format; + module_format elt_module_format; + struct line_range elt_container_body; +}; + +std::shared_ptr<log_vtab_impl> +external_log_format::get_vtab_impl() const +{ + return std::make_shared<external_log_table>(*this); +} + +std::shared_ptr<log_format> +external_log_format::specialized(int fmt_lock) +{ + auto retval = std::make_shared<external_log_format>(*this); + + retval->lf_specialized = true; + this->lf_pattern_locks.clear(); + if (fmt_lock != -1) { + retval->lf_pattern_locks.emplace_back(0, fmt_lock); + } + + if (this->elf_type == elf_type_t::ELF_TYPE_JSON) { + this->jlf_parse_context + = std::make_shared<yajlpp_parse_context>(this->elf_name); + this->jlf_yajl_handle.reset( + yajl_alloc(&this->jlf_parse_context->ypc_callbacks, + nullptr, + this->jlf_parse_context.get()), + yajl_handle_deleter()); + yajl_config(this->jlf_yajl_handle.get(), yajl_dont_validate_strings, 1); + this->jlf_cached_line.reserve(16 * 1024); + } + + this->lf_value_stats.clear(); + this->lf_value_stats.resize(this->elf_numeric_value_defs.size()); + + return retval; +} + +bool +external_log_format::match_name(const std::string& filename) +{ + if (this->elf_filename_pcre.pp_value == nullptr) { + return true; + } + + return this->elf_filename_pcre.pp_value->find_in(filename) + .ignore_error() + .has_value(); +} + +bool +external_log_format::match_mime_type(const file_format_t ff) const +{ + if (ff == file_format_t::UNKNOWN && this->elf_mime_types.empty()) { + return true; + } + + return this->elf_mime_types.count(ff) == 1; +} + +long +external_log_format::value_line_count(const intern_string_t ist, + bool top_level, + const unsigned char* str, + ssize_t len) const +{ + const auto iter = this->elf_value_defs.find(ist); + long line_count + = (str != nullptr) ? std::count(&str[0], &str[len], '\n') + 1 : 1; + + if (iter == this->elf_value_defs.end()) { + return (this->jlf_hide_extra || !top_level) ? 0 : line_count; + } + + if (iter->second->vd_meta.lvm_hidden) { + return 0; + } + + if (std::find_if(this->jlf_line_format.begin(), + this->jlf_line_format.end(), + json_field_cmp(json_log_field::VARIABLE, ist)) + != this->jlf_line_format.end()) + { + return line_count - 1; + } + + return line_count; +} + +log_level_t +external_log_format::convert_level(string_fragment sf, + scan_batch_context* sbc) const +{ + log_level_t retval = LEVEL_INFO; + + if (sf.is_valid()) { + if (sbc != nullptr && sbc->sbc_cached_level_count > 0) { + auto cached_level_iter + = std::find(std::begin(sbc->sbc_cached_level_strings), + std::begin(sbc->sbc_cached_level_strings) + + sbc->sbc_cached_level_count, + sf); + if (cached_level_iter + != std::begin(sbc->sbc_cached_level_strings) + + sbc->sbc_cached_level_count) + { + auto cache_index + = std::distance(std::begin(sbc->sbc_cached_level_strings), + cached_level_iter); + if (cache_index != 0) { + std::swap(sbc->sbc_cached_level_strings[cache_index], + sbc->sbc_cached_level_strings[0]); + std::swap(sbc->sbc_cached_level_values[cache_index], + sbc->sbc_cached_level_values[0]); + } + return sbc->sbc_cached_level_values[0]; + } + } + + if (this->elf_level_patterns.empty()) { + retval = string2level(sf.data(), sf.length()); + } else { + for (const auto& elf_level_pattern : this->elf_level_patterns) { + if (elf_level_pattern.second.lp_pcre.pp_value + ->find_in(sf, PCRE2_NO_UTF_CHECK) + .ignore_error() + .has_value()) + { + retval = elf_level_pattern.first; + break; + } + } + } + + if (sbc != nullptr && sf.length() < 10) { + size_t cache_index; + + if (sbc->sbc_cached_level_count == 4) { + cache_index = sbc->sbc_cached_level_count - 1; + } else { + cache_index = sbc->sbc_cached_level_count; + sbc->sbc_cached_level_count += 1; + } + sbc->sbc_cached_level_strings[cache_index] = sf.to_string(); + sbc->sbc_cached_level_values[cache_index] = retval; + } + } + + return retval; +} + +logline_value_meta +external_log_format::get_value_meta(intern_string_t field_name, + value_kind_t kind) +{ + auto iter = this->elf_value_defs.find(field_name); + + if (iter == this->elf_value_defs.end()) { + auto retval = logline_value_meta(field_name, kind, -1, this); + + retval.lvm_hidden = this->jlf_hide_extra; + return retval; + } + + auto lvm = iter->second->vd_meta; + + lvm.lvm_kind = kind; + return lvm; +} + +void +external_log_format::json_append( + const external_log_format::json_format_element& jfe, + const char* value, + ssize_t len) +{ + if (len == -1) { + len = strlen(value); + } + if (jfe.jfe_align == json_format_element::align_t::RIGHT) { + if (len < jfe.jfe_min_width) { + this->json_append_to_cache(jfe.jfe_min_width - len); + } + } + this->json_append_to_cache(value, len); + if (jfe.jfe_align == json_format_element::align_t::LEFT) { + if (len < jfe.jfe_min_width) { + this->json_append_to_cache(jfe.jfe_min_width - len); + } + } +} + +intern_string_t +external_log_format::get_pattern_name(uint64_t line_number) const +{ + if (this->elf_type != elf_type_t::ELF_TYPE_TEXT) { + static auto structured = intern_string::lookup("structured"); + + return structured; + } + int pat_index = this->pattern_index_for_line(line_number); + return this->elf_pattern_order[pat_index]->p_name; +} + +int +log_format::pattern_index_for_line(uint64_t line_number) const +{ + auto iter = lower_bound(this->lf_pattern_locks.cbegin(), + this->lf_pattern_locks.cend(), + line_number, + [](const pattern_for_lines& pfl, uint32_t line) { + return pfl.pfl_line < line; + }); + + if (iter == this->lf_pattern_locks.end() || iter->pfl_line != line_number) { + --iter; + } + + return iter->pfl_pat_index; +} + +std::string +log_format::get_pattern_path(uint64_t line_number) const +{ + int pat_index = this->pattern_index_for_line(line_number); + return fmt::format(FMT_STRING("builtin ({})"), pat_index); +} + +intern_string_t +log_format::get_pattern_name(uint64_t line_number) const +{ + char pat_str[128]; + + int pat_index = this->pattern_index_for_line(line_number); + snprintf(pat_str, sizeof(pat_str), "builtin (%d)", pat_index); + return intern_string::lookup(pat_str); +} + +std::shared_ptr<log_format> +log_format::find_root_format(const char* name) +{ + auto& fmts = get_root_formats(); + for (auto& lf : fmts) { + if (lf->get_name() == name) { + return lf; + } + } + return nullptr; +} + +log_format::pattern_for_lines::pattern_for_lines(uint32_t pfl_line, + uint32_t pfl_pat_index) + : pfl_line(pfl_line), pfl_pat_index(pfl_pat_index) +{ +} + +void +logline_value_stats::merge(const logline_value_stats& other) +{ + if (other.lvs_count == 0) { + return; + } + + require(other.lvs_min_value <= other.lvs_max_value); + + if (other.lvs_min_value < this->lvs_min_value) { + this->lvs_min_value = other.lvs_min_value; + } + if (other.lvs_max_value > this->lvs_max_value) { + this->lvs_max_value = other.lvs_max_value; + } + this->lvs_count += other.lvs_count; + this->lvs_total += other.lvs_total; + + ensure(this->lvs_count >= 0); + ensure(this->lvs_min_value <= this->lvs_max_value); +} + +void +logline_value_stats::add_value(double value) +{ + if (value < this->lvs_min_value) { + this->lvs_min_value = value; + } + if (value > this->lvs_max_value) { + this->lvs_max_value = value; + } + this->lvs_count += 1; + this->lvs_total += value; +} + +std::vector<logline_value_meta> +external_log_format::get_value_metadata() const +{ + std::vector<logline_value_meta> retval; + + for (const auto& vd : this->elf_value_def_order) { + retval.emplace_back(vd->vd_meta); + } + + return retval; +} + +bool +format_tag_def::path_restriction::matches(const char* fn) const +{ + return fnmatch(this->p_glob.c_str(), fn, 0) == 0; +} + +/* XXX */ +#include "log_format_impls.cc" |