diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-07 04:48:35 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-07 04:48:35 +0000 |
commit | 207df6fc406e81bfeebdff7f404bd242ff3f099f (patch) | |
tree | a1a796b056909dd0a04ffec163db9363a8757808 /src/log_format.cc | |
parent | Releasing progress-linux version 0.11.2-1~progress7.99u1. (diff) | |
download | lnav-207df6fc406e81bfeebdff7f404bd242ff3f099f.tar.xz lnav-207df6fc406e81bfeebdff7f404bd242ff3f099f.zip |
Merging upstream version 0.12.2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | src/log_format.cc | 1355 |
1 files changed, 1206 insertions, 149 deletions
diff --git a/src/log_format.cc b/src/log_format.cc index dd18f60..0569d39 100644 --- a/src/log_format.cc +++ b/src/log_format.cc @@ -27,13 +27,17 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include <algorithm> #include <memory> #include <fnmatch.h> #include <stdio.h> #include <string.h> +#include "base/fs_util.hh" #include "base/is_utf8.hh" +#include "base/map_util.hh" +#include "base/opt_util.hh" #include "base/snippet_highlighters.hh" #include "base/string_util.hh" #include "command_executor.hh" @@ -44,8 +48,11 @@ #include "log_search_table.hh" #include "log_vtab_impl.hh" #include "ptimec.hh" +#include "readline_highlighters.hh" #include "scn/scn.h" #include "sql_util.hh" +#include "sqlite-extension-func.hh" +#include "sqlitepp.hh" #include "yajlpp/yajlpp.hh" #include "yajlpp/yajlpp_def.hh" @@ -65,6 +72,199 @@ external_log_format::mod_map_t external_log_format::MODULE_FORMATS; std::vector<std::shared_ptr<external_log_format>> external_log_format::GRAPH_ORDERED_FORMATS; +static const uint32_t DATE_TIME_SET_FLAGS = ETF_YEAR_SET | ETF_MONTH_SET + | ETF_DAY_SET | ETF_HOUR_SET | ETF_MINUTE_SET | ETF_SECOND_SET; + +log_level_stats& +log_level_stats::operator|=(const log_level_stats& rhs) +{ + this->lls_error_count += rhs.lls_error_count; + this->lls_warning_count += rhs.lls_warning_count; + this->lls_total_count += rhs.lls_total_count; + + return *this; +} + +log_op_description& +log_op_description::operator|=(const log_op_description& rhs) +{ + if (!this->lod_id && rhs.lod_id) { + this->lod_id = rhs.lod_id; + } + if (this->lod_elements.size() < rhs.lod_elements.size()) { + this->lod_elements = rhs.lod_elements; + } + + return *this; +} + +opid_time_range& +opid_time_range::operator|=(const opid_time_range& rhs) +{ + this->otr_range |= rhs.otr_range; + this->otr_description |= rhs.otr_description; + this->otr_level_stats |= rhs.otr_level_stats; + for (const auto& rhs_sub : rhs.otr_sub_ops) { + bool found = false; + + for (auto& sub : this->otr_sub_ops) { + if (sub.ostr_subid == rhs_sub.ostr_subid) { + sub.ostr_range |= rhs_sub.ostr_range; + found = true; + } + } + if (!found) { + this->otr_sub_ops.emplace_back(rhs_sub); + } + } + std::stable_sort(this->otr_sub_ops.begin(), this->otr_sub_ops.end()); + + return *this; +} + +void +log_level_stats::update_msg_count(log_level_t lvl) +{ + switch (lvl) { + case LEVEL_FATAL: + case LEVEL_CRITICAL: + case LEVEL_ERROR: + this->lls_error_count += 1; + break; + case LEVEL_WARNING: + this->lls_warning_count += 1; + break; + default: + break; + } + this->lls_total_count += 1; +} + +void +opid_time_range::close_sub_ops(const string_fragment& subid) +{ + for (auto& other_sub : this->otr_sub_ops) { + if (other_sub.ostr_subid == subid) { + other_sub.ostr_open = false; + } + } +} + +opid_sub_time_range* +log_opid_state::sub_op_in_use(ArenaAlloc::Alloc<char>& alloc, + log_opid_map::iterator& op_iter, + const string_fragment& subid, + const timeval& tv, + log_level_t level) +{ + const auto& opid = op_iter->first; + auto sub_iter = this->los_sub_in_use.find(subid); + if (sub_iter == this->los_sub_in_use.end()) { + auto emp_res + = this->los_sub_in_use.emplace(subid.to_owned(alloc), opid); + + sub_iter = emp_res.first; + } + + auto retval = sub_iter->first; + if (sub_iter->second != opid) { + auto other_otr + = lnav::map::find(this->los_opid_ranges, sub_iter->second); + if (other_otr) { + other_otr->get().close_sub_ops(retval); + } + } + sub_iter->second = opid; + + auto& otr = op_iter->second; + auto sub_op_iter = otr.otr_sub_ops.rbegin(); + for (; sub_op_iter != otr.otr_sub_ops.rend(); ++sub_op_iter) { + if (sub_op_iter->ostr_open && sub_op_iter->ostr_subid == retval) { + break; + } + } + if (sub_op_iter == otr.otr_sub_ops.rend()) { + otr.otr_sub_ops.emplace_back(opid_sub_time_range{ + retval, + time_range{tv, tv}, + }); + otr.otr_sub_ops.back().ostr_level_stats.update_msg_count(level); + + return &otr.otr_sub_ops.back(); + } else { + sub_op_iter->ostr_range.extend_to(tv); + sub_op_iter->ostr_level_stats.update_msg_count(level); + return &(*sub_op_iter); + } +} + +nonstd::optional<std::string> +log_format::opid_descriptor::matches(const string_fragment& sf) const +{ + if (this->od_extractor.pp_value) { + static thread_local auto desc_md + = lnav::pcre2pp::match_data::unitialized(); + + auto desc_match_res = this->od_extractor.pp_value->capture_from(sf) + .into(desc_md) + .matches(PCRE2_NO_UTF_CHECK | PCRE2_ANCHORED) + .ignore_error(); + if (desc_match_res) { + return desc_md.to_string(); + } + + return nonstd::nullopt; + } + return sf.to_string(); +} + +std::string +log_format::opid_descriptors::to_string( + const lnav::map::small<size_t, std::string>& lod) const +{ + std::string retval; + + for (size_t lpc = 0; lpc < this->od_descriptors->size(); lpc++) { + retval.append(this->od_descriptors->at(lpc).od_prefix); + auto iter = lod.find(lpc); + if (iter != lod.end()) { + retval.append(iter->second); + } + retval.append(this->od_descriptors->at(lpc).od_suffix); + } + + return retval; +} + +chart_type_t +logline_value_meta::to_chart_type() const +{ + auto retval = chart_type_t::hist; + switch (this->lvm_kind) { + case value_kind_t::VALUE_NULL: + retval = chart_type_t::none; + break; + case value_kind_t::VALUE_INTEGER: + if (!this->lvm_identifier && !this->lvm_foreign_key) { + retval = chart_type_t::spectro; + } + break; + case value_kind_t::VALUE_FLOAT: + retval = chart_type_t::spectro; + break; + case value_kind_t::VALUE_XML: + case value_kind_t::VALUE_JSON: + case value_kind_t::VALUE_BOOLEAN: + case value_kind_t::VALUE_TIMESTAMP: + retval = chart_type_t::none; + break; + default: + break; + } + + return retval; +} + struct line_range logline_value::origin_in_full_msg(const char* msg, ssize_t len) const { @@ -254,6 +454,152 @@ log_format::get_root_formats() return lf_root_formats; } +void +external_log_format::update_op_description( + const std::map<intern_string_t, opid_descriptors>& desc_defs, + log_op_description& lod, + const pattern* fpat, + const lnav::pcre2pp::match_data& md) +{ + nonstd::optional<std::string> desc_elem_str; + if (!lod.lod_id) { + for (const auto& desc_def_pair : desc_defs) { + if (lod.lod_id) { + break; + } + for (const auto& desc_def : *desc_def_pair.second.od_descriptors) { + auto desc_field_index_iter = fpat->p_value_name_to_index.find( + desc_def.od_field.pp_value); + + if (desc_field_index_iter == fpat->p_value_name_to_index.end()) + { + continue; + } + auto desc_cap_opt = md[desc_field_index_iter->second]; + + if (!desc_cap_opt) { + continue; + } + + desc_elem_str = desc_def.matches(desc_cap_opt.value()); + if (desc_elem_str) { + lod.lod_id = desc_def_pair.first; + } + } + } + } + if (lod.lod_id) { + const auto& desc_def_v + = *desc_defs.find(lod.lod_id.value())->second.od_descriptors; + auto& desc_v = lod.lod_elements; + + if (desc_def_v.size() == desc_v.size()) { + return; + } + for (size_t desc_def_index = 0; desc_def_index < desc_def_v.size(); + desc_def_index++) + { + const auto& desc_def = desc_def_v[desc_def_index]; + auto found_desc = desc_v.begin(); + + for (; found_desc != desc_v.end(); ++found_desc) { + if (found_desc->first == desc_def_index) { + break; + } + } + auto desc_field_index_iter + = fpat->p_value_name_to_index.find(desc_def.od_field.pp_value); + + if (desc_field_index_iter == fpat->p_value_name_to_index.end()) { + continue; + } + auto desc_cap_opt = md[desc_field_index_iter->second]; + if (!desc_cap_opt) { + continue; + } + + if (!desc_elem_str) { + desc_elem_str = desc_def.matches(desc_cap_opt.value()); + } + if (desc_elem_str) { + if (found_desc == desc_v.end()) { + desc_v.emplace_back(desc_def_index, desc_elem_str.value()); + } else if (!desc_elem_str->empty()) { + found_desc->second.append(desc_def.od_joiner); + found_desc->second.append(desc_elem_str.value()); + } + } + desc_elem_str = nonstd::nullopt; + } + } +} + +void +external_log_format::update_op_description( + const std::map<intern_string_t, opid_descriptors>& desc_defs, + log_op_description& lod) +{ + nonstd::optional<std::string> desc_elem_str; + if (!lod.lod_id) { + for (const auto& desc_def_pair : desc_defs) { + if (lod.lod_id) { + break; + } + for (const auto& desc_def : *desc_def_pair.second.od_descriptors) { + auto desc_cap_iter + = this->lf_desc_captures.find(desc_def.od_field.pp_value); + + if (desc_cap_iter == this->lf_desc_captures.end()) { + continue; + } + desc_elem_str = desc_def.matches(desc_cap_iter->second); + if (desc_elem_str) { + lod.lod_id = desc_def_pair.first; + } + } + } + } + if (lod.lod_id) { + const auto& desc_def_v + = *desc_defs.find(lod.lod_id.value())->second.od_descriptors; + auto& desc_v = lod.lod_elements; + + if (desc_def_v.size() != desc_v.size()) { + for (size_t desc_def_index = 0; desc_def_index < desc_def_v.size(); + desc_def_index++) + { + const auto& desc_def = desc_def_v[desc_def_index]; + auto found_desc = desc_v.begin(); + + for (; found_desc != desc_v.end(); ++found_desc) { + if (found_desc->first == desc_def_index) { + break; + } + } + auto desc_cap_iter + = this->lf_desc_captures.find(desc_def.od_field.pp_value); + if (desc_cap_iter == this->lf_desc_captures.end()) { + continue; + } + + if (!desc_elem_str) { + desc_elem_str = desc_def.matches(desc_cap_iter->second); + } + if (desc_elem_str) { + if (found_desc == desc_v.end()) { + desc_v.emplace_back(desc_def_index, + desc_elem_str.value()); + } else if (!desc_elem_str->empty()) { + found_desc->second.append(desc_def.od_joiner); + found_desc->second.append(desc_elem_str.value()); + } + } + desc_elem_str = nonstd::nullopt; + } + } + } +} + static bool next_format( const std::vector<std::shared_ptr<external_log_format::pattern>>& patterns, @@ -277,7 +623,7 @@ next_format( } bool -log_format::next_format(pcre_format* fmt, int& index, int& locked_index) +log_format::next_format(const pcre_format* fmt, int& index, int& locked_index) { bool retval = true; @@ -298,7 +644,7 @@ log_format::next_format(pcre_format* fmt, int& index, int& locked_index) const char* log_format::log_scanf(uint32_t line_number, string_fragment line, - pcre_format* fmt, + const pcre_format* fmt, const char* time_fmt[], struct exttm* tm_out, struct timeval* tv_out, @@ -327,6 +673,33 @@ log_format::log_scanf(uint32_t line_number, retval = this->lf_date_time.scan( ts->data(), ts->length(), nullptr, tm_out, *tv_out); + if (retval == nullptr) { + auto ls = this->lf_date_time.unlock(); + retval = this->lf_date_time.scan( + ts->data(), ts->length(), nullptr, tm_out, *tv_out); + if (retval != nullptr) { + auto old_flags + = this->lf_timestamp_flags & DATE_TIME_SET_FLAGS; + auto new_flags = tm_out->et_flags & DATE_TIME_SET_FLAGS; + + // It is unlikely a valid timestamp would lose much + // precision. + if (new_flags != old_flags) { + retval = nullptr; + } + } + if (retval == nullptr) { + this->lf_date_time.relock(ls); + } else { + log_debug( + "%d: changed time format to '%s' due to %.*s", + line_number, + PTIMEC_FORMAT_STR[this->lf_date_time.dts_fmt_lock], + ts->length(), + ts->data()); + } + } + if (retval) { *ts_out = ts.value(); *level_out = md[2]; @@ -372,7 +745,7 @@ log_format::check_for_new_year(std::vector<logline>& dst, off_month = 1; } else if (!(etm.et_flags & ETF_DAY_SET) && (diff >= (60 * 60))) { off_day = 1; - } else if (!(etm.et_flags & ETF_DAY_SET)) { + } else if (!(etm.et_flags & ETF_HOUR_SET) && (diff >= 60)) { off_hour = 1; } else { do_change = false; @@ -448,6 +821,9 @@ struct json_log_userdata { uint32_t jlu_quality{0}; shared_buffer_ref& jlu_shared_buffer; scan_batch_context* jlu_batch_context; + nonstd::optional<string_fragment> jlu_opid_frag; + nonstd::optional<std::string> jlu_subid; + struct exttm jlu_exttm; }; static int read_json_field(yajlpp_parse_context* ypc, @@ -498,33 +874,44 @@ read_json_number(yajlpp_parse_context* ypc, tv.tv_sec = val / divisor; tv.tv_usec = fmod(val, divisor) * (1000000.0 / divisor); - if (jlu->jlu_format->lf_date_time.dts_local_time) { - struct tm ltm; - localtime_r(&tv.tv_sec, <m); -#ifdef HAVE_STRUCT_TM_TM_ZONE - ltm.tm_zone = nullptr; -#endif - ltm.tm_isdst = 0; - tv.tv_sec = tm2sec(<m); + jlu->jlu_format->lf_date_time.to_localtime(tv.tv_sec, jlu->jlu_exttm); + tv.tv_sec = tm2sec(&jlu->jlu_exttm.et_tm); + jlu->jlu_exttm.et_gmtoff + = jlu->jlu_format->lf_date_time.dts_local_offset_cache; + jlu->jlu_exttm.et_flags + |= ETF_MACHINE_ORIENTED | ETF_SUB_NOT_IN_FORMAT | ETF_ZONE_SET; + if (divisor == 1000) { + jlu->jlu_exttm.et_flags |= ETF_MILLIS_SET; + } else { + jlu->jlu_exttm.et_flags |= ETF_MICROS_SET; } + jlu->jlu_exttm.et_nsec = jlu->jlu_exttm.et_nsec = tv.tv_usec * 1000; jlu->jlu_base_line->set_time(tv); } else if (jlu->jlu_format->lf_subsecond_field == field_name) { uint64_t millis = 0; + jlu->jlu_exttm.et_flags &= ~(ETF_MICROS_SET | ETF_MILLIS_SET); switch (jlu->jlu_format->lf_subsecond_unit.value()) { case log_format::subsecond_unit::milli: millis = val; + jlu->jlu_exttm.et_nsec = val * 1000000; + jlu->jlu_exttm.et_flags |= ETF_MILLIS_SET; break; case log_format::subsecond_unit::micro: millis = std::chrono::duration_cast<std::chrono::milliseconds>( std::chrono::microseconds((int64_t) val)) .count(); + jlu->jlu_exttm.et_nsec = val * 1000; + jlu->jlu_exttm.et_flags |= ETF_MICROS_SET; break; case log_format::subsecond_unit::nano: millis = std::chrono::duration_cast<std::chrono::milliseconds>( std::chrono::nanoseconds((int64_t) val)) .count(); + jlu->jlu_exttm.et_nsec = val; + jlu->jlu_exttm.et_flags |= ETF_NANOS_SET; break; } + jlu->jlu_exttm.et_flags |= ETF_SUB_NOT_IN_FORMAT; jlu->jlu_base_line->set_millis(millis); } else if (jlu->jlu_format->elf_level_field == field_name) { if (jlu->jlu_format->elf_level_pairs.empty()) { @@ -636,6 +1023,42 @@ rewrite_json_int(yajlpp_parse_context* ypc, long long val) json_log_userdata* jlu = (json_log_userdata*) ypc->ypc_userdata; const intern_string_t field_name = ypc->get_path(); + if (jlu->jlu_format->lf_timestamp_field == field_name) { + long long divisor = jlu->jlu_format->elf_timestamp_divisor; + struct timeval tv; + + tv.tv_sec = val / divisor; + tv.tv_usec = fmod(val, divisor) * (1000000.0 / divisor); + jlu->jlu_format->lf_date_time.to_localtime(tv.tv_sec, jlu->jlu_exttm); + jlu->jlu_exttm.et_gmtoff + = jlu->jlu_format->lf_date_time.dts_local_offset_cache; + jlu->jlu_exttm.et_flags |= ETF_MACHINE_ORIENTED | ETF_SUB_NOT_IN_FORMAT + | ETF_ZONE_SET | ETF_Z_FOR_UTC; + if (divisor == 1) { + jlu->jlu_exttm.et_flags |= ETF_MICROS_SET; + } else { + jlu->jlu_exttm.et_flags |= ETF_MILLIS_SET; + } + jlu->jlu_exttm.et_nsec = jlu->jlu_exttm.et_nsec = tv.tv_usec * 1000; + } else if (jlu->jlu_format->lf_subsecond_field == field_name) { + jlu->jlu_exttm.et_flags &= ~(ETF_MICROS_SET | ETF_MILLIS_SET); + switch (jlu->jlu_format->lf_subsecond_unit.value()) { + case log_format::subsecond_unit::milli: + jlu->jlu_exttm.et_nsec = val * 1000000; + jlu->jlu_exttm.et_flags |= ETF_MILLIS_SET; + break; + case log_format::subsecond_unit::micro: + jlu->jlu_exttm.et_nsec = val * 1000; + jlu->jlu_exttm.et_flags |= ETF_MICROS_SET; + break; + case log_format::subsecond_unit::nano: + jlu->jlu_exttm.et_nsec = val; + jlu->jlu_exttm.et_flags |= ETF_NANOS_SET; + break; + } + jlu->jlu_exttm.et_flags |= ETF_SUB_NOT_IN_FORMAT; + } + if (!ypc->is_level(1) && !jlu->jlu_format->has_value_def(field_name)) { return 1; } @@ -652,6 +1075,42 @@ rewrite_json_double(yajlpp_parse_context* ypc, double val) json_log_userdata* jlu = (json_log_userdata*) ypc->ypc_userdata; const intern_string_t field_name = ypc->get_path(); + if (jlu->jlu_format->lf_timestamp_field == field_name) { + long long divisor = jlu->jlu_format->elf_timestamp_divisor; + struct timeval tv; + + tv.tv_sec = val / divisor; + tv.tv_usec = fmod(val, divisor) * (1000000.0 / divisor); + jlu->jlu_format->lf_date_time.to_localtime(tv.tv_sec, jlu->jlu_exttm); + jlu->jlu_exttm.et_gmtoff + = jlu->jlu_format->lf_date_time.dts_local_offset_cache; + jlu->jlu_exttm.et_flags |= ETF_MACHINE_ORIENTED | ETF_SUB_NOT_IN_FORMAT + | ETF_ZONE_SET | ETF_Z_FOR_UTC; + if (divisor == 1) { + jlu->jlu_exttm.et_flags |= ETF_MICROS_SET; + } else { + jlu->jlu_exttm.et_flags |= ETF_MILLIS_SET; + } + jlu->jlu_exttm.et_nsec = jlu->jlu_exttm.et_nsec = tv.tv_usec * 1000; + } else if (jlu->jlu_format->lf_subsecond_field == field_name) { + jlu->jlu_exttm.et_flags &= ~(ETF_MICROS_SET | ETF_MILLIS_SET); + switch (jlu->jlu_format->lf_subsecond_unit.value()) { + case log_format::subsecond_unit::milli: + jlu->jlu_exttm.et_nsec = val * 1000000; + jlu->jlu_exttm.et_flags |= ETF_MILLIS_SET; + break; + case log_format::subsecond_unit::micro: + jlu->jlu_exttm.et_nsec = val * 1000; + jlu->jlu_exttm.et_flags |= ETF_MICROS_SET; + break; + case log_format::subsecond_unit::nano: + jlu->jlu_exttm.et_nsec = val; + jlu->jlu_exttm.et_flags |= ETF_NANOS_SET; + break; + } + jlu->jlu_exttm.et_flags |= ETF_SUB_NOT_IN_FORMAT; + } + if (!ypc->is_level(1) && !jlu->jlu_format->has_value_def(field_name)) { return 1; } @@ -715,6 +1174,17 @@ external_log_format::scan(logfile& lf, shared_buffer_ref& sbr, scan_batch_context& sbc) { + if (dst.empty()) { + auto file_options = lf.get_file_options(); + + if (file_options) { + this->lf_date_time.dts_default_zone + = file_options->second.fo_default_zone.pp_value; + } else { + this->lf_date_time.dts_default_zone = nullptr; + } + } + if (this->elf_type == elf_type_t::ELF_TYPE_JSON) { logline ll(li.li_file_range.fr_offset, 0, 0, LEVEL_INFO); auto line_frag = sbr.to_string_fragment(); @@ -734,10 +1204,6 @@ external_log_format::scan(logfile& lf, yajl_handle handle = this->jlf_yajl_handle.get(); json_log_userdata jlu(sbr, &sbc); - if (!this->lf_specialized && dst.size() >= 3) { - return log_format::scan_no_match{"file is not JSON-lines"}; - } - if (li.li_partial) { log_debug("skipping partial line at offset %d", li.li_file_range.fr_offset); @@ -750,6 +1216,9 @@ external_log_format::scan(logfile& lf, const auto* line_data = (const unsigned char*) sbr.get_data(); + this->lf_desc_captures.clear(); + this->lf_desc_allocator.reset(); + yajl_reset(handle); ypc.set_static_handler(json_log_handlers.jpc_children[0]); ypc.ypc_userdata = &jlu; @@ -777,6 +1246,56 @@ external_log_format::scan(logfile& lf, "JSON message does not have expected timestamp property"}; } + if (jlu.jlu_opid_frag) { + this->jlf_line_values.lvv_opid_value + = jlu.jlu_opid_frag->to_string(); + auto opid_iter = sbc.sbc_opids.los_opid_ranges.find( + jlu.jlu_opid_frag.value()); + if (opid_iter == sbc.sbc_opids.los_opid_ranges.end()) { + auto otr = opid_time_range{ + time_range{ll.get_timeval(), ll.get_timeval()}, + }; + auto emplace_res = sbc.sbc_opids.los_opid_ranges.emplace( + jlu.jlu_opid_frag.value(), otr); + opid_iter = emplace_res.first; + } else { + opid_iter->second.otr_range.extend_to(ll.get_timeval()); + } + + opid_iter->second.otr_level_stats.update_msg_count( + ll.get_msg_level()); + + if (jlu.jlu_subid) { + auto subid_frag + = string_fragment::from_str(jlu.jlu_subid.value()); + + auto* ostr + = sbc.sbc_opids.sub_op_in_use(sbc.sbc_allocator, + opid_iter, + subid_frag, + ll.get_timeval(), + ll.get_msg_level()); + if (ostr != nullptr && ostr->ostr_description.empty()) { + log_op_description sub_desc; + this->update_op_description( + *this->lf_subid_description_def, sub_desc); + if (!sub_desc.lod_elements.empty()) { + auto& sub_desc_def + = this->lf_subid_description_def->at( + sub_desc.lod_id.value()); + ostr->ostr_description + = sub_desc_def.to_string(sub_desc.lod_elements); + } + } + } + + auto& otr = opid_iter->second; + this->update_op_description(*this->lf_opid_description_def, + otr.otr_description); + } else { + this->jlf_line_values.lvv_opid_value = nonstd::nullopt; + } + jlu.jlu_sub_line_count += this->jlf_line_format_init_count; for (int lpc = 0; lpc < jlu.jlu_sub_line_count; lpc++) { ll.set_sub_offset(lpc); @@ -852,6 +1371,7 @@ external_log_format::scan(logfile& lf, auto level_cap = md[fpat->p_level_field_index]; auto mod_cap = md[fpat->p_module_field_index]; auto opid_cap = md[fpat->p_opid_field_index]; + auto subid_cap = md[fpat->p_subid_field_index]; auto body_cap = md[fpat->p_body_field_index]; const char* last; struct exttm log_time_tm; @@ -870,14 +1390,20 @@ external_log_format::scan(logfile& lf, ts = string_fragment::from_bytes(combined_datetime_buf, ts_str_len); } - if ((last = this->lf_date_time.scan(ts->data(), - ts->length(), - this->get_timestamp_formats(), - &log_time_tm, - log_tv)) - == nullptr) + auto level = this->convert_level( + level_cap.value_or(string_fragment::invalid()), &sbc); + + if (!ts) { + level = log_level_t::LEVEL_INVALID; + } else if ((last + = this->lf_date_time.scan(ts->data(), + ts->length(), + this->get_timestamp_formats(), + &log_time_tm, + log_tv)) + == nullptr) { - this->lf_date_time.unlock(); + auto ls = this->lf_date_time.unlock(); if ((last = this->lf_date_time.scan(ts->data(), ts->length(), this->get_timestamp_formats(), @@ -885,15 +1411,43 @@ external_log_format::scan(logfile& lf, log_tv)) == nullptr) { + this->lf_date_time.relock(ls); continue; } - } + if (last != nullptr) { + auto old_flags = this->lf_timestamp_flags & DATE_TIME_SET_FLAGS; + auto new_flags = log_time_tm.et_flags & DATE_TIME_SET_FLAGS; - auto level = this->convert_level( - level_cap.value_or(string_fragment::invalid()), &sbc); + // It is unlikely a valid timestamp would lose much + // precision. + if (new_flags != old_flags) { + continue; + } + } + + log_debug("%s:%d: date-time re-locked to %d", + lf.get_unique_path().c_str(), + dst.size(), + this->lf_date_time.dts_fmt_lock); + } this->lf_timestamp_flags = log_time_tm.et_flags; + if (!(this->lf_timestamp_flags + & (ETF_MILLIS_SET | ETF_MICROS_SET | ETF_NANOS_SET)) + && !dst.empty() && dst.back().get_time() == log_tv.tv_sec + && dst.back().get_millis() != 0) + { + auto log_ms = std::chrono::milliseconds(dst.back().get_millis()); + + log_time_tm.et_nsec + = std::chrono::duration_cast<std::chrono::nanoseconds>(log_ms) + .count(); + log_tv.tv_usec + = std::chrono::duration_cast<std::chrono::microseconds>(log_ms) + .count(); + } + if (!((log_time_tm.et_flags & ETF_DAY_SET) && (log_time_tm.et_flags & ETF_MONTH_SET) && (log_time_tm.et_flags & ETF_YEAR_SET))) @@ -902,17 +1456,43 @@ external_log_format::scan(logfile& lf, } if (opid_cap && !opid_cap->empty()) { - { - auto opid_iter = sbc.sbc_opids.find(opid_cap.value()); + auto opid_iter + = sbc.sbc_opids.los_opid_ranges.find(opid_cap.value()); + + if (opid_iter == sbc.sbc_opids.los_opid_ranges.end()) { + auto opid_copy = opid_cap->to_owned(sbc.sbc_allocator); + auto otr = opid_time_range{time_range{log_tv, log_tv}}; + auto emplace_res + = sbc.sbc_opids.los_opid_ranges.emplace(opid_copy, otr); + opid_iter = emplace_res.first; + } else { + opid_iter->second.otr_range.extend_to(log_tv); + } - if (opid_iter == sbc.sbc_opids.end()) { - auto opid_copy = opid_cap->to_owned(sbc.sbc_allocator); - auto otr = opid_time_range{log_tv, log_tv}; - sbc.sbc_opids.emplace(opid_copy, otr); - } else { - opid_iter->second.otr_end = log_tv; + auto& otr = opid_iter->second; + + otr.otr_level_stats.update_msg_count(level); + + if (subid_cap && !subid_cap->empty()) { + auto* ostr = sbc.sbc_opids.sub_op_in_use(sbc.sbc_allocator, + opid_iter, + subid_cap.value(), + log_tv, + level); + if (ostr != nullptr && ostr->ostr_description.empty()) { + log_op_description sub_desc; + this->update_op_description( + *this->lf_subid_description_def, sub_desc, fpat, md); + if (!sub_desc.lod_elements.empty()) { + auto& sub_desc_def = this->lf_subid_description_def->at( + sub_desc.lod_id.value()); + ostr->ostr_description + = sub_desc_def.to_string(sub_desc.lod_elements); + } } } + this->update_op_description( + *this->lf_opid_description_def, otr.otr_description, fpat, md); opid = hash_str(opid_cap->data(), opid_cap->length()); } @@ -999,10 +1579,29 @@ external_log_format::scan(logfile& lf, } } - auto scan_res - = scn::scan_value<double>(num_cap->to_string_view()); - if (scan_res) { - auto dvalue = scan_res.value(); + nonstd::optional<double> dvalue_opt; + switch (vd.vd_meta.lvm_kind) { + case value_kind_t::VALUE_INTEGER: { + auto scan_res = scn::scan_value<int64_t>( + num_cap->to_string_view()); + if (scan_res) { + dvalue_opt = scan_res.value(); + } + break; + } + case value_kind_t::VALUE_FLOAT: { + auto scan_res = scn::scan_value<double>( + num_cap->to_string_view()); + if (scan_res) { + dvalue_opt = scan_res.value(); + } + break; + } + default: + break; + } + if (dvalue_opt) { + auto dvalue = dvalue_opt.value(); if (scaling != nullptr) { scaling->scale(dvalue); } @@ -1018,10 +1617,12 @@ external_log_format::scan(logfile& lf, if (orig_lock != curr_fmt) { uint32_t lock_line; - log_debug("%zu: changing pattern lock %d -> %d", + log_debug("%s:%zu: changing pattern lock %d -> (%d)%s", + lf.get_unique_path().c_str(), dst.size() - 1, orig_lock, - curr_fmt); + curr_fmt, + this->elf_pattern_order[curr_fmt]->p_name.c_str()); if (this->lf_pattern_locks.empty()) { lock_line = 0; } else { @@ -1112,7 +1713,7 @@ external_log_format::annotate(uint64_t line_number, values = this->jlf_line_values; sa = this->jlf_line_attrs; } else { - values.lvv_sbr = this->jlf_line_values.lvv_sbr; + values.lvv_sbr = this->jlf_line_values.lvv_sbr.clone(); for (const auto& llv : this->jlf_line_values.lvv_values) { if (this->jlf_cached_sub_range.contains(llv.lv_origin)) { values.lvv_values.emplace_back(llv); @@ -1124,6 +1725,9 @@ external_log_format::annotate(uint64_t line_number, for (const auto& attr : this->jlf_line_attrs) { if (this->jlf_cached_sub_range.contains(attr.sa_range)) { sa.emplace_back(attr); + sa.back().sa_range.shift( + this->jlf_cached_sub_range.lr_start, + -this->jlf_cached_sub_range.lr_start); } } } @@ -1177,9 +1781,10 @@ external_log_format::annotate(uint64_t line_number, } auto opid_cap = md[pat.p_opid_field_index]; - if (opid_cap) { + if (opid_cap && !opid_cap->empty()) { sa.emplace_back(to_line_range(opid_cap.value()), logline::L_OPID.value()); + values.lvv_opid_value = opid_cap->to_string(); } } @@ -1273,7 +1878,8 @@ external_log_format::rewrite(exec_context& ec, ++iter) { if (!iter->lv_origin.is_valid()) { - log_debug("not rewriting value with invalid origin -- %s", + log_debug("%d: not rewriting value with invalid origin -- %s", + ec.ec_top_line, iter->lv_meta.lvm_name.get()); continue; } @@ -1295,24 +1901,49 @@ external_log_format::rewrite(exec_context& ec, vd_iter->second->vd_rewrite_src_name, 1, vd.vd_rewriter); std::string field_value; - auto exec_res = execute_any(ec, vd.vd_rewriter); - if (exec_res.isOk()) { - field_value = exec_res.unwrap(); - } else { - field_value = exec_res.unwrapErr().to_attr_line().get_string(); + auto_mem<FILE> tmpout(fclose); + + tmpout = std::tmpfile(); + if (!tmpout) { + log_error("unable to create temporary file"); + return; + } + fcntl(fileno(tmpout), F_SETFD, FD_CLOEXEC); + auto fd_copy = auto_fd::dup_of(fileno(tmpout)); + fd_copy.close_on_exec(); + auto ec_out = std::make_pair(tmpout.release(), fclose); + { + exec_context::output_guard og(ec, "tmp", ec_out); + + auto exec_res = execute_any(ec, vd.vd_rewriter); + if (exec_res.isOk()) { + field_value = exec_res.unwrap(); + } else { + field_value = exec_res.unwrapErr().to_attr_line().get_string(); + } + } + struct stat st; + fstat(fd_copy.get(), &st); + if (st.st_size > 0) { + auto buf = auto_buffer::alloc(st.st_size); + + buf.resize(st.st_size); + pread(fd_copy.get(), buf.in(), st.st_size, 0); + field_value = buf.to_string(); } value_out.erase(iter->lv_origin.lr_start, iter->lv_origin.length()); int32_t shift_amount = ((int32_t) field_value.length()) - iter->lv_origin.length(); + auto orig_lr = iter->lv_origin; value_out.insert(iter->lv_origin.lr_start, field_value); for (shift_iter = values.lvv_values.begin(); shift_iter != values.lvv_values.end(); ++shift_iter) { - shift_iter->lv_origin.shift(iter->lv_origin.lr_start, shift_amount); + shift_iter->lv_origin.shift_range(orig_lr, shift_amount); } - shift_string_attrs(sa, iter->lv_origin.lr_start, shift_amount); + shift_string_attrs(sa, orig_lr, shift_amount); } } @@ -1321,37 +1952,78 @@ read_json_field(yajlpp_parse_context* ypc, const unsigned char* str, size_t len) { json_log_userdata* jlu = (json_log_userdata*) ypc->ypc_userdata; const intern_string_t field_name = ypc->get_path(); - struct exttm tm_out; struct timeval tv_out; + auto frag = string_fragment::from_bytes(str, len); if (jlu->jlu_format->lf_timestamp_field == field_name) { - jlu->jlu_format->lf_date_time.scan( + const auto* last = jlu->jlu_format->lf_date_time.scan( (const char*) str, len, jlu->jlu_format->get_timestamp_formats(), - &tm_out, + &jlu->jlu_exttm, tv_out); - // Leave off the machine oriented flag since we convert it anyhow - jlu->jlu_format->lf_timestamp_flags - = tm_out.et_flags & ~ETF_MACHINE_ORIENTED; - jlu->jlu_base_line->set_time(tv_out); + if (last == nullptr) { + auto ls = jlu->jlu_format->lf_date_time.unlock(); + if ((last = jlu->jlu_format->lf_date_time.scan( + (const char*) str, + len, + jlu->jlu_format->get_timestamp_formats(), + &jlu->jlu_exttm, + tv_out)) + == nullptr) + { + jlu->jlu_format->lf_date_time.relock(ls); + } + if (last != nullptr) { + auto old_flags + = jlu->jlu_format->lf_timestamp_flags & DATE_TIME_SET_FLAGS; + auto new_flags = jlu->jlu_exttm.et_flags & DATE_TIME_SET_FLAGS; + + // It is unlikely a valid timestamp would lose much + // precision. + if (new_flags != old_flags) { + last = nullptr; + } + } + } + if (last != nullptr) { + jlu->jlu_format->lf_timestamp_flags = jlu->jlu_exttm.et_flags; + jlu->jlu_base_line->set_time(tv_out); + } } else if (jlu->jlu_format->elf_level_pointer.pp_value != nullptr) { if (jlu->jlu_format->elf_level_pointer.pp_value ->find_in(field_name.to_string_fragment(), PCRE2_NO_UTF_CHECK) .ignore_error() .has_value()) { - jlu->jlu_base_line->set_level(jlu->jlu_format->convert_level( - string_fragment::from_bytes(str, len), jlu->jlu_batch_context)); + jlu->jlu_base_line->set_level( + jlu->jlu_format->convert_level(frag, jlu->jlu_batch_context)); } } if (jlu->jlu_format->elf_level_field == field_name) { - jlu->jlu_base_line->set_level(jlu->jlu_format->convert_level( - string_fragment::from_bytes(str, len), jlu->jlu_batch_context)); + jlu->jlu_base_line->set_level( + jlu->jlu_format->convert_level(frag, jlu->jlu_batch_context)); } if (jlu->jlu_format->elf_opid_field == field_name) { uint8_t opid = hash_str((const char*) str, len); jlu->jlu_base_line->set_opid(opid); + + auto& sbc = *jlu->jlu_batch_context; + auto opid_iter = sbc.sbc_opids.los_opid_ranges.find(frag); + if (opid_iter == sbc.sbc_opids.los_opid_ranges.end()) { + jlu->jlu_opid_frag = frag.to_owned(sbc.sbc_allocator); + } else { + jlu->jlu_opid_frag = opid_iter->first; + } + } + if (jlu->jlu_format->elf_subid_field == field_name) { + jlu->jlu_subid = frag.to_string(); + } + + if (jlu->jlu_format->lf_desc_fields.contains(field_name)) { + auto frag_copy = frag.to_owned(jlu->jlu_format->lf_desc_allocator); + + jlu->jlu_format->lf_desc_captures.emplace(field_name, frag_copy); } jlu->add_sub_lines_for( @@ -1369,25 +2041,55 @@ rewrite_json_field(yajlpp_parse_context* ypc, json_log_userdata* jlu = (json_log_userdata*) ypc->ypc_userdata; const intern_string_t field_name = ypc->get_path(); + if (jlu->jlu_format->elf_opid_field == field_name) { + auto frag = string_fragment::from_bytes(str, len); + jlu->jlu_format->jlf_line_values.lvv_opid_value = frag.to_string(); + } if (jlu->jlu_format->lf_timestamp_field == field_name) { char time_buf[64]; // TODO add a timeval kind to logline_value - if (jlu->jlu_line->is_time_skewed()) { + if (jlu->jlu_line->is_time_skewed() + || (jlu->jlu_format->lf_timestamp_flags + & (ETF_MICROS_SET | ETF_NANOS_SET | ETF_ZONE_SET))) + { struct timeval tv; - struct exttm tm; - jlu->jlu_format->lf_date_time.scan( + const auto* last = jlu->jlu_format->lf_date_time.scan( (const char*) str, len, jlu->jlu_format->get_timestamp_formats(), - &tm, + &jlu->jlu_exttm, tv); - sql_strftime(time_buf, sizeof(time_buf), tv, 'T'); + if (last == nullptr) { + auto ls = jlu->jlu_format->lf_date_time.unlock(); + if ((last = jlu->jlu_format->lf_date_time.scan( + (const char*) str, + len, + jlu->jlu_format->get_timestamp_formats(), + &jlu->jlu_exttm, + tv)) + == nullptr) + { + jlu->jlu_format->lf_date_time.relock(ls); + } + } + jlu->jlu_format->lf_date_time.ftime( + time_buf, + sizeof(time_buf), + jlu->jlu_format->get_timestamp_formats(), + jlu->jlu_exttm); } else { sql_strftime( time_buf, sizeof(time_buf), jlu->jlu_line->get_timeval(), 'T'); } + if (jlu->jlu_exttm.et_flags & ETF_ZONE_SET + && jlu->jlu_format->lf_date_time.dts_zoned_to_local) + { + jlu->jlu_exttm.et_flags &= ~ETF_Z_IS_UTC; + } + jlu->jlu_exttm.et_gmtoff + = jlu->jlu_format->lf_date_time.dts_local_offset_cache; jlu->jlu_format->jlf_line_values.lvv_values.emplace_back( jlu->jlu_format->get_value_meta(field_name, value_kind_t::VALUE_TEXT), @@ -1396,8 +2098,10 @@ rewrite_json_field(yajlpp_parse_context* ypc, auto str_offset = (int) ((const char*) str - jlu->jlu_line_value); if (field_name == jlu->jlu_format->elf_body_field) { jlu->jlu_format->jlf_line_values.lvv_values.emplace_back( - jlu->jlu_format->get_value_meta(body_name, - value_kind_t::VALUE_TEXT), + logline_value_meta(body_name, + value_kind_t::VALUE_TEXT, + logline_value_meta::internal_column{}, + jlu->jlu_format), string_fragment::from_byte_range( jlu->jlu_shared_buffer.get_data(), str_offset, @@ -1416,8 +2120,10 @@ rewrite_json_field(yajlpp_parse_context* ypc, } else { if (field_name == jlu->jlu_format->elf_body_field) { jlu->jlu_format->jlf_line_values.lvv_values.emplace_back( - jlu->jlu_format->get_value_meta(body_name, - value_kind_t::VALUE_TEXT), + logline_value_meta(body_name, + value_kind_t::VALUE_TEXT, + logline_value_meta::internal_column{}, + jlu->jlu_format), std::string{(const char*) str, len}); } if (!ypc->is_level(1) && !jlu->jlu_format->has_value_def(field_name)) { @@ -1466,7 +2172,7 @@ external_log_format::get_subline(const logline& ll, sbr.share(this->jlf_share_manager, &this->jlf_cached_line[0], this->jlf_cached_line.size()); - this->jlf_line_values.lvv_sbr = sbr; + this->jlf_line_values.lvv_sbr = sbr.clone(); this->jlf_line_attrs.emplace_back( line_range{0, -1}, SA_INVALID.value(fmt::format( @@ -1520,15 +2226,18 @@ external_log_format::get_subline(const logline& ll, struct line_range lr; memset(used_values, 0, sizeof(used_values)); - for (lv_iter = this->jlf_line_values.lvv_values.begin(); lv_iter != this->jlf_line_values.lvv_values.end(); ++lv_iter) { lv_iter->lv_meta.lvm_format = this; } + if (jlu.jlu_opid_frag) { + this->jlf_line_values.lvv_opid_value + = jlu.jlu_opid_frag->to_string(); + } - int sub_offset = 1 + this->jlf_line_format_init_count; + int sub_offset = this->jlf_line_format_init_count; for (const auto& jfe : this->jlf_line_format) { static const intern_string_t ts_field = intern_string::lookup("__timestamp__", -1); @@ -1549,6 +2258,14 @@ external_log_format::get_subline(const logline& ll, logline_value_cmp(&jfe.jfe_value.pp_value)); if (lv_iter != this->jlf_line_values.lvv_values.end()) { auto str = lv_iter->to_string(); + value_def* vd = nullptr; + + if (lv_iter->lv_meta.lvm_values_index) { + vd = this->elf_value_def_order + [lv_iter->lv_meta.lvm_values_index + .value()] + .get(); + } while (endswith(str, "\n")) { str.pop_back(); } @@ -1563,15 +2280,13 @@ external_log_format::get_subline(const logline& ll, switch (jfe.jfe_overflow) { case json_format_element::overflow_t:: ABBREV: { - this->json_append_to_cache(str.c_str(), - str.size()); - size_t new_size = abbreviate_str( - &this->jlf_cached_line[lr.lr_start], - str.size(), - jfe.jfe_max_width); - - this->jlf_cached_line.resize( - lr.lr_start + new_size); + size_t new_size + = abbreviate_str(&str[0], + str.size(), + jfe.jfe_max_width); + str.resize(new_size); + this->json_append( + jfe, vd, str.data(), str.size()); break; } case json_format_element::overflow_t:: @@ -1594,16 +2309,19 @@ external_log_format::get_subline(const logline& ll, rest); break; } + case json_format_element::overflow_t:: + LASTWORD: { + size_t new_size + = last_word_str(&str[0], + str.size(), + jfe.jfe_max_width); + str.resize(new_size); + this->json_append( + jfe, vd, str.data(), str.size()); + break; + } } } else { - value_def* vd = nullptr; - - if (lv_iter->lv_meta.lvm_values_index) { - vd = this->elf_value_def_order - [lv_iter->lv_meta.lvm_values_index - .value()] - .get(); - } sub_offset += std::count(str.begin(), str.end(), '\n'); this->json_append( @@ -1627,12 +2345,14 @@ external_log_format::get_subline(const logline& ll, this->jlf_line_attrs.emplace_back( lr, SA_BODY.value()); } else if (lv_iter->lv_meta.lvm_name - == this->elf_opid_field) + == this->elf_opid_field + && !lr.empty()) { this->jlf_line_attrs.emplace_back( lr, logline::L_OPID.value()); } lv_iter->lv_origin = lr; + lv_iter->lv_sub_offset = sub_offset; used_values[std::distance( this->jlf_line_values.lvv_values.begin(), lv_iter)] @@ -1645,14 +2365,22 @@ external_log_format::get_subline(const logline& ll, struct line_range lr; ssize_t ts_len; char ts[64]; + struct exttm et; + ll.to_exttm(et); + et.et_nsec += jlu.jlu_exttm.et_nsec % 1000000; + et.et_gmtoff = jlu.jlu_exttm.et_gmtoff; + et.et_flags |= jlu.jlu_exttm.et_flags; + if (!jfe.jfe_prefix.empty()) { + this->json_append_to_cache(jfe.jfe_prefix); + } if (jfe.jfe_ts_format.empty()) { - ts_len = sql_strftime( - ts, sizeof(ts), ll.get_timeval(), 'T'); + ts_len = this->lf_date_time.ftime( + ts, + sizeof(ts), + this->get_timestamp_formats(), + et); } else { - struct exttm et; - - ll.to_exttm(et); ts_len = ftime_fmt(ts, sizeof(ts), jfe.jfe_ts_format.c_str(), @@ -1676,6 +2404,9 @@ external_log_format::get_subline(const logline& ll, lv_iter)] = true; } + if (!jfe.jfe_suffix.empty()) { + this->json_append_to_cache(jfe.jfe_suffix); + } } else if (jfe.jfe_value.pp_value == level_field || jfe.jfe_value.pp_value == this->elf_level_field) @@ -1747,6 +2478,7 @@ external_log_format::get_subline(const logline& ll, } } this->json_append_to_cache("\n", 1); + sub_offset += 1; for (size_t lpc = 0; lpc < this->jlf_line_values.lvv_values.size(); lpc++) @@ -1786,6 +2518,13 @@ external_log_format::get_subline(const logline& ll, break; } } + lv.lv_origin.lr_end = this->jlf_cached_line.size() - 1; + if (lv.lv_meta.lvm_name == this->elf_opid_field + && !lv.lv_origin.empty()) + { + this->jlf_line_attrs.emplace_back(lv.lv_origin, + logline::L_OPID.value()); + } } } @@ -1833,7 +2572,148 @@ external_log_format::get_subline(const logline& ll, sbr.get_metadata().m_has_ansi = ll.has_ansi(); this->jlf_cached_sub_range.lr_start = this_off; this->jlf_cached_sub_range.lr_end = next_off; - this->jlf_line_values.lvv_sbr = sbr; + this->jlf_line_values.lvv_sbr = sbr.clone(); +} + +struct compiled_header_expr { + auto_mem<sqlite3_stmt> che_stmt{sqlite3_finalize}; + bool che_enabled{true}; +}; + +struct format_header_expressions : public lnav_config_listener { + format_header_expressions() : lnav_config_listener(__FILE__) {} + + auto_sqlite3 e_db; + std::map<intern_string_t, std::map<std::string, compiled_header_expr>> + e_header_exprs; +}; + +using safe_format_header_expressions = safe::Safe<format_header_expressions>; + +static safe_format_header_expressions format_header_exprs; + +nonstd::optional<external_file_format> +detect_mime_type(const ghc::filesystem::path& filename) +{ + uint8_t buffer[1024]; + size_t buffer_size = 0; + + { + auto_fd fd; + + if ((fd = lnav::filesystem::openp(filename, O_RDONLY)) == -1) { + return nonstd::nullopt; + } + + ssize_t rc; + + if ((rc = read(fd, buffer, sizeof(buffer))) == -1) { + return nonstd::nullopt; + } + buffer_size = rc; + } + + auto hexbuf = auto_buffer::alloc(buffer_size * 2); + + for (size_t lpc = 0; lpc < buffer_size; lpc++) { + fmt::format_to( + std::back_inserter(hexbuf), FMT_STRING("{:02x}"), buffer[lpc]); + } + + safe::WriteAccess<safe_format_header_expressions> in(format_header_exprs); + + for (const auto& format : log_format::get_root_formats()) { + auto elf = std::dynamic_pointer_cast<external_log_format>(format); + if (elf == nullptr) { + continue; + } + + if (elf->elf_converter.c_header.h_exprs.he_exprs.empty()) { + continue; + } + + if (buffer_size < elf->elf_converter.c_header.h_size) { + log_debug( + "%s: file content too small (%d) for header detection: %s", + filename.c_str(), + buffer_size, + elf->get_name().get()); + continue; + } + for (const auto& hpair : elf->elf_converter.c_header.h_exprs.he_exprs) { + auto& he = in->e_header_exprs[elf->get_name()][hpair.first]; + + if (!he.che_enabled) { + continue; + } + + auto* stmt = he.che_stmt.in(); + + if (stmt == nullptr) { + continue; + } + sqlite3_reset(stmt); + auto count = sqlite3_bind_parameter_count(stmt); + for (int lpc = 0; lpc < count; lpc++) { + const auto* name = sqlite3_bind_parameter_name(stmt, lpc + 1); + + if (name[0] == '$') { + const char* env_value; + + if ((env_value = getenv(&name[1])) != nullptr) { + sqlite3_bind_text( + stmt, lpc + 1, env_value, -1, SQLITE_STATIC); + } + continue; + } + if (strcmp(name, ":header") == 0) { + sqlite3_bind_text(stmt, + lpc + 1, + hexbuf.in(), + hexbuf.size(), + SQLITE_STATIC); + continue; + } + if (strcmp(name, ":filepath") == 0) { + sqlite3_bind_text( + stmt, lpc + 1, filename.c_str(), -1, SQLITE_STATIC); + continue; + } + } + + auto step_res = sqlite3_step(stmt); + + switch (step_res) { + case SQLITE_OK: + case SQLITE_DONE: + continue; + case SQLITE_ROW: + break; + default: { + log_error( + "failed to execute file-format header expression: " + "%s:%s -- %s", + elf->get_name().get(), + hpair.first.c_str(), + sqlite3_errmsg(in->e_db)); + he.che_enabled = false; + continue; + } + } + + log_info("detected format for: %s -- %s (header-expr: %s)", + filename.c_str(), + elf->get_name().get(), + hpair.first.c_str()); + return external_file_format{ + elf->get_name().to_string(), + elf->elf_converter.c_command.pp_value, + elf->elf_converter.c_command.pp_location.sl_source.to_string(), + }; + } + } + + return nonstd::nullopt; } void @@ -1843,10 +2723,14 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors) auto& vd = this->elf_value_defs[this->lf_timestamp_field]; if (vd.get() == nullptr) { vd = std::make_shared<external_log_format::value_def>( - this->lf_timestamp_field, value_kind_t::VALUE_TEXT, -1, this); + this->lf_timestamp_field, + value_kind_t::VALUE_TEXT, + logline_value_meta::internal_column{}, + this); } vd->vd_meta.lvm_name = this->lf_timestamp_field; vd->vd_meta.lvm_kind = value_kind_t::VALUE_TEXT; + vd->vd_meta.lvm_column = logline_value_meta::internal_column{}; vd->vd_internal = true; } if (startswith(this->elf_level_field.get(), "/")) { @@ -1860,20 +2744,28 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors) auto& vd = this->elf_value_defs[this->elf_level_field]; if (vd.get() == nullptr) { vd = std::make_shared<external_log_format::value_def>( - this->elf_level_field, value_kind_t::VALUE_TEXT, -1, this); + this->elf_level_field, + value_kind_t::VALUE_TEXT, + logline_value_meta::internal_column{}, + this); } vd->vd_meta.lvm_name = this->elf_level_field; vd->vd_meta.lvm_kind = value_kind_t::VALUE_TEXT; + vd->vd_meta.lvm_column = logline_value_meta::internal_column{}; vd->vd_internal = true; } if (!this->elf_body_field.empty()) { auto& vd = this->elf_value_defs[this->elf_body_field]; if (vd.get() == nullptr) { vd = std::make_shared<external_log_format::value_def>( - this->elf_body_field, value_kind_t::VALUE_TEXT, -1, this); + this->elf_body_field, + value_kind_t::VALUE_TEXT, + logline_value_meta::internal_column{}, + this); } vd->vd_meta.lvm_name = this->elf_body_field; vd->vd_meta.lvm_kind = value_kind_t::VALUE_TEXT; + vd->vd_meta.lvm_column = logline_value_meta::internal_column{}; vd->vd_internal = true; } @@ -1913,6 +2805,9 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors) if (name == this->elf_opid_field) { pat.p_opid_field_index = named_cap.get_index(); } + if (name == this->elf_subid_field) { + pat.p_subid_field_index = named_cap.get_index(); + } if (name == this->elf_body_field) { pat.p_body_field_index = named_cap.get_index(); } @@ -1929,12 +2824,17 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors) } else { ivd.ivd_unit_field_index = -1; } - if (!vd->vd_internal && vd->vd_meta.lvm_column == -1) { - vd->vd_meta.lvm_column = this->elf_column_count++; + if (!vd->vd_internal + && !vd->vd_meta.lvm_column + .is<logline_value_meta::table_column>()) + { + vd->vd_meta.lvm_column = logline_value_meta::table_column{ + this->elf_column_count++}; } ivd.ivd_value_def = vd; pat.p_value_by_index.push_back(ivd); } + pat.p_value_name_to_index[name] = named_cap.get_index(); } stable_sort(pat.p_value_by_index.begin(), pat.p_value_by_index.end()); @@ -1943,7 +2843,7 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors) auto& ivd = pat.p_value_by_index[lpc]; auto vd = ivd.ivd_value_def; - if (!vd->vd_foreign_key && !vd->vd_meta.lvm_identifier) { + if (!vd->vd_meta.lvm_foreign_key && !vd->vd_meta.lvm_identifier) { switch (vd->vd_meta.lvm_kind) { case value_kind_t::VALUE_INTEGER: case value_kind_t::VALUE_FLOAT: @@ -1955,6 +2855,16 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors) } } + if (!pat.p_module_format && pat.p_timestamp_field_index == -1) { + errors.emplace_back( + lnav::console::user_message::error( + attr_line_t("invalid pattern: ") + .append_quoted(lnav::roles::symbol(pat.p_config_path))) + .with_reason("no timestamp capture found in the pattern") + .with_snippets(this->get_snippets()) + .with_help("all log messages need a timestamp")); + } + if (!this->elf_level_field.empty() && pat.p_level_field_index == -1) { log_warning("%s:level field '%s' not found in pattern", pat.p_config_path.c_str(), @@ -1988,6 +2898,9 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors) .with_snippets(this->get_snippets())); } if (this->elf_type == elf_type_t::ELF_TYPE_JSON) { + this->lf_multiline = true; + this->lf_structured = true; + this->lf_formatted_lines = true; this->jlf_parse_context = std::make_shared<yajlpp_parse_context>(this->elf_name); this->jlf_yajl_handle.reset( @@ -1998,7 +2911,6 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors) yajl_config( this->jlf_yajl_handle.get(), yajl_dont_validate_strings, 1); } - } else { if (this->elf_patterns.empty()) { errors.emplace_back(lnav::console::user_message::error( @@ -2013,12 +2925,79 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors) stable_sort(this->elf_level_pairs.begin(), this->elf_level_pairs.end()); + { + safe::WriteAccess<safe_format_header_expressions> hexprs( + format_header_exprs); + + if (hexprs->e_db.in() == nullptr) { + if (sqlite3_open(":memory:", hexprs->e_db.out()) != SQLITE_OK) { + log_error("unable to open memory DB"); + return; + } + register_sqlite_funcs(hexprs->e_db.in(), sqlite_registration_funcs); + } + + for (const auto& hpair : this->elf_converter.c_header.h_exprs.he_exprs) + { + auto stmt_str + = fmt::format(FMT_STRING("SELECT 1 WHERE {}"), hpair.second); + compiled_header_expr che; + + log_info("preparing file-format header expression: %s", + stmt_str.c_str()); + auto retcode = sqlite3_prepare_v2(hexprs->e_db.in(), + stmt_str.c_str(), + stmt_str.size(), + che.che_stmt.out(), + nullptr); + if (retcode != SQLITE_OK) { + auto sql_al = attr_line_t(hpair.second) + .with_attr_for_all(SA_PREFORMATTED.value()) + .with_attr_for_all( + VC_ROLE.value(role_t::VCR_QUOTED_CODE)); + readline_sqlite_highlighter(sql_al, -1); + intern_string_t watch_expr_path = intern_string::lookup( + fmt::format(FMT_STRING("/{}/converter/header/expr/{}"), + this->elf_name, + hpair.first)); + auto snippet = lnav::console::snippet::from( + source_location(watch_expr_path), sql_al); + + auto um = lnav::console::user_message::error( + "SQL expression is invalid") + .with_reason(sqlite3_errmsg(hexprs->e_db.in())) + .with_snippet(snippet); + + errors.emplace_back(um); + continue; + } + + hexprs->e_header_exprs[this->elf_name][hpair.first] + = std::move(che); + } + + if (!this->elf_converter.c_header.h_exprs.he_exprs.empty() + && this->elf_converter.c_command.pp_value.empty()) + { + auto um = lnav::console::user_message::error( + "A command is required when a converter is defined") + .with_help( + "The converter command transforms the file " + "into a format that can be consumed by lnav") + .with_snippets(this->get_snippets()); + errors.emplace_back(um); + } + } + for (auto& vd : this->elf_value_def_order) { std::vector<std::string>::iterator act_iter; vd->vd_meta.lvm_format = this; - if (!vd->vd_internal && vd->vd_meta.lvm_column == -1) { - vd->vd_meta.lvm_column = this->elf_column_count++; + if (!vd->vd_internal + && !vd->vd_meta.lvm_column.is<logline_value_meta::table_column>()) + { + vd->vd_meta.lvm_column + = logline_value_meta::table_column{this->elf_column_count++}; } if (vd->vd_meta.lvm_kind == value_kind_t::VALUE_UNKNOWN) { @@ -2108,6 +3087,44 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors) } } + for (const auto& opid_desc_pair : *this->lf_opid_description_def) { + for (const auto& opid_desc : *opid_desc_pair.second.od_descriptors) { + auto iter = this->elf_value_defs.find(opid_desc.od_field.pp_value); + if (iter == this->elf_value_defs.end()) { + errors.emplace_back( + lnav::console::user_message::error( + attr_line_t("invalid opid description field ") + .append_quoted(lnav::roles::symbol( + opid_desc.od_field.pp_path.to_string()))) + .with_reason( + attr_line_t("unknown value name ") + .append_quoted(opid_desc.od_field.pp_value)) + .with_snippets(this->get_snippets())); + } else { + this->lf_desc_fields.insert(iter->first); + } + } + } + + for (const auto& subid_desc_pair : *this->lf_subid_description_def) { + for (const auto& subid_desc : *subid_desc_pair.second.od_descriptors) { + auto iter = this->elf_value_defs.find(subid_desc.od_field.pp_value); + if (iter == this->elf_value_defs.end()) { + errors.emplace_back( + lnav::console::user_message::error( + attr_line_t("invalid subid description field ") + .append_quoted(lnav::roles::symbol( + subid_desc.od_field.pp_path.to_string()))) + .with_reason( + attr_line_t("unknown value name ") + .append_quoted(subid_desc.od_field.pp_value)) + .with_snippets(this->get_snippets())); + } else { + this->lf_desc_fields.insert(iter->first); + } + } + } + if (this->elf_type == elf_type_t::ELF_TYPE_TEXT && this->elf_samples.empty()) { @@ -2218,14 +3235,46 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors) if (ts_cap && ts_cap->sf_begin == 0) { pat.p_timestamp_end = ts_cap->sf_end; } - if (ts_cap - && dts.scan(ts_cap->data(), - ts_cap->length(), - custom_formats, - &tm, - tv) - == nullptr) - { + const char* dts_scan_res = nullptr; + + if (ts_cap) { + dts_scan_res = dts.scan( + ts_cap->data(), ts_cap->length(), custom_formats, &tm, tv); + } + if (dts_scan_res != nullptr) { + if (dts_scan_res != ts_cap->data() + ts_cap->length()) { + auto match_len = dts_scan_res - ts_cap->data(); + auto notes = attr_line_t("the used timestamp format: "); + if (custom_formats == nullptr) { + notes.append(PTIMEC_FORMATS[dts.dts_fmt_lock].pf_fmt); + } else { + notes.append(custom_formats[dts.dts_fmt_lock]); + } + notes.append("\n ") + .append(ts_cap.value()) + .append("\n") + .append(2 + match_len, ' ') + .append("^ matched up to here"_snippet_border); + auto um + = lnav::console::user_message::warning( + attr_line_t("timestamp was not fully matched: ") + .append_quoted(ts_cap.value())) + .with_snippet(elf_sample.s_line.to_snippet()) + .with_note(notes); + + errors.emplace_back(um); + } + } else if (!ts_cap) { + errors.emplace_back( + lnav::console::user_message::error( + attr_line_t("invalid sample log message: ") + .append(lnav::to_json(elf_sample.s_line.pp_value))) + .with_reason(attr_line_t("timestamp was not captured")) + .with_snippet(elf_sample.s_line.to_snippet()) + .with_help(attr_line_t( + "A timestamp needs to be captured in order for a " + "line to be recognized as a log message"))); + } else { attr_line_t notes; if (custom_formats == nullptr) { @@ -2557,7 +3606,7 @@ external_log_format::build(std::vector<lnav::console::user_message>& errors) elf_value_def->vd_meta.lvm_values_index = nonstd::make_optional(value_def_index++); - if (elf_value_def->vd_foreign_key + if (elf_value_def->vd_meta.lvm_foreign_key || elf_value_def->vd_meta.lvm_identifier) { continue; @@ -2784,18 +3833,21 @@ public: auto type_pair = log_vtab_impl::logline_value_to_sqlite_type( vd->vd_meta.lvm_kind); - if (vd->vd_meta.lvm_column == -1) { + if (!vd->vd_meta.lvm_column.is<logline_value_meta::table_column>()) + { continue; } - require(0 <= vd->vd_meta.lvm_column - && vd->vd_meta.lvm_column < elf.elf_column_count); + auto col + = vd->vd_meta.lvm_column.get<logline_value_meta::table_column>() + .value; + require(0 <= col && col < elf.elf_column_count); - cols[vd->vd_meta.lvm_column].vc_name = vd->vd_meta.lvm_name.get(); - cols[vd->vd_meta.lvm_column].vc_type = type_pair.first; - cols[vd->vd_meta.lvm_column].vc_subtype = type_pair.second; - cols[vd->vd_meta.lvm_column].vc_collator = vd->vd_collate; - cols[vd->vd_meta.lvm_column].vc_comment = vd->vd_description; + cols[col].vc_name = vd->vd_meta.lvm_name.get(); + cols[col].vc_type = type_pair.first; + cols[col].vc_subtype = type_pair.second; + cols[col].vc_collator = vd->vd_collate; + cols[col].vc_comment = vd->vd_description; } } @@ -2804,7 +3856,7 @@ public: log_vtab_impl::get_foreign_keys(keys_inout); for (const auto& elf_value_def : this->elt_format.elf_value_defs) { - if (elf_value_def.second->vd_foreign_key) { + if (elf_value_def.second->vd_meta.lvm_foreign_key) { keys_inout.emplace_back(elf_value_def.first.to_string()); } } @@ -2838,7 +3890,7 @@ public: intern_string_t mod_name; this->vi_attrs.clear(); - values.lvv_sbr = line; + values.lvv_sbr = line.clone(); format->annotate(cl, this->vi_attrs, values, false); this->elt_container_body = find_string_attr_range(this->vi_attrs, &SA_BODY); @@ -2949,16 +4001,6 @@ external_log_format::match_name(const std::string& filename) .has_value(); } -bool -external_log_format::match_mime_type(const file_format_t ff) const -{ - if (ff == file_format_t::UNKNOWN && this->elf_mime_types.empty()) { - return true; - } - - return this->elf_mime_types.count(ff) == 1; -} - auto external_log_format::value_line_count(const intern_string_t ist, bool top_level, @@ -2968,7 +4010,16 @@ external_log_format::value_line_count(const intern_string_t ist, { const auto iter = this->elf_value_defs.find(ist); value_line_count_result retval; - if (str != nullptr) { + + if (iter == this->elf_value_defs.end()) { + if (this->jlf_hide_extra || !top_level) { + retval.vlcr_count = 0; + } + + return retval; + } + + if (str != nullptr && !val) { auto frag = string_fragment::from_bytes(str, len); while (frag.endswith("\n")) { frag.pop_back(); @@ -2987,14 +4038,6 @@ external_log_format::value_line_count(const intern_string_t ist, } } - if (iter == this->elf_value_defs.end()) { - if (this->jlf_hide_extra || !top_level) { - retval.vlcr_count = 0; - } - - return retval; - } - if (iter->second->vd_meta.lvm_values_index) { auto& lvs = this->lf_value_stats[iter->second->vd_meta.lvm_values_index .value()]; @@ -3006,6 +4049,11 @@ external_log_format::value_line_count(const intern_string_t ist, } } + if (iter->second->vd_meta.is_hidden()) { + retval.vlcr_count = 0; + return retval; + } + if (std::find_if(this->jlf_line_format.begin(), this->jlf_line_format.end(), json_field_cmp(json_log_field::VARIABLE, ist)) @@ -3015,10 +4063,6 @@ external_log_format::value_line_count(const intern_string_t ist, retval.vlcr_count -= 1; } - if (iter->second->vd_meta.is_hidden()) { - retval.vlcr_count = 0; - } - return retval; } @@ -3091,7 +4135,8 @@ external_log_format::get_value_meta(intern_string_t field_name, auto iter = this->elf_value_defs.find(field_name); if (iter == this->elf_value_defs.end()) { - auto retval = logline_value_meta(field_name, kind, -1, this); + auto retval = logline_value_meta( + field_name, kind, logline_value_meta::external_column{}, this); retval.lvm_hidden = this->jlf_hide_extra; return retval; @@ -3161,6 +4206,10 @@ external_log_format::get_pattern_name(uint64_t line_number) const int log_format::pattern_index_for_line(uint64_t line_number) const { + if (this->lf_pattern_locks.empty()) { + return -1; + } + auto iter = lower_bound(this->lf_pattern_locks.cbegin(), this->lf_pattern_locks.cend(), line_number, @@ -3188,7 +4237,9 @@ log_format::get_pattern_name(uint64_t line_number) const char pat_str[128]; int pat_index = this->pattern_index_for_line(line_number); - snprintf(pat_str, sizeof(pat_str), "builtin (%d)", pat_index); + auto to_n_res = fmt::format_to_n( + pat_str, sizeof(pat_str) - 1, FMT_STRING("builtin ({})"), pat_index); + pat_str[to_n_res.size] = '\0'; return intern_string::lookup(pat_str); } @@ -3332,5 +4383,11 @@ format_tag_def::path_restriction::matches(const char* fn) const return fnmatch(this->p_glob.c_str(), fn, 0) == 0; } +bool +format_partition_def::path_restriction::matches(const char* fn) const +{ + return fnmatch(this->p_glob.c_str(), fn, 0) == 0; +} + /* XXX */ #include "log_format_impls.cc" |