diff options
Diffstat (limited to '')
-rw-r--r-- | src/logfile.cc | 423 |
1 files changed, 320 insertions, 103 deletions
diff --git a/src/logfile.cc b/src/logfile.cc index 32aa196..0384535 100644 --- a/src/logfile.cc +++ b/src/logfile.cc @@ -35,7 +35,6 @@ #include <errno.h> #include <fcntl.h> -#include <stdio.h> #include <string.h> #include <sys/param.h> #include <sys/resource.h> @@ -43,49 +42,55 @@ #include <time.h> #include "base/ansi_scrubber.hh" +#include "base/date_time_scanner.cfg.hh" #include "base/fs_util.hh" #include "base/injector.hh" #include "base/string_util.hh" #include "config.h" +#include "file_options.hh" +#include "hasher.hh" #include "lnav_util.hh" #include "log.watch.hh" #include "log_format.hh" #include "logfile.cfg.hh" +#include "piper.looper.hh" #include "yajlpp/yajlpp_def.hh" static auto intern_lifetime = intern_string::get_table_lifetime(); static const size_t INDEX_RESERVE_INCREMENT = 1024; -static const typed_json_path_container<line_buffer::header_data> - file_header_handlers = { - yajlpp::property_handler("name").for_field( - &line_buffer::header_data::hd_name), +static const typed_json_path_container<lnav::gzip::header> file_header_handlers + = { + yajlpp::property_handler("name").for_field(&lnav::gzip::header::h_name), yajlpp::property_handler("mtime").for_field( - &line_buffer::header_data::hd_mtime), + &lnav::gzip::header::h_mtime), yajlpp::property_handler("comment").for_field( - &line_buffer::header_data::hd_comment), + &lnav::gzip::header::h_comment), }; Result<std::shared_ptr<logfile>, std::string> -logfile::open(std::string filename, logfile_open_options& loo) +logfile::open(ghc::filesystem::path filename, + const logfile_open_options& loo, + auto_fd fd) { require(!filename.empty()); auto lf = std::shared_ptr<logfile>(new logfile(std::move(filename), loo)); memset(&lf->lf_stat, 0, sizeof(lf->lf_stat)); - if (lf->lf_options.loo_fd == -1) { - char resolved_path[PATH_MAX]; + ghc::filesystem::path resolved_path; - errno = 0; - if (realpath(lf->lf_filename.c_str(), resolved_path) == nullptr) { + if (!fd.has_value()) { + auto rp_res = lnav::filesystem::realpath(lf->lf_filename); + if (rp_res.isErr()) { return Err(fmt::format(FMT_STRING("realpath({}) failed with: {}"), lf->lf_filename, - strerror(errno))); + rp_res.unwrapErr())); } - if (stat(resolved_path, &lf->lf_stat) == -1) { + resolved_path = rp_res.unwrap(); + if (lnav::filesystem::statp(resolved_path, &lf->lf_stat) == -1) { return Err(fmt::format(FMT_STRING("stat({}) failed with: {}"), lf->lf_filename, strerror(errno))); @@ -93,31 +98,35 @@ logfile::open(std::string filename, logfile_open_options& loo) if (!S_ISREG(lf->lf_stat.st_mode)) { return Err(fmt::format(FMT_STRING("{} is not a regular file"), - lf->lf_filename, - strerror(errno))); - } - - if ((lf->lf_options.loo_fd = ::open(resolved_path, O_RDONLY)) == -1) { - return Err(fmt::format(FMT_STRING("open({}) failed with: {}"), - lf->lf_filename, - strerror(errno))); + lf->lf_filename)); } + } - lf->lf_options.loo_fd.close_on_exec(); - - log_info("Creating logfile: fd=%d; size=%" PRId64 "; mtime=%" PRId64 - "; filename=%s", - (int) lf->lf_options.loo_fd, - (long long) lf->lf_stat.st_size, - (long long) lf->lf_stat.st_mtime, - lf->lf_filename.c_str()); - + auto_fd lf_fd; + if (fd.has_value()) { + lf_fd = std::move(fd); + } else if ((lf_fd + = lnav::filesystem::openp(resolved_path, O_RDONLY | O_CLOEXEC)) + == -1) + { + return Err(fmt::format(FMT_STRING("open({}) failed with: {}"), + lf->lf_filename, + strerror(errno))); + } else { lf->lf_actual_path = lf->lf_filename; lf->lf_valid_filename = true; - } else { - log_perror(fstat(lf->lf_options.loo_fd, &lf->lf_stat)); - lf->lf_named_file = false; - lf->lf_valid_filename = false; + } + + lf_fd.close_on_exec(); + + log_info("Creating logfile: fd=%d; size=%" PRId64 "; mtime=%" PRId64 + "; filename=%s", + (int) lf_fd, + (long long) lf->lf_stat.st_size, + (long long) lf->lf_stat.st_mtime, + lf->lf_filename.c_str()); + if (lf->lf_actual_path) { + log_info(" actual_path=%s", lf->lf_actual_path->c_str()); } if (!lf->lf_options.loo_filename.empty()) { @@ -126,29 +135,101 @@ logfile::open(std::string filename, logfile_open_options& loo) } lf->lf_content_id = hasher().update(lf->lf_filename).to_string(); - lf->lf_line_buffer.set_fd(lf->lf_options.loo_fd); + lf->lf_line_buffer.set_fd(lf_fd); lf->lf_index.reserve(INDEX_RESERVE_INCREMENT); lf->lf_indexing = lf->lf_options.loo_is_visible; + lf->lf_text_format + = lf->lf_options.loo_text_format.value_or(text_format_t::TF_UNKNOWN); const auto& hdr = lf->lf_line_buffer.get_header_data(); - if (!hdr.empty()) { - lf->lf_embedded_metadata["net.zlib.gzip.header"] - = {text_format_t::TF_JSON, file_header_handlers.to_string(hdr)}; + if (hdr.valid()) { + log_info("%s: has header %d", lf->lf_filename.c_str(), hdr.valid()); + hdr.match( + [&lf](const lnav::gzip::header& gzhdr) { + if (!gzhdr.empty()) { + lf->lf_embedded_metadata["net.zlib.gzip.header"] = { + text_format_t::TF_JSON, + file_header_handlers.to_string(gzhdr), + }; + } + }, + [&lf](const lnav::piper::header& phdr) { + lf->lf_embedded_metadata["org.lnav.piper.header"] = { + text_format_t::TF_JSON, + lnav::piper::header_handlers.to_string(phdr), + }; + log_debug("setting file name: %s", phdr.h_name.c_str()); + lf->set_filename(phdr.h_name); + lf->lf_valid_filename = false; + }); } + lf->file_options_have_changed(); + ensure(lf->invariant()); return Ok(lf); } -logfile::logfile(std::string filename, logfile_open_options& loo) - : lf_filename(std::move(filename)), lf_options(std::move(loo)) +logfile::logfile(ghc::filesystem::path filename, + const logfile_open_options& loo) + : lf_filename(std::move(filename)), lf_options(loo) { - this->lf_opids.writeAccess()->reserve(64); + this->lf_opids.writeAccess()->los_opid_ranges.reserve(64); } -logfile::~logfile() {} +logfile::~logfile() +{ + log_info("destructing logfile: %s", this->lf_filename.c_str()); +} + +bool +logfile::file_options_have_changed() +{ + static auto& safe_options_hier + = injector::get<lnav::safe_file_options_hier&>(); + + bool tz_changed = false; + + { + safe::ReadAccess<lnav::safe_file_options_hier> options_hier( + safe_options_hier); + + if (this->lf_file_options_generation == options_hier->foh_generation) { + return false; + } + auto new_options = options_hier->match(this->get_filename()); + if (this->lf_file_options == new_options) { + this->lf_file_options_generation = options_hier->foh_generation; + return false; + } + + this->lf_file_options = new_options; + log_info("%s: file options have changed", this->lf_filename.c_str()); + if (this->lf_file_options) { + log_info( + " tz=%s", + this->lf_file_options->second.fo_default_zone.pp_value->name() + .c_str()); + if (this->lf_file_options->second.fo_default_zone.pp_value + != nullptr + && this->lf_format != nullptr + && !(this->lf_format->lf_timestamp_flags & ETF_ZONE_SET)) + { + tz_changed = true; + } + } else if (this->lf_format != nullptr + && !(this->lf_format->lf_timestamp_flags & ETF_ZONE_SET) + && this->lf_format->lf_date_time.dts_default_zone != nullptr) + { + tz_changed = true; + } + this->lf_file_options_generation = options_hier->foh_generation; + } + + return tz_changed; +} bool logfile::exists() const @@ -238,6 +319,7 @@ logfile::process_prefix(shared_buffer_ref& sbr, this->lf_index.size(), li.li_file_range.fr_offset, li.li_file_range.fr_size); + auto starting_index_size = this->lf_index.size(); size_t prev_index_size = this->lf_index.size(); for (const auto& curr : root_formats) { if (this->lf_index.size() @@ -260,7 +342,10 @@ logfile::process_prefix(shared_buffer_ref& sbr, this->lf_mismatched_formats.insert(curr->get_name()); continue; } - if (!curr->match_mime_type(this->lf_options.loo_file_format)) { + if (this->lf_options.loo_format_name + && !(curr->get_name() + == this->lf_options.loo_format_name.value())) + { if (li.li_file_range.fr_offset == 0) { log_debug("(%s) does not match file format: %s", curr->get_name().get(), @@ -355,19 +440,46 @@ logfile::process_prefix(shared_buffer_ref& sbr, this->lf_applicable_taggers.emplace_back(td_pair.second); } + for (auto& pd_pair : this->lf_format->lf_partition_defs) { + bool matches = pd_pair.second->fpd_paths.empty(); + for (const auto& pr : pd_pair.second->fpd_paths) { + if (pr.matches(this->lf_filename.c_str())) { + matches = true; + break; + } + } + if (!matches) { + continue; + } + + log_info( + "%s: found applicable partition definition " + "/%s/partitions/%s", + this->lf_filename.c_str(), + this->lf_format->get_name().get(), + pd_pair.second->fpd_name.c_str()); + this->lf_applicable_partitioners.emplace_back(pd_pair.second); + } + /* * We'll go ahead and assume that any previous lines were * written out at the same time as the last one, so we need to * go back and update everything. */ - auto& last_line = this->lf_index[this->lf_index.size() - 1]; + const auto& last_line = this->lf_index.back(); - for (size_t lpc = 0; lpc < this->lf_index.size() - 1; lpc++) { + require_lt(starting_index_size, this->lf_index.size()); + for (size_t lpc = 0; lpc < starting_index_size; lpc++) { if (this->lf_format->lf_multiline) { this->lf_index[lpc].set_time(last_line.get_time()); this->lf_index[lpc].set_millis(last_line.get_millis()); + if (this->lf_format->lf_structured) { + this->lf_index[lpc].set_ignore(true); + } } else { - this->lf_index[lpc].set_ignore(true); + this->lf_index[lpc].set_time(last_line.get_time()); + this->lf_index[lpc].set_millis(last_line.get_millis()); + this->lf_index[lpc].set_level(LEVEL_INVALID); } } @@ -416,8 +528,15 @@ logfile::process_prefix(shared_buffer_ref& sbr, short last_millis = 0; uint8_t last_mod = 0, last_opid = 0; - if (!this->lf_index.empty()) { - logline& ll = this->lf_index.back(); + if (this->lf_format == nullptr && li.li_timestamp.tv_sec != 0) { + last_time = li.li_timestamp.tv_sec; + last_millis + = std::chrono::duration_cast<std::chrono::milliseconds>( + std::chrono::microseconds(li.li_timestamp.tv_usec)) + .count(); + last_level = li.li_level; + } else if (!this->lf_index.empty()) { + const auto& ll = this->lf_index.back(); /* * Assume this line is part of the previous one(s) and copy the @@ -448,6 +567,9 @@ logfile::process_prefix(shared_buffer_ref& sbr, logfile::rebuild_result_t logfile::rebuild_index(nonstd::optional<ui_clock::time_point> deadline) { + static const auto& dts_cfg + = injector::get<const date_time_scanner_ns::config&>(); + if (!this->lf_indexing) { if (this->lf_sort_needed) { this->lf_sort_needed = false; @@ -456,7 +578,11 @@ logfile::rebuild_index(nonstd::optional<ui_clock::time_point> deadline) return rebuild_result_t::NO_NEW_LINES; } - if (this->lf_format != nullptr && this->lf_format->format_changed()) { + if (this->file_options_have_changed() + || (this->lf_format != nullptr + && (this->lf_zoned_to_local_state != dts_cfg.c_zoned_to_local + || this->lf_format->format_changed()))) + { log_info("%s: format has changed, rebuilding", this->lf_filename.c_str()); this->lf_index.clear(); @@ -464,7 +590,16 @@ logfile::rebuild_index(nonstd::optional<ui_clock::time_point> deadline) this->lf_partial_line = false; this->lf_longest_line = 0; this->lf_sort_needed = true; + { + safe::WriteAccess<logfile::safe_opid_state> writable_opid_map( + this->lf_opids); + + writable_opid_map->los_opid_ranges.clear(); + writable_opid_map->los_sub_in_use.clear(); + } + this->lf_allocator.reset(); } + this->lf_zoned_to_local_state = dts_cfg.c_zoned_to_local; auto retval = rebuild_result_t::NO_NEW_LINES; struct stat st; @@ -497,6 +632,11 @@ logfile::rebuild_index(nonstd::optional<ui_clock::time_point> deadline) this->lf_stat.st_mtime); this->close(); return rebuild_result_t::NO_NEW_LINES; + } + + if (this->lf_text_format == text_format_t::TF_BINARY) { + this->lf_index_size = st.st_size; + this->lf_stat = st; } else if (this->lf_line_buffer.is_data_available(this->lf_index_size, st.st_size)) { @@ -589,7 +729,7 @@ logfile::rebuild_index(nonstd::optional<ui_clock::time_point> deadline) "loading file... %s:%d", this->lf_filename.c_str(), begin_size); } scan_batch_context sbc{this->lf_allocator}; - sbc.sbc_opids.reserve(32); + sbc.sbc_opids.los_opid_ranges.reserve(32); auto prev_range = file_range{off}; while (limit > 0) { auto load_result = this->lf_line_buffer.load_next_line(prev_range); @@ -646,19 +786,38 @@ logfile::rebuild_index(nonstd::optional<ui_clock::time_point> deadline) if (old_size == 0 && this->lf_text_format == text_format_t::TF_UNKNOWN) { - file_range fr = this->lf_line_buffer.get_available(); + auto fr = this->lf_line_buffer.get_available(); auto avail_data = this->lf_line_buffer.read_range(fr); this->lf_text_format = avail_data - .map([path = this->get_path()]( - const shared_buffer_ref& avail_sbr) + .map([path = this->get_path(), + this](const shared_buffer_ref& avail_sbr) -> text_format_t { - return detect_text_format( - avail_sbr.to_string_fragment(), path); + auto sbr_str = to_string(avail_sbr); + + if (this->lf_line_buffer.is_piper()) { + auto lines + = string_fragment::from_str(sbr_str) + .split_lines(); + for (auto line_iter = lines.rbegin(); + // XXX rejigger read_range() for + // multi-line reads + std::next(line_iter) != lines.rend(); + ++line_iter) + { + sbr_str.erase(line_iter->sf_begin, 22); + } + } + if (is_utf8(sbr_str).is_valid()) { + auto new_size = erase_ansi_escapes(sbr_str); + sbr_str.resize(new_size); + } + return detect_text_format(sbr_str, path); }) .unwrapOr(text_format_t::TF_UNKNOWN); - log_debug("setting text format to %d", this->lf_text_format); + log_debug("setting text format to %s", + fmt::to_string(this->lf_text_format).c_str()); } if (!li.li_utf8_scan_result.is_valid() && this->lf_text_format != text_format_t::TF_MARKDOWN @@ -687,7 +846,8 @@ logfile::rebuild_index(nonstd::optional<ui_clock::time_point> deadline) } this->lf_longest_line - = std::max(this->lf_longest_line, sbr.length()); + = std::max(this->lf_longest_line, + li.li_utf8_scan_result.usr_column_width_guess); this->lf_partial_line = li.li_partial; sort_needed = this->process_prefix(sbr, li, sbc) || sort_needed; @@ -732,33 +892,60 @@ logfile::rebuild_index(nonstd::optional<ui_clock::time_point> deadline) } #endif if (this->lf_format) { - if (!this->lf_applicable_taggers.empty()) { - auto sf = sbr.to_string_fragment(); + auto sf = sbr.to_string_fragment(); - for (const auto& td : this->lf_applicable_taggers) { - auto curr_ll = this->end() - 1; + for (const auto& td : this->lf_applicable_taggers) { + auto curr_ll = this->end() - 1; + + if (td->ftd_level != LEVEL_UNKNOWN + && td->ftd_level != curr_ll->get_msg_level()) + { + continue; + } - if (td->ftd_level != LEVEL_UNKNOWN - && td->ftd_level != curr_ll->get_msg_level()) - { - continue; + if (td->ftd_pattern.pp_value + ->find_in(sf, PCRE2_NO_UTF_CHECK) + .ignore_error() + .has_value()) + { + while (curr_ll->is_continued()) { + --curr_ll; } + curr_ll->set_meta_mark(true); + auto line_number = static_cast<uint32_t>( + std::distance(this->begin(), curr_ll)); + + this->lf_bookmark_metadata[line_number].add_tag( + td->ftd_name); + } + } + + for (const auto& pd : this->lf_applicable_partitioners) { + static thread_local auto part_md + = lnav::pcre2pp::match_data::unitialized(); + + auto curr_ll = this->end() - 1; - if (td->ftd_pattern.pp_value - ->find_in(sf, PCRE2_NO_UTF_CHECK) - .ignore_error() - .has_value()) - { - curr_ll->set_mark(true); - while (curr_ll->is_continued()) { - --curr_ll; - } - auto line_number = static_cast<uint32_t>( - std::distance(this->begin(), curr_ll)); - - this->lf_bookmark_metadata[line_number].add_tag( - td->ftd_name); + if (pd->fpd_level != LEVEL_UNKNOWN + && pd->fpd_level != curr_ll->get_msg_level()) + { + continue; + } + + auto match_res = pd->fpd_pattern.pp_value->capture_from(sf) + .into(part_md) + .matches(PCRE2_NO_UTF_CHECK) + .ignore_error(); + if (match_res) { + while (curr_ll->is_continued()) { + --curr_ll; } + curr_ll->set_meta_mark(true); + auto line_number = static_cast<uint32_t>( + std::distance(this->begin(), curr_ll)); + + this->lf_bookmark_metadata[line_number].bm_name + = part_md.to_string(); } } @@ -824,26 +1011,25 @@ logfile::rebuild_index(nonstd::optional<ui_clock::time_point> deadline) this->lf_stat = st; { - safe::WriteAccess<logfile::safe_opid_map> writable_opid_map( + safe::WriteAccess<logfile::safe_opid_state> writable_opid_map( this->lf_opids); - for (const auto& opid_pair : sbc.sbc_opids) { - auto opid_iter = writable_opid_map->find(opid_pair.first); + for (const auto& opid_pair : sbc.sbc_opids.los_opid_ranges) { + auto opid_iter + = writable_opid_map->los_opid_ranges.find(opid_pair.first); - if (opid_iter == writable_opid_map->end()) { - writable_opid_map->emplace(opid_pair); + if (opid_iter == writable_opid_map->los_opid_ranges.end()) { + writable_opid_map->los_opid_ranges.emplace(opid_pair); } else { - if (opid_pair.second.otr_begin - < opid_iter->second.otr_begin) - { - opid_iter->second.otr_begin - = opid_pair.second.otr_begin; - } - if (opid_iter->second.otr_end < opid_pair.second.otr_end) { - opid_iter->second.otr_end = opid_pair.second.otr_end; - } + opid_iter->second |= opid_pair.second; } } + log_debug( + "%s: opid_map size: count=%zu; sizeof(otr)=%zu; alloc=%zu", + this->lf_filename.c_str(), + writable_opid_map->los_opid_ranges.size(), + sizeof(opid_time_range), + this->lf_allocator.getNumBytesAllocated()); } if (sort_needed) { @@ -902,10 +1088,30 @@ logfile::read_file() return Err(std::string("file is too large to read")); } - auto retval - = TRY(this->lf_line_buffer.read_range({0, this->lf_stat.st_size})); + auto retval = std::string(); + retval.reserve(this->lf_stat.st_size); + + retval.append(this->lf_line_buffer.get_piper_header_size(), '\x16'); + for (auto iter = this->begin(); iter != this->end(); ++iter) { + auto fr = this->get_file_range(iter); + auto sbr = TRY(this->lf_line_buffer.read_range(fr)); + + if (this->lf_line_buffer.is_piper()) { + retval.append(22, '\x16'); + } + retval.append(sbr.get_data(), sbr.length()); + if (retval.size() < this->lf_stat.st_size) { + retval.push_back('\n'); + } + } + + return Ok(std::move(retval)); +} - return Ok(to_string(retval)); +Result<shared_buffer_ref, std::string> +logfile::read_range(file_range fr) +{ + return this->lf_line_buffer.read_range(fr); } void @@ -929,9 +1135,13 @@ logfile::read_full_message(logfile::const_iterator ll, auto read_result = this->lf_line_buffer.read_range(range_for_line); if (read_result.isErr()) { - log_error("unable to read range %d:%d", + auto errmsg = read_result.unwrapErr(); + log_error("%s:%d:unable to read range %d:%d -- %s", + this->get_unique_path().c_str(), + std::distance(this->cbegin(), ll), range_for_line.fr_offset, - range_for_line.fr_size); + range_for_line.fr_size, + errmsg.c_str()); return; } msg_out = read_result.unwrap(); @@ -1067,14 +1277,21 @@ logfile::find_from_time(const timeval& tv) const return retval; } -void +bool logfile::mark_as_duplicate(const std::string& name) { + safe::WriteAccess<safe_notes> notes(this->lf_notes); + + auto iter = notes->find(note_type::duplicate); + if (iter != notes->end()) { + return false; + } + this->lf_indexing = false; this->lf_options.loo_is_visible = false; - this->lf_notes.writeAccess()->emplace( - note_type::duplicate, - fmt::format(FMT_STRING("hiding duplicate of {}"), name)); + notes->emplace(note_type::duplicate, + fmt::format(FMT_STRING("hiding duplicate of {}"), name)); + return true; } void |