summary | refs | log | tree | commit | diff | stats
path: root/src/logfile.hh
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/logfile.hh 462
1 files changed, 462 insertions, 0 deletions
diff --git a/src/logfile.hh b/src/logfile.hh
new file mode 100644
index 0000000..61aa072
--- /dev/null
+++ b/src/logfile.hh
@@ -0,0 +1,462 @@
+
+/**
+ * Copyright (c) 2007-2012, Timothy Stack
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * * Neither the name of Timothy Stack nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @file logfile.hh
+ */
+
+#ifndef logfile_hh
+#define logfile_hh
+
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "ArenaAlloc/arenaalloc.h"
+#include "base/lnav_log.hh"
+#include "base/result.h"
+#include "bookmarks.hh"
+#include "byte_array.hh"
+#include "ghc/filesystem.hpp"
+#include "line_buffer.hh"
+#include "log_format_fwd.hh"
+#include "logfile_fwd.hh"
+#include "safe/safe.h"
+#include "shared_buffer.hh"
+#include "text_format.hh"
+#include "unique_path.hh"
+
+/**
+ * Callback interface that receives progress reports while a logfile is
+ * being indexed.
+ *
+ * @see logfile
+ */
+class logfile_observer {
+public:
+    /**
+     * Value handed back from logfile_indexing().  Presumably CONTINUE lets
+     * the indexing carry on and BREAK asks for it to stop -- confirm against
+     * the caller in the logfile implementation.
+     */
+    enum class indexing_result {
+        CONTINUE,
+        BREAK,
+    };
+
+    virtual ~logfile_observer() = default;
+
+    /**
+     * Progress notification issued during indexing.
+     *
+     * @param lf The logfile object that is doing the indexing.
+     * @param off The current offset in the file being processed.
+     * @param total The total size of the file.
+     * @return One of the indexing_result values.
+     */
+    virtual indexing_result logfile_indexing(
+        const std::shared_ptr<logfile>& lf,
+        file_off_t off,
+        file_size_t total) = 0;
+};
+
+/**
+ * Activity bookkeeping for a logfile.  All fields start out zeroed.
+ */
+struct logfile_activity {
+    /* Presumably the number of times the file was polled -- TODO confirm. */
+    int64_t la_polls = 0;
+    /* Presumably the number of reads performed on the file -- TODO confirm. */
+    int64_t la_reads = 0;
+    /* Presumably the resource usage of the initial index pass -- TODO confirm. */
+    struct rusage la_initial_index_rusage = {};
+};
+
+/**
+ * Container for the lines in a log file and some metadata.
+ *
+ * Instances are created through logfile::open(); the constructor is private.
+ */
+class logfile
+    : public unique_path_source
+    , public std::enable_shared_from_this<logfile> {
+public:
+    using iterator = std::vector<logline>::iterator;
+    using const_iterator = std::vector<logline>::const_iterator;
+
+    /** A piece of metadata embedded in the file: a value and its format. */
+    struct metadata {
+        text_format_t m_format;
+        std::string m_value;
+    };
+
+    /**
+     * Open a log file and return it as a shared logfile object.
+     *
+     * @param filename The name of the log file.
+     * @param loo The options to use when opening the file.
+     * @return A Result holding the new logfile or, on failure, a
+     * std::string (presumably a description of the error).
+     */
+    static Result<std::shared_ptr<logfile>, std::string> open(
+        std::string filename, logfile_open_options& loo);
+
+    ~logfile() override;
+
+    /** @return The activity counters recorded for this file. */
+    const logfile_activity& get_activity() const { return this->lf_activity; }
+
+    /** @return The actual path of the file, if one has been recorded. */
+    nonstd::optional<ghc::filesystem::path> get_actual_path() const
+    {
+        return this->lf_actual_path;
+    }
+
+    /** @return The filename currently associated with this log file. */
+    const std::string& get_filename() const { return this->lf_filename; }
+
+    /** @return The stored basename, presumably the filename without its
+     * path prefix. */
+    const std::string& get_basename() const { return this->lf_basename; }
+
+    /** @return The file descriptor held by the underlying line buffer. */
+    int get_fd() const { return this->lf_line_buffer.get_fd(); }
+
+    /** @param filename The new filename for this log file. */
+    void set_filename(const std::string& filename);
+
+    /** @return The content identifier stored for this file. */
+    const std::string& get_content_id() const { return this->lf_content_id; }
+
+    /** @return The stat structure stored for this log file. */
+    const struct stat& get_stat() const { return this->lf_stat; }
+
+    /** @return The stored length of the longest line. */
+    size_t get_longest_line_length() const { return this->lf_longest_line; }
+
+    /** @return Whatever the underlying line buffer reports for compression. */
+    bool is_compressed() const { return this->lf_line_buffer.is_compressed(); }
+
+    bool is_valid_filename() const { return this->lf_valid_filename; }
+
+    /** @return The stored index size, as a file offset. */
+    file_off_t get_index_size() const { return this->lf_index_size; }
+
+    nonstd::optional<const_iterator> line_for_offset(file_off_t off) const;
+
+    /**
+     * @return The detected format, rebuild_index() must be called before this
+     * will return a value other than nullptr.
+     */
+    std::shared_ptr<log_format> get_format() const { return this->lf_format; }
+
+    /** @return The same format as get_format(), as a non-owning pointer. */
+    log_format* get_format_ptr() const { return this->lf_format.get(); }
+
+    intern_string_t get_format_name() const;
+
+    text_format_t get_text_format() const { return this->lf_text_format; }
+
+    /**
+     * @return The last modified time of the file when the file was last
+     * indexed.
+     */
+    time_t get_modified_time() const { return this->lf_index_time; }
+
+    int get_time_offset_line() const { return this->lf_time_offset_line; }
+
+    /** @return The time offset currently stored for this file. */
+    const struct timeval& get_time_offset() const
+    {
+        return this->lf_time_offset;
+    }
+
+    void adjust_content_time(int line,
+                             const struct timeval& tv,
+                             bool abs_offset = true);
+
+    /** Reset the time offset by adjusting with line -1 and a zero timeval. */
+    void clear_time_offset()
+    {
+        struct timeval tv = {0, 0};
+
+        this->adjust_content_time(-1, tv);
+    }
+
+    void mark_as_duplicate(const std::string& name);
+
+    /** @return The options stored for this file. */
+    const logfile_open_options& get_open_options() const
+    {
+        return this->lf_options;
+    }
+
+    void reset_state();
+
+    /** @return True if either component of the time offset is non-zero. */
+    bool is_time_adjusted() const
+    {
+        return (this->lf_time_offset.tv_sec != 0
+                || this->lf_time_offset.tv_usec != 0);
+    }
+
+    iterator begin() { return this->lf_index.begin(); }
+
+    const_iterator begin() const { return this->lf_index.begin(); }
+
+    const_iterator cbegin() const { return this->lf_index.begin(); }
+
+    iterator end() { return this->lf_index.end(); }
+
+    const_iterator end() const { return this->lf_index.end(); }
+
+    const_iterator cend() const { return this->lf_index.end(); }
+
+    /** @return The number of lines in the index. */
+    size_t size() const { return this->lf_index.size(); }
+
+    nonstd::optional<const_iterator> find_from_time(
+        const struct timeval& tv) const;
+
+    /** @return The indexed line at the given position (not bounds-checked). */
+    logline& operator[](int index) { return this->lf_index[index]; }
+
+    logline& front() { return this->lf_index.front(); }
+
+    logline& back() { return this->lf_index.back(); }
+
+    /** @return True if this log file still exists. */
+    bool exists() const;
+
+    /** Flag this logfile as closed; only the flag is changed here. */
+    void close() { this->lf_is_closed = true; }
+
+    bool is_closed() const { return this->lf_is_closed; }
+
+    struct timeval original_line_time(iterator ll);
+
+    Result<shared_buffer_ref, std::string> read_line(iterator ll);
+
+    Result<std::string, std::string> read_file();
+
+    /**
+     * Step backwards from the given line until reaching a line whose
+     * sub-offset is zero or the beginning of the index.
+     *
+     * @param ll The line to start from.
+     * @return The iterator for the line that was found.
+     */
+    iterator line_base(iterator ll)
+    {
+        auto retval = ll;
+
+        while (retval != this->begin() && retval->get_sub_offset() != 0) {
+            --retval;
+        }
+
+        return retval;
+    }
+
+    /**
+     * Step backwards from the given line until reaching a line that has a
+     * sub-offset of zero and reports is_message(), or the beginning of the
+     * index.
+     *
+     * @param ll The line to start from.
+     * @return The iterator for the line that was found.
+     */
+    iterator message_start(iterator ll)
+    {
+        auto retval = ll;
+
+        while (retval != this->begin()
+               && (retval->get_sub_offset() != 0 || !retval->is_message()))
+        {
+            --retval;
+        }
+
+        return retval;
+    }
+
+    /** The result of message_byte_length(): a length plus range metadata. */
+    struct message_length_result {
+        file_ssize_t mlr_length;
+        file_range::metadata mlr_metadata;
+    };
+
+    message_length_result message_byte_length(const_iterator ll,
+                                              bool include_continues = true);
+
+    /**
+     * Build a file_range from the offset of the given line and the length
+     * and metadata returned by message_byte_length().
+     *
+     * @param ll The line the range starts at.
+     * @param include_continues Passed through to message_byte_length().
+     */
+    file_range get_file_range(const_iterator ll, bool include_continues = true)
+    {
+        auto mlr = this->message_byte_length(ll, include_continues);
+
+        return {
+            ll->get_offset(),
+            mlr.mlr_length,
+            mlr.mlr_metadata,
+        };
+    }
+
+    void read_full_message(const_iterator ll,
+                           shared_buffer_ref& msg_out,
+                           int max_lines = 50);
+
+    Result<shared_buffer_ref, std::string> read_raw_message(const_iterator ll);
+
+    /** The possible results of rebuild_index(). */
+    enum class rebuild_result_t {
+        INVALID,
+        NO_NEW_LINES,
+        NEW_LINES,
+        NEW_ORDER,
+    };
+
+    /**
+     * Index any new data in the log file.
+     *
+     * @param deadline An optional time point that defaults to nullopt;
+     * presumably indexing yields once it has passed -- confirm against the
+     * implementation.
+     * @return A rebuild_result_t value describing the outcome.
+     */
+    rebuild_result_t rebuild_index(
+        nonstd::optional<ui_clock::time_point> deadline = nonstd::nullopt);
+
+    void reobserve_from(iterator iter);
+
+    /** @param lo The observer to store; the pointer is not owned. */
+    void set_logfile_observer(logfile_observer* lo)
+    {
+        this->lf_logfile_observer = lo;
+    }
+
+    void set_logline_observer(logline_observer* llo);
+
+    logline_observer* get_logline_observer() const
+    {
+        return this->lf_logline_observer;
+    }
+
+    /**
+     * Order logfiles by the time of their first indexed line.  A file with
+     * an empty index sorts before any file that has lines.
+     *
+     * The right-hand side is tested first so that two files with empty
+     * indexes (including a file compared against itself) are equivalent
+     * instead of each being "less than" the other, which would break the
+     * strict weak ordering that sorting and ordered containers rely on.
+     */
+    bool operator<(const logfile& rhs) const
+    {
+        if (rhs.lf_index.empty()) {
+            return false;
+        }
+        if (this->lf_index.empty()) {
+            return true;
+        }
+
+        return this->lf_index[0].get_time() < rhs.lf_index[0].get_time();
+    }
+
+    bool is_indexing() const { return this->lf_indexing; }
+
+    /**
+     * Check the invariants for this object, currently that the filename is
+     * not empty.
+     *
+     * @return Always true; violations are reported through require().
+     */
+    bool invariant()
+    {
+        require(!this->lf_filename.empty());
+
+        return true;
+    }
+
+    ghc::filesystem::path get_path() const override;
+
+    /** The kinds of notes that can be attached to a logfile. */
+    enum class note_type {
+        indexing_disabled,
+        duplicate,
+        not_utf,
+    };
+
+    using note_map = std::map<note_type, std::string>;
+    using safe_notes = safe::Safe<note_map>;
+
+    /** @return A copy of the notes, taken through the read accessor. */
+    note_map get_notes() const { return *this->lf_notes.readAccess(); }
+
+    using safe_opid_map = safe::Safe<log_opid_map>;
+
+    safe_opid_map& get_opids() { return this->lf_opids; }
+
+    /** Forwards to the underlying line buffer. */
+    void quiesce() { this->lf_line_buffer.quiesce(); }
+
+    /** Forwards to the underlying line buffer. */
+    void enable_cache() { this->lf_line_buffer.enable_cache(); }
+
+    void dump_stats();
+
+    robin_hood::unordered_map<uint32_t, bookmark_metadata>&
+    get_bookmark_metadata()
+    {
+        return this->lf_bookmark_metadata;
+    }
+
+    std::map<std::string, metadata>& get_embedded_metadata()
+    {
+        return this->lf_embedded_metadata;
+    }
+
+    const std::map<std::string, metadata>& get_embedded_metadata() const
+    {
+        return this->lf_embedded_metadata;
+    }
+
+protected:
+    /**
+     * Process a line from the file.
+     *
+     * @param sbr The buffer for the line, presumably holding its contents.
+     * @param li The line_info for the line.
+     * @param sbc The context for the current scan batch.
+     * @return NOTE(review): the meaning of the result is not visible in
+     * this header -- check the implementation.
+     */
+    bool process_prefix(shared_buffer_ref& sbr,
+                        const line_info& li,
+                        scan_batch_context& sbc);
+
+    void set_format_base_time(log_format* lf);
+
+private:
+    logfile(std::string filename, logfile_open_options& loo);
+
+    std::string lf_filename;
+    logfile_open_options lf_options;
+    logfile_activity lf_activity;
+    bool lf_named_file{true};
+    bool lf_valid_filename{true};
+    nonstd::optional<ghc::filesystem::path> lf_actual_path;
+    std::string lf_basename;
+    std::string lf_content_id;
+    struct stat lf_stat {};
+    std::shared_ptr<log_format> lf_format;
+    std::vector<logline> lf_index;
+    time_t lf_index_time{0};
+    file_off_t lf_index_size{0};
+    bool lf_sort_needed{false};
+    line_buffer lf_line_buffer;
+    int lf_time_offset_line{0};
+    struct timeval lf_time_offset {
+        0, 0
+    };
+    bool lf_is_closed{false};
+    bool lf_indexing{true};
+    bool lf_partial_line{false};
+    /* The observer pointers are raw and non-owning as far as this header
+     * shows. */
+    logline_observer* lf_logline_observer{nullptr};
+    logfile_observer* lf_logfile_observer{nullptr};
+    size_t lf_longest_line{0};
+    text_format_t lf_text_format{text_format_t::TF_UNKNOWN};
+    uint32_t lf_out_of_time_order_count{0};
+    safe_notes lf_notes;
+    safe_opid_map lf_opids;
+    size_t lf_watch_count{0};
+    ArenaAlloc::Alloc<char> lf_allocator{64 * 1024};
+    nonstd::optional<time_t> lf_cached_base_time;
+    nonstd::optional<tm> lf_cached_base_tm;
+
+    nonstd::optional<std::pair<file_off_t, size_t>> lf_next_line_cache;
+    std::set<intern_string_t> lf_mismatched_formats;
+    robin_hood::unordered_map<uint32_t, bookmark_metadata> lf_bookmark_metadata;
+
+    std::vector<std::shared_ptr<format_tag_def>> lf_applicable_taggers;
+    std::map<std::string, metadata> lf_embedded_metadata;
+};
+
+/**
+ * Callback interface for objects that want to follow the lines of a
+ * logfile.  When each callback fires is decided by the logfile
+ * implementation, which is not part of this header.
+ */
+class logline_observer {
+public:
+    virtual ~logline_observer() = default;
+
+    /**
+     * @param lf The logfile the notification is about.
+     * @param rollback_size A file size; presumably the size the file was
+     * rolled back to -- confirm against the caller.
+     */
+    virtual void logline_restart(const logfile& lf,
+                                 file_size_t rollback_size) = 0;
+
+    /**
+     * @param lf The logfile the notification is about.
+     * @param ll_begin Start of the range of lines being reported.
+     * @param ll_end End of the range of lines being reported.
+     * @param sbr A buffer supplied with the lines; presumably their
+     * contents -- confirm against the caller.
+     */
+    virtual void logline_new_lines(const logfile& lf,
+                                   logfile::const_iterator ll_begin,
+                                   logfile::const_iterator ll_end,
+                                   shared_buffer_ref& sbr) = 0;
+
+    /** @param lf The logfile the notification is about. */
+    virtual void logline_eof(const logfile& lf) = 0;
+};
+
+#endif