summaryrefslogtreecommitdiffstats
path: root/src/grep_proc.hh
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/grep_proc.hh307
1 files changed, 307 insertions, 0 deletions
diff --git a/src/grep_proc.hh b/src/grep_proc.hh
new file mode 100644
index 0000000..58010e3
--- /dev/null
+++ b/src/grep_proc.hh
@@ -0,0 +1,307 @@
+/**
+ * Copyright (c) 2007-2012, Timothy Stack
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * * Neither the name of Timothy Stack nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @file grep_proc.hh
+ */
+
+#ifndef grep_proc_hh
+#define grep_proc_hh
+
+#include <deque>
+#include <exception>
+#include <string>
+#include <vector>
+
+#include <poll.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "base/auto_fd.hh"
+#include "base/auto_mem.hh"
+#include "base/lnav_log.hh"
+#include "line_buffer.hh"
+#include "pcrepp/pcre2pp.hh"
+#include "pollable.hh"
+#include "strong_int.hh"
+
+template<typename LineType>
+class grep_proc;
+
+/**
+ * Data source for lines to be searched using a grep_proc.
+ */
+template<typename LineType>
+class grep_proc_source {
+public:
+ virtual ~grep_proc_source() = default;
+
+ virtual void register_proc(grep_proc<LineType>* proc)
+ {
+ this->gps_proc = proc;
+ }
+
+ /**
+ * Get the value for a particular line in the source.
+ *
+ * @param line The line to retrieve.
+ * @param value_out The destination for the line value.
+ */
+ virtual bool grep_value_for_line(LineType line, std::string& value_out) = 0;
+
+ virtual LineType grep_initial_line(LineType start, LineType highest)
+ {
+ if (start == -1) {
+ return highest;
+ }
+ return start;
+ }
+
+ virtual void grep_next_line(LineType& line) { line = line + LineType(1); }
+
+ grep_proc<LineType>* gps_proc;
+};
+
+/**
+ * Delegate interface for control messages from the grep_proc.
+ */
+class grep_proc_control {
+public:
+ virtual ~grep_proc_control() = default;
+
+ /** @param msg The error encountered while attempting the grep. */
+ virtual void grep_error(const std::string& msg) {}
+};
+
+/**
+ * Sink for matches produced by a grep_proc instance.
+ */
+template<typename LineType>
+class grep_proc_sink {
+public:
+ virtual ~grep_proc_sink() = default;
+
+ virtual void grep_quiesce() {}
+
+ /** Called at the start of a new grep run. */
+ virtual void grep_begin(grep_proc<LineType>& gp,
+ LineType start,
+ LineType stop)
+ {
+ }
+
+ /** Called periodically between grep_begin and grep_end. */
+ virtual void grep_end_batch(grep_proc<LineType>& gp) {}
+
+ /** Called at the end of a grep run. */
+ virtual void grep_end(grep_proc<LineType>& gp) {}
+
+ /**
+ * Called when a match is found on 'line' and between [start, end).
+ *
+ * @param line The line number that matched.
+ * @param start The offset within the line where the match begins.
+ * @param end The offset of the character after the last character in the
+ * match.
+ */
+ virtual void grep_match(grep_proc<LineType>& gp,
+ LineType line,
+ int start,
+ int end)
+ = 0;
+
+ /**
+ * Called for each captured substring in the line.
+ *
+ * @param line The line number that matched.
+ * @param start The offset within the line where the capture begins.
+ * @param end The offset of the character after the last character in the
+ * capture.
+ * @param capture The captured substring itself.
+ */
+ virtual void grep_capture(grep_proc<LineType>& gp,
+ LineType line,
+ int start,
+ int end,
+ char* capture){};
+
+ virtual void grep_match_end(grep_proc<LineType>& gp, LineType line){};
+};
+
+/**
+ * "Grep" that runs in a separate process so it doesn't stall user-interaction.
+ * This class manages the child process and any interactions between the parent
+ * and child. The source data to be matched comes from the grep_proc_source
+ * delegate and the results are sent to the grep_proc_sink delegate in the
+ * parent process.
+ *
+ * Note: The "grep" executable is not actually used, instead we use the pcre(3)
+ * library directly.
+ */
+template<typename LineType>
+class grep_proc : public pollable {
+public:
+ class error : public std::exception {
+ public:
+ error(int err) : e_err(err){};
+
+ int e_err;
+ };
+
+ /**
+ * Construct a grep_proc object. You must call the start() method
+ * to fork off the child process and begin processing.
+ *
+ * @param code The pcre code to run over the lines of input.
+ * @param gps The source of the data to match.
+ */
+ grep_proc(std::shared_ptr<lnav::pcre2pp::code> code,
+ grep_proc_source<LineType>& gps,
+ std::shared_ptr<pollable_supervisor> ps);
+
+ grep_proc(std::shared_ptr<pollable_supervisor>);
+
+ using injectable = grep_proc(std::shared_ptr<pollable_supervisor>);
+
+ virtual ~grep_proc();
+
+ /** @param gpd The sink to send results to. */
+ void set_sink(grep_proc_sink<LineType>* gpd) { this->gp_sink = gpd; }
+
+ grep_proc& invalidate();
+
+ /** @param gpc The control to send results to. */
+ void set_control(grep_proc_control* gpc) { this->gp_control = gpc; }
+
+ /** @return The sink to send results to. */
+ grep_proc_sink<LineType>* get_sink() { return this->gp_sink; }
+
+ /**
+ * Queue a request to search the input between the given line numbers.
+ *
+ * @param start The line number to start the search at.
+ * @param stop The line number to stop the search at (exclusive) or -1 to
+ * read until the end-of-file.
+ */
+ grep_proc& queue_request(LineType start = LineType(0),
+ LineType stop = LineType(-1))
+ {
+ require(start != -1 || stop == -1);
+ require(stop == -1 || start < stop);
+
+ this->gp_queue.emplace_back(start, stop);
+ if (this->gp_sink) {
+ this->gp_sink->grep_begin(*this, start, stop);
+ }
+
+ return *this;
+ }
+
+ /**
+ * Start the search requests that have been queued up with queue_request.
+ */
+ void start();
+
+ void update_poll_set(std::vector<struct pollfd>& pollfds) override;
+
+ /**
+ * Check the fd_set to see if there is any new data to be processed.
+ *
+ * @param ready_rfds The set of ready-to-read file descriptors.
+ */
+ void check_poll_set(const std::vector<struct pollfd>& pollfds) override;
+
+ /** Check the invariants for this object. */
+ bool invariant()
+ {
+ if (this->gp_child_started) {
+ require(this->gp_child > 0);
+ require(this->gp_line_buffer.get_fd() != -1);
+ } else {
+ /* require(this->gp_child == -1); XXX doesnt work with static destr
+ */
+ require(this->gp_line_buffer.get_fd() == -1);
+ }
+
+ return true;
+ }
+
+protected:
+ /**
+ * Dispatch a line received from the child.
+ */
+ void dispatch_line(char* line);
+
+ /**
+ * Free any resources used by the object and make sure the child has been
+ * terminated.
+ */
+ void cleanup();
+
+ void child_loop();
+
+ virtual void child_init(){};
+
+ virtual void child_batch() { fflush(stdout); }
+
+ virtual void child_term() { fflush(stdout); }
+
+ virtual void handle_match(
+ int line, std::string& line_value, int off, int* matches, int count);
+
+ std::shared_ptr<lnav::pcre2pp::code> gp_pcre;
+ grep_proc_source<LineType>& gp_source; /*< The data source delegate. */
+
+ auto_fd gp_err_pipe; /*< Standard error from the child. */
+ line_buffer gp_line_buffer; /*< Standard out from the child. */
+ file_range gp_pipe_range;
+
+ pid_t gp_child{-1}; /*<
+ * The child's pid or zero in the
+ * child.
+ */
+ bool gp_child_started{false}; /*< True if the child was start()'d. */
+ size_t gp_child_queue_size{0};
+
+ /** The queue of search requests. */
+ std::deque<std::pair<LineType, LineType> > gp_queue;
+ LineType gp_last_line{0}; /*<
+ * The last line number received from
+ * the child. For multiple matches,
+ * the line number is only sent once.
+ */
+ LineType gp_highest_line; /*< The highest numbered line processed
+ * by the grep child process. This
+ * value is used when the start line
+ * for a queued request is -1.
+ */
+ grep_proc_sink<LineType>* gp_sink{nullptr}; /*< The sink delegate. */
+ grep_proc_control* gp_control{nullptr}; /*< The control delegate. */
+};
+
+#endif