summaryrefslogtreecommitdiffstats
path: root/src/pcrepp/pcre2pp.hh
diff options
context:
space:
mode:
Diffstat (limited to 'src/pcrepp/pcre2pp.hh')
-rw-r--r--src/pcrepp/pcre2pp.hh367
1 files changed, 367 insertions, 0 deletions
diff --git a/src/pcrepp/pcre2pp.hh b/src/pcrepp/pcre2pp.hh
new file mode 100644
index 0000000..a40d26c
--- /dev/null
+++ b/src/pcrepp/pcre2pp.hh
@@ -0,0 +1,367 @@
+/**
+ * Copyright (c) 2022, Timothy Stack
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * * Neither the name of Timothy Stack nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef lnav_pcre2pp_hh
+#define lnav_pcre2pp_hh
+
+#define PCRE2_CODE_UNIT_WIDTH 8
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <pcre2.h>
+
+#include "base/auto_mem.hh"
+#include "base/intern_string.hh"
+#include "base/result.h"
+#include "mapbox/variant.hpp"
+
+namespace lnav {
+namespace pcre2pp {
+
+std::string quote(const char* unquoted);
+
+inline std::string
+quote(const std::string& unquoted)
+{
+ return quote(unquoted.c_str());
+}
+
+class code;
+struct capture_builder;
+class matcher;
+
+struct input {
+ string_fragment i_string;
+ int i_offset{0};
+ int i_next_offset{0};
+};
+
+class match_data {
+public:
+ static match_data unitialized() { return match_data{}; }
+
+ string_fragment leading() const
+ {
+ return this->md_input.i_string.sub_range(this->md_input.i_offset,
+ this->md_ovector[0]);
+ }
+
+ string_fragment remaining() const
+ {
+ if (this->md_capture_end == 0 || this->md_input.i_next_offset == -1) {
+ return string_fragment::invalid();
+ }
+
+ return string_fragment::from_byte_range(
+ this->md_input.i_string.sf_string,
+ this->md_input.i_string.sf_begin + this->md_input.i_next_offset,
+ this->md_input.i_string.sf_end);
+ }
+
+ nonstd::optional<string_fragment> operator[](size_t index) const
+ {
+ if (index >= this->md_capture_end) {
+ return nonstd::nullopt;
+ }
+
+ auto start = this->md_ovector[(index * 2)];
+ auto stop = this->md_ovector[(index * 2) + 1];
+ if (start == PCRE2_UNSET || stop == PCRE2_UNSET) {
+ return nonstd::nullopt;
+ }
+
+ return this->md_input.i_string.sub_range(start, stop);
+ }
+
+ template<typename T, std::size_t N>
+ nonstd::optional<string_fragment> operator[](const T (&name)[N]) const;
+
+ int get_count() const { return this->md_capture_end; }
+
+ uint32_t get_capacity() const { return this->md_ovector_count; }
+
+private:
+ friend matcher;
+ friend code;
+
+ match_data() = default;
+
+ explicit match_data(auto_mem<pcre2_match_data> dat)
+ : md_data(std::move(dat)),
+ md_ovector(pcre2_get_ovector_pointer(this->md_data.in())),
+ md_ovector_count(pcre2_get_ovector_count(this->md_data.in()))
+ {
+ }
+
+ auto_mem<pcre2_match_data> md_data;
+ const code* md_code{nullptr};
+ input md_input;
+ PCRE2_SIZE* md_ovector{nullptr};
+ uint32_t md_ovector_count{0};
+ int md_capture_end{0};
+};
+
+class matcher {
+public:
+ struct found {
+ string_fragment f_all;
+ string_fragment f_remaining;
+ };
+ struct not_found {};
+ struct error {
+ const code* e_code{nullptr};
+ int e_error_code{0};
+ std::string get_message();
+ };
+
+ class matches_result
+ : public mapbox::util::variant<found, not_found, error> {
+ public:
+ using variant::variant;
+
+ nonstd::optional<found> ignore_error()
+ {
+ return this->match(
+ [](found fo) { return nonstd::make_optional(fo); },
+ [](not_found) { return nonstd::nullopt; },
+ [](error err) {
+ handle_error(err);
+ return nonstd::nullopt;
+ });
+ }
+
+ private:
+ static void handle_error(error err);
+ };
+
+ matcher& reload_input(string_fragment sf, int next_offset)
+ {
+ this->mb_input = input{sf, next_offset, next_offset};
+
+ return *this;
+ }
+
+ matches_result matches(uint32_t options = 0);
+
+ int get_next_offset() const { return this->mb_input.i_next_offset; }
+
+private:
+ friend capture_builder;
+
+ matcher(const code& co, input& in, match_data& md)
+ : mb_code(co), mb_input(in), mb_match_data(md)
+ {
+ }
+
+ const code& mb_code;
+ input mb_input;
+ match_data& mb_match_data;
+};
+
+struct capture_builder {
+ const code& mb_code;
+ input mb_input;
+
+ capture_builder at(const string_fragment& remaining) &&
+ {
+ this->mb_input.i_offset = this->mb_input.i_next_offset
+ = remaining.sf_begin;
+ return *this;
+ }
+
+ matcher into(match_data& md) &&;
+
+ template<uint32_t Options = 0, typename F>
+ Result<string_fragment, matcher::error> for_each(F func) &&;
+};
+
+struct compile_error {
+ std::string ce_pattern;
+ int ce_code{0};
+ size_t ce_offset{0};
+
+ std::string get_message() const;
+};
+
+class code {
+public:
+ class named_capture {
+ public:
+ size_t get_index() const;
+ string_fragment get_name() const;
+
+ PCRE2_SPTR nc_entry;
+ };
+
+ class named_captures {
+ public:
+ struct iterator {
+ named_capture operator*() const;
+ iterator& operator++();
+ bool operator==(const iterator& other) const;
+ bool operator!=(const iterator& other) const;
+
+ uint32_t i_entry_size;
+ PCRE2_SPTR i_entry;
+ };
+
+ iterator begin() const;
+ iterator end() const;
+ bool empty() const { return this->nc_count == 0; }
+ size_t size() const { return this->nc_count; }
+
+ private:
+ friend code;
+
+ named_captures() = default;
+
+ uint32_t nc_count{0};
+ uint32_t nc_entry_size{0};
+ PCRE2_SPTR nc_name_table{nullptr};
+ };
+
+ static Result<code, compile_error> from(string_fragment sf,
+ int options = 0);
+
+ template<typename T, std::size_t N>
+ static code from_const(const T (&str)[N], int options = 0)
+ {
+ return from(string_fragment::from_const(str), options).unwrap();
+ }
+
+ const std::string& get_pattern() const { return this->p_pattern; }
+
+ named_captures get_named_captures() const;
+
+ const char* get_name_for_capture(size_t index) const;
+
+ size_t get_capture_count() const;
+
+ int name_index(const char* name) const;
+
+ std::vector<string_fragment> get_captures() const;
+
+ uint32_t get_match_data_capacity() const {
+ return this->p_match_proto.md_ovector_count;
+ }
+
+ match_data create_match_data() const;
+
+ capture_builder capture_from(string_fragment in) const
+ {
+ return capture_builder{
+ *this,
+ input{in},
+ };
+ }
+
+ matcher::matches_result find_in(string_fragment in,
+ uint32_t options = 0) const
+ {
+ static thread_local match_data md = this->create_match_data();
+
+ if (md.md_ovector_count < this->p_match_proto.md_ovector_count) {
+ md = this->create_match_data();
+ }
+
+ return this->capture_from(in).into(md).matches(options);
+ }
+
+ size_t match_partial(string_fragment in) const;
+
+ std::string replace(string_fragment str, const char* repl) const;
+
+ std::shared_ptr<code> to_shared() &&
+ {
+ return std::make_shared<code>(std::move(this->p_code),
+ std::move(this->p_pattern));
+ }
+
+ code(auto_mem<pcre2_code> code, std::string pattern)
+ : p_code(std::move(code)), p_pattern(std::move(pattern)),
+ p_match_proto(this->create_match_data())
+ {
+ }
+
+private:
+ friend matcher;
+ friend match_data;
+
+ auto_mem<pcre2_code> p_code;
+ std::string p_pattern;
+ match_data p_match_proto;
+};
+
+template<typename T, std::size_t N>
+nonstd::optional<string_fragment>
+match_data::operator[](const T (&name)[N]) const
+{
+ auto index = pcre2_substring_number_from_name(
+ this->md_code->p_code.in(),
+ reinterpret_cast<const unsigned char*>(name));
+
+ return this->operator[](index);
+}
+
+template<uint32_t Options, typename F>
+Result<string_fragment, matcher::error>
+capture_builder::for_each(F func) &&
+{
+ auto md = this->mb_code.create_match_data();
+ auto mat = matcher{this->mb_code, this->mb_input, md};
+
+ bool done = false;
+ matcher::error eret;
+
+ while (!done) {
+ auto match_res = mat.matches(Options);
+ done = match_res.match(
+ [mat, &func](matcher::found) {
+ func(mat.mb_match_data);
+ return false;
+ },
+ [](matcher::not_found) { return true; },
+ [&eret](matcher::error err) {
+ eret = err;
+ return true;
+ });
+ }
+
+ if (eret.e_error_code == 0) {
+ return Ok(md.remaining());
+ }
+ return Err(eret);
+}
+
+} // namespace pcre2pp
+} // namespace lnav
+
+#endif