diff options
Diffstat (limited to '')
-rw-r--r-- | src/pcrepp/pcre2pp.hh | 367 |
1 files changed, 367 insertions, 0 deletions
diff --git a/src/pcrepp/pcre2pp.hh b/src/pcrepp/pcre2pp.hh new file mode 100644 index 0000000..a40d26c --- /dev/null +++ b/src/pcrepp/pcre2pp.hh @@ -0,0 +1,367 @@ +/** + * Copyright (c) 2022, Timothy Stack + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef lnav_pcre2pp_hh +#define lnav_pcre2pp_hh + +#define PCRE2_CODE_UNIT_WIDTH 8 + +#include <memory> +#include <string> +#include <vector> + +#include <pcre2.h> + +#include "base/auto_mem.hh" +#include "base/intern_string.hh" +#include "base/result.h" +#include "mapbox/variant.hpp" + +namespace lnav { +namespace pcre2pp { + +std::string quote(const char* unquoted); + +inline std::string +quote(const std::string& unquoted) +{ + return quote(unquoted.c_str()); +} + +class code; +struct capture_builder; +class matcher; + +struct input { + string_fragment i_string; + int i_offset{0}; + int i_next_offset{0}; +}; + +class match_data { +public: + static match_data unitialized() { return match_data{}; } + + string_fragment leading() const + { + return this->md_input.i_string.sub_range(this->md_input.i_offset, + this->md_ovector[0]); + } + + string_fragment remaining() const + { + if (this->md_capture_end == 0 || this->md_input.i_next_offset == -1) { + return string_fragment::invalid(); + } + + return string_fragment::from_byte_range( + this->md_input.i_string.sf_string, + this->md_input.i_string.sf_begin + this->md_input.i_next_offset, + this->md_input.i_string.sf_end); + } + + nonstd::optional<string_fragment> operator[](size_t index) const + { + if (index >= this->md_capture_end) { + return nonstd::nullopt; + } + + auto start = this->md_ovector[(index * 2)]; + auto stop = this->md_ovector[(index * 2) + 1]; + if (start == PCRE2_UNSET || stop == PCRE2_UNSET) { + return nonstd::nullopt; + } + + return this->md_input.i_string.sub_range(start, stop); + } + + template<typename T, std::size_t N> + nonstd::optional<string_fragment> operator[](const T (&name)[N]) const; + + int get_count() const { return this->md_capture_end; } + + uint32_t get_capacity() const { return this->md_ovector_count; } + +private: + friend matcher; + friend code; + + match_data() = default; + + explicit match_data(auto_mem<pcre2_match_data> dat) + : md_data(std::move(dat)), + md_ovector(pcre2_get_ovector_pointer(this->md_data.in())), + md_ovector_count(pcre2_get_ovector_count(this->md_data.in())) + { + } + + auto_mem<pcre2_match_data> md_data; + const code* md_code{nullptr}; + input md_input; + PCRE2_SIZE* md_ovector{nullptr}; + uint32_t md_ovector_count{0}; + int md_capture_end{0}; +}; + +class matcher { +public: + struct found { + string_fragment f_all; + string_fragment f_remaining; + }; + struct not_found {}; + struct error { + const code* e_code{nullptr}; + int e_error_code{0}; + std::string get_message(); + }; + + class matches_result + : public mapbox::util::variant<found, not_found, error> { + public: + using variant::variant; + + nonstd::optional<found> ignore_error() + { + return this->match( + [](found fo) { return nonstd::make_optional(fo); }, + [](not_found) { return nonstd::nullopt; }, + [](error err) { + handle_error(err); + return nonstd::nullopt; + }); + } + + private: + static void handle_error(error err); + }; + + matcher& reload_input(string_fragment sf, int next_offset) + { + this->mb_input = input{sf, next_offset, next_offset}; + + return *this; + } + + matches_result matches(uint32_t options = 0); + + int get_next_offset() const { return this->mb_input.i_next_offset; } + +private: + friend capture_builder; + + matcher(const code& co, input& in, match_data& md) + : mb_code(co), mb_input(in), mb_match_data(md) + { + } + + const code& mb_code; + input mb_input; + match_data& mb_match_data; +}; + +struct capture_builder { + const code& mb_code; + input mb_input; + + capture_builder at(const string_fragment& remaining) && + { + this->mb_input.i_offset = this->mb_input.i_next_offset + = remaining.sf_begin; + return *this; + } + + matcher into(match_data& md) &&; + + template<uint32_t Options = 0, typename F> + Result<string_fragment, matcher::error> for_each(F func) &&; +}; + +struct compile_error { + std::string ce_pattern; + int ce_code{0}; + size_t ce_offset{0}; + + std::string get_message() const; +}; + +class code { +public: + class named_capture { + public: + size_t get_index() const; + string_fragment get_name() const; + + PCRE2_SPTR nc_entry; + }; + + class named_captures { + public: + struct iterator { + named_capture operator*() const; + iterator& operator++(); + bool operator==(const iterator& other) const; + bool operator!=(const iterator& other) const; + + uint32_t i_entry_size; + PCRE2_SPTR i_entry; + }; + + iterator begin() const; + iterator end() const; + bool empty() const { return this->nc_count == 0; } + size_t size() const { return this->nc_count; } + + private: + friend code; + + named_captures() = default; + + uint32_t nc_count{0}; + uint32_t nc_entry_size{0}; + PCRE2_SPTR nc_name_table{nullptr}; + }; + + static Result<code, compile_error> from(string_fragment sf, + int options = 0); + + template<typename T, std::size_t N> + static code from_const(const T (&str)[N], int options = 0) + { + return from(string_fragment::from_const(str), options).unwrap(); + } + + const std::string& get_pattern() const { return this->p_pattern; } + + named_captures get_named_captures() const; + + const char* get_name_for_capture(size_t index) const; + + size_t get_capture_count() const; + + int name_index(const char* name) const; + + std::vector<string_fragment> get_captures() const; + + uint32_t get_match_data_capacity() const { + return this->p_match_proto.md_ovector_count; + } + + match_data create_match_data() const; + + capture_builder capture_from(string_fragment in) const + { + return capture_builder{ + *this, + input{in}, + }; + } + + matcher::matches_result find_in(string_fragment in, + uint32_t options = 0) const + { + static thread_local match_data md = this->create_match_data(); + + if (md.md_ovector_count < this->p_match_proto.md_ovector_count) { + md = this->create_match_data(); + } + + return this->capture_from(in).into(md).matches(options); + } + + size_t match_partial(string_fragment in) const; + + std::string replace(string_fragment str, const char* repl) const; + + std::shared_ptr<code> to_shared() && + { + return std::make_shared<code>(std::move(this->p_code), + std::move(this->p_pattern)); + } + + code(auto_mem<pcre2_code> code, std::string pattern) + : p_code(std::move(code)), p_pattern(std::move(pattern)), + p_match_proto(this->create_match_data()) + { + } + +private: + friend matcher; + friend match_data; + + auto_mem<pcre2_code> p_code; + std::string p_pattern; + match_data p_match_proto; +}; + +template<typename T, std::size_t N> +nonstd::optional<string_fragment> +match_data::operator[](const T (&name)[N]) const +{ + auto index = pcre2_substring_number_from_name( + this->md_code->p_code.in(), + reinterpret_cast<const unsigned char*>(name)); + + return this->operator[](index); +} + +template<uint32_t Options, typename F> +Result<string_fragment, matcher::error> +capture_builder::for_each(F func) && +{ + auto md = this->mb_code.create_match_data(); + auto mat = matcher{this->mb_code, this->mb_input, md}; + + bool done = false; + matcher::error eret; + + while (!done) { + auto match_res = mat.matches(Options); + done = match_res.match( + [mat, &func](matcher::found) { + func(mat.mb_match_data); + return false; + }, + [](matcher::not_found) { return true; }, + [&eret](matcher::error err) { + eret = err; + return true; + }); + } + + if (eret.e_error_code == 0) { + return Ok(md.remaining()); + } + return Err(eret); +} + +} // namespace pcre2pp +} // namespace lnav + +#endif |