summaryrefslogtreecommitdiffstats
path: root/src/regex101.import.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/regex101.import.cc395
1 files changed, 395 insertions, 0 deletions
diff --git a/src/regex101.import.cc b/src/regex101.import.cc
new file mode 100644
index 0000000..c0e2984
--- /dev/null
+++ b/src/regex101.import.cc
@@ -0,0 +1,395 @@
+/**
+ * Copyright (c) 2022, Timothy Stack
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * * Neither the name of Timothy Stack nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "regex101.import.hh"
+
+#include "base/fs_util.hh"
+#include "base/itertools.hh"
+#include "base/paths.hh"
+#include "lnav_config.hh"
+#include "log_format.hh"
+#include "log_format_ext.hh"
+#include "pcrepp/pcre2pp.hh"
+#include "regex101.client.hh"
+#include "session_data.hh"
+#include "yajlpp/yajlpp.hh"
+
+using namespace lnav::roles::literals;
+
+static const std::set<std::string> SUPPORTED_FLAVORS = {
+ "pcre",
+ "pcre2",
+};
+
+Result<ghc::filesystem::path, lnav::console::user_message>
+regex101::import(const std::string& url,
+ const std::string& name,
+ const std::string& pat_name)
+{
+ static const auto USER_URL = lnav::pcre2pp::code::from_const(
+ R"(^https://regex101.com/r/(\w+)(?:/(\d+))?)");
+ static thread_local auto md = lnav::pcre2pp::match_data::unitialized();
+ static const auto NAME_RE = lnav::pcre2pp::code::from_const(R"(^\w+$)");
+
+ if (url.empty()) {
+ return Err(lnav::console::user_message::error(
+ "expecting a regex101.com URL to import"));
+ }
+ if (name.empty()) {
+ return Err(lnav::console::user_message::error(
+ "expecting a name for the new format"));
+ }
+
+ auto lformat = log_format::find_root_format(name.c_str());
+ bool existing_format = false;
+
+ if (lformat != nullptr) {
+ auto* ext_format = dynamic_cast<external_log_format*>(lformat.get());
+
+ if (ext_format) {
+ auto found = ext_format->elf_pattern_order
+ | lnav::itertools::find_if([&pat_name](const auto& elem) {
+ return elem->p_name == pat_name;
+ });
+ if (!found) {
+ existing_format = true;
+ }
+ }
+ }
+
+ auto name_find_res = NAME_RE.find_in(name).ignore_error();
+ if (!name_find_res) {
+ auto partial_len = NAME_RE.match_partial(name);
+ return Err(
+ lnav::console::user_message::error(
+ attr_line_t("unable to import: ")
+ .append(lnav::roles::file(url)))
+ .with_reason(attr_line_t("expecting a format name that matches "
+ "the regular expression ")
+ .append_quoted(NAME_RE.get_pattern()))
+ .with_note(attr_line_t(" ")
+ .append_quoted(name)
+ .append("\n ")
+ .append(partial_len, ' ')
+ .append("^ matched up to here"_comment)));
+ }
+
+ auto user_find_res
+ = USER_URL.capture_from(url).into(md).matches().ignore_error();
+ if (!user_find_res) {
+ auto partial_len = USER_URL.match_partial(url);
+ return Err(lnav::console::user_message::error(
+ attr_line_t("unrecognized regex101.com URL: ")
+ .append(lnav::roles::file(url)))
+ .with_reason(attr_line_t("expecting a URL that matches ")
+ .append_quoted(USER_URL.get_pattern()))
+ .with_note(attr_line_t(" ")
+ .append_quoted(url)
+ .append("\n ")
+ .append(partial_len, ' ')
+ .append("^ matched up to here"_comment)));
+ }
+
+ auto permalink = md[1]->to_string();
+
+ auto format_filename = existing_format
+ ? fmt::format(FMT_STRING("{}.regex101-{}.json"), name, permalink)
+ : fmt::format(FMT_STRING("{}.json"), name);
+ auto format_path
+ = lnav::paths::dotlnav() / "formats" / "installed" / format_filename;
+
+ if (ghc::filesystem::exists(format_path)) {
+ return Err(lnav::console::user_message::error(
+ attr_line_t("unable to import: ")
+ .append(lnav::roles::file(url)))
+ .with_reason(
+ attr_line_t("format file already exists: ")
+ .append(lnav::roles::file(format_path.string())))
+ .with_help("delete the existing file to continue"));
+ }
+
+ auto retrieve_res = regex101::client::retrieve(permalink);
+ if (retrieve_res.is<lnav::console::user_message>()) {
+ return Err(retrieve_res.get<lnav::console::user_message>());
+ }
+
+ if (retrieve_res.is<regex101::client::no_entry>()) {
+ return Err(lnav::console::user_message::error(
+ attr_line_t("unknown regex101.com entry: ")
+ .append(lnav::roles::symbol(url))));
+ }
+
+ auto entry = retrieve_res.get<regex101::client::entry>();
+
+ if (SUPPORTED_FLAVORS.count(entry.e_flavor) == 0) {
+ return Err(lnav::console::user_message::error(
+ attr_line_t("invalid regex ")
+ .append_quoted(lnav::roles::symbol(entry.e_regex))
+ .append(" from ")
+ .append_quoted(lnav::roles::symbol(url)))
+ .with_reason(attr_line_t("unsupported regex flavor: ")
+ .append_quoted(
+ lnav::roles::symbol(entry.e_flags)))
+ .with_help(attr_line_t("the supported flavors are: ")
+ .join(SUPPORTED_FLAVORS,
+ VC_ROLE.value(role_t::VCR_SYMBOL),
+ ", ")));
+ }
+
+ auto regex_res = lnav::pcre2pp::code::from(entry.e_regex);
+ if (regex_res.isErr()) {
+ auto parse_error = regex_res.unwrapErr();
+ return Err(lnav::console::user_message::error(
+ attr_line_t("invalid regex ")
+ .append_quoted(lnav::roles::symbol(entry.e_regex))
+ .append(" from ")
+ .append_quoted(lnav::roles::symbol(url)))
+ .with_reason(parse_error.get_message())
+ .with_help("fix the regex and try the import again"));
+ }
+
+ auto regex = regex_res.unwrap();
+ yajlpp_gen gen;
+
+ yajl_gen_config(gen, yajl_gen_beautify, true);
+ {
+ yajlpp_map root_map(gen);
+
+ root_map.gen("$schema");
+ root_map.gen(DEFAULT_FORMAT_SCHEMA);
+
+ root_map.gen(name);
+ {
+ yajlpp_map format_map(gen);
+
+ if (!existing_format) {
+ format_map.gen("description");
+ format_map.gen(fmt::format(
+ FMT_STRING(
+ "Format file generated from regex101 entry -- {}"),
+ url));
+ }
+ format_map.gen("regex");
+ {
+ yajlpp_map regex_map(gen);
+
+ regex_map.gen(pat_name);
+ {
+ yajlpp_map std_map(gen);
+
+ std_map.gen("pattern");
+ std_map.gen(entry.e_regex);
+ }
+ }
+ if (!existing_format) {
+ format_map.gen("value");
+ {
+ yajlpp_map value_map(gen);
+
+ for (auto named_cap : regex.get_named_captures()) {
+ if (named_cap.get_name() == "body") {
+ // don't need to add this as a value
+ continue;
+ }
+
+ value_map.gen(named_cap.get_name());
+ {
+ yajlpp_map cap_map(gen);
+
+ cap_map.gen("kind");
+ cap_map.gen("string");
+ }
+ }
+ }
+ }
+ format_map.gen("sample");
+ {
+ yajlpp_array sample_array(gen);
+
+ if (!entry.e_test_string.empty()) {
+ yajlpp_map elem_map(gen);
+
+ elem_map.gen("line");
+ elem_map.gen(rtrim(entry.e_test_string));
+ }
+ for (const auto& ut : entry.e_unit_tests) {
+ if (ut.ut_test_string.empty()) {
+ continue;
+ }
+
+ yajlpp_map elem_map(gen);
+
+ if (!ut.ut_description.empty()) {
+ elem_map.gen("description");
+ elem_map.gen(ut.ut_description);
+ }
+ elem_map.gen("line");
+ elem_map.gen(rtrim(ut.ut_test_string));
+ }
+ }
+ }
+ }
+
+ auto format_json = gen.to_string_fragment();
+ auto write_res = lnav::filesystem::write_file(format_path, format_json);
+ if (write_res.isErr()) {
+ return Err(lnav::console::user_message::error(
+ attr_line_t("unable to create format file: ")
+ .append(lnav::roles::file(format_path)))
+ .with_reason(write_res.unwrapErr()));
+ }
+
+ lnav::session::regex101::insert_entry({name, pat_name, permalink, ""});
+
+ return Ok(format_path);
+}
+
+ghc::filesystem::path
+regex101::patch_path(const external_log_format* format,
+ const std::string& permalink)
+{
+ if (format->elf_format_source_order.empty()) {
+ return lnav::paths::dotlnav() / "formats" / "installed"
+ / fmt::format(FMT_STRING("{}.regex101-{}.json"),
+ format->get_name(),
+ permalink);
+ }
+
+ auto first_path = format->elf_format_source_order.front();
+
+ return first_path.replace_extension(
+ fmt::format(FMT_STRING("regex101-{}.json"), permalink));
+}
+
+Result<ghc::filesystem::path, lnav::console::user_message>
+regex101::patch(const external_log_format* format,
+ const std::string& pat_name,
+ const regex101::client::entry& entry)
+{
+ yajlpp_gen gen;
+
+ yajl_gen_config(gen, yajl_gen_beautify, true);
+ {
+ yajlpp_map root_map(gen);
+
+ root_map.gen("$schema");
+ root_map.gen(DEFAULT_FORMAT_SCHEMA);
+
+ root_map.gen(format->get_name());
+ {
+ yajlpp_map format_map(gen);
+
+ format_map.gen("regex");
+ {
+ yajlpp_map regex_map(gen);
+
+ regex_map.gen(pat_name);
+ {
+ yajlpp_map pat_map(gen);
+
+ pat_map.gen("pattern");
+ pat_map.gen(entry.e_regex);
+ }
+ }
+
+ auto new_samples
+ = entry.e_unit_tests
+ | lnav::itertools::prepend(regex101::client::unit_test{
+ "",
+ entry.e_test_string,
+ })
+ | lnav::itertools::filter_out([&format](const auto& ut) {
+ if (ut.ut_test_string.empty()) {
+ return true;
+ }
+ return (format->elf_samples
+ | lnav::itertools::find_if(
+ [&ut](const auto& samp) {
+ return samp.s_line.pp_value
+ == rtrim(ut.ut_test_string);
+ }))
+ .has_value();
+ });
+
+ if (!new_samples.empty()) {
+ format_map.gen("sample");
+ {
+ yajlpp_array sample_array(gen);
+
+ for (const auto& ut : entry.e_unit_tests) {
+ yajlpp_map elem_map(gen);
+
+ if (!ut.ut_description.empty()) {
+ elem_map.gen("description");
+ elem_map.gen(ut.ut_description);
+ }
+ elem_map.gen("line");
+ elem_map.gen(rtrim(ut.ut_test_string));
+ }
+ }
+ }
+ }
+ }
+
+ auto retval
+ = regex101::patch_path(format, entry.e_permalink_fragment.value());
+ auto write_res
+ = lnav::filesystem::write_file(retval, gen.to_string_fragment());
+ if (write_res.isErr()) {
+ return Err(lnav::console::user_message::error(
+ attr_line_t("unable to write format patch file: ")
+ .append(lnav::roles::file(retval.string())))
+ .with_reason(write_res.unwrapErr()));
+ }
+
+ return Ok(retval);
+}
+
+regex101::client::entry
+regex101::convert_format_pattern(
+ const external_log_format* format,
+ std::shared_ptr<external_log_format::pattern> pattern)
+{
+ regex101::client::entry en;
+
+ en.e_regex = pattern->p_pcre.pp_value->get_pattern();
+ for (const auto& sample : format->elf_samples) {
+ if (en.e_test_string.empty()) {
+ en.e_test_string = sample.s_line.pp_value;
+ } else {
+ regex101::client::unit_test ut;
+
+ ut.ut_test_string = sample.s_line.pp_value;
+ ut.ut_description = sample.s_description;
+ en.e_unit_tests.emplace_back(ut);
+ }
+ }
+
+ return en;
+}