From 62e4c68907d8d33709c2c1f92a161dff00b3d5f2 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 15 Apr 2024 22:01:36 +0200 Subject: Adding upstream version 0.11.2. Signed-off-by: Daniel Baumann --- src/formats/logfmt/CMakeLists.txt | 40 +++++ src/formats/logfmt/Makefile.am | 41 +++++ src/formats/logfmt/logfmt.parser.cc | 266 +++++++++++++++++++++++++++++++ src/formats/logfmt/logfmt.parser.hh | 91 +++++++++++ src/formats/logfmt/logfmt.parser.test.cc | 221 +++++++++++++++++++++++++ 5 files changed, 659 insertions(+) create mode 100644 src/formats/logfmt/CMakeLists.txt create mode 100644 src/formats/logfmt/Makefile.am create mode 100644 src/formats/logfmt/logfmt.parser.cc create mode 100644 src/formats/logfmt/logfmt.parser.hh create mode 100644 src/formats/logfmt/logfmt.parser.test.cc (limited to 'src/formats/logfmt') diff --git a/src/formats/logfmt/CMakeLists.txt b/src/formats/logfmt/CMakeLists.txt new file mode 100644 index 0000000..a24abae --- /dev/null +++ b/src/formats/logfmt/CMakeLists.txt @@ -0,0 +1,40 @@ + +add_library( + logfmt + STATIC + logfmt.parser.hh + logfmt.parser.cc +) + +target_include_directories( + logfmt + PUBLIC + . + ${CMAKE_BINARY_DIR}/src + ${CMAKE_SOURCE_DIR}/src +) +target_link_libraries( + logfmt + PRIVATE + cppfmt + cppscnlib +) + +add_executable( + logfmt.parser.test + logfmt.parser.test.cc +) +target_include_directories( + logfmt.parser.test + PUBLIC + . 
+ ${CMAKE_BINARY_DIR}/src + ${CMAKE_SOURCE_DIR}/src + ../../third-party/doctest-root) +target_link_libraries( + logfmt.parser.test + logfmt + base +) + +add_test(NAME logfmt.parser.test COMMAND logfmt.parser.test) diff --git a/src/formats/logfmt/Makefile.am b/src/formats/logfmt/Makefile.am new file mode 100644 index 0000000..8d1f3ff --- /dev/null +++ b/src/formats/logfmt/Makefile.am @@ -0,0 +1,41 @@ + +include $(top_srcdir)/aminclude_static.am + +AM_CPPFLAGS = \ + $(CODE_COVERAGE_CPPFLAGS) \ + -Wall \ + -I$(top_srcdir)/src/ \ + -I$(top_srcdir)/src/third-party \ + -I$(top_srcdir)/src/fmtlib \ + -I$(top_srcdir)/src/third-party/scnlib/include \ + $(LIBARCHIVE_CFLAGS) \ + $(READLINE_CFLAGS) \ + $(SQLITE3_CFLAGS) \ + $(LIBCURL_CPPFLAGS) + +AM_LIBS = $(CODE_COVERAGE_LIBS) +AM_CFLAGS = $(CODE_COVERAGE_CFLAGS) +AM_CXXFLAGS = $(CODE_COVERAGE_CXXFLAGS) + +noinst_LIBRARIES = liblogfmt.a + +noinst_HEADERS = \ + logfmt.parser.hh + +liblogfmt_a_SOURCES = \ + logfmt.parser.cc + +check_PROGRAMS = \ + logfmt.parser.test + +logfmt_parser_test_SOURCES = \ + logfmt.parser.test.cc + +logfmt_parser_test_LDADD = \ + liblogfmt.a \ + $(top_builddir)/src/base/libbase.a \ + $(top_builddir)/src/pcrepp/libpcrepp.a \ + $(top_builddir)/src/third-party/scnlib/src/libscnlib.a + +TESTS = \ + logfmt.parser.test diff --git a/src/formats/logfmt/logfmt.parser.cc b/src/formats/logfmt/logfmt.parser.cc new file mode 100644 index 0000000..20c7252 --- /dev/null +++ b/src/formats/logfmt/logfmt.parser.cc @@ -0,0 +1,266 @@ +/** + * Copyright (c) 2021, Timothy Stack + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/**
 * Character predicate that consumes a bare (unquoted) logfmt value while
 * classifying it.  Two small state machines run side by side over every
 * accepted character: one recognizes integers ("-"? digit+), the other
 * recognizes floats (digits with an optional fraction and/or exponent).
 *
 * The predicate accepts every character except a space, so the value ends at
 * the first space or at the end of the input.  After the scan, is_integer()
 * and is_float() report how the consumed text can be interpreted.
 */
struct bare_value_predicate {
    enum class int_state_t {
        INIT,
        NEED_DIGIT,
        DIGITS,
        INVALID,
    };

    enum class float_state_t {
        INIT,
        NEED_DIGIT,
        DIGITS,
        FRACTION_DIGIT,
        EXPONENT_INIT,
        EXPONENT_NEED_DIGIT,
        EXPONENT_DIGIT,
        INVALID,
    };

    int_state_t bvp_int_state{int_state_t::INIT};
    float_state_t bvp_float_state{float_state_t::INIT};
    /** Number of characters accepted so far. */
    size_t bvp_index{0};

    /** @return true if the accepted text is an optional '-' then digits. */
    bool is_integer() const
    {
        return this->bvp_int_state == int_state_t::DIGITS;
    }

    /**
     * @return true if the float machine stopped in an accepting state.
     * Plain digit strings also satisfy this, so callers that distinguish
     * integers from floats must test is_integer() first.
     */
    bool is_float() const
    {
        switch (this->bvp_float_state) {
            case float_state_t::DIGITS:
            case float_state_t::FRACTION_DIGIT:
            case float_state_t::EXPONENT_DIGIT:
                return true;
            default:
                return false;
        }
    }

    /**
     * Feed one character to both state machines.
     *
     * @param ch The next character of the value.
     * @return false when ch is a space (the value has ended and the
     * character is not consumed), true otherwise.
     */
    bool operator()(char ch)
    {
        if (ch == ' ') {
            return false;
        }

        // isdigit() is undefined for negative arguments other than EOF, so
        // bytes >= 0x80 must be widened through unsigned char first.
        const bool got_digit = isdigit(static_cast<unsigned char>(ch)) != 0;

        switch (this->bvp_int_state) {
            case int_state_t::INIT:
                if (got_digit) {
                    this->bvp_int_state = int_state_t::DIGITS;
                } else if (ch == '-') {
                    this->bvp_int_state = int_state_t::NEED_DIGIT;
                } else {
                    this->bvp_int_state = int_state_t::INVALID;
                }
                break;
            case int_state_t::DIGITS:
            case int_state_t::NEED_DIGIT:
                this->bvp_int_state
                    = got_digit ? int_state_t::DIGITS : int_state_t::INVALID;
                break;
            case int_state_t::INVALID:
                break;
        }

        switch (this->bvp_float_state) {
            case float_state_t::INIT:
                if (got_digit) {
                    this->bvp_float_state = float_state_t::DIGITS;
                } else if (ch == '-') {
                    this->bvp_float_state = float_state_t::NEED_DIGIT;
                } else {
                    this->bvp_float_state = float_state_t::INVALID;
                }
                break;
            case float_state_t::NEED_DIGIT:
                // A sign must be followed by a digit.  Allowing '.' or 'e'
                // here would classify "-." and "-e5" as floats even though
                // they have no mantissa digits (and INIT already rejects a
                // leading '.' or 'e' without the sign).
                this->bvp_float_state = got_digit ? float_state_t::DIGITS
                                                  : float_state_t::INVALID;
                break;
            case float_state_t::DIGITS:
                if (got_digit) {
                    this->bvp_float_state = float_state_t::DIGITS;
                } else if (ch == '.') {
                    this->bvp_float_state = float_state_t::FRACTION_DIGIT;
                } else if (ch == 'e' || ch == 'E') {
                    this->bvp_float_state = float_state_t::EXPONENT_INIT;
                } else {
                    this->bvp_float_state = float_state_t::INVALID;
                }
                break;
            case float_state_t::FRACTION_DIGIT:
                if (got_digit) {
                    this->bvp_float_state = float_state_t::FRACTION_DIGIT;
                } else if (ch == 'e' || ch == 'E') {
                    this->bvp_float_state = float_state_t::EXPONENT_INIT;
                } else {
                    this->bvp_float_state = float_state_t::INVALID;
                }
                break;
            case float_state_t::EXPONENT_INIT:
                if (got_digit) {
                    this->bvp_float_state = float_state_t::EXPONENT_DIGIT;
                } else if (ch == '-' || ch == '+') {
                    this->bvp_float_state
                        = float_state_t::EXPONENT_NEED_DIGIT;
                } else {
                    this->bvp_float_state = float_state_t::INVALID;
                }
                break;
            case float_state_t::EXPONENT_NEED_DIGIT:
            case float_state_t::EXPONENT_DIGIT:
                this->bvp_float_state = got_digit
                    ? float_state_t::EXPONENT_DIGIT
                    : float_state_t::INVALID;
                break;
            case float_state_t::INVALID:
                break;
        }

        this->bvp_index += 1;

        return true;
    }
};
"string body missing"}; + } + + auto after_quote = quoted_pair->second.consume(IS_DQ); + + if (!after_quote) { + return error{quoted_pair->second.sf_begin, "non-terminated string"}; + } + + this->p_next_input = after_quote.value(); + return std::make_pair( + key_frag, + quoted_value{string_fragment{quoted_pair->first.sf_string, + quoted_pair->first.sf_begin - 1, + quoted_pair->first.sf_end + 1}}); + } + + bare_value_predicate bvp; + auto value_pair = value_start.split_while(bvp); + + if (value_pair) { + static const auto TRUE_FRAG = string_fragment::from_const("true"); + static const auto FALSE_FRAG = string_fragment::from_const("false"); + + this->p_next_input = value_pair->second; + if (bvp.is_integer()) { + int_value retval; + + auto int_scan_res + = scn::scan_value(value_pair->first.to_string_view()); + if (int_scan_res) { + retval.iv_value = int_scan_res.value(); + } + retval.iv_str_value = value_pair->first; + + return std::make_pair(key_frag, retval); + } + if (bvp.is_float()) { + float_value retval; + + auto float_scan_res + = scn::scan_value(value_pair->first.to_string_view()); + if (float_scan_res) { + retval.fv_value = float_scan_res.value(); + } + retval.fv_str_value = value_pair->first; + + return std::make_pair(key_frag, retval); + } + if (value_pair->first.iequal(TRUE_FRAG)) { + return std::make_pair(key_frag, + bool_value{true, value_pair->first}); + } + if (value_pair->first.iequal(FALSE_FRAG)) { + return std::make_pair(key_frag, + bool_value{false, value_pair->first}); + } + return std::make_pair(key_frag, unquoted_value{value_pair->first}); + } + + this->p_next_input = value_start; + return std::make_pair(key_frag, unquoted_value{string_fragment{}}); +} diff --git a/src/formats/logfmt/logfmt.parser.hh b/src/formats/logfmt/logfmt.parser.hh new file mode 100644 index 0000000..7806001 --- /dev/null +++ b/src/formats/logfmt/logfmt.parser.hh @@ -0,0 +1,91 @@ +/** + * Copyright (c) 2021, Timothy Stack + * + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * @file logfmt.parser.hh + */ + +#ifndef lnav_logfmt_parser_hh +#define lnav_logfmt_parser_hh + +#include "base/intern_string.hh" +#include "base/result.h" +#include "mapbox/variant.hpp" + +namespace logfmt { + +class parser { +public: + explicit parser(string_fragment sf); + + struct end_of_input {}; + struct error { + int e_offset; + const std::string e_msg; + }; + struct unquoted_value { + string_fragment uv_value; + }; + struct quoted_value { + string_fragment qv_value; + }; + struct bool_value { + bool bv_value{false}; + string_fragment bv_str_value; + }; + struct int_value { + int64_t iv_value{0}; + string_fragment iv_str_value; + }; + struct float_value { + double fv_value{0}; + string_fragment fv_str_value; + }; + using value_type = mapbox::util::variant< + bool_value, + int_value, + float_value, + unquoted_value, + quoted_value + >; + + using kvpair = std::pair; + + using step_result = mapbox::util::variant< + end_of_input, + kvpair, + error + >; + + step_result step(); +private: + string_fragment p_next_input; +}; + +} + +#endif diff --git a/src/formats/logfmt/logfmt.parser.test.cc b/src/formats/logfmt/logfmt.parser.test.cc new file mode 100644 index 0000000..2193bfe --- /dev/null +++ b/src/formats/logfmt/logfmt.parser.test.cc @@ -0,0 +1,221 @@ +/** + * Copyright (c) 2021, Timothy Stack + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @file logfmt.parser.test.cc + */ + +#include "config.h" + +#include + +#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN +#include "doctest/doctest.h" + +#include "logfmt.parser.hh" + +TEST_CASE("basic") +{ + static const char *line = "abc=def ghi=\"1 2 3 4\" time=333 empty1= tf=true empty2="; + + auto p = logfmt::parser{string_fragment{line}}; + + auto pair1 = p.step(); + + CHECK(pair1.is()); + CHECK(pair1.get().first == "abc"); + CHECK(pair1.get().second + .get().uv_value == "def"); + + auto pair2 = p.step(); + + CHECK(pair2.is()); + CHECK(pair2.get().first == "ghi"); + CHECK(pair2.get().second + .get().qv_value == "\"1 2 3 4\""); + + auto pair3 = p.step(); + + CHECK(pair3.is()); + CHECK(pair3.get().first == "time"); + CHECK(pair3.get().second + .get().iv_value == 333); + + auto pair4 = p.step(); + + CHECK(pair4.is()); + CHECK(pair4.get().first == "empty1"); + CHECK(pair4.get().second + .get().uv_value == ""); + + auto pair5 = p.step(); + + CHECK(pair5.is()); + CHECK(pair5.get().first == "tf"); + 
CHECK(pair5.get().second + .get().bv_value); + + auto pair6 = p.step(); + + CHECK(pair6.is()); + CHECK(pair6.get().first == "empty2"); + CHECK(pair6.get().second + .get().uv_value == ""); + + auto eoi = p.step(); + CHECK(eoi.is()); +} + +TEST_CASE("floats") +{ + static const char *line = "f1=1.0 f2=-2.0 f3=1.2e3 f4=1.2e-2 f5=2e1 f6=2e+1"; + + auto p = logfmt::parser{string_fragment{line}}; + + auto pair1 = p.step(); + + CHECK(pair1.is()); + CHECK(pair1.get().first == "f1"); + CHECK(pair1.get().second + .get().fv_value == 1.0); + + auto pair2 = p.step(); + + CHECK(pair2.is()); + CHECK(pair2.get().first == "f2"); + CHECK(pair2.get().second + .get().fv_value == -2.0); + + auto pair3 = p.step(); + + CHECK(pair3.is()); + CHECK(pair3.get().first == "f3"); + CHECK(pair3.get().second + .get().fv_value == 1200); + + auto pair4 = p.step(); + + CHECK(pair4.is()); + CHECK(pair4.get().first == "f4"); + CHECK(pair4.get().second + .get().fv_value == 0.012); + + auto pair5 = p.step(); + + CHECK(pair5.is()); + CHECK(pair5.get().first == "f5"); + CHECK(pair5.get().second + .get().fv_value == 20); + + auto pair6 = p.step(); + + CHECK(pair6.is()); + CHECK(pair6.get().first == "f6"); + CHECK(pair6.get().second + .get().fv_value == 20); +} + +TEST_CASE("bad floats") +{ + static const char *line = "bf1=- bf2=-1.2e bf3=1.2.3 bf4=1e2e4"; + + auto p = logfmt::parser{string_fragment{line}}; + + auto pair1 = p.step(); + + CHECK(pair1.is()); + CHECK(pair1.get().first == "bf1"); + CHECK(pair1.get().second + .get().uv_value == "-"); + + auto pair2 = p.step(); + + CHECK(pair2.is()); + CHECK(pair2.get().first == "bf2"); + CHECK(pair2.get().second + .get().uv_value == "-1.2e"); + + auto pair3 = p.step(); + + CHECK(pair3.is()); + CHECK(pair3.get().first == "bf3"); + CHECK(pair3.get().second + .get().uv_value == "1.2.3"); + + auto pair4 = p.step(); + + CHECK(pair4.is()); + CHECK(pair4.get().first == "bf4"); + CHECK(pair4.get().second + .get().uv_value == "1e2e4"); +} + +TEST_CASE("non-terminated 
string") +{ + static const char *line = "abc=\"12 2"; + + auto p = logfmt::parser{string_fragment{line}}; + auto pair1 = p.step(); + + CHECK(pair1.is()); + CHECK(pair1.get().e_offset == 9); + CHECK(pair1.get().e_msg == "non-terminated string"); +} + +TEST_CASE("missing equals") +{ + static const char *line = "abc"; + + auto p = logfmt::parser{string_fragment{line}}; + auto pair1 = p.step(); + + CHECK(pair1.is()); + CHECK(pair1.get().e_offset == 3); + CHECK(pair1.get().e_msg == "expecting '='"); +} + +TEST_CASE("missing key") +{ + static const char *line = "=def"; + + auto p = logfmt::parser{string_fragment{line}}; + auto pair1 = p.step(); + + CHECK(pair1.is()); + CHECK(pair1.get().e_offset == 0); + CHECK(pair1.get().e_msg == "expecting key followed by '='"); +} + +TEST_CASE("empty") +{ + static const char *line = ""; + + auto p = logfmt::parser{string_fragment{line}}; + auto pair1 = p.step(); + + CHECK(pair1.is()); +} -- cgit v1.2.3