Adding upstream version 0.11.2.upstream/0.11.2

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-15 20:01:36 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-15 20:01:36 +0000
commit: 62e4c68907d8d33709c2c1f92a161dff00b3d5f2 (patch)
tree: adbbaf3acf88ea08f6eeec4b75ee98ad3b07fbdc /src/base/intern_string.hh
parent: Initial commit. (diff)
download: lnav-28bf0ff1a7ad38ac9ddb1f5864ba0161b0617471.tar.xz
lnav-28bf0ff1a7ad38ac9ddb1f5864ba0161b0617471.zip
1 files changed, 865 insertions, 0 deletions
diff --git a/src/base/intern_string.hh b/src/base/intern_string.hh
new file mode 100644
index 0000000..4ae40da
--- /dev/null
+++ b/src/base/intern_string.hh
@@ -0,0 +1,865 @@
+/**
+ * Copyright (c) 2014, Timothy Stack
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * * Neither the name of Timothy Stack nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @file intern_string.hh
+ */
+
+#ifndef intern_string_hh
+#define intern_string_hh
+
+#include <ostream>
+#include <string>
+#include <vector>
+
+#include <assert.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "fmt/format.h"
+#include "optional.hpp"
+#include "scn/util/string_view.h"
+#include "strnatcmp.h"
+#include "ww898/cp_utf8.hpp"
+
+struct string_fragment {
+    using iterator = const char*;
+
+    static string_fragment invalid()
+    {
+        string_fragment retval;
+
+        retval.invalidate();
+        return retval;
+    }
+
+    static string_fragment from_c_str(const char* str)
+    {
+        return string_fragment{str, 0, str != nullptr ? (int) strlen(str) : 0};
+    }
+
+    static string_fragment from_c_str(const unsigned char* str)
+    {
+        return string_fragment{
+            str, 0, str != nullptr ? (int) strlen((char*) str) : 0};
+    }
+
+    template<typename T, std::size_t N>
+    static string_fragment from_const(const T (&str)[N])
+    {
+        return string_fragment{str, 0, (int) N - 1};
+    }
+
+    static string_fragment from_str(const std::string& str)
+    {
+        return string_fragment{str.c_str(), 0, (int) str.size()};
+    }
+
+    static string_fragment from_substr(const std::string& str,
+                                       size_t offset,
+                                       size_t length)
+    {
+        return string_fragment{
+            str.c_str(), (int) offset, (int) (offset + length)};
+    }
+
+    static string_fragment from_str_range(const std::string& str,
+                                          size_t begin,
+                                          size_t end)
+    {
+        return string_fragment{str.c_str(), (int) begin, (int) end};
+    }
+
+    static string_fragment from_bytes(const char* bytes, size_t len)
+    {
+        return string_fragment{bytes, 0, (int) len};
+    }
+
+    static string_fragment from_bytes(const unsigned char* bytes, size_t len)
+    {
+        return string_fragment{(const char*) bytes, 0, (int) len};
+    }
+
+    static string_fragment from_memory_buffer(const fmt::memory_buffer& buf)
+    {
+        return string_fragment{buf.data(), 0, (int) buf.size()};
+    }
+
+    static string_fragment from_byte_range(const char* bytes,
+                                           size_t begin,
+                                           size_t end)
+    {
+        return string_fragment{bytes, (int) begin, (int) end};
+    }
+
+    explicit string_fragment(const char* str = "", int begin = 0, int end = -1)
+        : sf_string(str), sf_begin(begin), sf_end(end == -1 ? strlen(str) : end)
+    {
+    }
+
+    explicit string_fragment(const unsigned char* str,
+                             int begin = 0,
+                             int end = -1)
+        : sf_string((const char*) str), sf_begin(begin),
+          sf_end(end == -1 ? strlen((const char*) str) : end)
+    {
+    }
+
+    string_fragment(const std::string& str)
+        : sf_string(str.c_str()), sf_begin(0), sf_end(str.length())
+    {
+    }
+
+    bool is_valid() const
+    {
+        return this->sf_begin != -1 && this->sf_begin <= this->sf_end;
+    }
+
+    int length() const { return this->sf_end - this->sf_begin; }
+
+    Result<ssize_t, const char*> utf8_length() const;
+
+    const char* data() const { return &this->sf_string[this->sf_begin]; }
+
+    const unsigned char* udata() const
+    {
+        return (const unsigned char*) &this->sf_string[this->sf_begin];
+    }
+
+    char* writable_data(int offset = 0)
+    {
+        return (char*) &this->sf_string[this->sf_begin + offset];
+    }
+
+    char front() const { return this->sf_string[this->sf_begin]; }
+
+    uint32_t front_codepoint() const
+    {
+        size_t index = 0;
+        try {
+            return ww898::utf::utf8::read(
+                [this, &index]() { return this->data()[index++]; });
+        } catch (const std::runtime_error& e) {
+            return this->data()[0];
+        }
+    }
+
+    char back() const { return this->sf_string[this->sf_end - 1]; }
+
+    void pop_back()
+    {
+        if (!this->empty()) {
+            this->sf_end -= 1;
+        }
+    }
+
+    iterator begin() const { return &this->sf_string[this->sf_begin]; }
+
+    iterator end() const { return &this->sf_string[this->sf_end]; }
+
+    bool empty() const { return !this->is_valid() || length() == 0; }
+
+    Result<ssize_t, const char*> codepoint_to_byte_index(ssize_t cp_index) const
+    {
+        ssize_t retval = 0;
+
+        while (cp_index > 0) {
+            if (retval >= this->length()) {
+                return Err("index is beyond the end of the string");
+            }
+            auto ch_len = TRY(ww898::utf::utf8::char_size([this, retval]() {
+                return std::make_pair(this->data()[retval],
+                                      this->length() - retval - 1);
+            }));
+
+            retval += ch_len;
+            cp_index -= 1;
+        }
+
+        return Ok(retval);
+    }
+
+    const char& operator[](int index) const
+    {
+        return this->sf_string[sf_begin + index];
+    }
+
+    bool operator==(const std::string& str) const
+    {
+        if (this->length() != (int) str.length()) {
+            return false;
+        }
+
+        return memcmp(
+                   &this->sf_string[this->sf_begin], str.c_str(), str.length())
+            == 0;
+    }
+
+    bool operator==(const string_fragment& sf) const
+    {
+        if (this->length() != sf.length()) {
+            return false;
+        }
+
+        return memcmp(this->data(), sf.data(), sf.length()) == 0;
+    }
+
+    bool iequal(const string_fragment& sf) const
+    {
+        if (this->length() != sf.length()) {
+            return false;
+        }
+
+        return strnatcasecmp(
+                   this->length(), this->data(), sf.length(), sf.data())
+            == 0;
+    }
+
+    bool operator==(const char* str) const
+    {
+        size_t len = strlen(str);
+
+        return len == (size_t) this->length()
+            && strncmp(this->data(), str, this->length()) == 0;
+    }
+
+    bool operator!=(const char* str) const { return !(*this == str); }
+
+    bool startswith(const char* prefix) const
+    {
+        const auto* iter = this->begin();
+
+        while (*prefix != '\0' && iter < this->end() && *prefix == *iter) {
+            prefix += 1;
+            iter += 1;
+        }
+
+        return *prefix == '\0';
+    }
+
+    bool endswith(const char* suffix) const
+    {
+        int suffix_len = strlen(suffix);
+
+        if (suffix_len > this->length()) {
+            return false;
+        }
+
+        const auto* curr = this->end() - suffix_len;
+        while (*suffix != '\0' && *curr == *suffix) {
+            suffix += 1;
+            curr += 1;
+        }
+
+        return *suffix == '\0';
+    }
+
+    string_fragment substr(int begin) const
+    {
+        return string_fragment{
+            this->sf_string, this->sf_begin + begin, this->sf_end};
+    }
+
+    string_fragment sub_range(int begin, int end) const
+    {
+        return string_fragment{
+            this->sf_string, this->sf_begin + begin, this->sf_begin + end};
+    }
+
+    size_t count(char ch) const
+    {
+        size_t retval = 0;
+
+        for (int lpc = this->sf_begin; lpc < this->sf_end; lpc++) {
+            if (this->sf_string[lpc] == ch) {
+                retval += 1;
+            }
+        }
+
+        return retval;
+    }
+
+    nonstd::optional<size_t> find(char ch) const
+    {
+        for (int lpc = this->sf_begin; lpc < this->sf_end; lpc++) {
+            if (this->sf_string[lpc] == ch) {
+                return lpc - this->sf_begin;
+            }
+        }
+
+        return nonstd::nullopt;
+    }
+
+    template<typename P>
+    string_fragment find_left_boundary(size_t start, P&& predicate) const
+    {
+        assert((int) start <= this->length());
+
+        if (start > 0 && start == this->length()) {
+            start -= 1;
+        }
+        while (start > 0) {
+            if (predicate(this->data()[start])) {
+                start += 1;
+                break;
+            }
+            start -= 1;
+        }
+
+        return string_fragment{
+            this->sf_string,
+            (int) start,
+            this->sf_end,
+        };
+    }
+
+    template<typename P>
+    string_fragment find_right_boundary(size_t start, P&& predicate) const
+    {
+        while ((int) start < this->length()) {
+            if (predicate(this->data()[start])) {
+                break;
+            }
+            start += 1;
+        }
+
+        return string_fragment{
+            this->sf_string,
+            this->sf_begin,
+            this->sf_begin + (int) start,
+        };
+    }
+
+    template<typename P>
+    string_fragment find_boundaries_around(size_t start, P&& predicate) const
+    {
+        return this->template find_left_boundary(start, predicate)
+            .find_right_boundary(0, predicate);
+    }
+
+    nonstd::optional<std::pair<uint32_t, string_fragment>> consume_codepoint()
+        const
+    {
+        auto cp = this->front_codepoint();
+        auto index_res = this->codepoint_to_byte_index(1);
+
+        if (index_res.isErr()) {
+            return nonstd::nullopt;
+        }
+
+        return std::make_pair(cp, this->substr(index_res.unwrap()));
+    }
+
+    template<typename P>
+    nonstd::optional<string_fragment> consume(P predicate) const
+    {
+        int consumed = 0;
+        while (consumed < this->length()) {
+            if (!predicate(this->data()[consumed])) {
+                break;
+            }
+
+            consumed += 1;
+        }
+
+        if (consumed == 0) {
+            return nonstd::nullopt;
+        }
+
+        return string_fragment{
+            this->sf_string,
+            this->sf_begin + consumed,
+            this->sf_end,
+        };
+    }
+
+    nonstd::optional<string_fragment> consume_n(int amount) const;
+
+    template<typename P>
+    string_fragment skip(P predicate) const
+    {
+        int offset = 0;
+        while (offset < this->length() && predicate(this->data()[offset])) {
+            offset += 1;
+        }
+
+        return string_fragment{
+            this->sf_string,
+            this->sf_begin + offset,
+            this->sf_end,
+        };
+    }
+
+    using split_result
+        = nonstd::optional<std::pair<string_fragment, string_fragment>>;
+
+    template<typename P>
+    split_result split_while(P&& predicate) const
+    {
+        int consumed = 0;
+        while (consumed < this->length()) {
+            if (!predicate(this->data()[consumed])) {
+                break;
+            }
+
+            consumed += 1;
+        }
+
+        if (consumed == 0) {
+            return nonstd::nullopt;
+        }
+
+        return std::make_pair(
+            string_fragment{
+                this->sf_string,
+                this->sf_begin,
+                this->sf_begin + consumed,
+            },
+            string_fragment{
+                this->sf_string,
+                this->sf_begin + consumed,
+                this->sf_end,
+            });
+    }
+
+    template<typename P>
+    split_result split_when(P&& predicate) const
+    {
+        int consumed = 0;
+        while (consumed < this->length()) {
+            if (predicate(this->data()[consumed])) {
+                break;
+            }
+
+            consumed += 1;
+        }
+
+        if (consumed == 0) {
+            return nonstd::nullopt;
+        }
+
+        return std::make_pair(
+            string_fragment{
+                this->sf_string,
+                this->sf_begin,
+                this->sf_begin + consumed,
+            },
+            string_fragment{
+                this->sf_string,
+                this->sf_begin + consumed + 1,
+                this->sf_end,
+            });
+    }
+
+    split_result split_n(int amount) const;
+
+    std::vector<string_fragment> split_lines() const;
+
+    struct tag1 {
+        const char t_value;
+
+        bool operator()(char ch) const { return this->t_value == ch; }
+    };
+
+    struct quoted_string_body {
+        bool qs_in_escape{false};
+
+        bool operator()(char ch)
+        {
+            if (this->qs_in_escape) {
+                this->qs_in_escape = false;
+                return true;
+            } else if (ch == '\\') {
+                this->qs_in_escape = true;
+                return true;
+            } else if (ch == '"') {
+                return false;
+            } else {
+                return true;
+            }
+        }
+    };
+
+    const char* to_string(char* buf) const
+    {
+        memcpy(buf, this->data(), this->length());
+        buf[this->length()] = '\0';
+
+        return buf;
+    }
+
+    std::string to_string() const
+    {
+        return {this->data(), (size_t) this->length()};
+    }
+
+    void clear()
+    {
+        this->sf_begin = 0;
+        this->sf_end = 0;
+    }
+
+    void invalidate()
+    {
+        this->sf_begin = -1;
+        this->sf_end = -1;
+    }
+
+    string_fragment trim(const char* tokens) const;
+    string_fragment trim() const;
+
+    string_fragment prepend(const char* str, int amount) const
+    {
+        return string_fragment{
+            str,
+            this->sf_begin + amount,
+            this->sf_end + amount,
+        };
+    }
+
+    string_fragment erase_before(const char* str, int amount) const
+    {
+        return string_fragment{
+            str,
+            this->sf_begin - amount,
+            this->sf_end - amount,
+        };
+    }
+
+    string_fragment erase(const char* str, int amount) const
+    {
+        return string_fragment{
+            str,
+            this->sf_begin,
+            this->sf_end - amount,
+        };
+    }
+
+    template<typename A>
+    const char* to_c_str(A allocator) const
+    {
+        auto* retval = allocator.allocate(this->length() + 1);
+        memcpy(retval, this->data(), this->length());
+        retval[this->length()] = '\0';
+        return retval;
+    }
+
+    template<typename A>
+    string_fragment to_owned(A allocator) const
+    {
+        return string_fragment{
+            this->template to_c_str(allocator),
+            0,
+            this->length(),
+        };
+    }
+
+    scn::string_view to_string_view() const
+    {
+        return scn::string_view{this->begin(), this->end()};
+    }
+
+    enum class case_style {
+        lower,
+        upper,
+        camel,
+        mixed,
+    };
+
+    case_style detect_text_case_style() const;
+
+    std::string to_string_with_case_style(case_style style) const;
+
+    const char* sf_string;
+    int sf_begin;
+    int sf_end;
+};
+
+inline bool
+operator==(const std::string& left, const string_fragment& right)
+{
+    return right == left;
+}
+
+inline bool
+operator<(const char* left, const string_fragment& right)
+{
+    int rc = strncmp(left, right.data(), right.length());
+    return rc < 0;
+}
+
+inline void
+operator+=(std::string& left, const string_fragment& right)
+{
+    left.append(right.data(), right.length());
+}
+
+inline bool
+operator<(const string_fragment& left, const char* right)
+{
+    return strncmp(left.data(), right, left.length()) < 0;
+}
+
+inline std::ostream&
+operator<<(std::ostream& os, const string_fragment& sf)
+{
+    os.write(sf.data(), sf.length());
+    return os;
+}
+
+class intern_string {
+public:
+    static const intern_string* lookup(const char* str, ssize_t len) noexcept;
+
+    static const intern_string* lookup(const string_fragment& sf) noexcept;
+
+    static const intern_string* lookup(const std::string& str) noexcept;
+
+    const char* get() const { return this->is_str.c_str(); };
+
+    size_t size() const { return this->is_str.size(); }
+
+    std::string to_string() const { return this->is_str; }
+
+    string_fragment to_string_fragment() const
+    {
+        return string_fragment{this->is_str};
+    }
+
+    bool startswith(const char* prefix) const;
+
+    struct intern_table;
+    static std::shared_ptr<intern_table> get_table_lifetime();
+
+private:
+    friend intern_table;
+
+    intern_string(const char* str, ssize_t len)
+        : is_next(nullptr), is_str(str, (size_t) len)
+    {
+    }
+
+    intern_string* is_next;
+    std::string is_str;
+};
+
+using intern_table_lifetime = std::shared_ptr<intern_string::intern_table>;
+
+class intern_string_t {
+public:
+    using iterator = const char*;
+
+    intern_string_t(const intern_string* is = nullptr) : ist_interned_string(is)
+    {
+    }
+
+    const intern_string* unwrap() const { return this->ist_interned_string; }
+
+    void clear() { this->ist_interned_string = nullptr; };
+
+    bool empty() const { return this->ist_interned_string == nullptr; }
+
+    const char* get() const
+    {
+        if (this->empty()) {
+            return "";
+        }
+        return this->ist_interned_string->get();
+    }
+
+    const char* c_str() const { return this->get(); }
+
+    iterator begin() const { return this->get(); }
+
+    iterator end() const { return this->get() + this->size(); }
+
+    size_t size() const
+    {
+        if (this->ist_interned_string == nullptr) {
+            return 0;
+        }
+        return this->ist_interned_string->size();
+    }
+
+    size_t hash() const
+    {
+        auto ptr = (uintptr_t) this->ist_interned_string;
+
+        return ptr;
+    }
+
+    std::string to_string() const
+    {
+        if (this->ist_interned_string == nullptr) {
+            return "";
+        }
+        return this->ist_interned_string->to_string();
+    }
+
+    string_fragment to_string_fragment() const
+    {
+        if (this->ist_interned_string == nullptr) {
+            return string_fragment{"", 0, 0};
+        }
+        return this->ist_interned_string->to_string_fragment();
+    }
+
+    bool operator<(const intern_string_t& rhs) const
+    {
+        return strcmp(this->get(), rhs.get()) < 0;
+    }
+
+    bool operator==(const intern_string_t& rhs) const
+    {
+        return this->ist_interned_string == rhs.ist_interned_string;
+    }
+
+    bool operator!=(const intern_string_t& rhs) const
+    {
+        return !(*this == rhs);
+    }
+
+    bool operator==(const char* rhs) const
+    {
+        return strcmp(this->get(), rhs) == 0;
+    }
+
+    bool operator!=(const char* rhs) const
+    {
+        return strcmp(this->get(), rhs) != 0;
+    }
+
+    static bool case_lt(const intern_string_t& lhs, const intern_string_t& rhs)
+    {
+        return strnatcasecmp(lhs.size(), lhs.get(), rhs.size(), rhs.get()) < 0;
+    }
+
+private:
+    const intern_string* ist_interned_string;
+};
+
+unsigned long hash_str(const char* str, size_t len);
+
+namespace fmt {
+template<>
+struct formatter<string_fragment> : formatter<string_view> {
+    template<typename FormatContext>
+    auto format(const string_fragment& sf, FormatContext& ctx)
+    {
+        return formatter<string_view>::format(
+            string_view{sf.data(), (size_t) sf.length()}, ctx);
+    }
+};
+
+template<>
+struct formatter<intern_string_t> : formatter<string_view> {
+    template<typename FormatContext>
+    auto format(const intern_string_t& is, FormatContext& ctx)
+    {
+        return formatter<string_view>::format(
+            string_view{is.get(), (size_t) is.size()}, ctx);
+    }
+};
+}  // namespace fmt
+
+namespace std {
+template<>
+struct hash<const intern_string_t> {
+    std::size_t operator()(const intern_string_t& ist) const
+    {
+        return ist.hash();
+    }
+};
+}  // namespace std
+
+inline bool
+operator<(const char* left, const intern_string_t& right)
+{
+    int rc = strncmp(left, right.get(), right.size());
+    return rc < 0;
+}
+
+inline bool
+operator<(const intern_string_t& left, const char* right)
+{
+    return strncmp(left.get(), right, left.size()) < 0;
+}
+
+inline bool
+operator==(const intern_string_t& left, const string_fragment& sf)
+{
+    return ((int) left.size() == sf.length())
+        && (memcmp(left.get(), sf.data(), left.size()) == 0);
+}
+
+inline bool
+operator==(const string_fragment& left, const intern_string_t& right)
+{
+    return (left.length() == (int) right.size())
+        && (memcmp(left.data(), right.get(), left.length()) == 0);
+}
+
+namespace std {
+inline string
+to_string(const string_fragment& s)
+{
+    return {s.data(), (size_t) s.length()};
+}
+
+inline string
+to_string(const intern_string_t& s)
+{
+    return s.to_string();
+}
+}  // namespace std
+
+inline string_fragment
+to_string_fragment(const string_fragment& s)
+{
+    return s;
+}
+
+inline string_fragment
+to_string_fragment(const intern_string_t& s)
+{
+    return string_fragment(s.get(), 0, s.size());
+}
+
+inline string_fragment
+to_string_fragment(const std::string& s)
+{
+    return string_fragment(s.c_str(), 0, s.length());
+}
+
+struct frag_hasher {
+    size_t operator()(const string_fragment& sf) const
+    {
+        return hash_str(sf.data(), sf.length());
+    }
+};
+
+#endif
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-15 20:01:36 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-15 20:01:36 +0000
commit	62e4c68907d8d33709c2c1f92a161dff00b3d5f2 (patch)
tree	adbbaf3acf88ea08f6eeec4b75ee98ad3b07fbdc /src/base/intern_string.hh
parent	Initial commit. (diff)
download	lnav-28bf0ff1a7ad38ac9ddb1f5864ba0161b0617471.tar.xz lnav-28bf0ff1a7ad38ac9ddb1f5864ba0161b0617471.zip