1 files changed, 303 insertions, 0 deletions
diff --git a/src/base/intern_string.cc b/src/base/intern_string.cc
new file mode 100644
index 0000000..8410720
--- /dev/null
+++ b/src/base/intern_string.cc
@@ -0,0 +1,303 @@
+/**
+ * Copyright (c) 2014, Timothy Stack
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * * Neither the name of Timothy Stack nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @file intern_string.cc
+ */
+
+#include <mutex>
+
+#include "intern_string.hh"
+
+#include <string.h>
+
+#include "config.h"
+#include "pcrepp/pcre2pp.hh"
+#include "xxHash/xxhash.h"
+
+const static int TABLE_SIZE = 4095;  // bucket count of the intern hash table
+
+struct intern_string::intern_table {
+    /** Free every string chained off of every bucket. */
+    ~intern_table()
+    {
+        for (auto* bucket_head : this->it_table) {
+            for (auto* curr = bucket_head; curr != nullptr;) {
+                // Grab the link before the node is destroyed.
+                auto* next = curr->is_next;
+                delete curr;
+                curr = next;
+            }
+        }
+    }
+
+    /** Bucket heads; brace-initialized so unused buckets are always null. */
+    intern_string* it_table[TABLE_SIZE]{};
+};
+
+intern_table_lifetime
+intern_string::get_table_lifetime()
+{
+    // Built on the first call; later calls hand back a copy of the same handle.
+    static intern_table_lifetime table = std::make_shared<intern_table>();
+    return table;
+}
+
+unsigned long
+hash_str(const char* str, size_t len)
+{
+    return XXH3_64bits(str, len);  // XXH3 64-bit hash of the len bytes at str
+}
+
+const intern_string*
+intern_string::lookup(const char* str, ssize_t len) noexcept
+{
+    // Returns the one shared intern_string for the len bytes at str, creating
+    // it on first use.  table_mutex guards every read/write of the buckets.
+    static std::mutex table_mutex;
+
+    if (len == -1) {
+        len = strlen(str);  // -1 means str is NUL-terminated
+    }
+    const auto h = hash_str(str, len) % TABLE_SIZE;
+
+    std::lock_guard<std::mutex> lk(table_mutex);
+    auto tab = get_table_lifetime();
+
+    for (auto* curr = tab->it_table[h]; curr != nullptr; curr = curr->is_next)
+    {
+        // memcmp rather than strncmp: strncmp stops at the first NUL, so two
+        // equal-length keys that differ only after an embedded NUL would be
+        // mistaken for the same string.
+        if (static_cast<ssize_t>(curr->is_str.size()) == len
+            && memcmp(curr->is_str.data(), str, len) == 0)
+        {
+            return curr;
+        }
+    }
+
+    // Miss: allocate the entry and link it in at the head of its bucket.
+    auto* created = new intern_string(str, len);
+    created->is_next = tab->it_table[h];
+    tab->it_table[h] = created;
+
+    return created;
+}
+
+const intern_string*
+intern_string::lookup(const string_fragment& sf) noexcept
+{
+    return lookup(sf.data(), sf.length());  // defer to the (ptr, len) overload
+}
+
+const intern_string*
+intern_string::lookup(const std::string& str) noexcept
+{
+    return lookup(str.c_str(), str.size());  // explicit length: no strlen()
+}
+
+bool
+intern_string::startswith(const char* prefix) const
+{
+    // Compare byte by byte; succeed only if the whole prefix is consumed.
+    const char* mine = this->is_str.data();
+    for (; *prefix != '\0'; ++prefix, ++mine) {
+        if (*prefix != *mine) {
+            return false;
+        }
+    }
+    return true;
+}
+
+/**
+ * Remove leading and trailing characters that appear in @a tokens.
+ *
+ * @param tokens NUL-terminated set of characters to strip.
+ * @return A copy of this fragment whose sf_begin/sf_end have been moved
+ * inward; the underlying string is left untouched.
+ */
+string_fragment
+string_fragment::trim(const char* tokens) const
+{
+    // True when ch is one of the characters listed in tokens.
+    const auto in_tokens = [tokens](char ch) {
+        for (const char* tok = tokens; *tok != '\0'; ++tok) {
+            if (*tok == ch) {
+                return true;
+            }
+        }
+        return false;
+    };
+
+    string_fragment retval = *this;
+
+    // Advance the start past every leading token character.
+    while (retval.sf_begin < retval.sf_end
+           && in_tokens(retval.sf_string[retval.sf_begin]))
+    {
+        retval.sf_begin += 1;
+    }
+
+    // Pull the end back over every trailing token character.
+    while (retval.sf_begin < retval.sf_end
+           && in_tokens(retval.sf_string[retval.sf_end - 1]))
+    {
+        retval.sf_end -= 1;
+    }
+
+    return retval;
+}
+
+string_fragment
+string_fragment::trim() const
+{
+    return this->trim(" \t\r\n");  // default set: space, tab, CR, LF
+}
+
+nonstd::optional<string_fragment>
+string_fragment::consume_n(int amount) const
+{
+    // Drop the first `amount` bytes and return what is left.  A negative or
+    // too-large amount would put the new start outside [sf_begin, sf_end],
+    // so both cases yield nullopt instead of an out-of-range fragment.
+    if (amount < 0 || amount > this->length()) {
+        return nonstd::nullopt;
+    }
+
+    return string_fragment{
+        this->sf_string, this->sf_begin + amount, this->sf_end};
+}
+
+/**
+ * Split this fragment after its first @a amount bytes; yields nullopt when
+ * @a amount is negative or longer than the fragment.
+ */
+string_fragment::split_result
+string_fragment::split_n(int amount) const
+{
+    // A negative amount would place the split point before sf_begin.
+    if (amount < 0 || amount > this->length()) {
+        return nonstd::nullopt;
+    }
+
+    const auto split_at = this->sf_begin + amount;
+
+    // Both halves share sf_string; only the offsets differ.
+    return std::make_pair(
+        string_fragment{this->sf_string, this->sf_begin, split_at},
+        string_fragment{this->sf_string, split_at, this->sf_end});
+}
+
+std::vector<string_fragment>
+string_fragment::split_lines() const
+{
+    std::vector<string_fragment> retval;
+    int start = this->sf_begin;
+    // index is an absolute offset into sf_string (it starts at sf_begin), so
+    // the byte is read from sf_string directly, as trim()/utf8_length() do.
+    for (auto index = start; index < this->sf_end; index++) {
+        if (this->sf_string[index] == '\n') {
+            retval.emplace_back(this->sf_string, start, index + 1);
+            start = index + 1;
+        }
+    }
+    retval.emplace_back(this->sf_string, start, this->sf_end);  // may be empty
+    return retval;
+}
+
+Result<ssize_t, const char*>
+string_fragment::utf8_length() const
+{
+    ssize_t retval = 0;  // characters counted so far
+    // Step through [sf_begin, sf_end) one encoded character at a time.
+    for (ssize_t byte_index = this->sf_begin; byte_index < this->sf_end;) {
+        auto ch_size = TRY(ww898::utf::utf8::char_size([this, byte_index]() {
+            return std::make_pair(this->sf_string[byte_index],
+                                  this->sf_end - byte_index);  // byte, bytes left
+        }));  // NOTE(review): TRY presumably returns early on error -- confirm
+        byte_index += ch_size;
+        retval += 1;
+    }
+
+    return Ok(retval);
+}
+
+string_fragment::case_style
+string_fragment::detect_text_case_style() const
+{
+    static const auto NO_UPPER_RE
+        = lnav::pcre2pp::code::from_const(R"(^[^A-Z]+$)");
+    static const auto NO_LOWER_RE
+        = lnav::pcre2pp::code::from_const(R"(^[^a-z]+$)");
+    static const auto CAMEL_WORDS_RE
+        = lnav::pcre2pp::code::from_const(R"(^(?:[A-Z][a-z0-9]+)+$)");
+
+    // First matching pattern wins; text matching none of them is mixed.
+    auto retval = case_style::mixed;
+    if (NO_UPPER_RE.find_in(*this).ignore_error().has_value()) {
+        retval = case_style::lower;
+    } else if (NO_LOWER_RE.find_in(*this).ignore_error().has_value()) {
+        retval = case_style::upper;
+    } else if (CAMEL_WORDS_RE.find_in(*this).ignore_error().has_value()) {
+        retval = case_style::camel;
+    }
+
+    return retval;
+}
+
+std::string
+string_fragment::to_string_with_case_style(case_style style) const
+{
+    // Return a copy of this fragment's text converted to the requested style.
+    std::string retval;
+    switch (style) {
+        case case_style::lower:
+            for (auto ch : *this) {
+                const auto uch = static_cast<unsigned char>(ch);  // avoid UB
+                retval.append(1, static_cast<char>(std::tolower(uch)));
+            }
+            break;
+        case case_style::upper:
+            for (auto ch : *this) {
+                const auto uch = static_cast<unsigned char>(ch);  // avoid UB
+                retval.append(1, static_cast<char>(std::toupper(uch)));
+            }
+            break;
+        case case_style::camel:
+            // Only the first byte is upper-cased; the rest is copied as-is.
+            retval = this->to_string();
+            if (!this->empty()) {
+                const auto uch = static_cast<unsigned char>(retval[0]);
+                retval[0] = static_cast<char>(std::toupper(uch));
+            }
+            break;
+        case case_style::mixed:
+            return this->to_string();
+    }
+
+    return retval;
+}