summaryrefslogtreecommitdiffstats
path: root/src/base/intern_string.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/base/intern_string.cc')
-rw-r--r--src/base/intern_string.cc303
1 files changed, 303 insertions, 0 deletions
diff --git a/src/base/intern_string.cc b/src/base/intern_string.cc
new file mode 100644
index 0000000..8410720
--- /dev/null
+++ b/src/base/intern_string.cc
@@ -0,0 +1,303 @@
+/**
+ * Copyright (c) 2014, Timothy Stack
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * * Neither the name of Timothy Stack nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @file intern_string.cc
+ */
+
+#include <mutex>
+
+#include "intern_string.hh"
+
+#include <string.h>
+
+#include "config.h"
+#include "pcrepp/pcre2pp.hh"
+#include "xxHash/xxhash.h"
+
+const static int TABLE_SIZE = 4095;
+
+struct intern_string::intern_table {
+ ~intern_table()
+ {
+ for (auto is : this->it_table) {
+ auto curr = is;
+
+ while (curr != nullptr) {
+ auto next = curr->is_next;
+
+ delete curr;
+ curr = next;
+ }
+ }
+ }
+
+ intern_string* it_table[TABLE_SIZE];
+};
+
+intern_table_lifetime
+intern_string::get_table_lifetime()
+{
+ static intern_table_lifetime retval = std::make_shared<intern_table>();
+
+ return retval;
+}
+
+unsigned long
+hash_str(const char* str, size_t len)
+{
+ return XXH3_64bits(str, len);
+}
+
+const intern_string*
+intern_string::lookup(const char* str, ssize_t len) noexcept
+{
+ unsigned long h;
+ intern_string* curr;
+
+ if (len == -1) {
+ len = strlen(str);
+ }
+ h = hash_str(str, len) % TABLE_SIZE;
+
+ {
+ static std::mutex table_mutex;
+
+ std::lock_guard<std::mutex> lk(table_mutex);
+ auto tab = get_table_lifetime();
+
+ curr = tab->it_table[h];
+ while (curr != nullptr) {
+ if (static_cast<ssize_t>(curr->is_str.size()) == len
+ && strncmp(curr->is_str.c_str(), str, len) == 0)
+ {
+ return curr;
+ }
+ curr = curr->is_next;
+ }
+
+ curr = new intern_string(str, len);
+ curr->is_next = tab->it_table[h];
+ tab->it_table[h] = curr;
+
+ return curr;
+ }
+}
+
+const intern_string*
+intern_string::lookup(const string_fragment& sf) noexcept
+{
+ return lookup(sf.data(), sf.length());
+}
+
+const intern_string*
+intern_string::lookup(const std::string& str) noexcept
+{
+ return lookup(str.c_str(), str.size());
+}
+
+bool
+intern_string::startswith(const char* prefix) const
+{
+ const char* curr = this->is_str.data();
+
+ while (*prefix != '\0' && *prefix == *curr) {
+ prefix += 1;
+ curr += 1;
+ }
+
+ return *prefix == '\0';
+}
+
+string_fragment
+string_fragment::trim(const char* tokens) const
+{
+ string_fragment retval = *this;
+
+ while (retval.sf_begin < retval.sf_end) {
+ bool found = false;
+
+ for (int lpc = 0; tokens[lpc] != '\0'; lpc++) {
+ if (retval.sf_string[retval.sf_begin] == tokens[lpc]) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ break;
+ }
+
+ retval.sf_begin += 1;
+ }
+ while (retval.sf_begin < retval.sf_end) {
+ bool found = false;
+
+ for (int lpc = 0; tokens[lpc] != '\0'; lpc++) {
+ if (retval.sf_string[retval.sf_end - 1] == tokens[lpc]) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ break;
+ }
+
+ retval.sf_end -= 1;
+ }
+
+ return retval;
+}
+
+string_fragment
+string_fragment::trim() const
+{
+ return this->trim(" \t\r\n");
+}
+
+nonstd::optional<string_fragment>
+string_fragment::consume_n(int amount) const
+{
+ if (amount > this->length()) {
+ return nonstd::nullopt;
+ }
+
+ return string_fragment{
+ this->sf_string,
+ this->sf_begin + amount,
+ this->sf_end,
+ };
+}
+
+string_fragment::split_result
+string_fragment::split_n(int amount) const
+{
+ if (amount > this->length()) {
+ return nonstd::nullopt;
+ }
+
+ return std::make_pair(
+ string_fragment{
+ this->sf_string,
+ this->sf_begin,
+ this->sf_begin + amount,
+ },
+ string_fragment{
+ this->sf_string,
+ this->sf_begin + amount,
+ this->sf_end,
+ });
+}
+
+std::vector<string_fragment>
+string_fragment::split_lines() const
+{
+ std::vector<string_fragment> retval;
+ int start = this->sf_begin;
+
+ for (auto index = start; index < this->sf_end; index++) {
+ if ((*this)[index] == '\n') {
+ retval.emplace_back(this->sf_string, start, index + 1);
+ start = index + 1;
+ }
+ }
+ retval.emplace_back(this->sf_string, start, this->sf_end);
+
+ return retval;
+}
+
+Result<ssize_t, const char*>
+string_fragment::utf8_length() const
+{
+ ssize_t retval = 0;
+
+ for (ssize_t byte_index = this->sf_begin; byte_index < this->sf_end;) {
+ auto ch_size = TRY(ww898::utf::utf8::char_size([this, byte_index]() {
+ return std::make_pair(this->sf_string[byte_index],
+ this->sf_end - byte_index);
+ }));
+ byte_index += ch_size;
+ retval += 1;
+ }
+
+ return Ok(retval);
+}
+
+string_fragment::case_style
+string_fragment::detect_text_case_style() const
+{
+ static const auto LOWER_RE
+ = lnav::pcre2pp::code::from_const(R"(^[^A-Z]+$)");
+ static const auto UPPER_RE
+ = lnav::pcre2pp::code::from_const(R"(^[^a-z]+$)");
+ static const auto CAMEL_RE
+ = lnav::pcre2pp::code::from_const(R"(^(?:[A-Z][a-z0-9]+)+$)");
+
+ if (LOWER_RE.find_in(*this).ignore_error().has_value()) {
+ return case_style::lower;
+ }
+ if (UPPER_RE.find_in(*this).ignore_error().has_value()) {
+ return case_style::upper;
+ }
+ if (CAMEL_RE.find_in(*this).ignore_error().has_value()) {
+ return case_style::camel;
+ }
+
+ return case_style::mixed;
+}
+
+std::string
+string_fragment::to_string_with_case_style(case_style style) const
+{
+ std::string retval;
+
+ switch (style) {
+ case case_style::lower: {
+ for (auto ch : *this) {
+ retval.append(1, std::tolower(ch));
+ }
+ break;
+ }
+ case case_style::upper: {
+ for (auto ch : *this) {
+ retval.append(1, std::toupper(ch));
+ }
+ break;
+ }
+ case case_style::camel: {
+ retval = this->to_string();
+ if (!this->empty()) {
+ retval[0] = toupper(retval[0]);
+ }
+ break;
+ }
+ case case_style::mixed: {
+ return this->to_string();
+ }
+ }
+
+ return retval;
+}