From 5068d34c08f951a7ea6257d305a1627b09a95817 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 19:44:55 +0200 Subject: Adding upstream version 0.11.1. Signed-off-by: Daniel Baumann --- src/base/intern_string.cc | 303 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 303 insertions(+) create mode 100644 src/base/intern_string.cc (limited to 'src/base/intern_string.cc') diff --git a/src/base/intern_string.cc b/src/base/intern_string.cc new file mode 100644 index 0000000..8410720 --- /dev/null +++ b/src/base/intern_string.cc @@ -0,0 +1,303 @@ +/** + * Copyright (c) 2014, Timothy Stack + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @file intern_string.cc + */ + +#include + +#include "intern_string.hh" + +#include + +#include "config.h" +#include "pcrepp/pcre2pp.hh" +#include "xxHash/xxhash.h" + +const static int TABLE_SIZE = 4095; + +struct intern_string::intern_table { + ~intern_table() + { + for (auto is : this->it_table) { + auto curr = is; + + while (curr != nullptr) { + auto next = curr->is_next; + + delete curr; + curr = next; + } + } + } + + intern_string* it_table[TABLE_SIZE]; +}; + +intern_table_lifetime +intern_string::get_table_lifetime() +{ + static intern_table_lifetime retval = std::make_shared(); + + return retval; +} + +unsigned long +hash_str(const char* str, size_t len) +{ + return XXH3_64bits(str, len); +} + +const intern_string* +intern_string::lookup(const char* str, ssize_t len) noexcept +{ + unsigned long h; + intern_string* curr; + + if (len == -1) { + len = strlen(str); + } + h = hash_str(str, len) % TABLE_SIZE; + + { + static std::mutex table_mutex; + + std::lock_guard lk(table_mutex); + auto tab = get_table_lifetime(); + + curr = tab->it_table[h]; + while (curr != nullptr) { + if (static_cast(curr->is_str.size()) == len + && strncmp(curr->is_str.c_str(), str, len) == 0) + { + return curr; + } + curr = curr->is_next; + } + + curr = new intern_string(str, len); + curr->is_next = tab->it_table[h]; + tab->it_table[h] = curr; + + return curr; + } +} + +const intern_string* +intern_string::lookup(const string_fragment& sf) noexcept +{ + return lookup(sf.data(), sf.length()); +} + +const intern_string* +intern_string::lookup(const std::string& str) noexcept +{ + return lookup(str.c_str(), str.size()); +} + +bool +intern_string::startswith(const char* prefix) const +{ + const char* curr = this->is_str.data(); + + while (*prefix != '\0' && *prefix == *curr) { + prefix += 1; + curr += 1; + } + + return *prefix == '\0'; +} + +string_fragment +string_fragment::trim(const char* tokens) const +{ + string_fragment retval = *this; + + while (retval.sf_begin < retval.sf_end) { + bool found = false; + + for (int lpc = 0; tokens[lpc] != '\0'; lpc++) { + if (retval.sf_string[retval.sf_begin] == tokens[lpc]) { + found = true; + break; + } + } + if (!found) { + break; + } + + retval.sf_begin += 1; + } + while (retval.sf_begin < retval.sf_end) { + bool found = false; + + for (int lpc = 0; tokens[lpc] != '\0'; lpc++) { + if (retval.sf_string[retval.sf_end - 1] == tokens[lpc]) { + found = true; + break; + } + } + if (!found) { + break; + } + + retval.sf_end -= 1; + } + + return retval; +} + +string_fragment +string_fragment::trim() const +{ + return this->trim(" \t\r\n"); +} + +nonstd::optional +string_fragment::consume_n(int amount) const +{ + if (amount > this->length()) { + return nonstd::nullopt; + } + + return string_fragment{ + this->sf_string, + this->sf_begin + amount, + this->sf_end, + }; +} + +string_fragment::split_result +string_fragment::split_n(int amount) const +{ + if (amount > this->length()) { + return nonstd::nullopt; + } + + return std::make_pair( + string_fragment{ + this->sf_string, + this->sf_begin, + this->sf_begin + amount, + }, + string_fragment{ + this->sf_string, + this->sf_begin + amount, + this->sf_end, + }); +} + +std::vector +string_fragment::split_lines() const +{ + std::vector retval; + int start = this->sf_begin; + + for (auto index = start; index < this->sf_end; index++) { + if ((*this)[index] == '\n') { + retval.emplace_back(this->sf_string, start, index + 1); + start = index + 1; + } + } + retval.emplace_back(this->sf_string, start, this->sf_end); + + return retval; +} + +Result +string_fragment::utf8_length() const +{ + ssize_t retval = 0; + + for (ssize_t byte_index = this->sf_begin; byte_index < this->sf_end;) { + auto ch_size = TRY(ww898::utf::utf8::char_size([this, byte_index]() { + return std::make_pair(this->sf_string[byte_index], + this->sf_end - byte_index); + })); + byte_index += ch_size; + retval += 1; + } + + return Ok(retval); +} + +string_fragment::case_style +string_fragment::detect_text_case_style() const +{ + static const auto LOWER_RE + = lnav::pcre2pp::code::from_const(R"(^[^A-Z]+$)"); + static const auto UPPER_RE + = lnav::pcre2pp::code::from_const(R"(^[^a-z]+$)"); + static const auto CAMEL_RE + = lnav::pcre2pp::code::from_const(R"(^(?:[A-Z][a-z0-9]+)+$)"); + + if (LOWER_RE.find_in(*this).ignore_error().has_value()) { + return case_style::lower; + } + if (UPPER_RE.find_in(*this).ignore_error().has_value()) { + return case_style::upper; + } + if (CAMEL_RE.find_in(*this).ignore_error().has_value()) { + return case_style::camel; + } + + return case_style::mixed; +} + +std::string +string_fragment::to_string_with_case_style(case_style style) const +{ + std::string retval; + + switch (style) { + case case_style::lower: { + for (auto ch : *this) { + retval.append(1, std::tolower(ch)); + } + break; + } + case case_style::upper: { + for (auto ch : *this) { + retval.append(1, std::toupper(ch)); + } + break; + } + case case_style::camel: { + retval = this->to_string(); + if (!this->empty()) { + retval[0] = toupper(retval[0]); + } + break; + } + case case_style::mixed: { + return this->to_string(); + } + } + + return retval; +} -- cgit v1.2.3