/** * Copyright (c) 2014, Timothy Stack * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * Neither the name of Timothy Stack nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * @file intern_string.cc */ #include #include "intern_string.hh" #include #include "config.h" #include "pcrepp/pcre2pp.hh" #include "xxHash/xxhash.h" const static int TABLE_SIZE = 4095; struct intern_string::intern_table { ~intern_table() { for (auto is : this->it_table) { auto curr = is; while (curr != nullptr) { auto next = curr->is_next; delete curr; curr = next; } } } intern_string* it_table[TABLE_SIZE]; }; intern_table_lifetime intern_string::get_table_lifetime() { static intern_table_lifetime retval = std::make_shared(); return retval; } unsigned long hash_str(const char* str, size_t len) { return XXH3_64bits(str, len); } const intern_string* intern_string::lookup(const char* str, ssize_t len) noexcept { unsigned long h; intern_string* curr; if (len == -1) { len = strlen(str); } h = hash_str(str, len) % TABLE_SIZE; { static std::mutex table_mutex; std::lock_guard lk(table_mutex); auto tab = get_table_lifetime(); curr = tab->it_table[h]; while (curr != nullptr) { if (static_cast(curr->is_str.size()) == len && strncmp(curr->is_str.c_str(), str, len) == 0) { return curr; } curr = curr->is_next; } curr = new intern_string(str, len); curr->is_next = tab->it_table[h]; tab->it_table[h] = curr; return curr; } } const intern_string* intern_string::lookup(const string_fragment& sf) noexcept { return lookup(sf.data(), sf.length()); } const intern_string* intern_string::lookup(const std::string& str) noexcept { return lookup(str.c_str(), str.size()); } bool intern_string::startswith(const char* prefix) const { const char* curr = this->is_str.data(); while (*prefix != '\0' && *prefix == *curr) { prefix += 1; curr += 1; } return *prefix == '\0'; } string_fragment string_fragment::trim(const char* tokens) const { string_fragment retval = *this; while (retval.sf_begin < retval.sf_end) { bool found = false; for (int lpc = 0; tokens[lpc] != '\0'; lpc++) { if (retval.sf_string[retval.sf_begin] == tokens[lpc]) { found = true; break; } } if (!found) { break; } retval.sf_begin += 1; } while (retval.sf_begin < retval.sf_end) { bool found = false; for (int lpc = 0; tokens[lpc] != '\0'; lpc++) { if (retval.sf_string[retval.sf_end - 1] == tokens[lpc]) { found = true; break; } } if (!found) { break; } retval.sf_end -= 1; } return retval; } string_fragment string_fragment::trim() const { return this->trim(" \t\r\n"); } nonstd::optional string_fragment::consume_n(int amount) const { if (amount > this->length()) { return nonstd::nullopt; } return string_fragment{ this->sf_string, this->sf_begin + amount, this->sf_end, }; } string_fragment::split_result string_fragment::split_n(int amount) const { if (amount > this->length()) { return nonstd::nullopt; } return std::make_pair( string_fragment{ this->sf_string, this->sf_begin, this->sf_begin + amount, }, string_fragment{ this->sf_string, this->sf_begin + amount, this->sf_end, }); } std::vector string_fragment::split_lines() const { std::vector retval; int start = this->sf_begin; for (auto index = start; index < this->sf_end; index++) { if ((*this)[index] == '\n') { retval.emplace_back(this->sf_string, start, index + 1); start = index + 1; } } retval.emplace_back(this->sf_string, start, this->sf_end); return retval; } Result string_fragment::utf8_length() const { ssize_t retval = 0; for (ssize_t byte_index = this->sf_begin; byte_index < this->sf_end;) { auto ch_size = TRY(ww898::utf::utf8::char_size([this, byte_index]() { return std::make_pair(this->sf_string[byte_index], this->sf_end - byte_index); })); byte_index += ch_size; retval += 1; } return Ok(retval); } string_fragment::case_style string_fragment::detect_text_case_style() const { static const auto LOWER_RE = lnav::pcre2pp::code::from_const(R"(^[^A-Z]+$)"); static const auto UPPER_RE = lnav::pcre2pp::code::from_const(R"(^[^a-z]+$)"); static const auto CAMEL_RE = lnav::pcre2pp::code::from_const(R"(^(?:[A-Z][a-z0-9]+)+$)"); if (LOWER_RE.find_in(*this).ignore_error().has_value()) { return case_style::lower; } if (UPPER_RE.find_in(*this).ignore_error().has_value()) { return case_style::upper; } if (CAMEL_RE.find_in(*this).ignore_error().has_value()) { return case_style::camel; } return case_style::mixed; } std::string string_fragment::to_string_with_case_style(case_style style) const { std::string retval; switch (style) { case case_style::lower: { for (auto ch : *this) { retval.append(1, std::tolower(ch)); } break; } case case_style::upper: { for (auto ch : *this) { retval.append(1, std::toupper(ch)); } break; } case case_style::camel: { retval = this->to_string(); if (!this->empty()) { retval[0] = toupper(retval[0]); } break; } case case_style::mixed: { return this->to_string(); } } return retval; }