diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 17:44:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 17:44:55 +0000 |
commit | 5068d34c08f951a7ea6257d305a1627b09a95817 (patch) | |
tree | 08213e2be853396a3b07ce15dbe222644dcd9a89 /src/base/string_util.cc | |
parent | Initial commit. (diff) | |
download | lnav-upstream.tar.xz lnav-upstream.zip |
Adding upstream version 0.11.1. [upstream/0.11.1] [upstream]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | src/base/string_util.cc | 307 |
1 files changed, 307 insertions, 0 deletions
diff --git a/src/base/string_util.cc b/src/base/string_util.cc new file mode 100644 index 0000000..d4e0795 --- /dev/null +++ b/src/base/string_util.cc @@ -0,0 +1,307 @@ +/** + * Copyright (c) 2019, Timothy Stack + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <algorithm> +#include <iterator> +#include <regex> +#include <sstream> + +#include "string_util.hh" + +#include "config.h" +#include "is_utf8.hh" +#include "lnav_log.hh" + +void +scrub_to_utf8(char* buffer, size_t length) +{ + const char* msg; + int faulty_bytes; + + while (true) { + auto scan_res + = is_utf8((unsigned char*) buffer, length, &msg, &faulty_bytes); + + if (msg == nullptr) { + break; + } + for (int lpc = 0; lpc < faulty_bytes; lpc++) { + buffer[scan_res.usr_end + lpc] = '?'; + } + } +} + +void +quote_content(auto_buffer& buf, const string_fragment& sf, char quote_char) +{ + for (char ch : sf) { + if (ch == quote_char) { + buf.push_back('\\').push_back(ch); + continue; + } + switch (ch) { + case '\\': + buf.push_back('\\').push_back('\\'); + break; + case '\n': + buf.push_back('\\').push_back('n'); + break; + case '\t': + buf.push_back('\\').push_back('t'); + break; + case '\r': + buf.push_back('\\').push_back('r'); + break; + case '\a': + buf.push_back('\\').push_back('a'); + break; + case '\b': + buf.push_back('\\').push_back('b'); + break; + default: + buf.push_back(ch); + break; + } + } +} + +size_t +unquote_content(char* dst, const char* str, size_t len, char quote_char) +{ + size_t index = 0; + + for (size_t lpc = 0; lpc < len; lpc++, index++) { + dst[index] = str[lpc]; + if (str[lpc] == quote_char) { + lpc += 1; + } else if (str[lpc] == '\\' && (lpc + 1) < len) { + switch (str[lpc + 1]) { + case 'n': + dst[index] = '\n'; + break; + case 'r': + dst[index] = '\r'; + break; + case 't': + dst[index] = '\t'; + break; + default: + dst[index] = str[lpc + 1]; + break; + } + lpc += 1; + } + } + dst[index] = '\0'; + + return index; +} + +size_t +unquote(char* dst, const char* str, size_t len) +{ + if (str[0] == 'r' || str[0] == 'u') { + str += 1; + len -= 1; + } + char quote_char = str[0]; + + require(str[0] == '\'' || str[0] == '"'); + + return unquote_content(dst, &str[1], len - 2, quote_char); +} + +size_t +unquote_w3c(char* dst, const 
char* str, size_t len) +{ + size_t index = 0; + + require(str[0] == '\'' || str[0] == '"'); + + for (size_t lpc = 1; lpc < (len - 1); lpc++, index++) { + dst[index] = str[lpc]; + if (str[lpc] == '"') { + lpc += 1; + } + } + dst[index] = '\0'; + + return index; +} + +void +truncate_to(std::string& str, size_t max_char_len) +{ + static const std::string ELLIPSIS = "\u22ef"; + + if (str.length() < max_char_len) { + return; + } + + auto str_char_len_res = utf8_string_length(str); + + if (str_char_len_res.isErr()) { + // XXX + return; + } + + auto str_char_len = str_char_len_res.unwrap(); + if (str_char_len <= max_char_len) { + return; + } + + if (max_char_len < 3) { + str = ELLIPSIS; + return; + } + + auto chars_to_remove = (str_char_len - max_char_len) + 1; + auto midpoint = str_char_len / 2; + auto chars_to_keep_at_front = midpoint - (chars_to_remove / 2); + auto bytes_to_keep_at_front + = utf8_char_to_byte_index(str, chars_to_keep_at_front); + auto remove_up_to_bytes = utf8_char_to_byte_index( + str, chars_to_keep_at_front + chars_to_remove); + auto bytes_to_remove = remove_up_to_bytes - bytes_to_keep_at_front; + str.erase(bytes_to_keep_at_front, bytes_to_remove); + str.insert(bytes_to_keep_at_front, ELLIPSIS); +} + +bool +is_url(const std::string& fn) +{ + static const auto url_re = std::regex("^(file|https?|ftps?|scp|sftp):.*"); + + return std::regex_match(fn, url_re); +} + +size_t +abbreviate_str(char* str, size_t len, size_t max_len) +{ + size_t last_start = 1; + + if (len < max_len) { + return len; + } + + for (size_t index = 0; index < len; index++) { + switch (str[index]) { + case '.': + case '-': + case '/': + case ':': + memmove(&str[last_start], &str[index], len - index); + len -= (index - last_start); + index = last_start + 1; + last_start = index + 1; + + if (len < max_len) { + return len; + } + break; + } + } + + return len; +} + +void +split_ws(const std::string& str, std::vector<std::string>& toks_out) +{ + std::stringstream ss(str); + std::string 
buf; + + while (ss >> buf) { + toks_out.push_back(buf); + } +} + +std::string +repeat(const std::string& input, size_t num) +{ + std::ostringstream os; + std::fill_n(std::ostream_iterator<std::string>(os), num, input); + return os.str(); +} + +std::string +center_str(const std::string& subject, size_t width) +{ + std::string retval = subject; + + truncate_to(retval, width); + + auto retval_length = utf8_string_length(retval).unwrapOr(retval.length()); + auto total_fill = width - retval_length; + auto before = total_fill / 2; + auto after = total_fill - before; + + retval.insert(0, before, ' '); + retval.append(after, ' '); + + return retval; +} + +bool +is_blank(const std::string& str) +{ + return std::all_of( + str.begin(), str.end(), [](const auto ch) { return isspace(ch); }); +} + +std::string +scrub_ws(const char* in) +{ + static const std::string TAB_SYMBOL = "\u21e5"; + static const std::string LF_SYMBOL = "\u240a"; + static const std::string CR_SYMBOL = "\u240d"; + + std::string retval; + + for (size_t lpc = 0; in[lpc]; lpc++) { + auto ch = in[lpc]; + + switch (ch) { + case '\t': + retval.append(TAB_SYMBOL); + break; + case '\n': + retval.append(LF_SYMBOL); + break; + case '\r': + retval.append(CR_SYMBOL); + break; + default: + retval.append(1, ch); + break; + } + } + + return retval; +} |