1 files changed, 307 insertions, 0 deletions
diff --git a/src/base/string_util.cc b/src/base/string_util.cc
new file mode 100644
index 0000000..d4e0795
--- /dev/null
+++ b/src/base/string_util.cc
@@ -0,0 +1,307 @@
+/**
+ * Copyright (c) 2019, Timothy Stack
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * * Neither the name of Timothy Stack nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <algorithm>
+#include <iterator>
+#include <regex>
+#include <sstream>
+
+#include "string_util.hh"
+
+#include "config.h"
+#include "is_utf8.hh"
+#include "lnav_log.hh"
+
+/**
+ * Overwrite the invalid UTF-8 bytes of a buffer with '?', in place.
+ *
+ * The whole buffer is re-checked with is_utf8() after every repair and the
+ * function returns once the validator no longer reports an error message.
+ *
+ * @param buffer The bytes to scrub; modified in place.
+ * @param length The number of bytes in buffer.
+ */
+void
+scrub_to_utf8(char* buffer, size_t length)
+{
+    const char* error_msg;
+    int bad_count;
+
+    for (;;) {
+        auto result = is_utf8(reinterpret_cast<unsigned char*>(buffer),
+                              length,
+                              &error_msg,
+                              &bad_count);
+
+        // A null message is how the validator reports a clean scan.
+        if (error_msg == nullptr) {
+            return;
+        }
+
+        // Blank out the reported number of faulty bytes, starting at the
+        // offset the validator returned in usr_end.
+        for (int fixed = 0; fixed < bad_count; ++fixed) {
+            buffer[result.usr_end + fixed] = '?';
+        }
+    }
+}
+
+/**
+ * Append the characters of a fragment to a buffer, backslash-escaping the
+ * ones that cannot appear verbatim inside a quoted string.
+ *
+ * The quote character is written as a backslash followed by itself.  A
+ * backslash, newline, tab, carriage return, bell, and backspace are written
+ * as \\, \n, \t, \r, \a, and \b respectively.  Everything else is appended
+ * unchanged.
+ *
+ * @param buf The buffer that receives the escaped text.
+ * @param sf The content to escape.
+ * @param quote_char The quote character that will surround the content.
+ */
+void
+quote_content(auto_buffer& buf, const string_fragment& sf, char quote_char)
+{
+    for (const char ch : sf) {
+        // The character written after the backslash when escaping.  The
+        // quote character and the backslash both escape to themselves.
+        char code = ch;
+        bool needs_escape = true;
+
+        // The quote character takes precedence over the fixed table below.
+        if (ch != quote_char) {
+            switch (ch) {
+                case '\\':
+                    break;
+                case '\n':
+                    code = 'n';
+                    break;
+                case '\t':
+                    code = 't';
+                    break;
+                case '\r':
+                    code = 'r';
+                    break;
+                case '\a':
+                    code = 'a';
+                    break;
+                case '\b':
+                    code = 'b';
+                    break;
+                default:
+                    needs_escape = false;
+                    break;
+            }
+        }
+
+        if (needs_escape) {
+            buf.push_back('\\').push_back(code);
+        } else {
+            buf.push_back(ch);
+        }
+    }
+}
+
+/**
+ * Copy the content of a quoted string into dst while decoding escapes.
+ *
+ * Two kinds of escapes are recognized:
+ *   - the quote character followed by any character: the quote is kept and
+ *     the character after it is dropped, so a doubled quote collapses into
+ *     a single one;
+ *   - a backslash followed by a character: \n, \r, \t, \a, and \b become
+ *     the matching control character and any other character is kept
+ *     literally (so \\ yields one backslash).
+ * A backslash that is the very last input character is copied unchanged.
+ *
+ * The \a and \b sequences are decoded so that text escaped by
+ * quote_content() survives a round trip.
+ *
+ * @param dst Output buffer.  It receives at most len characters plus a
+ *   terminating NUL, so it needs room for len + 1 bytes.
+ * @param str The text between the quotes; only the first len bytes are read.
+ * @param len The number of bytes to read from str.
+ * @param quote_char The quote character whose doubled form is collapsed.
+ * @return The number of characters written to dst, not counting the NUL.
+ */
+size_t
+unquote_content(char* dst, const char* str, size_t len, char quote_char)
+{
+    size_t index = 0;
+
+    for (size_t lpc = 0; lpc < len; lpc++, index++) {
+        dst[index] = str[lpc];
+        if (str[lpc] == quote_char) {
+            // Skip whatever follows the quote (normally its doubled twin).
+            lpc += 1;
+        } else if (str[lpc] == '\\' && (lpc + 1) < len) {
+            switch (str[lpc + 1]) {
+                case 'n':
+                    dst[index] = '\n';
+                    break;
+                case 'r':
+                    dst[index] = '\r';
+                    break;
+                case 't':
+                    dst[index] = '\t';
+                    break;
+                case 'a':
+                    dst[index] = '\a';
+                    break;
+                case 'b':
+                    dst[index] = '\b';
+                    break;
+                default:
+                    dst[index] = str[lpc + 1];
+                    break;
+            }
+            // The escaped character has been consumed as well.
+            lpc += 1;
+        }
+    }
+    dst[index] = '\0';
+
+    return index;
+}
+
+/**
+ * Strip the surrounding quotes from a string literal and decode its
+ * content with unquote_content().
+ *
+ * A single leading 'r' or 'u' character in front of the opening quote is
+ * skipped.  The character that follows must be a single or double quote;
+ * it is used as the quote character for decoding.  The last character of
+ * the input is assumed to be the closing quote and is dropped without
+ * being inspected.
+ *
+ * @param dst Output buffer; see unquote_content() for the space it needs.
+ * @param str The quoted text, including the quotes and optional prefix.
+ * @param len The number of bytes in str.
+ * @return The number of characters written to dst, not counting the NUL.
+ *   Input that is too short to hold both quotes produces an empty string.
+ */
+size_t
+unquote(char* dst, const char* str, size_t len)
+{
+    if (len > 0 && (str[0] == 'r' || str[0] == 'u')) {
+        str += 1;
+        len -= 1;
+    }
+
+    // Without room for an opening and a closing quote, `len - 2` below
+    // would wrap around and the copy would run far past both buffers.
+    if (len < 2) {
+        dst[0] = '\0';
+        return 0;
+    }
+
+    char quote_char = str[0];
+
+    require(str[0] == '\'' || str[0] == '"');
+
+    return unquote_content(dst, &str[1], len - 2, quote_char);
+}
+
+/**
+ * Strip the surrounding quotes from a quoted field and collapse doubled
+ * double-quotes.
+ *
+ * The first and last characters of the input are dropped.  Of the
+ * characters in between, every double-quote is copied and the character
+ * right after it is skipped, which turns "" into ".  Backslashes get no
+ * special treatment.
+ *
+ * @param dst Output buffer; it receives fewer than len characters plus a
+ *   terminating NUL.
+ * @param str The quoted text; must start with a single or double quote.
+ * @param len The number of bytes in str.
+ * @return The number of characters written to dst, not counting the NUL.
+ *   Empty input produces an empty string.
+ */
+size_t
+unquote_w3c(char* dst, const char* str, size_t len)
+{
+    size_t index = 0;
+
+    // Nothing to inspect; this also keeps the loop bound from wrapping.
+    if (len == 0) {
+        dst[0] = '\0';
+        return 0;
+    }
+
+    require(str[0] == '\'' || str[0] == '"');
+
+    // Stop one short of the end so the closing quote is not copied.
+    for (size_t lpc = 1; (lpc + 1) < len; lpc++, index++) {
+        dst[index] = str[lpc];
+        if (str[lpc] == '"') {
+            // Skip the second half of a doubled quote.
+            lpc += 1;
+        }
+    }
+    dst[index] = '\0';
+
+    return index;
+}
+
+/**
+ * Shorten a UTF-8 string in place so that it is at most max_char_len
+ * characters long, replacing the removed middle part with an ellipsis.
+ *
+ * The string is left untouched when it already fits or when its character
+ * length cannot be determined.  When max_char_len is below three, the whole
+ * string is replaced by the ellipsis alone.
+ *
+ * @param str The string to shorten; modified in place.
+ * @param max_char_len The maximum length, counted in characters, not bytes.
+ */
+void
+truncate_to(std::string& str, size_t max_char_len)
+{
+    // U+22EF, midline horizontal ellipsis.
+    static const std::string ELLIPSIS = "\u22ef";
+
+    // Cheap test first: fewer bytes than the limit means fewer characters.
+    if (str.length() < max_char_len) {
+        return;
+    }
+
+    auto length_result = utf8_string_length(str);
+    if (length_result.isErr()) {
+        // XXX the length is unknown, so the string is left as it is
+        return;
+    }
+
+    auto char_count = length_result.unwrap();
+    if (char_count <= max_char_len) {
+        return;
+    }
+
+    if (max_char_len < 3) {
+        str = ELLIPSIS;
+        return;
+    }
+
+    // One extra character is cut to make room for the ellipsis.
+    auto cut_chars = (char_count - max_char_len) + 1;
+    // Center the cut on the middle of the string.
+    auto front_chars = (char_count / 2) - (cut_chars / 2);
+
+    // Translate the character range of the cut into a byte range.
+    auto cut_begin = utf8_char_to_byte_index(str, front_chars);
+    auto cut_end = utf8_char_to_byte_index(str, front_chars + cut_chars);
+
+    str.replace(cut_begin, cut_end - cut_begin, ELLIPSIS);
+}
+
+/**
+ * Check whether a name starts with one of the recognized URL schemes.
+ *
+ * @param fn The file name or URL to test.
+ * @return True if fn begins with file:, http:, https:, ftp:, ftps:, scp:,
+ *   or sftp: and the rest of it is matched by the pattern.
+ */
+bool
+is_url(const std::string& fn)
+{
+    // Compiled once, on first use.
+    static const std::regex SCHEME_PATTERN(
+        "^(file|https?|ftps?|scp|sftp):.*");
+
+    return std::regex_match(fn.begin(), fn.end(), SCHEME_PATTERN);
+}
+
+/**
+ * Shorten a separator-delimited name in place by cutting its components
+ * down to their first character, e.g. "com.example.foo" -> "c.e.foo".
+ *
+ * Components are separated by '.', '-', '/', or ':'.  They are abbreviated
+ * from left to right and the function stops as soon as the length drops
+ * below max_len, so trailing components keep their full text whenever
+ * possible.  The last component is never abbreviated.  Separators at the
+ * very start of the string are kept as they are.
+ *
+ * No terminator is written; callers have to use the returned length.
+ *
+ * @param str The characters to abbreviate; modified in place.
+ * @param len The number of characters in str.
+ * @param max_len The length the result should stay below.
+ * @return The new length, which can still be max_len or more if there was
+ *   not enough to abbreviate.
+ */
+size_t
+abbreviate_str(char* str, size_t len, size_t max_len)
+{
+    // Index just past the first character of the current component, which
+    // is where the next separator gets moved to.
+    size_t last_start = 1;
+
+    if (len < max_len) {
+        return len;
+    }
+
+    for (size_t index = 0; index < len; index++) {
+        switch (str[index]) {
+            case '.':
+            case '-':
+            case '/':
+            case ':':
+                if (index < last_start) {
+                    // A leading separator has no component in front of it.
+                    // Moving it would make `index - last_start` wrap
+                    // around, growing the string and writing past its end.
+                    // Keep it and start the next component after it.
+                    last_start = index + 2;
+                    break;
+                }
+
+                // Pull the rest of the string up over the component's tail.
+                memmove(&str[last_start], &str[index], len - index);
+                len -= (index - last_start);
+                // Continue behind the first character of the next
+                // component (the loop increment adds one more).
+                index = last_start + 1;
+                last_start = index + 1;
+
+                if (len < max_len) {
+                    return len;
+                }
+                break;
+        }
+    }
+
+    return len;
+}
+
+/**
+ * Split a string on runs of whitespace and append the resulting tokens to
+ * a vector.
+ *
+ * @param str The text to split.
+ * @param toks_out The vector the tokens are appended to; entries that are
+ *   already in it are kept.
+ */
+void
+split_ws(const std::string& str, std::vector<std::string>& toks_out)
+{
+    std::stringstream reader(str);
+
+    // Formatted extraction skips leading whitespace and stops at the next
+    // whitespace character, so each extracted string is one token.
+    std::copy(std::istream_iterator<std::string>(reader),
+              std::istream_iterator<std::string>(),
+              std::back_inserter(toks_out));
+}
+
+/**
+ * Build a string that consists of num back-to-back copies of input.
+ *
+ * @param input The text to repeat.
+ * @param num The number of copies; zero yields an empty string.
+ * @return The concatenated copies.
+ */
+std::string
+repeat(const std::string& input, size_t num)
+{
+    std::string retval;
+
+    for (size_t remaining = num; remaining > 0; --remaining) {
+        retval.append(input);
+    }
+
+    return retval;
+}
+
+/**
+ * Center a string in a field of the given width by padding it with spaces.
+ *
+ * The subject is first shortened with truncate_to().  When the remaining
+ * padding cannot be split evenly, the extra space goes to the right side.
+ *
+ * @param subject The text to center.
+ * @param width The desired field width, in characters.
+ * @return The padded copy.  If the text is still wider than the field
+ *   after truncation, it is returned without any padding.
+ */
+std::string
+center_str(const std::string& subject, size_t width)
+{
+    std::string retval = subject;
+
+    truncate_to(retval, width);
+
+    // Fall back to the byte length when the text is not valid UTF-8.
+    const size_t retval_length
+        = utf8_string_length(retval).unwrapOr(retval.length());
+    // truncate_to() can leave more than `width` characters behind: it gives
+    // up on text whose length it cannot determine and it never produces
+    // less than the one-character ellipsis.  Subtracting blindly would wrap
+    // around and request an enormous amount of padding.
+    const size_t total_fill
+        = retval_length < width ? width - retval_length : 0;
+    const size_t before = total_fill / 2;
+    const size_t after = total_fill - before;
+
+    retval.insert(0, before, ' ');
+    retval.append(after, ' ');
+
+    return retval;
+}
+
+/**
+ * Check whether a string contains nothing but whitespace.
+ *
+ * @param str The string to test.
+ * @return True if every character is whitespace according to isspace(),
+ *   which includes the case of an empty string.
+ */
+bool
+is_blank(const std::string& str)
+{
+    return std::all_of(str.begin(), str.end(), [](const char ch) {
+        // isspace() is only defined for EOF and values that fit in an
+        // unsigned char, so bytes of multi-byte UTF-8 sequences, which are
+        // negative where char is signed, have to be converted first.
+        return isspace(static_cast<unsigned char>(ch)) != 0;
+    });
+}
+
+/**
+ * Make tabs, line feeds, and carriage returns visible by replacing each of
+ * them with a printable symbol.
+ *
+ * @param in The NUL-terminated text to convert.
+ * @return A copy of the text in which every tab is replaced by U+21E5,
+ *   every line feed by U+240A, and every carriage return by U+240D.  All
+ *   other characters are copied unchanged.
+ */
+std::string
+scrub_ws(const char* in)
+{
+    static const std::string TAB_SYMBOL = "\u21e5";
+    static const std::string LF_SYMBOL = "\u240a";
+    static const std::string CR_SYMBOL = "\u240d";
+
+    std::string retval;
+
+    for (const char* cursor = in; *cursor != '\0'; ++cursor) {
+        const char ch = *cursor;
+
+        if (ch == '\t') {
+            retval += TAB_SYMBOL;
+        } else if (ch == '\n') {
+            retval += LF_SYMBOL;
+        } else if (ch == '\r') {
+            retval += CR_SYMBOL;
+        } else {
+            retval.push_back(ch);
+        }
+    }
+
+    return retval;
+}