1 files changed, 1309 insertions, 0 deletions
diff --git a/wsutil/str_util.c b/wsutil/str_util.c
new file mode 100644
index 0000000..4243b22
--- /dev/null
+++ b/wsutil/str_util.c
@@ -0,0 +1,1309 @@
+/* str_util.c
+ * String utility routines
+ *
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 1998 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#define _GNU_SOURCE
+#include "config.h"
+#include "str_util.h"
+
+#include <string.h>
+
+#include <ws_codepoints.h>
+
+#include <wsutil/to_str.h>
+
+
+/*
+ * Upper-case hexadecimal digit characters, indexed by nibble value (0-15).
+ * The array holds exactly 16 characters and is not NUL-terminated.
+ */
+static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
+                              '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+
+/*
+ * Concatenate 'first' and every following string argument into a single
+ * string allocated from 'allocator'.
+ *
+ * The argument list is read until a NULL pointer is found, so callers must
+ * terminate it with NULL. Returns NULL when 'first' is NULL.
+ */
+char *
+wmem_strconcat(wmem_allocator_t *allocator, const char *first, ...)
+{
+    va_list     ap;
+    const char *piece;
+    size_t      total;
+    char       *result;
+    char       *tail;
+
+    if (first == NULL)
+        return NULL;
+
+    /* Pass 1: add up the lengths; start at 1 for the terminating NUL. */
+    total = strlen(first) + 1;
+    va_start(ap, first);
+    for (piece = va_arg(ap, char *); piece != NULL; piece = va_arg(ap, char *)) {
+        total += strlen(piece);
+    }
+    va_end(ap);
+
+    result = (char *)wmem_alloc(allocator, total);
+
+    /* Pass 2: copy each piece; g_stpcpy() hands back the new end of string. */
+    tail = g_stpcpy(result, first);
+    va_start(ap, first);
+    for (piece = va_arg(ap, char *); piece != NULL; piece = va_arg(ap, char *)) {
+        tail = g_stpcpy(tail, piece);
+    }
+    va_end(ap);
+
+    return result;
+}
+
+/*
+ * Join 'first' and every following string argument into a single string
+ * allocated from 'allocator', inserting 'separator' between consecutive
+ * strings.
+ *
+ * The argument list is read until a NULL pointer is found. A NULL
+ * 'separator' is treated as the empty string. Returns NULL when 'first'
+ * is NULL.
+ */
+char *
+wmem_strjoin(wmem_allocator_t *allocator,
+             const char *separator, const char *first, ...)
+{
+    va_list     ap;
+    const char *piece;
+    size_t      sep_len;
+    size_t      total;
+    char       *joined;
+    char       *tail;
+
+    if (first == NULL)
+        return NULL;
+
+    if (separator == NULL)
+        separator = "";
+    sep_len = strlen(separator);
+
+    /* Pass 1: total size, counting one byte for the terminating NUL. */
+    total = strlen(first) + 1;
+    va_start(ap, first);
+    for (piece = va_arg(ap, char *); piece != NULL; piece = va_arg(ap, char *)) {
+        total += sep_len + strlen(piece);
+    }
+    va_end(ap);
+
+    /* Pass 2: copy, putting the separator in front of every later piece. */
+    joined = (char *)wmem_alloc(allocator, total);
+    tail = g_stpcpy(joined, first);
+    va_start(ap, first);
+    for (piece = va_arg(ap, char *); piece != NULL; piece = va_arg(ap, char *)) {
+        tail = g_stpcpy(tail, separator);
+        tail = g_stpcpy(tail, piece);
+    }
+    va_end(ap);
+
+    return joined;
+}
+
+/*
+ * Join the strings of the NULL-terminated array 'str_array' into a single
+ * string allocated from 'allocator', inserting 'separator' between
+ * consecutive strings.
+ *
+ * A NULL 'separator' is treated as the empty string. An array whose first
+ * entry is NULL yields an empty string. A NULL 'str_array' is rejected
+ * through ws_return_val_if() with a NULL result.
+ */
+char *
+wmem_strjoinv(wmem_allocator_t *allocator,
+              const char *separator, char **str_array)
+{
+    char  **item;
+    char   *joined;
+    char   *tail;
+    size_t  sep_len;
+    size_t  total;
+
+    ws_return_val_if(!str_array, NULL);
+
+    if (separator == NULL)
+        separator = "";
+
+    /* Nothing to join: hand back a fresh empty string. */
+    if (str_array[0] == NULL)
+        return wmem_strdup(allocator, "");
+
+    sep_len = strlen(separator);
+
+    /* Size of the first string plus the NUL, then separator + string for the rest. */
+    total = strlen(str_array[0]) + 1;
+    for (item = str_array + 1; *item != NULL; item++) {
+        total += sep_len;
+        total += strlen(*item);
+    }
+
+    /* Allocate and build the string. */
+    joined = (char *)wmem_alloc(allocator, total);
+    tail = g_stpcpy(joined, str_array[0]);
+    for (item = str_array + 1; *item != NULL; item++) {
+        tail = g_stpcpy(tail, separator);
+        tail = g_stpcpy(tail, *item);
+    }
+
+    return joined;
+}
+
+/*
+ * Split 'src' at each occurrence of 'delimiter' into at most 'max_tokens'
+ * pieces; a 'max_tokens' below 1 means INT_MAX.
+ *
+ * Returns a NULL-terminated vector allocated from 'allocator'. Its entries
+ * point into one private copy of 'src' in which every consumed delimiter
+ * has been overwritten with NUL bytes. Returns NULL when 'src' or
+ * 'delimiter' is NULL or 'delimiter' is empty; an empty 'src' yields a
+ * vector that holds only the terminating NULL.
+ */
+char **
+wmem_strsplit(wmem_allocator_t *allocator, const char *src,
+        const char *delimiter, int max_tokens)
+{
+    char     *copy;
+    char     *cursor;
+    char     *hit;
+    char    **vec;
+    unsigned  delim_len;
+    unsigned  limit;
+    unsigned  count;
+
+    if (src == NULL || delimiter == NULL || delimiter[0] == '\0')
+        return NULL;
+
+    /* An empty string results in an empty vector. */
+    if (src[0] == '\0')
+        return wmem_new0(allocator, char *);
+
+    copy = wmem_strdup(allocator, src);
+    delim_len = (unsigned)strlen(delimiter);
+    limit = (max_tokens < 1) ? (unsigned)INT_MAX : (unsigned)max_tokens;
+
+    /* Pass 1: count the fields so the vector can be sized exactly. */
+    count = 1;
+    cursor = copy;
+    while (count < limit) {
+        hit = strstr(cursor, delimiter);
+        if (hit == NULL)
+            break;
+        cursor = hit + delim_len;
+        count++;
+    }
+
+    vec = wmem_alloc_array(allocator, char *, count + 1);
+
+    /* Pass 2: cut the copy at each delimiter and record where fields start. */
+    vec[0] = copy;
+    count = 1;
+    cursor = copy;
+    while (count < limit) {
+        hit = strstr(cursor, delimiter);
+        if (hit == NULL)
+            break;
+        memset(hit, '\0', delim_len);
+        cursor = hit + delim_len;
+        vec[count] = cursor;
+        count++;
+    }
+
+    vec[count] = NULL;
+
+    return vec;
+}
+
+/*
+ * wmem_ascii_strdown:
+ * based on g_ascii_strdown.
+ *
+ * Copies at most 'len' bytes of 'str' into memory from 'allocator' and
+ * passes every character of the copy through g_ascii_tolower(). A negative
+ * 'len' means "use strlen(str)". A NULL 'str' is rejected through
+ * g_return_val_if_fail() with a NULL result.
+ */
+char*
+wmem_ascii_strdown(wmem_allocator_t *allocator, const char *str, ssize_t len)
+{
+    char *lowered;
+    char *cur;
+
+    g_return_val_if_fail (str != NULL, NULL);
+
+    if (len < 0) {
+        len = strlen (str);
+    }
+
+    lowered = wmem_strndup(allocator, str, len);
+
+    /* Walk the copy up to its NUL terminator. */
+    cur = lowered;
+    while (*cur != '\0') {
+        *cur = g_ascii_tolower (*cur);
+        cur++;
+    }
+
+    return lowered;
+}
+
+/*
+ * Convert one hexadecimal digit character to its numeric value.
+ *
+ * Accepts '0'-'9', 'a'-'f' and 'A'-'F' and returns 0-15; returns -1 for
+ * any other character.
+ */
+int
+ws_xton(char ch)
+{
+    if (ch >= '0' && ch <= '9')
+        return ch - '0';
+
+    if (ch >= 'a' && ch <= 'f')
+        return ch - 'a' + 10;
+
+    if (ch >= 'A' && ch <= 'F')
+        return ch - 'A' + 10;
+
+    return -1;
+}
+
+/*
+ * Convert all ASCII upper-case letters of 'str' to lower case, in place.
+ * Returns 'str'.
+ */
+char *
+ascii_strdown_inplace(char *str)
+{
+    size_t i;
+
+    for (i = 0; str[i] != '\0'; i++) {
+        const char ch = str[i];
+
+        /* Hand-rolled equivalent of g_ascii_tolower(), kept on the theory that it is slightly cheaper. */
+        str[i] = g_ascii_isupper (ch) ? ch - 'A' + 'a' : ch;
+    }
+
+    return str;
+}
+
+/*
+ * Convert all ASCII lower-case letters of 'str' to upper case, in place.
+ * Returns 'str'.
+ */
+char *
+ascii_strup_inplace(char *str)
+{
+    size_t i;
+
+    for (i = 0; str[i] != '\0'; i++) {
+        const char ch = str[i];
+
+        /* Hand-rolled equivalent of g_ascii_toupper(), kept on the theory that it is slightly cheaper. */
+        str[i] = g_ascii_islower (ch) ? ch - 'a' + 'A' : ch;
+    }
+
+    return str;
+}
+
+/*
+ * Check whether every character of the NUL-terminated string 'str' is
+ * printable according to g_ascii_isprint(). An empty string counts as
+ * printable.
+ */
+bool
+isprint_string(const char *str)
+{
+    const char *cur;
+
+    for (cur = str; *cur != '\0'; cur++) {
+        /* One non-printable character is enough to reject the string. */
+        if (!g_ascii_isprint(*cur))
+            return false;
+    }
+
+    return true;
+}
+
+/*
+ * Check whether the 'length' bytes at 'str' are valid UTF-8 made up only
+ * of printable characters. Input that fails g_utf8_validate() is reported
+ * as not printable.
+ */
+bool
+isprint_utf8_string(const char *str, const unsigned length)
+{
+    const char *const end = str + length;
+    const char *cur;
+
+    if (!g_utf8_validate(str, length, NULL))
+        return false;
+
+    for (cur = str; cur < end; cur = g_utf8_next_char(cur)) {
+        /* g_unichar_isprint() returns false for G_UNICODE_CONTROL |
+         * G_UNICODE_FORMAT | G_UNICODE_UNASSIGNED | G_UNICODE_SURROGATE.
+         * XXX: Could it be ok to have certain format characters, e.g.
+         * U+00AD SOFT HYPHEN? If so, format_text() should be changed too.
+         */
+        if (!g_unichar_isprint(g_utf8_get_char(cur)))
+            return false;
+    }
+
+    return true;
+}
+
+/*
+ * Check whether every character of the NUL-terminated string 'str' is a
+ * digit according to g_ascii_isdigit(). An empty string counts as all
+ * digits.
+ */
+bool
+isdigit_string(const unsigned char *str)
+{
+    const unsigned char *cur;
+
+    for (cur = str; *cur != '\0'; cur++) {
+        /* One non-digit character is enough to reject the string. */
+        if (!g_ascii_isdigit(*cur))
+            return false;
+    }
+
+    return true;
+}
+
+/*
+ * Find the first occurrence of 'needle' in 'haystack', comparing with
+ * g_ascii_strncasecmp(). Returns a pointer into 'haystack', or NULL when
+ * there is no match. An empty 'needle' matches at the start of 'haystack'.
+ */
+const char *
+ws_ascii_strcasestr(const char *haystack, const char *needle)
+{
+    /* Do not use strcasestr() here, even if a system has it, as it is
+     * locale-dependent (and has different results for e.g. Turkic languages.)
+     * FreeBSD, NetBSD, macOS have a strcasestr_l() that could be used.
+     */
+    size_t       remaining  = strlen(haystack);
+    const size_t needle_len = strlen(needle);
+
+    /* Stop as soon as what is left of the haystack is shorter than the needle. */
+    for (; remaining >= needle_len; remaining--, haystack++) {
+        if (g_ascii_strncasecmp(haystack, needle, needle_len) == 0)
+            return haystack;
+    }
+
+    return NULL;
+}
+
+#define FORMAT_SIZE_UNIT_MASK 0x00ff
+#define FORMAT_SIZE_PFX_MASK 0xff00
+
+/*
+ * printf-style format used by format_size_wmem() for the numeric part.
+ * NULL until test_printf_thousands_grouping() has run.
+ */
+static const char *thousands_grouping_fmt = NULL;
+
+DIAG_OFF(format)
+/*
+ * Probe whether wmem_strbuf_append_printf() copes with the "'" flag
+ * character by formatting 22 with "%'d": if the output is "22" the grouping
+ * format is selected, otherwise the plain one.
+ */
+static void test_printf_thousands_grouping(void) {
+    wmem_strbuf_t *probe = wmem_strbuf_new(NULL, NULL);
+    bool flag_ok;
+
+    wmem_strbuf_append_printf(probe, "%'d", 22);
+    flag_ok = (g_strcmp0(wmem_strbuf_get_str(probe), "22") == 0);
+    wmem_strbuf_destroy(probe);
+
+    /* Fall back to the plain format when the flag is not usable. */
+    thousands_grouping_fmt = flag_ok ? "%'"PRId64 : "%"PRId64;
+}
+DIAG_ON(format)
+
+/*
+ * Given a size, return its value in a human-readable format, as a string
+ * allocated from 'allocator'.
+ *
+ * The value is divided by 1000 (or by 1024 when FORMAT_SIZE_PREFIX_IEC is
+ * set in 'flags') one to four times. The largest prefix whose scaled value
+ * is still at least 10 is used; if none qualifies the value is printed
+ * unscaled and the long unit name (" bytes", " bits", ...) is appended
+ * instead of the short one. Trailing whitespace is removed with
+ * g_strchomp().
+ *
+ * This doesn't handle fractional values. We might want to make size a double.
+ */
+char *
+format_size_wmem(wmem_allocator_t *allocator, int64_t size,
+                        format_size_units_e unit, uint16_t flags)
+{
+    static const char *prefix[] = {" T", " G", " M", " k", " Ti", " Gi", " Mi", " Ki"};
+    wmem_strbuf_t *out = wmem_strbuf_new(allocator, NULL);
+    int power = 1000;
+    int pfx_off = 0;
+    const char *pfx = NULL;
+    const char *suffix = NULL;
+    int64_t kilo, mega, giga, tera;
+    int64_t shown;
+    bool is_small;
+
+    if (thousands_grouping_fmt == NULL)
+        test_printf_thousands_grouping();
+
+    /* IEC prefixes live in the second half of the prefix table. */
+    if (flags & FORMAT_SIZE_PREFIX_IEC) {
+        pfx_off = 4;
+        power = 1024;
+    }
+
+    /* Successive integer divisions, one per prefix step. */
+    kilo = size / power;
+    mega = kilo / power;
+    giga = mega / power;
+    tera = giga / power;
+
+    if (tera >= 10) {
+        shown = tera;
+        pfx = prefix[pfx_off];
+    } else if (giga >= 10) {
+        shown = giga;
+        pfx = prefix[pfx_off+1];
+    } else if (mega >= 10) {
+        shown = mega;
+        pfx = prefix[pfx_off+2];
+    } else if (kilo >= 10) {
+        shown = kilo;
+        pfx = prefix[pfx_off+3];
+    } else {
+        shown = size;
+    }
+    is_small = (pfx == NULL);
+
+    wmem_strbuf_append_printf(out, thousands_grouping_fmt, shown);
+    if (pfx != NULL)
+        wmem_strbuf_append(out, pfx);
+
+    /* Pick the unit text; FORMAT_SIZE_UNIT_NONE appends nothing. */
+    switch (unit) {
+        case FORMAT_SIZE_UNIT_NONE:
+            break;
+        case FORMAT_SIZE_UNIT_BYTES:
+            suffix = is_small ? " bytes" : "B";
+            break;
+        case FORMAT_SIZE_UNIT_BITS:
+            suffix = is_small ? " bits" : "b";
+            break;
+        case FORMAT_SIZE_UNIT_BITS_S:
+            suffix = is_small ? " bits/s" : "bps";
+            break;
+        case FORMAT_SIZE_UNIT_BYTES_S:
+            suffix = is_small ? " bytes/s" : "Bps";
+            break;
+        case FORMAT_SIZE_UNIT_PACKETS:
+            suffix = is_small ? " packets" : "packets";
+            break;
+        case FORMAT_SIZE_UNIT_PACKETS_S:
+            suffix = is_small ? " packets/s" : "packets/s";
+            break;
+        default:
+            ws_assert_not_reached();
+    }
+    if (suffix != NULL)
+        wmem_strbuf_append(out, suffix);
+
+    return g_strchomp(wmem_strbuf_finalize(out));
+}
+
+/*
+ * Return 'c' itself when g_ascii_isprint() accepts it, otherwise '.'.
+ */
+char
+printable_char_or_period(char c)
+{
+    if (g_ascii_isprint(c))
+        return c;
+
+    return '.';
+}
+
+/*
+ * This is used by the display filter engine and must be compatible
+ * with display filter syntax.
+ *
+ * If 'c' needs a backslash escape, store the character that follows the
+ * backslash in '*p' and return true; otherwise leave '*p' alone and
+ * return false. 'p' must not be NULL.
+ */
+static inline bool
+escape_char(char c, char *p)
+{
+    char letter;
+
+    ws_assert(p);
+
+    /*
+     * Backslashes and double-quotes must
+     * be escaped. Whitespace is also escaped.
+     */
+    switch (c) {
+        case '\a': letter = 'a';  break;
+        case '\b': letter = 'b';  break;
+        case '\f': letter = 'f';  break;
+        case '\n': letter = 'n';  break;
+        case '\r': letter = 'r';  break;
+        case '\t': letter = 't';  break;
+        case '\v': letter = 'v';  break;
+        case '"':  letter = '"';  break;
+        case '\\': letter = '\\'; break;
+        case '\0': letter = '0';  break;
+        default:
+            /* No escape needed for this character. */
+            return false;
+    }
+
+    *p = letter;
+    return true;
+}
+
+/*
+ * Escape callback that only handles the NUL byte: for '\0' it stores '0'
+ * in '*p' and returns true; any other character returns false with '*p'
+ * untouched. 'p' must not be NULL.
+ */
+static inline bool
+escape_null(char c, char *p)
+{
+    ws_assert(p);
+
+    if (c != '\0')
+        return false;
+
+    *p = '0';
+    return true;
+}
+
+/*
+ * Build an escaped copy of the first 'len' bytes of 'string' in memory
+ * from 'alloc'. A negative 'len' means "use strlen(string)".
+ *
+ * 'escape_func' decides, per byte, whether a backslash escape is needed
+ * and which character follows the backslash. When 'add_quotes' is true
+ * the result is wrapped in double quotes.
+ */
+static char *
+escape_string_len(wmem_allocator_t *alloc, const char *string, ssize_t len,
+                    bool (*escape_func)(char c, char *p), bool add_quotes)
+{
+    wmem_strbuf_t *out;
+    size_t initial_size;
+    ssize_t pos;
+    char letter;
+
+    if (len < 0)
+        len = strlen(string);
+
+    /* Initial capacity: the input length, plus the two quotes if requested. */
+    initial_size = len;
+    if (add_quotes)
+        initial_size += 2;
+    out = wmem_strbuf_new_sized(alloc, initial_size);
+
+    if (add_quotes)
+        wmem_strbuf_append_c(out, '"');
+
+    for (pos = 0; pos < len; pos++) {
+        const char in = string[pos];
+
+        if (!escape_func(in, &letter)) {
+            /* Other UTF-8 bytes are passed through. */
+            wmem_strbuf_append_c(out, in);
+            continue;
+        }
+        wmem_strbuf_append_c(out, '\\');
+        wmem_strbuf_append_c(out, letter);
+    }
+
+    if (add_quotes)
+        wmem_strbuf_append_c(out, '"');
+
+    return wmem_strbuf_finalize(out);
+}
+
+/*
+ * Return a copy of the first 'len' bytes of 'string', allocated from
+ * 'alloc', with the characters recognised by escape_char() written as
+ * backslash escapes. A negative 'len' means "use strlen(string)".
+ * The result is wrapped in double quotes when 'add_quotes' is true.
+ */
+char *
+ws_escape_string_len(wmem_allocator_t *alloc, const char *string, ssize_t len, bool add_quotes)
+{
+    return escape_string_len(alloc, string, len, escape_char, add_quotes);
+}
+
+/*
+ * Same as ws_escape_string_len() for a NUL-terminated 'string': the length
+ * is passed as -1 so that strlen() is used.
+ */
+char *
+ws_escape_string(wmem_allocator_t *alloc, const char *string, bool add_quotes)
+{
+    return escape_string_len(alloc, string, -1, escape_char, add_quotes);
+}
+
+/*
+ * Return a copy of the first 'len' bytes of 'string', allocated from
+ * 'alloc', in which only NUL bytes are escaped (as "\0"); see escape_null().
+ * The result is wrapped in double quotes when 'add_quotes' is true.
+ *
+ * NOTE(review): 'len' is a size_t here but the worker takes an ssize_t and
+ * treats negative values as "use strlen()" - confirm callers never pass a
+ * length that does not fit in ssize_t.
+ */
+char *ws_escape_null(wmem_allocator_t *alloc, const char *string, size_t len, bool add_quotes)
+{
+    return escape_string_len(alloc, string, len, escape_null, add_quotes);
+}
+
+/*
+ * Write a name for the error number 'errnum' into 'buf' (at most
+ * 'buf_size' bytes including the NUL) and return 'buf'.
+ *
+ * When built with HAVE_STRERRORNAME_NP and strerrorname_np() knows the
+ * error, its result is copied; in every other case the text is
+ * "Errno(<errnum>)".
+ */
+const char *
+ws_strerrorname_r(int errnum, char *buf, size_t buf_size)
+{
+    bool have_name = false;
+
+#ifdef HAVE_STRERRORNAME_NP
+    const char *name = strerrorname_np(errnum);
+
+    if (name != NULL) {
+        (void)g_strlcpy(buf, name, buf_size);
+        have_name = true;
+    }
+#endif
+
+    /* No symbolic name available: fall back to the numeric form. */
+    if (!have_name)
+        snprintf(buf, buf_size, "Errno(%d)", errnum);
+
+    return buf;
+}
+
+/*
+ * Build an "underline" marker string in memory from 'allocator':
+ * 'offset' spaces, one '^', then one '~' for every unit of 'len' beyond
+ * the first (so a 'len' of 0 or 1 produces just the '^').
+ *
+ * Returns NULL when 'offset' is negative.
+ */
+char *
+ws_strdup_underline(wmem_allocator_t *allocator, long offset, size_t len)
+{
+    if (offset < 0)
+        return NULL;
+
+    wmem_strbuf_t *buf = wmem_strbuf_new_sized(allocator, offset + len);
+
+    /*
+     * The counter has the same type as 'offset': an int counter would
+     * overflow (undefined behavior) for offsets above INT_MAX on platforms
+     * where long is wider than int.
+     */
+    for (long i = 0; i < offset; i++) {
+        wmem_strbuf_append_c(buf, ' ');
+    }
+    wmem_strbuf_append_c(buf, '^');
+
+    /* The '^' already covers the first position, hence "> 1". */
+    for (size_t l = len; l > 1; l--) {
+        wmem_strbuf_append_c(buf, '~');
+    }
+
+    return wmem_strbuf_finalize(buf);
+}
+
+#define    INITIAL_FMTBUF_SIZE    128
+
+/*
+ * Declare, and initialize, the variables used for an output buffer:
+ * 'fmtbuf' (the buffer), 'fmtbuf_len' (its allocated size) and 'column'
+ * (the index of the next byte to write). A variable named 'allocator'
+ * must be in scope. This expands to declarations, so it cannot be wrapped
+ * in a do/while block like the other macros.
+ */
+#define FMTBUF_VARS \
+    char *fmtbuf = (char*)wmem_alloc(allocator, INITIAL_FMTBUF_SIZE); \
+    unsigned fmtbuf_len = INITIAL_FMTBUF_SIZE; \
+    unsigned column = 0
+
+/*
+ * Expand the buffer to be large enough to add nbytes bytes, plus a
+ * terminating '\0'.
+ *
+ * If there is not enough room for those bytes and the terminating '\0',
+ * the buffer's size is doubled. The size of the buffer starts at 128, so
+ * doubling its size adds at least another 128 bytes, which is more than
+ * enough for one more character plus a terminating '\0'.
+ *
+ * Like the other statement macros below, this is wrapped in
+ * do { } while (0) so that it behaves as a single statement, e.g. as the
+ * body of an unbraced if/else.
+ */
+#define FMTBUF_EXPAND(nbytes) \
+    do { \
+        if (column+((nbytes)+1) >= fmtbuf_len) { \
+            fmtbuf_len *= 2; \
+            fmtbuf = (char *)wmem_realloc(allocator, fmtbuf, fmtbuf_len); \
+        } \
+    } while (0)
+
+/*
+ * Put a byte into the buffer; space must have been ensured for it.
+ */
+#define FMTBUF_PUTCHAR(b) \
+    do { \
+        fmtbuf[column] = (b); \
+        column++; \
+    } while (0)
+
+/*
+ * Add the one-byte argument, as an octal escape sequence, to the end
+ * of the buffer. Only the three octal digits are written; the caller
+ * writes the leading backslash.
+ */
+#define FMTBUF_PUTBYTE_OCTAL(b) \
+    do { \
+        FMTBUF_PUTCHAR((((b)>>6)&03) + '0'); \
+        FMTBUF_PUTCHAR((((b)>>3)&07) + '0'); \
+        FMTBUF_PUTCHAR((((b)>>0)&07) + '0'); \
+    } while (0)
+
+/*
+ * Add the one-byte argument, as a hex escape sequence ("\xHH"), to the end
+ * of the buffer.
+ */
+#define FMTBUF_PUTBYTE_HEX(b) \
+    do { \
+        FMTBUF_PUTCHAR('\\'); \
+        FMTBUF_PUTCHAR('x'); \
+        FMTBUF_PUTCHAR(hex[((b) >> 4) & 0xF]); \
+        FMTBUF_PUTCHAR(hex[((b) >> 0) & 0xF]); \
+    } while (0)
+
+/*
+ * Put the trailing '\0' at the end of the buffer.
+ */
+#define FMTBUF_ENDSTR \
+    fmtbuf[column] = '\0'
+
+/*
+ * Worker for the format_text*() functions.
+ *
+ * Scans the 'len' bytes at 'string' and builds, in a buffer obtained from
+ * 'allocator', a NUL-terminated string in which:
+ *
+ *   printable ASCII is copied through;
+ *
+ *   if 'replace_space' is true, non-printable whitespace becomes a blank;
+ *
+ *   other ASCII becomes a C-style escape (\a \b \f \n \r \t \v, otherwise
+ *   a backslash followed by three octal digits);
+ *
+ *   multi-byte sequences are decoded permissively (including the old
+ *   5- and 6-byte forms and overlong encodings); printable code points are
+ *   re-encoded as UTF-8, non-printable ASCII code points get the C-style
+ *   escapes above, and other non-printable code points are written as
+ *   \uXXXX or \UXXXXXXXX;
+ *
+ *   truncated sequences, missing or unexpected continuation bytes, the
+ *   bytes 0xFE/0xFF and values rejected by g_unichar_validate() become
+ *   UNICODE_REPLACEMENT_CHARACTER.
+ *
+ * Returns the buffer.
+ */
+static char *
+format_text_internal(wmem_allocator_t *allocator,
+                        const unsigned char *string, size_t len,
+                        bool replace_space)
+{
+    FMTBUF_VARS;
+    const unsigned char *stringend = string + len;
+    unsigned char c;
+
+    while (string < stringend) {
+        /*
+         * Get the first byte of this character.
+         */
+        c = *string++;
+        if (g_ascii_isprint(c)) {
+            /*
+             * Printable ASCII, so not part of a multi-byte UTF-8 sequence.
+             * Make sure there's enough room for one more byte, and add
+             * the character.
+             */
+            FMTBUF_EXPAND(1);
+            FMTBUF_PUTCHAR(c);
+        } else if (replace_space && g_ascii_isspace(c)) {
+            /*
+             * ASCII, so not part of a multi-byte UTF-8 sequence, but
+             * not printable, but is a space character; show it as a
+             * blank.
+             *
+             * Make sure there's enough room for one more byte, and add
+             * the blank.
+             */
+            FMTBUF_EXPAND(1);
+            FMTBUF_PUTCHAR(' ');
+        } else if (c < 128) {
+            /*
+             * ASCII, so not part of a multi-byte UTF-8 sequence, but not
+             * printable.
+             *
+             * That requires a minimum of 2 bytes, one for the backslash
+             * and one for a letter, so make sure we have enough room
+             * for that, plus a trailing '\0'.
+             */
+            FMTBUF_EXPAND(2);
+            FMTBUF_PUTCHAR('\\');
+            switch (c) {
+                case '\a': FMTBUF_PUTCHAR('a'); break;
+                case '\b': FMTBUF_PUTCHAR('b'); break; /* BS */
+                case '\f': FMTBUF_PUTCHAR('f'); break; /* FF */
+                case '\n': FMTBUF_PUTCHAR('n'); break; /* NL */
+                case '\r': FMTBUF_PUTCHAR('r'); break; /* CR */
+                case '\t': FMTBUF_PUTCHAR('t'); break; /* tab */
+                case '\v': FMTBUF_PUTCHAR('v'); break;
+
+                default:
+                    /*
+                     * We've already put the backslash, but this
+                     * will put 3 more characters for the octal
+                     * number; make sure we have enough room for
+                     * that, plus the trailing '\0'.
+                     */
+                    FMTBUF_EXPAND(3);
+                    FMTBUF_PUTBYTE_OCTAL(c);
+                    break;
+            }
+        } else {
+            /*
+             * We've fetched the first byte of a multi-byte UTF-8
+             * sequence into c.
+             */
+            int utf8_len;
+            unsigned char mask;
+            gunichar uc;
+            unsigned char first;
+
+            if ((c & 0xe0) == 0xc0) {
+                /* Starts a 2-byte UTF-8 sequence; 1 byte left */
+                utf8_len = 1;
+                mask = 0x1f;
+            } else if ((c & 0xf0) == 0xe0) {
+                /* Starts a 3-byte UTF-8 sequence; 2 bytes left */
+                utf8_len = 2;
+                mask = 0x0f;
+            } else if ((c & 0xf8) == 0xf0) {
+                /* Starts a 4-byte UTF-8 sequence; 3 bytes left */
+                utf8_len = 3;
+                mask = 0x07;
+            } else if ((c & 0xfc) == 0xf8) {
+                /* Starts an old-style 5-byte UTF-8 sequence; 4 bytes left */
+                utf8_len = 4;
+                mask = 0x03;
+            } else if ((c & 0xfe) == 0xfc) {
+                /* Starts an old-style 6-byte UTF-8 sequence; 5 bytes left */
+                utf8_len = 5;
+                mask = 0x01;
+            } else {
+                /* 0xfe or 0xff or a continuation byte - not valid */
+                utf8_len = -1;
+            }
+            if (utf8_len > 0) {
+                /* Try to construct the Unicode character */
+                uc = c & mask;
+                for (int i = 0; i < utf8_len; i++) {
+                    if (string >= stringend) {
+                        /*
+                         * Ran out of octets, so the character is
+                         * incomplete.  Put in a REPLACEMENT CHARACTER
+                         * instead, and then continue the loop, which
+                         * will terminate.
+                         */
+                        uc = UNICODE_REPLACEMENT_CHARACTER;
+                        break;
+                    }
+                    c = *string;
+                    if ((c & 0xc0) != 0x80) {
+                        /*
+                         * Not valid UTF-8 continuation character; put in
+                         * a replacement character, and then re-process
+                         * this octet as the beginning of a new character.
+                         */
+                        uc = UNICODE_REPLACEMENT_CHARACTER;
+                        break;
+                    }
+                    string++;
+                    uc = (uc << 6) | (c & 0x3f);
+                }
+
+                /*
+                 * If this isn't a valid Unicode character, put in
+                 * a REPLACEMENT CHARACTER.
+                 */
+                if (!g_unichar_validate(uc))
+                    uc = UNICODE_REPLACEMENT_CHARACTER;
+            } else {
+                /* 0xfe or 0xff or a continuation byte; put in a REPLACEMENT CHARACTER */
+                uc = UNICODE_REPLACEMENT_CHARACTER;
+            }
+
+            /*
+             * OK, is it a printable Unicode character?
+             */
+            if (g_unichar_isprint(uc)) {
+                /*
+                 * Yes - put it into the string as UTF-8.
+                 * This means that if it was an overlong
+                 * encoding, this will put out the right
+                 * sized encoding.
+                 */
+                if (uc < 0x80) {
+                    first = 0;
+                    utf8_len = 1;
+                } else if (uc < 0x800) {
+                    first = 0xc0;
+                    utf8_len = 2;
+                } else if (uc < 0x10000) {
+                    first = 0xe0;
+                    utf8_len = 3;
+                } else if (uc < 0x200000) {
+                    first = 0xf0;
+                    utf8_len = 4;
+                } else if (uc < 0x4000000) {
+                    /*
+                     * This should never happen, as Unicode doesn't
+                     * go that high.
+                     */
+                    first = 0xf8;
+                    utf8_len = 5;
+                } else {
+                    /*
+                     * This should never happen, as Unicode doesn't
+                     * go that high.
+                     */
+                    first = 0xfc;
+                    utf8_len = 6;
+                }
+                FMTBUF_EXPAND(utf8_len);
+                /* Fill in the continuation bytes from the last one backwards. */
+                for (int i = utf8_len - 1; i > 0; i--) {
+                    fmtbuf[column + i] = (uc & 0x3f) | 0x80;
+                    uc >>= 6;
+                }
+                fmtbuf[column] = uc | first;
+                column += utf8_len;
+            } else if (replace_space && g_unichar_isspace(uc)) {
+                /*
+                 * Not printable, but is a space character; show it
+                 * as a blank.
+                 *
+                 * Make sure there's enough room for one more byte,
+                 * and add the blank.
+                 */
+                FMTBUF_EXPAND(1);
+                FMTBUF_PUTCHAR(' ');
+            } else if (uc < 0x80) {
+                /*
+                 * ASCII, but not printable.
+                 * Yes, this could happen with an overlong encoding.
+                 *
+                 * The test is on the decoded code point, not on 'c':
+                 * 'c' holds the last byte read, and after a successfully
+                 * decoded multi-byte sequence that is a continuation
+                 * byte, which is never below 0x80.
+                 *
+                 * That requires a minimum of 2 bytes, one for the
+                 * backslash and one for a letter, so make sure we
+                 * have enough room for that, plus a trailing '\0'.
+                 */
+                FMTBUF_EXPAND(2);
+                FMTBUF_PUTCHAR('\\');
+                switch (uc) {
+                    case '\a': FMTBUF_PUTCHAR('a'); break;
+                    case '\b': FMTBUF_PUTCHAR('b'); break; /* BS */
+                    case '\f': FMTBUF_PUTCHAR('f'); break; /* FF */
+                    case '\n': FMTBUF_PUTCHAR('n'); break; /* NL */
+                    case '\r': FMTBUF_PUTCHAR('r'); break; /* CR */
+                    case '\t': FMTBUF_PUTCHAR('t'); break; /* tab */
+                    case '\v': FMTBUF_PUTCHAR('v'); break;
+
+                    default:
+                        /*
+                         * We've already put the backslash, but this
+                         * will put 3 more characters for the octal
+                         * number; make sure we have enough room for
+                         * that, plus the trailing '\0'.
+                         */
+                        FMTBUF_EXPAND(3);
+                        FMTBUF_PUTBYTE_OCTAL(uc);
+                        break;
+                }
+            } else {
+                /*
+                 * Unicode, but not printable, and not ASCII;
+                 * put it out as \uxxxx or \Uxxxxxxxx.
+                 */
+                if (uc <= 0xFFFF) {
+                    FMTBUF_EXPAND(6);
+                    FMTBUF_PUTCHAR('\\');
+                    FMTBUF_PUTCHAR('u');
+                    FMTBUF_PUTCHAR(hex[(uc >> 12) & 0xF]);
+                    FMTBUF_PUTCHAR(hex[(uc >> 8) & 0xF]);
+                    FMTBUF_PUTCHAR(hex[(uc >> 4) & 0xF]);
+                    FMTBUF_PUTCHAR(hex[(uc >> 0) & 0xF]);
+                } else {
+                    FMTBUF_EXPAND(10);
+                    FMTBUF_PUTCHAR('\\');
+                    FMTBUF_PUTCHAR('U');
+                    FMTBUF_PUTCHAR(hex[(uc >> 28) & 0xF]);
+                    FMTBUF_PUTCHAR(hex[(uc >> 24) & 0xF]);
+                    FMTBUF_PUTCHAR(hex[(uc >> 20) & 0xF]);
+                    FMTBUF_PUTCHAR(hex[(uc >> 16) & 0xF]);
+                    FMTBUF_PUTCHAR(hex[(uc >> 12) & 0xF]);
+                    FMTBUF_PUTCHAR(hex[(uc >> 8) & 0xF]);
+                    FMTBUF_PUTCHAR(hex[(uc >> 4) & 0xF]);
+                    FMTBUF_PUTCHAR(hex[(uc >> 0) & 0xF]);
+                }
+            }
+        }
+    }
+
+    FMTBUF_ENDSTR;
+
+    return fmtbuf;
+}
+
+/*
+ * Given a wmem scope, a not-necessarily-null-terminated string,
+ * expected to be in UTF-8 but possibly containing invalid sequences
+ * (as it may have come from packet data), and the length of the string,
+ * generate a valid UTF-8 string from it, allocated in the specified
+ * wmem scope, that:
+ *
+ *   shows printable Unicode characters as themselves;
+ *
+ *   shows non-printable ASCII characters as C-style escapes (octal
+ *   if not one of the standard ones such as LF -> '\n');
+ *
+ *   shows non-printable Unicode-but-not-ASCII characters as
+ *   their universal character names;
+ *
+ *   shows illegal UTF-8 sequences as a sequence of bytes represented
+ *   as C-style hex escapes (XXX: Does not actually do this. Some illegal
+ *   sequences, such as overlong encodings, the sequences reserved for
+ *   UTF-16 surrogate halves (paired or unpaired), and values outside
+ *   Unicode (i.e., the old sequences for code points above U+10FFFF)
+ *   will be decoded in a permissive way. Other illegal sequences,
+ *   such as 0xFE and 0xFF and the presence of a continuation byte where
+ *   not expected (or vice versa its absence), are replaced with
+ *   REPLACEMENT CHARACTER.)
+ *
+ * and return a pointer to it.
+ */
+char *
+format_text(wmem_allocator_t *allocator,
+                        const char *string, size_t len)
+{
+    /* The worker reads raw bytes; the cast makes the signedness change explicit. */
+    return format_text_internal(allocator, (const unsigned char *)string, len, false);
+}
+
+/** Given a wmem scope and a null-terminated string, expected to be in
+ *  UTF-8 but possibly containing invalid sequences (as it may have come
+ *  from packet data), generate a valid UTF-8 string from it, allocated
+ *  in the specified wmem scope, that:
+ *
+ *   shows printable Unicode characters as themselves;
+ *
+ *   shows non-printable ASCII characters as C-style escapes (octal
+ *   if not one of the standard ones such as LF -> '\n');
+ *
+ *   shows non-printable Unicode-but-not-ASCII characters as
+ *   their universal character names;
+ *
+ *   handles illegal UTF-8 sequences the way format_text_internal() does:
+ *   some are decoded permissively, the others are replaced with
+ *   REPLACEMENT CHARACTER (they are not shown as hex escapes);
+ *
+ *  and return a pointer to it.
+ *
+ *  The length is taken with strlen(), so 'string' must not be NULL.
+ */
+char *
+format_text_string(wmem_allocator_t* allocator, const char *string)
+{
+    /* The worker reads raw bytes; the cast makes the signedness change explicit. */
+    return format_text_internal(allocator, (const unsigned char *)string, strlen(string), false);
+}
+
+/*
+ * Given a string and its length, generate a string from it that shows
+ * non-printable characters as C-style escapes except a whitespace character
+ * (space, tab, carriage return, new line, vertical tab, or formfeed)
+ * which will be replaced by a space, and return a pointer to it.
+ *
+ * The input is processed by the same worker as format_text(), with
+ * whitespace replacement switched on; the result is allocated from
+ * 'allocator'.
+ */
+char *
+format_text_wsp(wmem_allocator_t* allocator, const char *string, size_t len)
+{
+    /* The worker reads raw bytes; the cast makes the signedness change explicit. */
+    return format_text_internal(allocator, (const unsigned char *)string, len, true);
+}
+
+/*
+ * Given a string and its length, generate a string from it, allocated
+ * from 'allocator', in which every byte is mapped as follows:
+ *
+ *   printable ASCII is kept;
+ *
+ *   a non-printable whitespace character (tab, carriage return, new line,
+ *   vertical tab, or formfeed) becomes a space;
+ *
+ *   anything else becomes the 'chr' parameter.
+ *
+ * This does *not* treat the input string as UTF-8.
+ *
+ * This is useful for displaying binary data that frequently but not always
+ * contains text; otherwise the number of C escape codes makes it unreadable.
+ */
+char *
+format_text_chr(wmem_allocator_t *allocator, const char *string, size_t len, char chr)
+{
+    wmem_strbuf_t *out = wmem_strbuf_new_sized(allocator, len + 1);
+    size_t pos;
+
+    for (pos = 0; pos < len; pos++) {
+        const char in = string[pos];
+        char shown;
+
+        /* Exactly one output character per input byte. */
+        if (g_ascii_isprint(in))
+            shown = in;
+        else if (g_ascii_isspace(in))
+            shown = ' ';
+        else
+            shown = chr;
+
+        wmem_strbuf_append_c(out, shown);
+    }
+
+    return wmem_strbuf_finalize(out);
+}
+
+/*
+ * Return a NUL-terminated string, allocated from 'allocator', that
+ * represents the single character 'c':
+ *
+ *   printable ASCII is returned as itself;
+ *
+ *   a character that escape_char() recognises becomes a backslash
+ *   followed by the escape letter;
+ *
+ *   anything else becomes "\xHH" with upper-case hex digits.
+ */
+char *
+format_char(wmem_allocator_t *allocator, char c)
+{
+    char *out;
+    char letter;
+
+    if (g_ascii_isprint(c)) {
+        out = wmem_alloc_array(allocator, char, 2);
+        out[0] = c;
+        out[1] = '\0';
+    } else if (escape_char(c, &letter)) {
+        out = wmem_alloc_array(allocator, char, 3);
+        out[0] = '\\';
+        out[1] = letter;
+        out[2] = '\0';
+    } else {
+        /* Go through uint8_t so that the shifts see a non-negative value. */
+        const uint8_t byte = (uint8_t)c;
+
+        out = wmem_alloc_array(allocator, char, 5);
+        out[0] = '\\';
+        out[1] = 'x';
+        out[2] = hex[(byte >> 4) & 0xF];
+        out[3] = hex[(byte >> 0) & 0xF];
+        out[4] = '\0';
+    }
+
+    return out;
+}
+
+/* Cut "string" to at most "len" bytes, in place, without leaving a partial
+ * UTF-8 sequence at the end; string[len] is written.  Returns "string". */
+char*
+ws_utf8_truncate(char *string, size_t len)
+{
+    char *cut = string + len;
+    char *tail;
+    *cut = '\0';    /* terminate first, then look at the tail */
+    tail = g_utf8_find_prev_char(string, cut);
+    if (tail == NULL || g_utf8_get_char_validated(tail, -1) != (gunichar)-2)
+        return string;  /* nothing before the cut, or the tail is not a partial sequence */
+    *tail = '\0';       /* the last character was truncated into a partial sequence: drop it */
+    return string;
+}
+
+/* ASCII/EBCDIC conversion tables from
+ * https://web.archive.org/web/20060813174742/http://www.room42.com/store/computer_center/code_tables.shtml
+ */
+#if 0
+static const uint8_t ASCII_translate_EBCDIC [ 256 ] = {    /* indexed by ASCII code; rows hold 9 + 7 entries */
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+    0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+    0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
+    0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D,
+    0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
+    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8,
+    0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
+    0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8,
+    0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
+    0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+    0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
+    0x7D, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88,   /* NOTE(review): 0x60 yields 0x7D, the same as 0x27 - confirm */
+    0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
+    0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
+    0xA8, 0xA9, 0xC0, 0x6A, 0xD0, 0xA1, 0x4B,
+    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,   /* 0x80-0xFF: every entry from here on is 0x4B */
+    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
+    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
+    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
+    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
+    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
+    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
+    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
+    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
+    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
+    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
+    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
+    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
+    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
+    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
+    0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B
+};
+
+void
+ASCII_to_EBCDIC(uint8_t *buf, unsigned bytes)
+{
+    /*
+     * Convert "bytes" bytes of "buf" in place.  The table has 256
+     * entries, so every uint8_t value is a valid index into it.
+     */
+    for (unsigned pos = 0; pos < bytes; pos++) {
+        const uint8_t ascii = buf[pos];
+        buf[pos] = ASCII_translate_EBCDIC[ascii];
+    }
+}
+
+uint8_t
+ASCII_to_EBCDIC1(uint8_t c)
+{   /* Single-byte lookup; "c" is 8 bits wide, so it cannot index past the 256 entries. */
+    return ASCII_translate_EBCDIC[c];
+}
+#endif
+
+static const uint8_t EBCDIC_translate_ASCII [ 256 ] = {    /* indexed by EBCDIC byte value, 8 entries per row */
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
+    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
+    0x2E, 0x2E, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+    0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x2E, 0x3F,
+    0x20, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,    /* 0x40 yields a space; 0x2E ('.') appears to be the filler for untranslatable bytes */
+    0x2E, 0x2E, 0x2E, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
+    0x26, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
+    0x2E, 0x2E, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
+    0x2D, 0x2F, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
+    0x2E, 0x2E, 0x7C, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
+    0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
+    0x2E, 0x2E, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
+    0x2E, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,    /* 0x81-0x89: 'a'-'i' */
+    0x68, 0x69, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
+    0x2E, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,    /* 0x91-0x99: 'j'-'r' */
+    0x71, 0x72, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
+    0x2E, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,    /* 0xA2-0xA9: 's'-'z' */
+    0x79, 0x7A, 0x2E, 0x2E, 0x2E, 0x5B, 0x2E, 0x2E,
+    0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
+    0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x5D, 0x2E, 0x2E,
+    0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,    /* 0xC1-0xC9: 'A'-'I' */
+    0x48, 0x49, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
+    0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,    /* 0xD1-0xD9: 'J'-'R' */
+    0x51, 0x52, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
+    0x5C, 0x2E, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,    /* 0xE2-0xE9: 'S'-'Z' */
+    0x59, 0x5A, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
+    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,    /* 0xF0-0xF9: '0'-'9' */
+    0x38, 0x39, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E
+};
+
+void
+EBCDIC_to_ASCII(uint8_t *buf, unsigned bytes)
+{
+    /*
+     * Convert "bytes" bytes of "buf" in place.  The table has 256
+     * entries, so every uint8_t value is a valid index into it.
+     */
+    for (unsigned pos = 0; pos < bytes; pos++) {
+        const uint8_t ebcdic = buf[pos];
+        buf[pos] = EBCDIC_translate_ASCII[ebcdic];
+    }
+}
+
+uint8_t
+EBCDIC_to_ASCII1(uint8_t c)
+{   /* Single-byte lookup; "c" is 8 bits wide, so it cannot index past the 256 entries. */
+    return EBCDIC_translate_ASCII[c];
+}
+
+/*
+ * This routine is based on a routine created by Dan Lasley
+ * <DLASLEY@PROMUS.com>.
+ *
+ * It was modified for Wireshark by Gilbert Ramirez and others.
+ */
+
+#define MAX_OFFSET_LEN   8       /* max number of hex digits in a line's offset */
+#define BYTES_PER_LINE  16      /* max byte values printed on a line */
+#define HEX_DUMP_LEN    (BYTES_PER_LINE*3)
+                                /* max number of characters hex dump takes -
+                                   per byte, 2 digits plus trailing blank */
+#define DATA_DUMP_LEN   (HEX_DUMP_LEN + 2 + 2 + BYTES_PER_LINE)
+                                /* number of characters those bytes take;
+                                   3 characters per byte of hex dump,
+                                   2 blanks separating hex from ASCII,
+                                   2 optional ASCII dump delimiters,
+                                   1 character per byte of ASCII dump */
+#define MAX_LINE_LEN    (MAX_OFFSET_LEN + 2 + DATA_DUMP_LEN)
+                                /* max number of characters per line, not
+                                   counting the terminating NUL; offset, 2 blanks
+                                   separating offset from data dump, data dump */
+
+/*
+ * Format "length" bytes starting at "cp" as a hex dump, handing each
+ * finished line (NUL-terminated, no newline) to print_line(fp, line).
+ *
+ * Each line is the offset, two blanks, and up to BYTES_PER_LINE bytes as
+ * two lower-case hex digits each.  Unless ascii_option is
+ * HEXDUMP_ASCII_EXCLUDE a text column follows, enclosed in '|' for
+ * HEXDUMP_ASCII_DELIMIT; HEXDUMP_ENC_EBCDIC translates bytes to ASCII for
+ * that column only, and non-printable bytes are shown as '.'.
+ *
+ * Returns false as soon as print_line() returns false, otherwise true.
+ */
+bool
+hex_dump_buffer(bool (*print_line)(void *, const char *), void *fp,
+                                    const unsigned char *cp, unsigned length,
+                                    hex_dump_enc encoding,
+                                    unsigned ascii_option)
+{
+    static const char binhex[16] = {
+        '0', '1', '2', '3', '4', '5', '6', '7',
+        '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
+    char         line[MAX_LINE_LEN + 1];
+    unsigned int use_digits = 4;
+    unsigned int ad = 0;    /* offset shown at the start of the current line */
+    unsigned int i = 0;     /* input bytes consumed so far */
+    unsigned int j = 0;     /* next position in the hex column of line[] */
+    unsigned int k = 0;     /* next position in the ASCII column of line[] */
+
+    /* No bytes, no lines; this also keeps "length - 1" from wrapping. */
+    if (length == 0)
+        return true;
+
+    /*
+     * We always supply at least 4 offset digits; if the maximum offset
+     * won't fit in 4 digits, we use as many as are needed (at most 8).
+     */
+    while (use_digits < MAX_OFFSET_LEN &&
+           ((length - 1) >> (use_digits * 4)) != 0)
+        use_digits++;
+
+    while (i < length) {
+        unsigned char c;
+
+        if ((i % BYTES_PER_LINE) == 0) {
+            /*
+             * Start of a new line: write the offset, most significant
+             * digit first, then blank the data area so that unused
+             * hex and ASCII cells come out as spaces.
+             */
+            j = 0;
+            for (unsigned int l = use_digits; l-- > 0; )
+                line[j++] = binhex[(ad >> (l * 4)) & 0xF];
+            line[j++] = ' ';
+            line[j++] = ' ';
+            memset(line + j, ' ', DATA_DUMP_LEN);
+
+            /* Offset in line of ASCII dump. */
+            k = j + HEX_DUMP_LEN + 2;
+            if (ascii_option == HEXDUMP_ASCII_DELIMIT)
+                line[k++] = '|';
+        }
+        c = *cp++;
+        line[j++] = binhex[c >> 4];
+        line[j++] = binhex[c & 0xf];
+        j++;    /* leave the blank that separates byte values */
+        if (ascii_option != HEXDUMP_ASCII_EXCLUDE) {
+            if (encoding == HEXDUMP_ENC_EBCDIC)
+                c = EBCDIC_to_ASCII1(c);
+            line[k++] = ((c >= ' ') && (c < 0x7f)) ? c : '.';
+        }
+        i++;
+        if (((i % BYTES_PER_LINE) == 0) || (i == length)) {
+            /*
+             * The line is full or the input is exhausted: close the
+             * text column, hand the line over, and advance the offset.
+             */
+            if (ascii_option == HEXDUMP_ASCII_DELIMIT)
+                line[k++] = '|';
+            line[k] = '\0';
+            if (!print_line(fp, line))
+                return false;
+            ad += BYTES_PER_LINE;
+        }
+    }
+    return true;
+}
+
+/*
+ * Editor modelines  -  https://www.wireshark.org/tools/modelines.html
+ *
+ * Local variables:
+ * c-basic-offset: 4
+ * tab-width: 8
+ * indent-tabs-mode: nil
+ * End:
+ *
+ * vi: set shiftwidth=4 tabstop=8 expandtab:
+ * :indentSize=4:tabSize=8:noTabs=true:
+ */