summaryrefslogtreecommitdiffstats
path: root/wsutil/str_util.c
diff options
context:
space:
mode:
Diffstat (limited to 'wsutil/str_util.c')
-rw-r--r--wsutil/str_util.c296
1 files changed, 283 insertions, 13 deletions
diff --git a/wsutil/str_util.c b/wsutil/str_util.c
index 4243b22b..556ef5a9 100644
--- a/wsutil/str_util.c
+++ b/wsutil/str_util.c
@@ -13,6 +13,8 @@
#include "str_util.h"
#include <string.h>
+#include <locale.h>
+#include <math.h>
#include <ws_codepoints.h>
@@ -339,10 +341,41 @@ ws_ascii_strcasestr(const char *haystack, const char *needle)
return NULL;
}
+/* Return a pointer to the last occurrence of ch (converted to a byte)
+ * in the first n bytes of haystack.
+ * If not found or n is 0, return NULL. */
+const uint8_t *
+ws_memrchr(const void *_haystack, int ch, size_t n)
+{
+#ifdef HAVE_MEMRCHR
+    return memrchr(_haystack, ch, n);
+#else
+    /* A generic implementation. This could be optimized considerably,
+     * e.g. by fetching a word at a time.
+     */
+    if (n == 0) {
+        return NULL;
+    }
+    const uint8_t *const haystack = _haystack;
+    const uint8_t c = (uint8_t)ch;
+    const uint8_t *p = haystack + n; /* one past the last byte */
+
+    /* Decrement before dereferencing so that p never moves below
+     * haystack: merely forming haystack - 1 is undefined behavior, so
+     * a "p >= haystack" loop test cannot be used to stop a backwards
+     * scan.
+     */
+    while (p != haystack) {
+        --p;
+        if (*p == c) {
+            return p;
+        }
+    }
+
+    return NULL;
+#endif /* HAVE_MEMRCHR */
+}
+
#define FORMAT_SIZE_UNIT_MASK 0x00ff
#define FORMAT_SIZE_PFX_MASK 0xff00
-static const char *thousands_grouping_fmt = NULL;
+static const char *thousands_grouping_fmt;
+static const char *thousands_grouping_fmt_flt;
DIAG_OFF(format)
static void test_printf_thousands_grouping(void) {
@@ -351,16 +384,213 @@ static void test_printf_thousands_grouping(void) {
wmem_strbuf_append_printf(buf, "%'d", 22);
if (g_strcmp0(wmem_strbuf_get_str(buf), "22") == 0) {
thousands_grouping_fmt = "%'"PRId64;
+ thousands_grouping_fmt_flt = "%'.*f";
} else {
/* Don't use */
thousands_grouping_fmt = "%"PRId64;
+ thousands_grouping_fmt_flt = "%.*f";
}
wmem_strbuf_destroy(buf);
}
DIAG_ON(format)
+static const char* decimal_point = NULL;
+
+/* Remove trailing zeros from the fractional part of the number held in
+ * strbuf, and the decimal point too if no fractional digits remain.
+ *
+ * strbuf: buffer containing a number printed with a fixed number of
+ *         fractional digits.
+ * n:      the precision that was given to printf. At most n digits after
+ *         the decimal point are kept. A negative n means "no limit",
+ *         since printf treats a negative precision as if it had been
+ *         omitted and prints its default number of digits.
+ *
+ * A buffer without a decimal point is left unchanged.
+ */
+static void truncate_numeric_strbuf(wmem_strbuf_t *strbuf, int n) {
+
+    const char *s = wmem_strbuf_get_str(strbuf);
+    const char *p;
+    int count;
+
+    if (decimal_point == NULL) {
+        /* NOTE(review): this caches a pointer into the structure
+         * returned by localeconv(); confirm nothing changes the locale
+         * afterwards, since that storage may then be overwritten. */
+        decimal_point = localeconv()->decimal_point;
+    }
+
+    p = strchr(s, decimal_point[0]);
+    if (p == NULL || *p == '\0') {
+        /* No decimal point (or an empty decimal point string, for which
+         * strchr() returns the terminator): nothing to trim. */
+        return;
+    }
+
+    /* Step over the decimal point, then over at most n digits. */
+    p++;
+    for (count = 0; (n < 0 || count < n) && *p != '\0'; count++) {
+        p++;
+    }
+
+    /* Back up over trailing zeros. p starts at or after the decimal
+     * point, which is not '0', so this scan cannot run off the front
+     * of the string. */
+    p--;
+    while (*p == '0') {
+        p--;
+    }
+
+    /* Keep the last significant digit; if we backed all the way up to
+     * the decimal point, drop the decimal point as well. */
+    if (*p != decimal_point[0]) {
+        p++;
+    }
+    wmem_strbuf_truncate(strbuf, p - s);
+}
+
+/* Given a floating point value, return it in a human-readable format,
+ * using units with metric prefixes (falling back to scientific notation
+ * with the base units if outside the range.)
+ *
+ * allocator: passed to wmem_strbuf_new() for the buffer the result is
+ *            built in.
+ * size:      the value to format.
+ * unit:      which unit suffix to append (FORMAT_SIZE_UNIT_*).
+ * flags:     if FORMAT_SIZE_PREFIX_IEC is set, scale by 1024 and use the
+ *            binary prefixes (Ki ... Ei); otherwise scale by 1000 and use
+ *            the SI prefixes (a ... E).
+ * precision: number of digits after the decimal point handed to printf;
+ *            trailing zeros are removed afterwards.
+ *
+ * Returns the finalized string buffer contents after g_strchomp() has
+ * removed any trailing whitespace.
+ */
+char *
+format_units(wmem_allocator_t *allocator, double size,
+             format_size_units_e unit, uint16_t flags,
+             int precision)
+{
+    wmem_strbuf_t *human_str = wmem_strbuf_new(allocator, NULL);
+    double power = 1000.0;
+    /* Index of the unprefixed (" ") entry in the active prefix table. */
+    int pfx_off = 6;
+    bool is_small = false;
+    /* is_small is when to use the longer, spelled out unit.
+     * We use it for inf, NaN, 0, and unprefixed small values,
+     * but not for unprefixed values using scientific notation
+     * when the value is outside the supported prefix range.
+     */
+    bool scientific = false;
+    double abs_size = fabs(size);
+    int exponent = 0;
+    static const char * const si_prefix[] = {" a", " f", " p", " n", " μ", " m", " ", " k", " M", " G", " T", " P", " E"};
+    static const char * const iec_prefix[] = {" ", " Ki", " Mi", " Gi", " Ti", " Pi", " Ei"};
+    const char * const *prefix = si_prefix;
+    int max_exp = (int)G_N_ELEMENTS(si_prefix) - 1;
+    int pfx_idx;
+
+    char *ret_val;
+
+    if (thousands_grouping_fmt == NULL)
+        test_printf_thousands_grouping();
+
+    if (flags & FORMAT_SIZE_PREFIX_IEC) {
+        prefix = iec_prefix;
+        max_exp = (int)G_N_ELEMENTS(iec_prefix) - 1;
+        power = 1024.0;
+        /* The IEC table has no sub-unit prefixes, so its unprefixed
+         * entry is the first one. Keeping the SI offset here would
+         * label unscaled values " Ei" and push every value >= 1024
+         * into scientific notation.
+         */
+        pfx_off = 0;
+    }
+
+    if (isfinite(size) && size != 0.0) {
+
+        double comp = precision == 0 ? 10.0 : 1.0;
+
+        /* For precision 0, use the range [10, 10*power) because only
+         * one significant digit is not as useful. This is what format_size
+         * does for integers. ("ls -h" uses one digit after the decimal
+         * point only for the [1, 10) range, g_format_size() always displays
+         * tenths.) Prefer non-prefixed units for the range [1,10), though.
+         *
+         * We have a limited number of units to check, so this (which
+         * can be unrolled) is presumably faster than log + floor + pow/exp
+         */
+        if (abs_size < 1.0) {
+            while (abs_size < comp) {
+                abs_size *= power;
+                exponent--;
+                if ((exponent + pfx_off) < 0) {
+                    /* Smaller than the smallest prefix in the table. */
+                    scientific = true;
+                    break;
+                }
+            }
+        } else {
+            while (abs_size >= comp*power) {
+                abs_size *= 1/power;
+                exponent++;
+                if ((exponent + pfx_off) > max_exp) {
+                    /* Larger than the largest prefix in the table. */
+                    scientific = true;
+                    break;
+                }
+            }
+        }
+    }
+
+    if (scientific) {
+        /* Print the original, unscaled value with the base unit. */
+        wmem_strbuf_append_printf(human_str, "%.*g", precision + 1, size);
+        exponent = 0;
+    } else {
+        if (exponent == 0) {
+            is_small = true;
+        }
+        size = copysign(abs_size, size);
+        // Truncate trailing zeros, but do it this way because we know
+        // we don't want scientific notation, and we don't want %g to
+        // switch to that if precision is small. (We could always use
+        // %g when precision is large.)
+        wmem_strbuf_append_printf(human_str, thousands_grouping_fmt_flt, precision, size);
+        truncate_numeric_strbuf(human_str, precision);
+        // XXX - when rounding to a certain precision, printf might
+        // round up to "power" from something like 999.99999995, which
+        // looks a little odd on a graph when transitioning from 1,000 bytes
+        // (for values just under 1 kB) to 1 kB (for values 1 kB and larger.)
+        // Due to edge cases in binary fp representation and how printf might
+        // round things, the right way to handle it is taking the printf output
+        // and comparing it to "1000" and "1024" and adjusting the exponent
+        // if so - though we need to compare to the version with the thousands
+        // separator if we have that (which makes it harder to use strnatcmp
+        // as is.)
+    }
+
+    /* Bound the index by the table actually in use (max_exp tracks the
+     * active table), not by the size of the SI table. */
+    pfx_idx = pfx_off + exponent;
+    if (pfx_idx >= 0 && pfx_idx <= max_exp) {
+        wmem_strbuf_append(human_str, prefix[pfx_idx]);
+    }
+
+    switch (unit) {
+        case FORMAT_SIZE_UNIT_NONE:
+            break;
+        case FORMAT_SIZE_UNIT_BYTES:
+            wmem_strbuf_append(human_str, is_small ? "bytes" : "B");
+            break;
+        case FORMAT_SIZE_UNIT_BITS:
+            wmem_strbuf_append(human_str, is_small ? "bits" : "b");
+            break;
+        case FORMAT_SIZE_UNIT_BITS_S:
+            wmem_strbuf_append(human_str, is_small ? "bits/s" : "bps");
+            break;
+        case FORMAT_SIZE_UNIT_BYTES_S:
+            wmem_strbuf_append(human_str, is_small ? "bytes/s" : "Bps");
+            break;
+        /* The following units have no abbreviated form, so is_small
+         * makes no difference to them. */
+        case FORMAT_SIZE_UNIT_PACKETS:
+            wmem_strbuf_append(human_str, "packets");
+            break;
+        case FORMAT_SIZE_UNIT_PACKETS_S:
+            wmem_strbuf_append(human_str, "packets/s");
+            break;
+        case FORMAT_SIZE_UNIT_EVENTS:
+            wmem_strbuf_append(human_str, "events");
+            break;
+        case FORMAT_SIZE_UNIT_EVENTS_S:
+            wmem_strbuf_append(human_str, "events/s");
+            break;
+        case FORMAT_SIZE_UNIT_FIELDS:
+            wmem_strbuf_append(human_str, "fields");
+            break;
+        case FORMAT_SIZE_UNIT_SECONDS:
+            wmem_strbuf_append(human_str, is_small ? "seconds" : "s");
+            break;
+        case FORMAT_SIZE_UNIT_ERLANGS:
+            wmem_strbuf_append(human_str, is_small ? "erlangs" : "E");
+            break;
+        default:
+            ws_assert_not_reached();
+    }
+
+    ret_val = wmem_strbuf_finalize(human_str);
+    /* Convention is a space between the value and the units. If we have
+     * a prefix, the space is before the prefix. There are two possible
+     * uses of FORMAT_SIZE_UNIT_NONE:
+     * 1. Add a unit immediately after the string returned. In this case,
+     *    we would want the string to end with a space if there's no prefix.
+     * 2. The unit appears somewhere else, e.g. in a legend, header, or
+     *    different column. In this case, we don't want the string to end
+     *    with a space if there's no prefix.
+     * chomping the string here, as we've traditionally done, optimizes for
+     * the latter case but makes the former case harder.
+     * Perhaps the right approach is to distinguish the cases with a new
+     * enum value.
+     */
+    return g_strchomp(ret_val);
+}
+
/* Given a size, return its value in a human-readable format */
-/* This doesn't handle fractional values. We might want to make size a double. */
+/* This doesn't handle fractional values. We might want to just
+ * call the version with the double and precision 0 (possibly
+ * slower due to the use of floating point math, but do we care?)
+ */
char *
format_size_wmem(wmem_allocator_t *allocator, int64_t size,
format_size_units_e unit, uint16_t flags)
@@ -418,6 +648,18 @@ format_size_wmem(wmem_allocator_t *allocator, int64_t size,
case FORMAT_SIZE_UNIT_PACKETS_S:
wmem_strbuf_append(human_str, is_small ? " packets/s" : "packets/s");
break;
+ case FORMAT_SIZE_UNIT_FIELDS:
+ wmem_strbuf_append(human_str, is_small ? " fields" : "fields");
+ break;
+ /* These aren't that practical to use with integers, but
+ * perhaps better than asserting.
+ */
+ case FORMAT_SIZE_UNIT_SECONDS:
+ wmem_strbuf_append(human_str, is_small ? " seconds" : "s");
+ break;
+ case FORMAT_SIZE_UNIT_ERLANGS:
+ wmem_strbuf_append(human_str, is_small ? " erlangs" : "E");
+ break;
default:
ws_assert_not_reached();
}
@@ -443,8 +685,9 @@ escape_char(char c, char *p)
ws_assert(p);
/*
- * Backslashes and double-quotes must
- * be escaped. Whitespace is also escaped.
+ * backslashes and double-quotes must be escaped (double-quotes
+ * are escaped by passing '"' as quote_char in escape_string_len)
+ * whitespace is also escaped.
*/
switch (c) {
case '\a': r = 'a'; break;
@@ -454,7 +697,6 @@ escape_char(char c, char *p)
case '\r': r = 'r'; break;
case '\t': r = 't'; break;
case '\v': r = 'v'; break;
- case '"': r = '"'; break;
case '\\': r = '\\'; break;
case '\0': r = '0'; break;
}
@@ -479,7 +721,8 @@ escape_null(char c, char *p)
static char *
escape_string_len(wmem_allocator_t *alloc, const char *string, ssize_t len,
- bool (*escape_func)(char c, char *p), bool add_quotes)
+ bool (*escape_func)(char c, char *p), bool add_quotes,
+ char quote_char, bool double_quote)
{
char c, r;
wmem_strbuf_t *buf;
@@ -495,8 +738,8 @@ escape_string_len(wmem_allocator_t *alloc, const char *string, ssize_t len,
buf = wmem_strbuf_new_sized(alloc, alloc_size);
- if (add_quotes)
- wmem_strbuf_append_c(buf, '"');
+ if (add_quotes && quote_char != '\0')
+ wmem_strbuf_append_c(buf, quote_char);
for (i = 0; i < len; i++) {
c = string[i];
@@ -504,14 +747,30 @@ escape_string_len(wmem_allocator_t *alloc, const char *string, ssize_t len,
wmem_strbuf_append_c(buf, '\\');
wmem_strbuf_append_c(buf, r);
}
+ else if (c == quote_char && quote_char != '\0') {
+ /* If quoting, we must escape the quote_char somehow. */
+ if (double_quote) {
+ wmem_strbuf_append_c(buf, c);
+ wmem_strbuf_append_c(buf, c);
+ } else {
+ wmem_strbuf_append_c(buf, '\\');
+ wmem_strbuf_append_c(buf, c);
+ }
+ }
+ else if (c == '\\' && quote_char != '\0' && !double_quote) {
+ /* If quoting, and escaping the quote_char with a backslash,
+ * then backslash must be escaped, even if escape_func doesn't. */
+ wmem_strbuf_append_c(buf, '\\');
+ wmem_strbuf_append_c(buf, '\\');
+ }
else {
/* Other UTF-8 bytes are passed through. */
wmem_strbuf_append_c(buf, c);
}
}
- if (add_quotes)
- wmem_strbuf_append_c(buf, '"');
+ if (add_quotes && quote_char != '\0')
+ wmem_strbuf_append_c(buf, quote_char);
return wmem_strbuf_finalize(buf);
}
@@ -519,18 +778,29 @@ escape_string_len(wmem_allocator_t *alloc, const char *string, ssize_t len,
+/* Escape len bytes of string using the escape_char callback, with '"' as
+ * the quote character and double_quote false, so a '"' in the input is
+ * written with a preceding backslash. The result is wrapped in '"' only
+ * when add_quotes is true.
+ */
char *
ws_escape_string_len(wmem_allocator_t *alloc, const char *string, ssize_t len, bool add_quotes)
{
- return escape_string_len(alloc, string, len, escape_char, add_quotes);
+ return escape_string_len(alloc, string, len, escape_char, add_quotes, '"', false);
}
+/* Same as ws_escape_string_len() but passes -1 as the length.
+ * NOTE(review): presumably escape_string_len() then works out the
+ * length of the NUL-terminated string itself; that code is not visible
+ * here - confirm.
+ */
char *
ws_escape_string(wmem_allocator_t *alloc, const char *string, bool add_quotes)
{
- return escape_string_len(alloc, string, -1, escape_char, add_quotes);
+ return escape_string_len(alloc, string, -1, escape_char, add_quotes, '"', false);
}
char *ws_escape_null(wmem_allocator_t *alloc, const char *string, size_t len, bool add_quotes)
{
- return escape_string_len(alloc, string, len, escape_null, add_quotes);
+ /* XXX: The existing behavior (maintained) here is not to escape
+ * backslashes even though NUL is escaped.
+ *
+ * NOTE(review): when add_quotes is true this passes '"' as quote_char
+ * with double_quote false; the quoting branches of escape_string_len
+ * then appear to backslash-escape both '"' and '\\' in the input, so
+ * the statement above seems to hold only when add_quotes is false -
+ * confirm which behavior is intended.
+ */
+ return escape_string_len(alloc, string, len, escape_null, add_quotes, add_quotes ? '"' : '\0', false);
+}
+
+/* Escape a NUL-terminated string for use as a CSV field.
+ *
+ * add_quotes:        wrap the result in quote_char (if quote_char is not
+ *                    '\0').
+ * quote_char:        the quote character; occurrences in the input are
+ *                    escaped.
+ * double_quote:      escape quote_char by doubling it rather than with a
+ *                    backslash.
+ * escape_whitespace: use the escape_char callback instead of escape_null.
+ */
+char *ws_escape_csv(wmem_allocator_t *alloc, const char *string, bool add_quotes, char quote_char, bool double_quote, bool escape_whitespace)
+{
+    bool (*escape_func)(char c, char *p) = escape_whitespace ? escape_char : escape_null;
+
+    return escape_string_len(alloc, string, -1, escape_func, add_quotes, quote_char, double_quote);
}
const char *