summary refs log tree commit diff stats
path: root/src/libnetdata/string
diff options
context:
space:
mode:
Diffstat (limited to 'src/libnetdata/string')
-rw-r--r-- src/libnetdata/string/README.md 9
-rw-r--r-- src/libnetdata/string/string.c 22
-rw-r--r-- src/libnetdata/string/string.h 17
-rw-r--r-- src/libnetdata/string/utf8.c 408
-rw-r--r-- src/libnetdata/string/utf8.h 78
5 files changed, 519 insertions, 15 deletions
diff --git a/src/libnetdata/string/README.md b/src/libnetdata/string/README.md
index 54c905946..c23160233 100644
--- a/src/libnetdata/string/README.md
+++ b/src/libnetdata/string/README.md
@@ -1,12 +1,3 @@
-<!--
-title: "String"
-custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/string/README.md
-sidebar_label: "String"
-learn_status: "Published"
-learn_topic_type: "Tasks"
-learn_rel_path: "Developers/libnetdata"
--->
-
# STRING
STRING provides a way to allocate and free text strings, while de-duplicating them.
diff --git a/src/libnetdata/string/string.c b/src/libnetdata/string/string.c
index 257a3cc4b..107c7eea5 100644
--- a/src/libnetdata/string/string.c
+++ b/src/libnetdata/string/string.c
@@ -347,16 +347,34 @@ void string_freez(STRING *string) {
string_stats_atomic_increment(partition, releases);
}
-inline size_t string_strlen(STRING *string) {
+inline size_t string_strlen(const STRING *string) {
if(unlikely(!string)) return 0;
return string->length - 1;
}
-inline const char *string2str(STRING *string) {
+inline const char *string2str(const STRING *string) {
if(unlikely(!string)) return "";
return string->str;
}
+// Returns true when the text of 'whole' finishes with the text of 'end'.
+// Identical pointers (including two NULLs) always match; a single NULL never matches.
+bool string_ends_with_string(const STRING *whole, const STRING *end) {
+    if(whole == end)
+        return true;
+
+    if(!whole || !end)
+        return false;
+
+    if(end->length > whole->length)
+        return false;
+
+    const char *haystack = string2str(whole);
+
+    if(end->length == whole->length)
+        return strcmp(haystack, string2str(end)) == 0;
+
+    // compare only the tail of 'whole' that is as long as 'end'
+    size_t suffix_len = string_strlen(end);
+    const char *tail = haystack + (string_strlen(whole) - suffix_len);
+    return strncmp(tail, end->str, suffix_len) == 0;
+}
+
+// Returns true when the text of 'whole' begins with the text of 'end'
+// (despite its name, 'end' is the candidate prefix in this function).
+// Identical pointers (including two NULLs) always match; a single NULL never matches.
+bool string_starts_with_string(const STRING *whole, const STRING *end) {
+    if(whole == end)
+        return true;
+
+    if(!whole || !end || end->length > whole->length)
+        return false;
+
+    const char *text = string2str(whole);
+    const char *prefix = string2str(end);
+
+    if(end->length == whole->length)
+        return strcmp(text, prefix) == 0;
+
+    return strncmp(text, prefix, string_strlen(end)) == 0;
+}
+
STRING *string_2way_merge(STRING *a, STRING *b) {
static STRING *X = NULL;
diff --git a/src/libnetdata/string/string.h b/src/libnetdata/string/string.h
index c44696be2..e86ac6fb5 100644
--- a/src/libnetdata/string/string.h
+++ b/src/libnetdata/string/string.h
@@ -14,8 +14,10 @@ STRING *string_strndupz(const char *str, size_t len);
STRING *string_dup(STRING *string);
void string_freez(STRING *string);
-size_t string_strlen(STRING *string);
-const char *string2str(STRING *string) NEVERNULL;
+size_t string_strlen(const STRING *string);
+const char *string2str(const STRING *string) NEVERNULL;
+bool string_ends_with_string(const STRING *whole, const STRING *end);
+bool string_starts_with_string(const STRING *whole, const STRING *end);
// keep common prefix/suffix and replace everything else with [x]
STRING *string_2way_merge(STRING *a, STRING *b);
@@ -30,10 +32,21 @@ static inline int string_strcmp(STRING *string, const char *s) {
return strcmp(string2str(string), s);
}
+// Compare at most n bytes of the STRING's text against the C string s, with strncmp() semantics.
+static inline int string_strncmp(STRING *string, const char *s, size_t n) {
+    const char *text = string2str(string);
+    return strncmp(text, s, n);
+}
+
void string_statistics(size_t *inserts, size_t *deletes, size_t *searches, size_t *entries, size_t *references, size_t *memory, size_t *duplications, size_t *releases);
int string_unittest(size_t entries);
void string_init(void);
+// Cleanup handler used by CLEAN_STRING: releases the STRING that the given pointer refers to.
+static inline void cleanup_string_pp(STRING **stringpp) {
+    if(!stringpp)
+        return;
+
+    string_freez(*stringpp);
+}
+
+#define CLEAN_STRING _cleanup_(cleanup_string_pp) STRING
+
#endif
diff --git a/src/libnetdata/string/utf8.c b/src/libnetdata/string/utf8.c
new file mode 100644
index 000000000..0b4f138a6
--- /dev/null
+++ b/src/libnetdata/string/utf8.c
@@ -0,0 +1,408 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "../libnetdata.h"
+
+#if defined(OS_WINDOWS)
+/*
+ * Convert any CodePage to UTF16
+ * Goals:
+ * 1. Destination is always NULL terminated
+ * 2. If the destination buffer is not enough, return as much as possible data (truncate)
+ * 3. Always return the number of wide characters written, including the null terminator
+ */
+
+// Parameters:
+//  - CodePage: the code page of 'src', handed to MultiByteToWideChar()
+//  - dst, dst_size: destination buffer and its capacity in wide characters;
+//    when dst is NULL or dst_size is 0, only the required size is computed
+//  - src, src_len: the source bytes; src_len is handed to MultiByteToWideChar() as-is
+//  - truncated: optional; set to true on invalid input, on size queries,
+//    on conversion failures and when the output had to be cut to fit
+// Returns the number of wide characters written (including the terminator), or 0 on failure.
+size_t any_to_utf16(uint32_t CodePage, wchar_t *dst, size_t dst_size, const char *src, int src_len, bool *truncated) {
+    if(!src || src_len == 0) {
+        // invalid input
+        if(truncated)
+            *truncated = true;
+
+        if(dst && dst_size)
+            *dst = L'\0';
+
+        return 0;
+    }
+
+    if(!dst || !dst_size) {
+        // the caller wants to know the buffer to allocate for the conversion
+
+        if(truncated)
+            *truncated = true;
+
+        int required = MultiByteToWideChar(CodePage, 0, src, src_len, NULL, 0);
+        if(required <= 0) return 0; // error in the conversion
+
+        // Add 1 for null terminator only if src_len is not -1
+        // so that the caller can call us again to get the entire string (not truncated)
+        return (size_t)required + ((src_len != -1) ? 1 : 0);
+    }
+
+    // do the conversion directly to the destination buffer
+    int rc = MultiByteToWideChar(CodePage, 0, src, src_len, dst, (int)dst_size);
+    if(rc <= 0) {
+        if(truncated)
+            *truncated = true;
+
+        // conversion failed, let's see why...
+        DWORD status = GetLastError();
+        if(status == ERROR_INSUFFICIENT_BUFFER) {
+            // it cannot fit entirely, let's allocate a new buffer to convert it
+            // and then truncate it to the destination buffer
+
+            // clear errno and LastError to clear the error of the
+            // MultiByteToWideChar() that failed
+            errno_clear();
+
+            // get the required size
+            int required_size = MultiByteToWideChar(CodePage, 0, src, src_len, NULL, 0);
+            if(required_size <= 0) {
+                // the size query failed too; a non-positive value must never
+                // reach the allocator, where it would become a huge size_t
+                *dst = L'\0';
+                return 0;
+            }
+
+            // mallocz() never fails (exits the program on NULL)
+            wchar_t *tmp = mallocz((size_t)required_size * sizeof(wchar_t));
+
+            // convert it, now it should fit
+            rc = MultiByteToWideChar(CodePage, 0, src, src_len, tmp, required_size);
+            if(rc <= 0) {
+                // it failed!
+                *dst = L'\0';
+                freez(tmp);
+                return 0;
+            }
+
+            size_t len = rc;
+
+            // copy as much as we can, leaving room for the terminator
+            // NOTE(review): the cut is made at an arbitrary wide character,
+            // so it may fall between the two halves of a surrogate pair
+            size_t copied = MIN(len, (dst_size - 1));
+            memcpy(dst, tmp, copied * sizeof(wchar_t));
+
+            // null terminate it
+            dst[copied] = L'\0';
+
+            // free the temporary buffer
+            freez(tmp);
+
+            // return the number of wide characters written
+            return MIN(len, dst_size);
+        }
+
+        // empty the destination
+        *dst = L'\0';
+        return 0;
+    }
+
+    size_t len = rc;
+
+    if(truncated)
+        *truncated = false;
+
+    if(len >= dst_size) {
+        // the buffer is completely full; the last slot must hold the terminator
+        if(dst[dst_size - 1] != L'\0') {
+            if(truncated)
+                *truncated = true;
+
+            // Truncate it to fit the null terminator
+            dst[dst_size - 1] = L'\0';
+        }
+        return dst_size;
+    }
+
+    if(dst[len - 1] != L'\0') {
+        // the result is not null terminated
+        // append the null
+        dst[len] = L'\0';
+        return len + 1;
+    }
+
+    // the result is already null terminated
+    return len;
+}
+
+/*
+ * Convert UTF16 (wide-character string) to UTF8
+ * Goals:
+ * 1. Destination is always NULL terminated
+ * 2. If the destination buffer is not enough, return as much as possible data (truncate)
+ * 3. Always return the number of bytes written, including the null terminator
+ */
+
+// Parameters:
+//  - dst, dst_size: destination buffer and its capacity in bytes;
+//    when dst is NULL or dst_size is 0, only the required size is computed
+//  - src, src_len: the source wide characters; src_len is handed to WideCharToMultiByte() as-is
+//  - truncated: optional; set to true on invalid input, on size queries,
+//    on conversion failures and when the output had to be cut to fit
+// Returns the number of bytes written (including the terminator), or 0 on failure.
+size_t utf16_to_utf8(char *dst, size_t dst_size, const wchar_t *src, int src_len, bool *truncated) {
+    if(!src || src_len == 0) {
+        // invalid input
+        if(truncated)
+            *truncated = true;
+
+        if(dst && dst_size)
+            *dst = '\0';
+
+        return 0;
+    }
+
+    if(!dst || dst_size == 0) {
+        // The caller wants to know the buffer size required for the conversion
+
+        if(truncated)
+            *truncated = true;
+
+        int required = WideCharToMultiByte(CP_UTF8, 0, src, src_len, NULL, 0, NULL, NULL);
+        if(required <= 0) return 0; // error in the conversion
+
+        // Add 1 for null terminator only if src_len is not -1
+        return (size_t)required + ((src_len != -1) ? 1 : 0);
+    }
+
+    // Perform the conversion directly into the destination buffer
+    int rc = WideCharToMultiByte(CP_UTF8, 0, src, src_len, dst, (int)dst_size, NULL, NULL);
+    if(rc <= 0) {
+        if(truncated)
+            *truncated = true;
+
+        // Conversion failed, let's see why...
+        DWORD status = GetLastError();
+        if(status == ERROR_INSUFFICIENT_BUFFER) {
+            // It cannot fit entirely, let's allocate a new buffer to convert it
+            // and then truncate it to the destination buffer
+
+            // Clear errno and LastError to clear the error of the
+            // WideCharToMultiByte() that failed
+            errno_clear();
+
+            // Get the required size
+            int required_size = WideCharToMultiByte(CP_UTF8, 0, src, src_len, NULL, 0, NULL, NULL);
+            if(required_size <= 0) {
+                // the size query failed too; a non-positive value must never
+                // reach the allocator, where it would become a huge size_t
+                *dst = '\0';
+                return 0;
+            }
+
+            // mallocz() never fails (exits the program on NULL)
+            char *tmp = mallocz((size_t)required_size * sizeof(char));
+
+            // Convert it, now it should fit
+            rc = WideCharToMultiByte(CP_UTF8, 0, src, src_len, tmp, required_size, NULL, NULL);
+            if(rc <= 0) {
+                // Conversion failed
+                *dst = '\0';
+                freez(tmp);
+                return 0;
+            }
+
+            size_t len = rc;
+
+            // Copy as much as we can, leaving room for the terminator
+            // NOTE(review): the cut is made at an arbitrary byte, so it may
+            // fall in the middle of a multi-byte UTF-8 sequence
+            size_t copied = MIN(len, (dst_size - 1));
+            memcpy(dst, tmp, copied * sizeof(char));
+
+            // Null-terminate it
+            dst[copied] = '\0';
+
+            // Free the temporary buffer
+            freez(tmp);
+
+            // Return the actual bytes written
+            return MIN(len, dst_size);
+        }
+
+        // Empty the destination
+        *dst = '\0';
+        return 0;
+    }
+
+    size_t len = rc;
+
+    if(truncated)
+        *truncated = false;
+
+    if(len >= dst_size) {
+        // the buffer is completely full; the last slot must hold the terminator
+        if(dst[dst_size - 1] != '\0') {
+            if(truncated)
+                *truncated = true;
+
+            // Truncate it to fit the null terminator
+            dst[dst_size - 1] = '\0';
+        }
+        return dst_size;
+    }
+
+    if(dst[len - 1] != '\0') {
+        // The result is not null-terminated
+        // Append the null terminator
+        dst[len] = '\0';
+        return len + 1;
+    }
+
+    // The result is already null-terminated
+    return len;
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+// Pick the capacity for a growing text buffer: required_size rounded up to a
+// multiple of 2048, but never less than twice the previous capacity.
+size_t txt_compute_new_size(size_t old_size, size_t required_size) {
+    const size_t granularity = 2048;
+
+    size_t rounded = required_size;
+    if(rounded % granularity != 0)
+        rounded += granularity;
+    rounded = (rounded / granularity) * granularity;
+
+    size_t doubled = old_size * 2;
+    return (rounded < doubled) ? doubled : rounded;
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+// TXT_UTF8
+
+// Release the buffer of a TXT_UTF8 and return it to the "nothing allocated" state.
+// 'size' is reset together with 'data': txt_utf8_resize() decides whether to allocate
+// by looking at 'size', so a stale value would let it return early and leave
+// callers writing through a NULL 'data'.
+void txt_utf8_cleanup(TXT_UTF8 *dst) {
+    freez(dst->data);
+    dst->data = NULL;
+    dst->size = 0;
+    dst->used = 0;
+}
+
+// Make sure the buffer can hold at least 'required_size' bytes; it never shrinks.
+// When 'keep' is true and a buffer exists, it is grown with reallocz();
+// otherwise the old buffer is dropped, a fresh one is allocated and 'used' is reset to 0.
+void txt_utf8_resize(TXT_UTF8 *dst, size_t required_size, bool keep) {
+    if(dst->size >= required_size)
+        return;
+
+    size_t capacity = txt_compute_new_size(dst->size, required_size);
+
+    if(!keep || !dst->data) {
+        txt_utf8_cleanup(dst);
+        dst->data = mallocz(capacity);
+        dst->used = 0;
+    }
+    else
+        dst->data = reallocz(dst->data, capacity);
+
+    dst->size = capacity;
+}
+
+// Turn the buffer into an empty, null terminated string ('used' becomes 1: just the terminator).
+void txt_utf8_empty(TXT_UTF8 *dst) {
+    txt_utf8_resize(dst, 1, false);
+    *dst->data = '\0';
+    dst->used = 1;
+}
+
+// Replace the contents of the buffer with the first txt_len bytes of txt, null terminated.
+void txt_utf8_set(TXT_UTF8 *dst, const char *txt, size_t txt_len) {
+    size_t with_terminator = txt_len + 1;
+
+    // the previous contents are not needed, so they are not preserved on growth
+    txt_utf8_resize(dst, with_terminator, false);
+
+    memcpy(dst->data, txt, txt_len);
+    dst->used = with_terminator;
+    dst->data[dst->used - 1] = '\0';
+}
+
+// Append txt_len bytes of txt to the buffer, keeping the result null terminated.
+void txt_utf8_append(TXT_UTF8 *dst, const char *txt, size_t txt_len) {
+    if(dst->used <= 1) {
+        // nothing but (at most) a terminator in there: appending equals setting
+        txt_utf8_set(dst, txt, txt_len);
+        return;
+    }
+
+    // 'used' already counts the terminator, so used + txt_len is the final size
+    txt_utf8_resize(dst, dst->used + txt_len, true);
+
+    // overwrite the existing terminator with the new text
+    char *tail = &dst->data[dst->used - 1];
+    memcpy(tail, txt, txt_len);
+
+    dst->used += txt_len;
+    dst->data[dst->used - 1] = '\0';
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+// TXT_UTF16
+
+// Release the buffer of a TXT_UTF16 and return it to the "nothing allocated" state.
+// All fields are reset, so that the structure can be cleaned up again or reused
+// without touching the freed buffer, the same way txt_utf8_cleanup() resets TXT_UTF8.
+void txt_utf16_cleanup(TXT_UTF16 *dst) {
+    freez(dst->data);
+    dst->data = NULL;
+    dst->size = 0;
+    dst->used = 0;
+}
+
+// Make sure the buffer can hold at least 'required_size' wide characters; it never shrinks.
+// When 'keep' is true and a buffer exists, it is grown with reallocz();
+// otherwise the old buffer is dropped, a fresh one is allocated and 'used' is reset to 0.
+void txt_utf16_resize(TXT_UTF16 *dst, size_t required_size, bool keep) {
+    if(dst->size >= required_size)
+        return;
+
+    // capacity is counted in wide characters; the allocators need bytes
+    size_t capacity = txt_compute_new_size(dst->size, required_size);
+
+    if(!keep || !dst->data) {
+        txt_utf16_cleanup(dst);
+        dst->data = mallocz(capacity * sizeof(wchar_t));
+        dst->used = 0;
+    }
+    else
+        dst->data = reallocz(dst->data, capacity * sizeof(wchar_t));
+
+    dst->size = capacity;
+}
+
+// Replace the contents of the buffer with the first txt_len wide characters of txt,
+// null terminated.
+void txt_utf16_set(TXT_UTF16 *dst, const wchar_t *txt, size_t txt_len) {
+    // The new text is written from the start of the buffer, so only txt_len + 1
+    // wide characters are needed and the previous contents do not have to be
+    // preserved when the buffer grows (same policy as txt_utf8_set()).
+    txt_utf16_resize(dst, txt_len + 1, false);
+
+    memcpy(dst->data, txt, txt_len * sizeof(wchar_t));
+    dst->used = txt_len + 1;
+    dst->data[dst->used - 1] = L'\0';
+}
+
+// Append txt_len wide characters of txt to the buffer, keeping the result null terminated.
+void txt_utf16_append(TXT_UTF16 *dst, const wchar_t *txt, size_t txt_len) {
+    if(dst->used <= 1) {
+        // nothing but (at most) a terminator in there: appending equals setting
+        txt_utf16_set(dst, txt, txt_len);
+        return;
+    }
+
+    // 'used' already counts the terminator, so used + txt_len is the final size
+    txt_utf16_resize(dst, dst->used + txt_len, true);
+
+    // overwrite the existing terminator with the new text
+    wchar_t *tail = &dst->data[dst->used - 1];
+    memcpy(tail, txt, txt_len * sizeof(wchar_t));
+
+    dst->used += txt_len;
+    dst->data[dst->used - 1] = '\0';
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+/*
+ * Convert a UTF-16 string into dst, growing the dst buffer when it is too small.
+ *
+ *  - src and src_len are handed to utf16_to_utf8() unchanged
+ *  - returns false, leaving dst as an empty string, when there is no input
+ *    or when the required size cannot be determined; returns true otherwise
+ */
+bool wchar_to_txt_utf8(TXT_UTF8 *dst, const wchar_t *src, int src_len) {
+ if(!src || !src_len) {
+ txt_utf8_empty(dst);
+ return false;
+ }
+
+ if(!dst->data && !dst->size) {
+ size_t size = utf16_to_utf8(NULL, 0, src, src_len, NULL); // size query only, nothing is converted here
+ if(!size) {
+ txt_utf8_empty(dst);
+ return false;
+ }
+
+ // the buffer is sized from the query above (no extra +1 is added), so the conversion below is expected to fit
+ txt_utf8_resize(dst, size, false);
+ }
+
+ bool truncated = false;
+ dst->used = utf16_to_utf8(dst->data, dst->size, src, src_len, &truncated);
+ if(truncated) {
+ // we need to resize
+ size_t needed = utf16_to_utf8(NULL, 0, src, src_len, NULL); // find the size needed
+ if(!needed) {
+ txt_utf8_empty(dst);
+ return false;
+ }
+
+ txt_utf8_resize(dst, needed, false); // the truncated output is discarded, the conversion is redone from scratch
+ dst->used = utf16_to_utf8(dst->data, dst->size, src, src_len, NULL);
+ }
+
+ // Make sure it is not zero padded at the end
+ while(dst->used >= 2 && dst->data[dst->used - 2] == 0)
+ dst->used--;
+
+ internal_fatal(strlen(dst->data) + 1 != dst->used,
+ "Wrong UTF8 string length");
+
+ return true;
+}
+
+// Convert the contents of a TXT_UTF16 into a TXT_UTF8.
+// Both structures must be consistent: 'data' and 'size' are either both set or both unset.
+// Returns false, with utf8 set to an empty string, when there is nothing to convert.
+bool txt_utf16_to_utf8(TXT_UTF8 *utf8, TXT_UTF16 *utf16) {
+    fatal_assert(utf8 && ((utf8->data && utf8->size) || (!utf8->data && !utf8->size)));
+    fatal_assert(utf16 && ((utf16->data && utf16->size) || (!utf16->data && !utf16->size)));
+
+    // 'used' counts the null terminator: 1 is an empty string, 0 means nothing
+    // was ever stored. Without this check, used == 0 would become src_len == -1
+    // below, asking the converter to look for a terminator in a buffer that
+    // was never written.
+    if(utf16->used <= 1) {
+        txt_utf8_empty(utf8);
+        return false;
+    }
+
+    // pass the characters without their null terminator;
+    // utf16_to_utf8() null terminates the utf8 result by itself
+    return wchar_to_txt_utf8(utf8, utf16->data, (int)utf16->used - 1);
+}
+
+// Convert a null terminated UTF-16 string to a newly allocated UTF-8 string.
+//  - dst_len: optional; receives the length of the result without its terminator
+// Returns NULL (and a zero length) when the required size cannot be determined.
+// The returned buffer comes from mallocz(); releasing it is up to the caller.
+char *utf16_to_utf8_strdupz(const wchar_t *src, size_t *dst_len) {
+    size_t size = utf16_to_utf8(NULL, 0, src, -1, NULL);
+    if(!size) {
+        if(dst_len)
+            *dst_len = 0;
+
+        return NULL;
+    }
+
+    char *dst = mallocz(size);
+    size = utf16_to_utf8(dst, size, src, -1, NULL);
+
+    // utf16_to_utf8() returns 0 on failure, leaving dst as an empty string;
+    // do not let "size - 1" wrap around in that case
+    if(dst_len)
+        *dst_len = size ? size - 1 : 0;
+
+    return dst;
+}
+
+#endif
diff --git a/src/libnetdata/string/utf8.h b/src/libnetdata/string/utf8.h
index 3e6c8c288..f27ba5447 100644
--- a/src/libnetdata/string/utf8.h
+++ b/src/libnetdata/string/utf8.h
@@ -3,7 +3,81 @@
#ifndef NETDATA_STRING_UTF8_H
#define NETDATA_STRING_UTF8_H 1
-#define IS_UTF8_BYTE(x) ((x) & 0x80)
-#define IS_UTF8_STARTBYTE(x) (IS_UTF8_BYTE(x)&&((x) & 0x40))
+#include "../libnetdata.h"
+
+#define IS_UTF8_BYTE(x) ((uint8_t)(x) & (uint8_t)0x80) // non-zero when the top bit of the byte is set
+#define IS_UTF8_STARTBYTE(x) (IS_UTF8_BYTE(x) && ((uint8_t)(x) & (uint8_t)0x40)) // true when the top two bits are both set
+
+#ifndef _countof
+#define _countof(x) (sizeof(x) / sizeof(*(x))) // element count; only meaningful for real arrays, not pointers
+#endif
+
+#if defined(OS_WINDOWS)
+
+// return an always null terminated wide string, truncate to given size if destination is not big enough,
+// src_len can be -1 use all of it.
+// returns zero on errors, > 0 otherwise (including the null, even if src is not null terminated).
+size_t any_to_utf16(uint32_t CodePage, wchar_t *dst, size_t dst_size, const char *src, int src_len, bool *truncated);
+
+// always null terminated, truncated if it does not fit, src_len can be -1 to use all of it.
+// returns zero on errors, > 0 otherwise (including the null, even if src is not null terminated).
+#define utf8_to_utf16(utf16, utf16_count, src, src_len) any_to_utf16(CP_UTF8, utf16, utf16_count, src, src_len, NULL)
+
+// always null terminated, truncated if it does not fit, src_len can be -1 to use all of it.
+// returns zero on errors, > 0 otherwise (including the null, even if src is not null terminated).
+size_t utf16_to_utf8(char *dst, size_t dst_size, const wchar_t *src, int src_len, bool *truncated);
+
+// --------------------------------------------------------------------------------------------------------------------
+// TXT_UTF8
+
+typedef enum __attribute__((packed)) { // NOTE(review): presumably tags where a TXT_UTF8 value came from - confirm against callers
+ TXT_SOURCE_UNKNOWN = 0, // the zero value, so zero-initialized structures start here
+ TXT_SOURCE_PROVIDER,
+ TXT_SOURCE_FIELD_CACHE,
+ TXT_SOURCE_EVENT_LOG,
+ TXT_SOURCE_HARDCODED,
+
+ // terminator
+ TXT_SOURCE_MAX, // not a source itself: one past the last value above
+} TXT_SOURCE;
+
+typedef struct {
+ char *data; // buffer allocated and released by the txt_utf8_*() functions
+ uint32_t size; // the allocated size of data buffer, in bytes
+ uint32_t used; // the used size of the data buffer (including null terminators, if any)
+ TXT_SOURCE src; // NOTE(review): not modified by the txt_utf8_*() functions; presumably maintained by callers - confirm
+} TXT_UTF8;
+
+void txt_utf8_append(TXT_UTF8 *dst, const char *txt, size_t txt_len);
+void txt_utf8_set(TXT_UTF8 *dst, const char *txt, size_t txt_len);
+void txt_utf8_empty(TXT_UTF8 *dst);
+void txt_utf8_resize(TXT_UTF8 *dst, size_t required_size, bool keep);
+void txt_utf8_cleanup(TXT_UTF8 *dst);
+
+// --------------------------------------------------------------------------------------------------------------------
+// TXT_UTF16
+
+typedef struct {
+ wchar_t *data; // buffer allocated and released by the txt_utf16_*() functions
+ uint32_t size; // the allocated size of data buffer, in wchar_t elements (not bytes)
+ uint32_t used; // the used size of the data buffer, in wchar_t elements (including null terminators, if any)
+} TXT_UTF16;
+
+void txt_utf16_cleanup(TXT_UTF16 *dst);
+void txt_utf16_resize(TXT_UTF16 *dst, size_t required_size, bool keep);
+void txt_utf16_set(TXT_UTF16 *dst, const wchar_t *txt, size_t txt_len);
+void txt_utf16_append(TXT_UTF16 *dst, const wchar_t *txt, size_t txt_len);
+
+// --------------------------------------------------------------------------------------------------------------------
+
+size_t txt_compute_new_size(size_t old_size, size_t required_size);
+
+bool txt_utf16_to_utf8(TXT_UTF8 *utf8, TXT_UTF16 *utf16);
+bool wchar_to_txt_utf8(TXT_UTF8 *dst, const wchar_t *src, int src_len);
+char *utf16_to_utf8_strdupz(const wchar_t *src, size_t *dst_len);
+
+// --------------------------------------------------------------------------------------------------------------------
+
+#endif // OS_WINDOWS
#endif /* NETDATA_STRING_UTF8_H */