summaryrefslogtreecommitdiffstats
path: root/src/libnetdata/string
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-07-24 09:54:23 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-07-24 09:54:44 +0000
commit836b47cb7e99a977c5a23b059ca1d0b5065d310e (patch)
tree1604da8f482d02effa033c94a84be42bc0c848c3 /src/libnetdata/string
parentReleasing debian version 1.44.3-2. (diff)
downloadnetdata-836b47cb7e99a977c5a23b059ca1d0b5065d310e.tar.xz
netdata-836b47cb7e99a977c5a23b059ca1d0b5065d310e.zip
Merging upstream version 1.46.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/libnetdata/string')
-rw-r--r--src/libnetdata/string/README.md25
-rw-r--r--src/libnetdata/string/string.c704
-rw-r--r--src/libnetdata/string/string.h37
-rw-r--r--src/libnetdata/string/utf8.h9
4 files changed, 775 insertions, 0 deletions
diff --git a/src/libnetdata/string/README.md b/src/libnetdata/string/README.md
new file mode 100644
index 000000000..54c905946
--- /dev/null
+++ b/src/libnetdata/string/README.md
@@ -0,0 +1,25 @@
+<!--
+title: "String"
+custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/string/README.md
+sidebar_label: "String"
+learn_status: "Published"
+learn_topic_type: "Tasks"
+learn_rel_path: "Developers/libnetdata"
+-->
+
+# STRING
+
+STRING provides a way to allocate and free text strings, while de-duplicating them.
+
+It can be used similarly to libc string functions:
+
+ - `strdup()` and `strdupz()` become `string_strdupz()`.
+ - `strlen()` becomes `string_strlen()` (and it does not walkthrough the bytes of the string).
+ - `free()` and `freez()` become `string_freez()`.
+
+There is also a special `string_dup()` function that increases the reference counter of a STRING, avoiding the
+index lookup to find it.
+
+Once there is a `STRING *`, the actual `const char *` can be accessed with `string2str()`.
+
+All STRING should be constant. Changing the contents of a `const char *` that has been acquired by `string2str()` should never happen.
diff --git a/src/libnetdata/string/string.c b/src/libnetdata/string/string.c
new file mode 100644
index 000000000..94c11f4b9
--- /dev/null
+++ b/src/libnetdata/string/string.c
@@ -0,0 +1,704 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "../libnetdata.h"
+#include <Judy.h>
+
+typedef int32_t REFCOUNT;
+
+// ----------------------------------------------------------------------------
+// STRING implementation - dedup all STRING
+
+#define STRING_PARTITION_SHIFTS (0)
+#define STRING_PARTITIONS (256 >> STRING_PARTITION_SHIFTS)
+#define string_partition_str(str) ((uint8_t)((str)[0]) >> STRING_PARTITION_SHIFTS)
+#define string_partition(string) (string_partition_str((string)->str))
+
+struct netdata_string {
+ uint32_t length; // the string length including the terminating '\0'
+
+ REFCOUNT refcount; // how many times this string is used
+ // We use a signed number to be able to detect duplicate frees of a string.
+ // If at any point this goes below zero, we have a duplicate free.
+
+ const char str[]; // the string itself, is appended to this structure
+};
+
+static struct string_partition {
+ RW_SPINLOCK spinlock; // the R/W spinlock to protect the Judy array
+
+ Pvoid_t JudyHSArray; // the Judy array - hashtable
+
+ size_t inserts; // the number of successful inserts to the index
+ size_t deletes; // the number of successful deleted from the index
+
+ long int entries; // the number of entries in the index
+ long int memory; // the memory used, without the JudyHS index
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ // internal statistics
+
+ struct {
+ size_t searches; // the number of successful searches in the index
+ size_t releases; // when a string is unreferenced
+ size_t duplications; // when a string is referenced
+ long int active_references; // the number of active references alive
+ } atomic;
+
+ size_t found_deleted_on_search;
+ size_t found_available_on_search;
+ size_t found_deleted_on_insert;
+ size_t found_available_on_insert;
+ size_t spins;
+#endif
+
+} string_base[STRING_PARTITIONS] = { 0 };
+
+#ifdef NETDATA_INTERNAL_CHECKS
+#define string_stats_atomic_increment(partition, var) __atomic_add_fetch(&string_base[partition].atomic.var, 1, __ATOMIC_RELAXED)
+#define string_stats_atomic_decrement(partition, var) __atomic_sub_fetch(&string_base[partition].atomic.var, 1, __ATOMIC_RELAXED)
+#define string_internal_stats_add(partition, var, val) __atomic_add_fetch(&string_base[partition].var, val, __ATOMIC_RELAXED)
+#else
+#define string_stats_atomic_increment(partition, var) do {;} while(0)
+#define string_stats_atomic_decrement(partition, var) do {;} while(0)
+#define string_internal_stats_add(partition, var, val) do {;} while(0)
+#endif
+
+void string_statistics(size_t *inserts, size_t *deletes, size_t *searches, size_t *entries, size_t *references, size_t *memory, size_t *duplications, size_t *releases) {
+ if (inserts) *inserts = 0;
+ if (deletes) *deletes = 0;
+ if (searches) *searches = 0;
+ if (entries) *entries = 0;
+ if (references) *references = 0;
+ if (memory) *memory = 0;
+ if (duplications) *duplications = 0;
+ if (releases) *releases = 0;
+
+ for(size_t i = 0; i < STRING_PARTITIONS ;i++) {
+ if (inserts) *inserts += string_base[i].inserts;
+ if (deletes) *deletes += string_base[i].deletes;
+ if (entries) *entries += (size_t) string_base[i].entries;
+ if (memory) *memory += (size_t) string_base[i].memory;
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ if (searches) *searches += string_base[i].atomic.searches;
+ if (references) *references += (size_t) string_base[i].atomic.active_references;
+ if (duplications) *duplications += string_base[i].atomic.duplications;
+ if (releases) *releases += string_base[i].atomic.releases;
+#endif
+ }
+}
+
+#define string_entry_acquire(se) __atomic_add_fetch(&((se)->refcount), 1, __ATOMIC_SEQ_CST)
+#define string_entry_release(se) __atomic_sub_fetch(&((se)->refcount), 1, __ATOMIC_SEQ_CST)
+
+static inline bool string_entry_check_and_acquire(STRING *se) {
+#ifdef NETDATA_INTERNAL_CHECKS
+ uint8_t partition = string_partition(se);
+#endif
+
+ REFCOUNT expected, desired, count = 0;
+
+ expected = __atomic_load_n(&se->refcount, __ATOMIC_SEQ_CST);
+
+ do {
+ count++;
+
+ if(expected <= 0) {
+ // We cannot use this.
+ // The reference counter reached value zero,
+ // so another thread is deleting this.
+ string_internal_stats_add(partition, spins, count - 1);
+ return false;
+ }
+
+ desired = expected + 1;
+
+ } while(!__atomic_compare_exchange_n(&se->refcount, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
+
+ string_internal_stats_add(partition, spins, count - 1);
+
+ // statistics
+ // string_base.active_references is altered at the in string_strdupz() and string_freez()
+ string_stats_atomic_increment(partition, duplications);
+
+ return true;
+}
+
+STRING *string_dup(STRING *string) {
+ if(unlikely(!string)) return NULL;
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ if(unlikely(__atomic_load_n(&string->refcount, __ATOMIC_SEQ_CST) <= 0))
+ fatal("STRING: tried to %s() a string that is freed (it has %d references).", __FUNCTION__, string->refcount);
+#endif
+
+ string_entry_acquire(string);
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ uint8_t partition = string_partition(string);
+#endif
+
+ // statistics
+ string_stats_atomic_increment(partition, active_references);
+ string_stats_atomic_increment(partition, duplications);
+
+ return string;
+}
+
+// Search the index and return an ACQUIRED string entry, or NULL
+static inline STRING *string_index_search(const char *str, size_t length) {
+ STRING *string;
+
+ uint8_t partition = string_partition_str(str);
+
+ // Find the string in the index
+ // With a read-lock so that multiple readers can use the index concurrently.
+
+ rw_spinlock_read_lock(&string_base[partition].spinlock);
+
+ Pvoid_t *Rc;
+ Rc = JudyHSGet(string_base[partition].JudyHSArray, (void *)str, length - 1);
+ if(likely(Rc)) {
+ // found in the hash table
+ string = *Rc;
+
+ if(string_entry_check_and_acquire(string)) {
+ // we can use this entry
+ string_internal_stats_add(partition, found_available_on_search, 1);
+ }
+ else {
+ // this entry is about to be deleted by another thread
+ // do not touch it, let it go...
+ string = NULL;
+ string_internal_stats_add(partition, found_deleted_on_search, 1);
+ }
+ }
+ else {
+ // not found in the hash table
+ string = NULL;
+ }
+
+ string_stats_atomic_increment(partition, searches);
+ rw_spinlock_read_unlock(&string_base[partition].spinlock);
+
+ return string;
+}
+
+// Insert a string to the index and return an ACQUIRED string entry,
+// or NULL if the call needs to be retried (a deleted entry with the same key is still in the index)
+// The returned entry is ACQUIRED, and it can either be:
+// 1. a new item inserted, or
+// 2. an item found in the index that is not currently deleted
+static inline STRING *string_index_insert(const char *str, size_t length) {
+ STRING *string;
+
+ uint8_t partition = string_partition_str(str);
+
+ rw_spinlock_write_lock(&string_base[partition].spinlock);
+
+ STRING **ptr;
+ {
+ JError_t J_Error;
+ Pvoid_t *Rc = JudyHSIns(&string_base[partition].JudyHSArray, (void *)str, length - 1, &J_Error);
+ if (unlikely(Rc == PJERR)) {
+ fatal(
+ "STRING: Cannot insert entry with name '%s' to JudyHS, JU_ERRNO_* == %u, ID == %d",
+ str,
+ JU_ERRNO(&J_Error),
+ JU_ERRID(&J_Error));
+ }
+ ptr = (STRING **)Rc;
+ }
+
+ if (likely(*ptr == 0)) {
+ // a new item added to the index
+ size_t mem_size = sizeof(STRING) + length;
+ string = mallocz(mem_size);
+ strcpy((char *)string->str, str);
+ string->length = length;
+ string->refcount = 1;
+ *ptr = string;
+ string_base[partition].inserts++;
+ string_base[partition].entries++;
+ string_base[partition].memory += (long)(mem_size + JUDYHS_INDEX_SIZE_ESTIMATE(length));
+ }
+ else {
+ // the item is already in the index
+ string = *ptr;
+
+ if(string_entry_check_and_acquire(string)) {
+ // we can use this entry
+ string_internal_stats_add(partition, found_available_on_insert, 1);
+ }
+ else {
+ // this entry is about to be deleted by another thread
+ // do not touch it, let it go...
+ string = NULL;
+ string_internal_stats_add(partition, found_deleted_on_insert, 1);
+ }
+
+ string_stats_atomic_increment(partition, searches);
+ }
+
+ rw_spinlock_write_unlock(&string_base[partition].spinlock);
+ return string;
+}
+
+// delete an entry from the index
+static inline void string_index_delete(STRING *string) {
+ uint8_t partition = string_partition(string);
+
+ rw_spinlock_write_lock(&string_base[partition].spinlock);
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ if(unlikely(__atomic_load_n(&string->refcount, __ATOMIC_SEQ_CST) != 0))
+ fatal("STRING: tried to delete a string at %s() that is already freed (it has %d references).", __FUNCTION__, string->refcount);
+#endif
+
+ bool deleted = false;
+
+ if (likely(string_base[partition].JudyHSArray)) {
+ JError_t J_Error;
+ int ret = JudyHSDel(&string_base[partition].JudyHSArray, (void *)string->str, string->length - 1, &J_Error);
+ if (unlikely(ret == JERR)) {
+ netdata_log_error(
+ "STRING: Cannot delete entry with name '%s' from JudyHS, JU_ERRNO_* == %u, ID == %d",
+ string->str,
+ JU_ERRNO(&J_Error),
+ JU_ERRID(&J_Error));
+ } else
+ deleted = true;
+ }
+
+ if (unlikely(!deleted))
+ netdata_log_error("STRING: tried to delete '%s' that is not in the index. Ignoring it.", string->str);
+ else {
+ size_t mem_size = sizeof(STRING) + string->length;
+ string_base[partition].deletes++;
+ string_base[partition].entries--;
+ string_base[partition].memory -= (long)(mem_size + JUDYHS_INDEX_SIZE_ESTIMATE(string->length));
+ freez(string);
+ }
+
+ rw_spinlock_write_unlock(&string_base[partition].spinlock);
+}
+
+STRING *string_strdupz(const char *str) {
+ if(unlikely(!str || !*str)) return NULL;
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ uint8_t partition = string_partition_str(str);
+#endif
+
+ size_t length = strlen(str) + 1;
+ STRING *string = string_index_search(str, length);
+
+ while(!string) {
+ // The search above did not find anything,
+ // We loop here, because during insert we may find an entry that is being deleted by another thread.
+ // So, we have to let it go and retry to insert it again.
+
+ string = string_index_insert(str, length);
+ }
+
+ // statistics
+ string_stats_atomic_increment(partition, active_references);
+
+ return string;
+}
+
+STRING *string_strndupz(const char *str, size_t len) {
+ if(unlikely(!str || !*str || !len)) return NULL;
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ uint8_t partition = string_partition_str(str);
+#endif
+
+ char buf[len + 1];
+ memcpy(buf, str, len);
+ buf[len] = '\0';
+
+ STRING *string = string_index_search(buf, len + 1);
+ while(!string)
+ string = string_index_insert(buf, len + 1);
+
+ string_stats_atomic_increment(partition, active_references);
+ return string;
+}
+
+void string_freez(STRING *string) {
+ if(unlikely(!string)) return;
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ uint8_t partition = string_partition(string);
+#endif
+ REFCOUNT refcount = string_entry_release(string);
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ if(unlikely(refcount < 0))
+ fatal("STRING: tried to %s() a string that is already freed (it has %d references).", __FUNCTION__, string->refcount);
+#endif
+
+ if(unlikely(refcount == 0))
+ string_index_delete(string);
+
+ // statistics
+ string_stats_atomic_decrement(partition, active_references);
+ string_stats_atomic_increment(partition, releases);
+}
+
+inline size_t string_strlen(STRING *string) {
+ if(unlikely(!string)) return 0;
+ return string->length - 1;
+}
+
+inline const char *string2str(STRING *string) {
+ if(unlikely(!string)) return "";
+ return string->str;
+}
+
+STRING *string_2way_merge(STRING *a, STRING *b) {
+ static STRING *X = NULL;
+
+ if(unlikely(!X)) {
+ X = string_strdupz("[x]");
+ }
+
+ if(unlikely(a == b)) return string_dup(a);
+ if(unlikely(a == X)) return string_dup(a);
+ if(unlikely(b == X)) return string_dup(b);
+ if(unlikely(!a)) return string_dup(X);
+ if(unlikely(!b)) return string_dup(X);
+
+ size_t alen = string_strlen(a);
+ size_t blen = string_strlen(b);
+ size_t length = alen + blen + string_strlen(X) + 1;
+ char buf1[length + 1], buf2[length + 1], *dst1;
+ const char *s1, *s2;
+
+ s1 = string2str(a);
+ s2 = string2str(b);
+ dst1 = buf1;
+ for( ; *s1 && *s2 && *s1 == *s2 ;s1++, s2++)
+ *dst1++ = *s1;
+
+ *dst1 = '\0';
+
+ if(*s1 != '\0' || *s2 != '\0') {
+ *dst1++ = '[';
+ *dst1++ = 'x';
+ *dst1++ = ']';
+
+ s1 = &(string2str(a))[alen - 1];
+ s2 = &(string2str(b))[blen - 1];
+ char *dst2 = &buf2[length];
+ *dst2 = '\0';
+ for (; *s1 && *s2 && *s1 == *s2; s1--, s2--)
+ *(--dst2) = *s1;
+
+ strcpy(dst1, dst2);
+ }
+
+ return string_strdupz(buf1);
+}
+
+// ----------------------------------------------------------------------------
+// STRING unit test
+
+struct thread_unittest {
+ int join;
+ int dups;
+};
+
+static void *string_thread(void *arg) {
+ struct thread_unittest *tu = arg;
+
+ for(; 1 ;) {
+ if(__atomic_load_n(&tu->join, __ATOMIC_RELAXED))
+ break;
+
+ STRING *s = string_strdupz("string thread checking 1234567890");
+
+ for(int i = 0; i < tu->dups ; i++)
+ string_dup(s);
+
+ for(int i = 0; i < tu->dups ; i++)
+ string_freez(s);
+
+ string_freez(s);
+ }
+
+ return arg;
+}
+
+static char **string_unittest_generate_names(size_t entries) {
+ char **names = mallocz(sizeof(char *) * entries);
+ for(size_t i = 0; i < entries ;i++) {
+ char buf[25 + 1] = "";
+ snprintfz(buf, sizeof(buf) - 1, "name.%zu.0123456789.%zu \t !@#$%%^&*(),./[]{}\\|~`", i, entries / 2 + i);
+ names[i] = strdupz(buf);
+ }
+ return names;
+}
+
+static void string_unittest_free_char_pp(char **pp, size_t entries) {
+ for(size_t i = 0; i < entries ;i++)
+ freez(pp[i]);
+
+ freez(pp);
+}
+
+static long unittest_string_entries(void) {
+ long entries = 0;
+ for(size_t p = 0; p < STRING_PARTITIONS ;p++)
+ entries += string_base[p].entries;
+
+ return entries;
+}
+
+#ifdef NETDATA_INTERNAL_CHECKS
+
+static size_t unittest_string_found_deleted_on_search(void) {
+ size_t entries = 0;
+ for(size_t p = 0; p < STRING_PARTITIONS ;p++)
+ entries += string_base[p].found_deleted_on_search;
+
+ return entries;
+}
+static size_t unittest_string_found_available_on_search(void) {
+ size_t entries = 0;
+ for(size_t p = 0; p < STRING_PARTITIONS ;p++)
+ entries += string_base[p].found_available_on_search;
+
+ return entries;
+}
+static size_t unittest_string_found_deleted_on_insert(void) {
+ size_t entries = 0;
+ for(size_t p = 0; p < STRING_PARTITIONS ;p++)
+ entries += string_base[p].found_deleted_on_insert;
+
+ return entries;
+}
+static size_t unittest_string_found_available_on_insert(void) {
+ size_t entries = 0;
+ for(size_t p = 0; p < STRING_PARTITIONS ;p++)
+ entries += string_base[p].found_available_on_insert;
+
+ return entries;
+}
+static size_t unittest_string_spins(void) {
+ size_t entries = 0;
+ for(size_t p = 0; p < STRING_PARTITIONS ;p++)
+ entries += string_base[p].spins;
+
+ return entries;
+}
+
+#endif // NETDATA_INTERNAL_CHECKS
+
+int string_unittest(size_t entries) {
+ size_t errors = 0;
+
+ fprintf(stderr, "Generating %zu names and values...\n", entries);
+ char **names = string_unittest_generate_names(entries);
+
+ // check string
+ {
+ long entries_starting = unittest_string_entries();
+
+ fprintf(stderr, "\nChecking strings...\n");
+
+ STRING *s1 = string_strdupz("hello unittest");
+ STRING *s2 = string_strdupz("hello unittest");
+ if(s1 != s2) {
+ errors++;
+ fprintf(stderr, "ERROR: duplicating strings are not deduplicated\n");
+ }
+ else
+ fprintf(stderr, "OK: duplicating string are deduplicated\n");
+
+ STRING *s3 = string_dup(s1);
+ if(s3 != s1) {
+ errors++;
+ fprintf(stderr, "ERROR: cloning strings are not deduplicated\n");
+ }
+ else
+ fprintf(stderr, "OK: cloning string are deduplicated\n");
+
+ if(s1->refcount != 3) {
+ errors++;
+ fprintf(stderr, "ERROR: string refcount is not 3\n");
+ }
+ else
+ fprintf(stderr, "OK: string refcount is 3\n");
+
+ STRING *s4 = string_strdupz("world unittest");
+ if(s4 == s1) {
+ errors++;
+ fprintf(stderr, "ERROR: string is sharing pointers on different strings\n");
+ }
+ else
+ fprintf(stderr, "OK: string is properly handling different strings\n");
+
+ usec_t start_ut, end_ut;
+ STRING **strings = mallocz(entries * sizeof(STRING *));
+
+ start_ut = now_realtime_usec();
+ for(size_t i = 0; i < entries ;i++) {
+ strings[i] = string_strdupz(names[i]);
+ }
+ end_ut = now_realtime_usec();
+ fprintf(stderr, "Created %zu strings in %"PRIu64" usecs\n", entries, end_ut - start_ut);
+
+ start_ut = now_realtime_usec();
+ for(size_t i = 0; i < entries ;i++) {
+ strings[i] = string_dup(strings[i]);
+ }
+ end_ut = now_realtime_usec();
+ fprintf(stderr, "Cloned %zu strings in %"PRIu64" usecs\n", entries, end_ut - start_ut);
+
+ start_ut = now_realtime_usec();
+ for(size_t i = 0; i < entries ;i++) {
+ strings[i] = string_strdupz(string2str(strings[i]));
+ }
+ end_ut = now_realtime_usec();
+ fprintf(stderr, "Found %zu existing strings in %"PRIu64" usecs\n", entries, end_ut - start_ut);
+
+ start_ut = now_realtime_usec();
+ for(size_t i = 0; i < entries ;i++) {
+ string_freez(strings[i]);
+ }
+ end_ut = now_realtime_usec();
+ fprintf(stderr, "Released %zu referenced strings in %"PRIu64" usecs\n", entries, end_ut - start_ut);
+
+ start_ut = now_realtime_usec();
+ for(size_t i = 0; i < entries ;i++) {
+ string_freez(strings[i]);
+ }
+ end_ut = now_realtime_usec();
+ fprintf(stderr, "Released (again) %zu referenced strings in %"PRIu64" usecs\n", entries, end_ut - start_ut);
+
+ start_ut = now_realtime_usec();
+ for(size_t i = 0; i < entries ;i++) {
+ string_freez(strings[i]);
+ }
+ end_ut = now_realtime_usec();
+ fprintf(stderr, "Freed %zu strings in %"PRIu64" usecs\n", entries, end_ut - start_ut);
+
+ freez(strings);
+
+ if(unittest_string_entries() != entries_starting + 2) {
+ errors++;
+ fprintf(stderr, "ERROR: strings dictionary should have %ld items but it has %ld\n",
+ entries_starting + 2, unittest_string_entries());
+ }
+ else
+ fprintf(stderr, "OK: strings dictionary has 2 items\n");
+ }
+
+ // check 2-way merge
+ {
+ struct testcase {
+ char *src1; char *src2; char *expected;
+ } tests[] = {
+ { "", "", ""},
+ { "a", "", "[x]"},
+ { "", "a", "[x]"},
+ { "a", "a", "a"},
+ { "abcd", "abcd", "abcd"},
+ { "foo_cs", "bar_cs", "[x]_cs"},
+ { "cp_UNIQUE_INFIX_cs", "cp_unique_infix_cs", "cp_[x]_cs"},
+ { "cp_UNIQUE_INFIX_ci_unique_infix_cs", "cp_unique_infix_ci_UNIQUE_INFIX_cs", "cp_[x]_cs"},
+ { "foo[1234]", "foo[4321]", "foo[[x]]"},
+ { NULL, NULL, NULL },
+ };
+
+ for (struct testcase *tc = &tests[0]; tc->expected != NULL; tc++) {
+ STRING *src1 = string_strdupz(tc->src1);
+ STRING *src2 = string_strdupz(tc->src2);
+ STRING *expected = string_strdupz(tc->expected);
+
+ STRING *result = string_2way_merge(src1, src2);
+ if (string_cmp(result, expected) != 0) {
+ fprintf(stderr, "string_2way_merge(\"%s\", \"%s\") -> \"%s\" (expected=\"%s\")\n",
+ string2str(src1),
+ string2str(src2),
+ string2str(result),
+ string2str(expected));
+ errors++;
+ }
+
+ string_freez(src1);
+ string_freez(src2);
+ string_freez(expected);
+ string_freez(result);
+ }
+ }
+
+ // threads testing of string
+ {
+ struct thread_unittest tu = {
+ .dups = 1,
+ .join = 0,
+ };
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ size_t ofound_deleted_on_search = unittest_string_found_deleted_on_search(),
+ ofound_available_on_search = unittest_string_found_available_on_search(),
+ ofound_deleted_on_insert = unittest_string_found_deleted_on_insert(),
+ ofound_available_on_insert = unittest_string_found_available_on_insert(),
+ ospins = unittest_string_spins();
+#endif
+
+ size_t oinserts, odeletes, osearches, oentries, oreferences, omemory, oduplications, oreleases;
+ string_statistics(&oinserts, &odeletes, &osearches, &oentries, &oreferences, &omemory, &oduplications, &oreleases);
+
+ time_t seconds_to_run = 5;
+ int threads_to_create = 2;
+ fprintf(
+ stderr,
+ "Checking string concurrency with %d threads for %lld seconds...\n",
+ threads_to_create,
+ (long long)seconds_to_run);
+ // check string concurrency
+ ND_THREAD *threads[threads_to_create];
+ tu.join = 0;
+ for (int i = 0; i < threads_to_create; i++) {
+ char buf[100 + 1];
+ snprintf(buf, 100, "string%d", i);
+ threads[i] = nd_thread_create(buf, NETDATA_THREAD_OPTION_DONT_LOG | NETDATA_THREAD_OPTION_JOINABLE, string_thread, &tu);
+ }
+ sleep_usec(seconds_to_run * USEC_PER_SEC);
+
+ __atomic_store_n(&tu.join, 1, __ATOMIC_RELAXED);
+ for (int i = 0; i < threads_to_create; i++)
+ nd_thread_join(threads[i]);
+
+ size_t inserts, deletes, searches, sentries, references, memory, duplications, releases;
+ string_statistics(&inserts, &deletes, &searches, &sentries, &references, &memory, &duplications, &releases);
+
+ fprintf(stderr, "inserts %zu, deletes %zu, searches %zu, entries %zu, references %zu, memory %zu, duplications %zu, releases %zu\n",
+ inserts - oinserts, deletes - odeletes, searches - osearches, sentries - oentries, references - oreferences, memory - omemory, duplications - oduplications, releases - oreleases);
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ size_t found_deleted_on_search = unittest_string_found_deleted_on_search(),
+ found_available_on_search = unittest_string_found_available_on_search(),
+ found_deleted_on_insert = unittest_string_found_deleted_on_insert(),
+ found_available_on_insert = unittest_string_found_available_on_insert(),
+ spins = unittest_string_spins();
+
+ fprintf(stderr, "on insert: %zu ok + %zu deleted\non search: %zu ok + %zu deleted\nspins: %zu\n",
+ found_available_on_insert - ofound_available_on_insert,
+ found_deleted_on_insert - ofound_deleted_on_insert,
+ found_available_on_search - ofound_available_on_search,
+ found_deleted_on_search - ofound_deleted_on_search,
+ spins - ospins
+ );
+#endif
+ }
+
+ string_unittest_free_char_pp(names, entries);
+
+ fprintf(stderr, "\n%zu errors found\n", errors);
+ return errors ? 1 : 0;
+}
diff --git a/src/libnetdata/string/string.h b/src/libnetdata/string/string.h
new file mode 100644
index 000000000..f2ff9666c
--- /dev/null
+++ b/src/libnetdata/string/string.h
@@ -0,0 +1,37 @@
+
+#ifndef NETDATA_STRING_H
+#define NETDATA_STRING_H 1
+
+#include "../libnetdata.h"
+
+// ----------------------------------------------------------------------------
+// STRING implementation
+
+typedef struct netdata_string STRING;
+
+STRING *string_strdupz(const char *str);
+STRING *string_strndupz(const char *str, size_t len);
+
+STRING *string_dup(STRING *string);
+void string_freez(STRING *string);
+size_t string_strlen(STRING *string);
+const char *string2str(STRING *string) NEVERNULL;
+
+// keep common prefix/suffix and replace everything else with [x]
+STRING *string_2way_merge(STRING *a, STRING *b);
+
+static inline int string_cmp(STRING *s1, STRING *s2) {
+ // STRINGs are deduplicated, so the same strings have the same pointer
+ // when they differ, we do the typical strcmp() comparison
+ return (s1 == s2)?0:strcmp(string2str(s1), string2str(s2));
+}
+
+static inline int string_strcmp(STRING *string, const char *s) {
+ return strcmp(string2str(string), s);
+}
+
+void string_statistics(size_t *inserts, size_t *deletes, size_t *searches, size_t *entries, size_t *references, size_t *memory, size_t *duplications, size_t *releases);
+
+int string_unittest(size_t entries);
+
+#endif
diff --git a/src/libnetdata/string/utf8.h b/src/libnetdata/string/utf8.h
new file mode 100644
index 000000000..3e6c8c288
--- /dev/null
+++ b/src/libnetdata/string/utf8.h
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_STRING_UTF8_H
+#define NETDATA_STRING_UTF8_H 1
+
+#define IS_UTF8_BYTE(x) ((x) & 0x80)
+#define IS_UTF8_STARTBYTE(x) (IS_UTF8_BYTE(x)&&((x) & 0x40))
+
+#endif /* NETDATA_STRING_UTF8_H */