summary refs log tree commit diff stats
path: root/src/libutil/fstring.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/libutil/fstring.c 482
1 files changed, 482 insertions, 0 deletions
diff --git a/src/libutil/fstring.c b/src/libutil/fstring.c
new file mode 100644
index 0000000..a921f32
--- /dev/null
+++ b/src/libutil/fstring.c
@@ -0,0 +1,482 @@
+/*-
+ * Copyright 2016 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "fstring.h"
+#include "str_util.h"
+#include "contrib/fastutf8/fastutf8.h"
+#include "contrib/mumhash/mum.h"
+
+
+/*
+ * Allocator size query. When a suitable allocator API is present,
+ * HAVE_MALLOC_SIZE is defined and sys_alloc_size(sz) maps to it; otherwise
+ * neither macro exists and callers must cope without it.
+ * NOTE(review): both nallocx() and malloc_good_size() are expected to return
+ * the size the allocator would really reserve for a request of sz bytes -
+ * confirm against the allocator documentation.
+ */
+#ifdef WITH_JEMALLOC
+#include <jemalloc/jemalloc.h>
+/* Only jemalloc 3.6 and newer is used for the size query */
+#if (JEMALLOC_VERSION_MAJOR == 3 && JEMALLOC_VERSION_MINOR >= 6) || (JEMALLOC_VERSION_MAJOR > 3)
+#define HAVE_MALLOC_SIZE 1
+#define sys_alloc_size(sz) nallocx(sz, 0)
+#endif
+#elif defined(__APPLE__)
+#include <malloc/malloc.h>
+#define HAVE_MALLOC_SIZE 1
+#define sys_alloc_size(sz) malloc_good_size(sz)
+#endif
+
+/* Minimum data capacity (in bytes) given to every newly created string */
+static const gsize default_initial_size = 16;
+
+/* Unused bytes in the data buffer of `s`: capacity minus current length */
+#define fstravail(s) ((s)->allocated - (s)->len)
+
+/**
+ * Create an empty string with the default initial capacity.
+ *
+ * The header and the data buffer live in one malloc() block, so the result
+ * is released with rspamd_fstring_free().
+ *
+ * @return new string with `len` set to 0, or NULL if the allocation failed
+ * and g_error() returned
+ */
+rspamd_fstring_t *
+rspamd_fstring_new(void)
+{
+    const gsize total = default_initial_size + sizeof(rspamd_fstring_t);
+    rspamd_fstring_t *res = malloc(total);
+
+    if (res == NULL) {
+        g_error("%s: failed to allocate %" G_GSIZE_FORMAT " bytes",
+                G_STRLOC, total);
+
+        return NULL;
+    }
+
+    res->allocated = default_initial_size;
+    res->len = 0;
+
+    return res;
+}
+
+/**
+ * Create an empty string able to hold at least `initial_size` bytes.
+ *
+ * The capacity is never smaller than the default initial size.
+ *
+ * @param initial_size requested data capacity in bytes
+ * @return new string with `len` set to 0, or NULL if the allocation failed
+ * and g_error() returned
+ */
+rspamd_fstring_t *
+rspamd_fstring_sized_new(gsize initial_size)
+{
+    const gsize capacity = MAX(default_initial_size, initial_size);
+    const gsize total = capacity + sizeof(rspamd_fstring_t);
+    rspamd_fstring_t *res = malloc(total);
+
+    if (res == NULL) {
+        g_error("%s: failed to allocate %" G_GSIZE_FORMAT " bytes",
+                G_STRLOC, total);
+
+        return NULL;
+    }
+
+    res->allocated = capacity;
+    res->len = 0;
+
+    return res;
+}
+
+/**
+ * Create a string initialised with a copy of `len` bytes taken from `init`.
+ *
+ * The capacity is the larger of `len` and the default initial size. The
+ * process is aborted if the allocation fails.
+ *
+ * @param init source bytes; may be NULL when `len` is 0
+ * @param len number of bytes to copy
+ * @return new string with `len` bytes of content
+ */
+rspamd_fstring_t *
+rspamd_fstring_new_init(const gchar *init, gsize len)
+{
+    rspamd_fstring_t *s;
+    gsize real_size = MAX(default_initial_size, len);
+
+    if ((s = malloc(real_size + sizeof(*s))) == NULL) {
+        g_error("%s: failed to allocate %" G_GSIZE_FORMAT " bytes",
+                G_STRLOC, real_size + sizeof(*s));
+
+        abort();
+    }
+
+    s->len = len;
+    s->allocated = real_size;
+
+    /*
+     * memcpy() with a NULL source is undefined even for zero bytes, so skip
+     * the copy for empty input (same guard as in rspamd_fstring_assign()).
+     */
+    if (len > 0) {
+        memcpy(s->str, init, len);
+    }
+
+    return s;
+}
+
+/**
+ * Replace the whole content of `str` with `len` bytes copied from `init`.
+ *
+ * @param str string to overwrite; when NULL a new string is created
+ * @param init source bytes; not read when `len` is 0
+ * @param len number of bytes to copy
+ * @return the resulting string; it may differ from `str` if the buffer had
+ * to be reallocated, so callers must use the returned pointer
+ */
+rspamd_fstring_t *
+rspamd_fstring_assign(rspamd_fstring_t *str, const gchar *init, gsize len)
+{
+    if (str == NULL) {
+        return rspamd_fstring_new_init(init, len);
+    }
+
+    /*
+     * The new content is written from offset 0, so the whole capacity is
+     * usable, not just the free tail. Grow only when the capacity itself is
+     * too small; here str->len <= str->allocated < len, so the subtraction
+     * cannot wrap and the grown buffer holds at least `len` bytes.
+     */
+    if (str->allocated < len) {
+        str = rspamd_fstring_grow(str, len - str->len);
+    }
+
+    if (len > 0) {
+        memcpy(str->str, init, len);
+    }
+
+    str->len = len;
+
+    return str;
+}
+
+/*
+ * Release a string. Header and data share one allocation, so a single
+ * free() is enough; passing NULL is harmless because free(NULL) is a no-op.
+ */
+void rspamd_fstring_free(rspamd_fstring_t *str)
+{
+ free(str);
+}
+
+/**
+ * Compute the capacity a string should be grown to.
+ *
+ * Buffers smaller than 4096 bytes double; larger ones grow by roughly one
+ * half. The result is never below `len + needed_len`. When the allocator
+ * size query is compiled in, its answer is used if it is larger.
+ *
+ * @param len current content length
+ * @param allocated current capacity
+ * @param needed_len extra bytes that must fit after the current content
+ * @return suggested new capacity
+ */
+inline gsize
+rspamd_fstring_suggest_size(gsize len, gsize allocated, gsize needed_len)
+{
+    const gsize required = len + needed_len;
+    gsize geometric, target, rounded = 0;
+
+    geometric = (allocated < 4096) ? allocated * 2 : 1 + allocated * 3 / 2;
+    target = MAX(required, geometric);
+
+#ifdef HAVE_MALLOC_SIZE
+    rounded = sys_alloc_size(target + sizeof(rspamd_fstring_t));
+#endif
+
+    return MAX(target, rounded);
+}
+
+/**
+ * Enlarge `str` so that at least `needed_len` more bytes fit after its
+ * current content.
+ *
+ * The new capacity comes from rspamd_fstring_suggest_size(). If realloc()
+ * fails, the old block is freed and the process is aborted.
+ *
+ * @param str string to enlarge (must not be NULL)
+ * @param needed_len number of additional bytes required
+ * @return the reallocated string; the old pointer must not be used again
+ */
+rspamd_fstring_t *
+rspamd_fstring_grow(rspamd_fstring_t *str, gsize needed_len)
+{
+    const gsize capacity = rspamd_fstring_suggest_size(str->len,
+                                                       str->allocated,
+                                                       needed_len);
+    rspamd_fstring_t *grown = realloc(str, capacity + sizeof(*str));
+
+    if (grown == NULL) {
+        /* Avoid memory leak */
+        free(str);
+        g_error("%s: failed to re-allocate %" G_GSIZE_FORMAT " bytes",
+                G_STRLOC, capacity + sizeof(*str));
+        abort();
+    }
+
+    grown->allocated = capacity;
+
+    return grown;
+}
+
+/**
+ * Append `len` bytes from `in` to the end of `str`.
+ *
+ * @param str destination; when NULL a new string holding the data is created
+ * @param in bytes to append
+ * @param len number of bytes to append
+ * @return the resulting string; it may differ from `str` after a
+ * reallocation
+ */
+rspamd_fstring_t *
+rspamd_fstring_append(rspamd_fstring_t *str, const char *in, gsize len)
+{
+    if (str == NULL) {
+        return rspamd_fstring_new_init(in, len);
+    }
+
+    if (fstravail(str) < len) {
+        str = rspamd_fstring_grow(str, len);
+    }
+
+    memcpy(str->str + str->len, in, len);
+    str->len += len;
+
+    return str;
+}
+
+/**
+ * Append `len` copies of the character `c` to `str`.
+ *
+ * @param str destination; when NULL a new string sized for `len` bytes is
+ * created first
+ * @param c fill character
+ * @param len number of copies to append
+ * @return the resulting string; it may differ from `str` after a
+ * reallocation
+ */
+rspamd_fstring_t *
+rspamd_fstring_append_chars(rspamd_fstring_t *str,
+                            char c, gsize len)
+{
+    if (str == NULL) {
+        /* A fresh string is empty and already large enough for `len` bytes */
+        str = rspamd_fstring_sized_new(len);
+    }
+    else if (fstravail(str) < len) {
+        str = rspamd_fstring_grow(str, len);
+    }
+
+    memset(str->str + str->len, c, len);
+    str->len += len;
+
+    return str;
+}
+
+/**
+ * Remove up to `len` bytes starting at offset `pos` from `str`.
+ *
+ * Nothing happens when `pos` is at or beyond the end of the string. When
+ * the range reaches past the end, everything from `pos` onwards is dropped.
+ * The capacity is never changed.
+ *
+ * @param str string to modify (must not be NULL)
+ * @param pos offset of the first byte to remove
+ * @param len number of bytes to remove
+ */
+void rspamd_fstring_erase(rspamd_fstring_t *str, gsize pos, gsize len)
+{
+    gsize tail;
+
+    if (pos >= str->len) {
+        /* Do nothing */
+        return;
+    }
+
+    /* Bytes from `pos` to the end; comparing against it avoids pos + len wrap */
+    tail = str->len - pos;
+
+    if (len >= tail) {
+        /* Fast path: the erased range runs to the end, just truncate */
+        str->len = pos;
+        return;
+    }
+
+    /*
+     * Shift only the bytes that follow the erased range (tail - len of
+     * them) and shrink the length by the number of bytes removed.
+     */
+    memmove(str->str + pos, str->str + pos + len, tail - len);
+    str->len -= len;
+}
+
+/* Compat code: mix one input value `c` into the running hash `hval` */
+static guint64
+fstrhash_c(guint64 c, guint64 hval)
+{
+    const guint64 mixed = mum_hash_step(hval, c);
+
+    return mixed;
+}
+
+
+/*
+ * Return a case-insensitive hash value for a token.
+ *
+ * The hash is seeded with the token length. With `is_utf` set, the input is
+ * validated first; if validation returns non-zero, the token is hashed
+ * bytewise as if `is_utf` were FALSE. Otherwise each code point is
+ * lowercased with g_unichar_tolower() and mixed in. In bytewise mode, bytes
+ * are lowercased with g_ascii_tolower() and mixed in eight at a time, then
+ * the remaining bytes one by one.
+ *
+ * Returns 0 for a NULL token. The 64-bit state is truncated to 32 bits.
+ */
+guint32
+rspamd_fstrhash_lc(const rspamd_ftok_t *str, gboolean is_utf)
+{
+    const gchar *cur, *last;
+    guint64 state;
+
+    if (str == NULL) {
+        return 0;
+    }
+
+    cur = str->begin;
+    last = cur + str->len;
+    state = str->len;
+
+    if (is_utf && rspamd_fast_utf8_validate(cur, str->len) != 0) {
+        /* Validation failed: hash the raw bytes instead */
+        is_utf = FALSE;
+    }
+
+    if (is_utf) {
+        while (cur < last) {
+            gunichar ch = g_unichar_tolower(g_utf8_get_char(cur));
+
+            state = fstrhash_c(ch, state);
+            cur = g_utf8_next_char(cur);
+        }
+    }
+    else {
+        const gsize nwords = str->len / sizeof(guint64);
+        const gsize ntail = str->len % sizeof(guint64);
+        gsize k;
+
+        for (k = 0; k < nwords; k++) {
+            /* Build one 64-bit word out of eight lowercased bytes */
+            union {
+                char c[sizeof(guint64)];
+                guint64 iu64;
+            } word;
+            gsize b;
+
+            for (b = 0; b < sizeof(guint64); b++) {
+                word.c[b] = g_ascii_tolower(cur[b]);
+            }
+
+            state = fstrhash_c(word.iu64, state);
+            cur += sizeof(guint64);
+        }
+
+        for (k = 0; k < ntail; k++) {
+            state = fstrhash_c(g_ascii_tolower(*cur), state);
+            cur++;
+        }
+    }
+
+    return state;
+}
+
+/**
+ * Check whether two strings have the same length and identical bytes.
+ *
+ * @param s1 first string (must not be NULL)
+ * @param s2 second string (must not be NULL)
+ * @return TRUE when both strings are byte-for-byte equal, FALSE otherwise
+ */
+gboolean
+rspamd_fstring_equal(const rspamd_fstring_t *s1,
+                     const rspamd_fstring_t *s2)
+{
+    g_assert(s1 != NULL && s2 != NULL);
+
+    if (s1->len != s2->len) {
+        return FALSE;
+    }
+
+    return (memcmp(s1->str, s2->str, s1->len) == 0);
+}
+
+/**
+ * Case-insensitive ordering of two strings.
+ *
+ * Strings of different length are ordered by length alone (shorter first);
+ * equal-length strings are compared with rspamd_lc_cmp().
+ *
+ * @param s1 first string (must not be NULL)
+ * @param s2 second string (must not be NULL)
+ * @return negative, zero or positive value when `s1` sorts before, equal to
+ * or after `s2`
+ */
+gint rspamd_fstring_casecmp(const rspamd_fstring_t *s1,
+                            const rspamd_fstring_t *s2)
+{
+    g_assert(s1 != NULL && s2 != NULL);
+
+    if (s1->len != s2->len) {
+        /*
+         * Compare instead of subtracting: the unsigned length difference
+         * narrowed to gint can flip sign or become 0 for large differences.
+         */
+        return (s1->len < s2->len) ? -1 : 1;
+    }
+
+    return rspamd_lc_cmp(s1->str, s2->str, s1->len);
+}
+
+/**
+ * Case-sensitive ordering of two strings.
+ *
+ * Strings of different length are ordered by length alone (shorter first);
+ * equal-length strings are compared with memcmp().
+ *
+ * @param s1 first string (must not be NULL)
+ * @param s2 second string (must not be NULL)
+ * @return negative, zero or positive value when `s1` sorts before, equal to
+ * or after `s2`
+ */
+gint rspamd_fstring_cmp(const rspamd_fstring_t *s1,
+                        const rspamd_fstring_t *s2)
+{
+    g_assert(s1 != NULL && s2 != NULL);
+
+    if (s1->len != s2->len) {
+        /*
+         * Compare instead of subtracting: the unsigned length difference
+         * narrowed to gint can flip sign or become 0 for large differences.
+         */
+        return (s1->len < s2->len) ? -1 : 1;
+    }
+
+    return memcmp(s1->str, s2->str, s1->len);
+}
+
+/**
+ * Case-insensitive ordering of two tokens.
+ *
+ * Tokens of different length are ordered by length alone (shorter first);
+ * equal-length tokens are compared with rspamd_lc_cmp().
+ *
+ * @param s1 first token (must not be NULL)
+ * @param s2 second token (must not be NULL)
+ * @return negative, zero or positive value when `s1` sorts before, equal to
+ * or after `s2`
+ */
+gint rspamd_ftok_casecmp(const rspamd_ftok_t *s1,
+                         const rspamd_ftok_t *s2)
+{
+    g_assert(s1 != NULL && s2 != NULL);
+
+    if (s1->len != s2->len) {
+        /*
+         * Compare instead of subtracting: the unsigned length difference
+         * narrowed to gint can flip sign or become 0 for large differences.
+         */
+        return (s1->len < s2->len) ? -1 : 1;
+    }
+
+    return rspamd_lc_cmp(s1->begin, s2->begin, s1->len);
+}
+
+/**
+ * Case-sensitive ordering of two tokens.
+ *
+ * Tokens of different length are ordered by length alone (shorter first);
+ * equal-length tokens are compared with memcmp().
+ *
+ * @param s1 first token (must not be NULL)
+ * @param s2 second token (must not be NULL)
+ * @return negative, zero or positive value when `s1` sorts before, equal to
+ * or after `s2`
+ */
+gint rspamd_ftok_cmp(const rspamd_ftok_t *s1,
+                     const rspamd_ftok_t *s2)
+{
+    g_assert(s1 != NULL && s2 != NULL);
+
+    if (s1->len != s2->len) {
+        /*
+         * Compare instead of subtracting: the unsigned length difference
+         * narrowed to gint can flip sign or become 0 for large differences.
+         */
+        return (s1->len < s2->len) ? -1 : 1;
+    }
+
+    return memcmp(s1->begin, s2->begin, s1->len);
+}
+
+/**
+ * Check whether token `s1` begins with the bytes of token `s2`.
+ *
+ * @param s1 token to inspect (must not be NULL)
+ * @param s2 expected prefix (must not be NULL)
+ * @return TRUE when `s1` is at least as long as `s2` and its first
+ * `s2->len` bytes equal `s2`, FALSE otherwise
+ */
+gboolean
+rspamd_ftok_starts_with(const rspamd_ftok_t *s1,
+                        const rspamd_ftok_t *s2)
+{
+    g_assert(s1 != NULL && s2 != NULL);
+
+    if (s1->len < s2->len) {
+        return FALSE;
+    }
+
+    return (memcmp(s1->begin, s2->begin, s2->len) == 0);
+}
+
+/**
+ * Free a token created by rspamd_ftok_map() together with the string that
+ * backs it.
+ *
+ * Such a token's `begin` points at the `str` member of the backing string,
+ * so the string header is recovered by stepping back by that member's
+ * offset. Must not be used for tokens that point at other memory.
+ *
+ * @param p pointer to the token; NULL is ignored
+ */
+void rspamd_fstring_mapped_ftok_free(gpointer p)
+{
+    rspamd_ftok_t *tok = p;
+    rspamd_fstring_t *storage;
+
+    if (tok == NULL) {
+        return;
+    }
+
+    /*
+     * Use the real member offset instead of assuming the header is exactly
+     * two gsize fields, so a layout or padding change cannot break this.
+     */
+    storage = (rspamd_fstring_t *) (tok->begin -
+                                    G_STRUCT_OFFSET(rspamd_fstring_t, str));
+    rspamd_fstring_free(storage);
+    g_free(tok);
+}
+
+/**
+ * Create a token that refers to the content of `s` without copying it.
+ *
+ * The token is allocated with g_malloc() and points straight into the
+ * buffer of `s`.
+ *
+ * @param s string to map (must not be NULL)
+ * @return newly allocated token covering `s->len` bytes of `s->str`
+ */
+rspamd_ftok_t *
+rspamd_ftok_map(const rspamd_fstring_t *s)
+{
+    rspamd_ftok_t *mapped;
+
+    g_assert(s != NULL);
+
+    mapped = g_malloc(sizeof(rspamd_ftok_t));
+    mapped->len = s->len;
+    mapped->begin = s->str;
+
+    return mapped;
+}
+
+/**
+ * Copy the content of `s` into a new NUL-terminated C string.
+ *
+ * @param s source string; may be NULL
+ * @return buffer allocated with g_malloc() holding `s->len` bytes plus a
+ * terminating NUL, or NULL when `s` is NULL
+ */
+char *
+rspamd_fstring_cstr(const rspamd_fstring_t *s)
+{
+    gsize n;
+    char *copy;
+
+    if (s == NULL) {
+        return NULL;
+    }
+
+    n = s->len;
+    copy = g_malloc(n + 1);
+    memcpy(copy, s->str, n);
+    copy[n] = '\0';
+
+    return copy;
+}
+
+/**
+ * Copy the bytes of token `s` into a new NUL-terminated C string.
+ *
+ * @param s source token; may be NULL
+ * @return buffer allocated with g_malloc() holding `s->len` bytes plus a
+ * terminating NUL, or NULL when `s` is NULL
+ */
+char *
+rspamd_ftok_cstr(const rspamd_ftok_t *s)
+{
+    gsize n;
+    char *copy;
+
+    if (s == NULL) {
+        return NULL;
+    }
+
+    n = s->len;
+    copy = g_malloc(n + 1);
+    memcpy(copy, s->begin, n);
+    copy[n] = '\0';
+
+    return copy;
+}
+
+/**
+ * Compare a token with a NUL-terminated C string.
+ *
+ * @param s token to test (must not be NULL)
+ * @param pat NUL-terminated pattern (must not be NULL)
+ * @param icase when TRUE the comparison uses rspamd_ftok_casecmp(),
+ * otherwise rspamd_ftok_cmp()
+ * @return TRUE when the selected comparison reports equality
+ */
+gboolean
+rspamd_ftok_cstr_equal(const rspamd_ftok_t *s, const gchar *pat,
+                       gboolean icase)
+{
+    rspamd_ftok_t needle;
+    gint diff;
+
+    g_assert(s != NULL);
+    g_assert(pat != NULL);
+
+    /* Wrap the C string in a temporary token so the token comparators apply */
+    needle.begin = pat;
+    needle.len = strlen(pat);
+
+    diff = icase ? rspamd_ftok_casecmp(s, &needle)
+                 : rspamd_ftok_cmp(s, &needle);
+
+    return (diff == 0);
+}
+
+/**
+ * Duplicate the bytes of token `src` as a NUL-terminated string.
+ *
+ * @param src source token; may be NULL
+ * @return buffer allocated with g_malloc() holding `src->len` bytes plus a
+ * terminating NUL, or NULL when `src` is NULL
+ */
+gchar *
+rspamd_ftokdup(const rspamd_ftok_t *src)
+{
+    gsize n;
+    gchar *dup;
+
+    if (src == NULL) {
+        return NULL;
+    }
+
+    n = src->len;
+    dup = g_malloc(n + 1);
+    memcpy(dup, src->begin, n);
+    dup[n] = '\0';
+
+    return dup;
+}
+
+/**
+ * Duplicate the content of string `src` as a NUL-terminated string.
+ *
+ * @param src source string; may be NULL
+ * @return buffer allocated with g_malloc() holding `src->len` bytes plus a
+ * terminating NUL, or NULL when `src` is NULL
+ */
+gchar *
+rspamd_fstringdup(const rspamd_fstring_t *src)
+{
+    gsize n;
+    gchar *dup;
+
+    if (src == NULL) {
+        return NULL;
+    }
+
+    n = src->len;
+    dup = g_malloc(n + 1);
+    memcpy(dup, src->str, n);
+    dup[n] = '\0';
+
+    return dup;
+}