summaryrefslogtreecommitdiffstats
path: root/src/datasets.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/datasets.c1743
1 files changed, 1743 insertions, 0 deletions
diff --git a/src/datasets.c b/src/datasets.c
new file mode 100644
index 0000000..d89ed8d
--- /dev/null
+++ b/src/datasets.c
@@ -0,0 +1,1743 @@
+/* Copyright (C) 2017-2020 Open Information Security Foundation
+ *
+ * You can copy, redistribute or modify this Program under the terms of
+ * the GNU General Public License version 2 as published by the Free
+ * Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/**
+ * \file
+ *
+ * \author Victor Julien <victor@inliniac.net>
+ */
+
+#include "suricata-common.h"
+#include "suricata.h"
+#include "conf.h"
+#include "datasets.h"
+#include "datasets-string.h"
+#include "datasets-ipv4.h"
+#include "datasets-ipv6.h"
+#include "datasets-md5.h"
+#include "datasets-sha256.h"
+#include "datasets-reputation.h"
+#include "util-conf.h"
+#include "util-thash.h"
+#include "util-print.h"
+#include "util-base64.h" // decode base64
+#include "util-byte.h"
+#include "util-misc.h"
+#include "util-path.h"
+#include "util-debug.h"
+
+SCMutex sets_lock = SCMUTEX_INITIALIZER; /* held while the global dataset list below is walked or modified */
+static Dataset *sets = NULL; /* head of the singly linked list of datasets (chained through ->next) */
+static uint32_t set_ids = 0; /* next id assigned by DatasetAlloc(); only ever incremented in this view */
+
+static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
+ DataRepType *rep); /* forward declaration: used by the file loaders below, defined later in the file */
+
+static inline void DatasetUnlockData(THashData *d) /* release an entry returned by THashLookupFromHash() */
+{
+ (void) THashDecrUsecnt(d); /* return value deliberately ignored; presumably drops the use count taken by the lookup - confirm in util-thash */
+ THashDataUnlock(d); /* presumably releases the per-entry lock held since the lookup - confirm in util-thash */
+}
+static bool DatasetIsStatic(const char *save, const char *load); /* forward declaration, defined further down */
+static void GetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize); /* forward declaration, defined further down */
+
+/**
+ * \brief translate a dataset type keyword into its DATASET_TYPE_* value
+ *
+ * Keywords are compared case-insensitively. The keyword "ip" selects
+ * DATASET_TYPE_IPV6.
+ *
+ * \param s type keyword
+ * \retval type the matching dataset type, or DATASET_TYPE_NOTSET when the
+ *         keyword is not recognized
+ */
+enum DatasetTypes DatasetGetTypeFromString(const char *s)
+{
+    static const struct {
+        const char *keyword;
+        enum DatasetTypes type;
+    } known_types[] = {
+        { "md5", DATASET_TYPE_MD5 },
+        { "sha256", DATASET_TYPE_SHA256 },
+        { "string", DATASET_TYPE_STRING },
+        { "ipv4", DATASET_TYPE_IPV4 },
+        { "ip", DATASET_TYPE_IPV6 },
+    };
+
+    for (size_t i = 0; i < sizeof(known_types) / sizeof(known_types[0]); i++) {
+        if (strcasecmp(known_types[i].keyword, s) == 0)
+            return known_types[i].type;
+    }
+    return DATASET_TYPE_NOTSET;
+}
+
+/**
+ * \brief allocate a zero-initialized Dataset and give it the next free id
+ *
+ * \param name currently unused
+ * \retval set the new dataset, or NULL if the allocation failed
+ */
+static Dataset *DatasetAlloc(const char *name)
+{
+    Dataset *fresh = SCCalloc(1, sizeof(*fresh));
+    if (fresh == NULL)
+        return NULL;
+
+    fresh->id = set_ids++;
+    return fresh;
+}
+
+/**
+ * \brief find a visible (non-hidden) dataset by name
+ *
+ * Names are compared case-insensitively. This function does not take
+ * sets_lock itself.
+ *
+ * \retval set the first matching dataset in the list, or NULL if none
+ */
+static Dataset *DatasetSearchByName(const char *name)
+{
+    for (Dataset *cur = sets; cur != NULL; cur = cur->next) {
+        if (cur->hidden != false)
+            continue;
+        if (strcasecmp(name, cur->name) == 0)
+            return cur;
+    }
+    return NULL;
+}
+
+/** \brief numeric value of one hex digit, or -1 if \a c is not in [0-9a-fA-F] */
+static int HexToRawDigit(uint8_t c)
+{
+    if (c >= '0' && c <= '9')
+        return c - '0';
+    if (c >= 'a' && c <= 'f')
+        return c - 'a' + 10;
+    if (c >= 'A' && c <= 'F')
+        return c - 'A' + 10;
+    return -1;
+}
+
+/**
+ * \brief decode a hex string into raw bytes
+ *
+ * Every input character must be a hex digit. Parsing each pair with strtol()
+ * is not strict enough for this: strtol() skips leading whitespace, accepts a
+ * sign and returns 0 when nothing could be converted, so input such as "zz"
+ * or " f" would be stored as a valid byte.
+ *
+ * \param in hex characters, not necessarily NUL-terminated
+ * \param ins number of characters in \a in; must be even and at least 2
+ * \param out destination buffer, left untouched on failure
+ * \param outs size of \a out in bytes; must be exactly ins / 2
+ * \retval 0 on success
+ * \retval -1 on a length mismatch or a non-hex character
+ */
+static int HexToRaw(const uint8_t *in, size_t ins, uint8_t *out, size_t outs)
+{
+    if (ins < 2)
+        return -1;
+    if (ins % 2 != 0)
+        return -1;
+    if (outs != ins / 2)
+        return -1;
+
+    /* validate everything first so that out is only written on success */
+    for (size_t i = 0; i < ins; i++) {
+        if (HexToRawDigit(in[i]) < 0) {
+            SCLogError("invalid hex character 0x%02x in hash", (unsigned int)in[i]);
+            return -1;
+        }
+    }
+
+    for (size_t x = 0; x < outs; x++) {
+        const int hi = HexToRawDigit(in[2 * x]);
+        const int lo = HexToRawDigit(in[2 * x + 1]);
+        out[x] = (uint8_t)((hi << 4) | lo);
+    }
+    return 0;
+}
+
+/**
+ * \brief parse the reputation field of a dataset line
+ *
+ * The field ends at the first ',', newline or NUL found in the first
+ * \a ins bytes of \a in. It has to be a decimal number in the range
+ * 0-65535.
+ *
+ * \param in start of the reputation field
+ * \param ins number of bytes available at \a in
+ * \param rep_out receives the parsed value on success
+ * \retval 0 on success
+ * \retval -1 if the field is not a valid reputation value
+ */
+static int ParseRepLine(const char *in, size_t ins, DataRepType *rep_out)
+{
+    SCLogDebug("in '%s'", in);
+
+    /* work on a NUL-terminated private copy; the input is never modified */
+    char field[ins + 1];
+    memcpy(field, in, ins);
+    field[ins] = '\0';
+
+    /* cut the copy at the first terminator; the NUL written above
+     * guarantees that one is always found */
+    for (size_t pos = 0; pos <= ins; pos++) {
+        const char c = field[pos];
+        if (c == ',' || c == '\n' || c == '\0') {
+            field[pos] = '\0';
+            break;
+        }
+    }
+    SCLogDebug("line '%s'", field);
+
+    const size_t field_len = strlen(field);
+    uint16_t value = 0;
+    const int parsed = StringParseU16RangeCheck(&value, 10, field_len, field, 0, USHRT_MAX);
+    if (parsed != (int)field_len) {
+        SCLogError("'%s' is not a valid reputation value (0-65535)", field);
+        return -1;
+    }
+    SCLogDebug("v %"PRIu16" raw %s", value, field);
+
+    rep_out->value = value;
+    return 0;
+}
+
+/**
+ * \brief load IPv4 addresses from the file named in set->load
+ *
+ * Each line is either `<address>` or `<address>,<reputation>`. Lines that
+ * cannot be parsed or added are reported through FatalErrorOnInit() and
+ * skipped.
+ *
+ * \param set dataset to fill; nothing is done if set->load is empty
+ * \retval 0 on success or when there is nothing to load
+ * \retval -1 if the file cannot be opened
+ */
+static int DatasetLoadIPv4(Dataset *set)
+{
+    if (strlen(set->load) == 0)
+        return 0;
+
+    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
+    /* when the load file doubles as the save file, "a+" lets fopen() create
+     * it if it does not exist yet */
+    const char *fopen_mode = "r";
+    if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
+        fopen_mode = "a+";
+    }
+
+    FILE *fp = fopen(set->load, fopen_mode);
+    if (fp == NULL) {
+        SCLogError("fopen '%s' failed: %s", set->load, strerror(errno));
+        return -1;
+    }
+
+    uint32_t cnt = 0;
+    char line[1024];
+    while (fgets(line, (int)sizeof(line), fp) != NULL) {
+        /* Cut at the line terminator instead of blindly dropping the last
+         * byte: the final line may lack a newline, the file may use CRLF,
+         * and a line starting with a NUL byte has length 0. */
+        line[strcspn(line, "\r\n")] = '\0';
+
+        /* an optional ",<reputation>" suffix follows the address */
+        char *rep_str = strchr(line, ',');
+        if (rep_str != NULL) {
+            SCLogDebug("IPv4 with REP line: '%s'", line);
+            *rep_str++ = '\0';
+        } else {
+            SCLogDebug("line: '%s'", line);
+        }
+
+        struct in_addr in;
+        if (inet_pton(AF_INET, line, &in) != 1) {
+            FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
+            continue;
+        }
+
+        if (rep_str == NULL) {
+            if (DatasetAdd(set, (const uint8_t *)&in.s_addr, 4) < 0) {
+                FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
+                continue;
+            }
+        } else {
+            /* list with rep data */
+            DataRepType rep = { .value = 0 };
+            if (ParseRepLine(rep_str, strlen(rep_str), &rep) < 0) {
+                FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load);
+                continue;
+            }
+
+            SCLogDebug("rep v:%u", rep.value);
+            if (DatasetAddwRep(set, (const uint8_t *)&in.s_addr, 4, &rep) < 0) {
+                FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
+                continue;
+            }
+        }
+        cnt++;
+    }
+    THashConsolidateMemcap(set->hash);
+
+    fclose(fp);
+    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
+    return 0;
+}
+
+/**
+ * \brief parse an IPv4 or IPv6 address string into a 16 byte address
+ *
+ * A string containing ':' is parsed as IPv6, anything else as IPv4. Plain
+ * IPv4 addresses and IPv4-mapped IPv6 addresses (::ffff:a.b.c.d) both end up
+ * with the four IPv4 bytes in the first four bytes of \a in6 and the other
+ * twelve bytes zero.
+ *
+ * \param set dataset the line belongs to, used for error reporting only
+ * \param line NUL-terminated address string
+ * \param in6 receives the address
+ * \retval 0 on success
+ * \retval -1 if the string is not a valid address
+ */
+static int ParseIpv6String(Dataset *set, char *line, struct in6_addr *in6)
+{
+    /* Checking IPv6 case */
+    if (strchr(line, ':') != NULL) {
+        if (inet_pton(AF_INET6, line, in6) != 1) {
+            FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
+            return -1;
+        }
+
+        /* IPv4 in IPv6 notation needs transformation to internal Suricata
+         * storage. The prefix is compared byte by byte: s6_addr is in network
+         * byte order, so testing it through host-order 32 bit words would
+         * only match on little-endian hosts. */
+        static const uint8_t v4mapped_prefix[12] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff };
+        if (memcmp(in6->s6_addr, v4mapped_prefix, sizeof(v4mapped_prefix)) == 0) {
+            uint8_t v4[4];
+            memcpy(v4, &in6->s6_addr[12], sizeof(v4));
+            memset(in6, 0, sizeof(struct in6_addr));
+            memcpy(in6->s6_addr, v4, sizeof(v4));
+        }
+    } else {
+        /* IPv4 case */
+        struct in_addr in;
+        if (inet_pton(AF_INET, line, &in) != 1) {
+            FatalErrorOnInit("dataset data parse failed %s/%s: %s", set->name, set->load, line);
+            return -1;
+        }
+        memset(in6, 0, sizeof(struct in6_addr));
+        memcpy(in6, &in, sizeof(struct in_addr));
+    }
+    return 0;
+}
+
+/**
+ * \brief load IP addresses (IPv4 or IPv6) from the file named in set->load
+ *
+ * Each line is either `<address>` or `<address>,<reputation>`. Lines that
+ * cannot be parsed or added are reported through FatalErrorOnInit() and
+ * skipped.
+ *
+ * \param set dataset to fill; nothing is done if set->load is empty
+ * \retval 0 on success or when there is nothing to load
+ * \retval -1 if the file cannot be opened
+ */
+static int DatasetLoadIPv6(Dataset *set)
+{
+    if (strlen(set->load) == 0)
+        return 0;
+
+    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
+    /* when the load file doubles as the save file, "a+" lets fopen() create
+     * it if it does not exist yet */
+    const char *fopen_mode = "r";
+    if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
+        fopen_mode = "a+";
+    }
+
+    FILE *fp = fopen(set->load, fopen_mode);
+    if (fp == NULL) {
+        SCLogError("fopen '%s' failed: %s", set->load, strerror(errno));
+        return -1;
+    }
+
+    uint32_t cnt = 0;
+    char line[1024];
+    while (fgets(line, (int)sizeof(line), fp) != NULL) {
+        /* Cut at the line terminator instead of blindly dropping the last
+         * byte: the final line may lack a newline, the file may use CRLF,
+         * and a line starting with a NUL byte has length 0. */
+        line[strcspn(line, "\r\n")] = '\0';
+
+        /* an optional ",<reputation>" suffix follows the address */
+        char *rep_str = strchr(line, ',');
+        if (rep_str != NULL) {
+            SCLogDebug("IPv6 with REP line: '%s'", line);
+            *rep_str++ = '\0';
+        } else {
+            SCLogDebug("line: '%s'", line);
+        }
+
+        struct in6_addr in6;
+        if (ParseIpv6String(set, line, &in6) < 0) {
+            FatalErrorOnInit("unable to parse IP address");
+            continue;
+        }
+
+        if (rep_str == NULL) {
+            if (DatasetAdd(set, (const uint8_t *)&in6.s6_addr, 16) < 0) {
+                FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
+                continue;
+            }
+        } else {
+            /* list with rep data */
+            DataRepType rep = { .value = 0 };
+            if (ParseRepLine(rep_str, strlen(rep_str), &rep) < 0) {
+                FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load);
+                continue;
+            }
+
+            SCLogDebug("rep v:%u", rep.value);
+            if (DatasetAddwRep(set, (const uint8_t *)&in6.s6_addr, 16, &rep) < 0) {
+                FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
+                continue;
+            }
+        }
+        cnt++;
+    }
+    THashConsolidateMemcap(set->hash);
+
+    fclose(fp);
+    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
+    return 0;
+}
+
+/**
+ * \brief load MD5 hashes from the file named in set->load
+ *
+ * Each line is either 32 hex characters or 32 hex characters followed by
+ * `,<reputation>`. Any other line, and any entry that cannot be parsed or
+ * added, is reported through FatalErrorOnInit() and skipped.
+ *
+ * \param set dataset to fill; nothing is done if set->load is empty
+ * \retval 0 on success or when there is nothing to load
+ * \retval -1 if the file cannot be opened
+ */
+static int DatasetLoadMd5(Dataset *set)
+{
+    if (strlen(set->load) == 0)
+        return 0;
+
+    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
+    /* when the load file doubles as the save file, "a+" lets fopen() create
+     * it if it does not exist yet */
+    const char *fopen_mode = "r";
+    if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
+        fopen_mode = "a+";
+    }
+
+    FILE *fp = fopen(set->load, fopen_mode);
+    if (fp == NULL) {
+        SCLogError("fopen '%s' failed: %s", set->load, strerror(errno));
+        return -1;
+    }
+
+    uint32_t cnt = 0;
+    char line[1024];
+    while (fgets(line, (int)sizeof(line), fp) != NULL) {
+        /* Strip the terminator before looking at the length, so that a
+         * final line without newline and CRLF files are accepted too. */
+        line[strcspn(line, "\r\n")] = '\0';
+        const size_t len = strlen(line);
+
+        /* straight black/white list */
+        if (len == 32) {
+            SCLogDebug("line: '%s'", line);
+
+            uint8_t hash[16];
+            if (HexToRaw((const uint8_t *)line, 32, hash, sizeof(hash)) < 0) {
+                FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
+                continue;
+            }
+
+            if (DatasetAdd(set, (const uint8_t *)hash, 16) < 0) {
+                FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
+                continue;
+            }
+            cnt++;
+
+        /* list with rep data */
+        } else if (len > 32 && line[32] == ',') {
+            SCLogDebug("MD5 with REP line: '%s'", line);
+
+            uint8_t hash[16];
+            if (HexToRaw((const uint8_t *)line, 32, hash, sizeof(hash)) < 0) {
+                FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
+                continue;
+            }
+
+            /* the reputation starts right after the ',' at offset 32 */
+            DataRepType rep = { .value = 0 };
+            if (ParseRepLine(line + 33, len - 33, &rep) < 0) {
+                FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load);
+                continue;
+            }
+
+            SCLogDebug("rep v:%u", rep.value);
+            if (DatasetAddwRep(set, hash, 16, &rep) < 0) {
+                FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
+                continue;
+            }
+
+            cnt++;
+        } else {
+            FatalErrorOnInit("MD5 bad line len %u: '%s'", (uint32_t)len, line);
+            continue;
+        }
+    }
+    THashConsolidateMemcap(set->hash);
+
+    fclose(fp);
+    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
+    return 0;
+}
+
+/**
+ * \brief load SHA-256 hashes from the file named in set->load
+ *
+ * Each line is either 64 hex characters or 64 hex characters followed by
+ * `,<reputation>`. Entries that cannot be parsed or added are reported
+ * through FatalErrorOnInit() and skipped. Lines matching neither layout are
+ * skipped silently.
+ *
+ * \param set dataset to fill; nothing is done if set->load is empty
+ * \retval 0 on success or when there is nothing to load
+ * \retval -1 if the file cannot be opened
+ */
+static int DatasetLoadSha256(Dataset *set)
+{
+    if (strlen(set->load) == 0)
+        return 0;
+
+    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
+    /* when the load file doubles as the save file, "a+" lets fopen() create
+     * it if it does not exist yet */
+    const char *fopen_mode = "r";
+    if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
+        fopen_mode = "a+";
+    }
+
+    FILE *fp = fopen(set->load, fopen_mode);
+    if (fp == NULL) {
+        SCLogError("fopen '%s' failed: %s", set->load, strerror(errno));
+        return -1;
+    }
+
+    uint32_t cnt = 0;
+    char line[1024];
+    while (fgets(line, (int)sizeof(line), fp) != NULL) {
+        /* Strip the terminator before looking at the length, so that a
+         * final line without newline and CRLF files are accepted too. */
+        line[strcspn(line, "\r\n")] = '\0';
+        const size_t len = strlen(line);
+
+        /* straight black/white list */
+        if (len == 64) {
+            SCLogDebug("line: '%s'", line);
+
+            uint8_t hash[32];
+            if (HexToRaw((const uint8_t *)line, 64, hash, sizeof(hash)) < 0) {
+                FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
+                continue;
+            }
+
+            if (DatasetAdd(set, (const uint8_t *)hash, (uint32_t)32) < 0) {
+                FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
+                continue;
+            }
+            cnt++;
+
+        /* list with rep data */
+        } else if (len > 64 && line[64] == ',') {
+            SCLogDebug("SHA-256 with REP line: '%s'", line);
+
+            uint8_t hash[32];
+            if (HexToRaw((const uint8_t *)line, 64, hash, sizeof(hash)) < 0) {
+                FatalErrorOnInit("bad hash for dataset %s/%s", set->name, set->load);
+                continue;
+            }
+
+            /* the reputation starts right after the ',' at offset 64 */
+            DataRepType rep = { .value = 0 };
+            if (ParseRepLine(line + 65, len - 65, &rep) < 0) {
+                FatalErrorOnInit("bad rep for dataset %s/%s", set->name, set->load);
+                continue;
+            }
+
+            SCLogDebug("rep %u", rep.value);
+
+            if (DatasetAddwRep(set, hash, 32, &rep) < 0) {
+                FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
+                continue;
+            }
+            cnt++;
+        }
+    }
+    THashConsolidateMemcap(set->hash);
+
+    fclose(fp);
+    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
+    return 0;
+}
+
+/**
+ * \brief load base64 encoded strings from the file named in set->load
+ *
+ * Each line is either `<base64>` or `<base64>,<reputation>`. Blank lines
+ * are skipped. Entries that cannot be decoded, parsed or added are
+ * reported through FatalErrorOnInit() and skipped.
+ *
+ * \param set dataset to fill; nothing is done if set->load is empty
+ * \retval 0 on success or when there is nothing to load
+ * \retval -1 if the file cannot be opened
+ */
+static int DatasetLoadString(Dataset *set)
+{
+    if (strlen(set->load) == 0)
+        return 0;
+
+    SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
+    /* when the load file doubles as the save file, "a+" lets fopen() create
+     * it if it does not exist yet */
+    const char *fopen_mode = "r";
+    if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
+        fopen_mode = "a+";
+    }
+
+    FILE *fp = fopen(set->load, fopen_mode);
+    if (fp == NULL) {
+        SCLogError("fopen '%s' failed: %s", set->load, strerror(errno));
+        return -1;
+    }
+
+    uint32_t cnt = 0;
+    char line[1024];
+    while (fgets(line, (int)sizeof(line), fp) != NULL) {
+        if (strlen(line) <= 1)
+            continue;
+
+        /* Cut at the line terminator instead of blindly dropping the last
+         * byte: the final line may lack a newline and the file may use
+         * CRLF. */
+        line[strcspn(line, "\r\n")] = '\0';
+
+        /* an optional ",<reputation>" suffix follows the encoded string */
+        char *rep_str = strchr(line, ',');
+        if (rep_str != NULL)
+            *rep_str++ = '\0';
+        SCLogDebug("line: '%s'", line);
+
+        const size_t enc_len = strlen(line);
+        if (enc_len == 0 && rep_str == NULL)
+            continue; /* blank line, e.g. a bare CRLF */
+
+        /* Fixed-size buffer: decoded data is never longer than its encoded
+         * form, which fits in line[]. An array sized by strlen(line) would
+         * have length zero for a line that starts with ','. */
+        uint8_t decoded[sizeof(line)];
+        uint32_t consumed = 0, num_decoded = 0;
+        Base64Ecode code = DecodeBase64(decoded, enc_len, (const uint8_t *)line,
+                enc_len, &consumed, &num_decoded, BASE64_MODE_STRICT);
+        if (code == BASE64_ECODE_ERR) {
+            FatalErrorOnInit("bad base64 encoding %s/%s", set->name, set->load);
+            continue;
+        }
+
+        if (rep_str == NULL) {
+            if (DatasetAdd(set, (const uint8_t *)decoded, num_decoded) < 0) {
+                FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
+                continue;
+            }
+        } else {
+            SCLogDebug("r '%s'", rep_str);
+
+            DataRepType rep = { .value = 0 };
+            if (ParseRepLine(rep_str, strlen(rep_str), &rep) < 0) {
+                FatalErrorOnInit("die: bad rep");
+                continue;
+            }
+            SCLogDebug("rep %u", rep.value);
+
+            if (DatasetAddwRep(set, (const uint8_t *)decoded, num_decoded, &rep) < 0) {
+                FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
+                continue;
+            }
+            SCLogDebug("line with rep %s, %s", line, rep_str);
+        }
+        cnt++;
+    }
+    THashConsolidateMemcap(set->hash);
+
+    fclose(fp);
+    SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
+    return 0;
+}
+
+extern bool g_system; /* defined outside this file; when set, DatasetGetPath() does not fall back to the path as given */
+
+enum DatasetGetPathType { /* selects how DatasetGetPath() resolves a relative path */
+ TYPE_STATE, /* relative paths are always placed under the data directory */
+ TYPE_LOAD, /* relative paths not found under the data directory may be used as given */
+};
+
+/**
+ * \brief resolve a dataset file path from the configuration
+ *
+ * Absolute paths are copied unchanged. Relative paths are placed under the
+ * data directory. For TYPE_LOAD, if the resulting file does not exist and
+ * g_system is not set, the path is used as given instead.
+ *
+ * \param in_path path as configured
+ * \param out_path receives the resolved path; left untouched when the data
+ *        directory cannot be stat'ed
+ * \param out_size size of \a out_path
+ * \param type TYPE_STATE or TYPE_LOAD
+ */
+static void DatasetGetPath(const char *in_path,
+        char *out_path, size_t out_size, enum DatasetGetPathType type)
+{
+    char resolved[PATH_MAX];
+    struct stat sb;
+
+    if (PathIsAbsolute(in_path)) {
+        strlcpy(resolved, in_path, sizeof(resolved));
+        strlcpy(out_path, resolved, out_size);
+        return;
+    }
+
+    const char *data_dir = ConfigGetDataDirectory();
+    if (stat(data_dir, &sb) != 0) {
+        SCLogDebug("data-dir '%s': %s", data_dir, strerror(errno));
+        return;
+    }
+
+    snprintf(resolved, sizeof(resolved), "%s/%s", data_dir, in_path); // TODO WINDOWS
+
+    /* only load paths may fall back to the path as given */
+    if (type == TYPE_LOAD && stat(resolved, &sb) != 0) {
+        SCLogDebug("path %s: %s", resolved, strerror(errno));
+        if (!g_system) {
+            snprintf(resolved, sizeof(resolved), "%s", in_path);
+        }
+    }
+
+    strlcpy(out_path, resolved, out_size);
+    SCLogDebug("in_path \'%s\' => \'%s\'", in_path, out_path);
+}
+
+/**
+ * \brief look for set by name without creating it
+ *
+ * \param name dataset name, matched case-insensitively
+ * \param type type the dataset must have
+ * \retval set the dataset, or NULL if there is no visible dataset with
+ *         that name or its type differs from \a type
+ */
+Dataset *DatasetFind(const char *name, enum DatasetTypes type)
+{
+    SCMutexLock(&sets_lock);
+    Dataset *match = DatasetSearchByName(name);
+    if (match != NULL && match->type != type) {
+        match = NULL;
+    }
+    SCMutexUnlock(&sets_lock);
+    return match;
+}
+
+/**
+ * \brief get an existing dataset by name, or create and load a new one
+ *
+ * If a visible dataset with this name exists it is returned, provided its
+ * type matches (unless \a type is DATASET_TYPE_NOTSET) and, when \a save
+ * and/or \a load are given, they equal the values stored in the dataset.
+ * Otherwise a new dataset is allocated, its hash table is set up, the load
+ * file is read and the dataset is put at the head of the global list.
+ *
+ * \param name dataset name, at most DATASET_NAME_MAX_LEN characters
+ * \param type dataset type; DATASET_TYPE_NOTSET only looks up existing sets
+ * \param save path of the state file, may be NULL or empty
+ * \param load path of the file to load, may be NULL or empty
+ * \param memcap memcap for the hash table, 0 to use the configured default
+ * \param hashsize size of the hash table, 0 to use the configured default
+ * \retval set the dataset, or NULL on error
+ */
+Dataset *DatasetGet(const char *name, enum DatasetTypes type, const char *save, const char *load,
+        uint64_t memcap, uint32_t hashsize)
+{
+    uint64_t default_memcap = 0;
+    uint32_t default_hashsize = 0;
+    uint64_t use_memcap = 0;
+    uint32_t use_hashsize = 0;
+    int (*load_fn)(Dataset *) = NULL;
+
+    if (strlen(name) > DATASET_NAME_MAX_LEN) {
+        return NULL;
+    }
+
+    const bool has_save = (save != NULL && strlen(save) > 0);
+    const bool has_load = (load != NULL && strlen(load) > 0);
+    const int thash_load_flag = (load != NULL) ? 1 : 0;
+
+    SCMutexLock(&sets_lock);
+    Dataset *set = DatasetSearchByName(name);
+    if (set != NULL) {
+        Dataset *result = set;
+
+        if (type != DATASET_TYPE_NOTSET && set->type != type) {
+            SCLogError("dataset %s already exists and is of type %u", set->name, set->type);
+            result = NULL;
+        } else if (has_save || has_load) {
+            /* A rule keyword doesn't have to set state/load, even when the
+             * yaml set has set it. If it does, the values have to agree. */
+            if ((save == NULL && strlen(set->save) > 0) ||
+                    (save != NULL && strcmp(set->save, save) != 0)) {
+                SCLogError("dataset %s save mismatch: %s != %s", set->name, set->save, save);
+                result = NULL;
+            } else if ((load == NULL && strlen(set->load) > 0) ||
+                       (load != NULL && strcmp(set->load, load) != 0)) {
+                SCLogError("dataset %s load mismatch: %s != %s", set->name, set->load, load);
+                result = NULL;
+            }
+        }
+
+        SCMutexUnlock(&sets_lock);
+        return result;
+    }
+
+    if (type == DATASET_TYPE_NOTSET) {
+        SCLogError("dataset %s not defined", name);
+        goto out_err;
+    }
+
+    set = DatasetAlloc(name);
+    if (set == NULL) {
+        goto out_err;
+    }
+
+    strlcpy(set->name, name, sizeof(set->name));
+    set->type = type;
+    if (has_save) {
+        strlcpy(set->save, save, sizeof(set->save));
+        SCLogDebug("name %s save '%s'", name, set->save);
+    }
+    if (has_load) {
+        strlcpy(set->load, load, sizeof(set->load));
+        SCLogDebug("set \'%s\' loading \'%s\' from \'%s\'", set->name, load, set->load);
+    }
+
+    char cnf_name[128];
+    snprintf(cnf_name, sizeof(cnf_name), "datasets.%s.hash", name);
+
+    /* explicit arguments win over the configured defaults */
+    GetDefaultMemcap(&default_memcap, &default_hashsize);
+    use_memcap = memcap > 0 ? memcap : default_memcap;
+    use_hashsize = hashsize > 0 ? hashsize : default_hashsize;
+
+    switch (type) {
+        case DATASET_TYPE_MD5:
+            set->hash = THashInit(cnf_name, sizeof(Md5Type), Md5StrSet, Md5StrFree, Md5StrHash,
+                    Md5StrCompare, thash_load_flag, use_memcap, use_hashsize);
+            load_fn = DatasetLoadMd5;
+            break;
+        case DATASET_TYPE_STRING:
+            set->hash = THashInit(cnf_name, sizeof(StringType), StringSet, StringFree, StringHash,
+                    StringCompare, thash_load_flag, use_memcap, use_hashsize);
+            load_fn = DatasetLoadString;
+            break;
+        case DATASET_TYPE_SHA256:
+            set->hash = THashInit(cnf_name, sizeof(Sha256Type), Sha256StrSet, Sha256StrFree,
+                    Sha256StrHash, Sha256StrCompare, thash_load_flag, use_memcap, use_hashsize);
+            load_fn = DatasetLoadSha256;
+            break;
+        case DATASET_TYPE_IPV4:
+            set->hash = THashInit(cnf_name, sizeof(IPv4Type), IPv4Set, IPv4Free, IPv4Hash,
+                    IPv4Compare, thash_load_flag, use_memcap, use_hashsize);
+            load_fn = DatasetLoadIPv4;
+            break;
+        case DATASET_TYPE_IPV6:
+            set->hash = THashInit(cnf_name, sizeof(IPv6Type), IPv6Set, IPv6Free, IPv6Hash,
+                    IPv6Compare, thash_load_flag, use_memcap, use_hashsize);
+            load_fn = DatasetLoadIPv6;
+            break;
+    }
+
+    /* load_fn is only set when one of the cases above ran */
+    if (load_fn != NULL) {
+        if (set->hash == NULL)
+            goto out_err;
+        if (load_fn(set) < 0)
+            goto out_err;
+    }
+
+    SCLogDebug("set %p/%s type %u save %s load %s",
+            set, set->name, set->type, set->save, set->load);
+
+    /* publish the new set at the head of the global list */
+    set->next = sets;
+    sets = set;
+
+    SCMutexUnlock(&sets_lock);
+    return set;
+out_err:
+    if (set) {
+        if (set->hash) {
+            THashShutdown(set->hash);
+        }
+        SCFree(set);
+    }
+    SCMutexUnlock(&sets_lock);
+    return NULL;
+}
+
+/**
+ * \brief check if a set is static
+ *
+ * A set is static if it has load defined but no dynamic property, i.e. no
+ * save/state.
+ *
+ * \param save save path, may be NULL or empty
+ * \param load load path, may be NULL or empty
+ * \retval true if \a load is non-empty and \a save is NULL or empty
+ */
+static bool DatasetIsStatic(const char *save, const char *load)
+{
+    const bool has_load = (load != NULL && strlen(load) > 0);
+    if (!has_load)
+        return false;
+
+    const bool has_save = (save != NULL && strlen(save) != 0);
+    return !has_save;
+}
+
+/**
+ * \brief prepare the datasets for a rule reload
+ *
+ * Static sets that do not come from the yaml are marked hidden, so that
+ * name lookups no longer find them; they are cleaned up later. New datasets
+ * shall be created with the rule reload and do not require any
+ * intervention.
+ */
+void DatasetReload(void)
+{
+    SCMutexLock(&sets_lock);
+    for (Dataset *cur = sets; cur != NULL; cur = cur->next) {
+        if (DatasetIsStatic(cur->save, cur->load) && !(cur->from_yaml == true)) {
+            cur->hidden = true;
+            SCLogDebug("Set %s at %p hidden successfully", cur->name, cur);
+        } else {
+            SCLogDebug("Not a static set, skipping %s", cur->name);
+        }
+    }
+    SCMutexUnlock(&sets_lock);
+}
+
+/**
+ * \brief remove and free all sets that are marked hidden
+ *
+ * Hidden sets are unlinked from the global list, their hash tables are shut
+ * down and their memory is released.
+ */
+void DatasetPostReloadCleanup(void)
+{
+    SCLogDebug("Post Reload Cleanup starting.. Hidden sets will be removed");
+    SCMutexLock(&sets_lock);
+
+    /* link always points at the pointer that refers to the current node:
+     * either the list head or the next field of the last node kept */
+    Dataset **link = &sets;
+    while (*link != NULL) {
+        Dataset *cur = *link;
+        if (cur->hidden == false) {
+            link = &cur->next;
+            continue;
+        }
+        // Delete the set in case it was hidden
+        *link = cur->next;
+        THashShutdown(cur->hash);
+        SCFree(cur);
+    }
+
+    SCMutexUnlock(&sets_lock);
+}
+
+/**
+ * \brief read the default memcap and hashsize for datasets from the config
+ *
+ * An output is only written when its option ("datasets.defaults.memcap",
+ * "datasets.defaults.hashsize") is present: the parsed value on success, 0
+ * plus a warning if the value cannot be parsed.
+ *
+ * \param memcap receives the default memcap
+ * \param hashsize receives the default hashsize
+ */
+static void GetDefaultMemcap(uint64_t *memcap, uint32_t *hashsize)
+{
+    const char *value = NULL;
+
+    if (ConfGet("datasets.defaults.memcap", &value) == 1 &&
+            ParseSizeStringU64(value, memcap) < 0) {
+        SCLogWarning("memcap value cannot be deduced: %s, resetting to default", value);
+        *memcap = 0;
+    }
+
+    if (ConfGet("datasets.defaults.hashsize", &value) == 1 &&
+            ParseSizeStringU32(value, hashsize) < 0) {
+        SCLogWarning("hashsize value cannot be deduced: %s, resetting to default", value);
+        *hashsize = 0;
+    }
+}
+
+/**
+ * \brief create the datasets defined under the "datasets" config node
+ *
+ * Every named child with a "type" entry is set up through DatasetGet() and
+ * flagged as coming from the yaml. "state" provides both the save and the
+ * load path; otherwise "load" provides the load path only. "memcap" and
+ * "hashsize" override the configured defaults. Entries with an unknown
+ * type are ignored.
+ *
+ * \retval 0 always
+ */
+int DatasetsInit(void)
+{
+    /* yaml type values (matched case-sensitively) and the type they select */
+    static const struct {
+        const char *keyword;
+        enum DatasetTypes type;
+    } yaml_types[] = {
+        { "md5", DATASET_TYPE_MD5 },
+        { "sha256", DATASET_TYPE_SHA256 },
+        { "string", DATASET_TYPE_STRING },
+        { "ipv4", DATASET_TYPE_IPV4 },
+        { "ip", DATASET_TYPE_IPV6 },
+    };
+    const size_t yaml_types_cnt = sizeof(yaml_types) / sizeof(yaml_types[0]);
+
+    SCLogDebug("datasets start");
+    ConfNode *datasets = ConfGetNode("datasets");
+    uint64_t default_memcap = 0;
+    uint32_t default_hashsize = 0;
+    GetDefaultMemcap(&default_memcap, &default_hashsize);
+
+    if (datasets == NULL) {
+        SCLogDebug("datasets done: %p", datasets);
+        return 0;
+    }
+
+    int list_pos = 0;
+    ConfNode *iter = NULL;
+    TAILQ_FOREACH(iter, &datasets->head, next) {
+        if (iter->name == NULL) {
+            list_pos++;
+            continue;
+        }
+
+        char save[PATH_MAX] = "";
+        char load[PATH_MAX] = "";
+        uint64_t memcap = 0;
+        uint32_t hashsize = 0;
+
+        const char *set_name = iter->name;
+        if (strlen(set_name) > DATASET_NAME_MAX_LEN) {
+            FatalErrorOnInit(
+                    "set name '%s' too long, max %d chars", set_name, DATASET_NAME_MAX_LEN);
+            continue;
+        }
+
+        ConfNode *set_type = ConfNodeLookupChild(iter, "type");
+        if (set_type == NULL) {
+            list_pos++;
+            continue;
+        }
+
+        /* "state" takes precedence over "load" and serves as both */
+        ConfNode *set_save = ConfNodeLookupChild(iter, "state");
+        if (set_save != NULL) {
+            DatasetGetPath(set_save->val, save, sizeof(save), TYPE_STATE);
+            strlcpy(load, save, sizeof(load));
+        } else {
+            ConfNode *set_load = ConfNodeLookupChild(iter, "load");
+            if (set_load != NULL) {
+                DatasetGetPath(set_load->val, load, sizeof(load), TYPE_LOAD);
+            }
+        }
+
+        ConfNode *set_memcap = ConfNodeLookupChild(iter, "memcap");
+        if (set_memcap != NULL && ParseSizeStringU64(set_memcap->val, &memcap) < 0) {
+            SCLogWarning("memcap value cannot be deduced: %s, resetting to default",
+                    set_memcap->val);
+            memcap = 0;
+        }
+        ConfNode *set_hashsize = ConfNodeLookupChild(iter, "hashsize");
+        if (set_hashsize != NULL && ParseSizeStringU32(set_hashsize->val, &hashsize) < 0) {
+            SCLogWarning("hashsize value cannot be deduced: %s, resetting to default",
+                    set_hashsize->val);
+            hashsize = 0;
+        }
+
+        char conf_str[1024];
+        snprintf(conf_str, sizeof(conf_str), "datasets.%d.%s", list_pos, set_name);
+
+        SCLogDebug("set %s type %s. Conf %s", set_name, set_type->val, conf_str);
+
+        /* find the configured type; t == yaml_types_cnt means unknown */
+        size_t t;
+        for (t = 0; t < yaml_types_cnt; t++) {
+            if (strcmp(set_type->val, yaml_types[t].keyword) == 0)
+                break;
+        }
+
+        if (t < yaml_types_cnt) {
+            const uint64_t use_memcap = memcap > 0 ? memcap : default_memcap;
+            const uint32_t use_hashsize = hashsize > 0 ? hashsize : default_hashsize;
+
+            Dataset *dset =
+                    DatasetGet(set_name, yaml_types[t].type, save, load, use_memcap, use_hashsize);
+            if (dset == NULL) {
+                FatalErrorOnInit("failed to setup dataset for %s", set_name);
+                continue;
+            }
+            SCLogDebug("dataset %s: id %u type %s", set_name, dset->id, set_type->val);
+            dset->from_yaml = true;
+        }
+
+        list_pos++;
+    }
+
+    SCLogDebug("datasets done: %p", datasets);
+    return 0;
+}
+
+/**
+ * \brief shut down and free every dataset and empty the global list
+ */
+void DatasetsDestroy(void)
+{
+    SCLogDebug("destroying datasets: %p", sets);
+    SCMutexLock(&sets_lock);
+
+    for (Dataset *cur = sets, *following = NULL; cur != NULL; cur = following) {
+        SCLogDebug("destroying set %s", cur->name);
+        /* remember the successor before the node is released */
+        following = cur->next;
+        THashShutdown(cur->hash);
+        SCFree(cur);
+    }
+    sets = NULL;
+
+    SCMutexUnlock(&sets_lock);
+    SCLogDebug("destroying datasets done: %p", sets);
+}
+
+/**
+ * \brief output callback passed to THashWalk(): write one formatted entry
+ *
+ * \param ctx FILE pointer to write to, may be NULL
+ * \param data bytes to write
+ * \param data_len number of bytes in \a data
+ * \retval r the fwrite() result for a single item of \a data_len bytes, or
+ *         0 if \a ctx is NULL
+ */
+static int SaveCallback(void *ctx, const uint8_t *data, const uint32_t data_len)
+{
+    FILE *out = ctx;
+    if (out == NULL)
+        return 0;
+
+    //PrintRawDataFp(out, data, data_len);
+    return fwrite(data, data_len, 1, out);
+}
+
+/**
+ * \brief append the hex form of an MD5 entry plus a newline to \a out
+ *
+ * \param s pointer to a Md5Type
+ * \param out NUL-terminated output buffer that is appended to
+ * \param out_size total size of \a out
+ * \retval len length of the string in \a out after appending
+ */
+static int Md5AsAscii(const void *s, char *out, size_t out_size)
+{
+    const Md5Type *entry = s;
+    char hex[256];
+
+    PrintHexString(hex, sizeof(hex), (uint8_t *)entry->md5, sizeof(entry->md5));
+    strlcat(out, hex, out_size);
+    strlcat(out, "\n", out_size);
+
+    return strlen(out);
+}
+
+/**
+ * \brief append the hex form of a SHA-256 entry plus a newline to \a out
+ *
+ * \param s pointer to a Sha256Type
+ * \param out NUL-terminated output buffer that is appended to
+ * \param out_size total size of \a out
+ * \retval len length of the string in \a out after appending
+ */
+static int Sha256AsAscii(const void *s, char *out, size_t out_size)
+{
+    const Sha256Type *entry = s;
+    char hex[256];
+
+    PrintHexString(hex, sizeof(hex), (uint8_t *)entry->sha256, sizeof(entry->sha256));
+    strlcat(out, hex, out_size);
+    strlcat(out, "\n", out_size);
+
+    return strlen(out);
+}
+
+/**
+ * \brief append the text form of an IPv4 entry plus a newline to \a out
+ *
+ * \param s pointer to an IPv4Type
+ * \param out NUL-terminated output buffer that is appended to
+ * \param out_size total size of \a out
+ * \retval len length of the string in \a out after appending
+ */
+static int IPv4AsAscii(const void *s, char *out, size_t out_size)
+{
+    const IPv4Type *entry = s;
+    char text[256];
+
+    PrintInet(AF_INET, entry->ipv4, text, sizeof(text));
+    strlcat(out, text, out_size);
+    strlcat(out, "\n", out_size);
+
+    return strlen(out);
+}
+
+/**
+ * \brief append the text form of an IP entry plus a newline to \a out
+ *
+ * An entry whose bytes 4 to 15 are all zero is printed as an IPv4 address,
+ * anything else as an IPv6 address.
+ *
+ * \param s pointer to an IPv6Type
+ * \param out NUL-terminated output buffer that is appended to
+ * \param out_size total size of \a out
+ * \retval len length of the string in \a out after appending
+ */
+static int IPv6AsAscii(const void *s, char *out, size_t out_size)
+{
+    const IPv6Type *entry = s;
+    char text[256];
+
+    /* assume IPv4 until a non-zero byte shows up past the first four */
+    int family = AF_INET;
+    for (int pos = 4; pos < 16; pos++) {
+        if (entry->ipv6[pos] != 0) {
+            family = AF_INET6;
+            break;
+        }
+    }
+
+    PrintInet(family, entry->ipv6, text, sizeof(text));
+    strlcat(out, text, out_size);
+    strlcat(out, "\n", out_size);
+
+    return strlen(out);
+}
+
+/**
+ * \brief write every dataset that has a save path to its file
+ *
+ * The file is truncated and rewritten from the contents of the hash table.
+ * Sets without a save path are skipped. A file that cannot be opened, or
+ * whose close fails, is logged as an error and the remaining sets are still
+ * processed, so a failed save does not go unnoticed.
+ */
+void DatasetsSave(void)
+{
+    SCLogDebug("saving datasets: %p", sets);
+    SCMutexLock(&sets_lock);
+    for (Dataset *set = sets; set != NULL; set = set->next) {
+        if (strlen(set->save) == 0)
+            continue;
+
+        FILE *fp = fopen(set->save, "w");
+        if (fp == NULL) {
+            SCLogError("fopen '%s' failed: %s", set->save, strerror(errno));
+            continue;
+        }
+
+        SCLogDebug("dumping %s to %s", set->name, set->save);
+
+        switch (set->type) {
+            case DATASET_TYPE_STRING:
+                THashWalk(set->hash, StringAsBase64, SaveCallback, fp);
+                break;
+            case DATASET_TYPE_MD5:
+                THashWalk(set->hash, Md5AsAscii, SaveCallback, fp);
+                break;
+            case DATASET_TYPE_SHA256:
+                THashWalk(set->hash, Sha256AsAscii, SaveCallback, fp);
+                break;
+            case DATASET_TYPE_IPV4:
+                THashWalk(set->hash, IPv4AsAscii, SaveCallback, fp);
+                break;
+            case DATASET_TYPE_IPV6:
+                THashWalk(set->hash, IPv6AsAscii, SaveCallback, fp);
+                break;
+        }
+
+        /* fclose() flushes buffered data, so a write error can surface here */
+        if (fclose(fp) != 0) {
+            SCLogError("fclose '%s' failed: %s", set->save, strerror(errno));
+        }
+    }
+    SCMutexUnlock(&sets_lock);
+}
+
+/**
+ * \brief check if a string is part of a string dataset
+ *
+ * \retval -1 if \a set is NULL
+ * \retval 0 not found
+ * \retval 1 found
+ */
+static int DatasetLookupString(Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    if (set == NULL)
+        return -1;
+
+    StringType key = { .ptr = (uint8_t *)data, .len = data_len, .rep.value = 0 };
+    THashData *hit = THashLookupFromHash(set->hash, &key);
+    if (hit == NULL)
+        return 0;
+
+    /* the lookup hands back the entry in a state that has to be released */
+    DatasetUnlockData(hit);
+    return 1;
+}
+
+/**
+ * \brief look up a string in a string dataset and return its reputation
+ *
+ * \param rep copied into the lookup key
+ * \retval result .found is true and .rep holds the stored reputation if the
+ *         string is in the set; .found is false if it is not or \a set is
+ *         NULL
+ */
+static DataRepResultType DatasetLookupStringwRep(Dataset *set,
+        const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
+{
+    DataRepResultType result = { .found = false, .rep = { .value = 0 }};
+    if (set == NULL)
+        return result;
+
+    StringType key = { .ptr = (uint8_t *)data, .len = data_len, .rep = *rep };
+    THashData *hit = THashLookupFromHash(set->hash, &key);
+    if (hit != NULL) {
+        const StringType *entry = hit->data;
+        result.found = true;
+        result.rep = entry->rep;
+        DatasetUnlockData(hit);
+    }
+    return result;
+}
+
+/**
+ * \brief check if an IPv4 address is part of an IPv4 dataset
+ *
+ * \param data the 4 address bytes
+ * \retval -1 if \a set is NULL or \a data_len is not 4
+ * \retval 0 not found
+ * \retval 1 found
+ */
+static int DatasetLookupIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    if (set == NULL || data_len != 4)
+        return -1;
+
+    IPv4Type key = { .rep.value = 0 };
+    memcpy(key.ipv4, data, 4);
+
+    THashData *hit = THashLookupFromHash(set->hash, &key);
+    if (hit == NULL)
+        return 0;
+
+    DatasetUnlockData(hit);
+    return 1;
+}
+
+/**
+ * \brief look up an IPv4 address in an IPv4 dataset and return its reputation
+ *
+ * \param data the 4 address bytes
+ * \param rep not used by the lookup
+ * \retval result .found is true and .rep holds the stored reputation if the
+ *         address is in the set; .found is false if it is not, \a set is
+ *         NULL or \a data_len is not 4
+ */
+static DataRepResultType DatasetLookupIPv4wRep(
+        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
+{
+    DataRepResultType result = { .found = false, .rep = { .value = 0 } };
+    if (set == NULL || data_len != 4)
+        return result;
+
+    IPv4Type key = { .rep.value = 0 };
+    memcpy(key.ipv4, data, data_len);
+
+    THashData *hit = THashLookupFromHash(set->hash, &key);
+    if (hit != NULL) {
+        const IPv4Type *entry = hit->data;
+        result.found = true;
+        result.rep = entry->rep;
+        DatasetUnlockData(hit);
+    }
+    return result;
+}
+
+/**
+ * \brief check if an IP address is part of an IP dataset
+ *
+ * A 4 byte address is looked up as those 4 bytes followed by 12 zero bytes.
+ *
+ * \param data 16 or 4 address bytes
+ * \retval -1 if \a set is NULL or \a data_len is neither 16 nor 4
+ * \retval 0 not found
+ * \retval 1 found
+ */
+static int DatasetLookupIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    if (set == NULL)
+        return -1;
+    if (!(data_len == 16 || data_len == 4))
+        return -1;
+
+    /* the initializer zeroes the key, so a short address is zero-padded */
+    IPv6Type key = { .rep.value = 0 };
+    memcpy(key.ipv6, data, data_len);
+
+    THashData *hit = THashLookupFromHash(set->hash, &key);
+    if (hit == NULL)
+        return 0;
+
+    DatasetUnlockData(hit);
+    return 1;
+}
+
+/**
+ * \brief look up an IP address in an IP dataset and return its reputation
+ *
+ * A 4 byte address is looked up as those 4 bytes followed by 12 zero bytes.
+ *
+ * \param data 16 or 4 address bytes
+ * \param rep not used by the lookup
+ * \retval result .found is true and .rep holds the stored reputation if the
+ *         address is in the set; .found is false if it is not, \a set is
+ *         NULL or \a data_len is neither 16 nor 4
+ */
+static DataRepResultType DatasetLookupIPv6wRep(
+        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
+{
+    DataRepResultType result = { .found = false, .rep = { .value = 0 } };
+    if (set == NULL)
+        return result;
+    if (!(data_len == 16 || data_len == 4))
+        return result;
+
+    /* the initializer zeroes the key, so a short address is zero-padded */
+    IPv6Type key = { .rep.value = 0 };
+    memcpy(key.ipv6, data, data_len);
+
+    THashData *hit = THashLookupFromHash(set->hash, &key);
+    if (hit != NULL) {
+        const IPv6Type *entry = hit->data;
+        result.found = true;
+        result.rep = entry->rep;
+        DatasetUnlockData(hit);
+    }
+    return result;
+}
+
+/**
+ * \brief check if an MD5 hash is part of an MD5 dataset
+ *
+ * \param data the 16 raw hash bytes
+ * \retval -1 if \a set is NULL or \a data_len is not 16
+ * \retval 0 not found
+ * \retval 1 found
+ */
+static int DatasetLookupMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    if (set == NULL || data_len != 16)
+        return -1;
+
+    Md5Type key = { .rep.value = 0 };
+    memcpy(key.md5, data, data_len);
+
+    THashData *hit = THashLookupFromHash(set->hash, &key);
+    if (hit == NULL)
+        return 0;
+
+    DatasetUnlockData(hit);
+    return 1;
+}
+
+/**
+ * \brief look up a raw MD5 digest and return the reputation stored with it
+ * \param set dataset to search
+ * \param data raw digest bytes
+ * \param data_len length of \a data in bytes, has to be 16
+ * \param rep not used by this function
+ * \retval result with .found false if \a set is NULL, \a data_len is not 16
+ *         or the digest is not in the set; otherwise .found is true and
+ *         .rep holds the stored reputation
+ */
+static DataRepResultType DatasetLookupMd5wRep(Dataset *set,
+        const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
+{
+    DataRepResultType result = { .found = false, .rep = { .value = 0 } };
+
+    if (set == NULL || data_len != 16)
+        return result;
+
+    Md5Type key = { .rep.value = 0 };
+    memcpy(key.md5, data, data_len);
+
+    THashData *hit = THashLookupFromHash(set->hash, &key);
+    if (hit != NULL) {
+        /* copy the reputation out before the entry is released */
+        const Md5Type *entry = hit->data;
+        result.found = true;
+        result.rep = entry->rep;
+        DatasetUnlockData(hit);
+    }
+    return result;
+}
+
+/**
+ * \brief check whether a raw SHA256 digest is part of the set
+ * \param set dataset to search
+ * \param data raw digest bytes
+ * \param data_len length of \a data in bytes, has to be 32
+ * \retval -1 \a set is NULL or \a data_len is not 32
+ * \retval 0 not found
+ * \retval 1 found
+ */
+static int DatasetLookupSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    if (set == NULL || data_len != 32)
+        return -1;
+
+    Sha256Type key = { .rep.value = 0 };
+    memcpy(key.sha256, data, data_len);
+
+    THashData *hit = THashLookupFromHash(set->hash, &key);
+    if (hit == NULL)
+        return 0;
+
+    /* only presence matters here, so release the entry right away */
+    DatasetUnlockData(hit);
+    return 1;
+}
+
+/**
+ * \brief look up a raw SHA256 digest and return the reputation stored with it
+ * \param set dataset to search
+ * \param data raw digest bytes
+ * \param data_len length of \a data in bytes, has to be 32
+ * \param rep not used by this function
+ * \retval result with .found false if \a set is NULL, \a data_len is not 32
+ *         or the digest is not in the set; otherwise .found is true and
+ *         .rep holds the stored reputation
+ */
+static DataRepResultType DatasetLookupSha256wRep(Dataset *set,
+        const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
+{
+    DataRepResultType result = { .found = false, .rep = { .value = 0 } };
+
+    if (set == NULL || data_len != 32)
+        return result;
+
+    Sha256Type key = { .rep.value = 0 };
+    memcpy(key.sha256, data, data_len);
+
+    THashData *hit = THashLookupFromHash(set->hash, &key);
+    if (hit != NULL) {
+        /* copy the reputation out before the entry is released */
+        const Sha256Type *entry = hit->data;
+        result.found = true;
+        result.rep = entry->rep;
+        DatasetUnlockData(hit);
+    }
+    return result;
+}
+
+/**
+ * \brief test whether \a data is a member of \a set
+ *
+ * Dispatches to the lookup routine matching the type of the set.
+ *
+ * \param set dataset to search
+ * \param data data to look up
+ * \param data_len length in bytes of \a data
+ * \retval -1 error (including a NULL \a set or an unhandled set type)
+ * \retval 0 not found
+ * \retval 1 found
+ */
+int DatasetLookup(Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    int ret = -1;
+
+    if (set == NULL)
+        return ret;
+
+    switch (set->type) {
+        case DATASET_TYPE_STRING:
+            ret = DatasetLookupString(set, data, data_len);
+            break;
+        case DATASET_TYPE_MD5:
+            ret = DatasetLookupMd5(set, data, data_len);
+            break;
+        case DATASET_TYPE_SHA256:
+            ret = DatasetLookupSha256(set, data, data_len);
+            break;
+        case DATASET_TYPE_IPV4:
+            ret = DatasetLookupIPv4(set, data, data_len);
+            break;
+        case DATASET_TYPE_IPV6:
+            ret = DatasetLookupIPv6(set, data, data_len);
+            break;
+    }
+    return ret;
+}
+
+/**
+ * \brief look up \a data in \a set and return the reputation stored with it
+ *
+ * Dispatches to the reputation-aware lookup routine matching the set type.
+ *
+ * \param set dataset to search
+ * \param data data to look up
+ * \param data_len length in bytes of \a data
+ * \param rep passed through to the type specific lookup
+ * \retval result with .found false and a zero reputation when \a set is
+ *         NULL or the set type is not handled; otherwise the result of the
+ *         type specific lookup
+ */
+DataRepResultType DatasetLookupwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
+        const DataRepType *rep)
+{
+    DataRepResultType result = { .found = false, .rep = { .value = 0 } };
+
+    if (set != NULL) {
+        switch (set->type) {
+            case DATASET_TYPE_STRING:
+                result = DatasetLookupStringwRep(set, data, data_len, rep);
+                break;
+            case DATASET_TYPE_MD5:
+                result = DatasetLookupMd5wRep(set, data, data_len, rep);
+                break;
+            case DATASET_TYPE_SHA256:
+                result = DatasetLookupSha256wRep(set, data, data_len, rep);
+                break;
+            case DATASET_TYPE_IPV4:
+                result = DatasetLookupIPv4wRep(set, data, data_len, rep);
+                break;
+            case DATASET_TYPE_IPV6:
+                result = DatasetLookupIPv6wRep(set, data, data_len, rep);
+                break;
+        }
+    }
+    return result;
+}
+
+/**
+ * \brief add a string to the set, without reputation
+ * \param set dataset to add to
+ * \param data string bytes
+ * \param data_len length of \a data in bytes
+ * \retval 1 data was added to the hash
+ * \retval 0 data was not added to the hash as it is already there
+ * \retval -1 failed to add data to the hash
+ */
+static int DatasetAddString(Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    if (set == NULL)
+        return -1;
+
+    StringType entry = { .ptr = (uint8_t *)data, .len = data_len, .rep.value = 0 };
+
+    const struct THashDataGetResult got = THashGetFromHash(set->hash, &entry);
+    if (got.data == NULL)
+        return -1;
+
+    DatasetUnlockData(got.data);
+    return got.is_new ? 1 : 0;
+}
+
+/**
+ * \brief add a string together with a reputation value to the set
+ * \param set dataset to add to
+ * \param data string bytes
+ * \param data_len length of \a data in bytes
+ * \param rep reputation to store with the string
+ * \retval 1 data was added to the hash
+ * \retval 0 data was not added to the hash as it is already there
+ * \retval -1 failed to add data to the hash
+ */
+static int DatasetAddStringwRep(
+        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
+{
+    if (set == NULL)
+        return -1;
+
+    StringType entry = { .ptr = (uint8_t *)data, .len = data_len, .rep = *rep };
+
+    const struct THashDataGetResult got = THashGetFromHash(set->hash, &entry);
+    if (got.data == NULL)
+        return -1;
+
+    DatasetUnlockData(got.data);
+    return got.is_new ? 1 : 0;
+}
+
+/**
+ * \brief add an IPv4 address to the set, without reputation
+ * \param set dataset to add to
+ * \param data raw address bytes; only the first 4 bytes are used
+ * \param data_len length of \a data in bytes, at least 4
+ * \retval 1 data was added to the hash
+ * \retval 0 data was not added to the hash as it is already there
+ * \retval -1 \a set is NULL or adding to the hash failed
+ * \retval -2 \a data_len is smaller than 4
+ */
+static int DatasetAddIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    if (set == NULL)
+        return -1;
+    if (data_len < 4)
+        return -2;
+
+    IPv4Type entry = { .rep.value = 0 };
+    memcpy(entry.ipv4, data, 4);
+
+    const struct THashDataGetResult got = THashGetFromHash(set->hash, &entry);
+    if (got.data == NULL)
+        return -1;
+
+    DatasetUnlockData(got.data);
+    return got.is_new ? 1 : 0;
+}
+
+/**
+ * \brief add an IPv6 address to the set, without reputation
+ * \param set dataset to add to
+ * \param data raw address bytes
+ * \param data_len length of \a data in bytes, has to be 16
+ * \retval 1 data was added to the hash
+ * \retval 0 data was not added to the hash as it is already there
+ * \retval -1 \a set is NULL or adding to the hash failed
+ * \retval -2 \a data_len is not 16
+ */
+static int DatasetAddIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    if (set == NULL)
+        return -1;
+    if (data_len != 16)
+        return -2;
+
+    IPv6Type entry = { .rep.value = 0 };
+    memcpy(entry.ipv6, data, 16);
+
+    const struct THashDataGetResult got = THashGetFromHash(set->hash, &entry);
+    if (got.data == NULL)
+        return -1;
+
+    DatasetUnlockData(got.data);
+    return got.is_new ? 1 : 0;
+}
+
+/**
+ * \brief add an IPv4 address together with a reputation value to the set
+ * \param set dataset to add to
+ * \param data raw address bytes; only the first 4 bytes are used
+ * \param data_len length of \a data in bytes, at least 4
+ * \param rep reputation to store with the address
+ * \retval 1 data was added to the hash
+ * \retval 0 data was not added to the hash as it is already there
+ * \retval -1 \a set is NULL or adding to the hash failed
+ * \retval -2 \a data_len is smaller than 4
+ */
+static int DatasetAddIPv4wRep(
+        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
+{
+    if (set == NULL) {
+        return -1;
+    }
+    if (data_len < 4) {
+        return -2;
+    }
+
+    IPv4Type entry = { .rep = *rep };
+    memcpy(entry.ipv4, data, 4);
+
+    const struct THashDataGetResult got = THashGetFromHash(set->hash, &entry);
+    if (got.data == NULL) {
+        return -1;
+    }
+
+    DatasetUnlockData(got.data);
+    return got.is_new ? 1 : 0;
+}
+
+/**
+ * \brief add an IPv6 address together with a reputation value to the set
+ * \param set dataset to add to
+ * \param data raw address bytes
+ * \param data_len length of \a data in bytes, has to be 16
+ * \param rep reputation to store with the address
+ * \retval 1 data was added to the hash
+ * \retval 0 data was not added to the hash as it is already there
+ * \retval -1 \a set is NULL or adding to the hash failed
+ * \retval -2 \a data_len is not 16
+ */
+static int DatasetAddIPv6wRep(
+        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
+{
+    if (set == NULL) {
+        return -1;
+    }
+    if (data_len != 16) {
+        return -2;
+    }
+
+    IPv6Type entry = { .rep = *rep };
+    memcpy(entry.ipv6, data, 16);
+
+    const struct THashDataGetResult got = THashGetFromHash(set->hash, &entry);
+    if (got.data == NULL) {
+        return -1;
+    }
+
+    DatasetUnlockData(got.data);
+    return got.is_new ? 1 : 0;
+}
+
+/**
+ * \brief add a raw MD5 digest to the set, without reputation
+ * \param set dataset to add to
+ * \param data raw digest bytes
+ * \param data_len length of \a data in bytes, has to be 16
+ * \retval 1 data was added to the hash
+ * \retval 0 data was not added to the hash as it is already there
+ * \retval -1 \a set is NULL or adding to the hash failed
+ * \retval -2 \a data_len is not 16
+ */
+static int DatasetAddMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    if (set == NULL) {
+        return -1;
+    }
+    if (data_len != 16) {
+        return -2;
+    }
+
+    Md5Type entry = { .rep.value = 0 };
+    memcpy(entry.md5, data, 16);
+
+    const struct THashDataGetResult got = THashGetFromHash(set->hash, &entry);
+    if (got.data == NULL) {
+        return -1;
+    }
+
+    DatasetUnlockData(got.data);
+    return got.is_new ? 1 : 0;
+}
+
+/**
+ * \brief add a raw MD5 digest together with a reputation value to the set
+ * \param set dataset to add to
+ * \param data raw digest bytes
+ * \param data_len length of \a data in bytes, has to be 16
+ * \param rep reputation to store with the digest
+ * \retval 1 data was added to the hash
+ * \retval 0 data was not added to the hash as it is already there
+ * \retval -1 \a set is NULL or adding to the hash failed
+ * \retval -2 \a data_len is not 16
+ */
+static int DatasetAddMd5wRep(
+        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
+{
+    if (set == NULL) {
+        return -1;
+    }
+    if (data_len != 16) {
+        return -2;
+    }
+
+    Md5Type entry = { .rep = *rep };
+    memcpy(entry.md5, data, 16);
+
+    const struct THashDataGetResult got = THashGetFromHash(set->hash, &entry);
+    if (got.data == NULL) {
+        return -1;
+    }
+
+    DatasetUnlockData(got.data);
+    return got.is_new ? 1 : 0;
+}
+
+/**
+ * \brief add a raw SHA256 digest together with a reputation value to the set
+ * \param set dataset to add to
+ * \param data raw digest bytes
+ * \param data_len length of \a data in bytes, has to be 32
+ * \param rep reputation to store with the digest
+ * \retval 1 data was added to the hash
+ * \retval 0 data was not added to the hash as it is already there
+ * \retval -1 \a set is NULL or adding to the hash failed
+ * \retval -2 \a data_len is not 32
+ */
+static int DatasetAddSha256wRep(
+        Dataset *set, const uint8_t *data, const uint32_t data_len, const DataRepType *rep)
+{
+    if (set == NULL) {
+        return -1;
+    }
+    if (data_len != 32) {
+        return -2;
+    }
+
+    Sha256Type entry = { .rep = *rep };
+    memcpy(entry.sha256, data, 32);
+
+    const struct THashDataGetResult got = THashGetFromHash(set->hash, &entry);
+    if (got.data == NULL) {
+        return -1;
+    }
+
+    DatasetUnlockData(got.data);
+    return got.is_new ? 1 : 0;
+}
+
+/**
+ * \brief add a raw SHA256 digest to the set, without reputation
+ * \param set dataset to add to
+ * \param data raw digest bytes
+ * \param data_len length of \a data in bytes, has to be 32
+ * \retval 1 data was added to the hash
+ * \retval 0 data was not added to the hash as it is already there
+ * \retval -1 \a set is NULL or adding to the hash failed
+ * \retval -2 \a data_len is not 32
+ */
+static int DatasetAddSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    if (set == NULL) {
+        return -1;
+    }
+    if (data_len != 32) {
+        return -2;
+    }
+
+    Sha256Type entry = { .rep.value = 0 };
+    memcpy(entry.sha256, data, 32);
+
+    const struct THashDataGetResult got = THashGetFromHash(set->hash, &entry);
+    if (got.data == NULL) {
+        return -1;
+    }
+
+    DatasetUnlockData(got.data);
+    return got.is_new ? 1 : 0;
+}
+
+/**
+ * \brief add \a data to \a set
+ *
+ * Dispatches to the add routine matching the type of the set.
+ *
+ * \param set dataset to add to
+ * \param data data to add
+ * \param data_len length in bytes of \a data
+ * \retval -1 \a set is NULL or the set type is not handled; otherwise the
+ *            return value of the type specific add routine
+ */
+int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    int ret = -1;
+
+    if (set == NULL)
+        return ret;
+
+    switch (set->type) {
+        case DATASET_TYPE_STRING:
+            ret = DatasetAddString(set, data, data_len);
+            break;
+        case DATASET_TYPE_MD5:
+            ret = DatasetAddMd5(set, data, data_len);
+            break;
+        case DATASET_TYPE_SHA256:
+            ret = DatasetAddSha256(set, data, data_len);
+            break;
+        case DATASET_TYPE_IPV4:
+            ret = DatasetAddIPv4(set, data, data_len);
+            break;
+        case DATASET_TYPE_IPV6:
+            ret = DatasetAddIPv6(set, data, data_len);
+            break;
+    }
+    return ret;
+}
+
+/**
+ * \brief add \a data with a reputation value to \a set
+ *
+ * Dispatches to the reputation-aware add routine matching the set type.
+ *
+ * \param set dataset to add to
+ * \param data data to add
+ * \param data_len length in bytes of \a data
+ * \param rep reputation handed to the type specific add routine
+ * \retval -1 \a set is NULL or the set type is not handled; otherwise the
+ *            return value of the type specific add routine
+ */
+static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
+        DataRepType *rep)
+{
+    int ret = -1;
+
+    if (set == NULL)
+        return ret;
+
+    switch (set->type) {
+        case DATASET_TYPE_STRING:
+            ret = DatasetAddStringwRep(set, data, data_len, rep);
+            break;
+        case DATASET_TYPE_MD5:
+            ret = DatasetAddMd5wRep(set, data, data_len, rep);
+            break;
+        case DATASET_TYPE_SHA256:
+            ret = DatasetAddSha256wRep(set, data, data_len, rep);
+            break;
+        case DATASET_TYPE_IPV4:
+            ret = DatasetAddIPv4wRep(set, data, data_len, rep);
+            break;
+        case DATASET_TYPE_IPV6:
+            ret = DatasetAddIPv6wRep(set, data, data_len, rep);
+            break;
+    }
+    return ret;
+}
+
+/** \brief signature shared by the per-type add/lookup/remove routines */
+typedef int (*DatasetOpFunc)(Dataset *set, const uint8_t *data, const uint32_t data_len);
+
+/**
+ * \brief decode a serialized value for the set's type and apply an operation
+ *
+ * The textual form depends on the set type: base64 for strings, 32 hex
+ * characters for MD5, 64 hex characters for SHA256 and the inet_pton()
+ * presentation format for IPv4/IPv6 addresses. After decoding, the callback
+ * matching the set type is invoked on the raw bytes.
+ *
+ * \param set dataset to operate on
+ * \param string NUL-terminated serialized value
+ * \param DatasetOpString callback for string sets
+ * \param DatasetOpMd5 callback for MD5 sets
+ * \param DatasetOpSha256 callback for SHA256 sets
+ * \param DatasetOpIPv4 callback for IPv4 sets
+ * \param DatasetOpIPv6 callback for IPv6 sets
+ * \retval -1 \a set or \a string is NULL, or the set type is not handled
+ * \retval -2 \a string could not be decoded for the set's type
+ * \retval other the return value of the invoked callback
+ */
+static int DatasetOpSerialized(Dataset *set, const char *string, DatasetOpFunc DatasetOpString,
+        DatasetOpFunc DatasetOpMd5, DatasetOpFunc DatasetOpSha256, DatasetOpFunc DatasetOpIPv4,
+        DatasetOpFunc DatasetOpIPv6)
+{
+    /* strlen() and inet_pton() below must not be handed a NULL string */
+    if (set == NULL || string == NULL)
+        return -1;
+
+    switch (set->type) {
+        case DATASET_TYPE_STRING: {
+            const size_t len = strlen(string);
+            /* decoded output is never longer than the input; use at least one
+             * element as a zero-length VLA is undefined behavior */
+            // coverity[alloc_strlen : FALSE]
+            uint8_t decoded[len > 0 ? len : 1];
+            uint32_t consumed = 0, num_decoded = 0;
+            Base64Ecode code = DecodeBase64(decoded, len, (const uint8_t *)string, len,
+                    &consumed, &num_decoded, BASE64_MODE_STRICT);
+            if (code == BASE64_ECODE_ERR) {
+                return -2;
+            }
+
+            return DatasetOpString(set, decoded, num_decoded);
+        }
+        case DATASET_TYPE_MD5: {
+            /* 16 raw bytes are serialized as 32 hex characters */
+            if (strlen(string) != 32)
+                return -2;
+            uint8_t hash[16];
+            if (HexToRaw((const uint8_t *)string, 32, hash, sizeof(hash)) < 0)
+                return -2;
+            return DatasetOpMd5(set, hash, 16);
+        }
+        case DATASET_TYPE_SHA256: {
+            /* 32 raw bytes are serialized as 64 hex characters */
+            if (strlen(string) != 64)
+                return -2;
+            uint8_t hash[32];
+            if (HexToRaw((const uint8_t *)string, 64, hash, sizeof(hash)) < 0)
+                return -2;
+            return DatasetOpSha256(set, hash, 32);
+        }
+        case DATASET_TYPE_IPV4: {
+            struct in_addr in;
+            if (inet_pton(AF_INET, string, &in) != 1)
+                return -2;
+            return DatasetOpIPv4(set, (uint8_t *)&in.s_addr, 4);
+        }
+        case DATASET_TYPE_IPV6: {
+            struct in6_addr in;
+            if (inet_pton(AF_INET6, string, &in) != 1)
+                return -2;
+            return DatasetOpIPv6(set, (uint8_t *)&in.s6_addr, 16);
+        }
+    }
+    return -1;
+}
+
+/** \brief decode a serialized value and add it to the set
+ *  \param set dataset to add to
+ *  \param string serialized value, decoded according to the set's type
+ *  \retval int 1 added
+ *  \retval int 0 already in hash
+ *  \retval int -1 API error (not added)
+ *  \retval int -2 DATA error
+ */
+int DatasetAddSerialized(Dataset *set, const char *string)
+{
+    const int ret = DatasetOpSerialized(set, string, DatasetAddString, DatasetAddMd5,
+            DatasetAddSha256, DatasetAddIPv4, DatasetAddIPv6);
+    return ret;
+}
+
+/** \brief look up serialized data in set
+ *  \param set dataset to search
+ *  \param string serialized value, decoded according to the set's type
+ *  \retval int 1 found
+ *  \retval int 0 not found
+ *  \retval int -1 API error
+ *  \retval int -2 DATA error
+ */
+int DatasetLookupSerialized(Dataset *set, const char *string)
+{
+    return DatasetOpSerialized(set, string, DatasetLookupString, DatasetLookupMd5,
+            DatasetLookupSha256, DatasetLookupIPv4, DatasetLookupIPv6);
+}
+
+/**
+ * \brief remove a string from the set
+ * \param set dataset to remove from
+ * \param data string bytes
+ * \param data_len length of \a data in bytes
+ * \retval 1 data was removed from the hash
+ * \retval 0 data not removed (busy)
+ * \retval -1 data not found
+ */
+static int DatasetRemoveString(Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    if (set == NULL) {
+        return -1;
+    }
+
+    StringType key = { .ptr = (uint8_t *)data, .len = data_len, .rep.value = 0 };
+    const int ret = THashRemoveFromHash(set->hash, &key);
+    return ret;
+}
+
+/**
+ * \brief remove an IPv4 address from the set
+ * \param set dataset to remove from
+ * \param data raw address bytes
+ * \param data_len length of \a data in bytes, has to be 4
+ * \retval -1 \a set is NULL
+ * \retval -2 \a data_len is not 4
+ * \retval other the return value of THashRemoveFromHash()
+ */
+static int DatasetRemoveIPv4(Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    if (set == NULL) {
+        return -1;
+    }
+    if (data_len != 4) {
+        return -2;
+    }
+
+    IPv4Type key = { .rep.value = 0 };
+    memcpy(key.ipv4, data, 4);
+
+    const int ret = THashRemoveFromHash(set->hash, &key);
+    return ret;
+}
+
+/**
+ * \brief remove an IPv6 address from the set
+ * \param set dataset to remove from
+ * \param data raw address bytes
+ * \param data_len length of \a data in bytes, has to be 16
+ * \retval -1 \a set is NULL
+ * \retval -2 \a data_len is not 16
+ * \retval other the return value of THashRemoveFromHash()
+ */
+static int DatasetRemoveIPv6(Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    if (set == NULL) {
+        return -1;
+    }
+    if (data_len != 16) {
+        return -2;
+    }
+
+    IPv6Type key = { .rep.value = 0 };
+    memcpy(key.ipv6, data, 16);
+
+    const int ret = THashRemoveFromHash(set->hash, &key);
+    return ret;
+}
+
+/**
+ * \brief remove a raw MD5 digest from the set
+ * \param set dataset to remove from
+ * \param data raw digest bytes
+ * \param data_len length of \a data in bytes, has to be 16
+ * \retval -1 \a set is NULL
+ * \retval -2 \a data_len is not 16
+ * \retval other the return value of THashRemoveFromHash()
+ */
+static int DatasetRemoveMd5(Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    if (set == NULL) {
+        return -1;
+    }
+    if (data_len != 16) {
+        return -2;
+    }
+
+    Md5Type key = { .rep.value = 0 };
+    memcpy(key.md5, data, 16);
+
+    const int ret = THashRemoveFromHash(set->hash, &key);
+    return ret;
+}
+
+/**
+ * \brief remove a raw SHA256 digest from the set
+ * \param set dataset to remove from
+ * \param data raw digest bytes
+ * \param data_len length of \a data in bytes, has to be 32
+ * \retval -1 \a set is NULL
+ * \retval -2 \a data_len is not 32
+ * \retval other the return value of THashRemoveFromHash()
+ */
+static int DatasetRemoveSha256(Dataset *set, const uint8_t *data, const uint32_t data_len)
+{
+    if (set == NULL) {
+        return -1;
+    }
+    if (data_len != 32) {
+        return -2;
+    }
+
+    Sha256Type key = { .rep.value = 0 };
+    memcpy(key.sha256, data, 32);
+
+    const int ret = THashRemoveFromHash(set->hash, &key);
+    return ret;
+}
+
+/** \brief decode a serialized value and remove it from the set
+ *  \param set dataset to remove from
+ *  \param string serialized value, decoded according to the set's type
+ *  \retval int 1 removed
+ *  \retval int 0 found but busy (not removed)
+ *  \retval int -1 API error (not removed)
+ *  \retval int -2 DATA error */
+int DatasetRemoveSerialized(Dataset *set, const char *string)
+{
+    const int ret = DatasetOpSerialized(set, string, DatasetRemoveString, DatasetRemoveMd5,
+            DatasetRemoveSha256, DatasetRemoveIPv4, DatasetRemoveIPv6);
+    return ret;
+}