diff options
Diffstat (limited to 'src/knot/modules/rrl')
-rw-r--r-- | src/knot/modules/rrl/Makefile.inc | 15 | ||||
-rw-r--r-- | src/knot/modules/rrl/functions.c | 554 | ||||
-rw-r--r-- | src/knot/modules/rrl/functions.h | 111 | ||||
-rw-r--r-- | src/knot/modules/rrl/rrl.c | 208 | ||||
-rw-r--r-- | src/knot/modules/rrl/rrl.rst | 133 |
5 files changed, 1021 insertions, 0 deletions
diff --git a/src/knot/modules/rrl/Makefile.inc b/src/knot/modules/rrl/Makefile.inc new file mode 100644 index 0000000..d82edf9 --- /dev/null +++ b/src/knot/modules/rrl/Makefile.inc @@ -0,0 +1,15 @@ +knot_modules_rrl_la_SOURCES = knot/modules/rrl/rrl.c \ + knot/modules/rrl/functions.c \ + knot/modules/rrl/functions.h +EXTRA_DIST += knot/modules/rrl/rrl.rst + +if STATIC_MODULE_rrl +libknotd_la_SOURCES += $(knot_modules_rrl_la_SOURCES) +endif + +if SHARED_MODULE_rrl +knot_modules_rrl_la_LDFLAGS = $(KNOTD_MOD_LDFLAGS) +knot_modules_rrl_la_CPPFLAGS = $(KNOTD_MOD_CPPFLAGS) +knot_modules_rrl_la_LIBADD = $(libcontrib_LIBS) +pkglib_LTLIBRARIES += knot/modules/rrl.la +endif diff --git a/src/knot/modules/rrl/functions.c b/src/knot/modules/rrl/functions.c new file mode 100644 index 0000000..df35394 --- /dev/null +++ b/src/knot/modules/rrl/functions.c @@ -0,0 +1,554 @@ +/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <time.h> + +#include "knot/modules/rrl/functions.h" +#include "contrib/musl/inet_ntop.h" +#include "contrib/openbsd/strlcat.h" +#include "contrib/sockaddr.h" +#include "contrib/time.h" +#include "libdnssec/error.h" +#include "libdnssec/random.h" + +/* Hopscotch defines. */ +#define HOP_LEN (sizeof(unsigned)*8) +/* Limits (class, ipv6 remote, dname) */ +#define RRL_CLSBLK_MAXLEN (1 + 8 + 255) +/* CIDR block prefix lengths for v4/v6 */ +#define RRL_V4_PREFIX_LEN 3 /* /24 */ +#define RRL_V6_PREFIX_LEN 7 /* /56 */ +/* Defaults */ +#define RRL_SSTART 2 /* 1/Nth of the rate for slow start */ +#define RRL_PSIZE_LARGE 1024 +#define RRL_CAPACITY 4 /* Window size in seconds */ +#define RRL_LOCK_GRANULARITY 32 /* Last digit granularity */ + +/* Classification */ +enum { + CLS_NULL = 0 << 0, /* Empty bucket. */ + CLS_NORMAL = 1 << 0, /* Normal response. */ + CLS_ERROR = 1 << 1, /* Error response. */ + CLS_NXDOMAIN = 1 << 2, /* NXDOMAIN (special case of error). */ + CLS_EMPTY = 1 << 3, /* Empty response. */ + CLS_LARGE = 1 << 4, /* Response size over threshold (1024k). */ + CLS_WILDCARD = 1 << 5, /* Wildcard query. */ + CLS_ANY = 1 << 6, /* ANY query (spec. class). */ + CLS_DNSSEC = 1 << 7 /* DNSSEC related RR query (spec. class) */ +}; + +/* Classification string. */ +struct cls_name { + int code; + const char *name; +}; + +static const struct cls_name rrl_cls_names[] = { + { CLS_NORMAL, "POSITIVE" }, + { CLS_ERROR, "ERROR" }, + { CLS_NXDOMAIN, "NXDOMAIN"}, + { CLS_EMPTY, "EMPTY"}, + { CLS_LARGE, "LARGE"}, + { CLS_WILDCARD, "WILDCARD"}, + { CLS_ANY, "ANY"}, + { CLS_DNSSEC, "DNSSEC"}, + { CLS_NULL, "NULL"}, + { CLS_NULL, NULL} +}; + +static inline const char *rrl_clsstr(int code) +{ + for (const struct cls_name *c = rrl_cls_names; c->name; c++) { + if (c->code == code) { + return c->name; + } + } + + return "unknown class"; +} + +/* Bucket flags. */ +enum { + RRL_BF_NULL = 0 << 0, /* No flags. */ + RRL_BF_SSTART = 1 << 0, /* Bucket in slow-start after collision. */ + RRL_BF_ELIMIT = 1 << 1 /* Bucket is rate-limited. */ +}; + +static uint8_t rrl_clsid(rrl_req_t *p) +{ + /* Check error code */ + int ret = CLS_NULL; + switch (knot_wire_get_rcode(p->wire)) { + case KNOT_RCODE_NOERROR: ret = CLS_NORMAL; break; + case KNOT_RCODE_NXDOMAIN: return CLS_NXDOMAIN; break; + default: return CLS_ERROR; break; + } + + /* Check if answered from a qname */ + if (ret == CLS_NORMAL && p->flags & RRL_REQ_WILDCARD) { + return CLS_WILDCARD; + } + + /* Check query type for spec. classes. */ + if (p->query) { + switch(knot_pkt_qtype(p->query)) { + case KNOT_RRTYPE_ANY: /* ANY spec. class */ + return CLS_ANY; + break; + case KNOT_RRTYPE_DNSKEY: + case KNOT_RRTYPE_RRSIG: + case KNOT_RRTYPE_DS: /* DNSSEC-related RR class. */ + return CLS_DNSSEC; + break; + default: + break; + } + } + + /* Check packet size for threshold. */ + if (p->len >= RRL_PSIZE_LARGE) { + return CLS_LARGE; + } + + /* Check ancount */ + if (knot_wire_get_ancount(p->wire) == 0) { + return CLS_EMPTY; + } + + return ret; +} + +static int rrl_clsname(uint8_t *dst, size_t maxlen, uint8_t cls, rrl_req_t *req, + const knot_dname_t *name) +{ + if (name == NULL) { + /* Fallback for errors etc. */ + name = (const knot_dname_t *)"\x00"; + } + + switch (cls) { + case CLS_ERROR: /* Could be a non-existent zone or garbage. */ + case CLS_NXDOMAIN: /* Queries to non-existent names in zone. */ + case CLS_WILDCARD: /* Queries to names covered by a wildcard. */ + break; + default: + /* Use QNAME */ + if (req->query) { + name = knot_pkt_qname(req->query); + } + break; + } + + /* Write to wire */ + return knot_dname_to_wire(dst, name, maxlen); +} + +static int rrl_classify(uint8_t *dst, size_t maxlen, const struct sockaddr_storage *remote, + rrl_req_t *req, const knot_dname_t *name) +{ + /* Class */ + uint8_t cls = rrl_clsid(req); + *dst = cls; + int blklen = sizeof(cls); + + /* Address (in network byteorder, adjust masks). */ + uint64_t netblk = 0; + if (remote->ss_family == AF_INET6) { + struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)remote; + memcpy(&netblk, &ipv6->sin6_addr, RRL_V6_PREFIX_LEN); + } else { + struct sockaddr_in *ipv4 = (struct sockaddr_in *)remote; + memcpy(&netblk, &ipv4->sin_addr, RRL_V4_PREFIX_LEN); + } + memcpy(dst + blklen, &netblk, sizeof(netblk)); + blklen += sizeof(netblk); + + /* Name */ + int ret = rrl_clsname(dst + blklen, maxlen - blklen, cls, req, name); + if (ret < 0) { + return ret; + } + uint8_t len = ret; + blklen += len; + + return blklen; +} + +static int bucket_free(rrl_item_t *bucket, uint32_t now) +{ + return bucket->cls == CLS_NULL || (bucket->time + 1 < now); +} + +static int bucket_match(rrl_item_t *bucket, rrl_item_t *match) +{ + return bucket->cls == match->cls && + bucket->netblk == match->netblk && + bucket->qname == match->qname; +} + +static int find_free(rrl_table_t *tbl, unsigned id, uint32_t now) +{ + for (int i = id; i < tbl->size; i++) { + if (bucket_free(&tbl->arr[i], now)) { + return i - id; + } + } + for (int i = 0; i < id; i++) { + if (bucket_free(&tbl->arr[i], now)) { + return i + (tbl->size - id); + } + } + + /* this happens if table is full... force vacate current elm */ + return id; +} + +static inline unsigned find_match(rrl_table_t *tbl, uint32_t id, rrl_item_t *m) +{ + unsigned new_id = 0; + unsigned hop = 0; + unsigned match_bitmap = tbl->arr[id].hop; + while (match_bitmap != 0) { + hop = __builtin_ctz(match_bitmap); /* offset of next potential match */ + new_id = (id + hop) % tbl->size; + if (bucket_match(&tbl->arr[new_id], m)) { + return hop; + } else { + match_bitmap &= ~(1 << hop); /* clear potential match */ + } + } + + return HOP_LEN + 1; +} + +static inline unsigned reduce_dist(rrl_table_t *tbl, unsigned id, unsigned dist, unsigned *free_id) +{ + unsigned rd = HOP_LEN - 1; + while (rd > 0) { + unsigned vacate_id = (tbl->size + *free_id - rd) % tbl->size; /* bucket to be vacated */ + if (tbl->arr[vacate_id].hop != 0) { + unsigned hop = __builtin_ctz(tbl->arr[vacate_id].hop); /* offset of first valid bucket */ + if (hop < rd) { /* only offsets in <vacate_id, free_id> are interesting */ + unsigned new_id = (vacate_id + hop) % tbl->size; /* this item will be displaced to [free_id] */ + unsigned keep_hop = tbl->arr[*free_id].hop; /* unpredictable padding */ + memcpy(tbl->arr + *free_id, tbl->arr + new_id, sizeof(rrl_item_t)); + tbl->arr[*free_id].hop = keep_hop; + tbl->arr[new_id].cls = CLS_NULL; + tbl->arr[vacate_id].hop &= ~(1 << hop); + tbl->arr[vacate_id].hop |= 1 << rd; + *free_id = new_id; + return dist - (rd - hop); + } + } + --rd; + } + + assert(rd == 0); /* this happens with p=1/fact(HOP_LEN) */ + *free_id = id; + dist = 0; /* force vacate initial element */ + return dist; +} + +static void subnet_tostr(char *dst, size_t maxlen, const struct sockaddr_storage *ss) +{ + const void *addr; + const char *suffix; + + if (ss->ss_family == AF_INET6) { + addr = &((struct sockaddr_in6 *)ss)->sin6_addr; + suffix = "/56"; + } else { + addr = &((struct sockaddr_in *)ss)->sin_addr; + suffix = "/24"; + } + + if (knot_inet_ntop(ss->ss_family, addr, dst, maxlen) != NULL) { + strlcat(dst, suffix, maxlen); + } else { + dst[0] = '\0'; + } +} + +static void rrl_log_state(knotd_mod_t *mod, const struct sockaddr_storage *ss, + uint16_t flags, uint8_t cls, const knot_dname_t *qname) +{ + if (mod == NULL || ss == NULL) { + return; + } + + char addr_str[SOCKADDR_STRLEN]; + subnet_tostr(addr_str, sizeof(addr_str), ss); + + const char *what = "leaves"; + if (flags & RRL_BF_ELIMIT) { + what = "enters"; + } + + knot_dname_txt_storage_t buf; + char *qname_str = knot_dname_to_str(buf, qname, sizeof(buf)); + if (qname_str == NULL) { + qname_str = "?"; + } + + knotd_mod_log(mod, LOG_NOTICE, "address/subnet %s, class %s, qname %s, %s limiting", + addr_str, rrl_clsstr(cls), qname_str, what); +} + +static void rrl_lock(rrl_table_t *tbl, int lk_id) +{ + assert(lk_id > -1); + pthread_mutex_lock(tbl->lk + lk_id); +} + +static void rrl_unlock(rrl_table_t *tbl, int lk_id) +{ + assert(lk_id > -1); + pthread_mutex_unlock(tbl->lk + lk_id); +} + +static int rrl_setlocks(rrl_table_t *tbl, uint32_t granularity) +{ + assert(!tbl->lk); /* Cannot change while locks are used. */ + assert(granularity <= tbl->size / 10); /* Due to int. division err. */ + + if (pthread_mutex_init(&tbl->ll, NULL) < 0) { + return KNOT_ENOMEM; + } + + /* Alloc new locks. */ + tbl->lk = malloc(granularity * sizeof(pthread_mutex_t)); + if (!tbl->lk) { + return KNOT_ENOMEM; + } + memset(tbl->lk, 0, granularity * sizeof(pthread_mutex_t)); + + /* Initialize. */ + for (size_t i = 0; i < granularity; ++i) { + if (pthread_mutex_init(tbl->lk + i, NULL) < 0) { + break; + } + ++tbl->lk_count; + } + + /* Incomplete initialization */ + if (tbl->lk_count != granularity) { + for (size_t i = 0; i < tbl->lk_count; ++i) { + pthread_mutex_destroy(tbl->lk + i); + } + free(tbl->lk); + tbl->lk_count = 0; + return KNOT_ERROR; + } + + return KNOT_EOK; +} + +rrl_table_t *rrl_create(size_t size, uint32_t rate) +{ + if (size == 0) { + return NULL; + } + + const size_t tbl_len = sizeof(rrl_table_t) + size * sizeof(rrl_item_t); + rrl_table_t *tbl = calloc(1, tbl_len); + if (!tbl) { + return NULL; + } + tbl->size = size; + tbl->rate = rate; + + if (dnssec_random_buffer((uint8_t *)&tbl->key, sizeof(tbl->key)) != DNSSEC_EOK) { + free(tbl); + return NULL; + } + + if (rrl_setlocks(tbl, RRL_LOCK_GRANULARITY) != KNOT_EOK) { + free(tbl); + return NULL; + } + + return tbl; +} + +static knot_dname_t *buf_qname(uint8_t *buf) +{ + return buf + sizeof(uint8_t) + sizeof(uint64_t); +} + +/*! \brief Get bucket for current combination of parameters. */ +static rrl_item_t *rrl_hash(rrl_table_t *tbl, const struct sockaddr_storage *remote, + rrl_req_t *req, const knot_dname_t *zone, uint32_t stamp, + int *lock, uint8_t *buf, size_t buf_len) +{ + int len = rrl_classify(buf, buf_len, remote, req, zone); + if (len < 0) { + return NULL; + } + + uint32_t id = SipHash24(&tbl->key, buf, len) % tbl->size; + + /* Lock for lookup. */ + pthread_mutex_lock(&tbl->ll); + + /* Find an exact match in <id, id + HOP_LEN). */ + knot_dname_t *qname = buf_qname(buf); + uint64_t netblk; + memcpy(&netblk, buf + sizeof(uint8_t), sizeof(netblk)); + rrl_item_t match = { + .hop = 0, + .netblk = netblk, + .ntok = tbl->rate * RRL_CAPACITY, + .cls = buf[0], + .flags = RRL_BF_NULL, + .qname = SipHash24(&tbl->key, qname, knot_dname_size(qname)), + .time = stamp + }; + + unsigned dist = find_match(tbl, id, &match); + if (dist > HOP_LEN) { /* not an exact match, find free element [f] */ + dist = find_free(tbl, id, stamp); + } + + /* Reduce distance to fit <id, id + HOP_LEN) */ + unsigned free_id = (id + dist) % tbl->size; + while (dist >= HOP_LEN) { + dist = reduce_dist(tbl, id, dist, &free_id); + } + + /* Assign granular lock and unlock lookup. */ + *lock = free_id % tbl->lk_count; + rrl_lock(tbl, *lock); + pthread_mutex_unlock(&tbl->ll); + + /* found free bucket which is in <id, id + HOP_LEN) */ + tbl->arr[id].hop |= (1 << dist); + rrl_item_t *bucket = &tbl->arr[free_id]; + assert(free_id == (id + dist) % tbl->size); + + /* Inspect bucket state. */ + unsigned hop = bucket->hop; + if (bucket->cls == CLS_NULL) { + memcpy(bucket, &match, sizeof(rrl_item_t)); + bucket->hop = hop; + } + /* Check for collisions. */ + if (!bucket_match(bucket, &match)) { + if (!(bucket->flags & RRL_BF_SSTART)) { + memcpy(bucket, &match, sizeof(rrl_item_t)); + bucket->hop = hop; + bucket->ntok = tbl->rate + tbl->rate / RRL_SSTART; + bucket->flags |= RRL_BF_SSTART; + } + } + + return bucket; +} + +int rrl_query(rrl_table_t *rrl, const struct sockaddr_storage *remote, + rrl_req_t *req, const knot_dname_t *zone, knotd_mod_t *mod) +{ + if (!rrl || !req || !remote) { + return KNOT_EINVAL; + } + + uint8_t buf[RRL_CLSBLK_MAXLEN]; + + /* Calculate hash and fetch */ + int ret = KNOT_EOK; + int lock = -1; + uint32_t now = time_now().tv_sec; + rrl_item_t *bucket = rrl_hash(rrl, remote, req, zone, now, &lock, buf, sizeof(buf)); + if (!bucket) { + if (lock > -1) { + rrl_unlock(rrl, lock); + } + return KNOT_ERROR; + } + + /* Calculate rate for dT */ + uint32_t dt = now - bucket->time; + if (dt > RRL_CAPACITY) { + dt = RRL_CAPACITY; + } + /* Visit bucket. */ + bucket->time = now; + if (dt > 0) { /* Window moved. */ + + /* Check state change. */ + if ((bucket->ntok > 0 || dt > 1) && (bucket->flags & RRL_BF_ELIMIT)) { + bucket->flags &= ~RRL_BF_ELIMIT; + rrl_log_state(mod, remote, bucket->flags, bucket->cls, + knot_pkt_qname(req->query)); + } + + /* Add new tokens. */ + uint32_t dn = rrl->rate * dt; + if (bucket->flags & RRL_BF_SSTART) { /* Bucket in slow-start. */ + bucket->flags &= ~RRL_BF_SSTART; + } + bucket->ntok += dn; + if (bucket->ntok > RRL_CAPACITY * rrl->rate) { + bucket->ntok = RRL_CAPACITY * rrl->rate; + } + } + + /* Last item taken. */ + if (bucket->ntok == 1 && !(bucket->flags & RRL_BF_ELIMIT)) { + bucket->flags |= RRL_BF_ELIMIT; + rrl_log_state(mod, remote, bucket->flags, bucket->cls, + knot_pkt_qname(req->query)); + } + + /* Decay current bucket. */ + if (bucket->ntok > 0) { + --bucket->ntok; + } else if (bucket->ntok == 0) { + ret = KNOT_ELIMIT; + } + + if (lock > -1) { + rrl_unlock(rrl, lock); + } + return ret; +} + +bool rrl_slip_roll(int n_slip) +{ + switch (n_slip) { + case 0: + return false; + case 1: + return true; + default: + return (dnssec_random_uint16_t() % n_slip == 0); + } +} + +void rrl_destroy(rrl_table_t *rrl) +{ + if (rrl) { + if (rrl->lk_count > 0) { + pthread_mutex_destroy(&rrl->ll); + } + for (size_t i = 0; i < rrl->lk_count; ++i) { + pthread_mutex_destroy(rrl->lk + i); + } + free(rrl->lk); + } + + free(rrl); +} diff --git a/src/knot/modules/rrl/functions.h b/src/knot/modules/rrl/functions.h new file mode 100644 index 0000000..0f09234 --- /dev/null +++ b/src/knot/modules/rrl/functions.h @@ -0,0 +1,111 @@ +/* Copyright (C) 2019 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <stdint.h> +#include <pthread.h> +#include <sys/socket.h> + +#include "libknot/libknot.h" +#include "knot/include/module.h" +#include "contrib/openbsd/siphash.h" + +/*! + * \brief RRL hash bucket. + */ +typedef struct { + unsigned hop; /* Hop bitmap. */ + uint64_t netblk; /* Prefix associated. */ + uint16_t ntok; /* Tokens available. */ + uint8_t cls; /* Bucket class. */ + uint8_t flags; /* Flags. */ + uint32_t qname; /* imputed(QNAME) hash. */ + uint32_t time; /* Timestamp. */ +} rrl_item_t; + +/*! + * \brief RRL hash bucket table. + * + * Table is fixed size, so collisions may occur and are dealt with + * in a way, that hashbucket rate is reset and enters slow-start for 1 dt. + * When a bucket is in a slow-start mode, it cannot reset again for the time + * period. + * + * To avoid lock contention, N locks are created and distributed amongst buckets. + * As of now lock K for bucket N is calculated as K = N % (num_buckets). + */ + +typedef struct { + SIPHASH_KEY key; /* Siphash key. */ + uint32_t rate; /* Configured RRL limit. */ + pthread_mutex_t ll; + pthread_mutex_t *lk; /* Table locks. */ + unsigned lk_count; /* Table lock count (granularity). */ + size_t size; /* Number of buckets. */ + rrl_item_t arr[]; /* Buckets. */ +} rrl_table_t; + +/*! \brief RRL request flags. */ +typedef enum { + RRL_REQ_NOFLAG = 0 << 0, /*!< No flags. */ + RRL_REQ_WILDCARD = 1 << 1 /*!< Query to wildcard name. */ +} rrl_req_flag_t; + +/*! + * \brief RRL request descriptor. + */ +typedef struct { + const uint8_t *wire; + uint16_t len; + rrl_req_flag_t flags; + knot_pkt_t *query; +} rrl_req_t; + +/*! + * \brief Create a RRL table. + * \param size Fixed hashtable size (reasonable large prime is recommended). + * \param rate Rate (in pkts/sec). + * \return created table or NULL. + */ +rrl_table_t *rrl_create(size_t size, uint32_t rate); + +/*! + * \brief Query the RRL table for accept or deny, when the rate limit is reached. + * + * \param rrl RRL table. + * \param remote Source address. + * \param req RRL request (containing resp., flags and question). + * \param zone Zone name related to the response (or NULL). + * \param mod Query module (needed for logging). + * \retval KNOT_EOK if passed. + * \retval KNOT_ELIMIT when the limit is reached. + */ +int rrl_query(rrl_table_t *rrl, const struct sockaddr_storage *remote, + rrl_req_t *req, const knot_dname_t *zone, knotd_mod_t *mod); + +/*! + * \brief Roll a dice whether answer slips or not. + * \param n_slip Number represents every Nth answer that is slipped. + * \return true or false + */ +bool rrl_slip_roll(int n_slip); + +/*! + * \brief Destroy RRL table. + * \param rrl RRL table. + */ +void rrl_destroy(rrl_table_t *rrl); diff --git a/src/knot/modules/rrl/rrl.c b/src/knot/modules/rrl/rrl.c new file mode 100644 index 0000000..64f6cbf --- /dev/null +++ b/src/knot/modules/rrl/rrl.c @@ -0,0 +1,208 @@ +/* Copyright (C) 2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include "knot/include/module.h" +#include "knot/nameserver/process_query.h" // Dependency on qdata->extra! +#include "knot/modules/rrl/functions.h" + +#define MOD_RATE_LIMIT "\x0A""rate-limit" +#define MOD_SLIP "\x04""slip" +#define MOD_TBL_SIZE "\x0A""table-size" +#define MOD_WHITELIST "\x09""whitelist" + +const yp_item_t rrl_conf[] = { + { MOD_RATE_LIMIT, YP_TINT, YP_VINT = { 1, INT32_MAX } }, + { MOD_SLIP, YP_TINT, YP_VINT = { 0, 100, 1 } }, + { MOD_TBL_SIZE, YP_TINT, YP_VINT = { 1, INT32_MAX, 393241 } }, + { MOD_WHITELIST, YP_TNET, YP_VNONE, YP_FMULTI }, + { NULL } +}; + +int rrl_conf_check(knotd_conf_check_args_t *args) +{ + knotd_conf_t limit = knotd_conf_check_item(args, MOD_RATE_LIMIT); + if (limit.count == 0) { + args->err_str = "no rate limit specified"; + return KNOT_EINVAL; + } + + return KNOT_EOK; +} + +typedef struct { + rrl_table_t *rrl; + int slip; + knotd_conf_t whitelist; +} rrl_ctx_t; + +static const knot_dname_t *name_from_rrsig(const knot_rrset_t *rr) +{ + if (rr == NULL) { + return NULL; + } + if (rr->type != KNOT_RRTYPE_RRSIG) { + return NULL; + } + + // This is a signature. + return knot_rrsig_signer_name(rr->rrs.rdata); +} + +static const knot_dname_t *name_from_authrr(const knot_rrset_t *rr) +{ + if (rr == NULL) { + return NULL; + } + if (rr->type != KNOT_RRTYPE_NS && rr->type != KNOT_RRTYPE_SOA) { + return NULL; + } + + // This is a valid authority RR. + return rr->owner; +} + +static knotd_state_t ratelimit_apply(knotd_state_t state, knot_pkt_t *pkt, + knotd_qdata_t *qdata, knotd_mod_t *mod) +{ + assert(pkt && qdata && mod); + + rrl_ctx_t *ctx = knotd_mod_ctx(mod); + + // Rate limit is applied to pure UDP only. + if (qdata->params->proto != KNOTD_QUERY_PROTO_UDP) { + return state; + } + + // Rate limit is not applied to responses with a valid cookie. + if (qdata->params->flags & KNOTD_QUERY_FLAG_COOKIE) { + return state; + } + + // Exempt clients. + if (knotd_conf_addr_range_match(&ctx->whitelist, knotd_qdata_remote_addr(qdata))) { + return state; + } + + rrl_req_t req = { + .wire = pkt->wire, + .query = qdata->query + }; + + if (!EMPTY_LIST(qdata->extra->wildcards)) { + req.flags = RRL_REQ_WILDCARD; + } + + // Take the zone name if known. + const knot_dname_t *zone_name = knotd_qdata_zone_name(qdata); + + // Take the signer name as zone name if there is an RRSIG. + if (zone_name == NULL) { + const knot_pktsection_t *ans = knot_pkt_section(pkt, KNOT_ANSWER); + for (int i = 0; i < ans->count; i++) { + zone_name = name_from_rrsig(knot_pkt_rr(ans, i)); + if (zone_name != NULL) { + break; + } + } + } + + // Take the NS or SOA owner name if there is no RRSIG. + if (zone_name == NULL) { + const knot_pktsection_t *auth = knot_pkt_section(pkt, KNOT_AUTHORITY); + for (int i = 0; i < auth->count; i++) { + zone_name = name_from_authrr(knot_pkt_rr(auth, i)); + if (zone_name != NULL) { + break; + } + } + } + + if (rrl_query(ctx->rrl, knotd_qdata_remote_addr(qdata), &req, zone_name, mod) == KNOT_EOK) { + // Rate limiting not applied. + return state; + } + + if (rrl_slip_roll(ctx->slip)) { + // Slip the answer. + knotd_mod_stats_incr(mod, qdata->params->thread_id, 0, 0, 1); + qdata->err_truncated = true; + return KNOTD_STATE_FAIL; + } else { + // Drop the answer. + knotd_mod_stats_incr(mod, qdata->params->thread_id, 1, 0, 1); + return KNOTD_STATE_NOOP; + } +} + +static void ctx_free(rrl_ctx_t *ctx) +{ + assert(ctx); + + rrl_destroy(ctx->rrl); + free(ctx); +} + +int rrl_load(knotd_mod_t *mod) +{ + // Create RRL context. + rrl_ctx_t *ctx = calloc(1, sizeof(rrl_ctx_t)); + if (ctx == NULL) { + return KNOT_ENOMEM; + } + + // Create table. + uint32_t rate = knotd_conf_mod(mod, MOD_RATE_LIMIT).single.integer; + size_t size = knotd_conf_mod(mod, MOD_TBL_SIZE).single.integer; + ctx->rrl = rrl_create(size, rate); + if (ctx->rrl == NULL) { + ctx_free(ctx); + return KNOT_ENOMEM; + } + + // Get slip. + ctx->slip = knotd_conf_mod(mod, MOD_SLIP).single.integer; + + // Get whitelist. + ctx->whitelist = knotd_conf_mod(mod, MOD_WHITELIST); + + // Set up statistics counters. + int ret = knotd_mod_stats_add(mod, "slipped", 1, NULL); + if (ret != KNOT_EOK) { + ctx_free(ctx); + return ret; + } + + ret = knotd_mod_stats_add(mod, "dropped", 1, NULL); + if (ret != KNOT_EOK) { + ctx_free(ctx); + return ret; + } + + knotd_mod_ctx_set(mod, ctx); + + return knotd_mod_hook(mod, KNOTD_STAGE_END, ratelimit_apply); +} + +void rrl_unload(knotd_mod_t *mod) +{ + rrl_ctx_t *ctx = knotd_mod_ctx(mod); + + knotd_conf_free(&ctx->whitelist); + ctx_free(ctx); +} + +KNOTD_MOD_API(rrl, KNOTD_MOD_FLAG_SCOPE_ANY, + rrl_load, rrl_unload, rrl_conf, rrl_conf_check); diff --git a/src/knot/modules/rrl/rrl.rst b/src/knot/modules/rrl/rrl.rst new file mode 100644 index 0000000..3fc7892 --- /dev/null +++ b/src/knot/modules/rrl/rrl.rst @@ -0,0 +1,133 @@ +.. _mod-rrl: + +``rrl`` — Response rate limiting +================================ + +Response rate limiting (RRL) is a method to combat DNS reflection amplification +attacks. These attacks rely on the fact that source address of a UDP query +can be forged, and without a worldwide deployment of `BCP38 +<https://tools.ietf.org/html/bcp38>`_, such a forgery cannot be prevented. +An attacker can use a DNS server (or multiple servers) as an amplification +source and can flood a victim with a large number of unsolicited DNS responses. +The RRL lowers the amplification factor of these attacks by sending some of +the responses as truncated or by dropping them altogether. + +.. NOTE:: + The module introduces two statistics counters. The number of slipped and + dropped responses. + +.. NOTE:: + If the :ref:`Cookies<mod-cookies>` module is active, RRL is not applied + for responses with a valid DNS cookie. + +Example +------- + +You can enable RRL by setting the module globally or per zone. + +:: + + mod-rrl: + - id: default + rate-limit: 200 # Allow 200 resp/s for each flow + slip: 2 # Approximately every other response slips + + template: + - id: default + global-module: mod-rrl/default # Enable RRL globally + +Module reference +---------------- + +:: + + mod-rrl: + - id: STR + rate-limit: INT + slip: INT + table-size: INT + whitelist: ADDR[/INT] | ADDR-ADDR ... + +.. _mod-rrl_id: + +id +.. + +A module identifier. + +.. _mod-rrl_rate-limit: + +rate-limit +.......... + +Rate limiting is based on the token bucket scheme. A rate basically +represents a number of tokens available each second. Each response is +processed and classified (based on several discriminators, e.g. +source netblock, query type, zone name, rcode, etc.). Classified responses are +then hashed and assigned to a bucket containing number of available +tokens, timestamp and metadata. When available tokens are exhausted, +response is dropped or sent as truncated (see :ref:`mod-rrl_slip`). +Number of available tokens is recalculated each second. + +*Required* + +.. _mod-rrl_table-size: + +table-size +.......... + +Size of the hash table in a number of buckets. The larger the hash table, the lesser +the probability of a hash collision, but at the expense of additional memory costs. +Each bucket is estimated roughly to 32 bytes. The size should be selected as +a reasonably large prime due to better hash function distribution properties. +Hash table is internally chained and works well up to a fill rate of 90 %, general +rule of thumb is to select a prime near 1.2 * maximum_qps. + +*Default:* ``393241`` + +.. _mod-rrl_slip: + +slip +.... + +As attacks using DNS/UDP are usually based on a forged source address, +an attacker could deny services to the victim's netblock if all +responses would be completely blocked. The idea behind SLIP mechanism +is to send each N\ :sup:`th` response as truncated, thus allowing client to +reconnect via TCP for at least some degree of service. It is worth +noting, that some responses can't be truncated (e.g. SERVFAIL). + +- Setting the value to **0** will cause that all rate-limited responses will + be dropped. The outbound bandwidth and packet rate will be strictly capped + by the :ref:`mod-rrl_rate-limit` option. All legitimate requestors affected + by the limit will face denial of service and will observe excessive timeouts. + Therefore this setting is not recommended. + +- Setting the value to **1** will cause that all rate-limited responses will + be sent as truncated. The amplification factor of the attack will be reduced, + but the outbound data bandwidth won't be lower than the incoming bandwidth. + Also the outbound packet rate will be the same as without RRL. + +- Setting the value to **2** will cause that approximately half of the rate-limited responses + will be dropped, the other half will be sent as truncated. With this + configuration, both outbound bandwidth and packet rate will be lower than the + inbound. On the other hand, the dropped responses enlarge the time window + for possible cache poisoning attack on the resolver. + +- Setting the value to anything **larger than 2** will keep on decreasing + the outgoing rate-limited bandwidth, packet rate, and chances to notify + legitimate requestors to reconnect using TCP. These attributes are inversely + proportional to the configured value. Setting the value high is not advisable. + +*Default:* ``1`` + +.. _mod-rrl_whitelist: + +whitelist +......... + +A list of IP addresses, network subnets, or network ranges to exempt from +rate limiting. Empty list means that no incoming connection will be +white-listed. + +*Default:* not set |