summaryrefslogtreecommitdiffstats
path: root/src/source-netmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/source-netmap.c')
-rw-r--r--src/source-netmap.c994
1 files changed, 994 insertions, 0 deletions
diff --git a/src/source-netmap.c b/src/source-netmap.c
new file mode 100644
index 0000000..0b04b41
--- /dev/null
+++ b/src/source-netmap.c
@@ -0,0 +1,994 @@
+/* Copyright (C) 2011-2022 Open Information Security Foundation
+ *
+ * You can copy, redistribute or modify this Program under the terms of
+ * the GNU General Public License version 2 as published by the Free
+ * Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/**
+* \defgroup netmap Netmap running mode
+*
+* @{
+*/
+
+/**
+ * \file
+ *
+ * \author Aleksey Katargin <gureedo@gmail.com>
+ * \author Victor Julien <victor@inliniac.net>
+ * \author Bill Meeks <billmeeks8@gmail.com>
+ *
+ * Netmap socket acquisition support
+ *
+ * Many thanks to Luigi Rizzo for guidance and support.
+ *
+ */
+
+#include "suricata.h"
+#include "suricata-common.h"
+#include "tm-threads.h"
+#include "packet.h"
+#include "util-bpf.h"
+#include "util-privs.h"
+#include "util-validate.h"
+#include "util-datalink.h"
+
+#include "source-netmap.h"
+
+#ifdef HAVE_NETMAP
+
+#define NETMAP_WITH_LIBS
+#ifdef DEBUG
+#define DEBUG_NETMAP_USER
+#endif
+
+#include <net/netmap_user.h>
+#include <libnetmap.h>
+
+#endif /* HAVE_NETMAP */
+
+#include "util-ioctl.h"
+
+#ifndef HAVE_NETMAP
+
+/**
+ * \brief Thread-init stub installed when Suricata is built without netmap.
+ *
+ * Emits a fatal error that names the thread being created and tells the
+ * user to rebuild with --enable-netmap.
+ *
+ * \param tv thread whose name is reported in the error message
+ * \param initdata unused
+ * \param data unused
+ */
+static TmEcode NoNetmapSupportExit(ThreadVars *tv, const void *initdata, void **data)
+{
+    const char *thread_name = tv->name;
+
+    FatalError("Error creating thread %s: Netmap is not enabled. "
+               "Make sure to pass --enable-netmap to configure when building.",
+            thread_name);
+}
+
+/**
+ * \brief Registration Function for ReceiveNetmap when netmap is not compiled in.
+ *
+ * Only the name, flags and a ThreadInit that reports the missing support
+ * are installed.
+ */
+void TmModuleReceiveNetmapRegister (void)
+{
+    tmm_modules[TMM_RECEIVENETMAP].flags = TM_FLAG_RECEIVE_TM;
+    tmm_modules[TMM_RECEIVENETMAP].ThreadInit = NoNetmapSupportExit;
+    tmm_modules[TMM_RECEIVENETMAP].name = "ReceiveNetmap";
+}
+
+/**
+ * \brief Registration Function for DecodeNetmap when netmap is not compiled in.
+ *
+ * Only the name, flags and a ThreadInit that reports the missing support
+ * are installed.
+ */
+void TmModuleDecodeNetmapRegister (void)
+{
+    tmm_modules[TMM_DECODENETMAP].flags = TM_FLAG_DECODE_TM;
+    tmm_modules[TMM_DECODENETMAP].ThreadInit = NoNetmapSupportExit;
+    tmm_modules[TMM_DECODENETMAP].name = "DecodeNetmap";
+}
+
+#else /* We have NETMAP support */
+
+#include "action-globals.h"
+
+/* Timeout argument handed to poll() in the receive loop. */
+#define POLL_TIMEOUT 100
+
+/* POLL_EVENTS is the set of revents bits the receive loop treats as an
+ * error condition rather than as readable data. Linux additionally
+ * reports POLLRDHUP. */
+#if defined(__linux__)
+#define POLL_EVENTS (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL)
+
+/* Fall back to IFF_PROMISC when the platform headers do not provide
+ * IFF_PPROMISC. */
+#ifndef IFF_PPROMISC
+#define IFF_PPROMISC IFF_PROMISC
+#endif
+
+#else
+#define POLL_EVENTS (POLLHUP|POLLERR|POLLNVAL)
+#endif
+
+/* Bits stored in NetmapThreadVars.flags:
+ * NETMAP_FLAG_ZERO_COPY is set for the "workers" runmode,
+ * NETMAP_FLAG_EXCL_RING_ACCESS is set for the "autofp" runmode and makes
+ * the writer take the destination device lock while injecting. */
+enum { NETMAP_FLAG_ZERO_COPY = 1, NETMAP_FLAG_EXCL_RING_ACCESS = 2 };
+
+/**
+ * \brief Netmap device instance. Each ring for each device gets its own
+ * device.
+ *
+ * Instances are allocated by NetmapOpen(), linked into the global
+ * netmap_devlist and released through NetmapClose()/NetmapCloseAll().
+ */
+typedef struct NetmapDevice_
+{
+ /* libnetmap port descriptor returned by nmport_prepare() */
+ struct nmport_d *nmd;
+ /* reference counter decremented by NetmapClose().
+ * NOTE(review): no code visible in this file increments it -- confirm. */
+ unsigned int ref;
+ SC_ATOMIC_DECLARE(unsigned int, threads_run);
+ /* linkage in netmap_devlist */
+ TAILQ_ENTRY(NetmapDevice_) next;
+ // actual ifname can only be 16, but we store a bit more,
+ // like the options string and a 'netmap:' prefix.
+ char ifname[32];
+ /* ring number assigned when the device was opened */
+ int ring;
+ int direction; // 0 rx, 1 tx
+
+ // autofp: Used to lock a destination ring while we are sending data.
+ SCMutex netmap_dev_lock;
+} NetmapDevice;
+
+/**
+ * \brief Module thread local variables.
+ *
+ * Allocated in ReceiveNetmapThreadInit() and freed in
+ * ReceiveNetmapThreadDeinit().
+ */
+typedef struct NetmapThreadVars_
+{
+ /* receive interface */
+ NetmapDevice *ifsrc;
+ /* destination interface; only opened when the configured copy mode is
+ * not NETMAP_COPY_MODE_NONE */
+ NetmapDevice *ifdst;
+
+ /* NETMAP_FLAG_* bits chosen from the active runmode */
+ int flags;
+ /* compiled BPF filter; bf_len is zero when no filter is configured */
+ struct bpf_program bpf_prog;
+
+ /* suricata internals */
+ TmSlot *slot;
+ ThreadVars *tv;
+ LiveDevice *livedev;
+
+ /* copy from config */
+ int copy_mode;
+ ChecksumValidationMode checksum_mode;
+
+ /* counters */
+ /* pkts and drops are flushed into the stats counters and reset by
+ * NetmapDumpCounters(); bytes accumulates for the exit statistics */
+ uint64_t pkts;
+ uint64_t bytes;
+ uint64_t drops;
+ /* ids returned by StatsRegisterCounter() */
+ uint16_t capture_kernel_packets;
+ uint16_t capture_kernel_drops;
+} NetmapThreadVars;
+
+/* List type holding every NetmapDevice opened by this module. */
+typedef TAILQ_HEAD(NetmapDeviceList_, NetmapDevice_) NetmapDeviceList;
+
+/* Global list of opened devices; walked and modified only while
+ * netmap_devlist_lock is held. */
+static NetmapDeviceList netmap_devlist = TAILQ_HEAD_INITIALIZER(netmap_devlist);
+static SCMutex netmap_devlist_lock = SCMUTEX_INITIALIZER;
+
+/** \brief get RSS RX-queue count
+ *
+ *  Queries /dev/netmap for the port info of the interface. A trailing
+ *  '^' or '*' on the name is stripped before the query.
+ *
+ *  \param ifname interface name
+ *  \retval rx_rings RX ring count when it equals the TX ring count;
+ *          0 on error or when the RX and TX counts differ
+ */
+int NetmapGetRSSCount(const char *ifname)
+{
+    struct nmreq_header hdr;
+    struct nmreq_port_info_get req;
+    int rx_rings = 0;
+
+    /* we need the base interface name to query queues */
+    char base_name[IFNAMSIZ];
+    strlcpy(base_name, ifname, sizeof(base_name));
+    const size_t base_len = strlen(base_name);
+    if (base_len > 0) {
+        const char last = base_name[base_len - 1];
+        if (last == '^' || last == '*') {
+            base_name[base_len - 1] = '\0';
+        }
+    }
+
+    SCMutexLock(&netmap_devlist_lock);
+
+    /* open netmap device */
+    const int fd = open("/dev/netmap", O_RDWR);
+    if (fd == -1) {
+        SCLogError("%s: open netmap device failed: %s", ifname, strerror(errno));
+        SCMutexUnlock(&netmap_devlist_lock);
+        return rx_rings;
+    }
+
+    /* build the port-info request for the ring counts */
+    memset(&req, 0, sizeof(req));
+    memset(&hdr, 0, sizeof(hdr));
+    hdr.nr_version = NETMAP_API;
+    hdr.nr_reqtype = NETMAP_REQ_PORT_INFO_GET;
+    hdr.nr_body = (uintptr_t)&req;
+    strlcpy(hdr.nr_name, base_name, sizeof(hdr.nr_name));
+
+    if (ioctl(fd, NIOCCTRL, &hdr) != 0) {
+        SCLogError(
+                "Query of netmap HW rings count on %s failed; error: %s", ifname, strerror(errno));
+    } else if (req.nr_rx_rings == req.nr_tx_rings) {
+        /* only report a count when RX and TX ring counts match */
+        rx_rings = req.nr_rx_rings;
+    }
+
+    close(fd);
+    SCMutexUnlock(&netmap_devlist_lock);
+    return rx_rings;
+}
+
+/**
+ * \brief Release everything owned by a netmap device instance.
+ *
+ * Destroys the per-device lock, closes the netmap port and frees the
+ * instance. It does not touch netmap_devlist.
+ *
+ * \param pdev device instance to destroy
+ */
+static void NetmapDestroyDevice(NetmapDevice *pdev)
+{
+    SCMutexDestroy(&pdev->netmap_dev_lock);
+    nmport_close(pdev->nmd);
+    SCFree(pdev);
+}
+
+/**
+ * \brief Close or dereference netmap device instance.
+ *
+ * Looks the instance up in netmap_devlist, drops one reference and, once
+ * no references remain, unlinks it from the list and destroys it.
+ *
+ * \param dev Netmap device instance.
+ * \return Zero on success, -1 when the instance is not in the list.
+ */
+static int NetmapClose(NetmapDevice *dev)
+{
+    NetmapDevice *pdev, *tmp;
+
+    SCMutexLock(&netmap_devlist_lock);
+
+    TAILQ_FOREACH_SAFE (pdev, &netmap_devlist, next, tmp) {
+        if (pdev != dev) {
+            continue;
+        }
+
+        /* never let the unsigned counter wrap around: a device whose
+         * counter is already zero has no other users left */
+        if (pdev->ref > 0) {
+            pdev->ref--;
+        }
+        if (pdev->ref == 0) {
+            /* unlink before freeing so the list never holds a dangling entry */
+            TAILQ_REMOVE(&netmap_devlist, pdev, next);
+            NetmapDestroyDevice(pdev);
+        }
+        SCMutexUnlock(&netmap_devlist_lock);
+        return 0;
+    }
+
+    SCMutexUnlock(&netmap_devlist_lock);
+    return -1;
+}
+
+/**
+ * \brief Close all open netmap device instances.
+ *
+ * Every entry is unlinked from netmap_devlist before it is destroyed, so
+ * the list is left empty instead of pointing at freed memory.
+ * Takes netmap_devlist_lock itself; the caller must not hold it.
+ */
+static void NetmapCloseAll(void)
+{
+    NetmapDevice *pdev, *tmp;
+
+    SCMutexLock(&netmap_devlist_lock);
+
+    TAILQ_FOREACH_SAFE (pdev, &netmap_devlist, next, tmp) {
+        TAILQ_REMOVE(&netmap_devlist, pdev, next);
+        NetmapDestroyDevice(pdev);
+    }
+
+    SCMutexUnlock(&netmap_devlist_lock);
+}
+
+/**
+ * \brief Open one ring of an interface in netmap mode.
+ *
+ * Picks the next free ring number for the interface/direction, builds the
+ * netmap endpoint name, opens the port and appends the new device to
+ * netmap_devlist.
+ *
+ * \param ns Interface settings (name, promisc, threads, sw_ring, ips, real).
+ * \param pdevice Receives the newly created netmap device instance on success.
+ * \param verbose Non-zero to log an error when the interface flags cannot be read.
+ * \param read 1 opens the RX direction, any other value the TX direction.
+ * \param zerocopy 1 if zerocopy access requested (not used by this function).
+ * \param soft True when one endpoint of the pair is a host stack (software) interface.
+ * \return Zero on success, -1 on failure before the port is opened. Failing
+ *         to open the port itself is reported through FatalError().
+ */
+static int NetmapOpen(NetmapIfaceSettings *ns, NetmapDevice **pdevice, int verbose, int read,
+        bool zerocopy, bool soft)
+{
+    SCEnter();
+    SCLogDebug("ifname %s", ns->iface);
+
+    /* strip a trailing '^' or '*' to get the name used for the flag queries */
+    char base_name[IFNAMSIZ];
+    strlcpy(base_name, ns->iface, sizeof(base_name));
+    if (strlen(base_name) > 0 &&
+            (base_name[strlen(base_name)-1] == '^' ||
+             base_name[strlen(base_name)-1] == '*'))
+    {
+        base_name[strlen(base_name)-1] = '\0';
+    }
+
+    if (ns->real) {
+        /* check interface is up */
+        int if_flags = GetIfaceFlags(base_name);
+        if (if_flags == -1) {
+            if (verbose) {
+                SCLogError("%s: cannot access network interface: %s", base_name, ns->iface);
+            }
+            goto error;
+        }
+
+        /* refuse to continue when the interface is down */
+        if ((if_flags & IFF_UP) == 0) {
+            SCLogError("%s: interface is down", base_name);
+            goto error;
+        }
+        /* if needed, try to set iface in promisc mode */
+        if (ns->promisc && (if_flags & (IFF_PROMISC|IFF_PPROMISC)) == 0) {
+            if_flags |= IFF_PPROMISC;
+            SetIfaceFlags(base_name, if_flags); // TODO reset at exit
+            // TODO move to parse config?
+        }
+    }
+    NetmapDevice *pdev = NULL, *spdev = NULL;
+    pdev = SCCalloc(1, sizeof(*pdev));
+    if (unlikely(pdev == NULL)) {
+        SCLogError("%s: memory allocation failed", base_name);
+        goto error;
+    }
+    SC_ATOMIC_INIT(pdev->threads_run);
+
+    SCMutexLock(&netmap_devlist_lock);
+
+    const int direction = (read != 1);
+    int ring = 0;
+    /* Search for interface in our already opened list. */
+    /* We will find it when opening multiple rings on */
+    /* the device when it exposes multiple RSS queues. */
+    TAILQ_FOREACH(spdev, &netmap_devlist, next) {
+        SCLogDebug("spdev %s", spdev->ifname);
+        if (direction == spdev->direction && strcmp(ns->iface, spdev->ifname) == 0) {
+            ring = spdev->ring + 1;
+        }
+    }
+    SCLogDebug("netmap/%s: using ring %d", ns->iface, ring);
+
+    const char *opt_R = "R";
+    const char *opt_T = "T";
+    const char *opt_x = "x"; // not for IPS
+    const char *opt_z = "z"; // zero copy, not for IPS
+
+    /* assemble options string */
+    char optstr[16];
+    if (ns->ips)
+        opt_x = "";
+    // z seems to not play well with multiple opens of a real dev on linux
+    opt_z = "";
+
+    /*
+     * How netmap endpoint names are selected:
+     *
+     * The following logic within the "retry" loop builds endpoint names.
+     *
+     * IPS Mode:
+     * There are two endpoints: one hardware NIC and either a hardware NIC or host stack "NIC".
+     *
+     * IDS Mode:
+     * One endpoint -- usually a hardware NIC.
+     *
+     * IPS mode -- with one endpoint a host stack "NIC":
+     * When using multiple rings/threads, then the open of the initial Ring 0 MUST
+     * instruct netmap to open multiple Host Stack rings (as the default is to open only a single
+     * pair). This is also critical for the HW NIC endpoint. This is done by adding
+     * "@conf:host-rings=x" suffix option (where "x" is the number of host rings desired)
+     * to BOTH endpoint nmport_open_desc() calls for ring 0 (hardware and host stack).
+     * For subsequent additional ring open calls, omit the suffix option specifying host ring count.
+     *
+     * IPS mode -- both endpoints are hardware NICs:
+     * Do NOT pass any suffix option (even for Ring 0). You do not need to tell netmap how many
+     * rings, because it already knows the correct value from the NIC driver itself. Specifying a
+     * desired ring count when both ends are Hardware NICs confuses netmap, and it seems to default
+     * to using only a single hardware ring. In this scenario, specify only the specific ring number
+     * being opened.
+     */
+
+    /* errno of the failed prepare/open call, captured before any cleanup
+     * call gets a chance to overwrite it */
+    int open_errno;
+
+    // loop to retry opening if unsupported options are used
+retry:
+    open_errno = 0;
+    snprintf(optstr, sizeof(optstr), "%s%s%s", opt_z, opt_x, direction == 0 ? opt_R : opt_T);
+
+    char devname[128];
+    if (strncmp(ns->iface, "netmap:", 7) == 0) {
+        snprintf(devname, sizeof(devname), "%s}%d%s%s",
+                ns->iface, ring, strlen(optstr) ? "/" : "", optstr);
+    } else if (strlen(ns->iface) > 5 && strncmp(ns->iface, "vale", 4) == 0 &&
+               isdigit((unsigned char)ns->iface[4])) {
+        /* cast: passing a negative plain char to isdigit() is undefined */
+        snprintf(devname, sizeof(devname), "%s", ns->iface);
+    } else if (ring == 0 && ns->threads == 1) {
+        /* just a single thread and ring, so don't use ring param */
+        snprintf(devname, sizeof(devname), "netmap:%s%s%s",
+                ns->iface, strlen(optstr) ? "/" : "", optstr);
+        SCLogDebug("device with %s-ring enabled (devname): %s", soft ? "SW" : "HW", devname);
+    } else {
+        /* Going to be using multiple threads and rings */
+        if (ns->sw_ring) {
+            /* Opening a host stack interface */
+            if (ring == 0) {
+                /* Ring 0, so tell netmap how many host rings we want created */
+                snprintf(devname, sizeof(devname), "netmap:%s%d%s%s@conf:host-rings=%d", ns->iface,
+                        ring, strlen(optstr) ? "/" : "", optstr, ns->threads);
+            } else {
+                /* Software (host) ring, but not initial open of ring 0 */
+                snprintf(devname, sizeof(devname), "netmap:%s%d%s%s", ns->iface, ring,
+                        strlen(optstr) ? "/" : "", optstr);
+            }
+            SCLogDebug("device with SW-ring enabled (devname): %s", devname);
+        } else if (ring == 0 && soft) {
+            /* Ring 0 of HW endpoint, and other endpoint is SW stack,
+             * so request SW host stack rings to match HW rings count.
+             */
+            snprintf(devname, sizeof(devname), "netmap:%s-%d%s%s@conf:host-rings=%d", ns->iface,
+                    ring, strlen(optstr) ? "/" : "", optstr, ns->threads);
+            SCLogDebug("device with HW-ring enabled (devname): %s", devname);
+        } else {
+            /* Hardware ring other than ring 0, or both endpoints are HW
+             * and there is no host stack (SW) endpoint */
+            snprintf(devname, sizeof(devname), "netmap:%s-%d%s%s", ns->iface, ring,
+                    strlen(optstr) ? "/" : "", optstr);
+            SCLogDebug("device with HW-ring enabled (devname): %s", devname);
+        }
+    }
+
+    strlcpy(pdev->ifname, ns->iface, sizeof(pdev->ifname));
+
+    /* have the netmap API parse device name and prepare the port descriptor for us */
+    pdev->nmd = nmport_prepare(devname);
+
+    if (pdev->nmd == NULL) {
+        open_errno = errno;
+    } else {
+        /* For RX devices, set the nr_mode flag we need on the netmap port TX rings prior to opening
+         */
+        if (read) {
+            pdev->nmd->reg.nr_flags |= NR_NO_TX_POLL;
+        }
+
+        /* Now attempt to actually open the netmap port descriptor */
+        if (nmport_open_desc(pdev->nmd) < 0) {
+            /* remember why the open failed before cleaning up the descriptor */
+            open_errno = errno;
+            nmport_close(pdev->nmd);
+            pdev->nmd = NULL;
+        }
+    }
+
+    if (pdev->nmd == NULL) {
+        if (open_errno == EINVAL) {
+            if (opt_z[0] == 'z') {
+                SCLogNotice(
+                        "%s: dev '%s' got EINVAL: going to retry without 'z'", base_name, devname);
+                opt_z = "";
+                goto retry;
+            } else if (opt_x[0] == 'x') {
+                SCLogNotice(
+                        "%s: dev '%s' got EINVAL: going to retry without 'x'", base_name, devname);
+                opt_x = "";
+                goto retry;
+            }
+        }
+
+        /* NetmapCloseAll() takes netmap_devlist_lock itself, so it must be
+         * released here or this thread would block on its own lock */
+        SCMutexUnlock(&netmap_devlist_lock);
+        NetmapCloseAll();
+        FatalError("opening devname %s failed: %s", devname, strerror(open_errno));
+    }
+
+    /* Work around bug in libnetmap library where "cur_{r,t}x_ring" values not initialized */
+    SCLogDebug("%s -- cur rings: [%d, %d] first rings: [%d, %d]", devname, pdev->nmd->cur_rx_ring,
+            pdev->nmd->cur_tx_ring, pdev->nmd->first_rx_ring, pdev->nmd->first_tx_ring);
+    pdev->nmd->cur_rx_ring = pdev->nmd->first_rx_ring;
+    pdev->nmd->cur_tx_ring = pdev->nmd->first_tx_ring;
+
+    SCLogInfo("%s: %s opened [fd: %d]", devname, ns->iface, pdev->nmd->fd);
+
+    pdev->direction = direction;
+    pdev->ring = ring;
+    SCMutexInit(&pdev->netmap_dev_lock, NULL);
+    TAILQ_INSERT_TAIL(&netmap_devlist, pdev, next);
+
+    SCMutexUnlock(&netmap_devlist_lock);
+    *pdevice = pdev;
+
+    return 0;
+error:
+    return -1;
+}
+
+/**
+ * \brief Flush the thread-local packet and drop counts.
+ *
+ * Adds the counts gathered since the previous call to the stats counters
+ * and to the live device totals, then resets them to zero.
+ *
+ * \param ntv thread local variables
+ */
+static inline void NetmapDumpCounters(NetmapThreadVars *ntv)
+{
+    /* stats API counters */
+    StatsAddUI64(ntv->tv, ntv->capture_kernel_packets, ntv->pkts);
+    StatsAddUI64(ntv->tv, ntv->capture_kernel_drops, ntv->drops);
+
+    /* live device totals */
+    (void)SC_ATOMIC_ADD(ntv->livedev->drop, ntv->drops);
+    (void)SC_ATOMIC_ADD(ntv->livedev->pkts, ntv->pkts);
+
+    /* start a new accounting interval */
+    ntv->pkts = 0;
+    ntv->drops = 0;
+}
+
+/**
+ * \brief Init function for ReceiveNetmap.
+ *
+ * Allocates the thread variables, opens the source ring (and the
+ * destination ring when a copy mode is configured), registers the capture
+ * counters and compiles the optional BPF filter. The config reference is
+ * released through aconf->DerefFunc() on every path taken after the
+ * initdata NULL check.
+ *
+ * \param tv pointer to ThreadVars
+ * \param initdata pointer to the NetmapIfaceConfig for this interface
+ * \param data pointer gets populated with NetmapThreadVars
+ * \retval TM_ECODE_OK on success, TM_ECODE_FAILED otherwise
+ */
+static TmEcode ReceiveNetmapThreadInit(ThreadVars *tv, const void *initdata, void **data)
+{
+    SCEnter();
+
+    if (initdata == NULL) {
+        SCLogError("initdata == NULL");
+        SCReturnInt(TM_ECODE_FAILED);
+    }
+    NetmapIfaceConfig *aconf = (NetmapIfaceConfig *)initdata;
+
+    NetmapThreadVars *ntv = SCCalloc(1, sizeof(*ntv));
+    if (unlikely(ntv == NULL)) {
+        SCLogError("Memory allocation failed");
+        goto error;
+    }
+
+    ntv->livedev = LiveGetDevice(aconf->iface_name);
+    if (ntv->livedev == NULL) {
+        SCLogError("Unable to find Live device");
+        goto error_ntv;
+    }
+
+    ntv->tv = tv;
+    ntv->copy_mode = aconf->in.copy_mode;
+    ntv->checksum_mode = aconf->in.checksum_mode;
+
+    /* the runmode decides how packet buffers and TX rings are handled */
+    const char *runmode = RunmodeGetActive();
+    if (strcmp("workers", runmode) == 0) {
+        /* enable zero-copy mode for workers runmode */
+        ntv->flags |= NETMAP_FLAG_ZERO_COPY;
+        SCLogDebug("Enabling zero copy mode for %s", aconf->in.iface);
+    } else if (strcmp("autofp", runmode) == 0) {
+        ntv->flags |= NETMAP_FLAG_EXCL_RING_ACCESS;
+    }
+
+    /* Need to insure open of ring 0 conveys requested ring count for open */
+    bool soft = aconf->in.sw_ring || aconf->out.sw_ring;
+
+    /* source (RX) endpoint */
+    if (NetmapOpen(&aconf->in, &ntv->ifsrc, 1, 1, (ntv->flags & NETMAP_FLAG_ZERO_COPY) != 0,
+                soft) != 0) {
+        goto error_ntv;
+    }
+
+    /* destination (TX) endpoint, only when packets are copied out */
+    if (aconf->in.copy_mode != NETMAP_COPY_MODE_NONE) {
+        if (NetmapOpen(&aconf->out, &ntv->ifdst, 1, 0,
+                    (ntv->flags & NETMAP_FLAG_ZERO_COPY) != 0, soft) != 0) {
+            goto error_src;
+        }
+    }
+
+    /* basic counters */
+    ntv->capture_kernel_packets = StatsRegisterCounter("capture.kernel_packets", ntv->tv);
+    ntv->capture_kernel_drops = StatsRegisterCounter("capture.kernel_drops", ntv->tv);
+
+    if (aconf->in.bpf_filter) {
+        char errbuf[PCAP_ERRBUF_SIZE];
+
+        SCLogConfig("%s: using BPF '%s'", ntv->ifsrc->ifname, aconf->in.bpf_filter);
+        int rc = SCBPFCompile(default_packet_size, /* snaplen_arg */
+                LINKTYPE_ETHERNET,                 /* linktype_arg */
+                &ntv->bpf_prog,                    /* program */
+                aconf->in.bpf_filter,              /* const char *buf */
+                1,                                 /* optimize */
+                PCAP_NETMASK_UNKNOWN,              /* mask */
+                errbuf, sizeof(errbuf));
+        if (rc == -1) {
+            SCLogError("%s: failed to compile BPF \"%s\": %s", ntv->ifsrc->ifname,
+                    aconf->in.bpf_filter, errbuf);
+            goto error_dst;
+        }
+    }
+
+    SCLogDebug("thread: %s polling on fd: %d", tv->name, ntv->ifsrc->nmd->fd);
+
+    DatalinkSetGlobalType(LINKTYPE_ETHERNET);
+
+    *data = (void *)ntv;
+    aconf->DerefFunc(aconf);
+    SCReturnInt(TM_ECODE_OK);
+
+    /* cleanup ladder: each label undoes one more step than the one below it */
+error_dst:
+    if (aconf->in.copy_mode != NETMAP_COPY_MODE_NONE) {
+        NetmapClose(ntv->ifdst);
+    }
+
+error_src:
+    NetmapClose(ntv->ifsrc);
+
+error_ntv:
+    SCFree(ntv);
+
+error:
+    aconf->DerefFunc(aconf);
+    SCReturnInt(TM_ECODE_FAILED);
+}
+
+/**
+ * \brief Output packet to destination interface or drop.
+ *
+ * In IPS copy mode a packet carrying the drop action is not forwarded.
+ * Otherwise the packet is injected into the destination port; a failed
+ * inject is followed by a TX sync and retried, for at most four attempts
+ * in total. When exclusive ring access is flagged, the destination device
+ * lock is held for the whole write.
+ *
+ * \param ntv Thread local variables.
+ * \param p Source packet.
+ * \retval TM_ECODE_OK when the packet was sent or intentionally dropped,
+ *         TM_ECODE_FAILED when every inject attempt failed
+ */
+static TmEcode NetmapWritePacket(NetmapThreadVars *ntv, Packet *p)
+{
+    if (ntv->copy_mode == NETMAP_COPY_MODE_IPS && PacketCheckAction(p, ACTION_DROP)) {
+        return TM_ECODE_OK;
+    }
+    DEBUG_VALIDATE_BUG_ON(ntv->ifdst == NULL);
+
+    /* Lock the destination netmap ring while writing to it */
+    const bool exclusive = (ntv->flags & NETMAP_FLAG_EXCL_RING_ACCESS) != 0;
+    if (exclusive) {
+        SCMutexLock(&ntv->ifdst->netmap_dev_lock);
+    }
+
+    bool injected = false;
+    for (int attempt = 0; attempt <= 3; attempt++) {
+        /* attempt to write the packet into the netmap ring buffer(s) */
+        if (nmport_inject(ntv->ifdst->nmd, GET_PKT_DATA(p), GET_PKT_LEN(p)) != 0) {
+            injected = true;
+            break;
+        }
+
+        /* writing the packet failed, but ask kernel to sync TX rings
+         * for us as the ring buffers may simply be full */
+        (void)ioctl(ntv->ifdst->nmd->fd, NIOCTXSYNC, 0);
+    }
+
+    if (!injected) {
+        if (exclusive) {
+            SCMutexUnlock(&ntv->ifdst->netmap_dev_lock);
+        }
+        SCLogDebug("failed to send %s -> %s", ntv->ifsrc->ifname, ntv->ifdst->ifname);
+        ntv->drops++;
+        return TM_ECODE_FAILED;
+    }
+
+    SCLogDebug("sent successfully: %s(%d)->%s(%d) (%u)", ntv->ifsrc->ifname, ntv->ifsrc->ring,
+            ntv->ifdst->ifname, ntv->ifdst->ring, GET_PKT_LEN(p));
+
+    /* Instruct netmap to push the data on the TX ring on the destination port */
+    (void)ioctl(ntv->ifdst->nmd->fd, NIOCTXSYNC, 0);
+    if (exclusive) {
+        SCMutexUnlock(&ntv->ifdst->netmap_dev_lock);
+    }
+    return TM_ECODE_OK;
+}
+
+/**
+ * \brief Packet release routine.
+ *
+ * Real (non-pseudo) packets are written to the destination interface
+ * first when a copy mode is configured; the packet is then freed or
+ * returned to its pool.
+ *
+ * \param p Packet.
+ */
+static void NetmapReleasePacket(Packet *p)
+{
+    NetmapThreadVars *ntv = (NetmapThreadVars *)p->netmap_v.ntv;
+
+    if (ntv->copy_mode != NETMAP_COPY_MODE_NONE) {
+        if (!PKT_IS_PSEUDOPKT(p)) {
+            NetmapWritePacket(ntv, p);
+        }
+    }
+
+    PacketFreeOrRelease(p);
+}
+
+/**
+ * \brief Turn one received netmap packet into a Packet and hand it to the
+ *        next slot of the thread.
+ *
+ * The packet is first run through the BPF filter when one is configured.
+ * Only ph->caplen bytes are readable starting at ph->buf (the reader stops
+ * growing caplen when the slots of a multi-slot packet are not contiguous
+ * in memory), so caplen bounds both the filter and the packet data, while
+ * ph->len is still used for the byte counter.
+ *
+ * \param ntv thread local variables
+ * \param ph netmap packet header describing the received data
+ */
+static void NetmapProcessPacket(NetmapThreadVars *ntv, const struct nm_pkthdr *ph)
+{
+    if (ntv->bpf_prog.bf_len) {
+        /* pcap header: timestamp, captured (readable) length, wire length */
+        struct pcap_pkthdr pkthdr = { {0, 0}, ph->caplen, ph->len };
+        if (pcap_offline_filter(&ntv->bpf_prog, &pkthdr, ph->buf) == 0) {
+            return;
+        }
+    }
+
+    Packet *p = PacketPoolGetPacket();
+    if (unlikely(p == NULL)) {
+        return;
+    }
+
+    PKT_SET_SRC(p, PKT_SRC_WIRE);
+    p->livedev = ntv->livedev;
+    p->datalink = LINKTYPE_ETHERNET;
+    p->ts = SCTIME_FROM_TIMEVAL(&ph->ts);
+    ntv->pkts++;
+    ntv->bytes += ph->len;
+
+    if (ntv->flags & NETMAP_FLAG_ZERO_COPY) {
+        /* reference the ring buffer directly */
+        if (PacketSetData(p, (uint8_t *)ph->buf, ph->caplen) == -1) {
+            TmqhOutputPacketpool(ntv->tv, p);
+            return;
+        }
+    } else {
+        /* copy the data out of the ring buffer */
+        if (PacketCopyData(p, (uint8_t *)ph->buf, ph->caplen) == -1) {
+            TmqhOutputPacketpool(ntv->tv, p);
+            return;
+        }
+    }
+
+    p->ReleasePacket = NetmapReleasePacket;
+    p->netmap_v.ntv = ntv;
+
+    SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
+            GET_PKT_LEN(p), p, GET_PKT_DATA(p));
+
+    (void)TmThreadsSlotProcessPkt(ntv->tv, ntv->slot, p);
+}
+
+/**
+ * \brief Copy netmap rings data into Packet structures.
+ *
+ * Visits each RX ring of the port at most once, starting at
+ * d->cur_rx_ring and wrapping to d->first_rx_ring, and hands every packet
+ * found to NetmapProcessPacket(). Processing lags one packet behind
+ * reading: a packet is processed at the start of the next slot iteration,
+ * and the last one after both loops have finished.
+ *
+ * \param *d nmport_d (or nm_desc) netmap if structure.
+ * \param cnt int count of packets to read (-1 = all); 0 is treated as -1.
+ * \param *ntv NetmapThreadVars.
+ * \return the number of packets read. NOTE(review): the declared return
+ *         type is TmEcode although a count is returned; the only caller
+ *         visible here ignores the value.
+ */
+static TmEcode NetmapReadPackets(struct nmport_d *d, int cnt, NetmapThreadVars *ntv)
+{
+ struct nm_pkthdr hdr;
+ /* despite its name this is the number of RX rings of the port */
+ int last_ring = d->last_rx_ring - d->first_rx_ring + 1;
+ int cur_ring, got = 0, cur_rx_ring = d->cur_rx_ring;
+
+ /* a NULL hdr.buf marks "no packet pending from a previous round" */
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.flags = NM_MORE_PKTS;
+
+ if (cnt == 0)
+ cnt = -1;
+
+ /* cur_ring counts visited rings, cur_rx_ring is the actual ring index */
+ for (cur_ring = 0; cur_ring < last_ring && cnt != got; cur_ring++, cur_rx_ring++) {
+ struct netmap_ring *ring;
+
+ /* wrap around to the first ring of the port */
+ if (cur_rx_ring > d->last_rx_ring)
+ cur_rx_ring = d->first_rx_ring;
+
+ ring = NETMAP_RXRING(d->nifp, cur_rx_ring);
+
+ /* cycle through the non-empty ring slots to fetch their data */
+ for (; !nm_ring_empty(ring) && cnt != got; got++) {
+ u_int idx, i;
+ u_char *oldbuf;
+ struct netmap_slot *slot;
+
+ if (hdr.buf) { /* from previous round */
+ NetmapProcessPacket(ntv, &hdr);
+ }
+
+ i = ring->cur;
+ slot = &ring->slot[i];
+ idx = slot->buf_idx;
+ /* remember the ring being read in the port descriptor */
+ d->cur_rx_ring = cur_rx_ring;
+ hdr.slot = slot;
+ oldbuf = hdr.buf = (u_char *)NETMAP_BUF(ring, idx);
+ hdr.len = hdr.caplen = slot->len;
+
+ /* loop through the ring slots to get packet data */
+ while (slot->flags & NS_MOREFRAG) {
+ /* packet can be fragmented across multiple slots, */
+ /* so loop until we find the slot with the flag */
+ /* cleared, signalling the end of the packet data. */
+ u_char *nbuf;
+ u_int oldlen = slot->len;
+ i = nm_ring_next(ring, i);
+ slot = &ring->slot[i];
+ hdr.len += slot->len;
+ nbuf = (u_char *)NETMAP_BUF(ring, slot->buf_idx);
+
+ /* caplen only grows while each buffer directly follows the */
+ /* previous, completely filled one; after the first gap it */
+ /* stays fixed for the rest of the packet. */
+ if (oldbuf != NULL && nbuf - oldbuf == ring->nr_buf_size &&
+ oldlen == ring->nr_buf_size) {
+ hdr.caplen += slot->len;
+ oldbuf = nbuf;
+ } else {
+ oldbuf = NULL;
+ }
+ }
+
+ hdr.ts = ring->ts;
+ /* advance head and cur past the last slot of this packet */
+ ring->head = ring->cur = nm_ring_next(ring, i);
+ }
+ }
+
+ if (hdr.buf) { /* from previous round */
+ hdr.flags = 0;
+ NetmapProcessPacket(ntv, &hdr);
+ }
+ return got;
+}
+
+/**
+ * \brief Main netmap reading loop function
+ *
+ * Polls the source port until suricata_ctl_flags becomes non-zero. Packets
+ * are read whenever the descriptor is readable; counters are flushed after
+ * every read and on every poll timeout.
+ *
+ * \param tv pointer to ThreadVars
+ * \param data pointer to the NetmapThreadVars of this thread
+ * \param slot pointer to the TmSlot this module runs in
+ * \retval TM_ECODE_OK once the loop has been left
+ */
+static TmEcode ReceiveNetmapLoop(ThreadVars *tv, void *data, void *slot)
+{
+    SCEnter();
+
+    NetmapThreadVars *ntv = (NetmapThreadVars *)data;
+    ntv->slot = ((TmSlot *)slot)->slot_next;
+
+    struct pollfd fds;
+    fds.events = POLLIN;
+    fds.fd = ntv->ifsrc->nmd->fd;
+
+    SCLogDebug("thread %s polling on %d", tv->name, fds.fd);
+
+    // Indicate that the thread is actually running its application level code (i.e., it can poll
+    // packets)
+    TmThreadsSetFlag(tv, THV_RUNNING);
+
+    while (suricata_ctl_flags == 0) {
+        /* make sure we have at least one packet in the packet pool,
+         * to prevent us from alloc'ing packets at line rate */
+        PacketPoolWait();
+
+        const int r = poll(&fds, 1, POLL_TIMEOUT);
+        if (r == 0) {
+            /* no events, timeout: sync counters */
+            NetmapDumpCounters(ntv);
+            StatsSyncCountersIfSignalled(tv);
+
+            /* poll timed out, lets handle the timeout */
+            TmThreadsCaptureHandleTimeout(tv, NULL);
+            continue;
+        }
+        if (r < 0) {
+            /* error; an interrupted call is not worth reporting */
+            if (errno != EINTR)
+                SCLogError("%s: error polling netmap: %s", ntv->ifsrc->ifname, strerror(errno));
+            continue;
+        }
+
+        if (unlikely(fds.revents & POLL_EVENTS)) {
+            if (fds.revents & POLLERR) {
+                SCLogError("%s: error reading netmap data via polling: %s", ntv->ifsrc->ifname,
+                        strerror(errno));
+            } else if (fds.revents & POLLNVAL) {
+                SCLogError("%s: invalid polling request", ntv->ifsrc->ifname);
+            }
+            continue;
+        }
+
+        if (likely(fds.revents & POLLIN)) {
+            /* have data on RX ring, so copy to Packet for processing */
+            NetmapReadPackets(ntv->ifsrc->nmd, -1, ntv);
+        }
+
+        NetmapDumpCounters(ntv);
+        StatsSyncCountersIfSignalled(tv);
+    }
+
+    /* final flush before the thread leaves the loop */
+    NetmapDumpCounters(ntv);
+    StatsSyncCountersIfSignalled(tv);
+    SCReturnInt(TM_ECODE_OK);
+}
+
+/**
+ * \brief Log the capture statistics of this thread at exit.
+ *
+ * Flushes the pending counters first so the logged values include them.
+ *
+ * \param tv pointer to ThreadVars
+ * \param data pointer that gets cast into NetmapThreadVars for ntv
+ */
+static void ReceiveNetmapThreadExitStats(ThreadVars *tv, void *data)
+{
+    SCEnter();
+    NetmapThreadVars *ntv = (NetmapThreadVars *)data;
+
+    NetmapDumpCounters(ntv);
+
+    const uint64_t packets = StatsGetLocalCounterValue(tv, ntv->capture_kernel_packets);
+    const uint64_t dropped = StatsGetLocalCounterValue(tv, ntv->capture_kernel_drops);
+    SCLogPerf("%s: (%s) packets %" PRIu64 ", dropped %" PRIu64 ", bytes %" PRIu64 "",
+            ntv->ifsrc->ifname, tv->name, packets, dropped, ntv->bytes);
+}
+
+/**
+ * \brief Release the resources held by a ReceiveNetmap thread.
+ *
+ * Closes the source and destination devices when present, frees the
+ * compiled BPF program when there is one and frees the thread variables.
+ *
+ * \param tv pointer to ThreadVars
+ * \param data Pointer to NetmapThreadVars.
+ * \retval TM_ECODE_OK always
+ */
+static TmEcode ReceiveNetmapThreadDeinit(ThreadVars *tv, void *data)
+{
+    SCEnter();
+
+    NetmapThreadVars *ntv = (NetmapThreadVars *)data;
+
+    if (ntv->ifsrc != NULL) {
+        NetmapClose(ntv->ifsrc);
+        ntv->ifsrc = NULL;
+    }
+
+    if (ntv->ifdst != NULL) {
+        NetmapClose(ntv->ifdst);
+        ntv->ifdst = NULL;
+    }
+
+    /* only a compiled filter owns an instruction buffer */
+    if (ntv->bpf_prog.bf_insns != NULL) {
+        SCBPFFree(&ntv->bpf_prog);
+    }
+
+    SCFree(ntv);
+
+    SCReturnInt(TM_ECODE_OK);
+}
+
+/**
+ * \brief Prepare netmap decode thread.
+ *
+ * Allocates the decoder thread variables and registers the decoder
+ * performance counters on them.
+ *
+ * \param tv Thread local variables.
+ * \param initdata Thread config (unused).
+ * \param data Pointer to DecodeThreadVars placed here.
+ * \retval TM_ECODE_OK on success, TM_ECODE_FAILED when allocation fails
+ */
+static TmEcode DecodeNetmapThreadInit(ThreadVars *tv, const void *initdata, void **data)
+{
+    SCEnter();
+
+    DecodeThreadVars *dtv = DecodeThreadVarsAlloc(tv);
+    if (dtv != NULL) {
+        DecodeRegisterPerfCounters(dtv, tv);
+        *data = (void *)dtv;
+        SCReturnInt(TM_ECODE_OK);
+    }
+
+    SCReturnInt(TM_ECODE_FAILED);
+}
+
+/**
+ * \brief Decode a packet received through netmap as an Ethernet frame.
+ *
+ * \param tv pointer to ThreadVars
+ * \param p pointer to the current packet; must not be a pseudo packet
+ * \param data pointer that gets cast into DecodeThreadVars for dtv
+ * \retval TM_ECODE_OK always
+ */
+static TmEcode DecodeNetmap(ThreadVars *tv, Packet *p, void *data)
+{
+    SCEnter();
+
+    BUG_ON(PKT_IS_PSEUDOPKT(p));
+
+    DecodeThreadVars *dtv = (DecodeThreadVars *)data;
+
+    /* update counters */
+    DecodeUpdatePacketCounters(tv, dtv, p);
+
+    /* netmap always delivers Ethernet frames to this module */
+    uint8_t *pkt_data = GET_PKT_DATA(p);
+    uint32_t pkt_len = GET_PKT_LEN(p);
+    DecodeEthernet(tv, dtv, p, pkt_data, pkt_len);
+
+    PacketDecodeFinalize(tv, dtv, p);
+
+    SCReturnInt(TM_ECODE_OK);
+}
+
+/**
+ * \brief Free the decoder thread variables of a netmap decode thread.
+ *
+ * \param tv pointer to ThreadVars
+ * \param data Pointer to DecodeThreadVars; may be NULL.
+ * \retval TM_ECODE_OK always
+ */
+static TmEcode DecodeNetmapThreadDeinit(ThreadVars *tv, void *data)
+{
+    SCEnter();
+
+    if (data == NULL) {
+        SCReturnInt(TM_ECODE_OK);
+    }
+
+    DecodeThreadVarsFree(tv, data);
+
+    SCReturnInt(TM_ECODE_OK);
+}
+
+/**
+ * \brief Registration Function for ReceiveNetmap.
+ *
+ * Fills the TMM_RECEIVENETMAP entry of tmm_modules with the receive
+ * module callbacks, its capability flags and its module flags.
+ */
+void TmModuleReceiveNetmapRegister(void)
+{
+    /* identity and flags */
+    tmm_modules[TMM_RECEIVENETMAP].name = "ReceiveNetmap";
+    tmm_modules[TMM_RECEIVENETMAP].flags = TM_FLAG_RECEIVE_TM;
+    tmm_modules[TMM_RECEIVENETMAP].cap_flags = SC_CAP_NET_RAW;
+
+    /* thread lifecycle callbacks */
+    tmm_modules[TMM_RECEIVENETMAP].ThreadInit = ReceiveNetmapThreadInit;
+    tmm_modules[TMM_RECEIVENETMAP].PktAcqLoop = ReceiveNetmapLoop;
+    tmm_modules[TMM_RECEIVENETMAP].ThreadExitPrintStats = ReceiveNetmapThreadExitStats;
+    tmm_modules[TMM_RECEIVENETMAP].ThreadDeinit = ReceiveNetmapThreadDeinit;
+}
+
+/**
+ * \brief Registration Function for DecodeNetmap.
+ *
+ * Fills the TMM_DECODENETMAP entry of tmm_modules with the decode module
+ * callbacks and flags; the module requests no capabilities.
+ */
+void TmModuleDecodeNetmapRegister(void)
+{
+    /* identity and flags */
+    tmm_modules[TMM_DECODENETMAP].name = "DecodeNetmap";
+    tmm_modules[TMM_DECODENETMAP].flags = TM_FLAG_DECODE_TM;
+    tmm_modules[TMM_DECODENETMAP].cap_flags = 0;
+
+    /* thread lifecycle and per-packet callbacks */
+    tmm_modules[TMM_DECODENETMAP].ThreadInit = DecodeNetmapThreadInit;
+    tmm_modules[TMM_DECODENETMAP].Func = DecodeNetmap;
+    tmm_modules[TMM_DECODENETMAP].ThreadDeinit = DecodeNetmapThreadDeinit;
+}
+
+#endif /* HAVE_NETMAP */
+
+/**
+* @}
+*/