/* Copyright (C) 2011-2022 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free * Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * version 2 along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. */ /** * \defgroup netmap Netmap running mode * * @{ */ /** * \file * * \author Aleksey Katargin * \author Victor Julien * \author Bill Meeks * * Netmap socket acquisition support * * Many thanks to Luigi Rizzo for guidance and support. * */ #include "suricata.h" #include "suricata-common.h" #include "tm-threads.h" #include "packet.h" #include "util-bpf.h" #include "util-privs.h" #include "util-validate.h" #include "util-datalink.h" #include "source-netmap.h" #ifdef HAVE_NETMAP #define NETMAP_WITH_LIBS #ifdef DEBUG #define DEBUG_NETMAP_USER #endif #include #include #endif /* HAVE_NETMAP */ #include "util-ioctl.h" #ifndef HAVE_NETMAP /** * \brief this function prints an error message and exits. */ static TmEcode NoNetmapSupportExit(ThreadVars *tv, const void *initdata, void **data) { FatalError("Error creating thread %s: Netmap is not enabled. " "Make sure to pass --enable-netmap to configure when building.", tv->name); } void TmModuleReceiveNetmapRegister (void) { tmm_modules[TMM_RECEIVENETMAP].name = "ReceiveNetmap"; tmm_modules[TMM_RECEIVENETMAP].ThreadInit = NoNetmapSupportExit; tmm_modules[TMM_RECEIVENETMAP].flags = TM_FLAG_RECEIVE_TM; } /** * \brief Registration Function for DecodeNetmap. */ void TmModuleDecodeNetmapRegister (void) { tmm_modules[TMM_DECODENETMAP].name = "DecodeNetmap"; tmm_modules[TMM_DECODENETMAP].ThreadInit = NoNetmapSupportExit; tmm_modules[TMM_DECODENETMAP].flags = TM_FLAG_DECODE_TM; } #else /* We have NETMAP support */ #include "action-globals.h" #define POLL_TIMEOUT 100 #if defined(__linux__) #define POLL_EVENTS (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL) #ifndef IFF_PPROMISC #define IFF_PPROMISC IFF_PROMISC #endif #else #define POLL_EVENTS (POLLHUP|POLLERR|POLLNVAL) #endif enum { NETMAP_FLAG_ZERO_COPY = 1, NETMAP_FLAG_EXCL_RING_ACCESS = 2 }; /** * \brief Netmap device instance. Each ring for each device gets its own * device. */ typedef struct NetmapDevice_ { struct nmport_d *nmd; unsigned int ref; SC_ATOMIC_DECLARE(unsigned int, threads_run); TAILQ_ENTRY(NetmapDevice_) next; // actual ifname can only be 16, but we store a bit more, // like the options string and a 'netmap:' prefix. char ifname[32]; int ring; int direction; // 0 rx, 1 tx // autofp: Used to lock a destination ring while we are sending data. SCMutex netmap_dev_lock; } NetmapDevice; /** * \brief Module thread local variables. */ typedef struct NetmapThreadVars_ { /* receive interface */ NetmapDevice *ifsrc; /* dst interface for IPS mode */ NetmapDevice *ifdst; int flags; struct bpf_program bpf_prog; /* suricata internals */ TmSlot *slot; ThreadVars *tv; LiveDevice *livedev; /* copy from config */ int copy_mode; ChecksumValidationMode checksum_mode; /* counters */ uint64_t pkts; uint64_t bytes; uint64_t drops; uint16_t capture_kernel_packets; uint16_t capture_kernel_drops; } NetmapThreadVars; typedef TAILQ_HEAD(NetmapDeviceList_, NetmapDevice_) NetmapDeviceList; static NetmapDeviceList netmap_devlist = TAILQ_HEAD_INITIALIZER(netmap_devlist); static SCMutex netmap_devlist_lock = SCMUTEX_INITIALIZER; /** \brief get RSS RX-queue count * \retval rx_rings RSS RX queue count or 0 on error */ int NetmapGetRSSCount(const char *ifname) { struct nmreq_port_info_get req; struct nmreq_header hdr; int rx_rings = 0; /* we need the base interface name to query queues */ char base_name[IFNAMSIZ]; strlcpy(base_name, ifname, sizeof(base_name)); if (strlen(base_name) > 0 && (base_name[strlen(base_name) - 1] == '^' || base_name[strlen(base_name) - 1] == '*')) { base_name[strlen(base_name) - 1] = '\0'; } SCMutexLock(&netmap_devlist_lock); /* open netmap device */ int fd = open("/dev/netmap", O_RDWR); if (fd == -1) { SCLogError("%s: open netmap device failed: %s", ifname, strerror(errno)); goto error_open; } /* query netmap interface info for ring count */ memset(&req, 0, sizeof(req)); memset(&hdr, 0, sizeof(hdr)); hdr.nr_version = NETMAP_API; hdr.nr_reqtype = NETMAP_REQ_PORT_INFO_GET; hdr.nr_body = (uintptr_t)&req; strlcpy(hdr.nr_name, base_name, sizeof(hdr.nr_name)); if (ioctl(fd, NIOCCTRL, &hdr) != 0) { SCLogError( "Query of netmap HW rings count on %s failed; error: %s", ifname, strerror(errno)); goto error_fd; }; /* return RX rings count if it equals TX rings count */ if (req.nr_rx_rings == req.nr_tx_rings) { rx_rings = req.nr_rx_rings; } error_fd: close(fd); error_open: SCMutexUnlock(&netmap_devlist_lock); return rx_rings; } static void NetmapDestroyDevice(NetmapDevice *pdev) { nmport_close(pdev->nmd); SCMutexDestroy(&pdev->netmap_dev_lock); SCFree(pdev); } /** * \brief Close or dereference netmap device instance. * \param dev Netmap device instance. * \return Zero on success. */ static int NetmapClose(NetmapDevice *dev) { NetmapDevice *pdev, *tmp; SCMutexLock(&netmap_devlist_lock); TAILQ_FOREACH_SAFE (pdev, &netmap_devlist, next, tmp) { if (pdev == dev) { pdev->ref--; if (!pdev->ref) { NetmapDestroyDevice(pdev); } SCMutexUnlock(&netmap_devlist_lock); return 0; } } SCMutexUnlock(&netmap_devlist_lock); return -1; } /** * \brief Close all open netmap device instances. */ static void NetmapCloseAll(void) { NetmapDevice *pdev, *tmp; SCMutexLock(&netmap_devlist_lock); TAILQ_FOREACH_SAFE (pdev, &netmap_devlist, next, tmp) { NetmapDestroyDevice(pdev); } SCMutexUnlock(&netmap_devlist_lock); } /** * \brief Open interface in netmap mode. * \param ifname Interface name. * \param promisc Enable promiscuous mode. * \param dev Pointer to requested netmap device instance. * \param verbose Verbose error logging. * \param read Indicates direction: RX or TX * \param zerocopy 1 if zerocopy access requested * \param soft Use Host stack (software) interface * \return Zero on success. */ static int NetmapOpen(NetmapIfaceSettings *ns, NetmapDevice **pdevice, int verbose, int read, bool zerocopy, bool soft) { SCEnter(); SCLogDebug("ifname %s", ns->iface); char base_name[IFNAMSIZ]; strlcpy(base_name, ns->iface, sizeof(base_name)); if (strlen(base_name) > 0 && (base_name[strlen(base_name)-1] == '^' || base_name[strlen(base_name)-1] == '*')) { base_name[strlen(base_name)-1] = '\0'; } if (ns->real) { /* check interface is up */ int if_flags = GetIfaceFlags(base_name); if (if_flags == -1) { if (verbose) { SCLogError("%s: cannot access network interface: %s", base_name, ns->iface); } goto error; } /* bring iface up if it is down */ if ((if_flags & IFF_UP) == 0) { SCLogError("%s: interface is down", base_name); goto error; } /* if needed, try to set iface in promisc mode */ if (ns->promisc && (if_flags & (IFF_PROMISC|IFF_PPROMISC)) == 0) { if_flags |= IFF_PPROMISC; SetIfaceFlags(base_name, if_flags); // TODO reset at exit // TODO move to parse config? } } NetmapDevice *pdev = NULL, *spdev = NULL; pdev = SCCalloc(1, sizeof(*pdev)); if (unlikely(pdev == NULL)) { SCLogError("%s: memory allocation failed", base_name); goto error; } SC_ATOMIC_INIT(pdev->threads_run); SCMutexLock(&netmap_devlist_lock); const int direction = (read != 1); int ring = 0; /* Search for interface in our already opened list. */ /* We will find it when opening multiple rings on */ /* the device when it exposes multiple RSS queues. */ TAILQ_FOREACH(spdev, &netmap_devlist, next) { SCLogDebug("spdev %s", spdev->ifname); if (direction == spdev->direction && strcmp(ns->iface, spdev->ifname) == 0) { ring = spdev->ring + 1; } } SCLogDebug("netmap/%s: using ring %d", ns->iface, ring); const char *opt_R = "R"; const char *opt_T = "T"; const char *opt_x = "x"; // not for IPS const char *opt_z = "z"; // zero copy, not for IPS /* assemble options string */ char optstr[16]; if (ns->ips) opt_x = ""; // z seems to not play well with multiple opens of a real dev on linux opt_z = ""; /* * How netmap endpoint names are selected: * * The following logic within the "retry" loop builds endpoint names. * * IPS Mode: * There are two endpoints: one hardware NIC and either a hardware NIC or host stack "NIC". * * IDS Mode: * One endpoint -- usually a hardware NIC. * * IPS mode -- with one endpoint a host stack "NIC": * When using multiple rings/threads, then the open of the initial Ring 0 MUST * instruct netmap to open multiple Host Stack rings (as the default is to open only a single * pair). This is also critical for the HW NIC endpoint. This is done by adding * “@conf:host-rings=x” suffix option (where “x” is the number of host rings desired) * to BOTH endpoint nmport_open_desc() calls for ring 0 (hardware and host stack). * For subsequent additional ring open calls, omit the suffix option specifying host ring count. * * IPS mode -- both endpoints are hardware NICs: * Do NOT pass any suffix option (even for Ring 0). You do not need to tell netmap how many * rings, because it already knows the correct value from the NIC driver itself. Specifying a * desired ring count when both ends are Hardware NICs confuses netmap, and it seems to default * to using only a single hardware ring. In this scenario, specify only the specific ring number * being opened. */ // loop to retry opening if unsupported options are used retry: snprintf(optstr, sizeof(optstr), "%s%s%s", opt_z, opt_x, direction == 0 ? opt_R : opt_T); char devname[128]; if (strncmp(ns->iface, "netmap:", 7) == 0) { snprintf(devname, sizeof(devname), "%s}%d%s%s", ns->iface, ring, strlen(optstr) ? "/" : "", optstr); } else if (strlen(ns->iface) > 5 && strncmp(ns->iface, "vale", 4) == 0 && isdigit(ns->iface[4])) { snprintf(devname, sizeof(devname), "%s", ns->iface); } else if (ring == 0 && ns->threads == 1) { /* just a single thread and ring, so don't use ring param */ snprintf(devname, sizeof(devname), "netmap:%s%s%s", ns->iface, strlen(optstr) ? "/" : "", optstr); SCLogDebug("device with %s-ring enabled (devname): %s", soft ? "SW" : "HW", devname); } else { /* Going to be using multiple threads and rings */ if (ns->sw_ring) { /* Opening a host stack interface */ if (ring == 0) { /* Ring 0, so tell netmap how many host rings we want created */ snprintf(devname, sizeof(devname), "netmap:%s%d%s%s@conf:host-rings=%d", ns->iface, ring, strlen(optstr) ? "/" : "", optstr, ns->threads); } else { /* Software (host) ring, but not initial open of ring 0 */ snprintf(devname, sizeof(devname), "netmap:%s%d%s%s", ns->iface, ring, strlen(optstr) ? "/" : "", optstr); } SCLogDebug("device with SW-ring enabled (devname): %s", devname); } else if (ring == 0 && soft) { /* Ring 0 of HW endpoint, and other endpoint is SW stack, * so request SW host stack rings to match HW rings count. */ snprintf(devname, sizeof(devname), "netmap:%s-%d%s%s@conf:host-rings=%d", ns->iface, ring, strlen(optstr) ? "/" : "", optstr, ns->threads); SCLogDebug("device with HW-ring enabled (devname): %s", devname); } else { /* Hardware ring other than ring 0, or both endpoints are HW * and there is no host stack (SW) endpoint */ snprintf(devname, sizeof(devname), "netmap:%s-%d%s%s", ns->iface, ring, strlen(optstr) ? "/" : "", optstr); SCLogDebug("device with HW-ring enabled (devname): %s", devname); } } strlcpy(pdev->ifname, ns->iface, sizeof(pdev->ifname)); /* have the netmap API parse device name and prepare the port descriptor for us */ pdev->nmd = nmport_prepare(devname); if (pdev->nmd != NULL) { /* For RX devices, set the nr_mode flag we need on the netmap port TX rings prior to opening */ if (read) { pdev->nmd->reg.nr_flags |= NR_NO_TX_POLL; } /* Now attempt to actually open the netmap port descriptor */ if (nmport_open_desc(pdev->nmd) < 0) { /* the open failed, so clean-up the descriptor and fall through to error handler */ nmport_close(pdev->nmd); pdev->nmd = NULL; } } if (pdev->nmd == NULL) { if (errno == EINVAL) { if (opt_z[0] == 'z') { SCLogNotice( "%s: dev '%s' got EINVAL: going to retry without 'z'", base_name, devname); opt_z = ""; goto retry; } else if (opt_x[0] == 'x') { SCLogNotice( "%s: dev '%s' got EINVAL: going to retry without 'x'", base_name, devname); opt_x = ""; goto retry; } } SCMutexUnlock(&netmap_devlist_lock); NetmapCloseAll(); FatalError("opening devname %s failed: %s", devname, strerror(errno)); } /* Work around bug in libnetmap library where "cur_{r,t}x_ring" values not initialized */ SCLogDebug("%s -- cur rings: [%d, %d] first rings: [%d, %d]", devname, pdev->nmd->cur_rx_ring, pdev->nmd->cur_tx_ring, pdev->nmd->first_rx_ring, pdev->nmd->first_tx_ring); pdev->nmd->cur_rx_ring = pdev->nmd->first_rx_ring; pdev->nmd->cur_tx_ring = pdev->nmd->first_tx_ring; SCLogInfo("%s: %s opened [fd: %d]", devname, ns->iface, pdev->nmd->fd); pdev->direction = direction; pdev->ring = ring; SCMutexInit(&pdev->netmap_dev_lock, NULL); TAILQ_INSERT_TAIL(&netmap_devlist, pdev, next); SCMutexUnlock(&netmap_devlist_lock); *pdevice = pdev; return 0; error: return -1; } /** * \brief PcapDumpCounters * \param ntv */ static inline void NetmapDumpCounters(NetmapThreadVars *ntv) { StatsAddUI64(ntv->tv, ntv->capture_kernel_packets, ntv->pkts); StatsAddUI64(ntv->tv, ntv->capture_kernel_drops, ntv->drops); (void) SC_ATOMIC_ADD(ntv->livedev->drop, ntv->drops); (void) SC_ATOMIC_ADD(ntv->livedev->pkts, ntv->pkts); ntv->drops = 0; ntv->pkts = 0; } /** * \brief Init function for ReceiveNetmap. * \param tv pointer to ThreadVars * \param initdata pointer to the interface passed from the user * \param data pointer gets populated with NetmapThreadVars */ static TmEcode ReceiveNetmapThreadInit(ThreadVars *tv, const void *initdata, void **data) { SCEnter(); NetmapIfaceConfig *aconf = (NetmapIfaceConfig *)initdata; if (initdata == NULL) { SCLogError("initdata == NULL"); SCReturnInt(TM_ECODE_FAILED); } NetmapThreadVars *ntv = SCCalloc(1, sizeof(*ntv)); if (unlikely(ntv == NULL)) { SCLogError("Memory allocation failed"); goto error; } ntv->livedev = LiveGetDevice(aconf->iface_name); if (ntv->livedev == NULL) { SCLogError("Unable to find Live device"); goto error_ntv; } ntv->tv = tv; ntv->checksum_mode = aconf->in.checksum_mode; ntv->copy_mode = aconf->in.copy_mode; /* enable zero-copy mode for workers runmode */ char const *active_runmode = RunmodeGetActive(); if (strcmp("workers", active_runmode) == 0) { ntv->flags |= NETMAP_FLAG_ZERO_COPY; SCLogDebug("Enabling zero copy mode for %s", aconf->in.iface); } else if (strcmp("autofp", active_runmode) == 0) { ntv->flags |= NETMAP_FLAG_EXCL_RING_ACCESS; } /* Need to insure open of ring 0 conveys requested ring count for open */ bool soft = aconf->in.sw_ring || aconf->out.sw_ring; if (NetmapOpen(&aconf->in, &ntv->ifsrc, 1, 1, (ntv->flags & NETMAP_FLAG_ZERO_COPY) != 0, soft) != 0) { goto error_ntv; } if (aconf->in.copy_mode != NETMAP_COPY_MODE_NONE) { if (NetmapOpen(&aconf->out, &ntv->ifdst, 1, 0, (ntv->flags & NETMAP_FLAG_ZERO_COPY) != 0, soft) != 0) { goto error_src; } } /* basic counters */ ntv->capture_kernel_packets = StatsRegisterCounter("capture.kernel_packets", ntv->tv); ntv->capture_kernel_drops = StatsRegisterCounter("capture.kernel_drops", ntv->tv); if (aconf->in.bpf_filter) { SCLogConfig("%s: using BPF '%s'", ntv->ifsrc->ifname, aconf->in.bpf_filter); char errbuf[PCAP_ERRBUF_SIZE]; if (SCBPFCompile(default_packet_size, /* snaplen_arg */ LINKTYPE_ETHERNET, /* linktype_arg */ &ntv->bpf_prog, /* program */ aconf->in.bpf_filter, /* const char *buf */ 1, /* optimize */ PCAP_NETMASK_UNKNOWN, /* mask */ errbuf, sizeof(errbuf)) == -1) { SCLogError("%s: failed to compile BPF \"%s\": %s", ntv->ifsrc->ifname, aconf->in.bpf_filter, errbuf); goto error_dst; } } SCLogDebug("thread: %s polling on fd: %d", tv->name, ntv->ifsrc->nmd->fd); DatalinkSetGlobalType(LINKTYPE_ETHERNET); *data = (void *)ntv; aconf->DerefFunc(aconf); SCReturnInt(TM_ECODE_OK); error_dst: if (aconf->in.copy_mode != NETMAP_COPY_MODE_NONE) { NetmapClose(ntv->ifdst); } error_src: NetmapClose(ntv->ifsrc); error_ntv: SCFree(ntv); error: aconf->DerefFunc(aconf); SCReturnInt(TM_ECODE_FAILED); } /** * \brief Output packet to destination interface or drop. * \param ntv Thread local variables. * \param p Source packet. */ static TmEcode NetmapWritePacket(NetmapThreadVars *ntv, Packet *p) { if (ntv->copy_mode == NETMAP_COPY_MODE_IPS) { if (PacketCheckAction(p, ACTION_DROP)) { return TM_ECODE_OK; } } DEBUG_VALIDATE_BUG_ON(ntv->ifdst == NULL); /* Lock the destination netmap ring while writing to it */ if (ntv->flags & NETMAP_FLAG_EXCL_RING_ACCESS) { SCMutexLock(&ntv->ifdst->netmap_dev_lock); } int write_tries = 0; try_write: /* attempt to write the packet into the netmap ring buffer(s) */ if (nmport_inject(ntv->ifdst->nmd, GET_PKT_DATA(p), GET_PKT_LEN(p)) == 0) { /* writing the packet failed, but ask kernel to sync TX rings * for us as the ring buffers may simply be full */ (void)ioctl(ntv->ifdst->nmd->fd, NIOCTXSYNC, 0); /* Try write up to 2 more times before giving up */ if (write_tries < 3) { write_tries++; goto try_write; } if (ntv->flags & NETMAP_FLAG_EXCL_RING_ACCESS) { SCMutexUnlock(&ntv->ifdst->netmap_dev_lock); } SCLogDebug("failed to send %s -> %s", ntv->ifsrc->ifname, ntv->ifdst->ifname); ntv->drops++; return TM_ECODE_FAILED; } SCLogDebug("sent successfully: %s(%d)->%s(%d) (%u)", ntv->ifsrc->ifname, ntv->ifsrc->ring, ntv->ifdst->ifname, ntv->ifdst->ring, GET_PKT_LEN(p)); /* Instruct netmap to push the data on the TX ring on the destination port */ (void)ioctl(ntv->ifdst->nmd->fd, NIOCTXSYNC, 0); if (ntv->flags & NETMAP_FLAG_EXCL_RING_ACCESS) { SCMutexUnlock(&ntv->ifdst->netmap_dev_lock); } return TM_ECODE_OK; } /** * \brief Packet release routine. * \param p Packet. */ static void NetmapReleasePacket(Packet *p) { NetmapThreadVars *ntv = (NetmapThreadVars *)p->netmap_v.ntv; if ((ntv->copy_mode != NETMAP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) { NetmapWritePacket(ntv, p); } PacketFreeOrRelease(p); } static void NetmapProcessPacket(NetmapThreadVars *ntv, const struct nm_pkthdr *ph) { if (ntv->bpf_prog.bf_len) { struct pcap_pkthdr pkthdr = { {0, 0}, ph->len, ph->len }; if (pcap_offline_filter(&ntv->bpf_prog, &pkthdr, ph->buf) == 0) { return; } } Packet *p = PacketPoolGetPacket(); if (unlikely(p == NULL)) { return; } PKT_SET_SRC(p, PKT_SRC_WIRE); p->livedev = ntv->livedev; p->datalink = LINKTYPE_ETHERNET; p->ts = SCTIME_FROM_TIMEVAL(&ph->ts); ntv->pkts++; ntv->bytes += ph->len; if (ntv->flags & NETMAP_FLAG_ZERO_COPY) { if (PacketSetData(p, (uint8_t *)ph->buf, ph->len) == -1) { TmqhOutputPacketpool(ntv->tv, p); return; } } else { if (PacketCopyData(p, (uint8_t *)ph->buf, ph->len) == -1) { TmqhOutputPacketpool(ntv->tv, p); return; } } p->ReleasePacket = NetmapReleasePacket; p->netmap_v.ntv = ntv; SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)", GET_PKT_LEN(p), p, GET_PKT_DATA(p)); (void)TmThreadsSlotProcessPkt(ntv->tv, ntv->slot, p); } /** * \brief Copy netmap rings data into Packet structures. * \param *d nmport_d (or nm_desc) netmap if structure. * \param cnt int count of packets to read (-1 = all). * \param *ntv NetmapThreadVars. */ static TmEcode NetmapReadPackets(struct nmport_d *d, int cnt, NetmapThreadVars *ntv) { struct nm_pkthdr hdr; int last_ring = d->last_rx_ring - d->first_rx_ring + 1; int cur_ring, got = 0, cur_rx_ring = d->cur_rx_ring; memset(&hdr, 0, sizeof(hdr)); hdr.flags = NM_MORE_PKTS; if (cnt == 0) cnt = -1; for (cur_ring = 0; cur_ring < last_ring && cnt != got; cur_ring++, cur_rx_ring++) { struct netmap_ring *ring; if (cur_rx_ring > d->last_rx_ring) cur_rx_ring = d->first_rx_ring; ring = NETMAP_RXRING(d->nifp, cur_rx_ring); /* cycle through the non-empty ring slots to fetch their data */ for (; !nm_ring_empty(ring) && cnt != got; got++) { u_int idx, i; u_char *oldbuf; struct netmap_slot *slot; if (hdr.buf) { /* from previous round */ NetmapProcessPacket(ntv, &hdr); } i = ring->cur; slot = &ring->slot[i]; idx = slot->buf_idx; d->cur_rx_ring = cur_rx_ring; hdr.slot = slot; oldbuf = hdr.buf = (u_char *)NETMAP_BUF(ring, idx); hdr.len = hdr.caplen = slot->len; /* loop through the ring slots to get packet data */ while (slot->flags & NS_MOREFRAG) { /* packet can be fragmented across multiple slots, */ /* so loop until we find the slot with the flag */ /* cleared, signalling the end of the packet data. */ u_char *nbuf; u_int oldlen = slot->len; i = nm_ring_next(ring, i); slot = &ring->slot[i]; hdr.len += slot->len; nbuf = (u_char *)NETMAP_BUF(ring, slot->buf_idx); if (oldbuf != NULL && nbuf - oldbuf == ring->nr_buf_size && oldlen == ring->nr_buf_size) { hdr.caplen += slot->len; oldbuf = nbuf; } else { oldbuf = NULL; } } hdr.ts = ring->ts; ring->head = ring->cur = nm_ring_next(ring, i); } } if (hdr.buf) { /* from previous round */ hdr.flags = 0; NetmapProcessPacket(ntv, &hdr); } return got; } /** * \brief Main netmap reading loop function */ static TmEcode ReceiveNetmapLoop(ThreadVars *tv, void *data, void *slot) { SCEnter(); TmSlot *s = (TmSlot *)slot; NetmapThreadVars *ntv = (NetmapThreadVars *)data; struct pollfd fds; ntv->slot = s->slot_next; fds.fd = ntv->ifsrc->nmd->fd; fds.events = POLLIN; SCLogDebug("thread %s polling on %d", tv->name, fds.fd); // Indicate that the thread is actually running its application level code (i.e., it can poll // packets) TmThreadsSetFlag(tv, THV_RUNNING); for(;;) { if (unlikely(suricata_ctl_flags != 0)) { break; } /* make sure we have at least one packet in the packet pool, * to prevent us from alloc'ing packets at line rate */ PacketPoolWait(); int r = poll(&fds, 1, POLL_TIMEOUT); if (r < 0) { /* error */ if (errno != EINTR) SCLogError("%s: error polling netmap: %s", ntv->ifsrc->ifname, strerror(errno)); continue; } else if (r == 0) { /* no events, timeout */ /* sync counters */ NetmapDumpCounters(ntv); StatsSyncCountersIfSignalled(tv); /* poll timed out, lets handle the timeout */ TmThreadsCaptureHandleTimeout(tv, NULL); continue; } if (unlikely(fds.revents & POLL_EVENTS)) { if (fds.revents & POLLERR) { SCLogError("%s: error reading netmap data via polling: %s", ntv->ifsrc->ifname, strerror(errno)); } else if (fds.revents & POLLNVAL) { SCLogError("%s: invalid polling request", ntv->ifsrc->ifname); } continue; } if (likely(fds.revents & POLLIN)) { /* have data on RX ring, so copy to Packet for processing */ NetmapReadPackets(ntv->ifsrc->nmd, -1, ntv); } NetmapDumpCounters(ntv); StatsSyncCountersIfSignalled(tv); } NetmapDumpCounters(ntv); StatsSyncCountersIfSignalled(tv); SCReturnInt(TM_ECODE_OK); } /** * \brief This function prints stats to the screen at exit. * \param tv pointer to ThreadVars * \param data pointer that gets cast into NetmapThreadVars for ntv */ static void ReceiveNetmapThreadExitStats(ThreadVars *tv, void *data) { SCEnter(); NetmapThreadVars *ntv = (NetmapThreadVars *)data; NetmapDumpCounters(ntv); SCLogPerf("%s: (%s) packets %" PRIu64 ", dropped %" PRIu64 ", bytes %" PRIu64 "", ntv->ifsrc->ifname, tv->name, StatsGetLocalCounterValue(tv, ntv->capture_kernel_packets), StatsGetLocalCounterValue(tv, ntv->capture_kernel_drops), ntv->bytes); } /** * \brief * \param tv * \param data Pointer to NetmapThreadVars. */ static TmEcode ReceiveNetmapThreadDeinit(ThreadVars *tv, void *data) { SCEnter(); NetmapThreadVars *ntv = (NetmapThreadVars *)data; if (ntv->ifsrc) { NetmapClose(ntv->ifsrc); ntv->ifsrc = NULL; } if (ntv->ifdst) { NetmapClose(ntv->ifdst); ntv->ifdst = NULL; } if (ntv->bpf_prog.bf_insns) { SCBPFFree(&ntv->bpf_prog); } SCFree(ntv); SCReturnInt(TM_ECODE_OK); } /** * \brief Prepare netmap decode thread. * \param tv Thread local variables. * \param initdata Thread config. * \param data Pointer to DecodeThreadVars placed here. */ static TmEcode DecodeNetmapThreadInit(ThreadVars *tv, const void *initdata, void **data) { SCEnter(); DecodeThreadVars *dtv = DecodeThreadVarsAlloc(tv); if (dtv == NULL) SCReturnInt(TM_ECODE_FAILED); DecodeRegisterPerfCounters(dtv, tv); *data = (void *)dtv; SCReturnInt(TM_ECODE_OK); } /** * \brief This function passes off to link type decoders. * * \param t pointer to ThreadVars * \param p pointer to the current packet * \param data pointer that gets cast into NetmapThreadVars for ntv */ static TmEcode DecodeNetmap(ThreadVars *tv, Packet *p, void *data) { SCEnter(); DecodeThreadVars *dtv = (DecodeThreadVars *)data; BUG_ON(PKT_IS_PSEUDOPKT(p)); /* update counters */ DecodeUpdatePacketCounters(tv, dtv, p); DecodeEthernet(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p)); PacketDecodeFinalize(tv, dtv, p); SCReturnInt(TM_ECODE_OK); } /** * \brief * \param tv * \param data Pointer to DecodeThreadVars. */ static TmEcode DecodeNetmapThreadDeinit(ThreadVars *tv, void *data) { SCEnter(); if (data != NULL) DecodeThreadVarsFree(tv, data); SCReturnInt(TM_ECODE_OK); } /** * \brief Registration Function for ReceiveNetmap. */ void TmModuleReceiveNetmapRegister(void) { tmm_modules[TMM_RECEIVENETMAP].name = "ReceiveNetmap"; tmm_modules[TMM_RECEIVENETMAP].ThreadInit = ReceiveNetmapThreadInit; tmm_modules[TMM_RECEIVENETMAP].PktAcqLoop = ReceiveNetmapLoop; tmm_modules[TMM_RECEIVENETMAP].ThreadExitPrintStats = ReceiveNetmapThreadExitStats; tmm_modules[TMM_RECEIVENETMAP].ThreadDeinit = ReceiveNetmapThreadDeinit; tmm_modules[TMM_RECEIVENETMAP].cap_flags = SC_CAP_NET_RAW; tmm_modules[TMM_RECEIVENETMAP].flags = TM_FLAG_RECEIVE_TM; } /** * \brief Registration Function for DecodeNetmap. */ void TmModuleDecodeNetmapRegister(void) { tmm_modules[TMM_DECODENETMAP].name = "DecodeNetmap"; tmm_modules[TMM_DECODENETMAP].ThreadInit = DecodeNetmapThreadInit; tmm_modules[TMM_DECODENETMAP].Func = DecodeNetmap; tmm_modules[TMM_DECODENETMAP].ThreadDeinit = DecodeNetmapThreadDeinit; tmm_modules[TMM_DECODENETMAP].cap_flags = 0; tmm_modules[TMM_DECODENETMAP].flags = TM_FLAG_DECODE_TM; } #endif /* HAVE_NETMAP */ /** * @} */