diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-09 13:14:35 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-09 13:14:35 +0000 |
commit | 9b8a97db9ec4b795e29e72289005fbc58484ebeb (patch) | |
tree | e24ca2d68215e57b4759fe5c032629821eabb250 /misc | |
parent | Initial commit. (diff) | |
download | iproute2-9b8a97db9ec4b795e29e72289005fbc58484ebeb.tar.xz iproute2-9b8a97db9ec4b795e29e72289005fbc58484ebeb.zip |
Adding upstream version 6.8.0. (tag: upstream/6.8.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'misc')
-rw-r--r-- | misc/.gitignore | 7 | ||||
-rw-r--r-- | misc/Makefile | 42 | ||||
-rw-r--r-- | misc/arpd.c | 833 | ||||
-rw-r--r-- | misc/ifstat.c | 1040 | ||||
-rw-r--r-- | misc/lnstat.c | 379 | ||||
-rw-r--r-- | misc/lnstat.h | 43 | ||||
-rw-r--r-- | misc/lnstat_util.c | 325 | ||||
-rw-r--r-- | misc/nstat.c | 776 | ||||
-rw-r--r-- | misc/rtacct.c | 622 | ||||
-rw-r--r-- | misc/ss.c | 5887 | ||||
-rw-r--r-- | misc/ss_util.h | 23 | ||||
-rw-r--r-- | misc/ssfilter.h | 34 | ||||
-rw-r--r-- | misc/ssfilter.y | 369 | ||||
-rw-r--r-- | misc/ssfilter_check.c | 104 |
14 files changed, 10484 insertions, 0 deletions
# diff --git a/misc/.gitignore b/misc/.gitignore new file mode 100644 index 0000000..d7df0b0 --- /dev/null +++ b/misc/.gitignore @@ -0,0 +1,7 @@
arpd
ifstat
ss
ssfilter.tab.c
nstat
lnstat
rtacct
# diff --git a/misc/Makefile b/misc/Makefile new file mode 100644 index 0000000..50dae79 --- /dev/null +++ b/misc/Makefile @@ -0,0 +1,42 @@
# SPDX-License-Identifier: GPL-2.0
# Object lists for the two multi-file tools; the others build from one .c file.
SSOBJ=ss.o ssfilter_check.o ssfilter.tab.o
LNSTATOBJ=lnstat.o lnstat_util.o

TARGETS=ss nstat ifstat rtacct lnstat

include ../config.mk

# arpd needs Berkeley DB, so it is only built when config.mk reports it.
ifeq ($(HAVE_BERKELEY_DB),y)
  TARGETS += arpd
endif

all: $(TARGETS)

ss: $(SSOBJ)
	$(QUIET_LINK)$(CC) $^ $(LDFLAGS) $(LDLIBS) -o $@

nstat: nstat.c
	$(QUIET_CC)$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o nstat nstat.c $(LDLIBS) -lm

ifstat: ifstat.c
	$(QUIET_CC)$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o ifstat ifstat.c $(LDLIBS) -lm

rtacct: rtacct.c
	$(QUIET_CC)$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o rtacct rtacct.c $(LDLIBS) -lm

arpd: arpd.c
	$(QUIET_CC)$(CC) $(CFLAGS) -I$(DBM_INCLUDE) $(CPPFLAGS) $(LDFLAGS) -o arpd arpd.c $(LDLIBS) -ldb

# "-b ssfilter" makes yacc write ssfilter.tab.c, the file listed in .gitignore.
ssfilter.tab.c: ssfilter.y
	$(QUIET_YACC)$(YACC) -b ssfilter ssfilter.y

lnstat: $(LNSTATOBJ)
	$(QUIET_LINK)$(CC) $^ $(LDFLAGS) $(LDLIBS) -o $@

# rtstat and ctstat are installed as symlinks to lnstat.
install: all
	install -m 0755 $(TARGETS) $(DESTDIR)$(SBINDIR)
	ln -sf lnstat $(DESTDIR)$(SBINDIR)/rtstat
	ln -sf lnstat $(DESTDIR)$(SBINDIR)/ctstat

# Remove the generated parser too: the yacc rule above produces ssfilter.tab.c,
# not ssfilter.c (the old name is still removed for stale trees).
clean:
	rm -f *.o $(TARGETS) ssfilter.c ssfilter.tab.c
/* diff --git a/misc/arpd.c b/misc/arpd.c new file mode 100644 index 0000000..1ef837c --- /dev/null +++ b/misc/arpd.c @@ -0,0 +1,833 @@ */
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * arpd.c	ARP helper daemon.
+ * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + */ + +#include <stdio.h> +#include <syslog.h> +#include <malloc.h> +#include <string.h> +#include <unistd.h> +#include <stdlib.h> +#include <netdb.h> +#include <db_185.h> +#include <sys/ioctl.h> +#include <sys/poll.h> +#include <errno.h> +#include <fcntl.h> +#include <sys/uio.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <time.h> +#include <signal.h> +#include <linux/if.h> +#include <linux/if_ether.h> +#include <linux/if_arp.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <linux/if_packet.h> +#include <linux/filter.h> + +#include "libnetlink.h" +#include "utils.h" +#include "rt_names.h" + +DB *dbase; +char *dbname = "/var/lib/arpd/arpd.db"; + +int ifnum; +int *ifvec; +char **ifnames; + +struct dbkey { + __u32 iface; + __u32 addr; +}; + +#define IS_NEG(x) (((__u8 *)(x))[0] == 0xFF) +#define NEG_TIME(x) (((x)[2]<<24)|((x)[3]<<16)|((x)[4]<<8)|(x)[5]) +#define NEG_AGE(x) ((__u32)time(NULL) - NEG_TIME((__u8 *)x)) +#define NEG_VALID(x) (NEG_AGE(x) < negative_timeout) +#define NEG_CNT(x) (((__u8 *)(x))[1]) + +struct rtnl_handle rth; + +struct pollfd pset[2]; +int udp_sock = -1; + +volatile int do_exit; +volatile int do_sync; +volatile int do_stats; + +struct { + unsigned long arp_new; + unsigned long arp_change; + + unsigned long app_recv; + unsigned long app_success; + unsigned long app_bad; + unsigned long app_neg; + unsigned long app_suppressed; + + unsigned long kern_neg; + unsigned long kern_new; + unsigned long kern_change; + + unsigned long probes_sent; + unsigned long probes_suppressed; +} stats; + +int active_probing; +int negative_timeout = 60; +int no_kernel_broadcasts; +int broadcast_rate = 1000; +int broadcast_burst = 3000; +int poll_timeout = 30000; + +static void usage(void) +{ + fprintf(stderr, + "Usage: arpd [ -lkh? 
] [ -a N ] [ -b dbase ] [ -B number ] [ -f file ] [ -n time ] [-p interval ] [ -R rate ] [ interfaces ]\n"); + exit(1); +} + +static int handle_if(int ifindex) +{ + int i; + + if (ifnum == 0) + return 1; + + for (i = 0; i < ifnum; i++) + if (ifvec[i] == ifindex) + return 1; + return 0; +} + +int sysctl_adjusted; + +static void do_sysctl_adjustments(void) +{ + int i; + + if (!ifnum) + return; + + for (i = 0; i < ifnum; i++) { + char buf[128]; + FILE *fp; + + if (active_probing) { + sprintf(buf, "/proc/sys/net/ipv4/neigh/%s/mcast_solicit", ifnames[i]); + if ((fp = fopen(buf, "w")) != NULL) { + if (no_kernel_broadcasts) + strcpy(buf, "0\n"); + else + sprintf(buf, "%d\n", active_probing >= 2 ? 1 : 3-active_probing); + fputs(buf, fp); + fclose(fp); + } + } + + sprintf(buf, "/proc/sys/net/ipv4/neigh/%s/app_solicit", ifnames[i]); + if ((fp = fopen(buf, "w")) != NULL) { + sprintf(buf, "%d\n", active_probing <= 1 ? 1 : active_probing); + fputs(buf, fp); + fclose(fp); + } + } + sysctl_adjusted = 1; +} + +static void undo_sysctl_adjustments(void) +{ + int i; + + if (!sysctl_adjusted) + return; + + for (i = 0; i < ifnum; i++) { + char buf[128]; + FILE *fp; + + if (active_probing) { + sprintf(buf, "/proc/sys/net/ipv4/neigh/%s/mcast_solicit", ifnames[i]); + if ((fp = fopen(buf, "w")) != NULL) { + strcpy(buf, "3\n"); + fputs(buf, fp); + fclose(fp); + } + } + sprintf(buf, "/proc/sys/net/ipv4/neigh/%s/app_solicit", ifnames[i]); + if ((fp = fopen(buf, "w")) != NULL) { + strcpy(buf, "0\n"); + fputs(buf, fp); + fclose(fp); + } + } + sysctl_adjusted = 0; +} + + +static int send_probe(int ifindex, __u32 addr) +{ + struct ifreq ifr = { .ifr_ifindex = ifindex }; + struct sockaddr_in dst = { + .sin_family = AF_INET, + .sin_port = htons(1025), + .sin_addr.s_addr = addr, + }; + socklen_t len; + unsigned char buf[256]; + struct arphdr *ah = (struct arphdr *)buf; + unsigned char *p = (unsigned char *)(ah+1); + struct sockaddr_ll sll = { + .sll_family = AF_PACKET, + .sll_ifindex = ifindex, + 
.sll_protocol = htons(ETH_P_ARP), + }; + + if (ioctl(udp_sock, SIOCGIFNAME, &ifr)) + return -1; + if (ioctl(udp_sock, SIOCGIFHWADDR, &ifr)) + return -1; + if (ifr.ifr_hwaddr.sa_family != ARPHRD_ETHER) + return -1; + if (setsockopt(udp_sock, SOL_SOCKET, SO_BINDTODEVICE, ifr.ifr_name, strlen(ifr.ifr_name)+1) < 0) + return -1; + + if (connect(udp_sock, (struct sockaddr *)&dst, sizeof(dst)) < 0) + return -1; + len = sizeof(dst); + if (getsockname(udp_sock, (struct sockaddr *)&dst, &len) < 0) + return -1; + + ah->ar_hrd = htons(ifr.ifr_hwaddr.sa_family); + ah->ar_pro = htons(ETH_P_IP); + ah->ar_hln = 6; + ah->ar_pln = 4; + ah->ar_op = htons(ARPOP_REQUEST); + + memcpy(p, ifr.ifr_hwaddr.sa_data, ah->ar_hln); + p += ah->ar_hln; + + memcpy(p, &dst.sin_addr, 4); + p += 4; + + memset(sll.sll_addr, 0xFF, sizeof(sll.sll_addr)); + memcpy(p, &sll.sll_addr, ah->ar_hln); + p += ah->ar_hln; + + memcpy(p, &addr, 4); + p += 4; + + if (sendto(pset[0].fd, buf, p-buf, 0, (struct sockaddr *)&sll, sizeof(sll)) < 0) + return -1; + stats.probes_sent++; + return 0; +} + +/* Be very tough on sending probes: 1 per second with burst of 3. 
*/ + +static int queue_active_probe(int ifindex, __u32 addr) +{ + static struct timeval prev; + static int buckets; + struct timeval now; + + gettimeofday(&now, NULL); + if (prev.tv_sec) { + int diff = (now.tv_sec-prev.tv_sec)*1000+(now.tv_usec-prev.tv_usec)/1000; + + buckets += diff; + } else { + buckets = broadcast_burst; + } + if (buckets > broadcast_burst) + buckets = broadcast_burst; + if (buckets >= broadcast_rate && !send_probe(ifindex, addr)) { + buckets -= broadcast_rate; + prev = now; + return 0; + } + stats.probes_suppressed++; + return -1; +} + +static int respond_to_kernel(int ifindex, __u32 addr, char *lla, int llalen) +{ + struct { + struct nlmsghdr n; + struct ndmsg ndm; + char buf[256]; + } req = { + .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)), + .n.nlmsg_flags = NLM_F_REQUEST, + .n.nlmsg_type = RTM_NEWNEIGH, + .ndm.ndm_family = AF_INET, + .ndm.ndm_state = NUD_STALE, + .ndm.ndm_ifindex = ifindex, + .ndm.ndm_type = RTN_UNICAST, + }; + + addattr_l(&req.n, sizeof(req), NDA_DST, &addr, 4); + addattr_l(&req.n, sizeof(req), NDA_LLADDR, lla, llalen); + return rtnl_send(&rth, &req, req.n.nlmsg_len) <= 0; +} + +static void prepare_neg_entry(__u8 *ndata, __u32 stamp) +{ + ndata[0] = 0xFF; + ndata[1] = 0; + ndata[2] = stamp>>24; + ndata[3] = stamp>>16; + ndata[4] = stamp>>8; + ndata[5] = stamp; +} + + +static int do_one_request(struct nlmsghdr *n) +{ + struct ndmsg *ndm = NLMSG_DATA(n); + int len = n->nlmsg_len; + struct rtattr *tb[NDA_MAX+1]; + struct dbkey key; + DBT dbkey, dbdat; + int do_acct = 0; + + if (n->nlmsg_type == NLMSG_DONE) { + dbase->sync(dbase, 0); + + /* Now we have at least mirror of kernel db, so that + * may start real resolution. 
+ */ + do_sysctl_adjustments(); + return 0; + } + + if (n->nlmsg_type != RTM_GETNEIGH && n->nlmsg_type != RTM_NEWNEIGH) + return 0; + + len -= NLMSG_LENGTH(sizeof(*ndm)); + if (len < 0) + return -1; + + if (ndm->ndm_family != AF_INET || + (ifnum && !handle_if(ndm->ndm_ifindex)) || + ndm->ndm_flags || + ndm->ndm_type != RTN_UNICAST || + !(ndm->ndm_state&~NUD_NOARP)) + return 0; + + parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len); + + if (!tb[NDA_DST]) + return 0; + + key.iface = ndm->ndm_ifindex; + memcpy(&key.addr, RTA_DATA(tb[NDA_DST]), 4); + dbkey.data = &key; + dbkey.size = sizeof(key); + + if (dbase->get(dbase, &dbkey, &dbdat, 0) != 0) { + dbdat.data = 0; + dbdat.size = 0; + } + + if (n->nlmsg_type == RTM_GETNEIGH) { + if (!(n->nlmsg_flags&NLM_F_REQUEST)) + return 0; + + if (!(ndm->ndm_state&(NUD_PROBE|NUD_INCOMPLETE))) { + stats.app_bad++; + return 0; + } + + if (ndm->ndm_state&NUD_PROBE) { + /* If we get this, kernel still has some valid + * address, but unicast probing failed and host + * is either dead or changed its mac address. + * Kernel is going to initiate broadcast resolution. + * OK, we invalidate our information as well. + */ + if (dbdat.data && !IS_NEG(dbdat.data)) + stats.app_neg++; + + dbase->del(dbase, &dbkey, 0); + } else { + /* If we get this kernel does not have any information. + * If we have something tell this to kernel. */ + stats.app_recv++; + if (dbdat.data && !IS_NEG(dbdat.data)) { + stats.app_success++; + respond_to_kernel(key.iface, key.addr, dbdat.data, dbdat.size); + return 0; + } + + /* Sheeit! We have nothing to tell. */ + /* If we have recent negative entry, be silent. 
*/ + if (dbdat.data && NEG_VALID(dbdat.data)) { + if (NEG_CNT(dbdat.data) >= active_probing) { + stats.app_suppressed++; + return 0; + } + do_acct = 1; + } + } + + if (active_probing && + queue_active_probe(ndm->ndm_ifindex, key.addr) == 0 && + do_acct) { + NEG_CNT(dbdat.data)++; + dbase->put(dbase, &dbkey, &dbdat, 0); + } + } else if (n->nlmsg_type == RTM_NEWNEIGH) { + if (n->nlmsg_flags&NLM_F_REQUEST) + return 0; + + if (ndm->ndm_state&NUD_FAILED) { + /* Kernel was not able to resolve. Host is dead. + * Create negative entry if it is not present + * or renew it if it is too old. */ + if (!dbdat.data || + !IS_NEG(dbdat.data) || + !NEG_VALID(dbdat.data)) { + __u8 ndata[6]; + + stats.kern_neg++; + prepare_neg_entry(ndata, time(NULL)); + dbdat.data = ndata; + dbdat.size = sizeof(ndata); + dbase->put(dbase, &dbkey, &dbdat, 0); + } + } else if (tb[NDA_LLADDR]) { + if (dbdat.data && !IS_NEG(dbdat.data)) { + if (memcmp(RTA_DATA(tb[NDA_LLADDR]), dbdat.data, dbdat.size) == 0) + return 0; + stats.kern_change++; + } else { + stats.kern_new++; + } + dbdat.data = RTA_DATA(tb[NDA_LLADDR]); + dbdat.size = RTA_PAYLOAD(tb[NDA_LLADDR]); + dbase->put(dbase, &dbkey, &dbdat, 0); + } + } + return 0; +} + +static void load_initial_table(void) +{ + if (rtnl_neighdump_req(&rth, AF_INET, NULL) < 0) { + perror("dump request failed"); + exit(1); + } + +} + +static void get_kern_msg(void) +{ + int status; + struct nlmsghdr *h; + struct sockaddr_nl nladdr = {}; + struct iovec iov; + char buf[8192]; + struct msghdr msg = { + (void *)&nladdr, sizeof(nladdr), + &iov, 1, + NULL, 0, + 0 + }; + + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + + status = recvmsg(rth.fd, &msg, MSG_DONTWAIT); + + if (status <= 0) + return; + + if (msg.msg_namelen != sizeof(nladdr)) + return; + + if (nladdr.nl_pid) + return; + + for (h = (struct nlmsghdr *)buf; status >= sizeof(*h); ) { + int len = h->nlmsg_len; + int l = len - sizeof(*h); + + if (l < 0 || len > status) + return; + + if (do_one_request(h) < 0) + 
return; + + status -= NLMSG_ALIGN(len); + h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len)); + } +} + +/* Receive gratuitous ARP messages and store them, that's all. */ +static void get_arp_pkt(void) +{ + unsigned char buf[1024]; + struct sockaddr_ll sll; + socklen_t sll_len = sizeof(sll); + struct arphdr *a = (struct arphdr *)buf; + struct dbkey key; + DBT dbkey, dbdat; + int n; + + n = recvfrom(pset[0].fd, buf, sizeof(buf), MSG_DONTWAIT, + (struct sockaddr *)&sll, &sll_len); + if (n < 0) { + if (errno != EINTR && errno != EAGAIN) + syslog(LOG_ERR, "recvfrom: %m"); + return; + } + + if (ifnum && !handle_if(sll.sll_ifindex)) + return; + + /* Sanity checks */ + + if (n < sizeof(*a) || + (a->ar_op != htons(ARPOP_REQUEST) && + a->ar_op != htons(ARPOP_REPLY)) || + a->ar_pln != 4 || + a->ar_pro != htons(ETH_P_IP) || + a->ar_hln != sll.sll_halen || + sizeof(*a) + 2*4 + 2*a->ar_hln > n) + return; + + key.iface = sll.sll_ifindex; + memcpy(&key.addr, (char *)(a+1) + a->ar_hln, 4); + + /* DAD message, ignore. 
*/ + if (key.addr == 0) + return; + + dbkey.data = &key; + dbkey.size = sizeof(key); + + if (dbase->get(dbase, &dbkey, &dbdat, 0) == 0 && !IS_NEG(dbdat.data)) { + if (memcmp(dbdat.data, a+1, dbdat.size) == 0) + return; + stats.arp_change++; + } else { + stats.arp_new++; + } + + dbdat.data = a+1; + dbdat.size = a->ar_hln; + dbase->put(dbase, &dbkey, &dbdat, 0); +} + +static void catch_signal(int sig, void (*handler)(int)) +{ + struct sigaction sa = { .sa_handler = handler }; + +#ifdef SA_INTERRUPT + sa.sa_flags = SA_INTERRUPT; +#endif + sigaction(sig, &sa, NULL); +} + +#include <setjmp.h> +sigjmp_buf env; +volatile int in_poll; + +static void sig_exit(int signo) +{ + do_exit = 1; + if (in_poll) + siglongjmp(env, 1); +} + +static void sig_sync(int signo) +{ + do_sync = 1; + if (in_poll) + siglongjmp(env, 1); +} + +static void sig_stats(int signo) +{ + do_sync = 1; + do_stats = 1; + if (in_poll) + siglongjmp(env, 1); +} + +static void send_stats(void) +{ + syslog(LOG_INFO, "arp_rcv: n%lu c%lu app_rcv: tot %lu hits %lu bad %lu neg %lu sup %lu", + stats.arp_new, stats.arp_change, + + stats.app_recv, stats.app_success, + stats.app_bad, stats.app_neg, stats.app_suppressed + ); + syslog(LOG_INFO, "kern: n%lu c%lu neg %lu arp_send: %lu rlim %lu", + stats.kern_new, stats.kern_change, stats.kern_neg, + + stats.probes_sent, stats.probes_suppressed + ); + do_stats = 0; +} + + +int main(int argc, char **argv) +{ + int opt; + int do_list = 0; + char *do_load = NULL; + + while ((opt = getopt(argc, argv, "h?b:lf:a:n:p:kR:B:")) != EOF) { + switch (opt) { + case 'b': + dbname = optarg; + break; + case 'f': + if (do_load) { + fprintf(stderr, "Duplicate option -f\n"); + usage(); + } + do_load = optarg; + break; + case 'l': + do_list = 1; + break; + case 'a': + active_probing = atoi(optarg); + break; + case 'n': + negative_timeout = atoi(optarg); + break; + case 'k': + no_kernel_broadcasts = 1; + break; + case 'p': + if ((poll_timeout = 1000 * strtod(optarg, NULL)) < 100) { + 
fprintf(stderr, "Invalid poll timeout\n"); + exit(-1); + } + break; + case 'R': + if ((broadcast_rate = atoi(optarg)) <= 0 || + (broadcast_rate = 1000/broadcast_rate) <= 0) { + fprintf(stderr, "Invalid ARP rate\n"); + exit(-1); + } + break; + case 'B': + if ((broadcast_burst = atoi(optarg)) <= 0 || + (broadcast_burst = 1000*broadcast_burst) <= 0) { + fprintf(stderr, "Invalid ARP burst\n"); + exit(-1); + } + break; + case 'h': + case '?': + default: + usage(); + } + } + argc -= optind; + argv += optind; + + if (argc > 0) { + ifnum = argc; + ifnames = argv; + ifvec = malloc(argc*sizeof(int)); + if (!ifvec) { + perror("malloc"); + exit(-1); + } + } + + if ((udp_sock = socket(AF_INET, SOCK_DGRAM, 0)) < 0) { + perror("socket"); + exit(-1); + } + + if (ifnum) { + int i; + struct ifreq ifr = {}; + + for (i = 0; i < ifnum; i++) { + if (get_ifname(ifr.ifr_name, ifnames[i])) + invarg("not a valid ifname", ifnames[i]); + if (ioctl(udp_sock, SIOCGIFINDEX, &ifr)) { + perror("ioctl(SIOCGIFINDEX)"); + exit(-1); + } + ifvec[i] = ifr.ifr_ifindex; + } + } + + dbase = dbopen(dbname, O_CREAT|O_RDWR, 0644, DB_HASH, NULL); + if (dbase == NULL) { + perror("db_open"); + exit(-1); + } + + if (do_load) { + char buf[128]; + FILE *fp; + struct dbkey k; + DBT dbkey, dbdat; + + dbkey.data = &k; + dbkey.size = sizeof(k); + + if (strcmp(do_load, "-") == 0 || strcmp(do_load, "--") == 0) { + fp = stdin; + } else if ((fp = fopen(do_load, "r")) == NULL) { + perror("fopen"); + goto do_abort; + } + + buf[sizeof(buf)-1] = 0; + while (fgets(buf, sizeof(buf), fp)) { + __u8 b1[6]; + char ipbuf[128]; + char macbuf[128]; + + if (buf[0] == '#') + continue; + + if (sscanf(buf, "%u%s%s", &k.iface, ipbuf, macbuf) != 3) { + fprintf(stderr, "Wrong format of input file \"%s\"\n", do_load); + goto do_abort; + } + if (strncmp(macbuf, "FAILED:", 7) == 0) + continue; + if (!inet_aton(ipbuf, (struct in_addr *)&k.addr)) { + fprintf(stderr, "Invalid IP address: \"%s\"\n", ipbuf); + goto do_abort; + } + + if 
(ll_addr_a2n((char *) b1, 6, macbuf) != 6) + goto do_abort; + dbdat.size = 6; + + if (dbase->put(dbase, &dbkey, &dbdat, 0)) { + perror("hash->put"); + goto do_abort; + } + } + dbase->sync(dbase, 0); + if (fp != stdin) + fclose(fp); + } + + if (do_list) { + DBT dbkey, dbdat; + + printf("%-8s %-15s %s\n", "#Ifindex", "IP", "MAC"); + while (dbase->seq(dbase, &dbkey, &dbdat, R_NEXT) == 0) { + struct dbkey *key = dbkey.data; + + if (handle_if(key->iface)) { + if (!IS_NEG(dbdat.data)) { + char b1[18]; + + printf("%-8d %-15s %s\n", + key->iface, + inet_ntoa(*(struct in_addr *)&key->addr), + ll_addr_n2a(dbdat.data, 6, ARPHRD_ETHER, b1, 18)); + } else { + printf("%-8d %-15s FAILED: %dsec ago\n", + key->iface, + inet_ntoa(*(struct in_addr *)&key->addr), + NEG_AGE(dbdat.data)); + } + } + } + } + + if (do_load || do_list) + goto out; + + pset[0].fd = socket(PF_PACKET, SOCK_DGRAM, 0); + if (pset[0].fd < 0) { + perror("socket"); + exit(-1); + } + + if (1) { + struct sockaddr_ll sll = { + .sll_family = AF_PACKET, + .sll_protocol = htons(ETH_P_ARP), + .sll_ifindex = (ifnum == 1 ? 
ifvec[0] : 0), + }; + + if (bind(pset[0].fd, (struct sockaddr *)&sll, sizeof(sll)) < 0) { + perror("bind"); + goto do_abort; + } + } + + if (rtnl_open(&rth, RTMGRP_NEIGH) < 0) { + perror("rtnl_open"); + goto do_abort; + } + pset[1].fd = rth.fd; + + load_initial_table(); + + if (daemon(0, 0)) { + perror("arpd: daemon"); + goto do_abort; + } + + openlog("arpd", LOG_PID | LOG_CONS, LOG_DAEMON); + catch_signal(SIGINT, sig_exit); + catch_signal(SIGTERM, sig_exit); + catch_signal(SIGHUP, sig_sync); + catch_signal(SIGUSR1, sig_stats); + +#define EVENTS (POLLIN|POLLPRI|POLLERR|POLLHUP) + pset[0].events = EVENTS; + pset[0].revents = 0; + pset[1].events = EVENTS; + pset[1].revents = 0; + + sigsetjmp(env, 1); + + for (;;) { + in_poll = 1; + + if (do_exit) + break; + if (do_sync) { + in_poll = 0; + dbase->sync(dbase, 0); + do_sync = 0; + in_poll = 1; + } + if (do_stats) + send_stats(); + if (poll(pset, 2, poll_timeout) > 0) { + in_poll = 0; + if (pset[0].revents&EVENTS) + get_arp_pkt(); + if (pset[1].revents&EVENTS) + get_kern_msg(); + } else { + do_sync = 1; + } + } + + undo_sysctl_adjustments(); +out: + dbase->close(dbase); + exit(0); + +do_abort: + dbase->close(dbase); + exit(-1); +} diff --git a/misc/ifstat.c b/misc/ifstat.c new file mode 100644 index 0000000..7290109 --- /dev/null +++ b/misc/ifstat.c @@ -0,0 +1,1040 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * ifstat.c handy utility to read net interface statistics + * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <errno.h> +#include <time.h> +#include <sys/time.h> +#include <fnmatch.h> +#include <sys/file.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <sys/poll.h> +#include <sys/wait.h> +#include <sys/stat.h> +#include <signal.h> +#include <math.h> +#include <getopt.h> + +#include <linux/if.h> +#include <linux/if_link.h> + +#include "libnetlink.h" +#include 
"json_writer.h" +#include "version.h" +#include "utils.h" + +int dump_zeros; +int reset_history; +int ignore_history; +int no_output; +int json_output; +int no_update; +int scan_interval; +int time_constant; +int show_errors; +double W; +char **patterns; +int npatterns; +bool is_extended; +int filter_type; +int sub_type; + +char info_source[128]; +int source_mismatch; + +#define MAXS (sizeof(struct rtnl_link_stats)/sizeof(__u32)) +#define NO_SUB_TYPE 0xffff + +struct ifstat_ent { + struct ifstat_ent *next; + char *name; + int ifindex; + unsigned long long val[MAXS]; + double rate[MAXS]; + __u32 ival[MAXS]; +}; + +static const char *stats[MAXS] = { + "rx_packets", + "tx_packets", + "rx_bytes", + "tx_bytes", + "rx_errors", + "tx_errors", + "rx_dropped", + "tx_dropped", + "multicast", + "collisions", + "rx_length_errors", + "rx_over_errors", + "rx_crc_errors", + "rx_frame_errors", + "rx_fifo_errors", + "rx_missed_errors", + "tx_aborted_errors", + "tx_carrier_errors", + "tx_fifo_errors", + "tx_heartbeat_errors", + "tx_window_errors", + "rx_compressed", + "tx_compressed" +}; + +struct ifstat_ent *kern_db; +struct ifstat_ent *hist_db; + +static int match(const char *id) +{ + int i; + + if (npatterns == 0) + return 1; + + for (i = 0; i < npatterns; i++) { + if (!fnmatch(patterns[i], id, FNM_CASEFOLD)) + return 1; + } + return 0; +} + +static int get_nlmsg_extended(struct nlmsghdr *m, void *arg) +{ + struct if_stats_msg *ifsm = NLMSG_DATA(m); + struct rtattr *tb[IFLA_STATS_MAX+1]; + int len = m->nlmsg_len; + struct ifstat_ent *n; + + if (m->nlmsg_type != RTM_NEWSTATS) + return 0; + + len -= NLMSG_LENGTH(sizeof(*ifsm)); + if (len < 0) { + errno = EINVAL; + return -1; + } + + parse_rtattr(tb, IFLA_STATS_MAX, IFLA_STATS_RTA(ifsm), len); + if (tb[filter_type] == NULL) + return 0; + + n = malloc(sizeof(*n)); + if (!n) { + errno = ENOMEM; + return -1; + } + + n->ifindex = ifsm->ifindex; + n->name = strdup(ll_index_to_name(ifsm->ifindex)); + + if (sub_type == NO_SUB_TYPE) { + 
memcpy(&n->val, RTA_DATA(tb[filter_type]), sizeof(n->val)); + } else { + struct rtattr *attr; + + attr = parse_rtattr_one_nested(sub_type, tb[filter_type]); + if (attr == NULL) { + free(n); + return 0; + } + memcpy(&n->val, RTA_DATA(attr), sizeof(n->val)); + } + memset(&n->rate, 0, sizeof(n->rate)); + n->next = kern_db; + kern_db = n; + return 0; +} + +static int get_nlmsg(struct nlmsghdr *m, void *arg) +{ + struct ifinfomsg *ifi = NLMSG_DATA(m); + struct rtattr *tb[IFLA_MAX+1]; + int len = m->nlmsg_len; + struct ifstat_ent *n; + int i; + + if (m->nlmsg_type != RTM_NEWLINK) + return 0; + + len -= NLMSG_LENGTH(sizeof(*ifi)); + if (len < 0) { + errno = EINVAL; + return -1; + } + + if (!(ifi->ifi_flags&IFF_UP)) + return 0; + + parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), len); + if (tb[IFLA_IFNAME] == NULL || tb[IFLA_STATS] == NULL) + return 0; + + n = malloc(sizeof(*n)); + if (!n) { + errno = ENOMEM; + return -1; + } + n->ifindex = ifi->ifi_index; + n->name = strdup(RTA_DATA(tb[IFLA_IFNAME])); + memcpy(&n->ival, RTA_DATA(tb[IFLA_STATS]), sizeof(n->ival)); + memset(&n->rate, 0, sizeof(n->rate)); + for (i = 0; i < MAXS; i++) + n->val[i] = n->ival[i]; + n->next = kern_db; + kern_db = n; + return 0; +} + +static void load_info(void) +{ + struct ifstat_ent *db, *n; + struct rtnl_handle rth; + __u32 filter_mask; + + if (rtnl_open(&rth, 0) < 0) + exit(1); + + if (is_extended) { + ll_init_map(&rth); + filter_mask = IFLA_STATS_FILTER_BIT(filter_type); + if (rtnl_statsdump_req_filter(&rth, AF_UNSPEC, + filter_mask, NULL, NULL) < 0) { + perror("Cannot send dump request"); + exit(1); + } + + if (rtnl_dump_filter(&rth, get_nlmsg_extended, NULL) < 0) { + perror("Dump terminated\n"); + exit(1); + } + } else { + if (rtnl_linkdump_req(&rth, AF_INET) < 0) { + perror("Cannot send dump request"); + exit(1); + } + + if (rtnl_dump_filter(&rth, get_nlmsg, NULL) < 0) { + perror("Dump terminated\n"); + exit(1); + } + } + + rtnl_close(&rth); + + db = kern_db; + kern_db = NULL; + + while (db) { + 
n = db; + db = db->next; + n->next = kern_db; + kern_db = n; + } +} + +static void load_raw_table(FILE *fp) +{ + char buf[4096]; + struct ifstat_ent *db = NULL; + struct ifstat_ent *n; + + while (fgets(buf, sizeof(buf), fp) != NULL) { + char *p; + char *next; + int i; + + if (buf[0] == '#') { + buf[strlen(buf)-1] = 0; + if (info_source[0] && strcmp(info_source, buf+1)) + source_mismatch = 1; + strlcpy(info_source, buf+1, sizeof(info_source)); + continue; + } + if ((n = malloc(sizeof(*n))) == NULL) + abort(); + + if (!(p = strchr(buf, ' '))) + abort(); + *p++ = 0; + + if (sscanf(buf, "%d", &n->ifindex) != 1) + abort(); + if (!(next = strchr(p, ' '))) + abort(); + *next++ = 0; + + n->name = strdup(p); + p = next; + + for (i = 0; i < MAXS; i++) { + unsigned int rate; + + if (!(next = strchr(p, ' '))) + abort(); + *next++ = 0; + if (sscanf(p, "%llu", n->val+i) != 1) + abort(); + n->ival[i] = (__u32)n->val[i]; + p = next; + if (!(next = strchr(p, ' '))) + abort(); + *next++ = 0; + if (sscanf(p, "%u", &rate) != 1) + abort(); + n->rate[i] = rate; + p = next; + } + n->next = db; + db = n; + } + + while (db) { + n = db; + db = db->next; + n->next = kern_db; + kern_db = n; + } +} + +static void dump_raw_db(FILE *fp, int to_hist) +{ + json_writer_t *jw = json_output ? 
jsonw_new(fp) : NULL; + struct ifstat_ent *n, *h; + + h = hist_db; + if (jw) { + jsonw_start_object(jw); + jsonw_pretty(jw, pretty); + jsonw_name(jw, info_source); + jsonw_start_object(jw); + } else + fprintf(fp, "#%s\n", info_source); + + for (n = kern_db; n; n = n->next) { + int i; + unsigned long long *vals = n->val; + double *rates = n->rate; + + if (!match(n->name)) { + struct ifstat_ent *h1; + + if (!to_hist) + continue; + for (h1 = h; h1; h1 = h1->next) { + if (h1->ifindex == n->ifindex) { + vals = h1->val; + rates = h1->rate; + h = h1->next; + break; + } + } + } + + if (jw) { + jsonw_name(jw, n->name); + jsonw_start_object(jw); + + for (i = 0; i < MAXS && stats[i]; i++) + jsonw_uint_field(jw, stats[i], vals[i]); + jsonw_end_object(jw); + } else { + fprintf(fp, "%d %s ", n->ifindex, n->name); + for (i = 0; i < MAXS; i++) + fprintf(fp, "%llu %u ", vals[i], + (unsigned int)rates[i]); + fprintf(fp, "\n"); + } + } + if (jw) { + jsonw_end_object(jw); + + jsonw_end_object(jw); + jsonw_destroy(&jw); + } +} + +/* use communication definitions of meg/kilo etc */ +static const unsigned long long giga = 1000000000ull; +static const unsigned long long mega = 1000000; +static const unsigned long long kilo = 1000; + +static void format_rate(FILE *fp, const unsigned long long *vals, + const double *rates, int i) +{ + char temp[64]; + + if (vals[i] > giga) + fprintf(fp, "%7lluM ", vals[i]/mega); + else if (vals[i] > mega) + fprintf(fp, "%7lluK ", vals[i]/kilo); + else + fprintf(fp, "%8llu ", vals[i]); + + if (rates[i] > mega) { + sprintf(temp, "%uM", (unsigned int)(rates[i]/mega)); + fprintf(fp, "%-6s ", temp); + } else if (rates[i] > kilo) { + sprintf(temp, "%uK", (unsigned int)(rates[i]/kilo)); + fprintf(fp, "%-6s ", temp); + } else + fprintf(fp, "%-6u ", (unsigned int)rates[i]); +} + +static void format_pair(FILE *fp, const unsigned long long *vals, int i, int k) +{ + char temp[64]; + + if (vals[i] > giga) + fprintf(fp, "%7lluM ", vals[i]/mega); + else if (vals[i] > 
mega) + fprintf(fp, "%7lluK ", vals[i]/kilo); + else + fprintf(fp, "%8llu ", vals[i]); + + if (vals[k] > giga) { + sprintf(temp, "%uM", (unsigned int)(vals[k]/mega)); + fprintf(fp, "%-6s ", temp); + } else if (vals[k] > mega) { + sprintf(temp, "%uK", (unsigned int)(vals[k]/kilo)); + fprintf(fp, "%-6s ", temp); + } else + fprintf(fp, "%-6u ", (unsigned int)vals[k]); +} + +static void print_head(FILE *fp) +{ + fprintf(fp, "#%s\n", info_source); + fprintf(fp, "%-15s ", "Interface"); + + fprintf(fp, "%8s/%-6s ", "RX Pkts", "Rate"); + fprintf(fp, "%8s/%-6s ", "TX Pkts", "Rate"); + fprintf(fp, "%8s/%-6s ", "RX Data", "Rate"); + fprintf(fp, "%8s/%-6s\n", "TX Data", "Rate"); + + if (!show_errors) { + fprintf(fp, "%-15s ", ""); + fprintf(fp, "%8s/%-6s ", "RX Errs", "Drop"); + fprintf(fp, "%8s/%-6s ", "TX Errs", "Drop"); + fprintf(fp, "%8s/%-6s ", "RX Over", "Rate"); + fprintf(fp, "%8s/%-6s\n", "TX Coll", "Rate"); + } else { + fprintf(fp, "%-15s ", ""); + fprintf(fp, "%8s/%-6s ", "RX Errs", "Rate"); + fprintf(fp, "%8s/%-6s ", "RX Drop", "Rate"); + fprintf(fp, "%8s/%-6s ", "RX Over", "Rate"); + fprintf(fp, "%8s/%-6s\n", "RX Leng", "Rate"); + + fprintf(fp, "%-15s ", ""); + fprintf(fp, "%8s/%-6s ", "RX Crc", "Rate"); + fprintf(fp, "%8s/%-6s ", "RX Frm", "Rate"); + fprintf(fp, "%8s/%-6s ", "RX Fifo", "Rate"); + fprintf(fp, "%8s/%-6s\n", "RX Miss", "Rate"); + + fprintf(fp, "%-15s ", ""); + fprintf(fp, "%8s/%-6s ", "TX Errs", "Rate"); + fprintf(fp, "%8s/%-6s ", "TX Drop", "Rate"); + fprintf(fp, "%8s/%-6s ", "TX Coll", "Rate"); + fprintf(fp, "%8s/%-6s\n", "TX Carr", "Rate"); + + fprintf(fp, "%-15s ", ""); + fprintf(fp, "%8s/%-6s ", "TX Abrt", "Rate"); + fprintf(fp, "%8s/%-6s ", "TX Fifo", "Rate"); + fprintf(fp, "%8s/%-6s ", "TX Hear", "Rate"); + fprintf(fp, "%8s/%-6s\n", "TX Wind", "Rate"); + } +} + +static void print_one_json(json_writer_t *jw, const struct ifstat_ent *n, + const unsigned long long *vals) +{ + int i, m = show_errors ? 
20 : 10; + + jsonw_name(jw, n->name); + jsonw_start_object(jw); + + for (i = 0; i < m && stats[i]; i++) + jsonw_uint_field(jw, stats[i], vals[i]); + + jsonw_end_object(jw); +} + +static void print_one_if(FILE *fp, const struct ifstat_ent *n, + const unsigned long long *vals) +{ + int i; + + fprintf(fp, "%-15s ", n->name); + for (i = 0; i < 4; i++) + format_rate(fp, vals, n->rate, i); + fprintf(fp, "\n"); + + if (!show_errors) { + fprintf(fp, "%-15s ", ""); + format_pair(fp, vals, 4, 6); + format_pair(fp, vals, 5, 7); + format_rate(fp, vals, n->rate, 11); + format_rate(fp, vals, n->rate, 9); + fprintf(fp, "\n"); + } else { + fprintf(fp, "%-15s ", ""); + format_rate(fp, vals, n->rate, 4); + format_rate(fp, vals, n->rate, 6); + format_rate(fp, vals, n->rate, 11); + format_rate(fp, vals, n->rate, 10); + fprintf(fp, "\n"); + + fprintf(fp, "%-15s ", ""); + format_rate(fp, vals, n->rate, 12); + format_rate(fp, vals, n->rate, 13); + format_rate(fp, vals, n->rate, 14); + format_rate(fp, vals, n->rate, 15); + fprintf(fp, "\n"); + + fprintf(fp, "%-15s ", ""); + format_rate(fp, vals, n->rate, 5); + format_rate(fp, vals, n->rate, 7); + format_rate(fp, vals, n->rate, 9); + format_rate(fp, vals, n->rate, 17); + fprintf(fp, "\n"); + + fprintf(fp, "%-15s ", ""); + format_rate(fp, vals, n->rate, 16); + format_rate(fp, vals, n->rate, 18); + format_rate(fp, vals, n->rate, 19); + format_rate(fp, vals, n->rate, 20); + fprintf(fp, "\n"); + } +} + +static void dump_kern_db(FILE *fp) +{ + json_writer_t *jw = json_output ? 
jsonw_new(fp) : NULL; + struct ifstat_ent *n; + + if (jw) { + jsonw_start_object(jw); + jsonw_pretty(jw, pretty); + jsonw_name(jw, info_source); + jsonw_start_object(jw); + } else + print_head(fp); + + for (n = kern_db; n; n = n->next) { + if (!match(n->name)) + continue; + + if (jw) + print_one_json(jw, n, n->val); + else + print_one_if(fp, n, n->val); + } + if (jw) { + jsonw_end_object(jw); + + jsonw_end_object(jw); + jsonw_destroy(&jw); + } +} + +static void dump_incr_db(FILE *fp) +{ + struct ifstat_ent *n, *h; + json_writer_t *jw = json_output ? jsonw_new(fp) : NULL; + + h = hist_db; + if (jw) { + jsonw_start_object(jw); + jsonw_pretty(jw, pretty); + jsonw_name(jw, info_source); + jsonw_start_object(jw); + } else + print_head(fp); + + for (n = kern_db; n; n = n->next) { + int i; + unsigned long long vals[MAXS]; + struct ifstat_ent *h1; + + memcpy(vals, n->val, sizeof(vals)); + + for (h1 = h; h1; h1 = h1->next) { + if (h1->ifindex == n->ifindex) { + for (i = 0; i < MAXS; i++) + vals[i] -= h1->val[i]; + h = h1->next; + break; + } + } + if (!match(n->name)) + continue; + + if (jw) + print_one_json(jw, n, n->val); + else + print_one_if(fp, n, vals); + } + + if (jw) { + jsonw_end_object(jw); + + jsonw_end_object(jw); + jsonw_destroy(&jw); + } +} + +static int children; + +static void sigchild(int signo) +{ +} + +static void update_db(int interval) +{ + struct ifstat_ent *n, *h; + + n = kern_db; + kern_db = NULL; + + load_info(); + + h = kern_db; + kern_db = n; + + for (n = kern_db; n; n = n->next) { + struct ifstat_ent *h1; + + for (h1 = h; h1; h1 = h1->next) { + if (h1->ifindex == n->ifindex) { + int i; + + for (i = 0; i < MAXS; i++) { + if (h1->ival[i] < n->ival[i]) { + memset(n->ival, 0, sizeof(n->ival)); + break; + } + } + for (i = 0; i < MAXS; i++) { + double sample; + __u64 incr; + + if (is_extended) { + incr = h1->val[i] - n->val[i]; + n->val[i] = h1->val[i]; + } else { + incr = (__u32) (h1->ival[i] - n->ival[i]); + n->val[i] += incr; + n->ival[i] = 
h1->ival[i]; + } + + sample = (double)(incr*1000)/interval; + if (interval >= scan_interval) { + n->rate[i] += W*(sample-n->rate[i]); + } else if (interval >= 1000) { + if (interval >= time_constant) { + n->rate[i] = sample; + } else { + double w = W*(double)interval/scan_interval; + + n->rate[i] += w*(sample-n->rate[i]); + } + } + } + + while (h != h1) { + struct ifstat_ent *tmp = h; + + h = h->next; + free(tmp->name); + free(tmp); + }; + h = h1->next; + free(h1->name); + free(h1); + break; + } + } + } +} + +#define T_DIFF(a, b) (((a).tv_sec-(b).tv_sec)*1000 + ((a).tv_usec-(b).tv_usec)/1000) + + +static void server_loop(int fd) +{ + struct timeval snaptime = { 0 }; + struct pollfd p; + + p.fd = fd; + p.events = p.revents = POLLIN; + + sprintf(info_source, "%d.%lu sampling_interval=%d time_const=%d", + getpid(), (unsigned long)random(), scan_interval/1000, time_constant/1000); + + load_info(); + + for (;;) { + int status; + time_t tdiff; + struct timeval now; + + gettimeofday(&now, NULL); + tdiff = T_DIFF(now, snaptime); + if (tdiff >= scan_interval) { + update_db(tdiff); + snaptime = now; + tdiff = 0; + } + + if (poll(&p, 1, scan_interval - tdiff) > 0 + && (p.revents&POLLIN)) { + int clnt = accept(fd, NULL, NULL); + + if (clnt >= 0) { + pid_t pid; + + if (children >= 5) { + close(clnt); + } else if ((pid = fork()) != 0) { + if (pid > 0) + children++; + close(clnt); + } else { + FILE *fp = fdopen(clnt, "w"); + + if (fp) + dump_raw_db(fp, 0); + exit(0); + } + } + } + while (children && waitpid(-1, &status, WNOHANG) > 0) + children--; + } +} + +static int verify_forging(int fd) +{ + struct ucred cred; + socklen_t olen = sizeof(cred); + + if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, (void *)&cred, &olen) || + olen < sizeof(cred)) + return -1; + if (cred.uid == getuid() || cred.uid == 0) + return 0; + return -1; +} + +static void xstat_usage(void) +{ + fprintf(stderr, +"Usage: ifstat supported xstats:\n" +" cpu_hits Counts only packets that went via the CPU.\n"); +} + 
+struct extended_stats_options_t { + char *name; + int id; + int sub_type; +}; + +/* Note: if one xstat name is subset of another, it should be before it in this + * list. + * Name length must be under 64 chars. + */ +static const struct extended_stats_options_t extended_stats_options[] = { + {"cpu_hits", IFLA_STATS_LINK_OFFLOAD_XSTATS, IFLA_OFFLOAD_XSTATS_CPU_HIT}, +}; + +static const char *get_filter_type(const char *name) +{ + int name_len; + int i; + + name_len = strlen(name); + for (i = 0; i < ARRAY_SIZE(extended_stats_options); i++) { + const struct extended_stats_options_t *xstat; + + xstat = &extended_stats_options[i]; + if (strncmp(name, xstat->name, name_len) == 0) { + filter_type = xstat->id; + sub_type = xstat->sub_type; + return xstat->name; + } + } + + fprintf(stderr, "invalid ifstat extension %s\n", name); + xstat_usage(); + return NULL; +} + +static void usage(void) __attribute__((noreturn)); + +static void usage(void) +{ + fprintf(stderr, +"Usage: ifstat [OPTION] [ PATTERN [ PATTERN ] ]\n" +" -h, --help this message\n" +" -a, --ignore ignore history\n" +" -d, --scan=SECS sample every statistics every SECS\n" +" -e, --errors show errors\n" +" -j, --json format output in JSON\n" +" -n, --nooutput do history only\n" +" -p, --pretty pretty print\n" +" -r, --reset reset history\n" +" -s, --noupdate don't update history\n" +" -t, --interval=SECS report average over the last SECS\n" +" -V, --version output version information\n" +" -z, --zeros show entries with zero activity\n" +" -x, --extended=TYPE show extended stats of TYPE\n"); + + exit(-1); +} + +static const struct option longopts[] = { + { "help", 0, 0, 'h' }, + { "ignore", 0, 0, 'a' }, + { "scan", 1, 0, 'd'}, + { "errors", 0, 0, 'e' }, + { "nooutput", 0, 0, 'n' }, + { "json", 0, 0, 'j' }, + { "reset", 0, 0, 'r' }, + { "pretty", 0, 0, 'p' }, + { "noupdate", 0, 0, 's' }, + { "interval", 1, 0, 't' }, + { "version", 0, 0, 'V' }, + { "zeros", 0, 0, 'z' }, + { "extended", 1, 0, 'x'}, + { 0 } +}; + +int 
main(int argc, char *argv[]) +{ + char hist_name[128]; + struct sockaddr_un sun; + FILE *hist_fp = NULL; + const char *stats_type = NULL; + int ch; + int fd; + + is_extended = false; + while ((ch = getopt_long(argc, argv, "hjpvVzrnasd:t:ex:", + longopts, NULL)) != EOF) { + switch (ch) { + case 'z': + dump_zeros = 1; + break; + case 'r': + reset_history = 1; + break; + case 'a': + ignore_history = 1; + break; + case 's': + no_update = 1; + break; + case 'n': + no_output = 1; + break; + case 'e': + show_errors = 1; + break; + case 'j': + json_output = 1; + break; + case 'p': + pretty = 1; + break; + case 'd': + scan_interval = atoi(optarg) * 1000; + if (scan_interval <= 0) { + fprintf(stderr, "ifstat: invalid scan interval\n"); + exit(-1); + } + break; + case 't': + time_constant = atoi(optarg); + if (time_constant <= 0) { + fprintf(stderr, "ifstat: invalid time constant divisor\n"); + exit(-1); + } + break; + case 'x': + stats_type = optarg; + is_extended = true; + break; + case 'v': + case 'V': + printf("ifstat utility, iproute2-%s\n", version); + exit(0); + case 'h': + case '?': + default: + usage(); + } + } + + argc -= optind; + argv += optind; + + if (stats_type) { + stats_type = get_filter_type(stats_type); + if (!stats_type) + exit(-1); + } + + sun.sun_family = AF_UNIX; + sun.sun_path[0] = 0; + sprintf(sun.sun_path+1, "ifstat%d", getuid()); + + if (scan_interval > 0) { + if (time_constant == 0) + time_constant = 60; + time_constant *= 1000; + W = 1 - 1/exp(log(10)*(double)scan_interval/time_constant); + if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) { + perror("ifstat: socket"); + exit(-1); + } + if (bind(fd, (struct sockaddr *)&sun, 2+1+strlen(sun.sun_path+1)) < 0) { + perror("ifstat: bind"); + exit(-1); + } + if (listen(fd, 5) < 0) { + perror("ifstat: listen"); + exit(-1); + } + if (daemon(0, 0)) { + perror("ifstat: daemon"); + exit(-1); + } + signal(SIGPIPE, SIG_IGN); + signal(SIGCHLD, sigchild); + server_loop(fd); + exit(0); + } + + patterns = argv; + 
npatterns = argc; + + if (getenv("IFSTAT_HISTORY")) + snprintf(hist_name, sizeof(hist_name), + "%s", getenv("IFSTAT_HISTORY")); + else + if (!stats_type) + snprintf(hist_name, sizeof(hist_name), + "%s/.ifstat.u%d", P_tmpdir, getuid()); + else + snprintf(hist_name, sizeof(hist_name), + "%s/.%s_ifstat.u%d", P_tmpdir, stats_type, + getuid()); + + if (reset_history && unlink(hist_name) < 0) { + perror("ifstat: unlink history file"); + exit(-1); + } + + if (!ignore_history || !no_update) { + struct stat stb; + + fd = open(hist_name, O_RDWR|O_CREAT|O_NOFOLLOW, 0600); + if (fd < 0) { + perror("ifstat: open history file"); + exit(-1); + } + if ((hist_fp = fdopen(fd, "r+")) == NULL) { + perror("ifstat: fdopen history file"); + exit(-1); + } + if (flock(fileno(hist_fp), LOCK_EX)) { + perror("ifstat: flock history file"); + exit(-1); + } + if (fstat(fileno(hist_fp), &stb) != 0) { + perror("ifstat: fstat history file"); + exit(-1); + } + if (stb.st_nlink != 1 || stb.st_uid != getuid()) { + fprintf(stderr, "ifstat: something is so wrong with history file, that I prefer not to proceed.\n"); + exit(-1); + } + if (!ignore_history) { + FILE *tfp; + long uptime = -1; + + if ((tfp = fopen("/proc/uptime", "r")) != NULL) { + if (fscanf(tfp, "%ld", &uptime) != 1) + uptime = -1; + fclose(tfp); + } + if (uptime >= 0 && time(NULL) >= stb.st_mtime+uptime) { + fprintf(stderr, "ifstat: history is aged out, resetting\n"); + if (ftruncate(fileno(hist_fp), 0)) + perror("ifstat: ftruncate"); + } + } + + load_raw_table(hist_fp); + + hist_db = kern_db; + kern_db = NULL; + } + + if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0 && + (connect(fd, (struct sockaddr *)&sun, 2+1+strlen(sun.sun_path+1)) == 0 + || (strcpy(sun.sun_path+1, "ifstat0"), + connect(fd, (struct sockaddr *)&sun, 2+1+strlen(sun.sun_path+1)) == 0)) + && verify_forging(fd) == 0) { + FILE *sfp = fdopen(fd, "r"); + + if (!sfp) { + fprintf(stderr, "ifstat: fdopen failed: %s\n", + strerror(errno)); + close(fd); + } else { + 
/* Long options accepted by lnstat, handed to getopt_long().
 *
 * getopt_long() walks this table until it finds an entry whose name is
 * NULL, so the all-zero terminator at the end is mandatory; without it
 * an unknown long option makes the scan run past the array.
 */
static struct option opts[] = {
	{ "version", 0, NULL, 'V' },
	{ "count", 1, NULL, 'c' },
	{ "dump", 0, NULL, 'd' },
	{ "json", 0, NULL, 'j' },
	{ "file", 1, NULL, 'f' },
	{ "help", 0, NULL, 'h' },
	{ "interval", 1, NULL, 'i' },
	{ "keys", 1, NULL, 'k' },
	{ "subject", 1, NULL, 's' },
	{ "width", 1, NULL, 'w' },
	{ "oneline", 0, NULL, 0 },
	{ NULL, 0, NULL, 0 },	/* terminator */
};
Width for each field\n" + "\n", + name, version); + + exit(exit_code); +} + +struct field_param { + const char *name; + struct lnstat_field *lf; + struct { + unsigned int width; + } print; +}; + +struct field_params { + unsigned int num; + struct field_param params[MAX_FIELDS]; +}; + +static void print_line(FILE *of, const struct lnstat_file *lnstat_files, + const struct field_params *fp) +{ + int i; + + for (i = 0; i < fp->num; i++) { + const struct lnstat_field *lf = fp->params[i].lf; + + fprintf(of, "%*lu|", fp->params[i].print.width, lf->result); + } + fputc('\n', of); +} + +static void print_json(FILE *of, const struct lnstat_file *lnstat_files, + const struct field_params *fp) +{ + json_writer_t *jw = jsonw_new(of); + int i; + + if (jw == NULL) { + fprintf(stderr, "Failed to create JSON writer\n"); + exit(1); + } + jsonw_start_object(jw); + for (i = 0; i < fp->num; i++) { + const struct lnstat_field *lf = fp->params[i].lf; + + jsonw_uint_field(jw, lf->name, lf->result); + } + jsonw_end_object(jw); + jsonw_destroy(&jw); +} + +/* find lnstat_field according to user specification */ +static int map_field_params(struct lnstat_file *lnstat_files, + struct field_params *fps, int interval) +{ + int i, j = 0; + struct lnstat_file *lf; + + /* no field specification on commandline, need to build default */ + if (!fps->num) { + for (lf = lnstat_files; lf; lf = lf->next) { + for (i = 0; i < lf->num_fields; i++) { + fps->params[j].lf = &lf->fields[i]; + fps->params[j].lf->file->interval.tv_sec = + interval; + if (!fps->params[j].print.width) + fps->params[j].print.width = + FIELD_WIDTH_DEFAULT; + + if (++j >= MAX_FIELDS - 1) { + fprintf(stderr, + "WARN: MAX_FIELDS (%d) reached, truncating number of keys\n", + MAX_FIELDS); + goto full; + } + } + } +full: + fps->num = j; + return 1; + } + + for (i = 0; i < fps->num; i++) { + fps->params[i].lf = lnstat_find_field(lnstat_files, + fps->params[i].name); + if (!fps->params[i].lf) { + fprintf(stderr, "Field `%s' unknown\n", + 
fps->params[i].name); + return 0; + } + fps->params[i].lf->file->interval.tv_sec = interval; + if (!fps->params[i].print.width) + fps->params[i].print.width = FIELD_WIDTH_DEFAULT; + } + return 1; +} + +struct table_hdr { + int num_lines; + char *hdr[HDR_LINES]; +}; + +static struct table_hdr *build_hdr_string(struct lnstat_file *lnstat_files, + struct field_params *fps, + int linewidth) +{ + int h, i; + static struct table_hdr th; + int ofs = 0; + + for (i = 0; i < HDR_LINES; i++) + th.hdr[i] = calloc(1, HDR_LINE_LENGTH); + + for (i = 0; i < fps->num; i++) { + char *cname, *fname = fps->params[i].lf->name; + unsigned int width = fps->params[i].print.width; + + snprintf(th.hdr[0]+ofs, width+2, "%*.*s|", width, width, + fps->params[i].lf->file->basename); + + cname = fname; + for (h = 1; h < HDR_LINES; h++) { + if (cname - fname >= strlen(fname)) + snprintf(th.hdr[h]+ofs, width+2, + "%*.*s|", width, width, ""); + else { + th.num_lines = h+1; + snprintf(th.hdr[h]+ofs, width+2, + "%*.*s|", width, width, cname); + } + cname += width; + } + ofs += width+1; + } + + /* fill in spaces */ + for (h = 1; h < th.num_lines; h++) { + for (i = 0; i < ofs; i++) { + if (th.hdr[h][i] == '\0') + th.hdr[h][i] = ' '; + } + } + + return &th; +} + +static int print_hdr(FILE *of, struct table_hdr *th) +{ + int i; + + for (i = 0; i < th->num_lines; i++) { + fputs(th->hdr[i], of); + fputc('\n', of); + } + return 0; +} + + +int main(int argc, char **argv) +{ + struct lnstat_file *lnstat_files; + const char *basename; + int i, c; + int interval = DEFAULT_INTERVAL; + int hdr = 2; + enum { + MODE_DUMP, + MODE_JSON, + MODE_NORMAL, + } mode = MODE_NORMAL; + unsigned long count = 0; + struct table_hdr *header; + static struct field_params fp; + int num_req_files = 0; + char *req_files[LNSTAT_MAX_FILES]; + + /* backwards compatibility mode for old tools */ + basename = strrchr(argv[0], '/'); + if (basename) + basename += 1; /* name after slash */ + else + basename = argv[0]; /* no slash */ + + if 
(!strcmp(basename, "rtstat")) { + /* rtstat compatibility mode */ + req_files[0] = "rt_cache"; + num_req_files = 1; + } else if (!strcmp(basename, "ctstat")) { + /* ctstat compatibility mode */ + req_files[0] = "ip_conntrack"; + num_req_files = 1; + } + + while ((c = getopt_long(argc, argv, "Vc:djpf:h?i:k:s:w:", + opts, NULL)) != -1) { + int len = 0; + char *tmp, *tok; + + switch (c) { + case 'c': + count = strtoul(optarg, NULL, 0); + break; + case 'd': + mode = MODE_DUMP; + break; + case 'j': + mode = MODE_JSON; + break; + case 'f': + req_files[num_req_files++] = strdup(optarg); + break; + case '?': + case 'h': + usage(argv[0], 0); + break; + case 'i': + sscanf(optarg, "%u", &interval); + break; + case 'k': + tmp = strdup(optarg); + if (!tmp) + break; + for (tok = strtok(tmp, ","); + tok; + tok = strtok(NULL, ",")) { + if (fp.num >= MAX_FIELDS) { + fprintf(stderr, + "WARN: too many keys requested: (%d max)\n", + MAX_FIELDS); + break; + } + fp.params[fp.num++].name = tok; + } + break; + case 's': + sscanf(optarg, "%u", &hdr); + break; + case 'w': + tmp = strdup(optarg); + if (!tmp) + break; + i = 0; + for (tok = strtok(tmp, ","); + tok; + tok = strtok(NULL, ",")) { + len = strtoul(tok, NULL, 0); + if (len > FIELD_WIDTH_MAX) + len = FIELD_WIDTH_MAX; + fp.params[i].print.width = len; + i++; + } + if (i == 1) { + for (i = 0; i < MAX_FIELDS; i++) + fp.params[i].print.width = len; + } + free(tmp); + break; + default: + usage(argv[0], 1); + break; + } + } + + lnstat_files = lnstat_scan_dir(PROC_NET_STAT, num_req_files, + (const char **) req_files); + + switch (mode) { + case MODE_DUMP: + lnstat_dump(stdout, lnstat_files); + break; + + case MODE_NORMAL: + case MODE_JSON: + if (!map_field_params(lnstat_files, &fp, interval)) + exit(1); + + header = build_hdr_string(lnstat_files, &fp, 80); + if (!header) + exit(1); + + if (interval < 1) + interval = 1; + + for (i = 0; i < count || !count; i++) { + lnstat_update(lnstat_files); + if (mode == MODE_JSON) + print_json(stdout, 
lnstat_files, &fp); + else { + if ((hdr > 1 && !(i % 20)) || + (hdr == 1 && i == 0)) + print_hdr(stdout, header); + print_line(stdout, lnstat_files, &fp); + } + fflush(stdout); + if (i < count - 1 || !count) + sleep(interval); + } + break; + } + + return 1; +} diff --git a/misc/lnstat.h b/misc/lnstat.h new file mode 100644 index 0000000..433599c --- /dev/null +++ b/misc/lnstat.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LNSTAT_H +#define _LNSTAT_H + +#include <limits.h> +#include <sys/select.h> + +#define PROC_NET_STAT "/proc/net/stat" + +#define LNSTAT_MAX_FILES 32 +#define LNSTAT_MAX_FIELDS_PER_LINE 32 +#define LNSTAT_MAX_FIELD_NAME_LEN 32 + +struct lnstat_file; + +struct lnstat_field { + struct lnstat_file *file; + unsigned int num; /* field number in line */ + char name[LNSTAT_MAX_FIELD_NAME_LEN+1]; + unsigned long values[2]; /* two buffers for values */ + unsigned long result; +}; + +struct lnstat_file { + struct lnstat_file *next; + char path[PATH_MAX+1]; + char basename[NAME_MAX+1]; + struct timeval last_read; /* last time of read */ + struct timeval interval; /* interval */ + int compat; /* 1 == backwards compat mode */ + FILE *fp; + unsigned int num_fields; /* number of fields */ + struct lnstat_field fields[LNSTAT_MAX_FIELDS_PER_LINE]; +}; + + +struct lnstat_file *lnstat_scan_dir(const char *path, const int num_req_files, + const char **req_files); +int lnstat_update(struct lnstat_file *lnstat_files); +int lnstat_dump(FILE *outfd, struct lnstat_file *lnstat_files); +struct lnstat_field *lnstat_find_field(struct lnstat_file *lnstat_files, + const char *name); +#endif /* _LNSTAT_H */ diff --git a/misc/lnstat_util.c b/misc/lnstat_util.c new file mode 100644 index 0000000..3f53e91 --- /dev/null +++ b/misc/lnstat_util.c @@ -0,0 +1,325 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* lnstat.c: Unified linux network statistics + * + * Copyright (C) 2004 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code was 
/* Return 1 when @now lies strictly after @last + @tout, 0 otherwise.
 *
 * The deadline is normalised first: microseconds that add up to a full
 * second are carried into the seconds part.  Comparing the raw sums
 * gives the wrong answer whenever the two tv_usec values together reach
 * 1000000.
 */
static int time_after(struct timeval *last,
		      struct timeval *tout,
		      struct timeval *now)
{
	long long deadline_sec = (long long)last->tv_sec + tout->tv_sec;
	long long deadline_usec = (long long)last->tv_usec + tout->tv_usec;

	deadline_sec += deadline_usec / 1000000;
	deadline_usec %= 1000000;

	if (now->tv_sec != deadline_sec)
		return now->tv_sec > deadline_sec;

	return now->tv_usec > deadline_usec;
}
i; + struct lnstat_field *lfi; + + scan_lines(lf, 1); + + for (i = 0, lfi = &lf->fields[i]; + i < lf->num_fields; i++, lfi = &lf->fields[i]) { + if (i == 0) + lfi->result = lfi->values[1]; + else + lfi->result = (lfi->values[1]-lfi->values[0]) + / lf->interval.tv_sec; + } + + scan_lines(lf, 0); + } + } + + return 0; +} + +/* scan first template line and fill in per-field data structures */ +static int __lnstat_scan_fields(struct lnstat_file *lf, char *buf) +{ + char *tok; + int i; + + tok = strtok(buf, " \t\n"); + for (i = 0; i < LNSTAT_MAX_FIELDS_PER_LINE; i++) { + lf->fields[i].file = lf; + strncpy(lf->fields[i].name, tok, LNSTAT_MAX_FIELD_NAME_LEN); + /* has to be null-terminate since we initialize to zero + * and field size is NAME_LEN + 1 */ + tok = strtok(NULL, " \t\n"); + if (!tok) { + lf->num_fields = i+1; + return 0; + } + } + return 0; +} + +static int lnstat_scan_fields(struct lnstat_file *lf) +{ + char buf[FGETS_BUF_SIZE]; + + rewind(lf->fp); + if (!fgets(buf, sizeof(buf)-1, lf->fp)) + return -1; + + return __lnstat_scan_fields(lf, buf); +} + +/* fake function emulating lnstat_scan_fields() for old kernels */ +static int lnstat_scan_compat_rtstat_fields(struct lnstat_file *lf) +{ + char buf[FGETS_BUF_SIZE]; + + strncpy(buf, RTSTAT_COMPAT_LINE, sizeof(buf) - 1); + buf[sizeof(buf) - 1] = '\0'; + + return __lnstat_scan_fields(lf, buf); +} + +/* find out whether string 'name; is in given string array */ +static int name_in_array(const int num, const char **arr, const char *name) +{ + int i; + + for (i = 0; i < num; i++) { + if (!strcmp(arr[i], name)) + return 1; + } + return 0; +} + +/* allocate lnstat_file and open given file */ +static struct lnstat_file *alloc_and_open(const char *path, const char *file) +{ + struct lnstat_file *lf; + + /* allocate */ + lf = calloc(1, sizeof(*lf)); + if (!lf) { + fprintf(stderr, "out of memory\n"); + return NULL; + } + + /* initialize */ + snprintf(lf->basename, sizeof(lf->basename), "%s", file); + snprintf(lf->path, 
sizeof(lf->path), "%s/%s", path, file); + + /* initialize to default */ + lf->interval.tv_sec = 1; + + /* open */ + lf->fp = fopen(lf->path, "r"); + if (!lf->fp) { + perror(lf->path); + free(lf); + return NULL; + } + + return lf; +} + + +/* lnstat_scan_dir - find and parse all available statistics files/fields */ +struct lnstat_file *lnstat_scan_dir(const char *path, const int num_req_files, + const char **req_files) +{ + DIR *dir; + struct lnstat_file *lnstat_files = NULL; + struct dirent *de; + + if (!path) + path = PROC_NET_STAT; + + dir = opendir(path); + if (!dir) { + struct lnstat_file *lf; + /* Old kernel, before /proc/net/stat was introduced */ + fprintf(stderr, "Your kernel doesn't have lnstat support. "); + + /* we only support rtstat, not multiple files */ + if (num_req_files >= 2) { + fputc('\n', stderr); + return NULL; + } + + /* we really only accept rt_cache */ + if (num_req_files && !name_in_array(num_req_files, + req_files, "rt_cache")) { + fputc('\n', stderr); + return NULL; + } + + fprintf(stderr, "Fallback to old rtstat-only operation\n"); + + lf = alloc_and_open("/proc/net", "rt_cache_stat"); + if (!lf) + return NULL; + lf->compat = 1; + strncpy(lf->basename, "rt_cache", sizeof(lf->basename)); + + /* FIXME: support for old files */ + if (lnstat_scan_compat_rtstat_fields(lf) < 0) + return NULL; + + lf->next = lnstat_files; + lnstat_files = lf; + return lnstat_files; + } + + while ((de = readdir(dir))) { + struct lnstat_file *lf; + + if (de->d_type != DT_REG) + continue; + + if (num_req_files && !name_in_array(num_req_files, + req_files, de->d_name)) + continue; + + lf = alloc_and_open(path, de->d_name); + if (!lf) { + closedir(dir); + return NULL; + } + + /* fill in field structure */ + if (lnstat_scan_fields(lf) < 0) { + closedir(dir); + return NULL; + } + + /* prepend to global list */ + lf->next = lnstat_files; + lnstat_files = lf; + } + closedir(dir); + + return lnstat_files; +} + +int lnstat_dump(FILE *outfd, struct lnstat_file 
*lnstat_files) +{ + struct lnstat_file *lf; + + for (lf = lnstat_files; lf; lf = lf->next) { + int i; + + fprintf(outfd, "%s:\n", lf->path); + + for (i = 0; i < lf->num_fields; i++) + fprintf(outfd, "\t%2u: %s\n", i+1, lf->fields[i].name); + + } + return 0; +} + +struct lnstat_field *lnstat_find_field(struct lnstat_file *lnstat_files, + const char *name) +{ + struct lnstat_file *lf; + struct lnstat_field *ret = NULL; + const char *colon = strchr(name, ':'); + char *file; + const char *field; + + if (colon) { + file = strndup(name, colon-name); + field = colon+1; + } else { + file = NULL; + field = name; + } + + for (lf = lnstat_files; lf; lf = lf->next) { + int i; + + if (file && strcmp(file, lf->basename)) + continue; + + for (i = 0; i < lf->num_fields; i++) { + if (!strcmp(field, lf->fields[i].name)) { + ret = &lf->fields[i]; + goto out; + } + } + } +out: + free(file); + + return ret; +} diff --git a/misc/nstat.c b/misc/nstat.c new file mode 100644 index 0000000..2c10fea --- /dev/null +++ b/misc/nstat.c @@ -0,0 +1,776 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * nstat.c handy utility to read counters /proc/net/netstat and snmp + * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <errno.h> +#include <time.h> +#include <sys/time.h> +#include <fnmatch.h> +#include <sys/file.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <sys/poll.h> +#include <sys/wait.h> +#include <sys/stat.h> +#include <signal.h> +#include <math.h> +#include <getopt.h> + +#include <json_writer.h> +#include "version.h" +#include "utils.h" + +int dump_zeros; +int reset_history; +int ignore_history; +int no_output; +int json_output; +int no_update; +int scan_interval; +int time_constant; +double W; +char **patterns; +int npatterns; + +char info_source[128]; +int source_mismatch; + +static int generic_proc_open(const char *env, char *name) +{ + char 
/* Identifiers that load_good_table() and load_ugly_table() drop while
 * loading counters.
 */
static const char *useless_numbers[] = {
	"IpForwarding", "IpDefaultTTL",
	"TcpRtoAlgorithm", "TcpRtoMin", "TcpRtoMax",
	"TcpMaxConn", "TcpCurrEstab"
};

/* Return 1 if @id is listed in useless_numbers[] (exact, case-sensitive
 * match), 0 otherwise.
 */
static int useless_number(const char *id)
{
	const char **cur = useless_numbers;
	const char **end = useless_numbers +
		sizeof(useless_numbers) / sizeof(useless_numbers[0]);

	while (cur != end) {
		if (!strcmp(id, *cur))
			return 1;
		cur++;
	}
	return 0;
}
/* Count the separators in @line: every ' ' and every '\n' adds one. */
static int count_spaces(const char *line)
{
	const char *p;
	int total = 0;

	for (p = line; *p != '\0'; p++) {
		if (*p == ' ' || *p == '\n')
			total++;
	}
	return total;
}
__FILE__, __LINE__); + exit(-2); + } + /* Trick to skip "dummy" trailing ICMP MIB in 2.4 */ + if (skip) + skip--; + else + n = n->next; + } while (p > buf + off + 2); + } + free(buf); + + while (db) { + n = db; + db = db->next; + if (useless_number(n->id)) { + free(n->id); + free(n); + } else { + n->next = kern_db; + kern_db = n; + } + } +} + +static void load_sctp_snmp(void) +{ + FILE *fp = fdopen(net_sctp_snmp_open(), "r"); + + if (fp) { + load_good_table(fp); + fclose(fp); + } +} + +static void load_snmp(void) +{ + FILE *fp = fdopen(net_snmp_open(), "r"); + + if (fp) { + load_ugly_table(fp); + fclose(fp); + } +} + +static void load_snmp6(void) +{ + FILE *fp = fdopen(net_snmp6_open(), "r"); + + if (fp) { + load_good_table(fp); + fclose(fp); + } +} + +static void load_netstat(void) +{ + FILE *fp = fdopen(net_netstat_open(), "r"); + + if (fp) { + load_ugly_table(fp); + fclose(fp); + } +} + + +static void dump_kern_db(FILE *fp, int to_hist) +{ + json_writer_t *jw = json_output ? jsonw_new(fp) : NULL; + struct nstat_ent *n, *h; + + h = hist_db; + if (jw) { + jsonw_start_object(jw); + jsonw_pretty(jw, pretty); + jsonw_name(jw, info_source); + jsonw_start_object(jw); + } else + fprintf(fp, "#%s\n", info_source); + + for (n = kern_db; n; n = n->next) { + unsigned long long val = n->val; + + if (!dump_zeros && !val && !n->rate) + continue; + if (!match(n->id)) { + struct nstat_ent *h1; + + if (!to_hist) + continue; + for (h1 = h; h1; h1 = h1->next) { + if (strcmp(h1->id, n->id) == 0) { + val = h1->val; + h = h1->next; + break; + } + } + } + + if (jw) + jsonw_uint_field(jw, n->id, val); + else + fprintf(fp, "%-32s%-16llu%6.1f\n", n->id, val, n->rate); + } + + if (jw) { + jsonw_end_object(jw); + + jsonw_end_object(jw); + jsonw_destroy(&jw); + } +} + +static void dump_incr_db(FILE *fp) +{ + json_writer_t *jw = json_output ? 
jsonw_new(fp) : NULL; + struct nstat_ent *n, *h; + + h = hist_db; + if (jw) { + jsonw_start_object(jw); + jsonw_pretty(jw, pretty); + jsonw_name(jw, info_source); + jsonw_start_object(jw); + } else + fprintf(fp, "#%s\n", info_source); + + for (n = kern_db; n; n = n->next) { + int ovfl = 0; + unsigned long long val = n->val; + struct nstat_ent *h1; + + for (h1 = h; h1; h1 = h1->next) { + if (strcmp(h1->id, n->id) == 0) { + if (val < h1->val) { + ovfl = 1; + val = h1->val; + } + val -= h1->val; + h = h1->next; + break; + } + } + if (!dump_zeros && !val && !n->rate) + continue; + if (!match(n->id)) + continue; + + if (jw) + jsonw_uint_field(jw, n->id, val); + else + fprintf(fp, "%-32s%-16llu%6.1f%s\n", n->id, val, + n->rate, ovfl?" (overflow)":""); + } + + if (jw) { + jsonw_end_object(jw); + + jsonw_end_object(jw); + jsonw_destroy(&jw); + } +} + +static int children; + +static void sigchild(int signo) +{ +} + +static void update_db(int interval) +{ + struct nstat_ent *n, *h; + + n = kern_db; + kern_db = NULL; + + load_netstat(); + load_snmp6(); + load_snmp(); + load_sctp_snmp(); + + h = kern_db; + kern_db = n; + + for (n = kern_db; n; n = n->next) { + struct nstat_ent *h1; + + for (h1 = h; h1; h1 = h1->next) { + if (strcmp(h1->id, n->id) == 0) { + double sample; + unsigned long long incr = h1->val - n->val; + + n->val = h1->val; + sample = (double)incr * 1000.0 / interval; + if (interval >= scan_interval) { + n->rate += W*(sample-n->rate); + } else if (interval >= 1000) { + if (interval >= time_constant) { + n->rate = sample; + } else { + double w = W*(double)interval/scan_interval; + + n->rate += w*(sample-n->rate); + } + } + + while (h != h1) { + struct nstat_ent *tmp = h; + + h = h->next; + free(tmp->id); + free(tmp); + }; + h = h1->next; + free(h1->id); + free(h1); + break; + } + } + } +} + +#define T_DIFF(a, b) (((a).tv_sec-(b).tv_sec)*1000 + ((a).tv_usec-(b).tv_usec)/1000) + + +static void server_loop(int fd) +{ + struct timeval snaptime = { 0 }; + struct pollfd 
p; + + p.fd = fd; + p.events = p.revents = POLLIN; + + sprintf(info_source, "%d.%lu sampling_interval=%d time_const=%d", + getpid(), (unsigned long)random(), scan_interval/1000, time_constant/1000); + + load_netstat(); + load_snmp6(); + load_snmp(); + load_sctp_snmp(); + + for (;;) { + int status; + time_t tdiff; + struct timeval now; + + gettimeofday(&now, NULL); + tdiff = T_DIFF(now, snaptime); + if (tdiff >= scan_interval) { + update_db(tdiff); + snaptime = now; + tdiff = 0; + } + if (poll(&p, 1, scan_interval - tdiff) > 0 + && (p.revents&POLLIN)) { + int clnt = accept(fd, NULL, NULL); + + if (clnt >= 0) { + pid_t pid; + + if (children >= 5) { + close(clnt); + } else if ((pid = fork()) != 0) { + if (pid > 0) + children++; + close(clnt); + } else { + FILE *fp = fdopen(clnt, "w"); + + if (fp) + dump_kern_db(fp, 0); + exit(0); + } + } + } + while (children && waitpid(-1, &status, WNOHANG) > 0) + children--; + } +} + +static int verify_forging(int fd) +{ + struct ucred cred; + socklen_t olen = sizeof(cred); + + if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, (void *)&cred, &olen) || + olen < sizeof(cred)) + return -1; + if (cred.uid == getuid() || cred.uid == 0) + return 0; + return -1; +} + +static void usage(void) __attribute__((noreturn)); + +static void usage(void) +{ + fprintf(stderr, + "Usage: nstat [OPTION] [ PATTERN [ PATTERN ] ]\n" + " -h, --help this message\n" + " -a, --ignore ignore history\n" + " -d, --scan=SECS sample every statistics every SECS\n" + " -j, --json format output in JSON\n" + " -n, --nooutput do history only\n" + " -p, --pretty pretty print\n" + " -r, --reset reset history\n" + " -s, --noupdate don't update history\n" + " -t, --interval=SECS report average over the last SECS\n" + " -V, --version output version information\n" + " -z, --zeros show entries with zero activity\n"); + exit(-1); +} + +static const struct option longopts[] = { + { "help", 0, 0, 'h' }, + { "ignore", 0, 0, 'a' }, + { "scan", 1, 0, 'd'}, + { "nooutput", 0, 0, 'n' }, + { 
"json", 0, 0, 'j' }, + { "reset", 0, 0, 'r' }, + { "noupdate", 0, 0, 's' }, + { "pretty", 0, 0, 'p' }, + { "interval", 1, 0, 't' }, + { "version", 0, 0, 'V' }, + { "zeros", 0, 0, 'z' }, + { 0 } +}; + +int main(int argc, char *argv[]) +{ + char *hist_name; + struct sockaddr_un sun; + FILE *hist_fp = NULL; + int ch; + int fd; + + while ((ch = getopt_long(argc, argv, "h?vVzrnasd:t:jp", + longopts, NULL)) != EOF) { + switch (ch) { + case 'z': + dump_zeros = 1; + break; + case 'r': + reset_history = 1; + break; + case 'a': + ignore_history = 1; + break; + case 's': + no_update = 1; + break; + case 'n': + no_output = 1; + break; + case 'd': + scan_interval = 1000*atoi(optarg); + break; + case 't': + if (sscanf(optarg, "%d", &time_constant) != 1 || + time_constant <= 0) { + fprintf(stderr, "nstat: invalid time constant divisor\n"); + exit(-1); + } + break; + case 'j': + json_output = 1; + break; + case 'p': + pretty = 1; + break; + case 'v': + case 'V': + printf("nstat utility, iproute2-%s\n", version); + exit(0); + case 'h': + case '?': + default: + usage(); + } + } + + argc -= optind; + argv += optind; + + sun.sun_family = AF_UNIX; + sun.sun_path[0] = 0; + sprintf(sun.sun_path+1, "nstat%d", getuid()); + + if (scan_interval > 0) { + if (time_constant == 0) + time_constant = 60; + time_constant *= 1000; + W = 1 - 1/exp(log(10)*(double)scan_interval/time_constant); + if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) { + perror("nstat: socket"); + exit(-1); + } + if (bind(fd, (struct sockaddr *)&sun, 2+1+strlen(sun.sun_path+1)) < 0) { + perror("nstat: bind"); + exit(-1); + } + if (listen(fd, 5) < 0) { + perror("nstat: listen"); + exit(-1); + } + if (daemon(0, 0)) { + perror("nstat: daemon"); + exit(-1); + } + signal(SIGPIPE, SIG_IGN); + signal(SIGCHLD, sigchild); + server_loop(fd); + exit(0); + } + + patterns = argv; + npatterns = argc; + + if ((hist_name = getenv("NSTAT_HISTORY")) == NULL) { + hist_name = malloc(128); + sprintf(hist_name, "/tmp/.nstat.u%d", getuid()); + } + 
+ if (reset_history) + unlink(hist_name); + + if (!ignore_history || !no_update) { + struct stat stb; + + fd = open(hist_name, O_RDWR|O_CREAT|O_NOFOLLOW, 0600); + if (fd < 0) { + perror("nstat: open history file"); + exit(-1); + } + if ((hist_fp = fdopen(fd, "r+")) == NULL) { + perror("nstat: fdopen history file"); + exit(-1); + } + if (flock(fileno(hist_fp), LOCK_EX)) { + perror("nstat: flock history file"); + exit(-1); + } + if (fstat(fileno(hist_fp), &stb) != 0) { + perror("nstat: fstat history file"); + exit(-1); + } + if (stb.st_nlink != 1 || stb.st_uid != getuid()) { + fprintf(stderr, "nstat: something is so wrong with history file, that I prefer not to proceed.\n"); + exit(-1); + } + if (!ignore_history) { + FILE *tfp; + long uptime = -1; + + if ((tfp = fopen("/proc/uptime", "r")) != NULL) { + if (fscanf(tfp, "%ld", &uptime) != 1) + uptime = -1; + fclose(tfp); + } + if (uptime >= 0 && time(NULL) >= stb.st_mtime+uptime) { + fprintf(stderr, "nstat: history is aged out, resetting\n"); + if (ftruncate(fileno(hist_fp), 0) < 0) + perror("nstat: ftruncate"); + } + } + + load_good_table(hist_fp); + + hist_db = kern_db; + kern_db = NULL; + } + + if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0 && + (connect(fd, (struct sockaddr *)&sun, 2+1+strlen(sun.sun_path+1)) == 0 + || (strcpy(sun.sun_path+1, "nstat0"), + connect(fd, (struct sockaddr *)&sun, 2+1+strlen(sun.sun_path+1)) == 0)) + && verify_forging(fd) == 0) { + FILE *sfp = fdopen(fd, "r"); + + if (!sfp) { + fprintf(stderr, "nstat: fdopen failed: %s\n", + strerror(errno)); + close(fd); + } else { + load_good_table(sfp); + if (hist_db && source_mismatch) { + fprintf(stderr, "nstat: history is stale, ignoring it.\n"); + hist_db = NULL; + } + fclose(sfp); + } + } else { + if (fd >= 0) + close(fd); + if (hist_db && info_source[0] && strcmp(info_source, "kernel")) { + fprintf(stderr, "nstat: history is stale, ignoring it.\n"); + hist_db = NULL; + info_source[0] = 0; + } + load_netstat(); + load_snmp6(); + load_snmp(); + 
load_sctp_snmp(); + if (info_source[0] == 0) + strcpy(info_source, "kernel"); + } + + if (!no_output) { + if (ignore_history || hist_db == NULL) + dump_kern_db(stdout, 0); + else + dump_incr_db(stdout); + } + if (!no_update) { + if (ftruncate(fileno(hist_fp), 0) < 0) + perror("nstat: ftruncate"); + rewind(hist_fp); + + json_output = 0; + dump_kern_db(hist_fp, 1); + fclose(hist_fp); + } + exit(0); +} diff --git a/misc/rtacct.c b/misc/rtacct.c new file mode 100644 index 0000000..08363bf --- /dev/null +++ b/misc/rtacct.c @@ -0,0 +1,622 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * rtacct.c Applet to display contents of /proc/net/rt_acct. + * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <errno.h> +#include <time.h> +#include <sys/time.h> +#include <sys/file.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <sys/poll.h> +#include <sys/wait.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <signal.h> +#include <math.h> + +#include "rt_names.h" + +#include "version.h" + +int reset_history; +int ignore_history; +int no_output; +int no_update; +int scan_interval; +int time_constant; +int dump_zeros; +unsigned long magic_number; +double W; + +static int generic_proc_open(const char *env, const char *name) +{ + char store[1024]; + char *p = getenv(env); + + if (!p) { + p = getenv("PROC_ROOT") ? 
: "/proc"; + snprintf(store, sizeof(store)-1, "%s/%s", p, name); + p = store; + } + return open(p, O_RDONLY); +} + +static int net_rtacct_open(void) +{ + return generic_proc_open("PROC_NET_RTACCT", "net/rt_acct"); +} + +static __u32 rmap[256/4]; + +struct rtacct_data { + __u32 ival[256*4]; + + unsigned long long val[256*4]; + double rate[256*4]; + char signature[128]; +}; + +static struct rtacct_data kern_db_static; + +static struct rtacct_data *kern_db = &kern_db_static; +static struct rtacct_data *hist_db; + +static void nread(int fd, char *buf, int tot) +{ + int count = 0; + + while (count < tot) { + int n = read(fd, buf+count, tot-count); + + if (n < 0) { + if (errno == EINTR) + continue; + exit(-1); + } + if (n == 0) + exit(-1); + count += n; + } +} + +static __u32 *read_kern_table(__u32 *tbl) +{ + static __u32 *tbl_ptr; + int fd; + + if (magic_number) { + if (tbl_ptr != NULL) + return tbl_ptr; + + fd = open("/dev/mem", O_RDONLY); + if (fd < 0) { + perror("magic open"); + exit(-1); + } + tbl_ptr = mmap(NULL, 4096, + PROT_READ, + MAP_SHARED, + fd, magic_number); + if ((unsigned long)tbl_ptr == ~0UL) { + perror("magic mmap"); + exit(-1); + } + close(fd); + return tbl_ptr; + } + + fd = net_rtacct_open(); + if (fd >= 0) { + nread(fd, (char *)tbl, 256*16); + close(fd); + } else { + memset(tbl, 0, 256*16); + } + return tbl; +} + +static void format_rate(FILE *fp, double rate) +{ + char temp[64]; + + if (rate > 1024*1024) { + sprintf(temp, "%uM", (unsigned int)rint(rate/(1024*1024))); + fprintf(fp, " %-10s", temp); + } else if (rate > 1024) { + sprintf(temp, "%uK", (unsigned int)rint(rate/1024)); + fprintf(fp, " %-10s", temp); + } else + fprintf(fp, " %-10u", (unsigned int)rate); +} + +static void format_count(FILE *fp, unsigned long long val) +{ + if (val > 1024*1024*1024) + fprintf(fp, " %10lluM", val/(1024*1024)); + else if (val > 1024*1024) + fprintf(fp, " %10lluK", val/1024); + else + fprintf(fp, " %10llu", val); +} + +static void dump_abs_db(FILE *fp) +{ + int 
realm; + char b1[16]; + + if (!no_output) { + fprintf(fp, "#%s\n", kern_db->signature); + fprintf(fp, +"%-10s %-10s " +"%-10s %-10s " +"%-10s \n" + , "Realm", "BytesTo", "PktsTo", "BytesFrom", "PktsFrom"); + fprintf(fp, +"%-10s %-10s " +"%-10s %-10s " +"%-10s \n" + , "", "BPSTo", "PPSTo", "BPSFrom", "PPSFrom"); + + } + + for (realm = 0; realm < 256; realm++) { + int i; + unsigned long long *val; + double *rate; + + if (!(rmap[realm>>5] & (1<<(realm&0x1f)))) + continue; + + val = &kern_db->val[realm*4]; + rate = &kern_db->rate[realm*4]; + + if (!dump_zeros && + !val[0] && !rate[0] && + !val[1] && !rate[1] && + !val[2] && !rate[2] && + !val[3] && !rate[3]) + continue; + + if (hist_db) { + memcpy(&hist_db->val[realm*4], val, sizeof(*val)*4); + } + + if (no_output) + continue; + + fprintf(fp, "%-10s", rtnl_rtrealm_n2a(realm, b1, sizeof(b1))); + for (i = 0; i < 4; i++) + format_count(fp, val[i]); + fprintf(fp, "\n%-10s", ""); + for (i = 0; i < 4; i++) + format_rate(fp, rate[i]); + fprintf(fp, "\n"); + } +} + + +static void dump_incr_db(FILE *fp) +{ + int k, realm; + char b1[16]; + + if (!no_output) { + fprintf(fp, "#%s\n", kern_db->signature); + fprintf(fp, +"%-10s %-10s " +"%-10s %-10s " +"%-10s \n" + , "Realm", "BytesTo", "PktsTo", "BytesFrom", "PktsFrom"); + fprintf(fp, +"%-10s %-10s " +"%-10s %-10s " +"%-10s \n" + , "", "BPSTo", "PPSTo", "BPSFrom", "PPSFrom"); + } + + for (realm = 0; realm < 256; realm++) { + int ovfl = 0; + int i; + unsigned long long *val; + double *rate; + unsigned long long rval[4]; + + if (!(rmap[realm>>5] & (1<<(realm&0x1f)))) + continue; + + val = &kern_db->val[realm*4]; + rate = &kern_db->rate[realm*4]; + + for (k = 0; k < 4; k++) { + rval[k] = val[k]; + if (rval[k] < hist_db->val[realm*4+k]) + ovfl = 1; + else + rval[k] -= hist_db->val[realm*4+k]; + } + if (ovfl) { + for (k = 0; k < 4; k++) + rval[k] = val[k]; + } + if (hist_db) { + memcpy(&hist_db->val[realm*4], val, sizeof(*val)*4); + } + + if (no_output) + continue; + + if (!dump_zeros 
&& + !rval[0] && !rate[0] && + !rval[1] && !rate[1] && + !rval[2] && !rate[2] && + !rval[3] && !rate[3]) + continue; + + + fprintf(fp, "%-10s", rtnl_rtrealm_n2a(realm, b1, sizeof(b1))); + for (i = 0; i < 4; i++) + format_count(fp, rval[i]); + fprintf(fp, "\n%-10s", ""); + for (i = 0; i < 4; i++) + format_rate(fp, rate[i]); + fprintf(fp, "\n"); + } +} + + +static int children; + +static void sigchild(int signo) +{ +} + +/* Server side only: read kernel data, update tables, calculate rates. */ + +static void update_db(int interval) +{ + int i; + __u32 *ival; + __u32 _ival[256*4]; + + ival = read_kern_table(_ival); + + for (i = 0; i < 256*4; i++) { + double sample; + __u32 incr = ival[i] - kern_db->ival[i]; + + if (ival[i] == 0 && incr == 0 && + kern_db->val[i] == 0 && kern_db->rate[i] == 0) + continue; + + kern_db->val[i] += incr; + kern_db->ival[i] = ival[i]; + sample = (double)(incr*1000)/interval; + if (interval >= scan_interval) { + kern_db->rate[i] += W*(sample-kern_db->rate[i]); + } else if (interval >= 1000) { + if (interval >= time_constant) { + kern_db->rate[i] = sample; + } else { + double w = W*(double)interval/scan_interval; + + kern_db->rate[i] += w*(sample-kern_db->rate[i]); + } + } + } +} + +static void send_db(int fd) +{ + int tot = 0; + + while (tot < sizeof(*kern_db)) { + int n = write(fd, ((char *)kern_db) + tot, sizeof(*kern_db)-tot); + + if (n < 0) { + if (errno == EINTR) + continue; + return; + } + tot += n; + } +} + + + +#define T_DIFF(a, b) (((a).tv_sec-(b).tv_sec)*1000 + ((a).tv_usec-(b).tv_usec)/1000) + + +static void pad_kern_table(struct rtacct_data *dat, __u32 *ival) +{ + int i; + + memset(dat->rate, 0, sizeof(dat->rate)); + if (dat->ival != ival) + memcpy(dat->ival, ival, sizeof(dat->ival)); + for (i = 0; i < 256*4; i++) + dat->val[i] = ival[i]; +} + +static void server_loop(int fd) +{ + struct timeval snaptime = { 0 }; + struct pollfd p; + + p.fd = fd; + p.events = p.revents = POLLIN; + + sprintf(kern_db->signature, + "%u.%lu 
sampling_interval=%d time_const=%d", + (unsigned int) getpid(), (unsigned long)random(), + scan_interval/1000, time_constant/1000); + + pad_kern_table(kern_db, read_kern_table(kern_db->ival)); + + for (;;) { + int status; + int tdiff; + struct timeval now; + + gettimeofday(&now, NULL); + tdiff = T_DIFF(now, snaptime); + if (tdiff >= scan_interval) { + update_db(tdiff); + snaptime = now; + tdiff = 0; + } + if (poll(&p, 1, tdiff + scan_interval) > 0 + && (p.revents&POLLIN)) { + int clnt = accept(fd, NULL, NULL); + + if (clnt >= 0) { + pid_t pid; + + if (children >= 5) { + close(clnt); + } else if ((pid = fork()) != 0) { + if (pid > 0) + children++; + close(clnt); + } else { + if (tdiff > 0) + update_db(tdiff); + send_db(clnt); + exit(0); + } + } + } + while (children && waitpid(-1, &status, WNOHANG) > 0) + children--; + } +} + +static int verify_forging(int fd) +{ + struct ucred cred; + socklen_t olen = sizeof(cred); + + if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, (void *)&cred, &olen) || + olen < sizeof(cred)) + return -1; + if (cred.uid == getuid() || cred.uid == 0) + return 0; + return -1; +} + +static void usage(void) __attribute__((noreturn)); + +static void usage(void) +{ + fprintf(stderr, +"Usage: rtacct [ -h?vVzrnasd:t: ] [ ListOfRealms ]\n" + ); + exit(-1); +} + +int main(int argc, char *argv[]) +{ + char hist_name[128]; + struct sockaddr_un sun; + int ch; + int fd; + + while ((ch = getopt(argc, argv, "h?vVzrM:nasd:t:")) != EOF) { + switch (ch) { + case 'z': + dump_zeros = 1; + break; + case 'r': + reset_history = 1; + break; + case 'a': + ignore_history = 1; + break; + case 's': + no_update = 1; + break; + case 'n': + no_output = 1; + break; + case 'd': + scan_interval = 1000*atoi(optarg); + break; + case 't': + if (sscanf(optarg, "%d", &time_constant) != 1 || + time_constant <= 0) { + fprintf(stderr, "rtacct: invalid time constant divisor\n"); + exit(-1); + } + break; + case 'v': + case 'V': + printf("rtacct utility, iproute2-%s\n", version); + exit(0); + 
case 'M': + /* Some secret undocumented option, nobody + * is expected to ask about its sense. See? + */ + sscanf(optarg, "%lx", &magic_number); + break; + case 'h': + case '?': + default: + usage(); + } + } + + argc -= optind; + argv += optind; + + if (argc) { + while (argc > 0) { + __u32 realm; + + if (rtnl_rtrealm_a2n(&realm, argv[0])) { + fprintf(stderr, "Warning: realm \"%s\" does not exist.\n", argv[0]); + exit(-1); + } + rmap[realm>>5] |= (1<<(realm&0x1f)); + argc--; argv++; + } + } else { + memset(rmap, ~0, sizeof(rmap)); + /* Always suppress zeros. */ + dump_zeros = 0; + } + + sun.sun_family = AF_UNIX; + sun.sun_path[0] = 0; + sprintf(sun.sun_path+1, "rtacct%d", getuid()); + + if (scan_interval > 0) { + if (time_constant == 0) + time_constant = 60; + time_constant *= 1000; + W = 1 - 1/exp(log(10)*(double)scan_interval/time_constant); + if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) { + perror("rtacct: socket"); + exit(-1); + } + if (bind(fd, (struct sockaddr *)&sun, 2+1+strlen(sun.sun_path+1)) < 0) { + perror("rtacct: bind"); + exit(-1); + } + if (listen(fd, 5) < 0) { + perror("rtacct: listen"); + exit(-1); + } + if (daemon(0, 0)) { + perror("rtacct: daemon"); + exit(-1); + } + signal(SIGPIPE, SIG_IGN); + signal(SIGCHLD, sigchild); + server_loop(fd); + exit(0); + } + + if (getenv("RTACCT_HISTORY")) + snprintf(hist_name, sizeof(hist_name), "%s", getenv("RTACCT_HISTORY")); + else + sprintf(hist_name, "/tmp/.rtacct.u%d", getuid()); + + if (reset_history) + unlink(hist_name); + + if (!ignore_history || !no_update) { + struct stat stb; + + fd = open(hist_name, O_RDWR|O_CREAT|O_NOFOLLOW, 0600); + if (fd < 0) { + perror("rtacct: open history file"); + exit(-1); + } + if (flock(fd, LOCK_EX)) { + perror("rtacct: flock history file"); + exit(-1); + } + if (fstat(fd, &stb) != 0) { + perror("rtacct: fstat history file"); + exit(-1); + } + if (stb.st_nlink != 1 || stb.st_uid != getuid()) { + fprintf(stderr, "rtacct: something is so wrong with history file, that I 
prefer not to proceed.\n"); + exit(-1); + } + if (stb.st_size != sizeof(*hist_db)) + if (write(fd, kern_db, sizeof(*hist_db)) < 0) { + perror("rtacct: write history file"); + exit(-1); + } + + hist_db = mmap(NULL, sizeof(*hist_db), + PROT_READ|PROT_WRITE, + no_update ? MAP_PRIVATE : MAP_SHARED, + fd, 0); + + if ((unsigned long)hist_db == ~0UL) { + perror("mmap"); + exit(-1); + } + + if (!ignore_history) { + FILE *tfp; + long uptime = -1; + + if ((tfp = fopen("/proc/uptime", "r")) != NULL) { + if (fscanf(tfp, "%ld", &uptime) != 1) + uptime = -1; + fclose(tfp); + } + + if (uptime >= 0 && time(NULL) >= stb.st_mtime+uptime) { + fprintf(stderr, "rtacct: history is aged out, resetting\n"); + memset(hist_db, 0, sizeof(*hist_db)); + } + } + + close(fd); + } + + if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0 && + (connect(fd, (struct sockaddr *)&sun, 2+1+strlen(sun.sun_path+1)) == 0 + || (strcpy(sun.sun_path+1, "rtacct0"), + connect(fd, (struct sockaddr *)&sun, 2+1+strlen(sun.sun_path+1)) == 0)) + && verify_forging(fd) == 0) { + nread(fd, (char *)kern_db, sizeof(*kern_db)); + if (hist_db && hist_db->signature[0] && + strcmp(kern_db->signature, hist_db->signature)) { + fprintf(stderr, "rtacct: history is stale, ignoring it.\n"); + hist_db = NULL; + } + close(fd); + } else { + if (fd >= 0) + close(fd); + + if (hist_db && hist_db->signature[0] && + strcmp(hist_db->signature, "kernel")) { + fprintf(stderr, "rtacct: history is stale, ignoring it.\n"); + hist_db = NULL; + } + + pad_kern_table(kern_db, read_kern_table(kern_db->ival)); + strcpy(kern_db->signature, "kernel"); + } + + if (ignore_history || hist_db == NULL) + dump_abs_db(stdout); + else + dump_incr_db(stdout); + + exit(0); +} diff --git a/misc/ss.c b/misc/ss.c new file mode 100644 index 0000000..fb560a5 --- /dev/null +++ b/misc/ss.c @@ -0,0 +1,5887 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * ss.c "sockstat", socket statistics + * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + */ + +#include 
<stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/uio.h> +#include <sys/sysmacros.h> +#include <netinet/in.h> +#include <string.h> +#include <errno.h> +#include <netdb.h> +#include <arpa/inet.h> +#include <dirent.h> +#include <fnmatch.h> +#include <getopt.h> +#include <stdbool.h> +#include <limits.h> +#include <stdarg.h> +#include <ctype.h> + +#include "ss_util.h" +#include "utils.h" +#include "ll_map.h" +#include "libnetlink.h" +#include "namespace.h" +#include "version.h" +#include "rt_names.h" +#include "cg_map.h" +#include "selinux.h" + +#include <linux/tcp.h> +#include <linux/unix_diag.h> +#include <linux/netdevice.h> /* for MAX_ADDR_LEN */ +#include <linux/filter.h> +#include <linux/xdp_diag.h> +#include <linux/packet_diag.h> +#include <linux/netlink_diag.h> +#include <linux/sctp.h> +#include <linux/vm_sockets_diag.h> +#include <linux/net.h> +#include <linux/tipc.h> +#include <linux/tipc_netlink.h> +#include <linux/tipc_sockets_diag.h> +#include <linux/tls.h> +#include <linux/mptcp.h> + +#if HAVE_RPC +#include <rpc/rpc.h> +#include <rpc/xdr.h> +#endif + +/* AF_VSOCK/PF_VSOCK is only provided since glibc 2.18 */ +#ifndef PF_VSOCK +#define PF_VSOCK 40 +#endif +#ifndef AF_VSOCK +#define AF_VSOCK PF_VSOCK +#endif + +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif + +#define BUF_CHUNK (1024 * 1024) /* Buffer chunk allocation size */ +#define BUF_CHUNKS_MAX 5 /* Maximum number of allocated buffer chunks */ +#define LEN_ALIGN(x) (((x) + 1) & ~1) + +int preferred_family = AF_UNSPEC; +static int show_options; +int show_details; +static int show_processes; +static int show_threads; +static int show_mem; +static int show_tcpinfo; +static int show_bpf; +static int show_proc_ctx; +static int show_sock_ctx; +static int show_header = 1; +static int follow_events; +static int sctp_ino; +static int show_tipcinfo; +static int show_tos; +static int show_cgroup; +static int 
show_inet_sockopt; +int oneline; + +enum col_id { + COL_NETID, + COL_STATE, + COL_RECVQ, + COL_SENDQ, + COL_ADDR, + COL_SERV, + COL_RADDR, + COL_RSERV, + COL_PROC, + COL_EXT, + COL_MAX +}; + +enum col_align { + ALIGN_LEFT, + ALIGN_CENTER, + ALIGN_RIGHT +}; + +struct column { + const enum col_align align; + const char *header; + const char *ldelim; + int disabled; + int width; /* Calculated, including additional layout spacing */ + int max_len; /* Measured maximum field length in this column */ +}; + +static struct column columns[] = { + { ALIGN_LEFT, "Netid", "", 0, 0, 0 }, + { ALIGN_LEFT, "State", " ", 0, 0, 0 }, + { ALIGN_LEFT, "Recv-Q", " ", 0, 0, 0 }, + { ALIGN_LEFT, "Send-Q", " ", 0, 0, 0 }, + { ALIGN_RIGHT, "Local Address:", " ", 0, 0, 0 }, + { ALIGN_LEFT, "Port", "", 0, 0, 0 }, + { ALIGN_RIGHT, "Peer Address:", " ", 0, 0, 0 }, + { ALIGN_LEFT, "Port", "", 0, 0, 0 }, + { ALIGN_LEFT, "Process", "", 0, 0, 0 }, + { ALIGN_LEFT, "", "", 0, 0, 0 }, +}; + +static struct column *current_field = columns; + +/* Output buffer: chained chunks of BUF_CHUNK bytes. Each field is written to + * the buffer as a variable size token. A token consists of a 16 bits length + * field, followed by a string which is not NULL-terminated. + * + * A new chunk is allocated and linked when the current chunk doesn't have + * enough room to store the current token as a whole. 
+ */ +struct buf_chunk { + struct buf_chunk *next; /* Next chained chunk */ + char *end; /* Current end of content */ + char data[0]; +}; + +struct buf_token { + uint16_t len; /* Data length, excluding length descriptor */ + char data[0]; +}; + +static struct { + struct buf_token *cur; /* Position of current token in chunk */ + struct buf_chunk *head; /* First chunk */ + struct buf_chunk *tail; /* Current chunk */ + int chunks; /* Number of allocated chunks */ +} buffer; + +static const char *TCP_PROTO = "tcp"; +static const char *UDP_PROTO = "udp"; +#ifdef HAVE_RPC +static const char *TCP6_PROTO = "tcp6"; +static const char *UDP6_PROTO = "udp6"; +static const char *SCTP_PROTO = "sctp"; +#endif +static const char *RAW_PROTO = "raw"; +static const char *dg_proto; + +enum { + TCP_DB, + MPTCP_DB, + DCCP_DB, + UDP_DB, + RAW_DB, + UNIX_DG_DB, + UNIX_ST_DB, + UNIX_SQ_DB, + PACKET_DG_DB, + PACKET_R_DB, + NETLINK_DB, + SCTP_DB, + VSOCK_ST_DB, + VSOCK_DG_DB, + TIPC_DB, + XDP_DB, + MAX_DB +}; + +#define PACKET_DBM ((1<<PACKET_DG_DB)|(1<<PACKET_R_DB)) +#define UNIX_DBM ((1<<UNIX_DG_DB)|(1<<UNIX_ST_DB)|(1<<UNIX_SQ_DB)) +#define ALL_DB ((1<<MAX_DB)-1) +#define INET_L4_DBM ((1<<TCP_DB)|(1<<MPTCP_DB)|(1<<UDP_DB)|(1<<DCCP_DB)|(1<<SCTP_DB)) +#define INET_DBM (INET_L4_DBM | (1<<RAW_DB)) +#define VSOCK_DBM ((1<<VSOCK_ST_DB)|(1<<VSOCK_DG_DB)) + +enum { + SS_UNKNOWN, + SS_ESTABLISHED, + SS_SYN_SENT, + SS_SYN_RECV, + SS_FIN_WAIT1, + SS_FIN_WAIT2, + SS_TIME_WAIT, + SS_CLOSE, + SS_CLOSE_WAIT, + SS_LAST_ACK, + SS_LISTEN, + SS_CLOSING, + SS_NEW_SYN_RECV, /* Kernel only value, not for use in user space */ + SS_BOUND_INACTIVE, + SS_MAX +}; + +enum { + SCTP_STATE_CLOSED = 0, + SCTP_STATE_COOKIE_WAIT = 1, + SCTP_STATE_COOKIE_ECHOED = 2, + SCTP_STATE_ESTABLISHED = 3, + SCTP_STATE_SHUTDOWN_PENDING = 4, + SCTP_STATE_SHUTDOWN_SENT = 5, + SCTP_STATE_SHUTDOWN_RECEIVED = 6, + SCTP_STATE_SHUTDOWN_ACK_SENT = 7, +}; + +#define SS_ALL ((1 << SS_MAX) - 1) +#define SS_CONN (SS_ALL & 
~((1<<SS_LISTEN)|(1<<SS_CLOSE)|(1<<SS_TIME_WAIT)|(1<<SS_SYN_RECV))) +#define TIPC_SS_CONN ((1<<SS_ESTABLISHED)|(1<<SS_LISTEN)|(1<<SS_CLOSE)) + +#include "ssfilter.h" + +struct filter { + int dbs; + int states; + uint64_t families; + struct ssfilter *f; + bool kill; + struct rtnl_handle *rth_for_killing; +}; + +#define FAMILY_MASK(family) ((uint64_t)1 << (family)) + +static const struct filter default_dbs[MAX_DB] = { + [TCP_DB] = { + .states = SS_CONN, + .families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6), + }, + [MPTCP_DB] = { + .states = SS_CONN, + .families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6), + }, + [DCCP_DB] = { + .states = SS_CONN, + .families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6), + }, + [UDP_DB] = { + .states = (1 << SS_ESTABLISHED), + .families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6), + }, + [RAW_DB] = { + .states = (1 << SS_ESTABLISHED), + .families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6), + }, + [UNIX_DG_DB] = { + .states = (1 << SS_CLOSE), + .families = FAMILY_MASK(AF_UNIX), + }, + [UNIX_ST_DB] = { + .states = SS_CONN, + .families = FAMILY_MASK(AF_UNIX), + }, + [UNIX_SQ_DB] = { + .states = SS_CONN, + .families = FAMILY_MASK(AF_UNIX), + }, + [PACKET_DG_DB] = { + .states = (1 << SS_CLOSE), + .families = FAMILY_MASK(AF_PACKET), + }, + [PACKET_R_DB] = { + .states = (1 << SS_CLOSE), + .families = FAMILY_MASK(AF_PACKET), + }, + [NETLINK_DB] = { + .states = (1 << SS_CLOSE), + .families = FAMILY_MASK(AF_NETLINK), + }, + [SCTP_DB] = { + .states = SS_CONN, + .families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6), + }, + [VSOCK_ST_DB] = { + .states = SS_CONN, + .families = FAMILY_MASK(AF_VSOCK), + }, + [VSOCK_DG_DB] = { + .states = SS_CONN, + .families = FAMILY_MASK(AF_VSOCK), + }, + [TIPC_DB] = { + .states = TIPC_SS_CONN, + .families = FAMILY_MASK(AF_TIPC), + }, + [XDP_DB] = { + .states = (1 << SS_CLOSE), + .families = FAMILY_MASK(AF_XDP), + }, +}; + +static const struct filter default_afs[AF_MAX] = { + [AF_INET] = { + .dbs 
= INET_DBM, + .states = SS_CONN, + }, + [AF_INET6] = { + .dbs = INET_DBM, + .states = SS_CONN, + }, + [AF_UNIX] = { + .dbs = UNIX_DBM, + .states = SS_CONN, + }, + [AF_PACKET] = { + .dbs = PACKET_DBM, + .states = (1 << SS_CLOSE), + }, + [AF_NETLINK] = { + .dbs = (1 << NETLINK_DB), + .states = (1 << SS_CLOSE), + }, + [AF_VSOCK] = { + .dbs = VSOCK_DBM, + .states = SS_CONN, + }, + [AF_TIPC] = { + .dbs = (1 << TIPC_DB), + .states = TIPC_SS_CONN, + }, + [AF_XDP] = { + .dbs = (1 << XDP_DB), + .states = (1 << SS_CLOSE), + }, +}; + +static int do_default = 1; +static struct filter current_filter; + +static void filter_db_set(struct filter *f, int db, bool enable) +{ + if (enable) { + f->states |= default_dbs[db].states; + f->dbs |= 1 << db; + } else { + f->dbs &= ~(1 << db); + } + do_default = 0; +} + +static int filter_db_parse(struct filter *f, const char *s) +{ + const struct { + const char *name; + int dbs[MAX_DB + 1]; + } db_name_tbl[] = { +#define ENTRY(name, ...) { #name, { __VA_ARGS__, MAX_DB } } + ENTRY(all, UDP_DB, DCCP_DB, TCP_DB, MPTCP_DB, RAW_DB, + UNIX_ST_DB, UNIX_DG_DB, UNIX_SQ_DB, + PACKET_R_DB, PACKET_DG_DB, NETLINK_DB, + SCTP_DB, VSOCK_ST_DB, VSOCK_DG_DB, XDP_DB), + ENTRY(inet, UDP_DB, DCCP_DB, TCP_DB, MPTCP_DB, SCTP_DB, RAW_DB), + ENTRY(udp, UDP_DB), + ENTRY(dccp, DCCP_DB), + ENTRY(tcp, TCP_DB), + ENTRY(mptcp, MPTCP_DB), + ENTRY(sctp, SCTP_DB), + ENTRY(raw, RAW_DB), + ENTRY(unix, UNIX_ST_DB, UNIX_DG_DB, UNIX_SQ_DB), + ENTRY(unix_stream, UNIX_ST_DB), + ENTRY(u_str, UNIX_ST_DB), /* alias for unix_stream */ + ENTRY(unix_dgram, UNIX_DG_DB), + ENTRY(u_dgr, UNIX_DG_DB), /* alias for unix_dgram */ + ENTRY(unix_seqpacket, UNIX_SQ_DB), + ENTRY(u_seq, UNIX_SQ_DB), /* alias for unix_seqpacket */ + ENTRY(packet, PACKET_R_DB, PACKET_DG_DB), + ENTRY(packet_raw, PACKET_R_DB), + ENTRY(p_raw, PACKET_R_DB), /* alias for packet_raw */ + ENTRY(packet_dgram, PACKET_DG_DB), + ENTRY(p_dgr, PACKET_DG_DB), /* alias for packet_dgram */ + ENTRY(netlink, NETLINK_DB), + ENTRY(tipc, 
TIPC_DB), + ENTRY(vsock, VSOCK_ST_DB, VSOCK_DG_DB), + ENTRY(vsock_stream, VSOCK_ST_DB), + ENTRY(v_str, VSOCK_ST_DB), /* alias for vsock_stream */ + ENTRY(vsock_dgram, VSOCK_DG_DB), + ENTRY(v_dgr, VSOCK_DG_DB), /* alias for vsock_dgram */ + ENTRY(xdp, XDP_DB), +#undef ENTRY + }; + bool enable = true; + unsigned int i; + const int *dbp; + + if (s[0] == '!') { + enable = false; + s++; + } + for (i = 0; i < ARRAY_SIZE(db_name_tbl); i++) { + if (strcmp(s, db_name_tbl[i].name)) + continue; + for (dbp = db_name_tbl[i].dbs; *dbp != MAX_DB; dbp++) + filter_db_set(f, *dbp, enable); + return 0; + } + return -1; +} + +static void filter_af_set(struct filter *f, int af) +{ + f->states |= default_afs[af].states; + f->families |= FAMILY_MASK(af); + do_default = 0; + preferred_family = af; +} + +static int filter_af_get(struct filter *f, int af) +{ + return !!(f->families & FAMILY_MASK(af)); +} + +static void filter_states_set(struct filter *f, int states) +{ + if (states) + f->states = states; +} + +static void filter_merge_defaults(struct filter *f) +{ + int db; + int af; + + for (db = 0; db < MAX_DB; db++) { + if (!(f->dbs & (1 << db))) + continue; + + if (!(default_dbs[db].families & f->families)) + f->families |= default_dbs[db].families; + } + for (af = 0; af < AF_MAX; af++) { + if (!(f->families & FAMILY_MASK(af))) + continue; + + if (!(default_afs[af].dbs & f->dbs)) + f->dbs |= default_afs[af].dbs; + } +} + +static FILE *generic_proc_open(const char *env, const char *name) +{ + const char *p = getenv(env); + char store[128]; + + if (!p) { + p = getenv("PROC_ROOT") ? 
: "/proc"; + snprintf(store, sizeof(store)-1, "%s/%s", p, name); + p = store; + } + + return fopen(p, "r"); +} +#define net_tcp_open() generic_proc_open("PROC_NET_TCP", "net/tcp") +#define net_tcp6_open() generic_proc_open("PROC_NET_TCP6", "net/tcp6") +#define net_udp_open() generic_proc_open("PROC_NET_UDP", "net/udp") +#define net_udp6_open() generic_proc_open("PROC_NET_UDP6", "net/udp6") +#define net_raw_open() generic_proc_open("PROC_NET_RAW", "net/raw") +#define net_raw6_open() generic_proc_open("PROC_NET_RAW6", "net/raw6") +#define net_unix_open() generic_proc_open("PROC_NET_UNIX", "net/unix") +#define net_packet_open() generic_proc_open("PROC_NET_PACKET", \ + "net/packet") +#define net_netlink_open() generic_proc_open("PROC_NET_NETLINK", \ + "net/netlink") +#define net_sockstat_open() generic_proc_open("PROC_NET_SOCKSTAT", \ + "net/sockstat") +#define net_sockstat6_open() generic_proc_open("PROC_NET_SOCKSTAT6", \ + "net/sockstat6") +#define net_snmp_open() generic_proc_open("PROC_NET_SNMP", "net/snmp") +#define ephemeral_ports_open() generic_proc_open("PROC_IP_LOCAL_PORT_RANGE", \ + "sys/net/ipv4/ip_local_port_range") + +struct user_ent { + struct user_ent *next; + unsigned int ino; + int pid; + int tid; + int fd; + char *task; + char *task_ctx; + char *socket_ctx; +}; + +#define USER_ENT_HASH_SIZE 256 +static struct user_ent *user_ent_hash[USER_ENT_HASH_SIZE]; + +static int user_ent_hashfn(unsigned int ino) +{ + int val = (ino >> 24) ^ (ino >> 16) ^ (ino >> 8) ^ ino; + + return val & (USER_ENT_HASH_SIZE - 1); +} + +static void user_ent_add(unsigned int ino, char *task, + int pid, int tid, int fd, + char *task_ctx, + char *sock_ctx) +{ + struct user_ent *p, **pp; + + p = malloc(sizeof(struct user_ent)); + if (!p) { + fprintf(stderr, "ss: failed to malloc buffer\n"); + abort(); + } + p->next = NULL; + p->ino = ino; + p->pid = pid; + p->tid = tid; + p->fd = fd; + p->task = strdup(task); + p->task_ctx = strdup(task_ctx); + p->socket_ctx = strdup(sock_ctx); + + 
pp = &user_ent_hash[user_ent_hashfn(ino)]; + p->next = *pp; + *pp = p; +} + +#define MAX_PATH_LEN 1024 + +static void user_ent_hash_build_task(char *path, int pid, int tid) +{ + const char *no_ctx = "unavailable"; + char task[16] = {'\0', }; + char stat[MAX_PATH_LEN]; + int pos_id, pos_fd; + char *task_context; + struct dirent *d; + DIR *dir; + + if (getpidcon(tid, &task_context) != 0) + task_context = strdup(no_ctx); + + pos_id = strlen(path); /* $PROC_ROOT/$ID/ */ + + snprintf(path + pos_id, MAX_PATH_LEN - pos_id, "fd/"); + dir = opendir(path); + if (!dir) { + freecon(task_context); + return; + } + + pos_fd = strlen(path); /* $PROC_ROOT/$ID/fd/ */ + + while ((d = readdir(dir)) != NULL) { + const char *pattern = "socket:["; + char *sock_context; + unsigned int ino; + ssize_t link_len; + char lnk[64]; + int fd; + + if (sscanf(d->d_name, "%d%*c", &fd) != 1) + continue; + + snprintf(path + pos_fd, MAX_PATH_LEN - pos_fd, "%d", fd); + + link_len = readlink(path, lnk, sizeof(lnk) - 1); + if (link_len == -1) + continue; + lnk[link_len] = '\0'; + + if (strncmp(lnk, pattern, strlen(pattern))) + continue; + + if (sscanf(lnk, "socket:[%u]", &ino) != 1) + continue; + + if (getfilecon(path, &sock_context) <= 0) + sock_context = strdup(no_ctx); + + if (task[0] == '\0') { + FILE *fp; + + strlcpy(stat, path, pos_id + 1); + snprintf(stat + pos_id, sizeof(stat) - pos_id, "stat"); + + fp = fopen(stat, "r"); + if (fp) { + if (fscanf(fp, "%*d (%[^)])", task) < 1) { + ; /* ignore */ + } + fclose(fp); + } + } + + user_ent_add(ino, task, pid, tid, fd, task_context, sock_context); + freecon(sock_context); + } + + freecon(task_context); + closedir(dir); +} + +static void user_ent_destroy(void) +{ + struct user_ent *p, *p_next; + int cnt = 0; + + while (cnt != USER_ENT_HASH_SIZE) { + p = user_ent_hash[cnt]; + while (p) { + free(p->task); + free(p->task_ctx); + free(p->socket_ctx); + p_next = p->next; + free(p); + p = p_next; + } + cnt++; + } +} + +static void user_ent_hash_build(void) +{ + 
const char *root = getenv("PROC_ROOT") ? : "/proc/"; + char name[MAX_PATH_LEN]; + struct dirent *d; + int nameoff; + DIR *dir; + + strlcpy(name, root, sizeof(name)); + + if (strlen(name) == 0 || name[strlen(name) - 1] != '/') + strcat(name, "/"); + + nameoff = strlen(name); + + dir = opendir(name); + if (!dir) + return; + + while ((d = readdir(dir)) != NULL) { + int pid; + + if (sscanf(d->d_name, "%d%*c", &pid) != 1) + continue; + + snprintf(name + nameoff, sizeof(name) - nameoff, "%d/", pid); + user_ent_hash_build_task(name, pid, pid); + + if (show_threads) { + struct dirent *task_d; + DIR *task_dir; + + snprintf(name + nameoff, sizeof(name) - nameoff, "%d/task/", pid); + + task_dir = opendir(name); + if (!task_dir) + continue; + + while ((task_d = readdir(task_dir)) != NULL) { + int tid; + + if (sscanf(task_d->d_name, "%d%*c", &tid) != 1) + continue; + if (tid == pid) + continue; + + snprintf(name + nameoff, sizeof(name) - nameoff, "%d/", tid); + user_ent_hash_build_task(name, pid, tid); + } + closedir(task_dir); + } + } + closedir(dir); +} + +enum entry_types { + USERS, + PROC_CTX, + PROC_SOCK_CTX +}; + +#define ENTRY_BUF_SIZE 512 +static int find_entry(unsigned int ino, char **buf, int type) +{ + struct user_ent *p; + int cnt = 0; + char *ptr; + char thread_info[16] = {'\0', }; + char *new_buf; + int len, new_buf_len; + int buf_used = 0; + int buf_len = 0; + + if (!ino) + return 0; + + p = user_ent_hash[user_ent_hashfn(ino)]; + ptr = *buf = NULL; + while (p) { + if (p->ino != ino) + goto next; + + while (1) { + ptr = *buf + buf_used; + + if (show_threads) + snprintf(thread_info, sizeof(thread_info), "tid=%d,", p->tid); + + switch (type) { + case USERS: + len = snprintf(ptr, buf_len - buf_used, + "(\"%s\",pid=%d,%sfd=%d),", + p->task, p->pid, thread_info, p->fd); + break; + case PROC_CTX: + len = snprintf(ptr, buf_len - buf_used, + "(\"%s\",pid=%d,%sproc_ctx=%s,fd=%d),", + p->task, p->pid, thread_info, + p->task_ctx, p->fd); + break; + case PROC_SOCK_CTX: + len 
= snprintf(ptr, buf_len - buf_used, + "(\"%s\",pid=%d,%sproc_ctx=%s,fd=%d,sock_ctx=%s),", + p->task, p->pid, thread_info, + p->task_ctx, p->fd, + p->socket_ctx); + break; + default: + fprintf(stderr, "ss: invalid type: %d\n", type); + abort(); + } + + if (len < 0 || len >= buf_len - buf_used) { + new_buf_len = buf_len + ENTRY_BUF_SIZE; + new_buf = realloc(*buf, new_buf_len); + if (!new_buf) { + fprintf(stderr, "ss: failed to malloc buffer\n"); + abort(); + } + *buf = new_buf; + buf_len = new_buf_len; + continue; + } else { + buf_used += len; + break; + } + } + cnt++; +next: + p = p->next; + } + if (buf_used) { + ptr = *buf + buf_used; + ptr[-1] = '\0'; + } + return cnt; +} + +static unsigned long long cookie_sk_get(const uint32_t *cookie) +{ + return (((unsigned long long)cookie[1] << 31) << 1) | cookie[0]; +} + +static const char *sctp_sstate_name[] = { + [SCTP_STATE_CLOSED] = "CLOSED", + [SCTP_STATE_COOKIE_WAIT] = "COOKIE_WAIT", + [SCTP_STATE_COOKIE_ECHOED] = "COOKIE_ECHOED", + [SCTP_STATE_ESTABLISHED] = "ESTAB", + [SCTP_STATE_SHUTDOWN_PENDING] = "SHUTDOWN_PENDING", + [SCTP_STATE_SHUTDOWN_SENT] = "SHUTDOWN_SENT", + [SCTP_STATE_SHUTDOWN_RECEIVED] = "SHUTDOWN_RECEIVED", + [SCTP_STATE_SHUTDOWN_ACK_SENT] = "ACK_SENT", +}; + +static const char * const stype_nameg[] = { + "UNKNOWN", + [SOCK_STREAM] = "STREAM", + [SOCK_DGRAM] = "DGRAM", + [SOCK_RDM] = "RDM", + [SOCK_SEQPACKET] = "SEQPACKET", +}; + +struct sockstat { + struct sockstat *next; + unsigned int type; + uint16_t prot; + uint16_t raw_prot; + inet_prefix local; + inet_prefix remote; + int lport; + int rport; + int state; + int rq, wq; + unsigned int ino; + unsigned int uid; + int refcnt; + unsigned int iface; + unsigned long long sk; + char *name; + char *peer_name; + __u32 mark; + __u64 cgroup_id; +}; + +struct dctcpstat { + unsigned int ce_state; + unsigned int alpha; + unsigned int ab_ecn; + unsigned int ab_tot; + bool enabled; +}; + +struct tcpstat { + struct sockstat ss; + unsigned int timer; + unsigned int 
timeout; + int probes; + char cong_alg[16]; + double rto, ato, rtt, rttvar; + int qack, ssthresh, backoff; + double send_bps; + int snd_wscale; + int rcv_wscale; + int mss; + int rcv_mss; + int advmss; + unsigned int pmtu; + unsigned int cwnd; + unsigned int lastsnd; + unsigned int lastrcv; + unsigned int lastack; + double pacing_rate; + double pacing_rate_max; + double delivery_rate; + unsigned long long bytes_acked; + unsigned long long bytes_received; + unsigned int segs_out; + unsigned int segs_in; + unsigned int data_segs_out; + unsigned int data_segs_in; + unsigned int unacked; + unsigned int retrans; + unsigned int retrans_total; + unsigned int lost; + unsigned int sacked; + unsigned int fackets; + unsigned int reordering; + unsigned int not_sent; + unsigned int delivered; + unsigned int delivered_ce; + unsigned int dsack_dups; + unsigned int reord_seen; + double rcv_rtt; + double min_rtt; + unsigned int rcv_ooopack; + unsigned int snd_wnd; + unsigned int rcv_wnd; + unsigned int rehash; + int rcv_space; + unsigned int rcv_ssthresh; + unsigned long long busy_time; + unsigned long long rwnd_limited; + unsigned long long sndbuf_limited; + unsigned long long bytes_sent; + unsigned long long bytes_retrans; + bool has_ts_opt; + bool has_usec_ts_opt; + bool has_sack_opt; + bool has_ecn_opt; + bool has_ecnseen_opt; + bool has_fastopen_opt; + bool has_wscale_opt; + bool app_limited; + struct dctcpstat *dctcp; + struct tcp_bbr_info *bbr_info; +}; + +/* SCTP assocs share the same inode number with their parent endpoint. So if we + * have seen the inode number before, it must be an assoc instead of the next + * endpoint. 
*/ +static bool is_sctp_assoc(struct sockstat *s, const char *sock_name) +{ + if (strcmp(sock_name, "sctp")) + return false; + if (!sctp_ino || sctp_ino != s->ino) + return false; + return true; +} + +static const char *unix_netid_name(int type) +{ + switch (type) { + case SOCK_STREAM: + return "u_str"; + case SOCK_SEQPACKET: + return "u_seq"; + case SOCK_DGRAM: + default: + return "u_dgr"; + } +} + +static const char *proto_name(int protocol) +{ + switch (protocol) { + case 0: + return "raw"; + case IPPROTO_UDP: + return "udp"; + case IPPROTO_TCP: + return "tcp"; + case IPPROTO_MPTCP: + return "mptcp"; + case IPPROTO_SCTP: + return "sctp"; + case IPPROTO_DCCP: + return "dccp"; + case IPPROTO_ICMPV6: + return "icmp6"; + } + + return "???"; +} + +static const char *vsock_netid_name(int type) +{ + switch (type) { + case SOCK_STREAM: + return "v_str"; + case SOCK_DGRAM: + return "v_dgr"; + default: + return "???"; + } +} + +static const char *tipc_netid_name(int type) +{ + switch (type) { + case SOCK_STREAM: + return "ti_st"; + case SOCK_DGRAM: + return "ti_dg"; + case SOCK_RDM: + return "ti_rd"; + case SOCK_SEQPACKET: + return "ti_sq"; + default: + return "???"; + } +} + +/* Allocate and initialize a new buffer chunk */ +static struct buf_chunk *buf_chunk_new(void) +{ + struct buf_chunk *new = malloc(BUF_CHUNK); + + if (!new) + abort(); + + new->next = NULL; + + /* This is also the last block */ + buffer.tail = new; + + /* Next token will be stored at the beginning of chunk data area, and + * its initial length is zero. + */ + buffer.cur = (struct buf_token *)new->data; + buffer.cur->len = 0; + + new->end = buffer.cur->data; + + buffer.chunks++; + + return new; +} + +/* Return available tail room in given chunk */ +static int buf_chunk_avail(struct buf_chunk *chunk) +{ + return BUF_CHUNK - offsetof(struct buf_chunk, data) - + (chunk->end - chunk->data); +} + +/* Update end pointer and token length, link new chunk if we hit the end of the + * current one. 
Return -EAGAIN if we got a new chunk, caller has to print again. + */ +static int buf_update(int len) +{ + struct buf_chunk *chunk = buffer.tail; + struct buf_token *t = buffer.cur; + + /* Claim success if new content fits in the current chunk, and anyway + * if this is the first token in the chunk: in the latter case, + * allocating a new chunk won't help, so we'll just cut the output. + */ + if ((len < buf_chunk_avail(chunk) && len != -1 /* glibc < 2.0.6 */) || + t == (struct buf_token *)chunk->data) { + len = min(len, buf_chunk_avail(chunk)); + + /* Total field length can't exceed 2^16 bytes, cut as needed */ + len = min(len, USHRT_MAX - t->len); + + chunk->end += len; + t->len += len; + return 0; + } + + /* Content truncated, time to allocate more */ + chunk->next = buf_chunk_new(); + + /* Copy current token over to new chunk, including length descriptor */ + memcpy(chunk->next->data, t, sizeof(t->len) + t->len); + chunk->next->end += t->len; + + /* Discard partially written field in old chunk */ + chunk->end -= t->len + sizeof(t->len); + + return -EAGAIN; +} + +/* Append content to buffer as part of the current field */ +__attribute__((format(printf, 1, 2))) +static void out(const char *fmt, ...) +{ + struct column *f = current_field; + va_list args; + char *pos; + int len; + + if (f->disabled) + return; + + if (!buffer.head) + buffer.head = buf_chunk_new(); + +again: /* Append to buffer: if we have a new chunk, print again */ + + pos = buffer.cur->data + buffer.cur->len; + va_start(args, fmt); + + /* Limit to tail room. 
If we hit the limit, buf_update() will tell us */ + len = vsnprintf(pos, buf_chunk_avail(buffer.tail), fmt, args); + va_end(args); + + if (buf_update(len)) + goto again; +} + +static int print_left_spacing(struct column *f, int stored, int printed) +{ + int s; + + if (!f->width || f->align == ALIGN_LEFT) + return 0; + + s = f->width - stored - printed; + if (f->align == ALIGN_CENTER) + /* If count of total spacing is odd, shift right by one */ + s = (s + 1) / 2; + + if (s > 0) + return printf("%*c", s, ' '); + + return 0; +} + +static void print_right_spacing(struct column *f, int printed) +{ + int s; + + if (!f->width || f->align == ALIGN_RIGHT) + return; + + s = f->width - printed; + if (f->align == ALIGN_CENTER) + s /= 2; + + if (s > 0) + printf("%*c", s, ' '); +} + +/* Done with field: update buffer pointer, start new token after current one */ +static void field_flush(struct column *f) +{ + struct buf_chunk *chunk; + unsigned int pad; + + if (f->disabled) + return; + + chunk = buffer.tail; + pad = buffer.cur->len % 2; + + if (buffer.cur->len > f->max_len) + f->max_len = buffer.cur->len; + + /* We need a new chunk if we can't store the next length descriptor. + * Mind the gap between end of previous token and next aligned position + * for length descriptor. 
+ */ + if (buf_chunk_avail(chunk) - pad < sizeof(buffer.cur->len)) { + chunk->end += pad; + chunk->next = buf_chunk_new(); + return; + } + + buffer.cur = (struct buf_token *)(buffer.cur->data + + LEN_ALIGN(buffer.cur->len)); + buffer.cur->len = 0; + buffer.tail->end = buffer.cur->data; +} + +static int field_is_last(struct column *f) +{ + return f - columns == COL_MAX - 1; +} + +/* Get the next available token in the buffer starting from the current token */ +static struct buf_token *buf_token_next(struct buf_token *cur) +{ + struct buf_chunk *chunk = buffer.tail; + + /* If we reached the end of chunk contents, get token from next chunk */ + if (cur->data + LEN_ALIGN(cur->len) == chunk->end) { + buffer.tail = chunk = chunk->next; + return chunk ? (struct buf_token *)chunk->data : NULL; + } + + return (struct buf_token *)(cur->data + LEN_ALIGN(cur->len)); +} + +/* Free up all allocated buffer chunks */ +static void buf_free_all(void) +{ + struct buf_chunk *tmp; + + for (buffer.tail = buffer.head; buffer.tail; ) { + tmp = buffer.tail; + buffer.tail = buffer.tail->next; + free(tmp); + } + buffer.head = NULL; + buffer.chunks = 0; +} + +/* Get current screen width, returns -1 if TIOCGWINSZ fails */ +static int render_screen_width(void) +{ + int width = -1; + + if (isatty(STDOUT_FILENO)) { + struct winsize w; + + if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) != -1) { + if (w.ws_col > 0) + width = w.ws_col; + } + } + + return width; +} + +/* Calculate column width from contents length. If columns don't fit on one + * line, break them into the least possible amount of lines and keep them + * aligned across lines. Available screen space is equally spread between fields + * as additional spacing. 
+ */ +static void render_calc_width(void) +{ + int screen_width, first, len = 0, linecols = 0; + struct column *c, *eol = columns - 1; + bool compact_output = false; + + screen_width = render_screen_width(); + if (screen_width == -1) { + screen_width = INT_MAX; + compact_output = true; + } + + /* First pass: set width for each column to measured content length */ + for (first = 1, c = columns; c - columns < COL_MAX; c++) { + if (c->disabled) + continue; + + if (!first && c->max_len) + c->width = c->max_len + strlen(c->ldelim); + else + c->width = c->max_len; + + /* But don't exceed screen size. If we exceed the screen size + * for even a single field, it will just start on a line of its + * own and then naturally wrap. + */ + c->width = min(c->width, screen_width); + + if (c->width) + first = 0; + } + + if (compact_output) { + /* Compact output, skip extending columns. */ + return; + } + + /* Second pass: find out newlines and distribute available spacing */ + for (c = columns; c - columns < COL_MAX; c++) { + int pad, spacing, rem, last; + struct column *tmp; + + if (!c->width) + continue; + + linecols++; + len += c->width; + + for (last = 1, tmp = c + 1; tmp - columns < COL_MAX; tmp++) { + if (tmp->width) { + last = 0; + break; + } + } + + if (!last && len < screen_width) { + /* Columns fit on screen so far, nothing to do yet */ + continue; + } + + if (len == screen_width) { + /* Exact fit, just start with new line */ + goto newline; + } + + if (len > screen_width) { + /* Screen width exceeded: go back one column */ + len -= c->width; + c--; + linecols--; + } + + /* Distribute remaining space to columns on this line */ + pad = screen_width - len; + spacing = pad / linecols; + rem = pad % linecols; + for (tmp = c; tmp > eol; tmp--) { + if (!tmp->width) + continue; + + tmp->width += spacing; + if (rem) { + tmp->width++; + rem--; + } + } + +newline: + /* Line break: reset line counters, mark end-of-line */ + eol = c; + len = 0; + linecols = 0; + } +} + +/* Render 
buffered output with spacing and delimiters, then free up buffers */
static void render(void)
{
	struct buf_token *tok;
	struct column *col;
	int line_started = 0;
	int printed;

	if (!buffer.head)
		return;

	tok = (struct buf_token *)buffer.head->data;

	/* Ensure end alignment of last token, it wasn't necessarily flushed */
	buffer.tail->end += buffer.cur->len % 2;

	render_calc_width();

	/* Rewind and replay */
	buffer.tail = buffer.head;

	/* Start from the first column that has a width */
	for (col = columns; !col->width; col++)
		;

	for (; tok; tok = buf_token_next(tok)) {
		/* Print left delimiter only if we already started a line */
		printed = line_started++ ? printf("%s", col->ldelim) : 0;

		/* Print field content from token data with spacing */
		printed += print_left_spacing(col, tok->len, printed);
		printed += fwrite(tok->data, 1, tok->len, stdout);
		print_right_spacing(col, printed);

		/* Go to next non-empty field, deal with end-of-line */
		do {
			if (!field_is_last(col)) {
				col++;
				continue;
			}
			printf("\n");
			col = columns;
			line_started = 0;
		} while (col->disabled);
	}

	/* Deal with final end-of-line when the last non-empty field printed
	 * is not the last field.
	 */
	if (line_started)
		printf("\n");

	buf_free_all();
	current_field = columns;
}

/* Move to next field, and render buffer if we reached the maximum number of
 * chunks, at the last field in a line.
+ */ +static void field_next(void) +{ + if (field_is_last(current_field) && buffer.chunks >= BUF_CHUNKS_MAX) { + render(); + return; + } + + field_flush(current_field); + if (field_is_last(current_field)) + current_field = columns; + else + current_field++; +} + +/* Walk through fields and flush them until we reach the desired one */ +static void field_set(enum col_id id) +{ + while (id != current_field - columns) + field_next(); +} + +/* Print header for all non-empty columns */ +static void print_header(void) +{ + while (!field_is_last(current_field)) { + if (!current_field->disabled) + out("%s", current_field->header); + field_next(); + } +} + +static void sock_state_print(struct sockstat *s) +{ + const char *sock_name; + static const char * const sstate_name[] = { + "UNKNOWN", + [SS_ESTABLISHED] = "ESTAB", + [SS_SYN_SENT] = "SYN-SENT", + [SS_SYN_RECV] = "SYN-RECV", + [SS_FIN_WAIT1] = "FIN-WAIT-1", + [SS_FIN_WAIT2] = "FIN-WAIT-2", + [SS_TIME_WAIT] = "TIME-WAIT", + [SS_CLOSE] = "UNCONN", + [SS_CLOSE_WAIT] = "CLOSE-WAIT", + [SS_LAST_ACK] = "LAST-ACK", + [SS_LISTEN] = "LISTEN", + [SS_CLOSING] = "CLOSING", + [SS_NEW_SYN_RECV] = "UNDEF", /* Never returned by kernel */ + [SS_BOUND_INACTIVE] = "UNDEF", /* Never returned by kernel */ + }; + + switch (s->local.family) { + case AF_UNIX: + sock_name = unix_netid_name(s->type); + break; + case AF_INET: + case AF_INET6: + sock_name = proto_name(s->type); + break; + case AF_PACKET: + sock_name = s->type == SOCK_RAW ? 
"p_raw" : "p_dgr"; + break; + case AF_NETLINK: + sock_name = "nl"; + break; + case AF_TIPC: + sock_name = tipc_netid_name(s->type); + break; + case AF_VSOCK: + sock_name = vsock_netid_name(s->type); + break; + case AF_XDP: + sock_name = "xdp"; + break; + default: + sock_name = "unknown"; + } + + if (is_sctp_assoc(s, sock_name)) { + field_set(COL_STATE); /* Empty Netid field */ + out("`- %s", sctp_sstate_name[s->state]); + } else { + field_set(COL_NETID); + out("%s", sock_name); + field_set(COL_STATE); + out("%s", sstate_name[s->state]); + } + + field_set(COL_RECVQ); + out("%-6d", s->rq); + field_set(COL_SENDQ); + out("%-6d", s->wq); + field_set(COL_ADDR); +} + +static void sock_details_print(struct sockstat *s) +{ + if (s->uid) + out(" uid:%u", s->uid); + + out(" ino:%u", s->ino); + out(" sk:%llx", s->sk); + + if (s->mark) + out(" fwmark:0x%x", s->mark); + + if (s->cgroup_id) + out(" cgroup:%s", cg_id_to_path(s->cgroup_id)); +} + +static void sock_addr_print(const char *addr, char *delim, const char *port, + const char *ifname) +{ + if (ifname) + out("%s" "%%" "%s%s", addr, ifname, delim); + else + out("%s%s", addr, delim); + + field_next(); + out("%s", port); + field_next(); +} + +static const char *print_ms_timer(unsigned int timeout) +{ + static char buf[64]; + int secs, msecs, minutes; + + secs = timeout/1000; + minutes = secs/60; + secs = secs%60; + msecs = timeout%1000; + buf[0] = 0; + if (minutes) { + msecs = 0; + snprintf(buf, sizeof(buf)-16, "%dmin", minutes); + if (minutes > 9) + secs = 0; + } + if (secs) { + if (secs > 9) + msecs = 0; + sprintf(buf+strlen(buf), "%d%s", secs, msecs ? "." 
: "sec"); + } + if (msecs) + sprintf(buf+strlen(buf), "%03dms", msecs); + return buf; +} + +struct scache { + struct scache *next; + int port; + char *name; + const char *proto; +}; + +static struct scache *rlist; + +#ifdef HAVE_RPC +static CLIENT *rpc_client_create(rpcprog_t prog, rpcvers_t vers) +{ + struct netbuf nbuf; + struct sockaddr_un saddr; + int sock; + + memset(&saddr, 0, sizeof(saddr)); + sock = socket(AF_LOCAL, SOCK_STREAM, 0); + if (sock < 0) + return NULL; + + saddr.sun_family = AF_LOCAL; + strcpy(saddr.sun_path, _PATH_RPCBINDSOCK); + nbuf.len = SUN_LEN(&saddr); + nbuf.maxlen = sizeof(struct sockaddr_un); + nbuf.buf = &saddr; + + return clnt_vc_create(sock, &nbuf, prog, vers, 0, 0); +} + +static void init_service_resolver(void) +{ + struct rpcblist *rhead = NULL; + struct timeval timeout; + struct rpcent *rpc; + enum clnt_stat res; + CLIENT *client; + + timeout.tv_sec = 5; + timeout.tv_usec = 0; + + client = rpc_client_create(PMAPPROG, RPCBVERS4); + if (!client) + return; + + res = clnt_call(client, RPCBPROC_DUMP, (xdrproc_t)xdr_void, NULL, + (xdrproc_t)xdr_rpcblist_ptr, (char *)&rhead, + timeout); + if (res != RPC_SUCCESS) + return; + + for (; rhead; rhead = rhead->rpcb_next) { + char prog[128] = "rpc."; + struct scache *c; + int hport, lport, ok; + + c = malloc(sizeof(*c)); + if (!c) + continue; + + ok = sscanf(rhead->rpcb_map.r_addr, "::.%d.%d", &hport, &lport); + if (!ok) + ok = sscanf(rhead->rpcb_map.r_addr, "0.0.0.0.%d.%d", + &hport, &lport); + if (!ok) + continue; + c->port = hport << 8 | lport; + + if (strcmp(rhead->rpcb_map.r_netid, TCP_PROTO) == 0 || + strcmp(rhead->rpcb_map.r_netid, TCP6_PROTO) == 0) + c->proto = TCP_PROTO; + else if (strcmp(rhead->rpcb_map.r_netid, UDP_PROTO) == 0 || + strcmp(rhead->rpcb_map.r_netid, UDP6_PROTO) == 0) + c->proto = UDP_PROTO; + else if (strcmp(rhead->rpcb_map.r_netid, SCTP_PROTO) == 0) + c->proto = SCTP_PROTO; + else + continue; + + rpc = getrpcbynumber(rhead->rpcb_map.r_prog); + if (rpc) { + strncat(prog, 
rpc->r_name, 128 - strlen(prog)); + c->name = strdup(prog); + } + + c->next = rlist; + rlist = c; + } +} +#endif + +/* Even do not try default linux ephemeral port ranges: + * default /etc/services contains so much of useless crap + * wouldbe "allocated" to this area that resolution + * is really harmful. I shrug each time when seeing + * "socks" or "cfinger" in dumps. + */ +static int is_ephemeral(int port) +{ + static int min = 0, max; + + if (!min) { + FILE *f = ephemeral_ports_open(); + + if (!f || fscanf(f, "%d %d", &min, &max) < 2) { + min = 1024; + max = 4999; + } + if (f) + fclose(f); + } + return port >= min && port <= max; +} + + +static const char *__resolve_service(int port) +{ + struct scache *c; + + for (c = rlist; c; c = c->next) { + if (c->port == port && c->proto == dg_proto) + return c->name; + } + + if (!is_ephemeral(port)) { + static int notfirst; + struct servent *se; + + if (!notfirst) { + setservent(1); + notfirst = 1; + } + se = getservbyport(htons(port), dg_proto); + if (se) + return se->s_name; + } + + return NULL; +} + +#define SCACHE_BUCKETS 1024 +static struct scache *cache_htab[SCACHE_BUCKETS]; + +static const char *resolve_service(int port) +{ + static char buf[128]; + struct scache *c; + const char *res; + int hash; + + if (port == 0) { + buf[0] = '*'; + buf[1] = 0; + return buf; + } + + if (numeric) + goto do_numeric; + + if (dg_proto == RAW_PROTO) + return inet_proto_n2a(port, buf, sizeof(buf)); + + + hash = (port^(((unsigned long)dg_proto)>>2)) % SCACHE_BUCKETS; + + for (c = cache_htab[hash]; c; c = c->next) { + if (c->port == port && c->proto == dg_proto) + goto do_cache; + } + + c = malloc(sizeof(*c)); + if (!c) + goto do_numeric; + res = __resolve_service(port); + c->port = port; + c->name = res ? 
strdup(res) : NULL; + c->proto = dg_proto; + c->next = cache_htab[hash]; + cache_htab[hash] = c; + +do_cache: + if (c->name) + return c->name; + +do_numeric: + sprintf(buf, "%u", port); + return buf; +} + +static void inet_addr_print(const inet_prefix *a, int port, + unsigned int ifindex, bool v6only) +{ + char buf[1024]; + const char *ap = buf; + const char *ifname = NULL; + + if (a->family == AF_INET) { + ap = format_host(AF_INET, 4, a->data); + } else { + if (!v6only && + !memcmp(a->data, &in6addr_any, sizeof(in6addr_any))) { + buf[0] = '*'; + buf[1] = 0; + } else { + ap = format_host(a->family, 16, a->data); + + /* Numeric IPv6 addresses should be bracketed */ + if (strchr(ap, ':')) { + snprintf(buf, sizeof(buf), + "[%s]", ap); + ap = buf; + } + } + } + + if (ifindex) + ifname = ll_index_to_name(ifindex); + + sock_addr_print(ap, ":", resolve_service(port), ifname); +} + +struct aafilter { + inet_prefix addr; + long port; + unsigned int iface; + __u32 mark; + __u32 mask; + __u64 cgroup_id; + struct aafilter *next; +}; + +static int inet2_addr_match(const inet_prefix *a, const inet_prefix *p, + int plen) +{ + if (!inet_addr_match(a, p, plen)) + return 0; + + /* Cursed "v4 mapped" addresses: v4 mapped socket matches + * pure IPv4 rule, but v4-mapped rule selects only v4-mapped + * sockets. Fair? 
*/ + if (p->family == AF_INET && a->family == AF_INET6) { + if (a->data[0] == 0 && a->data[1] == 0 && + a->data[2] == htonl(0xffff)) { + inet_prefix tmp = *a; + + tmp.data[0] = a->data[3]; + return inet_addr_match(&tmp, p, plen); + } + } + return 1; +} + +static int unix_match(const inet_prefix *a, const inet_prefix *p) +{ + char *addr, *pattern; + + memcpy(&addr, a->data, sizeof(addr)); + memcpy(&pattern, p->data, sizeof(pattern)); + if (pattern == NULL) + return 1; + if (addr == NULL) + addr = ""; + return !fnmatch(pattern, addr, FNM_CASEFOLD); +} + +static int run_ssfilter(struct ssfilter *f, struct sockstat *s) +{ + switch (f->type) { + case SSF_S_AUTO: + { + if (s->local.family == AF_UNIX) { + char *p; + + memcpy(&p, s->local.data, sizeof(p)); + return p == NULL || (p[0] == '@' && strlen(p) == 6 && + strspn(p+1, "0123456789abcdef") == 5); + } + if (s->local.family == AF_PACKET) + return s->lport == 0 && s->local.data[0] == 0; + if (s->local.family == AF_NETLINK) + return s->lport < 0; + if (s->local.family == AF_VSOCK) + return s->lport > 1023; + + return is_ephemeral(s->lport); + } + case SSF_DCOND: + { + struct aafilter *a = (void *)f->pred; + + if (a->addr.family == AF_UNIX) + return unix_match(&s->remote, &a->addr); + if (a->port != -1 && a->port != s->rport) + return 0; + if (a->addr.bitlen) { + do { + if (!inet2_addr_match(&s->remote, &a->addr, a->addr.bitlen)) + return 1; + } while ((a = a->next) != NULL); + return 0; + } + return 1; + } + case SSF_SCOND: + { + struct aafilter *a = (void *)f->pred; + + if (a->addr.family == AF_UNIX) + return unix_match(&s->local, &a->addr); + if (a->port != -1 && a->port != s->lport) + return 0; + if (a->addr.bitlen) { + do { + if (!inet2_addr_match(&s->local, &a->addr, a->addr.bitlen)) + return 1; + } while ((a = a->next) != NULL); + return 0; + } + return 1; + } + case SSF_D_GE: + { + struct aafilter *a = (void *)f->pred; + + return s->rport >= a->port; + } + case SSF_D_LE: + { + struct aafilter *a = (void *)f->pred; 
+ + return s->rport <= a->port; + } + case SSF_S_GE: + { + struct aafilter *a = (void *)f->pred; + + return s->lport >= a->port; + } + case SSF_S_LE: + { + struct aafilter *a = (void *)f->pred; + + return s->lport <= a->port; + } + case SSF_DEVCOND: + { + struct aafilter *a = (void *)f->pred; + + return s->iface == a->iface; + } + case SSF_MARKMASK: + { + struct aafilter *a = (void *)f->pred; + + return (s->mark & a->mask) == a->mark; + } + case SSF_CGROUPCOND: + { + struct aafilter *a = (void *)f->pred; + + return s->cgroup_id == a->cgroup_id; + } + /* Yup. It is recursion. Sorry. */ + case SSF_AND: + return run_ssfilter(f->pred, s) && run_ssfilter(f->post, s); + case SSF_OR: + return run_ssfilter(f->pred, s) || run_ssfilter(f->post, s); + case SSF_NOT: + return !run_ssfilter(f->pred, s); + default: + abort(); + } +} + +/* Relocate external jumps by reloc. */ +static void ssfilter_patch(char *a, int len, int reloc) +{ + while (len > 0) { + struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)a; + + if (op->no == len+4) + op->no += reloc; + len -= op->yes; + a += op->yes; + } + if (len < 0) + abort(); +} + +static int ssfilter_bytecompile(struct ssfilter *f, char **bytecode) +{ + switch (f->type) { + case SSF_S_AUTO: + { + if (!(*bytecode = malloc(4))) abort(); + ((struct inet_diag_bc_op *)*bytecode)[0] = (struct inet_diag_bc_op){ INET_DIAG_BC_AUTO, 4, 8 }; + return 4; + } + case SSF_DCOND: + case SSF_SCOND: + { + struct aafilter *a = (void *)f->pred; + struct aafilter *b; + char *ptr; + int code = (f->type == SSF_DCOND ? INET_DIAG_BC_D_COND : INET_DIAG_BC_S_COND); + int len = 0; + + for (b = a; b; b = b->next) { + len += 4 + sizeof(struct inet_diag_hostcond); + if (a->addr.family == AF_INET6) + len += 16; + else + len += 4; + if (b->next) + len += 4; + } + if (!(ptr = malloc(len))) abort(); + *bytecode = ptr; + for (b = a; b; b = b->next) { + struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)ptr; + int alen = (a->addr.family == AF_INET6 ? 
16 : 4); + int oplen = alen + 4 + sizeof(struct inet_diag_hostcond); + struct inet_diag_hostcond *cond = (struct inet_diag_hostcond *)(ptr+4); + + *op = (struct inet_diag_bc_op){ code, oplen, oplen+4 }; + cond->family = a->addr.family; + cond->port = a->port; + cond->prefix_len = a->addr.bitlen; + memcpy(cond->addr, a->addr.data, alen); + ptr += oplen; + if (b->next) { + op = (struct inet_diag_bc_op *)ptr; + *op = (struct inet_diag_bc_op){ INET_DIAG_BC_JMP, 4, len - (ptr-*bytecode)}; + ptr += 4; + } + } + return ptr - *bytecode; + } + case SSF_D_GE: + { + struct aafilter *x = (void *)f->pred; + + if (!(*bytecode = malloc(8))) abort(); + ((struct inet_diag_bc_op *)*bytecode)[0] = (struct inet_diag_bc_op){ INET_DIAG_BC_D_GE, 8, 12 }; + ((struct inet_diag_bc_op *)*bytecode)[1] = (struct inet_diag_bc_op){ 0, 0, x->port }; + return 8; + } + case SSF_D_LE: + { + struct aafilter *x = (void *)f->pred; + + if (!(*bytecode = malloc(8))) abort(); + ((struct inet_diag_bc_op *)*bytecode)[0] = (struct inet_diag_bc_op){ INET_DIAG_BC_D_LE, 8, 12 }; + ((struct inet_diag_bc_op *)*bytecode)[1] = (struct inet_diag_bc_op){ 0, 0, x->port }; + return 8; + } + case SSF_S_GE: + { + struct aafilter *x = (void *)f->pred; + + if (!(*bytecode = malloc(8))) abort(); + ((struct inet_diag_bc_op *)*bytecode)[0] = (struct inet_diag_bc_op){ INET_DIAG_BC_S_GE, 8, 12 }; + ((struct inet_diag_bc_op *)*bytecode)[1] = (struct inet_diag_bc_op){ 0, 0, x->port }; + return 8; + } + case SSF_S_LE: + { + struct aafilter *x = (void *)f->pred; + + if (!(*bytecode = malloc(8))) abort(); + ((struct inet_diag_bc_op *)*bytecode)[0] = (struct inet_diag_bc_op){ INET_DIAG_BC_S_LE, 8, 12 }; + ((struct inet_diag_bc_op *)*bytecode)[1] = (struct inet_diag_bc_op){ 0, 0, x->port }; + return 8; + } + + case SSF_AND: + { + char *a1 = NULL, *a2 = NULL, *a; + int l1, l2; + + l1 = ssfilter_bytecompile(f->pred, &a1); + l2 = ssfilter_bytecompile(f->post, &a2); + if (!l1 || !l2) { + free(a1); + free(a2); + return 0; + } + if (!(a = 
malloc(l1+l2))) abort(); + memcpy(a, a1, l1); + memcpy(a+l1, a2, l2); + free(a1); free(a2); + ssfilter_patch(a, l1, l2); + *bytecode = a; + return l1+l2; + } + case SSF_OR: + { + char *a1 = NULL, *a2 = NULL, *a; + int l1, l2; + + l1 = ssfilter_bytecompile(f->pred, &a1); + l2 = ssfilter_bytecompile(f->post, &a2); + if (!l1 || !l2) { + free(a1); + free(a2); + return 0; + } + if (!(a = malloc(l1+l2+4))) abort(); + memcpy(a, a1, l1); + memcpy(a+l1+4, a2, l2); + free(a1); free(a2); + *(struct inet_diag_bc_op *)(a+l1) = (struct inet_diag_bc_op){ INET_DIAG_BC_JMP, 4, l2+4 }; + *bytecode = a; + return l1+l2+4; + } + case SSF_NOT: + { + char *a1 = NULL, *a; + int l1; + + l1 = ssfilter_bytecompile(f->pred, &a1); + if (!l1) { + free(a1); + return 0; + } + if (!(a = malloc(l1+4))) abort(); + memcpy(a, a1, l1); + free(a1); + *(struct inet_diag_bc_op *)(a+l1) = (struct inet_diag_bc_op){ INET_DIAG_BC_JMP, 4, 8 }; + *bytecode = a; + return l1+4; + } + case SSF_DEVCOND: + { + /* bytecompile for SSF_DEVCOND not supported yet */ + return 0; + } + case SSF_MARKMASK: + { + struct aafilter *a = (void *)f->pred; + struct instr { + struct inet_diag_bc_op op; + struct inet_diag_markcond cond; + }; + int inslen = sizeof(struct instr); + + if (!(*bytecode = malloc(inslen))) abort(); + ((struct instr *)*bytecode)[0] = (struct instr) { + { INET_DIAG_BC_MARK_COND, inslen, inslen + 4 }, + { a->mark, a->mask}, + }; + + return inslen; + } + case SSF_CGROUPCOND: + { + struct aafilter *a = (void *)f->pred; + struct instr { + struct inet_diag_bc_op op; + __u64 cgroup_id; + } __attribute__((packed)); + int inslen = sizeof(struct instr); + + if (!(*bytecode = malloc(inslen))) abort(); + ((struct instr *)*bytecode)[0] = (struct instr) { + { INET_DIAG_BC_CGROUP_COND, inslen, inslen + 4 }, + a->cgroup_id, + }; + + return inslen; + } + default: + abort(); + } +} + +static int remember_he(struct aafilter *a, struct hostent *he) +{ + char **ptr = he->h_addr_list; + int cnt = 0; + int len; + + if 
(he->h_addrtype == AF_INET) + len = 4; + else if (he->h_addrtype == AF_INET6) + len = 16; + else + return 0; + + while (*ptr) { + struct aafilter *b = a; + + if (a->addr.bitlen) { + if ((b = malloc(sizeof(*b))) == NULL) + return cnt; + *b = *a; + a->next = b; + } + memcpy(b->addr.data, *ptr, len); + b->addr.bytelen = len; + b->addr.bitlen = len*8; + b->addr.family = he->h_addrtype; + ptr++; + cnt++; + } + return cnt; +} + +static int get_dns_host(struct aafilter *a, const char *addr, int fam) +{ + static int notfirst; + int cnt = 0; + struct hostent *he; + + a->addr.bitlen = 0; + if (!notfirst) { + sethostent(1); + notfirst = 1; + } + he = gethostbyname2(addr, fam == AF_UNSPEC ? AF_INET : fam); + if (he) + cnt = remember_he(a, he); + if (fam == AF_UNSPEC) { + he = gethostbyname2(addr, AF_INET6); + if (he) + cnt += remember_he(a, he); + } + return !cnt; +} + +static int xll_initted; + +static void xll_init(void) +{ + struct rtnl_handle rth; + + if (rtnl_open(&rth, 0) < 0) + exit(1); + + ll_init_map(&rth); + rtnl_close(&rth); + xll_initted = 1; +} + +static const char *xll_index_to_name(int index) +{ + if (!xll_initted) + xll_init(); + return ll_index_to_name(index); +} + +static int xll_name_to_index(const char *dev) +{ + if (!xll_initted) + xll_init(); + return ll_name_to_index(dev); +} + +void *parse_devcond(char *name) +{ + struct aafilter a = { .iface = 0 }; + struct aafilter *res; + + a.iface = xll_name_to_index(name); + if (a.iface == 0) { + char *end; + unsigned long n; + + n = strtoul(name, &end, 0); + if (!end || end == name || *end || n > UINT_MAX) + return NULL; + + a.iface = n; + } + + res = malloc(sizeof(*res)); + *res = a; + + return res; +} + +static void vsock_set_inet_prefix(inet_prefix *a, __u32 cid) +{ + *a = (inet_prefix){ + .bytelen = sizeof(cid), + .family = AF_VSOCK, + }; + memcpy(a->data, &cid, sizeof(cid)); +} + +static char* find_port(char *addr, bool is_port) +{ + char *port = NULL; + if (is_port) + port = addr; + else + port = 
strchr(addr, ':'); + if (port && *port == ':') + *port++ = '\0'; + return port; +} + +void *parse_hostcond(char *addr, bool is_port) +{ + char *port = NULL; + struct aafilter a = { .port = -1 }; + struct aafilter *res; + int fam = preferred_family; + struct filter *f = ¤t_filter; + + if (strncmp(addr, "unix:", 5) == 0) { + fam = AF_UNIX; + addr += 5; + } else if (strncmp(addr, "link:", 5) == 0) { + fam = AF_PACKET; + addr += 5; + } else if (strncmp(addr, "netlink:", 8) == 0) { + fam = AF_NETLINK; + addr += 8; + } else if (strncmp(addr, "vsock:", 6) == 0) { + fam = AF_VSOCK; + addr += 6; + } else if (strncmp(addr, "inet:", 5) == 0) { + fam = AF_INET; + addr += 5; + } else if (strncmp(addr, "inet6:", 6) == 0) { + fam = AF_INET6; + addr += 6; + } + + if (fam == AF_UNIX) { + char *p; + + a.addr.family = AF_UNIX; + p = strdup(addr); + a.addr.bitlen = 8*strlen(p); + memcpy(a.addr.data, &p, sizeof(p)); + goto out; + } + + if (fam == AF_PACKET) { + a.addr.family = AF_PACKET; + a.addr.bitlen = 0; + port = find_port(addr, is_port); + if (port) { + if (*port && strcmp(port, "*")) { + if (get_long(&a.port, port, 0)) { + if ((a.port = xll_name_to_index(port)) <= 0) + return NULL; + } + } + } + if (!is_port && addr[0] && strcmp(addr, "*")) { + unsigned short tmp; + + a.addr.bitlen = 32; + if (ll_proto_a2n(&tmp, addr)) + return NULL; + a.addr.data[0] = ntohs(tmp); + } + goto out; + } + + if (fam == AF_NETLINK) { + a.addr.family = AF_NETLINK; + a.addr.bitlen = 0; + port = find_port(addr, is_port); + if (port) { + if (*port && strcmp(port, "*")) { + if (get_long(&a.port, port, 0)) { + if (strcmp(port, "kernel") == 0) + a.port = 0; + else + return NULL; + } + } + } + if (!is_port && addr[0] && strcmp(addr, "*")) { + a.addr.bitlen = 32; + if (nl_proto_a2n(&a.addr.data[0], addr) == -1) + return NULL; + } + goto out; + } + + if (fam == AF_VSOCK) { + __u32 cid = ~(__u32)0; + + a.addr.family = AF_VSOCK; + + port = find_port(addr, is_port); + + if (port && strcmp(port, "*") && + 
get_u32((__u32 *)&a.port, port, 0)) + return NULL; + + if (!is_port && addr[0] && strcmp(addr, "*")) { + a.addr.bitlen = 32; + if (get_u32(&cid, addr, 0)) + return NULL; + } + vsock_set_inet_prefix(&a.addr, cid); + goto out; + } + + /* URL-like literal [] */ + if (addr[0] == '[') { + addr++; + if ((port = strchr(addr, ']')) == NULL) + return NULL; + *port++ = 0; + } else if (addr[0] == '*') { + port = addr+1; + } else { + port = strrchr(strchr(addr, '/') ? : addr, ':'); + } + + if (is_port) + port = addr; + + if (port && *port) { + if (*port == ':') + *port++ = 0; + + if (*port && *port != '*') { + if (get_long(&a.port, port, 0)) { + struct servent *se1 = NULL; + struct servent *se2 = NULL; + + if (current_filter.dbs&(1<<UDP_DB)) + se1 = getservbyname(port, UDP_PROTO); + if (current_filter.dbs&(1<<TCP_DB)) + se2 = getservbyname(port, TCP_PROTO); + if (se1 && se2 && se1->s_port != se2->s_port) { + fprintf(stderr, "Error: ambiguous port \"%s\".\n", port); + return NULL; + } + if (!se1) + se1 = se2; + if (se1) { + a.port = ntohs(se1->s_port); + } else { + struct scache *s; + + for (s = rlist; s; s = s->next) { + if ((s->proto == UDP_PROTO && + (current_filter.dbs&(1<<UDP_DB))) || + (s->proto == TCP_PROTO && + (current_filter.dbs&(1<<TCP_DB)))) { + if (s->name && strcmp(s->name, port) == 0) { + if (a.port > 0 && a.port != s->port) { + fprintf(stderr, "Error: ambiguous port \"%s\".\n", port); + return NULL; + } + a.port = s->port; + } + } + } + if (a.port <= 0) { + fprintf(stderr, "Error: \"%s\" does not look like a port.\n", port); + return NULL; + } + } + } + } + } + if (!is_port && *addr && *addr != '*') { + if (get_prefix_1(&a.addr, addr, fam)) { + if (get_dns_host(&a, addr, fam)) { + fprintf(stderr, "Error: an inet prefix is expected rather than \"%s\".\n", addr); + return NULL; + } + } + } + +out: + if (fam != AF_UNSPEC) { + int states = f->states; + f->families = 0; + filter_af_set(f, fam); + filter_states_set(f, states); + } + + res = malloc(sizeof(*res)); + if 
(res) + memcpy(res, &a, sizeof(a)); + return res; +} + +void *parse_markmask(const char *markmask) +{ + struct aafilter a, *res; + + if (strchr(markmask, '/')) { + if (sscanf(markmask, "%i/%i", &a.mark, &a.mask) != 2) + return NULL; + } else { + a.mask = 0xffffffff; + if (sscanf(markmask, "%i", &a.mark) != 1) + return NULL; + } + + res = malloc(sizeof(*res)); + if (res) + memcpy(res, &a, sizeof(a)); + return res; +} + +void *parse_cgroupcond(const char *path) +{ + struct aafilter *res; + __u64 id; + + id = get_cgroup2_id(path); + if (!id) + return NULL; + + res = malloc(sizeof(*res)); + if (res) + res->cgroup_id = id; + + return res; +} + +static void proc_ctx_print(struct sockstat *s) +{ + char *buf; + + if (show_proc_ctx || show_sock_ctx) { + if (find_entry(s->ino, &buf, + (show_proc_ctx & show_sock_ctx) ? + PROC_SOCK_CTX : PROC_CTX) > 0) { + out(" users:(%s)", buf); + free(buf); + } + } else if (show_processes || show_threads) { + if (find_entry(s->ino, &buf, USERS) > 0) { + out(" users:(%s)", buf); + free(buf); + } + } + + field_next(); +} + +static void inet_stats_print(struct sockstat *s, bool v6only) +{ + sock_state_print(s); + + inet_addr_print(&s->local, s->lport, s->iface, v6only); + inet_addr_print(&s->remote, s->rport, 0, v6only); + + proc_ctx_print(s); +} + +static int proc_parse_inet_addr(char *loc, char *rem, int family, struct + sockstat * s) +{ + s->local.family = s->remote.family = family; + if (family == AF_INET) { + sscanf(loc, "%x:%x", s->local.data, (unsigned *)&s->lport); + sscanf(rem, "%x:%x", s->remote.data, (unsigned *)&s->rport); + s->local.bytelen = s->remote.bytelen = 4; + return 0; + } else { + sscanf(loc, "%08x%08x%08x%08x:%x", + s->local.data, + s->local.data + 1, + s->local.data + 2, + s->local.data + 3, + &s->lport); + sscanf(rem, "%08x%08x%08x%08x:%x", + s->remote.data, + s->remote.data + 1, + s->remote.data + 2, + s->remote.data + 3, + &s->rport); + s->local.bytelen = s->remote.bytelen = 16; + return 0; + } + return -1; +} + 
/*
 * Return 1 if the first @n bytes at @s are all non-NUL, i.e. s + n still
 * lies inside the string (at worst on its terminator); 0 otherwise.
 */
static int proc_line_has(const char *s, size_t n)
{
	while (n--)
		if (*s++ == '\0')
			return 0;
	return 1;
}

/*
 * Split one line of a /proc/net socket table in place.
 *
 * The line is cut at fixed offsets relative to its first three ':'
 * characters: *loc starts two bytes after the first ':', the byte five
 * positions after the second ':' is overwritten with NUL and *rem starts
 * right behind it, and the same is done after the third ':' for *data.
 *
 * Returns 0 on success.  Returns -1 when a ':' is missing or when the
 * line is too short for one of the fixed offsets, so that no byte beyond
 * the string terminator is ever read or written.
 */
static int proc_inet_split_line(char *line, char **loc, char **rem, char **data)
{
	char *p;

	p = strchr(line, ':');
	if (p == NULL || !proc_line_has(p, 2))
		return -1;
	*loc = p + 2;

	p = strchr(*loc, ':');
	if (p == NULL || !proc_line_has(p, 6))
		return -1;
	p[5] = 0;
	*rem = p + 6;

	p = strchr(*rem, ':');
	if (p == NULL || !proc_line_has(p, 6))
		return -1;
	p[5] = 0;
	*data = p + 6;

	return 0;
}
out(" autoclose:%d", s->sctpi_s_autoclose); + if (s->sctpi_s_adaptation_ind) + out(" adapind:%d", s->sctpi_s_adaptation_ind); + if (s->sctpi_s_pd_point) + out(" pdpoint:%d", s->sctpi_s_pd_point); + if (s->sctpi_s_nodelay) + out(" nodelay:%d", s->sctpi_s_nodelay); + if (s->sctpi_s_disable_fragments) + out(" nofrag:%d", s->sctpi_s_disable_fragments); + if (s->sctpi_s_v4mapped) + out(" v4mapped:%d", s->sctpi_s_v4mapped); + if (s->sctpi_s_frag_interleave) + out(" fraginl:%d", s->sctpi_s_frag_interleave); +} + +static void tcp_stats_print(struct tcpstat *s) +{ + char b1[64]; + + if (s->has_ts_opt) + out(" ts"); + if (s->has_usec_ts_opt) + out(" usec_ts"); + if (s->has_sack_opt) + out(" sack"); + if (s->has_ecn_opt) + out(" ecn"); + if (s->has_ecnseen_opt) + out(" ecnseen"); + if (s->has_fastopen_opt) + out(" fastopen"); + if (s->cong_alg[0]) + out(" %s", s->cong_alg); + if (s->has_wscale_opt) + out(" wscale:%d,%d", s->snd_wscale, s->rcv_wscale); + if (s->rto) + out(" rto:%g", s->rto); + if (s->backoff) + out(" backoff:%u", s->backoff); + if (s->rtt) + out(" rtt:%g/%g", s->rtt, s->rttvar); + if (s->ato) + out(" ato:%g", s->ato); + + if (s->qack) + out(" qack:%d", s->qack); + if (s->qack & 1) + out(" bidir"); + + if (s->mss) + out(" mss:%d", s->mss); + if (s->pmtu) + out(" pmtu:%u", s->pmtu); + if (s->rcv_mss) + out(" rcvmss:%d", s->rcv_mss); + if (s->advmss) + out(" advmss:%d", s->advmss); + if (s->cwnd) + out(" cwnd:%u", s->cwnd); + if (s->ssthresh) + out(" ssthresh:%d", s->ssthresh); + + if (s->bytes_sent) + out(" bytes_sent:%llu", s->bytes_sent); + if (s->bytes_retrans) + out(" bytes_retrans:%llu", s->bytes_retrans); + if (s->bytes_acked) + out(" bytes_acked:%llu", s->bytes_acked); + if (s->bytes_received) + out(" bytes_received:%llu", s->bytes_received); + if (s->segs_out) + out(" segs_out:%u", s->segs_out); + if (s->segs_in) + out(" segs_in:%u", s->segs_in); + if (s->data_segs_out) + out(" data_segs_out:%u", s->data_segs_out); + if (s->data_segs_in) + out(" 
data_segs_in:%u", s->data_segs_in); + + if (s->dctcp && s->dctcp->enabled) { + struct dctcpstat *dctcp = s->dctcp; + + out(" dctcp:(ce_state:%u,alpha:%u,ab_ecn:%u,ab_tot:%u)", + dctcp->ce_state, dctcp->alpha, dctcp->ab_ecn, + dctcp->ab_tot); + } else if (s->dctcp) { + out(" dctcp:fallback_mode"); + } + + if (s->bbr_info) { + __u64 bw; + + bw = s->bbr_info->bbr_bw_hi; + bw <<= 32; + bw |= s->bbr_info->bbr_bw_lo; + + out(" bbr:(bw:%sbps,mrtt:%g", + sprint_bw(b1, bw * 8.0), + (double)s->bbr_info->bbr_min_rtt / 1000.0); + if (s->bbr_info->bbr_pacing_gain) + out(",pacing_gain:%g", + (double)s->bbr_info->bbr_pacing_gain / 256.0); + if (s->bbr_info->bbr_cwnd_gain) + out(",cwnd_gain:%g", + (double)s->bbr_info->bbr_cwnd_gain / 256.0); + out(")"); + } + + if (s->send_bps) + out(" send %sbps", sprint_bw(b1, s->send_bps)); + if (s->lastsnd) + out(" lastsnd:%u", s->lastsnd); + if (s->lastrcv) + out(" lastrcv:%u", s->lastrcv); + if (s->lastack) + out(" lastack:%u", s->lastack); + + if (s->pacing_rate) { + out(" pacing_rate %sbps", sprint_bw(b1, s->pacing_rate)); + if (s->pacing_rate_max) + out("/%sbps", sprint_bw(b1, s->pacing_rate_max)); + } + + if (s->delivery_rate) + out(" delivery_rate %sbps", sprint_bw(b1, s->delivery_rate)); + if (s->delivered) + out(" delivered:%u", s->delivered); + if (s->delivered_ce) + out(" delivered_ce:%u", s->delivered_ce); + if (s->app_limited) + out(" app_limited"); + + if (s->busy_time) { + out(" busy:%llums", s->busy_time / 1000); + if (s->rwnd_limited) + out(" rwnd_limited:%llums(%.1f%%)", + s->rwnd_limited / 1000, + 100.0 * s->rwnd_limited / s->busy_time); + if (s->sndbuf_limited) + out(" sndbuf_limited:%llums(%.1f%%)", + s->sndbuf_limited / 1000, + 100.0 * s->sndbuf_limited / s->busy_time); + } + + if (s->unacked) + out(" unacked:%u", s->unacked); + if (s->retrans || s->retrans_total) + out(" retrans:%u/%u", s->retrans, s->retrans_total); + if (s->lost) + out(" lost:%u", s->lost); + if (s->sacked && s->ss.state != SS_LISTEN) + out(" 
sacked:%u", s->sacked); + if (s->dsack_dups) + out(" dsack_dups:%u", s->dsack_dups); + if (s->fackets) + out(" fackets:%u", s->fackets); + if (s->reordering != 3) + out(" reordering:%d", s->reordering); + if (s->reord_seen) + out(" reord_seen:%d", s->reord_seen); + if (s->rcv_rtt) + out(" rcv_rtt:%g", s->rcv_rtt); + if (s->rcv_space) + out(" rcv_space:%d", s->rcv_space); + if (s->rcv_ssthresh) + out(" rcv_ssthresh:%u", s->rcv_ssthresh); + if (s->not_sent) + out(" notsent:%u", s->not_sent); + if (s->min_rtt) + out(" minrtt:%g", s->min_rtt); + if (s->rcv_ooopack) + out(" rcv_ooopack:%u", s->rcv_ooopack); + if (s->snd_wnd) + out(" snd_wnd:%u", s->snd_wnd); + if (s->rcv_wnd) + out(" rcv_wnd:%u", s->rcv_wnd); + if (s->rehash) + out(" rehash:%u", s->rehash); +} + +static void tcp_timer_print(struct tcpstat *s) +{ + static const char * const tmr_name[] = { + "off", + "on", + "keepalive", + "timewait", + "persist", + "unknown" + }; + + if (s->timer) { + if (s->timer > 4) + s->timer = 5; + out(" timer:(%s,%s,%d)", + tmr_name[s->timer], + print_ms_timer(s->timeout), + s->retrans); + } +} + +static void sctp_timer_print(struct tcpstat *s) +{ + if (s->timer) + out(" timer:(T3_RTX,%s,%d)", + print_ms_timer(s->timeout), s->retrans); +} + +static int tcp_show_line(char *line, const struct filter *f, int family) +{ + int rto = 0, ato = 0; + struct tcpstat s = {}; + char *loc, *rem, *data; + char opt[256]; + int n; + int hz = get_user_hz(); + + if (proc_inet_split_line(line, &loc, &rem, &data)) + return -1; + + int state = (data[1] >= 'A') ? 
/*
 * Feed every record line of @fp, except the first (header) line, to
 * @worker.
 *
 * Each line must fit into the 256-byte buffer and end with a newline,
 * which is stripped before the call.  Reading stops early, with return
 * value 0, as soon as @worker returns a negative value.
 *
 * Returns 0 on success or early stop, -1 with errno set to EINVAL when a
 * line is over-long or unterminated, and -1 when the stream reports a
 * read error.
 */
static int generic_record_read(FILE *fp,
			       int (*worker)(char*, const struct filter *, int),
			       const struct filter *f, int fam)
{
	char line[256];

	/* skip header */
	if (fgets(line, sizeof(line), fp) == NULL)
		goto outerr;

	while (fgets(line, sizeof(line), fp) != NULL) {
		int n = strlen(line);

		if (n == 0 || line[n-1] != '\n') {
			/* errno codes are positive */
			errno = EINVAL;
			return -1;
		}
		line[n-1] = 0;

		if (worker(line, f, fam) < 0)
			return 0;
	}
outerr:

	return ferror(fp) ? -1 : 0;
}
16 : 4, + &sig->tcpm_addr), + sig->tcpm_prefixlen); + out_escape_buf(sig->tcpm_key, sig->tcpm_keylen, " ,"); +} + +static void tcp_tls_version(struct rtattr *attr) +{ + u_int16_t val; + + if (!attr) + return; + val = rta_getattr_u16(attr); + + switch (val) { + case TLS_1_2_VERSION: + out(" version: 1.2"); + break; + case TLS_1_3_VERSION: + out(" version: 1.3"); + break; + default: + out(" version: unknown(%hu)", val); + break; + } +} + +static void tcp_tls_cipher(struct rtattr *attr) +{ + u_int16_t val; + + if (!attr) + return; + val = rta_getattr_u16(attr); + + switch (val) { + case TLS_CIPHER_AES_GCM_128: + out(" cipher: aes-gcm-128"); + break; + case TLS_CIPHER_AES_GCM_256: + out(" cipher: aes-gcm-256"); + break; + } +} + +static void tcp_tls_conf(const char *name, struct rtattr *attr) +{ + u_int16_t val; + + if (!attr) + return; + val = rta_getattr_u16(attr); + + switch (val) { + case TLS_CONF_BASE: + out(" %s: none", name); + break; + case TLS_CONF_SW: + out(" %s: sw", name); + break; + case TLS_CONF_HW: + out(" %s: hw", name); + break; + case TLS_CONF_HW_RECORD: + out(" %s: hw-record", name); + break; + default: + out(" %s: unknown(%hu)", name, val); + break; + } +} + +static void mptcp_subflow_info(struct rtattr *tb[]) +{ + u_int32_t flags = 0; + + if (tb[MPTCP_SUBFLOW_ATTR_FLAGS]) { + char caps[32 + 1] = { 0 }, *cap = &caps[0]; + + flags = rta_getattr_u32(tb[MPTCP_SUBFLOW_ATTR_FLAGS]); + + if (flags & MPTCP_SUBFLOW_FLAG_MCAP_REM) + *cap++ = 'M'; + if (flags & MPTCP_SUBFLOW_FLAG_MCAP_LOC) + *cap++ = 'm'; + if (flags & MPTCP_SUBFLOW_FLAG_JOIN_REM) + *cap++ = 'J'; + if (flags & MPTCP_SUBFLOW_FLAG_JOIN_LOC) + *cap++ = 'j'; + if (flags & MPTCP_SUBFLOW_FLAG_BKUP_REM) + *cap++ = 'B'; + if (flags & MPTCP_SUBFLOW_FLAG_BKUP_LOC) + *cap++ = 'b'; + if (flags & MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED) + *cap++ = 'e'; + if (flags & MPTCP_SUBFLOW_FLAG_CONNECTED) + *cap++ = 'c'; + if (flags & MPTCP_SUBFLOW_FLAG_MAPVALID) + *cap++ = 'v'; + if (flags) + out(" flags:%s", caps); 
+ } + if (tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM] && + tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC] && + tb[MPTCP_SUBFLOW_ATTR_ID_REM] && + tb[MPTCP_SUBFLOW_ATTR_ID_LOC]) + out(" token:%04x(id:%hhu)/%04x(id:%hhu)", + rta_getattr_u32(tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM]), + rta_getattr_u8(tb[MPTCP_SUBFLOW_ATTR_ID_REM]), + rta_getattr_u32(tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC]), + rta_getattr_u8(tb[MPTCP_SUBFLOW_ATTR_ID_LOC])); + if (tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ]) + out(" seq:%llx", + rta_getattr_u64(tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ])); + if (tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ]) + out(" sfseq:%x", + rta_getattr_u32(tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ])); + if (tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET]) + out(" ssnoff:%x", + rta_getattr_u32(tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET])); + if (tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN]) + out(" maplen:%x", + rta_getattr_u32(tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN])); +} + +#define TCPI_HAS_OPT(info, opt) !!(info->tcpi_options & (opt)) + +static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r, + struct rtattr *tb[]) +{ + double rtt = 0; + struct tcpstat s = {}; + + s.ss.state = r->idiag_state; + + print_skmeminfo(tb, INET_DIAG_SKMEMINFO); + + if (tb[INET_DIAG_INFO]) { + struct tcp_info *info; + int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]); + + /* workaround for older kernels with less fields */ + if (len < sizeof(*info)) { + info = alloca(sizeof(*info)); + memcpy(info, RTA_DATA(tb[INET_DIAG_INFO]), len); + memset((char *)info + len, 0, sizeof(*info) - len); + } else + info = RTA_DATA(tb[INET_DIAG_INFO]); + + if (show_options) { + s.has_ts_opt = TCPI_HAS_OPT(info, TCPI_OPT_TIMESTAMPS); + s.has_usec_ts_opt = TCPI_HAS_OPT(info, TCPI_OPT_USEC_TS); + s.has_sack_opt = TCPI_HAS_OPT(info, TCPI_OPT_SACK); + s.has_ecn_opt = TCPI_HAS_OPT(info, TCPI_OPT_ECN); + s.has_ecnseen_opt = TCPI_HAS_OPT(info, TCPI_OPT_ECN_SEEN); + s.has_fastopen_opt = TCPI_HAS_OPT(info, TCPI_OPT_SYN_DATA); + } + + if (tb[INET_DIAG_CONG]) + strncpy(s.cong_alg, + rta_getattr_str(tb[INET_DIAG_CONG]), + 
sizeof(s.cong_alg) - 1); + + if (TCPI_HAS_OPT(info, TCPI_OPT_WSCALE)) { + s.has_wscale_opt = true; + s.snd_wscale = info->tcpi_snd_wscale; + s.rcv_wscale = info->tcpi_rcv_wscale; + } + + if (info->tcpi_rto && info->tcpi_rto != 3000000) + s.rto = (double)info->tcpi_rto / 1000; + + s.backoff = info->tcpi_backoff; + s.rtt = (double)info->tcpi_rtt / 1000; + s.rttvar = (double)info->tcpi_rttvar / 1000; + s.ato = (double)info->tcpi_ato / 1000; + s.mss = info->tcpi_snd_mss; + s.rcv_mss = info->tcpi_rcv_mss; + s.advmss = info->tcpi_advmss; + s.rcv_space = info->tcpi_rcv_space; + s.rcv_rtt = (double)info->tcpi_rcv_rtt / 1000; + s.lastsnd = info->tcpi_last_data_sent; + s.lastrcv = info->tcpi_last_data_recv; + s.lastack = info->tcpi_last_ack_recv; + s.unacked = info->tcpi_unacked; + s.retrans = info->tcpi_retrans; + s.retrans_total = info->tcpi_total_retrans; + s.lost = info->tcpi_lost; + s.sacked = info->tcpi_sacked; + s.fackets = info->tcpi_fackets; + s.reordering = info->tcpi_reordering; + s.rcv_ssthresh = info->tcpi_rcv_ssthresh; + s.cwnd = info->tcpi_snd_cwnd; + s.pmtu = info->tcpi_pmtu; + + if (info->tcpi_snd_ssthresh < 0xFFFF) + s.ssthresh = info->tcpi_snd_ssthresh; + + rtt = (double) info->tcpi_rtt; + if (tb[INET_DIAG_VEGASINFO]) { + const struct tcpvegas_info *vinfo + = RTA_DATA(tb[INET_DIAG_VEGASINFO]); + + if (vinfo->tcpv_enabled && + vinfo->tcpv_rtt && vinfo->tcpv_rtt != 0x7fffffff) + rtt = vinfo->tcpv_rtt; + } + + if (tb[INET_DIAG_DCTCPINFO]) { + struct dctcpstat *dctcp = malloc(sizeof(struct + dctcpstat)); + + const struct tcp_dctcp_info *dinfo + = RTA_DATA(tb[INET_DIAG_DCTCPINFO]); + + dctcp->enabled = !!dinfo->dctcp_enabled; + dctcp->ce_state = dinfo->dctcp_ce_state; + dctcp->alpha = dinfo->dctcp_alpha; + dctcp->ab_ecn = dinfo->dctcp_ab_ecn; + dctcp->ab_tot = dinfo->dctcp_ab_tot; + s.dctcp = dctcp; + } + + if (tb[INET_DIAG_BBRINFO]) { + const void *bbr_info = RTA_DATA(tb[INET_DIAG_BBRINFO]); + int len = min(RTA_PAYLOAD(tb[INET_DIAG_BBRINFO]), + 
sizeof(*s.bbr_info)); + + s.bbr_info = calloc(1, sizeof(*s.bbr_info)); + if (s.bbr_info && bbr_info) + memcpy(s.bbr_info, bbr_info, len); + } + + if (rtt > 0 && info->tcpi_snd_mss && info->tcpi_snd_cwnd) { + s.send_bps = (double) info->tcpi_snd_cwnd * + (double)info->tcpi_snd_mss * 8000000. / rtt; + } + + if (info->tcpi_pacing_rate && + info->tcpi_pacing_rate != ~0ULL) { + s.pacing_rate = info->tcpi_pacing_rate * 8.; + + if (info->tcpi_max_pacing_rate && + info->tcpi_max_pacing_rate != ~0ULL) + s.pacing_rate_max = info->tcpi_max_pacing_rate * 8.; + } + s.bytes_acked = info->tcpi_bytes_acked; + s.bytes_received = info->tcpi_bytes_received; + s.segs_out = info->tcpi_segs_out; + s.segs_in = info->tcpi_segs_in; + s.data_segs_out = info->tcpi_data_segs_out; + s.data_segs_in = info->tcpi_data_segs_in; + s.not_sent = info->tcpi_notsent_bytes; + if (info->tcpi_min_rtt && info->tcpi_min_rtt != ~0U) + s.min_rtt = (double) info->tcpi_min_rtt / 1000; + s.delivery_rate = info->tcpi_delivery_rate * 8.; + s.app_limited = info->tcpi_delivery_rate_app_limited; + s.busy_time = info->tcpi_busy_time; + s.rwnd_limited = info->tcpi_rwnd_limited; + s.sndbuf_limited = info->tcpi_sndbuf_limited; + s.delivered = info->tcpi_delivered; + s.delivered_ce = info->tcpi_delivered_ce; + s.dsack_dups = info->tcpi_dsack_dups; + s.reord_seen = info->tcpi_reord_seen; + s.bytes_sent = info->tcpi_bytes_sent; + s.bytes_retrans = info->tcpi_bytes_retrans; + s.rcv_ooopack = info->tcpi_rcv_ooopack; + s.snd_wnd = info->tcpi_snd_wnd; + s.rcv_wnd = info->tcpi_rcv_wnd; + s.rehash = info->tcpi_rehash; + tcp_stats_print(&s); + free(s.dctcp); + free(s.bbr_info); + } + if (tb[INET_DIAG_MD5SIG]) { + struct tcp_diag_md5sig *sig = RTA_DATA(tb[INET_DIAG_MD5SIG]); + int len = RTA_PAYLOAD(tb[INET_DIAG_MD5SIG]); + + out(" md5keys:"); + print_md5sig(sig++); + for (len -= sizeof(*sig); len > 0; len -= sizeof(*sig)) { + out(","); + print_md5sig(sig++); + } + } + if (tb[INET_DIAG_ULP_INFO]) { + struct rtattr 
*ulpinfo[INET_ULP_INFO_MAX + 1] = { 0 }; + + parse_rtattr_nested(ulpinfo, INET_ULP_INFO_MAX, + tb[INET_DIAG_ULP_INFO]); + + if (ulpinfo[INET_ULP_INFO_NAME]) + out(" tcp-ulp-%s", + rta_getattr_str(ulpinfo[INET_ULP_INFO_NAME])); + + if (ulpinfo[INET_ULP_INFO_TLS]) { + struct rtattr *tlsinfo[TLS_INFO_MAX + 1] = { 0 }; + + parse_rtattr_nested(tlsinfo, TLS_INFO_MAX, + ulpinfo[INET_ULP_INFO_TLS]); + + tcp_tls_version(tlsinfo[TLS_INFO_VERSION]); + tcp_tls_cipher(tlsinfo[TLS_INFO_CIPHER]); + tcp_tls_conf("rxconf", tlsinfo[TLS_INFO_RXCONF]); + tcp_tls_conf("txconf", tlsinfo[TLS_INFO_TXCONF]); + if (!!tlsinfo[TLS_INFO_ZC_RO_TX]) + out(" zc_ro_tx"); + if (!!tlsinfo[TLS_INFO_RX_NO_PAD]) + out(" no_pad_rx"); + } + if (ulpinfo[INET_ULP_INFO_MPTCP]) { + struct rtattr *sfinfo[MPTCP_SUBFLOW_ATTR_MAX + 1] = + { 0 }; + + parse_rtattr_nested(sfinfo, MPTCP_SUBFLOW_ATTR_MAX, + ulpinfo[INET_ULP_INFO_MPTCP]); + mptcp_subflow_info(sfinfo); + } + } +} + +static void mptcp_stats_print(struct mptcp_info *s) +{ + if (s->mptcpi_subflows) + out(" subflows:%u", s->mptcpi_subflows); + if (s->mptcpi_add_addr_signal) + out(" add_addr_signal:%u", s->mptcpi_add_addr_signal); + if (s->mptcpi_add_addr_accepted) + out(" add_addr_accepted:%u", s->mptcpi_add_addr_accepted); + if (s->mptcpi_subflows_max) + out(" subflows_max:%u", s->mptcpi_subflows_max); + if (s->mptcpi_add_addr_signal_max) + out(" add_addr_signal_max:%u", s->mptcpi_add_addr_signal_max); + if (s->mptcpi_add_addr_accepted_max) + out(" add_addr_accepted_max:%u", s->mptcpi_add_addr_accepted_max); + if (s->mptcpi_flags & MPTCP_INFO_FLAG_FALLBACK) + out(" fallback"); + if (s->mptcpi_flags & MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED) + out(" remote_key"); + if (s->mptcpi_token) + out(" token:%x", s->mptcpi_token); + if (s->mptcpi_write_seq) + out(" write_seq:%llu", s->mptcpi_write_seq); + if (s->mptcpi_snd_una) + out(" snd_una:%llu", s->mptcpi_snd_una); + if (s->mptcpi_rcv_nxt) + out(" rcv_nxt:%llu", s->mptcpi_rcv_nxt); + if (s->mptcpi_local_addr_used) 
+ out(" local_addr_used:%u", s->mptcpi_local_addr_used); + if (s->mptcpi_local_addr_max) + out(" local_addr_max:%u", s->mptcpi_local_addr_max); + if (s->mptcpi_csum_enabled) + out(" csum_enabled:%u", s->mptcpi_csum_enabled); + if (s->mptcpi_retransmits) + out(" retransmits:%u", s->mptcpi_retransmits); + if (s->mptcpi_bytes_retrans) + out(" bytes_retrans:%llu", s->mptcpi_bytes_retrans); + if (s->mptcpi_bytes_sent) + out(" bytes_sent:%llu", s->mptcpi_bytes_sent); + if (s->mptcpi_bytes_received) + out(" bytes_received:%llu", s->mptcpi_bytes_received); + if (s->mptcpi_bytes_acked) + out(" bytes_acked:%llu", s->mptcpi_bytes_acked); + if (s->mptcpi_subflows_total) + out(" subflows_total:%u", s->mptcpi_subflows_total); +} + +static void mptcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r, + struct rtattr *tb[]) +{ + print_skmeminfo(tb, INET_DIAG_SKMEMINFO); + + if (tb[INET_DIAG_INFO]) { + struct mptcp_info *info; + int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]); + + /* workaround for older kernels with less fields */ + if (len < sizeof(*info)) { + info = alloca(sizeof(*info)); + memcpy(info, RTA_DATA(tb[INET_DIAG_INFO]), len); + memset((char *)info + len, 0, sizeof(*info) - len); + } else + info = RTA_DATA(tb[INET_DIAG_INFO]); + + mptcp_stats_print(info); + } +} + +static const char *format_host_sa(struct sockaddr_storage *sa) +{ + union { + struct sockaddr_in sin; + struct sockaddr_in6 sin6; + } *saddr = (void *)sa; + + switch (sa->ss_family) { + case AF_INET: + return format_host(AF_INET, 4, &saddr->sin.sin_addr); + case AF_INET6: + return format_host(AF_INET6, 16, &saddr->sin6.sin6_addr); + default: + return ""; + } +} + +static void sctp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r, + struct rtattr *tb[]) +{ + struct sockaddr_storage *sa; + int len; + + print_skmeminfo(tb, INET_DIAG_SKMEMINFO); + + if (tb[INET_DIAG_LOCALS]) { + len = RTA_PAYLOAD(tb[INET_DIAG_LOCALS]); + sa = RTA_DATA(tb[INET_DIAG_LOCALS]); + + out(" locals:%s", 
format_host_sa(sa)); + for (sa++, len -= sizeof(*sa); len > 0; sa++, len -= sizeof(*sa)) + out(",%s", format_host_sa(sa)); + + } + if (tb[INET_DIAG_PEERS]) { + len = RTA_PAYLOAD(tb[INET_DIAG_PEERS]); + sa = RTA_DATA(tb[INET_DIAG_PEERS]); + + out(" peers:%s", format_host_sa(sa)); + for (sa++, len -= sizeof(*sa); len > 0; sa++, len -= sizeof(*sa)) + out(",%s", format_host_sa(sa)); + } + if (tb[INET_DIAG_INFO]) { + struct sctp_info *info; + len = RTA_PAYLOAD(tb[INET_DIAG_INFO]); + + /* workaround for older kernels with less fields */ + if (len < sizeof(*info)) { + info = alloca(sizeof(*info)); + memcpy(info, RTA_DATA(tb[INET_DIAG_INFO]), len); + memset((char *)info + len, 0, sizeof(*info) - len); + } else + info = RTA_DATA(tb[INET_DIAG_INFO]); + + sctp_stats_print(info); + } +} + +static void parse_diag_msg(struct nlmsghdr *nlh, struct sockstat *s) +{ + struct rtattr *tb[INET_DIAG_MAX+1]; + struct inet_diag_msg *r = NLMSG_DATA(nlh); + + parse_rtattr(tb, INET_DIAG_MAX, (struct rtattr *)(r+1), + nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r))); + + s->state = r->idiag_state; + s->local.family = s->remote.family = r->idiag_family; + s->lport = ntohs(r->id.idiag_sport); + s->rport = ntohs(r->id.idiag_dport); + s->wq = r->idiag_wqueue; + s->rq = r->idiag_rqueue; + s->ino = r->idiag_inode; + s->uid = r->idiag_uid; + s->iface = r->id.idiag_if; + s->sk = cookie_sk_get(&r->id.idiag_cookie[0]); + + s->mark = 0; + if (tb[INET_DIAG_MARK]) + s->mark = rta_getattr_u32(tb[INET_DIAG_MARK]); + s->cgroup_id = 0; + if (tb[INET_DIAG_CGROUP_ID]) + s->cgroup_id = rta_getattr_u64(tb[INET_DIAG_CGROUP_ID]); + if (tb[INET_DIAG_PROTOCOL]) + s->raw_prot = rta_getattr_u8(tb[INET_DIAG_PROTOCOL]); + else + s->raw_prot = 0; + + if (s->local.family == AF_INET) + s->local.bytelen = s->remote.bytelen = 4; + else + s->local.bytelen = s->remote.bytelen = 16; + + memcpy(s->local.data, r->id.idiag_src, s->local.bytelen); + memcpy(s->remote.data, r->id.idiag_dst, s->local.bytelen); +} + +static int 
inet_show_sock(struct nlmsghdr *nlh, + struct sockstat *s) +{ + struct rtattr *tb[INET_DIAG_MAX+1]; + struct inet_diag_msg *r = NLMSG_DATA(nlh); + unsigned char v6only = 0; + + parse_rtattr(tb, INET_DIAG_MAX, (struct rtattr *)(r+1), + nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r))); + + if (tb[INET_DIAG_PROTOCOL]) + s->type = rta_getattr_u8(tb[INET_DIAG_PROTOCOL]); + + if (s->local.family == AF_INET6 && tb[INET_DIAG_SKV6ONLY]) + v6only = rta_getattr_u8(tb[INET_DIAG_SKV6ONLY]); + + inet_stats_print(s, v6only); + + if (show_options) { + struct tcpstat t = {}; + + t.timer = r->idiag_timer; + t.timeout = r->idiag_expires; + t.retrans = r->idiag_retrans; + if (s->type == IPPROTO_SCTP) + sctp_timer_print(&t); + else + tcp_timer_print(&t); + } + + if (show_details) { + sock_details_print(s); + if (s->local.family == AF_INET6 && tb[INET_DIAG_SKV6ONLY]) + out(" v6only:%u", v6only); + + if (tb[INET_DIAG_SHUTDOWN]) { + unsigned char mask; + + mask = rta_getattr_u8(tb[INET_DIAG_SHUTDOWN]); + out(" %c-%c", + mask & 1 ? '-' : '<', mask & 2 ? 
'-' : '>'); + } + } + + if (show_tos) { + if (tb[INET_DIAG_TOS]) + out(" tos:%#x", rta_getattr_u8(tb[INET_DIAG_TOS])); + if (tb[INET_DIAG_TCLASS]) + out(" tclass:%#x", rta_getattr_u8(tb[INET_DIAG_TCLASS])); + if (tb[INET_DIAG_CLASS_ID]) + out(" class_id:%#x", rta_getattr_u32(tb[INET_DIAG_CLASS_ID])); + } + + if (show_cgroup) { + if (tb[INET_DIAG_CGROUP_ID]) + out(" cgroup:%s", cg_id_to_path(rta_getattr_u64(tb[INET_DIAG_CGROUP_ID]))); + } + + if (show_inet_sockopt) { + if (tb[INET_DIAG_SOCKOPT] && RTA_PAYLOAD(tb[INET_DIAG_SOCKOPT]) >= + sizeof(struct inet_diag_sockopt)) { + const struct inet_diag_sockopt *sockopt = + RTA_DATA(tb[INET_DIAG_SOCKOPT]); + if (!oneline) + out("\n\tinet-sockopt: ("); + else + out(" inet-sockopt: ("); + if (sockopt->recverr) + out(" recverr"); + if (sockopt->is_icsk) + out(" is_icsk"); + if (sockopt->freebind) + out(" freebind"); + if (sockopt->hdrincl) + out(" hdrincl"); + if (sockopt->mc_loop) + out(" mc_loop"); + if (sockopt->transparent) + out(" transparent"); + if (sockopt->mc_all) + out(" mc_all"); + if (sockopt->nodefrag) + out(" nodefrag"); + if (sockopt->bind_address_no_port) + out(" bind_addr_no_port"); + if (sockopt->recverr_rfc4884) + out(" recverr_rfc4884"); + if (sockopt->defer_connect) + out(" defer_connect"); + out(")"); + } + } + + if (show_mem || (show_tcpinfo && s->type != IPPROTO_UDP)) { + if (!oneline) + out("\n\t"); + if (s->type == IPPROTO_SCTP) + sctp_show_info(nlh, r, tb); + else if (s->type == IPPROTO_MPTCP) + mptcp_show_info(nlh, r, tb); + else + tcp_show_info(nlh, r, tb); + } + sctp_ino = s->ino; + + return 0; +} + +static int tcpdiag_send(int fd, int protocol, struct filter *f) +{ + struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; + struct { + struct nlmsghdr nlh; + struct inet_diag_req r; + } req = { + .nlh.nlmsg_len = sizeof(req), + .nlh.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST, + .nlh.nlmsg_seq = MAGIC_SEQ, + .r.idiag_family = AF_INET, + .r.idiag_states = f->states, + }; + char *bc = 
NULL; + int bclen; + struct msghdr msg; + struct rtattr rta; + struct iovec iov[3]; + int iovlen = 1; + + if (protocol == IPPROTO_TCP) + req.nlh.nlmsg_type = TCPDIAG_GETSOCK; + else if (protocol == IPPROTO_DCCP) + req.nlh.nlmsg_type = DCCPDIAG_GETSOCK; + else + return -1; + + if (show_mem) { + req.r.idiag_ext |= (1<<(INET_DIAG_MEMINFO-1)); + req.r.idiag_ext |= (1<<(INET_DIAG_SKMEMINFO-1)); + } + + if (show_tcpinfo) { + req.r.idiag_ext |= (1<<(INET_DIAG_INFO-1)); + req.r.idiag_ext |= (1<<(INET_DIAG_VEGASINFO-1)); + req.r.idiag_ext |= (1<<(INET_DIAG_CONG-1)); + } + + if (show_tos) { + req.r.idiag_ext |= (1<<(INET_DIAG_TOS-1)); + req.r.idiag_ext |= (1<<(INET_DIAG_TCLASS-1)); + } + + iov[0] = (struct iovec){ + .iov_base = &req, + .iov_len = sizeof(req) + }; + if (f->f) { + bclen = ssfilter_bytecompile(f->f, &bc); + if (bclen) { + rta.rta_type = INET_DIAG_REQ_BYTECODE; + rta.rta_len = RTA_LENGTH(bclen); + iov[1] = (struct iovec){ &rta, sizeof(rta) }; + iov[2] = (struct iovec){ bc, bclen }; + req.nlh.nlmsg_len += RTA_LENGTH(bclen); + iovlen = 3; + } + } + + msg = (struct msghdr) { + .msg_name = (void *)&nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = iov, + .msg_iovlen = iovlen, + }; + + if (sendmsg(fd, &msg, 0) < 0) { + close(fd); + return -1; + } + + return 0; +} + +static int sockdiag_send(int family, int fd, int protocol, struct filter *f) +{ + struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; + DIAG_REQUEST(req, struct inet_diag_req_v2 r); + char *bc = NULL; + int bclen; + __u32 proto; + struct msghdr msg; + struct rtattr rta_bc; + struct rtattr rta_proto; + struct iovec iov[5]; + int iovlen = 1; + + if (family == PF_UNSPEC) + return tcpdiag_send(fd, protocol, f); + + memset(&req.r, 0, sizeof(req.r)); + req.r.sdiag_family = family; + req.r.sdiag_protocol = protocol; + req.r.idiag_states = f->states; + if (show_mem) { + req.r.idiag_ext |= (1<<(INET_DIAG_MEMINFO-1)); + req.r.idiag_ext |= (1<<(INET_DIAG_SKMEMINFO-1)); + } + + if (show_tcpinfo) { + 
req.r.idiag_ext |= (1<<(INET_DIAG_INFO-1)); + req.r.idiag_ext |= (1<<(INET_DIAG_VEGASINFO-1)); + req.r.idiag_ext |= (1<<(INET_DIAG_CONG-1)); + } + + if (show_tos) { + req.r.idiag_ext |= (1<<(INET_DIAG_TOS-1)); + req.r.idiag_ext |= (1<<(INET_DIAG_TCLASS-1)); + } + + iov[0] = (struct iovec){ + .iov_base = &req, + .iov_len = sizeof(req) + }; + if (f->f) { + bclen = ssfilter_bytecompile(f->f, &bc); + if (bclen) { + rta_bc.rta_type = INET_DIAG_REQ_BYTECODE; + rta_bc.rta_len = RTA_LENGTH(bclen); + iov[1] = (struct iovec){ &rta_bc, sizeof(rta_bc) }; + iov[2] = (struct iovec){ bc, bclen }; + req.nlh.nlmsg_len += RTA_LENGTH(bclen); + iovlen = 3; + } + } + + /* put extended protocol attribute, if required */ + if (protocol > 255) { + rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL; + rta_proto.rta_len = RTA_LENGTH(sizeof(proto)); + proto = protocol; + iov[iovlen] = (struct iovec){ &rta_proto, sizeof(rta_proto) }; + iov[iovlen + 1] = (struct iovec){ &proto, sizeof(proto) }; + req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto)); + iovlen += 2; + } + + msg = (struct msghdr) { + .msg_name = (void *)&nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = iov, + .msg_iovlen = iovlen, + }; + + if (sendmsg(fd, &msg, 0) < 0) { + close(fd); + return -1; + } + + return 0; +} + +struct inet_diag_arg { + struct filter *f; + int protocol; + struct rtnl_handle *rth; +}; + +static int kill_inet_sock(struct nlmsghdr *h, void *arg, struct sockstat *s) +{ + struct inet_diag_msg *d = NLMSG_DATA(h); + struct inet_diag_arg *diag_arg = arg; + struct rtnl_handle *rth = diag_arg->rth; + + DIAG_REQUEST(req, struct inet_diag_req_v2 r); + + req.nlh.nlmsg_type = SOCK_DESTROY; + req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nlh.nlmsg_seq = ++rth->seq; + req.r.sdiag_family = d->idiag_family; + req.r.sdiag_protocol = diag_arg->protocol; + req.r.id = d->id; + + if (diag_arg->protocol == IPPROTO_RAW) { + struct inet_diag_req_raw *raw = (void *)&req.r; + + BUILD_BUG_ON(sizeof(req.r) != sizeof(*raw)); + 
raw->sdiag_raw_protocol = s->raw_prot; + } + + return rtnl_talk(rth, &req.nlh, NULL); +} + +static int show_one_inet_sock(struct nlmsghdr *h, void *arg) +{ + int err; + struct inet_diag_arg *diag_arg = arg; + struct inet_diag_msg *r = NLMSG_DATA(h); + struct sockstat s = {}; + + if (!(diag_arg->f->families & FAMILY_MASK(r->idiag_family))) + return 0; + + parse_diag_msg(h, &s); + s.type = diag_arg->protocol; + + if (diag_arg->f->f && run_ssfilter(diag_arg->f->f, &s) == 0) + return 0; + + if (diag_arg->f->kill && kill_inet_sock(h, arg, &s) != 0) { + if (errno == EOPNOTSUPP || errno == ENOENT) { + /* Socket can't be closed, or is already closed. */ + return 0; + } else { + perror("SOCK_DESTROY answers"); + return -1; + } + } + + err = inet_show_sock(h, &s); + if (err < 0) + return err; + + return 0; +} + +static int inet_show_netlink(struct filter *f, FILE *dump_fp, int protocol) +{ + int err = 0; + struct rtnl_handle rth, rth2; + int family = PF_INET; + struct inet_diag_arg arg = { .f = f, .protocol = protocol }; + + if (rtnl_open_byproto(&rth, 0, NETLINK_SOCK_DIAG)) + return -1; + + if (f->kill) { + if (rtnl_open_byproto(&rth2, 0, NETLINK_SOCK_DIAG)) { + rtnl_close(&rth); + return -1; + } + arg.rth = &rth2; + } + + rth.dump = MAGIC_SEQ; + rth.dump_fp = dump_fp; + if (preferred_family == PF_INET6) + family = PF_INET6; + + /* extended protocol will use INET_DIAG_REQ_PROTOCOL, + * not supported by older kernels. On such kernel + * rtnl_dump will bail with rtnl_dump_error(). 
+ * Suppress the error to avoid confusing the user + */ + if (protocol > 255) + rth.flags |= RTNL_HANDLE_F_SUPPRESS_NLERR; + +again: + if ((err = sockdiag_send(family, rth.fd, protocol, f))) + goto Exit; + + if ((err = rtnl_dump_filter(&rth, show_one_inet_sock, &arg))) { + if (family != PF_UNSPEC) { + family = PF_UNSPEC; + goto again; + } + goto Exit; + } + if (family == PF_INET && preferred_family != PF_INET) { + family = PF_INET6; + goto again; + } + +Exit: + rtnl_close(&rth); + if (arg.rth) + rtnl_close(arg.rth); + return err; +} + +static int tcp_show_netlink_file(struct filter *f) +{ + FILE *fp; + char buf[16384]; + int err = -1; + + if ((fp = fopen(getenv("TCPDIAG_FILE"), "r")) == NULL) { + perror("fopen($TCPDIAG_FILE)"); + return err; + } + + while (1) { + int err2; + size_t status, nitems; + struct nlmsghdr *h = (struct nlmsghdr *)buf; + struct sockstat s = {}; + + status = fread(buf, 1, sizeof(*h), fp); + if (status != sizeof(*h)) { + if (ferror(fp)) + perror("Reading header from $TCPDIAG_FILE"); + if (feof(fp)) + fprintf(stderr, "Unexpected EOF reading $TCPDIAG_FILE"); + break; + } + + nitems = NLMSG_ALIGN(h->nlmsg_len - sizeof(*h)); + status = fread(h+1, 1, nitems, fp); + + if (status != nitems) { + if (ferror(fp)) + perror("Reading $TCPDIAG_FILE"); + if (feof(fp)) + fprintf(stderr, "Unexpected EOF reading $TCPDIAG_FILE"); + break; + } + + /* The only legal exit point */ + if (h->nlmsg_type == NLMSG_DONE) { + err = 0; + break; + } + + if (h->nlmsg_type == NLMSG_ERROR) { + struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h); + + if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) { + fprintf(stderr, "ERROR truncated\n"); + } else { + errno = -err->error; + perror("TCPDIAG answered"); + } + break; + } + + parse_diag_msg(h, &s); + s.type = IPPROTO_TCP; + + if (f && f->f && run_ssfilter(f->f, &s) == 0) + continue; + + err2 = inet_show_sock(h, &s); + if (err2 < 0) { + err = err2; + break; + } + } + + fclose(fp); + return err; +} + +static int 
tcp_show(struct filter *f) +{ + FILE *fp = NULL; + char *buf = NULL; + int bufsize = 1024*1024; + + if (!filter_af_get(f, AF_INET) && !filter_af_get(f, AF_INET6)) + return 0; + + dg_proto = TCP_PROTO; + + if (getenv("TCPDIAG_FILE")) + return tcp_show_netlink_file(f); + + if (!getenv("PROC_NET_TCP") && !getenv("PROC_ROOT") + && inet_show_netlink(f, NULL, IPPROTO_TCP) == 0) + return 0; + + /* Sigh... We have to parse /proc/net/tcp... */ + while (bufsize >= 64*1024) { + if ((buf = malloc(bufsize)) != NULL) + break; + bufsize /= 2; + } + if (buf == NULL) { + errno = ENOMEM; + return -1; + } + + if (f->families & FAMILY_MASK(AF_INET)) { + if ((fp = net_tcp_open()) == NULL) + goto outerr; + + setbuffer(fp, buf, bufsize); + if (generic_record_read(fp, tcp_show_line, f, AF_INET)) + goto outerr; + fclose(fp); + } + + if ((f->families & FAMILY_MASK(AF_INET6)) && + (fp = net_tcp6_open()) != NULL) { + setbuffer(fp, buf, bufsize); + if (generic_record_read(fp, tcp_show_line, f, AF_INET6)) + goto outerr; + fclose(fp); + } + + free(buf); + return 0; + +outerr: + do { + int saved_errno = errno; + + free(buf); + if (fp) + fclose(fp); + errno = saved_errno; + return -1; + } while (0); +} + +static int mptcp_show(struct filter *f) +{ + if (!filter_af_get(f, AF_INET) && !filter_af_get(f, AF_INET6)) + return 0; + + if (!getenv("PROC_NET_MPTCP") && !getenv("PROC_ROOT") + && inet_show_netlink(f, NULL, IPPROTO_MPTCP) == 0) + return 0; + + return 0; +} + +static int dccp_show(struct filter *f) +{ + if (!filter_af_get(f, AF_INET) && !filter_af_get(f, AF_INET6)) + return 0; + + if (!getenv("PROC_NET_DCCP") && !getenv("PROC_ROOT") + && inet_show_netlink(f, NULL, IPPROTO_DCCP) == 0) + return 0; + + return 0; +} + +static int sctp_show(struct filter *f) +{ + if (!filter_af_get(f, AF_INET) && !filter_af_get(f, AF_INET6)) + return 0; + + if (!getenv("PROC_NET_SCTP") && !getenv("PROC_ROOT") + && inet_show_netlink(f, NULL, IPPROTO_SCTP) == 0) + return 0; + + return 0; +} + +static int 
dgram_show_line(char *line, const struct filter *f, int family) +{ + struct sockstat s = {}; + char *loc, *rem, *data; + char opt[256]; + int n; + + if (proc_inet_split_line(line, &loc, &rem, &data)) + return -1; + + int state = (data[1] >= 'A') ? (data[1] - 'A' + 10) : (data[1] - '0'); + + if (!(f->states & (1 << state))) + return 0; + + proc_parse_inet_addr(loc, rem, family, &s); + + if (f->f && run_ssfilter(f->f, &s) == 0) + return 0; + + opt[0] = 0; + n = sscanf(data, "%x %x:%x %*x:%*x %*x %d %*d %u %d %llx %[^\n]\n", + &s.state, &s.wq, &s.rq, + &s.uid, &s.ino, + &s.refcnt, &s.sk, opt); + + if (n < 9) + opt[0] = 0; + + s.type = dg_proto == UDP_PROTO ? IPPROTO_UDP : 0; + inet_stats_print(&s, false); + + if (show_details && opt[0]) + out(" opt:\"%s\"", opt); + + return 0; +} + +static int udp_show(struct filter *f) +{ + FILE *fp = NULL; + + if (!filter_af_get(f, AF_INET) && !filter_af_get(f, AF_INET6)) + return 0; + + dg_proto = UDP_PROTO; + + if (!getenv("PROC_NET_UDP") && !getenv("PROC_ROOT") + && inet_show_netlink(f, NULL, IPPROTO_UDP) == 0) + return 0; + + if (f->families&FAMILY_MASK(AF_INET)) { + if ((fp = net_udp_open()) == NULL) + goto outerr; + if (generic_record_read(fp, dgram_show_line, f, AF_INET)) + goto outerr; + fclose(fp); + } + + if ((f->families&FAMILY_MASK(AF_INET6)) && + (fp = net_udp6_open()) != NULL) { + if (generic_record_read(fp, dgram_show_line, f, AF_INET6)) + goto outerr; + fclose(fp); + } + return 0; + +outerr: + do { + int saved_errno = errno; + + if (fp) + fclose(fp); + errno = saved_errno; + return -1; + } while (0); +} + +static int raw_show(struct filter *f) +{ + FILE *fp = NULL; + + if (!filter_af_get(f, AF_INET) && !filter_af_get(f, AF_INET6)) + return 0; + + dg_proto = RAW_PROTO; + + if (!getenv("PROC_NET_RAW") && !getenv("PROC_ROOT") && + inet_show_netlink(f, NULL, IPPROTO_RAW) == 0) + return 0; + + if (f->families&FAMILY_MASK(AF_INET)) { + if ((fp = net_raw_open()) == NULL) + goto outerr; + if (generic_record_read(fp, 
dgram_show_line, f, AF_INET)) + goto outerr; + fclose(fp); + } + + if ((f->families&FAMILY_MASK(AF_INET6)) && + (fp = net_raw6_open()) != NULL) { + if (generic_record_read(fp, dgram_show_line, f, AF_INET6)) + goto outerr; + fclose(fp); + } + return 0; + +outerr: + do { + int saved_errno = errno; + + if (fp) + fclose(fp); + errno = saved_errno; + return -1; + } while (0); +} + +#define MAX_UNIX_REMEMBER (1024*1024/sizeof(struct sockstat)) + +static void unix_list_drop_first(struct sockstat **list) +{ + struct sockstat *s = *list; + + (*list) = (*list)->next; + free(s->name); + free(s); +} + +static bool unix_type_skip(struct sockstat *s, struct filter *f) +{ + if (s->type == SOCK_STREAM && !(f->dbs&(1<<UNIX_ST_DB))) + return true; + if (s->type == SOCK_DGRAM && !(f->dbs&(1<<UNIX_DG_DB))) + return true; + if (s->type == SOCK_SEQPACKET && !(f->dbs&(1<<UNIX_SQ_DB))) + return true; + return false; +} + +static void unix_stats_print(struct sockstat *s, struct filter *f) +{ + char port_name[30] = {}; + + sock_state_print(s); + + sock_addr_print(s->name ?: "*", " ", + uint_to_str(s->lport, port_name), NULL); + sock_addr_print(s->peer_name ?: "*", " ", + uint_to_str(s->rport, port_name), NULL); + + proc_ctx_print(s); +} + +static int unix_show_sock(struct nlmsghdr *nlh, void *arg) +{ + struct filter *f = (struct filter *)arg; + struct unix_diag_msg *r = NLMSG_DATA(nlh); + struct rtattr *tb[UNIX_DIAG_MAX+1]; + char name[128]; + struct sockstat stat = { .name = "*", .peer_name = "*" }; + + parse_rtattr(tb, UNIX_DIAG_MAX, (struct rtattr *)(r+1), + nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r))); + + stat.type = r->udiag_type; + stat.state = r->udiag_state; + stat.ino = stat.lport = r->udiag_ino; + stat.local.family = stat.remote.family = AF_UNIX; + + if (unix_type_skip(&stat, f)) + return 0; + + if (tb[UNIX_DIAG_RQLEN]) { + struct unix_diag_rqlen *rql = RTA_DATA(tb[UNIX_DIAG_RQLEN]); + + stat.rq = rql->udiag_rqueue; + stat.wq = rql->udiag_wqueue; + } + if (tb[UNIX_DIAG_NAME]) { + 
int len = RTA_PAYLOAD(tb[UNIX_DIAG_NAME]); + + memcpy(name, RTA_DATA(tb[UNIX_DIAG_NAME]), len); + name[len] = '\0'; + if (name[0] == '\0') { + int i; + for (i = 0; i < len; i++) + if (name[i] == '\0') + name[i] = '@'; + } + stat.name = &name[0]; + memcpy(stat.local.data, &stat.name, sizeof(stat.name)); + } + if (tb[UNIX_DIAG_PEER]) + stat.rport = rta_getattr_u32(tb[UNIX_DIAG_PEER]); + + if (f->f && run_ssfilter(f->f, &stat) == 0) + return 0; + + unix_stats_print(&stat, f); + + if (show_mem) + print_skmeminfo(tb, UNIX_DIAG_MEMINFO); + if (show_details) { + if (tb[UNIX_DIAG_SHUTDOWN]) { + unsigned char mask; + + mask = rta_getattr_u8(tb[UNIX_DIAG_SHUTDOWN]); + out(" %c-%c", + mask & 1 ? '-' : '<', mask & 2 ? '-' : '>'); + } + if (tb[UNIX_DIAG_VFS]) { + struct unix_diag_vfs *uv = RTA_DATA(tb[UNIX_DIAG_VFS]); + + out(" ino:%u dev:%u/%u", uv->udiag_vfs_ino, major(uv->udiag_vfs_dev), + minor(uv->udiag_vfs_dev)); + } + if (tb[UNIX_DIAG_ICONS]) { + int len = RTA_PAYLOAD(tb[UNIX_DIAG_ICONS]); + __u32 *peers = RTA_DATA(tb[UNIX_DIAG_ICONS]); + int i; + + out(" peers:"); + for (i = 0; i < len / sizeof(__u32); i++) + out(" %u", peers[i]); + } + } + + return 0; +} + +static int handle_netlink_request(struct filter *f, struct nlmsghdr *req, + size_t size, rtnl_filter_t show_one_sock) +{ + int ret = -1; + struct rtnl_handle rth; + + if (rtnl_open_byproto(&rth, 0, NETLINK_SOCK_DIAG)) + return -1; + + rth.dump = MAGIC_SEQ; + + if (rtnl_send(&rth, req, size) < 0) + goto Exit; + + if (rtnl_dump_filter(&rth, show_one_sock, f)) + goto Exit; + + ret = 0; +Exit: + rtnl_close(&rth); + return ret; +} + +static int unix_show_netlink(struct filter *f) +{ + DIAG_REQUEST(req, struct unix_diag_req r); + + req.r.sdiag_family = AF_UNIX; + req.r.udiag_states = f->states; + req.r.udiag_show = UDIAG_SHOW_NAME | UDIAG_SHOW_PEER | UDIAG_SHOW_RQLEN; + if (show_mem) + req.r.udiag_show |= UDIAG_SHOW_MEMINFO; + if (show_details) + req.r.udiag_show |= UDIAG_SHOW_VFS | UDIAG_SHOW_ICONS; + + return 
handle_netlink_request(f, &req.nlh, sizeof(req), unix_show_sock); +} + +static int unix_show(struct filter *f) +{ + FILE *fp; + char buf[256]; + char name[128]; + int newformat = 0; + int cnt; + struct sockstat *list = NULL; + const int unix_state_map[] = { SS_CLOSE, SS_SYN_SENT, + SS_ESTABLISHED, SS_CLOSING }; + + if (!filter_af_get(f, AF_UNIX)) + return 0; + + if (!getenv("PROC_NET_UNIX") && !getenv("PROC_ROOT") + && unix_show_netlink(f) == 0) + return 0; + + if ((fp = net_unix_open()) == NULL) + return -1; + if (!fgets(buf, sizeof(buf), fp)) { + fclose(fp); + return -1; + } + + if (memcmp(buf, "Peer", 4) == 0) + newformat = 1; + cnt = 0; + + while (fgets(buf, sizeof(buf), fp)) { + struct sockstat *u, **insp; + int flags; + + if (!(u = calloc(1, sizeof(*u)))) + break; + + if (sscanf(buf, "%x: %x %x %x %x %x %d %s", + &u->rport, &u->rq, &u->wq, &flags, &u->type, + &u->state, &u->ino, name) < 8) + name[0] = 0; + + u->lport = u->ino; + u->local.family = u->remote.family = AF_UNIX; + + if (flags & (1 << 16)) { + u->state = SS_LISTEN; + } else if (u->state > 0 && + u->state <= ARRAY_SIZE(unix_state_map)) { + u->state = unix_state_map[u->state-1]; + if (u->type == SOCK_DGRAM && u->state == SS_CLOSE && u->rport) + u->state = SS_ESTABLISHED; + } + if (unix_type_skip(u, f) || + !(f->states & (1 << u->state))) { + free(u); + continue; + } + + if (!newformat) { + u->rport = 0; + u->rq = 0; + u->wq = 0; + } + + if (name[0]) { + u->name = strdup(name); + if (!u->name) { + free(u); + break; + } + } + + if (u->rport) { + struct sockstat *p; + + for (p = list; p; p = p->next) { + if (u->rport == p->lport) + break; + } + if (!p) + u->peer_name = "?"; + else + u->peer_name = p->name ? 
: "*"; + } + + if (f->f) { + struct sockstat st = { + .local.family = AF_UNIX, + .remote.family = AF_UNIX, + }; + + memcpy(st.local.data, &u->name, sizeof(u->name)); + /* when parsing the old format rport is set to 0 and + * therefore peer_name remains NULL + */ + if (u->peer_name && strcmp(u->peer_name, "*")) + memcpy(st.remote.data, &u->peer_name, + sizeof(u->peer_name)); + if (run_ssfilter(f->f, &st) == 0) { + free(u->name); + free(u); + continue; + } + } + + insp = &list; + while (*insp) { + if (u->type < (*insp)->type || + (u->type == (*insp)->type && + u->ino < (*insp)->ino)) + break; + insp = &(*insp)->next; + } + u->next = *insp; + *insp = u; + + if (++cnt > MAX_UNIX_REMEMBER) { + while (list) { + unix_stats_print(list, f); + unix_list_drop_first(&list); + } + cnt = 0; + } + } + fclose(fp); + while (list) { + unix_stats_print(list, f); + unix_list_drop_first(&list); + } + + return 0; +} + +static int packet_stats_print(struct sockstat *s, const struct filter *f) +{ + const char *addr, *port; + char ll_name[16]; + + s->local.family = s->remote.family = AF_PACKET; + + if (f->f) { + s->local.data[0] = s->prot; + if (run_ssfilter(f->f, s) == 0) + return 1; + } + + sock_state_print(s); + + if (s->prot == 3) + addr = "*"; + else + addr = ll_proto_n2a(htons(s->prot), ll_name, sizeof(ll_name)); + + if (s->iface == 0) + port = "*"; + else + port = xll_index_to_name(s->iface); + + sock_addr_print(addr, ":", port, NULL); + sock_addr_print("", "*", "", NULL); + + proc_ctx_print(s); + + if (show_details) + sock_details_print(s); + + return 0; +} + +static void packet_show_ring(struct packet_diag_ring *ring) +{ + out("blk_size:%d", ring->pdr_block_size); + out(",blk_nr:%d", ring->pdr_block_nr); + out(",frm_size:%d", ring->pdr_frame_size); + out(",frm_nr:%d", ring->pdr_frame_nr); + out(",tmo:%d", ring->pdr_retire_tmo); + out(",features:0x%x", ring->pdr_features); +} + +static int packet_show_sock(struct nlmsghdr *nlh, void *arg) +{ + const struct filter *f = arg; + struct 
packet_diag_msg *r = NLMSG_DATA(nlh); + struct packet_diag_info *pinfo = NULL; + struct packet_diag_ring *ring_rx = NULL, *ring_tx = NULL; + struct rtattr *tb[PACKET_DIAG_MAX+1]; + struct sockstat stat = {}; + uint32_t fanout = 0; + bool has_fanout = false; + + parse_rtattr(tb, PACKET_DIAG_MAX, (struct rtattr *)(r+1), + nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r))); + + /* use /proc/net/packet if all info are not available */ + if (!tb[PACKET_DIAG_MEMINFO]) + return -1; + + stat.type = r->pdiag_type; + stat.prot = r->pdiag_num; + stat.ino = r->pdiag_ino; + stat.state = SS_CLOSE; + stat.sk = cookie_sk_get(&r->pdiag_cookie[0]); + + if (tb[PACKET_DIAG_MEMINFO]) { + __u32 *skmeminfo = RTA_DATA(tb[PACKET_DIAG_MEMINFO]); + + stat.rq = skmeminfo[SK_MEMINFO_RMEM_ALLOC]; + } + + if (tb[PACKET_DIAG_INFO]) { + pinfo = RTA_DATA(tb[PACKET_DIAG_INFO]); + stat.lport = stat.iface = pinfo->pdi_index; + } + + if (tb[PACKET_DIAG_UID]) + stat.uid = rta_getattr_u32(tb[PACKET_DIAG_UID]); + + if (tb[PACKET_DIAG_RX_RING]) + ring_rx = RTA_DATA(tb[PACKET_DIAG_RX_RING]); + + if (tb[PACKET_DIAG_TX_RING]) + ring_tx = RTA_DATA(tb[PACKET_DIAG_TX_RING]); + + if (tb[PACKET_DIAG_FANOUT]) { + has_fanout = true; + fanout = rta_getattr_u32(tb[PACKET_DIAG_FANOUT]); + } + + if (packet_stats_print(&stat, f)) + return 0; + + if (show_details) { + if (pinfo) { + if (oneline) + out(" ver:%d", pinfo->pdi_version); + else + out("\n\tver:%d", pinfo->pdi_version); + out(" cpy_thresh:%d", pinfo->pdi_copy_thresh); + out(" flags( "); + if (pinfo->pdi_flags & PDI_RUNNING) + out("running"); + if (pinfo->pdi_flags & PDI_AUXDATA) + out(" auxdata"); + if (pinfo->pdi_flags & PDI_ORIGDEV) + out(" origdev"); + if (pinfo->pdi_flags & PDI_VNETHDR) + out(" vnethdr"); + if (pinfo->pdi_flags & PDI_LOSS) + out(" loss"); + if (!pinfo->pdi_flags) + out("0"); + out(" )"); + } + if (ring_rx) { + if (oneline) + out(" ring_rx("); + else + out("\n\tring_rx("); + packet_show_ring(ring_rx); + out(")"); + } + if (ring_tx) { + if (oneline) + 
out(" ring_tx("); + else + out("\n\tring_tx("); + packet_show_ring(ring_tx); + out(")"); + } + if (has_fanout) { + uint16_t type = (fanout >> 16) & 0xffff; + + if (oneline) + out(" fanout("); + else + out("\n\tfanout("); + out("id:%d,", fanout & 0xffff); + out("type:"); + + if (type == 0) + out("hash"); + else if (type == 1) + out("lb"); + else if (type == 2) + out("cpu"); + else if (type == 3) + out("roll"); + else if (type == 4) + out("random"); + else if (type == 5) + out("qm"); + else + out("0x%x", type); + + out(")"); + } + } + + if (show_bpf && tb[PACKET_DIAG_FILTER]) { + struct sock_filter *fil = + RTA_DATA(tb[PACKET_DIAG_FILTER]); + int num = RTA_PAYLOAD(tb[PACKET_DIAG_FILTER]) / + sizeof(struct sock_filter); + + if (oneline) + out(" bpf filter (%d): ", num); + else + out("\n\tbpf filter (%d): ", num); + while (num) { + out(" 0x%02x %u %u %u,", + fil->code, fil->jt, fil->jf, fil->k); + num--; + fil++; + } + } + + if (show_mem) + print_skmeminfo(tb, PACKET_DIAG_MEMINFO); + return 0; +} + +static int packet_show_netlink(struct filter *f) +{ + DIAG_REQUEST(req, struct packet_diag_req r); + + req.r.sdiag_family = AF_PACKET; + req.r.pdiag_show = PACKET_SHOW_INFO | PACKET_SHOW_MEMINFO | + PACKET_SHOW_FILTER | PACKET_SHOW_RING_CFG | PACKET_SHOW_FANOUT; + + return handle_netlink_request(f, &req.nlh, sizeof(req), packet_show_sock); +} + +static int packet_show_line(char *buf, const struct filter *f, int fam) +{ + unsigned long long sk; + struct sockstat stat = {}; + int type, prot, iface, state, rq, uid, ino; + + sscanf(buf, "%llx %*d %d %x %d %d %u %u %u", + &sk, + &type, &prot, &iface, &state, + &rq, &uid, &ino); + + if (type == SOCK_RAW && !(f->dbs & (1<<PACKET_R_DB))) + return 0; + if (type == SOCK_DGRAM && !(f->dbs & (1<<PACKET_DG_DB))) + return 0; + + stat.type = type; + stat.prot = prot; + stat.lport = stat.iface = iface; + stat.state = state; + stat.rq = rq; + stat.uid = uid; + stat.ino = ino; + stat.state = SS_CLOSE; + + if (packet_stats_print(&stat, f)) + 
return 0; + + return 0; +} + +static int packet_show(struct filter *f) +{ + FILE *fp; + int rc = 0; + + if (!filter_af_get(f, AF_PACKET) || !(f->states & (1 << SS_CLOSE))) + return 0; + + if (!getenv("PROC_NET_PACKET") && !getenv("PROC_ROOT") && + packet_show_netlink(f) == 0) + return 0; + + if ((fp = net_packet_open()) == NULL) + return -1; + if (generic_record_read(fp, packet_show_line, f, AF_PACKET)) + rc = -1; + + fclose(fp); + return rc; +} + +static int xdp_stats_print(struct sockstat *s, const struct filter *f) +{ + const char *addr, *port; + char q_str[16]; + + s->local.family = s->remote.family = AF_XDP; + + if (f->f) { + if (run_ssfilter(f->f, s) == 0) + return 1; + } + + sock_state_print(s); + + if (s->iface) { + addr = xll_index_to_name(s->iface); + snprintf(q_str, sizeof(q_str), "q%d", s->lport); + port = q_str; + sock_addr_print(addr, ":", port, NULL); + } else { + sock_addr_print("", "*", "", NULL); + } + + sock_addr_print("", "*", "", NULL); + + proc_ctx_print(s); + + if (show_details) + sock_details_print(s); + + return 0; +} + +static void xdp_show_ring(const char *name, struct xdp_diag_ring *ring) +{ + if (oneline) + out(" %s(", name); + else + out("\n\t%s(", name); + out("entries:%u", ring->entries); + out(")"); +} + +static void xdp_show_umem(struct xdp_diag_umem *umem, struct xdp_diag_ring *fr, + struct xdp_diag_ring *cr) +{ + if (oneline) + out(" tumem("); + else + out("\n\tumem("); + out("id:%u", umem->id); + out(",size:%llu", umem->size); + out(",num_pages:%u", umem->num_pages); + out(",chunk_size:%u", umem->chunk_size); + out(",headroom:%u", umem->headroom); + out(",ifindex:%u", umem->ifindex); + out(",qid:%u", umem->queue_id); + out(",zc:%u", umem->flags & XDP_DU_F_ZEROCOPY); + out(",refs:%u", umem->refs); + out(")"); + + if (fr) + xdp_show_ring("fr", fr); + if (cr) + xdp_show_ring("cr", cr); +} + +static void xdp_show_stats(struct xdp_diag_stats *stats) +{ + if (oneline) + out(" stats("); + else + out("\n\tstats("); + out("rx 
dropped:%llu", stats->n_rx_dropped); + out(",rx invalid:%llu", stats->n_rx_invalid); + out(",rx queue full:%llu", stats->n_rx_full); + out(",rx fill ring empty:%llu", stats->n_fill_ring_empty); + out(",tx invalid:%llu", stats->n_tx_invalid); + out(",tx ring empty:%llu", stats->n_tx_ring_empty); + out(")"); +} + +static int xdp_show_sock(struct nlmsghdr *nlh, void *arg) +{ + struct xdp_diag_ring *rx = NULL, *tx = NULL, *fr = NULL, *cr = NULL; + struct xdp_diag_msg *msg = NLMSG_DATA(nlh); + struct rtattr *tb[XDP_DIAG_MAX + 1]; + struct xdp_diag_info *info = NULL; + struct xdp_diag_umem *umem = NULL; + struct xdp_diag_stats *stats = NULL; + const struct filter *f = arg; + struct sockstat stat = {}; + + parse_rtattr(tb, XDP_DIAG_MAX, (struct rtattr *)(msg + 1), + nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*msg))); + + stat.type = msg->xdiag_type; + stat.ino = msg->xdiag_ino; + stat.state = SS_CLOSE; + stat.sk = cookie_sk_get(&msg->xdiag_cookie[0]); + + if (tb[XDP_DIAG_INFO]) { + info = RTA_DATA(tb[XDP_DIAG_INFO]); + stat.iface = info->ifindex; + stat.lport = info->queue_id; + } + + if (tb[XDP_DIAG_UID]) + stat.uid = rta_getattr_u32(tb[XDP_DIAG_UID]); + if (tb[XDP_DIAG_RX_RING]) + rx = RTA_DATA(tb[XDP_DIAG_RX_RING]); + if (tb[XDP_DIAG_TX_RING]) + tx = RTA_DATA(tb[XDP_DIAG_TX_RING]); + if (tb[XDP_DIAG_UMEM]) + umem = RTA_DATA(tb[XDP_DIAG_UMEM]); + if (tb[XDP_DIAG_UMEM_FILL_RING]) + fr = RTA_DATA(tb[XDP_DIAG_UMEM_FILL_RING]); + if (tb[XDP_DIAG_UMEM_COMPLETION_RING]) + cr = RTA_DATA(tb[XDP_DIAG_UMEM_COMPLETION_RING]); + if (tb[XDP_DIAG_MEMINFO]) { + __u32 *skmeminfo = RTA_DATA(tb[XDP_DIAG_MEMINFO]); + + stat.rq = skmeminfo[SK_MEMINFO_RMEM_ALLOC]; + } + if (tb[XDP_DIAG_STATS]) + stats = RTA_DATA(tb[XDP_DIAG_STATS]); + + if (xdp_stats_print(&stat, f)) + return 0; + + if (show_details) { + if (rx) + xdp_show_ring("rx", rx); + if (tx) + xdp_show_ring("tx", tx); + if (umem) + xdp_show_umem(umem, fr, cr); + if (stats) + xdp_show_stats(stats); + } + + if (show_mem) + 
print_skmeminfo(tb, XDP_DIAG_MEMINFO); // really? + + + return 0; +} + +static int xdp_show(struct filter *f) +{ + DIAG_REQUEST(req, struct xdp_diag_req r); + + if (!filter_af_get(f, AF_XDP) || !(f->states & (1 << SS_CLOSE))) + return 0; + + req.r.sdiag_family = AF_XDP; + req.r.xdiag_show = XDP_SHOW_INFO | XDP_SHOW_RING_CFG | XDP_SHOW_UMEM | + XDP_SHOW_MEMINFO | XDP_SHOW_STATS; + + return handle_netlink_request(f, &req.nlh, sizeof(req), xdp_show_sock); +} + +static int netlink_show_one(struct filter *f, + int prot, int pid, unsigned int groups, + int state, int dst_pid, unsigned int dst_group, + int rq, int wq, + unsigned long long sk, unsigned long long cb) +{ + struct sockstat st = { + .state = SS_CLOSE, + .rq = rq, + .wq = wq, + .local.family = AF_NETLINK, + .remote.family = AF_NETLINK, + }; + + SPRINT_BUF(prot_buf) = {}; + const char *prot_name; + char procname[64] = {}; + + if (f->f) { + st.rport = -1; + st.lport = pid; + st.local.data[0] = prot; + if (run_ssfilter(f->f, &st) == 0) + return 1; + } + + sock_state_print(&st); + + prot_name = nl_proto_n2a(prot, prot_buf, sizeof(prot_buf)); + + if (pid == -1) { + procname[0] = '*'; + } else if (!numeric) { + int done = 0; + + if (!pid) { + done = 1; + strncpy(procname, "kernel", 7); + } else if (pid > 0) { + FILE *fp; + + snprintf(procname, sizeof(procname), "%s/%d/stat", + getenv("PROC_ROOT") ? 
: "/proc", pid); + if ((fp = fopen(procname, "r")) != NULL) { + if (fscanf(fp, "%*d (%[^)])", procname) == 1) { + snprintf(procname+strlen(procname), + sizeof(procname)-strlen(procname), + "/%d", pid); + done = 1; + } + fclose(fp); + } + } + if (!done) + int_to_str(pid, procname); + } else { + int_to_str(pid, procname); + } + + sock_addr_print(prot_name, ":", procname, NULL); + + if (state == NETLINK_CONNECTED) { + char dst_group_buf[30]; + char dst_pid_buf[30]; + + sock_addr_print(int_to_str(dst_group, dst_group_buf), ":", + int_to_str(dst_pid, dst_pid_buf), NULL); + } else { + sock_addr_print("", "*", "", NULL); + } + + char *pid_context = NULL; + + if (show_proc_ctx) { + /* The pid value will either be: + * 0 if destination kernel - show kernel initial context. + * A valid process pid - use getpidcon. + * A unique value allocated by the kernel or netlink user + * to the process - show context as "not available". + */ + if (!pid) + security_get_initial_context("kernel", &pid_context); + else if (pid > 0) + getpidcon(pid, &pid_context); + + out(" proc_ctx=%s", pid_context ? 
: "unavailable"); + freecon(pid_context); + } + + if (show_details) { + out(" sk=%llx cb=%llx groups=0x%08x", sk, cb, groups); + } + + return 0; +} + +static int netlink_show_sock(struct nlmsghdr *nlh, void *arg) +{ + struct filter *f = (struct filter *)arg; + struct netlink_diag_msg *r = NLMSG_DATA(nlh); + struct rtattr *tb[NETLINK_DIAG_MAX+1]; + int rq = 0, wq = 0; + unsigned long groups = 0; + + parse_rtattr(tb, NETLINK_DIAG_MAX, (struct rtattr *)(r+1), + nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r))); + + if (tb[NETLINK_DIAG_GROUPS] && RTA_PAYLOAD(tb[NETLINK_DIAG_GROUPS])) + groups = *(unsigned long *) RTA_DATA(tb[NETLINK_DIAG_GROUPS]); + + if (tb[NETLINK_DIAG_MEMINFO]) { + const __u32 *skmeminfo; + + skmeminfo = RTA_DATA(tb[NETLINK_DIAG_MEMINFO]); + + rq = skmeminfo[SK_MEMINFO_RMEM_ALLOC]; + wq = skmeminfo[SK_MEMINFO_WMEM_ALLOC]; + } + + if (netlink_show_one(f, r->ndiag_protocol, r->ndiag_portid, groups, + r->ndiag_state, r->ndiag_dst_portid, r->ndiag_dst_group, + rq, wq, 0, 0)) { + return 0; + } + + if (show_mem) { + out("\t"); + print_skmeminfo(tb, NETLINK_DIAG_MEMINFO); + } + + return 0; +} + +static int netlink_show_netlink(struct filter *f) +{ + DIAG_REQUEST(req, struct netlink_diag_req r); + + req.r.sdiag_family = AF_NETLINK; + req.r.sdiag_protocol = NDIAG_PROTO_ALL; + req.r.ndiag_show = NDIAG_SHOW_GROUPS | NDIAG_SHOW_MEMINFO; + + return handle_netlink_request(f, &req.nlh, sizeof(req), netlink_show_sock); +} + +static int netlink_show(struct filter *f) +{ + FILE *fp; + char buf[256]; + int prot, pid; + unsigned int groups; + int rq, wq, rc; + unsigned long long sk, cb; + + if (!filter_af_get(f, AF_NETLINK) || !(f->states & (1 << SS_CLOSE))) + return 0; + + if (!getenv("PROC_NET_NETLINK") && !getenv("PROC_ROOT") && + netlink_show_netlink(f) == 0) + return 0; + + if ((fp = net_netlink_open()) == NULL) + return -1; + if (!fgets(buf, sizeof(buf), fp)) { + fclose(fp); + return -1; + } + + while (fgets(buf, sizeof(buf), fp)) { + sscanf(buf, "%llx %d %d %x %d %d 
%llx %d", + &sk, + &prot, &pid, &groups, &rq, &wq, &cb, &rc); + + netlink_show_one(f, prot, pid, groups, 0, 0, 0, rq, wq, sk, cb); + } + + fclose(fp); + return 0; +} + +static bool vsock_type_skip(struct sockstat *s, struct filter *f) +{ + if (s->type == SOCK_STREAM && !(f->dbs & (1 << VSOCK_ST_DB))) + return true; + if (s->type == SOCK_DGRAM && !(f->dbs & (1 << VSOCK_DG_DB))) + return true; + return false; +} + +static void vsock_addr_print(inet_prefix *a, __u32 port) +{ + char cid_str[sizeof("4294967295")]; + char port_str[sizeof("4294967295")]; + __u32 cid; + + memcpy(&cid, a->data, sizeof(cid)); + + if (cid == ~(__u32)0) + snprintf(cid_str, sizeof(cid_str), "*"); + else + snprintf(cid_str, sizeof(cid_str), "%u", cid); + + if (port == ~(__u32)0) + snprintf(port_str, sizeof(port_str), "*"); + else + snprintf(port_str, sizeof(port_str), "%u", port); + + sock_addr_print(cid_str, ":", port_str, NULL); +} + +static void vsock_stats_print(struct sockstat *s, struct filter *f) +{ + sock_state_print(s); + + vsock_addr_print(&s->local, s->lport); + vsock_addr_print(&s->remote, s->rport); + + proc_ctx_print(s); +} + +static int vsock_show_sock(struct nlmsghdr *nlh, void *arg) +{ + struct filter *f = (struct filter *)arg; + struct vsock_diag_msg *r = NLMSG_DATA(nlh); + struct sockstat stat = { + .type = r->vdiag_type, + .lport = r->vdiag_src_port, + .rport = r->vdiag_dst_port, + .state = r->vdiag_state, + .ino = r->vdiag_ino, + }; + + vsock_set_inet_prefix(&stat.local, r->vdiag_src_cid); + vsock_set_inet_prefix(&stat.remote, r->vdiag_dst_cid); + + if (vsock_type_skip(&stat, f)) + return 0; + + if (f->f && run_ssfilter(f->f, &stat) == 0) + return 0; + + vsock_stats_print(&stat, f); + + return 0; +} + +static int vsock_show(struct filter *f) +{ + DIAG_REQUEST(req, struct vsock_diag_req r); + + if (!filter_af_get(f, AF_VSOCK)) + return 0; + + req.r.sdiag_family = AF_VSOCK; + req.r.vdiag_states = f->states; + + return handle_netlink_request(f, &req.nlh, sizeof(req), 
vsock_show_sock); +} + +static void tipc_sock_addr_print(struct rtattr *net_addr, struct rtattr *id) +{ + uint32_t node = rta_getattr_u32(net_addr); + uint32_t identity = rta_getattr_u32(id); + + SPRINT_BUF(addr) = {}; + SPRINT_BUF(port) = {}; + + sprintf(addr, "%u", node); + sprintf(port, "%u", identity); + sock_addr_print(addr, ":", port, NULL); + +} + +static int tipc_show_sock(struct nlmsghdr *nlh, void *arg) +{ + struct rtattr *stat[TIPC_NLA_SOCK_STAT_MAX + 1] = {}; + struct rtattr *attrs[TIPC_NLA_SOCK_MAX + 1] = {}; + struct rtattr *con[TIPC_NLA_CON_MAX + 1] = {}; + struct rtattr *info[TIPC_NLA_MAX + 1] = {}; + struct rtattr *msg_ref; + struct sockstat ss = {}; + + parse_rtattr(info, TIPC_NLA_MAX, NLMSG_DATA(nlh), + NLMSG_PAYLOAD(nlh, 0)); + + if (!info[TIPC_NLA_SOCK]) + return 0; + + msg_ref = info[TIPC_NLA_SOCK]; + parse_rtattr(attrs, TIPC_NLA_SOCK_MAX, RTA_DATA(msg_ref), + RTA_PAYLOAD(msg_ref)); + + msg_ref = attrs[TIPC_NLA_SOCK_STAT]; + parse_rtattr(stat, TIPC_NLA_SOCK_STAT_MAX, + RTA_DATA(msg_ref), RTA_PAYLOAD(msg_ref)); + + + ss.local.family = AF_TIPC; + ss.type = rta_getattr_u32(attrs[TIPC_NLA_SOCK_TYPE]); + ss.state = rta_getattr_u32(attrs[TIPC_NLA_SOCK_TIPC_STATE]); + ss.uid = rta_getattr_u32(attrs[TIPC_NLA_SOCK_UID]); + ss.ino = rta_getattr_u32(attrs[TIPC_NLA_SOCK_INO]); + ss.rq = rta_getattr_u32(stat[TIPC_NLA_SOCK_STAT_RCVQ]); + ss.wq = rta_getattr_u32(stat[TIPC_NLA_SOCK_STAT_SENDQ]); + ss.sk = rta_getattr_u64(attrs[TIPC_NLA_SOCK_COOKIE]); + + sock_state_print (&ss); + + tipc_sock_addr_print(attrs[TIPC_NLA_SOCK_ADDR], + attrs[TIPC_NLA_SOCK_REF]); + + msg_ref = attrs[TIPC_NLA_SOCK_CON]; + if (msg_ref) { + parse_rtattr(con, TIPC_NLA_CON_MAX, + RTA_DATA(msg_ref), RTA_PAYLOAD(msg_ref)); + + tipc_sock_addr_print(con[TIPC_NLA_CON_NODE], + con[TIPC_NLA_CON_SOCK]); + } else + sock_addr_print("", "-", "", NULL); + + if (show_details) + sock_details_print(&ss); + + proc_ctx_print(&ss); + + if (show_tipcinfo) { + if (oneline) + out(" type:%s", 
stype_nameg[ss.type]); + else + out("\n type:%s", stype_nameg[ss.type]); + out(" cong:%s ", + stat[TIPC_NLA_SOCK_STAT_LINK_CONG] ? "link" : + stat[TIPC_NLA_SOCK_STAT_CONN_CONG] ? "conn" : "none"); + out(" drop:%d ", + rta_getattr_u32(stat[TIPC_NLA_SOCK_STAT_DROP])); + + if (attrs[TIPC_NLA_SOCK_HAS_PUBL]) + out(" publ"); + + if (con[TIPC_NLA_CON_FLAG]) + out(" via {%u,%u} ", + rta_getattr_u32(con[TIPC_NLA_CON_TYPE]), + rta_getattr_u32(con[TIPC_NLA_CON_INST])); + } + + return 0; +} + +static int tipc_show(struct filter *f) +{ + DIAG_REQUEST(req, struct tipc_sock_diag_req r); + + memset(&req.r, 0, sizeof(req.r)); + req.r.sdiag_family = AF_TIPC; + req.r.tidiag_states = f->states; + + return handle_netlink_request(f, &req.nlh, sizeof(req), tipc_show_sock); +} + +struct sock_diag_msg { + __u8 sdiag_family; +}; + +static int generic_show_sock(struct nlmsghdr *nlh, void *arg) +{ + struct sock_diag_msg *r = NLMSG_DATA(nlh); + struct inet_diag_arg inet_arg = { .f = arg, .protocol = IPPROTO_MAX }; + int ret; + + switch (r->sdiag_family) { + case AF_INET: + case AF_INET6: + inet_arg.rth = inet_arg.f->rth_for_killing; + ret = show_one_inet_sock(nlh, &inet_arg); + break; + case AF_UNIX: + ret = unix_show_sock(nlh, arg); + break; + case AF_PACKET: + ret = packet_show_sock(nlh, arg); + break; + case AF_NETLINK: + ret = netlink_show_sock(nlh, arg); + break; + case AF_VSOCK: + ret = vsock_show_sock(nlh, arg); + break; + case AF_XDP: + ret = xdp_show_sock(nlh, arg); + break; + default: + ret = -1; + } + + render(); + + return ret; +} + +static int handle_follow_request(struct filter *f) +{ + int ret = 0; + int groups = 0; + struct rtnl_handle rth, rth2; + + if (f->families & FAMILY_MASK(AF_INET) && f->dbs & (1 << TCP_DB)) + groups |= 1 << (SKNLGRP_INET_TCP_DESTROY - 1); + if (f->families & FAMILY_MASK(AF_INET) && f->dbs & (1 << UDP_DB)) + groups |= 1 << (SKNLGRP_INET_UDP_DESTROY - 1); + if (f->families & FAMILY_MASK(AF_INET6) && f->dbs & (1 << TCP_DB)) + groups |= 1 << 
(SKNLGRP_INET6_TCP_DESTROY - 1); + if (f->families & FAMILY_MASK(AF_INET6) && f->dbs & (1 << UDP_DB)) + groups |= 1 << (SKNLGRP_INET6_UDP_DESTROY - 1); + + if (groups == 0) + return -1; + + if (rtnl_open_byproto(&rth, groups, NETLINK_SOCK_DIAG)) + return -1; + + rth.dump = 0; + rth.local.nl_pid = 0; + + if (f->kill) { + if (rtnl_open_byproto(&rth2, groups, NETLINK_SOCK_DIAG)) { + rtnl_close(&rth); + return -1; + } + f->rth_for_killing = &rth2; + } + + if (rtnl_dump_filter(&rth, generic_show_sock, f)) + ret = -1; + + rtnl_close(&rth); + if (f->rth_for_killing) + rtnl_close(f->rth_for_killing); + return ret; +} + +static int get_snmp_int(char *proto, char *key, int *result) +{ + char buf[1024]; + FILE *fp; + int protolen = strlen(proto); + int keylen = strlen(key); + + *result = 0; + + if ((fp = net_snmp_open()) == NULL) + return -1; + + while (fgets(buf, sizeof(buf), fp) != NULL) { + char *p = buf; + int pos = 0; + + if (memcmp(buf, proto, protolen)) + continue; + while ((p = strchr(p, ' ')) != NULL) { + pos++; + p++; + if (memcmp(p, key, keylen) == 0 && + (p[keylen] == ' ' || p[keylen] == '\n')) + break; + } + if (fgets(buf, sizeof(buf), fp) == NULL) + break; + if (memcmp(buf, proto, protolen)) + break; + p = buf; + while ((p = strchr(p, ' ')) != NULL) { + p++; + if (--pos == 0) { + sscanf(p, "%d", result); + fclose(fp); + return 0; + } + } + } + + fclose(fp); + errno = ESRCH; + return -1; +} + + +/* Get stats from sockstat */ + +struct ssummary { + int socks; + int tcp_mem; + int tcp_total; + int tcp_orphans; + int tcp_tws; + int tcp4_hashed; + int udp4; + int raw4; + int frag4; + int frag4_mem; + int tcp6_hashed; + int udp6; + int raw6; + int frag6; + int frag6_mem; +}; + +static void get_sockstat_line(char *line, struct ssummary *s) +{ + char id[256], rem[256]; + + if (sscanf(line, "%[^ ] %[^\n]\n", id, rem) != 2) + return; + + if (strcmp(id, "sockets:") == 0) + sscanf(rem, "%*s%d", &s->socks); + else if (strcmp(id, "UDP:") == 0) + sscanf(rem, "%*s%d", 
&s->udp4); + else if (strcmp(id, "UDP6:") == 0) + sscanf(rem, "%*s%d", &s->udp6); + else if (strcmp(id, "RAW:") == 0) + sscanf(rem, "%*s%d", &s->raw4); + else if (strcmp(id, "RAW6:") == 0) + sscanf(rem, "%*s%d", &s->raw6); + else if (strcmp(id, "TCP6:") == 0) + sscanf(rem, "%*s%d", &s->tcp6_hashed); + else if (strcmp(id, "FRAG:") == 0) + sscanf(rem, "%*s%d%*s%d", &s->frag4, &s->frag4_mem); + else if (strcmp(id, "FRAG6:") == 0) + sscanf(rem, "%*s%d%*s%d", &s->frag6, &s->frag6_mem); + else if (strcmp(id, "TCP:") == 0) + sscanf(rem, "%*s%d%*s%d%*s%d%*s%d%*s%d", + &s->tcp4_hashed, + &s->tcp_orphans, &s->tcp_tws, &s->tcp_total, &s->tcp_mem); +} + +static int get_sockstat(struct ssummary *s) +{ + char buf[256]; + FILE *fp; + + memset(s, 0, sizeof(*s)); + + if ((fp = net_sockstat_open()) == NULL) + return -1; + while (fgets(buf, sizeof(buf), fp) != NULL) + get_sockstat_line(buf, s); + fclose(fp); + + if ((fp = net_sockstat6_open()) == NULL) + return 0; + while (fgets(buf, sizeof(buf), fp) != NULL) + get_sockstat_line(buf, s); + fclose(fp); + + return 0; +} + +static int print_summary(void) +{ + struct ssummary s; + int tcp_estab; + + if (get_sockstat(&s) < 0) + perror("ss: get_sockstat"); + if (get_snmp_int("Tcp:", "CurrEstab", &tcp_estab) < 0) + perror("ss: get_snmpstat"); + + printf("Total: %d\n", s.socks); + + printf("TCP: %d (estab %d, closed %d, orphaned %d, timewait %d)\n", + s.tcp_total + s.tcp_tws, tcp_estab, + s.tcp_total - (s.tcp4_hashed + s.tcp6_hashed - s.tcp_tws), + s.tcp_orphans, s.tcp_tws); + + printf("\n"); + printf("Transport Total IP IPv6\n"); + printf("RAW %-9d %-9d %-9d\n", s.raw4+s.raw6, s.raw4, s.raw6); + printf("UDP %-9d %-9d %-9d\n", s.udp4+s.udp6, s.udp4, s.udp6); + printf("TCP %-9d %-9d %-9d\n", s.tcp4_hashed+s.tcp6_hashed, s.tcp4_hashed, s.tcp6_hashed); + printf("INET %-9d %-9d %-9d\n", + s.raw4+s.udp4+s.tcp4_hashed+ + s.raw6+s.udp6+s.tcp6_hashed, + s.raw4+s.udp4+s.tcp4_hashed, + s.raw6+s.udp6+s.tcp6_hashed); + printf("FRAG %-9d %-9d %-9d\n", 
s.frag4+s.frag6, s.frag4, s.frag6); + + printf("\n"); + + return 0; +} + +static void _usage(FILE *dest) +{ + fprintf(dest, +"Usage: ss [ OPTIONS ]\n" +" ss [ OPTIONS ] [ FILTER ]\n" +" -h, --help this message\n" +" -V, --version output version information\n" +" -n, --numeric don't resolve service names\n" +" -r, --resolve resolve host names\n" +" -a, --all display all sockets\n" +" -l, --listening display listening sockets\n" +" -B, --bound-inactive display TCP bound but inactive sockets\n" +" -o, --options show timer information\n" +" -e, --extended show detailed socket information\n" +" -m, --memory show socket memory usage\n" +" -p, --processes show process using socket\n" +" -T, --threads show thread using socket\n" +" -i, --info show internal TCP information\n" +" --tipcinfo show internal tipc socket information\n" +" -s, --summary show socket usage summary\n" +" --tos show tos and priority information\n" +" --cgroup show cgroup information\n" +" -b, --bpf show bpf filter socket information\n" +" -E, --events continually display sockets as they are destroyed\n" +" -Z, --context display task SELinux security contexts\n" +" -z, --contexts display task and socket SELinux security contexts\n" +" -N, --net switch to the specified network namespace name\n" +"\n" +" -4, --ipv4 display only IP version 4 sockets\n" +" -6, --ipv6 display only IP version 6 sockets\n" +" -0, --packet display PACKET sockets\n" +" -t, --tcp display only TCP sockets\n" +" -M, --mptcp display only MPTCP sockets\n" +" -S, --sctp display only SCTP sockets\n" +" -u, --udp display only UDP sockets\n" +" -d, --dccp display only DCCP sockets\n" +" -w, --raw display only RAW sockets\n" +" -x, --unix display only Unix domain sockets\n" +" --tipc display only TIPC sockets\n" +" --vsock display only vsock sockets\n" +" --xdp display only XDP sockets\n" +" -f, --family=FAMILY display sockets of type FAMILY\n" +" FAMILY := {inet|inet6|link|unix|netlink|vsock|tipc|xdp|help}\n" +"\n" +" -K, --kill 
forcibly close sockets, display what was closed\n" +" -H, --no-header Suppress header line\n" +" -O, --oneline socket's data printed on a single line\n" +" --inet-sockopt show various inet socket options\n" +"\n" +" -A, --query=QUERY, --socket=QUERY\n" +" QUERY := {all|inet|tcp|mptcp|udp|raw|unix|unix_dgram|unix_stream|unix_seqpacket|packet|packet_raw|packet_dgram|netlink|dccp|sctp|vsock_stream|vsock_dgram|tipc|xdp}[,QUERY]\n" +"\n" +" -D, --diag=FILE Dump raw information about TCP sockets to FILE\n" +" -F, --filter=FILE read filter information from FILE\n" +" FILTER := [ state STATE-FILTER ] [ EXPRESSION ]\n" +" STATE-FILTER := {all|connected|synchronized|bucket|big|TCP-STATES}\n" +" TCP-STATES := {established|syn-sent|syn-recv|fin-wait-{1,2}|time-wait|closed|close-wait|last-ack|listening|closing}\n" +" connected := {established|syn-sent|syn-recv|fin-wait-{1,2}|time-wait|close-wait|last-ack|closing}\n" +" synchronized := {established|syn-recv|fin-wait-{1,2}|time-wait|close-wait|last-ack|closing}\n" +" bucket := {syn-recv|time-wait}\n" +" big := {established|syn-sent|fin-wait-{1,2}|closed|close-wait|last-ack|listening|closing}\n" + ); +} + +static void help(void) __attribute__((noreturn)); +static void help(void) +{ + _usage(stdout); + exit(0); +} + +static void usage(void) __attribute__((noreturn)); +static void usage(void) +{ + _usage(stderr); + exit(-1); +} + + +static int scan_state(const char *state) +{ + static const char * const sstate_namel[] = { + "UNKNOWN", + [SS_ESTABLISHED] = "established", + [SS_SYN_SENT] = "syn-sent", + [SS_SYN_RECV] = "syn-recv", + [SS_FIN_WAIT1] = "fin-wait-1", + [SS_FIN_WAIT2] = "fin-wait-2", + [SS_TIME_WAIT] = "time-wait", + [SS_CLOSE] = "unconnected", + [SS_CLOSE_WAIT] = "close-wait", + [SS_LAST_ACK] = "last-ack", + [SS_LISTEN] = "listening", + [SS_CLOSING] = "closing", + [SS_NEW_SYN_RECV] = "new-syn-recv", + [SS_BOUND_INACTIVE] = "bound-inactive", + }; + int i; + + /* NEW_SYN_RECV is a kernel implementation detail. 
It shouldn't be used + * or even be visible by users. + */ + if (strcasecmp(state, "new-syn-recv") == 0) + goto wrong_state; + + if (strcasecmp(state, "close") == 0 || + strcasecmp(state, "closed") == 0) + return (1<<SS_CLOSE); + if (strcasecmp(state, "syn-rcv") == 0) + return (1<<SS_SYN_RECV); + if (strcasecmp(state, "established") == 0) + return (1<<SS_ESTABLISHED); + if (strcasecmp(state, "all") == 0) + return SS_ALL; + if (strcasecmp(state, "connected") == 0) + return SS_ALL & ~((1<<SS_CLOSE)|(1<<SS_LISTEN)); + if (strcasecmp(state, "synchronized") == 0) + return SS_ALL & ~((1<<SS_CLOSE)|(1<<SS_LISTEN)|(1<<SS_SYN_SENT)); + if (strcasecmp(state, "bucket") == 0) + return (1<<SS_SYN_RECV)|(1<<SS_TIME_WAIT); + if (strcasecmp(state, "big") == 0) + return SS_ALL & ~((1<<SS_SYN_RECV)|(1<<SS_TIME_WAIT)); + for (i = 0; i < SS_MAX; i++) { + if (strcasecmp(state, sstate_namel[i]) == 0) + return (1<<i); + } + +wrong_state: + fprintf(stderr, "ss: wrong state name: %s\n", state); + exit(-1); +} + +/* Values 'v' and 'V' are already used so a non-character is used */ +#define OPT_VSOCK 256 + +/* Values of 't' are already used so a non-character is used */ +#define OPT_TIPCSOCK 257 +#define OPT_TIPCINFO 258 + +#define OPT_TOS 259 + +/* Values of 'x' are already used so a non-character is used */ +#define OPT_XDPSOCK 260 + +#define OPT_CGROUP 261 + +#define OPT_INET_SOCKOPT 262 + +static const struct option long_opts[] = { + { "numeric", 0, 0, 'n' }, + { "resolve", 0, 0, 'r' }, + { "options", 0, 0, 'o' }, + { "extended", 0, 0, 'e' }, + { "memory", 0, 0, 'm' }, + { "info", 0, 0, 'i' }, + { "processes", 0, 0, 'p' }, + { "threads", 0, 0, 'T' }, + { "bpf", 0, 0, 'b' }, + { "events", 0, 0, 'E' }, + { "dccp", 0, 0, 'd' }, + { "tcp", 0, 0, 't' }, + { "sctp", 0, 0, 'S' }, + { "udp", 0, 0, 'u' }, + { "raw", 0, 0, 'w' }, + { "unix", 0, 0, 'x' }, + { "tipc", 0, 0, OPT_TIPCSOCK}, + { "vsock", 0, 0, OPT_VSOCK }, + { "all", 0, 0, 'a' }, + { "listening", 0, 0, 'l' }, + { "bound-inactive", 0, 
0, 'B' }, + { "ipv4", 0, 0, '4' }, + { "ipv6", 0, 0, '6' }, + { "packet", 0, 0, '0' }, + { "family", 1, 0, 'f' }, + { "socket", 1, 0, 'A' }, + { "query", 1, 0, 'A' }, + { "summary", 0, 0, 's' }, + { "diag", 1, 0, 'D' }, + { "filter", 1, 0, 'F' }, + { "version", 0, 0, 'V' }, + { "help", 0, 0, 'h' }, + { "context", 0, 0, 'Z' }, + { "contexts", 0, 0, 'z' }, + { "net", 1, 0, 'N' }, + { "tipcinfo", 0, 0, OPT_TIPCINFO}, + { "tos", 0, 0, OPT_TOS }, + { "cgroup", 0, 0, OPT_CGROUP }, + { "kill", 0, 0, 'K' }, + { "no-header", 0, 0, 'H' }, + { "xdp", 0, 0, OPT_XDPSOCK}, + { "mptcp", 0, 0, 'M' }, + { "oneline", 0, 0, 'O' }, + { "inet-sockopt", 0, 0, OPT_INET_SOCKOPT }, + { 0 } + +}; + +int main(int argc, char *argv[]) +{ + int saw_states = 0; + int saw_query = 0; + int do_summary = 0; + const char *dump_tcpdiag = NULL; + FILE *filter_fp = NULL; + int ch; + int state_filter = 0; + + while ((ch = getopt_long(argc, argv, + "dhalBetuwxnro460spTbEf:mMiA:D:F:vVzZN:KHSO", + long_opts, NULL)) != EOF) { + switch (ch) { + case 'n': + numeric = 1; + break; + case 'r': + resolve_hosts = 1; + break; + case 'o': + show_options = 1; + break; + case 'e': + show_options = 1; + show_details++; + break; + case 'm': + show_mem = 1; + break; + case 'i': + show_tcpinfo = 1; + break; + case 'p': + show_processes++; + break; + case 'T': + show_threads++; + break; + case 'b': + show_options = 1; + show_bpf++; + break; + case 'E': + follow_events = 1; + break; + case 'd': + filter_db_set(¤t_filter, DCCP_DB, true); + break; + case 't': + filter_db_set(¤t_filter, TCP_DB, true); + break; + case 'S': + filter_db_set(¤t_filter, SCTP_DB, true); + break; + case 'u': + filter_db_set(¤t_filter, UDP_DB, true); + break; + case 'w': + filter_db_set(¤t_filter, RAW_DB, true); + break; + case 'x': + filter_af_set(¤t_filter, AF_UNIX); + break; + case OPT_VSOCK: + filter_af_set(¤t_filter, AF_VSOCK); + break; + case OPT_TIPCSOCK: + filter_af_set(¤t_filter, AF_TIPC); + break; + case 'a': + state_filter = SS_ALL; + break; 
+ case 'l': + state_filter = (1 << SS_LISTEN) | (1 << SS_CLOSE); + break; + case 'B': + state_filter = 1 << SS_BOUND_INACTIVE; + break; + case '4': + filter_af_set(¤t_filter, AF_INET); + break; + case '6': + filter_af_set(¤t_filter, AF_INET6); + break; + case '0': + filter_af_set(¤t_filter, AF_PACKET); + break; + case OPT_XDPSOCK: + filter_af_set(¤t_filter, AF_XDP); + break; + case 'M': + filter_db_set(¤t_filter, MPTCP_DB, true); + break; + case 'f': + if (strcmp(optarg, "inet") == 0) + filter_af_set(¤t_filter, AF_INET); + else if (strcmp(optarg, "inet6") == 0) + filter_af_set(¤t_filter, AF_INET6); + else if (strcmp(optarg, "link") == 0) + filter_af_set(¤t_filter, AF_PACKET); + else if (strcmp(optarg, "unix") == 0) + filter_af_set(¤t_filter, AF_UNIX); + else if (strcmp(optarg, "netlink") == 0) + filter_af_set(¤t_filter, AF_NETLINK); + else if (strcmp(optarg, "tipc") == 0) + filter_af_set(¤t_filter, AF_TIPC); + else if (strcmp(optarg, "vsock") == 0) + filter_af_set(¤t_filter, AF_VSOCK); + else if (strcmp(optarg, "xdp") == 0) + filter_af_set(¤t_filter, AF_XDP); + else if (strcmp(optarg, "help") == 0) + help(); + else { + fprintf(stderr, "ss: \"%s\" is invalid family\n", + optarg); + usage(); + } + break; + case 'A': + { + char *p, *p1; + + if (!saw_query) { + current_filter.dbs = 0; + state_filter = state_filter ? 
+ state_filter : SS_CONN; + saw_query = 1; + do_default = 0; + } + p = p1 = optarg; + do { + if ((p1 = strchr(p, ',')) != NULL) + *p1 = 0; + if (filter_db_parse(¤t_filter, p)) { + fprintf(stderr, "ss: \"%s\" is illegal socket table id\n", p); + usage(); + } + p = p1 + 1; + } while (p1); + break; + } + case 's': + do_summary = 1; + break; + case 'D': + dump_tcpdiag = optarg; + break; + case 'F': + if (filter_fp) { + fprintf(stderr, "More than one filter file\n"); + exit(-1); + } + if (optarg[0] == '-') + filter_fp = stdin; + else + filter_fp = fopen(optarg, "r"); + if (!filter_fp) { + perror("fopen filter file"); + exit(-1); + } + break; + case 'v': + case 'V': + printf("ss utility, iproute2-%s\n", version); + exit(0); + case 'z': + show_sock_ctx++; + /* fall through */ + case 'Z': + if (!is_selinux_enabled()) { + fprintf(stderr, "ss: SELinux is not enabled.\n"); + exit(1); + } + show_proc_ctx++; + break; + case 'N': + if (netns_switch(optarg)) + exit(1); + break; + case OPT_TIPCINFO: + show_tipcinfo = 1; + break; + case OPT_TOS: + show_tos = 1; + break; + case OPT_CGROUP: + show_cgroup = 1; + break; + case 'K': + current_filter.kill = 1; + break; + case 'H': + show_header = 0; + break; + case 'O': + oneline = 1; + break; + case OPT_INET_SOCKOPT: + show_inet_sockopt = 1; + break; + case 'h': + help(); + case '?': + default: + usage(); + } + } + + if (show_processes || show_threads || show_proc_ctx || show_sock_ctx) + user_ent_hash_build(); + + argc -= optind; + argv += optind; + + if (do_summary) { + print_summary(); + if (do_default && argc == 0) + exit(0); + } + + while (argc > 0) { + if (strcmp(*argv, "state") == 0) { + NEXT_ARG(); + if (!saw_states) + state_filter = 0; + state_filter |= scan_state(*argv); + saw_states = 1; + } else if (strcmp(*argv, "exclude") == 0 || + strcmp(*argv, "excl") == 0) { + NEXT_ARG(); + if (!saw_states) + state_filter = SS_ALL; + state_filter &= ~scan_state(*argv); + saw_states = 1; + } else { + break; + } + argc--; argv++; + } + + 
if (do_default) { + state_filter = state_filter ? state_filter : SS_CONN; + filter_db_parse(¤t_filter, "all"); + } + + filter_states_set(¤t_filter, state_filter); + filter_merge_defaults(¤t_filter); + +#ifdef HAVE_RPC + if (!numeric && resolve_hosts && + (current_filter.dbs & (UNIX_DBM|INET_L4_DBM))) + init_service_resolver(); +#endif + + if (current_filter.dbs == 0) { + fprintf(stderr, "ss: no socket tables to show with such filter.\n"); + exit(0); + } + if (current_filter.families == 0) { + fprintf(stderr, "ss: no families to show with such filter.\n"); + exit(0); + } + if (current_filter.states == 0) { + fprintf(stderr, "ss: no socket states to show with such filter.\n"); + exit(0); + } + + if (dump_tcpdiag) { + FILE *dump_fp = stdout; + + if (!(current_filter.dbs & (1<<TCP_DB))) { + fprintf(stderr, "ss: tcpdiag dump requested and no tcp in filter.\n"); + exit(0); + } + if (dump_tcpdiag[0] != '-') { + dump_fp = fopen(dump_tcpdiag, "w"); + if (!dump_tcpdiag) { + perror("fopen dump file"); + exit(-1); + } + } + inet_show_netlink(¤t_filter, dump_fp, IPPROTO_TCP); + fflush(dump_fp); + exit(0); + } + + if (ssfilter_parse(¤t_filter.f, argc, argv, filter_fp)) + usage(); + + if (!show_processes) + columns[COL_PROC].disabled = 1; + + if (!(current_filter.dbs & (current_filter.dbs - 1))) + columns[COL_NETID].disabled = 1; + + if (!(current_filter.states & (current_filter.states - 1))) + columns[COL_STATE].disabled = 1; + + if (show_header) + print_header(); + + fflush(stdout); + + if (follow_events) + exit(handle_follow_request(¤t_filter)); + + if (current_filter.dbs & (1<<NETLINK_DB)) + netlink_show(¤t_filter); + if (current_filter.dbs & PACKET_DBM) + packet_show(¤t_filter); + if (current_filter.dbs & UNIX_DBM) + unix_show(¤t_filter); + if (current_filter.dbs & (1<<RAW_DB)) + raw_show(¤t_filter); + if (current_filter.dbs & (1<<UDP_DB)) + udp_show(¤t_filter); + if (current_filter.dbs & (1<<TCP_DB)) + tcp_show(¤t_filter); + if (current_filter.dbs & (1<<DCCP_DB)) + 
dccp_show(¤t_filter); + if (current_filter.dbs & (1<<SCTP_DB)) + sctp_show(¤t_filter); + if (current_filter.dbs & VSOCK_DBM) + vsock_show(¤t_filter); + if (current_filter.dbs & (1<<TIPC_DB)) + tipc_show(¤t_filter); + if (current_filter.dbs & (1<<XDP_DB)) + xdp_show(¤t_filter); + if (current_filter.dbs & (1<<MPTCP_DB)) + mptcp_show(¤t_filter); + + if (show_processes || show_threads || show_proc_ctx || show_sock_ctx) + user_ent_destroy(); + + render(); + + return 0; +} diff --git a/misc/ss_util.h b/misc/ss_util.h new file mode 100644 index 0000000..37936c6 --- /dev/null +++ b/misc/ss_util.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef __SS_UTIL_H__ +#define __SS_UTIL_H__ + +#include <linux/sock_diag.h> +#include <linux/inet_diag.h> + +#define MAGIC_SEQ 123456 + +#define DIAG_REQUEST(_req, _r) \ + struct { \ + struct nlmsghdr nlh; \ + _r; \ + } _req = { \ + .nlh = { \ + .nlmsg_type = SOCK_DIAG_BY_FAMILY, \ + .nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST,\ + .nlmsg_seq = MAGIC_SEQ, \ + .nlmsg_len = sizeof(_req), \ + }, \ + } + +#endif /* __SS_UTIL_H__ */ diff --git a/misc/ssfilter.h b/misc/ssfilter.h new file mode 100644 index 0000000..73e55e0 --- /dev/null +++ b/misc/ssfilter.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#include <stdbool.h> + +enum { + SSF_DCOND, + SSF_SCOND, + SSF_OR, + SSF_AND, + SSF_NOT, + SSF_D_GE, + SSF_D_LE, + SSF_S_GE, + SSF_S_LE, + SSF_S_AUTO, + SSF_DEVCOND, + SSF_MARKMASK, + SSF_CGROUPCOND, + SSF__MAX +}; + +bool ssfilter_is_supported(int type); + +struct ssfilter +{ + int type; + struct ssfilter *post; + struct ssfilter *pred; +}; + +int ssfilter_parse(struct ssfilter **f, int argc, char **argv, FILE *fp); +void *parse_hostcond(char *addr, bool is_port); +void *parse_devcond(char *name); +void *parse_markmask(const char *markmask); +void *parse_cgroupcond(const char *path); diff --git a/misc/ssfilter.y b/misc/ssfilter.y new file mode 100644 index 0000000..3195723 --- /dev/null +++ 
b/misc/ssfilter.y @@ -0,0 +1,369 @@
%{
/* SPDX-License-Identifier: GPL-2.0-or-later */

#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <string.h>
#include "ssfilter.h"

typedef struct ssfilter * ssfilter_t;

/* Every grammar symbol carries a pointer to a filter node (or, for the
 * *COND tokens, the opaque object returned by a parse_*() helper).
 */
#define YYSTYPE ssfilter_t

/*
 * Allocate a filter node of the given SSF_* type.
 *
 * @type: node type; the process exits if ssfilter_is_supported() rejects it.
 * @pred: first operand or leaf payload, stored as-is in n->pred.
 *
 * Returns the new node with ->post set to NULL.  Never returns NULL:
 * allocation failure aborts the process.
 */
static struct ssfilter * alloc_node(int type, void *pred)
{
	struct ssfilter *n;

	if (!ssfilter_is_supported(type)) {
		fprintf(stderr, "It looks like such filter is not supported! Too old kernel?\n");
		exit(-1);
	}

	n = malloc(sizeof(*n));
	if (n == NULL)
		abort();
	n->type = type;
	n->pred = pred;
	n->post = NULL;
	return n;
}

/* Parser input and output, set up by ssfilter_parse(). */
static char **yy_argv;		/* filter words from the command line */
static int yy_argc;		/* number of entries in yy_argv */
static FILE *yy_fp;		/* optional file with further filter lines */
static ssfilter_t *yy_ret;	/* where the finished tree is stored */
/* Last keyword token returned by yylex(); it decides how the next
 * free-form word is interpreted (device, mark, cgroup, port or host).
 */
static int tok_type = -1;

static int yylex(void);

/* Report a parse error.  No newline is printed here: ssfilter_parse()
 * finishes the line.
 */
static void yyerror(char *s)
{
	fprintf(stderr, "ss: bison bellows (while parsing filter): \"%s!\"", s);
}

%}

%token HOSTCOND DCOND SCOND DPORT SPORT LEQ GEQ NEQ AUTOBOUND DEVCOND DEVNAME MARKMASK FWMARK CGROUPCOND CGROUPPATH
/* Precedence grows downwards: '|' binds loosest, '!' tightest. */
%left '|'
%left '&'
%nonassoc '!'

%%
/* The whole filter.  *yy_ret is written only when the filter is not empty. */
applet: exprlist
	{
		*yy_ret = $1;
		$$ = $1;
	}
	| null
	;

null:   /* NOTHING */ { $$ = NULL; }
	;

/* Two expressions written next to each other are implicitly ANDed. */
exprlist: expr
	| exprlist '|' expr
	{
		$$ = alloc_node(SSF_OR, $1);
		$$->post = $3;
	}
	| exprlist '&' expr
	{
		$$ = alloc_node(SSF_AND, $1);
		$$->post = $3;
	}
	| exprlist expr
	{
		$$ = alloc_node(SSF_AND, $1);
		$$->post = $2;
	}
	;

/* The '=' between a keyword and its operand is optional. */
eq:	'='
	| /* nothing */
	;

/* Strict comparisons and "!=" are expressed as SSF_NOT around the
 * complementary node, e.g. "dport > X" is NOT(dport <= X).
 */
expr:	'(' exprlist ')'
	{
		$$ = $2;
	}
	| '!' expr
	{
		$$ = alloc_node(SSF_NOT, $2);
	}
	| DCOND eq HOSTCOND
	{
		$$ = alloc_node(SSF_DCOND, $3);
	}
	| SCOND eq HOSTCOND
	{
		$$ = alloc_node(SSF_SCOND, $3);
	}
	| DPORT GEQ HOSTCOND
	{
		$$ = alloc_node(SSF_D_GE, $3);
	}
	| DPORT LEQ HOSTCOND
	{
		$$ = alloc_node(SSF_D_LE, $3);
	}
	| DPORT '>' HOSTCOND
	{
		$$ = alloc_node(SSF_NOT, alloc_node(SSF_D_LE, $3));
	}
	| DPORT '<' HOSTCOND
	{
		$$ = alloc_node(SSF_NOT, alloc_node(SSF_D_GE, $3));
	}
	| DPORT eq HOSTCOND
	{
		$$ = alloc_node(SSF_DCOND, $3);
	}
	| DPORT NEQ HOSTCOND
	{
		$$ = alloc_node(SSF_NOT, alloc_node(SSF_DCOND, $3));
	}

	| SPORT GEQ HOSTCOND
	{
		$$ = alloc_node(SSF_S_GE, $3);
	}
	| SPORT LEQ HOSTCOND
	{
		$$ = alloc_node(SSF_S_LE, $3);
	}
	| SPORT '>' HOSTCOND
	{
		$$ = alloc_node(SSF_NOT, alloc_node(SSF_S_LE, $3));
	}
	| SPORT '<' HOSTCOND
	{
		$$ = alloc_node(SSF_NOT, alloc_node(SSF_S_GE, $3));
	}
	| SPORT eq HOSTCOND
	{
		$$ = alloc_node(SSF_SCOND, $3);
	}
	| SPORT NEQ HOSTCOND
	{
		$$ = alloc_node(SSF_NOT, alloc_node(SSF_SCOND, $3));
	}
	| DEVNAME eq DEVCOND
	{
		$$ = alloc_node(SSF_DEVCOND, $3);
	}
	| DEVNAME NEQ DEVCOND
	{
		$$ = alloc_node(SSF_NOT, alloc_node(SSF_DEVCOND, $3));
	}
	| FWMARK eq MARKMASK
	{
		$$ = alloc_node(SSF_MARKMASK, $3);
	}
	| FWMARK NEQ MARKMASK
	{
		$$ = alloc_node(SSF_NOT, alloc_node(SSF_MARKMASK, $3));
	}
	| CGROUPPATH eq CGROUPCOND
	{
		$$ = alloc_node(SSF_CGROUPCOND, $3);
	}
	| CGROUPPATH NEQ CGROUPCOND
	{
		$$ = alloc_node(SSF_NOT, alloc_node(SSF_CGROUPCOND, $3));
	}
	| AUTOBOUND
	{
		$$ = alloc_node(SSF_S_AUTO, NULL);
	}
;
%%

/*
 * Cut the next blank-separated word out of the buffer *ptr points into.
 *
 * The buffer is modified in place: the separator after the word is
 * overwritten with NUL and backslashes are removed (see below).  *ptr is
 * advanced past the consumed text.
 *
 * Returns the start of the NUL-terminated word, or NULL when only blanks
 * (or nothing) are left.
 */
static char *get_token_from_line(char **ptr)
{
	char *tok, *cp = *ptr;

	while (*cp == ' ' || *cp == '\t')
		cp++;

	if (*cp == 0) {
		*ptr = cp;
		return NULL;
	}

	tok = cp;

	while (*cp != 0 && *cp != ' ' && *cp != '\t') {
		/* Backslash escapes everything. */
		if (*cp == '\\') {
			char *tp;

			/* Drop the backslash by shifting the part of the
			 * word scanned so far one byte to the right over
			 * it; the word therefore starts one byte later.
			 */
			for (tp = cp; tp != tok; tp--)
				*tp = *(tp-1);
			cp++;
			tok++;
			/* A trailing backslash escapes nothing. */
			if (*cp == 0)
				break;
		}
		/* Skips the escaped character, so an escaped blank does
		 * not end the word.
		 */
		cp++;
	}
	if (*cp)
		*cp++ = 0;
	*ptr = cp;
	return tok;
}

/*
 * Lexer called by the generated parser.
 *
 * Words are taken first from yy_argv[] (each argument may itself contain
 * several blank-separated words) and, once those are used up, from the
 * lines of yy_fp if it is set.  Keywords and operators are returned as
 * tokens; any other word is handed to the parse_*() helper selected by
 * tok_type and returned as the matching *COND token with the helper's
 * result in yylval.
 *
 * Returns 0 at end of input.  Exits the process on an unparsable word or
 * an over-long / invalid line in the file.
 */
int yylex(void)
{
	static char argbuf[1024];
	static char *tokptr = argbuf;
	static int argc;
	char *curtok;

	do {
		/* Current buffer exhausted: fetch the next one. */
		while (*tokptr == 0) {
			tokptr = NULL;
			if (argc < yy_argc) {
				tokptr = yy_argv[argc];
				argc++;
			} else if (yy_fp) {
				while (tokptr == NULL) {
					size_t len;

					if (fgets(argbuf, sizeof(argbuf), yy_fp) == NULL)
						return 0;

					len = strnlen(argbuf, sizeof(argbuf));
					if (len == 0) {
						fprintf(stderr, "Invalid line\n");
						exit(-1);
					}

					/* A completely filled buffer means
					 * the line did not fit.
					 */
					if (len >= sizeof(argbuf) - 1) {
						fprintf(stderr, "Too long line in filter\n");
						exit(-1);
					}
					if (argbuf[len - 1] == '\n')
						argbuf[len-1] = 0;
					/* Skip comment lines and empty lines.
					 * The terminator is compared against
					 * NUL, not the digit '0', so a line
					 * that starts with a zero (such as an
					 * address) is not silently dropped.
					 */
					if (argbuf[0] == '#' || argbuf[0] == '\0')
						continue;
					tokptr = argbuf;
				}
			} else {
				return 0;
			}
		}
	} while ((curtok = get_token_from_line(&tokptr)) == NULL);

	if (strcmp(curtok, "!") == 0 ||
	    strcmp(curtok, "not") == 0)
		return '!';
	if (strcmp(curtok, "&") == 0 ||
	    strcmp(curtok, "&&") == 0 ||
	    strcmp(curtok, "and") == 0)
		return '&';
	if (strcmp(curtok, "|") == 0 ||
	    strcmp(curtok, "||") == 0 ||
	    strcmp(curtok, "or") == 0)
		return '|';
	if (strcmp(curtok, "(") == 0)
		return '(';
	if (strcmp(curtok, ")") == 0)
		return ')';
	/* Keywords that take an operand record themselves in tok_type. */
	if (strcmp(curtok, "dst") == 0) {
		tok_type = DCOND;
		return DCOND;
	}
	if (strcmp(curtok, "src") == 0) {
		tok_type = SCOND;
		return SCOND;
	}
	if (strcmp(curtok, "dport") == 0) {
		tok_type = DPORT;
		return DPORT;
	}
	if (strcmp(curtok, "sport") == 0) {
		tok_type = SPORT;
		return SPORT;
	}
	if (strcmp(curtok, "dev") == 0) {
		tok_type = DEVNAME;
		return DEVNAME;
	}
	if (strcmp(curtok, "fwmark") == 0) {
		tok_type = FWMARK;
		return FWMARK;
	}
	if (strcmp(curtok, "cgroup") == 0) {
		tok_type = CGROUPPATH;
		return CGROUPPATH;
	}
	if (strcmp(curtok, ">=") == 0 ||
	    strcmp(curtok, "ge") == 0 ||
	    strcmp(curtok, "geq") == 0)
		return GEQ;
	if (strcmp(curtok, "<=") == 0 ||
	    strcmp(curtok, "le") == 0 ||
	    strcmp(curtok, "leq") == 0)
		return LEQ;
	if (strcmp(curtok, "!=") == 0 ||
	    strcmp(curtok, "ne") == 0 ||
	    strcmp(curtok, "neq") == 0)
		return NEQ;
	if (strcmp(curtok, "=") == 0 ||
	    strcmp(curtok, "==") == 0 ||
	    strcmp(curtok, "eq") == 0)
		return '=';
	if (strcmp(curtok, ">") == 0 ||
	    strcmp(curtok, "gt") == 0)
		return '>';
	if (strcmp(curtok, "<") == 0 ||
	    strcmp(curtok, "lt") == 0)
		return '<';
	if (strcmp(curtok, "autobound") == 0) {
		tok_type = AUTOBOUND;
		return AUTOBOUND;
	}
	/* Not a keyword: it is the operand of the last keyword seen. */
	if (tok_type == DEVNAME) {
		yylval = (void*)parse_devcond(curtok);
		if (yylval == NULL) {
			fprintf(stderr, "Cannot parse device.\n");
			exit(1);
		}
		return DEVCOND;
	}
	if (tok_type == FWMARK) {
		yylval = (void*)parse_markmask(curtok);
		if (yylval == NULL) {
			fprintf(stderr, "Cannot parse mark %s.\n", curtok);
			exit(1);
		}
		return MARKMASK;
	}
	if (tok_type == CGROUPPATH) {
		yylval = (void*)parse_cgroupcond(curtok);
		if (yylval == NULL) {
			fprintf(stderr, "Cannot parse cgroup %s.\n", curtok);
			exit(1);
		}
		return CGROUPCOND;
	}
	/* Everything else is a host condition; after sport/dport the word
	 * is flagged as a port for parse_hostcond().
	 */
	yylval = (void*)parse_hostcond(curtok, tok_type == SPORT || tok_type == DPORT);
	if (yylval == NULL) {
		fprintf(stderr, "Cannot parse dst/src address.\n");
		exit(1);
	}
	return HOSTCOND;
}

/*
 * Parse a socket filter expression.
 *
 * @f:    receives the root of the expression tree; it is left untouched
 *        when the filter is empty.
 * @argc: number of words in @argv.
 * @argv: filter words, consumed before @fp.
 * @fp:   optional stream with further filter lines, may be NULL.
 *
 * Returns 0 on success, -1 if the parser reported an error (the message
 * started by yyerror() is completed here).
 */
int ssfilter_parse(struct ssfilter **f, int argc, char **argv, FILE *fp)
{
	yy_argc = argc;
	yy_argv = argv;
	yy_fp   = fp;
	yy_ret  = f;

	if (yyparse()) {
		fprintf(stderr, " Sorry.\n");
		return -1;
	}
	return 0;
}
diff --git a/misc/ssfilter_check.c b/misc/ssfilter_check.c new file mode 100644 index 0000000..02709ce --- /dev/null +++ b/misc/ssfilter_check.c @@ -0,0 +1,104 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

#include "libnetlink.h"
#include "ssfilter.h"
#include "ss_util.h"

static int dummy_filter(struct nlmsghdr *n,
void *arg) +{ + /* just stops rtnl_dump_filter() */ + return -1; +} + +static bool cgroup_filter_check(void) +{ + struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; + DIAG_REQUEST(req, struct inet_diag_req_v2 r); + struct instr { + struct inet_diag_bc_op op; + __u64 cgroup_id; + } __attribute__((packed)); + int inslen = sizeof(struct instr); + struct instr instr = { + { INET_DIAG_BC_CGROUP_COND, inslen, inslen + 4 }, + 0 + }; + struct rtnl_handle rth; + struct iovec iov[3]; + struct msghdr msg; + struct rtattr rta; + int ret = false; + int iovlen = 3; + + if (rtnl_open_byproto(&rth, 0, NETLINK_SOCK_DIAG)) + return false; + rth.dump = MAGIC_SEQ; + rth.flags = RTNL_HANDLE_F_SUPPRESS_NLERR; + + memset(&req.r, 0, sizeof(req.r)); + req.r.sdiag_family = AF_INET; + req.r.sdiag_protocol = IPPROTO_TCP; + req.nlh.nlmsg_len += RTA_LENGTH(inslen); + + rta.rta_type = INET_DIAG_REQ_BYTECODE; + rta.rta_len = RTA_LENGTH(inslen); + + iov[0] = (struct iovec) { &req, sizeof(req) }; + iov[1] = (struct iovec) { &rta, sizeof(rta) }; + iov[2] = (struct iovec) { &instr, inslen }; + + msg = (struct msghdr) { + .msg_name = (void *)&nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = iov, + .msg_iovlen = iovlen, + }; + + if (sendmsg(rth.fd, &msg, 0) < 0) + goto out; + + if (rtnl_dump_filter(&rth, dummy_filter, NULL) < 0) { + ret = (errno != EINVAL); + goto out; + } + + ret = true; + +out: + rtnl_close(&rth); + + return ret; +} + + +struct filter_check_t { + bool (*check)(void); + unsigned int checked:1, + supported:1; +}; + +static struct filter_check_t filter_checks[SSF__MAX] = { + [SSF_CGROUPCOND] = { cgroup_filter_check, 0 }, +}; + +bool ssfilter_is_supported(int type) +{ + struct filter_check_t f; + + if (type >= SSF__MAX) + return false; + + f = filter_checks[type]; + if (!f.check) + return true; + + if (!f.checked) { + f.supported = f.check(); + f.checked = 1; + } + + return f.supported; +} |