summaryrefslogtreecommitdiffstats
path: root/src/seastar/dpdk/drivers/net/tap/tap_netlink.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/seastar/dpdk/drivers/net/tap/tap_netlink.c')
-rw-r--r--src/seastar/dpdk/drivers/net/tap/tap_netlink.c343
1 files changed, 343 insertions, 0 deletions
diff --git a/src/seastar/dpdk/drivers/net/tap/tap_netlink.c b/src/seastar/dpdk/drivers/net/tap/tap_netlink.c
new file mode 100644
index 000000000..14bbbec75
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/tap/tap_netlink.c
@@ -0,0 +1,343 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox Technologies, Ltd
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/netlink.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include <rte_malloc.h>
+#include <tap_netlink.h>
+#include <rte_random.h>
+#include "tap_log.h"
+
+/* Must be quite large to support dumping a huge list of QDISC or filters. */
+#define BUF_SIZE (32 * 1024) /* Size of the buffer to receive kernel messages */
+#define SNDBUF_SIZE 32768 /* Send buffer size for the netlink socket */
+#define RCVBUF_SIZE 32768 /* Receive buffer size for the netlink socket */
+
+struct nested_tail {
+ struct rtattr *tail;
+ struct nested_tail *prev;
+};
+
+/**
+ * Initialize a netlink socket for communicating with the kernel.
+ *
+ * @param nl_groups
+ * Set it to a netlink group value (e.g. RTMGRP_LINK) to receive messages for
+ * specific netlink multicast groups. Otherwise, no subscription will be made.
+ *
+ * @return
+ * netlink socket file descriptor on success, -1 otherwise.
+ */
+int
+tap_nl_init(uint32_t nl_groups)
+{
+ int fd, sndbuf_size = SNDBUF_SIZE, rcvbuf_size = RCVBUF_SIZE;
+ struct sockaddr_nl local = {
+ .nl_family = AF_NETLINK,
+ .nl_groups = nl_groups,
+ };
+
+ fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
+ if (fd < 0) {
+ TAP_LOG(ERR, "Unable to create a netlink socket");
+ return -1;
+ }
+ if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int))) {
+ TAP_LOG(ERR, "Unable to set socket buffer send size");
+ close(fd);
+ return -1;
+ }
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int))) {
+ TAP_LOG(ERR, "Unable to set socket buffer receive size");
+ close(fd);
+ return -1;
+ }
+ if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
+ TAP_LOG(ERR, "Unable to bind to the netlink socket");
+ close(fd);
+ return -1;
+ }
+ return fd;
+}
+
+/**
+ * Clean up a netlink socket once all communicating with the kernel is finished.
+ *
+ * @param[in] nlsk_fd
+ * The netlink socket file descriptor used for communication.
+ *
+ * @return
+ * 0 on success, -1 otherwise.
+ */
+int
+tap_nl_final(int nlsk_fd)
+{
+ if (close(nlsk_fd)) {
+ TAP_LOG(ERR, "Failed to close netlink socket: %s (%d)",
+ strerror(errno), errno);
+ return -1;
+ }
+ return 0;
+}
+
+/**
+ * Send a message to the kernel on the netlink socket.
+ *
+ * @param[in] nlsk_fd
+ * The netlink socket file descriptor used for communication.
+ * @param[in] nh
+ * The netlink message send to the kernel.
+ *
+ * @return
+ * the number of sent bytes on success, -1 otherwise.
+ */
+int
+tap_nl_send(int nlsk_fd, struct nlmsghdr *nh)
+{
+ /* man 7 netlink EXAMPLE */
+ struct sockaddr_nl sa = {
+ .nl_family = AF_NETLINK,
+ };
+ struct iovec iov = {
+ .iov_base = nh,
+ .iov_len = nh->nlmsg_len,
+ };
+ struct msghdr msg = {
+ .msg_name = &sa,
+ .msg_namelen = sizeof(sa),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+ int send_bytes;
+
+ nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
+ nh->nlmsg_seq = (uint32_t)rte_rand();
+ send_bytes = sendmsg(nlsk_fd, &msg, 0);
+ if (send_bytes < 0) {
+ TAP_LOG(ERR, "Failed to send netlink message: %s (%d)",
+ strerror(errno), errno);
+ return -1;
+ }
+ return send_bytes;
+}
+
+/**
+ * Check that the kernel sends an appropriate ACK in response
+ * to an tap_nl_send().
+ *
+ * @param[in] nlsk_fd
+ * The netlink socket file descriptor used for communication.
+ *
+ * @return
+ * 0 on success, -1 otherwise with errno set.
+ */
+int
+tap_nl_recv_ack(int nlsk_fd)
+{
+ return tap_nl_recv(nlsk_fd, NULL, NULL);
+}
+
+/**
+ * Receive a message from the kernel on the netlink socket, following an
+ * tap_nl_send().
+ *
+ * @param[in] nlsk_fd
+ * The netlink socket file descriptor used for communication.
+ * @param[in] cb
+ * The callback function to call for each netlink message received.
+ * @param[in, out] arg
+ * Custom arguments for the callback.
+ *
+ * @return
+ * 0 on success, -1 otherwise with errno set.
+ */
+int
+tap_nl_recv(int nlsk_fd, int (*cb)(struct nlmsghdr *, void *arg), void *arg)
+{
+ /* man 7 netlink EXAMPLE */
+ struct sockaddr_nl sa;
+ char buf[BUF_SIZE];
+ struct iovec iov = {
+ .iov_base = buf,
+ .iov_len = sizeof(buf),
+ };
+ struct msghdr msg = {
+ .msg_name = &sa,
+ .msg_namelen = sizeof(sa),
+ .msg_iov = &iov,
+ /* One message at a time */
+ .msg_iovlen = 1,
+ };
+ int multipart = 0;
+ int ret = 0;
+
+ do {
+ struct nlmsghdr *nh;
+ int recv_bytes = 0;
+
+ recv_bytes = recvmsg(nlsk_fd, &msg, 0);
+ if (recv_bytes < 0)
+ return -1;
+ for (nh = (struct nlmsghdr *)buf;
+ NLMSG_OK(nh, (unsigned int)recv_bytes);
+ nh = NLMSG_NEXT(nh, recv_bytes)) {
+ if (nh->nlmsg_type == NLMSG_ERROR) {
+ struct nlmsgerr *err_data = NLMSG_DATA(nh);
+
+ if (err_data->error < 0) {
+ errno = -err_data->error;
+ return -1;
+ }
+ /* Ack message. */
+ return 0;
+ }
+ /* Multi-part msgs and their trailing DONE message. */
+ if (nh->nlmsg_flags & NLM_F_MULTI) {
+ if (nh->nlmsg_type == NLMSG_DONE)
+ return 0;
+ multipart = 1;
+ }
+ if (cb)
+ ret = cb(nh, arg);
+ }
+ } while (multipart);
+ return ret;
+}
+
+/**
+ * Append a netlink attribute to a message.
+ *
+ * @param[in, out] nh
+ * The netlink message to parse, received from the kernel.
+ * @param[in] type
+ * The type of attribute to append.
+ * @param[in] data_len
+ * The length of the data to append.
+ * @param[in] data
+ * The data to append.
+ */
+void
+tap_nlattr_add(struct nlmsghdr *nh, unsigned short type,
+ unsigned int data_len, const void *data)
+{
+ /* see man 3 rtnetlink */
+ struct rtattr *rta;
+
+ rta = (struct rtattr *)NLMSG_TAIL(nh);
+ rta->rta_len = RTA_LENGTH(data_len);
+ rta->rta_type = type;
+ memcpy(RTA_DATA(rta), data, data_len);
+ nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
+}
+
+/**
+ * Append a uint8_t netlink attribute to a message.
+ *
+ * @param[in, out] nh
+ * The netlink message to parse, received from the kernel.
+ * @param[in] type
+ * The type of attribute to append.
+ * @param[in] data
+ * The data to append.
+ */
+void
+tap_nlattr_add8(struct nlmsghdr *nh, unsigned short type, uint8_t data)
+{
+ tap_nlattr_add(nh, type, sizeof(uint8_t), &data);
+}
+
+/**
+ * Append a uint16_t netlink attribute to a message.
+ *
+ * @param[in, out] nh
+ * The netlink message to parse, received from the kernel.
+ * @param[in] type
+ * The type of attribute to append.
+ * @param[in] data
+ * The data to append.
+ */
+void
+tap_nlattr_add16(struct nlmsghdr *nh, unsigned short type, uint16_t data)
+{
+ tap_nlattr_add(nh, type, sizeof(uint16_t), &data);
+}
+
+/**
+ * Append a uint16_t netlink attribute to a message.
+ *
+ * @param[in, out] nh
+ * The netlink message to parse, received from the kernel.
+ * @param[in] type
+ * The type of attribute to append.
+ * @param[in] data
+ * The data to append.
+ */
+void
+tap_nlattr_add32(struct nlmsghdr *nh, unsigned short type, uint32_t data)
+{
+ tap_nlattr_add(nh, type, sizeof(uint32_t), &data);
+}
+
+/**
+ * Start a nested netlink attribute.
+ * It must be followed later by a call to tap_nlattr_nested_finish().
+ *
+ * @param[in, out] msg
+ * The netlink message where to edit the nested_tails metadata.
+ * @param[in] type
+ * The nested attribute type to append.
+ *
+ * @return
+ * -1 if adding a nested netlink attribute failed, 0 otherwise.
+ */
+int
+tap_nlattr_nested_start(struct nlmsg *msg, uint16_t type)
+{
+ struct nested_tail *tail;
+
+ tail = rte_zmalloc(NULL, sizeof(struct nested_tail), 0);
+ if (!tail) {
+ TAP_LOG(ERR,
+ "Couldn't allocate memory for nested netlink attribute");
+ return -1;
+ }
+
+ tail->tail = (struct rtattr *)NLMSG_TAIL(&msg->nh);
+
+ tap_nlattr_add(&msg->nh, type, 0, NULL);
+
+ tail->prev = msg->nested_tails;
+
+ msg->nested_tails = tail;
+
+ return 0;
+}
+
+/**
+ * End a nested netlink attribute.
+ * It follows a call to tap_nlattr_nested_start().
+ * In effect, it will modify the nested attribute length to include every bytes
+ * from the nested attribute start, up to here.
+ *
+ * @param[in, out] msg
+ * The netlink message where to edit the nested_tails metadata.
+ */
+void
+tap_nlattr_nested_finish(struct nlmsg *msg)
+{
+ struct nested_tail *tail = msg->nested_tails;
+
+ tail->tail->rta_len = (char *)NLMSG_TAIL(&msg->nh) - (char *)tail->tail;
+
+ if (tail->prev)
+ msg->nested_tails = tail->prev;
+
+ rte_free(tail);
+}