diff options
Diffstat (limited to '')
-rw-r--r-- | src/libsystemd/sd-device/device-monitor.c | 929 |
1 files changed, 929 insertions, 0 deletions
diff --git a/src/libsystemd/sd-device/device-monitor.c b/src/libsystemd/sd-device/device-monitor.c new file mode 100644 index 0000000..bb4f9bd --- /dev/null +++ b/src/libsystemd/sd-device/device-monitor.c @@ -0,0 +1,929 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <linux/filter.h> +#include <linux/netlink.h> +#include <linux/sockios.h> +#include <sys/ioctl.h> +#include <unistd.h> + +#include "sd-device.h" +#include "sd-event.h" + +#include "MurmurHash2.h" +#include "alloc-util.h" +#include "device-filter.h" +#include "device-monitor-private.h" +#include "device-private.h" +#include "device-util.h" +#include "errno-util.h" +#include "fd-util.h" +#include "format-util.h" +#include "hashmap.h" +#include "iovec-util.h" +#include "missing_socket.h" +#include "mountpoint-util.h" +#include "set.h" +#include "socket-util.h" +#include "stat-util.h" +#include "string-util.h" +#include "strv.h" +#include "uid-range.h" + +#define log_monitor(m, format, ...) \ + log_debug("sd-device-monitor(%s): " format, strna(m ? m->description : NULL), ##__VA_ARGS__) +#define log_monitor_errno(m, r, format, ...) \ + log_debug_errno(r, "sd-device-monitor(%s): " format, strna(m ? m->description : NULL), ##__VA_ARGS__) +#define log_device_monitor(d, m, format, ...) \ + log_device_debug(d, "sd-device-monitor(%s): " format, strna(m ? m->description : NULL), ##__VA_ARGS__) +#define log_device_monitor_errno(d, m, r, format, ...) \ + log_device_debug_errno(d, r, "sd-device-monitor(%s): " format, strna(m ? m->description : NULL), ##__VA_ARGS__) + +struct sd_device_monitor { + unsigned n_ref; + + int sock; + union sockaddr_union snl; + union sockaddr_union snl_trusted_sender; + bool bound; + + UidRange *mapped_userns_uid_range; + + Hashmap *subsystem_filter; + Set *tag_filter; + Hashmap *match_sysattr_filter; + Hashmap *nomatch_sysattr_filter; + Set *match_parent_filter; + Set *nomatch_parent_filter; + bool filter_uptodate; + + sd_event *event; + sd_event_source *event_source; + char *description; + sd_device_monitor_handler_t callback; + void *userdata; +}; + +#define UDEV_MONITOR_MAGIC 0xfeedcafe + +typedef struct monitor_netlink_header { + /* "libudev" prefix to distinguish libudev and kernel messages */ + char prefix[8]; + /* Magic to protect against daemon <-> Library message format mismatch + * Used in the kernel from socket filter rules; needs to be stored in network order */ + unsigned magic; + /* Total length of header structure known to the sender */ + unsigned header_size; + /* Properties string buffer */ + unsigned properties_off; + unsigned properties_len; + /* Hashes of primary device properties strings, to let libudev subscribers + * use in-kernel socket filters; values need to be stored in network order */ + unsigned filter_subsystem_hash; + unsigned filter_devtype_hash; + unsigned filter_tag_bloom_hi; + unsigned filter_tag_bloom_lo; +} monitor_netlink_header; + +static int monitor_set_nl_address(sd_device_monitor *m) { + union sockaddr_union snl; + socklen_t addrlen; + + assert(m); + + /* Get the address the kernel has assigned us. + * It is usually, but not necessarily the pid. */ + addrlen = sizeof(struct sockaddr_nl); + if (getsockname(m->sock, &snl.sa, &addrlen) < 0) + return -errno; + + m->snl.nl.nl_pid = snl.nl.nl_pid; + return 0; +} + +int device_monitor_allow_unicast_sender(sd_device_monitor *m, sd_device_monitor *sender) { + assert(m); + assert(sender); + + m->snl_trusted_sender.nl.nl_pid = sender->snl.nl.nl_pid; + return 0; +} + +_public_ int sd_device_monitor_set_receive_buffer_size(sd_device_monitor *m, size_t size) { + assert_return(m, -EINVAL); + + return fd_set_rcvbuf(m->sock, size, false); +} + +int device_monitor_disconnect(sd_device_monitor *m) { + assert(m); + + m->sock = safe_close(m->sock); + return 0; +} + +int device_monitor_get_fd(sd_device_monitor *m) { + assert(m); + + return m->sock; +} + +int device_monitor_new_full(sd_device_monitor **ret, MonitorNetlinkGroup group, int fd) { + _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *m = NULL; + _cleanup_close_ int sock = -EBADF; + int r; + + assert(group >= 0 && group < _MONITOR_NETLINK_GROUP_MAX); + assert_return(ret, -EINVAL); + + if (group == MONITOR_GROUP_UDEV && + access("/run/udev/control", F_OK) < 0 && + dev_is_devtmpfs() <= 0) { + + /* + * We do not support subscribing to uevents if no instance of + * udev is running. Uevents would otherwise broadcast the + * processing data of the host into containers, which is not + * desired. + * + * Containers will currently not get any udev uevents, until + * a supporting infrastructure is available. + * + * We do not set a netlink multicast group here, so the socket + * will not receive any messages. + */ + + log_monitor(m, "The udev service seems not to be active, disabling the monitor."); + group = MONITOR_GROUP_NONE; + } + + if (fd < 0) { + sock = socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_KOBJECT_UEVENT); + if (sock < 0) + return log_monitor_errno(m, errno, "Failed to create socket: %m"); + } + + m = new(sd_device_monitor, 1); + if (!m) + return -ENOMEM; + + *m = (sd_device_monitor) { + .n_ref = 1, + .sock = fd >= 0 ? fd : TAKE_FD(sock), + .bound = fd >= 0, + .snl.nl.nl_family = AF_NETLINK, + .snl.nl.nl_groups = group, + }; + + if (fd >= 0) { + r = monitor_set_nl_address(m); + if (r < 0) { + log_monitor_errno(m, r, "Failed to set netlink address: %m"); + goto fail; + } + } + + if (DEBUG_LOGGING) { + _cleanup_close_ int netns = -EBADF; + + /* So here's the thing: only AF_NETLINK sockets from the main network namespace will get + * hardware events. Let's check if ours is from there, and if not generate a debug message, + * since we cannot possibly work correctly otherwise. This is just a safety check to make + * things easier to debug. */ + + netns = ioctl(m->sock, SIOCGSKNS); + if (netns < 0) + log_monitor_errno(m, errno, "Unable to get network namespace of udev netlink socket, unable to determine if we are in host netns, ignoring: %m"); + else { + struct stat a, b; + + if (fstat(netns, &a) < 0) { + r = log_monitor_errno(m, errno, "Failed to stat netns of udev netlink socket: %m"); + goto fail; + } + + if (stat("/proc/1/ns/net", &b) < 0) { + if (ERRNO_IS_PRIVILEGE(errno)) + /* If we can't access PID1's netns info due to permissions, it's fine, this is a + * safety check only after all. */ + log_monitor_errno(m, errno, "No permission to stat PID1's netns, unable to determine if we are in host netns, ignoring: %m"); + else + log_monitor_errno(m, errno, "Failed to stat PID1's netns, ignoring: %m"); + + } else if (!stat_inode_same(&a, &b)) + log_monitor(m, "Netlink socket we listen on is not from host netns, we won't see device events."); + } + } + + /* Let's bump the receive buffer size, but only if we are not called via socket activation, as in + * that case the service manager sets the receive buffer size for us, and the value in the .socket + * unit should take full effect. */ + if (fd < 0) { + r = sd_device_monitor_set_receive_buffer_size(m, 128*1024*1024); + if (r < 0) + log_monitor_errno(m, r, "Failed to increase receive buffer size, ignoring: %m"); + } + + *ret = TAKE_PTR(m); + return 0; + +fail: + /* Let's unset the socket fd in the monitor object before we destroy it so that the fd passed in is + * not closed on failure. */ + if (fd >= 0) + m->sock = -1; + + return r; +} + +_public_ int sd_device_monitor_new(sd_device_monitor **ret) { + return device_monitor_new_full(ret, MONITOR_GROUP_UDEV, -1); +} + +_public_ int sd_device_monitor_stop(sd_device_monitor *m) { + assert_return(m, -EINVAL); + + m->event_source = sd_event_source_unref(m->event_source); + (void) device_monitor_disconnect(m); + + return 0; +} + +static int device_monitor_event_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) { + _cleanup_(sd_device_unrefp) sd_device *device = NULL; + _unused_ _cleanup_(log_context_unrefp) LogContext *c = NULL; + sd_device_monitor *m = ASSERT_PTR(userdata); + + if (device_monitor_receive_device(m, &device) <= 0) + return 0; + + if (log_context_enabled()) + c = log_context_new_strv_consume(device_make_log_fields(device)); + + if (m->callback) + return m->callback(m, device, m->userdata); + + return 0; +} + +_public_ int sd_device_monitor_start(sd_device_monitor *m, sd_device_monitor_handler_t callback, void *userdata) { + int r; + + assert_return(m, -EINVAL); + + if (!m->event) { + r = sd_device_monitor_attach_event(m, NULL); + if (r < 0) + return r; + } + + r = device_monitor_enable_receiving(m); + if (r < 0) + return r; + + m->callback = callback; + m->userdata = userdata; + + r = sd_event_add_io(m->event, &m->event_source, m->sock, EPOLLIN, device_monitor_event_handler, m); + if (r < 0) + return r; + + (void) sd_event_source_set_description(m->event_source, m->description ?: "sd-device-monitor"); + + return 0; +} + +_public_ int sd_device_monitor_detach_event(sd_device_monitor *m) { + assert_return(m, -EINVAL); + + (void) sd_device_monitor_stop(m); + m->event = sd_event_unref(m->event); + + return 0; +} + +_public_ int sd_device_monitor_attach_event(sd_device_monitor *m, sd_event *event) { + int r; + + assert_return(m, -EINVAL); + assert_return(!m->event, -EBUSY); + + if (event) + m->event = sd_event_ref(event); + else { + r = sd_event_default(&m->event); + if (r < 0) + return r; + } + + return 0; +} + +_public_ sd_event *sd_device_monitor_get_event(sd_device_monitor *m) { + assert_return(m, NULL); + + return m->event; +} + +_public_ sd_event_source *sd_device_monitor_get_event_source(sd_device_monitor *m) { + assert_return(m, NULL); + + return m->event_source; +} + +_public_ int sd_device_monitor_set_description(sd_device_monitor *m, const char *description) { + int r; + + assert_return(m, -EINVAL); + + r = free_and_strdup(&m->description, description); + if (r <= 0) + return r; + + if (m->event_source) + (void) sd_event_source_set_description(m->event_source, description); + + return r; +} + +_public_ int sd_device_monitor_get_description(sd_device_monitor *m, const char **ret) { + assert_return(m, -EINVAL); + assert_return(ret, -EINVAL); + + *ret = m->description; + return 0; +} + +int device_monitor_enable_receiving(sd_device_monitor *m) { + int r; + + assert(m); + + r = sd_device_monitor_filter_update(m); + if (r < 0) + return log_monitor_errno(m, r, "Failed to update filter: %m"); + + if (!m->bound) { + /* enable receiving of sender credentials */ + r = setsockopt_int(m->sock, SOL_SOCKET, SO_PASSCRED, true); + if (r < 0) + return log_monitor_errno(m, r, "Failed to set socket option SO_PASSCRED: %m"); + + if (bind(m->sock, &m->snl.sa, sizeof(struct sockaddr_nl)) < 0) + return log_monitor_errno(m, errno, "Failed to bind monitoring socket: %m"); + + m->bound = true; + + r = monitor_set_nl_address(m); + if (r < 0) + return log_monitor_errno(m, r, "Failed to set address: %m"); + } + + return 0; +} + +static sd_device_monitor *device_monitor_free(sd_device_monitor *m) { + assert(m); + + (void) sd_device_monitor_detach_event(m); + + uid_range_free(m->mapped_userns_uid_range); + free(m->description); + hashmap_free(m->subsystem_filter); + set_free(m->tag_filter); + hashmap_free(m->match_sysattr_filter); + hashmap_free(m->nomatch_sysattr_filter); + set_free(m->match_parent_filter); + set_free(m->nomatch_parent_filter); + + return mfree(m); +} + +DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_device_monitor, sd_device_monitor, device_monitor_free); + +static int check_subsystem_filter(sd_device_monitor *m, sd_device *device) { + const char *s, *subsystem, *d, *devtype = NULL; + int r; + + assert(m); + assert(device); + + if (hashmap_isempty(m->subsystem_filter)) + return true; + + r = sd_device_get_subsystem(device, &subsystem); + if (r < 0) + return r; + + r = sd_device_get_devtype(device, &devtype); + if (r < 0 && r != -ENOENT) + return r; + + HASHMAP_FOREACH_KEY(d, s, m->subsystem_filter) { + if (!streq(s, subsystem)) + continue; + + if (!d || streq_ptr(d, devtype)) + return true; + } + + return false; +} + +static bool check_tag_filter(sd_device_monitor *m, sd_device *device) { + const char *tag; + + assert(m); + assert(device); + + if (set_isempty(m->tag_filter)) + return true; + + SET_FOREACH(tag, m->tag_filter) + if (sd_device_has_tag(device, tag) > 0) + return true; + + return false; +} + +static int passes_filter(sd_device_monitor *m, sd_device *device) { + int r; + + assert(m); + assert(device); + + r = check_subsystem_filter(m, device); + if (r <= 0) + return r; + + if (!check_tag_filter(m, device)) + return false; + + if (!device_match_sysattr(device, m->match_sysattr_filter, m->nomatch_sysattr_filter)) + return false; + + return device_match_parent(device, m->match_parent_filter, m->nomatch_parent_filter); +} + +static bool check_sender_uid(sd_device_monitor *m, uid_t uid) { + int r; + + assert(m); + + /* Always trust messages from uid 0. */ + if (uid == 0) + return true; + + /* Trust messages sent by the same UID we are running. Currently, such situation happens only for + * unicast messages. */ + if (uid == getuid() || uid == geteuid()) + return true; + + if (!m->mapped_userns_uid_range) { + r = uid_range_load_userns(&m->mapped_userns_uid_range, NULL); + if (r < 0) + log_monitor_errno(m, r, "Failed to load UID ranges mapped to the current user namespace, ignoring: %m"); + } + + /* Trust messages come from outside of the current user namespace. */ + if (!uid_range_contains(m->mapped_userns_uid_range, uid)) + return true; + + /* Otherwise, refuse messages. */ + return false; +} + +int device_monitor_receive_device(sd_device_monitor *m, sd_device **ret) { + _cleanup_(sd_device_unrefp) sd_device *device = NULL; + _cleanup_free_ uint8_t *buf_alloc = NULL; + union { + monitor_netlink_header *nlh; + char *nulstr; + uint8_t *buf; + } message; + struct iovec iov; + CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control; + union sockaddr_union snl; + struct msghdr smsg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = &control, + .msg_controllen = sizeof(control), + .msg_name = &snl, + .msg_namelen = sizeof(snl), + }; + struct ucred *cred; + size_t offset; + ssize_t n; + bool is_initialized = false; + int r; + + assert(m); + assert(ret); + + n = next_datagram_size_fd(m->sock); + if (n < 0) { + if (!ERRNO_IS_TRANSIENT(n)) + log_monitor_errno(m, n, "Failed to get the received message size: %m"); + return n; + } + + if ((size_t) n < ALLOCA_MAX / sizeof(uint8_t) / 2) + message.buf = newa(uint8_t, n); + else { + buf_alloc = new(uint8_t, n); + if (!buf_alloc) + return log_oom_debug(); + + message.buf = buf_alloc; + } + + iov = IOVEC_MAKE(message.buf, n); + + n = recvmsg(m->sock, &smsg, 0); + if (n < 0) { + if (!ERRNO_IS_TRANSIENT(errno)) + log_monitor_errno(m, errno, "Failed to receive message: %m"); + return -errno; + } + + if (smsg.msg_flags & MSG_TRUNC) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EINVAL), "Received truncated message, ignoring message."); + + if (n < 32) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EINVAL), "Invalid message length (%zi), ignoring message.", n); + + if (snl.nl.nl_groups == MONITOR_GROUP_NONE) { + /* unicast message, check if we trust the sender */ + if (m->snl_trusted_sender.nl.nl_pid == 0 || + snl.nl.nl_pid != m->snl_trusted_sender.nl.nl_pid) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN), + "Unicast netlink message ignored."); + + } else if (snl.nl.nl_groups == MONITOR_GROUP_KERNEL) { + if (snl.nl.nl_pid > 0) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN), + "Multicast kernel netlink message from PID %"PRIu32" ignored.", + snl.nl.nl_pid); + } + + cred = CMSG_FIND_DATA(&smsg, SOL_SOCKET, SCM_CREDENTIALS, struct ucred); + if (!cred) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN), + "No sender credentials received, ignoring message."); + + if (!check_sender_uid(m, cred->uid)) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN), + "Sender uid="UID_FMT", message ignored.", cred->uid); + + if (!memchr(message.buf, 0, n)) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN), "Received message without NUL, ignoring message."); + + if (streq(message.nulstr, "libudev")) { + /* udev message needs proper version magic */ + if (message.nlh->magic != htobe32(UDEV_MONITOR_MAGIC)) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN), + "Invalid message signature (%x != %x).", + message.nlh->magic, htobe32(UDEV_MONITOR_MAGIC)); + + if (message.nlh->properties_off + 32 > (size_t) n) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN), + "Invalid offset for properties (%u > %zi).", + message.nlh->properties_off + 32, n); + + offset = message.nlh->properties_off; + + /* devices received from udev are always initialized */ + is_initialized = true; + + } else { + /* check kernel message header */ + if (!strstr(message.nulstr, "@/")) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN), "Invalid message header."); + + offset = strlen(message.nulstr) + 1; + if (offset >= (size_t) n) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN), "Invalid message length."); + } + + r = device_new_from_nulstr(&device, message.nulstr + offset, n - offset); + if (r < 0) + return log_monitor_errno(m, r, "Failed to create device from received message: %m"); + + if (is_initialized) + device_set_is_initialized(device); + + /* Skip device, if it does not pass the current filter */ + r = passes_filter(m, device); + if (r < 0) + return log_device_monitor_errno(device, m, r, "Failed to check received device passing filter: %m"); + if (r == 0) + log_device_monitor(device, m, "Received device does not pass filter, ignoring."); + else + *ret = TAKE_PTR(device); + + return r; +} + +static uint32_t string_hash32(const char *str) { + return MurmurHash2(str, strlen(str), 0); +} + +/* Get a bunch of bit numbers out of the hash, and set the bits in our bit field */ +static uint64_t string_bloom64(const char *str) { + uint64_t bits = 0; + uint32_t hash = string_hash32(str); + + bits |= UINT64_C(1) << (hash & 63); + bits |= UINT64_C(1) << ((hash >> 6) & 63); + bits |= UINT64_C(1) << ((hash >> 12) & 63); + bits |= UINT64_C(1) << ((hash >> 18) & 63); + return bits; +} + +int device_monitor_send_device( + sd_device_monitor *m, + sd_device_monitor *destination, + sd_device *device) { + + monitor_netlink_header nlh = { + .prefix = "libudev", + .magic = htobe32(UDEV_MONITOR_MAGIC), + .header_size = sizeof nlh, + }; + struct iovec iov[2] = { + { .iov_base = &nlh, .iov_len = sizeof nlh }, + }; + struct msghdr smsg = { + .msg_iov = iov, + .msg_iovlen = 2, + }; + /* default destination for sending */ + union sockaddr_union default_destination = { + .nl.nl_family = AF_NETLINK, + .nl.nl_groups = MONITOR_GROUP_UDEV, + }; + uint64_t tag_bloom_bits; + const char *buf, *val; + ssize_t count; + size_t blen; + int r; + + assert(m); + assert(device); + + r = device_get_properties_nulstr(device, &buf, &blen); + if (r < 0) + return log_device_monitor_errno(device, m, r, "Failed to get device properties: %m"); + if (blen < 32) + return log_device_monitor_errno(device, m, SYNTHETIC_ERRNO(EINVAL), + "Length of device property nulstr is too small to contain valid device information."); + + /* fill in versioned header */ + r = sd_device_get_subsystem(device, &val); + if (r < 0) + return log_device_monitor_errno(device, m, r, "Failed to get device subsystem: %m"); + nlh.filter_subsystem_hash = htobe32(string_hash32(val)); + + if (sd_device_get_devtype(device, &val) >= 0) + nlh.filter_devtype_hash = htobe32(string_hash32(val)); + + /* add tag bloom filter */ + tag_bloom_bits = 0; + FOREACH_DEVICE_TAG(device, tag) + tag_bloom_bits |= string_bloom64(tag); + + if (tag_bloom_bits > 0) { + nlh.filter_tag_bloom_hi = htobe32(tag_bloom_bits >> 32); + nlh.filter_tag_bloom_lo = htobe32(tag_bloom_bits & 0xffffffff); + } + + /* add properties list */ + nlh.properties_off = iov[0].iov_len; + nlh.properties_len = blen; + iov[1] = IOVEC_MAKE((char*) buf, blen); + + /* + * Use custom address for target, or the default one. + * + * If we send to a multicast group, we will get + * ECONNREFUSED, which is expected. + */ + smsg.msg_name = destination ? &destination->snl : &default_destination; + smsg.msg_namelen = sizeof(struct sockaddr_nl); + count = sendmsg(m->sock, &smsg, 0); + if (count < 0) { + if (!destination && errno == ECONNREFUSED) { + log_device_monitor(device, m, "Passed to netlink monitor."); + return 0; + } else + return log_device_monitor_errno(device, m, errno, "Failed to send device to netlink monitor: %m"); + } + + log_device_monitor(device, m, "Passed %zi byte to netlink monitor.", count); + return count; +} + +static void bpf_stmt(struct sock_filter *ins, unsigned *i, + unsigned short code, unsigned data) { + ins[(*i)++] = (struct sock_filter) { + .code = code, + .k = data, + }; +} + +static void bpf_jmp(struct sock_filter *ins, unsigned *i, + unsigned short code, unsigned data, + unsigned short jt, unsigned short jf) { + ins[(*i)++] = (struct sock_filter) { + .code = code, + .jt = jt, + .jf = jf, + .k = data, + }; +} + +_public_ int sd_device_monitor_filter_update(sd_device_monitor *m) { + struct sock_filter ins[512] = {}; + struct sock_fprog filter; + const char *subsystem, *devtype, *tag; + unsigned i = 0; + + assert_return(m, -EINVAL); + + if (m->filter_uptodate) + return 0; + + if (m->snl.nl.nl_groups == MONITOR_GROUP_KERNEL || + (hashmap_isempty(m->subsystem_filter) && + set_isempty(m->tag_filter))) { + m->filter_uptodate = true; + return 0; + } + + /* load magic in A */ + bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, magic)); + /* jump if magic matches */ + bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, UDEV_MONITOR_MAGIC, 1, 0); + /* wrong magic, pass packet */ + bpf_stmt(ins, &i, BPF_RET|BPF_K, 0xffffffff); + + if (!set_isempty(m->tag_filter)) { + int tag_matches = set_size(m->tag_filter); + + /* add all tags matches */ + SET_FOREACH(tag, m->tag_filter) { + uint64_t tag_bloom_bits = string_bloom64(tag); + uint32_t tag_bloom_hi = tag_bloom_bits >> 32; + uint32_t tag_bloom_lo = tag_bloom_bits & 0xffffffff; + + /* load device bloom bits in A */ + bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, filter_tag_bloom_hi)); + /* clear bits (tag bits & bloom bits) */ + bpf_stmt(ins, &i, BPF_ALU|BPF_AND|BPF_K, tag_bloom_hi); + /* jump to next tag if it does not match */ + bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, tag_bloom_hi, 0, 3); + + /* load device bloom bits in A */ + bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, filter_tag_bloom_lo)); + /* clear bits (tag bits & bloom bits) */ + bpf_stmt(ins, &i, BPF_ALU|BPF_AND|BPF_K, tag_bloom_lo); + /* jump behind end of tag match block if tag matches */ + tag_matches--; + bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, tag_bloom_lo, 1 + (tag_matches * 6), 0); + } + + /* nothing matched, drop packet */ + bpf_stmt(ins, &i, BPF_RET|BPF_K, 0); + } + + /* add all subsystem matches */ + if (!hashmap_isempty(m->subsystem_filter)) { + HASHMAP_FOREACH_KEY(devtype, subsystem, m->subsystem_filter) { + uint32_t hash = string_hash32(subsystem); + + /* load device subsystem value in A */ + bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, filter_subsystem_hash)); + if (!devtype) { + /* jump if subsystem does not match */ + bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, hash, 0, 1); + } else { + /* jump if subsystem does not match */ + bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, hash, 0, 3); + /* load device devtype value in A */ + bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, filter_devtype_hash)); + /* jump if value does not match */ + hash = string_hash32(devtype); + bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, hash, 0, 1); + } + + /* matched, pass packet */ + bpf_stmt(ins, &i, BPF_RET|BPF_K, 0xffffffff); + + if (i+1 >= ELEMENTSOF(ins)) + return -E2BIG; + } + + /* nothing matched, drop packet */ + bpf_stmt(ins, &i, BPF_RET|BPF_K, 0); + } + + /* matched, pass packet */ + bpf_stmt(ins, &i, BPF_RET|BPF_K, 0xffffffff); + + /* install filter */ + filter = (struct sock_fprog) { + .len = i, + .filter = ins, + }; + if (setsockopt(m->sock, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter)) < 0) + return -errno; + + m->filter_uptodate = true; + return 0; +} + +_public_ int sd_device_monitor_filter_add_match_subsystem_devtype(sd_device_monitor *m, const char *subsystem, const char *devtype) { + int r; + + assert_return(m, -EINVAL); + assert_return(subsystem, -EINVAL); + + /* Do not use string_has_ops_free_free or hashmap_put_strdup() here, as this may be called + * multiple times with the same subsystem but different devtypes. */ + r = hashmap_put_strdup_full(&m->subsystem_filter, &trivial_hash_ops_free_free, subsystem, devtype); + if (r <= 0) + return r; + + m->filter_uptodate = false; + return r; +} + +_public_ int sd_device_monitor_filter_add_match_tag(sd_device_monitor *m, const char *tag) { + int r; + + assert_return(m, -EINVAL); + assert_return(tag, -EINVAL); + + r = set_put_strdup(&m->tag_filter, tag); + if (r <= 0) + return r; + + m->filter_uptodate = false; + return r; +} + +_public_ int sd_device_monitor_filter_add_match_sysattr(sd_device_monitor *m, const char *sysattr, const char *value, int match) { + Hashmap **hashmap; + + assert_return(m, -EINVAL); + assert_return(sysattr, -EINVAL); + + if (match) + hashmap = &m->match_sysattr_filter; + else + hashmap = &m->nomatch_sysattr_filter; + + /* TODO: unset m->filter_uptodate on success when we support this filter on BPF. */ + return update_match_strv(hashmap, sysattr, value, /* clear_on_null = */ true); +} + +_public_ int sd_device_monitor_filter_add_match_parent(sd_device_monitor *m, sd_device *device, int match) { + const char *syspath; + Set **set; + int r; + + assert_return(m, -EINVAL); + assert_return(device, -EINVAL); + + r = sd_device_get_syspath(device, &syspath); + if (r < 0) + return r; + + if (match) + set = &m->match_parent_filter; + else + set = &m->nomatch_parent_filter; + + /* TODO: unset m->filter_uptodate on success when we support this filter on BPF. */ + return set_put_strdup(set, syspath); +} + +_public_ int sd_device_monitor_filter_remove(sd_device_monitor *m) { + static const struct sock_fprog filter = { 0, NULL }; + + assert_return(m, -EINVAL); + + m->subsystem_filter = hashmap_free(m->subsystem_filter); + m->tag_filter = set_free(m->tag_filter); + m->match_sysattr_filter = hashmap_free(m->match_sysattr_filter); + m->nomatch_sysattr_filter = hashmap_free(m->nomatch_sysattr_filter); + m->match_parent_filter = set_free(m->match_parent_filter); + m->nomatch_parent_filter = set_free(m->nomatch_parent_filter); + + if (setsockopt(m->sock, SOL_SOCKET, SO_DETACH_FILTER, &filter, sizeof(filter)) < 0) + return -errno; + + m->filter_uptodate = true; + return 0; +} |