/* SPDX-License-Identifier: GPL-2.0-or-later */ #include #include #include #include "alloc-util.h" #include "blockdev-util.h" #include "common-signal.h" #include "device-monitor-private.h" #include "device-private.h" #include "device-util.h" #include "errno-util.h" #include "fd-util.h" #include "io-util.h" #include "path-util.h" #include "process-util.h" #include "signal-util.h" #include "string-util.h" #include "udev-event.h" #include "udev-spawn.h" #include "udev-trace.h" #include "udev-util.h" #include "udev-watch.h" #include "udev-worker.h" void udev_worker_done(UdevWorker *worker) { assert(worker); sd_event_unref(worker->event); sd_netlink_unref(worker->rtnl); sd_device_monitor_unref(worker->monitor); hashmap_free(worker->properties); udev_rules_free(worker->rules); safe_close(worker->pipe_fd); } int udev_get_whole_disk(sd_device *dev, sd_device **ret_device, const char **ret_devname) { const char *val; int r; assert(dev); if (device_for_action(dev, SD_DEVICE_REMOVE)) goto irrelevant; r = sd_device_get_sysname(dev, &val); if (r < 0) return log_device_debug_errno(dev, r, "Failed to get sysname: %m"); /* Exclude the following devices: * For "dm-", see the comment added by e918a1b5a94f270186dca59156354acd2a596494. * For "md", see the commit message of 2e5b17d01347d3c3118be2b8ad63d20415dbb1f0, * but not sure the assumption is still valid even when partitions are created on the md * devices, surprisingly which seems to be possible, see PR #22973. * For "drbd", see the commit message of fee854ee8ccde0cd28e0f925dea18cce35f3993d. */ if (STARTSWITH_SET(val, "dm-", "md", "drbd")) goto irrelevant; r = block_device_get_whole_disk(dev, &dev); if (IN_SET(r, -ENOTBLK, /* The device is not a block device. */ -ENODEV /* The whole disk device was not found, it may already be removed. */)) goto irrelevant; if (r < 0) return log_device_debug_errno(dev, r, "Failed to get whole disk device: %m"); r = sd_device_get_devname(dev, &val); if (r < 0) return log_device_debug_errno(dev, r, "Failed to get devname: %m"); if (ret_device) *ret_device = dev; if (ret_devname) *ret_devname = val; return 1; irrelevant: if (ret_device) *ret_device = NULL; if (ret_devname) *ret_devname = NULL; return 0; } static int worker_lock_whole_disk(sd_device *dev, int *ret_fd) { _cleanup_close_ int fd = -EBADF; sd_device *dev_whole_disk; const char *val; int r; assert(dev); assert(ret_fd); /* Take a shared lock on the device node; this establishes a concept of device "ownership" to * serialize device access. External processes holding an exclusive lock will cause udev to skip the * event handling; in the case udev acquired the lock, the external process can block until udev has * finished its event handling. */ r = udev_get_whole_disk(dev, &dev_whole_disk, &val); if (r < 0) return r; if (r == 0) goto nolock; fd = sd_device_open(dev_whole_disk, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY); if (fd < 0) { bool ignore = ERRNO_IS_DEVICE_ABSENT(fd); log_device_debug_errno(dev, fd, "Failed to open '%s'%s: %m", val, ignore ? ", ignoring" : ""); if (!ignore) return fd; goto nolock; } if (flock(fd, LOCK_SH|LOCK_NB) < 0) return log_device_debug_errno(dev, errno, "Failed to flock(%s): %m", val); *ret_fd = TAKE_FD(fd); return 1; nolock: *ret_fd = -EBADF; return 0; } static int worker_mark_block_device_read_only(sd_device *dev) { _cleanup_close_ int fd = -EBADF; const char *val; int state = 1, r; assert(dev); /* Do this only once, when the block device is new. If the device is later retriggered let's not * toggle the bit again, so that people can boot up with full read-only mode and then unset the bit * for specific devices only. */ if (!device_for_action(dev, SD_DEVICE_ADD)) return 0; r = sd_device_get_subsystem(dev, &val); if (r < 0) return log_device_debug_errno(dev, r, "Failed to get subsystem: %m"); if (!streq(val, "block")) return 0; r = sd_device_get_sysname(dev, &val); if (r < 0) return log_device_debug_errno(dev, r, "Failed to get sysname: %m"); /* Exclude synthetic devices for now, this is supposed to be a safety feature to avoid modification * of physical devices, and what sits on top of those doesn't really matter if we don't allow the * underlying block devices to receive changes. */ if (STARTSWITH_SET(val, "dm-", "md", "drbd", "loop", "nbd", "zram")) return 0; fd = sd_device_open(dev, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY); if (fd < 0) return log_device_debug_errno(dev, fd, "Failed to open '%s', ignoring: %m", val); if (ioctl(fd, BLKROSET, &state) < 0) return log_device_warning_errno(dev, errno, "Failed to mark block device '%s' read-only: %m", val); log_device_info(dev, "Successfully marked block device '%s' read-only.", val); return 0; } static int worker_process_device(UdevWorker *worker, sd_device *dev) { _cleanup_(udev_event_freep) UdevEvent *udev_event = NULL; _cleanup_close_ int fd_lock = -EBADF; int r; assert(worker); assert(dev); log_device_uevent(dev, "Processing device"); udev_event = udev_event_new(dev, worker->exec_delay_usec, worker->rtnl, worker->log_level); if (!udev_event) return -ENOMEM; /* If this is a block device and the device is locked currently via the BSD advisory locks, * someone else is using it exclusively. We don't run our udev rules now to not interfere. * Instead of processing the event, we requeue the event and will try again after a delay. * * The user-facing side of this: https://systemd.io/BLOCK_DEVICE_LOCKING */ r = worker_lock_whole_disk(dev, &fd_lock); if (r == -EAGAIN) return EVENT_RESULT_TRY_AGAIN; if (r < 0) return r; if (worker->blockdev_read_only) (void) worker_mark_block_device_read_only(dev); /* apply rules, create node, symlinks */ r = udev_event_execute_rules( udev_event, worker->inotify_fd, worker->timeout_usec, worker->timeout_signal, worker->properties, worker->rules); if (r < 0) return r; udev_event_execute_run(udev_event, worker->timeout_usec, worker->timeout_signal); if (!worker->rtnl) /* in case rtnl was initialized */ worker->rtnl = sd_netlink_ref(udev_event->rtnl); if (udev_event->inotify_watch) { r = udev_watch_begin(worker->inotify_fd, dev); if (r < 0 && r != -ENOENT) /* The device may be already removed, ignore -ENOENT. */ log_device_warning_errno(dev, r, "Failed to add inotify watch, ignoring: %m"); } log_device_uevent(dev, "Device processed"); return 0; } void udev_broadcast_result(sd_device_monitor *monitor, sd_device *dev, EventResult result) { int r; assert(dev); /* On exit, manager->monitor is already NULL. */ if (!monitor) return; if (result != EVENT_RESULT_SUCCESS) { (void) device_add_property(dev, "UDEV_WORKER_FAILED", "1"); switch (result) { case EVENT_RESULT_NERRNO_MIN ... EVENT_RESULT_NERRNO_MAX: { const char *str; (void) device_add_propertyf(dev, "UDEV_WORKER_ERRNO", "%i", -result); str = errno_to_name(result); if (str) (void) device_add_property(dev, "UDEV_WORKER_ERRNO_NAME", str); break; } case EVENT_RESULT_EXIT_STATUS_BASE ... EVENT_RESULT_EXIT_STATUS_MAX: (void) device_add_propertyf(dev, "UDEV_WORKER_EXIT_STATUS", "%i", result - EVENT_RESULT_EXIT_STATUS_BASE); break; case EVENT_RESULT_TRY_AGAIN: assert_not_reached(); break; case EVENT_RESULT_SIGNAL_BASE ... EVENT_RESULT_SIGNAL_MAX: { const char *str; (void) device_add_propertyf(dev, "UDEV_WORKER_SIGNAL", "%i", result - EVENT_RESULT_SIGNAL_BASE); str = signal_to_string(result - EVENT_RESULT_SIGNAL_BASE); if (str) (void) device_add_property(dev, "UDEV_WORKER_SIGNAL_NAME", str); break; } default: log_device_warning(dev, "Unknown event result \"%i\", ignoring.", result); } } r = device_monitor_send_device(monitor, NULL, dev); if (r < 0) log_device_warning_errno(dev, r, "Failed to broadcast event to libudev listeners, ignoring: %m"); } static int worker_send_result(UdevWorker *worker, EventResult result) { assert(worker); assert(worker->pipe_fd >= 0); return loop_write(worker->pipe_fd, &result, sizeof(result)); } static int worker_device_monitor_handler(sd_device_monitor *monitor, sd_device *dev, void *userdata) { UdevWorker *worker = ASSERT_PTR(userdata); int r; assert(dev); r = worker_process_device(worker, dev); if (r == EVENT_RESULT_TRY_AGAIN) /* if we couldn't acquire the flock(), then requeue the event */ log_device_debug(dev, "Block device is currently locked, requeueing the event."); else { if (r < 0) log_device_warning_errno(dev, r, "Failed to process device, ignoring: %m"); /* send processed event back to libudev listeners */ udev_broadcast_result(monitor, dev, r); } /* send udevd the result of the event execution */ r = worker_send_result(worker, r); if (r < 0) log_device_warning_errno(dev, r, "Failed to send signal to main daemon, ignoring: %m"); /* Reset the log level, as it might be changed by "OPTIONS=log_level=". */ log_set_max_level(worker->log_level); return 1; } int udev_worker_main(UdevWorker *worker, sd_device *dev) { int r; assert(worker); assert(worker->monitor); assert(dev); DEVICE_TRACE_POINT(worker_spawned, dev, getpid_cached()); assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, -1) >= 0); /* Reset OOM score, we only protect the main daemon. */ r = set_oom_score_adjust(0); if (r < 0) log_debug_errno(r, "Failed to reset OOM score, ignoring: %m"); r = sd_event_new(&worker->event); if (r < 0) return log_error_errno(r, "Failed to allocate event loop: %m"); r = sd_event_add_signal(worker->event, NULL, SIGTERM, NULL, NULL); if (r < 0) return log_error_errno(r, "Failed to set SIGTERM event: %m"); r = sd_device_monitor_attach_event(worker->monitor, worker->event); if (r < 0) return log_error_errno(r, "Failed to attach event loop to device monitor: %m"); r = sd_device_monitor_start(worker->monitor, worker_device_monitor_handler, worker); if (r < 0) return log_error_errno(r, "Failed to start device monitor: %m"); /* Process first device */ (void) worker_device_monitor_handler(worker->monitor, dev, worker); r = sd_event_loop(worker->event); if (r < 0) return log_error_errno(r, "Event loop failed: %m"); return 0; }