summaryrefslogtreecommitdiffstats
path: root/src/udev/udev-worker.c
blob: 53722b21bd6dc156a3e91b5e066c6e0255c871d0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
/* SPDX-License-Identifier: GPL-2.0-or-later */

#include <sys/file.h>
#include <sys/ioctl.h>
#include <sys/mount.h>

#include "alloc-util.h"
#include "blockdev-util.h"
#include "common-signal.h"
#include "device-monitor-private.h"
#include "device-private.h"
#include "device-util.h"
#include "errno-util.h"
#include "fd-util.h"
#include "io-util.h"
#include "path-util.h"
#include "process-util.h"
#include "signal-util.h"
#include "string-util.h"
#include "udev-event.h"
#include "udev-spawn.h"
#include "udev-trace.h"
#include "udev-util.h"
#include "udev-watch.h"
#include "udev-worker.h"

void udev_worker_done(UdevWorker *worker) {
        assert(worker);

        sd_event_unref(worker->event);
        sd_netlink_unref(worker->rtnl);
        sd_device_monitor_unref(worker->monitor);
        hashmap_free(worker->properties);
        udev_rules_free(worker->rules);
        safe_close(worker->pipe_fd);
}

int udev_get_whole_disk(sd_device *dev, sd_device **ret_device, const char **ret_devname) {
        const char *val;
        int r;

        assert(dev);

        if (device_for_action(dev, SD_DEVICE_REMOVE))
                goto irrelevant;

        r = sd_device_get_sysname(dev, &val);
        if (r < 0)
                return log_device_debug_errno(dev, r, "Failed to get sysname: %m");

        /* Exclude the following devices:
         * For "dm-", see the comment added by e918a1b5a94f270186dca59156354acd2a596494.
         * For "md", see the commit message of 2e5b17d01347d3c3118be2b8ad63d20415dbb1f0,
         * but not sure the assumption is still valid even when partitions are created on the md
         * devices, surprisingly which seems to be possible, see PR #22973.
         * For "drbd", see the commit message of fee854ee8ccde0cd28e0f925dea18cce35f3993d. */
        if (STARTSWITH_SET(val, "dm-", "md", "drbd"))
                goto irrelevant;

        r = block_device_get_whole_disk(dev, &dev);
        if (IN_SET(r,
                   -ENOTBLK, /* The device is not a block device. */
                   -ENODEV   /* The whole disk device was not found, it may already be removed. */))
                goto irrelevant;
        if (r < 0)
                return log_device_debug_errno(dev, r, "Failed to get whole disk device: %m");

        r = sd_device_get_devname(dev, &val);
        if (r < 0)
                return log_device_debug_errno(dev, r, "Failed to get devname: %m");

        if (ret_device)
                *ret_device = dev;
        if (ret_devname)
                *ret_devname = val;
        return 1;

irrelevant:
        if (ret_device)
                *ret_device = NULL;
        if (ret_devname)
                *ret_devname = NULL;
        return 0;
}

static int worker_lock_whole_disk(sd_device *dev, int *ret_fd) {
        _cleanup_close_ int fd = -EBADF;
        sd_device *dev_whole_disk;
        const char *val;
        int r;

        assert(dev);
        assert(ret_fd);

        /* Take a shared lock on the device node; this establishes a concept of device "ownership" to
         * serialize device access. External processes holding an exclusive lock will cause udev to skip the
         * event handling; in the case udev acquired the lock, the external process can block until udev has
         * finished its event handling. */

        r = udev_get_whole_disk(dev, &dev_whole_disk, &val);
        if (r < 0)
                return r;
        if (r == 0)
                goto nolock;

        fd = sd_device_open(dev_whole_disk, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
        if (fd < 0) {
                bool ignore = ERRNO_IS_DEVICE_ABSENT(fd);

                log_device_debug_errno(dev, fd, "Failed to open '%s'%s: %m", val, ignore ? ", ignoring" : "");
                if (!ignore)
                        return fd;

                goto nolock;
        }

        if (flock(fd, LOCK_SH|LOCK_NB) < 0)
                return log_device_debug_errno(dev, errno, "Failed to flock(%s): %m", val);

        *ret_fd = TAKE_FD(fd);
        return 1;

nolock:
        *ret_fd = -EBADF;
        return 0;
}

static int worker_mark_block_device_read_only(sd_device *dev) {
        _cleanup_close_ int fd = -EBADF;
        const char *val;
        int state = 1, r;

        assert(dev);

        /* Do this only once, when the block device is new. If the device is later retriggered let's not
         * toggle the bit again, so that people can boot up with full read-only mode and then unset the bit
         * for specific devices only. */
        if (!device_for_action(dev, SD_DEVICE_ADD))
                return 0;

        r = sd_device_get_subsystem(dev, &val);
        if (r < 0)
                return log_device_debug_errno(dev, r, "Failed to get subsystem: %m");

        if (!streq(val, "block"))
                return 0;

        r = sd_device_get_sysname(dev, &val);
        if (r < 0)
                return log_device_debug_errno(dev, r, "Failed to get sysname: %m");

        /* Exclude synthetic devices for now, this is supposed to be a safety feature to avoid modification
         * of physical devices, and what sits on top of those doesn't really matter if we don't allow the
         * underlying block devices to receive changes. */
        if (STARTSWITH_SET(val, "dm-", "md", "drbd", "loop", "nbd", "zram"))
                return 0;

        fd = sd_device_open(dev, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
        if (fd < 0)
                return log_device_debug_errno(dev, fd, "Failed to open '%s', ignoring: %m", val);

        if (ioctl(fd, BLKROSET, &state) < 0)
                return log_device_warning_errno(dev, errno, "Failed to mark block device '%s' read-only: %m", val);

        log_device_info(dev, "Successfully marked block device '%s' read-only.", val);
        return 0;
}

static int worker_process_device(UdevWorker *worker, sd_device *dev) {
        _cleanup_(udev_event_freep) UdevEvent *udev_event = NULL;
        _cleanup_close_ int fd_lock = -EBADF;
        int r;

        assert(worker);
        assert(dev);

        log_device_uevent(dev, "Processing device");

        udev_event = udev_event_new(dev, worker->exec_delay_usec, worker->rtnl, worker->log_level);
        if (!udev_event)
                return -ENOMEM;

        /* If this is a block device and the device is locked currently via the BSD advisory locks,
         * someone else is using it exclusively. We don't run our udev rules now to not interfere.
         * Instead of processing the event, we requeue the event and will try again after a delay.
         *
         * The user-facing side of this: https://systemd.io/BLOCK_DEVICE_LOCKING */
        r = worker_lock_whole_disk(dev, &fd_lock);
        if (r == -EAGAIN)
                return EVENT_RESULT_TRY_AGAIN;
        if (r < 0)
                return r;

        if (worker->blockdev_read_only)
                (void) worker_mark_block_device_read_only(dev);

        /* apply rules, create node, symlinks */
        r = udev_event_execute_rules(
                          udev_event,
                          worker->inotify_fd,
                          worker->timeout_usec,
                          worker->timeout_signal,
                          worker->properties,
                          worker->rules);
        if (r < 0)
                return r;

        udev_event_execute_run(udev_event, worker->timeout_usec, worker->timeout_signal);

        if (!worker->rtnl)
                /* in case rtnl was initialized */
                worker->rtnl = sd_netlink_ref(udev_event->rtnl);

        if (udev_event->inotify_watch) {
                r = udev_watch_begin(worker->inotify_fd, dev);
                if (r < 0 && r != -ENOENT) /* The device may be already removed, ignore -ENOENT. */
                        log_device_warning_errno(dev, r, "Failed to add inotify watch, ignoring: %m");
        }

        log_device_uevent(dev, "Device processed");
        return 0;
}

void udev_broadcast_result(sd_device_monitor *monitor, sd_device *dev, EventResult result) {
        int r;

        assert(dev);

        /* On exit, manager->monitor is already NULL. */
        if (!monitor)
                return;

        if (result != EVENT_RESULT_SUCCESS) {
                (void) device_add_property(dev, "UDEV_WORKER_FAILED", "1");

                switch (result) {
                case EVENT_RESULT_NERRNO_MIN ... EVENT_RESULT_NERRNO_MAX: {
                        const char *str;

                        (void) device_add_propertyf(dev, "UDEV_WORKER_ERRNO", "%i", -result);

                        str = errno_to_name(result);
                        if (str)
                                (void) device_add_property(dev, "UDEV_WORKER_ERRNO_NAME", str);
                        break;
                }
                case EVENT_RESULT_EXIT_STATUS_BASE ... EVENT_RESULT_EXIT_STATUS_MAX:
                        (void) device_add_propertyf(dev, "UDEV_WORKER_EXIT_STATUS", "%i", result - EVENT_RESULT_EXIT_STATUS_BASE);
                        break;

                case EVENT_RESULT_TRY_AGAIN:
                        assert_not_reached();
                        break;

                case EVENT_RESULT_SIGNAL_BASE ... EVENT_RESULT_SIGNAL_MAX: {
                        const char *str;

                        (void) device_add_propertyf(dev, "UDEV_WORKER_SIGNAL", "%i", result - EVENT_RESULT_SIGNAL_BASE);

                        str = signal_to_string(result - EVENT_RESULT_SIGNAL_BASE);
                        if (str)
                                (void) device_add_property(dev, "UDEV_WORKER_SIGNAL_NAME", str);
                        break;
                }
                default:
                        log_device_warning(dev, "Unknown event result \"%i\", ignoring.", result);
                }
        }

        r = device_monitor_send_device(monitor, NULL, dev);
        if (r < 0)
                log_device_warning_errno(dev, r,
                                         "Failed to broadcast event to libudev listeners, ignoring: %m");
}

static int worker_send_result(UdevWorker *worker, EventResult result) {
        assert(worker);
        assert(worker->pipe_fd >= 0);

        return loop_write(worker->pipe_fd, &result, sizeof(result));
}

static int worker_device_monitor_handler(sd_device_monitor *monitor, sd_device *dev, void *userdata) {
        UdevWorker *worker = ASSERT_PTR(userdata);
        int r;

        assert(dev);

        r = worker_process_device(worker, dev);
        if (r == EVENT_RESULT_TRY_AGAIN)
                /* if we couldn't acquire the flock(), then requeue the event */
                log_device_debug(dev, "Block device is currently locked, requeueing the event.");
        else {
                if (r < 0)
                        log_device_warning_errno(dev, r, "Failed to process device, ignoring: %m");

                /* send processed event back to libudev listeners */
                udev_broadcast_result(monitor, dev, r);
        }

        /* send udevd the result of the event execution */
        r = worker_send_result(worker, r);
        if (r < 0)
                log_device_warning_errno(dev, r, "Failed to send signal to main daemon, ignoring: %m");

        /* Reset the log level, as it might be changed by "OPTIONS=log_level=". */
        log_set_max_level(worker->log_level);

        return 1;
}

int udev_worker_main(UdevWorker *worker, sd_device *dev) {
        int r;

        assert(worker);
        assert(worker->monitor);
        assert(dev);

        DEVICE_TRACE_POINT(worker_spawned, dev, getpid_cached());

        assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, -1) >= 0);

        /* Reset OOM score, we only protect the main daemon. */
        r = set_oom_score_adjust(0);
        if (r < 0)
                log_debug_errno(r, "Failed to reset OOM score, ignoring: %m");

        r = sd_event_new(&worker->event);
        if (r < 0)
                return log_error_errno(r, "Failed to allocate event loop: %m");

        r = sd_event_add_signal(worker->event, NULL, SIGTERM, NULL, NULL);
        if (r < 0)
                return log_error_errno(r, "Failed to set SIGTERM event: %m");

        r = sd_device_monitor_attach_event(worker->monitor, worker->event);
        if (r < 0)
                return log_error_errno(r, "Failed to attach event loop to device monitor: %m");

        r = sd_device_monitor_start(worker->monitor, worker_device_monitor_handler, worker);
        if (r < 0)
                return log_error_errno(r, "Failed to start device monitor: %m");

        /* Process first device */
        (void) worker_device_monitor_handler(worker->monitor, dev, worker);

        r = sd_event_loop(worker->event);
        if (r < 0)
                return log_error_errno(r, "Event loop failed: %m");

        return 0;
}