summaryrefslogtreecommitdiffstats
path: root/src/libsystemd/sd-event/event-source.h
blob: f4e38d78d085b847334df478042697ecbc4b9023 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
#pragma once
/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <sys/wait.h>

#include "sd-event.h"

#include "hashmap.h"
#include "inotify-util.h"
#include "list.h"
#include "prioq.h"
#include "ratelimit.h"

typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        SOURCE_INOTIFY,
        SOURCE_MEMORY_PRESSURE,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -EINVAL,
} EventSourceType;

/* All objects we use in epoll events start with this value, so that
 * we know how to dispatch it */
typedef enum WakeupType {
        WAKEUP_NONE,
        WAKEUP_EVENT_SOURCE, /* either I/O or pidfd wakeup */
        WAKEUP_CLOCK_DATA,
        WAKEUP_SIGNAL_DATA,
        WAKEUP_INOTIFY_DATA,
        _WAKEUP_TYPE_MAX,
        _WAKEUP_TYPE_INVALID = -EINVAL,
} WakeupType;

struct inode_data;

struct sd_event_source {
        WakeupType wakeup;

        unsigned n_ref;

        sd_event *event;
        void *userdata;
        sd_event_handler_t prepare;

        char *description;

        EventSourceType type;
        signed int enabled:3;
        bool pending:1;
        bool dispatching:1;
        bool floating:1;
        bool exit_on_failure:1;
        bool ratelimited:1;

        int64_t priority;
        unsigned pending_index;
        unsigned prepare_index;
        uint64_t pending_iteration;
        uint64_t prepare_iteration;

        sd_event_destroy_t destroy_callback;
        sd_event_handler_t ratelimit_expire_callback;

        LIST_FIELDS(sd_event_source, sources);

        RateLimit rate_limit;

        /* These are primarily fields relevant for time event sources, but since any event source can
         * effectively become one when rate-limited, this is part of the common fields. */
        unsigned earliest_index;
        unsigned latest_index;

        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;
                        uint32_t revents;
                        bool registered:1;
                        bool owned:1;
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;
                        bool unblock;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;
                        int options;
                        int pidfd;
                        bool registered:1; /* whether the pidfd is registered in the epoll */
                        bool pidfd_owned:1; /* close pidfd when event source is freed */
                        bool process_owned:1; /* kill+reap process when event source is freed */
                        bool exited:1; /* true if process exited (i.e. if there's value in SIGKILLing it if we want to get rid of it) */
                        bool waited:1; /* true if process was waited for (i.e. if there's value in waitid(P_PID)'ing it if we want to get rid of it) */
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;
                } exit;
                struct {
                        sd_event_inotify_handler_t callback;
                        uint32_t mask;
                        struct inode_data *inode_data;
                        LIST_FIELDS(sd_event_source, by_inode_data);
                } inotify;
                struct {
                        int fd;
                        sd_event_handler_t callback;
                        void *write_buffer;
                        size_t write_buffer_size;
                        uint32_t events, revents;
                        LIST_FIELDS(sd_event_source, write_list);
                        bool registered:1;
                        bool locked:1;
                        bool in_write_list:1;
                } memory_pressure;
        };
};

struct clock_data {
        WakeupType wakeup;
        int fd;

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next;

        bool needs_rearm:1;
};

struct signal_data {
        WakeupType wakeup;

        /* For each priority we maintain one signal fd, so that we
         * only have to dequeue a single event per priority at a
         * time. */

        int fd;
        int64_t priority;
        sigset_t sigset;
        sd_event_source *current;
};

/* A structure listing all event sources currently watching a specific inode */
struct inode_data {
        /* The identifier for the inode, the combination of the .st_dev + .st_ino fields of the file */
        ino_t ino;
        dev_t dev;

        /* An fd of the inode to watch. The fd is kept open until the next iteration of the loop, so that we can
         * rearrange the priority still until then, as we need the original inode to change the priority as we need to
         * add a watch descriptor to the right inotify for the priority which we can only do if we have a handle to the
         * original inode. We keep a list of all inode_data objects with an open fd in the to_close list (see below) of
         * the sd-event object, so that it is efficient to close everything, before entering the next event loop
         * iteration. */
        int fd;

        /* The inotify "watch descriptor" */
        int wd;

        /* The combination of the mask of all inotify watches on this inode we manage. This is also the mask that has
         * most recently been set on the watch descriptor. */
        uint32_t combined_mask;

        /* All event sources subscribed to this inode */
        LIST_HEAD(sd_event_source, event_sources);

        /* The inotify object we watch this inode with */
        struct inotify_data *inotify_data;

        /* A linked list of all inode data objects with fds to close (see above) */
        LIST_FIELDS(struct inode_data, to_close);
};

/* A structure encapsulating an inotify fd */
struct inotify_data {
        WakeupType wakeup;

        /* For each priority we maintain one inotify fd, so that we only have to dequeue a single event per priority at
         * a time */

        int fd;
        int64_t priority;

        Hashmap *inodes; /* The inode_data structures keyed by dev+ino */
        Hashmap *wd;     /* The inode_data structures keyed by the watch descriptor for each */

        /* The buffer we read inotify events into */
        union inotify_event_buffer buffer;
        size_t buffer_filled; /* fill level of the buffer */

        /* How many event sources are currently marked pending for this inotify. We won't read new events off the
         * inotify fd as long as there are still pending events on the inotify (because we have no strategy of queuing
         * the events locally if they can't be coalesced). */
        unsigned n_pending;

        /* If this counter is non-zero, don't GC the inotify data object even if not used to watch any inode
         * anymore. This is useful to pin the object for a bit longer, after the last event source needing it
         * is gone. */
        unsigned n_busy;

        /* A linked list of all inotify objects with data already read, that still need processing. We keep this list
         * to make it efficient to figure out what inotify objects to process data on next. */
        LIST_FIELDS(struct inotify_data, buffered);
};