summaryrefslogtreecommitdiffstats
path: root/src/libsystemd
diff options
context:
space:
mode:
Diffstat (limited to 'src/libsystemd')
-rw-r--r--src/libsystemd/libsystemd.pc.in20
-rw-r--r--src/libsystemd/libsystemd.sym836
-rw-r--r--src/libsystemd/meson.build265
-rw-r--r--src/libsystemd/sd-bus/bus-common-errors.c151
-rw-r--r--src/libsystemd/sd-bus/bus-common-errors.h155
-rw-r--r--src/libsystemd/sd-bus/bus-container.c103
-rw-r--r--src/libsystemd/sd-bus/bus-container.h6
-rw-r--r--src/libsystemd/sd-bus/bus-control.c1038
-rw-r--r--src/libsystemd/sd-bus/bus-control.h9
-rw-r--r--src/libsystemd/sd-bus/bus-convenience.c824
-rw-r--r--src/libsystemd/sd-bus/bus-creds.c1337
-rw-r--r--src/libsystemd/sd-bus/bus-creds.h72
-rw-r--r--src/libsystemd/sd-bus/bus-dump.c649
-rw-r--r--src/libsystemd/sd-bus/bus-dump.h12
-rw-r--r--src/libsystemd/sd-bus/bus-error.c628
-rw-r--r--src/libsystemd/sd-bus/bus-error.h57
-rw-r--r--src/libsystemd/sd-bus/bus-internal.c338
-rw-r--r--src/libsystemd/sd-bus/bus-internal.h427
-rw-r--r--src/libsystemd/sd-bus/bus-introspect.c290
-rw-r--r--src/libsystemd/sd-bus/bus-introspect.h25
-rw-r--r--src/libsystemd/sd-bus/bus-kernel.c44
-rw-r--r--src/libsystemd/sd-bus/bus-kernel.h24
-rw-r--r--src/libsystemd/sd-bus/bus-match.c1058
-rw-r--r--src/libsystemd/sd-bus/bus-match.h82
-rw-r--r--src/libsystemd/sd-bus/bus-message.c4712
-rw-r--r--src/libsystemd/sd-bus/bus-message.h191
-rw-r--r--src/libsystemd/sd-bus/bus-objects.c3033
-rw-r--r--src/libsystemd/sd-bus/bus-objects.h20
-rw-r--r--src/libsystemd/sd-bus/bus-protocol.h88
-rw-r--r--src/libsystemd/sd-bus/bus-signature.c146
-rw-r--r--src/libsystemd/sd-bus/bus-signature.h10
-rw-r--r--src/libsystemd/sd-bus/bus-slot.c311
-rw-r--r--src/libsystemd/sd-bus/bus-slot.h10
-rw-r--r--src/libsystemd/sd-bus/bus-socket.c1428
-rw-r--r--src/libsystemd/sd-bus/bus-socket.h20
-rw-r--r--src/libsystemd/sd-bus/bus-track.c495
-rw-r--r--src/libsystemd/sd-bus/bus-track.h5
-rw-r--r--src/libsystemd/sd-bus/bus-type.c162
-rw-r--r--src/libsystemd/sd-bus/bus-type.h16
-rw-r--r--src/libsystemd/sd-bus/fuzz-bus-match.c86
-rw-r--r--src/libsystemd/sd-bus/fuzz-bus-match.options2
-rw-r--r--src/libsystemd/sd-bus/fuzz-bus-message.c42
-rw-r--r--src/libsystemd/sd-bus/sd-bus.c4441
-rw-r--r--src/libsystemd/sd-bus/test-bus-address.c61
-rw-r--r--src/libsystemd/sd-bus/test-bus-benchmark.c326
-rw-r--r--src/libsystemd/sd-bus/test-bus-chat.c539
-rw-r--r--src/libsystemd/sd-bus/test-bus-cleanup.c105
-rw-r--r--src/libsystemd/sd-bus/test-bus-creds.c34
-rw-r--r--src/libsystemd/sd-bus/test-bus-error.c294
-rw-r--r--src/libsystemd/sd-bus/test-bus-introspect.c33
-rw-r--r--src/libsystemd/sd-bus/test-bus-marshal.c418
-rw-r--r--src/libsystemd/sd-bus/test-bus-match.c145
-rw-r--r--src/libsystemd/sd-bus/test-bus-objects.c677
-rw-r--r--src/libsystemd/sd-bus/test-bus-peersockaddr.c127
-rw-r--r--src/libsystemd/sd-bus/test-bus-queue-ref-cycle.c56
-rw-r--r--src/libsystemd/sd-bus/test-bus-server.c185
-rw-r--r--src/libsystemd/sd-bus/test-bus-signature.c150
-rw-r--r--src/libsystemd/sd-bus/test-bus-track.c151
l---------src/libsystemd/sd-bus/test-bus-vtable-cc.cc1
-rw-r--r--src/libsystemd/sd-bus/test-bus-vtable.c76
-rw-r--r--src/libsystemd/sd-bus/test-bus-watch-bind.c228
-rw-r--r--src/libsystemd/sd-bus/test-vtable-data.h132
-rw-r--r--src/libsystemd/sd-daemon/sd-daemon.c775
-rw-r--r--src/libsystemd/sd-device/device-enumerator-private.h32
-rw-r--r--src/libsystemd/sd-device/device-enumerator.c1194
-rw-r--r--src/libsystemd/sd-device/device-filter.c115
-rw-r--r--src/libsystemd/sd-device/device-filter.h13
-rw-r--r--src/libsystemd/sd-device/device-internal.h117
-rw-r--r--src/libsystemd/sd-device/device-monitor-private.h22
-rw-r--r--src/libsystemd/sd-device/device-monitor.c929
-rw-r--r--src/libsystemd/sd-device/device-private.c903
-rw-r--r--src/libsystemd/sd-device/device-private.h74
-rw-r--r--src/libsystemd/sd-device/device-util.c141
-rw-r--r--src/libsystemd/sd-device/device-util.h104
-rw-r--r--src/libsystemd/sd-device/sd-device.c2715
-rw-r--r--src/libsystemd/sd-device/test-device-util.c23
-rw-r--r--src/libsystemd/sd-device/test-sd-device-monitor.c344
-rw-r--r--src/libsystemd/sd-device/test-sd-device-thread.c51
-rw-r--r--src/libsystemd/sd-device/test-sd-device.c678
-rw-r--r--src/libsystemd/sd-event/event-source.h239
-rw-r--r--src/libsystemd/sd-event/event-util.c153
-rw-r--r--src/libsystemd/sd-event/event-util.h34
-rw-r--r--src/libsystemd/sd-event/sd-event.c5357
-rw-r--r--src/libsystemd/sd-event/test-event.c902
-rw-r--r--src/libsystemd/sd-hwdb/hwdb-internal.h89
-rw-r--r--src/libsystemd/sd-hwdb/sd-hwdb.c436
-rw-r--r--src/libsystemd/sd-id128/id128-util.c265
-rw-r--r--src/libsystemd/sd-id128/id128-util.h58
-rw-r--r--src/libsystemd/sd-id128/sd-id128.c382
-rw-r--r--src/libsystemd/sd-journal/audit-type.c6
-rw-r--r--src/libsystemd/sd-journal/audit-type.h22
-rw-r--r--src/libsystemd/sd-journal/audit_type-to-name.awk14
-rw-r--r--src/libsystemd/sd-journal/catalog.c743
-rw-r--r--src/libsystemd/sd-journal/catalog.h19
-rw-r--r--src/libsystemd/sd-journal/fsprg.c381
-rw-r--r--src/libsystemd/sd-journal/fsprg.h61
-rwxr-xr-xsrc/libsystemd/sd-journal/generate-audit_type-list.sh17
-rw-r--r--src/libsystemd/sd-journal/journal-authenticate.c525
-rw-r--r--src/libsystemd/sd-journal/journal-authenticate.h23
-rw-r--r--src/libsystemd/sd-journal/journal-def.h269
-rw-r--r--src/libsystemd/sd-journal/journal-file.c4696
-rw-r--r--src/libsystemd/sd-journal/journal-file.h393
-rw-r--r--src/libsystemd/sd-journal/journal-internal.h142
-rw-r--r--src/libsystemd/sd-journal/journal-send.c576
-rw-r--r--src/libsystemd/sd-journal/journal-send.h7
-rw-r--r--src/libsystemd/sd-journal/journal-vacuum.c330
-rw-r--r--src/libsystemd/sd-journal/journal-vacuum.h9
-rw-r--r--src/libsystemd/sd-journal/journal-verify.c1436
-rw-r--r--src/libsystemd/sd-journal/journal-verify.h6
-rw-r--r--src/libsystemd/sd-journal/lookup3.c1002
-rw-r--r--src/libsystemd/sd-journal/lookup3.h23
-rw-r--r--src/libsystemd/sd-journal/mmap-cache.c562
-rw-r--r--src/libsystemd/sd-journal/mmap-cache.h60
-rw-r--r--src/libsystemd/sd-journal/sd-journal.c3528
-rw-r--r--src/libsystemd/sd-journal/test-audit-type.c24
-rw-r--r--src/libsystemd/sd-journal/test-catalog.c235
-rw-r--r--src/libsystemd/sd-journal/test-journal-append.c269
-rw-r--r--src/libsystemd/sd-journal/test-journal-enum.c37
-rw-r--r--src/libsystemd/sd-journal/test-journal-file.c45
-rw-r--r--src/libsystemd/sd-journal/test-journal-flush.c118
-rw-r--r--src/libsystemd/sd-journal/test-journal-init.c68
-rw-r--r--src/libsystemd/sd-journal/test-journal-interleaving.c737
-rw-r--r--src/libsystemd/sd-journal/test-journal-match.c61
-rw-r--r--src/libsystemd/sd-journal/test-journal-send.c111
-rw-r--r--src/libsystemd/sd-journal/test-journal-stream.c201
-rw-r--r--src/libsystemd/sd-journal/test-journal-verify.c210
-rw-r--r--src/libsystemd/sd-journal/test-journal.c280
-rw-r--r--src/libsystemd/sd-journal/test-mmap-cache.c68
-rw-r--r--src/libsystemd/sd-login/sd-login.c1323
-rw-r--r--src/libsystemd/sd-login/test-login.c334
-rw-r--r--src/libsystemd/sd-netlink/netlink-genl.c488
-rw-r--r--src/libsystemd/sd-netlink/netlink-genl.h8
-rw-r--r--src/libsystemd/sd-netlink/netlink-internal.h212
-rw-r--r--src/libsystemd/sd-netlink/netlink-message-nfnl.c417
-rw-r--r--src/libsystemd/sd-netlink/netlink-message-rtnl.c1204
-rw-r--r--src/libsystemd/sd-netlink/netlink-message.c1421
-rw-r--r--src/libsystemd/sd-netlink/netlink-slot.c188
-rw-r--r--src/libsystemd/sd-netlink/netlink-slot.h14
-rw-r--r--src/libsystemd/sd-netlink/netlink-socket.c459
-rw-r--r--src/libsystemd/sd-netlink/netlink-types-genl.c251
-rw-r--r--src/libsystemd/sd-netlink/netlink-types-internal.h66
-rw-r--r--src/libsystemd/sd-netlink/netlink-types-nfnl.c194
-rw-r--r--src/libsystemd/sd-netlink/netlink-types-rtnl.c1229
-rw-r--r--src/libsystemd/sd-netlink/netlink-types.c153
-rw-r--r--src/libsystemd/sd-netlink/netlink-types.h63
-rw-r--r--src/libsystemd/sd-netlink/netlink-util.c818
-rw-r--r--src/libsystemd/sd-netlink/netlink-util.h113
-rw-r--r--src/libsystemd/sd-netlink/sd-netlink.c909
-rw-r--r--src/libsystemd/sd-netlink/test-netlink.c686
-rw-r--r--src/libsystemd/sd-network/network-util.c157
-rw-r--r--src/libsystemd/sd-network/network-util.h86
-rw-r--r--src/libsystemd/sd-network/sd-network.c462
-rw-r--r--src/libsystemd/sd-path/sd-path.c693
-rw-r--r--src/libsystemd/sd-resolve/resolve-private.h39
-rw-r--r--src/libsystemd/sd-resolve/sd-resolve.c1296
-rw-r--r--src/libsystemd/sd-resolve/test-resolve.c111
156 files changed, 74259 insertions, 0 deletions
diff --git a/src/libsystemd/libsystemd.pc.in b/src/libsystemd/libsystemd.pc.in
new file mode 100644
index 0000000..3a43ef6
--- /dev/null
+++ b/src/libsystemd/libsystemd.pc.in
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# This file is part of systemd.
+#
+# systemd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+
+prefix={{PREFIX}}
+exec_prefix={{PREFIX}}
+libdir={{LIBDIR}}
+includedir={{INCLUDE_DIR}}
+
+Name: systemd
+Description: systemd Library
+URL: {{PROJECT_URL}}
+Version: {{PROJECT_VERSION}}
+Libs: -L${libdir} -lsystemd
+Cflags: -I${includedir}
diff --git a/src/libsystemd/libsystemd.sym b/src/libsystemd/libsystemd.sym
new file mode 100644
index 0000000..4113920
--- /dev/null
+++ b/src/libsystemd/libsystemd.sym
@@ -0,0 +1,836 @@
+/***
+ SPDX-License-Identifier: LGPL-2.1-or-later
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+***/
+
+LIBSYSTEMD_209 {
+global:
+ /* sd-journal */
+ sd_journal_print;
+ sd_journal_printv;
+ sd_journal_send;
+ sd_journal_sendv;
+ sd_journal_stream_fd;
+ sd_journal_open;
+ sd_journal_close;
+ sd_journal_previous;
+ sd_journal_next;
+ sd_journal_previous_skip;
+ sd_journal_next_skip;
+ sd_journal_get_realtime_usec;
+ sd_journal_get_monotonic_usec;
+ sd_journal_get_data;
+ sd_journal_enumerate_data;
+ sd_journal_restart_data;
+ sd_journal_add_match;
+ sd_journal_flush_matches;
+ sd_journal_seek_head;
+ sd_journal_seek_tail;
+ sd_journal_seek_monotonic_usec;
+ sd_journal_seek_realtime_usec;
+ sd_journal_seek_cursor;
+ sd_journal_get_cursor;
+ sd_journal_get_fd;
+ sd_journal_process;
+ sd_journal_print_with_location;
+ sd_journal_printv_with_location;
+ sd_journal_send_with_location;
+ sd_journal_sendv_with_location;
+ sd_journal_get_cutoff_realtime_usec;
+ sd_journal_get_cutoff_monotonic_usec;
+ sd_journal_wait;
+ sd_journal_open_directory;
+ sd_journal_add_disjunction;
+ sd_journal_perror;
+ sd_journal_perror_with_location;
+ sd_journal_get_usage;
+ sd_journal_test_cursor;
+ sd_journal_query_unique;
+ sd_journal_enumerate_unique;
+ sd_journal_restart_unique;
+ sd_journal_get_catalog;
+ sd_journal_get_catalog_for_message_id;
+ sd_journal_set_data_threshold;
+ sd_journal_get_data_threshold;
+ sd_journal_reliable_fd;
+ sd_journal_get_events;
+ sd_journal_get_timeout;
+ sd_journal_add_conjunction;
+ sd_journal_open_files;
+ sd_journal_open_container;
+
+ /* sd-daemon */
+ sd_booted;
+ sd_is_fifo;
+ sd_is_mq;
+ sd_is_socket;
+ sd_is_socket_inet;
+ sd_is_socket_unix;
+ sd_is_special;
+ sd_listen_fds;
+ sd_notify;
+ sd_notifyf;
+ sd_watchdog_enabled;
+
+ /* sd-id128 */
+ sd_id128_to_string;
+ sd_id128_from_string;
+ sd_id128_randomize;
+ sd_id128_get_machine;
+ sd_id128_get_boot;
+
+ /* sd-login */
+ sd_get_seats;
+ sd_get_sessions;
+ sd_get_uids;
+ sd_login_monitor_flush;
+ sd_login_monitor_get_fd;
+ sd_login_monitor_new;
+ sd_login_monitor_unref;
+ sd_pid_get_owner_uid;
+ sd_pid_get_session;
+ sd_seat_can_multi_session;
+ sd_seat_get_active;
+ sd_seat_get_sessions;
+ sd_session_get_seat;
+ sd_session_get_uid;
+ sd_session_is_active;
+ sd_uid_get_seats;
+ sd_uid_get_sessions;
+ sd_uid_get_state;
+ sd_uid_is_on_seat;
+ sd_pid_get_unit;
+ sd_session_get_service;
+ sd_session_get_type;
+ sd_session_get_class;
+ sd_session_get_display;
+ sd_session_get_state;
+ sd_seat_can_tty;
+ sd_seat_can_graphical;
+ sd_session_get_tty;
+ sd_login_monitor_get_events;
+ sd_login_monitor_get_timeout;
+ sd_pid_get_user_unit;
+ sd_pid_get_machine_name;
+ sd_get_machine_names;
+ sd_pid_get_slice;
+ sd_session_get_vt;
+ sd_session_is_remote;
+ sd_session_get_remote_user;
+ sd_session_get_remote_host;
+local:
+ *;
+};
+
+LIBSYSTEMD_211 {
+global:
+ sd_machine_get_class;
+ sd_peer_get_session;
+ sd_peer_get_owner_uid;
+ sd_peer_get_unit;
+ sd_peer_get_user_unit;
+ sd_peer_get_machine_name;
+ sd_peer_get_slice;
+} LIBSYSTEMD_209;
+
+LIBSYSTEMD_213 {
+global:
+ sd_uid_get_display;
+} LIBSYSTEMD_211;
+
+LIBSYSTEMD_214 {
+global:
+ sd_pid_notify;
+ sd_pid_notifyf;
+} LIBSYSTEMD_213;
+
+LIBSYSTEMD_216 {
+global:
+ sd_machine_get_ifindices;
+} LIBSYSTEMD_214;
+
+LIBSYSTEMD_217 {
+global:
+ sd_session_get_desktop;
+} LIBSYSTEMD_216;
+
+LIBSYSTEMD_219 {
+global:
+ sd_pid_notify_with_fds;
+} LIBSYSTEMD_217;
+
+LIBSYSTEMD_220 {
+global:
+ sd_pid_get_user_slice;
+ sd_peer_get_user_slice;
+} LIBSYSTEMD_219;
+
+LIBSYSTEMD_221 {
+global:
+ /* sd-bus */
+ sd_bus_default;
+ sd_bus_default_user;
+ sd_bus_default_system;
+ sd_bus_open;
+ sd_bus_open_user;
+ sd_bus_open_system;
+ sd_bus_open_system_remote;
+ sd_bus_open_system_machine;
+ sd_bus_new;
+ sd_bus_set_address;
+ sd_bus_set_fd;
+ sd_bus_set_exec;
+ sd_bus_get_address;
+ sd_bus_set_bus_client;
+ sd_bus_is_bus_client;
+ sd_bus_set_server;
+ sd_bus_is_server;
+ sd_bus_set_anonymous;
+ sd_bus_is_anonymous;
+ sd_bus_set_trusted;
+ sd_bus_is_trusted;
+ sd_bus_set_monitor;
+ sd_bus_is_monitor;
+ sd_bus_set_description;
+ sd_bus_get_description;
+ sd_bus_negotiate_creds;
+ sd_bus_negotiate_timestamp;
+ sd_bus_negotiate_fds;
+ sd_bus_can_send;
+ sd_bus_get_creds_mask;
+ sd_bus_set_allow_interactive_authorization;
+ sd_bus_get_allow_interactive_authorization;
+ sd_bus_start;
+ sd_bus_close;
+ sd_bus_try_close;
+ sd_bus_ref;
+ sd_bus_unref;
+ sd_bus_is_open;
+ sd_bus_get_bus_id;
+ sd_bus_get_scope;
+ sd_bus_get_tid;
+ sd_bus_get_owner_creds;
+ sd_bus_send;
+ sd_bus_send_to;
+ sd_bus_call;
+ sd_bus_call_async;
+ sd_bus_get_fd;
+ sd_bus_get_events;
+ sd_bus_get_timeout;
+ sd_bus_process;
+ sd_bus_process_priority;
+ sd_bus_wait;
+ sd_bus_flush;
+ sd_bus_get_current_slot;
+ sd_bus_get_current_message;
+ sd_bus_get_current_handler;
+ sd_bus_get_current_userdata;
+ sd_bus_attach_event;
+ sd_bus_detach_event;
+ sd_bus_get_event;
+ sd_bus_add_filter;
+ sd_bus_add_match;
+ sd_bus_add_object;
+ sd_bus_add_fallback;
+ sd_bus_add_object_vtable;
+ sd_bus_add_fallback_vtable;
+ sd_bus_add_node_enumerator;
+ sd_bus_add_object_manager;
+ sd_bus_slot_ref;
+ sd_bus_slot_unref;
+ sd_bus_slot_get_bus;
+ sd_bus_slot_get_userdata;
+ sd_bus_slot_set_userdata;
+ sd_bus_slot_get_description;
+ sd_bus_slot_set_description;
+ sd_bus_slot_get_current_message;
+ sd_bus_slot_get_current_handler;
+ sd_bus_slot_get_current_userdata;
+ sd_bus_message_new_signal;
+ sd_bus_message_new_method_call;
+ sd_bus_message_new_method_return;
+ sd_bus_message_new_method_error;
+ sd_bus_message_new_method_errorf;
+ sd_bus_message_new_method_errno;
+ sd_bus_message_new_method_errnof;
+ sd_bus_message_ref;
+ sd_bus_message_unref;
+ sd_bus_message_get_type;
+ sd_bus_message_get_cookie;
+ sd_bus_message_get_reply_cookie;
+ sd_bus_message_get_priority;
+ sd_bus_message_get_expect_reply;
+ sd_bus_message_get_auto_start;
+ sd_bus_message_get_allow_interactive_authorization;
+ sd_bus_message_get_signature;
+ sd_bus_message_get_path;
+ sd_bus_message_get_interface;
+ sd_bus_message_get_member;
+ sd_bus_message_get_destination;
+ sd_bus_message_get_sender;
+ sd_bus_message_get_error;
+ sd_bus_message_get_errno;
+ sd_bus_message_get_monotonic_usec;
+ sd_bus_message_get_realtime_usec;
+ sd_bus_message_get_seqnum;
+ sd_bus_message_get_bus;
+ sd_bus_message_get_creds;
+ sd_bus_message_is_signal;
+ sd_bus_message_is_method_call;
+ sd_bus_message_is_method_error;
+ sd_bus_message_is_empty;
+ sd_bus_message_has_signature;
+ sd_bus_message_set_expect_reply;
+ sd_bus_message_set_auto_start;
+ sd_bus_message_set_allow_interactive_authorization;
+ sd_bus_message_set_destination;
+ sd_bus_message_set_priority;
+ sd_bus_message_append;
+ sd_bus_message_append_basic;
+ sd_bus_message_append_array;
+ sd_bus_message_append_array_space;
+ sd_bus_message_append_array_iovec;
+ sd_bus_message_append_array_memfd;
+ sd_bus_message_append_string_space;
+ sd_bus_message_append_string_iovec;
+ sd_bus_message_append_string_memfd;
+ sd_bus_message_append_strv;
+ sd_bus_message_open_container;
+ sd_bus_message_close_container;
+ sd_bus_message_copy;
+ sd_bus_message_read;
+ sd_bus_message_read_basic;
+ sd_bus_message_read_array;
+ sd_bus_message_read_strv;
+ sd_bus_message_skip;
+ sd_bus_message_enter_container;
+ sd_bus_message_exit_container;
+ sd_bus_message_peek_type;
+ sd_bus_message_verify_type;
+ sd_bus_message_at_end;
+ sd_bus_message_rewind;
+ sd_bus_get_unique_name;
+ sd_bus_request_name;
+ sd_bus_release_name;
+ sd_bus_list_names;
+ sd_bus_get_name_creds;
+ sd_bus_get_name_machine_id;
+ sd_bus_call_method;
+ sd_bus_call_method_async;
+ sd_bus_get_property;
+ sd_bus_get_property_trivial;
+ sd_bus_get_property_string;
+ sd_bus_get_property_strv;
+ sd_bus_set_property;
+ sd_bus_reply_method_return;
+ sd_bus_reply_method_error;
+ sd_bus_reply_method_errorf;
+ sd_bus_reply_method_errno;
+ sd_bus_reply_method_errnof;
+ sd_bus_emit_signal;
+ sd_bus_emit_properties_changed_strv;
+ sd_bus_emit_properties_changed;
+ sd_bus_emit_interfaces_added_strv;
+ sd_bus_emit_interfaces_added;
+ sd_bus_emit_interfaces_removed_strv;
+ sd_bus_emit_interfaces_removed;
+ sd_bus_query_sender_creds;
+ sd_bus_query_sender_privilege;
+ sd_bus_creds_new_from_pid;
+ sd_bus_creds_ref;
+ sd_bus_creds_unref;
+ sd_bus_creds_get_mask;
+ sd_bus_creds_get_augmented_mask;
+ sd_bus_creds_get_pid;
+ sd_bus_creds_get_ppid;
+ sd_bus_creds_get_tid;
+ sd_bus_creds_get_uid;
+ sd_bus_creds_get_euid;
+ sd_bus_creds_get_suid;
+ sd_bus_creds_get_fsuid;
+ sd_bus_creds_get_gid;
+ sd_bus_creds_get_egid;
+ sd_bus_creds_get_sgid;
+ sd_bus_creds_get_fsgid;
+ sd_bus_creds_get_supplementary_gids;
+ sd_bus_creds_get_comm;
+ sd_bus_creds_get_tid_comm;
+ sd_bus_creds_get_exe;
+ sd_bus_creds_get_cmdline;
+ sd_bus_creds_get_cgroup;
+ sd_bus_creds_get_unit;
+ sd_bus_creds_get_slice;
+ sd_bus_creds_get_user_unit;
+ sd_bus_creds_get_user_slice;
+ sd_bus_creds_get_session;
+ sd_bus_creds_get_owner_uid;
+ sd_bus_creds_has_effective_cap;
+ sd_bus_creds_has_permitted_cap;
+ sd_bus_creds_has_inheritable_cap;
+ sd_bus_creds_has_bounding_cap;
+ sd_bus_creds_get_selinux_context;
+ sd_bus_creds_get_audit_session_id;
+ sd_bus_creds_get_audit_login_uid;
+ sd_bus_creds_get_tty;
+ sd_bus_creds_get_unique_name;
+ sd_bus_creds_get_well_known_names;
+ sd_bus_creds_get_description;
+ sd_bus_error_free;
+ sd_bus_error_set;
+ sd_bus_error_setf;
+ sd_bus_error_set_const;
+ sd_bus_error_set_errno;
+ sd_bus_error_set_errnof;
+ sd_bus_error_set_errnofv;
+ sd_bus_error_get_errno;
+ sd_bus_error_copy;
+ sd_bus_error_is_set;
+ sd_bus_error_has_name;
+ sd_bus_error_add_map;
+ sd_bus_path_encode;
+ sd_bus_path_decode;
+ sd_bus_track_new;
+ sd_bus_track_ref;
+ sd_bus_track_unref;
+ sd_bus_track_get_bus;
+ sd_bus_track_get_userdata;
+ sd_bus_track_set_userdata;
+ sd_bus_track_add_sender;
+ sd_bus_track_remove_sender;
+ sd_bus_track_add_name;
+ sd_bus_track_remove_name;
+ sd_bus_track_count;
+ sd_bus_track_contains;
+ sd_bus_track_first;
+ sd_bus_track_next;
+
+ /* sd-event */
+ sd_event_default;
+ sd_event_new;
+ sd_event_ref;
+ sd_event_unref;
+ sd_event_add_io;
+ sd_event_add_time;
+ sd_event_add_signal;
+ sd_event_add_child;
+ sd_event_add_defer;
+ sd_event_add_post;
+ sd_event_add_exit;
+ sd_event_prepare;
+ sd_event_wait;
+ sd_event_dispatch;
+ sd_event_run;
+ sd_event_loop;
+ sd_event_exit;
+ sd_event_now;
+ sd_event_get_fd;
+ sd_event_get_state;
+ sd_event_get_tid;
+ sd_event_get_exit_code;
+ sd_event_set_watchdog;
+ sd_event_get_watchdog;
+ sd_event_source_ref;
+ sd_event_source_unref;
+ sd_event_source_get_event;
+ sd_event_source_get_userdata;
+ sd_event_source_set_userdata;
+ sd_event_source_set_description;
+ sd_event_source_get_description;
+ sd_event_source_set_prepare;
+ sd_event_source_get_pending;
+ sd_event_source_get_priority;
+ sd_event_source_set_priority;
+ sd_event_source_get_enabled;
+ sd_event_source_set_enabled;
+ sd_event_source_get_io_fd;
+ sd_event_source_set_io_fd;
+ sd_event_source_get_io_events;
+ sd_event_source_set_io_events;
+ sd_event_source_get_io_revents;
+ sd_event_source_get_time;
+ sd_event_source_set_time;
+ sd_event_source_set_time_accuracy;
+ sd_event_source_get_time_accuracy;
+ sd_event_source_get_time_clock;
+ sd_event_source_get_signal;
+ sd_event_source_get_child_pid;
+} LIBSYSTEMD_220;
+
+LIBSYSTEMD_222 {
+global:
+ /* sd-bus */
+ sd_bus_emit_object_added;
+ sd_bus_emit_object_removed;
+ sd_bus_flush_close_unref;
+} LIBSYSTEMD_221;
+
+LIBSYSTEMD_226 {
+global:
+ sd_pid_get_cgroup;
+ sd_peer_get_cgroup;
+} LIBSYSTEMD_222;
+
+LIBSYSTEMD_227 {
+global:
+ sd_bus_default_flush_close;
+ sd_bus_path_decode_many;
+ sd_bus_path_encode_many;
+ sd_listen_fds_with_names;
+} LIBSYSTEMD_226;
+
+LIBSYSTEMD_229 {
+global:
+ sd_journal_has_runtime_files;
+ sd_journal_has_persistent_files;
+ sd_journal_enumerate_fields;
+ sd_journal_restart_fields;
+} LIBSYSTEMD_227;
+
+LIBSYSTEMD_230 {
+global:
+ sd_journal_open_directory_fd;
+ sd_journal_open_files_fd;
+} LIBSYSTEMD_229;
+
+LIBSYSTEMD_231 {
+global:
+ sd_event_get_iteration;
+} LIBSYSTEMD_230;
+
+LIBSYSTEMD_232 {
+global:
+ sd_bus_track_set_recursive;
+ sd_bus_track_get_recursive;
+ sd_bus_track_count_name;
+ sd_bus_track_count_sender;
+ sd_bus_set_exit_on_disconnect;
+ sd_bus_get_exit_on_disconnect;
+ sd_id128_get_invocation;
+} LIBSYSTEMD_231;
+
+LIBSYSTEMD_233 {
+global:
+ sd_id128_get_machine_app_specific;
+ sd_is_socket_sockaddr;
+} LIBSYSTEMD_232;
+
+LIBSYSTEMD_234 {
+global:
+ sd_bus_message_appendv;
+} LIBSYSTEMD_233;
+
+LIBSYSTEMD_236 {
+global:
+ sd_bus_message_new;
+ sd_bus_message_seal;
+} LIBSYSTEMD_234;
+
+LIBSYSTEMD_237 {
+global:
+ sd_bus_set_watch_bind;
+ sd_bus_get_watch_bind;
+ sd_bus_request_name_async;
+ sd_bus_release_name_async;
+ sd_bus_add_match_async;
+ sd_bus_match_signal;
+ sd_bus_match_signal_async;
+ sd_bus_is_ready;
+ sd_bus_set_connected_signal;
+ sd_bus_get_connected_signal;
+ sd_bus_set_sender;
+ sd_bus_get_sender;
+ sd_bus_message_set_sender;
+ sd_event_source_get_io_fd_own;
+ sd_event_source_set_io_fd_own;
+} LIBSYSTEMD_236;
+
+LIBSYSTEMD_238 {
+global:
+ sd_bus_get_n_queued_read;
+ sd_bus_get_n_queued_write;
+} LIBSYSTEMD_237;
+
+LIBSYSTEMD_239 {
+global:
+ sd_bus_open_with_description;
+ sd_bus_open_user_with_description;
+ sd_bus_open_system_with_description;
+ sd_bus_slot_get_floating;
+ sd_bus_slot_set_floating;
+ sd_bus_slot_get_destroy_callback;
+ sd_bus_slot_set_destroy_callback;
+ sd_bus_track_get_destroy_callback;
+ sd_bus_track_set_destroy_callback;
+ sd_event_add_inotify;
+ sd_event_source_get_inotify_mask;
+ sd_event_source_set_destroy_callback;
+ sd_event_source_get_destroy_callback;
+} LIBSYSTEMD_238;
+
+LIBSYSTEMD_240 {
+global:
+ sd_bus_message_readv;
+ sd_bus_set_method_call_timeout;
+ sd_bus_get_method_call_timeout;
+
+ sd_bus_error_move;
+
+ sd_bus_set_close_on_exit;
+ sd_bus_get_close_on_exit;
+
+ sd_device_ref;
+ sd_device_unref;
+
+ sd_device_new_from_syspath;
+ sd_device_new_from_devnum;
+ sd_device_new_from_subsystem_sysname;
+ sd_device_new_from_device_id;
+
+ sd_device_get_parent;
+ sd_device_get_parent_with_subsystem_devtype;
+
+ sd_device_get_syspath;
+ sd_device_get_subsystem;
+ sd_device_get_devtype;
+ sd_device_get_devnum;
+ sd_device_get_ifindex;
+ sd_device_get_driver;
+ sd_device_get_devpath;
+ sd_device_get_devname;
+ sd_device_get_sysname;
+ sd_device_get_sysnum;
+
+ sd_device_get_is_initialized;
+ sd_device_get_usec_since_initialized;
+
+ sd_device_get_tag_first;
+ sd_device_get_tag_next;
+ sd_device_get_devlink_first;
+ sd_device_get_devlink_next;
+ sd_device_get_property_first;
+ sd_device_get_property_next;
+ sd_device_get_sysattr_first;
+ sd_device_get_sysattr_next;
+
+ sd_device_has_tag;
+ sd_device_get_property_value;
+ sd_device_get_sysattr_value;
+
+ sd_device_set_sysattr_value;
+
+ sd_device_enumerator_new;
+ sd_device_enumerator_ref;
+ sd_device_enumerator_unref;
+
+ sd_device_enumerator_get_device_first;
+ sd_device_enumerator_get_device_next;
+ sd_device_enumerator_get_subsystem_first;
+ sd_device_enumerator_get_subsystem_next;
+
+ sd_device_enumerator_add_match_subsystem;
+ sd_device_enumerator_add_match_sysattr;
+ sd_device_enumerator_add_match_property;
+ sd_device_enumerator_add_match_sysname;
+ sd_device_enumerator_add_match_tag;
+ sd_device_enumerator_add_match_parent;
+ sd_device_enumerator_allow_uninitialized;
+
+ sd_hwdb_ref;
+ sd_hwdb_unref;
+
+ sd_hwdb_new;
+
+ sd_hwdb_get;
+
+ sd_hwdb_seek;
+ sd_hwdb_enumerate;
+
+ sd_id128_get_boot_app_specific;
+
+ sd_device_monitor_new;
+ sd_device_monitor_ref;
+ sd_device_monitor_unref;
+
+ sd_device_monitor_set_receive_buffer_size;
+ sd_device_monitor_attach_event;
+ sd_device_monitor_detach_event;
+ sd_device_monitor_get_event;
+ sd_device_monitor_get_event_source;
+ sd_device_monitor_start;
+ sd_device_monitor_stop;
+
+ sd_device_monitor_filter_add_match_subsystem_devtype;
+ sd_device_monitor_filter_add_match_tag;
+ sd_device_monitor_filter_update;
+ sd_device_monitor_filter_remove;
+
+ sd_event_source_get_floating;
+ sd_event_source_set_floating;
+} LIBSYSTEMD_239;
+
+LIBSYSTEMD_241 {
+global:
+ sd_bus_close_unref;
+} LIBSYSTEMD_240;
+
+LIBSYSTEMD_243 {
+global:
+ sd_bus_object_vtable_format;
+ sd_event_source_disable_unref;
+} LIBSYSTEMD_241;
+
+LIBSYSTEMD_245 {
+global:
+ sd_bus_enqueue_for_read;
+ sd_bus_message_dump;
+ sd_bus_message_sensitive;
+ sd_event_add_child_pidfd;
+ sd_event_source_get_child_pidfd;
+ sd_event_source_get_child_pidfd_own;
+ sd_event_source_set_child_pidfd_own;
+ sd_event_source_get_child_process_own;
+ sd_event_source_set_child_process_own;
+ sd_event_source_send_child_signal;
+ sd_journal_open_namespace;
+} LIBSYSTEMD_243;
+
+LIBSYSTEMD_246 {
+global:
+ sd_bus_interface_name_is_valid;
+ sd_bus_service_name_is_valid;
+ sd_bus_member_name_is_valid;
+ sd_bus_object_path_is_valid;
+
+ sd_bus_call_methodv;
+ sd_bus_call_method_asyncv;
+ sd_bus_emit_signalv;
+ sd_bus_reply_method_errnofv;
+ sd_bus_reply_method_errorfv;
+ sd_bus_reply_method_returnv;
+ sd_bus_set_propertyv;
+
+ sd_path_lookup;
+ sd_path_lookup_strv;
+
+ sd_notify_barrier;
+
+ sd_journal_enumerate_available_data;
+ sd_journal_enumerate_available_unique;
+} LIBSYSTEMD_245;
+
+LIBSYSTEMD_247 {
+global:
+ sd_event_add_time_relative;
+ sd_event_source_set_time_relative;
+ sd_event_source_get_exit_on_failure;
+ sd_event_source_set_exit_on_failure;
+
+ sd_bus_error_has_names_sentinel;
+
+ sd_device_get_current_tag_first;
+ sd_device_get_current_tag_next;
+ sd_device_has_current_tag;
+ sd_device_set_sysattr_valuef;
+} LIBSYSTEMD_246;
+
+LIBSYSTEMD_248 {
+global:
+ sd_bus_open_user_machine;
+ sd_bus_message_send;
+
+ sd_event_source_set_ratelimit;
+ sd_event_source_get_ratelimit;
+ sd_event_source_is_ratelimited;
+
+ sd_device_get_action;
+ sd_device_get_seqnum;
+ sd_device_new_from_stat_rdev;
+ sd_device_trigger;
+} LIBSYSTEMD_247;
+
+LIBSYSTEMD_249 {
+global:
+ sd_device_monitor_filter_add_match_sysattr;
+ sd_device_monitor_filter_add_match_parent;
+ sd_device_get_usec_initialized;
+ sd_device_trigger_with_uuid;
+ sd_device_get_trigger_uuid;
+ sd_device_new_from_ifname;
+ sd_device_new_from_ifindex;
+} LIBSYSTEMD_248;
+
+LIBSYSTEMD_250 {
+global:
+ sd_device_get_diskseq;
+ sd_event_add_inotify_fd;
+ sd_event_source_set_ratelimit_expire_callback;
+} LIBSYSTEMD_249;
+
+LIBSYSTEMD_251 {
+global:
+ sd_id128_to_uuid_string;
+ sd_device_new_from_devname;
+ sd_device_new_from_path;
+ sd_device_open;
+ sd_device_enumerator_add_nomatch_sysname;
+} LIBSYSTEMD_250;
+
+LIBSYSTEMD_252 {
+global:
+ sd_bus_message_read_strv_extend;
+ sd_bus_error_setfv;
+
+ sd_device_new_child;
+ sd_device_get_child_first;
+ sd_device_get_child_next;
+ sd_device_monitor_set_description;
+ sd_device_monitor_get_description;
+
+ sd_event_set_signal_exit;
+
+ sd_id128_string_equal;
+
+ sd_hwdb_new_from_path;
+} LIBSYSTEMD_251;
+
+LIBSYSTEMD_253 {
+global:
+ sd_bus_emit_signal_to;
+ sd_bus_emit_signal_tov;
+ sd_bus_message_new_signal_to;
+ sd_pidfd_get_cgroup;
+ sd_pidfd_get_machine_name;
+ sd_pidfd_get_owner_uid;
+ sd_pidfd_get_session;
+ sd_pidfd_get_slice;
+ sd_pidfd_get_unit;
+ sd_pidfd_get_user_slice;
+ sd_pidfd_get_user_unit;
+} LIBSYSTEMD_252;
+
+LIBSYSTEMD_254 {
+global:
+ sd_journal_get_seqnum;
+ sd_session_get_username;
+ sd_session_get_start_time;
+ sd_uid_get_login_time;
+ sd_pid_notifyf_with_fds;
+ sd_event_add_memory_pressure;
+ sd_event_source_set_memory_pressure_type;
+ sd_event_source_set_memory_pressure_period;
+ sd_event_trim_memory;
+ sd_pid_notify_barrier;
+ sd_event_source_leave_ratelimit;
+ sd_journal_step_one;
+ sd_session_get_leader;
+} LIBSYSTEMD_253;
+
+LIBSYSTEMD_255 {
+global:
+ sd_id128_get_app_specific;
+ sd_device_enumerator_add_match_property_required;
+} LIBSYSTEMD_254;
diff --git a/src/libsystemd/meson.build b/src/libsystemd/meson.build
new file mode 100644
index 0000000..5d18f97
--- /dev/null
+++ b/src/libsystemd/meson.build
@@ -0,0 +1,265 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+sd_journal_sources = files(
+ 'sd-journal/audit-type.c',
+ 'sd-journal/catalog.c',
+ 'sd-journal/journal-file.c',
+ 'sd-journal/journal-send.c',
+ 'sd-journal/journal-vacuum.c',
+ 'sd-journal/journal-verify.c',
+ 'sd-journal/lookup3.c',
+ 'sd-journal/mmap-cache.c',
+ 'sd-journal/sd-journal.c',
+)
+
+if conf.get('HAVE_GCRYPT') == 1
+ sd_journal_sources += files(
+ 'sd-journal/fsprg.c',
+ 'sd-journal/journal-authenticate.c',
+ )
+endif
+
+audit_type_includes = [config_h,
+ missing_audit_h,
+ 'linux/audit.h']
+if conf.get('HAVE_AUDIT') == 1
+ audit_type_includes += 'libaudit.h'
+endif
+
+generate_audit_type_list = find_program('sd-journal/generate-audit_type-list.sh')
+audit_type_list_txt = custom_target(
+ 'audit_type-list.txt',
+ output : 'audit_type-list.txt',
+ command : [generate_audit_type_list, cpp] + audit_type_includes,
+ capture : true)
+
+audit_type_to_name = custom_target(
+ 'audit_type-to-name.h',
+ input : ['sd-journal/audit_type-to-name.awk', audit_type_list_txt],
+ output : 'audit_type-to-name.h',
+ command : [awk, '-f', '@INPUT0@', '@INPUT1@'],
+ capture : true)
+
+sd_journal_sources += [audit_type_to_name]
+
+############################################################
+
+id128_sources = files(
+ 'sd-id128/id128-util.c',
+ 'sd-id128/sd-id128.c',
+)
+
+############################################################
+
+sd_daemon_sources = files('sd-daemon/sd-daemon.c')
+
+############################################################
+
+sd_event_sources = files(
+ 'sd-event/event-util.c',
+ 'sd-event/sd-event.c',
+)
+
+############################################################
+
+sd_login_sources = files('sd-login/sd-login.c')
+
+############################################################
+
+libsystemd_sources = files(
+ 'sd-bus/bus-common-errors.c',
+ 'sd-bus/bus-container.c',
+ 'sd-bus/bus-control.c',
+ 'sd-bus/bus-convenience.c',
+ 'sd-bus/bus-creds.c',
+ 'sd-bus/bus-dump.c',
+ 'sd-bus/bus-error.c',
+ 'sd-bus/bus-internal.c',
+ 'sd-bus/bus-introspect.c',
+ 'sd-bus/bus-kernel.c',
+ 'sd-bus/bus-match.c',
+ 'sd-bus/bus-message.c',
+ 'sd-bus/bus-objects.c',
+ 'sd-bus/bus-signature.c',
+ 'sd-bus/bus-slot.c',
+ 'sd-bus/bus-socket.c',
+ 'sd-bus/bus-track.c',
+ 'sd-bus/bus-type.c',
+ 'sd-bus/sd-bus.c',
+ 'sd-device/device-enumerator.c',
+ 'sd-device/device-filter.c',
+ 'sd-device/device-monitor.c',
+ 'sd-device/device-private.c',
+ 'sd-device/device-util.c',
+ 'sd-device/sd-device.c',
+ 'sd-hwdb/sd-hwdb.c',
+ 'sd-netlink/netlink-genl.c',
+ 'sd-netlink/netlink-message-nfnl.c',
+ 'sd-netlink/netlink-message-rtnl.c',
+ 'sd-netlink/netlink-message.c',
+ 'sd-netlink/netlink-slot.c',
+ 'sd-netlink/netlink-socket.c',
+ 'sd-netlink/netlink-types-genl.c',
+ 'sd-netlink/netlink-types-nfnl.c',
+ 'sd-netlink/netlink-types-rtnl.c',
+ 'sd-netlink/netlink-types.c',
+ 'sd-netlink/netlink-util.c',
+ 'sd-netlink/sd-netlink.c',
+ 'sd-network/network-util.c',
+ 'sd-network/sd-network.c',
+ 'sd-path/sd-path.c',
+ 'sd-resolve/sd-resolve.c',
+) + sd_journal_sources + id128_sources + sd_daemon_sources + sd_event_sources + sd_login_sources
+
+libsystemd_c_args = ['-fvisibility=default']
+
+libsystemd_static = static_library(
+ 'systemd_static',
+ libsystemd_sources,
+ include_directories : libsystemd_includes,
+ c_args : libsystemd_c_args,
+ link_with : [libbasic,
+ libbasic_compress],
+ dependencies : [threads,
+ librt,
+ userspace],
+ build_by_default : false)
+
+libsystemd_dir_path = meson.current_source_dir()
+
+libsystemd_sym = files('libsystemd.sym')
+libsystemd_sym_path = libsystemd_dir_path / 'libsystemd.sym'
+
+static_libsystemd = get_option('static-libsystemd')
+static_libsystemd_pic = static_libsystemd == 'true' or static_libsystemd == 'pic'
+
+libsystemd_pc = custom_target(
+ 'libsystemd.pc',
+ input : 'libsystemd.pc.in',
+ output : 'libsystemd.pc',
+ command : [jinja2_cmdline, '@INPUT@', '@OUTPUT@'],
+ install : pkgconfiglibdir != 'no',
+ install_tag : 'devel',
+ install_dir : pkgconfiglibdir)
+
+############################################################
+
+simple_tests += files(
+ 'sd-journal/test-audit-type.c',
+ 'sd-journal/test-catalog.c',
+ 'sd-journal/test-journal-file.c',
+ 'sd-journal/test-journal-init.c',
+ 'sd-journal/test-journal-match.c',
+ 'sd-journal/test-journal-send.c',
+ 'sd-journal/test-mmap-cache.c',
+)
+
+libsystemd_tests += [
+ {
+ 'sources' : files('sd-journal/test-journal-enum.c'),
+ 'timeout' : 360,
+ },
+]
+
+############################################################
+
+simple_tests += files(
+ 'sd-bus/test-bus-creds.c',
+ 'sd-bus/test-bus-introspect.c',
+ 'sd-bus/test-bus-match.c',
+ 'sd-bus/test-bus-vtable.c',
+ 'sd-device/test-device-util.c',
+ 'sd-device/test-sd-device-monitor.c',
+ 'sd-device/test-sd-device.c',
+ 'sd-event/test-event.c',
+ 'sd-journal/test-journal-flush.c',
+ 'sd-journal/test-journal-interleaving.c',
+ 'sd-journal/test-journal-stream.c',
+ 'sd-journal/test-journal.c',
+ 'sd-login/test-login.c',
+ 'sd-netlink/test-netlink.c',
+)
+
+libsystemd_tests += [
+ {
+ 'sources' : files('sd-bus/test-bus-address.c'),
+ 'dependencies' : threads
+ },
+ {
+ 'sources' : files('sd-bus/test-bus-benchmark.c'),
+ 'dependencies' : threads,
+ 'type' : 'manual',
+ },
+ {
+ 'sources' : files('sd-bus/test-bus-chat.c'),
+ 'dependencies' : threads,
+ },
+ {
+ 'sources' : files('sd-bus/test-bus-cleanup.c'),
+ 'dependencies' : [threads, libseccomp],
+ },
+ {
+ 'sources' : files('sd-bus/test-bus-marshal.c'),
+ 'dependencies' : [
+ libdbus,
+ libgio,
+ libglib,
+ libgobject,
+ libm,
+ threads,
+ ],
+ },
+ {
+ 'sources' : files('sd-bus/test-bus-objects.c'),
+ 'dependencies' : threads,
+ },
+ {
+ 'sources' : files('sd-bus/test-bus-peersockaddr.c'),
+ 'dependencies' : threads,
+ },
+ {
+ 'sources' : files('sd-bus/test-bus-queue-ref-cycle.c'),
+ 'dependencies' : threads,
+ },
+ {
+ 'sources' : files('sd-bus/test-bus-server.c'),
+ 'dependencies' : threads,
+ },
+ {
+ 'sources' : files('sd-bus/test-bus-signature.c'),
+ 'dependencies' : threads,
+ },
+ {
+ 'sources' : files('sd-bus/test-bus-track.c'),
+ 'dependencies' : libseccomp,
+ },
+ {
+ 'sources' : files('sd-bus/test-bus-watch-bind.c'),
+ 'dependencies' : threads,
+ 'timeout' : 120,
+ },
+ {
+ 'sources' : files('sd-journal/test-journal-append.c'),
+ 'type' : 'manual',
+ },
+ {
+ 'sources' : files('sd-journal/test-journal-verify.c'),
+ 'timeout' : 90,
+ },
+ {
+ 'sources' : files('sd-resolve/test-resolve.c'),
+ 'dependencies' : threads,
+ 'timeout' : 120,
+ },
+]
+
+if cxx_cmd != ''
+ simple_tests += files('sd-bus/test-bus-vtable-cc.cc')
+endif
+
+############################################################
+
+simple_fuzzers += files(
+ 'sd-bus/fuzz-bus-match.c',
+ 'sd-bus/fuzz-bus-message.c',
+)
diff --git a/src/libsystemd/sd-bus/bus-common-errors.c b/src/libsystemd/sd-bus/bus-common-errors.c
new file mode 100644
index 0000000..df26fd7
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-common-errors.c
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "sd-bus.h"
+
+#include "bus-common-errors.h"
+#include "bus-error.h"
+
+BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map bus_common_errors[] = {
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_UNIT, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_UNIT_FOR_PID, ESRCH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_UNIT_EXISTS, EEXIST),
+ SD_BUS_ERROR_MAP(BUS_ERROR_LOAD_FAILED, EIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_BAD_UNIT_SETTING, ENOEXEC),
+ SD_BUS_ERROR_MAP(BUS_ERROR_JOB_FAILED, EREMOTEIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_JOB, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NOT_SUBSCRIBED, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_ALREADY_SUBSCRIBED, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_ONLY_BY_DEPENDENCY, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TRANSACTION_JOBS_CONFLICTING, EDEADLK),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TRANSACTION_ORDER_IS_CYCLIC, EDEADLK),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TRANSACTION_IS_DESTRUCTIVE, EDEADLK),
+ SD_BUS_ERROR_MAP(BUS_ERROR_UNIT_MASKED, ERFKILL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_UNIT_GENERATED, EADDRNOTAVAIL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_UNIT_LINKED, ELOOP),
+ SD_BUS_ERROR_MAP(BUS_ERROR_JOB_TYPE_NOT_APPLICABLE, EBADR),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_ISOLATION, EPERM),
+ SD_BUS_ERROR_MAP(BUS_ERROR_SHUTTING_DOWN, ECANCELED),
+ SD_BUS_ERROR_MAP(BUS_ERROR_SCOPE_NOT_RUNNING, EHOSTDOWN),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_DYNAMIC_USER, ESRCH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NOT_REFERENCED, EUNATCH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DISK_FULL, ENOSPC),
+ SD_BUS_ERROR_MAP(BUS_ERROR_FILE_DESCRIPTOR_STORE_DISABLED,
+ EHOSTDOWN),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_MACHINE, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_IMAGE, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_MACHINE_FOR_PID, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_MACHINE_EXISTS, EEXIST),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_PRIVATE_NETWORKING, ENOSYS),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_USER_MAPPING, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_GROUP_MAPPING, ENXIO),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_PORTABLE_IMAGE, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_BAD_PORTABLE_IMAGE_TYPE, EMEDIUMTYPE),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_SESSION, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SESSION_FOR_PID, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_USER, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_USER_FOR_PID, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_SEAT, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_SESSION_NOT_ON_SEAT, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NOT_IN_CONTROL, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DEVICE_IS_TAKEN, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DEVICE_NOT_TAKEN, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_OPERATION_IN_PROGRESS, EINPROGRESS),
+ SD_BUS_ERROR_MAP(BUS_ERROR_SLEEP_VERB_NOT_SUPPORTED, EOPNOTSUPP),
+ SD_BUS_ERROR_MAP(BUS_ERROR_SESSION_BUSY, EBUSY),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NOT_YOUR_DEVICE, EPERM),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_AUTOMATIC_TIME_SYNC_ENABLED, EALREADY),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_NTP_SUPPORT, EOPNOTSUPP),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_PROCESS, ESRCH),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_NAME_SERVERS, ESRCH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_INVALID_REPLY, EINVAL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_RR, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_CNAME_LOOP, EDEADLK),
+ SD_BUS_ERROR_MAP(BUS_ERROR_ABORTED, ECANCELED),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_SERVICE, EUNATCH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNSSEC_FAILED, EHOSTUNREACH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_TRUST_ANCHOR, EHOSTUNREACH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_RR_TYPE_UNSUPPORTED, EOPNOTSUPP),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_LINK, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_LINK_BUSY, EBUSY),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NETWORK_DOWN, ENETDOWN),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SOURCE, ESRCH),
+ SD_BUS_ERROR_MAP(BUS_ERROR_STUB_LOOP, ELOOP),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_DNSSD_SERVICE, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNSSD_SERVICE_EXISTS, EEXIST),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNS_FORMERR, EBADMSG),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNS_SERVFAIL, EHOSTDOWN),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNS_NXDOMAIN, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNS_NOTIMP, ENOSYS),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNS_REFUSED, EACCES),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNS_YXDOMAIN, EEXIST),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNS_YRRSET, EEXIST),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNS_NXRRSET, ENOENT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNS_NOTAUTH, EACCES),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNS_NOTZONE, EREMOTE),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNS_BADVERS, EBADMSG),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNS_BADKEY, EKEYREJECTED),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNS_BADTIME, EBADMSG),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNS_BADMODE, EBADMSG),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNS_BADNAME, EBADMSG),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNS_BADALG, EBADMSG),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNS_BADTRUNC, EBADMSG),
+ SD_BUS_ERROR_MAP(BUS_ERROR_DNS_BADCOOKIE, EBADR),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_TRANSFER, ENXIO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TRANSFER_IN_PROGRESS, EBUSY),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_PRODUCT_UUID, EOPNOTSUPP),
+ SD_BUS_ERROR_MAP(BUS_ERROR_FILE_IS_PROTECTED, EACCES),
+ SD_BUS_ERROR_MAP(BUS_ERROR_READ_ONLY_FILESYSTEM, EROFS),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_SPEED_METER_INACTIVE, EOPNOTSUPP),
+ SD_BUS_ERROR_MAP(BUS_ERROR_UNMANAGED_INTERFACE, EOPNOTSUPP),
+
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_HOME, EEXIST),
+ SD_BUS_ERROR_MAP(BUS_ERROR_UID_IN_USE, EEXIST),
+ SD_BUS_ERROR_MAP(BUS_ERROR_USER_NAME_EXISTS, EEXIST),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_EXISTS, EEXIST),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_ALREADY_ACTIVE, EALREADY),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_ALREADY_FIXATED, EALREADY),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_UNFIXATED, EADDRNOTAVAIL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_NOT_ACTIVE, EALREADY),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_ABSENT, EREMOTE),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_BUSY, EBUSY),
+ SD_BUS_ERROR_MAP(BUS_ERROR_BAD_PASSWORD, ENOKEY),
+ SD_BUS_ERROR_MAP(BUS_ERROR_LOW_PASSWORD_QUALITY, EUCLEAN),
+ SD_BUS_ERROR_MAP(BUS_ERROR_BAD_PASSWORD_AND_NO_TOKEN, EBADSLT),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_PIN_NEEDED, ENOANO),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_PROTECTED_AUTHENTICATION_PATH_NEEDED, ERFKILL),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_USER_PRESENCE_NEEDED, EMEDIUMTYPE),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_USER_VERIFICATION_NEEDED, ENOCSI),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_ACTION_TIMEOUT, ENOSTR),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_PIN_LOCKED, EOWNERDEAD),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_BAD_PIN, ENOLCK),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_BAD_PIN_FEW_TRIES_LEFT, ETOOMANYREFS),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_BAD_PIN_ONE_TRY_LEFT, EUCLEAN),
+ SD_BUS_ERROR_MAP(BUS_ERROR_BAD_SIGNATURE, EKEYREJECTED),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_RECORD_MISMATCH, EUCLEAN),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_RECORD_DOWNGRADE, ESTALE),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_RECORD_SIGNED, EROFS),
+ SD_BUS_ERROR_MAP(BUS_ERROR_BAD_HOME_SIZE, ERANGE),
+ SD_BUS_ERROR_MAP(BUS_ERROR_NO_PRIVATE_KEY, ENOPKG),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_LOCKED, ENOEXEC),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_NOT_LOCKED, ENOEXEC),
+ SD_BUS_ERROR_MAP(BUS_ERROR_TOO_MANY_OPERATIONS, ENOBUFS),
+ SD_BUS_ERROR_MAP(BUS_ERROR_AUTHENTICATION_LIMIT_HIT, ETOOMANYREFS),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_CANT_AUTHENTICATE, EKEYREVOKED),
+ SD_BUS_ERROR_MAP(BUS_ERROR_HOME_IN_USE, EADDRINUSE),
+ SD_BUS_ERROR_MAP(BUS_ERROR_REBALANCE_NOT_NEEDED, EALREADY),
+
+ SD_BUS_ERROR_MAP_END
+};
diff --git a/src/libsystemd/sd-bus/bus-common-errors.h b/src/libsystemd/sd-bus/bus-common-errors.h
new file mode 100644
index 0000000..3a0eef4
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-common-errors.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "bus-error.h"
+
+#define BUS_ERROR_NO_SUCH_UNIT "org.freedesktop.systemd1.NoSuchUnit"
+#define BUS_ERROR_NO_UNIT_FOR_PID "org.freedesktop.systemd1.NoUnitForPID"
+#define BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID "org.freedesktop.systemd1.NoUnitForInvocationID"
+#define BUS_ERROR_UNIT_EXISTS "org.freedesktop.systemd1.UnitExists"
+#define BUS_ERROR_LOAD_FAILED "org.freedesktop.systemd1.LoadFailed"
+#define BUS_ERROR_BAD_UNIT_SETTING "org.freedesktop.systemd1.BadUnitSetting"
+#define BUS_ERROR_JOB_FAILED "org.freedesktop.systemd1.JobFailed"
+#define BUS_ERROR_NO_SUCH_JOB "org.freedesktop.systemd1.NoSuchJob"
+#define BUS_ERROR_NOT_SUBSCRIBED "org.freedesktop.systemd1.NotSubscribed"
+#define BUS_ERROR_ALREADY_SUBSCRIBED "org.freedesktop.systemd1.AlreadySubscribed"
+#define BUS_ERROR_ONLY_BY_DEPENDENCY "org.freedesktop.systemd1.OnlyByDependency"
+#define BUS_ERROR_TRANSACTION_JOBS_CONFLICTING "org.freedesktop.systemd1.TransactionJobsConflicting"
+#define BUS_ERROR_TRANSACTION_ORDER_IS_CYCLIC "org.freedesktop.systemd1.TransactionOrderIsCyclic"
+#define BUS_ERROR_TRANSACTION_IS_DESTRUCTIVE "org.freedesktop.systemd1.TransactionIsDestructive"
+#define BUS_ERROR_UNIT_MASKED "org.freedesktop.systemd1.UnitMasked"
+#define BUS_ERROR_UNIT_GENERATED "org.freedesktop.systemd1.UnitGenerated"
+#define BUS_ERROR_UNIT_LINKED "org.freedesktop.systemd1.UnitLinked"
+#define BUS_ERROR_UNIT_BAD_PATH "org.freedesktop.systemd1.UnitBadPath"
+#define BUS_ERROR_JOB_TYPE_NOT_APPLICABLE "org.freedesktop.systemd1.JobTypeNotApplicable"
+#define BUS_ERROR_NO_ISOLATION "org.freedesktop.systemd1.NoIsolation"
+#define BUS_ERROR_SHUTTING_DOWN "org.freedesktop.systemd1.ShuttingDown"
+#define BUS_ERROR_SCOPE_NOT_RUNNING "org.freedesktop.systemd1.ScopeNotRunning"
+#define BUS_ERROR_NO_SUCH_DYNAMIC_USER "org.freedesktop.systemd1.NoSuchDynamicUser"
+#define BUS_ERROR_NOT_REFERENCED "org.freedesktop.systemd1.NotReferenced"
+#define BUS_ERROR_DISK_FULL "org.freedesktop.systemd1.DiskFull"
+#define BUS_ERROR_NOTHING_TO_CLEAN "org.freedesktop.systemd1.NothingToClean"
+#define BUS_ERROR_UNIT_BUSY "org.freedesktop.systemd1.UnitBusy"
+#define BUS_ERROR_UNIT_INACTIVE "org.freedesktop.systemd1.UnitInactive"
+#define BUS_ERROR_FREEZE_CANCELLED "org.freedesktop.systemd1.FreezeCancelled"
+#define BUS_ERROR_FILE_DESCRIPTOR_STORE_DISABLED \
+ "org.freedesktop.systemd1.FileDescriptorStoreDisabled"
+
+#define BUS_ERROR_NO_SUCH_MACHINE "org.freedesktop.machine1.NoSuchMachine"
+#define BUS_ERROR_NO_SUCH_IMAGE "org.freedesktop.machine1.NoSuchImage"
+#define BUS_ERROR_NO_MACHINE_FOR_PID "org.freedesktop.machine1.NoMachineForPID"
+#define BUS_ERROR_MACHINE_EXISTS "org.freedesktop.machine1.MachineExists"
+#define BUS_ERROR_NO_PRIVATE_NETWORKING "org.freedesktop.machine1.NoPrivateNetworking"
+#define BUS_ERROR_NO_SUCH_USER_MAPPING "org.freedesktop.machine1.NoSuchUserMapping"
+#define BUS_ERROR_NO_SUCH_GROUP_MAPPING "org.freedesktop.machine1.NoSuchGroupMapping"
+
+#define BUS_ERROR_NO_SUCH_PORTABLE_IMAGE "org.freedesktop.portable1.NoSuchImage"
+#define BUS_ERROR_BAD_PORTABLE_IMAGE_TYPE "org.freedesktop.portable1.BadImageType"
+
+#define BUS_ERROR_NO_SUCH_SESSION "org.freedesktop.login1.NoSuchSession"
+#define BUS_ERROR_NO_SESSION_FOR_PID "org.freedesktop.login1.NoSessionForPID"
+#define BUS_ERROR_NO_SUCH_USER "org.freedesktop.login1.NoSuchUser"
+#define BUS_ERROR_NO_USER_FOR_PID "org.freedesktop.login1.NoUserForPID"
+#define BUS_ERROR_NO_SUCH_SEAT "org.freedesktop.login1.NoSuchSeat"
+#define BUS_ERROR_SESSION_NOT_ON_SEAT "org.freedesktop.login1.SessionNotOnSeat"
+#define BUS_ERROR_NOT_IN_CONTROL "org.freedesktop.login1.NotInControl"
+#define BUS_ERROR_DEVICE_IS_TAKEN "org.freedesktop.login1.DeviceIsTaken"
+#define BUS_ERROR_DEVICE_NOT_TAKEN "org.freedesktop.login1.DeviceNotTaken"
+#define BUS_ERROR_OPERATION_IN_PROGRESS "org.freedesktop.login1.OperationInProgress"
+#define BUS_ERROR_SLEEP_VERB_NOT_SUPPORTED "org.freedesktop.login1.SleepVerbNotSupported"
+#define BUS_ERROR_SESSION_BUSY "org.freedesktop.login1.SessionBusy"
+#define BUS_ERROR_NOT_YOUR_DEVICE "org.freedesktop.login1.NotYourDevice"
+
+#define BUS_ERROR_AUTOMATIC_TIME_SYNC_ENABLED "org.freedesktop.timedate1.AutomaticTimeSyncEnabled"
+#define BUS_ERROR_NO_NTP_SUPPORT "org.freedesktop.timedate1.NoNTPSupport"
+
+#define BUS_ERROR_NO_SUCH_PROCESS "org.freedesktop.systemd1.NoSuchProcess"
+
+#define BUS_ERROR_NO_NAME_SERVERS "org.freedesktop.resolve1.NoNameServers"
+#define BUS_ERROR_INVALID_REPLY "org.freedesktop.resolve1.InvalidReply"
+#define BUS_ERROR_NO_SUCH_RR "org.freedesktop.resolve1.NoSuchRR"
+#define BUS_ERROR_CNAME_LOOP "org.freedesktop.resolve1.CNameLoop"
+#define BUS_ERROR_ABORTED "org.freedesktop.resolve1.Aborted"
+#define BUS_ERROR_NO_SUCH_SERVICE "org.freedesktop.resolve1.NoSuchService"
+#define BUS_ERROR_DNSSEC_FAILED "org.freedesktop.resolve1.DnssecFailed"
+#define BUS_ERROR_NO_TRUST_ANCHOR "org.freedesktop.resolve1.NoTrustAnchor"
+#define BUS_ERROR_RR_TYPE_UNSUPPORTED "org.freedesktop.resolve1.ResourceRecordTypeUnsupported"
+#define BUS_ERROR_NO_SUCH_LINK "org.freedesktop.resolve1.NoSuchLink"
+#define BUS_ERROR_LINK_BUSY "org.freedesktop.resolve1.LinkBusy"
+#define BUS_ERROR_NETWORK_DOWN "org.freedesktop.resolve1.NetworkDown"
+#define BUS_ERROR_NO_SOURCE "org.freedesktop.resolve1.NoSource"
+#define BUS_ERROR_STUB_LOOP "org.freedesktop.resolve1.StubLoop"
+#define BUS_ERROR_NO_SUCH_DNSSD_SERVICE "org.freedesktop.resolve1.NoSuchDnssdService"
+#define BUS_ERROR_DNSSD_SERVICE_EXISTS "org.freedesktop.resolve1.DnssdServiceExists"
+
+#define _BUS_ERROR_DNS "org.freedesktop.resolve1.DnsError."
+#define BUS_ERROR_DNS_FORMERR _BUS_ERROR_DNS "FORMERR"
+#define BUS_ERROR_DNS_SERVFAIL _BUS_ERROR_DNS "SERVFAIL"
+#define BUS_ERROR_DNS_NXDOMAIN _BUS_ERROR_DNS "NXDOMAIN"
+#define BUS_ERROR_DNS_NOTIMP _BUS_ERROR_DNS "NOTIMP"
+#define BUS_ERROR_DNS_REFUSED _BUS_ERROR_DNS "REFUSED"
+#define BUS_ERROR_DNS_YXDOMAIN _BUS_ERROR_DNS "YXDOMAIN"
+#define BUS_ERROR_DNS_YRRSET _BUS_ERROR_DNS "YRRSET"
+#define BUS_ERROR_DNS_NXRRSET _BUS_ERROR_DNS "NXRRSET"
+#define BUS_ERROR_DNS_NOTAUTH _BUS_ERROR_DNS "NOTAUTH"
+#define BUS_ERROR_DNS_NOTZONE _BUS_ERROR_DNS "NOTZONE"
+#define BUS_ERROR_DNS_BADVERS _BUS_ERROR_DNS "BADVERS"
+#define BUS_ERROR_DNS_BADKEY _BUS_ERROR_DNS "BADKEY"
+#define BUS_ERROR_DNS_BADTIME _BUS_ERROR_DNS "BADTIME"
+#define BUS_ERROR_DNS_BADMODE _BUS_ERROR_DNS "BADMODE"
+#define BUS_ERROR_DNS_BADNAME _BUS_ERROR_DNS "BADNAME"
+#define BUS_ERROR_DNS_BADALG _BUS_ERROR_DNS "BADALG"
+#define BUS_ERROR_DNS_BADTRUNC _BUS_ERROR_DNS "BADTRUNC"
+#define BUS_ERROR_DNS_BADCOOKIE _BUS_ERROR_DNS "BADCOOKIE"
+
+#define BUS_ERROR_NO_SUCH_TRANSFER "org.freedesktop.import1.NoSuchTransfer"
+#define BUS_ERROR_TRANSFER_IN_PROGRESS "org.freedesktop.import1.TransferInProgress"
+
+#define BUS_ERROR_NO_PRODUCT_UUID "org.freedesktop.hostname1.NoProductUUID"
+#define BUS_ERROR_FILE_IS_PROTECTED "org.freedesktop.hostname1.FileIsProtected"
+#define BUS_ERROR_READ_ONLY_FILESYSTEM "org.freedesktop.hostname1.ReadOnlyFilesystem"
+
+#define BUS_ERROR_SPEED_METER_INACTIVE "org.freedesktop.network1.SpeedMeterInactive"
+#define BUS_ERROR_UNMANAGED_INTERFACE "org.freedesktop.network1.UnmanagedInterface"
+
+#define BUS_ERROR_NO_SUCH_HOME "org.freedesktop.home1.NoSuchHome"
+#define BUS_ERROR_UID_IN_USE "org.freedesktop.home1.UIDInUse"
+#define BUS_ERROR_USER_NAME_EXISTS "org.freedesktop.home1.UserNameExists"
+#define BUS_ERROR_HOME_EXISTS "org.freedesktop.home1.HomeExists"
+#define BUS_ERROR_HOME_ALREADY_ACTIVE "org.freedesktop.home1.HomeAlreadyActive"
+#define BUS_ERROR_HOME_ALREADY_FIXATED "org.freedesktop.home1.HomeAlreadyFixated"
+#define BUS_ERROR_HOME_UNFIXATED "org.freedesktop.home1.HomeUnfixated"
+#define BUS_ERROR_HOME_NOT_ACTIVE "org.freedesktop.home1.HomeNotActive"
+#define BUS_ERROR_HOME_ABSENT "org.freedesktop.home1.HomeAbsent"
+#define BUS_ERROR_HOME_BUSY "org.freedesktop.home1.HomeBusy"
+#define BUS_ERROR_BAD_PASSWORD "org.freedesktop.home1.BadPassword"
+#define BUS_ERROR_BAD_RECOVERY_KEY "org.freedesktop.home1.BadRecoveryKey"
+#define BUS_ERROR_LOW_PASSWORD_QUALITY "org.freedesktop.home1.LowPasswordQuality"
+#define BUS_ERROR_BAD_PASSWORD_AND_NO_TOKEN "org.freedesktop.home1.BadPasswordAndNoToken"
+#define BUS_ERROR_TOKEN_PIN_NEEDED "org.freedesktop.home1.TokenPinNeeded"
+#define BUS_ERROR_TOKEN_PROTECTED_AUTHENTICATION_PATH_NEEDED \
+ "org.freedesktop.home1.TokenProtectedAuthenticationPathNeeded"
+#define BUS_ERROR_TOKEN_USER_PRESENCE_NEEDED "org.freedesktop.home1.TokenUserPresenceNeeded"
+#define BUS_ERROR_TOKEN_USER_VERIFICATION_NEEDED \
+ "org.freedesktop.home1.TokenUserVerificationNeeded"
+#define BUS_ERROR_TOKEN_ACTION_TIMEOUT "org.freedesktop.home1.TokenActionTimeout"
+#define BUS_ERROR_TOKEN_PIN_LOCKED "org.freedesktop.home1.TokenPinLocked"
+#define BUS_ERROR_TOKEN_BAD_PIN "org.freedesktop.home1.BadPin"
+#define BUS_ERROR_TOKEN_BAD_PIN_FEW_TRIES_LEFT "org.freedesktop.home1.BadPinFewTriesLeft"
+#define BUS_ERROR_TOKEN_BAD_PIN_ONE_TRY_LEFT "org.freedesktop.home1.BadPinOneTryLeft"
+#define BUS_ERROR_BAD_SIGNATURE "org.freedesktop.home1.BadSignature"
+#define BUS_ERROR_HOME_RECORD_MISMATCH "org.freedesktop.home1.RecordMismatch"
+#define BUS_ERROR_HOME_RECORD_DOWNGRADE "org.freedesktop.home1.RecordDowngrade"
+#define BUS_ERROR_HOME_RECORD_SIGNED "org.freedesktop.home1.RecordSigned"
+#define BUS_ERROR_BAD_HOME_SIZE "org.freedesktop.home1.BadHomeSize"
+#define BUS_ERROR_NO_PRIVATE_KEY "org.freedesktop.home1.NoPrivateKey"
+#define BUS_ERROR_HOME_LOCKED "org.freedesktop.home1.HomeLocked"
+#define BUS_ERROR_HOME_NOT_LOCKED "org.freedesktop.home1.HomeNotLocked"
+#define BUS_ERROR_NO_DISK_SPACE "org.freedesktop.home1.NoDiskSpace"
+#define BUS_ERROR_TOO_MANY_OPERATIONS "org.freedesktop.home1.TooManyOperations"
+#define BUS_ERROR_AUTHENTICATION_LIMIT_HIT "org.freedesktop.home1.AuthenticationLimitHit"
+#define BUS_ERROR_HOME_CANT_AUTHENTICATE "org.freedesktop.home1.HomeCantAuthenticate"
+#define BUS_ERROR_HOME_IN_USE "org.freedesktop.home1.HomeInUse"
+#define BUS_ERROR_REBALANCE_NOT_NEEDED "org.freedesktop.home1.RebalanceNotNeeded"
+
+BUS_ERROR_MAP_ELF_USE(bus_common_errors);
diff --git a/src/libsystemd/sd-bus/bus-container.c b/src/libsystemd/sd-bus/bus-container.c
new file mode 100644
index 0000000..4146a6e
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-container.c
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "bus-container.h"
+#include "bus-internal.h"
+#include "bus-socket.h"
+#include "fd-util.h"
+#include "namespace-util.h"
+#include "process-util.h"
+#include "string-util.h"
+
+int bus_container_connect_socket(sd_bus *b) {
+ _cleanup_close_pair_ int pair[2] = EBADF_PAIR;
+ _cleanup_close_ int pidnsfd = -EBADF, mntnsfd = -EBADF, usernsfd = -EBADF, rootfd = -EBADF;
+ int r, error_buf = 0;
+ pid_t child;
+ ssize_t n;
+
+ assert(b);
+ assert(b->input_fd < 0);
+ assert(b->output_fd < 0);
+ assert(b->nspid > 0 || b->machine);
+
+ if (b->nspid <= 0) {
+ log_debug("sd-bus: connecting bus%s%s to machine %s...",
+ b->description ? " " : "", strempty(b->description), b->machine);
+
+ r = container_get_leader(b->machine, &b->nspid);
+ if (r < 0)
+ return r;
+ } else
+ log_debug("sd-bus: connecting bus%s%s to namespace of PID "PID_FMT"...",
+ b->description ? " " : "", strempty(b->description), b->nspid);
+
+ r = namespace_open(b->nspid, &pidnsfd, &mntnsfd, NULL, &usernsfd, &rootfd);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to open namespace of PID "PID_FMT": %m", b->nspid);
+
+ b->input_fd = socket(b->sockaddr.sa.sa_family, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (b->input_fd < 0)
+ return log_debug_errno(errno, "Failed to create a socket: %m");
+
+ b->input_fd = fd_move_above_stdio(b->input_fd);
+
+ b->output_fd = b->input_fd;
+
+ bus_socket_setup(b);
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0)
+ return log_debug_errno(errno, "Failed to create a socket pair: %m");
+
+ r = namespace_fork("(sd-buscntrns)", "(sd-buscntr)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGKILL,
+ pidnsfd, mntnsfd, -1, usernsfd, rootfd, &child);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to create namespace for (sd-buscntr): %m");
+ if (r == 0) {
+ pair[0] = safe_close(pair[0]);
+
+ r = connect(b->input_fd, &b->sockaddr.sa, b->sockaddr_size);
+ if (r < 0) {
+ /* Try to send error up */
+ error_buf = errno;
+ (void) write(pair[1], &error_buf, sizeof(error_buf));
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+
+ r = wait_for_terminate_and_check("(sd-buscntrns)", child, 0);
+ if (r < 0)
+ return r;
+ bool nonzero_exit_status = r != EXIT_SUCCESS;
+
+ n = read(pair[0], &error_buf, sizeof(error_buf));
+ if (n < 0)
+ return log_debug_errno(errno, "Failed to read error status from (sd-buscntr): %m");
+
+ if (n > 0) {
+ if (n != sizeof(error_buf))
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO),
+ "Read error status of unexpected length %zd from (sd-buscntr): %m", n);
+
+ if (error_buf < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Got unexpected error status from (sd-buscntr): %m");
+
+ if (error_buf == EINPROGRESS)
+ return 1;
+
+ if (error_buf > 0)
+ return log_debug_errno(error_buf, "(sd-buscntr) failed to connect to D-Bus socket: %m");
+ }
+
+ if (nonzero_exit_status)
+ return -EPROTO;
+
+ return bus_socket_start_auth(b);
+}
diff --git a/src/libsystemd/sd-bus/bus-container.h b/src/libsystemd/sd-bus/bus-container.h
new file mode 100644
index 0000000..cb503a5
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-container.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+int bus_container_connect_socket(sd_bus *b);
diff --git a/src/libsystemd/sd-bus/bus-control.c b/src/libsystemd/sd-bus/bus-control.c
new file mode 100644
index 0000000..1355e41
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-control.c
@@ -0,0 +1,1038 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_VALGRIND_MEMCHECK_H
+#include <valgrind/memcheck.h>
+#endif
+
+#include <errno.h>
+#include <stddef.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-control.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "capability-util.h"
+#include "process-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+_public_ int sd_bus_get_unique_name(sd_bus *bus, const char **unique) {
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(unique, -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ r = bus_ensure_running(bus);
+ if (r < 0)
+ return r;
+
+ *unique = bus->unique_name;
+ return 0;
+}
+
+static int validate_request_name_parameters(
+ sd_bus *bus,
+ const char *name,
+ uint64_t flags,
+ uint32_t *ret_param) {
+
+ uint32_t param = 0;
+
+ assert(bus);
+ assert(name);
+ assert(ret_param);
+
+ assert_return(!(flags & ~(SD_BUS_NAME_ALLOW_REPLACEMENT|SD_BUS_NAME_REPLACE_EXISTING|SD_BUS_NAME_QUEUE)), -EINVAL);
+ assert_return(service_name_is_valid(name), -EINVAL);
+ assert_return(name[0] != ':', -EINVAL);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ /* Don't allow requesting the special driver and local names */
+ if (STR_IN_SET(name, "org.freedesktop.DBus", "org.freedesktop.DBus.Local"))
+ return -EINVAL;
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (flags & SD_BUS_NAME_ALLOW_REPLACEMENT)
+ param |= BUS_NAME_ALLOW_REPLACEMENT;
+ if (flags & SD_BUS_NAME_REPLACE_EXISTING)
+ param |= BUS_NAME_REPLACE_EXISTING;
+ if (!(flags & SD_BUS_NAME_QUEUE))
+ param |= BUS_NAME_DO_NOT_QUEUE;
+
+ *ret_param = param;
+
+ return 0;
+}
+
+_public_ int sd_bus_request_name(
+ sd_bus *bus,
+ const char *name,
+ uint64_t flags) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ uint32_t ret, param = 0;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(name, -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ r = validate_request_name_parameters(bus, name, flags, &param);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "RequestName",
+ NULL,
+ &reply,
+ "su",
+ name,
+ param);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "u", &ret);
+ if (r < 0)
+ return r;
+
+ switch (ret) {
+
+ case BUS_NAME_ALREADY_OWNER:
+ return -EALREADY;
+
+ case BUS_NAME_EXISTS:
+ return -EEXIST;
+
+ case BUS_NAME_IN_QUEUE:
+ return 0;
+
+ case BUS_NAME_PRIMARY_OWNER:
+ return 1;
+ }
+
+ return -EIO;
+}
+
+static int default_request_name_handler(
+ sd_bus_message *m,
+ void *userdata,
+ sd_bus_error *ret_error) {
+
+ uint32_t ret;
+ int r;
+
+ assert(m);
+
+ if (sd_bus_message_is_method_error(m, NULL)) {
+ log_debug_errno(sd_bus_message_get_errno(m),
+ "Unable to request name, failing connection: %s",
+ sd_bus_message_get_error(m)->message);
+
+ bus_enter_closing(sd_bus_message_get_bus(m));
+ return 1;
+ }
+
+ r = sd_bus_message_read(m, "u", &ret);
+ if (r < 0)
+ return r;
+
+ switch (ret) {
+
+ case BUS_NAME_ALREADY_OWNER:
+ log_debug("Already owner of requested service name, ignoring.");
+ return 1;
+
+ case BUS_NAME_IN_QUEUE:
+ log_debug("In queue for requested service name.");
+ return 1;
+
+ case BUS_NAME_PRIMARY_OWNER:
+ log_debug("Successfully acquired requested service name.");
+ return 1;
+
+ case BUS_NAME_EXISTS:
+ log_debug("Requested service name already owned, failing connection.");
+ bus_enter_closing(sd_bus_message_get_bus(m));
+ return 1;
+ }
+
+ log_debug("Unexpected response from RequestName(), failing connection.");
+ bus_enter_closing(sd_bus_message_get_bus(m));
+ return 1;
+}
+
+_public_ int sd_bus_request_name_async(
+ sd_bus *bus,
+ sd_bus_slot **ret_slot,
+ const char *name,
+ uint64_t flags,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ uint32_t param = 0;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(name, -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ r = validate_request_name_parameters(bus, name, flags, &param);
+ if (r < 0)
+ return r;
+
+ return sd_bus_call_method_async(
+ bus,
+ ret_slot,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "RequestName",
+ callback ?: default_request_name_handler,
+ userdata,
+ "su",
+ name,
+ param);
+}
+
+static int validate_release_name_parameters(
+ sd_bus *bus,
+ const char *name) {
+
+ assert(bus);
+ assert(name);
+
+ assert_return(service_name_is_valid(name), -EINVAL);
+ assert_return(name[0] != ':', -EINVAL);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ /* Don't allow releasing the special driver and local names */
+ if (STR_IN_SET(name, "org.freedesktop.DBus", "org.freedesktop.DBus.Local"))
+ return -EINVAL;
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ return 0;
+}
+
+_public_ int sd_bus_release_name(
+ sd_bus *bus,
+ const char *name) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ uint32_t ret;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(name, -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ r = validate_release_name_parameters(bus, name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "ReleaseName",
+ NULL,
+ &reply,
+ "s",
+ name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "u", &ret);
+ if (r < 0)
+ return r;
+
+ switch (ret) {
+
+ case BUS_NAME_NON_EXISTENT:
+ return -ESRCH;
+
+ case BUS_NAME_NOT_OWNER:
+ return -EADDRINUSE;
+
+ case BUS_NAME_RELEASED:
+ return 0;
+ }
+
+ return -EIO;
+}
+
+static int default_release_name_handler(
+ sd_bus_message *m,
+ void *userdata,
+ sd_bus_error *ret_error) {
+
+ uint32_t ret;
+ int r;
+
+ assert(m);
+
+ if (sd_bus_message_is_method_error(m, NULL)) {
+ log_debug_errno(sd_bus_message_get_errno(m),
+ "Unable to release name, failing connection: %s",
+ sd_bus_message_get_error(m)->message);
+
+ bus_enter_closing(sd_bus_message_get_bus(m));
+ return 1;
+ }
+
+ r = sd_bus_message_read(m, "u", &ret);
+ if (r < 0)
+ return r;
+
+ switch (ret) {
+
+ case BUS_NAME_NON_EXISTENT:
+ log_debug("Name asked to release is not taken currently, ignoring.");
+ return 1;
+
+ case BUS_NAME_NOT_OWNER:
+ log_debug("Name asked to release is owned by somebody else, ignoring.");
+ return 1;
+
+ case BUS_NAME_RELEASED:
+ log_debug("Name successfully released.");
+ return 1;
+ }
+
+ log_debug("Unexpected response from ReleaseName(), failing connection.");
+ bus_enter_closing(sd_bus_message_get_bus(m));
+ return 1;
+}
+
+_public_ int sd_bus_release_name_async(
+ sd_bus *bus,
+ sd_bus_slot **ret_slot,
+ const char *name,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(name, -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ r = validate_release_name_parameters(bus, name);
+ if (r < 0)
+ return r;
+
+ return sd_bus_call_method_async(
+ bus,
+ ret_slot,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "ReleaseName",
+ callback ?: default_release_name_handler,
+ userdata,
+ "s",
+ name);
+}
+
+_public_ int sd_bus_list_names(sd_bus *bus, char ***acquired, char ***activatable) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_strv_free_ char **x = NULL, **y = NULL;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(acquired || activatable, -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (acquired) {
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "ListNames",
+ NULL,
+ &reply,
+ NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(reply, &x);
+ if (r < 0)
+ return r;
+
+ reply = sd_bus_message_unref(reply);
+ }
+
+ if (activatable) {
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "ListActivatableNames",
+ NULL,
+ &reply,
+ NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(reply, &y);
+ if (r < 0)
+ return r;
+
+ *activatable = TAKE_PTR(y);
+ }
+
+ if (acquired)
+ *acquired = TAKE_PTR(x);
+
+ return 0;
+}
+
+_public_ int sd_bus_get_name_creds(
+ sd_bus *bus,
+ const char *name,
+ uint64_t mask,
+ sd_bus_creds **creds) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply_unique = NULL, *reply = NULL;
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *c = NULL;
+ const char *unique;
+ pid_t pid = 0;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(name, -EINVAL);
+ assert_return((mask & ~SD_BUS_CREDS_AUGMENT) <= _SD_BUS_CREDS_ALL, -EOPNOTSUPP);
+ assert_return(mask == 0 || creds, -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+ assert_return(service_name_is_valid(name), -EINVAL);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ /* Turn off augmenting if this isn't a local connection. If the connection is not local, then /proc is not
+ * going to match. */
+ if (!bus->is_local)
+ mask &= ~SD_BUS_CREDS_AUGMENT;
+
+ if (streq(name, "org.freedesktop.DBus.Local"))
+ return -EINVAL;
+
+ if (streq(name, "org.freedesktop.DBus"))
+ return sd_bus_get_owner_creds(bus, mask, creds);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ /* If the name is unique anyway, we can use it directly */
+ unique = name[0] == ':' ? name : NULL;
+
+ /* Only query the owner if the caller wants to know it and the name is not unique anyway, or if the caller just
+ * wants to check whether a name exists */
+ if ((FLAGS_SET(mask, SD_BUS_CREDS_UNIQUE_NAME) && !unique) || mask == 0) {
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "GetNameOwner",
+ NULL,
+ &reply_unique,
+ "s",
+ name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply_unique, "s", &unique);
+ if (r < 0)
+ return r;
+ }
+
+ if (mask != 0) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ bool need_pid, need_uid, need_selinux, need_separate_calls;
+
+ c = bus_creds_new();
+ if (!c)
+ return -ENOMEM;
+
+ if ((mask & SD_BUS_CREDS_UNIQUE_NAME) && unique) {
+ c->unique_name = strdup(unique);
+ if (!c->unique_name)
+ return -ENOMEM;
+
+ c->mask |= SD_BUS_CREDS_UNIQUE_NAME;
+ }
+
+ need_pid = (mask & SD_BUS_CREDS_PID) ||
+ ((mask & SD_BUS_CREDS_AUGMENT) &&
+ (mask & (SD_BUS_CREDS_UID|SD_BUS_CREDS_SUID|SD_BUS_CREDS_FSUID|
+ SD_BUS_CREDS_GID|SD_BUS_CREDS_EGID|SD_BUS_CREDS_SGID|SD_BUS_CREDS_FSGID|
+ SD_BUS_CREDS_SUPPLEMENTARY_GIDS|
+ SD_BUS_CREDS_COMM|SD_BUS_CREDS_EXE|SD_BUS_CREDS_CMDLINE|
+ SD_BUS_CREDS_CGROUP|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_USER_UNIT|SD_BUS_CREDS_SLICE|SD_BUS_CREDS_SESSION|SD_BUS_CREDS_OWNER_UID|
+ SD_BUS_CREDS_EFFECTIVE_CAPS|SD_BUS_CREDS_PERMITTED_CAPS|SD_BUS_CREDS_INHERITABLE_CAPS|SD_BUS_CREDS_BOUNDING_CAPS|
+ SD_BUS_CREDS_SELINUX_CONTEXT|
+ SD_BUS_CREDS_AUDIT_SESSION_ID|SD_BUS_CREDS_AUDIT_LOGIN_UID)));
+ need_uid = mask & SD_BUS_CREDS_EUID;
+ need_selinux = mask & SD_BUS_CREDS_SELINUX_CONTEXT;
+
+ if (need_pid + need_uid + need_selinux > 1) {
+
+ /* If we need more than one of the credentials, then use GetConnectionCredentials() */
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "GetConnectionCredentials",
+ &error,
+ &reply,
+ "s",
+ unique ?: name);
+
+ if (r < 0) {
+
+ if (!sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_METHOD))
+ return r;
+
+ /* If we got an unknown method error, fall back to the individual calls... */
+ need_separate_calls = true;
+ sd_bus_error_free(&error);
+
+ } else {
+ need_separate_calls = false;
+
+ r = sd_bus_message_enter_container(reply, 'a', "{sv}");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *m;
+
+ r = sd_bus_message_enter_container(reply, 'e', "sv");
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_read(reply, "s", &m);
+ if (r < 0)
+ return r;
+
+ if (need_uid && streq(m, "UnixUserID")) {
+ uint32_t u;
+
+ r = sd_bus_message_read(reply, "v", "u", &u);
+ if (r < 0)
+ return r;
+
+ c->euid = u;
+ c->mask |= SD_BUS_CREDS_EUID;
+
+ } else if (need_pid && streq(m, "ProcessID")) {
+ uint32_t p;
+
+ r = sd_bus_message_read(reply, "v", "u", &p);
+ if (r < 0)
+ return r;
+
+ pid = p;
+ if (mask & SD_BUS_CREDS_PID) {
+ c->pid = p;
+ c->mask |= SD_BUS_CREDS_PID;
+ }
+
+ } else if (need_selinux && streq(m, "LinuxSecurityLabel")) {
+ const void *p = NULL;
+ size_t sz = 0;
+
+ r = sd_bus_message_enter_container(reply, 'v', "ay");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_array(reply, 'y', &p, &sz);
+ if (r < 0)
+ return r;
+
+ r = free_and_strndup(&c->label, p, sz);
+ if (r < 0)
+ return r;
+
+ c->mask |= SD_BUS_CREDS_SELINUX_CONTEXT;
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return r;
+ } else {
+ r = sd_bus_message_skip(reply, "v");
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ return r;
+
+ if (need_pid && pid == 0)
+ return -EPROTO;
+ }
+
+ } else /* When we only need a single field, then let's use separate calls */
+ need_separate_calls = true;
+
+ if (need_separate_calls) {
+ if (need_pid) {
+ uint32_t u;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "GetConnectionUnixProcessID",
+ NULL,
+ &reply,
+ "s",
+ unique ?: name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "u", &u);
+ if (r < 0)
+ return r;
+
+ pid = u;
+ if (mask & SD_BUS_CREDS_PID) {
+ c->pid = u;
+ c->mask |= SD_BUS_CREDS_PID;
+ }
+
+ reply = sd_bus_message_unref(reply);
+ }
+
+ if (need_uid) {
+ uint32_t u;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "GetConnectionUnixUser",
+ NULL,
+ &reply,
+ "s",
+ unique ?: name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "u", &u);
+ if (r < 0)
+ return r;
+
+ c->euid = u;
+ c->mask |= SD_BUS_CREDS_EUID;
+
+ reply = sd_bus_message_unref(reply);
+ }
+
+ if (need_selinux) {
+ const void *p = NULL;
+ size_t sz = 0;
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "GetConnectionSELinuxSecurityContext",
+ &error,
+ &reply,
+ "s",
+ unique ?: name);
+ if (r < 0) {
+ if (!sd_bus_error_has_name(&error, SD_BUS_ERROR_SELINUX_SECURITY_CONTEXT_UNKNOWN))
+ return r;
+
+ /* no data is fine */
+ } else {
+ r = sd_bus_message_read_array(reply, 'y', &p, &sz);
+ if (r < 0)
+ return r;
+
+ c->label = memdup_suffix0(p, sz);
+ if (!c->label)
+ return -ENOMEM;
+
+ c->mask |= SD_BUS_CREDS_SELINUX_CONTEXT;
+ }
+ }
+ }
+
+ r = bus_creds_add_more(c, mask, pid, 0);
+ if (r < 0 && r != -ESRCH) /* Return the error, but ignore ESRCH which just means the process is already gone */
+ return r;
+ }
+
+ if (creds)
+ *creds = TAKE_PTR(c);
+
+ return 0;
+}
+
+static int parse_sockaddr_string(const char *t, char **ret_comm, char **ret_description) {
+ _cleanup_free_ char *comm = NULL, *description = NULL;
+ const char *e, *sl;
+
+ assert(t);
+ assert(ret_comm);
+ assert(ret_description);
+
+ e = strstrafter(t, "/bus/");
+ if (!e) {
+ log_debug("Didn't find /bus/ substring in peer socket address, ignoring.");
+ goto not_found;
+ }
+
+ sl = strchr(e, '/');
+ if (!sl) {
+ log_debug("Didn't find / substring after /bus/ in peer socket address, ignoring.");
+ goto not_found;
+ }
+
+ if (sl - e > 0) {
+ comm = strndup(e, sl - e);
+ if (!comm)
+ return -ENOMEM;
+ }
+
+ sl++;
+ if (!isempty(sl)) {
+ description = strdup(sl);
+ if (!description)
+ return -ENOMEM;
+ }
+
+ *ret_comm = TAKE_PTR(comm);
+ *ret_description = TAKE_PTR(description);
+ return 0;
+
+not_found:
+ *ret_comm = *ret_description = NULL;
+ return 0;
+}
+
+_public_ int sd_bus_get_owner_creds(sd_bus *bus, uint64_t mask, sd_bus_creds **ret) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *c = NULL;
+ bool do_label, do_groups, do_sockaddr_peer;
+ pid_t pid = 0;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return((mask & ~SD_BUS_CREDS_AUGMENT) <= _SD_BUS_CREDS_ALL, -EOPNOTSUPP);
+ assert_return(ret, -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (!bus->is_local)
+ mask &= ~SD_BUS_CREDS_AUGMENT;
+
+ do_label = bus->label && (mask & SD_BUS_CREDS_SELINUX_CONTEXT);
+ do_groups = bus->n_groups != SIZE_MAX && (mask & SD_BUS_CREDS_SUPPLEMENTARY_GIDS);
+ do_sockaddr_peer = bus->sockaddr_size_peer >= offsetof(struct sockaddr_un, sun_path) + 1 &&
+ bus->sockaddr_peer.sa.sa_family == AF_UNIX &&
+ bus->sockaddr_peer.un.sun_path[0] == 0;
+
+ /* Avoid allocating anything if we have no chance of returning useful data */
+ if (!bus->ucred_valid && !do_label && !do_groups && !do_sockaddr_peer)
+ return -ENODATA;
+
+ c = bus_creds_new();
+ if (!c)
+ return -ENOMEM;
+
+ if (bus->ucred_valid) {
+ if (pid_is_valid(bus->ucred.pid)) {
+ pid = c->pid = bus->ucred.pid;
+ c->mask |= SD_BUS_CREDS_PID & mask;
+ }
+
+ if (uid_is_valid(bus->ucred.uid)) {
+ c->euid = bus->ucred.uid;
+ c->mask |= SD_BUS_CREDS_EUID & mask;
+ }
+
+ if (gid_is_valid(bus->ucred.gid)) {
+ c->egid = bus->ucred.gid;
+ c->mask |= SD_BUS_CREDS_EGID & mask;
+ }
+ }
+
+ if (do_label) {
+ c->label = strdup(bus->label);
+ if (!c->label)
+ return -ENOMEM;
+
+ c->mask |= SD_BUS_CREDS_SELINUX_CONTEXT;
+ }
+
+ if (do_groups) {
+ c->supplementary_gids = newdup(gid_t, bus->groups, bus->n_groups);
+ if (!c->supplementary_gids)
+ return -ENOMEM;
+
+ c->n_supplementary_gids = bus->n_groups;
+
+ c->mask |= SD_BUS_CREDS_SUPPLEMENTARY_GIDS;
+ }
+
+ if (do_sockaddr_peer) {
+ _cleanup_free_ char *t = NULL;
+
+ assert(bus->sockaddr_size_peer >= offsetof(struct sockaddr_un, sun_path) + 1);
+ assert(bus->sockaddr_peer.sa.sa_family == AF_UNIX);
+ assert(bus->sockaddr_peer.un.sun_path[0] == 0);
+
+ /* So this is an abstract namespace socket, good. Now let's find the data we are interested in */
+ r = make_cstring(bus->sockaddr_peer.un.sun_path + 1,
+ bus->sockaddr_size_peer - offsetof(struct sockaddr_un, sun_path) - 1,
+ MAKE_CSTRING_ALLOW_TRAILING_NUL,
+ &t);
+ if (r == -ENOMEM)
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Can't extract string from peer socket address, ignoring: %m");
+ else {
+ r = parse_sockaddr_string(t, &c->comm, &c->description);
+ if (r < 0)
+ return r;
+
+ if (c->comm)
+ c->mask |= SD_BUS_CREDS_COMM & mask;
+
+ if (c->description)
+ c->mask |= SD_BUS_CREDS_DESCRIPTION & mask;
+ }
+ }
+
+ r = bus_creds_add_more(c, mask, pid, 0);
+ if (r < 0 && r != -ESRCH) /* If the process vanished, then don't complain, just return what we got */
+ return r;
+
+ *ret = TAKE_PTR(c);
+
+ return 0;
+}
+
+#define append_eavesdrop(bus, m) \
+ ((bus)->is_monitor \
+ ? (isempty(m) ? "eavesdrop='true'" : strjoina((m), ",eavesdrop='true'")) \
+ : (m))
+
+int bus_add_match_internal(
+ sd_bus *bus,
+ const char *match,
+ uint64_t timeout_usec,
+ uint64_t *ret_counter) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ const char *e;
+ int r;
+
+ assert(bus);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ e = append_eavesdrop(bus, match);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "AddMatch");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "s", e);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call(
+ bus,
+ m,
+ timeout_usec,
+ NULL,
+ &reply);
+ if (r < 0)
+ return r;
+
+ /* If the caller asked for it, return the read counter of the reply */
+ if (ret_counter)
+ *ret_counter = reply->read_counter;
+
+ return r;
+}
+
+int bus_add_match_internal_async(
+ sd_bus *bus,
+ sd_bus_slot **ret_slot,
+ const char *match,
+ sd_bus_message_handler_t callback,
+ void *userdata,
+ uint64_t timeout_usec) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ const char *e;
+ int r;
+
+ assert(bus);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ e = append_eavesdrop(bus, match);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "AddMatch");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "s", e);
+ if (r < 0)
+ return r;
+
+ return sd_bus_call_async(
+ bus,
+ ret_slot,
+ m,
+ callback,
+ userdata,
+ timeout_usec);
+}
+
+int bus_remove_match_internal(
+ sd_bus *bus,
+ const char *match) {
+
+ const char *e;
+
+ assert(bus);
+ assert(match);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ e = append_eavesdrop(bus, match);
+
+ /* Fire and forget */
+
+ return sd_bus_call_method_async(
+ bus,
+ NULL,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "RemoveMatch",
+ NULL,
+ NULL,
+ "s",
+ e);
+}
+
+_public_ int sd_bus_get_name_machine_id(sd_bus *bus, const char *name, sd_id128_t *machine) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL, *m = NULL;
+ const char *mid;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(name, -EINVAL);
+ assert_return(machine, -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+ assert_return(service_name_is_valid(name), -EINVAL);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (streq_ptr(name, bus->unique_name))
+ return sd_id128_get_machine(machine);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ name,
+ "/",
+ "org.freedesktop.DBus.Peer",
+ "GetMachineId");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_set_auto_start(m, false);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call(bus, m, 0, NULL, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "s", &mid);
+ if (r < 0)
+ return r;
+
+ return sd_id128_from_string(mid, machine);
+}
diff --git a/src/libsystemd/sd-bus/bus-control.h b/src/libsystemd/sd-bus/bus-control.h
new file mode 100644
index 0000000..1cd4fb8
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-control.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+int bus_add_match_internal(sd_bus *bus, const char *match, uint64_t timeout_usec, uint64_t *ret_counter);
+int bus_add_match_internal_async(sd_bus *bus, sd_bus_slot **ret, const char *match, sd_bus_message_handler_t callback, void *userdata, uint64_t timeout_usec);
+
+int bus_remove_match_internal(sd_bus *bus, const char *match);
diff --git a/src/libsystemd/sd-bus/bus-convenience.c b/src/libsystemd/sd-bus/bus-convenience.c
new file mode 100644
index 0000000..989e577
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-convenience.c
@@ -0,0 +1,824 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+#include <sys/types.h>
+
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-signature.h"
+#include "bus-type.h"
+#include "string-util.h"
+
+_public_ int sd_bus_message_send(sd_bus_message *reply) {
+ assert_return(reply, -EINVAL);
+ assert_return(reply->bus, -EINVAL);
+ assert_return(!bus_origin_changed(reply->bus), -ECHILD);
+
+ return sd_bus_send(reply->bus, reply, NULL);
+}
+
+_public_ int sd_bus_emit_signal_tov(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ const char *types, va_list ap) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ r = sd_bus_message_new_signal_to(bus, &m, destination, path, interface, member);
+ if (r < 0)
+ return r;
+
+ if (!isempty(types)) {
+ r = sd_bus_message_appendv(m, types, ap);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+_public_ int sd_bus_emit_signal_to(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ const char *types, ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, types);
+ r = sd_bus_emit_signal_tov(bus, destination, path, interface, member, types, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_bus_emit_signalv(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *member,
+ const char *types, va_list ap) {
+
+ return sd_bus_emit_signal_tov(bus, NULL, path, interface, member, types, ap);
+}
+
+_public_ int sd_bus_emit_signal(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *member,
+ const char *types, ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, types);
+ r = sd_bus_emit_signalv(bus, path, interface, member, types, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_bus_call_method_asyncv(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_message_handler_t callback,
+ void *userdata,
+ const char *types, va_list ap) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ r = sd_bus_message_new_method_call(bus, &m, destination, path, interface, member);
+ if (r < 0)
+ return r;
+
+ if (!isempty(types)) {
+ r = sd_bus_message_appendv(m, types, ap);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_call_async(bus, slot, m, callback, userdata, 0);
+}
+
+_public_ int sd_bus_call_method_async(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_message_handler_t callback,
+ void *userdata,
+ const char *types, ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, types);
+ r = sd_bus_call_method_asyncv(bus, slot, destination, path, interface, member, callback, userdata, types, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_bus_call_methodv(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ sd_bus_message **reply,
+ const char *types, va_list ap) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ bus_assert_return(bus, -EINVAL, error);
+ bus_assert_return(bus = bus_resolve(bus), -ENOPKG, error);
+ bus_assert_return(!bus_origin_changed(bus), -ECHILD, error);
+
+ if (!BUS_IS_OPEN(bus->state)) {
+ r = -ENOTCONN;
+ goto fail;
+ }
+
+ r = sd_bus_message_new_method_call(bus, &m, destination, path, interface, member);
+ if (r < 0)
+ goto fail;
+
+ if (!isempty(types)) {
+ r = sd_bus_message_appendv(m, types, ap);
+ if (r < 0)
+ goto fail;
+ }
+
+ return sd_bus_call(bus, m, 0, error, reply);
+
+fail:
+ return sd_bus_error_set_errno(error, r);
+}
+
+_public_ int sd_bus_call_method(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ sd_bus_message **reply,
+ const char *types, ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, types);
+ r = sd_bus_call_methodv(bus, destination, path, interface, member, error, reply, types, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_bus_reply_method_returnv(
+ sd_bus_message *call,
+ const char *types, va_list ap) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL);
+ assert_return(call->bus, -EINVAL);
+ assert_return(!bus_origin_changed(call->bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(call->bus->state))
+ return -ENOTCONN;
+
+ if (call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)
+ return 0;
+
+ r = sd_bus_message_new_method_return(call, &m);
+ if (r < 0)
+ return r;
+
+ if (!isempty(types)) {
+ r = sd_bus_message_appendv(m, types, ap);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_send(m);
+}
+
+_public_ int sd_bus_reply_method_return(
+ sd_bus_message *call,
+ const char *types, ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, types);
+ r = sd_bus_reply_method_returnv(call, types, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_bus_reply_method_error(
+ sd_bus_message *call,
+ const sd_bus_error *e) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL);
+ assert_return(sd_bus_error_is_set(e), -EINVAL);
+ assert_return(call->bus, -EINVAL);
+ assert_return(!bus_origin_changed(call->bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(call->bus->state))
+ return -ENOTCONN;
+
+ if (call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)
+ return 0;
+
+ r = sd_bus_message_new_method_error(call, &m, e);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_send(m);
+}
+
+_public_ int sd_bus_reply_method_errorfv(
+ sd_bus_message *call,
+ const char *name,
+ const char *format,
+ va_list ap) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL);
+ assert_return(call->bus, -EINVAL);
+ assert_return(!bus_origin_changed(call->bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(call->bus->state))
+ return -ENOTCONN;
+
+ if (call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)
+ return 0;
+
+ sd_bus_error_setfv(&error, name, format, ap);
+
+ return sd_bus_reply_method_error(call, &error);
+}
+
+_public_ int sd_bus_reply_method_errorf(
+ sd_bus_message *call,
+ const char *name,
+ const char *format,
+ ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, format);
+ r = sd_bus_reply_method_errorfv(call, name, format, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_bus_reply_method_errno(
+ sd_bus_message *call,
+ int error,
+ const sd_bus_error *p) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error berror = SD_BUS_ERROR_NULL;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL);
+ assert_return(call->bus, -EINVAL);
+ assert_return(!bus_origin_changed(call->bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(call->bus->state))
+ return -ENOTCONN;
+
+ if (call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)
+ return 0;
+
+ if (sd_bus_error_is_set(p))
+ return sd_bus_reply_method_error(call, p);
+
+ sd_bus_error_set_errno(&berror, error);
+
+ return sd_bus_reply_method_error(call, &berror);
+}
+
+_public_ int sd_bus_reply_method_errnofv(
+ sd_bus_message *call,
+ int error,
+ const char *format,
+ va_list ap) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error berror = SD_BUS_ERROR_NULL;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL);
+ assert_return(call->bus, -EINVAL);
+ assert_return(!bus_origin_changed(call->bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(call->bus->state))
+ return -ENOTCONN;
+
+ if (call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)
+ return 0;
+
+ sd_bus_error_set_errnofv(&berror, error, format, ap);
+
+ return sd_bus_reply_method_error(call, &berror);
+}
+
+_public_ int sd_bus_reply_method_errnof(
+ sd_bus_message *call,
+ int error,
+ const char *format,
+ ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, format);
+ r = sd_bus_reply_method_errnofv(call, error, format, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_bus_get_property(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ sd_bus_message **reply,
+ const char *type) {
+
+ sd_bus_message *rep = NULL;
+ int r;
+
+ bus_assert_return(bus, -EINVAL, error);
+ bus_assert_return(bus = bus_resolve(bus), -ENOPKG, error);
+ bus_assert_return(isempty(interface) || interface_name_is_valid(interface), -EINVAL, error);
+ bus_assert_return(member_name_is_valid(member), -EINVAL, error);
+ bus_assert_return(reply, -EINVAL, error);
+ bus_assert_return(signature_is_single(type, false), -EINVAL, error);
+ bus_assert_return(!bus_origin_changed(bus), -ECHILD, error);
+
+ if (!BUS_IS_OPEN(bus->state)) {
+ r = -ENOTCONN;
+ goto fail;
+ }
+
+ r = sd_bus_call_method(bus, destination, path,
+ "org.freedesktop.DBus.Properties", "Get",
+ error, &rep,
+ "ss", strempty(interface), member);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(rep, 'v', type);
+ if (r < 0) {
+ sd_bus_message_unref(rep);
+ goto fail;
+ }
+
+ *reply = rep;
+ return 0;
+
+fail:
+ return sd_bus_error_set_errno(error, r);
+}
+
+_public_ int sd_bus_get_property_trivial(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ char type, void *ptr) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ bus_assert_return(bus, -EINVAL, error);
+ bus_assert_return(bus = bus_resolve(bus), -ENOPKG, error);
+ bus_assert_return(isempty(interface) || interface_name_is_valid(interface), -EINVAL, error);
+ bus_assert_return(member_name_is_valid(member), -EINVAL, error);
+ bus_assert_return(bus_type_is_trivial(type), -EINVAL, error);
+ bus_assert_return(ptr, -EINVAL, error);
+ bus_assert_return(!bus_origin_changed(bus), -ECHILD, error);
+
+ if (!BUS_IS_OPEN(bus->state)) {
+ r = -ENOTCONN;
+ goto fail;
+ }
+
+ r = sd_bus_call_method(bus, destination, path, "org.freedesktop.DBus.Properties", "Get", error, &reply, "ss", strempty(interface), member);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(reply, 'v', CHAR_TO_STR(type));
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_read_basic(reply, type, ptr);
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ return sd_bus_error_set_errno(error, r);
+}
+
+_public_ int sd_bus_get_property_string(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ char **ret) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *s;
+ char *n;
+ int r;
+
+ bus_assert_return(bus, -EINVAL, error);
+ bus_assert_return(bus = bus_resolve(bus), -ENOPKG, error);
+ bus_assert_return(isempty(interface) || interface_name_is_valid(interface), -EINVAL, error);
+ bus_assert_return(member_name_is_valid(member), -EINVAL, error);
+ bus_assert_return(ret, -EINVAL, error);
+ bus_assert_return(!bus_origin_changed(bus), -ECHILD, error);
+
+ if (!BUS_IS_OPEN(bus->state)) {
+ r = -ENOTCONN;
+ goto fail;
+ }
+
+ r = sd_bus_call_method(bus, destination, path, "org.freedesktop.DBus.Properties", "Get", error, &reply, "ss", strempty(interface), member);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(reply, 'v', "s");
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_read_basic(reply, 's', &s);
+ if (r < 0)
+ goto fail;
+
+ n = strdup(s);
+ if (!n) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ *ret = n;
+ return 0;
+
+fail:
+ return sd_bus_error_set_errno(error, r);
+}
+
+_public_ int sd_bus_get_property_strv(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ char ***ret) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ bus_assert_return(bus, -EINVAL, error);
+ bus_assert_return(bus = bus_resolve(bus), -ENOPKG, error);
+ bus_assert_return(isempty(interface) || interface_name_is_valid(interface), -EINVAL, error);
+ bus_assert_return(member_name_is_valid(member), -EINVAL, error);
+ bus_assert_return(ret, -EINVAL, error);
+ bus_assert_return(!bus_origin_changed(bus), -ECHILD, error);
+
+ if (!BUS_IS_OPEN(bus->state)) {
+ r = -ENOTCONN;
+ goto fail;
+ }
+
+ r = sd_bus_call_method(bus, destination, path, "org.freedesktop.DBus.Properties", "Get", error, &reply, "ss", strempty(interface), member);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(reply, 'v', NULL);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_read_strv(reply, ret);
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ return sd_bus_error_set_errno(error, r);
+}
+
+_public_ int sd_bus_set_propertyv(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ const char *type, va_list ap) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ bus_assert_return(bus, -EINVAL, error);
+ bus_assert_return(bus = bus_resolve(bus), -ENOPKG, error);
+ bus_assert_return(isempty(interface) || interface_name_is_valid(interface), -EINVAL, error);
+ bus_assert_return(member_name_is_valid(member), -EINVAL, error);
+ bus_assert_return(signature_is_single(type, false), -EINVAL, error);
+ bus_assert_return(!bus_origin_changed(bus), -ECHILD, error);
+
+ if (!BUS_IS_OPEN(bus->state)) {
+ r = -ENOTCONN;
+ goto fail;
+ }
+
+ r = sd_bus_message_new_method_call(bus, &m, destination, path, "org.freedesktop.DBus.Properties", "Set");
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_append(m, "ss", strempty(interface), member);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_open_container(m, 'v', type);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_appendv(m, type, ap);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ goto fail;
+
+ return sd_bus_call(bus, m, 0, error, NULL);
+
+fail:
+ return sd_bus_error_set_errno(error, r);
+}
+
+_public_ int sd_bus_set_property(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_error *error,
+ const char *type, ...) {
+
+ va_list ap;
+ int r;
+
+ va_start(ap, type);
+ r = sd_bus_set_propertyv(bus, destination, path, interface, member, error, type, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_bus_query_sender_creds(sd_bus_message *call, uint64_t mask, sd_bus_creds **ret) {
+ sd_bus_creds *c;
+ int r;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->bus, -EINVAL);
+ assert_return(!bus_origin_changed(call->bus), -ECHILD);
+ assert_return(ret, -EINVAL);
+
+ if (!BUS_IS_OPEN(call->bus->state))
+ return -ENOTCONN;
+
+ c = sd_bus_message_get_creds(call);
+
+ /* All data we need? */
+ if (c && (mask & ~SD_BUS_CREDS_AUGMENT & ~c->mask) == 0) {
+ *ret = sd_bus_creds_ref(c);
+ return 0;
+ }
+
+ /* No data passed? Or not enough data passed to retrieve the missing bits? */
+ if (!c || !(c->mask & SD_BUS_CREDS_PID)) {
+ /* We couldn't read anything from the call, let's try
+ * to get it from the sender or peer. */
+
+ if (call->sender)
+ /* There's a sender, but the creds are missing. */
+ return sd_bus_get_name_creds(call->bus, call->sender, mask, ret);
+ else
+ /* There's no sender. For direct connections
+ * the credentials of the AF_UNIX peer matter,
+ * which may be queried via sd_bus_get_owner_creds(). */
+ return sd_bus_get_owner_creds(call->bus, mask, ret);
+ }
+
+ r = bus_creds_extend_by_pid(c, mask, ret);
+ if (r == -ESRCH) {
+ /* Process doesn't exist anymore? propagate the few things we have */
+ *ret = sd_bus_creds_ref(c);
+ return 0;
+ }
+
+ return r;
+}
+
+_public_ int sd_bus_query_sender_privilege(sd_bus_message *call, int capability) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ uid_t our_uid;
+ bool know_caps = false;
+ int r;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->bus, -EINVAL);
+ assert_return(!bus_origin_changed(call->bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(call->bus->state))
+ return -ENOTCONN;
+
+ if (capability >= 0) {
+
+ r = sd_bus_query_sender_creds(call, SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_EFFECTIVE_CAPS, &creds);
+ if (r < 0)
+ return r;
+
+ /* We cannot use augmented caps for authorization,
+ * since then data is acquired raceful from
+ * /proc. This can never actually happen, but let's
+ * better be safe than sorry, and do an extra check
+ * here. */
+ assert_return((sd_bus_creds_get_augmented_mask(creds) & SD_BUS_CREDS_EFFECTIVE_CAPS) == 0, -EPERM);
+
+ r = sd_bus_creds_has_effective_cap(creds, capability);
+ if (r > 0)
+ return 1;
+ if (r == 0)
+ know_caps = true;
+ } else {
+ r = sd_bus_query_sender_creds(call, SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+ }
+
+ /* Now, check the UID, but only if the capability check wasn't
+ * sufficient */
+ our_uid = getuid();
+ if (our_uid != 0 || !know_caps || capability < 0) {
+ uid_t sender_uid;
+
+ /* We cannot use augmented uid/euid for authorization,
+ * since then data is acquired raceful from
+ * /proc. This can never actually happen, but let's
+ * better be safe than sorry, and do an extra check
+ * here. */
+ assert_return((sd_bus_creds_get_augmented_mask(creds) & (SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID)) == 0, -EPERM);
+
+ /* Try to use the EUID, if we have it. */
+ r = sd_bus_creds_get_euid(creds, &sender_uid);
+ if (r < 0)
+ r = sd_bus_creds_get_uid(creds, &sender_uid);
+
+ if (r >= 0) {
+ /* Sender has same UID as us, then let's grant access */
+ if (sender_uid == our_uid)
+ return 1;
+
+ /* Sender is root, we are not root. */
+ if (our_uid != 0 && sender_uid == 0)
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+#define make_expression(sender, path, interface, member) \
+ strjoina( \
+ "type='signal'", \
+ sender ? ",sender='" : "", \
+ sender ?: "", \
+ sender ? "'" : "", \
+ path ? ",path='" : "", \
+ path ?: "", \
+ path ? "'" : "", \
+ interface ? ",interface='" : "", \
+ interface ?: "", \
+ interface ? "'" : "", \
+ member ? ",member='" : "", \
+ member ?: "", \
+ member ? "'" : "" \
+ )
+
+_public_ int sd_bus_match_signal(
+ sd_bus *bus,
+ sd_bus_slot **ret,
+ const char *sender,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ const char *expression;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+ assert_return(!sender || service_name_is_valid(sender), -EINVAL);
+ assert_return(!path || object_path_is_valid(path), -EINVAL);
+ assert_return(!interface || interface_name_is_valid(interface), -EINVAL);
+ assert_return(!member || member_name_is_valid(member), -EINVAL);
+
+ expression = make_expression(sender, path, interface, member);
+
+ return sd_bus_add_match(bus, ret, expression, callback, userdata);
+}
+
+_public_ int sd_bus_match_signal_async(
+ sd_bus *bus,
+ sd_bus_slot **ret,
+ const char *sender,
+ const char *path,
+ const char *interface,
+ const char *member,
+ sd_bus_message_handler_t callback,
+ sd_bus_message_handler_t install_callback,
+ void *userdata) {
+
+ const char *expression;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+ assert_return(!sender || service_name_is_valid(sender), -EINVAL);
+ assert_return(!path || object_path_is_valid(path), -EINVAL);
+ assert_return(!interface || interface_name_is_valid(interface), -EINVAL);
+ assert_return(!member || member_name_is_valid(member), -EINVAL);
+
+ expression = make_expression(sender, path, interface, member);
+
+ return sd_bus_add_match_async(bus, ret, expression, callback, install_callback, userdata);
+}
diff --git a/src/libsystemd/sd-bus/bus-creds.c b/src/libsystemd/sd-bus/bus-creds.c
new file mode 100644
index 0000000..c6d8caa
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-creds.c
@@ -0,0 +1,1337 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/capability.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "audit-util.h"
+#include "bus-creds.h"
+#include "bus-label.h"
+#include "bus-message.h"
+#include "capability-util.h"
+#include "cgroup-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "hexdecoct.h"
+#include "nulstr-util.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "user-util.h"
+
+enum {
+ CAP_OFFSET_INHERITABLE = 0,
+ CAP_OFFSET_PERMITTED = 1,
+ CAP_OFFSET_EFFECTIVE = 2,
+ CAP_OFFSET_BOUNDING = 3
+};
+
+void bus_creds_done(sd_bus_creds *c) {
+ assert(c);
+
+ /* For internal bus cred structures that are allocated by
+ * something else */
+
+ free(c->session);
+ free(c->unit);
+ free(c->user_unit);
+ free(c->slice);
+ free(c->user_slice);
+ free(c->unescaped_description);
+ free(c->supplementary_gids);
+ free(c->tty);
+
+ free(c->well_known_names); /* note that this is an strv, but
+ * we only free the array, not the
+ * strings the array points to. The
+ * full strv we only free if
+ * c->allocated is set, see
+ * below. */
+
+ strv_free(c->cmdline_array);
+}
+
+_public_ sd_bus_creds *sd_bus_creds_ref(sd_bus_creds *c) {
+
+ if (!c)
+ return NULL;
+
+ if (c->allocated) {
+ assert(c->n_ref > 0);
+ c->n_ref++;
+ } else {
+ sd_bus_message *m;
+
+ /* If this is an embedded creds structure, then
+ * forward ref counting to the message */
+ m = container_of(c, sd_bus_message, creds);
+ sd_bus_message_ref(m);
+ }
+
+ return c;
+}
+
+_public_ sd_bus_creds *sd_bus_creds_unref(sd_bus_creds *c) {
+
+ if (!c)
+ return NULL;
+
+ if (c->allocated) {
+ assert(c->n_ref > 0);
+ c->n_ref--;
+
+ if (c->n_ref == 0) {
+ free(c->comm);
+ free(c->tid_comm);
+ free(c->exe);
+ free(c->cmdline);
+ free(c->cgroup);
+ free(c->capability);
+ free(c->label);
+ free(c->unique_name);
+ free(c->cgroup_root);
+ free(c->description);
+
+ c->supplementary_gids = mfree(c->supplementary_gids);
+
+ c->well_known_names = strv_free(c->well_known_names);
+
+ bus_creds_done(c);
+
+ free(c);
+ }
+ } else {
+ sd_bus_message *m;
+
+ m = container_of(c, sd_bus_message, creds);
+ sd_bus_message_unref(m);
+ }
+
+ return NULL;
+}
+
+_public_ uint64_t sd_bus_creds_get_mask(const sd_bus_creds *c) {
+ assert_return(c, 0);
+
+ return c->mask;
+}
+
+_public_ uint64_t sd_bus_creds_get_augmented_mask(const sd_bus_creds *c) {
+ assert_return(c, 0);
+
+ return c->augmented;
+}
+
+sd_bus_creds* bus_creds_new(void) {
+ sd_bus_creds *c;
+
+ c = new0(sd_bus_creds, 1);
+ if (!c)
+ return NULL;
+
+ c->allocated = true;
+ c->n_ref = 1;
+ return c;
+}
+
+_public_ int sd_bus_creds_new_from_pid(sd_bus_creds **ret, pid_t pid, uint64_t mask) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *c = NULL;
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(mask <= _SD_BUS_CREDS_ALL, -EOPNOTSUPP);
+ assert_return(ret, -EINVAL);
+
+ if (pid == 0)
+ pid = getpid_cached();
+
+ c = bus_creds_new();
+ if (!c)
+ return -ENOMEM;
+
+ r = bus_creds_add_more(c, mask | SD_BUS_CREDS_AUGMENT, pid, 0);
+ if (r < 0)
+ return r;
+
+ /* Check if the process existed at all, in case we haven't
+ * figured that out already */
+ r = pid_is_alive(pid);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ESRCH;
+
+ *ret = TAKE_PTR(c);
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_uid(sd_bus_creds *c, uid_t *uid) {
+ assert_return(c, -EINVAL);
+ assert_return(uid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_UID))
+ return -ENODATA;
+
+ *uid = c->uid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_euid(sd_bus_creds *c, uid_t *euid) {
+ assert_return(c, -EINVAL);
+ assert_return(euid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_EUID))
+ return -ENODATA;
+
+ *euid = c->euid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_suid(sd_bus_creds *c, uid_t *suid) {
+ assert_return(c, -EINVAL);
+ assert_return(suid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_SUID))
+ return -ENODATA;
+
+ *suid = c->suid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_fsuid(sd_bus_creds *c, uid_t *fsuid) {
+ assert_return(c, -EINVAL);
+ assert_return(fsuid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_FSUID))
+ return -ENODATA;
+
+ *fsuid = c->fsuid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_gid(sd_bus_creds *c, gid_t *gid) {
+ assert_return(c, -EINVAL);
+ assert_return(gid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_GID))
+ return -ENODATA;
+
+ *gid = c->gid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_egid(sd_bus_creds *c, gid_t *egid) {
+ assert_return(c, -EINVAL);
+ assert_return(egid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_EGID))
+ return -ENODATA;
+
+ *egid = c->egid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_sgid(sd_bus_creds *c, gid_t *sgid) {
+ assert_return(c, -EINVAL);
+ assert_return(sgid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_SGID))
+ return -ENODATA;
+
+ *sgid = c->sgid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_fsgid(sd_bus_creds *c, gid_t *fsgid) {
+ assert_return(c, -EINVAL);
+ assert_return(fsgid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_FSGID))
+ return -ENODATA;
+
+ *fsgid = c->fsgid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_supplementary_gids(sd_bus_creds *c, const gid_t **gids) {
+ assert_return(c, -EINVAL);
+ assert_return(gids, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_SUPPLEMENTARY_GIDS))
+ return -ENODATA;
+
+ *gids = c->supplementary_gids;
+ return (int) c->n_supplementary_gids;
+}
+
+_public_ int sd_bus_creds_get_pid(sd_bus_creds *c, pid_t *pid) {
+ assert_return(c, -EINVAL);
+ assert_return(pid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_PID))
+ return -ENODATA;
+
+ assert(c->pid > 0);
+ *pid = c->pid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_ppid(sd_bus_creds *c, pid_t *ppid) {
+ assert_return(c, -EINVAL);
+ assert_return(ppid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_PPID))
+ return -ENODATA;
+
+ /* PID 1 has no parent process. Let's distinguish the case of
+ * not knowing and not having a parent process by the returned
+ * error code. */
+ if (c->ppid == 0)
+ return -ENXIO;
+
+ *ppid = c->ppid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_tid(sd_bus_creds *c, pid_t *tid) {
+ assert_return(c, -EINVAL);
+ assert_return(tid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_TID))
+ return -ENODATA;
+
+ assert(c->tid > 0);
+ *tid = c->tid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_selinux_context(sd_bus_creds *c, const char **ret) {
+ assert_return(c, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_SELINUX_CONTEXT))
+ return -ENODATA;
+
+ assert(c->label);
+ *ret = c->label;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_comm(sd_bus_creds *c, const char **ret) {
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_COMM))
+ return -ENODATA;
+
+ assert(c->comm);
+ *ret = c->comm;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_tid_comm(sd_bus_creds *c, const char **ret) {
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_TID_COMM))
+ return -ENODATA;
+
+ assert(c->tid_comm);
+ *ret = c->tid_comm;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_exe(sd_bus_creds *c, const char **ret) {
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_EXE))
+ return -ENODATA;
+
+ if (!c->exe)
+ return -ENXIO;
+
+ *ret = c->exe;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_cgroup(sd_bus_creds *c, const char **ret) {
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_CGROUP))
+ return -ENODATA;
+
+ assert(c->cgroup);
+ *ret = c->cgroup;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_unit(sd_bus_creds *c, const char **ret) {
+ int r;
+
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_UNIT))
+ return -ENODATA;
+
+ assert(c->cgroup);
+
+ if (!c->unit) {
+ const char *shifted;
+
+ r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted);
+ if (r < 0)
+ return r;
+
+ r = cg_path_get_unit(shifted, (char**) &c->unit);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = c->unit;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_user_unit(sd_bus_creds *c, const char **ret) {
+ int r;
+
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_USER_UNIT))
+ return -ENODATA;
+
+ assert(c->cgroup);
+
+ if (!c->user_unit) {
+ const char *shifted;
+
+ r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted);
+ if (r < 0)
+ return r;
+
+ r = cg_path_get_user_unit(shifted, (char**) &c->user_unit);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = c->user_unit;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_slice(sd_bus_creds *c, const char **ret) {
+ int r;
+
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_SLICE))
+ return -ENODATA;
+
+ assert(c->cgroup);
+
+ if (!c->slice) {
+ const char *shifted;
+
+ r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted);
+ if (r < 0)
+ return r;
+
+ r = cg_path_get_slice(shifted, (char**) &c->slice);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = c->slice;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_user_slice(sd_bus_creds *c, const char **ret) {
+ int r;
+
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_USER_SLICE))
+ return -ENODATA;
+
+ assert(c->cgroup);
+
+ if (!c->user_slice) {
+ const char *shifted;
+
+ r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted);
+ if (r < 0)
+ return r;
+
+ r = cg_path_get_user_slice(shifted, (char**) &c->user_slice);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = c->user_slice;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_session(sd_bus_creds *c, const char **ret) {
+ int r;
+
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_SESSION))
+ return -ENODATA;
+
+ assert(c->cgroup);
+
+ if (!c->session) {
+ const char *shifted;
+
+ r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted);
+ if (r < 0)
+ return r;
+
+ r = cg_path_get_session(shifted, (char**) &c->session);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = c->session;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_owner_uid(sd_bus_creds *c, uid_t *uid) {
+ const char *shifted;
+ int r;
+
+ assert_return(c, -EINVAL);
+ assert_return(uid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_OWNER_UID))
+ return -ENODATA;
+
+ assert(c->cgroup);
+
+ r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted);
+ if (r < 0)
+ return r;
+
+ return cg_path_get_owner_uid(shifted, uid);
+}
+
+_public_ int sd_bus_creds_get_cmdline(sd_bus_creds *c, char ***cmdline) {
+ assert_return(c, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_CMDLINE))
+ return -ENODATA;
+
+ if (!c->cmdline)
+ return -ENXIO;
+
+ if (!c->cmdline_array) {
+ c->cmdline_array = strv_parse_nulstr(c->cmdline, c->cmdline_size);
+ if (!c->cmdline_array)
+ return -ENOMEM;
+ }
+
+ *cmdline = c->cmdline_array;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_audit_session_id(sd_bus_creds *c, uint32_t *sessionid) {
+ assert_return(c, -EINVAL);
+ assert_return(sessionid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_AUDIT_SESSION_ID))
+ return -ENODATA;
+
+ if (!audit_session_is_valid(c->audit_session_id))
+ return -ENXIO;
+
+ *sessionid = c->audit_session_id;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_audit_login_uid(sd_bus_creds *c, uid_t *uid) {
+ assert_return(c, -EINVAL);
+ assert_return(uid, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_AUDIT_LOGIN_UID))
+ return -ENODATA;
+
+ if (!uid_is_valid(c->audit_login_uid))
+ return -ENXIO;
+
+ *uid = c->audit_login_uid;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_tty(sd_bus_creds *c, const char **ret) {
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_TTY))
+ return -ENODATA;
+
+ if (!c->tty)
+ return -ENXIO;
+
+ *ret = c->tty;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_unique_name(sd_bus_creds *c, const char **unique_name) {
+ assert_return(c, -EINVAL);
+ assert_return(unique_name, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_UNIQUE_NAME))
+ return -ENODATA;
+
+ *unique_name = c->unique_name;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_well_known_names(sd_bus_creds *c, char ***well_known_names) {
+ assert_return(c, -EINVAL);
+ assert_return(well_known_names, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_WELL_KNOWN_NAMES))
+ return -ENODATA;
+
+ /* As a special hack we return the bus driver as well-known
+ * names list when this is requested. */
+ if (c->well_known_names_driver) {
+ static const char* const wkn[] = {
+ "org.freedesktop.DBus",
+ NULL
+ };
+
+ *well_known_names = (char**) wkn;
+ return 0;
+ }
+
+ if (c->well_known_names_local) {
+ static const char* const wkn[] = {
+ "org.freedesktop.DBus.Local",
+ NULL
+ };
+
+ *well_known_names = (char**) wkn;
+ return 0;
+ }
+
+ *well_known_names = c->well_known_names;
+ return 0;
+}
+
+_public_ int sd_bus_creds_get_description(sd_bus_creds *c, const char **ret) {
+ assert_return(c, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_DESCRIPTION))
+ return -ENODATA;
+
+ assert(c->description);
+
+ if (!c->unescaped_description) {
+ c->unescaped_description = bus_label_unescape(c->description);
+ if (!c->unescaped_description)
+ return -ENOMEM;
+ }
+
+ *ret = c->unescaped_description;
+ return 0;
+}
+
+static int has_cap(sd_bus_creds *c, size_t offset, int capability) {
+ size_t sz;
+
+ assert(c);
+ assert(capability >= 0);
+ assert(c->capability);
+
+ unsigned lc = cap_last_cap();
+
+ if ((unsigned) capability > lc)
+ return 0;
+
+ /* If the last cap is 63, then there are 64 caps defined, and we need 2 entries à 32-bit hence. *
+ * If the last cap is 64, then there are 65 caps defined, and we need 3 entries à 32-bit hence. */
+ sz = DIV_ROUND_UP(lc+1, 32LU);
+
+ return !!(c->capability[offset * sz + CAP_TO_INDEX((uint32_t) capability)] & CAP_TO_MASK_CORRECTED((uint32_t) capability));
+}
+
+_public_ int sd_bus_creds_has_effective_cap(sd_bus_creds *c, int capability) {
+ assert_return(c, -EINVAL);
+ assert_return(capability >= 0, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_EFFECTIVE_CAPS))
+ return -ENODATA;
+
+ return has_cap(c, CAP_OFFSET_EFFECTIVE, capability);
+}
+
+_public_ int sd_bus_creds_has_permitted_cap(sd_bus_creds *c, int capability) {
+ assert_return(c, -EINVAL);
+ assert_return(capability >= 0, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_PERMITTED_CAPS))
+ return -ENODATA;
+
+ return has_cap(c, CAP_OFFSET_PERMITTED, capability);
+}
+
+_public_ int sd_bus_creds_has_inheritable_cap(sd_bus_creds *c, int capability) {
+ assert_return(c, -EINVAL);
+ assert_return(capability >= 0, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_INHERITABLE_CAPS))
+ return -ENODATA;
+
+ return has_cap(c, CAP_OFFSET_INHERITABLE, capability);
+}
+
+_public_ int sd_bus_creds_has_bounding_cap(sd_bus_creds *c, int capability) {
+ assert_return(c, -EINVAL);
+ assert_return(capability >= 0, -EINVAL);
+
+ if (!(c->mask & SD_BUS_CREDS_BOUNDING_CAPS))
+ return -ENODATA;
+
+ return has_cap(c, CAP_OFFSET_BOUNDING, capability);
+}
+
+static int parse_caps(sd_bus_creds *c, unsigned offset, const char *p) {
+ size_t sz, max;
+ unsigned i, j;
+
+ assert(c);
+ assert(p);
+
+ max = DIV_ROUND_UP(cap_last_cap()+1, 32U);
+ p += strspn(p, WHITESPACE);
+
+ sz = strlen(p);
+ if (sz % 8 != 0)
+ return -EINVAL;
+
+ sz /= 8;
+ if (sz > max)
+ return -EINVAL;
+
+ if (!c->capability) {
+ c->capability = new0(uint32_t, max * 4);
+ if (!c->capability)
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < sz; i ++) {
+ uint32_t v = 0;
+
+ for (j = 0; j < 8; ++j) {
+ int t;
+
+ t = unhexchar(*p++);
+ if (t < 0)
+ return -EINVAL;
+
+ v = (v << 4) | t;
+ }
+
+ c->capability[offset * max + (sz - i - 1)] = v;
+ }
+
+ return 0;
+}
+
+int bus_creds_add_more(sd_bus_creds *c, uint64_t mask, pid_t pid, pid_t tid) {
+ uint64_t missing;
+ int r;
+
+ assert(c);
+ assert(c->allocated);
+
+ if (!(mask & SD_BUS_CREDS_AUGMENT))
+ return 0;
+
+ /* Try to retrieve PID from creds if it wasn't passed to us */
+ if (pid > 0) {
+ c->pid = pid;
+ c->mask |= SD_BUS_CREDS_PID;
+ } else if (c->mask & SD_BUS_CREDS_PID)
+ pid = c->pid;
+ else
+ /* Without pid we cannot do much... */
+ return 0;
+
+ /* Try to retrieve TID from creds if it wasn't passed to us */
+ if (tid <= 0 && (c->mask & SD_BUS_CREDS_TID))
+ tid = c->tid;
+
+ /* Calculate what we shall and can add */
+ missing = mask & ~(c->mask|SD_BUS_CREDS_PID|SD_BUS_CREDS_TID|SD_BUS_CREDS_UNIQUE_NAME|SD_BUS_CREDS_WELL_KNOWN_NAMES|SD_BUS_CREDS_DESCRIPTION|SD_BUS_CREDS_AUGMENT);
+ if (missing == 0)
+ return 0;
+
+ if (tid > 0) {
+ c->tid = tid;
+ c->mask |= SD_BUS_CREDS_TID;
+ }
+
+ if (missing & (SD_BUS_CREDS_PPID |
+ SD_BUS_CREDS_UID | SD_BUS_CREDS_EUID | SD_BUS_CREDS_SUID | SD_BUS_CREDS_FSUID |
+ SD_BUS_CREDS_GID | SD_BUS_CREDS_EGID | SD_BUS_CREDS_SGID | SD_BUS_CREDS_FSGID |
+ SD_BUS_CREDS_SUPPLEMENTARY_GIDS |
+ SD_BUS_CREDS_EFFECTIVE_CAPS | SD_BUS_CREDS_INHERITABLE_CAPS |
+ SD_BUS_CREDS_PERMITTED_CAPS | SD_BUS_CREDS_BOUNDING_CAPS)) {
+
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *p;
+
+ p = procfs_file_alloca(pid, "status");
+
+ f = fopen(p, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return -ESRCH;
+ else if (!ERRNO_IS_PRIVILEGE(errno))
+ return -errno;
+ } else {
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (missing & SD_BUS_CREDS_PPID) {
+ p = startswith(line, "PPid:");
+ if (p) {
+ p += strspn(p, WHITESPACE);
+
+ /* Explicitly check for PPID 0 (which is the case for PID 1) */
+ if (!streq(p, "0")) {
+ r = parse_pid(p, &c->ppid);
+ if (r < 0)
+ return r;
+
+ } else
+ c->ppid = 0;
+
+ c->mask |= SD_BUS_CREDS_PPID;
+ continue;
+ }
+ }
+
+ if (missing & (SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_SUID|SD_BUS_CREDS_FSUID)) {
+ p = startswith(line, "Uid:");
+ if (p) {
+ unsigned long uid, euid, suid, fsuid;
+
+ p += strspn(p, WHITESPACE);
+ if (sscanf(p, "%lu %lu %lu %lu", &uid, &euid, &suid, &fsuid) != 4)
+ return -EIO;
+
+ if (missing & SD_BUS_CREDS_UID)
+ c->uid = (uid_t) uid;
+ if (missing & SD_BUS_CREDS_EUID)
+ c->euid = (uid_t) euid;
+ if (missing & SD_BUS_CREDS_SUID)
+ c->suid = (uid_t) suid;
+ if (missing & SD_BUS_CREDS_FSUID)
+ c->fsuid = (uid_t) fsuid;
+
+ c->mask |= missing & (SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_SUID|SD_BUS_CREDS_FSUID);
+ continue;
+ }
+ }
+
+ if (missing & (SD_BUS_CREDS_GID|SD_BUS_CREDS_EGID|SD_BUS_CREDS_SGID|SD_BUS_CREDS_FSGID)) {
+ p = startswith(line, "Gid:");
+ if (p) {
+ unsigned long gid, egid, sgid, fsgid;
+
+ p += strspn(p, WHITESPACE);
+ if (sscanf(p, "%lu %lu %lu %lu", &gid, &egid, &sgid, &fsgid) != 4)
+ return -EIO;
+
+ if (missing & SD_BUS_CREDS_GID)
+ c->gid = (gid_t) gid;
+ if (missing & SD_BUS_CREDS_EGID)
+ c->egid = (gid_t) egid;
+ if (missing & SD_BUS_CREDS_SGID)
+ c->sgid = (gid_t) sgid;
+ if (missing & SD_BUS_CREDS_FSGID)
+ c->fsgid = (gid_t) fsgid;
+
+ c->mask |= missing & (SD_BUS_CREDS_GID|SD_BUS_CREDS_EGID|SD_BUS_CREDS_SGID|SD_BUS_CREDS_FSGID);
+ continue;
+ }
+ }
+
+ if (missing & SD_BUS_CREDS_SUPPLEMENTARY_GIDS) {
+ p = startswith(line, "Groups:");
+ if (p) {
+ for (;;) {
+ unsigned long g;
+ int n = 0;
+
+ p += strspn(p, WHITESPACE);
+ if (*p == 0)
+ break;
+
+ if (sscanf(p, "%lu%n", &g, &n) != 1)
+ return -EIO;
+
+ if (!GREEDY_REALLOC(c->supplementary_gids, c->n_supplementary_gids+1))
+ return -ENOMEM;
+
+ c->supplementary_gids[c->n_supplementary_gids++] = (gid_t) g;
+ p += n;
+ }
+
+ c->mask |= SD_BUS_CREDS_SUPPLEMENTARY_GIDS;
+ continue;
+ }
+ }
+
+ if (missing & SD_BUS_CREDS_EFFECTIVE_CAPS) {
+ p = startswith(line, "CapEff:");
+ if (p) {
+ r = parse_caps(c, CAP_OFFSET_EFFECTIVE, p);
+ if (r < 0)
+ return r;
+
+ c->mask |= SD_BUS_CREDS_EFFECTIVE_CAPS;
+ continue;
+ }
+ }
+
+ if (missing & SD_BUS_CREDS_PERMITTED_CAPS) {
+ p = startswith(line, "CapPrm:");
+ if (p) {
+ r = parse_caps(c, CAP_OFFSET_PERMITTED, p);
+ if (r < 0)
+ return r;
+
+ c->mask |= SD_BUS_CREDS_PERMITTED_CAPS;
+ continue;
+ }
+ }
+
+ if (missing & SD_BUS_CREDS_INHERITABLE_CAPS) {
+ p = startswith(line, "CapInh:");
+ if (p) {
+ r = parse_caps(c, CAP_OFFSET_INHERITABLE, p);
+ if (r < 0)
+ return r;
+
+ c->mask |= SD_BUS_CREDS_INHERITABLE_CAPS;
+ continue;
+ }
+ }
+
+ if (missing & SD_BUS_CREDS_BOUNDING_CAPS) {
+ p = startswith(line, "CapBnd:");
+ if (p) {
+ r = parse_caps(c, CAP_OFFSET_BOUNDING, p);
+ if (r < 0)
+ return r;
+
+ c->mask |= SD_BUS_CREDS_BOUNDING_CAPS;
+ continue;
+ }
+ }
+ }
+ }
+ }
+
+ if (missing & SD_BUS_CREDS_SELINUX_CONTEXT) {
+ const char *p;
+
+ p = procfs_file_alloca(pid, "attr/current");
+ r = read_one_line_file(p, &c->label);
+ if (r < 0) {
+ if (!IN_SET(r, -ENOENT, -EINVAL, -EPERM, -EACCES))
+ return r;
+ } else
+ c->mask |= SD_BUS_CREDS_SELINUX_CONTEXT;
+ }
+
+ if (missing & SD_BUS_CREDS_COMM) {
+ r = pid_get_comm(pid, &c->comm);
+ if (r < 0) {
+ if (!ERRNO_IS_PRIVILEGE(r))
+ return r;
+ } else
+ c->mask |= SD_BUS_CREDS_COMM;
+ }
+
+ if (missing & SD_BUS_CREDS_EXE) {
+ r = get_process_exe(pid, &c->exe);
+ if (r == -ESRCH) {
+ /* Unfortunately we cannot really distinguish
+ * the case here where the process does not
+ * exist, and /proc/$PID/exe being unreadable
+ * because $PID is a kernel thread. Hence,
+ * assume it is a kernel thread, and rely on
+ * that this case is caught with a later
+ * call. */
+ c->exe = NULL;
+ c->mask |= SD_BUS_CREDS_EXE;
+ } else if (r < 0) {
+ if (!ERRNO_IS_PRIVILEGE(r))
+ return r;
+ } else
+ c->mask |= SD_BUS_CREDS_EXE;
+ }
+
+ if (missing & SD_BUS_CREDS_CMDLINE) {
+ const char *p;
+
+ p = procfs_file_alloca(pid, "cmdline");
+ r = read_full_virtual_file(p, &c->cmdline, &c->cmdline_size);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0) {
+ if (!ERRNO_IS_PRIVILEGE(r))
+ return r;
+ } else {
+ if (c->cmdline_size == 0)
+ c->cmdline = mfree(c->cmdline);
+
+ c->mask |= SD_BUS_CREDS_CMDLINE;
+ }
+ }
+
+ if (tid > 0 && (missing & SD_BUS_CREDS_TID_COMM)) {
+ _cleanup_free_ char *p = NULL;
+
+ if (asprintf(&p, "/proc/"PID_FMT"/task/"PID_FMT"/comm", pid, tid) < 0)
+ return -ENOMEM;
+
+ r = read_one_line_file(p, &c->tid_comm);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0) {
+ if (!ERRNO_IS_PRIVILEGE(r))
+ return r;
+ } else
+ c->mask |= SD_BUS_CREDS_TID_COMM;
+ }
+
+ if (missing & (SD_BUS_CREDS_CGROUP|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_USER_UNIT|SD_BUS_CREDS_SLICE|SD_BUS_CREDS_USER_SLICE|SD_BUS_CREDS_SESSION|SD_BUS_CREDS_OWNER_UID)) {
+
+ if (!c->cgroup) {
+ r = cg_pid_get_path(NULL, pid, &c->cgroup);
+ if (r < 0) {
+ if (!ERRNO_IS_PRIVILEGE(r))
+ return r;
+ }
+ }
+
+ if (!c->cgroup_root) {
+ r = cg_get_root_path(&c->cgroup_root);
+ if (r < 0)
+ return r;
+ }
+
+ if (c->cgroup)
+ c->mask |= missing & (SD_BUS_CREDS_CGROUP|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_USER_UNIT|SD_BUS_CREDS_SLICE|SD_BUS_CREDS_USER_SLICE|SD_BUS_CREDS_SESSION|SD_BUS_CREDS_OWNER_UID);
+ }
+
+ if (missing & SD_BUS_CREDS_AUDIT_SESSION_ID) {
+ r = audit_session_from_pid(pid, &c->audit_session_id);
+ if (r == -ENODATA) {
+ /* ENODATA means: no audit session id assigned */
+ c->audit_session_id = AUDIT_SESSION_INVALID;
+ c->mask |= SD_BUS_CREDS_AUDIT_SESSION_ID;
+ } else if (r < 0) {
+ if (!IN_SET(r, -EOPNOTSUPP, -ENOENT, -EPERM, -EACCES))
+ return r;
+ } else
+ c->mask |= SD_BUS_CREDS_AUDIT_SESSION_ID;
+ }
+
+ if (missing & SD_BUS_CREDS_AUDIT_LOGIN_UID) {
+ r = audit_loginuid_from_pid(pid, &c->audit_login_uid);
+ if (r == -ENODATA) {
+ /* ENODATA means: no audit login uid assigned */
+ c->audit_login_uid = UID_INVALID;
+ c->mask |= SD_BUS_CREDS_AUDIT_LOGIN_UID;
+ } else if (r < 0) {
+ if (!IN_SET(r, -EOPNOTSUPP, -ENOENT, -EPERM, -EACCES))
+ return r;
+ } else
+ c->mask |= SD_BUS_CREDS_AUDIT_LOGIN_UID;
+ }
+
+ if (missing & SD_BUS_CREDS_TTY) {
+ r = get_ctty(pid, NULL, &c->tty);
+ if (r == -ENXIO) {
+ /* ENXIO means: process has no controlling TTY */
+ c->tty = NULL;
+ c->mask |= SD_BUS_CREDS_TTY;
+ } else if (r < 0) {
+ if (!IN_SET(r, -EPERM, -EACCES, -ENOENT))
+ return r;
+ } else
+ c->mask |= SD_BUS_CREDS_TTY;
+ }
+
+ /* In case only the exe path was to be read we cannot distinguish the case where the exe path was
+ * unreadable because the process was a kernel thread, or when the process didn't exist at
+ * all. Hence, let's do a final check, to be sure. */
+ r = pid_is_alive(pid);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ESRCH;
+
+ if (tid > 0 && tid != pid && pid_is_unwaited(tid) == 0)
+ return -ESRCH;
+
+ c->augmented = missing & c->mask;
+
+ return 0;
+}
+
+int bus_creds_extend_by_pid(sd_bus_creds *c, uint64_t mask, sd_bus_creds **ret) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *n = NULL;
+ int r;
+
+ assert(c);
+ assert(ret);
+
+ if ((mask & ~c->mask) == 0 || (!(mask & SD_BUS_CREDS_AUGMENT))) {
+ /* There's already all data we need, or augmentation
+ * wasn't turned on. */
+
+ *ret = sd_bus_creds_ref(c);
+ return 0;
+ }
+
+ n = bus_creds_new();
+ if (!n)
+ return -ENOMEM;
+
+ /* Copy the original data over */
+
+ if (c->mask & mask & SD_BUS_CREDS_PID) {
+ n->pid = c->pid;
+ n->mask |= SD_BUS_CREDS_PID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_TID) {
+ n->tid = c->tid;
+ n->mask |= SD_BUS_CREDS_TID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_PPID) {
+ n->ppid = c->ppid;
+ n->mask |= SD_BUS_CREDS_PPID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_UID) {
+ n->uid = c->uid;
+ n->mask |= SD_BUS_CREDS_UID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_EUID) {
+ n->euid = c->euid;
+ n->mask |= SD_BUS_CREDS_EUID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_SUID) {
+ n->suid = c->suid;
+ n->mask |= SD_BUS_CREDS_SUID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_FSUID) {
+ n->fsuid = c->fsuid;
+ n->mask |= SD_BUS_CREDS_FSUID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_GID) {
+ n->gid = c->gid;
+ n->mask |= SD_BUS_CREDS_GID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_EGID) {
+ n->egid = c->egid;
+ n->mask |= SD_BUS_CREDS_EGID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_SGID) {
+ n->sgid = c->sgid;
+ n->mask |= SD_BUS_CREDS_SGID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_FSGID) {
+ n->fsgid = c->fsgid;
+ n->mask |= SD_BUS_CREDS_FSGID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_SUPPLEMENTARY_GIDS) {
+ if (c->supplementary_gids) {
+ n->supplementary_gids = newdup(gid_t, c->supplementary_gids, c->n_supplementary_gids);
+ if (!n->supplementary_gids)
+ return -ENOMEM;
+ n->n_supplementary_gids = c->n_supplementary_gids;
+ } else {
+ n->supplementary_gids = NULL;
+ n->n_supplementary_gids = 0;
+ }
+
+ n->mask |= SD_BUS_CREDS_SUPPLEMENTARY_GIDS;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_COMM) {
+ assert(c->comm);
+
+ n->comm = strdup(c->comm);
+ if (!n->comm)
+ return -ENOMEM;
+
+ n->mask |= SD_BUS_CREDS_COMM;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_TID_COMM) {
+ assert(c->tid_comm);
+
+ n->tid_comm = strdup(c->tid_comm);
+ if (!n->tid_comm)
+ return -ENOMEM;
+
+ n->mask |= SD_BUS_CREDS_TID_COMM;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_EXE) {
+ if (c->exe) {
+ n->exe = strdup(c->exe);
+ if (!n->exe)
+ return -ENOMEM;
+ } else
+ n->exe = NULL;
+
+ n->mask |= SD_BUS_CREDS_EXE;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_CMDLINE) {
+ if (c->cmdline) {
+ n->cmdline = memdup(c->cmdline, c->cmdline_size);
+ if (!n->cmdline)
+ return -ENOMEM;
+
+ n->cmdline_size = c->cmdline_size;
+ } else {
+ n->cmdline = NULL;
+ n->cmdline_size = 0;
+ }
+
+ n->mask |= SD_BUS_CREDS_CMDLINE;
+ }
+
+ if (c->mask & mask & (SD_BUS_CREDS_CGROUP|SD_BUS_CREDS_SESSION|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_USER_UNIT|SD_BUS_CREDS_SLICE|SD_BUS_CREDS_USER_SLICE|SD_BUS_CREDS_OWNER_UID)) {
+ assert(c->cgroup);
+
+ n->cgroup = strdup(c->cgroup);
+ if (!n->cgroup)
+ return -ENOMEM;
+
+ n->cgroup_root = strdup(c->cgroup_root);
+ if (!n->cgroup_root)
+ return -ENOMEM;
+
+ n->mask |= mask & (SD_BUS_CREDS_CGROUP|SD_BUS_CREDS_SESSION|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_USER_UNIT|SD_BUS_CREDS_SLICE|SD_BUS_CREDS_USER_SLICE|SD_BUS_CREDS_OWNER_UID);
+ }
+
+ if (c->mask & mask & (SD_BUS_CREDS_EFFECTIVE_CAPS|SD_BUS_CREDS_PERMITTED_CAPS|SD_BUS_CREDS_INHERITABLE_CAPS|SD_BUS_CREDS_BOUNDING_CAPS)) {
+ assert(c->capability);
+
+ n->capability = memdup(c->capability, DIV_ROUND_UP(cap_last_cap()+1, 32U) * 4 * 4);
+ if (!n->capability)
+ return -ENOMEM;
+
+ n->mask |= c->mask & mask & (SD_BUS_CREDS_EFFECTIVE_CAPS|SD_BUS_CREDS_PERMITTED_CAPS|SD_BUS_CREDS_INHERITABLE_CAPS|SD_BUS_CREDS_BOUNDING_CAPS);
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_SELINUX_CONTEXT) {
+ assert(c->label);
+
+ n->label = strdup(c->label);
+ if (!n->label)
+ return -ENOMEM;
+ n->mask |= SD_BUS_CREDS_SELINUX_CONTEXT;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_AUDIT_SESSION_ID) {
+ n->audit_session_id = c->audit_session_id;
+ n->mask |= SD_BUS_CREDS_AUDIT_SESSION_ID;
+ }
+ if (c->mask & mask & SD_BUS_CREDS_AUDIT_LOGIN_UID) {
+ n->audit_login_uid = c->audit_login_uid;
+ n->mask |= SD_BUS_CREDS_AUDIT_LOGIN_UID;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_TTY) {
+ if (c->tty) {
+ n->tty = strdup(c->tty);
+ if (!n->tty)
+ return -ENOMEM;
+ } else
+ n->tty = NULL;
+ n->mask |= SD_BUS_CREDS_TTY;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_UNIQUE_NAME) {
+ assert(c->unique_name);
+
+ n->unique_name = strdup(c->unique_name);
+ if (!n->unique_name)
+ return -ENOMEM;
+ n->mask |= SD_BUS_CREDS_UNIQUE_NAME;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_WELL_KNOWN_NAMES) {
+ if (strv_isempty(c->well_known_names))
+ n->well_known_names = NULL;
+ else {
+ n->well_known_names = strv_copy(c->well_known_names);
+ if (!n->well_known_names)
+ return -ENOMEM;
+ }
+ n->well_known_names_driver = c->well_known_names_driver;
+ n->well_known_names_local = c->well_known_names_local;
+ n->mask |= SD_BUS_CREDS_WELL_KNOWN_NAMES;
+ }
+
+ if (c->mask & mask & SD_BUS_CREDS_DESCRIPTION) {
+ assert(c->description);
+ n->description = strdup(c->description);
+ if (!n->description)
+ return -ENOMEM;
+ n->mask |= SD_BUS_CREDS_DESCRIPTION;
+ }
+
+ n->augmented = c->augmented & n->mask;
+
+ /* Get more data */
+
+ r = bus_creds_add_more(n, mask, 0, 0);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(n);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/bus-creds.h b/src/libsystemd/sd-bus/bus-creds.h
new file mode 100644
index 0000000..7806d9e
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-creds.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-bus.h"
+
+struct sd_bus_creds {
+ bool allocated;
+ unsigned n_ref;
+
+ uint64_t mask;
+ uint64_t augmented;
+
+ uid_t uid;
+ uid_t euid;
+ uid_t suid;
+ uid_t fsuid;
+ gid_t gid;
+ gid_t egid;
+ gid_t sgid;
+ gid_t fsgid;
+
+ gid_t *supplementary_gids;
+ unsigned n_supplementary_gids;
+
+ pid_t ppid;
+ pid_t pid;
+ pid_t tid;
+
+ char *comm;
+ char *tid_comm;
+ char *exe;
+
+ char *cmdline;
+ size_t cmdline_size;
+ char **cmdline_array;
+
+ char *cgroup;
+ char *session;
+ char *unit;
+ char *user_unit;
+ char *slice;
+ char *user_slice;
+
+ char *tty;
+
+ uint32_t *capability;
+
+ uint32_t audit_session_id;
+ uid_t audit_login_uid;
+
+ char *label;
+
+ char *unique_name;
+
+ char **well_known_names;
+ bool well_known_names_driver:1;
+ bool well_known_names_local:1;
+
+ char *cgroup_root;
+
+ char *description, *unescaped_description;
+};
+
+sd_bus_creds* bus_creds_new(void);
+
+void bus_creds_done(sd_bus_creds *c);
+
+int bus_creds_add_more(sd_bus_creds *c, uint64_t mask, pid_t pid, pid_t tid);
+
+int bus_creds_extend_by_pid(sd_bus_creds *c, uint64_t mask, sd_bus_creds **ret);
diff --git a/src/libsystemd/sd-bus/bus-dump.c b/src/libsystemd/sd-bus/bus-dump.c
new file mode 100644
index 0000000..6d24f3b
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-dump.c
@@ -0,0 +1,649 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/time.h>
+
+#include "alloc-util.h"
+#include "bus-dump.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-type.h"
+#include "cap-list.h"
+#include "capability-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "glyph-util.h"
+#include "macro.h"
+#include "pcapng.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+
+static char *indent(unsigned level, uint64_t flags) {
+ char *p;
+ unsigned n, i = 0;
+
+ n = 0;
+
+ if (flags & SD_BUS_MESSAGE_DUMP_SUBTREE_ONLY && level > 0)
+ level -= 1;
+
+ if (flags & SD_BUS_MESSAGE_DUMP_WITH_HEADER)
+ n += 2;
+
+ p = new(char, n + level*8 + 1);
+ if (!p)
+ return NULL;
+
+ if (flags & SD_BUS_MESSAGE_DUMP_WITH_HEADER) {
+ p[i++] = ' ';
+ p[i++] = ' ';
+ }
+
+ memset(p + i, ' ', level*8);
+ p[i + level*8] = 0;
+
+ return p;
+}
+
+_public_ int sd_bus_message_dump(sd_bus_message *m, FILE *f, uint64_t flags) {
+ unsigned level = 1;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return((flags & ~_SD_BUS_MESSAGE_DUMP_KNOWN_FLAGS) == 0, -EINVAL);
+
+ if (!f)
+ f = stdout;
+
+ if (flags & SD_BUS_MESSAGE_DUMP_WITH_HEADER) {
+ usec_t ts = m->realtime;
+
+ if (ts == 0)
+ ts = now(CLOCK_REALTIME);
+
+ fprintf(f,
+ "%s%s%s Type=%s%s%s Endian=%c Flags=%u Version=%u",
+ m->header->type == SD_BUS_MESSAGE_METHOD_ERROR ? ansi_highlight_red() :
+ m->header->type == SD_BUS_MESSAGE_METHOD_RETURN ? ansi_highlight_green() :
+ m->header->type != SD_BUS_MESSAGE_SIGNAL ? ansi_highlight() : "",
+ special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET),
+ ansi_normal(),
+
+ ansi_highlight(),
+ bus_message_type_to_string(m->header->type) ?: "(unknown)",
+ ansi_normal(),
+
+ m->header->endian,
+ m->header->flags,
+ m->header->version);
+
+ /* Display synthetic message serial number in a more readable
+ * format than UINT32_MAX */
+ if (BUS_MESSAGE_COOKIE(m) == UINT32_MAX)
+ fprintf(f, " Cookie=-1");
+ else
+ fprintf(f, " Cookie=%" PRIu64, BUS_MESSAGE_COOKIE(m));
+
+ if (m->reply_cookie != 0)
+ fprintf(f, " ReplyCookie=%" PRIu64, m->reply_cookie);
+
+ fprintf(f, " Timestamp=\"%s\"\n", strna(FORMAT_TIMESTAMP_STYLE(ts, TIMESTAMP_US_UTC)));
+
+ if (m->sender)
+ fprintf(f, " Sender=%s%s%s", ansi_highlight(), m->sender, ansi_normal());
+ if (m->destination)
+ fprintf(f, " Destination=%s%s%s", ansi_highlight(), m->destination, ansi_normal());
+ if (m->path)
+ fprintf(f, " Path=%s%s%s", ansi_highlight(), m->path, ansi_normal());
+ if (m->interface)
+ fprintf(f, " Interface=%s%s%s", ansi_highlight(), m->interface, ansi_normal());
+ if (m->member)
+ fprintf(f, " Member=%s%s%s", ansi_highlight(), m->member, ansi_normal());
+
+ if (m->sender || m->destination || m->path || m->interface || m->member)
+ fputs("\n", f);
+
+ if (sd_bus_error_is_set(&m->error))
+ fprintf(f,
+ " ErrorName=%s%s%s"
+ " ErrorMessage=%s\"%s\"%s\n",
+ ansi_highlight_red(), strna(m->error.name), ansi_normal(),
+ ansi_highlight_red(), strna(m->error.message), ansi_normal());
+
+ if (m->monotonic != 0)
+ fprintf(f, " Monotonic="USEC_FMT, m->monotonic);
+ if (m->realtime != 0)
+ fprintf(f, " Realtime="USEC_FMT, m->realtime);
+ if (m->seqnum != 0)
+ fprintf(f, " SequenceNumber=%"PRIu64, m->seqnum);
+
+ if (m->monotonic != 0 || m->realtime != 0 || m->seqnum != 0)
+ fputs("\n", f);
+
+ bus_creds_dump(&m->creds, f, true);
+ }
+
+ r = sd_bus_message_rewind(m, !(flags & SD_BUS_MESSAGE_DUMP_SUBTREE_ONLY));
+ if (r < 0)
+ return log_error_errno(r, "Failed to rewind: %m");
+
+ if (!(flags & SD_BUS_MESSAGE_DUMP_SUBTREE_ONLY)) {
+ _cleanup_free_ char *prefix = NULL;
+
+ prefix = indent(0, flags);
+ if (!prefix)
+ return log_oom();
+
+ fprintf(f, "%sMESSAGE \"%s\" {\n", prefix, strempty(m->root_container.signature));
+ }
+
+ for (;;) {
+ _cleanup_free_ char *prefix = NULL;
+ const char *contents = NULL;
+ char type;
+ union {
+ uint8_t u8;
+ uint16_t u16;
+ int16_t s16;
+ uint32_t u32;
+ int32_t s32;
+ uint64_t u64;
+ int64_t s64;
+ double d64;
+ const char *string;
+ int i;
+ } basic;
+
+ r = sd_bus_message_peek_type(m, &type, &contents);
+ if (r < 0)
+ return log_error_errno(r, "Failed to peek type: %m");
+
+ if (r == 0) {
+ if (level <= 1)
+ break;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to exit container: %m");
+
+ level--;
+
+ prefix = indent(level, flags);
+ if (!prefix)
+ return log_oom();
+
+ fprintf(f, "%s};\n", prefix);
+ continue;
+ }
+
+ prefix = indent(level, flags);
+ if (!prefix)
+ return log_oom();
+
+ if (bus_type_is_container(type) > 0) {
+ r = sd_bus_message_enter_container(m, type, contents);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enter container: %m");
+
+ if (type == SD_BUS_TYPE_ARRAY)
+ fprintf(f, "%sARRAY \"%s\" {\n", prefix, contents);
+ else if (type == SD_BUS_TYPE_VARIANT)
+ fprintf(f, "%sVARIANT \"%s\" {\n", prefix, contents);
+ else if (type == SD_BUS_TYPE_STRUCT)
+ fprintf(f, "%sSTRUCT \"%s\" {\n", prefix, contents);
+ else if (type == SD_BUS_TYPE_DICT_ENTRY)
+ fprintf(f, "%sDICT_ENTRY \"%s\" {\n", prefix, contents);
+
+ level++;
+
+ continue;
+ }
+
+ r = sd_bus_message_read_basic(m, type, &basic);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get basic: %m");
+
+ assert(r > 0);
+
+ switch (type) {
+
+ case SD_BUS_TYPE_BYTE:
+ fprintf(f, "%sBYTE %s%u%s;\n", prefix, ansi_highlight(), basic.u8, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_BOOLEAN:
+ fprintf(f, "%sBOOLEAN %s%s%s;\n", prefix, ansi_highlight(), true_false(basic.i), ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_INT16:
+ fprintf(f, "%sINT16 %s%i%s;\n", prefix, ansi_highlight(), basic.s16, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_UINT16:
+ fprintf(f, "%sUINT16 %s%u%s;\n", prefix, ansi_highlight(), basic.u16, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_INT32:
+ fprintf(f, "%sINT32 %s%i%s;\n", prefix, ansi_highlight(), basic.s32, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_UINT32:
+ fprintf(f, "%sUINT32 %s%u%s;\n", prefix, ansi_highlight(), basic.u32, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_INT64:
+ fprintf(f, "%sINT64 %s%"PRIi64"%s;\n", prefix, ansi_highlight(), basic.s64, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_UINT64:
+ fprintf(f, "%sUINT64 %s%"PRIu64"%s;\n", prefix, ansi_highlight(), basic.u64, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_DOUBLE:
+ fprintf(f, "%sDOUBLE %s%g%s;\n", prefix, ansi_highlight(), basic.d64, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_STRING:
+ fprintf(f, "%sSTRING \"%s%s%s\";\n", prefix, ansi_highlight(), basic.string, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_OBJECT_PATH:
+ fprintf(f, "%sOBJECT_PATH \"%s%s%s\";\n", prefix, ansi_highlight(), basic.string, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_SIGNATURE:
+ fprintf(f, "%sSIGNATURE \"%s%s%s\";\n", prefix, ansi_highlight(), basic.string, ansi_normal());
+ break;
+
+ case SD_BUS_TYPE_UNIX_FD:
+ fprintf(f, "%sUNIX_FD %s%i%s;\n", prefix, ansi_highlight(), basic.i, ansi_normal());
+ break;
+
+ default:
+ assert_not_reached();
+ }
+ }
+
+ if (!(flags & SD_BUS_MESSAGE_DUMP_SUBTREE_ONLY)) {
+ _cleanup_free_ char *prefix = NULL;
+
+ prefix = indent(0, flags);
+ if (!prefix)
+ return log_oom();
+
+ fprintf(f, "%s};\n\n", prefix);
+ }
+
+ return 0;
+}
+
+static void dump_capabilities(
+ sd_bus_creds *c,
+ FILE *f,
+ const char *name,
+ bool terse,
+ int (*has)(sd_bus_creds *c, int capability)) {
+
+ unsigned long i, last_cap;
+ unsigned n = 0;
+ int r;
+
+ assert(c);
+ assert(f);
+ assert(name);
+ assert(has);
+
+ i = 0;
+ r = has(c, i);
+ if (r < 0)
+ return;
+
+ fprintf(f, "%s%s=%s", terse ? " " : "", name, terse ? "" : ansi_highlight());
+ last_cap = cap_last_cap();
+
+ for (;;) {
+ if (r > 0) {
+
+ if (n > 0)
+ fputc(' ', f);
+ if (n % 4 == 3)
+ fprintf(f, terse ? "\n " : "\n ");
+
+ fprintf(f, "%s", strna(capability_to_name(i)));
+ n++;
+ }
+
+ i++;
+
+ if (i > last_cap)
+ break;
+
+ r = has(c, i);
+ }
+
+ fputs("\n", f);
+
+ if (!terse)
+ fputs(ansi_normal(), f);
+}
+
+int bus_creds_dump(sd_bus_creds *c, FILE *f, bool terse) {
+ uid_t owner, audit_loginuid;
+ uint32_t audit_sessionid;
+ char **cmdline = NULL, **well_known = NULL;
+ const char *prefix, *color, *suffix, *s;
+ int r, q, v, w, z;
+
+ assert(c);
+
+ if (!f)
+ f = stdout;
+
+ if (terse) {
+ prefix = " ";
+ suffix = "";
+ color = "";
+ } else {
+ const char *off;
+
+ prefix = "";
+ color = ansi_highlight();
+
+ off = ansi_normal();
+ suffix = strjoina(off, "\n");
+ }
+
+ if (c->mask & SD_BUS_CREDS_PID)
+ fprintf(f, "%sPID=%s"PID_FMT"%s", prefix, color, c->pid, suffix);
+ if (c->mask & SD_BUS_CREDS_TID)
+ fprintf(f, "%sTID=%s"PID_FMT"%s", prefix, color, c->tid, suffix);
+ if (c->mask & SD_BUS_CREDS_PPID) {
+ if (c->ppid == 0)
+ fprintf(f, "%sPPID=%sn/a%s", prefix, color, suffix);
+ else
+ fprintf(f, "%sPPID=%s"PID_FMT"%s", prefix, color, c->ppid, suffix);
+ }
+ if (c->mask & SD_BUS_CREDS_TTY)
+ fprintf(f, "%sTTY=%s%s%s", prefix, color, strna(c->tty), suffix);
+
+ if (terse && ((c->mask & (SD_BUS_CREDS_PID|SD_BUS_CREDS_TID|SD_BUS_CREDS_PPID|SD_BUS_CREDS_TTY))))
+ fputs("\n", f);
+
+ if (c->mask & SD_BUS_CREDS_UID)
+ fprintf(f, "%sUID=%s"UID_FMT"%s", prefix, color, c->uid, suffix);
+ if (c->mask & SD_BUS_CREDS_EUID)
+ fprintf(f, "%sEUID=%s"UID_FMT"%s", prefix, color, c->euid, suffix);
+ if (c->mask & SD_BUS_CREDS_SUID)
+ fprintf(f, "%sSUID=%s"UID_FMT"%s", prefix, color, c->suid, suffix);
+ if (c->mask & SD_BUS_CREDS_FSUID)
+ fprintf(f, "%sFSUID=%s"UID_FMT"%s", prefix, color, c->fsuid, suffix);
+ r = sd_bus_creds_get_owner_uid(c, &owner);
+ if (r >= 0)
+ fprintf(f, "%sOwnerUID=%s"UID_FMT"%s", prefix, color, owner, suffix);
+ if (c->mask & SD_BUS_CREDS_GID)
+ fprintf(f, "%sGID=%s"GID_FMT"%s", prefix, color, c->gid, suffix);
+ if (c->mask & SD_BUS_CREDS_EGID)
+ fprintf(f, "%sEGID=%s"GID_FMT"%s", prefix, color, c->egid, suffix);
+ if (c->mask & SD_BUS_CREDS_SGID)
+ fprintf(f, "%sSGID=%s"GID_FMT"%s", prefix, color, c->sgid, suffix);
+ if (c->mask & SD_BUS_CREDS_FSGID)
+ fprintf(f, "%sFSGID=%s"GID_FMT"%s", prefix, color, c->fsgid, suffix);
+
+ if (c->mask & SD_BUS_CREDS_SUPPLEMENTARY_GIDS) {
+ fprintf(f, "%sSupplementaryGIDs=%s", prefix, color);
+ for (unsigned i = 0; i < c->n_supplementary_gids; i++)
+ fprintf(f, "%s" GID_FMT, i > 0 ? " " : "", c->supplementary_gids[i]);
+ fprintf(f, "%s", suffix);
+ }
+
+ if (terse && ((c->mask & (SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_SUID|SD_BUS_CREDS_FSUID|
+ SD_BUS_CREDS_GID|SD_BUS_CREDS_EGID|SD_BUS_CREDS_SGID|SD_BUS_CREDS_FSGID|
+ SD_BUS_CREDS_SUPPLEMENTARY_GIDS)) || r >= 0))
+ fputs("\n", f);
+
+ if (c->mask & SD_BUS_CREDS_COMM)
+ fprintf(f, "%sComm=%s%s%s", prefix, color, c->comm, suffix);
+ if (c->mask & SD_BUS_CREDS_TID_COMM)
+ fprintf(f, "%sTIDComm=%s%s%s", prefix, color, c->tid_comm, suffix);
+ if (c->mask & SD_BUS_CREDS_EXE)
+ fprintf(f, "%sExe=%s%s%s", prefix, color, strna(c->exe), suffix);
+
+ if (terse && (c->mask & (SD_BUS_CREDS_EXE|SD_BUS_CREDS_COMM|SD_BUS_CREDS_TID_COMM)))
+ fputs("\n", f);
+
+ r = sd_bus_creds_get_cmdline(c, &cmdline);
+ if (r >= 0) {
+ fprintf(f, "%sCommandLine=%s", prefix, color);
+ STRV_FOREACH(i, cmdline) {
+ if (i != cmdline)
+ fputc(' ', f);
+
+ fputs(*i, f);
+ }
+
+ fprintf(f, "%s", suffix);
+ } else if (r != -ENODATA)
+ fprintf(f, "%sCommandLine=%sn/a%s", prefix, color, suffix);
+
+ if (c->mask & SD_BUS_CREDS_SELINUX_CONTEXT)
+ fprintf(f, "%sLabel=%s%s%s", prefix, color, c->label, suffix);
+ if (c->mask & SD_BUS_CREDS_DESCRIPTION)
+ fprintf(f, "%sDescription=%s%s%s", prefix, color, c->description, suffix);
+
+ if (terse && (c->mask & (SD_BUS_CREDS_SELINUX_CONTEXT|SD_BUS_CREDS_DESCRIPTION)))
+ fputs("\n", f);
+
+ if (c->mask & SD_BUS_CREDS_CGROUP)
+ fprintf(f, "%sCGroup=%s%s%s", prefix, color, c->cgroup, suffix);
+ s = NULL;
+ r = sd_bus_creds_get_unit(c, &s);
+ if (r != -ENODATA)
+ fprintf(f, "%sUnit=%s%s%s", prefix, color, strna(s), suffix);
+ s = NULL;
+ v = sd_bus_creds_get_slice(c, &s);
+ if (v != -ENODATA)
+ fprintf(f, "%sSlice=%s%s%s", prefix, color, strna(s), suffix);
+ s = NULL;
+ q = sd_bus_creds_get_user_unit(c, &s);
+ if (q != -ENODATA)
+ fprintf(f, "%sUserUnit=%s%s%s", prefix, color, strna(s), suffix);
+ s = NULL;
+ w = sd_bus_creds_get_user_slice(c, &s);
+ if (w != -ENODATA)
+ fprintf(f, "%sUserSlice=%s%s%s", prefix, color, strna(s), suffix);
+ s = NULL;
+ z = sd_bus_creds_get_session(c, &s);
+ if (z != -ENODATA)
+ fprintf(f, "%sSession=%s%s%s", prefix, color, strna(s), suffix);
+
+ if (terse && ((c->mask & SD_BUS_CREDS_CGROUP) || r != -ENODATA || q != -ENODATA || v != -ENODATA || w != -ENODATA || z != -ENODATA))
+ fputs("\n", f);
+
+ r = sd_bus_creds_get_audit_login_uid(c, &audit_loginuid);
+ if (r >= 0)
+ fprintf(f, "%sAuditLoginUID=%s"UID_FMT"%s", prefix, color, audit_loginuid, suffix);
+ else if (r != -ENODATA)
+ fprintf(f, "%sAuditLoginUID=%sn/a%s", prefix, color, suffix);
+ q = sd_bus_creds_get_audit_session_id(c, &audit_sessionid);
+ if (q >= 0)
+ fprintf(f, "%sAuditSessionID=%s%"PRIu32"%s", prefix, color, audit_sessionid, suffix);
+ else if (q != -ENODATA)
+ fprintf(f, "%sAuditSessionID=%sn/a%s", prefix, color, suffix);
+
+ if (terse && (r != -ENODATA || q != -ENODATA))
+ fputs("\n", f);
+
+ if (c->mask & SD_BUS_CREDS_UNIQUE_NAME)
+ fprintf(f, "%sUniqueName=%s%s%s", prefix, color, c->unique_name, suffix);
+
+ if (sd_bus_creds_get_well_known_names(c, &well_known) >= 0) {
+ fprintf(f, "%sWellKnownNames=%s", prefix, color);
+ STRV_FOREACH(i, well_known) {
+ if (i != well_known)
+ fputc(' ', f);
+
+ fputs(*i, f);
+ }
+
+ fprintf(f, "%s", suffix);
+ }
+
+ if (terse && (c->mask & SD_BUS_CREDS_UNIQUE_NAME || well_known))
+ fputc('\n', f);
+
+ dump_capabilities(c, f, "EffectiveCapabilities", terse, sd_bus_creds_has_effective_cap);
+ dump_capabilities(c, f, "PermittedCapabilities", terse, sd_bus_creds_has_permitted_cap);
+ dump_capabilities(c, f, "InheritableCapabilities", terse, sd_bus_creds_has_inheritable_cap);
+ dump_capabilities(c, f, "BoundingCapabilities", terse, sd_bus_creds_has_bounding_cap);
+
+ return 0;
+}
+
+static uint16_t pcapng_optlen(size_t len) {
+ return ALIGN4(len + sizeof(struct pcapng_option));
+}
+
+static void pcapng_putopt(FILE *f, uint16_t code, const void *data, size_t len) {
+ struct pcapng_option opt = {
+ .code = code,
+ .length = len,
+ };
+
+ assert(f);
+ assert((uint16_t) len == len);
+ assert(data || len == 0);
+
+ fwrite(&opt, 1, sizeof(opt), f);
+ if (len > 0) {
+ size_t pad = ALIGN4(len) - len;
+
+ fwrite(data, 1, len, f);
+
+ assert(pad < sizeof(uint32_t));
+ while (pad-- > 0)
+ fputc('\0', f);
+ }
+}
+
+static void pcapng_section_header(FILE *f, const char *os, const char *app) {
+ uint32_t len;
+
+ assert(f);
+
+ /* determine length of section header and options */
+ len = sizeof(struct pcapng_section);
+ if (os)
+ len += pcapng_optlen(strlen(os));
+ if (app)
+ len += pcapng_optlen(strlen(app));
+ len += pcapng_optlen(0); /* OPT_END */
+ len += sizeof(uint32_t); /* trailer length */
+
+ struct pcapng_section hdr = {
+ .block_type = PCAPNG_SECTION_BLOCK,
+ .block_length = len,
+ .byte_order_magic = PCAPNG_BYTE_ORDER_MAGIC,
+ .major_version = PCAPNG_MAJOR_VERS,
+ .minor_version = PCAPNG_MINOR_VERS,
+ .section_length = UINT64_MAX,
+ };
+
+ fwrite(&hdr, 1, sizeof(hdr), f);
+ if (os)
+ pcapng_putopt(f, PCAPNG_SHB_OS, os, strlen(os));
+ if (app)
+ pcapng_putopt(f, PCAPNG_SHB_USERAPPL, app, strlen(app));
+ pcapng_putopt(f, PCAPNG_OPT_END, NULL, 0);
+ fwrite(&len, 1, sizeof(uint32_t), f);
+}
+
+/* Only have a single instance of dbus pseudo interface */
+static void pcapng_interface_header(FILE *f, size_t snaplen) {
+ uint32_t len;
+
+ assert(f);
+ assert(snaplen > 0);
+ assert((size_t) (uint32_t) snaplen == snaplen);
+
+ /* no options (yet) */
+ len = sizeof(struct pcapng_interface_block) + sizeof(uint32_t);
+ struct pcapng_interface_block hdr = {
+ .block_type = PCAPNG_INTERFACE_BLOCK,
+ .block_length = len,
+ .link_type = 231, /* D-Bus */
+ .snap_len = snaplen,
+ };
+
+ fwrite(&hdr, 1, sizeof(hdr), f);
+ fwrite(&len, 1, sizeof(uint32_t), f);
+}
+
+int bus_pcap_header(size_t snaplen, const char *os, const char *info, FILE *f) {
+ if (!f)
+ f = stdout;
+
+ pcapng_section_header(f, os, info);
+ pcapng_interface_header(f, snaplen);
+ return fflush_and_check(f);
+}
+
+int bus_message_pcap_frame(sd_bus_message *m, size_t snaplen, FILE *f) {
+ struct bus_body_part *part;
+ size_t msglen, caplen, pad;
+ uint32_t length;
+ uint64_t ts;
+ unsigned i;
+ size_t w;
+
+ if (!f)
+ f = stdout;
+
+ assert(m);
+ assert(snaplen > 0);
+ assert((size_t) (uint32_t) snaplen == snaplen);
+
+ ts = m->realtime ?: now(CLOCK_REALTIME);
+ msglen = BUS_MESSAGE_SIZE(m);
+ caplen = MIN(msglen, snaplen);
+ pad = ALIGN4(caplen) - caplen;
+
+ /* packet block has no options */
+ length = sizeof(struct pcapng_enhance_packet_block)
+ + caplen + pad + sizeof(uint32_t);
+
+ struct pcapng_enhance_packet_block epb = {
+ .block_type = PCAPNG_ENHANCED_PACKET_BLOCK,
+ .block_length = length,
+ .interface_id = 0,
+ .timestamp_hi = (uint32_t)(ts >> 32),
+ .timestamp_lo = (uint32_t)ts,
+ .original_length = msglen,
+ .capture_length = caplen,
+ };
+
+ /* write the pcapng enhanced packet block header */
+ fwrite(&epb, 1, sizeof(epb), f);
+
+ /* write the dbus header */
+ w = MIN(BUS_MESSAGE_BODY_BEGIN(m), snaplen);
+ fwrite(m->header, 1, w, f);
+ snaplen -= w;
+
+ /* write the dbus body */
+ MESSAGE_FOREACH_PART(part, i, m) {
+ if (snaplen <= 0)
+ break;
+
+ w = MIN(part->size, snaplen);
+ fwrite(part->data, 1, w, f);
+ snaplen -= w;
+ }
+
+ while (pad-- > 0)
+ fputc('\0', f);
+
+ /* trailing block length */
+ fwrite(&length, 1, sizeof(uint32_t), f);
+
+ return fflush_and_check(f);
+}
diff --git a/src/libsystemd/sd-bus/bus-dump.h b/src/libsystemd/sd-bus/bus-dump.h
new file mode 100644
index 0000000..e7470ba
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-dump.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "sd-bus.h"
+
+int bus_creds_dump(sd_bus_creds *c, FILE *f, bool terse);
+
+int bus_pcap_header(size_t snaplen, const char *os, const char *app, FILE *f);
+int bus_message_pcap_frame(sd_bus_message *m, size_t snaplen, FILE *f);
diff --git a/src/libsystemd/sd-bus/bus-error.c b/src/libsystemd/sd-bus/bus-error.c
new file mode 100644
index 0000000..77b2e1a
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-error.c
@@ -0,0 +1,628 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "errno-list.h"
+#include "errno-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map bus_standard_errors[] = {
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_FAILED, EACCES),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_NO_MEMORY, ENOMEM),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_SERVICE_UNKNOWN, EHOSTUNREACH),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_NAME_HAS_NO_OWNER, ENXIO),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_NO_REPLY, ETIMEDOUT),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_IO_ERROR, EIO),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_BAD_ADDRESS, EADDRNOTAVAIL),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_NOT_SUPPORTED, EOPNOTSUPP),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_LIMITS_EXCEEDED, ENOBUFS),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_ACCESS_DENIED, EACCES),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_AUTH_FAILED, EACCES),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_NO_SERVER, EHOSTDOWN),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_TIMEOUT, ETIMEDOUT),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_NO_NETWORK, ENONET),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_ADDRESS_IN_USE, EADDRINUSE),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_DISCONNECTED, ECONNRESET),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_INVALID_ARGS, EINVAL),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_FILE_NOT_FOUND, ENOENT),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_FILE_EXISTS, EEXIST),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_UNKNOWN_METHOD, EBADR),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_UNKNOWN_OBJECT, EBADR),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_UNKNOWN_INTERFACE, EBADR),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_UNKNOWN_PROPERTY, EBADR),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_PROPERTY_READ_ONLY, EROFS),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_UNIX_PROCESS_ID_UNKNOWN, ESRCH),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_INVALID_SIGNATURE, EINVAL),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_INCONSISTENT_MESSAGE, EBADMSG),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_TIMED_OUT, ETIMEDOUT),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_MATCH_RULE_NOT_FOUND, ENOENT),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_MATCH_RULE_INVALID, EINVAL),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_INTERACTIVE_AUTHORIZATION_REQUIRED, EACCES),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_INVALID_FILE_CONTENT, EINVAL),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_SELINUX_SECURITY_CONTEXT_UNKNOWN, ESRCH),
+ SD_BUS_ERROR_MAP(SD_BUS_ERROR_OBJECT_PATH_IN_USE, EBUSY),
+ SD_BUS_ERROR_MAP_END
+};
+
+/* GCC maps this magically to the beginning and end of the BUS_ERROR_MAP section */
+extern const sd_bus_error_map __start_SYSTEMD_BUS_ERROR_MAP[];
+extern const sd_bus_error_map __stop_SYSTEMD_BUS_ERROR_MAP[];
+
+/* Additional maps registered with sd_bus_error_add_map() are in this
+ * NULL terminated array */
+static const sd_bus_error_map **additional_error_maps = NULL;
+
+static int bus_error_name_to_errno(const char *name) {
+ const sd_bus_error_map **map, *m;
+ const char *p;
+ int r;
+
+ if (!name)
+ return EINVAL;
+
+ p = startswith(name, "System.Error.");
+ if (p) {
+ r = errno_from_name(p);
+ if (r < 0)
+ return EIO;
+
+ return r;
+ }
+
+ if (additional_error_maps)
+ for (map = additional_error_maps; *map; map++)
+ for (m = *map;; m++) {
+ /* For additional error maps the end marker is actually the end marker */
+ if (m->code == BUS_ERROR_MAP_END_MARKER)
+ break;
+
+ if (streq(m->name, name)) {
+ assert(m->code > 0);
+ return m->code;
+ }
+ }
+
+ m = ALIGN_PTR(__start_SYSTEMD_BUS_ERROR_MAP);
+ while (m < __stop_SYSTEMD_BUS_ERROR_MAP) {
+ /* For magic ELF error maps, the end marker might
+ * appear in the middle of things, since multiple maps
+ * might appear in the same section. Hence, let's skip
+ * over it, but realign the pointer to the next 8 byte
+ * boundary, which is the selected alignment for the
+ * arrays. */
+ if (m->code == BUS_ERROR_MAP_END_MARKER) {
+ m = ALIGN_PTR(m + 1);
+ continue;
+ }
+
+ if (streq(m->name, name)) {
+ assert(m->code > 0);
+ return m->code;
+ }
+
+ m++;
+ }
+
+ return EIO;
+}
+
+static sd_bus_error errno_to_bus_error_const(int error) {
+
+ if (error < 0)
+ error = -error;
+
+ switch (error) {
+
+ case ENOMEM:
+ return BUS_ERROR_OOM;
+
+ case EPERM:
+ case EACCES:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_ACCESS_DENIED, "Access denied");
+
+ case EINVAL:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_INVALID_ARGS, "Invalid argument");
+
+ case ESRCH:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_UNIX_PROCESS_ID_UNKNOWN, "No such process");
+
+ case ENOENT:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_FILE_NOT_FOUND, "File not found");
+
+ case EEXIST:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_FILE_EXISTS, "File exists");
+
+ case ETIMEDOUT:
+ case ETIME:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_TIMEOUT, "Timed out");
+
+ case EIO:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_IO_ERROR, "Input/output error");
+
+ case ENETRESET:
+ case ECONNABORTED:
+ case ECONNRESET:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_DISCONNECTED, "Disconnected");
+
+ case EOPNOTSUPP:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_NOT_SUPPORTED, "Not supported");
+
+ case EADDRNOTAVAIL:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_BAD_ADDRESS, "Address not available");
+
+ case ENOBUFS:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_LIMITS_EXCEEDED, "Limits exceeded");
+
+ case EADDRINUSE:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_ADDRESS_IN_USE, "Address in use");
+
+ case EBADMSG:
+ return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_INCONSISTENT_MESSAGE, "Inconsistent message");
+ }
+
+ return SD_BUS_ERROR_NULL;
+}
+
+static int errno_to_bus_error_name_new(int error, char **ret) {
+ const char *name;
+ char *n;
+
+ if (error < 0)
+ error = -error;
+
+ name = errno_to_name(error);
+ if (!name)
+ return 0;
+
+ n = strjoin("System.Error.", name);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 1;
+}
+
+bool bus_error_is_dirty(sd_bus_error *e) {
+ if (!e)
+ return false;
+
+ return e->name || e->message || e->_need_free != 0;
+}
+
+_public_ void sd_bus_error_free(sd_bus_error *e) {
+ if (!e)
+ return;
+
+ if (e->_need_free > 0) {
+ free((void*) e->name);
+ free((void*) e->message);
+ }
+
+ *e = SD_BUS_ERROR_NULL;
+}
+
+_public_ int sd_bus_error_set(sd_bus_error *e, const char *name, const char *message) {
+ int r;
+
+ if (!name)
+ return 0;
+
+ if (e) {
+ assert_return(!bus_error_is_dirty(e), -EINVAL);
+
+ e->name = strdup(name);
+ if (!e->name) {
+ *e = BUS_ERROR_OOM;
+ return -ENOMEM;
+ }
+
+ if (message)
+ e->message = strdup(message);
+
+ e->_need_free = 1;
+ }
+
+ r = bus_error_name_to_errno(name);
+ assert(r > 0);
+ return -r;
+}
+
+_public_ int sd_bus_error_setfv(sd_bus_error *e, const char *name, const char *format, va_list ap) {
+ int r;
+
+ if (!name)
+ return 0;
+
+ if (e) {
+ assert_return(!bus_error_is_dirty(e), -EINVAL);
+
+ e->name = strdup(name);
+ if (!e->name) {
+ *e = BUS_ERROR_OOM;
+ return -ENOMEM;
+ }
+
+ if (format) {
+ _cleanup_free_ char *mesg = NULL;
+
+ /* If we hit OOM on formatting the pretty message, we ignore
+ * this, since we at least managed to write the error name */
+
+ if (vasprintf(&mesg, format, ap) >= 0)
+ e->message = TAKE_PTR(mesg);
+ }
+
+ e->_need_free = 1;
+ }
+
+ r = bus_error_name_to_errno(name);
+ assert(r > 0);
+ return -r;
+}
+
+_public_ int sd_bus_error_setf(sd_bus_error *e, const char *name, const char *format, ...) {
+ int r;
+
+ if (format) {
+ va_list ap;
+
+ va_start(ap, format);
+ r = sd_bus_error_setfv(e, name, format, ap);
+ assert(!name || r < 0);
+ va_end(ap);
+
+ return r;
+ }
+
+ r = sd_bus_error_set(e, name, NULL);
+ assert(!name || r < 0);
+ return r;
+}
+
+_public_ int sd_bus_error_copy(sd_bus_error *dest, const sd_bus_error *e) {
+
+ if (!sd_bus_error_is_set(e))
+ return 0;
+ if (!dest)
+ goto finish;
+
+ assert_return(!bus_error_is_dirty(dest), -EINVAL);
+
+ /*
+ * _need_free < 0 indicates that the error is temporarily const, needs deep copying
+ * _need_free == 0 indicates that the error is perpetually const, needs no deep copying
+ * _need_free > 0 indicates that the error is fully dynamic, needs deep copying
+ */
+
+ if (e->_need_free == 0)
+ *dest = *e;
+ else {
+ dest->name = strdup(e->name);
+ if (!dest->name) {
+ *dest = BUS_ERROR_OOM;
+ return -ENOMEM;
+ }
+
+ if (e->message)
+ dest->message = strdup(e->message);
+
+ dest->_need_free = 1;
+ }
+
+finish:
+ return -bus_error_name_to_errno(e->name);
+}
+
+_public_ int sd_bus_error_move(sd_bus_error *dest, sd_bus_error *e) {
+ int r;
+
+ if (!sd_bus_error_is_set(e)) {
+
+ if (dest)
+ *dest = SD_BUS_ERROR_NULL;
+
+ return 0;
+ }
+
+ r = -bus_error_name_to_errno(e->name);
+
+ if (dest) {
+ *dest = *e;
+ *e = SD_BUS_ERROR_NULL;
+ } else
+ sd_bus_error_free(e);
+
+ return r;
+}
+
+_public_ int sd_bus_error_set_const(sd_bus_error *e, const char *name, const char *message) {
+ if (!name)
+ return 0;
+ if (!e)
+ goto finish;
+
+ assert_return(!bus_error_is_dirty(e), -EINVAL);
+
+ *e = SD_BUS_ERROR_MAKE_CONST(name, message);
+
+finish:
+ return -bus_error_name_to_errno(name);
+}
+
+_public_ int sd_bus_error_is_set(const sd_bus_error *e) {
+ if (!e)
+ return 0;
+
+ return !!e->name;
+}
+
+_public_ int sd_bus_error_has_name(const sd_bus_error *e, const char *name) {
+ if (!e)
+ return 0;
+
+ return streq_ptr(e->name, name);
+}
+
+_public_ int sd_bus_error_has_names_sentinel(const sd_bus_error *e, ...) {
+ if (!e || !e->name)
+ return 0;
+
+ va_list ap;
+ const char *p;
+
+ va_start(ap, e);
+ while ((p = va_arg(ap, const char *)))
+ if (streq(p, e->name))
+ break;
+ va_end(ap);
+ return !!p;
+}
+
+_public_ int sd_bus_error_get_errno(const sd_bus_error* e) {
+ if (!e || !e->name)
+ return 0;
+
+ return bus_error_name_to_errno(e->name);
+}
+
+static void bus_error_strerror(sd_bus_error *e, int error) {
+ size_t k = 64;
+ char *m;
+
+ assert(e);
+
+ for (;;) {
+ char *x;
+
+ m = new(char, k);
+ if (!m)
+ return;
+
+ errno = 0;
+ x = strerror_r(error, m, k);
+ if (errno == ERANGE || strlen(x) >= k - 1) {
+ free(m);
+ k *= 2;
+ continue;
+ }
+
+ if (errno) {
+ free(m);
+ return;
+ }
+
+ if (x == m) {
+ if (e->_need_free > 0) {
+ /* Error is already dynamic, let's just update the message */
+ free((char*) e->message);
+ e->message = x;
+
+ } else {
+ char *t;
+ /* Error was const so far, let's make it dynamic, if we can */
+
+ t = strdup(e->name);
+ if (!t) {
+ free(m);
+ return;
+ }
+
+ e->_need_free = 1;
+ e->name = t;
+ e->message = x;
+ }
+ } else {
+ free(m);
+
+ if (e->_need_free > 0) {
+ char *t;
+
+ /* Error is dynamic, let's hence make the message also dynamic */
+ t = strdup(x);
+ if (!t)
+ return;
+
+ free((char*) e->message);
+ e->message = t;
+ } else {
+ /* Error is const, hence we can just override */
+ e->message = x;
+ }
+ }
+
+ return;
+ }
+}
+
+_public_ int sd_bus_error_set_errno(sd_bus_error *e, int error) {
+
+ if (error < 0)
+ error = -error;
+
+ if (!e)
+ return -error;
+ if (error == 0)
+ return 0;
+
+ assert_return(!bus_error_is_dirty(e), -EINVAL);
+
+ /* First, try a const translation */
+ *e = errno_to_bus_error_const(error);
+
+ if (!sd_bus_error_is_set(e)) {
+ int k;
+
+ /* If that didn't work, try a dynamic one. */
+
+ k = errno_to_bus_error_name_new(error, (char**) &e->name);
+ if (k > 0)
+ e->_need_free = 1;
+ else if (k < 0) {
+ *e = BUS_ERROR_OOM;
+ return -error;
+ } else
+ *e = BUS_ERROR_FAILED;
+ }
+
+ /* Now, fill in the message from strerror_r() if we can */
+ bus_error_strerror(e, error);
+ return -error;
+}
+
+_public_ int sd_bus_error_set_errnofv(sd_bus_error *e, int error, const char *format, va_list ap) {
+ PROTECT_ERRNO;
+
+ if (error < 0)
+ error = -error;
+
+ if (!e)
+ return -error;
+ if (error == 0)
+ return 0;
+
+ assert_return(!bus_error_is_dirty(e), -EINVAL);
+
+ /* First, try a const translation */
+ *e = errno_to_bus_error_const(error);
+
+ if (!sd_bus_error_is_set(e)) {
+ int k;
+
+ /* If that didn't work, try a dynamic one */
+
+ k = errno_to_bus_error_name_new(error, (char**) &e->name);
+ if (k > 0)
+ e->_need_free = 1;
+ else if (k < 0) {
+ *e = BUS_ERROR_OOM;
+ return -ENOMEM;
+ } else
+ *e = BUS_ERROR_FAILED;
+ }
+
+ if (format) {
+ _cleanup_free_ char *m = NULL;
+
+ /* Then, let's try to fill in the supplied message */
+
+ errno = error; /* Make sure that %m resolves to the specified error */
+ if (vasprintf(&m, format, ap) < 0)
+ goto fail;
+
+ if (e->_need_free <= 0) {
+ char *t;
+
+ t = strdup(e->name);
+ if (!t)
+ goto fail;
+
+ e->_need_free = 1;
+ e->name = t;
+ }
+
+ e->message = TAKE_PTR(m);
+ return -error;
+ }
+
+fail:
+ /* If that didn't work, use strerror_r() for the message */
+ bus_error_strerror(e, error);
+ return -error;
+}
+
+_public_ int sd_bus_error_set_errnof(sd_bus_error *e, int error, const char *format, ...) {
+ int r;
+
+ if (error < 0)
+ error = -error;
+
+ if (!e)
+ return -error;
+ if (error == 0)
+ return 0;
+
+ assert_return(!bus_error_is_dirty(e), -EINVAL);
+
+ if (format) {
+ va_list ap;
+
+ va_start(ap, format);
+ r = sd_bus_error_set_errnofv(e, error, format, ap);
+ va_end(ap);
+
+ return r;
+ }
+
+ return sd_bus_error_set_errno(e, error);
+}
+
+const char* _bus_error_message(const sd_bus_error *e, int error, char buf[static ERRNO_BUF_LEN]) {
+ /* Sometimes, the D-Bus server is a little bit too verbose with
+ * its error messages, so let's override them here */
+ if (sd_bus_error_has_name(e, SD_BUS_ERROR_ACCESS_DENIED))
+ return "Access denied";
+
+ if (e && e->message)
+ return e->message;
+
+ return strerror_r(abs(error), buf, ERRNO_BUF_LEN);
+}
+
+static bool map_ok(const sd_bus_error_map *map) {
+ for (; map->code != BUS_ERROR_MAP_END_MARKER; map++)
+ if (!map->name || map->code <= 0)
+ return false;
+ return true;
+}
+
+_public_ int sd_bus_error_add_map(const sd_bus_error_map *map) {
+ const sd_bus_error_map **maps = NULL;
+ unsigned n = 0;
+
+ assert_return(map, -EINVAL);
+ assert_return(map_ok(map), -EINVAL);
+
+ if (additional_error_maps)
+ for (; additional_error_maps[n] != NULL; n++)
+ if (additional_error_maps[n] == map)
+ return 0;
+
+ maps = reallocarray(additional_error_maps, n + 2, sizeof(struct sd_bus_error_map*));
+ if (!maps)
+ return -ENOMEM;
+
+ maps[n] = map;
+ maps[n+1] = NULL;
+
+ additional_error_maps = maps;
+ return 1;
+}
diff --git a/src/libsystemd/sd-bus/bus-error.h b/src/libsystemd/sd-bus/bus-error.h
new file mode 100644
index 0000000..c8768c9
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-error.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-bus.h"
+
+#include "errno-util.h"
+#include "macro.h"
+
+bool bus_error_is_dirty(sd_bus_error *e);
+
+const char* _bus_error_message(const sd_bus_error *e, int error, char buf[static ERRNO_BUF_LEN]);
+
+/* Note: the lifetime of the compound literal is the immediately surrounding block,
+ * see C11 §6.5.2.5, and
+ * https://stackoverflow.com/questions/34880638/compound-literal-lifetime-and-if-blocks */
+#define bus_error_message(e, error) _bus_error_message(e, error, (char[ERRNO_BUF_LEN]){})
+
+#define BUS_ERROR_OOM SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_NO_MEMORY, "Out of memory")
+#define BUS_ERROR_FAILED SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_FAILED, "Operation failed")
+
+/*
+ * There are two ways to register error maps with the error translation
+ * logic: by using BUS_ERROR_MAP_ELF_REGISTER, which however only
+ * works when linked into the same ELF module, or via
+ * sd_bus_error_add_map() which is the official, external API, that
+ * works from any module.
+ *
+ * Note that BUS_ERROR_MAP_ELF_REGISTER has to be used as decorator in
+ * the bus error table, and BUS_ERROR_MAP_ELF_USE has to be used at
+ * least once per compilation unit (i.e. per library), to ensure that
+ * the error map is really added to the final binary.
+ *
+ * In addition, set the retain attribute so that the section cannot be
+ * discarded by ld --gc-sections -z start-stop-gc. Older compilers would
+ * warn for the unknown attribute, so just disable -Wattributes.
+ */
+
+#define BUS_ERROR_MAP_ELF_REGISTER \
+ _Pragma("GCC diagnostic ignored \"-Wattributes\"") \
+ _section_("SYSTEMD_BUS_ERROR_MAP") \
+ _used_ \
+ _retain_ \
+ _alignptr_ \
+ _variable_no_sanitize_address_
+
+#define BUS_ERROR_MAP_ELF_USE(errors) \
+ extern const sd_bus_error_map errors[]; \
+ _used_ \
+ static const sd_bus_error_map * const CONCATENATE(errors ## _copy_, __COUNTER__) = errors;
+
+/* We use something exotic as end marker, to ensure people build the
+ * maps using the macsd-ros. */
+#define BUS_ERROR_MAP_END_MARKER -'x'
+
+BUS_ERROR_MAP_ELF_USE(bus_standard_errors);
diff --git a/src/libsystemd/sd-bus/bus-internal.c b/src/libsystemd/sd-bus/bus-internal.c
new file mode 100644
index 0000000..a249b84
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-internal.c
@@ -0,0 +1,338 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "escape.h"
+#include "hexdecoct.h"
+#include "string-util.h"
+
+bool object_path_is_valid(const char *p) {
+ const char *q;
+ bool slash;
+
+ if (!p)
+ return false;
+
+ if (p[0] != '/')
+ return false;
+
+ if (p[1] == 0)
+ return true;
+
+ for (slash = true, q = p+1; *q; q++)
+ if (*q == '/') {
+ if (slash)
+ return false;
+
+ slash = true;
+ } else {
+ bool good;
+
+ good = ascii_isalpha(*q) ||
+ ascii_isdigit(*q) ||
+ *q == '_';
+
+ if (!good)
+ return false;
+
+ slash = false;
+ }
+
+ if (slash)
+ return false;
+
+ return (q - p) <= BUS_PATH_SIZE_MAX;
+}
+
+char* object_path_startswith(const char *a, const char *b) {
+ const char *p;
+
+ if (!object_path_is_valid(a) ||
+ !object_path_is_valid(b))
+ return NULL;
+
+ if (streq(b, "/"))
+ return (char*) a + 1;
+
+ p = startswith(a, b);
+ if (!p)
+ return NULL;
+
+ if (*p == 0)
+ return (char*) p;
+
+ if (*p == '/')
+ return (char*) p + 1;
+
+ return NULL;
+}
+
+bool interface_name_is_valid(const char *p) {
+ const char *q;
+ bool dot, found_dot = false;
+
+ if (isempty(p))
+ return false;
+
+ for (dot = true, q = p; *q; q++)
+ if (*q == '.') {
+ if (dot)
+ return false;
+
+ found_dot = dot = true;
+ } else {
+ bool good;
+
+ good =
+ ascii_isalpha(*q) ||
+ (!dot && ascii_isdigit(*q)) ||
+ *q == '_';
+
+ if (!good) {
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *iface = cescape(p);
+ log_debug("The interface %s is invalid as it contains special character", strnull(iface));
+ }
+ return false;
+ }
+
+ dot = false;
+ }
+
+ if (q - p > SD_BUS_MAXIMUM_NAME_LENGTH)
+ return false;
+
+ if (dot)
+ return false;
+
+ if (!found_dot)
+ return false;
+
+ return true;
+}
+
+bool service_name_is_valid(const char *p) {
+ const char *q;
+ bool dot, found_dot = false, unique;
+
+ if (isempty(p))
+ return false;
+
+ unique = p[0] == ':';
+
+ for (dot = true, q = unique ? p+1 : p; *q; q++)
+ if (*q == '.') {
+ if (dot)
+ return false;
+
+ found_dot = dot = true;
+ } else {
+ bool good;
+
+ good =
+ ascii_isalpha(*q) ||
+ ((!dot || unique) && ascii_isdigit(*q)) ||
+ IN_SET(*q, '_', '-');
+
+ if (!good)
+ return false;
+
+ dot = false;
+ }
+
+ if (q - p > SD_BUS_MAXIMUM_NAME_LENGTH)
+ return false;
+
+ if (dot)
+ return false;
+
+ if (!found_dot)
+ return false;
+
+ return true;
+}
+
+bool member_name_is_valid(const char *p) {
+ const char *q;
+
+ if (isempty(p))
+ return false;
+
+ for (q = p; *q; q++) {
+ bool good;
+
+ good =
+ ascii_isalpha(*q) ||
+ ascii_isdigit(*q) ||
+ *q == '_';
+
+ if (!good)
+ return false;
+ }
+
+ if (q - p > SD_BUS_MAXIMUM_NAME_LENGTH)
+ return false;
+
+ return true;
+}
+
+/*
+ * Complex pattern match
+ * This checks whether @a is a 'complex-prefix' of @b, or @b is a
+ * 'complex-prefix' of @a, based on strings that consist of labels with @c as
+ * separator. This function returns true if:
+ * - both strings are equal
+ * - either is a prefix of the other and ends with @c
+ * The second rule makes sure that either string needs to be fully included in
+ * the other, and the string which is considered the prefix needs to end with a
+ * separator.
+ */
+static bool complex_pattern_check(char c, const char *a, const char *b) {
+ bool separator = false;
+
+ if (!a && !b)
+ return true;
+
+ if (!a || !b)
+ return false;
+
+ for (;;) {
+ if (*a != *b)
+ return (separator && (*a == 0 || *b == 0));
+
+ if (*a == 0)
+ return true;
+
+ separator = *a == c;
+
+ a++, b++;
+ }
+}
+
+bool namespace_complex_pattern(const char *pattern, const char *value) {
+ return complex_pattern_check('.', pattern, value);
+}
+
+bool path_complex_pattern(const char *pattern, const char *value) {
+ return complex_pattern_check('/', pattern, value);
+}
+
+/*
+ * Simple pattern match
+ * This checks whether @a is a 'simple-prefix' of @b, based on strings that
+ * consist of labels with @c as separator. This function returns true, if:
+ * - if @a and @b are equal
+ * - if @a is a prefix of @b, and the first following character in @b (or the
+ * last character in @a) is @c
+ * The second rule basically makes sure that if @a is a prefix of @b, then @b
+ * must follow with a new label separated by @c. It cannot extend the label.
+ */
+static bool simple_pattern_check(char c, const char *a, const char *b) {
+ bool separator = false;
+
+ if (!a && !b)
+ return true;
+
+ if (!a || !b)
+ return false;
+
+ for (;;) {
+ if (*a != *b)
+ return *a == 0 && (*b == c || separator);
+
+ if (*a == 0)
+ return true;
+
+ separator = *a == c;
+
+ a++, b++;
+ }
+}
+
+bool namespace_simple_pattern(const char *pattern, const char *value) {
+ return simple_pattern_check('.', pattern, value);
+}
+
+bool path_simple_pattern(const char *pattern, const char *value) {
+ return simple_pattern_check('/', pattern, value);
+}
+
+int bus_message_type_from_string(const char *s, uint8_t *u) {
+ if (streq(s, "signal"))
+ *u = SD_BUS_MESSAGE_SIGNAL;
+ else if (streq(s, "method_call"))
+ *u = SD_BUS_MESSAGE_METHOD_CALL;
+ else if (streq(s, "error"))
+ *u = SD_BUS_MESSAGE_METHOD_ERROR;
+ else if (streq(s, "method_return"))
+ *u = SD_BUS_MESSAGE_METHOD_RETURN;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+const char *bus_message_type_to_string(uint8_t u) {
+ if (u == SD_BUS_MESSAGE_SIGNAL)
+ return "signal";
+ else if (u == SD_BUS_MESSAGE_METHOD_CALL)
+ return "method_call";
+ else if (u == SD_BUS_MESSAGE_METHOD_ERROR)
+ return "error";
+ else if (u == SD_BUS_MESSAGE_METHOD_RETURN)
+ return "method_return";
+ else
+ return NULL;
+}
+
+char *bus_address_escape(const char *v) {
+ const char *a;
+ char *r, *b;
+
+ r = new(char, strlen(v)*3+1);
+ if (!r)
+ return NULL;
+
+ for (a = v, b = r; *a; a++) {
+
+ if (ascii_isdigit(*a) ||
+ ascii_isalpha(*a) ||
+ strchr("_-/.", *a))
+ *(b++) = *a;
+ else {
+ *(b++) = '%';
+ *(b++) = hexchar(*a >> 4);
+ *(b++) = hexchar(*a & 0xF);
+ }
+ }
+
+ *b = 0;
+ return r;
+}
+
+int bus_maybe_reply_error(sd_bus_message *m, int r, sd_bus_error *error) {
+ assert(m);
+
+ if (sd_bus_error_is_set(error) || r < 0) {
+ if (m->header->type == SD_BUS_MESSAGE_METHOD_CALL)
+ sd_bus_reply_method_errno(m, r, error);
+ } else
+ return r;
+
+ log_debug("Failed to process message type=%s sender=%s destination=%s path=%s interface=%s member=%s cookie=%" PRIu64 " reply_cookie=%" PRIu64 " signature=%s error-name=%s error-message=%s: %s",
+ bus_message_type_to_string(m->header->type),
+ strna(sd_bus_message_get_sender(m)),
+ strna(sd_bus_message_get_destination(m)),
+ strna(sd_bus_message_get_path(m)),
+ strna(sd_bus_message_get_interface(m)),
+ strna(sd_bus_message_get_member(m)),
+ BUS_MESSAGE_COOKIE(m),
+ m->reply_cookie,
+ strna(m->root_container.signature),
+ strna(m->error.name),
+ strna(m->error.message),
+ bus_error_message(error, r));
+
+ return 1;
+}
diff --git a/src/libsystemd/sd-bus/bus-internal.h b/src/libsystemd/sd-bus/bus-internal.h
new file mode 100644
index 0000000..098a518
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-internal.h
@@ -0,0 +1,427 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <pthread.h>
+
+#include "sd-bus.h"
+
+#include "bus-error.h"
+#include "bus-kernel.h"
+#include "bus-match.h"
+#include "constants.h"
+#include "hashmap.h"
+#include "list.h"
+#include "prioq.h"
+#include "runtime-scope.h"
+#include "socket-util.h"
+#include "time-util.h"
+
+/* Note that we use the new /run prefix here (instead of /var/run) since we require them to be aliases and
+ * that way we become independent of /var being mounted */
+#define DEFAULT_SYSTEM_BUS_ADDRESS "unix:path=/run/dbus/system_bus_socket"
+#define DEFAULT_USER_BUS_ADDRESS_FMT "unix:path=%s/bus"
+
+struct reply_callback {
+ sd_bus_message_handler_t callback;
+ usec_t timeout_usec; /* this is a relative timeout until we reach the BUS_HELLO state, and an absolute one right after */
+ uint64_t cookie;
+ unsigned prioq_idx;
+};
+
+struct filter_callback {
+ sd_bus_message_handler_t callback;
+
+ unsigned last_iteration;
+
+ LIST_FIELDS(struct filter_callback, callbacks);
+};
+
+struct match_callback {
+ sd_bus_message_handler_t callback;
+ sd_bus_message_handler_t install_callback;
+
+ sd_bus_slot *install_slot; /* The AddMatch() call */
+
+ unsigned last_iteration;
+
+ /* Don't dispatch this slot with messages that arrived in any iteration before or at the this
+ * one. We use this to ensure that matches don't apply "retroactively" and confuse the caller:
+ * only messages received after the match was installed will be considered. */
+ uint64_t after;
+
+ char *match_string;
+
+ struct bus_match_node *match_node;
+};
+
+struct node {
+ char *path;
+ struct node *parent;
+ LIST_HEAD(struct node, child);
+ LIST_FIELDS(struct node, siblings);
+
+ LIST_HEAD(struct node_callback, callbacks);
+ LIST_HEAD(struct node_vtable, vtables);
+ LIST_HEAD(struct node_enumerator, enumerators);
+ LIST_HEAD(struct node_object_manager, object_managers);
+};
+
+struct node_callback {
+ struct node *node;
+
+ bool is_fallback:1;
+ unsigned last_iteration;
+
+ sd_bus_message_handler_t callback;
+
+ LIST_FIELDS(struct node_callback, callbacks);
+};
+
+struct node_enumerator {
+ struct node *node;
+
+ sd_bus_node_enumerator_t callback;
+
+ unsigned last_iteration;
+
+ LIST_FIELDS(struct node_enumerator, enumerators);
+};
+
+struct node_object_manager {
+ struct node *node;
+
+ LIST_FIELDS(struct node_object_manager, object_managers);
+};
+
+struct node_vtable {
+ struct node *node;
+
+ bool is_fallback:1;
+ unsigned last_iteration;
+
+ char *interface;
+ const sd_bus_vtable *vtable;
+ sd_bus_object_find_t find;
+
+ LIST_FIELDS(struct node_vtable, vtables);
+};
+
+struct vtable_member {
+ const char *path;
+ const char *interface;
+ const char *member;
+ struct node_vtable *parent;
+ unsigned last_iteration;
+ const sd_bus_vtable *vtable;
+};
+
+typedef enum BusSlotType {
+ BUS_REPLY_CALLBACK,
+ BUS_FILTER_CALLBACK,
+ BUS_MATCH_CALLBACK,
+ BUS_NODE_CALLBACK,
+ BUS_NODE_ENUMERATOR,
+ BUS_NODE_VTABLE,
+ BUS_NODE_OBJECT_MANAGER,
+ _BUS_SLOT_INVALID = -EINVAL,
+} BusSlotType;
+
+struct sd_bus_slot {
+ unsigned n_ref;
+ BusSlotType type:8;
+
+ /* Slots can be "floating" or not. If they are not floating (the usual case) then they reference the
+ * bus object they are associated with. This means the bus object stays allocated at least as long as
+ * there is a slot around associated with it. If it is floating, then the slot's lifecycle is bound
+ * to the lifecycle of the bus: it will be disconnected from the bus when the bus is destroyed, and
+ * it keeping the slot reffed hence won't mean the bus stays reffed too. Internally this means the
+ * reference direction is reversed: floating slots objects are referenced by the bus object, and not
+ * vice versa. */
+ bool floating;
+ bool match_added;
+
+ sd_bus *bus;
+ void *userdata;
+ sd_bus_destroy_t destroy_callback;
+
+ char *description;
+
+ LIST_FIELDS(sd_bus_slot, slots);
+
+ union {
+ struct reply_callback reply_callback;
+ struct filter_callback filter_callback;
+ struct match_callback match_callback;
+ struct node_callback node_callback;
+ struct node_enumerator node_enumerator;
+ struct node_object_manager node_object_manager;
+ struct node_vtable node_vtable;
+ };
+};
+
+enum bus_state {
+ BUS_UNSET,
+ BUS_WATCH_BIND, /* waiting for the socket to appear via inotify */
+ BUS_OPENING, /* the kernel's connect() is still not ready */
+ BUS_AUTHENTICATING, /* we are currently in the "SASL" authorization phase of dbus */
+ BUS_HELLO, /* we are waiting for the Hello() response */
+ BUS_RUNNING,
+ BUS_CLOSING,
+ BUS_CLOSED,
+ _BUS_STATE_MAX,
+};
+
+static inline bool BUS_IS_OPEN(enum bus_state state) {
+ return state > BUS_UNSET && state < BUS_CLOSING;
+}
+
+enum bus_auth {
+ _BUS_AUTH_INVALID,
+ BUS_AUTH_EXTERNAL,
+ BUS_AUTH_ANONYMOUS
+};
+
+struct sd_bus {
+ unsigned n_ref;
+
+ enum bus_state state;
+ int input_fd, output_fd;
+ int inotify_fd;
+ int message_version;
+ int message_endian;
+
+ bool can_fds:1;
+ bool bus_client:1;
+ bool ucred_valid:1;
+ bool is_server:1;
+ bool anonymous_auth:1;
+ bool prefer_readv:1;
+ bool prefer_writev:1;
+ bool match_callbacks_modified:1;
+ bool filter_callbacks_modified:1;
+ bool nodes_modified:1;
+ bool trusted:1;
+ bool manual_peer_interface:1;
+ bool allow_interactive_authorization:1;
+ bool exit_on_disconnect:1;
+ bool exited:1;
+ bool exit_triggered:1;
+ bool is_local:1;
+ bool watch_bind:1;
+ bool is_monitor:1;
+ bool accept_fd:1;
+ bool attach_timestamp:1;
+ bool connected_signal:1;
+ bool close_on_exit:1;
+
+ RuntimeScope runtime_scope;
+
+ signed int use_memfd:2;
+
+ void *rbuffer;
+ size_t rbuffer_size;
+
+ sd_bus_message **rqueue;
+ size_t rqueue_size;
+
+ sd_bus_message **wqueue;
+ size_t wqueue_size;
+ size_t windex;
+
+ uint64_t cookie;
+ uint64_t read_counter; /* A counter for each incoming msg */
+
+ char *unique_name;
+ uint64_t unique_id;
+
+ struct bus_match_node match_callbacks;
+ Prioq *reply_callbacks_prioq;
+ OrderedHashmap *reply_callbacks;
+ LIST_HEAD(struct filter_callback, filter_callbacks);
+
+ Hashmap *nodes;
+ Hashmap *vtable_methods;
+ Hashmap *vtable_properties;
+
+ union sockaddr_union sockaddr;
+ socklen_t sockaddr_size;
+
+ pid_t nspid;
+ char *machine;
+
+ sd_id128_t server_id;
+
+ char *address;
+ unsigned address_index;
+
+ int last_connect_error;
+
+ enum bus_auth auth;
+ unsigned auth_index;
+ struct iovec auth_iovec[3];
+ size_t auth_rbegin;
+ char *auth_buffer;
+ usec_t auth_timeout;
+
+ struct ucred ucred;
+ char *label;
+ gid_t *groups;
+ size_t n_groups;
+ union sockaddr_union sockaddr_peer;
+ socklen_t sockaddr_size_peer;
+
+ uint64_t creds_mask;
+
+ int *fds;
+ size_t n_fds;
+
+ char *exec_path;
+ char **exec_argv;
+
+ /* We do locking around the memfd cache, since we want to
+ * allow people to process a sd_bus_message in a different
+ * thread then it was generated on and free it there. Since
+ * adding something to the memfd cache might happen when a
+ * message is released, we hence need to protect this bit with
+ * a mutex. */
+ pthread_mutex_t memfd_cache_mutex;
+ struct memfd_cache memfd_cache[MEMFD_CACHE_MAX];
+ unsigned n_memfd_cache;
+
+ uint64_t origin_id;
+ pid_t busexec_pid;
+
+ unsigned iteration_counter;
+
+ sd_event_source *input_io_event_source;
+ sd_event_source *output_io_event_source;
+ sd_event_source *time_event_source;
+ sd_event_source *quit_event_source;
+ sd_event_source *inotify_event_source;
+ sd_event *event;
+ int event_priority;
+
+ pid_t tid;
+
+ sd_bus_message *current_message;
+ sd_bus_slot *current_slot;
+ sd_bus_message_handler_t current_handler;
+ void *current_userdata;
+
+ sd_bus **default_bus_ptr;
+
+ char *description;
+ char *patch_sender;
+
+ sd_bus_track *track_queue;
+
+ LIST_HEAD(sd_bus_slot, slots);
+ LIST_HEAD(sd_bus_track, tracks);
+
+ int *inotify_watches;
+ size_t n_inotify_watches;
+
+ /* zero means use value specified by $SYSTEMD_BUS_TIMEOUT= environment variable or built-in default */
+ usec_t method_call_timeout;
+};
+
+/* For method calls we timeout at 25s, like in the D-Bus reference implementation */
+#define BUS_DEFAULT_TIMEOUT ((usec_t) (25 * USEC_PER_SEC))
+
+/* For the authentication phase we grant 90s, to provide extra room during boot, when RNGs and such are not filled up
+ * with enough entropy yet and might delay the boot */
+#define BUS_AUTH_TIMEOUT ((usec_t) DEFAULT_TIMEOUT_USEC)
+
+#define BUS_WQUEUE_MAX (384*1024)
+#define BUS_RQUEUE_MAX (384*1024)
+
+#define BUS_MESSAGE_SIZE_MAX (128*1024*1024)
+#define BUS_AUTH_SIZE_MAX (64*1024)
+/* Note that the D-Bus specification states that bus paths shall have no size limit. We enforce here one
+ * anyway, since truly unbounded strings are a security problem. The limit we pick is relatively large however,
+ * to not clash unnecessarily with real-life applications. */
+#define BUS_PATH_SIZE_MAX (64*1024)
+
+#define BUS_CONTAINER_DEPTH 128
+
+/* Defined by the specification as maximum size of an array in bytes */
+#define BUS_ARRAY_MAX_SIZE 67108864
+
+#define BUS_FDS_MAX 1024
+
+#define BUS_EXEC_ARGV_MAX 256
+
+bool interface_name_is_valid(const char *p) _pure_;
+bool service_name_is_valid(const char *p) _pure_;
+bool member_name_is_valid(const char *p) _pure_;
+bool object_path_is_valid(const char *p) _pure_;
+
+char *object_path_startswith(const char *a, const char *b) _pure_;
+
+bool namespace_complex_pattern(const char *pattern, const char *value) _pure_;
+bool path_complex_pattern(const char *pattern, const char *value) _pure_;
+
+bool namespace_simple_pattern(const char *pattern, const char *value) _pure_;
+bool path_simple_pattern(const char *pattern, const char *value) _pure_;
+
+int bus_message_type_from_string(const char *s, uint8_t *u);
+const char *bus_message_type_to_string(uint8_t u) _pure_;
+
+#define error_name_is_valid interface_name_is_valid
+
+sd_bus *bus_resolve(sd_bus *bus);
+
+int bus_ensure_running(sd_bus *bus);
+int bus_start_running(sd_bus *bus);
+int bus_next_address(sd_bus *bus);
+
+int bus_seal_synthetic_message(sd_bus *b, sd_bus_message *m);
+
+int bus_rqueue_make_room(sd_bus *bus);
+
+bool bus_origin_changed(sd_bus *bus);
+
+char *bus_address_escape(const char *v);
+
+int bus_attach_io_events(sd_bus *b);
+int bus_attach_inotify_event(sd_bus *b);
+
+void bus_close_inotify_fd(sd_bus *b);
+void bus_close_io_fds(sd_bus *b);
+
+int bus_add_match_full(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ bool asynchronous,
+ const char *match,
+ sd_bus_message_handler_t callback,
+ sd_bus_message_handler_t install_callback,
+ void *userdata,
+ uint64_t timeout_usec);
+
+#define OBJECT_PATH_FOREACH_PREFIX(prefix, path) \
+ for (char *_slash = ({ strcpy((prefix), (path)); streq((prefix), "/") ? NULL : strrchr((prefix), '/'); }) ; \
+ _slash && ((_slash[(_slash) == (prefix)] = 0), true); \
+ _slash = streq((prefix), "/") ? NULL : strrchr((prefix), '/'))
+
+/* If we are invoking callbacks of a bus object, ensure unreffing the
+ * bus from the callback doesn't destroy the object we are working on */
+#define BUS_DONT_DESTROY(bus) \
+ _cleanup_(sd_bus_unrefp) _unused_ sd_bus *_dont_destroy_##bus = sd_bus_ref(bus)
+
+int bus_set_address_system(sd_bus *bus);
+int bus_set_address_user(sd_bus *bus);
+int bus_set_address_system_remote(sd_bus *b, const char *host);
+int bus_set_address_machine(sd_bus *b, RuntimeScope runtime_scope, const char *machine);
+
+int bus_maybe_reply_error(sd_bus_message *m, int r, sd_bus_error *error);
+
+#define bus_assert_return(expr, r, error) \
+ do { \
+ if (!assert_log(expr, #expr)) \
+ return sd_bus_error_set_errno(error, r); \
+ } while (false)
+
+void bus_enter_closing(sd_bus *bus);
+
+void bus_set_state(sd_bus *bus, enum bus_state state);
diff --git a/src/libsystemd/sd-bus/bus-introspect.c b/src/libsystemd/sd-bus/bus-introspect.c
new file mode 100644
index 0000000..84c8774
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-introspect.c
@@ -0,0 +1,290 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-internal.h"
+#include "bus-introspect.h"
+#include "bus-objects.h"
+#include "bus-protocol.h"
+#include "bus-signature.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "memory-util.h"
+#include "memstream-util.h"
+#include "string-util.h"
+
+#define BUS_INTROSPECT_DOCTYPE \
+ "<!DOCTYPE node PUBLIC \"-//freedesktop//DTD D-BUS Object Introspection 1.0//EN\"\n" \
+ "\"https://www.freedesktop.org/standards/dbus/1.0/introspect.dtd\">\n"
+
+#define BUS_INTROSPECT_INTERFACE_PEER \
+ " <interface name=\"org.freedesktop.DBus.Peer\">\n" \
+ " <method name=\"Ping\"/>\n" \
+ " <method name=\"GetMachineId\">\n" \
+ " <arg type=\"s\" name=\"machine_uuid\" direction=\"out\"/>\n" \
+ " </method>\n" \
+ " </interface>\n"
+
+#define BUS_INTROSPECT_INTERFACE_INTROSPECTABLE \
+ " <interface name=\"org.freedesktop.DBus.Introspectable\">\n" \
+ " <method name=\"Introspect\">\n" \
+ " <arg name=\"xml_data\" type=\"s\" direction=\"out\"/>\n" \
+ " </method>\n" \
+ " </interface>\n"
+
+#define BUS_INTROSPECT_INTERFACE_PROPERTIES \
+ " <interface name=\"org.freedesktop.DBus.Properties\">\n" \
+ " <method name=\"Get\">\n" \
+ " <arg name=\"interface_name\" direction=\"in\" type=\"s\"/>\n" \
+ " <arg name=\"property_name\" direction=\"in\" type=\"s\"/>\n" \
+ " <arg name=\"value\" direction=\"out\" type=\"v\"/>\n" \
+ " </method>\n" \
+ " <method name=\"GetAll\">\n" \
+ " <arg name=\"interface_name\" direction=\"in\" type=\"s\"/>\n" \
+ " <arg name=\"props\" direction=\"out\" type=\"a{sv}\"/>\n" \
+ " </method>\n" \
+ " <method name=\"Set\">\n" \
+ " <arg name=\"interface_name\" direction=\"in\" type=\"s\"/>\n" \
+ " <arg name=\"property_name\" direction=\"in\" type=\"s\"/>\n" \
+ " <arg name=\"value\" direction=\"in\" type=\"v\"/>\n" \
+ " </method>\n" \
+ " <signal name=\"PropertiesChanged\">\n" \
+ " <arg type=\"s\" name=\"interface_name\"/>\n" \
+ " <arg type=\"a{sv}\" name=\"changed_properties\"/>\n" \
+ " <arg type=\"as\" name=\"invalidated_properties\"/>\n" \
+ " </signal>\n" \
+ " </interface>\n"
+
+#define BUS_INTROSPECT_INTERFACE_OBJECT_MANAGER \
+ " <interface name=\"org.freedesktop.DBus.ObjectManager\">\n" \
+ " <method name=\"GetManagedObjects\">\n" \
+ " <arg type=\"a{oa{sa{sv}}}\" name=\"object_paths_interfaces_and_properties\" direction=\"out\"/>\n" \
+ " </method>\n" \
+ " <signal name=\"InterfacesAdded\">\n" \
+ " <arg type=\"o\" name=\"object_path\"/>\n" \
+ " <arg type=\"a{sa{sv}}\" name=\"interfaces_and_properties\"/>\n" \
+ " </signal>\n" \
+ " <signal name=\"InterfacesRemoved\">\n" \
+ " <arg type=\"o\" name=\"object_path\"/>\n" \
+ " <arg type=\"as\" name=\"interfaces\"/>\n" \
+ " </signal>\n" \
+ " </interface>\n"
+
+int introspect_begin(struct introspect *i, bool trusted) {
+ FILE *f;
+
+ assert(i);
+
+ *i = (struct introspect) {
+ .trusted = trusted,
+ };
+
+ f = memstream_init(&i->m);
+ if (!f)
+ return -ENOMEM;
+
+ fputs(BUS_INTROSPECT_DOCTYPE
+ "<node>\n", f);
+
+ return 0;
+}
+
+int introspect_write_default_interfaces(struct introspect *i, bool object_manager) {
+ assert(i);
+ assert(i->m.f);
+
+ fputs(BUS_INTROSPECT_INTERFACE_PEER
+ BUS_INTROSPECT_INTERFACE_INTROSPECTABLE
+ BUS_INTROSPECT_INTERFACE_PROPERTIES, i->m.f);
+
+ if (object_manager)
+ fputs(BUS_INTROSPECT_INTERFACE_OBJECT_MANAGER, i->m.f);
+
+ return 0;
+}
+
+static int set_interface_name(struct introspect *i, const char *interface_name) {
+ assert(i);
+ assert(i->m.f);
+
+ if (streq_ptr(i->interface_name, interface_name))
+ return 0;
+
+ if (i->interface_name)
+ fputs(" </interface>\n", i->m.f);
+
+ if (interface_name)
+ fprintf(i->m.f, " <interface name=\"%s\">\n", interface_name);
+
+ return free_and_strdup(&i->interface_name, interface_name);
+}
+
+int introspect_write_child_nodes(struct introspect *i, OrderedSet *s, const char *prefix) {
+ char *node;
+
+ assert(i);
+ assert(i->m.f);
+ assert(prefix);
+
+ assert_se(set_interface_name(i, NULL) >= 0);
+
+ while ((node = ordered_set_steal_first(s))) {
+ const char *e;
+
+ e = object_path_startswith(node, prefix);
+ if (e && e[0])
+ fprintf(i->m.f, " <node name=\"%s\"/>\n", e);
+
+ free(node);
+ }
+
+ return 0;
+}
+
+static void introspect_write_flags(struct introspect *i, int type, uint64_t flags) {
+ assert(i);
+ assert(i->m.f);
+
+ if (flags & SD_BUS_VTABLE_DEPRECATED)
+ fputs(" <annotation name=\"org.freedesktop.DBus.Deprecated\" value=\"true\"/>\n", i->m.f);
+
+ if (type == _SD_BUS_VTABLE_METHOD && (flags & SD_BUS_VTABLE_METHOD_NO_REPLY))
+ fputs(" <annotation name=\"org.freedesktop.DBus.Method.NoReply\" value=\"true\"/>\n", i->m.f);
+
+ if (IN_SET(type, _SD_BUS_VTABLE_PROPERTY, _SD_BUS_VTABLE_WRITABLE_PROPERTY)) {
+ if (flags & SD_BUS_VTABLE_PROPERTY_EXPLICIT)
+ fputs(" <annotation name=\"org.freedesktop.systemd1.Explicit\" value=\"true\"/>\n", i->m.f);
+
+ if (flags & SD_BUS_VTABLE_PROPERTY_CONST)
+ fputs(" <annotation name=\"org.freedesktop.DBus.Property.EmitsChangedSignal\" value=\"const\"/>\n", i->m.f);
+ else if (flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION)
+ fputs(" <annotation name=\"org.freedesktop.DBus.Property.EmitsChangedSignal\" value=\"invalidates\"/>\n", i->m.f);
+ else if (!(flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE))
+ fputs(" <annotation name=\"org.freedesktop.DBus.Property.EmitsChangedSignal\" value=\"false\"/>\n", i->m.f);
+ }
+
+ if (!i->trusted &&
+ IN_SET(type, _SD_BUS_VTABLE_METHOD, _SD_BUS_VTABLE_WRITABLE_PROPERTY) &&
+ !(flags & SD_BUS_VTABLE_UNPRIVILEGED))
+ fputs(" <annotation name=\"org.freedesktop.systemd1.Privileged\" value=\"true\"/>\n", i->m.f);
+}
+
+/* Note that "names" is both an input and an output parameter. It initially points to the first argument name in a
+ NULL-separated list of strings, and is then advanced with each argument, and the resulting pointer is returned. */
+static int introspect_write_arguments(struct introspect *i, const char *signature, const char **names, const char *direction) {
+ int r;
+
+ assert(i);
+ assert(i->m.f);
+
+ for (;;) {
+ size_t l;
+
+ if (!*signature)
+ return 0;
+
+ r = signature_element_length(signature, &l);
+ if (r < 0)
+ return r;
+
+ fprintf(i->m.f, " <arg type=\"%.*s\"", (int) l, signature);
+
+ if (**names != '\0') {
+ fprintf(i->m.f, " name=\"%s\"", *names);
+ *names += strlen(*names) + 1;
+ }
+
+ if (direction)
+ fprintf(i->m.f, " direction=\"%s\"/>\n", direction);
+ else
+ fputs("/>\n", i->m.f);
+
+ signature += l;
+ }
+}
+
+int introspect_write_interface(
+ struct introspect *i,
+ const char *interface_name,
+ const sd_bus_vtable *v) {
+
+ const sd_bus_vtable *vtable = ASSERT_PTR(v);
+ const char *names = "";
+ int r;
+
+ assert(i);
+ assert(i->m.f);
+ assert(interface_name);
+
+ r = set_interface_name(i, interface_name);
+ if (r < 0)
+ return r;
+
+ for (; v->type != _SD_BUS_VTABLE_END; v = bus_vtable_next(vtable, v)) {
+
+ /* Ignore methods, signals and properties that are
+ * marked "hidden", but do show the interface
+ * itself */
+
+ if (v->type != _SD_BUS_VTABLE_START && (v->flags & SD_BUS_VTABLE_HIDDEN))
+ continue;
+
+ switch (v->type) {
+
+ case _SD_BUS_VTABLE_START:
+ if (v->flags & SD_BUS_VTABLE_DEPRECATED)
+ fputs(" <annotation name=\"org.freedesktop.DBus.Deprecated\" value=\"true\"/>\n", i->m.f);
+ break;
+
+ case _SD_BUS_VTABLE_METHOD:
+ fprintf(i->m.f, " <method name=\"%s\">\n", v->x.method.member);
+ if (bus_vtable_has_names(vtable))
+ names = strempty(v->x.method.names);
+ introspect_write_arguments(i, strempty(v->x.method.signature), &names, "in");
+ introspect_write_arguments(i, strempty(v->x.method.result), &names, "out");
+ introspect_write_flags(i, v->type, v->flags);
+ fputs(" </method>\n", i->m.f);
+ break;
+
+ case _SD_BUS_VTABLE_PROPERTY:
+ case _SD_BUS_VTABLE_WRITABLE_PROPERTY:
+ fprintf(i->m.f, " <property name=\"%s\" type=\"%s\" access=\"%s\">\n",
+ v->x.property.member,
+ v->x.property.signature,
+ v->type == _SD_BUS_VTABLE_WRITABLE_PROPERTY ? "readwrite" : "read");
+ introspect_write_flags(i, v->type, v->flags);
+ fputs(" </property>\n", i->m.f);
+ break;
+
+ case _SD_BUS_VTABLE_SIGNAL:
+ fprintf(i->m.f, " <signal name=\"%s\">\n", v->x.signal.member);
+ if (bus_vtable_has_names(vtable))
+ names = strempty(v->x.signal.names);
+ introspect_write_arguments(i, strempty(v->x.signal.signature), &names, NULL);
+ introspect_write_flags(i, v->type, v->flags);
+ fputs(" </signal>\n", i->m.f);
+ break;
+ }
+
+ }
+
+ return 0;
+}
+
+int introspect_finish(struct introspect *i, char **ret) {
+ assert(i);
+ assert(i->m.f);
+
+ assert_se(set_interface_name(i, NULL) >= 0);
+
+ fputs("</node>\n", i->m.f);
+
+ return memstream_finalize(&i->m, ret, NULL);
+}
+
+void introspect_done(struct introspect *i) {
+ assert(i);
+
+ /* Normally introspect_finish() does all the work, this is just a backup for error paths */
+
+ memstream_done(&i->m);
+ free(i->interface_name);
+}
diff --git a/src/libsystemd/sd-bus/bus-introspect.h b/src/libsystemd/sd-bus/bus-introspect.h
new file mode 100644
index 0000000..83bcfb2
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-introspect.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdio.h>
+
+#include "sd-bus.h"
+
+#include "memstream-util.h"
+#include "ordered-set.h"
+
+struct introspect {
+ MemStream m;
+ char *interface_name;
+ bool trusted;
+};
+
+int introspect_begin(struct introspect *i, bool trusted);
+int introspect_write_default_interfaces(struct introspect *i, bool object_manager);
+int introspect_write_child_nodes(struct introspect *i, OrderedSet *s, const char *prefix);
+int introspect_write_interface(
+ struct introspect *i,
+ const char *interface_name,
+ const sd_bus_vtable *v);
+int introspect_finish(struct introspect *i, char **ret);
+void introspect_done(struct introspect *i);
diff --git a/src/libsystemd/sd-bus/bus-kernel.c b/src/libsystemd/sd-bus/bus-kernel.c
new file mode 100644
index 0000000..d7ff834
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-kernel.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_VALGRIND_MEMCHECK_H
+#include <valgrind/memcheck.h>
+#endif
+
+#include <fcntl.h>
+#include <malloc.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-kernel.h"
+#include "bus-label.h"
+#include "bus-message.h"
+#include "capability-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "memfd-util.h"
+#include "parse-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "memory-util.h"
+
+void close_and_munmap(int fd, void *address, size_t size) {
+ if (size > 0) {
+ size = PAGE_ALIGN(size);
+ assert(size < SIZE_MAX);
+ assert_se(munmap(address, size) >= 0);
+ }
+
+ safe_close(fd);
+}
+
+void bus_flush_memfd(sd_bus *b) {
+ assert(b);
+
+ for (unsigned i = 0; i < b->n_memfd_cache; i++)
+ close_and_munmap(b->memfd_cache[i].fd, b->memfd_cache[i].address, b->memfd_cache[i].mapped);
+}
diff --git a/src/libsystemd/sd-bus/bus-kernel.h b/src/libsystemd/sd-bus/bus-kernel.h
new file mode 100644
index 0000000..be8e0ce
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-kernel.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#define MEMFD_CACHE_MAX 32
+
+/* When we cache a memfd block for reuse, we will truncate blocks
+ * longer than this in order not to keep too much data around. */
+#define MEMFD_CACHE_ITEM_SIZE_MAX (128*1024)
+
+/* This determines at which minimum size we prefer sending memfds over
+ * sending vectors */
+#define MEMFD_MIN_SIZE (512*1024)
+
+struct memfd_cache {
+ int fd;
+ void *address;
+ size_t mapped;
+ size_t allocated;
+};
+
+void close_and_munmap(int fd, void *address, size_t size);
+void bus_flush_memfd(sd_bus *bus);
diff --git a/src/libsystemd/sd-bus/bus-match.c b/src/libsystemd/sd-bus/bus-match.c
new file mode 100644
index 0000000..606304d
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-match.c
@@ -0,0 +1,1058 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-match.h"
+#include "bus-message.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hexdecoct.h"
+#include "memstream-util.h"
+#include "sort-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+/* Example:
+ *
+ * A: type=signal,sender=foo,interface=bar
+ * B: type=signal,sender=quux,interface=fips
+ * C: type=signal,sender=quux,interface=waldo
+ * D: type=signal,member=test
+ * E: sender=miau
+ * F: type=signal
+ * G: type=signal
+ *
+ * results in this tree:
+ *
+ * BUS_MATCH_ROOT
+ * + BUS_MATCH_MESSAGE_TYPE
+ * | ` BUS_MATCH_VALUE: value == signal
+ * | + DBUS_MATCH_SENDER
+ * | | + BUS_MATCH_VALUE: value == foo
+ * | | | ` DBUS_MATCH_INTERFACE
+ * | | | ` BUS_MATCH_VALUE: value == bar
+ * | | | ` BUS_MATCH_LEAF: A
+ * | | ` BUS_MATCH_VALUE: value == quux
+ * | | ` DBUS_MATCH_INTERFACE
+ * | | | BUS_MATCH_VALUE: value == fips
+ * | | | ` BUS_MATCH_LEAF: B
+ * | | ` BUS_MATCH_VALUE: value == waldo
+ * | | ` BUS_MATCH_LEAF: C
+ * | + DBUS_MATCH_MEMBER
+ * | | ` BUS_MATCH_VALUE: value == test
+ * | | ` BUS_MATCH_LEAF: D
+ * | + BUS_MATCH_LEAF: F
+ * | ` BUS_MATCH_LEAF: G
+ * ` BUS_MATCH_SENDER
+ * ` BUS_MATCH_VALUE: value == miau
+ * ` BUS_MATCH_LEAF: E
+ */
+
+static bool BUS_MATCH_IS_COMPARE(enum bus_match_node_type t) {
+ return t >= BUS_MATCH_SENDER && t <= BUS_MATCH_ARG_HAS_LAST;
+}
+
+static bool BUS_MATCH_CAN_HASH(enum bus_match_node_type t) {
+ return (t >= BUS_MATCH_MESSAGE_TYPE && t <= BUS_MATCH_PATH) ||
+ (t >= BUS_MATCH_ARG && t <= BUS_MATCH_ARG_LAST) ||
+ (t >= BUS_MATCH_ARG_HAS && t <= BUS_MATCH_ARG_HAS_LAST);
+}
+
+static void bus_match_node_free(struct bus_match_node *node) {
+ assert(node);
+ assert(node->parent);
+ assert(!node->child);
+ assert(node->type != BUS_MATCH_ROOT);
+ assert(node->type < _BUS_MATCH_NODE_TYPE_MAX);
+
+ if (node->parent->child) {
+ /* We are apparently linked into the parent's child
+ * list. Let's remove us from there. */
+ if (node->prev) {
+ assert(node->prev->next == node);
+ node->prev->next = node->next;
+ } else {
+ assert(node->parent->child == node);
+ node->parent->child = node->next;
+ }
+
+ if (node->next)
+ node->next->prev = node->prev;
+ }
+
+ if (node->type == BUS_MATCH_VALUE) {
+ /* We might be in the parent's hash table, so clean
+ * this up */
+
+ if (node->parent->type == BUS_MATCH_MESSAGE_TYPE)
+ hashmap_remove(node->parent->compare.children, UINT_TO_PTR(node->value.u8));
+ else if (BUS_MATCH_CAN_HASH(node->parent->type) && node->value.str)
+ hashmap_remove(node->parent->compare.children, node->value.str);
+
+ free(node->value.str);
+ }
+
+ if (BUS_MATCH_IS_COMPARE(node->type)) {
+ assert(hashmap_isempty(node->compare.children));
+ hashmap_free(node->compare.children);
+ }
+
+ free(node);
+}
+
+static bool bus_match_node_maybe_free(struct bus_match_node *node) {
+ assert(node);
+
+ if (node->type == BUS_MATCH_ROOT)
+ return false;
+
+ if (node->child)
+ return false;
+
+ if (BUS_MATCH_IS_COMPARE(node->type) && !hashmap_isempty(node->compare.children))
+ return true;
+
+ bus_match_node_free(node);
+ return true;
+}
+
+static bool value_node_test(
+ struct bus_match_node *node,
+ enum bus_match_node_type parent_type,
+ uint8_t value_u8,
+ const char *value_str,
+ char **value_strv,
+ sd_bus_message *m) {
+
+ assert(node);
+ assert(node->type == BUS_MATCH_VALUE);
+
+ /* Tests parameters against this value node, doing prefix
+ * magic and stuff. */
+
+ switch (parent_type) {
+
+ case BUS_MATCH_MESSAGE_TYPE:
+ return node->value.u8 == value_u8;
+
+ case BUS_MATCH_SENDER:
+ if (streq_ptr(node->value.str, value_str))
+ return true;
+
+ if (m->creds.mask & SD_BUS_CREDS_WELL_KNOWN_NAMES) {
+ /* on kdbus we have the well known names list
+ * in the credentials, let's make use of that
+ * for an accurate match */
+
+ STRV_FOREACH(i, m->creds.well_known_names)
+ if (streq_ptr(node->value.str, *i))
+ return true;
+
+ } else {
+
+ /* If we don't have kdbus, we don't know the
+ * well-known names of the senders. In that,
+ * let's just hope that dbus-daemon doesn't
+ * send us stuff we didn't want. */
+
+ if (node->value.str[0] != ':' && value_str && value_str[0] == ':')
+ return true;
+ }
+
+ return false;
+
+ case BUS_MATCH_DESTINATION:
+ case BUS_MATCH_INTERFACE:
+ case BUS_MATCH_MEMBER:
+ case BUS_MATCH_PATH:
+ case BUS_MATCH_ARG ... BUS_MATCH_ARG_LAST:
+
+ if (value_str)
+ return streq_ptr(node->value.str, value_str);
+
+ return false;
+
+ case BUS_MATCH_ARG_HAS ... BUS_MATCH_ARG_HAS_LAST: {
+ STRV_FOREACH(i, value_strv)
+ if (streq_ptr(node->value.str, *i))
+ return true;
+
+ return false;
+ }
+
+ case BUS_MATCH_ARG_NAMESPACE ... BUS_MATCH_ARG_NAMESPACE_LAST:
+ if (value_str)
+ return namespace_simple_pattern(node->value.str, value_str);
+
+ return false;
+
+ case BUS_MATCH_PATH_NAMESPACE:
+ return path_simple_pattern(node->value.str, value_str);
+
+ case BUS_MATCH_ARG_PATH ... BUS_MATCH_ARG_PATH_LAST:
+ if (value_str)
+ return path_complex_pattern(node->value.str, value_str);
+
+ return false;
+
+ default:
+ assert_not_reached();
+ }
+}
+
+static bool value_node_same(
+ struct bus_match_node *node,
+ enum bus_match_node_type parent_type,
+ uint8_t value_u8,
+ const char *value_str) {
+
+ /* Tests parameters against this value node, not doing prefix
+ * magic and stuff, i.e. this one actually compares the match
+ * itself. */
+
+ assert(node);
+ assert(node->type == BUS_MATCH_VALUE);
+
+ switch (parent_type) {
+
+ case BUS_MATCH_MESSAGE_TYPE:
+ return node->value.u8 == value_u8;
+
+ case BUS_MATCH_SENDER:
+ case BUS_MATCH_DESTINATION:
+ case BUS_MATCH_INTERFACE:
+ case BUS_MATCH_MEMBER:
+ case BUS_MATCH_PATH:
+ case BUS_MATCH_ARG ... BUS_MATCH_ARG_LAST:
+ case BUS_MATCH_ARG_HAS ... BUS_MATCH_ARG_HAS_LAST:
+ case BUS_MATCH_ARG_NAMESPACE ... BUS_MATCH_ARG_NAMESPACE_LAST:
+ case BUS_MATCH_PATH_NAMESPACE:
+ case BUS_MATCH_ARG_PATH ... BUS_MATCH_ARG_PATH_LAST:
+ return streq(node->value.str, value_str);
+
+ default:
+ assert_not_reached();
+ }
+}
+
+int bus_match_run(
+ sd_bus *bus,
+ struct bus_match_node *node,
+ sd_bus_message *m) {
+
+ _cleanup_strv_free_ char **test_strv = NULL;
+ const char *test_str = NULL;
+ uint8_t test_u8 = 0;
+ int r;
+
+ assert(m);
+
+ if (!node)
+ return 0;
+
+ if (bus && bus->match_callbacks_modified)
+ return 0;
+
+ /* Not these special semantics: when traversing the tree we
+ * usually let bus_match_run() when called for a node
+ * recursively invoke bus_match_run(). There's are two
+ * exceptions here though, which are BUS_NODE_ROOT (which
+ * cannot have a sibling), and BUS_NODE_VALUE (whose siblings
+ * are invoked anyway by its parent. */
+
+ switch (node->type) {
+
+ case BUS_MATCH_ROOT:
+
+ /* Run all children. Since we cannot have any siblings
+ * we won't call any. The children of the root node
+ * are compares or leaves, they will automatically
+ * call their siblings. */
+ return bus_match_run(bus, node->child, m);
+
+ case BUS_MATCH_VALUE:
+
+ /* Run all children. We don't execute any siblings, we
+ * assume our caller does that. The children of value
+ * nodes are compares or leaves, they will
+ * automatically call their siblings */
+
+ assert(node->child);
+ return bus_match_run(bus, node->child, m);
+
+ case BUS_MATCH_LEAF:
+
+ if (bus) {
+ /* Don't run this match as long as the AddMatch() call is not complete yet.
+ *
+ * Don't run this match unless the 'after' counter has been reached.
+ *
+ * Don't run this match more than once per iteration */
+
+ if (node->leaf.callback->install_slot ||
+ m->read_counter <= node->leaf.callback->after ||
+ node->leaf.callback->last_iteration == bus->iteration_counter)
+ return bus_match_run(bus, node->next, m);
+
+ node->leaf.callback->last_iteration = bus->iteration_counter;
+ }
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ /* Run the callback. And then invoke siblings. */
+ if (node->leaf.callback->callback) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL;
+ sd_bus_slot *slot;
+
+ slot = container_of(node->leaf.callback, sd_bus_slot, match_callback);
+ if (bus) {
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_handler = node->leaf.callback->callback;
+ bus->current_userdata = slot->userdata;
+ }
+ r = node->leaf.callback->callback(m, slot->userdata, &error_buffer);
+ if (bus) {
+ bus->current_userdata = NULL;
+ bus->current_handler = NULL;
+ bus->current_slot = sd_bus_slot_unref(slot);
+ }
+
+ r = bus_maybe_reply_error(m, r, &error_buffer);
+ if (r != 0)
+ return r;
+
+ if (bus && bus->match_callbacks_modified)
+ return 0;
+ }
+
+ return bus_match_run(bus, node->next, m);
+
+ case BUS_MATCH_MESSAGE_TYPE:
+ test_u8 = m->header->type;
+ break;
+
+ case BUS_MATCH_SENDER:
+ test_str = m->sender;
+ /* FIXME: resolve test_str from a well-known to a unique name first */
+ break;
+
+ case BUS_MATCH_DESTINATION:
+ test_str = m->destination;
+ break;
+
+ case BUS_MATCH_INTERFACE:
+ test_str = m->interface;
+ break;
+
+ case BUS_MATCH_MEMBER:
+ test_str = m->member;
+ break;
+
+ case BUS_MATCH_PATH:
+ case BUS_MATCH_PATH_NAMESPACE:
+ test_str = m->path;
+ break;
+
+ case BUS_MATCH_ARG ... BUS_MATCH_ARG_LAST:
+ (void) bus_message_get_arg(m, node->type - BUS_MATCH_ARG, &test_str);
+ break;
+
+ case BUS_MATCH_ARG_PATH ... BUS_MATCH_ARG_PATH_LAST:
+ (void) bus_message_get_arg(m, node->type - BUS_MATCH_ARG_PATH, &test_str);
+ break;
+
+ case BUS_MATCH_ARG_NAMESPACE ... BUS_MATCH_ARG_NAMESPACE_LAST:
+ (void) bus_message_get_arg(m, node->type - BUS_MATCH_ARG_NAMESPACE, &test_str);
+ break;
+
+ case BUS_MATCH_ARG_HAS ... BUS_MATCH_ARG_HAS_LAST:
+ (void) bus_message_get_arg_strv(m, node->type - BUS_MATCH_ARG_HAS, &test_strv);
+ break;
+
+ default:
+ assert_not_reached();
+ }
+
+ if (BUS_MATCH_CAN_HASH(node->type)) {
+ struct bus_match_node *found;
+
+ /* Lookup via hash table, nice! So let's jump directly. */
+
+ if (test_str)
+ found = hashmap_get(node->compare.children, test_str);
+ else if (test_strv) {
+ STRV_FOREACH(i, test_strv) {
+ found = hashmap_get(node->compare.children, *i);
+ if (found) {
+ r = bus_match_run(bus, found, m);
+ if (r != 0)
+ return r;
+ }
+ }
+
+ found = NULL;
+ } else if (node->type == BUS_MATCH_MESSAGE_TYPE)
+ found = hashmap_get(node->compare.children, UINT_TO_PTR(test_u8));
+ else
+ found = NULL;
+
+ if (found) {
+ r = bus_match_run(bus, found, m);
+ if (r != 0)
+ return r;
+ }
+ } else
+ /* No hash table, so let's iterate manually... */
+ for (struct bus_match_node *c = node->child; c; c = c->next) {
+ if (!value_node_test(c, node->type, test_u8, test_str, test_strv, m))
+ continue;
+
+ r = bus_match_run(bus, c, m);
+ if (r != 0)
+ return r;
+
+ if (bus && bus->match_callbacks_modified)
+ return 0;
+ }
+
+ if (bus && bus->match_callbacks_modified)
+ return 0;
+
+ /* And now, let's invoke our siblings */
+ return bus_match_run(bus, node->next, m);
+}
+
+static int bus_match_add_compare_value(
+ struct bus_match_node *where,
+ enum bus_match_node_type t,
+ uint8_t value_u8,
+ const char *value_str,
+ struct bus_match_node **ret) {
+
+ struct bus_match_node *c, *n = NULL;
+ int r;
+
+ assert(where);
+ assert(IN_SET(where->type, BUS_MATCH_ROOT, BUS_MATCH_VALUE));
+ assert(BUS_MATCH_IS_COMPARE(t));
+ assert(ret);
+
+ for (c = where->child; c && c->type != t; c = c->next)
+ ;
+
+ if (c) {
+ /* Comparison node already exists? Then let's see if the value node exists too. */
+
+ if (t == BUS_MATCH_MESSAGE_TYPE)
+ n = hashmap_get(c->compare.children, UINT_TO_PTR(value_u8));
+ else if (BUS_MATCH_CAN_HASH(t))
+ n = hashmap_get(c->compare.children, value_str);
+ else
+ for (n = c->child; n && !value_node_same(n, t, value_u8, value_str); n = n->next)
+ ;
+
+ if (n) {
+ *ret = n;
+ return 0;
+ }
+ } else {
+ /* Comparison node, doesn't exist yet? Then let's create it. */
+
+ c = new0(struct bus_match_node, 1);
+ if (!c) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ c->type = t;
+ c->parent = where;
+ c->next = where->child;
+ if (c->next)
+ c->next->prev = c;
+ where->child = c;
+
+ if (t == BUS_MATCH_MESSAGE_TYPE) {
+ c->compare.children = hashmap_new(NULL);
+ if (!c->compare.children) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ } else if (BUS_MATCH_CAN_HASH(t)) {
+ c->compare.children = hashmap_new(&string_hash_ops);
+ if (!c->compare.children) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+ }
+
+ n = new0(struct bus_match_node, 1);
+ if (!n) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ n->type = BUS_MATCH_VALUE;
+ n->value.u8 = value_u8;
+ if (value_str) {
+ n->value.str = strdup(value_str);
+ if (!n->value.str) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ n->parent = c;
+ if (c->compare.children) {
+
+ if (t == BUS_MATCH_MESSAGE_TYPE)
+ r = hashmap_put(c->compare.children, UINT_TO_PTR(value_u8), n);
+ else
+ r = hashmap_put(c->compare.children, n->value.str, n);
+
+ if (r < 0)
+ goto fail;
+ } else {
+ n->next = c->child;
+ if (n->next)
+ n->next->prev = n;
+ c->child = n;
+ }
+
+ *ret = n;
+ return 1;
+
+fail:
+ if (c)
+ bus_match_node_maybe_free(c);
+
+ if (n) {
+ free(n->value.str);
+ free(n);
+ }
+
+ return r;
+}
+
+static int bus_match_add_leaf(
+ struct bus_match_node *where,
+ struct match_callback *callback) {
+
+ struct bus_match_node *n;
+
+ assert(where);
+ assert(IN_SET(where->type, BUS_MATCH_ROOT, BUS_MATCH_VALUE));
+ assert(callback);
+
+ n = new0(struct bus_match_node, 1);
+ if (!n)
+ return -ENOMEM;
+
+ n->type = BUS_MATCH_LEAF;
+ n->parent = where;
+ n->next = where->child;
+ if (n->next)
+ n->next->prev = n;
+
+ n->leaf.callback = callback;
+ callback->match_node = n;
+
+ where->child = n;
+
+ return 1;
+}
+
+enum bus_match_node_type bus_match_node_type_from_string(const char *k, size_t n) {
+ assert(k);
+
+ if (n == 4 && startswith(k, "type"))
+ return BUS_MATCH_MESSAGE_TYPE;
+ if (n == 6 && startswith(k, "sender"))
+ return BUS_MATCH_SENDER;
+ if (n == 11 && startswith(k, "destination"))
+ return BUS_MATCH_DESTINATION;
+ if (n == 9 && startswith(k, "interface"))
+ return BUS_MATCH_INTERFACE;
+ if (n == 6 && startswith(k, "member"))
+ return BUS_MATCH_MEMBER;
+ if (n == 4 && startswith(k, "path"))
+ return BUS_MATCH_PATH;
+ if (n == 14 && startswith(k, "path_namespace"))
+ return BUS_MATCH_PATH_NAMESPACE;
+
+ if (n == 4 && startswith(k, "arg")) {
+ int j;
+
+ j = undecchar(k[3]);
+ if (j < 0)
+ return -EINVAL;
+
+ return BUS_MATCH_ARG + j;
+ }
+
+ if (n == 5 && startswith(k, "arg")) {
+ int a, b;
+ enum bus_match_node_type t;
+
+ a = undecchar(k[3]);
+ b = undecchar(k[4]);
+ if (a <= 0 || b < 0)
+ return -EINVAL;
+
+ t = BUS_MATCH_ARG + a * 10 + b;
+ if (t > BUS_MATCH_ARG_LAST)
+ return -EINVAL;
+
+ return t;
+ }
+
+ if (n == 8 && startswith(k, "arg") && startswith(k + 4, "path")) {
+ int j;
+
+ j = undecchar(k[3]);
+ if (j < 0)
+ return -EINVAL;
+
+ return BUS_MATCH_ARG_PATH + j;
+ }
+
+ if (n == 9 && startswith(k, "arg") && startswith(k + 5, "path")) {
+ enum bus_match_node_type t;
+ int a, b;
+
+ a = undecchar(k[3]);
+ b = undecchar(k[4]);
+ if (a <= 0 || b < 0)
+ return -EINVAL;
+
+ t = BUS_MATCH_ARG_PATH + a * 10 + b;
+ if (t > BUS_MATCH_ARG_PATH_LAST)
+ return -EINVAL;
+
+ return t;
+ }
+
+ if (n == 13 && startswith(k, "arg") && startswith(k + 4, "namespace")) {
+ int j;
+
+ j = undecchar(k[3]);
+ if (j < 0)
+ return -EINVAL;
+
+ return BUS_MATCH_ARG_NAMESPACE + j;
+ }
+
+ if (n == 14 && startswith(k, "arg") && startswith(k + 5, "namespace")) {
+ enum bus_match_node_type t;
+ int a, b;
+
+ a = undecchar(k[3]);
+ b = undecchar(k[4]);
+ if (a <= 0 || b < 0)
+ return -EINVAL;
+
+ t = BUS_MATCH_ARG_NAMESPACE + a * 10 + b;
+ if (t > BUS_MATCH_ARG_NAMESPACE_LAST)
+ return -EINVAL;
+
+ return t;
+ }
+
+ if (n == 7 && startswith(k, "arg") && startswith(k + 4, "has")) {
+ int j;
+
+ j = undecchar(k[3]);
+ if (j < 0)
+ return -EINVAL;
+
+ return BUS_MATCH_ARG_HAS + j;
+ }
+
+ if (n == 8 && startswith(k, "arg") && startswith(k + 5, "has")) {
+ enum bus_match_node_type t;
+ int a, b;
+
+ a = undecchar(k[3]);
+ b = undecchar(k[4]);
+ if (a <= 0 || b < 0)
+ return -EINVAL;
+
+ t = BUS_MATCH_ARG_HAS + a * 10 + b;
+ if (t > BUS_MATCH_ARG_HAS_LAST)
+ return -EINVAL;
+
+ return t;
+ }
+
+ return -EINVAL;
+}
+
+static int match_component_compare(const struct bus_match_component *a, const struct bus_match_component *b) {
+ return CMP(a->type, b->type);
+}
+
+void bus_match_parse_free(struct bus_match_component *components, size_t n_components) {
+ for (size_t i = 0; i < n_components; i++)
+ free(components[i].value_str);
+
+ free(components);
+}
+
+int bus_match_parse(
+ const char *match,
+ struct bus_match_component **ret_components,
+ size_t *ret_n_components) {
+
+ struct bus_match_component *components = NULL;
+ size_t n_components = 0;
+ int r;
+
+ assert(match);
+ assert(ret_components);
+ assert(ret_n_components);
+
+ CLEANUP_ARRAY(components, n_components, bus_match_parse_free);
+
+ while (*match != '\0') {
+ const char *eq, *q;
+ enum bus_match_node_type t;
+ size_t j = 0;
+ _cleanup_free_ char *value = NULL;
+ bool escaped = false, quoted;
+ uint8_t u;
+
+ /* Avahi's match rules appear to include whitespace, skip over it */
+ match += strspn(match, " ");
+
+ eq = strchr(match, '=');
+ if (!eq)
+ return -EINVAL;
+
+ t = bus_match_node_type_from_string(match, eq - match);
+ if (t < 0)
+ return -EINVAL;
+
+ quoted = eq[1] == '\'';
+
+ for (q = eq + 1 + quoted;; q++) {
+
+ if (*q == '\0') {
+
+ if (quoted)
+ return -EINVAL;
+
+ if (value)
+ value[j] = '\0';
+ break;
+ }
+
+ if (!escaped) {
+ if (*q == '\\') {
+ escaped = true;
+ continue;
+ }
+
+ if (quoted) {
+ if (*q == '\'') {
+ if (value)
+ value[j] = '\0';
+ break;
+ }
+ } else {
+ if (*q == ',') {
+ if (value)
+ value[j] = '\0';
+ break;
+ }
+ }
+ }
+
+ if (!GREEDY_REALLOC(value, j + 2))
+ return -ENOMEM;
+
+ value[j++] = *q;
+ escaped = false;
+ }
+
+ if (!value) {
+ value = strdup("");
+ if (!value)
+ return -ENOMEM;
+ }
+
+ if (t == BUS_MATCH_MESSAGE_TYPE) {
+ r = bus_message_type_from_string(value, &u);
+ if (r < 0)
+ return r;
+
+ value = mfree(value);
+ } else
+ u = 0;
+
+ if (!GREEDY_REALLOC(components, n_components + 1))
+ return -ENOMEM;
+
+ components[n_components++] = (struct bus_match_component) {
+ .type = t,
+ .value_str = TAKE_PTR(value),
+ .value_u8 = u,
+ };
+
+ if (q[quoted] == 0)
+ break;
+
+ if (q[quoted] != ',')
+ return -EINVAL;
+
+ match = q + 1 + quoted;
+ }
+
+ /* Order the whole thing, so that we always generate the same tree */
+ typesafe_qsort(components, n_components, match_component_compare);
+
+ /* Check for duplicates */
+ for (size_t i = 0; i+1 < n_components; i++)
+ if (components[i].type == components[i+1].type)
+ return -EINVAL;
+
+ *ret_components = TAKE_PTR(components);
+ *ret_n_components = n_components;
+
+ return 0;
+}
+
+char *bus_match_to_string(struct bus_match_component *components, size_t n_components) {
+ _cleanup_(memstream_done) MemStream m = {};
+ FILE *f;
+ int r;
+
+ if (n_components <= 0)
+ return strdup("");
+
+ assert(components);
+
+ f = memstream_init(&m);
+ if (!f)
+ return NULL;
+
+ for (size_t i = 0; i < n_components; i++) {
+ char buf[32];
+
+ if (i != 0)
+ fputc(',', f);
+
+ fputs(bus_match_node_type_to_string(components[i].type, buf, sizeof(buf)), f);
+ fputc('=', f);
+ fputc('\'', f);
+
+ if (components[i].type == BUS_MATCH_MESSAGE_TYPE)
+ fputs(bus_message_type_to_string(components[i].value_u8), f);
+ else
+ fputs(components[i].value_str, f);
+
+ fputc('\'', f);
+ }
+
+ char *buffer;
+ r = memstream_finalize(&m, &buffer, NULL);
+ if (r < 0)
+ return NULL;
+
+ return buffer;
+}
+
+int bus_match_add(
+ struct bus_match_node *root,
+ struct bus_match_component *components,
+ size_t n_components,
+ struct match_callback *callback) {
+
+ int r;
+
+ assert(root);
+ assert(callback);
+
+ for (size_t i = 0; i < n_components; i++) {
+ r = bus_match_add_compare_value(root,
+ components[i].type,
+ components[i].value_u8,
+ components[i].value_str,
+ &root);
+ if (r < 0)
+ return r;
+ }
+
+ return bus_match_add_leaf(root, callback);
+}
+
+int bus_match_remove(
+ struct bus_match_node *root,
+ struct match_callback *callback) {
+
+ struct bus_match_node *node, *pp;
+
+ assert(root);
+ assert(callback);
+
+ node = callback->match_node;
+ if (!node)
+ return 0;
+
+ assert(node->type == BUS_MATCH_LEAF);
+
+ callback->match_node = NULL;
+
+ /* Free the leaf */
+ pp = node->parent;
+ bus_match_node_free(node);
+
+ /* Prune the tree above */
+ while (pp) {
+ node = pp;
+ pp = node->parent;
+
+ if (!bus_match_node_maybe_free(node))
+ break;
+ }
+
+ return 1;
+}
+
+void bus_match_free(struct bus_match_node *node) {
+ struct bus_match_node *c;
+
+ if (!node)
+ return;
+
+ if (BUS_MATCH_CAN_HASH(node->type)) {
+
+ HASHMAP_FOREACH(c, node->compare.children)
+ bus_match_free(c);
+
+ assert(hashmap_isempty(node->compare.children));
+ }
+
+ while ((c = node->child))
+ bus_match_free(c);
+
+ if (node->type != BUS_MATCH_ROOT)
+ bus_match_node_free(node);
+}
+
+const char* bus_match_node_type_to_string(enum bus_match_node_type t, char buf[], size_t l) {
+ switch (t) {
+
+ case BUS_MATCH_ROOT:
+ return "root";
+
+ case BUS_MATCH_VALUE:
+ return "value";
+
+ case BUS_MATCH_LEAF:
+ return "leaf";
+
+ case BUS_MATCH_MESSAGE_TYPE:
+ return "type";
+
+ case BUS_MATCH_SENDER:
+ return "sender";
+
+ case BUS_MATCH_DESTINATION:
+ return "destination";
+
+ case BUS_MATCH_INTERFACE:
+ return "interface";
+
+ case BUS_MATCH_MEMBER:
+ return "member";
+
+ case BUS_MATCH_PATH:
+ return "path";
+
+ case BUS_MATCH_PATH_NAMESPACE:
+ return "path_namespace";
+
+ case BUS_MATCH_ARG ... BUS_MATCH_ARG_LAST:
+ return snprintf_ok(buf, l, "arg%i", t - BUS_MATCH_ARG);
+
+ case BUS_MATCH_ARG_PATH ... BUS_MATCH_ARG_PATH_LAST:
+ return snprintf_ok(buf, l, "arg%ipath", t - BUS_MATCH_ARG_PATH);
+
+ case BUS_MATCH_ARG_NAMESPACE ... BUS_MATCH_ARG_NAMESPACE_LAST:
+ return snprintf_ok(buf, l, "arg%inamespace", t - BUS_MATCH_ARG_NAMESPACE);
+
+ case BUS_MATCH_ARG_HAS ... BUS_MATCH_ARG_HAS_LAST:
+ return snprintf_ok(buf, l, "arg%ihas", t - BUS_MATCH_ARG_HAS);
+
+ default:
+ return NULL;
+ }
+}
+
+void bus_match_dump(FILE *out, struct bus_match_node *node, unsigned level) {
+ char buf[32];
+
+ if (!node)
+ return;
+
+ fprintf(out, "%*s[%s]", 2 * (int) level, "", bus_match_node_type_to_string(node->type, buf, sizeof(buf)));
+
+ if (node->type == BUS_MATCH_VALUE) {
+ if (node->parent->type == BUS_MATCH_MESSAGE_TYPE)
+ fprintf(out, " <%u>\n", node->value.u8);
+ else
+ fprintf(out, " <%s>\n", node->value.str);
+ } else if (node->type == BUS_MATCH_ROOT)
+ fputs(" root\n", out);
+ else if (node->type == BUS_MATCH_LEAF)
+ fprintf(out, " %p/%p\n", node->leaf.callback->callback,
+ container_of(node->leaf.callback, sd_bus_slot, match_callback)->userdata);
+ else
+ putc('\n', out);
+
+ if (BUS_MATCH_CAN_HASH(node->type)) {
+ struct bus_match_node *c;
+ HASHMAP_FOREACH(c, node->compare.children)
+ bus_match_dump(out, c, level + 1);
+ }
+
+ for (struct bus_match_node *c = node->child; c; c = c->next)
+ bus_match_dump(out, c, level + 1);
+}
+
+enum bus_match_scope bus_match_get_scope(const struct bus_match_component *components, size_t n_components) {
+ bool found_driver = false;
+
+ if (n_components <= 0)
+ return BUS_MATCH_GENERIC;
+
+ assert(components);
+
+ /* Checks whether the specified match can only match the
+ * pseudo-service for local messages, which we detect by
+ * sender, interface or path. If a match is not restricted to
+ * local messages, then we check if it only matches on the
+ * driver. */
+
+ for (size_t i = 0; i < n_components; i++) {
+ const struct bus_match_component *c = components + i;
+
+ if (c->type == BUS_MATCH_SENDER) {
+ if (streq_ptr(c->value_str, "org.freedesktop.DBus.Local"))
+ return BUS_MATCH_LOCAL;
+
+ if (streq_ptr(c->value_str, "org.freedesktop.DBus"))
+ found_driver = true;
+ }
+
+ if (c->type == BUS_MATCH_INTERFACE && streq_ptr(c->value_str, "org.freedesktop.DBus.Local"))
+ return BUS_MATCH_LOCAL;
+
+ if (c->type == BUS_MATCH_PATH && streq_ptr(c->value_str, "/org/freedesktop/DBus/Local"))
+ return BUS_MATCH_LOCAL;
+ }
+
+ return found_driver ? BUS_MATCH_DRIVER : BUS_MATCH_GENERIC;
+}
diff --git a/src/libsystemd/sd-bus/bus-match.h b/src/libsystemd/sd-bus/bus-match.h
new file mode 100644
index 0000000..ccb6aae
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-match.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdio.h>
+
+#include "sd-bus.h"
+
+#include "hashmap.h"
+
+enum bus_match_node_type {
+ BUS_MATCH_ROOT,
+ BUS_MATCH_VALUE,
+ BUS_MATCH_LEAF,
+
+ /* The following are all different kinds of compare nodes */
+ BUS_MATCH_SENDER,
+ BUS_MATCH_MESSAGE_TYPE,
+ BUS_MATCH_DESTINATION,
+ BUS_MATCH_INTERFACE,
+ BUS_MATCH_MEMBER,
+ BUS_MATCH_PATH,
+ BUS_MATCH_PATH_NAMESPACE,
+ BUS_MATCH_ARG,
+ BUS_MATCH_ARG_LAST = BUS_MATCH_ARG + 63,
+ BUS_MATCH_ARG_PATH,
+ BUS_MATCH_ARG_PATH_LAST = BUS_MATCH_ARG_PATH + 63,
+ BUS_MATCH_ARG_NAMESPACE,
+ BUS_MATCH_ARG_NAMESPACE_LAST = BUS_MATCH_ARG_NAMESPACE + 63,
+ BUS_MATCH_ARG_HAS,
+ BUS_MATCH_ARG_HAS_LAST = BUS_MATCH_ARG_HAS + 63,
+ _BUS_MATCH_NODE_TYPE_MAX,
+ _BUS_MATCH_NODE_TYPE_INVALID = -EINVAL,
+};
+
+struct bus_match_node {
+ enum bus_match_node_type type;
+ struct bus_match_node *parent, *next, *prev, *child;
+
+ union {
+ struct {
+ char *str;
+ uint8_t u8;
+ } value;
+ struct {
+ struct match_callback *callback;
+ } leaf;
+ struct {
+ /* If this is set, then the child is NULL */
+ Hashmap *children;
+ } compare;
+ };
+};
+
+struct bus_match_component {
+ enum bus_match_node_type type;
+ uint8_t value_u8;
+ char *value_str;
+};
+
+enum bus_match_scope {
+ BUS_MATCH_GENERIC,
+ BUS_MATCH_LOCAL,
+ BUS_MATCH_DRIVER,
+};
+
+int bus_match_run(sd_bus *bus, struct bus_match_node *root, sd_bus_message *m);
+
+int bus_match_add(struct bus_match_node *root, struct bus_match_component *components, size_t n_components, struct match_callback *callback);
+int bus_match_remove(struct bus_match_node *root, struct match_callback *callback);
+
+void bus_match_free(struct bus_match_node *node);
+
+void bus_match_dump(FILE *out, struct bus_match_node *node, unsigned level);
+
+const char* bus_match_node_type_to_string(enum bus_match_node_type t, char buf[], size_t l);
+enum bus_match_node_type bus_match_node_type_from_string(const char *k, size_t n);
+
+int bus_match_parse(const char *match, struct bus_match_component **ret_components, size_t *ret_n_components);
+void bus_match_parse_free(struct bus_match_component *components, size_t n_components);
+char *bus_match_to_string(struct bus_match_component *components, size_t n_components);
+
+enum bus_match_scope bus_match_get_scope(const struct bus_match_component *components, size_t n_components);
diff --git a/src/libsystemd/sd-bus/bus-message.c b/src/libsystemd/sd-bus/bus-message.c
new file mode 100644
index 0000000..ab8b068
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-message.c
@@ -0,0 +1,4712 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-signature.h"
+#include "bus-type.h"
+#include "fd-util.h"
+#include "iovec-util.h"
+#include "memfd-util.h"
+#include "memory-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "utf8.h"
+
+static int message_append_basic(sd_bus_message *m, char type, const void *p, const void **stored);
+static int message_parse_fields(sd_bus_message *m);
+
+static void *adjust_pointer(const void *p, void *old_base, size_t sz, void *new_base) {
+
+ if (!p)
+ return NULL;
+
+ if (old_base == new_base)
+ return (void*) p;
+
+ if ((uint8_t*) p < (uint8_t*) old_base)
+ return (void*) p;
+
+ if ((uint8_t*) p >= (uint8_t*) old_base + sz)
+ return (void*) p;
+
+ return (uint8_t*) new_base + ((uint8_t*) p - (uint8_t*) old_base);
+}
+
+static void message_free_part(sd_bus_message *m, struct bus_body_part *part) {
+ assert(m);
+ assert(part);
+
+ if (part->memfd >= 0) {
+ /* erase if requested, but only if the memfd is not sealed yet, i.e. is writable */
+ if (m->sensitive && !m->sealed)
+ explicit_bzero_safe(part->data, part->size);
+
+ close_and_munmap(part->memfd, part->mmap_begin, part->mapped);
+ } else if (part->munmap_this)
+ /* We don't erase sensitive data here, since the data is memory mapped from someone else, and
+ * we just don't know if it's OK to write to it */
+ munmap(part->mmap_begin, part->mapped);
+ else {
+ /* Erase this if that is requested. Since this is regular memory we know we can write it. */
+ if (m->sensitive)
+ explicit_bzero_safe(part->data, part->size);
+
+ if (part->free_this)
+ free(part->data);
+ }
+
+ if (part != &m->body)
+ free(part);
+}
+
+static void message_reset_parts(sd_bus_message *m) {
+ struct bus_body_part *part;
+
+ assert(m);
+
+ part = &m->body;
+ while (m->n_body_parts > 0) {
+ struct bus_body_part *next = part->next;
+ message_free_part(m, part);
+ part = next;
+ m->n_body_parts--;
+ }
+
+ m->body_end = NULL;
+
+ m->cached_rindex_part = NULL;
+ m->cached_rindex_part_begin = 0;
+}
+
+static struct bus_container *message_get_last_container(sd_bus_message *m) {
+ assert(m);
+
+ if (m->n_containers == 0)
+ return &m->root_container;
+
+ assert(m->containers);
+ return m->containers + m->n_containers - 1;
+}
+
+static void message_free_last_container(sd_bus_message *m) {
+ struct bus_container *c;
+
+ c = message_get_last_container(m);
+
+ free(c->signature);
+ free(c->peeked_signature);
+
+ /* Move to previous container, but not if we are on root container */
+ if (m->n_containers > 0)
+ m->n_containers--;
+}
+
+static void message_reset_containers(sd_bus_message *m) {
+ assert(m);
+
+ while (m->n_containers > 0)
+ message_free_last_container(m);
+
+ m->containers = mfree(m->containers);
+ m->root_container.index = 0;
+}
+
+static sd_bus_message* message_free(sd_bus_message *m) {
+ assert(m);
+
+ message_reset_parts(m);
+
+ if (m->free_header)
+ free(m->header);
+
+ /* Note that we don't unref m->bus here. That's already done by sd_bus_message_unref() as each user
+ * reference to the bus message also is considered a reference to the bus connection itself. */
+
+ if (m->free_fds) {
+ close_many(m->fds, m->n_fds);
+ free(m->fds);
+ }
+
+ if (m->iovec != m->iovec_fixed)
+ free(m->iovec);
+
+ message_reset_containers(m);
+ assert(m->n_containers == 0);
+ message_free_last_container(m);
+
+ bus_creds_done(&m->creds);
+ return mfree(m);
+}
+
+static void *message_extend_fields(sd_bus_message *m, size_t sz, bool add_offset) {
+ void *op, *np;
+ size_t old_size, new_size, start;
+
+ assert(m);
+
+ if (m->poisoned)
+ return NULL;
+
+ old_size = sizeof(struct bus_header) + m->fields_size;
+ start = ALIGN8(old_size);
+ new_size = start + sz;
+
+ if (new_size < start || new_size > UINT32_MAX)
+ goto poison;
+
+ if (old_size == new_size)
+ return (uint8_t*) m->header + old_size;
+
+ if (m->free_header) {
+ np = realloc(m->header, ALIGN8(new_size));
+ if (!np)
+ goto poison;
+ } else {
+ /* Initially, the header is allocated as part of
+ * the sd_bus_message itself, let's replace it by
+ * dynamic data */
+
+ np = malloc(ALIGN8(new_size));
+ if (!np)
+ goto poison;
+
+ memcpy(np, m->header, sizeof(struct bus_header));
+ }
+
+ /* Zero out padding */
+ if (start > old_size)
+ memzero((uint8_t*) np + old_size, start - old_size);
+
+ op = m->header;
+ m->header = np;
+ m->fields_size = new_size - sizeof(struct bus_header);
+
+ /* Adjust quick access pointers */
+ m->path = adjust_pointer(m->path, op, old_size, m->header);
+ m->interface = adjust_pointer(m->interface, op, old_size, m->header);
+ m->member = adjust_pointer(m->member, op, old_size, m->header);
+ m->destination = adjust_pointer(m->destination, op, old_size, m->header);
+ m->sender = adjust_pointer(m->sender, op, old_size, m->header);
+ m->error.name = adjust_pointer(m->error.name, op, old_size, m->header);
+
+ m->free_header = true;
+
+ if (add_offset) {
+ if (m->n_header_offsets >= ELEMENTSOF(m->header_offsets))
+ goto poison;
+
+ m->header_offsets[m->n_header_offsets++] = new_size - sizeof(struct bus_header);
+ }
+
+ return (uint8_t*) np + start;
+
+poison:
+ m->poisoned = true;
+ return NULL;
+}
+
+static int message_append_field_string(
+ sd_bus_message *m,
+ uint64_t h,
+ char type,
+ const char *s,
+ const char **ret) {
+
+ size_t l;
+ uint8_t *p;
+
+ assert(m);
+
+ /* dbus only allows 8-bit header field ids */
+ if (h > 0xFF)
+ return -EINVAL;
+
+ /* dbus doesn't allow strings over 32-bit */
+ l = strlen(s);
+ if (l > UINT32_MAX)
+ return -EINVAL;
+
+ /* Signature "(yv)" where the variant contains "s" */
+
+ /* (field id byte + (signature length + signature 's' + NUL) + (string length + string + NUL)) */
+ p = message_extend_fields(m, 4 + 4 + l + 1, false);
+ if (!p)
+ return -ENOMEM;
+
+ p[0] = (uint8_t) h;
+ p[1] = 1;
+ p[2] = type;
+ p[3] = 0;
+
+ ((uint32_t*) p)[1] = l;
+ memcpy(p + 8, s, l + 1);
+
+ if (ret)
+ *ret = (char*) p + 8;
+
+ return 0;
+}
+
+static int message_append_field_signature(
+ sd_bus_message *m,
+ uint64_t h,
+ const char *s,
+ const char **ret) {
+
+ size_t l;
+ uint8_t *p;
+
+ assert(m);
+
+ /* dbus only allows 8-bit header field ids */
+ if (h > 0xFF)
+ return -EINVAL;
+
+ /* dbus doesn't allow signatures over 8-bit */
+ l = strlen(s);
+ if (l > SD_BUS_MAXIMUM_SIGNATURE_LENGTH)
+ return -EINVAL;
+
+ /* Signature "(yv)" where the variant contains "g" */
+
+ /* (field id byte + (signature length + signature 'g' + NUL) + (string length + string + NUL)) */
+ p = message_extend_fields(m, 4 + 1 + l + 1, false);
+ if (!p)
+ return -ENOMEM;
+
+ p[0] = (uint8_t) h;
+ p[1] = 1;
+ p[2] = SD_BUS_TYPE_SIGNATURE;
+ p[3] = 0;
+ p[4] = l;
+ memcpy(p + 5, s, l + 1);
+
+ if (ret)
+ *ret = (const char*) p + 5;
+
+ return 0;
+}
+
+static int message_append_field_uint32(sd_bus_message *m, uint64_t h, uint32_t x) {
+ uint8_t *p;
+
+ assert(m);
+
+ /* dbus only allows 8-bit header field ids */
+ if (h > 0xFF)
+ return -EINVAL;
+
+ /* (field id byte + (signature length + signature 'u' + NUL) + value) */
+ p = message_extend_fields(m, 4 + 4, false);
+ if (!p)
+ return -ENOMEM;
+
+ p[0] = (uint8_t) h;
+ p[1] = 1;
+ p[2] = 'u';
+ p[3] = 0;
+
+ ((uint32_t*) p)[1] = x;
+
+ return 0;
+}
+
+static int message_append_reply_cookie(sd_bus_message *m, uint64_t cookie) {
+ assert(m);
+
+ /* 64-bit cookies are not supported */
+ if (cookie > UINT32_MAX)
+ return -EOPNOTSUPP;
+
+ return message_append_field_uint32(m, BUS_MESSAGE_HEADER_REPLY_SERIAL, (uint32_t) cookie);
+}
+
+static int message_from_header(
+ sd_bus *bus,
+ void *buffer,
+ size_t message_size,
+ int *fds,
+ size_t n_fds,
+ const char *label,
+ sd_bus_message **ret) {
+
+ _cleanup_free_ sd_bus_message *m = NULL;
+ struct bus_header *h;
+ size_t a, label_sz = 0; /* avoid false maybe-uninitialized warning */
+
+ assert(bus);
+ assert(buffer || message_size <= 0);
+ assert(fds || n_fds <= 0);
+ assert(ret);
+
+ if (message_size < sizeof(struct bus_header))
+ return -EBADMSG;
+
+ h = buffer;
+ if (!IN_SET(h->version, 1, 2))
+ return -EBADMSG;
+
+ if (h->type == _SD_BUS_MESSAGE_TYPE_INVALID)
+ return -EBADMSG;
+
+ if (!IN_SET(h->endian, BUS_LITTLE_ENDIAN, BUS_BIG_ENDIAN))
+ return -EBADMSG;
+
+ /* Note that we are happy with unknown flags in the flags header! */
+
+ a = ALIGN(sizeof(sd_bus_message));
+
+ if (label) {
+ label_sz = strlen(label);
+ a += label_sz + 1;
+ }
+
+ m = malloc0(a);
+ if (!m)
+ return -ENOMEM;
+
+ m->sealed = true;
+ m->header = buffer;
+
+ if (h->serial == 0)
+ return -EBADMSG;
+
+ m->fields_size = BUS_MESSAGE_BSWAP32(m, h->fields_size);
+ m->body_size = BUS_MESSAGE_BSWAP32(m, h->body_size);
+
+ assert(message_size >= sizeof(struct bus_header));
+ if (ALIGN8(m->fields_size) > message_size - sizeof(struct bus_header) ||
+ m->body_size != message_size - sizeof(struct bus_header) - ALIGN8(m->fields_size))
+ return -EBADMSG;
+
+ m->fds = fds;
+ m->n_fds = n_fds;
+
+ if (label) {
+ m->creds.label = (char*) m + ALIGN(sizeof(sd_bus_message));
+ memcpy(m->creds.label, label, label_sz + 1);
+
+ m->creds.mask |= SD_BUS_CREDS_SELINUX_CONTEXT;
+ }
+
+ m->n_ref = 1;
+ m->bus = sd_bus_ref(bus);
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+}
+
+int bus_message_from_malloc(
+ sd_bus *bus,
+ void *buffer,
+ size_t length,
+ int *fds,
+ size_t n_fds,
+ const char *label,
+ sd_bus_message **ret) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ size_t sz;
+ int r;
+
+ r = message_from_header(
+ bus,
+ buffer, length,
+ fds, n_fds,
+ label,
+ &m);
+ if (r < 0)
+ return r;
+
+ sz = length - sizeof(struct bus_header) - ALIGN8(m->fields_size);
+ if (sz > 0) {
+ m->n_body_parts = 1;
+ m->body.data = (uint8_t*) buffer + sizeof(struct bus_header) + ALIGN8(m->fields_size);
+ m->body.size = sz;
+ m->body.sealed = true;
+ m->body.memfd = -EBADF;
+ }
+
+ m->n_iovec = 1;
+ m->iovec = m->iovec_fixed;
+ m->iovec[0] = IOVEC_MAKE(buffer, length);
+
+ r = message_parse_fields(m);
+ if (r < 0)
+ return r;
+
+ /* We take possession of the memory and fds now */
+ m->free_header = true;
+ m->free_fds = true;
+
+ *ret = TAKE_PTR(m);
+ return 0;
+}
+
+_public_ int sd_bus_message_new(
+ sd_bus *bus,
+ sd_bus_message **m,
+ uint8_t type) {
+
+ assert_return(bus, -ENOTCONN);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state != BUS_UNSET, -ENOTCONN);
+ assert_return(m, -EINVAL);
+ /* Creation of messages with _SD_BUS_MESSAGE_TYPE_INVALID is allowed. */
+ assert_return(type < _SD_BUS_MESSAGE_TYPE_MAX, -EINVAL);
+
+ sd_bus_message *t = malloc0(ALIGN(sizeof(sd_bus_message)) + sizeof(struct bus_header));
+ if (!t)
+ return -ENOMEM;
+
+ t->n_ref = 1;
+ t->bus = sd_bus_ref(bus);
+ t->header = (struct bus_header*) ((uint8_t*) t + ALIGN(sizeof(struct sd_bus_message)));
+ t->header->endian = BUS_NATIVE_ENDIAN;
+ t->header->type = type;
+ t->header->version = bus->message_version;
+ t->allow_fds = bus->can_fds || !IN_SET(bus->state, BUS_HELLO, BUS_RUNNING);
+
+ if (bus->allow_interactive_authorization)
+ t->header->flags |= BUS_MESSAGE_ALLOW_INTERACTIVE_AUTHORIZATION;
+
+ *m = t;
+ return 0;
+}
+
+_public_ int sd_bus_message_new_signal_to(
+ sd_bus *bus,
+ sd_bus_message **m,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *t = NULL;
+ int r;
+
+ assert_return(bus, -ENOTCONN);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state != BUS_UNSET, -ENOTCONN);
+ assert_return(!destination || service_name_is_valid(destination), -EINVAL);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(interface_name_is_valid(interface), -EINVAL);
+ assert_return(member_name_is_valid(member), -EINVAL);
+ assert_return(m, -EINVAL);
+
+ r = sd_bus_message_new(bus, &t, SD_BUS_MESSAGE_SIGNAL);
+ if (r < 0)
+ return -ENOMEM;
+
+ assert(t);
+
+ t->header->flags |= BUS_MESSAGE_NO_REPLY_EXPECTED;
+
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_PATH, SD_BUS_TYPE_OBJECT_PATH, path, &t->path);
+ if (r < 0)
+ return r;
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_INTERFACE, SD_BUS_TYPE_STRING, interface, &t->interface);
+ if (r < 0)
+ return r;
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_MEMBER, SD_BUS_TYPE_STRING, member, &t->member);
+ if (r < 0)
+ return r;
+
+ if (destination) {
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, destination, &t->destination);
+ if (r < 0)
+ return r;
+ }
+
+ *m = TAKE_PTR(t);
+ return 0;
+}
+
+_public_ int sd_bus_message_new_signal(
+ sd_bus *bus,
+ sd_bus_message **m,
+ const char *path,
+ const char *interface,
+ const char *member) {
+
+ return sd_bus_message_new_signal_to(bus, m, NULL, path, interface, member);
+}
+
+_public_ int sd_bus_message_new_method_call(
+ sd_bus *bus,
+ sd_bus_message **m,
+ const char *destination,
+ const char *path,
+ const char *interface,
+ const char *member) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *t = NULL;
+ int r;
+
+ assert_return(bus, -ENOTCONN);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state != BUS_UNSET, -ENOTCONN);
+ assert_return(!destination || service_name_is_valid(destination), -EINVAL);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!interface || interface_name_is_valid(interface), -EINVAL);
+ assert_return(member_name_is_valid(member), -EINVAL);
+ assert_return(m, -EINVAL);
+
+ r = sd_bus_message_new(bus, &t, SD_BUS_MESSAGE_METHOD_CALL);
+ if (r < 0)
+ return -ENOMEM;
+
+ assert(t);
+
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_PATH, SD_BUS_TYPE_OBJECT_PATH, path, &t->path);
+ if (r < 0)
+ return r;
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_MEMBER, SD_BUS_TYPE_STRING, member, &t->member);
+ if (r < 0)
+ return r;
+
+ if (interface) {
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_INTERFACE, SD_BUS_TYPE_STRING, interface, &t->interface);
+ if (r < 0)
+ return r;
+ }
+
+ if (destination) {
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, destination, &t->destination);
+ if (r < 0)
+ return r;
+ }
+
+ *m = TAKE_PTR(t);
+ return 0;
+}
+
+static int message_new_reply(
+ sd_bus_message *call,
+ uint8_t type,
+ sd_bus_message **m) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *t = NULL;
+ uint64_t cookie;
+ int r;
+
+ assert_return(call, -EINVAL);
+ assert_return(call->sealed, -EPERM);
+ assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL);
+ assert_return(call->bus->state != BUS_UNSET, -ENOTCONN);
+ assert_return(m, -EINVAL);
+
+ cookie = BUS_MESSAGE_COOKIE(call);
+ if (cookie == 0)
+ return -EOPNOTSUPP;
+
+ r = sd_bus_message_new(call->bus, &t, type);
+ if (r < 0)
+ return -ENOMEM;
+
+ assert(t);
+
+ t->header->flags |= BUS_MESSAGE_NO_REPLY_EXPECTED;
+ t->reply_cookie = cookie;
+ r = message_append_reply_cookie(t, t->reply_cookie);
+ if (r < 0)
+ return r;
+
+ if (call->sender) {
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, call->sender, &t->destination);
+ if (r < 0)
+ return r;
+ }
+
+ t->dont_send = !!(call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED);
+ t->enforced_reply_signature = call->enforced_reply_signature;
+
+ /* let's copy the sensitive flag over. Let's do that as a safety precaution to keep a transaction
+ * wholly sensitive if already the incoming message was sensitive. This is particularly useful when a
+ * vtable record sets the SD_BUS_VTABLE_SENSITIVE flag on a method call, since this means it applies
+ * to both the message call and the reply. */
+ t->sensitive = call->sensitive;
+
+ *m = TAKE_PTR(t);
+ return 0;
+}
+
+_public_ int sd_bus_message_new_method_return(
+ sd_bus_message *call,
+ sd_bus_message **m) {
+
+ return message_new_reply(call, SD_BUS_MESSAGE_METHOD_RETURN, m);
+}
+
+_public_ int sd_bus_message_new_method_error(
+ sd_bus_message *call,
+ sd_bus_message **m,
+ const sd_bus_error *e) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *t = NULL;
+ int r;
+
+ assert_return(sd_bus_error_is_set(e), -EINVAL);
+ assert_return(m, -EINVAL);
+
+ r = message_new_reply(call, SD_BUS_MESSAGE_METHOD_ERROR, &t);
+ if (r < 0)
+ return r;
+
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_ERROR_NAME, SD_BUS_TYPE_STRING, e->name, &t->error.name);
+ if (r < 0)
+ return r;
+
+ if (e->message) {
+ r = message_append_basic(t, SD_BUS_TYPE_STRING, e->message, (const void**) &t->error.message);
+ if (r < 0)
+ return r;
+ }
+
+ t->error._need_free = -1;
+
+ *m = TAKE_PTR(t);
+ return 0;
+}
+
+_public_ int sd_bus_message_new_method_errorf(
+ sd_bus_message *call,
+ sd_bus_message **m,
+ const char *name,
+ const char *format,
+ ...) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ va_list ap;
+
+ assert_return(name, -EINVAL);
+ assert_return(m, -EINVAL);
+
+ va_start(ap, format);
+ sd_bus_error_setfv(&error, name, format, ap);
+ va_end(ap);
+
+ return sd_bus_message_new_method_error(call, m, &error);
+}
+
+_public_ int sd_bus_message_new_method_errno(
+ sd_bus_message *call,
+ sd_bus_message **m,
+ int error,
+ const sd_bus_error *p) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error berror = SD_BUS_ERROR_NULL;
+
+ if (sd_bus_error_is_set(p))
+ return sd_bus_message_new_method_error(call, m, p);
+
+ sd_bus_error_set_errno(&berror, error);
+
+ return sd_bus_message_new_method_error(call, m, &berror);
+}
+
+_public_ int sd_bus_message_new_method_errnof(
+ sd_bus_message *call,
+ sd_bus_message **m,
+ int error,
+ const char *format,
+ ...) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error berror = SD_BUS_ERROR_NULL;
+ va_list ap;
+
+ va_start(ap, format);
+ sd_bus_error_set_errnofv(&berror, error, format, ap);
+ va_end(ap);
+
+ return sd_bus_message_new_method_error(call, m, &berror);
+}
+
+void bus_message_set_sender_local(sd_bus *bus, sd_bus_message *m) {
+ assert(bus);
+ assert(m);
+
+ m->sender = m->creds.unique_name = (char*) "org.freedesktop.DBus.Local";
+ m->creds.well_known_names_local = true;
+ m->creds.mask |= (SD_BUS_CREDS_UNIQUE_NAME|SD_BUS_CREDS_WELL_KNOWN_NAMES) & bus->creds_mask;
+}
+
+void bus_message_set_sender_driver(sd_bus *bus, sd_bus_message *m) {
+ assert(bus);
+ assert(m);
+
+ m->sender = m->creds.unique_name = (char*) "org.freedesktop.DBus";
+ m->creds.well_known_names_driver = true;
+ m->creds.mask |= (SD_BUS_CREDS_UNIQUE_NAME|SD_BUS_CREDS_WELL_KNOWN_NAMES) & bus->creds_mask;
+}
+
+int bus_message_new_synthetic_error(
+ sd_bus *bus,
+ uint64_t cookie,
+ const sd_bus_error *e,
+ sd_bus_message **m) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *t = NULL;
+ int r;
+
+ assert(bus);
+ assert(sd_bus_error_is_set(e));
+ assert(m);
+
+ r = sd_bus_message_new(bus, &t, SD_BUS_MESSAGE_METHOD_ERROR);
+ if (r < 0)
+ return -ENOMEM;
+
+ assert(t);
+
+ t->header->flags |= BUS_MESSAGE_NO_REPLY_EXPECTED;
+ t->reply_cookie = cookie;
+
+ r = message_append_reply_cookie(t, t->reply_cookie);
+ if (r < 0)
+ return r;
+
+ if (bus && bus->unique_name) {
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, bus->unique_name, &t->destination);
+ if (r < 0)
+ return r;
+ }
+
+ r = message_append_field_string(t, BUS_MESSAGE_HEADER_ERROR_NAME, SD_BUS_TYPE_STRING, e->name, &t->error.name);
+ if (r < 0)
+ return r;
+
+ if (e->message) {
+ r = message_append_basic(t, SD_BUS_TYPE_STRING, e->message, (const void**) &t->error.message);
+ if (r < 0)
+ return r;
+ }
+
+ t->error._need_free = -1;
+
+ bus_message_set_sender_driver(bus, t);
+
+ *m = TAKE_PTR(t);
+ return 0;
+}
+
+_public_ sd_bus_message* sd_bus_message_ref(sd_bus_message *m) {
+ if (!m)
+ return NULL;
+
+ /* We are fine if this message so far was either explicitly reffed or not reffed but queued into at
+ * least one bus connection object. */
+ assert(m->n_ref > 0 || m->n_queued > 0);
+
+ m->n_ref++;
+
+ /* Each user reference to a bus message shall also be considered a ref on the bus */
+ sd_bus_ref(m->bus);
+ return m;
+}
+
+_public_ sd_bus_message* sd_bus_message_unref(sd_bus_message *m) {
+ if (!m)
+ return NULL;
+
+ assert(m->n_ref > 0);
+
+ sd_bus_unref(m->bus); /* Each regular ref is also a ref on the bus connection. Let's hence drop it
+ * here. Note we have to do this before decrementing our own n_ref here, since
+ * otherwise, if this message is currently queued sd_bus_unref() might call
+ * bus_message_unref_queued() for this which might then destroy the message
+ * while we are still processing it. */
+ m->n_ref--;
+
+ if (m->n_ref > 0 || m->n_queued > 0)
+ return NULL;
+
+ /* Unset the bus field if neither the user has a reference nor this message is queued. We are careful
+ * to reset the field only after the last reference to the bus is dropped, after all we might keep
+ * multiple references to the bus, once for each reference kept on ourselves. */
+ m->bus = NULL;
+
+ return message_free(m);
+}
+
+sd_bus_message* bus_message_ref_queued(sd_bus_message *m, sd_bus *bus) {
+ if (!m)
+ return NULL;
+
+ /* If this is a different bus than the message is associated with, then implicitly turn this into a
+ * regular reference. This means that you can create a memory leak by enqueuing a message generated
+ * on one bus onto another at the same time as enqueueing a message from the second one on the first,
+ * as we'll not detect the cyclic references there. */
+ if (bus != m->bus)
+ return sd_bus_message_ref(m);
+
+ assert(m->n_ref > 0 || m->n_queued > 0);
+ m->n_queued++;
+
+ return m;
+}
+
+sd_bus_message* bus_message_unref_queued(sd_bus_message *m, sd_bus *bus) {
+ if (!m)
+ return NULL;
+
+ if (bus != m->bus)
+ return sd_bus_message_unref(m);
+
+ assert(m->n_queued > 0);
+ m->n_queued--;
+
+ if (m->n_ref > 0 || m->n_queued > 0)
+ return NULL;
+
+ m->bus = NULL;
+
+ return message_free(m);
+}
+
+_public_ int sd_bus_message_get_type(sd_bus_message *m, uint8_t *type) {
+ assert_return(m, -EINVAL);
+ assert_return(type, -EINVAL);
+
+ *type = m->header->type;
+ return 0;
+}
+
+_public_ int sd_bus_message_get_cookie(sd_bus_message *m, uint64_t *cookie) {
+ uint64_t c;
+
+ assert_return(m, -EINVAL);
+ assert_return(cookie, -EINVAL);
+
+ c = BUS_MESSAGE_COOKIE(m);
+ if (c == 0)
+ return -ENODATA;
+
+ *cookie = BUS_MESSAGE_COOKIE(m);
+ return 0;
+}
+
+_public_ int sd_bus_message_get_reply_cookie(sd_bus_message *m, uint64_t *cookie) {
+ assert_return(m, -EINVAL);
+ assert_return(cookie, -EINVAL);
+
+ if (m->reply_cookie == 0)
+ return -ENODATA;
+
+ *cookie = m->reply_cookie;
+ return 0;
+}
+
+_public_ int sd_bus_message_get_expect_reply(sd_bus_message *m) {
+ assert_return(m, -EINVAL);
+
+ return m->header->type == SD_BUS_MESSAGE_METHOD_CALL &&
+ !(m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED);
+}
+
+_public_ int sd_bus_message_get_auto_start(sd_bus_message *m) {
+ assert_return(m, -EINVAL);
+
+ return !(m->header->flags & BUS_MESSAGE_NO_AUTO_START);
+}
+
+_public_ int sd_bus_message_get_allow_interactive_authorization(sd_bus_message *m) {
+ assert_return(m, -EINVAL);
+
+ return m->header->type == SD_BUS_MESSAGE_METHOD_CALL &&
+ (m->header->flags & BUS_MESSAGE_ALLOW_INTERACTIVE_AUTHORIZATION);
+}
+
+_public_ const char *sd_bus_message_get_path(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ return m->path;
+}
+
+_public_ const char *sd_bus_message_get_interface(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ return m->interface;
+}
+
+_public_ const char *sd_bus_message_get_member(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ return m->member;
+}
+
+_public_ const char *sd_bus_message_get_destination(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ return m->destination;
+}
+
+_public_ const char *sd_bus_message_get_sender(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ return m->sender;
+}
+
+_public_ const sd_bus_error *sd_bus_message_get_error(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ if (!sd_bus_error_is_set(&m->error))
+ return NULL;
+
+ return &m->error;
+}
+
+_public_ int sd_bus_message_get_monotonic_usec(sd_bus_message *m, uint64_t *usec) {
+ assert_return(m, -EINVAL);
+ assert_return(usec, -EINVAL);
+
+ if (m->monotonic <= 0)
+ return -ENODATA;
+
+ *usec = m->monotonic;
+ return 0;
+}
+
+_public_ int sd_bus_message_get_realtime_usec(sd_bus_message *m, uint64_t *usec) {
+ assert_return(m, -EINVAL);
+ assert_return(usec, -EINVAL);
+
+ if (m->realtime <= 0)
+ return -ENODATA;
+
+ *usec = m->realtime;
+ return 0;
+}
+
+_public_ int sd_bus_message_get_seqnum(sd_bus_message *m, uint64_t *seqnum) {
+ assert_return(m, -EINVAL);
+ assert_return(seqnum, -EINVAL);
+
+ if (m->seqnum <= 0)
+ return -ENODATA;
+
+ *seqnum = m->seqnum;
+ return 0;
+}
+
+_public_ sd_bus_creds *sd_bus_message_get_creds(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ if (m->creds.mask == 0)
+ return NULL;
+
+ return &m->creds;
+}
+
+_public_ int sd_bus_message_is_signal(
+ sd_bus_message *m,
+ const char *interface,
+ const char *member) {
+
+ assert_return(m, -EINVAL);
+
+ if (m->header->type != SD_BUS_MESSAGE_SIGNAL)
+ return 0;
+
+ if (interface && !streq_ptr(m->interface, interface))
+ return 0;
+
+ if (member && !streq_ptr(m->member, member))
+ return 0;
+
+ return 1;
+}
+
+_public_ int sd_bus_message_is_method_call(
+ sd_bus_message *m,
+ const char *interface,
+ const char *member) {
+
+ assert_return(m, -EINVAL);
+
+ if (m->header->type != SD_BUS_MESSAGE_METHOD_CALL)
+ return 0;
+
+ if (interface && !streq_ptr(m->interface, interface))
+ return 0;
+
+ if (member && !streq_ptr(m->member, member))
+ return 0;
+
+ return 1;
+}
+
+_public_ int sd_bus_message_is_method_error(sd_bus_message *m, const char *name) {
+ assert_return(m, -EINVAL);
+
+ if (m->header->type != SD_BUS_MESSAGE_METHOD_ERROR)
+ return 0;
+
+ if (name && !streq_ptr(m->error.name, name))
+ return 0;
+
+ return 1;
+}
+
+_public_ int sd_bus_message_set_expect_reply(sd_bus_message *m, int b) {
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(m->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EPERM);
+
+ SET_FLAG(m->header->flags, BUS_MESSAGE_NO_REPLY_EXPECTED, !b);
+
+ return 0;
+}
+
+_public_ int sd_bus_message_set_auto_start(sd_bus_message *m, int b) {
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ SET_FLAG(m->header->flags, BUS_MESSAGE_NO_AUTO_START, !b);
+
+ return 0;
+}
+
+_public_ int sd_bus_message_set_allow_interactive_authorization(sd_bus_message *m, int b) {
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ SET_FLAG(m->header->flags, BUS_MESSAGE_ALLOW_INTERACTIVE_AUTHORIZATION, b);
+
+ return 0;
+}
+
+static struct bus_body_part *message_append_part(sd_bus_message *m) {
+ struct bus_body_part *part;
+
+ assert(m);
+
+ if (m->poisoned)
+ return NULL;
+
+ if (m->n_body_parts <= 0) {
+ part = &m->body;
+ zero(*part);
+ } else {
+ assert(m->body_end);
+
+ part = new0(struct bus_body_part, 1);
+ if (!part) {
+ m->poisoned = true;
+ return NULL;
+ }
+
+ m->body_end->next = part;
+ }
+
+ part->memfd = -EBADF;
+ m->body_end = part;
+ m->n_body_parts++;
+
+ return part;
+}
+
+static void part_zero(struct bus_body_part *part, size_t sz) {
+ assert(part);
+ assert(sz > 0);
+ assert(sz < 8);
+
+ /* All other fields can be left in their defaults */
+ assert(!part->data);
+ assert(part->memfd < 0);
+
+ part->size = sz;
+ part->is_zero = true;
+ part->sealed = true;
+}
+
+static int part_make_space(
+ struct sd_bus_message *m,
+ struct bus_body_part *part,
+ size_t sz,
+ void **q) {
+
+ void *n;
+
+ assert(m);
+ assert(part);
+ assert(!part->sealed);
+
+ if (m->poisoned)
+ return -ENOMEM;
+
+ if (part->allocated == 0 || sz > part->allocated) {
+ size_t new_allocated;
+
+ new_allocated = sz > 0 ? 2 * sz : 64;
+ n = realloc(part->data, new_allocated);
+ if (!n) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+
+ part->data = n;
+ part->allocated = new_allocated;
+ part->free_this = true;
+ }
+
+ if (q)
+ *q = part->data ? (uint8_t*) part->data + part->size : NULL;
+
+ part->size = sz;
+ return 0;
+}
+
+static void message_extend_containers(sd_bus_message *m, size_t expand) {
+ assert(m);
+
+ if (expand <= 0)
+ return;
+
+ if (m->n_containers <= 0)
+ return;
+
+ /* Update counters */
+ for (struct bus_container *c = m->containers; c < m->containers + m->n_containers; c++)
+ if (c->array_size)
+ *c->array_size += expand;
+}
+
+static void *message_extend_body(
+ sd_bus_message *m,
+ size_t align,
+ size_t sz) {
+
+ size_t start_body, end_body, padding, added;
+ void *p;
+ int r;
+
+ assert(m);
+ assert(align > 0);
+ assert(!m->sealed);
+
+ if (m->poisoned)
+ return NULL;
+
+ start_body = ALIGN_TO(m->body_size, align);
+ end_body = start_body + sz;
+
+ padding = start_body - m->body_size;
+ added = padding + sz;
+
+ /* Check for 32-bit overflows */
+ if (end_body < start_body || end_body > UINT32_MAX) {
+ m->poisoned = true;
+ return NULL;
+ }
+
+ if (added > 0) {
+ struct bus_body_part *part = NULL;
+ bool add_new_part;
+
+ add_new_part =
+ m->n_body_parts <= 0 ||
+ m->body_end->sealed ||
+ (padding != ALIGN_TO(m->body_end->size, align) - m->body_end->size);
+ /* If this must be an inlined extension, let's create a new part if
+ * the previous part is large enough to be inlined. */
+
+ if (add_new_part) {
+ if (padding > 0) {
+ part = message_append_part(m);
+ if (!part)
+ return NULL;
+
+ part_zero(part, padding);
+ }
+
+ part = message_append_part(m);
+ if (!part)
+ return NULL;
+
+ r = part_make_space(m, part, sz, &p);
+ if (r < 0)
+ return NULL;
+ } else {
+ void *op;
+ size_t os, start_part, end_part;
+
+ part = m->body_end;
+ op = part->data;
+ os = part->size;
+
+ start_part = ALIGN_TO(part->size, align);
+ end_part = start_part + sz;
+
+ r = part_make_space(m, part, end_part, &p);
+ if (r < 0)
+ return NULL;
+
+ if (padding > 0) {
+ memzero(p, padding);
+ p = (uint8_t*) p + padding;
+ }
+
+ /* Readjust pointers */
+ if (m->n_containers > 0)
+ for (struct bus_container *c = m->containers; c < m->containers + m->n_containers; c++)
+ c->array_size = adjust_pointer(c->array_size, op, os, part->data);
+
+ m->error.message = (const char*) adjust_pointer(m->error.message, op, os, part->data);
+ }
+ } else
+ /* Return something that is not NULL and is aligned */
+ p = (uint8_t*) align;
+
+ m->body_size = end_body;
+ message_extend_containers(m, added);
+
+ return p;
+}
+
+static int message_push_fd(sd_bus_message *m, int fd) {
+ int *f, copy;
+
+ assert(m);
+
+ if (fd < 0)
+ return -EINVAL;
+
+ if (!m->allow_fds)
+ return -EOPNOTSUPP;
+
+ copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (copy < 0)
+ return -errno;
+
+ f = reallocarray(m->fds, m->n_fds + 1, sizeof(int));
+ if (!f) {
+ m->poisoned = true;
+ safe_close(copy);
+ return -ENOMEM;
+ }
+
+ m->fds = f;
+ m->fds[m->n_fds] = copy;
+ m->free_fds = true;
+
+ return copy;
+}
+
+int message_append_basic(sd_bus_message *m, char type, const void *p, const void **stored) {
+ _cleanup_close_ int fd = -EBADF;
+ struct bus_container *c;
+ ssize_t align, sz;
+ uint32_t u32;
+ void *a;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(bus_type_is_basic(type), -EINVAL);
+ assert_return(!m->poisoned, -ESTALE);
+
+ c = message_get_last_container(m);
+
+ if (c->signature && c->signature[c->index]) {
+ /* Container signature is already set */
+
+ if (c->signature[c->index] != type)
+ return -ENXIO;
+ } else {
+ char *e;
+
+ /* Maybe we can append to the signature? But only if this is the top-level container */
+ if (c->enclosing != 0)
+ return -ENXIO;
+
+ e = strextend(&c->signature, CHAR_TO_STR(type));
+ if (!e) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+ }
+
+ switch (type) {
+
+ case SD_BUS_TYPE_STRING:
+ /* To make things easy we'll serialize a NULL string
+ * into the empty string */
+ p = strempty(p);
+
+ if (!utf8_is_valid(p))
+ return -EINVAL;
+
+ align = 4;
+ sz = 4 + strlen(p) + 1;
+ break;
+
+ case SD_BUS_TYPE_OBJECT_PATH:
+
+ if (!p)
+ return -EINVAL;
+
+ if (!object_path_is_valid(p))
+ return -EINVAL;
+
+ align = 4;
+ sz = 4 + strlen(p) + 1;
+ break;
+
+ case SD_BUS_TYPE_SIGNATURE:
+
+ p = strempty(p);
+
+ if (!signature_is_valid(p, /* allow_dict_entry = */ true))
+ return -EINVAL;
+
+ align = 1;
+ sz = 1 + strlen(p) + 1;
+ break;
+
+ case SD_BUS_TYPE_BOOLEAN:
+
+ u32 = p && *(int*) p;
+ p = &u32;
+
+ align = sz = 4;
+ break;
+
+ case SD_BUS_TYPE_UNIX_FD:
+
+ if (!p)
+ return -EINVAL;
+
+ fd = message_push_fd(m, *(int*) p);
+ if (fd < 0)
+ return fd;
+
+ u32 = m->n_fds;
+ p = &u32;
+
+ align = sz = 4;
+ break;
+
+ default:
+ align = bus_type_get_alignment(type);
+ sz = bus_type_get_size(type);
+ break;
+ }
+
+ assert(align > 0);
+ assert(sz > 0);
+
+ a = message_extend_body(m, align, sz);
+ if (!a)
+ return -ENOMEM;
+
+ if (IN_SET(type, SD_BUS_TYPE_STRING, SD_BUS_TYPE_OBJECT_PATH)) {
+ *(uint32_t*) a = sz - 5;
+ memcpy((uint8_t*) a + 4, p, sz - 4);
+
+ if (stored)
+ *stored = (const uint8_t*) a + 4;
+
+ } else if (type == SD_BUS_TYPE_SIGNATURE) {
+ *(uint8_t*) a = sz - 2;
+ memcpy((uint8_t*) a + 1, p, sz - 1);
+
+ if (stored)
+ *stored = (const uint8_t*) a + 1;
+ } else {
+ memcpy(a, p, sz);
+
+ if (stored)
+ *stored = a;
+ }
+
+ if (type == SD_BUS_TYPE_UNIX_FD)
+ m->n_fds++;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index++;
+
+ fd = -EBADF;
+ return 0;
+}
+
+_public_ int sd_bus_message_append_basic(sd_bus_message *m, char type, const void *p) {
+ return message_append_basic(m, type, p, NULL);
+}
+
+_public_ int sd_bus_message_append_string_space(
+ sd_bus_message *m,
+ size_t size,
+ char **s) {
+
+ struct bus_container *c;
+ void *a;
+
+ assert_return(m, -EINVAL);
+ assert_return(s, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(!m->poisoned, -ESTALE);
+
+ c = message_get_last_container(m);
+
+ if (c->signature && c->signature[c->index]) {
+ /* Container signature is already set */
+
+ if (c->signature[c->index] != SD_BUS_TYPE_STRING)
+ return -ENXIO;
+ } else {
+ char *e;
+
+ /* Maybe we can append to the signature? But only if this is the top-level container */
+ if (c->enclosing != 0)
+ return -ENXIO;
+
+ e = strextend(&c->signature, CHAR_TO_STR(SD_BUS_TYPE_STRING));
+ if (!e) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+ }
+
+ a = message_extend_body(m, 4, 4 + size + 1);
+ if (!a)
+ return -ENOMEM;
+
+ *(uint32_t*) a = size;
+ *s = (char*) a + 4;
+
+ (*s)[size] = 0;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index++;
+
+ return 0;
+}
+
+_public_ int sd_bus_message_append_string_iovec(
+ sd_bus_message *m,
+ const struct iovec *iov,
+ unsigned n /* should be size_t, but is API now… 😞 */) {
+
+ size_t size;
+ unsigned i;
+ char *p;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(iov || n == 0, -EINVAL);
+ assert_return(!m->poisoned, -ESTALE);
+
+ size = iovec_total_size(iov, n);
+
+ r = sd_bus_message_append_string_space(m, size, &p);
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < n; i++) {
+
+ if (iov[i].iov_base)
+ memcpy(p, iov[i].iov_base, iov[i].iov_len);
+ else
+ memset(p, ' ', iov[i].iov_len);
+
+ p += iov[i].iov_len;
+ }
+
+ return 0;
+}
+
+static int bus_message_open_array(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents,
+ uint32_t **array_size,
+ size_t *begin) {
+
+ unsigned nindex;
+ int alignment;
+ void *a, *op;
+ size_t os;
+ struct bus_body_part *o;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+ assert(array_size);
+ assert(begin);
+
+ if (!signature_is_single(contents, true))
+ return -EINVAL;
+
+ if (c->signature && c->signature[c->index]) {
+
+ /* Verify the existing signature */
+
+ if (c->signature[c->index] != SD_BUS_TYPE_ARRAY)
+ return -ENXIO;
+
+ if (!startswith(c->signature + c->index + 1, contents))
+ return -ENXIO;
+
+ nindex = c->index + 1 + strlen(contents);
+ } else {
+ char *e;
+
+ if (c->enclosing != 0)
+ return -ENXIO;
+
+ /* Extend the existing signature */
+
+ e = strextend(&c->signature, CHAR_TO_STR(SD_BUS_TYPE_ARRAY), contents);
+ if (!e) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+
+ nindex = e - c->signature;
+ }
+
+ alignment = bus_type_get_alignment(contents[0]);
+ if (alignment < 0)
+ return alignment;
+
+ a = message_extend_body(m, 4, 4);
+ if (!a)
+ return -ENOMEM;
+
+ o = m->body_end;
+ op = m->body_end->data;
+ os = m->body_end->size;
+
+ /* Add alignment between size and first element */
+ if (!message_extend_body(m, alignment, 0))
+ return -ENOMEM;
+
+ /* location of array size might have changed so let's readjust a */
+ if (o == m->body_end)
+ a = adjust_pointer(a, op, os, m->body_end->data);
+
+ *(uint32_t*) a = 0;
+ *array_size = a;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index = nindex;
+
+ return 0;
+}
+
+static int bus_message_open_variant(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents) {
+
+ size_t l;
+ void *a;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+
+ if (!signature_is_single(contents, false))
+ return -EINVAL;
+
+ if (*contents == SD_BUS_TYPE_DICT_ENTRY_BEGIN)
+ return -EINVAL;
+
+ if (c->signature && c->signature[c->index]) {
+
+ if (c->signature[c->index] != SD_BUS_TYPE_VARIANT)
+ return -ENXIO;
+
+ } else {
+ char *e;
+
+ if (c->enclosing != 0)
+ return -ENXIO;
+
+ e = strextend(&c->signature, CHAR_TO_STR(SD_BUS_TYPE_VARIANT));
+ if (!e) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+ }
+
+ l = strlen(contents);
+ a = message_extend_body(m, 1, 1 + l + 1);
+ if (!a)
+ return -ENOMEM;
+
+ *(uint8_t*) a = l;
+ memcpy((uint8_t*) a + 1, contents, l + 1);
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index++;
+
+ return 0;
+}
+
+static int bus_message_open_struct(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents,
+ size_t *begin) {
+
+ size_t nindex;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+ assert(begin);
+
+ if (!signature_is_valid(contents, false))
+ return -EINVAL;
+
+ if (c->signature && c->signature[c->index]) {
+ size_t l;
+
+ l = strlen(contents);
+
+ if (c->signature[c->index] != SD_BUS_TYPE_STRUCT_BEGIN ||
+ !startswith(c->signature + c->index + 1, contents) ||
+ c->signature[c->index + 1 + l] != SD_BUS_TYPE_STRUCT_END)
+ return -ENXIO;
+
+ nindex = c->index + 1 + l + 1;
+ } else {
+ char *e;
+
+ if (c->enclosing != 0)
+ return -ENXIO;
+
+ e = strextend(&c->signature, CHAR_TO_STR(SD_BUS_TYPE_STRUCT_BEGIN), contents, CHAR_TO_STR(SD_BUS_TYPE_STRUCT_END));
+ if (!e) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+
+ nindex = e - c->signature;
+ }
+
+ /* Align contents to 8 byte boundary */
+ if (!message_extend_body(m, 8, 0))
+ return -ENOMEM;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index = nindex;
+
+ return 0;
+}
+
+static int bus_message_open_dict_entry(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents,
+ size_t *begin) {
+
+ assert(m);
+ assert(c);
+ assert(contents);
+ assert(begin);
+
+ if (!signature_is_pair(contents))
+ return -EINVAL;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ return -ENXIO;
+
+ if (c->signature && c->signature[c->index]) {
+ size_t l;
+
+ l = strlen(contents);
+
+ if (c->signature[c->index] != SD_BUS_TYPE_DICT_ENTRY_BEGIN ||
+ !startswith(c->signature + c->index + 1, contents) ||
+ c->signature[c->index + 1 + l] != SD_BUS_TYPE_DICT_ENTRY_END)
+ return -ENXIO;
+ } else
+ return -ENXIO;
+
+ /* Align contents to 8 byte boundary */
+ if (!message_extend_body(m, 8, 0))
+ return -ENOMEM;
+
+ return 0;
+}
+
+_public_ int sd_bus_message_open_container(
+ sd_bus_message *m,
+ char type,
+ const char *contents) {
+
+ struct bus_container *c;
+ uint32_t *array_size = NULL;
+ _cleanup_free_ char *signature = NULL;
+ size_t before, begin = 0;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(contents, -EINVAL);
+ assert_return(!m->poisoned, -ESTALE);
+
+ /* Make sure we have space for one more container */
+ if (!GREEDY_REALLOC(m->containers, m->n_containers + 1)) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+
+ c = message_get_last_container(m);
+
+ signature = strdup(contents);
+ if (!signature) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+
+ /* Save old index in the parent container, in case we have to
+ * abort this container */
+ c->saved_index = c->index;
+ before = m->body_size;
+
+ if (type == SD_BUS_TYPE_ARRAY)
+ r = bus_message_open_array(m, c, contents, &array_size, &begin);
+ else if (type == SD_BUS_TYPE_VARIANT)
+ r = bus_message_open_variant(m, c, contents);
+ else if (type == SD_BUS_TYPE_STRUCT)
+ r = bus_message_open_struct(m, c, contents, &begin);
+ else if (type == SD_BUS_TYPE_DICT_ENTRY)
+ r = bus_message_open_dict_entry(m, c, contents, &begin);
+ else
+ r = -EINVAL;
+ if (r < 0)
+ return r;
+
+ /* OK, let's fill it in */
+ m->containers[m->n_containers++] = (struct bus_container) {
+ .enclosing = type,
+ .signature = TAKE_PTR(signature),
+ .array_size = array_size,
+ .before = before,
+ .begin = begin,
+ };
+
+ return 0;
+}
+
+_public_ int sd_bus_message_close_container(sd_bus_message *m) {
+ struct bus_container *c;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(m->n_containers > 0, -EINVAL);
+ assert_return(!m->poisoned, -ESTALE);
+
+ c = message_get_last_container(m);
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ if (c->signature && c->signature[c->index] != 0)
+ return -EINVAL;
+
+ m->n_containers--;
+
+ free(c->signature);
+
+ return 0;
+}
+
+typedef struct {
+ const char *types;
+ unsigned n_struct;
+ unsigned n_array;
+} TypeStack;
+
+static int type_stack_push(TypeStack *stack, unsigned max, unsigned *i, const char *types, unsigned n_struct, unsigned n_array) {
+ assert(stack);
+ assert(max > 0);
+
+ if (*i >= max)
+ return -EINVAL;
+
+ stack[*i].types = types;
+ stack[*i].n_struct = n_struct;
+ stack[*i].n_array = n_array;
+ (*i)++;
+
+ return 0;
+}
+
+static int type_stack_pop(TypeStack *stack, unsigned max, unsigned *i, const char **types, unsigned *n_struct, unsigned *n_array) {
+ assert(stack);
+ assert(max > 0);
+ assert(types);
+ assert(n_struct);
+ assert(n_array);
+
+ if (*i <= 0)
+ return 0;
+
+ (*i)--;
+ *types = stack[*i].types;
+ *n_struct = stack[*i].n_struct;
+ *n_array = stack[*i].n_array;
+
+ return 1;
+}
+
+_public_ int sd_bus_message_appendv(
+ sd_bus_message *m,
+ const char *types,
+ va_list ap) {
+
+ unsigned n_array, n_struct;
+ TypeStack stack[BUS_CONTAINER_DEPTH];
+ unsigned stack_ptr = 0;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(types, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(!m->poisoned, -ESTALE);
+
+ n_array = UINT_MAX;
+ n_struct = strlen(types);
+
+ for (;;) {
+ const char *t;
+
+ if (n_array == 0 || (n_array == UINT_MAX && n_struct == 0)) {
+ r = type_stack_pop(stack, ELEMENTSOF(stack), &stack_ptr, &types, &n_struct, &n_array);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+
+ t = types;
+ if (n_array != UINT_MAX)
+ n_array--;
+ else {
+ types++;
+ n_struct--;
+ }
+
+ switch (*t) {
+
+ case SD_BUS_TYPE_BYTE: {
+ uint8_t x;
+
+ x = (uint8_t) va_arg(ap, int);
+ r = sd_bus_message_append_basic(m, *t, &x);
+ break;
+ }
+
+ case SD_BUS_TYPE_BOOLEAN:
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ case SD_BUS_TYPE_UNIX_FD: {
+ uint32_t x;
+
+ /* We assume a boolean is the same as int32_t */
+ assert_cc(sizeof(int32_t) == sizeof(int));
+
+ x = va_arg(ap, uint32_t);
+ r = sd_bus_message_append_basic(m, *t, &x);
+ break;
+ }
+
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16: {
+ uint16_t x;
+
+ x = (uint16_t) va_arg(ap, int);
+ r = sd_bus_message_append_basic(m, *t, &x);
+ break;
+ }
+
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64: {
+ uint64_t x;
+
+ x = va_arg(ap, uint64_t);
+ r = sd_bus_message_append_basic(m, *t, &x);
+ break;
+ }
+
+ case SD_BUS_TYPE_DOUBLE: {
+ double x;
+
+ x = va_arg(ap, double);
+ r = sd_bus_message_append_basic(m, *t, &x);
+ break;
+ }
+
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE: {
+ const char *x;
+
+ x = va_arg(ap, const char*);
+ r = sd_bus_message_append_basic(m, *t, x);
+ break;
+ }
+
+ case SD_BUS_TYPE_ARRAY: {
+ size_t k;
+
+ r = signature_element_length(t + 1, &k);
+ if (r < 0)
+ return r;
+
+ {
+ char s[k + 1];
+ memcpy(s, t + 1, k);
+ s[k] = 0;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_ARRAY, s);
+ if (r < 0)
+ return r;
+ }
+
+ if (n_array == UINT_MAX) {
+ types += k;
+ n_struct -= k;
+ }
+
+ r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array);
+ if (r < 0)
+ return r;
+
+ types = t + 1;
+ n_struct = k;
+ n_array = va_arg(ap, unsigned);
+
+ break;
+ }
+
+ case SD_BUS_TYPE_VARIANT: {
+ const char *s;
+
+ s = va_arg(ap, const char*);
+ if (!s)
+ return -EINVAL;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_VARIANT, s);
+ if (r < 0)
+ return r;
+
+ r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array);
+ if (r < 0)
+ return r;
+
+ types = s;
+ n_struct = strlen(s);
+ n_array = UINT_MAX;
+
+ break;
+ }
+
+ case SD_BUS_TYPE_STRUCT_BEGIN:
+ case SD_BUS_TYPE_DICT_ENTRY_BEGIN: {
+ size_t k;
+
+ r = signature_element_length(t, &k);
+ if (r < 0)
+ return r;
+ if (k < 2)
+ return -ERANGE;
+
+ {
+ char s[k - 1];
+
+ memcpy(s, t + 1, k - 2);
+ s[k - 2] = 0;
+
+ r = sd_bus_message_open_container(m, *t == SD_BUS_TYPE_STRUCT_BEGIN ? SD_BUS_TYPE_STRUCT : SD_BUS_TYPE_DICT_ENTRY, s);
+ if (r < 0)
+ return r;
+ }
+
+ if (n_array == UINT_MAX) {
+ types += k - 1;
+ n_struct -= k - 1;
+ }
+
+ r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array);
+ if (r < 0)
+ return r;
+
+ types = t + 1;
+ n_struct = k - 2;
+ n_array = UINT_MAX;
+
+ break;
+ }
+
+ default:
+ r = -EINVAL;
+ }
+
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+}
+
+_public_ int sd_bus_message_append(sd_bus_message *m, const char *types, ...) {
+ va_list ap;
+ int r;
+
+ va_start(ap, types);
+ r = sd_bus_message_appendv(m, types, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_bus_message_append_array_space(
+ sd_bus_message *m,
+ char type,
+ size_t size,
+ void **ptr) {
+
+ ssize_t align, sz;
+ void *a;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(bus_type_is_trivial(type) && type != SD_BUS_TYPE_BOOLEAN, -EINVAL);
+ assert_return(ptr || size == 0, -EINVAL);
+ assert_return(!m->poisoned, -ESTALE);
+
+ align = bus_type_get_alignment(type);
+ sz = bus_type_get_size(type);
+
+ assert_se(align > 0);
+ assert_se(sz > 0);
+
+ if (size % sz != 0)
+ return -EINVAL;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_ARRAY, CHAR_TO_STR(type));
+ if (r < 0)
+ return r;
+
+ a = message_extend_body(m, align, size);
+ if (!a)
+ return -ENOMEM;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ *ptr = a;
+ return 0;
+}
+
+_public_ int sd_bus_message_append_array(
+ sd_bus_message *m,
+ char type,
+ const void *ptr,
+ size_t size) {
+ int r;
+ void *p;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(bus_type_is_trivial(type), -EINVAL);
+ assert_return(ptr || size == 0, -EINVAL);
+ assert_return(!m->poisoned, -ESTALE);
+
+ r = sd_bus_message_append_array_space(m, type, size, &p);
+ if (r < 0)
+ return r;
+
+ memcpy_safe(p, ptr, size);
+
+ return 0;
+}
+
+_public_ int sd_bus_message_append_array_iovec(
+ sd_bus_message *m,
+ char type,
+ const struct iovec *iov,
+ unsigned n /* should be size_t, but is API now… 😞 */) {
+
+ size_t size;
+ unsigned i;
+ void *p;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(bus_type_is_trivial(type), -EINVAL);
+ assert_return(iov || n == 0, -EINVAL);
+ assert_return(!m->poisoned, -ESTALE);
+
+ size = iovec_total_size(iov, n);
+
+ r = sd_bus_message_append_array_space(m, type, size, &p);
+ if (r < 0)
+ return r;
+
+ for (i = 0; i < n; i++) {
+
+ if (iov[i].iov_base)
+ memcpy(p, iov[i].iov_base, iov[i].iov_len);
+ else
+ memzero(p, iov[i].iov_len);
+
+ p = (uint8_t*) p + iov[i].iov_len;
+ }
+
+ return 0;
+}
+
+_public_ int sd_bus_message_append_array_memfd(
+ sd_bus_message *m,
+ char type,
+ int memfd,
+ uint64_t offset,
+ uint64_t size) {
+
+ _cleanup_close_ int copy_fd = -EBADF;
+ struct bus_body_part *part;
+ ssize_t align, sz;
+ uint64_t real_size;
+ void *a;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(memfd >= 0, -EBADF);
+ assert_return(bus_type_is_trivial(type), -EINVAL);
+ assert_return(size > 0, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(!m->poisoned, -ESTALE);
+
+ r = memfd_set_sealed(memfd);
+ if (r < 0)
+ return r;
+
+ copy_fd = fcntl(memfd, F_DUPFD_CLOEXEC, 3);
+ if (copy_fd < 0)
+ return copy_fd;
+
+ r = memfd_get_size(memfd, &real_size);
+ if (r < 0)
+ return r;
+
+ if (offset == 0 && size == UINT64_MAX)
+ size = real_size;
+ else if (offset + size > real_size)
+ return -EMSGSIZE;
+
+ align = bus_type_get_alignment(type);
+ sz = bus_type_get_size(type);
+
+ assert_se(align > 0);
+ assert_se(sz > 0);
+
+ if (offset % align != 0)
+ return -EINVAL;
+
+ if (size % sz != 0)
+ return -EINVAL;
+
+ if (size > (uint64_t) UINT32_MAX)
+ return -EINVAL;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_ARRAY, CHAR_TO_STR(type));
+ if (r < 0)
+ return r;
+
+ a = message_extend_body(m, align, 0);
+ if (!a)
+ return -ENOMEM;
+
+ part = message_append_part(m);
+ if (!part)
+ return -ENOMEM;
+
+ part->memfd = copy_fd;
+ part->memfd_offset = offset;
+ part->sealed = true;
+ part->size = size;
+ copy_fd = -EBADF;
+
+ m->body_size += size;
+ message_extend_containers(m, size);
+
+ return sd_bus_message_close_container(m);
+}
+
+_public_ int sd_bus_message_append_string_memfd(
+ sd_bus_message *m,
+ int memfd,
+ uint64_t offset,
+ uint64_t size) {
+
+ _cleanup_close_ int copy_fd = -EBADF;
+ struct bus_body_part *part;
+ struct bus_container *c;
+ uint64_t real_size;
+ void *a;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(memfd >= 0, -EBADF);
+ assert_return(size > 0, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(!m->poisoned, -ESTALE);
+
+ r = memfd_set_sealed(memfd);
+ if (r < 0)
+ return r;
+
+ copy_fd = fcntl(memfd, FD_CLOEXEC, 3);
+ if (copy_fd < 0)
+ return copy_fd;
+
+ r = memfd_get_size(memfd, &real_size);
+ if (r < 0)
+ return r;
+
+ if (offset == 0 && size == UINT64_MAX)
+ size = real_size;
+ else if (offset + size > real_size)
+ return -EMSGSIZE;
+
+ /* We require this to be NUL terminated */
+ if (size == 0)
+ return -EINVAL;
+
+ if (size > (uint64_t) UINT32_MAX)
+ return -EINVAL;
+
+ c = message_get_last_container(m);
+ if (c->signature && c->signature[c->index]) {
+ /* Container signature is already set */
+
+ if (c->signature[c->index] != SD_BUS_TYPE_STRING)
+ return -ENXIO;
+ } else {
+ char *e;
+
+ /* Maybe we can append to the signature? But only if this is the top-level container */
+ if (c->enclosing != 0)
+ return -ENXIO;
+
+ e = strextend(&c->signature, CHAR_TO_STR(SD_BUS_TYPE_STRING));
+ if (!e) {
+ m->poisoned = true;
+ return -ENOMEM;
+ }
+ }
+
+ a = message_extend_body(m, 4, 4);
+ if (!a)
+ return -ENOMEM;
+
+ *(uint32_t*) a = size - 1;
+
+ part = message_append_part(m);
+ if (!part)
+ return -ENOMEM;
+
+ part->memfd = copy_fd;
+ part->memfd_offset = offset;
+ part->sealed = true;
+ part->size = size;
+ copy_fd = -EBADF;
+
+ m->body_size += size;
+ message_extend_containers(m, size);
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index++;
+
+ return 0;
+}
+
+_public_ int sd_bus_message_append_strv(sd_bus_message *m, char **l) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(!m->poisoned, -ESTALE);
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, l) {
+ r = sd_bus_message_append_basic(m, 's', *i);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_message_close_container(m);
+}
+
+static int bus_message_close_header(sd_bus_message *m) {
+ assert(m);
+
+ /* The actual user data is finished now, we just complete the variant and struct now. Remember
+ * this position, so that during parsing we know where to put the outer container end. */
+ m->user_body_size = m->body_size;
+
+ m->header->fields_size = m->fields_size;
+ m->header->body_size = m->body_size;
+
+ return 0;
+}
+
+_public_ int sd_bus_message_seal(sd_bus_message *m, uint64_t cookie, uint64_t timeout_usec) {
+ struct bus_body_part *part;
+ size_t a;
+ unsigned i;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ if (m->sealed)
+ return -EPERM;
+
+ if (m->n_containers > 0)
+ return -EBADMSG;
+
+ if (m->poisoned)
+ return -ESTALE;
+
+ if (cookie > UINT32_MAX)
+ return -EOPNOTSUPP;
+
+ /* In vtables the return signature of method calls is listed,
+ * let's check if they match if this is a response */
+ if (m->header->type == SD_BUS_MESSAGE_METHOD_RETURN &&
+ m->enforced_reply_signature &&
+ !streq(strempty(m->root_container.signature), m->enforced_reply_signature))
+ return -ENOMSG;
+
+ /* If there's a non-trivial signature set, then add it in here */
+ if (!isempty(m->root_container.signature)) {
+ r = message_append_field_signature(m, BUS_MESSAGE_HEADER_SIGNATURE, m->root_container.signature, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ if (m->n_fds > 0) {
+ r = message_append_field_uint32(m, BUS_MESSAGE_HEADER_UNIX_FDS, m->n_fds);
+ if (r < 0)
+ return r;
+ }
+
+ r = bus_message_close_header(m);
+ if (r < 0)
+ return r;
+
+ m->header->serial = (uint32_t) cookie;
+
+ m->timeout = m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED ? 0 : timeout_usec;
+
+ /* Add padding at the end of the fields part, since we know
+ * the body needs to start at an 8 byte alignment. We made
+ * sure we allocated enough space for this, so all we need to
+ * do here is to zero it out. */
+ a = ALIGN8(m->fields_size) - m->fields_size;
+ if (a > 0)
+ memzero((uint8_t*) BUS_MESSAGE_FIELDS(m) + m->fields_size, a);
+
+ /* If this is something we can send as memfd, then let's seal
+ the memfd now. Note that we can send memfds as payload only
+ for directed messages, and not for broadcasts. */
+ if (m->destination && m->bus->use_memfd) {
+ MESSAGE_FOREACH_PART(part, i, m)
+ if (part->memfd >= 0 &&
+ !part->sealed &&
+ (part->size > MEMFD_MIN_SIZE || m->bus->use_memfd < 0) &&
+ part != m->body_end) { /* The last part may never be sent as memfd */
+ uint64_t sz;
+
+ /* Try to seal it if that makes
+ * sense. First, unmap our own map to
+ * make sure we don't keep it busy. */
+ bus_body_part_unmap(part);
+
+ /* Then, sync up real memfd size */
+ sz = part->size;
+ r = memfd_set_size(part->memfd, sz);
+ if (r < 0)
+ return r;
+
+ /* Finally, try to seal */
+ if (memfd_set_sealed(part->memfd) >= 0)
+ part->sealed = true;
+ }
+ }
+
+ m->root_container.end = m->user_body_size;
+ m->root_container.index = 0;
+
+ m->sealed = true;
+
+ return 0;
+}
+
+int bus_body_part_map(struct bus_body_part *part) {
+ void *p;
+ size_t psz, shift;
+
+ assert_se(part);
+
+ if (part->data)
+ return 0;
+
+ if (part->size <= 0)
+ return 0;
+
+ /* For smaller zero parts (as used for padding) we don't need to map anything... */
+ if (part->memfd < 0 && part->is_zero && part->size < 8) {
+ static const uint8_t zeroes[7] = { };
+ part->data = (void*) zeroes;
+ return 0;
+ }
+
+ shift = PAGE_OFFSET(part->memfd_offset);
+ psz = PAGE_ALIGN(part->size + shift);
+ if (psz >= SIZE_MAX)
+ return -EFBIG;
+
+ if (part->memfd >= 0)
+ p = mmap(NULL, psz, PROT_READ, MAP_PRIVATE, part->memfd, part->memfd_offset - shift);
+ else if (part->is_zero)
+ p = mmap(NULL, psz, PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ else
+ return -EINVAL;
+
+ if (p == MAP_FAILED)
+ return -errno;
+
+ part->mapped = psz;
+ part->mmap_begin = p;
+ part->data = (uint8_t*) p + shift;
+ part->munmap_this = true;
+
+ return 0;
+}
+
+void bus_body_part_unmap(struct bus_body_part *part) {
+
+ assert_se(part);
+
+ if (part->memfd < 0)
+ return;
+
+ if (!part->mmap_begin)
+ return;
+
+ if (!part->munmap_this)
+ return;
+
+ assert_se(munmap(part->mmap_begin, part->mapped) == 0);
+
+ part->mmap_begin = NULL;
+ part->data = NULL;
+ part->mapped = 0;
+ part->munmap_this = false;
+
+ return;
+}
+
+static bool message_end_of_signature(sd_bus_message *m) {
+ struct bus_container *c;
+
+ assert(m);
+
+ c = message_get_last_container(m);
+ return !c->signature || c->signature[c->index] == 0;
+}
+
+static bool message_end_of_array(sd_bus_message *m, size_t index) {
+ struct bus_container *c;
+
+ assert(m);
+
+ c = message_get_last_container(m);
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ return false;
+
+ assert(c->array_size);
+ return index >= c->begin + BUS_MESSAGE_BSWAP32(m, *c->array_size);
+}
+
+_public_ int sd_bus_message_at_end(sd_bus_message *m, int complete) {
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+
+ if (complete && m->n_containers > 0)
+ return false;
+
+ if (message_end_of_signature(m))
+ return true;
+
+ if (message_end_of_array(m, m->rindex))
+ return true;
+
+ return false;
+}
+
+static struct bus_body_part* find_part(sd_bus_message *m, size_t index, size_t sz, void **p) {
+ struct bus_body_part *part;
+ size_t begin;
+ int r;
+
+ assert(m);
+
+ if (m->cached_rindex_part && index >= m->cached_rindex_part_begin) {
+ part = m->cached_rindex_part;
+ begin = m->cached_rindex_part_begin;
+ } else {
+ part = &m->body;
+ begin = 0;
+ }
+
+ while (part) {
+ if (index < begin)
+ return NULL;
+
+ if (index + sz <= begin + part->size) {
+
+ r = bus_body_part_map(part);
+ if (r < 0)
+ return NULL;
+
+ if (p)
+ *p = part->data ? (uint8_t*) part->data + index - begin
+ : NULL; /* Avoid dereferencing a NULL pointer. */
+
+ m->cached_rindex_part = part;
+ m->cached_rindex_part_begin = begin;
+
+ return part;
+ }
+
+ begin += part->size;
+ part = part->next;
+ }
+
+ return NULL;
+}
+
+static int message_peek_body(
+ sd_bus_message *m,
+ size_t *rindex,
+ size_t align,
+ size_t nbytes,
+ void **ret) {
+
+ size_t k, start, end, padding;
+ struct bus_body_part *part;
+ uint8_t *q;
+
+ assert(m);
+ assert(rindex);
+ assert(align > 0);
+
+ start = ALIGN_TO(*rindex, align);
+ if (start > m->user_body_size)
+ return -EBADMSG;
+
+ padding = start - *rindex;
+
+ /* Avoid overflow below */
+ if (nbytes > SIZE_MAX - start)
+ return -EBADMSG;
+
+ end = start + nbytes;
+ if (end > m->user_body_size)
+ return -EBADMSG;
+
+ part = find_part(m, *rindex, padding, (void**) &q);
+ if (!part)
+ return -EBADMSG;
+
+ if (q) {
+ /* Verify padding */
+ for (k = 0; k < padding; k++)
+ if (q[k] != 0)
+ return -EBADMSG;
+ }
+
+ part = find_part(m, start, nbytes, (void**) &q);
+ if (!part || (nbytes > 0 && !q))
+ return -EBADMSG;
+
+ *rindex = end;
+
+ if (ret)
+ *ret = q;
+
+ return 0;
+}
+
+static bool validate_nul(const char *s, size_t l) {
+
+ /* Check for NUL chars in the string */
+ if (memchr(s, 0, l))
+ return false;
+
+ /* Check for NUL termination */
+ if (s[l] != 0)
+ return false;
+
+ return true;
+}
+
+static bool validate_string(const char *s, size_t l) {
+
+ if (!validate_nul(s, l))
+ return false;
+
+ /* Check if valid UTF8 */
+ if (!utf8_is_valid(s))
+ return false;
+
+ return true;
+}
+
+static bool validate_signature(const char *s, size_t l) {
+
+ if (!validate_nul(s, l))
+ return false;
+
+ /* Check if valid signature */
+ if (!signature_is_valid(s, true))
+ return false;
+
+ return true;
+}
+
+static bool validate_object_path(const char *s, size_t l) {
+
+ if (!validate_nul(s, l))
+ return false;
+
+ if (!object_path_is_valid(s))
+ return false;
+
+ return true;
+}
+
+_public_ int sd_bus_message_read_basic(sd_bus_message *m, char type, void *p) {
+ struct bus_container *c;
+ size_t rindex;
+ void *q;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(bus_type_is_basic(type), -EINVAL);
+
+ if (message_end_of_signature(m))
+ return -ENXIO;
+
+ if (message_end_of_array(m, m->rindex))
+ return 0;
+
+ c = message_get_last_container(m);
+ if (c->signature[c->index] != type)
+ return -ENXIO;
+
+ rindex = m->rindex;
+
+ if (IN_SET(type, SD_BUS_TYPE_STRING, SD_BUS_TYPE_OBJECT_PATH)) {
+ uint32_t l;
+ bool ok;
+
+ r = message_peek_body(m, &rindex, 4, 4, &q);
+ if (r < 0)
+ return r;
+
+ l = BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q);
+ if (l == UINT32_MAX)
+ /* avoid overflow right below */
+ return -EBADMSG;
+
+ r = message_peek_body(m, &rindex, 1, l+1, &q);
+ if (r < 0)
+ return r;
+
+ if (type == SD_BUS_TYPE_OBJECT_PATH)
+ ok = validate_object_path(q, l);
+ else
+ ok = validate_string(q, l);
+ if (!ok)
+ return -EBADMSG;
+
+ if (p)
+ *(const char**) p = q;
+
+ } else if (type == SD_BUS_TYPE_SIGNATURE) {
+ uint8_t l;
+
+ r = message_peek_body(m, &rindex, 1, 1, &q);
+ if (r < 0)
+ return r;
+
+ l = *(uint8_t*) q;
+ if (l == UINT8_MAX)
+ /* avoid overflow right below */
+ return -EBADMSG;
+
+ r = message_peek_body(m, &rindex, 1, l+1, &q);
+ if (r < 0)
+ return r;
+
+ if (!validate_signature(q, l))
+ return -EBADMSG;
+
+ if (p)
+ *(const char**) p = q;
+
+ } else {
+ ssize_t sz, align;
+
+ align = bus_type_get_alignment(type);
+ assert(align > 0);
+
+ sz = bus_type_get_size(type);
+ assert(sz > 0);
+
+ r = message_peek_body(m, &rindex, align, sz, &q);
+ if (r < 0)
+ return r;
+
+ switch (type) {
+
+ case SD_BUS_TYPE_BYTE:
+ if (p)
+ *(uint8_t*) p = *(uint8_t*) q;
+ break;
+
+ case SD_BUS_TYPE_BOOLEAN:
+ if (p)
+ *(int*) p = !!*(uint32_t*) q;
+ break;
+
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ if (p)
+ *(uint16_t*) p = BUS_MESSAGE_BSWAP16(m, *(uint16_t*) q);
+ break;
+
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ if (p)
+ *(uint32_t*) p = BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q);
+ break;
+
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ if (p)
+ *(uint64_t*) p = BUS_MESSAGE_BSWAP64(m, *(uint64_t*) q);
+ break;
+
+ case SD_BUS_TYPE_UNIX_FD: {
+ uint32_t j;
+
+ j = BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q);
+ if (j >= m->n_fds)
+ return -EBADMSG;
+
+ if (p)
+ *(int*) p = m->fds[j];
+ break;
+ }
+
+ default:
+ assert_not_reached();
+ }
+ }
+
+ m->rindex = rindex;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index++;
+
+ return 1;
+}
+
+static int bus_message_enter_array(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents,
+ uint32_t **array_size) {
+
+ size_t rindex;
+ void *q;
+ int alignment, r;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+ assert(array_size);
+
+ if (!signature_is_single(contents, true))
+ return -EINVAL;
+
+ if (!c->signature || c->signature[c->index] == 0)
+ return -ENXIO;
+
+ if (c->signature[c->index] != SD_BUS_TYPE_ARRAY)
+ return -ENXIO;
+
+ if (!startswith(c->signature + c->index + 1, contents))
+ return -ENXIO;
+
+ rindex = m->rindex;
+
+ r = message_peek_body(m, &rindex, 4, 4, &q);
+ if (r < 0)
+ return r;
+
+ if (BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q) > BUS_ARRAY_MAX_SIZE)
+ return -EBADMSG;
+
+ alignment = bus_type_get_alignment(contents[0]);
+ if (alignment < 0)
+ return alignment;
+
+ r = message_peek_body(m, &rindex, alignment, 0, NULL);
+ if (r < 0)
+ return r;
+
+ *array_size = (uint32_t*) q;
+
+ m->rindex = rindex;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index += 1 + strlen(contents);
+
+ return 1;
+}
+
+static int bus_message_enter_variant(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents) {
+
+ size_t rindex;
+ uint8_t l;
+ void *q;
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+
+ if (!signature_is_single(contents, false))
+ return -EINVAL;
+
+ if (*contents == SD_BUS_TYPE_DICT_ENTRY_BEGIN)
+ return -EINVAL;
+
+ if (!c->signature || c->signature[c->index] == 0)
+ return -ENXIO;
+
+ if (c->signature[c->index] != SD_BUS_TYPE_VARIANT)
+ return -ENXIO;
+
+ rindex = m->rindex;
+
+ r = message_peek_body(m, &rindex, 1, 1, &q);
+ if (r < 0)
+ return r;
+
+ l = *(uint8_t*) q;
+ if (l == UINT8_MAX)
+ /* avoid overflow right below */
+ return -EBADMSG;
+
+ r = message_peek_body(m, &rindex, 1, l+1, &q);
+ if (r < 0)
+ return r;
+
+ if (!validate_signature(q, l))
+ return -EBADMSG;
+
+ if (!streq(q, contents))
+ return -ENXIO;
+
+ m->rindex = rindex;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index++;
+
+ return 1;
+}
+
+static int bus_message_enter_struct(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents) {
+
+ size_t l;
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+
+ if (!signature_is_valid(contents, false))
+ return -EINVAL;
+
+ if (!c->signature || c->signature[c->index] == 0)
+ return -ENXIO;
+
+ l = strlen(contents);
+
+ if (c->signature[c->index] != SD_BUS_TYPE_STRUCT_BEGIN ||
+ !startswith(c->signature + c->index + 1, contents) ||
+ c->signature[c->index + 1 + l] != SD_BUS_TYPE_STRUCT_END)
+ return -ENXIO;
+
+ r = message_peek_body(m, &m->rindex, 8, 0, NULL);
+ if (r < 0)
+ return r;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index += 1 + l + 1;
+
+ return 1;
+}
+
+static int bus_message_enter_dict_entry(
+ sd_bus_message *m,
+ struct bus_container *c,
+ const char *contents) {
+
+ size_t l;
+ int r;
+
+ assert(m);
+ assert(c);
+ assert(contents);
+
+ if (!signature_is_pair(contents))
+ return -EINVAL;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ return -ENXIO;
+
+ if (!c->signature || c->signature[c->index] == 0)
+ return 0;
+
+ l = strlen(contents);
+
+ if (c->signature[c->index] != SD_BUS_TYPE_DICT_ENTRY_BEGIN ||
+ !startswith(c->signature + c->index + 1, contents) ||
+ c->signature[c->index + 1 + l] != SD_BUS_TYPE_DICT_ENTRY_END)
+ return -ENXIO;
+
+ r = message_peek_body(m, &m->rindex, 8, 0, NULL);
+ if (r < 0)
+ return r;
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY)
+ c->index += 1 + l + 1;
+
+ return 1;
+}
+
+_public_ int sd_bus_message_enter_container(sd_bus_message *m,
+ char type,
+ const char *contents) {
+ struct bus_container *c;
+ uint32_t *array_size = NULL;
+ _cleanup_free_ char *signature = NULL;
+ size_t before;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(type != 0 || !contents, -EINVAL);
+
+ if (type == 0 || !contents) {
+ const char *cc;
+ char tt;
+
+ /* Allow entering into anonymous containers */
+ r = sd_bus_message_peek_type(m, &tt, &cc);
+ if (r < 0)
+ return r;
+
+ if (type != 0 && type != tt)
+ return -ENXIO;
+
+ if (contents && !streq(contents, cc))
+ return -ENXIO;
+
+ type = tt;
+ contents = cc;
+ }
+
+ /*
+ * We enforce a global limit on container depth, that is much
+ * higher than the 32 structs and 32 arrays the specification
+ * mandates. This is simpler to implement for us, and we need
+ * this only to ensure our container array doesn't grow
+ * without bounds. We are happy to return any data from a
+ * message as long as the data itself is valid, even if the
+ * overall message might be not.
+ *
+ * Note that the message signature is validated when
+ * parsing the headers, and that validation does check the
+ * 32/32 limit.
+ *
+ * Note that the specification defines no limits on the depth
+ * of stacked variants, but we do.
+ */
+ if (m->n_containers >= BUS_CONTAINER_DEPTH)
+ return -EBADMSG;
+
+ if (!GREEDY_REALLOC(m->containers, m->n_containers + 1))
+ return -ENOMEM;
+
+ if (message_end_of_signature(m))
+ return -ENXIO;
+
+ if (message_end_of_array(m, m->rindex))
+ return 0;
+
+ c = message_get_last_container(m);
+
+ signature = strdup(contents);
+ if (!signature)
+ return -ENOMEM;
+
+ c->saved_index = c->index;
+ before = m->rindex;
+
+ if (type == SD_BUS_TYPE_ARRAY)
+ r = bus_message_enter_array(m, c, contents, &array_size);
+ else if (type == SD_BUS_TYPE_VARIANT)
+ r = bus_message_enter_variant(m, c, contents);
+ else if (type == SD_BUS_TYPE_STRUCT)
+ r = bus_message_enter_struct(m, c, contents);
+ else if (type == SD_BUS_TYPE_DICT_ENTRY)
+ r = bus_message_enter_dict_entry(m, c, contents);
+ else
+ r = -EINVAL;
+ if (r <= 0)
+ return r;
+
+ /* OK, let's fill it in */
+ m->containers[m->n_containers++] = (struct bus_container) {
+ .enclosing = type,
+ .signature = TAKE_PTR(signature),
+
+ .before = before,
+ .begin = m->rindex,
+ /* Unary type has fixed size of 1, but virtual size of 0 */
+ .end = m->rindex,
+ .array_size = array_size,
+ };
+
+ return 1;
+}
+
+_public_ int sd_bus_message_exit_container(sd_bus_message *m) {
+ struct bus_container *c;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(m->n_containers > 0, -ENXIO);
+
+ c = message_get_last_container(m);
+
+ if (c->enclosing != SD_BUS_TYPE_ARRAY) {
+ if (c->signature && c->signature[c->index] != 0)
+ return -EBUSY;
+ }
+
+ if (c->enclosing == SD_BUS_TYPE_ARRAY) {
+ uint32_t l;
+
+ l = BUS_MESSAGE_BSWAP32(m, *c->array_size);
+ if (c->begin + l != m->rindex)
+ return -EBUSY;
+ }
+
+ message_free_last_container(m);
+
+ return 1;
+}
+
+static void message_quit_container(sd_bus_message *m) {
+ struct bus_container *c;
+
+ assert(m);
+ assert(m->sealed);
+ assert(m->n_containers > 0);
+
+ /* Undo seeks */
+ c = message_get_last_container(m);
+ assert(m->rindex >= c->before);
+ m->rindex = c->before;
+
+ /* Free container */
+ message_free_last_container(m);
+
+ /* Correct index of new top-level container */
+ c = message_get_last_container(m);
+ c->index = c->saved_index;
+}
+
+_public_ int sd_bus_message_peek_type(sd_bus_message *m, char *type, const char **contents) {
+ struct bus_container *c;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+
+ if (message_end_of_signature(m))
+ goto eof;
+
+ if (message_end_of_array(m, m->rindex))
+ goto eof;
+
+ c = message_get_last_container(m);
+
+ if (bus_type_is_basic(c->signature[c->index])) {
+ if (contents)
+ *contents = NULL;
+ if (type)
+ *type = c->signature[c->index];
+ return 1;
+ }
+
+ if (c->signature[c->index] == SD_BUS_TYPE_ARRAY) {
+
+ if (contents) {
+ size_t l;
+
+ r = signature_element_length(c->signature+c->index+1, &l);
+ if (r < 0)
+ return r;
+
+ /* signature_element_length does verification internally */
+
+ /* The array element must not be empty */
+ assert(l >= 1);
+ if (free_and_strndup(&c->peeked_signature,
+ c->signature + c->index + 1, l) < 0)
+ return -ENOMEM;
+
+ *contents = c->peeked_signature;
+ }
+
+ if (type)
+ *type = SD_BUS_TYPE_ARRAY;
+
+ return 1;
+ }
+
+ if (IN_SET(c->signature[c->index], SD_BUS_TYPE_STRUCT_BEGIN, SD_BUS_TYPE_DICT_ENTRY_BEGIN)) {
+
+ if (contents) {
+ size_t l;
+
+ r = signature_element_length(c->signature+c->index, &l);
+ if (r < 0)
+ return r;
+
+ assert(l >= 3);
+ if (free_and_strndup(&c->peeked_signature,
+ c->signature + c->index + 1, l - 2) < 0)
+ return -ENOMEM;
+
+ *contents = c->peeked_signature;
+ }
+
+ if (type)
+ *type = c->signature[c->index] == SD_BUS_TYPE_STRUCT_BEGIN ? SD_BUS_TYPE_STRUCT : SD_BUS_TYPE_DICT_ENTRY;
+
+ return 1;
+ }
+
+ if (c->signature[c->index] == SD_BUS_TYPE_VARIANT) {
+ if (contents) {
+ size_t rindex, l;
+ void *q;
+
+ rindex = m->rindex;
+ r = message_peek_body(m, &rindex, 1, 1, &q);
+ if (r < 0)
+ return r;
+
+ l = *(uint8_t*) q;
+ if (l == UINT8_MAX)
+ /* avoid overflow right below */
+ return -EBADMSG;
+
+ r = message_peek_body(m, &rindex, 1, l+1, &q);
+ if (r < 0)
+ return r;
+
+ if (!validate_signature(q, l))
+ return -EBADMSG;
+
+ *contents = q;
+ }
+
+ if (type)
+ *type = SD_BUS_TYPE_VARIANT;
+
+ return 1;
+ }
+
+ return -EINVAL;
+
+eof:
+ if (type)
+ *type = 0;
+ if (contents)
+ *contents = NULL;
+ return 0;
+}
+
+_public_ int sd_bus_message_rewind(sd_bus_message *m, int complete) {
+ struct bus_container *c;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+
+ if (complete) {
+ message_reset_containers(m);
+ m->rindex = 0;
+
+ c = message_get_last_container(m);
+ } else {
+ c = message_get_last_container(m);
+
+ c->index = 0;
+ m->rindex = c->begin;
+ }
+
+ return !isempty(c->signature);
+}
+
+_public_ int sd_bus_message_readv(
+ sd_bus_message *m,
+ const char *types,
+ va_list ap) {
+
+ unsigned n_array, n_struct;
+ TypeStack stack[BUS_CONTAINER_DEPTH];
+ unsigned stack_ptr = 0;
+ unsigned n_loop = 0;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(types, -EINVAL);
+
+ if (isempty(types))
+ return 0;
+
+ /* Ideally, we'd just call ourselves recursively on every
+ * complex type. However, the state of a va_list that is
+ * passed to a function is undefined after that function
+ * returns. This means we need to decode the va_list linearly
+ * in a single stackframe. We hence implement our own
+ * home-grown stack in an array. */
+
+ n_array = UINT_MAX; /* length of current array entries */
+ n_struct = strlen(types); /* length of current struct contents signature */
+
+ for (;;) {
+ const char *t;
+
+ n_loop++;
+
+ if (n_array == 0 || (n_array == UINT_MAX && n_struct == 0)) {
+ r = type_stack_pop(stack, ELEMENTSOF(stack), &stack_ptr, &types, &n_struct, &n_array);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+
+ t = types;
+ if (n_array != UINT_MAX)
+ n_array--;
+ else {
+ types++;
+ n_struct--;
+ }
+
+ switch (*t) {
+
+ case SD_BUS_TYPE_BYTE:
+ case SD_BUS_TYPE_BOOLEAN:
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE:
+ case SD_BUS_TYPE_UNIX_FD: {
+ void *p;
+
+ p = va_arg(ap, void*);
+ r = sd_bus_message_read_basic(m, *t, p);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (n_loop <= 1)
+ return 0;
+
+ return -ENXIO;
+ }
+
+ break;
+ }
+
+ case SD_BUS_TYPE_ARRAY: {
+ size_t k;
+
+ r = signature_element_length(t + 1, &k);
+ if (r < 0)
+ return r;
+
+ {
+ char s[k + 1];
+ memcpy(s, t + 1, k);
+ s[k] = 0;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, s);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (n_loop <= 1)
+ return 0;
+
+ return -ENXIO;
+ }
+ }
+
+ if (n_array == UINT_MAX) {
+ types += k;
+ n_struct -= k;
+ }
+
+ r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array);
+ if (r < 0)
+ return r;
+
+ types = t + 1;
+ n_struct = k;
+ n_array = va_arg(ap, unsigned);
+
+ break;
+ }
+
+ case SD_BUS_TYPE_VARIANT: {
+ const char *s;
+
+ s = va_arg(ap, const char *);
+ if (!s)
+ return -EINVAL;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, s);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (n_loop <= 1)
+ return 0;
+
+ return -ENXIO;
+ }
+
+ r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array);
+ if (r < 0)
+ return r;
+
+ types = s;
+ n_struct = strlen(s);
+ n_array = UINT_MAX;
+
+ break;
+ }
+
+ case SD_BUS_TYPE_STRUCT_BEGIN:
+ case SD_BUS_TYPE_DICT_ENTRY_BEGIN: {
+ size_t k;
+
+ r = signature_element_length(t, &k);
+ if (r < 0)
+ return r;
+ if (k < 2)
+ return -ERANGE;
+
+ {
+ char s[k - 1];
+ memcpy(s, t + 1, k - 2);
+ s[k - 2] = 0;
+
+ r = sd_bus_message_enter_container(m, *t == SD_BUS_TYPE_STRUCT_BEGIN ? SD_BUS_TYPE_STRUCT : SD_BUS_TYPE_DICT_ENTRY, s);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (n_loop <= 1)
+ return 0;
+ return -ENXIO;
+ }
+ }
+
+ if (n_array == UINT_MAX) {
+ types += k - 1;
+ n_struct -= k - 1;
+ }
+
+ r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array);
+ if (r < 0)
+ return r;
+
+ types = t + 1;
+ n_struct = k - 2;
+ n_array = UINT_MAX;
+
+ break;
+ }
+
+ default:
+ return -EINVAL;
+ }
+ }
+
+ return 1;
+}
+
+_public_ int sd_bus_message_read(sd_bus_message *m, const char *types, ...) {
+ va_list ap;
+ int r;
+
+ va_start(ap, types);
+ r = sd_bus_message_readv(m, types, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_bus_message_skip(sd_bus_message *m, const char *types) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+
+ /* If types is NULL, read exactly one element */
+ if (!types) {
+ struct bus_container *c;
+ size_t l;
+
+ if (message_end_of_signature(m))
+ return -ENXIO;
+
+ if (message_end_of_array(m, m->rindex))
+ return 0;
+
+ c = message_get_last_container(m);
+
+ r = signature_element_length(c->signature + c->index, &l);
+ if (r < 0)
+ return r;
+
+ types = strndupa_safe(c->signature + c->index, l);
+ }
+
+ switch (*types) {
+
+ case 0: /* Nothing to drop */
+ return 0;
+
+ case SD_BUS_TYPE_BYTE:
+ case SD_BUS_TYPE_BOOLEAN:
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE:
+ case SD_BUS_TYPE_UNIX_FD:
+
+ r = sd_bus_message_read_basic(m, *types, NULL);
+ if (r <= 0)
+ return r;
+
+ r = sd_bus_message_skip(m, types + 1);
+ if (r < 0)
+ return r;
+
+ return 1;
+
+ case SD_BUS_TYPE_ARRAY: {
+ size_t k;
+
+ r = signature_element_length(types + 1, &k);
+ if (r < 0)
+ return r;
+
+ {
+ char s[k+1];
+ memcpy(s, types+1, k);
+ s[k] = 0;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, s);
+ if (r <= 0)
+ return r;
+
+ for (;;) {
+ r = sd_bus_message_skip(m, s);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_skip(m, types + 1 + k);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ case SD_BUS_TYPE_VARIANT: {
+ const char *contents;
+ char x;
+
+ r = sd_bus_message_peek_type(m, &x, &contents);
+ if (r <= 0)
+ return r;
+
+ if (x != SD_BUS_TYPE_VARIANT)
+ return -ENXIO;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, contents);
+ if (r <= 0)
+ return r;
+
+ r = sd_bus_message_skip(m, contents);
+ if (r < 0)
+ return r;
+ assert(r != 0);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_skip(m, types + 1);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ case SD_BUS_TYPE_STRUCT_BEGIN:
+ case SD_BUS_TYPE_DICT_ENTRY_BEGIN: {
+ size_t k;
+
+ r = signature_element_length(types, &k);
+ if (r < 0)
+ return r;
+ if (k < 2)
+ return -ERANGE;
+
+ {
+ char s[k-1];
+ memcpy(s, types+1, k-2);
+ s[k-2] = 0;
+
+ r = sd_bus_message_enter_container(m, *types == SD_BUS_TYPE_STRUCT_BEGIN ? SD_BUS_TYPE_STRUCT : SD_BUS_TYPE_DICT_ENTRY, s);
+ if (r <= 0)
+ return r;
+
+ r = sd_bus_message_skip(m, s);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_skip(m, types + k);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ default:
+ return -EINVAL;
+ }
+}
+
+_public_ int sd_bus_message_read_array(
+ sd_bus_message *m,
+ char type,
+ const void **ptr,
+ size_t *size) {
+
+ struct bus_container *c;
+ void *p;
+ size_t sz;
+ ssize_t align;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(bus_type_is_trivial(type), -EINVAL);
+ assert_return(ptr, -EINVAL);
+ assert_return(size, -EINVAL);
+ assert_return(!BUS_MESSAGE_NEED_BSWAP(m), -EOPNOTSUPP);
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, CHAR_TO_STR(type));
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ *ptr = NULL;
+ *size = 0;
+ return 0;
+ }
+
+ c = message_get_last_container(m);
+
+ align = bus_type_get_alignment(type);
+ if (align < 0)
+ return align;
+
+ sz = BUS_MESSAGE_BSWAP32(m, *c->array_size);
+
+ if (sz == 0)
+ /* Zero length array, let's return some aligned
+ * pointer that is not NULL */
+ p = (uint8_t*) align;
+ else {
+ r = message_peek_body(m, &m->rindex, align, sz, &p);
+ if (r < 0)
+ goto fail;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ goto fail;
+
+ *ptr = (const void*) p;
+ *size = sz;
+
+ return 1;
+
+fail:
+ message_quit_container(m);
+ return r;
+}
+
+static int message_peek_fields(
+ sd_bus_message *m,
+ size_t *rindex,
+ size_t align,
+ size_t nbytes,
+ void **ret) {
+
+ size_t start, end;
+
+ assert(m);
+ assert(rindex);
+ assert(align > 0);
+
+ start = ALIGN_TO(*rindex, align);
+ if (start > m->fields_size)
+ return -EBADMSG;
+
+ /* Avoid overflow below */
+ if (nbytes > SIZE_MAX - start)
+ return -EBADMSG;
+
+ end = start + nbytes;
+ if (end > m->fields_size)
+ return -EBADMSG;
+
+ /* Verify that padding is 0 */
+ uint8_t *p = BUS_MESSAGE_FIELDS(m);
+ for (size_t k = *rindex; k < start; k++)
+ if (p[k] != 0)
+ return -EBADMSG;
+
+ if (ret)
+ *ret = p + start;
+
+ *rindex = end;
+ return 1;
+}
+
+static int message_peek_field_uint32(
+ sd_bus_message *m,
+ size_t *ri,
+ size_t item_size,
+ uint32_t *ret) {
+
+ int r;
+ void *q;
+
+ assert(m);
+ assert(ri);
+
+ r = message_peek_fields(m, ri, 4, 4, &q);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q);
+
+ return 0;
+}
+
+static int message_peek_field_string(
+ sd_bus_message *m,
+ bool (*validate)(const char *p),
+ size_t *ri,
+ size_t item_size,
+ const char **ret) {
+
+ uint32_t l;
+ int r;
+ void *q;
+
+ assert(m);
+ assert(ri);
+
+ r = message_peek_field_uint32(m, ri, 4, &l);
+ if (r < 0)
+ return r;
+
+ if (l == UINT32_MAX)
+ /* avoid overflow right below */
+ return -EBADMSG;
+
+ r = message_peek_fields(m, ri, 1, l+1, &q);
+ if (r < 0)
+ return r;
+
+ if (validate) {
+ if (!validate_nul(q, l))
+ return -EBADMSG;
+
+ if (!validate(q))
+ return -EBADMSG;
+ } else {
+ if (!validate_string(q, l))
+ return -EBADMSG;
+ }
+
+ if (ret)
+ *ret = q;
+
+ return 0;
+}
+
+static int message_peek_field_signature(
+ sd_bus_message *m,
+ size_t *ri,
+ size_t item_size,
+ const char **ret) {
+
+ size_t l;
+ int r;
+ void *q;
+
+ assert(m);
+ assert(ri);
+
+ r = message_peek_fields(m, ri, 1, 1, &q);
+ if (r < 0)
+ return r;
+
+ l = *(uint8_t*) q;
+ if (l == UINT8_MAX)
+ /* avoid overflow right below */
+ return -EBADMSG;
+
+ r = message_peek_fields(m, ri, 1, l+1, &q);
+ if (r < 0)
+ return r;
+
+ if (!validate_signature(q, l))
+ return -EBADMSG;
+
+ if (ret)
+ *ret = q;
+
+ return 0;
+}
+
+static int message_skip_fields(
+ sd_bus_message *m,
+ size_t *ri,
+ uint32_t array_size,
+ const char **signature) {
+
+ size_t original_index;
+ int r;
+
+ assert(m);
+ assert(ri);
+ assert(signature);
+
+ original_index = *ri;
+
+ for (;;) {
+ char t;
+ size_t l;
+
+ if (array_size != UINT32_MAX &&
+ array_size <= *ri - original_index)
+ return 0;
+
+ t = **signature;
+ if (!t)
+ return 0;
+
+ if (t == SD_BUS_TYPE_STRING) {
+
+ r = message_peek_field_string(m, NULL, ri, 0, NULL);
+ if (r < 0)
+ return r;
+
+ (*signature)++;
+
+ } else if (t == SD_BUS_TYPE_OBJECT_PATH) {
+
+ r = message_peek_field_string(m, object_path_is_valid, ri, 0, NULL);
+ if (r < 0)
+ return r;
+
+ (*signature)++;
+
+ } else if (t == SD_BUS_TYPE_SIGNATURE) {
+
+ r = message_peek_field_signature(m, ri, 0, NULL);
+ if (r < 0)
+ return r;
+
+ (*signature)++;
+
+ } else if (bus_type_is_basic(t)) {
+ ssize_t align, k;
+
+ align = bus_type_get_alignment(t);
+ k = bus_type_get_size(t);
+ assert(align > 0 && k > 0);
+
+ r = message_peek_fields(m, ri, align, k, NULL);
+ if (r < 0)
+ return r;
+
+ (*signature)++;
+
+ } else if (t == SD_BUS_TYPE_ARRAY) {
+
+ r = signature_element_length(*signature + 1, &l);
+ if (r < 0)
+ return r;
+
+ assert(l >= 1);
+ {
+ char sig[l + 1], *s = sig;
+ uint32_t nas;
+ int alignment;
+
+ strncpy(sig, *signature + 1, l);
+ sig[l] = '\0';
+
+ alignment = bus_type_get_alignment(sig[0]);
+ if (alignment < 0)
+ return alignment;
+
+ r = message_peek_field_uint32(m, ri, 0, &nas);
+ if (r < 0)
+ return r;
+ if (nas > BUS_ARRAY_MAX_SIZE)
+ return -EBADMSG;
+
+ r = message_peek_fields(m, ri, alignment, 0, NULL);
+ if (r < 0)
+ return r;
+
+ r = message_skip_fields(m, ri, nas, (const char**) &s);
+ if (r < 0)
+ return r;
+ }
+
+ (*signature) += 1 + l;
+
+ } else if (t == SD_BUS_TYPE_VARIANT) {
+ const char *s;
+
+ r = message_peek_field_signature(m, ri, 0, &s);
+ if (r < 0)
+ return r;
+
+ r = message_skip_fields(m, ri, UINT32_MAX, (const char**) &s);
+ if (r < 0)
+ return r;
+
+ (*signature)++;
+
+ } else if (IN_SET(t, SD_BUS_TYPE_STRUCT, SD_BUS_TYPE_DICT_ENTRY)) {
+
+ r = signature_element_length(*signature, &l);
+ if (r < 0)
+ return r;
+
+ assert(l >= 2);
+ {
+ char sig[l + 1], *s = sig;
+ strncpy(sig, *signature + 1, l);
+ sig[l] = '\0';
+
+ r = message_skip_fields(m, ri, UINT32_MAX, (const char**) &s);
+ if (r < 0)
+ return r;
+ }
+
+ *signature += l;
+ } else
+ return -EBADMSG;
+ }
+}
+
+static int message_parse_fields(sd_bus_message *m) {
+ uint32_t unix_fds = 0;
+ bool unix_fds_set = false;
+ int r;
+
+ assert(m);
+
+ m->user_body_size = m->body_size;
+
+ for (size_t ri = 0; ri < m->fields_size; ) {
+ const char *signature;
+ uint64_t field_type;
+ size_t item_size = SIZE_MAX;
+ uint8_t *u8;
+
+ r = message_peek_fields(m, &ri, 8, 1, (void**) &u8);
+ if (r < 0)
+ return r;
+
+ field_type = *u8;
+
+ r = message_peek_field_signature(m, &ri, 0, &signature);
+ if (r < 0)
+ return r;
+
+ switch (field_type) {
+
+ case _BUS_MESSAGE_HEADER_INVALID:
+ return -EBADMSG;
+
+ case BUS_MESSAGE_HEADER_PATH:
+
+ if (m->path)
+ return -EBADMSG;
+
+ if (!streq(signature, "o"))
+ return -EBADMSG;
+
+ r = message_peek_field_string(m, object_path_is_valid, &ri, item_size, &m->path);
+ break;
+
+ case BUS_MESSAGE_HEADER_INTERFACE:
+
+ if (m->interface)
+ return -EBADMSG;
+
+ if (!streq(signature, "s"))
+ return -EBADMSG;
+
+ r = message_peek_field_string(m, interface_name_is_valid, &ri, item_size, &m->interface);
+ break;
+
+ case BUS_MESSAGE_HEADER_MEMBER:
+
+ if (m->member)
+ return -EBADMSG;
+
+ if (!streq(signature, "s"))
+ return -EBADMSG;
+
+ r = message_peek_field_string(m, member_name_is_valid, &ri, item_size, &m->member);
+ break;
+
+ case BUS_MESSAGE_HEADER_ERROR_NAME:
+
+ if (m->error.name)
+ return -EBADMSG;
+
+ if (!streq(signature, "s"))
+ return -EBADMSG;
+
+ r = message_peek_field_string(m, error_name_is_valid, &ri, item_size, &m->error.name);
+ if (r >= 0)
+ m->error._need_free = -1;
+
+ break;
+
+ case BUS_MESSAGE_HEADER_DESTINATION:
+
+ if (m->destination)
+ return -EBADMSG;
+
+ if (!streq(signature, "s"))
+ return -EBADMSG;
+
+ r = message_peek_field_string(m, service_name_is_valid, &ri, item_size, &m->destination);
+ break;
+
+ case BUS_MESSAGE_HEADER_SENDER:
+
+ if (m->sender)
+ return -EBADMSG;
+
+ if (!streq(signature, "s"))
+ return -EBADMSG;
+
+ r = message_peek_field_string(m, service_name_is_valid, &ri, item_size, &m->sender);
+
+ if (r >= 0 && m->sender[0] == ':' && m->bus->bus_client) {
+ m->creds.unique_name = (char*) m->sender;
+ m->creds.mask |= SD_BUS_CREDS_UNIQUE_NAME & m->bus->creds_mask;
+ }
+
+ break;
+
+ case BUS_MESSAGE_HEADER_SIGNATURE: {
+ const char *s;
+ char *c;
+
+ if (m->root_container.signature)
+ return -EBADMSG;
+
+ if (!streq(signature, "g"))
+ return -EBADMSG;
+
+ r = message_peek_field_signature(m, &ri, item_size, &s);
+ if (r < 0)
+ return r;
+
+ c = strdup(s);
+ if (!c)
+ return -ENOMEM;
+
+ free_and_replace(m->root_container.signature, c);
+ break;
+ }
+
+ case BUS_MESSAGE_HEADER_REPLY_SERIAL: {
+ uint32_t serial;
+
+ if (m->reply_cookie != 0)
+ return -EBADMSG;
+
+
+ if (!streq(signature, "u"))
+ return -EBADMSG;
+
+ r = message_peek_field_uint32(m, &ri, item_size, &serial);
+ if (r < 0)
+ return r;
+
+ m->reply_cookie = serial;
+
+ if (m->reply_cookie == 0)
+ return -EBADMSG;
+
+ break;
+ }
+ case BUS_MESSAGE_HEADER_UNIX_FDS:
+ if (unix_fds_set)
+ return -EBADMSG;
+
+ if (!streq(signature, "u"))
+ return -EBADMSG;
+
+ r = message_peek_field_uint32(m, &ri, item_size, &unix_fds);
+ if (r < 0)
+ return -EBADMSG;
+
+ unix_fds_set = true;
+ break;
+
+ default:
+ r = message_skip_fields(m, &ri, UINT32_MAX, (const char **) &signature);
+ }
+ if (r < 0)
+ return r;
+ }
+
+ if (m->n_fds != unix_fds)
+ return -EBADMSG;
+
+ switch (m->header->type) {
+
+ case SD_BUS_MESSAGE_SIGNAL:
+ if (!m->path || !m->interface || !m->member)
+ return -EBADMSG;
+
+ if (m->reply_cookie != 0)
+ return -EBADMSG;
+
+ break;
+
+ case SD_BUS_MESSAGE_METHOD_CALL:
+
+ if (!m->path || !m->member)
+ return -EBADMSG;
+
+ if (m->reply_cookie != 0)
+ return -EBADMSG;
+
+ break;
+
+ case SD_BUS_MESSAGE_METHOD_RETURN:
+
+ if (m->reply_cookie == 0)
+ return -EBADMSG;
+ break;
+
+ case SD_BUS_MESSAGE_METHOD_ERROR:
+
+ if (m->reply_cookie == 0 || !m->error.name)
+ return -EBADMSG;
+ break;
+ }
+
+ /* Refuse non-local messages that claim they are local */
+ if (streq_ptr(m->path, "/org/freedesktop/DBus/Local"))
+ return -EBADMSG;
+ if (streq_ptr(m->interface, "org.freedesktop.DBus.Local"))
+ return -EBADMSG;
+ if (streq_ptr(m->sender, "org.freedesktop.DBus.Local"))
+ return -EBADMSG;
+
+ m->root_container.end = m->user_body_size;
+
+ /* Try to read the error message, but if we can't it's a non-issue */
+ if (m->header->type == SD_BUS_MESSAGE_METHOD_ERROR)
+ (void) sd_bus_message_read(m, "s", &m->error.message);
+
+ return 0;
+}
+
+_public_ int sd_bus_message_set_destination(sd_bus_message *m, const char *destination) {
+ assert_return(m, -EINVAL);
+ assert_return(destination, -EINVAL);
+ assert_return(service_name_is_valid(destination), -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(!m->destination, -EEXIST);
+
+ return message_append_field_string(m, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, destination, &m->destination);
+}
+
+_public_ int sd_bus_message_set_sender(sd_bus_message *m, const char *sender) {
+ assert_return(m, -EINVAL);
+ assert_return(sender, -EINVAL);
+ assert_return(service_name_is_valid(sender), -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(!m->sender, -EEXIST);
+
+ return message_append_field_string(m, BUS_MESSAGE_HEADER_SENDER, SD_BUS_TYPE_STRING, sender, &m->sender);
+}
+
+int bus_message_get_blob(sd_bus_message *m, void **buffer, size_t *sz) {
+ size_t total;
+ void *p, *e;
+ size_t i;
+ struct bus_body_part *part;
+
+ assert(m);
+ assert(buffer);
+ assert(sz);
+
+ total = BUS_MESSAGE_SIZE(m);
+
+ p = malloc(total);
+ if (!p)
+ return -ENOMEM;
+
+ e = mempcpy(p, m->header, BUS_MESSAGE_BODY_BEGIN(m));
+ MESSAGE_FOREACH_PART(part, i, m)
+ e = mempcpy(e, part->data, part->size);
+
+ assert(total == (size_t) ((uint8_t*) e - (uint8_t*) p));
+
+ *buffer = p;
+ *sz = total;
+
+ return 0;
+}
+
+_public_ int sd_bus_message_read_strv_extend(sd_bus_message *m, char ***l) {
+ char type;
+ const char *contents, *s;
+ int r;
+
+ assert(m);
+ assert(l);
+
+ r = sd_bus_message_peek_type(m, &type, &contents);
+ if (r < 0)
+ return r;
+
+ if (type != SD_BUS_TYPE_ARRAY || !STR_IN_SET(contents, "s", "o", "g"))
+ return -ENXIO;
+
+ r = sd_bus_message_enter_container(m, 'a', NULL);
+ if (r <= 0)
+ return r;
+
+ /* sd_bus_message_read_basic() does content validation for us. */
+ while ((r = sd_bus_message_read_basic(m, *contents, &s)) > 0) {
+ r = strv_extend(l, s);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+_public_ int sd_bus_message_read_strv(sd_bus_message *m, char ***l) {
+ _cleanup_strv_free_ char **strv = NULL;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(l, -EINVAL);
+
+ r = sd_bus_message_read_strv_extend(m, &strv);
+ if (r <= 0)
+ return r;
+
+ *l = TAKE_PTR(strv);
+ return 1;
+}
+
+static int bus_message_get_arg_skip(
+ sd_bus_message *m,
+ unsigned i,
+ char *_type,
+ const char **_contents) {
+
+ unsigned j;
+ int r;
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ for (j = 0;; j++) {
+ const char *contents;
+ char type;
+
+ r = sd_bus_message_peek_type(m, &type, &contents);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENXIO;
+
+ /* Don't match against arguments after the first one we don't understand */
+ if (!IN_SET(type, SD_BUS_TYPE_STRING, SD_BUS_TYPE_OBJECT_PATH, SD_BUS_TYPE_SIGNATURE) &&
+ !(type == SD_BUS_TYPE_ARRAY && STR_IN_SET(contents, "s", "o", "g")))
+ return -ENXIO;
+
+ if (j >= i) {
+ if (_contents)
+ *_contents = contents;
+ if (_type)
+ *_type = type;
+ return 0;
+ }
+
+ r = sd_bus_message_skip(m, NULL);
+ if (r < 0)
+ return r;
+ }
+
+}
+
+int bus_message_get_arg(sd_bus_message *m, unsigned i, const char **str) {
+ char type;
+ int r;
+
+ assert(m);
+ assert(str);
+
+ r = bus_message_get_arg_skip(m, i, &type, NULL);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(type, SD_BUS_TYPE_STRING, SD_BUS_TYPE_OBJECT_PATH, SD_BUS_TYPE_SIGNATURE))
+ return -ENXIO;
+
+ return sd_bus_message_read_basic(m, type, str);
+}
+
+int bus_message_get_arg_strv(sd_bus_message *m, unsigned i, char ***strv) {
+ const char *contents;
+ char type;
+ int r;
+
+ assert(m);
+ assert(strv);
+
+ r = bus_message_get_arg_skip(m, i, &type, &contents);
+ if (r < 0)
+ return r;
+
+ if (type != SD_BUS_TYPE_ARRAY)
+ return -ENXIO;
+ if (!STR_IN_SET(contents, "s", "o", "g"))
+ return -ENXIO;
+
+ return sd_bus_message_read_strv(m, strv);
+}
+
+_public_ int sd_bus_message_get_errno(sd_bus_message *m) {
+ assert_return(m, EINVAL);
+
+ if (m->header->type != SD_BUS_MESSAGE_METHOD_ERROR)
+ return 0;
+
+ return sd_bus_error_get_errno(&m->error);
+}
+
+_public_ const char* sd_bus_message_get_signature(sd_bus_message *m, int complete) {
+ struct bus_container *c;
+
+ assert_return(m, NULL);
+
+ c = complete ? &m->root_container : message_get_last_container(m);
+ return strempty(c->signature);
+}
+
+_public_ int sd_bus_message_is_empty(sd_bus_message *m) {
+ assert_return(m, -EINVAL);
+
+ return isempty(m->root_container.signature);
+}
+
+_public_ int sd_bus_message_has_signature(sd_bus_message *m, const char *signature) {
+ assert_return(m, -EINVAL);
+
+ return streq(strempty(m->root_container.signature), strempty(signature));
+}
+
+_public_ int sd_bus_message_copy(sd_bus_message *m, sd_bus_message *source, int all) {
+ bool done_something = false;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(source, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(source->sealed, -EPERM);
+
+ do {
+ const char *contents;
+ char type;
+ union {
+ uint8_t u8;
+ uint16_t u16;
+ int16_t s16;
+ uint32_t u32;
+ int32_t s32;
+ uint64_t u64;
+ int64_t s64;
+ double d64;
+ const char *string;
+ int i;
+ } basic;
+
+ r = sd_bus_message_peek_type(source, &type, &contents);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ done_something = true;
+
+ if (bus_type_is_container(type) > 0) {
+
+ r = sd_bus_message_enter_container(source, type, contents);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, type, contents);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_copy(m, source, true);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(source);
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+
+ r = sd_bus_message_read_basic(source, type, &basic);
+ if (r < 0)
+ return r;
+
+ assert(r > 0);
+
+ if (IN_SET(type, SD_BUS_TYPE_OBJECT_PATH, SD_BUS_TYPE_SIGNATURE, SD_BUS_TYPE_STRING))
+ r = sd_bus_message_append_basic(m, type, basic.string);
+ else
+ r = sd_bus_message_append_basic(m, type, &basic);
+
+ if (r < 0)
+ return r;
+
+ } while (all);
+
+ return done_something;
+}
+
+_public_ int sd_bus_message_verify_type(sd_bus_message *m, char type, const char *contents) {
+ const char *c;
+ char t;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+ assert_return(!type || bus_type_is_valid(type), -EINVAL);
+ assert_return(!contents || signature_is_valid(contents, true), -EINVAL);
+ assert_return(type || contents, -EINVAL);
+ assert_return(!contents || !type || bus_type_is_container(type), -EINVAL);
+
+ r = sd_bus_message_peek_type(m, &t, &c);
+ if (r <= 0)
+ return r;
+
+ if (type != 0 && type != t)
+ return 0;
+
+ if (contents && !streq_ptr(contents, c))
+ return 0;
+
+ return 1;
+}
+
+_public_ sd_bus *sd_bus_message_get_bus(sd_bus_message *m) {
+ assert_return(m, NULL);
+
+ return m->bus;
+}
+
+int bus_message_remarshal(sd_bus *bus, sd_bus_message **m) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *n = NULL;
+ usec_t timeout;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(*m);
+
+ switch ((*m)->header->type) {
+
+ case SD_BUS_MESSAGE_SIGNAL:
+ r = sd_bus_message_new_signal(bus, &n, (*m)->path, (*m)->interface, (*m)->member);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case SD_BUS_MESSAGE_METHOD_CALL:
+ r = sd_bus_message_new_method_call(bus, &n, (*m)->destination, (*m)->path, (*m)->interface, (*m)->member);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case SD_BUS_MESSAGE_METHOD_RETURN:
+ case SD_BUS_MESSAGE_METHOD_ERROR:
+
+ r = sd_bus_message_new(bus, &n, (*m)->header->type);
+ if (r < 0)
+ return -ENOMEM;
+
+ assert(n);
+
+ n->reply_cookie = (*m)->reply_cookie;
+
+ r = message_append_reply_cookie(n, n->reply_cookie);
+ if (r < 0)
+ return r;
+
+ if ((*m)->header->type == SD_BUS_MESSAGE_METHOD_ERROR && (*m)->error.name) {
+ r = message_append_field_string(n, BUS_MESSAGE_HEADER_ERROR_NAME, SD_BUS_TYPE_STRING, (*m)->error.name, &n->error.message);
+ if (r < 0)
+ return r;
+
+ n->error._need_free = -1;
+ }
+
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ if ((*m)->destination && !n->destination) {
+ r = message_append_field_string(n, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, (*m)->destination, &n->destination);
+ if (r < 0)
+ return r;
+ }
+
+ if ((*m)->sender && !n->sender) {
+ r = message_append_field_string(n, BUS_MESSAGE_HEADER_SENDER, SD_BUS_TYPE_STRING, (*m)->sender, &n->sender);
+ if (r < 0)
+ return r;
+ }
+
+ n->header->flags |= (*m)->header->flags & (BUS_MESSAGE_NO_REPLY_EXPECTED|BUS_MESSAGE_NO_AUTO_START);
+
+ r = sd_bus_message_copy(n, *m, true);
+ if (r < 0)
+ return r;
+
+ timeout = (*m)->timeout;
+ if (timeout == 0 && !((*m)->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)) {
+ r = sd_bus_get_method_call_timeout(bus, &timeout);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_seal(n, BUS_MESSAGE_COOKIE(*m), timeout);
+ if (r < 0)
+ return r;
+
+ sd_bus_message_unref(*m);
+ *m = TAKE_PTR(n);
+
+ return 0;
+}
+
+_public_ int sd_bus_message_get_priority(sd_bus_message *m, int64_t *priority) {
+ static bool warned = false;
+
+ assert_return(m, -EINVAL);
+ assert_return(priority, -EINVAL);
+
+ if (!warned) {
+ log_debug("sd_bus_message_get_priority() is deprecated and always returns 0.");
+ warned = true;
+ }
+
+ *priority = 0;
+ return 0;
+}
+
+_public_ int sd_bus_message_set_priority(sd_bus_message *m, int64_t priority) {
+ static bool warned = false;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ if (!warned) {
+ log_debug("sd_bus_message_set_priority() is deprecated and does nothing.");
+ warned = true;
+ }
+
+ return 0;
+}
+
+_public_ int sd_bus_message_sensitive(sd_bus_message *m) {
+ assert_return(m, -EINVAL);
+
+ m->sensitive = true;
+ return 0;
+}
+
+char** bus_message_make_log_fields(sd_bus_message *m) {
+ _cleanup_strv_free_ char **strv = NULL;
+
+ assert(m);
+
+ (void) strv_extend_assignment(&strv, "DBUS_MESSAGE_TYPE", bus_message_type_to_string(m->header->type));
+ (void) strv_extend_assignment(&strv, "DBUS_SENDER", sd_bus_message_get_sender(m));
+ (void) strv_extend_assignment(&strv, "DBUS_DESTINATION", sd_bus_message_get_destination(m));
+ (void) strv_extend_assignment(&strv, "DBUS_PATH", sd_bus_message_get_path(m));
+ (void) strv_extend_assignment(&strv, "DBUS_INTERFACE", sd_bus_message_get_interface(m));
+ (void) strv_extend_assignment(&strv, "DBUS_MEMBER", sd_bus_message_get_member(m));
+
+ (void) strv_extendf(&strv, "DBUS_MESSAGE_COOKIE=%" PRIu64, BUS_MESSAGE_COOKIE(m));
+ if (m->reply_cookie != 0)
+ (void) strv_extendf(&strv, "DBUS_MESSAGE_REPLY_COOKIE=%" PRIu64, m->reply_cookie);
+
+ (void) strv_extend_assignment(&strv, "DBUS_SIGNATURE", m->root_container.signature);
+ (void) strv_extend_assignment(&strv, "DBUS_ERROR_NAME", m->error.name);
+ (void) strv_extend_assignment(&strv, "DBUS_ERROR_MESSAGE", m->error.message);
+
+ return TAKE_PTR(strv);
+}
diff --git a/src/libsystemd/sd-bus/bus-message.h b/src/libsystemd/sd-bus/bus-message.h
new file mode 100644
index 0000000..76f0d85
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-message.h
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <byteswap.h>
+#include <stdbool.h>
+#include <sys/socket.h>
+
+#include "sd-bus.h"
+
+#include "bus-creds.h"
+#include "bus-protocol.h"
+#include "macro.h"
+#include "time-util.h"
+
+struct bus_container {
+ char enclosing;
+
+ /* Indexes into the signature string */
+ unsigned index, saved_index;
+ char *signature;
+
+ size_t before, begin, end;
+
+ /* pointer to the array size value, if this is a value */
+ uint32_t *array_size;
+
+ char *peeked_signature;
+};
+
+struct bus_body_part {
+ struct bus_body_part *next;
+ void *data;
+ void *mmap_begin;
+ size_t size;
+ size_t mapped;
+ size_t allocated;
+ uint64_t memfd_offset;
+ int memfd;
+ bool free_this:1;
+ bool munmap_this:1;
+ bool sealed:1;
+ bool is_zero:1;
+};
+
+struct sd_bus_message {
+ /* Caveat: a message can be referenced in two different ways: the main (user-facing) way will also
+ * pin the bus connection object the message is associated with. The secondary way ("queued") is used
+ * when a message is in the read or write queues of the bus connection object, which will not pin the
+ * bus connection object. This is necessary so that we don't have to have a pair of cyclic references
+ * between a message that is queued and its connection: as soon as a message is only referenced by
+ * the connection (by means of being queued) and the connection itself has no other references it
+ * will be freed. */
+
+ unsigned n_ref; /* Counter of references that pin the connection */
+ unsigned n_queued; /* Counter of references that do not pin the connection */
+
+ sd_bus *bus;
+
+ uint64_t reply_cookie;
+
+ const char *path;
+ const char *interface;
+ const char *member;
+ const char *destination;
+ const char *sender;
+
+ sd_bus_error error;
+
+ sd_bus_creds creds;
+
+ usec_t monotonic;
+ usec_t realtime;
+ uint64_t seqnum;
+ uint64_t verify_destination_id;
+
+ bool sealed:1;
+ bool dont_send:1;
+ bool allow_fds:1;
+ bool free_header:1;
+ bool free_fds:1;
+ bool poisoned:1;
+ bool sensitive:1;
+
+ /* The first bytes of the message */
+ struct bus_header *header;
+
+ size_t fields_size;
+ size_t body_size;
+ size_t user_body_size;
+
+ struct bus_body_part body;
+ struct bus_body_part *body_end;
+ unsigned n_body_parts;
+
+ size_t rindex;
+ struct bus_body_part *cached_rindex_part;
+ size_t cached_rindex_part_begin;
+
+ uint32_t n_fds;
+ int *fds;
+
+ struct bus_container root_container, *containers;
+ size_t n_containers;
+
+ struct iovec *iovec;
+ struct iovec iovec_fixed[2];
+ unsigned n_iovec;
+
+ char *peeked_signature;
+
+ /* If set replies to this message must carry the signature
+ * specified here to successfully seal. This is initialized
+ * from the vtable data */
+ const char *enforced_reply_signature;
+
+ usec_t timeout;
+
+ size_t header_offsets[_BUS_MESSAGE_HEADER_MAX];
+ unsigned n_header_offsets;
+
+ uint64_t read_counter;
+};
+
+static inline bool BUS_MESSAGE_NEED_BSWAP(sd_bus_message *m) {
+ return m->header->endian != BUS_NATIVE_ENDIAN;
+}
+
+static inline uint16_t BUS_MESSAGE_BSWAP16(sd_bus_message *m, uint16_t u) {
+ return BUS_MESSAGE_NEED_BSWAP(m) ? bswap_16(u) : u;
+}
+
+static inline uint32_t BUS_MESSAGE_BSWAP32(sd_bus_message *m, uint32_t u) {
+ return BUS_MESSAGE_NEED_BSWAP(m) ? bswap_32(u) : u;
+}
+
+static inline uint64_t BUS_MESSAGE_BSWAP64(sd_bus_message *m, uint64_t u) {
+ return BUS_MESSAGE_NEED_BSWAP(m) ? bswap_64(u) : u;
+}
+
+static inline uint64_t BUS_MESSAGE_COOKIE(sd_bus_message *m) {
+ return BUS_MESSAGE_BSWAP32(m, m->header->serial);
+}
+
+static inline size_t BUS_MESSAGE_SIZE(sd_bus_message *m) {
+ return
+ sizeof(struct bus_header) +
+ ALIGN8(m->fields_size) +
+ m->body_size;
+}
+
+static inline size_t BUS_MESSAGE_BODY_BEGIN(sd_bus_message *m) {
+ return
+ sizeof(struct bus_header) +
+ ALIGN8(m->fields_size);
+}
+
+static inline void* BUS_MESSAGE_FIELDS(sd_bus_message *m) {
+ return (uint8_t*) m->header + sizeof(struct bus_header);
+}
+
+int bus_message_get_blob(sd_bus_message *m, void **buffer, size_t *sz);
+
+int bus_message_from_malloc(
+ sd_bus *bus,
+ void *buffer,
+ size_t length,
+ int *fds,
+ size_t n_fds,
+ const char *label,
+ sd_bus_message **ret);
+
+int bus_message_get_arg(sd_bus_message *m, unsigned i, const char **str);
+int bus_message_get_arg_strv(sd_bus_message *m, unsigned i, char ***strv);
+
+#define MESSAGE_FOREACH_PART(part, i, m) \
+ for ((i) = 0, (part) = &(m)->body; (i) < (m)->n_body_parts; (i)++, (part) = (part)->next)
+
+int bus_body_part_map(struct bus_body_part *part);
+void bus_body_part_unmap(struct bus_body_part *part);
+
+int bus_message_new_synthetic_error(sd_bus *bus, uint64_t serial, const sd_bus_error *e, sd_bus_message **m);
+
+int bus_message_remarshal(sd_bus *bus, sd_bus_message **m);
+
+void bus_message_set_sender_driver(sd_bus *bus, sd_bus_message *m);
+void bus_message_set_sender_local(sd_bus *bus, sd_bus_message *m);
+
+sd_bus_message* bus_message_ref_queued(sd_bus_message *m, sd_bus *bus);
+sd_bus_message* bus_message_unref_queued(sd_bus_message *m, sd_bus *bus);
+
+char** bus_message_make_log_fields(sd_bus_message *m);
diff --git a/src/libsystemd/sd-bus/bus-objects.c b/src/libsystemd/sd-bus/bus-objects.c
new file mode 100644
index 0000000..c25c40f
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-objects.c
@@ -0,0 +1,3033 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-introspect.h"
+#include "bus-message.h"
+#include "bus-objects.h"
+#include "bus-signature.h"
+#include "bus-slot.h"
+#include "bus-type.h"
+#include "missing_capability.h"
+#include "string-util.h"
+#include "strv.h"
+
+static int node_vtable_get_userdata(
+ sd_bus *bus,
+ const char *path,
+ struct node_vtable *c,
+ void **userdata,
+ sd_bus_error *error) {
+
+ sd_bus_slot *s;
+ void *u, *found_u = NULL;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(c);
+
+ s = container_of(c, sd_bus_slot, node_vtable);
+ u = s->userdata;
+ if (c->find) {
+ bus->current_slot = sd_bus_slot_ref(s);
+ bus->current_userdata = u;
+ r = c->find(bus, path, c->interface, u, &found_u, error);
+ bus->current_userdata = NULL;
+ bus->current_slot = sd_bus_slot_unref(s);
+
+ if (r < 0)
+ return r;
+ if (sd_bus_error_is_set(error))
+ return -sd_bus_error_get_errno(error);
+ if (r == 0)
+ return r;
+ } else
+ found_u = u;
+
+ if (userdata)
+ *userdata = found_u;
+
+ return 1;
+}
+
+static void *vtable_method_convert_userdata(const sd_bus_vtable *p, void *u) {
+ assert(p);
+
+ if (!u || FLAGS_SET(p->flags, SD_BUS_VTABLE_ABSOLUTE_OFFSET))
+ return SIZE_TO_PTR(p->x.method.offset); /* don't add offset on NULL, to make ubsan happy */
+
+ return (uint8_t*) u + p->x.method.offset;
+}
+
+static void *vtable_property_convert_userdata(const sd_bus_vtable *p, void *u) {
+ assert(p);
+
+ if (!u || FLAGS_SET(p->flags, SD_BUS_VTABLE_ABSOLUTE_OFFSET))
+ return SIZE_TO_PTR(p->x.property.offset); /* as above */
+
+ return (uint8_t*) u + p->x.property.offset;
+}
+
+static int vtable_property_get_userdata(
+ sd_bus *bus,
+ const char *path,
+ struct vtable_member *p,
+ void **userdata,
+ sd_bus_error *error) {
+
+ void *u;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(p);
+ assert(userdata);
+
+ r = node_vtable_get_userdata(bus, path, p->parent, &u, error);
+ if (r <= 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ *userdata = vtable_property_convert_userdata(p->vtable, u);
+ return 1;
+}
+
+static int add_enumerated_to_set(
+ sd_bus *bus,
+ const char *prefix,
+ struct node_enumerator *first,
+ OrderedSet *s,
+ sd_bus_error *error) {
+
+ int r;
+
+ assert(bus);
+ assert(prefix);
+ assert(s);
+
+ LIST_FOREACH(enumerators, c, first) {
+ char **children = NULL;
+ sd_bus_slot *slot;
+
+ if (bus->nodes_modified)
+ return 0;
+
+ slot = container_of(c, sd_bus_slot, node_enumerator);
+
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_userdata = slot->userdata;
+ r = c->callback(bus, prefix, slot->userdata, &children, error);
+ bus->current_userdata = NULL;
+ bus->current_slot = sd_bus_slot_unref(slot);
+
+ if (r < 0)
+ return r;
+ if (sd_bus_error_is_set(error))
+ return -sd_bus_error_get_errno(error);
+
+ STRV_FOREACH(k, children) {
+ if (r < 0) {
+ free(*k);
+ continue;
+ }
+
+ if (!object_path_is_valid(*k)) {
+ free(*k);
+ r = -EINVAL;
+ continue;
+ }
+
+ if (!object_path_startswith(*k, prefix)) {
+ free(*k);
+ continue;
+ }
+
+ r = ordered_set_consume(s, *k);
+ if (r == -EEXIST)
+ r = 0;
+ }
+
+ free(children);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+enum {
+ /* if set, add_subtree() works recursively */
+ CHILDREN_RECURSIVE = 1 << 0,
+ /* if set, add_subtree() scans object-manager hierarchies recursively */
+ CHILDREN_SUBHIERARCHIES = 1 << 1,
+};
+
+static int add_subtree_to_set(
+ sd_bus *bus,
+ const char *prefix,
+ struct node *n,
+ unsigned flags,
+ OrderedSet *s,
+ sd_bus_error *error) {
+
+ int r;
+
+ assert(bus);
+ assert(prefix);
+ assert(n);
+ assert(s);
+
+ r = add_enumerated_to_set(bus, prefix, n->enumerators, s, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ LIST_FOREACH(siblings, i, n->child) {
+ char *t;
+
+ if (!object_path_startswith(i->path, prefix))
+ continue;
+
+ t = strdup(i->path);
+ if (!t)
+ return -ENOMEM;
+
+ r = ordered_set_consume(s, t);
+ if (r < 0 && r != -EEXIST)
+ return r;
+
+ if ((flags & CHILDREN_RECURSIVE) &&
+ ((flags & CHILDREN_SUBHIERARCHIES) || !i->object_managers)) {
+ r = add_subtree_to_set(bus, prefix, i, flags, s, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+static int get_child_nodes(
+ sd_bus *bus,
+ const char *prefix,
+ struct node *n,
+ unsigned flags,
+ OrderedSet **ret,
+ sd_bus_error *error) {
+
+ _cleanup_ordered_set_free_free_ OrderedSet *s = NULL;
+ int r;
+
+ assert(bus);
+ assert(prefix);
+ assert(n);
+ assert(ret);
+
+ s = ordered_set_new(&string_hash_ops);
+ if (!s)
+ return -ENOMEM;
+
+ r = add_subtree_to_set(bus, prefix, n, flags, s, error);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(s);
+ return 0;
+}
+
+static int node_callbacks_run(
+ sd_bus *bus,
+ sd_bus_message *m,
+ struct node_callback *first,
+ bool require_fallback,
+ bool *found_object) {
+
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(found_object);
+
+ LIST_FOREACH(callbacks, c, first) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL;
+ sd_bus_slot *slot;
+
+ if (bus->nodes_modified)
+ return 0;
+
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ *found_object = true;
+
+ if (c->last_iteration == bus->iteration_counter)
+ continue;
+
+ c->last_iteration = bus->iteration_counter;
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ slot = container_of(c, sd_bus_slot, node_callback);
+
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_handler = c->callback;
+ bus->current_userdata = slot->userdata;
+ r = c->callback(m, slot->userdata, &error_buffer);
+ bus->current_userdata = NULL;
+ bus->current_handler = NULL;
+ bus->current_slot = sd_bus_slot_unref(slot);
+
+ r = bus_maybe_reply_error(m, r, &error_buffer);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+#define CAPABILITY_SHIFT(x) (((x) >> __builtin_ctzll(_SD_BUS_VTABLE_CAPABILITY_MASK)) & 0xFFFF)
+
+static int check_access(sd_bus *bus, sd_bus_message *m, struct vtable_member *c, sd_bus_error *error) {
+ uint64_t cap;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(c);
+
+ /* If the entire bus is trusted let's grant access */
+ if (bus->trusted)
+ return 0;
+
+ /* If the member is marked UNPRIVILEGED let's grant access */
+ if (c->vtable->flags & SD_BUS_VTABLE_UNPRIVILEGED)
+ return 0;
+
+ /* Check that the caller has the requested capability set. Note that the flags value contains the
+ * capability number plus one, which we need to subtract here. We do this so that we have 0 as
+ * special value for the default. */
+ cap = CAPABILITY_SHIFT(c->vtable->flags);
+ if (cap == 0)
+ cap = CAPABILITY_SHIFT(c->parent->vtable[0].flags);
+ if (cap == 0)
+ cap = CAP_SYS_ADMIN;
+ else
+ cap--;
+
+ r = sd_bus_query_sender_privilege(m, cap);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Access to %s.%s() not permitted.", c->interface, c->member);
+}
+
+static int method_callbacks_run(
+ sd_bus *bus,
+ sd_bus_message *m,
+ struct vtable_member *c,
+ bool require_fallback,
+ bool *found_object) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *signature;
+ void *u;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(c);
+ assert(found_object);
+
+ if (require_fallback && !c->parent->is_fallback)
+ return 0;
+
+ if (FLAGS_SET(c->vtable->flags, SD_BUS_VTABLE_SENSITIVE)) {
+ r = sd_bus_message_sensitive(m);
+ if (r < 0)
+ return r;
+ }
+
+ r = check_access(bus, m, c, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+
+ r = node_vtable_get_userdata(bus, m->path, c->parent, &u, &error);
+ if (r <= 0)
+ return bus_maybe_reply_error(m, r, &error);
+ if (bus->nodes_modified)
+ return 0;
+
+ u = vtable_method_convert_userdata(c->vtable, u);
+
+ *found_object = true;
+
+ if (c->last_iteration == bus->iteration_counter)
+ return 0;
+
+ c->last_iteration = bus->iteration_counter;
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ signature = sd_bus_message_get_signature(m, true);
+ if (!signature)
+ return -EINVAL;
+
+ if (!streq(strempty(c->vtable->x.method.signature), signature))
+ return sd_bus_reply_method_errorf(
+ m,
+ SD_BUS_ERROR_INVALID_ARGS,
+ "Invalid arguments '%s' to call %s.%s(), expecting '%s'.",
+ signature, c->interface, c->member, strempty(c->vtable->x.method.signature));
+
+ /* Keep track what the signature of the reply to this message
+ * should be, so that this can be enforced when sealing the
+ * reply. */
+ m->enforced_reply_signature = strempty(c->vtable->x.method.result);
+
+ if (c->vtable->x.method.handler) {
+ sd_bus_slot *slot;
+
+ slot = container_of(c->parent, sd_bus_slot, node_vtable);
+
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_handler = c->vtable->x.method.handler;
+ bus->current_userdata = u;
+ r = c->vtable->x.method.handler(m, u, &error);
+ bus->current_userdata = NULL;
+ bus->current_handler = NULL;
+ bus->current_slot = sd_bus_slot_unref(slot);
+
+ return bus_maybe_reply_error(m, r, &error);
+ }
+
+ /* If the method callback is NULL, make this a successful NOP */
+ r = sd_bus_reply_method_return(m, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int invoke_property_get(
+ sd_bus *bus,
+ sd_bus_slot *slot,
+ const sd_bus_vtable *v,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ const void *p;
+ int r;
+
+ assert(bus);
+ assert(slot);
+ assert(v);
+ assert(path);
+ assert(interface);
+ assert(property);
+ assert(reply);
+
+ if (v->x.property.get) {
+
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_userdata = userdata;
+ r = v->x.property.get(bus, path, interface, property, reply, userdata, error);
+ bus->current_userdata = NULL;
+ bus->current_slot = sd_bus_slot_unref(slot);
+
+ if (r < 0)
+ return r;
+ if (sd_bus_error_is_set(error))
+ return -sd_bus_error_get_errno(error);
+ return r;
+ }
+
+ /* Automatic handling if no callback is defined. */
+
+ if (streq(v->x.property.signature, "as"))
+ return sd_bus_message_append_strv(reply, *(char***) userdata);
+
+ assert(signature_is_single(v->x.property.signature, false));
+ assert(bus_type_is_basic(v->x.property.signature[0]));
+
+ switch (v->x.property.signature[0]) {
+
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_SIGNATURE:
+ p = strempty(*(char**) userdata);
+ break;
+
+ case SD_BUS_TYPE_OBJECT_PATH:
+ p = *(char**) userdata;
+ assert(p);
+ break;
+
+ default:
+ p = userdata;
+ break;
+ }
+
+ return sd_bus_message_append_basic(reply, v->x.property.signature[0], p);
+}
+
+static int invoke_property_set(
+ sd_bus *bus,
+ sd_bus_slot *slot,
+ const sd_bus_vtable *v,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *value,
+ void *userdata,
+ sd_bus_error *error) {
+
+ int r;
+
+ assert(bus);
+ assert(slot);
+ assert(v);
+ assert(path);
+ assert(interface);
+ assert(property);
+ assert(value);
+
+ if (v->x.property.set) {
+
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_userdata = userdata;
+ r = v->x.property.set(bus, path, interface, property, value, userdata, error);
+ bus->current_userdata = NULL;
+ bus->current_slot = sd_bus_slot_unref(slot);
+
+ if (r < 0)
+ return r;
+ if (sd_bus_error_is_set(error))
+ return -sd_bus_error_get_errno(error);
+ return r;
+ }
+
+ /* Automatic handling if no callback is defined. */
+
+ assert(signature_is_single(v->x.property.signature, false));
+ assert(bus_type_is_basic(v->x.property.signature[0]));
+
+ switch (v->x.property.signature[0]) {
+
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_SIGNATURE: {
+ const char *p;
+ char *n;
+
+ r = sd_bus_message_read_basic(value, v->x.property.signature[0], &p);
+ if (r < 0)
+ return r;
+
+ n = strdup(p);
+ if (!n)
+ return -ENOMEM;
+
+ free(*(char**) userdata);
+ *(char**) userdata = n;
+
+ break;
+ }
+
+ default:
+ r = sd_bus_message_read_basic(value, v->x.property.signature[0], userdata);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ return 1;
+}
+
+static int property_get_set_callbacks_run(
+ sd_bus *bus,
+ sd_bus_message *m,
+ struct vtable_member *c,
+ bool require_fallback,
+ bool is_get,
+ bool *found_object) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ sd_bus_slot *slot;
+ void *u = NULL;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(c);
+ assert(found_object);
+
+ if (require_fallback && !c->parent->is_fallback)
+ return 0;
+
+ if (FLAGS_SET(c->vtable->flags, SD_BUS_VTABLE_SENSITIVE)) {
+ r = sd_bus_message_sensitive(m);
+ if (r < 0)
+ return r;
+ }
+
+ r = vtable_property_get_userdata(bus, m->path, c, &u, &error);
+ if (r <= 0)
+ return bus_maybe_reply_error(m, r, &error);
+ if (bus->nodes_modified)
+ return 0;
+
+ slot = container_of(c->parent, sd_bus_slot, node_vtable);
+
+ *found_object = true;
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ if (FLAGS_SET(c->vtable->flags, SD_BUS_VTABLE_SENSITIVE)) {
+ r = sd_bus_message_sensitive(reply);
+ if (r < 0)
+ return r;
+ }
+
+ if (is_get) {
+ /* Note that we do not protect against reexecution
+ * here (using the last_iteration check, see below),
+ * should the node tree have changed and we got called
+ * again. We assume that property Get() calls are
+ * ultimately without side-effects or if they aren't
+ * then at least idempotent. */
+
+ r = sd_bus_message_open_container(reply, 'v', c->vtable->x.property.signature);
+ if (r < 0)
+ return r;
+
+ /* Note that we do not do an access check here. Read
+ * access to properties is always unrestricted, since
+ * PropertiesChanged signals broadcast contents
+ * anyway. */
+
+ r = invoke_property_get(bus, slot, c->vtable, m->path, c->interface, c->member, reply, u, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+
+ if (bus->nodes_modified)
+ return 0;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ } else {
+ const char *signature = NULL;
+ char type = 0;
+
+ if (c->vtable->type != _SD_BUS_VTABLE_WRITABLE_PROPERTY)
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_PROPERTY_READ_ONLY, "Property '%s' is not writable.", c->member);
+
+ /* Avoid that we call the set routine more than once
+ * if the processing of this message got restarted
+ * because the node tree changed. */
+ if (c->last_iteration == bus->iteration_counter)
+ return 0;
+
+ c->last_iteration = bus->iteration_counter;
+
+ r = sd_bus_message_peek_type(m, &type, &signature);
+ if (r < 0)
+ return r;
+
+ if (type != 'v')
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_SIGNATURE,
+ "Incorrect signature when setting property '%s', expected 'v', got '%c'.",
+ c->member, type);
+ if (!streq(strempty(signature), strempty(c->vtable->x.property.signature)))
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_ARGS,
+ "Incorrect parameters for property '%s', expected '%s', got '%s'.",
+ c->member, strempty(c->vtable->x.property.signature), strempty(signature));
+
+ r = sd_bus_message_enter_container(m, 'v', c->vtable->x.property.signature);
+ if (r < 0)
+ return r;
+
+ r = check_access(bus, m, c, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+
+ r = invoke_property_set(bus, slot, c->vtable, m->path, c->interface, c->member, m, u, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+
+ if (bus->nodes_modified)
+ return 0;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_send(bus, reply, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int vtable_append_one_property(
+ sd_bus *bus,
+ sd_bus_message *reply,
+ const char *path,
+ struct node_vtable *c,
+ const sd_bus_vtable *v,
+ void *userdata,
+ sd_bus_error *error) {
+
+ sd_bus_slot *slot;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(path);
+ assert(c);
+ assert(v);
+
+ if (FLAGS_SET(c->vtable->flags, SD_BUS_VTABLE_SENSITIVE)) {
+ r = sd_bus_message_sensitive(reply);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_open_container(reply, 'e', "sv");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "s", v->x.property.member);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'v', v->x.property.signature);
+ if (r < 0)
+ return r;
+
+ slot = container_of(c, sd_bus_slot, node_vtable);
+
+ r = invoke_property_get(bus, slot, v, path, c->interface, v->x.property.member, reply, vtable_property_convert_userdata(v, userdata), error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int vtable_append_all_properties(
+ sd_bus *bus,
+ sd_bus_message *reply,
+ const char *path,
+ struct node_vtable *c,
+ void *userdata,
+ sd_bus_error *error) {
+
+ const sd_bus_vtable *v;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(path);
+ assert(c);
+
+ if (c->vtable[0].flags & SD_BUS_VTABLE_HIDDEN)
+ return 1;
+
+ v = c->vtable;
+ for (v = bus_vtable_next(c->vtable, v); v->type != _SD_BUS_VTABLE_END; v = bus_vtable_next(c->vtable, v)) {
+ if (!IN_SET(v->type, _SD_BUS_VTABLE_PROPERTY, _SD_BUS_VTABLE_WRITABLE_PROPERTY))
+ continue;
+
+ if (v->flags & SD_BUS_VTABLE_HIDDEN)
+ continue;
+
+ /* Let's not include properties marked as "explicit" in any message that contains a generic
+ * dump of properties, but only in those generated as a response to an explicit request. */
+ if (v->flags & SD_BUS_VTABLE_PROPERTY_EXPLICIT)
+ continue;
+
+ /* Let's not include properties marked only for invalidation on change (i.e. in contrast to
+ * those whose new values are included in PropertiesChanges message) in any signals. This is
+ * useful to ensure they aren't included in InterfacesAdded messages. */
+ if (reply->header->type != SD_BUS_MESSAGE_METHOD_RETURN &&
+ FLAGS_SET(v->flags, SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION))
+ continue;
+
+ r = vtable_append_one_property(bus, reply, path, c, v, userdata, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ return 1;
+}
+
+static int property_get_all_callbacks_run(
+ sd_bus *bus,
+ sd_bus_message *m,
+ struct node_vtable *first,
+ bool require_fallback,
+ const char *iface,
+ bool *found_object) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ bool found_interface;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(found_object);
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "{sv}");
+ if (r < 0)
+ return r;
+
+ found_interface = !iface || STR_IN_SET(iface,
+ "org.freedesktop.DBus.Properties",
+ "org.freedesktop.DBus.Peer",
+ "org.freedesktop.DBus.Introspectable");
+
+ LIST_FOREACH(vtables, c, first) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ void *u;
+
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ r = node_vtable_get_userdata(bus, m->path, c, &u, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+ if (bus->nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ *found_object = true;
+
+ if (iface && !streq(c->interface, iface))
+ continue;
+ found_interface = true;
+
+ r = vtable_append_all_properties(bus, reply, m->path, c, u, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ if (!*found_object)
+ return 0;
+
+ if (!found_interface) {
+ r = sd_bus_reply_method_errorf(
+ m,
+ SD_BUS_ERROR_UNKNOWN_INTERFACE,
+ "Unknown interface '%s'.", iface);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_send(bus, reply, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int bus_node_exists(
+ sd_bus *bus,
+ struct node *n,
+ const char *path,
+ bool require_fallback) {
+
+ int r;
+
+ assert(bus);
+ assert(n);
+ assert(path);
+
+ /* Tests if there's anything attached directly to this node
+ * for the specified path */
+
+ if (!require_fallback && (n->enumerators || n->object_managers))
+ return true;
+
+ LIST_FOREACH(callbacks, k, n->callbacks) {
+ if (require_fallback && !k->is_fallback)
+ continue;
+
+ return 1;
+ }
+
+ LIST_FOREACH(vtables, c, n->vtables) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, c, NULL, &error);
+ if (r != 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ return 0;
+}
+
+int introspect_path(
+ sd_bus *bus,
+ const char *path,
+ struct node *n,
+ bool require_fallback,
+ bool ignore_nodes_modified,
+ bool *found_object,
+ char **ret,
+ sd_bus_error *error) {
+
+ _cleanup_ordered_set_free_free_ OrderedSet *s = NULL;
+ _cleanup_(introspect_done) struct introspect intro = {};
+ bool empty;
+ int r;
+
+ if (!n) {
+ n = hashmap_get(bus->nodes, path);
+ if (!n)
+ return -ENOENT;
+ }
+
+ r = get_child_nodes(bus, path, n, 0, &s, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified && !ignore_nodes_modified)
+ return 0;
+
+ r = introspect_begin(&intro, bus->trusted);
+ if (r < 0)
+ return r;
+
+ r = introspect_write_default_interfaces(&intro, !require_fallback && n->object_managers);
+ if (r < 0)
+ return r;
+
+ empty = ordered_set_isempty(s);
+
+ LIST_FOREACH(vtables, c, n->vtables) {
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, c, NULL, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified && !ignore_nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ empty = false;
+
+ if (c->vtable[0].flags & SD_BUS_VTABLE_HIDDEN)
+ continue;
+
+ r = introspect_write_interface(&intro, c->interface, c->vtable);
+ if (r < 0)
+ return r;
+ }
+
+ if (empty) {
+ /* Nothing?, let's see if we exist at all, and if not
+ * refuse to do anything */
+ r = bus_node_exists(bus, n, path, require_fallback);
+ if (r <= 0)
+ return r;
+ if (bus->nodes_modified && !ignore_nodes_modified)
+ return 0;
+ }
+
+ if (found_object)
+ *found_object = true;
+
+ r = introspect_write_child_nodes(&intro, s, path);
+ if (r < 0)
+ return r;
+
+ r = introspect_finish(&intro, ret);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int process_introspect(
+ sd_bus *bus,
+ sd_bus_message *m,
+ struct node *n,
+ bool require_fallback,
+ bool *found_object) {
+
+ _cleanup_free_ char *s = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(n);
+ assert(found_object);
+
+ r = introspect_path(bus, m->path, n, require_fallback, false, found_object, &s, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+ if (r == 0)
+ /* nodes_modified == true */
+ return 0;
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "s", s);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_send(bus, reply, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int object_manager_serialize_path(
+ sd_bus *bus,
+ sd_bus_message *reply,
+ const char *prefix,
+ const char *path,
+ bool require_fallback,
+ bool *found_object_manager,
+ sd_bus_error *error) {
+
+ const char *previous_interface = NULL;
+ bool found_something = false;
+ struct node *n;
+ int r;
+
+ assert(bus);
+ assert(reply);
+ assert(prefix);
+ assert(path);
+ assert(found_object_manager);
+ assert(error);
+
+ n = hashmap_get(bus->nodes, prefix);
+ if (!n)
+ return 0;
+
+ if (!require_fallback && n->object_managers)
+ *found_object_manager = true;
+
+ LIST_FOREACH(vtables, i, n->vtables) {
+ void *u;
+
+ if (require_fallback && !i->is_fallback)
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, i, &u, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ if (!found_something) {
+
+ /* Open the object part */
+
+ r = sd_bus_message_open_container(reply, 'e', "oa{sa{sv}}");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "o", path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "{sa{sv}}");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "{sa{sv}}", "org.freedesktop.DBus.Peer", 0);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "{sa{sv}}", "org.freedesktop.DBus.Introspectable", 0);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "{sa{sv}}", "org.freedesktop.DBus.Properties", 0);
+ if (r < 0)
+ return r;
+
+ if (*found_object_manager) {
+ r = sd_bus_message_append(
+ reply, "{sa{sv}}", "org.freedesktop.DBus.ObjectManager", 0);
+ if (r < 0)
+ return r;
+ }
+
+ found_something = true;
+ }
+
+ if (!streq_ptr(previous_interface, i->interface)) {
+
+ /* Maybe close the previous interface part */
+
+ if (previous_interface) {
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ /* Open the new interface part */
+
+ r = sd_bus_message_open_container(reply, 'e', "sa{sv}");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "s", i->interface);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "{sv}");
+ if (r < 0)
+ return r;
+ }
+
+ r = vtable_append_all_properties(bus, reply, path, i, u, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ previous_interface = i->interface;
+ }
+
+ if (previous_interface) {
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ if (found_something) {
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+}
+
+static int object_manager_serialize_path_and_fallbacks(
+ sd_bus *bus,
+ sd_bus_message *reply,
+ const char *path,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *prefix = NULL;
+ size_t pl;
+ int r;
+ bool found_object_manager = false;
+
+ assert(bus);
+ assert(reply);
+ assert(path);
+ assert(error);
+
+ /* First, add all vtables registered for this path */
+ r = object_manager_serialize_path(bus, reply, path, path, false, &found_object_manager, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ /* Second, add fallback vtables registered for any of the prefixes */
+ pl = strlen(path);
+ assert(pl <= BUS_PATH_SIZE_MAX);
+ prefix = new(char, pl + 1);
+ if (!prefix)
+ return -ENOMEM;
+
+ OBJECT_PATH_FOREACH_PREFIX(prefix, path) {
+ r = object_manager_serialize_path(bus, reply, prefix, path, true, &found_object_manager, error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ return 0;
+}
+
+static int process_get_managed_objects(
+ sd_bus *bus,
+ sd_bus_message *m,
+ struct node *n,
+ bool require_fallback,
+ bool *found_object) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_ordered_set_free_free_ OrderedSet *s = NULL;
+ char *path;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(n);
+ assert(found_object);
+
+ /* Spec says, GetManagedObjects() is only implemented on the root of a
+ * sub-tree. Therefore, we require a registered object-manager on
+ * exactly the queried path, otherwise, we refuse to respond. */
+
+ if (require_fallback || !n->object_managers)
+ return 0;
+
+ r = get_child_nodes(bus, m->path, n, CHILDREN_RECURSIVE, &s, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+ if (bus->nodes_modified)
+ return 0;
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "{oa{sa{sv}}}");
+ if (r < 0)
+ return r;
+
+ ORDERED_SET_FOREACH(path, s) {
+ r = object_manager_serialize_path_and_fallbacks(bus, reply, path, &error);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, &error);
+
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_send(bus, reply, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int object_find_and_run(
+ sd_bus *bus,
+ sd_bus_message *m,
+ const char *p,
+ bool require_fallback,
+ bool *found_object) {
+
+ struct node *n;
+ struct vtable_member vtable_key, *v;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(p);
+ assert(found_object);
+
+ n = hashmap_get(bus->nodes, p);
+ if (!n)
+ return 0;
+
+ /* First, try object callbacks */
+ r = node_callbacks_run(bus, m, n->callbacks, require_fallback, found_object);
+ if (r != 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ if (!m->interface || !m->member)
+ return 0;
+
+ /* Then, look for a known method */
+ vtable_key.path = (char*) p;
+ vtable_key.interface = m->interface;
+ vtable_key.member = m->member;
+
+ v = hashmap_get(bus->vtable_methods, &vtable_key);
+ if (v) {
+ r = method_callbacks_run(bus, m, v, require_fallback, found_object);
+ if (r != 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ /* Then, look for a known property */
+ if (streq(m->interface, "org.freedesktop.DBus.Properties")) {
+ bool get = false;
+
+ get = streq(m->member, "Get");
+
+ if (get || streq(m->member, "Set")) {
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ vtable_key.path = (char*) p;
+
+ r = sd_bus_message_read(m, "ss", &vtable_key.interface, &vtable_key.member);
+ if (r < 0)
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_ARGS, "Expected interface and member parameters");
+
+ v = hashmap_get(bus->vtable_properties, &vtable_key);
+ if (v) {
+ r = property_get_set_callbacks_run(bus, m, v, require_fallback, get, found_object);
+ if (r != 0)
+ return r;
+ }
+
+ } else if (streq(m->member, "GetAll")) {
+ const char *iface;
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(m, "s", &iface);
+ if (r < 0)
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_ARGS, "Expected interface parameter");
+
+ if (iface[0] == 0)
+ iface = NULL;
+
+ r = property_get_all_callbacks_run(bus, m, n->vtables, require_fallback, iface, found_object);
+ if (r != 0)
+ return r;
+ }
+
+ } else if (sd_bus_message_is_method_call(m, "org.freedesktop.DBus.Introspectable", "Introspect")) {
+
+ if (!isempty(sd_bus_message_get_signature(m, true)))
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_ARGS, "Expected no parameters");
+
+ r = process_introspect(bus, m, n, require_fallback, found_object);
+ if (r != 0)
+ return r;
+
+ } else if (sd_bus_message_is_method_call(m, "org.freedesktop.DBus.ObjectManager", "GetManagedObjects")) {
+
+ if (!isempty(sd_bus_message_get_signature(m, true)))
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_ARGS, "Expected no parameters");
+
+ r = process_get_managed_objects(bus, m, n, require_fallback, found_object);
+ if (r != 0)
+ return r;
+ }
+
+ if (bus->nodes_modified)
+ return 0;
+
+ if (!*found_object) {
+ r = bus_node_exists(bus, n, m->path, require_fallback);
+ if (r < 0)
+ return bus_maybe_reply_error(m, r, NULL);
+ if (bus->nodes_modified)
+ return 0;
+ if (r > 0)
+ *found_object = true;
+ }
+
+ return 0;
+}
+
+int bus_process_object(sd_bus *bus, sd_bus_message *m) {
+ _cleanup_free_ char *prefix = NULL;
+ int r;
+ size_t pl;
+ bool found_object = false;
+
+ assert(bus);
+ assert(m);
+
+ if (bus->is_monitor)
+ return 0;
+
+ if (m->header->type != SD_BUS_MESSAGE_METHOD_CALL)
+ return 0;
+
+ if (hashmap_isempty(bus->nodes))
+ return 0;
+
+ /* Never respond to broadcast messages */
+ if (bus->bus_client && !m->destination)
+ return 0;
+
+ assert(m->path);
+ assert(m->member);
+
+ pl = strlen(m->path);
+ assert(pl <= BUS_PATH_SIZE_MAX);
+ prefix = new(char, pl + 1);
+ if (!prefix)
+ return -ENOMEM;
+
+ do {
+ bus->nodes_modified = false;
+
+ r = object_find_and_run(bus, m, m->path, false, &found_object);
+ if (r != 0)
+ return r;
+
+ /* Look for fallback prefixes */
+ OBJECT_PATH_FOREACH_PREFIX(prefix, m->path) {
+
+ if (bus->nodes_modified)
+ break;
+
+ r = object_find_and_run(bus, m, prefix, true, &found_object);
+ if (r != 0)
+ return r;
+ }
+
+ } while (bus->nodes_modified);
+
+ if (!found_object)
+ return 0;
+
+ if (sd_bus_message_is_method_call(m, "org.freedesktop.DBus.Properties", "Get") ||
+ sd_bus_message_is_method_call(m, "org.freedesktop.DBus.Properties", "Set")) {
+ const char *interface = NULL, *property = NULL;
+
+ (void) sd_bus_message_rewind(m, true);
+ (void) sd_bus_message_read_basic(m, 's', &interface);
+ (void) sd_bus_message_read_basic(m, 's', &property);
+
+ r = sd_bus_reply_method_errorf(
+ m,
+ SD_BUS_ERROR_UNKNOWN_PROPERTY,
+ "Unknown interface %s or property %s.", strnull(interface), strnull(property));
+ } else
+ r = sd_bus_reply_method_errorf(
+ m,
+ SD_BUS_ERROR_UNKNOWN_METHOD,
+ "Unknown method %s or interface %s.", m->member, m->interface);
+
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static struct node* bus_node_allocate(sd_bus *bus, const char *path) {
+ struct node *n, *parent;
+ const char *e;
+ _cleanup_free_ char *s = NULL;
+ char *p;
+ int r;
+
+ assert(bus);
+ assert(path);
+ assert(path[0] == '/');
+
+ n = hashmap_get(bus->nodes, path);
+ if (n)
+ return n;
+
+ r = hashmap_ensure_allocated(&bus->nodes, &string_hash_ops);
+ if (r < 0)
+ return NULL;
+
+ s = strdup(path);
+ if (!s)
+ return NULL;
+
+ if (streq(path, "/"))
+ parent = NULL;
+ else {
+ assert_se(e = strrchr(path, '/'));
+
+ p = strndupa_safe(path, MAX(1, e - path));
+
+ parent = bus_node_allocate(bus, p);
+ if (!parent)
+ return NULL;
+ }
+
+ n = new0(struct node, 1);
+ if (!n)
+ return NULL;
+
+ n->parent = parent;
+ n->path = TAKE_PTR(s);
+
+ r = hashmap_put(bus->nodes, n->path, n);
+ if (r < 0) {
+ free(n->path);
+ return mfree(n);
+ }
+
+ if (parent)
+ LIST_PREPEND(siblings, parent->child, n);
+
+ return n;
+}
+
+void bus_node_gc(sd_bus *b, struct node *n) {
+ assert(b);
+
+ if (!n)
+ return;
+
+ if (n->child ||
+ n->callbacks ||
+ n->vtables ||
+ n->enumerators ||
+ n->object_managers)
+ return;
+
+ assert_se(hashmap_remove(b->nodes, n->path) == n);
+
+ if (n->parent)
+ LIST_REMOVE(siblings, n->parent->child, n);
+
+ free(n->path);
+ bus_node_gc(b, n->parent);
+ free(n);
+}
+
+static int bus_find_parent_object_manager(sd_bus *bus, struct node **out, const char *path, bool* path_has_object_manager) {
+ struct node *n;
+
+ assert(bus);
+ assert(path);
+ assert(path_has_object_manager);
+
+ n = hashmap_get(bus->nodes, path);
+
+ if (n)
+ *path_has_object_manager = n->object_managers;
+
+ if (!n) {
+ _cleanup_free_ char *prefix = NULL;
+ size_t pl;
+
+ pl = strlen(path);
+ assert(pl <= BUS_PATH_SIZE_MAX);
+ prefix = new(char, pl + 1);
+ if (!prefix)
+ return -ENOMEM;
+
+ OBJECT_PATH_FOREACH_PREFIX(prefix, path) {
+ n = hashmap_get(bus->nodes, prefix);
+ if (n)
+ break;
+ }
+ }
+
+ while (n && !n->object_managers)
+ n = n->parent;
+
+ if (out)
+ *out = n;
+ return !!n;
+}
+
+static int bus_add_object(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ bool fallback,
+ const char *path,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ sd_bus_slot *s;
+ struct node *n;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ n = bus_node_allocate(bus, path);
+ if (!n)
+ return -ENOMEM;
+
+ s = bus_slot_allocate(bus, !slot, BUS_NODE_CALLBACK, sizeof(struct node_callback), userdata);
+ if (!s) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ s->node_callback.callback = callback;
+ s->node_callback.is_fallback = fallback;
+
+ s->node_callback.node = n;
+ LIST_PREPEND(callbacks, n->callbacks, &s->node_callback);
+ bus->nodes_modified = true;
+
+ if (slot)
+ *slot = s;
+
+ return 0;
+
+fail:
+ sd_bus_slot_unref(s);
+ bus_node_gc(bus, n);
+
+ return r;
+}
+
+_public_ int sd_bus_add_object(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *path,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ return bus_add_object(bus, slot, false, path, callback, userdata);
+}
+
+_public_ int sd_bus_add_fallback(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *prefix,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ return bus_add_object(bus, slot, true, prefix, callback, userdata);
+}
+
+static void vtable_member_hash_func(const struct vtable_member *m, struct siphash *state) {
+ assert(m);
+
+ string_hash_func(m->path, state);
+ string_hash_func(m->interface, state);
+ string_hash_func(m->member, state);
+}
+
+static int vtable_member_compare_func(const struct vtable_member *x, const struct vtable_member *y) {
+ int r;
+
+ assert(x);
+ assert(y);
+
+ r = strcmp(x->path, y->path);
+ if (r != 0)
+ return r;
+
+ r = strcmp(x->interface, y->interface);
+ if (r != 0)
+ return r;
+
+ return strcmp(x->member, y->member);
+}
+
+DEFINE_PRIVATE_HASH_OPS(vtable_member_hash_ops, struct vtable_member, vtable_member_hash_func, vtable_member_compare_func);
+
+typedef enum {
+ NAMES_FIRST_PART = 1 << 0, /* first part of argument name list (input names). It is reset by names_are_valid() */
+ NAMES_PRESENT = 1 << 1, /* at least one argument name is present, so the names will checked.
+ This flag is set and used internally by names_are_valid(), but needs to be stored across calls for 2-parts list */
+ NAMES_SINGLE_PART = 1 << 2, /* argument name list consisting of a single part */
+} names_flags;
+
+static bool names_are_valid(const char *signature, const char **names, names_flags *flags) {
+ int r;
+
+ if ((*flags & NAMES_FIRST_PART || *flags & NAMES_SINGLE_PART) && **names != '\0')
+ *flags |= NAMES_PRESENT;
+
+ for (;*flags & NAMES_PRESENT;) {
+ size_t l;
+
+ if (!*signature)
+ break;
+
+ r = signature_element_length(signature, &l);
+ if (r < 0)
+ return false;
+
+ if (**names != '\0') {
+ if (!member_name_is_valid(*names))
+ return false;
+ *names += strlen(*names) + 1;
+ } else if (*flags & NAMES_PRESENT)
+ return false;
+
+ signature += l;
+ }
+ /* let's check if there are more argument names specified than the signature allows */
+ if (*flags & NAMES_PRESENT && **names != '\0' && !(*flags & NAMES_FIRST_PART))
+ return false;
+ *flags &= ~NAMES_FIRST_PART;
+ return true;
+}
+
+/* the current version of this struct is defined in sd-bus-vtable.h, but we need to list here the historical versions
+ to make sure the calling code is compatible with one of these */
+struct sd_bus_vtable_221 {
+ uint8_t type:8;
+ uint64_t flags:56;
+ union {
+ struct {
+ size_t element_size;
+ } start;
+ struct {
+ const char *member;
+ const char *signature;
+ const char *result;
+ sd_bus_message_handler_t handler;
+ size_t offset;
+ } method;
+ struct {
+ const char *member;
+ const char *signature;
+ } signal;
+ struct {
+ const char *member;
+ const char *signature;
+ sd_bus_property_get_t get;
+ sd_bus_property_set_t set;
+ size_t offset;
+ } property;
+ } x;
+};
+/* Structure size up to v241 */
+#define VTABLE_ELEMENT_SIZE_221 sizeof(struct sd_bus_vtable_221)
+
+/* Size of the structure when "features" field was added. If the structure definition is augmented, a copy of
+ * the structure definition will need to be made (similarly to the sd_bus_vtable_221 above), and this
+ * definition updated to refer to it. */
+#define VTABLE_ELEMENT_SIZE_242 sizeof(struct sd_bus_vtable)
+
+static int vtable_features(const sd_bus_vtable *vtable) {
+ if (vtable[0].x.start.element_size < VTABLE_ELEMENT_SIZE_242 ||
+ !vtable[0].x.start.vtable_format_reference)
+ return 0;
+ return vtable[0].x.start.features;
+}
+
+bool bus_vtable_has_names(const sd_bus_vtable *vtable) {
+ return vtable_features(vtable) & _SD_BUS_VTABLE_PARAM_NAMES;
+}
+
+const sd_bus_vtable* bus_vtable_next(const sd_bus_vtable *vtable, const sd_bus_vtable *v) {
+ return (const sd_bus_vtable*) ((char*) v + vtable[0].x.start.element_size);
+}
+
+static int add_object_vtable_internal(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *path,
+ const char *interface,
+ const sd_bus_vtable *vtable,
+ bool fallback,
+ sd_bus_object_find_t find,
+ void *userdata) {
+
+ sd_bus_slot *s = NULL;
+ struct node_vtable *existing = NULL;
+ const sd_bus_vtable *v;
+ struct node *n;
+ int r;
+ const char *names = "";
+ names_flags nf;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(interface_name_is_valid(interface), -EINVAL);
+ assert_return(vtable, -EINVAL);
+ assert_return(vtable[0].type == _SD_BUS_VTABLE_START, -EINVAL);
+ assert_return(vtable[0].x.start.element_size == VTABLE_ELEMENT_SIZE_221 ||
+ vtable[0].x.start.element_size >= VTABLE_ELEMENT_SIZE_242,
+ -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+ assert_return(!streq(interface, "org.freedesktop.DBus.Properties") &&
+ !streq(interface, "org.freedesktop.DBus.Introspectable") &&
+ !streq(interface, "org.freedesktop.DBus.Peer") &&
+ !streq(interface, "org.freedesktop.DBus.ObjectManager"), -EINVAL);
+
+ r = hashmap_ensure_allocated(&bus->vtable_methods, &vtable_member_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&bus->vtable_properties, &vtable_member_hash_ops);
+ if (r < 0)
+ return r;
+
+ n = bus_node_allocate(bus, path);
+ if (!n)
+ return -ENOMEM;
+
+ LIST_FOREACH(vtables, i, n->vtables) {
+ if (i->is_fallback != fallback) {
+ r = -EPROTOTYPE;
+ goto fail;
+ }
+
+ if (streq(i->interface, interface)) {
+
+ if (i->vtable == vtable) {
+ r = -EEXIST;
+ goto fail;
+ }
+
+ existing = i;
+ }
+ }
+
+ s = bus_slot_allocate(bus, !slot, BUS_NODE_VTABLE, sizeof(struct node_vtable), userdata);
+ if (!s) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ s->node_vtable.is_fallback = fallback;
+ s->node_vtable.vtable = vtable;
+ s->node_vtable.find = find;
+
+ s->node_vtable.interface = strdup(interface);
+ if (!s->node_vtable.interface) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ v = s->node_vtable.vtable;
+ for (v = bus_vtable_next(vtable, v); v->type != _SD_BUS_VTABLE_END; v = bus_vtable_next(vtable, v)) {
+
+ switch (v->type) {
+
+ case _SD_BUS_VTABLE_METHOD: {
+ struct vtable_member *m;
+ nf = NAMES_FIRST_PART;
+
+ if (bus_vtable_has_names(vtable))
+ names = strempty(v->x.method.names);
+
+ if (!member_name_is_valid(v->x.method.member) ||
+ !signature_is_valid(strempty(v->x.method.signature), false) ||
+ !signature_is_valid(strempty(v->x.method.result), false) ||
+ !names_are_valid(strempty(v->x.method.signature), &names, &nf) ||
+ !names_are_valid(strempty(v->x.method.result), &names, &nf) ||
+ !(v->x.method.handler || (isempty(v->x.method.signature) && isempty(v->x.method.result))) ||
+ v->flags & (SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE|SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION)) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ m = new0(struct vtable_member, 1);
+ if (!m) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ m->parent = &s->node_vtable;
+ m->path = n->path;
+ m->interface = s->node_vtable.interface;
+ m->member = v->x.method.member;
+ m->vtable = v;
+
+ r = hashmap_put(bus->vtable_methods, m, m);
+ if (r < 0) {
+ free(m);
+ goto fail;
+ }
+
+ break;
+ }
+
+ case _SD_BUS_VTABLE_WRITABLE_PROPERTY:
+
+ if (!(v->x.property.set || bus_type_is_basic(v->x.property.signature[0]))) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ if (v->flags & SD_BUS_VTABLE_PROPERTY_CONST) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ _fallthrough_;
+ case _SD_BUS_VTABLE_PROPERTY: {
+ struct vtable_member *m;
+
+ if (!member_name_is_valid(v->x.property.member) ||
+ !signature_is_single(v->x.property.signature, false) ||
+ !(v->x.property.get || bus_type_is_basic(v->x.property.signature[0]) || streq(v->x.property.signature, "as")) ||
+ (v->flags & SD_BUS_VTABLE_METHOD_NO_REPLY) ||
+ (!!(v->flags & SD_BUS_VTABLE_PROPERTY_CONST) + !!(v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE) + !!(v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION)) > 1 ||
+ ((v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE) && (v->flags & SD_BUS_VTABLE_PROPERTY_EXPLICIT)) ||
+ (v->flags & SD_BUS_VTABLE_UNPRIVILEGED && v->type == _SD_BUS_VTABLE_PROPERTY)) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ m = new0(struct vtable_member, 1);
+ if (!m) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ m->parent = &s->node_vtable;
+ m->path = n->path;
+ m->interface = s->node_vtable.interface;
+ m->member = v->x.property.member;
+ m->vtable = v;
+
+ r = hashmap_put(bus->vtable_properties, m, m);
+ if (r < 0) {
+ free(m);
+ goto fail;
+ }
+
+ break;
+ }
+
+ case _SD_BUS_VTABLE_SIGNAL:
+ nf = NAMES_SINGLE_PART;
+
+ if (bus_vtable_has_names(vtable))
+ names = strempty(v->x.signal.names);
+
+ if (!member_name_is_valid(v->x.signal.member) ||
+ !signature_is_valid(strempty(v->x.signal.signature), false) ||
+ !names_are_valid(strempty(v->x.signal.signature), &names, &nf) ||
+ v->flags & SD_BUS_VTABLE_UNPRIVILEGED) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ break;
+
+ default:
+ r = -EINVAL;
+ goto fail;
+ }
+ }
+
+ s->node_vtable.node = n;
+ LIST_INSERT_AFTER(vtables, n->vtables, existing, &s->node_vtable);
+ bus->nodes_modified = true;
+
+ if (slot)
+ *slot = s;
+
+ return 0;
+
+fail:
+ sd_bus_slot_unref(s);
+ bus_node_gc(bus, n);
+
+ return r;
+}
+
+/* This symbol exists solely to tell the linker that the "new" vtable format is used. */
+_public_ const unsigned sd_bus_object_vtable_format = 242;
+
+_public_ int sd_bus_add_object_vtable(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *path,
+ const char *interface,
+ const sd_bus_vtable *vtable,
+ void *userdata) {
+
+ return add_object_vtable_internal(bus, slot, path, interface, vtable, false, NULL, userdata);
+}
+
+_public_ int sd_bus_add_fallback_vtable(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *prefix,
+ const char *interface,
+ const sd_bus_vtable *vtable,
+ sd_bus_object_find_t find,
+ void *userdata) {
+
+ return add_object_vtable_internal(bus, slot, prefix, interface, vtable, true, find, userdata);
+}
+
+_public_ int sd_bus_add_node_enumerator(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *path,
+ sd_bus_node_enumerator_t callback,
+ void *userdata) {
+
+ sd_bus_slot *s;
+ struct node *n;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ n = bus_node_allocate(bus, path);
+ if (!n)
+ return -ENOMEM;
+
+ s = bus_slot_allocate(bus, !slot, BUS_NODE_ENUMERATOR, sizeof(struct node_enumerator), userdata);
+ if (!s) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ s->node_enumerator.callback = callback;
+
+ s->node_enumerator.node = n;
+ LIST_PREPEND(enumerators, n->enumerators, &s->node_enumerator);
+ bus->nodes_modified = true;
+
+ if (slot)
+ *slot = s;
+
+ return 0;
+
+fail:
+ sd_bus_slot_unref(s);
+ bus_node_gc(bus, n);
+
+ return r;
+}
+
+static int emit_properties_changed_on_interface(
+ sd_bus *bus,
+ const char *prefix,
+ const char *path,
+ const char *interface,
+ bool require_fallback,
+ bool *found_interface,
+ char **names) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ bool has_invalidating = false, has_changing = false;
+ struct vtable_member key = {};
+ struct node *n;
+ void *u = NULL;
+ int r;
+
+ assert(bus);
+ assert(prefix);
+ assert(path);
+ assert(interface);
+ assert(found_interface);
+
+ n = hashmap_get(bus->nodes, prefix);
+ if (!n)
+ return 0;
+
+ r = sd_bus_message_new_signal(bus, &m, path, "org.freedesktop.DBus.Properties", "PropertiesChanged");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "s", interface);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "{sv}");
+ if (r < 0)
+ return r;
+
+ key.path = prefix;
+ key.interface = interface;
+
+ LIST_FOREACH(vtables, c, n->vtables) {
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ if (!streq(c->interface, interface))
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, c, &u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ *found_interface = true;
+
+ if (names) {
+ /* If the caller specified a list of
+ * properties we include exactly those in the
+ * PropertiesChanged message */
+
+ STRV_FOREACH(property, names) {
+ struct vtable_member *v;
+
+ assert_return(member_name_is_valid(*property), -EINVAL);
+
+ key.member = *property;
+ v = hashmap_get(bus->vtable_properties, &key);
+ if (!v)
+ return -ENOENT;
+
+ /* If there are two vtables for the same
+ * interface, let's handle this property when
+ * we come to that vtable. */
+ if (c != v->parent)
+ continue;
+
+ assert_return(v->vtable->flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE ||
+ v->vtable->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION, -EDOM);
+
+ assert_return(!(v->vtable->flags & SD_BUS_VTABLE_HIDDEN), -EDOM);
+
+ if (v->vtable->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION) {
+ has_invalidating = true;
+ continue;
+ }
+
+ has_changing = true;
+
+ r = vtable_append_one_property(bus, m, m->path, c, v->vtable, u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+ } else {
+ const sd_bus_vtable *v;
+
+ /* If the caller specified no properties list
+ * we include all properties that are marked
+ * as changing in the message. */
+
+ v = c->vtable;
+ for (v = bus_vtable_next(c->vtable, v); v->type != _SD_BUS_VTABLE_END; v = bus_vtable_next(c->vtable, v)) {
+ if (!IN_SET(v->type, _SD_BUS_VTABLE_PROPERTY, _SD_BUS_VTABLE_WRITABLE_PROPERTY))
+ continue;
+
+ if (v->flags & SD_BUS_VTABLE_HIDDEN)
+ continue;
+
+ if (v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION) {
+ has_invalidating = true;
+ continue;
+ }
+
+ if (!(v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE))
+ continue;
+
+ has_changing = true;
+
+ r = vtable_append_one_property(bus, m, m->path, c, v, u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+ }
+ }
+
+ if (!has_invalidating && !has_changing)
+ return 0;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ if (r < 0)
+ return r;
+
+ if (has_invalidating) {
+ LIST_FOREACH(vtables, c, n->vtables) {
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ if (!streq(c->interface, interface))
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, c, &u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ if (names) {
+ STRV_FOREACH(property, names) {
+ struct vtable_member *v;
+
+ key.member = *property;
+ assert_se(v = hashmap_get(bus->vtable_properties, &key));
+ assert(c == v->parent);
+
+ if (!(v->vtable->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION))
+ continue;
+
+ r = sd_bus_message_append(m, "s", *property);
+ if (r < 0)
+ return r;
+ }
+ } else {
+ const sd_bus_vtable *v;
+
+ v = c->vtable;
+ for (v = bus_vtable_next(c->vtable, v); v->type != _SD_BUS_VTABLE_END; v = bus_vtable_next(c->vtable, v)) {
+ if (!IN_SET(v->type, _SD_BUS_VTABLE_PROPERTY, _SD_BUS_VTABLE_WRITABLE_PROPERTY))
+ continue;
+
+ if (v->flags & SD_BUS_VTABLE_HIDDEN)
+ continue;
+
+ if (!(v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION))
+ continue;
+
+ r = sd_bus_message_append(m, "s", v->x.property.member);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_send(bus, m, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+_public_ int sd_bus_emit_properties_changed_strv(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ char **names) {
+
+ _cleanup_free_ char *prefix = NULL;
+ bool found_interface = false;
+ size_t pl;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(interface_name_is_valid(interface), -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ /* A non-NULL but empty names list means nothing needs to be
+ generated. A NULL list OTOH indicates that all properties
+ that are set to EMITS_CHANGE or EMITS_INVALIDATION shall be
+ included in the PropertiesChanged message. */
+ if (names && names[0] == NULL)
+ return 0;
+
+ BUS_DONT_DESTROY(bus);
+
+ pl = strlen(path);
+ assert(pl <= BUS_PATH_SIZE_MAX);
+ prefix = new(char, pl + 1);
+ if (!prefix)
+ return -ENOMEM;
+
+ do {
+ bus->nodes_modified = false;
+
+ r = emit_properties_changed_on_interface(bus, path, path, interface, false, &found_interface, names);
+ if (r != 0)
+ return r;
+ if (bus->nodes_modified)
+ continue;
+
+ OBJECT_PATH_FOREACH_PREFIX(prefix, path) {
+ r = emit_properties_changed_on_interface(bus, prefix, path, interface, true, &found_interface, names);
+ if (r != 0)
+ return r;
+ if (bus->nodes_modified)
+ break;
+ }
+
+ } while (bus->nodes_modified);
+
+ return found_interface ? 0 : -ENOENT;
+}
+
+_public_ int sd_bus_emit_properties_changed(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *name, ...) {
+
+ char **names;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(interface_name_is_valid(interface), -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (!name)
+ return 0;
+
+ names = strv_from_stdarg_alloca(name);
+
+ return sd_bus_emit_properties_changed_strv(bus, path, interface, names);
+}
+
+static int object_added_append_all_prefix(
+ sd_bus *bus,
+ sd_bus_message *m,
+ OrderedSet *s,
+ const char *prefix,
+ const char *path,
+ bool require_fallback) {
+
+ const char *previous_interface = NULL;
+ struct node *n;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(s);
+ assert(prefix);
+ assert(path);
+
+ n = hashmap_get(bus->nodes, prefix);
+ if (!n)
+ return 0;
+
+ LIST_FOREACH(vtables, c, n->vtables) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ void *u = NULL;
+
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, c, &u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ if (!streq_ptr(c->interface, previous_interface)) {
+ /* If a child-node already handled this interface, we
+ * skip it on any of its parents. The child vtables
+ * always fully override any conflicting vtables of
+ * any parent node. */
+ if (ordered_set_get(s, c->interface))
+ continue;
+
+ r = ordered_set_put(s, c->interface);
+ if (r < 0)
+ return r;
+
+ if (previous_interface) {
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_open_container(m, 'e', "sa{sv}");
+ if (r < 0)
+ return r;
+ r = sd_bus_message_append(m, "s", c->interface);
+ if (r < 0)
+ return r;
+ r = sd_bus_message_open_container(m, 'a', "{sv}");
+ if (r < 0)
+ return r;
+
+ previous_interface = c->interface;
+ }
+
+ r = vtable_append_all_properties(bus, m, path, c, u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ if (previous_interface) {
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int object_added_append_all(sd_bus *bus, sd_bus_message *m, const char *path, bool path_has_object_manager) {
+ _cleanup_ordered_set_free_ OrderedSet *s = NULL;
+ _cleanup_free_ char *prefix = NULL;
+ size_t pl;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(path);
+
+ /*
+ * This appends all interfaces registered on path @path. We first add
+ * the builtin interfaces, which are always available and handled by
+ * sd-bus. Then, we add all interfaces registered on the exact node,
+ * followed by all fallback interfaces registered on any parent prefix.
+ *
+ * If an interface is registered multiple times on the same node with
+ * different vtables, we merge all the properties across all vtables.
+ * However, if a child node has the same interface registered as one of
+ * its parent nodes has as fallback, we make the child overwrite the
+ * parent instead of extending it. Therefore, we keep a "Set" of all
+ * handled interfaces during parent traversal, so we skip interfaces on
+ * a parent that were overwritten by a child.
+ */
+
+ s = ordered_set_new(&string_hash_ops);
+ if (!s)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(m, "{sa{sv}}", "org.freedesktop.DBus.Peer", 0);
+ if (r < 0)
+ return r;
+ r = sd_bus_message_append(m, "{sa{sv}}", "org.freedesktop.DBus.Introspectable", 0);
+ if (r < 0)
+ return r;
+ r = sd_bus_message_append(m, "{sa{sv}}", "org.freedesktop.DBus.Properties", 0);
+ if (r < 0)
+ return r;
+ if (path_has_object_manager){
+ r = sd_bus_message_append(m, "{sa{sv}}", "org.freedesktop.DBus.ObjectManager", 0);
+ if (r < 0)
+ return r;
+ }
+
+ r = object_added_append_all_prefix(bus, m, s, path, path, false);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ pl = strlen(path);
+ assert(pl <= BUS_PATH_SIZE_MAX);
+ prefix = new(char, pl + 1);
+ if (!prefix)
+ return -ENOMEM;
+
+ OBJECT_PATH_FOREACH_PREFIX(prefix, path) {
+ r = object_added_append_all_prefix(bus, m, s, prefix, path, true);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ return 0;
+}
+
+_public_ int sd_bus_emit_object_added(sd_bus *bus, const char *path) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ struct node *object_manager;
+ int r;
+
+ /*
+ * This emits an InterfacesAdded signal on the given path, by iterating
+ * all registered vtables and fallback vtables on the path. All
+ * properties are queried and included in the signal.
+ * This call is equivalent to sd_bus_emit_interfaces_added() with an
+ * explicit list of registered interfaces. However, unlike
+ * interfaces_added(), this call can figure out the list of supported
+ * interfaces itself. Furthermore, it properly adds the builtin
+ * org.freedesktop.DBus.* interfaces.
+ */
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ bool path_has_object_manager = false;
+ r = bus_find_parent_object_manager(bus, &object_manager, path, &path_has_object_manager);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ESRCH;
+
+ BUS_DONT_DESTROY(bus);
+
+ do {
+ bus->nodes_modified = false;
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_signal(bus, &m, object_manager->path, "org.freedesktop.DBus.ObjectManager", "InterfacesAdded");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_basic(m, 'o', path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "{sa{sv}}");
+ if (r < 0)
+ return r;
+
+ r = object_added_append_all(bus, m, path, path_has_object_manager);
+ if (r < 0)
+ return r;
+
+ if (bus->nodes_modified)
+ continue;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ } while (bus->nodes_modified);
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+static int object_removed_append_all_prefix(
+ sd_bus *bus,
+ sd_bus_message *m,
+ OrderedSet *s,
+ const char *prefix,
+ const char *path,
+ bool require_fallback) {
+
+ const char *previous_interface = NULL;
+ struct node *n;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(s);
+ assert(prefix);
+ assert(path);
+
+ n = hashmap_get(bus->nodes, prefix);
+ if (!n)
+ return 0;
+
+ LIST_FOREACH(vtables, c, n->vtables) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ void *u = NULL;
+
+ if (require_fallback && !c->is_fallback)
+ continue;
+ if (streq_ptr(c->interface, previous_interface))
+ continue;
+
+ /* If a child-node already handled this interface, we
+ * skip it on any of its parents. The child vtables
+ * always fully override any conflicting vtables of
+ * any parent node. */
+ if (ordered_set_get(s, c->interface))
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, c, &u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ r = ordered_set_put(s, c->interface);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "s", c->interface);
+ if (r < 0)
+ return r;
+
+ previous_interface = c->interface;
+ }
+
+ return 0;
+}
+
+static int object_removed_append_all(sd_bus *bus, sd_bus_message *m, const char *path, bool path_has_object_manager) {
+ _cleanup_ordered_set_free_ OrderedSet *s = NULL;
+ _cleanup_free_ char *prefix = NULL;
+ size_t pl;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(path);
+
+ /* see sd_bus_emit_object_added() for details */
+
+ s = ordered_set_new(&string_hash_ops);
+ if (!s)
+ return -ENOMEM;
+
+ r = sd_bus_message_append(m, "s", "org.freedesktop.DBus.Peer");
+ if (r < 0)
+ return r;
+ r = sd_bus_message_append(m, "s", "org.freedesktop.DBus.Introspectable");
+ if (r < 0)
+ return r;
+ r = sd_bus_message_append(m, "s", "org.freedesktop.DBus.Properties");
+ if (r < 0)
+ return r;
+
+ if (path_has_object_manager){
+ r = sd_bus_message_append(m, "s", "org.freedesktop.DBus.ObjectManager");
+ if (r < 0)
+ return r;
+ }
+
+ r = object_removed_append_all_prefix(bus, m, s, path, path, false);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ pl = strlen(path);
+ assert(pl <= BUS_PATH_SIZE_MAX);
+ prefix = new(char, pl + 1);
+ if (!prefix)
+ return -ENOMEM;
+
+ OBJECT_PATH_FOREACH_PREFIX(prefix, path) {
+ r = object_removed_append_all_prefix(bus, m, s, prefix, path, true);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ return 0;
+}
+
+_public_ int sd_bus_emit_object_removed(sd_bus *bus, const char *path) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ struct node *object_manager;
+ int r;
+
+ /*
+ * This is like sd_bus_emit_object_added(), but emits an
+ * InterfacesRemoved signal on the given path. This only includes any
+ * registered interfaces but skips the properties. Note that this will
+ * call into the find() callbacks of any registered vtable. Therefore,
+ * you must call this function before destroying/unlinking your object.
+ * Otherwise, the list of interfaces will be incomplete. However, note
+ * that this will *NOT* call into any property callback. Therefore, the
+ * object might be in an "destructed" state, as long as we can find it.
+ */
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ bool path_has_object_manager = false;
+ r = bus_find_parent_object_manager(bus, &object_manager, path, &path_has_object_manager);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ESRCH;
+
+ BUS_DONT_DESTROY(bus);
+
+ do {
+ bus->nodes_modified = false;
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_signal(bus, &m, object_manager->path, "org.freedesktop.DBus.ObjectManager", "InterfacesRemoved");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_basic(m, 'o', path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ if (r < 0)
+ return r;
+
+ r = object_removed_append_all(bus, m, path, path_has_object_manager);
+ if (r < 0)
+ return r;
+
+ if (bus->nodes_modified)
+ continue;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ } while (bus->nodes_modified);
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+static int interfaces_added_append_one_prefix(
+ sd_bus *bus,
+ sd_bus_message *m,
+ const char *prefix,
+ const char *path,
+ const char *interface,
+ bool require_fallback) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ bool found_interface = false;
+ struct node *n;
+ void *u = NULL;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(prefix);
+ assert(path);
+ assert(interface);
+
+ n = hashmap_get(bus->nodes, prefix);
+ if (!n)
+ return 0;
+
+ LIST_FOREACH(vtables, c, n->vtables) {
+ if (require_fallback && !c->is_fallback)
+ continue;
+
+ if (!streq(c->interface, interface))
+ continue;
+
+ r = node_vtable_get_userdata(bus, path, c, &u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ if (r == 0)
+ continue;
+
+ if (!found_interface) {
+ r = sd_bus_message_append_basic(m, 's', interface);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "{sv}");
+ if (r < 0)
+ return r;
+
+ found_interface = true;
+ }
+
+ r = vtable_append_all_properties(bus, m, path, c, u, &error);
+ if (r < 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ if (found_interface) {
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+ }
+
+ return found_interface;
+}
+
+static int interfaces_added_append_one(
+ sd_bus *bus,
+ sd_bus_message *m,
+ const char *path,
+ const char *interface) {
+
+ _cleanup_free_ char *prefix = NULL;
+ size_t pl;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(path);
+ assert(interface);
+
+ r = interfaces_added_append_one_prefix(bus, m, path, path, interface, false);
+ if (r != 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+
+ pl = strlen(path);
+ assert(pl <= BUS_PATH_SIZE_MAX);
+ prefix = new(char, pl + 1);
+ if (!prefix)
+ return -ENOMEM;
+
+ OBJECT_PATH_FOREACH_PREFIX(prefix, path) {
+ r = interfaces_added_append_one_prefix(bus, m, prefix, path, interface, true);
+ if (r != 0)
+ return r;
+ if (bus->nodes_modified)
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+_public_ int sd_bus_emit_interfaces_added_strv(sd_bus *bus, const char *path, char **interfaces) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ struct node *object_manager;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (strv_isempty(interfaces))
+ return 0;
+
+ bool path_has_object_manager = false;
+ r = bus_find_parent_object_manager(bus, &object_manager, path, &path_has_object_manager);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ESRCH;
+
+ BUS_DONT_DESTROY(bus);
+
+ do {
+ bus->nodes_modified = false;
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_signal(bus, &m, object_manager->path, "org.freedesktop.DBus.ObjectManager", "InterfacesAdded");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_basic(m, 'o', path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(m, 'a', "{sa{sv}}");
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, interfaces) {
+ assert_return(interface_name_is_valid(*i), -EINVAL);
+
+ r = sd_bus_message_open_container(m, 'e', "sa{sv}");
+ if (r < 0)
+ return r;
+
+ r = interfaces_added_append_one(bus, m, path, *i);
+ if (r < 0)
+ return r;
+
+ if (bus->nodes_modified)
+ break;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+ }
+
+ if (bus->nodes_modified)
+ continue;
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ } while (bus->nodes_modified);
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+_public_ int sd_bus_emit_interfaces_added(sd_bus *bus, const char *path, const char *interface, ...) {
+ char **interfaces;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ interfaces = strv_from_stdarg_alloca(interface);
+
+ return sd_bus_emit_interfaces_added_strv(bus, path, interfaces);
+}
+
+_public_ int sd_bus_emit_interfaces_removed_strv(sd_bus *bus, const char *path, char **interfaces) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ struct node *object_manager;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (strv_isempty(interfaces))
+ return 0;
+
+ bool path_has_object_manager = false;
+ r = bus_find_parent_object_manager(bus, &object_manager, path, &path_has_object_manager);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ESRCH;
+
+ r = sd_bus_message_new_signal(bus, &m, object_manager->path, "org.freedesktop.DBus.ObjectManager", "InterfacesRemoved");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_basic(m, 'o', path);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_strv(m, interfaces);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(bus, m, NULL);
+}
+
+_public_ int sd_bus_emit_interfaces_removed(sd_bus *bus, const char *path, const char *interface, ...) {
+ char **interfaces;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ interfaces = strv_from_stdarg_alloca(interface);
+
+ return sd_bus_emit_interfaces_removed_strv(bus, path, interfaces);
+}
+
+_public_ int sd_bus_add_object_manager(sd_bus *bus, sd_bus_slot **slot, const char *path) {
+ sd_bus_slot *s;
+ struct node *n;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ n = bus_node_allocate(bus, path);
+ if (!n)
+ return -ENOMEM;
+
+ s = bus_slot_allocate(bus, !slot, BUS_NODE_OBJECT_MANAGER, sizeof(struct node_object_manager), NULL);
+ if (!s) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ s->node_object_manager.node = n;
+ LIST_PREPEND(object_managers, n->object_managers, &s->node_object_manager);
+ bus->nodes_modified = true;
+
+ if (slot)
+ *slot = s;
+
+ return 0;
+
+fail:
+ sd_bus_slot_unref(s);
+ bus_node_gc(bus, n);
+
+ return r;
+}
diff --git a/src/libsystemd/sd-bus/bus-objects.h b/src/libsystemd/sd-bus/bus-objects.h
new file mode 100644
index 0000000..20fccfa
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-objects.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "bus-internal.h"
+#include "bus-introspect.h"
+
+const sd_bus_vtable* bus_vtable_next(const sd_bus_vtable *vtable, const sd_bus_vtable *v);
+bool bus_vtable_has_names(const sd_bus_vtable *vtable);
+int bus_process_object(sd_bus *bus, sd_bus_message *m);
+void bus_node_gc(sd_bus *b, struct node *n);
+
+int introspect_path(
+ sd_bus *bus,
+ const char *path,
+ struct node *n,
+ bool require_fallback,
+ bool ignore_nodes_modified,
+ bool *found_object,
+ char **ret,
+ sd_bus_error *error);
diff --git a/src/libsystemd/sd-bus/bus-protocol.h b/src/libsystemd/sd-bus/bus-protocol.h
new file mode 100644
index 0000000..be46b5f
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-protocol.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <endian.h>
+
+#include "macro.h"
+
+/* Packet header */
+
+struct _packed_ bus_header {
+ uint8_t endian;
+ uint8_t type;
+ uint8_t flags;
+ uint8_t version;
+ uint32_t body_size;
+ /* Note that what the bus spec calls "serial" we'll call "cookie" instead, because we don't
+ * want to imply that the cookie was in any way monotonically increasing. */
+ uint32_t serial;
+ uint32_t fields_size;
+};
+
+/* Endianness */
+
+enum {
+ _BUS_INVALID_ENDIAN = 0,
+ BUS_LITTLE_ENDIAN = 'l',
+ BUS_BIG_ENDIAN = 'B',
+#if __BYTE_ORDER == __BIG_ENDIAN
+ BUS_NATIVE_ENDIAN = BUS_BIG_ENDIAN,
+ BUS_REVERSE_ENDIAN = BUS_LITTLE_ENDIAN
+#else
+ BUS_NATIVE_ENDIAN = BUS_LITTLE_ENDIAN,
+ BUS_REVERSE_ENDIAN = BUS_BIG_ENDIAN
+#endif
+};
+
+/* Flags */
+
+enum {
+ BUS_MESSAGE_NO_REPLY_EXPECTED = 1 << 0,
+ BUS_MESSAGE_NO_AUTO_START = 1 << 1,
+ BUS_MESSAGE_ALLOW_INTERACTIVE_AUTHORIZATION = 1 << 2,
+};
+
+/* Header fields */
+
+enum {
+ _BUS_MESSAGE_HEADER_INVALID = 0,
+ BUS_MESSAGE_HEADER_PATH,
+ BUS_MESSAGE_HEADER_INTERFACE,
+ BUS_MESSAGE_HEADER_MEMBER,
+ BUS_MESSAGE_HEADER_ERROR_NAME,
+ BUS_MESSAGE_HEADER_REPLY_SERIAL,
+ BUS_MESSAGE_HEADER_DESTINATION,
+ BUS_MESSAGE_HEADER_SENDER,
+ BUS_MESSAGE_HEADER_SIGNATURE,
+ BUS_MESSAGE_HEADER_UNIX_FDS,
+ _BUS_MESSAGE_HEADER_MAX
+};
+
+/* RequestName parameters */
+
+enum {
+ BUS_NAME_ALLOW_REPLACEMENT = 1 << 0,
+ BUS_NAME_REPLACE_EXISTING = 1 << 1,
+ BUS_NAME_DO_NOT_QUEUE = 1 << 2,
+};
+
+/* RequestName returns */
+enum {
+ BUS_NAME_PRIMARY_OWNER = 1,
+ BUS_NAME_IN_QUEUE = 2,
+ BUS_NAME_EXISTS = 3,
+ BUS_NAME_ALREADY_OWNER = 4
+};
+
+/* ReleaseName returns */
+enum {
+ BUS_NAME_RELEASED = 1,
+ BUS_NAME_NON_EXISTENT = 2,
+ BUS_NAME_NOT_OWNER = 3,
+};
+
+/* StartServiceByName returns */
+enum {
+ BUS_START_REPLY_SUCCESS = 1,
+ BUS_START_REPLY_ALREADY_RUNNING = 2,
+};
diff --git a/src/libsystemd/sd-bus/bus-signature.c b/src/libsystemd/sd-bus/bus-signature.c
new file mode 100644
index 0000000..78c7436
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-signature.c
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-bus.h"
+
+#include "bus-signature.h"
+#include "bus-type.h"
+
+static int signature_element_length_internal(
+ const char *s,
+ bool allow_dict_entry,
+ unsigned array_depth,
+ unsigned struct_depth,
+ size_t *l) {
+
+ int r;
+
+ if (!s)
+ return -EINVAL;
+
+ assert(l);
+
+ if (bus_type_is_basic(*s) || *s == SD_BUS_TYPE_VARIANT) {
+ *l = 1;
+ return 0;
+ }
+
+ if (*s == SD_BUS_TYPE_ARRAY) {
+ size_t t;
+
+ if (array_depth >= 32)
+ return -EINVAL;
+
+ r = signature_element_length_internal(s + 1, true, array_depth+1, struct_depth, &t);
+ if (r < 0)
+ return r;
+
+ *l = t + 1;
+ return 0;
+ }
+
+ if (*s == SD_BUS_TYPE_STRUCT_BEGIN) {
+ const char *p = s + 1;
+
+ if (struct_depth >= 32)
+ return -EINVAL;
+
+ while (*p != SD_BUS_TYPE_STRUCT_END) {
+ size_t t;
+
+ r = signature_element_length_internal(p, false, array_depth, struct_depth+1, &t);
+ if (r < 0)
+ return r;
+
+ p += t;
+ }
+
+ if (p - s < 2)
+ /* D-Bus spec: Empty structures are not allowed; there
+ * must be at least one type code between the parentheses.
+ */
+ return -EINVAL;
+
+ *l = p - s + 1;
+ return 0;
+ }
+
+ if (*s == SD_BUS_TYPE_DICT_ENTRY_BEGIN && allow_dict_entry) {
+ const char *p = s + 1;
+ unsigned n = 0;
+
+ if (struct_depth >= 32)
+ return -EINVAL;
+
+ while (*p != SD_BUS_TYPE_DICT_ENTRY_END) {
+ size_t t;
+
+ if (n == 0 && !bus_type_is_basic(*p))
+ return -EINVAL;
+
+ r = signature_element_length_internal(p, false, array_depth, struct_depth+1, &t);
+ if (r < 0)
+ return r;
+
+ p += t;
+ n++;
+ }
+
+ if (n != 2)
+ return -EINVAL;
+
+ *l = p - s + 1;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+int signature_element_length(const char *s, size_t *l) {
+ return signature_element_length_internal(s, true, 0, 0, l);
+}
+
+bool signature_is_single(const char *s, bool allow_dict_entry) {
+ int r;
+ size_t t;
+
+ if (!s)
+ return false;
+
+ r = signature_element_length_internal(s, allow_dict_entry, 0, 0, &t);
+ if (r < 0)
+ return false;
+
+ return s[t] == 0;
+}
+
+bool signature_is_pair(const char *s) {
+
+ if (!s)
+ return false;
+
+ if (!bus_type_is_basic(*s))
+ return false;
+
+ return signature_is_single(s + 1, false);
+}
+
+bool signature_is_valid(const char *s, bool allow_dict_entry) {
+ const char *p;
+ int r;
+
+ if (!s)
+ return false;
+
+ p = s;
+ while (*p) {
+ size_t t;
+
+ r = signature_element_length_internal(p, allow_dict_entry, 0, 0, &t);
+ if (r < 0)
+ return false;
+
+ p += t;
+ }
+
+ return p - s <= SD_BUS_MAXIMUM_SIGNATURE_LENGTH;
+}
diff --git a/src/libsystemd/sd-bus/bus-signature.h b/src/libsystemd/sd-bus/bus-signature.h
new file mode 100644
index 0000000..314fcc2
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-signature.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+bool signature_is_single(const char *s, bool allow_dict_entry);
+bool signature_is_pair(const char *s);
+bool signature_is_valid(const char *s, bool allow_dict_entry);
+
+int signature_element_length(const char *s, size_t *l);
diff --git a/src/libsystemd/sd-bus/bus-slot.c b/src/libsystemd/sd-bus/bus-slot.c
new file mode 100644
index 0000000..9f28957
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-slot.c
@@ -0,0 +1,311 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-control.h"
+#include "bus-objects.h"
+#include "bus-slot.h"
+#include "string-util.h"
+
+sd_bus_slot *bus_slot_allocate(
+ sd_bus *bus,
+ bool floating,
+ BusSlotType type,
+ size_t extra,
+ void *userdata) {
+
+ sd_bus_slot *slot;
+
+ assert(bus);
+
+ slot = malloc0(offsetof(sd_bus_slot, reply_callback) + extra);
+ if (!slot)
+ return NULL;
+
+ slot->n_ref = 1;
+ slot->type = type;
+ slot->bus = bus;
+ slot->floating = floating;
+ slot->userdata = userdata;
+
+ if (!floating)
+ sd_bus_ref(bus);
+
+ LIST_PREPEND(slots, bus->slots, slot);
+
+ return slot;
+}
+
+void bus_slot_disconnect(sd_bus_slot *slot, bool unref) {
+ sd_bus *bus;
+
+ assert(slot);
+
+ if (!slot->bus)
+ return;
+
+ switch (slot->type) {
+
+ case BUS_REPLY_CALLBACK:
+
+ if (slot->reply_callback.cookie != 0)
+ ordered_hashmap_remove(slot->bus->reply_callbacks, &slot->reply_callback.cookie);
+
+ if (slot->reply_callback.timeout_usec != 0)
+ prioq_remove(slot->bus->reply_callbacks_prioq, &slot->reply_callback, &slot->reply_callback.prioq_idx);
+
+ break;
+
+ case BUS_FILTER_CALLBACK:
+ slot->bus->filter_callbacks_modified = true;
+ LIST_REMOVE(callbacks, slot->bus->filter_callbacks, &slot->filter_callback);
+ break;
+
+ case BUS_MATCH_CALLBACK:
+
+ if (slot->match_added)
+ (void) bus_remove_match_internal(slot->bus, slot->match_callback.match_string);
+
+ if (slot->match_callback.install_slot) {
+ bus_slot_disconnect(slot->match_callback.install_slot, true);
+ slot->match_callback.install_slot = sd_bus_slot_unref(slot->match_callback.install_slot);
+ }
+
+ slot->bus->match_callbacks_modified = true;
+ bus_match_remove(&slot->bus->match_callbacks, &slot->match_callback);
+
+ slot->match_callback.match_string = mfree(slot->match_callback.match_string);
+
+ break;
+
+ case BUS_NODE_CALLBACK:
+
+ if (slot->node_callback.node) {
+ LIST_REMOVE(callbacks, slot->node_callback.node->callbacks, &slot->node_callback);
+ slot->bus->nodes_modified = true;
+
+ bus_node_gc(slot->bus, slot->node_callback.node);
+ }
+
+ break;
+
+ case BUS_NODE_ENUMERATOR:
+
+ if (slot->node_enumerator.node) {
+ LIST_REMOVE(enumerators, slot->node_enumerator.node->enumerators, &slot->node_enumerator);
+ slot->bus->nodes_modified = true;
+
+ bus_node_gc(slot->bus, slot->node_enumerator.node);
+ }
+
+ break;
+
+ case BUS_NODE_OBJECT_MANAGER:
+
+ if (slot->node_object_manager.node) {
+ LIST_REMOVE(object_managers, slot->node_object_manager.node->object_managers, &slot->node_object_manager);
+ slot->bus->nodes_modified = true;
+
+ bus_node_gc(slot->bus, slot->node_object_manager.node);
+ }
+
+ break;
+
+ case BUS_NODE_VTABLE:
+
+ if (slot->node_vtable.node && slot->node_vtable.interface && slot->node_vtable.vtable) {
+ const sd_bus_vtable *v;
+
+ for (v = slot->node_vtable.vtable; v->type != _SD_BUS_VTABLE_END; v = bus_vtable_next(slot->node_vtable.vtable, v)) {
+ struct vtable_member *x = NULL;
+
+ switch (v->type) {
+
+ case _SD_BUS_VTABLE_METHOD: {
+ struct vtable_member key;
+
+ key.path = slot->node_vtable.node->path;
+ key.interface = slot->node_vtable.interface;
+ key.member = v->x.method.member;
+
+ x = hashmap_remove(slot->bus->vtable_methods, &key);
+ break;
+ }
+
+ case _SD_BUS_VTABLE_PROPERTY:
+ case _SD_BUS_VTABLE_WRITABLE_PROPERTY: {
+ struct vtable_member key;
+
+ key.path = slot->node_vtable.node->path;
+ key.interface = slot->node_vtable.interface;
+ key.member = v->x.method.member;
+
+ x = hashmap_remove(slot->bus->vtable_properties, &key);
+ break;
+ }}
+
+ free(x);
+ }
+ }
+
+ slot->node_vtable.interface = mfree(slot->node_vtable.interface);
+
+ if (slot->node_vtable.node) {
+ LIST_REMOVE(vtables, slot->node_vtable.node->vtables, &slot->node_vtable);
+ slot->bus->nodes_modified = true;
+
+ bus_node_gc(slot->bus, slot->node_vtable.node);
+ }
+
+ break;
+
+ default:
+ assert_not_reached();
+ }
+
+ bus = slot->bus;
+
+ slot->type = _BUS_SLOT_INVALID;
+ slot->bus = NULL;
+ LIST_REMOVE(slots, bus->slots, slot);
+
+ if (!slot->floating)
+ sd_bus_unref(bus);
+ else if (unref)
+ sd_bus_slot_unref(slot);
+}
+
+static sd_bus_slot* bus_slot_free(sd_bus_slot *slot) {
+ assert(slot);
+
+ bus_slot_disconnect(slot, false);
+
+ if (slot->destroy_callback)
+ slot->destroy_callback(slot->userdata);
+
+ free(slot->description);
+ return mfree(slot);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_bus_slot, sd_bus_slot, bus_slot_free);
+
+_public_ sd_bus* sd_bus_slot_get_bus(sd_bus_slot *slot) {
+ assert_return(slot, NULL);
+
+ return slot->bus;
+}
+
+_public_ void *sd_bus_slot_get_userdata(sd_bus_slot *slot) {
+ assert_return(slot, NULL);
+
+ return slot->userdata;
+}
+
+_public_ void *sd_bus_slot_set_userdata(sd_bus_slot *slot, void *userdata) {
+ void *ret;
+
+ assert_return(slot, NULL);
+
+ ret = slot->userdata;
+ slot->userdata = userdata;
+
+ return ret;
+}
+
+_public_ int sd_bus_slot_set_destroy_callback(sd_bus_slot *slot, sd_bus_destroy_t callback) {
+ assert_return(slot, -EINVAL);
+
+ slot->destroy_callback = callback;
+ return 0;
+}
+
+_public_ int sd_bus_slot_get_destroy_callback(sd_bus_slot *slot, sd_bus_destroy_t *callback) {
+ assert_return(slot, -EINVAL);
+
+ if (callback)
+ *callback = slot->destroy_callback;
+
+ return !!slot->destroy_callback;
+}
+
+_public_ sd_bus_message *sd_bus_slot_get_current_message(sd_bus_slot *slot) {
+ assert_return(slot, NULL);
+ assert_return(slot->type >= 0, NULL);
+
+ if (slot->bus->current_slot != slot)
+ return NULL;
+
+ return slot->bus->current_message;
+}
+
+_public_ sd_bus_message_handler_t sd_bus_slot_get_current_handler(sd_bus_slot *slot) {
+ assert_return(slot, NULL);
+ assert_return(slot->type >= 0, NULL);
+
+ if (slot->bus->current_slot != slot)
+ return NULL;
+
+ return slot->bus->current_handler;
+}
+
+_public_ void* sd_bus_slot_get_current_userdata(sd_bus_slot *slot) {
+ assert_return(slot, NULL);
+ assert_return(slot->type >= 0, NULL);
+
+ if (slot->bus->current_slot != slot)
+ return NULL;
+
+ return slot->bus->current_userdata;
+}
+
+_public_ int sd_bus_slot_get_floating(sd_bus_slot *slot) {
+ assert_return(slot, -EINVAL);
+
+ return slot->floating;
+}
+
+_public_ int sd_bus_slot_set_floating(sd_bus_slot *slot, int b) {
+ assert_return(slot, -EINVAL);
+
+ if (slot->floating == !!b)
+ return 0;
+
+ if (!slot->bus) /* already disconnected slots can't be reconnected */
+ return -ESTALE;
+
+ slot->floating = b;
+
+ /* When a slot is "floating" then the bus references the slot. Otherwise the slot references the bus. Hence,
+ * when we move from one to the other, let's increase one reference and decrease the other. */
+
+ if (b) {
+ sd_bus_slot_ref(slot);
+ sd_bus_unref(slot->bus);
+ } else {
+ sd_bus_ref(slot->bus);
+ sd_bus_slot_unref(slot);
+ }
+
+ return 1;
+}
+
+_public_ int sd_bus_slot_set_description(sd_bus_slot *slot, const char *description) {
+ assert_return(slot, -EINVAL);
+
+ return free_and_strdup(&slot->description, description);
+}
+
+_public_ int sd_bus_slot_get_description(sd_bus_slot *slot, const char **description) {
+ assert_return(slot, -EINVAL);
+ assert_return(description, -EINVAL);
+
+ if (slot->description)
+ *description = slot->description;
+ else if (slot->type == BUS_MATCH_CALLBACK)
+ *description = slot->match_callback.match_string;
+ else
+ return -ENXIO;
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/bus-slot.h b/src/libsystemd/sd-bus/bus-slot.h
new file mode 100644
index 0000000..8116195
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-slot.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "bus-internal.h"
+
+sd_bus_slot *bus_slot_allocate(sd_bus *bus, bool floating, BusSlotType type, size_t extra, void *userdata);
+
+void bus_slot_disconnect(sd_bus_slot *slot, bool unref);
diff --git a/src/libsystemd/sd-bus/bus-socket.c b/src/libsystemd/sd-bus/bus-socket.c
new file mode 100644
index 0000000..5ade8e9
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-socket.c
@@ -0,0 +1,1428 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <endian.h>
+#include <poll.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-socket.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hexdecoct.h"
+#include "io-util.h"
+#include "iovec-util.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "signal-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "user-util.h"
+#include "utf8.h"
+
+#define SNDBUF_SIZE (8*1024*1024)
+
+static void iovec_advance(struct iovec iov[], unsigned *idx, size_t size) {
+
+ while (size > 0) {
+ struct iovec *i = iov + *idx;
+
+ if (i->iov_len > size) {
+ i->iov_base = (uint8_t*) i->iov_base + size;
+ i->iov_len -= size;
+ return;
+ }
+
+ size -= i->iov_len;
+
+ *i = IOVEC_MAKE(NULL, 0);
+
+ (*idx)++;
+ }
+}
+
+static int append_iovec(sd_bus_message *m, const void *p, size_t sz) {
+ assert(m);
+ assert(p);
+ assert(sz > 0);
+
+ m->iovec[m->n_iovec++] = IOVEC_MAKE((void*) p, sz);
+
+ return 0;
+}
+
+static int bus_message_setup_iovec(sd_bus_message *m) {
+ struct bus_body_part *part;
+ unsigned n, i;
+ int r;
+
+ assert(m);
+ assert(m->sealed);
+
+ if (m->n_iovec > 0)
+ return 0;
+
+ assert(!m->iovec);
+
+ n = 1 + m->n_body_parts;
+ if (n < ELEMENTSOF(m->iovec_fixed))
+ m->iovec = m->iovec_fixed;
+ else {
+ m->iovec = new(struct iovec, n);
+ if (!m->iovec) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ r = append_iovec(m, m->header, BUS_MESSAGE_BODY_BEGIN(m));
+ if (r < 0)
+ goto fail;
+
+ MESSAGE_FOREACH_PART(part, i, m) {
+ r = bus_body_part_map(part);
+ if (r < 0)
+ goto fail;
+
+ r = append_iovec(m, part->data, part->size);
+ if (r < 0)
+ goto fail;
+ }
+
+ assert(n == m->n_iovec);
+
+ return 0;
+
+fail:
+ m->poisoned = true;
+ return r;
+}
+
+bool bus_socket_auth_needs_write(sd_bus *b) {
+
+ unsigned i;
+
+ if (b->auth_index >= ELEMENTSOF(b->auth_iovec))
+ return false;
+
+ for (i = b->auth_index; i < ELEMENTSOF(b->auth_iovec); i++) {
+ struct iovec *j = b->auth_iovec + i;
+
+ if (j->iov_len > 0)
+ return true;
+ }
+
+ return false;
+}
+
+static int bus_socket_auth_verify_client(sd_bus *b) {
+ char *l, *lines[4] = {};
+ sd_id128_t peer;
+ size_t i, n;
+ int r;
+
+ assert(b);
+
+ /*
+ * We expect up to three response lines:
+ * "DATA\r\n" (optional)
+ * "OK <server-id>\r\n"
+ * "AGREE_UNIX_FD\r\n" (optional)
+ */
+
+ n = 0;
+ lines[n] = b->rbuffer;
+ for (i = 0; i < 3; ++i) {
+ l = memmem_safe(lines[n], b->rbuffer_size - (lines[n] - (char*) b->rbuffer), "\r\n", 2);
+ if (l)
+ lines[++n] = l + 2;
+ else
+ break;
+ }
+
+ /*
+ * If we sent a non-empty initial response, then we just expect an OK
+ * reply. We currently do this if, and only if, we picked ANONYMOUS.
+ * If we did not send an initial response, then we expect a DATA
+ * challenge, reply with our own DATA, and expect an OK reply. We do
+ * this for EXTERNAL.
+ * If FD negotiation was requested, we additionally expect
+ * an AGREE_UNIX_FD response in all cases.
+ */
+ if (n < (b->anonymous_auth ? 1U : 2U) + !!b->accept_fd)
+ return 0; /* wait for more data */
+
+ i = 0;
+
+ /* In case of EXTERNAL, verify the first response was DATA. */
+ if (!b->anonymous_auth) {
+ l = lines[i++];
+ if (lines[i] - l == 4 + 2) {
+ if (memcmp(l, "DATA", 4))
+ return -EPERM;
+ } else if (lines[i] - l == 3 + 32 + 2) {
+ /*
+ * Old versions of the server-side implementation of
+ * `sd-bus` replied with "OK <id>" to "AUTH" requests
+ * from a client, even if the "AUTH" line did not
+ * contain inlined arguments. Therefore, we also accept
+ * "OK <id>" here, even though it is technically the
+ * wrong reply. We ignore the "<id>" parameter, though,
+ * since it has no real value.
+ */
+ if (memcmp(l, "OK ", 3))
+ return -EPERM;
+ } else
+ return -EPERM;
+ }
+
+ /* Now check the OK line. */
+ l = lines[i++];
+
+ if (lines[i] - l != 3 + 32 + 2)
+ return -EPERM;
+ if (memcmp(l, "OK ", 3))
+ return -EPERM;
+
+ b->auth = b->anonymous_auth ? BUS_AUTH_ANONYMOUS : BUS_AUTH_EXTERNAL;
+
+ for (unsigned j = 0; j < 32; j += 2) {
+ int x, y;
+
+ x = unhexchar(l[3 + j]);
+ y = unhexchar(l[3 + j + 1]);
+
+ if (x < 0 || y < 0)
+ return -EINVAL;
+
+ peer.bytes[j/2] = ((uint8_t) x << 4 | (uint8_t) y);
+ }
+
+ if (!sd_id128_is_null(b->server_id) &&
+ !sd_id128_equal(b->server_id, peer))
+ return -EPERM;
+
+ b->server_id = peer;
+
+ /* And possibly check the third line, too */
+ if (b->accept_fd) {
+ l = lines[i++];
+ b->can_fds = !!memory_startswith(l, lines[i] - l, "AGREE_UNIX_FD");
+ }
+
+ assert(i == n);
+
+ b->rbuffer_size -= (lines[i] - (char*) b->rbuffer);
+ memmove(b->rbuffer, lines[i], b->rbuffer_size);
+
+ r = bus_start_running(b);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static bool line_equals(const char *s, size_t m, const char *line) {
+ size_t l;
+
+ l = strlen(line);
+ if (l != m)
+ return false;
+
+ return memcmp(s, line, l) == 0;
+}
+
+static bool line_begins(const char *s, size_t m, const char *word) {
+ const char *p;
+
+ p = memory_startswith(s, m, word);
+ return p && (p == (s + m) || *p == ' ');
+}
+
+static int verify_anonymous_token(sd_bus *b, const char *p, size_t l) {
+ _cleanup_free_ char *token = NULL;
+ size_t len;
+ int r;
+
+ if (!b->anonymous_auth)
+ return 0;
+
+ if (l <= 0)
+ return 1;
+
+ assert(p[0] == ' ');
+ p++; l--;
+
+ if (l % 2 != 0)
+ return 0;
+
+ r = unhexmem(p, l, (void **) &token, &len);
+ if (r < 0)
+ return 0;
+
+ if (memchr(token, 0, len))
+ return 0;
+
+ return !!utf8_is_valid(token);
+}
+
+static int verify_external_token(sd_bus *b, const char *p, size_t l) {
+ _cleanup_free_ char *token = NULL;
+ size_t len;
+ uid_t u;
+ int r;
+
+ /* We don't do any real authentication here. Instead, if
+ * the owner of this bus wanted authentication they should have
+ * checked SO_PEERCRED before even creating the bus object. */
+
+ if (!b->anonymous_auth && !b->ucred_valid)
+ return 0;
+
+ if (l <= 0)
+ return 1;
+
+ assert(p[0] == ' ');
+ p++; l--;
+
+ if (l % 2 != 0)
+ return 0;
+
+ r = unhexmem(p, l, (void**) &token, &len);
+ if (r < 0)
+ return 0;
+
+ if (memchr(token, 0, len))
+ return 0;
+
+ r = parse_uid(token, &u);
+ if (r < 0)
+ return 0;
+
+ /* We ignore the passed value if anonymous authentication is
+ * on anyway. */
+ if (!b->anonymous_auth && u != b->ucred.uid)
+ return 0;
+
+ return 1;
+}
+
+static int bus_socket_auth_write(sd_bus *b, const char *t) {
+ char *p;
+ size_t l;
+
+ assert(b);
+ assert(t);
+
+ /* We only make use of the first iovec */
+ assert(IN_SET(b->auth_index, 0, 1));
+
+ l = strlen(t);
+ p = malloc(b->auth_iovec[0].iov_len + l);
+ if (!p)
+ return -ENOMEM;
+
+ memcpy_safe(p, b->auth_iovec[0].iov_base, b->auth_iovec[0].iov_len);
+ memcpy(p + b->auth_iovec[0].iov_len, t, l);
+
+ b->auth_iovec[0].iov_base = p;
+ b->auth_iovec[0].iov_len += l;
+
+ free_and_replace(b->auth_buffer, p);
+ b->auth_index = 0;
+ return 0;
+}
+
+static int bus_socket_auth_write_ok(sd_bus *b) {
+ char t[3 + 32 + 2 + 1];
+
+ assert(b);
+
+ xsprintf(t, "OK " SD_ID128_FORMAT_STR "\r\n", SD_ID128_FORMAT_VAL(b->server_id));
+
+ return bus_socket_auth_write(b, t);
+}
+
+static int bus_socket_auth_verify_server(sd_bus *b) {
+ char *e;
+ const char *line;
+ size_t l;
+ bool processed = false;
+ int r;
+
+ assert(b);
+
+ if (b->rbuffer_size < 1)
+ return 0;
+
+ /* First char must be a NUL byte */
+ if (*(char*) b->rbuffer != 0)
+ return -EIO;
+
+ if (b->rbuffer_size < 3)
+ return 0;
+
+ /* Begin with the first line */
+ if (b->auth_rbegin <= 0)
+ b->auth_rbegin = 1;
+
+ for (;;) {
+ /* Check if line is complete */
+ line = (char*) b->rbuffer + b->auth_rbegin;
+ e = memmem_safe(line, b->rbuffer_size - b->auth_rbegin, "\r\n", 2);
+ if (!e)
+ return processed;
+
+ l = e - line;
+
+ if (line_begins(line, l, "AUTH ANONYMOUS")) {
+
+ r = verify_anonymous_token(b,
+ line + strlen("AUTH ANONYMOUS"),
+ l - strlen("AUTH ANONYMOUS"));
+ if (r < 0)
+ return r;
+ if (r == 0)
+ r = bus_socket_auth_write(b, "REJECTED\r\n");
+ else {
+ b->auth = BUS_AUTH_ANONYMOUS;
+ if (l <= strlen("AUTH ANONYMOUS"))
+ r = bus_socket_auth_write(b, "DATA\r\n");
+ else
+ r = bus_socket_auth_write_ok(b);
+ }
+
+ } else if (line_begins(line, l, "AUTH EXTERNAL")) {
+
+ r = verify_external_token(b,
+ line + strlen("AUTH EXTERNAL"),
+ l - strlen("AUTH EXTERNAL"));
+ if (r < 0)
+ return r;
+ if (r == 0)
+ r = bus_socket_auth_write(b, "REJECTED\r\n");
+ else {
+ b->auth = BUS_AUTH_EXTERNAL;
+ if (l <= strlen("AUTH EXTERNAL"))
+ r = bus_socket_auth_write(b, "DATA\r\n");
+ else
+ r = bus_socket_auth_write_ok(b);
+ }
+
+ } else if (line_begins(line, l, "AUTH"))
+ r = bus_socket_auth_write(b, "REJECTED EXTERNAL ANONYMOUS\r\n");
+ else if (line_equals(line, l, "CANCEL") ||
+ line_begins(line, l, "ERROR")) {
+
+ b->auth = _BUS_AUTH_INVALID;
+ r = bus_socket_auth_write(b, "REJECTED\r\n");
+
+ } else if (line_equals(line, l, "BEGIN")) {
+
+ if (b->auth == _BUS_AUTH_INVALID)
+ r = bus_socket_auth_write(b, "ERROR\r\n");
+ else {
+ /* We can't leave from the auth phase
+ * before we haven't written
+ * everything queued, so let's check
+ * that */
+
+ if (bus_socket_auth_needs_write(b))
+ return 1;
+
+ b->rbuffer_size -= (e + 2 - (char*) b->rbuffer);
+ memmove(b->rbuffer, e + 2, b->rbuffer_size);
+ return bus_start_running(b);
+ }
+
+ } else if (line_begins(line, l, "DATA")) {
+
+ if (b->auth == _BUS_AUTH_INVALID)
+ r = bus_socket_auth_write(b, "ERROR\r\n");
+ else {
+ if (b->auth == BUS_AUTH_ANONYMOUS)
+ r = verify_anonymous_token(b, line + 4, l - 4);
+ else
+ r = verify_external_token(b, line + 4, l - 4);
+
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ b->auth = _BUS_AUTH_INVALID;
+ r = bus_socket_auth_write(b, "REJECTED\r\n");
+ } else
+ r = bus_socket_auth_write_ok(b);
+ }
+ } else if (line_equals(line, l, "NEGOTIATE_UNIX_FD")) {
+ if (b->auth == _BUS_AUTH_INVALID || !b->accept_fd)
+ r = bus_socket_auth_write(b, "ERROR\r\n");
+ else {
+ b->can_fds = true;
+ r = bus_socket_auth_write(b, "AGREE_UNIX_FD\r\n");
+ }
+ } else
+ r = bus_socket_auth_write(b, "ERROR\r\n");
+
+ if (r < 0)
+ return r;
+
+ b->auth_rbegin = e + 2 - (char*) b->rbuffer;
+
+ processed = true;
+ }
+}
+
+static int bus_socket_auth_verify(sd_bus *b) {
+ assert(b);
+
+ if (b->is_server)
+ return bus_socket_auth_verify_server(b);
+ else
+ return bus_socket_auth_verify_client(b);
+}
+
+static int bus_socket_write_auth(sd_bus *b) {
+ ssize_t k;
+
+ assert(b);
+ assert(b->state == BUS_AUTHENTICATING);
+
+ if (!bus_socket_auth_needs_write(b))
+ return 0;
+
+ if (b->prefer_writev)
+ k = writev(b->output_fd, b->auth_iovec + b->auth_index, ELEMENTSOF(b->auth_iovec) - b->auth_index);
+ else {
+ struct msghdr mh = {
+ .msg_iov = b->auth_iovec + b->auth_index,
+ .msg_iovlen = ELEMENTSOF(b->auth_iovec) - b->auth_index,
+ };
+
+ k = sendmsg(b->output_fd, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
+ if (k < 0 && errno == ENOTSOCK) {
+ b->prefer_writev = true;
+ k = writev(b->output_fd, b->auth_iovec + b->auth_index, ELEMENTSOF(b->auth_iovec) - b->auth_index);
+ }
+ }
+
+ if (k < 0)
+ return ERRNO_IS_TRANSIENT(errno) ? 0 : -errno;
+
+ iovec_advance(b->auth_iovec, &b->auth_index, (size_t) k);
+
+ /* Now crank the state machine since we might be able to make progress after writing. For example,
+ * the server only processes "BEGIN" when the write buffer is empty.
+ */
+ return bus_socket_auth_verify(b);
+}
+
+static int bus_socket_read_auth(sd_bus *b) {
+ struct msghdr mh;
+ struct iovec iov = {};
+ size_t n;
+ ssize_t k;
+ int r;
+ void *p;
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int) * BUS_FDS_MAX)) control;
+ bool handle_cmsg = false;
+
+ assert(b);
+ assert(b->state == BUS_AUTHENTICATING);
+
+ r = bus_socket_auth_verify(b);
+ if (r != 0)
+ return r;
+
+ n = MAX(256u, b->rbuffer_size * 2);
+
+ if (n > BUS_AUTH_SIZE_MAX)
+ n = BUS_AUTH_SIZE_MAX;
+
+ if (b->rbuffer_size >= n)
+ return -ENOBUFS;
+
+ p = realloc(b->rbuffer, n);
+ if (!p)
+ return -ENOMEM;
+
+ b->rbuffer = p;
+
+ iov = IOVEC_MAKE((uint8_t *)b->rbuffer + b->rbuffer_size, n - b->rbuffer_size);
+
+ if (b->prefer_readv) {
+ k = readv(b->input_fd, &iov, 1);
+ if (k < 0)
+ k = -errno;
+ } else {
+ mh = (struct msghdr) {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+
+ k = recvmsg_safe(b->input_fd, &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+ if (k == -ENOTSOCK) {
+ b->prefer_readv = true;
+ k = readv(b->input_fd, &iov, 1);
+ if (k < 0)
+ k = -errno;
+ } else
+ handle_cmsg = true;
+ }
+ if (ERRNO_IS_NEG_TRANSIENT(k))
+ return 0;
+ if (k < 0)
+ return (int) k;
+ if (k == 0) {
+ if (handle_cmsg)
+ cmsg_close_all(&mh); /* paranoia, we shouldn't have gotten any fds on EOF */
+ return -ECONNRESET;
+ }
+
+ b->rbuffer_size += k;
+
+ if (handle_cmsg) {
+ struct cmsghdr *cmsg;
+
+ CMSG_FOREACH(cmsg, &mh)
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_RIGHTS) {
+ int j;
+
+ /* Whut? We received fds during the auth
+ * protocol? Somebody is playing games with
+ * us. Close them all, and fail */
+ j = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+ close_many(CMSG_TYPED_DATA(cmsg, int), j);
+ return -EIO;
+ } else
+ log_debug("Got unexpected auxiliary data with level=%d and type=%d",
+ cmsg->cmsg_level, cmsg->cmsg_type);
+ }
+
+ r = bus_socket_auth_verify(b);
+ if (r != 0)
+ return r;
+
+ return 1;
+}
+
+void bus_socket_setup(sd_bus *b) {
+ assert(b);
+
+ /* Increase the buffers to 8 MB */
+ (void) fd_increase_rxbuf(b->input_fd, SNDBUF_SIZE);
+ (void) fd_inc_sndbuf(b->output_fd, SNDBUF_SIZE);
+
+ b->message_version = 1;
+ b->message_endian = 0;
+}
+
+static void bus_get_peercred(sd_bus *b) {
+ int r;
+
+ assert(b);
+ assert(!b->ucred_valid);
+ assert(!b->label);
+ assert(b->n_groups == SIZE_MAX);
+
+ /* Get the peer for socketpair() sockets */
+ b->ucred_valid = getpeercred(b->input_fd, &b->ucred) >= 0;
+
+ /* Get the SELinux context of the peer */
+ r = getpeersec(b->input_fd, &b->label);
+ if (r < 0 && !IN_SET(r, -EOPNOTSUPP, -ENOPROTOOPT))
+ log_debug_errno(r, "Failed to determine peer security context: %m");
+
+ /* Get the list of auxiliary groups of the peer */
+ r = getpeergroups(b->input_fd, &b->groups);
+ if (r >= 0)
+ b->n_groups = (size_t) r;
+ else if (!IN_SET(r, -EOPNOTSUPP, -ENOPROTOOPT))
+ log_debug_errno(r, "Failed to determine peer's group list: %m");
+
+ /* Let's query the peers socket address, it might carry information such as the peer's comm or
+ * description string */
+ zero(b->sockaddr_peer);
+ b->sockaddr_size_peer = 0;
+
+ socklen_t l = sizeof(b->sockaddr_peer) - 1; /* Leave space for a NUL */
+ if (getpeername(b->input_fd, &b->sockaddr_peer.sa, &l) < 0)
+ log_debug_errno(errno, "Failed to get peer's socket address, ignoring: %m");
+ else
+ b->sockaddr_size_peer = l;
+}
+
+static int bus_socket_start_auth_client(sd_bus *b) {
+ static const char sasl_auth_anonymous[] = {
+ /*
+ * We use an arbitrary trace-string for the ANONYMOUS authentication. It can be used by the
+ * message broker to aid debugging of clients. We fully anonymize the connection and use a
+ * static default.
+ */
+ /* HEX a n o n y m o u s */
+ "\0AUTH ANONYMOUS 616e6f6e796d6f7573\r\n"
+ };
+ static const char sasl_auth_external[] = {
+ "\0AUTH EXTERNAL\r\n"
+ "DATA\r\n"
+ };
+ static const char sasl_negotiate_unix_fd[] = {
+ "NEGOTIATE_UNIX_FD\r\n"
+ };
+ static const char sasl_begin[] = {
+ "BEGIN\r\n"
+ };
+ size_t i = 0;
+
+ assert(b);
+
+ if (b->anonymous_auth)
+ b->auth_iovec[i++] = IOVEC_MAKE((char*) sasl_auth_anonymous, sizeof(sasl_auth_anonymous) - 1);
+ else
+ b->auth_iovec[i++] = IOVEC_MAKE((char*) sasl_auth_external, sizeof(sasl_auth_external) - 1);
+
+ if (b->accept_fd)
+ b->auth_iovec[i++] = IOVEC_MAKE_STRING(sasl_negotiate_unix_fd);
+
+ b->auth_iovec[i++] = IOVEC_MAKE_STRING(sasl_begin);
+
+ return bus_socket_write_auth(b);
+}
+
+int bus_socket_start_auth(sd_bus *b) {
+ assert(b);
+
+ bus_get_peercred(b);
+
+ bus_set_state(b, BUS_AUTHENTICATING);
+ b->auth_timeout = now(CLOCK_MONOTONIC) + BUS_AUTH_TIMEOUT;
+
+ if (sd_is_socket(b->input_fd, AF_UNIX, 0, 0) <= 0)
+ b->accept_fd = false;
+
+ if (b->output_fd != b->input_fd)
+ if (sd_is_socket(b->output_fd, AF_UNIX, 0, 0) <= 0)
+ b->accept_fd = false;
+
+ if (b->is_server)
+ return bus_socket_read_auth(b);
+ else
+ return bus_socket_start_auth_client(b);
+}
+
+static int bus_socket_inotify_setup(sd_bus *b) {
+ _cleanup_free_ int *new_watches = NULL;
+ _cleanup_free_ char *absolute = NULL;
+ size_t n = 0, done = 0, i;
+ unsigned max_follow = 32;
+ const char *p;
+ int wd, r;
+
+ assert(b);
+ assert(b->watch_bind);
+ assert(b->sockaddr.sa.sa_family == AF_UNIX);
+ assert(b->sockaddr.un.sun_path[0] != 0);
+
+ /* Sets up an inotify fd in case watch_bind is enabled: wait until the configured AF_UNIX file system
+ * socket appears before connecting to it. The implemented is pretty simplistic: we just subscribe to
+ * relevant changes to all components of the path, and every time we get an event for that we try to
+ * reconnect again, without actually caring what precisely the event we got told us. If we still
+ * can't connect we re-subscribe to all relevant changes of anything in the path, so that our watches
+ * include any possibly newly created path components. */
+
+ if (b->inotify_fd < 0) {
+ b->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (b->inotify_fd < 0)
+ return -errno;
+
+ b->inotify_fd = fd_move_above_stdio(b->inotify_fd);
+ }
+
+ /* Make sure the path is NUL terminated */
+ p = strndupa_safe(b->sockaddr.un.sun_path,
+ sizeof(b->sockaddr.un.sun_path));
+
+ /* Make sure the path is absolute */
+ r = path_make_absolute_cwd(p, &absolute);
+ if (r < 0)
+ goto fail;
+
+ /* Watch all components of the path, and don't mind any prefix that doesn't exist yet. For the
+ * innermost directory that exists we want to know when files are created or moved into it. For all
+ * parents of it we just care if they are removed or renamed. */
+
+ if (!GREEDY_REALLOC(new_watches, n + 1)) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ /* Start with the top-level directory, which is a bit simpler than the rest, since it can't be a
+ * symlink, and always exists */
+ wd = inotify_add_watch(b->inotify_fd, "/", IN_CREATE|IN_MOVED_TO);
+ if (wd < 0) {
+ r = log_debug_errno(errno, "Failed to add inotify watch on /: %m");
+ goto fail;
+ } else
+ new_watches[n++] = wd;
+
+ for (;;) {
+ _cleanup_free_ char *component = NULL, *prefix = NULL, *destination = NULL;
+ size_t n_slashes, n_component;
+ char *c = NULL;
+
+ n_slashes = strspn(absolute + done, "/");
+ n_component = n_slashes + strcspn(absolute + done + n_slashes, "/");
+
+ if (n_component == 0) /* The end */
+ break;
+
+ component = strndup(absolute + done, n_component);
+ if (!component) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ /* A trailing slash? That's a directory, and not a socket then */
+ if (path_equal(component, "/")) {
+ r = -EISDIR;
+ goto fail;
+ }
+
+ /* A single dot? Let's eat this up */
+ if (path_equal(component, "/.")) {
+ done += n_component;
+ continue;
+ }
+
+ prefix = strndup(absolute, done + n_component);
+ if (!prefix) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ if (!GREEDY_REALLOC(new_watches, n + 1)) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ wd = inotify_add_watch(b->inotify_fd, prefix, IN_DELETE_SELF|IN_MOVE_SELF|IN_ATTRIB|IN_CREATE|IN_MOVED_TO|IN_DONT_FOLLOW);
+ log_debug("Added inotify watch for %s on bus %s: %i", prefix, strna(b->description), wd);
+
+ if (wd < 0) {
+ if (IN_SET(errno, ENOENT, ELOOP))
+ break; /* This component doesn't exist yet, or the path contains a cyclic symlink right now */
+
+ r = log_debug_errno(errno, "Failed to add inotify watch on %s: %m", empty_to_root(prefix));
+ goto fail;
+ } else
+ new_watches[n++] = wd;
+
+ /* Check if this is possibly a symlink. If so, let's follow it and watch it too. */
+ r = readlink_malloc(prefix, &destination);
+ if (r == -EINVAL) { /* not a symlink */
+ done += n_component;
+ continue;
+ }
+ if (r < 0)
+ goto fail;
+
+ if (isempty(destination)) { /* Empty symlink target? Yuck! */
+ r = -EINVAL;
+ goto fail;
+ }
+
+ if (max_follow <= 0) { /* Let's make sure we don't follow symlinks forever */
+ r = -ELOOP;
+ goto fail;
+ }
+
+ if (path_is_absolute(destination)) {
+ /* For absolute symlinks we build the new path and start anew */
+ c = strjoin(destination, absolute + done + n_component);
+ done = 0;
+ } else {
+ _cleanup_free_ char *t = NULL;
+
+ /* For relative symlinks we replace the last component, and try again */
+ t = strndup(absolute, done);
+ if (!t)
+ return -ENOMEM;
+
+ c = strjoin(t, "/", destination, absolute + done + n_component);
+ }
+ if (!c) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ free_and_replace(absolute, c);
+
+ max_follow--;
+ }
+
+ /* And now, let's remove all watches from the previous iteration we don't need anymore */
+ for (i = 0; i < b->n_inotify_watches; i++) {
+ bool found = false;
+ size_t j;
+
+ for (j = 0; j < n; j++)
+ if (new_watches[j] == b->inotify_watches[i]) {
+ found = true;
+ break;
+ }
+
+ if (found)
+ continue;
+
+ (void) inotify_rm_watch(b->inotify_fd, b->inotify_watches[i]);
+ }
+
+ free_and_replace(b->inotify_watches, new_watches);
+ b->n_inotify_watches = n;
+
+ return 0;
+
+fail:
+ bus_close_inotify_fd(b);
+ return r;
+}
+
+static int bind_description(sd_bus *b, int fd, int family) {
+ _cleanup_free_ char *bind_name = NULL, *comm = NULL;
+ union sockaddr_union bsa;
+ const char *d = NULL;
+ int r;
+
+ assert(b);
+ assert(fd >= 0);
+
+ /* If this is an AF_UNIX socket, let's set our client's socket address to carry the description
+ * string for this bus connection. This is useful for debugging things, as the connection name is
+ * visible in various socket-related tools, and can even be queried by the server side. */
+
+ if (family != AF_UNIX)
+ return 0;
+
+ (void) sd_bus_get_description(b, &d);
+
+ /* Generate a recognizable source address in the abstract namespace. We'll include:
+ * - a random 64-bit value (to avoid collisions)
+ * - our "comm" process name (suppressed if contains "/" to avoid parsing issues)
+ * - the description string of the bus connection. */
+ (void) pid_get_comm(0, &comm);
+ if (comm && strchr(comm, '/'))
+ comm = mfree(comm);
+
+ if (!d && !comm) /* skip if we don't have either field, rely on kernel autobind instead */
+ return 0;
+
+ if (asprintf(&bind_name, "@%" PRIx64 "/bus/%s/%s", random_u64(), strempty(comm), strempty(d)) < 0)
+ return -ENOMEM;
+
+ strshorten(bind_name, sizeof_field(struct sockaddr_un, sun_path));
+
+ r = sockaddr_un_set_path(&bsa.un, bind_name);
+ if (r < 0)
+ return r;
+
+ if (bind(fd, &bsa.sa, r) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int bus_socket_connect(sd_bus *b) {
+ bool inotify_done = false;
+ int r;
+
+ assert(b);
+
+ for (;;) {
+ assert(b->input_fd < 0);
+ assert(b->output_fd < 0);
+ assert(b->sockaddr.sa.sa_family != AF_UNSPEC);
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *pretty = NULL;
+ (void) sockaddr_pretty(&b->sockaddr.sa, b->sockaddr_size, false, true, &pretty);
+ log_debug("sd-bus: starting bus%s%s by connecting to %s...",
+ b->description ? " " : "", strempty(b->description), strnull(pretty));
+ }
+
+ b->input_fd = socket(b->sockaddr.sa.sa_family, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (b->input_fd < 0)
+ return -errno;
+
+ r = bind_description(b, b->input_fd, b->sockaddr.sa.sa_family);
+ if (r < 0)
+ return r;
+
+ b->input_fd = fd_move_above_stdio(b->input_fd);
+
+ b->output_fd = b->input_fd;
+ bus_socket_setup(b);
+
+ if (connect(b->input_fd, &b->sockaddr.sa, b->sockaddr_size) < 0) {
+ if (errno == EINPROGRESS) {
+
+ /* If we have any inotify watches open, close them now, we don't need them anymore, as
+ * we have successfully initiated a connection */
+ bus_close_inotify_fd(b);
+
+ /* Note that very likely we are already in BUS_OPENING state here, as we enter it when
+ * we start parsing the address string. The only reason we set the state explicitly
+ * here, is to undo BUS_WATCH_BIND, in case we did the inotify magic. */
+ bus_set_state(b, BUS_OPENING);
+ return 1;
+ }
+
+ if (IN_SET(errno, ENOENT, ECONNREFUSED) && /* ENOENT → unix socket doesn't exist at all; ECONNREFUSED → unix socket stale */
+ b->watch_bind &&
+ b->sockaddr.sa.sa_family == AF_UNIX &&
+ b->sockaddr.un.sun_path[0] != 0) {
+
+ /* This connection attempt failed, let's release the socket for now, and start with a
+ * fresh one when reconnecting. */
+ bus_close_io_fds(b);
+
+ if (inotify_done) {
+ /* inotify set up already, don't do it again, just return now, and remember
+ * that we are waiting for inotify events now. */
+ bus_set_state(b, BUS_WATCH_BIND);
+ return 1;
+ }
+
+ /* This is a file system socket, and the inotify logic is enabled. Let's create the necessary inotify fd. */
+ r = bus_socket_inotify_setup(b);
+ if (r < 0)
+ return r;
+
+ /* Let's now try to connect a second time, because in theory there's otherwise a race
+ * here: the socket might have been created in the time between our first connect() and
+ * the time we set up the inotify logic. But let's remember that we set up inotify now,
+ * so that we don't do the connect() more than twice. */
+ inotify_done = true;
+
+ } else
+ return -errno;
+ } else
+ break;
+ }
+
+ /* Yay, established, we don't need no inotify anymore! */
+ bus_close_inotify_fd(b);
+
+ return bus_socket_start_auth(b);
+}
+
+int bus_socket_exec(sd_bus *b) {
+ int s[2], r;
+
+ assert(b);
+ assert(b->input_fd < 0);
+ assert(b->output_fd < 0);
+ assert(b->exec_path);
+ assert(b->busexec_pid == 0);
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *line = NULL;
+
+ if (b->exec_argv)
+ line = quote_command_line(b->exec_argv, SHELL_ESCAPE_EMPTY);
+
+ log_debug("sd-bus: starting bus%s%s with %s%s",
+ b->description ? " " : "", strempty(b->description),
+ line ?: b->exec_path,
+ b->exec_argv && !line ? "…" : "");
+ }
+
+ r = socketpair(AF_UNIX, SOCK_STREAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, s);
+ if (r < 0)
+ return -errno;
+
+ r = safe_fork_full("(sd-busexec)",
+ (int[]) { s[1], s[1], STDERR_FILENO },
+ NULL, 0,
+ FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_REARRANGE_STDIO|FORK_RLIMIT_NOFILE_SAFE, &b->busexec_pid);
+ if (r < 0) {
+ safe_close_pair(s);
+ return r;
+ }
+ if (r == 0) {
+ /* Child */
+
+ if (b->exec_argv)
+ execvp(b->exec_path, b->exec_argv);
+ else
+ execvp(b->exec_path, STRV_MAKE(b->exec_path));
+
+ _exit(EXIT_FAILURE);
+ }
+
+ safe_close(s[1]);
+ b->output_fd = b->input_fd = fd_move_above_stdio(s[0]);
+
+ bus_socket_setup(b);
+
+ return bus_socket_start_auth(b);
+}
+
+int bus_socket_take_fd(sd_bus *b) {
+ assert(b);
+
+ bus_socket_setup(b);
+
+ return bus_socket_start_auth(b);
+}
+
+int bus_socket_write_message(sd_bus *bus, sd_bus_message *m, size_t *idx) {
+ struct iovec *iov;
+ ssize_t k;
+ size_t n;
+ unsigned j;
+ int r;
+
+ assert(bus);
+ assert(m);
+ assert(idx);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ if (*idx >= BUS_MESSAGE_SIZE(m))
+ return 0;
+
+ r = bus_message_setup_iovec(m);
+ if (r < 0)
+ return r;
+
+ n = m->n_iovec * sizeof(struct iovec);
+ iov = newa(struct iovec, n);
+ memcpy_safe(iov, m->iovec, n);
+
+ j = 0;
+ iovec_advance(iov, &j, *idx);
+
+ if (bus->prefer_writev)
+ k = writev(bus->output_fd, iov, m->n_iovec);
+ else {
+ struct msghdr mh = {
+ .msg_iov = iov,
+ .msg_iovlen = m->n_iovec,
+ };
+
+ if (m->n_fds > 0 && *idx == 0) {
+ struct cmsghdr *control;
+
+ mh.msg_controllen = CMSG_SPACE(sizeof(int) * m->n_fds);
+ mh.msg_control = alloca0(mh.msg_controllen);
+ control = CMSG_FIRSTHDR(&mh);
+ control->cmsg_len = CMSG_LEN(sizeof(int) * m->n_fds);
+ control->cmsg_level = SOL_SOCKET;
+ control->cmsg_type = SCM_RIGHTS;
+ memcpy(CMSG_DATA(control), m->fds, sizeof(int) * m->n_fds);
+ }
+
+ k = sendmsg(bus->output_fd, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
+ if (k < 0 && errno == ENOTSOCK) {
+ bus->prefer_writev = true;
+ k = writev(bus->output_fd, iov, m->n_iovec);
+ }
+ }
+
+ if (k < 0)
+ return ERRNO_IS_TRANSIENT(errno) ? 0 : -errno;
+
+ *idx += (size_t) k;
+ return 1;
+}
+
+static int bus_socket_read_message_need(sd_bus *bus, size_t *need) {
+ uint32_t a, b;
+ uint8_t e;
+ uint64_t sum;
+
+ assert(bus);
+ assert(need);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ if (bus->rbuffer_size < sizeof(struct bus_header)) {
+ *need = sizeof(struct bus_header) + 8;
+
+ /* Minimum message size:
+ *
+ * Header +
+ *
+ * Method Call: +2 string headers
+ * Signal: +3 string headers
+ * Method Error: +1 string headers
+ * +1 uint32 headers
+ * Method Reply: +1 uint32 headers
+ *
+ * A string header is at least 9 bytes
+ * A uint32 header is at least 8 bytes
+ *
+ * Hence the minimum message size of a valid message
+ * is header + 8 bytes */
+
+ return 0;
+ }
+
+ a = ((const uint32_t*) bus->rbuffer)[1];
+ b = ((const uint32_t*) bus->rbuffer)[3];
+
+ e = ((const uint8_t*) bus->rbuffer)[0];
+ if (e == BUS_LITTLE_ENDIAN) {
+ a = le32toh(a);
+ b = le32toh(b);
+ } else if (e == BUS_BIG_ENDIAN) {
+ a = be32toh(a);
+ b = be32toh(b);
+ } else
+ return -EBADMSG;
+
+ sum = (uint64_t) sizeof(struct bus_header) + (uint64_t) ALIGN8(b) + (uint64_t) a;
+ if (sum >= BUS_MESSAGE_SIZE_MAX)
+ return -ENOBUFS;
+
+ *need = (size_t) sum;
+ return 0;
+}
+
+static int bus_socket_make_message(sd_bus *bus, size_t size) {
+ sd_bus_message *t = NULL;
+ void *b;
+ int r;
+
+ assert(bus);
+ assert(bus->rbuffer_size >= size);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ r = bus_rqueue_make_room(bus);
+ if (r < 0)
+ return r;
+
+ if (bus->rbuffer_size > size) {
+ b = memdup((const uint8_t*) bus->rbuffer + size,
+ bus->rbuffer_size - size);
+ if (!b)
+ return -ENOMEM;
+ } else
+ b = NULL;
+
+ r = bus_message_from_malloc(bus,
+ bus->rbuffer, size,
+ bus->fds, bus->n_fds,
+ NULL,
+ &t);
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Received invalid message from connection %s, dropping.", strna(bus->description));
+ free(bus->rbuffer); /* We want to drop current rbuffer and proceed with whatever remains in b */
+ } else if (r < 0) {
+ free(b);
+ return r;
+ }
+
+ /* rbuffer ownership was either transferred to t, or we got EBADMSG and dropped it. */
+ bus->rbuffer = b;
+ bus->rbuffer_size -= size;
+
+ bus->fds = NULL;
+ bus->n_fds = 0;
+
+ if (t) {
+ t->read_counter = ++bus->read_counter;
+ bus->rqueue[bus->rqueue_size++] = bus_message_ref_queued(t, bus);
+ sd_bus_message_unref(t);
+ }
+
+ return 1;
+}
+
+int bus_socket_read_message(sd_bus *bus) {
+ struct msghdr mh;
+ struct iovec iov = {};
+ ssize_t k;
+ size_t need;
+ int r;
+ void *b;
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int) * BUS_FDS_MAX)) control;
+ bool handle_cmsg = false;
+
+ assert(bus);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ r = bus_socket_read_message_need(bus, &need);
+ if (r < 0)
+ return r;
+
+ if (bus->rbuffer_size >= need)
+ return bus_socket_make_message(bus, need);
+
+ b = realloc(bus->rbuffer, need);
+ if (!b)
+ return -ENOMEM;
+
+ bus->rbuffer = b;
+
+ iov = IOVEC_MAKE((uint8_t *)bus->rbuffer + bus->rbuffer_size, need - bus->rbuffer_size);
+
+ if (bus->prefer_readv) {
+ k = readv(bus->input_fd, &iov, 1);
+ if (k < 0)
+ k = -errno;
+ } else {
+ mh = (struct msghdr) {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+
+ k = recvmsg_safe(bus->input_fd, &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
+ if (k == -ENOTSOCK) {
+ bus->prefer_readv = true;
+ k = readv(bus->input_fd, &iov, 1);
+ if (k < 0)
+ k = -errno;
+ } else
+ handle_cmsg = true;
+ }
+ if (ERRNO_IS_NEG_TRANSIENT(k))
+ return 0;
+ if (k < 0)
+ return (int) k;
+ if (k == 0) {
+ if (handle_cmsg)
+ cmsg_close_all(&mh); /* On EOF we shouldn't have gotten an fd, but let's make sure */
+ return -ECONNRESET;
+ }
+
+ bus->rbuffer_size += k;
+
+ if (handle_cmsg) {
+ struct cmsghdr *cmsg;
+
+ CMSG_FOREACH(cmsg, &mh)
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_RIGHTS) {
+ int n, *f, i;
+
+ n = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+
+ if (!bus->can_fds) {
+ /* Whut? We received fds but this
+ * isn't actually enabled? Close them,
+ * and fail */
+
+ close_many(CMSG_TYPED_DATA(cmsg, int), n);
+ return -EIO;
+ }
+
+ f = reallocarray(bus->fds, bus->n_fds + n, sizeof(int));
+ if (!f) {
+ close_many(CMSG_TYPED_DATA(cmsg, int), n);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < n; i++)
+ f[bus->n_fds++] = fd_move_above_stdio(CMSG_TYPED_DATA(cmsg, int)[i]);
+ bus->fds = f;
+ } else
+ log_debug("Got unexpected auxiliary data with level=%d and type=%d",
+ cmsg->cmsg_level, cmsg->cmsg_type);
+ }
+
+ r = bus_socket_read_message_need(bus, &need);
+ if (r < 0)
+ return r;
+
+ if (bus->rbuffer_size >= need)
+ return bus_socket_make_message(bus, need);
+
+ return 1;
+}
+
+int bus_socket_process_opening(sd_bus *b) {
+ int error = 0, events, r;
+ socklen_t slen = sizeof(error);
+
+ assert(b->state == BUS_OPENING);
+
+ events = fd_wait_for_event(b->output_fd, POLLOUT, 0);
+ if (ERRNO_IS_NEG_TRANSIENT(events))
+ return 0;
+ if (events < 0)
+ return events;
+ if (!(events & (POLLOUT|POLLERR|POLLHUP)))
+ return 0;
+
+ r = getsockopt(b->output_fd, SOL_SOCKET, SO_ERROR, &error, &slen);
+ if (r < 0)
+ b->last_connect_error = errno;
+ else if (error != 0)
+ b->last_connect_error = error;
+ else if (events & (POLLERR|POLLHUP))
+ b->last_connect_error = ECONNREFUSED;
+ else
+ return bus_socket_start_auth(b);
+
+ return bus_next_address(b);
+}
+
+int bus_socket_process_authenticating(sd_bus *b) {
+ int r;
+
+ assert(b);
+ assert(b->state == BUS_AUTHENTICATING);
+
+ if (now(CLOCK_MONOTONIC) >= b->auth_timeout)
+ return -ETIMEDOUT;
+
+ r = bus_socket_write_auth(b);
+ if (r != 0)
+ return r;
+
+ return bus_socket_read_auth(b);
+}
+
+int bus_socket_process_watch_bind(sd_bus *b) {
+ int r, q;
+
+ assert(b);
+ assert(b->state == BUS_WATCH_BIND);
+ assert(b->inotify_fd >= 0);
+
+ r = flush_fd(b->inotify_fd);
+ if (r <= 0)
+ return r;
+
+ log_debug("Got inotify event on bus %s.", strna(b->description));
+
+ /* We flushed events out of the inotify fd. In that case, maybe the socket is valid now? Let's try to connect
+ * to it again */
+
+ r = bus_socket_connect(b);
+ if (r < 0)
+ return r;
+
+ q = bus_attach_io_events(b);
+ if (q < 0)
+ return q;
+
+ q = bus_attach_inotify_event(b);
+ if (q < 0)
+ return q;
+
+ return r;
+}
diff --git a/src/libsystemd/sd-bus/bus-socket.h b/src/libsystemd/sd-bus/bus-socket.h
new file mode 100644
index 0000000..52bc404
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-socket.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+void bus_socket_setup(sd_bus *b);
+
+int bus_socket_connect(sd_bus *b);
+int bus_socket_exec(sd_bus *b);
+int bus_socket_take_fd(sd_bus *b);
+int bus_socket_start_auth(sd_bus *b);
+
+int bus_socket_write_message(sd_bus *bus, sd_bus_message *m, size_t *idx);
+int bus_socket_read_message(sd_bus *bus);
+
+int bus_socket_process_opening(sd_bus *b);
+int bus_socket_process_authenticating(sd_bus *b);
+int bus_socket_process_watch_bind(sd_bus *b);
+
+bool bus_socket_auth_needs_write(sd_bus *b);
diff --git a/src/libsystemd/sd-bus/bus-track.c b/src/libsystemd/sd-bus/bus-track.c
new file mode 100644
index 0000000..f9c59a1
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-track.c
@@ -0,0 +1,495 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-track.h"
+#include "string-util.h"
+
+struct track_item {
+ unsigned n_ref;
+ char *name;
+ sd_bus_slot *slot;
+};
+
+struct sd_bus_track {
+ unsigned n_ref;
+ unsigned n_adding; /* are we in the process of adding a new name? */
+ sd_bus *bus;
+ sd_bus_track_handler_t handler;
+ void *userdata;
+ Hashmap *names;
+ LIST_FIELDS(sd_bus_track, queue);
+ Iterator iterator;
+ bool in_list:1; /* In bus->tracks? */
+ bool in_queue:1; /* In bus->track_queue? */
+ bool modified:1;
+ bool recursive:1;
+ sd_bus_destroy_t destroy_callback;
+
+ LIST_FIELDS(sd_bus_track, tracks);
+};
+
+#define MATCH_FOR_NAME(name) \
+ strjoina("type='signal'," \
+ "sender='org.freedesktop.DBus'," \
+ "path='/org/freedesktop/DBus'," \
+ "interface='org.freedesktop.DBus'," \
+ "member='NameOwnerChanged'," \
+ "arg0='", name, "'")
+
+static struct track_item* track_item_free(struct track_item *i) {
+ if (!i)
+ return NULL;
+
+ sd_bus_slot_unref(i->slot);
+ free(i->name);
+ return mfree(i);
+}
+
+DEFINE_PRIVATE_TRIVIAL_UNREF_FUNC(struct track_item, track_item, track_item_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct track_item*, track_item_unref);
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(track_item_hash_ops, char, string_hash_func, string_compare_func,
+ struct track_item, track_item_free);
+
+static void bus_track_add_to_queue(sd_bus_track *track) {
+ assert(track);
+
+ /* Adds the bus track object to the queue of objects we should dispatch next, subject to a number of
+ * conditions. */
+
+ /* Already in the queue? */
+ if (track->in_queue)
+ return;
+
+ /* if we are currently in the process of adding a new name, then let's not enqueue this just yet, let's wait
+ * until the addition is complete. */
+ if (track->n_adding > 0)
+ return;
+
+ /* still referenced? */
+ if (hashmap_size(track->names) > 0)
+ return;
+
+ /* Nothing to call? */
+ if (!track->handler)
+ return;
+
+ /* Already closed? */
+ if (!track->in_list)
+ return;
+
+ LIST_PREPEND(queue, track->bus->track_queue, track);
+ track->in_queue = true;
+}
+
+static void bus_track_remove_from_queue(sd_bus_track *track) {
+ assert(track);
+
+ if (!track->in_queue)
+ return;
+
+ LIST_REMOVE(queue, track->bus->track_queue, track);
+ track->in_queue = false;
+}
+
+static int bus_track_remove_name_fully(sd_bus_track *track, const char *name) {
+ struct track_item *i;
+
+ assert(track);
+ assert(name);
+
+ i = hashmap_remove(track->names, name);
+ if (!i)
+ return 0;
+
+ track_item_free(i);
+
+ bus_track_add_to_queue(track);
+
+ track->modified = true;
+ return 1;
+}
+
+_public_ int sd_bus_track_new(
+ sd_bus *bus,
+ sd_bus_track **track,
+ sd_bus_track_handler_t handler,
+ void *userdata) {
+
+ sd_bus_track *t;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(track, -EINVAL);
+
+ if (!bus->bus_client)
+ return -EINVAL;
+
+ t = new0(sd_bus_track, 1);
+ if (!t)
+ return -ENOMEM;
+
+ t->n_ref = 1;
+ t->handler = handler;
+ t->userdata = userdata;
+ t->bus = sd_bus_ref(bus);
+
+ LIST_PREPEND(tracks, bus->tracks, t);
+ t->in_list = true;
+
+ bus_track_add_to_queue(t);
+
+ *track = t;
+ return 0;
+}
+
+static sd_bus_track *track_free(sd_bus_track *track) {
+ assert(track);
+
+ if (track->in_list)
+ LIST_REMOVE(tracks, track->bus->tracks, track);
+
+ bus_track_remove_from_queue(track);
+ track->names = hashmap_free(track->names);
+ track->bus = sd_bus_unref(track->bus);
+
+ if (track->destroy_callback)
+ track->destroy_callback(track->userdata);
+
+ return mfree(track);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_bus_track, sd_bus_track, track_free);
+
+static int on_name_owner_changed(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+ sd_bus_track *track = ASSERT_PTR(userdata);
+ const char *name;
+ int r;
+
+ assert(message);
+
+ r = sd_bus_message_read(message, "sss", &name, NULL, NULL);
+ if (r < 0)
+ return 0;
+
+ bus_track_remove_name_fully(track, name);
+ return 0;
+}
+
+_public_ int sd_bus_track_add_name(sd_bus_track *track, const char *name) {
+ _cleanup_(track_item_unrefp) struct track_item *n = NULL;
+ struct track_item *i;
+ const char *match;
+ int r;
+
+ assert_return(track, -EINVAL);
+ assert_return(service_name_is_valid(name), -EINVAL);
+
+ i = hashmap_get(track->names, name);
+ if (i) {
+ if (track->recursive) {
+ assert(i->n_ref > 0);
+
+ /* Manual overflow check (instead of a DEFINE_TRIVIAL_REF_FUNC() helper or so), so
+ * that we can return a proper error, given this is almost always called in a
+ * directly client controllable way, and thus better should never hit an assertion
+ * here. */
+ if (i->n_ref >= UINT_MAX)
+ return -EOVERFLOW;
+
+ i->n_ref++;
+ }
+
+ bus_track_remove_from_queue(track);
+ return 0;
+ }
+
+ r = hashmap_ensure_allocated(&track->names, &track_item_hash_ops);
+ if (r < 0)
+ return r;
+
+ n = new(struct track_item, 1);
+ if (!n)
+ return -ENOMEM;
+
+ *n = (struct track_item) {
+ .n_ref = 1,
+ };
+
+ n->name = strdup(name);
+ if (!n->name)
+ return -ENOMEM;
+
+ /* First, subscribe to this name */
+ match = MATCH_FOR_NAME(name);
+
+ bus_track_remove_from_queue(track); /* don't dispatch this while we work in it */
+
+ r = sd_bus_add_match_async(track->bus, &n->slot, match, on_name_owner_changed, NULL, track);
+ if (r < 0) {
+ bus_track_add_to_queue(track);
+ return r;
+ }
+
+ r = hashmap_put(track->names, n->name, n);
+ if (r < 0) {
+ bus_track_add_to_queue(track);
+ return r;
+ }
+
+ /* Second, check if it is currently existing, or maybe doesn't, or maybe disappeared already. */
+ track->n_adding++; /* again, make sure this isn't dispatch while we are working in it */
+ r = sd_bus_get_name_creds(track->bus, name, 0, NULL);
+ track->n_adding--;
+ if (r < 0) {
+ hashmap_remove(track->names, name);
+ bus_track_add_to_queue(track);
+ return r;
+ }
+
+ TAKE_PTR(n);
+
+ bus_track_remove_from_queue(track);
+ track->modified = true;
+
+ return 1;
+}
+
+_public_ int sd_bus_track_remove_name(sd_bus_track *track, const char *name) {
+ struct track_item *i;
+
+ assert_return(name, -EINVAL);
+
+ if (!track) /* Treat a NULL track object as an empty track object */
+ return 0;
+
+ i = hashmap_get(track->names, name);
+ if (!i)
+ return 0;
+
+ assert(i->n_ref >= 1);
+ if (i->n_ref <= 1)
+ return bus_track_remove_name_fully(track, name);
+
+ track_item_unref(i);
+
+ return 1;
+}
+
+_public_ unsigned sd_bus_track_count(sd_bus_track *track) {
+
+ if (!track) /* Let's consider a NULL object equivalent to an empty object */
+ return 0;
+
+ /* This signature really should have returned an int, so that we can propagate errors. But well, ... Also, note
+ * that this returns the number of names being watched, and multiple references to the same name are not
+ * counted. */
+
+ return hashmap_size(track->names);
+}
+
+_public_ const char* sd_bus_track_contains(sd_bus_track *track, const char *name) {
+ assert_return(name, NULL);
+
+ if (!track) /* Let's consider a NULL object equivalent to an empty object */
+ return NULL;
+
+ return hashmap_contains(track->names, name) ? name : NULL;
+}
+
+_public_ const char* sd_bus_track_first(sd_bus_track *track) {
+ const char *n = NULL;
+
+ if (!track)
+ return NULL;
+
+ track->modified = false;
+ track->iterator = ITERATOR_FIRST;
+
+ (void) hashmap_iterate(track->names, &track->iterator, NULL, (const void**) &n);
+ return n;
+}
+
+_public_ const char* sd_bus_track_next(sd_bus_track *track) {
+ const char *n = NULL;
+
+ if (!track)
+ return NULL;
+
+ if (track->modified)
+ return NULL;
+
+ (void) hashmap_iterate(track->names, &track->iterator, NULL, (const void**) &n);
+ return n;
+}
+
+_public_ int sd_bus_track_add_sender(sd_bus_track *track, sd_bus_message *m) {
+ const char *sender;
+
+ assert_return(track, -EINVAL);
+ assert_return(m, -EINVAL);
+
+ if (sd_bus_message_get_bus(m) != track->bus)
+ return -EINVAL;
+
+ sender = sd_bus_message_get_sender(m);
+ if (!sender)
+ return -EINVAL;
+
+ return sd_bus_track_add_name(track, sender);
+}
+
+_public_ int sd_bus_track_remove_sender(sd_bus_track *track, sd_bus_message *m) {
+ const char *sender;
+
+ assert_return(m, -EINVAL);
+
+ if (!track) /* Treat a NULL track object as an empty track object */
+ return 0;
+
+ if (sd_bus_message_get_bus(m) != track->bus)
+ return -EINVAL;
+
+ sender = sd_bus_message_get_sender(m);
+ if (!sender)
+ return -EINVAL;
+
+ return sd_bus_track_remove_name(track, sender);
+}
+
+_public_ sd_bus* sd_bus_track_get_bus(sd_bus_track *track) {
+ assert_return(track, NULL);
+
+ return track->bus;
+}
+
+void bus_track_dispatch(sd_bus_track *track) {
+ int r;
+
+ assert(track);
+ assert(track->handler);
+
+ bus_track_remove_from_queue(track);
+
+ sd_bus_track_ref(track);
+
+ r = track->handler(track, track->userdata);
+ if (r < 0)
+ log_debug_errno(r, "Failed to process track handler: %m");
+ else if (r == 0)
+ bus_track_add_to_queue(track);
+
+ sd_bus_track_unref(track);
+}
+
+void bus_track_close(sd_bus_track *track) {
+ assert(track);
+
+ /* Called whenever our bus connected is closed. If so, and our track object is non-empty, dispatch it
+ * immediately, as we are closing now, but first flush out all names. */
+
+ if (!track->in_list)
+ return; /* We already closed this one, don't close it again. */
+
+ /* Remember that this one is closed now */
+ LIST_REMOVE(tracks, track->bus->tracks, track);
+ track->in_list = false;
+
+ /* If there's no name in this one anyway, we don't have to dispatch */
+ if (hashmap_isempty(track->names))
+ return;
+
+ /* Let's flush out all names */
+ hashmap_clear(track->names);
+
+ /* Invoke handler */
+ if (track->handler)
+ bus_track_dispatch(track);
+}
+
+_public_ void *sd_bus_track_get_userdata(sd_bus_track *track) {
+ assert_return(track, NULL);
+
+ return track->userdata;
+}
+
+_public_ void *sd_bus_track_set_userdata(sd_bus_track *track, void *userdata) {
+ void *ret;
+
+ assert_return(track, NULL);
+
+ ret = track->userdata;
+ track->userdata = userdata;
+
+ return ret;
+}
+
+_public_ int sd_bus_track_set_destroy_callback(sd_bus_track *track, sd_bus_destroy_t callback) {
+ assert_return(track, -EINVAL);
+
+ track->destroy_callback = callback;
+ return 0;
+}
+
+_public_ int sd_bus_track_get_destroy_callback(sd_bus_track *track, sd_bus_destroy_t *ret) {
+ assert_return(track, -EINVAL);
+
+ if (ret)
+ *ret = track->destroy_callback;
+
+ return !!track->destroy_callback;
+}
+
+_public_ int sd_bus_track_set_recursive(sd_bus_track *track, int b) {
+ assert_return(track, -EINVAL);
+
+ if (track->recursive == !!b)
+ return 0;
+
+ if (!hashmap_isempty(track->names))
+ return -EBUSY;
+
+ track->recursive = b;
+ return 0;
+}
+
+_public_ int sd_bus_track_get_recursive(sd_bus_track *track) {
+ assert_return(track, -EINVAL);
+
+ return track->recursive;
+}
+
+_public_ int sd_bus_track_count_sender(sd_bus_track *track, sd_bus_message *m) {
+ const char *sender;
+
+ assert_return(m, -EINVAL);
+
+ if (!track) /* Let's consider a NULL object equivalent to an empty object */
+ return 0;
+
+ if (sd_bus_message_get_bus(m) != track->bus)
+ return -EINVAL;
+
+ sender = sd_bus_message_get_sender(m);
+ if (!sender)
+ return -EINVAL;
+
+ return sd_bus_track_count_name(track, sender);
+}
+
+_public_ int sd_bus_track_count_name(sd_bus_track *track, const char *name) {
+ struct track_item *i;
+
+ assert_return(service_name_is_valid(name), -EINVAL);
+
+ if (!track) /* Let's consider a NULL object equivalent to an empty object */
+ return 0;
+
+ i = hashmap_get(track->names, name);
+ if (!i)
+ return 0;
+
+ return i->n_ref;
+}
diff --git a/src/libsystemd/sd-bus/bus-track.h b/src/libsystemd/sd-bus/bus-track.h
new file mode 100644
index 0000000..8dae1f3
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-track.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+void bus_track_dispatch(sd_bus_track *track);
+void bus_track_close(sd_bus_track *track);
diff --git a/src/libsystemd/sd-bus/bus-type.c b/src/libsystemd/sd-bus/bus-type.c
new file mode 100644
index 0000000..6a0f53d
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-type.c
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "sd-bus.h"
+
+#include "bus-internal.h"
+#include "bus-type.h"
+
+bool bus_type_is_valid(char c) {
+ static const char valid[] = {
+ SD_BUS_TYPE_BYTE,
+ SD_BUS_TYPE_BOOLEAN,
+ SD_BUS_TYPE_INT16,
+ SD_BUS_TYPE_UINT16,
+ SD_BUS_TYPE_INT32,
+ SD_BUS_TYPE_UINT32,
+ SD_BUS_TYPE_INT64,
+ SD_BUS_TYPE_UINT64,
+ SD_BUS_TYPE_DOUBLE,
+ SD_BUS_TYPE_STRING,
+ SD_BUS_TYPE_OBJECT_PATH,
+ SD_BUS_TYPE_SIGNATURE,
+ SD_BUS_TYPE_ARRAY,
+ SD_BUS_TYPE_VARIANT,
+ SD_BUS_TYPE_STRUCT,
+ SD_BUS_TYPE_DICT_ENTRY,
+ SD_BUS_TYPE_UNIX_FD
+ };
+
+ return !!memchr(valid, c, sizeof(valid));
+}
+
+bool bus_type_is_basic(char c) {
+ static const char valid[] = {
+ SD_BUS_TYPE_BYTE,
+ SD_BUS_TYPE_BOOLEAN,
+ SD_BUS_TYPE_INT16,
+ SD_BUS_TYPE_UINT16,
+ SD_BUS_TYPE_INT32,
+ SD_BUS_TYPE_UINT32,
+ SD_BUS_TYPE_INT64,
+ SD_BUS_TYPE_UINT64,
+ SD_BUS_TYPE_DOUBLE,
+ SD_BUS_TYPE_STRING,
+ SD_BUS_TYPE_OBJECT_PATH,
+ SD_BUS_TYPE_SIGNATURE,
+ SD_BUS_TYPE_UNIX_FD
+ };
+
+ return !!memchr(valid, c, sizeof(valid));
+}
+
+bool bus_type_is_trivial(char c) {
+ static const char valid[] = {
+ SD_BUS_TYPE_BYTE,
+ SD_BUS_TYPE_BOOLEAN,
+ SD_BUS_TYPE_INT16,
+ SD_BUS_TYPE_UINT16,
+ SD_BUS_TYPE_INT32,
+ SD_BUS_TYPE_UINT32,
+ SD_BUS_TYPE_INT64,
+ SD_BUS_TYPE_UINT64,
+ SD_BUS_TYPE_DOUBLE
+ };
+
+ return !!memchr(valid, c, sizeof(valid));
+}
+
+bool bus_type_is_container(char c) {
+ static const char valid[] = {
+ SD_BUS_TYPE_ARRAY,
+ SD_BUS_TYPE_VARIANT,
+ SD_BUS_TYPE_STRUCT,
+ SD_BUS_TYPE_DICT_ENTRY
+ };
+
+ return !!memchr(valid, c, sizeof(valid));
+}
+
+int bus_type_get_alignment(char c) {
+
+ switch (c) {
+ case SD_BUS_TYPE_BYTE:
+ case SD_BUS_TYPE_SIGNATURE:
+ case SD_BUS_TYPE_VARIANT:
+ return 1;
+
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ return 2;
+
+ case SD_BUS_TYPE_BOOLEAN:
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH:
+ case SD_BUS_TYPE_ARRAY:
+ case SD_BUS_TYPE_UNIX_FD:
+ return 4;
+
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ case SD_BUS_TYPE_STRUCT:
+ case SD_BUS_TYPE_STRUCT_BEGIN:
+ case SD_BUS_TYPE_DICT_ENTRY:
+ case SD_BUS_TYPE_DICT_ENTRY_BEGIN:
+ return 8;
+ }
+
+ return -EINVAL;
+}
+
+int bus_type_get_size(char c) {
+
+ switch (c) {
+ case SD_BUS_TYPE_BYTE:
+ return 1;
+
+ case SD_BUS_TYPE_INT16:
+ case SD_BUS_TYPE_UINT16:
+ return 2;
+
+ case SD_BUS_TYPE_BOOLEAN:
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32:
+ case SD_BUS_TYPE_UNIX_FD:
+ return 4;
+
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64:
+ case SD_BUS_TYPE_DOUBLE:
+ return 8;
+ }
+
+ return -EINVAL;
+}
+
+_public_ int sd_bus_interface_name_is_valid(const char *p) {
+ assert_return(p, -EINVAL);
+
+ return interface_name_is_valid(p);
+}
+
+_public_ int sd_bus_service_name_is_valid(const char *p) {
+ assert_return(p, -EINVAL);
+
+ return service_name_is_valid(p);
+}
+
+_public_ int sd_bus_member_name_is_valid(const char *p) {
+ assert_return(p, -EINVAL);
+
+ return member_name_is_valid(p);
+}
+
+_public_ int sd_bus_object_path_is_valid(const char *p) {
+ assert_return(p, -EINVAL);
+
+ return object_path_is_valid(p);
+}
diff --git a/src/libsystemd/sd-bus/bus-type.h b/src/libsystemd/sd-bus/bus-type.h
new file mode 100644
index 0000000..490108a
--- /dev/null
+++ b/src/libsystemd/sd-bus/bus-type.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+bool bus_type_is_valid(char c) _const_;
+bool bus_type_is_basic(char c) _const_;
+/* "trivial" is systemd's term for what the D-Bus Specification calls
+ * a "fixed type": that is, a basic type of fixed length */
+bool bus_type_is_trivial(char c) _const_;
+bool bus_type_is_container(char c) _const_;
+
+int bus_type_get_alignment(char c) _const_;
+int bus_type_get_size(char c) _const_;
diff --git a/src/libsystemd/sd-bus/fuzz-bus-match.c b/src/libsystemd/sd-bus/fuzz-bus-match.c
new file mode 100644
index 0000000..16da534
--- /dev/null
+++ b/src/libsystemd/sd-bus/fuzz-bus-match.c
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-match.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fuzz.h"
+#include "memstream-util.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_(memstream_done) MemStream m = {};
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ FILE *g = NULL;
+ int r;
+
+ if (outside_size_range(size, 0, 65536))
+ return 0;
+
+ fuzz_setup_logging();
+
+ r = sd_bus_new(&bus);
+ assert_se(r >= 0);
+
+ _cleanup_(bus_match_free) struct bus_match_node root = {
+ .type = BUS_MATCH_ROOT,
+ };
+
+ /* Note that we use the pointer to match_callback substructure, but the code
+ * uses container_of() to access outside of the passed-in type. */
+ sd_bus_slot slot = {
+ .type = BUS_MATCH_CALLBACK,
+ .match_callback = {},
+ };
+
+ if (getenv_bool("SYSTEMD_FUZZ_OUTPUT") <= 0)
+ assert_se(g = memstream_init(&m));
+
+ for (size_t offset = 0; offset < size; ) {
+ _cleanup_free_ char *line = NULL;
+ char *end;
+
+ end = memchr((char*) data + offset, '\n', size - offset);
+
+ line = memdup_suffix0((char*) data + offset,
+ end ? end - (char*) data - offset : size - offset);
+ if (!line)
+ return log_oom_debug();
+
+ offset = end ? (size_t) (end - (char*) data + 1) : size;
+
+ struct bus_match_component *components;
+ size_t n_components;
+ r = bus_match_parse(line, &components, &n_components);
+ if (IN_SET(r, -EINVAL, -ENOMEM)) {
+ log_debug_errno(r, "Failed to parse line: %m");
+ continue;
+ }
+ assert_se(r >= 0); /* We only expect EINVAL and ENOMEM errors, or success. */
+
+ CLEANUP_ARRAY(components, n_components, bus_match_parse_free);
+
+ log_debug("Parsed %zu components.", n_components);
+
+ _cleanup_free_ char *again = bus_match_to_string(components, n_components);
+ if (!again) {
+ log_oom();
+ break;
+ }
+
+ if (g)
+ fprintf(g, "%s\n", again);
+
+ r = bus_match_add(&root, components, n_components, &slot.match_callback);
+ if (r < 0) {
+ log_error_errno(r, "Failed to add match: %m");
+ break;
+ }
+ }
+
+ bus_match_dump(g ?: stdout, &root, 0); /* We do this even on failure, to check consistency after error. */
+ bus_match_free(&root);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/fuzz-bus-match.options b/src/libsystemd/sd-bus/fuzz-bus-match.options
new file mode 100644
index 0000000..678d526
--- /dev/null
+++ b/src/libsystemd/sd-bus/fuzz-bus-match.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+max_len = 65536
diff --git a/src/libsystemd/sd-bus/fuzz-bus-message.c b/src/libsystemd/sd-bus/fuzz-bus-message.c
new file mode 100644
index 0000000..ca7091e
--- /dev/null
+++ b/src/libsystemd/sd-bus/fuzz-bus-message.c
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-dump.h"
+#include "bus-message.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fuzz.h"
+#include "memstream-util.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ _cleanup_(memstream_done) MemStream ms = {};
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_free_ void *buffer = NULL;
+ FILE *g = NULL;
+ int r;
+
+ fuzz_setup_logging();
+
+ r = sd_bus_new(&bus);
+ assert_se(r >= 0);
+
+ assert_se(buffer = memdup(data, size));
+
+ r = bus_message_from_malloc(bus, buffer, size, NULL, 0, NULL, &m);
+ if (r == -EBADMSG)
+ return 0;
+ assert_se(r >= 0);
+ TAKE_PTR(buffer);
+
+ if (getenv_bool("SYSTEMD_FUZZ_OUTPUT") <= 0)
+ assert_se(g = memstream_init(&ms));
+
+ sd_bus_message_dump(m, g ?: stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ r = sd_bus_message_rewind(m, true);
+ assert_se(r >= 0);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/sd-bus.c b/src/libsystemd/sd-bus/sd-bus.c
new file mode 100644
index 0000000..8befc97
--- /dev/null
+++ b/src/libsystemd/sd-bus/sd-bus.c
@@ -0,0 +1,4441 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <endian.h>
+#include <netdb.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "bus-container.h"
+#include "bus-control.h"
+#include "bus-internal.h"
+#include "bus-kernel.h"
+#include "bus-label.h"
+#include "bus-message.h"
+#include "bus-objects.h"
+#include "bus-protocol.h"
+#include "bus-slot.h"
+#include "bus-socket.h"
+#include "bus-track.h"
+#include "bus-type.h"
+#include "cgroup-util.h"
+#include "constants.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "glyph-util.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "io-util.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "missing_syscall.h"
+#include "missing_threads.h"
+#include "origin-id.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+#define log_debug_bus_message(m) \
+ do { \
+ sd_bus_message *_mm = (m); \
+ log_debug("Got message type=%s sender=%s destination=%s path=%s interface=%s member=%s " \
+ " cookie=%" PRIu64 " reply_cookie=%" PRIu64 \
+ " signature=%s error-name=%s error-message=%s", \
+ strna(bus_message_type_to_string(_mm->header->type)), \
+ strna(sd_bus_message_get_sender(_mm)), \
+ strna(sd_bus_message_get_destination(_mm)), \
+ strna(sd_bus_message_get_path(_mm)), \
+ strna(sd_bus_message_get_interface(_mm)), \
+ strna(sd_bus_message_get_member(_mm)), \
+ BUS_MESSAGE_COOKIE(_mm), \
+ _mm->reply_cookie, \
+ strna(_mm->root_container.signature), \
+ strna(_mm->error.name), \
+ strna(_mm->error.message)); \
+ } while (false)
+
+static int bus_poll(sd_bus *bus, bool need_more, uint64_t timeout_usec);
+static void bus_detach_io_events(sd_bus *b);
+
+static thread_local sd_bus *default_system_bus = NULL;
+static thread_local sd_bus *default_user_bus = NULL;
+static thread_local sd_bus *default_starter_bus = NULL;
+
+static sd_bus **bus_choose_default(int (**bus_open)(sd_bus **)) {
+ const char *e;
+
+ /* Let's try our best to reuse another cached connection. If
+ * the starter bus type is set, connect via our normal
+ * connection logic, ignoring $DBUS_STARTER_ADDRESS, so that
+ * we can share the connection with the user/system default
+ * bus. */
+
+ e = secure_getenv("DBUS_STARTER_BUS_TYPE");
+ if (e) {
+ if (streq(e, "system")) {
+ if (bus_open)
+ *bus_open = sd_bus_open_system;
+ return &default_system_bus;
+ } else if (STR_IN_SET(e, "user", "session")) {
+ if (bus_open)
+ *bus_open = sd_bus_open_user;
+ return &default_user_bus;
+ }
+ }
+
+ /* No type is specified, so we have not other option than to
+ * use the starter address if it is set. */
+ e = secure_getenv("DBUS_STARTER_ADDRESS");
+ if (e) {
+ if (bus_open)
+ *bus_open = sd_bus_open;
+ return &default_starter_bus;
+ }
+
+ /* Finally, if nothing is set use the cached connection for
+ * the right scope */
+
+ if (cg_pid_get_owner_uid(0, NULL) >= 0) {
+ if (bus_open)
+ *bus_open = sd_bus_open_user;
+ return &default_user_bus;
+ } else {
+ if (bus_open)
+ *bus_open = sd_bus_open_system;
+ return &default_system_bus;
+ }
+}
+
+sd_bus *bus_resolve(sd_bus *bus) {
+ switch ((uintptr_t) bus) {
+ case (uintptr_t) SD_BUS_DEFAULT:
+ return *(bus_choose_default(NULL));
+ case (uintptr_t) SD_BUS_DEFAULT_USER:
+ return default_user_bus;
+ case (uintptr_t) SD_BUS_DEFAULT_SYSTEM:
+ return default_system_bus;
+ default:
+ return bus;
+ }
+}
+
+void bus_close_io_fds(sd_bus *b) {
+ assert(b);
+
+ bus_detach_io_events(b);
+
+ if (b->input_fd != b->output_fd)
+ safe_close(b->output_fd);
+ b->output_fd = b->input_fd = safe_close(b->input_fd);
+}
+
+void bus_close_inotify_fd(sd_bus *b) {
+ assert(b);
+
+ b->inotify_event_source = sd_event_source_disable_unref(b->inotify_event_source);
+
+ b->inotify_fd = safe_close(b->inotify_fd);
+ b->inotify_watches = mfree(b->inotify_watches);
+ b->n_inotify_watches = 0;
+}
+
+static void bus_reset_queues(sd_bus *b) {
+ assert(b);
+
+ while (b->rqueue_size > 0)
+ bus_message_unref_queued(b->rqueue[--b->rqueue_size], b);
+
+ b->rqueue = mfree(b->rqueue);
+
+ while (b->wqueue_size > 0)
+ bus_message_unref_queued(b->wqueue[--b->wqueue_size], b);
+
+ b->wqueue = mfree(b->wqueue);
+}
+
+static sd_bus* bus_free(sd_bus *b) {
+ sd_bus_slot *s;
+
+ assert(b);
+ assert(!b->track_queue);
+ assert(!b->tracks);
+
+ b->state = BUS_CLOSED;
+
+ sd_bus_detach_event(b);
+
+ while ((s = b->slots)) {
+ /* At this point only floating slots can still be
+ * around, because the non-floating ones keep a
+ * reference to the bus, and we thus couldn't be
+ * destructing right now... We forcibly disconnect the
+ * slots here, so that they still can be referenced by
+ * apps, but are dead. */
+
+ assert(s->floating);
+ bus_slot_disconnect(s, true);
+ }
+
+ if (b->default_bus_ptr)
+ *b->default_bus_ptr = NULL;
+
+ bus_close_io_fds(b);
+ bus_close_inotify_fd(b);
+
+ free(b->label);
+ free(b->groups);
+ free(b->rbuffer);
+ free(b->unique_name);
+ free(b->auth_buffer);
+ free(b->address);
+ free(b->machine);
+ free(b->description);
+ free(b->patch_sender);
+
+ free(b->exec_path);
+ strv_free(b->exec_argv);
+
+ close_many(b->fds, b->n_fds);
+ free(b->fds);
+
+ bus_reset_queues(b);
+
+ ordered_hashmap_free_free(b->reply_callbacks);
+ prioq_free(b->reply_callbacks_prioq);
+
+ assert(b->match_callbacks.type == BUS_MATCH_ROOT);
+ bus_match_free(&b->match_callbacks);
+
+ hashmap_free_free(b->vtable_methods);
+ hashmap_free_free(b->vtable_properties);
+
+ assert(hashmap_isempty(b->nodes));
+ hashmap_free(b->nodes);
+
+ bus_flush_memfd(b);
+
+ assert_se(pthread_mutex_destroy(&b->memfd_cache_mutex) == 0);
+
+ return mfree(b);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(sd_bus*, bus_free);
+
+DEFINE_ORIGIN_ID_HELPERS(sd_bus, bus);
+
+_public_ int sd_bus_new(sd_bus **ret) {
+ _cleanup_free_ sd_bus *b = NULL;
+
+ assert_return(ret, -EINVAL);
+
+ b = new(sd_bus, 1);
+ if (!b)
+ return -ENOMEM;
+
+ *b = (sd_bus) {
+ .n_ref = 1,
+ .input_fd = -EBADF,
+ .output_fd = -EBADF,
+ .inotify_fd = -EBADF,
+ .message_version = 1,
+ .creds_mask = SD_BUS_CREDS_WELL_KNOWN_NAMES|SD_BUS_CREDS_UNIQUE_NAME,
+ .accept_fd = true,
+ .origin_id = origin_id_query(),
+ .n_groups = SIZE_MAX,
+ .close_on_exit = true,
+ .ucred = UCRED_INVALID,
+ .runtime_scope = _RUNTIME_SCOPE_INVALID,
+ };
+
+ /* We guarantee that wqueue always has space for at least one entry */
+ if (!GREEDY_REALLOC(b->wqueue, 1))
+ return -ENOMEM;
+
+ assert_se(pthread_mutex_init(&b->memfd_cache_mutex, NULL) == 0);
+
+ *ret = TAKE_PTR(b);
+ return 0;
+}
+
+_public_ int sd_bus_set_address(sd_bus *bus, const char *address) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(address, -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ return free_and_strdup(&bus->address, address);
+}
+
+_public_ int sd_bus_set_fd(sd_bus *bus, int input_fd, int output_fd) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(input_fd >= 0, -EBADF);
+ assert_return(output_fd >= 0, -EBADF);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ bus->input_fd = input_fd;
+ bus->output_fd = output_fd;
+ return 0;
+}
+
+_public_ int sd_bus_set_exec(sd_bus *bus, const char *path, char *const *argv) {
+ _cleanup_strv_free_ char **a = NULL;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(path, -EINVAL);
+ assert_return(!strv_isempty(argv), -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ a = strv_copy(argv);
+ if (!a)
+ return -ENOMEM;
+
+ r = free_and_strdup(&bus->exec_path, path);
+ if (r < 0)
+ return r;
+
+ return strv_free_and_replace(bus->exec_argv, a);
+}
+
+_public_ int sd_bus_set_bus_client(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus->patch_sender, -EPERM);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ bus->bus_client = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_set_monitor(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ bus->is_monitor = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_negotiate_fds(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ bus->accept_fd = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_negotiate_timestamp(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!IN_SET(bus->state, BUS_CLOSING, BUS_CLOSED), -EPERM);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ /* This is not actually supported by any of our transports these days, but we do honour it for synthetic
+ * replies, and maybe one day classic D-Bus learns this too */
+ bus->attach_timestamp = !!b;
+
+ return 0;
+}
+
+_public_ int sd_bus_negotiate_creds(sd_bus *bus, int b, uint64_t mask) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(mask <= _SD_BUS_CREDS_ALL, -EINVAL);
+ assert_return(!IN_SET(bus->state, BUS_CLOSING, BUS_CLOSED), -EPERM);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ SET_FLAG(bus->creds_mask, mask, b);
+
+ /* The well knowns we need unconditionally, so that matches can work */
+ bus->creds_mask |= SD_BUS_CREDS_WELL_KNOWN_NAMES|SD_BUS_CREDS_UNIQUE_NAME;
+
+ return 0;
+}
+
+_public_ int sd_bus_set_server(sd_bus *bus, int b, sd_id128_t server_id) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(b || sd_id128_equal(server_id, SD_ID128_NULL), -EINVAL);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ bus->is_server = !!b;
+ bus->server_id = server_id;
+ return 0;
+}
+
+_public_ int sd_bus_set_anonymous(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ bus->anonymous_auth = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_set_trusted(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ bus->trusted = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_set_description(sd_bus *bus, const char *description) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ return free_and_strdup(&bus->description, description);
+}
+
+_public_ int sd_bus_set_allow_interactive_authorization(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ bus->allow_interactive_authorization = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_get_allow_interactive_authorization(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ return bus->allow_interactive_authorization;
+}
+
+_public_ int sd_bus_set_watch_bind(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ bus->watch_bind = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_get_watch_bind(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ return bus->watch_bind;
+}
+
+_public_ int sd_bus_set_connected_signal(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ bus->connected_signal = !!b;
+ return 0;
+}
+
+_public_ int sd_bus_get_connected_signal(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ return bus->connected_signal;
+}
+
+static int synthesize_connected_signal(sd_bus *bus) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert(bus);
+
+ /* If enabled, synthesizes a local "Connected" signal mirroring the local "Disconnected" signal. This is called
+ * whenever we fully established a connection, i.e. after the authorization phase, and after receiving the
+ * Hello() reply. Or in other words, whenever we enter BUS_RUNNING state.
+ *
+ * This is useful so that clients can start doing stuff whenever the connection is fully established in a way
+ * that works independently from whether we connected to a full bus or just a direct connection. */
+
+ if (!bus->connected_signal)
+ return 0;
+
+ r = sd_bus_message_new_signal(
+ bus,
+ &m,
+ "/org/freedesktop/DBus/Local",
+ "org.freedesktop.DBus.Local",
+ "Connected");
+ if (r < 0)
+ return r;
+
+ bus_message_set_sender_local(bus, m);
+ m->read_counter = ++bus->read_counter;
+
+ r = bus_seal_synthetic_message(bus, m);
+ if (r < 0)
+ return r;
+
+ r = bus_rqueue_make_room(bus);
+ if (r < 0)
+ return r;
+
+ /* Insert at the very front */
+ memmove(bus->rqueue + 1, bus->rqueue, sizeof(sd_bus_message*) * bus->rqueue_size);
+ bus->rqueue[0] = bus_message_ref_queued(m, bus);
+ bus->rqueue_size++;
+
+ return 0;
+}
+
+void bus_set_state(sd_bus *bus, enum bus_state state) {
+ static const char* const table[_BUS_STATE_MAX] = {
+ [BUS_UNSET] = "UNSET",
+ [BUS_WATCH_BIND] = "WATCH_BIND",
+ [BUS_OPENING] = "OPENING",
+ [BUS_AUTHENTICATING] = "AUTHENTICATING",
+ [BUS_HELLO] = "HELLO",
+ [BUS_RUNNING] = "RUNNING",
+ [BUS_CLOSING] = "CLOSING",
+ [BUS_CLOSED] = "CLOSED",
+ };
+
+ assert(bus);
+ assert(state < _BUS_STATE_MAX);
+
+ if (state == bus->state)
+ return;
+
+ log_debug("Bus %s: changing state %s %s %s", strna(bus->description),
+ table[bus->state], special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), table[state]);
+ bus->state = state;
+}
+
+static int hello_callback(sd_bus_message *reply, void *userdata, sd_bus_error *error) {
+ const char *s;
+ sd_bus *bus;
+ int r;
+
+ assert(reply);
+ bus = reply->bus;
+ assert(bus);
+ assert(IN_SET(bus->state, BUS_HELLO, BUS_CLOSING));
+
+ r = sd_bus_message_get_errno(reply);
+ if (r > 0) {
+ r = -r;
+ goto fail;
+ }
+
+ r = sd_bus_message_read(reply, "s", &s);
+ if (r < 0)
+ goto fail;
+
+ if (!service_name_is_valid(s) || s[0] != ':') {
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ r = free_and_strdup(&bus->unique_name, s);
+ if (r < 0)
+ goto fail;
+
+ if (bus->state == BUS_HELLO) {
+ bus_set_state(bus, BUS_RUNNING);
+
+ r = synthesize_connected_signal(bus);
+ if (r < 0)
+ goto fail;
+ }
+
+ return 1;
+
+fail:
+ /* When Hello() failed, let's propagate this in two ways: first we return the error immediately here,
+ * which is the propagated up towards the event loop. Let's also invalidate the connection, so that
+ * if the user then calls back into us again we won't wait any longer. */
+
+ bus_set_state(bus, BUS_CLOSING);
+ return r;
+}
+
+static int bus_send_hello(sd_bus *bus) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert(bus);
+
+ if (!bus->bus_client)
+ return 0;
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/DBus",
+ "org.freedesktop.DBus",
+ "Hello");
+ if (r < 0)
+ return r;
+
+ return sd_bus_call_async(bus, NULL, m, hello_callback, NULL, 0);
+}
+
+int bus_start_running(sd_bus *bus) {
+ struct reply_callback *c;
+ usec_t n;
+ int r;
+
+ assert(bus);
+ assert(bus->state < BUS_HELLO);
+
+ /* We start all method call timeouts when we enter BUS_HELLO or BUS_RUNNING mode. At this point let's convert
+ * all relative to absolute timestamps. Note that we do not reshuffle the reply callback priority queue since
+ * adding a fixed value to all entries should not alter the internal order. */
+
+ n = now(CLOCK_MONOTONIC);
+ ORDERED_HASHMAP_FOREACH(c, bus->reply_callbacks) {
+ if (c->timeout_usec == 0)
+ continue;
+
+ c->timeout_usec = usec_add(n, c->timeout_usec);
+ }
+
+ if (bus->bus_client) {
+ bus_set_state(bus, BUS_HELLO);
+ return 1;
+ }
+
+ bus_set_state(bus, BUS_RUNNING);
+
+ r = synthesize_connected_signal(bus);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int parse_address_key(const char **p, const char *key, char **value) {
+ _cleanup_free_ char *r = NULL;
+ size_t l, n = 0;
+ const char *a;
+
+ assert(p);
+ assert(*p);
+ assert(value);
+
+ if (key) {
+ l = strlen(key);
+ if (strncmp(*p, key, l) != 0)
+ return 0;
+
+ if ((*p)[l] != '=')
+ return 0;
+
+ if (*value)
+ return -EINVAL;
+
+ a = *p + l + 1;
+ } else
+ a = *p;
+
+ while (!IN_SET(*a, ';', ',', 0)) {
+ char c;
+
+ if (*a == '%') {
+ int x, y;
+
+ x = unhexchar(a[1]);
+ if (x < 0)
+ return x;
+
+ y = unhexchar(a[2]);
+ if (y < 0)
+ return y;
+
+ c = (char) ((x << 4) | y);
+ a += 3;
+ } else {
+ c = *a;
+ a++;
+ }
+
+ if (!GREEDY_REALLOC(r, n + 2))
+ return -ENOMEM;
+
+ r[n++] = c;
+ }
+
+ if (!r) {
+ r = strdup("");
+ if (!r)
+ return -ENOMEM;
+ } else
+ r[n] = 0;
+
+ if (*a == ',')
+ a++;
+
+ *p = a;
+
+ free_and_replace(*value, r);
+
+ return 1;
+}
+
+static void skip_address_key(const char **p) {
+ assert(p);
+ assert(*p);
+
+ *p += strcspn(*p, ",");
+
+ if (**p == ',')
+ (*p)++;
+}
+
+static int parse_unix_address(sd_bus *b, const char **p, char **guid) {
+ _cleanup_free_ char *path = NULL, *abstract = NULL;
+ size_t l;
+ int r;
+
+ assert(b);
+ assert(p);
+ assert(*p);
+ assert(guid);
+
+ while (!IN_SET(**p, 0, ';')) {
+ r = parse_address_key(p, "guid", guid);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "path", &path);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "abstract", &abstract);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ skip_address_key(p);
+ }
+
+ if (!path && !abstract)
+ return -EINVAL;
+
+ if (path && abstract)
+ return -EINVAL;
+
+ if (path) {
+ l = strlen(path);
+ if (l >= sizeof(b->sockaddr.un.sun_path)) /* We insist on NUL termination */
+ return -E2BIG;
+
+ b->sockaddr.un = (struct sockaddr_un) {
+ .sun_family = AF_UNIX,
+ };
+
+ memcpy(b->sockaddr.un.sun_path, path, l);
+ b->sockaddr_size = offsetof(struct sockaddr_un, sun_path) + l + 1;
+
+ } else {
+ assert(abstract);
+
+ l = strlen(abstract);
+ if (l >= sizeof(b->sockaddr.un.sun_path) - 1) /* We insist on NUL termination */
+ return -E2BIG;
+
+ b->sockaddr.un = (struct sockaddr_un) {
+ .sun_family = AF_UNIX,
+ };
+
+ memcpy(b->sockaddr.un.sun_path+1, abstract, l);
+ b->sockaddr_size = offsetof(struct sockaddr_un, sun_path) + 1 + l;
+ }
+
+ b->is_local = true;
+
+ return 0;
+}
+
+static int parse_tcp_address(sd_bus *b, const char **p, char **guid) {
+ _cleanup_free_ char *host = NULL, *port = NULL, *family = NULL;
+ int r;
+ struct addrinfo *result, hints = {
+ .ai_socktype = SOCK_STREAM,
+ };
+
+ assert(b);
+ assert(p);
+ assert(*p);
+ assert(guid);
+
+ while (!IN_SET(**p, 0, ';')) {
+ r = parse_address_key(p, "guid", guid);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "host", &host);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "port", &port);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "family", &family);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ skip_address_key(p);
+ }
+
+ if (!host || !port)
+ return -EINVAL;
+
+ if (family) {
+ hints.ai_family = af_from_ipv4_ipv6(family);
+ if (hints.ai_family == AF_UNSPEC)
+ return -EINVAL;
+ }
+
+ r = getaddrinfo(host, port, &hints, &result);
+ if (r == EAI_SYSTEM)
+ return -errno;
+ else if (r != 0)
+ return -EADDRNOTAVAIL;
+
+ memcpy(&b->sockaddr, result->ai_addr, result->ai_addrlen);
+ b->sockaddr_size = result->ai_addrlen;
+
+ freeaddrinfo(result);
+
+ b->is_local = false;
+
+ return 0;
+}
+
+static int parse_exec_address(sd_bus *b, const char **p, char **guid) {
+ char *path = NULL;
+ unsigned n_argv = 0, j;
+ char **argv = NULL;
+ int r;
+
+ assert(b);
+ assert(p);
+ assert(*p);
+ assert(guid);
+
+ while (!IN_SET(**p, 0, ';')) {
+ r = parse_address_key(p, "guid", guid);
+ if (r < 0)
+ goto fail;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "path", &path);
+ if (r < 0)
+ goto fail;
+ else if (r > 0)
+ continue;
+
+ if (startswith(*p, "argv")) {
+ unsigned ul;
+
+ errno = 0;
+ ul = strtoul(*p + 4, (char**) p, 10);
+ if (errno > 0 || **p != '=' || ul > 256) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ (*p)++;
+
+ if (ul >= n_argv) {
+ if (!GREEDY_REALLOC0(argv, ul + 2)) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ n_argv = ul + 1;
+ }
+
+ r = parse_address_key(p, NULL, argv + ul);
+ if (r < 0)
+ goto fail;
+
+ continue;
+ }
+
+ skip_address_key(p);
+ }
+
+ if (!path) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ /* Make sure there are no holes in the array, with the
+ * exception of argv[0] */
+ for (j = 1; j < n_argv; j++)
+ if (!argv[j]) {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ if (argv && argv[0] == NULL) {
+ argv[0] = strdup(path);
+ if (!argv[0]) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ b->exec_path = path;
+ b->exec_argv = argv;
+
+ b->is_local = false;
+
+ return 0;
+
+fail:
+ for (j = 0; j < n_argv; j++)
+ free(argv[j]);
+
+ free(argv);
+ free(path);
+ return r;
+}
+
+static int parse_container_unix_address(sd_bus *b, const char **p, char **guid) {
+ _cleanup_free_ char *machine = NULL, *pid = NULL;
+ int r;
+
+ assert(b);
+ assert(p);
+ assert(*p);
+ assert(guid);
+
+ while (!IN_SET(**p, 0, ';')) {
+ r = parse_address_key(p, "guid", guid);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "machine", &machine);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ r = parse_address_key(p, "pid", &pid);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ continue;
+
+ skip_address_key(p);
+ }
+
+ if (!machine == !pid)
+ return -EINVAL;
+
+ if (machine) {
+ if (!hostname_is_valid(machine, VALID_HOSTNAME_DOT_HOST))
+ return -EINVAL;
+
+ free_and_replace(b->machine, machine);
+ } else
+ b->machine = mfree(b->machine);
+
+ if (pid) {
+ r = parse_pid(pid, &b->nspid);
+ if (r < 0)
+ return r;
+ } else
+ b->nspid = 0;
+
+ b->sockaddr.un = (struct sockaddr_un) {
+ .sun_family = AF_UNIX,
+ /* Note that we use the old /var/run prefix here, to increase compatibility with really old containers */
+ .sun_path = "/var/run/dbus/system_bus_socket",
+ };
+ b->sockaddr_size = SOCKADDR_UN_LEN(b->sockaddr.un);
+ b->is_local = false;
+
+ return 0;
+}
+
+static void bus_reset_parsed_address(sd_bus *b) {
+ assert(b);
+
+ zero(b->sockaddr);
+ b->sockaddr_size = 0;
+ b->exec_argv = strv_free(b->exec_argv);
+ b->exec_path = mfree(b->exec_path);
+ b->server_id = SD_ID128_NULL;
+ b->machine = mfree(b->machine);
+ b->nspid = 0;
+}
+
+static int bus_parse_next_address(sd_bus *b) {
+ _cleanup_free_ char *guid = NULL;
+ const char *a;
+ int r;
+
+ assert(b);
+
+ if (!b->address)
+ return 0;
+ if (b->address[b->address_index] == 0)
+ return 0;
+
+ bus_reset_parsed_address(b);
+
+ a = b->address + b->address_index;
+
+ while (*a != 0) {
+
+ if (*a == ';') {
+ a++;
+ continue;
+ }
+
+ if (startswith(a, "unix:")) {
+ a += 5;
+
+ r = parse_unix_address(b, &a, &guid);
+ if (r < 0)
+ return r;
+ break;
+
+ } else if (startswith(a, "tcp:")) {
+
+ a += 4;
+ r = parse_tcp_address(b, &a, &guid);
+ if (r < 0)
+ return r;
+
+ break;
+
+ } else if (startswith(a, "unixexec:")) {
+
+ a += 9;
+ r = parse_exec_address(b, &a, &guid);
+ if (r < 0)
+ return r;
+
+ break;
+
+ } else if (startswith(a, "x-machine-unix:")) {
+
+ a += 15;
+ r = parse_container_unix_address(b, &a, &guid);
+ if (r < 0)
+ return r;
+
+ break;
+ }
+
+ a = strchr(a, ';');
+ if (!a)
+ return 0;
+ }
+
+ if (guid) {
+ r = sd_id128_from_string(guid, &b->server_id);
+ if (r < 0)
+ return r;
+ }
+
+ b->address_index = a - b->address;
+ return 1;
+}
+
+static void bus_kill_exec(sd_bus *bus) {
+ if (!pid_is_valid(bus->busexec_pid))
+ return;
+
+ sigterm_wait(TAKE_PID(bus->busexec_pid));
+}
+
+static int bus_start_address(sd_bus *b) {
+ int r;
+
+ assert(b);
+
+ for (;;) {
+ bus_close_io_fds(b);
+ bus_close_inotify_fd(b);
+
+ bus_kill_exec(b);
+
+ /* If you provide multiple different bus-addresses, we
+ * try all of them in order and use the first one that
+ * succeeds. */
+
+ if (b->exec_path)
+ r = bus_socket_exec(b);
+ else if ((b->nspid > 0 || b->machine) && b->sockaddr.sa.sa_family != AF_UNSPEC)
+ r = bus_container_connect_socket(b);
+ else if (b->sockaddr.sa.sa_family != AF_UNSPEC)
+ r = bus_socket_connect(b);
+ else
+ goto next;
+
+ if (r >= 0) {
+ int q;
+
+ q = bus_attach_io_events(b);
+ if (q < 0)
+ return q;
+
+ q = bus_attach_inotify_event(b);
+ if (q < 0)
+ return q;
+
+ return r;
+ }
+
+ b->last_connect_error = -r;
+
+ next:
+ r = bus_parse_next_address(b);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return b->last_connect_error > 0 ? -b->last_connect_error : -ECONNREFUSED;
+ }
+}
+
+int bus_next_address(sd_bus *b) {
+ assert(b);
+
+ bus_reset_parsed_address(b);
+ return bus_start_address(b);
+}
+
+static int bus_start_fd(sd_bus *b) {
+ struct stat st;
+ int r;
+
+ assert(b);
+ assert(b->input_fd >= 0);
+ assert(b->output_fd >= 0);
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *pi = NULL, *po = NULL;
+ (void) fd_get_path(b->input_fd, &pi);
+ (void) fd_get_path(b->output_fd, &po);
+ log_debug("sd-bus: starting bus%s%s on fds %d/%d (%s, %s)...",
+ b->description ? " " : "", strempty(b->description),
+ b->input_fd, b->output_fd,
+ pi ?: "???", po ?: "???");
+ }
+
+ r = fd_nonblock(b->input_fd, true);
+ if (r < 0)
+ return r;
+
+ r = fd_cloexec(b->input_fd, true);
+ if (r < 0)
+ return r;
+
+ if (b->input_fd != b->output_fd) {
+ r = fd_nonblock(b->output_fd, true);
+ if (r < 0)
+ return r;
+
+ r = fd_cloexec(b->output_fd, true);
+ if (r < 0)
+ return r;
+ }
+
+ if (fstat(b->input_fd, &st) < 0)
+ return -errno;
+
+ return bus_socket_take_fd(b);
+}
+
+_public_ int sd_bus_start(sd_bus *bus) {
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state == BUS_UNSET, -EPERM);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ bus_set_state(bus, BUS_OPENING);
+
+ if (bus->is_server && bus->bus_client)
+ return -EINVAL;
+
+ if (bus->input_fd >= 0)
+ r = bus_start_fd(bus);
+ else if (bus->address || bus->sockaddr.sa.sa_family != AF_UNSPEC || bus->exec_path || bus->machine)
+ r = bus_start_address(bus);
+ else
+ return -EINVAL;
+
+ if (r < 0) {
+ sd_bus_close(bus);
+ return r;
+ }
+
+ return bus_send_hello(bus);
+}
+
+_public_ int sd_bus_open_with_description(sd_bus **ret, const char *description) {
+ const char *e;
+ _cleanup_(bus_freep) sd_bus *b = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ /* Let's connect to the starter bus if it is set, and
+ * otherwise to the bus that is appropriate for the scope
+ * we are running in */
+
+ e = secure_getenv("DBUS_STARTER_BUS_TYPE");
+ if (e) {
+ if (streq(e, "system"))
+ return sd_bus_open_system_with_description(ret, description);
+ else if (STR_IN_SET(e, "session", "user"))
+ return sd_bus_open_user_with_description(ret, description);
+ }
+
+ e = secure_getenv("DBUS_STARTER_ADDRESS");
+ if (!e) {
+ if (cg_pid_get_owner_uid(0, NULL) >= 0)
+ return sd_bus_open_user_with_description(ret, description);
+ else
+ return sd_bus_open_system_with_description(ret, description);
+ }
+
+ r = sd_bus_new(&b);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_set_address(b, e);
+ if (r < 0)
+ return r;
+
+ b->bus_client = true;
+
+ /* We don't know whether the bus is trusted or not, so better
+ * be safe, and authenticate everything */
+ b->trusted = false;
+ b->is_local = false;
+ b->creds_mask |= SD_BUS_CREDS_UID | SD_BUS_CREDS_EUID | SD_BUS_CREDS_EFFECTIVE_CAPS;
+
+ r = sd_bus_start(b);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(b);
+ return 0;
+}
+
+_public_ int sd_bus_open(sd_bus **ret) {
+ return sd_bus_open_with_description(ret, NULL);
+}
+
+int bus_set_address_system(sd_bus *b) {
+ const char *e;
+ int r;
+
+ assert(b);
+
+ e = secure_getenv("DBUS_SYSTEM_BUS_ADDRESS");
+
+ r = sd_bus_set_address(b, e ?: DEFAULT_SYSTEM_BUS_ADDRESS);
+ if (r < 0)
+ return r;
+
+ b->runtime_scope = RUNTIME_SCOPE_SYSTEM;
+ return r;
+}
+
+_public_ int sd_bus_open_system_with_description(sd_bus **ret, const char *description) {
+ _cleanup_(bus_freep) sd_bus *b = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ r = sd_bus_new(&b);
+ if (r < 0)
+ return r;
+
+ if (description) {
+ r = sd_bus_set_description(b, description);
+ if (r < 0)
+ return r;
+ }
+
+ r = bus_set_address_system(b);
+ if (r < 0)
+ return r;
+
+ b->bus_client = true;
+
+ /* Let's do per-method access control on the system bus. We
+ * need the caller's UID and capability set for that. */
+ b->trusted = false;
+ b->creds_mask |= SD_BUS_CREDS_UID | SD_BUS_CREDS_EUID | SD_BUS_CREDS_EFFECTIVE_CAPS;
+ b->is_local = true;
+
+ r = sd_bus_start(b);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(b);
+ return 0;
+}
+
+_public_ int sd_bus_open_system(sd_bus **ret) {
+ return sd_bus_open_system_with_description(ret, NULL);
+}
+
+int bus_set_address_user(sd_bus *b) {
+ const char *a;
+ _cleanup_free_ char *_a = NULL;
+ int r;
+
+ assert(b);
+
+ a = secure_getenv("DBUS_SESSION_BUS_ADDRESS");
+ if (!a) {
+ const char *e;
+ _cleanup_free_ char *ee = NULL;
+
+ e = secure_getenv("XDG_RUNTIME_DIR");
+ if (!e)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
+ "sd-bus: $XDG_RUNTIME_DIR not set, cannot connect to user bus.");
+
+ ee = bus_address_escape(e);
+ if (!ee)
+ return -ENOMEM;
+
+ if (asprintf(&_a, DEFAULT_USER_BUS_ADDRESS_FMT, ee) < 0)
+ return -ENOMEM;
+ a = _a;
+ }
+
+ r = sd_bus_set_address(b, a);
+ if (r < 0)
+ return r;
+
+ b->runtime_scope = RUNTIME_SCOPE_USER;
+ return r;
+}
+
+_public_ int sd_bus_open_user_with_description(sd_bus **ret, const char *description) {
+ _cleanup_(bus_freep) sd_bus *b = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ r = sd_bus_new(&b);
+ if (r < 0)
+ return r;
+
+ if (description) {
+ r = sd_bus_set_description(b, description);
+ if (r < 0)
+ return r;
+ }
+
+ r = bus_set_address_user(b);
+ if (r < 0)
+ return r;
+
+ b->bus_client = true;
+
+ /* We don't do any per-method access control on the user bus. */
+ b->trusted = true;
+ b->is_local = true;
+
+ r = sd_bus_start(b);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(b);
+ return 0;
+}
+
+_public_ int sd_bus_open_user(sd_bus **ret) {
+ return sd_bus_open_user_with_description(ret, NULL);
+}
+
+int bus_set_address_system_remote(sd_bus *b, const char *host) {
+ _cleanup_free_ char *e = NULL;
+ char *m = NULL, *c = NULL, *a, *rbracket = NULL, *p = NULL;
+
+ assert(b);
+ assert(host);
+
+ /* Skip ":"s in ipv6 addresses */
+ if (*host == '[') {
+ char *t;
+
+ rbracket = strchr(host, ']');
+ if (!rbracket)
+ return -EINVAL;
+ t = strndupa_safe(host + 1, rbracket - host - 1);
+ e = bus_address_escape(t);
+ if (!e)
+ return -ENOMEM;
+ } else if ((a = strchr(host, '@'))) {
+ if (*(a + 1) == '[') {
+ _cleanup_free_ char *t = NULL;
+
+ rbracket = strchr(a + 1, ']');
+ if (!rbracket)
+ return -EINVAL;
+ t = new0(char, strlen(host));
+ if (!t)
+ return -ENOMEM;
+ strncat(t, host, a - host + 1);
+ strncat(t, a + 2, rbracket - a - 2);
+ e = bus_address_escape(t);
+ if (!e)
+ return -ENOMEM;
+ } else if (*(a + 1) == '\0' || strchr(a + 1, '@'))
+ return -EINVAL;
+ }
+
+ /* Let's see if a port was given */
+ m = strchr(rbracket ? rbracket + 1 : host, ':');
+ if (m) {
+ char *t;
+ bool got_forward_slash = false;
+
+ p = m + 1;
+
+ t = strchr(p, '/');
+ if (t) {
+ p = strndupa_safe(p, t - p);
+ got_forward_slash = true;
+ }
+
+ if (!in_charset(p, "0123456789") || *p == '\0') {
+ if (!hostname_is_valid(p, 0) || got_forward_slash)
+ return -EINVAL;
+
+ m = TAKE_PTR(p);
+ goto interpret_port_as_machine_old_syntax;
+ }
+ }
+
+ /* Let's see if a machine was given */
+ m = strchr(rbracket ? rbracket + 1 : host, '/');
+ if (m) {
+ m++;
+interpret_port_as_machine_old_syntax:
+ /* Let's make sure this is not a port of some kind,
+ * and is a valid machine name. */
+ if (!in_charset(m, "0123456789") && hostname_is_valid(m, 0))
+ c = strjoina(",argv", p ? "7" : "5", "=--machine=", m);
+ }
+
+ if (!e) {
+ char *t;
+
+ t = strndupa_safe(host, strcspn(host, ":/"));
+
+ e = bus_address_escape(t);
+ if (!e)
+ return -ENOMEM;
+ }
+
+ a = strjoin("unixexec:path=ssh,argv1=-xT", p ? ",argv2=-p,argv3=" : "", strempty(p),
+ ",argv", p ? "4" : "2", "=--,argv", p ? "5" : "3", "=", e,
+ ",argv", p ? "6" : "4", "=systemd-stdio-bridge", c);
+ if (!a)
+ return -ENOMEM;
+
+ return free_and_replace(b->address, a);
+}
+
+_public_ int sd_bus_open_system_remote(sd_bus **ret, const char *host) {
+ _cleanup_(bus_freep) sd_bus *b = NULL;
+ int r;
+
+ assert_return(host, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = sd_bus_new(&b);
+ if (r < 0)
+ return r;
+
+ r = bus_set_address_system_remote(b, host);
+ if (r < 0)
+ return r;
+
+ b->bus_client = true;
+ b->trusted = false;
+ b->runtime_scope = RUNTIME_SCOPE_SYSTEM;
+ b->is_local = false;
+
+ r = sd_bus_start(b);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(b);
+ return 0;
+}
+
+int bus_set_address_machine(sd_bus *b, RuntimeScope runtime_scope, const char *machine) {
+ _cleanup_free_ char *a = NULL;
+ const char *rhs;
+
+ assert(b);
+ assert(machine);
+
+ rhs = strchr(machine, '@');
+ if (rhs || runtime_scope == RUNTIME_SCOPE_USER) {
+ _cleanup_free_ char *u = NULL, *eu = NULL, *erhs = NULL;
+
+ /* If there's an "@" in the container specification, we'll connect as a user specified at its
+ * left hand side, which is useful in combination with user=true. This isn't as trivial as it
+ * might sound: it's not sufficient to enter the container and connect to some socket there,
+ * since the --user socket path depends on $XDG_RUNTIME_DIR which is set via PAM. Thus, to be
+ * able to connect, we need to have a PAM session. Our way out? We use systemd-run to get
+ * into the container and acquire a PAM session there, and then invoke systemd-stdio-bridge
+ * in it, which propagates the bus transport to us. */
+
+ if (rhs) {
+ if (rhs > machine)
+ u = strndup(machine, rhs - machine);
+ else
+ u = getusername_malloc(); /* Empty user name, let's use the local one */
+ if (!u)
+ return -ENOMEM;
+
+ eu = bus_address_escape(u);
+ if (!eu)
+ return -ENOMEM;
+
+ rhs++;
+ } else {
+ /* No "@" specified but we shall connect to the user instance? Then assume root (and
+ * not a user named identically to the calling one). This means:
+ *
+ * --machine=foobar --user → connect to user bus of root user in container "foobar"
+ * --machine=@foobar --user → connect to user bus of user named like the calling user in container "foobar"
+ *
+ * Why? so that behaviour for "--machine=foobar --system" is roughly similar to
+ * "--machine=foobar --user": both times we unconditionally connect as root user
+ * regardless what the calling user is. */
+
+ rhs = machine;
+ }
+
+ if (!isempty(rhs)) {
+ erhs = bus_address_escape(rhs);
+ if (!erhs)
+ return -ENOMEM;
+ }
+
+ /* systemd-run -M… -PGq --wait -pUser=… -pPAMName=login systemd-stdio-bridge */
+
+ a = strjoin("unixexec:path=systemd-run,"
+ "argv1=-M", erhs ?: ".host", ","
+ "argv2=-PGq,"
+ "argv3=--wait,"
+ "argv4=-pUser%3d", eu ?: "root", ",",
+ "argv5=-pPAMName%3dlogin,"
+ "argv6=systemd-stdio-bridge");
+ if (!a)
+ return -ENOMEM;
+
+ if (runtime_scope == RUNTIME_SCOPE_USER) {
+ /* Ideally we'd use the "--user" switch to systemd-stdio-bridge here, but it's only
+ * available in recent systemd versions. Using the "-p" switch with the explicit path
+ * is a working alternative, and is compatible with older versions, hence that's what
+ * we use here. */
+ if (!strextend(&a, ",argv7=-punix:path%3d%24%7bXDG_RUNTIME_DIR%7d/bus"))
+ return -ENOMEM;
+ }
+ } else {
+ _cleanup_free_ char *e = NULL;
+
+ /* Just a container name, we can go the simple way, and just join the container, and connect
+ * to the well-known path of the system bus there. */
+
+ e = bus_address_escape(machine);
+ if (!e)
+ return -ENOMEM;
+
+ a = strjoin("x-machine-unix:machine=", e);
+ if (!a)
+ return -ENOMEM;
+ }
+
+ return free_and_replace(b->address, a);
+}
+
+static int user_and_machine_valid(const char *user_and_machine) {
+ const char *h;
+
+ /* Checks if a container specification in the form "user@container" or just "container" is valid.
+ *
+ * If the "@" syntax is used we'll allow either the "user" or the "container" part to be omitted, but
+ * not both. */
+
+ h = strchr(user_and_machine, '@');
+ if (!h)
+ h = user_and_machine;
+ else {
+ _cleanup_free_ char *user = NULL;
+
+ user = strndup(user_and_machine, h - user_and_machine);
+ if (!user)
+ return -ENOMEM;
+
+ if (!isempty(user) && !valid_user_group_name(user, VALID_USER_RELAX | VALID_USER_ALLOW_NUMERIC))
+ return false;
+
+ h++;
+
+ if (isempty(h))
+ return !isempty(user);
+ }
+
+ return hostname_is_valid(h, VALID_HOSTNAME_DOT_HOST);
+}
+
+static int user_and_machine_equivalent(const char *user_and_machine) {
+ _cleanup_free_ char *un = NULL;
+ const char *f;
+
+ /* Returns true if the specified user+machine name are actually equivalent to our own identity and
+ * our own host. If so we can shortcut things. Why bother? Because that way we don't have to fork
+ * off short-lived worker processes that are then unavailable for authentication and logging in the
+ * peer. Moreover joining a namespace requires privileges. If we are in the right namespace anyway,
+ * we can avoid permission problems thus. */
+
+ assert(user_and_machine);
+
+ /* Omitting the user name means that we shall use the same user name as we run as locally, which
+ * means we'll end up on the same host, let's shortcut */
+ if (streq(user_and_machine, "@.host"))
+ return true;
+
+ /* Otherwise, if we are root, then we can also allow the ".host" syntax, as that's the user this
+ * would connect to. */
+ uid_t uid = geteuid();
+
+ if (uid == 0 && STR_IN_SET(user_and_machine, ".host", "root@.host", "0@.host"))
+ return true;
+
+ /* Otherwise, we have to figure out our user id and name, and compare things with that. */
+ char buf[DECIMAL_STR_MAX(uid_t)];
+ xsprintf(buf, UID_FMT, uid);
+
+ f = startswith(user_and_machine, buf);
+ if (!f) {
+ un = getusername_malloc();
+ if (!un)
+ return -ENOMEM;
+
+ f = startswith(user_and_machine, un);
+ if (!f)
+ return false;
+ }
+
+ return STR_IN_SET(f, "@", "@.host");
+}
+
+_public_ int sd_bus_open_system_machine(sd_bus **ret, const char *user_and_machine) {
+ _cleanup_(bus_freep) sd_bus *b = NULL;
+ int r;
+
+ assert_return(user_and_machine, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ if (user_and_machine_equivalent(user_and_machine))
+ return sd_bus_open_system(ret);
+
+ r = user_and_machine_valid(user_and_machine);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ r = sd_bus_new(&b);
+ if (r < 0)
+ return r;
+
+ r = bus_set_address_machine(b, RUNTIME_SCOPE_SYSTEM, user_and_machine);
+ if (r < 0)
+ return r;
+
+ b->bus_client = true;
+ b->runtime_scope = RUNTIME_SCOPE_SYSTEM;
+
+ r = sd_bus_start(b);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(b);
+ return 0;
+}
+
+_public_ int sd_bus_open_user_machine(sd_bus **ret, const char *user_and_machine) {
+ _cleanup_(bus_freep) sd_bus *b = NULL;
+ int r;
+
+ assert_return(user_and_machine, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ /* Shortcut things if we'd end up on this host and as the same user. */
+ if (user_and_machine_equivalent(user_and_machine))
+ return sd_bus_open_user(ret);
+
+ r = user_and_machine_valid(user_and_machine);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ r = sd_bus_new(&b);
+ if (r < 0)
+ return r;
+
+ r = bus_set_address_machine(b, RUNTIME_SCOPE_USER, user_and_machine);
+ if (r < 0)
+ return r;
+
+ b->bus_client = true;
+ b->trusted = true;
+
+ r = sd_bus_start(b);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(b);
+ return 0;
+}
+
+_public_ void sd_bus_close(sd_bus *bus) {
+ if (!bus)
+ return;
+ if (bus->state == BUS_CLOSED)
+ return;
+ if (bus_origin_changed(bus))
+ return;
+
+ /* Don't leave ssh hanging around */
+ bus_kill_exec(bus);
+
+ bus_set_state(bus, BUS_CLOSED);
+
+ sd_bus_detach_event(bus);
+
+ /* Drop all queued messages so that they drop references to
+ * the bus object and the bus may be freed */
+ bus_reset_queues(bus);
+
+ bus_close_io_fds(bus);
+ bus_close_inotify_fd(bus);
+}
+
+_public_ sd_bus *sd_bus_close_unref(sd_bus *bus) {
+ if (!bus)
+ return NULL;
+ if (bus_origin_changed(bus))
+ return NULL;
+
+ sd_bus_close(bus);
+
+ return sd_bus_unref(bus);
+}
+
+_public_ sd_bus* sd_bus_flush_close_unref(sd_bus *bus) {
+ if (!bus)
+ return NULL;
+ if (bus_origin_changed(bus))
+ return NULL;
+
+ /* Have to do this before flush() to prevent hang */
+ bus_kill_exec(bus);
+ sd_bus_flush(bus);
+
+ return sd_bus_close_unref(bus);
+}
+
+void bus_enter_closing(sd_bus *bus) {
+ assert(bus);
+
+ if (!IN_SET(bus->state, BUS_WATCH_BIND, BUS_OPENING, BUS_AUTHENTICATING, BUS_HELLO, BUS_RUNNING))
+ return;
+
+ bus_set_state(bus, BUS_CLOSING);
+}
+
+/* Define manually so we can add the PID check */
+_public_ sd_bus *sd_bus_ref(sd_bus *bus) {
+ if (!bus)
+ return NULL;
+ if (bus_origin_changed(bus))
+ return NULL;
+
+ bus->n_ref++;
+
+ return bus;
+}
+
+_public_ sd_bus* sd_bus_unref(sd_bus *bus) {
+ if (!bus)
+ return NULL;
+ if (bus_origin_changed(bus))
+ return NULL;
+
+ assert(bus->n_ref > 0);
+ if (--bus->n_ref > 0)
+ return NULL;
+
+ return bus_free(bus);
+}
+
+_public_ int sd_bus_is_open(sd_bus *bus) {
+ if (!bus)
+ return 0;
+
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ return BUS_IS_OPEN(bus->state);
+}
+
+_public_ int sd_bus_is_ready(sd_bus *bus) {
+ if (!bus)
+ return 0;
+
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ return bus->state == BUS_RUNNING;
+}
+
+_public_ int sd_bus_can_send(sd_bus *bus, char type) {
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->state != BUS_UNSET, -ENOTCONN);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (bus->is_monitor)
+ return 0;
+
+ if (type == SD_BUS_TYPE_UNIX_FD) {
+ if (!bus->accept_fd)
+ return 0;
+
+ r = bus_ensure_running(bus);
+ if (r < 0)
+ return r;
+
+ return bus->can_fds;
+ }
+
+ return bus_type_is_valid(type);
+}
+
+_public_ int sd_bus_get_bus_id(sd_bus *bus, sd_id128_t *id) {
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(id, -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ r = bus_ensure_running(bus);
+ if (r < 0)
+ return r;
+
+ *id = bus->server_id;
+ return 0;
+}
+
+#define COOKIE_CYCLED (UINT32_C(1) << 31)
+
+static uint64_t cookie_inc(uint64_t cookie) {
+
+ /* Stay within the 32-bit range, since classic D-Bus can't deal with more */
+ if (cookie >= UINT32_MAX)
+ return COOKIE_CYCLED; /* Don't go back to zero, but use the highest bit for checking
+ * whether we are looping. */
+
+ return cookie + 1;
+}
+
+static int next_cookie(sd_bus *b) {
+ uint64_t new_cookie;
+
+ assert(b);
+
+ new_cookie = cookie_inc(b->cookie);
+
+ /* Small optimization: don't bother with checking for cookie reuse until we overran cookiespace at
+ * least once, but then do it thorougly. */
+ if (FLAGS_SET(new_cookie, COOKIE_CYCLED)) {
+ uint32_t i;
+
+ /* Check if the cookie is currently in use. If so, pick the next one */
+ for (i = 0; i < COOKIE_CYCLED; i++) {
+ if (!ordered_hashmap_contains(b->reply_callbacks, &new_cookie))
+ goto good;
+
+ new_cookie = cookie_inc(new_cookie);
+ }
+
+ /* Can't fulfill request */
+ return -EBUSY;
+ }
+
+good:
+ b->cookie = new_cookie;
+ return 0;
+}
+
+static int bus_seal_message(sd_bus *b, sd_bus_message *m, usec_t timeout) {
+ int r;
+
+ assert(b);
+ assert(m);
+
+ if (m->sealed) {
+ /* If we copy the same message to multiple
+ * destinations, avoid using the same cookie
+ * numbers. */
+ b->cookie = MAX(b->cookie, BUS_MESSAGE_COOKIE(m));
+ return 0;
+ }
+
+ if (timeout == 0) {
+ r = sd_bus_get_method_call_timeout(b, &timeout);
+ if (r < 0)
+ return r;
+ }
+
+ if (!m->sender && b->patch_sender) {
+ r = sd_bus_message_set_sender(m, b->patch_sender);
+ if (r < 0)
+ return r;
+ }
+
+ r = next_cookie(b);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_seal(m, b->cookie, timeout);
+}
+
+static int bus_remarshal_message(sd_bus *b, sd_bus_message **m) {
+ bool remarshal = false;
+
+ assert(b);
+
+ /* wrong packet version */
+ if (b->message_version != 0 && b->message_version != (*m)->header->version)
+ remarshal = true;
+
+ /* wrong packet endianness */
+ if (b->message_endian != 0 && b->message_endian != (*m)->header->endian)
+ remarshal = true;
+
+ return remarshal ? bus_message_remarshal(b, m) : 0;
+}
+
+int bus_seal_synthetic_message(sd_bus *b, sd_bus_message *m) {
+ assert(b);
+ assert(m);
+
+ /* Fake some timestamps, if they were requested, and not
+ * already initialized */
+ if (b->attach_timestamp) {
+ if (m->realtime <= 0)
+ m->realtime = now(CLOCK_REALTIME);
+
+ if (m->monotonic <= 0)
+ m->monotonic = now(CLOCK_MONOTONIC);
+ }
+
+ /* The bus specification says the serial number cannot be 0,
+ * hence let's fill something in for synthetic messages. Since
+ * synthetic messages might have a fake sender and we don't
+ * want to interfere with the real sender's serial numbers we
+ * pick a fixed, artificial one. */
+ return sd_bus_message_seal(m, UINT32_MAX, 0);
+}
+
+static int bus_write_message(sd_bus *bus, sd_bus_message *m, size_t *idx) {
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ r = bus_socket_write_message(bus, m, idx);
+ if (r <= 0)
+ return r;
+
+ if (*idx >= BUS_MESSAGE_SIZE(m))
+ log_debug("Sent message type=%s sender=%s destination=%s path=%s interface=%s member=%s"
+ " cookie=%" PRIu64 " reply_cookie=%" PRIu64
+ " signature=%s error-name=%s error-message=%s",
+ bus_message_type_to_string(m->header->type),
+ strna(sd_bus_message_get_sender(m)),
+ strna(sd_bus_message_get_destination(m)),
+ strna(sd_bus_message_get_path(m)),
+ strna(sd_bus_message_get_interface(m)),
+ strna(sd_bus_message_get_member(m)),
+ BUS_MESSAGE_COOKIE(m),
+ m->reply_cookie,
+ strna(m->root_container.signature),
+ strna(m->error.name),
+ strna(m->error.message));
+
+ return r;
+}
+
+static int dispatch_wqueue(sd_bus *bus) {
+ int r, ret = 0;
+
+ assert(bus);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ while (bus->wqueue_size > 0) {
+
+ r = bus_write_message(bus, bus->wqueue[0], &bus->windex);
+ if (r < 0)
+ return r;
+ else if (r == 0)
+ /* Didn't do anything this time */
+ return ret;
+ else if (bus->windex >= BUS_MESSAGE_SIZE(bus->wqueue[0])) {
+ /* Fully written. Let's drop the entry from
+ * the queue.
+ *
+ * This isn't particularly optimized, but
+ * well, this is supposed to be our worst-case
+ * buffer only, and the socket buffer is
+ * supposed to be our primary buffer, and if
+ * it got full, then all bets are off
+ * anyway. */
+
+ bus->wqueue_size--;
+ bus_message_unref_queued(bus->wqueue[0], bus);
+ memmove(bus->wqueue, bus->wqueue + 1, sizeof(sd_bus_message*) * bus->wqueue_size);
+ bus->windex = 0;
+
+ ret = 1;
+ }
+ }
+
+ return ret;
+}
+
+static int bus_read_message(sd_bus *bus) {
+ assert(bus);
+
+ return bus_socket_read_message(bus);
+}
+
+int bus_rqueue_make_room(sd_bus *bus) {
+ assert(bus);
+
+ if (bus->rqueue_size >= BUS_RQUEUE_MAX)
+ return -ENOBUFS;
+
+ if (!GREEDY_REALLOC(bus->rqueue, bus->rqueue_size + 1))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void rqueue_drop_one(sd_bus *bus, size_t i) {
+ assert(bus);
+ assert(i < bus->rqueue_size);
+
+ bus_message_unref_queued(bus->rqueue[i], bus);
+ memmove(bus->rqueue + i, bus->rqueue + i + 1, sizeof(sd_bus_message*) * (bus->rqueue_size - i - 1));
+ bus->rqueue_size--;
+}
+
+static int dispatch_rqueue(sd_bus *bus, sd_bus_message **m) {
+ int r, ret = 0;
+
+ assert(bus);
+ assert(m);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ for (;;) {
+ if (bus->rqueue_size > 0) {
+ /* Dispatch a queued message */
+ *m = sd_bus_message_ref(bus->rqueue[0]);
+ rqueue_drop_one(bus, 0);
+ return 1;
+ }
+
+ /* Try to read a new message */
+ r = bus_read_message(bus);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ *m = NULL;
+ return ret;
+ }
+
+ ret = 1;
+ }
+}
+
+_public_ int sd_bus_send(sd_bus *bus, sd_bus_message *_m, uint64_t *cookie) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = sd_bus_message_ref(_m);
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ if (bus)
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ else
+ assert_return(bus = m->bus, -ENOTCONN);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (m->n_fds > 0) {
+ r = sd_bus_can_send(bus, SD_BUS_TYPE_UNIX_FD);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EOPNOTSUPP;
+ }
+
+ /* If the cookie number isn't kept, then we know that no reply
+ * is expected */
+ if (!cookie && !m->sealed)
+ m->header->flags |= BUS_MESSAGE_NO_REPLY_EXPECTED;
+
+ r = bus_seal_message(bus, m, 0);
+ if (r < 0)
+ return r;
+
+ /* Remarshall if we have to. This will possibly unref the
+ * message and place a replacement in m */
+ r = bus_remarshal_message(bus, &m);
+ if (r < 0)
+ return r;
+
+ /* If this is a reply and no reply was requested, then let's
+ * suppress this, if we can */
+ if (m->dont_send)
+ goto finish;
+
+ if (IN_SET(bus->state, BUS_RUNNING, BUS_HELLO) && bus->wqueue_size <= 0) {
+ size_t idx = 0;
+
+ r = bus_write_message(bus, m, &idx);
+ if (ERRNO_IS_NEG_DISCONNECT(r)) {
+ bus_enter_closing(bus);
+ return -ECONNRESET;
+ } else if (r < 0)
+ return r;
+
+ if (idx < BUS_MESSAGE_SIZE(m)) {
+ /* Wasn't fully written. So let's remember how
+ * much was written. Note that the first entry
+ * of the wqueue array is always allocated so
+ * that we always can remember how much was
+ * written. */
+ bus->wqueue[0] = bus_message_ref_queued(m, bus);
+ bus->wqueue_size = 1;
+ bus->windex = idx;
+ }
+
+ } else {
+ /* Just append it to the queue. */
+
+ if (bus->wqueue_size >= BUS_WQUEUE_MAX)
+ return -ENOBUFS;
+
+ if (!GREEDY_REALLOC(bus->wqueue, bus->wqueue_size + 1))
+ return -ENOMEM;
+
+ bus->wqueue[bus->wqueue_size++] = bus_message_ref_queued(m, bus);
+ }
+
+finish:
+ if (cookie)
+ *cookie = BUS_MESSAGE_COOKIE(m);
+
+ return 1;
+}
+
+_public_ int sd_bus_send_to(sd_bus *bus, sd_bus_message *m, const char *destination, uint64_t *cookie) {
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ if (bus)
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ else
+ assert_return(bus = m->bus, -ENOTCONN);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (!streq_ptr(m->destination, destination)) {
+
+ if (!destination)
+ return -EEXIST;
+
+ r = sd_bus_message_set_destination(m, destination);
+ if (r < 0)
+ return r;
+ }
+
+ return sd_bus_send(bus, m, cookie);
+}
+
+static usec_t calc_elapse(sd_bus *bus, uint64_t usec) {
+ assert(bus);
+
+ assert_cc(sizeof(usec_t) == sizeof(uint64_t));
+
+ if (usec == USEC_INFINITY)
+ return 0;
+
+ /* We start all timeouts the instant we enter BUS_HELLO/BUS_RUNNING state, so that the don't run in parallel
+ * with any connection setup states. Hence, if a method callback is started earlier than that we just store the
+ * relative timestamp, and afterwards the absolute one. */
+
+ if (IN_SET(bus->state, BUS_WATCH_BIND, BUS_OPENING, BUS_AUTHENTICATING))
+ return usec;
+ else
+ return usec_add(now(CLOCK_MONOTONIC), usec);
+}
+
+static int timeout_compare(const void *a, const void *b) {
+ const struct reply_callback *x = a, *y = b;
+
+ if (x->timeout_usec != 0 && y->timeout_usec == 0)
+ return -1;
+
+ if (x->timeout_usec == 0 && y->timeout_usec != 0)
+ return 1;
+
+ return CMP(x->timeout_usec, y->timeout_usec);
+}
+
+_public_ int sd_bus_call_async(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ sd_bus_message *_m,
+ sd_bus_message_handler_t callback,
+ void *userdata,
+ uint64_t usec) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = sd_bus_message_ref(_m);
+ _cleanup_(sd_bus_slot_unrefp) sd_bus_slot *s = NULL;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL);
+ assert_return(!m->sealed || (!!callback == !(m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)), -EINVAL);
+
+ if (bus)
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ else
+ assert_return(bus = m->bus, -ENOTCONN);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ /* If no callback is specified and there's no interest in a slot, then there's no reason to ask for a reply */
+ if (!callback && !slot && !m->sealed)
+ m->header->flags |= BUS_MESSAGE_NO_REPLY_EXPECTED;
+
+ r = ordered_hashmap_ensure_allocated(&bus->reply_callbacks, &uint64_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = prioq_ensure_allocated(&bus->reply_callbacks_prioq, timeout_compare);
+ if (r < 0)
+ return r;
+
+ r = bus_seal_message(bus, m, usec);
+ if (r < 0)
+ return r;
+
+ r = bus_remarshal_message(bus, &m);
+ if (r < 0)
+ return r;
+
+ if (slot || callback) {
+ s = bus_slot_allocate(bus, !slot, BUS_REPLY_CALLBACK, sizeof(struct reply_callback), userdata);
+ if (!s)
+ return -ENOMEM;
+
+ s->reply_callback.callback = callback;
+
+ s->reply_callback.cookie = BUS_MESSAGE_COOKIE(m);
+ r = ordered_hashmap_put(bus->reply_callbacks, &s->reply_callback.cookie, &s->reply_callback);
+ if (r < 0) {
+ s->reply_callback.cookie = 0;
+ return r;
+ }
+
+ s->reply_callback.timeout_usec = calc_elapse(bus, m->timeout);
+ if (s->reply_callback.timeout_usec != 0) {
+ r = prioq_put(bus->reply_callbacks_prioq, &s->reply_callback, &s->reply_callback.prioq_idx);
+ if (r < 0) {
+ s->reply_callback.timeout_usec = 0;
+ return r;
+ }
+ }
+ }
+
+ r = sd_bus_send(bus, m, s ? &s->reply_callback.cookie : NULL);
+ if (r < 0)
+ return r;
+
+ if (slot)
+ *slot = s;
+ s = NULL;
+
+ return r;
+}
+
+int bus_ensure_running(sd_bus *bus) {
+ int r;
+
+ assert(bus);
+
+ if (bus->state == BUS_RUNNING)
+ return 1;
+
+ for (;;) {
+ if (IN_SET(bus->state, BUS_UNSET, BUS_CLOSED, BUS_CLOSING))
+ return -ENOTCONN;
+
+ r = sd_bus_process(bus, NULL);
+ if (r < 0)
+ return r;
+ if (bus->state == BUS_RUNNING)
+ return 1;
+ if (r > 0)
+ continue;
+
+ r = sd_bus_wait(bus, UINT64_MAX);
+ if (r < 0)
+ return r;
+ }
+}
+
+_public_ int sd_bus_call(
+ sd_bus *bus,
+ sd_bus_message *_m,
+ uint64_t usec,
+ sd_bus_error *error,
+ sd_bus_message **reply) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = sd_bus_message_ref(_m);
+ usec_t timeout;
+ uint64_t cookie;
+ size_t i;
+ int r;
+
+ bus_assert_return(m, -EINVAL, error);
+ bus_assert_return(m->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL, error);
+ bus_assert_return(!(m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED), -EINVAL, error);
+ bus_assert_return(!bus_error_is_dirty(error), -EINVAL, error);
+
+ if (bus)
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ else
+ assert_return(bus = m->bus, -ENOTCONN);
+ bus_assert_return(!bus_origin_changed(bus), -ECHILD, error);
+
+ if (!BUS_IS_OPEN(bus->state)) {
+ r = -ENOTCONN;
+ goto fail;
+ }
+
+ r = bus_ensure_running(bus);
+ if (r < 0)
+ goto fail;
+
+ i = bus->rqueue_size;
+
+ r = bus_seal_message(bus, m, usec);
+ if (r < 0)
+ goto fail;
+
+ r = bus_remarshal_message(bus, &m);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_send(bus, m, &cookie);
+ if (r < 0)
+ goto fail;
+
+ timeout = calc_elapse(bus, m->timeout);
+
+ for (;;) {
+ usec_t left;
+
+ while (i < bus->rqueue_size) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *incoming = NULL;
+
+ incoming = sd_bus_message_ref(bus->rqueue[i]);
+
+ if (incoming->reply_cookie == cookie) {
+ /* Found a match! */
+
+ rqueue_drop_one(bus, i);
+ log_debug_bus_message(incoming);
+
+ if (incoming->header->type == SD_BUS_MESSAGE_METHOD_RETURN) {
+
+ if (incoming->n_fds <= 0 || bus->accept_fd) {
+ if (reply)
+ *reply = TAKE_PTR(incoming);
+
+ return 1;
+ }
+
+ return sd_bus_error_set(error, SD_BUS_ERROR_INCONSISTENT_MESSAGE,
+ "Reply message contained file descriptors which I couldn't accept. Sorry.");
+
+ } else if (incoming->header->type == SD_BUS_MESSAGE_METHOD_ERROR)
+ return sd_bus_error_copy(error, &incoming->error);
+ else {
+ r = -EIO;
+ goto fail;
+ }
+
+ } else if (BUS_MESSAGE_COOKIE(incoming) == cookie &&
+ bus->unique_name &&
+ incoming->sender &&
+ streq(bus->unique_name, incoming->sender)) {
+
+ rqueue_drop_one(bus, i);
+
+ /* Our own message? Somebody is trying to send its own client a message,
+ * let's not dead-lock, let's fail immediately. */
+
+ r = -ELOOP;
+ goto fail;
+ }
+
+ /* Try to read more, right-away */
+ i++;
+ }
+
+ r = bus_read_message(bus);
+ if (r < 0) {
+ if (ERRNO_IS_DISCONNECT(r)) {
+ bus_enter_closing(bus);
+ r = -ECONNRESET;
+ }
+
+ goto fail;
+ }
+ if (r > 0)
+ continue;
+
+ if (timeout > 0) {
+ usec_t n;
+
+ n = now(CLOCK_MONOTONIC);
+ if (n >= timeout) {
+ r = -ETIMEDOUT;
+ goto fail;
+ }
+
+ left = timeout - n;
+ } else
+ left = UINT64_MAX;
+
+ r = bus_poll(bus, true, left);
+ if (ERRNO_IS_NEG_TRANSIENT(r))
+ continue;
+ if (r < 0)
+ goto fail;
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto fail;
+ }
+
+ r = dispatch_wqueue(bus);
+ if (r < 0) {
+ if (ERRNO_IS_DISCONNECT(r)) {
+ bus_enter_closing(bus);
+ r = -ECONNRESET;
+ }
+
+ goto fail;
+ }
+ }
+
+fail:
+ return sd_bus_error_set_errno(error, r);
+}
+
+_public_ int sd_bus_get_fd(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(bus->input_fd == bus->output_fd, -EPERM);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (bus->state == BUS_CLOSED)
+ return -ENOTCONN;
+
+ if (bus->inotify_fd >= 0)
+ return bus->inotify_fd;
+
+ if (bus->input_fd >= 0)
+ return bus->input_fd;
+
+ return -ENOTCONN;
+}
+
+_public_ int sd_bus_get_events(sd_bus *bus) {
+ int flags = 0;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ switch (bus->state) {
+
+ case BUS_UNSET:
+ case BUS_CLOSED:
+ return -ENOTCONN;
+
+ case BUS_WATCH_BIND:
+ flags |= POLLIN;
+ break;
+
+ case BUS_OPENING:
+ flags |= POLLOUT;
+ break;
+
+ case BUS_AUTHENTICATING:
+ if (bus_socket_auth_needs_write(bus))
+ flags |= POLLOUT;
+
+ flags |= POLLIN;
+ break;
+
+ case BUS_RUNNING:
+ case BUS_HELLO:
+ if (bus->rqueue_size <= 0)
+ flags |= POLLIN;
+ if (bus->wqueue_size > 0)
+ flags |= POLLOUT;
+ break;
+
+ case BUS_CLOSING:
+ break;
+
+ default:
+ assert_not_reached();
+ }
+
+ return flags;
+}
+
+_public_ int sd_bus_get_timeout(sd_bus *bus, uint64_t *timeout_usec) {
+ struct reply_callback *c;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(timeout_usec, -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state) && bus->state != BUS_CLOSING)
+ return -ENOTCONN;
+
+ if (bus->track_queue) {
+ *timeout_usec = 0;
+ return 1;
+ }
+
+ switch (bus->state) {
+
+ case BUS_AUTHENTICATING:
+ *timeout_usec = bus->auth_timeout;
+ return 1;
+
+ case BUS_RUNNING:
+ case BUS_HELLO:
+ if (bus->rqueue_size > 0) {
+ *timeout_usec = 0;
+ return 1;
+ }
+
+ c = prioq_peek(bus->reply_callbacks_prioq);
+ if (!c) {
+ *timeout_usec = UINT64_MAX;
+ return 0;
+ }
+
+ if (c->timeout_usec == 0) {
+ *timeout_usec = UINT64_MAX;
+ return 0;
+ }
+
+ *timeout_usec = c->timeout_usec;
+ return 1;
+
+ case BUS_CLOSING:
+ *timeout_usec = 0;
+ return 1;
+
+ case BUS_WATCH_BIND:
+ case BUS_OPENING:
+ *timeout_usec = UINT64_MAX;
+ return 0;
+
+ default:
+ assert_not_reached();
+ }
+}
+
+static int process_timeout(sd_bus *bus) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message* m = NULL;
+ struct reply_callback *c;
+ sd_bus_slot *slot;
+ bool is_hello;
+ usec_t n;
+ int r;
+
+ assert(bus);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ c = prioq_peek(bus->reply_callbacks_prioq);
+ if (!c)
+ return 0;
+
+ n = now(CLOCK_MONOTONIC);
+ if (c->timeout_usec > n)
+ return 0;
+
+ r = bus_message_new_synthetic_error(
+ bus,
+ c->cookie,
+ &SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_NO_REPLY, "Method call timed out"),
+ &m);
+ if (r < 0)
+ return r;
+
+ m->read_counter = ++bus->read_counter;
+
+ r = bus_seal_synthetic_message(bus, m);
+ if (r < 0)
+ return r;
+
+ assert_se(prioq_pop(bus->reply_callbacks_prioq) == c);
+ c->timeout_usec = 0;
+
+ ordered_hashmap_remove(bus->reply_callbacks, &c->cookie);
+ c->cookie = 0;
+
+ slot = container_of(c, sd_bus_slot, reply_callback);
+
+ bus->iteration_counter++;
+
+ is_hello = bus->state == BUS_HELLO && c->callback == hello_callback;
+
+ bus->current_message = m;
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_handler = c->callback;
+ bus->current_userdata = slot->userdata;
+ r = c->callback(m, slot->userdata, &error_buffer);
+ bus->current_userdata = NULL;
+ bus->current_handler = NULL;
+ bus->current_slot = NULL;
+ bus->current_message = NULL;
+
+ if (slot->floating)
+ bus_slot_disconnect(slot, true);
+
+ sd_bus_slot_unref(slot);
+
+ /* When this is the hello message and it timed out, then make sure to propagate the error up, don't just log
+ * and ignore the callback handler's return value. */
+ if (is_hello)
+ return r;
+
+ return bus_maybe_reply_error(m, r, &error_buffer);
+}
+
+static int process_hello(sd_bus *bus, sd_bus_message *m) {
+ assert(bus);
+ assert(m);
+
+ if (bus->state != BUS_HELLO)
+ return 0;
+
+ /* Let's make sure the first message on the bus is the HELLO
+ * reply. But note that we don't actually parse the message
+ * here (we leave that to the usual handling), we just verify
+ * we don't let any earlier msg through. */
+
+ if (!IN_SET(m->header->type, SD_BUS_MESSAGE_METHOD_RETURN, SD_BUS_MESSAGE_METHOD_ERROR))
+ return -EIO;
+
+ if (m->reply_cookie != 1)
+ return -EIO;
+
+ return 0;
+}
+
+static int process_reply(sd_bus *bus, sd_bus_message *m) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *synthetic_reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL;
+ struct reply_callback *c;
+ sd_bus_slot *slot;
+ bool is_hello;
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ if (!IN_SET(m->header->type, SD_BUS_MESSAGE_METHOD_RETURN, SD_BUS_MESSAGE_METHOD_ERROR))
+ return 0;
+
+ if (m->destination && bus->unique_name && !streq_ptr(m->destination, bus->unique_name))
+ return 0;
+
+ c = ordered_hashmap_remove(bus->reply_callbacks, &m->reply_cookie);
+ if (!c)
+ return 0;
+
+ c->cookie = 0;
+
+ slot = container_of(c, sd_bus_slot, reply_callback);
+
+ if (m->n_fds > 0 && !bus->accept_fd) {
+
+ /* If the reply contained a file descriptor which we
+ * didn't want we pass an error instead. */
+
+ r = bus_message_new_synthetic_error(
+ bus,
+ m->reply_cookie,
+ &SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_INCONSISTENT_MESSAGE, "Reply message contained file descriptor"),
+ &synthetic_reply);
+ if (r < 0)
+ return r;
+
+ /* Copy over original timestamp */
+ synthetic_reply->realtime = m->realtime;
+ synthetic_reply->monotonic = m->monotonic;
+ synthetic_reply->seqnum = m->seqnum;
+ synthetic_reply->read_counter = m->read_counter;
+
+ r = bus_seal_synthetic_message(bus, synthetic_reply);
+ if (r < 0)
+ return r;
+
+ m = synthetic_reply;
+ } else {
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+ }
+
+ if (c->timeout_usec != 0) {
+ prioq_remove(bus->reply_callbacks_prioq, c, &c->prioq_idx);
+ c->timeout_usec = 0;
+ }
+
+ is_hello = bus->state == BUS_HELLO && c->callback == hello_callback;
+
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_handler = c->callback;
+ bus->current_userdata = slot->userdata;
+ r = c->callback(m, slot->userdata, &error_buffer);
+ bus->current_userdata = NULL;
+ bus->current_handler = NULL;
+ bus->current_slot = NULL;
+
+ if (slot->floating)
+ bus_slot_disconnect(slot, true);
+
+ sd_bus_slot_unref(slot);
+
+ /* When this is the hello message and it failed, then make sure to propagate the error up, don't just log and
+ * ignore the callback handler's return value. */
+ if (is_hello)
+ return r;
+
+ return bus_maybe_reply_error(m, r, &error_buffer);
+}
+
+static int process_filter(sd_bus *bus, sd_bus_message *m) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ do {
+ bus->filter_callbacks_modified = false;
+
+ LIST_FOREACH(callbacks, l, bus->filter_callbacks) {
+ sd_bus_slot *slot;
+
+ if (bus->filter_callbacks_modified)
+ break;
+
+ /* Don't run this more than once per iteration */
+ if (l->last_iteration == bus->iteration_counter)
+ continue;
+
+ l->last_iteration = bus->iteration_counter;
+
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ slot = container_of(l, sd_bus_slot, filter_callback);
+
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_handler = l->callback;
+ bus->current_userdata = slot->userdata;
+ r = l->callback(m, slot->userdata, &error_buffer);
+ bus->current_userdata = NULL;
+ bus->current_handler = NULL;
+ bus->current_slot = sd_bus_slot_unref(slot);
+
+ r = bus_maybe_reply_error(m, r, &error_buffer);
+ if (r != 0)
+ return r;
+
+ }
+
+ } while (bus->filter_callbacks_modified);
+
+ return 0;
+}
+
+static int process_match(sd_bus *bus, sd_bus_message *m) {
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ do {
+ bus->match_callbacks_modified = false;
+
+ r = bus_match_run(bus, &bus->match_callbacks, m);
+ if (r != 0)
+ return r;
+
+ } while (bus->match_callbacks_modified);
+
+ return 0;
+}
+
+static int process_builtin(sd_bus *bus, sd_bus_message *m) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ if (bus->is_monitor)
+ return 0;
+
+ if (bus->manual_peer_interface)
+ return 0;
+
+ if (m->header->type != SD_BUS_MESSAGE_METHOD_CALL)
+ return 0;
+
+ if (!streq_ptr(m->interface, "org.freedesktop.DBus.Peer"))
+ return 0;
+
+ if (m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)
+ return 1;
+
+ if (streq_ptr(m->member, "Ping"))
+ r = sd_bus_message_new_method_return(m, &reply);
+ else if (streq_ptr(m->member, "GetMachineId")) {
+ sd_id128_t id;
+
+ r = sd_id128_get_machine(&id);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "s", SD_ID128_TO_STRING(id));
+ } else {
+ r = sd_bus_message_new_method_errorf(
+ m, &reply,
+ SD_BUS_ERROR_UNKNOWN_METHOD,
+ "Unknown method '%s' on interface '%s'.", m->member, m->interface);
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_send(bus, reply, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int process_fd_check(sd_bus *bus, sd_bus_message *m) {
+ assert(bus);
+ assert(m);
+
+ /* If we got a message with a file descriptor which we didn't
+ * want to accept, then let's drop it. How can this even
+ * happen? For example, when the kernel queues a message into
+ * an activatable names's queue which allows fds, and then is
+ * delivered to us later even though we ourselves did not
+ * negotiate it. */
+
+ if (bus->is_monitor)
+ return 0;
+
+ if (m->n_fds <= 0)
+ return 0;
+
+ if (bus->accept_fd)
+ return 0;
+
+ if (m->header->type != SD_BUS_MESSAGE_METHOD_CALL)
+ return 1; /* just eat it up */
+
+ return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INCONSISTENT_MESSAGE,
+ "Message contains file descriptors, which I cannot accept. Sorry.");
+}
+
+static int process_message(sd_bus *bus, sd_bus_message *m) {
+ _unused_ _cleanup_(log_context_unrefp) LogContext *c = NULL;
+ int r;
+
+ assert(bus);
+ assert(m);
+
+ bus->current_message = m;
+ bus->iteration_counter++;
+
+ if (log_context_enabled())
+ c = log_context_new_strv_consume(bus_message_make_log_fields(m));
+
+ log_debug_bus_message(m);
+
+ r = process_hello(bus, m);
+ if (r != 0)
+ goto finish;
+
+ r = process_reply(bus, m);
+ if (r != 0)
+ goto finish;
+
+ r = process_fd_check(bus, m);
+ if (r != 0)
+ goto finish;
+
+ r = process_filter(bus, m);
+ if (r != 0)
+ goto finish;
+
+ r = process_match(bus, m);
+ if (r != 0)
+ goto finish;
+
+ r = process_builtin(bus, m);
+ if (r != 0)
+ goto finish;
+
+ r = bus_process_object(bus, m);
+
+finish:
+ bus->current_message = NULL;
+ return r;
+}
+
+static int dispatch_track(sd_bus *bus) {
+ assert(bus);
+
+ if (!bus->track_queue)
+ return 0;
+
+ bus_track_dispatch(bus->track_queue);
+ return 1;
+}
+
+static int process_running(sd_bus *bus, sd_bus_message **ret) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert(bus);
+ assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO));
+
+ r = process_timeout(bus);
+ if (r != 0)
+ goto null_message;
+
+ r = dispatch_wqueue(bus);
+ if (r != 0)
+ goto null_message;
+
+ r = dispatch_track(bus);
+ if (r != 0)
+ goto null_message;
+
+ r = dispatch_rqueue(bus, &m);
+ if (r < 0)
+ return r;
+ if (!m)
+ goto null_message;
+
+ r = process_message(bus, m);
+ if (r != 0)
+ goto null_message;
+
+ if (ret) {
+ r = sd_bus_message_rewind(m, true);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(m);
+ return 1;
+ }
+
+ if (m->header->type == SD_BUS_MESSAGE_METHOD_CALL) {
+
+ log_debug("Unprocessed message call sender=%s object=%s interface=%s member=%s",
+ strna(sd_bus_message_get_sender(m)),
+ strna(sd_bus_message_get_path(m)),
+ strna(sd_bus_message_get_interface(m)),
+ strna(sd_bus_message_get_member(m)));
+
+ r = sd_bus_reply_method_errorf(
+ m,
+ SD_BUS_ERROR_UNKNOWN_OBJECT,
+ "Unknown object '%s'.", m->path);
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+
+null_message:
+ if (r >= 0 && ret)
+ *ret = NULL;
+
+ return r;
+}
+
+static int bus_exit_now(sd_bus *bus, sd_event *event) {
+ assert(bus);
+
+ /* Exit due to close, if this is requested. If this is bus object is attached to an event source, invokes
+ * sd_event_exit(), otherwise invokes libc exit(). */
+
+ if (bus->exited) /* did we already exit? */
+ return 0;
+ if (!bus->exit_triggered) /* was the exit condition triggered? */
+ return 0;
+ if (!bus->exit_on_disconnect) /* Shall we actually exit on disconnection? */
+ return 0;
+
+ bus->exited = true; /* never exit more than once */
+
+ log_debug("Bus connection disconnected, exiting.");
+
+ if (!event)
+ event = bus->event;
+
+ if (event)
+ return sd_event_exit(event, EXIT_FAILURE);
+ else
+ exit(EXIT_FAILURE);
+
+ assert_not_reached();
+}
+
+static int process_closing_reply_callback(sd_bus *bus, struct reply_callback *c) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ sd_bus_slot *slot;
+ int r;
+
+ assert(bus);
+ assert(c);
+
+ r = bus_message_new_synthetic_error(
+ bus,
+ c->cookie,
+ &SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_NO_REPLY, "Connection terminated"),
+ &m);
+ if (r < 0)
+ return r;
+
+ m->read_counter = ++bus->read_counter;
+
+ r = bus_seal_synthetic_message(bus, m);
+ if (r < 0)
+ return r;
+
+ if (c->timeout_usec != 0) {
+ prioq_remove(bus->reply_callbacks_prioq, c, &c->prioq_idx);
+ c->timeout_usec = 0;
+ }
+
+ ordered_hashmap_remove(bus->reply_callbacks, &c->cookie);
+ c->cookie = 0;
+
+ slot = container_of(c, sd_bus_slot, reply_callback);
+
+ bus->iteration_counter++;
+
+ bus->current_message = m;
+ bus->current_slot = sd_bus_slot_ref(slot);
+ bus->current_handler = c->callback;
+ bus->current_userdata = slot->userdata;
+ r = c->callback(m, slot->userdata, &error_buffer);
+ bus->current_userdata = NULL;
+ bus->current_handler = NULL;
+ bus->current_slot = NULL;
+ bus->current_message = NULL;
+
+ if (slot->floating)
+ bus_slot_disconnect(slot, true);
+
+ sd_bus_slot_unref(slot);
+
+ return bus_maybe_reply_error(m, r, &error_buffer);
+}
+
+static int process_closing(sd_bus *bus, sd_bus_message **ret) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ struct reply_callback *c;
+ int r;
+
+ assert(bus);
+ assert(bus->state == BUS_CLOSING);
+
+ /* First, fail all outstanding method calls */
+ c = ordered_hashmap_first(bus->reply_callbacks);
+ if (c)
+ return process_closing_reply_callback(bus, c);
+
+ /* Then, fake-drop all remaining bus tracking references */
+ if (bus->tracks) {
+ bus_track_close(bus->tracks);
+ return 1;
+ }
+
+ /* Then, synthesize a Disconnected message */
+ r = sd_bus_message_new_signal(
+ bus,
+ &m,
+ "/org/freedesktop/DBus/Local",
+ "org.freedesktop.DBus.Local",
+ "Disconnected");
+ if (r < 0)
+ return r;
+
+ bus_message_set_sender_local(bus, m);
+ m->read_counter = ++bus->read_counter;
+
+ r = bus_seal_synthetic_message(bus, m);
+ if (r < 0)
+ return r;
+
+ /* sd_bus_close() will deref the event and set bus->event to NULL. But in bus_exit_now() we use
+ * bus->event to decide whether to return from the event loop or exit(), but given it's always NULL
+ * at that point, it always exit(). Ref it here and pass it through further down to avoid that. */
+ event = sd_event_ref(bus->event);
+ sd_bus_close(bus);
+
+ bus->current_message = m;
+ bus->iteration_counter++;
+
+ r = process_filter(bus, m);
+ if (r != 0)
+ goto finish;
+
+ r = process_match(bus, m);
+ if (r != 0)
+ goto finish;
+
+ /* Nothing else to do, exit now, if the condition holds */
+ bus->exit_triggered = true;
+ (void) bus_exit_now(bus, event);
+
+ if (ret)
+ *ret = TAKE_PTR(m);
+
+ r = 1;
+
+finish:
+ bus->current_message = NULL;
+
+ return r;
+}
+
+static int bus_process_internal(sd_bus *bus, sd_bus_message **ret) {
+ int r;
+
+ /* Returns 0 when we didn't do anything. This should cause the
+ * caller to invoke sd_bus_wait() before returning the next
+ * time. Returns > 0 when we did something, which possibly
+ * means *ret is filled in with an unprocessed message. */
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ /* We don't allow recursively invoking sd_bus_process(). */
+ assert_return(!bus->current_message, -EBUSY);
+ assert(!bus->current_slot); /* This should be NULL whenever bus->current_message is */
+
+ BUS_DONT_DESTROY(bus);
+
+ switch (bus->state) {
+
+ case BUS_UNSET:
+ return -ENOTCONN;
+
+ case BUS_CLOSED:
+ return -ECONNRESET;
+
+ case BUS_WATCH_BIND:
+ r = bus_socket_process_watch_bind(bus);
+ break;
+
+ case BUS_OPENING:
+ r = bus_socket_process_opening(bus);
+ break;
+
+ case BUS_AUTHENTICATING:
+ r = bus_socket_process_authenticating(bus);
+ break;
+
+ case BUS_RUNNING:
+ case BUS_HELLO:
+ r = process_running(bus, ret);
+ if (r >= 0)
+ return r;
+
+ /* This branch initializes *ret, hence we don't use the generic error checking below */
+ break;
+
+ case BUS_CLOSING:
+ return process_closing(bus, ret);
+
+ default:
+ assert_not_reached();
+ }
+
+ if (ERRNO_IS_NEG_DISCONNECT(r)) {
+ bus_enter_closing(bus);
+ r = 1;
+ } else if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = NULL;
+
+ return r;
+}
+
+_public_ int sd_bus_process(sd_bus *bus, sd_bus_message **ret) {
+ return bus_process_internal(bus, ret);
+}
+
+_public_ int sd_bus_process_priority(sd_bus *bus, int64_t priority, sd_bus_message **ret) {
+ return bus_process_internal(bus, ret);
+}
+
+static int bus_poll(sd_bus *bus, bool need_more, uint64_t timeout_usec) {
+ struct pollfd p[2] = {};
+ usec_t m = USEC_INFINITY;
+ int r, n;
+
+ assert(bus);
+
+ if (bus->state == BUS_CLOSING)
+ return 1;
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (bus->state == BUS_WATCH_BIND) {
+ assert(bus->inotify_fd >= 0);
+
+ p[0].events = POLLIN;
+ p[0].fd = bus->inotify_fd;
+ n = 1;
+ } else {
+ int e;
+
+ e = sd_bus_get_events(bus);
+ if (e < 0)
+ return e;
+
+ if (need_more)
+ /* The caller really needs some more data, they don't
+ * care about what's already read, or any timeouts
+ * except its own. */
+ e |= POLLIN;
+ else {
+ usec_t until;
+ /* The caller wants to process if there's something to
+ * process, but doesn't care otherwise */
+
+ r = sd_bus_get_timeout(bus, &until);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ m = usec_sub_unsigned(until, now(CLOCK_MONOTONIC));
+ }
+
+ p[0].fd = bus->input_fd;
+ if (bus->output_fd == bus->input_fd) {
+ p[0].events = e;
+ n = 1;
+ } else {
+ p[0].events = e & POLLIN;
+ p[1].fd = bus->output_fd;
+ p[1].events = e & POLLOUT;
+ n = 2;
+ }
+ }
+
+ if (timeout_usec != UINT64_MAX && (m == USEC_INFINITY || timeout_usec < m))
+ m = timeout_usec;
+
+ r = ppoll_usec(p, n, m);
+ if (r <= 0)
+ return r;
+
+ return 1;
+}
+
+_public_ int sd_bus_wait(sd_bus *bus, uint64_t timeout_usec) {
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (bus->state == BUS_CLOSING)
+ return 0;
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ if (bus->rqueue_size > 0)
+ return 0;
+
+ r = bus_poll(bus, false, timeout_usec);
+ if (ERRNO_IS_NEG_TRANSIENT(r))
+ return 1; /* treat EINTR as success, but let's exit, so that the caller will call back into us soon. */
+
+ return r;
+}
+
+_public_ int sd_bus_flush(sd_bus *bus) {
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (bus->state == BUS_CLOSING)
+ return 0;
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ /* We never were connected? Don't hang in inotify for good, as there's no timeout set for it */
+ if (bus->state == BUS_WATCH_BIND)
+ return -EUNATCH;
+
+ r = bus_ensure_running(bus);
+ if (r < 0)
+ return r;
+
+ if (bus->wqueue_size <= 0)
+ return 0;
+
+ for (;;) {
+ r = dispatch_wqueue(bus);
+ if (ERRNO_IS_NEG_DISCONNECT(r)) {
+ bus_enter_closing(bus);
+ return -ECONNRESET;
+ } else if (r < 0)
+ return r;
+
+ if (bus->wqueue_size <= 0)
+ return 0;
+
+ r = bus_poll(bus, false, UINT64_MAX);
+ if (ERRNO_IS_NEG_TRANSIENT(r))
+ continue;
+ if (r < 0)
+ return r;
+ }
+}
+
+_public_ int sd_bus_add_filter(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ sd_bus_slot *s;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(callback, -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ s = bus_slot_allocate(bus, !slot, BUS_FILTER_CALLBACK, sizeof(struct filter_callback), userdata);
+ if (!s)
+ return -ENOMEM;
+
+ s->filter_callback.callback = callback;
+
+ bus->filter_callbacks_modified = true;
+ LIST_PREPEND(callbacks, bus->filter_callbacks, &s->filter_callback);
+
+ if (slot)
+ *slot = s;
+
+ return 0;
+}
+
+static int add_match_callback(
+ sd_bus_message *m,
+ void *userdata,
+ sd_bus_error *ret_error) {
+
+ sd_bus_slot *match_slot = ASSERT_PTR(userdata);
+ bool failed = false;
+ int r;
+
+ assert(m);
+
+ sd_bus_slot_ref(match_slot);
+
+ if (sd_bus_message_is_method_error(m, NULL)) {
+ log_debug_errno(sd_bus_message_get_errno(m),
+ "Unable to add match %s, failing connection: %s",
+ match_slot->match_callback.match_string,
+ sd_bus_message_get_error(m)->message);
+
+ failed = true;
+ } else
+ log_debug("Match %s successfully installed.", match_slot->match_callback.match_string);
+
+ if (match_slot->match_callback.install_callback) {
+ sd_bus *bus;
+
+ bus = sd_bus_message_get_bus(m);
+
+ /* This function has been called as slot handler, and we want to call another slot handler. Let's
+ * update the slot callback metadata temporarily with our own data, and then revert back to the old
+ * values. */
+
+ assert(bus->current_slot == match_slot->match_callback.install_slot);
+ assert(bus->current_handler == add_match_callback);
+ assert(bus->current_userdata == userdata);
+
+ bus->current_slot = match_slot;
+ bus->current_handler = match_slot->match_callback.install_callback;
+ bus->current_userdata = match_slot->userdata;
+
+ r = match_slot->match_callback.install_callback(m, match_slot->userdata, ret_error);
+
+ bus->current_slot = match_slot->match_callback.install_slot;
+ bus->current_handler = add_match_callback;
+ bus->current_userdata = userdata;
+ } else {
+ if (failed) /* Generic failure handling: destroy the connection */
+ bus_enter_closing(sd_bus_message_get_bus(m));
+
+ r = 1;
+ }
+
+ /* We don't need the install method reply slot anymore, let's free it */
+ match_slot->match_callback.install_slot = sd_bus_slot_unref(match_slot->match_callback.install_slot);
+
+ if (failed && match_slot->floating)
+ bus_slot_disconnect(match_slot, true);
+
+ sd_bus_slot_unref(match_slot);
+
+ return r;
+}
+
+int bus_add_match_full(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ bool asynchronous,
+ const char *match,
+ sd_bus_message_handler_t callback,
+ sd_bus_message_handler_t install_callback,
+ void *userdata,
+ uint64_t timeout_usec) {
+
+ struct bus_match_component *components = NULL;
+ size_t n_components = 0;
+ _cleanup_(sd_bus_slot_unrefp) sd_bus_slot *s = NULL;
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(match, -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ CLEANUP_ARRAY(components, n_components, bus_match_parse_free);
+
+ r = bus_match_parse(match, &components, &n_components);
+ if (r < 0)
+ return r;
+
+ s = bus_slot_allocate(bus, !slot, BUS_MATCH_CALLBACK, sizeof(struct match_callback), userdata);
+ if (!s)
+ return -ENOMEM;
+
+ s->match_callback.callback = callback;
+ s->match_callback.install_callback = install_callback;
+
+ if (bus->bus_client) {
+ enum bus_match_scope scope;
+
+ scope = bus_match_get_scope(components, n_components);
+
+ /* Do not install server-side matches for matches against the local service, interface or bus path. */
+ if (scope != BUS_MATCH_LOCAL) {
+
+ /* We store the original match string, so that we can use it to remove the match again. */
+
+ s->match_callback.match_string = strdup(match);
+ if (!s->match_callback.match_string)
+ return -ENOMEM;
+
+ if (asynchronous) {
+ r = bus_add_match_internal_async(bus,
+ &s->match_callback.install_slot,
+ s->match_callback.match_string,
+ add_match_callback,
+ s,
+ timeout_usec);
+
+ if (r < 0)
+ return r;
+
+ /* Make the slot of the match call floating now. We need the reference, but we don't
+ * want that this match pins the bus object, hence we first create it non-floating, but
+ * then make it floating. */
+ r = sd_bus_slot_set_floating(s->match_callback.install_slot, true);
+ } else
+ r = bus_add_match_internal(bus,
+ s->match_callback.match_string,
+ timeout_usec,
+ &s->match_callback.after);
+ if (r < 0)
+ return r;
+
+ s->match_added = true;
+ }
+ }
+
+ bus->match_callbacks_modified = true;
+ r = bus_match_add(&bus->match_callbacks, components, n_components, &s->match_callback);
+ if (r < 0)
+ return r;
+
+ if (slot)
+ *slot = s;
+ s = NULL;
+
+ return 0;
+}
+
+_public_ int sd_bus_add_match(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *match,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ return bus_add_match_full(bus, slot, false, match, callback, NULL, userdata, 0);
+}
+
+_public_ int sd_bus_add_match_async(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const char *match,
+ sd_bus_message_handler_t callback,
+ sd_bus_message_handler_t install_callback,
+ void *userdata) {
+
+ return bus_add_match_full(bus, slot, true, match, callback, install_callback, userdata, 0);
+}
+
+static int io_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ sd_bus *bus = ASSERT_PTR(userdata);
+ int r;
+
+ /* Note that this is called both on input_fd, output_fd as well as inotify_fd events */
+
+ r = sd_bus_process(bus, NULL);
+ if (r < 0) {
+ log_debug_errno(r, "Processing of bus failed, closing down: %m");
+ bus_enter_closing(bus);
+ }
+
+ return 1;
+}
+
+static int time_callback(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_bus *bus = ASSERT_PTR(userdata);
+ int r;
+
+ r = sd_bus_process(bus, NULL);
+ if (r < 0) {
+ log_debug_errno(r, "Processing of bus failed, closing down: %m");
+ bus_enter_closing(bus);
+ }
+
+ return 1;
+}
+
+static int prepare_callback(sd_event_source *s, void *userdata) {
+ sd_bus *bus = ASSERT_PTR(userdata);
+ int r, e;
+ usec_t until;
+
+ assert(s);
+
+ e = sd_bus_get_events(bus);
+ if (e < 0) {
+ r = e;
+ goto fail;
+ }
+
+ if (bus->output_fd != bus->input_fd) {
+
+ r = sd_event_source_set_io_events(bus->input_io_event_source, e & POLLIN);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_io_events(bus->output_io_event_source, e & POLLOUT);
+ } else
+ r = sd_event_source_set_io_events(bus->input_io_event_source, e);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_get_timeout(bus, &until);
+ if (r < 0)
+ goto fail;
+ if (r > 0) {
+ int j;
+
+ j = sd_event_source_set_time(bus->time_event_source, until);
+ if (j < 0) {
+ r = j;
+ goto fail;
+ }
+ }
+
+ r = sd_event_source_set_enabled(bus->time_event_source, r > 0 ? SD_EVENT_ONESHOT : SD_EVENT_OFF);
+ if (r < 0)
+ goto fail;
+
+ return 1;
+
+fail:
+ log_debug_errno(r, "Preparing of bus events failed, closing down: %m");
+ bus_enter_closing(bus);
+
+ return 1;
+}
+
+static int quit_callback(sd_event_source *event, void *userdata) {
+ sd_bus *bus = userdata;
+
+ assert(event);
+
+ if (bus->close_on_exit) {
+ sd_bus_flush(bus);
+ sd_bus_close(bus);
+ }
+
+ return 1;
+}
+
+int bus_attach_io_events(sd_bus *bus) {
+ int r;
+
+ assert(bus);
+
+ if (bus->input_fd < 0)
+ return 0;
+
+ if (!bus->event)
+ return 0;
+
+ if (!bus->input_io_event_source) {
+ r = sd_event_add_io(bus->event, &bus->input_io_event_source, bus->input_fd, 0, io_callback, bus);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_prepare(bus->input_io_event_source, prepare_callback);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(bus->input_io_event_source, bus->event_priority);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_description(bus->input_io_event_source, "bus-input");
+ } else
+ r = sd_event_source_set_io_fd(bus->input_io_event_source, bus->input_fd);
+
+ if (r < 0)
+ return r;
+
+ if (bus->output_fd != bus->input_fd) {
+ assert(bus->output_fd >= 0);
+
+ if (!bus->output_io_event_source) {
+ r = sd_event_add_io(bus->event, &bus->output_io_event_source, bus->output_fd, 0, io_callback, bus);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(bus->output_io_event_source, bus->event_priority);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_description(bus->input_io_event_source, "bus-output");
+ } else
+ r = sd_event_source_set_io_fd(bus->output_io_event_source, bus->output_fd);
+
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static void bus_detach_io_events(sd_bus *bus) {
+ assert(bus);
+
+ bus->input_io_event_source = sd_event_source_disable_unref(bus->input_io_event_source);
+ bus->output_io_event_source = sd_event_source_disable_unref(bus->output_io_event_source);
+}
+
+int bus_attach_inotify_event(sd_bus *bus) {
+ int r;
+
+ assert(bus);
+
+ if (bus->inotify_fd < 0)
+ return 0;
+
+ if (!bus->event)
+ return 0;
+
+ if (!bus->inotify_event_source) {
+ r = sd_event_add_io(bus->event, &bus->inotify_event_source, bus->inotify_fd, EPOLLIN, io_callback, bus);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(bus->inotify_event_source, bus->event_priority);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_description(bus->inotify_event_source, "bus-inotify");
+ } else
+ r = sd_event_source_set_io_fd(bus->inotify_event_source, bus->inotify_fd);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+_public_ int sd_bus_attach_event(sd_bus *bus, sd_event *event, int priority) {
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus->event, -EBUSY);
+
+ assert(!bus->input_io_event_source);
+ assert(!bus->output_io_event_source);
+ assert(!bus->time_event_source);
+
+ if (event)
+ bus->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&bus->event);
+ if (r < 0)
+ return r;
+ }
+
+ bus->event_priority = priority;
+
+ r = sd_event_add_time(bus->event, &bus->time_event_source, CLOCK_MONOTONIC, 0, 0, time_callback, bus);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(bus->time_event_source, priority);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_description(bus->time_event_source, "bus-time");
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_add_exit(bus->event, &bus->quit_event_source, quit_callback, bus);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_description(bus->quit_event_source, "bus-exit");
+ if (r < 0)
+ goto fail;
+
+ r = bus_attach_io_events(bus);
+ if (r < 0)
+ goto fail;
+
+ r = bus_attach_inotify_event(bus);
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ sd_bus_detach_event(bus);
+ return r;
+}
+
+_public_ int sd_bus_detach_event(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+
+ if (!bus->event)
+ return 0;
+
+ bus_detach_io_events(bus);
+ bus->inotify_event_source = sd_event_source_disable_unref(bus->inotify_event_source);
+ bus->time_event_source = sd_event_source_disable_unref(bus->time_event_source);
+ bus->quit_event_source = sd_event_source_disable_unref(bus->quit_event_source);
+
+ bus->event = sd_event_unref(bus->event);
+ return 1;
+}
+
+_public_ sd_event* sd_bus_get_event(sd_bus *bus) {
+ assert_return(bus = bus_resolve(bus), NULL);
+
+ return bus->event;
+}
+
+_public_ sd_bus_message* sd_bus_get_current_message(sd_bus *bus) {
+ assert_return(bus = bus_resolve(bus), NULL);
+
+ return bus->current_message;
+}
+
+_public_ sd_bus_slot* sd_bus_get_current_slot(sd_bus *bus) {
+ assert_return(bus = bus_resolve(bus), NULL);
+
+ return bus->current_slot;
+}
+
+_public_ sd_bus_message_handler_t sd_bus_get_current_handler(sd_bus *bus) {
+ assert_return(bus = bus_resolve(bus), NULL);
+
+ return bus->current_handler;
+}
+
+_public_ void* sd_bus_get_current_userdata(sd_bus *bus) {
+ assert_return(bus = bus_resolve(bus), NULL);
+
+ return bus->current_userdata;
+}
+
+static int bus_default(int (*bus_open)(sd_bus **), sd_bus **default_bus, sd_bus **ret) {
+ sd_bus *b = NULL;
+ int r;
+
+ assert(bus_open);
+ assert(default_bus);
+
+ if (!ret)
+ return !!*default_bus;
+
+ if (*default_bus) {
+ *ret = sd_bus_ref(*default_bus);
+ return 0;
+ }
+
+ r = bus_open(&b);
+ if (r < 0)
+ return r;
+
+ b->default_bus_ptr = default_bus;
+ b->tid = gettid();
+ *default_bus = b;
+
+ *ret = b;
+ return 1;
+}
+
+_public_ int sd_bus_default_system(sd_bus **ret) {
+ return bus_default(sd_bus_open_system, &default_system_bus, ret);
+}
+
+_public_ int sd_bus_default_user(sd_bus **ret) {
+ return bus_default(sd_bus_open_user, &default_user_bus, ret);
+}
+
+_public_ int sd_bus_default(sd_bus **ret) {
+ int (*bus_open)(sd_bus **) = NULL;
+ sd_bus **busp;
+
+ busp = bus_choose_default(&bus_open);
+ return bus_default(bus_open, busp, ret);
+}
+
+_public_ int sd_bus_get_tid(sd_bus *b, pid_t *tid) {
+ assert_return(b, -EINVAL);
+ assert_return(tid, -EINVAL);
+ assert_return(!bus_origin_changed(b), -ECHILD);
+
+ if (b->tid != 0) {
+ *tid = b->tid;
+ return 0;
+ }
+
+ if (b->event)
+ return sd_event_get_tid(b->event, tid);
+
+ return -ENXIO;
+}
+
+_public_ int sd_bus_path_encode(const char *prefix, const char *external_id, char **ret_path) {
+ _cleanup_free_ char *e = NULL;
+ char *ret;
+
+ assert_return(object_path_is_valid(prefix), -EINVAL);
+ assert_return(external_id, -EINVAL);
+ assert_return(ret_path, -EINVAL);
+
+ e = bus_label_escape(external_id);
+ if (!e)
+ return -ENOMEM;
+
+ ret = path_join(prefix, e);
+ if (!ret)
+ return -ENOMEM;
+
+ *ret_path = ret;
+ return 0;
+}
+
+_public_ int sd_bus_path_decode(const char *path, const char *prefix, char **external_id) {
+ const char *e;
+ char *ret;
+
+ assert_return(object_path_is_valid(path), -EINVAL);
+ assert_return(object_path_is_valid(prefix), -EINVAL);
+ assert_return(external_id, -EINVAL);
+
+ e = object_path_startswith(path, prefix);
+ if (!e) {
+ *external_id = NULL;
+ return 0;
+ }
+
+ /* Note that 'e' might be an empty string here. That's expected. E.g. a case where the subtree
+ * corresponds to a subtree on a disk, and we want to return something that represents the root
+ * of the filesystem. */
+
+ ret = bus_label_unescape(e);
+ if (!ret)
+ return -ENOMEM;
+
+ *external_id = ret;
+ return 1;
+}
+
+_public_ int sd_bus_path_encode_many(char **out, const char *path_template, ...) {
+ _cleanup_strv_free_ char **labels = NULL;
+ char *path, *path_pos, **label_pos;
+ const char *sep, *template_pos;
+ size_t path_length;
+ va_list list;
+ int r;
+
+ assert_return(out, -EINVAL);
+ assert_return(path_template, -EINVAL);
+
+ path_length = strlen(path_template);
+
+ va_start(list, path_template);
+ for (sep = strchr(path_template, '%'); sep; sep = strchr(sep + 1, '%')) {
+ const char *arg;
+ char *label;
+
+ arg = va_arg(list, const char *);
+ if (!arg) {
+ va_end(list);
+ return -EINVAL;
+ }
+
+ label = bus_label_escape(arg);
+ if (!label) {
+ va_end(list);
+ return -ENOMEM;
+ }
+
+ r = strv_consume(&labels, label);
+ if (r < 0) {
+ va_end(list);
+ return r;
+ }
+
+ /* add label length, but account for the format character */
+ path_length += strlen(label) - 1;
+ }
+ va_end(list);
+
+ path = malloc(path_length + 1);
+ if (!path)
+ return -ENOMEM;
+
+ path_pos = path;
+ label_pos = labels;
+
+ for (template_pos = path_template; *template_pos; ) {
+ sep = strchrnul(template_pos, '%');
+ path_pos = mempcpy(path_pos, template_pos, sep - template_pos);
+ if (!*sep)
+ break;
+
+ path_pos = stpcpy(path_pos, *label_pos++);
+ template_pos = sep + 1;
+ }
+
+ *path_pos = 0;
+ *out = path;
+ return 0;
+}
+
+_public_ int sd_bus_path_decode_many(const char *path, const char *path_template, ...) {
+ _cleanup_strv_free_ char **labels = NULL;
+ const char *template_pos, *path_pos;
+ char **label_pos;
+ va_list list;
+ int r;
+
+ /*
+ * This decodes an object-path based on a template argument. The
+ * template consists of a verbatim path, optionally including special
+ * directives:
+ *
+ * - Each occurrence of '%' in the template matches an arbitrary
+ * substring of a label in the given path. At most one such
+ * directive is allowed per label. For each such directive, the
+ * caller must provide an output parameter (char **) via va_arg. If
+ * NULL is passed, the given label is verified, but not returned.
+ * For each matched label, the *decoded* label is stored in the
+ * passed output argument, and the caller is responsible to free
+ * it. Note that the output arguments are only modified if the
+ * actually path matched the template. Otherwise, they're left
+ * untouched.
+ *
+ * This function returns <0 on error, 0 if the path does not match the
+ * template, 1 if it matched.
+ */
+
+ assert_return(path, -EINVAL);
+ assert_return(path_template, -EINVAL);
+
+ path_pos = path;
+
+ for (template_pos = path_template; *template_pos; ) {
+ const char *sep;
+ size_t length;
+ char *label;
+
+ /* verify everything until the next '%' matches verbatim */
+ sep = strchrnul(template_pos, '%');
+ length = sep - template_pos;
+ if (strncmp(path_pos, template_pos, length))
+ return 0;
+
+ path_pos += length;
+ template_pos += length;
+
+ if (!*template_pos)
+ break;
+
+ /* We found the next '%' character. Everything up until here
+ * matched. We now skip ahead to the end of this label and make
+ * sure it matches the tail of the label in the path. Then we
+ * decode the string in-between and save it for later use. */
+
+ ++template_pos; /* skip over '%' */
+
+ sep = strchrnul(template_pos, '/');
+ length = sep - template_pos; /* length of suffix to match verbatim */
+
+ /* verify the suffixes match */
+ sep = strchrnul(path_pos, '/');
+ if (sep - path_pos < (ssize_t)length ||
+ strncmp(sep - length, template_pos, length))
+ return 0;
+
+ template_pos += length; /* skip over matched label */
+ length = sep - path_pos - length; /* length of sub-label to decode */
+
+ /* store unescaped label for later use */
+ label = bus_label_unescape_n(path_pos, length);
+ if (!label)
+ return -ENOMEM;
+
+ r = strv_consume(&labels, label);
+ if (r < 0)
+ return r;
+
+ path_pos = sep; /* skip decoded label and suffix */
+ }
+
+ /* end of template must match end of path */
+ if (*path_pos)
+ return 0;
+
+ /* copy the labels over to the caller */
+ va_start(list, path_template);
+ for (label_pos = labels; label_pos && *label_pos; ++label_pos) {
+ char **arg;
+
+ arg = va_arg(list, char **);
+ if (arg)
+ *arg = *label_pos;
+ else
+ free(*label_pos);
+ }
+ va_end(list);
+
+ labels = mfree(labels);
+ return 1;
+}
+
+_public_ int sd_bus_try_close(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ return -EOPNOTSUPP;
+}
+
+_public_ int sd_bus_get_description(sd_bus *bus, const char **description) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(description, -EINVAL);
+
+ const char *d = bus->description;
+ if (!d)
+ d = runtime_scope_to_string(bus->runtime_scope);
+ if (!d)
+ return -ENXIO;
+
+ *description = d;
+ return 0;
+}
+
+_public_ int sd_bus_get_scope(sd_bus *bus, const char **scope) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(scope, -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (bus->runtime_scope < 0)
+ return -ENODATA;
+
+ *scope = runtime_scope_to_string(bus->runtime_scope);
+ return 0;
+}
+
+_public_ int sd_bus_get_address(sd_bus *bus, const char **address) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(address, -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (bus->address) {
+ *address = bus->address;
+ return 0;
+ }
+
+ return -ENODATA;
+}
+
+_public_ int sd_bus_get_creds_mask(sd_bus *bus, uint64_t *mask) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(mask, -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ *mask = bus->creds_mask;
+ return 0;
+}
+
+_public_ int sd_bus_is_bus_client(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ return bus->bus_client;
+}
+
+_public_ int sd_bus_is_server(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ return bus->is_server;
+}
+
+_public_ int sd_bus_is_anonymous(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ return bus->anonymous_auth;
+}
+
+_public_ int sd_bus_is_trusted(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ return bus->trusted;
+}
+
+_public_ int sd_bus_is_monitor(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ return bus->is_monitor;
+}
+
+static void flush_close(sd_bus *bus) {
+ if (!bus)
+ return;
+
+ /* Flushes and closes the specified bus. We take a ref before,
+ * to ensure the flushing does not cause the bus to be
+ * unreferenced. */
+
+ sd_bus_flush_close_unref(sd_bus_ref(bus));
+}
+
+_public_ void sd_bus_default_flush_close(void) {
+ flush_close(default_starter_bus);
+ flush_close(default_user_bus);
+ flush_close(default_system_bus);
+}
+
+_public_ int sd_bus_set_exit_on_disconnect(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+
+ /* Turns on exit-on-disconnect, and triggers it immediately if the bus connection was already
+ * disconnected. Note that this is triggered exclusively on disconnections triggered by the server side, never
+ * from the client side. */
+ bus->exit_on_disconnect = b;
+
+ /* If the exit condition was triggered already, exit immediately. */
+ return bus_exit_now(bus, /* event= */ NULL);
+}
+
+_public_ int sd_bus_get_exit_on_disconnect(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+
+ return bus->exit_on_disconnect;
+}
+
+_public_ int sd_bus_set_sender(sd_bus *bus, const char *sender) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus->bus_client, -EPERM);
+ assert_return(!sender || service_name_is_valid(sender), -EINVAL);
+
+ return free_and_strdup(&bus->patch_sender, sender);
+}
+
+_public_ int sd_bus_get_sender(sd_bus *bus, const char **ret) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(ret, -EINVAL);
+
+ if (!bus->patch_sender)
+ return -ENODATA;
+
+ *ret = bus->patch_sender;
+ return 0;
+}
+
+_public_ int sd_bus_get_n_queued_read(sd_bus *bus, uint64_t *ret) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+ assert_return(ret, -EINVAL);
+
+ *ret = bus->rqueue_size;
+ return 0;
+}
+
+_public_ int sd_bus_get_n_queued_write(sd_bus *bus, uint64_t *ret) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+ assert_return(ret, -EINVAL);
+
+ *ret = bus->wqueue_size;
+ return 0;
+}
+
+_public_ int sd_bus_set_method_call_timeout(sd_bus *bus, uint64_t usec) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+
+ bus->method_call_timeout = usec;
+ return 0;
+}
+
+_public_ int sd_bus_get_method_call_timeout(sd_bus *bus, uint64_t *ret) {
+ const char *e;
+ usec_t usec;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(ret, -EINVAL);
+
+ if (bus->method_call_timeout != 0) {
+ *ret = bus->method_call_timeout;
+ return 0;
+ }
+
+ e = secure_getenv("SYSTEMD_BUS_TIMEOUT");
+ if (e && parse_sec(e, &usec) >= 0 && usec != 0) {
+ /* Save the parsed value to avoid multiple parsing. To change the timeout value,
+ * use sd_bus_set_method_call_timeout() instead of setenv(). */
+ *ret = bus->method_call_timeout = usec;
+ return 0;
+ }
+
+ *ret = bus->method_call_timeout = BUS_DEFAULT_TIMEOUT;
+ return 0;
+}
+
+_public_ int sd_bus_set_close_on_exit(sd_bus *bus, int b) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+
+ bus->close_on_exit = b;
+ return 0;
+}
+
+_public_ int sd_bus_get_close_on_exit(sd_bus *bus) {
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+
+ return bus->close_on_exit;
+}
+
+_public_ int sd_bus_enqueue_for_read(sd_bus *bus, sd_bus_message *m) {
+ int r;
+
+ assert_return(bus, -EINVAL);
+ assert_return(bus = bus_resolve(bus), -ENOPKG);
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EINVAL);
+ assert_return(!bus_origin_changed(bus), -ECHILD);
+
+ if (!BUS_IS_OPEN(bus->state))
+ return -ENOTCONN;
+
+ /* Re-enqueue a message for reading. This is primarily useful for PolicyKit-style authentication,
+ * where we accept a message, then determine we need to interactively authenticate the user, and then
+ * we want to process the message again. */
+
+ r = bus_rqueue_make_room(bus);
+ if (r < 0)
+ return r;
+
+ bus->rqueue[bus->rqueue_size++] = bus_message_ref_queued(m, bus);
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-address.c b/src/libsystemd/sd-bus/test-bus-address.c
new file mode 100644
index 0000000..347ba1a
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-address.c
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-bus.h"
+
+#include "bus-internal.h"
+#include "log.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+
+static void test_one_address(sd_bus *b,
+ const char *host,
+ int result, const char *expected) {
+ int r;
+
+ r = bus_set_address_system_remote(b, host);
+ log_info("\"%s\" → %d, \"%s\"", host, r, strna(r >= 0 ? b->address : NULL));
+ assert_se(r == result);
+ if (r >= 0)
+ assert_se(streq_ptr(b->address, expected));
+}
+
+TEST(bus_set_address_system_remote) {
+ _cleanup_(sd_bus_unrefp) sd_bus *b = NULL;
+
+ assert_se(sd_bus_new(&b) >= 0);
+ if (!strv_isempty(saved_argv + 1)) {
+ STRV_FOREACH(a, saved_argv + 1)
+ test_one_address(b, *a, 0, NULL);
+ return;
+ };
+
+ test_one_address(b, "host",
+ 0, "unixexec:path=ssh,argv1=-xT,argv2=--,argv3=host,argv4=systemd-stdio-bridge");
+ test_one_address(b, "host:123",
+ 0, "unixexec:path=ssh,argv1=-xT,argv2=-p,argv3=123,argv4=--,argv5=host,argv6=systemd-stdio-bridge");
+ test_one_address(b, "host:123:123",
+ -EINVAL, NULL);
+ test_one_address(b, "host:",
+ -EINVAL, NULL);
+ test_one_address(b, "user@host",
+ 0, "unixexec:path=ssh,argv1=-xT,argv2=--,argv3=user%40host,argv4=systemd-stdio-bridge");
+ test_one_address(b, "user@host@host",
+ -EINVAL, NULL);
+ test_one_address(b, "[::1]",
+ 0, "unixexec:path=ssh,argv1=-xT,argv2=--,argv3=%3a%3a1,argv4=systemd-stdio-bridge");
+ test_one_address(b, "user@[::1]",
+ 0, "unixexec:path=ssh,argv1=-xT,argv2=--,argv3=user%40%3a%3a1,argv4=systemd-stdio-bridge");
+ test_one_address(b, "user@[::1]:99",
+ 0, "unixexec:path=ssh,argv1=-xT,argv2=-p,argv3=99,argv4=--,argv5=user%40%3a%3a1,argv6=systemd-stdio-bridge");
+ test_one_address(b, "user@[::1]:",
+ -EINVAL, NULL);
+ test_one_address(b, "user@[::1:",
+ -EINVAL, NULL);
+ test_one_address(b, "user@",
+ -EINVAL, NULL);
+ test_one_address(b, "user@@",
+ -EINVAL, NULL);
+}
+
+DEFINE_TEST_MAIN(LOG_INFO);
diff --git a/src/libsystemd/sd-bus/test-bus-benchmark.c b/src/libsystemd/sd-bus/test-bus-benchmark.c
new file mode 100644
index 0000000..d988588
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-benchmark.c
@@ -0,0 +1,326 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-kernel.h"
+#include "constants.h"
+#include "fd-util.h"
+#include "missing_resource.h"
+#include "string-util.h"
+#include "tests.h"
+#include "time-util.h"
+
+#define MAX_SIZE (2*1024*1024)
+
+static usec_t arg_loop_usec = 100 * USEC_PER_MSEC;
+
+typedef enum Type {
+ TYPE_LEGACY,
+ TYPE_DIRECT,
+} Type;
+
+static void server(sd_bus *b, size_t *result) {
+ int r;
+
+ for (;;) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = sd_bus_process(b, &m);
+ assert_se(r >= 0);
+
+ if (r == 0)
+ assert_se(sd_bus_wait(b, USEC_INFINITY) >= 0);
+ if (!m)
+ continue;
+
+ if (sd_bus_message_is_method_call(m, "benchmark.server", "Ping"))
+ assert_se(sd_bus_reply_method_return(m, NULL) >= 0);
+ else if (sd_bus_message_is_method_call(m, "benchmark.server", "Work")) {
+ const void *p;
+ size_t sz;
+
+ /* Make sure the mmap is mapped */
+ assert_se(sd_bus_message_read_array(m, 'y', &p, &sz) > 0);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ assert_se(r >= 0);
+ } else if (sd_bus_message_is_method_call(m, "benchmark.server", "Exit")) {
+ uint64_t res;
+ assert_se(sd_bus_message_read(m, "t", &res) > 0);
+
+ *result = res;
+ return;
+
+ } else if (!sd_bus_message_is_signal(m, NULL, NULL))
+ assert_not_reached();
+ }
+}
+
+static void transaction(sd_bus *b, size_t sz, const char *server_name) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ uint8_t *p;
+
+ assert_se(sd_bus_message_new_method_call(b, &m, server_name, "/", "benchmark.server", "Work") >= 0);
+ assert_se(sd_bus_message_append_array_space(m, 'y', sz, (void**) &p) >= 0);
+
+ memset(p, 0x80, sz);
+
+ assert_se(sd_bus_call(b, m, 0, NULL, &reply) >= 0);
+}
+
+static void client_bisect(const char *address, const char *server_name) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *x = NULL;
+ size_t lsize, rsize, csize;
+ sd_bus *b;
+ int r;
+
+ r = sd_bus_new(&b);
+ assert_se(r >= 0);
+
+ r = sd_bus_set_address(b, address);
+ assert_se(r >= 0);
+
+ r = sd_bus_start(b);
+ assert_se(r >= 0);
+
+ r = sd_bus_call_method(b, server_name, "/", "benchmark.server", "Ping", NULL, NULL, NULL);
+ assert_se(r >= 0);
+
+ lsize = 1;
+ rsize = MAX_SIZE;
+
+ printf("SIZE\tCOPY\tMEMFD\n");
+
+ for (;;) {
+ usec_t t;
+ unsigned n_copying, n_memfd;
+
+ csize = (lsize + rsize) / 2;
+
+ if (csize <= lsize)
+ break;
+
+ if (csize <= 0)
+ break;
+
+ printf("%zu\t", csize);
+
+ b->use_memfd = 0;
+
+ t = now(CLOCK_MONOTONIC);
+ for (n_copying = 0;; n_copying++) {
+ transaction(b, csize, server_name);
+ if (now(CLOCK_MONOTONIC) >= t + arg_loop_usec)
+ break;
+ }
+ printf("%u\t", (unsigned) ((n_copying * USEC_PER_SEC) / arg_loop_usec));
+
+ b->use_memfd = -1;
+
+ t = now(CLOCK_MONOTONIC);
+ for (n_memfd = 0;; n_memfd++) {
+ transaction(b, csize, server_name);
+ if (now(CLOCK_MONOTONIC) >= t + arg_loop_usec)
+ break;
+ }
+ printf("%u\n", (unsigned) ((n_memfd * USEC_PER_SEC) / arg_loop_usec));
+
+ if (n_copying == n_memfd)
+ break;
+
+ if (n_copying > n_memfd)
+ lsize = csize;
+ else
+ rsize = csize;
+ }
+
+ b->use_memfd = 1;
+ assert_se(sd_bus_message_new_method_call(b, &x, server_name, "/", "benchmark.server", "Exit") >= 0);
+ assert_se(sd_bus_message_append(x, "t", csize) >= 0);
+ assert_se(sd_bus_send(b, x, NULL) >= 0);
+
+ sd_bus_unref(b);
+}
+
+static void client_chart(Type type, const char *address, const char *server_name, int fd) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *x = NULL;
+ size_t csize;
+ sd_bus *b;
+ int r;
+
+ r = sd_bus_new(&b);
+ assert_se(r >= 0);
+
+ if (type == TYPE_DIRECT) {
+ r = sd_bus_set_fd(b, fd, fd);
+ assert_se(r >= 0);
+ } else {
+ r = sd_bus_set_address(b, address);
+ assert_se(r >= 0);
+
+ r = sd_bus_set_bus_client(b, true);
+ assert_se(r >= 0);
+ }
+
+ r = sd_bus_start(b);
+ assert_se(r >= 0);
+
+ r = sd_bus_call_method(b, server_name, "/", "benchmark.server", "Ping", NULL, NULL, NULL);
+ assert_se(r >= 0);
+
+ switch (type) {
+ case TYPE_LEGACY:
+ printf("SIZE\tLEGACY\n");
+ break;
+ case TYPE_DIRECT:
+ printf("SIZE\tDIRECT\n");
+ break;
+ }
+
+ for (csize = 1; csize <= MAX_SIZE; csize *= 2) {
+ usec_t t;
+ unsigned n_memfd;
+
+ printf("%zu\t", csize);
+
+ t = now(CLOCK_MONOTONIC);
+ for (n_memfd = 0;; n_memfd++) {
+ transaction(b, csize, server_name);
+ if (now(CLOCK_MONOTONIC) >= t + arg_loop_usec)
+ break;
+ }
+
+ printf("%u\n", (unsigned) ((n_memfd * USEC_PER_SEC) / arg_loop_usec));
+ }
+
+ b->use_memfd = 1;
+ assert_se(sd_bus_message_new_method_call(b, &x, server_name, "/", "benchmark.server", "Exit") >= 0);
+ assert_se(sd_bus_message_append(x, "t", csize) >= 0);
+ assert_se(sd_bus_send(b, x, NULL) >= 0);
+
+ sd_bus_unref(b);
+}
+
+int main(int argc, char *argv[]) {
+ enum {
+ MODE_BISECT,
+ MODE_CHART,
+ } mode = MODE_BISECT;
+ Type type = TYPE_LEGACY;
+ int i, pair[2] = EBADF_PAIR;
+ _cleanup_free_ char *address = NULL, *server_name = NULL;
+ _cleanup_close_ int bus_ref = -EBADF;
+ const char *unique;
+ cpu_set_t cpuset;
+ size_t result;
+ sd_bus *b;
+ pid_t pid;
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ for (i = 1; i < argc; i++) {
+ if (streq(argv[i], "chart")) {
+ mode = MODE_CHART;
+ continue;
+ } else if (streq(argv[i], "legacy")) {
+ type = TYPE_LEGACY;
+ continue;
+ } else if (streq(argv[i], "direct")) {
+ type = TYPE_DIRECT;
+ continue;
+ }
+
+ assert_se(parse_sec(argv[i], &arg_loop_usec) >= 0);
+ }
+
+ assert_se(arg_loop_usec > 0);
+
+ if (type == TYPE_LEGACY) {
+ const char *e;
+
+ e = secure_getenv("DBUS_SESSION_BUS_ADDRESS");
+ assert_se(e);
+
+ address = strdup(e);
+ assert_se(address);
+ }
+
+ r = sd_bus_new(&b);
+ assert_se(r >= 0);
+
+ if (type == TYPE_DIRECT) {
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM, 0, pair) >= 0);
+
+ r = sd_bus_set_fd(b, pair[0], pair[0]);
+ assert_se(r >= 0);
+
+ r = sd_bus_set_server(b, true, SD_ID128_NULL);
+ assert_se(r >= 0);
+ } else {
+ r = sd_bus_set_address(b, address);
+ assert_se(r >= 0);
+
+ r = sd_bus_set_bus_client(b, true);
+ assert_se(r >= 0);
+ }
+
+ r = sd_bus_start(b);
+ assert_se(r >= 0);
+
+ if (type != TYPE_DIRECT) {
+ r = sd_bus_get_unique_name(b, &unique);
+ assert_se(r >= 0);
+
+ server_name = strdup(unique);
+ assert_se(server_name);
+ }
+
+ sync();
+ setpriority(PRIO_PROCESS, 0, -19);
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
+
+ safe_close(bus_ref);
+ sd_bus_unref(b);
+
+ switch (mode) {
+ case MODE_BISECT:
+ client_bisect(address, server_name);
+ break;
+
+ case MODE_CHART:
+ client_chart(type, address, server_name, pair[1]);
+ break;
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(1, &cpuset);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
+
+ server(b, &result);
+
+ if (mode == MODE_BISECT)
+ printf("Copying/memfd are equally fast at %zu bytes\n", result);
+
+ assert_se(waitpid(pid, NULL, 0) == pid);
+
+ safe_close(pair[1]);
+ sd_bus_unref(b);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-chat.c b/src/libsystemd/sd-bus/test-bus-chat.c
new file mode 100644
index 0000000..da1340f
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-chat.c
@@ -0,0 +1,539 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-internal.h"
+#include "bus-match.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "log.h"
+#include "macro.h"
+#include "string-util.h"
+#include "tests.h"
+
+static int match_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ log_info("Match triggered! destination=%s interface=%s member=%s",
+ strna(sd_bus_message_get_destination(m)),
+ strna(sd_bus_message_get_interface(m)),
+ strna(sd_bus_message_get_member(m)));
+ return 0;
+}
+
+static int object_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ int r;
+
+ if (sd_bus_message_is_method_error(m, NULL))
+ return 0;
+
+ if (sd_bus_message_is_method_call(m, "org.object.test", "Foobar")) {
+ log_info("Invoked Foobar() on %s", sd_bus_message_get_path(m));
+
+ r = sd_bus_reply_method_return(m, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send reply: %m");
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int server_init(sd_bus **ret_bus) {
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ const char *unique, *desc;
+ sd_id128_t id;
+ int r;
+
+ assert_se(ret_bus);
+
+ r = sd_bus_open_user_with_description(&bus, "my bus!");
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to user bus: %m");
+
+ r = sd_bus_get_bus_id(bus, &id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get server ID: %m");
+
+ r = sd_bus_get_unique_name(bus, &unique);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get unique name: %m");
+
+ assert_se(sd_bus_get_description(bus, &desc) >= 0);
+ assert_se(streq(desc, "my bus!"));
+
+ log_info("Peer ID is " SD_ID128_FORMAT_STR ".", SD_ID128_FORMAT_VAL(id));
+ log_info("Unique ID: %s", unique);
+ log_info("Can send file handles: %i", sd_bus_can_send(bus, 'h'));
+
+ r = sd_bus_request_name(bus, "org.freedesktop.systemd.test", 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire name: %m");
+
+ r = sd_bus_add_fallback(bus, NULL, "/foo/bar", object_callback, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add object: %m");
+
+ r = sd_bus_match_signal(bus, NULL, NULL, NULL, "foo.bar", "Notify", match_callback, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match: %m");
+
+ r = sd_bus_match_signal(bus, NULL, NULL, NULL, "foo.bar", "NotifyTo", match_callback, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to request match: %m");
+
+ r = sd_bus_add_match(bus, NULL, "type='signal',interface='org.freedesktop.DBus',member='NameOwnerChanged'", match_callback, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match: %m");
+
+ bus_match_dump(stdout, &bus->match_callbacks, 0);
+
+ *ret_bus = TAKE_PTR(bus);
+ return 0;
+}
+
+static int server(sd_bus *_bus) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = ASSERT_PTR(_bus);
+ bool client1_gone = false, client2_gone = false;
+ int r;
+
+ while (!client1_gone || !client2_gone) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ pid_t pid = 0;
+ const char *label = NULL;
+
+ r = sd_bus_process(bus, &m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to process requests: %m");
+ if (r == 0) {
+ r = sd_bus_wait(bus, UINT64_MAX);
+ if (r < 0)
+ return log_error_errno(r, "Failed to wait: %m");
+
+ continue;
+ }
+ if (!m)
+ continue;
+
+ r = sd_bus_query_sender_creds(m, SD_BUS_CREDS_AUGMENT | SD_BUS_CREDS_PID | SD_BUS_CREDS_SELINUX_CONTEXT, &creds);
+ if (r < 0)
+ log_debug_errno(r, "Failed to query sender credentials, ignoring: %m");
+ else {
+ r = sd_bus_creds_get_pid(creds, &pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get sender pid: %m");
+
+ (void) sd_bus_creds_get_selinux_context(creds, &label);
+ }
+
+ log_info("Got message! member=%s pid="PID_FMT" label=%s",
+ strna(sd_bus_message_get_member(m)),
+ pid,
+ strna(label));
+
+ /* sd_bus_message_dump(m); */
+ /* sd_bus_message_rewind(m, true); */
+
+ if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "LowerCase")) {
+ const char *hello;
+ _cleanup_free_ char *lowercase = NULL;
+
+ r = sd_bus_message_read(m, "s", &hello);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get parameter: %m");
+
+ lowercase = strdup(hello);
+ if (!lowercase)
+ return log_oom();
+
+ ascii_strlower(lowercase);
+
+ r = sd_bus_reply_method_return(m, "s", lowercase);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send reply: %m");
+
+ } else if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "ExitClient1")) {
+
+ r = sd_bus_reply_method_return(m, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send reply: %m");
+
+ client1_gone = true;
+ } else if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "ExitClient2")) {
+
+ r = sd_bus_reply_method_return(m, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send reply: %m");
+
+ client2_gone = true;
+ } else if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "Slow")) {
+
+ sleep(1);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send reply: %m");
+
+ } else if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "FileDescriptor")) {
+ int fd;
+ static const char x = 'X';
+
+ r = sd_bus_message_read(m, "h", &fd);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get parameter: %m");
+
+ log_info("Received fd=%d", fd);
+
+ if (write(fd, &x, 1) < 0) {
+ r = log_error_errno(errno, "Failed to write to fd: %m");
+ safe_close(fd);
+ return r;
+ }
+
+ r = sd_bus_reply_method_return(m, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send reply: %m");
+
+ } else if (sd_bus_message_is_method_call(m, NULL, NULL)) {
+
+ r = sd_bus_reply_method_error(
+ m,
+ &SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_UNKNOWN_METHOD, "Unknown method."));
+ if (r < 0)
+ return log_error_errno(r, "Failed to send reply: %m");
+ }
+ }
+
+ return 0;
+}
+
+static void* client1(void *p) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ const char *hello;
+ int r;
+ _cleanup_close_pair_ int pp[2] = EBADF_PAIR;
+ char x;
+
+ r = sd_bus_open_user(&bus);
+ if (r < 0) {
+ log_error_errno(r, "Failed to connect to user bus: %m");
+ goto finish;
+ }
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.systemd.test",
+ "LowerCase",
+ &error,
+ &reply,
+ "s",
+ "HELLO");
+ if (r < 0) {
+ log_error_errno(r, "Failed to issue method call: %m");
+ goto finish;
+ }
+
+ r = sd_bus_message_read(reply, "s", &hello);
+ if (r < 0) {
+ log_error_errno(r, "Failed to get string: %m");
+ goto finish;
+ }
+
+ assert_se(streq(hello, "hello"));
+
+ if (pipe2(pp, O_CLOEXEC|O_NONBLOCK) < 0) {
+ r = log_error_errno(errno, "Failed to allocate pipe: %m");
+ goto finish;
+ }
+
+ log_info("Sending fd=%d", pp[1]);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.systemd.test",
+ "FileDescriptor",
+ &error,
+ NULL,
+ "h",
+ pp[1]);
+ if (r < 0) {
+ log_error_errno(r, "Failed to issue method call: %m");
+ goto finish;
+ }
+
+ errno = 0;
+ if (read(pp[0], &x, 1) <= 0) {
+ log_error("Failed to read from pipe: %s", STRERROR_OR_EOF(errno));
+ goto finish;
+ }
+
+ r = 0;
+
+finish:
+ if (bus) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *q = NULL;
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &q,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.systemd.test",
+ "ExitClient1");
+ if (r < 0)
+ log_error_errno(r, "Failed to allocate method call: %m");
+ else
+ sd_bus_send(bus, q, NULL);
+
+ }
+
+ return INT_TO_PTR(r);
+}
+
+static int quit_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ bool *x = userdata;
+
+ log_error_errno(sd_bus_message_get_errno(m), "Quit callback: %m");
+
+ *x = 1;
+ return 1;
+}
+
+static void* client2(void *p) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ bool quit = false;
+ const char *mid;
+ int r;
+
+ r = sd_bus_open_user(&bus);
+ if (r < 0) {
+ log_error_errno(r, "Failed to connect to user bus: %m");
+ goto finish;
+ }
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd.test",
+ "/foo/bar/waldo/piep",
+ "org.object.test",
+ "Foobar");
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate method call: %m");
+ goto finish;
+ }
+
+ r = sd_bus_send(bus, m, NULL);
+ if (r < 0) {
+ log_error("Failed to issue method call: %s", bus_error_message(&error, r));
+ goto finish;
+ }
+
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_signal(
+ bus,
+ &m,
+ "/foobar",
+ "foo.bar",
+ "Notify");
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate signal: %m");
+ goto finish;
+ }
+
+ r = sd_bus_send(bus, m, NULL);
+ if (r < 0) {
+ log_error("Failed to issue signal: %s", bus_error_message(&error, r));
+ goto finish;
+ }
+
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_signal_to(
+ bus,
+ &m,
+ "org.freedesktop.systemd.test",
+ "/foobar",
+ "foo.bar",
+ "NotifyTo");
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate signal to: %m");
+ goto finish;
+ }
+
+ r = sd_bus_send(bus, m, NULL);
+ if (r < 0) {
+ log_error("Failed to issue signal to: %s", bus_error_message(&error, r));
+ goto finish;
+ }
+
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.DBus.Peer",
+ "GetMachineId");
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate method call: %m");
+ goto finish;
+ }
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0) {
+ log_error("Failed to issue method call: %s", bus_error_message(&error, r));
+ goto finish;
+ }
+
+ r = sd_bus_message_read(reply, "s", &mid);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse machine ID: %m");
+ goto finish;
+ }
+
+ log_info("Machine ID is %s.", mid);
+
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.systemd.test",
+ "Slow");
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate method call: %m");
+ goto finish;
+ }
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_call(bus, m, 200 * USEC_PER_MSEC, &error, &reply);
+ if (r < 0)
+ log_debug("Failed to issue method call: %s", bus_error_message(&error, r));
+ else {
+ log_error("Slow call unexpectedly succeed.");
+ r = -ENOANO;
+ goto finish;
+ }
+
+ m = sd_bus_message_unref(m);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.systemd.test",
+ "Slow");
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate method call: %m");
+ goto finish;
+ }
+
+ r = sd_bus_call_async(bus, NULL, m, quit_callback, &quit, 200 * USEC_PER_MSEC);
+ if (r < 0) {
+ log_info("Failed to issue method call: %s", bus_error_message(&error, r));
+ goto finish;
+ }
+
+ while (!quit) {
+ r = sd_bus_process(bus, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to process requests: %m");
+ goto finish;
+ }
+ if (r == 0) {
+ r = sd_bus_wait(bus, UINT64_MAX);
+ if (r < 0) {
+ log_error_errno(r, "Failed to wait: %m");
+ goto finish;
+ }
+ }
+ }
+
+ r = 0;
+
+finish:
+ if (bus) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *q = NULL;
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &q,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.systemd.test",
+ "ExitClient2");
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate method call: %m");
+ goto finish;
+ }
+
+ (void) sd_bus_send(bus, q, NULL);
+ }
+
+ return INT_TO_PTR(r);
+}
+
+int main(int argc, char *argv[]) {
+ pthread_t c1, c2;
+ sd_bus *bus;
+ void *p;
+ int q, r;
+
+ test_setup_logging(LOG_INFO);
+
+ r = server_init(&bus);
+ if (r < 0)
+ return log_tests_skipped("Failed to connect to bus");
+
+ log_info("Initialized...");
+
+ r = pthread_create(&c1, NULL, client1, bus);
+ if (r != 0)
+ return EXIT_FAILURE;
+
+ r = pthread_create(&c2, NULL, client2, bus);
+ if (r != 0)
+ return EXIT_FAILURE;
+
+ r = server(bus);
+
+ q = pthread_join(c1, &p);
+ if (q != 0)
+ return EXIT_FAILURE;
+ if (PTR_TO_INT(p) < 0)
+ return EXIT_FAILURE;
+
+ q = pthread_join(c2, &p);
+ if (q != 0)
+ return EXIT_FAILURE;
+ if (PTR_TO_INT(p) < 0)
+ return EXIT_FAILURE;
+
+ if (r < 0)
+ return EXIT_FAILURE;
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-cleanup.c b/src/libsystemd/sd-bus/test-bus-cleanup.c
new file mode 100644
index 0000000..3e14627
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-cleanup.c
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "process-util.h"
+#include "tests.h"
+
+static bool use_system_bus = false;
+
+static void test_bus_new(void) {
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+
+ assert_se(sd_bus_new(&bus) == 0);
+ assert_se(bus->n_ref == 1);
+}
+
+static void test_bus_fork(void) {
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert_se(sd_bus_new(&bus) == 0);
+ assert_se(bus->n_ref == 1);
+
+ /* Check that after a fork the cleanup functions return NULL */
+ r = safe_fork("(bus-fork-test)", FORK_WAIT|FORK_LOG, NULL);
+ if (r == 0) {
+ assert_se(bus);
+ assert_se(sd_bus_is_ready(bus) == -ECHILD);
+ assert_se(sd_bus_flush_close_unref(bus) == NULL);
+ assert_se(sd_bus_close_unref(bus) == NULL);
+ assert_se(sd_bus_unref(bus) == NULL);
+ sd_bus_close(bus);
+ assert_se(bus->n_ref == 1);
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(r >= 0);
+ assert_se(bus->n_ref == 1);
+}
+
+static int test_bus_open(void) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ r = sd_bus_open_user(&bus);
+ if (IN_SET(r, -ECONNREFUSED, -ENOENT, -ENOMEDIUM)) {
+ r = sd_bus_open_system(&bus);
+ if (IN_SET(r, -ECONNREFUSED, -ENOENT))
+ return r;
+ use_system_bus = true;
+ }
+
+ assert_se(r >= 0);
+ assert_se(bus->n_ref >= 1); /* we send a hello message when opening, so the count is above 1 */
+
+ return 0;
+}
+
+static void test_bus_new_method_call(void) {
+ sd_bus *bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ assert_se(use_system_bus ? sd_bus_open_system(&bus) >= 0 : sd_bus_open_user(&bus) >= 0);
+
+ assert_se(sd_bus_message_new_method_call(bus, &m, "a.service.name", "/an/object/path", "an.interface.name", "AMethodName") >= 0);
+
+ assert_se(m->n_ref == 1); /* We hold the only reference to the message */
+ assert_se(bus->n_ref >= 2);
+ sd_bus_flush_close_unref(bus);
+ assert_se(m->n_ref == 1);
+}
+
+static void test_bus_new_signal(void) {
+ sd_bus *bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ assert_se(use_system_bus ? sd_bus_open_system(&bus) >= 0 : sd_bus_open_user(&bus) >= 0);
+
+ assert_se(sd_bus_message_new_signal(bus, &m, "/an/object/path", "an.interface.name", "Name") >= 0);
+
+ assert_se(m->n_ref == 1); /* We hold the only reference to the message */
+ assert_se(bus->n_ref >= 2);
+ sd_bus_flush_close_unref(bus);
+ assert_se(m->n_ref == 1);
+}
+
+int main(int argc, char **argv) {
+ test_setup_logging(LOG_INFO);
+
+ test_bus_new();
+ test_bus_fork();
+
+ if (test_bus_open() < 0)
+ return log_tests_skipped("Failed to connect to bus");
+
+ test_bus_new_method_call();
+ test_bus_new_signal();
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-creds.c b/src/libsystemd/sd-bus/test-bus-creds.c
new file mode 100644
index 0000000..13801be
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-creds.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-bus.h"
+
+#include "bus-dump.h"
+#include "cgroup-util.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ if (cg_unified() == -ENOMEDIUM)
+ return log_tests_skipped("/sys/fs/cgroup/ not available");
+
+ r = sd_bus_creds_new_from_pid(&creds, 0, _SD_BUS_CREDS_ALL);
+ log_full_errno(r < 0 ? LOG_ERR : LOG_DEBUG, r, "sd_bus_creds_new_from_pid: %m");
+ assert_se(r >= 0);
+
+ bus_creds_dump(creds, NULL, true);
+
+ creds = sd_bus_creds_unref(creds);
+
+ r = sd_bus_creds_new_from_pid(&creds, 1, _SD_BUS_CREDS_ALL);
+ if (r != -EACCES) {
+ assert_se(r >= 0);
+ putchar('\n');
+ bus_creds_dump(creds, NULL, true);
+ }
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-error.c b/src/libsystemd/sd-bus/test-bus-error.c
new file mode 100644
index 0000000..a55f3f9
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-error.c
@@ -0,0 +1,294 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-common-errors.h"
+#include "bus-error.h"
+#include "errno-list.h"
+#include "errno-util.h"
+#include "string-util.h"
+#include "tests.h"
+
+TEST(error) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL, second = SD_BUS_ERROR_NULL;
+ const sd_bus_error const_error = SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_FILE_EXISTS, "const error");
+ const sd_bus_error temporarily_const_error = {
+ .name = SD_BUS_ERROR_ACCESS_DENIED,
+ .message = "oh! no",
+ ._need_free = -1,
+ };
+
+ assert_se(!sd_bus_error_is_set(&error));
+ assert_se(sd_bus_error_set(&error, SD_BUS_ERROR_NOT_SUPPORTED, "xxx") == -EOPNOTSUPP);
+ assert_se(streq(error.name, SD_BUS_ERROR_NOT_SUPPORTED));
+ assert_se(streq(error.message, "xxx"));
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_NOT_SUPPORTED));
+ assert_se(sd_bus_error_has_names_sentinel(&error, SD_BUS_ERROR_NOT_SUPPORTED, NULL));
+ assert_se(sd_bus_error_has_names(&error, SD_BUS_ERROR_NOT_SUPPORTED));
+ assert_se(sd_bus_error_has_names(&error, SD_BUS_ERROR_NOT_SUPPORTED, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(sd_bus_error_has_names(&error, SD_BUS_ERROR_FILE_NOT_FOUND, SD_BUS_ERROR_NOT_SUPPORTED, NULL));
+ assert_se(!sd_bus_error_has_names(&error, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(sd_bus_error_get_errno(&error) == EOPNOTSUPP);
+ assert_se(sd_bus_error_is_set(&error));
+ sd_bus_error_free(&error);
+
+ /* Check with no error */
+ assert_se(!sd_bus_error_is_set(&error));
+ assert_se(sd_bus_error_setf(&error, NULL, "yyy %i", -1) == 0);
+ assert_se(error.name == NULL);
+ assert_se(error.message == NULL);
+ assert_se(!sd_bus_error_has_name(&error, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(!sd_bus_error_has_names(&error, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(sd_bus_error_get_errno(&error) == 0);
+ assert_se(!sd_bus_error_is_set(&error));
+
+ assert_se(sd_bus_error_setf(&error, SD_BUS_ERROR_FILE_NOT_FOUND, "yyy %i", -1) == -ENOENT);
+ assert_se(streq(error.name, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(streq(error.message, "yyy -1"));
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(sd_bus_error_has_names(&error, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(sd_bus_error_get_errno(&error) == ENOENT);
+ assert_se(sd_bus_error_is_set(&error));
+
+ assert_se(!sd_bus_error_is_set(&second));
+ assert_se(second._need_free == 0);
+ assert_se(error._need_free > 0);
+ assert_se(sd_bus_error_copy(&second, &error) == -ENOENT);
+ assert_se(second._need_free > 0);
+ assert_se(streq(error.name, second.name));
+ assert_se(streq(error.message, second.message));
+ assert_se(sd_bus_error_get_errno(&second) == ENOENT);
+ assert_se(sd_bus_error_has_name(&second, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(sd_bus_error_has_names(&second, SD_BUS_ERROR_FILE_NOT_FOUND));
+ assert_se(sd_bus_error_is_set(&second));
+
+ sd_bus_error_free(&error);
+ sd_bus_error_free(&second);
+
+ assert_se(!sd_bus_error_is_set(&second));
+ assert_se(const_error._need_free == 0);
+ assert_se(sd_bus_error_copy(&second, &const_error) == -EEXIST);
+ assert_se(second._need_free == 0);
+ assert_se(streq(const_error.name, second.name));
+ assert_se(streq(const_error.message, second.message));
+ assert_se(sd_bus_error_get_errno(&second) == EEXIST);
+ assert_se(sd_bus_error_has_name(&second, SD_BUS_ERROR_FILE_EXISTS));
+ assert_se(sd_bus_error_is_set(&second));
+ sd_bus_error_free(&second);
+
+ assert_se(!sd_bus_error_is_set(&second));
+ assert_se(temporarily_const_error._need_free < 0);
+ assert_se(sd_bus_error_copy(&second, &temporarily_const_error) == -EACCES);
+ assert_se(second._need_free > 0);
+ assert_se(streq(temporarily_const_error.name, second.name));
+ assert_se(streq(temporarily_const_error.message, second.message));
+ assert_se(sd_bus_error_get_errno(&second) == EACCES);
+ assert_se(sd_bus_error_has_name(&second, SD_BUS_ERROR_ACCESS_DENIED));
+ assert_se(sd_bus_error_is_set(&second));
+
+ assert_se(!sd_bus_error_is_set(&error));
+ assert_se(sd_bus_error_set_const(&error, "System.Error.EUCLEAN", "Hallo") == -EUCLEAN);
+ assert_se(streq(error.name, "System.Error.EUCLEAN"));
+ assert_se(streq(error.message, "Hallo"));
+ assert_se(sd_bus_error_has_name(&error, "System.Error.EUCLEAN"));
+ assert_se(sd_bus_error_get_errno(&error) == EUCLEAN);
+ assert_se(sd_bus_error_is_set(&error));
+ sd_bus_error_free(&error);
+
+ assert_se(!sd_bus_error_is_set(&error));
+ assert_se(sd_bus_error_set_errno(&error, EBUSY) == -EBUSY);
+ assert_se(streq(error.name, "System.Error.EBUSY"));
+ assert_se(streq(error.message, STRERROR(EBUSY)));
+ assert_se(sd_bus_error_has_name(&error, "System.Error.EBUSY"));
+ assert_se(sd_bus_error_get_errno(&error) == EBUSY);
+ assert_se(sd_bus_error_is_set(&error));
+ sd_bus_error_free(&error);
+
+ assert_se(!sd_bus_error_is_set(&error));
+ assert_se(sd_bus_error_set_errnof(&error, EIO, "Waldi %c", 'X') == -EIO);
+ assert_se(streq(error.name, SD_BUS_ERROR_IO_ERROR));
+ assert_se(streq(error.message, "Waldi X"));
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_IO_ERROR));
+ assert_se(sd_bus_error_get_errno(&error) == EIO);
+ assert_se(sd_bus_error_is_set(&error));
+ sd_bus_error_free(&error);
+
+ /* Check with no error */
+ assert_se(!sd_bus_error_is_set(&error));
+ assert_se(sd_bus_error_set_errnof(&error, 0, "Waldi %c", 'X') == 0);
+ assert_se(error.name == NULL);
+ assert_se(error.message == NULL);
+ assert_se(!sd_bus_error_has_name(&error, SD_BUS_ERROR_IO_ERROR));
+ assert_se(sd_bus_error_get_errno(&error) == 0);
+ assert_se(!sd_bus_error_is_set(&error));
+}
+
+extern const sd_bus_error_map __start_SYSTEMD_BUS_ERROR_MAP[];
+extern const sd_bus_error_map __stop_SYSTEMD_BUS_ERROR_MAP[];
+
+static int dump_mapping_table(void) {
+ const sd_bus_error_map *m;
+
+ printf("----- errno mappings ------\n");
+ m = ALIGN_PTR(__start_SYSTEMD_BUS_ERROR_MAP);
+ while (m < __stop_SYSTEMD_BUS_ERROR_MAP) {
+
+ if (m->code == BUS_ERROR_MAP_END_MARKER) {
+ m = ALIGN_PTR(m + 1);
+ continue;
+ }
+
+ printf("%s -> %i/%s\n", strna(m->name), m->code, strna(errno_to_name(m->code)));
+ m++;
+ }
+ printf("---------------------------\n");
+
+ return EXIT_SUCCESS;
+}
+
+TEST(errno_mapping_standard) {
+ assert_se(sd_bus_error_set(NULL, "System.Error.EUCLEAN", NULL) == -EUCLEAN);
+ assert_se(sd_bus_error_set(NULL, "System.Error.EBUSY", NULL) == -EBUSY);
+ assert_se(sd_bus_error_set(NULL, "System.Error.EINVAL", NULL) == -EINVAL);
+ assert_se(sd_bus_error_set(NULL, "System.Error.WHATSIT", NULL) == -EIO);
+}
+
+BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map test_errors[] = {
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error", 5),
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-2", 52),
+ SD_BUS_ERROR_MAP_END
+};
+
+BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map test_errors2[] = {
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-3", 33),
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-4", 44),
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-33", 333),
+ SD_BUS_ERROR_MAP_END
+};
+
+static const sd_bus_error_map test_errors3[] = {
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-88", 888),
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-99", 999),
+ SD_BUS_ERROR_MAP_END
+};
+
+static const sd_bus_error_map test_errors4[] = {
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-77", 777),
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-78", 778),
+ SD_BUS_ERROR_MAP_END
+};
+
+static const sd_bus_error_map test_errors_bad1[] = {
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-1", 0),
+ SD_BUS_ERROR_MAP_END
+};
+
+static const sd_bus_error_map test_errors_bad2[] = {
+ SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-1", -1),
+ SD_BUS_ERROR_MAP_END
+};
+
+TEST(errno_mapping_custom) {
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error", NULL) == -5);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-2", NULL) == -52);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-x", NULL) == -EIO);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-33", NULL) == -333);
+
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-88", NULL) == -EIO);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-99", NULL) == -EIO);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-77", NULL) == -EIO);
+
+ assert_se(sd_bus_error_add_map(test_errors3) > 0);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-88", NULL) == -888);
+ assert_se(sd_bus_error_add_map(test_errors4) > 0);
+ assert_se(sd_bus_error_add_map(test_errors4) == 0);
+ assert_se(sd_bus_error_add_map(test_errors3) == 0);
+
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-99", NULL) == -999);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-77", NULL) == -777);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-78", NULL) == -778);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-2", NULL) == -52);
+ assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-y", NULL) == -EIO);
+
+ assert_se(sd_bus_error_set(NULL, BUS_ERROR_NO_SUCH_UNIT, NULL) == -ENOENT);
+
+ assert_se(sd_bus_error_add_map(test_errors_bad1) == -EINVAL);
+ assert_se(sd_bus_error_add_map(test_errors_bad2) == -EINVAL);
+}
+
+TEST(sd_bus_error_set_errnof) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *str = NULL;
+
+ assert_se(sd_bus_error_set_errnof(NULL, 0, NULL) == 0);
+ assert_se(sd_bus_error_set_errnof(NULL, ENOANO, NULL) == -ENOANO);
+
+ assert_se(sd_bus_error_set_errnof(&error, 0, NULL) == 0);
+ assert_se(!bus_error_is_dirty(&error));
+
+ assert_se(sd_bus_error_set_errnof(&error, EACCES, NULL) == -EACCES);
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_ACCESS_DENIED));
+ errno = EACCES;
+ assert_se(asprintf(&str, "%m") >= 0);
+ assert_se(streq(error.message, str));
+ assert_se(error._need_free == 0);
+
+ str = mfree(str);
+ sd_bus_error_free(&error);
+
+ assert_se(sd_bus_error_set_errnof(&error, ENOANO, NULL) == -ENOANO);
+ assert_se(sd_bus_error_has_name(&error, "System.Error.ENOANO"));
+ errno = ENOANO;
+ assert_se(asprintf(&str, "%m") >= 0);
+ assert_se(streq(error.message, str));
+ assert_se(error._need_free == 1);
+
+ str = mfree(str);
+ sd_bus_error_free(&error);
+
+ assert_se(sd_bus_error_set_errnof(&error, 100000, NULL) == -100000);
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_FAILED));
+ errno = 100000;
+ assert_se(asprintf(&str, "%m") >= 0);
+ assert_se(streq(error.message, str));
+ assert_se(error._need_free == 1);
+
+ str = mfree(str);
+ sd_bus_error_free(&error);
+
+ assert_se(sd_bus_error_set_errnof(NULL, 0, "hoge %s: %m", "foo") == 0);
+ assert_se(sd_bus_error_set_errnof(NULL, ENOANO, "hoge %s: %m", "foo") == -ENOANO);
+
+ assert_se(sd_bus_error_set_errnof(&error, 0, "hoge %s: %m", "foo") == 0);
+ assert_se(!bus_error_is_dirty(&error));
+
+ assert_se(sd_bus_error_set_errnof(&error, EACCES, "hoge %s: %m", "foo") == -EACCES);
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_ACCESS_DENIED));
+ errno = EACCES;
+ assert_se(asprintf(&str, "hoge %s: %m", "foo") >= 0);
+ assert_se(streq(error.message, str));
+ assert_se(error._need_free == 1);
+
+ str = mfree(str);
+ sd_bus_error_free(&error);
+
+ assert_se(sd_bus_error_set_errnof(&error, ENOANO, "hoge %s: %m", "foo") == -ENOANO);
+ assert_se(sd_bus_error_has_name(&error, "System.Error.ENOANO"));
+ errno = ENOANO;
+ assert_se(asprintf(&str, "hoge %s: %m", "foo") >= 0);
+ assert_se(streq(error.message, str));
+ assert_se(error._need_free == 1);
+
+ str = mfree(str);
+ sd_bus_error_free(&error);
+
+ assert_se(sd_bus_error_set_errnof(&error, 100000, "hoge %s: %m", "foo") == -100000);
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_FAILED));
+ errno = 100000;
+ assert_se(asprintf(&str, "hoge %s: %m", "foo") >= 0);
+ assert_se(streq(error.message, str));
+ assert_se(error._need_free == 1);
+}
+
+DEFINE_TEST_MAIN_WITH_INTRO(LOG_INFO, dump_mapping_table);
diff --git a/src/libsystemd/sd-bus/test-bus-introspect.c b/src/libsystemd/sd-bus/test-bus-introspect.c
new file mode 100644
index 0000000..3c026ae
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-introspect.c
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-introspect.h"
+#include "log.h"
+#include "tests.h"
+
+#include "test-vtable-data.h"
+
+static void test_manual_introspection_one(const sd_bus_vtable vtable[]) {
+ struct introspect intro = {};
+ _cleanup_free_ char *s = NULL;
+
+ log_info("/* %s */", __func__);
+
+ assert_se(introspect_begin(&intro, false) >= 0);
+
+ assert_se(introspect_write_interface(&intro, "org.foo", vtable) >= 0);
+ /* write again to check if output looks OK for a different interface */
+ assert_se(introspect_write_interface(&intro, "org.foo.bar", vtable) >= 0);
+ assert_se(introspect_finish(&intro, &s) == 0);
+
+ fputs(s, stdout);
+ fputs("\n", stdout);
+}
+
+TEST(manual_introspection) {
+ test_manual_introspection_one(test_vtable_1);
+ test_manual_introspection_one(test_vtable_2);
+ test_manual_introspection_one(test_vtable_deprecated);
+ test_manual_introspection_one((const sd_bus_vtable *) vtable_format_221);
+}
+
+DEFINE_TEST_MAIN(LOG_DEBUG);
diff --git a/src/libsystemd/sd-bus/test-bus-marshal.c b/src/libsystemd/sd-bus/test-bus-marshal.c
new file mode 100644
index 0000000..0044d33
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-marshal.c
@@ -0,0 +1,418 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <math.h>
+#include <stdlib.h>
+
+#if HAVE_GLIB
+#include <gio/gio.h>
+#endif
+
+#if HAVE_DBUS
+#include <dbus/dbus.h>
+#endif
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-dump.h"
+#include "bus-label.h"
+#include "bus-message.h"
+#include "bus-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "memstream-util.h"
+#include "tests.h"
+
+static void test_bus_path_encode_unique(void) {
+ _cleanup_free_ char *a = NULL, *b = NULL, *c = NULL, *d = NULL, *e = NULL;
+
+ assert_se(bus_path_encode_unique(NULL, "/foo/bar", "some.sender", "a.suffix", &a) >= 0 && streq_ptr(a, "/foo/bar/some_2esender/a_2esuffix"));
+ assert_se(bus_path_decode_unique(a, "/foo/bar", &b, &c) > 0 && streq_ptr(b, "some.sender") && streq_ptr(c, "a.suffix"));
+ assert_se(bus_path_decode_unique(a, "/bar/foo", &d, &d) == 0 && !d);
+ assert_se(bus_path_decode_unique("/foo/bar/onlyOneSuffix", "/foo/bar", &d, &d) == 0 && !d);
+ assert_se(bus_path_decode_unique("/foo/bar/_/_", "/foo/bar", &d, &e) > 0 && streq_ptr(d, "") && streq_ptr(e, ""));
+}
+
+static void test_bus_path_encode(void) {
+ _cleanup_free_ char *a = NULL, *b = NULL, *c = NULL, *d = NULL, *e = NULL, *f = NULL, *g = NULL;
+
+ assert_se(sd_bus_path_encode("/foo/bar", "waldo", &a) >= 0 && streq(a, "/foo/bar/waldo"));
+ assert_se(sd_bus_path_decode(a, "/waldo", &b) == 0 && b == NULL);
+ assert_se(sd_bus_path_decode(a, "/foo/bar", &b) > 0 && streq(b, "waldo"));
+
+ assert_se(sd_bus_path_encode("xxxx", "waldo", &c) < 0);
+ assert_se(sd_bus_path_encode("/foo/", "waldo", &c) < 0);
+
+ assert_se(sd_bus_path_encode("/foo/bar", "", &c) >= 0 && streq(c, "/foo/bar/_"));
+ assert_se(sd_bus_path_decode(c, "/foo/bar", &d) > 0 && streq(d, ""));
+
+ assert_se(sd_bus_path_encode("/foo/bar", "foo.bar", &e) >= 0 && streq(e, "/foo/bar/foo_2ebar"));
+ assert_se(sd_bus_path_decode(e, "/foo/bar", &f) > 0 && streq(f, "foo.bar"));
+
+ assert_se(sd_bus_path_decode("/waldo", "/waldo", &g) > 0 && streq(g, ""));
+}
+
+static void test_bus_path_encode_many(void) {
+ _cleanup_free_ char *a = NULL, *b = NULL, *c = NULL, *d = NULL, *e = NULL, *f = NULL;
+
+ assert_se(sd_bus_path_decode_many("/foo/bar", "/prefix/%", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/prefix/bar", "/prefix/%bar", NULL) == 1);
+ assert_se(sd_bus_path_decode_many("/foo/bar", "/prefix/%/suffix", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/prefix/foobar/suffix", "/prefix/%/suffix", &a) == 1 && streq_ptr(a, "foobar"));
+ assert_se(sd_bus_path_decode_many("/prefix/one_foo_two/mid/three_bar_four/suffix", "/prefix/one_%_two/mid/three_%_four/suffix", &b, &c) == 1 && streq_ptr(b, "foo") && streq_ptr(c, "bar"));
+ assert_se(sd_bus_path_decode_many("/prefix/one_foo_two/mid/three_bar_four/suffix", "/prefix/one_%_two/mid/three_%_four/suffix", NULL, &d) == 1 && streq_ptr(d, "bar"));
+
+ assert_se(sd_bus_path_decode_many("/foo/bar", "/foo/bar/%", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/bar%", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%/bar", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%bar", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/bar/suffix") == 1);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%%/suffix", NULL, NULL) == 0); /* multiple '%' are treated verbatim */
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%/suffi", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%/suffix", &e) == 1 && streq_ptr(e, "bar"));
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%/%", NULL, NULL) == 1);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/%/%/%", NULL, NULL, NULL) == 1);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "%/%/%", NULL, NULL, NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/%/%", NULL, NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/%/%/", NULL, NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/%/", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/%", NULL) == 0);
+ assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "%", NULL) == 0);
+
+ assert_se(sd_bus_path_encode_many(&f, "/prefix/one_%_two/mid/three_%_four/suffix", "foo", "bar") >= 0 && streq_ptr(f, "/prefix/one_foo_two/mid/three_bar_four/suffix"));
+}
+
+static void test_bus_label_escape_one(const char *a, const char *b) {
+ _cleanup_free_ char *t = NULL, *x = NULL, *y = NULL;
+
+ assert_se(t = bus_label_escape(a));
+ assert_se(streq(t, b));
+
+ assert_se(x = bus_label_unescape(t));
+ assert_se(streq(a, x));
+
+ assert_se(y = bus_label_unescape(b));
+ assert_se(streq(a, y));
+}
+
+static void test_bus_label_escape(void) {
+ test_bus_label_escape_one("foo123bar", "foo123bar");
+ test_bus_label_escape_one("foo.bar", "foo_2ebar");
+ test_bus_label_escape_one("foo_2ebar", "foo_5f2ebar");
+ test_bus_label_escape_one("", "_");
+ test_bus_label_escape_one("_", "_5f");
+ test_bus_label_escape_one("1", "_31");
+ test_bus_label_escape_one(":1", "_3a1");
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *copy = NULL;
+ _cleanup_free_ char *h = NULL, *first = NULL, *second = NULL, *third = NULL;
+ const int32_t integer_array[] = { -1, -2, 0, 1, 2 }, *return_array;
+ const char *x, *x2, *y, *z, *a, *b, *c, *d, *a_signature;
+ size_t sz, first_size, second_size = 0, third_size = 0;
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ _cleanup_(memstream_done) MemStream ms = {};
+ void *buffer = NULL;
+ int r, boolean;
+ uint64_t u64;
+ uint8_t u, v;
+ double dbl;
+ FILE *mf;
+ char *s;
+
+ test_setup_logging(LOG_INFO);
+
+ r = sd_bus_default_user(&bus);
+ if (r < 0)
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return log_tests_skipped("Failed to connect to bus");
+
+ r = sd_bus_message_new_method_call(bus, &m, "foobar.waldo", "/", "foobar.waldo", "Piep");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "s", "a string");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "s", NULL);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "asg", 2, "string #1", "string #2", "sba(tt)ss");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "sass", "foobar", 5, "foo", "bar", "waldo", "piep", "pap", "after");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "a{yv}", 2, 3, "s", "foo", 5, "s", "waldo");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "y(ty)y(yt)y", 8, 777ULL, 7, 9, 77, 7777ULL, 10);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "()");
+ assert_se(r == -EINVAL);
+
+ r = sd_bus_message_append(m, "ba(ss)", 255, 3, "aaa", "1", "bbb", "2", "ccc", "3");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append_basic(m, 's', "foobar");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append_basic(m, 's', "waldo");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_close_container(m);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append_string_space(m, 5, &s);
+ assert_se(r >= 0);
+ strcpy(s, "hallo");
+
+ r = sd_bus_message_append_array(m, 'i', integer_array, sizeof(integer_array));
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append_array(m, 'u', NULL, 0);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_append(m, "a(stdo)", 1, "foo", 815ULL, 47.0, "/");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_seal(m, 4711, 0);
+ assert_se(r >= 0);
+
+ sd_bus_message_dump(m, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ assert_se(mf = memstream_init(&ms));
+ sd_bus_message_dump(m, mf, 0);
+ assert_se(memstream_finalize(&ms, &first, &first_size) >= 0);
+
+ r = bus_message_get_blob(m, &buffer, &sz);
+ assert_se(r >= 0);
+
+ h = cescape_length(buffer, sz);
+ assert_se(h);
+ log_info("message size = %zu, contents =\n%s", sz, h);
+
+#if HAVE_GLIB
+ /* Work-around for asan bug. See c8d980a3e962aba2ea3a4cedf75fa94890a6d746. */
+#if !HAS_FEATURE_ADDRESS_SANITIZER
+ {
+ GDBusMessage *g;
+ char *p;
+
+#if !defined(GLIB_VERSION_2_36)
+ g_type_init();
+#endif
+
+ g = g_dbus_message_new_from_blob(buffer, sz, 0, NULL);
+ p = g_dbus_message_print(g, 0);
+ log_info("%s", p);
+ g_free(p);
+ g_object_unref(g);
+ }
+#endif
+#endif
+
+#if HAVE_DBUS
+ {
+ DBusMessage *w;
+ DBusError error;
+
+ dbus_error_init(&error);
+
+ w = dbus_message_demarshal(buffer, sz, &error);
+ if (!w)
+ log_error("%s", error.message);
+ else
+ dbus_message_unref(w);
+
+ dbus_error_free(&error);
+ }
+#endif
+
+ m = sd_bus_message_unref(m);
+
+ r = bus_message_from_malloc(bus, buffer, sz, NULL, 0, NULL, &m);
+ assert_se(r >= 0);
+
+ sd_bus_message_dump(m, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ assert_se(mf = memstream_init(&ms));
+ sd_bus_message_dump(m, mf, 0);
+ assert_se(memstream_finalize(&ms, &second, &second_size) >= 0);
+ assert_se(first_size == second_size);
+ assert_se(memcmp(first, second, first_size) == 0);
+
+ assert_se(sd_bus_message_rewind(m, true) >= 0);
+
+ r = sd_bus_message_read(m, "ssasg", &x, &x2, 2, &y, &z, &a_signature);
+ assert_se(r > 0);
+ assert_se(streq(x, "a string"));
+ assert_se(streq(x2, ""));
+ assert_se(streq(y, "string #1"));
+ assert_se(streq(z, "string #2"));
+ assert_se(streq(a_signature, "sba(tt)ss"));
+
+ r = sd_bus_message_read(m, "sass", &x, 5, &y, &z, &a, &b, &c, &d);
+ assert_se(r > 0);
+ assert_se(streq(x, "foobar"));
+ assert_se(streq(y, "foo"));
+ assert_se(streq(z, "bar"));
+ assert_se(streq(a, "waldo"));
+ assert_se(streq(b, "piep"));
+ assert_se(streq(c, "pap"));
+ assert_se(streq(d, "after"));
+
+ r = sd_bus_message_read(m, "a{yv}", 2, &u, "s", &x, &v, "s", &y);
+ assert_se(r > 0);
+ assert_se(u == 3);
+ assert_se(streq(x, "foo"));
+ assert_se(v == 5);
+ assert_se(streq(y, "waldo"));
+
+ r = sd_bus_message_read(m, "y(ty)", &v, &u64, &u);
+ assert_se(r > 0);
+ assert_se(v == 8);
+ assert_se(u64 == 777);
+ assert_se(u == 7);
+
+ r = sd_bus_message_read(m, "y(yt)", &v, &u, &u64);
+ assert_se(r > 0);
+ assert_se(v == 9);
+ assert_se(u == 77);
+ assert_se(u64 == 7777);
+
+ r = sd_bus_message_read(m, "y", &v);
+ assert_se(r > 0);
+ assert_se(v == 10);
+
+ r = sd_bus_message_read(m, "()");
+ assert_se(r < 0);
+
+ r = sd_bus_message_read(m, "ba(ss)", &boolean, 3, &x, &y, &a, &b, &c, &d);
+ assert_se(r > 0);
+ assert_se(boolean);
+ assert_se(streq(x, "aaa"));
+ assert_se(streq(y, "1"));
+ assert_se(streq(a, "bbb"));
+ assert_se(streq(b, "2"));
+ assert_se(streq(c, "ccc"));
+ assert_se(streq(d, "3"));
+
+ assert_se(sd_bus_message_verify_type(m, 'a', "s") > 0);
+
+ r = sd_bus_message_read(m, "as", 2, &x, &y);
+ assert_se(r > 0);
+ assert_se(streq(x, "foobar"));
+ assert_se(streq(y, "waldo"));
+
+ r = sd_bus_message_read_basic(m, 's', &s);
+ assert_se(r > 0);
+ assert_se(streq(s, "hallo"));
+
+ r = sd_bus_message_read_array(m, 'i', (const void**) &return_array, &sz);
+ assert_se(r > 0);
+ assert_se(sz == sizeof(integer_array));
+ assert_se(memcmp(integer_array, return_array, sz) == 0);
+
+ r = sd_bus_message_read_array(m, 'u', (const void**) &return_array, &sz);
+ assert_se(r > 0);
+ assert_se(sz == 0);
+
+ r = sd_bus_message_read(m, "a(stdo)", 1, &x, &u64, &dbl, &y);
+ assert_se(r > 0);
+ assert_se(streq(x, "foo"));
+ assert_se(u64 == 815ULL);
+ assert_se(fabs(dbl - 47.0) < 0.1);
+ assert_se(streq(y, "/"));
+
+ r = sd_bus_message_peek_type(m, NULL, NULL);
+ assert_se(r == 0);
+
+ r = sd_bus_message_new_method_call(bus, &copy, "foobar.waldo", "/", "foobar.waldo", "Piep");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_rewind(m, true);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_copy(copy, m, true);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_seal(copy, 4712, 0);
+ assert_se(r >= 0);
+
+ assert_se(mf = memstream_init(&ms));
+ sd_bus_message_dump(copy, mf, 0);
+ assert_se(memstream_finalize(&ms, &third, &third_size) >= 0);
+
+ printf("<%.*s>\n", (int) first_size, first);
+ printf("<%.*s>\n", (int) third_size, third);
+
+ assert_se(first_size == third_size);
+ assert_se(memcmp(first, third, third_size) == 0);
+
+ r = sd_bus_message_rewind(m, true);
+ assert_se(r >= 0);
+
+ assert_se(sd_bus_message_verify_type(m, 's', NULL) > 0);
+
+ r = sd_bus_message_skip(m, "ssasg");
+ assert_se(r > 0);
+
+ assert_se(sd_bus_message_verify_type(m, 's', NULL) > 0);
+
+ r = sd_bus_message_skip(m, "sass");
+ assert_se(r >= 0);
+
+ assert_se(sd_bus_message_verify_type(m, 'a', "{yv}") > 0);
+
+ r = sd_bus_message_skip(m, "a{yv}y(ty)y(yt)y");
+ assert_se(r >= 0);
+
+ assert_se(sd_bus_message_verify_type(m, 'b', NULL) > 0);
+
+ r = sd_bus_message_read(m, "b", &boolean);
+ assert_se(r > 0);
+ assert_se(boolean);
+
+ r = sd_bus_message_enter_container(m, 0, NULL);
+ assert_se(r > 0);
+
+ r = sd_bus_message_read(m, "(ss)", &x, &y);
+ assert_se(r > 0);
+
+ r = sd_bus_message_read(m, "(ss)", &a, &b);
+ assert_se(r > 0);
+
+ r = sd_bus_message_read(m, "(ss)", &c, &d);
+ assert_se(r > 0);
+
+ r = sd_bus_message_read(m, "(ss)", &x, &y);
+ assert_se(r == 0);
+
+ r = sd_bus_message_exit_container(m);
+ assert_se(r >= 0);
+
+ assert_se(streq(x, "aaa"));
+ assert_se(streq(y, "1"));
+ assert_se(streq(a, "bbb"));
+ assert_se(streq(b, "2"));
+ assert_se(streq(c, "ccc"));
+ assert_se(streq(d, "3"));
+
+ test_bus_label_escape();
+ test_bus_path_encode();
+ test_bus_path_encode_unique();
+ test_bus_path_encode_many();
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-match.c b/src/libsystemd/sd-bus/test-bus-match.c
new file mode 100644
index 0000000..2d77557
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-match.c
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-match.h"
+#include "bus-message.h"
+#include "bus-slot.h"
+#include "log.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "tests.h"
+
+static bool mask[32];
+
+static int filter(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ log_info("Ran %u", PTR_TO_UINT(userdata));
+ assert_se(PTR_TO_UINT(userdata) < ELEMENTSOF(mask));
+ mask[PTR_TO_UINT(userdata)] = true;
+ return 0;
+}
+
+static bool mask_contains(unsigned a[], unsigned n) {
+ unsigned i, j;
+
+ for (i = 0; i < ELEMENTSOF(mask); i++) {
+ bool found = false;
+
+ for (j = 0; j < n; j++)
+ if (a[j] == i) {
+ found = true;
+ break;
+ }
+
+ if (found != mask[i])
+ return false;
+ }
+
+ return true;
+}
+
+static int match_add(sd_bus_slot *slots, struct bus_match_node *root, const char *match, int value) {
+ struct bus_match_component *components;
+ size_t n_components;
+ sd_bus_slot *s;
+ int r;
+
+ s = slots + value;
+
+ r = bus_match_parse(match, &components, &n_components);
+ if (r < 0)
+ return r;
+
+ CLEANUP_ARRAY(components, n_components, bus_match_parse_free);
+
+ s->userdata = INT_TO_PTR(value);
+ s->match_callback.callback = filter;
+
+ return bus_match_add(root, components, n_components, &s->match_callback);
+}
+
+static void test_match_scope(const char *match, enum bus_match_scope scope) {
+ struct bus_match_component *components = NULL;
+ size_t n_components = 0;
+
+ CLEANUP_ARRAY(components, n_components, bus_match_parse_free);
+
+ assert_se(bus_match_parse(match, &components, &n_components) >= 0);
+ assert_se(bus_match_get_scope(components, n_components) == scope);
+}
+
+int main(int argc, char *argv[]) {
+ struct bus_match_node root = {
+ .type = BUS_MATCH_ROOT,
+ };
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ sd_bus_slot slots[19] = {};
+ int r;
+
+ test_setup_logging(LOG_INFO);
+
+ r = sd_bus_open_user(&bus);
+ if (r < 0)
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_tests_skipped("Failed to connect to bus");
+
+ assert_se(match_add(slots, &root, "arg2='wal\\'do',sender='foo',type='signal',interface='bar.x',", 1) >= 0);
+ assert_se(match_add(slots, &root, "arg2='wal\\'do2',sender='foo',type='signal',interface='bar.x',", 2) >= 0);
+ assert_se(match_add(slots, &root, "arg3='test',sender='foo',type='signal',interface='bar.x',", 3) >= 0);
+ assert_se(match_add(slots, &root, "arg3='test',sender='foo',type='method_call',interface='bar.x',", 4) >= 0);
+ assert_se(match_add(slots, &root, "", 5) >= 0);
+ assert_se(match_add(slots, &root, "interface='quux.x'", 6) >= 0);
+ assert_se(match_add(slots, &root, "interface='bar.x'", 7) >= 0);
+ assert_se(match_add(slots, &root, "member='waldo',path='/foo/bar'", 8) >= 0);
+ assert_se(match_add(slots, &root, "path='/foo/bar'", 9) >= 0);
+ assert_se(match_add(slots, &root, "path_namespace='/foo'", 10) >= 0);
+ assert_se(match_add(slots, &root, "path_namespace='/foo/quux'", 11) >= 0);
+ assert_se(match_add(slots, &root, "arg1='two'", 12) >= 0);
+ assert_se(match_add(slots, &root, "member='waldo',arg2path='/prefix/'", 13) >= 0);
+ assert_se(match_add(slots, &root, "member=waldo,path='/foo/bar',arg3namespace='prefix'", 14) >= 0);
+ assert_se(match_add(slots, &root, "arg4has='pi'", 15) >= 0);
+ assert_se(match_add(slots, &root, "arg4has='pa'", 16) >= 0);
+ assert_se(match_add(slots, &root, "arg4has='po'", 17) >= 0);
+ assert_se(match_add(slots, &root, "arg4='pi'", 18) >= 0);
+
+ bus_match_dump(stdout, &root, 0);
+
+ assert_se(sd_bus_message_new_signal(bus, &m, "/foo/bar", "bar.x", "waldo") >= 0);
+ assert_se(sd_bus_message_append(m, "ssssas", "one", "two", "/prefix/three", "prefix.four", 3, "pi", "pa", "po") >= 0);
+ assert_se(sd_bus_message_seal(m, 1, 0) >= 0);
+
+ zero(mask);
+ assert_se(bus_match_run(NULL, &root, m) == 0);
+ assert_se(mask_contains((unsigned[]) { 9, 8, 7, 5, 10, 12, 13, 14, 15, 16, 17 }, 11));
+
+ assert_se(bus_match_remove(&root, &slots[8].match_callback) >= 0);
+ assert_se(bus_match_remove(&root, &slots[13].match_callback) >= 0);
+
+ bus_match_dump(stdout, &root, 0);
+
+ zero(mask);
+ assert_se(bus_match_run(NULL, &root, m) == 0);
+ assert_se(mask_contains((unsigned[]) { 9, 5, 10, 12, 14, 7, 15, 16, 17 }, 9));
+
+ for (enum bus_match_node_type i = 0; i < _BUS_MATCH_NODE_TYPE_MAX; i++) {
+ char buf[32];
+ const char *x;
+
+ assert_se(x = bus_match_node_type_to_string(i, buf, sizeof(buf)));
+
+ if (i >= BUS_MATCH_MESSAGE_TYPE)
+ assert_se(bus_match_node_type_from_string(x, strlen(x)) == i);
+ }
+
+ bus_match_free(&root);
+
+ test_match_scope("interface='foobar'", BUS_MATCH_GENERIC);
+ test_match_scope("", BUS_MATCH_GENERIC);
+ test_match_scope("interface='org.freedesktop.DBus.Local'", BUS_MATCH_LOCAL);
+ test_match_scope("sender='org.freedesktop.DBus.Local'", BUS_MATCH_LOCAL);
+ test_match_scope("member='gurke',path='/org/freedesktop/DBus/Local'", BUS_MATCH_LOCAL);
+ test_match_scope("arg2='piep',sender='org.freedesktop.DBus',member='waldo'", BUS_MATCH_DRIVER);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-objects.c b/src/libsystemd/sd-bus/test-bus-objects.c
new file mode 100644
index 0000000..ccdd0d5
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-objects.c
@@ -0,0 +1,677 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <pthread.h>
+#include <stdlib.h>
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-dump.h"
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "log.h"
+#include "macro.h"
+#include "strv.h"
+#include "tests.h"
+
+struct context {
+ int fds[2];
+ bool quit;
+ char *something;
+ char *automatic_string_property;
+ uint32_t automatic_integer_property;
+};
+
+static int something_handler(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ struct context *c = userdata;
+ const char *s;
+ char *n = NULL;
+ int r;
+
+ r = sd_bus_message_read(m, "s", &s);
+ assert_se(r > 0);
+
+ n = strjoin("<<<", s, ">>>");
+ assert_se(n);
+
+ free(c->something);
+ c->something = n;
+
+ log_info("AlterSomething() called, got %s, returning %s", s, n);
+
+ /* This should fail, since the return type doesn't match */
+ assert_se(sd_bus_reply_method_return(m, "u", 4711) == -ENOMSG);
+
+ r = sd_bus_reply_method_return(m, "s", n);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int exit_handler(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ struct context *c = userdata;
+ int r;
+
+ c->quit = true;
+
+ log_info("Exit called");
+
+ r = sd_bus_reply_method_return(m, "");
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int get_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error) {
+ struct context *c = userdata;
+ int r;
+
+ log_info("property get for %s called, returning \"%s\".", property, c->something);
+
+ r = sd_bus_message_append(reply, "s", c->something);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int set_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *error) {
+ struct context *c = userdata;
+ const char *s;
+ char *n;
+ int r;
+
+ log_info("property set for %s called", property);
+
+ r = sd_bus_message_read(value, "s", &s);
+ assert_se(r >= 0);
+
+ n = strdup(s);
+ assert_se(n);
+
+ free(c->something);
+ c->something = n;
+
+ return 1;
+}
+
+static int value_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error) {
+ _cleanup_free_ char *s = NULL;
+ const char *x;
+ int r;
+
+ assert_se(asprintf(&s, "object %p, path %s", userdata, path) >= 0);
+ r = sd_bus_message_append(reply, "s", s);
+ assert_se(r >= 0);
+
+ assert_se(x = startswith(path, "/value/"));
+
+ assert_se(PTR_TO_UINT(userdata) == 30);
+
+ return 1;
+}
+
+static int notify_test(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ int r;
+
+ assert_se(sd_bus_emit_properties_changed(sd_bus_message_get_bus(m), m->path, "org.freedesktop.systemd.ValueTest", "Value", NULL) >= 0);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int notify_test2(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ int r;
+
+ assert_se(sd_bus_emit_properties_changed_strv(sd_bus_message_get_bus(m), m->path, "org.freedesktop.systemd.ValueTest", NULL) >= 0);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int emit_interfaces_added(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ int r;
+
+ assert_se(sd_bus_emit_interfaces_added(sd_bus_message_get_bus(m), "/value/a/x", "org.freedesktop.systemd.ValueTest", NULL) >= 0);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int emit_interfaces_removed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ int r;
+
+ assert_se(sd_bus_emit_interfaces_removed(sd_bus_message_get_bus(m), "/value/a/x", "org.freedesktop.systemd.ValueTest", NULL) >= 0);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int emit_object_added(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ int r;
+
+ assert_se(sd_bus_emit_object_added(sd_bus_message_get_bus(m), "/value/a/x") >= 0);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static int emit_object_with_manager_added(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ assert_se(sd_bus_emit_object_added(sd_bus_message_get_bus(m), "/value/a") >= 0);
+
+ return ASSERT_SE_NONNEG(sd_bus_reply_method_return(m, NULL));
+}
+
+static int emit_object_removed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ int r;
+
+ assert_se(sd_bus_emit_object_removed(sd_bus_message_get_bus(m), "/value/a/x") >= 0);
+
+ r = sd_bus_reply_method_return(m, NULL);
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+static const sd_bus_vtable vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_METHOD("AlterSomething", "s", "s", something_handler, 0),
+ SD_BUS_METHOD("Exit", "", "", exit_handler, 0),
+ SD_BUS_WRITABLE_PROPERTY("Something", "s", get_handler, set_handler, 0, 0),
+ SD_BUS_WRITABLE_PROPERTY("AutomaticStringProperty", "s", NULL, NULL, offsetof(struct context, automatic_string_property), 0),
+ SD_BUS_WRITABLE_PROPERTY("AutomaticIntegerProperty", "u", NULL, NULL, offsetof(struct context, automatic_integer_property), 0),
+ SD_BUS_METHOD("NoOperation", NULL, NULL, NULL, 0),
+ SD_BUS_METHOD("EmitInterfacesAdded", NULL, NULL, emit_interfaces_added, 0),
+ SD_BUS_METHOD("EmitInterfacesRemoved", NULL, NULL, emit_interfaces_removed, 0),
+ SD_BUS_METHOD("EmitObjectAdded", NULL, NULL, emit_object_added, 0),
+ SD_BUS_METHOD("EmitObjectWithManagerAdded", NULL, NULL, emit_object_with_manager_added, 0),
+ SD_BUS_METHOD("EmitObjectRemoved", NULL, NULL, emit_object_removed, 0),
+ SD_BUS_VTABLE_END
+};
+
+static const sd_bus_vtable vtable2[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_METHOD("NotifyTest", "", "", notify_test, 0),
+ SD_BUS_METHOD("NotifyTest2", "", "", notify_test2, 0),
+ SD_BUS_PROPERTY("Value", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Value2", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_PROPERTY("Value3", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Value4", "s", value_handler, 10, 0),
+ SD_BUS_PROPERTY("AnExplicitProperty", "s", NULL, offsetof(struct context, something), SD_BUS_VTABLE_PROPERTY_EXPLICIT|SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_VTABLE_END
+};
+
+static int enumerator_callback(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+
+ if (object_path_startswith("/value", path))
+ assert_se(*nodes = strv_new("/value/a", "/value/b", "/value/c"));
+
+ return 1;
+}
+
+static int enumerator2_callback(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+
+ if (object_path_startswith("/value/a", path))
+ assert_se(*nodes = strv_new("/value/a/x", "/value/a/y", "/value/a/z"));
+
+ return 1;
+}
+
+static int enumerator3_callback(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) {
+ _cleanup_strv_free_ char **v = NULL;
+
+ if (!object_path_startswith("/value/b", path))
+ return 1;
+
+ for (unsigned i = 0; i < 30; i++)
+ assert_se(strv_extendf(&v, "/value/b/%u", i) >= 0);
+
+ *nodes = TAKE_PTR(v);
+ return 1;
+}
+
+static void *server(void *p) {
+ struct context *c = p;
+ sd_bus *bus = NULL;
+ sd_id128_t id;
+ int r;
+
+ c->quit = false;
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_fd(bus, c->fds[0], c->fds[0]) >= 0);
+ assert_se(sd_bus_set_server(bus, 1, id) >= 0);
+
+ assert_se(sd_bus_add_object_vtable(bus, NULL, "/foo", "org.freedesktop.systemd.test", vtable, c) >= 0);
+ assert_se(sd_bus_add_object_vtable(bus, NULL, "/foo", "org.freedesktop.systemd.test2", vtable, c) >= 0);
+ assert_se(sd_bus_add_fallback_vtable(bus, NULL, "/value", "org.freedesktop.systemd.ValueTest", vtable2, NULL, UINT_TO_PTR(20)) >= 0);
+ assert_se(sd_bus_add_node_enumerator(bus, NULL, "/value", enumerator_callback, NULL) >= 0);
+ assert_se(sd_bus_add_node_enumerator(bus, NULL, "/value/a", enumerator2_callback, NULL) >= 0);
+ assert_se(sd_bus_add_node_enumerator(bus, NULL, "/value/b", enumerator3_callback, NULL) >= 0);
+ assert_se(sd_bus_add_object_manager(bus, NULL, "/value") >= 0);
+ assert_se(sd_bus_add_object_manager(bus, NULL, "/value/a") >= 0);
+
+ assert_se(sd_bus_start(bus) >= 0);
+
+ log_error("Entering event loop on server");
+
+ while (!c->quit) {
+ log_error("Loop!");
+
+ r = sd_bus_process(bus, NULL);
+ if (r < 0) {
+ log_error_errno(r, "Failed to process requests: %m");
+ goto fail;
+ }
+
+ if (r == 0) {
+ r = sd_bus_wait(bus, UINT64_MAX);
+ if (r < 0) {
+ log_error_errno(r, "Failed to wait: %m");
+ goto fail;
+ }
+
+ continue;
+ }
+ }
+
+ r = 0;
+
+fail:
+ if (bus) {
+ sd_bus_flush(bus);
+ sd_bus_unref(bus);
+ }
+
+ return INT_TO_PTR(r);
+}
+
+static int client(struct context *c) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_strv_free_ char **lines = NULL;
+ const char *s;
+ int r;
+
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_fd(bus, c->fds[1], c->fds[1]) >= 0);
+ assert_se(sd_bus_start(bus) >= 0);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "NoOperation", &error, NULL, NULL);
+ assert_se(r >= 0);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "AlterSomething", &error, &reply, "s", "hallo");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ assert_se(streq(s, "<<<hallo>>>"));
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Doesntexist", &error, &reply, "");
+ assert_se(r < 0);
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_METHOD));
+
+ sd_bus_error_free(&error);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Doesntexist", &error, &reply, NULL); /* NULL and "" are equivalent */
+ assert_se(r < 0);
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_METHOD));
+
+ sd_bus_error_free(&error);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "AlterSomething", &error, &reply, "as", 1, "hallo");
+ assert_se(r < 0);
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_INVALID_ARGS));
+
+ sd_bus_error_free(&error);
+
+ r = sd_bus_get_property(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Something", &error, &reply, "s");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ assert_se(streq(s, "<<<hallo>>>"));
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_set_property(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Something", &error, "s", "test");
+ assert_se(r >= 0);
+
+ r = sd_bus_get_property(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Something", &error, &reply, "s");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ assert_se(streq(s, "test"));
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_set_property(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "AutomaticIntegerProperty", &error, "u", 815);
+ assert_se(r >= 0);
+
+ assert_se(c->automatic_integer_property == 815);
+
+ r = sd_bus_set_property(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "AutomaticStringProperty", &error, "s", "Du Dödel, Du!");
+ assert_se(r >= 0);
+
+ assert_se(streq(c->automatic_string_property, "Du Dödel, Du!"));
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply, "");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ fputs(s, stdout);
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply, NULL); /* NULL and "" are equivalent */
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ fputs(s, stdout);
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_get_property(bus, "org.freedesktop.systemd.test", "/value/xuzz", "org.freedesktop.systemd.ValueTest", "Value", &error, &reply, "s");
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ log_info("read %s", s);
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/", "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply, NULL);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ fputs(s, stdout);
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value", "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply, NULL);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ fputs(s, stdout);
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value/a", "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply, NULL);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ fputs(s, stdout);
+
+ assert_se(lines = strv_split_newlines(s));
+ assert_se(strv_contains(lines, " <node name=\"x\"/>"));
+ assert_se(strv_contains(lines, " <node name=\"y\"/>"));
+ assert_se(strv_contains(lines, " <node name=\"z\"/>"));
+ lines = strv_free(lines);
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value/b", "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply, NULL);
+ assert_se(r >= 0);
+
+ r = sd_bus_message_read(reply, "s", &s);
+ assert_se(r >= 0);
+ fputs(s, stdout);
+
+ assert_se(lines = strv_split_newlines(s));
+ for (unsigned i = 0; i < 30; i++) {
+ _cleanup_free_ char *n = NULL;
+
+ assert_se(asprintf(&n, " <node name=\"%u\"/>", i) >= 0);
+ assert_se(strv_contains(lines, n));
+ }
+ lines = strv_free(lines);
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.DBus.Properties", "GetAll", &error, &reply, "s", NULL);
+ assert_se(r >= 0);
+
+ sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value/a", "org.freedesktop.DBus.Properties", "GetAll", &error, &reply, "s", "org.freedesktop.systemd.ValueTest2");
+ assert_se(r < 0);
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_INTERFACE));
+ sd_bus_error_free(&error);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.DBus.ObjectManager", "GetManagedObjects", &error, &reply, NULL);
+ assert_se(r < 0);
+ assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_METHOD));
+ sd_bus_error_free(&error);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value", "org.freedesktop.DBus.ObjectManager", "GetManagedObjects", &error, &reply, NULL);
+ assert_se(r >= 0);
+
+ sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ /* Check that /value/b does not have ObjectManager interface but /value/a does */
+ assert_se(sd_bus_message_rewind(reply, 1) > 0);
+ assert_se(sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "{oa{sa{sv}}}") > 0);
+ while (ASSERT_SE_NONNEG(sd_bus_message_enter_container(reply, SD_BUS_TYPE_DICT_ENTRY, "oa{sa{sv}}")) > 0) {
+ const char *path = NULL;
+ assert_se(sd_bus_message_read_basic(reply, 'o', &path) > 0);
+ if (STR_IN_SET(path, "/value/b", "/value/a")) {
+ /* Check that there is no object manager interface here */
+ bool found_object_manager_interface = false;
+ assert_se(sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "{sa{sv}}") > 0);
+ while (ASSERT_SE_NONNEG(sd_bus_message_enter_container(reply, SD_BUS_TYPE_DICT_ENTRY, "sa{sv}")) > 0) {
+ const char *interface_name = NULL;
+ assert_se(sd_bus_message_read_basic(reply, 's', &interface_name) > 0);
+
+ if (streq(interface_name, "org.freedesktop.DBus.ObjectManager")) {
+ assert_se(!streq(path, "/value/b"));
+ found_object_manager_interface = true;
+ }
+
+ assert_se(sd_bus_message_skip(reply, "a{sv}") >= 0);
+ assert_se(sd_bus_message_exit_container(reply) >= 0);
+ }
+ assert_se(sd_bus_message_exit_container(reply) >= 0);
+
+ if (streq(path, "/value/a")) {
+ /* ObjectManager must be here */
+ assert_se(found_object_manager_interface);
+ }
+
+ } else
+ assert_se(sd_bus_message_skip(reply, "a{sa{sv}}") >= 0);
+
+ assert_se(sd_bus_message_exit_container(reply) >= 0);
+ }
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value/a", "org.freedesktop.systemd.ValueTest", "NotifyTest", &error, NULL, NULL);
+ assert_se(r >= 0);
+
+ r = sd_bus_process(bus, &reply);
+ assert_se(r > 0);
+
+ assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.Properties", "PropertiesChanged"));
+ sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value/a", "org.freedesktop.systemd.ValueTest", "NotifyTest2", &error, NULL, NULL);
+ assert_se(r >= 0);
+
+ r = sd_bus_process(bus, &reply);
+ assert_se(r > 0);
+
+ assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.Properties", "PropertiesChanged"));
+ sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "EmitInterfacesAdded", &error, NULL, NULL);
+ assert_se(r >= 0);
+
+ r = sd_bus_process(bus, &reply);
+ assert_se(r > 0);
+
+ assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.ObjectManager", "InterfacesAdded"));
+ sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "EmitInterfacesRemoved", &error, NULL, NULL);
+ assert_se(r >= 0);
+
+ r = sd_bus_process(bus, &reply);
+ assert_se(r > 0);
+
+ assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.ObjectManager", "InterfacesRemoved"));
+ sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "EmitObjectAdded", &error, NULL, NULL);
+ assert_se(r >= 0);
+
+ r = sd_bus_process(bus, &reply);
+ assert_se(r > 0);
+
+ assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.ObjectManager", "InterfacesAdded"));
+ sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ /* Check if /value/a/x does not have org.freedesktop.DBus.ObjectManager */
+ assert_se(sd_bus_message_rewind(reply, 1) >= 0);
+ const char* should_be_value_a_x = NULL;
+ assert_se(sd_bus_message_read_basic(reply, 'o', &should_be_value_a_x) > 0);
+ assert_se(streq(should_be_value_a_x, "/value/a/x"));
+ assert_se(sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "{sa{sv}}") > 0);
+ while (ASSERT_SE_NONNEG(sd_bus_message_enter_container(reply, SD_BUS_TYPE_DICT_ENTRY, "sa{sv}")) > 0) {
+ const char* interface_name = NULL;
+ assert_se(sd_bus_message_read_basic(reply, 's', &interface_name) > 0);
+
+ assert(!streq(interface_name, "org.freedesktop.DBus.ObjectManager"));
+
+ assert_se(sd_bus_message_skip(reply, "a{sv}") >= 0);
+
+ assert_se(sd_bus_message_exit_container(reply) >= 0);
+ }
+
+ reply = sd_bus_message_unref(reply);
+
+ assert_se(sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "EmitObjectWithManagerAdded", &error, NULL, NULL) >= 0);
+
+ assert_se(sd_bus_process(bus, &reply) > 0);
+
+ assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.ObjectManager", "InterfacesAdded"));
+ sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ /* Check if /value/a has org.freedesktop.DBus.ObjectManager */
+ assert_se(sd_bus_message_rewind(reply, 1) >= 0);
+ const char* should_be_value_a = NULL;
+ bool found_object_manager = false;
+ assert_se(sd_bus_message_read_basic(reply, 'o', &should_be_value_a) > 0);
+ assert_se(streq(should_be_value_a, "/value/a"));
+ assert_se(sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "{sa{sv}}") > 0);
+ while (ASSERT_SE_NONNEG(sd_bus_message_enter_container(reply, SD_BUS_TYPE_DICT_ENTRY, "sa{sv}")) > 0) {
+ const char* interface_name = NULL;
+ assert_se(sd_bus_message_read_basic(reply, 's', &interface_name));
+
+ if (streq(interface_name, "org.freedesktop.DBus.ObjectManager")) {
+ found_object_manager = true;
+ break;
+ }
+
+ assert_se(sd_bus_message_skip(reply, "a{sv}") >= 0);
+
+ assert_se(sd_bus_message_exit_container(reply) >= 0);
+ }
+ assert_se(found_object_manager);
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "EmitObjectRemoved", &error, NULL, NULL);
+ assert_se(r >= 0);
+
+ r = sd_bus_process(bus, &reply);
+ assert_se(r > 0);
+
+ assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.ObjectManager", "InterfacesRemoved"));
+ sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER);
+
+ /* Check if /value/a/x does not have org.freedesktop.DBus.ObjectManager */
+ assert_se(sd_bus_message_rewind(reply, 1) >= 0);
+ should_be_value_a_x = NULL;
+ assert_se(sd_bus_message_read_basic(reply, 'o', &should_be_value_a_x) > 0);
+ assert_se(streq(should_be_value_a_x, "/value/a/x"));
+ assert_se(sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "s") > 0);
+ const char* deleted_interface_name = NULL;
+ while (ASSERT_SE_NONNEG(sd_bus_message_read_basic(reply, 's', &deleted_interface_name)) > 0) {
+ assert(!streq(deleted_interface_name, "org.freedesktop.DBus.ObjectManager"));
+ }
+ assert_se(sd_bus_message_exit_container(reply) >= 0);
+
+ reply = sd_bus_message_unref(reply);
+
+ r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Exit", &error, NULL, NULL);
+ assert_se(r >= 0);
+
+ sd_bus_flush(bus);
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ struct context c = {};
+ pthread_t s;
+ void *p;
+ int r, q;
+
+ test_setup_logging(LOG_DEBUG);
+
+ c.automatic_integer_property = 4711;
+ assert_se(c.automatic_string_property = strdup("dudeldu"));
+
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM, 0, c.fds) >= 0);
+
+ r = pthread_create(&s, NULL, server, &c);
+ if (r != 0)
+ return -r;
+
+ r = client(&c);
+
+ q = pthread_join(s, &p);
+ if (q != 0)
+ return -q;
+
+ if (r < 0)
+ return r;
+
+ if (PTR_TO_INT(p) < 0)
+ return PTR_TO_INT(p);
+
+ free(c.something);
+ free(c.automatic_string_property);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-peersockaddr.c b/src/libsystemd/sd-bus/test-bus-peersockaddr.c
new file mode 100644
index 0000000..79556e8
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-peersockaddr.c
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <pthread.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+
+#include "fd-util.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "tests.h"
+
+static void *server(void *p) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_close_ int listen_fd = PTR_TO_INT(p), fd = -EBADF;
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *c = NULL;
+ _cleanup_free_ char *our_comm = NULL;
+ sd_id128_t id;
+ int r;
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+
+ fd = accept4(listen_fd, NULL, NULL, SOCK_CLOEXEC|SOCK_NONBLOCK);
+ assert_se(fd >= 0);
+
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_fd(bus, fd, fd) >= 0);
+ TAKE_FD(fd);
+ assert_se(sd_bus_set_server(bus, true, id) >= 0);
+ assert_se(sd_bus_negotiate_creds(bus, 1, SD_BUS_CREDS_EUID|SD_BUS_CREDS_EGID|SD_BUS_CREDS_PID|SD_BUS_CREDS_COMM|SD_BUS_CREDS_DESCRIPTION) >= 0);
+
+ assert_se(sd_bus_start(bus) >= 0);
+
+ assert_se(sd_bus_get_owner_creds(bus, SD_BUS_CREDS_EUID|SD_BUS_CREDS_EGID|SD_BUS_CREDS_PID|SD_BUS_CREDS_COMM|SD_BUS_CREDS_DESCRIPTION, &c) >= 0);
+
+ uid_t u;
+ assert_se(sd_bus_creds_get_euid(c, &u) >= 0);
+ assert_se(u == getuid());
+
+ gid_t g;
+ assert_se(sd_bus_creds_get_egid(c, &g) >= 0);
+ assert_se(g == getgid());
+
+ pid_t pid;
+ assert_se(sd_bus_creds_get_pid(c, &pid) >= 0);
+ assert_se(pid == getpid_cached());
+
+ const char *comm;
+ assert_se(sd_bus_creds_get_comm(c, &comm) >= 0);
+ assert_se(pid_get_comm(0, &our_comm) >= 0);
+ assert_se(streq_ptr(comm, our_comm));
+
+ const char *description;
+ assert_se(sd_bus_creds_get_description(c, &description) >= 0);
+ assert_se(streq_ptr(description, "wuffwuff"));
+
+ for (;;) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+
+ r = sd_bus_process(bus, &m);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ assert_se(sd_bus_wait(bus, UINT64_MAX) >= 0);
+ continue;
+ }
+
+ if (m && sd_bus_message_is_method_call(m, "foo.foo", "Foo") > 0) {
+ assert_se(sd_bus_reply_method_return(m, "s", "bar") >= 0);
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+static void* client(void *p) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ const char *z;
+
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_description(bus, "wuffwuff") >= 0);
+ assert_se(sd_bus_set_address(bus, p) >= 0);
+ assert_se(sd_bus_start(bus) >= 0);
+
+ assert_se(sd_bus_call_method(bus, "foo.foo", "/foo", "foo.foo", "Foo", NULL, &reply, "s", "foo") >= 0);
+
+ assert_se(sd_bus_message_read(reply, "s", &z) >= 0);
+ assert_se(streq_ptr(z, "bar"));
+
+ return NULL;
+}
+
+TEST(description) {
+ _cleanup_free_ char *a = NULL;
+ _cleanup_close_ int fd = -EBADF;
+ union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ };
+ socklen_t salen;
+ pthread_t s, c;
+
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
+ assert_se(fd >= 0);
+
+ assert_se(bind(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path)) >= 0); /* force auto-bind */
+
+ assert_se(listen(fd, 1) >= 0);
+
+ salen = sizeof(sa);
+ assert_se(getsockname(fd, &sa.sa, &salen) >= 0);
+ assert_se(salen >= offsetof(struct sockaddr_un, sun_path));
+ assert_se(sa.un.sun_path[0] == 0);
+
+ assert_se(asprintf(&a, "unix:abstract=%s", sa.un.sun_path + 1) >= 0);
+
+ assert_se(pthread_create(&s, NULL, server, INT_TO_PTR(fd)) == 0);
+ TAKE_FD(fd);
+
+ assert_se(pthread_create(&c, NULL, client, a) == 0);
+
+ assert_se(pthread_join(s, NULL) == 0);
+ assert_se(pthread_join(c, NULL) == 0);
+}
+
+DEFINE_TEST_MAIN(LOG_INFO);
diff --git a/src/libsystemd/sd-bus/test-bus-queue-ref-cycle.c b/src/libsystemd/sd-bus/test-bus-queue-ref-cycle.c
new file mode 100644
index 0000000..7c2fa72
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-queue-ref-cycle.c
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#include "sd-bus.h"
+
+#include "main-func.h"
+#include "tests.h"
+
+static int test_ref_unref(void) {
+ sd_bus_message *m = NULL;
+ sd_bus *bus = NULL;
+ int r;
+
+ /* This test will result in a memory leak in <= v240, but not on v241. Hence to be really useful it
+ * should be run through a leak tracker such as valgrind. */
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_tests_skipped("Failed to connect to bus");
+
+ /* Create a message and enqueue it (this shouldn't send it though as the connection setup is not complete yet) */
+ assert_se(sd_bus_message_new_method_call(bus, &m, "foo.bar", "/foo", "quux.quux", "waldo") >= 0);
+ assert_se(sd_bus_send(bus, m, NULL) >= 0);
+
+ /* Let's now unref the message first and the bus second. */
+ m = sd_bus_message_unref(m);
+ bus = sd_bus_unref(bus);
+
+ /* We should have a memory leak now on <= v240. Let's do this again, but destroy in the opposite
+ * order. On v240 that too should be a leak. */
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0)
+ return log_tests_skipped("Failed to connect to bus");
+
+ assert_se(sd_bus_message_new_method_call(bus, &m, "foo.bar", "/foo", "quux.quux", "waldo") >= 0);
+ assert_se(sd_bus_send(bus, m, NULL) >= 0);
+
+ /* Let's now unref things in the opposite order */
+ bus = sd_bus_unref(bus);
+ m = sd_bus_message_unref(m);
+
+ return 0;
+}
+
+static int run(int argc, char *argv[]) {
+ int r;
+
+ test_setup_logging(LOG_INFO);
+
+ r = test_ref_unref();
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+DEFINE_MAIN_FUNCTION(run);
diff --git a/src/libsystemd/sd-bus/test-bus-server.c b/src/libsystemd/sd-bus/test-bus-server.c
new file mode 100644
index 0000000..8049e33
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-server.c
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <pthread.h>
+#include <stdlib.h>
+
+#include "sd-bus.h"
+
+#include "bus-internal.h"
+#include "log.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "string-util.h"
+#include "tests.h"
+
+struct context {
+ int fds[2];
+
+ bool client_negotiate_unix_fds;
+ bool server_negotiate_unix_fds;
+
+ bool client_anonymous_auth;
+ bool server_anonymous_auth;
+};
+
+static int _server(struct context *c) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ sd_id128_t id;
+ bool quit = false;
+ int r;
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_fd(bus, c->fds[0], c->fds[0]) >= 0);
+ assert_se(sd_bus_set_server(bus, 1, id) >= 0);
+ assert_se(sd_bus_set_anonymous(bus, c->server_anonymous_auth) >= 0);
+ assert_se(sd_bus_negotiate_fds(bus, c->server_negotiate_unix_fds) >= 0);
+ assert_se(sd_bus_start(bus) >= 0);
+
+ while (!quit) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+
+ r = sd_bus_process(bus, &m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to process requests: %m");
+
+ if (r == 0) {
+ r = sd_bus_wait(bus, UINT64_MAX);
+ if (r < 0)
+ return log_error_errno(r, "Failed to wait: %m");
+ continue;
+ }
+
+ if (!m)
+ continue;
+
+ log_info("Got message! member=%s", strna(sd_bus_message_get_member(m)));
+
+ if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "Exit")) {
+
+ assert_se((sd_bus_can_send(bus, 'h') >= 1) ==
+ (c->server_negotiate_unix_fds && c->client_negotiate_unix_fds));
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate return: %m");
+
+ quit = true;
+
+ } else if (sd_bus_message_is_method_call(m, NULL, NULL)) {
+ r = sd_bus_message_new_method_error(
+ m,
+ &reply,
+ &SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_UNKNOWN_METHOD, "Unknown method."));
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate return: %m");
+ }
+
+ if (reply) {
+ r = sd_bus_send(bus, reply, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send reply: %m");
+ }
+ }
+
+ return 0;
+}
+
+static void* server(void *p) {
+ return INT_TO_PTR(_server(p));
+}
+
+static int client(struct context *c) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_fd(bus, c->fds[1], c->fds[1]) >= 0);
+ assert_se(sd_bus_negotiate_fds(bus, c->client_negotiate_unix_fds) >= 0);
+ assert_se(sd_bus_set_anonymous(bus, c->client_anonymous_auth) >= 0);
+ assert_se(sd_bus_start(bus) >= 0);
+
+ r = sd_bus_message_new_method_call(
+ bus,
+ &m,
+ "org.freedesktop.systemd.test",
+ "/",
+ "org.freedesktop.systemd.test",
+ "Exit");
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate method call: %m");
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to issue method call: %s", bus_error_message(&error, r));
+
+ return 0;
+}
+
+static int test_one(bool client_negotiate_unix_fds, bool server_negotiate_unix_fds,
+ bool client_anonymous_auth, bool server_anonymous_auth) {
+
+ struct context c;
+ pthread_t s;
+ void *p;
+ int r, q;
+
+ zero(c);
+
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM, 0, c.fds) >= 0);
+
+ c.client_negotiate_unix_fds = client_negotiate_unix_fds;
+ c.server_negotiate_unix_fds = server_negotiate_unix_fds;
+ c.client_anonymous_auth = client_anonymous_auth;
+ c.server_anonymous_auth = server_anonymous_auth;
+
+ r = pthread_create(&s, NULL, server, &c);
+ if (r != 0)
+ return -r;
+
+ r = client(&c);
+
+ q = pthread_join(s, &p);
+ if (q != 0)
+ return -q;
+
+ if (r < 0)
+ return r;
+
+ if (PTR_TO_INT(p) < 0)
+ return PTR_TO_INT(p);
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ r = test_one(true, true, false, false);
+ assert_se(r >= 0);
+
+ r = test_one(true, false, false, false);
+ assert_se(r >= 0);
+
+ r = test_one(false, true, false, false);
+ assert_se(r >= 0);
+
+ r = test_one(false, false, false, false);
+ assert_se(r >= 0);
+
+ r = test_one(true, true, true, true);
+ assert_se(r >= 0);
+
+ r = test_one(true, true, false, true);
+ assert_se(r >= 0);
+
+ r = test_one(true, true, true, false);
+ assert_se(r == -EPERM);
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-signature.c b/src/libsystemd/sd-bus/test-bus-signature.c
new file mode 100644
index 0000000..5a4c811
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-signature.c
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-internal.h"
+#include "bus-signature.h"
+#include "log.h"
+#include "string-util.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ char prefix[256];
+ int r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(signature_is_single("y", false));
+ assert_se(signature_is_single("u", false));
+ assert_se(signature_is_single("v", false));
+ assert_se(signature_is_single("as", false));
+ assert_se(signature_is_single("(ss)", false));
+ assert_se(!signature_is_single("()", false));
+ assert_se(!signature_is_single("(()()()()())", false));
+ assert_se(!signature_is_single("(((())))", false));
+ assert_se(signature_is_single("((((s))))", false));
+ assert_se(signature_is_single("{ss}", true));
+ assert_se(signature_is_single("a{ss}", false));
+ assert_se(!signature_is_single("uu", false));
+ assert_se(!signature_is_single("", false));
+ assert_se(!signature_is_single("(", false));
+ assert_se(!signature_is_single(")", false));
+ assert_se(!signature_is_single("())", false));
+ assert_se(!signature_is_single("((())", false));
+ assert_se(!signature_is_single("{)", false));
+ assert_se(!signature_is_single("{}", true));
+ assert_se(!signature_is_single("{sss}", true));
+ assert_se(!signature_is_single("{s}", true));
+ assert_se(!signature_is_single("{ss}", false));
+ assert_se(!signature_is_single("{ass}", true));
+ assert_se(!signature_is_single("a}", true));
+
+ assert_se(signature_is_pair("yy"));
+ assert_se(signature_is_pair("ss"));
+ assert_se(signature_is_pair("sas"));
+ assert_se(signature_is_pair("sv"));
+ assert_se(signature_is_pair("sa(vs)"));
+ assert_se(!signature_is_pair(""));
+ assert_se(!signature_is_pair("va"));
+ assert_se(!signature_is_pair("sss"));
+ assert_se(!signature_is_pair("{s}ss"));
+
+ assert_se(signature_is_valid("ssa{ss}sssub", true));
+ assert_se(signature_is_valid("ssa{ss}sssub", false));
+ assert_se(signature_is_valid("{ss}", true));
+ assert_se(!signature_is_valid("{ss}", false));
+ assert_se(signature_is_valid("", true));
+ assert_se(signature_is_valid("", false));
+
+ assert_se(signature_is_valid("sssusa(uuubbba(uu)uuuu)a{u(uuuvas)}", false));
+
+ assert_se(!signature_is_valid("a", false));
+ assert_se(signature_is_valid("as", false));
+ assert_se(signature_is_valid("aas", false));
+ assert_se(signature_is_valid("aaas", false));
+ assert_se(signature_is_valid("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaad", false));
+ assert_se(signature_is_valid("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaas", false));
+ assert_se(!signature_is_valid("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaau", false));
+
+ assert_se(signature_is_valid("((((((((((((((((((((((((((((((((s))))))))))))))))))))))))))))))))", false));
+ assert_se(!signature_is_valid("((((((((((((((((((((((((((((((((()))))))))))))))))))))))))))))))))", false));
+
+ assert_se(namespace_complex_pattern("", ""));
+ assert_se(namespace_complex_pattern("foobar", "foobar"));
+ assert_se(namespace_complex_pattern("foobar.waldo", "foobar.waldo"));
+ assert_se(namespace_complex_pattern("foobar.", "foobar.waldo"));
+ assert_se(namespace_complex_pattern("foobar.waldo", "foobar."));
+ assert_se(!namespace_complex_pattern("foobar.waldo", "foobar"));
+ assert_se(!namespace_complex_pattern("foobar", "foobar.waldo"));
+ assert_se(!namespace_complex_pattern("", "foo"));
+ assert_se(!namespace_complex_pattern("foo", ""));
+ assert_se(!namespace_complex_pattern("foo.", ""));
+
+ assert_se(path_complex_pattern("", ""));
+ assert_se(!path_complex_pattern("", "/"));
+ assert_se(!path_complex_pattern("/", ""));
+ assert_se(path_complex_pattern("/", "/"));
+ assert_se(path_complex_pattern("/foobar/", "/"));
+ assert_se(!path_complex_pattern("/foobar/", "/foobar"));
+ assert_se(path_complex_pattern("/foobar", "/foobar"));
+ assert_se(!path_complex_pattern("/foobar", "/foobar/"));
+ assert_se(!path_complex_pattern("/foobar", "/foobar/waldo"));
+ assert_se(path_complex_pattern("/foobar/", "/foobar/waldo"));
+ assert_se(path_complex_pattern("/foobar/waldo", "/foobar/"));
+
+ assert_se(path_simple_pattern("/foo/", "/foo/bar/waldo"));
+
+ assert_se(namespace_simple_pattern("", ""));
+ assert_se(namespace_simple_pattern("", ".foobar"));
+ assert_se(namespace_simple_pattern("foobar", "foobar"));
+ assert_se(namespace_simple_pattern("foobar.waldo", "foobar.waldo"));
+ assert_se(namespace_simple_pattern("foobar", "foobar.waldo"));
+ assert_se(!namespace_simple_pattern("foobar.waldo", "foobar"));
+ assert_se(!namespace_simple_pattern("", "foo"));
+ assert_se(!namespace_simple_pattern("foo", ""));
+ assert_se(namespace_simple_pattern("foo.", "foo.bar.waldo"));
+
+ assert_se(streq(object_path_startswith("/foo/bar", "/foo"), "bar"));
+ assert_se(streq(object_path_startswith("/foo", "/foo"), ""));
+ assert_se(streq(object_path_startswith("/foo", "/"), "foo"));
+ assert_se(streq(object_path_startswith("/", "/"), ""));
+ assert_se(!object_path_startswith("/foo", "/bar"));
+ assert_se(!object_path_startswith("/", "/bar"));
+ assert_se(!object_path_startswith("/foo", ""));
+
+ assert_se(object_path_is_valid("/foo/bar"));
+ assert_se(object_path_is_valid("/foo"));
+ assert_se(object_path_is_valid("/"));
+ assert_se(object_path_is_valid("/foo5"));
+ assert_se(object_path_is_valid("/foo_5"));
+ assert_se(!object_path_is_valid(""));
+ assert_se(!object_path_is_valid("/foo/"));
+ assert_se(!object_path_is_valid("//"));
+ assert_se(!object_path_is_valid("//foo"));
+ assert_se(!object_path_is_valid("/foo//bar"));
+ assert_se(!object_path_is_valid("/foo/aaaäöä"));
+
+ OBJECT_PATH_FOREACH_PREFIX(prefix, "/") {
+ log_info("<%s>", prefix);
+ assert_not_reached();
+ }
+
+ r = 0;
+ OBJECT_PATH_FOREACH_PREFIX(prefix, "/xxx") {
+ log_info("<%s>", prefix);
+ assert_se(streq(prefix, "/"));
+ assert_se(r == 0);
+ r++;
+ }
+ assert_se(r == 1);
+
+ r = 0;
+ OBJECT_PATH_FOREACH_PREFIX(prefix, "/xxx/yyy/zzz") {
+ log_info("<%s>", prefix);
+ assert_se(r != 0 || streq(prefix, "/xxx/yyy"));
+ assert_se(r != 1 || streq(prefix, "/xxx"));
+ assert_se(r != 2 || streq(prefix, "/"));
+ r++;
+ }
+ assert_se(r == 3);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-track.c b/src/libsystemd/sd-bus/test-bus-track.c
new file mode 100644
index 0000000..5604e84
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-track.c
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sys/socket.h>
+
+#include "sd-bus.h"
+
+#include "macro.h"
+#include "tests.h"
+
+static bool track_cb_called_x = false;
+static bool track_cb_called_y = false;
+static bool track_destroy_called_z = false;
+
+static int track_cb_x(sd_bus_track *t, void *userdata) {
+
+ log_error("TRACK CB X");
+
+ assert_se(!track_cb_called_x);
+ track_cb_called_x = true;
+
+ /* This means b's name disappeared. Let's now disconnect, to make sure the track handling on disconnect works
+ * as it should. */
+
+ assert_se(shutdown(sd_bus_get_fd(sd_bus_track_get_bus(t)), SHUT_RDWR) >= 0);
+ return 1;
+}
+
+static int track_cb_y(sd_bus_track *t, void *userdata) {
+
+ log_error("TRACK CB Y");
+
+ assert_se(!track_cb_called_y);
+ track_cb_called_y = true;
+
+ /* We got disconnected, let's close everything */
+
+ assert_se(sd_event_exit(sd_bus_get_event(sd_bus_track_get_bus(t)), EXIT_SUCCESS) >= 0);
+
+ return 0;
+}
+
+static int track_cb_z(sd_bus_track *t, void *userdata) {
+ assert_not_reached();
+}
+
+static void track_destroy_z(void *userdata) {
+ track_destroy_called_z = true;
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_(sd_bus_track_unrefp) sd_bus_track *x = NULL, *y = NULL, *z = NULL;
+ _cleanup_(sd_bus_unrefp) sd_bus *a = NULL, *b = NULL;
+ bool use_system_bus = false;
+ const char *unique;
+ int r;
+
+ test_setup_logging(LOG_INFO);
+
+ assert_se(sd_event_default(&event) >= 0);
+
+ r = sd_bus_open_user(&a);
+ if (IN_SET(r, -ECONNREFUSED, -ENOENT, -ENOMEDIUM)) {
+ r = sd_bus_open_system(&a);
+ if (IN_SET(r, -ECONNREFUSED, -ENOENT))
+ return log_tests_skipped("Failed to connect to bus");
+ use_system_bus = true;
+ }
+ assert_se(r >= 0);
+
+ assert_se(sd_bus_attach_event(a, event, SD_EVENT_PRIORITY_NORMAL) >= 0);
+
+ if (use_system_bus)
+ assert_se(sd_bus_open_system(&b) >= 0);
+ else
+ assert_se(sd_bus_open_user(&b) >= 0);
+
+ assert_se(sd_bus_attach_event(b, event, SD_EVENT_PRIORITY_NORMAL) >= 0);
+
+ /* Watch b's name from a */
+ assert_se(sd_bus_track_new(a, &x, track_cb_x, NULL) >= 0);
+
+ assert_se(sd_bus_get_unique_name(b, &unique) >= 0);
+
+ assert_se(sd_bus_track_add_name(x, unique) >= 0);
+
+ /* Watch's a's own name from a */
+ assert_se(sd_bus_track_new(a, &y, track_cb_y, NULL) >= 0);
+
+ assert_se(sd_bus_get_unique_name(a, &unique) >= 0);
+
+ assert_se(sd_bus_track_add_name(y, unique) >= 0);
+
+ /* Basic tests. */
+ assert_se(sd_bus_track_new(a, &z, track_cb_z, NULL) >= 0);
+
+ /* non-recursive case */
+ assert_se(sd_bus_track_set_recursive(z, false) >= 0);
+ assert_se(sd_bus_track_get_recursive(z) == 0);
+ assert_se(!sd_bus_track_contains(z, unique));
+ assert_se(sd_bus_track_count_name(z, unique) == 0);
+ assert_se(sd_bus_track_remove_name(z, unique) == 0);
+ assert_se(sd_bus_track_add_name(z, unique) >= 0);
+ assert_se(sd_bus_track_add_name(z, unique) >= 0);
+ assert_se(sd_bus_track_add_name(z, unique) >= 0);
+ assert_se(sd_bus_track_set_recursive(z, true) == -EBUSY);
+ assert_se(sd_bus_track_contains(z, unique));
+ assert_se(sd_bus_track_count_name(z, unique) == 1);
+ assert_se(sd_bus_track_remove_name(z, unique) == 1);
+ assert_se(!sd_bus_track_contains(z, unique));
+ assert_se(sd_bus_track_count_name(z, unique) == 0);
+ assert_se(sd_bus_track_remove_name(z, unique) == 0);
+
+ /* recursive case */
+ assert_se(sd_bus_track_set_recursive(z, true) >= 0);
+ assert_se(sd_bus_track_get_recursive(z) == 1);
+ assert_se(!sd_bus_track_contains(z, unique));
+ assert_se(sd_bus_track_count_name(z, unique) == 0);
+ assert_se(sd_bus_track_remove_name(z, unique) == 0);
+ assert_se(sd_bus_track_add_name(z, unique) >= 0);
+ assert_se(sd_bus_track_add_name(z, unique) >= 0);
+ assert_se(sd_bus_track_add_name(z, unique) >= 0);
+ assert_se(sd_bus_track_set_recursive(z, false) == -EBUSY);
+ assert_se(sd_bus_track_contains(z, unique));
+ assert_se(sd_bus_track_count_name(z, unique) == 3);
+ assert_se(sd_bus_track_remove_name(z, unique) == 1);
+ assert_se(sd_bus_track_contains(z, unique));
+ assert_se(sd_bus_track_count_name(z, unique) == 2);
+ assert_se(sd_bus_track_remove_name(z, unique) == 1);
+ assert_se(sd_bus_track_contains(z, unique));
+ assert_se(sd_bus_track_count_name(z, unique) == 1);
+ assert_se(sd_bus_track_remove_name(z, unique) == 1);
+ assert_se(!sd_bus_track_contains(z, unique));
+ assert_se(sd_bus_track_count_name(z, unique) == 0);
+ assert_se(sd_bus_track_remove_name(z, unique) == 0);
+
+ assert_se(sd_bus_track_set_destroy_callback(z, track_destroy_z) >= 0);
+ z = sd_bus_track_unref(z);
+ assert_se(track_destroy_called_z);
+
+ /* Now make b's name disappear */
+ sd_bus_close(b);
+
+ assert_se(sd_event_loop(event) >= 0);
+
+ assert_se(track_cb_called_x);
+ assert_se(track_cb_called_y);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-vtable-cc.cc b/src/libsystemd/sd-bus/test-bus-vtable-cc.cc
new file mode 120000
index 0000000..abee398
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-vtable-cc.cc
@@ -0,0 +1 @@
+test-bus-vtable.c \ No newline at end of file
diff --git a/src/libsystemd/sd-bus/test-bus-vtable.c b/src/libsystemd/sd-bus/test-bus-vtable.c
new file mode 100644
index 0000000..fe12238
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-vtable.c
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdbool.h>
+#include <stddef.h>
+
+/* We use system assert.h here, because we don't want to keep macro.h and log.h C++ compatible */
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+
+#include "sd-bus-vtable.h"
+
+#ifndef __cplusplus
+# include "bus-objects.h"
+#endif
+
+#include "test-vtable-data.h"
+
+#define DEFAULT_BUS_PATH "unix:path=/run/dbus/system_bus_socket"
+
+static struct context c = {};
+static int happy_finder_object = 0;
+
+static int happy_finder(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) {
+ assert(userdata);
+ assert(userdata == &c);
+
+#ifndef __cplusplus
+ log_info("%s called", __func__);
+#endif
+
+ happy_finder_object++;
+ *found = &happy_finder_object;
+ return 1; /* found */
+}
+
+static void test_vtable(void) {
+ sd_bus *bus = NULL;
+ int r;
+
+ assert(sd_bus_new(&bus) >= 0);
+
+ assert(sd_bus_add_object_vtable(bus, NULL, "/foo", "org.freedesktop.systemd.testVtable", test_vtable_2, &c) >= 0);
+ assert(sd_bus_add_object_vtable(bus, NULL, "/foo", "org.freedesktop.systemd.testVtable2", test_vtable_2, &c) >= 0);
+ /* the cast on the line below is needed to test with the old version of the table */
+ assert(sd_bus_add_object_vtable(bus, NULL, "/foo", "org.freedesktop.systemd.testVtable221",
+ (const sd_bus_vtable *)vtable_format_221, &c) >= 0);
+
+ assert(sd_bus_add_fallback_vtable(bus, NULL, "/fallback", "org.freedesktop.systemd.testVtable2", test_vtable_2, happy_finder, &c) >= 0);
+
+ assert(sd_bus_set_address(bus, DEFAULT_BUS_PATH) >= 0);
+ r = sd_bus_start(bus);
+ assert(r == 0 || /* success */
+ r == -ENOENT /* dbus is inactive */ );
+
+#ifndef __cplusplus
+ _cleanup_free_ char *s, *s2;
+
+ assert_se(introspect_path(bus, "/foo", NULL, false, true, NULL, &s, NULL) == 1);
+ fputs(s, stdout);
+
+ assert_se(introspect_path(bus, "/fallback", NULL, false, true, NULL, &s2, NULL) == 1);
+ fputs(s2, stdout);
+
+ assert_se(happy_finder_object == 1);
+#endif
+
+ sd_bus_unref(bus);
+}
+
+int main(int argc, char **argv) {
+ test_vtable();
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-bus-watch-bind.c b/src/libsystemd/sd-bus/test-bus-watch-bind.c
new file mode 100644
index 0000000..7f73c6e
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-bus-watch-bind.c
@@ -0,0 +1,228 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <pthread.h>
+
+#include "sd-bus.h"
+#include "sd-event.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "random-util.h"
+#include "rm-rf.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+#include "tests.h"
+
+static int method_foobar(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ log_info("Got Foobar() call.");
+
+ assert_se(sd_event_exit(sd_bus_get_event(sd_bus_message_get_bus(m)), 0) >= 0);
+ return sd_bus_reply_method_return(m, NULL);
+}
+
+static int method_exit(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ log_info("Got Exit() call");
+
+ assert_se(sd_bus_reply_method_return(m, NULL) >= 0);
+ /* Simulate D-Bus going away to test the bus_exit_now() path with exit_on_disconnect set */
+ bus_enter_closing(sd_bus_message_get_bus(m));
+ return 0;
+}
+
+static const sd_bus_vtable vtable[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_METHOD("Foobar", NULL, NULL, method_foobar, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Exit", NULL, NULL, method_exit, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_VTABLE_END,
+};
+
+static void* thread_server(void *p) {
+ _cleanup_free_ char *suffixed = NULL, *suffixed2 = NULL, *d = NULL;
+ _cleanup_close_ int fd = -EBADF;
+ union sockaddr_union u;
+ const char *path = p;
+ int r;
+
+ log_debug("Initializing server");
+
+ /* Let's play some games, by slowly creating the socket directory, and renaming it in the middle */
+ usleep_safe(100 * USEC_PER_MSEC);
+
+ assert_se(mkdir_parents(path, 0755) >= 0);
+ usleep_safe(100 * USEC_PER_MSEC);
+
+ assert_se(path_extract_directory(path, &d) >= 0);
+ assert_se(asprintf(&suffixed, "%s.%" PRIx64, d, random_u64()) >= 0);
+ assert_se(rename(d, suffixed) >= 0);
+ usleep_safe(100 * USEC_PER_MSEC);
+
+ assert_se(asprintf(&suffixed2, "%s.%" PRIx64, d, random_u64()) >= 0);
+ assert_se(symlink(suffixed2, d) >= 0);
+ usleep_safe(100 * USEC_PER_MSEC);
+
+ assert_se(symlink(basename(suffixed), suffixed2) >= 0);
+ usleep_safe(100 * USEC_PER_MSEC);
+
+ socklen_t sa_len;
+ r = sockaddr_un_set_path(&u.un, path);
+ assert_se(r >= 0);
+ sa_len = r;
+
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
+ assert_se(fd >= 0);
+
+ assert_se(bind(fd, &u.sa, sa_len) >= 0);
+ usleep_safe(100 * USEC_PER_MSEC);
+
+ assert_se(listen(fd, SOMAXCONN_DELUXE) >= 0);
+ usleep_safe(100 * USEC_PER_MSEC);
+
+ assert_se(touch(path) >= 0);
+ usleep_safe(100 * USEC_PER_MSEC);
+
+ log_debug("Initialized server");
+
+ for (;;) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ sd_id128_t id;
+ int bus_fd, code;
+
+ assert_se(sd_id128_randomize(&id) >= 0);
+
+ assert_se(sd_event_new(&event) >= 0);
+
+ bus_fd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ assert_se(bus_fd >= 0);
+
+ log_debug("Accepted server connection");
+
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_exit_on_disconnect(bus, true) >= 0);
+ assert_se(sd_bus_set_description(bus, "server") >= 0);
+ assert_se(sd_bus_set_fd(bus, bus_fd, bus_fd) >= 0);
+ assert_se(sd_bus_set_server(bus, true, id) >= 0);
+ /* assert_se(sd_bus_set_anonymous(bus, true) >= 0); */
+
+ assert_se(sd_bus_attach_event(bus, event, 0) >= 0);
+
+ assert_se(sd_bus_add_object_vtable(bus, NULL, "/foo", "foo.TestInterface", vtable, NULL) >= 0);
+
+ assert_se(sd_bus_start(bus) >= 0);
+
+ assert_se(sd_event_loop(event) >= 0);
+
+ assert_se(sd_event_get_exit_code(event, &code) >= 0);
+
+ if (code > 0)
+ break;
+ }
+
+ log_debug("Server done");
+
+ return NULL;
+}
+
+static void* thread_client1(void *p) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ const char *path = p, *t;
+ int r;
+
+ log_debug("Initializing client1");
+
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_description(bus, "client1") >= 0);
+
+ t = strjoina("unix:path=", path);
+ assert_se(sd_bus_set_address(bus, t) >= 0);
+ assert_se(sd_bus_set_watch_bind(bus, true) >= 0);
+ assert_se(sd_bus_start(bus) >= 0);
+
+ r = sd_bus_call_method(bus, "foo.bar", "/foo", "foo.TestInterface", "Foobar", &error, NULL, NULL);
+ assert_se(r >= 0);
+
+ log_debug("Client1 done");
+
+ return NULL;
+}
+
+static int client2_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ assert_se(sd_bus_message_is_method_error(m, NULL) == 0);
+ assert_se(sd_event_exit(sd_bus_get_event(sd_bus_message_get_bus(m)), 0) >= 0);
+ return 0;
+}
+
+static void* thread_client2(void *p) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ const char *path = p, *t;
+
+ log_debug("Initializing client2");
+
+ assert_se(sd_event_new(&event) >= 0);
+ assert_se(sd_bus_new(&bus) >= 0);
+ assert_se(sd_bus_set_description(bus, "client2") >= 0);
+
+ t = strjoina("unix:path=", path);
+ assert_se(sd_bus_set_address(bus, t) >= 0);
+ assert_se(sd_bus_set_watch_bind(bus, true) >= 0);
+ assert_se(sd_bus_attach_event(bus, event, 0) >= 0);
+ assert_se(sd_bus_start(bus) >= 0);
+
+ assert_se(sd_bus_call_method_async(bus, NULL, "foo.bar", "/foo", "foo.TestInterface", "Foobar", client2_callback, NULL, NULL) >= 0);
+
+ assert_se(sd_event_loop(event) >= 0);
+
+ log_debug("Client2 done");
+
+ return NULL;
+}
+
+static void request_exit(const char *path) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ const char *t;
+
+ assert_se(sd_bus_new(&bus) >= 0);
+
+ t = strjoina("unix:path=", path);
+ assert_se(sd_bus_set_address(bus, t) >= 0);
+ assert_se(sd_bus_set_watch_bind(bus, true) >= 0);
+ assert_se(sd_bus_set_description(bus, "request-exit") >= 0);
+ assert_se(sd_bus_start(bus) >= 0);
+
+ assert_se(sd_bus_call_method(bus, "foo.bar", "/foo", "foo.TestInterface", "Exit", NULL, NULL, NULL) >= 0);
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(rm_rf_physical_and_freep) char *d = NULL;
+ pthread_t server, client1, client2;
+ char *path;
+
+ test_setup_logging(LOG_DEBUG);
+
+ /* We use /dev/shm here rather than /tmp, since some weird distros might set up /tmp as some weird fs that
+ * doesn't support inotify properly. */
+ assert_se(mkdtemp_malloc("/dev/shm/systemd-watch-bind-XXXXXX", &d) >= 0);
+
+ path = strjoina(d, "/this/is/a/socket");
+
+ assert_se(pthread_create(&server, NULL, thread_server, path) == 0);
+ assert_se(pthread_create(&client1, NULL, thread_client1, path) == 0);
+ assert_se(pthread_create(&client2, NULL, thread_client2, path) == 0);
+
+ assert_se(pthread_join(client1, NULL) == 0);
+ assert_se(pthread_join(client2, NULL) == 0);
+
+ request_exit(path);
+
+ assert_se(pthread_join(server, NULL) == 0);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-bus/test-vtable-data.h b/src/libsystemd/sd-bus/test-vtable-data.h
new file mode 100644
index 0000000..7269a49
--- /dev/null
+++ b/src/libsystemd/sd-bus/test-vtable-data.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/* This is meant to be included in other files, hence no headers */
+
+struct context {
+ bool quit;
+ char *something;
+ char *automatic_string_property;
+ uint32_t automatic_integer_property;
+};
+
+static int handler(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ return 1;
+}
+
+static int value_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error) {
+ return 1;
+}
+
+static int get_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error) {
+ return 1;
+}
+
+static int set_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *error) {
+ return 1;
+}
+
+static const sd_bus_vtable test_vtable_1[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_METHOD("Hello", "ssas", "a(uu)", NULL, 0),
+ SD_BUS_METHOD("DeprecatedHello", "", "", NULL, SD_BUS_VTABLE_DEPRECATED),
+ SD_BUS_METHOD("DeprecatedHelloNoReply", "", "", NULL, SD_BUS_VTABLE_DEPRECATED|SD_BUS_VTABLE_METHOD_NO_REPLY),
+ SD_BUS_SIGNAL("Wowza", "sss", 0),
+ SD_BUS_SIGNAL("DeprecatedWowza", "ut", SD_BUS_VTABLE_DEPRECATED),
+ SD_BUS_WRITABLE_PROPERTY("AProperty", "s", get_handler, set_handler, 0, 0),
+ SD_BUS_PROPERTY("AReadOnlyDeprecatedProperty", "(ut)", get_handler, 0, SD_BUS_VTABLE_DEPRECATED),
+ SD_BUS_PROPERTY("ChangingProperty", "t", get_handler, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Invalidating", "t", get_handler, 0, SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_PROPERTY("Constant", "t", get_handler, 0, SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_PROPERTY_EXPLICIT),
+ SD_BUS_VTABLE_END
+};
+
+static const sd_bus_vtable test_vtable_2[] = {
+ SD_BUS_VTABLE_START(0),
+ SD_BUS_METHOD("AlterSomething", "s", "s", handler, SD_BUS_VTABLE_UNPRIVILEGED),
+ SD_BUS_METHOD("Exit", "", "", handler, 0),
+ SD_BUS_METHOD_WITH_OFFSET("AlterSomething2", "s", "s", handler, 200, 0),
+ SD_BUS_METHOD_WITH_OFFSET("Exit2", "", "", handler, 200, 0),
+ SD_BUS_METHOD_WITH_NAMES_OFFSET("AlterSomething3", "so", SD_BUS_PARAM(string) SD_BUS_PARAM(path),
+ "s", SD_BUS_PARAM(returnstring), handler, 200, 0),
+ SD_BUS_METHOD_WITH_NAMES("Exit3", "bx", SD_BUS_PARAM(with_confirmation) SD_BUS_PARAM(after_msec),
+ "bb", SD_BUS_PARAM(accepted) SD_BUS_PARAM(scheduled), handler, 0),
+ SD_BUS_PROPERTY("Value", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+ SD_BUS_PROPERTY("Value2", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_PROPERTY("Value3", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("Value4", "s", value_handler, 10, 0),
+ SD_BUS_PROPERTY("AnExplicitProperty", "s", NULL, offsetof(struct context, something),
+ SD_BUS_VTABLE_PROPERTY_EXPLICIT|SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
+ SD_BUS_WRITABLE_PROPERTY("Something", "s", get_handler, set_handler, 0, 0),
+ SD_BUS_WRITABLE_PROPERTY("AutomaticStringProperty", "s", NULL, NULL,
+ offsetof(struct context, automatic_string_property), 0),
+ SD_BUS_WRITABLE_PROPERTY("AutomaticIntegerProperty", "u", NULL, NULL,
+ offsetof(struct context, automatic_integer_property), 0),
+ SD_BUS_METHOD("NoOperation", NULL, NULL, NULL, 0),
+ SD_BUS_SIGNAL("DummySignal", "b", 0),
+ SD_BUS_SIGNAL("DummySignal2", "so", 0),
+ SD_BUS_SIGNAL_WITH_NAMES("DummySignal3", "so", SD_BUS_PARAM(string) SD_BUS_PARAM(path), 0),
+ SD_BUS_VTABLE_END
+};
+
+static const sd_bus_vtable test_vtable_deprecated[] = {
+ SD_BUS_VTABLE_START(SD_BUS_VTABLE_DEPRECATED),
+ SD_BUS_VTABLE_END
+};
+
+struct sd_bus_vtable_221 {
+ uint8_t type:8;
+ uint64_t flags:56;
+ union {
+ struct {
+ size_t element_size;
+ } start;
+ struct {
+ const char *member;
+ const char *signature;
+ const char *result;
+ sd_bus_message_handler_t handler;
+ size_t offset;
+ } method;
+ struct {
+ const char *member;
+ const char *signature;
+ } signal;
+ struct {
+ const char *member;
+ const char *signature;
+ sd_bus_property_get_t get;
+ sd_bus_property_set_t set;
+ size_t offset;
+ } property;
+ } x;
+};
+
+static const struct sd_bus_vtable_221 vtable_format_221[] = {
+ {
+ .type = _SD_BUS_VTABLE_START,
+ .flags = 0,
+ .x = {
+ .start = {
+ .element_size = sizeof(struct sd_bus_vtable_221)
+ },
+ },
+ },
+ {
+ .type = _SD_BUS_VTABLE_METHOD,
+ .flags = 0,
+ .x = {
+ .method = {
+ .member = "Exit",
+ .signature = "",
+ .result = "",
+ .handler = handler,
+ .offset = 0,
+ },
+ },
+ },
+ {
+ .type = _SD_BUS_VTABLE_END,
+ .flags = 0,
+ .x = { { 0 } },
+ }
+};
diff --git a/src/libsystemd/sd-daemon/sd-daemon.c b/src/libsystemd/sd-daemon/sd-daemon.c
new file mode 100644
index 0000000..6a60cde
--- /dev/null
+++ b/src/libsystemd/sd-daemon/sd-daemon.c
@@ -0,0 +1,775 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <limits.h>
+#include <mqueue.h>
+#include <netinet/in.h>
+#include <poll.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "iovec-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "stat-util.h"
+#include "strv.h"
+#include "time-util.h"
+
+#define SNDBUF_SIZE (8*1024*1024)
+
+static void unsetenv_all(bool unset_environment) {
+ if (!unset_environment)
+ return;
+
+ assert_se(unsetenv("LISTEN_PID") == 0);
+ assert_se(unsetenv("LISTEN_FDS") == 0);
+ assert_se(unsetenv("LISTEN_FDNAMES") == 0);
+}
+
+_public_ int sd_listen_fds(int unset_environment) {
+ const char *e;
+ int n, r;
+ pid_t pid;
+
+ e = getenv("LISTEN_PID");
+ if (!e) {
+ r = 0;
+ goto finish;
+ }
+
+ r = parse_pid(e, &pid);
+ if (r < 0)
+ goto finish;
+
+ /* Is this for us? */
+ if (getpid_cached() != pid) {
+ r = 0;
+ goto finish;
+ }
+
+ e = getenv("LISTEN_FDS");
+ if (!e) {
+ r = 0;
+ goto finish;
+ }
+
+ r = safe_atoi(e, &n);
+ if (r < 0)
+ goto finish;
+
+ assert_cc(SD_LISTEN_FDS_START < INT_MAX);
+ if (n <= 0 || n > INT_MAX - SD_LISTEN_FDS_START) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ for (int fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd ++) {
+ r = fd_cloexec(fd, true);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = n;
+
+finish:
+ unsetenv_all(unset_environment);
+ return r;
+}
+
+_public_ int sd_listen_fds_with_names(int unset_environment, char ***names) {
+ _cleanup_strv_free_ char **l = NULL;
+ bool have_names;
+ int n_names = 0, n_fds;
+ const char *e;
+ int r;
+
+ if (!names)
+ return sd_listen_fds(unset_environment);
+
+ e = getenv("LISTEN_FDNAMES");
+ if (e) {
+ n_names = strv_split_full(&l, e, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (n_names < 0) {
+ unsetenv_all(unset_environment);
+ return n_names;
+ }
+
+ have_names = true;
+ } else
+ have_names = false;
+
+ n_fds = sd_listen_fds(unset_environment);
+ if (n_fds <= 0)
+ return n_fds;
+
+ if (have_names) {
+ if (n_names != n_fds)
+ return -EINVAL;
+ } else {
+ r = strv_extend_n(&l, "unknown", n_fds);
+ if (r < 0)
+ return r;
+ }
+
+ *names = TAKE_PTR(l);
+
+ return n_fds;
+}
+
+_public_ int sd_is_fifo(int fd, const char *path) {
+ struct stat st_fd;
+
+ assert_return(fd >= 0, -EBADF);
+
+ if (fstat(fd, &st_fd) < 0)
+ return -errno;
+
+ if (!S_ISFIFO(st_fd.st_mode))
+ return 0;
+
+ if (path) {
+ struct stat st_path;
+
+ if (stat(path, &st_path) < 0) {
+
+ if (IN_SET(errno, ENOENT, ENOTDIR))
+ return 0;
+
+ return -errno;
+ }
+
+ return stat_inode_same(&st_path, &st_fd);
+ }
+
+ return 1;
+}
+
+_public_ int sd_is_special(int fd, const char *path) {
+ struct stat st_fd;
+
+ assert_return(fd >= 0, -EBADF);
+
+ if (fstat(fd, &st_fd) < 0)
+ return -errno;
+
+ if (!S_ISREG(st_fd.st_mode) && !S_ISCHR(st_fd.st_mode))
+ return 0;
+
+ if (path) {
+ struct stat st_path;
+
+ if (stat(path, &st_path) < 0) {
+
+ if (IN_SET(errno, ENOENT, ENOTDIR))
+ return 0;
+
+ return -errno;
+ }
+
+ if (S_ISREG(st_fd.st_mode) && S_ISREG(st_path.st_mode))
+ return stat_inode_same(&st_path, &st_fd);
+ else if (S_ISCHR(st_fd.st_mode) && S_ISCHR(st_path.st_mode))
+ return st_path.st_rdev == st_fd.st_rdev;
+ else
+ return 0;
+ }
+
+ return 1;
+}
+
+static int is_socket_internal(int fd, int type, int listening) {
+ struct stat st_fd;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(type >= 0, -EINVAL);
+
+ if (fstat(fd, &st_fd) < 0)
+ return -errno;
+
+ if (!S_ISSOCK(st_fd.st_mode))
+ return 0;
+
+ if (type != 0) {
+ int other_type = 0;
+ socklen_t l = sizeof(other_type);
+
+ if (getsockopt(fd, SOL_SOCKET, SO_TYPE, &other_type, &l) < 0)
+ return -errno;
+
+ if (l != sizeof(other_type))
+ return -EINVAL;
+
+ if (other_type != type)
+ return 0;
+ }
+
+ if (listening >= 0) {
+ int accepting = 0;
+ socklen_t l = sizeof(accepting);
+
+ if (getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &accepting, &l) < 0)
+ return -errno;
+
+ if (l != sizeof(accepting))
+ return -EINVAL;
+
+ if (!accepting != !listening)
+ return 0;
+ }
+
+ return 1;
+}
+
+_public_ int sd_is_socket(int fd, int family, int type, int listening) {
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(family >= 0, -EINVAL);
+
+ r = is_socket_internal(fd, type, listening);
+ if (r <= 0)
+ return r;
+
+ if (family > 0) {
+ union sockaddr_union sockaddr = {};
+ socklen_t l = sizeof(sockaddr);
+
+ if (getsockname(fd, &sockaddr.sa, &l) < 0)
+ return -errno;
+
+ if (l < sizeof(sa_family_t))
+ return -EINVAL;
+
+ return sockaddr.sa.sa_family == family;
+ }
+
+ return 1;
+}
+
+_public_ int sd_is_socket_inet(int fd, int family, int type, int listening, uint16_t port) {
+ union sockaddr_union sockaddr = {};
+ socklen_t l = sizeof(sockaddr);
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(IN_SET(family, 0, AF_INET, AF_INET6), -EINVAL);
+
+ r = is_socket_internal(fd, type, listening);
+ if (r <= 0)
+ return r;
+
+ if (getsockname(fd, &sockaddr.sa, &l) < 0)
+ return -errno;
+
+ if (l < sizeof(sa_family_t))
+ return -EINVAL;
+
+ if (!IN_SET(sockaddr.sa.sa_family, AF_INET, AF_INET6))
+ return 0;
+
+ if (family != 0)
+ if (sockaddr.sa.sa_family != family)
+ return 0;
+
+ if (port > 0) {
+ unsigned sa_port;
+
+ r = sockaddr_port(&sockaddr.sa, &sa_port);
+ if (r < 0)
+ return r;
+
+ return port == sa_port;
+ }
+
+ return 1;
+}
+
+_public_ int sd_is_socket_sockaddr(int fd, int type, const struct sockaddr* addr, unsigned addr_len, int listening) {
+ union sockaddr_union sockaddr = {};
+ socklen_t l = sizeof(sockaddr);
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(addr, -EINVAL);
+ assert_return(addr_len >= sizeof(sa_family_t), -ENOBUFS);
+ assert_return(IN_SET(addr->sa_family, AF_INET, AF_INET6), -EPFNOSUPPORT);
+
+ r = is_socket_internal(fd, type, listening);
+ if (r <= 0)
+ return r;
+
+ if (getsockname(fd, &sockaddr.sa, &l) < 0)
+ return -errno;
+
+ if (l < sizeof(sa_family_t))
+ return -EINVAL;
+
+ if (sockaddr.sa.sa_family != addr->sa_family)
+ return 0;
+
+ if (sockaddr.sa.sa_family == AF_INET) {
+ const struct sockaddr_in *in = (const struct sockaddr_in *) addr;
+
+ if (l < sizeof(struct sockaddr_in) || addr_len < sizeof(struct sockaddr_in))
+ return -EINVAL;
+
+ if (in->sin_port != 0 &&
+ sockaddr.in.sin_port != in->sin_port)
+ return false;
+
+ return sockaddr.in.sin_addr.s_addr == in->sin_addr.s_addr;
+
+ } else {
+ const struct sockaddr_in6 *in = (const struct sockaddr_in6 *) addr;
+
+ if (l < sizeof(struct sockaddr_in6) || addr_len < sizeof(struct sockaddr_in6))
+ return -EINVAL;
+
+ if (in->sin6_port != 0 &&
+ sockaddr.in6.sin6_port != in->sin6_port)
+ return false;
+
+ if (in->sin6_flowinfo != 0 &&
+ sockaddr.in6.sin6_flowinfo != in->sin6_flowinfo)
+ return false;
+
+ if (in->sin6_scope_id != 0 &&
+ sockaddr.in6.sin6_scope_id != in->sin6_scope_id)
+ return false;
+
+ return memcmp(sockaddr.in6.sin6_addr.s6_addr, in->sin6_addr.s6_addr,
+ sizeof(in->sin6_addr.s6_addr)) == 0;
+ }
+}
+
+_public_ int sd_is_socket_unix(int fd, int type, int listening, const char *path, size_t length) {
+ union sockaddr_union sockaddr = {};
+ socklen_t l = sizeof(sockaddr);
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+
+ r = is_socket_internal(fd, type, listening);
+ if (r <= 0)
+ return r;
+
+ if (getsockname(fd, &sockaddr.sa, &l) < 0)
+ return -errno;
+
+ if (l < sizeof(sa_family_t))
+ return -EINVAL;
+
+ if (sockaddr.sa.sa_family != AF_UNIX)
+ return 0;
+
+ if (path) {
+ if (length == 0)
+ length = strlen(path);
+
+ if (length == 0)
+ /* Unnamed socket */
+ return l == offsetof(struct sockaddr_un, sun_path);
+
+ if (path[0])
+ /* Normal path socket */
+ return
+ (l >= offsetof(struct sockaddr_un, sun_path) + length + 1) &&
+ memcmp(path, sockaddr.un.sun_path, length+1) == 0;
+ else
+ /* Abstract namespace socket */
+ return
+ (l == offsetof(struct sockaddr_un, sun_path) + length) &&
+ memcmp(path, sockaddr.un.sun_path, length) == 0;
+ }
+
+ return 1;
+}
+
+_public_ int sd_is_mq(int fd, const char *path) {
+ struct mq_attr attr;
+
+ /* Check that the fd is valid */
+ assert_return(fcntl(fd, F_GETFD) >= 0, -errno);
+
+ if (mq_getattr(fd, &attr) < 0) {
+ if (errno == EBADF)
+ /* A non-mq fd (or an invalid one, but we ruled that out above) */
+ return 0;
+ return -errno;
+ }
+
+ if (path) {
+ _cleanup_free_ char *fpath = NULL;
+ struct stat a, b;
+
+ assert_return(path_is_absolute(path), -EINVAL);
+
+ if (fstat(fd, &a) < 0)
+ return -errno;
+
+ fpath = path_join("/dev/mqueue", path);
+ if (!fpath)
+ return -ENOMEM;
+
+ if (stat(fpath, &b) < 0)
+ return -errno;
+
+ if (!stat_inode_same(&a, &b))
+ return 0;
+ }
+
+ return 1;
+}
+
+static int vsock_bind_privileged_port(int fd) {
+ union sockaddr_union sa = {
+ .vm.svm_family = AF_VSOCK,
+ .vm.svm_cid = VMADDR_CID_ANY,
+ .vm.svm_port = 1023,
+ };
+ int r;
+
+ assert(fd >= 0);
+
+ do
+ r = RET_NERRNO(bind(fd, &sa.sa, sizeof(sa.vm)));
+ while (r == -EADDRINUSE && --sa.vm.svm_port > 0);
+
+ return r;
+}
+
+static int pid_notify_with_fds_internal(
+ pid_t pid,
+ const char *state,
+ const int *fds,
+ unsigned n_fds) {
+ SocketAddress address;
+ struct iovec iovec;
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_name = &address.sockaddr,
+ };
+ _cleanup_close_ int fd = -EBADF;
+ struct cmsghdr *cmsg = NULL;
+ const char *e;
+ bool send_ucred;
+ ssize_t n;
+ int type, r;
+
+ if (!state)
+ return -EINVAL;
+
+ if (n_fds > 0 && !fds)
+ return -EINVAL;
+
+ e = getenv("NOTIFY_SOCKET");
+ if (!e)
+ return 0;
+
+ /* Allow AF_UNIX and AF_VSOCK, reject the rest. */
+ r = socket_address_parse_unix(&address, e);
+ if (r == -EPROTO)
+ r = socket_address_parse_vsock(&address, e);
+ if (r < 0)
+ return r;
+ msghdr.msg_namelen = address.size;
+
+ /* If we didn't get an address (which is a normal pattern when specifying VSOCK tuples) error out,
+ * we always require a specific CID. */
+ if (address.sockaddr.vm.svm_family == AF_VSOCK && address.sockaddr.vm.svm_cid == VMADDR_CID_ANY)
+ return -EINVAL;
+
+ type = address.type == 0 ? SOCK_DGRAM : address.type;
+
+ /* At the time of writing QEMU does not yet support AF_VSOCK + SOCK_DGRAM and returns
+ * ENODEV. Fallback to SOCK_SEQPACKET in that case. */
+ fd = socket(address.sockaddr.sa.sa_family, type|SOCK_CLOEXEC, 0);
+ if (fd < 0) {
+ if (!(ERRNO_IS_NOT_SUPPORTED(errno) || errno == ENODEV) || address.sockaddr.sa.sa_family != AF_VSOCK || address.type > 0)
+ return log_debug_errno(errno, "Failed to open %s notify socket to '%s': %m", socket_address_type_to_string(type), e);
+
+ type = SOCK_SEQPACKET;
+ fd = socket(address.sockaddr.sa.sa_family, type|SOCK_CLOEXEC, 0);
+ if (fd < 0 && ERRNO_IS_NOT_SUPPORTED(errno)) {
+ type = SOCK_STREAM;
+ fd = socket(address.sockaddr.sa.sa_family, type|SOCK_CLOEXEC, 0);
+ }
+ if (fd < 0)
+ return log_debug_errno(errno, "Failed to open %s socket to '%s': %m", socket_address_type_to_string(type), e);
+ }
+
+ if (address.sockaddr.sa.sa_family == AF_VSOCK) {
+ r = vsock_bind_privileged_port(fd);
+ if (r < 0 && !ERRNO_IS_PRIVILEGE(r))
+ return log_debug_errno(r, "Failed to bind socket to privileged port: %m");
+ }
+
+ if (IN_SET(type, SOCK_STREAM, SOCK_SEQPACKET)) {
+ if (connect(fd, &address.sockaddr.sa, address.size) < 0)
+ return log_debug_errno(errno, "Failed to connect socket to '%s': %m", e);
+
+ msghdr.msg_name = NULL;
+ msghdr.msg_namelen = 0;
+ }
+
+ (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
+
+ iovec = IOVEC_MAKE_STRING(state);
+
+ send_ucred =
+ (pid != 0 && pid != getpid_cached()) ||
+ getuid() != geteuid() ||
+ getgid() != getegid();
+
+ if (n_fds > 0 || send_ucred) {
+ /* CMSG_SPACE(0) may return value different than zero, which results in miscalculated controllen. */
+ msghdr.msg_controllen =
+ (n_fds > 0 ? CMSG_SPACE(sizeof(int) * n_fds) : 0) +
+ (send_ucred ? CMSG_SPACE(sizeof(struct ucred)) : 0);
+
+ msghdr.msg_control = alloca0(msghdr.msg_controllen);
+
+ cmsg = CMSG_FIRSTHDR(&msghdr);
+ if (n_fds > 0) {
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int) * n_fds);
+
+ memcpy(CMSG_DATA(cmsg), fds, sizeof(int) * n_fds);
+
+ if (send_ucred)
+ assert_se(cmsg = CMSG_NXTHDR(&msghdr, cmsg));
+ }
+
+ if (send_ucred) {
+ struct ucred *ucred;
+
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_CREDENTIALS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
+
+ ucred = CMSG_TYPED_DATA(cmsg, struct ucred);
+ ucred->pid = pid != 0 ? pid : getpid_cached();
+ ucred->uid = getuid();
+ ucred->gid = getgid();
+ }
+ }
+
+ do {
+ /* First try with fake ucred data, as requested */
+ n = sendmsg(fd, &msghdr, MSG_NOSIGNAL);
+ if (n < 0) {
+ if (!send_ucred)
+ return log_debug_errno(errno, "Failed to send notify message to '%s': %m", e);
+
+ /* If that failed, try with our own ucred instead */
+ msghdr.msg_controllen -= CMSG_SPACE(sizeof(struct ucred));
+ if (msghdr.msg_controllen == 0)
+ msghdr.msg_control = NULL;
+
+ n = 0;
+ send_ucred = false;
+ } else {
+ /* Unless we're using SOCK_STREAM, we expect to write all the contents immediately. */
+ if (type != SOCK_STREAM && (size_t) n < iovec_total_size(msghdr.msg_iov, msghdr.msg_iovlen))
+ return -EIO;
+
+ /* Make sure we only send fds and ucred once, even if we're using SOCK_STREAM. */
+ msghdr.msg_control = NULL;
+ msghdr.msg_controllen = 0;
+ }
+ } while (!iovec_increment(msghdr.msg_iov, msghdr.msg_iovlen, n));
+
+ return 1;
+}
+
+_public_ int sd_pid_notify_with_fds(
+ pid_t pid,
+ int unset_environment,
+ const char *state,
+ const int *fds,
+ unsigned n_fds) {
+
+ int r;
+
+ r = pid_notify_with_fds_internal(pid, state, fds, n_fds);
+
+ if (unset_environment)
+ assert_se(unsetenv("NOTIFY_SOCKET") == 0);
+
+ return r;
+}
+
+_public_ int sd_pid_notify_barrier(pid_t pid, int unset_environment, uint64_t timeout) {
+ _cleanup_close_pair_ int pipe_fd[2] = EBADF_PAIR;
+ int r;
+
+ if (pipe2(pipe_fd, O_CLOEXEC) < 0)
+ return -errno;
+
+ r = sd_pid_notify_with_fds(pid, unset_environment, "BARRIER=1", &pipe_fd[1], 1);
+ if (r <= 0)
+ return r;
+
+ pipe_fd[1] = safe_close(pipe_fd[1]);
+
+ r = fd_wait_for_event(pipe_fd[0], 0 /* POLLHUP is implicit */, timeout);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ETIMEDOUT;
+
+ return 1;
+}
+
+_public_ int sd_notify_barrier(int unset_environment, uint64_t timeout) {
+ return sd_pid_notify_barrier(0, unset_environment, timeout);
+}
+
+_public_ int sd_pid_notify(pid_t pid, int unset_environment, const char *state) {
+ return sd_pid_notify_with_fds(pid, unset_environment, state, NULL, 0);
+}
+
+_public_ int sd_notify(int unset_environment, const char *state) {
+ return sd_pid_notify_with_fds(0, unset_environment, state, NULL, 0);
+}
+
+_public_ int sd_pid_notifyf(pid_t pid, int unset_environment, const char *format, ...) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ if (format) {
+ va_list ap;
+
+ va_start(ap, format);
+ r = vasprintf(&p, format, ap);
+ va_end(ap);
+
+ if (r < 0 || !p)
+ return -ENOMEM;
+ }
+
+ return sd_pid_notify(pid, unset_environment, p);
+}
+
+_public_ int sd_notifyf(int unset_environment, const char *format, ...) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ if (format) {
+ va_list ap;
+
+ va_start(ap, format);
+ r = vasprintf(&p, format, ap);
+ va_end(ap);
+
+ if (r < 0 || !p)
+ return -ENOMEM;
+ }
+
+ return sd_pid_notify(0, unset_environment, p);
+}
+
+_public_ int sd_pid_notifyf_with_fds(
+ pid_t pid,
+ int unset_environment,
+ const int *fds, size_t n_fds,
+ const char *format, ...) {
+
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ /* Paranoia check: we traditionally used 'unsigned' as array size, but we nowadays more correctly use
+ * 'size_t'. sd_pid_notifyf_with_fds() and sd_pid_notify_with_fds() are from different eras, hence
+ * differ in this. Let's catch resulting incompatibilites early, even though they are pretty much
+ * theoretic only */
+ if (n_fds > UINT_MAX)
+ return -E2BIG;
+
+ if (format) {
+ va_list ap;
+
+ va_start(ap, format);
+ r = vasprintf(&p, format, ap);
+ va_end(ap);
+
+ if (r < 0 || !p)
+ return -ENOMEM;
+ }
+
+ return sd_pid_notify_with_fds(pid, unset_environment, p, fds, n_fds);
+}
+
+_public_ int sd_booted(void) {
+ /* We test whether the runtime unit file directory has been
+ * created. This takes place in mount-setup.c, so is
+ * guaranteed to happen very early during boot. */
+
+ if (laccess("/run/systemd/system/", F_OK) >= 0)
+ return true;
+
+ if (errno == ENOENT)
+ return false;
+
+ return -errno;
+}
+
+_public_ int sd_watchdog_enabled(int unset_environment, uint64_t *usec) {
+ const char *s, *p = ""; /* p is set to dummy value to do unsetting */
+ uint64_t u;
+ int r = 0;
+
+ s = getenv("WATCHDOG_USEC");
+ if (!s)
+ goto finish;
+
+ r = safe_atou64(s, &u);
+ if (r < 0)
+ goto finish;
+ if (!timestamp_is_set(u)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ p = getenv("WATCHDOG_PID");
+ if (p) {
+ pid_t pid;
+
+ r = parse_pid(p, &pid);
+ if (r < 0)
+ goto finish;
+
+ /* Is this for us? */
+ if (getpid_cached() != pid) {
+ r = 0;
+ goto finish;
+ }
+ }
+
+ if (usec)
+ *usec = u;
+
+ r = 1;
+
+finish:
+ if (unset_environment && s)
+ assert_se(unsetenv("WATCHDOG_USEC") == 0);
+ if (unset_environment && p)
+ assert_se(unsetenv("WATCHDOG_PID") == 0);
+
+ return r;
+}
diff --git a/src/libsystemd/sd-device/device-enumerator-private.h b/src/libsystemd/sd-device/device-enumerator-private.h
new file mode 100644
index 0000000..cf62fab
--- /dev/null
+++ b/src/libsystemd/sd-device/device-enumerator-private.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-device.h"
+
+typedef enum MatchInitializedType {
+ MATCH_INITIALIZED_NO, /* only devices without a db entry */
+ MATCH_INITIALIZED_YES, /* only devices with a db entry */
+ MATCH_INITIALIZED_ALL, /* all devices */
+ MATCH_INITIALIZED_COMPAT, /* only devices that have no devnode/ifindex or have a db entry */
+ _MATCH_INITIALIZED_MAX,
+ _MATCH_INITIALIZED_INVALID = -EINVAL,
+} MatchInitializedType;
+
+int device_enumerator_scan_devices(sd_device_enumerator *enumerator);
+int device_enumerator_scan_subsystems(sd_device_enumerator *enumerator);
+int device_enumerator_scan_devices_and_subsystems(sd_device_enumerator *enumerator);
+int device_enumerator_add_device(sd_device_enumerator *enumerator, sd_device *device);
+int device_enumerator_add_parent_devices(sd_device_enumerator *enumerator, sd_device *device);
+int device_enumerator_add_match_is_initialized(sd_device_enumerator *enumerator, MatchInitializedType type);
+int device_enumerator_add_match_parent_incremental(sd_device_enumerator *enumerator, sd_device *parent);
+int device_enumerator_add_prioritized_subsystem(sd_device_enumerator *enumerator, const char *subsystem);
+sd_device *device_enumerator_get_first(sd_device_enumerator *enumerator);
+sd_device *device_enumerator_get_next(sd_device_enumerator *enumerator);
+sd_device **device_enumerator_get_devices(sd_device_enumerator *enumerator, size_t *ret_n_devices);
+
+#define FOREACH_DEVICE_AND_SUBSYSTEM(enumerator, device) \
+ for (device = device_enumerator_get_first(enumerator); \
+ device; \
+ device = device_enumerator_get_next(enumerator))
diff --git a/src/libsystemd/sd-device/device-enumerator.c b/src/libsystemd/sd-device/device-enumerator.c
new file mode 100644
index 0000000..15c5c42
--- /dev/null
+++ b/src/libsystemd/sd-device/device-enumerator.c
@@ -0,0 +1,1194 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "device-enumerator-private.h"
+#include "device-filter.h"
+#include "device-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "set.h"
+#include "sort-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+typedef enum DeviceEnumerationType {
+ DEVICE_ENUMERATION_TYPE_DEVICES,
+ DEVICE_ENUMERATION_TYPE_SUBSYSTEMS,
+ DEVICE_ENUMERATION_TYPE_ALL,
+ _DEVICE_ENUMERATION_TYPE_MAX,
+ _DEVICE_ENUMERATION_TYPE_INVALID = -EINVAL,
+} DeviceEnumerationType;
+
+struct sd_device_enumerator {
+ unsigned n_ref;
+
+ DeviceEnumerationType type;
+ Hashmap *devices_by_syspath;
+ sd_device **devices;
+ size_t n_devices, current_device_index;
+ bool scan_uptodate;
+ bool sorted;
+
+ char **prioritized_subsystems;
+ Set *match_subsystem;
+ Set *nomatch_subsystem;
+ Hashmap *match_sysattr;
+ Hashmap *nomatch_sysattr;
+ Hashmap *match_property;
+ Hashmap *match_property_required;
+ Set *match_sysname;
+ Set *nomatch_sysname;
+ Set *match_tag;
+ Set *match_parent;
+ MatchInitializedType match_initialized;
+};
+
+_public_ int sd_device_enumerator_new(sd_device_enumerator **ret) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *enumerator = NULL;
+
+ assert(ret);
+
+ enumerator = new(sd_device_enumerator, 1);
+ if (!enumerator)
+ return -ENOMEM;
+
+ *enumerator = (sd_device_enumerator) {
+ .n_ref = 1,
+ .type = _DEVICE_ENUMERATION_TYPE_INVALID,
+ .match_initialized = MATCH_INITIALIZED_COMPAT,
+ };
+
+ *ret = TAKE_PTR(enumerator);
+
+ return 0;
+}
+
+static void device_unref_many(sd_device **devices, size_t n) {
+ assert(devices || n == 0);
+
+ for (size_t i = 0; i < n; i++)
+ sd_device_unref(devices[i]);
+}
+
+static void device_enumerator_unref_devices(sd_device_enumerator *enumerator) {
+ assert(enumerator);
+
+ hashmap_clear_with_destructor(enumerator->devices_by_syspath, sd_device_unref);
+ device_unref_many(enumerator->devices, enumerator->n_devices);
+ enumerator->devices = mfree(enumerator->devices);
+ enumerator->n_devices = 0;
+}
+
+static sd_device_enumerator *device_enumerator_free(sd_device_enumerator *enumerator) {
+ assert(enumerator);
+
+ device_enumerator_unref_devices(enumerator);
+
+ hashmap_free(enumerator->devices_by_syspath);
+ strv_free(enumerator->prioritized_subsystems);
+ set_free(enumerator->match_subsystem);
+ set_free(enumerator->nomatch_subsystem);
+ hashmap_free(enumerator->match_sysattr);
+ hashmap_free(enumerator->nomatch_sysattr);
+ hashmap_free(enumerator->match_property);
+ hashmap_free(enumerator->match_property_required);
+ set_free(enumerator->match_sysname);
+ set_free(enumerator->nomatch_sysname);
+ set_free(enumerator->match_tag);
+ set_free(enumerator->match_parent);
+
+ return mfree(enumerator);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_device_enumerator, sd_device_enumerator, device_enumerator_free);
+
+int device_enumerator_add_prioritized_subsystem(sd_device_enumerator *enumerator, const char *subsystem) {
+ int r;
+
+ assert(enumerator);
+ assert(subsystem);
+
+ if (strv_contains(enumerator->prioritized_subsystems, subsystem))
+ return 0;
+
+ r = strv_extend(&enumerator->prioritized_subsystems, subsystem);
+ if (r < 0)
+ return r;
+
+ enumerator->scan_uptodate = false;
+
+ return 1;
+}
+
+_public_ int sd_device_enumerator_add_match_subsystem(sd_device_enumerator *enumerator, const char *subsystem, int match) {
+ Set **set;
+ int r;
+
+ assert_return(enumerator, -EINVAL);
+ assert_return(subsystem, -EINVAL);
+
+ if (match)
+ set = &enumerator->match_subsystem;
+ else
+ set = &enumerator->nomatch_subsystem;
+
+ r = set_put_strdup(set, subsystem);
+ if (r <= 0)
+ return r;
+
+ enumerator->scan_uptodate = false;
+
+ return 1;
+}
+
+_public_ int sd_device_enumerator_add_match_sysattr(sd_device_enumerator *enumerator, const char *sysattr, const char *value, int match) {
+ Hashmap **hashmap;
+ int r;
+
+ assert_return(enumerator, -EINVAL);
+ assert_return(sysattr, -EINVAL);
+
+ if (match)
+ hashmap = &enumerator->match_sysattr;
+ else
+ hashmap = &enumerator->nomatch_sysattr;
+
+ r = update_match_strv(hashmap, sysattr, value, /* clear_on_null = */ true);
+ if (r <= 0)
+ return r;
+
+ enumerator->scan_uptodate = false;
+
+ return 1;
+}
+
+_public_ int sd_device_enumerator_add_match_property(sd_device_enumerator *enumerator, const char *property, const char *value) {
+ int r;
+
+ assert_return(enumerator, -EINVAL);
+ assert_return(property, -EINVAL);
+
+ r = update_match_strv(&enumerator->match_property, property, value, /* clear_on_null = */ false);
+ if (r <= 0)
+ return r;
+
+ enumerator->scan_uptodate = false;
+
+ return 1;
+}
+
+_public_ int sd_device_enumerator_add_match_property_required(sd_device_enumerator *enumerator, const char *property, const char *value) {
+ int r;
+
+ assert_return(enumerator, -EINVAL);
+ assert_return(property, -EINVAL);
+
+ r = update_match_strv(&enumerator->match_property_required, property, value, /* clear_on_null = */ false);
+ if (r <= 0)
+ return r;
+
+ enumerator->scan_uptodate = false;
+
+ return 1;
+}
+
+static int device_enumerator_add_match_sysname(sd_device_enumerator *enumerator, const char *sysname, bool match) {
+ int r;
+
+ assert_return(enumerator, -EINVAL);
+ assert_return(sysname, -EINVAL);
+
+ r = set_put_strdup(match ? &enumerator->match_sysname : &enumerator->nomatch_sysname, sysname);
+ if (r <= 0)
+ return r;
+
+ enumerator->scan_uptodate = false;
+
+ return 1;
+}
+
+_public_ int sd_device_enumerator_add_match_sysname(sd_device_enumerator *enumerator, const char *sysname) {
+ return device_enumerator_add_match_sysname(enumerator, sysname, true);
+}
+
+_public_ int sd_device_enumerator_add_nomatch_sysname(sd_device_enumerator *enumerator, const char *sysname) {
+ return device_enumerator_add_match_sysname(enumerator, sysname, false);
+}
+
+_public_ int sd_device_enumerator_add_match_tag(sd_device_enumerator *enumerator, const char *tag) {
+ int r;
+
+ assert_return(enumerator, -EINVAL);
+ assert_return(tag, -EINVAL);
+
+ r = set_put_strdup(&enumerator->match_tag, tag);
+ if (r <= 0)
+ return r;
+
+ enumerator->scan_uptodate = false;
+
+ return 1;
+}
+
+int device_enumerator_add_match_parent_incremental(sd_device_enumerator *enumerator, sd_device *parent) {
+ const char *path;
+ int r;
+
+ assert(enumerator);
+ assert(parent);
+
+ r = sd_device_get_syspath(parent, &path);
+ if (r < 0)
+ return r;
+
+ r = set_put_strdup(&enumerator->match_parent, path);
+ if (r <= 0)
+ return r;
+
+ enumerator->scan_uptodate = false;
+
+ return 1;
+}
+
+_public_ int sd_device_enumerator_add_match_parent(sd_device_enumerator *enumerator, sd_device *parent) {
+ assert_return(enumerator, -EINVAL);
+ assert_return(parent, -EINVAL);
+
+ set_clear(enumerator->match_parent);
+
+ return device_enumerator_add_match_parent_incremental(enumerator, parent);
+}
+
+_public_ int sd_device_enumerator_allow_uninitialized(sd_device_enumerator *enumerator) {
+ assert_return(enumerator, -EINVAL);
+
+ enumerator->match_initialized = MATCH_INITIALIZED_ALL;
+
+ enumerator->scan_uptodate = false;
+
+ return 1;
+}
+
+int device_enumerator_add_match_is_initialized(sd_device_enumerator *enumerator, MatchInitializedType type) {
+ assert_return(enumerator, -EINVAL);
+ assert_return(type >= 0 && type < _MATCH_INITIALIZED_MAX, -EINVAL);
+
+ enumerator->match_initialized = type;
+
+ enumerator->scan_uptodate = false;
+
+ return 1;
+}
+
+static int sound_device_compare(const char *devpath_a, const char *devpath_b) {
+ const char *sound_a, *sound_b;
+ size_t prefix_len;
+
+ assert(devpath_a);
+ assert(devpath_b);
+
+ /* For sound cards the control device must be enumerated last to make sure it's the final
+ * device node that gets ACLs applied. Applications rely on this fact and use ACL changes on
+ * the control node as an indicator that the ACL change of the entire sound card completed. The
+ * kernel makes this guarantee when creating those devices, and hence we should too when
+ * enumerating them. */
+
+ sound_a = strstrafter(devpath_a, "/sound/card");
+ if (!sound_a)
+ return 0;
+
+ sound_a = strchr(devpath_a, '/');
+ if (!sound_a)
+ return 0;
+
+ prefix_len = sound_a - devpath_a;
+
+ if (!strneq(devpath_a, devpath_b, prefix_len))
+ return 0;
+
+ sound_b = devpath_b + prefix_len;
+
+ return CMP(!!startswith(sound_a, "/controlC"),
+ !!startswith(sound_b, "/controlC"));
+}
+
+static bool devpath_is_late_block(const char *devpath) {
+ assert(devpath);
+
+ return strstr(devpath, "/block/md") || strstr(devpath, "/block/dm-");
+}
+
+static int device_compare(sd_device * const *a, sd_device * const *b) {
+ const char *devpath_a, *devpath_b;
+ int r;
+
+ assert(a);
+ assert(b);
+ assert(*a);
+ assert(*b);
+
+ assert_se(sd_device_get_devpath(*(sd_device**) a, &devpath_a) >= 0);
+ assert_se(sd_device_get_devpath(*(sd_device**) b, &devpath_b) >= 0);
+
+ r = sound_device_compare(devpath_a, devpath_b);
+ if (r != 0)
+ return r;
+
+ /* md and dm devices are enumerated after all other devices */
+ r = CMP(devpath_is_late_block(devpath_a), devpath_is_late_block(devpath_b));
+ if (r != 0)
+ return r;
+
+ return path_compare(devpath_a, devpath_b);
+}
+
+static int enumerator_sort_devices(sd_device_enumerator *enumerator) {
+ size_t n_sorted = 0, n = 0;
+ sd_device **devices;
+ sd_device *device;
+ int r;
+
+ assert(enumerator);
+
+ if (enumerator->sorted)
+ return 0;
+
+ devices = new(sd_device*, hashmap_size(enumerator->devices_by_syspath));
+ if (!devices)
+ return -ENOMEM;
+
+ STRV_FOREACH(prioritized_subsystem, enumerator->prioritized_subsystems) {
+
+ for (;;) {
+ const char *syspath;
+ size_t m = n;
+
+ HASHMAP_FOREACH_KEY(device, syspath, enumerator->devices_by_syspath) {
+ _cleanup_free_ char *p = NULL;
+ const char *subsys;
+
+ if (sd_device_get_subsystem(device, &subsys) < 0)
+ continue;
+
+ if (!streq(subsys, *prioritized_subsystem))
+ continue;
+
+ devices[n++] = sd_device_ref(device);
+
+ for (;;) {
+ _cleanup_free_ char *q = NULL;
+
+ r = path_extract_directory(p ?: syspath, &q);
+ if (r == -EADDRNOTAVAIL)
+ break;
+ if (r < 0)
+ goto failed;
+
+ device = hashmap_get(enumerator->devices_by_syspath, q);
+ if (device)
+ devices[n++] = sd_device_ref(device);
+
+ free_and_replace(p, q);
+ }
+
+ break;
+ }
+
+ /* We cannot remove multiple entries in the loop HASHMAP_FOREACH_KEY() above. */
+ for (size_t i = m; i < n; i++) {
+ r = sd_device_get_syspath(devices[i], &syspath);
+ if (r < 0)
+ goto failed;
+
+ assert_se(hashmap_remove(enumerator->devices_by_syspath, syspath) == devices[i]);
+ sd_device_unref(devices[i]);
+ }
+
+ if (m == n)
+ break;
+ }
+
+ typesafe_qsort(devices + n_sorted, n - n_sorted, device_compare);
+ n_sorted = n;
+ }
+
+ HASHMAP_FOREACH(device, enumerator->devices_by_syspath)
+ devices[n++] = sd_device_ref(device);
+
+ /* Move all devices back to the hashmap. Otherwise, devices added by
+ * udev_enumerate_add_syspath() -> device_enumerator_add_device() may not be listed. */
+ for (size_t i = 0; i < n_sorted; i++) {
+ const char *syspath;
+
+ r = sd_device_get_syspath(devices[i], &syspath);
+ if (r < 0)
+ goto failed;
+
+ r = hashmap_put(enumerator->devices_by_syspath, syspath, devices[i]);
+ if (r < 0)
+ goto failed;
+ assert(r > 0);
+
+ sd_device_ref(devices[i]);
+ }
+
+ typesafe_qsort(devices + n_sorted, n - n_sorted, device_compare);
+
+ device_unref_many(enumerator->devices, enumerator->n_devices);
+
+ enumerator->n_devices = n;
+ free_and_replace(enumerator->devices, devices);
+
+ enumerator->sorted = true;
+ return 0;
+
+failed:
+ device_unref_many(devices, n);
+ free(devices);
+ return r;
+}
+
+int device_enumerator_add_device(sd_device_enumerator *enumerator, sd_device *device) {
+ const char *syspath;
+ int r;
+
+ assert_return(enumerator, -EINVAL);
+ assert_return(device, -EINVAL);
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_put(&enumerator->devices_by_syspath, &string_hash_ops, syspath, device);
+ if (IN_SET(r, -EEXIST, 0))
+ return 0;
+ if (r < 0)
+ return r;
+
+ sd_device_ref(device);
+
+ enumerator->sorted = false;
+ return 1;
+}
+
+static bool match_property(Hashmap *properties, sd_device *device, bool match_all) {
+ const char *property_pattern;
+ char * const *value_patterns;
+
+ assert(device);
+
+ /* Unlike device_match_sysattr(), this accepts device that has at least one matching property. */
+
+ if (hashmap_isempty(properties))
+ return true;
+
+ HASHMAP_FOREACH_KEY(value_patterns, property_pattern, properties) {
+ bool match = false;
+
+ FOREACH_DEVICE_PROPERTY(device, property, value) {
+ if (fnmatch(property_pattern, property, 0) != 0)
+ continue;
+
+ match = strv_fnmatch(value_patterns, value);
+ if (match) {
+ if (!match_all)
+ return true;
+
+ break;
+ }
+ }
+
+ if (!match && match_all)
+ return false;
+ }
+
+ return match_all;
+}
+
+static bool match_tag(sd_device_enumerator *enumerator, sd_device *device) {
+ const char *tag;
+
+ assert(enumerator);
+ assert(device);
+
+ SET_FOREACH(tag, enumerator->match_tag)
+ if (!sd_device_has_tag(device, tag))
+ return false;
+
+ return true;
+}
+
+static bool match_sysname(sd_device_enumerator *enumerator, const char *sysname) {
+ assert(enumerator);
+ assert(sysname);
+
+ return set_fnmatch(enumerator->match_sysname, enumerator->nomatch_sysname, sysname);
+}
+
+static int match_initialized(sd_device_enumerator *enumerator, sd_device *device) {
+ int r;
+
+ assert(enumerator);
+ assert(device);
+
+ if (enumerator->match_initialized == MATCH_INITIALIZED_ALL)
+ return true;
+
+ r = sd_device_get_is_initialized(device);
+ if (r == -ENOENT) /* this is necessarily racey, so ignore missing devices */
+ return false;
+ if (r < 0)
+ return r;
+
+ if (enumerator->match_initialized == MATCH_INITIALIZED_COMPAT) {
+ /* only devices that have no devnode/ifindex or have a db entry are accepted. */
+ if (r > 0)
+ return true;
+
+ if (sd_device_get_devnum(device, NULL) >= 0)
+ return false;
+
+ if (sd_device_get_ifindex(device, NULL) >= 0)
+ return false;
+
+ return true;
+ }
+
+ return (enumerator->match_initialized == MATCH_INITIALIZED_NO) == (r == 0);
+}
+
+static bool match_subsystem(sd_device_enumerator *enumerator, const char *subsystem) {
+ assert(enumerator);
+
+ if (!subsystem)
+ return false;
+
+ return set_fnmatch(enumerator->match_subsystem, enumerator->nomatch_subsystem, subsystem);
+}
+
+typedef enum MatchFlag {
+ MATCH_SYSNAME = 1u << 0,
+ MATCH_SUBSYSTEM = 1u << 1,
+ MATCH_PARENT = 1u << 2,
+ MATCH_TAG = 1u << 3,
+
+ MATCH_ALL = (1u << 4) - 1,
+} MatchFlag;
+
+static int test_matches(
+ sd_device_enumerator *enumerator,
+ sd_device *device,
+ MatchFlag flags) {
+
+ int r;
+
+ assert(enumerator);
+ assert(device);
+
+ if (FLAGS_SET(flags, MATCH_SYSNAME)) {
+ const char *sysname;
+
+ r = sd_device_get_sysname(device, &sysname);
+ if (r < 0)
+ return r;
+
+ if (!match_sysname(enumerator, sysname))
+ return false;
+ }
+
+ if (FLAGS_SET(flags, MATCH_SUBSYSTEM)) {
+ const char *subsystem;
+
+ r = sd_device_get_subsystem(device, &subsystem);
+ if (r == -ENOENT)
+ return false;
+ if (r < 0)
+ return r;
+
+ if (!match_subsystem(enumerator, subsystem))
+ return false;
+ }
+
+ if (FLAGS_SET(flags, MATCH_PARENT) &&
+ !device_match_parent(device, enumerator->match_parent, NULL))
+ return false;
+
+ if (FLAGS_SET(flags, MATCH_TAG) &&
+ !match_tag(enumerator, device))
+ return false;
+
+ r = match_initialized(enumerator, device);
+ if (r <= 0)
+ return r;
+
+ if (!match_property(enumerator->match_property, device, /* match_all = */ false))
+ return false;
+
+ if (!match_property(enumerator->match_property_required, device, /* match_all = */ true))
+ return false;
+
+ if (!device_match_sysattr(device, enumerator->match_sysattr, enumerator->nomatch_sysattr))
+ return false;
+
+ return true;
+}
+
+static int enumerator_add_parent_devices(
+ sd_device_enumerator *enumerator,
+ sd_device *device,
+ MatchFlag flags) {
+
+ int r;
+
+ assert(enumerator);
+ assert(device);
+
+ for (;;) {
+ r = sd_device_get_parent(device, &device);
+ if (r == -ENOENT) /* Reached the top? */
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = test_matches(enumerator, device, flags);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ r = device_enumerator_add_device(enumerator, device);
+ if (r < 0)
+ return r;
+ if (r == 0) /* Exists already? Then no need to go further up. */
+ return 0;
+ }
+}
+
+int device_enumerator_add_parent_devices(sd_device_enumerator *enumerator, sd_device *device) {
+ return enumerator_add_parent_devices(enumerator, device, MATCH_ALL & (~MATCH_PARENT));
+}
+
+static bool relevant_sysfs_subdir(const struct dirent *de) {
+ assert(de);
+
+ if (de->d_name[0] == '.')
+ return false;
+
+ /* Also filter out regular files and such, i.e. stuff that definitely isn't a kobject path. (Note
+ * that we rely on the fact that sysfs fills in d_type here, i.e. doesn't do DT_UNKNOWN) */
+ return IN_SET(de->d_type, DT_DIR, DT_LNK);
+}
+
+static int enumerator_scan_dir_and_add_devices(
+ sd_device_enumerator *enumerator,
+ const char *basedir,
+ const char *subdir1,
+ const char *subdir2) {
+
+ _cleanup_closedir_ DIR *dir = NULL;
+ char *path;
+ int k, r = 0;
+
+ assert(enumerator);
+ assert(basedir);
+
+ path = strjoina("/sys/", basedir, "/");
+
+ if (subdir1)
+ path = strjoina(path, subdir1, "/");
+
+ if (subdir2)
+ path = strjoina(path, subdir2, "/");
+
+ dir = opendir(path);
+ if (!dir) {
+ bool ignore = errno == ENOENT;
+
+ /* this is necessarily racey, so ignore missing directories */
+ log_debug_errno(errno,
+ "sd-device-enumerator: Failed to open directory %s%s: %m",
+ path, ignore ? ", ignoring" : "");
+ return ignore ? 0 : -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, dir, return -errno) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ char syspath[strlen(path) + 1 + strlen(de->d_name) + 1];
+
+ if (!relevant_sysfs_subdir(de))
+ continue;
+
+ if (!match_sysname(enumerator, de->d_name))
+ continue;
+
+ (void) sprintf(syspath, "%s%s", path, de->d_name);
+
+ k = sd_device_new_from_syspath(&device, syspath);
+ if (k < 0) {
+ if (k != -ENODEV)
+ /* this is necessarily racey, so ignore missing devices */
+ r = k;
+
+ continue;
+ }
+
+ k = test_matches(enumerator, device, MATCH_ALL & (~MATCH_SYSNAME)); /* sysname is already tested. */
+ if (k <= 0) {
+ if (k < 0)
+ r = k;
+ continue;
+ }
+
+ k = device_enumerator_add_device(enumerator, device);
+ if (k < 0)
+ r = k;
+
+ /* Also include all potentially matching parent devices in the enumeration. These are things
+ * like root busses — e.g. /sys/devices/pci0000:00/ or /sys/devices/pnp0/, which ar not
+ * linked from /sys/class/ or /sys/bus/, hence pick them up explicitly here. */
+ k = enumerator_add_parent_devices(enumerator, device, MATCH_ALL);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int enumerator_scan_dir(
+ sd_device_enumerator *enumerator,
+ const char *basedir,
+ const char *subdir,
+ const char *subsystem) {
+
+ _cleanup_closedir_ DIR *dir = NULL;
+ char *path;
+ int r = 0;
+
+ path = strjoina("/sys/", basedir);
+
+ dir = opendir(path);
+ if (!dir) {
+ bool ignore = errno == ENOENT;
+
+ log_debug_errno(errno,
+ "sd-device-enumerator: Failed to open directory %s%s: %m",
+ path, ignore ? ", ignoring" : "");
+ return ignore ? 0 : -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, dir, return -errno) {
+ int k;
+
+ if (!relevant_sysfs_subdir(de))
+ continue;
+
+ if (!match_subsystem(enumerator, subsystem ?: de->d_name))
+ continue;
+
+ k = enumerator_scan_dir_and_add_devices(enumerator, basedir, de->d_name, subdir);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int enumerator_scan_devices_tag(sd_device_enumerator *enumerator, const char *tag) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ char *path;
+ int r = 0;
+
+ assert(enumerator);
+ assert(tag);
+
+ path = strjoina("/run/udev/tags/", tag);
+
+ dir = opendir(path);
+ if (!dir) {
+ bool ignore = errno == ENOENT;
+
+ log_debug_errno(errno,
+ "sd-device-enumerator: Failed to open directory %s%s: %m",
+ path, ignore ? ", ignoring" : "");
+ return ignore ? 0 : -errno;
+ }
+
+ /* TODO: filter away subsystems? */
+
+ FOREACH_DIRENT_ALL(de, dir, return -errno) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ int k;
+
+ if (de->d_name[0] == '.')
+ continue;
+
+ k = sd_device_new_from_device_id(&device, de->d_name);
+ if (k < 0) {
+ if (k != -ENODEV)
+ /* this is necessarily racy, so ignore missing devices */
+ r = k;
+
+ continue;
+ }
+
+ /* Generated from tag, hence not necessary to check tag again. */
+ k = test_matches(enumerator, device, MATCH_ALL & (~MATCH_TAG));
+ if (k < 0)
+ r = k;
+ if (k <= 0)
+ continue;
+
+ k = device_enumerator_add_device(enumerator, device);
+ if (k < 0) {
+ r = k;
+ continue;
+ }
+ }
+
+ return r;
+}
+
+static int enumerator_scan_devices_tags(sd_device_enumerator *enumerator) {
+ const char *tag;
+ int r = 0;
+
+ assert(enumerator);
+
+ SET_FOREACH(tag, enumerator->match_tag) {
+ int k;
+
+ k = enumerator_scan_devices_tag(enumerator, tag);
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int parent_add_child(sd_device_enumerator *enumerator, const char *path, MatchFlag flags) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ int r;
+
+ r = sd_device_new_from_syspath(&device, path);
+ if (r == -ENODEV)
+ /* this is necessarily racy, so ignore missing devices */
+ return 0;
+ else if (r < 0)
+ return r;
+
+ r = test_matches(enumerator, device, flags);
+ if (r <= 0)
+ return r;
+
+ return device_enumerator_add_device(enumerator, device);
+}
+
+static int parent_crawl_children(sd_device_enumerator *enumerator, const char *path, Set **stack) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ int r = 0;
+
+ assert(enumerator);
+ assert(path);
+ assert(stack);
+
+ dir = opendir(path);
+ if (!dir) {
+ bool ignore = errno == ENOENT;
+
+ log_debug_errno(errno,
+ "sd-device-enumerator: Failed to open directory %s%s: %m",
+ path, ignore ? ", ignoring" : "");
+ return ignore ? 0 : -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, dir, return -errno) {
+ _cleanup_free_ char *child = NULL;
+ int k;
+
+ if (de->d_name[0] == '.')
+ continue;
+
+ if (de->d_type != DT_DIR)
+ continue;
+
+ child = path_join(path, de->d_name);
+ if (!child)
+ return -ENOMEM;
+
+ /* Let's check sysname filter earlier. The other tests require the sd-device object created
+ * from the path, thus much costly. */
+ if (match_sysname(enumerator, de->d_name)) {
+ k = parent_add_child(enumerator, child, MATCH_ALL & (~(MATCH_SYSNAME|MATCH_PARENT)));
+ if (k < 0)
+ r = k;
+ }
+
+ k = set_ensure_consume(stack, &path_hash_ops_free, TAKE_PTR(child));
+ if (k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static int enumerator_scan_devices_children(sd_device_enumerator *enumerator) {
+ _cleanup_set_free_ Set *stack = NULL;
+ const char *path;
+ int r = 0, k;
+
+ assert(enumerator);
+
+ SET_FOREACH(path, enumerator->match_parent) {
+ k = parent_add_child(enumerator, path, MATCH_ALL & (~MATCH_PARENT));
+ if (k < 0)
+ r = k;
+
+ k = parent_crawl_children(enumerator, path, &stack);
+ if (k < 0)
+ r = k;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *p = NULL;
+
+ p = set_steal_first(stack);
+ if (!p)
+ return r;
+
+ k = parent_crawl_children(enumerator, p, &stack);
+ if (k < 0)
+ r = k;
+ }
+}
+
+static int enumerator_scan_devices_all(sd_device_enumerator *enumerator) {
+ int k, r = 0;
+
+ k = enumerator_scan_dir(enumerator, "bus", "devices", NULL);
+ if (k < 0)
+ r = log_debug_errno(k, "sd-device-enumerator: Failed to scan /sys/bus: %m");
+
+ k = enumerator_scan_dir(enumerator, "class", NULL, NULL);
+ if (k < 0)
+ r = log_debug_errno(k, "sd-device-enumerator: Failed to scan /sys/class: %m");
+
+ return r;
+}
+
+int device_enumerator_scan_devices(sd_device_enumerator *enumerator) {
+ int r = 0, k;
+
+ assert(enumerator);
+
+ if (enumerator->scan_uptodate &&
+ enumerator->type == DEVICE_ENUMERATION_TYPE_DEVICES)
+ return 0;
+
+ device_enumerator_unref_devices(enumerator);
+
+ if (!set_isempty(enumerator->match_tag)) {
+ k = enumerator_scan_devices_tags(enumerator);
+ if (k < 0)
+ r = k;
+ } else if (enumerator->match_parent) {
+ k = enumerator_scan_devices_children(enumerator);
+ if (k < 0)
+ r = k;
+ } else {
+ k = enumerator_scan_devices_all(enumerator);
+ if (k < 0)
+ r = k;
+ }
+
+ enumerator->scan_uptodate = true;
+ enumerator->type = DEVICE_ENUMERATION_TYPE_DEVICES;
+
+ return r;
+}
+
+_public_ sd_device *sd_device_enumerator_get_device_first(sd_device_enumerator *enumerator) {
+ assert_return(enumerator, NULL);
+
+ if (device_enumerator_scan_devices(enumerator) < 0)
+ return NULL;
+
+ if (enumerator_sort_devices(enumerator) < 0)
+ return NULL;
+
+ enumerator->current_device_index = 0;
+
+ if (enumerator->n_devices == 0)
+ return NULL;
+
+ return enumerator->devices[0];
+}
+
+_public_ sd_device *sd_device_enumerator_get_device_next(sd_device_enumerator *enumerator) {
+ assert_return(enumerator, NULL);
+
+ if (!enumerator->scan_uptodate ||
+ !enumerator->sorted ||
+ enumerator->type != DEVICE_ENUMERATION_TYPE_DEVICES ||
+ enumerator->current_device_index + 1 >= enumerator->n_devices)
+ return NULL;
+
+ return enumerator->devices[++enumerator->current_device_index];
+}
+
+int device_enumerator_scan_subsystems(sd_device_enumerator *enumerator) {
+ int r = 0, k;
+
+ assert(enumerator);
+
+ if (enumerator->scan_uptodate &&
+ enumerator->type == DEVICE_ENUMERATION_TYPE_SUBSYSTEMS)
+ return 0;
+
+ device_enumerator_unref_devices(enumerator);
+
+ /* modules */
+ if (match_subsystem(enumerator, "module")) {
+ k = enumerator_scan_dir_and_add_devices(enumerator, "module", NULL, NULL);
+ if (k < 0)
+ r = log_debug_errno(k, "sd-device-enumerator: Failed to scan modules: %m");
+ }
+
+ /* subsystems (only buses support coldplug) */
+ if (match_subsystem(enumerator, "subsystem")) {
+ k = enumerator_scan_dir_and_add_devices(enumerator, "bus", NULL, NULL);
+ if (k < 0)
+ r = log_debug_errno(k, "sd-device-enumerator: Failed to scan subsystems: %m");
+ }
+
+ /* subsystem drivers */
+ if (match_subsystem(enumerator, "drivers")) {
+ k = enumerator_scan_dir(enumerator, "bus", "drivers", "drivers");
+ if (k < 0)
+ r = log_debug_errno(k, "sd-device-enumerator: Failed to scan drivers: %m");
+ }
+
+ enumerator->scan_uptodate = true;
+ enumerator->type = DEVICE_ENUMERATION_TYPE_SUBSYSTEMS;
+
+ return r;
+}
+
+_public_ sd_device *sd_device_enumerator_get_subsystem_first(sd_device_enumerator *enumerator) {
+ assert_return(enumerator, NULL);
+
+ if (device_enumerator_scan_subsystems(enumerator) < 0)
+ return NULL;
+
+ if (enumerator_sort_devices(enumerator) < 0)
+ return NULL;
+
+ enumerator->current_device_index = 0;
+
+ if (enumerator->n_devices == 0)
+ return NULL;
+
+ return enumerator->devices[0];
+}
+
+_public_ sd_device *sd_device_enumerator_get_subsystem_next(sd_device_enumerator *enumerator) {
+ assert_return(enumerator, NULL);
+
+ if (!enumerator->scan_uptodate ||
+ !enumerator->sorted ||
+ enumerator->type != DEVICE_ENUMERATION_TYPE_SUBSYSTEMS ||
+ enumerator->current_device_index + 1 >= enumerator->n_devices)
+ return NULL;
+
+ return enumerator->devices[++enumerator->current_device_index];
+}
+
+int device_enumerator_scan_devices_and_subsystems(sd_device_enumerator *enumerator) {
+ int r;
+
+ assert(enumerator);
+
+ if (enumerator->scan_uptodate &&
+ enumerator->type == DEVICE_ENUMERATION_TYPE_ALL)
+ return 0;
+
+ device_enumerator_unref_devices(enumerator);
+
+ if (!set_isempty(enumerator->match_tag))
+ r = enumerator_scan_devices_tags(enumerator);
+ else if (enumerator->match_parent)
+ r = enumerator_scan_devices_children(enumerator);
+ else {
+ int k;
+
+ r = enumerator_scan_devices_all(enumerator);
+
+ if (match_subsystem(enumerator, "module")) {
+ k = enumerator_scan_dir_and_add_devices(enumerator, "module", NULL, NULL);
+ if (k < 0)
+ r = log_debug_errno(k, "sd-device-enumerator: Failed to scan modules: %m");
+ }
+ if (match_subsystem(enumerator, "subsystem")) {
+ k = enumerator_scan_dir_and_add_devices(enumerator, "bus", NULL, NULL);
+ if (k < 0)
+ r = log_debug_errno(k, "sd-device-enumerator: Failed to scan subsystems: %m");
+ }
+
+ if (match_subsystem(enumerator, "drivers")) {
+ k = enumerator_scan_dir(enumerator, "bus", "drivers", "drivers");
+ if (k < 0)
+ r = log_debug_errno(k, "sd-device-enumerator: Failed to scan drivers: %m");
+ }
+ }
+
+ enumerator->scan_uptodate = true;
+ enumerator->type = DEVICE_ENUMERATION_TYPE_ALL;
+
+ return r;
+}
+
+sd_device *device_enumerator_get_first(sd_device_enumerator *enumerator) {
+ assert_return(enumerator, NULL);
+
+ if (!enumerator->scan_uptodate)
+ return NULL;
+
+ if (enumerator_sort_devices(enumerator) < 0)
+ return NULL;
+
+ enumerator->current_device_index = 0;
+
+ if (enumerator->n_devices == 0)
+ return NULL;
+
+ return enumerator->devices[0];
+}
+
+sd_device *device_enumerator_get_next(sd_device_enumerator *enumerator) {
+ assert_return(enumerator, NULL);
+
+ if (!enumerator->scan_uptodate ||
+ !enumerator->sorted ||
+ enumerator->current_device_index + 1 >= enumerator->n_devices)
+ return NULL;
+
+ return enumerator->devices[++enumerator->current_device_index];
+}
+
+sd_device **device_enumerator_get_devices(sd_device_enumerator *enumerator, size_t *ret_n_devices) {
+ assert(enumerator);
+ assert(ret_n_devices);
+
+ if (!enumerator->scan_uptodate)
+ return NULL;
+
+ if (enumerator_sort_devices(enumerator) < 0)
+ return NULL;
+
+ *ret_n_devices = enumerator->n_devices;
+ return enumerator->devices;
+}
diff --git a/src/libsystemd/sd-device/device-filter.c b/src/libsystemd/sd-device/device-filter.c
new file mode 100644
index 0000000..4101e7d
--- /dev/null
+++ b/src/libsystemd/sd-device/device-filter.c
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fnmatch.h>
+
+#include "device-filter.h"
+#include "path-util.h"
+
+int update_match_strv(Hashmap **match_strv, const char *key, const char *value, bool clear_on_null) {
+ char **strv;
+ int r;
+
+ assert(match_strv);
+ assert(key);
+
+ strv = hashmap_get(*match_strv, key);
+ if (strv) {
+ if (!value) {
+ char **v;
+
+ if (strv_isempty(strv) || !clear_on_null)
+ return 0;
+
+ /* Accept all value. Clear previous assignment. */
+
+ v = new0(char*, 1);
+ if (!v)
+ return -ENOMEM;
+
+ strv_free_and_replace(strv, v);
+ } else {
+ if (strv_contains(strv, value))
+ return 0;
+
+ r = strv_extend(&strv, value);
+ if (r < 0)
+ return r;
+ }
+
+ r = hashmap_update(*match_strv, key, strv);
+ if (r < 0)
+ return r;
+
+ } else {
+ _cleanup_strv_free_ char **strv_alloc = NULL;
+ _cleanup_free_ char *key_alloc = NULL;
+
+ key_alloc = strdup(key);
+ if (!key_alloc)
+ return -ENOMEM;
+
+ strv_alloc = strv_new(value);
+ if (!strv_alloc)
+ return -ENOMEM;
+
+ r = hashmap_ensure_put(match_strv, &string_hash_ops_free_strv_free, key_alloc, strv_alloc);
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(key_alloc);
+ TAKE_PTR(strv_alloc);
+ }
+
+ return 1;
+}
+
+static bool device_match_sysattr_value(sd_device *device, const char *sysattr, char * const *patterns) {
+ const char *value;
+
+ assert(device);
+ assert(sysattr);
+
+ if (sd_device_get_sysattr_value(device, sysattr, &value) < 0)
+ return false;
+
+ return strv_fnmatch_or_empty(patterns, value, 0);
+}
+
+bool device_match_sysattr(sd_device *device, Hashmap *match_sysattr, Hashmap *nomatch_sysattr) {
+ char * const *patterns;
+ const char *sysattr;
+
+ assert(device);
+
+ HASHMAP_FOREACH_KEY(patterns, sysattr, match_sysattr)
+ if (!device_match_sysattr_value(device, sysattr, patterns))
+ return false;
+
+ HASHMAP_FOREACH_KEY(patterns, sysattr, nomatch_sysattr)
+ if (device_match_sysattr_value(device, sysattr, patterns))
+ return false;
+
+ return true;
+}
+
+bool device_match_parent(sd_device *device, Set *match_parent, Set *nomatch_parent) {
+ const char *syspath_parent, *syspath;
+
+ assert(device);
+
+ if (sd_device_get_syspath(device, &syspath) < 0)
+ return false;
+
+ SET_FOREACH(syspath_parent, nomatch_parent)
+ if (path_startswith(syspath, syspath_parent))
+ return false;
+
+ if (set_isempty(match_parent))
+ return true;
+
+ SET_FOREACH(syspath_parent, match_parent)
+ if (path_startswith(syspath, syspath_parent))
+ return true;
+
+ return false;
+}
diff --git a/src/libsystemd/sd-device/device-filter.h b/src/libsystemd/sd-device/device-filter.h
new file mode 100644
index 0000000..0c5f34e
--- /dev/null
+++ b/src/libsystemd/sd-device/device-filter.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-device.h"
+
+#include "hashmap.h"
+#include "set.h"
+
+int update_match_strv(Hashmap **match_strv, const char *key, const char *value, bool clear_on_null);
+bool device_match_sysattr(sd_device *device, Hashmap *match_sysattr, Hashmap *nomatch_sysattr);
+bool device_match_parent(sd_device *device, Set *match_parent, Set *nomatch_parent);
diff --git a/src/libsystemd/sd-device/device-internal.h b/src/libsystemd/sd-device/device-internal.h
new file mode 100644
index 0000000..a465eb2
--- /dev/null
+++ b/src/libsystemd/sd-device/device-internal.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-device.h"
+
+#include "device-private.h"
+#include "hashmap.h"
+#include "set.h"
+#include "time-util.h"
+
+#define LATEST_UDEV_DATABASE_VERSION 1
+
+struct sd_device {
+ unsigned n_ref;
+
+ /* The database version indicates the supported features by the udev database.
+ * This is saved and parsed in V field.
+ *
+ * 0: None of the following features are supported (systemd version <= 246).
+ * 1: The current tags (Q) and the database version (V) features are implemented (>= 247).
+ */
+ unsigned database_version;
+
+ sd_device *parent;
+
+ OrderedHashmap *properties;
+ Iterator properties_iterator;
+ uint64_t properties_generation; /* changes whenever the properties are changed */
+ uint64_t properties_iterator_generation; /* generation when iteration was started */
+
+ /* the subset of the properties that should be written to the db */
+ OrderedHashmap *properties_db;
+
+ Hashmap *sysattr_values; /* cached sysattr values */
+
+ Set *sysattrs; /* names of sysattrs */
+ Iterator sysattrs_iterator;
+
+ Set *all_tags, *current_tags;
+ Iterator all_tags_iterator, current_tags_iterator;
+ uint64_t all_tags_iterator_generation, current_tags_iterator_generation; /* generation when iteration was started */
+ uint64_t tags_generation; /* changes whenever the tags are changed */
+
+ Set *devlinks;
+ Iterator devlinks_iterator;
+ uint64_t devlinks_generation; /* changes whenever the devlinks are changed */
+ uint64_t devlinks_iterator_generation; /* generation when iteration was started */
+ int devlink_priority;
+
+ Hashmap *children;
+ Iterator children_iterator;
+ bool children_enumerated;
+
+ int ifindex;
+ char *devtype;
+ char *devname;
+ dev_t devnum;
+
+ char **properties_strv; /* the properties hashmap as a strv */
+ char *properties_nulstr; /* the same as a nulstr */
+ size_t properties_nulstr_len;
+
+ char *syspath;
+ const char *devpath;
+ const char *sysnum;
+ char *sysname;
+
+ char *subsystem;
+ char *driver_subsystem; /* only set for the 'drivers' subsystem */
+ char *driver;
+
+ char *device_id;
+
+ usec_t usec_initialized;
+
+ mode_t devmode;
+ uid_t devuid;
+ gid_t devgid;
+
+ uint64_t diskseq; /* Block device sequence number, monothonically incremented by the kernel on create/attach */
+
+ /* only set when device is passed through netlink */
+ sd_device_action_t action;
+ uint64_t seqnum;
+
+ bool parent_set:1; /* no need to try to reload parent */
+ bool sysattrs_read:1; /* don't try to re-read sysattrs once read */
+ bool property_tags_outdated:1; /* need to update TAGS= or CURRENT_TAGS= property */
+ bool property_devlinks_outdated:1; /* need to update DEVLINKS= property */
+ bool properties_buf_outdated:1; /* need to reread hashmap */
+ bool subsystem_set:1; /* don't reread subsystem */
+ bool driver_set:1; /* don't reread driver */
+ bool uevent_loaded:1; /* don't reread uevent */
+ bool db_loaded; /* don't reread db */
+
+ bool is_initialized:1;
+ bool sealed:1; /* don't read more information from uevent/db */
+ bool db_persist:1; /* don't clean up the db when switching from initrd to real root */
+};
+
+int device_new_aux(sd_device **ret);
+int device_add_property_aux(sd_device *device, const char *key, const char *value, bool db);
+static inline int device_add_property_internal(sd_device *device, const char *key, const char *value) {
+ return device_add_property_aux(device, key, value, false);
+}
+
+int device_set_syspath(sd_device *device, const char *_syspath, bool verify);
+int device_set_ifindex(sd_device *device, const char *ifindex);
+int device_set_devmode(sd_device *device, const char *devmode);
+int device_set_devname(sd_device *device, const char *devname);
+int device_set_devtype(sd_device *device, const char *devtype);
+int device_set_devnum(sd_device *device, const char *major, const char *minor);
+int device_set_subsystem(sd_device *device, const char *subsystem);
+int device_set_diskseq(sd_device *device, const char *str);
+int device_set_drivers_subsystem(sd_device *device);
+int device_set_driver(sd_device *device, const char *driver);
+int device_set_usec_initialized(sd_device *device, usec_t when);
diff --git a/src/libsystemd/sd-device/device-monitor-private.h b/src/libsystemd/sd-device/device-monitor-private.h
new file mode 100644
index 0000000..33e2714
--- /dev/null
+++ b/src/libsystemd/sd-device/device-monitor-private.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <errno.h>
+
+#include "sd-device.h"
+
+typedef enum MonitorNetlinkGroup {
+ MONITOR_GROUP_NONE,
+ MONITOR_GROUP_KERNEL,
+ MONITOR_GROUP_UDEV,
+ _MONITOR_NETLINK_GROUP_MAX,
+ _MONITOR_NETLINK_GROUP_INVALID = -EINVAL,
+} MonitorNetlinkGroup;
+
+int device_monitor_new_full(sd_device_monitor **ret, MonitorNetlinkGroup group, int fd);
+int device_monitor_disconnect(sd_device_monitor *m);
+int device_monitor_allow_unicast_sender(sd_device_monitor *m, sd_device_monitor *sender);
+int device_monitor_enable_receiving(sd_device_monitor *m);
+int device_monitor_get_fd(sd_device_monitor *m);
+int device_monitor_send_device(sd_device_monitor *m, sd_device_monitor *destination, sd_device *device);
+int device_monitor_receive_device(sd_device_monitor *m, sd_device **ret);
diff --git a/src/libsystemd/sd-device/device-monitor.c b/src/libsystemd/sd-device/device-monitor.c
new file mode 100644
index 0000000..bb4f9bd
--- /dev/null
+++ b/src/libsystemd/sd-device/device-monitor.c
@@ -0,0 +1,929 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <linux/filter.h>
+#include <linux/netlink.h>
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+#include "sd-event.h"
+
+#include "MurmurHash2.h"
+#include "alloc-util.h"
+#include "device-filter.h"
+#include "device-monitor-private.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "hashmap.h"
+#include "iovec-util.h"
+#include "missing_socket.h"
+#include "mountpoint-util.h"
+#include "set.h"
+#include "socket-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "uid-range.h"
+
+#define log_monitor(m, format, ...) \
+ log_debug("sd-device-monitor(%s): " format, strna(m ? m->description : NULL), ##__VA_ARGS__)
+#define log_monitor_errno(m, r, format, ...) \
+ log_debug_errno(r, "sd-device-monitor(%s): " format, strna(m ? m->description : NULL), ##__VA_ARGS__)
+#define log_device_monitor(d, m, format, ...) \
+ log_device_debug(d, "sd-device-monitor(%s): " format, strna(m ? m->description : NULL), ##__VA_ARGS__)
+#define log_device_monitor_errno(d, m, r, format, ...) \
+ log_device_debug_errno(d, r, "sd-device-monitor(%s): " format, strna(m ? m->description : NULL), ##__VA_ARGS__)
+
+struct sd_device_monitor {
+ unsigned n_ref;
+
+ int sock;
+ union sockaddr_union snl;
+ union sockaddr_union snl_trusted_sender;
+ bool bound;
+
+ UidRange *mapped_userns_uid_range;
+
+ Hashmap *subsystem_filter;
+ Set *tag_filter;
+ Hashmap *match_sysattr_filter;
+ Hashmap *nomatch_sysattr_filter;
+ Set *match_parent_filter;
+ Set *nomatch_parent_filter;
+ bool filter_uptodate;
+
+ sd_event *event;
+ sd_event_source *event_source;
+ char *description;
+ sd_device_monitor_handler_t callback;
+ void *userdata;
+};
+
+#define UDEV_MONITOR_MAGIC 0xfeedcafe
+
+typedef struct monitor_netlink_header {
+ /* "libudev" prefix to distinguish libudev and kernel messages */
+ char prefix[8];
+ /* Magic to protect against daemon <-> Library message format mismatch
+ * Used in the kernel from socket filter rules; needs to be stored in network order */
+ unsigned magic;
+ /* Total length of header structure known to the sender */
+ unsigned header_size;
+ /* Properties string buffer */
+ unsigned properties_off;
+ unsigned properties_len;
+ /* Hashes of primary device properties strings, to let libudev subscribers
+ * use in-kernel socket filters; values need to be stored in network order */
+ unsigned filter_subsystem_hash;
+ unsigned filter_devtype_hash;
+ unsigned filter_tag_bloom_hi;
+ unsigned filter_tag_bloom_lo;
+} monitor_netlink_header;
+
+static int monitor_set_nl_address(sd_device_monitor *m) {
+ union sockaddr_union snl;
+ socklen_t addrlen;
+
+ assert(m);
+
+ /* Get the address the kernel has assigned us.
+ * It is usually, but not necessarily the pid. */
+ addrlen = sizeof(struct sockaddr_nl);
+ if (getsockname(m->sock, &snl.sa, &addrlen) < 0)
+ return -errno;
+
+ m->snl.nl.nl_pid = snl.nl.nl_pid;
+ return 0;
+}
+
+int device_monitor_allow_unicast_sender(sd_device_monitor *m, sd_device_monitor *sender) {
+ assert(m);
+ assert(sender);
+
+ m->snl_trusted_sender.nl.nl_pid = sender->snl.nl.nl_pid;
+ return 0;
+}
+
+_public_ int sd_device_monitor_set_receive_buffer_size(sd_device_monitor *m, size_t size) {
+ assert_return(m, -EINVAL);
+
+ return fd_set_rcvbuf(m->sock, size, false);
+}
+
+int device_monitor_disconnect(sd_device_monitor *m) {
+ assert(m);
+
+ m->sock = safe_close(m->sock);
+ return 0;
+}
+
+int device_monitor_get_fd(sd_device_monitor *m) {
+ assert(m);
+
+ return m->sock;
+}
+
+int device_monitor_new_full(sd_device_monitor **ret, MonitorNetlinkGroup group, int fd) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *m = NULL;
+ _cleanup_close_ int sock = -EBADF;
+ int r;
+
+ assert(group >= 0 && group < _MONITOR_NETLINK_GROUP_MAX);
+ assert_return(ret, -EINVAL);
+
+ if (group == MONITOR_GROUP_UDEV &&
+ access("/run/udev/control", F_OK) < 0 &&
+ dev_is_devtmpfs() <= 0) {
+
+ /*
+ * We do not support subscribing to uevents if no instance of
+ * udev is running. Uevents would otherwise broadcast the
+ * processing data of the host into containers, which is not
+ * desired.
+ *
+ * Containers will currently not get any udev uevents, until
+ * a supporting infrastructure is available.
+ *
+ * We do not set a netlink multicast group here, so the socket
+ * will not receive any messages.
+ */
+
+ log_monitor(m, "The udev service seems not to be active, disabling the monitor.");
+ group = MONITOR_GROUP_NONE;
+ }
+
+ if (fd < 0) {
+ sock = socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_KOBJECT_UEVENT);
+ if (sock < 0)
+ return log_monitor_errno(m, errno, "Failed to create socket: %m");
+ }
+
+ m = new(sd_device_monitor, 1);
+ if (!m)
+ return -ENOMEM;
+
+ *m = (sd_device_monitor) {
+ .n_ref = 1,
+ .sock = fd >= 0 ? fd : TAKE_FD(sock),
+ .bound = fd >= 0,
+ .snl.nl.nl_family = AF_NETLINK,
+ .snl.nl.nl_groups = group,
+ };
+
+ if (fd >= 0) {
+ r = monitor_set_nl_address(m);
+ if (r < 0) {
+ log_monitor_errno(m, r, "Failed to set netlink address: %m");
+ goto fail;
+ }
+ }
+
+ if (DEBUG_LOGGING) {
+ _cleanup_close_ int netns = -EBADF;
+
+ /* So here's the thing: only AF_NETLINK sockets from the main network namespace will get
+ * hardware events. Let's check if ours is from there, and if not generate a debug message,
+ * since we cannot possibly work correctly otherwise. This is just a safety check to make
+ * things easier to debug. */
+
+ netns = ioctl(m->sock, SIOCGSKNS);
+ if (netns < 0)
+ log_monitor_errno(m, errno, "Unable to get network namespace of udev netlink socket, unable to determine if we are in host netns, ignoring: %m");
+ else {
+ struct stat a, b;
+
+ if (fstat(netns, &a) < 0) {
+ r = log_monitor_errno(m, errno, "Failed to stat netns of udev netlink socket: %m");
+ goto fail;
+ }
+
+ if (stat("/proc/1/ns/net", &b) < 0) {
+ if (ERRNO_IS_PRIVILEGE(errno))
+ /* If we can't access PID1's netns info due to permissions, it's fine, this is a
+ * safety check only after all. */
+ log_monitor_errno(m, errno, "No permission to stat PID1's netns, unable to determine if we are in host netns, ignoring: %m");
+ else
+ log_monitor_errno(m, errno, "Failed to stat PID1's netns, ignoring: %m");
+
+ } else if (!stat_inode_same(&a, &b))
+ log_monitor(m, "Netlink socket we listen on is not from host netns, we won't see device events.");
+ }
+ }
+
+ /* Let's bump the receive buffer size, but only if we are not called via socket activation, as in
+ * that case the service manager sets the receive buffer size for us, and the value in the .socket
+ * unit should take full effect. */
+ if (fd < 0) {
+ r = sd_device_monitor_set_receive_buffer_size(m, 128*1024*1024);
+ if (r < 0)
+ log_monitor_errno(m, r, "Failed to increase receive buffer size, ignoring: %m");
+ }
+
+ *ret = TAKE_PTR(m);
+ return 0;
+
+fail:
+ /* Let's unset the socket fd in the monitor object before we destroy it so that the fd passed in is
+ * not closed on failure. */
+ if (fd >= 0)
+ m->sock = -1;
+
+ return r;
+}
+
+_public_ int sd_device_monitor_new(sd_device_monitor **ret) {
+ return device_monitor_new_full(ret, MONITOR_GROUP_UDEV, -1);
+}
+
+_public_ int sd_device_monitor_stop(sd_device_monitor *m) {
+ assert_return(m, -EINVAL);
+
+ m->event_source = sd_event_source_unref(m->event_source);
+ (void) device_monitor_disconnect(m);
+
+ return 0;
+}
+
+static int device_monitor_event_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ _unused_ _cleanup_(log_context_unrefp) LogContext *c = NULL;
+ sd_device_monitor *m = ASSERT_PTR(userdata);
+
+ if (device_monitor_receive_device(m, &device) <= 0)
+ return 0;
+
+ if (log_context_enabled())
+ c = log_context_new_strv_consume(device_make_log_fields(device));
+
+ if (m->callback)
+ return m->callback(m, device, m->userdata);
+
+ return 0;
+}
+
+_public_ int sd_device_monitor_start(sd_device_monitor *m, sd_device_monitor_handler_t callback, void *userdata) {
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ if (!m->event) {
+ r = sd_device_monitor_attach_event(m, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ r = device_monitor_enable_receiving(m);
+ if (r < 0)
+ return r;
+
+ m->callback = callback;
+ m->userdata = userdata;
+
+ r = sd_event_add_io(m->event, &m->event_source, m->sock, EPOLLIN, device_monitor_event_handler, m);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(m->event_source, m->description ?: "sd-device-monitor");
+
+ return 0;
+}
+
+_public_ int sd_device_monitor_detach_event(sd_device_monitor *m) {
+ assert_return(m, -EINVAL);
+
+ (void) sd_device_monitor_stop(m);
+ m->event = sd_event_unref(m->event);
+
+ return 0;
+}
+
+_public_ int sd_device_monitor_attach_event(sd_device_monitor *m, sd_event *event) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->event, -EBUSY);
+
+ if (event)
+ m->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&m->event);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+_public_ sd_event *sd_device_monitor_get_event(sd_device_monitor *m) {
+ assert_return(m, NULL);
+
+ return m->event;
+}
+
+_public_ sd_event_source *sd_device_monitor_get_event_source(sd_device_monitor *m) {
+ assert_return(m, NULL);
+
+ return m->event_source;
+}
+
+_public_ int sd_device_monitor_set_description(sd_device_monitor *m, const char *description) {
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = free_and_strdup(&m->description, description);
+ if (r <= 0)
+ return r;
+
+ if (m->event_source)
+ (void) sd_event_source_set_description(m->event_source, description);
+
+ return r;
+}
+
+_public_ int sd_device_monitor_get_description(sd_device_monitor *m, const char **ret) {
+ assert_return(m, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ *ret = m->description;
+ return 0;
+}
+
+int device_monitor_enable_receiving(sd_device_monitor *m) {
+ int r;
+
+ assert(m);
+
+ r = sd_device_monitor_filter_update(m);
+ if (r < 0)
+ return log_monitor_errno(m, r, "Failed to update filter: %m");
+
+ if (!m->bound) {
+ /* enable receiving of sender credentials */
+ r = setsockopt_int(m->sock, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return log_monitor_errno(m, r, "Failed to set socket option SO_PASSCRED: %m");
+
+ if (bind(m->sock, &m->snl.sa, sizeof(struct sockaddr_nl)) < 0)
+ return log_monitor_errno(m, errno, "Failed to bind monitoring socket: %m");
+
+ m->bound = true;
+
+ r = monitor_set_nl_address(m);
+ if (r < 0)
+ return log_monitor_errno(m, r, "Failed to set address: %m");
+ }
+
+ return 0;
+}
+
+static sd_device_monitor *device_monitor_free(sd_device_monitor *m) {
+ assert(m);
+
+ (void) sd_device_monitor_detach_event(m);
+
+ uid_range_free(m->mapped_userns_uid_range);
+ free(m->description);
+ hashmap_free(m->subsystem_filter);
+ set_free(m->tag_filter);
+ hashmap_free(m->match_sysattr_filter);
+ hashmap_free(m->nomatch_sysattr_filter);
+ set_free(m->match_parent_filter);
+ set_free(m->nomatch_parent_filter);
+
+ return mfree(m);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_device_monitor, sd_device_monitor, device_monitor_free);
+
+static int check_subsystem_filter(sd_device_monitor *m, sd_device *device) {
+ const char *s, *subsystem, *d, *devtype = NULL;
+ int r;
+
+ assert(m);
+ assert(device);
+
+ if (hashmap_isempty(m->subsystem_filter))
+ return true;
+
+ r = sd_device_get_subsystem(device, &subsystem);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_devtype(device, &devtype);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ HASHMAP_FOREACH_KEY(d, s, m->subsystem_filter) {
+ if (!streq(s, subsystem))
+ continue;
+
+ if (!d || streq_ptr(d, devtype))
+ return true;
+ }
+
+ return false;
+}
+
+static bool check_tag_filter(sd_device_monitor *m, sd_device *device) {
+ const char *tag;
+
+ assert(m);
+ assert(device);
+
+ if (set_isempty(m->tag_filter))
+ return true;
+
+ SET_FOREACH(tag, m->tag_filter)
+ if (sd_device_has_tag(device, tag) > 0)
+ return true;
+
+ return false;
+}
+
+static int passes_filter(sd_device_monitor *m, sd_device *device) {
+ int r;
+
+ assert(m);
+ assert(device);
+
+ r = check_subsystem_filter(m, device);
+ if (r <= 0)
+ return r;
+
+ if (!check_tag_filter(m, device))
+ return false;
+
+ if (!device_match_sysattr(device, m->match_sysattr_filter, m->nomatch_sysattr_filter))
+ return false;
+
+ return device_match_parent(device, m->match_parent_filter, m->nomatch_parent_filter);
+}
+
+static bool check_sender_uid(sd_device_monitor *m, uid_t uid) {
+ int r;
+
+ assert(m);
+
+ /* Always trust messages from uid 0. */
+ if (uid == 0)
+ return true;
+
+ /* Trust messages sent by the same UID we are running. Currently, such situation happens only for
+ * unicast messages. */
+ if (uid == getuid() || uid == geteuid())
+ return true;
+
+ if (!m->mapped_userns_uid_range) {
+ r = uid_range_load_userns(&m->mapped_userns_uid_range, NULL);
+ if (r < 0)
+ log_monitor_errno(m, r, "Failed to load UID ranges mapped to the current user namespace, ignoring: %m");
+ }
+
+ /* Trust messages come from outside of the current user namespace. */
+ if (!uid_range_contains(m->mapped_userns_uid_range, uid))
+ return true;
+
+ /* Otherwise, refuse messages. */
+ return false;
+}
+
+int device_monitor_receive_device(sd_device_monitor *m, sd_device **ret) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ _cleanup_free_ uint8_t *buf_alloc = NULL;
+ union {
+ monitor_netlink_header *nlh;
+ char *nulstr;
+ uint8_t *buf;
+ } message;
+ struct iovec iov;
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control;
+ union sockaddr_union snl;
+ struct msghdr smsg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ .msg_name = &snl,
+ .msg_namelen = sizeof(snl),
+ };
+ struct ucred *cred;
+ size_t offset;
+ ssize_t n;
+ bool is_initialized = false;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ n = next_datagram_size_fd(m->sock);
+ if (n < 0) {
+ if (!ERRNO_IS_TRANSIENT(n))
+ log_monitor_errno(m, n, "Failed to get the received message size: %m");
+ return n;
+ }
+
+ if ((size_t) n < ALLOCA_MAX / sizeof(uint8_t) / 2)
+ message.buf = newa(uint8_t, n);
+ else {
+ buf_alloc = new(uint8_t, n);
+ if (!buf_alloc)
+ return log_oom_debug();
+
+ message.buf = buf_alloc;
+ }
+
+ iov = IOVEC_MAKE(message.buf, n);
+
+ n = recvmsg(m->sock, &smsg, 0);
+ if (n < 0) {
+ if (!ERRNO_IS_TRANSIENT(errno))
+ log_monitor_errno(m, errno, "Failed to receive message: %m");
+ return -errno;
+ }
+
+ if (smsg.msg_flags & MSG_TRUNC)
+ return log_monitor_errno(m, SYNTHETIC_ERRNO(EINVAL), "Received truncated message, ignoring message.");
+
+ if (n < 32)
+ return log_monitor_errno(m, SYNTHETIC_ERRNO(EINVAL), "Invalid message length (%zi), ignoring message.", n);
+
+ if (snl.nl.nl_groups == MONITOR_GROUP_NONE) {
+ /* unicast message, check if we trust the sender */
+ if (m->snl_trusted_sender.nl.nl_pid == 0 ||
+ snl.nl.nl_pid != m->snl_trusted_sender.nl.nl_pid)
+ return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN),
+ "Unicast netlink message ignored.");
+
+ } else if (snl.nl.nl_groups == MONITOR_GROUP_KERNEL) {
+ if (snl.nl.nl_pid > 0)
+ return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN),
+ "Multicast kernel netlink message from PID %"PRIu32" ignored.",
+ snl.nl.nl_pid);
+ }
+
+ cred = CMSG_FIND_DATA(&smsg, SOL_SOCKET, SCM_CREDENTIALS, struct ucred);
+ if (!cred)
+ return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN),
+ "No sender credentials received, ignoring message.");
+
+ if (!check_sender_uid(m, cred->uid))
+ return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN),
+ "Sender uid="UID_FMT", message ignored.", cred->uid);
+
+ if (!memchr(message.buf, 0, n))
+ return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN), "Received message without NUL, ignoring message.");
+
+ if (streq(message.nulstr, "libudev")) {
+ /* udev message needs proper version magic */
+ if (message.nlh->magic != htobe32(UDEV_MONITOR_MAGIC))
+ return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN),
+ "Invalid message signature (%x != %x).",
+ message.nlh->magic, htobe32(UDEV_MONITOR_MAGIC));
+
+ if (message.nlh->properties_off + 32 > (size_t) n)
+ return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN),
+ "Invalid offset for properties (%u > %zi).",
+ message.nlh->properties_off + 32, n);
+
+ offset = message.nlh->properties_off;
+
+ /* devices received from udev are always initialized */
+ is_initialized = true;
+
+ } else {
+ /* check kernel message header */
+ if (!strstr(message.nulstr, "@/"))
+ return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN), "Invalid message header.");
+
+ offset = strlen(message.nulstr) + 1;
+ if (offset >= (size_t) n)
+ return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN), "Invalid message length.");
+ }
+
+ r = device_new_from_nulstr(&device, message.nulstr + offset, n - offset);
+ if (r < 0)
+ return log_monitor_errno(m, r, "Failed to create device from received message: %m");
+
+ if (is_initialized)
+ device_set_is_initialized(device);
+
+ /* Skip device, if it does not pass the current filter */
+ r = passes_filter(m, device);
+ if (r < 0)
+ return log_device_monitor_errno(device, m, r, "Failed to check received device passing filter: %m");
+ if (r == 0)
+ log_device_monitor(device, m, "Received device does not pass filter, ignoring.");
+ else
+ *ret = TAKE_PTR(device);
+
+ return r;
+}
+
+static uint32_t string_hash32(const char *str) {
+ return MurmurHash2(str, strlen(str), 0);
+}
+
+/* Get a bunch of bit numbers out of the hash, and set the bits in our bit field */
+static uint64_t string_bloom64(const char *str) {
+ uint64_t bits = 0;
+ uint32_t hash = string_hash32(str);
+
+ bits |= UINT64_C(1) << (hash & 63);
+ bits |= UINT64_C(1) << ((hash >> 6) & 63);
+ bits |= UINT64_C(1) << ((hash >> 12) & 63);
+ bits |= UINT64_C(1) << ((hash >> 18) & 63);
+ return bits;
+}
+
+int device_monitor_send_device(
+ sd_device_monitor *m,
+ sd_device_monitor *destination,
+ sd_device *device) {
+
+ monitor_netlink_header nlh = {
+ .prefix = "libudev",
+ .magic = htobe32(UDEV_MONITOR_MAGIC),
+ .header_size = sizeof nlh,
+ };
+ struct iovec iov[2] = {
+ { .iov_base = &nlh, .iov_len = sizeof nlh },
+ };
+ struct msghdr smsg = {
+ .msg_iov = iov,
+ .msg_iovlen = 2,
+ };
+ /* default destination for sending */
+ union sockaddr_union default_destination = {
+ .nl.nl_family = AF_NETLINK,
+ .nl.nl_groups = MONITOR_GROUP_UDEV,
+ };
+ uint64_t tag_bloom_bits;
+ const char *buf, *val;
+ ssize_t count;
+ size_t blen;
+ int r;
+
+ assert(m);
+ assert(device);
+
+ r = device_get_properties_nulstr(device, &buf, &blen);
+ if (r < 0)
+ return log_device_monitor_errno(device, m, r, "Failed to get device properties: %m");
+ if (blen < 32)
+ return log_device_monitor_errno(device, m, SYNTHETIC_ERRNO(EINVAL),
+ "Length of device property nulstr is too small to contain valid device information.");
+
+ /* fill in versioned header */
+ r = sd_device_get_subsystem(device, &val);
+ if (r < 0)
+ return log_device_monitor_errno(device, m, r, "Failed to get device subsystem: %m");
+ nlh.filter_subsystem_hash = htobe32(string_hash32(val));
+
+ if (sd_device_get_devtype(device, &val) >= 0)
+ nlh.filter_devtype_hash = htobe32(string_hash32(val));
+
+ /* add tag bloom filter */
+ tag_bloom_bits = 0;
+ FOREACH_DEVICE_TAG(device, tag)
+ tag_bloom_bits |= string_bloom64(tag);
+
+ if (tag_bloom_bits > 0) {
+ nlh.filter_tag_bloom_hi = htobe32(tag_bloom_bits >> 32);
+ nlh.filter_tag_bloom_lo = htobe32(tag_bloom_bits & 0xffffffff);
+ }
+
+ /* add properties list */
+ nlh.properties_off = iov[0].iov_len;
+ nlh.properties_len = blen;
+ iov[1] = IOVEC_MAKE((char*) buf, blen);
+
+ /*
+ * Use custom address for target, or the default one.
+ *
+ * If we send to a multicast group, we will get
+ * ECONNREFUSED, which is expected.
+ */
+ smsg.msg_name = destination ? &destination->snl : &default_destination;
+ smsg.msg_namelen = sizeof(struct sockaddr_nl);
+ count = sendmsg(m->sock, &smsg, 0);
+ if (count < 0) {
+ if (!destination && errno == ECONNREFUSED) {
+ log_device_monitor(device, m, "Passed to netlink monitor.");
+ return 0;
+ } else
+ return log_device_monitor_errno(device, m, errno, "Failed to send device to netlink monitor: %m");
+ }
+
+ log_device_monitor(device, m, "Passed %zi byte to netlink monitor.", count);
+ return count;
+}
+
+static void bpf_stmt(struct sock_filter *ins, unsigned *i,
+ unsigned short code, unsigned data) {
+ ins[(*i)++] = (struct sock_filter) {
+ .code = code,
+ .k = data,
+ };
+}
+
+static void bpf_jmp(struct sock_filter *ins, unsigned *i,
+ unsigned short code, unsigned data,
+ unsigned short jt, unsigned short jf) {
+ ins[(*i)++] = (struct sock_filter) {
+ .code = code,
+ .jt = jt,
+ .jf = jf,
+ .k = data,
+ };
+}
+
+_public_ int sd_device_monitor_filter_update(sd_device_monitor *m) {
+ struct sock_filter ins[512] = {};
+ struct sock_fprog filter;
+ const char *subsystem, *devtype, *tag;
+ unsigned i = 0;
+
+ assert_return(m, -EINVAL);
+
+ if (m->filter_uptodate)
+ return 0;
+
+ if (m->snl.nl.nl_groups == MONITOR_GROUP_KERNEL ||
+ (hashmap_isempty(m->subsystem_filter) &&
+ set_isempty(m->tag_filter))) {
+ m->filter_uptodate = true;
+ return 0;
+ }
+
+ /* load magic in A */
+ bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, magic));
+ /* jump if magic matches */
+ bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, UDEV_MONITOR_MAGIC, 1, 0);
+ /* wrong magic, pass packet */
+ bpf_stmt(ins, &i, BPF_RET|BPF_K, 0xffffffff);
+
+ if (!set_isempty(m->tag_filter)) {
+ int tag_matches = set_size(m->tag_filter);
+
+ /* add all tags matches */
+ SET_FOREACH(tag, m->tag_filter) {
+ uint64_t tag_bloom_bits = string_bloom64(tag);
+ uint32_t tag_bloom_hi = tag_bloom_bits >> 32;
+ uint32_t tag_bloom_lo = tag_bloom_bits & 0xffffffff;
+
+ /* load device bloom bits in A */
+ bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, filter_tag_bloom_hi));
+ /* clear bits (tag bits & bloom bits) */
+ bpf_stmt(ins, &i, BPF_ALU|BPF_AND|BPF_K, tag_bloom_hi);
+ /* jump to next tag if it does not match */
+ bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, tag_bloom_hi, 0, 3);
+
+ /* load device bloom bits in A */
+ bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, filter_tag_bloom_lo));
+ /* clear bits (tag bits & bloom bits) */
+ bpf_stmt(ins, &i, BPF_ALU|BPF_AND|BPF_K, tag_bloom_lo);
+ /* jump behind end of tag match block if tag matches */
+ tag_matches--;
+ bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, tag_bloom_lo, 1 + (tag_matches * 6), 0);
+ }
+
+ /* nothing matched, drop packet */
+ bpf_stmt(ins, &i, BPF_RET|BPF_K, 0);
+ }
+
+ /* add all subsystem matches */
+ if (!hashmap_isempty(m->subsystem_filter)) {
+ HASHMAP_FOREACH_KEY(devtype, subsystem, m->subsystem_filter) {
+ uint32_t hash = string_hash32(subsystem);
+
+ /* load device subsystem value in A */
+ bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, filter_subsystem_hash));
+ if (!devtype) {
+ /* jump if subsystem does not match */
+ bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, hash, 0, 1);
+ } else {
+ /* jump if subsystem does not match */
+ bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, hash, 0, 3);
+ /* load device devtype value in A */
+ bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, filter_devtype_hash));
+ /* jump if value does not match */
+ hash = string_hash32(devtype);
+ bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, hash, 0, 1);
+ }
+
+ /* matched, pass packet */
+ bpf_stmt(ins, &i, BPF_RET|BPF_K, 0xffffffff);
+
+ if (i+1 >= ELEMENTSOF(ins))
+ return -E2BIG;
+ }
+
+ /* nothing matched, drop packet */
+ bpf_stmt(ins, &i, BPF_RET|BPF_K, 0);
+ }
+
+ /* matched, pass packet */
+ bpf_stmt(ins, &i, BPF_RET|BPF_K, 0xffffffff);
+
+ /* install filter */
+ filter = (struct sock_fprog) {
+ .len = i,
+ .filter = ins,
+ };
+ if (setsockopt(m->sock, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter)) < 0)
+ return -errno;
+
+ m->filter_uptodate = true;
+ return 0;
+}
+
+_public_ int sd_device_monitor_filter_add_match_subsystem_devtype(sd_device_monitor *m, const char *subsystem, const char *devtype) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(subsystem, -EINVAL);
+
+ /* Do not use string_has_ops_free_free or hashmap_put_strdup() here, as this may be called
+ * multiple times with the same subsystem but different devtypes. */
+ r = hashmap_put_strdup_full(&m->subsystem_filter, &trivial_hash_ops_free_free, subsystem, devtype);
+ if (r <= 0)
+ return r;
+
+ m->filter_uptodate = false;
+ return r;
+}
+
+_public_ int sd_device_monitor_filter_add_match_tag(sd_device_monitor *m, const char *tag) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(tag, -EINVAL);
+
+ r = set_put_strdup(&m->tag_filter, tag);
+ if (r <= 0)
+ return r;
+
+ m->filter_uptodate = false;
+ return r;
+}
+
+_public_ int sd_device_monitor_filter_add_match_sysattr(sd_device_monitor *m, const char *sysattr, const char *value, int match) {
+ Hashmap **hashmap;
+
+ assert_return(m, -EINVAL);
+ assert_return(sysattr, -EINVAL);
+
+ if (match)
+ hashmap = &m->match_sysattr_filter;
+ else
+ hashmap = &m->nomatch_sysattr_filter;
+
+ /* TODO: unset m->filter_uptodate on success when we support this filter on BPF. */
+ return update_match_strv(hashmap, sysattr, value, /* clear_on_null = */ true);
+}
+
+_public_ int sd_device_monitor_filter_add_match_parent(sd_device_monitor *m, sd_device *device, int match) {
+ const char *syspath;
+ Set **set;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(device, -EINVAL);
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ if (match)
+ set = &m->match_parent_filter;
+ else
+ set = &m->nomatch_parent_filter;
+
+ /* TODO: unset m->filter_uptodate on success when we support this filter on BPF. */
+ return set_put_strdup(set, syspath);
+}
+
+_public_ int sd_device_monitor_filter_remove(sd_device_monitor *m) {
+ static const struct sock_fprog filter = { 0, NULL };
+
+ assert_return(m, -EINVAL);
+
+ m->subsystem_filter = hashmap_free(m->subsystem_filter);
+ m->tag_filter = set_free(m->tag_filter);
+ m->match_sysattr_filter = hashmap_free(m->match_sysattr_filter);
+ m->nomatch_sysattr_filter = hashmap_free(m->nomatch_sysattr_filter);
+ m->match_parent_filter = set_free(m->match_parent_filter);
+ m->nomatch_parent_filter = set_free(m->nomatch_parent_filter);
+
+ if (setsockopt(m->sock, SOL_SOCKET, SO_DETACH_FILTER, &filter, sizeof(filter)) < 0)
+ return -errno;
+
+ m->filter_uptodate = true;
+ return 0;
+}
diff --git a/src/libsystemd/sd-device/device-private.c b/src/libsystemd/sd-device/device-private.c
new file mode 100644
index 0000000..0edabfb
--- /dev/null
+++ b/src/libsystemd/sd-device/device-private.c
@@ -0,0 +1,903 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ctype.h>
+#include <net/if.h>
+#include <sys/types.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "device-internal.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "nulstr-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "set.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+
+int device_add_property(sd_device *device, const char *key, const char *value) {
+ int r;
+
+ assert(device);
+ assert(key);
+
+ r = device_add_property_aux(device, key, value, false);
+ if (r < 0)
+ return r;
+
+ if (key[0] != '.') {
+ r = device_add_property_aux(device, key, value, true);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int device_add_propertyf(sd_device *device, const char *key, const char *format, ...) {
+ _cleanup_free_ char *value = NULL;
+ va_list ap;
+ int r;
+
+ assert(device);
+ assert(key);
+
+ if (!format)
+ return device_add_property(device, key, NULL);
+
+ va_start(ap, format);
+ r = vasprintf(&value, format, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return -ENOMEM;
+
+ return device_add_property(device, key, value);
+}
+
+void device_set_devlink_priority(sd_device *device, int priority) {
+ assert(device);
+
+ device->devlink_priority = priority;
+}
+
+void device_set_is_initialized(sd_device *device) {
+ assert(device);
+
+ device->is_initialized = true;
+}
+
+int device_ensure_usec_initialized(sd_device *device, sd_device *device_old) {
+ usec_t when;
+
+ assert(device);
+
+ if (device_old && device_old->usec_initialized > 0)
+ when = device_old->usec_initialized;
+ else
+ when = now(CLOCK_MONOTONIC);
+
+ return device_set_usec_initialized(device, when);
+}
+
+uint64_t device_get_properties_generation(sd_device *device) {
+ assert(device);
+
+ return device->properties_generation;
+}
+
+uint64_t device_get_tags_generation(sd_device *device) {
+ assert(device);
+
+ return device->tags_generation;
+}
+
+uint64_t device_get_devlinks_generation(sd_device *device) {
+ assert(device);
+
+ return device->devlinks_generation;
+}
+
+int device_get_devnode_mode(sd_device *device, mode_t *ret) {
+ int r;
+
+ assert(device);
+
+ r = device_read_db(device);
+ if (r < 0)
+ return r;
+
+ if (device->devmode == MODE_INVALID)
+ return -ENOENT;
+
+ if (ret)
+ *ret = device->devmode;
+
+ return 0;
+}
+
+int device_get_devnode_uid(sd_device *device, uid_t *ret) {
+ int r;
+
+ assert(device);
+
+ r = device_read_db(device);
+ if (r < 0)
+ return r;
+
+ if (device->devuid == UID_INVALID)
+ return -ENOENT;
+
+ if (ret)
+ *ret = device->devuid;
+
+ return 0;
+}
+
+static int device_set_devuid(sd_device *device, const char *uid) {
+ uid_t u;
+ int r;
+
+ assert(device);
+ assert(uid);
+
+ r = parse_uid(uid, &u);
+ if (r < 0)
+ return r;
+
+ r = device_add_property_internal(device, "DEVUID", uid);
+ if (r < 0)
+ return r;
+
+ device->devuid = u;
+
+ return 0;
+}
+
+int device_get_devnode_gid(sd_device *device, gid_t *ret) {
+ int r;
+
+ assert(device);
+
+ r = device_read_db(device);
+ if (r < 0)
+ return r;
+
+ if (device->devgid == GID_INVALID)
+ return -ENOENT;
+
+ if (ret)
+ *ret = device->devgid;
+
+ return 0;
+}
+
+static int device_set_devgid(sd_device *device, const char *gid) {
+ gid_t g;
+ int r;
+
+ assert(device);
+ assert(gid);
+
+ r = parse_gid(gid, &g);
+ if (r < 0)
+ return r;
+
+ r = device_add_property_internal(device, "DEVGID", gid);
+ if (r < 0)
+ return r;
+
+ device->devgid = g;
+
+ return 0;
+}
+
+int device_set_action(sd_device *device, sd_device_action_t a) {
+ int r;
+
+ assert(device);
+ assert(a >= 0 && a < _SD_DEVICE_ACTION_MAX);
+
+ r = device_add_property_internal(device, "ACTION", device_action_to_string(a));
+ if (r < 0)
+ return r;
+
+ device->action = a;
+
+ return 0;
+}
+
+static int device_set_action_from_string(sd_device *device, const char *action) {
+ sd_device_action_t a;
+
+ assert(device);
+ assert(action);
+
+ a = device_action_from_string(action);
+ if (a < 0)
+ return a;
+
+ return device_set_action(device, a);
+}
+
+static int device_set_seqnum(sd_device *device, const char *str) {
+ uint64_t seqnum;
+ int r;
+
+ assert(device);
+ assert(str);
+
+ r = safe_atou64(str, &seqnum);
+ if (r < 0)
+ return r;
+ if (seqnum == 0)
+ return -EINVAL;
+
+ r = device_add_property_internal(device, "SEQNUM", str);
+ if (r < 0)
+ return r;
+
+ device->seqnum = seqnum;
+
+ return 0;
+}
+
+static int device_amend(sd_device *device, const char *key, const char *value) {
+ int r;
+
+ assert(device);
+ assert(key);
+ assert(value);
+
+ if (streq(key, "DEVPATH")) {
+ char *path;
+
+ path = strjoina("/sys", value);
+
+ /* the caller must verify or trust this data (e.g., if it comes from the kernel) */
+ r = device_set_syspath(device, path, false);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set syspath to '%s': %m", path);
+ } else if (streq(key, "SUBSYSTEM")) {
+ r = device_set_subsystem(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set subsystem to '%s': %m", value);
+ } else if (streq(key, "DEVTYPE")) {
+ r = device_set_devtype(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set devtype to '%s': %m", value);
+ } else if (streq(key, "DEVNAME")) {
+ r = device_set_devname(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set devname to '%s': %m", value);
+ } else if (streq(key, "USEC_INITIALIZED")) {
+ usec_t t;
+
+ r = safe_atou64(value, &t);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to parse timestamp '%s': %m", value);
+
+ r = device_set_usec_initialized(device, t);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set usec-initialized to '%s': %m", value);
+ } else if (streq(key, "DRIVER")) {
+ r = device_set_driver(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set driver to '%s': %m", value);
+ } else if (streq(key, "IFINDEX")) {
+ r = device_set_ifindex(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set ifindex to '%s': %m", value);
+ } else if (streq(key, "DEVMODE")) {
+ r = device_set_devmode(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set devmode to '%s': %m", value);
+ } else if (streq(key, "DEVUID")) {
+ r = device_set_devuid(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set devuid to '%s': %m", value);
+ } else if (streq(key, "DEVGID")) {
+ r = device_set_devgid(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set devgid to '%s': %m", value);
+ } else if (streq(key, "ACTION")) {
+ r = device_set_action_from_string(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set action to '%s': %m", value);
+ } else if (streq(key, "SEQNUM")) {
+ r = device_set_seqnum(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set SEQNUM to '%s': %m", value);
+ } else if (streq(key, "DISKSEQ")) {
+ r = device_set_diskseq(device, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set DISKSEQ to '%s': %m", value);
+ } else if (streq(key, "DEVLINKS")) {
+ for (const char *p = value;;) {
+ _cleanup_free_ char *word = NULL;
+
+ /* udev rules may set escaped strings, and sd-device does not modify the input
+ * strings. So, it is also necessary to keep the strings received through
+ * sd-device-monitor. */
+ r = extract_first_word(&p, &word, NULL, EXTRACT_RETAIN_ESCAPE);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = device_add_devlink(device, word);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to add devlink '%s': %m", word);
+ }
+ } else if (STR_IN_SET(key, "TAGS", "CURRENT_TAGS")) {
+ for (const char *p = value;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ if (isempty(word))
+ continue;
+
+ r = device_add_tag(device, word, streq(key, "CURRENT_TAGS"));
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to add tag '%s': %m", word);
+ }
+ } else if (streq(key, "UDEV_DATABASE_VERSION")) {
+ r = safe_atou(value, &device->database_version);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to parse udev database version '%s': %m", value);
+ } else {
+ r = device_add_property_internal(device, key, value);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to add property '%s=%s': %m", key, value);
+ }
+
+ return 0;
+}
+
+static int device_append(
+ sd_device *device,
+ char *key,
+ const char **_major,
+ const char **_minor) {
+
+ const char *major = NULL, *minor = NULL;
+ char *value;
+ int r;
+
+ assert(device);
+ assert(key);
+ assert(_major);
+ assert(_minor);
+
+ value = strchr(key, '=');
+ if (!value)
+ return log_device_debug_errno(device, SYNTHETIC_ERRNO(EINVAL),
+ "sd-device: Not a key-value pair: '%s'", key);
+
+ *value = '\0';
+
+ value++;
+
+ if (streq(key, "MAJOR"))
+ major = value;
+ else if (streq(key, "MINOR"))
+ minor = value;
+ else {
+ r = device_amend(device, key, value);
+ if (r < 0)
+ return r;
+ }
+
+ if (major)
+ *_major = major;
+
+ if (minor)
+ *_minor = minor;
+
+ return 0;
+}
+
+void device_seal(sd_device *device) {
+ assert(device);
+
+ device->sealed = true;
+}
+
+static int device_verify(sd_device *device) {
+ int r;
+
+ assert(device);
+
+ if (!device->devpath || !device->subsystem || device->action < 0 || device->seqnum == 0)
+ return log_device_debug_errno(device, SYNTHETIC_ERRNO(EINVAL),
+ "sd-device: Device created from strv or nulstr lacks devpath, subsystem, action or seqnum.");
+
+ if (streq(device->subsystem, "drivers")) {
+ r = device_set_drivers_subsystem(device);
+ if (r < 0)
+ return r;
+ }
+
+ device->sealed = true;
+
+ return 0;
+}
+
+int device_new_from_strv(sd_device **ret, char **strv) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ const char *major = NULL, *minor = NULL;
+ int r;
+
+ assert(ret);
+ assert(strv);
+
+ r = device_new_aux(&device);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(key, strv) {
+ r = device_append(device, *key, &major, &minor);
+ if (r < 0)
+ return r;
+ }
+
+ if (major) {
+ r = device_set_devnum(device, major, minor);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set devnum %s:%s: %m", major, minor);
+ }
+
+ r = device_verify(device);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(device);
+
+ return 0;
+}
+
+int device_new_from_nulstr(sd_device **ret, char *nulstr, size_t len) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ const char *major = NULL, *minor = NULL;
+ int r;
+
+ assert(ret);
+ assert(nulstr);
+ assert(len);
+
+ r = device_new_aux(&device);
+ if (r < 0)
+ return r;
+
+ for (size_t i = 0; i < len; ) {
+ char *key;
+ const char *end;
+
+ key = nulstr + i;
+ end = memchr(key, '\0', len - i);
+ if (!end)
+ return log_device_debug_errno(device, SYNTHETIC_ERRNO(EINVAL),
+ "sd-device: Failed to parse nulstr");
+
+ i += end - key + 1;
+
+ /* netlink messages for some devices contain an unwanted newline at the end of value.
+ * Let's drop the newline and remaining characters after the newline. */
+ truncate_nl(key);
+
+ r = device_append(device, key, &major, &minor);
+ if (r < 0)
+ return r;
+ }
+
+ if (major) {
+ r = device_set_devnum(device, major, minor);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to set devnum %s:%s: %m", major, minor);
+ }
+
+ r = device_verify(device);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(device);
+
+ return 0;
+}
+
+static int device_update_properties_bufs(sd_device *device) {
+ _cleanup_free_ char **buf_strv = NULL, *buf_nulstr = NULL;
+ size_t nulstr_len = 0, num = 0;
+
+ assert(device);
+
+ if (!device->properties_buf_outdated)
+ return 0;
+
+ /* append udev database version */
+ buf_nulstr = newdup(char, "UDEV_DATABASE_VERSION=" STRINGIFY(LATEST_UDEV_DATABASE_VERSION) "\0",
+ STRLEN("UDEV_DATABASE_VERSION=" STRINGIFY(LATEST_UDEV_DATABASE_VERSION)) + 2);
+ if (!buf_nulstr)
+ return -ENOMEM;
+
+ nulstr_len += STRLEN("UDEV_DATABASE_VERSION=" STRINGIFY(LATEST_UDEV_DATABASE_VERSION)) + 1;
+ num++;
+
+ FOREACH_DEVICE_PROPERTY(device, prop, val) {
+ size_t len = 0;
+
+ len = strlen(prop) + 1 + strlen(val);
+
+ buf_nulstr = GREEDY_REALLOC0(buf_nulstr, nulstr_len + len + 2);
+ if (!buf_nulstr)
+ return -ENOMEM;
+
+ strscpyl(buf_nulstr + nulstr_len, len + 1, prop, "=", val, NULL);
+ nulstr_len += len + 1;
+ num++;
+ }
+
+ /* build buf_strv from buf_nulstr */
+ buf_strv = new0(char*, num + 1);
+ if (!buf_strv)
+ return -ENOMEM;
+
+ size_t i = 0;
+ NULSTR_FOREACH(p, buf_nulstr)
+ buf_strv[i++] = p;
+ assert(i == num);
+
+ free_and_replace(device->properties_nulstr, buf_nulstr);
+ device->properties_nulstr_len = nulstr_len;
+ free_and_replace(device->properties_strv, buf_strv);
+
+ device->properties_buf_outdated = false;
+ return 0;
+}
+
+int device_get_properties_nulstr(sd_device *device, const char **ret_nulstr, size_t *ret_len) {
+ int r;
+
+ assert(device);
+
+ r = device_update_properties_bufs(device);
+ if (r < 0)
+ return r;
+
+ if (ret_nulstr)
+ *ret_nulstr = device->properties_nulstr;
+ if (ret_len)
+ *ret_len = device->properties_nulstr_len;
+
+ return 0;
+}
+
+int device_get_properties_strv(sd_device *device, char ***ret) {
+ int r;
+
+ assert(device);
+
+ r = device_update_properties_bufs(device);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = device->properties_strv;
+
+ return 0;
+}
+
+int device_get_devlink_priority(sd_device *device, int *ret) {
+ int r;
+
+ assert(device);
+
+ r = device_read_db(device);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = device->devlink_priority;
+
+ return 0;
+}
+
+int device_clone_with_db(sd_device *device, sd_device **ret) {
+ _cleanup_(sd_device_unrefp) sd_device *dest = NULL;
+ const char *key, *val;
+ int r;
+
+ assert(device);
+ assert(ret);
+
+ /* The device may be already removed. Let's copy minimal set of information that was obtained through
+ * netlink socket. */
+
+ r = device_new_aux(&dest);
+ if (r < 0)
+ return r;
+
+ /* Seal device to prevent reading the uevent file, as the device may have been already removed. */
+ dest->sealed = true;
+
+ /* Copy syspath, then also devname, sysname or sysnum can be obtained. */
+ r = device_set_syspath(dest, device->syspath, false);
+ if (r < 0)
+ return r;
+
+ /* Copy other information stored in database. Here, do not use FOREACH_DEVICE_PROPERTY() and
+ * sd_device_get_property_value(), as they calls device_properties_prepare() ->
+ * device_read_uevent_file(), but as commented in the above, the device may be already removed and
+ * reading uevent file may fail. */
+ ORDERED_HASHMAP_FOREACH_KEY(val, key, device->properties) {
+ if (streq(key, "MINOR"))
+ continue;
+
+ if (streq(key, "MAJOR")) {
+ const char *minor = NULL;
+
+ minor = ordered_hashmap_get(device->properties, "MINOR");
+ r = device_set_devnum(dest, val, minor);
+ } else
+ r = device_amend(dest, key, val);
+ if (r < 0)
+ return r;
+
+ if (streq(key, "SUBSYSTEM") && streq(val, "drivers")) {
+ r = free_and_strdup(&dest->driver_subsystem, device->driver_subsystem);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* Finally, read the udev database. */
+ r = device_read_db_internal(dest, /* force = */ true);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(dest);
+ return 0;
+}
+
+void device_cleanup_tags(sd_device *device) {
+ assert(device);
+
+ device->all_tags = set_free_free(device->all_tags);
+ device->current_tags = set_free_free(device->current_tags);
+ device->property_tags_outdated = true;
+ device->tags_generation++;
+}
+
+void device_cleanup_devlinks(sd_device *device) {
+ assert(device);
+
+ set_free_free(device->devlinks);
+ device->devlinks = NULL;
+ device->property_devlinks_outdated = true;
+ device->devlinks_generation++;
+}
+
+void device_remove_tag(sd_device *device, const char *tag) {
+ assert(device);
+ assert(tag);
+
+ free(set_remove(device->current_tags, tag));
+ device->property_tags_outdated = true;
+ device->tags_generation++;
+}
+
+static int device_tag(sd_device *device, const char *tag, bool add) {
+ const char *id;
+ char *path;
+ int r;
+
+ assert(device);
+ assert(tag);
+
+ r = device_get_device_id(device, &id);
+ if (r < 0)
+ return r;
+
+ path = strjoina("/run/udev/tags/", tag, "/", id);
+
+ if (add)
+ return touch_file(path, true, USEC_INFINITY, UID_INVALID, GID_INVALID, 0444);
+
+ if (unlink(path) < 0 && errno != ENOENT)
+ return -errno;
+
+ return 0;
+}
+
+int device_tag_index(sd_device *device, sd_device *device_old, bool add) {
+ int r = 0, k;
+
+ if (add && device_old)
+ /* delete possible left-over tags */
+ FOREACH_DEVICE_TAG(device_old, tag)
+ if (!sd_device_has_tag(device, tag)) {
+ k = device_tag(device_old, tag, false);
+ if (r >= 0 && k < 0)
+ r = k;
+ }
+
+ FOREACH_DEVICE_TAG(device, tag) {
+ k = device_tag(device, tag, add);
+ if (r >= 0 && k < 0)
+ r = k;
+ }
+
+ return r;
+}
+
+static bool device_has_info(sd_device *device) {
+ assert(device);
+
+ if (!set_isempty(device->devlinks))
+ return true;
+
+ if (device->devlink_priority != 0)
+ return true;
+
+ if (!ordered_hashmap_isempty(device->properties_db))
+ return true;
+
+ if (!set_isempty(device->all_tags))
+ return true;
+
+ if (!set_isempty(device->current_tags))
+ return true;
+
+ return false;
+}
+
+void device_set_db_persist(sd_device *device) {
+ assert(device);
+
+ device->db_persist = true;
+}
+
+int device_update_db(sd_device *device) {
+ const char *id;
+ char *path;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_(unlink_and_freep) char *path_tmp = NULL;
+ bool has_info;
+ int r;
+
+ assert(device);
+
+ has_info = device_has_info(device);
+
+ r = device_get_device_id(device, &id);
+ if (r < 0)
+ return r;
+
+ path = strjoina("/run/udev/data/", id);
+
+ /* do not store anything for otherwise empty devices */
+ if (!has_info && major(device->devnum) == 0 && device->ifindex == 0) {
+ if (unlink(path) < 0 && errno != ENOENT)
+ return -errno;
+
+ return 0;
+ }
+
+ /* write a database file */
+ r = mkdir_parents(path, 0755);
+ if (r < 0)
+ return r;
+
+ r = fopen_temporary(path, &f, &path_tmp);
+ if (r < 0)
+ return r;
+
+ /* set 'sticky' bit to indicate that we should not clean the database when we transition from initrd
+ * to the real root */
+ if (fchmod(fileno(f), device->db_persist ? 01644 : 0644) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ if (has_info) {
+ const char *property, *value, *ct;
+
+ if (major(device->devnum) > 0) {
+ FOREACH_DEVICE_DEVLINK(device, devlink)
+ fprintf(f, "S:%s\n", devlink + STRLEN("/dev/"));
+
+ if (device->devlink_priority != 0)
+ fprintf(f, "L:%i\n", device->devlink_priority);
+ }
+
+ if (device->usec_initialized > 0)
+ fprintf(f, "I:"USEC_FMT"\n", device->usec_initialized);
+
+ ORDERED_HASHMAP_FOREACH_KEY(value, property, device->properties_db)
+ fprintf(f, "E:%s=%s\n", property, value);
+
+ FOREACH_DEVICE_TAG(device, tag)
+ fprintf(f, "G:%s\n", tag); /* Any tag */
+
+ SET_FOREACH(ct, device->current_tags)
+ fprintf(f, "Q:%s\n", ct); /* Current tag */
+
+ /* Always write the latest database version here, instead of the value stored in
+ * device->database_version, as which may be 0. */
+ fputs("V:" STRINGIFY(LATEST_UDEV_DATABASE_VERSION) "\n", f);
+ }
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto fail;
+
+ if (rename(path_tmp, path) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ path_tmp = mfree(path_tmp);
+
+ log_device_debug(device, "sd-device: Created %s file '%s' for '%s'", has_info ? "db" : "empty",
+ path, device->devpath);
+
+ return 0;
+
+fail:
+ (void) unlink(path);
+
+ return log_device_debug_errno(device, r, "sd-device: Failed to create %s file '%s' for '%s'", has_info ? "db" : "empty", path, device->devpath);
+}
+
+int device_delete_db(sd_device *device) {
+ const char *id;
+ char *path;
+ int r;
+
+ assert(device);
+
+ r = device_get_device_id(device, &id);
+ if (r < 0)
+ return r;
+
+ path = strjoina("/run/udev/data/", id);
+
+ if (unlink(path) < 0 && errno != ENOENT)
+ return -errno;
+
+ return 0;
+}
+
+static const char* const device_action_table[_SD_DEVICE_ACTION_MAX] = {
+ [SD_DEVICE_ADD] = "add",
+ [SD_DEVICE_REMOVE] = "remove",
+ [SD_DEVICE_CHANGE] = "change",
+ [SD_DEVICE_MOVE] = "move",
+ [SD_DEVICE_ONLINE] = "online",
+ [SD_DEVICE_OFFLINE] = "offline",
+ [SD_DEVICE_BIND] = "bind",
+ [SD_DEVICE_UNBIND] = "unbind",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(device_action, sd_device_action_t);
+
+void dump_device_action_table(void) {
+ DUMP_STRING_TABLE(device_action, sd_device_action_t, _SD_DEVICE_ACTION_MAX);
+}
diff --git a/src/libsystemd/sd-device/device-private.h b/src/libsystemd/sd-device/device-private.h
new file mode 100644
index 0000000..b903d1a
--- /dev/null
+++ b/src/libsystemd/sd-device/device-private.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <dirent.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "sd-device.h"
+
+#include "macro.h"
+
+int device_new_from_mode_and_devnum(sd_device **ret, mode_t mode, dev_t devnum);
+int device_new_from_nulstr(sd_device **ret, char *nulstr, size_t len);
+int device_new_from_strv(sd_device **ret, char **strv);
+
+int device_opendir(sd_device *device, const char *subdir, DIR **ret);
+
+int device_get_property_bool(sd_device *device, const char *key);
+int device_get_property_int(sd_device *device, const char *key, int *ret);
+int device_get_sysattr_int(sd_device *device, const char *sysattr, int *ret_value);
+int device_get_sysattr_unsigned(sd_device *device, const char *sysattr, unsigned *ret_value);
+int device_get_sysattr_bool(sd_device *device, const char *sysattr);
+int device_get_device_id(sd_device *device, const char **ret);
+int device_get_devlink_priority(sd_device *device, int *ret);
+int device_get_devnode_mode(sd_device *device, mode_t *ret);
+int device_get_devnode_uid(sd_device *device, uid_t *ret);
+int device_get_devnode_gid(sd_device *device, gid_t *ret);
+
+void device_clear_sysattr_cache(sd_device *device);
+int device_cache_sysattr_value(sd_device *device, const char *key, char *value);
+int device_get_cached_sysattr_value(sd_device *device, const char *key, const char **ret_value);
+
+void device_seal(sd_device *device);
+void device_set_is_initialized(sd_device *device);
+void device_set_db_persist(sd_device *device);
+void device_set_devlink_priority(sd_device *device, int priority);
+int device_ensure_usec_initialized(sd_device *device, sd_device *device_old);
+int device_add_devlink(sd_device *device, const char *devlink);
+int device_remove_devlink(sd_device *device, const char *devlink);
+bool device_has_devlink(sd_device *device, const char *devlink);
+int device_add_property(sd_device *device, const char *property, const char *value);
+int device_add_propertyf(sd_device *device, const char *key, const char *format, ...) _printf_(3, 4);
+int device_add_tag(sd_device *device, const char *tag, bool both);
+void device_remove_tag(sd_device *device, const char *tag);
+void device_cleanup_tags(sd_device *device);
+void device_cleanup_devlinks(sd_device *device);
+
+uint64_t device_get_properties_generation(sd_device *device);
+uint64_t device_get_tags_generation(sd_device *device);
+uint64_t device_get_devlinks_generation(sd_device *device);
+
+int device_properties_prepare(sd_device *device);
+int device_get_properties_nulstr(sd_device *device, const char **ret_nulstr, size_t *ret_len);
+int device_get_properties_strv(sd_device *device, char ***ret);
+
+int device_clone_with_db(sd_device *device, sd_device **ret);
+
+int device_tag_index(sd_device *dev, sd_device *dev_old, bool add);
+int device_update_db(sd_device *device);
+int device_delete_db(sd_device *device);
+int device_read_db_internal_filename(sd_device *device, const char *filename); /* For fuzzer */
+int device_read_db_internal(sd_device *device, bool force);
+static inline int device_read_db(sd_device *device) {
+ return device_read_db_internal(device, false);
+}
+
+int device_read_uevent_file(sd_device *device);
+
+int device_set_action(sd_device *device, sd_device_action_t a);
+sd_device_action_t device_action_from_string(const char *s) _pure_;
+const char *device_action_to_string(sd_device_action_t a) _const_;
+void dump_device_action_table(void);
diff --git a/src/libsystemd/sd-device/device-util.c b/src/libsystemd/sd-device/device-util.c
new file mode 100644
index 0000000..529eff2
--- /dev/null
+++ b/src/libsystemd/sd-device/device-util.c
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "device-private.h"
+#include "device-util.h"
+#include "devnum-util.h"
+#include "fd-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+int devname_from_devnum(mode_t mode, dev_t devnum, char **ret) {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+ _cleanup_free_ char *s = NULL;
+ const char *devname;
+ int r;
+
+ assert(ret);
+
+ if (devnum_is_zero(devnum))
+ return device_path_make_inaccessible(mode, ret);
+
+ r = device_new_from_mode_and_devnum(&dev, mode, devnum);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_devname(dev, &devname);
+ if (r < 0)
+ return r;
+
+ s = strdup(devname);
+ if (!s)
+ return -ENOMEM;
+
+ *ret = TAKE_PTR(s);
+ return 0;
+}
+
+int device_open_from_devnum(mode_t mode, dev_t devnum, int flags, char **ret) {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+ _cleanup_close_ int fd = -EBADF;
+ int r;
+
+ r = device_new_from_mode_and_devnum(&dev, mode, devnum);
+ if (r < 0)
+ return r;
+
+ fd = sd_device_open(dev, flags);
+ if (fd < 0)
+ return fd;
+
+ if (ret) {
+ const char *devname;
+ char *s;
+
+ r = sd_device_get_devname(dev, &devname);
+ if (r < 0)
+ return r;
+
+ s = strdup(devname);
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ }
+
+ return TAKE_FD(fd);
+}
+
+static int add_string_field(
+ sd_device *device,
+ const char *field,
+ int (*func)(sd_device *dev, const char **s),
+ char ***strv) {
+
+ const char *s;
+ int r;
+
+ assert(device);
+ assert(field);
+ assert(func);
+ assert(strv);
+
+ r = func(device, &s);
+ if (r < 0 && r != -ENOENT)
+ log_device_debug_errno(device, r, "Failed to get device \"%s\" property, ignoring: %m", field);
+ if (r >= 0)
+ (void) strv_extend_assignment(strv, field, s);
+
+ return 0;
+}
+
+char** device_make_log_fields(sd_device *device) {
+ _cleanup_strv_free_ char **strv = NULL;
+ dev_t devnum;
+ int ifindex;
+ sd_device_action_t action;
+ uint64_t seqnum, diskseq;
+ int r;
+
+ assert(device);
+
+ (void) add_string_field(device, "SYSPATH", sd_device_get_syspath, &strv);
+ (void) add_string_field(device, "SUBSYSTEM", sd_device_get_subsystem, &strv);
+ (void) add_string_field(device, "DEVTYPE", sd_device_get_devtype, &strv);
+ (void) add_string_field(device, "DRIVER", sd_device_get_driver, &strv);
+ (void) add_string_field(device, "DEVPATH", sd_device_get_devpath, &strv);
+ (void) add_string_field(device, "DEVNAME", sd_device_get_devname, &strv);
+ (void) add_string_field(device, "SYSNAME", sd_device_get_sysname, &strv);
+ (void) add_string_field(device, "SYSNUM", sd_device_get_sysnum, &strv);
+
+ r = sd_device_get_devnum(device, &devnum);
+ if (r < 0 && r != -ENOENT)
+ log_device_debug_errno(device, r, "Failed to get device \"DEVNUM\" property, ignoring: %m");
+ if (r >= 0)
+ (void) strv_extendf(&strv, "DEVNUM="DEVNUM_FORMAT_STR, DEVNUM_FORMAT_VAL(devnum));
+
+ r = sd_device_get_ifindex(device, &ifindex);
+ if (r < 0 && r != -ENOENT)
+ log_device_debug_errno(device, r, "Failed to get device \"IFINDEX\" property, ignoring: %m");
+ if (r >= 0)
+ (void) strv_extendf(&strv, "IFINDEX=%i", ifindex);
+
+ r = sd_device_get_action(device, &action);
+ if (r < 0 && r != -ENOENT)
+ log_device_debug_errno(device, r, "Failed to get device \"ACTION\" property, ignoring: %m");
+ if (r >= 0)
+ (void) strv_extendf(&strv, "ACTION=%s", device_action_to_string(action));
+
+ r = sd_device_get_seqnum(device, &seqnum);
+ if (r < 0 && r != -ENOENT)
+ log_device_debug_errno(device, r, "Failed to get device \"SEQNUM\" property, ignoring: %m");
+ if (r >= 0)
+ (void) strv_extendf(&strv, "SEQNUM=%"PRIu64, seqnum);
+
+ r = sd_device_get_diskseq(device, &diskseq);
+ if (r < 0 && r != -ENOENT)
+ log_device_debug_errno(device, r, "Failed to get device \"DISKSEQ\" property, ignoring: %m");
+ if (r >= 0)
+ (void) strv_extendf(&strv, "DISKSEQ=%"PRIu64, diskseq);
+
+ return TAKE_PTR(strv);
+}
diff --git a/src/libsystemd/sd-device/device-util.h b/src/libsystemd/sd-device/device-util.h
new file mode 100644
index 0000000..bf86ddc
--- /dev/null
+++ b/src/libsystemd/sd-device/device-util.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "log.h"
+#include "macro.h"
+
+#define device_unref_and_replace(a, b) \
+ unref_and_replace_full(a, b, sd_device_ref, sd_device_unref)
+
+#define FOREACH_DEVICE_PROPERTY(device, key, value) \
+ for (const char *value, *key = sd_device_get_property_first(device, &value); \
+ key; \
+ key = sd_device_get_property_next(device, &value))
+
+#define FOREACH_DEVICE_TAG(device, tag) \
+ for (const char *tag = sd_device_get_tag_first(device); \
+ tag; \
+ tag = sd_device_get_tag_next(device))
+
+#define FOREACH_DEVICE_CURRENT_TAG(device, tag) \
+ for (const char *tag = sd_device_get_current_tag_first(device); \
+ tag; \
+ tag = sd_device_get_current_tag_next(device))
+
+#define FOREACH_DEVICE_SYSATTR(device, attr) \
+ for (const char *attr = sd_device_get_sysattr_first(device); \
+ attr; \
+ attr = sd_device_get_sysattr_next(device))
+
+#define FOREACH_DEVICE_DEVLINK(device, devlink) \
+ for (const char *devlink = sd_device_get_devlink_first(device); \
+ devlink; \
+ devlink = sd_device_get_devlink_next(device))
+
+#define _FOREACH_DEVICE_CHILD(device, child, suffix_ptr) \
+ for (sd_device *child = sd_device_get_child_first(device, suffix_ptr); \
+ child; \
+ child = sd_device_get_child_next(device, suffix_ptr))
+
+#define FOREACH_DEVICE_CHILD(device, child) \
+ _FOREACH_DEVICE_CHILD(device, child, NULL)
+
+#define FOREACH_DEVICE_CHILD_WITH_SUFFIX(device, child, suffix) \
+ _FOREACH_DEVICE_CHILD(device, child, &suffix)
+
+#define FOREACH_DEVICE(enumerator, device) \
+ for (sd_device *device = sd_device_enumerator_get_device_first(enumerator); \
+ device; \
+ device = sd_device_enumerator_get_device_next(enumerator))
+
+#define FOREACH_SUBSYSTEM(enumerator, device) \
+ for (sd_device *device = sd_device_enumerator_get_subsystem_first(enumerator); \
+ device; \
+ device = sd_device_enumerator_get_subsystem_next(enumerator))
+
+#define log_device_full_errno_zerook(device, level, error, ...) \
+ ({ \
+ const char *_sysname = NULL; \
+ sd_device *_d = (device); \
+ int _level = (level), _e = (error); \
+ \
+ if (_d && _unlikely_(log_get_max_level() >= LOG_PRI(_level))) \
+ (void) sd_device_get_sysname(_d, &_sysname); \
+ log_object_internal(_level, _e, PROJECT_FILE, __LINE__, __func__, \
+ _sysname ? "DEVICE=" : NULL, _sysname, \
+ NULL, NULL, __VA_ARGS__); \
+ })
+
+#define log_device_full_errno(device, level, error, ...) \
+ ({ \
+ int _error = (error); \
+ ASSERT_NON_ZERO(_error); \
+ log_device_full_errno_zerook(device, level, _error, __VA_ARGS__); \
+ })
+
+#define log_device_full(device, level, ...) (void) log_device_full_errno_zerook(device, level, 0, __VA_ARGS__)
+
+#define log_device_debug(device, ...) log_device_full(device, LOG_DEBUG, __VA_ARGS__)
+#define log_device_info(device, ...) log_device_full(device, LOG_INFO, __VA_ARGS__)
+#define log_device_notice(device, ...) log_device_full(device, LOG_NOTICE, __VA_ARGS__)
+#define log_device_warning(device, ...) log_device_full(device, LOG_WARNING, __VA_ARGS__)
+#define log_device_error(device, ...) log_device_full(device, LOG_ERR, __VA_ARGS__)
+
+#define log_device_debug_errno(device, error, ...) log_device_full_errno(device, LOG_DEBUG, error, __VA_ARGS__)
+#define log_device_info_errno(device, error, ...) log_device_full_errno(device, LOG_INFO, error, __VA_ARGS__)
+#define log_device_notice_errno(device, error, ...) log_device_full_errno(device, LOG_NOTICE, error, __VA_ARGS__)
+#define log_device_warning_errno(device, error, ...) log_device_full_errno(device, LOG_WARNING, error, __VA_ARGS__)
+#define log_device_error_errno(device, error, ...) log_device_full_errno(device, LOG_ERR, error, __VA_ARGS__)
+
+int devname_from_devnum(mode_t mode, dev_t devnum, char **ret);
+static inline int devname_from_stat_rdev(const struct stat *st, char **ret) {
+ assert(st);
+ return devname_from_devnum(st->st_mode, st->st_rdev, ret);
+}
+int device_open_from_devnum(mode_t mode, dev_t devnum, int flags, char **ret);
+
+char** device_make_log_fields(sd_device *device);
diff --git a/src/libsystemd/sd-device/sd-device.c b/src/libsystemd/sd-device/sd-device.c
new file mode 100644
index 0000000..2fbc619
--- /dev/null
+++ b/src/libsystemd/sd-device/sd-device.c
@@ -0,0 +1,2715 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ctype.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "chase.h"
+#include "device-internal.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "devnum-util.h"
+#include "dirent-util.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "id128-util.h"
+#include "macro.h"
+#include "missing_magic.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "set.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "strxcpyx.h"
+#include "user-util.h"
+
+int device_new_aux(sd_device **ret) {
+ sd_device *device;
+
+ assert(ret);
+
+ device = new(sd_device, 1);
+ if (!device)
+ return -ENOMEM;
+
+ *device = (sd_device) {
+ .n_ref = 1,
+ .devmode = MODE_INVALID,
+ .devuid = UID_INVALID,
+ .devgid = GID_INVALID,
+ .action = _SD_DEVICE_ACTION_INVALID,
+ };
+
+ *ret = device;
+ return 0;
+}
+
+static sd_device *device_free(sd_device *device) {
+ assert(device);
+
+ sd_device_unref(device->parent);
+ free(device->syspath);
+ free(device->sysname);
+ free(device->devtype);
+ free(device->devname);
+ free(device->subsystem);
+ free(device->driver_subsystem);
+ free(device->driver);
+ free(device->device_id);
+ free(device->properties_strv);
+ free(device->properties_nulstr);
+
+ ordered_hashmap_free(device->properties);
+ ordered_hashmap_free(device->properties_db);
+ hashmap_free(device->sysattr_values);
+ set_free(device->sysattrs);
+ set_free(device->all_tags);
+ set_free(device->current_tags);
+ set_free(device->devlinks);
+ hashmap_free(device->children);
+
+ return mfree(device);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_device, sd_device, device_free);
+
+int device_add_property_aux(sd_device *device, const char *key, const char *value, bool db) {
+ OrderedHashmap **properties;
+
+ assert(device);
+ assert(key);
+
+ if (db)
+ properties = &device->properties_db;
+ else
+ properties = &device->properties;
+
+ if (value) {
+ _unused_ _cleanup_free_ char *old_value = NULL;
+ _cleanup_free_ char *new_key = NULL, *new_value = NULL, *old_key = NULL;
+ int r;
+
+ r = ordered_hashmap_ensure_allocated(properties, &string_hash_ops_free_free);
+ if (r < 0)
+ return r;
+
+ new_key = strdup(key);
+ if (!new_key)
+ return -ENOMEM;
+
+ new_value = strdup(value);
+ if (!new_value)
+ return -ENOMEM;
+
+ old_value = ordered_hashmap_get2(*properties, key, (void**) &old_key);
+
+ /* ordered_hashmap_replace() does not fail when the hashmap already has the entry. */
+ r = ordered_hashmap_replace(*properties, new_key, new_value);
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(new_key);
+ TAKE_PTR(new_value);
+ } else {
+ _unused_ _cleanup_free_ char *old_value = NULL;
+ _cleanup_free_ char *old_key = NULL;
+
+ old_value = ordered_hashmap_remove2(*properties, key, (void**) &old_key);
+ }
+
+ if (!db) {
+ device->properties_generation++;
+ device->properties_buf_outdated = true;
+ }
+
+ return 0;
+}
+
+int device_set_syspath(sd_device *device, const char *_syspath, bool verify) {
+ _cleanup_free_ char *syspath = NULL;
+ const char *devpath;
+ int r;
+
+ assert(device);
+ assert(_syspath);
+
+ if (verify) {
+ _cleanup_close_ int fd = -EBADF;
+
+ /* The input path maybe a symlink located outside of /sys. Let's try to chase the symlink at first.
+ * The primary use case is that e.g. /proc/device-tree is a symlink to /sys/firmware/devicetree/base.
+ * By chasing symlinks in the path at first, we can call sd_device_new_from_path() with such path. */
+ r = chase(_syspath, NULL, 0, &syspath, &fd);
+ if (r == -ENOENT)
+ /* the device does not exist (any more?) */
+ return log_debug_errno(SYNTHETIC_ERRNO(ENODEV),
+ "sd-device: Failed to chase symlinks in \"%s\".", _syspath);
+ if (r < 0)
+ return log_debug_errno(r, "sd-device: Failed to get target of '%s': %m", _syspath);
+
+ if (!path_startswith(syspath, "/sys")) {
+ _cleanup_free_ char *real_sys = NULL, *new_syspath = NULL;
+ char *p;
+
+ /* /sys is a symlink to somewhere sysfs is mounted on? In that case, we convert the path to real sysfs to "/sys". */
+ r = chase("/sys", NULL, 0, &real_sys, NULL);
+ if (r < 0)
+ return log_debug_errno(r, "sd-device: Failed to chase symlink /sys: %m");
+
+ p = path_startswith(syspath, real_sys);
+ if (!p)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENODEV),
+ "sd-device: Canonicalized path '%s' does not starts with sysfs mount point '%s'",
+ syspath, real_sys);
+
+ new_syspath = path_join("/sys", p);
+ if (!new_syspath)
+ return log_oom_debug();
+
+ free_and_replace(syspath, new_syspath);
+ path_simplify(syspath);
+ }
+
+ if (path_startswith(syspath, "/sys/devices/")) {
+ /* For proper devices, stricter rules apply: they must have a 'uevent' file,
+ * otherwise we won't allow them */
+
+ if (faccessat(fd, "uevent", F_OK, 0) < 0) {
+ if (errno == ENOENT)
+ /* This is not a valid device. Note, this condition is quite often
+ * satisfied when enumerating devices or finding a parent device.
+ * Hence, use log_trace_errno() here. */
+ return log_trace_errno(SYNTHETIC_ERRNO(ENODEV),
+ "sd-device: the uevent file \"%s/uevent\" does not exist.", syspath);
+ if (errno == ENOTDIR)
+ /* Not actually a directory. */
+ return log_debug_errno(SYNTHETIC_ERRNO(ENODEV),
+ "sd-device: the syspath \"%s\" is not a directory.", syspath);
+
+ return log_debug_errno(errno, "sd-device: cannot find uevent file for %s: %m", syspath);
+ }
+ } else {
+ struct stat st;
+
+ /* For everything else lax rules apply: they just need to be a directory */
+
+ if (fstat(fd, &st) < 0)
+ return log_debug_errno(errno, "sd-device: failed to check if syspath \"%s\" is a directory: %m", syspath);
+ if (!S_ISDIR(st.st_mode))
+ return log_debug_errno(SYNTHETIC_ERRNO(ENODEV),
+ "sd-device: the syspath \"%s\" is not a directory.", syspath);
+ }
+
+ /* Only operate on sysfs, i.e. refuse going down into /sys/fs/cgroup/ or similar places where
+ * things are not arranged as kobjects in kernel, and hence don't necessarily have
+ * kobject/attribute structure. */
+ r = getenv_bool_secure("SYSTEMD_DEVICE_VERIFY_SYSFS");
+ if (r < 0 && r != -ENXIO)
+ log_debug_errno(r, "Failed to parse $SYSTEMD_DEVICE_VERIFY_SYSFS value: %m");
+ if (r != 0) {
+ r = fd_is_fs_type(fd, SYSFS_MAGIC);
+ if (r < 0)
+ return log_debug_errno(r, "sd-device: failed to check if syspath \"%s\" is backed by sysfs.", syspath);
+ if (r == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENODEV),
+ "sd-device: the syspath \"%s\" is outside of sysfs, refusing.", syspath);
+ }
+ } else {
+ /* must be a subdirectory of /sys */
+ if (!path_startswith(_syspath, "/sys/"))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "sd-device: Syspath '%s' is not a subdirectory of /sys",
+ _syspath);
+
+ r = path_simplify_alloc(_syspath, &syspath);
+ if (r < 0)
+ return r;
+ }
+
+ assert_se(devpath = startswith(syspath, "/sys"));
+ if (devpath[0] != '/')
+ return log_debug_errno(SYNTHETIC_ERRNO(ENODEV), "sd-device: \"/sys\" alone is not a valid device path.");
+
+ r = device_add_property_internal(device, "DEVPATH", devpath);
+ if (r < 0)
+ return log_debug_errno(r, "sd-device: Failed to add \"DEVPATH\" property for device \"%s\": %m", syspath);
+
+ free_and_replace(device->syspath, syspath);
+ device->devpath = devpath;
+
+ /* Unset sysname and sysnum, they will be assigned when requested. */
+ device->sysnum = NULL;
+ device->sysname = mfree(device->sysname);
+ return 0;
+}
+
+static int device_new_from_syspath(sd_device **ret, const char *syspath, bool strict) {
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(syspath, -EINVAL);
+
+ if (strict && !path_startswith(syspath, "/sys/"))
+ return -EINVAL;
+
+ r = device_new_aux(&device);
+ if (r < 0)
+ return r;
+
+ r = device_set_syspath(device, syspath, /* verify= */ true);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(device);
+ return 0;
+}
+
+_public_ int sd_device_new_from_syspath(sd_device **ret, const char *syspath) {
+ return device_new_from_syspath(ret, syspath, /* strict = */ true);
+}
+
+int device_new_from_mode_and_devnum(sd_device **ret, mode_t mode, dev_t devnum) {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+ _cleanup_free_ char *syspath = NULL;
+ const char *t, *subsystem = NULL;
+ dev_t n;
+ int r;
+
+ assert(ret);
+
+ if (S_ISCHR(mode))
+ t = "char";
+ else if (S_ISBLK(mode))
+ t = "block";
+ else
+ return -ENOTTY;
+
+ if (major(devnum) == 0)
+ return -ENODEV;
+
+ if (asprintf(&syspath, "/sys/dev/%s/" DEVNUM_FORMAT_STR, t, DEVNUM_FORMAT_VAL(devnum)) < 0)
+ return -ENOMEM;
+
+ r = sd_device_new_from_syspath(&dev, syspath);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_devnum(dev, &n);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (n != devnum)
+ return -ENXIO;
+
+ r = sd_device_get_subsystem(dev, &subsystem);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ if (streq_ptr(subsystem, "block") != !!S_ISBLK(mode))
+ return -ENXIO;
+
+ *ret = TAKE_PTR(dev);
+ return 0;
+}
+
+_public_ int sd_device_new_from_devnum(sd_device **ret, char type, dev_t devnum) {
+ assert_return(ret, -EINVAL);
+ assert_return(IN_SET(type, 'b', 'c'), -EINVAL);
+
+ return device_new_from_mode_and_devnum(ret, type == 'b' ? S_IFBLK : S_IFCHR, devnum);
+}
+
+static int device_new_from_main_ifname(sd_device **ret, const char *ifname) {
+ const char *syspath;
+
+ assert(ret);
+ assert(ifname);
+
+ syspath = strjoina("/sys/class/net/", ifname);
+ return sd_device_new_from_syspath(ret, syspath);
+}
+
+_public_ int sd_device_new_from_ifname(sd_device **ret, const char *ifname) {
+ _cleanup_free_ char *main_name = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(ifname, -EINVAL);
+
+ r = parse_ifindex(ifname);
+ if (r > 0)
+ return sd_device_new_from_ifindex(ret, r);
+
+ if (ifname_valid(ifname)) {
+ r = device_new_from_main_ifname(ret, ifname);
+ if (r >= 0)
+ return r;
+ }
+
+ r = rtnl_resolve_link_alternative_name(NULL, ifname, &main_name);
+ if (r < 0)
+ return r;
+
+ return device_new_from_main_ifname(ret, main_name);
+}
+
+_public_ int sd_device_new_from_ifindex(sd_device **ret, int ifindex) {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+ char ifname[IF_NAMESIZE];
+ int r, i;
+
+ assert_return(ret, -EINVAL);
+ assert_return(ifindex > 0, -EINVAL);
+
+ if (format_ifname(ifindex, ifname) < 0)
+ return -ENODEV;
+
+ r = device_new_from_main_ifname(&dev, ifname);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_ifindex(dev, &i);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (i != ifindex)
+ return -ENXIO;
+
+ *ret = TAKE_PTR(dev);
+ return 0;
+}
+
+static int device_strjoin_new(
+ const char *a,
+ const char *b,
+ const char *c,
+ const char *d,
+ sd_device **ret) {
+
+ const char *p;
+ int r;
+
+ p = strjoina(a, b, c, d);
+ if (access(p, F_OK) < 0)
+ return IN_SET(errno, ENOENT, ENAMETOOLONG) ? 0 : -errno; /* If this sysfs is too long then it doesn't exist either */
+
+ r = sd_device_new_from_syspath(ret, p);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+_public_ int sd_device_new_from_subsystem_sysname(
+ sd_device **ret,
+ const char *subsystem,
+ const char *sysname) {
+
+ char *name;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(subsystem, -EINVAL);
+ assert_return(sysname, -EINVAL);
+
+ if (!path_is_normalized(subsystem))
+ return -EINVAL;
+ if (!path_is_normalized(sysname))
+ return -EINVAL;
+
+ /* translate sysname back to sysfs filename */
+ name = strdupa_safe(sysname);
+ string_replace_char(name, '/', '!');
+
+ if (streq(subsystem, "subsystem")) {
+ FOREACH_STRING(s, "/sys/bus/", "/sys/class/") {
+ r = device_strjoin_new(s, name, NULL, NULL, ret);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+ }
+
+ } else if (streq(subsystem, "module")) {
+ r = device_strjoin_new("/sys/module/", name, NULL, NULL, ret);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ } else if (streq(subsystem, "drivers")) {
+ const char *sep;
+
+ sep = strchr(name, ':');
+ if (sep && sep[1] != '\0') { /* Require ":" and something non-empty after that. */
+
+ const char *subsys = memdupa_suffix0(name, sep - name);
+ sep++;
+
+ if (streq(sep, "drivers")) /* If the sysname is "drivers", then it's the drivers directory itself that is meant. */
+ r = device_strjoin_new("/sys/bus/", subsys, "/drivers", NULL, ret);
+ else
+ r = device_strjoin_new("/sys/bus/", subsys, "/drivers/", sep, ret);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+ }
+ }
+
+ r = device_strjoin_new("/sys/bus/", subsystem, "/devices/", name, ret);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ r = device_strjoin_new("/sys/class/", subsystem, "/", name, ret);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ r = device_strjoin_new("/sys/firmware/", subsystem, "/", name, ret);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ return -ENODEV;
+}
+
+_public_ int sd_device_new_from_stat_rdev(sd_device **ret, const struct stat *st) {
+ assert_return(ret, -EINVAL);
+ assert_return(st, -EINVAL);
+
+ return device_new_from_mode_and_devnum(ret, st->st_mode, st->st_rdev);
+}
+
+_public_ int sd_device_new_from_devname(sd_device **ret, const char *devname) {
+ struct stat st;
+ dev_t devnum;
+ mode_t mode;
+
+ assert_return(ret, -EINVAL);
+ assert_return(devname, -EINVAL);
+
+ /* This function actually accepts both devlinks and devnames, i.e. both symlinks and device
+ * nodes below /dev/. */
+
+ /* Also ignore when the specified path is "/dev". */
+ if (isempty(path_startswith(devname, "/dev")))
+ return -EINVAL;
+
+ if (device_path_parse_major_minor(devname, &mode, &devnum) >= 0)
+ /* Let's shortcut when "/dev/block/maj:min" or "/dev/char/maj:min" is specified.
+ * In that case, we can directly convert the path to syspath, hence it is not necessary
+ * that the specified path exists. So, this works fine without udevd being running. */
+ return device_new_from_mode_and_devnum(ret, mode, devnum);
+
+ if (stat(devname, &st) < 0)
+ return ERRNO_IS_DEVICE_ABSENT(errno) ? -ENODEV : -errno;
+
+ return sd_device_new_from_stat_rdev(ret, &st);
+}
+
+_public_ int sd_device_new_from_path(sd_device **ret, const char *path) {
+ assert_return(ret, -EINVAL);
+ assert_return(path, -EINVAL);
+
+ if (path_startswith(path, "/dev"))
+ return sd_device_new_from_devname(ret, path);
+
+ return device_new_from_syspath(ret, path, /* strict = */ false);
+}
+
+int device_set_devtype(sd_device *device, const char *devtype) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(device);
+ assert(devtype);
+
+ t = strdup(devtype);
+ if (!t)
+ return -ENOMEM;
+
+ r = device_add_property_internal(device, "DEVTYPE", t);
+ if (r < 0)
+ return r;
+
+ return free_and_replace(device->devtype, t);
+}
+
+int device_set_ifindex(sd_device *device, const char *name) {
+ int r, ifindex;
+
+ assert(device);
+ assert(name);
+
+ ifindex = parse_ifindex(name);
+ if (ifindex < 0)
+ return ifindex;
+
+ r = device_add_property_internal(device, "IFINDEX", name);
+ if (r < 0)
+ return r;
+
+ device->ifindex = ifindex;
+
+ return 0;
+}
+
+static int mangle_devname(const char *p, char **ret) {
+ char *q;
+
+ assert(p);
+ assert(ret);
+
+ if (!path_is_safe(p))
+ return -EINVAL;
+
+ /* When the path is absolute, it must start with "/dev/", but ignore "/dev/" itself. */
+ if (path_is_absolute(p)) {
+ if (isempty(path_startswith(p, "/dev/")))
+ return -EINVAL;
+
+ q = strdup(p);
+ } else
+ q = path_join("/dev/", p);
+ if (!q)
+ return -ENOMEM;
+
+ path_simplify(q);
+
+ *ret = q;
+ return 0;
+}
+
+int device_set_devname(sd_device *device, const char *devname) {
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(device);
+ assert(devname);
+
+ r = mangle_devname(devname, &t);
+ if (r < 0)
+ return r;
+
+ r = device_add_property_internal(device, "DEVNAME", t);
+ if (r < 0)
+ return r;
+
+ return free_and_replace(device->devname, t);
+}
+
+int device_set_devmode(sd_device *device, const char *_devmode) {
+ unsigned devmode;
+ int r;
+
+ assert(device);
+ assert(_devmode);
+
+ r = safe_atou(_devmode, &devmode);
+ if (r < 0)
+ return r;
+
+ if (devmode > 07777)
+ return -EINVAL;
+
+ r = device_add_property_internal(device, "DEVMODE", _devmode);
+ if (r < 0)
+ return r;
+
+ device->devmode = devmode;
+
+ return 0;
+}
+
+int device_set_devnum(sd_device *device, const char *major, const char *minor) {
+ unsigned maj, min = 0;
+ int r;
+
+ assert(device);
+ assert(major);
+
+ r = safe_atou(major, &maj);
+ if (r < 0)
+ return r;
+ if (maj == 0)
+ return 0;
+ if (!DEVICE_MAJOR_VALID(maj))
+ return -EINVAL;
+
+ if (minor) {
+ r = safe_atou(minor, &min);
+ if (r < 0)
+ return r;
+ if (!DEVICE_MINOR_VALID(min))
+ return -EINVAL;
+ }
+
+ r = device_add_property_internal(device, "MAJOR", major);
+ if (r < 0)
+ return r;
+
+ if (minor) {
+ r = device_add_property_internal(device, "MINOR", minor);
+ if (r < 0)
+ return r;
+ }
+
+ device->devnum = makedev(maj, min);
+
+ return 0;
+}
+
+int device_set_diskseq(sd_device *device, const char *str) {
+ uint64_t diskseq;
+ int r;
+
+ assert(device);
+ assert(str);
+
+ r = safe_atou64(str, &diskseq);
+ if (r < 0)
+ return r;
+ if (diskseq == 0)
+ return -EINVAL;
+
+ r = device_add_property_internal(device, "DISKSEQ", str);
+ if (r < 0)
+ return r;
+
+ device->diskseq = diskseq;
+
+ return 0;
+}
+
+static int handle_uevent_line(
+ sd_device *device,
+ const char *key,
+ const char *value,
+ const char **major,
+ const char **minor) {
+
+ assert(device);
+ assert(key);
+ assert(value);
+ assert(major);
+ assert(minor);
+
+ if (streq(key, "DEVTYPE"))
+ return device_set_devtype(device, value);
+ if (streq(key, "IFINDEX"))
+ return device_set_ifindex(device, value);
+ if (streq(key, "DEVNAME"))
+ return device_set_devname(device, value);
+ if (streq(key, "DEVMODE"))
+ return device_set_devmode(device, value);
+ if (streq(key, "DISKSEQ"))
+ return device_set_diskseq(device, value);
+ if (streq(key, "MAJOR"))
+ *major = value;
+ else if (streq(key, "MINOR"))
+ *minor = value;
+ else
+ return device_add_property_internal(device, key, value);
+
+ return 0;
+}
+
+int device_read_uevent_file(sd_device *device) {
+ _cleanup_free_ char *uevent = NULL;
+ const char *syspath, *key = NULL, *value = NULL, *major = NULL, *minor = NULL;
+ char *path;
+ size_t uevent_len;
+ int r;
+
+ enum {
+ PRE_KEY,
+ KEY,
+ PRE_VALUE,
+ VALUE,
+ INVALID_LINE,
+ } state = PRE_KEY;
+
+ assert(device);
+
+ if (device->uevent_loaded || device->sealed)
+ return 0;
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ device->uevent_loaded = true;
+
+ path = strjoina(syspath, "/uevent");
+
+ r = read_full_virtual_file(path, &uevent, &uevent_len);
+ if (r == -EACCES || ERRNO_IS_NEG_DEVICE_ABSENT(r))
+ /* The uevent files may be write-only, the device may be already removed, or the device
+ * may not have the uevent file. */
+ return 0;
+ if (r < 0)
+ return log_device_debug_errno(device, r, "sd-device: Failed to read uevent file '%s': %m", path);
+
+ for (size_t i = 0; i < uevent_len; i++)
+ switch (state) {
+ case PRE_KEY:
+ if (!strchr(NEWLINE, uevent[i])) {
+ key = &uevent[i];
+
+ state = KEY;
+ }
+
+ break;
+ case KEY:
+ if (uevent[i] == '=') {
+ uevent[i] = '\0';
+
+ state = PRE_VALUE;
+ } else if (strchr(NEWLINE, uevent[i])) {
+ uevent[i] = '\0';
+ log_device_debug(device, "sd-device: Invalid uevent line '%s', ignoring", key);
+
+ state = PRE_KEY;
+ }
+
+ break;
+ case PRE_VALUE:
+ value = &uevent[i];
+ state = VALUE;
+
+ _fallthrough_; /* to handle empty property */
+ case VALUE:
+ if (strchr(NEWLINE, uevent[i])) {
+ uevent[i] = '\0';
+
+ r = handle_uevent_line(device, key, value, &major, &minor);
+ if (r < 0)
+ log_device_debug_errno(device, r, "sd-device: Failed to handle uevent entry '%s=%s', ignoring: %m", key, value);
+
+ state = PRE_KEY;
+ }
+
+ break;
+ default:
+ assert_not_reached();
+ }
+
+ if (major) {
+ r = device_set_devnum(device, major, minor);
+ if (r < 0)
+ log_device_debug_errno(device, r, "sd-device: Failed to set 'MAJOR=%s' or 'MINOR=%s' from '%s', ignoring: %m", major, strna(minor), path);
+ }
+
+ return 0;
+}
+
+_public_ int sd_device_get_ifindex(sd_device *device, int *ifindex) {
+ int r;
+
+ assert_return(device, -EINVAL);
+
+ r = device_read_uevent_file(device);
+ if (r < 0)
+ return r;
+
+ if (device->ifindex <= 0)
+ return -ENOENT;
+
+ if (ifindex)
+ *ifindex = device->ifindex;
+
+ return 0;
+}
+
+_public_ int sd_device_new_from_device_id(sd_device **ret, const char *id) {
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(id, -EINVAL);
+
+ switch (id[0]) {
+ case 'b':
+ case 'c': {
+ dev_t devt;
+
+ if (isempty(id))
+ return -EINVAL;
+
+ r = parse_devnum(id + 1, &devt);
+ if (r < 0)
+ return r;
+
+ return sd_device_new_from_devnum(ret, id[0], devt);
+ }
+
+ case 'n': {
+ int ifindex;
+
+ ifindex = parse_ifindex(id + 1);
+ if (ifindex < 0)
+ return ifindex;
+
+ return sd_device_new_from_ifindex(ret, ifindex);
+ }
+
+ case '+': {
+ const char *subsys, *sep;
+
+ sep = strchr(id + 1, ':');
+ if (!sep || sep - id - 1 > NAME_MAX)
+ return -EINVAL;
+
+ subsys = memdupa_suffix0(id + 1, sep - id - 1);
+
+ return sd_device_new_from_subsystem_sysname(ret, subsys, sep + 1);
+ }
+
+ default:
+ return -EINVAL;
+ }
+}
+
+_public_ int sd_device_get_syspath(sd_device *device, const char **ret) {
+ assert_return(device, -EINVAL);
+
+ assert(path_startswith(device->syspath, "/sys/"));
+
+ if (ret)
+ *ret = device->syspath;
+
+ return 0;
+}
+
+DEFINE_PRIVATE_HASH_OPS_FULL(
+ device_by_path_hash_ops,
+ char, path_hash_func, path_compare, free,
+ sd_device, sd_device_unref);
+
+static int device_enumerate_children_internal(sd_device *device, const char *subdir, Set **stack, Hashmap **children) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ int r;
+
+ assert(device);
+ assert(stack);
+ assert(children);
+
+ r = device_opendir(device, subdir, &dir);
+ if (r < 0)
+ return r;
+
+ FOREACH_DIRENT_ALL(de, dir, return -errno) {
+ _cleanup_(sd_device_unrefp) sd_device *child = NULL;
+ _cleanup_free_ char *p = NULL;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ if (!IN_SET(de->d_type, DT_LNK, DT_DIR))
+ continue;
+
+ if (subdir)
+ p = path_join(subdir, de->d_name);
+ else
+ p = strdup(de->d_name);
+ if (!p)
+ return -ENOMEM;
+
+ /* Try to create child device. */
+ r = sd_device_new_child(&child, device, p);
+ if (r >= 0) {
+ /* OK, this is a child device, saving it. */
+ r = hashmap_ensure_put(children, &device_by_path_hash_ops, p, child);
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(p);
+ TAKE_PTR(child);
+ } else if (r == -ENODEV) {
+ /* This is not a child device. Push the sub-directory into stack, and read it later. */
+
+ if (de->d_type == DT_LNK)
+ /* Do not follow symlinks, otherwise, we will enter an infinite loop, e.g.,
+ * /sys/class/block/nvme0n1/subsystem/nvme0n1/subsystem/nvme0n1/subsystem/… */
+ continue;
+
+ r = set_ensure_consume(stack, &path_hash_ops_free, TAKE_PTR(p));
+ if (r < 0)
+ return r;
+ } else
+ return r;
+ }
+
+ return 0;
+}
+
+static int device_enumerate_children(sd_device *device) {
+ _cleanup_hashmap_free_ Hashmap *children = NULL;
+ _cleanup_set_free_ Set *stack = NULL;
+ int r;
+
+ assert(device);
+
+ if (device->children_enumerated)
+ return 0; /* Already enumerated. */
+
+ r = device_enumerate_children_internal(device, NULL, &stack, &children);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *subdir = NULL;
+
+ subdir = set_steal_first(stack);
+ if (!subdir)
+ break;
+
+ r = device_enumerate_children_internal(device, subdir, &stack, &children);
+ if (r < 0)
+ return r;
+ }
+
+ device->children_enumerated = true;
+ device->children = TAKE_PTR(children);
+ return 1; /* Enumerated. */
+}
+
+_public_ sd_device *sd_device_get_child_first(sd_device *device, const char **ret_suffix) {
+ int r;
+
+ assert(device);
+
+ r = device_enumerate_children(device);
+ if (r < 0) {
+ log_device_debug_errno(device, r, "sd-device: failed to enumerate child devices: %m");
+ if (ret_suffix)
+ *ret_suffix = NULL;
+ return NULL;
+ }
+
+ device->children_iterator = ITERATOR_FIRST;
+
+ return sd_device_get_child_next(device, ret_suffix);
+}
+
+_public_ sd_device *sd_device_get_child_next(sd_device *device, const char **ret_suffix) {
+ sd_device *child;
+
+ assert(device);
+
+ (void) hashmap_iterate(device->children, &device->children_iterator, (void**) &child, (const void**) ret_suffix);
+ return child;
+}
+
+_public_ int sd_device_new_child(sd_device **ret, sd_device *device, const char *suffix) {
+ _cleanup_free_ char *path = NULL;
+ sd_device *child;
+ const char *s;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(device, -EINVAL);
+ assert_return(suffix, -EINVAL);
+
+ if (!path_is_safe(suffix))
+ return -EINVAL;
+
+ /* If we have already enumerated children, try to find the child from the cache. */
+ child = hashmap_get(device->children, suffix);
+ if (child) {
+ *ret = sd_device_ref(child);
+ return 0;
+ }
+
+ r = sd_device_get_syspath(device, &s);
+ if (r < 0)
+ return r;
+
+ path = path_join(s, suffix);
+ if (!path)
+ return -ENOMEM;
+
+ return sd_device_new_from_syspath(ret, path);
+}
+
+static int device_new_from_child(sd_device **ret, sd_device *child) {
+ _cleanup_free_ char *path = NULL;
+ const char *syspath;
+ int r;
+
+ assert(ret);
+ assert(child);
+
+ r = sd_device_get_syspath(child, &syspath);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *p = NULL;
+
+ r = path_extract_directory(path ?: syspath, &p);
+ if (r < 0)
+ return r;
+
+ if (path_equal(p, "/sys"))
+ return -ENODEV;
+
+ r = sd_device_new_from_syspath(ret, p);
+ if (r != -ENODEV)
+ return r;
+
+ free_and_replace(path, p);
+ }
+}
+
+_public_ int sd_device_get_parent(sd_device *child, sd_device **ret) {
+ int r;
+
+ assert_return(child, -EINVAL);
+
+ if (!child->parent_set) {
+ r = device_new_from_child(&child->parent, child);
+ if (r < 0 && r != -ENODEV)
+ return r;
+
+ child->parent_set = true;
+ }
+
+ if (!child->parent)
+ return -ENOENT;
+
+ if (ret)
+ *ret = child->parent;
+ return 0;
+}
+
+int device_set_subsystem(sd_device *device, const char *subsystem) {
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert(device);
+
+ if (subsystem) {
+ s = strdup(subsystem);
+ if (!s)
+ return -ENOMEM;
+ }
+
+ r = device_add_property_internal(device, "SUBSYSTEM", s);
+ if (r < 0)
+ return r;
+
+ device->subsystem_set = true;
+ return free_and_replace(device->subsystem, s);
+}
+
+int device_set_drivers_subsystem(sd_device *device) {
+ _cleanup_free_ char *subsystem = NULL;
+ const char *devpath, *drivers, *p;
+ int r;
+
+ assert(device);
+
+ r = sd_device_get_devpath(device, &devpath);
+ if (r < 0)
+ return r;
+
+ drivers = strstr(devpath, "/drivers/");
+ if (!drivers)
+ drivers = endswith(devpath, "/drivers");
+ if (!drivers)
+ return -EINVAL;
+
+ /* Find the path component immediately before the "/drivers/" string */
+ r = path_find_last_component(devpath, /* accept_dot_dot= */ false, &drivers, &p);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ subsystem = strndup(p, r);
+ if (!subsystem)
+ return -ENOMEM;
+
+ r = device_set_subsystem(device, "drivers");
+ if (r < 0)
+ return r;
+
+ return free_and_replace(device->driver_subsystem, subsystem);
+}
+
+_public_ int sd_device_get_subsystem(sd_device *device, const char **ret) {
+ int r;
+
+ assert_return(device, -EINVAL);
+
+ if (!device->subsystem_set) {
+ _cleanup_free_ char *subsystem = NULL;
+ const char *syspath;
+ char *path;
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ /* read 'subsystem' link */
+ path = strjoina(syspath, "/subsystem");
+ r = readlink_value(path, &subsystem);
+ if (r < 0 && r != -ENOENT)
+ return log_device_debug_errno(device, r,
+ "sd-device: Failed to read subsystem for %s: %m",
+ device->devpath);
+
+ if (subsystem)
+ r = device_set_subsystem(device, subsystem);
+ /* use implicit names */
+ else if (!isempty(path_startswith(device->devpath, "/module/")))
+ r = device_set_subsystem(device, "module");
+ else if (strstr(syspath, "/drivers/") || endswith(syspath, "/drivers"))
+ r = device_set_drivers_subsystem(device);
+ else if (!isempty(PATH_STARTSWITH_SET(device->devpath, "/class/", "/bus/")))
+ r = device_set_subsystem(device, "subsystem");
+ else {
+ device->subsystem_set = true;
+ r = 0;
+ }
+ if (r < 0)
+ return log_device_debug_errno(device, r,
+ "sd-device: Failed to set subsystem for %s: %m",
+ device->devpath);
+ }
+
+ if (!device->subsystem)
+ return -ENOENT;
+
+ if (ret)
+ *ret = device->subsystem;
+ return 0;
+}
+
+_public_ int sd_device_get_devtype(sd_device *device, const char **devtype) {
+ int r;
+
+ assert_return(device, -EINVAL);
+
+ r = device_read_uevent_file(device);
+ if (r < 0)
+ return r;
+
+ if (!device->devtype)
+ return -ENOENT;
+
+ if (devtype)
+ *devtype = device->devtype;
+
+ return !!device->devtype;
+}
+
+_public_ int sd_device_get_parent_with_subsystem_devtype(sd_device *child, const char *subsystem, const char *devtype, sd_device **ret) {
+ sd_device *parent = NULL;
+ int r;
+
+ assert_return(child, -EINVAL);
+ assert_return(subsystem, -EINVAL);
+
+ r = sd_device_get_parent(child, &parent);
+ while (r >= 0) {
+ const char *parent_subsystem = NULL;
+
+ (void) sd_device_get_subsystem(parent, &parent_subsystem);
+ if (streq_ptr(parent_subsystem, subsystem)) {
+ const char *parent_devtype = NULL;
+
+ if (!devtype)
+ break;
+
+ (void) sd_device_get_devtype(parent, &parent_devtype);
+ if (streq_ptr(parent_devtype, devtype))
+ break;
+ }
+ r = sd_device_get_parent(parent, &parent);
+ }
+
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = parent;
+ return 0;
+}
+
+_public_ int sd_device_get_devnum(sd_device *device, dev_t *devnum) {
+ int r;
+
+ assert_return(device, -EINVAL);
+
+ r = device_read_uevent_file(device);
+ if (r < 0)
+ return r;
+
+ if (major(device->devnum) <= 0)
+ return -ENOENT;
+
+ if (devnum)
+ *devnum = device->devnum;
+
+ return 0;
+}
+
+int device_set_driver(sd_device *device, const char *driver) {
+ _cleanup_free_ char *d = NULL;
+ int r;
+
+ assert(device);
+
+ if (driver) {
+ d = strdup(driver);
+ if (!d)
+ return -ENOMEM;
+ }
+
+ r = device_add_property_internal(device, "DRIVER", d);
+ if (r < 0)
+ return r;
+
+ device->driver_set = true;
+ return free_and_replace(device->driver, d);
+}
+
+_public_ int sd_device_get_driver(sd_device *device, const char **ret) {
+ assert_return(device, -EINVAL);
+
+ if (!device->driver_set) {
+ _cleanup_free_ char *driver = NULL;
+ const char *syspath;
+ char *path;
+ int r;
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ path = strjoina(syspath, "/driver");
+ r = readlink_value(path, &driver);
+ if (r < 0 && r != -ENOENT)
+ return log_device_debug_errno(device, r,
+ "sd-device: readlink(\"%s\") failed: %m", path);
+
+ r = device_set_driver(device, driver);
+ if (r < 0)
+ return log_device_debug_errno(device, r,
+ "sd-device: Failed to set driver \"%s\": %m", driver);
+ }
+
+ if (!device->driver)
+ return -ENOENT;
+
+ if (ret)
+ *ret = device->driver;
+ return 0;
+}
+
+_public_ int sd_device_get_devpath(sd_device *device, const char **ret) {
+ assert_return(device, -EINVAL);
+
+ assert(device->devpath);
+ assert(device->devpath[0] == '/');
+
+ if (ret)
+ *ret = device->devpath;
+
+ return 0;
+}
+
+_public_ int sd_device_get_devname(sd_device *device, const char **devname) {
+ int r;
+
+ assert_return(device, -EINVAL);
+
+ r = device_read_uevent_file(device);
+ if (r < 0)
+ return r;
+
+ if (!device->devname)
+ return -ENOENT;
+
+ assert(!isempty(path_startswith(device->devname, "/dev/")));
+
+ if (devname)
+ *devname = device->devname;
+ return 0;
+}
+
+static int device_set_sysname_and_sysnum(sd_device *device) {
+ _cleanup_free_ char *sysname = NULL;
+ size_t len, n;
+ int r;
+
+ assert(device);
+
+ r = path_extract_filename(device->devpath, &sysname);
+ if (r < 0)
+ return r;
+ if (r == O_DIRECTORY)
+ return -EINVAL;
+
+ /* some devices have '!' in their name, change that to '/' */
+ string_replace_char(sysname, '!', '/');
+
+ n = strspn_from_end(sysname, DIGITS);
+ len = strlen(sysname);
+ assert(n <= len);
+ if (n == len)
+ n = 0; /* Do not set sysnum for number only sysname. */
+
+ device->sysnum = n > 0 ? sysname + len - n : NULL;
+ return free_and_replace(device->sysname, sysname);
+}
+
+_public_ int sd_device_get_sysname(sd_device *device, const char **ret) {
+ int r;
+
+ assert_return(device, -EINVAL);
+
+ if (!device->sysname) {
+ r = device_set_sysname_and_sysnum(device);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret)
+ *ret = device->sysname;
+ return 0;
+}
+
+_public_ int sd_device_get_sysnum(sd_device *device, const char **ret) {
+ int r;
+
+ assert_return(device, -EINVAL);
+
+ if (!device->sysname) {
+ r = device_set_sysname_and_sysnum(device);
+ if (r < 0)
+ return r;
+ }
+
+ if (!device->sysnum)
+ return -ENOENT;
+
+ if (ret)
+ *ret = device->sysnum;
+ return 0;
+}
+
+_public_ int sd_device_get_action(sd_device *device, sd_device_action_t *ret) {
+ assert_return(device, -EINVAL);
+
+ if (device->action < 0)
+ return -ENOENT;
+
+ if (ret)
+ *ret = device->action;
+
+ return 0;
+}
+
+_public_ int sd_device_get_seqnum(sd_device *device, uint64_t *ret) {
+ assert_return(device, -EINVAL);
+
+ if (device->seqnum == 0)
+ return -ENOENT;
+
+ if (ret)
+ *ret = device->seqnum;
+
+ return 0;
+}
+
+_public_ int sd_device_get_diskseq(sd_device *device, uint64_t *ret) {
+ int r;
+
+ assert_return(device, -EINVAL);
+
+ r = device_read_uevent_file(device);
+ if (r < 0)
+ return r;
+
+ if (device->diskseq == 0)
+ return -ENOENT;
+
+ if (ret)
+ *ret = device->diskseq;
+
+ return 0;
+}
+
+static bool is_valid_tag(const char *tag) {
+ assert(tag);
+
+ return in_charset(tag, ALPHANUMERICAL "-_") && filename_is_valid(tag);
+}
+
+int device_add_tag(sd_device *device, const char *tag, bool both) {
+ int r, added;
+
+ assert(device);
+ assert(tag);
+
+ if (!is_valid_tag(tag))
+ return -EINVAL;
+
+ /* Definitely add to the "all" list of tags (i.e. the sticky list) */
+ added = set_put_strdup(&device->all_tags, tag);
+ if (added < 0)
+ return added;
+
+ /* And optionally, also add it to the current list of tags */
+ if (both) {
+ r = set_put_strdup(&device->current_tags, tag);
+ if (r < 0) {
+ if (added > 0)
+ (void) set_remove(device->all_tags, tag);
+
+ return r;
+ }
+ }
+
+ device->tags_generation++;
+ device->property_tags_outdated = true;
+
+ return 0;
+}
+
+int device_add_devlink(sd_device *device, const char *devlink) {
+ char *p;
+ int r;
+
+ assert(device);
+ assert(devlink);
+
+ r = mangle_devname(devlink, &p);
+ if (r < 0)
+ return r;
+
+ r = set_ensure_consume(&device->devlinks, &path_hash_ops_free, p);
+ if (r < 0)
+ return r;
+
+ device->devlinks_generation++;
+ device->property_devlinks_outdated = true;
+
+ return r; /* return 1 when newly added, 0 when already exists */
+}
+
+int device_remove_devlink(sd_device *device, const char *devlink) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ assert(device);
+ assert(devlink);
+
+ r = mangle_devname(devlink, &p);
+ if (r < 0)
+ return r;
+
+ s = set_remove(device->devlinks, p);
+ if (!s)
+ return 0; /* does not exist */
+
+ device->devlinks_generation++;
+ device->property_devlinks_outdated = true;
+ return 1; /* removed */
+}
+
+bool device_has_devlink(sd_device *device, const char *devlink) {
+ assert(device);
+ assert(devlink);
+
+ return set_contains(device->devlinks, devlink);
+}
+
+static int device_add_property_internal_from_string(sd_device *device, const char *str) {
+ _cleanup_free_ char *key = NULL;
+ char *value;
+ int r;
+
+ assert(device);
+ assert(str);
+
+ key = strdup(str);
+ if (!key)
+ return -ENOMEM;
+
+ value = strchr(key, '=');
+ if (!value)
+ return -EINVAL;
+
+ *value = '\0';
+
+ if (isempty(++value))
+ value = NULL;
+
+ /* Add the property to both sd_device::properties and sd_device::properties_db,
+ * as this is called by only handle_db_line(). */
+ r = device_add_property_aux(device, key, value, false);
+ if (r < 0)
+ return r;
+
+ return device_add_property_aux(device, key, value, true);
+}
+
+int device_set_usec_initialized(sd_device *device, usec_t when) {
+ char s[DECIMAL_STR_MAX(usec_t)];
+ int r;
+
+ assert(device);
+
+ xsprintf(s, USEC_FMT, when);
+
+ r = device_add_property_internal(device, "USEC_INITIALIZED", s);
+ if (r < 0)
+ return r;
+
+ device->usec_initialized = when;
+ return 0;
+}
+
+static int handle_db_line(sd_device *device, char key, const char *value) {
+ int r;
+
+ assert(device);
+ assert(value);
+
+ switch (key) {
+ case 'G': /* Any tag */
+ case 'Q': /* Current tag */
+ return device_add_tag(device, value, key == 'Q');
+
+ case 'S': {
+ const char *path;
+
+ path = strjoina("/dev/", value);
+ return device_add_devlink(device, path);
+ }
+ case 'E':
+ return device_add_property_internal_from_string(device, value);
+
+ case 'I': {
+ usec_t t;
+
+ r = safe_atou64(value, &t);
+ if (r < 0)
+ return r;
+
+ return device_set_usec_initialized(device, t);
+ }
+ case 'L':
+ return safe_atoi(value, &device->devlink_priority);
+
+ case 'W':
+ /* Deprecated. Previously, watch handle is both saved in database and /run/udev/watch.
+ * However, the handle saved in database may not be updated when the handle is updated
+ * or removed. Moreover, it is not necessary to store the handle within the database,
+ * as its value becomes meaningless when udevd is restarted. */
+ return 0;
+
+ case 'V':
+ return safe_atou(value, &device->database_version);
+
+ default:
+ log_device_debug(device, "sd-device: Unknown key '%c' in device db, ignoring", key);
+ return 0;
+ }
+}
+
+int device_get_device_id(sd_device *device, const char **ret) {
+ assert(device);
+ assert(ret);
+
+ if (!device->device_id) {
+ _cleanup_free_ char *id = NULL;
+ const char *subsystem;
+ dev_t devnum;
+ int ifindex, r;
+
+ r = sd_device_get_subsystem(device, &subsystem);
+ if (r < 0)
+ return r;
+
+ if (sd_device_get_devnum(device, &devnum) >= 0) {
+ /* use dev_t — b259:131072, c254:0 */
+ if (asprintf(&id, "%c" DEVNUM_FORMAT_STR,
+ streq(subsystem, "block") ? 'b' : 'c',
+ DEVNUM_FORMAT_VAL(devnum)) < 0)
+ return -ENOMEM;
+ } else if (sd_device_get_ifindex(device, &ifindex) >= 0) {
+ /* use netdev ifindex — n3 */
+ if (asprintf(&id, "n%u", (unsigned) ifindex) < 0)
+ return -ENOMEM;
+ } else {
+ _cleanup_free_ char *sysname = NULL;
+
+ /* use $subsys:$sysname — pci:0000:00:1f.2
+ * sd_device_get_sysname() has '!' translated, get it from devpath */
+ r = path_extract_filename(device->devpath, &sysname);
+ if (r < 0)
+ return r;
+ if (r == O_DIRECTORY)
+ return -EINVAL;
+
+ if (streq(subsystem, "drivers")) {
+ /* the 'drivers' pseudo-subsystem is special, and needs the real
+ * subsystem encoded as well */
+ assert(device->driver_subsystem);
+ id = strjoin("+drivers:", device->driver_subsystem, ":", sysname);
+ } else
+ id = strjoin("+", subsystem, ":", sysname);
+ if (!id)
+ return -ENOMEM;
+ }
+
+ if (!filename_is_valid(id))
+ return -EINVAL;
+
+ device->device_id = TAKE_PTR(id);
+ }
+
+ *ret = device->device_id;
+ return 0;
+}
+
+int device_read_db_internal_filename(sd_device *device, const char *filename) {
+ _cleanup_free_ char *db = NULL;
+ const char *value;
+ size_t db_len;
+ char key = '\0'; /* Unnecessary initialization to appease gcc-12.0.0-0.4.fc36 */
+ int r;
+
+ enum {
+ PRE_KEY,
+ KEY,
+ PRE_VALUE,
+ VALUE,
+ INVALID_LINE,
+ } state = PRE_KEY;
+
+ assert(device);
+ assert(filename);
+
+ r = read_full_file(filename, &db, &db_len);
+ if (r < 0) {
+ if (r == -ENOENT)
+ return 0;
+
+ return log_device_debug_errno(device, r, "sd-device: Failed to read db '%s': %m", filename);
+ }
+
+ /* devices with a database entry are initialized */
+ device->is_initialized = true;
+
+ device->db_loaded = true;
+
+ for (size_t i = 0; i < db_len; i++)
+ switch (state) {
+ case PRE_KEY:
+ if (!strchr(NEWLINE, db[i])) {
+ key = db[i];
+
+ state = KEY;
+ }
+
+ break;
+ case KEY:
+ if (db[i] != ':') {
+ log_device_debug(device, "sd-device: Invalid db entry with key '%c', ignoring", key);
+
+ state = INVALID_LINE;
+ } else {
+ db[i] = '\0';
+
+ state = PRE_VALUE;
+ }
+
+ break;
+ case PRE_VALUE:
+ value = &db[i];
+
+ state = VALUE;
+
+ break;
+ case INVALID_LINE:
+ if (strchr(NEWLINE, db[i]))
+ state = PRE_KEY;
+
+ break;
+ case VALUE:
+ if (strchr(NEWLINE, db[i])) {
+ db[i] = '\0';
+ r = handle_db_line(device, key, value);
+ if (r < 0)
+ log_device_debug_errno(device, r, "sd-device: Failed to handle db entry '%c:%s', ignoring: %m",
+ key, value);
+
+ state = PRE_KEY;
+ }
+
+ break;
+ default:
+ return log_device_debug_errno(device, SYNTHETIC_ERRNO(EINVAL), "sd-device: invalid db syntax.");
+ }
+
+ return 0;
+}
+
+int device_read_db_internal(sd_device *device, bool force) {
+ const char *id, *path;
+ int r;
+
+ assert(device);
+
+ if (device->db_loaded || (!force && device->sealed))
+ return 0;
+
+ r = device_get_device_id(device, &id);
+ if (r < 0)
+ return r;
+
+ path = strjoina("/run/udev/data/", id);
+
+ return device_read_db_internal_filename(device, path);
+}
+
+_public_ int sd_device_get_is_initialized(sd_device *device) {
+ int r;
+
+ assert_return(device, -EINVAL);
+
+ r = device_read_db(device);
+ if (r == -ENOENT)
+ /* The device may be already removed or renamed. */
+ return false;
+ if (r < 0)
+ return r;
+
+ return device->is_initialized;
+}
+
+_public_ int sd_device_get_usec_initialized(sd_device *device, uint64_t *ret) {
+ int r;
+
+ assert_return(device, -EINVAL);
+
+ r = sd_device_get_is_initialized(device);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EBUSY;
+
+ if (device->usec_initialized == 0)
+ return -ENODATA;
+
+ if (ret)
+ *ret = device->usec_initialized;
+
+ return 0;
+}
+
+_public_ int sd_device_get_usec_since_initialized(sd_device *device, uint64_t *ret) {
+ usec_t now_ts, ts;
+ int r;
+
+ assert_return(device, -EINVAL);
+
+ r = sd_device_get_usec_initialized(device, &ts);
+ if (r < 0)
+ return r;
+
+ now_ts = now(CLOCK_MONOTONIC);
+
+ if (now_ts < ts)
+ return -EIO;
+
+ if (ret)
+ *ret = usec_sub_unsigned(now_ts, ts);
+
+ return 0;
+}
+
+_public_ const char *sd_device_get_tag_first(sd_device *device) {
+ void *v;
+
+ assert_return(device, NULL);
+
+ (void) device_read_db(device);
+
+ device->all_tags_iterator_generation = device->tags_generation;
+ device->all_tags_iterator = ITERATOR_FIRST;
+
+ (void) set_iterate(device->all_tags, &device->all_tags_iterator, &v);
+ return v;
+}
+
+_public_ const char *sd_device_get_tag_next(sd_device *device) {
+ void *v;
+
+ assert_return(device, NULL);
+
+ (void) device_read_db(device);
+
+ if (device->all_tags_iterator_generation != device->tags_generation)
+ return NULL;
+
+ (void) set_iterate(device->all_tags, &device->all_tags_iterator, &v);
+ return v;
+}
+
+static bool device_database_supports_current_tags(sd_device *device) {
+ assert(device);
+
+ (void) device_read_db(device);
+
+ /* The current tags (saved in Q field) feature is implemented in database version 1.
+ * If the database version is 0, then the tags (NOT current tags, saved in G field) are not
+ * sticky. Thus, we can safely bypass the operations for the current tags (Q) to tags (G). */
+
+ return device->database_version >= 1;
+}
+
+_public_ const char *sd_device_get_current_tag_first(sd_device *device) {
+ void *v;
+
+ assert_return(device, NULL);
+
+ if (!device_database_supports_current_tags(device))
+ return sd_device_get_tag_first(device);
+
+ (void) device_read_db(device);
+
+ device->current_tags_iterator_generation = device->tags_generation;
+ device->current_tags_iterator = ITERATOR_FIRST;
+
+ (void) set_iterate(device->current_tags, &device->current_tags_iterator, &v);
+ return v;
+}
+
+_public_ const char *sd_device_get_current_tag_next(sd_device *device) {
+ void *v;
+
+ assert_return(device, NULL);
+
+ if (!device_database_supports_current_tags(device))
+ return sd_device_get_tag_next(device);
+
+ (void) device_read_db(device);
+
+ if (device->current_tags_iterator_generation != device->tags_generation)
+ return NULL;
+
+ (void) set_iterate(device->current_tags, &device->current_tags_iterator, &v);
+ return v;
+}
+
+_public_ const char *sd_device_get_devlink_first(sd_device *device) {
+ void *v;
+
+ assert_return(device, NULL);
+
+ (void) device_read_db(device);
+
+ device->devlinks_iterator_generation = device->devlinks_generation;
+ device->devlinks_iterator = ITERATOR_FIRST;
+
+ (void) set_iterate(device->devlinks, &device->devlinks_iterator, &v);
+ return v;
+}
+
+_public_ const char *sd_device_get_devlink_next(sd_device *device) {
+ void *v;
+
+ assert_return(device, NULL);
+
+ (void) device_read_db(device);
+
+ if (device->devlinks_iterator_generation != device->devlinks_generation)
+ return NULL;
+
+ (void) set_iterate(device->devlinks, &device->devlinks_iterator, &v);
+ return v;
+}
+
+int device_properties_prepare(sd_device *device) {
+ int r;
+
+ assert(device);
+
+ r = device_read_uevent_file(device);
+ if (r < 0)
+ return r;
+
+ r = device_read_db(device);
+ if (r < 0)
+ return r;
+
+ if (device->property_devlinks_outdated) {
+ _cleanup_free_ char *devlinks = NULL;
+
+ r = set_strjoin(device->devlinks, " ", false, &devlinks);
+ if (r < 0)
+ return r;
+
+ if (!isempty(devlinks)) {
+ r = device_add_property_internal(device, "DEVLINKS", devlinks);
+ if (r < 0)
+ return r;
+ }
+
+ device->property_devlinks_outdated = false;
+ }
+
+ if (device->property_tags_outdated) {
+ _cleanup_free_ char *tags = NULL;
+
+ r = set_strjoin(device->all_tags, ":", true, &tags);
+ if (r < 0)
+ return r;
+
+ if (!isempty(tags)) {
+ r = device_add_property_internal(device, "TAGS", tags);
+ if (r < 0)
+ return r;
+ }
+
+ tags = mfree(tags);
+ r = set_strjoin(device->current_tags, ":", true, &tags);
+ if (r < 0)
+ return r;
+
+ if (!isempty(tags)) {
+ r = device_add_property_internal(device, "CURRENT_TAGS", tags);
+ if (r < 0)
+ return r;
+ }
+
+ device->property_tags_outdated = false;
+ }
+
+ return 0;
+}
+
+_public_ const char *sd_device_get_property_first(sd_device *device, const char **_value) {
+ const char *key;
+ int r;
+
+ assert_return(device, NULL);
+
+ r = device_properties_prepare(device);
+ if (r < 0)
+ return NULL;
+
+ device->properties_iterator_generation = device->properties_generation;
+ device->properties_iterator = ITERATOR_FIRST;
+
+ (void) ordered_hashmap_iterate(device->properties, &device->properties_iterator, (void**)_value, (const void**)&key);
+ return key;
+}
+
+_public_ const char *sd_device_get_property_next(sd_device *device, const char **_value) {
+ const char *key;
+ int r;
+
+ assert_return(device, NULL);
+
+ r = device_properties_prepare(device);
+ if (r < 0)
+ return NULL;
+
+ if (device->properties_iterator_generation != device->properties_generation)
+ return NULL;
+
+ (void) ordered_hashmap_iterate(device->properties, &device->properties_iterator, (void**)_value, (const void**)&key);
+ return key;
+}
+
+static int device_sysattrs_read_all_internal(sd_device *device, const char *subdir, Set **stack) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ int r;
+
+ assert(device);
+ assert(stack);
+
+ r = device_opendir(device, subdir, &dir);
+ if (r == -ENOENT && subdir)
+ return 0; /* Maybe, this is a child device, and is already removed. */
+ if (r < 0)
+ return r;
+
+ if (subdir) {
+ if (faccessat(dirfd(dir), "uevent", F_OK, 0) >= 0)
+ return 0; /* this is a child device, skipping */
+ if (errno != ENOENT) {
+ log_device_debug_errno(device, errno,
+ "sd-device: Failed to access %s/uevent, ignoring sub-directory %s: %m",
+ subdir, subdir);
+ return 0;
+ }
+ }
+
+ FOREACH_DIRENT_ALL(de, dir, return -errno) {
+ _cleanup_free_ char *p = NULL;
+ struct stat statbuf;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ /* only handle symlinks, regular files, and directories */
+ if (!IN_SET(de->d_type, DT_LNK, DT_REG, DT_DIR))
+ continue;
+
+ if (subdir) {
+ p = path_join(subdir, de->d_name);
+ if (!p)
+ return -ENOMEM;
+ }
+
+ if (de->d_type == DT_DIR) {
+ /* push the sub-directory into the stack, and read it later. */
+ if (p)
+ r = set_ensure_consume(stack, &path_hash_ops_free, TAKE_PTR(p));
+ else
+ r = set_put_strdup_full(stack, &path_hash_ops_free, de->d_name);
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+
+ if (fstatat(dirfd(dir), de->d_name, &statbuf, AT_SYMLINK_NOFOLLOW) < 0)
+ continue;
+
+ if ((statbuf.st_mode & (S_IRUSR | S_IWUSR)) == 0)
+ continue;
+
+ if (p)
+ r = set_ensure_consume(&device->sysattrs, &path_hash_ops_free, TAKE_PTR(p));
+ else
+ r = set_put_strdup_full(&device->sysattrs, &path_hash_ops_free, de->d_name);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int device_sysattrs_read_all(sd_device *device) {
+ _cleanup_set_free_ Set *stack = NULL;
+ int r;
+
+ assert(device);
+
+ if (device->sysattrs_read)
+ return 0;
+
+ r = device_sysattrs_read_all_internal(device, NULL, &stack);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *subdir = NULL;
+
+ subdir = set_steal_first(stack);
+ if (!subdir)
+ break;
+
+ r = device_sysattrs_read_all_internal(device, subdir, &stack);
+ if (r < 0)
+ return r;
+ }
+
+ device->sysattrs_read = true;
+
+ return 0;
+}
+
+_public_ const char *sd_device_get_sysattr_first(sd_device *device) {
+ void *v;
+ int r;
+
+ assert_return(device, NULL);
+
+ if (!device->sysattrs_read) {
+ r = device_sysattrs_read_all(device);
+ if (r < 0) {
+ errno = -r;
+ return NULL;
+ }
+ }
+
+ device->sysattrs_iterator = ITERATOR_FIRST;
+
+ (void) set_iterate(device->sysattrs, &device->sysattrs_iterator, &v);
+ return v;
+}
+
+_public_ const char *sd_device_get_sysattr_next(sd_device *device) {
+ void *v;
+
+ assert_return(device, NULL);
+
+ if (!device->sysattrs_read)
+ return NULL;
+
+ (void) set_iterate(device->sysattrs, &device->sysattrs_iterator, &v);
+ return v;
+}
+
+_public_ int sd_device_has_tag(sd_device *device, const char *tag) {
+ assert_return(device, -EINVAL);
+ assert_return(tag, -EINVAL);
+
+ (void) device_read_db(device);
+
+ return set_contains(device->all_tags, tag);
+}
+
+_public_ int sd_device_has_current_tag(sd_device *device, const char *tag) {
+ assert_return(device, -EINVAL);
+ assert_return(tag, -EINVAL);
+
+ if (!device_database_supports_current_tags(device))
+ return sd_device_has_tag(device, tag);
+
+ (void) device_read_db(device);
+
+ return set_contains(device->current_tags, tag);
+}
+
+_public_ int sd_device_get_property_value(sd_device *device, const char *key, const char **ret_value) {
+ const char *value;
+ int r;
+
+ assert_return(device, -EINVAL);
+ assert_return(key, -EINVAL);
+
+ r = device_properties_prepare(device);
+ if (r < 0)
+ return r;
+
+ value = ordered_hashmap_get(device->properties, key);
+ if (!value)
+ return -ENOENT;
+
+ if (ret_value)
+ *ret_value = value;
+ return 0;
+}
+
+int device_get_property_bool(sd_device *device, const char *key) {
+ const char *value;
+ int r;
+
+ assert(device);
+ assert(key);
+
+ r = sd_device_get_property_value(device, key, &value);
+ if (r < 0)
+ return r;
+
+ return parse_boolean(value);
+}
+
+int device_get_property_int(sd_device *device, const char *key, int *ret) {
+ const char *value;
+ int r, v;
+
+ assert(device);
+ assert(key);
+
+ r = sd_device_get_property_value(device, key, &value);
+ if (r < 0)
+ return r;
+
+ r = safe_atoi(value, &v);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = v;
+ return 0;
+}
+
+_public_ int sd_device_get_trigger_uuid(sd_device *device, sd_id128_t *ret) {
+ const char *s;
+ sd_id128_t id;
+ int r;
+
+ assert_return(device, -EINVAL);
+
+ /* Retrieves the UUID attached to a uevent when triggering it from userspace via
+ * sd_device_trigger_with_uuid() or an equivalent interface. Returns -ENOENT if the record is not
+ * caused by a synthetic event and -ENODATA if it was but no UUID was specified */
+
+ r = sd_device_get_property_value(device, "SYNTH_UUID", &s);
+ if (r < 0)
+ return r;
+
+ if (streq(s, "0")) /* SYNTH_UUID=0 is set whenever a device is triggered by userspace without specifying a UUID */
+ return -ENODATA;
+
+ r = sd_id128_from_string(s, &id);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = id;
+
+ return 0;
+}
+
+void device_clear_sysattr_cache(sd_device *device) {
+ device->sysattr_values = hashmap_free(device->sysattr_values);
+}
+
+int device_cache_sysattr_value(sd_device *device, const char *key, char *value) {
+ _unused_ _cleanup_free_ char *old_value = NULL;
+ _cleanup_free_ char *new_key = NULL;
+ int r;
+
+ assert(device);
+ assert(key);
+
+ /* This takes the reference of the input value. The input value may be NULL.
+ * This replaces the value if it already exists. */
+
+ /* First, remove the old cache entry. So, we do not need to clear cache on error. */
+ old_value = hashmap_remove2(device->sysattr_values, key, (void **) &new_key);
+ if (!new_key) {
+ new_key = strdup(key);
+ if (!new_key)
+ return -ENOMEM;
+ }
+
+ r = hashmap_ensure_put(&device->sysattr_values, &path_hash_ops_free_free, new_key, value);
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(new_key);
+
+ return 0;
+}
+
+int device_get_cached_sysattr_value(sd_device *device, const char *key, const char **ret_value) {
+ const char *k = NULL, *value;
+
+ assert(device);
+ assert(key);
+
+ value = hashmap_get2(device->sysattr_values, key, (void **) &k);
+ if (!k)
+ return -ESTALE; /* We have not read the attribute. */
+ if (!value)
+ return -ENOENT; /* We have looked up the attribute before and it did not exist. */
+ if (ret_value)
+ *ret_value = value;
+ return 0;
+}
+
+/* We cache all sysattr lookups. If an attribute does not exist, it is stored
+ * with a NULL value in the cache, otherwise the returned string is stored */
+_public_ int sd_device_get_sysattr_value(sd_device *device, const char *sysattr, const char **ret_value) {
+ _cleanup_free_ char *value = NULL, *path = NULL;
+ const char *syspath;
+ struct stat statbuf;
+ int r;
+
+ assert_return(device, -EINVAL);
+ assert_return(sysattr, -EINVAL);
+
+ /* look for possibly already cached result */
+ r = device_get_cached_sysattr_value(device, sysattr, ret_value);
+ if (r != -ESTALE)
+ return r;
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ path = path_join(syspath, sysattr);
+ if (!path)
+ return -ENOMEM;
+
+ if (lstat(path, &statbuf) < 0) {
+ int k;
+
+ r = -errno;
+
+ /* remember that we could not access the sysattr */
+ k = device_cache_sysattr_value(device, sysattr, NULL);
+ if (k < 0)
+ log_device_debug_errno(device, k,
+ "sd-device: failed to cache attribute '%s' with NULL, ignoring: %m",
+ sysattr);
+
+ return r;
+ } else if (S_ISLNK(statbuf.st_mode)) {
+ /* Some core links return only the last element of the target path,
+ * these are just values, the paths should not be exposed. */
+ if (STR_IN_SET(sysattr, "driver", "subsystem", "module")) {
+ r = readlink_value(path, &value);
+ if (r < 0)
+ return r;
+ } else
+ return -EINVAL;
+ } else if (S_ISDIR(statbuf.st_mode))
+ /* skip directories */
+ return -EISDIR;
+ else if (!(statbuf.st_mode & S_IRUSR))
+ /* skip non-readable files */
+ return -EPERM;
+ else {
+ size_t size;
+
+ /* Read attribute value, Some attributes contain embedded '\0'. So, it is necessary to
+ * also get the size of the result. See issue #20025. */
+ r = read_full_virtual_file(path, &value, &size);
+ if (r < 0)
+ return r;
+
+ /* drop trailing newlines */
+ while (size > 0 && strchr(NEWLINE, value[--size]))
+ value[size] = '\0';
+ }
+
+ /* Unfortunately, we need to return 'const char*' instead of 'char*'. Hence, failure in caching
+ * sysattr value is critical unlike the other places. */
+ r = device_cache_sysattr_value(device, sysattr, value);
+ if (r < 0) {
+ log_device_debug_errno(device, r,
+ "sd-device: failed to cache attribute '%s' with '%s'%s: %m",
+ sysattr, value, ret_value ? "" : ", ignoring");
+ if (ret_value)
+ return r;
+
+ return 0;
+ }
+
+ if (ret_value)
+ *ret_value = value;
+
+ TAKE_PTR(value);
+ return 0;
+}
+
+int device_get_sysattr_int(sd_device *device, const char *sysattr, int *ret_value) {
+ const char *value;
+ int r;
+
+ r = sd_device_get_sysattr_value(device, sysattr, &value);
+ if (r < 0)
+ return r;
+
+ int v;
+ r = safe_atoi(value, &v);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "Failed to parse '%s' attribute: %m", sysattr);
+
+ if (ret_value)
+ *ret_value = v;
+ /* We return "true" if the value is positive. */
+ return v > 0;
+}
+
+int device_get_sysattr_unsigned(sd_device *device, const char *sysattr, unsigned *ret_value) {
+ const char *value;
+ int r;
+
+ r = sd_device_get_sysattr_value(device, sysattr, &value);
+ if (r < 0)
+ return r;
+
+ unsigned v;
+ r = safe_atou(value, &v);
+ if (r < 0)
+ return log_device_debug_errno(device, r, "Failed to parse '%s' attribute: %m", sysattr);
+
+ if (ret_value)
+ *ret_value = v;
+ /* We return "true" if the value is positive. */
+ return v > 0;
+}
+
+int device_get_sysattr_bool(sd_device *device, const char *sysattr) {
+ const char *value;
+ int r;
+
+ assert(device);
+ assert(sysattr);
+
+ r = sd_device_get_sysattr_value(device, sysattr, &value);
+ if (r < 0)
+ return r;
+
+ return parse_boolean(value);
+}
+
+static void device_remove_cached_sysattr_value(sd_device *device, const char *_key) {
+ _cleanup_free_ char *key = NULL;
+
+ assert(device);
+ assert(_key);
+
+ free(hashmap_remove2(device->sysattr_values, _key, (void **) &key));
+}
+
+_public_ int sd_device_set_sysattr_value(sd_device *device, const char *sysattr, const char *_value) {
+ _cleanup_free_ char *value = NULL, *path = NULL;
+ const char *syspath;
+ size_t len;
+ int r;
+
+ assert_return(device, -EINVAL);
+ assert_return(sysattr, -EINVAL);
+
+ /* Set the attribute and save it in the cache. */
+
+ if (!_value) {
+ /* If input value is NULL, then clear cache and not write anything. */
+ device_remove_cached_sysattr_value(device, sysattr);
+ return 0;
+ }
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ path = path_join(syspath, sysattr);
+ if (!path)
+ return -ENOMEM;
+
+ len = strlen(_value);
+
+ /* drop trailing newlines */
+ while (len > 0 && strchr(NEWLINE, _value[len - 1]))
+ len --;
+
+ /* value length is limited to 4k */
+ if (len > 4096)
+ return -EINVAL;
+
+ value = strndup(_value, len);
+ if (!value)
+ return -ENOMEM;
+
+ r = write_string_file(path, value, WRITE_STRING_FILE_DISABLE_BUFFER | WRITE_STRING_FILE_NOFOLLOW);
+ if (r < 0) {
+ /* On failure, clear cache entry, as we do not know how it fails. */
+ device_remove_cached_sysattr_value(device, sysattr);
+ return r;
+ }
+
+ /* Do not cache action string written into uevent file. */
+ if (streq(sysattr, "uevent"))
+ return 0;
+
+ r = device_cache_sysattr_value(device, sysattr, value);
+ if (r < 0)
+ log_device_debug_errno(device, r,
+ "sd-device: failed to cache attribute '%s' with '%s', ignoring: %m",
+ sysattr, value);
+ else
+ TAKE_PTR(value);
+
+ return 0;
+}
+
+_public_ int sd_device_set_sysattr_valuef(sd_device *device, const char *sysattr, const char *format, ...) {
+ _cleanup_free_ char *value = NULL;
+ va_list ap;
+ int r;
+
+ assert_return(device, -EINVAL);
+ assert_return(sysattr, -EINVAL);
+
+ if (!format) {
+ device_remove_cached_sysattr_value(device, sysattr);
+ return 0;
+ }
+
+ va_start(ap, format);
+ r = vasprintf(&value, format, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return -ENOMEM;
+
+ return sd_device_set_sysattr_value(device, sysattr, value);
+}
+
+_public_ int sd_device_trigger(sd_device *device, sd_device_action_t action) {
+ const char *s;
+
+ assert_return(device, -EINVAL);
+
+ s = device_action_to_string(action);
+ if (!s)
+ return -EINVAL;
+
+ /* This uses the simple no-UUID interface of kernel < 4.13 */
+ return sd_device_set_sysattr_value(device, "uevent", s);
+}
+
+_public_ int sd_device_trigger_with_uuid(
+ sd_device *device,
+ sd_device_action_t action,
+ sd_id128_t *ret_uuid) {
+
+ const char *s, *j;
+ sd_id128_t u;
+ int r;
+
+ assert_return(device, -EINVAL);
+
+ /* If no one wants to know the UUID, use the simple interface from pre-4.13 times */
+ if (!ret_uuid)
+ return sd_device_trigger(device, action);
+
+ s = device_action_to_string(action);
+ if (!s)
+ return -EINVAL;
+
+ r = sd_id128_randomize(&u);
+ if (r < 0)
+ return r;
+
+ j = strjoina(s, " ", SD_ID128_TO_UUID_STRING(u));
+
+ r = sd_device_set_sysattr_value(device, "uevent", j);
+ if (r < 0)
+ return r;
+
+ *ret_uuid = u;
+ return 0;
+}
+
+_public_ int sd_device_open(sd_device *device, int flags) {
+ _cleanup_close_ int fd = -EBADF, fd2 = -EBADF;
+ const char *devname, *subsystem = NULL;
+ uint64_t q, diskseq = 0;
+ struct stat st;
+ dev_t devnum;
+ int r;
+
+ assert_return(device, -EINVAL);
+ assert_return(FLAGS_SET(flags, O_PATH) || !FLAGS_SET(flags, O_NOFOLLOW), -EINVAL);
+
+ r = sd_device_get_devname(device, &devname);
+ if (r == -ENOENT)
+ return -ENOEXEC;
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_devnum(device, &devnum);
+ if (r == -ENOENT)
+ return -ENOEXEC;
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_subsystem(device, &subsystem);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ fd = open(devname, FLAGS_SET(flags, O_PATH) ? flags : O_CLOEXEC|O_NOFOLLOW|O_PATH);
+ if (fd < 0)
+ return -errno;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (st.st_rdev != devnum)
+ return -ENXIO;
+
+ if (streq_ptr(subsystem, "block") ? !S_ISBLK(st.st_mode) : !S_ISCHR(st.st_mode))
+ return -ENXIO;
+
+ /* If flags has O_PATH, then we cannot check diskseq. Let's return earlier. */
+ if (FLAGS_SET(flags, O_PATH))
+ return TAKE_FD(fd);
+
+ /* If the device is not initialized, then we cannot determine if we should check diskseq through
+ * ID_IGNORE_DISKSEQ property. Let's skip to check diskseq in that case. */
+ r = sd_device_get_is_initialized(device);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ r = device_get_property_bool(device, "ID_IGNORE_DISKSEQ");
+ if (r < 0 && r != -ENOENT)
+ return r;
+ if (r <= 0) {
+ r = sd_device_get_diskseq(device, &diskseq);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ }
+ }
+
+ fd2 = fd_reopen(fd, flags);
+ if (fd2 < 0)
+ return fd2;
+
+ if (diskseq == 0)
+ return TAKE_FD(fd2);
+
+ r = fd_get_diskseq(fd2, &q);
+ if (r < 0)
+ return r;
+
+ if (q != diskseq)
+ return -ENXIO;
+
+ return TAKE_FD(fd2);
+}
+
+int device_opendir(sd_device *device, const char *subdir, DIR **ret) {
+ _cleanup_closedir_ DIR *d = NULL;
+ _cleanup_free_ char *path = NULL;
+ const char *syspath;
+ int r;
+
+ assert(device);
+ assert(ret);
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ if (subdir) {
+ if (!path_is_safe(subdir))
+ return -EINVAL;
+
+ path = path_join(syspath, subdir);
+ if (!path)
+ return -ENOMEM;
+ }
+
+ d = opendir(path ?: syspath);
+ if (!d)
+ return -errno;
+
+ *ret = TAKE_PTR(d);
+ return 0;
+}
diff --git a/src/libsystemd/sd-device/test-device-util.c b/src/libsystemd/sd-device/test-device-util.c
new file mode 100644
index 0000000..bc8ab66
--- /dev/null
+++ b/src/libsystemd/sd-device/test-device-util.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "device-util.h"
+#include "tests.h"
+
+TEST(log_device_full) {
+ int r;
+
+ for (int level = LOG_ERR; level <= LOG_DEBUG; level++) {
+ log_device_full(NULL, level, "test level=%d: %m", level);
+
+ r = log_device_full_errno(NULL, level, EUCLEAN, "test level=%d errno=EUCLEAN: %m", level);
+ assert_se(r == -EUCLEAN);
+
+ r = log_device_full_errno(NULL, level, 0, "test level=%d errno=0: %m", level);
+ assert_se(r == 0);
+
+ r = log_device_full_errno(NULL, level, SYNTHETIC_ERRNO(ENODATA), "test level=%d errno=S(ENODATA): %m", level);
+ assert_se(r == -ENODATA);
+ }
+}
+
+DEFINE_TEST_MAIN(LOG_INFO);
diff --git a/src/libsystemd/sd-device/test-sd-device-monitor.c b/src/libsystemd/sd-device/test-sd-device-monitor.c
new file mode 100644
index 0000000..e124e00
--- /dev/null
+++ b/src/libsystemd/sd-device/test-sd-device-monitor.c
@@ -0,0 +1,344 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdbool.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+#include "sd-event.h"
+
+#include "device-monitor-private.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "macro.h"
+#include "path-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "virt.h"
+
+static int monitor_handler(sd_device_monitor *m, sd_device *d, void *userdata) {
+ const char *s, *syspath = userdata;
+
+ assert_se(sd_device_get_syspath(d, &s) >= 0);
+ assert_se(streq(s, syspath));
+
+ return sd_event_exit(sd_device_monitor_get_event(m), 100);
+}
+
+static void test_receive_device_fail(void) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL;
+ _cleanup_(sd_device_unrefp) sd_device *loopback = NULL;
+ const char *syspath;
+
+ log_info("/* %s */", __func__);
+
+ /* Try to send device with invalid action and without seqnum. */
+ assert_se(sd_device_new_from_syspath(&loopback, "/sys/class/net/lo") >= 0);
+ assert_se(device_add_property(loopback, "ACTION", "hoge") >= 0);
+
+ assert_se(sd_device_get_syspath(loopback, &syspath) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_set_description(monitor_server, "sender") >= 0);
+ assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_set_description(monitor_client, "receiver") >= 0);
+ assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0);
+ assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0);
+
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, loopback) >= 0);
+ assert_se(sd_event_run(sd_device_monitor_get_event(monitor_client), 0) >= 0);
+}
+
+static void test_send_receive_one(sd_device *device, bool subsystem_filter, bool tag_filter, bool use_bpf) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL;
+ const char *syspath, *subsystem, *devtype = NULL;
+
+ log_device_info(device, "/* %s(subsystem_filter=%s, tag_filter=%s, use_bpf=%s) */", __func__,
+ true_false(subsystem_filter), true_false(tag_filter), true_false(use_bpf));
+
+ assert_se(sd_device_get_syspath(device, &syspath) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_set_description(monitor_server, "sender") >= 0);
+ assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_set_description(monitor_client, "receiver") >= 0);
+ assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0);
+ assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0);
+
+ if (subsystem_filter) {
+ assert_se(sd_device_get_subsystem(device, &subsystem) >= 0);
+ (void) sd_device_get_devtype(device, &devtype);
+ assert_se(sd_device_monitor_filter_add_match_subsystem_devtype(monitor_client, subsystem, devtype) >= 0);
+ }
+
+ if (tag_filter)
+ FOREACH_DEVICE_TAG(device, tag)
+ assert_se(sd_device_monitor_filter_add_match_tag(monitor_client, tag) >= 0);
+
+ if ((subsystem_filter || tag_filter) && use_bpf)
+ assert_se(sd_device_monitor_filter_update(monitor_client) >= 0);
+
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0);
+ assert_se(sd_event_loop(sd_device_monitor_get_event(monitor_client)) == 100);
+}
+
+static void test_subsystem_filter(sd_device *device) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL;
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ const char *syspath, *subsystem;
+
+ log_device_info(device, "/* %s */", __func__);
+
+ assert_se(sd_device_get_syspath(device, &syspath) >= 0);
+ assert_se(sd_device_get_subsystem(device, &subsystem) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_set_description(monitor_server, "sender") >= 0);
+ assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_set_description(monitor_client, "receiver") >= 0);
+ assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0);
+ assert_se(sd_device_monitor_filter_add_match_subsystem_devtype(monitor_client, subsystem, NULL) >= 0);
+ assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0);
+
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ assert_se(sd_device_enumerator_add_match_subsystem(e, subsystem, false) >= 0);
+ FOREACH_DEVICE(e, d) {
+ const char *p, *s;
+
+ assert_se(sd_device_get_syspath(d, &p) >= 0);
+ assert_se(sd_device_get_subsystem(d, &s) >= 0);
+
+ assert_se(device_add_property(d, "ACTION", "add") >= 0);
+ assert_se(device_add_property(d, "SEQNUM", "10") >= 0);
+
+ log_device_debug(d, "Sending device subsystem:%s syspath:%s", s, p);
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, d) >= 0);
+ }
+
+ log_device_info(device, "Sending device subsystem:%s syspath:%s", subsystem, syspath);
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0);
+ assert_se(sd_event_loop(sd_device_monitor_get_event(monitor_client)) == 100);
+}
+
+static void test_tag_filter(sd_device *device) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL;
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ const char *syspath;
+
+ log_device_info(device, "/* %s */", __func__);
+
+ assert_se(sd_device_get_syspath(device, &syspath) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_set_description(monitor_server, "sender") >= 0);
+ assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_set_description(monitor_client, "receiver") >= 0);
+ assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0);
+ assert_se(sd_device_monitor_filter_add_match_tag(monitor_client, "TEST_SD_DEVICE_MONITOR") >= 0);
+ assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0);
+
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ FOREACH_DEVICE(e, d) {
+ const char *p;
+
+ assert_se(sd_device_get_syspath(d, &p) >= 0);
+
+ assert_se(device_add_property(d, "ACTION", "add") >= 0);
+ assert_se(device_add_property(d, "SEQNUM", "10") >= 0);
+
+ log_device_debug(d, "Sending device syspath:%s", p);
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, d) >= 0);
+ }
+
+ log_device_info(device, "Sending device syspath:%s", syspath);
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0);
+ assert_se(sd_event_loop(sd_device_monitor_get_event(monitor_client)) == 100);
+
+}
+
+static void test_sysattr_filter(sd_device *device, const char *sysattr) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL;
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ const char *syspath, *sysattr_value;
+
+ log_device_info(device, "/* %s(%s) */", __func__, sysattr);
+
+ assert_se(sd_device_get_syspath(device, &syspath) >= 0);
+ assert_se(sd_device_get_sysattr_value(device, sysattr, &sysattr_value) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_set_description(monitor_server, "sender") >= 0);
+ assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_set_description(monitor_client, "receiver") >= 0);
+ assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0);
+ assert_se(sd_device_monitor_filter_add_match_sysattr(monitor_client, sysattr, sysattr_value, true) >= 0);
+ assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0);
+
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ assert_se(sd_device_enumerator_add_match_sysattr(e, sysattr, sysattr_value, false) >= 0);
+ FOREACH_DEVICE(e, d) {
+ const char *p;
+
+ assert_se(sd_device_get_syspath(d, &p) >= 0);
+
+ assert_se(device_add_property(d, "ACTION", "add") >= 0);
+ assert_se(device_add_property(d, "SEQNUM", "10") >= 0);
+
+ log_device_debug(d, "Sending device syspath:%s", p);
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, d) >= 0);
+
+ /* The sysattr filter is not implemented in BPF yet. So, sending multiple devices may fills up
+ * buffer and device_monitor_send_device() may return EAGAIN. Let's send one device here,
+ * which should be filtered out by the receiver. */
+ break;
+ }
+
+ log_device_info(device, "Sending device syspath:%s", syspath);
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0);
+ assert_se(sd_event_loop(sd_device_monitor_get_event(monitor_client)) == 100);
+
+}
+
+static void test_parent_filter(sd_device *device) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL;
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ const char *syspath, *parent_syspath;
+ sd_device *parent;
+ int r;
+
+ log_device_info(device, "/* %s */", __func__);
+
+ assert_se(sd_device_get_syspath(device, &syspath) >= 0);
+ r = sd_device_get_parent(device, &parent);
+ if (r < 0)
+ return (void) log_device_info(device, "Device does not have parent, skipping.");
+ assert_se(sd_device_get_syspath(parent, &parent_syspath) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_set_description(monitor_server, "sender") >= 0);
+ assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_set_description(monitor_client, "receiver") >= 0);
+ assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0);
+ assert_se(sd_device_monitor_filter_add_match_parent(monitor_client, parent, true) >= 0);
+ assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0);
+
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ FOREACH_DEVICE(e, d) {
+ const char *p;
+
+ assert_se(sd_device_get_syspath(d, &p) >= 0);
+ if (path_startswith(p, parent_syspath))
+ continue;
+
+ assert_se(device_add_property(d, "ACTION", "add") >= 0);
+ assert_se(device_add_property(d, "SEQNUM", "10") >= 0);
+
+ log_device_debug(d, "Sending device syspath:%s", p);
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, d) >= 0);
+
+ /* The parent filter is not implemented in BPF yet. So, sending multiple devices may fills up
+ * buffer and device_monitor_send_device() may return EAGAIN. Let's send one device here,
+ * which should be filtered out by the receiver. */
+ break;
+ }
+
+ log_device_info(device, "Sending device syspath:%s", syspath);
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0);
+ assert_se(sd_event_loop(sd_device_monitor_get_event(monitor_client)) == 100);
+
+}
+
+static void test_sd_device_monitor_filter_remove(sd_device *device) {
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL;
+ const char *syspath;
+
+ log_device_info(device, "/* %s */", __func__);
+
+ assert_se(sd_device_get_syspath(device, &syspath) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_set_description(monitor_server, "sender") >= 0);
+ assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0);
+
+ assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0);
+ assert_se(sd_device_monitor_set_description(monitor_client, "receiver") >= 0);
+ assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0);
+ assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0);
+
+ assert_se(sd_device_monitor_filter_add_match_subsystem_devtype(monitor_client, "hoge", NULL) >= 0);
+ assert_se(sd_device_monitor_filter_update(monitor_client) >= 0);
+
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0);
+ assert_se(sd_event_run(sd_device_monitor_get_event(monitor_client), 0) >= 0);
+
+ assert_se(sd_device_monitor_filter_remove(monitor_client) >= 0);
+
+ assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0);
+ assert_se(sd_event_loop(sd_device_monitor_get_event(monitor_client)) == 100);
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_device_unrefp) sd_device *loopback = NULL, *sda = NULL;
+ int r;
+
+ test_setup_logging(LOG_INFO);
+
+ if (getuid() != 0)
+ return log_tests_skipped("not root");
+
+ if (path_is_read_only_fs("/sys") > 0)
+ return log_tests_skipped("Running in container");
+
+ test_receive_device_fail();
+
+ assert_se(sd_device_new_from_syspath(&loopback, "/sys/class/net/lo") >= 0);
+ assert_se(device_add_property(loopback, "ACTION", "add") >= 0);
+ assert_se(device_add_property(loopback, "SEQNUM", "10") >= 0);
+ assert_se(device_add_tag(loopback, "TEST_SD_DEVICE_MONITOR", true) >= 0);
+
+ test_send_receive_one(loopback, false, false, false);
+ test_send_receive_one(loopback, true, false, false);
+ test_send_receive_one(loopback, false, true, false);
+ test_send_receive_one(loopback, true, true, false);
+ test_send_receive_one(loopback, true, false, true);
+ test_send_receive_one(loopback, false, true, true);
+ test_send_receive_one(loopback, true, true, true);
+
+ test_subsystem_filter(loopback);
+ test_tag_filter(loopback);
+ test_sysattr_filter(loopback, "ifindex");
+ test_sd_device_monitor_filter_remove(loopback);
+
+ r = sd_device_new_from_subsystem_sysname(&sda, "block", "sda");
+ if (r < 0) {
+ log_info_errno(r, "Failed to create sd_device for sda, skipping remaining tests: %m");
+ return 0;
+ }
+
+ assert_se(device_add_property(sda, "ACTION", "change") >= 0);
+ assert_se(device_add_property(sda, "SEQNUM", "11") >= 0);
+
+ test_send_receive_one(sda, false, false, false);
+ test_send_receive_one(sda, true, false, false);
+ test_send_receive_one(sda, false, true, false);
+ test_send_receive_one(sda, true, true, false);
+ test_send_receive_one(sda, true, false, true);
+ test_send_receive_one(sda, false, true, true);
+ test_send_receive_one(sda, true, true, true);
+
+ test_parent_filter(sda);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-device/test-sd-device-thread.c b/src/libsystemd/sd-device/test-sd-device-thread.c
new file mode 100644
index 0000000..c99d179
--- /dev/null
+++ b/src/libsystemd/sd-device/test-sd-device-thread.c
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "sd-device.h"
+
+#include "device-util.h"
+
+#define handle_error_errno(error, msg) \
+ ({ \
+ errno = abs(error); \
+ perror(msg); \
+ EXIT_FAILURE; \
+ })
+
+static void* thread(void *p) {
+ sd_device **d = p;
+
+ *d = sd_device_unref(*d);
+
+ return NULL;
+}
+
+int main(int argc, char *argv[]) {
+ sd_device *loopback;
+ pthread_t t;
+ int r;
+
+ r = sd_device_new_from_syspath(&loopback, "/sys/class/net/lo");
+ if (r < 0)
+ return handle_error_errno(r, "Failed to create loopback device object");
+
+ FOREACH_DEVICE_PROPERTY(loopback, key, value)
+ printf("%s=%s\n", key, value);
+
+ r = pthread_create(&t, NULL, thread, &loopback);
+ if (r != 0)
+ return handle_error_errno(r, "Failed to create thread");
+
+ r = pthread_join(t, NULL);
+ if (r != 0)
+ return handle_error_errno(r, "Failed to wait thread finished");
+
+ if (loopback)
+ return handle_error_errno(r, "loopback device is not unref()ed");
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-device/test-sd-device.c b/src/libsystemd/sd-device/test-sd-device.c
new file mode 100644
index 0000000..bce99b5
--- /dev/null
+++ b/src/libsystemd/sd-device/test-sd-device.c
@@ -0,0 +1,678 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ctype.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "device-enumerator-private.h"
+#include "device-internal.h"
+#include "device-private.h"
+#include "device-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "hashmap.h"
+#include "nulstr-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "time-util.h"
+#include "tmpfile-util.h"
+#include "udev-util.h"
+
+static void test_sd_device_one(sd_device *d) {
+ _cleanup_(sd_device_unrefp) sd_device *dev = NULL;
+ const char *syspath, *sysname, *subsystem = NULL, *devname, *val;
+ bool is_block = false;
+ dev_t devnum;
+ usec_t usec;
+ int ifindex, r;
+
+ assert_se(sd_device_get_syspath(d, &syspath) >= 0);
+ assert_se(path_startswith(syspath, "/sys"));
+ assert_se(sd_device_get_sysname(d, &sysname) >= 0);
+
+ log_info("%s(%s)", __func__, syspath);
+
+ assert_se(sd_device_new_from_syspath(&dev, syspath) >= 0);
+ assert_se(sd_device_get_syspath(dev, &val) >= 0);
+ assert_se(streq(syspath, val));
+ dev = sd_device_unref(dev);
+
+ assert_se(sd_device_new_from_path(&dev, syspath) >= 0);
+ assert_se(sd_device_get_syspath(dev, &val) >= 0);
+ assert_se(streq(syspath, val));
+ dev = sd_device_unref(dev);
+
+ r = sd_device_get_ifindex(d, &ifindex);
+ if (r >= 0) {
+ assert_se(ifindex > 0);
+
+ r = sd_device_new_from_ifindex(&dev, ifindex);
+ if (r == -ENODEV)
+ log_device_warning_errno(d, r,
+ "Failed to create sd-device object from ifindex %i. "
+ "Maybe running on a non-host network namespace.", ifindex);
+ else {
+ assert_se(r >= 0);
+ assert_se(sd_device_get_syspath(dev, &val) >= 0);
+ assert_se(streq(syspath, val));
+ dev = sd_device_unref(dev);
+ }
+
+ /* This does not require the interface really exists on the network namespace.
+ * Hence, this should always succeed. */
+ assert_se(sd_device_new_from_ifname(&dev, sysname) >= 0);
+ assert_se(sd_device_get_syspath(dev, &val) >= 0);
+ assert_se(streq(syspath, val));
+ dev = sd_device_unref(dev);
+ } else
+ assert_se(r == -ENOENT);
+
+ r = sd_device_get_subsystem(d, &subsystem);
+ if (r < 0)
+ assert_se(r == -ENOENT);
+ else if (!streq(subsystem, "gpio")) { /* Unfortunately, there exist /sys/class/gpio and /sys/bus/gpio.
+ * Hence, sd_device_new_from_subsystem_sysname() and
+ * sd_device_new_from_device_id() may not work as expected. */
+ const char *name, *id;
+
+ if (streq(subsystem, "drivers"))
+ name = strjoina(d->driver_subsystem, ":", sysname);
+ else
+ name = sysname;
+ assert_se(sd_device_new_from_subsystem_sysname(&dev, subsystem, name) >= 0);
+ assert_se(sd_device_get_syspath(dev, &val) >= 0);
+ assert_se(streq(syspath, val));
+ dev = sd_device_unref(dev);
+
+ /* The device ID depends on subsystem. */
+ assert_se(device_get_device_id(d, &id) >= 0);
+ r = sd_device_new_from_device_id(&dev, id);
+ if (r == -ENODEV && ifindex > 0)
+ log_device_warning_errno(d, r,
+ "Failed to create sd-device object from device ID \"%s\". "
+ "Maybe running on a non-host network namespace.", id);
+ else {
+ assert_se(r >= 0);
+ assert_se(sd_device_get_syspath(dev, &val) >= 0);
+ assert_se(streq(syspath, val));
+ dev = sd_device_unref(dev);
+ }
+
+ /* These require udev database, and reading database requires device ID. */
+ r = sd_device_get_is_initialized(d);
+ if (r > 0) {
+ r = sd_device_get_usec_since_initialized(d, &usec);
+ assert_se((r >= 0 && usec > 0) || r == -ENODATA);
+ } else
+ assert(r == 0);
+
+ r = sd_device_get_property_value(d, "ID_NET_DRIVER", &val);
+ assert_se(r >= 0 || r == -ENOENT);
+ }
+
+ is_block = streq_ptr(subsystem, "block");
+
+ r = sd_device_get_devname(d, &devname);
+ if (r >= 0) {
+ r = sd_device_new_from_devname(&dev, devname);
+ if (r >= 0) {
+ assert_se(sd_device_get_syspath(dev, &val) >= 0);
+ assert_se(streq(syspath, val));
+ dev = sd_device_unref(dev);
+ } else
+ assert_se(r == -ENODEV || ERRNO_IS_PRIVILEGE(r));
+
+ r = sd_device_new_from_path(&dev, devname);
+ if (r >= 0) {
+ assert_se(sd_device_get_syspath(dev, &val) >= 0);
+ assert_se(streq(syspath, val));
+ dev = sd_device_unref(dev);
+
+ _cleanup_close_ int fd = -EBADF;
+ fd = sd_device_open(d, O_CLOEXEC| O_NONBLOCK | (is_block ? O_RDONLY : O_NOCTTY | O_PATH));
+ assert_se(fd >= 0 || ERRNO_IS_PRIVILEGE(fd));
+ } else
+ assert_se(r == -ENODEV || ERRNO_IS_PRIVILEGE(r));
+ } else
+ assert_se(r == -ENOENT);
+
+ r = sd_device_get_devnum(d, &devnum);
+ if (r >= 0) {
+ _cleanup_free_ char *p = NULL;
+
+ assert_se(major(devnum) > 0);
+
+ assert_se(sd_device_new_from_devnum(&dev, is_block ? 'b' : 'c', devnum) >= 0);
+ assert_se(sd_device_get_syspath(dev, &val) >= 0);
+ assert_se(streq(syspath, val));
+ dev = sd_device_unref(dev);
+
+ assert_se(asprintf(&p, "/dev/%s/%u:%u", is_block ? "block" : "char", major(devnum), minor(devnum)) >= 0);
+ assert_se(sd_device_new_from_devname(&dev, p) >= 0);
+ assert_se(sd_device_get_syspath(dev, &val) >= 0);
+ assert_se(streq(syspath, val));
+ dev = sd_device_unref(dev);
+
+ assert_se(sd_device_new_from_path(&dev, p) >= 0);
+ assert_se(sd_device_get_syspath(dev, &val) >= 0);
+ assert_se(streq(syspath, val));
+ dev = sd_device_unref(dev);
+ } else
+ assert_se(r == -ENOENT);
+
+ assert_se(sd_device_get_devpath(d, &val) >= 0);
+
+ r = sd_device_get_devtype(d, &val);
+ assert_se(r >= 0 || r == -ENOENT);
+
+ r = sd_device_get_driver(d, &val);
+ assert_se(r >= 0 || r == -ENOENT);
+
+ r = sd_device_get_sysnum(d, &val);
+ if (r >= 0) {
+ assert_se(val > sysname);
+ assert_se(val < sysname + strlen(sysname));
+ assert_se(in_charset(val, DIGITS));
+ assert_se(!ascii_isdigit(val[-1]));
+ } else
+ assert_se(r == -ENOENT);
+
+ r = sd_device_get_sysattr_value(d, "nsid", NULL);
+ if (r >= 0) {
+ unsigned x;
+
+ assert_se(device_get_sysattr_unsigned(d, "nsid", NULL) >= 0);
+ r = device_get_sysattr_unsigned(d, "nsid", &x);
+ assert_se(r >= 0);
+ assert_se((x > 0) == (r > 0));
+ } else
+ assert_se(ERRNO_IS_PRIVILEGE(r) || IN_SET(r, -ENOENT, -EINVAL));
+}
+
+TEST(sd_device_enumerator_devices) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ assert_se(sd_device_enumerator_allow_uninitialized(e) >= 0);
+ /* On some CI environments, it seems some loop block devices and corresponding bdi devices sometimes
+ * disappear during running this test. Let's exclude them here for stability. */
+ assert_se(sd_device_enumerator_add_match_subsystem(e, "bdi", false) >= 0);
+ assert_se(sd_device_enumerator_add_nomatch_sysname(e, "loop*") >= 0);
+ /* On CentOS CI, systemd-networkd-tests.py may be running when this test is invoked. The networkd
+ * test creates and removes many network interfaces, and may interfere with this test. */
+ assert_se(sd_device_enumerator_add_match_subsystem(e, "net", false) >= 0);
+ FOREACH_DEVICE(e, d)
+ test_sd_device_one(d);
+}
+
+TEST(sd_device_enumerator_subsystems) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ assert_se(sd_device_enumerator_allow_uninitialized(e) >= 0);
+ FOREACH_SUBSYSTEM(e, d)
+ test_sd_device_one(d);
+}
+
+static void test_sd_device_enumerator_filter_subsystem_one(
+ const char *subsystem,
+ Hashmap *h,
+ unsigned *ret_n_new_dev,
+ unsigned *ret_n_removed_dev) {
+
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ unsigned n_new_dev = 0, n_removed_dev = 0;
+ sd_device *dev;
+
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ assert_se(sd_device_enumerator_add_match_subsystem(e, subsystem, true) >= 0);
+ assert_se(sd_device_enumerator_add_nomatch_sysname(e, "loop*") >= 0);
+
+ FOREACH_DEVICE(e, d) {
+ const char *syspath;
+ sd_device *t;
+
+ assert_se(sd_device_get_syspath(d, &syspath) >= 0);
+ t = hashmap_remove(h, syspath);
+
+ if (!t) {
+ log_warning("New device found: subsystem:%s syspath:%s", subsystem, syspath);
+ n_new_dev++;
+ }
+
+ assert_se(!sd_device_unref(t));
+ }
+
+ HASHMAP_FOREACH(dev, h) {
+ const char *syspath;
+
+ assert_se(sd_device_get_syspath(dev, &syspath) >= 0);
+ log_warning("Device removed: subsystem:%s syspath:%s", subsystem, syspath);
+ n_removed_dev++;
+
+ assert_se(!sd_device_unref(dev));
+ }
+
+ hashmap_free(h);
+
+ *ret_n_new_dev = n_new_dev;
+ *ret_n_removed_dev = n_removed_dev;
+}
+
+static bool test_sd_device_enumerator_filter_subsystem_trial(void) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ _cleanup_hashmap_free_ Hashmap *subsystems = NULL;
+ unsigned n_new_dev = 0, n_removed_dev = 0;
+ Hashmap *h;
+ char *s;
+
+ assert_se(subsystems = hashmap_new(&string_hash_ops));
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ /* See comments in TEST(sd_device_enumerator_devices). */
+ assert_se(sd_device_enumerator_add_match_subsystem(e, "bdi", false) >= 0);
+ assert_se(sd_device_enumerator_add_nomatch_sysname(e, "loop*") >= 0);
+ assert_se(sd_device_enumerator_add_match_subsystem(e, "net", false) >= 0);
+
+ FOREACH_DEVICE(e, d) {
+ const char *syspath, *subsystem;
+ int r;
+
+ assert_se(sd_device_get_syspath(d, &syspath) >= 0);
+
+ r = sd_device_get_subsystem(d, &subsystem);
+ assert_se(r >= 0 || r == -ENOENT);
+ if (r < 0)
+ continue;
+
+ h = hashmap_get(subsystems, subsystem);
+ if (!h) {
+ char *str;
+ assert_se(str = strdup(subsystem));
+ assert_se(h = hashmap_new(&string_hash_ops));
+ assert_se(hashmap_put(subsystems, str, h) >= 0);
+ }
+
+ assert_se(hashmap_put(h, syspath, d) >= 0);
+ assert_se(sd_device_ref(d));
+
+ log_debug("Added subsystem:%s syspath:%s", subsystem, syspath);
+ }
+
+ while ((h = hashmap_steal_first_key_and_value(subsystems, (void**) &s))) {
+ unsigned n, m;
+
+ test_sd_device_enumerator_filter_subsystem_one(s, TAKE_PTR(h), &n, &m);
+ free(s);
+
+ n_new_dev += n;
+ n_removed_dev += m;
+ }
+
+ if (n_new_dev > 0)
+ log_warning("%u new devices are found in re-scan", n_new_dev);
+ if (n_removed_dev > 0)
+ log_warning("%u devices removed in re-scan", n_removed_dev);
+
+ return n_new_dev + n_removed_dev == 0;
+}
+
+static bool test_sd_device_enumerator_filter_subsystem_trial_many(void) {
+ for (unsigned i = 0; i < 20; i++) {
+ log_debug("%s(): trial %u", __func__, i);
+ if (test_sd_device_enumerator_filter_subsystem_trial())
+ return true;
+ }
+
+ return false;
+}
+
+static int on_inotify(sd_event_source *s, const struct inotify_event *event, void *userdata) {
+ if (test_sd_device_enumerator_filter_subsystem_trial_many())
+ return sd_event_exit(sd_event_source_get_event(s), 0);
+
+ return sd_event_exit(sd_event_source_get_event(s), -EBUSY);
+}
+
+TEST(sd_device_enumerator_filter_subsystem) {
+ /* The test test_sd_device_enumerator_filter_subsystem_trial() is quite racy. Let's run the function
+ * several times after the udev queue becomes empty. */
+
+ if (!udev_available() || (access("/run/udev", F_OK) < 0 && errno == ENOENT)) {
+ assert_se(test_sd_device_enumerator_filter_subsystem_trial_many());
+ return;
+ }
+
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ assert_se(sd_event_default(&event) >= 0);
+ assert_se(sd_event_add_inotify(event, NULL, "/run/udev" , IN_DELETE, on_inotify, NULL) >= 0);
+
+ if (udev_queue_is_empty() == 0) {
+ log_debug("udev queue is not empty, waiting for all queued events to be processed.");
+ assert_se(sd_event_loop(event) >= 0);
+ } else
+ assert_se(test_sd_device_enumerator_filter_subsystem_trial_many());
+}
+
+TEST(sd_device_enumerator_add_match_sysattr) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *dev;
+ int ifindex;
+
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ assert_se(sd_device_enumerator_allow_uninitialized(e) >= 0);
+ assert_se(sd_device_enumerator_add_match_subsystem(e, "net", true) >= 0);
+ assert_se(sd_device_enumerator_add_match_sysattr(e, "ifindex", "1", true) >= 0);
+ assert_se(sd_device_enumerator_add_match_sysattr(e, "ifindex", "hoge", true) >= 0);
+ assert_se(sd_device_enumerator_add_match_sysattr(e, "ifindex", "foo", true) >= 0);
+ assert_se(sd_device_enumerator_add_match_sysattr(e, "ifindex", "bar", false) >= 0);
+ assert_se(sd_device_enumerator_add_match_sysattr(e, "ifindex", "baz", false) >= 0);
+
+ dev = sd_device_enumerator_get_device_first(e);
+ assert_se(dev);
+ assert_se(sd_device_get_ifindex(dev, &ifindex) >= 0);
+ assert_se(ifindex == 1);
+
+ assert_se(!sd_device_enumerator_get_device_next(e));
+}
+
+TEST(sd_device_enumerator_add_match_property) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *dev;
+ int ifindex;
+
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ assert_se(sd_device_enumerator_allow_uninitialized(e) >= 0);
+ assert_se(sd_device_enumerator_add_match_subsystem(e, "net", true) >= 0);
+ assert_se(sd_device_enumerator_add_match_sysattr(e, "ifindex", "1", true) >= 0);
+ assert_se(sd_device_enumerator_add_match_property(e, "IFINDE*", "1*") >= 0);
+ assert_se(sd_device_enumerator_add_match_property(e, "IFINDE*", "hoge") >= 0);
+ assert_se(sd_device_enumerator_add_match_property(e, "IFINDE*", NULL) >= 0);
+ assert_se(sd_device_enumerator_add_match_property(e, "AAAAA", "BBBB") >= 0);
+ assert_se(sd_device_enumerator_add_match_property(e, "FOOOO", NULL) >= 0);
+
+ dev = sd_device_enumerator_get_device_first(e);
+ assert_se(dev);
+ assert_se(sd_device_get_ifindex(dev, &ifindex) >= 0);
+ assert_se(ifindex == 1);
+}
+
+TEST(sd_device_enumerator_add_match_property_required) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *dev;
+ int ifindex;
+
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ assert_se(sd_device_enumerator_allow_uninitialized(e) >= 0);
+ assert_se(sd_device_enumerator_add_match_subsystem(e, "net", true) >= 0);
+ assert_se(sd_device_enumerator_add_match_sysattr(e, "ifindex", "1", true) >= 0);
+ assert_se(sd_device_enumerator_add_match_property_required(e, "IFINDE*", "1*") >= 0);
+
+ /* Only one required match which should be satisfied. */
+ dev = sd_device_enumerator_get_device_first(e);
+ assert_se(dev);
+ assert_se(sd_device_get_ifindex(dev, &ifindex) >= 0);
+ assert_se(ifindex == 1);
+
+ /* Now let's add a bunch of garbage properties which should not be satisfied. */
+ assert_se(sd_device_enumerator_add_match_property_required(e, "IFINDE*", "hoge") >= 0);
+ assert_se(sd_device_enumerator_add_match_property_required(e, "IFINDE*", NULL) >= 0);
+ assert_se(sd_device_enumerator_add_match_property_required(e, "AAAAA", "BBBB") >= 0);
+ assert_se(sd_device_enumerator_add_match_property_required(e, "FOOOO", NULL) >= 0);
+
+ assert_se(!sd_device_enumerator_get_device_first(e));
+}
+
+static void check_parent_match(sd_device_enumerator *e, sd_device *dev) {
+ const char *syspath;
+ bool found = false;
+
+ assert_se(sd_device_get_syspath(dev, &syspath) >= 0);
+
+ FOREACH_DEVICE(e, d) {
+ const char *s;
+
+ assert_se(sd_device_get_syspath(d, &s) >= 0);
+ if (streq(s, syspath)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ log_device_debug(dev, "not enumerated, already removed??");
+ /* If the original device not found, then the device should be already removed. */
+ assert_se(access(syspath, F_OK) < 0);
+ assert_se(errno == ENOENT);
+ }
+}
+
+TEST(sd_device_enumerator_add_match_parent) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ int r;
+
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ assert_se(sd_device_enumerator_allow_uninitialized(e) >= 0);
+ /* See comments in TEST(sd_device_enumerator_devices). */
+ assert_se(sd_device_enumerator_add_match_subsystem(e, "bdi", false) >= 0);
+ assert_se(sd_device_enumerator_add_nomatch_sysname(e, "loop*") >= 0);
+ assert_se(sd_device_enumerator_add_match_subsystem(e, "net", false) >= 0);
+
+ if (!slow_tests_enabled())
+ assert_se(sd_device_enumerator_add_match_subsystem(e, "block", true) >= 0);
+
+ FOREACH_DEVICE(e, dev) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *p = NULL;
+ const char *syspath;
+ sd_device *parent;
+
+ assert_se(sd_device_get_syspath(dev, &syspath) >= 0);
+
+ r = sd_device_get_parent(dev, &parent);
+ if (r < 0) {
+ assert_se(ERRNO_IS_DEVICE_ABSENT(r));
+ continue;
+ }
+
+ log_debug("> %s", syspath);
+
+ assert_se(sd_device_enumerator_new(&p) >= 0);
+ assert_se(sd_device_enumerator_allow_uninitialized(p) >= 0);
+ assert_se(sd_device_enumerator_add_match_parent(p, parent) >= 0);
+
+ check_parent_match(p, dev);
+
+ /* If the device does not have subsystem, then it is not enumerated. */
+ r = sd_device_get_subsystem(parent, NULL);
+ if (r < 0) {
+ assert_se(r == -ENOENT);
+ continue;
+ }
+ check_parent_match(p, parent);
+ }
+}
+
+TEST(sd_device_get_child) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ int r;
+
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ assert_se(sd_device_enumerator_allow_uninitialized(e) >= 0);
+ /* See comments in TEST(sd_device_enumerator_devices). */
+ assert_se(sd_device_enumerator_add_match_subsystem(e, "bdi", false) >= 0);
+ assert_se(sd_device_enumerator_add_nomatch_sysname(e, "loop*") >= 0);
+ assert_se(sd_device_enumerator_add_match_subsystem(e, "net", false) >= 0);
+
+ if (!slow_tests_enabled())
+ assert_se(sd_device_enumerator_add_match_subsystem(e, "block", true) >= 0);
+
+ FOREACH_DEVICE(e, dev) {
+ const char *syspath, *parent_syspath, *expected_suffix, *suffix;
+ sd_device *parent;
+ bool found = false;
+
+ assert_se(sd_device_get_syspath(dev, &syspath) >= 0);
+
+ r = sd_device_get_parent(dev, &parent);
+ if (r < 0) {
+ assert_se(ERRNO_IS_DEVICE_ABSENT(r));
+ continue;
+ }
+
+ assert_se(sd_device_get_syspath(parent, &parent_syspath) >= 0);
+ assert_se(expected_suffix = path_startswith(syspath, parent_syspath));
+
+ log_debug("> %s", syspath);
+
+ FOREACH_DEVICE_CHILD_WITH_SUFFIX(parent, child, suffix) {
+ const char *s;
+
+ assert_se(child);
+ assert_se(suffix);
+
+ if (!streq(suffix, expected_suffix))
+ continue;
+
+ assert_se(sd_device_get_syspath(child, &s) >= 0);
+ assert_se(streq(s, syspath));
+ found = true;
+ break;
+ }
+ assert_se(found);
+ }
+}
+
+TEST(sd_device_new_from_nulstr) {
+ const char *devlinks =
+ "/dev/disk/by-partuuid/1290d63a-42cc-4c71-b87c-xxxxxxxxxxxx\0"
+ "/dev/disk/by-path/pci-0000:00:0f.0-scsi-0:0:0:0-part3\0"
+ "/dev/disk/by-label/Arch\\x20Linux\0"
+ "/dev/disk/by-uuid/a07b87e5-4af5-4a59-bde9-yyyyyyyyyyyy\0"
+ "/dev/disk/by-partlabel/Arch\\x20Linux\0"
+ "\0";
+
+ _cleanup_(sd_device_unrefp) sd_device *device = NULL, *from_nulstr = NULL;
+ _cleanup_free_ char *nulstr_copy = NULL;
+ const char *nulstr;
+ size_t len;
+
+ assert_se(sd_device_new_from_syspath(&device, "/sys/class/net/lo") >= 0);
+
+ /* Yeah, of course, setting devlink to the loopback interface is nonsense. But this is just a
+ * test for generating and parsing nulstr. For issue #17772. */
+ NULSTR_FOREACH(devlink, devlinks) {
+ log_device_info(device, "setting devlink: %s", devlink);
+ assert_se(device_add_devlink(device, devlink) >= 0);
+ assert_se(set_contains(device->devlinks, devlink));
+ }
+
+ /* For issue #23799 */
+ assert_se(device_add_tag(device, "tag1", false) >= 0);
+ assert_se(device_add_tag(device, "tag2", false) >= 0);
+ assert_se(device_add_tag(device, "current-tag1", true) >= 0);
+ assert_se(device_add_tag(device, "current-tag2", true) >= 0);
+
+ /* These properties are necessary for device_new_from_nulstr(). See device_verify(). */
+ assert_se(device_add_property_internal(device, "SEQNUM", "1") >= 0);
+ assert_se(device_add_property_internal(device, "ACTION", "change") >= 0);
+
+ assert_se(device_get_properties_nulstr(device, &nulstr, &len) >= 0);
+ assert_se(nulstr_copy = newdup(char, nulstr, len));
+ assert_se(device_new_from_nulstr(&from_nulstr, nulstr_copy, len) >= 0);
+
+ assert_se(sd_device_has_tag(from_nulstr, "tag1") == 1);
+ assert_se(sd_device_has_tag(from_nulstr, "tag2") == 1);
+ assert_se(sd_device_has_tag(from_nulstr, "current-tag1") == 1);
+ assert_se(sd_device_has_tag(from_nulstr, "current-tag2") == 1);
+ assert_se(sd_device_has_current_tag(from_nulstr, "tag1") == 0);
+ assert_se(sd_device_has_current_tag(from_nulstr, "tag2") == 0);
+ assert_se(sd_device_has_current_tag(from_nulstr, "current-tag1") == 1);
+ assert_se(sd_device_has_current_tag(from_nulstr, "current-tag2") == 1);
+
+ NULSTR_FOREACH(devlink, devlinks) {
+ log_device_info(from_nulstr, "checking devlink: %s", devlink);
+ assert_se(set_contains(from_nulstr->devlinks, devlink));
+ }
+}
+
+TEST(sd_device_new_from_path) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ _cleanup_(rm_rf_physical_and_freep) char *tmpdir = NULL;
+ int r;
+
+ assert_se(mkdtemp_malloc("/tmp/test-sd-device.XXXXXXX", &tmpdir) >= 0);
+
+ assert_se(sd_device_enumerator_new(&e) >= 0);
+ assert_se(sd_device_enumerator_allow_uninitialized(e) >= 0);
+ assert_se(sd_device_enumerator_add_match_subsystem(e, "block", true) >= 0);
+ assert_se(sd_device_enumerator_add_nomatch_sysname(e, "loop*") >= 0);
+ assert_se(sd_device_enumerator_add_match_property(e, "DEVNAME", "*") >= 0);
+
+ FOREACH_DEVICE(e, dev) {
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ const char *syspath, *devpath, *sysname, *s;
+ _cleanup_free_ char *path = NULL;
+
+ assert_se(sd_device_get_sysname(dev, &sysname) >= 0);
+
+ log_debug("%s(%s)", __func__, sysname);
+
+ assert_se(sd_device_get_syspath(dev, &syspath) >= 0);
+ assert_se(sd_device_new_from_path(&d, syspath) >= 0);
+ assert_se(sd_device_get_syspath(d, &s) >= 0);
+ assert_se(streq(s, syspath));
+ d = sd_device_unref(d);
+
+ assert_se(sd_device_get_devname(dev, &devpath) >= 0);
+ r = sd_device_new_from_path(&d, devpath);
+ if (r >= 0) {
+ assert_se(sd_device_get_syspath(d, &s) >= 0);
+ assert_se(streq(s, syspath));
+ d = sd_device_unref(d);
+ } else
+ assert_se(r == -ENODEV || ERRNO_IS_PRIVILEGE(r));
+
+ assert_se(path = path_join(tmpdir, sysname));
+ assert_se(symlink(syspath, path) >= 0);
+ assert_se(sd_device_new_from_path(&d, path) >= 0);
+ assert_se(sd_device_get_syspath(d, &s) >= 0);
+ assert_se(streq(s, syspath));
+ }
+}
+
+static void test_devname_from_devnum_one(const char *path) {
+ _cleanup_free_ char *resolved = NULL;
+ struct stat st;
+
+ log_debug("> %s", path);
+
+ if (stat(path, &st) < 0) {
+ assert_se(errno == ENOENT);
+ log_notice("Path %s not found, skipping test", path);
+ return;
+ }
+
+ assert_se(devname_from_devnum(st.st_mode, st.st_rdev, &resolved) >= 0);
+ assert_se(path_equal(path, resolved));
+ resolved = mfree(resolved);
+ assert_se(devname_from_stat_rdev(&st, &resolved) >= 0);
+ assert_se(path_equal(path, resolved));
+}
+
+TEST(devname_from_devnum) {
+ test_devname_from_devnum_one("/dev/null");
+ test_devname_from_devnum_one("/dev/zero");
+ test_devname_from_devnum_one("/dev/full");
+ test_devname_from_devnum_one("/dev/random");
+ test_devname_from_devnum_one("/dev/urandom");
+ test_devname_from_devnum_one("/dev/tty");
+
+ if (is_device_node("/run/systemd/inaccessible/blk") > 0) {
+ test_devname_from_devnum_one("/run/systemd/inaccessible/chr");
+ test_devname_from_devnum_one("/run/systemd/inaccessible/blk");
+ }
+}
+
+DEFINE_TEST_MAIN(LOG_INFO);
diff --git a/src/libsystemd/sd-event/event-source.h b/src/libsystemd/sd-event/event-source.h
new file mode 100644
index 0000000..f4e38d7
--- /dev/null
+++ b/src/libsystemd/sd-event/event-source.h
@@ -0,0 +1,239 @@
+#pragma once
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/epoll.h>
+#include <sys/timerfd.h>
+#include <sys/wait.h>
+
+#include "sd-event.h"
+
+#include "hashmap.h"
+#include "inotify-util.h"
+#include "list.h"
+#include "prioq.h"
+#include "ratelimit.h"
+
+typedef enum EventSourceType {
+ SOURCE_IO,
+ SOURCE_TIME_REALTIME,
+ SOURCE_TIME_BOOTTIME,
+ SOURCE_TIME_MONOTONIC,
+ SOURCE_TIME_REALTIME_ALARM,
+ SOURCE_TIME_BOOTTIME_ALARM,
+ SOURCE_SIGNAL,
+ SOURCE_CHILD,
+ SOURCE_DEFER,
+ SOURCE_POST,
+ SOURCE_EXIT,
+ SOURCE_WATCHDOG,
+ SOURCE_INOTIFY,
+ SOURCE_MEMORY_PRESSURE,
+ _SOURCE_EVENT_SOURCE_TYPE_MAX,
+ _SOURCE_EVENT_SOURCE_TYPE_INVALID = -EINVAL,
+} EventSourceType;
+
+/* All objects we use in epoll events start with this value, so that
+ * we know how to dispatch it */
+typedef enum WakeupType {
+ WAKEUP_NONE,
+ WAKEUP_EVENT_SOURCE, /* either I/O or pidfd wakeup */
+ WAKEUP_CLOCK_DATA,
+ WAKEUP_SIGNAL_DATA,
+ WAKEUP_INOTIFY_DATA,
+ _WAKEUP_TYPE_MAX,
+ _WAKEUP_TYPE_INVALID = -EINVAL,
+} WakeupType;
+
+struct inode_data;
+
+struct sd_event_source {
+ WakeupType wakeup;
+
+ unsigned n_ref;
+
+ sd_event *event;
+ void *userdata;
+ sd_event_handler_t prepare;
+
+ char *description;
+
+ EventSourceType type;
+ signed int enabled:3;
+ bool pending:1;
+ bool dispatching:1;
+ bool floating:1;
+ bool exit_on_failure:1;
+ bool ratelimited:1;
+
+ int64_t priority;
+ unsigned pending_index;
+ unsigned prepare_index;
+ uint64_t pending_iteration;
+ uint64_t prepare_iteration;
+
+ sd_event_destroy_t destroy_callback;
+ sd_event_handler_t ratelimit_expire_callback;
+
+ LIST_FIELDS(sd_event_source, sources);
+
+ RateLimit rate_limit;
+
+ /* These are primarily fields relevant for time event sources, but since any event source can
+ * effectively become one when rate-limited, this is part of the common fields. */
+ unsigned earliest_index;
+ unsigned latest_index;
+
+ union {
+ struct {
+ sd_event_io_handler_t callback;
+ int fd;
+ uint32_t events;
+ uint32_t revents;
+ bool registered:1;
+ bool owned:1;
+ } io;
+ struct {
+ sd_event_time_handler_t callback;
+ usec_t next, accuracy;
+ } time;
+ struct {
+ sd_event_signal_handler_t callback;
+ struct signalfd_siginfo siginfo;
+ int sig;
+ bool unblock;
+ } signal;
+ struct {
+ sd_event_child_handler_t callback;
+ siginfo_t siginfo;
+ pid_t pid;
+ int options;
+ int pidfd;
+ bool registered:1; /* whether the pidfd is registered in the epoll */
+ bool pidfd_owned:1; /* close pidfd when event source is freed */
+ bool process_owned:1; /* kill+reap process when event source is freed */
+ bool exited:1; /* true if process exited (i.e. if there's value in SIGKILLing it if we want to get rid of it) */
+ bool waited:1; /* true if process was waited for (i.e. if there's value in waitid(P_PID)'ing it if we want to get rid of it) */
+ } child;
+ struct {
+ sd_event_handler_t callback;
+ } defer;
+ struct {
+ sd_event_handler_t callback;
+ } post;
+ struct {
+ sd_event_handler_t callback;
+ unsigned prioq_index;
+ } exit;
+ struct {
+ sd_event_inotify_handler_t callback;
+ uint32_t mask;
+ struct inode_data *inode_data;
+ LIST_FIELDS(sd_event_source, by_inode_data);
+ } inotify;
+ struct {
+ int fd;
+ sd_event_handler_t callback;
+ void *write_buffer;
+ size_t write_buffer_size;
+ uint32_t events, revents;
+ LIST_FIELDS(sd_event_source, write_list);
+ bool registered:1;
+ bool locked:1;
+ bool in_write_list:1;
+ } memory_pressure;
+ };
+};
+
+struct clock_data {
+ WakeupType wakeup;
+ int fd;
+
+ /* For all clocks we maintain two priority queues each, one
+ * ordered for the earliest times the events may be
+ * dispatched, and one ordered by the latest times they must
+ * have been dispatched. The range between the top entries in
+ * the two prioqs is the time window we can freely schedule
+ * wakeups in */
+
+ Prioq *earliest;
+ Prioq *latest;
+ usec_t next;
+
+ bool needs_rearm:1;
+};
+
+struct signal_data {
+ WakeupType wakeup;
+
+ /* For each priority we maintain one signal fd, so that we
+ * only have to dequeue a single event per priority at a
+ * time. */
+
+ int fd;
+ int64_t priority;
+ sigset_t sigset;
+ sd_event_source *current;
+};
+
+/* A structure listing all event sources currently watching a specific inode */
+struct inode_data {
+ /* The identifier for the inode, the combination of the .st_dev + .st_ino fields of the file */
+ ino_t ino;
+ dev_t dev;
+
+ /* An fd of the inode to watch. The fd is kept open until the next iteration of the loop, so that we can
+ * rearrange the priority still until then, as we need the original inode to change the priority as we need to
+ * add a watch descriptor to the right inotify for the priority which we can only do if we have a handle to the
+ * original inode. We keep a list of all inode_data objects with an open fd in the to_close list (see below) of
+ * the sd-event object, so that it is efficient to close everything, before entering the next event loop
+ * iteration. */
+ int fd;
+
+ /* The inotify "watch descriptor" */
+ int wd;
+
+ /* The combination of the mask of all inotify watches on this inode we manage. This is also the mask that has
+ * most recently been set on the watch descriptor. */
+ uint32_t combined_mask;
+
+ /* All event sources subscribed to this inode */
+ LIST_HEAD(sd_event_source, event_sources);
+
+ /* The inotify object we watch this inode with */
+ struct inotify_data *inotify_data;
+
+ /* A linked list of all inode data objects with fds to close (see above) */
+ LIST_FIELDS(struct inode_data, to_close);
+};
+
+/* A structure encapsulating an inotify fd */
+struct inotify_data {
+ WakeupType wakeup;
+
+ /* For each priority we maintain one inotify fd, so that we only have to dequeue a single event per priority at
+ * a time */
+
+ int fd;
+ int64_t priority;
+
+ Hashmap *inodes; /* The inode_data structures keyed by dev+ino */
+ Hashmap *wd; /* The inode_data structures keyed by the watch descriptor for each */
+
+ /* The buffer we read inotify events into */
+ union inotify_event_buffer buffer;
+ size_t buffer_filled; /* fill level of the buffer */
+
+ /* How many event sources are currently marked pending for this inotify. We won't read new events off the
+ * inotify fd as long as there are still pending events on the inotify (because we have no strategy of queuing
+ * the events locally if they can't be coalesced). */
+ unsigned n_pending;
+
+ /* If this counter is non-zero, don't GC the inotify data object even if not used to watch any inode
+ * anymore. This is useful to pin the object for a bit longer, after the last event source needing it
+ * is gone. */
+ unsigned n_busy;
+
+ /* A linked list of all inotify objects with data already read, that still need processing. We keep this list
+ * to make it efficient to figure out what inotify objects to process data on next. */
+ LIST_FIELDS(struct inotify_data, buffered);
+};
diff --git a/src/libsystemd/sd-event/event-util.c b/src/libsystemd/sd-event/event-util.c
new file mode 100644
index 0000000..a310122
--- /dev/null
+++ b/src/libsystemd/sd-event/event-util.c
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "event-source.h"
+#include "event-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "string-util.h"
+
+int event_reset_time(
+ sd_event *e,
+ sd_event_source **s,
+ clockid_t clock,
+ uint64_t usec,
+ uint64_t accuracy,
+ sd_event_time_handler_t callback,
+ void *userdata,
+ int64_t priority,
+ const char *description,
+ bool force_reset) {
+
+ bool created = false;
+ int enabled, r;
+ clockid_t c;
+
+ assert(e);
+ assert(s);
+
+ if (*s) {
+ if (!force_reset) {
+ r = sd_event_source_get_enabled(*s, &enabled);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to query whether event source \"%s\" is enabled or not: %m",
+ strna((*s)->description ?: description));
+
+ if (enabled != SD_EVENT_OFF)
+ return 0;
+ }
+
+ r = sd_event_source_get_time_clock(*s, &c);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to get clock id of event source \"%s\": %m", strna((*s)->description ?: description));
+
+ if (c != clock)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "sd-event: Current clock id %i of event source \"%s\" is different from specified one %i.",
+ (int)c,
+ strna((*s)->description ?: description),
+ (int)clock);
+
+ r = sd_event_source_set_time(*s, usec);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to set time for event source \"%s\": %m", strna((*s)->description ?: description));
+
+ r = sd_event_source_set_time_accuracy(*s, accuracy);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to set accuracy for event source \"%s\": %m", strna((*s)->description ?: description));
+
+ /* callback function is not updated, as we do not have sd_event_source_set_time_callback(). */
+
+ (void) sd_event_source_set_userdata(*s, userdata);
+
+ r = sd_event_source_set_enabled(*s, SD_EVENT_ONESHOT);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to enable event source \"%s\": %m", strna((*s)->description ?: description));
+ } else {
+ r = sd_event_add_time(e, s, clock, usec, accuracy, callback, userdata);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to create timer event \"%s\": %m", strna(description));
+
+ created = true;
+ }
+
+ r = sd_event_source_set_priority(*s, priority);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to set priority for event source \"%s\": %m", strna((*s)->description ?: description));
+
+ if (description) {
+ r = sd_event_source_set_description(*s, description);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to set description for event source \"%s\": %m", description);
+ }
+
+ return created;
+}
+
+int event_reset_time_relative(
+ sd_event *e,
+ sd_event_source **s,
+ clockid_t clock,
+ uint64_t usec,
+ uint64_t accuracy,
+ sd_event_time_handler_t callback,
+ void *userdata,
+ int64_t priority,
+ const char *description,
+ bool force_reset) {
+
+ int r;
+
+ assert(e);
+
+ if (usec > 0) {
+ usec_t usec_now;
+
+ r = sd_event_now(e, clock, &usec_now);
+ if (r < 0)
+ return log_debug_errno(r, "sd-event: Failed to get the current time: %m");
+
+ usec = usec_add(usec_now, usec);
+ }
+
+ return event_reset_time(e, s, clock, usec, accuracy, callback, userdata, priority, description, force_reset);
+}
+
+int event_add_time_change(sd_event *e, sd_event_source **ret, sd_event_io_handler_t callback, void *userdata) {
+ _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL;
+ _cleanup_close_ int fd = -EBADF;
+ int r;
+
+ assert(e);
+
+ /* Allocates an IO event source that gets woken up whenever the clock changes. Needs to be recreated on each event */
+
+ fd = time_change_fd();
+ if (fd < 0)
+ return fd;
+
+ r = sd_event_add_io(e, &s, fd, EPOLLIN, callback, userdata);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_io_fd_own(s, true);
+ if (r < 0)
+ return r;
+
+ TAKE_FD(fd);
+
+ r = sd_event_source_set_description(s, "time-change");
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = TAKE_PTR(s);
+ else {
+ r = sd_event_source_set_floating(s, true);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-event/event-util.h b/src/libsystemd/sd-event/event-util.h
new file mode 100644
index 0000000..c185584
--- /dev/null
+++ b/src/libsystemd/sd-event/event-util.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-event.h"
+
+int event_reset_time(
+ sd_event *e,
+ sd_event_source **s,
+ clockid_t clock,
+ uint64_t usec,
+ uint64_t accuracy,
+ sd_event_time_handler_t callback,
+ void *userdata,
+ int64_t priority,
+ const char *description,
+ bool force_reset);
+int event_reset_time_relative(
+ sd_event *e,
+ sd_event_source **s,
+ clockid_t clock,
+ uint64_t usec,
+ uint64_t accuracy,
+ sd_event_time_handler_t callback,
+ void *userdata,
+ int64_t priority,
+ const char *description,
+ bool force_reset);
+static inline int event_source_disable(sd_event_source *s) {
+ return sd_event_source_set_enabled(s, SD_EVENT_OFF);
+}
+
+int event_add_time_change(sd_event *e, sd_event_source **ret, sd_event_io_handler_t callback, void *userdata);
diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c
new file mode 100644
index 0000000..288798a
--- /dev/null
+++ b/src/libsystemd/sd-event/sd-event.c
@@ -0,0 +1,5357 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/epoll.h>
+#include <sys/timerfd.h>
+#include <sys/wait.h>
+
+#include "sd-daemon.h"
+#include "sd-event.h"
+#include "sd-id128.h"
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "env-util.h"
+#include "event-source.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "glyph-util.h"
+#include "hashmap.h"
+#include "hexdecoct.h"
+#include "list.h"
+#include "logarithm.h"
+#include "macro.h"
+#include "mallinfo-util.h"
+#include "memory-util.h"
+#include "missing_magic.h"
+#include "missing_syscall.h"
+#include "missing_threads.h"
+#include "origin-id.h"
+#include "path-util.h"
+#include "prioq.h"
+#include "process-util.h"
+#include "psi-util.h"
+#include "set.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "time-util.h"
+
+#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
+
+static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
+ /* Returns true if this is a PID event source and can be implemented by watching EPOLLIN */
+ return s &&
+ s->type == SOURCE_CHILD &&
+ s->child.pidfd >= 0 &&
+ s->child.options == WEXITED;
+}
+
+static bool event_source_is_online(sd_event_source *s) {
+ assert(s);
+ return s->enabled != SD_EVENT_OFF && !s->ratelimited;
+}
+
+static bool event_source_is_offline(sd_event_source *s) {
+ assert(s);
+ return s->enabled == SD_EVENT_OFF || s->ratelimited;
+}
+
+static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
+ [SOURCE_IO] = "io",
+ [SOURCE_TIME_REALTIME] = "realtime",
+ [SOURCE_TIME_BOOTTIME] = "boottime",
+ [SOURCE_TIME_MONOTONIC] = "monotonic",
+ [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
+ [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
+ [SOURCE_SIGNAL] = "signal",
+ [SOURCE_CHILD] = "child",
+ [SOURCE_DEFER] = "defer",
+ [SOURCE_POST] = "post",
+ [SOURCE_EXIT] = "exit",
+ [SOURCE_WATCHDOG] = "watchdog",
+ [SOURCE_INOTIFY] = "inotify",
+ [SOURCE_MEMORY_PRESSURE] = "memory-pressure",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
+
+#define EVENT_SOURCE_IS_TIME(t) \
+ IN_SET((t), \
+ SOURCE_TIME_REALTIME, \
+ SOURCE_TIME_BOOTTIME, \
+ SOURCE_TIME_MONOTONIC, \
+ SOURCE_TIME_REALTIME_ALARM, \
+ SOURCE_TIME_BOOTTIME_ALARM)
+
+#define EVENT_SOURCE_CAN_RATE_LIMIT(t) \
+ IN_SET((t), \
+ SOURCE_IO, \
+ SOURCE_TIME_REALTIME, \
+ SOURCE_TIME_BOOTTIME, \
+ SOURCE_TIME_MONOTONIC, \
+ SOURCE_TIME_REALTIME_ALARM, \
+ SOURCE_TIME_BOOTTIME_ALARM, \
+ SOURCE_SIGNAL, \
+ SOURCE_DEFER, \
+ SOURCE_INOTIFY, \
+ SOURCE_MEMORY_PRESSURE)
+
+/* This is used to assert that we didn't pass an unexpected source type to event_source_time_prioq_put().
+ * Time sources and ratelimited sources can be passed, so effectively this is the same as the
+ * EVENT_SOURCE_CAN_RATE_LIMIT() macro. */
+#define EVENT_SOURCE_USES_TIME_PRIOQ(t) EVENT_SOURCE_CAN_RATE_LIMIT(t)
+
+struct sd_event {
+ unsigned n_ref;
+
+ int epoll_fd;
+ int watchdog_fd;
+
+ Prioq *pending;
+ Prioq *prepare;
+
+ /* timerfd_create() only supports these five clocks so far. We
+ * can add support for more clocks when the kernel learns to
+ * deal with them, too. */
+ struct clock_data realtime;
+ struct clock_data boottime;
+ struct clock_data monotonic;
+ struct clock_data realtime_alarm;
+ struct clock_data boottime_alarm;
+
+ usec_t perturb;
+
+ sd_event_source **signal_sources; /* indexed by signal number */
+ Hashmap *signal_data; /* indexed by priority */
+
+ Hashmap *child_sources;
+ unsigned n_online_child_sources;
+
+ Set *post_sources;
+
+ Prioq *exit;
+
+ Hashmap *inotify_data; /* indexed by priority */
+
+ /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
+ LIST_HEAD(struct inode_data, inode_data_to_close_list);
+
+ /* A list of inotify objects that already have events buffered which aren't processed yet */
+ LIST_HEAD(struct inotify_data, buffered_inotify_data_list);
+
+ /* A list of memory pressure event sources that still need their subscription string written */
+ LIST_HEAD(sd_event_source, memory_pressure_write_list);
+
+ uint64_t origin_id;
+
+ uint64_t iteration;
+ triple_timestamp timestamp;
+ int state;
+
+ bool exit_requested:1;
+ bool need_process_child:1;
+ bool watchdog:1;
+ bool profile_delays:1;
+
+ int exit_code;
+
+ pid_t tid;
+ sd_event **default_event_ptr;
+
+ usec_t watchdog_last, watchdog_period;
+
+ unsigned n_sources;
+
+ struct epoll_event *event_queue;
+
+ LIST_HEAD(sd_event_source, sources);
+
+ sd_event_source *sigint_event_source, *sigterm_event_source;
+
+ usec_t last_run_usec, last_log_usec;
+ unsigned delays[sizeof(usec_t) * 8];
+};
+
+DEFINE_PRIVATE_ORIGIN_ID_HELPERS(sd_event, event);
+
+static thread_local sd_event *default_event = NULL;
+
+static void source_disconnect(sd_event_source *s);
+static void event_gc_inode_data(sd_event *e, struct inode_data *d);
+
+static sd_event *event_resolve(sd_event *e) {
+ return e == SD_EVENT_DEFAULT ? default_event : e;
+}
+
+static int pending_prioq_compare(const void *a, const void *b) {
+ const sd_event_source *x = a, *y = b;
+ int r;
+
+ assert(x->pending);
+ assert(y->pending);
+
+ /* Enabled ones first */
+ r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
+ if (r != 0)
+ return r;
+
+ /* Non rate-limited ones first. */
+ r = CMP(!!x->ratelimited, !!y->ratelimited);
+ if (r != 0)
+ return r;
+
+ /* Lower priority values first */
+ r = CMP(x->priority, y->priority);
+ if (r != 0)
+ return r;
+
+ /* Older entries first */
+ return CMP(x->pending_iteration, y->pending_iteration);
+}
+
+static int prepare_prioq_compare(const void *a, const void *b) {
+ const sd_event_source *x = a, *y = b;
+ int r;
+
+ assert(x->prepare);
+ assert(y->prepare);
+
+ /* Enabled ones first */
+ r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
+ if (r != 0)
+ return r;
+
+ /* Non rate-limited ones first. */
+ r = CMP(!!x->ratelimited, !!y->ratelimited);
+ if (r != 0)
+ return r;
+
+ /* Move most recently prepared ones last, so that we can stop
+ * preparing as soon as we hit one that has already been
+ * prepared in the current iteration */
+ r = CMP(x->prepare_iteration, y->prepare_iteration);
+ if (r != 0)
+ return r;
+
+ /* Lower priority values first */
+ return CMP(x->priority, y->priority);
+}
+
+static usec_t time_event_source_next(const sd_event_source *s) {
+ assert(s);
+
+ /* We have two kinds of event sources that have elapsation times associated with them: the actual
+ * time based ones and the ones for which a ratelimit can be in effect (where we want to be notified
+ * once the ratelimit time window ends). Let's return the next elapsing time depending on what we are
+ * looking at here. */
+
+ if (s->ratelimited) { /* If rate-limited the next elapsation is when the ratelimit time window ends */
+ assert(s->rate_limit.begin != 0);
+ assert(s->rate_limit.interval != 0);
+ return usec_add(s->rate_limit.begin, s->rate_limit.interval);
+ }
+
+ /* Otherwise this must be a time event source, if not ratelimited */
+ if (EVENT_SOURCE_IS_TIME(s->type))
+ return s->time.next;
+
+ return USEC_INFINITY;
+}
+
+static usec_t time_event_source_latest(const sd_event_source *s) {
+ assert(s);
+
+ if (s->ratelimited) { /* For ratelimited stuff the earliest and the latest time shall actually be the
+ * same, as we should avoid adding additional inaccuracy on an inaccuracy time
+ * window */
+ assert(s->rate_limit.begin != 0);
+ assert(s->rate_limit.interval != 0);
+ return usec_add(s->rate_limit.begin, s->rate_limit.interval);
+ }
+
+ /* Must be a time event source, if not ratelimited */
+ if (EVENT_SOURCE_IS_TIME(s->type))
+ return usec_add(s->time.next, s->time.accuracy);
+
+ return USEC_INFINITY;
+}
+
+static bool event_source_timer_candidate(const sd_event_source *s) {
+ assert(s);
+
+ /* Returns true for event sources that either are not pending yet (i.e. where it's worth to mark them pending)
+ * or which are currently ratelimited (i.e. where it's worth leaving the ratelimited state) */
+ return !s->pending || s->ratelimited;
+}
+
+static int time_prioq_compare(const void *a, const void *b, usec_t (*time_func)(const sd_event_source *s)) {
+ const sd_event_source *x = a, *y = b;
+ int r;
+
+ /* Enabled ones first */
+ r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
+ if (r != 0)
+ return r;
+
+ /* Order "non-pending OR ratelimited" before "pending AND not-ratelimited" */
+ r = CMP(!event_source_timer_candidate(x), !event_source_timer_candidate(y));
+ if (r != 0)
+ return r;
+
+ /* Order by time */
+ return CMP(time_func(x), time_func(y));
+}
+
+static int earliest_time_prioq_compare(const void *a, const void *b) {
+ return time_prioq_compare(a, b, time_event_source_next);
+}
+
+static int latest_time_prioq_compare(const void *a, const void *b) {
+ return time_prioq_compare(a, b, time_event_source_latest);
+}
+
+static int exit_prioq_compare(const void *a, const void *b) {
+ const sd_event_source *x = a, *y = b;
+ int r;
+
+ assert(x->type == SOURCE_EXIT);
+ assert(y->type == SOURCE_EXIT);
+
+ /* Enabled ones first */
+ r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
+ if (r != 0)
+ return r;
+
+ /* Lower priority values first */
+ return CMP(x->priority, y->priority);
+}
+
+static void free_clock_data(struct clock_data *d) {
+ assert(d);
+ assert(d->wakeup == WAKEUP_CLOCK_DATA);
+
+ safe_close(d->fd);
+ prioq_free(d->earliest);
+ prioq_free(d->latest);
+}
+
+static sd_event *event_free(sd_event *e) {
+ sd_event_source *s;
+
+ assert(e);
+
+ e->sigterm_event_source = sd_event_source_unref(e->sigterm_event_source);
+ e->sigint_event_source = sd_event_source_unref(e->sigint_event_source);
+
+ while ((s = e->sources)) {
+ assert(s->floating);
+ source_disconnect(s);
+ sd_event_source_unref(s);
+ }
+
+ assert(e->n_sources == 0);
+
+ if (e->default_event_ptr)
+ *(e->default_event_ptr) = NULL;
+
+ safe_close(e->epoll_fd);
+ safe_close(e->watchdog_fd);
+
+ free_clock_data(&e->realtime);
+ free_clock_data(&e->boottime);
+ free_clock_data(&e->monotonic);
+ free_clock_data(&e->realtime_alarm);
+ free_clock_data(&e->boottime_alarm);
+
+ prioq_free(e->pending);
+ prioq_free(e->prepare);
+ prioq_free(e->exit);
+
+ free(e->signal_sources);
+ hashmap_free(e->signal_data);
+
+ hashmap_free(e->inotify_data);
+
+ hashmap_free(e->child_sources);
+ set_free(e->post_sources);
+
+ free(e->event_queue);
+
+ return mfree(e);
+}
+
+_public_ int sd_event_new(sd_event** ret) {
+ sd_event *e;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ e = new(sd_event, 1);
+ if (!e)
+ return -ENOMEM;
+
+ *e = (sd_event) {
+ .n_ref = 1,
+ .epoll_fd = -EBADF,
+ .watchdog_fd = -EBADF,
+ .realtime.wakeup = WAKEUP_CLOCK_DATA,
+ .realtime.fd = -EBADF,
+ .realtime.next = USEC_INFINITY,
+ .boottime.wakeup = WAKEUP_CLOCK_DATA,
+ .boottime.fd = -EBADF,
+ .boottime.next = USEC_INFINITY,
+ .monotonic.wakeup = WAKEUP_CLOCK_DATA,
+ .monotonic.fd = -EBADF,
+ .monotonic.next = USEC_INFINITY,
+ .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
+ .realtime_alarm.fd = -EBADF,
+ .realtime_alarm.next = USEC_INFINITY,
+ .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
+ .boottime_alarm.fd = -EBADF,
+ .boottime_alarm.next = USEC_INFINITY,
+ .perturb = USEC_INFINITY,
+ .origin_id = origin_id_query(),
+ };
+
+ r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
+ if (r < 0)
+ goto fail;
+
+ e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+ if (e->epoll_fd < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
+
+ if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
+ log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 %s 2^63 us will be logged every 5s.",
+ special_glyph(SPECIAL_GLYPH_ELLIPSIS));
+ e->profile_delays = true;
+ }
+
+ *ret = e;
+ return 0;
+
+fail:
+ event_free(e);
+ return r;
+}
+
+/* Define manually so we can add the origin check */
+_public_ sd_event *sd_event_ref(sd_event *e) {
+ if (!e)
+ return NULL;
+ if (event_origin_changed(e))
+ return NULL;
+
+ e->n_ref++;
+
+ return e;
+}
+
+_public_ sd_event* sd_event_unref(sd_event *e) {
+ if (!e)
+ return NULL;
+ if (event_origin_changed(e))
+ return NULL;
+
+ assert(e->n_ref > 0);
+ if (--e->n_ref > 0)
+ return NULL;
+
+ return event_free(e);
+}
+
+#define PROTECT_EVENT(e) \
+ _unused_ _cleanup_(sd_event_unrefp) sd_event *_ref = sd_event_ref(e);
+
+_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
+ if (s)
+ (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
+ return sd_event_source_unref(s);
+}
+
+static void source_io_unregister(sd_event_source *s) {
+ assert(s);
+ assert(s->type == SOURCE_IO);
+
+ if (event_origin_changed(s->event))
+ return;
+
+ if (!s->io.registered)
+ return;
+
+ if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
+ log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
+ strna(s->description), event_source_type_to_string(s->type));
+
+ s->io.registered = false;
+}
+
+static int source_io_register(
+ sd_event_source *s,
+ int enabled,
+ uint32_t events) {
+
+ assert(s);
+ assert(s->type == SOURCE_IO);
+ assert(enabled != SD_EVENT_OFF);
+
+ struct epoll_event ev = {
+ .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
+ .data.ptr = s,
+ };
+
+ if (epoll_ctl(s->event->epoll_fd,
+ s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
+ s->io.fd, &ev) < 0)
+ return -errno;
+
+ s->io.registered = true;
+
+ return 0;
+}
+
+static void source_child_pidfd_unregister(sd_event_source *s) {
+ assert(s);
+ assert(s->type == SOURCE_CHILD);
+
+ if (event_origin_changed(s->event))
+ return;
+
+ if (!s->child.registered)
+ return;
+
+ if (EVENT_SOURCE_WATCH_PIDFD(s))
+ if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
+ log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
+ strna(s->description), event_source_type_to_string(s->type));
+
+ s->child.registered = false;
+}
+
+static int source_child_pidfd_register(sd_event_source *s, int enabled) {
+ assert(s);
+ assert(s->type == SOURCE_CHILD);
+ assert(enabled != SD_EVENT_OFF);
+
+ if (EVENT_SOURCE_WATCH_PIDFD(s)) {
+ struct epoll_event ev = {
+ .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
+ .data.ptr = s,
+ };
+
+ if (epoll_ctl(s->event->epoll_fd,
+ s->child.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
+ s->child.pidfd, &ev) < 0)
+ return -errno;
+ }
+
+ s->child.registered = true;
+ return 0;
+}
+
+static void source_memory_pressure_unregister(sd_event_source *s) {
+ assert(s);
+ assert(s->type == SOURCE_MEMORY_PRESSURE);
+
+ if (event_origin_changed(s->event))
+ return;
+
+ if (!s->memory_pressure.registered)
+ return;
+
+ if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->memory_pressure.fd, NULL) < 0)
+ log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
+ strna(s->description), event_source_type_to_string(s->type));
+
+ s->memory_pressure.registered = false;
+}
+
+static int source_memory_pressure_register(sd_event_source *s, int enabled) {
+ assert(s);
+ assert(s->type == SOURCE_MEMORY_PRESSURE);
+ assert(enabled != SD_EVENT_OFF);
+
+ struct epoll_event ev = {
+ .events = s->memory_pressure.write_buffer_size > 0 ? EPOLLOUT :
+ (s->memory_pressure.events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0)),
+ .data.ptr = s,
+ };
+
+ if (epoll_ctl(s->event->epoll_fd,
+ s->memory_pressure.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
+ s->memory_pressure.fd, &ev) < 0)
+ return -errno;
+
+ s->memory_pressure.registered = true;
+ return 0;
+}
+
+static void source_memory_pressure_add_to_write_list(sd_event_source *s) {
+ assert(s);
+ assert(s->type == SOURCE_MEMORY_PRESSURE);
+
+ if (s->memory_pressure.in_write_list)
+ return;
+
+ LIST_PREPEND(memory_pressure.write_list, s->event->memory_pressure_write_list, s);
+ s->memory_pressure.in_write_list = true;
+}
+
+static void source_memory_pressure_remove_from_write_list(sd_event_source *s) {
+ assert(s);
+ assert(s->type == SOURCE_MEMORY_PRESSURE);
+
+ if (!s->memory_pressure.in_write_list)
+ return;
+
+ LIST_REMOVE(memory_pressure.write_list, s->event->memory_pressure_write_list, s);
+ s->memory_pressure.in_write_list = false;
+}
+
+static clockid_t event_source_type_to_clock(EventSourceType t) {
+
+ switch (t) {
+
+ case SOURCE_TIME_REALTIME:
+ return CLOCK_REALTIME;
+
+ case SOURCE_TIME_BOOTTIME:
+ return CLOCK_BOOTTIME;
+
+ case SOURCE_TIME_MONOTONIC:
+ return CLOCK_MONOTONIC;
+
+ case SOURCE_TIME_REALTIME_ALARM:
+ return CLOCK_REALTIME_ALARM;
+
+ case SOURCE_TIME_BOOTTIME_ALARM:
+ return CLOCK_BOOTTIME_ALARM;
+
+ default:
+ return (clockid_t) -1;
+ }
+}
+
+static EventSourceType clock_to_event_source_type(clockid_t clock) {
+
+ switch (clock) {
+
+ case CLOCK_REALTIME:
+ return SOURCE_TIME_REALTIME;
+
+ case CLOCK_BOOTTIME:
+ return SOURCE_TIME_BOOTTIME;
+
+ case CLOCK_MONOTONIC:
+ return SOURCE_TIME_MONOTONIC;
+
+ case CLOCK_REALTIME_ALARM:
+ return SOURCE_TIME_REALTIME_ALARM;
+
+ case CLOCK_BOOTTIME_ALARM:
+ return SOURCE_TIME_BOOTTIME_ALARM;
+
+ default:
+ return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
+ }
+}
+
+static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
+ assert(e);
+
+ switch (t) {
+
+ case SOURCE_TIME_REALTIME:
+ return &e->realtime;
+
+ case SOURCE_TIME_BOOTTIME:
+ return &e->boottime;
+
+ case SOURCE_TIME_MONOTONIC:
+ return &e->monotonic;
+
+ case SOURCE_TIME_REALTIME_ALARM:
+ return &e->realtime_alarm;
+
+ case SOURCE_TIME_BOOTTIME_ALARM:
+ return &e->boottime_alarm;
+
+ default:
+ return NULL;
+ }
+}
+
+static void event_free_signal_data(sd_event *e, struct signal_data *d) {
+ assert(e);
+
+ if (!d)
+ return;
+
+ hashmap_remove(e->signal_data, &d->priority);
+ safe_close(d->fd);
+ free(d);
+}
+
+static int event_make_signal_data(
+ sd_event *e,
+ int sig,
+ struct signal_data **ret) {
+
+ struct signal_data *d;
+ bool added = false;
+ sigset_t ss_copy;
+ int64_t priority;
+ int r;
+
+ assert(e);
+
+ if (event_origin_changed(e))
+ return -ECHILD;
+
+ if (e->signal_sources && e->signal_sources[sig])
+ priority = e->signal_sources[sig]->priority;
+ else
+ priority = SD_EVENT_PRIORITY_NORMAL;
+
+ d = hashmap_get(e->signal_data, &priority);
+ if (d) {
+ if (sigismember(&d->sigset, sig) > 0) {
+ if (ret)
+ *ret = d;
+ return 0;
+ }
+ } else {
+ d = new(struct signal_data, 1);
+ if (!d)
+ return -ENOMEM;
+
+ *d = (struct signal_data) {
+ .wakeup = WAKEUP_SIGNAL_DATA,
+ .fd = -EBADF,
+ .priority = priority,
+ };
+
+ r = hashmap_ensure_put(&e->signal_data, &uint64_hash_ops, &d->priority, d);
+ if (r < 0) {
+ free(d);
+ return r;
+ }
+
+ added = true;
+ }
+
+ ss_copy = d->sigset;
+ assert_se(sigaddset(&ss_copy, sig) >= 0);
+
+ r = signalfd(d->fd >= 0 ? d->fd : -1, /* the first arg must be -1 or a valid signalfd */
+ &ss_copy,
+ SFD_NONBLOCK|SFD_CLOEXEC);
+ if (r < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ d->sigset = ss_copy;
+
+ if (d->fd >= 0) {
+ if (ret)
+ *ret = d;
+ return 0;
+ }
+
+ d->fd = fd_move_above_stdio(r);
+
+ struct epoll_event ev = {
+ .events = EPOLLIN,
+ .data.ptr = d,
+ };
+
+ if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ if (ret)
+ *ret = d;
+
+ return 0;
+
+fail:
+ if (added)
+ event_free_signal_data(e, d);
+
+ return r;
+}
+
+static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
+ assert(e);
+ assert(d);
+
+ /* Turns off the specified signal in the signal data
+ * object. If the signal mask of the object becomes empty that
+ * way removes it. */
+
+ if (sigismember(&d->sigset, sig) == 0)
+ return;
+
+ assert_se(sigdelset(&d->sigset, sig) >= 0);
+
+ if (sigisemptyset(&d->sigset)) {
+ /* If all the mask is all-zero we can get rid of the structure */
+ event_free_signal_data(e, d);
+ return;
+ }
+
+ if (event_origin_changed(e))
+ return;
+
+ assert(d->fd >= 0);
+
+ if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
+ log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
+}
+
+static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
+ struct signal_data *d;
+ static const int64_t zero_priority = 0;
+
+ assert(e);
+
+ /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
+ * and possibly drop the signalfd for it. */
+
+ if (sig == SIGCHLD &&
+ e->n_online_child_sources > 0)
+ return;
+
+ if (e->signal_sources &&
+ e->signal_sources[sig] &&
+ event_source_is_online(e->signal_sources[sig]))
+ return;
+
+ /*
+ * The specified signal might be enabled in three different queues:
+ *
+ * 1) the one that belongs to the priority passed (if it is non-NULL)
+ * 2) the one that belongs to the priority of the event source of the signal (if there is one)
+ * 3) the 0 priority (to cover the SIGCHLD case)
+ *
+ * Hence, let's remove it from all three here.
+ */
+
+ if (priority) {
+ d = hashmap_get(e->signal_data, priority);
+ if (d)
+ event_unmask_signal_data(e, d, sig);
+ }
+
+ if (e->signal_sources && e->signal_sources[sig]) {
+ d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
+ if (d)
+ event_unmask_signal_data(e, d, sig);
+ }
+
+ d = hashmap_get(e->signal_data, &zero_priority);
+ if (d)
+ event_unmask_signal_data(e, d, sig);
+}
+
+static void event_source_pp_prioq_reshuffle(sd_event_source *s) {
+ assert(s);
+
+ /* Reshuffles the pending + prepare prioqs. Called whenever the dispatch order changes, i.e. when
+ * they are enabled/disabled or marked pending and such. */
+
+ if (s->pending)
+ prioq_reshuffle(s->event->pending, s, &s->pending_index);
+
+ if (s->prepare)
+ prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
+}
+
+static void event_source_time_prioq_reshuffle(sd_event_source *s) {
+ struct clock_data *d;
+
+ assert(s);
+
+ /* Called whenever the event source's timer ordering properties changed, i.e. time, accuracy,
+ * pending, enable state, and ratelimiting state. Makes sure the two prioq's are ordered
+ * properly again. */
+
+ if (s->ratelimited)
+ d = &s->event->monotonic;
+ else if (EVENT_SOURCE_IS_TIME(s->type))
+ assert_se(d = event_get_clock_data(s->event, s->type));
+ else
+ return; /* no-op for an event source which is neither a timer nor ratelimited. */
+
+ prioq_reshuffle(d->earliest, s, &s->earliest_index);
+ prioq_reshuffle(d->latest, s, &s->latest_index);
+ d->needs_rearm = true;
+}
+
+static void event_source_time_prioq_remove(
+ sd_event_source *s,
+ struct clock_data *d) {
+
+ assert(s);
+ assert(d);
+
+ prioq_remove(d->earliest, s, &s->earliest_index);
+ prioq_remove(d->latest, s, &s->latest_index);
+ s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
+ d->needs_rearm = true;
+}
+
+static void source_disconnect(sd_event_source *s) {
+ sd_event *event;
+ int r;
+
+ assert(s);
+
+ if (!s->event)
+ return;
+
+ assert(s->event->n_sources > 0);
+
+ switch (s->type) {
+
+ case SOURCE_IO:
+ if (s->io.fd >= 0)
+ source_io_unregister(s);
+
+ break;
+
+ case SOURCE_TIME_REALTIME:
+ case SOURCE_TIME_BOOTTIME:
+ case SOURCE_TIME_MONOTONIC:
+ case SOURCE_TIME_REALTIME_ALARM:
+ case SOURCE_TIME_BOOTTIME_ALARM:
+ /* Only remove this event source from the time event source here if it is not ratelimited. If
+ * it is ratelimited, we'll remove it below, separately. Why? Because the clock used might
+ * differ: ratelimiting always uses CLOCK_MONOTONIC, but timer events might use any clock */
+
+ if (!s->ratelimited) {
+ struct clock_data *d;
+ assert_se(d = event_get_clock_data(s->event, s->type));
+ event_source_time_prioq_remove(s, d);
+ }
+
+ break;
+
+ case SOURCE_SIGNAL:
+ if (s->signal.sig > 0) {
+
+ if (s->event->signal_sources)
+ s->event->signal_sources[s->signal.sig] = NULL;
+
+ event_gc_signal_data(s->event, &s->priority, s->signal.sig);
+
+ if (s->signal.unblock) {
+ sigset_t new_ss;
+
+ if (sigemptyset(&new_ss) < 0)
+ log_debug_errno(errno, "Failed to reset signal set, ignoring: %m");
+ else if (sigaddset(&new_ss, s->signal.sig) < 0)
+ log_debug_errno(errno, "Failed to add signal %i to signal mask, ignoring: %m", s->signal.sig);
+ else {
+ r = pthread_sigmask(SIG_UNBLOCK, &new_ss, NULL);
+ if (r != 0)
+ log_debug_errno(r, "Failed to unblock signal %i, ignoring: %m", s->signal.sig);
+ }
+ }
+ }
+
+ break;
+
+ case SOURCE_CHILD:
+ if (event_origin_changed(s->event))
+ s->child.process_owned = false;
+
+ if (s->child.pid > 0) {
+ if (event_source_is_online(s)) {
+ assert(s->event->n_online_child_sources > 0);
+ s->event->n_online_child_sources--;
+ }
+
+ (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
+ }
+
+ if (EVENT_SOURCE_WATCH_PIDFD(s))
+ source_child_pidfd_unregister(s);
+ else
+ event_gc_signal_data(s->event, &s->priority, SIGCHLD);
+
+ break;
+
+ case SOURCE_DEFER:
+ /* nothing */
+ break;
+
+ case SOURCE_POST:
+ set_remove(s->event->post_sources, s);
+ break;
+
+ case SOURCE_EXIT:
+ prioq_remove(s->event->exit, s, &s->exit.prioq_index);
+ break;
+
+ case SOURCE_INOTIFY: {
+ struct inode_data *inode_data;
+
+ inode_data = s->inotify.inode_data;
+ if (inode_data) {
+ struct inotify_data *inotify_data;
+ assert_se(inotify_data = inode_data->inotify_data);
+
+ /* Detach this event source from the inode object */
+ LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
+ s->inotify.inode_data = NULL;
+
+ if (s->pending) {
+ assert(inotify_data->n_pending > 0);
+ inotify_data->n_pending--;
+ }
+
+ /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode is
+ * continued to being watched. That's because inotify doesn't really have an API for that: we
+ * can only change watch masks with access to the original inode either by fd or by path. But
+ * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
+ * continuously and keeping the mount busy which we can't really do. We could reconstruct the
+ * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
+ * there), but given the need for open_by_handle_at() which is privileged and not universally
+ * available this would be quite an incomplete solution. Hence we go the other way, leave the
+ * mask set, even if it is not minimized now, and ignore all events we aren't interested in
+ * anymore after reception. Yes, this sucks, but … Linux … */
+
+ /* Maybe release the inode data (and its inotify) */
+ event_gc_inode_data(s->event, inode_data);
+ }
+
+ break;
+ }
+
+ case SOURCE_MEMORY_PRESSURE:
+ source_memory_pressure_remove_from_write_list(s);
+ source_memory_pressure_unregister(s);
+ break;
+
+ default:
+ assert_not_reached();
+ }
+
+ if (s->pending)
+ prioq_remove(s->event->pending, s, &s->pending_index);
+
+ if (s->prepare)
+ prioq_remove(s->event->prepare, s, &s->prepare_index);
+
+ if (s->ratelimited)
+ event_source_time_prioq_remove(s, &s->event->monotonic);
+
+ event = TAKE_PTR(s->event);
+ LIST_REMOVE(sources, event->sources, s);
+ event->n_sources--;
+
+ /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
+ * pidfd associated with this event source, which we'll do only on source_free(). */
+
+ if (!s->floating)
+ sd_event_unref(event);
+}
+
+static sd_event_source* source_free(sd_event_source *s) {
+ assert(s);
+
+ source_disconnect(s);
+
+ if (s->type == SOURCE_IO && s->io.owned)
+ s->io.fd = safe_close(s->io.fd);
+
+ if (s->type == SOURCE_CHILD) {
+ /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */
+
+ if (s->child.process_owned) {
+
+ if (!s->child.exited) {
+ bool sent = false;
+
+ if (s->child.pidfd >= 0) {
+ if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
+ if (errno == ESRCH) /* Already dead */
+ sent = true;
+ else if (!ERRNO_IS_NOT_SUPPORTED(errno))
+ log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
+ s->child.pid);
+ } else
+ sent = true;
+ }
+
+ if (!sent)
+ if (kill(s->child.pid, SIGKILL) < 0)
+ if (errno != ESRCH) /* Already dead */
+ log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
+ s->child.pid);
+ }
+
+ if (!s->child.waited) {
+ siginfo_t si = {};
+
+ /* Reap the child if we can */
+ (void) waitid(P_PID, s->child.pid, &si, WEXITED);
+ }
+ }
+
+ if (s->child.pidfd_owned)
+ s->child.pidfd = safe_close(s->child.pidfd);
+ }
+
+ if (s->type == SOURCE_MEMORY_PRESSURE) {
+ s->memory_pressure.fd = safe_close(s->memory_pressure.fd);
+ s->memory_pressure.write_buffer = mfree(s->memory_pressure.write_buffer);
+ }
+
+ if (s->destroy_callback)
+ s->destroy_callback(s->userdata);
+
+ free(s->description);
+ return mfree(s);
+}
+DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
+
+static int source_set_pending(sd_event_source *s, bool b) {
+ int r;
+
+ assert(s);
+ assert(s->type != SOURCE_EXIT);
+
+ if (s->pending == b)
+ return 0;
+
+ s->pending = b;
+
+ if (b) {
+ s->pending_iteration = s->event->iteration;
+
+ r = prioq_put(s->event->pending, s, &s->pending_index);
+ if (r < 0) {
+ s->pending = false;
+ return r;
+ }
+ } else
+ assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
+
+ if (EVENT_SOURCE_IS_TIME(s->type))
+ event_source_time_prioq_reshuffle(s);
+
+ if (s->type == SOURCE_SIGNAL && !b) {
+ struct signal_data *d;
+
+ d = hashmap_get(s->event->signal_data, &s->priority);
+ if (d && d->current == s)
+ d->current = NULL;
+ }
+
+ if (s->type == SOURCE_INOTIFY) {
+
+ assert(s->inotify.inode_data);
+ assert(s->inotify.inode_data->inotify_data);
+
+ if (b)
+ s->inotify.inode_data->inotify_data->n_pending ++;
+ else {
+ assert(s->inotify.inode_data->inotify_data->n_pending > 0);
+ s->inotify.inode_data->inotify_data->n_pending --;
+ }
+ }
+
+ return 1;
+}
+
+static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
+
+ /* Let's allocate exactly what we need. Note that the difference of the smallest event source
+ * structure to the largest is 144 bytes on x86-64 at the time of writing, i.e. more than two cache
+ * lines. */
+ static const size_t size_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
+ [SOURCE_IO] = endoffsetof_field(sd_event_source, io),
+ [SOURCE_TIME_REALTIME] = endoffsetof_field(sd_event_source, time),
+ [SOURCE_TIME_BOOTTIME] = endoffsetof_field(sd_event_source, time),
+ [SOURCE_TIME_MONOTONIC] = endoffsetof_field(sd_event_source, time),
+ [SOURCE_TIME_REALTIME_ALARM] = endoffsetof_field(sd_event_source, time),
+ [SOURCE_TIME_BOOTTIME_ALARM] = endoffsetof_field(sd_event_source, time),
+ [SOURCE_SIGNAL] = endoffsetof_field(sd_event_source, signal),
+ [SOURCE_CHILD] = endoffsetof_field(sd_event_source, child),
+ [SOURCE_DEFER] = endoffsetof_field(sd_event_source, defer),
+ [SOURCE_POST] = endoffsetof_field(sd_event_source, post),
+ [SOURCE_EXIT] = endoffsetof_field(sd_event_source, exit),
+ [SOURCE_INOTIFY] = endoffsetof_field(sd_event_source, inotify),
+ [SOURCE_MEMORY_PRESSURE] = endoffsetof_field(sd_event_source, memory_pressure),
+ };
+
+ sd_event_source *s;
+
+ assert(e);
+ assert(type >= 0);
+ assert(type < _SOURCE_EVENT_SOURCE_TYPE_MAX);
+ assert(size_table[type] > 0);
+
+ s = malloc0(size_table[type]);
+ if (!s)
+ return NULL;
+ /* We use expand_to_usable() here to tell gcc that it should consider this an object of the full
+ * size, even if we only allocate the initial part we need. */
+ s = expand_to_usable(s, sizeof(sd_event_source));
+
+ /* Note: we cannot use compound initialization here, because sizeof(sd_event_source) is likely larger
+ * than what we allocated here. */
+ s->n_ref = 1;
+ s->event = e;
+ s->floating = floating;
+ s->type = type;
+ s->pending_index = PRIOQ_IDX_NULL;
+ s->prepare_index = PRIOQ_IDX_NULL;
+
+ if (!floating)
+ sd_event_ref(e);
+
+ LIST_PREPEND(sources, e->sources, s);
+ e->n_sources++;
+
+ return s;
+}
+
+static int io_exit_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ assert(s);
+
+ return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
+}
+
+_public_ int sd_event_add_io(
+ sd_event *e,
+ sd_event_source **ret,
+ int fd,
+ uint32_t events,
+ sd_event_io_handler_t callback,
+ void *userdata) {
+
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(fd >= 0, -EBADF);
+ assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_origin_changed(e), -ECHILD);
+
+ if (!callback)
+ callback = io_exit_callback;
+
+ s = source_new(e, !ret, SOURCE_IO);
+ if (!s)
+ return -ENOMEM;
+
+ s->wakeup = WAKEUP_EVENT_SOURCE;
+ s->io.fd = fd;
+ s->io.events = events;
+ s->io.callback = callback;
+ s->userdata = userdata;
+ s->enabled = SD_EVENT_ON;
+
+ r = source_io_register(s, s->enabled, events);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+static void initialize_perturb(sd_event *e) {
+ sd_id128_t id = {};
+
+ /* When we sleep for longer, we try to realign the wakeup to the same time within each
+ * minute/second/250ms, so that events all across the system can be coalesced into a single CPU
+ * wakeup. However, let's take some system-specific randomness for this value, so that in a network
+ * of systems with synced clocks timer events are distributed a bit. Here, we calculate a
+ * perturbation usec offset from the boot ID (or machine ID if failed, e.g. /proc is not mounted). */
+
+ if (_likely_(e->perturb != USEC_INFINITY))
+ return;
+
+ if (sd_id128_get_boot(&id) >= 0 || sd_id128_get_machine(&id) >= 0)
+ e->perturb = (id.qwords[0] ^ id.qwords[1]) % USEC_PER_MINUTE;
+ else
+ e->perturb = 0; /* This is a super early process without /proc and /etc ?? */
+}
+
+static int event_setup_timer_fd(
+ sd_event *e,
+ struct clock_data *d,
+ clockid_t clock) {
+
+ assert(e);
+ assert(d);
+
+ if (_likely_(d->fd >= 0))
+ return 0;
+
+ _cleanup_close_ int fd = -EBADF;
+
+ fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ fd = fd_move_above_stdio(fd);
+
+ struct epoll_event ev = {
+ .events = EPOLLIN,
+ .data.ptr = d,
+ };
+
+ if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0)
+ return -errno;
+
+ d->fd = TAKE_FD(fd);
+ return 0;
+}
+
+static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
+ assert(s);
+
+ return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
+}
+
+static int setup_clock_data(sd_event *e, struct clock_data *d, clockid_t clock) {
+ int r;
+
+ assert(d);
+
+ if (d->fd < 0) {
+ r = event_setup_timer_fd(e, d, clock);
+ if (r < 0)
+ return r;
+ }
+
+ r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
+ if (r < 0)
+ return r;
+
+ r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int event_source_time_prioq_put(
+ sd_event_source *s,
+ struct clock_data *d) {
+
+ int r;
+
+ assert(s);
+ assert(d);
+ assert(EVENT_SOURCE_USES_TIME_PRIOQ(s->type));
+
+ r = prioq_put(d->earliest, s, &s->earliest_index);
+ if (r < 0)
+ return r;
+
+ r = prioq_put(d->latest, s, &s->latest_index);
+ if (r < 0) {
+ assert_se(prioq_remove(d->earliest, s, &s->earliest_index) > 0);
+ s->earliest_index = PRIOQ_IDX_NULL;
+ return r;
+ }
+
+ d->needs_rearm = true;
+ return 0;
+}
+
+_public_ int sd_event_add_time(
+ sd_event *e,
+ sd_event_source **ret,
+ clockid_t clock,
+ uint64_t usec,
+ uint64_t accuracy,
+ sd_event_time_handler_t callback,
+ void *userdata) {
+
+ EventSourceType type;
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ struct clock_data *d;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(accuracy != UINT64_MAX, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_origin_changed(e), -ECHILD);
+
+ if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
+ return -EOPNOTSUPP;
+
+ type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
+ if (type < 0)
+ return -EOPNOTSUPP;
+
+ if (!callback)
+ callback = time_exit_callback;
+
+ assert_se(d = event_get_clock_data(e, type));
+
+ r = setup_clock_data(e, d, clock);
+ if (r < 0)
+ return r;
+
+ s = source_new(e, !ret, type);
+ if (!s)
+ return -ENOMEM;
+
+ s->time.next = usec;
+ s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
+ s->time.callback = callback;
+ s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
+ s->userdata = userdata;
+ s->enabled = SD_EVENT_ONESHOT;
+
+ r = event_source_time_prioq_put(s, d);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+_public_ int sd_event_add_time_relative(
+ sd_event *e,
+ sd_event_source **ret,
+ clockid_t clock,
+ uint64_t usec,
+ uint64_t accuracy,
+ sd_event_time_handler_t callback,
+ void *userdata) {
+
+ usec_t t;
+ int r;
+
+ /* Same as sd_event_add_time() but operates relative to the event loop's current point in time, and
+ * checks for overflow. */
+
+ r = sd_event_now(e, clock, &t);
+ if (r < 0)
+ return r;
+
+ if (usec >= USEC_INFINITY - t)
+ return -EOVERFLOW;
+
+ return sd_event_add_time(e, ret, clock, t + usec, accuracy, callback, userdata);
+}
+
+static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ assert(s);
+
+ return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
+}
+
+_public_ int sd_event_add_signal(
+ sd_event *e,
+ sd_event_source **ret,
+ int sig,
+ sd_event_signal_handler_t callback,
+ void *userdata) {
+
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ struct signal_data *d;
+ sigset_t new_ss;
+ bool block_it;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_origin_changed(e), -ECHILD);
+
+ /* Let's make sure our special flag stays outside of the valid signal range */
+ assert_cc(_NSIG < SD_EVENT_SIGNAL_PROCMASK);
+
+ if (sig & SD_EVENT_SIGNAL_PROCMASK) {
+ sig &= ~SD_EVENT_SIGNAL_PROCMASK;
+ assert_return(SIGNAL_VALID(sig), -EINVAL);
+
+ block_it = true;
+ } else {
+ assert_return(SIGNAL_VALID(sig), -EINVAL);
+
+ r = signal_is_blocked(sig);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EBUSY;
+
+ block_it = false;
+ }
+
+ if (!callback)
+ callback = signal_exit_callback;
+
+ if (!e->signal_sources) {
+ e->signal_sources = new0(sd_event_source*, _NSIG);
+ if (!e->signal_sources)
+ return -ENOMEM;
+ } else if (e->signal_sources[sig])
+ return -EBUSY;
+
+ s = source_new(e, !ret, SOURCE_SIGNAL);
+ if (!s)
+ return -ENOMEM;
+
+ s->signal.sig = sig;
+ s->signal.callback = callback;
+ s->userdata = userdata;
+ s->enabled = SD_EVENT_ON;
+
+ e->signal_sources[sig] = s;
+
+ if (block_it) {
+ sigset_t old_ss;
+
+ if (sigemptyset(&new_ss) < 0)
+ return -errno;
+
+ if (sigaddset(&new_ss, sig) < 0)
+ return -errno;
+
+ r = pthread_sigmask(SIG_BLOCK, &new_ss, &old_ss);
+ if (r != 0)
+ return -r;
+
+ r = sigismember(&old_ss, sig);
+ if (r < 0)
+ return -errno;
+
+ s->signal.unblock = !r;
+ } else
+ s->signal.unblock = false;
+
+ r = event_make_signal_data(e, sig, &d);
+ if (r < 0) {
+ if (s->signal.unblock)
+ (void) pthread_sigmask(SIG_UNBLOCK, &new_ss, NULL);
+
+ return r;
+ }
+
+ /* Use the signal name as description for the event source by default */
+ (void) sd_event_source_set_description(s, signal_to_string(sig));
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+static int child_exit_callback(sd_event_source *s, const siginfo_t *si, void *userdata) {
+ assert(s);
+
+ return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
+}
+
+static bool shall_use_pidfd(void) {
+ /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
+ return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
+}
+
+_public_ int sd_event_add_child(
+ sd_event *e,
+ sd_event_source **ret,
+ pid_t pid,
+ int options,
+ sd_event_child_handler_t callback,
+ void *userdata) {
+
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(pid > 1, -EINVAL);
+ assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
+ assert_return(options != 0, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_origin_changed(e), -ECHILD);
+
+ if (!callback)
+ callback = child_exit_callback;
+
+ if (e->n_online_child_sources == 0) {
+ /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
+ * for compatibility with pre-pidfd and because we don't want the reap the child processes
+ * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
+ * take effect.
+ *
+ * (As an optimization we only do this check on the first child event source created.) */
+ r = signal_is_blocked(SIGCHLD);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EBUSY;
+ }
+
+ r = hashmap_ensure_allocated(&e->child_sources, NULL);
+ if (r < 0)
+ return r;
+
+ if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
+ return -EBUSY;
+
+ s = source_new(e, !ret, SOURCE_CHILD);
+ if (!s)
+ return -ENOMEM;
+
+ s->wakeup = WAKEUP_EVENT_SOURCE;
+ s->child.options = options;
+ s->child.callback = callback;
+ s->userdata = userdata;
+ s->enabled = SD_EVENT_ONESHOT;
+
+ /* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we
+ * pin the PID, and make regular waitid() handling race-free. */
+
+ if (shall_use_pidfd()) {
+ s->child.pidfd = pidfd_open(pid, 0);
+ if (s->child.pidfd < 0) {
+ /* Propagate errors unless the syscall is not supported or blocked */
+ if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
+ return -errno;
+ } else
+ s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
+ } else
+ s->child.pidfd = -EBADF;
+
+ if (EVENT_SOURCE_WATCH_PIDFD(s)) {
+ /* We have a pidfd and we only want to watch for exit */
+ r = source_child_pidfd_register(s, s->enabled);
+ if (r < 0)
+ return r;
+
+ } else {
+ /* We have no pidfd or we shall wait for some other event than WEXITED */
+ r = event_make_signal_data(e, SIGCHLD, NULL);
+ if (r < 0)
+ return r;
+
+ e->need_process_child = true;
+ }
+
+ r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
+ if (r < 0)
+ return r;
+
+ /* These must be done after everything succeeds. */
+ s->child.pid = pid;
+ e->n_online_child_sources++;
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+ return 0;
+}
+
+_public_ int sd_event_add_child_pidfd(
+ sd_event *e,
+ sd_event_source **ret,
+ int pidfd,
+ int options,
+ sd_event_child_handler_t callback,
+ void *userdata) {
+
+
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ pid_t pid;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(pidfd >= 0, -EBADF);
+ assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
+ assert_return(options != 0, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_origin_changed(e), -ECHILD);
+
+ if (!callback)
+ callback = child_exit_callback;
+
+ if (e->n_online_child_sources == 0) {
+ r = signal_is_blocked(SIGCHLD);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EBUSY;
+ }
+
+ r = hashmap_ensure_allocated(&e->child_sources, NULL);
+ if (r < 0)
+ return r;
+
+ r = pidfd_get_pid(pidfd, &pid);
+ if (r < 0)
+ return r;
+
+ if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
+ return -EBUSY;
+
+ s = source_new(e, !ret, SOURCE_CHILD);
+ if (!s)
+ return -ENOMEM;
+
+ s->wakeup = WAKEUP_EVENT_SOURCE;
+ s->child.pidfd = pidfd;
+ s->child.pid = pid;
+ s->child.options = options;
+ s->child.callback = callback;
+ s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
+ s->userdata = userdata;
+ s->enabled = SD_EVENT_ONESHOT;
+
+ r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
+ if (r < 0)
+ return r;
+
+ if (EVENT_SOURCE_WATCH_PIDFD(s)) {
+ /* We only want to watch for WEXITED */
+ r = source_child_pidfd_register(s, s->enabled);
+ if (r < 0)
+ return r;
+ } else {
+ /* We shall wait for some other event than WEXITED */
+ r = event_make_signal_data(e, SIGCHLD, NULL);
+ if (r < 0)
+ return r;
+
+ e->need_process_child = true;
+ }
+
+ e->n_online_child_sources++;
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+ return 0;
+}
+
+static int generic_exit_callback(sd_event_source *s, void *userdata) {
+ assert(s);
+
+ return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
+}
+
+_public_ int sd_event_add_defer(
+ sd_event *e,
+ sd_event_source **ret,
+ sd_event_handler_t callback,
+ void *userdata) {
+
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_origin_changed(e), -ECHILD);
+
+ if (!callback)
+ callback = generic_exit_callback;
+
+ s = source_new(e, !ret, SOURCE_DEFER);
+ if (!s)
+ return -ENOMEM;
+
+ s->defer.callback = callback;
+ s->userdata = userdata;
+ s->enabled = SD_EVENT_ONESHOT;
+
+ r = source_set_pending(s, true);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+_public_ int sd_event_add_post(
+ sd_event *e,
+ sd_event_source **ret,
+ sd_event_handler_t callback,
+ void *userdata) {
+
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_origin_changed(e), -ECHILD);
+
+ if (!callback)
+ callback = generic_exit_callback;
+
+ s = source_new(e, !ret, SOURCE_POST);
+ if (!s)
+ return -ENOMEM;
+
+ s->post.callback = callback;
+ s->userdata = userdata;
+ s->enabled = SD_EVENT_ON;
+
+ r = set_ensure_put(&e->post_sources, NULL, s);
+ if (r < 0)
+ return r;
+ assert(r > 0);
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+_public_ int sd_event_add_exit(
+ sd_event *e,
+ sd_event_source **ret,
+ sd_event_handler_t callback,
+ void *userdata) {
+
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(callback, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_origin_changed(e), -ECHILD);
+
+ r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
+ if (r < 0)
+ return r;
+
+ s = source_new(e, !ret, SOURCE_EXIT);
+ if (!s)
+ return -ENOMEM;
+
+ s->exit.callback = callback;
+ s->userdata = userdata;
+ s->exit.prioq_index = PRIOQ_IDX_NULL;
+ s->enabled = SD_EVENT_ONESHOT;
+
+ r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+_public_ int sd_event_trim_memory(void) {
+ int r;
+
+ /* A default implementation of a memory pressure callback. Simply releases our own allocation caches
+ * and glibc's. This is automatically used when people call sd_event_add_memory_pressure() with a
+ * NULL callback parameter. */
+
+ log_debug("Memory pressure event, trimming malloc() memory.");
+
+#if HAVE_GENERIC_MALLINFO
+ generic_mallinfo before_mallinfo = generic_mallinfo_get();
+#endif
+
+ usec_t before_timestamp = now(CLOCK_MONOTONIC);
+ hashmap_trim_pools();
+ r = malloc_trim(0);
+ usec_t after_timestamp = now(CLOCK_MONOTONIC);
+
+ if (r > 0)
+ log_debug("Successfully trimmed some memory.");
+ else
+ log_debug("Couldn't trim any memory.");
+
+ usec_t period = after_timestamp - before_timestamp;
+
+#if HAVE_GENERIC_MALLINFO
+ generic_mallinfo after_mallinfo = generic_mallinfo_get();
+ size_t l = LESS_BY((size_t) before_mallinfo.hblkhd, (size_t) after_mallinfo.hblkhd) +
+ LESS_BY((size_t) before_mallinfo.arena, (size_t) after_mallinfo.arena);
+ log_struct(LOG_DEBUG,
+ LOG_MESSAGE("Memory trimming took %s, returned %s to OS.",
+ FORMAT_TIMESPAN(period, 0),
+ FORMAT_BYTES(l)),
+ "MESSAGE_ID=" SD_MESSAGE_MEMORY_TRIM_STR,
+ "TRIMMED_BYTES=%zu", l,
+ "TRIMMED_USEC=" USEC_FMT, period);
+#else
+ log_struct(LOG_DEBUG,
+ LOG_MESSAGE("Memory trimming took %s.",
+ FORMAT_TIMESPAN(period, 0)),
+ "MESSAGE_ID=" SD_MESSAGE_MEMORY_TRIM_STR,
+ "TRIMMED_USEC=" USEC_FMT, period);
+#endif
+
+ return 0;
+}
+
+static int memory_pressure_callback(sd_event_source *s, void *userdata) {
+ assert(s);
+
+ sd_event_trim_memory();
+ return 0;
+}
+
+_public_ int sd_event_add_memory_pressure(
+ sd_event *e,
+ sd_event_source **ret,
+ sd_event_handler_t callback,
+ void *userdata) {
+
+ _cleanup_free_ char *w = NULL;
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ _cleanup_close_ int path_fd = -EBADF, fd = -EBADF;
+ _cleanup_free_ void *write_buffer = NULL;
+ const char *watch, *watch_fallback = NULL, *env;
+ size_t write_buffer_size = 0;
+ struct stat st;
+ uint32_t events;
+ bool locked;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_origin_changed(e), -ECHILD);
+
+ if (!callback)
+ callback = memory_pressure_callback;
+
+ s = source_new(e, !ret, SOURCE_MEMORY_PRESSURE);
+ if (!s)
+ return -ENOMEM;
+
+ s->wakeup = WAKEUP_EVENT_SOURCE;
+ s->memory_pressure.callback = callback;
+ s->userdata = userdata;
+ s->enabled = SD_EVENT_ON;
+ s->memory_pressure.fd = -EBADF;
+
+ env = secure_getenv("MEMORY_PRESSURE_WATCH");
+ if (env) {
+ if (isempty(env) || path_equal(env, "/dev/null"))
+ return log_debug_errno(SYNTHETIC_ERRNO(EHOSTDOWN),
+ "Memory pressure logic is explicitly disabled via $MEMORY_PRESSURE_WATCH.");
+
+ if (!path_is_absolute(env) || !path_is_normalized(env))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "$MEMORY_PRESSURE_WATCH set to invalid path: %s", env);
+
+ watch = env;
+
+ env = secure_getenv("MEMORY_PRESSURE_WRITE");
+ if (env) {
+ r = unbase64mem(env, SIZE_MAX, &write_buffer, &write_buffer_size);
+ if (r < 0)
+ return r;
+ }
+
+ locked = true;
+ } else {
+
+ r = is_pressure_supported();
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EOPNOTSUPP;
+
+ /* By default we want to watch memory pressure on the local cgroup, but we'll fall back on
+ * the system wide pressure if for some reason we cannot (which could be: memory controller
+ * not delegated to us, or PSI simply not available in the kernel). On legacy cgroupv1 we'll
+ * only use the system-wide logic. */
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r == 0)
+ watch = "/proc/pressure/memory";
+ else {
+ _cleanup_free_ char *cg = NULL;
+
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cg);
+ if (r < 0)
+ return r;
+
+ w = path_join("/sys/fs/cgroup", cg, "memory.pressure");
+ if (!w)
+ return -ENOMEM;
+
+ watch = w;
+ watch_fallback = "/proc/pressure/memory";
+ }
+
+ /* Android uses three levels in its userspace low memory killer logic:
+ * some 70000 1000000
+ * some 100000 1000000
+ * full 70000 1000000
+ *
+ * GNOME's low memory monitor uses:
+ * some 70000 1000000
+ * some 100000 1000000
+ * full 100000 1000000
+ *
+ * We'll default to the middle level that both agree on. Except we do it on a 2s window
+ * (i.e. 200ms per 2s, rather than 100ms per 1s), because that's the window duration the
+ * kernel will allow us to do unprivileged, also in the future. */
+ if (asprintf((char**) &write_buffer,
+ "%s " USEC_FMT " " USEC_FMT,
+ MEMORY_PRESSURE_DEFAULT_TYPE,
+ MEMORY_PRESSURE_DEFAULT_THRESHOLD_USEC,
+ MEMORY_PRESSURE_DEFAULT_WINDOW_USEC) < 0)
+ return -ENOMEM;
+
+ write_buffer_size = strlen(write_buffer) + 1;
+ locked = false;
+ }
+
+ path_fd = open(watch, O_PATH|O_CLOEXEC);
+ if (path_fd < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ /* We got ENOENT. Three options now: try the fallback if we have one, or return the error as
+ * is (if based on user/env config), or return -EOPNOTSUPP (because we picked the path, and
+ * the PSI service apparently is not supported) */
+ if (!watch_fallback)
+ return locked ? -ENOENT : -EOPNOTSUPP;
+
+ path_fd = open(watch_fallback, O_PATH|O_CLOEXEC);
+ if (path_fd < 0) {
+ if (errno == ENOENT) /* PSI is not available in the kernel even under the fallback path? */
+ return -EOPNOTSUPP;
+ return -errno;
+ }
+ }
+
+ if (fstat(path_fd, &st) < 0)
+ return -errno;
+
+ if (S_ISSOCK(st.st_mode)) {
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return -errno;
+
+ r = connect_unix_path(fd, path_fd, NULL);
+ if (r < 0)
+ return r;
+
+ events = EPOLLIN;
+
+ } else if (S_ISREG(st.st_mode) || S_ISFIFO(st.st_mode) || S_ISCHR(st.st_mode)) {
+ fd = fd_reopen(path_fd, (write_buffer_size > 0 ? O_RDWR : O_RDONLY) |O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (fd < 0)
+ return fd;
+
+ if (S_ISREG(st.st_mode)) {
+ struct statfs sfs;
+
+ /* If this is a regular file validate this is a procfs or cgroupfs file, where we look for EPOLLPRI */
+
+ if (fstatfs(fd, &sfs) < 0)
+ return -errno;
+
+ if (!is_fs_type(&sfs, PROC_SUPER_MAGIC) &&
+ !is_fs_type(&sfs, CGROUP2_SUPER_MAGIC))
+ return -ENOTTY;
+
+ events = EPOLLPRI;
+ } else
+ /* For fifos and char devices just watch for EPOLLIN */
+ events = EPOLLIN;
+
+ } else if (S_ISDIR(st.st_mode))
+ return -EISDIR;
+ else
+ return -EBADF;
+
+ s->memory_pressure.fd = TAKE_FD(fd);
+ s->memory_pressure.write_buffer = TAKE_PTR(write_buffer);
+ s->memory_pressure.write_buffer_size = write_buffer_size;
+ s->memory_pressure.events = events;
+ s->memory_pressure.locked = locked;
+
+ /* So here's the thing: if we are talking to PSI we need to write the watch string before adding the
+ * fd to epoll (if we ignore this, then the watch won't work). Hence we'll not actually register the
+ * fd with the epoll right-away. Instead, we just add the event source to a list of memory pressure
+ * event sources on which writes must be executed before the first event loop iteration is
+ * executed. (We could also write the data here, right away, but we want to give the caller the
+ * freedom to call sd_event_source_set_memory_pressure_type() and
+ * sd_event_source_set_memory_pressure_rate() before we write it. */
+
+ if (s->memory_pressure.write_buffer_size > 0)
+ source_memory_pressure_add_to_write_list(s);
+ else {
+ r = source_memory_pressure_register(s, s->enabled);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
+ assert(e);
+
+ if (!d)
+ return;
+
+ assert(hashmap_isempty(d->inodes));
+ assert(hashmap_isempty(d->wd));
+
+ if (d->buffer_filled > 0)
+ LIST_REMOVE(buffered, e->buffered_inotify_data_list, d);
+
+ hashmap_free(d->inodes);
+ hashmap_free(d->wd);
+
+ assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
+
+ if (d->fd >= 0) {
+ if (!event_origin_changed(e) &&
+ epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
+ log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
+
+ safe_close(d->fd);
+ }
+ free(d);
+}
+
+static int event_make_inotify_data(
+ sd_event *e,
+ int64_t priority,
+ struct inotify_data **ret) {
+
+ _cleanup_close_ int fd = -EBADF;
+ struct inotify_data *d;
+ int r;
+
+ assert(e);
+
+ d = hashmap_get(e->inotify_data, &priority);
+ if (d) {
+ if (ret)
+ *ret = d;
+ return 0;
+ }
+
+ fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ fd = fd_move_above_stdio(fd);
+
+ d = new(struct inotify_data, 1);
+ if (!d)
+ return -ENOMEM;
+
+ *d = (struct inotify_data) {
+ .wakeup = WAKEUP_INOTIFY_DATA,
+ .fd = TAKE_FD(fd),
+ .priority = priority,
+ };
+
+ r = hashmap_ensure_put(&e->inotify_data, &uint64_hash_ops, &d->priority, d);
+ if (r < 0) {
+ d->fd = safe_close(d->fd);
+ free(d);
+ return r;
+ }
+
+ struct epoll_event ev = {
+ .events = EPOLLIN,
+ .data.ptr = d,
+ };
+
+ if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
+ r = -errno;
+ d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
+ * remove the fd from the epoll first, which we don't want as we couldn't
+ * add it in the first place. */
+ event_free_inotify_data(e, d);
+ return r;
+ }
+
+ if (ret)
+ *ret = d;
+
+ return 1;
+}
+
+static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
+ int r;
+
+ assert(x);
+ assert(y);
+
+ r = CMP(x->dev, y->dev);
+ if (r != 0)
+ return r;
+
+ return CMP(x->ino, y->ino);
+}
+
+static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
+ assert(d);
+
+ siphash24_compress(&d->dev, sizeof(d->dev), state);
+ siphash24_compress(&d->ino, sizeof(d->ino), state);
+}
+
+DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
+
+static void event_free_inode_data(
+ sd_event *e,
+ struct inode_data *d) {
+
+ assert(e);
+
+ if (!d)
+ return;
+
+ assert(!d->event_sources);
+
+ if (d->fd >= 0) {
+ LIST_REMOVE(to_close, e->inode_data_to_close_list, d);
+ safe_close(d->fd);
+ }
+
+ if (d->inotify_data) {
+
+ if (d->wd >= 0) {
+ if (d->inotify_data->fd >= 0 && !event_origin_changed(e)) {
+ /* So here's a problem. At the time this runs the watch descriptor might already be
+ * invalidated, because an IN_IGNORED event might be queued right the moment we enter
+ * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
+ * likely case to happen. */
+
+ if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
+ log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
+ }
+
+ assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
+ }
+
+ assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
+ }
+
+ free(d);
+}
+
+static void event_gc_inotify_data(
+ sd_event *e,
+ struct inotify_data *d) {
+
+ assert(e);
+
+ /* GCs the inotify data object if we don't need it anymore. That's the case if we don't want to watch
+ * any inode with it anymore, which in turn happens if no event source of this priority is interested
+ * in any inode any longer. That said, we maintain an extra busy counter: if non-zero we'll delay GC
+ * (under the expectation that the GC is called again once the counter is decremented). */
+
+ if (!d)
+ return;
+
+ if (!hashmap_isempty(d->inodes))
+ return;
+
+ if (d->n_busy > 0)
+ return;
+
+ event_free_inotify_data(e, d);
+}
+
+static void event_gc_inode_data(
+ sd_event *e,
+ struct inode_data *d) {
+
+ struct inotify_data *inotify_data;
+
+ assert(e);
+
+ if (!d)
+ return;
+
+ if (d->event_sources)
+ return;
+
+ inotify_data = d->inotify_data;
+ event_free_inode_data(e, d);
+
+ event_gc_inotify_data(e, inotify_data);
+}
+
+static int event_make_inode_data(
+ sd_event *e,
+ struct inotify_data *inotify_data,
+ dev_t dev,
+ ino_t ino,
+ struct inode_data **ret) {
+
+ struct inode_data *d, key;
+ int r;
+
+ assert(e);
+ assert(inotify_data);
+
+ key = (struct inode_data) {
+ .ino = ino,
+ .dev = dev,
+ };
+
+ d = hashmap_get(inotify_data->inodes, &key);
+ if (d) {
+ if (ret)
+ *ret = d;
+
+ return 0;
+ }
+
+ r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
+ if (r < 0)
+ return r;
+
+ d = new(struct inode_data, 1);
+ if (!d)
+ return -ENOMEM;
+
+ *d = (struct inode_data) {
+ .dev = dev,
+ .ino = ino,
+ .wd = -1,
+ .fd = -EBADF,
+ .inotify_data = inotify_data,
+ };
+
+ r = hashmap_put(inotify_data->inodes, d, d);
+ if (r < 0) {
+ free(d);
+ return r;
+ }
+
+ if (ret)
+ *ret = d;
+
+ return 1;
+}
+
+static uint32_t inode_data_determine_mask(struct inode_data *d) {
+ bool excl_unlink = true;
+ uint32_t combined = 0;
+
+ assert(d);
+
+ /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
+ * the IN_EXCL_UNLINK flag is ANDed instead.
+ *
+ * Note that we add all sources to the mask here, regardless whether enabled, disabled or oneshot. That's
+ * because we cannot change the mask anymore after the event source was created once, since the kernel has no
+ * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
+ * events we don't care for client-side. */
+
+ LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
+
+ if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
+ excl_unlink = false;
+
+ combined |= s->inotify.mask;
+ }
+
+ return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
+}
+
+static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
+ uint32_t combined_mask;
+ int wd, r;
+
+ assert(d);
+ assert(d->fd >= 0);
+
+ combined_mask = inode_data_determine_mask(d);
+
+ if (d->wd >= 0 && combined_mask == d->combined_mask)
+ return 0;
+
+ r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
+ if (r < 0)
+ return r;
+
+ wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
+ if (wd < 0)
+ return -errno;
+
+ if (d->wd < 0) {
+ r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
+ if (r < 0) {
+ (void) inotify_rm_watch(d->inotify_data->fd, wd);
+ return r;
+ }
+
+ d->wd = wd;
+
+ } else if (d->wd != wd) {
+
+ log_debug("Weird, the watch descriptor we already knew for this inode changed?");
+ (void) inotify_rm_watch(d->fd, wd);
+ return -EINVAL;
+ }
+
+ d->combined_mask = combined_mask;
+ return 1;
+}
+
+static int inotify_exit_callback(sd_event_source *s, const struct inotify_event *event, void *userdata) {
+ assert(s);
+
+ return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
+}
+
+static int event_add_inotify_fd_internal(
+ sd_event *e,
+ sd_event_source **ret,
+ int fd,
+ bool donate,
+ uint32_t mask,
+ sd_event_inotify_handler_t callback,
+ void *userdata) {
+
+ _cleanup_close_ int donated_fd = donate ? fd : -EBADF;
+ _cleanup_(source_freep) sd_event_source *s = NULL;
+ struct inotify_data *inotify_data = NULL;
+ struct inode_data *inode_data = NULL;
+ struct stat st;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(fd >= 0, -EBADF);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_origin_changed(e), -ECHILD);
+
+ if (!callback)
+ callback = inotify_exit_callback;
+
+ /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
+ * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
+ * the user can't use them for us. */
+ if (mask & IN_MASK_ADD)
+ return -EINVAL;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ s = source_new(e, !ret, SOURCE_INOTIFY);
+ if (!s)
+ return -ENOMEM;
+
+ s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
+ s->inotify.mask = mask;
+ s->inotify.callback = callback;
+ s->userdata = userdata;
+
+ /* Allocate an inotify object for this priority, and an inode object within it */
+ r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
+ if (r < 0)
+ return r;
+
+ r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
+ if (r < 0) {
+ event_gc_inotify_data(e, inotify_data);
+ return r;
+ }
+
+ /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
+ * the event source, until then, for which we need the original inode. */
+ if (inode_data->fd < 0) {
+ if (donated_fd >= 0)
+ inode_data->fd = TAKE_FD(donated_fd);
+ else {
+ inode_data->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (inode_data->fd < 0) {
+ r = -errno;
+ event_gc_inode_data(e, inode_data);
+ return r;
+ }
+ }
+
+ LIST_PREPEND(to_close, e->inode_data_to_close_list, inode_data);
+ }
+
+ /* Link our event source to the inode data object */
+ LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
+ s->inotify.inode_data = inode_data;
+
+ /* Actually realize the watch now */
+ r = inode_data_realize_watch(e, inode_data);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = s;
+ TAKE_PTR(s);
+
+ return 0;
+}
+
+_public_ int sd_event_add_inotify_fd(
+ sd_event *e,
+ sd_event_source **ret,
+ int fd,
+ uint32_t mask,
+ sd_event_inotify_handler_t callback,
+ void *userdata) {
+
+ return event_add_inotify_fd_internal(e, ret, fd, /* donate= */ false, mask, callback, userdata);
+}
+
+_public_ int sd_event_add_inotify(
+ sd_event *e,
+ sd_event_source **ret,
+ const char *path,
+ uint32_t mask,
+ sd_event_inotify_handler_t callback,
+ void *userdata) {
+
+ sd_event_source *s = NULL; /* avoid false maybe-uninitialized warning */
+ int fd, r;
+
+ assert_return(path, -EINVAL);
+
+ fd = open(path, O_PATH | O_CLOEXEC |
+ (mask & IN_ONLYDIR ? O_DIRECTORY : 0) |
+ (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
+ if (fd < 0)
+ return -errno;
+
+ r = event_add_inotify_fd_internal(e, &s, fd, /* donate= */ true, mask, callback, userdata);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(s, path);
+
+ if (ret)
+ *ret = s;
+
+ return r;
+}
+
+static sd_event_source* event_source_free(sd_event_source *s) {
+ if (!s)
+ return NULL;
+
+ /* Here's a special hack: when we are called from a
+ * dispatch handler we won't free the event source
+ * immediately, but we will detach the fd from the
+ * epoll. This way it is safe for the caller to unref
+ * the event source and immediately close the fd, but
+ * we still retain a valid event source object after
+ * the callback. */
+
+ if (s->dispatching)
+ source_disconnect(s);
+ else
+ source_free(s);
+
+ return NULL;
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
+
+_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
+ assert_return(s, -EINVAL);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ return free_and_strdup(&s->description, description);
+}
+
+_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
+ assert_return(s, -EINVAL);
+ assert_return(description, -EINVAL);
+
+ if (!s->description)
+ return -ENXIO;
+
+ *description = s->description;
+ return 0;
+}
+
+_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
+ assert_return(s, NULL);
+ assert_return(!event_origin_changed(s->event), NULL);
+
+ return s->event;
+}
+
+_public_ int sd_event_source_get_pending(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type != SOURCE_EXIT, -EDOM);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ return s->pending;
+}
+
+_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ return s->io.fd;
+}
+
+_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(fd >= 0, -EBADF);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ if (s->io.fd == fd)
+ return 0;
+
+ if (event_source_is_offline(s)) {
+ s->io.fd = fd;
+ s->io.registered = false;
+ } else {
+ int saved_fd;
+
+ saved_fd = s->io.fd;
+ assert(s->io.registered);
+
+ s->io.fd = fd;
+ s->io.registered = false;
+
+ r = source_io_register(s, s->enabled, s->io.events);
+ if (r < 0) {
+ s->io.fd = saved_fd;
+ s->io.registered = true;
+ return r;
+ }
+
+ (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
+ }
+
+ return 0;
+}
+
+_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ return s->io.owned;
+}
+
+_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ s->io.owned = own;
+ return 0;
+}
+
+_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
+ assert_return(s, -EINVAL);
+ assert_return(events, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ *events = s->io.events;
+ return 0;
+}
+
+_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ /* edge-triggered updates are never skipped, so we can reset edges */
+ if (s->io.events == events && !(events & EPOLLET))
+ return 0;
+
+ r = source_set_pending(s, false);
+ if (r < 0)
+ return r;
+
+ if (event_source_is_online(s)) {
+ r = source_io_register(s, s->enabled, events);
+ if (r < 0)
+ return r;
+ }
+
+ s->io.events = events;
+
+ return 0;
+}
+
+_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
+ assert_return(s, -EINVAL);
+ assert_return(revents, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(s->pending, -ENODATA);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ *revents = s->io.revents;
+ return 0;
+}
+
+_public_ int sd_event_source_get_signal(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_SIGNAL, -EDOM);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ return s->signal.sig;
+}
+
+_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
+ assert_return(s, -EINVAL);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ *priority = s->priority;
+ return 0;
+}
+
+_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
+ bool rm_inotify = false, rm_inode = false;
+ struct inotify_data *new_inotify_data = NULL;
+ struct inode_data *new_inode_data = NULL;
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ if (s->priority == priority)
+ return 0;
+
+ if (s->type == SOURCE_INOTIFY) {
+ struct inode_data *old_inode_data;
+
+ assert(s->inotify.inode_data);
+ old_inode_data = s->inotify.inode_data;
+
+ /* We need the original fd to change the priority. If we don't have it we can't change the priority,
+ * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
+ * events we allow priority changes only until the first following iteration. */
+ if (old_inode_data->fd < 0)
+ return -EOPNOTSUPP;
+
+ r = event_make_inotify_data(s->event, priority, &new_inotify_data);
+ if (r < 0)
+ return r;
+ rm_inotify = r > 0;
+
+ r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
+ if (r < 0)
+ goto fail;
+ rm_inode = r > 0;
+
+ if (new_inode_data->fd < 0) {
+ /* Duplicate the fd for the new inode object if we don't have any yet */
+ new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
+ if (new_inode_data->fd < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ LIST_PREPEND(to_close, s->event->inode_data_to_close_list, new_inode_data);
+ }
+
+ /* Move the event source to the new inode data structure */
+ LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
+ LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
+ s->inotify.inode_data = new_inode_data;
+
+ /* Now create the new watch */
+ r = inode_data_realize_watch(s->event, new_inode_data);
+ if (r < 0) {
+ /* Move it back */
+ LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
+ LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
+ s->inotify.inode_data = old_inode_data;
+ goto fail;
+ }
+
+ s->priority = priority;
+
+ event_gc_inode_data(s->event, old_inode_data);
+
+ } else if (s->type == SOURCE_SIGNAL && event_source_is_online(s)) {
+ struct signal_data *old, *d;
+
+ /* Move us from the signalfd belonging to the old
+ * priority to the signalfd of the new priority */
+
+ assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
+
+ s->priority = priority;
+
+ r = event_make_signal_data(s->event, s->signal.sig, &d);
+ if (r < 0) {
+ s->priority = old->priority;
+ return r;
+ }
+
+ event_unmask_signal_data(s->event, old, s->signal.sig);
+ } else
+ s->priority = priority;
+
+ event_source_pp_prioq_reshuffle(s);
+
+ if (s->type == SOURCE_EXIT)
+ prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
+
+ return 0;
+
+fail:
+ if (rm_inode)
+ event_free_inode_data(s->event, new_inode_data);
+
+ if (rm_inotify)
+ event_free_inotify_data(s->event, new_inotify_data);
+
+ return r;
+}
+
+_public_ int sd_event_source_get_enabled(sd_event_source *s, int *ret) {
+ /* Quick mode: the event source doesn't exist and we only want to query boolean enablement state. */
+ if (!s && !ret)
+ return false;
+
+ assert_return(s, -EINVAL);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ if (ret)
+ *ret = s->enabled;
+
+ return s->enabled != SD_EVENT_OFF;
+}
+
+static int event_source_offline(
+ sd_event_source *s,
+ int enabled,
+ bool ratelimited) {
+
+ bool was_offline;
+ int r;
+
+ assert(s);
+ assert(enabled == SD_EVENT_OFF || ratelimited);
+
+ /* Unset the pending flag when this event source is disabled */
+ if (s->enabled != SD_EVENT_OFF &&
+ enabled == SD_EVENT_OFF &&
+ !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
+ r = source_set_pending(s, false);
+ if (r < 0)
+ return r;
+ }
+
+ was_offline = event_source_is_offline(s);
+ s->enabled = enabled;
+ s->ratelimited = ratelimited;
+
+ switch (s->type) {
+
+ case SOURCE_IO:
+ source_io_unregister(s);
+ break;
+
+ case SOURCE_SIGNAL:
+ event_gc_signal_data(s->event, &s->priority, s->signal.sig);
+ break;
+
+ case SOURCE_CHILD:
+ if (!was_offline) {
+ assert(s->event->n_online_child_sources > 0);
+ s->event->n_online_child_sources--;
+ }
+
+ if (EVENT_SOURCE_WATCH_PIDFD(s))
+ source_child_pidfd_unregister(s);
+ else
+ event_gc_signal_data(s->event, &s->priority, SIGCHLD);
+ break;
+
+ case SOURCE_EXIT:
+ prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
+ break;
+
+ case SOURCE_MEMORY_PRESSURE:
+ source_memory_pressure_unregister(s);
+ break;
+
+ case SOURCE_TIME_REALTIME:
+ case SOURCE_TIME_BOOTTIME:
+ case SOURCE_TIME_MONOTONIC:
+ case SOURCE_TIME_REALTIME_ALARM:
+ case SOURCE_TIME_BOOTTIME_ALARM:
+ case SOURCE_DEFER:
+ case SOURCE_POST:
+ case SOURCE_INOTIFY:
+ break;
+
+ default:
+ assert_not_reached();
+ }
+
+ /* Always reshuffle time prioq, as the ratelimited flag may be changed. */
+ event_source_time_prioq_reshuffle(s);
+
+ return 1;
+}
+
+static int event_source_online(
+ sd_event_source *s,
+ int enabled,
+ bool ratelimited) {
+
+ bool was_online;
+ int r;
+
+ assert(s);
+ assert(enabled != SD_EVENT_OFF || !ratelimited);
+
+ /* Unset the pending flag when this event source is enabled */
+ if (s->enabled == SD_EVENT_OFF &&
+ enabled != SD_EVENT_OFF &&
+ !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
+ r = source_set_pending(s, false);
+ if (r < 0)
+ return r;
+ }
+
+ /* Are we really ready for onlining? */
+ if (enabled == SD_EVENT_OFF || ratelimited) {
+ /* Nope, we are not ready for onlining, then just update the precise state and exit */
+ s->enabled = enabled;
+ s->ratelimited = ratelimited;
+ return 0;
+ }
+
+ was_online = event_source_is_online(s);
+
+ switch (s->type) {
+ case SOURCE_IO:
+ r = source_io_register(s, enabled, s->io.events);
+ if (r < 0)
+ return r;
+ break;
+
+ case SOURCE_SIGNAL:
+ r = event_make_signal_data(s->event, s->signal.sig, NULL);
+ if (r < 0) {
+ event_gc_signal_data(s->event, &s->priority, s->signal.sig);
+ return r;
+ }
+
+ break;
+
+ case SOURCE_CHILD:
+ if (EVENT_SOURCE_WATCH_PIDFD(s)) {
+ /* yes, we have pidfd */
+
+ r = source_child_pidfd_register(s, enabled);
+ if (r < 0)
+ return r;
+ } else {
+ /* no pidfd, or something other to watch for than WEXITED */
+
+ r = event_make_signal_data(s->event, SIGCHLD, NULL);
+ if (r < 0) {
+ event_gc_signal_data(s->event, &s->priority, SIGCHLD);
+ return r;
+ }
+ }
+
+ if (!was_online)
+ s->event->n_online_child_sources++;
+ break;
+
+ case SOURCE_MEMORY_PRESSURE:
+ r = source_memory_pressure_register(s, enabled);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case SOURCE_TIME_REALTIME:
+ case SOURCE_TIME_BOOTTIME:
+ case SOURCE_TIME_MONOTONIC:
+ case SOURCE_TIME_REALTIME_ALARM:
+ case SOURCE_TIME_BOOTTIME_ALARM:
+ case SOURCE_EXIT:
+ case SOURCE_DEFER:
+ case SOURCE_POST:
+ case SOURCE_INOTIFY:
+ break;
+
+ default:
+ assert_not_reached();
+ }
+
+ s->enabled = enabled;
+ s->ratelimited = ratelimited;
+
+ /* Non-failing operations below */
+ if (s->type == SOURCE_EXIT)
+ prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
+
+ /* Always reshuffle time prioq, as the ratelimited flag may be changed. */
+ event_source_time_prioq_reshuffle(s);
+
+ return 1;
+}
+
+_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
+ int r;
+
+ assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
+
+ /* Quick mode: if the source doesn't exist, SD_EVENT_OFF is a noop. */
+ if (m == SD_EVENT_OFF && !s)
+ return 0;
+
+ assert_return(s, -EINVAL);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ /* If we are dead anyway, we are fine with turning off sources, but everything else needs to fail. */
+ if (s->event->state == SD_EVENT_FINISHED)
+ return m == SD_EVENT_OFF ? 0 : -ESTALE;
+
+ if (s->enabled == m) /* No change? */
+ return 0;
+
+ if (m == SD_EVENT_OFF)
+ r = event_source_offline(s, m, s->ratelimited);
+ else {
+ if (s->enabled != SD_EVENT_OFF) {
+ /* Switching from "on" to "oneshot" or back? If that's the case, we can take a shortcut, the
+ * event source is already enabled after all. */
+ s->enabled = m;
+ return 0;
+ }
+
+ r = event_source_online(s, m, s->ratelimited);
+ }
+ if (r < 0)
+ return r;
+
+ event_source_pp_prioq_reshuffle(s);
+ return 0;
+}
+
+_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
+ assert_return(s, -EINVAL);
+ assert_return(usec, -EINVAL);
+ assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ *usec = s->time.next;
+ return 0;
+}
+
+_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ r = source_set_pending(s, false);
+ if (r < 0)
+ return r;
+
+ s->time.next = usec;
+
+ event_source_time_prioq_reshuffle(s);
+ return 0;
+}
+
+_public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec) {
+ usec_t t;
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ if (usec == USEC_INFINITY)
+ return sd_event_source_set_time(s, USEC_INFINITY);
+
+ r = sd_event_now(s->event, event_source_type_to_clock(s->type), &t);
+ if (r < 0)
+ return r;
+
+ usec = usec_add(t, usec);
+ if (usec == USEC_INFINITY)
+ return -EOVERFLOW;
+
+ return sd_event_source_set_time(s, usec);
+}
+
+_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
+ assert_return(s, -EINVAL);
+ assert_return(usec, -EINVAL);
+ assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ *usec = s->time.accuracy;
+ return 0;
+}
+
+_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(usec != UINT64_MAX, -EINVAL);
+ assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ r = source_set_pending(s, false);
+ if (r < 0)
+ return r;
+
+ if (usec == 0)
+ usec = DEFAULT_ACCURACY_USEC;
+
+ s->time.accuracy = usec;
+
+ event_source_time_prioq_reshuffle(s);
+ return 0;
+}
+
+_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
+ assert_return(s, -EINVAL);
+ assert_return(clock, -EINVAL);
+ assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ *clock = event_source_type_to_clock(s->type);
+ return 0;
+}
+
+_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
+ assert_return(s, -EINVAL);
+ assert_return(pid, -EINVAL);
+ assert_return(s->type == SOURCE_CHILD, -EDOM);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ *pid = s->child.pid;
+ return 0;
+}
+
+_public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_CHILD, -EDOM);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ if (s->child.pidfd < 0)
+ return -EOPNOTSUPP;
+
+ return s->child.pidfd;
+}
+
+_public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_CHILD, -EDOM);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+ assert_return(SIGNAL_VALID(sig), -EINVAL);
+
+ /* If we already have seen indication the process exited refuse sending a signal early. This way we
+ * can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
+ * available. */
+ if (s->child.exited)
+ return -ESRCH;
+
+ if (s->child.pidfd >= 0) {
+ siginfo_t copy;
+
+ /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the
+ * structure here */
+ if (si)
+ copy = *si;
+
+ if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
+ /* Let's propagate the error only if the system call is not implemented or prohibited */
+ if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
+ return -errno;
+ } else
+ return 0;
+ }
+
+ /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
+ * this here. */
+ if (flags != 0)
+ return -EOPNOTSUPP;
+
+ if (si) {
+ /* We use rt_sigqueueinfo() only if siginfo_t is specified. */
+ siginfo_t copy = *si;
+
+ if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
+ return -errno;
+ } else if (kill(s->child.pid, sig) < 0)
+ return -errno;
+
+ return 0;
+}
+
+_public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_CHILD, -EDOM);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ if (s->child.pidfd < 0)
+ return -EOPNOTSUPP;
+
+ return s->child.pidfd_owned;
+}
+
+_public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_CHILD, -EDOM);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ if (s->child.pidfd < 0)
+ return -EOPNOTSUPP;
+
+ s->child.pidfd_owned = own;
+ return 0;
+}
+
+_public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_CHILD, -EDOM);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ return s->child.process_owned;
+}
+
+_public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_CHILD, -EDOM);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ s->child.process_owned = own;
+ return 0;
+}
+
+_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
+ assert_return(s, -EINVAL);
+ assert_return(mask, -EINVAL);
+ assert_return(s->type == SOURCE_INOTIFY, -EDOM);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ *mask = s->inotify.mask;
+ return 0;
+}
+
+_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(s->type != SOURCE_EXIT, -EDOM);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ if (s->prepare == callback)
+ return 0;
+
+ if (callback && s->prepare) {
+ s->prepare = callback;
+ return 0;
+ }
+
+ r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
+ if (r < 0)
+ return r;
+
+ s->prepare = callback;
+
+ if (callback) {
+ r = prioq_put(s->event->prepare, s, &s->prepare_index);
+ if (r < 0)
+ return r;
+ } else
+ prioq_remove(s->event->prepare, s, &s->prepare_index);
+
+ return 0;
+}
+
+_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
+ assert_return(s, NULL);
+ assert_return(!event_origin_changed(s->event), NULL);
+
+ return s->userdata;
+}
+
+_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
+ void *ret;
+
+ assert_return(s, NULL);
+ assert_return(!event_origin_changed(s->event), NULL);
+
+ ret = s->userdata;
+ s->userdata = userdata;
+
+ return ret;
+}
+
+static int event_source_enter_ratelimited(sd_event_source *s) {
+ int r;
+
+ assert(s);
+
+ /* When an event source becomes ratelimited, we place it in the CLOCK_MONOTONIC priority queue, with
+ * the end of the rate limit time window, much as if it was a timer event source. */
+
+ if (s->ratelimited)
+ return 0; /* Already ratelimited, this is a NOP hence */
+
+ /* Make sure we can install a CLOCK_MONOTONIC event further down. */
+ r = setup_clock_data(s->event, &s->event->monotonic, CLOCK_MONOTONIC);
+ if (r < 0)
+ return r;
+
+ /* Timer event sources are already using the earliest/latest queues for the timer scheduling. Let's
+ * first remove them from the prioq appropriate for their own clock, so that we can use the prioq
+ * fields of the event source then for adding it to the CLOCK_MONOTONIC prioq instead. */
+ if (EVENT_SOURCE_IS_TIME(s->type))
+ event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));
+
+ /* Now, let's add the event source to the monotonic clock instead */
+ r = event_source_time_prioq_put(s, &s->event->monotonic);
+ if (r < 0)
+ goto fail;
+
+ /* And let's take the event source officially offline */
+ r = event_source_offline(s, s->enabled, /* ratelimited= */ true);
+ if (r < 0) {
+ event_source_time_prioq_remove(s, &s->event->monotonic);
+ goto fail;
+ }
+
+ event_source_pp_prioq_reshuffle(s);
+
+ log_debug("Event source %p (%s) entered rate limit state.", s, strna(s->description));
+ return 0;
+
+fail:
+ /* Reinstall time event sources in the priority queue as before. This shouldn't fail, since the queue
+ * space for it should already be allocated. */
+ if (EVENT_SOURCE_IS_TIME(s->type))
+ assert_se(event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type)) >= 0);
+
+ return r;
+}
+
+static int event_source_leave_ratelimit(sd_event_source *s, bool run_callback) {
+ int r;
+
+ assert(s);
+
+ if (!s->ratelimited)
+ return 0;
+
+ /* Let's take the event source out of the monotonic prioq first. */
+ event_source_time_prioq_remove(s, &s->event->monotonic);
+
+ /* Let's then add the event source to its native clock prioq again — if this is a timer event source */
+ if (EVENT_SOURCE_IS_TIME(s->type)) {
+ r = event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type));
+ if (r < 0)
+ goto fail;
+ }
+
+ /* Let's try to take it online again. */
+ r = event_source_online(s, s->enabled, /* ratelimited= */ false);
+ if (r < 0) {
+ /* Do something roughly sensible when this failed: undo the two prioq ops above */
+ if (EVENT_SOURCE_IS_TIME(s->type))
+ event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));
+
+ goto fail;
+ }
+
+ event_source_pp_prioq_reshuffle(s);
+ ratelimit_reset(&s->rate_limit);
+
+ log_debug("Event source %p (%s) left rate limit state.", s, strna(s->description));
+
+ if (run_callback && s->ratelimit_expire_callback) {
+ s->dispatching = true;
+ r = s->ratelimit_expire_callback(s, s->userdata);
+ s->dispatching = false;
+
+ if (r < 0) {
+ log_debug_errno(r, "Ratelimit expiry callback of event source %s (type %s) returned error, %s: %m",
+ strna(s->description),
+ event_source_type_to_string(s->type),
+ s->exit_on_failure ? "exiting" : "disabling");
+
+ if (s->exit_on_failure)
+ (void) sd_event_exit(s->event, r);
+ }
+
+ if (s->n_ref == 0)
+ source_free(s);
+ else if (r < 0)
+ assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
+
+ return 1;
+ }
+
+ return 0;
+
+fail:
+ /* Do something somewhat reasonable when we cannot move an event sources out of ratelimited mode:
+ * simply put it back in it, maybe we can then process it more successfully next iteration. */
+ assert_se(event_source_time_prioq_put(s, &s->event->monotonic) >= 0);
+
+ return r;
+}
+
+static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
+ usec_t c;
+ assert(e);
+ assert(a <= b);
+
+ if (a <= 0)
+ return 0;
+ if (a >= USEC_INFINITY)
+ return USEC_INFINITY;
+
+ if (b <= a + 1)
+ return a;
+
+ initialize_perturb(e);
+
+ /*
+ Find a good time to wake up again between times a and b. We
+ have two goals here:
+
+ a) We want to wake up as seldom as possible, hence prefer
+ later times over earlier times.
+
+ b) But if we have to wake up, then let's make sure to
+ dispatch as much as possible on the entire system.
+
+ We implement this by waking up everywhere at the same time
+ within any given minute if we can, synchronised via the
+ perturbation value determined from the boot ID. If we can't,
+ then we try to find the same spot in every 10s, then 1s and
+ then 250ms step. Otherwise, we pick the last possible time
+ to wake up.
+ */
+
+ c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
+ if (c >= b) {
+ if (_unlikely_(c < USEC_PER_MINUTE))
+ return b;
+
+ c -= USEC_PER_MINUTE;
+ }
+
+ if (c >= a)
+ return c;
+
+ c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
+ if (c >= b) {
+ if (_unlikely_(c < USEC_PER_SEC*10))
+ return b;
+
+ c -= USEC_PER_SEC*10;
+ }
+
+ if (c >= a)
+ return c;
+
+ c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
+ if (c >= b) {
+ if (_unlikely_(c < USEC_PER_SEC))
+ return b;
+
+ c -= USEC_PER_SEC;
+ }
+
+ if (c >= a)
+ return c;
+
+ c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
+ if (c >= b) {
+ if (_unlikely_(c < USEC_PER_MSEC*250))
+ return b;
+
+ c -= USEC_PER_MSEC*250;
+ }
+
+ if (c >= a)
+ return c;
+
+ return b;
+}
+
+static int event_arm_timer(
+ sd_event *e,
+ struct clock_data *d) {
+
+ struct itimerspec its = {};
+ sd_event_source *a, *b;
+ usec_t t;
+
+ assert(e);
+ assert(d);
+
+ if (!d->needs_rearm)
+ return 0;
+
+ d->needs_rearm = false;
+
+ a = prioq_peek(d->earliest);
+ assert(!a || EVENT_SOURCE_USES_TIME_PRIOQ(a->type));
+ if (!a || a->enabled == SD_EVENT_OFF || time_event_source_next(a) == USEC_INFINITY) {
+
+ if (d->fd < 0)
+ return 0;
+
+ if (d->next == USEC_INFINITY)
+ return 0;
+
+ /* disarm */
+ if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
+ return -errno;
+
+ d->next = USEC_INFINITY;
+ return 0;
+ }
+
+ b = prioq_peek(d->latest);
+ assert(!b || EVENT_SOURCE_USES_TIME_PRIOQ(b->type));
+ assert(b && b->enabled != SD_EVENT_OFF);
+
+ t = sleep_between(e, time_event_source_next(a), time_event_source_latest(b));
+ if (d->next == t)
+ return 0;
+
+ assert_se(d->fd >= 0);
+
+ if (t == 0) {
+ /* We don't want to disarm here, just mean some time looooong ago. */
+ its.it_value.tv_sec = 0;
+ its.it_value.tv_nsec = 1;
+ } else
+ timespec_store(&its.it_value, t);
+
+ if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
+ return -errno;
+
+ d->next = t;
+ return 0;
+}
+
+static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
+ assert(e);
+ assert(s);
+ assert(s->type == SOURCE_IO);
+
+ /* If the event source was already pending, we just OR in the
+ * new revents, otherwise we reset the value. The ORing is
+ * necessary to handle EPOLLONESHOT events properly where
+ * readability might happen independently of writability, and
+ * we need to keep track of both */
+
+ if (s->pending)
+ s->io.revents |= revents;
+ else
+ s->io.revents = revents;
+
+ return source_set_pending(s, true);
+}
+
+static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
+ uint64_t x;
+ ssize_t ss;
+
+ assert(e);
+ assert(fd >= 0);
+
+ assert_return(events == EPOLLIN, -EIO);
+
+ ss = read(fd, &x, sizeof(x));
+ if (ss < 0) {
+ if (ERRNO_IS_TRANSIENT(errno))
+ return 0;
+
+ return -errno;
+ }
+
+ if (_unlikely_(ss != sizeof(x)))
+ return -EIO;
+
+ if (next)
+ *next = USEC_INFINITY;
+
+ return 0;
+}
+
+static int process_timer(
+ sd_event *e,
+ usec_t n,
+ struct clock_data *d) {
+
+ sd_event_source *s;
+ bool callback_invoked = false;
+ int r;
+
+ assert(e);
+ assert(d);
+
+ for (;;) {
+ s = prioq_peek(d->earliest);
+ assert(!s || EVENT_SOURCE_USES_TIME_PRIOQ(s->type));
+
+ if (!s || time_event_source_next(s) > n)
+ break;
+
+ if (s->ratelimited) {
+ /* This is an event sources whose ratelimit window has ended. Let's turn it on
+ * again. */
+ assert(s->ratelimited);
+
+ r = event_source_leave_ratelimit(s, /* run_callback */ true);
+ if (r < 0)
+ return r;
+ else if (r == 1)
+ callback_invoked = true;
+
+ continue;
+ }
+
+ if (s->enabled == SD_EVENT_OFF || s->pending)
+ break;
+
+ r = source_set_pending(s, true);
+ if (r < 0)
+ return r;
+
+ event_source_time_prioq_reshuffle(s);
+ }
+
+ return callback_invoked;
+}
+
+static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priority) {
+ int64_t min_priority = threshold;
+ bool something_new = false;
+ sd_event_source *s;
+ int r;
+
+ assert(e);
+ assert(ret_min_priority);
+
+ if (!e->need_process_child) {
+ *ret_min_priority = min_priority;
+ return 0;
+ }
+
+ e->need_process_child = false;
+
+ /* So, this is ugly. We iteratively invoke waitid() with P_PID + WNOHANG for each PID we wait
+ * for, instead of using P_ALL. This is because we only want to get child information of very
+ * specific child processes, and not all of them. We might not have processed the SIGCHLD event
+ * of a previous invocation and we don't want to maintain a unbounded *per-child* event queue,
+ * hence we really don't want anything flushed out of the kernel's queue that we don't care
+ * about. Since this is O(n) this means that if you have a lot of processes you probably want
+ * to handle SIGCHLD yourself.
+ *
+ * We do not reap the children here (by using WNOWAIT), this is only done after the event
+ * source is dispatched so that the callback still sees the process as a zombie. */
+
+ HASHMAP_FOREACH(s, e->child_sources) {
+ assert(s->type == SOURCE_CHILD);
+
+ if (s->priority > threshold)
+ continue;
+
+ if (s->pending)
+ continue;
+
+ if (event_source_is_offline(s))
+ continue;
+
+ if (s->child.exited)
+ continue;
+
+ if (EVENT_SOURCE_WATCH_PIDFD(s))
+ /* There's a usable pidfd known for this event source? Then don't waitid() for
+ * it here */
+ continue;
+
+ zero(s->child.siginfo);
+ if (waitid(P_PID, s->child.pid, &s->child.siginfo,
+ WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options) < 0)
+ return negative_errno();
+
+ if (s->child.siginfo.si_pid != 0) {
+ bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
+
+ if (zombie)
+ s->child.exited = true;
+
+ if (!zombie && (s->child.options & WEXITED)) {
+ /* If the child isn't dead then let's immediately remove the state
+ * change from the queue, since there's no benefit in leaving it
+ * queued. */
+
+ assert(s->child.options & (WSTOPPED|WCONTINUED));
+ (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
+ }
+
+ r = source_set_pending(s, true);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ something_new = true;
+ min_priority = MIN(min_priority, s->priority);
+ }
+ }
+ }
+
+ *ret_min_priority = min_priority;
+ return something_new;
+}
+
+static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
+ assert(e);
+ assert(s);
+ assert(s->type == SOURCE_CHILD);
+
+ if (s->pending)
+ return 0;
+
+ if (event_source_is_offline(s))
+ return 0;
+
+ if (!EVENT_SOURCE_WATCH_PIDFD(s))
+ return 0;
+
+ zero(s->child.siginfo);
+ if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
+ return -errno;
+
+ if (s->child.siginfo.si_pid == 0)
+ return 0;
+
+ if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
+ s->child.exited = true;
+
+ return source_set_pending(s, true);
+}
+
+static int process_signal(sd_event *e, struct signal_data *d, uint32_t events, int64_t *min_priority) {
+ int r;
+
+ assert(e);
+ assert(d);
+ assert_return(events == EPOLLIN, -EIO);
+ assert(min_priority);
+
+ /* If there's a signal queued on this priority and SIGCHLD is on this priority too, then make
+ * sure to recheck the children we watch. This is because we only ever dequeue the first signal
+ * per priority, and if we dequeue one, and SIGCHLD might be enqueued later we wouldn't know,
+ * but we might have higher priority children we care about hence we need to check that
+ * explicitly. */
+
+ if (sigismember(&d->sigset, SIGCHLD))
+ e->need_process_child = true;
+
+ /* If there's already an event source pending for this priority we don't read another */
+ if (d->current)
+ return 0;
+
+ for (;;) {
+ struct signalfd_siginfo si;
+ ssize_t n;
+ sd_event_source *s = NULL;
+
+ n = read(d->fd, &si, sizeof(si));
+ if (n < 0) {
+ if (ERRNO_IS_TRANSIENT(errno))
+ return 0;
+
+ return -errno;
+ }
+
+ if (_unlikely_(n != sizeof(si)))
+ return -EIO;
+
+ assert(SIGNAL_VALID(si.ssi_signo));
+
+ if (e->signal_sources)
+ s = e->signal_sources[si.ssi_signo];
+ if (!s)
+ continue;
+ if (s->pending)
+ continue;
+
+ s->signal.siginfo = si;
+ d->current = s;
+
+ r = source_set_pending(s, true);
+ if (r < 0)
+ return r;
+ if (r > 0 && *min_priority >= s->priority) {
+ *min_priority = s->priority;
+ return 1; /* an event source with smaller priority is queued. */
+ }
+
+ return 0;
+ }
+}
+
+static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents, int64_t threshold) {
+ ssize_t n;
+
+ assert(e);
+ assert(d);
+
+ assert_return(revents == EPOLLIN, -EIO);
+
+ /* If there's already an event source pending for this priority, don't read another */
+ if (d->n_pending > 0)
+ return 0;
+
+ /* Is the read buffer non-empty? If so, let's not read more */
+ if (d->buffer_filled > 0)
+ return 0;
+
+ if (d->priority > threshold)
+ return 0;
+
+ n = read(d->fd, &d->buffer, sizeof(d->buffer));
+ if (n < 0) {
+ if (ERRNO_IS_TRANSIENT(errno))
+ return 0;
+
+ return -errno;
+ }
+
+ assert(n > 0);
+ d->buffer_filled = (size_t) n;
+ LIST_PREPEND(buffered, e->buffered_inotify_data_list, d);
+
+ return 1;
+}
+
+static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
+ assert(e);
+ assert(d);
+ assert(sz <= d->buffer_filled);
+
+ if (sz == 0)
+ return;
+
+ /* Move the rest to the buffer to the front, in order to get things properly aligned again */
+ memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
+ d->buffer_filled -= sz;
+
+ if (d->buffer_filled == 0)
+ LIST_REMOVE(buffered, e->buffered_inotify_data_list, d);
+}
+
+static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
+ int r;
+
+ assert(e);
+ assert(d);
+
+ /* If there's already an event source pending for this priority, don't read another */
+ if (d->n_pending > 0)
+ return 0;
+
+ while (d->buffer_filled > 0) {
+ size_t sz;
+
+ /* Let's validate that the event structures are complete */
+ if (d->buffer_filled < offsetof(struct inotify_event, name))
+ return -EIO;
+
+ sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
+ if (d->buffer_filled < sz)
+ return -EIO;
+
+ if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
+ struct inode_data *inode_data;
+
+ /* The queue overran, let's pass this event to all event sources connected to this inotify
+ * object */
+
+ HASHMAP_FOREACH(inode_data, d->inodes)
+ LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
+
+ if (event_source_is_offline(s))
+ continue;
+
+ r = source_set_pending(s, true);
+ if (r < 0)
+ return r;
+ }
+ } else {
+ struct inode_data *inode_data;
+
+ /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
+ * our watch descriptor table. */
+ if (d->buffer.ev.mask & IN_IGNORED) {
+
+ inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
+ if (!inode_data) {
+ event_inotify_data_drop(e, d, sz);
+ continue;
+ }
+
+ /* The watch descriptor was removed by the kernel, let's drop it here too */
+ inode_data->wd = -1;
+ } else {
+ inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
+ if (!inode_data) {
+ event_inotify_data_drop(e, d, sz);
+ continue;
+ }
+ }
+
+ /* Trigger all event sources that are interested in these events. Also trigger all event
+ * sources if IN_IGNORED or IN_UNMOUNT is set. */
+ LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
+
+ if (event_source_is_offline(s))
+ continue;
+
+ if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
+ (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
+ continue;
+
+ r = source_set_pending(s, true);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* Something pending now? If so, let's finish, otherwise let's read more. */
+ if (d->n_pending > 0)
+ return 1;
+ }
+
+ return 0;
+}
+
+static int process_inotify(sd_event *e) {
+ int r, done = 0;
+
+ assert(e);
+
+ LIST_FOREACH(buffered, d, e->buffered_inotify_data_list) {
+ r = event_inotify_data_process(e, d);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ done ++;
+ }
+
+ return done;
+}
+
+static int process_memory_pressure(sd_event_source *s, uint32_t revents) {
+ assert(s);
+ assert(s->type == SOURCE_MEMORY_PRESSURE);
+
+ if (s->pending)
+ s->memory_pressure.revents |= revents;
+ else
+ s->memory_pressure.revents = revents;
+
+ return source_set_pending(s, true);
+}
+
+static int source_memory_pressure_write(sd_event_source *s) {
+ ssize_t n;
+ int r;
+
+ assert(s);
+ assert(s->type == SOURCE_MEMORY_PRESSURE);
+
+ /* once we start writing, the buffer is locked, we allow no further changes. */
+ s->memory_pressure.locked = true;
+
+ if (s->memory_pressure.write_buffer_size > 0) {
+ n = write(s->memory_pressure.fd, s->memory_pressure.write_buffer, s->memory_pressure.write_buffer_size);
+ if (n < 0) {
+ if (!ERRNO_IS_TRANSIENT(errno)) {
+ /* If kernel is built with CONFIG_PSI_DEFAULT_DISABLED it will expose PSI
+ * files, but then generates EOPNOSUPP on read() and write() (instead of on
+ * open()!). This sucks hard, since we can only detect this kind of failure
+ * so late. Let's make the best of it, and turn off the event source like we
+ * do for failed event source handlers. */
+
+ log_debug_errno(errno, "Writing memory pressure settings to kernel failed, disabling memory pressure event source: %m");
+ assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
+ return 0;
+ }
+
+ n = 0;
+ }
+ } else
+ n = 0;
+
+ assert(n >= 0);
+
+ if ((size_t) n == s->memory_pressure.write_buffer_size) {
+ s->memory_pressure.write_buffer = mfree(s->memory_pressure.write_buffer);
+
+ if (n > 0) {
+ s->memory_pressure.write_buffer_size = 0;
+
+ /* Update epoll events mask, since we have now written everything and don't care for EPOLLOUT anymore */
+ r = source_memory_pressure_register(s, s->enabled);
+ if (r < 0)
+ return r;
+ }
+ } else if (n > 0) {
+ _cleanup_free_ void *c = NULL;
+
+ assert((size_t) n < s->memory_pressure.write_buffer_size);
+
+ c = memdup((uint8_t*) s->memory_pressure.write_buffer + n, s->memory_pressure.write_buffer_size - n);
+ if (!c)
+ return -ENOMEM;
+
+ free_and_replace(s->memory_pressure.write_buffer, c);
+ s->memory_pressure.write_buffer_size -= n;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int source_memory_pressure_initiate_dispatch(sd_event_source *s) {
+ int r;
+
+ assert(s);
+ assert(s->type == SOURCE_MEMORY_PRESSURE);
+
+ r = source_memory_pressure_write(s);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 1; /* if we wrote something, then don't continue with dispatching user dispatch
+ * function. Instead, shortcut it so that we wait for next EPOLLOUT immediately. */
+
+ /* No pending incoming IO? Then let's not continue further */
+ if ((s->memory_pressure.revents & (EPOLLIN|EPOLLPRI)) == 0) {
+
+ /* Treat IO errors on the notifier the same ways errors returned from a callback */
+ if ((s->memory_pressure.revents & (EPOLLHUP|EPOLLERR|EPOLLRDHUP)) != 0)
+ return -EIO;
+
+ return 1; /* leave dispatch, we already processed everything */
+ }
+
+ if (s->memory_pressure.revents & EPOLLIN) {
+ uint8_t pipe_buf[PIPE_BUF];
+ ssize_t n;
+
+ /* If the fd is readable, then flush out anything that might be queued */
+
+ n = read(s->memory_pressure.fd, pipe_buf, sizeof(pipe_buf));
+ if (n < 0 && !ERRNO_IS_TRANSIENT(errno))
+ return -errno;
+ }
+
+ return 0; /* go on, dispatch to user callback */
+}
+
+static int source_dispatch(sd_event_source *s) {
+ EventSourceType saved_type;
+ sd_event *saved_event;
+ int r = 0;
+
+ assert(s);
+ assert(s->pending || s->type == SOURCE_EXIT);
+
+ /* Save the event source type, here, so that we still know it after the event callback which might
+ * invalidate the event. */
+ saved_type = s->type;
+
+ /* Similarly, store a reference to the event loop object, so that we can still access it after the
+ * callback might have invalidated/disconnected the event source. */
+ saved_event = s->event;
+ PROTECT_EVENT(saved_event);
+
+ /* Check if we hit the ratelimit for this event source, and if so, let's disable it. */
+ assert(!s->ratelimited);
+ if (!ratelimit_below(&s->rate_limit)) {
+ r = event_source_enter_ratelimited(s);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
+ r = source_set_pending(s, false);
+ if (r < 0)
+ return r;
+ }
+
+ if (s->type != SOURCE_POST) {
+ sd_event_source *z;
+
+ /* If we execute a non-post source, let's mark all post sources as pending. */
+
+ SET_FOREACH(z, s->event->post_sources) {
+ if (event_source_is_offline(z))
+ continue;
+
+ r = source_set_pending(z, true);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (s->type == SOURCE_MEMORY_PRESSURE) {
+ r = source_memory_pressure_initiate_dispatch(s);
+ if (r == -EIO) /* handle EIO errors similar to callback errors */
+ goto finish;
+ if (r < 0)
+ return r;
+ if (r > 0) /* already handled */
+ return 1;
+ }
+
+ if (s->enabled == SD_EVENT_ONESHOT) {
+ r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
+ if (r < 0)
+ return r;
+ }
+
+ s->dispatching = true;
+
+ switch (s->type) {
+
+ case SOURCE_IO:
+ r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
+ break;
+
+ case SOURCE_TIME_REALTIME:
+ case SOURCE_TIME_BOOTTIME:
+ case SOURCE_TIME_MONOTONIC:
+ case SOURCE_TIME_REALTIME_ALARM:
+ case SOURCE_TIME_BOOTTIME_ALARM:
+ r = s->time.callback(s, s->time.next, s->userdata);
+ break;
+
+ case SOURCE_SIGNAL:
+ r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
+ break;
+
+ case SOURCE_CHILD: {
+ bool zombie;
+
+ zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
+
+ r = s->child.callback(s, &s->child.siginfo, s->userdata);
+
+ /* Now, reap the PID for good. */
+ if (zombie) {
+ (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
+ s->child.waited = true;
+ }
+
+ break;
+ }
+
+ case SOURCE_DEFER:
+ r = s->defer.callback(s, s->userdata);
+ break;
+
+ case SOURCE_POST:
+ r = s->post.callback(s, s->userdata);
+ break;
+
+ case SOURCE_EXIT:
+ r = s->exit.callback(s, s->userdata);
+ break;
+
+ case SOURCE_INOTIFY: {
+ struct sd_event *e = s->event;
+ struct inotify_data *d;
+ size_t sz;
+
+ assert(s->inotify.inode_data);
+ assert_se(d = s->inotify.inode_data->inotify_data);
+
+ assert(d->buffer_filled >= offsetof(struct inotify_event, name));
+ sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
+ assert(d->buffer_filled >= sz);
+
+ /* If the inotify callback destroys the event source then this likely means we don't need to
+ * watch the inode anymore, and thus also won't need the inotify object anymore. But if we'd
+ * free it immediately, then we couldn't drop the event from the inotify event queue without
+ * memory corruption anymore, as below. Hence, let's not free it immediately, but mark it
+ * "busy" with a counter (which will ensure it's not GC'ed away prematurely). Let's then
+ * explicitly GC it after we are done dropping the inotify event from the buffer. */
+ d->n_busy++;
+ r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
+ d->n_busy--;
+
+ /* When no event is pending anymore on this inotify object, then let's drop the event from
+ * the inotify event queue buffer. */
+ if (d->n_pending == 0)
+ event_inotify_data_drop(e, d, sz);
+
+ /* Now we don't want to access 'd' anymore, it's OK to GC now. */
+ event_gc_inotify_data(e, d);
+ break;
+ }
+
+ case SOURCE_MEMORY_PRESSURE:
+ r = s->memory_pressure.callback(s, s->userdata);
+ break;
+
+ case SOURCE_WATCHDOG:
+ case _SOURCE_EVENT_SOURCE_TYPE_MAX:
+ case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
+ assert_not_reached();
+ }
+
+ s->dispatching = false;
+
+finish:
+ if (r < 0) {
+ log_debug_errno(r, "Event source %s (type %s) returned error, %s: %m",
+ strna(s->description),
+ event_source_type_to_string(saved_type),
+ s->exit_on_failure ? "exiting" : "disabling");
+
+ if (s->exit_on_failure)
+ (void) sd_event_exit(saved_event, r);
+ }
+
+ if (s->n_ref == 0)
+ source_free(s);
+ else if (r < 0)
+ assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
+
+ return 1;
+}
+
+static int event_prepare(sd_event *e) {
+ int r;
+
+ assert(e);
+
+ for (;;) {
+ sd_event_source *s;
+
+ s = prioq_peek(e->prepare);
+ if (!s || s->prepare_iteration == e->iteration || event_source_is_offline(s))
+ break;
+
+ s->prepare_iteration = e->iteration;
+ prioq_reshuffle(e->prepare, s, &s->prepare_index);
+
+ assert(s->prepare);
+ s->dispatching = true;
+ r = s->prepare(s, s->userdata);
+ s->dispatching = false;
+
+ if (r < 0) {
+ log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, %s: %m",
+ strna(s->description),
+ event_source_type_to_string(s->type),
+ s->exit_on_failure ? "exiting" : "disabling");
+
+ if (s->exit_on_failure)
+ (void) sd_event_exit(e, r);
+ }
+
+ if (s->n_ref == 0)
+ source_free(s);
+ else if (r < 0)
+ assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
+ }
+
+ return 0;
+}
+
+static int dispatch_exit(sd_event *e) {
+ sd_event_source *p;
+ int r;
+
+ assert(e);
+
+ p = prioq_peek(e->exit);
+ assert(!p || p->type == SOURCE_EXIT);
+
+ if (!p || event_source_is_offline(p)) {
+ e->state = SD_EVENT_FINISHED;
+ return 0;
+ }
+
+ PROTECT_EVENT(e);
+ e->iteration++;
+ e->state = SD_EVENT_EXITING;
+ r = source_dispatch(p);
+ e->state = SD_EVENT_INITIAL;
+ return r;
+}
+
+static sd_event_source* event_next_pending(sd_event *e) {
+ sd_event_source *p;
+
+ assert(e);
+
+ p = prioq_peek(e->pending);
+ if (!p)
+ return NULL;
+
+ if (event_source_is_offline(p))
+ return NULL;
+
+ return p;
+}
+
+static int arm_watchdog(sd_event *e) {
+ struct itimerspec its = {};
+ usec_t t;
+
+ assert(e);
+ assert(e->watchdog_fd >= 0);
+
+ t = sleep_between(e,
+ usec_add(e->watchdog_last, (e->watchdog_period / 2)),
+ usec_add(e->watchdog_last, (e->watchdog_period * 3 / 4)));
+
+ timespec_store(&its.it_value, t);
+
+ /* Make sure we never set the watchdog to 0, which tells the
+ * kernel to disable it. */
+ if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
+ its.it_value.tv_nsec = 1;
+
+ return RET_NERRNO(timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL));
+}
+
+static int process_watchdog(sd_event *e) {
+ assert(e);
+
+ if (!e->watchdog)
+ return 0;
+
+ /* Don't notify watchdog too often */
+ if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
+ return 0;
+
+ sd_notify(false, "WATCHDOG=1");
+ e->watchdog_last = e->timestamp.monotonic;
+
+ return arm_watchdog(e);
+}
+
+static void event_close_inode_data_fds(sd_event *e) {
+ struct inode_data *d;
+
+ assert(e);
+
+ /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
+ * filesystems. But we can't close them right-away as we need them as long as the user still wants to make
+ * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
+ * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
+ * compromise. */
+
+ while ((d = e->inode_data_to_close_list)) {
+ assert(d->fd >= 0);
+ d->fd = safe_close(d->fd);
+
+ LIST_REMOVE(to_close, e->inode_data_to_close_list, d);
+ }
+}
+
+static int event_memory_pressure_write_list(sd_event *e) {
+ int r;
+
+ assert(e);
+
+ for (;;) {
+ sd_event_source *s;
+
+ s = LIST_POP(memory_pressure.write_list, e->memory_pressure_write_list);
+ if (!s)
+ break;
+
+ assert(s->type == SOURCE_MEMORY_PRESSURE);
+ assert(s->memory_pressure.write_buffer_size > 0);
+ s->memory_pressure.in_write_list = false;
+
+ r = source_memory_pressure_write(s);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+_public_ int sd_event_prepare(sd_event *e) {
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_origin_changed(e), -ECHILD);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
+
+ /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
+ * this check here once, since gettid() is typically not cached, and thus want to minimize
+ * syscalls */
+ assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);
+
+ /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */
+ PROTECT_EVENT(e);
+
+ if (e->exit_requested)
+ goto pending;
+
+ e->iteration++;
+
+ e->state = SD_EVENT_PREPARING;
+ r = event_prepare(e);
+ e->state = SD_EVENT_INITIAL;
+ if (r < 0)
+ return r;
+
+ r = event_memory_pressure_write_list(e);
+ if (r < 0)
+ return r;
+
+ r = event_arm_timer(e, &e->realtime);
+ if (r < 0)
+ return r;
+
+ r = event_arm_timer(e, &e->boottime);
+ if (r < 0)
+ return r;
+
+ r = event_arm_timer(e, &e->monotonic);
+ if (r < 0)
+ return r;
+
+ r = event_arm_timer(e, &e->realtime_alarm);
+ if (r < 0)
+ return r;
+
+ r = event_arm_timer(e, &e->boottime_alarm);
+ if (r < 0)
+ return r;
+
+ event_close_inode_data_fds(e);
+
+ if (event_next_pending(e) || e->need_process_child || e->buffered_inotify_data_list)
+ goto pending;
+
+ e->state = SD_EVENT_ARMED;
+
+ return 0;
+
+pending:
+ e->state = SD_EVENT_ARMED;
+ r = sd_event_wait(e, 0);
+ if (r == 0)
+ e->state = SD_EVENT_ARMED;
+
+ return r;
+}
+
+static int epoll_wait_usec(
+ int fd,
+ struct epoll_event *events,
+ int maxevents,
+ usec_t timeout) {
+
+ int msec;
+ /* A wrapper that uses epoll_pwait2() if available, and falls back to epoll_wait() if not. */
+
+#if HAVE_EPOLL_PWAIT2
+ static bool epoll_pwait2_absent = false;
+ int r;
+
+ /* epoll_pwait2() was added to Linux 5.11 (2021-02-14) and to glibc in 2.35 (2022-02-03). In contrast
+ * to other syscalls we don't bother with our own fallback syscall wrappers on old libcs, since this
+ * is not that obvious to implement given the libc and kernel definitions differ in the last
+ * argument. Moreover, the only reason to use it is the more accurate time-outs (which is not a
+ * biggie), let's hence rely on glibc's definitions, and fallback to epoll_pwait() when that's
+ * missing. */
+
+ if (!epoll_pwait2_absent && timeout != USEC_INFINITY) {
+ r = epoll_pwait2(fd,
+ events,
+ maxevents,
+ TIMESPEC_STORE(timeout),
+ NULL);
+ if (r >= 0)
+ return r;
+ if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
+ return -errno; /* Only fallback to old epoll_wait() if the syscall is masked or not
+ * supported. */
+
+ epoll_pwait2_absent = true;
+ }
+#endif
+
+ if (timeout == USEC_INFINITY)
+ msec = -1;
+ else {
+ usec_t k;
+
+ k = DIV_ROUND_UP(timeout, USEC_PER_MSEC);
+ if (k >= INT_MAX)
+ msec = INT_MAX; /* Saturate */
+ else
+ msec = (int) k;
+ }
+
+ return RET_NERRNO(epoll_wait(fd, events, maxevents, msec));
+}
+
+static int process_epoll(sd_event *e, usec_t timeout, int64_t threshold, int64_t *ret_min_priority) {
+ size_t n_event_queue, m, n_event_max;
+ int64_t min_priority = threshold;
+ bool something_new = false;
+ int r;
+
+ assert(e);
+ assert(ret_min_priority);
+
+ n_event_queue = MAX(e->n_sources, 1u);
+ if (!GREEDY_REALLOC(e->event_queue, n_event_queue))
+ return -ENOMEM;
+
+ n_event_max = MALLOC_ELEMENTSOF(e->event_queue);
+
+ /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
+ if (e->buffered_inotify_data_list)
+ timeout = 0;
+
+ for (;;) {
+ r = epoll_wait_usec(
+ e->epoll_fd,
+ e->event_queue,
+ n_event_max,
+ timeout);
+ if (r < 0)
+ return r;
+
+ m = (size_t) r;
+
+ if (m < n_event_max)
+ break;
+
+ if (n_event_max >= n_event_queue * 10)
+ break;
+
+ if (!GREEDY_REALLOC(e->event_queue, n_event_max + n_event_queue))
+ return -ENOMEM;
+
+ n_event_max = MALLOC_ELEMENTSOF(e->event_queue);
+ timeout = 0;
+ }
+
+ /* Set timestamp only when this is called first time. */
+ if (threshold == INT64_MAX)
+ triple_timestamp_now(&e->timestamp);
+
+ for (size_t i = 0; i < m; i++) {
+
+ if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
+ r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
+ else {
+ WakeupType *t = e->event_queue[i].data.ptr;
+
+ switch (*t) {
+
+ case WAKEUP_EVENT_SOURCE: {
+ sd_event_source *s = e->event_queue[i].data.ptr;
+
+ assert(s);
+
+ if (s->priority > threshold)
+ continue;
+
+ min_priority = MIN(min_priority, s->priority);
+
+ switch (s->type) {
+
+ case SOURCE_IO:
+ r = process_io(e, s, e->event_queue[i].events);
+ break;
+
+ case SOURCE_CHILD:
+ r = process_pidfd(e, s, e->event_queue[i].events);
+ break;
+
+ case SOURCE_MEMORY_PRESSURE:
+ r = process_memory_pressure(s, e->event_queue[i].events);
+ break;
+
+ default:
+ assert_not_reached();
+ }
+
+ break;
+ }
+
+ case WAKEUP_CLOCK_DATA: {
+ struct clock_data *d = e->event_queue[i].data.ptr;
+
+ assert(d);
+
+ r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
+ break;
+ }
+
+ case WAKEUP_SIGNAL_DATA:
+ r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events, &min_priority);
+ break;
+
+ case WAKEUP_INOTIFY_DATA:
+ r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events, threshold);
+ break;
+
+ default:
+ assert_not_reached();
+ }
+ }
+ if (r < 0)
+ return r;
+ if (r > 0)
+ something_new = true;
+ }
+
+ *ret_min_priority = min_priority;
+ return something_new;
+}
+
+_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_origin_changed(e), -ECHILD);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
+
+ if (e->exit_requested) {
+ e->state = SD_EVENT_PENDING;
+ return 1;
+ }
+
+ for (int64_t threshold = INT64_MAX; ; threshold--) {
+ int64_t epoll_min_priority, child_min_priority;
+
+ /* There may be a possibility that new epoll (especially IO) and child events are
+ * triggered just after process_epoll() call but before process_child(), and the new IO
+ * events may have higher priority than the child events. To salvage these events,
+ * let's call epoll_wait() again, but accepts only events with higher priority than the
+ * previous. See issue https://github.com/systemd/systemd/issues/18190 and comments
+ * https://github.com/systemd/systemd/pull/18750#issuecomment-785801085
+ * https://github.com/systemd/systemd/pull/18922#issuecomment-792825226 */
+
+ r = process_epoll(e, timeout, threshold, &epoll_min_priority);
+ if (r == -EINTR) {
+ e->state = SD_EVENT_PENDING;
+ return 1;
+ }
+ if (r < 0)
+ goto finish;
+ if (r == 0 && threshold < INT64_MAX)
+ /* No new epoll event. */
+ break;
+
+ r = process_child(e, threshold, &child_min_priority);
+ if (r < 0)
+ goto finish;
+ if (r == 0)
+ /* No new child event. */
+ break;
+
+ threshold = MIN(epoll_min_priority, child_min_priority);
+ if (threshold == INT64_MIN)
+ break;
+
+ timeout = 0;
+ }
+
+ r = process_watchdog(e);
+ if (r < 0)
+ goto finish;
+
+ r = process_inotify(e);
+ if (r < 0)
+ goto finish;
+
+ r = process_timer(e, e->timestamp.realtime, &e->realtime);
+ if (r < 0)
+ goto finish;
+
+ r = process_timer(e, e->timestamp.boottime, &e->boottime);
+ if (r < 0)
+ goto finish;
+
+ r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
+ if (r < 0)
+ goto finish;
+
+ r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
+ if (r < 0)
+ goto finish;
+
+ r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
+ if (r < 0)
+ goto finish;
+ else if (r == 1) {
+ /* Ratelimit expiry callback was called. Let's postpone processing pending sources and
+ * put loop in the initial state in order to evaluate (in the next iteration) also sources
+ * there were potentially re-enabled by the callback.
+ *
+ * Wondering why we treat only this invocation of process_timer() differently? Once event
+ * source is ratelimited we essentially transform it into CLOCK_MONOTONIC timer hence
+ * ratelimit expiry callback is never called for any other timer type. */
+ r = 0;
+ goto finish;
+ }
+
+ if (event_next_pending(e)) {
+ e->state = SD_EVENT_PENDING;
+ return 1;
+ }
+
+ r = 0;
+
+finish:
+ e->state = SD_EVENT_INITIAL;
+
+ return r;
+}
+
+_public_ int sd_event_dispatch(sd_event *e) {
+ sd_event_source *p;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_origin_changed(e), -ECHILD);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
+
+ if (e->exit_requested)
+ return dispatch_exit(e);
+
+ p = event_next_pending(e);
+ if (p) {
+ PROTECT_EVENT(e);
+
+ e->state = SD_EVENT_RUNNING;
+ r = source_dispatch(p);
+ e->state = SD_EVENT_INITIAL;
+ return r;
+ }
+
+ e->state = SD_EVENT_INITIAL;
+
+ return 1;
+}
+
+static void event_log_delays(sd_event *e) {
+ char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
+ size_t l, i;
+
+ p = b;
+ l = sizeof(b);
+ for (i = 0; i < ELEMENTSOF(e->delays); i++) {
+ l = strpcpyf(&p, l, "%u ", e->delays[i]);
+ e->delays[i] = 0;
+ }
+ log_debug("Event loop iterations: %s", b);
+}
+
+_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_origin_changed(e), -ECHILD);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
+
+ if (e->profile_delays && e->last_run_usec != 0) {
+ usec_t this_run;
+ unsigned l;
+
+ this_run = now(CLOCK_MONOTONIC);
+
+ l = log2u64(this_run - e->last_run_usec);
+ assert(l < ELEMENTSOF(e->delays));
+ e->delays[l]++;
+
+ if (this_run - e->last_log_usec >= 5*USEC_PER_SEC) {
+ event_log_delays(e);
+ e->last_log_usec = this_run;
+ }
+ }
+
+ /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */
+ PROTECT_EVENT(e);
+
+ r = sd_event_prepare(e);
+ if (r == 0)
+ /* There was nothing? Then wait... */
+ r = sd_event_wait(e, timeout);
+
+ if (e->profile_delays)
+ e->last_run_usec = now(CLOCK_MONOTONIC);
+
+ if (r > 0) {
+ /* There's something now, then let's dispatch it */
+ r = sd_event_dispatch(e);
+ if (r < 0)
+ return r;
+
+ return 1;
+ }
+
+ return r;
+}
+
+_public_ int sd_event_loop(sd_event *e) {
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_origin_changed(e), -ECHILD);
+ assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
+
+
+ PROTECT_EVENT(e);
+
+ while (e->state != SD_EVENT_FINISHED) {
+ r = sd_event_run(e, UINT64_MAX);
+ if (r < 0)
+ return r;
+ }
+
+ return e->exit_code;
+}
+
+_public_ int sd_event_get_fd(sd_event *e) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_origin_changed(e), -ECHILD);
+
+ return e->epoll_fd;
+}
+
+_public_ int sd_event_get_state(sd_event *e) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_origin_changed(e), -ECHILD);
+
+ return e->state;
+}
+
+_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(code, -EINVAL);
+ assert_return(!event_origin_changed(e), -ECHILD);
+
+ if (!e->exit_requested)
+ return -ENODATA;
+
+ *code = e->exit_code;
+ return 0;
+}
+
+_public_ int sd_event_exit(sd_event *e, int code) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_origin_changed(e), -ECHILD);
+
+ e->exit_requested = true;
+ e->exit_code = code;
+
+ return 0;
+}
+
+_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(usec, -EINVAL);
+ assert_return(!event_origin_changed(e), -ECHILD);
+
+ if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
+ return -EOPNOTSUPP;
+
+ if (!triple_timestamp_is_set(&e->timestamp)) {
+ /* Implicitly fall back to now() if we never ran before and thus have no cached time. */
+ *usec = now(clock);
+ return 1;
+ }
+
+ *usec = triple_timestamp_by_clock(&e->timestamp, clock);
+ return 0;
+}
+
+_public_ int sd_event_default(sd_event **ret) {
+ sd_event *e = NULL;
+ int r;
+
+ if (!ret)
+ return !!default_event;
+
+ if (default_event) {
+ *ret = sd_event_ref(default_event);
+ return 0;
+ }
+
+ r = sd_event_new(&e);
+ if (r < 0)
+ return r;
+
+ e->default_event_ptr = &default_event;
+ e->tid = gettid();
+ default_event = e;
+
+ *ret = e;
+ return 1;
+}
+
+_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(tid, -EINVAL);
+ assert_return(!event_origin_changed(e), -ECHILD);
+
+ if (e->tid != 0) {
+ *tid = e->tid;
+ return 0;
+ }
+
+ return -ENXIO;
+}
+
+_public_ int sd_event_set_watchdog(sd_event *e, int b) {
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_origin_changed(e), -ECHILD);
+
+ if (e->watchdog == !!b)
+ return e->watchdog;
+
+ if (b) {
+ r = sd_watchdog_enabled(false, &e->watchdog_period);
+ if (r <= 0)
+ return r;
+
+ /* Issue first ping immediately */
+ sd_notify(false, "WATCHDOG=1");
+ e->watchdog_last = now(CLOCK_MONOTONIC);
+
+ e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
+ if (e->watchdog_fd < 0)
+ return -errno;
+
+ r = arm_watchdog(e);
+ if (r < 0)
+ goto fail;
+
+ struct epoll_event ev = {
+ .events = EPOLLIN,
+ .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
+ };
+
+ if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ } else {
+ if (e->watchdog_fd >= 0) {
+ (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
+ e->watchdog_fd = safe_close(e->watchdog_fd);
+ }
+ }
+
+ e->watchdog = !!b;
+ return e->watchdog;
+
+fail:
+ e->watchdog_fd = safe_close(e->watchdog_fd);
+ return r;
+}
+
+_public_ int sd_event_get_watchdog(sd_event *e) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_origin_changed(e), -ECHILD);
+
+ return e->watchdog;
+}
+
+_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(!event_origin_changed(e), -ECHILD);
+
+ *ret = e->iteration;
+ return 0;
+}
+
+_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
+ assert_return(s, -EINVAL);
+ assert_return(s->event, -EINVAL);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ s->destroy_callback = callback;
+ return 0;
+}
+
+_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
+ assert_return(s, -EINVAL);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ if (ret)
+ *ret = s->destroy_callback;
+
+ return !!s->destroy_callback;
+}
+
+_public_ int sd_event_source_get_floating(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ return s->floating;
+}
+
+_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
+ assert_return(s, -EINVAL);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ if (s->floating == !!b)
+ return 0;
+
+ if (!s->event) /* Already disconnected */
+ return -ESTALE;
+
+ s->floating = b;
+
+ if (b) {
+ sd_event_source_ref(s);
+ sd_event_unref(s->event);
+ } else {
+ sd_event_ref(s->event);
+ sd_event_source_unref(s);
+ }
+
+ return 1;
+}
+
+_public_ int sd_event_source_get_exit_on_failure(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type != SOURCE_EXIT, -EDOM);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ return s->exit_on_failure;
+}
+
+_public_ int sd_event_source_set_exit_on_failure(sd_event_source *s, int b) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type != SOURCE_EXIT, -EDOM);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ if (s->exit_on_failure == !!b)
+ return 0;
+
+ s->exit_on_failure = b;
+ return 1;
+}
+
+_public_ int sd_event_source_set_ratelimit(sd_event_source *s, uint64_t interval, unsigned burst) {
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ /* Turning on ratelimiting on event source types that don't support it, is a loggable offense. Doing
+ * so is a programming error. */
+ assert_return(EVENT_SOURCE_CAN_RATE_LIMIT(s->type), -EDOM);
+
+ /* When ratelimiting is configured we'll always reset the rate limit state first and start fresh,
+ * non-ratelimited. */
+ r = event_source_leave_ratelimit(s, /* run_callback */ false);
+ if (r < 0)
+ return r;
+
+ s->rate_limit = (RateLimit) { interval, burst };
+ return 0;
+}
+
+_public_ int sd_event_source_set_ratelimit_expire_callback(sd_event_source *s, sd_event_handler_t callback) {
+ assert_return(s, -EINVAL);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ s->ratelimit_expire_callback = callback;
+ return 0;
+}
+
+_public_ int sd_event_source_get_ratelimit(sd_event_source *s, uint64_t *ret_interval, unsigned *ret_burst) {
+ assert_return(s, -EINVAL);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ /* Querying whether an event source has ratelimiting configured is not a loggable offense, hence
+ * don't use assert_return(). Unlike turning on ratelimiting it's not really a programming error. */
+ if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
+ return -EDOM;
+
+ if (!ratelimit_configured(&s->rate_limit))
+ return -ENOEXEC;
+
+ if (ret_interval)
+ *ret_interval = s->rate_limit.interval;
+ if (ret_burst)
+ *ret_burst = s->rate_limit.burst;
+
+ return 0;
+}
+
+_public_ int sd_event_source_is_ratelimited(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
+ return false;
+
+ if (!ratelimit_configured(&s->rate_limit))
+ return false;
+
+ return s->ratelimited;
+}
+
+_public_ int sd_event_source_leave_ratelimit(sd_event_source *s) {
+ int r;
+
+ assert_return(s, -EINVAL);
+
+ if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
+ return 0;
+
+ if (!ratelimit_configured(&s->rate_limit))
+ return 0;
+
+ if (!s->ratelimited)
+ return 0;
+
+ r = event_source_leave_ratelimit(s, /* run_callback */ false);
+ if (r < 0)
+ return r;
+
+ return 1; /* tell caller that we indeed just left the ratelimit state */
+}
+
+_public_ int sd_event_set_signal_exit(sd_event *e, int b) {
+ bool change = false;
+ int r;
+
+ assert_return(e, -EINVAL);
+
+ if (b) {
+ /* We want to maintain pointers to these event sources, so that we can destroy them when told
+ * so. But we also don't want them to pin the event loop itself. Hence we mark them as
+ * floating after creation (and undo this before deleting them again). */
+
+ if (!e->sigint_event_source) {
+ r = sd_event_add_signal(e, &e->sigint_event_source, SIGINT | SD_EVENT_SIGNAL_PROCMASK, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ assert(sd_event_source_set_floating(e->sigint_event_source, true) >= 0);
+ change = true;
+ }
+
+ if (!e->sigterm_event_source) {
+ r = sd_event_add_signal(e, &e->sigterm_event_source, SIGTERM | SD_EVENT_SIGNAL_PROCMASK, NULL, NULL);
+ if (r < 0) {
+ if (change) {
+ assert(sd_event_source_set_floating(e->sigint_event_source, false) >= 0);
+ e->sigint_event_source = sd_event_source_unref(e->sigint_event_source);
+ }
+
+ return r;
+ }
+
+ assert(sd_event_source_set_floating(e->sigterm_event_source, true) >= 0);
+ change = true;
+ }
+
+ } else {
+ if (e->sigint_event_source) {
+ assert(sd_event_source_set_floating(e->sigint_event_source, false) >= 0);
+ e->sigint_event_source = sd_event_source_unref(e->sigint_event_source);
+ change = true;
+ }
+
+ if (e->sigterm_event_source) {
+ assert(sd_event_source_set_floating(e->sigterm_event_source, false) >= 0);
+ e->sigterm_event_source = sd_event_source_unref(e->sigterm_event_source);
+ change = true;
+ }
+ }
+
+ return change;
+}
+
+_public_ int sd_event_source_set_memory_pressure_type(sd_event_source *s, const char *ty) {
+ _cleanup_free_ char *b = NULL;
+ _cleanup_free_ void *w = NULL;
+
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_MEMORY_PRESSURE, -EDOM);
+ assert_return(ty, -EINVAL);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ if (!STR_IN_SET(ty, "some", "full"))
+ return -EINVAL;
+
+ if (s->memory_pressure.locked) /* Refuse adjusting parameters, if caller told us how to watch for events */
+ return -EBUSY;
+
+ char* space = memchr(s->memory_pressure.write_buffer, ' ', s->memory_pressure.write_buffer_size);
+ if (!space)
+ return -EINVAL;
+
+ size_t l = (char*) space - (char*) s->memory_pressure.write_buffer;
+ b = memdup_suffix0(s->memory_pressure.write_buffer, l);
+ if (!b)
+ return -ENOMEM;
+ if (!STR_IN_SET(b, "some", "full"))
+ return -EINVAL;
+
+ if (streq(b, ty))
+ return 0;
+
+ size_t nl = strlen(ty) + (s->memory_pressure.write_buffer_size - l);
+ w = new(char, nl);
+ if (!w)
+ return -ENOMEM;
+
+ memcpy(stpcpy(w, ty), space, (s->memory_pressure.write_buffer_size - l));
+
+ free_and_replace(s->memory_pressure.write_buffer, w);
+ s->memory_pressure.write_buffer_size = nl;
+ s->memory_pressure.locked = false;
+
+ return 1;
+}
+
+_public_ int sd_event_source_set_memory_pressure_period(sd_event_source *s, uint64_t threshold_usec, uint64_t window_usec) {
+ _cleanup_free_ char *b = NULL;
+ _cleanup_free_ void *w = NULL;
+
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_MEMORY_PRESSURE, -EDOM);
+ assert_return(!event_origin_changed(s->event), -ECHILD);
+
+ if (threshold_usec <= 0 || threshold_usec >= UINT64_MAX)
+ return -ERANGE;
+ if (window_usec <= 0 || window_usec >= UINT64_MAX)
+ return -ERANGE;
+ if (threshold_usec > window_usec)
+ return -EINVAL;
+
+ if (s->memory_pressure.locked) /* Refuse adjusting parameters, if caller told us how to watch for events */
+ return -EBUSY;
+
+ char* space = memchr(s->memory_pressure.write_buffer, ' ', s->memory_pressure.write_buffer_size);
+ if (!space)
+ return -EINVAL;
+
+ size_t l = (char*) space - (char*) s->memory_pressure.write_buffer;
+ b = memdup_suffix0(s->memory_pressure.write_buffer, l);
+ if (!b)
+ return -ENOMEM;
+ if (!STR_IN_SET(b, "some", "full"))
+ return -EINVAL;
+
+ if (asprintf((char**) &w,
+ "%s " USEC_FMT " " USEC_FMT "",
+ b,
+ threshold_usec,
+ window_usec) < 0)
+ return -EINVAL;
+
+ l = strlen(w) + 1;
+ if (memcmp_nn(s->memory_pressure.write_buffer, s->memory_pressure.write_buffer_size, w, l) == 0)
+ return 0;
+
+ free_and_replace(s->memory_pressure.write_buffer, w);
+ s->memory_pressure.write_buffer_size = l;
+ s->memory_pressure.locked = false;
+
+ return 1;
+}
diff --git a/src/libsystemd/sd-event/test-event.c b/src/libsystemd/sd-event/test-event.c
new file mode 100644
index 0000000..63d3ee7
--- /dev/null
+++ b/src/libsystemd/sd-event/test-event.c
@@ -0,0 +1,902 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "exec-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing_syscall.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "rm-rf.h"
+#include "signal-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+
+static int prepare_handler(sd_event_source *s, void *userdata) {
+ log_info("preparing %c", PTR_TO_INT(userdata));
+ return 1;
+}
+
+static bool got_a, got_b, got_c, got_unref;
+static unsigned got_d;
+
+static int unref_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ sd_event_source_unref(s);
+ got_unref = true;
+ return 0;
+}
+
+static int io_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+
+ log_info("got IO on %c", PTR_TO_INT(userdata));
+
+ if (userdata == INT_TO_PTR('a')) {
+ assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
+ assert_se(!got_a);
+ got_a = true;
+ } else if (userdata == INT_TO_PTR('b')) {
+ assert_se(!got_b);
+ got_b = true;
+ } else if (userdata == INT_TO_PTR('d')) {
+ got_d++;
+ if (got_d < 2)
+ assert_se(sd_event_source_set_enabled(s, SD_EVENT_ONESHOT) >= 0);
+ else
+ assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
+ } else
+ assert_not_reached();
+
+ return 1;
+}
+
+static int child_handler(sd_event_source *s, const siginfo_t *si, void *userdata) {
+
+ assert_se(s);
+ assert_se(si);
+
+ assert_se(si->si_uid == getuid());
+ assert_se(si->si_signo == SIGCHLD);
+ assert_se(si->si_code == CLD_EXITED);
+ assert_se(si->si_status == 78);
+
+ log_info("got child on %c", PTR_TO_INT(userdata));
+
+ assert_se(userdata == INT_TO_PTR('f'));
+
+ assert_se(sd_event_exit(sd_event_source_get_event(s), 0) >= 0);
+ sd_event_source_unref(s);
+
+ return 1;
+}
+
+static int signal_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ sd_event_source *p = NULL;
+ pid_t pid;
+ siginfo_t plain_si;
+
+ assert_se(s);
+ assert_se(si);
+
+ log_info("got signal on %c", PTR_TO_INT(userdata));
+
+ assert_se(userdata == INT_TO_PTR('e'));
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, SIGUSR2, -1) >= 0);
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ sigset_t ss;
+
+ assert_se(sigemptyset(&ss) >= 0);
+ assert_se(sigaddset(&ss, SIGUSR2) >= 0);
+
+ zero(plain_si);
+ assert_se(sigwaitinfo(&ss, &plain_si) >= 0);
+
+ assert_se(plain_si.si_signo == SIGUSR2);
+ assert_se(plain_si.si_value.sival_int == 4711);
+
+ _exit(78);
+ }
+
+ assert_se(sd_event_add_child(sd_event_source_get_event(s), &p, pid, WEXITED, child_handler, INT_TO_PTR('f')) >= 0);
+ assert_se(sd_event_source_set_enabled(p, SD_EVENT_ONESHOT) >= 0);
+ assert_se(sd_event_source_set_child_process_own(p, true) >= 0);
+
+ /* We can't use structured initialization here, since the structure contains various unions and these
+ * fields lie in overlapping (carefully aligned) unions that LLVM is allergic to allow assignments
+ * to */
+ zero(plain_si);
+ plain_si.si_signo = SIGUSR2;
+ plain_si.si_code = SI_QUEUE;
+ plain_si.si_pid = getpid_cached();
+ plain_si.si_uid = getuid();
+ plain_si.si_value.sival_int = 4711;
+
+ assert_se(sd_event_source_send_child_signal(p, SIGUSR2, &plain_si, 0) >= 0);
+
+ sd_event_source_unref(s);
+
+ return 1;
+}
+
+static int defer_handler(sd_event_source *s, void *userdata) {
+ sd_event_source *p = NULL;
+
+ assert_se(s);
+
+ log_info("got defer on %c", PTR_TO_INT(userdata));
+
+ assert_se(userdata == INT_TO_PTR('d'));
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGUSR1, -1) >= 0);
+
+ assert_se(sd_event_add_signal(sd_event_source_get_event(s), &p, SIGUSR1, signal_handler, INT_TO_PTR('e')) >= 0);
+ assert_se(sd_event_source_set_enabled(p, SD_EVENT_ONESHOT) >= 0);
+ raise(SIGUSR1);
+
+ sd_event_source_unref(s);
+
+ return 1;
+}
+
+static bool do_quit;
+
+static int time_handler(sd_event_source *s, uint64_t usec, void *userdata) {
+ log_info("got timer on %c", PTR_TO_INT(userdata));
+
+ if (userdata == INT_TO_PTR('c')) {
+
+ if (do_quit) {
+ sd_event_source *p;
+
+ assert_se(sd_event_add_defer(sd_event_source_get_event(s), &p, defer_handler, INT_TO_PTR('d')) >= 0);
+ assert_se(sd_event_source_set_enabled(p, SD_EVENT_ONESHOT) >= 0);
+ } else {
+ assert_se(!got_c);
+ got_c = true;
+ }
+ } else
+ assert_not_reached();
+
+ return 2;
+}
+
+static bool got_exit = false;
+
+static int exit_handler(sd_event_source *s, void *userdata) {
+ log_info("got quit handler on %c", PTR_TO_INT(userdata));
+
+ got_exit = true;
+
+ return 3;
+}
+
+static bool got_post = false;
+
+static int post_handler(sd_event_source *s, void *userdata) {
+ log_info("got post handler");
+
+ got_post = true;
+
+ return 2;
+}
+
+static void test_basic_one(bool with_pidfd) {
+ sd_event *e = NULL;
+ sd_event_source *w = NULL, *x = NULL, *y = NULL, *z = NULL, *q = NULL, *t = NULL;
+ static const char ch = 'x';
+ int a[2] = EBADF_PAIR, b[2] = EBADF_PAIR,
+ d[2] = EBADF_PAIR, k[2] = EBADF_PAIR;
+ uint64_t event_now;
+ int64_t priority;
+
+ log_info("/* %s(pidfd=%s) */", __func__, yes_no(with_pidfd));
+
+ assert_se(setenv("SYSTEMD_PIDFD", yes_no(with_pidfd), 1) >= 0);
+
+ assert_se(pipe(a) >= 0);
+ assert_se(pipe(b) >= 0);
+ assert_se(pipe(d) >= 0);
+ assert_se(pipe(k) >= 0);
+
+ assert_se(sd_event_default(&e) >= 0);
+ assert_se(sd_event_now(e, CLOCK_MONOTONIC, &event_now) > 0);
+
+ assert_se(sd_event_set_watchdog(e, true) >= 0);
+
+ /* Test whether we cleanly can destroy an io event source from its own handler */
+ got_unref = false;
+ assert_se(sd_event_add_io(e, &t, k[0], EPOLLIN, unref_handler, NULL) >= 0);
+ assert_se(write(k[1], &ch, 1) == 1);
+ assert_se(sd_event_run(e, UINT64_MAX) >= 1);
+ assert_se(got_unref);
+
+ got_a = false, got_b = false, got_c = false, got_d = 0;
+
+ /* Add a oneshot handler, trigger it, reenable it, and trigger it again. */
+ assert_se(sd_event_add_io(e, &w, d[0], EPOLLIN, io_handler, INT_TO_PTR('d')) >= 0);
+ assert_se(sd_event_source_set_enabled(w, SD_EVENT_ONESHOT) >= 0);
+ assert_se(write(d[1], &ch, 1) >= 0);
+ assert_se(sd_event_run(e, UINT64_MAX) >= 1);
+ assert_se(got_d == 1);
+ assert_se(write(d[1], &ch, 1) >= 0);
+ assert_se(sd_event_run(e, UINT64_MAX) >= 1);
+ assert_se(got_d == 2);
+
+ assert_se(sd_event_add_io(e, &x, a[0], EPOLLIN, io_handler, INT_TO_PTR('a')) >= 0);
+ assert_se(sd_event_add_io(e, &y, b[0], EPOLLIN, io_handler, INT_TO_PTR('b')) >= 0);
+
+ do_quit = false;
+ assert_se(sd_event_add_time(e, &z, CLOCK_MONOTONIC, 0, 0, time_handler, INT_TO_PTR('c')) >= 0);
+ assert_se(sd_event_add_exit(e, &q, exit_handler, INT_TO_PTR('g')) >= 0);
+
+ assert_se(sd_event_source_set_priority(x, 99) >= 0);
+ assert_se(sd_event_source_get_priority(x, &priority) >= 0);
+ assert_se(priority == 99);
+ assert_se(sd_event_source_set_enabled(y, SD_EVENT_ONESHOT) >= 0);
+ assert_se(sd_event_source_set_prepare(x, prepare_handler) >= 0);
+ assert_se(sd_event_source_set_priority(z, 50) >= 0);
+ assert_se(sd_event_source_set_enabled(z, SD_EVENT_ONESHOT) >= 0);
+ assert_se(sd_event_source_set_prepare(z, prepare_handler) >= 0);
+
+ /* Test for floating event sources */
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGRTMIN+1, -1) >= 0);
+ assert_se(sd_event_add_signal(e, NULL, SIGRTMIN+1, NULL, NULL) >= 0);
+
+ assert_se(write(a[1], &ch, 1) >= 0);
+ assert_se(write(b[1], &ch, 1) >= 0);
+
+ assert_se(!got_a && !got_b && !got_c);
+
+ assert_se(sd_event_run(e, UINT64_MAX) >= 1);
+
+ assert_se(!got_a && got_b && !got_c);
+
+ assert_se(sd_event_run(e, UINT64_MAX) >= 1);
+
+ assert_se(!got_a && got_b && got_c);
+
+ assert_se(sd_event_run(e, UINT64_MAX) >= 1);
+
+ assert_se(got_a && got_b && got_c);
+
+ sd_event_source_unref(x);
+ sd_event_source_unref(y);
+
+ do_quit = true;
+ assert_se(sd_event_add_post(e, NULL, post_handler, NULL) >= 0);
+ assert_se(sd_event_now(e, CLOCK_MONOTONIC, &event_now) == 0);
+ assert_se(sd_event_source_set_time(z, event_now + 200 * USEC_PER_MSEC) >= 0);
+ assert_se(sd_event_source_set_enabled(z, SD_EVENT_ONESHOT) >= 0);
+
+ assert_se(sd_event_loop(e) >= 0);
+ assert_se(got_post);
+ assert_se(got_exit);
+
+ sd_event_source_unref(z);
+ sd_event_source_unref(q);
+
+ sd_event_source_unref(w);
+
+ sd_event_unref(e);
+
+ safe_close_pair(a);
+ safe_close_pair(b);
+ safe_close_pair(d);
+ safe_close_pair(k);
+
+ assert_se(unsetenv("SYSTEMD_PIDFD") >= 0);
+}
+
+TEST(basic) {
+ test_basic_one(true); /* test with pidfd */
+ test_basic_one(false); /* test without pidfd */
+}
+
+TEST(sd_event_now) {
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ uint64_t event_now;
+
+ assert_se(sd_event_new(&e) >= 0);
+ assert_se(sd_event_now(e, CLOCK_MONOTONIC, &event_now) > 0);
+ assert_se(sd_event_now(e, CLOCK_REALTIME, &event_now) > 0);
+ assert_se(sd_event_now(e, CLOCK_REALTIME_ALARM, &event_now) > 0);
+ assert_se(sd_event_now(e, CLOCK_BOOTTIME, &event_now) > 0);
+ assert_se(sd_event_now(e, CLOCK_BOOTTIME_ALARM, &event_now) > 0);
+ assert_se(sd_event_now(e, -1, &event_now) == -EOPNOTSUPP);
+ assert_se(sd_event_now(e, 900 /* arbitrary big number */, &event_now) == -EOPNOTSUPP);
+
+ assert_se(sd_event_run(e, 0) == 0);
+
+ assert_se(sd_event_now(e, CLOCK_MONOTONIC, &event_now) == 0);
+ assert_se(sd_event_now(e, CLOCK_REALTIME, &event_now) == 0);
+ assert_se(sd_event_now(e, CLOCK_REALTIME_ALARM, &event_now) == 0);
+ assert_se(sd_event_now(e, CLOCK_BOOTTIME, &event_now) == 0);
+ assert_se(sd_event_now(e, CLOCK_BOOTTIME_ALARM, &event_now) == 0);
+ assert_se(sd_event_now(e, -1, &event_now) == -EOPNOTSUPP);
+ assert_se(sd_event_now(e, 900 /* arbitrary big number */, &event_now) == -EOPNOTSUPP);
+}
+
+static int last_rtqueue_sigval = 0;
+static int n_rtqueue = 0;
+
+static int rtqueue_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ last_rtqueue_sigval = si->ssi_int;
+ n_rtqueue++;
+ return 0;
+}
+
+TEST(rtqueue) {
+ sd_event_source *u = NULL, *v = NULL, *s = NULL;
+ sd_event *e = NULL;
+
+ assert_se(sd_event_default(&e) >= 0);
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGRTMIN+2, SIGRTMIN+3, SIGUSR2, -1) >= 0);
+ assert_se(sd_event_add_signal(e, &u, SIGRTMIN+2, rtqueue_handler, NULL) >= 0);
+ assert_se(sd_event_add_signal(e, &v, SIGRTMIN+3, rtqueue_handler, NULL) >= 0);
+ assert_se(sd_event_add_signal(e, &s, SIGUSR2, rtqueue_handler, NULL) >= 0);
+
+ assert_se(sd_event_source_set_priority(v, -10) >= 0);
+
+ assert_se(sigqueue(getpid_cached(), SIGRTMIN+2, (union sigval) { .sival_int = 1 }) >= 0);
+ assert_se(sigqueue(getpid_cached(), SIGRTMIN+3, (union sigval) { .sival_int = 2 }) >= 0);
+ assert_se(sigqueue(getpid_cached(), SIGUSR2, (union sigval) { .sival_int = 3 }) >= 0);
+ assert_se(sigqueue(getpid_cached(), SIGRTMIN+3, (union sigval) { .sival_int = 4 }) >= 0);
+ assert_se(sigqueue(getpid_cached(), SIGUSR2, (union sigval) { .sival_int = 5 }) >= 0);
+
+ assert_se(n_rtqueue == 0);
+ assert_se(last_rtqueue_sigval == 0);
+
+ assert_se(sd_event_run(e, UINT64_MAX) >= 1);
+ assert_se(n_rtqueue == 1);
+ assert_se(last_rtqueue_sigval == 2); /* first SIGRTMIN+3 */
+
+ assert_se(sd_event_run(e, UINT64_MAX) >= 1);
+ assert_se(n_rtqueue == 2);
+ assert_se(last_rtqueue_sigval == 4); /* second SIGRTMIN+3 */
+
+ assert_se(sd_event_run(e, UINT64_MAX) >= 1);
+ assert_se(n_rtqueue == 3);
+ assert_se(last_rtqueue_sigval == 3); /* first SIGUSR2 */
+
+ assert_se(sd_event_run(e, UINT64_MAX) >= 1);
+ assert_se(n_rtqueue == 4);
+ assert_se(last_rtqueue_sigval == 1); /* SIGRTMIN+2 */
+
+ assert_se(sd_event_run(e, 0) == 0); /* the other SIGUSR2 is dropped, because the first one was still queued */
+ assert_se(n_rtqueue == 4);
+ assert_se(last_rtqueue_sigval == 1);
+
+ sd_event_source_unref(u);
+ sd_event_source_unref(v);
+ sd_event_source_unref(s);
+
+ sd_event_unref(e);
+}
+
+#define CREATE_EVENTS_MAX (70000U)
+
+struct inotify_context {
+ bool delete_self_handler_called;
+ unsigned create_called[CREATE_EVENTS_MAX];
+ unsigned create_overflow;
+ unsigned n_create_events;
+};
+
+static void maybe_exit(sd_event_source *s, struct inotify_context *c) {
+ unsigned n;
+
+ assert_se(s);
+ assert_se(c);
+
+ if (!c->delete_self_handler_called)
+ return;
+
+ for (n = 0; n < 3; n++) {
+ unsigned i;
+
+ if (c->create_overflow & (1U << n))
+ continue;
+
+ for (i = 0; i < c->n_create_events; i++)
+ if (!(c->create_called[i] & (1U << n)))
+ return;
+ }
+
+ sd_event_exit(sd_event_source_get_event(s), 0);
+}
+
+static int inotify_handler(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
+ struct inotify_context *c = userdata;
+ const char *description;
+ unsigned bit, n;
+
+ assert_se(sd_event_source_get_description(s, &description) >= 0);
+ assert_se(safe_atou(description, &n) >= 0);
+
+ assert_se(n <= 3);
+ bit = 1U << n;
+
+ if (ev->mask & IN_Q_OVERFLOW) {
+ log_info("inotify-handler <%s>: overflow", description);
+ c->create_overflow |= bit;
+ } else if (ev->mask & IN_CREATE) {
+ if (streq(ev->name, "sub"))
+ log_debug("inotify-handler <%s>: create on %s", description, ev->name);
+ else {
+ unsigned i;
+
+ assert_se(safe_atou(ev->name, &i) >= 0);
+ assert_se(i < c->n_create_events);
+ c->create_called[i] |= bit;
+ }
+ } else if (ev->mask & IN_DELETE) {
+ log_info("inotify-handler <%s>: delete of %s", description, ev->name);
+ assert_se(streq(ev->name, "sub"));
+ } else
+ assert_not_reached();
+
+ maybe_exit(s, c);
+ return 1;
+}
+
+static int delete_self_handler(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
+ struct inotify_context *c = userdata;
+
+ if (ev->mask & IN_Q_OVERFLOW) {
+ log_info("delete-self-handler: overflow");
+ c->delete_self_handler_called = true;
+ } else if (ev->mask & IN_DELETE_SELF) {
+ log_info("delete-self-handler: delete-self");
+ c->delete_self_handler_called = true;
+ } else if (ev->mask & IN_IGNORED) {
+ log_info("delete-self-handler: ignore");
+ } else
+ assert_not_reached();
+
+ maybe_exit(s, c);
+ return 1;
+}
+
+static void test_inotify_one(unsigned n_create_events) {
+ _cleanup_(rm_rf_physical_and_freep) char *p = NULL;
+ sd_event_source *a = NULL, *b = NULL, *c = NULL, *d = NULL;
+ struct inotify_context context = {
+ .n_create_events = n_create_events,
+ };
+ sd_event *e = NULL;
+ const char *q;
+ unsigned i;
+
+ log_info("/* %s(%u) */", __func__, n_create_events);
+
+ assert_se(sd_event_default(&e) >= 0);
+
+ assert_se(mkdtemp_malloc("/tmp/test-inotify-XXXXXX", &p) >= 0);
+
+ assert_se(sd_event_add_inotify(e, &a, p, IN_CREATE|IN_ONLYDIR, inotify_handler, &context) >= 0);
+ assert_se(sd_event_add_inotify(e, &b, p, IN_CREATE|IN_DELETE|IN_DONT_FOLLOW, inotify_handler, &context) >= 0);
+ assert_se(sd_event_source_set_priority(b, SD_EVENT_PRIORITY_IDLE) >= 0);
+ assert_se(sd_event_source_set_priority(b, SD_EVENT_PRIORITY_NORMAL) >= 0);
+ assert_se(sd_event_add_inotify(e, &c, p, IN_CREATE|IN_DELETE|IN_EXCL_UNLINK, inotify_handler, &context) >= 0);
+ assert_se(sd_event_source_set_priority(c, SD_EVENT_PRIORITY_IDLE) >= 0);
+
+ assert_se(sd_event_source_set_description(a, "0") >= 0);
+ assert_se(sd_event_source_set_description(b, "1") >= 0);
+ assert_se(sd_event_source_set_description(c, "2") >= 0);
+
+ q = strjoina(p, "/sub");
+ assert_se(touch(q) >= 0);
+ assert_se(sd_event_add_inotify(e, &d, q, IN_DELETE_SELF, delete_self_handler, &context) >= 0);
+
+ for (i = 0; i < n_create_events; i++) {
+ char buf[DECIMAL_STR_MAX(unsigned)+1];
+ _cleanup_free_ char *z = NULL;
+
+ xsprintf(buf, "%u", i);
+ assert_se(z = path_join(p, buf));
+
+ assert_se(touch(z) >= 0);
+ }
+
+ assert_se(unlink(q) >= 0);
+
+ assert_se(sd_event_loop(e) >= 0);
+
+ sd_event_source_unref(a);
+ sd_event_source_unref(b);
+ sd_event_source_unref(c);
+ sd_event_source_unref(d);
+
+ sd_event_unref(e);
+}
+
+TEST(inotify) {
+ test_inotify_one(100); /* should work without overflow */
+ test_inotify_one(33000); /* should trigger a q overflow */
+}
+
+static int pidfd_handler(sd_event_source *s, const siginfo_t *si, void *userdata) {
+ assert_se(s);
+ assert_se(si);
+
+ assert_se(si->si_uid == getuid());
+ assert_se(si->si_signo == SIGCHLD);
+ assert_se(si->si_code == CLD_EXITED);
+ assert_se(si->si_status == 66);
+
+ log_info("got pidfd on %c", PTR_TO_INT(userdata));
+
+ assert_se(userdata == INT_TO_PTR('p'));
+
+ assert_se(sd_event_exit(sd_event_source_get_event(s), 0) >= 0);
+ sd_event_source_unref(s);
+
+ return 0;
+}
+
+TEST(pidfd) {
+ sd_event_source *s = NULL, *t = NULL;
+ sd_event *e = NULL;
+ int pidfd;
+ pid_t pid, pid2;
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1) >= 0);
+
+ pid = fork();
+ if (pid == 0)
+ /* child */
+ _exit(66);
+
+ assert_se(pid > 1);
+
+ pidfd = pidfd_open(pid, 0);
+ if (pidfd < 0) {
+ /* No pidfd_open() supported or blocked? */
+ assert_se(ERRNO_IS_NOT_SUPPORTED(errno) || ERRNO_IS_PRIVILEGE(errno));
+ (void) wait_for_terminate(pid, NULL);
+ return;
+ }
+
+ pid2 = fork();
+ if (pid2 == 0)
+ freeze();
+
+ assert_se(pid > 2);
+
+ assert_se(sd_event_default(&e) >= 0);
+ assert_se(sd_event_add_child_pidfd(e, &s, pidfd, WEXITED, pidfd_handler, INT_TO_PTR('p')) >= 0);
+ assert_se(sd_event_source_set_child_pidfd_own(s, true) >= 0);
+
+ /* This one should never trigger, since our second child lives forever */
+ assert_se(sd_event_add_child(e, &t, pid2, WEXITED, pidfd_handler, INT_TO_PTR('q')) >= 0);
+ assert_se(sd_event_source_set_child_process_own(t, true) >= 0);
+
+ assert_se(sd_event_loop(e) >= 0);
+
+ /* Child should still be alive */
+ assert_se(kill(pid2, 0) >= 0);
+
+ t = sd_event_source_unref(t);
+
+ /* Child should now be dead, since we dropped the ref */
+ assert_se(kill(pid2, 0) < 0 && errno == ESRCH);
+
+ sd_event_unref(e);
+}
+
+static int ratelimit_io_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ unsigned *c = (unsigned*) userdata;
+ *c += 1;
+ return 0;
+}
+
+static int ratelimit_time_handler(sd_event_source *s, uint64_t usec, void *userdata) {
+ int r;
+
+ r = sd_event_source_set_enabled(s, SD_EVENT_ON);
+ if (r < 0)
+ log_warning_errno(r, "Failed to turn on notify event source: %m");
+
+ r = sd_event_source_set_time(s, usec + 1000);
+ if (r < 0)
+ log_error_errno(r, "Failed to restart watchdog event source: %m");
+
+ unsigned *c = (unsigned*) userdata;
+ *c += 1;
+
+ return 0;
+}
+
+static int expired = -1;
+static int ratelimit_expired(sd_event_source *s, void *userdata) {
+ return ++expired;
+}
+
+TEST(ratelimit) {
+ _cleanup_close_pair_ int p[2] = EBADF_PAIR;
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL;
+ uint64_t interval;
+ unsigned count, burst;
+
+ assert_se(sd_event_default(&e) >= 0);
+ assert_se(pipe2(p, O_CLOEXEC|O_NONBLOCK) >= 0);
+
+ assert_se(sd_event_add_io(e, &s, p[0], EPOLLIN, ratelimit_io_handler, &count) >= 0);
+ assert_se(sd_event_source_set_description(s, "test-ratelimit-io") >= 0);
+ assert_se(sd_event_source_set_ratelimit(s, 1 * USEC_PER_SEC, 5) >= 0);
+ assert_se(sd_event_source_get_ratelimit(s, &interval, &burst) >= 0);
+ assert_se(interval == 1 * USEC_PER_SEC && burst == 5);
+
+ assert_se(write(p[1], "1", 1) == 1);
+
+ count = 0;
+ for (unsigned i = 0; i < 10; i++) {
+ log_debug("slow loop iteration %u", i);
+ assert_se(sd_event_run(e, UINT64_MAX) >= 0);
+ assert_se(usleep_safe(250 * USEC_PER_MSEC) >= 0);
+ }
+
+ assert_se(sd_event_source_is_ratelimited(s) == 0);
+ assert_se(count == 10);
+ log_info("ratelimit_io_handler: called %u times, event source not ratelimited", count);
+
+ assert_se(sd_event_source_set_ratelimit(s, 0, 0) >= 0);
+ assert_se(sd_event_source_set_ratelimit(s, 1 * USEC_PER_SEC, 5) >= 0);
+
+ count = 0;
+ for (unsigned i = 0; i < 10; i++) {
+ log_debug("fast event loop iteration %u", i);
+ assert_se(sd_event_run(e, UINT64_MAX) >= 0);
+ assert_se(usleep_safe(10) >= 0);
+ }
+ log_info("ratelimit_io_handler: called %u times, event source got ratelimited", count);
+ assert_se(count < 10);
+
+ s = sd_event_source_unref(s);
+ safe_close_pair(p);
+
+ count = 0;
+ assert_se(sd_event_add_time_relative(e, &s, CLOCK_MONOTONIC, 1000, 1, ratelimit_time_handler, &count) >= 0);
+ assert_se(sd_event_source_set_ratelimit(s, 1 * USEC_PER_SEC, 10) == 0);
+
+ do {
+ assert_se(sd_event_run(e, UINT64_MAX) >= 0);
+ } while (!sd_event_source_is_ratelimited(s));
+
+ log_info("ratelimit_time_handler: called %u times, event source got ratelimited", count);
+ assert_se(count == 10);
+
+ /* In order to get rid of active rate limit client needs to disable it explicitly */
+ assert_se(sd_event_source_set_ratelimit(s, 0, 0) >= 0);
+ assert_se(!sd_event_source_is_ratelimited(s));
+
+ assert_se(sd_event_source_set_ratelimit(s, 1 * USEC_PER_SEC, 10) >= 0);
+
+ /* Set callback that will be invoked when we leave rate limited state. */
+ assert_se(sd_event_source_set_ratelimit_expire_callback(s, ratelimit_expired) >= 0);
+
+ do {
+ assert_se(sd_event_run(e, UINT64_MAX) >= 0);
+ } while (!sd_event_source_is_ratelimited(s));
+
+ log_info("ratelimit_time_handler: called 10 more times, event source got ratelimited");
+ assert_se(count == 20);
+
+ /* Dispatch the event loop once more and check that ratelimit expiration callback got called */
+ assert_se(sd_event_run(e, UINT64_MAX) >= 0);
+ assert_se(expired == 0);
+}
+
+TEST(simple_timeout) {
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ usec_t f, t, some_time;
+
+ some_time = random_u64_range(2 * USEC_PER_SEC);
+
+ assert_se(sd_event_default(&e) >= 0);
+
+ assert_se(sd_event_prepare(e) == 0);
+
+ f = now(CLOCK_MONOTONIC);
+ assert_se(sd_event_wait(e, some_time) >= 0);
+ t = now(CLOCK_MONOTONIC);
+
+ /* The event loop may sleep longer than the specified time (timer accuracy, scheduling latencies, …),
+ * but never shorter. Let's check that. */
+ assert_se(t >= usec_add(f, some_time));
+}
+
+static int inotify_self_destroy_handler(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
+ sd_event_source **p = userdata;
+
+ assert_se(ev);
+ assert_se(p);
+ assert_se(*p == s);
+
+ assert_se(FLAGS_SET(ev->mask, IN_ATTRIB));
+
+ assert_se(sd_event_exit(sd_event_source_get_event(s), 0) >= 0);
+
+ *p = sd_event_source_unref(*p); /* here's what we actually intend to test: we destroy the event
+ * source from inside the event source handler */
+ return 1;
+}
+
+TEST(inotify_self_destroy) {
+ _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ char path[] = "/tmp/inotifyXXXXXX";
+ _cleanup_close_ int fd = -EBADF;
+
+ /* Tests that destroying an inotify event source from its own handler is safe */
+
+ assert_se(sd_event_default(&e) >= 0);
+
+ fd = mkostemp_safe(path);
+ assert_se(fd >= 0);
+ assert_se(sd_event_add_inotify_fd(e, &s, fd, IN_ATTRIB, inotify_self_destroy_handler, &s) >= 0);
+ fd = safe_close(fd);
+ assert_se(unlink(path) >= 0); /* This will trigger IN_ATTRIB because link count goes to zero */
+ assert_se(sd_event_loop(e) >= 0);
+}
+
+struct inotify_process_buffered_data_context {
+ const char *path[2];
+ unsigned i;
+};
+
+static int inotify_process_buffered_data_handler(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
+ struct inotify_process_buffered_data_context *c = ASSERT_PTR(userdata);
+ const char *description;
+
+ assert_se(sd_event_source_get_description(s, &description) >= 0);
+
+ assert_se(c->i < 2);
+ assert_se(streq(c->path[c->i], description));
+ c->i++;
+
+ return 1;
+}
+
+TEST(inotify_process_buffered_data) {
+ _cleanup_(rm_rf_physical_and_freep) char *p = NULL, *q = NULL;
+ _cleanup_(sd_event_source_unrefp) sd_event_source *a = NULL, *b = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ _cleanup_free_ char *z = NULL;
+
+ /* For issue #23826 */
+
+ assert_se(sd_event_default(&e) >= 0);
+
+ assert_se(mkdtemp_malloc("/tmp/test-inotify-XXXXXX", &p) >= 0);
+ assert_se(mkdtemp_malloc("/tmp/test-inotify-XXXXXX", &q) >= 0);
+
+ struct inotify_process_buffered_data_context context = {
+ .path = { p, q },
+ };
+
+ assert_se(sd_event_add_inotify(e, &a, p, IN_CREATE, inotify_process_buffered_data_handler, &context) >= 0);
+ assert_se(sd_event_add_inotify(e, &b, q, IN_CREATE, inotify_process_buffered_data_handler, &context) >= 0);
+
+ assert_se(z = path_join(p, "aaa"));
+ assert_se(touch(z) >= 0);
+ z = mfree(z);
+ assert_se(z = path_join(q, "bbb"));
+ assert_se(touch(z) >= 0);
+ z = mfree(z);
+
+ assert_se(sd_event_run(e, 10 * USEC_PER_SEC) > 0);
+ assert_se(sd_event_prepare(e) > 0); /* issue #23826: this was 0. */
+ assert_se(sd_event_dispatch(e) > 0);
+ assert_se(sd_event_prepare(e) == 0);
+ assert_se(sd_event_wait(e, 0) == 0);
+}
+
+TEST(fork) {
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ int r;
+
+ assert_se(sd_event_default(&e) >= 0);
+ assert_se(sd_event_prepare(e) == 0);
+
+ /* Check that after a fork the cleanup functions return NULL */
+ r = safe_fork("(bus-fork-test)", FORK_WAIT|FORK_LOG, NULL);
+ if (r == 0) {
+ assert_se(e);
+ assert_se(sd_event_ref(e) == NULL);
+ assert_se(sd_event_unref(e) == NULL);
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(r >= 0);
+}
+
+static int hup_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ unsigned *c = userdata;
+
+ assert_se(revents == EPOLLHUP);
+
+ (*c)++;
+ return 0;
+}
+
+TEST(leave_ratelimit) {
+ bool expect_ratelimit = false, manually_left_ratelimit = false;
+ _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ _cleanup_close_pair_ int pfd[2] = EBADF_PAIR;
+ unsigned c = 0;
+ int r;
+
+ assert_se(sd_event_default(&e) >= 0);
+
+ /* Create an event source that will continuously fire by creating a pipe whose write side is closed,
+ * and which hence will only see EOF and constant EPOLLHUP */
+ assert_se(pipe2(pfd, O_CLOEXEC) >= 0);
+ assert_se(sd_event_add_io(e, &s, pfd[0], EPOLLIN, hup_callback, &c) >= 0);
+ assert_se(sd_event_source_set_io_fd_own(s, true) >= 0);
+ assert_se(sd_event_source_set_ratelimit(s, 5*USEC_PER_MINUTE, 5) >= 0);
+
+ pfd[0] = -EBADF;
+ pfd[1] = safe_close(pfd[1]); /* Trigger continuous EOF */
+
+ for (;;) {
+ r = sd_event_prepare(e);
+ assert_se(r >= 0);
+
+ if (r == 0) {
+ r = sd_event_wait(e, UINT64_MAX);
+ assert_se(r > 0);
+ }
+
+ r = sd_event_dispatch(e);
+ assert_se(r > 0);
+
+ r = sd_event_source_is_ratelimited(s);
+ assert_se(r >= 0);
+
+ if (c < 5)
+ /* First four dispatches should just work */
+ assert_se(!r);
+ else if (c == 5) {
+ /* The fifth dispatch should still work, but we now expect the ratelimit to be hit subsequently */
+ if (!expect_ratelimit) {
+ assert_se(!r);
+ assert_se(sd_event_source_leave_ratelimit(s) == 0); /* this should be a NOP, and return 0 hence */
+ expect_ratelimit = true;
+ } else {
+ /* We expected the ratelimit, let's leave it manually, and verify it */
+ assert_se(r);
+ assert_se(sd_event_source_leave_ratelimit(s) > 0); /* we are ratelimited, hence should return > 0 */
+ assert_se(sd_event_source_is_ratelimited(s) == 0);
+
+ manually_left_ratelimit = true;
+ }
+
+ } else if (c == 6)
+ /* On the sixth iteration let's just exit */
+ break;
+ }
+
+ /* Verify we definitely hit the ratelimit and left it manually again */
+ assert_se(manually_left_ratelimit);
+}
+
+DEFINE_TEST_MAIN(LOG_DEBUG);
diff --git a/src/libsystemd/sd-hwdb/hwdb-internal.h b/src/libsystemd/sd-hwdb/hwdb-internal.h
new file mode 100644
index 0000000..9db3b31
--- /dev/null
+++ b/src/libsystemd/sd-hwdb/hwdb-internal.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdint.h>
+#include <sys/stat.h>
+
+#include "constants.h"
+#include "hashmap.h"
+#include "sparse-endian.h"
+
+#define HWDB_SIG { 'K', 'S', 'L', 'P', 'H', 'H', 'R', 'H' }
+
+struct sd_hwdb {
+ unsigned n_ref;
+
+ FILE *f;
+ struct stat st;
+ union {
+ struct trie_header_f *head;
+ const char *map;
+ };
+
+ OrderedHashmap *properties;
+ Iterator properties_iterator;
+ bool properties_modified;
+};
+
+/* on-disk trie objects */
+struct trie_header_f {
+ uint8_t signature[8];
+
+ /* version of tool which created the file */
+ le64_t tool_version;
+ le64_t file_size;
+
+ /* size of structures to allow them to grow */
+ le64_t header_size;
+ le64_t node_size;
+ le64_t child_entry_size;
+ le64_t value_entry_size;
+
+ /* offset of the root trie node */
+ le64_t nodes_root_off;
+
+ /* size of the nodes and string section */
+ le64_t nodes_len;
+ le64_t strings_len;
+} _packed_;
+
+struct trie_node_f {
+ /* prefix of lookup string, shared by all children */
+ le64_t prefix_off;
+ /* size of children entry array appended to the node */
+ uint8_t children_count;
+ uint8_t padding[7];
+ /* size of value entry array appended to the node */
+ le64_t values_count;
+} _packed_;
+
+/* array of child entries, follows directly the node record */
+struct trie_child_entry_f {
+ /* index of the child node */
+ uint8_t c;
+ uint8_t padding[7];
+ /* offset of the child node */
+ le64_t child_off;
+} _packed_;
+
+/* array of value entries, follows directly the node record/child array */
+struct trie_value_entry_f {
+ le64_t key_off;
+ le64_t value_off;
+} _packed_;
+
+/* v2 extends v1 with filename and line-number */
+struct trie_value_entry2_f {
+ le64_t key_off;
+ le64_t value_off;
+ le64_t filename_off;
+ le32_t line_number;
+ le16_t file_priority;
+ le16_t padding;
+} _packed_;
+
+#define hwdb_bin_paths \
+ "/etc/systemd/hwdb/hwdb.bin\0" \
+ "/etc/udev/hwdb.bin\0" \
+ "/usr/lib/systemd/hwdb/hwdb.bin\0" \
+ UDEVLIBEXECDIR "/hwdb.bin\0"
diff --git a/src/libsystemd/sd-hwdb/sd-hwdb.c b/src/libsystemd/sd-hwdb/sd-hwdb.c
new file mode 100644
index 0000000..f163314
--- /dev/null
+++ b/src/libsystemd/sd-hwdb/sd-hwdb.c
@@ -0,0 +1,436 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2008 Alan Jenkins <alan.christopher.jenkins@googlemail.com>
+***/
+
+#include <errno.h>
+#include <fnmatch.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include "sd-hwdb.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hashmap.h"
+#include "hwdb-internal.h"
+#include "nulstr-util.h"
+#include "string-util.h"
+#include "time-util.h"
+
+struct linebuf {
+ char bytes[LINE_MAX];
+ size_t size;
+ size_t len;
+};
+
+static void linebuf_init(struct linebuf *buf) {
+ buf->size = 0;
+ buf->len = 0;
+}
+
+static const char *linebuf_get(struct linebuf *buf) {
+ if (buf->len + 1 >= sizeof(buf->bytes))
+ return NULL;
+ buf->bytes[buf->len] = '\0';
+ return buf->bytes;
+}
+
+static bool linebuf_add(struct linebuf *buf, const char *s, size_t len) {
+ if (buf->len + len >= sizeof(buf->bytes))
+ return false;
+ memcpy(buf->bytes + buf->len, s, len);
+ buf->len += len;
+ return true;
+}
+
+static bool linebuf_add_char(struct linebuf *buf, char c) {
+ if (buf->len + 1 >= sizeof(buf->bytes))
+ return false;
+ buf->bytes[buf->len++] = c;
+ return true;
+}
+
+static void linebuf_rem(struct linebuf *buf, size_t count) {
+ assert(buf->len >= count);
+ buf->len -= count;
+}
+
+static void linebuf_rem_char(struct linebuf *buf) {
+ linebuf_rem(buf, 1);
+}
+
+static const struct trie_child_entry_f *trie_node_child(sd_hwdb *hwdb, const struct trie_node_f *node, size_t idx) {
+ const char *base = (const char *)node;
+
+ base += le64toh(hwdb->head->node_size);
+ base += idx * le64toh(hwdb->head->child_entry_size);
+ return (const struct trie_child_entry_f *)base;
+}
+
+static const struct trie_value_entry_f *trie_node_value(sd_hwdb *hwdb, const struct trie_node_f *node, size_t idx) {
+ const char *base = (const char *)node;
+
+ base += le64toh(hwdb->head->node_size);
+ base += node->children_count * le64toh(hwdb->head->child_entry_size);
+ base += idx * le64toh(hwdb->head->value_entry_size);
+ return (const struct trie_value_entry_f *)base;
+}
+
+static const struct trie_node_f *trie_node_from_off(sd_hwdb *hwdb, le64_t off) {
+ return (const struct trie_node_f *)(hwdb->map + le64toh(off));
+}
+
+static const char *trie_string(sd_hwdb *hwdb, le64_t off) {
+ return hwdb->map + le64toh(off);
+}
+
+static int trie_children_cmp_f(const void *v1, const void *v2) {
+ const struct trie_child_entry_f *n1 = v1;
+ const struct trie_child_entry_f *n2 = v2;
+
+ return n1->c - n2->c;
+}
+
+static const struct trie_node_f *node_lookup_f(sd_hwdb *hwdb, const struct trie_node_f *node, uint8_t c) {
+ struct trie_child_entry_f *child;
+ struct trie_child_entry_f search;
+
+ search.c = c;
+ child = bsearch(&search, (const char *)node + le64toh(hwdb->head->node_size), node->children_count,
+ le64toh(hwdb->head->child_entry_size), trie_children_cmp_f);
+ if (child)
+ return trie_node_from_off(hwdb, child->child_off);
+ return NULL;
+}
+
+static int hwdb_add_property(sd_hwdb *hwdb, const struct trie_value_entry_f *entry) {
+ const char *key;
+ int r;
+
+ assert(hwdb);
+
+ key = trie_string(hwdb, entry->key_off);
+
+ /*
+ * Silently ignore all properties which do not start with a
+ * space; future extensions might use additional prefixes.
+ */
+ if (key[0] != ' ')
+ return 0;
+
+ key++;
+
+ if (le64toh(hwdb->head->value_entry_size) >= sizeof(struct trie_value_entry2_f)) {
+ const struct trie_value_entry2_f *old, *entry2;
+
+ entry2 = (const struct trie_value_entry2_f *)entry;
+ old = ordered_hashmap_get(hwdb->properties, key);
+ if (old) {
+ /* On duplicates, we order by filename priority and line-number.
+ *
+ * v2 of the format had 64 bits for the line number.
+ * v3 reuses top 32 bits of line_number to store the priority.
+ * We check the top bits — if they are zero we have v2 format.
+ * This means that v2 clients will print wrong line numbers with
+ * v3 data.
+ *
+ * For v3 data: we compare the priority (of the source file)
+ * and the line number.
+ *
+ * For v2 data: we rely on the fact that the filenames in the hwdb
+ * are added in the order of priority (higher later), because they
+ * are *processed* in the order of priority. So we compare the
+ * indices to determine which file had higher priority. Comparing
+ * the strings alphabetically would be useless, because those are
+ * full paths, and e.g. /usr/lib would sort after /etc, even
+ * though it has lower priority. This is not reliable because of
+ * suffix compression, but should work for the most common case of
+ * /usr/lib/udev/hwbd.d and /etc/udev/hwdb.d, and is better than
+ * not doing the comparison at all.
+ */
+ bool lower;
+
+ if (entry2->file_priority == 0)
+ lower = entry2->filename_off < old->filename_off ||
+ (entry2->filename_off == old->filename_off && entry2->line_number < old->line_number);
+ else
+ lower = entry2->file_priority < old->file_priority ||
+ (entry2->file_priority == old->file_priority && entry2->line_number < old->line_number);
+ if (lower)
+ return 0;
+ }
+ }
+
+ r = ordered_hashmap_ensure_allocated(&hwdb->properties, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = ordered_hashmap_replace(hwdb->properties, key, (void *)entry);
+ if (r < 0)
+ return r;
+
+ hwdb->properties_modified = true;
+
+ return 0;
+}
+
+static int trie_fnmatch_f(sd_hwdb *hwdb, const struct trie_node_f *node, size_t p,
+ struct linebuf *buf, const char *search) {
+ size_t len;
+ size_t i;
+ const char *prefix;
+ int err;
+
+ prefix = trie_string(hwdb, node->prefix_off);
+ len = strlen(prefix + p);
+ linebuf_add(buf, prefix + p, len);
+
+ for (i = 0; i < node->children_count; i++) {
+ const struct trie_child_entry_f *child = trie_node_child(hwdb, node, i);
+
+ linebuf_add_char(buf, child->c);
+ err = trie_fnmatch_f(hwdb, trie_node_from_off(hwdb, child->child_off), 0, buf, search);
+ if (err < 0)
+ return err;
+ linebuf_rem_char(buf);
+ }
+
+ if (le64toh(node->values_count) && fnmatch(linebuf_get(buf), search, 0) == 0)
+ for (i = 0; i < le64toh(node->values_count); i++) {
+ err = hwdb_add_property(hwdb, trie_node_value(hwdb, node, i));
+ if (err < 0)
+ return err;
+ }
+
+ linebuf_rem(buf, len);
+ return 0;
+}
+
+static int trie_search_f(sd_hwdb *hwdb, const char *search) {
+ struct linebuf buf;
+ const struct trie_node_f *node;
+ size_t i = 0;
+ int err;
+
+ linebuf_init(&buf);
+
+ node = trie_node_from_off(hwdb, hwdb->head->nodes_root_off);
+ while (node) {
+ const struct trie_node_f *child;
+ size_t p = 0;
+
+ if (node->prefix_off) {
+ char c;
+
+ for (; (c = trie_string(hwdb, node->prefix_off)[p]); p++) {
+ if (IN_SET(c, '*', '?', '['))
+ return trie_fnmatch_f(hwdb, node, p, &buf, search + i + p);
+ if (c != search[i + p])
+ return 0;
+ }
+ i += p;
+ }
+
+ child = node_lookup_f(hwdb, node, '*');
+ if (child) {
+ linebuf_add_char(&buf, '*');
+ err = trie_fnmatch_f(hwdb, child, 0, &buf, search + i);
+ if (err < 0)
+ return err;
+ linebuf_rem_char(&buf);
+ }
+
+ child = node_lookup_f(hwdb, node, '?');
+ if (child) {
+ linebuf_add_char(&buf, '?');
+ err = trie_fnmatch_f(hwdb, child, 0, &buf, search + i);
+ if (err < 0)
+ return err;
+ linebuf_rem_char(&buf);
+ }
+
+ child = node_lookup_f(hwdb, node, '[');
+ if (child) {
+ linebuf_add_char(&buf, '[');
+ err = trie_fnmatch_f(hwdb, child, 0, &buf, search + i);
+ if (err < 0)
+ return err;
+ linebuf_rem_char(&buf);
+ }
+
+ if (search[i] == '\0') {
+ size_t n;
+
+ for (n = 0; n < le64toh(node->values_count); n++) {
+ err = hwdb_add_property(hwdb, trie_node_value(hwdb, node, n));
+ if (err < 0)
+ return err;
+ }
+ return 0;
+ }
+
+ child = node_lookup_f(hwdb, node, search[i]);
+ node = child;
+ i++;
+ }
+ return 0;
+}
+
+static int hwdb_new(const char *path, sd_hwdb **ret) {
+ _cleanup_(sd_hwdb_unrefp) sd_hwdb *hwdb = NULL;
+ const char sig[] = HWDB_SIG;
+
+ assert_return(ret, -EINVAL);
+
+ hwdb = new0(sd_hwdb, 1);
+ if (!hwdb)
+ return -ENOMEM;
+
+ hwdb->n_ref = 1;
+
+ /* Find hwdb.bin in the explicit path if provided, or iterate over hwdb_bin_paths otherwise */
+ if (!isempty(path)) {
+ log_debug("Trying to open \"%s\"...", path);
+ hwdb->f = fopen(path, "re");
+ if (!hwdb->f)
+ return log_debug_errno(errno, "Failed to open %s: %m", path);
+ } else {
+ NULSTR_FOREACH(p, hwdb_bin_paths) {
+ log_debug("Trying to open \"%s\"...", p);
+ hwdb->f = fopen(p, "re");
+ if (hwdb->f) {
+ path = p;
+ break;
+ }
+ if (errno != ENOENT)
+ return log_debug_errno(errno, "Failed to open %s: %m", p);
+ }
+
+ if (!hwdb->f)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOENT),
+ "hwdb.bin does not exist, please run 'systemd-hwdb update'");
+ }
+
+ if (fstat(fileno(hwdb->f), &hwdb->st) < 0)
+ return log_debug_errno(errno, "Failed to stat %s: %m", path);
+ if (hwdb->st.st_size < (off_t) offsetof(struct trie_header_f, strings_len) + 8)
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO), "File %s is too short: %m", path);
+ if (file_offset_beyond_memory_size(hwdb->st.st_size))
+ return log_debug_errno(SYNTHETIC_ERRNO(EFBIG), "File %s is too long: %m", path);
+
+ hwdb->map = mmap(0, hwdb->st.st_size, PROT_READ, MAP_SHARED, fileno(hwdb->f), 0);
+ if (hwdb->map == MAP_FAILED)
+ return log_debug_errno(errno, "Failed to map %s: %m", path);
+
+ if (memcmp(hwdb->map, sig, sizeof(hwdb->head->signature)) != 0 ||
+ (size_t) hwdb->st.st_size != le64toh(hwdb->head->file_size))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to recognize the format of %s", path);
+
+ log_debug("=== trie on-disk ===");
+ log_debug("tool version: %"PRIu64, le64toh(hwdb->head->tool_version));
+ log_debug("file size: %8"PRIi64" bytes", hwdb->st.st_size);
+ log_debug("header size %8"PRIu64" bytes", le64toh(hwdb->head->header_size));
+ log_debug("strings %8"PRIu64" bytes", le64toh(hwdb->head->strings_len));
+ log_debug("nodes %8"PRIu64" bytes", le64toh(hwdb->head->nodes_len));
+
+ *ret = TAKE_PTR(hwdb);
+
+ return 0;
+}
+
+_public_ int sd_hwdb_new_from_path(const char *path, sd_hwdb **ret) {
+ assert_return(!isempty(path), -EINVAL);
+
+ return hwdb_new(path, ret);
+}
+
+_public_ int sd_hwdb_new(sd_hwdb **ret) {
+ return hwdb_new(NULL, ret);
+}
+
+static sd_hwdb *hwdb_free(sd_hwdb *hwdb) {
+ assert(hwdb);
+
+ if (hwdb->map)
+ munmap((void *)hwdb->map, hwdb->st.st_size);
+ safe_fclose(hwdb->f);
+ ordered_hashmap_free(hwdb->properties);
+ return mfree(hwdb);
+}
+
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_hwdb, sd_hwdb, hwdb_free)
+
+static int properties_prepare(sd_hwdb *hwdb, const char *modalias) {
+ assert(hwdb);
+ assert(modalias);
+
+ ordered_hashmap_clear(hwdb->properties);
+ hwdb->properties_modified = true;
+
+ return trie_search_f(hwdb, modalias);
+}
+
+_public_ int sd_hwdb_get(sd_hwdb *hwdb, const char *modalias, const char *key, const char **_value) {
+ const struct trie_value_entry_f *entry;
+ int r;
+
+ assert_return(hwdb, -EINVAL);
+ assert_return(hwdb->f, -EINVAL);
+ assert_return(modalias, -EINVAL);
+ assert_return(_value, -EINVAL);
+
+ r = properties_prepare(hwdb, modalias);
+ if (r < 0)
+ return r;
+
+ entry = ordered_hashmap_get(hwdb->properties, key);
+ if (!entry)
+ return -ENOENT;
+
+ *_value = trie_string(hwdb, entry->value_off);
+
+ return 0;
+}
+
+_public_ int sd_hwdb_seek(sd_hwdb *hwdb, const char *modalias) {
+ int r;
+
+ assert_return(hwdb, -EINVAL);
+ assert_return(hwdb->f, -EINVAL);
+ assert_return(modalias, -EINVAL);
+
+ r = properties_prepare(hwdb, modalias);
+ if (r < 0)
+ return r;
+
+ hwdb->properties_modified = false;
+ hwdb->properties_iterator = ITERATOR_FIRST;
+
+ return 0;
+}
+
+_public_ int sd_hwdb_enumerate(sd_hwdb *hwdb, const char **key, const char **value) {
+ const struct trie_value_entry_f *entry;
+ const void *k;
+
+ assert_return(hwdb, -EINVAL);
+ assert_return(key, -EINVAL);
+ assert_return(value, -EINVAL);
+
+ if (hwdb->properties_modified)
+ return -EAGAIN;
+
+ if (!ordered_hashmap_iterate(hwdb->properties, &hwdb->properties_iterator, (void **)&entry, &k))
+ return 0;
+
+ *key = k;
+ *value = trie_string(hwdb, entry->value_off);
+
+ return 1;
+}
diff --git a/src/libsystemd/sd-id128/id128-util.c b/src/libsystemd/sd-id128/id128-util.c
new file mode 100644
index 0000000..94bfd70
--- /dev/null
+++ b/src/libsystemd/sd-id128/id128-util.c
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hexdecoct.h"
+#include "id128-util.h"
+#include "io-util.h"
+#include "sha256.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "sync-util.h"
+#include "virt.h"
+
+int id128_from_string_nonzero(const char *s, sd_id128_t *ret) {
+ sd_id128_t t;
+ int r;
+
+ assert(ret);
+
+ r = sd_id128_from_string(ASSERT_PTR(s), &t);
+ if (r < 0)
+ return r;
+
+ if (sd_id128_is_null(t))
+ return -ENXIO;
+
+ *ret = t;
+ return 0;
+}
+
+bool id128_is_valid(const char *s) {
+ size_t l;
+
+ assert(s);
+
+ l = strlen(s);
+
+ if (l == SD_ID128_STRING_MAX - 1)
+ /* Plain formatted 128-bit hex string */
+ return in_charset(s, HEXDIGITS);
+
+ if (l == SD_ID128_UUID_STRING_MAX - 1) {
+ /* Formatted UUID */
+ for (size_t i = 0; i < l; i++) {
+ char c = s[i];
+
+ if (IN_SET(i, 8, 13, 18, 23)) {
+ if (c != '-')
+ return false;
+ } else if (!ascii_ishex(c))
+ return false;
+ }
+ return true;
+ }
+
+ return false;
+}
+
+int id128_read_fd(int fd, Id128Flag f, sd_id128_t *ret) {
+ char buffer[SD_ID128_UUID_STRING_MAX + 1]; /* +1 is for trailing newline */
+ sd_id128_t id;
+ ssize_t l;
+ int r;
+
+ assert(fd >= 0);
+
+ /* Reads an 128-bit ID from a file, which may either be in plain format (32 hex digits), or in UUID format, both
+ * optionally followed by a newline and nothing else. ID files should really be newline terminated, but if they
+ * aren't that's OK too, following the rule of "Be conservative in what you send, be liberal in what you
+ * accept".
+ *
+ * This returns the following:
+ * -ENOMEDIUM: an empty string,
+ * -ENOPKG: "uninitialized" or "uninitialized\n",
+ * -EUCLEAN: other invalid strings. */
+
+ l = loop_read(fd, buffer, sizeof(buffer), false); /* we expect a short read of either 32/33 or 36/37 chars */
+ if (l < 0)
+ return (int) l;
+ if (l == 0) /* empty? */
+ return -ENOMEDIUM;
+
+ switch (l) {
+
+ case STRLEN("uninitialized"):
+ case STRLEN("uninitialized\n"):
+ return strneq(buffer, "uninitialized\n", l) ? -ENOPKG : -EINVAL;
+
+ case SD_ID128_STRING_MAX: /* plain UUID with trailing newline */
+ if (buffer[SD_ID128_STRING_MAX-1] != '\n')
+ return -EUCLEAN;
+
+ _fallthrough_;
+ case SD_ID128_STRING_MAX-1: /* plain UUID without trailing newline */
+ if (!FLAGS_SET(f, ID128_FORMAT_PLAIN))
+ return -EUCLEAN;
+
+ buffer[SD_ID128_STRING_MAX-1] = 0;
+ break;
+
+ case SD_ID128_UUID_STRING_MAX: /* RFC UUID with trailing newline */
+ if (buffer[SD_ID128_UUID_STRING_MAX-1] != '\n')
+ return -EUCLEAN;
+
+ _fallthrough_;
+ case SD_ID128_UUID_STRING_MAX-1: /* RFC UUID without trailing newline */
+ if (!FLAGS_SET(f, ID128_FORMAT_UUID))
+ return -EUCLEAN;
+
+ buffer[SD_ID128_UUID_STRING_MAX-1] = 0;
+ break;
+
+ default:
+ return -EUCLEAN;
+ }
+
+ r = sd_id128_from_string(buffer, &id);
+ if (r == -EINVAL)
+ return -EUCLEAN;
+ if (r < 0)
+ return r;
+
+ if (FLAGS_SET(f, ID128_REFUSE_NULL) && sd_id128_is_null(id))
+ return -ENOMEDIUM;
+
+ if (ret)
+ *ret = id;
+ return 0;
+}
+
+int id128_read_at(int dir_fd, const char *path, Id128Flag f, sd_id128_t *ret) {
+ _cleanup_close_ int fd = -EBADF;
+
+ assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
+ assert(path);
+
+ fd = xopenat(dir_fd, path, O_RDONLY|O_CLOEXEC|O_NOCTTY, /* xopen_flags = */ 0, /* mode = */ 0);
+ if (fd < 0)
+ return fd;
+
+ return id128_read_fd(fd, f, ret);
+}
+
+int id128_write_fd(int fd, Id128Flag f, sd_id128_t id) {
+ char buffer[SD_ID128_UUID_STRING_MAX + 1]; /* +1 is for trailing newline */
+ size_t sz;
+ int r;
+
+ assert(fd >= 0);
+ assert(IN_SET((f & ID128_FORMAT_ANY), ID128_FORMAT_PLAIN, ID128_FORMAT_UUID));
+
+ if (FLAGS_SET(f, ID128_REFUSE_NULL) && sd_id128_is_null(id))
+ return -ENOMEDIUM;
+
+ if (FLAGS_SET(f, ID128_FORMAT_PLAIN)) {
+ assert_se(sd_id128_to_string(id, buffer));
+ sz = SD_ID128_STRING_MAX;
+ } else {
+ assert_se(sd_id128_to_uuid_string(id, buffer));
+ sz = SD_ID128_UUID_STRING_MAX;
+ }
+
+ buffer[sz - 1] = '\n';
+ r = loop_write(fd, buffer, sz);
+ if (r < 0)
+ return r;
+
+ if (FLAGS_SET(f, ID128_SYNC_ON_WRITE)) {
+ r = fsync_full(fd);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int id128_write_at(int dir_fd, const char *path, Id128Flag f, sd_id128_t id) {
+ _cleanup_close_ int fd = -EBADF;
+
+ assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
+ assert(path);
+
+ fd = xopenat(dir_fd, path, O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY|O_TRUNC, /* xopen_flags = */ 0, 0444);
+ if (fd < 0)
+ return fd;
+
+ return id128_write_fd(fd, f, id);
+}
+
+void id128_hash_func(const sd_id128_t *p, struct siphash *state) {
+ siphash24_compress(p, sizeof(sd_id128_t), state);
+}
+
+int id128_compare_func(const sd_id128_t *a, const sd_id128_t *b) {
+ return memcmp(a, b, 16);
+}
+
+sd_id128_t id128_make_v4_uuid(sd_id128_t id) {
+ /* Stolen from generate_random_uuid() of drivers/char/random.c
+ * in the kernel sources */
+
+ /* Set UUID version to 4 --- truly random generation */
+ id.bytes[6] = (id.bytes[6] & 0x0F) | 0x40;
+
+ /* Set the UUID variant to DCE */
+ id.bytes[8] = (id.bytes[8] & 0x3F) | 0x80;
+
+ return id;
+}
+
+DEFINE_HASH_OPS(id128_hash_ops, sd_id128_t, id128_hash_func, id128_compare_func);
+DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(id128_hash_ops_free, sd_id128_t, id128_hash_func, id128_compare_func, free);
+
+int id128_get_product(sd_id128_t *ret) {
+ sd_id128_t uuid;
+ int r;
+
+ assert(ret);
+
+ /* Reads the systems product UUID from DMI or devicetree (where it is located on POWER). This is
+ * particularly relevant in VM environments, where VM managers typically place a VM uuid there. */
+
+ r = detect_container();
+ if (r < 0)
+ return r;
+ if (r > 0) /* Refuse returning this in containers, as this is not a property of our system then, but
+ * of the host */
+ return -ENOENT;
+
+ r = id128_read("/sys/class/dmi/id/product_uuid", ID128_FORMAT_UUID, &uuid);
+ if (r == -ENOENT)
+ r = id128_read("/proc/device-tree/vm,uuid", ID128_FORMAT_UUID, &uuid);
+ if (r == -ENOENT)
+ r = id128_read("/sys/hypervisor/uuid", ID128_FORMAT_UUID, &uuid);
+ if (r < 0)
+ return r;
+
+ if (sd_id128_is_null(uuid) || sd_id128_is_allf(uuid))
+ return -EADDRNOTAVAIL; /* Recognizable error */
+
+ *ret = uuid;
+ return 0;
+}
+
+sd_id128_t id128_digest(const void *data, size_t size) {
+ assert(data || size == 0);
+
+ /* Hashes a UUID from some arbitrary data */
+
+ if (size == SIZE_MAX)
+ size = strlen(data);
+
+ uint8_t h[SHA256_DIGEST_SIZE];
+ sd_id128_t id;
+
+ /* Take the first half of the SHA256 result */
+ assert_cc(sizeof(h) >= sizeof(id.bytes));
+ memcpy(id.bytes, sha256_direct(data, size, h), sizeof(id.bytes));
+
+ return id128_make_v4_uuid(id);
+}
diff --git a/src/libsystemd/sd-id128/id128-util.h b/src/libsystemd/sd-id128/id128-util.h
new file mode 100644
index 0000000..53ba50a
--- /dev/null
+++ b/src/libsystemd/sd-id128/id128-util.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <fcntl.h>
+#include <stdbool.h>
+
+#include "sd-id128.h"
+
+#include "errno-util.h"
+#include "hash-funcs.h"
+#include "macro.h"
+
+bool id128_is_valid(const char *s) _pure_;
+
+typedef enum Id128Flag {
+ ID128_FORMAT_PLAIN = 1 << 0, /* formatted as 32 hex chars as-is */
+ ID128_FORMAT_UUID = 1 << 1, /* formatted as 36 character uuid string */
+ ID128_FORMAT_ANY = ID128_FORMAT_PLAIN | ID128_FORMAT_UUID,
+
+ ID128_SYNC_ON_WRITE = 1 << 2, /* Sync the file after write. Used only when writing an ID. */
+ ID128_REFUSE_NULL = 1 << 3, /* Refuse all zero ID with -ENOMEDIUM. */
+} Id128Flag;
+
+int id128_from_string_nonzero(const char *s, sd_id128_t *ret);
+
+int id128_read_fd(int fd, Id128Flag f, sd_id128_t *ret);
+int id128_read_at(int dir_fd, const char *path, Id128Flag f, sd_id128_t *ret);
+static inline int id128_read(const char *path, Id128Flag f, sd_id128_t *ret) {
+ return id128_read_at(AT_FDCWD, path, f, ret);
+}
+
+int id128_write_fd(int fd, Id128Flag f, sd_id128_t id);
+int id128_write_at(int dir_fd, const char *path, Id128Flag f, sd_id128_t id);
+static inline int id128_write(const char *path, Id128Flag f, sd_id128_t id) {
+ return id128_write_at(AT_FDCWD, path, f, id);
+}
+
+int id128_get_machine(const char *root, sd_id128_t *ret);
+int id128_get_machine_at(int rfd, sd_id128_t *ret);
+
+void id128_hash_func(const sd_id128_t *p, struct siphash *state);
+int id128_compare_func(const sd_id128_t *a, const sd_id128_t *b) _pure_;
+extern const struct hash_ops id128_hash_ops;
+extern const struct hash_ops id128_hash_ops_free;
+
+sd_id128_t id128_make_v4_uuid(sd_id128_t id);
+
+int id128_get_product(sd_id128_t *ret);
+
+sd_id128_t id128_digest(const void *data, size_t size);
+
+/* A helper to check for the three relevant cases of "machine ID not initialized" */
+#define ERRNO_IS_NEG_MACHINE_ID_UNSET(r) \
+ IN_SET(r, \
+ -ENOENT, \
+ -ENOMEDIUM, \
+ -ENOPKG)
+_DEFINE_ABS_WRAPPER(MACHINE_ID_UNSET);
diff --git a/src/libsystemd/sd-id128/sd-id128.c b/src/libsystemd/sd-id128/sd-id128.c
new file mode 100644
index 0000000..9fda79a
--- /dev/null
+++ b/src/libsystemd/sd-id128/sd-id128.c
@@ -0,0 +1,382 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "chase.h"
+#include "fd-util.h"
+#include "hexdecoct.h"
+#include "hmac.h"
+#include "id128-util.h"
+#include "io-util.h"
+#include "macro.h"
+#include "missing_syscall.h"
+#include "missing_threads.h"
+#include "path-util.h"
+#include "random-util.h"
+#include "stat-util.h"
+#include "user-util.h"
+
+_public_ char *sd_id128_to_string(sd_id128_t id, char s[_SD_ARRAY_STATIC SD_ID128_STRING_MAX]) {
+ size_t k = 0;
+
+ assert_return(s, NULL);
+
+ for (size_t n = 0; n < sizeof(sd_id128_t); n++) {
+ s[k++] = hexchar(id.bytes[n] >> 4);
+ s[k++] = hexchar(id.bytes[n] & 0xF);
+ }
+
+ assert(k == SD_ID128_STRING_MAX - 1);
+ s[k] = 0;
+
+ return s;
+}
+
+_public_ char *sd_id128_to_uuid_string(sd_id128_t id, char s[_SD_ARRAY_STATIC SD_ID128_UUID_STRING_MAX]) {
+ size_t k = 0;
+
+ assert_return(s, NULL);
+
+ /* Similar to sd_id128_to_string() but formats the result as UUID instead of plain hex chars */
+
+ for (size_t n = 0; n < sizeof(sd_id128_t); n++) {
+
+ if (IN_SET(n, 4, 6, 8, 10))
+ s[k++] = '-';
+
+ s[k++] = hexchar(id.bytes[n] >> 4);
+ s[k++] = hexchar(id.bytes[n] & 0xF);
+ }
+
+ assert(k == SD_ID128_UUID_STRING_MAX - 1);
+ s[k] = 0;
+
+ return s;
+}
+
+_public_ int sd_id128_from_string(const char *s, sd_id128_t *ret) {
+ size_t n, i;
+ sd_id128_t t;
+ bool is_guid = false;
+
+ assert_return(s, -EINVAL);
+
+ for (n = 0, i = 0; n < sizeof(sd_id128_t);) {
+ int a, b;
+
+ if (s[i] == '-') {
+ /* Is this a GUID? Then be nice, and skip over
+ * the dashes */
+
+ if (i == 8)
+ is_guid = true;
+ else if (IN_SET(i, 13, 18, 23)) {
+ if (!is_guid)
+ return -EINVAL;
+ } else
+ return -EINVAL;
+
+ i++;
+ continue;
+ }
+
+ a = unhexchar(s[i++]);
+ if (a < 0)
+ return -EINVAL;
+
+ b = unhexchar(s[i++]);
+ if (b < 0)
+ return -EINVAL;
+
+ t.bytes[n++] = (a << 4) | b;
+ }
+
+ if (i != (is_guid ? SD_ID128_UUID_STRING_MAX : SD_ID128_STRING_MAX) - 1)
+ return -EINVAL;
+
+ if (s[i] != 0)
+ return -EINVAL;
+
+ if (ret)
+ *ret = t;
+ return 0;
+}
+
+_public_ int sd_id128_string_equal(const char *s, sd_id128_t id) {
+ sd_id128_t parsed;
+ int r;
+
+ if (!s)
+ return false;
+
+ /* Checks if the specified string matches a valid string representation of the specified 128 bit ID/uuid */
+
+ r = sd_id128_from_string(s, &parsed);
+ if (r < 0)
+ return r;
+
+ return sd_id128_equal(parsed, id);
+}
+
+_public_ int sd_id128_get_machine(sd_id128_t *ret) {
+ static thread_local sd_id128_t saved_machine_id = {};
+ int r;
+
+ if (sd_id128_is_null(saved_machine_id)) {
+ r = id128_read("/etc/machine-id", ID128_FORMAT_PLAIN | ID128_REFUSE_NULL, &saved_machine_id);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret)
+ *ret = saved_machine_id;
+ return 0;
+}
+
+int id128_get_machine_at(int rfd, sd_id128_t *ret) {
+ _cleanup_close_ int fd = -EBADF;
+ int r;
+
+ assert(rfd >= 0 || rfd == AT_FDCWD);
+
+ r = dir_fd_is_root_or_cwd(rfd);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return sd_id128_get_machine(ret);
+
+ fd = chase_and_openat(rfd, "/etc/machine-id", CHASE_AT_RESOLVE_IN_ROOT, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL);
+ if (fd < 0)
+ return fd;
+
+ return id128_read_fd(fd, ID128_FORMAT_PLAIN | ID128_REFUSE_NULL, ret);
+}
+
+int id128_get_machine(const char *root, sd_id128_t *ret) {
+ _cleanup_close_ int fd = -EBADF;
+
+ if (empty_or_root(root))
+ return sd_id128_get_machine(ret);
+
+ fd = chase_and_open("/etc/machine-id", root, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL);
+ if (fd < 0)
+ return fd;
+
+ return id128_read_fd(fd, ID128_FORMAT_PLAIN | ID128_REFUSE_NULL, ret);
+}
+
+_public_ int sd_id128_get_boot(sd_id128_t *ret) {
+ static thread_local sd_id128_t saved_boot_id = {};
+ int r;
+
+ if (sd_id128_is_null(saved_boot_id)) {
+ r = id128_read("/proc/sys/kernel/random/boot_id", ID128_FORMAT_UUID | ID128_REFUSE_NULL, &saved_boot_id);
+ if (r == -ENOENT && proc_mounted() == 0)
+ return -ENOSYS;
+ if (r < 0)
+ return r;
+ }
+
+ if (ret)
+ *ret = saved_boot_id;
+ return 0;
+}
+
+static int get_invocation_from_keyring(sd_id128_t *ret) {
+ _cleanup_free_ char *description = NULL;
+ char *d, *p, *g, *u, *e;
+ unsigned long perms;
+ key_serial_t key;
+ size_t sz = 256;
+ uid_t uid;
+ gid_t gid;
+ int r, c;
+
+#define MAX_PERMS ((unsigned long) (KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH| \
+ KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH))
+
+ assert(ret);
+
+ key = request_key("user", "invocation_id", NULL, 0);
+ if (key == -1) {
+ /* Keyring support not available? No invocation key stored? */
+ if (IN_SET(errno, ENOSYS, ENOKEY))
+ return -ENXIO;
+
+ return -errno;
+ }
+
+ for (;;) {
+ description = new(char, sz);
+ if (!description)
+ return -ENOMEM;
+
+ c = keyctl(KEYCTL_DESCRIBE, key, (unsigned long) description, sz, 0);
+ if (c < 0)
+ return -errno;
+
+ if ((size_t) c <= sz)
+ break;
+
+ sz = c;
+ free(description);
+ }
+
+ /* The kernel returns a final NUL in the string, verify that. */
+ assert(description[c-1] == 0);
+
+ /* Chop off the final description string */
+ d = strrchr(description, ';');
+ if (!d)
+ return -EUCLEAN;
+ *d = 0;
+
+ /* Look for the permissions */
+ p = strrchr(description, ';');
+ if (!p)
+ return -EUCLEAN;
+
+ errno = 0;
+ perms = strtoul(p + 1, &e, 16);
+ if (errno > 0)
+ return -errno;
+ if (e == p + 1) /* Read at least one character */
+ return -EUCLEAN;
+ if (e != d) /* Must reached the end */
+ return -EUCLEAN;
+
+ if ((perms & ~MAX_PERMS) != 0)
+ return -EPERM;
+
+ *p = 0;
+
+ /* Look for the group ID */
+ g = strrchr(description, ';');
+ if (!g)
+ return -EUCLEAN;
+ r = parse_gid(g + 1, &gid);
+ if (r < 0)
+ return r;
+ if (gid != 0)
+ return -EPERM;
+ *g = 0;
+
+ /* Look for the user ID */
+ u = strrchr(description, ';');
+ if (!u)
+ return -EUCLEAN;
+ r = parse_uid(u + 1, &uid);
+ if (r < 0)
+ return r;
+ if (uid != 0)
+ return -EPERM;
+
+ c = keyctl(KEYCTL_READ, key, (unsigned long) ret, sizeof(sd_id128_t), 0);
+ if (c < 0)
+ return -errno;
+ if (c != sizeof(sd_id128_t))
+ return -EUCLEAN;
+
+ return 0;
+}
+
+static int get_invocation_from_environment(sd_id128_t *ret) {
+ const char *e;
+ int r;
+
+ assert(ret);
+
+ e = secure_getenv("INVOCATION_ID");
+ if (!e)
+ return -ENXIO;
+
+ r = sd_id128_from_string(e, ret);
+ return r == -EINVAL ? -EUCLEAN : r;
+}
+
+_public_ int sd_id128_get_invocation(sd_id128_t *ret) {
+ static thread_local sd_id128_t saved_invocation_id = {};
+ int r;
+
+ if (sd_id128_is_null(saved_invocation_id)) {
+ /* We first check the environment. The environment variable is primarily relevant for user
+ * services, and sufficiently safe as long as no privilege boundary is involved. */
+ r = get_invocation_from_environment(&saved_invocation_id);
+ if (r == -ENXIO)
+ /* The kernel keyring is relevant for system services (as for user services we don't
+ * store the invocation ID in the keyring, as there'd be no trust benefit in that). */
+ r = get_invocation_from_keyring(&saved_invocation_id);
+ if (r < 0)
+ return r;
+
+ if (sd_id128_is_null(saved_invocation_id))
+ return -ENOMEDIUM;
+ }
+
+ if (ret)
+ *ret = saved_invocation_id;
+ return 0;
+}
+
+_public_ int sd_id128_randomize(sd_id128_t *ret) {
+ sd_id128_t t;
+
+ assert_return(ret, -EINVAL);
+
+ random_bytes(&t, sizeof(t));
+
+ /* Turn this into a valid v4 UUID, to be nice. Note that we
+ * only guarantee this for newly generated UUIDs, not for
+ * pre-existing ones. */
+
+ *ret = id128_make_v4_uuid(t);
+ return 0;
+}
+
+_public_ int sd_id128_get_app_specific(sd_id128_t base, sd_id128_t app_id, sd_id128_t *ret) {
+ assert_cc(sizeof(sd_id128_t) < SHA256_DIGEST_SIZE); /* Check that we don't need to pad with zeros. */
+ union {
+ uint8_t hmac[SHA256_DIGEST_SIZE];
+ sd_id128_t result;
+ } buf;
+
+ assert_return(ret, -EINVAL);
+ assert_return(!sd_id128_is_null(app_id), -ENXIO);
+
+ hmac_sha256(&base, sizeof(base), &app_id, sizeof(app_id), buf.hmac);
+
+ /* Take only the first half. */
+ *ret = id128_make_v4_uuid(buf.result);
+ return 0;
+}
+
+_public_ int sd_id128_get_machine_app_specific(sd_id128_t app_id, sd_id128_t *ret) {
+ sd_id128_t id;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ r = sd_id128_get_machine(&id);
+ if (r < 0)
+ return r;
+
+ return sd_id128_get_app_specific(id, app_id, ret);
+}
+
+_public_ int sd_id128_get_boot_app_specific(sd_id128_t app_id, sd_id128_t *ret) {
+ sd_id128_t id;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ r = sd_id128_get_boot(&id);
+ if (r < 0)
+ return r;
+
+ return sd_id128_get_app_specific(id, app_id, ret);
+}
diff --git a/src/libsystemd/sd-journal/audit-type.c b/src/libsystemd/sd-journal/audit-type.c
new file mode 100644
index 0000000..122cdf5
--- /dev/null
+++ b/src/libsystemd/sd-journal/audit-type.c
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "audit-type.h"
+#include "missing_audit.h"
+
+#include "audit_type-to-name.h"
diff --git a/src/libsystemd/sd-journal/audit-type.h b/src/libsystemd/sd-journal/audit-type.h
new file mode 100644
index 0000000..f2c4898
--- /dev/null
+++ b/src/libsystemd/sd-journal/audit-type.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+
+const char *audit_type_to_string(int type);
+int audit_type_from_string(const char *s);
+
+/* This is inspired by DNS TYPEnnn formatting */
+#define audit_type_name_alloca(type) \
+ ({ \
+ const char *_s_; \
+ _s_ = audit_type_to_string(type); \
+ if (!_s_) { \
+ _s_ = newa(char, STRLEN("AUDIT") + DECIMAL_STR_MAX(int)); \
+ sprintf((char*) _s_, "AUDIT%04i", type); \
+ } \
+ _s_; \
+ })
diff --git a/src/libsystemd/sd-journal/audit_type-to-name.awk b/src/libsystemd/sd-journal/audit_type-to-name.awk
new file mode 100644
index 0000000..a859c44
--- /dev/null
+++ b/src/libsystemd/sd-journal/audit_type-to-name.awk
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+BEGIN{
+ print "const char *audit_type_to_string(int type) {"
+ print " switch (type) {"
+}
+{
+ printf " case AUDIT_%s: return \"%s\";\n", $1, $1
+}
+END{
+ print " default: return NULL;"
+ print " }"
+ print "}"
+}
diff --git a/src/libsystemd/sd-journal/catalog.c b/src/libsystemd/sd-journal/catalog.c
new file mode 100644
index 0000000..ae91534
--- /dev/null
+++ b/src/libsystemd/sd-journal/catalog.c
@@ -0,0 +1,743 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <locale.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "catalog.h"
+#include "conf-files.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hashmap.h"
+#include "log.h"
+#include "memory-util.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "siphash24.h"
+#include "sort-util.h"
+#include "sparse-endian.h"
+#include "strbuf.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+
+const char * const catalog_file_dirs[] = {
+ "/usr/local/lib/systemd/catalog/",
+ "/usr/lib/systemd/catalog/",
+ NULL
+};
+
+#define CATALOG_SIGNATURE { 'R', 'H', 'H', 'H', 'K', 'S', 'L', 'P' }
+
+typedef struct CatalogHeader {
+ uint8_t signature[8]; /* "RHHHKSLP" */
+ le32_t compatible_flags;
+ le32_t incompatible_flags;
+ le64_t header_size;
+ le64_t n_items;
+ le64_t catalog_item_size;
+} CatalogHeader;
+
+typedef struct CatalogItem {
+ sd_id128_t id;
+ char language[32]; /* One byte is used for termination, so the maximum allowed
+ * length of the string is actually 31 bytes. */
+ le64_t offset;
+} CatalogItem;
+
+static void catalog_hash_func(const CatalogItem *i, struct siphash *state) {
+ siphash24_compress(&i->id, sizeof(i->id), state);
+ siphash24_compress_string(i->language, state);
+}
+
+static int catalog_compare_func(const CatalogItem *a, const CatalogItem *b) {
+ unsigned k;
+ int r;
+
+ for (k = 0; k < ELEMENTSOF(b->id.bytes); k++) {
+ r = CMP(a->id.bytes[k], b->id.bytes[k]);
+ if (r != 0)
+ return r;
+ }
+
+ return strcmp(a->language, b->language);
+}
+
+DEFINE_HASH_OPS(catalog_hash_ops, CatalogItem, catalog_hash_func, catalog_compare_func);
+
+static bool next_header(const char **s) {
+ const char *e;
+
+ e = strchr(*s, '\n');
+
+ /* Unexpected end */
+ if (!e)
+ return false;
+
+ /* End of headers */
+ if (e == *s)
+ return false;
+
+ *s = e + 1;
+ return true;
+}
+
+static const char *skip_header(const char *s) {
+ while (next_header(&s))
+ ;
+ return s;
+}
+
+static char *combine_entries(const char *one, const char *two) {
+ const char *b1, *b2;
+ size_t l1, l2, n;
+ char *dest, *p;
+
+ /* Find split point of headers to body */
+ b1 = skip_header(one);
+ b2 = skip_header(two);
+
+ l1 = strlen(one);
+ l2 = strlen(two);
+ dest = new(char, l1 + l2 + 1);
+ if (!dest) {
+ log_oom();
+ return NULL;
+ }
+
+ p = dest;
+
+ /* Headers from @one */
+ n = b1 - one;
+ p = mempcpy(p, one, n);
+
+ /* Headers from @two, these will only be found if not present above */
+ n = b2 - two;
+ p = mempcpy(p, two, n);
+
+ /* Body from @one */
+ n = l1 - (b1 - one);
+ if (n > 0)
+ p = mempcpy(p, b1, n);
+ /* Body from @two */
+ else {
+ n = l2 - (b2 - two);
+ p = mempcpy(p, b2, n);
+ }
+
+ assert(p - dest <= (ptrdiff_t)(l1 + l2));
+ p[0] = '\0';
+ return dest;
+}
+
+static int finish_item(
+ OrderedHashmap *h,
+ sd_id128_t id,
+ const char *language,
+ char *payload, size_t payload_size) {
+
+ _cleanup_free_ CatalogItem *i = NULL;
+ _cleanup_free_ char *combined = NULL;
+ char *prev;
+ int r;
+
+ assert(h);
+ assert(payload);
+ assert(payload_size > 0);
+
+ i = new0(CatalogItem, 1);
+ if (!i)
+ return log_oom();
+
+ i->id = id;
+ if (language) {
+ assert(strlen(language) > 1 && strlen(language) < 32);
+ strcpy(i->language, language);
+ }
+
+ prev = ordered_hashmap_get(h, i);
+ if (prev) {
+ /* Already have such an item, combine them */
+ combined = combine_entries(payload, prev);
+ if (!combined)
+ return log_oom();
+
+ r = ordered_hashmap_update(h, i, combined);
+ if (r < 0)
+ return log_error_errno(r, "Failed to update catalog item: %m");
+
+ TAKE_PTR(combined);
+ free(prev);
+ } else {
+ /* A new item */
+ combined = memdup(payload, payload_size + 1);
+ if (!combined)
+ return log_oom();
+
+ r = ordered_hashmap_put(h, i, combined);
+ if (r < 0)
+ return log_error_errno(r, "Failed to insert catalog item: %m");
+
+ TAKE_PTR(i);
+ TAKE_PTR(combined);
+ }
+
+ return 0;
+}
+
+int catalog_file_lang(const char* filename, char **lang) {
+ char *beg, *end, *_lang;
+
+ end = endswith(filename, ".catalog");
+ if (!end)
+ return 0;
+
+ beg = end - 1;
+ while (beg > filename && !IN_SET(*beg, '.', '/') && end - beg < 32)
+ beg--;
+
+ if (*beg != '.' || end <= beg + 1)
+ return 0;
+
+ _lang = strndup(beg + 1, end - beg - 1);
+ if (!_lang)
+ return -ENOMEM;
+
+ *lang = _lang;
+ return 1;
+}
+
+static int catalog_entry_lang(
+ const char* filename,
+ unsigned line,
+ const char* t,
+ const char* deflang,
+ char **ret) {
+
+ size_t c;
+ char *z;
+
+ c = strlen(t);
+ if (c < 2)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] Language too short.", filename, line);
+ if (c > 31)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] language too long.", filename, line);
+
+ if (deflang) {
+ if (streq(t, deflang)) {
+ log_warning("[%s:%u] language specified unnecessarily", filename, line);
+ return 0;
+ }
+
+ log_warning("[%s:%u] language differs from default for file", filename, line);
+ }
+
+ z = strdup(t);
+ if (!z)
+ return -ENOMEM;
+
+ *ret = z;
+ return 0;
+}
+
+int catalog_import_file(OrderedHashmap *h, const char *path) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *payload = NULL;
+ size_t payload_size = 0;
+ unsigned n = 0;
+ sd_id128_t id;
+ _cleanup_free_ char *deflang = NULL, *lang = NULL;
+ bool got_id = false, empty_line = true;
+ int r;
+
+ assert(h);
+ assert(path);
+
+ f = fopen(path, "re");
+ if (!f)
+ return log_error_errno(errno, "Failed to open file %s: %m", path);
+
+ r = catalog_file_lang(path, &deflang);
+ if (r < 0)
+ log_error_errno(r, "Failed to determine language for file %s: %m", path);
+ if (r == 1)
+ log_debug("File %s has language %s.", path, deflang);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ size_t line_len;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read file %s: %m", path);
+ if (r == 0)
+ break;
+
+ n++;
+
+ if (isempty(line)) {
+ empty_line = true;
+ continue;
+ }
+
+ if (strchr(COMMENTS, line[0]))
+ continue;
+
+ if (empty_line &&
+ strlen(line) >= 2+1+32 &&
+ line[0] == '-' &&
+ line[1] == '-' &&
+ line[2] == ' ' &&
+ IN_SET(line[2+1+32], ' ', '\0')) {
+
+ bool with_language;
+ sd_id128_t jd;
+
+ /* New entry */
+
+ with_language = line[2+1+32] != '\0';
+ line[2+1+32] = '\0';
+
+ if (sd_id128_from_string(line + 2 + 1, &jd) >= 0) {
+
+ if (got_id) {
+ if (payload_size == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] No payload text.",
+ path,
+ n);
+
+ r = finish_item(h, id, lang ?: deflang, payload, payload_size);
+ if (r < 0)
+ return r;
+
+ lang = mfree(lang);
+ payload_size = 0;
+ }
+
+ if (with_language) {
+ char *t;
+
+ t = strstrip(line + 2 + 1 + 32 + 1);
+ r = catalog_entry_lang(path, n, t, deflang, &lang);
+ if (r < 0)
+ return r;
+ }
+
+ got_id = true;
+ empty_line = false;
+ id = jd;
+
+ continue;
+ }
+ }
+
+ /* Payload */
+ if (!got_id)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] Got payload before ID.",
+ path, n);
+
+ line_len = strlen(line);
+ if (!GREEDY_REALLOC(payload, payload_size + (empty_line ? 1 : 0) + line_len + 1 + 1))
+ return log_oom();
+
+ if (empty_line)
+ payload[payload_size++] = '\n';
+ memcpy(payload + payload_size, line, line_len);
+ payload_size += line_len;
+ payload[payload_size++] = '\n';
+ payload[payload_size] = '\0';
+
+ empty_line = false;
+ }
+
+ if (got_id) {
+ if (payload_size == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "[%s:%u] No payload text.",
+ path, n);
+
+ r = finish_item(h, id, lang ?: deflang, payload, payload_size);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int64_t write_catalog(
+ const char *database,
+ struct strbuf *sb,
+ CatalogItem *items,
+ size_t n) {
+
+ _cleanup_fclose_ FILE *w = NULL;
+ _cleanup_free_ char *p = NULL;
+ CatalogHeader header;
+ size_t k;
+ int r;
+
+ r = mkdir_parents(database, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create parent directories of %s: %m", database);
+
+ r = fopen_temporary(database, &w, &p);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open database for writing: %s: %m",
+ database);
+
+ header = (CatalogHeader) {
+ .signature = CATALOG_SIGNATURE,
+ .header_size = htole64(CONST_ALIGN_TO(sizeof(CatalogHeader), 8)),
+ .catalog_item_size = htole64(sizeof(CatalogItem)),
+ .n_items = htole64(n),
+ };
+
+ r = -EIO;
+
+ k = fwrite(&header, 1, sizeof(header), w);
+ if (k != sizeof(header)) {
+ log_error("%s: failed to write header.", p);
+ goto error;
+ }
+
+ k = fwrite(items, 1, n * sizeof(CatalogItem), w);
+ if (k != n * sizeof(CatalogItem)) {
+ log_error("%s: failed to write database.", p);
+ goto error;
+ }
+
+ k = fwrite(sb->buf, 1, sb->len, w);
+ if (k != sb->len) {
+ log_error("%s: failed to write strings.", p);
+ goto error;
+ }
+
+ r = fflush_and_check(w);
+ if (r < 0) {
+ log_error_errno(r, "%s: failed to write database: %m", p);
+ goto error;
+ }
+
+ (void) fchmod(fileno(w), 0644);
+
+ if (rename(p, database) < 0) {
+ r = log_error_errno(errno, "rename (%s -> %s) failed: %m", p, database);
+ goto error;
+ }
+
+ return ftello(w);
+
+error:
+ (void) unlink(p);
+ return r;
+}
+
+int catalog_update(const char* database, const char* root, const char* const* dirs) {
+ _cleanup_strv_free_ char **files = NULL;
+ _cleanup_(strbuf_freep) struct strbuf *sb = NULL;
+ _cleanup_ordered_hashmap_free_free_free_ OrderedHashmap *h = NULL;
+ _cleanup_free_ CatalogItem *items = NULL;
+ ssize_t offset;
+ char *payload;
+ CatalogItem *i;
+ unsigned n;
+ int r;
+ int64_t sz;
+
+ h = ordered_hashmap_new(&catalog_hash_ops);
+ sb = strbuf_new();
+ if (!h || !sb)
+ return log_oom();
+
+ r = conf_files_list_strv(&files, ".catalog", root, 0, dirs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get catalog files: %m");
+
+ STRV_FOREACH(f, files) {
+ log_debug("Reading file '%s'", *f);
+ r = catalog_import_file(h, *f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to import file '%s': %m", *f);
+ }
+
+ if (ordered_hashmap_size(h) <= 0) {
+ log_info("No items in catalog.");
+ return 0;
+ } else
+ log_debug("Found %u items in catalog.", ordered_hashmap_size(h));
+
+ items = new(CatalogItem, ordered_hashmap_size(h));
+ if (!items)
+ return log_oom();
+
+ n = 0;
+ ORDERED_HASHMAP_FOREACH_KEY(payload, i, h) {
+ log_trace("Found " SD_ID128_FORMAT_STR ", language %s",
+ SD_ID128_FORMAT_VAL(i->id),
+ isempty(i->language) ? "C" : i->language);
+
+ offset = strbuf_add_string(sb, payload, strlen(payload));
+ if (offset < 0)
+ return log_oom();
+
+ i->offset = htole64((uint64_t) offset);
+ items[n++] = *i;
+ }
+
+ assert(n == ordered_hashmap_size(h));
+ typesafe_qsort(items, n, catalog_compare_func);
+
+ strbuf_complete(sb);
+
+ sz = write_catalog(database, sb, items, n);
+ if (sz < 0)
+ return log_error_errno(sz, "Failed to write %s: %m", database);
+
+ log_debug("%s: wrote %u items, with %zu bytes of strings, %"PRIi64" total size.",
+ database, n, sb->len, sz);
+ return 0;
+}
+
+static int open_mmap(const char *database, int *_fd, struct stat *_st, void **_p) {
+ _cleanup_close_ int fd = -EBADF;
+ const CatalogHeader *h;
+ struct stat st;
+ void *p;
+
+ assert(_fd);
+ assert(_st);
+ assert(_p);
+
+ fd = open(database, O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (st.st_size < (off_t) sizeof(CatalogHeader) || file_offset_beyond_memory_size(st.st_size))
+ return -EINVAL;
+
+ p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
+ if (p == MAP_FAILED)
+ return -errno;
+
+ h = p;
+ if (memcmp(h->signature, (const uint8_t[]) CATALOG_SIGNATURE, sizeof(h->signature)) != 0 ||
+ le64toh(h->header_size) < sizeof(CatalogHeader) ||
+ le64toh(h->catalog_item_size) < sizeof(CatalogItem) ||
+ h->incompatible_flags != 0 ||
+ le64toh(h->n_items) <= 0 ||
+ st.st_size < (off_t) (le64toh(h->header_size) + le64toh(h->catalog_item_size) * le64toh(h->n_items))) {
+ munmap(p, st.st_size);
+ return -EBADMSG;
+ }
+
+ *_fd = TAKE_FD(fd);
+ *_st = st;
+ *_p = p;
+
+ return 0;
+}
+
+static const char *find_id(void *p, sd_id128_t id) {
+ CatalogItem *f = NULL, key = { .id = id };
+ const CatalogHeader *h = p;
+ const char *loc;
+
+ loc = setlocale(LC_MESSAGES, NULL);
+ if (!isempty(loc) && !STR_IN_SET(loc, "C", "POSIX")) {
+ size_t len;
+
+ len = strcspn(loc, ".@");
+ if (len > sizeof(key.language) - 1)
+ log_debug("LC_MESSAGES value too long, ignoring: \"%.*s\"", (int) len, loc);
+ else {
+ strncpy(key.language, loc, len);
+ key.language[len] = '\0';
+
+ f = bsearch(&key,
+ (const uint8_t*) p + le64toh(h->header_size),
+ le64toh(h->n_items),
+ le64toh(h->catalog_item_size),
+ (comparison_fn_t) catalog_compare_func);
+ if (!f) {
+ char *e;
+
+ e = strchr(key.language, '_');
+ if (e) {
+ *e = 0;
+ f = bsearch(&key,
+ (const uint8_t*) p + le64toh(h->header_size),
+ le64toh(h->n_items),
+ le64toh(h->catalog_item_size),
+ (comparison_fn_t) catalog_compare_func);
+ }
+ }
+ }
+ }
+
+ if (!f) {
+ zero(key.language);
+ f = bsearch(&key,
+ (const uint8_t*) p + le64toh(h->header_size),
+ le64toh(h->n_items),
+ le64toh(h->catalog_item_size),
+ (comparison_fn_t) catalog_compare_func);
+ }
+
+ if (!f)
+ return NULL;
+
+ return (const char*) p +
+ le64toh(h->header_size) +
+ le64toh(h->n_items) * le64toh(h->catalog_item_size) +
+ le64toh(f->offset);
+}
+
+int catalog_get(const char* database, sd_id128_t id, char **_text) {
+ _cleanup_close_ int fd = -EBADF;
+ void *p = NULL;
+ struct stat st = {};
+ char *text = NULL;
+ int r;
+ const char *s;
+
+ assert(_text);
+
+ r = open_mmap(database, &fd, &st, &p);
+ if (r < 0)
+ return r;
+
+ s = find_id(p, id);
+ if (!s) {
+ r = -ENOENT;
+ goto finish;
+ }
+
+ text = strdup(s);
+ if (!text) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ *_text = text;
+ r = 0;
+
+finish:
+ if (p)
+ munmap(p, st.st_size);
+
+ return r;
+}
+
+static char *find_header(const char *s, const char *header) {
+
+ for (;;) {
+ const char *v;
+
+ v = startswith(s, header);
+ if (v) {
+ v += strspn(v, WHITESPACE);
+ return strndup(v, strcspn(v, NEWLINE));
+ }
+
+ if (!next_header(&s))
+ return NULL;
+ }
+}
+
+static void dump_catalog_entry(FILE *f, sd_id128_t id, const char *s, bool oneline) {
+ if (oneline) {
+ _cleanup_free_ char *subject = NULL, *defined_by = NULL;
+
+ subject = find_header(s, "Subject:");
+ defined_by = find_header(s, "Defined-By:");
+
+ fprintf(f, SD_ID128_FORMAT_STR " %s: %s\n",
+ SD_ID128_FORMAT_VAL(id),
+ strna(defined_by), strna(subject));
+ } else
+ fprintf(f, "-- " SD_ID128_FORMAT_STR "\n%s\n",
+ SD_ID128_FORMAT_VAL(id), s);
+}
+
+int catalog_list(FILE *f, const char *database, bool oneline) {
+ _cleanup_close_ int fd = -EBADF;
+ void *p = NULL;
+ struct stat st;
+ const CatalogHeader *h;
+ const CatalogItem *items;
+ int r;
+ unsigned n;
+ sd_id128_t last_id;
+ bool last_id_set = false;
+
+ r = open_mmap(database, &fd, &st, &p);
+ if (r < 0)
+ return r;
+
+ h = p;
+ items = (const CatalogItem*) ((const uint8_t*) p + le64toh(h->header_size));
+
+ for (n = 0; n < le64toh(h->n_items); n++) {
+ const char *s;
+
+ if (last_id_set && sd_id128_equal(last_id, items[n].id))
+ continue;
+
+ assert_se(s = find_id(p, items[n].id));
+
+ dump_catalog_entry(f, items[n].id, s, oneline);
+
+ last_id_set = true;
+ last_id = items[n].id;
+ }
+
+ munmap(p, st.st_size);
+
+ return 0;
+}
+
+int catalog_list_items(FILE *f, const char *database, bool oneline, char **items) {
+ int r = 0;
+
+ STRV_FOREACH(item, items) {
+ sd_id128_t id;
+ int k;
+ _cleanup_free_ char *msg = NULL;
+
+ k = sd_id128_from_string(*item, &id);
+ if (k < 0) {
+ log_error_errno(k, "Failed to parse id128 '%s': %m", *item);
+ if (r == 0)
+ r = k;
+ continue;
+ }
+
+ k = catalog_get(database, id, &msg);
+ if (k < 0) {
+ log_full_errno(k == -ENOENT ? LOG_NOTICE : LOG_ERR, k,
+ "Failed to retrieve catalog entry for '%s': %m", *item);
+ if (r == 0)
+ r = k;
+ continue;
+ }
+
+ dump_catalog_entry(f, id, msg, oneline);
+ }
+
+ return r;
+}
diff --git a/src/libsystemd/sd-journal/catalog.h b/src/libsystemd/sd-journal/catalog.h
new file mode 100644
index 0000000..df27869
--- /dev/null
+++ b/src/libsystemd/sd-journal/catalog.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "sd-id128.h"
+
+#include "hashmap.h"
+#include "strbuf.h"
+
+int catalog_import_file(OrderedHashmap *h, const char *path);
+int catalog_update(const char* database, const char* root, const char* const* dirs);
+int catalog_get(const char* database, sd_id128_t id, char **data);
+int catalog_list(FILE *f, const char* database, bool oneline);
+int catalog_list_items(FILE *f, const char* database, bool oneline, char **items);
+int catalog_file_lang(const char *filename, char **lang);
+extern const char * const catalog_file_dirs[];
+extern const struct hash_ops catalog_hash_ops;
diff --git a/src/libsystemd/sd-journal/fsprg.c b/src/libsystemd/sd-journal/fsprg.c
new file mode 100644
index 0000000..e86be6a
--- /dev/null
+++ b/src/libsystemd/sd-journal/fsprg.c
@@ -0,0 +1,381 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later
+ *
+ * fsprg v0.1 - (seekable) forward-secure pseudorandom generator
+ * Copyright © 2012 B. Poettering
+ * Contact: fsprg@point-at-infinity.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+/*
+ * See "Practical Secure Logging: Seekable Sequential Key Generators"
+ * by G. A. Marson, B. Poettering for details:
+ *
+ * http://eprint.iacr.org/2013/397
+ */
+
+#include <string.h>
+
+#include "fsprg.h"
+#include "gcrypt-util.h"
+#include "memory-util.h"
+
+#define ISVALID_SECPAR(secpar) (((secpar) % 16 == 0) && ((secpar) >= 16) && ((secpar) <= 16384))
+#define VALIDATE_SECPAR(secpar) assert(ISVALID_SECPAR(secpar));
+
+#define RND_HASH GCRY_MD_SHA256
+#define RND_GEN_P 0x01
+#define RND_GEN_Q 0x02
+#define RND_GEN_X 0x03
+
+#pragma GCC diagnostic ignored "-Wpointer-arith"
+/* TODO: remove void* arithmetic and this work-around */
+
+/******************************************************************************/
+
+static void mpi_export(void *buf, size_t buflen, const gcry_mpi_t x) {
+ unsigned len;
+ size_t nwritten;
+
+ assert(gcry_mpi_cmp_ui(x, 0) >= 0);
+ len = (gcry_mpi_get_nbits(x) + 7) / 8;
+ assert(len <= buflen);
+ memzero(buf, buflen);
+ gcry_mpi_print(GCRYMPI_FMT_USG, buf + (buflen - len), len, &nwritten, x);
+ assert(nwritten == len);
+}
+
+static gcry_mpi_t mpi_import(const void *buf, size_t buflen) {
+ gcry_mpi_t h;
+ _unused_ unsigned len;
+
+ assert_se(gcry_mpi_scan(&h, GCRYMPI_FMT_USG, buf, buflen, NULL) == 0);
+ len = (gcry_mpi_get_nbits(h) + 7) / 8;
+ assert(len <= buflen);
+ assert(gcry_mpi_cmp_ui(h, 0) >= 0);
+
+ return h;
+}
+
+static void uint64_export(void *buf, size_t buflen, uint64_t x) {
+ assert(buflen == 8);
+ ((uint8_t*) buf)[0] = (x >> 56) & 0xff;
+ ((uint8_t*) buf)[1] = (x >> 48) & 0xff;
+ ((uint8_t*) buf)[2] = (x >> 40) & 0xff;
+ ((uint8_t*) buf)[3] = (x >> 32) & 0xff;
+ ((uint8_t*) buf)[4] = (x >> 24) & 0xff;
+ ((uint8_t*) buf)[5] = (x >> 16) & 0xff;
+ ((uint8_t*) buf)[6] = (x >> 8) & 0xff;
+ ((uint8_t*) buf)[7] = (x >> 0) & 0xff;
+}
+
+static uint64_t uint64_import(const void *buf, size_t buflen) {
+ assert(buflen == 8);
+ return
+ (uint64_t)(((uint8_t*) buf)[0]) << 56 |
+ (uint64_t)(((uint8_t*) buf)[1]) << 48 |
+ (uint64_t)(((uint8_t*) buf)[2]) << 40 |
+ (uint64_t)(((uint8_t*) buf)[3]) << 32 |
+ (uint64_t)(((uint8_t*) buf)[4]) << 24 |
+ (uint64_t)(((uint8_t*) buf)[5]) << 16 |
+ (uint64_t)(((uint8_t*) buf)[6]) << 8 |
+ (uint64_t)(((uint8_t*) buf)[7]) << 0;
+}
+
+/* deterministically generate from seed/idx a string of buflen pseudorandom bytes */
+static void det_randomize(void *buf, size_t buflen, const void *seed, size_t seedlen, uint32_t idx) {
+ gcry_md_hd_t hd, hd2;
+ size_t olen, cpylen;
+ gcry_error_t err;
+ uint32_t ctr;
+
+ olen = gcry_md_get_algo_dlen(RND_HASH);
+ err = gcry_md_open(&hd, RND_HASH, 0);
+ assert_se(gcry_err_code(err) == GPG_ERR_NO_ERROR); /* This shouldn't happen */
+ gcry_md_write(hd, seed, seedlen);
+ gcry_md_putc(hd, (idx >> 24) & 0xff);
+ gcry_md_putc(hd, (idx >> 16) & 0xff);
+ gcry_md_putc(hd, (idx >> 8) & 0xff);
+ gcry_md_putc(hd, (idx >> 0) & 0xff);
+
+ for (ctr = 0; buflen; ctr++) {
+ err = gcry_md_copy(&hd2, hd);
+ assert_se(gcry_err_code(err) == GPG_ERR_NO_ERROR); /* This shouldn't happen */
+ gcry_md_putc(hd2, (ctr >> 24) & 0xff);
+ gcry_md_putc(hd2, (ctr >> 16) & 0xff);
+ gcry_md_putc(hd2, (ctr >> 8) & 0xff);
+ gcry_md_putc(hd2, (ctr >> 0) & 0xff);
+ gcry_md_final(hd2);
+ cpylen = (buflen < olen) ? buflen : olen;
+ memcpy(buf, gcry_md_read(hd2, RND_HASH), cpylen);
+ gcry_md_close(hd2);
+ buf += cpylen;
+ buflen -= cpylen;
+ }
+ gcry_md_close(hd);
+}
+
+/* deterministically generate from seed/idx a prime of length `bits' that is 3 (mod 4) */
+static gcry_mpi_t genprime3mod4(int bits, const void *seed, size_t seedlen, uint32_t idx) {
+ size_t buflen = bits / 8;
+ uint8_t buf[buflen];
+ gcry_mpi_t p;
+
+ assert(bits % 8 == 0);
+ assert(buflen > 0);
+
+ det_randomize(buf, buflen, seed, seedlen, idx);
+ buf[0] |= 0xc0; /* set upper two bits, so that n=pq has maximum size */
+ buf[buflen - 1] |= 0x03; /* set lower two bits, to have result 3 (mod 4) */
+
+ p = mpi_import(buf, buflen);
+ while (gcry_prime_check(p, 0))
+ gcry_mpi_add_ui(p, p, 4);
+
+ return p;
+}
+
+/* deterministically generate from seed/idx a quadratic residue (mod n) */
+static gcry_mpi_t gensquare(const gcry_mpi_t n, const void *seed, size_t seedlen, uint32_t idx, unsigned secpar) {
+ size_t buflen = secpar / 8;
+ uint8_t buf[buflen];
+ gcry_mpi_t x;
+
+ det_randomize(buf, buflen, seed, seedlen, idx);
+ buf[0] &= 0x7f; /* clear upper bit, so that we have x < n */
+ x = mpi_import(buf, buflen);
+ assert(gcry_mpi_cmp(x, n) < 0);
+ gcry_mpi_mulm(x, x, x, n);
+ return x;
+}
+
+/* compute 2^m (mod phi(p)), for a prime p */
+static gcry_mpi_t twopowmodphi(uint64_t m, const gcry_mpi_t p) {
+ gcry_mpi_t phi, r;
+ int n;
+
+ phi = gcry_mpi_new(0);
+ gcry_mpi_sub_ui(phi, p, 1);
+
+ /* count number of used bits in m */
+ for (n = 0; (1ULL << n) <= m; n++)
+ ;
+
+ r = gcry_mpi_new(0);
+ gcry_mpi_set_ui(r, 1);
+ while (n) { /* square and multiply algorithm for fast exponentiation */
+ n--;
+ gcry_mpi_mulm(r, r, r, phi);
+ if (m & ((uint64_t)1 << n)) {
+ gcry_mpi_add(r, r, r);
+ if (gcry_mpi_cmp(r, phi) >= 0)
+ gcry_mpi_sub(r, r, phi);
+ }
+ }
+
+ gcry_mpi_release(phi);
+ return r;
+}
+
+/* Decompose $x \in Z_n$ into $(xp,xq) \in Z_p \times Z_q$ using Chinese Remainder Theorem */
+static void CRT_decompose(gcry_mpi_t *xp, gcry_mpi_t *xq, const gcry_mpi_t x, const gcry_mpi_t p, const gcry_mpi_t q) {
+ *xp = gcry_mpi_new(0);
+ *xq = gcry_mpi_new(0);
+ gcry_mpi_mod(*xp, x, p);
+ gcry_mpi_mod(*xq, x, q);
+}
+
+/* Compose $(xp,xq) \in Z_p \times Z_q$ into $x \in Z_n$ using Chinese Remainder Theorem */
+static void CRT_compose(gcry_mpi_t *x, const gcry_mpi_t xp, const gcry_mpi_t xq, const gcry_mpi_t p, const gcry_mpi_t q) {
+ gcry_mpi_t a, u;
+
+ a = gcry_mpi_new(0);
+ u = gcry_mpi_new(0);
+ *x = gcry_mpi_new(0);
+ gcry_mpi_subm(a, xq, xp, q);
+ gcry_mpi_invm(u, p, q);
+ gcry_mpi_mulm(a, a, u, q); /* a = (xq - xp) / p (mod q) */
+ gcry_mpi_mul(*x, p, a);
+ gcry_mpi_add(*x, *x, xp); /* x = p * ((xq - xp) / p mod q) + xp */
+ gcry_mpi_release(a);
+ gcry_mpi_release(u);
+}
+
+/******************************************************************************/
+
+size_t FSPRG_mskinbytes(unsigned _secpar) {
+ VALIDATE_SECPAR(_secpar);
+ return 2 + 2 * (_secpar / 2) / 8; /* to store header,p,q */
+}
+
+size_t FSPRG_mpkinbytes(unsigned _secpar) {
+ VALIDATE_SECPAR(_secpar);
+ return 2 + _secpar / 8; /* to store header,n */
+}
+
+size_t FSPRG_stateinbytes(unsigned _secpar) {
+ VALIDATE_SECPAR(_secpar);
+ return 2 + 2 * _secpar / 8 + 8; /* to store header,n,x,epoch */
+}
+
+static void store_secpar(void *buf, uint16_t secpar) {
+ secpar = secpar / 16 - 1;
+ ((uint8_t*) buf)[0] = (secpar >> 8) & 0xff;
+ ((uint8_t*) buf)[1] = (secpar >> 0) & 0xff;
+}
+
+static uint16_t read_secpar(const void *buf) {
+ uint16_t secpar;
+ secpar =
+ (uint16_t)(((uint8_t*) buf)[0]) << 8 |
+ (uint16_t)(((uint8_t*) buf)[1]) << 0;
+ return 16 * (secpar + 1);
+}
+
+void FSPRG_GenMK(void *msk, void *mpk, const void *seed, size_t seedlen, unsigned _secpar) {
+ uint8_t iseed[FSPRG_RECOMMENDED_SEEDLEN];
+ gcry_mpi_t n, p, q;
+ uint16_t secpar;
+
+ VALIDATE_SECPAR(_secpar);
+ secpar = _secpar;
+
+ initialize_libgcrypt(false);
+
+ if (!seed) {
+ gcry_randomize(iseed, FSPRG_RECOMMENDED_SEEDLEN, GCRY_STRONG_RANDOM);
+ seed = iseed;
+ seedlen = FSPRG_RECOMMENDED_SEEDLEN;
+ }
+
+ p = genprime3mod4(secpar / 2, seed, seedlen, RND_GEN_P);
+ q = genprime3mod4(secpar / 2, seed, seedlen, RND_GEN_Q);
+
+ if (msk) {
+ store_secpar(msk + 0, secpar);
+ mpi_export(msk + 2 + 0 * (secpar / 2) / 8, (secpar / 2) / 8, p);
+ mpi_export(msk + 2 + 1 * (secpar / 2) / 8, (secpar / 2) / 8, q);
+ }
+
+ if (mpk) {
+ n = gcry_mpi_new(0);
+ gcry_mpi_mul(n, p, q);
+ assert(gcry_mpi_get_nbits(n) == secpar);
+
+ store_secpar(mpk + 0, secpar);
+ mpi_export(mpk + 2, secpar / 8, n);
+
+ gcry_mpi_release(n);
+ }
+
+ gcry_mpi_release(p);
+ gcry_mpi_release(q);
+}
+
+void FSPRG_GenState0(void *state, const void *mpk, const void *seed, size_t seedlen) {
+ gcry_mpi_t n, x;
+ uint16_t secpar;
+
+ initialize_libgcrypt(false);
+
+ secpar = read_secpar(mpk + 0);
+ n = mpi_import(mpk + 2, secpar / 8);
+ x = gensquare(n, seed, seedlen, RND_GEN_X, secpar);
+
+ memcpy(state, mpk, 2 + secpar / 8);
+ mpi_export(state + 2 + 1 * secpar / 8, secpar / 8, x);
+ memzero(state + 2 + 2 * secpar / 8, 8);
+
+ gcry_mpi_release(n);
+ gcry_mpi_release(x);
+}
+
+void FSPRG_Evolve(void *state) {
+ gcry_mpi_t n, x;
+ uint16_t secpar;
+ uint64_t epoch;
+
+ initialize_libgcrypt(false);
+
+ secpar = read_secpar(state + 0);
+ n = mpi_import(state + 2 + 0 * secpar / 8, secpar / 8);
+ x = mpi_import(state + 2 + 1 * secpar / 8, secpar / 8);
+ epoch = uint64_import(state + 2 + 2 * secpar / 8, 8);
+
+ gcry_mpi_mulm(x, x, x, n);
+ epoch++;
+
+ mpi_export(state + 2 + 1 * secpar / 8, secpar / 8, x);
+ uint64_export(state + 2 + 2 * secpar / 8, 8, epoch);
+
+ gcry_mpi_release(n);
+ gcry_mpi_release(x);
+}
+
+uint64_t FSPRG_GetEpoch(const void *state) {
+ uint16_t secpar;
+ secpar = read_secpar(state + 0);
+ return uint64_import(state + 2 + 2 * secpar / 8, 8);
+}
+
+void FSPRG_Seek(void *state, uint64_t epoch, const void *msk, const void *seed, size_t seedlen) {
+ gcry_mpi_t p, q, n, x, xp, xq, kp, kq, xm;
+ uint16_t secpar;
+
+ initialize_libgcrypt(false);
+
+ secpar = read_secpar(msk + 0);
+ p = mpi_import(msk + 2 + 0 * (secpar / 2) / 8, (secpar / 2) / 8);
+ q = mpi_import(msk + 2 + 1 * (secpar / 2) / 8, (secpar / 2) / 8);
+
+ n = gcry_mpi_new(0);
+ gcry_mpi_mul(n, p, q);
+
+ x = gensquare(n, seed, seedlen, RND_GEN_X, secpar);
+ CRT_decompose(&xp, &xq, x, p, q); /* split (mod n) into (mod p) and (mod q) using CRT */
+
+ kp = twopowmodphi(epoch, p); /* compute 2^epoch (mod phi(p)) */
+ kq = twopowmodphi(epoch, q); /* compute 2^epoch (mod phi(q)) */
+
+ gcry_mpi_powm(xp, xp, kp, p); /* compute x^(2^epoch) (mod p) */
+ gcry_mpi_powm(xq, xq, kq, q); /* compute x^(2^epoch) (mod q) */
+
+ CRT_compose(&xm, xp, xq, p, q); /* combine (mod p) and (mod q) to (mod n) using CRT */
+
+ store_secpar(state + 0, secpar);
+ mpi_export(state + 2 + 0 * secpar / 8, secpar / 8, n);
+ mpi_export(state + 2 + 1 * secpar / 8, secpar / 8, xm);
+ uint64_export(state + 2 + 2 * secpar / 8, 8, epoch);
+
+ gcry_mpi_release(p);
+ gcry_mpi_release(q);
+ gcry_mpi_release(n);
+ gcry_mpi_release(x);
+ gcry_mpi_release(xp);
+ gcry_mpi_release(xq);
+ gcry_mpi_release(kp);
+ gcry_mpi_release(kq);
+ gcry_mpi_release(xm);
+}
+
+void FSPRG_GetKey(const void *state, void *key, size_t keylen, uint32_t idx) {
+ uint16_t secpar;
+
+ initialize_libgcrypt(false);
+
+ secpar = read_secpar(state + 0);
+ det_randomize(key, keylen, state + 2, 2 * secpar / 8 + 8, idx);
+}
diff --git a/src/libsystemd/sd-journal/fsprg.h b/src/libsystemd/sd-journal/fsprg.h
new file mode 100644
index 0000000..d3d88aa
--- /dev/null
+++ b/src/libsystemd/sd-journal/fsprg.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/*
+ * fsprg v0.1 - (seekable) forward-secure pseudorandom generator
+ * Copyright © 2012 B. Poettering
+ * Contact: fsprg@point-at-infinity.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+#include "macro.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FSPRG_RECOMMENDED_SECPAR 1536
+#define FSPRG_RECOMMENDED_SEEDLEN (96/8)
+
+size_t FSPRG_mskinbytes(unsigned secpar) _const_;
+size_t FSPRG_mpkinbytes(unsigned secpar) _const_;
+size_t FSPRG_stateinbytes(unsigned secpar) _const_;
+
+/* Setup msk and mpk. Providing seed != NULL makes this algorithm deterministic. */
+void FSPRG_GenMK(void *msk, void *mpk, const void *seed, size_t seedlen, unsigned secpar);
+
+/* Initialize state deterministically in dependence on seed. */
+/* Note: in case one wants to run only one GenState0 per GenMK it is safe to use
+ the same seed for both GenMK and GenState0.
+*/
+void FSPRG_GenState0(void *state, const void *mpk, const void *seed, size_t seedlen);
+
+void FSPRG_Evolve(void *state);
+
+uint64_t FSPRG_GetEpoch(const void *state) _pure_;
+
+/* Seek to any arbitrary state (by providing msk together with seed from GenState0). */
+void FSPRG_Seek(void *state, uint64_t epoch, const void *msk, const void *seed, size_t seedlen);
+
+void FSPRG_GetKey(const void *state, void *key, size_t keylen, uint32_t idx);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/libsystemd/sd-journal/generate-audit_type-list.sh b/src/libsystemd/sd-journal/generate-audit_type-list.sh
new file mode 100755
index 0000000..3851ea1
--- /dev/null
+++ b/src/libsystemd/sd-journal/generate-audit_type-list.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: LGPL-2.1-or-later
+set -eu
+set -o pipefail
+
+cpp="${1:?}"
+shift
+
+includes=()
+for i in "$@"; do
+ includes+=(-include "$i")
+done
+
+$cpp -dM "${includes[@]}" - </dev/null | \
+ grep -vE 'AUDIT_.*(FIRST|LAST)_' | \
+ sed -r -n 's/^#define\s+AUDIT_(\w+)\s+([0-9]{4})\s*$$/\1\t\2/p' | \
+ sort -k2
diff --git a/src/libsystemd/sd-journal/journal-authenticate.c b/src/libsystemd/sd-journal/journal-authenticate.c
new file mode 100644
index 0000000..8e7533e
--- /dev/null
+++ b/src/libsystemd/sd-journal/journal-authenticate.c
@@ -0,0 +1,525 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include "fd-util.h"
+#include "fsprg.h"
+#include "gcrypt-util.h"
+#include "hexdecoct.h"
+#include "journal-authenticate.h"
+#include "journal-def.h"
+#include "journal-file.h"
+#include "memory-util.h"
+#include "time-util.h"
+
+static void* fssheader_free(FSSHeader *p) {
+ /* mmap() returns MAP_FAILED on error and sets the errno */
+ if (!p || p == MAP_FAILED)
+ return NULL;
+
+ assert_se(munmap(p, PAGE_ALIGN(sizeof(FSSHeader))) >= 0);
+ return NULL;
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(FSSHeader*, fssheader_free);
+
+static uint64_t journal_file_tag_seqnum(JournalFile *f) {
+ uint64_t r;
+
+ assert(f);
+
+ r = le64toh(f->header->n_tags) + 1;
+ f->header->n_tags = htole64(r);
+
+ return r;
+}
+
+int journal_file_append_tag(JournalFile *f) {
+ Object *o;
+ uint64_t p;
+ int r;
+
+ assert(f);
+
+ if (!JOURNAL_HEADER_SEALED(f->header))
+ return 0;
+
+ if (!f->hmac_running) {
+ r = journal_file_hmac_start(f);
+ if (r < 0)
+ return r;
+ }
+
+ assert(f->hmac);
+
+ r = journal_file_append_object(f, OBJECT_TAG, sizeof(struct TagObject), &o, &p);
+ if (r < 0)
+ return r;
+
+ o->tag.seqnum = htole64(journal_file_tag_seqnum(f));
+ o->tag.epoch = htole64(FSPRG_GetEpoch(f->fsprg_state));
+
+ log_debug("Writing tag %"PRIu64" for epoch %"PRIu64"",
+ le64toh(o->tag.seqnum),
+ FSPRG_GetEpoch(f->fsprg_state));
+
+ /* Add the tag object itself, so that we can protect its
+ * header. This will exclude the actual hash value in it */
+ r = journal_file_hmac_put_object(f, OBJECT_TAG, o, p);
+ if (r < 0)
+ return r;
+
+ /* Get the HMAC tag and store it in the object */
+ memcpy(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH);
+ f->hmac_running = false;
+
+ return 0;
+}
+
+int journal_file_hmac_start(JournalFile *f) {
+ uint8_t key[256 / 8]; /* Let's pass 256 bit from FSPRG to HMAC */
+ gcry_error_t err;
+
+ assert(f);
+
+ if (!JOURNAL_HEADER_SEALED(f->header))
+ return 0;
+
+ if (f->hmac_running)
+ return 0;
+
+ /* Prepare HMAC for next cycle */
+ gcry_md_reset(f->hmac);
+ FSPRG_GetKey(f->fsprg_state, key, sizeof(key), 0);
+ err = gcry_md_setkey(f->hmac, key, sizeof(key));
+ if (gcry_err_code(err) != GPG_ERR_NO_ERROR)
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO),
+ "gcry_md_setkey() failed with error code: %s",
+ gcry_strerror(err));
+
+ f->hmac_running = true;
+
+ return 0;
+}
+
+static int journal_file_get_epoch(JournalFile *f, uint64_t realtime, uint64_t *epoch) {
+ uint64_t t;
+
+ assert(f);
+ assert(epoch);
+ assert(JOURNAL_HEADER_SEALED(f->header));
+
+ if (f->fss_start_usec == 0 || f->fss_interval_usec == 0)
+ return -EOPNOTSUPP;
+
+ if (realtime < f->fss_start_usec)
+ return -ESTALE;
+
+ t = realtime - f->fss_start_usec;
+ t = t / f->fss_interval_usec;
+
+ *epoch = t;
+
+ return 0;
+}
+
+static int journal_file_fsprg_need_evolve(JournalFile *f, uint64_t realtime) {
+ uint64_t goal, epoch;
+ int r;
+
+ assert(f);
+
+ if (!JOURNAL_HEADER_SEALED(f->header))
+ return 0;
+
+ r = journal_file_get_epoch(f, realtime, &goal);
+ if (r < 0)
+ return r;
+
+ epoch = FSPRG_GetEpoch(f->fsprg_state);
+ if (epoch > goal)
+ return -ESTALE;
+
+ return epoch != goal;
+}
+
+int journal_file_fsprg_evolve(JournalFile *f, uint64_t realtime) {
+ uint64_t goal, epoch;
+ int r;
+
+ assert(f);
+
+ if (!JOURNAL_HEADER_SEALED(f->header))
+ return 0;
+
+ r = journal_file_get_epoch(f, realtime, &goal);
+ if (r < 0)
+ return r;
+
+ epoch = FSPRG_GetEpoch(f->fsprg_state);
+ if (epoch < goal)
+ log_debug("Evolving FSPRG key from epoch %"PRIu64" to %"PRIu64".", epoch, goal);
+
+ for (;;) {
+ if (epoch > goal)
+ return -ESTALE;
+ if (epoch == goal)
+ return 0;
+
+ FSPRG_Evolve(f->fsprg_state);
+ epoch = FSPRG_GetEpoch(f->fsprg_state);
+ if (epoch < goal) {
+ r = journal_file_append_tag(f);
+ if (r < 0)
+ return r;
+ }
+ }
+}
+
+int journal_file_fsprg_seek(JournalFile *f, uint64_t goal) {
+ void *msk;
+ uint64_t epoch;
+
+ assert(f);
+
+ if (!JOURNAL_HEADER_SEALED(f->header))
+ return 0;
+
+ assert(f->fsprg_seed);
+
+ if (f->fsprg_state) {
+ /* Cheaper... */
+
+ epoch = FSPRG_GetEpoch(f->fsprg_state);
+ if (goal == epoch)
+ return 0;
+
+ if (goal == epoch + 1) {
+ FSPRG_Evolve(f->fsprg_state);
+ return 0;
+ }
+ } else {
+ f->fsprg_state_size = FSPRG_stateinbytes(FSPRG_RECOMMENDED_SECPAR);
+ f->fsprg_state = malloc(f->fsprg_state_size);
+ if (!f->fsprg_state)
+ return -ENOMEM;
+ }
+
+ log_debug("Seeking FSPRG key to %"PRIu64".", goal);
+
+ msk = alloca_safe(FSPRG_mskinbytes(FSPRG_RECOMMENDED_SECPAR));
+ FSPRG_GenMK(msk, NULL, f->fsprg_seed, f->fsprg_seed_size, FSPRG_RECOMMENDED_SECPAR);
+ FSPRG_Seek(f->fsprg_state, goal, msk, f->fsprg_seed, f->fsprg_seed_size);
+
+ return 0;
+}
+
+int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime) {
+ int r;
+
+ assert(f);
+
+ if (!JOURNAL_HEADER_SEALED(f->header))
+ return 0;
+
+ if (realtime <= 0)
+ realtime = now(CLOCK_REALTIME);
+
+ r = journal_file_fsprg_need_evolve(f, realtime);
+ if (r <= 0)
+ return 0;
+
+ r = journal_file_append_tag(f);
+ if (r < 0)
+ return r;
+
+ r = journal_file_fsprg_evolve(f, realtime);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int journal_file_hmac_put_object(JournalFile *f, ObjectType type, Object *o, uint64_t p) {
+ int r;
+
+ assert(f);
+
+ if (!JOURNAL_HEADER_SEALED(f->header))
+ return 0;
+
+ r = journal_file_hmac_start(f);
+ if (r < 0)
+ return r;
+
+ if (!o) {
+ r = journal_file_move_to_object(f, type, p, &o);
+ if (r < 0)
+ return r;
+ } else if (type > OBJECT_UNUSED && o->object.type != type)
+ return -EBADMSG;
+
+ gcry_md_write(f->hmac, o, offsetof(ObjectHeader, payload));
+
+ switch (o->object.type) {
+
+ case OBJECT_DATA:
+ /* All but hash and payload are mutable */
+ gcry_md_write(f->hmac, &o->data.hash, sizeof(o->data.hash));
+ gcry_md_write(f->hmac, journal_file_data_payload_field(f, o), le64toh(o->object.size) - journal_file_data_payload_offset(f));
+ break;
+
+ case OBJECT_FIELD:
+ /* Same here */
+ gcry_md_write(f->hmac, &o->field.hash, sizeof(o->field.hash));
+ gcry_md_write(f->hmac, o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));
+ break;
+
+ case OBJECT_ENTRY:
+ /* All */
+ gcry_md_write(f->hmac, &o->entry.seqnum, le64toh(o->object.size) - offsetof(Object, entry.seqnum));
+ break;
+
+ case OBJECT_FIELD_HASH_TABLE:
+ case OBJECT_DATA_HASH_TABLE:
+ case OBJECT_ENTRY_ARRAY:
+ /* Nothing: everything is mutable */
+ break;
+
+ case OBJECT_TAG:
+ /* All but the tag itself */
+ gcry_md_write(f->hmac, &o->tag.seqnum, sizeof(o->tag.seqnum));
+ gcry_md_write(f->hmac, &o->tag.epoch, sizeof(o->tag.epoch));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int journal_file_hmac_put_header(JournalFile *f) {
+ int r;
+
+ assert(f);
+
+ if (!JOURNAL_HEADER_SEALED(f->header))
+ return 0;
+
+ r = journal_file_hmac_start(f);
+ if (r < 0)
+ return r;
+
+ /* All but state+reserved, boot_id, arena_size,
+ * tail_object_offset, n_objects, n_entries,
+ * tail_entry_seqnum, head_entry_seqnum, entry_array_offset,
+ * head_entry_realtime, tail_entry_realtime,
+ * tail_entry_monotonic, n_data, n_fields, n_tags,
+ * n_entry_arrays. */
+
+ gcry_md_write(f->hmac, f->header->signature, offsetof(Header, state) - offsetof(Header, signature));
+ gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, tail_entry_boot_id) - offsetof(Header, file_id));
+ gcry_md_write(f->hmac, &f->header->seqnum_id, offsetof(Header, arena_size) - offsetof(Header, seqnum_id));
+ gcry_md_write(f->hmac, &f->header->data_hash_table_offset, offsetof(Header, tail_object_offset) - offsetof(Header, data_hash_table_offset));
+
+ return 0;
+}
+
+int journal_file_fss_load(JournalFile *f) {
+ _cleanup_close_ int fd = -EBADF;
+ _cleanup_free_ char *path = NULL;
+ _cleanup_(fssheader_freep) FSSHeader *header = NULL;
+ struct stat st;
+ sd_id128_t machine;
+ int r;
+
+ assert(f);
+
+ /* This function is used to determine whether sealing should be enabled in the journal header so we
+ * can't check the header to check if sealing is enabled here. */
+
+ r = sd_id128_get_machine(&machine);
+ if (r < 0)
+ return r;
+
+ if (asprintf(&path, "/var/log/journal/" SD_ID128_FORMAT_STR "/fss",
+ SD_ID128_FORMAT_VAL(machine)) < 0)
+ return -ENOMEM;
+
+ fd = open(path, O_RDWR|O_CLOEXEC|O_NOCTTY, 0600);
+ if (fd < 0) {
+ if (errno != ENOENT)
+ log_error_errno(errno, "Failed to open %s: %m", path);
+
+ return -errno;
+ }
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (st.st_size < (off_t) sizeof(FSSHeader))
+ return -ENODATA;
+
+ header = mmap(NULL, PAGE_ALIGN(sizeof(FSSHeader)), PROT_READ, MAP_SHARED, fd, 0);
+ if (header == MAP_FAILED)
+ return -errno;
+
+ if (memcmp(header->signature, FSS_HEADER_SIGNATURE, 8) != 0)
+ return -EBADMSG;
+
+ if (header->incompatible_flags != 0)
+ return -EPROTONOSUPPORT;
+
+ if (le64toh(header->header_size) < sizeof(FSSHeader))
+ return -EBADMSG;
+
+ if (le64toh(header->fsprg_state_size) != FSPRG_stateinbytes(le16toh(header->fsprg_secpar)))
+ return -EBADMSG;
+
+ f->fss_file_size = le64toh(header->header_size) + le64toh(header->fsprg_state_size);
+ if ((uint64_t) st.st_size < f->fss_file_size)
+ return -ENODATA;
+
+ if (!sd_id128_equal(machine, header->machine_id))
+ return -EHOSTDOWN;
+
+ if (le64toh(header->start_usec) <= 0 || le64toh(header->interval_usec) <= 0)
+ return -EBADMSG;
+
+ size_t sz = PAGE_ALIGN(f->fss_file_size);
+ assert(sz < SIZE_MAX);
+ f->fss_file = mmap(NULL, sz, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+ if (f->fss_file == MAP_FAILED) {
+ f->fss_file = NULL;
+ return -errno;
+ }
+
+ f->fss_start_usec = le64toh(f->fss_file->start_usec);
+ f->fss_interval_usec = le64toh(f->fss_file->interval_usec);
+
+ f->fsprg_state = (uint8_t*) f->fss_file + le64toh(f->fss_file->header_size);
+ f->fsprg_state_size = le64toh(f->fss_file->fsprg_state_size);
+
+ return 0;
+}
+
+int journal_file_hmac_setup(JournalFile *f) {
+ gcry_error_t e;
+
+ if (!JOURNAL_HEADER_SEALED(f->header))
+ return 0;
+
+ initialize_libgcrypt(true);
+
+ e = gcry_md_open(&f->hmac, GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC);
+ if (e != 0)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+int journal_file_append_first_tag(JournalFile *f) {
+ uint64_t p;
+ int r;
+
+ if (!JOURNAL_HEADER_SEALED(f->header))
+ return 0;
+
+ log_debug("Calculating first tag...");
+
+ r = journal_file_hmac_put_header(f);
+ if (r < 0)
+ return r;
+
+ p = le64toh(f->header->field_hash_table_offset);
+ if (p < offsetof(Object, hash_table.items))
+ return -EINVAL;
+ p -= offsetof(Object, hash_table.items);
+
+ r = journal_file_hmac_put_object(f, OBJECT_FIELD_HASH_TABLE, NULL, p);
+ if (r < 0)
+ return r;
+
+ p = le64toh(f->header->data_hash_table_offset);
+ if (p < offsetof(Object, hash_table.items))
+ return -EINVAL;
+ p -= offsetof(Object, hash_table.items);
+
+ r = journal_file_hmac_put_object(f, OBJECT_DATA_HASH_TABLE, NULL, p);
+ if (r < 0)
+ return r;
+
+ r = journal_file_append_tag(f);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int journal_file_parse_verification_key(JournalFile *f, const char *key) {
+ _cleanup_free_ uint8_t *seed = NULL;
+ size_t seed_size;
+ const char *k;
+ unsigned long long start, interval;
+ int r;
+
+ assert(f);
+ assert(key);
+
+ seed_size = FSPRG_RECOMMENDED_SEEDLEN;
+ seed = malloc(seed_size);
+ if (!seed)
+ return -ENOMEM;
+
+ k = key;
+ for (size_t c = 0; c < seed_size; c++) {
+ int x, y;
+
+ k = skip_leading_chars(k, "-");
+
+ x = unhexchar(*k);
+ if (x < 0)
+ return -EINVAL;
+ k++;
+
+ y = unhexchar(*k);
+ if (y < 0)
+ return -EINVAL;
+ k++;
+
+ seed[c] = (uint8_t) (x * 16 + y);
+ }
+
+ if (*k != '/')
+ return -EINVAL;
+ k++;
+
+ r = sscanf(k, "%llx-%llx", &start, &interval);
+ if (r != 2)
+ return -EINVAL;
+
+ f->fsprg_seed = TAKE_PTR(seed);
+ f->fsprg_seed_size = seed_size;
+
+ f->fss_start_usec = start * interval;
+ f->fss_interval_usec = interval;
+
+ return 0;
+}
+
+bool journal_file_next_evolve_usec(JournalFile *f, usec_t *u) {
+ uint64_t epoch;
+
+ assert(f);
+ assert(u);
+
+ if (!JOURNAL_HEADER_SEALED(f->header))
+ return false;
+
+ epoch = FSPRG_GetEpoch(f->fsprg_state);
+
+ *u = (usec_t) (f->fss_start_usec + f->fss_interval_usec * epoch + f->fss_interval_usec);
+
+ return true;
+}
diff --git a/src/libsystemd/sd-journal/journal-authenticate.h b/src/libsystemd/sd-journal/journal-authenticate.h
new file mode 100644
index 0000000..e895722
--- /dev/null
+++ b/src/libsystemd/sd-journal/journal-authenticate.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "journal-file.h"
+
+int journal_file_append_tag(JournalFile *f);
+int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime);
+int journal_file_append_first_tag(JournalFile *f);
+
+int journal_file_hmac_setup(JournalFile *f);
+int journal_file_hmac_start(JournalFile *f);
+int journal_file_hmac_put_header(JournalFile *f);
+int journal_file_hmac_put_object(JournalFile *f, ObjectType type, Object *o, uint64_t p);
+
+int journal_file_fss_load(JournalFile *f);
+int journal_file_parse_verification_key(JournalFile *f, const char *key);
+
+int journal_file_fsprg_evolve(JournalFile *f, uint64_t realtime);
+int journal_file_fsprg_seek(JournalFile *f, uint64_t epoch);
+
+bool journal_file_next_evolve_usec(JournalFile *f, usec_t *u);
diff --git a/src/libsystemd/sd-journal/journal-def.h b/src/libsystemd/sd-journal/journal-def.h
new file mode 100644
index 0000000..1b10f24
--- /dev/null
+++ b/src/libsystemd/sd-journal/journal-def.h
@@ -0,0 +1,269 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-id128.h"
+
+#include "macro.h"
+#include "sparse-endian.h"
+
+/*
+ * If you change this file you probably should also change its documentation:
+ *
+ * https://systemd.io/JOURNAL_FILE_FORMAT
+ */
+
+typedef struct Header Header;
+
+typedef struct ObjectHeader ObjectHeader;
+typedef union Object Object;
+
+typedef struct DataObject DataObject;
+typedef struct FieldObject FieldObject;
+typedef struct EntryObject EntryObject;
+typedef struct HashTableObject HashTableObject;
+typedef struct EntryArrayObject EntryArrayObject;
+typedef struct TagObject TagObject;
+
+typedef struct HashItem HashItem;
+
+typedef struct FSSHeader FSSHeader;
+
+/* Object types */
+typedef enum ObjectType {
+ OBJECT_UNUSED, /* also serves as "any type" or "additional category" */
+ OBJECT_DATA,
+ OBJECT_FIELD,
+ OBJECT_ENTRY,
+ OBJECT_DATA_HASH_TABLE,
+ OBJECT_FIELD_HASH_TABLE,
+ OBJECT_ENTRY_ARRAY,
+ OBJECT_TAG,
+ _OBJECT_TYPE_MAX,
+ _OBJECT_TYPE_INVALID = -EINVAL,
+} ObjectType;
+
+/* Object flags (note that src/basic/compress.h uses the same values for the compression types) */
+enum {
+ OBJECT_COMPRESSED_XZ = 1 << 0,
+ OBJECT_COMPRESSED_LZ4 = 1 << 1,
+ OBJECT_COMPRESSED_ZSTD = 1 << 2,
+ _OBJECT_COMPRESSED_MASK = OBJECT_COMPRESSED_XZ | OBJECT_COMPRESSED_LZ4 | OBJECT_COMPRESSED_ZSTD,
+};
+
+struct ObjectHeader {
+ uint8_t type;
+ uint8_t flags;
+ uint8_t reserved[6];
+ le64_t size;
+ uint8_t payload[];
+} _packed_;
+
+#define DataObject__contents { \
+ ObjectHeader object; \
+ le64_t hash; \
+ le64_t next_hash_offset; \
+ le64_t next_field_offset; \
+ le64_t entry_offset; /* the first array entry we store inline */ \
+ le64_t entry_array_offset; \
+ le64_t n_entries; \
+ union { \
+ struct { \
+ uint8_t payload[0]; \
+ } regular; \
+ struct { \
+ le32_t tail_entry_array_offset; \
+ le32_t tail_entry_array_n_entries; \
+ uint8_t payload[0]; \
+ } compact; \
+ }; \
+}
+
+struct DataObject DataObject__contents;
+struct DataObject__packed DataObject__contents _packed_;
+assert_cc(sizeof(struct DataObject) == sizeof(struct DataObject__packed));
+
+#define FieldObject__contents { \
+ ObjectHeader object; \
+ le64_t hash; \
+ le64_t next_hash_offset; \
+ le64_t head_data_offset; \
+ uint8_t payload[]; \
+}
+
+struct FieldObject FieldObject__contents;
+struct FieldObject__packed FieldObject__contents _packed_;
+assert_cc(sizeof(struct FieldObject) == sizeof(struct FieldObject__packed));
+
+#define EntryObject__contents { \
+ ObjectHeader object; \
+ le64_t seqnum; \
+ le64_t realtime; \
+ le64_t monotonic; \
+ sd_id128_t boot_id; \
+ le64_t xor_hash; \
+ union { \
+ struct { \
+ dummy_t __empty__regular; \
+ struct { \
+ le64_t object_offset; \
+ le64_t hash; \
+ } regular[]; \
+ }; \
+ struct { \
+ dummy_t __empty_compact; \
+ struct { \
+ le32_t object_offset; \
+ } compact[]; \
+ }; \
+ } items; \
+}
+
+struct EntryObject EntryObject__contents;
+struct EntryObject__packed EntryObject__contents _packed_;
+assert_cc(sizeof(struct EntryObject) == sizeof(struct EntryObject__packed));
+
+struct HashItem {
+ le64_t head_hash_offset;
+ le64_t tail_hash_offset;
+} _packed_;
+
+struct HashTableObject {
+ ObjectHeader object;
+ HashItem items[];
+} _packed_;
+
+struct EntryArrayObject {
+ ObjectHeader object;
+ le64_t next_entry_array_offset;
+ union {
+ DECLARE_FLEX_ARRAY(le64_t, regular);
+ DECLARE_FLEX_ARRAY(le32_t, compact);
+ } items;
+} _packed_;
+
+#define TAG_LENGTH (256/8)
+
+struct TagObject {
+ ObjectHeader object;
+ le64_t seqnum;
+ le64_t epoch;
+ uint8_t tag[TAG_LENGTH]; /* SHA-256 HMAC */
+} _packed_;
+
+union Object {
+ ObjectHeader object;
+ DataObject data;
+ FieldObject field;
+ EntryObject entry;
+ HashTableObject hash_table;
+ EntryArrayObject entry_array;
+ TagObject tag;
+};
+
+enum {
+ STATE_OFFLINE = 0,
+ STATE_ONLINE = 1,
+ STATE_ARCHIVED = 2,
+ _STATE_MAX
+};
+
+/* Header flags */
+enum {
+ HEADER_INCOMPATIBLE_COMPRESSED_XZ = 1 << 0,
+ HEADER_INCOMPATIBLE_COMPRESSED_LZ4 = 1 << 1,
+ HEADER_INCOMPATIBLE_KEYED_HASH = 1 << 2,
+ HEADER_INCOMPATIBLE_COMPRESSED_ZSTD = 1 << 3,
+ HEADER_INCOMPATIBLE_COMPACT = 1 << 4,
+
+ HEADER_INCOMPATIBLE_ANY = HEADER_INCOMPATIBLE_COMPRESSED_XZ |
+ HEADER_INCOMPATIBLE_COMPRESSED_LZ4 |
+ HEADER_INCOMPATIBLE_KEYED_HASH |
+ HEADER_INCOMPATIBLE_COMPRESSED_ZSTD |
+ HEADER_INCOMPATIBLE_COMPACT,
+
+ HEADER_INCOMPATIBLE_SUPPORTED = (HAVE_XZ ? HEADER_INCOMPATIBLE_COMPRESSED_XZ : 0) |
+ (HAVE_LZ4 ? HEADER_INCOMPATIBLE_COMPRESSED_LZ4 : 0) |
+ (HAVE_ZSTD ? HEADER_INCOMPATIBLE_COMPRESSED_ZSTD : 0) |
+ HEADER_INCOMPATIBLE_KEYED_HASH |
+ HEADER_INCOMPATIBLE_COMPACT,
+};
+
+
+enum {
+ HEADER_COMPATIBLE_SEALED = 1 << 0,
+ HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID = 1 << 1, /* if set, the last_entry_boot_id field in the header is exclusively refreshed when an entry is appended */
+ HEADER_COMPATIBLE_SEALED_CONTINUOUS = 1 << 2,
+ HEADER_COMPATIBLE_ANY = HEADER_COMPATIBLE_SEALED |
+ HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID |
+ HEADER_COMPATIBLE_SEALED_CONTINUOUS,
+
+ HEADER_COMPATIBLE_SUPPORTED = (HAVE_GCRYPT ? HEADER_COMPATIBLE_SEALED | HEADER_COMPATIBLE_SEALED_CONTINUOUS : 0) |
+ HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID,
+};
+
+
+#define HEADER_SIGNATURE \
+ ((const uint8_t[]) { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' })
+
+#define struct_Header__contents { \
+ uint8_t signature[8]; /* "LPKSHHRH" */ \
+ le32_t compatible_flags; \
+ le32_t incompatible_flags; \
+ uint8_t state; \
+ uint8_t reserved[7]; \
+ sd_id128_t file_id; \
+ sd_id128_t machine_id; \
+ sd_id128_t tail_entry_boot_id; \
+ sd_id128_t seqnum_id; \
+ le64_t header_size; \
+ le64_t arena_size; \
+ le64_t data_hash_table_offset; \
+ le64_t data_hash_table_size; \
+ le64_t field_hash_table_offset; \
+ le64_t field_hash_table_size; \
+ le64_t tail_object_offset; \
+ le64_t n_objects; \
+ le64_t n_entries; \
+ le64_t tail_entry_seqnum; \
+ le64_t head_entry_seqnum; \
+ le64_t entry_array_offset; \
+ le64_t head_entry_realtime; \
+ le64_t tail_entry_realtime; \
+ le64_t tail_entry_monotonic; \
+ /* Added in 187 */ \
+ le64_t n_data; \
+ le64_t n_fields; \
+ /* Added in 189 */ \
+ le64_t n_tags; \
+ le64_t n_entry_arrays; \
+ /* Added in 246 */ \
+ le64_t data_hash_chain_depth; \
+ le64_t field_hash_chain_depth; \
+ /* Added in 252 */ \
+ le32_t tail_entry_array_offset; \
+ le32_t tail_entry_array_n_entries; \
+ /* Added in 254 */ \
+ le64_t tail_entry_offset; \
+ }
+
+struct Header struct_Header__contents;
+struct Header__packed struct_Header__contents _packed_;
+assert_cc(sizeof(struct Header) == sizeof(struct Header__packed));
+assert_cc(sizeof(struct Header) == 272);
+
+#define FSS_HEADER_SIGNATURE \
+ ((const char[]) { 'K', 'S', 'H', 'H', 'R', 'H', 'L', 'P' })
+
+struct FSSHeader {
+ uint8_t signature[8]; /* "KSHHRHLP" */
+ le32_t compatible_flags;
+ le32_t incompatible_flags;
+ sd_id128_t machine_id;
+ sd_id128_t boot_id; /* last writer */
+ le64_t header_size;
+ le64_t start_usec;
+ le64_t interval_usec;
+ le16_t fsprg_secpar;
+ le16_t reserved[3];
+ le64_t fsprg_state_size;
+} _packed_;
diff --git a/src/libsystemd/sd-journal/journal-file.c b/src/libsystemd/sd-journal/journal-file.c
new file mode 100644
index 0000000..d2493a0
--- /dev/null
+++ b/src/libsystemd/sd-journal/journal-file.c
@@ -0,0 +1,4696 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fs.h>
+#include <linux/magic.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <sys/statvfs.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "chattr-util.h"
+#include "compress.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "id128-util.h"
+#include "journal-authenticate.h"
+#include "journal-def.h"
+#include "journal-file.h"
+#include "journal-internal.h"
+#include "lookup3.h"
+#include "memory-util.h"
+#include "missing_threads.h"
+#include "path-util.h"
+#include "prioq.h"
+#include "random-util.h"
+#include "set.h"
+#include "sort-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "sync-util.h"
+#include "user-util.h"
+#include "xattr-util.h"
+
+#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
+#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
+
+#define DEFAULT_COMPRESS_THRESHOLD (512ULL)
+#define MIN_COMPRESS_THRESHOLD (8ULL)
+
+#define U64_KB UINT64_C(1024)
+#define U64_MB (UINT64_C(1024) * U64_KB)
+#define U64_GB (UINT64_C(1024) * U64_MB)
+
+/* This is the minimum journal file size */
+#define JOURNAL_FILE_SIZE_MIN (512 * U64_KB) /* 512 KiB */
+#define JOURNAL_COMPACT_SIZE_MAX ((uint64_t) UINT32_MAX) /* 4 GiB */
+
+/* These are the lower and upper bounds if we deduce the max_use value from the file system size */
+#define MAX_USE_LOWER (1 * U64_MB) /* 1 MiB */
+#define MAX_USE_UPPER (4 * U64_GB) /* 4 GiB */
+
+/* Those are the lower and upper bounds for the minimal use limit,
+ * i.e. how much we'll use even if keep_free suggests otherwise. */
+#define MIN_USE_LOW (1 * U64_MB) /* 1 MiB */
+#define MIN_USE_HIGH (16 * U64_MB) /* 16 MiB */
+
+/* This is the upper bound if we deduce max_size from max_use */
+#define MAX_SIZE_UPPER (128 * U64_MB) /* 128 MiB */
+
+/* This is the upper bound if we deduce the keep_free value from the file system size */
+#define KEEP_FREE_UPPER (4 * U64_GB) /* 4 GiB */
+
+/* This is the keep_free value when we can't determine the system size */
+#define DEFAULT_KEEP_FREE (1 * U64_MB) /* 1 MB */
+
+/* This is the default maximum number of journal files to keep around. */
+#define DEFAULT_N_MAX_FILES 100
+
+/* n_data was the first entry we added after the initial file format design */
+#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
+
+/* How many entries to keep in the entry array chain cache at max */
+#define CHAIN_CACHE_MAX 20
+
+/* How much to increase the journal file size at once each time we allocate something new. */
+#define FILE_SIZE_INCREASE (8 * U64_MB) /* 8MB */
+
+/* Reread fstat() of the file for detecting deletions at least this often */
+#define LAST_STAT_REFRESH_USEC (5*USEC_PER_SEC)
+
+/* Longest hash chain to rotate after */
+#define HASH_CHAIN_DEPTH_MAX 100
+
+#ifdef __clang__
+# pragma GCC diagnostic ignored "-Waddress-of-packed-member"
+#endif
+
+static int mmap_prot_from_open_flags(int flags) {
+ switch (flags & O_ACCMODE) {
+ case O_RDONLY:
+ return PROT_READ;
+ case O_WRONLY:
+ return PROT_WRITE;
+ case O_RDWR:
+ return PROT_READ|PROT_WRITE;
+ default:
+ assert_not_reached();
+ }
+}
+
+int journal_file_tail_end_by_pread(JournalFile *f, uint64_t *ret_offset) {
+ uint64_t p;
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(ret_offset);
+
+ /* Same as journal_file_tail_end_by_mmap() below, but operates with pread() to avoid the mmap cache
+ * (and thus is thread safe) */
+
+ p = le64toh(f->header->tail_object_offset);
+ if (p == 0)
+ p = le64toh(f->header->header_size);
+ else {
+ Object tail;
+ uint64_t sz;
+
+ r = journal_file_read_object_header(f, OBJECT_UNUSED, p, &tail);
+ if (r < 0)
+ return r;
+
+ sz = le64toh(tail.object.size);
+ if (sz > UINT64_MAX - sizeof(uint64_t) + 1)
+ return -EBADMSG;
+
+ sz = ALIGN64(sz);
+ if (p > UINT64_MAX - sz)
+ return -EBADMSG;
+
+ p += sz;
+ }
+
+ *ret_offset = p;
+
+ return 0;
+}
+
+int journal_file_tail_end_by_mmap(JournalFile *f, uint64_t *ret_offset) {
+ uint64_t p;
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(ret_offset);
+
+ /* Same as journal_file_tail_end_by_pread() above, but operates with the usual mmap logic */
+
+ p = le64toh(f->header->tail_object_offset);
+ if (p == 0)
+ p = le64toh(f->header->header_size);
+ else {
+ Object *tail;
+ uint64_t sz;
+
+ r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &tail);
+ if (r < 0)
+ return r;
+
+ sz = le64toh(READ_NOW(tail->object.size));
+ if (sz > UINT64_MAX - sizeof(uint64_t) + 1)
+ return -EBADMSG;
+
+ sz = ALIGN64(sz);
+ if (p > UINT64_MAX - sz)
+ return -EBADMSG;
+
+ p += sz;
+ }
+
+ *ret_offset = p;
+
+ return 0;
+}
+
+int journal_file_set_offline_thread_join(JournalFile *f) {
+ int r;
+
+ assert(f);
+
+ if (f->offline_state == OFFLINE_JOINED)
+ return 0;
+
+ r = pthread_join(f->offline_thread, NULL);
+ if (r)
+ return -r;
+
+ f->offline_state = OFFLINE_JOINED;
+
+ if (mmap_cache_fd_got_sigbus(f->cache_fd))
+ return -EIO;
+
+ return 0;
+}
+
+static int journal_file_set_online(JournalFile *f) {
+ bool wait = true;
+
+ assert(f);
+
+ if (!journal_file_writable(f))
+ return -EPERM;
+
+ if (f->fd < 0 || !f->header)
+ return -EINVAL;
+
+ while (wait) {
+ switch (f->offline_state) {
+ case OFFLINE_JOINED:
+ /* No offline thread, no need to wait. */
+ wait = false;
+ break;
+
+ case OFFLINE_SYNCING: {
+ OfflineState tmp_state = OFFLINE_SYNCING;
+ if (!__atomic_compare_exchange_n(&f->offline_state, &tmp_state, OFFLINE_CANCEL,
+ false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
+ continue;
+ }
+ /* Canceled syncing prior to offlining, no need to wait. */
+ wait = false;
+ break;
+
+ case OFFLINE_AGAIN_FROM_SYNCING: {
+ OfflineState tmp_state = OFFLINE_AGAIN_FROM_SYNCING;
+ if (!__atomic_compare_exchange_n(&f->offline_state, &tmp_state, OFFLINE_CANCEL,
+ false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
+ continue;
+ }
+ /* Canceled restart from syncing, no need to wait. */
+ wait = false;
+ break;
+
+ case OFFLINE_AGAIN_FROM_OFFLINING: {
+ OfflineState tmp_state = OFFLINE_AGAIN_FROM_OFFLINING;
+ if (!__atomic_compare_exchange_n(&f->offline_state, &tmp_state, OFFLINE_CANCEL,
+ false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
+ continue;
+ }
+ /* Canceled restart from offlining, must wait for offlining to complete however. */
+ _fallthrough_;
+ default: {
+ int r;
+
+ r = journal_file_set_offline_thread_join(f);
+ if (r < 0)
+ return r;
+
+ wait = false;
+ break;
+ }
+ }
+ }
+
+ if (mmap_cache_fd_got_sigbus(f->cache_fd))
+ return -EIO;
+
+ switch (f->header->state) {
+ case STATE_ONLINE:
+ return 0;
+
+ case STATE_OFFLINE:
+ f->header->state = STATE_ONLINE;
+ (void) fsync(f->fd);
+ return 0;
+
+ default:
+ return -EINVAL;
+ }
+}
+
+JournalFile* journal_file_close(JournalFile *f) {
+ if (!f)
+ return NULL;
+
+ assert(f->newest_boot_id_prioq_idx == PRIOQ_IDX_NULL);
+
+ if (f->cache_fd)
+ mmap_cache_fd_free(f->cache_fd);
+
+ if (f->close_fd)
+ safe_close(f->fd);
+ free(f->path);
+
+ ordered_hashmap_free_free(f->chain_cache);
+
+#if HAVE_COMPRESSION
+ free(f->compress_buffer);
+#endif
+
+#if HAVE_GCRYPT
+ if (f->fss_file) {
+ size_t sz = PAGE_ALIGN(f->fss_file_size);
+ assert(sz < SIZE_MAX);
+ munmap(f->fss_file, sz);
+ } else
+ free(f->fsprg_state);
+
+ free(f->fsprg_seed);
+
+ if (f->hmac)
+ gcry_md_close(f->hmac);
+#endif
+
+ return mfree(f);
+}
+
+static bool keyed_hash_requested(void) {
+ static thread_local int cached = -1;
+ int r;
+
+ if (cached < 0) {
+ r = getenv_bool("SYSTEMD_JOURNAL_KEYED_HASH");
+ if (r < 0) {
+ if (r != -ENXIO)
+ log_debug_errno(r, "Failed to parse $SYSTEMD_JOURNAL_KEYED_HASH environment variable, ignoring: %m");
+ cached = true;
+ } else
+ cached = r;
+ }
+
+ return cached;
+}
+
+static bool compact_mode_requested(void) {
+ static thread_local int cached = -1;
+ int r;
+
+ if (cached < 0) {
+ r = getenv_bool("SYSTEMD_JOURNAL_COMPACT");
+ if (r < 0) {
+ if (r != -ENXIO)
+ log_debug_errno(r, "Failed to parse $SYSTEMD_JOURNAL_COMPACT environment variable, ignoring: %m");
+ cached = true;
+ } else
+ cached = r;
+ }
+
+ return cached;
+}
+
+#if HAVE_COMPRESSION
+static Compression getenv_compression(void) {
+ Compression c;
+ const char *e;
+ int r;
+
+ e = getenv("SYSTEMD_JOURNAL_COMPRESS");
+ if (!e)
+ return DEFAULT_COMPRESSION;
+
+ r = parse_boolean(e);
+ if (r >= 0)
+ return r ? DEFAULT_COMPRESSION : COMPRESSION_NONE;
+
+ c = compression_from_string(e);
+ if (c < 0) {
+ log_debug_errno(c, "Failed to parse SYSTEMD_JOURNAL_COMPRESS value, ignoring: %s", e);
+ return DEFAULT_COMPRESSION;
+ }
+
+ if (!compression_supported(c)) {
+ log_debug("Unsupported compression algorithm specified, ignoring: %s", e);
+ return DEFAULT_COMPRESSION;
+ }
+
+ return c;
+}
+#endif
+
+static Compression compression_requested(void) {
+#if HAVE_COMPRESSION
+ static thread_local Compression cached = _COMPRESSION_INVALID;
+
+ if (cached < 0)
+ cached = getenv_compression();
+
+ return cached;
+#else
+ return COMPRESSION_NONE;
+#endif
+}
+
+static int journal_file_init_header(
+ JournalFile *f,
+ JournalFileFlags file_flags,
+ JournalFile *template) {
+
+ bool seal = false;
+ ssize_t k;
+ int r;
+
+ assert(f);
+
+#if HAVE_GCRYPT
+ /* Try to load the FSPRG state, and if we can't, then just don't do sealing */
+ seal = FLAGS_SET(file_flags, JOURNAL_SEAL) && journal_file_fss_load(f) >= 0;
+#endif
+
+ Header h = {
+ .header_size = htole64(ALIGN64(sizeof(h))),
+ .incompatible_flags = htole32(
+ FLAGS_SET(file_flags, JOURNAL_COMPRESS) * COMPRESSION_TO_HEADER_INCOMPATIBLE_FLAG(compression_requested()) |
+ keyed_hash_requested() * HEADER_INCOMPATIBLE_KEYED_HASH |
+ compact_mode_requested() * HEADER_INCOMPATIBLE_COMPACT),
+ .compatible_flags = htole32(
+ (seal * (HEADER_COMPATIBLE_SEALED | HEADER_COMPATIBLE_SEALED_CONTINUOUS) ) |
+ HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID),
+ };
+
+ assert_cc(sizeof(h.signature) == sizeof(HEADER_SIGNATURE));
+ memcpy(h.signature, HEADER_SIGNATURE, sizeof(HEADER_SIGNATURE));
+
+ r = sd_id128_randomize(&h.file_id);
+ if (r < 0)
+ return r;
+
+ r = sd_id128_get_machine(&h.machine_id);
+ if (r < 0 && !ERRNO_IS_MACHINE_ID_UNSET(r))
+ return r; /* If we have no valid machine ID (test environment?), let's simply leave the
+ * machine ID field all zeroes. */
+
+ if (template) {
+ h.seqnum_id = template->header->seqnum_id;
+ h.tail_entry_seqnum = template->header->tail_entry_seqnum;
+ } else
+ h.seqnum_id = h.file_id;
+
+ k = pwrite(f->fd, &h, sizeof(h), 0);
+ if (k < 0)
+ return -errno;
+ if (k != sizeof(h))
+ return -EIO;
+
+ return 0;
+}
+
+static int journal_file_refresh_header(JournalFile *f) {
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ /* We used to update the header's boot ID field here, but we don't do that anymore, as per
+ * HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID */
+
+ r = journal_file_set_online(f);
+
+ /* Sync the online state to disk; likely just created a new file, also sync the directory this file
+ * is located in. */
+ (void) fsync_full(f->fd);
+
+ return r;
+}
+
+static bool warn_wrong_flags(const JournalFile *f, bool compatible) {
+ const uint32_t any = compatible ? HEADER_COMPATIBLE_ANY : HEADER_INCOMPATIBLE_ANY,
+ supported = compatible ? HEADER_COMPATIBLE_SUPPORTED : HEADER_INCOMPATIBLE_SUPPORTED;
+ const char *type = compatible ? "compatible" : "incompatible";
+ uint32_t flags;
+
+ assert(f);
+ assert(f->header);
+
+ flags = le32toh(compatible ? f->header->compatible_flags : f->header->incompatible_flags);
+
+ if (flags & ~supported) {
+ if (flags & ~any)
+ log_debug("Journal file %s has unknown %s flags 0x%"PRIx32,
+ f->path, type, flags & ~any);
+ flags = (flags & any) & ~supported;
+ if (flags) {
+ const char* strv[6];
+ size_t n = 0;
+ _cleanup_free_ char *t = NULL;
+
+ if (compatible) {
+ if (flags & HEADER_COMPATIBLE_SEALED)
+ strv[n++] = "sealed";
+ if (flags & HEADER_COMPATIBLE_SEALED_CONTINUOUS)
+ strv[n++] = "sealed-continuous";
+ } else {
+ if (flags & HEADER_INCOMPATIBLE_COMPRESSED_XZ)
+ strv[n++] = "xz-compressed";
+ if (flags & HEADER_INCOMPATIBLE_COMPRESSED_LZ4)
+ strv[n++] = "lz4-compressed";
+ if (flags & HEADER_INCOMPATIBLE_COMPRESSED_ZSTD)
+ strv[n++] = "zstd-compressed";
+ if (flags & HEADER_INCOMPATIBLE_KEYED_HASH)
+ strv[n++] = "keyed-hash";
+ if (flags & HEADER_INCOMPATIBLE_COMPACT)
+ strv[n++] = "compact";
+ }
+ strv[n] = NULL;
+ assert(n < ELEMENTSOF(strv));
+
+ t = strv_join((char**) strv, ", ");
+ log_debug("Journal file %s uses %s %s %s disabled at compilation time.",
+ f->path, type, n > 1 ? "flags" : "flag", strnull(t));
+ }
+ return true;
+ }
+
+ return false;
+}
+
+static bool offset_is_valid(uint64_t offset, uint64_t header_size, uint64_t tail_object_offset) {
+ if (offset == 0)
+ return true;
+ if (!VALID64(offset))
+ return false;
+ if (offset < header_size)
+ return false;
+ if (offset > tail_object_offset)
+ return false;
+ return true;
+}
+
+static bool hash_table_is_valid(uint64_t offset, uint64_t size, uint64_t header_size, uint64_t arena_size, uint64_t tail_object_offset) {
+ if ((offset == 0) != (size == 0))
+ return false;
+ if (offset == 0)
+ return true;
+ if (offset <= offsetof(Object, hash_table.items))
+ return false;
+ offset -= offsetof(Object, hash_table.items);
+ if (!offset_is_valid(offset, header_size, tail_object_offset))
+ return false;
+ assert(offset <= header_size + arena_size);
+ if (size > header_size + arena_size - offset)
+ return false;
+ return true;
+}
+
+static int journal_file_verify_header(JournalFile *f) {
+ uint64_t arena_size, header_size;
+
+ assert(f);
+ assert(f->header);
+
+ if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
+ return -EBADMSG;
+
+ /* In both read and write mode we refuse to open files with incompatible
+ * flags we don't know. */
+ if (warn_wrong_flags(f, false))
+ return -EPROTONOSUPPORT;
+
+ /* When open for writing we refuse to open files with compatible flags, too. */
+ if (journal_file_writable(f) && warn_wrong_flags(f, true))
+ return -EPROTONOSUPPORT;
+
+ if (f->header->state >= _STATE_MAX)
+ return -EBADMSG;
+
+ header_size = le64toh(READ_NOW(f->header->header_size));
+
+ /* The first addition was n_data, so check that we are at least this large */
+ if (header_size < HEADER_SIZE_MIN)
+ return -EBADMSG;
+
+ /* When open for writing we refuse to open files with a mismatch of the header size, i.e. writing to
+ * files implementing older or new header structures. */
+ if (journal_file_writable(f) && header_size != sizeof(Header))
+ return -EPROTONOSUPPORT;
+
+ /* Don't write to journal files without the new boot ID update behavior guarantee. */
+ if (journal_file_writable(f) && !JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(f->header))
+ return -EPROTONOSUPPORT;
+
+ if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
+ return -EBADMSG;
+
+ arena_size = le64toh(READ_NOW(f->header->arena_size));
+
+ if (UINT64_MAX - header_size < arena_size || header_size + arena_size > (uint64_t) f->last_stat.st_size)
+ return -ENODATA;
+
+ uint64_t tail_object_offset = le64toh(f->header->tail_object_offset);
+ if (!offset_is_valid(tail_object_offset, header_size, UINT64_MAX))
+ return -ENODATA;
+ if (header_size + arena_size < tail_object_offset)
+ return -ENODATA;
+ if (header_size + arena_size - tail_object_offset < sizeof(ObjectHeader))
+ return -ENODATA;
+
+ if (!hash_table_is_valid(le64toh(f->header->data_hash_table_offset),
+ le64toh(f->header->data_hash_table_size),
+ header_size, arena_size, tail_object_offset))
+ return -ENODATA;
+
+ if (!hash_table_is_valid(le64toh(f->header->field_hash_table_offset),
+ le64toh(f->header->field_hash_table_size),
+ header_size, arena_size, tail_object_offset))
+ return -ENODATA;
+
+ uint64_t entry_array_offset = le64toh(f->header->entry_array_offset);
+ if (!offset_is_valid(entry_array_offset, header_size, tail_object_offset))
+ return -ENODATA;
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, tail_entry_array_offset)) {
+ uint32_t offset = le32toh(f->header->tail_entry_array_offset);
+ uint32_t n = le32toh(f->header->tail_entry_array_n_entries);
+
+ if (!offset_is_valid(offset, header_size, tail_object_offset))
+ return -ENODATA;
+ if (entry_array_offset > offset)
+ return -ENODATA;
+ if (entry_array_offset == 0 && offset != 0)
+ return -ENODATA;
+ if ((offset == 0) != (n == 0))
+ return -ENODATA;
+ assert(offset <= header_size + arena_size);
+ if ((uint64_t) n * journal_file_entry_array_item_size(f) > header_size + arena_size - offset)
+ return -ENODATA;
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, tail_entry_offset)) {
+ uint64_t offset = le64toh(f->header->tail_entry_offset);
+
+ if (!offset_is_valid(offset, header_size, tail_object_offset))
+ return -ENODATA;
+
+ if (offset > 0) {
+ /* When there is an entry object, then these fields must be filled. */
+ if (sd_id128_is_null(f->header->tail_entry_boot_id))
+ return -ENODATA;
+ if (!VALID_REALTIME(le64toh(f->header->head_entry_realtime)))
+ return -ENODATA;
+ if (!VALID_REALTIME(le64toh(f->header->tail_entry_realtime)))
+ return -ENODATA;
+ if (!VALID_MONOTONIC(le64toh(f->header->tail_entry_realtime)))
+ return -ENODATA;
+ } else {
+ /* Otherwise, the fields must be zero. */
+ if (JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(f->header) &&
+ !sd_id128_is_null(f->header->tail_entry_boot_id))
+ return -ENODATA;
+ if (f->header->head_entry_realtime != 0)
+ return -ENODATA;
+ if (f->header->tail_entry_realtime != 0)
+ return -ENODATA;
+ if (f->header->tail_entry_realtime != 0)
+ return -ENODATA;
+ }
+ }
+
+ /* Verify number of objects */
+ uint64_t n_objects = le64toh(f->header->n_objects);
+ if (n_objects > arena_size / sizeof(ObjectHeader))
+ return -ENODATA;
+
+ uint64_t n_entries = le64toh(f->header->n_entries);
+ if (n_entries > n_objects)
+ return -ENODATA;
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
+ le64toh(f->header->n_data) > n_objects)
+ return -ENODATA;
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
+ le64toh(f->header->n_fields) > n_objects)
+ return -ENODATA;
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_tags) &&
+ le64toh(f->header->n_tags) > n_objects)
+ return -ENODATA;
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays) &&
+ le64toh(f->header->n_entry_arrays) > n_objects)
+ return -ENODATA;
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, tail_entry_array_n_entries) &&
+ le32toh(f->header->tail_entry_array_n_entries) > n_entries)
+ return -ENODATA;
+
+ if (journal_file_writable(f)) {
+ sd_id128_t machine_id;
+ uint8_t state;
+ int r;
+
+ r = sd_id128_get_machine(&machine_id);
+ if (ERRNO_IS_NEG_MACHINE_ID_UNSET(r)) /* Gracefully handle the machine ID not being initialized yet */
+ machine_id = SD_ID128_NULL;
+ else if (r < 0)
+ return r;
+
+ if (!sd_id128_equal(machine_id, f->header->machine_id))
+ return log_debug_errno(SYNTHETIC_ERRNO(EHOSTDOWN),
+ "Trying to open journal file from different host for writing, refusing.");
+
+ state = f->header->state;
+
+ if (state == STATE_ARCHIVED)
+ return -ESHUTDOWN; /* Already archived */
+ if (state == STATE_ONLINE)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBUSY),
+ "Journal file %s is already online. Assuming unclean closing.",
+ f->path);
+ if (state != STATE_OFFLINE)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBUSY),
+ "Journal file %s has unknown state %i.",
+ f->path, state);
+
+ if (f->header->field_hash_table_size == 0 || f->header->data_hash_table_size == 0)
+ return -EBADMSG;
+ }
+
+ return 0;
+}
+
+int journal_file_fstat(JournalFile *f) {
+ int r;
+
+ assert(f);
+ assert(f->fd >= 0);
+
+ if (fstat(f->fd, &f->last_stat) < 0)
+ return -errno;
+
+ f->last_stat_usec = now(CLOCK_MONOTONIC);
+
+ /* Refuse dealing with files that aren't regular */
+ r = stat_verify_regular(&f->last_stat);
+ if (r < 0)
+ return r;
+
+ /* Refuse appending to files that are already deleted */
+ if (f->last_stat.st_nlink <= 0)
+ return -EIDRM;
+
+ return 0;
+}
+
+static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
+ uint64_t old_size, new_size, old_header_size, old_arena_size;
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ /* We assume that this file is not sparse, and we know that for sure, since we always call
+ * posix_fallocate() ourselves */
+
+ if (size > PAGE_ALIGN_DOWN_U64(UINT64_MAX) - offset)
+ return -EINVAL;
+
+ if (mmap_cache_fd_got_sigbus(f->cache_fd))
+ return -EIO;
+
+ old_header_size = le64toh(READ_NOW(f->header->header_size));
+ old_arena_size = le64toh(READ_NOW(f->header->arena_size));
+ if (old_arena_size > PAGE_ALIGN_DOWN_U64(UINT64_MAX) - old_header_size)
+ return -EBADMSG;
+
+ old_size = old_header_size + old_arena_size;
+
+ new_size = MAX(PAGE_ALIGN_U64(offset + size), old_header_size);
+
+ if (new_size <= old_size) {
+
+ /* We already pre-allocated enough space, but before
+ * we write to it, let's check with fstat() if the
+ * file got deleted, in order make sure we don't throw
+ * away the data immediately. Don't check fstat() for
+ * all writes though, but only once ever 10s. */
+
+ if (f->last_stat_usec + LAST_STAT_REFRESH_USEC > now(CLOCK_MONOTONIC))
+ return 0;
+
+ return journal_file_fstat(f);
+ }
+
+ /* Allocate more space. */
+
+ if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
+ return -E2BIG;
+
+ /* Refuse to go over 4G in compact mode so offsets can be stored in 32-bit. */
+ if (JOURNAL_HEADER_COMPACT(f->header) && new_size > UINT32_MAX)
+ return -E2BIG;
+
+ if (new_size > f->metrics.min_size && f->metrics.keep_free > 0) {
+ struct statvfs svfs;
+
+ if (fstatvfs(f->fd, &svfs) >= 0) {
+ uint64_t available;
+
+ available = LESS_BY(u64_multiply_safe(svfs.f_bfree, svfs.f_bsize), f->metrics.keep_free);
+
+ if (new_size - old_size > available)
+ return -E2BIG;
+ }
+ }
+
+ /* Increase by larger blocks at once */
+ new_size = ROUND_UP(new_size, FILE_SIZE_INCREASE);
+ if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
+ new_size = f->metrics.max_size;
+
+ /* Note that the glibc fallocate() fallback is very
+ inefficient, hence we try to minimize the allocation area
+ as we can. */
+ r = posix_fallocate_loop(f->fd, old_size, new_size - old_size);
+ if (r < 0)
+ return r;
+
+ f->header->arena_size = htole64(new_size - old_header_size);
+
+ return journal_file_fstat(f);
+}
+
+static int journal_file_move_to(
+ JournalFile *f,
+ ObjectType type,
+ bool keep_always,
+ uint64_t offset,
+ uint64_t size,
+ void **ret) {
+
+ int r;
+
+ assert(f);
+ assert(ret);
+
+ /* This function may clear, overwrite, or alter previously cached entries with the same type. After
+ * this function has been called, all previously read objects with the same type may be invalidated,
+ * hence must be re-read before use. */
+
+ if (size <= 0)
+ return -EINVAL;
+
+ if (size > UINT64_MAX - offset)
+ return -EBADMSG;
+
+ /* Avoid SIGBUS on invalid accesses */
+ if (offset + size > (uint64_t) f->last_stat.st_size) {
+ /* Hmm, out of range? Let's refresh the fstat() data
+ * first, before we trust that check. */
+
+ r = journal_file_fstat(f);
+ if (r < 0)
+ return r;
+
+ if (offset + size > (uint64_t) f->last_stat.st_size)
+ return -EADDRNOTAVAIL;
+ }
+
+ return mmap_cache_fd_get(f->cache_fd, type_to_category(type), keep_always, offset, size, &f->last_stat, ret);
+}
+
+static uint64_t minimum_header_size(JournalFile *f, Object *o) {
+
+ static const uint64_t table[] = {
+ [OBJECT_DATA] = sizeof(DataObject),
+ [OBJECT_FIELD] = sizeof(FieldObject),
+ [OBJECT_ENTRY] = sizeof(EntryObject),
+ [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
+ [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
+ [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
+ [OBJECT_TAG] = sizeof(TagObject),
+ };
+
+ assert(f);
+ assert(o);
+
+ if (o->object.type == OBJECT_DATA)
+ return journal_file_data_payload_offset(f);
+
+ if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
+ return sizeof(ObjectHeader);
+
+ return table[o->object.type];
+}
+
+static int check_object_header(JournalFile *f, Object *o, ObjectType type, uint64_t offset) {
+ uint64_t s;
+
+ assert(f);
+ assert(o);
+
+ s = le64toh(READ_NOW(o->object.size));
+ if (s == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Attempt to move to uninitialized object: %" PRIu64,
+ offset);
+
+ if (s < sizeof(ObjectHeader))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Attempt to move to overly short object with size %"PRIu64": %" PRIu64,
+ s, offset);
+
+ if (o->object.type <= OBJECT_UNUSED || o->object.type >= _OBJECT_TYPE_MAX)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Attempt to move to object with invalid type (%u): %" PRIu64,
+ o->object.type, offset);
+
+ if (type > OBJECT_UNUSED && o->object.type != type)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Found %s object while expecting %s object: %" PRIu64,
+ journal_object_type_to_string(o->object.type),
+ journal_object_type_to_string(type),
+ offset);
+
+ if (s < minimum_header_size(f, o))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Size of %s object (%"PRIu64") is smaller than the minimum object size (%"PRIu64"): %" PRIu64,
+ journal_object_type_to_string(o->object.type),
+ s,
+ minimum_header_size(f, o),
+ offset);
+
+ return 0;
+}
+
+/* Lightweight object checks. We want this to be fast, so that we won't
+ * slowdown every journal_file_move_to_object() call too much. */
+static int check_object(JournalFile *f, Object *o, uint64_t offset) {
+ assert(f);
+ assert(o);
+
+ switch (o->object.type) {
+
+ case OBJECT_DATA:
+ if ((le64toh(o->data.entry_offset) == 0) ^ (le64toh(o->data.n_entries) == 0))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Bad data n_entries: %" PRIu64 ": %" PRIu64,
+ le64toh(o->data.n_entries),
+ offset);
+
+ if (le64toh(o->object.size) <= journal_file_data_payload_offset(f))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Bad data size (<= %zu): %" PRIu64 ": %" PRIu64,
+ journal_file_data_payload_offset(f),
+ le64toh(o->object.size),
+ offset);
+
+ if (!VALID64(le64toh(o->data.next_hash_offset)) ||
+ !VALID64(le64toh(o->data.next_field_offset)) ||
+ !VALID64(le64toh(o->data.entry_offset)) ||
+ !VALID64(le64toh(o->data.entry_array_offset)))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid offset, next_hash_offset=" OFSfmt ", next_field_offset=" OFSfmt ", entry_offset=" OFSfmt ", entry_array_offset=" OFSfmt ": %" PRIu64,
+ le64toh(o->data.next_hash_offset),
+ le64toh(o->data.next_field_offset),
+ le64toh(o->data.entry_offset),
+ le64toh(o->data.entry_array_offset),
+ offset);
+
+ break;
+
+ case OBJECT_FIELD:
+ if (le64toh(o->object.size) <= offsetof(Object, field.payload))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Bad field size (<= %zu): %" PRIu64 ": %" PRIu64,
+ offsetof(Object, field.payload),
+ le64toh(o->object.size),
+ offset);
+
+ if (!VALID64(le64toh(o->field.next_hash_offset)) ||
+ !VALID64(le64toh(o->field.head_data_offset)))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid offset, next_hash_offset=" OFSfmt ", head_data_offset=" OFSfmt ": %" PRIu64,
+ le64toh(o->field.next_hash_offset),
+ le64toh(o->field.head_data_offset),
+ offset);
+ break;
+
+ case OBJECT_ENTRY: {
+ uint64_t sz;
+
+ sz = le64toh(READ_NOW(o->object.size));
+ if (sz < offsetof(Object, entry.items) ||
+ (sz - offsetof(Object, entry.items)) % journal_file_entry_item_size(f) != 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Bad entry size (<= %zu): %" PRIu64 ": %" PRIu64,
+ offsetof(Object, entry.items),
+ sz,
+ offset);
+
+ if ((sz - offsetof(Object, entry.items)) / journal_file_entry_item_size(f) <= 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid number items in entry: %" PRIu64 ": %" PRIu64,
+ (sz - offsetof(Object, entry.items)) / journal_file_entry_item_size(f),
+ offset);
+
+ if (le64toh(o->entry.seqnum) <= 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid entry seqnum: %" PRIx64 ": %" PRIu64,
+ le64toh(o->entry.seqnum),
+ offset);
+
+ if (!VALID_REALTIME(le64toh(o->entry.realtime)))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid entry realtime timestamp: %" PRIu64 ": %" PRIu64,
+ le64toh(o->entry.realtime),
+ offset);
+
+ if (!VALID_MONOTONIC(le64toh(o->entry.monotonic)))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid entry monotonic timestamp: %" PRIu64 ": %" PRIu64,
+ le64toh(o->entry.monotonic),
+ offset);
+
+ if (sd_id128_is_null(o->entry.boot_id))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid object entry with an empty boot ID: %" PRIu64,
+ offset);
+
+ break;
+ }
+
+ case OBJECT_DATA_HASH_TABLE:
+ case OBJECT_FIELD_HASH_TABLE: {
+ uint64_t sz;
+
+ sz = le64toh(READ_NOW(o->object.size));
+ if (sz < offsetof(Object, hash_table.items) ||
+ (sz - offsetof(Object, hash_table.items)) % sizeof(HashItem) != 0 ||
+ (sz - offsetof(Object, hash_table.items)) / sizeof(HashItem) <= 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid %s hash table size: %" PRIu64 ": %" PRIu64,
+ journal_object_type_to_string(o->object.type),
+ sz,
+ offset);
+
+ break;
+ }
+
+ case OBJECT_ENTRY_ARRAY: {
+ uint64_t sz, next;
+
+ sz = le64toh(READ_NOW(o->object.size));
+ if (sz < offsetof(Object, entry_array.items) ||
+ (sz - offsetof(Object, entry_array.items)) % journal_file_entry_array_item_size(f) != 0 ||
+ (sz - offsetof(Object, entry_array.items)) / journal_file_entry_array_item_size(f) <= 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid object entry array size: %" PRIu64 ": %" PRIu64,
+ sz,
+ offset);
+ /* Here, we request that the offset of each entry array object is in strictly increasing order. */
+ next = le64toh(o->entry_array.next_entry_array_offset);
+ if (!VALID64(next) || (next > 0 && next <= offset))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid object entry array next_entry_array_offset: %" PRIu64 ": %" PRIu64,
+ next,
+ offset);
+
+ break;
+ }
+
+ case OBJECT_TAG:
+ if (le64toh(o->object.size) != sizeof(TagObject))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid object tag size: %" PRIu64 ": %" PRIu64,
+ le64toh(o->object.size),
+ offset);
+
+ if (!VALID_EPOCH(le64toh(o->tag.epoch)))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid object tag epoch: %" PRIu64 ": %" PRIu64,
+ le64toh(o->tag.epoch), offset);
+
+ break;
+ }
+
+ return 0;
+}
+
+int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset, Object **ret) {
+ int r;
+ Object *o;
+
+ assert(f);
+
+ /* Even if this function fails, it may clear, overwrite, or alter previously cached entries with the
+ * same type. After this function has been called, all previously read objects with the same type may
+ * be invalidated, hence must be re-read before use. */
+
+ /* Objects may only be located at multiple of 64 bit */
+ if (!VALID64(offset))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Attempt to move to %s object at non-64-bit boundary: %" PRIu64,
+ journal_object_type_to_string(type),
+ offset);
+
+ /* Object may not be located in the file header */
+ if (offset < le64toh(f->header->header_size))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Attempt to move to %s object located in file header: %" PRIu64,
+ journal_object_type_to_string(type),
+ offset);
+
+ r = journal_file_move_to(f, type, false, offset, sizeof(ObjectHeader), (void**) &o);
+ if (r < 0)
+ return r;
+
+ r = check_object_header(f, o, type, offset);
+ if (r < 0)
+ return r;
+
+ r = journal_file_move_to(f, type, false, offset, le64toh(READ_NOW(o->object.size)), (void**) &o);
+ if (r < 0)
+ return r;
+
+ r = check_object_header(f, o, type, offset);
+ if (r < 0)
+ return r;
+
+ r = check_object(f, o, offset);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = o;
+
+ return 0;
+}
+
+int journal_file_pin_object(JournalFile *f, Object *o) {
+ assert(f);
+ assert(o);
+
+ /* This attaches the mmap window that provides the object to the 'pinning' category. So, reading
+ * another object with the same type will not invalidate the object, until this function is called
+ * for another object. */
+ return mmap_cache_fd_pin(f->cache_fd, type_to_category(o->object.type), o, le64toh(o->object.size));
+}
+
+int journal_file_read_object_header(JournalFile *f, ObjectType type, uint64_t offset, Object *ret) {
+ ssize_t n;
+ Object o;
+ int r;
+
+ assert(f);
+
+ /* Objects may only be located at multiple of 64 bit */
+ if (!VALID64(offset))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Attempt to read %s object at non-64-bit boundary: %" PRIu64,
+ journal_object_type_to_string(type), offset);
+
+ /* Object may not be located in the file header */
+ if (offset < le64toh(f->header->header_size))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Attempt to read %s object located in file header: %" PRIu64,
+ journal_object_type_to_string(type), offset);
+
+ /* This will likely read too much data but it avoids having to call pread() twice. */
+ n = pread(f->fd, &o, sizeof(o), offset);
+ if (n < 0)
+ return log_debug_errno(errno, "Failed to read journal %s object at offset: %" PRIu64,
+ journal_object_type_to_string(type), offset);
+
+ if ((size_t) n < sizeof(o.object))
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to read short %s object at offset: %" PRIu64,
+ journal_object_type_to_string(type), offset);
+
+ r = check_object_header(f, &o, type, offset);
+ if (r < 0)
+ return r;
+
+ if ((size_t) n < minimum_header_size(f, &o))
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO),
+ "Short read while reading %s object: %" PRIu64,
+ journal_object_type_to_string(type), offset);
+
+ r = check_object(f, &o, offset);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = o;
+
+ return 0;
+}
+
+static uint64_t inc_seqnum(uint64_t seqnum) {
+ if (seqnum < UINT64_MAX-1)
+ return seqnum + 1;
+
+ return 1; /* skip over UINT64_MAX and 0 when we run out of seqnums and start again */
+}
+
+static uint64_t journal_file_entry_seqnum(
+ JournalFile *f,
+ uint64_t *seqnum) {
+
+ uint64_t next_seqnum;
+
+ assert(f);
+ assert(f->header);
+
+ /* Picks a new sequence number for the entry we are about to add and returns it. */
+
+ next_seqnum = inc_seqnum(le64toh(f->header->tail_entry_seqnum));
+
+ /* If an external seqnum counter was passed, we update both the local and the external one, and set
+ * it to the maximum of both */
+ if (seqnum)
+ *seqnum = next_seqnum = MAX(inc_seqnum(*seqnum), next_seqnum);
+
+ f->header->tail_entry_seqnum = htole64(next_seqnum);
+
+ if (f->header->head_entry_seqnum == 0)
+ f->header->head_entry_seqnum = htole64(next_seqnum);
+
+ return next_seqnum;
+}
+
+int journal_file_append_object(
+ JournalFile *f,
+ ObjectType type,
+ uint64_t size,
+ Object **ret_object,
+ uint64_t *ret_offset) {
+
+ int r;
+ uint64_t p;
+ Object *o;
+
+ assert(f);
+ assert(f->header);
+ assert(type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX);
+ assert(size >= sizeof(ObjectHeader));
+
+ r = journal_file_set_online(f);
+ if (r < 0)
+ return r;
+
+ r = journal_file_tail_end_by_mmap(f, &p);
+ if (r < 0)
+ return r;
+
+ r = journal_file_allocate(f, p, size);
+ if (r < 0)
+ return r;
+
+ r = journal_file_move_to(f, type, false, p, size, (void**) &o);
+ if (r < 0)
+ return r;
+
+ o->object = (ObjectHeader) {
+ .type = type,
+ .size = htole64(size),
+ };
+
+ f->header->tail_object_offset = htole64(p);
+ f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
+
+ if (ret_object)
+ *ret_object = o;
+
+ if (ret_offset)
+ *ret_offset = p;
+
+ return 0;
+}
+
+static int journal_file_setup_data_hash_table(JournalFile *f) {
+ uint64_t s, p;
+ Object *o;
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ /* We estimate that we need 1 hash table entry per 768 bytes
+ of journal file and we want to make sure we never get
+ beyond 75% fill level. Calculate the hash table size for
+ the maximum file size based on these metrics. */
+
+ s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
+ if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
+ s = DEFAULT_DATA_HASH_TABLE_SIZE;
+
+ log_debug("Reserving %"PRIu64" entries in data hash table.", s / sizeof(HashItem));
+
+ r = journal_file_append_object(f,
+ OBJECT_DATA_HASH_TABLE,
+ offsetof(Object, hash_table.items) + s,
+ &o, &p);
+ if (r < 0)
+ return r;
+
+ memzero(o->hash_table.items, s);
+
+ f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
+ f->header->data_hash_table_size = htole64(s);
+
+ return 0;
+}
+
+static int journal_file_setup_field_hash_table(JournalFile *f) {
+ uint64_t s, p;
+ Object *o;
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ /* We use a fixed size hash table for the fields as this
+ * number should grow very slowly only */
+
+ s = DEFAULT_FIELD_HASH_TABLE_SIZE;
+ log_debug("Reserving %"PRIu64" entries in field hash table.", s / sizeof(HashItem));
+
+ r = journal_file_append_object(f,
+ OBJECT_FIELD_HASH_TABLE,
+ offsetof(Object, hash_table.items) + s,
+ &o, &p);
+ if (r < 0)
+ return r;
+
+ memzero(o->hash_table.items, s);
+
+ f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
+ f->header->field_hash_table_size = htole64(s);
+
+ return 0;
+}
+
+int journal_file_map_data_hash_table(JournalFile *f) {
+ uint64_t s, p;
+ void *t;
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ if (f->data_hash_table)
+ return 0;
+
+ p = le64toh(f->header->data_hash_table_offset);
+ s = le64toh(f->header->data_hash_table_size);
+
+ r = journal_file_move_to(f,
+ OBJECT_DATA_HASH_TABLE,
+ true,
+ p, s,
+ &t);
+ if (r < 0)
+ return r;
+
+ f->data_hash_table = t;
+ return 0;
+}
+
+int journal_file_map_field_hash_table(JournalFile *f) {
+ uint64_t s, p;
+ void *t;
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ if (f->field_hash_table)
+ return 0;
+
+ p = le64toh(f->header->field_hash_table_offset);
+ s = le64toh(f->header->field_hash_table_size);
+
+ r = journal_file_move_to(f,
+ OBJECT_FIELD_HASH_TABLE,
+ true,
+ p, s,
+ &t);
+ if (r < 0)
+ return r;
+
+ f->field_hash_table = t;
+ return 0;
+}
+
+static int journal_file_link_field(
+ JournalFile *f,
+ Object *o,
+ uint64_t offset,
+ uint64_t hash) {
+
+ uint64_t p, h, m;
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(f->field_hash_table);
+ assert(o);
+ assert(offset > 0);
+
+ if (o->object.type != OBJECT_FIELD)
+ return -EINVAL;
+
+ m = le64toh(READ_NOW(f->header->field_hash_table_size)) / sizeof(HashItem);
+ if (m <= 0)
+ return -EBADMSG;
+
+ /* This might alter the window we are looking at */
+ o->field.next_hash_offset = o->field.head_data_offset = 0;
+
+ h = hash % m;
+ p = le64toh(f->field_hash_table[h].tail_hash_offset);
+ if (p == 0)
+ f->field_hash_table[h].head_hash_offset = htole64(offset);
+ else {
+ r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
+ if (r < 0)
+ return r;
+
+ o->field.next_hash_offset = htole64(offset);
+ }
+
+ f->field_hash_table[h].tail_hash_offset = htole64(offset);
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
+ f->header->n_fields = htole64(le64toh(f->header->n_fields) + 1);
+
+ return 0;
+}
+
+static int journal_file_link_data(
+ JournalFile *f,
+ Object *o,
+ uint64_t offset,
+ uint64_t hash) {
+
+ uint64_t p, h, m;
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(f->data_hash_table);
+ assert(o);
+ assert(offset > 0);
+
+ if (o->object.type != OBJECT_DATA)
+ return -EINVAL;
+
+ m = le64toh(READ_NOW(f->header->data_hash_table_size)) / sizeof(HashItem);
+ if (m <= 0)
+ return -EBADMSG;
+
+ /* This might alter the window we are looking at */
+ o->data.next_hash_offset = o->data.next_field_offset = 0;
+ o->data.entry_offset = o->data.entry_array_offset = 0;
+ o->data.n_entries = 0;
+
+ h = hash % m;
+ p = le64toh(f->data_hash_table[h].tail_hash_offset);
+ if (p == 0)
+ /* Only entry in the hash table is easy */
+ f->data_hash_table[h].head_hash_offset = htole64(offset);
+ else {
+ /* Move back to the previous data object, to patch in
+ * pointer */
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+ o->data.next_hash_offset = htole64(offset);
+ }
+
+ f->data_hash_table[h].tail_hash_offset = htole64(offset);
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
+ f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
+
+ return 0;
+}
+
+static int get_next_hash_offset(
+ JournalFile *f,
+ uint64_t *p,
+ le64_t *next_hash_offset,
+ uint64_t *depth,
+ le64_t *header_max_depth) {
+
+ uint64_t nextp;
+
+ assert(f);
+ assert(p);
+ assert(next_hash_offset);
+ assert(depth);
+
+ nextp = le64toh(READ_NOW(*next_hash_offset));
+ if (nextp > 0) {
+ if (nextp <= *p) /* Refuse going in loops */
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Detected hash item loop in %s, refusing.", f->path);
+
+ (*depth)++;
+
+ /* If the depth of this hash chain is larger than all others we have seen so far, record it */
+ if (header_max_depth && journal_file_writable(f))
+ *header_max_depth = htole64(MAX(*depth, le64toh(*header_max_depth)));
+ }
+
+ *p = nextp;
+ return 0;
+}
+
+int journal_file_find_field_object_with_hash(
+ JournalFile *f,
+ const void *field,
+ uint64_t size,
+ uint64_t hash,
+ Object **ret_object,
+ uint64_t *ret_offset) {
+
+ uint64_t p, osize, h, m, depth = 0;
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(field);
+ assert(size > 0);
+
+ /* If the field hash table is empty, we can't find anything */
+ if (le64toh(f->header->field_hash_table_size) <= 0)
+ return 0;
+
+ /* Map the field hash table, if it isn't mapped yet. */
+ r = journal_file_map_field_hash_table(f);
+ if (r < 0)
+ return r;
+
+ osize = offsetof(Object, field.payload) + size;
+
+ m = le64toh(READ_NOW(f->header->field_hash_table_size)) / sizeof(HashItem);
+ if (m <= 0)
+ return -EBADMSG;
+
+ h = hash % m;
+ p = le64toh(f->field_hash_table[h].head_hash_offset);
+ while (p > 0) {
+ Object *o;
+
+ r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
+ if (r < 0)
+ return r;
+
+ if (le64toh(o->field.hash) == hash &&
+ le64toh(o->object.size) == osize &&
+ memcmp(o->field.payload, field, size) == 0) {
+
+ if (ret_object)
+ *ret_object = o;
+ if (ret_offset)
+ *ret_offset = p;
+
+ return 1;
+ }
+
+ r = get_next_hash_offset(
+ f,
+ &p,
+ &o->field.next_hash_offset,
+ &depth,
+ JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth) ? &f->header->field_hash_chain_depth : NULL);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+uint64_t journal_file_hash_data(
+ JournalFile *f,
+ const void *data,
+ size_t sz) {
+
+ assert(f);
+ assert(f->header);
+ assert(data || sz == 0);
+
+ /* We try to unify our codebase on siphash, hence new-styled journal files utilizing the keyed hash
+ * function use siphash. Old journal files use the Jenkins hash. */
+
+ if (JOURNAL_HEADER_KEYED_HASH(f->header))
+ return siphash24(data, sz, f->header->file_id.bytes);
+
+ return jenkins_hash64(data, sz);
+}
+
+int journal_file_find_field_object(
+ JournalFile *f,
+ const void *field,
+ uint64_t size,
+ Object **ret_object,
+ uint64_t *ret_offset) {
+
+ assert(f);
+ assert(field);
+ assert(size > 0);
+
+ return journal_file_find_field_object_with_hash(
+ f,
+ field, size,
+ journal_file_hash_data(f, field, size),
+ ret_object, ret_offset);
+}
+
+int journal_file_find_data_object_with_hash(
+ JournalFile *f,
+ const void *data,
+ uint64_t size,
+ uint64_t hash,
+ Object **ret_object,
+ uint64_t *ret_offset) {
+
+ uint64_t p, h, m, depth = 0;
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(data || size == 0);
+
+ /* If there's no data hash table, then there's no entry. */
+ if (le64toh(f->header->data_hash_table_size) <= 0)
+ return 0;
+
+ /* Map the data hash table, if it isn't mapped yet. */
+ r = journal_file_map_data_hash_table(f);
+ if (r < 0)
+ return r;
+
+ m = le64toh(READ_NOW(f->header->data_hash_table_size)) / sizeof(HashItem);
+ if (m <= 0)
+ return -EBADMSG;
+
+ h = hash % m;
+ p = le64toh(f->data_hash_table[h].head_hash_offset);
+
+ while (p > 0) {
+ Object *o;
+ void *d;
+ size_t rsize;
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+ if (le64toh(o->data.hash) != hash)
+ goto next;
+
+ r = journal_file_data_payload(f, o, p, NULL, 0, 0, &d, &rsize);
+ if (r < 0)
+ return r;
+ assert(r > 0); /* journal_file_data_payload() always returns > 0 if no field is provided. */
+
+ if (memcmp_nn(data, size, d, rsize) == 0) {
+ if (ret_object)
+ *ret_object = o;
+
+ if (ret_offset)
+ *ret_offset = p;
+
+ return 1;
+ }
+
+ next:
+ r = get_next_hash_offset(
+ f,
+ &p,
+ &o->data.next_hash_offset,
+ &depth,
+ JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth) ? &f->header->data_hash_chain_depth : NULL);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int journal_file_find_data_object(
+ JournalFile *f,
+ const void *data,
+ uint64_t size,
+ Object **ret_object,
+ uint64_t *ret_offset) {
+
+ assert(f);
+ assert(data || size == 0);
+
+ return journal_file_find_data_object_with_hash(
+ f,
+ data, size,
+ journal_file_hash_data(f, data, size),
+ ret_object, ret_offset);
+}
+
+bool journal_field_valid(const char *p, size_t l, bool allow_protected) {
+ /* We kinda enforce POSIX syntax recommendations for
+ environment variables here, but make a couple of additional
+ requirements.
+
+ http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html */
+
+ assert(p);
+
+ if (l == SIZE_MAX)
+ l = strlen(p);
+
+ /* No empty field names */
+ if (l <= 0)
+ return false;
+
+ /* Don't allow names longer than 64 chars */
+ if (l > 64)
+ return false;
+
+ /* Variables starting with an underscore are protected */
+ if (!allow_protected && p[0] == '_')
+ return false;
+
+ /* Don't allow digits as first character */
+ if (ascii_isdigit(p[0]))
+ return false;
+
+ /* Only allow A-Z0-9 and '_' */
+ for (const char *a = p; a < p + l; a++)
+ if ((*a < 'A' || *a > 'Z') &&
+ !ascii_isdigit(*a) &&
+ *a != '_')
+ return false;
+
+ return true;
+}
+
+static int journal_file_append_field(
+ JournalFile *f,
+ const void *field,
+ uint64_t size,
+ Object **ret_object,
+ uint64_t *ret_offset) {
+
+ uint64_t hash, p;
+ uint64_t osize;
+ Object *o;
+ int r;
+
+ assert(f);
+ assert(field);
+ assert(size > 0);
+
+ if (!journal_field_valid(field, size, true))
+ return -EBADMSG;
+
+ hash = journal_file_hash_data(f, field, size);
+
+ r = journal_file_find_field_object_with_hash(f, field, size, hash, ret_object, ret_offset);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ osize = offsetof(Object, field.payload) + size;
+ r = journal_file_append_object(f, OBJECT_FIELD, osize, &o, &p);
+ if (r < 0)
+ return r;
+
+ o->field.hash = htole64(hash);
+ memcpy(o->field.payload, field, size);
+
+ r = journal_file_link_field(f, o, p, hash);
+ if (r < 0)
+ return r;
+
+ /* The linking might have altered the window, so let's only pass the offset to hmac which will
+ * move to the object again if needed. */
+
+#if HAVE_GCRYPT
+ r = journal_file_hmac_put_object(f, OBJECT_FIELD, NULL, p);
+ if (r < 0)
+ return r;
+#endif
+
+ if (ret_object) {
+ r = journal_file_move_to_object(f, OBJECT_FIELD, p, ret_object);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret_offset)
+ *ret_offset = p;
+
+ return 0;
+}
+
+static int maybe_compress_payload(JournalFile *f, uint8_t *dst, const uint8_t *src, uint64_t size, size_t *rsize) {
+ assert(f);
+ assert(f->header);
+
+#if HAVE_COMPRESSION
+ Compression c;
+ int r;
+
+ c = JOURNAL_FILE_COMPRESSION(f);
+ if (c == COMPRESSION_NONE || size < f->compress_threshold_bytes)
+ return 0;
+
+ r = compress_blob(c, src, size, dst, size - 1, rsize);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to compress data object using %s, ignoring: %m", compression_to_string(c));
+
+ log_debug("Compressed data object %"PRIu64" -> %zu using %s", size, *rsize, compression_to_string(c));
+
+ return 1; /* compressed */
+#else
+ return 0;
+#endif
+}
+
+static int journal_file_append_data(
+ JournalFile *f,
+ const void *data,
+ uint64_t size,
+ Object **ret_object,
+ uint64_t *ret_offset) {
+
+ uint64_t hash, p, osize;
+ Object *o, *fo;
+ size_t rsize = 0;
+ const void *eq;
+ int r;
+
+ assert(f);
+
+ if (!data || size == 0)
+ return -EINVAL;
+
+ hash = journal_file_hash_data(f, data, size);
+
+ r = journal_file_find_data_object_with_hash(f, data, size, hash, ret_object, ret_offset);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ eq = memchr(data, '=', size);
+ if (!eq)
+ return -EINVAL;
+
+ osize = journal_file_data_payload_offset(f) + size;
+ r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
+ if (r < 0)
+ return r;
+
+ o->data.hash = htole64(hash);
+
+ r = maybe_compress_payload(f, journal_file_data_payload_field(f, o), data, size, &rsize);
+ if (r <= 0)
+ /* We don't really care failures, let's continue without compression */
+ memcpy_safe(journal_file_data_payload_field(f, o), data, size);
+ else {
+ Compression c = JOURNAL_FILE_COMPRESSION(f);
+
+ assert(c >= 0 && c < _COMPRESSION_MAX && c != COMPRESSION_NONE);
+
+ o->object.size = htole64(journal_file_data_payload_offset(f) + rsize);
+ o->object.flags |= COMPRESSION_TO_OBJECT_FLAG(c);
+ }
+
+ r = journal_file_link_data(f, o, p, hash);
+ if (r < 0)
+ return r;
+
+ /* The linking might have altered the window, so let's refresh our pointer. */
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+#if HAVE_GCRYPT
+ r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
+ if (r < 0)
+ return r;
+#endif
+
+ /* Create field object ... */
+ r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, NULL);
+ if (r < 0)
+ return r;
+
+ /* ... and link it in. */
+ o->data.next_field_offset = fo->field.head_data_offset;
+ fo->field.head_data_offset = le64toh(p);
+
+ if (ret_object)
+ *ret_object = o;
+
+ if (ret_offset)
+ *ret_offset = p;
+
+ return 0;
+}
+
+static int maybe_decompress_payload(
+ JournalFile *f,
+ uint8_t *payload,
+ uint64_t size,
+ Compression compression,
+ const char *field,
+ size_t field_length,
+ size_t data_threshold,
+ void **ret_data,
+ size_t *ret_size) {
+
+ assert(f);
+
+ /* We can't read objects larger than 4G on a 32-bit machine */
+ if ((uint64_t) (size_t) size != size)
+ return -E2BIG;
+
+ if (compression != COMPRESSION_NONE) {
+#if HAVE_COMPRESSION
+ size_t rsize;
+ int r;
+
+ if (field) {
+ r = decompress_startswith(compression, payload, size, &f->compress_buffer, field,
+ field_length, '=');
+ if (r < 0)
+ return log_debug_errno(r,
+ "Cannot decompress %s object of length %" PRIu64 ": %m",
+ compression_to_string(compression),
+ size);
+ if (r == 0) {
+ if (ret_data)
+ *ret_data = NULL;
+ if (ret_size)
+ *ret_size = 0;
+ return 0;
+ }
+ }
+
+ r = decompress_blob(compression, payload, size, &f->compress_buffer, &rsize, 0);
+ if (r < 0)
+ return r;
+
+ if (ret_data)
+ *ret_data = f->compress_buffer;
+ if (ret_size)
+ *ret_size = rsize;
+#else
+ return -EPROTONOSUPPORT;
+#endif
+ } else {
+ if (field && (size < field_length + 1 || memcmp(payload, field, field_length) != 0 || payload[field_length] != '=')) {
+ if (ret_data)
+ *ret_data = NULL;
+ if (ret_size)
+ *ret_size = 0;
+ return 0;
+ }
+
+ if (ret_data)
+ *ret_data = payload;
+ if (ret_size)
+ *ret_size = (size_t) size;
+ }
+
+ return 1;
+}
+
+int journal_file_data_payload(
+ JournalFile *f,
+ Object *o,
+ uint64_t offset,
+ const char *field,
+ size_t field_length,
+ size_t data_threshold,
+ void **ret_data,
+ size_t *ret_size) {
+
+ uint64_t size;
+ Compression c;
+ int r;
+
+ assert(f);
+ assert(!field == (field_length == 0)); /* These must be specified together. */
+
+ if (!o) {
+ r = journal_file_move_to_object(f, OBJECT_DATA, offset, &o);
+ if (r < 0)
+ return r;
+ }
+
+ size = le64toh(READ_NOW(o->object.size));
+ if (size < journal_file_data_payload_offset(f))
+ return -EBADMSG;
+
+ size -= journal_file_data_payload_offset(f);
+
+ c = COMPRESSION_FROM_OBJECT(o);
+ if (c < 0)
+ return -EPROTONOSUPPORT;
+
+ return maybe_decompress_payload(f, journal_file_data_payload_field(f, o), size, c, field,
+ field_length, data_threshold, ret_data, ret_size);
+}
+
+uint64_t journal_file_entry_n_items(JournalFile *f, Object *o) {
+ uint64_t sz;
+
+ assert(f);
+ assert(o);
+
+ if (o->object.type != OBJECT_ENTRY)
+ return 0;
+
+ sz = le64toh(READ_NOW(o->object.size));
+ if (sz < offsetof(Object, entry.items))
+ return 0;
+
+ return (sz - offsetof(Object, entry.items)) / journal_file_entry_item_size(f);
+}
+
+uint64_t journal_file_entry_array_n_items(JournalFile *f, Object *o) {
+ uint64_t sz;
+
+ assert(f);
+ assert(o);
+
+ if (o->object.type != OBJECT_ENTRY_ARRAY)
+ return 0;
+
+ sz = le64toh(READ_NOW(o->object.size));
+ if (sz < offsetof(Object, entry_array.items))
+ return 0;
+
+ return (sz - offsetof(Object, entry_array.items)) / journal_file_entry_array_item_size(f);
+}
+
+uint64_t journal_file_hash_table_n_items(Object *o) {
+ uint64_t sz;
+
+ assert(o);
+
+ if (!IN_SET(o->object.type, OBJECT_DATA_HASH_TABLE, OBJECT_FIELD_HASH_TABLE))
+ return 0;
+
+ sz = le64toh(READ_NOW(o->object.size));
+ if (sz < offsetof(Object, hash_table.items))
+ return 0;
+
+ return (sz - offsetof(Object, hash_table.items)) / sizeof(HashItem);
+}
+
+static void write_entry_array_item(JournalFile *f, Object *o, uint64_t i, uint64_t p) {
+ assert(f);
+ assert(o);
+
+ if (JOURNAL_HEADER_COMPACT(f->header)) {
+ assert(p <= UINT32_MAX);
+ o->entry_array.items.compact[i] = htole32(p);
+ } else
+ o->entry_array.items.regular[i] = htole64(p);
+}
+
+static int link_entry_into_array(
+ JournalFile *f,
+ le64_t *first,
+ le64_t *idx,
+ le32_t *tail,
+ le32_t *tidx,
+ uint64_t p) {
+
+ uint64_t n = 0, ap = 0, q, i, a, hidx;
+ Object *o;
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(first);
+ assert(idx);
+ assert(p > 0);
+
+ a = tail ? le32toh(*tail) : le64toh(*first);
+ hidx = le64toh(READ_NOW(*idx));
+ i = tidx ? le32toh(READ_NOW(*tidx)) : hidx;
+
+ while (a > 0) {
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+ if (r < 0)
+ return r;
+
+ n = journal_file_entry_array_n_items(f, o);
+ if (i < n) {
+ write_entry_array_item(f, o, i, p);
+ *idx = htole64(hidx + 1);
+ if (tidx)
+ *tidx = htole32(le32toh(*tidx) + 1);
+ return 0;
+ }
+
+ i -= n;
+ ap = a;
+ a = le64toh(o->entry_array.next_entry_array_offset);
+ }
+
+ if (hidx > n)
+ n = (hidx+1) * 2;
+ else
+ n = n * 2;
+
+ if (n < 4)
+ n = 4;
+
+ r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
+ offsetof(Object, entry_array.items) + n * journal_file_entry_array_item_size(f),
+ &o, &q);
+ if (r < 0)
+ return r;
+
+#if HAVE_GCRYPT
+ r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
+ if (r < 0)
+ return r;
+#endif
+
+ write_entry_array_item(f, o, i, p);
+
+ if (ap == 0)
+ *first = htole64(q);
+ else {
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
+ if (r < 0)
+ return r;
+
+ o->entry_array.next_entry_array_offset = htole64(q);
+ }
+
+ if (tail)
+ *tail = htole32(q);
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
+ f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
+
+ *idx = htole64(hidx + 1);
+ if (tidx)
+ *tidx = htole32(1);
+
+ return 0;
+}
+
+static int link_entry_into_array_plus_one(
+ JournalFile *f,
+ le64_t *extra,
+ le64_t *first,
+ le64_t *idx,
+ le32_t *tail,
+ le32_t *tidx,
+ uint64_t p) {
+
+ uint64_t hidx;
+ int r;
+
+ assert(f);
+ assert(extra);
+ assert(first);
+ assert(idx);
+ assert(p > 0);
+
+ hidx = le64toh(READ_NOW(*idx));
+ if (hidx == UINT64_MAX)
+ return -EBADMSG;
+ if (hidx == 0)
+ *extra = htole64(p);
+ else {
+ le64_t i;
+
+ i = htole64(hidx - 1);
+ r = link_entry_into_array(f, first, &i, tail, tidx, p);
+ if (r < 0)
+ return r;
+ }
+
+ *idx = htole64(hidx + 1);
+ return 0;
+}
+
+static int journal_file_link_entry_item(JournalFile *f, uint64_t offset, uint64_t p) {
+ Object *o;
+ int r;
+
+ assert(f);
+ assert(offset > 0);
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+ return link_entry_into_array_plus_one(f,
+ &o->data.entry_offset,
+ &o->data.entry_array_offset,
+ &o->data.n_entries,
+ JOURNAL_HEADER_COMPACT(f->header) ? &o->data.compact.tail_entry_array_offset : NULL,
+ JOURNAL_HEADER_COMPACT(f->header) ? &o->data.compact.tail_entry_array_n_entries : NULL,
+ offset);
+}
+
+static int journal_file_link_entry(
+ JournalFile *f,
+ Object *o,
+ uint64_t offset,
+ const EntryItem items[],
+ size_t n_items) {
+
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(o);
+ assert(offset > 0);
+
+ if (o->object.type != OBJECT_ENTRY)
+ return -EINVAL;
+
+ __atomic_thread_fence(__ATOMIC_SEQ_CST);
+
+ /* Link up the entry itself */
+ r = link_entry_into_array(f,
+ &f->header->entry_array_offset,
+ &f->header->n_entries,
+ JOURNAL_HEADER_CONTAINS(f->header, tail_entry_array_offset) ? &f->header->tail_entry_array_offset : NULL,
+ JOURNAL_HEADER_CONTAINS(f->header, tail_entry_array_n_entries) ? &f->header->tail_entry_array_n_entries : NULL,
+ offset);
+ if (r < 0)
+ return r;
+
+ /* log_debug("=> %s seqnr=%"PRIu64" n_entries=%"PRIu64, f->path, o->entry.seqnum, f->header->n_entries); */
+
+ if (f->header->head_entry_realtime == 0)
+ f->header->head_entry_realtime = o->entry.realtime;
+
+ f->header->tail_entry_realtime = o->entry.realtime;
+ f->header->tail_entry_monotonic = o->entry.monotonic;
+ if (JOURNAL_HEADER_CONTAINS(f->header, tail_entry_offset))
+ f->header->tail_entry_offset = htole64(offset);
+ f->newest_mtime = 0; /* we have a new tail entry now, explicitly invalidate newest boot id/timestamp info */
+
+ /* Link up the items */
+ for (uint64_t i = 0; i < n_items; i++) {
+ int k;
+
+ /* If we fail to link an entry item because we can't allocate a new entry array, don't fail
+ * immediately but try to link the other entry items since it might still be possible to link
+ * those if they don't require a new entry array to be allocated. */
+
+ k = journal_file_link_entry_item(f, offset, items[i].object_offset);
+ if (k == -E2BIG)
+ r = k;
+ else if (k < 0)
+ return k;
+ }
+
+ return r;
+}
+
+static void write_entry_item(JournalFile *f, Object *o, uint64_t i, const EntryItem *item) {
+ assert(f);
+ assert(o);
+ assert(item);
+
+ if (JOURNAL_HEADER_COMPACT(f->header)) {
+ assert(item->object_offset <= UINT32_MAX);
+ o->entry.items.compact[i].object_offset = htole32(item->object_offset);
+ } else {
+ o->entry.items.regular[i].object_offset = htole64(item->object_offset);
+ o->entry.items.regular[i].hash = htole64(item->hash);
+ }
+}
+
+static int journal_file_append_entry_internal(
+ JournalFile *f,
+ const dual_timestamp *ts,
+ const sd_id128_t *boot_id,
+ const sd_id128_t *machine_id,
+ uint64_t xor_hash,
+ const EntryItem items[],
+ size_t n_items,
+ uint64_t *seqnum,
+ sd_id128_t *seqnum_id,
+ Object **ret_object,
+ uint64_t *ret_offset) {
+
+ uint64_t np;
+ uint64_t osize;
+ Object *o;
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(ts);
+ assert(boot_id);
+ assert(!sd_id128_is_null(*boot_id));
+ assert(items || n_items == 0);
+
+ if (f->strict_order) {
+ /* If requested be stricter with ordering in this journal file, to make searching via
+ * bisection fully deterministic. This is an optional feature, so that if desired journal
+ * files can be written where the ordering is not strictly enforced (in which case bisection
+ * will yield *a* result, but not the *only* result, when searching for points in
+ * time). Strict ordering mode is enabled when journald originally writes the files, but
+ * might not necessarily be if other tools (the remoting tools for example) write journal
+ * files from combined sources.
+ *
+ * Typically, if any of the errors generated here are seen journald will just rotate the
+ * journal files and start anew. */
+
+ if (ts->realtime < le64toh(f->header->tail_entry_realtime))
+ return log_debug_errno(SYNTHETIC_ERRNO(EREMCHG),
+ "Realtime timestamp %" PRIu64 " smaller than previous realtime "
+ "timestamp %" PRIu64 ", refusing entry.",
+ ts->realtime, le64toh(f->header->tail_entry_realtime));
+
+ if (sd_id128_equal(*boot_id, f->header->tail_entry_boot_id) &&
+ ts->monotonic < le64toh(f->header->tail_entry_monotonic))
+ return log_debug_errno(
+ SYNTHETIC_ERRNO(ENOTNAM),
+ "Monotonic timestamp %" PRIu64
+ " smaller than previous monotonic timestamp %" PRIu64
+ " while having the same boot ID, refusing entry.",
+ ts->monotonic,
+ le64toh(f->header->tail_entry_monotonic));
+ }
+
+ if (seqnum_id) {
+ /* Settle the passed in sequence number ID */
+
+ if (sd_id128_is_null(*seqnum_id))
+ *seqnum_id = f->header->seqnum_id; /* Caller has none assigned, then copy the one from the file */
+ else if (!sd_id128_equal(*seqnum_id, f->header->seqnum_id)) {
+ /* Different seqnum IDs? We can't allow entries from multiple IDs end up in the same journal.*/
+ if (le64toh(f->header->n_entries) == 0)
+ f->header->seqnum_id = *seqnum_id; /* Caller has one, and file so far has no entries, then copy the one from the caller */
+ else
+ return log_debug_errno(SYNTHETIC_ERRNO(EILSEQ),
+ "Sequence number IDs don't match, refusing entry.");
+ }
+ }
+
+ if (machine_id && sd_id128_is_null(f->header->machine_id))
+ /* Initialize machine ID when not set yet */
+ f->header->machine_id = *machine_id;
+
+ osize = offsetof(Object, entry.items) + (n_items * journal_file_entry_item_size(f));
+
+ r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
+ if (r < 0)
+ return r;
+
+ o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
+ o->entry.realtime = htole64(ts->realtime);
+ o->entry.monotonic = htole64(ts->monotonic);
+ o->entry.xor_hash = htole64(xor_hash);
+ o->entry.boot_id = f->header->tail_entry_boot_id = *boot_id;
+
+ for (size_t i = 0; i < n_items; i++)
+ write_entry_item(f, o, i, &items[i]);
+
+#if HAVE_GCRYPT
+ r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
+ if (r < 0)
+ return r;
+#endif
+
+ r = journal_file_link_entry(f, o, np, items, n_items);
+ if (r < 0)
+ return r;
+
+ if (ret_object)
+ *ret_object = o;
+
+ if (ret_offset)
+ *ret_offset = np;
+
+ return r;
+}
+
+void journal_file_post_change(JournalFile *f) {
+ assert(f);
+
+ if (f->fd < 0)
+ return;
+
+ /* inotify() does not receive IN_MODIFY events from file
+ * accesses done via mmap(). After each access we hence
+ * trigger IN_MODIFY by truncating the journal file to its
+ * current size which triggers IN_MODIFY. */
+
+ __atomic_thread_fence(__ATOMIC_SEQ_CST);
+
+ if (ftruncate(f->fd, f->last_stat.st_size) < 0)
+ log_debug_errno(errno, "Failed to truncate file to its own size: %m");
+}
+
+static int post_change_thunk(sd_event_source *timer, uint64_t usec, void *userdata) {
+ assert(userdata);
+
+ journal_file_post_change(userdata);
+
+ return 1;
+}
+
+static void schedule_post_change(JournalFile *f) {
+ sd_event *e;
+ int r;
+
+ assert(f);
+ assert(f->post_change_timer);
+
+ assert_se(e = sd_event_source_get_event(f->post_change_timer));
+
+ /* If we are already going down, post the change immediately. */
+ if (IN_SET(sd_event_get_state(e), SD_EVENT_EXITING, SD_EVENT_FINISHED))
+ goto fail;
+
+ r = sd_event_source_get_enabled(f->post_change_timer, NULL);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to get ftruncate timer state: %m");
+ goto fail;
+ }
+ if (r > 0)
+ return;
+
+ r = sd_event_source_set_time_relative(f->post_change_timer, f->post_change_timer_period);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to set time for scheduling ftruncate: %m");
+ goto fail;
+ }
+
+ r = sd_event_source_set_enabled(f->post_change_timer, SD_EVENT_ONESHOT);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to enable scheduled ftruncate: %m");
+ goto fail;
+ }
+
+ return;
+
+fail:
+ /* On failure, let's simply post the change immediately. */
+ journal_file_post_change(f);
+}
+
+/* Enable coalesced change posting in a timer on the provided sd_event instance */
+int journal_file_enable_post_change_timer(JournalFile *f, sd_event *e, usec_t t) {
+ _cleanup_(sd_event_source_unrefp) sd_event_source *timer = NULL;
+ int r;
+
+ assert(f);
+ assert_return(!f->post_change_timer, -EINVAL);
+ assert(e);
+ assert(t);
+
+ /* If we are already going down, we cannot install the timer.
+ * In such case, the caller needs to call journal_file_post_change() explicitly. */
+ if (IN_SET(sd_event_get_state(e), SD_EVENT_EXITING, SD_EVENT_FINISHED))
+ return 0;
+
+ r = sd_event_add_time(e, &timer, CLOCK_MONOTONIC, 0, 0, post_change_thunk, f);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_enabled(timer, SD_EVENT_OFF);
+ if (r < 0)
+ return r;
+
+ f->post_change_timer = TAKE_PTR(timer);
+ f->post_change_timer_period = t;
+
+ return 1;
+}
+
+static int entry_item_cmp(const EntryItem *a, const EntryItem *b) {
+ return CMP(ASSERT_PTR(a)->object_offset, ASSERT_PTR(b)->object_offset);
+}
+
+static size_t remove_duplicate_entry_items(EntryItem items[], size_t n) {
+ size_t j = 1;
+
+ assert(items || n == 0);
+
+ if (n <= 1)
+ return n;
+
+ for (size_t i = 1; i < n; i++)
+ if (items[i].object_offset != items[j - 1].object_offset)
+ items[j++] = items[i];
+
+ return j;
+}
+
+int journal_file_append_entry(
+ JournalFile *f,
+ const dual_timestamp *ts,
+ const sd_id128_t *boot_id,
+ const struct iovec iovec[],
+ size_t n_iovec,
+ uint64_t *seqnum,
+ sd_id128_t *seqnum_id,
+ Object **ret_object,
+ uint64_t *ret_offset) {
+
+ _cleanup_free_ EntryItem *items_alloc = NULL;
+ EntryItem *items;
+ uint64_t xor_hash = 0;
+ struct dual_timestamp _ts;
+ sd_id128_t _boot_id, _machine_id, *machine_id;
+ int r;
+
+ assert(f);
+ assert(f->header);
+ assert(iovec);
+ assert(n_iovec > 0);
+
+ if (ts) {
+ if (!VALID_REALTIME(ts->realtime))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid realtime timestamp %" PRIu64 ", refusing entry.",
+ ts->realtime);
+ if (!VALID_MONOTONIC(ts->monotonic))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "Invalid monotomic timestamp %" PRIu64 ", refusing entry.",
+ ts->monotonic);
+ } else {
+ dual_timestamp_now(&_ts);
+ ts = &_ts;
+ }
+
+ if (boot_id) {
+ if (sd_id128_is_null(*boot_id))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "Empty boot ID, refusing entry.");
+ } else {
+ r = sd_id128_get_boot(&_boot_id);
+ if (r < 0)
+ return r;
+
+ boot_id = &_boot_id;
+ }
+
+ r = sd_id128_get_machine(&_machine_id);
+ if (ERRNO_IS_NEG_MACHINE_ID_UNSET(r))
+ /* Gracefully handle the machine ID not being initialized yet */
+ machine_id = NULL;
+ else if (r < 0)
+ return r;
+ else
+ machine_id = &_machine_id;
+
+#if HAVE_GCRYPT
+ r = journal_file_maybe_append_tag(f, ts->realtime);
+ if (r < 0)
+ return r;
+#endif
+
+ if (n_iovec < ALLOCA_MAX / sizeof(EntryItem) / 2)
+ items = newa(EntryItem, n_iovec);
+ else {
+ items_alloc = new(EntryItem, n_iovec);
+ if (!items_alloc)
+ return -ENOMEM;
+
+ items = items_alloc;
+ }
+
+ for (size_t i = 0; i < n_iovec; i++) {
+ uint64_t p;
+ Object *o;
+
+ r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
+ if (r < 0)
+ return r;
+
+ /* When calculating the XOR hash field, we need to take special care if the "keyed-hash"
+ * journal file flag is on. We use the XOR hash field to quickly determine the identity of a
+ * specific record, and give records with otherwise identical position (i.e. match in seqno,
+ * timestamp, …) a stable ordering. But for that we can't have it that the hash of the
+ * objects in each file is different since they are keyed. Hence let's calculate the Jenkins
+ * hash here for that. This also has the benefit that cursors for old and new journal files
+ * are completely identical (they include the XOR hash after all). For classic Jenkins-hash
+ * files things are easier, we can just take the value from the stored record directly. */
+
+ if (JOURNAL_HEADER_KEYED_HASH(f->header))
+ xor_hash ^= jenkins_hash64(iovec[i].iov_base, iovec[i].iov_len);
+ else
+ xor_hash ^= le64toh(o->data.hash);
+
+ items[i] = (EntryItem) {
+ .object_offset = p,
+ .hash = le64toh(o->data.hash),
+ };
+ }
+
+ /* Order by the position on disk, in order to improve seek
+ * times for rotating media. */
+ typesafe_qsort(items, n_iovec, entry_item_cmp);
+ n_iovec = remove_duplicate_entry_items(items, n_iovec);
+
+ r = journal_file_append_entry_internal(
+ f,
+ ts,
+ boot_id,
+ machine_id,
+ xor_hash,
+ items,
+ n_iovec,
+ seqnum,
+ seqnum_id,
+ ret_object,
+ ret_offset);
+
+ /* If the memory mapping triggered a SIGBUS then we return an
+ * IO error and ignore the error code passed down to us, since
+ * it is very likely just an effect of a nullified replacement
+ * mapping page */
+
+ if (mmap_cache_fd_got_sigbus(f->cache_fd))
+ r = -EIO;
+
+ if (f->post_change_timer)
+ schedule_post_change(f);
+ else
+ journal_file_post_change(f);
+
+ return r;
+}
+
+typedef struct ChainCacheItem {
+ uint64_t first; /* The offset of the entry array object at the beginning of the chain,
+ * i.e., le64toh(f->header->entry_array_offset), or le64toh(o->data.entry_offset). */
+ uint64_t array; /* The offset of the cached entry array object. */
+ uint64_t begin; /* The offset of the first item in the cached array. */
+ uint64_t total; /* The total number of items in all arrays before the cached one in the chain. */
+ uint64_t last_index; /* The last index we looked at in the cached array, to optimize locality when bisecting. */
+} ChainCacheItem;
+
+static void chain_cache_put(
+ OrderedHashmap *h,
+ ChainCacheItem *ci,
+ uint64_t first,
+ uint64_t array,
+ uint64_t begin,
+ uint64_t total,
+ uint64_t last_index) {
+
+ assert(h);
+
+ if (!ci) {
+ /* If the chain item to cache for this chain is the
+ * first one it's not worth caching anything */
+ if (array == first)
+ return;
+
+ if (ordered_hashmap_size(h) >= CHAIN_CACHE_MAX) {
+ ci = ordered_hashmap_steal_first(h);
+ assert(ci);
+ } else {
+ ci = new(ChainCacheItem, 1);
+ if (!ci)
+ return;
+ }
+
+ ci->first = first;
+
+ if (ordered_hashmap_put(h, &ci->first, ci) < 0) {
+ free(ci);
+ return;
+ }
+ } else
+ assert(ci->first == first);
+
+ ci->array = array;
+ ci->begin = begin;
+ ci->total = total;
+ ci->last_index = last_index;
+}
+
+static int bump_array_index(uint64_t *i, direction_t direction, uint64_t n) {
+ assert(i);
+
+ /* Increase or decrease the specified index, in the right direction. */
+
+ if (direction == DIRECTION_DOWN) {
+ if (*i >= n - 1)
+ return 0;
+
+ (*i)++;
+ } else {
+ if (*i <= 0)
+ return 0;
+
+ (*i)--;
+ }
+
+ return 1;
+}
+
+static int bump_entry_array(
+ JournalFile *f,
+ Object *o, /* the current entry array object. */
+ uint64_t offset, /* the offset of the entry array object. */
+ uint64_t first, /* The offset of the first entry array object in the chain. */
+ direction_t direction,
+ uint64_t *ret) {
+
+ int r;
+
+ assert(f);
+ assert(ret);
+
+ if (direction == DIRECTION_DOWN) {
+ assert(o);
+ assert(o->object.type == OBJECT_ENTRY_ARRAY);
+
+ *ret = le64toh(o->entry_array.next_entry_array_offset);
+ } else {
+
+ /* Entry array chains are a singly linked list, so to find the previous array in the chain, we have
+ * to start iterating from the top. */
+
+ assert(offset > 0);
+
+ uint64_t p = first, q = 0;
+ while (p > 0 && p != offset) {
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, p, &o);
+ if (r < 0)
+ return r;
+
+ q = p;
+ p = le64toh(o->entry_array.next_entry_array_offset);
+ }
+
+ /* If we can't find the previous entry array in the entry array chain, we're likely dealing with a
+ * corrupted journal file. */
+ if (p == 0)
+ return -EBADMSG;
+
+ *ret = q;
+ }
+
+ return *ret > 0;
+}
+
+static int generic_array_get(
+ JournalFile *f,
+ uint64_t first, /* The offset of the first entry array object in the chain. */
+ uint64_t i, /* The index of the target object counted from the beginning of the entry array chain. */
+ direction_t direction,
+ Object **ret_object, /* The found object. */
+ uint64_t *ret_offset) { /* The offset of the found object. */
+
+ uint64_t a, t = 0, k;
+ ChainCacheItem *ci;
+ Object *o = NULL;
+ int r;
+
+ assert(f);
+
+ /* FIXME: fix return value assignment on success. */
+
+ a = first;
+
+ /* Try the chain cache first */
+ ci = ordered_hashmap_get(f->chain_cache, &first);
+ if (ci && i > ci->total) {
+ a = ci->array;
+ i -= ci->total;
+ t = ci->total;
+ }
+
+ while (a > 0) {
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+ if (IN_SET(r, -EBADMSG, -EADDRNOTAVAIL)) {
+ /* If there's corruption and we're going downwards, let's pretend we reached the
+ * final entry in the entry array chain. */
+
+ if (direction == DIRECTION_DOWN)
+ return 0;
+
+ /* If there's corruption and we're going upwards, move back to the previous entry
+ * array and start iterating entries from there. */
+
+ i = UINT64_MAX;
+ break;
+ }
+ if (r < 0)
+ return r;
+
+ k = journal_file_entry_array_n_items(f, o);
+ if (k == 0)
+ return 0;
+
+ if (i < k)
+ break;
+
+ /* The index is larger than the number of elements in the array. Let's move to the next array. */
+ i -= k;
+ t += k;
+ a = le64toh(o->entry_array.next_entry_array_offset);
+ }
+
+ /* If we've found the right location, now look for the first non-corrupt entry object (in the right
+ * direction). */
+
+ while (a > 0) {
+ if (i == UINT64_MAX) {
+ r = bump_entry_array(f, o, a, first, direction, &a);
+ if (r <= 0)
+ return r;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+ if (r < 0)
+ return r;
+
+ k = journal_file_entry_array_n_items(f, o);
+ if (k == 0)
+ break;
+
+ if (direction == DIRECTION_DOWN)
+ i = 0;
+ else {
+ /* We moved to the previous array. The total must be decreased. */
+ if (t < k)
+ return -EBADMSG; /* chain cache is broken ? */
+
+ i = k - 1;
+ t -= k;
+ }
+ }
+
+ do {
+ uint64_t p;
+
+ p = journal_file_entry_array_item(f, o, i);
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, p, ret_object);
+ if (r >= 0) {
+ /* Let's cache this item for the next invocation */
+ chain_cache_put(f->chain_cache, ci, first, a, journal_file_entry_array_item(f, o, 0), t, i);
+
+ if (ret_offset)
+ *ret_offset = p;
+
+ return 1;
+ }
+ if (!IN_SET(r, -EADDRNOTAVAIL, -EBADMSG))
+ return r;
+
+ /* OK, so this entry is borked. Most likely some entry didn't get synced to
+ * disk properly, let's see if the next one might work for us instead. */
+ log_debug_errno(r, "Entry item %" PRIu64 " is bad, skipping over it.", i);
+
+ } while (bump_array_index(&i, direction, k) > 0);
+
+ /* All entries tried in the above do-while loop are broken. Let's move to the next (or previous) array. */
+
+ if (direction == DIRECTION_DOWN)
+ /* We are going to the next array, the total must be incremented. */
+ t += k;
+
+ i = UINT64_MAX;
+ }
+
+ return 0;
+}
+
+enum {
+ TEST_FOUND, /* The current object passes the test. */
+ TEST_LEFT, /* The current object is in an earlier position, and the object we are looking
+ * for should exist in a later position. */
+ TEST_RIGHT, /* The current object is in a later position, and the object we are looking for
+ * should exist in an earlier position. */
+ TEST_GOTO_NEXT, /* No matching object exists in this array and earlier arrays, go to the next array. */
+ TEST_GOTO_PREVIOUS, /* No matching object exists in this array and later arrays, go to the previous array. */
+};
+
+static int generic_array_bisect_step(
+ JournalFile *f,
+ Object *array, /* entry array object */
+ uint64_t i, /* index of the entry item in the array we will test. */
+ uint64_t needle,
+ int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
+ direction_t direction,
+ uint64_t *m, /* The maximum number of the entries we will check in the array. */
+ uint64_t *left, /* The index of the left boundary in the array. */
+ uint64_t *right) { /* The index of the right boundary in the array. */
+
+ uint64_t p;
+ int r;
+
+ assert(f);
+ assert(array);
+ assert(test_object);
+ assert(m);
+ assert(left);
+ assert(right);
+ assert(*left <= i);
+ assert(i <= *right);
+ assert(*right < *m);
+
+ p = journal_file_entry_array_item(f, array, i);
+ if (p <= 0)
+ r = -EBADMSG;
+ else
+ r = test_object(f, p, needle);
+ if (IN_SET(r, -EBADMSG, -EADDRNOTAVAIL)) {
+ log_debug_errno(r, "Encountered invalid entry while bisecting, cutting algorithm short.");
+
+ if (i == *left) {
+ /* This happens on two situations:
+ *
+ * a) i == 0 (hence, *left == 0):
+ * The first entry in the array is corrupted, let's go back to the previous array.
+ *
+ * b) *right == *left or *left + 1, and we are going to downwards:
+ * In that case, the (i-1)-th object has been already tested in the previous call,
+ * which returned TEST_LEFT. See below. So, there is no matching entry in this
+ * array nor in the whole entry array chain. */
+ assert(i == 0 || (*right - *left <= 1 && direction == DIRECTION_DOWN));
+ return TEST_GOTO_PREVIOUS;
+ }
+
+ /* Otherwise, cutting the array short. So, here we limit the number of elements we will see
+ * in this array, and set the right boundary to the last possibly non-corrupted object. */
+ *m = i;
+ *right = i - 1;
+ return TEST_RIGHT;
+ }
+ if (r < 0)
+ return r;
+
+ if (r == TEST_FOUND)
+ /* There may be multiple entries that match with the needle. When the direction is down, we
+ * need to find the first matching entry, hence the right boundary can be moved, but the left
+ * one cannot. Similarly, when the direction is up, we need to find the last matching entry,
+ * hence the left boundary can be moved, but the right one cannot. */
+ r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
+
+ if (r == TEST_RIGHT) {
+ /* Currently, left --- needle --- i --- right, hence we can move the right boundary to i. */
+ if (direction == DIRECTION_DOWN)
+ *right = i;
+ else {
+ if (i == 0)
+ return TEST_GOTO_PREVIOUS;
+ *right = i - 1;
+ }
+ } else {
+ /* Currently, left --- i --- needle --- right, hence we can move the left boundary to i. */
+ if (direction == DIRECTION_DOWN) {
+ /* Note, here *m is always positive, as by the assertions at the beginning, we have
+ * 0 <= *left <= i <= *right < m */
+ if (i == *m - 1)
+ return TEST_GOTO_NEXT;
+
+ *left = i + 1;
+ } else
+ *left = i;
+ }
+
+ return r;
+}
+
+static int generic_array_bisect(
+ JournalFile *f,
+ uint64_t first, /* The offset of the first entry array object in the chain. */
+ uint64_t n, /* The total number of elements in the chain of the entry array. */
+ uint64_t needle, /* The target value (e.g. seqnum, monotonic, realtime, ...). */
+ int (*test_object)(JournalFile *f,
+ uint64_t p, /* the offset of the (data or entry) object that will be tested. */
+ uint64_t needle),
+ direction_t direction,
+ Object **ret_object, /* The found object. */
+ uint64_t *ret_offset, /* The offset of the found object. */
+ uint64_t *ret_idx) { /* The index of the found object counted from the beginning of the entry array chain. */
+
+ /* Given an entry array chain, this function finds the object "closest" to the given needle in the
+ * chain, taking into account the provided direction. A function can be provided to determine how
+ * an object is matched against the given needle.
+ *
+ * Given a journal file, the offset of an object and the needle, the test_object() function should
+ * return TEST_RIGHT if the needle is located earlier in the entry array chain, TEST_LEFT if the
+ * needle is located later in the entry array chain, and TEST_FOUND if the object matches the needle.
+ * If test_object() returns TEST_FOUND for a specific object, that object's information will be used
+ * to populate the return values of this function. If test_object() never returns TEST_FOUND, the
+ * return values are populated with the details of one of the objects closest to the needle. If the
+ * direction is DIRECTION_UP, the earlier object is used. Otherwise, the later object is used.
+ * If there are multiple objects that test_object() return TEST_FOUND for, then the first matching
+ * object returned when direction is DIRECTION_DOWN. Otherwise the last object is returned. */
+
+ uint64_t a, p, t = 0, i, last_index = UINT64_MAX;
+ ChainCacheItem *ci;
+ Object *array;
+ int r;
+
+ assert(f);
+ assert(test_object);
+
+ if (n <= 0)
+ return 0;
+
+ /* Start with the first array in the chain */
+ a = first;
+
+ ci = ordered_hashmap_get(f->chain_cache, &first);
+ if (ci && n > ci->total && ci->begin != 0) {
+ /* Ah, we have iterated this bisection array chain previously! Let's see if we can skip ahead
+ * in the chain, as far as the last time. But we can't jump backwards in the chain, so let's
+ * check that first. */
+
+ r = test_object(f, ci->begin, needle);
+ if (IN_SET(r, -EBADMSG, -EADDRNOTAVAIL))
+ log_debug_errno(r, "Cached entry is corrupted, ignoring: %m");
+ else if (r < 0)
+ return r;
+ else if (r == TEST_LEFT) {
+ /* OK, what we are looking for is right of the begin of this EntryArray, so let's
+ * jump straight to previously cached array in the chain */
+
+ a = ci->array;
+ n -= ci->total;
+ t = ci->total;
+ last_index = ci->last_index;
+ }
+ }
+
+ while (a > 0) {
+ uint64_t left, right, k, m, m_original;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
+ if (r < 0)
+ return r;
+
+ k = journal_file_entry_array_n_items(f, array);
+ m = m_original = MIN(k, n);
+ if (m <= 0)
+ return 0;
+
+ left = 0;
+ right = m - 1;
+
+ if (direction == DIRECTION_UP) {
+ /* If we're going upwards, the last entry of the previous array may pass the test,
+ * and the first entry of the current array may not pass. In that case, the last
+ * entry of the previous array must be returned. Hence, we need to test the first
+ * entry of the current array. */
+ r = generic_array_bisect_step(f, array, 0, needle, test_object, direction, &m, &left, &right);
+ if (r < 0)
+ return r;
+ if (r == TEST_GOTO_PREVIOUS)
+ goto previous;
+ }
+
+ /* Test the last entry of this array, to determine if we should go to the next array. */
+ r = generic_array_bisect_step(f, array, right, needle, test_object, direction, &m, &left, &right);
+ if (r < 0)
+ return r;
+ if (r == TEST_GOTO_PREVIOUS)
+ goto previous;
+
+ /* The expected entry should be in this array, (or the last entry of the previous array). */
+ if (r == TEST_RIGHT) {
+
+ /* If we cached the last index we looked at, let's try to not to jump too wildly
+ * around and see if we can limit the range to look at early to the immediate
+ * neighbors of the last index we looked at. */
+
+ if (last_index > 0 && left < last_index - 1 && last_index - 1 < right) {
+ r = generic_array_bisect_step(f, array, last_index - 1, needle, test_object, direction, &m, &left, &right);
+ if (r < 0)
+ return r;
+ if (r == TEST_GOTO_PREVIOUS)
+ goto previous;
+ }
+
+ if (last_index < UINT64_MAX && left < last_index + 1 && last_index + 1 < right) {
+ r = generic_array_bisect_step(f, array, last_index + 1, needle, test_object, direction, &m, &left, &right);
+ if (r < 0)
+ return r;
+ if (r == TEST_GOTO_PREVIOUS)
+ goto previous;
+ }
+
+ for (;;) {
+ if (left == right) {
+ /* We found one or more corrupted entries in generic_array_bisect_step().
+ * In that case, the entry pointed by 'right' may not be tested.
+ *
+ * When we are going to downwards, the entry object pointed by 'left'
+ * has not been tested yet, Hence, even if left == right, we still
+ * have to check the final entry to see if it actually matches.
+ *
+ * On the other hand, when we are going to upwards, the entry pointed
+ * by 'left' is always tested, So, it is not necessary to test the
+ * final entry again. */
+ if (m != m_original && direction == DIRECTION_DOWN) {
+ r = generic_array_bisect_step(f, array, left, needle, test_object, direction, &m, &left, &right);
+ if (r < 0)
+ return r;
+ if (IN_SET(r, TEST_GOTO_PREVIOUS, TEST_GOTO_NEXT))
+ return 0; /* The entry does not pass the test, or is corrupted */
+
+ assert(TEST_RIGHT);
+ assert(left == right);
+ }
+
+ i = left;
+ goto found;
+ }
+
+ assert(left < right);
+ i = (left + right + (direction == DIRECTION_UP)) / 2;
+
+ r = generic_array_bisect_step(f, array, i, needle, test_object, direction, &m, &left, &right);
+ if (r < 0)
+ return r;
+ if (r == TEST_GOTO_PREVIOUS)
+ goto previous;
+ if (r == TEST_GOTO_NEXT)
+ return 0; /* Found a corrupt entry, and the array was cut short. */
+ }
+ }
+
+ /* Not found in this array (or the last entry of this array should be returned), go to the next array. */
+ assert(r == (direction == DIRECTION_DOWN ? TEST_GOTO_NEXT : TEST_LEFT));
+
+ if (k >= n) {
+ if (direction == DIRECTION_UP) {
+ assert(n > 0);
+ i = n - 1;
+ goto found;
+ }
+
+ return 0;
+ }
+
+ n -= k;
+ t += k;
+ last_index = UINT64_MAX;
+ a = le64toh(array->entry_array.next_entry_array_offset);
+ }
+
+ return 0;
+
+previous:
+ /* Not found in the current array, return the last entry of the previous array. */
+ assert(r == TEST_GOTO_PREVIOUS);
+
+ /* The current array is the first in the chain. no previous array. */
+ if (t == 0)
+ return 0;
+
+ /* When we are going downwards, there is no matching entries in the previous array. */
+ if (direction == DIRECTION_DOWN)
+ return 0;
+
+ /* Indicate to go to the previous array later. Note, do not move to the previous array here,
+ * as that may invalidate the current array object in the mmap cache and
+ * journal_file_entry_array_item() below may read invalid address. */
+ i = UINT64_MAX;
+
+found:
+ p = journal_file_entry_array_item(f, array, 0);
+ if (p <= 0)
+ return -EBADMSG;
+
+ /* Let's cache this item for the next invocation */
+ chain_cache_put(f->chain_cache, ci, first, a, p, t, i);
+
+ if (i == UINT64_MAX) {
+ uint64_t m;
+
+ /* Get the last entry of the previous array. */
+
+ r = bump_entry_array(f, NULL, a, first, DIRECTION_UP, &a);
+ if (r <= 0)
+ return r;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
+ if (r < 0)
+ return r;
+
+ m = journal_file_entry_array_n_items(f, array);
+ if (m == 0 || t < m)
+ return -EBADMSG;
+
+ t -= m;
+ i = m - 1;
+ }
+
+ p = journal_file_entry_array_item(f, array, i);
+ if (p == 0)
+ return -EBADMSG;
+
+ if (ret_object) {
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, p, ret_object);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret_offset)
+ *ret_offset = p;
+
+ if (ret_idx)
+ *ret_idx = t + i;
+
+ return 1;
+}
+
+static int generic_array_bisect_for_data(
+ JournalFile *f,
+ Object *d,
+ uint64_t needle,
+ int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
+ direction_t direction,
+ Object **ret_object,
+ uint64_t *ret_offset) {
+
+ uint64_t extra, first, n;
+ int r;
+
+ assert(f);
+ assert(d);
+ assert(d->object.type == OBJECT_DATA);
+ assert(test_object);
+
+ n = le64toh(d->data.n_entries);
+ if (n <= 0)
+ return 0;
+ n--; /* n_entries is the number of entries linked to the data object, including the 'extra' entry. */
+
+ extra = le64toh(d->data.entry_offset);
+ first = le64toh(d->data.entry_array_offset);
+
+ /* This bisects the array in object 'first', but first checks an extra. */
+ r = test_object(f, extra, needle);
+ if (r < 0)
+ return r;
+
+ if (direction == DIRECTION_DOWN) {
+ /* If we are going downwards, then we need to return the first object that passes the test.
+ * When there is no object that passes the test, we need to return the first object that
+ * test_object() returns TEST_RIGHT for. */
+ if (IN_SET(r,
+ TEST_FOUND, /* The 'extra' object passes the test. Hence, this is the first
+ * object that passes the test. */
+ TEST_RIGHT)) /* The 'extra' object is the first object that test_object() returns
+ * TEST_RIGHT for, and no object exists even in the chained arrays
+ * that passes the test. */
+ goto use_extra; /* The 'extra' object is exactly the one we are looking for. It is
+ * not necessary to bisect the chained arrays. */
+
+ /* Otherwise, the 'extra' object is not the one we are looking for. Search in the arrays. */
+
+ } else {
+ /* If we are going upwards, then we need to return the last object that passes the test.
+ * When there is no object that passes the test, we need to return the the last object that
+ * test_object() returns TEST_LEFT for. */
+ if (r == TEST_RIGHT)
+ return 0; /* Not only the 'extra' object, but also all objects in the chained arrays
+ * will never get TEST_FOUND or TEST_LEFT. The object we are looking for
+ * does not exist. */
+
+ /* Even if the 'extra' object passes the test, there may be multiple objects in the arrays
+ * that also pass the test. Hence, we need to bisect the arrays for finding the last matching
+ * object. */
+ }
+
+ r = generic_array_bisect(f, first, n, needle, test_object, direction, ret_object, ret_offset, NULL);
+ if (r != 0)
+ return r; /* When > 0, the found object is the first (or last, when DIRECTION_UP) object.
+ * Hence, return the found object now. */
+
+ /* No matching object found in the chained arrays.
+ * DIRECTION_DOWN : the 'extra' object neither matches the condition. There is no matching object.
+ * DIRECTION_UP : the 'extra' object matches the condition. So, return it. */
+ if (direction == DIRECTION_DOWN)
+ return 0;
+
+use_extra:
+ if (ret_object) {
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, ret_object);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret_offset)
+ *ret_offset = extra;
+
+ return 1;
+}
+
+static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
+ assert(f);
+ assert(p > 0);
+
+ if (p == needle)
+ return TEST_FOUND;
+ else if (p < needle)
+ return TEST_LEFT;
+ else
+ return TEST_RIGHT;
+}
+
+int journal_file_move_to_entry_by_offset(
+ JournalFile *f,
+ uint64_t p,
+ direction_t direction,
+ Object **ret_object,
+ uint64_t *ret_offset) {
+
+ assert(f);
+ assert(f->header);
+
+ return generic_array_bisect(
+ f,
+ le64toh(f->header->entry_array_offset),
+ le64toh(f->header->n_entries),
+ p,
+ test_object_offset,
+ direction,
+ ret_object, ret_offset, NULL);
+}
+
+static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
+ uint64_t sq;
+ Object *o;
+ int r;
+
+ assert(f);
+ assert(p > 0);
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
+ if (r < 0)
+ return r;
+
+ sq = le64toh(READ_NOW(o->entry.seqnum));
+ if (sq == needle)
+ return TEST_FOUND;
+ else if (sq < needle)
+ return TEST_LEFT;
+ else
+ return TEST_RIGHT;
+}
+
+int journal_file_move_to_entry_by_seqnum(
+ JournalFile *f,
+ uint64_t seqnum,
+ direction_t direction,
+ Object **ret_object,
+ uint64_t *ret_offset) {
+
+ assert(f);
+ assert(f->header);
+
+ return generic_array_bisect(
+ f,
+ le64toh(f->header->entry_array_offset),
+ le64toh(f->header->n_entries),
+ seqnum,
+ test_object_seqnum,
+ direction,
+ ret_object, ret_offset, NULL);
+}
+
+static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
+ Object *o;
+ uint64_t rt;
+ int r;
+
+ assert(f);
+ assert(p > 0);
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
+ if (r < 0)
+ return r;
+
+ rt = le64toh(READ_NOW(o->entry.realtime));
+ if (rt == needle)
+ return TEST_FOUND;
+ else if (rt < needle)
+ return TEST_LEFT;
+ else
+ return TEST_RIGHT;
+}
+
+int journal_file_move_to_entry_by_realtime(
+ JournalFile *f,
+ uint64_t realtime,
+ direction_t direction,
+ Object **ret_object,
+ uint64_t *ret_offset) {
+
+ assert(f);
+ assert(f->header);
+
+ return generic_array_bisect(
+ f,
+ le64toh(f->header->entry_array_offset),
+ le64toh(f->header->n_entries),
+ realtime,
+ test_object_realtime,
+ direction,
+ ret_object, ret_offset, NULL);
+}
+
+static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
+ Object *o;
+ uint64_t m;
+ int r;
+
+ assert(f);
+ assert(p > 0);
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
+ if (r < 0)
+ return r;
+
+ m = le64toh(READ_NOW(o->entry.monotonic));
+ if (m == needle)
+ return TEST_FOUND;
+ else if (m < needle)
+ return TEST_LEFT;
+ else
+ return TEST_RIGHT;
+}
+
+static int find_data_object_by_boot_id(
+ JournalFile *f,
+ sd_id128_t boot_id,
+ Object **ret_object,
+ uint64_t *ret_offset) {
+
+ char t[STRLEN("_BOOT_ID=") + 32 + 1] = "_BOOT_ID=";
+
+ assert(f);
+
+ sd_id128_to_string(boot_id, t + 9);
+ return journal_file_find_data_object(f, t, sizeof(t) - 1, ret_object, ret_offset);
+}
+
+int journal_file_move_to_entry_by_monotonic(
+ JournalFile *f,
+ sd_id128_t boot_id,
+ uint64_t monotonic,
+ direction_t direction,
+ Object **ret_object,
+ uint64_t *ret_offset) {
+
+ Object *o;
+ int r;
+
+ assert(f);
+
+ r = find_data_object_by_boot_id(f, boot_id, &o, NULL);
+ if (r <= 0)
+ return r;
+
+ return generic_array_bisect_for_data(
+ f,
+ o,
+ monotonic,
+ test_object_monotonic,
+ direction,
+ ret_object, ret_offset);
+}
+
+void journal_file_reset_location(JournalFile *f) {
+ assert(f);
+
+ f->location_type = LOCATION_HEAD;
+ f->current_offset = 0;
+ f->current_seqnum = 0;
+ f->current_realtime = 0;
+ f->current_monotonic = 0;
+ zero(f->current_boot_id);
+ f->current_xor_hash = 0;
+
+ /* Also reset the previous reading direction. Otherwise, next_beyond_location() may wrongly handle we
+ * already hit EOF. See issue #29216. */
+ f->last_direction = _DIRECTION_INVALID;
+}
+
+void journal_file_save_location(JournalFile *f, Object *o, uint64_t offset) {
+ assert(f);
+ assert(o);
+
+ f->location_type = LOCATION_SEEK;
+ f->current_offset = offset;
+ f->current_seqnum = le64toh(o->entry.seqnum);
+ f->current_realtime = le64toh(o->entry.realtime);
+ f->current_monotonic = le64toh(o->entry.monotonic);
+ f->current_boot_id = o->entry.boot_id;
+ f->current_xor_hash = le64toh(o->entry.xor_hash);
+}
+
+static bool check_properly_ordered(uint64_t new_offset, uint64_t old_offset, direction_t direction) {
+
+ /* Consider it an error if any of the two offsets is uninitialized */
+ if (old_offset == 0 || new_offset == 0)
+ return false;
+
+ /* If we go down, the new offset must be larger than the old one. */
+ return direction == DIRECTION_DOWN ?
+ new_offset > old_offset :
+ new_offset < old_offset;
+}
+
+int journal_file_next_entry(
+ JournalFile *f,
+ uint64_t p,
+ direction_t direction,
+ Object **ret_object,
+ uint64_t *ret_offset) {
+
+ uint64_t i, n, q;
+ Object *o;
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ /* FIXME: fix return value assignment. */
+
+ n = le64toh(READ_NOW(f->header->n_entries));
+ if (n <= 0)
+ return 0;
+
+ /* When the input offset 'p' is zero, return the first (or last on DIRECTION_UP) entry. */
+ if (p == 0)
+ return generic_array_get(f,
+ le64toh(f->header->entry_array_offset),
+ direction == DIRECTION_DOWN ? 0 : n - 1,
+ direction,
+ ret_object, ret_offset);
+
+ /* Otherwise, first find the nearest entry object. */
+ r = generic_array_bisect(f,
+ le64toh(f->header->entry_array_offset),
+ le64toh(f->header->n_entries),
+ p,
+ test_object_offset,
+ direction,
+ ret_object ? &o : NULL, &q, &i);
+ if (r <= 0)
+ return r;
+
+ assert(direction == DIRECTION_DOWN ? p <= q : q <= p);
+
+ /* If the input offset 'p' points to an entry object, generic_array_bisect() should provides
+ * the same offset, and the index needs to be shifted. Otherwise, use the found object as is,
+ * as it is the nearest entry object from the input offset 'p'. */
+
+ if (p != q)
+ goto found;
+
+ r = bump_array_index(&i, direction, n);
+ if (r <= 0)
+ return r;
+
+ /* And jump to it */
+ r = generic_array_get(f, le64toh(f->header->entry_array_offset), i, direction, ret_object ? &o : NULL, &q);
+ if (r <= 0)
+ return r;
+
+ /* Ensure our array is properly ordered. */
+ if (!check_properly_ordered(q, p, direction))
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "%s: entry array not properly ordered at entry index %" PRIu64,
+ f->path, i);
+found:
+ if (ret_object)
+ *ret_object = o;
+ if (ret_offset)
+ *ret_offset = q;
+
+ return 1;
+}
+
+int journal_file_move_to_entry_for_data(
+ JournalFile *f,
+ Object *d,
+ direction_t direction,
+ Object **ret_object,
+ uint64_t *ret_offset) {
+
+ uint64_t extra, first, n;
+ int r = 0;
+
+ assert(f);
+ assert(d);
+ assert(d->object.type == OBJECT_DATA);
+ assert(IN_SET(direction, DIRECTION_DOWN, DIRECTION_UP));
+
+ /* FIXME: fix return value assignment. */
+
+ /* This returns the first (when the direction is down, otherwise the last) entry linked to the
+ * specified data object. */
+
+ n = le64toh(d->data.n_entries);
+ if (n <= 0)
+ return 0;
+ n--; /* n_entries is the number of entries linked to the data object, including the 'extra' entry. */
+
+ extra = le64toh(d->data.entry_offset);
+ first = le64toh(d->data.entry_array_offset);
+
+ if (direction == DIRECTION_DOWN && extra > 0) {
+ /* When we are going downwards, first try to read the extra entry. */
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, ret_object);
+ if (r >= 0)
+ goto use_extra;
+ if (!IN_SET(r, -EADDRNOTAVAIL, -EBADMSG))
+ return r;
+ }
+
+ if (n > 0) {
+ /* DIRECTION_DOWN : The extra entry is broken, falling back to the entries in the array.
+ * DIRECTION_UP : Try to find a valid entry in the array from the tail. */
+ r = generic_array_get(f,
+ first,
+ direction == DIRECTION_DOWN ? 0 : n - 1,
+ direction,
+ ret_object, ret_offset);
+ if (!IN_SET(r, 0, -EADDRNOTAVAIL, -EBADMSG))
+ return r; /* found or critical error. */
+ }
+
+ if (direction == DIRECTION_UP && extra > 0) {
+ /* No valid entry exists in the chained array, falling back to the extra entry. */
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, ret_object);
+ if (r >= 0)
+ goto use_extra;
+ }
+
+ return r;
+
+use_extra:
+ if (ret_offset)
+ *ret_offset = extra;
+
+ return 1;
+}
+
+int journal_file_move_to_entry_by_offset_for_data(
+ JournalFile *f,
+ Object *d,
+ uint64_t p,
+ direction_t direction,
+ Object **ret, uint64_t *ret_offset) {
+
+ assert(f);
+ assert(d);
+ assert(d->object.type == OBJECT_DATA);
+
+ return generic_array_bisect_for_data(
+ f,
+ d,
+ p,
+ test_object_offset,
+ direction,
+ ret, ret_offset);
+}
+
+int journal_file_move_to_entry_by_monotonic_for_data(
+ JournalFile *f,
+ Object *d,
+ sd_id128_t boot_id,
+ uint64_t monotonic,
+ direction_t direction,
+ Object **ret_object,
+ uint64_t *ret_offset) {
+
+ Object *o, *entry;
+ uint64_t z;
+ int r;
+
+ assert(f);
+ assert(d);
+ assert(d->object.type == OBJECT_DATA);
+
+ /* First, pin the given data object, before reading the _BOOT_ID= data object below. */
+ r = journal_file_pin_object(f, d);
+ if (r < 0)
+ return r;
+
+ /* Then, read a data object for _BOOT_ID= and seek by time. */
+ r = find_data_object_by_boot_id(f, boot_id, &o, NULL);
+ if (r <= 0)
+ return r;
+
+ r = generic_array_bisect_for_data(f,
+ o,
+ monotonic,
+ test_object_monotonic,
+ direction,
+ NULL, &z);
+ if (r <= 0)
+ return r;
+
+ /* And now, continue seeking until we find an entry that exists in both bisection arrays. */
+ for (;;) {
+ uint64_t p;
+
+ /* The journal entry found by the above bisect_plus_one() may not have the specified data,
+ * that is, it may not be linked in the data object. So, we need to check that. */
+
+ r = journal_file_move_to_entry_by_offset_for_data(
+ f, d, z, direction, ret_object ? &entry : NULL, &p);
+ if (r <= 0)
+ return r;
+ if (p == z)
+ break; /* The journal entry has the specified data. Yay! */
+
+ /* If the entry does not have the data, then move to the next (or previous, depends on the
+ * 'direction') entry linked to the data object. But, the next entry may be in another boot.
+ * So, we need to check that the entry has the matching boot ID. */
+
+ r = journal_file_move_to_entry_by_offset_for_data(
+ f, o, p, direction, ret_object ? &entry : NULL, &z);
+ if (r <= 0)
+ return r;
+ if (p == z)
+ break; /* The journal entry has the specified boot ID. Yay! */
+
+ /* If not, let's try to the next entry... */
+ }
+
+ if (ret_object)
+ *ret_object = entry;
+ if (ret_offset)
+ *ret_offset = z;
+ return 1;
+}
+
+int journal_file_move_to_entry_by_seqnum_for_data(
+ JournalFile *f,
+ Object *d,
+ uint64_t seqnum,
+ direction_t direction,
+ Object **ret_object,
+ uint64_t *ret_offset) {
+
+ assert(f);
+ assert(d);
+ assert(d->object.type == OBJECT_DATA);
+
+ return generic_array_bisect_for_data(
+ f,
+ d,
+ seqnum,
+ test_object_seqnum,
+ direction,
+ ret_object, ret_offset);
+}
+
+int journal_file_move_to_entry_by_realtime_for_data(
+ JournalFile *f,
+ Object *d,
+ uint64_t realtime,
+ direction_t direction,
+ Object **ret, uint64_t *ret_offset) {
+
+ assert(f);
+ assert(d);
+ assert(d->object.type == OBJECT_DATA);
+
+ return generic_array_bisect_for_data(
+ f,
+ d,
+ realtime,
+ test_object_realtime,
+ direction,
+ ret, ret_offset);
+}
+
+void journal_file_dump(JournalFile *f) {
+ Object *o;
+ uint64_t p;
+ int r;
+
+ assert(f);
+ assert(f->header);
+
+ journal_file_print_header(f);
+
+ p = le64toh(READ_NOW(f->header->header_size));
+ while (p != 0) {
+ const char *s;
+ Compression c;
+
+ r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o);
+ if (r < 0)
+ goto fail;
+
+ s = journal_object_type_to_string(o->object.type);
+
+ switch (o->object.type) {
+
+ case OBJECT_ENTRY:
+ assert(s);
+
+ printf("Type: %s seqnum=%"PRIu64" monotonic=%"PRIu64" realtime=%"PRIu64"\n",
+ s,
+ le64toh(o->entry.seqnum),
+ le64toh(o->entry.monotonic),
+ le64toh(o->entry.realtime));
+ break;
+
+ case OBJECT_TAG:
+ assert(s);
+
+ printf("Type: %s seqnum=%"PRIu64" epoch=%"PRIu64"\n",
+ s,
+ le64toh(o->tag.seqnum),
+ le64toh(o->tag.epoch));
+ break;
+
+ default:
+ if (s)
+ printf("Type: %s \n", s);
+ else
+ printf("Type: unknown (%i)", o->object.type);
+
+ break;
+ }
+
+ c = COMPRESSION_FROM_OBJECT(o);
+ if (c > COMPRESSION_NONE)
+ printf("Flags: %s\n",
+ compression_to_string(c));
+
+ if (p == le64toh(f->header->tail_object_offset))
+ p = 0;
+ else
+ p += ALIGN64(le64toh(o->object.size));
+ }
+
+ return;
+fail:
+ log_error("File corrupt");
+}
+
+/* Note: the lifetime of the compound literal is the immediately surrounding block. */
+#define FORMAT_TIMESTAMP_SAFE(t) (FORMAT_TIMESTAMP(t) ?: " --- ")
+
+void journal_file_print_header(JournalFile *f) {
+ struct stat st;
+
+ assert(f);
+ assert(f->header);
+
+ printf("File path: %s\n"
+ "File ID: %s\n"
+ "Machine ID: %s\n"
+ "Boot ID: %s\n"
+ "Sequential number ID: %s\n"
+ "State: %s\n"
+ "Compatible flags:%s%s%s%s\n"
+ "Incompatible flags:%s%s%s%s%s%s\n"
+ "Header size: %"PRIu64"\n"
+ "Arena size: %"PRIu64"\n"
+ "Data hash table size: %"PRIu64"\n"
+ "Field hash table size: %"PRIu64"\n"
+ "Rotate suggested: %s\n"
+ "Head sequential number: %"PRIu64" (%"PRIx64")\n"
+ "Tail sequential number: %"PRIu64" (%"PRIx64")\n"
+ "Head realtime timestamp: %s (%"PRIx64")\n"
+ "Tail realtime timestamp: %s (%"PRIx64")\n"
+ "Tail monotonic timestamp: %s (%"PRIx64")\n"
+ "Objects: %"PRIu64"\n"
+ "Entry objects: %"PRIu64"\n",
+ f->path,
+ SD_ID128_TO_STRING(f->header->file_id),
+ SD_ID128_TO_STRING(f->header->machine_id),
+ SD_ID128_TO_STRING(f->header->tail_entry_boot_id),
+ SD_ID128_TO_STRING(f->header->seqnum_id),
+ f->header->state == STATE_OFFLINE ? "OFFLINE" :
+ f->header->state == STATE_ONLINE ? "ONLINE" :
+ f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
+ JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
+ JOURNAL_HEADER_SEALED_CONTINUOUS(f->header) ? " SEALED_CONTINUOUS" : "",
+ JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(f->header) ? " TAIL_ENTRY_BOOT_ID" : "",
+ (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ???" : "",
+ JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? " COMPRESSED-XZ" : "",
+ JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4" : "",
+ JOURNAL_HEADER_COMPRESSED_ZSTD(f->header) ? " COMPRESSED-ZSTD" : "",
+ JOURNAL_HEADER_KEYED_HASH(f->header) ? " KEYED-HASH" : "",
+ JOURNAL_HEADER_COMPACT(f->header) ? " COMPACT" : "",
+ (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_ANY) ? " ???" : "",
+ le64toh(f->header->header_size),
+ le64toh(f->header->arena_size),
+ le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
+ le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
+ yes_no(journal_file_rotate_suggested(f, 0, LOG_DEBUG)),
+ le64toh(f->header->head_entry_seqnum), le64toh(f->header->head_entry_seqnum),
+ le64toh(f->header->tail_entry_seqnum), le64toh(f->header->tail_entry_seqnum),
+ FORMAT_TIMESTAMP_SAFE(le64toh(f->header->head_entry_realtime)), le64toh(f->header->head_entry_realtime),
+ FORMAT_TIMESTAMP_SAFE(le64toh(f->header->tail_entry_realtime)), le64toh(f->header->tail_entry_realtime),
+ FORMAT_TIMESPAN(le64toh(f->header->tail_entry_monotonic), USEC_PER_MSEC), le64toh(f->header->tail_entry_monotonic),
+ le64toh(f->header->n_objects),
+ le64toh(f->header->n_entries));
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
+ printf("Data objects: %"PRIu64"\n"
+ "Data hash table fill: %.1f%%\n",
+ le64toh(f->header->n_data),
+ 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
+ printf("Field objects: %"PRIu64"\n"
+ "Field hash table fill: %.1f%%\n",
+ le64toh(f->header->n_fields),
+ 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
+ printf("Tag objects: %"PRIu64"\n",
+ le64toh(f->header->n_tags));
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
+ printf("Entry array objects: %"PRIu64"\n",
+ le64toh(f->header->n_entry_arrays));
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth))
+ printf("Deepest field hash chain: %" PRIu64"\n",
+ f->header->field_hash_chain_depth);
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth))
+ printf("Deepest data hash chain: %" PRIu64"\n",
+ f->header->data_hash_chain_depth);
+
+ if (fstat(f->fd, &st) >= 0)
+ printf("Disk usage: %s\n", FORMAT_BYTES((uint64_t) st.st_blocks * 512ULL));
+}
+
+static int journal_file_warn_btrfs(JournalFile *f) {
+ unsigned attrs;
+ int r;
+
+ assert(f);
+
+ /* Before we write anything, check if the COW logic is turned
+ * off on btrfs. Given our write pattern that is quite
+ * unfriendly to COW file systems this should greatly improve
+ * performance on COW file systems, such as btrfs, at the
+ * expense of data integrity features (which shouldn't be too
+ * bad, given that we do our own checksumming). */
+
+ r = fd_is_fs_type(f->fd, BTRFS_SUPER_MAGIC);
+ if (r < 0)
+ return log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to determine if journal is on btrfs: %m");
+ if (r == 0)
+ return 0;
+
+ r = read_attr_fd(f->fd, &attrs);
+ if (r < 0)
+ return log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to read file attributes: %m");
+
+ if (attrs & FS_NOCOW_FL) {
+ log_debug("Detected btrfs file system with copy-on-write disabled, all is good.");
+ return 0;
+ }
+
+ log_ratelimit_notice(JOURNAL_LOG_RATELIMIT,
+ "Creating journal file %s on a btrfs file system, and copy-on-write is enabled. "
+ "This is likely to slow down journal access substantially, please consider turning "
+ "off the copy-on-write file attribute on the journal directory, using chattr +C.",
+ f->path);
+
+ return 1;
+}
+
+static void journal_default_metrics(JournalMetrics *m, int fd, bool compact) {
+ struct statvfs ss;
+ uint64_t fs_size = 0;
+
+ assert(m);
+ assert(fd >= 0);
+
+ if (fstatvfs(fd, &ss) >= 0)
+ fs_size = u64_multiply_safe(ss.f_frsize, ss.f_blocks);
+ else
+ log_debug_errno(errno, "Failed to determine disk size: %m");
+
+ if (m->max_use == UINT64_MAX) {
+
+ if (fs_size > 0)
+ m->max_use = CLAMP(PAGE_ALIGN_U64(fs_size / 10), /* 10% of file system size */
+ MAX_USE_LOWER, MAX_USE_UPPER);
+ else
+ m->max_use = MAX_USE_LOWER;
+ } else {
+ m->max_use = PAGE_ALIGN_U64(m->max_use);
+
+ if (m->max_use != 0 && m->max_use < JOURNAL_FILE_SIZE_MIN*2)
+ m->max_use = JOURNAL_FILE_SIZE_MIN*2;
+ }
+
+ if (m->min_use == UINT64_MAX) {
+ if (fs_size > 0)
+ m->min_use = CLAMP(PAGE_ALIGN_U64(fs_size / 50), /* 2% of file system size */
+ MIN_USE_LOW, MIN_USE_HIGH);
+ else
+ m->min_use = MIN_USE_LOW;
+ }
+
+ if (m->min_use > m->max_use)
+ m->min_use = m->max_use;
+
+ if (m->max_size == UINT64_MAX)
+ m->max_size = MIN(PAGE_ALIGN_U64(m->max_use / 8), /* 8 chunks */
+ MAX_SIZE_UPPER);
+ else
+ m->max_size = PAGE_ALIGN_U64(m->max_size);
+
+ if (compact && m->max_size > JOURNAL_COMPACT_SIZE_MAX)
+ m->max_size = JOURNAL_COMPACT_SIZE_MAX;
+
+ if (m->max_size != 0) {
+ if (m->max_size < JOURNAL_FILE_SIZE_MIN)
+ m->max_size = JOURNAL_FILE_SIZE_MIN;
+
+ if (m->max_use != 0 && m->max_size*2 > m->max_use)
+ m->max_use = m->max_size*2;
+ }
+
+ if (m->min_size == UINT64_MAX)
+ m->min_size = JOURNAL_FILE_SIZE_MIN;
+ else
+ m->min_size = CLAMP(PAGE_ALIGN_U64(m->min_size),
+ JOURNAL_FILE_SIZE_MIN,
+ m->max_size ?: UINT64_MAX);
+
+ if (m->keep_free == UINT64_MAX) {
+ if (fs_size > 0)
+ m->keep_free = MIN(PAGE_ALIGN_U64(fs_size / 20), /* 5% of file system size */
+ KEEP_FREE_UPPER);
+ else
+ m->keep_free = DEFAULT_KEEP_FREE;
+ }
+
+ if (m->n_max_files == UINT64_MAX)
+ m->n_max_files = DEFAULT_N_MAX_FILES;
+
+ log_debug("Fixed min_use=%s max_use=%s max_size=%s min_size=%s keep_free=%s n_max_files=%" PRIu64,
+ FORMAT_BYTES(m->min_use),
+ FORMAT_BYTES(m->max_use),
+ FORMAT_BYTES(m->max_size),
+ FORMAT_BYTES(m->min_size),
+ FORMAT_BYTES(m->keep_free),
+ m->n_max_files);
+}
+
+int journal_file_open(
+ int fd,
+ const char *fname,
+ int open_flags,
+ JournalFileFlags file_flags,
+ mode_t mode,
+ uint64_t compress_threshold_bytes,
+ JournalMetrics *metrics,
+ MMapCache *mmap_cache,
+ JournalFile *template,
+ JournalFile **ret) {
+
+ bool newly_created = false;
+ JournalFile *f;
+ void *h;
+ int r;
+
+ assert(fd >= 0 || fname);
+ assert(file_flags >= 0);
+ assert(file_flags <= _JOURNAL_FILE_FLAGS_MAX);
+ assert(mmap_cache);
+ assert(ret);
+
+ if (!IN_SET((open_flags & O_ACCMODE), O_RDONLY, O_RDWR))
+ return -EINVAL;
+
+ if ((open_flags & O_ACCMODE) == O_RDONLY && FLAGS_SET(open_flags, O_CREAT))
+ return -EINVAL;
+
+ if (fname && (open_flags & O_CREAT) && !endswith(fname, ".journal"))
+ return -EINVAL;
+
+ f = new(JournalFile, 1);
+ if (!f)
+ return -ENOMEM;
+
+ *f = (JournalFile) {
+ .fd = fd,
+ .mode = mode,
+ .open_flags = open_flags,
+ .compress_threshold_bytes = compress_threshold_bytes == UINT64_MAX ?
+ DEFAULT_COMPRESS_THRESHOLD :
+ MAX(MIN_COMPRESS_THRESHOLD, compress_threshold_bytes),
+ .strict_order = FLAGS_SET(file_flags, JOURNAL_STRICT_ORDER),
+ .newest_boot_id_prioq_idx = PRIOQ_IDX_NULL,
+ .last_direction = _DIRECTION_INVALID,
+ };
+
+ if (fname) {
+ f->path = strdup(fname);
+ if (!f->path) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ } else {
+ assert(fd >= 0);
+
+ /* If we don't know the path, fill in something explanatory and vaguely useful */
+ if (asprintf(&f->path, "/proc/self/%i", fd) < 0) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ f->chain_cache = ordered_hashmap_new(&uint64_hash_ops);
+ if (!f->chain_cache) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ if (f->fd < 0) {
+ /* We pass O_NONBLOCK here, so that in case somebody pointed us to some character device node or FIFO
+ * or so, we likely fail quickly than block for long. For regular files O_NONBLOCK has no effect, hence
+ * it doesn't hurt in that case. */
+
+ f->fd = openat_report_new(AT_FDCWD, f->path, f->open_flags|O_CLOEXEC|O_NONBLOCK, f->mode, &newly_created);
+ if (f->fd < 0) {
+ r = f->fd;
+ goto fail;
+ }
+
+ /* fds we opened here by us should also be closed by us. */
+ f->close_fd = true;
+
+ r = fd_nonblock(f->fd, false);
+ if (r < 0)
+ goto fail;
+
+ if (!newly_created) {
+ r = journal_file_fstat(f);
+ if (r < 0)
+ goto fail;
+ }
+ } else {
+ r = journal_file_fstat(f);
+ if (r < 0)
+ goto fail;
+
+ /* If we just got the fd passed in, we don't really know if we created the file anew */
+ newly_created = f->last_stat.st_size == 0 && journal_file_writable(f);
+ }
+
+ r = mmap_cache_add_fd(mmap_cache, f->fd, mmap_prot_from_open_flags(open_flags), &f->cache_fd);
+ if (r < 0)
+ goto fail;
+
+ if (newly_created) {
+ (void) journal_file_warn_btrfs(f);
+
+ /* Let's attach the creation time to the journal file, so that the vacuuming code knows the age of this
+ * file even if the file might end up corrupted one day... Ideally we'd just use the creation time many
+ * file systems maintain for each file, but the API to query this is very new, hence let's emulate this
+ * via extended attributes. If extended attributes are not supported we'll just skip this, and rely
+ * solely on mtime/atime/ctime of the file. */
+ (void) fd_setcrtime(f->fd, 0);
+
+ r = journal_file_init_header(f, file_flags, template);
+ if (r < 0)
+ goto fail;
+
+ r = journal_file_fstat(f);
+ if (r < 0)
+ goto fail;
+ }
+
+ if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
+ r = -ENODATA;
+ goto fail;
+ }
+
+ r = mmap_cache_fd_get(f->cache_fd, MMAP_CACHE_CATEGORY_HEADER, true, 0, PAGE_ALIGN(sizeof(Header)), &f->last_stat, &h);
+ if (r == -EINVAL) {
+ /* Some file systems (jffs2 or p9fs) don't support mmap() properly (or only read-only
+ * mmap()), and return EINVAL in that case. Let's propagate that as a more recognizable error
+ * code. */
+ r = -EAFNOSUPPORT;
+ goto fail;
+ }
+ if (r < 0)
+ goto fail;
+
+ f->header = h;
+
+ if (!newly_created) {
+ r = journal_file_verify_header(f);
+ if (r < 0)
+ goto fail;
+ }
+
+#if HAVE_GCRYPT
+ if (!newly_created && journal_file_writable(f) && JOURNAL_HEADER_SEALED(f->header)) {
+ r = journal_file_fss_load(f);
+ if (r < 0)
+ goto fail;
+ }
+#endif
+
+ if (journal_file_writable(f)) {
+ if (metrics) {
+ journal_default_metrics(metrics, f->fd, JOURNAL_HEADER_COMPACT(f->header));
+ f->metrics = *metrics;
+ } else if (template)
+ f->metrics = template->metrics;
+
+ r = journal_file_refresh_header(f);
+ if (r < 0)
+ goto fail;
+ }
+
+#if HAVE_GCRYPT
+ r = journal_file_hmac_setup(f);
+ if (r < 0)
+ goto fail;
+#endif
+
+ if (newly_created) {
+ r = journal_file_setup_field_hash_table(f);
+ if (r < 0)
+ goto fail;
+
+ r = journal_file_setup_data_hash_table(f);
+ if (r < 0)
+ goto fail;
+
+#if HAVE_GCRYPT
+ r = journal_file_append_first_tag(f);
+ if (r < 0)
+ goto fail;
+#endif
+ }
+
+ if (mmap_cache_fd_got_sigbus(f->cache_fd)) {
+ r = -EIO;
+ goto fail;
+ }
+
+ if (template && template->post_change_timer) {
+ r = journal_file_enable_post_change_timer(
+ f,
+ sd_event_source_get_event(template->post_change_timer),
+ template->post_change_timer_period);
+
+ if (r < 0)
+ goto fail;
+ }
+
+ /* The file is opened now successfully, thus we take possession of any passed in fd. */
+ f->close_fd = true;
+
+ if (DEBUG_LOGGING) {
+ static int last_seal = -1, last_keyed_hash = -1;
+ static Compression last_compression = _COMPRESSION_INVALID;
+ static uint64_t last_bytes = UINT64_MAX;
+
+ if (last_seal != JOURNAL_HEADER_SEALED(f->header) ||
+ last_keyed_hash != JOURNAL_HEADER_KEYED_HASH(f->header) ||
+ last_compression != JOURNAL_FILE_COMPRESSION(f) ||
+ last_bytes != f->compress_threshold_bytes) {
+
+ log_debug("Journal effective settings seal=%s keyed_hash=%s compress=%s compress_threshold_bytes=%s",
+ yes_no(JOURNAL_HEADER_SEALED(f->header)), yes_no(JOURNAL_HEADER_KEYED_HASH(f->header)),
+ compression_to_string(JOURNAL_FILE_COMPRESSION(f)), FORMAT_BYTES(f->compress_threshold_bytes));
+ last_seal = JOURNAL_HEADER_SEALED(f->header);
+ last_keyed_hash = JOURNAL_HEADER_KEYED_HASH(f->header);
+ last_compression = JOURNAL_FILE_COMPRESSION(f);
+ last_bytes = f->compress_threshold_bytes;
+ }
+ }
+
+ *ret = f;
+ return 0;
+
+fail:
+ if (f->cache_fd && mmap_cache_fd_got_sigbus(f->cache_fd))
+ r = -EIO;
+
+ (void) journal_file_close(f);
+
+ if (newly_created && fd < 0)
+ (void) unlink(fname);
+
+ return r;
+}
+
+int journal_file_parse_uid_from_filename(const char *path, uid_t *ret_uid) {
+ _cleanup_free_ char *buf = NULL, *p = NULL;
+ const char *a, *b, *at;
+ int r;
+
+ /* This helper returns -EREMOTE when the filename doesn't match user online/offline journal
+ * pattern. Hence it currently doesn't parse archived or disposed user journals. */
+
+ assert(path);
+ assert(ret_uid);
+
+ r = path_extract_filename(path, &p);
+ if (r < 0)
+ return r;
+ if (r == O_DIRECTORY)
+ return -EISDIR;
+
+ a = startswith(p, "user-");
+ if (!a)
+ return -EREMOTE;
+ b = endswith(p, ".journal");
+ if (!b)
+ return -EREMOTE;
+
+ at = strchr(a, '@');
+ if (at)
+ return -EREMOTE;
+
+ buf = strndup(a, b-a);
+ if (!buf)
+ return -ENOMEM;
+
+ return parse_uid(buf, ret_uid);
+}
+
+int journal_file_archive(JournalFile *f, char **ret_previous_path) {
+ _cleanup_free_ char *p = NULL;
+
+ assert(f);
+
+ if (!journal_file_writable(f))
+ return -EINVAL;
+
+ /* Is this a journal file that was passed to us as fd? If so, we synthesized a path name for it, and we refuse
+ * rotation, since we don't know the actual path, and couldn't rename the file hence. */
+ if (path_startswith(f->path, "/proc/self/fd"))
+ return -EINVAL;
+
+ if (!endswith(f->path, ".journal"))
+ return -EINVAL;
+
+ if (asprintf(&p, "%.*s@" SD_ID128_FORMAT_STR "-%016"PRIx64"-%016"PRIx64".journal",
+ (int) strlen(f->path) - 8, f->path,
+ SD_ID128_FORMAT_VAL(f->header->seqnum_id),
+ le64toh(f->header->head_entry_seqnum),
+ le64toh(f->header->head_entry_realtime)) < 0)
+ return -ENOMEM;
+
+ /* Try to rename the file to the archived version. If the file already was deleted, we'll get ENOENT, let's
+ * ignore that case. */
+ if (rename(f->path, p) < 0 && errno != ENOENT)
+ return -errno;
+
+ /* Sync the rename to disk */
+ (void) fsync_directory_of_file(f->fd);
+
+ if (ret_previous_path)
+ *ret_previous_path = f->path;
+ else
+ free(f->path);
+
+ f->path = TAKE_PTR(p);
+
+ /* Set as archive so offlining commits w/state=STATE_ARCHIVED. Previously we would set old_file->header->state
+ * to STATE_ARCHIVED directly here, but journal_file_set_offline() short-circuits when state != STATE_ONLINE,
+ * which would result in the rotated journal never getting fsync() called before closing. Now we simply queue
+ * the archive state by setting an archive bit, leaving the state as STATE_ONLINE so proper offlining
+ * occurs. */
+ f->archive = true;
+
+ return 0;
+}
+
+int journal_file_dispose(int dir_fd, const char *fname) {
+ _cleanup_free_ char *p = NULL;
+
+ assert(fname);
+
+ /* Renames a journal file to *.journal~, i.e. to mark it as corrupted or otherwise uncleanly shutdown. Note that
+ * this is done without looking into the file or changing any of its contents. The idea is that this is called
+ * whenever something is suspicious and we want to move the file away and make clear that it is not accessed
+ * for writing anymore. */
+
+ if (!endswith(fname, ".journal"))
+ return -EINVAL;
+
+ if (asprintf(&p, "%.*s@%016" PRIx64 "-%016" PRIx64 ".journal~",
+ (int) strlen(fname) - 8, fname,
+ now(CLOCK_REALTIME),
+ random_u64()) < 0)
+ return -ENOMEM;
+
+ if (renameat(dir_fd, fname, dir_fd, p) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int journal_file_copy_entry(
+ JournalFile *from,
+ JournalFile *to,
+ Object *o,
+ uint64_t p,
+ uint64_t *seqnum,
+ sd_id128_t *seqnum_id) {
+
+ _cleanup_free_ EntryItem *items_alloc = NULL;
+ EntryItem *items;
+ uint64_t n, m = 0, xor_hash = 0;
+ sd_id128_t boot_id;
+ dual_timestamp ts;
+ int r;
+
+ assert(from);
+ assert(to);
+ assert(o);
+ assert(p > 0);
+
+ if (!journal_file_writable(to))
+ return -EPERM;
+
+ ts = (dual_timestamp) {
+ .monotonic = le64toh(o->entry.monotonic),
+ .realtime = le64toh(o->entry.realtime),
+ };
+ boot_id = o->entry.boot_id;
+
+ n = journal_file_entry_n_items(from, o);
+ if (n == 0)
+ return 0;
+
+ if (n < ALLOCA_MAX / sizeof(EntryItem) / 2)
+ items = newa(EntryItem, n);
+ else {
+ items_alloc = new(EntryItem, n);
+ if (!items_alloc)
+ return -ENOMEM;
+
+ items = items_alloc;
+ }
+
+ for (uint64_t i = 0; i < n; i++) {
+ uint64_t h, q;
+ void *data;
+ size_t l;
+ Object *u;
+
+ q = journal_file_entry_item_object_offset(from, o, i);
+ r = journal_file_data_payload(from, NULL, q, NULL, 0, 0, &data, &l);
+ if (IN_SET(r, -EADDRNOTAVAIL, -EBADMSG)) {
+ log_debug_errno(r, "Entry item %"PRIu64" data object is bad, skipping over it: %m", i);
+ continue;
+ }
+ if (r < 0)
+ return r;
+ assert(r > 0);
+
+ if (l == 0)
+ return -EBADMSG;
+
+ r = journal_file_append_data(to, data, l, &u, &h);
+ if (r < 0)
+ return r;
+
+ if (JOURNAL_HEADER_KEYED_HASH(to->header))
+ xor_hash ^= jenkins_hash64(data, l);
+ else
+ xor_hash ^= le64toh(u->data.hash);
+
+ items[m++] = (EntryItem) {
+ .object_offset = h,
+ .hash = le64toh(u->data.hash),
+ };
+ }
+
+ if (m == 0)
+ return 0;
+
+ r = journal_file_append_entry_internal(
+ to,
+ &ts,
+ &boot_id,
+ &from->header->machine_id,
+ xor_hash,
+ items,
+ m,
+ seqnum,
+ seqnum_id,
+ /* ret_object= */ NULL,
+ /* ret_offset= */ NULL);
+
+ if (mmap_cache_fd_got_sigbus(to->cache_fd))
+ return -EIO;
+
+ return r;
+}
+
+void journal_reset_metrics(JournalMetrics *m) {
+ assert(m);
+
+ /* Set everything to "pick automatic values". */
+
+ *m = (JournalMetrics) {
+ .min_use = UINT64_MAX,
+ .max_use = UINT64_MAX,
+ .min_size = UINT64_MAX,
+ .max_size = UINT64_MAX,
+ .keep_free = UINT64_MAX,
+ .n_max_files = UINT64_MAX,
+ };
+}
+
+int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *ret_from, usec_t *ret_to) {
+ assert(f);
+ assert(f->header);
+ assert(ret_from || ret_to);
+
+ if (ret_from) {
+ if (f->header->head_entry_realtime == 0)
+ return -ENOENT;
+
+ *ret_from = le64toh(f->header->head_entry_realtime);
+ }
+
+ if (ret_to) {
+ if (f->header->tail_entry_realtime == 0)
+ return -ENOENT;
+
+ *ret_to = le64toh(f->header->tail_entry_realtime);
+ }
+
+ return 1;
+}
+
+int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *ret_from, usec_t *ret_to) {
+ Object *o;
+ uint64_t p;
+ int r;
+
+ assert(f);
+ assert(ret_from || ret_to);
+
+ /* FIXME: fix return value assignment on success with 0. */
+
+ r = find_data_object_by_boot_id(f, boot_id, &o, &p);
+ if (r <= 0)
+ return r;
+
+ if (le64toh(o->data.n_entries) <= 0)
+ return 0;
+
+ if (ret_from) {
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
+ if (r < 0)
+ return r;
+
+ *ret_from = le64toh(o->entry.monotonic);
+ }
+
+ if (ret_to) {
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+ r = journal_file_move_to_entry_for_data(f, o, DIRECTION_UP, &o, NULL);
+ if (r <= 0)
+ return r;
+
+ *ret_to = le64toh(o->entry.monotonic);
+ }
+
+ return 1;
+}
+
+bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec, int log_level) {
+ assert(f);
+ assert(f->header);
+
+ /* If we gained new header fields we gained new features,
+ * hence suggest a rotation */
+ if (le64toh(f->header->header_size) < sizeof(Header)) {
+ log_ratelimit_full(log_level, JOURNAL_LOG_RATELIMIT,
+ "%s uses an outdated header, suggesting rotation.", f->path);
+ return true;
+ }
+
+ /* Let's check if the hash tables grew over a certain fill level (75%, borrowing this value from
+ * Java's hash table implementation), and if so suggest a rotation. To calculate the fill level we
+ * need the n_data field, which only exists in newer versions. */
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
+ if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
+ log_ratelimit_full(
+ log_level, JOURNAL_LOG_RATELIMIT,
+ "Data hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items, %"PRIu64" file size, %"PRIu64" bytes per hash table item), suggesting rotation.",
+ f->path,
+ 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
+ le64toh(f->header->n_data),
+ le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
+ (uint64_t) f->last_stat.st_size,
+ f->last_stat.st_size / le64toh(f->header->n_data));
+ return true;
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
+ if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
+ log_ratelimit_full(
+ log_level, JOURNAL_LOG_RATELIMIT,
+ "Field hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items), suggesting rotation.",
+ f->path,
+ 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
+ le64toh(f->header->n_fields),
+ le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
+ return true;
+ }
+
+ /* If there are too many hash collisions somebody is most likely playing games with us. Hence, if our
+ * longest chain is longer than some threshold, let's suggest rotation. */
+ if (JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth) &&
+ le64toh(f->header->data_hash_chain_depth) > HASH_CHAIN_DEPTH_MAX) {
+ log_ratelimit_full(
+ log_level, JOURNAL_LOG_RATELIMIT,
+ "Data hash table of %s has deepest hash chain of length %" PRIu64 ", suggesting rotation.",
+ f->path, le64toh(f->header->data_hash_chain_depth));
+ return true;
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth) &&
+ le64toh(f->header->field_hash_chain_depth) > HASH_CHAIN_DEPTH_MAX) {
+ log_ratelimit_full(
+ log_level, JOURNAL_LOG_RATELIMIT,
+ "Field hash table of %s has deepest hash chain of length at %" PRIu64 ", suggesting rotation.",
+ f->path, le64toh(f->header->field_hash_chain_depth));
+ return true;
+ }
+
+ /* Are the data objects properly indexed by field objects? */
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
+ JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
+ le64toh(f->header->n_data) > 0 &&
+ le64toh(f->header->n_fields) == 0) {
+ log_ratelimit_full(
+ log_level, JOURNAL_LOG_RATELIMIT,
+ "Data objects of %s are not indexed by field objects, suggesting rotation.",
+ f->path);
+ return true;
+ }
+
+ if (max_file_usec > 0) {
+ usec_t t, h;
+
+ h = le64toh(f->header->head_entry_realtime);
+ t = now(CLOCK_REALTIME);
+
+ if (h > 0 && t > h + max_file_usec) {
+ log_ratelimit_full(
+ log_level, JOURNAL_LOG_RATELIMIT,
+ "Oldest entry in %s is older than the configured file retention duration (%s), suggesting rotation.",
+ f->path, FORMAT_TIMESPAN(max_file_usec, USEC_PER_SEC));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static const char * const journal_object_type_table[] = {
+ [OBJECT_UNUSED] = "unused",
+ [OBJECT_DATA] = "data",
+ [OBJECT_FIELD] = "field",
+ [OBJECT_ENTRY] = "entry",
+ [OBJECT_DATA_HASH_TABLE] = "data hash table",
+ [OBJECT_FIELD_HASH_TABLE] = "field hash table",
+ [OBJECT_ENTRY_ARRAY] = "entry array",
+ [OBJECT_TAG] = "tag",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_TO_STRING(journal_object_type, ObjectType);
diff --git a/src/libsystemd/sd-journal/journal-file.h b/src/libsystemd/sd-journal/journal-file.h
new file mode 100644
index 0000000..81fafb9
--- /dev/null
+++ b/src/libsystemd/sd-journal/journal-file.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sys/uio.h>
+
+#if HAVE_GCRYPT
+# include <gcrypt.h>
+#endif
+
+#include "sd-event.h"
+#include "sd-id128.h"
+
+#include "compress.h"
+#include "hashmap.h"
+#include "journal-def.h"
+#include "mmap-cache.h"
+#include "sparse-endian.h"
+#include "time-util.h"
+
+typedef struct JournalMetrics {
+ /* For all these: UINT64_MAX means "pick automatically", and 0 means "no limit enforced" */
+ uint64_t max_size; /* how large journal files grow at max */
+ uint64_t min_size; /* how large journal files grow at least */
+ uint64_t max_use; /* how much disk space to use in total at max, keep_free permitting */
+ uint64_t min_use; /* how much disk space to use in total at least, even if keep_free says not to */
+ uint64_t keep_free; /* how much to keep free on disk */
+ uint64_t n_max_files; /* how many files to keep around at max */
+} JournalMetrics;
+
+typedef enum direction {
+ DIRECTION_UP,
+ DIRECTION_DOWN,
+ _DIRECTION_INVALID = -EINVAL,
+} direction_t;
+
+typedef enum LocationType {
+ /* The first and last entries, resp. */
+ LOCATION_HEAD,
+ LOCATION_TAIL,
+
+ /* We already read the entry we currently point to, and the
+ * next one to read should probably not be this one again. */
+ LOCATION_DISCRETE,
+
+ /* We should seek to the precise location specified, and
+ * return it, as we haven't read it yet. */
+ LOCATION_SEEK
+} LocationType;
+
+typedef enum OfflineState {
+ OFFLINE_JOINED,
+ OFFLINE_SYNCING,
+ OFFLINE_OFFLINING,
+ OFFLINE_CANCEL,
+ OFFLINE_AGAIN_FROM_SYNCING,
+ OFFLINE_AGAIN_FROM_OFFLINING,
+ OFFLINE_DONE
+} OfflineState;
+
+typedef struct JournalFile {
+ int fd;
+ MMapFileDescriptor *cache_fd;
+
+ mode_t mode;
+
+ int open_flags;
+ bool close_fd:1;
+ bool archive:1;
+ bool strict_order:1;
+
+ direction_t last_direction;
+ LocationType location_type;
+ uint64_t last_n_entries;
+
+ char *path;
+ struct stat last_stat;
+ usec_t last_stat_usec;
+
+ Header *header;
+ HashItem *data_hash_table;
+ HashItem *field_hash_table;
+
+ uint64_t current_offset;
+ uint64_t current_seqnum;
+ uint64_t current_realtime;
+ uint64_t current_monotonic;
+ sd_id128_t current_boot_id;
+ uint64_t current_xor_hash;
+
+ JournalMetrics metrics;
+
+ sd_event_source *post_change_timer;
+ usec_t post_change_timer_period;
+
+ OrderedHashmap *chain_cache;
+
+ pthread_t offline_thread;
+ volatile OfflineState offline_state;
+
+ unsigned last_seen_generation;
+
+ uint64_t compress_threshold_bytes;
+#if HAVE_COMPRESSION
+ void *compress_buffer;
+#endif
+
+#if HAVE_GCRYPT
+ gcry_md_hd_t hmac;
+ bool hmac_running;
+
+ FSSHeader *fss_file;
+ size_t fss_file_size;
+
+ uint64_t fss_start_usec;
+ uint64_t fss_interval_usec;
+
+ void *fsprg_state;
+ size_t fsprg_state_size;
+
+ void *fsprg_seed;
+ size_t fsprg_seed_size;
+#endif
+
+ /* When we insert this file into the per-boot priority queue 'newest_by_boot_id' in sd_journal, then by these keys */
+ sd_id128_t newest_boot_id;
+ sd_id128_t newest_machine_id;
+ uint64_t newest_monotonic_usec;
+ uint64_t newest_realtime_usec;
+ unsigned newest_boot_id_prioq_idx;
+ usec_t newest_mtime;
+} JournalFile;
+
+typedef enum JournalFileFlags {
+ JOURNAL_COMPRESS = 1 << 0,
+ JOURNAL_SEAL = 1 << 1,
+ JOURNAL_STRICT_ORDER = 1 << 2,
+ _JOURNAL_FILE_FLAGS_MAX = JOURNAL_COMPRESS|JOURNAL_SEAL|JOURNAL_STRICT_ORDER,
+} JournalFileFlags;
+
+typedef struct {
+ uint64_t object_offset;
+ uint64_t hash;
+} EntryItem;
+
+int journal_file_open(
+ int fd,
+ const char *fname,
+ int open_flags,
+ JournalFileFlags file_flags,
+ mode_t mode,
+ uint64_t compress_threshold_bytes,
+ JournalMetrics *metrics,
+ MMapCache *mmap_cache,
+ JournalFile *template,
+ JournalFile **ret);
+
+int journal_file_set_offline_thread_join(JournalFile *f);
+JournalFile* journal_file_close(JournalFile *j);
+int journal_file_fstat(JournalFile *f);
+DEFINE_TRIVIAL_CLEANUP_FUNC(JournalFile*, journal_file_close);
+
+#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
+#define VALID64(x) (((x) & 7ULL) == 0ULL)
+
+/* Use six characters to cover the offsets common in smallish journal
+ * files without adding too many zeros. */
+#define OFSfmt "%06"PRIx64
+
+static inline bool VALID_REALTIME(uint64_t u) {
+ /* This considers timestamps until the year 3112 valid. That should be plenty room... */
+ return u > 0 && u < (1ULL << 55);
+}
+
+static inline bool VALID_MONOTONIC(uint64_t u) {
+ /* This considers timestamps until 1142 years of runtime valid. */
+ return u < (1ULL << 55);
+}
+
+static inline bool VALID_EPOCH(uint64_t u) {
+ /* This allows changing the key for 1142 years, every usec. */
+ return u < (1ULL << 55);
+}
+
+#define JOURNAL_HEADER_CONTAINS(h, field) \
+ (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field))
+
+#define JOURNAL_HEADER_SEALED(h) \
+ FLAGS_SET(le32toh((h)->compatible_flags), HEADER_COMPATIBLE_SEALED)
+
+#define JOURNAL_HEADER_SEALED_CONTINUOUS(h) \
+ FLAGS_SET(le32toh((h)->compatible_flags), HEADER_COMPATIBLE_SEALED_CONTINUOUS)
+
+#define JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(h) \
+ FLAGS_SET(le32toh((h)->compatible_flags), HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID)
+
+#define JOURNAL_HEADER_COMPRESSED_XZ(h) \
+ FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_COMPRESSED_XZ)
+
+#define JOURNAL_HEADER_COMPRESSED_LZ4(h) \
+ FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_COMPRESSED_LZ4)
+
+#define JOURNAL_HEADER_COMPRESSED_ZSTD(h) \
+ FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_COMPRESSED_ZSTD)
+
+#define JOURNAL_HEADER_KEYED_HASH(h) \
+ FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_KEYED_HASH)
+
+#define JOURNAL_HEADER_COMPACT(h) \
+ FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_COMPACT)
+
+int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset, Object **ret);
+int journal_file_pin_object(JournalFile *f, Object *o);
+int journal_file_read_object_header(JournalFile *f, ObjectType type, uint64_t offset, Object *ret);
+
+int journal_file_tail_end_by_pread(JournalFile *f, uint64_t *ret_offset);
+int journal_file_tail_end_by_mmap(JournalFile *f, uint64_t *ret_offset);
+
+static inline uint64_t journal_file_entry_item_object_offset(JournalFile *f, Object *o, size_t i) {
+ assert(f);
+ assert(o);
+ return JOURNAL_HEADER_COMPACT(f->header) ? le32toh(o->entry.items.compact[i].object_offset) :
+ le64toh(o->entry.items.regular[i].object_offset);
+}
+
+static inline size_t journal_file_entry_item_size(JournalFile *f) {
+ assert(f);
+ return JOURNAL_HEADER_COMPACT(f->header) ? sizeof_field(Object, entry.items.compact[0]) :
+ sizeof_field(Object, entry.items.regular[0]);
+}
+
+uint64_t journal_file_entry_n_items(JournalFile *f, Object *o) _pure_;
+
+int journal_file_data_payload(
+ JournalFile *f,
+ Object *o,
+ uint64_t offset,
+ const char *field,
+ size_t field_length,
+ size_t data_threshold,
+ void **ret_data,
+ size_t *ret_size);
+
+static inline size_t journal_file_data_payload_offset(JournalFile *f) {
+ return JOURNAL_HEADER_COMPACT(f->header)
+ ? offsetof(Object, data.compact.payload)
+ : offsetof(Object, data.regular.payload);
+}
+
+static inline uint8_t* journal_file_data_payload_field(JournalFile *f, Object *o) {
+ return JOURNAL_HEADER_COMPACT(f->header) ? o->data.compact.payload : o->data.regular.payload;
+}
+
+uint64_t journal_file_entry_array_n_items(JournalFile *f, Object *o) _pure_;
+
+static inline uint64_t journal_file_entry_array_item(JournalFile *f, Object *o, size_t i) {
+ assert(f);
+ assert(o);
+ return JOURNAL_HEADER_COMPACT(f->header) ? le32toh(o->entry_array.items.compact[i]) :
+ le64toh(o->entry_array.items.regular[i]);
+}
+
+static inline size_t journal_file_entry_array_item_size(JournalFile *f) {
+ assert(f);
+ return JOURNAL_HEADER_COMPACT(f->header) ? sizeof(le32_t) : sizeof(le64_t);
+}
+
+uint64_t journal_file_hash_table_n_items(Object *o) _pure_;
+
+int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, Object **ret_object, uint64_t *ret_offset);
+int journal_file_append_entry(
+ JournalFile *f,
+ const dual_timestamp *ts,
+ const sd_id128_t *boot_id,
+ const struct iovec iovec[],
+ size_t n_iovec,
+ uint64_t *seqnum,
+ sd_id128_t *seqnum_id,
+ Object **ret_object,
+ uint64_t *ret_offset);
+
+int journal_file_find_data_object(JournalFile *f, const void *data, uint64_t size, Object **ret_object, uint64_t *ret_offset);
+int journal_file_find_data_object_with_hash(JournalFile *f, const void *data, uint64_t size, uint64_t hash, Object **ret_object, uint64_t *ret_offset);
+
+int journal_file_find_field_object(JournalFile *f, const void *field, uint64_t size, Object **ret_object, uint64_t *ret_offset);
+int journal_file_find_field_object_with_hash(JournalFile *f, const void *field, uint64_t size, uint64_t hash, Object **ret_object, uint64_t *ret_offset);
+
+void journal_file_reset_location(JournalFile *f);
+void journal_file_save_location(JournalFile *f, Object *o, uint64_t offset);
+int journal_file_next_entry(JournalFile *f, uint64_t p, direction_t direction, Object **ret_object, uint64_t *ret_offset);
+
+int journal_file_move_to_entry_by_offset(JournalFile *f, uint64_t p, direction_t direction, Object **ret_object, uint64_t *ret_offset);
+int journal_file_move_to_entry_by_seqnum(JournalFile *f, uint64_t seqnum, direction_t direction, Object **ret_object, uint64_t *ret_offset);
+int journal_file_move_to_entry_by_realtime(JournalFile *f, uint64_t realtime, direction_t direction, Object **ret_object, uint64_t *ret_offset);
+int journal_file_move_to_entry_by_monotonic(JournalFile *f, sd_id128_t boot_id, uint64_t monotonic, direction_t direction, Object **ret_object, uint64_t *ret_offset);
+
+int journal_file_move_to_entry_for_data(JournalFile *f, Object *d, direction_t direction, Object **ret_object, uint64_t *ret_offset);
+
+int journal_file_move_to_entry_by_offset_for_data(JournalFile *f, Object *d, uint64_t p, direction_t direction, Object **ret_object, uint64_t *ret_offset);
+int journal_file_move_to_entry_by_seqnum_for_data(JournalFile *f, Object *d, uint64_t seqnum, direction_t direction, Object **ret_object, uint64_t *ret_offset);
+int journal_file_move_to_entry_by_realtime_for_data(JournalFile *f, Object *d, uint64_t realtime, direction_t direction, Object **ret_object, uint64_t *ret_offset);
+int journal_file_move_to_entry_by_monotonic_for_data(JournalFile *f, Object *d, sd_id128_t boot_id, uint64_t monotonic, direction_t direction, Object **ret_object, uint64_t *ret_offset);
+
+int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, sd_id128_t *seqnum_id);
+
+void journal_file_dump(JournalFile *f);
+void journal_file_print_header(JournalFile *f);
+
+int journal_file_archive(JournalFile *f, char **ret_previous_path);
+int journal_file_parse_uid_from_filename(const char *path, uid_t *uid);
+JournalFile* journal_initiate_close(JournalFile *f, Set *deferred_closes);
+
+int journal_file_dispose(int dir_fd, const char *fname);
+
+void journal_file_post_change(JournalFile *f);
+int journal_file_enable_post_change_timer(JournalFile *f, sd_event *e, usec_t t);
+
+void journal_reset_metrics(JournalMetrics *m);
+
+int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *ret_from, usec_t *ret_to);
+int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot, usec_t *ret_from, usec_t *ret_to);
+
+bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec, int log_level);
+
+int journal_file_map_data_hash_table(JournalFile *f);
+int journal_file_map_field_hash_table(JournalFile *f);
+
+static inline Compression JOURNAL_FILE_COMPRESSION(JournalFile *f) {
+ assert(f);
+
+ if (JOURNAL_HEADER_COMPRESSED_XZ(f->header))
+ return COMPRESSION_XZ;
+ if (JOURNAL_HEADER_COMPRESSED_LZ4(f->header))
+ return COMPRESSION_LZ4;
+ if (JOURNAL_HEADER_COMPRESSED_ZSTD(f->header))
+ return COMPRESSION_ZSTD;
+ return COMPRESSION_NONE;
+}
+
+uint64_t journal_file_hash_data(JournalFile *f, const void *data, size_t sz);
+
+bool journal_field_valid(const char *p, size_t l, bool allow_protected);
+
+const char* journal_object_type_to_string(ObjectType type) _const_;
+
+static inline Compression COMPRESSION_FROM_OBJECT(const Object *o) {
+ assert(o);
+
+ switch (o->object.flags & _OBJECT_COMPRESSED_MASK) {
+ case 0:
+ return COMPRESSION_NONE;
+ case OBJECT_COMPRESSED_XZ:
+ return COMPRESSION_XZ;
+ case OBJECT_COMPRESSED_LZ4:
+ return COMPRESSION_LZ4;
+ case OBJECT_COMPRESSED_ZSTD:
+ return COMPRESSION_ZSTD;
+ default:
+ return _COMPRESSION_INVALID;
+ }
+}
+
+static inline uint8_t COMPRESSION_TO_OBJECT_FLAG(Compression c) {
+ switch (c) {
+ case COMPRESSION_XZ:
+ return OBJECT_COMPRESSED_XZ;
+ case COMPRESSION_LZ4:
+ return OBJECT_COMPRESSED_LZ4;
+ case COMPRESSION_ZSTD:
+ return OBJECT_COMPRESSED_ZSTD;
+ default:
+ return 0;
+ }
+}
+
+static inline uint32_t COMPRESSION_TO_HEADER_INCOMPATIBLE_FLAG(Compression c) {
+ switch (c) {
+ case COMPRESSION_XZ:
+ return HEADER_INCOMPATIBLE_COMPRESSED_XZ;
+ case COMPRESSION_LZ4:
+ return HEADER_INCOMPATIBLE_COMPRESSED_LZ4;
+ case COMPRESSION_ZSTD:
+ return HEADER_INCOMPATIBLE_COMPRESSED_ZSTD;
+ default:
+ return 0;
+ }
+}
+
+static inline bool journal_file_writable(JournalFile *f) {
+ assert(f);
+ return (f->open_flags & O_ACCMODE) != O_RDONLY;
+}
diff --git a/src/libsystemd/sd-journal/journal-internal.h b/src/libsystemd/sd-journal/journal-internal.h
new file mode 100644
index 0000000..259aac8
--- /dev/null
+++ b/src/libsystemd/sd-journal/journal-internal.h
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "sd-id128.h"
+#include "sd-journal.h"
+
+#include "hashmap.h"
+#include "journal-def.h"
+#include "journal-file.h"
+#include "list.h"
+#include "set.h"
+
+#define JOURNAL_FILES_MAX 7168u
+
+#define JOURNAL_LOG_RATELIMIT ((const RateLimit) { .interval = 60 * USEC_PER_SEC, .burst = 3 })
+
+typedef struct Match Match;
+typedef struct Location Location;
+typedef struct Directory Directory;
+
+typedef enum MatchType {
+ MATCH_DISCRETE,
+ MATCH_OR_TERM,
+ MATCH_AND_TERM
+} MatchType;
+
+struct Match {
+ MatchType type;
+ Match *parent;
+ LIST_FIELDS(Match, matches);
+
+ /* For concrete matches */
+ char *data;
+ size_t size;
+ uint64_t hash; /* old-style jenkins hash. New-style siphash is different per file, hence won't be cached here */
+
+ /* For terms */
+ LIST_HEAD(Match, matches);
+};
+
+struct Location {
+ LocationType type;
+
+ bool seqnum_set:1;
+ bool realtime_set:1;
+ bool monotonic_set:1;
+ bool xor_hash_set:1;
+
+ uint64_t seqnum;
+ sd_id128_t seqnum_id;
+
+ uint64_t realtime;
+
+ uint64_t monotonic;
+ sd_id128_t boot_id;
+
+ uint64_t xor_hash;
+};
+
+struct Directory {
+ char *path;
+ int wd;
+ bool is_root;
+ unsigned last_seen_generation;
+};
+
+struct sd_journal {
+ int toplevel_fd;
+
+ char *path;
+ char *prefix;
+ char *namespace;
+
+ OrderedHashmap *files;
+ IteratedCache *files_cache;
+ MMapCache *mmap;
+ Hashmap *newest_by_boot_id; /* key: boot_id, value: prioq, ordered by monotonic timestamp of last update */
+
+ Location current_location;
+
+ JournalFile *current_file;
+ uint64_t current_field;
+
+ Match *level0, *level1, *level2;
+
+ uint64_t origin_id;
+
+ int inotify_fd;
+ unsigned current_invalidate_counter, last_invalidate_counter;
+ usec_t last_process_usec;
+ unsigned generation;
+
+ /* Iterating through unique fields and their data values */
+ char *unique_field;
+ JournalFile *unique_file;
+ uint64_t unique_offset;
+
+ /* Iterating through known fields */
+ JournalFile *fields_file;
+ uint64_t fields_offset;
+ uint64_t fields_hash_table_index;
+ char *fields_buffer;
+
+ int flags;
+
+ bool on_network:1;
+ bool no_new_files:1;
+ bool no_inotify:1;
+ bool unique_file_lost:1; /* File we were iterating over got
+ removed, and there were no more
+ files, so sd_j_enumerate_unique
+ will return a value equal to 0. */
+ bool fields_file_lost:1;
+ bool has_runtime_files:1;
+ bool has_persistent_files:1;
+
+ size_t data_threshold;
+
+ Hashmap *directories_by_path;
+ Hashmap *directories_by_wd;
+
+ Hashmap *errors;
+};
+
+char *journal_make_match_string(sd_journal *j);
+void journal_print_header(sd_journal *j);
+
+#define JOURNAL_FOREACH_DATA_RETVAL(j, data, l, retval) \
+ for (sd_journal_restart_data(j); ((retval) = sd_journal_enumerate_data((j), &(data), &(l))) > 0; )
+
+/* All errors that we might encounter while extracting a field that are not real errors,
+ * but only mean that the field is too large or we don't support the compression. */
+static inline bool JOURNAL_ERRNO_IS_UNAVAILABLE_FIELD(int r) {
+ return IN_SET(abs(r),
+ ENOBUFS, /* Field or decompressed field too large */
+ E2BIG, /* Field too large for pointer width */
+ EPROTONOSUPPORT); /* Unsupported compression */
+}
diff --git a/src/libsystemd/sd-journal/journal-send.c b/src/libsystemd/sd-journal/journal-send.c
new file mode 100644
index 0000000..be23b2f
--- /dev/null
+++ b/src/libsystemd/sd-journal/journal-send.c
@@ -0,0 +1,576 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <printf.h>
+#include <stddef.h>
+#include <sys/un.h>
+#include <unistd.h>
+#if HAVE_VALGRIND_VALGRIND_H
+# include <valgrind/valgrind.h>
+#endif
+
+#define SD_JOURNAL_SUPPRESS_LOCATION
+
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "io-util.h"
+#include "iovec-util.h"
+#include "journal-send.h"
+#include "memfd-util.h"
+#include "missing_syscall.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+
+#define SNDBUF_SIZE (8*1024*1024)
+
+#define ALLOCA_CODE_FUNC(f, func) \
+ do { \
+ size_t _fl; \
+ const char *_func = (func); \
+ char **_f = &(f); \
+ _fl = strlen(_func) + 1; \
+ *_f = newa(char, _fl + 10); \
+ memcpy(*_f, "CODE_FUNC=", 10); \
+ memcpy(*_f + 10, _func, _fl); \
+ } while (false)
+
+/* We open a single fd, and we'll share it with the current process,
+ * all its threads, and all its subprocesses. This means we need to
+ * initialize it atomically, and need to operate on it atomically
+ * never assuming we are the only user */
+static int fd_plus_one = 0;
+
+static int journal_fd(void) {
+ int fd;
+
+retry:
+ if (fd_plus_one > 0)
+ return fd_plus_one - 1;
+
+ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ return -errno;
+
+ fd_inc_sndbuf(fd, SNDBUF_SIZE);
+
+ if (!__atomic_compare_exchange_n(&fd_plus_one, &(int){0}, fd+1,
+ false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
+ safe_close(fd);
+ goto retry;
+ }
+
+ return fd;
+}
+
+int journal_fd_nonblock(bool nonblock) {
+ int r;
+
+ r = journal_fd();
+ if (r < 0)
+ return r;
+
+ return fd_nonblock(r, nonblock);
+}
+
+void close_journal_fd(void) {
+#if HAVE_VALGRIND_VALGRIND_H
+ /* Be nice to valgrind. This is not atomic, so it is useful mainly for debugging. */
+
+ if (!RUNNING_ON_VALGRIND)
+ return;
+
+ if (getpid_cached() != gettid())
+ return;
+
+ if (fd_plus_one <= 0)
+ return;
+
+ safe_close(fd_plus_one - 1);
+ fd_plus_one = 0;
+#endif
+}
+
+_public_ int sd_journal_print(int priority, const char *format, ...) {
+ int r;
+ va_list ap;
+
+ va_start(ap, format);
+ r = sd_journal_printv(priority, format, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_journal_printv(int priority, const char *format, va_list ap) {
+ char p[STRLEN("PRIORITY=") + DECIMAL_STR_MAX(int) + 1];
+ char sbuf[LINE_MAX + 8] = "MESSAGE=";
+ struct iovec iov[2];
+ int len;
+ va_list aq;
+ char *buffer = sbuf;
+
+ assert_return(priority >= 0, -EINVAL);
+ assert_return(priority <= 7, -EINVAL);
+ assert_return(format, -EINVAL);
+
+ xsprintf(p, "PRIORITY=%i", priority & LOG_PRIMASK);
+
+ va_copy(aq, ap);
+ len = vsnprintf(buffer + 8, LINE_MAX, format, aq);
+ va_end(aq);
+
+ if (len >= (int)LONG_LINE_MAX - 8)
+ return -ENOBUFS;
+
+ /* Allocate large buffer to accommodate big message */
+ if (len >= LINE_MAX) {
+ buffer = alloca_safe(len + 9);
+ memcpy(buffer, "MESSAGE=", 8);
+ assert_se(vsnprintf(buffer + 8, len + 1, format, ap) == len);
+ }
+
+ /* Strip trailing whitespace, keep prefix whitespace. */
+ (void) strstrip(buffer);
+
+ /* Suppress empty lines */
+ if (isempty(buffer + 8))
+ return 0;
+
+ iov[0] = IOVEC_MAKE_STRING(buffer);
+ iov[1] = IOVEC_MAKE_STRING(p);
+
+ return sd_journal_sendv(iov, 2);
+}
+
+_printf_(1, 0) static int fill_iovec_sprintf(
+ const char *format,
+ va_list ap,
+ size_t extra,
+ struct iovec **ret_iov,
+ size_t *ret_n_iov) {
+
+ PROTECT_ERRNO;
+ struct iovec *iov = NULL;
+ size_t n = 0;
+
+ assert(ret_iov);
+ assert(ret_n_iov);
+
+ if (extra > 0) {
+ if (!GREEDY_REALLOC0(iov, extra))
+ return -ENOMEM;
+
+ n = extra;
+ }
+
+ CLEANUP_ARRAY(iov, n, iovec_array_free);
+
+ while (format) {
+ _cleanup_free_ char *buffer = NULL;
+ va_list aq;
+
+ va_copy(aq, ap);
+ if (vasprintf(&buffer, format, aq) < 0) {
+ va_end(aq);
+ return -ENOMEM;
+ }
+ va_end(aq);
+
+ VA_FORMAT_ADVANCE(format, ap);
+ format = va_arg(ap, char *);
+
+ if (!GREEDY_REALLOC(iov, n + 1))
+ return -ENOMEM;
+
+ /* strip trailing whitespace, keep prefixing whitespace */
+ iov[n++] = IOVEC_MAKE_STRING(delete_trailing_chars(TAKE_PTR(buffer), NULL));
+ }
+
+ *ret_iov = TAKE_PTR(iov);
+ *ret_n_iov = n;
+ return 0;
+}
+
+_public_ int sd_journal_send(const char *format, ...) {
+ struct iovec *iov = NULL;
+ size_t n_iov = 0;
+ va_list ap;
+ int r;
+
+ CLEANUP_ARRAY(iov, n_iov, iovec_array_free);
+
+ va_start(ap, format);
+ r = fill_iovec_sprintf(format, ap, 0, &iov, &n_iov);
+ va_end(ap);
+ if (r < 0)
+ return r;
+
+ return sd_journal_sendv(iov, n_iov);
+}
+
+_public_ int sd_journal_sendv(const struct iovec *iov, int n) {
+ PROTECT_ERRNO;
+ int fd, r;
+ _cleanup_close_ int buffer_fd = -EBADF;
+ struct iovec *w;
+ uint64_t *l;
+ int i, j = 0;
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/journal/socket",
+ };
+ struct msghdr mh = {
+ .msg_name = (struct sockaddr*) &sa.sa,
+ .msg_namelen = SOCKADDR_UN_LEN(sa.un),
+ };
+ ssize_t k;
+ bool have_syslog_identifier = false;
+ bool seal = true;
+
+ assert_return(iov, -EINVAL);
+ assert_return(n > 0, -EINVAL);
+
+ w = newa(struct iovec, n * 5 + 3);
+ l = newa(uint64_t, n);
+
+ for (i = 0; i < n; i++) {
+ char *c, *nl;
+
+ if (_unlikely_(!iov[i].iov_base || iov[i].iov_len <= 1))
+ return -EINVAL;
+
+ c = memchr(iov[i].iov_base, '=', iov[i].iov_len);
+ if (_unlikely_(!c || c == iov[i].iov_base))
+ return -EINVAL;
+
+ have_syslog_identifier = have_syslog_identifier ||
+ (c == (char *) iov[i].iov_base + 17 &&
+ startswith(iov[i].iov_base, "SYSLOG_IDENTIFIER"));
+
+ nl = memchr(iov[i].iov_base, '\n', iov[i].iov_len);
+ if (nl) {
+ if (_unlikely_(nl < c))
+ return -EINVAL;
+
+ /* Already includes a newline? Bummer, then
+ * let's write the variable name, then a
+ * newline, then the size (64-bit LE), followed
+ * by the data and a final newline */
+
+ w[j++] = IOVEC_MAKE(iov[i].iov_base, c - (char*) iov[i].iov_base);
+ w[j++] = IOVEC_MAKE_STRING("\n");
+
+ l[i] = htole64(iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1);
+ w[j++] = IOVEC_MAKE(&l[i], sizeof(uint64_t));
+
+ w[j++] = IOVEC_MAKE(c + 1, iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1);
+ } else
+ /* Nothing special? Then just add the line and
+ * append a newline */
+ w[j++] = iov[i];
+
+ w[j++] = IOVEC_MAKE_STRING("\n");
+ }
+
+ if (!have_syslog_identifier &&
+ string_is_safe(program_invocation_short_name)) {
+
+ /* Implicitly add program_invocation_short_name, if it
+ * is not set explicitly. We only do this for
+ * program_invocation_short_name, and nothing else
+ * since everything else is much nicer to retrieve
+ * from the outside. */
+
+ w[j++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=");
+ w[j++] = IOVEC_MAKE_STRING(program_invocation_short_name);
+ w[j++] = IOVEC_MAKE_STRING("\n");
+ }
+
+ fd = journal_fd();
+ if (_unlikely_(fd < 0))
+ return fd;
+
+ mh.msg_iov = w;
+ mh.msg_iovlen = j;
+
+ k = sendmsg(fd, &mh, MSG_NOSIGNAL);
+ if (k >= 0)
+ return 0;
+
+ /* Fail silently if the journal is not available */
+ if (errno == ENOENT)
+ return 0;
+
+ if (!IN_SET(errno, EMSGSIZE, ENOBUFS, EAGAIN))
+ return -errno;
+
+ /* Message doesn't fit... Let's dump the data in a memfd or
+ * temporary file and just pass a file descriptor of it to the
+ * other side.
+ *
+ * For the temporary files we use /dev/shm instead of /tmp
+ * here, since we want this to be a tmpfs, and one that is
+ * available from early boot on and where unprivileged users
+ * can create files. */
+ buffer_fd = memfd_new(NULL);
+ if (buffer_fd < 0) {
+ if (buffer_fd == -ENOSYS) {
+ buffer_fd = open_tmpfile_unlinkable("/dev/shm", O_RDWR | O_CLOEXEC);
+ if (buffer_fd < 0)
+ return buffer_fd;
+
+ seal = false;
+ } else
+ return buffer_fd;
+ }
+
+ n = writev(buffer_fd, w, j);
+ if (n < 0)
+ return -errno;
+
+ if (seal) {
+ r = memfd_set_sealed(buffer_fd);
+ if (r < 0)
+ return r;
+ }
+
+ r = send_one_fd_sa(fd, buffer_fd, mh.msg_name, mh.msg_namelen, 0);
+ if (r == -ENOENT)
+ /* Fail silently if the journal is not available */
+ return 0;
+ return r;
+}
+
+static int fill_iovec_perror_and_send(const char *message, int skip, struct iovec iov[]) {
+ PROTECT_ERRNO;
+ size_t n, k;
+
+ k = isempty(message) ? 0 : strlen(message) + 2;
+ n = 8 + k + 256 + 1;
+
+ for (;;) {
+ char buffer[n];
+ char* j;
+
+ errno = 0;
+ j = strerror_r(_saved_errno_, buffer + 8 + k, n - 8 - k);
+ if (errno == 0) {
+ char error[STRLEN("ERRNO=") + DECIMAL_STR_MAX(int) + 1];
+
+ if (j != buffer + 8 + k)
+ memmove(buffer + 8 + k, j, strlen(j)+1);
+
+ memcpy(buffer, "MESSAGE=", 8);
+
+ if (k > 0) {
+ memcpy(buffer + 8, message, k - 2);
+ memcpy(buffer + 8 + k - 2, ": ", 2);
+ }
+
+ xsprintf(error, "ERRNO=%i", _saved_errno_);
+
+ assert_cc(3 == LOG_ERR);
+ iov[skip+0] = IOVEC_MAKE_STRING("PRIORITY=3");
+ iov[skip+1] = IOVEC_MAKE_STRING(buffer);
+ iov[skip+2] = IOVEC_MAKE_STRING(error);
+
+ return sd_journal_sendv(iov, skip + 3);
+ }
+
+ if (errno != ERANGE)
+ return -errno;
+
+ n *= 2;
+ }
+}
+
+_public_ int sd_journal_perror(const char *message) {
+ struct iovec iovec[3];
+
+ return fill_iovec_perror_and_send(message, 0, iovec);
+}
+
+_public_ int sd_journal_stream_fd(const char *identifier, int priority, int level_prefix) {
+ _cleanup_close_ int fd = -EBADF;
+ char *header;
+ size_t l;
+ int r;
+
+ assert_return(priority >= 0, -EINVAL);
+ assert_return(priority <= 7, -EINVAL);
+
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
+ if (fd < 0)
+ return -errno;
+
+ r = connect_unix_path(fd, AT_FDCWD, "/run/systemd/journal/stdout");
+ if (r < 0)
+ return r;
+
+ if (shutdown(fd, SHUT_RD) < 0)
+ return -errno;
+
+ (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
+
+ identifier = strempty(identifier);
+
+ l = strlen(identifier);
+ header = newa(char, l + 1 + 1 + 2 + 2 + 2 + 2 + 2);
+
+ memcpy(header, identifier, l);
+ header[l++] = '\n';
+ header[l++] = '\n'; /* unit id */
+ header[l++] = '0' + priority;
+ header[l++] = '\n';
+ header[l++] = '0' + !!level_prefix;
+ header[l++] = '\n';
+ header[l++] = '0';
+ header[l++] = '\n';
+ header[l++] = '0';
+ header[l++] = '\n';
+ header[l++] = '0';
+ header[l++] = '\n';
+
+ r = loop_write(fd, header, l);
+ if (r < 0)
+ return r;
+
+ return TAKE_FD(fd);
+}
+
+_public_ int sd_journal_print_with_location(int priority, const char *file, const char *line, const char *func, const char *format, ...) {
+ int r;
+ va_list ap;
+
+ va_start(ap, format);
+ r = sd_journal_printv_with_location(priority, file, line, func, format, ap);
+ va_end(ap);
+
+ return r;
+}
+
+_public_ int sd_journal_printv_with_location(int priority, const char *file, const char *line, const char *func, const char *format, va_list ap) {
+ char p[STRLEN("PRIORITY=") + DECIMAL_STR_MAX(int) + 1];
+ char sbuf[LINE_MAX + 8] = "MESSAGE=";
+ struct iovec iov[5];
+ char *f;
+ int len;
+ char *buffer = sbuf;
+ va_list aq;
+
+ assert_return(priority >= 0, -EINVAL);
+ assert_return(priority <= 7, -EINVAL);
+ assert_return(format, -EINVAL);
+
+ xsprintf(p, "PRIORITY=%i", priority & LOG_PRIMASK);
+
+ va_copy(aq, ap);
+ len = vsnprintf(buffer + 8, LINE_MAX, format, aq);
+ va_end(aq);
+
+ if (len >= (int)LONG_LINE_MAX - 8)
+ return -ENOBUFS;
+
+ /* Allocate large buffer to accommodate big message */
+ if (len >= LINE_MAX) {
+ buffer = alloca_safe(len + 9);
+ memcpy(buffer, "MESSAGE=", 8);
+ assert_se(vsnprintf(buffer + 8, len + 1, format, ap) == len);
+ }
+
+ /* Strip trailing whitespace, keep prefixing whitespace */
+ (void) strstrip(buffer);
+
+ /* Suppress empty lines */
+ if (isempty(buffer + 8))
+ return 0;
+
+ /* func is initialized from __func__ which is not a macro, but
+ * a static const char[], hence cannot easily be prefixed with
+ * CODE_FUNC=, hence let's do it manually here. */
+ ALLOCA_CODE_FUNC(f, func);
+
+ iov[0] = IOVEC_MAKE_STRING(buffer);
+ iov[1] = IOVEC_MAKE_STRING(p);
+ iov[2] = IOVEC_MAKE_STRING(file);
+ iov[3] = IOVEC_MAKE_STRING(line);
+ iov[4] = IOVEC_MAKE_STRING(f);
+
+ return sd_journal_sendv(iov, ELEMENTSOF(iov));
+}
+
+_public_ int sd_journal_send_with_location(const char *file, const char *line, const char *func, const char *format, ...) {
+ struct iovec *iov = NULL;
+ size_t n_iov = 0;
+ va_list ap;
+ char *f;
+ int r;
+
+ CLEANUP_ARRAY(iov, n_iov, iovec_array_free);
+
+ va_start(ap, format);
+ r = fill_iovec_sprintf(format, ap, 3, &iov, &n_iov);
+ va_end(ap);
+ if (r < 0)
+ return r;
+
+ ALLOCA_CODE_FUNC(f, func);
+
+ iov[0] = IOVEC_MAKE_STRING(file);
+ iov[1] = IOVEC_MAKE_STRING(line);
+ iov[2] = IOVEC_MAKE_STRING(f);
+
+ r = sd_journal_sendv(iov, n_iov);
+
+ iov[0] = iov[1] = iov[2] = (struct iovec) {};
+
+ return r;
+}
+
+_public_ int sd_journal_sendv_with_location(
+ const char *file, const char *line,
+ const char *func,
+ const struct iovec *iov, int n) {
+
+ struct iovec *niov;
+ char *f;
+
+ assert_return(iov, -EINVAL);
+ assert_return(n > 0, -EINVAL);
+
+ niov = newa(struct iovec, n + 3);
+ memcpy(niov, iov, sizeof(struct iovec) * n);
+
+ ALLOCA_CODE_FUNC(f, func);
+
+ niov[n++] = IOVEC_MAKE_STRING(file);
+ niov[n++] = IOVEC_MAKE_STRING(line);
+ niov[n++] = IOVEC_MAKE_STRING(f);
+
+ return sd_journal_sendv(niov, n);
+}
+
+_public_ int sd_journal_perror_with_location(
+ const char *file, const char *line,
+ const char *func,
+ const char *message) {
+
+ struct iovec iov[6];
+ char *f;
+
+ ALLOCA_CODE_FUNC(f, func);
+
+ iov[0] = IOVEC_MAKE_STRING(file);
+ iov[1] = IOVEC_MAKE_STRING(line);
+ iov[2] = IOVEC_MAKE_STRING(f);
+
+ return fill_iovec_perror_and_send(message, 3, iov);
+}
diff --git a/src/libsystemd/sd-journal/journal-send.h b/src/libsystemd/sd-journal/journal-send.h
new file mode 100644
index 0000000..24315e2
--- /dev/null
+++ b/src/libsystemd/sd-journal/journal-send.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+int journal_fd_nonblock(bool nonblock);
+void close_journal_fd(void);
diff --git a/src/libsystemd/sd-journal/journal-vacuum.c b/src/libsystemd/sd-journal/journal-vacuum.c
new file mode 100644
index 0000000..829edb3
--- /dev/null
+++ b/src/libsystemd/sd-journal/journal-vacuum.c
@@ -0,0 +1,330 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "journal-def.h"
+#include "journal-file.h"
+#include "journal-internal.h"
+#include "journal-vacuum.h"
+#include "sort-util.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "xattr-util.h"
+
+typedef struct vacuum_info {
+ uint64_t usage;
+ char *filename;
+
+ uint64_t realtime;
+
+ sd_id128_t seqnum_id;
+ uint64_t seqnum;
+ bool have_seqnum;
+} vacuum_info;
+
+static int vacuum_info_compare(const vacuum_info *a, const vacuum_info *b) {
+ int r;
+
+ if (a->have_seqnum && b->have_seqnum &&
+ sd_id128_equal(a->seqnum_id, b->seqnum_id))
+ return CMP(a->seqnum, b->seqnum);
+
+ r = CMP(a->realtime, b->realtime);
+ if (r != 0)
+ return r;
+
+ if (a->have_seqnum && b->have_seqnum)
+ return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
+
+ return strcmp(a->filename, b->filename);
+}
+
+static void vacuum_info_array_free(vacuum_info *list, size_t n) {
+ if (!list)
+ return;
+
+ FOREACH_ARRAY(i, list, n)
+ free(i->filename);
+
+ free(list);
+}
+
+static void patch_realtime(
+ int fd,
+ const char *fn,
+ const struct stat *st,
+ unsigned long long *realtime) {
+
+ usec_t x;
+
+ /* The timestamp was determined by the file name, but let's see if the file might actually be older
+ * than the file name suggested... */
+
+ assert(fd >= 0);
+ assert(fn);
+ assert(st);
+ assert(realtime);
+
+ x = timespec_load(&st->st_ctim);
+ if (timestamp_is_set(x) && x < *realtime)
+ *realtime = x;
+
+ x = timespec_load(&st->st_atim);
+ if (timestamp_is_set(x) && x < *realtime)
+ *realtime = x;
+
+ x = timespec_load(&st->st_mtim);
+ if (timestamp_is_set(x) && x < *realtime)
+ *realtime = x;
+
+ /* Let's read the original creation time, if possible. Ideally we'd just query the creation time the
+ * FS might provide, but unfortunately there's currently no sane API to query it. Hence let's
+ * implement this manually... */
+
+ if (fd_getcrtime_at(fd, fn, AT_SYMLINK_FOLLOW, &x) >= 0 && x < *realtime)
+ *realtime = x;
+}
+
+static int journal_file_empty(int dir_fd, const char *name) {
+ _cleanup_close_ int fd = -EBADF;
+ struct stat st;
+ le64_t n_entries;
+ ssize_t n;
+
+ fd = openat(dir_fd, name, O_RDONLY|O_CLOEXEC|O_NOFOLLOW|O_NONBLOCK|O_NOATIME);
+ if (fd < 0) {
+ /* Maybe failed due to O_NOATIME and lack of privileges? */
+ fd = openat(dir_fd, name, O_RDONLY|O_CLOEXEC|O_NOFOLLOW|O_NONBLOCK);
+ if (fd < 0)
+ return -errno;
+ }
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ /* If an offline file doesn't even have a header we consider it empty */
+ if (st.st_size < (off_t) sizeof(Header))
+ return 1;
+
+ /* If the number of entries is empty, we consider it empty, too */
+ n = pread(fd, &n_entries, sizeof(n_entries), offsetof(Header, n_entries));
+ if (n < 0)
+ return -errno;
+ if (n != sizeof(n_entries))
+ return -EIO;
+
+ return le64toh(n_entries) <= 0;
+}
+
+int journal_directory_vacuum(
+ const char *directory,
+ uint64_t max_use,
+ uint64_t n_max_files,
+ usec_t max_retention_usec,
+ usec_t *oldest_usec,
+ bool verbose) {
+
+ uint64_t sum = 0, freed = 0, n_active_files = 0;
+ size_t n_list = 0, i;
+ _cleanup_closedir_ DIR *d = NULL;
+ vacuum_info *list = NULL;
+ usec_t retention_limit = 0;
+ int r;
+
+ CLEANUP_ARRAY(list, n_list, vacuum_info_array_free);
+
+ assert(directory);
+
+ if (max_use <= 0 && max_retention_usec <= 0 && n_max_files <= 0)
+ return 0;
+
+ if (max_retention_usec > 0)
+ retention_limit = usec_sub_unsigned(now(CLOCK_REALTIME), max_retention_usec);
+
+ d = opendir(directory);
+ if (!d)
+ return -errno;
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ unsigned long long seqnum = 0, realtime;
+ _cleanup_free_ char *p = NULL;
+ sd_id128_t seqnum_id;
+ bool have_seqnum;
+ uint64_t size;
+ struct stat st;
+ size_t q;
+
+ if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+ log_debug_errno(errno, "Failed to stat file %s while vacuuming, ignoring: %m", de->d_name);
+ continue;
+ }
+
+ if (!S_ISREG(st.st_mode))
+ continue;
+
+ size = 512UL * (uint64_t) st.st_blocks;
+
+ q = strlen(de->d_name);
+
+ if (endswith(de->d_name, ".journal")) {
+
+ /* Vacuum archived files. Active files are
+ * left around */
+
+ if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8) {
+ n_active_files++;
+ sum += size;
+ continue;
+ }
+
+ if (de->d_name[q-8-16-1] != '-' ||
+ de->d_name[q-8-16-1-16-1] != '-' ||
+ de->d_name[q-8-16-1-16-1-32-1] != '@') {
+ n_active_files++;
+ sum += size;
+ continue;
+ }
+
+ p = strdup(de->d_name);
+ if (!p)
+ return -ENOMEM;
+
+ de->d_name[q-8-16-1-16-1] = 0;
+ if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
+ n_active_files++;
+ sum += size;
+ continue;
+ }
+
+ if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
+ n_active_files++;
+ sum += size;
+ continue;
+ }
+
+ have_seqnum = true;
+
+ } else if (endswith(de->d_name, ".journal~")) {
+ unsigned long long tmp;
+
+ /* seqnum_id won't be initialised before use below, so set to 0 */
+ seqnum_id = SD_ID128_NULL;
+
+ /* Vacuum corrupted files */
+
+ if (q < 1 + 16 + 1 + 16 + 8 + 1) {
+ n_active_files++;
+ sum += size;
+ continue;
+ }
+
+ if (de->d_name[q-1-8-16-1] != '-' ||
+ de->d_name[q-1-8-16-1-16-1] != '@') {
+ n_active_files++;
+ sum += size;
+ continue;
+ }
+
+ p = strdup(de->d_name);
+ if (!p)
+ return -ENOMEM;
+
+ if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
+ n_active_files++;
+ sum += size;
+ continue;
+ }
+
+ have_seqnum = false;
+ } else {
+ /* We do not vacuum unknown files! */
+ log_debug("Not vacuuming unknown file %s.", de->d_name);
+ continue;
+ }
+
+ r = journal_file_empty(dirfd(d), p);
+ if (r < 0) {
+ log_debug_errno(r, "Failed check if %s is empty, ignoring: %m", p);
+ continue;
+ }
+ if (r > 0) {
+ /* Always vacuum empty non-online files. */
+
+ r = unlinkat_deallocate(dirfd(d), p, 0);
+ if (r >= 0) {
+
+ log_full(verbose ? LOG_INFO : LOG_DEBUG,
+ "Deleted empty archived journal %s/%s (%s).", directory, p, FORMAT_BYTES(size));
+
+ freed += size;
+ } else if (r != -ENOENT)
+ log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT,
+ "Failed to delete empty archived journal %s/%s: %m",
+ directory, p);
+
+ continue;
+ }
+
+ patch_realtime(dirfd(d), p, &st, &realtime);
+
+ if (!GREEDY_REALLOC(list, n_list + 1))
+ return -ENOMEM;
+
+ list[n_list++] = (vacuum_info) {
+ .filename = TAKE_PTR(p),
+ .usage = size,
+ .seqnum = seqnum,
+ .realtime = realtime,
+ .seqnum_id = seqnum_id,
+ .have_seqnum = have_seqnum,
+ };
+
+ sum += size;
+ }
+
+ typesafe_qsort(list, n_list, vacuum_info_compare);
+
+ for (i = 0; i < n_list; i++) {
+ uint64_t left;
+
+ left = n_active_files + n_list - i;
+
+ if ((max_retention_usec <= 0 || list[i].realtime >= retention_limit) &&
+ (max_use <= 0 || sum <= max_use) &&
+ (n_max_files <= 0 || left <= n_max_files))
+ break;
+
+ r = unlinkat_deallocate(dirfd(d), list[i].filename, 0);
+ if (r >= 0) {
+ log_full(verbose ? LOG_INFO : LOG_DEBUG, "Deleted archived journal %s/%s (%s).",
+ directory, list[i].filename, FORMAT_BYTES(list[i].usage));
+ freed += list[i].usage;
+
+ if (list[i].usage < sum)
+ sum -= list[i].usage;
+ else
+ sum = 0;
+
+ } else if (r != -ENOENT)
+ log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT,
+ "Failed to delete archived journal %s/%s: %m",
+ directory, list[i].filename);
+ }
+
+ if (oldest_usec && i < n_list && (*oldest_usec == 0 || list[i].realtime < *oldest_usec))
+ *oldest_usec = list[i].realtime;
+
+ log_full(verbose ? LOG_INFO : LOG_DEBUG, "Vacuuming done, freed %s of archived journals from %s.",
+ FORMAT_BYTES(freed), directory);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-journal/journal-vacuum.h b/src/libsystemd/sd-journal/journal-vacuum.h
new file mode 100644
index 0000000..d87c847
--- /dev/null
+++ b/src/libsystemd/sd-journal/journal-vacuum.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+
+#include "time-util.h"
+
+int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t n_max_files, usec_t max_retention_usec, usec_t *oldest_usec, bool verbose);
diff --git a/src/libsystemd/sd-journal/journal-verify.c b/src/libsystemd/sd-journal/journal-verify.c
new file mode 100644
index 0000000..bdaa01d
--- /dev/null
+++ b/src/libsystemd/sd-journal/journal-verify.c
@@ -0,0 +1,1436 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "compress.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "journal-authenticate.h"
+#include "journal-def.h"
+#include "journal-file.h"
+#include "journal-verify.h"
+#include "lookup3.h"
+#include "macro.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+
+static void draw_progress(uint64_t p, usec_t *last_usec) {
+ unsigned n, i, j, k;
+ usec_t z, x;
+
+ if (!on_tty())
+ return;
+
+ z = now(CLOCK_MONOTONIC);
+ x = *last_usec;
+
+ if (x != 0 && x + 40 * USEC_PER_MSEC > z)
+ return;
+
+ *last_usec = z;
+
+ n = (3 * columns()) / 4;
+ j = (n * (unsigned) p) / 65535ULL;
+ k = n - j;
+
+ fputs("\r", stdout);
+ if (colors_enabled())
+ fputs("\x1B[?25l", stdout);
+
+ fputs(ansi_highlight_green(), stdout);
+
+ for (i = 0; i < j; i++)
+ fputs("\xe2\x96\x88", stdout);
+
+ fputs(ansi_normal(), stdout);
+
+ for (i = 0; i < k; i++)
+ fputs("\xe2\x96\x91", stdout);
+
+ printf(" %3"PRIu64"%%", 100U * p / 65535U);
+
+ fputs("\r", stdout);
+ if (colors_enabled())
+ fputs("\x1B[?25h", stdout);
+
+ fflush(stdout);
+}
+
+static uint64_t scale_progress(uint64_t scale, uint64_t p, uint64_t m) {
+ /* Calculates scale * p / m, but handles m == 0 safely, and saturates.
+ * Currently all callers use m >= 1, but we keep the check to be defensive.
+ */
+
+ if (p >= m || m == 0)
+ return scale;
+
+ return scale * p / m;
+}
+
+static void flush_progress(void) {
+ unsigned n, i;
+
+ if (!on_tty())
+ return;
+
+ n = (3 * columns()) / 4;
+
+ putchar('\r');
+
+ for (i = 0; i < n + 5; i++)
+ putchar(' ');
+
+ putchar('\r');
+ fflush(stdout);
+}
+
+#define debug(_offset, _fmt, ...) do { \
+ flush_progress(); \
+ log_debug(OFSfmt": " _fmt, _offset, ##__VA_ARGS__); \
+ } while (0)
+
+#define warning(_offset, _fmt, ...) do { \
+ flush_progress(); \
+ log_warning(OFSfmt": " _fmt, _offset, ##__VA_ARGS__); \
+ } while (0)
+
+#define error(_offset, _fmt, ...) do { \
+ flush_progress(); \
+ log_error(OFSfmt": " _fmt, (uint64_t)_offset, ##__VA_ARGS__); \
+ } while (0)
+
+#define error_errno(_offset, error, _fmt, ...) do { \
+ flush_progress(); \
+ log_error_errno(error, OFSfmt": " _fmt, (uint64_t)_offset, ##__VA_ARGS__); \
+ } while (0)
+
+static int hash_payload(JournalFile *f, Object *o, uint64_t offset, const uint8_t *src, uint64_t size, uint64_t *res_hash) {
+ Compression c;
+ int r;
+
+ assert(o);
+ assert(src);
+ assert(res_hash);
+
+ c = COMPRESSION_FROM_OBJECT(o);
+ if (c < 0)
+ return -EBADMSG;
+ if (c != COMPRESSION_NONE) {
+ _cleanup_free_ void *b = NULL;
+ size_t b_size;
+
+ r = decompress_blob(c, src, size, &b, &b_size, 0);
+ if (r < 0) {
+ error_errno(offset, r, "%s decompression failed: %m",
+ compression_to_string(c));
+ return r;
+ }
+
+ *res_hash = journal_file_hash_data(f, b, b_size);
+ } else
+ *res_hash = journal_file_hash_data(f, src, size);
+
+ return 0;
+}
+
+static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o) {
+ assert(f);
+ assert(offset);
+ assert(o);
+
+ /* This does various superficial tests about the length an
+ * possible field values. It does not follow any references to
+ * other objects. */
+
+ if ((o->object.flags & _OBJECT_COMPRESSED_MASK) != 0 &&
+ o->object.type != OBJECT_DATA) {
+ error(offset,
+ "Found compressed object of type %s that isn't of type data, which is not allowed.",
+ journal_object_type_to_string(o->object.type));
+ return -EBADMSG;
+ }
+
+ switch (o->object.type) {
+
+ case OBJECT_DATA: {
+ uint64_t h1, h2;
+ int r;
+
+ if (le64toh(o->data.entry_offset) == 0)
+ warning(offset, "Unused data (entry_offset==0)");
+
+ if ((le64toh(o->data.entry_offset) == 0) ^ (le64toh(o->data.n_entries) == 0)) {
+ error(offset, "Bad n_entries: %"PRIu64, le64toh(o->data.n_entries));
+ return -EBADMSG;
+ }
+
+ if (le64toh(o->object.size) - journal_file_data_payload_offset(f) <= 0) {
+ error(offset, "Bad object size (<= %zu): %"PRIu64,
+ journal_file_data_payload_offset(f),
+ le64toh(o->object.size));
+ return -EBADMSG;
+ }
+
+ h1 = le64toh(o->data.hash);
+ r = hash_payload(f, o, offset, journal_file_data_payload_field(f, o),
+ le64toh(o->object.size) - journal_file_data_payload_offset(f),
+ &h2);
+ if (r < 0)
+ return r;
+
+ if (h1 != h2) {
+ error(offset, "Invalid hash (%08" PRIx64 " vs. %08" PRIx64 ")", h1, h2);
+ return -EBADMSG;
+ }
+
+ if (!VALID64(le64toh(o->data.next_hash_offset)) ||
+ !VALID64(le64toh(o->data.next_field_offset)) ||
+ !VALID64(le64toh(o->data.entry_offset)) ||
+ !VALID64(le64toh(o->data.entry_array_offset))) {
+ error(offset, "Invalid offset (next_hash_offset="OFSfmt", next_field_offset="OFSfmt", entry_offset="OFSfmt", entry_array_offset="OFSfmt,
+ le64toh(o->data.next_hash_offset),
+ le64toh(o->data.next_field_offset),
+ le64toh(o->data.entry_offset),
+ le64toh(o->data.entry_array_offset));
+ return -EBADMSG;
+ }
+
+ break;
+ }
+
+ case OBJECT_FIELD: {
+ uint64_t h1, h2;
+ int r;
+
+ if (le64toh(o->object.size) - offsetof(Object, field.payload) <= 0) {
+ error(offset,
+ "Bad field size (<= %zu): %"PRIu64,
+ offsetof(Object, field.payload),
+ le64toh(o->object.size));
+ return -EBADMSG;
+ }
+
+ h1 = le64toh(o->field.hash);
+ r = hash_payload(f, o, offset, o->field.payload,
+ le64toh(o->object.size) - offsetof(Object, field.payload),
+ &h2);
+ if (r < 0)
+ return r;
+
+ if (h1 != h2) {
+ error(offset, "Invalid hash (%08" PRIx64 " vs. %08" PRIx64 ")", h1, h2);
+ return -EBADMSG;
+ }
+
+ if (!VALID64(le64toh(o->field.next_hash_offset)) ||
+ !VALID64(le64toh(o->field.head_data_offset))) {
+ error(offset,
+ "Invalid offset (next_hash_offset="OFSfmt", head_data_offset="OFSfmt,
+ le64toh(o->field.next_hash_offset),
+ le64toh(o->field.head_data_offset));
+ return -EBADMSG;
+ }
+ break;
+ }
+
+ case OBJECT_ENTRY:
+ if ((le64toh(o->object.size) - offsetof(Object, entry.items)) % journal_file_entry_item_size(f) != 0) {
+ error(offset,
+ "Bad entry size (<= %zu): %"PRIu64,
+ offsetof(Object, entry.items),
+ le64toh(o->object.size));
+ return -EBADMSG;
+ }
+
+ if ((le64toh(o->object.size) - offsetof(Object, entry.items)) / journal_file_entry_item_size(f) <= 0) {
+ error(offset,
+ "Invalid number items in entry: %"PRIu64,
+ (le64toh(o->object.size) - offsetof(Object, entry.items)) / journal_file_entry_item_size(f));
+ return -EBADMSG;
+ }
+
+ if (le64toh(o->entry.seqnum) <= 0) {
+ error(offset,
+ "Invalid entry seqnum: %"PRIx64,
+ le64toh(o->entry.seqnum));
+ return -EBADMSG;
+ }
+
+ if (!VALID_REALTIME(le64toh(o->entry.realtime))) {
+ error(offset,
+ "Invalid entry realtime timestamp: %"PRIu64,
+ le64toh(o->entry.realtime));
+ return -EBADMSG;
+ }
+
+ if (!VALID_MONOTONIC(le64toh(o->entry.monotonic))) {
+ error(offset,
+ "Invalid entry monotonic timestamp: %"PRIu64,
+ le64toh(o->entry.monotonic));
+ return -EBADMSG;
+ }
+
+ for (uint64_t i = 0; i < journal_file_entry_n_items(f, o); i++) {
+ if (journal_file_entry_item_object_offset(f, o, i) == 0 ||
+ !VALID64(journal_file_entry_item_object_offset(f, o, i))) {
+ error(offset,
+ "Invalid entry item (%"PRIu64"/%"PRIu64") offset: "OFSfmt,
+ i, journal_file_entry_n_items(f, o),
+ journal_file_entry_item_object_offset(f, o, i));
+ return -EBADMSG;
+ }
+ }
+
+ break;
+
+ case OBJECT_DATA_HASH_TABLE:
+ case OBJECT_FIELD_HASH_TABLE:
+ if ((le64toh(o->object.size) - offsetof(Object, hash_table.items)) % sizeof(HashItem) != 0 ||
+ (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem) <= 0) {
+ error(offset,
+ "Invalid %s size: %"PRIu64,
+ journal_object_type_to_string(o->object.type),
+ le64toh(o->object.size));
+ return -EBADMSG;
+ }
+
+ for (uint64_t i = 0; i < journal_file_hash_table_n_items(o); i++) {
+ if (o->hash_table.items[i].head_hash_offset != 0 &&
+ !VALID64(le64toh(o->hash_table.items[i].head_hash_offset))) {
+ error(offset,
+ "Invalid %s hash table item (%"PRIu64"/%"PRIu64") head_hash_offset: "OFSfmt,
+ journal_object_type_to_string(o->object.type),
+ i, journal_file_hash_table_n_items(o),
+ le64toh(o->hash_table.items[i].head_hash_offset));
+ return -EBADMSG;
+ }
+ if (o->hash_table.items[i].tail_hash_offset != 0 &&
+ !VALID64(le64toh(o->hash_table.items[i].tail_hash_offset))) {
+ error(offset,
+ "Invalid %s hash table item (%"PRIu64"/%"PRIu64") tail_hash_offset: "OFSfmt,
+ journal_object_type_to_string(o->object.type),
+ i, journal_file_hash_table_n_items(o),
+ le64toh(o->hash_table.items[i].tail_hash_offset));
+ return -EBADMSG;
+ }
+
+ if ((o->hash_table.items[i].head_hash_offset != 0) !=
+ (o->hash_table.items[i].tail_hash_offset != 0)) {
+ error(offset,
+ "Invalid %s hash table item (%"PRIu64"/%"PRIu64"): head_hash_offset="OFSfmt" tail_hash_offset="OFSfmt,
+ journal_object_type_to_string(o->object.type),
+ i, journal_file_hash_table_n_items(o),
+ le64toh(o->hash_table.items[i].head_hash_offset),
+ le64toh(o->hash_table.items[i].tail_hash_offset));
+ return -EBADMSG;
+ }
+ }
+
+ break;
+
+ case OBJECT_ENTRY_ARRAY:
+ if ((le64toh(o->object.size) - offsetof(Object, entry_array.items)) % journal_file_entry_array_item_size(f) != 0 ||
+ (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / journal_file_entry_array_item_size(f) <= 0) {
+ error(offset,
+ "Invalid object entry array size: %"PRIu64,
+ le64toh(o->object.size));
+ return -EBADMSG;
+ }
+
+ if (!VALID64(le64toh(o->entry_array.next_entry_array_offset))) {
+ error(offset,
+ "Invalid object entry array next_entry_array_offset: "OFSfmt,
+ le64toh(o->entry_array.next_entry_array_offset));
+ return -EBADMSG;
+ }
+
+ for (uint64_t i = 0; i < journal_file_entry_array_n_items(f, o); i++) {
+ uint64_t q = journal_file_entry_array_item(f, o, i);
+ if (q != 0 && !VALID64(q)) {
+ error(offset,
+ "Invalid object entry array item (%"PRIu64"/%"PRIu64"): "OFSfmt,
+ i, journal_file_entry_array_n_items(f, o), q);
+ return -EBADMSG;
+ }
+ }
+
+ break;
+
+ case OBJECT_TAG:
+ if (le64toh(o->object.size) != sizeof(TagObject)) {
+ error(offset,
+ "Invalid object tag size: %"PRIu64,
+ le64toh(o->object.size));
+ return -EBADMSG;
+ }
+
+ if (!VALID_EPOCH(le64toh(o->tag.epoch))) {
+ error(offset,
+ "Invalid object tag epoch: %"PRIu64,
+ le64toh(o->tag.epoch));
+ return -EBADMSG;
+ }
+
+ break;
+ }
+
+ return 0;
+}
+
+static int write_uint64(FILE *fp, uint64_t p) {
+ if (fwrite(&p, sizeof(p), 1, fp) != 1)
+ return -EIO;
+
+ return 0;
+}
+
+static int contains_uint64(MMapFileDescriptor *f, uint64_t n, uint64_t p) {
+ uint64_t a, b;
+ int r;
+
+ assert(f);
+
+ /* Bisection ... */
+
+ a = 0; b = n;
+ while (a < b) {
+ uint64_t c, *z;
+
+ c = (a + b) / 2;
+
+ r = mmap_cache_fd_get(f, 0, false, c * sizeof(uint64_t), sizeof(uint64_t), NULL, (void **) &z);
+ if (r < 0)
+ return r;
+
+ if (*z == p)
+ return 1;
+
+ if (a + 1 >= b)
+ return 0;
+
+ if (p < *z)
+ b = c;
+ else
+ a = c;
+ }
+
+ return 0;
+}
+
+static int verify_data(
+ JournalFile *f,
+ Object *o, uint64_t p,
+ MMapFileDescriptor *cache_entry_fd, uint64_t n_entries,
+ MMapFileDescriptor *cache_entry_array_fd, uint64_t n_entry_arrays) {
+
+ uint64_t i, n, a, last, q;
+ int r;
+
+ assert(f);
+ assert(o);
+ assert(cache_entry_fd);
+ assert(cache_entry_array_fd);
+
+ n = le64toh(o->data.n_entries);
+ a = le64toh(o->data.entry_array_offset);
+
+ /* Entry array means at least two objects */
+ if (a && n < 2) {
+ error(p, "Entry array present (entry_array_offset="OFSfmt", but n_entries=%"PRIu64")", a, n);
+ return -EBADMSG;
+ }
+
+ if (n == 0)
+ return 0;
+
+ /* We already checked that earlier */
+ assert(o->data.entry_offset);
+
+ last = q = le64toh(o->data.entry_offset);
+ if (!contains_uint64(cache_entry_fd, n_entries, q)) {
+ error(p, "Data object references invalid entry at "OFSfmt, q);
+ return -EBADMSG;
+ }
+
+ r = journal_file_move_to_entry_by_offset(f, q, DIRECTION_DOWN, NULL, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ error(q, "Entry object doesn't exist in the main entry array");
+ return -EBADMSG;
+ }
+
+ i = 1;
+ while (i < n) {
+ uint64_t next, m, j;
+
+ if (a == 0) {
+ error(p, "Array chain too short");
+ return -EBADMSG;
+ }
+
+ if (!contains_uint64(cache_entry_array_fd, n_entry_arrays, a)) {
+ error(p, "Invalid array offset "OFSfmt, a);
+ return -EBADMSG;
+ }
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+ if (r < 0)
+ return r;
+
+ next = le64toh(o->entry_array.next_entry_array_offset);
+ if (next != 0 && next <= a) {
+ error(p, "Array chain has cycle (jumps back from "OFSfmt" to "OFSfmt")", a, next);
+ return -EBADMSG;
+ }
+
+ m = journal_file_entry_array_n_items(f, o);
+ for (j = 0; i < n && j < m; i++, j++) {
+
+ q = journal_file_entry_array_item(f, o, j);
+ if (q <= last) {
+ error(p, "Data object's entry array not sorted (%"PRIu64" <= %"PRIu64")", q, last);
+ return -EBADMSG;
+ }
+ last = q;
+
+ if (!contains_uint64(cache_entry_fd, n_entries, q)) {
+ error(p, "Data object references invalid entry at "OFSfmt, q);
+ return -EBADMSG;
+ }
+
+ r = journal_file_move_to_entry_by_offset(f, q, DIRECTION_DOWN, NULL, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ error(q, "Entry object doesn't exist in the main entry array");
+ return -EBADMSG;
+ }
+
+ /* Pointer might have moved, reposition */
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+ if (r < 0)
+ return r;
+ }
+
+ a = next;
+ }
+
+ return 0;
+}
+
+static int verify_data_hash_table(
+ JournalFile *f,
+ MMapFileDescriptor *cache_data_fd, uint64_t n_data,
+ MMapFileDescriptor *cache_entry_fd, uint64_t n_entries,
+ MMapFileDescriptor *cache_entry_array_fd, uint64_t n_entry_arrays,
+ usec_t *last_usec,
+ bool show_progress) {
+
+ uint64_t i, n;
+ int r;
+
+ assert(f);
+ assert(cache_data_fd);
+ assert(cache_entry_fd);
+ assert(cache_entry_array_fd);
+ assert(last_usec);
+
+ n = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
+ if (n <= 0)
+ return 0;
+
+ r = journal_file_map_data_hash_table(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to map data hash table: %m");
+
+ for (i = 0; i < n; i++) {
+ uint64_t last = 0, p;
+
+ if (show_progress)
+ draw_progress(0xC000 + scale_progress(0x3FFF, i, n), last_usec);
+
+ p = le64toh(f->data_hash_table[i].head_hash_offset);
+ while (p != 0) {
+ Object *o;
+ uint64_t next;
+
+ if (!contains_uint64(cache_data_fd, n_data, p)) {
+ error(p, "Invalid data object at hash entry %"PRIu64" of %"PRIu64, i, n);
+ return -EBADMSG;
+ }
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+ if (r < 0)
+ return r;
+
+ next = le64toh(o->data.next_hash_offset);
+ if (next != 0 && next <= p) {
+ error(p, "Hash chain has a cycle in hash entry %"PRIu64" of %"PRIu64, i, n);
+ return -EBADMSG;
+ }
+
+ if (le64toh(o->data.hash) % n != i) {
+ error(p, "Hash value mismatch in hash entry %"PRIu64" of %"PRIu64, i, n);
+ return -EBADMSG;
+ }
+
+ r = verify_data(f, o, p, cache_entry_fd, n_entries, cache_entry_array_fd, n_entry_arrays);
+ if (r < 0)
+ return r;
+
+ last = p;
+ p = next;
+ }
+
+ if (last != le64toh(f->data_hash_table[i].tail_hash_offset)) {
+ error(p,
+ "Tail hash pointer mismatch in hash table (%"PRIu64" != %"PRIu64")",
+ last,
+ le64toh(f->data_hash_table[i].tail_hash_offset));
+ return -EBADMSG;
+ }
+ }
+
+ return 0;
+}
+
+static int data_object_in_hash_table(JournalFile *f, uint64_t hash, uint64_t p) {
+ uint64_t n, h, q;
+ int r;
+ assert(f);
+
+ n = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
+ if (n <= 0)
+ return 0;
+
+ r = journal_file_map_data_hash_table(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to map data hash table: %m");
+
+ h = hash % n;
+
+ q = le64toh(f->data_hash_table[h].head_hash_offset);
+ while (q != 0) {
+ Object *o;
+
+ if (p == q)
+ return 1;
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, q, &o);
+ if (r < 0)
+ return r;
+
+ q = le64toh(o->data.next_hash_offset);
+ }
+
+ return 0;
+}
+
+static int verify_entry(
+ JournalFile *f,
+ Object *o, uint64_t p,
+ MMapFileDescriptor *cache_data_fd, uint64_t n_data,
+ bool last) {
+
+ uint64_t i, n;
+ int r;
+
+ assert(f);
+ assert(o);
+ assert(cache_data_fd);
+
+ n = journal_file_entry_n_items(f, o);
+ for (i = 0; i < n; i++) {
+ uint64_t q;
+ Object *u;
+
+ q = journal_file_entry_item_object_offset(f, o, i);
+
+ if (!contains_uint64(cache_data_fd, n_data, q)) {
+ error(p, "Invalid data object of entry");
+ return -EBADMSG;
+ }
+
+ r = journal_file_move_to_object(f, OBJECT_DATA, q, &u);
+ if (r < 0)
+ return r;
+
+ r = data_object_in_hash_table(f, le64toh(u->data.hash), q);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ error(p, "Data object missing from hash table");
+ return -EBADMSG;
+ }
+
+ /* Pointer might have moved, reposition */
+ r = journal_file_move_to_object(f, OBJECT_DATA, q, &u);
+ if (r < 0)
+ return r;
+
+ r = journal_file_move_to_entry_by_offset_for_data(f, u, p, DIRECTION_DOWN, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ /* The last entry object has a very high chance of not being referenced as journal files
+ * almost always run out of space during linking of entry items when trying to add a new
+ * entry array so let's not error in that scenario. */
+ if (r == 0 && !last) {
+ error(p, "Entry object not referenced by linked data object at "OFSfmt, q);
+ return -EBADMSG;
+ }
+ }
+
+ return 0;
+}
+
+static int verify_entry_array(
+ JournalFile *f,
+ MMapFileDescriptor *cache_data_fd, uint64_t n_data,
+ MMapFileDescriptor *cache_entry_fd, uint64_t n_entries,
+ MMapFileDescriptor *cache_entry_array_fd, uint64_t n_entry_arrays,
+ usec_t *last_usec,
+ bool show_progress) {
+
+ uint64_t i = 0, a, n, last = 0;
+ int r;
+
+ assert(f);
+ assert(cache_data_fd);
+ assert(cache_entry_fd);
+ assert(cache_entry_array_fd);
+ assert(last_usec);
+
+ n = le64toh(f->header->n_entries);
+ a = le64toh(f->header->entry_array_offset);
+ while (i < n) {
+ uint64_t next, m, j;
+ Object *o;
+
+ if (show_progress)
+ draw_progress(0x8000 + scale_progress(0x3FFF, i, n), last_usec);
+
+ if (a == 0) {
+ error(a, "Array chain too short at %"PRIu64" of %"PRIu64, i, n);
+ return -EBADMSG;
+ }
+
+ if (!contains_uint64(cache_entry_array_fd, n_entry_arrays, a)) {
+ error(a, "Invalid array %"PRIu64" of %"PRIu64, i, n);
+ return -EBADMSG;
+ }
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+ if (r < 0)
+ return r;
+
+ next = le64toh(o->entry_array.next_entry_array_offset);
+ if (next != 0 && next <= a) {
+ error(a, "Array chain has cycle at %"PRIu64" of %"PRIu64" (jumps back from to "OFSfmt")", i, n, next);
+ return -EBADMSG;
+ }
+
+ m = journal_file_entry_array_n_items(f, o);
+ for (j = 0; i < n && j < m; i++, j++) {
+ uint64_t p;
+
+ p = journal_file_entry_array_item(f, o, j);
+ if (p <= last) {
+ error(a, "Entry array not sorted at %"PRIu64" of %"PRIu64, i, n);
+ return -EBADMSG;
+ }
+ last = p;
+
+ if (!contains_uint64(cache_entry_fd, n_entries, p)) {
+ error(a, "Invalid array entry at %"PRIu64" of %"PRIu64, i, n);
+ return -EBADMSG;
+ }
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
+ if (r < 0)
+ return r;
+
+ r = verify_entry(f, o, p, cache_data_fd, n_data, /*last=*/ i + 1 == n);
+ if (r < 0)
+ return r;
+
+ /* Pointer might have moved, reposition */
+ r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+ if (r < 0)
+ return r;
+ }
+
+ a = next;
+ }
+
+ return 0;
+}
+
+static int verify_hash_table(
+ Object *o, uint64_t p, uint64_t *n_hash_tables, uint64_t header_offset, uint64_t header_size) {
+
+ assert(o);
+ assert(n_hash_tables);
+
+ if (*n_hash_tables > 1) {
+ error(p,
+ "More than one %s: %" PRIu64,
+ journal_object_type_to_string(o->object.type),
+ *n_hash_tables);
+ return -EBADMSG;
+ }
+
+ if (header_offset != p + offsetof(Object, hash_table.items)) {
+ error(p,
+ "Header offset for %s invalid (%" PRIu64 " != %" PRIu64 ")",
+ journal_object_type_to_string(o->object.type),
+ header_offset,
+ p + offsetof(Object, hash_table.items));
+ return -EBADMSG;
+ }
+
+ if (header_size != le64toh(o->object.size) - offsetof(Object, hash_table.items)) {
+ error(p,
+ "Header size for %s invalid (%" PRIu64 " != %" PRIu64 ")",
+ journal_object_type_to_string(o->object.type),
+ header_size,
+ le64toh(o->object.size) - offsetof(Object, hash_table.items));
+ return -EBADMSG;
+ }
+
+ (*n_hash_tables)++;
+
+ return 0;
+}
+
+int journal_file_verify(
+ JournalFile *f,
+ const char *key,
+ usec_t *first_contained, usec_t *last_validated, usec_t *last_contained,
+ bool show_progress) {
+ int r;
+ Object *o;
+ uint64_t p = 0, last_epoch = 0, last_tag_realtime = 0;
+
+ uint64_t entry_seqnum = 0, entry_monotonic = 0, entry_realtime = 0;
+ usec_t min_entry_realtime = USEC_INFINITY, max_entry_realtime = 0;
+ sd_id128_t entry_boot_id = {}; /* Unnecessary initialization to appease gcc */
+ bool entry_seqnum_set = false, entry_monotonic_set = false, entry_realtime_set = false, found_main_entry_array = false;
+ uint64_t n_objects = 0, n_entries = 0, n_data = 0, n_fields = 0, n_data_hash_tables = 0, n_field_hash_tables = 0, n_entry_arrays = 0, n_tags = 0;
+ usec_t last_usec = 0;
+ _cleanup_close_ int data_fd = -EBADF, entry_fd = -EBADF, entry_array_fd = -EBADF;
+ _cleanup_fclose_ FILE *data_fp = NULL, *entry_fp = NULL, *entry_array_fp = NULL;
+ MMapFileDescriptor *cache_data_fd = NULL, *cache_entry_fd = NULL, *cache_entry_array_fd = NULL;
+ unsigned i;
+ bool found_last = false;
+ const char *tmp_dir = NULL;
+ MMapCache *m;
+
+#if HAVE_GCRYPT
+ uint64_t last_tag = 0;
+#endif
+ assert(f);
+
+ if (key) {
+#if HAVE_GCRYPT
+ r = journal_file_parse_verification_key(f, key);
+ if (r < 0) {
+ log_error("Failed to parse seed.");
+ return r;
+ }
+#else
+ return -EOPNOTSUPP;
+#endif
+ } else if (JOURNAL_HEADER_SEALED(f->header))
+ return -ENOKEY;
+
+ r = var_tmp_dir(&tmp_dir);
+ if (r < 0) {
+ log_error_errno(r, "Failed to determine temporary directory: %m");
+ goto fail;
+ }
+
+ data_fd = open_tmpfile_unlinkable(tmp_dir, O_RDWR | O_CLOEXEC);
+ if (data_fd < 0) {
+ r = log_error_errno(data_fd, "Failed to create data file: %m");
+ goto fail;
+ }
+
+ entry_fd = open_tmpfile_unlinkable(tmp_dir, O_RDWR | O_CLOEXEC);
+ if (entry_fd < 0) {
+ r = log_error_errno(entry_fd, "Failed to create entry file: %m");
+ goto fail;
+ }
+
+ entry_array_fd = open_tmpfile_unlinkable(tmp_dir, O_RDWR | O_CLOEXEC);
+ if (entry_array_fd < 0) {
+ r = log_error_errno(entry_array_fd,
+ "Failed to create entry array file: %m");
+ goto fail;
+ }
+
+ m = mmap_cache_fd_cache(f->cache_fd);
+ r = mmap_cache_add_fd(m, data_fd, PROT_READ|PROT_WRITE, &cache_data_fd);
+ if (r < 0) {
+ log_error_errno(r, "Failed to cache data file: %m");
+ goto fail;
+ }
+
+ r = mmap_cache_add_fd(m, entry_fd, PROT_READ|PROT_WRITE, &cache_entry_fd);
+ if (r < 0) {
+ log_error_errno(r, "Failed to cache entry file: %m");
+ goto fail;
+ }
+
+ r = mmap_cache_add_fd(m, entry_array_fd, PROT_READ|PROT_WRITE, &cache_entry_array_fd);
+ if (r < 0) {
+ log_error_errno(r, "Failed to cache entry array file: %m");
+ goto fail;
+ }
+
+ r = take_fdopen_unlocked(&data_fd, "w+", &data_fp);
+ if (r < 0) {
+ log_error_errno(r, "Failed to open data file stream: %m");
+ goto fail;
+ }
+
+ r = take_fdopen_unlocked(&entry_fd, "w+", &entry_fp);
+ if (r < 0) {
+ log_error_errno(r, "Failed to open entry file stream: %m");
+ goto fail;
+ }
+
+ r = take_fdopen_unlocked(&entry_array_fd, "w+", &entry_array_fp);
+ if (r < 0) {
+ log_error_errno(r, "Failed to open entry array file stream: %m");
+ goto fail;
+ }
+
+ if (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SUPPORTED) {
+ log_error("Cannot verify file with unknown extensions.");
+ r = -EOPNOTSUPP;
+ goto fail;
+ }
+
+ for (i = 0; i < sizeof(f->header->reserved); i++)
+ if (f->header->reserved[i] != 0) {
+ error(offsetof(Header, reserved[i]), "Reserved field is non-zero");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_SEALED_CONTINUOUS(f->header))
+ warning(p,
+ "This log file was sealed with an old journald version where the sequence of seals might not be continuous. We cannot guarantee completeness.");
+
+ /* First iteration: we go through all objects, verify the
+ * superficial structure, headers, hashes. */
+
+ p = le64toh(f->header->header_size);
+ for (;;) {
+ /* Early exit if there are no objects in the file, at all */
+ if (le64toh(f->header->tail_object_offset) == 0)
+ break;
+
+ if (show_progress)
+ draw_progress(scale_progress(0x7FFF, p, le64toh(f->header->tail_object_offset)), &last_usec);
+
+ r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o);
+ if (r < 0) {
+ error_errno(p, r, "Invalid object: %m");
+ goto fail;
+ }
+
+ if (p > le64toh(f->header->tail_object_offset)) {
+ error(offsetof(Header, tail_object_offset),
+ "Invalid tail object pointer (%"PRIu64" > %"PRIu64")",
+ p,
+ le64toh(f->header->tail_object_offset));
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ n_objects++;
+
+ r = journal_file_object_verify(f, p, o);
+ if (r < 0) {
+ error_errno(p, r, "Invalid object contents: %m");
+ goto fail;
+ }
+
+ if (!!(o->object.flags & OBJECT_COMPRESSED_XZ) +
+ !!(o->object.flags & OBJECT_COMPRESSED_LZ4) +
+ !!(o->object.flags & OBJECT_COMPRESSED_ZSTD) > 1) {
+ error(p, "Object has multiple compression flags set (flags: 0x%x)", o->object.flags);
+ r = -EINVAL;
+ goto fail;
+ }
+
+ if ((o->object.flags & OBJECT_COMPRESSED_XZ) && !JOURNAL_HEADER_COMPRESSED_XZ(f->header)) {
+ error(p, "XZ compressed object in file without XZ compression");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if ((o->object.flags & OBJECT_COMPRESSED_LZ4) && !JOURNAL_HEADER_COMPRESSED_LZ4(f->header)) {
+ error(p, "LZ4 compressed object in file without LZ4 compression");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if ((o->object.flags & OBJECT_COMPRESSED_ZSTD) && !JOURNAL_HEADER_COMPRESSED_ZSTD(f->header)) {
+ error(p, "ZSTD compressed object in file without ZSTD compression");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ switch (o->object.type) {
+
+ case OBJECT_DATA:
+ r = write_uint64(data_fp, p);
+ if (r < 0)
+ goto fail;
+
+ n_data++;
+ break;
+
+ case OBJECT_FIELD:
+ n_fields++;
+ break;
+
+ case OBJECT_ENTRY:
+ if (JOURNAL_HEADER_SEALED(f->header) && n_tags <= 0) {
+ error(p, "First entry before first tag");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ r = write_uint64(entry_fp, p);
+ if (r < 0)
+ goto fail;
+
+ if (le64toh(o->entry.realtime) < last_tag_realtime) {
+ error(p,
+ "Older entry after newer tag (%"PRIu64" < %"PRIu64")",
+ le64toh(o->entry.realtime),
+ last_tag_realtime);
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (!entry_seqnum_set &&
+ le64toh(o->entry.seqnum) != le64toh(f->header->head_entry_seqnum)) {
+ error(p,
+ "Head entry sequence number incorrect (%"PRIu64" != %"PRIu64")",
+ le64toh(o->entry.seqnum),
+ le64toh(f->header->head_entry_seqnum));
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (entry_seqnum_set &&
+ entry_seqnum >= le64toh(o->entry.seqnum)) {
+ error(p,
+ "Entry sequence number out of synchronization (%"PRIu64" >= %"PRIu64")",
+ entry_seqnum,
+ le64toh(o->entry.seqnum));
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ entry_seqnum = le64toh(o->entry.seqnum);
+ entry_seqnum_set = true;
+
+ if (entry_monotonic_set &&
+ sd_id128_equal(entry_boot_id, o->entry.boot_id) &&
+ entry_monotonic > le64toh(o->entry.monotonic)) {
+ error(p,
+ "Entry timestamp out of synchronization (%"PRIu64" > %"PRIu64")",
+ entry_monotonic,
+ le64toh(o->entry.monotonic));
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ entry_monotonic = le64toh(o->entry.monotonic);
+ entry_boot_id = o->entry.boot_id;
+ entry_monotonic_set = true;
+
+ if (!entry_realtime_set &&
+ le64toh(o->entry.realtime) != le64toh(f->header->head_entry_realtime)) {
+ error(p,
+ "Head entry realtime timestamp incorrect (%"PRIu64" != %"PRIu64")",
+ le64toh(o->entry.realtime),
+ le64toh(f->header->head_entry_realtime));
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ entry_realtime = le64toh(o->entry.realtime);
+ entry_realtime_set = true;
+
+ max_entry_realtime = MAX(max_entry_realtime, le64toh(o->entry.realtime));
+ min_entry_realtime = MIN(min_entry_realtime, le64toh(o->entry.realtime));
+
+ n_entries++;
+ break;
+
+ case OBJECT_DATA_HASH_TABLE:
+ r = verify_hash_table(o, p, &n_data_hash_tables,
+ le64toh(f->header->data_hash_table_offset),
+ le64toh(f->header->data_hash_table_size));
+ if (r < 0)
+ goto fail;
+ break;
+
+ case OBJECT_FIELD_HASH_TABLE:
+ r = verify_hash_table(o, p, &n_field_hash_tables,
+ le64toh(f->header->field_hash_table_offset),
+ le64toh(f->header->field_hash_table_size));
+ if (r < 0)
+ goto fail;
+
+ break;
+
+ case OBJECT_ENTRY_ARRAY:
+ r = write_uint64(entry_array_fp, p);
+ if (r < 0)
+ goto fail;
+
+ if (p == le64toh(f->header->entry_array_offset)) {
+ if (found_main_entry_array) {
+ error(p, "More than one main entry array");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ found_main_entry_array = true;
+ }
+
+ n_entry_arrays++;
+ break;
+
+ case OBJECT_TAG:
+ if (!JOURNAL_HEADER_SEALED(f->header)) {
+ error(p, "Tag object in file without sealing");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (le64toh(o->tag.seqnum) != n_tags + 1) {
+ error(p,
+ "Tag sequence number out of synchronization (%"PRIu64" != %"PRIu64")",
+ le64toh(o->tag.seqnum),
+ n_tags + 1);
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (JOURNAL_HEADER_SEALED_CONTINUOUS(f->header)) {
+ if (!(n_tags == 0 || (n_tags == 1 && le64toh(o->tag.epoch) == last_epoch)
+ || le64toh(o->tag.epoch) == last_epoch + 1)) {
+ error(p,
+ "Epoch sequence not continuous (%"PRIu64" vs %"PRIu64")",
+ le64toh(o->tag.epoch),
+ last_epoch);
+ r = -EBADMSG;
+ goto fail;
+ }
+ } else {
+ if (le64toh(o->tag.epoch) < last_epoch) {
+ error(p,
+ "Epoch sequence out of synchronization (%"PRIu64" < %"PRIu64")",
+ le64toh(o->tag.epoch),
+ last_epoch);
+ r = -EBADMSG;
+ goto fail;
+ }
+ }
+
+#if HAVE_GCRYPT
+ if (JOURNAL_HEADER_SEALED(f->header)) {
+ uint64_t q, rt, rt_end;
+
+ debug(p, "Checking tag %"PRIu64"...", le64toh(o->tag.seqnum));
+
+ rt = f->fss_start_usec + le64toh(o->tag.epoch) * f->fss_interval_usec;
+ rt_end = usec_add(rt, f->fss_interval_usec);
+ if (entry_realtime_set && entry_realtime >= rt_end) {
+ error(p,
+ "tag/entry realtime timestamp out of synchronization (%"PRIu64" >= %"PRIu64")",
+ entry_realtime,
+ rt + f->fss_interval_usec);
+ r = -EBADMSG;
+ goto fail;
+ }
+ if (max_entry_realtime >= rt_end) {
+ error(p,
+ "Entry realtime (%"PRIu64", %s) is too late with respect to tag (%"PRIu64", %s)",
+ max_entry_realtime, FORMAT_TIMESTAMP(max_entry_realtime),
+ rt_end, FORMAT_TIMESTAMP(rt_end));
+ r = -EBADMSG;
+ goto fail;
+ }
+ if (min_entry_realtime < rt) {
+ error(p,
+ "Entry realtime (%"PRIu64", %s) is too early with respect to tag (%"PRIu64", %s)",
+ min_entry_realtime, FORMAT_TIMESTAMP(min_entry_realtime),
+ rt, FORMAT_TIMESTAMP(rt));
+ r = -EBADMSG;
+ goto fail;
+ }
+ min_entry_realtime = USEC_INFINITY;
+
+ /* OK, now we know the epoch. So let's now set
+ * it, and calculate the HMAC for everything
+ * since the last tag. */
+ r = journal_file_fsprg_seek(f, le64toh(o->tag.epoch));
+ if (r < 0)
+ goto fail;
+
+ r = journal_file_hmac_start(f);
+ if (r < 0)
+ goto fail;
+
+ if (last_tag == 0) {
+ r = journal_file_hmac_put_header(f);
+ if (r < 0)
+ goto fail;
+
+ q = le64toh(f->header->header_size);
+ } else
+ q = last_tag;
+
+ while (q <= p) {
+ r = journal_file_move_to_object(f, OBJECT_UNUSED, q, &o);
+ if (r < 0)
+ goto fail;
+
+ r = journal_file_hmac_put_object(f, OBJECT_UNUSED, o, q);
+ if (r < 0)
+ goto fail;
+
+ q = q + ALIGN64(le64toh(o->object.size));
+ }
+
+ /* Position might have changed, let's reposition things */
+ r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o);
+ if (r < 0)
+ goto fail;
+
+ if (memcmp(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH) != 0) {
+ error(p, "Tag failed verification");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ f->hmac_running = false;
+ last_tag_realtime = rt;
+ }
+
+ last_tag = p + ALIGN64(le64toh(o->object.size));
+#endif
+
+ last_epoch = le64toh(o->tag.epoch);
+
+ n_tags++;
+ break;
+ }
+
+ if (p == le64toh(f->header->tail_object_offset)) {
+ found_last = true;
+ break;
+ }
+
+ p = p + ALIGN64(le64toh(o->object.size));
+ };
+
+ if (!found_last && le64toh(f->header->tail_object_offset) != 0) {
+ error(le64toh(f->header->tail_object_offset),
+ "Tail object pointer dead (%"PRIu64" != 0)",
+ le64toh(f->header->tail_object_offset));
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (n_objects != le64toh(f->header->n_objects)) {
+ error(offsetof(Header, n_objects),
+ "Object number mismatch (%"PRIu64" != %"PRIu64")",
+ n_objects,
+ le64toh(f->header->n_objects));
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (n_entries != le64toh(f->header->n_entries)) {
+ error(offsetof(Header, n_entries),
+ "Entry number mismatch (%"PRIu64" != %"PRIu64")",
+ n_entries,
+ le64toh(f->header->n_entries));
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
+ n_data != le64toh(f->header->n_data)) {
+ error(offsetof(Header, n_data),
+ "Data number mismatch (%"PRIu64" != %"PRIu64")",
+ n_data,
+ le64toh(f->header->n_data));
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
+ n_fields != le64toh(f->header->n_fields)) {
+ error(offsetof(Header, n_fields),
+ "Field number mismatch (%"PRIu64" != %"PRIu64")",
+ n_fields,
+ le64toh(f->header->n_fields));
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_tags) &&
+ n_tags != le64toh(f->header->n_tags)) {
+ error(offsetof(Header, n_tags),
+ "Tag number mismatch (%"PRIu64" != %"PRIu64")",
+ n_tags,
+ le64toh(f->header->n_tags));
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays) &&
+ n_entry_arrays != le64toh(f->header->n_entry_arrays)) {
+ error(offsetof(Header, n_entry_arrays),
+ "Entry array number mismatch (%"PRIu64" != %"PRIu64")",
+ n_entry_arrays,
+ le64toh(f->header->n_entry_arrays));
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (!found_main_entry_array && le64toh(f->header->entry_array_offset) != 0) {
+ error(0, "Missing main entry array");
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (entry_seqnum_set &&
+ entry_seqnum != le64toh(f->header->tail_entry_seqnum)) {
+ error(offsetof(Header, tail_entry_seqnum),
+ "Tail entry sequence number incorrect (%"PRIu64" != %"PRIu64")",
+ entry_seqnum,
+ le64toh(f->header->tail_entry_seqnum));
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (entry_monotonic_set &&
+ (sd_id128_equal(entry_boot_id, f->header->tail_entry_boot_id) &&
+ JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(f->header) &&
+ entry_monotonic != le64toh(f->header->tail_entry_monotonic))) {
+ error(0,
+ "Invalid tail monotonic timestamp (%"PRIu64" != %"PRIu64")",
+ entry_monotonic,
+ le64toh(f->header->tail_entry_monotonic));
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (entry_realtime_set && entry_realtime != le64toh(f->header->tail_entry_realtime)) {
+ error(0,
+ "Invalid tail realtime timestamp (%"PRIu64" != %"PRIu64")",
+ entry_realtime,
+ le64toh(f->header->tail_entry_realtime));
+ r = -EBADMSG;
+ goto fail;
+ }
+
+ if (fflush(data_fp) != 0) {
+ r = log_error_errno(errno, "Failed to flush data file stream: %m");
+ goto fail;
+ }
+
+ if (fflush(entry_fp) != 0) {
+ r = log_error_errno(errno, "Failed to flush entry file stream: %m");
+ goto fail;
+ }
+
+ if (fflush(entry_array_fp) != 0) {
+ r = log_error_errno(errno, "Failed to flush entry array file stream: %m");
+ goto fail;
+ }
+
+ /* Second iteration: we follow all objects referenced from the
+ * two entry points: the object hash table and the entry
+ * array. We also check that everything referenced (directly
+ * or indirectly) in the data hash table also exists in the
+ * entry array, and vice versa. Note that we do not care for
+ * unreferenced objects. We only care that everything that is
+ * referenced is consistent. */
+
+ r = verify_entry_array(f,
+ cache_data_fd, n_data,
+ cache_entry_fd, n_entries,
+ cache_entry_array_fd, n_entry_arrays,
+ &last_usec,
+ show_progress);
+ if (r < 0)
+ goto fail;
+
+ r = verify_data_hash_table(f,
+ cache_data_fd, n_data,
+ cache_entry_fd, n_entries,
+ cache_entry_array_fd, n_entry_arrays,
+ &last_usec,
+ show_progress);
+ if (r < 0)
+ goto fail;
+
+ if (show_progress)
+ flush_progress();
+
+ mmap_cache_fd_free(cache_data_fd);
+ mmap_cache_fd_free(cache_entry_fd);
+ mmap_cache_fd_free(cache_entry_array_fd);
+
+ if (first_contained)
+ *first_contained = le64toh(f->header->head_entry_realtime);
+#if HAVE_GCRYPT
+ if (last_validated)
+ *last_validated = last_tag_realtime + f->fss_interval_usec;
+#endif
+ if (last_contained)
+ *last_contained = le64toh(f->header->tail_entry_realtime);
+
+ return 0;
+
+fail:
+ if (show_progress)
+ flush_progress();
+
+ log_error("File corruption detected at %s:%"PRIu64" (of %"PRIu64" bytes, %"PRIu64"%%).",
+ f->path,
+ p,
+ (uint64_t) f->last_stat.st_size,
+ 100U * p / (uint64_t) f->last_stat.st_size);
+
+ if (cache_data_fd)
+ mmap_cache_fd_free(cache_data_fd);
+
+ if (cache_entry_fd)
+ mmap_cache_fd_free(cache_entry_fd);
+
+ if (cache_entry_array_fd)
+ mmap_cache_fd_free(cache_entry_array_fd);
+
+ return r;
+}
diff --git a/src/libsystemd/sd-journal/journal-verify.h b/src/libsystemd/sd-journal/journal-verify.h
new file mode 100644
index 0000000..5790330
--- /dev/null
+++ b/src/libsystemd/sd-journal/journal-verify.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "journal-file.h"
+
+int journal_file_verify(JournalFile *f, const char *key, usec_t *first_contained, usec_t *last_validated, usec_t *last_contained, bool show_progress);
diff --git a/src/libsystemd/sd-journal/lookup3.c b/src/libsystemd/sd-journal/lookup3.c
new file mode 100644
index 0000000..c2a6406
--- /dev/null
+++ b/src/libsystemd/sd-journal/lookup3.c
@@ -0,0 +1,1002 @@
+/* SPDX-License-Identifier: LicenseRef-lookup3-public-domain */
+/* Slightly modified by Lennart Poettering, to avoid name clashes, and
+ * unexport a few functions. */
+
+#include "lookup3.h"
+
+#if HAVE_VALGRIND_VALGRIND_H
+# include <valgrind/valgrind.h>
+#else
+# define RUNNING_ON_VALGRIND 0
+#endif
+
+/*
+-------------------------------------------------------------------------------
+lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+
+These are functions for producing 32-bit hashes for hash table lookup.
+hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+are externally useful functions. Routines to test the hash are included
+if SELF_TEST is defined. You can use this free for any purpose. It's in
+the public domain. It has no warranty.
+
+You probably want to use hashlittle(). hashlittle() and hashbig()
+hash byte arrays. hashlittle() is faster than hashbig() on
+little-endian machines. Intel and AMD are little-endian machines.
+On second thought, you probably want hashlittle2(), which is identical to
+hashlittle() except it returns two 32-bit hashes for the price of one.
+You could implement hashbig2() if you wanted but I haven't bothered here.
+
+If you want to find a hash of, say, exactly 7 integers, do
+ a = i1; b = i2; c = i3;
+ mix(a,b,c);
+ a += i4; b += i5; c += i6;
+ mix(a,b,c);
+ a += i7;
+ final(a,b,c);
+then use c as the hash value. If you have a variable length array of
+4-byte integers to hash, use hashword(). If you have a byte array (like
+a character string), use hashlittle(). If you have several byte arrays, or
+a mix of things, see the comments above hashlittle().
+
+Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
+then mix those integers. This is fast (you can do a lot more thorough
+mixing with 12*3 instructions on 3 integers than you can with 3 instructions
+on 1 byte), but shoehorning those bytes into integers efficiently is messy.
+-------------------------------------------------------------------------------
+*/
+/* #define SELF_TEST 1 */
+
+#include <stdint.h> /* defines uint32_t etc */
+#include <stdio.h> /* defines printf for tests */
+#include <sys/param.h> /* attempt to define endianness */
+#include <time.h> /* defines time_t for timings in the test */
+#ifdef linux
+# include <endian.h> /* attempt to define endianness */
+#endif
+
+#if __GNUC__ >= 7
+_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
+#endif
+
+/*
+ * My best guess at if you are big-endian or little-endian. This may
+ * need adjustment.
+ */
+#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
+ __BYTE_ORDER == __LITTLE_ENDIAN) || \
+ (defined(i386) || defined(__i386__) || defined(__i486__) || \
+ defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
+# define HASH_LITTLE_ENDIAN 1
+# define HASH_BIG_ENDIAN 0
+#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
+ __BYTE_ORDER == __BIG_ENDIAN) || \
+ (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
+# define HASH_LITTLE_ENDIAN 0
+# define HASH_BIG_ENDIAN 1
+#else
+# define HASH_LITTLE_ENDIAN 0
+# define HASH_BIG_ENDIAN 0
+#endif
+
+#define hashsize(n) ((uint32_t)1<<(n))
+#define hashmask(n) (hashsize(n)-1)
+#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
+
+/*
+-------------------------------------------------------------------------------
+mix -- mix 3 32-bit values reversibly.
+
+This is reversible, so any information in (a,b,c) before mix() is
+still in (a,b,c) after mix().
+
+If four pairs of (a,b,c) inputs are run through mix(), or through
+mix() in reverse, there are at least 32 bits of the output that
+are sometimes the same for one pair and different for another pair.
+This was tested for:
+* pairs that differed by one bit, by two bits, in any combination
+ of top bits of (a,b,c), or in any combination of bottom bits of
+ (a,b,c).
+* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
+ the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ is commonly produced by subtraction) look like a single 1-bit
+ difference.
+* the base values were pseudorandom, all zero but one bit set, or
+ all zero plus a counter that starts at zero.
+
+Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
+satisfy this are
+ 4 6 8 16 19 4
+ 9 15 3 18 27 15
+ 14 9 3 7 17 3
+Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
+for "differ" defined as + with a one-bit base and a two-bit delta. I
+used http://burtleburtle.net/bob/hash/avalanche.html to choose
+the operations, constants, and arrangements of the variables.
+
+This does not achieve avalanche. There are input bits of (a,b,c)
+that fail to affect some output bits of (a,b,c), especially of a. The
+most thoroughly mixed value is c, but it doesn't really even achieve
+avalanche in c.
+
+This allows some parallelism. Read-after-writes are good at doubling
+the number of bits affected, so the goal of mixing pulls in the opposite
+direction as the goal of parallelism. I did what I could. Rotates
+seem to cost as much as shifts on every machine I could lay my hands
+on, and rotates are much kinder to the top and bottom bits, so I used
+rotates.
+-------------------------------------------------------------------------------
+*/
+#define mix(a,b,c) \
+{ \
+ a -= c; a ^= rot(c, 4); c += b; \
+ b -= a; b ^= rot(a, 6); a += c; \
+ c -= b; c ^= rot(b, 8); b += a; \
+ a -= c; a ^= rot(c,16); c += b; \
+ b -= a; b ^= rot(a,19); a += c; \
+ c -= b; c ^= rot(b, 4); b += a; \
+}
+
+/*
+-------------------------------------------------------------------------------
+final -- final mixing of 3 32-bit values (a,b,c) into c
+
+Pairs of (a,b,c) values differing in only a few bits will usually
+produce values of c that look totally different. This was tested for
+* pairs that differed by one bit, by two bits, in any combination
+ of top bits of (a,b,c), or in any combination of bottom bits of
+ (a,b,c).
+* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
+ the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ is commonly produced by subtraction) look like a single 1-bit
+ difference.
+* the base values were pseudorandom, all zero but one bit set, or
+ all zero plus a counter that starts at zero.
+
+These constants passed:
+ 14 11 25 16 4 14 24
+ 12 14 25 16 4 14 24
+and these came close:
+ 4 8 15 26 3 22 24
+ 10 8 15 26 3 22 24
+ 11 8 15 26 3 22 24
+-------------------------------------------------------------------------------
+*/
+#define final(a,b,c) \
+{ \
+ c ^= b; c -= rot(b,14); \
+ a ^= c; a -= rot(c,11); \
+ b ^= a; b -= rot(a,25); \
+ c ^= b; c -= rot(b,16); \
+ a ^= c; a -= rot(c,4); \
+ b ^= a; b -= rot(a,14); \
+ c ^= b; c -= rot(b,24); \
+}
+
+/*
+--------------------------------------------------------------------
+ This works on all machines. To be useful, it requires
+ -- that the key be an array of uint32_t's, and
+ -- that the length be the number of uint32_t's in the key
+
+ The function hashword() is identical to hashlittle() on little-endian
+ machines, and identical to hashbig() on big-endian machines,
+ except that the length has to be measured in uint32_ts rather than in
+ bytes. hashlittle() is more complicated than hashword() only because
+ hashlittle() has to dance around fitting the key bytes into registers.
+--------------------------------------------------------------------
+*/
+uint32_t jenkins_hashword(
+const uint32_t *k, /* the key, an array of uint32_t values */
+size_t length, /* the length of the key, in uint32_ts */
+uint32_t initval) /* the previous hash, or an arbitrary value */
+{
+ uint32_t a,b,c;
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
+
+ /*------------------------------------------------- handle most of the key */
+ while (length > 3)
+ {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 3;
+ k += 3;
+ }
+
+ /*------------------------------------------- handle the last 3 uint32_t's */
+ switch(length) /* all the case statements fall through */
+ {
+ case 3 : c+=k[2];
+ case 2 : b+=k[1];
+ case 1 : a+=k[0];
+ final(a,b,c);
+ case 0: /* case 0: nothing left to add */
+ break;
+ }
+ /*------------------------------------------------------ report the result */
+ return c;
+}
+
+/*
+--------------------------------------------------------------------
+hashword2() -- same as hashword(), but take two seeds and return two
+32-bit values. pc and pb must both be nonnull, and *pc and *pb must
+both be initialized with seeds. If you pass in (*pb)==0, the output
+(*pc) will be the same as the return value from hashword().
+--------------------------------------------------------------------
+*/
+void jenkins_hashword2 (
+const uint32_t *k, /* the key, an array of uint32_t values */
+size_t length, /* the length of the key, in uint32_ts */
+uint32_t *pc, /* IN: seed OUT: primary hash value */
+uint32_t *pb) /* IN: more seed OUT: secondary hash value */
+{
+ uint32_t a,b,c;
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
+ c += *pb;
+
+ /*------------------------------------------------- handle most of the key */
+ while (length > 3)
+ {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 3;
+ k += 3;
+ }
+
+ /*------------------------------------------- handle the last 3 uint32_t's */
+ switch(length) /* all the case statements fall through */
+ {
+ case 3 : c+=k[2];
+ case 2 : b+=k[1];
+ case 1 : a+=k[0];
+ final(a,b,c);
+ case 0: /* case 0: nothing left to add */
+ break;
+ }
+ /*------------------------------------------------------ report the result */
+ *pc=c; *pb=b;
+}
+
+/*
+-------------------------------------------------------------------------------
+hashlittle() -- hash a variable-length key into a 32-bit value
+ k : the key (the unaligned variable-length array of bytes)
+ length : the length of the key, counting by bytes
+ initval : can be any 4-byte value
+Returns a 32-bit value. Every bit of the key affects every bit of
+the return value. Two keys differing by one or two bits will have
+totally different hash values.
+
+The best hash table sizes are powers of 2. There is no need to do
+mod a prime (mod is sooo slow!). If you need less than 32 bits,
+use a bitmask. For example, if you need only 10 bits, do
+ h = (h & hashmask(10));
+In which case, the hash table should have hashsize(10) elements.
+
+If you are hashing n strings (uint8_t **)k, do it like this:
+ for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
+
+By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
+code any way you wish, private, educational, or commercial. It's free.
+
+Use for hash table lookup, or anything where one collision in 2^^32 is
+acceptable. Do NOT use for cryptographic purposes.
+-------------------------------------------------------------------------------
+*/
+
+uint32_t jenkins_hashlittle( const void *key, size_t length, uint32_t initval)
+{
+ uint32_t a,b,c; /* internal state */
+ union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+
+ u.ptr = key;
+ if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
+ const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
+
+ /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 12;
+ k += 3;
+ }
+
+ /*----------------------------- handle the last (probably partial) block */
+ /*
+ * "k[2]&0xffffff" actually reads beyond the end of the string, but
+ * then masks off the part it's not allowed to read. Because the
+ * string is aligned, the masked-off tail is in the same word as the
+ * rest of the string. Every machine with memory protection I've seen
+ * does it on word boundaries, so is OK with this. But valgrind will
+ * still catch it and complain. The masking trick does make the hash
+ * noticeably faster for short strings (like English words).
+ */
+#define VALGRIND_LIKE (_unlikely_(HAS_FEATURE_ADDRESS_SANITIZER || \
+ HAS_FEATURE_MEMORY_SANITIZER || \
+ RUNNING_ON_VALGRIND))
+
+ if (!VALGRIND_LIKE) {
+ switch(length)
+ {
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
+ case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
+ case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
+ case 8 : b+=k[1]; a+=k[0]; break;
+ case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
+ case 6 : b+=k[1]&0xffff; a+=k[0]; break;
+ case 5 : b+=k[1]&0xff; a+=k[0]; break;
+ case 4 : a+=k[0]; break;
+ case 3 : a+=k[0]&0xffffff; break;
+ case 2 : a+=k[0]&0xffff; break;
+ case 1 : a+=k[0]&0xff; break;
+ case 0 : return c; /* zero length strings require no mixing */
+ }
+ } else {
+ const uint8_t *k8 = (const uint8_t *) k;
+
+ switch(length)
+ {
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
+ case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
+ case 9 : c+=k8[8]; /* fall through */
+ case 8 : b+=k[1]; a+=k[0]; break;
+ case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
+ case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
+ case 5 : b+=k8[4]; /* fall through */
+ case 4 : a+=k[0]; break;
+ case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
+ case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
+ case 1 : a+=k8[0]; break;
+ case 0 : return c;
+ }
+ }
+
+ } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
+ const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
+ const uint8_t *k8;
+
+ /*--------------- all but last block: aligned reads and different mixing */
+ while (length > 12)
+ {
+ a += k[0] + (((uint32_t)k[1])<<16);
+ b += k[2] + (((uint32_t)k[3])<<16);
+ c += k[4] + (((uint32_t)k[5])<<16);
+ mix(a,b,c);
+ length -= 12;
+ k += 6;
+ }
+
+ /*----------------------------- handle the last (probably partial) block */
+ k8 = (const uint8_t *)k;
+ switch(length)
+ {
+ case 12: c+=k[4]+(((uint32_t)k[5])<<16);
+ b+=k[2]+(((uint32_t)k[3])<<16);
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
+ case 10: c+=k[4];
+ b+=k[2]+(((uint32_t)k[3])<<16);
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 9 : c+=k8[8]; /* fall through */
+ case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
+ case 6 : b+=k[2];
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 5 : b+=k8[4]; /* fall through */
+ case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
+ case 2 : a+=k[0];
+ break;
+ case 1 : a+=k8[0];
+ break;
+ case 0 : return c; /* zero length requires no mixing */
+ }
+
+ } else { /* need to read the key one byte at a time */
+ const uint8_t *k = (const uint8_t *)key;
+
+ /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += k[0];
+ a += ((uint32_t)k[1])<<8;
+ a += ((uint32_t)k[2])<<16;
+ a += ((uint32_t)k[3])<<24;
+ b += k[4];
+ b += ((uint32_t)k[5])<<8;
+ b += ((uint32_t)k[6])<<16;
+ b += ((uint32_t)k[7])<<24;
+ c += k[8];
+ c += ((uint32_t)k[9])<<8;
+ c += ((uint32_t)k[10])<<16;
+ c += ((uint32_t)k[11])<<24;
+ mix(a,b,c);
+ length -= 12;
+ k += 12;
+ }
+
+ /*-------------------------------- last block: affect all 32 bits of (c) */
+ switch(length) /* all the case statements fall through */
+ {
+ case 12: c+=((uint32_t)k[11])<<24;
+ case 11: c+=((uint32_t)k[10])<<16;
+ case 10: c+=((uint32_t)k[9])<<8;
+ case 9 : c+=k[8];
+ case 8 : b+=((uint32_t)k[7])<<24;
+ case 7 : b+=((uint32_t)k[6])<<16;
+ case 6 : b+=((uint32_t)k[5])<<8;
+ case 5 : b+=k[4];
+ case 4 : a+=((uint32_t)k[3])<<24;
+ case 3 : a+=((uint32_t)k[2])<<16;
+ case 2 : a+=((uint32_t)k[1])<<8;
+ case 1 : a+=k[0];
+ break;
+ case 0 : return c;
+ }
+ }
+
+ final(a,b,c);
+ return c;
+}
+
+/*
+ * hashlittle2: return 2 32-bit hash values
+ *
+ * This is identical to hashlittle(), except it returns two 32-bit hash
+ * values instead of just one. This is good enough for hash table
+ * lookup with 2^^64 buckets, or if you want a second hash if you're not
+ * happy with the first, or if you want a probably-unique 64-bit ID for
+ * the key. *pc is better mixed than *pb, so use *pc first. If you want
+ * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
+ */
+void jenkins_hashlittle2(
+ const void *key, /* the key to hash */
+ size_t length, /* length of the key */
+ uint32_t *pc, /* IN: primary initval, OUT: primary hash */
+ uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */
+{
+ uint32_t a,b,c; /* internal state */
+ union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
+ c += *pb;
+
+ u.ptr = key;
+ if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
+ const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
+
+ /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 12;
+ k += 3;
+ }
+
+ /*----------------------------- handle the last (probably partial) block */
+ /*
+ * "k[2]&0xffffff" actually reads beyond the end of the string, but
+ * then masks off the part it's not allowed to read. Because the
+ * string is aligned, the masked-off tail is in the same word as the
+ * rest of the string. Every machine with memory protection I've seen
+ * does it on word boundaries, so is OK with this. But valgrind will
+ * still catch it and complain. The masking trick does make the hash
+ * noticeably faster for short strings (like English words).
+ */
+ if (!VALGRIND_LIKE) {
+ switch(length)
+ {
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
+ case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
+ case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
+ case 8 : b+=k[1]; a+=k[0]; break;
+ case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
+ case 6 : b+=k[1]&0xffff; a+=k[0]; break;
+ case 5 : b+=k[1]&0xff; a+=k[0]; break;
+ case 4 : a+=k[0]; break;
+ case 3 : a+=k[0]&0xffffff; break;
+ case 2 : a+=k[0]&0xffff; break;
+ case 1 : a+=k[0]&0xff; break;
+ case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
+ }
+ } else {
+ const uint8_t *k8 = (const uint8_t *)k;
+
+ switch(length)
+ {
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
+ case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
+ case 9 : c+=k8[8]; /* fall through */
+ case 8 : b+=k[1]; a+=k[0]; break;
+ case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
+ case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
+ case 5 : b+=k8[4]; /* fall through */
+ case 4 : a+=k[0]; break;
+ case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
+ case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
+ case 1 : a+=k8[0]; break;
+ case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
+ }
+ }
+
+ } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
+ const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
+ const uint8_t *k8;
+
+ /*--------------- all but last block: aligned reads and different mixing */
+ while (length > 12)
+ {
+ a += k[0] + (((uint32_t)k[1])<<16);
+ b += k[2] + (((uint32_t)k[3])<<16);
+ c += k[4] + (((uint32_t)k[5])<<16);
+ mix(a,b,c);
+ length -= 12;
+ k += 6;
+ }
+
+ /*----------------------------- handle the last (probably partial) block */
+ k8 = (const uint8_t *)k;
+ switch(length)
+ {
+ case 12: c+=k[4]+(((uint32_t)k[5])<<16);
+ b+=k[2]+(((uint32_t)k[3])<<16);
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
+ case 10: c+=k[4];
+ b+=k[2]+(((uint32_t)k[3])<<16);
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 9 : c+=k8[8]; /* fall through */
+ case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
+ case 6 : b+=k[2];
+ a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 5 : b+=k8[4]; /* fall through */
+ case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
+ break;
+ case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
+ case 2 : a+=k[0];
+ break;
+ case 1 : a+=k8[0];
+ break;
+ case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
+ }
+
+ } else { /* need to read the key one byte at a time */
+ const uint8_t *k = (const uint8_t *)key;
+
+ /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += k[0];
+ a += ((uint32_t)k[1])<<8;
+ a += ((uint32_t)k[2])<<16;
+ a += ((uint32_t)k[3])<<24;
+ b += k[4];
+ b += ((uint32_t)k[5])<<8;
+ b += ((uint32_t)k[6])<<16;
+ b += ((uint32_t)k[7])<<24;
+ c += k[8];
+ c += ((uint32_t)k[9])<<8;
+ c += ((uint32_t)k[10])<<16;
+ c += ((uint32_t)k[11])<<24;
+ mix(a,b,c);
+ length -= 12;
+ k += 12;
+ }
+
+ /*-------------------------------- last block: affect all 32 bits of (c) */
+ switch(length) /* all the case statements fall through */
+ {
+ case 12: c+=((uint32_t)k[11])<<24;
+ case 11: c+=((uint32_t)k[10])<<16;
+ case 10: c+=((uint32_t)k[9])<<8;
+ case 9 : c+=k[8];
+ case 8 : b+=((uint32_t)k[7])<<24;
+ case 7 : b+=((uint32_t)k[6])<<16;
+ case 6 : b+=((uint32_t)k[5])<<8;
+ case 5 : b+=k[4];
+ case 4 : a+=((uint32_t)k[3])<<24;
+ case 3 : a+=((uint32_t)k[2])<<16;
+ case 2 : a+=((uint32_t)k[1])<<8;
+ case 1 : a+=k[0];
+ break;
+ case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
+ }
+ }
+
+ final(a,b,c);
+ *pc=c; *pb=b;
+}
+
+/*
+ * hashbig():
+ * This is the same as hashword() on big-endian machines. It is different
+ * from hashlittle() on all machines. hashbig() takes advantage of
+ * big-endian byte ordering.
+ */
+uint32_t jenkins_hashbig( const void *key, size_t length, uint32_t initval)
+{
+ uint32_t a,b,c;
+ union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+
+ u.ptr = key;
+ if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
+ const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
+
+ /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 12;
+ k += 3;
+ }
+
+ /*----------------------------- handle the last (probably partial) block */
+ /*
+ * "k[2]<<8" actually reads beyond the end of the string, but
+ * then shifts out the part it's not allowed to read. Because the
+ * string is aligned, the illegal read is in the same word as the
+ * rest of the string. Every machine with memory protection I've seen
+ * does it on word boundaries, so is OK with this. But valgrind will
+ * still catch it and complain. The masking trick does make the hash
+ * noticeably faster for short strings (like English words).
+ */
+
+ if (!VALGRIND_LIKE) {
+ switch(length)
+ {
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
+ case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
+ case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
+ case 8 : b+=k[1]; a+=k[0]; break;
+ case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
+ case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
+ case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
+ case 4 : a+=k[0]; break;
+ case 3 : a+=k[0]&0xffffff00; break;
+ case 2 : a+=k[0]&0xffff0000; break;
+ case 1 : a+=k[0]&0xff000000; break;
+ case 0 : return c; /* zero length strings require no mixing */
+ }
+ } else {
+ const uint8_t *k8 = (const uint8_t *)k;
+
+ switch(length) /* all the case statements fall through */
+ {
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
+ case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
+ case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
+ case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
+ case 8 : b+=k[1]; a+=k[0]; break;
+ case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
+ case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
+ case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
+ case 4 : a+=k[0]; break;
+ case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
+ case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
+ case 1 : a+=((uint32_t)k8[0])<<24; break;
+ case 0 : return c;
+ }
+ }
+
+ } else { /* need to read the key one byte at a time */
+ const uint8_t *k = (const uint8_t *)key;
+
+ /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
+ while (length > 12)
+ {
+ a += ((uint32_t)k[0])<<24;
+ a += ((uint32_t)k[1])<<16;
+ a += ((uint32_t)k[2])<<8;
+ a += ((uint32_t)k[3]);
+ b += ((uint32_t)k[4])<<24;
+ b += ((uint32_t)k[5])<<16;
+ b += ((uint32_t)k[6])<<8;
+ b += ((uint32_t)k[7]);
+ c += ((uint32_t)k[8])<<24;
+ c += ((uint32_t)k[9])<<16;
+ c += ((uint32_t)k[10])<<8;
+ c += ((uint32_t)k[11]);
+ mix(a,b,c);
+ length -= 12;
+ k += 12;
+ }
+
+ /*-------------------------------- last block: affect all 32 bits of (c) */
+ switch(length) /* all the case statements fall through */
+ {
+ case 12: c+=k[11];
+ case 11: c+=((uint32_t)k[10])<<8;
+ case 10: c+=((uint32_t)k[9])<<16;
+ case 9 : c+=((uint32_t)k[8])<<24;
+ case 8 : b+=k[7];
+ case 7 : b+=((uint32_t)k[6])<<8;
+ case 6 : b+=((uint32_t)k[5])<<16;
+ case 5 : b+=((uint32_t)k[4])<<24;
+ case 4 : a+=k[3];
+ case 3 : a+=((uint32_t)k[2])<<8;
+ case 2 : a+=((uint32_t)k[1])<<16;
+ case 1 : a+=((uint32_t)k[0])<<24;
+ break;
+ case 0 : return c;
+ }
+ }
+
+ final(a,b,c);
+ return c;
+}
+
+#ifdef SELF_TEST
+
+/* used for timings */
+void driver1()
+{
+ uint8_t buf[256];
+ uint32_t i;
+ uint32_t h=0;
+ time_t a,z;
+
+ time(&a);
+ for (i=0; i<256; ++i) buf[i] = 'x';
+ for (i=0; i<1; ++i)
+ {
+ h = hashlittle(&buf[0],1,h);
+ }
+ time(&z);
+ if (z-a > 0) printf("time %d %.8x\n", z-a, h);
+}
+
+/* check that every input bit changes every output bit half the time */
+#define HASHSTATE 1
+#define HASHLEN 1
+#define MAXPAIR 60
+#define MAXLEN 70
+void driver2()
+{
+ uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
+ uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
+ uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
+ uint32_t x[HASHSTATE],y[HASHSTATE];
+ uint32_t hlen;
+
+ printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
+ for (hlen=0; hlen < MAXLEN; ++hlen)
+ {
+ z=0;
+ for (i=0; i<hlen; ++i) /*----------------------- for each input byte, */
+ {
+ for (j=0; j<8; ++j) /*------------------------ for each input bit, */
+ {
+ for (m=1; m<8; ++m) /*------------- for several possible initvals, */
+ {
+ for (l=0; l<HASHSTATE; ++l)
+ e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);
+
+ /*---- check that every output bit is affected by that input bit */
+ for (k=0; k<MAXPAIR; k+=2)
+ {
+ uint32_t finished=1;
+ /* keys have one bit different */
+ for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
+ /* have a and b be two keys differing in only one bit */
+ a[i] ^= (k<<j);
+ a[i] ^= (k>>(8-j));
+ c[0] = hashlittle(a, hlen, m);
+ b[i] ^= ((k+1)<<j);
+ b[i] ^= ((k+1)>>(8-j));
+ d[0] = hashlittle(b, hlen, m);
+ /* check every bit is 1, 0, set, and not set at least once */
+ for (l=0; l<HASHSTATE; ++l)
+ {
+ e[l] &= (c[l]^d[l]);
+ f[l] &= ~(c[l]^d[l]);
+ g[l] &= c[l];
+ h[l] &= ~c[l];
+ x[l] &= d[l];
+ y[l] &= ~d[l];
+ if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
+ }
+ if (finished) break;
+ }
+ if (k>z) z=k;
+ if (k==MAXPAIR)
+ {
+ printf("Some bit didn't change: ");
+ printf("%.8x %.8x %.8x %.8x %.8x %.8x ",
+ e[0],f[0],g[0],h[0],x[0],y[0]);
+ printf("i %d j %d m %d len %d\n", i, j, m, hlen);
+ }
+ if (z==MAXPAIR) goto done;
+ }
+ }
+ }
+ done:
+ if (z < MAXPAIR)
+ {
+ printf("Mix success %2d bytes %2d initvals ",i,m);
+ printf("required %d trials\n", z/2);
+ }
+ }
+ printf("\n");
+}
+
+/* Check for reading beyond the end of the buffer and alignment problems */
+void driver3()
+{
+ uint8_t buf[MAXLEN+20], *b;
+ uint32_t len;
+ uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
+ uint32_t h;
+ uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
+ uint32_t i;
+ uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
+ uint32_t j;
+ uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
+ uint32_t ref,x,y;
+ uint8_t *p;
+
+ printf("Endianness. These lines should all be the same (for values filled in):\n");
+ printf("%.8x %.8x %.8x\n",
+ hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
+ hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
+ hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
+ p = q;
+ printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
+ hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
+ hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
+ hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
+ hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
+ hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
+ hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
+ p = &qq[1];
+ printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
+ hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
+ hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
+ hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
+ hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
+ hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
+ hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
+ p = &qqq[2];
+ printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
+ hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
+ hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
+ hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
+ hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
+ hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
+ hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
+ p = &qqqq[3];
+ printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
+ hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
+ hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
+ hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
+ hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
+ hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
+ hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
+ printf("\n");
+
+ /* check that hashlittle2 and hashlittle produce the same results */
+ i=47; j=0;
+ hashlittle2(q, sizeof(q), &i, &j);
+ if (hashlittle(q, sizeof(q), 47) != i)
+ printf("hashlittle2 and hashlittle mismatch\n");
+
+ /* check that hashword2 and hashword produce the same results */
+ len = 0xdeadbeef;
+ i=47, j=0;
+ hashword2(&len, 1, &i, &j);
+ if (hashword(&len, 1, 47) != i)
+ printf("hashword2 and hashword mismatch %x %x\n",
+ i, hashword(&len, 1, 47));
+
+ /* check hashlittle doesn't read before or after the ends of the string */
+ for (h=0, b=buf+1; h<8; ++h, ++b)
+ {
+ for (i=0; i<MAXLEN; ++i)
+ {
+ len = i;
+ for (j=0; j<i; ++j) *(b+j)=0;
+
+ /* these should all be equal */
+ ref = hashlittle(b, len, (uint32_t)1);
+ *(b+i)=(uint8_t)~0;
+ *(b-1)=(uint8_t)~0;
+ x = hashlittle(b, len, (uint32_t)1);
+ y = hashlittle(b, len, (uint32_t)1);
+ if ((ref != x) || (ref != y))
+ {
+ printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
+ h, i);
+ }
+ }
+ }
+}
+
+/* check for problems with nulls */
+ void driver4()
+{
+ uint8_t buf[1];
+ uint32_t h,i,state[HASHSTATE];
+
+ buf[0] = ~0;
+ for (i=0; i<HASHSTATE; ++i) state[i] = 1;
+ printf("These should all be different\n");
+ for (i=0, h=0; i<8; ++i)
+ {
+ h = hashlittle(buf, 0, h);
+ printf("%2ld 0-byte strings, hash is %.8x\n", i, h);
+ }
+}
+
+void driver5()
+{
+ uint32_t b,c;
+ b=0, c=0, hashlittle2("", 0, &c, &b);
+ printf("hash is %.8lx %.8lx\n", c, b); /* deadbeef deadbeef */
+ b=0xdeadbeef, c=0, hashlittle2("", 0, &c, &b);
+ printf("hash is %.8lx %.8lx\n", c, b); /* bd5b7dde deadbeef */
+ b=0xdeadbeef, c=0xdeadbeef, hashlittle2("", 0, &c, &b);
+ printf("hash is %.8lx %.8lx\n", c, b); /* 9c093ccd bd5b7dde */
+ b=0, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
+ printf("hash is %.8lx %.8lx\n", c, b); /* 17770551 ce7226e6 */
+ b=1, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
+ printf("hash is %.8lx %.8lx\n", c, b); /* e3607cae bd371de4 */
+ b=0, c=1, hashlittle2("Four score and seven years ago", 30, &c, &b);
+ printf("hash is %.8lx %.8lx\n", c, b); /* cd628161 6cbea4b3 */
+ c = hashlittle("Four score and seven years ago", 30, 0);
+ printf("hash is %.8lx\n", c); /* 17770551 */
+ c = hashlittle("Four score and seven years ago", 30, 1);
+ printf("hash is %.8lx\n", c); /* cd628161 */
+}
+
+int main()
+{
+ driver1(); /* test that the key is hashed: used for timings */
+ driver2(); /* test that whole key is hashed thoroughly */
+ driver3(); /* test that nothing but the key is hashed */
+ driver4(); /* test hashing multiple buffers (all buffers are null) */
+ driver5(); /* test the hash against known vectors */
+ return 1;
+}
+
+#endif /* SELF_TEST */
diff --git a/src/libsystemd/sd-journal/lookup3.h b/src/libsystemd/sd-journal/lookup3.h
new file mode 100644
index 0000000..04e493e
--- /dev/null
+++ b/src/libsystemd/sd-journal/lookup3.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LicenseRef-lookup3-public-domain */
+#pragma once
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+#include "macro.h"
+
+uint32_t jenkins_hashword(const uint32_t *k, size_t length, uint32_t initval) _pure_;
+void jenkins_hashword2(const uint32_t *k, size_t length, uint32_t *pc, uint32_t *pb);
+
+uint32_t jenkins_hashlittle(const void *key, size_t length, uint32_t initval) _pure_;
+void jenkins_hashlittle2(const void *key, size_t length, uint32_t *pc, uint32_t *pb);
+
+uint32_t jenkins_hashbig(const void *key, size_t length, uint32_t initval) _pure_;
+
+static inline uint64_t jenkins_hash64(const void *data, size_t length) {
+ uint32_t a = 0, b = 0;
+
+ jenkins_hashlittle2(data, length, &a, &b);
+
+ return ((uint64_t) a << 32ULL) | (uint64_t) b;
+}
diff --git a/src/libsystemd/sd-journal/mmap-cache.c b/src/libsystemd/sd-journal/mmap-cache.c
new file mode 100644
index 0000000..973ade6
--- /dev/null
+++ b/src/libsystemd/sd-journal/mmap-cache.c
@@ -0,0 +1,562 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "hashmap.h"
+#include "list.h"
+#include "log.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "mmap-cache.h"
+#include "sigbus.h"
+
+typedef struct Window Window;
+
+typedef enum WindowFlags {
+ WINDOW_KEEP_ALWAYS = 1u << (_MMAP_CACHE_CATEGORY_MAX + 0),
+ WINDOW_IN_UNUSED = 1u << (_MMAP_CACHE_CATEGORY_MAX + 1),
+ WINDOW_INVALIDATED = 1u << (_MMAP_CACHE_CATEGORY_MAX + 2),
+
+ _WINDOW_USED_MASK = WINDOW_IN_UNUSED - 1, /* The mask contains all bits that indicate the windows
+ * is currently in use. Covers the all the object types
+ * and the additional WINDOW_KEEP_ALWAYS flag. */
+} WindowFlags;
+
+#define WINDOW_IS_UNUSED(w) (((w)->flags & _WINDOW_USED_MASK) == 0)
+
+struct Window {
+ MMapFileDescriptor *fd;
+
+ WindowFlags flags;
+
+ void *ptr;
+ uint64_t offset;
+ size_t size;
+
+ LIST_FIELDS(Window, windows);
+ LIST_FIELDS(Window, unused);
+};
+
+struct MMapFileDescriptor {
+ MMapCache *cache;
+
+ int fd;
+ int prot;
+ bool sigbus;
+
+ LIST_HEAD(Window, windows);
+};
+
+struct MMapCache {
+ unsigned n_ref;
+ unsigned n_windows;
+
+ unsigned n_category_cache_hit;
+ unsigned n_window_list_hit;
+ unsigned n_missed;
+
+ Hashmap *fds;
+
+ LIST_HEAD(Window, unused);
+ Window *last_unused;
+
+ Window *windows_by_category[_MMAP_CACHE_CATEGORY_MAX];
+};
+
+#define WINDOWS_MIN 64
+
+#if ENABLE_DEBUG_MMAP_CACHE
+/* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
+# define WINDOW_SIZE (page_size())
+#else
+# define WINDOW_SIZE ((size_t) (UINT64_C(8) * UINT64_C(1024) * UINT64_C(1024)))
+#endif
+
+MMapCache* mmap_cache_new(void) {
+ MMapCache *m;
+
+ m = new(MMapCache, 1);
+ if (!m)
+ return NULL;
+
+ *m = (MMapCache) {
+ .n_ref = 1,
+ };
+
+ return m;
+}
+
+static Window* window_unlink(Window *w) {
+ assert(w);
+
+ MMapCache *m = mmap_cache_fd_cache(w->fd);
+
+ if (w->ptr)
+ munmap(w->ptr, w->size);
+
+ if (FLAGS_SET(w->flags, WINDOW_IN_UNUSED)) {
+ if (m->last_unused == w)
+ m->last_unused = w->unused_prev;
+ LIST_REMOVE(unused, m->unused, w);
+ }
+
+ for (unsigned i = 0; i < _MMAP_CACHE_CATEGORY_MAX; i++)
+ if (FLAGS_SET(w->flags, 1u << i))
+ assert_se(TAKE_PTR(m->windows_by_category[i]) == w);
+
+ return LIST_REMOVE(windows, w->fd->windows, w);
+}
+
+static void window_invalidate(Window *w) {
+ assert(w);
+ assert(w->fd);
+
+ if (FLAGS_SET(w->flags, WINDOW_INVALIDATED))
+ return;
+
+ /* Replace the window with anonymous pages. This is useful when we hit a SIGBUS and want to make sure
+ * the file cannot trigger any further SIGBUS, possibly overrunning the sigbus queue. */
+
+ assert_se(mmap(w->ptr, w->size, w->fd->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
+ w->flags |= WINDOW_INVALIDATED;
+}
+
+static Window* window_free(Window *w) {
+ if (!w)
+ return NULL;
+
+ window_unlink(w);
+ w->fd->cache->n_windows--;
+
+ return mfree(w);
+}
+
+static bool window_matches(Window *w, MMapFileDescriptor *f, uint64_t offset, size_t size) {
+ assert(size > 0);
+
+ return
+ w &&
+ f == w->fd &&
+ offset >= w->offset &&
+ offset + size <= w->offset + w->size;
+}
+
+static bool window_matches_by_addr(Window *w, MMapFileDescriptor *f, void *addr, size_t size) {
+ assert(size > 0);
+
+ return
+ w &&
+ f == w->fd &&
+ (uint8_t*) addr >= (uint8_t*) w->ptr &&
+ (uint8_t*) addr + size <= (uint8_t*) w->ptr + w->size;
+}
+
+static Window* window_add(MMapFileDescriptor *f, uint64_t offset, size_t size, void *ptr) {
+ MMapCache *m = mmap_cache_fd_cache(f);
+ Window *w;
+
+ if (!m->last_unused || m->n_windows <= WINDOWS_MIN) {
+ /* Allocate a new window */
+ w = new(Window, 1);
+ if (!w)
+ return NULL;
+ m->n_windows++;
+ } else
+ /* Reuse an existing one */
+ w = window_unlink(m->last_unused);
+
+ *w = (Window) {
+ .fd = f,
+ .offset = offset,
+ .size = size,
+ .ptr = ptr,
+ };
+
+ return LIST_PREPEND(windows, f->windows, w);
+}
+
+static void category_detach_window(MMapCache *m, MMapCacheCategory c) {
+ Window *w;
+
+ assert(m);
+ assert(c >= 0 && c < _MMAP_CACHE_CATEGORY_MAX);
+
+ w = TAKE_PTR(m->windows_by_category[c]);
+ if (!w)
+ return; /* Nothing attached. */
+
+ assert(FLAGS_SET(w->flags, 1u << c));
+ w->flags &= ~(1u << c);
+
+ if (WINDOW_IS_UNUSED(w)) {
+ /* Not used anymore? */
+#if ENABLE_DEBUG_MMAP_CACHE
+ /* Unmap unused windows immediately to expose use-after-unmap by SIGSEGV. */
+ window_free(w);
+#else
+ LIST_PREPEND(unused, m->unused, w);
+ if (!m->last_unused)
+ m->last_unused = w;
+ w->flags |= WINDOW_IN_UNUSED;
+#endif
+ }
+}
+
+static void category_attach_window(MMapCache *m, MMapCacheCategory c, Window *w) {
+ assert(m);
+ assert(c >= 0 && c < _MMAP_CACHE_CATEGORY_MAX);
+ assert(w);
+
+ if (m->windows_by_category[c] == w)
+ return; /* Already attached. */
+
+ category_detach_window(m, c);
+
+ if (FLAGS_SET(w->flags, WINDOW_IN_UNUSED)) {
+ /* Used again? */
+ if (m->last_unused == w)
+ m->last_unused = w->unused_prev;
+ LIST_REMOVE(unused, m->unused, w);
+ w->flags &= ~WINDOW_IN_UNUSED;
+ }
+
+ m->windows_by_category[c] = w;
+ w->flags |= (1u << c);
+}
+
+static MMapCache* mmap_cache_free(MMapCache *m) {
+ if (!m)
+ return NULL;
+
+ /* All windows are owned by fds, and each fd takes a reference of MMapCache. So, when this is called,
+ * all fds are already freed, and hence there is no window. */
+
+ assert(hashmap_isempty(m->fds));
+ hashmap_free(m->fds);
+
+ assert(!m->unused);
+ assert(m->n_windows == 0);
+
+ return mfree(m);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(MMapCache, mmap_cache, mmap_cache_free);
+
+static int mmap_try_harder(MMapFileDescriptor *f, void *addr, int flags, uint64_t offset, size_t size, void **ret) {
+ MMapCache *m = mmap_cache_fd_cache(f);
+
+ assert(ret);
+
+ for (;;) {
+ void *ptr;
+
+ ptr = mmap(addr, size, f->prot, flags, f->fd, offset);
+ if (ptr != MAP_FAILED) {
+ *ret = ptr;
+ return 0;
+ }
+ if (errno != ENOMEM)
+ return negative_errno();
+
+ /* When failed with ENOMEM, try again after making a room by freeing an unused window. */
+
+ if (!m->last_unused)
+ return -ENOMEM; /* no free window, propagate the original error. */
+
+ window_free(m->last_unused);
+ }
+}
+
+static int add_mmap(
+ MMapFileDescriptor *f,
+ uint64_t offset,
+ size_t size,
+ struct stat *st,
+ Window **ret) {
+
+ Window *w;
+ void *d;
+ int r;
+
+ assert(f);
+ assert(size > 0);
+ assert(ret);
+
+ /* overflow check */
+ if (size > SIZE_MAX - PAGE_OFFSET_U64(offset))
+ return -EADDRNOTAVAIL;
+
+ size = PAGE_ALIGN(size + PAGE_OFFSET_U64(offset));
+ offset = PAGE_ALIGN_DOWN_U64(offset);
+
+ if (size < WINDOW_SIZE) {
+ uint64_t delta;
+
+ delta = PAGE_ALIGN((WINDOW_SIZE - size) / 2);
+ offset = LESS_BY(offset, delta);
+ size = WINDOW_SIZE;
+ }
+
+ if (st) {
+ /* Memory maps that are larger then the files underneath have undefined behavior. Hence,
+ * clamp things to the file size if we know it */
+
+ if (offset >= (uint64_t) st->st_size)
+ return -EADDRNOTAVAIL;
+
+ if (size > (uint64_t) st->st_size - offset)
+ size = PAGE_ALIGN((uint64_t) st->st_size - offset);
+ }
+
+ if (size >= SIZE_MAX)
+ return -EADDRNOTAVAIL;
+
+ r = mmap_try_harder(f, NULL, MAP_SHARED, offset, size, &d);
+ if (r < 0)
+ return r;
+
+ w = window_add(f, offset, size, d);
+ if (!w) {
+ (void) munmap(d, size);
+ return -ENOMEM;
+ }
+
+ *ret = w;
+ return 0;
+}
+
+int mmap_cache_fd_get(
+ MMapFileDescriptor *f,
+ MMapCacheCategory c,
+ bool keep_always,
+ uint64_t offset,
+ size_t size,
+ struct stat *st,
+ void **ret) {
+
+ MMapCache *m = mmap_cache_fd_cache(f);
+ Window *w;
+ int r;
+
+ assert(size > 0);
+ assert(c >= 0 && c < _MMAP_CACHE_CATEGORY_MAX);
+ assert(ret);
+
+ if (f->sigbus)
+ return -EIO;
+
+ /* Check whether the current category is the right one already */
+ if (window_matches(m->windows_by_category[c], f, offset, size)) {
+ m->n_category_cache_hit++;
+ w = m->windows_by_category[c];
+ goto found;
+ }
+
+ /* Drop the reference to the window, since it's unnecessary now */
+ category_detach_window(m, c);
+
+ /* Search for a matching mmap */
+ LIST_FOREACH(windows, i, f->windows)
+ if (window_matches(i, f, offset, size)) {
+ m->n_window_list_hit++;
+ w = i;
+ goto found;
+ }
+
+ m->n_missed++;
+
+ /* Create a new mmap */
+ r = add_mmap(f, offset, size, st, &w);
+ if (r < 0)
+ return r;
+
+found:
+ if (keep_always)
+ w->flags |= WINDOW_KEEP_ALWAYS;
+
+ category_attach_window(m, c, w);
+ *ret = (uint8_t*) w->ptr + (offset - w->offset);
+ return 0;
+}
+
+int mmap_cache_fd_pin(
+ MMapFileDescriptor *f,
+ MMapCacheCategory c,
+ void *addr,
+ size_t size) {
+
+ MMapCache *m = mmap_cache_fd_cache(f);
+ Window *w;
+
+ assert(addr);
+ assert(c >= 0 && c < _MMAP_CACHE_CATEGORY_MAX);
+ assert(size > 0);
+
+ if (f->sigbus)
+ return -EIO;
+
+ /* Check if the current category is the right one. */
+ if (window_matches_by_addr(m->windows_by_category[c], f, addr, size)) {
+ m->n_category_cache_hit++;
+ w = m->windows_by_category[c];
+ goto found;
+ }
+
+ /* Search for a matching mmap. */
+ LIST_FOREACH(windows, i, f->windows)
+ if (window_matches_by_addr(i, f, addr, size)) {
+ m->n_window_list_hit++;
+ w = i;
+ goto found;
+ }
+
+ m->n_missed++;
+ return -EADDRNOTAVAIL; /* Not found. */
+
+found:
+ if (FLAGS_SET(w->flags, WINDOW_KEEP_ALWAYS))
+ return 0; /* The window will never unmapped. */
+
+ /* Attach the window to the 'pinning' category. */
+ category_attach_window(m, MMAP_CACHE_CATEGORY_PIN, w);
+ return 1;
+}
+
+void mmap_cache_stats_log_debug(MMapCache *m) {
+ assert(m);
+
+ log_debug("mmap cache statistics: %u category cache hit, %u window list hit, %u miss",
+ m->n_category_cache_hit, m->n_window_list_hit, m->n_missed);
+}
+
+static void mmap_cache_process_sigbus(MMapCache *m) {
+ bool found = false;
+ MMapFileDescriptor *f;
+ int r;
+
+ assert(m);
+
+ /* Iterate through all triggered pages and mark their files as invalidated. */
+ for (;;) {
+ bool ours;
+ void *addr;
+
+ r = sigbus_pop(&addr);
+ if (_likely_(r == 0))
+ break;
+ if (r < 0) {
+ log_error_errno(r, "SIGBUS handling failed: %m");
+ abort();
+ }
+
+ ours = false;
+ HASHMAP_FOREACH(f, m->fds) {
+ LIST_FOREACH(windows, w, f->windows)
+ if (window_matches_by_addr(w, f, addr, 1)) {
+ found = ours = f->sigbus = true;
+ break;
+ }
+
+ if (ours)
+ break;
+ }
+
+ /* Didn't find a matching window, give up. */
+ if (!ours) {
+ log_error("Unknown SIGBUS page, aborting.");
+ abort();
+ }
+ }
+
+ /* The list of triggered pages is now empty. Now, let's remap all windows of the triggered file to
+ * anonymous maps, so that no page of the file in question is triggered again, so that we can be sure
+ * not to hit the queue size limit. */
+ if (_likely_(!found))
+ return;
+
+ HASHMAP_FOREACH(f, m->fds) {
+ if (!f->sigbus)
+ continue;
+
+ LIST_FOREACH(windows, w, f->windows)
+ window_invalidate(w);
+ }
+}
+
+bool mmap_cache_fd_got_sigbus(MMapFileDescriptor *f) {
+ assert(f);
+
+ mmap_cache_process_sigbus(f->cache);
+
+ return f->sigbus;
+}
+
+int mmap_cache_add_fd(MMapCache *m, int fd, int prot, MMapFileDescriptor **ret) {
+ _cleanup_free_ MMapFileDescriptor *f = NULL;
+ MMapFileDescriptor *existing;
+ int r;
+
+ assert(m);
+ assert(fd >= 0);
+
+ existing = hashmap_get(m->fds, FD_TO_PTR(fd));
+ if (existing) {
+ if (existing->prot != prot)
+ return -EEXIST;
+ if (ret)
+ *ret = existing;
+ return 0;
+ }
+
+ f = new(MMapFileDescriptor, 1);
+ if (!f)
+ return -ENOMEM;
+
+ *f = (MMapFileDescriptor) {
+ .fd = fd,
+ .prot = prot,
+ };
+
+ r = hashmap_ensure_put(&m->fds, NULL, FD_TO_PTR(fd), f);
+ if (r < 0)
+ return r;
+ assert(r > 0);
+
+ f->cache = mmap_cache_ref(m);
+
+ if (ret)
+ *ret = f;
+
+ TAKE_PTR(f);
+ return 1;
+}
+
+MMapFileDescriptor* mmap_cache_fd_free(MMapFileDescriptor *f) {
+ if (!f)
+ return NULL;
+
+ /* Make sure that any queued SIGBUS are first dispatched, so that we don't end up with a SIGBUS entry
+ * we cannot relate to any existing memory map. */
+
+ mmap_cache_process_sigbus(f->cache);
+
+ while (f->windows)
+ window_free(f->windows);
+
+ assert_se(hashmap_remove(f->cache->fds, FD_TO_PTR(f->fd)) == f);
+
+ /* Unref the cache at the end. Otherwise, the assertions in mmap_cache_free() may be triggered. */
+ f->cache = mmap_cache_unref(f->cache);
+
+ return mfree(f);
+}
+
+MMapCache* mmap_cache_fd_cache(MMapFileDescriptor *f) {
+ assert(f);
+ return ASSERT_PTR(f->cache);
+}
diff --git a/src/libsystemd/sd-journal/mmap-cache.h b/src/libsystemd/sd-journal/mmap-cache.h
new file mode 100644
index 0000000..1fbc236
--- /dev/null
+++ b/src/libsystemd/sd-journal/mmap-cache.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <errno.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+
+#include "journal-def.h"
+
+typedef struct MMapCache MMapCache;
+typedef struct MMapFileDescriptor MMapFileDescriptor;
+
+typedef enum MMapCacheCategory {
+ MMAP_CACHE_CATEGORY_ANY = OBJECT_UNUSED,
+ MMAP_CACHE_CATEGORY_DATA = OBJECT_DATA,
+ MMAP_CACHE_CATEGORY_FIELD = OBJECT_FIELD,
+ MMAP_CACHE_CATEGORY_ENTRY = OBJECT_ENTRY,
+ MMAP_CACHE_CATEGORY_DATA_HASH_TABLE = OBJECT_DATA_HASH_TABLE,
+ MMAP_CACHE_CATEGORY_FIELD_HASH_TABLE = OBJECT_FIELD_HASH_TABLE,
+ MMAP_CACHE_CATEGORY_ENTRY_ARRAY = OBJECT_ENTRY_ARRAY,
+ MMAP_CACHE_CATEGORY_TAG = OBJECT_TAG,
+ MMAP_CACHE_CATEGORY_HEADER, /* for reading file header */
+ MMAP_CACHE_CATEGORY_PIN, /* for temporary pinning a object */
+ _MMAP_CACHE_CATEGORY_MAX,
+ _MMAP_CACHE_CATEGORY_INVALID = -EINVAL,
+} MMapCacheCategory;
+
+assert_cc((int) _OBJECT_TYPE_MAX < (int) _MMAP_CACHE_CATEGORY_MAX);
+
+static inline MMapCacheCategory type_to_category(ObjectType type) {
+ return type >= 0 && type < _OBJECT_TYPE_MAX ? (MMapCacheCategory) type : MMAP_CACHE_CATEGORY_ANY;
+}
+
+MMapCache* mmap_cache_new(void);
+MMapCache* mmap_cache_ref(MMapCache *m);
+MMapCache* mmap_cache_unref(MMapCache *m);
+DEFINE_TRIVIAL_CLEANUP_FUNC(MMapCache*, mmap_cache_unref);
+
+int mmap_cache_fd_get(
+ MMapFileDescriptor *f,
+ MMapCacheCategory c,
+ bool keep_always,
+ uint64_t offset,
+ size_t size,
+ struct stat *st,
+ void **ret);
+
+int mmap_cache_fd_pin(
+ MMapFileDescriptor *f,
+ MMapCacheCategory c,
+ void *addr,
+ size_t size);
+
+int mmap_cache_add_fd(MMapCache *m, int fd, int prot, MMapFileDescriptor **ret);
+MMapCache* mmap_cache_fd_cache(MMapFileDescriptor *f);
+MMapFileDescriptor* mmap_cache_fd_free(MMapFileDescriptor *f);
+
+void mmap_cache_stats_log_debug(MMapCache *m);
+
+bool mmap_cache_fd_got_sigbus(MMapFileDescriptor *f);
diff --git a/src/libsystemd/sd-journal/sd-journal.c b/src/libsystemd/sd-journal/sd-journal.c
new file mode 100644
index 0000000..6b9ff0a
--- /dev/null
+++ b/src/libsystemd/sd-journal/sd-journal.c
@@ -0,0 +1,3528 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <linux/magic.h>
+#include <poll.h>
+#include <stddef.h>
+#include <sys/inotify.h>
+#include <sys/vfs.h>
+#include <unistd.h>
+
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "catalog.h"
+#include "compress.h"
+#include "dirent-util.h"
+#include "env-file.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "hostname-util.h"
+#include "id128-util.h"
+#include "inotify-util.h"
+#include "io-util.h"
+#include "journal-def.h"
+#include "journal-file.h"
+#include "journal-internal.h"
+#include "list.h"
+#include "lookup3.h"
+#include "nulstr-util.h"
+#include "origin-id.h"
+#include "path-util.h"
+#include "prioq.h"
+#include "process-util.h"
+#include "replace-var.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "syslog-util.h"
+#include "uid-alloc-range.h"
+
+#define JOURNAL_FILES_RECHECK_USEC (2 * USEC_PER_SEC)
+
+/* The maximum size of variable values we'll expand in catalog entries. We bind this to PATH_MAX for now, as
+ * we want to be able to show all officially valid paths at least */
+#define REPLACE_VAR_MAX PATH_MAX
+
+#define DEFAULT_DATA_THRESHOLD (64*1024)
+
+DEFINE_PRIVATE_ORIGIN_ID_HELPERS(sd_journal, journal);
+
+static void remove_file_real(sd_journal *j, JournalFile *f);
+static int journal_file_read_tail_timestamp(sd_journal *j, JournalFile *f);
+static void journal_file_unlink_newest_by_boot_id(sd_journal *j, JournalFile *f);
+
+static int journal_put_error(sd_journal *j, int r, const char *path) {
+ _cleanup_free_ char *copy = NULL;
+ int k;
+
+ /* Memorize an error we encountered, and store which
+ * file/directory it was generated from. Note that we store
+ * only *one* path per error code, as the error code is the
+ * key into the hashmap, and the path is the value. This means
+ * we keep track only of all error kinds, but not of all error
+ * locations. This has the benefit that the hashmap cannot
+ * grow beyond bounds.
+ *
+ * We return an error here only if we didn't manage to
+ * memorize the real error. */
+
+ if (r >= 0)
+ return r;
+
+ if (path) {
+ copy = strdup(path);
+ if (!copy)
+ return -ENOMEM;
+ }
+
+ k = hashmap_ensure_put(&j->errors, NULL, INT_TO_PTR(r), copy);
+ if (k < 0) {
+ if (k == -EEXIST)
+ return 0;
+
+ return k;
+ }
+
+ TAKE_PTR(copy);
+ return 0;
+}
+
+static void detach_location(sd_journal *j) {
+ JournalFile *f;
+
+ assert(j);
+
+ j->current_file = NULL;
+ j->current_field = 0;
+
+ ORDERED_HASHMAP_FOREACH(f, j->files)
+ journal_file_reset_location(f);
+}
+
+static void init_location(Location *l, LocationType type, JournalFile *f, Object *o) {
+ assert(l);
+ assert(IN_SET(type, LOCATION_DISCRETE, LOCATION_SEEK));
+ assert(f);
+
+ *l = (Location) {
+ .type = type,
+ .seqnum = le64toh(o->entry.seqnum),
+ .seqnum_id = f->header->seqnum_id,
+ .realtime = le64toh(o->entry.realtime),
+ .monotonic = le64toh(o->entry.monotonic),
+ .boot_id = o->entry.boot_id,
+ .xor_hash = le64toh(o->entry.xor_hash),
+ .seqnum_set = true,
+ .realtime_set = true,
+ .monotonic_set = true,
+ .xor_hash_set = true,
+ };
+}
+
+static void set_location(sd_journal *j, JournalFile *f, Object *o) {
+ assert(j);
+ assert(f);
+ assert(o);
+
+ init_location(&j->current_location, LOCATION_DISCRETE, f, o);
+
+ j->current_file = f;
+ j->current_field = 0;
+
+ /* Let f know its candidate entry was picked. */
+ assert(f->location_type == LOCATION_SEEK);
+ f->location_type = LOCATION_DISCRETE;
+}
+
+static int match_is_valid(const void *data, size_t size) {
+ const char *b = ASSERT_PTR(data);
+
+ if (size < 2)
+ return false;
+
+ if (((char*) data)[0] == '_' && ((char*) data)[1] == '_')
+ return false;
+
+ for (const char *p = b; p < b + size; p++) {
+
+ if (*p == '=')
+ return p > b;
+
+ if (*p == '_')
+ continue;
+
+ if (*p >= 'A' && *p <= 'Z')
+ continue;
+
+ if (ascii_isdigit(*p))
+ continue;
+
+ return false;
+ }
+
+ return false;
+}
+
+static bool same_field(const void *_a, size_t s, const void *_b, size_t t) {
+ const uint8_t *a = _a, *b = _b;
+
+ for (size_t j = 0; j < s && j < t; j++) {
+
+ if (a[j] != b[j])
+ return false;
+
+ if (a[j] == '=')
+ return true;
+ }
+
+ assert_not_reached();
+}
+
+static Match *match_new(Match *p, MatchType t) {
+ Match *m;
+
+ m = new(Match, 1);
+ if (!m)
+ return NULL;
+
+ *m = (Match) {
+ .type = t,
+ .parent = p,
+ };
+
+ if (p)
+ LIST_PREPEND(matches, p->matches, m);
+
+ return m;
+}
+
+static Match *match_free(Match *m) {
+ assert(m);
+
+ while (m->matches)
+ match_free(m->matches);
+
+ if (m->parent)
+ LIST_REMOVE(matches, m->parent->matches, m);
+
+ free(m->data);
+ return mfree(m);
+}
+
+static Match *match_free_if_empty(Match *m) {
+ if (!m || m->matches)
+ return m;
+
+ return match_free(m);
+}
+
+_public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) {
+ Match *add_here = NULL, *m = NULL;
+ uint64_t hash;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+ assert_return(data, -EINVAL);
+
+ if (size == 0)
+ size = strlen(data);
+
+ if (!match_is_valid(data, size))
+ return -EINVAL;
+
+ /* level 0: AND term
+ * level 1: OR terms
+ * level 2: AND terms
+ * level 3: OR terms
+ * level 4: concrete matches */
+
+ if (!j->level0) {
+ j->level0 = match_new(NULL, MATCH_AND_TERM);
+ if (!j->level0)
+ return -ENOMEM;
+ }
+
+ if (!j->level1) {
+ j->level1 = match_new(j->level0, MATCH_OR_TERM);
+ if (!j->level1)
+ return -ENOMEM;
+ }
+
+ if (!j->level2) {
+ j->level2 = match_new(j->level1, MATCH_AND_TERM);
+ if (!j->level2)
+ return -ENOMEM;
+ }
+
+ assert(j->level0->type == MATCH_AND_TERM);
+ assert(j->level1->type == MATCH_OR_TERM);
+ assert(j->level2->type == MATCH_AND_TERM);
+
+ /* Old-style Jenkins (unkeyed) hashing only here. We do not cover new-style siphash (keyed) hashing
+ * here, since it's different for each file, and thus can't be pre-calculated in the Match object. */
+ hash = jenkins_hash64(data, size);
+
+ LIST_FOREACH(matches, l3, j->level2->matches) {
+ assert(l3->type == MATCH_OR_TERM);
+
+ LIST_FOREACH(matches, l4, l3->matches) {
+ assert(l4->type == MATCH_DISCRETE);
+
+ /* Exactly the same match already? Then ignore
+ * this addition */
+ if (l4->hash == hash &&
+ l4->size == size &&
+ memcmp(l4->data, data, size) == 0)
+ return 0;
+
+ /* Same field? Then let's add this to this OR term */
+ if (same_field(data, size, l4->data, l4->size)) {
+ add_here = l3;
+ break;
+ }
+ }
+
+ if (add_here)
+ break;
+ }
+
+ if (!add_here) {
+ add_here = match_new(j->level2, MATCH_OR_TERM);
+ if (!add_here)
+ goto fail;
+ }
+
+ m = match_new(add_here, MATCH_DISCRETE);
+ if (!m)
+ goto fail;
+
+ m->hash = hash;
+ m->size = size;
+ m->data = memdup(data, size);
+ if (!m->data)
+ goto fail;
+
+ detach_location(j);
+
+ return 0;
+
+fail:
+ match_free(m);
+ match_free_if_empty(add_here);
+ j->level2 = match_free_if_empty(j->level2);
+ j->level1 = match_free_if_empty(j->level1);
+ j->level0 = match_free_if_empty(j->level0);
+
+ return -ENOMEM;
+}
+
+_public_ int sd_journal_add_conjunction(sd_journal *j) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+
+ if (!j->level0)
+ return 0;
+
+ if (!j->level1)
+ return 0;
+
+ if (!j->level1->matches)
+ return 0;
+
+ j->level1 = NULL;
+ j->level2 = NULL;
+
+ return 0;
+}
+
+_public_ int sd_journal_add_disjunction(sd_journal *j) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+
+ if (!j->level0)
+ return 0;
+
+ if (!j->level1)
+ return 0;
+
+ if (!j->level2)
+ return 0;
+
+ if (!j->level2->matches)
+ return 0;
+
+ j->level2 = NULL;
+ return 0;
+}
+
+static char *match_make_string(Match *m) {
+ _cleanup_free_ char *p = NULL;
+ bool enclose = false;
+
+ if (!m)
+ return strdup("none");
+
+ if (m->type == MATCH_DISCRETE)
+ return cescape_length(m->data, m->size);
+
+ LIST_FOREACH(matches, i, m->matches) {
+ _cleanup_free_ char *t = NULL;
+
+ t = match_make_string(i);
+ if (!t)
+ return NULL;
+
+ if (p) {
+ if (!strextend(&p, m->type == MATCH_OR_TERM ? " OR " : " AND ", t))
+ return NULL;
+
+ enclose = true;
+ } else
+ p = TAKE_PTR(t);
+ }
+
+ if (enclose)
+ return strjoin("(", p, ")");
+
+ return TAKE_PTR(p);
+}
+
+char *journal_make_match_string(sd_journal *j) {
+ assert(j);
+
+ return match_make_string(j->level0);
+}
+
+_public_ void sd_journal_flush_matches(sd_journal *j) {
+ if (!j || journal_origin_changed(j))
+ return;
+
+ if (j->level0)
+ match_free(j->level0);
+
+ j->level0 = j->level1 = j->level2 = NULL;
+
+ detach_location(j);
+}
+
+static int journal_file_find_newest_for_boot_id(
+ sd_journal *j,
+ sd_id128_t id,
+ JournalFile **ret) {
+
+ JournalFile *prev = NULL;
+ int r;
+
+ assert(j);
+ assert(ret);
+
+ /* Before we use it, let's refresh the timestamp from the header, and reshuffle our prioq
+ * accordingly. We do this only a bunch of times, to not be caught in some update loop. */
+ for (unsigned n_tries = 0;; n_tries++) {
+ JournalFile *f;
+ Prioq *q;
+
+ q = hashmap_get(j->newest_by_boot_id, &id);
+ if (!q)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENODATA),
+ "Requested delta for boot ID %s, but we have no information about that boot ID.", SD_ID128_TO_STRING(id));
+
+ assert_se(f = prioq_peek(q)); /* we delete hashmap entries once the prioq is empty, so this must hold */
+
+ if (f == prev || n_tries >= 5) {
+ /* This was already the best answer in the previous run, or we tried too often, use it */
+ *ret = f;
+ return 0;
+ }
+
+ prev = f;
+
+ /* Let's read the journal file's current timestamp once, before we return it, maybe it has changed. */
+ r = journal_file_read_tail_timestamp(j, f);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to read tail timestamp while trying to find newest journal file for boot ID %s.", SD_ID128_TO_STRING(id));
+
+ /* Refreshing the timestamp we read might have reshuffled the prioq, hence let's check the
+ * prioq again and only use the information once we reached an equilibrium or hit a limit */
+ }
+}
+
+static int compare_boot_ids(sd_journal *j, sd_id128_t a, sd_id128_t b) {
+ JournalFile *x, *y;
+
+ assert(j);
+
+ /* Try to find the newest open journal file for the two boot ids */
+ if (journal_file_find_newest_for_boot_id(j, a, &x) < 0 ||
+ journal_file_find_newest_for_boot_id(j, b, &y) < 0)
+ return 0;
+
+ /* Only compare the boot id timestamps if they originate from the same machine. If they are from
+ * different machines, then we timestamps of the boot ids might be as off as the timestamps on the
+ * entries and hence not useful for comparing. */
+ if (!sd_id128_equal(x->newest_machine_id, y->newest_machine_id))
+ return 0;
+
+ return CMP(x->newest_realtime_usec, y->newest_realtime_usec);
+}
+
+static int compare_with_location(
+ sd_journal *j,
+ const JournalFile *f,
+ const Location *l,
+ const JournalFile *current_file) {
+ int r;
+
+ assert(j);
+ assert(f);
+ assert(l);
+ assert(f->location_type == LOCATION_SEEK);
+ assert(IN_SET(l->type, LOCATION_DISCRETE, LOCATION_SEEK));
+
+ if (l->monotonic_set &&
+ sd_id128_equal(f->current_boot_id, l->boot_id) &&
+ l->realtime_set &&
+ f->current_realtime == l->realtime &&
+ l->xor_hash_set &&
+ f->current_xor_hash == l->xor_hash &&
+ l->seqnum_set &&
+ sd_id128_equal(f->header->seqnum_id, l->seqnum_id) &&
+ f->current_seqnum == l->seqnum &&
+ f != current_file)
+ return 0;
+
+ if (l->seqnum_set &&
+ sd_id128_equal(f->header->seqnum_id, l->seqnum_id)) {
+ r = CMP(f->current_seqnum, l->seqnum);
+ if (r != 0)
+ return r;
+ }
+
+ if (l->monotonic_set) {
+ /* If both arguments have the same boot ID, then we can compare the monotonic timestamps. If
+ * they are distinct, then we might able to lookup the timestamps of those boot IDs (if they
+ * are from the same machine) and order by that. */
+ if (sd_id128_equal(f->current_boot_id, l->boot_id))
+ r = CMP(f->current_monotonic, l->monotonic);
+ else
+ r = compare_boot_ids(j, f->current_boot_id, l->boot_id);
+ if (r != 0)
+ return r;
+ }
+
+ if (l->realtime_set) {
+ r = CMP(f->current_realtime, l->realtime);
+ if (r != 0)
+ return r;
+ }
+
+ if (l->xor_hash_set) {
+ r = CMP(f->current_xor_hash, l->xor_hash);
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int next_for_match(
+ sd_journal *j,
+ Match *m,
+ JournalFile *f,
+ uint64_t after_offset,
+ direction_t direction,
+ Object **ret,
+ uint64_t *offset) {
+
+ int r;
+ uint64_t np = 0;
+
+ assert(j);
+ assert(m);
+ assert(f);
+
+ if (m->type == MATCH_DISCRETE) {
+ Object *d;
+ uint64_t hash;
+
+ /* If the keyed hash logic is used, we need to calculate the hash fresh per file. Otherwise
+ * we can use what we pre-calculated. */
+ if (JOURNAL_HEADER_KEYED_HASH(f->header))
+ hash = journal_file_hash_data(f, m->data, m->size);
+ else
+ hash = m->hash;
+
+ r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, &d, NULL);
+ if (r <= 0)
+ return r;
+
+ return journal_file_move_to_entry_by_offset_for_data(f, d, after_offset, direction, ret, offset);
+
+ } else if (m->type == MATCH_OR_TERM) {
+
+ /* Find the earliest match beyond after_offset */
+
+ LIST_FOREACH(matches, i, m->matches) {
+ uint64_t cp;
+
+ r = next_for_match(j, i, f, after_offset, direction, NULL, &cp);
+ if (r < 0)
+ return r;
+ else if (r > 0) {
+ if (np == 0 || (direction == DIRECTION_DOWN ? cp < np : cp > np))
+ np = cp;
+ }
+ }
+
+ if (np == 0)
+ return 0;
+
+ } else if (m->type == MATCH_AND_TERM) {
+ Match *last_moved;
+
+ /* Always jump to the next matching entry and repeat
+ * this until we find an offset that matches for all
+ * matches. */
+
+ if (!m->matches)
+ return 0;
+
+ r = next_for_match(j, m->matches, f, after_offset, direction, NULL, &np);
+ if (r <= 0)
+ return r;
+
+ assert(direction == DIRECTION_DOWN ? np >= after_offset : np <= after_offset);
+ last_moved = m->matches;
+
+ LIST_LOOP_BUT_ONE(matches, i, m->matches, last_moved) {
+ uint64_t cp;
+
+ r = next_for_match(j, i, f, np, direction, NULL, &cp);
+ if (r <= 0)
+ return r;
+
+ assert(direction == DIRECTION_DOWN ? cp >= np : cp <= np);
+ if (direction == DIRECTION_DOWN ? cp > np : cp < np) {
+ np = cp;
+ last_moved = i;
+ }
+ }
+ }
+
+ assert(np > 0);
+
+ if (ret) {
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, np, ret);
+ if (r < 0)
+ return r;
+ }
+
+ if (offset)
+ *offset = np;
+
+ return 1;
+}
+
+static int find_location_for_match(
+ sd_journal *j,
+ Match *m,
+ JournalFile *f,
+ direction_t direction,
+ Object **ret,
+ uint64_t *offset) {
+
+ int r;
+
+ assert(j);
+ assert(m);
+ assert(f);
+
+ if (m->type == MATCH_DISCRETE) {
+ Object *d;
+ uint64_t dp, hash;
+
+ if (JOURNAL_HEADER_KEYED_HASH(f->header))
+ hash = journal_file_hash_data(f, m->data, m->size);
+ else
+ hash = m->hash;
+
+ r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, &d, &dp);
+ if (r <= 0)
+ return r;
+
+ /* FIXME: missing: find by monotonic */
+
+ if (j->current_location.type == LOCATION_HEAD)
+ return direction == DIRECTION_DOWN ? journal_file_move_to_entry_for_data(f, d, DIRECTION_DOWN, ret, offset) : 0;
+ if (j->current_location.type == LOCATION_TAIL)
+ return direction == DIRECTION_UP ? journal_file_move_to_entry_for_data(f, d, DIRECTION_UP, ret, offset) : 0;
+ if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
+ return journal_file_move_to_entry_by_seqnum_for_data(f, d, j->current_location.seqnum, direction, ret, offset);
+ if (j->current_location.monotonic_set) {
+ r = journal_file_move_to_entry_by_monotonic_for_data(f, d, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
+ if (r != 0)
+ return r;
+
+ /* The data object might have been invalidated. */
+ r = journal_file_move_to_object(f, OBJECT_DATA, dp, &d);
+ if (r < 0)
+ return r;
+ }
+ if (j->current_location.realtime_set)
+ return journal_file_move_to_entry_by_realtime_for_data(f, d, j->current_location.realtime, direction, ret, offset);
+
+ return journal_file_move_to_entry_for_data(f, d, direction, ret, offset);
+
+ } else if (m->type == MATCH_OR_TERM) {
+ uint64_t np = 0;
+
+ /* Find the earliest match */
+
+ LIST_FOREACH(matches, i, m->matches) {
+ uint64_t cp;
+
+ r = find_location_for_match(j, i, f, direction, NULL, &cp);
+ if (r < 0)
+ return r;
+ else if (r > 0) {
+ if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp))
+ np = cp;
+ }
+ }
+
+ if (np == 0)
+ return 0;
+
+ if (ret) {
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, np, ret);
+ if (r < 0)
+ return r;
+ }
+
+ if (offset)
+ *offset = np;
+
+ return 1;
+
+ } else {
+ uint64_t np = 0;
+
+ assert(m->type == MATCH_AND_TERM);
+
+ /* First jump to the last match, and then find the
+ * next one where all matches match */
+
+ if (!m->matches)
+ return 0;
+
+ LIST_FOREACH(matches, i, m->matches) {
+ uint64_t cp;
+
+ r = find_location_for_match(j, i, f, direction, NULL, &cp);
+ if (r <= 0)
+ return r;
+
+ if (np == 0 || (direction == DIRECTION_DOWN ? cp > np : cp < np))
+ np = cp;
+ }
+
+ return next_for_match(j, m, f, np, direction, ret, offset);
+ }
+}
+
+static int find_location_with_matches(
+ sd_journal *j,
+ JournalFile *f,
+ direction_t direction,
+ Object **ret,
+ uint64_t *offset) {
+
+ int r;
+
+ assert(j);
+ assert(f);
+ assert(ret);
+ assert(offset);
+
+ if (!j->level0) {
+ /* No matches is simple */
+
+ if (j->current_location.type == LOCATION_HEAD)
+ return direction == DIRECTION_DOWN ? journal_file_next_entry(f, 0, DIRECTION_DOWN, ret, offset) : 0;
+ if (j->current_location.type == LOCATION_TAIL)
+ return direction == DIRECTION_UP ? journal_file_next_entry(f, 0, DIRECTION_UP, ret, offset) : 0;
+ if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
+ return journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, ret, offset);
+ if (j->current_location.monotonic_set) {
+ r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
+ if (r != 0)
+ return r;
+ }
+ if (j->current_location.realtime_set)
+ return journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, ret, offset);
+
+ return journal_file_next_entry(f, 0, direction, ret, offset);
+ } else
+ return find_location_for_match(j, j->level0, f, direction, ret, offset);
+}
+
+static int next_with_matches(
+ sd_journal *j,
+ JournalFile *f,
+ direction_t direction,
+ Object **ret,
+ uint64_t *offset) {
+
+ assert(j);
+ assert(f);
+ assert(ret);
+ assert(offset);
+
+ /* No matches is easy. We simple advance the file
+ * pointer by one. */
+ if (!j->level0)
+ return journal_file_next_entry(f, f->current_offset, direction, ret, offset);
+
+ /* If we have a match then we look for the next matching entry
+ * with an offset at least one step larger */
+ return next_for_match(j, j->level0, f,
+ direction == DIRECTION_DOWN ? f->current_offset + 1
+ : f->current_offset - 1,
+ direction, ret, offset);
+}
+
+static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction) {
+ Object *c;
+ uint64_t cp, n_entries;
+ int r;
+
+ assert(j);
+ assert(f);
+
+ (void) journal_file_read_tail_timestamp(j, f);
+
+ n_entries = le64toh(f->header->n_entries);
+
+ /* If we hit EOF before, we don't need to look into this file again
+ * unless direction changed or new entries appeared. */
+ if (f->last_direction == direction &&
+ f->location_type == (direction == DIRECTION_DOWN ? LOCATION_TAIL : LOCATION_HEAD) &&
+ n_entries == f->last_n_entries)
+ return 0;
+
+ f->last_n_entries = n_entries;
+
+ if (f->last_direction == direction && f->current_offset > 0) {
+ /* LOCATION_SEEK here means we did the work in a previous
+ * iteration and the current location already points to a
+ * candidate entry. */
+ if (f->location_type != LOCATION_SEEK) {
+ r = next_with_matches(j, f, direction, &c, &cp);
+ if (r <= 0)
+ return r;
+
+ journal_file_save_location(f, c, cp);
+ }
+ } else {
+ f->last_direction = direction;
+
+ r = find_location_with_matches(j, f, direction, &c, &cp);
+ if (r <= 0)
+ return r;
+
+ journal_file_save_location(f, c, cp);
+ }
+
+ /* OK, we found the spot, now let's advance until an entry
+ * that is actually different from what we were previously
+ * looking at. This is necessary to handle entries which exist
+ * in two (or more) journal files, and which shall all be
+ * suppressed but one. */
+
+ for (;;) {
+ bool found;
+
+ if (j->current_location.type == LOCATION_DISCRETE) {
+ int k;
+
+ k = compare_with_location(j, f, &j->current_location, j->current_file);
+
+ found = direction == DIRECTION_DOWN ? k > 0 : k < 0;
+ } else
+ found = true;
+
+ if (found)
+ return 1;
+
+ r = next_with_matches(j, f, direction, &c, &cp);
+ if (r <= 0)
+ return r;
+
+ journal_file_save_location(f, c, cp);
+ }
+}
+
+static int compare_locations(sd_journal *j, JournalFile *af, JournalFile *bf) {
+ int r;
+
+ assert(j);
+ assert(af);
+ assert(af->header);
+ assert(bf);
+ assert(bf->header);
+ assert(af->location_type == LOCATION_SEEK);
+ assert(bf->location_type == LOCATION_SEEK);
+
+ /* If contents, timestamps and seqnum match, these entries are identical. */
+ if (sd_id128_equal(af->current_boot_id, bf->current_boot_id) &&
+ af->current_monotonic == bf->current_monotonic &&
+ af->current_realtime == bf->current_realtime &&
+ af->current_xor_hash == bf->current_xor_hash &&
+ sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id) &&
+ af->current_seqnum == bf->current_seqnum)
+ return 0;
+
+ if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id)) {
+ /* If this is from the same seqnum source, compare seqnums */
+ r = CMP(af->current_seqnum, bf->current_seqnum);
+ if (r != 0)
+ return r;
+
+ /* Wow! This is weird, different data but the same seqnums? Something is borked, but let's
+ * make the best of it and compare by time. */
+ }
+
+ if (sd_id128_equal(af->current_boot_id, bf->current_boot_id))
+ /* If the boot id matches, compare monotonic time */
+ r = CMP(af->current_monotonic, bf->current_monotonic);
+ else
+ /* If they don't match try to compare boot IDs */
+ r = compare_boot_ids(j, af->current_boot_id, bf->current_boot_id);
+ if (r != 0)
+ return r;
+
+ /* Otherwise, compare UTC time */
+ r = CMP(af->current_realtime, bf->current_realtime);
+ if (r != 0)
+ return r;
+
+ /* Finally, compare by contents */
+ return CMP(af->current_xor_hash, bf->current_xor_hash);
+}
+
+static int real_journal_next(sd_journal *j, direction_t direction) {
+ JournalFile *new_file = NULL;
+ unsigned n_files;
+ const void **files;
+ Object *o;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+
+ r = iterated_cache_get(j->files_cache, NULL, &files, &n_files);
+ if (r < 0)
+ return r;
+
+ for (unsigned i = 0; i < n_files; i++) {
+ JournalFile *f = (JournalFile *)files[i];
+ bool found;
+
+ r = next_beyond_location(j, f, direction);
+ if (r < 0) {
+ log_debug_errno(r, "Can't iterate through %s, ignoring: %m", f->path);
+ remove_file_real(j, f);
+ continue;
+ } else if (r == 0) {
+ f->location_type = direction == DIRECTION_DOWN ? LOCATION_TAIL : LOCATION_HEAD;
+ continue;
+ }
+
+ if (!new_file)
+ found = true;
+ else {
+ int k;
+
+ k = compare_locations(j, f, new_file);
+
+ found = direction == DIRECTION_DOWN ? k < 0 : k > 0;
+ }
+
+ if (found)
+ new_file = f;
+ }
+
+ if (!new_file)
+ return 0;
+
+ r = journal_file_move_to_object(new_file, OBJECT_ENTRY, new_file->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ set_location(j, new_file, o);
+
+ return 1;
+}
+
+_public_ int sd_journal_next(sd_journal *j) {
+ return real_journal_next(j, DIRECTION_DOWN);
+}
+
+_public_ int sd_journal_previous(sd_journal *j) {
+ return real_journal_next(j, DIRECTION_UP);
+}
+
+_public_ int sd_journal_step_one(sd_journal *j, int advanced) {
+ assert_return(j, -EINVAL);
+
+ if (j->current_location.type == LOCATION_HEAD)
+ return sd_journal_next(j);
+ if (j->current_location.type == LOCATION_TAIL)
+ return sd_journal_previous(j);
+ return real_journal_next(j, advanced ? DIRECTION_DOWN : DIRECTION_UP);
+}
+
+static int real_journal_next_skip(sd_journal *j, direction_t direction, uint64_t skip) {
+ int c = 0, r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+ assert_return(skip <= INT_MAX, -ERANGE);
+
+ if (skip == 0) {
+ /* If this is not a discrete skip, then at least
+ * resolve the current location */
+ if (j->current_location.type != LOCATION_DISCRETE) {
+ r = real_journal_next(j, direction);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+ }
+
+ do {
+ r = real_journal_next(j, direction);
+ if (r < 0)
+ return r;
+
+ if (r == 0)
+ return c;
+
+ skip--;
+ c++;
+ } while (skip > 0);
+
+ return c;
+}
+
+_public_ int sd_journal_next_skip(sd_journal *j, uint64_t skip) {
+ return real_journal_next_skip(j, DIRECTION_DOWN, skip);
+}
+
+_public_ int sd_journal_previous_skip(sd_journal *j, uint64_t skip) {
+ return real_journal_next_skip(j, DIRECTION_UP, skip);
+}
+
+_public_ int sd_journal_get_cursor(sd_journal *j, char **cursor) {
+ Object *o;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+ assert_return(cursor, -EINVAL);
+
+ if (!j->current_file || j->current_file->current_offset <= 0)
+ return -EADDRNOTAVAIL;
+
+ r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ if (asprintf(cursor,
+ "s=%s;i=%"PRIx64";b=%s;m=%"PRIx64";t=%"PRIx64";x=%"PRIx64,
+ SD_ID128_TO_STRING(j->current_file->header->seqnum_id), le64toh(o->entry.seqnum),
+ SD_ID128_TO_STRING(o->entry.boot_id), le64toh(o->entry.monotonic),
+ le64toh(o->entry.realtime),
+ le64toh(o->entry.xor_hash)) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+_public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) {
+ unsigned long long seqnum, monotonic, realtime, xor_hash;
+ bool seqnum_id_set = false,
+ seqnum_set = false,
+ boot_id_set = false,
+ monotonic_set = false,
+ realtime_set = false,
+ xor_hash_set = false;
+ sd_id128_t seqnum_id, boot_id;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+ assert_return(!isempty(cursor), -EINVAL);
+
+ for (const char *p = cursor;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, ";", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (word[0] == '\0' || word[1] != '=')
+ return -EINVAL;
+
+ switch (word[0]) {
+ case 's':
+ seqnum_id_set = true;
+ r = sd_id128_from_string(word + 2, &seqnum_id);
+ if (r < 0)
+ return r;
+ break;
+
+ case 'i':
+ seqnum_set = true;
+ if (sscanf(word + 2, "%llx", &seqnum) != 1)
+ return -EINVAL;
+ break;
+
+ case 'b':
+ boot_id_set = true;
+ r = sd_id128_from_string(word + 2, &boot_id);
+ if (r < 0)
+ return r;
+ break;
+
+ case 'm':
+ monotonic_set = true;
+ if (sscanf(word + 2, "%llx", &monotonic) != 1)
+ return -EINVAL;
+ break;
+
+ case 't':
+ realtime_set = true;
+ if (sscanf(word + 2, "%llx", &realtime) != 1)
+ return -EINVAL;
+ break;
+
+ case 'x':
+ xor_hash_set = true;
+ if (sscanf(word + 2, "%llx", &xor_hash) != 1)
+ return -EINVAL;
+ break;
+ }
+ }
+
+ if ((!seqnum_set || !seqnum_id_set) &&
+ (!monotonic_set || !boot_id_set) &&
+ !realtime_set)
+ return -EINVAL;
+
+ detach_location(j);
+ j->current_location = (Location) {
+ .type = LOCATION_SEEK,
+ };
+
+ if (realtime_set) {
+ j->current_location.realtime = (uint64_t) realtime;
+ j->current_location.realtime_set = true;
+ }
+
+ if (seqnum_set && seqnum_id_set) {
+ j->current_location.seqnum = (uint64_t) seqnum;
+ j->current_location.seqnum_id = seqnum_id;
+ j->current_location.seqnum_set = true;
+ }
+
+ if (monotonic_set && boot_id_set) {
+ j->current_location.monotonic = (uint64_t) monotonic;
+ j->current_location.boot_id = boot_id;
+ j->current_location.monotonic_set = true;
+ }
+
+ if (xor_hash_set) {
+ j->current_location.xor_hash = (uint64_t) xor_hash;
+ j->current_location.xor_hash_set = true;
+ }
+
+ return 0;
+}
+
+_public_ int sd_journal_test_cursor(sd_journal *j, const char *cursor) {
+ int r;
+ Object *o;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+ assert_return(!isempty(cursor), -EINVAL);
+
+ if (!j->current_file || j->current_file->current_offset <= 0)
+ return -EADDRNOTAVAIL;
+
+ r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *item = NULL;
+ unsigned long long ll;
+ sd_id128_t id;
+ int k = 0;
+
+ r = extract_first_word(&cursor, &item, ";", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return r;
+
+ if (r == 0)
+ break;
+
+ if (strlen(item) < 2 || item[1] != '=')
+ return -EINVAL;
+
+ switch (item[0]) {
+
+ case 's':
+ k = sd_id128_from_string(item+2, &id);
+ if (k < 0)
+ return k;
+ if (!sd_id128_equal(id, j->current_file->header->seqnum_id))
+ return 0;
+ break;
+
+ case 'i':
+ if (sscanf(item+2, "%llx", &ll) != 1)
+ return -EINVAL;
+ if (ll != le64toh(o->entry.seqnum))
+ return 0;
+ break;
+
+ case 'b':
+ k = sd_id128_from_string(item+2, &id);
+ if (k < 0)
+ return k;
+ if (!sd_id128_equal(id, o->entry.boot_id))
+ return 0;
+ break;
+
+ case 'm':
+ if (sscanf(item+2, "%llx", &ll) != 1)
+ return -EINVAL;
+ if (ll != le64toh(o->entry.monotonic))
+ return 0;
+ break;
+
+ case 't':
+ if (sscanf(item+2, "%llx", &ll) != 1)
+ return -EINVAL;
+ if (ll != le64toh(o->entry.realtime))
+ return 0;
+ break;
+
+ case 'x':
+ if (sscanf(item+2, "%llx", &ll) != 1)
+ return -EINVAL;
+ if (ll != le64toh(o->entry.xor_hash))
+ return 0;
+ break;
+ }
+ }
+
+ return 1;
+}
+
+_public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+
+ detach_location(j);
+
+ j->current_location = (Location) {
+ .type = LOCATION_SEEK,
+ .boot_id = boot_id,
+ .monotonic = usec,
+ .monotonic_set = true,
+ };
+
+ return 0;
+}
+
+_public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+
+ detach_location(j);
+
+ j->current_location = (Location) {
+ .type = LOCATION_SEEK,
+ .realtime = usec,
+ .realtime_set = true,
+ };
+
+ return 0;
+}
+
+_public_ int sd_journal_seek_head(sd_journal *j) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+
+ detach_location(j);
+
+ j->current_location = (Location) {
+ .type = LOCATION_HEAD,
+ };
+
+ return 0;
+}
+
+_public_ int sd_journal_seek_tail(sd_journal *j) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+
+ detach_location(j);
+
+ j->current_location = (Location) {
+ .type = LOCATION_TAIL,
+ };
+
+ return 0;
+}
+
+static void check_network(sd_journal *j, int fd) {
+ assert(j);
+
+ if (j->on_network)
+ return;
+
+ j->on_network = fd_is_network_fs(fd);
+}
+
+static bool file_has_type_prefix(const char *prefix, const char *filename) {
+ const char *full, *tilded, *atted;
+
+ full = strjoina(prefix, ".journal");
+ tilded = strjoina(full, "~");
+ atted = strjoina(prefix, "@");
+
+ return STR_IN_SET(filename, full, tilded) ||
+ startswith(filename, atted);
+}
+
+static bool file_type_wanted(int flags, const char *filename) {
+ assert(filename);
+
+ if (!ENDSWITH_SET(filename, ".journal", ".journal~"))
+ return false;
+
+ /* no flags set → every type is OK */
+ if (!(flags & (SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER)))
+ return true;
+
+ if (FLAGS_SET(flags, SD_JOURNAL_CURRENT_USER)) {
+ char prefix[5 + DECIMAL_STR_MAX(uid_t) + 1];
+
+ xsprintf(prefix, "user-" UID_FMT, getuid());
+
+ if (file_has_type_prefix(prefix, filename))
+ return true;
+
+ /* If SD_JOURNAL_CURRENT_USER is specified and we are invoked under a system UID, then
+ * automatically enable SD_JOURNAL_SYSTEM too, because journald will actually put system user
+ * data into the system journal. */
+
+ if (uid_for_system_journal(getuid()))
+ flags |= SD_JOURNAL_SYSTEM;
+ }
+
+ if (FLAGS_SET(flags, SD_JOURNAL_SYSTEM) && file_has_type_prefix("system", filename))
+ return true;
+
+ return false;
+}
+
+static bool path_has_prefix(sd_journal *j, const char *path, const char *prefix) {
+ assert(j);
+ assert(path);
+ assert(prefix);
+
+ if (j->toplevel_fd >= 0)
+ return false;
+
+ return path_startswith(path, prefix);
+}
+
+static void track_file_disposition(sd_journal *j, JournalFile *f) {
+ assert(j);
+ assert(f);
+
+ if (!j->has_runtime_files && path_has_prefix(j, f->path, "/run"))
+ j->has_runtime_files = true;
+ else if (!j->has_persistent_files && path_has_prefix(j, f->path, "/var"))
+ j->has_persistent_files = true;
+}
+
+static const char *skip_slash(const char *p) {
+
+ if (!p)
+ return NULL;
+
+ while (*p == '/')
+ p++;
+
+ return p;
+}
+
+static int add_any_file(
+ sd_journal *j,
+ int fd,
+ const char *path) {
+
+ _cleanup_close_ int our_fd = -EBADF;
+ JournalFile *f;
+ struct stat st;
+ int r;
+
+ assert(j);
+ assert(fd >= 0 || path);
+
+ if (fd < 0) {
+ assert(path); /* For gcc. */
+ if (j->toplevel_fd >= 0)
+ /* If there's a top-level fd defined make the path relative, explicitly, since otherwise
+ * openat() ignores the first argument. */
+
+ fd = our_fd = openat(j->toplevel_fd, skip_slash(path), O_RDONLY|O_CLOEXEC|O_NONBLOCK);
+ else
+ fd = our_fd = open(path, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
+ if (fd < 0) {
+ r = log_debug_errno(errno, "Failed to open journal file %s: %m", path);
+ goto error;
+ }
+
+ r = fd_nonblock(fd, false);
+ if (r < 0) {
+ r = log_debug_errno(errno, "Failed to turn off O_NONBLOCK for %s: %m", path);
+ goto error;
+ }
+ }
+
+ if (fstat(fd, &st) < 0) {
+ r = log_debug_errno(errno, "Failed to fstat %s: %m", path ?: "fd");
+ goto error;
+ }
+
+ r = stat_verify_regular(&st);
+ if (r < 0) {
+ log_debug_errno(r, "Refusing to open %s: %m", path ?: "fd");
+ goto error;
+ }
+
+ if (path) {
+ f = ordered_hashmap_get(j->files, path);
+ if (f) {
+ if (stat_inode_same(&f->last_stat, &st)) {
+ /* We already track this file, under the same path and with the same
+ * device/inode numbers, it's hence really the same. Mark this file as seen
+ * in this generation. This is used to GC old files in process_q_overflow()
+ * to detect journal files that are still there and discern them from those
+ * which are gone. */
+
+ f->last_seen_generation = j->generation;
+ (void) journal_file_read_tail_timestamp(j, f);
+ return 0;
+ }
+
+ /* So we tracked a file under this name, but it has a different inode/device. In that
+ * case, it got replaced (probably due to rotation?), let's drop it hence from our
+ * list. */
+ remove_file_real(j, f);
+ f = NULL;
+ }
+ }
+
+ if (ordered_hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
+ r = log_debug_errno(SYNTHETIC_ERRNO(ETOOMANYREFS),
+ "Too many open journal files, not adding %s.", path ?: "fd");
+ goto error;
+ }
+
+ r = journal_file_open(fd, path, O_RDONLY, 0, 0, 0, NULL, j->mmap, NULL, &f);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to open journal file %s: %m", path ?: "from fd");
+ goto error;
+ }
+
+ /* journal_file_dump(f); */
+
+ /* journal_file_open() generates an replacement fname if necessary, so we can use f->path. */
+ r = ordered_hashmap_put(j->files, f->path, f);
+ if (r < 0) {
+ f->close_fd = false; /* Make sure journal_file_close() doesn't close the caller's fd
+ * (or our own). The caller or we will do that ourselves. */
+ (void) journal_file_close(f);
+ goto error;
+ }
+
+ TAKE_FD(our_fd); /* the fd is now owned by the JournalFile object */
+
+ f->last_seen_generation = j->generation;
+
+ track_file_disposition(j, f);
+ check_network(j, f->fd);
+ (void) journal_file_read_tail_timestamp(j, f);
+
+ j->current_invalidate_counter++;
+
+ log_debug("File %s added.", f->path);
+
+ return 0;
+
+error:
+ (void) journal_put_error(j, r, path); /* path==NULL is OK. */
+ return r;
+}
+
+static int add_file_by_name(
+ sd_journal *j,
+ const char *prefix,
+ const char *filename) {
+
+ _cleanup_free_ char *path = NULL;
+
+ assert(j);
+ assert(prefix);
+ assert(filename);
+
+ if (j->no_new_files)
+ return 0;
+
+ if (!file_type_wanted(j->flags, filename))
+ return 0;
+
+ path = path_join(prefix, filename);
+ if (!path)
+ return -ENOMEM;
+
+ return add_any_file(j, -1, path);
+}
+
+static int remove_file_by_name(
+ sd_journal *j,
+ const char *prefix,
+ const char *filename) {
+
+ _cleanup_free_ char *path = NULL;
+ JournalFile *f;
+
+ assert(j);
+ assert(prefix);
+ assert(filename);
+
+ path = path_join(prefix, filename);
+ if (!path)
+ return -ENOMEM;
+
+ f = ordered_hashmap_get(j->files, path);
+ if (!f)
+ return 0;
+
+ remove_file_real(j, f);
+ return 1;
+}
+
+static void remove_file_real(sd_journal *j, JournalFile *f) {
+ assert(j);
+ assert(f);
+
+ (void) ordered_hashmap_remove(j->files, f->path);
+
+ log_debug("File %s removed.", f->path);
+
+ if (j->current_file == f) {
+ j->current_file = NULL;
+ j->current_field = 0;
+ }
+
+ if (j->unique_file == f) {
+ /* Jump to the next unique_file or NULL if that one was last */
+ j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
+ j->unique_offset = 0;
+ if (!j->unique_file)
+ j->unique_file_lost = true;
+ }
+
+ if (j->fields_file == f) {
+ j->fields_file = ordered_hashmap_next(j->files, j->fields_file->path);
+ j->fields_offset = 0;
+ if (!j->fields_file)
+ j->fields_file_lost = true;
+ }
+
+ journal_file_unlink_newest_by_boot_id(j, f);
+ (void) journal_file_close(f);
+
+ j->current_invalidate_counter++;
+}
+
+static int dirname_is_machine_id(const char *fn) {
+ sd_id128_t id, machine;
+ const char *e;
+ int r;
+
+ /* Returns true if the specified directory name matches the local machine ID */
+
+ r = sd_id128_get_machine(&machine);
+ if (r < 0)
+ return r;
+
+ e = strchr(fn, '.');
+ if (e) {
+ const char *k;
+
+ /* Looks like it has a namespace suffix. Verify that. */
+ if (!log_namespace_name_valid(e + 1))
+ return false;
+
+ k = strndupa_safe(fn, e - fn);
+ r = sd_id128_from_string(k, &id);
+ } else
+ r = sd_id128_from_string(fn, &id);
+ if (r < 0)
+ return r;
+
+ return sd_id128_equal(id, machine);
+}
+
+static int dirname_has_namespace(const char *fn, const char *namespace) {
+ const char *e;
+
+ /* Returns true if the specified directory name matches the specified namespace */
+
+ e = strchr(fn, '.');
+ if (e) {
+ const char *k;
+
+ if (!namespace)
+ return false;
+
+ if (!streq(e + 1, namespace))
+ return false;
+
+ k = strndupa_safe(fn, e - fn);
+ return id128_is_valid(k);
+ }
+
+ if (namespace)
+ return false;
+
+ return id128_is_valid(fn);
+}
+
+static bool dirent_is_journal_file(const struct dirent *de) {
+ assert(de);
+
+ /* Returns true if the specified directory entry looks like a journal file we might be interested in */
+
+ if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN))
+ return false;
+
+ return endswith(de->d_name, ".journal") ||
+ endswith(de->d_name, ".journal~");
+}
+
+static bool dirent_is_journal_subdir(const struct dirent *de) {
+ const char *e, *n;
+ assert(de);
+
+ /* returns true if the specified directory entry looks like a directory that might contain journal
+ * files we might be interested in, i.e. is either a 128-bit ID or a 128-bit ID suffixed by a
+ * namespace. */
+
+ if (!IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN))
+ return false;
+
+ e = strchr(de->d_name, '.');
+ if (!e)
+ return id128_is_valid(de->d_name); /* No namespace */
+
+ n = strndupa_safe(de->d_name, e - de->d_name);
+ if (!id128_is_valid(n))
+ return false;
+
+ return log_namespace_name_valid(e + 1);
+}
+
+static int directory_open(sd_journal *j, const char *path, DIR **ret) {
+ DIR *d;
+
+ assert(j);
+ assert(path);
+ assert(ret);
+
+ if (j->toplevel_fd < 0)
+ d = opendir(path);
+ else
+ /* Open the specified directory relative to the toplevel fd. Enforce that the path specified is
+ * relative, by dropping the initial slash */
+ d = xopendirat(j->toplevel_fd, skip_slash(path), 0);
+ if (!d)
+ return -errno;
+
+ *ret = d;
+ return 0;
+}
+
+static int add_directory(sd_journal *j, const char *prefix, const char *dirname);
+
+static void directory_enumerate(sd_journal *j, Directory *m, DIR *d) {
+ assert(j);
+ assert(m);
+ assert(d);
+
+ FOREACH_DIRENT_ALL(de, d, goto fail) {
+ if (dirent_is_journal_file(de))
+ (void) add_file_by_name(j, m->path, de->d_name);
+
+ if (m->is_root && dirent_is_journal_subdir(de))
+ (void) add_directory(j, m->path, de->d_name);
+ }
+
+ return;
+fail:
+ log_debug_errno(errno, "Failed to enumerate directory %s, ignoring: %m", m->path);
+}
+
+static void directory_watch(sd_journal *j, Directory *m, int fd, uint32_t mask) {
+ int r;
+
+ assert(j);
+ assert(m);
+ assert(fd >= 0);
+
+ /* Watch this directory if that's enabled and if it not being watched yet. */
+
+ if (m->wd > 0) /* Already have a watch? */
+ return;
+ if (j->inotify_fd < 0) /* Not watching at all? */
+ return;
+
+ m->wd = inotify_add_watch_fd(j->inotify_fd, fd, mask);
+ if (m->wd < 0) {
+ log_debug_errno(errno, "Failed to watch journal directory '%s', ignoring: %m", m->path);
+ return;
+ }
+
+ r = hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m);
+ if (r == -EEXIST)
+ log_debug_errno(r, "Directory '%s' already being watched under a different path, ignoring: %m", m->path);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add watch for journal directory '%s' to hashmap, ignoring: %m", m->path);
+ (void) inotify_rm_watch(j->inotify_fd, m->wd);
+ m->wd = -1;
+ }
+}
+
+static int add_directory(
+ sd_journal *j,
+ const char *prefix,
+ const char *dirname) {
+
+ _cleanup_free_ char *path = NULL;
+ _cleanup_closedir_ DIR *d = NULL;
+ Directory *m;
+ int r, k;
+
+ assert(j);
+ assert(prefix);
+
+ /* Adds a journal file directory to watch. If the directory is already tracked this updates the inotify watch
+ * and reenumerates directory contents */
+
+ path = path_join(prefix, dirname);
+ if (!path) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ log_debug("Considering directory '%s'.", path);
+
+ /* We consider everything local that is in a directory for the local machine ID, or that is stored in /run */
+ if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
+ !((dirname && dirname_is_machine_id(dirname) > 0) || path_has_prefix(j, path, "/run")))
+ return 0;
+
+ if (dirname &&
+ (!(FLAGS_SET(j->flags, SD_JOURNAL_ALL_NAMESPACES) ||
+ dirname_has_namespace(dirname, j->namespace) > 0 ||
+ (FLAGS_SET(j->flags, SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE) && dirname_has_namespace(dirname, NULL) > 0))))
+ return 0;
+
+ r = directory_open(j, path, &d);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to open directory '%s': %m", path);
+ goto fail;
+ }
+
+ m = hashmap_get(j->directories_by_path, path);
+ if (!m) {
+ m = new(Directory, 1);
+ if (!m) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ *m = (Directory) {
+ .is_root = false,
+ .path = path,
+ };
+
+ if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
+ free(m);
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ path = NULL; /* avoid freeing in cleanup */
+ j->current_invalidate_counter++;
+
+ log_debug("Directory %s added.", m->path);
+
+ } else if (m->is_root)
+ return 0; /* Don't 'downgrade' from root directory */
+
+ m->last_seen_generation = j->generation;
+
+ directory_watch(j, m, dirfd(d),
+ IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
+ IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
+ IN_ONLYDIR);
+
+ if (!j->no_new_files)
+ directory_enumerate(j, m, d);
+
+ check_network(j, dirfd(d));
+
+ return 0;
+
+fail:
+ k = journal_put_error(j, r, path ?: prefix);
+ if (k < 0)
+ return k;
+
+ return r;
+}
+
+static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ Directory *m;
+ int r, k;
+
+ assert(j);
+
+ /* Adds a root directory to our set of directories to use. If the root directory is already in the set, we
+ * update the inotify logic, and renumerate the directory entries. This call may hence be called to initially
+ * populate the set, as well as to update it later. */
+
+ if (p) {
+ /* If there's a path specified, use it. */
+
+ log_debug("Considering root directory '%s'.", p);
+
+ if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
+ !path_has_prefix(j, p, "/run"))
+ return -EINVAL;
+
+ if (j->prefix)
+ p = strjoina(j->prefix, p);
+
+ r = directory_open(j, p, &d);
+ if (r == -ENOENT && missing_ok)
+ return 0;
+ if (r < 0) {
+ log_debug_errno(r, "Failed to open root directory %s: %m", p);
+ goto fail;
+ }
+ } else {
+ _cleanup_close_ int dfd = -EBADF;
+
+ /* If there's no path specified, then we use the top-level fd itself. We duplicate the fd here, since
+ * opendir() will take possession of the fd, and close it, which we don't want. */
+
+ p = "."; /* store this as "." in the directories hashmap */
+
+ dfd = fcntl(j->toplevel_fd, F_DUPFD_CLOEXEC, 3);
+ if (dfd < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ d = take_fdopendir(&dfd);
+ if (!d) {
+ r = -errno;
+ goto fail;
+ }
+
+ rewinddir(d);
+ }
+
+ m = hashmap_get(j->directories_by_path, p);
+ if (!m) {
+ m = new0(Directory, 1);
+ if (!m) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ m->is_root = true;
+
+ m->path = strdup(p);
+ if (!m->path) {
+ free(m);
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
+ free(m->path);
+ free(m);
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ j->current_invalidate_counter++;
+
+ log_debug("Root directory %s added.", m->path);
+
+ } else if (!m->is_root)
+ return 0;
+
+ directory_watch(j, m, dirfd(d),
+ IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
+ IN_ONLYDIR);
+
+ if (!j->no_new_files)
+ directory_enumerate(j, m, d);
+
+ check_network(j, dirfd(d));
+
+ return 0;
+
+fail:
+ k = journal_put_error(j, r, p);
+ if (k < 0)
+ return k;
+
+ return r;
+}
+
+static void remove_directory(sd_journal *j, Directory *d) {
+ assert(j);
+
+ if (d->wd > 0) {
+ hashmap_remove(j->directories_by_wd, INT_TO_PTR(d->wd));
+
+ if (j->inotify_fd >= 0)
+ (void) inotify_rm_watch(j->inotify_fd, d->wd);
+ }
+
+ hashmap_remove(j->directories_by_path, d->path);
+
+ if (d->is_root)
+ log_debug("Root directory %s removed.", d->path);
+ else
+ log_debug("Directory %s removed.", d->path);
+
+ free(d->path);
+ free(d);
+}
+
+static int add_search_paths(sd_journal *j) {
+
+ static const char search_paths[] =
+ "/run/log/journal\0"
+ "/var/log/journal\0";
+
+ assert(j);
+
+ /* We ignore most errors here, since the idea is to only open
+ * what's actually accessible, and ignore the rest. */
+
+ NULSTR_FOREACH(p, search_paths)
+ (void) add_root_directory(j, p, true);
+
+ if (!(j->flags & SD_JOURNAL_LOCAL_ONLY))
+ (void) add_root_directory(j, "/var/log/journal/remote", true);
+
+ return 0;
+}
+
+static int add_current_paths(sd_journal *j) {
+ JournalFile *f;
+
+ assert(j);
+ assert(j->no_new_files);
+
+ /* Simply adds all directories for files we have open as directories. We don't expect errors here, so we
+ * treat them as fatal. */
+
+ ORDERED_HASHMAP_FOREACH(f, j->files) {
+ _cleanup_free_ char *dir = NULL;
+ int r;
+
+ r = path_extract_directory(f->path, &dir);
+ if (r < 0)
+ return r;
+
+ r = add_directory(j, dir, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int allocate_inotify(sd_journal *j) {
+ assert(j);
+
+ if (j->inotify_fd < 0) {
+ j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (j->inotify_fd < 0)
+ return -errno;
+ }
+
+ return hashmap_ensure_allocated(&j->directories_by_wd, NULL);
+}
+
+static sd_journal *journal_new(int flags, const char *path, const char *namespace) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+
+ j = new(sd_journal, 1);
+ if (!j)
+ return NULL;
+
+ *j = (sd_journal) {
+ .origin_id = origin_id_query(),
+ .toplevel_fd = -EBADF,
+ .inotify_fd = -EBADF,
+ .flags = flags,
+ .data_threshold = DEFAULT_DATA_THRESHOLD,
+ };
+
+ if (path) {
+ char *t;
+
+ t = strdup(path);
+ if (!t)
+ return NULL;
+
+ if (flags & SD_JOURNAL_OS_ROOT)
+ j->prefix = t;
+ else
+ j->path = t;
+ }
+
+ if (namespace) {
+ j->namespace = strdup(namespace);
+ if (!j->namespace)
+ return NULL;
+ }
+
+ j->files = ordered_hashmap_new(&path_hash_ops);
+ if (!j->files)
+ return NULL;
+
+ j->files_cache = ordered_hashmap_iterated_cache_new(j->files);
+ j->directories_by_path = hashmap_new(&path_hash_ops);
+ j->mmap = mmap_cache_new();
+ if (!j->files_cache || !j->directories_by_path || !j->mmap)
+ return NULL;
+
+ return TAKE_PTR(j);
+}
+
+#define OPEN_ALLOWED_FLAGS \
+ (SD_JOURNAL_LOCAL_ONLY | \
+ SD_JOURNAL_RUNTIME_ONLY | \
+ SD_JOURNAL_SYSTEM | \
+ SD_JOURNAL_CURRENT_USER | \
+ SD_JOURNAL_ALL_NAMESPACES | \
+ SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE)
+
+_public_ int sd_journal_open_namespace(sd_journal **ret, const char *namespace, int flags) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return((flags & ~OPEN_ALLOWED_FLAGS) == 0, -EINVAL);
+
+ j = journal_new(flags, NULL, namespace);
+ if (!j)
+ return -ENOMEM;
+
+ r = add_search_paths(j);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(j);
+ return 0;
+}
+
+_public_ int sd_journal_open(sd_journal **ret, int flags) {
+ return sd_journal_open_namespace(ret, NULL, flags);
+}
+
+#define OPEN_CONTAINER_ALLOWED_FLAGS \
+ (SD_JOURNAL_LOCAL_ONLY | SD_JOURNAL_SYSTEM)
+
+_public_ int sd_journal_open_container(sd_journal **ret, const char *machine, int flags) {
+ _cleanup_free_ char *root = NULL, *class = NULL;
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ char *p;
+ int r;
+
+ /* This is deprecated, people should use machined's OpenMachineRootDirectory() call instead in
+ * combination with sd_journal_open_directory_fd(). */
+
+ assert_return(machine, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return((flags & ~OPEN_CONTAINER_ALLOWED_FLAGS) == 0, -EINVAL);
+ assert_return(hostname_is_valid(machine, 0), -EINVAL);
+
+ p = strjoina("/run/systemd/machines/", machine);
+ r = parse_env_file(NULL, p,
+ "ROOT", &root,
+ "CLASS", &class);
+ if (r == -ENOENT)
+ return -EHOSTDOWN;
+ if (r < 0)
+ return r;
+ if (!root)
+ return -ENODATA;
+
+ if (!streq_ptr(class, "container"))
+ return -EIO;
+
+ j = journal_new(flags, root, NULL);
+ if (!j)
+ return -ENOMEM;
+
+ r = add_search_paths(j);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(j);
+ return 0;
+}
+
+#define OPEN_DIRECTORY_ALLOWED_FLAGS \
+ (SD_JOURNAL_OS_ROOT | \
+ SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER )
+
+_public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int flags) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(path, -EINVAL);
+ assert_return((flags & ~OPEN_DIRECTORY_ALLOWED_FLAGS) == 0, -EINVAL);
+
+ j = journal_new(flags, path, NULL);
+ if (!j)
+ return -ENOMEM;
+
+ if (flags & SD_JOURNAL_OS_ROOT)
+ r = add_search_paths(j);
+ else
+ r = add_root_directory(j, path, false);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(j);
+ return 0;
+}
+
+_public_ int sd_journal_open_files(sd_journal **ret, const char **paths, int flags) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(flags == 0, -EINVAL);
+
+ j = journal_new(flags, NULL, NULL);
+ if (!j)
+ return -ENOMEM;
+
+ STRV_FOREACH(path, paths) {
+ r = add_any_file(j, -1, *path);
+ if (r < 0)
+ return r;
+ }
+
+ j->no_new_files = true;
+
+ *ret = TAKE_PTR(j);
+ return 0;
+}
+
+#define OPEN_DIRECTORY_FD_ALLOWED_FLAGS \
+ (SD_JOURNAL_OS_ROOT | \
+ SD_JOURNAL_SYSTEM | \
+ SD_JOURNAL_CURRENT_USER | \
+ SD_JOURNAL_TAKE_DIRECTORY_FD)
+
+_public_ int sd_journal_open_directory_fd(sd_journal **ret, int fd, int flags) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ struct stat st;
+ bool take_fd;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(fd >= 0, -EBADF);
+ assert_return((flags & ~OPEN_DIRECTORY_FD_ALLOWED_FLAGS) == 0, -EINVAL);
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode))
+ return -EBADFD;
+
+ take_fd = FLAGS_SET(flags, SD_JOURNAL_TAKE_DIRECTORY_FD);
+ j = journal_new(flags & ~SD_JOURNAL_TAKE_DIRECTORY_FD, NULL, NULL);
+ if (!j)
+ return -ENOMEM;
+
+ j->toplevel_fd = fd;
+
+ if (flags & SD_JOURNAL_OS_ROOT)
+ r = add_search_paths(j);
+ else
+ r = add_root_directory(j, NULL, false);
+ if (r < 0)
+ return r;
+
+ SET_FLAG(j->flags, SD_JOURNAL_TAKE_DIRECTORY_FD, take_fd);
+
+ *ret = TAKE_PTR(j);
+ return 0;
+}
+
+_public_ int sd_journal_open_files_fd(sd_journal **ret, int fds[], unsigned n_fds, int flags) {
+ JournalFile *f;
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(n_fds > 0, -EBADF);
+ assert_return(flags == 0, -EINVAL);
+
+ j = journal_new(flags, NULL, NULL);
+ if (!j)
+ return -ENOMEM;
+
+ for (unsigned i = 0; i < n_fds; i++) {
+ struct stat st;
+
+ if (fds[i] < 0) {
+ r = -EBADF;
+ goto fail;
+ }
+
+ if (fstat(fds[i], &st) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ r = stat_verify_regular(&st);
+ if (r < 0)
+ goto fail;
+
+ r = add_any_file(j, fds[i], NULL);
+ if (r < 0)
+ goto fail;
+ }
+
+ j->no_new_files = true;
+ j->no_inotify = true;
+
+ *ret = TAKE_PTR(j);
+ return 0;
+
+fail:
+ /* If we fail, make sure we don't take possession of the files we managed to make use of successfully, and they
+ * remain open */
+ ORDERED_HASHMAP_FOREACH(f, j->files)
+ f->close_fd = false;
+
+ return r;
+}
+
+_public_ void sd_journal_close(sd_journal *j) {
+ Directory *d;
+ Prioq *p;
+
+ if (!j || journal_origin_changed(j))
+ return;
+
+ while ((p = hashmap_first(j->newest_by_boot_id)))
+ journal_file_unlink_newest_by_boot_id(j, prioq_peek(p));
+ hashmap_free(j->newest_by_boot_id);
+
+ sd_journal_flush_matches(j);
+
+ ordered_hashmap_free_with_destructor(j->files, journal_file_close);
+ iterated_cache_free(j->files_cache);
+
+ while ((d = hashmap_first(j->directories_by_path)))
+ remove_directory(j, d);
+
+ while ((d = hashmap_first(j->directories_by_wd)))
+ remove_directory(j, d);
+
+ hashmap_free(j->directories_by_path);
+ hashmap_free(j->directories_by_wd);
+
+ if (FLAGS_SET(j->flags, SD_JOURNAL_TAKE_DIRECTORY_FD))
+ safe_close(j->toplevel_fd);
+
+ safe_close(j->inotify_fd);
+
+ if (j->mmap) {
+ mmap_cache_stats_log_debug(j->mmap);
+ mmap_cache_unref(j->mmap);
+ }
+
+ hashmap_free_free(j->errors);
+
+ free(j->path);
+ free(j->prefix);
+ free(j->namespace);
+ free(j->unique_field);
+ free(j->fields_buffer);
+ free(j);
+}
+
+static void journal_file_unlink_newest_by_boot_id(sd_journal *j, JournalFile *f) {
+ JournalFile *nf;
+ Prioq *p;
+
+ assert(j);
+ assert(f);
+
+ if (f->newest_boot_id_prioq_idx == PRIOQ_IDX_NULL) /* not linked currently, hence this is a NOP */
+ return;
+
+ assert_se(p = hashmap_get(j->newest_by_boot_id, &f->newest_boot_id));
+ assert_se(prioq_remove(p, f, &f->newest_boot_id_prioq_idx) > 0);
+
+ nf = prioq_peek(p);
+ if (nf)
+ /* There's still a member in the prioq? Then make sure the hashmap key now points to its
+ * .newest_boot_id field (and not ours!). Not we only replace the memory of the key here, the
+ * value of the key (and the data associated with it) remain the same. */
+ assert_se(hashmap_replace(j->newest_by_boot_id, &nf->newest_boot_id, p) >= 0);
+ else {
+ assert_se(hashmap_remove(j->newest_by_boot_id, &f->newest_boot_id) == p);
+ prioq_free(p);
+ }
+
+ f->newest_boot_id_prioq_idx = PRIOQ_IDX_NULL;
+}
+
+static int journal_file_newest_monotonic_compare(const void *a, const void *b) {
+ const JournalFile *x = a, *y = b;
+
+ return -CMP(x->newest_monotonic_usec, y->newest_monotonic_usec); /* Invert order, we want newest first! */
+}
+
+static int journal_file_reshuffle_newest_by_boot_id(sd_journal *j, JournalFile *f) {
+ Prioq *p;
+ int r;
+
+ assert(j);
+ assert(f);
+
+ p = hashmap_get(j->newest_by_boot_id, &f->newest_boot_id);
+ if (p) {
+ /* There's already a priority queue for this boot ID */
+
+ if (f->newest_boot_id_prioq_idx == PRIOQ_IDX_NULL) {
+ r = prioq_put(p, f, &f->newest_boot_id_prioq_idx); /* Insert if we aren't in there yet */
+ if (r < 0)
+ return r;
+ } else
+ prioq_reshuffle(p, f, &f->newest_boot_id_prioq_idx); /* Reshuffle otherwise */
+
+ } else {
+ _cleanup_(prioq_freep) Prioq *q = NULL;
+
+ /* No priority queue yet, then allocate one */
+
+ assert(f->newest_boot_id_prioq_idx == PRIOQ_IDX_NULL); /* we can't be a member either */
+
+ q = prioq_new(journal_file_newest_monotonic_compare);
+ if (!q)
+ return -ENOMEM;
+
+ r = prioq_put(q, f, &f->newest_boot_id_prioq_idx);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_put(&j->newest_by_boot_id, &id128_hash_ops, &f->newest_boot_id, q);
+ if (r < 0) {
+ f->newest_boot_id_prioq_idx = PRIOQ_IDX_NULL;
+ return r;
+ }
+
+ TAKE_PTR(q);
+ }
+
+ return 0;
+}
+
+static int journal_file_read_tail_timestamp(sd_journal *j, JournalFile *f) {
+ uint64_t offset, mo, rt;
+ sd_id128_t id;
+ ObjectType type;
+ Object *o;
+ int r;
+
+ assert(j);
+ assert(f);
+ assert(f->header);
+
+ /* Tries to read the timestamp of the most recently written entry. */
+
+ r = journal_file_fstat(f);
+ if (r < 0)
+ return r;
+ if (f->newest_mtime == timespec_load(&f->last_stat.st_mtim))
+ return 0; /* mtime didn't change since last time, don't bother */
+
+ if (JOURNAL_HEADER_CONTAINS(f->header, tail_entry_offset)) {
+ offset = le64toh(READ_NOW(f->header->tail_entry_offset));
+ type = OBJECT_ENTRY;
+ } else {
+ offset = le64toh(READ_NOW(f->header->tail_object_offset));
+ type = OBJECT_UNUSED;
+ }
+ if (offset == 0)
+ return -ENODATA; /* not a single object/entry, hence no tail timestamp */
+
+ /* Move to the last object in the journal file, in the hope it is an entry (which it usually will
+ * be). If we lack the "tail_entry_offset" field in the header, we specify the type as OBJECT_UNUSED
+ * here, since we cannot be sure what the last object will be, and want no noisy logging if it isn't
+ * an entry. We instead check after figuring out the pointer. */
+ r = journal_file_move_to_object(f, type, offset, &o);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to move to last object in journal file, ignoring: %m");
+ o = NULL;
+ }
+ if (o && o->object.type == OBJECT_ENTRY) {
+ /* Yay, last object is an entry, let's use the data. */
+ id = o->entry.boot_id;
+ mo = le64toh(o->entry.monotonic);
+ rt = le64toh(o->entry.realtime);
+ } else {
+ /* So the object is not an entry or we couldn't access it? In that case, let's read the most
+ * recent entry timestamps from the header. It's equally good. Unfortunately though, in old
+ * versions of the journal the boot ID in the header doesn't have to match the monotonic
+ * timestamp of the header. Let's check the header flag that indicates whether this strictly
+ * matches first hence, before using the data. */
+
+ if (JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(f->header) && f->header->state == STATE_ARCHIVED) {
+ mo = le64toh(f->header->tail_entry_monotonic);
+ rt = le64toh(f->header->tail_entry_realtime);
+ id = f->header->tail_entry_boot_id;
+ } else {
+ /* Otherwise let's find the last entry manually (this possibly means traversing the
+ * chain of entry arrays, till the end */
+ r = journal_file_next_entry(f, 0, DIRECTION_UP, &o, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENODATA;
+
+ id = o->entry.boot_id;
+ mo = le64toh(o->entry.monotonic);
+ rt = le64toh(o->entry.realtime);
+ }
+ }
+
+ if (mo > rt) /* monotonic clock is further ahead than realtime? that's weird, refuse to use the data */
+ return -ENODATA;
+
+ if (!sd_id128_equal(f->newest_boot_id, id))
+ journal_file_unlink_newest_by_boot_id(j, f);
+
+ f->newest_boot_id = id;
+ f->newest_monotonic_usec = mo;
+ f->newest_realtime_usec = rt;
+ f->newest_machine_id = f->header->machine_id;
+ f->newest_mtime = timespec_load(&f->last_stat.st_mtim);
+
+ r = journal_file_reshuffle_newest_by_boot_id(j, f);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+_public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) {
+ JournalFile *f;
+ Object *o;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+
+ f = j->current_file;
+ if (!f)
+ return -EADDRNOTAVAIL;
+ if (f->current_offset <= 0)
+ return -EADDRNOTAVAIL;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ uint64_t t = le64toh(o->entry.realtime);
+ if (!VALID_REALTIME(t))
+ return -EBADMSG;
+
+ if (ret)
+ *ret = t;
+
+ return 0;
+}
+
+_public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) {
+ JournalFile *f;
+ Object *o;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+
+ f = j->current_file;
+ if (!f)
+ return -EADDRNOTAVAIL;
+ if (f->current_offset <= 0)
+ return -EADDRNOTAVAIL;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ if (ret_boot_id)
+ *ret_boot_id = o->entry.boot_id;
+ else {
+ sd_id128_t id;
+
+ r = sd_id128_get_boot(&id);
+ if (r < 0)
+ return r;
+
+ if (!sd_id128_equal(id, o->entry.boot_id))
+ return -ESTALE;
+ }
+
+ uint64_t t = le64toh(o->entry.monotonic);
+ if (!VALID_MONOTONIC(t))
+ return -EBADMSG;
+
+ if (ret)
+ *ret = t;
+
+ return 0;
+}
+
+_public_ int sd_journal_get_seqnum(
+ sd_journal *j,
+ uint64_t *ret_seqnum,
+ sd_id128_t *ret_seqnum_id) {
+
+ JournalFile *f;
+ Object *o;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+
+ f = j->current_file;
+ if (!f)
+ return -EADDRNOTAVAIL;
+
+ if (f->current_offset <= 0)
+ return -EADDRNOTAVAIL;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ if (ret_seqnum_id)
+ *ret_seqnum_id = f->header->seqnum_id;
+ if (ret_seqnum)
+ *ret_seqnum = le64toh(o->entry.seqnum);
+
+ return 0;
+}
+
+static bool field_is_valid(const char *field) {
+ assert(field);
+
+ if (isempty(field))
+ return false;
+
+ if (startswith(field, "__"))
+ return false;
+
+ for (const char *p = field; *p; p++) {
+
+ if (*p == '_')
+ continue;
+
+ if (*p >= 'A' && *p <= 'Z')
+ continue;
+
+ if (ascii_isdigit(*p))
+ continue;
+
+ return false;
+ }
+
+ return true;
+}
+
+_public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) {
+ JournalFile *f;
+ size_t field_length;
+ Object *o;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+ assert_return(field, -EINVAL);
+ assert_return(data, -EINVAL);
+ assert_return(size, -EINVAL);
+ assert_return(field_is_valid(field), -EINVAL);
+
+ f = j->current_file;
+ if (!f)
+ return -EADDRNOTAVAIL;
+
+ if (f->current_offset <= 0)
+ return -EADDRNOTAVAIL;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ field_length = strlen(field);
+
+ uint64_t n = journal_file_entry_n_items(f, o);
+ for (uint64_t i = 0; i < n; i++) {
+ uint64_t p;
+ void *d;
+ size_t l;
+
+ p = journal_file_entry_item_object_offset(f, o, i);
+ r = journal_file_data_payload(f, NULL, p, field, field_length, j->data_threshold, &d, &l);
+ if (r == 0)
+ continue;
+ if (IN_SET(r, -EADDRNOTAVAIL, -EBADMSG)) {
+ log_debug_errno(r, "Entry item %"PRIu64" data object is bad, skipping over it: %m", i);
+ continue;
+ }
+ if (r < 0)
+ return r;
+
+ *data = d;
+ *size = l;
+
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+_public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) {
+ JournalFile *f;
+ Object *o;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+ assert_return(data, -EINVAL);
+ assert_return(size, -EINVAL);
+
+ f = j->current_file;
+ if (!f)
+ return -EADDRNOTAVAIL;
+
+ if (f->current_offset <= 0)
+ return -EADDRNOTAVAIL;
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
+ if (r < 0)
+ return r;
+
+ for (uint64_t n = journal_file_entry_n_items(f, o); j->current_field < n; j->current_field++) {
+ uint64_t p;
+ void *d;
+ size_t l;
+
+ p = journal_file_entry_item_object_offset(f, o, j->current_field);
+ r = journal_file_data_payload(f, NULL, p, NULL, 0, j->data_threshold, &d, &l);
+ if (IN_SET(r, -EADDRNOTAVAIL, -EBADMSG)) {
+ log_debug_errno(r, "Entry item %"PRIu64" data object is bad, skipping over it: %m", j->current_field);
+ continue;
+ }
+ if (r < 0)
+ return r;
+ assert(r > 0);
+
+ *data = d;
+ *size = l;
+
+ j->current_field++;
+
+ return 1;
+ }
+
+ return 0;
+}
+
+_public_ int sd_journal_enumerate_available_data(sd_journal *j, const void **data, size_t *size) {
+ for (;;) {
+ int r;
+
+ r = sd_journal_enumerate_data(j, data, size);
+ if (r >= 0)
+ return r;
+ if (!JOURNAL_ERRNO_IS_UNAVAILABLE_FIELD(r))
+ return r;
+ j->current_field++; /* Try with the next field */
+ }
+}
+
+_public_ void sd_journal_restart_data(sd_journal *j) {
+ if (!j || journal_origin_changed(j))
+ return;
+
+ j->current_field = 0;
+}
+
+static int reiterate_all_paths(sd_journal *j) {
+ assert(j);
+
+ if (j->no_new_files)
+ return add_current_paths(j);
+
+ if (j->flags & SD_JOURNAL_OS_ROOT)
+ return add_search_paths(j);
+
+ if (j->toplevel_fd >= 0)
+ return add_root_directory(j, NULL, false);
+
+ if (j->path)
+ return add_root_directory(j, j->path, true);
+
+ return add_search_paths(j);
+}
+
+_public_ int sd_journal_get_fd(sd_journal *j) {
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+
+ if (j->no_inotify)
+ return -EMEDIUMTYPE;
+
+ if (j->inotify_fd >= 0)
+ return j->inotify_fd;
+
+ r = allocate_inotify(j);
+ if (r < 0)
+ return r;
+
+ log_debug("Reiterating files to get inotify watches established.");
+
+ /* Iterate through all dirs again, to add them to the inotify */
+ r = reiterate_all_paths(j);
+ if (r < 0)
+ return r;
+
+ return j->inotify_fd;
+}
+
+_public_ int sd_journal_get_events(sd_journal *j) {
+ int fd;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+
+ fd = sd_journal_get_fd(j);
+ if (fd < 0)
+ return fd;
+
+ return POLLIN;
+}
+
+_public_ int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec) {
+ int fd;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+ assert_return(timeout_usec, -EINVAL);
+
+ fd = sd_journal_get_fd(j);
+ if (fd < 0)
+ return fd;
+
+ if (!j->on_network) {
+ *timeout_usec = UINT64_MAX;
+ return 0;
+ }
+
+ /* If we are on the network we need to regularly check for
+ * changes manually */
+
+ *timeout_usec = j->last_process_usec + JOURNAL_FILES_RECHECK_USEC;
+ return 1;
+}
+
+static void process_q_overflow(sd_journal *j) {
+ JournalFile *f;
+ Directory *m;
+
+ assert(j);
+
+ /* When the inotify queue overruns we need to enumerate and re-validate all journal files to bring our list
+ * back in sync with what's on disk. For this we pick a new generation counter value. It'll be assigned to all
+ * journal files we encounter. All journal files and all directories that don't carry it after reenumeration
+ * are subject for unloading. */
+
+ log_debug("Inotify queue overrun, reiterating everything.");
+
+ j->generation++;
+ (void) reiterate_all_paths(j);
+
+ ORDERED_HASHMAP_FOREACH(f, j->files) {
+
+ if (f->last_seen_generation == j->generation)
+ continue;
+
+ log_debug("File '%s' hasn't been seen in this enumeration, removing.", f->path);
+ remove_file_real(j, f);
+ }
+
+ HASHMAP_FOREACH(m, j->directories_by_path) {
+
+ if (m->last_seen_generation == j->generation)
+ continue;
+
+ if (m->is_root) /* Never GC root directories */
+ continue;
+
+ log_debug("Directory '%s' hasn't been seen in this enumeration, removing.", f->path);
+ remove_directory(j, m);
+ }
+
+ log_debug("Reiteration complete.");
+}
+
+static void process_inotify_event(sd_journal *j, const struct inotify_event *e) {
+ Directory *d;
+
+ assert(j);
+ assert(e);
+
+ if (e->mask & IN_Q_OVERFLOW) {
+ process_q_overflow(j);
+ return;
+ }
+
+ /* Is this a subdirectory we watch? */
+ d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
+ if (d) {
+ if (!(e->mask & IN_ISDIR) && e->len > 0 &&
+ (endswith(e->name, ".journal") ||
+ endswith(e->name, ".journal~"))) {
+
+ /* Event for a journal file */
+
+ if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB))
+ (void) add_file_by_name(j, d->path, e->name);
+ else if (e->mask & (IN_DELETE|IN_MOVED_FROM|IN_UNMOUNT))
+ (void) remove_file_by_name(j, d->path, e->name);
+
+ } else if (!d->is_root && e->len == 0) {
+
+ /* Event for a subdirectory */
+
+ if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT))
+ remove_directory(j, d);
+
+ } else if (d->is_root && (e->mask & IN_ISDIR) && e->len > 0 && id128_is_valid(e->name)) {
+
+ /* Event for root directory */
+
+ if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB))
+ (void) add_directory(j, d->path, e->name);
+ }
+
+ return;
+ }
+
+ if (e->mask & IN_IGNORED)
+ return;
+
+ log_debug("Unexpected inotify event.");
+}
+
+static int determine_change(sd_journal *j) {
+ bool b;
+
+ assert(j);
+
+ b = j->current_invalidate_counter != j->last_invalidate_counter;
+ j->last_invalidate_counter = j->current_invalidate_counter;
+
+ return b ? SD_JOURNAL_INVALIDATE : SD_JOURNAL_APPEND;
+}
+
+_public_ int sd_journal_process(sd_journal *j) {
+ bool got_something = false;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+
+ if (j->inotify_fd < 0) /* We have no inotify fd yet? Then there's noting to process. */
+ return 0;
+
+ j->last_process_usec = now(CLOCK_MONOTONIC);
+ j->last_invalidate_counter = j->current_invalidate_counter;
+
+ for (;;) {
+ union inotify_event_buffer buffer;
+ ssize_t l;
+
+ l = read(j->inotify_fd, &buffer, sizeof(buffer));
+ if (l < 0) {
+ if (ERRNO_IS_TRANSIENT(errno))
+ return got_something ? determine_change(j) : SD_JOURNAL_NOP;
+
+ return -errno;
+ }
+
+ got_something = true;
+
+ FOREACH_INOTIFY_EVENT(e, buffer, l)
+ process_inotify_event(j, e);
+ }
+}
+
+_public_ int sd_journal_wait(sd_journal *j, uint64_t timeout_usec) {
+ int r;
+ uint64_t t;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+
+ if (j->inotify_fd < 0) {
+ JournalFile *f;
+
+ /* This is the first invocation, hence create the inotify watch */
+ r = sd_journal_get_fd(j);
+ if (r < 0)
+ return r;
+
+ /* Server might have done some vacuuming while we weren't watching. Get rid of the deleted
+ * files now so they don't stay around indefinitely. */
+ ORDERED_HASHMAP_FOREACH(f, j->files) {
+ r = journal_file_fstat(f);
+ if (r == -EIDRM)
+ remove_file_real(j, f);
+ else if (r < 0)
+ log_debug_errno(r, "Failed to fstat() journal file '%s', ignoring: %m", f->path);
+ }
+
+ /* The journal might have changed since the context object was created and we weren't
+ * watching before, hence don't wait for anything, and return immediately. */
+ return determine_change(j);
+ }
+
+ r = sd_journal_get_timeout(j, &t);
+ if (r < 0)
+ return r;
+
+ if (t != UINT64_MAX) {
+ t = usec_sub_unsigned(t, now(CLOCK_MONOTONIC));
+
+ if (timeout_usec == UINT64_MAX || timeout_usec > t)
+ timeout_usec = t;
+ }
+
+ do {
+ r = fd_wait_for_event(j->inotify_fd, POLLIN, timeout_usec);
+ } while (r == -EINTR);
+
+ if (r < 0)
+ return r;
+
+ return sd_journal_process(j);
+}
+
+_public_ int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to) {
+ JournalFile *f;
+ bool first = true;
+ uint64_t fmin = 0, tmax = 0;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+ assert_return(from || to, -EINVAL);
+ assert_return(from != to, -EINVAL);
+
+ ORDERED_HASHMAP_FOREACH(f, j->files) {
+ usec_t fr, t;
+
+ r = journal_file_get_cutoff_realtime_usec(f, &fr, &t);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ if (first) {
+ fmin = fr;
+ tmax = t;
+ first = false;
+ } else {
+ fmin = MIN(fr, fmin);
+ tmax = MAX(t, tmax);
+ }
+ }
+
+ if (from)
+ *from = fmin;
+ if (to)
+ *to = tmax;
+
+ return first ? 0 : 1;
+}
+
+_public_ int sd_journal_get_cutoff_monotonic_usec(
+ sd_journal *j,
+ sd_id128_t boot_id,
+ uint64_t *ret_from,
+ uint64_t *ret_to) {
+
+ uint64_t from = UINT64_MAX, to = UINT64_MAX;
+ bool found = false;
+ JournalFile *f;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+ assert_return(ret_from != ret_to, -EINVAL);
+
+ ORDERED_HASHMAP_FOREACH(f, j->files) {
+ usec_t ff, tt;
+
+ r = journal_file_get_cutoff_monotonic_usec(f, boot_id, &ff, &tt);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ if (found) {
+ from = MIN(ff, from);
+ to = MAX(tt, to);
+ } else {
+ from = ff;
+ to = tt;
+ found = true;
+ }
+ }
+
+ if (ret_from)
+ *ret_from = from;
+ if (ret_to)
+ *ret_to = to;
+
+ return found;
+}
+
+void journal_print_header(sd_journal *j) {
+ JournalFile *f;
+ bool newline = false;
+
+ assert(j);
+
+ ORDERED_HASHMAP_FOREACH(f, j->files) {
+ if (newline)
+ putchar('\n');
+ else
+ newline = true;
+
+ journal_file_print_header(f);
+ }
+}
+
+_public_ int sd_journal_get_usage(sd_journal *j, uint64_t *ret) {
+ JournalFile *f;
+ uint64_t sum = 0;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+ assert_return(ret, -EINVAL);
+
+ ORDERED_HASHMAP_FOREACH(f, j->files) {
+ struct stat st;
+ uint64_t b;
+
+ if (fstat(f->fd, &st) < 0)
+ return -errno;
+
+ b = (uint64_t) st.st_blocks;
+ if (b > UINT64_MAX / 512)
+ return -EOVERFLOW;
+ b *= 512;
+
+ if (sum > UINT64_MAX - b)
+ return -EOVERFLOW;
+ sum += b;
+ }
+
+ *ret = sum;
+ return 0;
+}
+
+_public_ int sd_journal_query_unique(sd_journal *j, const char *field) {
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+
+ if (!field_is_valid(field))
+ return -EINVAL;
+
+ r = free_and_strdup(&j->unique_field, field);
+ if (r < 0)
+ return r;
+
+ j->unique_file = NULL;
+ j->unique_offset = 0;
+ j->unique_file_lost = false;
+
+ return 0;
+}
+
+_public_ int sd_journal_enumerate_unique(
+ sd_journal *j,
+ const void **ret_data,
+ size_t *ret_size) {
+
+ size_t k;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+ assert_return(j->unique_field, -EINVAL);
+
+ k = strlen(j->unique_field);
+
+ if (!j->unique_file) {
+ if (j->unique_file_lost)
+ return 0;
+
+ j->unique_file = ordered_hashmap_first(j->files);
+ if (!j->unique_file)
+ return 0;
+
+ j->unique_offset = 0;
+ }
+
+ for (;;) {
+ JournalFile *of;
+ Object *o;
+ void *odata;
+ size_t ol;
+ bool found;
+ int r;
+
+ /* Proceed to next data object in the field's linked list */
+ if (j->unique_offset == 0) {
+ r = journal_file_find_field_object(j->unique_file, j->unique_field, k, &o, NULL);
+ if (r < 0)
+ return r;
+
+ j->unique_offset = r > 0 ? le64toh(o->field.head_data_offset) : 0;
+ } else {
+ r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o);
+ if (r < 0)
+ return r;
+
+ j->unique_offset = le64toh(o->data.next_field_offset);
+ }
+
+ /* We reached the end of the list? Then start again, with the next file */
+ if (j->unique_offset == 0) {
+ j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
+ if (!j->unique_file)
+ return 0;
+
+ continue;
+ }
+
+ r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o);
+ if (r < 0)
+ return r;
+
+ /* Let's pin the data object, so we can look at it at the same time as one on another file. */
+ r = journal_file_pin_object(j->unique_file, o);
+ if (r < 0)
+ return r;
+
+ r = journal_file_data_payload(j->unique_file, o, j->unique_offset, NULL, 0,
+ j->data_threshold, &odata, &ol);
+ if (r < 0)
+ return r;
+
+ /* Check if we have at least the field name and "=". */
+ if (ol <= k)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "%s:offset " OFSfmt ": object has size %zu, expected at least %zu",
+ j->unique_file->path,
+ j->unique_offset, ol, k + 1);
+
+ if (memcmp(odata, j->unique_field, k) != 0 || ((const char*) odata)[k] != '=')
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "%s:offset " OFSfmt ": object does not start with \"%s=\"",
+ j->unique_file->path,
+ j->unique_offset,
+ j->unique_field);
+
+ /* OK, now let's see if we already returned this data object by checking if it exists in the
+ * earlier traversed files. */
+ found = false;
+ ORDERED_HASHMAP_FOREACH(of, j->files) {
+ if (of == j->unique_file)
+ break;
+
+ /* Skip this file it didn't have any fields indexed */
+ if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0)
+ continue;
+
+ /* We can reuse the hash from our current file only on old-style journal files
+ * without keyed hashes. On new-style files we have to calculate the hash anew, to
+ * take the per-file hash seed into consideration. */
+ if (!JOURNAL_HEADER_KEYED_HASH(j->unique_file->header) && !JOURNAL_HEADER_KEYED_HASH(of->header))
+ r = journal_file_find_data_object_with_hash(of, odata, ol, le64toh(o->data.hash), NULL, NULL);
+ else
+ r = journal_file_find_data_object(of, odata, ol, NULL, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found)
+ continue;
+
+ *ret_data = odata;
+ *ret_size = ol;
+
+ return 1;
+ }
+}
+
+_public_ int sd_journal_enumerate_available_unique(sd_journal *j, const void **data, size_t *size) {
+ for (;;) {
+ int r;
+
+ r = sd_journal_enumerate_unique(j, data, size);
+ if (r >= 0)
+ return r;
+ if (!JOURNAL_ERRNO_IS_UNAVAILABLE_FIELD(r))
+ return r;
+ /* Try with the next field. sd_journal_enumerate_unique() modifies state, so on the next try
+ * we will access the next field. */
+ }
+}
+
+_public_ void sd_journal_restart_unique(sd_journal *j) {
+ if (!j || journal_origin_changed(j))
+ return;
+
+ j->unique_file = NULL;
+ j->unique_offset = 0;
+ j->unique_file_lost = false;
+}
+
+_public_ int sd_journal_enumerate_fields(sd_journal *j, const char **field) {
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+ assert_return(field, -EINVAL);
+
+ if (!j->fields_file) {
+ if (j->fields_file_lost)
+ return 0;
+
+ j->fields_file = ordered_hashmap_first(j->files);
+ if (!j->fields_file)
+ return 0;
+
+ j->fields_hash_table_index = 0;
+ j->fields_offset = 0;
+ }
+
+ for (;;) {
+ JournalFile *f, *of;
+ uint64_t m;
+ Object *o;
+ size_t sz;
+ bool found;
+
+ f = j->fields_file;
+
+ if (j->fields_offset == 0) {
+ bool eof = false;
+
+ /* We are not yet positioned at any field. Let's pick the first one */
+ r = journal_file_map_field_hash_table(f);
+ if (r < 0)
+ return r;
+
+ m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
+ for (;;) {
+ if (j->fields_hash_table_index >= m) {
+ /* Reached the end of the hash table, go to the next file. */
+ eof = true;
+ break;
+ }
+
+ j->fields_offset = le64toh(f->field_hash_table[j->fields_hash_table_index].head_hash_offset);
+
+ if (j->fields_offset != 0)
+ break;
+
+ /* Empty hash table bucket, go to next one */
+ j->fields_hash_table_index++;
+ }
+
+ if (eof) {
+ /* Proceed with next file */
+ j->fields_file = ordered_hashmap_next(j->files, f->path);
+ if (!j->fields_file) {
+ *field = NULL;
+ return 0;
+ }
+
+ j->fields_offset = 0;
+ j->fields_hash_table_index = 0;
+ continue;
+ }
+
+ } else {
+ /* We are already positioned at a field. If so, let's figure out the next field from it */
+
+ r = journal_file_move_to_object(f, OBJECT_FIELD, j->fields_offset, &o);
+ if (r < 0)
+ return r;
+
+ j->fields_offset = le64toh(o->field.next_hash_offset);
+ if (j->fields_offset == 0) {
+ /* Reached the end of the hash table chain */
+ j->fields_hash_table_index++;
+ continue;
+ }
+ }
+
+ /* We use OBJECT_UNUSED here, so that the iterator below doesn't remove our mmap window */
+ r = journal_file_move_to_object(f, OBJECT_UNUSED, j->fields_offset, &o);
+ if (r < 0)
+ return r;
+
+ /* Because we used OBJECT_UNUSED above, we need to do our type check manually */
+ if (o->object.type != OBJECT_FIELD)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
+ "%s:offset " OFSfmt ": object has type %i, expected %i",
+ f->path, j->fields_offset,
+ o->object.type, OBJECT_FIELD);
+
+ sz = le64toh(o->object.size) - offsetof(Object, field.payload);
+
+ /* Let's see if we already returned this field name before. */
+ found = false;
+ ORDERED_HASHMAP_FOREACH(of, j->files) {
+ if (of == f)
+ break;
+
+ /* Skip this file it didn't have any fields indexed */
+ if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0)
+ continue;
+
+ if (!JOURNAL_HEADER_KEYED_HASH(f->header) && !JOURNAL_HEADER_KEYED_HASH(of->header))
+ r = journal_file_find_field_object_with_hash(of, o->field.payload, sz,
+ le64toh(o->field.hash), NULL, NULL);
+ else
+ r = journal_file_find_field_object(of, o->field.payload, sz, NULL, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found)
+ continue;
+
+ /* Check if this is really a valid string containing no NUL byte */
+ if (memchr(o->field.payload, 0, sz))
+ return -EBADMSG;
+
+ if (j->data_threshold > 0 && sz > j->data_threshold)
+ sz = j->data_threshold;
+
+ if (!GREEDY_REALLOC(j->fields_buffer, sz + 1))
+ return -ENOMEM;
+
+ memcpy(j->fields_buffer, o->field.payload, sz);
+ j->fields_buffer[sz] = 0;
+
+ if (!field_is_valid(j->fields_buffer))
+ return -EBADMSG;
+
+ *field = j->fields_buffer;
+ return 1;
+ }
+}
+
+_public_ void sd_journal_restart_fields(sd_journal *j) {
+ if (!j || journal_origin_changed(j))
+ return;
+
+ j->fields_file = NULL;
+ j->fields_hash_table_index = 0;
+ j->fields_offset = 0;
+ j->fields_file_lost = false;
+}
+
+_public_ int sd_journal_reliable_fd(sd_journal *j) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+
+ return !j->on_network;
+}
+
+static char *lookup_field(const char *field, void *userdata) {
+ sd_journal *j = ASSERT_PTR(userdata);
+ const void *data;
+ size_t size, d;
+ int r;
+
+ assert(field);
+
+ r = sd_journal_get_data(j, field, &data, &size);
+ if (r < 0 ||
+ size > REPLACE_VAR_MAX)
+ return strdup(field);
+
+ d = strlen(field) + 1;
+
+ return strndup((const char*) data + d, size - d);
+}
+
+_public_ int sd_journal_get_catalog(sd_journal *j, char **ret) {
+ const void *data;
+ size_t size;
+ sd_id128_t id;
+ _cleanup_free_ char *text = NULL, *cid = NULL;
+ char *t;
+ int r;
+
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+ assert_return(ret, -EINVAL);
+
+ r = sd_journal_get_data(j, "MESSAGE_ID", &data, &size);
+ if (r < 0)
+ return r;
+
+ cid = strndup((const char*) data + 11, size - 11);
+ if (!cid)
+ return -ENOMEM;
+
+ r = sd_id128_from_string(cid, &id);
+ if (r < 0)
+ return r;
+
+ r = catalog_get(secure_getenv("SYSTEMD_CATALOG") ?: CATALOG_DATABASE, id, &text);
+ if (r < 0)
+ return r;
+
+ t = replace_var(text, lookup_field, j);
+ if (!t)
+ return -ENOMEM;
+
+ *ret = t;
+ return 0;
+}
+
+_public_ int sd_journal_get_catalog_for_message_id(sd_id128_t id, char **ret) {
+ assert_return(ret, -EINVAL);
+
+ return catalog_get(CATALOG_DATABASE, id, ret);
+}
+
+_public_ int sd_journal_set_data_threshold(sd_journal *j, size_t sz) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+
+ j->data_threshold = sz;
+ return 0;
+}
+
+_public_ int sd_journal_get_data_threshold(sd_journal *j, size_t *sz) {
+ assert_return(j, -EINVAL);
+ assert_return(!journal_origin_changed(j), -ECHILD);
+ assert_return(sz, -EINVAL);
+
+ *sz = j->data_threshold;
+ return 0;
+}
+
+_public_ int sd_journal_has_runtime_files(sd_journal *j) {
+ assert_return(j, -EINVAL);
+
+ return j->has_runtime_files;
+}
+
+_public_ int sd_journal_has_persistent_files(sd_journal *j) {
+ assert_return(j, -EINVAL);
+
+ return j->has_persistent_files;
+}
diff --git a/src/libsystemd/sd-journal/test-audit-type.c b/src/libsystemd/sd-journal/test-audit-type.c
new file mode 100644
index 0000000..1d5003b
--- /dev/null
+++ b/src/libsystemd/sd-journal/test-audit-type.c
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+#include <linux/audit.h>
+
+#include "audit-type.h"
+#include "tests.h"
+
+static void print_audit_label(int i) {
+ const char *name;
+
+ name = audit_type_name_alloca(i);
+ /* This is a separate function only because of alloca */
+ printf("%i → %s → %s\n", i, audit_type_to_string(i), name);
+}
+
+TEST(audit_type) {
+ int i;
+
+ for (i = 0; i <= AUDIT_KERNEL; i++)
+ print_audit_label(i);
+}
+
+DEFINE_TEST_MAIN(LOG_INFO);
diff --git a/src/libsystemd/sd-journal/test-catalog.c b/src/libsystemd/sd-journal/test-catalog.c
new file mode 100644
index 0000000..603952e
--- /dev/null
+++ b/src/libsystemd/sd-journal/test-catalog.c
@@ -0,0 +1,235 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <locale.h>
+#include <unistd.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "catalog.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+
+static char** catalog_dirs = NULL;
+static const char *no_catalog_dirs[] = {
+ "/bin/hopefully/with/no/catalog",
+ NULL
+};
+
+static OrderedHashmap* test_import(const char* contents, ssize_t size, int code) {
+ _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-catalog.XXXXXX";
+ _cleanup_close_ int fd = -EBADF;
+ OrderedHashmap *h;
+
+ if (size < 0)
+ size = strlen(contents);
+
+ assert_se(h = ordered_hashmap_new(&catalog_hash_ops));
+
+ fd = mkostemp_safe(name);
+ assert_se(fd >= 0);
+ assert_se(write(fd, contents, size) == size);
+
+ assert_se(catalog_import_file(h, name) == code);
+
+ return h;
+}
+
+static void test_catalog_import_invalid(void) {
+ _cleanup_ordered_hashmap_free_free_free_ OrderedHashmap *h = NULL;
+
+ h = test_import("xxx", -1, -EINVAL);
+ assert_se(ordered_hashmap_isempty(h));
+}
+
+static void test_catalog_import_badid(void) {
+ _unused_ _cleanup_ordered_hashmap_free_free_free_ OrderedHashmap *h = NULL;
+ const char *input =
+"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededede\n" \
+"Subject: message\n" \
+"\n" \
+"payload\n";
+ h = test_import(input, -1, -EINVAL);
+}
+
+static void test_catalog_import_one(void) {
+ _cleanup_ordered_hashmap_free_free_free_ OrderedHashmap *h = NULL;
+ char *payload;
+
+ const char *input =
+"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededed\n" \
+"Subject: message\n" \
+"\n" \
+"payload\n";
+ const char *expect =
+"Subject: message\n" \
+"\n" \
+"payload\n";
+
+ h = test_import(input, -1, 0);
+ assert_se(ordered_hashmap_size(h) == 1);
+
+ ORDERED_HASHMAP_FOREACH(payload, h) {
+ printf("expect: %s\n", expect);
+ printf("actual: %s\n", payload);
+ assert_se(streq(expect, payload));
+ }
+}
+
+static void test_catalog_import_merge(void) {
+ _cleanup_ordered_hashmap_free_free_free_ OrderedHashmap *h = NULL;
+ char *payload;
+
+ const char *input =
+"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededed\n" \
+"Subject: message\n" \
+"Defined-By: me\n" \
+"\n" \
+"payload\n" \
+"\n" \
+"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededed\n" \
+"Subject: override subject\n" \
+"X-Header: hello\n" \
+"\n" \
+"override payload\n";
+
+ const char *combined =
+"Subject: override subject\n" \
+"X-Header: hello\n" \
+"Subject: message\n" \
+"Defined-By: me\n" \
+"\n" \
+"override payload\n";
+
+ h = test_import(input, -1, 0);
+ assert_se(ordered_hashmap_size(h) == 1);
+
+ ORDERED_HASHMAP_FOREACH(payload, h)
+ assert_se(streq(combined, payload));
+}
+
+static void test_catalog_import_merge_no_body(void) {
+ _cleanup_ordered_hashmap_free_free_free_ OrderedHashmap *h = NULL;
+ char *payload;
+
+ const char *input =
+"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededed\n" \
+"Subject: message\n" \
+"Defined-By: me\n" \
+"\n" \
+"payload\n" \
+"\n" \
+"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededed\n" \
+"Subject: override subject\n" \
+"X-Header: hello\n" \
+"\n";
+
+ const char *combined =
+"Subject: override subject\n" \
+"X-Header: hello\n" \
+"Subject: message\n" \
+"Defined-By: me\n" \
+"\n" \
+"payload\n";
+
+ h = test_import(input, -1, 0);
+ assert_se(ordered_hashmap_size(h) == 1);
+
+ ORDERED_HASHMAP_FOREACH(payload, h)
+ assert_se(streq(combined, payload));
+}
+
+static void test_catalog_update(const char *database) {
+ int r;
+
+ /* Test what happens if there are no files. */
+ r = catalog_update(database, NULL, NULL);
+ assert_se(r == 0);
+
+ /* Test what happens if there are no files in the directory. */
+ r = catalog_update(database, NULL, no_catalog_dirs);
+ assert_se(r == 0);
+
+ /* Make sure that we at least have some files loaded or the
+ * catalog_list below will fail. */
+ r = catalog_update(database, NULL, (const char * const *) catalog_dirs);
+ assert_se(r == 0);
+}
+
+static void test_catalog_file_lang(void) {
+ _cleanup_free_ char *lang = NULL, *lang2 = NULL, *lang3 = NULL, *lang4 = NULL;
+
+ assert_se(catalog_file_lang("systemd.de_DE.catalog", &lang) == 1);
+ assert_se(streq(lang, "de_DE"));
+
+ assert_se(catalog_file_lang("systemd..catalog", &lang2) == 0);
+ assert_se(lang2 == NULL);
+
+ assert_se(catalog_file_lang("systemd.fr.catalog", &lang2) == 1);
+ assert_se(streq(lang2, "fr"));
+
+ assert_se(catalog_file_lang("systemd.fr.catalog.gz", &lang3) == 0);
+ assert_se(lang3 == NULL);
+
+ assert_se(catalog_file_lang("systemd.01234567890123456789012345678901.catalog", &lang3) == 0);
+ assert_se(lang3 == NULL);
+
+ assert_se(catalog_file_lang("systemd.0123456789012345678901234567890.catalog", &lang3) == 1);
+ assert_se(streq(lang3, "0123456789012345678901234567890"));
+
+ assert_se(catalog_file_lang("/x/y/systemd.catalog", &lang4) == 0);
+ assert_se(lang4 == NULL);
+
+ assert_se(catalog_file_lang("/x/y/systemd.ru_RU.catalog", &lang4) == 1);
+ assert_se(streq(lang4, "ru_RU"));
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(unlink_tempfilep) char database[] = "/tmp/test-catalog.XXXXXX";
+ _cleanup_close_ int fd = -EBADF;
+ _cleanup_free_ char *text = NULL;
+ int r;
+
+ setlocale(LC_ALL, "de_DE.UTF-8");
+
+ test_setup_logging(LOG_DEBUG);
+
+ /* If test-catalog is located at the build directory, then use catalogs in that.
+ * If it is not, e.g. installed by systemd-tests package, then use installed catalogs. */
+ catalog_dirs = STRV_MAKE(get_catalog_dir());
+
+ assert_se(access(catalog_dirs[0], F_OK) >= 0);
+ log_notice("Using catalog directory '%s'", catalog_dirs[0]);
+
+ test_catalog_file_lang();
+
+ test_catalog_import_invalid();
+ test_catalog_import_badid();
+ test_catalog_import_one();
+ test_catalog_import_merge();
+ test_catalog_import_merge_no_body();
+
+ assert_se((fd = mkostemp_safe(database)) >= 0);
+
+ test_catalog_update(database);
+
+ r = catalog_list(stdout, database, true);
+ assert_se(r >= 0);
+
+ r = catalog_list(stdout, database, false);
+ assert_se(r >= 0);
+
+ assert_se(catalog_get(database, SD_MESSAGE_COREDUMP, &text) >= 0);
+ printf(">>>%s<<<\n", text);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-journal/test-journal-append.c b/src/libsystemd/sd-journal/test-journal-append.c
new file mode 100644
index 0000000..24b98c8
--- /dev/null
+++ b/src/libsystemd/sd-journal/test-journal-append.c
@@ -0,0 +1,269 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "chattr-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "iovec-util.h"
+#include "journal-file-util.h"
+#include "log.h"
+#include "mmap-cache.h"
+#include "parse-util.h"
+#include "random-util.h"
+#include "rm-rf.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+
+static int journal_append_message(JournalFile *mj, const char *message) {
+ struct iovec iovec;
+ struct dual_timestamp ts;
+
+ assert(mj);
+ assert(message);
+
+ dual_timestamp_now(&ts);
+ iovec = IOVEC_MAKE_STRING(message);
+ return journal_file_append_entry(
+ mj,
+ &ts,
+ /* boot_id= */ NULL,
+ &iovec,
+ /* n_iovec= */ 1,
+ /* seqnum= */ NULL,
+ /* seqnum_id= */ NULL,
+ /* ret_object= */ NULL,
+ /* ret_offset= */ NULL);
+}
+
+static int journal_corrupt_and_append(uint64_t start_offset, uint64_t step) {
+ _cleanup_(mmap_cache_unrefp) MMapCache *mmap_cache = NULL;
+ _cleanup_(rm_rf_physical_and_freep) char *tempdir = NULL;
+ _cleanup_(journal_file_offline_closep) JournalFile *mj = NULL;
+ uint64_t start, end;
+ int r;
+
+ mmap_cache = mmap_cache_new();
+ assert_se(mmap_cache);
+
+ /* journal_file_open() requires a valid machine id */
+ if (sd_id128_get_machine(NULL) < 0)
+ return log_tests_skipped("No valid machine ID found");
+
+ assert_se(mkdtemp_malloc("/tmp/journal-append-XXXXXX", &tempdir) >= 0);
+ assert_se(chdir(tempdir) >= 0);
+ (void) chattr_path(tempdir, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+
+ log_debug("Opening journal %s/system.journal", tempdir);
+
+ r = journal_file_open(
+ /* fd= */ -1,
+ "system.journal",
+ O_RDWR|O_CREAT,
+ JOURNAL_COMPRESS,
+ 0644,
+ /* compress_threshold_bytes= */ UINT64_MAX,
+ /* metrics= */ NULL,
+ mmap_cache,
+ /* template= */ NULL,
+ &mj);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open the journal: %m");
+
+ assert_se(mj);
+
+ /* Add a couple of initial messages */
+ for (int i = 0; i < 10; i++) {
+ _cleanup_free_ char *message = NULL;
+
+ assert_se(asprintf(&message, "MESSAGE=Initial message %d", i) >= 0);
+ r = journal_append_message(mj, message);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write to the journal: %m");
+ }
+
+ start = start_offset == UINT64_MAX ? random_u64() % mj->last_stat.st_size : start_offset;
+ end = (uint64_t) mj->last_stat.st_size;
+
+ /* Print the initial offset at which we start flipping bits, which can be
+ * later used to reproduce a potential fail */
+ log_info("Start offset: %" PRIu64 ", corrupt-step: %" PRIu64, start, step);
+ fflush(stdout);
+
+ if (start >= end)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Start offset >= journal size, can't continue");
+
+ for (uint64_t offset = start; offset < end; offset += step) {
+ _cleanup_free_ char *message = NULL;
+ uint8_t b;
+
+ /* Flip a bit in the journal file */
+ r = pread(mj->fd, &b, 1, offset);
+ assert_se(r == 1);
+ b |= 0x1;
+ r = pwrite(mj->fd, &b, 1, offset);
+ assert_se(r == 1);
+
+ /* Close and reopen the journal to flush all caches and remap
+ * the corrupted journal */
+ mj = journal_file_offline_close(mj);
+ r = journal_file_open(
+ /* fd= */ -1,
+ "system.journal",
+ O_RDWR|O_CREAT,
+ JOURNAL_COMPRESS,
+ 0644,
+ /* compress_threshold_bytes= */ UINT64_MAX,
+ /* metrics= */ NULL,
+ mmap_cache,
+ /* template= */ NULL,
+ &mj);
+ if (r < 0) {
+ /* The corrupted journal might get rejected during reopening
+ * if it's corrupted enough (especially its header), so
+ * treat this as a success if it doesn't crash */
+ log_info_errno(r, "Failed to reopen the journal: %m");
+ break;
+ }
+
+ /* Try to write something to the (possibly corrupted) journal */
+ assert_se(asprintf(&message, "MESSAGE=Hello world %" PRIu64, offset) >= 0);
+ r = journal_append_message(mj, message);
+ if (r < 0) {
+ /* We care only about crashes or sanitizer errors,
+ * failed write without any crash is a success */
+ log_info_errno(r, "Failed to write to the journal: %m");
+ break;
+ }
+ }
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ uint64_t start_offset = UINT64_MAX;
+ uint64_t iterations = 100;
+ uint64_t iteration_step = 1;
+ uint64_t corrupt_step = 31;
+ bool sequential = false, run_one = false;
+ int c, r;
+
+ test_setup_logging(LOG_DEBUG);
+
+ enum {
+ ARG_START_OFFSET = 0x1000,
+ ARG_ITERATIONS,
+ ARG_ITERATION_STEP,
+ ARG_CORRUPT_STEP,
+ ARG_SEQUENTIAL,
+ ARG_RUN_ONE,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "start-offset", required_argument, NULL, ARG_START_OFFSET },
+ { "iterations", required_argument, NULL, ARG_ITERATIONS },
+ { "iteration-step", required_argument, NULL, ARG_ITERATION_STEP },
+ { "corrupt-step", required_argument, NULL, ARG_CORRUPT_STEP },
+ { "sequential", no_argument, NULL, ARG_SEQUENTIAL },
+ { "run-one", required_argument, NULL, ARG_RUN_ONE },
+ {}
+ };
+
+ assert_se(argc >= 0);
+ assert_se(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+ switch (c) {
+
+ case 'h':
+ printf("Syntax:\n"
+ " %s [OPTION...]\n"
+ "Options:\n"
+ " --start-offset=OFFSET Offset at which to start corrupting the journal\n"
+ " (default: random offset is picked, unless\n"
+ " --sequential is used - in that case we use 0 + iteration)\n"
+ " --iterations=ITER Number of iterations to perform before exiting\n"
+ " (default: 100)\n"
+ " --iteration-step=STEP Iteration step (default: 1)\n"
+ " --corrupt-step=STEP Corrupt every n-th byte starting from OFFSET (default: 31)\n"
+ " --sequential Go through offsets sequentially instead of picking\n"
+ " a random one on each iteration. If set, we go through\n"
+ " offsets <0; ITER), or <OFFSET, ITER) if --start-offset=\n"
+ " is set (default: false)\n"
+ " --run-one=OFFSET Single shot mode for reproducing issues. Takes the same\n"
+ " offset as --start-offset= and does only one iteration\n"
+ , program_invocation_short_name);
+ return 0;
+
+ case ARG_START_OFFSET:
+ r = safe_atou64(optarg, &start_offset);
+ if (r < 0)
+ return log_error_errno(r, "Invalid starting offset: %m");
+ break;
+
+ case ARG_ITERATIONS:
+ r = safe_atou64(optarg, &iterations);
+ if (r < 0)
+ return log_error_errno(r, "Invalid value for iterations: %m");
+ break;
+
+ case ARG_CORRUPT_STEP:
+ r = safe_atou64(optarg, &corrupt_step);
+ if (r < 0)
+ return log_error_errno(r, "Invalid value for corrupt-step: %m");
+ break;
+
+ case ARG_ITERATION_STEP:
+ r = safe_atou64(optarg, &iteration_step);
+ if (r < 0)
+ return log_error_errno(r, "Invalid value for iteration-step: %m");
+ break;
+
+ case ARG_SEQUENTIAL:
+ sequential = true;
+ break;
+
+ case ARG_RUN_ONE:
+ r = safe_atou64(optarg, &start_offset);
+ if (r < 0)
+ return log_error_errno(r, "Invalid offset: %m");
+
+ run_one = true;
+ break;
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached();
+ }
+
+ if (run_one)
+ /* Reproducer mode */
+ return journal_corrupt_and_append(start_offset, corrupt_step);
+
+ for (uint64_t i = 0; i < iterations; i++) {
+ uint64_t offset = UINT64_MAX;
+
+ log_info("Iteration #%" PRIu64 ", step: %" PRIu64, i, iteration_step);
+
+ if (sequential)
+ offset = (start_offset == UINT64_MAX ? 0 : start_offset) + i * iteration_step;
+
+ r = journal_corrupt_and_append(offset, corrupt_step);
+ if (r < 0)
+ return EXIT_FAILURE;
+ if (r > 0)
+ /* Reached the end of the journal file */
+ break;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/src/libsystemd/sd-journal/test-journal-enum.c b/src/libsystemd/sd-journal/test-journal-enum.c
new file mode 100644
index 0000000..03fe8e2
--- /dev/null
+++ b/src/libsystemd/sd-journal/test-journal-enum.c
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+
+#include "sd-journal.h"
+
+#include "journal-internal.h"
+#include "log.h"
+#include "macro.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ unsigned n = 0;
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY) >= 0);
+
+ assert_se(sd_journal_add_match(j, "_TRANSPORT=syslog", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "_UID=0", 0) >= 0);
+
+ SD_JOURNAL_FOREACH_BACKWARDS(j) {
+ const void *d;
+ size_t l;
+
+ assert_se(sd_journal_get_data(j, "MESSAGE", &d, &l) >= 0);
+
+ printf("%.*s\n", (int) l, (char*) d);
+
+ n++;
+ if (n >= 10)
+ break;
+ }
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-journal/test-journal-file.c b/src/libsystemd/sd-journal/test-journal-file.c
new file mode 100644
index 0000000..729de1f
--- /dev/null
+++ b/src/libsystemd/sd-journal/test-journal-file.c
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "journal-file.h"
+#include "tests.h"
+#include "user-util.h"
+
+static void test_journal_file_parse_uid_from_filename_simple(
+ const char *path,
+ uid_t expected_uid,
+ int expected_error) {
+
+ uid_t uid = UID_INVALID;
+ int r;
+
+ log_info("testing %s", path);
+
+ r = journal_file_parse_uid_from_filename(path, &uid);
+ assert_se(r == expected_error);
+ if (r < 0)
+ assert_se(uid == UID_INVALID);
+ else
+ assert_se(uid == expected_uid);
+}
+
+TEST(journal_file_parse_uid_from_filename) {
+
+ test_journal_file_parse_uid_from_filename_simple("/var/log/journal/", 0, -EISDIR);
+
+ /* The helper should return -EREMOTE for any filenames that don't look like an online or offline user
+ * journals. This includes archived and disposed journal files. */
+ test_journal_file_parse_uid_from_filename_simple("/etc/password", 0, -EREMOTE);
+ test_journal_file_parse_uid_from_filename_simple("system.journal", 0, -EREMOTE);
+ test_journal_file_parse_uid_from_filename_simple("user-1000@0005d26980bdce6e-2f2a4939583822ef.journal~", 0, -EREMOTE);
+ test_journal_file_parse_uid_from_filename_simple("user-1000@xxx-yyy-zzz.journal", 0, -EREMOTE);
+
+ test_journal_file_parse_uid_from_filename_simple("user-1000.journal", 1000, 0);
+ test_journal_file_parse_uid_from_filename_simple("user-foo.journal", 0, -EINVAL);
+ test_journal_file_parse_uid_from_filename_simple("user-65535.journal", 0, -ENXIO);
+}
+
+DEFINE_TEST_MAIN(LOG_INFO);
diff --git a/src/libsystemd/sd-journal/test-journal-flush.c b/src/libsystemd/sd-journal/test-journal-flush.c
new file mode 100644
index 0000000..3f07835
--- /dev/null
+++ b/src/libsystemd/sd-journal/test-journal-flush.c
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "chattr-util.h"
+#include "journal-file-util.h"
+#include "journal-internal.h"
+#include "logs-show.h"
+#include "macro.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "string-util.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+
+static void test_journal_flush_one(int argc, char *argv[]) {
+ _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL;
+ _cleanup_free_ char *fn = NULL;
+ _cleanup_(rm_rf_physical_and_freep) char *dn = NULL;
+ _cleanup_(journal_file_offline_closep) JournalFile *new_journal = NULL;
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ unsigned n, limit;
+ int r;
+
+ assert_se(m = mmap_cache_new());
+ assert_se(mkdtemp_malloc("/var/tmp/test-journal-flush.XXXXXX", &dn) >= 0);
+ (void) chattr_path(dn, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+
+ assert_se(fn = path_join(dn, "test.journal"));
+
+ r = journal_file_open(-1, fn, O_CREAT|O_RDWR, 0, 0644, 0, NULL, m, NULL, &new_journal);
+ assert_se(r >= 0);
+
+ if (argc > 1)
+ r = sd_journal_open_files(&j, (const char **) strv_skip(argv, 1), 0);
+ else
+ r = sd_journal_open(&j, 0);
+ assert_se(r == 0);
+
+ sd_journal_set_data_threshold(j, 0);
+
+ n = 0;
+ limit = slow_tests_enabled() ? 10000 : 1000;
+ SD_JOURNAL_FOREACH(j) {
+ Object *o;
+ JournalFile *f;
+
+ f = j->current_file;
+ assert_se(f && f->current_offset > 0);
+
+ r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
+ if (r < 0)
+ log_error_errno(r, "journal_file_move_to_object failed: %m");
+ assert_se(r >= 0);
+
+ r = journal_file_copy_entry(f, new_journal, o, f->current_offset, NULL, NULL);
+ if (r < 0)
+ log_warning_errno(r, "journal_file_copy_entry failed: %m");
+ assert_se(r >= 0 ||
+ IN_SET(r, -EBADMSG, /* corrupted file */
+ -EPROTONOSUPPORT, /* unsupported compression */
+ -EIO, /* file rotated */
+ -EREMCHG)); /* clock rollback */
+
+ if (++n >= limit)
+ break;
+ }
+
+ if (n == 0)
+ return (void) log_tests_skipped("No journal entry found");
+
+ /* Open the new journal before archiving and offlining the file. */
+ sd_journal_close(j);
+ assert_se(sd_journal_open_directory(&j, dn, 0) >= 0);
+
+ /* Read the online journal. */
+ assert_se(sd_journal_seek_tail(j) >= 0);
+ assert_se(sd_journal_step_one(j, 0) > 0);
+ printf("current_journal: %s (%i)\n", j->current_file->path, j->current_file->fd);
+ assert_se(show_journal_entry(stdout, j, OUTPUT_EXPORT, 0, 0, NULL, NULL, NULL, &(dual_timestamp) {}, &(sd_id128_t) {}) >= 0);
+
+ uint64_t p;
+ assert_se(journal_file_tail_end_by_mmap(j->current_file, &p) >= 0);
+ for (uint64_t q = ALIGN64(p + 1); q < (uint64_t) j->current_file->last_stat.st_size; q = ALIGN64(q + 1)) {
+ Object *o;
+
+ r = journal_file_move_to_object(j->current_file, OBJECT_UNUSED, q, &o);
+ assert_se(IN_SET(r, -EBADMSG, -EADDRNOTAVAIL));
+ }
+
+ /* Archive and offline file. */
+ assert_se(journal_file_archive(new_journal, NULL) >= 0);
+ assert_se(journal_file_set_offline(new_journal, /* wait = */ true) >= 0);
+
+ /* Read the archived and offline journal. */
+ for (uint64_t q = ALIGN64(p + 1); q < (uint64_t) j->current_file->last_stat.st_size; q = ALIGN64(q + 1)) {
+ Object *o;
+
+ r = journal_file_move_to_object(j->current_file, OBJECT_UNUSED, q, &o);
+ assert_se(IN_SET(r, -EBADMSG, -EADDRNOTAVAIL, -EIDRM));
+ }
+}
+
+TEST(journal_flush) {
+ assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0);
+ test_journal_flush_one(saved_argc, saved_argv);
+}
+
+TEST(journal_flush_compact) {
+ assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0);
+ test_journal_flush_one(saved_argc, saved_argv);
+}
+
+DEFINE_TEST_MAIN(LOG_INFO);
diff --git a/src/libsystemd/sd-journal/test-journal-init.c b/src/libsystemd/sd-journal/test-journal-init.c
new file mode 100644
index 0000000..c8a1977
--- /dev/null
+++ b/src/libsystemd/sd-journal/test-journal-init.c
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "sd-journal.h"
+
+#include "chattr-util.h"
+#include "journal-internal.h"
+#include "log.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "rm-rf.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ sd_journal *j;
+ int r, i, I = 100;
+ char t[] = "/var/tmp/journal-stream-XXXXXX";
+
+ test_setup_logging(LOG_DEBUG);
+
+ if (argc >= 2) {
+ r = safe_atoi(argv[1], &I);
+ if (r < 0)
+ log_info("Could not parse loop count argument. Using default.");
+ }
+
+ log_info("Running %d loops", I);
+
+ assert_se(mkdtemp(t));
+ (void) chattr_path(t, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+
+ for (i = 0; i < I; i++) {
+ r = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY);
+ assert_se(r == 0);
+
+ sd_journal_close(j);
+
+ r = sd_journal_open_directory(&j, t, 0);
+ assert_se(r == 0);
+
+ assert_se(sd_journal_seek_head(j) == 0);
+ assert_se(j->current_location.type == LOCATION_HEAD);
+
+ r = safe_fork("(journal-fork-test)", FORK_WAIT|FORK_LOG, NULL);
+ if (r == 0) {
+ assert_se(j);
+ assert_se(sd_journal_get_realtime_usec(j, NULL) == -ECHILD);
+ assert_se(sd_journal_seek_tail(j) == -ECHILD);
+ assert_se(j->current_location.type == LOCATION_HEAD);
+ sd_journal_close(j);
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(r >= 0);
+
+ sd_journal_close(j);
+
+ j = NULL;
+ r = sd_journal_open_directory(&j, t, SD_JOURNAL_LOCAL_ONLY);
+ assert_se(r == -EINVAL);
+ assert_se(j == NULL);
+ }
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-journal/test-journal-interleaving.c b/src/libsystemd/sd-journal/test-journal-interleaving.c
new file mode 100644
index 0000000..8aeef8f
--- /dev/null
+++ b/src/libsystemd/sd-journal/test-journal-interleaving.c
@@ -0,0 +1,737 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "chattr-util.h"
+#include "iovec-util.h"
+#include "journal-file-util.h"
+#include "journal-vacuum.h"
+#include "log.h"
+#include "logs-show.h"
+#include "parse-util.h"
+#include "random-util.h"
+#include "rm-rf.h"
+#include "tests.h"
+
+/* This program tests skipping around in a multi-file journal. */
+
+static bool arg_keep = false;
+static dual_timestamp previous_ts = {};
+
+_noreturn_ static void log_assert_errno(const char *text, int error, const char *file, unsigned line, const char *func) {
+ log_internal(LOG_CRIT, error, file, line, func,
+ "'%s' failed at %s:%u (%s): %m", text, file, line, func);
+ abort();
+}
+
+#define assert_ret(expr) \
+ do { \
+ int _r_ = (expr); \
+ if (_unlikely_(_r_ < 0)) \
+ log_assert_errno(#expr, -_r_, PROJECT_FILE, __LINE__, __func__); \
+ } while (false)
+
+static JournalFile *test_open_internal(const char *name, JournalFileFlags flags) {
+ _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL;
+ JournalFile *f;
+
+ m = mmap_cache_new();
+ assert_se(m != NULL);
+
+ assert_ret(journal_file_open(-1, name, O_RDWR|O_CREAT, flags, 0644, UINT64_MAX, NULL, m, NULL, &f));
+ return f;
+}
+
+static JournalFile *test_open(const char *name) {
+ return test_open_internal(name, JOURNAL_COMPRESS);
+}
+
+static JournalFile *test_open_strict(const char *name) {
+ return test_open_internal(name, JOURNAL_COMPRESS | JOURNAL_STRICT_ORDER);
+}
+
+static void test_close(JournalFile *f) {
+ (void) journal_file_offline_close(f);
+}
+
+static void test_done(const char *t) {
+ log_info("Done...");
+
+ if (arg_keep)
+ log_info("Not removing %s", t);
+ else {
+ journal_directory_vacuum(".", 3000000, 0, 0, NULL, true);
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+ }
+
+ log_info("------------------------------------------------------------");
+}
+
+static void append_number(JournalFile *f, int n, const sd_id128_t *boot_id, uint64_t *seqnum, uint64_t *ret_offset) {
+ _cleanup_free_ char *p = NULL, *q = NULL;
+ dual_timestamp ts;
+ struct iovec iovec[2];
+ size_t n_iov = 0;
+
+ dual_timestamp_now(&ts);
+
+ if (ts.monotonic <= previous_ts.monotonic)
+ ts.monotonic = previous_ts.monotonic + 1;
+
+ if (ts.realtime <= previous_ts.realtime)
+ ts.realtime = previous_ts.realtime + 1;
+
+ previous_ts = ts;
+
+ assert_se(asprintf(&p, "NUMBER=%d", n) >= 0);
+ iovec[n_iov++] = IOVEC_MAKE_STRING(p);
+
+ if (boot_id) {
+ assert_se(q = strjoin("_BOOT_ID=", SD_ID128_TO_STRING(*boot_id)));
+ iovec[n_iov++] = IOVEC_MAKE_STRING(q);
+ }
+
+ assert_ret(journal_file_append_entry(f, &ts, boot_id, iovec, n_iov, seqnum, NULL, NULL, ret_offset));
+}
+
+static void append_unreferenced_data(JournalFile *f, const sd_id128_t *boot_id) {
+ _cleanup_free_ char *q = NULL;
+ dual_timestamp ts;
+ struct iovec iovec;
+
+ assert(boot_id);
+
+ ts.monotonic = usec_sub_unsigned(previous_ts.monotonic, 10);
+ ts.realtime = usec_sub_unsigned(previous_ts.realtime, 10);
+
+ assert_se(q = strjoin("_BOOT_ID=", SD_ID128_TO_STRING(*boot_id)));
+ iovec = IOVEC_MAKE_STRING(q);
+
+ assert_se(journal_file_append_entry(f, &ts, boot_id, &iovec, 1, NULL, NULL, NULL, NULL) == -EREMCHG);
+}
+
+static void test_check_number(sd_journal *j, int n) {
+ sd_id128_t boot_id;
+ const void *d;
+ _cleanup_free_ char *k = NULL;
+ size_t l;
+ int x;
+
+ assert_se(sd_journal_get_monotonic_usec(j, NULL, &boot_id) >= 0);
+ assert_ret(sd_journal_get_data(j, "NUMBER", &d, &l));
+ assert_se(k = strndup(d, l));
+ printf("%s %s (expected=%i)\n", SD_ID128_TO_STRING(boot_id), k, n);
+
+ assert_se(safe_atoi(k + STRLEN("NUMBER="), &x) >= 0);
+ assert_se(n == x);
+}
+
+static void test_check_numbers_down(sd_journal *j, int count) {
+ int i;
+
+ for (i = 1; i <= count; i++) {
+ int r;
+ test_check_number(j, i);
+ assert_ret(r = sd_journal_next(j));
+ if (i == count)
+ assert_se(r == 0);
+ else
+ assert_se(r == 1);
+ }
+
+}
+
+static void test_check_numbers_up(sd_journal *j, int count) {
+ for (int i = count; i >= 1; i--) {
+ int r;
+ test_check_number(j, i);
+ assert_ret(r = sd_journal_previous(j));
+ if (i == 1)
+ assert_se(r == 0);
+ else
+ assert_se(r == 1);
+ }
+
+}
+
+static void setup_sequential(void) {
+ JournalFile *f1, *f2, *f3;
+ sd_id128_t id;
+
+ f1 = test_open("one.journal");
+ f2 = test_open("two.journal");
+ f3 = test_open("three.journal");
+ assert_se(sd_id128_randomize(&id) >= 0);
+ log_info("boot_id: %s", SD_ID128_TO_STRING(id));
+ append_number(f1, 1, &id, NULL, NULL);
+ append_number(f1, 2, &id, NULL, NULL);
+ append_number(f1, 3, &id, NULL, NULL);
+ append_number(f2, 4, &id, NULL, NULL);
+ assert_se(sd_id128_randomize(&id) >= 0);
+ log_info("boot_id: %s", SD_ID128_TO_STRING(id));
+ append_number(f2, 5, &id, NULL, NULL);
+ append_number(f2, 6, &id, NULL, NULL);
+ append_number(f3, 7, &id, NULL, NULL);
+ append_number(f3, 8, &id, NULL, NULL);
+ assert_se(sd_id128_randomize(&id) >= 0);
+ log_info("boot_id: %s", SD_ID128_TO_STRING(id));
+ append_number(f3, 9, &id, NULL, NULL);
+ test_close(f1);
+ test_close(f2);
+ test_close(f3);
+}
+
+static void setup_interleaved(void) {
+ JournalFile *f1, *f2, *f3;
+ sd_id128_t id;
+
+ f1 = test_open("one.journal");
+ f2 = test_open("two.journal");
+ f3 = test_open("three.journal");
+ assert_se(sd_id128_randomize(&id) >= 0);
+ log_info("boot_id: %s", SD_ID128_TO_STRING(id));
+ append_number(f1, 1, &id, NULL, NULL);
+ append_number(f2, 2, &id, NULL, NULL);
+ append_number(f3, 3, &id, NULL, NULL);
+ append_number(f1, 4, &id, NULL, NULL);
+ append_number(f2, 5, &id, NULL, NULL);
+ append_number(f3, 6, &id, NULL, NULL);
+ append_number(f1, 7, &id, NULL, NULL);
+ append_number(f2, 8, &id, NULL, NULL);
+ append_number(f3, 9, &id, NULL, NULL);
+ test_close(f1);
+ test_close(f2);
+ test_close(f3);
+}
+
+static void setup_unreferenced_data(void) {
+ JournalFile *f1, *f2, *f3;
+ sd_id128_t id;
+
+ /* For issue #29275. */
+
+ f1 = test_open_strict("one.journal");
+ f2 = test_open_strict("two.journal");
+ f3 = test_open_strict("three.journal");
+ assert_se(sd_id128_randomize(&id) >= 0);
+ log_info("boot_id: %s", SD_ID128_TO_STRING(id));
+ append_number(f1, 1, &id, NULL, NULL);
+ append_number(f1, 2, &id, NULL, NULL);
+ append_number(f1, 3, &id, NULL, NULL);
+ assert_se(sd_id128_randomize(&id) >= 0);
+ log_info("boot_id: %s", SD_ID128_TO_STRING(id));
+ append_unreferenced_data(f1, &id);
+ append_number(f2, 4, &id, NULL, NULL);
+ append_number(f2, 5, &id, NULL, NULL);
+ append_number(f2, 6, &id, NULL, NULL);
+ assert_se(sd_id128_randomize(&id) >= 0);
+ log_info("boot_id: %s", SD_ID128_TO_STRING(id));
+ append_unreferenced_data(f2, &id);
+ append_number(f3, 7, &id, NULL, NULL);
+ append_number(f3, 8, &id, NULL, NULL);
+ append_number(f3, 9, &id, NULL, NULL);
+ test_close(f1);
+ test_close(f2);
+ test_close(f3);
+}
+
+static void mkdtemp_chdir_chattr(char *path) {
+ assert_se(mkdtemp(path));
+ assert_se(chdir(path) >= 0);
+
+ /* Speed up things a bit on btrfs, ensuring that CoW is turned off for all files created in our
+ * directory during the test run */
+ (void) chattr_path(path, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+}
+
+static void test_skip_one(void (*setup)(void)) {
+ char t[] = "/var/tmp/journal-skip-XXXXXX";
+ sd_journal *j;
+ int r;
+
+ mkdtemp_chdir_chattr(t);
+
+ setup();
+
+ /* Seek to head, iterate down. */
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_ret(sd_journal_seek_head(j));
+ assert_se(sd_journal_next(j) == 1); /* pointing to the first entry */
+ test_check_numbers_down(j, 9);
+ sd_journal_close(j);
+
+ /* Seek to head, iterate down. */
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_ret(sd_journal_seek_head(j));
+ assert_se(sd_journal_next(j) == 1); /* pointing to the first entry */
+ assert_se(sd_journal_previous(j) == 0); /* no-op */
+ test_check_numbers_down(j, 9);
+ sd_journal_close(j);
+
+ /* Seek to head twice, iterate down. */
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_ret(sd_journal_seek_head(j));
+ assert_se(sd_journal_next(j) == 1); /* pointing to the first entry */
+ assert_ret(sd_journal_seek_head(j));
+ assert_se(sd_journal_next(j) == 1); /* pointing to the first entry */
+ test_check_numbers_down(j, 9);
+ sd_journal_close(j);
+
+ /* Seek to head, move to previous, then iterate down. */
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_ret(sd_journal_seek_head(j));
+ assert_se(sd_journal_previous(j) == 0); /* no-op */
+ assert_se(sd_journal_next(j) == 1); /* pointing to the first entry */
+ test_check_numbers_down(j, 9);
+ sd_journal_close(j);
+
+ /* Seek to head, walk several steps, then iterate down. */
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_ret(sd_journal_seek_head(j));
+ assert_se(sd_journal_previous(j) == 0); /* no-op */
+ assert_se(sd_journal_previous(j) == 0); /* no-op */
+ assert_se(sd_journal_previous(j) == 0); /* no-op */
+ assert_se(sd_journal_next(j) == 1); /* pointing to the first entry */
+ assert_se(sd_journal_previous(j) == 0); /* no-op */
+ assert_se(sd_journal_previous(j) == 0); /* no-op */
+ test_check_numbers_down(j, 9);
+ sd_journal_close(j);
+
+ /* Seek to tail, iterate up. */
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_ret(sd_journal_seek_tail(j));
+ assert_se(sd_journal_previous(j) == 1); /* pointing to the last entry */
+ test_check_numbers_up(j, 9);
+ sd_journal_close(j);
+
+ /* Seek to tail twice, iterate up. */
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_ret(sd_journal_seek_tail(j));
+ assert_se(sd_journal_previous(j) == 1); /* pointing to the last entry */
+ assert_ret(sd_journal_seek_tail(j));
+ assert_se(sd_journal_previous(j) == 1); /* pointing to the last entry */
+ test_check_numbers_up(j, 9);
+ sd_journal_close(j);
+
+ /* Seek to tail, move to next, then iterate up. */
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_ret(sd_journal_seek_tail(j));
+ assert_se(sd_journal_next(j) == 0); /* no-op */
+ assert_se(sd_journal_previous(j) == 1); /* pointing to the last entry */
+ test_check_numbers_up(j, 9);
+ sd_journal_close(j);
+
+ /* Seek to tail, walk several steps, then iterate up. */
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_ret(sd_journal_seek_tail(j));
+ assert_se(sd_journal_next(j) == 0); /* no-op */
+ assert_se(sd_journal_next(j) == 0); /* no-op */
+ assert_se(sd_journal_next(j) == 0); /* no-op */
+ assert_se(sd_journal_previous(j) == 1); /* pointing to the last entry. */
+ assert_se(sd_journal_next(j) == 0); /* no-op */
+ assert_se(sd_journal_next(j) == 0); /* no-op */
+ test_check_numbers_up(j, 9);
+ sd_journal_close(j);
+
+ /* Seek to tail, skip to head, iterate down. */
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_ret(sd_journal_seek_tail(j));
+ assert_se(sd_journal_previous_skip(j, 9) == 9); /* pointing to the first entry. */
+ test_check_numbers_down(j, 9);
+ sd_journal_close(j);
+
+ /* Seek to tail, skip to head in a more complex way, then iterate down. */
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_ret(sd_journal_seek_tail(j));
+ assert_se(sd_journal_next(j) == 0);
+ assert_se(sd_journal_previous_skip(j, 4) == 4);
+ assert_se(sd_journal_previous_skip(j, 5) == 5);
+ assert_se(sd_journal_previous(j) == 0);
+ assert_se(sd_journal_previous_skip(j, 5) == 0);
+ assert_se(sd_journal_next(j) == 1);
+ assert_se(sd_journal_previous_skip(j, 5) == 1);
+ assert_se(sd_journal_next(j) == 1);
+ assert_se(sd_journal_next(j) == 1);
+ assert_se(sd_journal_previous(j) == 1);
+ assert_se(sd_journal_next(j) == 1);
+ assert_se(sd_journal_next(j) == 1);
+ assert_se(sd_journal_previous_skip(j, 5) == 3);
+ test_check_numbers_down(j, 9);
+ sd_journal_close(j);
+
+ /* Seek to head, skip to tail, iterate up. */
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_ret(sd_journal_seek_head(j));
+ assert_se(sd_journal_next_skip(j, 9) == 9);
+ test_check_numbers_up(j, 9);
+ sd_journal_close(j);
+
+ /* Seek to head, skip to tail in a more complex way, then iterate up. */
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_ret(sd_journal_seek_head(j));
+ assert_se(sd_journal_previous(j) == 0);
+ assert_se(sd_journal_next_skip(j, 4) == 4);
+ assert_se(sd_journal_next_skip(j, 5) == 5);
+ assert_se(sd_journal_next(j) == 0);
+ assert_se(sd_journal_next_skip(j, 5) == 0);
+ assert_se(sd_journal_previous(j) == 1);
+ assert_se(sd_journal_next_skip(j, 5) == 1);
+ assert_se(sd_journal_previous(j) == 1);
+ assert_se(sd_journal_previous(j) == 1);
+ assert_se(sd_journal_next(j) == 1);
+ assert_se(sd_journal_previous(j) == 1);
+ assert_se(sd_journal_previous(j) == 1);
+ assert_se(r = sd_journal_next_skip(j, 5) == 3);
+ test_check_numbers_up(j, 9);
+ sd_journal_close(j);
+
+ test_done(t);
+}
+
+TEST(skip) {
+ test_skip_one(setup_sequential);
+ test_skip_one(setup_interleaved);
+}
+
+static void test_boot_id_one(void (*setup)(void), size_t n_boots_expected) {
+ char t[] = "/var/tmp/journal-boot-id-XXXXXX";
+ sd_journal *j;
+ _cleanup_free_ BootId *boots = NULL;
+ size_t n_boots;
+
+ mkdtemp_chdir_chattr(t);
+
+ setup();
+
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_se(journal_get_boots(j, &boots, &n_boots) >= 0);
+ assert_se(boots);
+ assert_se(n_boots == n_boots_expected);
+ sd_journal_close(j);
+
+ FOREACH_ARRAY(b, boots, n_boots) {
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_se(journal_find_boot_by_id(j, b->id) == 1);
+ sd_journal_close(j);
+ }
+
+ for (int i = - (int) n_boots + 1; i <= (int) n_boots; i++) {
+ sd_id128_t id;
+
+ assert_ret(sd_journal_open_directory(&j, t, 0));
+ assert_se(journal_find_boot_by_offset(j, i, &id) == 1);
+ if (i <= 0)
+ assert_se(sd_id128_equal(id, boots[n_boots + i - 1].id));
+ else
+ assert_se(sd_id128_equal(id, boots[i - 1].id));
+ sd_journal_close(j);
+ }
+
+ test_done(t);
+}
+
+TEST(boot_id) {
+ test_boot_id_one(setup_sequential, 3);
+ test_boot_id_one(setup_unreferenced_data, 3);
+}
+
+static void test_sequence_numbers_one(void) {
+ _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL;
+ char t[] = "/var/tmp/journal-seq-XXXXXX";
+ JournalFile *one, *two;
+ uint64_t seqnum = 0;
+ sd_id128_t seqnum_id;
+
+ m = mmap_cache_new();
+ assert_se(m != NULL);
+
+ mkdtemp_chdir_chattr(t);
+
+ assert_se(journal_file_open(-1, "one.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS, 0644,
+ UINT64_MAX, NULL, m, NULL, &one) == 0);
+
+ append_number(one, 1, NULL, &seqnum, NULL);
+ printf("seqnum=%"PRIu64"\n", seqnum);
+ assert_se(seqnum == 1);
+ append_number(one, 2, NULL, &seqnum, NULL);
+ printf("seqnum=%"PRIu64"\n", seqnum);
+ assert_se(seqnum == 2);
+
+ assert_se(one->header->state == STATE_ONLINE);
+ assert_se(!sd_id128_equal(one->header->file_id, one->header->machine_id));
+ assert_se(!sd_id128_equal(one->header->file_id, one->header->tail_entry_boot_id));
+ assert_se(sd_id128_equal(one->header->file_id, one->header->seqnum_id));
+
+ memcpy(&seqnum_id, &one->header->seqnum_id, sizeof(sd_id128_t));
+
+ assert_se(journal_file_open(-1, "two.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS, 0644,
+ UINT64_MAX, NULL, m, one, &two) == 0);
+
+ assert_se(two->header->state == STATE_ONLINE);
+ assert_se(!sd_id128_equal(two->header->file_id, one->header->file_id));
+ assert_se(sd_id128_equal(two->header->machine_id, one->header->machine_id));
+ assert_se(sd_id128_is_null(two->header->tail_entry_boot_id)); /* Not written yet. */
+ assert_se(sd_id128_equal(two->header->seqnum_id, one->header->seqnum_id));
+
+ append_number(two, 3, NULL, &seqnum, NULL);
+ printf("seqnum=%"PRIu64"\n", seqnum);
+ assert_se(seqnum == 3);
+ append_number(two, 4, NULL, &seqnum, NULL);
+ printf("seqnum=%"PRIu64"\n", seqnum);
+ assert_se(seqnum == 4);
+
+ /* Verify tail_entry_boot_id. */
+ assert_se(sd_id128_equal(two->header->tail_entry_boot_id, one->header->tail_entry_boot_id));
+
+ test_close(two);
+
+ append_number(one, 5, NULL, &seqnum, NULL);
+ printf("seqnum=%"PRIu64"\n", seqnum);
+ assert_se(seqnum == 5);
+
+ append_number(one, 6, NULL, &seqnum, NULL);
+ printf("seqnum=%"PRIu64"\n", seqnum);
+ assert_se(seqnum == 6);
+
+ test_close(one);
+
+ /* If the machine-id is not initialized, the header file verification
+ * (which happens when re-opening a journal file) will fail. */
+ if (sd_id128_get_machine(NULL) >= 0) {
+ /* restart server */
+ seqnum = 0;
+
+ assert_se(journal_file_open(-1, "two.journal", O_RDWR, JOURNAL_COMPRESS, 0,
+ UINT64_MAX, NULL, m, NULL, &two) == 0);
+
+ assert_se(sd_id128_equal(two->header->seqnum_id, seqnum_id));
+
+ append_number(two, 7, NULL, &seqnum, NULL);
+ printf("seqnum=%"PRIu64"\n", seqnum);
+ assert_se(seqnum == 5);
+
+ /* So..., here we have the same seqnum in two files with the
+ * same seqnum_id. */
+
+ test_close(two);
+ }
+
+ test_done(t);
+}
+
+TEST(sequence_numbers) {
+ assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0);
+ test_sequence_numbers_one();
+
+ assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0);
+ test_sequence_numbers_one();
+}
+
+static int expected_result(uint64_t needle, const uint64_t *candidates, const uint64_t *offset, size_t n, direction_t direction, uint64_t *ret) {
+ switch (direction) {
+ case DIRECTION_DOWN:
+ for (size_t i = 0; i < n; i++) {
+ if (candidates[i] == 0) {
+ *ret = 0;
+ return 0;
+ }
+ if (needle <= candidates[i]) {
+ *ret = offset[i];
+ return 1;
+ }
+ }
+ *ret = 0;
+ return 0;
+
+ case DIRECTION_UP:
+ for (size_t i = 0; i < n; i++)
+ if (needle < candidates[i] || candidates[i] == 0) {
+ if (i == 0) {
+ *ret = 0;
+ return 0;
+ }
+ *ret = offset[i - 1];
+ return 1;
+ }
+ *ret = offset[n - 1];
+ return 1;
+
+ default:
+ assert_not_reached();
+ }
+}
+
+static void verify(JournalFile *f, const uint64_t *seqnum, const uint64_t *offset, size_t n) {
+ uint64_t p, q;
+ int r, e;
+
+ /* by seqnum (sequential) */
+ for (uint64_t i = 0; i < n + 2; i++) {
+ p = 0;
+ r = journal_file_move_to_entry_by_seqnum(f, i, DIRECTION_DOWN, NULL, &p);
+ e = expected_result(i, seqnum, offset, n, DIRECTION_DOWN, &q);
+ assert_se(r == e);
+ assert_se(p == q);
+
+ p = 0;
+ r = journal_file_move_to_entry_by_seqnum(f, i, DIRECTION_UP, NULL, &p);
+ e = expected_result(i, seqnum, offset, n, DIRECTION_UP, &q);
+ assert_se(r == e);
+ assert_se(p == q);
+ }
+
+ /* by seqnum (random) */
+ for (size_t trial = 0; trial < 3 * n; trial++) {
+ uint64_t i = random_u64_range(n + 2);
+
+ p = 0;
+ r = journal_file_move_to_entry_by_seqnum(f, i, DIRECTION_DOWN, NULL, &p);
+ e = expected_result(i, seqnum, offset, n, DIRECTION_DOWN, &q);
+ assert_se(r == e);
+ assert_se(p == q);
+ }
+ for (size_t trial = 0; trial < 3 * n; trial++) {
+ uint64_t i = random_u64_range(n + 2);
+
+ p = 0;
+ r = journal_file_move_to_entry_by_seqnum(f, i, DIRECTION_UP, NULL, &p);
+ e = expected_result(i, seqnum, offset, n, DIRECTION_UP, &q);
+ assert_se(r == e);
+ assert_se(p == q);
+ }
+
+ /* by offset (sequential) */
+ for (size_t i = 0; i < n; i++) {
+ p = 0;
+ r = journal_file_move_to_entry_by_offset(f, offset[i] - 1, DIRECTION_DOWN, NULL, &p);
+ e = expected_result(offset[i] - 1, offset, offset, n, DIRECTION_DOWN, &q);
+ assert_se(r == e);
+ assert_se(p == q);
+
+ p = 0;
+ r = journal_file_move_to_entry_by_offset(f, offset[i], DIRECTION_DOWN, NULL, &p);
+ e = expected_result(offset[i], offset, offset, n, DIRECTION_DOWN, &q);
+ assert_se(r == e);
+ assert_se(p == q);
+
+ p = 0;
+ r = journal_file_move_to_entry_by_offset(f, offset[i] + 1, DIRECTION_DOWN, NULL, &p);
+ e = expected_result(offset[i] + 1, offset, offset, n, DIRECTION_DOWN, &q);
+ assert_se(r == e);
+ assert_se(p == q);
+
+ p = 0;
+ r = journal_file_move_to_entry_by_offset(f, offset[i] - 1, DIRECTION_UP, NULL, &p);
+ e = expected_result(offset[i] - 1, offset, offset, n, DIRECTION_UP, &q);
+ assert_se(r == e);
+ assert_se(p == q);
+
+ p = 0;
+ r = journal_file_move_to_entry_by_offset(f, offset[i], DIRECTION_UP, NULL, &p);
+ e = expected_result(offset[i], offset, offset, n, DIRECTION_UP, &q);
+ assert_se(r == e);
+ assert_se(p == q);
+
+ p = 0;
+ r = journal_file_move_to_entry_by_offset(f, offset[i] + 1, DIRECTION_UP, NULL, &p);
+ e = expected_result(offset[i] + 1, offset, offset, n, DIRECTION_UP, &q);
+ assert_se(r == e);
+ assert_se(p == q);
+ }
+
+ /* by offset (random) */
+ for (size_t trial = 0; trial < 3 * n; trial++) {
+ uint64_t i = offset[0] - 1 + random_u64_range(offset[n-1] - offset[0] + 2);
+
+ p = 0;
+ r = journal_file_move_to_entry_by_offset(f, i, DIRECTION_DOWN, NULL, &p);
+ e = expected_result(i, offset, offset, n, DIRECTION_DOWN, &q);
+ assert_se(r == e);
+ assert_se(p == q);
+ }
+ for (size_t trial = 0; trial < 3 * n; trial++) {
+ uint64_t i = offset[0] - 1 + random_u64_range(offset[n-1] - offset[0] + 2);
+
+ p = 0;
+ r = journal_file_move_to_entry_by_offset(f, i, DIRECTION_UP, NULL, &p);
+ e = expected_result(i, offset, offset, n, DIRECTION_UP, &q);
+ assert_se(r == e);
+ assert_se(p == q);
+ }
+}
+
+static void test_generic_array_bisect_one(size_t n, size_t num_corrupted) {
+ _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL;
+ char t[] = "/var/tmp/journal-seq-XXXXXX";
+ _cleanup_free_ uint64_t *seqnum = NULL, *offset = NULL;
+ JournalFile *f;
+
+ log_info("/* %s(%zu, %zu) */", __func__, n, num_corrupted);
+
+ assert_se(m = mmap_cache_new());
+
+ mkdtemp_chdir_chattr(t);
+
+ assert_se(journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS, 0644,
+ UINT64_MAX, NULL, m, NULL, &f) == 0);
+
+ assert_se(seqnum = new0(uint64_t, n));
+ assert_se(offset = new0(uint64_t, n));
+
+ for (size_t i = 0; i < n; i++) {
+ append_number(f, i, NULL, seqnum + i, offset + i);
+ if (i == 0) {
+ assert_se(seqnum[i] > 0);
+ assert_se(offset[i] > 0);
+ } else {
+ assert_se(seqnum[i] > seqnum[i-1]);
+ assert_se(offset[i] > offset[i-1]);
+ }
+ }
+
+ verify(f, seqnum, offset, n);
+
+ /* Reset chain cache. */
+ assert_se(journal_file_move_to_entry_by_offset(f, offset[0], DIRECTION_DOWN, NULL, NULL) > 0);
+
+ /* make journal corrupted by clearing seqnum. */
+ for (size_t i = n - num_corrupted; i < n; i++) {
+ Object *o;
+
+ assert_se(journal_file_move_to_object(f, OBJECT_ENTRY, offset[i], &o) >= 0);
+ assert_se(o);
+ o->entry.seqnum = 0;
+ seqnum[i] = 0;
+ }
+
+ verify(f, seqnum, offset, n);
+
+ test_close(f);
+ test_done(t);
+}
+
+TEST(generic_array_bisect) {
+ for (size_t n = 1; n < 10; n++)
+ for (size_t m = 1; m <= n; m++)
+ test_generic_array_bisect_one(n, m);
+
+ test_generic_array_bisect_one(100, 40);
+}
+
+static int intro(void) {
+ /* journal_file_open() requires a valid machine id */
+ if (access("/etc/machine-id", F_OK) != 0)
+ return log_tests_skipped("/etc/machine-id not found");
+
+ arg_keep = saved_argc > 1;
+
+ return EXIT_SUCCESS;
+}
+
+DEFINE_TEST_MAIN_WITH_INTRO(LOG_DEBUG, intro);
diff --git a/src/libsystemd/sd-journal/test-journal-match.c b/src/libsystemd/sd-journal/test-journal-match.c
new file mode 100644
index 0000000..571a88c
--- /dev/null
+++ b/src/libsystemd/sd-journal/test-journal-match.c
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "journal-internal.h"
+#include "log.h"
+#include "string-util.h"
+#include "tests.h"
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ _cleanup_free_ char *t;
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(sd_journal_open(&j, 0) >= 0);
+
+ assert_se(sd_journal_add_match(j, "foobar", 0) < 0);
+ assert_se(sd_journal_add_match(j, "foobar=waldo", 0) < 0);
+ assert_se(sd_journal_add_match(j, "", 0) < 0);
+ assert_se(sd_journal_add_match(j, "=", 0) < 0);
+ assert_se(sd_journal_add_match(j, "=xxxxx", 0) < 0);
+ assert_se(sd_journal_add_match(j, (uint8_t[4]){'A', '=', '\1', '\2'}, 4) >= 0);
+ assert_se(sd_journal_add_match(j, (uint8_t[5]){'B', '=', 'C', '\0', 'D'}, 5) >= 0);
+ assert_se(sd_journal_add_match(j, "HALLO=WALDO", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "QUUX=mmmm", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "QUUX=xxxxx", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "HALLO=", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "QUUX=xxxxx", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "QUUX=yyyyy", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "PIFF=paff", 0) >= 0);
+
+ assert_se(sd_journal_add_disjunction(j) >= 0);
+
+ assert_se(sd_journal_add_match(j, "ONE=one", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "ONE=two", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "TWO=two", 0) >= 0);
+
+ assert_se(sd_journal_add_conjunction(j) >= 0);
+
+ assert_se(sd_journal_add_match(j, "L4_1=yes", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "L4_1=ok", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "L4_2=yes", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "L4_2=ok", 0) >= 0);
+
+ assert_se(sd_journal_add_disjunction(j) >= 0);
+
+ assert_se(sd_journal_add_match(j, "L3=yes", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "L3=ok", 0) >= 0);
+
+ assert_se(t = journal_make_match_string(j));
+
+ printf("resulting match expression is: %s\n", t);
+
+ assert_se(streq(t, "(((L3=ok OR L3=yes) OR ((L4_2=ok OR L4_2=yes) AND (L4_1=ok OR L4_1=yes))) AND ((TWO=two AND (ONE=two OR ONE=one)) OR (PIFF=paff AND (QUUX=yyyyy OR QUUX=xxxxx OR QUUX=mmmm) AND (HALLO= OR HALLO=WALDO) AND B=C\\000D AND A=\\001\\002)))"));
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-journal/test-journal-send.c b/src/libsystemd/sd-journal/test-journal-send.c
new file mode 100644
index 0000000..ca1fe7c
--- /dev/null
+++ b/src/libsystemd/sd-journal/test-journal-send.c
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "sd-journal.h"
+
+#include "fileio.h"
+#include "journal-send.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "tests.h"
+
+TEST(journal_print) {
+ assert_se(sd_journal_print(LOG_INFO, "XXX") == 0);
+ assert_se(sd_journal_print(LOG_INFO, "%s", "YYY") == 0);
+ assert_se(sd_journal_print(LOG_INFO, "X%4094sY", "ZZZ") == 0);
+ assert_se(sd_journal_print(LOG_INFO, "X%*sY", (int) LONG_LINE_MAX - 8 - 3, "ZZZ") == 0);
+ assert_se(sd_journal_print(LOG_INFO, "X%*sY", (int) LONG_LINE_MAX - 8 - 2, "ZZZ") == -ENOBUFS);
+}
+
+TEST(journal_send) {
+ _cleanup_free_ char *huge = NULL;
+
+#define HUGE_SIZE (4096*1024)
+ assert_se(huge = malloc(HUGE_SIZE));
+
+ /* utf-8 and non-utf-8, message-less and message-ful iovecs */
+ struct iovec graph1[] = {
+ {(char*) "GRAPH=graph", STRLEN("GRAPH=graph")}
+ };
+ struct iovec graph2[] = {
+ {(char*) "GRAPH=graph\n", STRLEN("GRAPH=graph\n")}
+ };
+ struct iovec message1[] = {
+ {(char*) "MESSAGE=graph", STRLEN("MESSAGE=graph")}
+ };
+ struct iovec message2[] = {
+ {(char*) "MESSAGE=graph\n", STRLEN("MESSAGE=graph\n")}
+ };
+
+ assert_se(sd_journal_print(LOG_INFO, "piepapo") == 0);
+
+ assert_se(sd_journal_send("MESSAGE=foobar",
+ "VALUE=%i", 7,
+ NULL) == 0);
+
+ errno = ENOENT;
+ assert_se(sd_journal_perror("Foobar") == 0);
+
+ assert_se(sd_journal_perror("") == 0);
+
+ memcpy(huge, "HUGE=", STRLEN("HUGE="));
+ memset(&huge[STRLEN("HUGE=")], 'x', HUGE_SIZE - STRLEN("HUGE=") - 1);
+ huge[HUGE_SIZE - 1] = '\0';
+
+ assert_se(sd_journal_send("MESSAGE=Huge field attached",
+ huge,
+ NULL) == 0);
+
+ assert_se(sd_journal_send("MESSAGE=uiui",
+ "VALUE=A",
+ "VALUE=B",
+ "VALUE=C",
+ "SINGLETON=1",
+ "OTHERVALUE=X",
+ "OTHERVALUE=Y",
+ "WITH_BINARY=this is a binary value \a",
+ NULL) == 0);
+
+ syslog(LOG_NOTICE, "Hello World!");
+
+ assert_se(sd_journal_print(LOG_NOTICE, "Hello World") == 0);
+
+ assert_se(sd_journal_send("MESSAGE=Hello World!",
+ "MESSAGE_ID=52fb62f99e2c49d89cfbf9d6de5e3555",
+ "PRIORITY=5",
+ "HOME=%s", getenv("HOME"),
+ "TERM=%s", getenv("TERM"),
+ "PAGE_SIZE=%li", sysconf(_SC_PAGESIZE),
+ "N_CPUS=%li", sysconf(_SC_NPROCESSORS_ONLN),
+ NULL) == 0);
+
+ assert_se(sd_journal_sendv(graph1, 1) == 0);
+ assert_se(sd_journal_sendv(graph2, 1) == 0);
+ assert_se(sd_journal_sendv(message1, 1) == 0);
+ assert_se(sd_journal_sendv(message2, 1) == 0);
+
+ /* test without location fields */
+#undef sd_journal_sendv
+ assert_se(sd_journal_sendv(graph1, 1) == 0);
+ assert_se(sd_journal_sendv(graph2, 1) == 0);
+ assert_se(sd_journal_sendv(message1, 1) == 0);
+ assert_se(sd_journal_sendv(message2, 1) == 0);
+
+ /* The above syslog() opens a fd which is stored in libc, and the valgrind reports the fd is
+ * leaked when we do not call closelog(). */
+ closelog();
+}
+
+static int outro(void) {
+ /* Sleep a bit to make it easy for journald to collect metadata. */
+ sleep(1);
+
+ close_journal_fd();
+
+ return EXIT_SUCCESS;
+}
+
+DEFINE_TEST_MAIN_FULL(LOG_INFO, NULL, outro);
diff --git a/src/libsystemd/sd-journal/test-journal-stream.c b/src/libsystemd/sd-journal/test-journal-stream.c
new file mode 100644
index 0000000..3a370ef
--- /dev/null
+++ b/src/libsystemd/sd-journal/test-journal-stream.c
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "chattr-util.h"
+#include "iovec-util.h"
+#include "journal-file-util.h"
+#include "journal-internal.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "rm-rf.h"
+#include "tests.h"
+
+#define N_ENTRIES 200
+
+static void verify_contents(sd_journal *j, unsigned skip) {
+ unsigned i;
+
+ assert_se(j);
+
+ i = 0;
+ SD_JOURNAL_FOREACH(j) {
+ const void *d;
+ char *k, *c;
+ size_t l;
+ unsigned u = 0;
+
+ assert_se(sd_journal_get_cursor(j, &k) >= 0);
+ printf("cursor: %s\n", k);
+ free(k);
+
+ assert_se(sd_journal_get_data(j, "MAGIC", &d, &l) >= 0);
+ printf("\t%.*s\n", (int) l, (const char*) d);
+
+ assert_se(sd_journal_get_data(j, "NUMBER", &d, &l) >= 0);
+ assert_se(k = strndup(d, l));
+ printf("\t%s\n", k);
+
+ if (skip > 0) {
+ assert_se(safe_atou(k + 7, &u) >= 0);
+ assert_se(i == u);
+ i += skip;
+ }
+
+ free(k);
+
+ assert_se(sd_journal_get_cursor(j, &c) >= 0);
+ assert_se(sd_journal_test_cursor(j, c) > 0);
+ free(c);
+ }
+
+ if (skip > 0)
+ assert_se(i == N_ENTRIES);
+}
+
+static void run_test(void) {
+ _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL;
+ JournalFile *one, *two, *three;
+ char t[] = "/var/tmp/journal-stream-XXXXXX";
+ unsigned i;
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ char *z;
+ const void *data;
+ size_t l;
+ dual_timestamp previous_ts = DUAL_TIMESTAMP_NULL;
+
+ m = mmap_cache_new();
+ assert_se(m != NULL);
+
+ assert_se(mkdtemp(t));
+ assert_se(chdir(t) >= 0);
+ (void) chattr_path(t, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+
+ assert_se(journal_file_open(-1, "one.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS, 0666, UINT64_MAX, NULL, m, NULL, &one) == 0);
+ assert_se(journal_file_open(-1, "two.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS, 0666, UINT64_MAX, NULL, m, NULL, &two) == 0);
+ assert_se(journal_file_open(-1, "three.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS, 0666, UINT64_MAX, NULL, m, NULL, &three) == 0);
+
+ for (i = 0; i < N_ENTRIES; i++) {
+ char *p, *q;
+ dual_timestamp ts;
+ struct iovec iovec[2];
+
+ dual_timestamp_now(&ts);
+
+ if (ts.monotonic <= previous_ts.monotonic)
+ ts.monotonic = previous_ts.monotonic + 1;
+
+ if (ts.realtime <= previous_ts.realtime)
+ ts.realtime = previous_ts.realtime + 1;
+
+ previous_ts = ts;
+
+ assert_se(asprintf(&p, "NUMBER=%u", i) >= 0);
+ iovec[0] = IOVEC_MAKE(p, strlen(p));
+
+ assert_se(asprintf(&q, "MAGIC=%s", i % 5 == 0 ? "quux" : "waldo") >= 0);
+
+ iovec[1] = IOVEC_MAKE(q, strlen(q));
+
+ if (i % 10 == 0)
+ assert_se(journal_file_append_entry(three, &ts, NULL, iovec, 2, NULL, NULL, NULL, NULL) == 0);
+ else {
+ if (i % 3 == 0)
+ assert_se(journal_file_append_entry(two, &ts, NULL, iovec, 2, NULL, NULL, NULL, NULL) == 0);
+
+ assert_se(journal_file_append_entry(one, &ts, NULL, iovec, 2, NULL, NULL, NULL, NULL) == 0);
+ }
+
+ free(p);
+ free(q);
+ }
+
+ (void) journal_file_offline_close(one);
+ (void) journal_file_offline_close(two);
+ (void) journal_file_offline_close(three);
+
+ assert_se(sd_journal_open_directory(&j, t, 0) >= 0);
+
+ assert_se(sd_journal_add_match(j, "MAGIC=quux", 0) >= 0);
+ SD_JOURNAL_FOREACH_BACKWARDS(j) {
+ _cleanup_free_ char *c;
+
+ assert_se(sd_journal_get_data(j, "NUMBER", &data, &l) >= 0);
+ printf("\t%.*s\n", (int) l, (const char*) data);
+
+ assert_se(sd_journal_get_cursor(j, &c) >= 0);
+ assert_se(sd_journal_test_cursor(j, c) > 0);
+ }
+
+ SD_JOURNAL_FOREACH(j) {
+ _cleanup_free_ char *c;
+
+ assert_se(sd_journal_get_data(j, "NUMBER", &data, &l) >= 0);
+ printf("\t%.*s\n", (int) l, (const char*) data);
+
+ assert_se(sd_journal_get_cursor(j, &c) >= 0);
+ assert_se(sd_journal_test_cursor(j, c) > 0);
+ }
+
+ sd_journal_flush_matches(j);
+
+ verify_contents(j, 1);
+
+ printf("NEXT TEST\n");
+ assert_se(sd_journal_add_match(j, "MAGIC=quux", 0) >= 0);
+
+ assert_se(z = journal_make_match_string(j));
+ printf("resulting match expression is: %s\n", z);
+ free(z);
+
+ verify_contents(j, 5);
+
+ printf("NEXT TEST\n");
+ sd_journal_flush_matches(j);
+ assert_se(sd_journal_add_match(j, "MAGIC=waldo", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "NUMBER=10", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "NUMBER=11", 0) >= 0);
+ assert_se(sd_journal_add_match(j, "NUMBER=12", 0) >= 0);
+
+ assert_se(z = journal_make_match_string(j));
+ printf("resulting match expression is: %s\n", z);
+ free(z);
+
+ verify_contents(j, 0);
+
+ assert_se(sd_journal_query_unique(j, "NUMBER") >= 0);
+ SD_JOURNAL_FOREACH_UNIQUE(j, data, l)
+ printf("%.*s\n", (int) l, (const char*) data);
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+}
+
+int main(int argc, char *argv[]) {
+
+ /* journal_file_open() requires a valid machine id */
+ if (access("/etc/machine-id", F_OK) != 0)
+ return log_tests_skipped("/etc/machine-id not found");
+
+ test_setup_logging(LOG_DEBUG);
+
+ /* Run this test multiple times with different configurations of features. */
+
+ assert_se(setenv("SYSTEMD_JOURNAL_KEYED_HASH", "0", 1) >= 0);
+ run_test();
+
+ assert_se(setenv("SYSTEMD_JOURNAL_KEYED_HASH", "1", 1) >= 0);
+ run_test();
+
+ assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0);
+ run_test();
+
+ assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0);
+ run_test();
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-journal/test-journal-verify.c b/src/libsystemd/sd-journal/test-journal-verify.c
new file mode 100644
index 0000000..edce440
--- /dev/null
+++ b/src/libsystemd/sd-journal/test-journal-verify.c
@@ -0,0 +1,210 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "chattr-util.h"
+#include "fd-util.h"
+#include "iovec-util.h"
+#include "journal-file-util.h"
+#include "journal-verify.h"
+#include "log.h"
+#include "mmap-cache.h"
+#include "rm-rf.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tests.h"
+
+#define N_ENTRIES 6000
+#define RANDOM_RANGE 77
+
+static void bit_toggle(const char *fn, uint64_t p) {
+ uint8_t b;
+ ssize_t r;
+ int fd;
+
+ fd = open(fn, O_RDWR|O_CLOEXEC);
+ assert_se(fd >= 0);
+
+ r = pread(fd, &b, 1, p/8);
+ assert_se(r == 1);
+
+ b ^= 1 << (p % 8);
+
+ r = pwrite(fd, &b, 1, p/8);
+ assert_se(r == 1);
+
+ safe_close(fd);
+}
+
+static int raw_verify(const char *fn, const char *verification_key) {
+ _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL;
+ JournalFile *f;
+ int r;
+
+ m = mmap_cache_new();
+ assert_se(m != NULL);
+
+ r = journal_file_open(
+ /* fd= */ -1,
+ fn,
+ O_RDONLY,
+ JOURNAL_COMPRESS|(verification_key ? JOURNAL_SEAL : 0),
+ 0666,
+ /* compress_threshold_bytes= */ UINT64_MAX,
+ /* metrics= */ NULL,
+ m,
+ /* template= */ NULL,
+ &f);
+ if (r < 0)
+ return r;
+
+ r = journal_file_verify(f, verification_key, NULL, NULL, NULL, false);
+ (void) journal_file_close(f);
+
+ return r;
+}
+
+static int run_test(const char *verification_key, ssize_t max_iterations) {
+ _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL;
+ char t[] = "/var/tmp/journal-XXXXXX";
+ struct stat st;
+ JournalFile *f;
+ JournalFile *df;
+ usec_t from = 0, to = 0, total = 0;
+ uint64_t start, end;
+ int r;
+
+ m = mmap_cache_new();
+ assert_se(m != NULL);
+
+ /* journal_file_open() requires a valid machine id */
+ if (sd_id128_get_machine(NULL) < 0)
+ return log_tests_skipped("No valid machine ID found");
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(mkdtemp(t));
+ assert_se(chdir(t) >= 0);
+ (void) chattr_path(t, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+
+ log_info("Generating a test journal");
+
+ assert_se(journal_file_open(
+ /* fd= */ -1,
+ "test.journal",
+ O_RDWR|O_CREAT,
+ JOURNAL_COMPRESS|(verification_key ? JOURNAL_SEAL : 0),
+ 0666,
+ /* compress_threshold_bytes= */ UINT64_MAX,
+ /* metrics= */ NULL,
+ m,
+ /* template= */ NULL,
+ &df) == 0);
+
+ for (size_t n = 0; n < N_ENTRIES; n++) {
+ _cleanup_free_ char *test = NULL;
+ struct iovec iovec;
+ struct dual_timestamp ts;
+
+ dual_timestamp_now(&ts);
+ assert_se(asprintf(&test, "RANDOM=%li", random() % RANDOM_RANGE));
+ iovec = IOVEC_MAKE_STRING(test);
+ assert_se(journal_file_append_entry(
+ df,
+ &ts,
+ /* boot_id= */ NULL,
+ &iovec,
+ /* n_iovec= */ 1,
+ /* seqnum= */ NULL,
+ /* seqnum_id= */ NULL,
+ /* ret_object= */ NULL,
+ /* ret_offset= */ NULL) == 0);
+ }
+
+ (void) journal_file_offline_close(df);
+
+ log_info("Verifying with key: %s", strna(verification_key));
+
+ assert_se(journal_file_open(
+ /* fd= */ -1,
+ "test.journal",
+ O_RDONLY,
+ JOURNAL_COMPRESS|(verification_key ? JOURNAL_SEAL : 0),
+ 0666,
+ /* compress_threshold_bytes= */ UINT64_MAX,
+ /* metrics= */ NULL,
+ m,
+ /* template= */ NULL,
+ &f) == 0);
+ journal_file_print_header(f);
+ journal_file_dump(f);
+
+ assert_se(journal_file_verify(f, verification_key, &from, &to, &total, true) >= 0);
+
+ if (verification_key && JOURNAL_HEADER_SEALED(f->header))
+ log_info("=> Validated from %s to %s, %s missing",
+ FORMAT_TIMESTAMP(from),
+ FORMAT_TIMESTAMP(to),
+ FORMAT_TIMESPAN(total > to ? total - to : 0, 0));
+
+ (void) journal_file_close(f);
+ assert_se(stat("test.journal", &st) >= 0);
+
+ start = 38448 * 8 + 0;
+ end = max_iterations < 0 ? (uint64_t)st.st_size * 8 : start + max_iterations;
+ log_info("Toggling bits %"PRIu64 " to %"PRIu64, start, end);
+
+ for (uint64_t p = start; p < end; p++) {
+ bit_toggle("test.journal", p);
+
+ if (max_iterations < 0)
+ log_info("[ %"PRIu64"+%"PRIu64"]", p / 8, p % 8);
+
+ r = raw_verify("test.journal", verification_key);
+ /* Suppress the notice when running in the limited (CI) mode */
+ if (verification_key && max_iterations < 0 && r >= 0)
+ log_notice(ANSI_HIGHLIGHT_RED ">>>> %"PRIu64" (bit %"PRIu64") can be toggled without detection." ANSI_NORMAL, p / 8, p % 8);
+
+ bit_toggle("test.journal", p);
+ }
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ const char *verification_key = NULL;
+ int max_iterations = 512;
+
+ if (argc > 1) {
+ /* Don't limit the number of iterations when the verification key
+ * is provided on the command line, we want to do that only in CIs */
+ verification_key = argv[1];
+ max_iterations = -1;
+ }
+
+ assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0);
+ run_test(verification_key, max_iterations);
+
+ assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0);
+ run_test(verification_key, max_iterations);
+
+#if HAVE_GCRYPT
+ /* If we're running without any arguments and we're compiled with gcrypt
+ * check the journal verification stuff with a valid key as well */
+ if (argc <= 1) {
+ verification_key = "c262bd-85187f-0b1b04-877cc5/1c7af8-35a4e900";
+
+ assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0);
+ run_test(verification_key, max_iterations);
+
+ assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0);
+ run_test(verification_key, max_iterations);
+ }
+#endif
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-journal/test-journal.c b/src/libsystemd/sd-journal/test-journal.c
new file mode 100644
index 0000000..96f2b67
--- /dev/null
+++ b/src/libsystemd/sd-journal/test-journal.c
@@ -0,0 +1,280 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "chattr-util.h"
+#include "iovec-util.h"
+#include "journal-authenticate.h"
+#include "journal-file-util.h"
+#include "journal-vacuum.h"
+#include "log.h"
+#include "rm-rf.h"
+#include "tests.h"
+
+static bool arg_keep = false;
+
+static void mkdtemp_chdir_chattr(char *path) {
+ assert_se(mkdtemp(path));
+ assert_se(chdir(path) >= 0);
+
+ /* Speed up things a bit on btrfs, ensuring that CoW is turned off for all files created in our
+ * directory during the test run */
+ (void) chattr_path(path, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+}
+
+static void test_non_empty_one(void) {
+ _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL;
+ dual_timestamp ts;
+ JournalFile *f;
+ struct iovec iovec;
+ static const char test[] = "TEST1=1", test2[] = "TEST2=2";
+ Object *o, *d;
+ uint64_t p;
+ sd_id128_t fake_boot_id;
+ char t[] = "/var/tmp/journal-XXXXXX";
+
+ m = mmap_cache_new();
+ assert_se(m != NULL);
+
+ mkdtemp_chdir_chattr(t);
+
+ assert_se(journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS|JOURNAL_SEAL, 0666, UINT64_MAX, NULL, m, NULL, &f) == 0);
+
+ assert_se(dual_timestamp_now(&ts));
+ assert_se(sd_id128_randomize(&fake_boot_id) == 0);
+
+ iovec = IOVEC_MAKE_STRING(test);
+ assert_se(journal_file_append_entry(f, &ts, NULL, &iovec, 1, NULL, NULL, NULL, NULL) == 0);
+
+ iovec = IOVEC_MAKE_STRING(test2);
+ assert_se(journal_file_append_entry(f, &ts, NULL, &iovec, 1, NULL, NULL, NULL, NULL) == 0);
+
+ iovec = IOVEC_MAKE_STRING(test);
+ assert_se(journal_file_append_entry(f, &ts, &fake_boot_id, &iovec, 1, NULL, NULL, NULL, NULL) == 0);
+
+#if HAVE_GCRYPT
+ journal_file_append_tag(f);
+#endif
+ journal_file_dump(f);
+
+ assert_se(journal_file_next_entry(f, 0, DIRECTION_DOWN, &o, &p) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 1);
+
+ assert_se(journal_file_next_entry(f, p, DIRECTION_DOWN, &o, &p) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 2);
+
+ assert_se(journal_file_next_entry(f, p, DIRECTION_DOWN, &o, &p) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 3);
+ assert_se(sd_id128_equal(o->entry.boot_id, fake_boot_id));
+
+ assert_se(journal_file_next_entry(f, p, DIRECTION_DOWN, &o, &p) == 0);
+
+ assert_se(journal_file_next_entry(f, 0, DIRECTION_DOWN, &o, &p) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 1);
+
+ assert_se(journal_file_find_data_object(f, test, strlen(test), &d, NULL) == 1);
+ assert_se(journal_file_move_to_entry_for_data(f, d, DIRECTION_DOWN, &o, NULL) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 1);
+
+ assert_se(journal_file_move_to_entry_for_data(f, d, DIRECTION_UP, &o, NULL) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 3);
+
+ assert_se(journal_file_find_data_object(f, test2, strlen(test2), &d, NULL) == 1);
+ assert_se(journal_file_move_to_entry_for_data(f, d, DIRECTION_UP, &o, NULL) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 2);
+
+ assert_se(journal_file_move_to_entry_for_data(f, d, DIRECTION_DOWN, &o, NULL) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 2);
+
+ assert_se(journal_file_find_data_object(f, "quux", 4, &d, NULL) == 0);
+
+ assert_se(journal_file_move_to_entry_by_seqnum(f, 1, DIRECTION_DOWN, &o, NULL) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 1);
+
+ assert_se(journal_file_move_to_entry_by_seqnum(f, 3, DIRECTION_DOWN, &o, NULL) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 3);
+
+ assert_se(journal_file_move_to_entry_by_seqnum(f, 2, DIRECTION_DOWN, &o, NULL) == 1);
+ assert_se(le64toh(o->entry.seqnum) == 2);
+
+ assert_se(journal_file_move_to_entry_by_seqnum(f, 10, DIRECTION_DOWN, &o, NULL) == 0);
+
+ journal_file_rotate(&f, m, JOURNAL_SEAL|JOURNAL_COMPRESS, UINT64_MAX, NULL);
+ journal_file_rotate(&f, m, JOURNAL_SEAL|JOURNAL_COMPRESS, UINT64_MAX, NULL);
+
+ (void) journal_file_offline_close(f);
+
+ log_info("Done...");
+
+ if (arg_keep)
+ log_info("Not removing %s", t);
+ else {
+ journal_directory_vacuum(".", 3000000, 0, 0, NULL, true);
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+ }
+
+ puts("------------------------------------------------------------");
+}
+
+TEST(non_empty) {
+ assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0);
+ test_non_empty_one();
+
+ assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0);
+ test_non_empty_one();
+}
+
+static void test_empty_one(void) {
+ _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL;
+ JournalFile *f1, *f2, *f3, *f4;
+ char t[] = "/var/tmp/journal-XXXXXX";
+
+ m = mmap_cache_new();
+ assert_se(m != NULL);
+
+ mkdtemp_chdir_chattr(t);
+
+ assert_se(journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, 0, 0666, UINT64_MAX, NULL, m, NULL, &f1) == 0);
+ assert_se(journal_file_open(-1, "test-compress.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS, 0666, UINT64_MAX, NULL, m, NULL, &f2) == 0);
+ assert_se(journal_file_open(-1, "test-seal.journal", O_RDWR|O_CREAT, JOURNAL_SEAL, 0666, UINT64_MAX, NULL, m, NULL, &f3) == 0);
+ assert_se(journal_file_open(-1, "test-seal-compress.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS|JOURNAL_SEAL, 0666, UINT64_MAX, NULL, m, NULL, &f4) == 0);
+
+ journal_file_print_header(f1);
+ puts("");
+ journal_file_print_header(f2);
+ puts("");
+ journal_file_print_header(f3);
+ puts("");
+ journal_file_print_header(f4);
+ puts("");
+
+ log_info("Done...");
+
+ if (arg_keep)
+ log_info("Not removing %s", t);
+ else {
+ journal_directory_vacuum(".", 3000000, 0, 0, NULL, true);
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+ }
+
+ (void) journal_file_offline_close(f1);
+ (void) journal_file_offline_close(f2);
+ (void) journal_file_offline_close(f3);
+ (void) journal_file_offline_close(f4);
+}
+
+TEST(empty) {
+ assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0);
+ test_empty_one();
+
+ assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0);
+ test_empty_one();
+}
+
+#if HAVE_COMPRESSION
+static bool check_compressed(uint64_t compress_threshold, uint64_t data_size) {
+ _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL;
+ dual_timestamp ts;
+ JournalFile *f;
+ struct iovec iovec;
+ Object *o;
+ uint64_t p;
+ char t[] = "/var/tmp/journal-XXXXXX";
+ char data[2048] = "FIELD=";
+ bool is_compressed;
+ int r;
+
+ assert_se(data_size <= sizeof(data));
+
+ m = mmap_cache_new();
+ assert_se(m != NULL);
+
+ mkdtemp_chdir_chattr(t);
+
+ assert_se(journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS|JOURNAL_SEAL, 0666, compress_threshold, NULL, m, NULL, &f) == 0);
+
+ dual_timestamp_now(&ts);
+
+ iovec = IOVEC_MAKE(data, data_size);
+ assert_se(journal_file_append_entry(f, &ts, NULL, &iovec, 1, NULL, NULL, NULL, NULL) == 0);
+
+#if HAVE_GCRYPT
+ journal_file_append_tag(f);
+#endif
+ journal_file_dump(f);
+
+ /* We have to partially reimplement some of the dump logic, because the normal next_entry does the
+ * decompression for us. */
+ p = le64toh(f->header->header_size);
+ for (;;) {
+ r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o);
+ assert_se(r == 0);
+ if (o->object.type == OBJECT_DATA)
+ break;
+
+ assert_se(p < le64toh(f->header->tail_object_offset));
+ p = p + ALIGN64(le64toh(o->object.size));
+ }
+
+ is_compressed = COMPRESSION_FROM_OBJECT(o) != COMPRESSION_NONE;
+
+ (void) journal_file_offline_close(f);
+
+ log_info("Done...");
+
+ if (arg_keep)
+ log_info("Not removing %s", t);
+ else {
+ journal_directory_vacuum(".", 3000000, 0, 0, NULL, true);
+
+ assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+ }
+
+ puts("------------------------------------------------------------");
+
+ return is_compressed;
+}
+
+static void test_min_compress_size_one(void) {
+ /* Note that XZ will actually fail to compress anything under 80 bytes, so you have to choose the limits
+ * carefully */
+
+ /* DEFAULT_MIN_COMPRESS_SIZE is 512 */
+ assert_se(!check_compressed(UINT64_MAX, 255));
+ assert_se(check_compressed(UINT64_MAX, 513));
+
+ /* compress everything */
+ assert_se(check_compressed(0, 96));
+ assert_se(check_compressed(8, 96));
+
+ /* Ensure we don't try to compress less than 8 bytes */
+ assert_se(!check_compressed(0, 7));
+
+ /* check boundary conditions */
+ assert_se(check_compressed(256, 256));
+ assert_se(!check_compressed(256, 255));
+}
+
+TEST(min_compress_size) {
+ assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0);
+ test_min_compress_size_one();
+
+ assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0);
+ test_min_compress_size_one();
+}
+#endif
+
+static int intro(void) {
+ arg_keep = saved_argc > 1;
+
+ /* journal_file_open() requires a valid machine id */
+ if (access("/etc/machine-id", F_OK) != 0)
+ return log_tests_skipped("/etc/machine-id not found");
+
+ return EXIT_SUCCESS;
+}
+
+DEFINE_TEST_MAIN_WITH_INTRO(LOG_DEBUG, intro);
diff --git a/src/libsystemd/sd-journal/test-mmap-cache.c b/src/libsystemd/sd-journal/test-mmap-cache.c
new file mode 100644
index 0000000..ce5ea12
--- /dev/null
+++ b/src/libsystemd/sd-journal/test-mmap-cache.c
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "macro.h"
+#include "mmap-cache.h"
+#include "tests.h"
+#include "tmpfile-util.h"
+
+int main(int argc, char *argv[]) {
+ MMapFileDescriptor *fx;
+ int x, y, z, r;
+ char px[] = "/tmp/testmmapXXXXXXX", py[] = "/tmp/testmmapYXXXXXX", pz[] = "/tmp/testmmapZXXXXXX";
+ MMapCache *m;
+ void *p, *q;
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(m = mmap_cache_new());
+
+ x = mkostemp_safe(px);
+ assert_se(x >= 0);
+ (void) unlink(px);
+
+ assert_se(mmap_cache_add_fd(m, x, PROT_READ, &fx) > 0);
+
+ y = mkostemp_safe(py);
+ assert_se(y >= 0);
+ (void) unlink(py);
+
+ z = mkostemp_safe(pz);
+ assert_se(z >= 0);
+ (void) unlink(pz);
+
+ r = mmap_cache_fd_get(fx, 0, false, 1, 2, NULL, &p);
+ assert_se(r >= 0);
+
+ r = mmap_cache_fd_get(fx, 0, false, 2, 2, NULL, &q);
+ assert_se(r >= 0);
+
+ assert_se((uint8_t*) p + 1 == (uint8_t*) q);
+
+ r = mmap_cache_fd_get(fx, 1, false, 3, 2, NULL, &q);
+ assert_se(r >= 0);
+
+ assert_se((uint8_t*) p + 2 == (uint8_t*) q);
+
+ r = mmap_cache_fd_get(fx, 0, false, 16ULL*1024ULL*1024ULL, 2, NULL, &p);
+ assert_se(r >= 0);
+
+ r = mmap_cache_fd_get(fx, 1, false, 16ULL*1024ULL*1024ULL+1, 2, NULL, &q);
+ assert_se(r >= 0);
+
+ assert_se((uint8_t*) p + 1 == (uint8_t*) q);
+
+ mmap_cache_fd_free(fx);
+ mmap_cache_unref(m);
+
+ safe_close(x);
+ safe_close(y);
+ safe_close(z);
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-login/sd-login.c b/src/libsystemd/sd-login/sd-login.c
new file mode 100644
index 0000000..f9e86c6
--- /dev/null
+++ b/src/libsystemd/sd-login/sd-login.c
@@ -0,0 +1,1323 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <poll.h>
+#include <sys/inotify.h>
+#include <unistd.h>
+
+#include "sd-login.h"
+
+#include "alloc-util.h"
+#include "cgroup-util.h"
+#include "dirent-util.h"
+#include "env-file.h"
+#include "escape.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "io-util.h"
+#include "login-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+/* Error codes:
+ *
+ * invalid input parameters → -EINVAL
+ * invalid fd → -EBADF
+ * process does not exist → -ESRCH
+ * cgroup does not exist → -ENOENT
+ * machine, session does not exist → -ENXIO
+ * requested metadata on object is missing → -ENODATA
+ */
+
+_public_ int sd_pid_get_session(pid_t pid, char **session) {
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(session, -EINVAL);
+
+ r = cg_pid_get_session(pid, session);
+ return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r;
+}
+
+_public_ int sd_pid_get_unit(pid_t pid, char **unit) {
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(unit, -EINVAL);
+
+ r = cg_pid_get_unit(pid, unit);
+ return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r;
+}
+
+_public_ int sd_pid_get_user_unit(pid_t pid, char **unit) {
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(unit, -EINVAL);
+
+ r = cg_pid_get_user_unit(pid, unit);
+ return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r;
+}
+
+_public_ int sd_pid_get_machine_name(pid_t pid, char **name) {
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(name, -EINVAL);
+
+ r = cg_pid_get_machine_name(pid, name);
+ return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r;
+}
+
+_public_ int sd_pid_get_slice(pid_t pid, char **slice) {
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(slice, -EINVAL);
+
+ r = cg_pid_get_slice(pid, slice);
+ return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r;
+}
+
+_public_ int sd_pid_get_user_slice(pid_t pid, char **slice) {
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(slice, -EINVAL);
+
+ r = cg_pid_get_user_slice(pid, slice);
+ return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r;
+}
+
+_public_ int sd_pid_get_owner_uid(pid_t pid, uid_t *uid) {
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(uid, -EINVAL);
+
+ r = cg_pid_get_owner_uid(pid, uid);
+ return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r;
+}
+
+_public_ int sd_pid_get_cgroup(pid_t pid, char **cgroup) {
+ char *c;
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(cgroup, -EINVAL);
+
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &c);
+ if (r < 0)
+ return r;
+
+ /* The internal APIs return the empty string for the root
+ * cgroup, let's return the "/" in the public APIs instead, as
+ * that's easier and less ambiguous for people to grok. */
+ if (isempty(c)) {
+ r = free_and_strdup(&c, "/");
+ if (r < 0)
+ return r;
+
+ }
+
+ *cgroup = c;
+ return 0;
+}
+
+_public_ int sd_pidfd_get_session(int pidfd, char **ret_session) {
+ _cleanup_free_ char *session = NULL;
+ pid_t pid;
+ int r;
+
+ assert_return(pidfd >= 0, -EBADF);
+ assert_return(ret_session, -EINVAL);
+
+ r = pidfd_get_pid(pidfd, &pid);
+ if (r < 0)
+ return r;
+
+ r = sd_pid_get_session(pid, &session);
+ if (r < 0)
+ return r;
+
+ r = pidfd_verify_pid(pidfd, pid);
+ if (r < 0)
+ return r;
+
+ *ret_session = TAKE_PTR(session);
+
+ return 0;
+}
+
+_public_ int sd_pidfd_get_unit(int pidfd, char **ret_unit) {
+ _cleanup_free_ char *unit = NULL;
+ pid_t pid;
+ int r;
+
+ assert_return(pidfd >= 0, -EBADF);
+ assert_return(ret_unit, -EINVAL);
+
+ r = pidfd_get_pid(pidfd, &pid);
+ if (r < 0)
+ return r;
+
+ r = sd_pid_get_unit(pid, &unit);
+ if (r < 0)
+ return r;
+
+ r = pidfd_verify_pid(pidfd, pid);
+ if (r < 0)
+ return r;
+
+ *ret_unit = TAKE_PTR(unit);
+
+ return 0;
+}
+
+_public_ int sd_pidfd_get_user_unit(int pidfd, char **ret_unit) {
+ _cleanup_free_ char *unit = NULL;
+ pid_t pid;
+ int r;
+
+ assert_return(pidfd >= 0, -EBADF);
+ assert_return(ret_unit, -EINVAL);
+
+ r = pidfd_get_pid(pidfd, &pid);
+ if (r < 0)
+ return r;
+
+ r = sd_pid_get_user_unit(pid, &unit);
+ if (r < 0)
+ return r;
+
+ r = pidfd_verify_pid(pidfd, pid);
+ if (r < 0)
+ return r;
+
+ *ret_unit = TAKE_PTR(unit);
+
+ return 0;
+}
+
+_public_ int sd_pidfd_get_machine_name(int pidfd, char **ret_name) {
+ _cleanup_free_ char *name = NULL;
+ pid_t pid;
+ int r;
+
+ assert_return(pidfd >= 0, -EBADF);
+ assert_return(ret_name, -EINVAL);
+
+ r = pidfd_get_pid(pidfd, &pid);
+ if (r < 0)
+ return r;
+
+ r = sd_pid_get_machine_name(pid, &name);
+ if (r < 0)
+ return r;
+
+ r = pidfd_verify_pid(pidfd, pid);
+ if (r < 0)
+ return r;
+
+ *ret_name = TAKE_PTR(name);
+
+ return 0;
+}
+
+_public_ int sd_pidfd_get_slice(int pidfd, char **ret_slice) {
+ _cleanup_free_ char *slice = NULL;
+ pid_t pid;
+ int r;
+
+ assert_return(pidfd >= 0, -EBADF);
+ assert_return(ret_slice, -EINVAL);
+
+ r = pidfd_get_pid(pidfd, &pid);
+ if (r < 0)
+ return r;
+
+ r = sd_pid_get_slice(pid, &slice);
+ if (r < 0)
+ return r;
+
+ r = pidfd_verify_pid(pidfd, pid);
+ if (r < 0)
+ return r;
+
+ *ret_slice = TAKE_PTR(slice);
+
+ return 0;
+}
+
+_public_ int sd_pidfd_get_user_slice(int pidfd, char **ret_slice) {
+ _cleanup_free_ char *slice = NULL;
+ pid_t pid;
+ int r;
+
+ assert_return(pidfd >= 0, -EBADF);
+ assert_return(ret_slice, -EINVAL);
+
+ r = pidfd_get_pid(pidfd, &pid);
+ if (r < 0)
+ return r;
+
+ r = sd_pid_get_user_slice(pid, &slice);
+ if (r < 0)
+ return r;
+
+ r = pidfd_verify_pid(pidfd, pid);
+ if (r < 0)
+ return r;
+
+ *ret_slice = TAKE_PTR(slice);
+
+ return 0;
+}
+
+_public_ int sd_pidfd_get_owner_uid(int pidfd, uid_t *ret_uid) {
+ uid_t uid;
+ pid_t pid;
+ int r;
+
+ assert_return(pidfd >= 0, -EINVAL);
+ assert_return(ret_uid, -EINVAL);
+
+ r = pidfd_get_pid(pidfd, &pid);
+ if (r < 0)
+ return r;
+
+ r = sd_pid_get_owner_uid(pid, &uid);
+ if (r < 0)
+ return r;
+
+ r = pidfd_verify_pid(pidfd, pid);
+ if (r < 0)
+ return r;
+
+ *ret_uid = uid;
+
+ return 0;
+}
+
+_public_ int sd_pidfd_get_cgroup(int pidfd, char **ret_cgroup) {
+ _cleanup_free_ char *cgroup = NULL;
+ pid_t pid;
+ int r;
+
+ assert_return(pidfd >= 0, -EBADF);
+ assert_return(ret_cgroup, -EINVAL);
+
+ r = pidfd_get_pid(pidfd, &pid);
+ if (r < 0)
+ return r;
+
+ r = sd_pid_get_cgroup(pid, &cgroup);
+ if (r < 0)
+ return r;
+
+ r = pidfd_verify_pid(pidfd, pid);
+ if (r < 0)
+ return r;
+
+ *ret_cgroup = TAKE_PTR(cgroup);
+
+ return 0;
+}
+
+_public_ int sd_peer_get_session(int fd, char **session) {
+ struct ucred ucred = UCRED_INVALID;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(session, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return cg_pid_get_session(ucred.pid, session);
+}
+
+_public_ int sd_peer_get_owner_uid(int fd, uid_t *uid) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(uid, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return cg_pid_get_owner_uid(ucred.pid, uid);
+}
+
+_public_ int sd_peer_get_unit(int fd, char **unit) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(unit, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return cg_pid_get_unit(ucred.pid, unit);
+}
+
+_public_ int sd_peer_get_user_unit(int fd, char **unit) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(unit, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return cg_pid_get_user_unit(ucred.pid, unit);
+}
+
+_public_ int sd_peer_get_machine_name(int fd, char **machine) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(machine, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return cg_pid_get_machine_name(ucred.pid, machine);
+}
+
+_public_ int sd_peer_get_slice(int fd, char **slice) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(slice, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return cg_pid_get_slice(ucred.pid, slice);
+}
+
+_public_ int sd_peer_get_user_slice(int fd, char **slice) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(slice, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return cg_pid_get_user_slice(ucred.pid, slice);
+}
+
+_public_ int sd_peer_get_cgroup(int fd, char **cgroup) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(cgroup, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return sd_pid_get_cgroup(ucred.pid, cgroup);
+}
+
+static int file_of_uid(uid_t uid, char **p) {
+
+ assert_return(uid_is_valid(uid), -EINVAL);
+ assert(p);
+
+ if (asprintf(p, "/run/systemd/users/" UID_FMT, uid) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+_public_ int sd_uid_get_state(uid_t uid, char**state) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ assert_return(state, -EINVAL);
+
+ r = file_of_uid(uid, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, "STATE", &s);
+ if (r == -ENOENT)
+ r = free_and_strdup(&s, "offline");
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -EIO;
+
+ *state = TAKE_PTR(s);
+ return 0;
+}
+
+_public_ int sd_uid_get_display(uid_t uid, char **session) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ assert_return(session, -EINVAL);
+
+ r = file_of_uid(uid, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, "DISPLAY", &s);
+ if (r == -ENOENT)
+ return -ENODATA;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -ENODATA;
+
+ *session = TAKE_PTR(s);
+
+ return 0;
+}
+
+_public_ int sd_uid_get_login_time(uid_t uid, uint64_t *usec) {
+ _cleanup_free_ char *p = NULL, *s = NULL, *rt = NULL;
+ usec_t t;
+ int r;
+
+ assert_return(usec, -EINVAL);
+
+ r = file_of_uid(uid, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, "STATE", &s, "REALTIME", &rt);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (isempty(s) || isempty(rt))
+ return -EIO;
+
+ if (!STR_IN_SET(s, "active", "online"))
+ return -ENXIO;
+
+ r = safe_atou64(rt, &t);
+ if (r < 0)
+ return r;
+
+ *usec = t;
+ return 0;
+}
+
+static int file_of_seat(const char *seat, char **_p) {
+ char *p;
+ int r;
+
+ assert(_p);
+
+ if (seat) {
+ if (!filename_is_valid(seat))
+ return -EINVAL;
+
+ p = path_join("/run/systemd/seats", seat);
+ } else {
+ _cleanup_free_ char *buf = NULL;
+
+ r = sd_session_get_seat(NULL, &buf);
+ if (r < 0)
+ return r;
+
+ p = path_join("/run/systemd/seats", buf);
+ }
+ if (!p)
+ return -ENOMEM;
+
+ *_p = TAKE_PTR(p);
+ return 0;
+}
+
+_public_ int sd_uid_is_on_seat(uid_t uid, int require_active, const char *seat) {
+ _cleanup_free_ char *filename = NULL, *content = NULL;
+ int r;
+
+ assert_return(uid_is_valid(uid), -EINVAL);
+
+ r = file_of_seat(seat, &filename);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, filename,
+ require_active ? "ACTIVE_UID" : "UIDS",
+ &content);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+ if (isempty(content))
+ return 0;
+
+ char t[DECIMAL_STR_MAX(uid_t)];
+ xsprintf(t, UID_FMT, uid);
+
+ return string_contains_word(content, NULL, t);
+}
+
+static int uid_get_array(uid_t uid, const char *variable, char ***array) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ char **a;
+ int r;
+
+ assert(variable);
+
+ r = file_of_uid(uid, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, variable, &s);
+ if (r == -ENOENT || (r >= 0 && isempty(s))) {
+ if (array)
+ *array = NULL;
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ a = strv_split(s, NULL);
+ if (!a)
+ return -ENOMEM;
+
+ strv_uniq(a);
+ r = (int) strv_length(a);
+
+ if (array)
+ *array = a;
+ else
+ strv_free(a);
+
+ return r;
+}
+
+_public_ int sd_uid_get_sessions(uid_t uid, int require_active, char ***sessions) {
+ return uid_get_array(
+ uid,
+ require_active == 0 ? "ONLINE_SESSIONS" :
+ require_active > 0 ? "ACTIVE_SESSIONS" :
+ "SESSIONS",
+ sessions);
+}
+
+_public_ int sd_uid_get_seats(uid_t uid, int require_active, char ***seats) {
+ return uid_get_array(
+ uid,
+ require_active == 0 ? "ONLINE_SEATS" :
+ require_active > 0 ? "ACTIVE_SEATS" :
+ "SEATS",
+ seats);
+}
+
+static int file_of_session(const char *session, char **_p) {
+ char *p;
+ int r;
+
+ assert(_p);
+
+ if (session) {
+ if (!session_id_valid(session))
+ return -EINVAL;
+
+ p = path_join("/run/systemd/sessions", session);
+ } else {
+ _cleanup_free_ char *buf = NULL;
+
+ r = sd_pid_get_session(0, &buf);
+ if (r < 0)
+ return r;
+
+ p = path_join("/run/systemd/sessions", buf);
+ }
+
+ if (!p)
+ return -ENOMEM;
+
+ *_p = p;
+ return 0;
+}
+
+_public_ int sd_session_is_active(const char *session) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ r = file_of_session(session, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, "ACTIVE", &s);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -EIO;
+
+ return parse_boolean(s);
+}
+
+_public_ int sd_session_is_remote(const char *session) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ r = file_of_session(session, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, "REMOTE", &s);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -ENODATA;
+
+ return parse_boolean(s);
+}
+
+_public_ int sd_session_get_state(const char *session, char **state) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ assert_return(state, -EINVAL);
+
+ r = file_of_session(session, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, "STATE", &s);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -EIO;
+
+ *state = TAKE_PTR(s);
+
+ return 0;
+}
+
+_public_ int sd_session_get_uid(const char *session, uid_t *uid) {
+ int r;
+ _cleanup_free_ char *p = NULL, *s = NULL;
+
+ assert_return(uid, -EINVAL);
+
+ r = file_of_session(session, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, "UID", &s);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -EIO;
+
+ return parse_uid(s, uid);
+}
+
+static int session_get_string(const char *session, const char *field, char **value) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ assert_return(value, -EINVAL);
+ assert(field);
+
+ r = file_of_session(session, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, field, &s);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -ENODATA;
+
+ *value = TAKE_PTR(s);
+ return 0;
+}
+
+_public_ int sd_session_get_username(const char *session, char **username) {
+ return session_get_string(session, "USER", username);
+}
+
+_public_ int sd_session_get_seat(const char *session, char **seat) {
+ return session_get_string(session, "SEAT", seat);
+}
+
+_public_ int sd_session_get_start_time(const char *session, uint64_t *usec) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ usec_t t;
+ int r;
+
+ assert_return(usec, -EINVAL);
+
+ r = file_of_session(session, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p, "REALTIME", &s);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -EIO;
+
+ r = safe_atou64(s, &t);
+ if (r < 0)
+ return r;
+
+ *usec = t;
+ return 0;
+}
+
+_public_ int sd_session_get_tty(const char *session, char **tty) {
+ return session_get_string(session, "TTY", tty);
+}
+
+_public_ int sd_session_get_vt(const char *session, unsigned *vtnr) {
+ _cleanup_free_ char *vtnr_string = NULL;
+ unsigned u;
+ int r;
+
+ assert_return(vtnr, -EINVAL);
+
+ r = session_get_string(session, "VTNR", &vtnr_string);
+ if (r < 0)
+ return r;
+
+ r = safe_atou(vtnr_string, &u);
+ if (r < 0)
+ return r;
+
+ *vtnr = u;
+ return 0;
+}
+
+_public_ int sd_session_get_service(const char *session, char **service) {
+ return session_get_string(session, "SERVICE", service);
+}
+
+_public_ int sd_session_get_type(const char *session, char **type) {
+ return session_get_string(session, "TYPE", type);
+}
+
+_public_ int sd_session_get_class(const char *session, char **class) {
+ return session_get_string(session, "CLASS", class);
+}
+
+_public_ int sd_session_get_desktop(const char *session, char **desktop) {
+ _cleanup_free_ char *escaped = NULL;
+ int r;
+ ssize_t l;
+
+ assert_return(desktop, -EINVAL);
+
+ r = session_get_string(session, "DESKTOP", &escaped);
+ if (r < 0)
+ return r;
+
+ l = cunescape(escaped, 0, desktop);
+ if (l < 0)
+ return l;
+ return 0;
+}
+
+_public_ int sd_session_get_display(const char *session, char **display) {
+ return session_get_string(session, "DISPLAY", display);
+}
+
+_public_ int sd_session_get_remote_user(const char *session, char **remote_user) {
+ return session_get_string(session, "REMOTE_USER", remote_user);
+}
+
+_public_ int sd_session_get_remote_host(const char *session, char **remote_host) {
+ return session_get_string(session, "REMOTE_HOST", remote_host);
+}
+
+_public_ int sd_session_get_leader(const char *session, pid_t *leader) {
+ _cleanup_free_ char *leader_string = NULL;
+ pid_t pid;
+ int r;
+
+ assert_return(leader, -EINVAL);
+
+ r = session_get_string(session, "LEADER", &leader_string);
+ if (r < 0)
+ return r;
+
+ r = parse_pid(leader_string, &pid);
+ if (r < 0)
+ return r;
+
+ *leader = pid;
+ return 0;
+}
+
+_public_ int sd_seat_get_active(const char *seat, char **session, uid_t *uid) {
+ _cleanup_free_ char *p = NULL, *s = NULL, *t = NULL;
+ int r;
+
+ assert_return(session || uid, -EINVAL);
+
+ r = file_of_seat(seat, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p,
+ "ACTIVE", &s,
+ "ACTIVE_UID", &t);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+
+ if (session && !s)
+ return -ENODATA;
+
+ if (uid && !t)
+ return -ENODATA;
+
+ if (uid && t) {
+ r = parse_uid(t, uid);
+ if (r < 0)
+ return r;
+ }
+
+ if (session && s)
+ *session = TAKE_PTR(s);
+
+ return 0;
+}
+
+_public_ int sd_seat_get_sessions(
+ const char *seat,
+ char ***ret_sessions,
+ uid_t **ret_uids,
+ unsigned *ret_n_uids) {
+
+ _cleanup_free_ char *fname = NULL, *session_line = NULL, *uid_line = NULL;
+ _cleanup_strv_free_ char **sessions = NULL;
+ _cleanup_free_ uid_t *uids = NULL;
+ unsigned n_sessions = 0;
+ int r;
+
+ r = file_of_seat(seat, &fname);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, fname,
+ "SESSIONS", &session_line,
+ "UIDS", &uid_line);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+
+ if (session_line) {
+ sessions = strv_split(session_line, NULL);
+ if (!sessions)
+ return -ENOMEM;
+
+ n_sessions = strv_length(sessions);
+ };
+
+ if (ret_uids && uid_line) {
+ uids = new(uid_t, n_sessions);
+ if (!uids)
+ return -ENOMEM;
+
+ size_t n = 0;
+ for (const char *p = uid_line;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = parse_uid(word, &uids[n++]);
+ if (r < 0)
+ return r;
+ }
+
+ if (n != n_sessions)
+ return -EUCLEAN;
+ }
+
+ if (ret_sessions)
+ *ret_sessions = TAKE_PTR(sessions);
+ if (ret_uids)
+ *ret_uids = TAKE_PTR(uids);
+ if (ret_n_uids)
+ *ret_n_uids = n_sessions;
+
+ return n_sessions;
+}
+
+static int seat_get_can(const char *seat, const char *variable) {
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
+
+ assert(variable);
+
+ r = file_of_seat(seat, &p);
+ if (r < 0)
+ return r;
+
+ r = parse_env_file(NULL, p,
+ variable, &s);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -ENODATA;
+
+ return parse_boolean(s);
+}
+
+_public_ int sd_seat_can_multi_session(const char *seat) {
+ return true;
+}
+
+_public_ int sd_seat_can_tty(const char *seat) {
+ return seat_get_can(seat, "CAN_TTY");
+}
+
+_public_ int sd_seat_can_graphical(const char *seat) {
+ return seat_get_can(seat, "CAN_GRAPHICAL");
+}
+
+_public_ int sd_get_seats(char ***seats) {
+ int r;
+
+ r = get_files_in_directory("/run/systemd/seats/", seats);
+ if (r == -ENOENT) {
+ if (seats)
+ *seats = NULL;
+ return 0;
+ }
+ return r;
+}
+
+_public_ int sd_get_sessions(char ***sessions) {
+ int r;
+
+ r = get_files_in_directory("/run/systemd/sessions/", sessions);
+ if (r == -ENOENT) {
+ if (sessions)
+ *sessions = NULL;
+ return 0;
+ }
+ return r;
+}
+
+_public_ int sd_get_uids(uid_t **users) {
+ _cleanup_closedir_ DIR *d = NULL;
+ int r = 0;
+ unsigned n = 0;
+ _cleanup_free_ uid_t *l = NULL;
+
+ d = opendir("/run/systemd/users/");
+ if (!d) {
+ if (errno == ENOENT) {
+ if (users)
+ *users = NULL;
+ return 0;
+ }
+ return -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ int k;
+ uid_t uid;
+
+ if (!dirent_is_file(de))
+ continue;
+
+ k = parse_uid(de->d_name, &uid);
+ if (k < 0)
+ continue;
+
+ if (users) {
+ if ((unsigned) r >= n) {
+ uid_t *t;
+
+ n = MAX(16, 2*r);
+ t = reallocarray(l, n, sizeof(uid_t));
+ if (!t)
+ return -ENOMEM;
+
+ l = t;
+ }
+
+ assert((unsigned) r < n);
+ l[r++] = uid;
+ } else
+ r++;
+ }
+
+ if (users)
+ *users = TAKE_PTR(l);
+
+ return r;
+}
+
+_public_ int sd_get_machine_names(char ***machines) {
+ _cleanup_strv_free_ char **l = NULL;
+ char **a, **b;
+ int r;
+
+ r = get_files_in_directory("/run/systemd/machines/", &l);
+ if (r == -ENOENT) {
+ if (machines)
+ *machines = NULL;
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ if (l) {
+ r = 0;
+
+ /* Filter out the unit: symlinks */
+ for (a = b = l; *a; a++) {
+ if (startswith(*a, "unit:") || !hostname_is_valid(*a, 0))
+ free(*a);
+ else {
+ *b = *a;
+ b++;
+ r++;
+ }
+ }
+
+ *b = NULL;
+ }
+
+ if (machines)
+ *machines = TAKE_PTR(l);
+
+ return r;
+}
+
+_public_ int sd_machine_get_class(const char *machine, char **class) {
+ _cleanup_free_ char *c = NULL;
+ const char *p;
+ int r;
+
+ assert_return(class, -EINVAL);
+
+ if (streq(machine, ".host")) {
+ c = strdup("host");
+ if (!c)
+ return -ENOMEM;
+ } else {
+ if (!hostname_is_valid(machine, 0))
+ return -EINVAL;
+
+ p = strjoina("/run/systemd/machines/", machine);
+ r = parse_env_file(NULL, p, "CLASS", &c);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (!c)
+ return -EIO;
+ }
+
+ *class = TAKE_PTR(c);
+ return 0;
+}
+
+_public_ int sd_machine_get_ifindices(const char *machine, int **ret_ifindices) {
+ _cleanup_free_ char *netif_line = NULL;
+ const char *p;
+ int r;
+
+ assert_return(hostname_is_valid(machine, 0), -EINVAL);
+
+ p = strjoina("/run/systemd/machines/", machine);
+ r = parse_env_file(NULL, p, "NETIF", &netif_line);
+ if (r == -ENOENT)
+ return -ENXIO;
+ if (r < 0)
+ return r;
+ if (!netif_line) {
+ *ret_ifindices = NULL;
+ return 0;
+ }
+
+ _cleanup_strv_free_ char **tt = strv_split(netif_line, NULL);
+ if (!tt)
+ return -ENOMEM;
+
+ _cleanup_free_ int *ifindices = NULL;
+ if (ret_ifindices) {
+ ifindices = new(int, strv_length(tt));
+ if (!ifindices)
+ return -ENOMEM;
+ }
+
+ size_t n = 0;
+ for (size_t i = 0; tt[i]; i++) {
+ int ind;
+
+ ind = parse_ifindex(tt[i]);
+ if (ind < 0)
+ /* Return -EUCLEAN to distinguish from -EINVAL for invalid args */
+ return ind == -EINVAL ? -EUCLEAN : ind;
+
+ if (ret_ifindices)
+ ifindices[n] = ind;
+ n++;
+ }
+
+ if (ret_ifindices)
+ *ret_ifindices = TAKE_PTR(ifindices);
+
+ return n;
+}
+
+static int MONITOR_TO_FD(sd_login_monitor *m) {
+ return (int) (unsigned long) m - 1;
+}
+
+static sd_login_monitor* FD_TO_MONITOR(int fd) {
+ return (sd_login_monitor*) (unsigned long) (fd + 1);
+}
+
+_public_ int sd_login_monitor_new(const char *category, sd_login_monitor **m) {
+ _cleanup_close_ int fd = -EBADF;
+ bool good = false;
+ int k;
+
+ assert_return(m, -EINVAL);
+
+ fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (!category || streq(category, "seat")) {
+ k = inotify_add_watch(fd, "/run/systemd/seats/", IN_MOVED_TO|IN_DELETE);
+ if (k < 0)
+ return -errno;
+
+ good = true;
+ }
+
+ if (!category || streq(category, "session")) {
+ k = inotify_add_watch(fd, "/run/systemd/sessions/", IN_MOVED_TO|IN_DELETE);
+ if (k < 0)
+ return -errno;
+
+ good = true;
+ }
+
+ if (!category || streq(category, "uid")) {
+ k = inotify_add_watch(fd, "/run/systemd/users/", IN_MOVED_TO|IN_DELETE);
+ if (k < 0)
+ return -errno;
+
+ good = true;
+ }
+
+ if (!category || streq(category, "machine")) {
+ k = inotify_add_watch(fd, "/run/systemd/machines/", IN_MOVED_TO|IN_DELETE);
+ if (k < 0)
+ return -errno;
+
+ good = true;
+ }
+
+ if (!good)
+ return -EINVAL;
+
+ *m = FD_TO_MONITOR(TAKE_FD(fd));
+ return 0;
+}
+
+_public_ sd_login_monitor* sd_login_monitor_unref(sd_login_monitor *m) {
+ if (m)
+ (void) close_nointr(MONITOR_TO_FD(m));
+
+ return NULL;
+}
+
+_public_ int sd_login_monitor_flush(sd_login_monitor *m) {
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = flush_fd(MONITOR_TO_FD(m));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+_public_ int sd_login_monitor_get_fd(sd_login_monitor *m) {
+
+ assert_return(m, -EINVAL);
+
+ return MONITOR_TO_FD(m);
+}
+
+_public_ int sd_login_monitor_get_events(sd_login_monitor *m) {
+
+ assert_return(m, -EINVAL);
+
+ /* For now we will only return POLLIN here, since we don't
+ * need anything else ever for inotify. However, let's have
+ * this API to keep our options open should we later on need
+ * it. */
+ return POLLIN;
+}
+
+_public_ int sd_login_monitor_get_timeout(sd_login_monitor *m, uint64_t *timeout_usec) {
+
+ assert_return(m, -EINVAL);
+ assert_return(timeout_usec, -EINVAL);
+
+ /* For now we will only return UINT64_MAX, since we don't
+ * need any timeout. However, let's have this API to keep our
+ * options open should we later on need it. */
+ *timeout_usec = UINT64_MAX;
+ return 0;
+}
diff --git a/src/libsystemd/sd-login/test-login.c b/src/libsystemd/sd-login/test-login.c
new file mode 100644
index 0000000..819f86f
--- /dev/null
+++ b/src/libsystemd/sd-login/test-login.c
@@ -0,0 +1,334 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <poll.h>
+
+#include "sd-login.h"
+
+#include "alloc-util.h"
+#include "errno-list.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "log.h"
+#include "missing_syscall.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+#include "time-util.h"
+#include "user-util.h"
+
+static char* format_uids(char **buf, uid_t* uids, int count) {
+ int pos = 0, inc;
+ size_t size = (DECIMAL_STR_MAX(uid_t) + 1) * count + 1;
+
+ assert_se(*buf = malloc(size));
+
+ for (int k = 0; k < count; k++) {
+ sprintf(*buf + pos, "%s"UID_FMT"%n", k > 0 ? " " : "", uids[k], &inc);
+ pos += inc;
+ }
+
+ assert_se(pos < (ssize_t)size);
+ (*buf)[pos] = '\0';
+
+ return *buf;
+}
+
+static const char *e(int r) {
+ return r == 0 ? "OK" : errno_to_name(r);
+}
+
+TEST(login) {
+ _cleanup_close_pair_ int pair[2] = EBADF_PAIR;
+ _cleanup_free_ char *pp = NULL, *qq = NULL,
+ *display_session = NULL, *cgroup = NULL,
+ *display = NULL, *remote_user = NULL, *remote_host = NULL,
+ *type = NULL, *class = NULL, *state = NULL, *state2 = NULL,
+ *seat = NULL, *session = NULL,
+ *unit = NULL, *user_unit = NULL, *slice = NULL;
+ _cleanup_close_ int pidfd = -EBADF;
+ int r;
+ uid_t u, u2 = UID_INVALID;
+ char *t, **seats = NULL, **sessions = NULL;
+
+ r = sd_pid_get_unit(0, &unit);
+ log_info("sd_pid_get_unit(0, …) → %s / \"%s\"", e(r), strnull(unit));
+ assert_se(IN_SET(r, 0, -ENODATA));
+
+ r = sd_pid_get_user_unit(0, &user_unit);
+ log_info("sd_pid_get_user_unit(0, …) → %s / \"%s\"", e(r), strnull(user_unit));
+ assert_se(IN_SET(r, 0, -ENODATA));
+
+ r = sd_pid_get_slice(0, &slice);
+ log_info("sd_pid_get_slice(0, …) → %s / \"%s\"", e(r), strnull(slice));
+ assert_se(IN_SET(r, 0, -ENODATA));
+
+ r = sd_pid_get_owner_uid(0, &u2);
+ log_info("sd_pid_get_owner_uid(0, …) → %s / "UID_FMT, e(r), u2);
+ assert_se(IN_SET(r, 0, -ENODATA));
+
+ r = sd_pid_get_session(0, &session);
+ log_info("sd_pid_get_session(0, …) → %s / \"%s\"", e(r), strnull(session));
+
+ r = sd_pid_get_cgroup(0, &cgroup);
+ log_info("sd_pid_get_cgroup(0, …) → %s / \"%s\"", e(r), strnull(cgroup));
+ assert_se(IN_SET(r, 0, -ENOMEDIUM));
+
+ pidfd = pidfd_open(getpid_cached(), 0);
+ if (pidfd >= 0) {
+ _cleanup_free_ char *cgroup2 = NULL, *session2 = NULL,
+ *unit2 = NULL, *user_unit2 = NULL, *slice2 = NULL;
+
+ r = sd_pidfd_get_unit(pidfd, &unit2);
+ log_info("sd_pidfd_get_unit(pidfd, …) → %s / \"%s\"", e(r), strnull(unit2));
+ assert_se(IN_SET(r, 0, -ENODATA));
+
+ r = sd_pidfd_get_user_unit(pidfd, &user_unit2);
+ log_info("sd_pidfd_get_user_unit(pidfd, …) → %s / \"%s\"", e(r), strnull(user_unit2));
+ assert_se(IN_SET(r, 0, -ENODATA));
+
+ r = sd_pidfd_get_slice(pidfd, &slice2);
+ log_info("sd_pidfd_get_slice(pidfd, …) → %s / \"%s\"", e(r), strnull(slice2));
+ assert_se(IN_SET(r, 0, -ENODATA));
+
+ r = sd_pidfd_get_owner_uid(pidfd, &u2);
+ log_info("sd_pidfd_get_owner_uid(pidfd, …) → %s / "UID_FMT, e(r), u2);
+ assert_se(IN_SET(r, 0, -ENODATA));
+
+ r = sd_pidfd_get_session(pidfd, &session2);
+ log_info("sd_pidfd_get_session(pidfd, …) → %s / \"%s\"", e(r), strnull(session2));
+
+ r = sd_pidfd_get_cgroup(pidfd, &cgroup2);
+ log_info("sd_pidfd_get_cgroup(pidfd, …) → %s / \"%s\"", e(r), strnull(cgroup2));
+ assert_se(IN_SET(r, 0, -ENOMEDIUM));
+ }
+
+ r = sd_uid_get_display(u2, &display_session);
+ log_info("sd_uid_get_display("UID_FMT", …) → %s / \"%s\"", u2, e(r), strnull(display_session));
+ if (u2 == UID_INVALID)
+ assert_se(r == -EINVAL);
+ else
+ assert_se(IN_SET(r, 0, -ENODATA));
+
+ assert_se(socketpair(AF_UNIX, SOCK_STREAM, 0, pair) == 0);
+ sd_peer_get_session(pair[0], &pp);
+ sd_peer_get_session(pair[1], &qq);
+ assert_se(streq_ptr(pp, qq));
+
+ r = sd_uid_get_sessions(u2, false, &sessions);
+ assert_se(t = strv_join(sessions, " "));
+ log_info("sd_uid_get_sessions("UID_FMT", …) → %s \"%s\"", u2, e(r), t);
+ if (u2 == UID_INVALID)
+ assert_se(r == -EINVAL);
+ else {
+ assert_se(r >= 0);
+ assert_se(r == (int) strv_length(sessions));
+ }
+ sessions = strv_free(sessions);
+ free(t);
+
+ assert_se(r == sd_uid_get_sessions(u2, false, NULL));
+
+ r = sd_uid_get_seats(u2, false, &seats);
+ assert_se(t = strv_join(seats, " "));
+ log_info("sd_uid_get_seats("UID_FMT", …) → %s \"%s\"", u2, e(r), t);
+ if (u2 == UID_INVALID)
+ assert_se(r == -EINVAL);
+ else {
+ assert_se(r >= 0);
+ assert_se(r == (int) strv_length(seats));
+ }
+ seats = strv_free(seats);
+ free(t);
+
+ assert_se(r == sd_uid_get_seats(u2, false, NULL));
+
+ if (session) {
+ r = sd_session_is_active(session);
+ if (r == -ENXIO)
+ log_notice("sd_session_is_active() failed with ENXIO, it seems logind is not running.");
+ else {
+ /* All those tests will fail with ENXIO, so let's skip them. */
+
+ assert_se(r >= 0);
+ log_info("sd_session_is_active(\"%s\") → %s", session, yes_no(r));
+
+ r = sd_session_is_remote(session);
+ assert_se(r >= 0);
+ log_info("sd_session_is_remote(\"%s\") → %s", session, yes_no(r));
+
+ r = sd_session_get_state(session, &state);
+ assert_se(r == 0);
+ log_info("sd_session_get_state(\"%s\") → \"%s\"", session, state);
+
+ assert_se(sd_session_get_uid(session, &u) >= 0);
+ log_info("sd_session_get_uid(\"%s\") → "UID_FMT, session, u);
+ assert_se(u == u2);
+
+ assert_se(sd_session_get_type(session, &type) >= 0);
+ log_info("sd_session_get_type(\"%s\") → \"%s\"", session, type);
+
+ assert_se(sd_session_get_class(session, &class) >= 0);
+ log_info("sd_session_get_class(\"%s\") → \"%s\"", session, class);
+
+ r = sd_session_get_display(session, &display);
+ assert_se(IN_SET(r, 0, -ENODATA));
+ log_info("sd_session_get_display(\"%s\") → \"%s\"", session, strna(display));
+
+ r = sd_session_get_remote_user(session, &remote_user);
+ assert_se(IN_SET(r, 0, -ENODATA));
+ log_info("sd_session_get_remote_user(\"%s\") → \"%s\"",
+ session, strna(remote_user));
+
+ r = sd_session_get_remote_host(session, &remote_host);
+ assert_se(IN_SET(r, 0, -ENODATA));
+ log_info("sd_session_get_remote_host(\"%s\") → \"%s\"",
+ session, strna(remote_host));
+
+ r = sd_session_get_seat(session, &seat);
+ if (r >= 0) {
+ assert_se(seat);
+
+ log_info("sd_session_get_seat(\"%s\") → \"%s\"", session, seat);
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+ r = sd_seat_can_multi_session(seat);
+#pragma GCC diagnostic pop
+ assert_se(r == 1);
+ log_info("sd_session_can_multi_seat(\"%s\") → %s", seat, yes_no(r));
+
+ r = sd_seat_can_tty(seat);
+ assert_se(r >= 0);
+ log_info("sd_session_can_tty(\"%s\") → %s", seat, yes_no(r));
+
+ r = sd_seat_can_graphical(seat);
+ assert_se(r >= 0);
+ log_info("sd_session_can_graphical(\"%s\") → %s", seat, yes_no(r));
+ } else {
+ log_info_errno(r, "sd_session_get_seat(\"%s\"): %m", session);
+ assert_se(r == -ENODATA);
+ }
+
+ assert_se(sd_uid_get_state(u, &state2) == 0);
+ log_info("sd_uid_get_state("UID_FMT", …) → %s", u, state2);
+ }
+ }
+
+ if (seat) {
+ _cleanup_free_ char *session2 = NULL, *buf = NULL;
+ _cleanup_free_ uid_t *uids = NULL;
+ unsigned n;
+
+ assert_se(sd_uid_is_on_seat(u, 0, seat) > 0);
+
+ r = sd_seat_get_active(seat, &session2, &u2);
+ assert_se(r == 0);
+ log_info("sd_seat_get_active(\"%s\", …) → \"%s\", "UID_FMT, seat, session2, u2);
+
+ r = sd_uid_is_on_seat(u, 1, seat);
+ assert_se(IN_SET(r, 0, 1));
+ assert_se(!!r == streq(session, session2));
+
+ r = sd_seat_get_sessions(seat, &sessions, &uids, &n);
+ assert_se(r >= 0);
+ assert_se(r == (int) strv_length(sessions));
+ assert_se(t = strv_join(sessions, " "));
+ strv_free(sessions);
+ log_info("sd_seat_get_sessions(\"%s\", …) → %s, \"%s\", [%u] {%s}",
+ seat, e(r), t, n, format_uids(&buf, uids, n));
+ free(t);
+
+ assert_se(sd_seat_get_sessions(seat, NULL, NULL, NULL) == r);
+ }
+
+ r = sd_get_seats(&seats);
+ assert_se(r >= 0);
+ assert_se(r == (int) strv_length(seats));
+ assert_se(t = strv_join(seats, ", "));
+ strv_free(seats);
+ log_info("sd_get_seats(…) → [%i] \"%s\"", r, t);
+ t = mfree(t);
+
+ assert_se(sd_get_seats(NULL) == r);
+
+ r = sd_seat_get_active(NULL, &t, NULL);
+ assert_se(IN_SET(r, 0, -ENODATA, -ENXIO));
+ log_info("sd_seat_get_active(NULL, …) (active session on current seat) → %s / \"%s\"", e(r), strnull(t));
+ free(t);
+
+ r = sd_get_sessions(&sessions);
+ assert_se(r >= 0);
+ assert_se(r == (int) strv_length(sessions));
+ assert_se(t = strv_join(sessions, ", "));
+ strv_free(sessions);
+ log_info("sd_get_sessions(…) → [%i] \"%s\"", r, t);
+ free(t);
+
+ assert_se(sd_get_sessions(NULL) == r);
+
+ {
+ _cleanup_free_ uid_t *uids = NULL;
+ _cleanup_free_ char *buf = NULL;
+
+ r = sd_get_uids(&uids);
+ assert_se(r >= 0);
+ log_info("sd_get_uids(…) → [%i] {%s}", r, format_uids(&buf, uids, r));
+
+ assert_se(sd_get_uids(NULL) == r);
+ }
+
+ {
+ _cleanup_strv_free_ char **machines = NULL;
+ _cleanup_free_ char *buf = NULL;
+
+ r = sd_get_machine_names(&machines);
+ assert_se(r >= 0);
+ assert_se(r == (int) strv_length(machines));
+ assert_se(buf = strv_join(machines, " "));
+ log_info("sd_get_machines(…) → [%i] \"%s\"", r, buf);
+
+ assert_se(sd_get_machine_names(NULL) == r);
+ }
+}
+
+TEST(monitor) {
+ sd_login_monitor *m = NULL;
+ int r;
+
+ if (!streq_ptr(saved_argv[1], "-m"))
+ return;
+
+ assert_se(sd_login_monitor_new("session", &m) == 0);
+
+ for (unsigned n = 0; n < 5; n++) {
+ struct pollfd pollfd = {};
+ usec_t timeout, nw;
+
+ assert_se((pollfd.fd = sd_login_monitor_get_fd(m)) >= 0);
+ assert_se((pollfd.events = sd_login_monitor_get_events(m)) >= 0);
+
+ assert_se(sd_login_monitor_get_timeout(m, &timeout) >= 0);
+
+ nw = now(CLOCK_MONOTONIC);
+
+ r = poll(&pollfd, 1,
+ timeout == UINT64_MAX ? -1 :
+ timeout > nw ? (int) ((timeout - nw) / 1000) :
+ 0);
+
+ assert_se(r >= 0);
+
+ sd_login_monitor_flush(m);
+ printf("Wake!\n");
+ }
+
+ sd_login_monitor_unref(m);
+}
+
+static int intro(void) {
+ log_info("/* Information printed is from the live system */");
+ return EXIT_SUCCESS;
+}
+
+DEFINE_TEST_MAIN_WITH_INTRO(LOG_INFO, intro);
diff --git a/src/libsystemd/sd-netlink/netlink-genl.c b/src/libsystemd/sd-netlink/netlink-genl.c
new file mode 100644
index 0000000..1dc62e8
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-genl.c
@@ -0,0 +1,488 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/genetlink.h>
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "netlink-genl.h"
+#include "netlink-internal.h"
+#include "netlink-types.h"
+
+typedef struct GenericNetlinkFamily {
+ sd_netlink *genl;
+
+ const NLAPolicySet *policy_set;
+
+ uint16_t id; /* a.k.a nlmsg_type */
+ char *name;
+ uint32_t version;
+ uint32_t additional_header_size;
+ Hashmap *multicast_group_by_name;
+} GenericNetlinkFamily;
+
+static const GenericNetlinkFamily nlctrl_static = {
+ .id = GENL_ID_CTRL,
+ .name = (char*) CTRL_GENL_NAME,
+ .version = 0x01,
+};
+
+static GenericNetlinkFamily *genl_family_free(GenericNetlinkFamily *f) {
+ if (!f)
+ return NULL;
+
+ if (f->genl) {
+ if (f->id > 0)
+ hashmap_remove(f->genl->genl_family_by_id, UINT_TO_PTR(f->id));
+ if (f->name)
+ hashmap_remove(f->genl->genl_family_by_name, f->name);
+ }
+
+ free(f->name);
+ hashmap_free(f->multicast_group_by_name);
+
+ return mfree(f);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(GenericNetlinkFamily*, genl_family_free);
+
+void genl_clear_family(sd_netlink *nl) {
+ assert(nl);
+
+ nl->genl_family_by_name = hashmap_free_with_destructor(nl->genl_family_by_name, genl_family_free);
+ nl->genl_family_by_id = hashmap_free_with_destructor(nl->genl_family_by_id, genl_family_free);
+}
+
+static int genl_family_new_unsupported(
+ sd_netlink *nl,
+ const char *family_name,
+ const NLAPolicySet *policy_set) {
+
+ _cleanup_(genl_family_freep) GenericNetlinkFamily *f = NULL;
+ int r;
+
+ assert(nl);
+ assert(family_name);
+ assert(policy_set);
+
+ /* Kernel does not support the genl family? To prevent from resolving the family name again,
+ * let's store the family with zero id to indicate that. */
+
+ f = new(GenericNetlinkFamily, 1);
+ if (!f)
+ return -ENOMEM;
+
+ *f = (GenericNetlinkFamily) {
+ .policy_set = policy_set,
+ };
+
+ f->name = strdup(family_name);
+ if (!f->name)
+ return -ENOMEM;
+
+ r = hashmap_ensure_put(&nl->genl_family_by_name, &string_hash_ops, f->name, f);
+ if (r < 0)
+ return r;
+
+ f->genl = nl;
+ TAKE_PTR(f);
+ return 0;
+}
+
+static int genl_family_new(
+ sd_netlink *nl,
+ const char *expected_family_name,
+ const NLAPolicySet *policy_set,
+ sd_netlink_message *message,
+ const GenericNetlinkFamily **ret) {
+
+ _cleanup_(genl_family_freep) GenericNetlinkFamily *f = NULL;
+ const char *family_name;
+ uint8_t cmd;
+ int r;
+
+ assert(nl);
+ assert(expected_family_name);
+ assert(policy_set);
+ assert(message);
+ assert(ret);
+
+ f = new(GenericNetlinkFamily, 1);
+ if (!f)
+ return -ENOMEM;
+
+ *f = (GenericNetlinkFamily) {
+ .policy_set = policy_set,
+ };
+
+ r = sd_genl_message_get_family_name(nl, message, &family_name);
+ if (r < 0)
+ return r;
+
+ if (!streq(family_name, CTRL_GENL_NAME))
+ return -EINVAL;
+
+ r = sd_genl_message_get_command(nl, message, &cmd);
+ if (r < 0)
+ return r;
+
+ if (cmd != CTRL_CMD_NEWFAMILY)
+ return -EINVAL;
+
+ r = sd_netlink_message_read_u16(message, CTRL_ATTR_FAMILY_ID, &f->id);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_read_string_strdup(message, CTRL_ATTR_FAMILY_NAME, &f->name);
+ if (r < 0)
+ return r;
+
+ if (!streq(f->name, expected_family_name))
+ return -EINVAL;
+
+ r = sd_netlink_message_read_u32(message, CTRL_ATTR_VERSION, &f->version);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_read_u32(message, CTRL_ATTR_HDRSIZE, &f->additional_header_size);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_enter_container(message, CTRL_ATTR_MCAST_GROUPS);
+ if (r >= 0) {
+ for (uint16_t i = 0; i < UINT16_MAX; i++) {
+ _cleanup_free_ char *group_name = NULL;
+ uint32_t group_id;
+
+ r = sd_netlink_message_enter_array(message, i + 1);
+ if (r == -ENODATA)
+ break;
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_read_u32(message, CTRL_ATTR_MCAST_GRP_ID, &group_id);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_read_string_strdup(message, CTRL_ATTR_MCAST_GRP_NAME, &group_name);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (group_id == 0) {
+ log_debug("sd-netlink: received multicast group '%s' for generic netlink family '%s' with id == 0, ignoring",
+ group_name, f->name);
+ continue;
+ }
+
+ r = hashmap_ensure_put(&f->multicast_group_by_name, &string_hash_ops_free, group_name, UINT32_TO_PTR(group_id));
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(group_name);
+ }
+
+ r = sd_netlink_message_exit_container(message);
+ if (r < 0)
+ return r;
+ }
+
+ r = hashmap_ensure_put(&nl->genl_family_by_id, NULL, UINT_TO_PTR(f->id), f);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_put(&nl->genl_family_by_name, &string_hash_ops, f->name, f);
+ if (r < 0) {
+ hashmap_remove(nl->genl_family_by_id, UINT_TO_PTR(f->id));
+ return r;
+ }
+
+ f->genl = nl;
+ *ret = TAKE_PTR(f);
+ return 0;
+}
+
+static const NLAPolicySet *genl_family_get_policy_set(const GenericNetlinkFamily *family) {
+ assert(family);
+
+ if (family->policy_set)
+ return family->policy_set;
+
+ return genl_get_policy_set_by_name(family->name);
+}
+
+static int genl_message_new(
+ sd_netlink *nl,
+ const GenericNetlinkFamily *family,
+ uint8_t cmd,
+ sd_netlink_message **ret) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ const NLAPolicySet *policy_set;
+ int r;
+
+ assert(nl);
+ assert(nl->protocol == NETLINK_GENERIC);
+ assert(family);
+ assert(ret);
+
+ policy_set = genl_family_get_policy_set(family);
+ if (!policy_set)
+ return -EOPNOTSUPP;
+
+ r = message_new_full(nl, family->id, policy_set,
+ sizeof(struct genlmsghdr) + family->additional_header_size, &m);
+ if (r < 0)
+ return r;
+
+ *(struct genlmsghdr *) NLMSG_DATA(m->hdr) = (struct genlmsghdr) {
+ .cmd = cmd,
+ .version = family->version,
+ };
+
+ *ret = TAKE_PTR(m);
+ return 0;
+}
+
+static int genl_family_get_by_name_internal(
+ sd_netlink *nl,
+ const GenericNetlinkFamily *ctrl,
+ const char *name,
+ const GenericNetlinkFamily **ret) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ const NLAPolicySet *policy_set;
+ int r;
+
+ assert(nl);
+ assert(nl->protocol == NETLINK_GENERIC);
+ assert(ctrl);
+ assert(name);
+ assert(ret);
+
+ policy_set = genl_get_policy_set_by_name(name);
+ if (!policy_set)
+ return -EOPNOTSUPP;
+
+ r = genl_message_new(nl, ctrl, CTRL_CMD_GETFAMILY, &req);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(req, CTRL_ATTR_FAMILY_NAME, name);
+ if (r < 0)
+ return r;
+
+ if (sd_netlink_call(nl, req, 0, &reply) < 0) {
+ (void) genl_family_new_unsupported(nl, name, policy_set);
+ return -EOPNOTSUPP;
+ }
+
+ return genl_family_new(nl, name, policy_set, reply, ret);
+}
+
+static int genl_family_get_by_name(sd_netlink *nl, const char *name, const GenericNetlinkFamily **ret) {
+ const GenericNetlinkFamily *f, *ctrl;
+ int r;
+
+ assert(nl);
+ assert(nl->protocol == NETLINK_GENERIC);
+ assert(name);
+ assert(ret);
+
+ f = hashmap_get(nl->genl_family_by_name, name);
+ if (f) {
+ if (f->id == 0) /* kernel does not support the family. */
+ return -EOPNOTSUPP;
+
+ *ret = f;
+ return 0;
+ }
+
+ if (streq(name, CTRL_GENL_NAME))
+ return genl_family_get_by_name_internal(nl, &nlctrl_static, CTRL_GENL_NAME, ret);
+
+ ctrl = hashmap_get(nl->genl_family_by_name, CTRL_GENL_NAME);
+ if (!ctrl) {
+ r = genl_family_get_by_name_internal(nl, &nlctrl_static, CTRL_GENL_NAME, &ctrl);
+ if (r < 0)
+ return r;
+ }
+
+ return genl_family_get_by_name_internal(nl, ctrl, name, ret);
+}
+
+static int genl_family_get_by_id(sd_netlink *nl, uint16_t id, const GenericNetlinkFamily **ret) {
+ const GenericNetlinkFamily *f;
+
+ assert(nl);
+ assert(nl->protocol == NETLINK_GENERIC);
+ assert(ret);
+
+ f = hashmap_get(nl->genl_family_by_id, UINT_TO_PTR(id));
+ if (f) {
+ *ret = f;
+ return 0;
+ }
+
+ if (id == GENL_ID_CTRL) {
+ *ret = &nlctrl_static;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+int genl_get_policy_set_and_header_size(
+ sd_netlink *nl,
+ uint16_t id,
+ const NLAPolicySet **ret_policy_set,
+ size_t *ret_header_size) {
+
+ const GenericNetlinkFamily *f;
+ int r;
+
+ assert(nl);
+ assert(nl->protocol == NETLINK_GENERIC);
+
+ r = genl_family_get_by_id(nl, id, &f);
+ if (r < 0)
+ return r;
+
+ if (ret_policy_set) {
+ const NLAPolicySet *p;
+
+ p = genl_family_get_policy_set(f);
+ if (!p)
+ return -EOPNOTSUPP;
+
+ *ret_policy_set = p;
+ }
+ if (ret_header_size)
+ *ret_header_size = sizeof(struct genlmsghdr) + f->additional_header_size;
+ return 0;
+}
+
+int sd_genl_message_new(sd_netlink *nl, const char *family_name, uint8_t cmd, sd_netlink_message **ret) {
+ const GenericNetlinkFamily *family;
+ int r;
+
+ assert_return(nl, -EINVAL);
+ assert_return(nl->protocol == NETLINK_GENERIC, -EINVAL);
+ assert_return(family_name, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = genl_family_get_by_name(nl, family_name, &family);
+ if (r < 0)
+ return r;
+
+ return genl_message_new(nl, family, cmd, ret);
+}
+
+int sd_genl_message_get_family_name(sd_netlink *nl, sd_netlink_message *m, const char **ret) {
+ const GenericNetlinkFamily *family;
+ uint16_t nlmsg_type;
+ int r;
+
+ assert_return(nl, -EINVAL);
+ assert_return(nl->protocol == NETLINK_GENERIC, -EINVAL);
+ assert_return(m, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = sd_netlink_message_get_type(m, &nlmsg_type);
+ if (r < 0)
+ return r;
+
+ r = genl_family_get_by_id(nl, nlmsg_type, &family);
+ if (r < 0)
+ return r;
+
+ *ret = family->name;
+ return 0;
+}
+
+int sd_genl_message_get_command(sd_netlink *nl, sd_netlink_message *m, uint8_t *ret) {
+ struct genlmsghdr *h;
+ uint16_t nlmsg_type;
+ size_t size;
+ int r;
+
+ assert_return(nl, -EINVAL);
+ assert_return(nl->protocol == NETLINK_GENERIC, -EINVAL);
+ assert_return(m, -EINVAL);
+ assert_return(m->protocol == NETLINK_GENERIC, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = sd_netlink_message_get_type(m, &nlmsg_type);
+ if (r < 0)
+ return r;
+
+ r = genl_get_policy_set_and_header_size(nl, nlmsg_type, NULL, &size);
+ if (r < 0)
+ return r;
+
+ if (m->hdr->nlmsg_len < NLMSG_LENGTH(size))
+ return -EBADMSG;
+
+ h = NLMSG_DATA(m->hdr);
+
+ *ret = h->cmd;
+ return 0;
+}
+
+static int genl_family_get_multicast_group_id_by_name(const GenericNetlinkFamily *f, const char *name, uint32_t *ret) {
+ void *p;
+
+ assert(f);
+ assert(name);
+
+ p = hashmap_get(f->multicast_group_by_name, name);
+ if (!p)
+ return -ENOENT;
+
+ if (ret)
+ *ret = PTR_TO_UINT32(p);
+ return 0;
+}
+
+int sd_genl_add_match(
+ sd_netlink *nl,
+ sd_netlink_slot **ret_slot,
+ const char *family_name,
+ const char *multicast_group_name,
+ uint8_t command,
+ sd_netlink_message_handler_t callback,
+ sd_netlink_destroy_t destroy_callback,
+ void *userdata,
+ const char *description) {
+
+ const GenericNetlinkFamily *f;
+ uint32_t multicast_group_id;
+ int r;
+
+ assert_return(nl, -EINVAL);
+ assert_return(nl->protocol == NETLINK_GENERIC, -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(family_name, -EINVAL);
+ assert_return(multicast_group_name, -EINVAL);
+
+ /* If command == 0, then all commands belonging to the multicast group trigger the callback. */
+
+ r = genl_family_get_by_name(nl, family_name, &f);
+ if (r < 0)
+ return r;
+
+ r = genl_family_get_multicast_group_id_by_name(f, multicast_group_name, &multicast_group_id);
+ if (r < 0)
+ return r;
+
+ return netlink_add_match_internal(nl, ret_slot, &multicast_group_id, 1, f->id, command,
+ callback, destroy_callback, userdata, description);
+}
+
+int sd_genl_socket_open(sd_netlink **ret) {
+ return netlink_open_family(ret, NETLINK_GENERIC);
+}
diff --git a/src/libsystemd/sd-netlink/netlink-genl.h b/src/libsystemd/sd-netlink/netlink-genl.h
new file mode 100644
index 0000000..b06be05
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-genl.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-netlink.h"
+
+#define CTRL_GENL_NAME "nlctrl"
+
+void genl_clear_family(sd_netlink *nl);
diff --git a/src/libsystemd/sd-netlink/netlink-internal.h b/src/libsystemd/sd-netlink/netlink-internal.h
new file mode 100644
index 0000000..891d3e8
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-internal.h
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/netlink.h>
+
+#include "sd-netlink.h"
+
+#include "list.h"
+#include "netlink-types.h"
+#include "ordered-set.h"
+#include "prioq.h"
+#include "time-util.h"
+
+#define NETLINK_DEFAULT_TIMEOUT_USEC ((usec_t) (25 * USEC_PER_SEC))
+
+#define NETLINK_RQUEUE_MAX 64*1024
+
+#define NETLINK_CONTAINER_DEPTH 32
+
+struct reply_callback {
+ sd_netlink_message_handler_t callback;
+ usec_t timeout;
+ uint32_t serial;
+ unsigned prioq_idx;
+};
+
+struct match_callback {
+ sd_netlink_message_handler_t callback;
+ uint32_t *groups;
+ size_t n_groups;
+ uint16_t type;
+ uint8_t cmd; /* used by genl */
+
+ LIST_FIELDS(struct match_callback, match_callbacks);
+};
+
+typedef enum NetlinkSlotType {
+ NETLINK_REPLY_CALLBACK,
+ NETLINK_MATCH_CALLBACK,
+ _NETLINK_SLOT_INVALID = -EINVAL,
+} NetlinkSlotType;
+
+struct sd_netlink_slot {
+ unsigned n_ref;
+ NetlinkSlotType type:8;
+ bool floating;
+ sd_netlink *netlink;
+ void *userdata;
+ sd_netlink_destroy_t destroy_callback;
+
+ char *description;
+
+ LIST_FIELDS(sd_netlink_slot, slots);
+
+ union {
+ struct reply_callback reply_callback;
+ struct match_callback match_callback;
+ };
+};
+
+struct sd_netlink {
+ unsigned n_ref;
+
+ int fd;
+
+ union {
+ struct sockaddr sa;
+ struct sockaddr_nl nl;
+ } sockaddr;
+
+ int protocol;
+
+ Hashmap *broadcast_group_refs;
+ bool broadcast_group_dont_leave:1; /* until we can rely on 4.2 */
+
+ OrderedSet *rqueue;
+ Hashmap *rqueue_by_serial;
+ Hashmap *rqueue_partial_by_serial;
+
+ struct nlmsghdr *rbuffer;
+
+ bool processing:1;
+
+ uint32_t serial;
+
+ struct Prioq *reply_callbacks_prioq;
+ Hashmap *reply_callbacks;
+
+ LIST_HEAD(struct match_callback, match_callbacks);
+
+ LIST_HEAD(sd_netlink_slot, slots);
+
+ pid_t original_pid;
+
+ sd_event_source *io_event_source;
+ sd_event_source *time_event_source;
+ sd_event_source *exit_event_source;
+ sd_event *event;
+
+ Hashmap *genl_family_by_name;
+ Hashmap *genl_family_by_id;
+};
+
+struct netlink_attribute {
+ size_t offset; /* offset from hdr to attribute */
+ bool nested:1;
+ bool net_byteorder:1;
+};
+
+struct netlink_container {
+ const struct NLAPolicySet *policy_set; /* the policy set of the container */
+ size_t offset; /* offset from hdr to the start of the container */
+ struct netlink_attribute *attributes;
+ uint16_t max_attribute; /* the maximum attribute in container */
+};
+
+struct sd_netlink_message {
+ unsigned n_ref;
+
+ int protocol;
+
+ struct nlmsghdr *hdr;
+ struct netlink_container containers[NETLINK_CONTAINER_DEPTH];
+ unsigned n_containers; /* number of containers */
+ uint32_t multicast_group;
+ bool sealed:1;
+
+ sd_netlink_message *next; /* next in a chain of multi-part messages */
+};
+
+int message_new_empty(sd_netlink *nl, sd_netlink_message **ret);
+int message_new_full(
+ sd_netlink *nl,
+ uint16_t nlmsg_type,
+ const NLAPolicySet *policy_set,
+ size_t header_size,
+ sd_netlink_message **ret);
+int message_new(sd_netlink *nl, sd_netlink_message **ret, uint16_t type);
+int message_new_synthetic_error(sd_netlink *nl, int error, uint32_t serial, sd_netlink_message **ret);
+
+static inline uint32_t message_get_serial(sd_netlink_message *m) {
+ assert(m);
+ return ASSERT_PTR(m->hdr)->nlmsg_seq;
+}
+
+void message_seal(sd_netlink_message *m);
+
+int netlink_open_family(sd_netlink **ret, int family);
+bool netlink_pid_changed(sd_netlink *nl);
+
+int socket_bind(sd_netlink *nl);
+int socket_broadcast_group_ref(sd_netlink *nl, unsigned group);
+int socket_broadcast_group_unref(sd_netlink *nl, unsigned group);
+int socket_write_message(sd_netlink *nl, sd_netlink_message *m);
+int socket_read_message(sd_netlink *nl);
+
+int netlink_add_match_internal(
+ sd_netlink *nl,
+ sd_netlink_slot **ret_slot,
+ const uint32_t *groups,
+ size_t n_groups,
+ uint16_t type,
+ uint8_t cmd,
+ sd_netlink_message_handler_t callback,
+ sd_netlink_destroy_t destroy_callback,
+ void *userdata,
+ const char *description);
+
+/* Make sure callbacks don't destroy the netlink connection */
+#define NETLINK_DONT_DESTROY(nl) \
+ _cleanup_(sd_netlink_unrefp) _unused_ sd_netlink *_dont_destroy_##nl = sd_netlink_ref(nl)
+
+bool nfproto_is_valid(int nfproto);
+
+/* nfnl */
+/* TODO: to be exported later */
+int sd_nfnl_socket_open(sd_netlink **ret);
+int sd_nfnl_send_batch(
+ sd_netlink *nfnl,
+ sd_netlink_message **messages,
+ size_t msgcount,
+ uint32_t **ret_serials);
+int sd_nfnl_call_batch(
+ sd_netlink *nfnl,
+ sd_netlink_message **messages,
+ size_t n_messages,
+ uint64_t usec,
+ sd_netlink_message ***ret_messages);
+int sd_nfnl_message_new(
+ sd_netlink *nfnl,
+ sd_netlink_message **ret,
+ int nfproto,
+ uint16_t subsys,
+ uint16_t msg_type,
+ uint16_t flags);
+int sd_nfnl_nft_message_new_table(sd_netlink *nfnl, sd_netlink_message **ret,
+ int nfproto, const char *table);
+int sd_nfnl_nft_message_new_basechain(sd_netlink *nfnl, sd_netlink_message **ret,
+ int nfproto, const char *table, const char *chain,
+ const char *type, uint8_t hook, int prio);
+int sd_nfnl_nft_message_new_rule(sd_netlink *nfnl, sd_netlink_message **ret,
+ int nfproto, const char *table, const char *chain);
+int sd_nfnl_nft_message_new_set(sd_netlink *nfnl, sd_netlink_message **ret,
+ int nfproto, const char *table, const char *set_name,
+ uint32_t setid, uint32_t klen);
+int sd_nfnl_nft_message_new_setelems(sd_netlink *nfnl, sd_netlink_message **ret,
+ int add, int nfproto, const char *table, const char *set_name);
+int sd_nfnl_nft_message_append_setelem(sd_netlink_message *m,
+ uint32_t index,
+ const void *key, size_t key_len,
+ const void *data, size_t data_len,
+ uint32_t flags);
diff --git a/src/libsystemd/sd-netlink/netlink-message-nfnl.c b/src/libsystemd/sd-netlink/netlink-message-nfnl.c
new file mode 100644
index 0000000..fd3055d
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-message-nfnl.c
@@ -0,0 +1,417 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/netfilter.h>
+
+#include "sd-netlink.h"
+
+#include "iovec-util.h"
+#include "netlink-internal.h"
+#include "netlink-types.h"
+#include "netlink-util.h"
+
+bool nfproto_is_valid(int nfproto) {
+ return IN_SET(nfproto,
+ NFPROTO_UNSPEC,
+ NFPROTO_INET,
+ NFPROTO_IPV4,
+ NFPROTO_ARP,
+ NFPROTO_NETDEV,
+ NFPROTO_BRIDGE,
+ NFPROTO_IPV6);
+}
+
+int sd_nfnl_message_new(sd_netlink *nfnl, sd_netlink_message **ret, int nfproto, uint16_t subsys, uint16_t msg_type, uint16_t flags) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ assert_return(nfnl, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(nfproto_is_valid(nfproto), -EINVAL);
+ assert_return(NFNL_MSG_TYPE(msg_type) == msg_type, -EINVAL);
+
+ r = message_new(nfnl, &m, subsys << 8 | msg_type);
+ if (r < 0)
+ return r;
+
+ m->hdr->nlmsg_flags |= flags;
+
+ *(struct nfgenmsg*) NLMSG_DATA(m->hdr) = (struct nfgenmsg) {
+ .nfgen_family = nfproto,
+ .version = NFNETLINK_V0,
+ };
+
+ *ret = TAKE_PTR(m);
+ return 0;
+}
+
+static int nfnl_message_set_res_id(sd_netlink_message *m, uint16_t res_id) {
+ struct nfgenmsg *nfgen;
+
+ assert(m);
+ assert(m->hdr);
+
+ nfgen = NLMSG_DATA(m->hdr);
+ nfgen->res_id = htobe16(res_id);
+
+ return 0;
+}
+
+static int nfnl_message_get_subsys(sd_netlink_message *m, uint16_t *ret) {
+ uint16_t t;
+ int r;
+
+ assert(m);
+ assert(ret);
+
+ r = sd_netlink_message_get_type(m, &t);
+ if (r < 0)
+ return r;
+
+ *ret = NFNL_SUBSYS_ID(t);
+ return 0;
+}
+
+static int nfnl_message_new_batch(sd_netlink *nfnl, sd_netlink_message **ret, uint16_t subsys, uint16_t msg_type) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ assert_return(nfnl, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(NFNL_MSG_TYPE(msg_type) == msg_type, -EINVAL);
+
+ r = sd_nfnl_message_new(nfnl, &m, NFPROTO_UNSPEC, NFNL_SUBSYS_NONE, msg_type, 0);
+ if (r < 0)
+ return r;
+
+ r = nfnl_message_set_res_id(m, subsys);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(m);
+ return 0;
+}
+
+int sd_nfnl_send_batch(
+ sd_netlink *nfnl,
+ sd_netlink_message **messages,
+ size_t n_messages,
+ uint32_t **ret_serials) {
+
+ /* iovs refs batch_begin and batch_end, hence, free iovs first, then free batch_begin and batch_end. */
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *batch_begin = NULL, *batch_end = NULL;
+ _cleanup_free_ struct iovec *iovs = NULL;
+ _cleanup_free_ uint32_t *serials = NULL;
+ uint16_t subsys;
+ ssize_t k;
+ size_t c = 0;
+ int r;
+
+ assert_return(nfnl, -EINVAL);
+ assert_return(!netlink_pid_changed(nfnl), -ECHILD);
+ assert_return(messages, -EINVAL);
+ assert_return(n_messages > 0, -EINVAL);
+
+ iovs = new(struct iovec, n_messages + 2);
+ if (!iovs)
+ return -ENOMEM;
+
+ if (ret_serials) {
+ serials = new(uint32_t, n_messages);
+ if (!serials)
+ return -ENOMEM;
+ }
+
+ r = nfnl_message_get_subsys(messages[0], &subsys);
+ if (r < 0)
+ return r;
+
+ r = nfnl_message_new_batch(nfnl, &batch_begin, subsys, NFNL_MSG_BATCH_BEGIN);
+ if (r < 0)
+ return r;
+
+ netlink_seal_message(nfnl, batch_begin);
+ iovs[c++] = IOVEC_MAKE(batch_begin->hdr, batch_begin->hdr->nlmsg_len);
+
+ for (size_t i = 0; i < n_messages; i++) {
+ uint16_t s;
+
+ r = nfnl_message_get_subsys(messages[i], &s);
+ if (r < 0)
+ return r;
+
+ if (s != subsys)
+ return -EINVAL;
+
+ netlink_seal_message(nfnl, messages[i]);
+ if (serials)
+ serials[i] = message_get_serial(messages[i]);
+
+ /* It seems that the kernel accepts an arbitrary number. Let's set the lower 16 bits of the
+ * serial of the first message. */
+ nfnl_message_set_res_id(messages[i], (uint16_t) (message_get_serial(batch_begin) & UINT16_MAX));
+
+ iovs[c++] = IOVEC_MAKE(messages[i]->hdr, messages[i]->hdr->nlmsg_len);
+ }
+
+ r = nfnl_message_new_batch(nfnl, &batch_end, subsys, NFNL_MSG_BATCH_END);
+ if (r < 0)
+ return r;
+
+ netlink_seal_message(nfnl, batch_end);
+ iovs[c++] = IOVEC_MAKE(batch_end->hdr, batch_end->hdr->nlmsg_len);
+
+ assert(c == n_messages + 2);
+ k = writev(nfnl->fd, iovs, n_messages + 2);
+ if (k < 0)
+ return -errno;
+
+ if (ret_serials)
+ *ret_serials = TAKE_PTR(serials);
+
+ return 0;
+}
+
+int sd_nfnl_call_batch(
+ sd_netlink *nfnl,
+ sd_netlink_message **messages,
+ size_t n_messages,
+ uint64_t usec,
+ sd_netlink_message ***ret_messages) {
+
+ _cleanup_free_ sd_netlink_message **replies = NULL;
+ _cleanup_free_ uint32_t *serials = NULL;
+ int r;
+
+ assert_return(nfnl, -EINVAL);
+ assert_return(!netlink_pid_changed(nfnl), -ECHILD);
+ assert_return(messages, -EINVAL);
+ assert_return(n_messages > 0, -EINVAL);
+
+ if (ret_messages) {
+ replies = new0(sd_netlink_message*, n_messages);
+ if (!replies)
+ return -ENOMEM;
+ }
+
+ r = sd_nfnl_send_batch(nfnl, messages, n_messages, &serials);
+ if (r < 0)
+ return r;
+
+ for (size_t i = 0; i < n_messages; i++)
+ RET_GATHER(r,
+ sd_netlink_read(nfnl, serials[i], usec, ret_messages ? replies + i : NULL));
+ if (r < 0)
+ return r;
+
+ if (ret_messages)
+ *ret_messages = TAKE_PTR(replies);
+
+ return 0;
+}
+
+int sd_nfnl_nft_message_new_basechain(
+ sd_netlink *nfnl,
+ sd_netlink_message **ret,
+ int nfproto,
+ const char *table,
+ const char *chain,
+ const char *type,
+ uint8_t hook,
+ int prio) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ r = sd_nfnl_message_new(nfnl, &m, nfproto, NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWCHAIN, NLM_F_CREATE);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(m, NFTA_CHAIN_TABLE, table);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(m, NFTA_CHAIN_NAME, chain);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(m, NFTA_CHAIN_TYPE, type);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_open_container(m, NFTA_CHAIN_HOOK);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_u32(m, NFTA_HOOK_HOOKNUM, htobe32(hook));
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_u32(m, NFTA_HOOK_PRIORITY, htobe32(prio));
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(m);
+ return 0;
+}
+
+int sd_nfnl_nft_message_new_table(
+ sd_netlink *nfnl,
+ sd_netlink_message **ret,
+ int nfproto,
+ const char *table) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ r = sd_nfnl_message_new(nfnl, &m, nfproto, NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWTABLE, NLM_F_CREATE | NLM_F_EXCL);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(m, NFTA_TABLE_NAME, table);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(m);
+ return r;
+}
+
+int sd_nfnl_nft_message_new_rule(
+ sd_netlink *nfnl,
+ sd_netlink_message **ret,
+ int nfproto,
+ const char *table,
+ const char *chain) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ r = sd_nfnl_message_new(nfnl, &m, nfproto, NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWRULE, NLM_F_CREATE);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(m, NFTA_RULE_TABLE, table);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(m, NFTA_RULE_CHAIN, chain);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(m);
+ return r;
+}
+
+int sd_nfnl_nft_message_new_set(
+ sd_netlink *nfnl,
+ sd_netlink_message **ret,
+ int nfproto,
+ const char *table,
+ const char *set_name,
+ uint32_t set_id,
+ uint32_t klen) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ r = sd_nfnl_message_new(nfnl, &m, nfproto, NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWSET, NLM_F_CREATE);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(m, NFTA_SET_TABLE, table);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(m, NFTA_SET_NAME, set_name);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_u32(m, NFTA_SET_ID, ++set_id);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_u32(m, NFTA_SET_KEY_LEN, htobe32(klen));
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(m);
+ return r;
+}
+
+int sd_nfnl_nft_message_new_setelems(
+ sd_netlink *nfnl,
+ sd_netlink_message **ret,
+ int add, /* boolean */
+ int nfproto,
+ const char *table,
+ const char *set_name) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ if (add)
+ r = sd_nfnl_message_new(nfnl, &m, nfproto, NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWSETELEM, NLM_F_CREATE);
+ else
+ r = sd_nfnl_message_new(nfnl, &m, nfproto, NFNL_SUBSYS_NFTABLES, NFT_MSG_DELSETELEM, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(m, NFTA_SET_ELEM_LIST_TABLE, table);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(m, NFTA_SET_ELEM_LIST_SET, set_name);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(m);
+ return r;
+}
+
+int sd_nfnl_nft_message_append_setelem(
+ sd_netlink_message *m,
+ uint32_t index,
+ const void *key,
+ size_t key_len,
+ const void *data,
+ size_t data_len,
+ uint32_t flags) {
+
+ int r;
+
+ r = sd_netlink_message_open_array(m, index);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_container_data(m, NFTA_SET_ELEM_KEY, NFTA_DATA_VALUE, key, key_len);
+ if (r < 0)
+ goto cancel;
+
+ if (data) {
+ r = sd_netlink_message_append_container_data(m, NFTA_SET_ELEM_DATA, NFTA_DATA_VALUE, data, data_len);
+ if (r < 0)
+ goto cancel;
+ }
+
+ if (flags != 0) {
+ r = sd_netlink_message_append_u32(m, NFTA_SET_ELEM_FLAGS, htobe32(flags));
+ if (r < 0)
+ goto cancel;
+ }
+
+ return sd_netlink_message_close_container(m); /* array */
+
+cancel:
+ (void) sd_netlink_message_cancel_array(m);
+ return r;
+}
+
+int sd_nfnl_socket_open(sd_netlink **ret) {
+ return netlink_open_family(ret, NETLINK_NETFILTER);
+}
diff --git a/src/libsystemd/sd-netlink/netlink-message-rtnl.c b/src/libsystemd/sd-netlink/netlink-message-rtnl.c
new file mode 100644
index 0000000..008e802
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-message-rtnl.c
@@ -0,0 +1,1204 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <linux/fib_rules.h>
+#include <linux/if_addrlabel.h>
+#include <linux/if_bridge.h>
+#include <linux/nexthop.h>
+#include <stdbool.h>
+#include <unistd.h>
+
+#include "sd-netlink.h"
+
+#include "format-util.h"
+#include "netlink-internal.h"
+#include "netlink-types.h"
+#include "netlink-util.h"
+#include "socket-util.h"
+
+static bool rtnl_message_type_is_neigh(uint16_t type) {
+ return IN_SET(type, RTM_NEWNEIGH, RTM_GETNEIGH, RTM_DELNEIGH);
+}
+
+static bool rtnl_message_type_is_route(uint16_t type) {
+ return IN_SET(type, RTM_NEWROUTE, RTM_GETROUTE, RTM_DELROUTE);
+}
+
+static bool rtnl_message_type_is_nexthop(uint16_t type) {
+ return IN_SET(type, RTM_NEWNEXTHOP, RTM_GETNEXTHOP, RTM_DELNEXTHOP);
+}
+
+static bool rtnl_message_type_is_link(uint16_t type) {
+ return IN_SET(type,
+ RTM_NEWLINK, RTM_SETLINK, RTM_GETLINK, RTM_DELLINK,
+ RTM_NEWLINKPROP, RTM_DELLINKPROP, RTM_GETLINKPROP);
+}
+
+static bool rtnl_message_type_is_addr(uint16_t type) {
+ return IN_SET(type, RTM_NEWADDR, RTM_GETADDR, RTM_DELADDR);
+}
+
+static bool rtnl_message_type_is_addrlabel(uint16_t type) {
+ return IN_SET(type, RTM_NEWADDRLABEL, RTM_DELADDRLABEL, RTM_GETADDRLABEL);
+}
+
+static bool rtnl_message_type_is_routing_policy_rule(uint16_t type) {
+ return IN_SET(type, RTM_NEWRULE, RTM_DELRULE, RTM_GETRULE);
+}
+
+static bool rtnl_message_type_is_traffic_control(uint16_t type) {
+ return IN_SET(type,
+ RTM_NEWQDISC, RTM_DELQDISC, RTM_GETQDISC,
+ RTM_NEWTCLASS, RTM_DELTCLASS, RTM_GETTCLASS);
+}
+
+static bool rtnl_message_type_is_mdb(uint16_t type) {
+ return IN_SET(type, RTM_NEWMDB, RTM_DELMDB, RTM_GETMDB);
+}
+
+int sd_rtnl_message_route_set_dst_prefixlen(sd_netlink_message *m, unsigned char prefixlen) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ if ((rtm->rtm_family == AF_INET && prefixlen > 32) ||
+ (rtm->rtm_family == AF_INET6 && prefixlen > 128))
+ return -ERANGE;
+
+ rtm->rtm_dst_len = prefixlen;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_set_src_prefixlen(sd_netlink_message *m, unsigned char prefixlen) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ if ((rtm->rtm_family == AF_INET && prefixlen > 32) ||
+ (rtm->rtm_family == AF_INET6 && prefixlen > 128))
+ return -ERANGE;
+
+ rtm->rtm_src_len = prefixlen;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_set_scope(sd_netlink_message *m, unsigned char scope) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ rtm->rtm_scope = scope;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_set_flags(sd_netlink_message *m, unsigned flags) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ rtm->rtm_flags = flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_flags(sd_netlink_message *m, unsigned *flags) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(flags, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *flags = rtm->rtm_flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_set_table(sd_netlink_message *m, unsigned char table) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ rtm->rtm_table = table;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_family(sd_netlink_message *m, int *family) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(family, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *family = rtm->rtm_family;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_type(sd_netlink_message *m, unsigned char *type) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(type, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *type = rtm->rtm_type;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_set_type(sd_netlink_message *m, unsigned char type) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ rtm->rtm_type = type;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_protocol(sd_netlink_message *m, unsigned char *protocol) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(protocol, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *protocol = rtm->rtm_protocol;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_scope(sd_netlink_message *m, unsigned char *scope) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(scope, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *scope = rtm->rtm_scope;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_tos(sd_netlink_message *m, uint8_t *tos) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(tos, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *tos = rtm->rtm_tos;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_table(sd_netlink_message *m, unsigned char *table) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(table, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *table = rtm->rtm_table;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_dst_prefixlen(sd_netlink_message *m, unsigned char *dst_len) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(dst_len, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *dst_len = rtm->rtm_dst_len;
+
+ return 0;
+}
+
+int sd_rtnl_message_route_get_src_prefixlen(sd_netlink_message *m, unsigned char *src_len) {
+ struct rtmsg *rtm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(src_len, -EINVAL);
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *src_len = rtm->rtm_src_len;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_route(sd_netlink *rtnl, sd_netlink_message **ret,
+ uint16_t nlmsg_type, int rtm_family,
+ unsigned char rtm_protocol) {
+ struct rtmsg *rtm;
+ int r;
+
+ assert_return(rtnl_message_type_is_route(nlmsg_type), -EINVAL);
+ assert_return((nlmsg_type == RTM_GETROUTE && rtm_family == AF_UNSPEC) ||
+ IN_SET(rtm_family, AF_INET, AF_INET6), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_NEWROUTE)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_APPEND;
+
+ rtm = NLMSG_DATA((*ret)->hdr);
+
+ rtm->rtm_family = rtm_family;
+ rtm->rtm_protocol = rtm_protocol;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_nexthop(sd_netlink *rtnl, sd_netlink_message **ret,
+ uint16_t nlmsg_type, int nh_family,
+ unsigned char nh_protocol) {
+ struct nhmsg *nhm;
+ int r;
+
+ assert_return(rtnl_message_type_is_nexthop(nlmsg_type), -EINVAL);
+ switch (nlmsg_type) {
+ case RTM_DELNEXTHOP:
+ assert_return(nh_family == AF_UNSPEC, -EINVAL);
+ _fallthrough_;
+ case RTM_GETNEXTHOP:
+ assert_return(nh_protocol == RTPROT_UNSPEC, -EINVAL);
+ break;
+ case RTM_NEWNEXTHOP:
+ assert_return(IN_SET(nh_family, AF_UNSPEC, AF_INET, AF_INET6), -EINVAL);
+ break;
+ default:
+ assert_not_reached();
+ }
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_NEWNEXTHOP)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_APPEND;
+
+ nhm = NLMSG_DATA((*ret)->hdr);
+
+ nhm->nh_family = nh_family;
+ nhm->nh_scope = RT_SCOPE_UNIVERSE;
+ nhm->nh_protocol = nh_protocol;
+
+ return 0;
+}
+
+int sd_rtnl_message_nexthop_set_flags(sd_netlink_message *m, uint8_t flags) {
+ struct nhmsg *nhm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(m->hdr->nlmsg_type == RTM_NEWNEXTHOP, -EINVAL);
+
+ nhm = NLMSG_DATA(m->hdr);
+ nhm->nh_flags = flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_nexthop_get_flags(sd_netlink_message *m, uint8_t *ret) {
+ struct nhmsg *nhm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_nexthop(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ nhm = NLMSG_DATA(m->hdr);
+ *ret = nhm->nh_flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_nexthop_get_family(sd_netlink_message *m, uint8_t *family) {
+ struct nhmsg *nhm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_nexthop(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(family, -EINVAL);
+
+ nhm = NLMSG_DATA(m->hdr);
+ *family = nhm->nh_family;
+
+ return 0;
+}
+
+int sd_rtnl_message_nexthop_get_protocol(sd_netlink_message *m, uint8_t *protocol) {
+ struct nhmsg *nhm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_nexthop(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(protocol, -EINVAL);
+
+ nhm = NLMSG_DATA(m->hdr);
+ *protocol = nhm->nh_protocol;
+
+ return 0;
+}
+
+int sd_rtnl_message_neigh_set_flags(sd_netlink_message *m, uint8_t flags) {
+ struct ndmsg *ndm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+
+ ndm = NLMSG_DATA(m->hdr);
+ ndm->ndm_flags = flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_neigh_set_state(sd_netlink_message *m, uint16_t state) {
+ struct ndmsg *ndm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+
+ ndm = NLMSG_DATA(m->hdr);
+ ndm->ndm_state = state;
+
+ return 0;
+}
+
+int sd_rtnl_message_neigh_get_flags(sd_netlink_message *m, uint8_t *flags) {
+ struct ndmsg *ndm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+
+ ndm = NLMSG_DATA(m->hdr);
+ *flags = ndm->ndm_flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_neigh_get_state(sd_netlink_message *m, uint16_t *state) {
+ struct ndmsg *ndm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+
+ ndm = NLMSG_DATA(m->hdr);
+ *state = ndm->ndm_state;
+
+ return 0;
+}
+
+int sd_rtnl_message_neigh_get_family(sd_netlink_message *m, int *family) {
+ struct ndmsg *ndm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(family, -EINVAL);
+
+ ndm = NLMSG_DATA(m->hdr);
+
+ *family = ndm->ndm_family;
+
+ return 0;
+}
+
+int sd_rtnl_message_neigh_get_ifindex(sd_netlink_message *m, int *index) {
+ struct ndmsg *ndm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(index, -EINVAL);
+
+ ndm = NLMSG_DATA(m->hdr);
+
+ *index = ndm->ndm_ifindex;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_neigh(
+ sd_netlink *rtnl,
+ sd_netlink_message **ret,
+ uint16_t nlmsg_type,
+ int index,
+ int ndm_family) {
+
+ struct ndmsg *ndm;
+ int r;
+
+ assert_return(rtnl_message_type_is_neigh(nlmsg_type), -EINVAL);
+ assert_return(IN_SET(ndm_family, AF_UNSPEC, AF_INET, AF_INET6, AF_BRIDGE), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_NEWNEIGH) {
+ if (ndm_family == AF_BRIDGE)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_APPEND;
+ else
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_REPLACE;
+ }
+
+ ndm = NLMSG_DATA((*ret)->hdr);
+
+ ndm->ndm_family = ndm_family;
+ ndm->ndm_ifindex = index;
+
+ return 0;
+}
+
+int sd_rtnl_message_link_set_flags(sd_netlink_message *m, unsigned flags, unsigned change) {
+ struct ifinfomsg *ifi;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(change != 0, -EINVAL);
+
+ ifi = NLMSG_DATA(m->hdr);
+
+ ifi->ifi_flags = flags;
+ ifi->ifi_change = change;
+
+ return 0;
+}
+
+int sd_rtnl_message_link_set_type(sd_netlink_message *m, unsigned type) {
+ struct ifinfomsg *ifi;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL);
+
+ ifi = NLMSG_DATA(m->hdr);
+
+ ifi->ifi_type = type;
+
+ return 0;
+}
+
+int sd_rtnl_message_link_set_family(sd_netlink_message *m, unsigned family) {
+ struct ifinfomsg *ifi;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL);
+
+ ifi = NLMSG_DATA(m->hdr);
+
+ ifi->ifi_family = family;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_link(sd_netlink *rtnl, sd_netlink_message **ret,
+ uint16_t nlmsg_type, int index) {
+ struct ifinfomsg *ifi;
+ int r;
+
+ assert_return(rtnl_message_type_is_link(nlmsg_type), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_NEWLINK)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
+ else if (nlmsg_type == RTM_NEWLINKPROP)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL | NLM_F_APPEND;
+
+ ifi = NLMSG_DATA((*ret)->hdr);
+
+ ifi->ifi_family = AF_UNSPEC;
+ ifi->ifi_index = index;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_set_prefixlen(sd_netlink_message *m, unsigned char prefixlen) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ if ((ifa->ifa_family == AF_INET && prefixlen > 32) ||
+ (ifa->ifa_family == AF_INET6 && prefixlen > 128))
+ return -ERANGE;
+
+ ifa->ifa_prefixlen = prefixlen;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_set_flags(sd_netlink_message *m, unsigned char flags) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ ifa->ifa_flags = flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_set_scope(sd_netlink_message *m, unsigned char scope) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ ifa->ifa_scope = scope;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_get_family(sd_netlink_message *m, int *ret_family) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(ret_family, -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ *ret_family = ifa->ifa_family;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_get_prefixlen(sd_netlink_message *m, unsigned char *ret_prefixlen) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(ret_prefixlen, -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ *ret_prefixlen = ifa->ifa_prefixlen;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_get_scope(sd_netlink_message *m, unsigned char *ret_scope) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(ret_scope, -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ *ret_scope = ifa->ifa_scope;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_get_flags(sd_netlink_message *m, unsigned char *ret_flags) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(ret_flags, -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ *ret_flags = ifa->ifa_flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_addr_get_ifindex(sd_netlink_message *m, int *ret_ifindex) {
+ struct ifaddrmsg *ifa;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(ret_ifindex, -EINVAL);
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ *ret_ifindex = ifa->ifa_index;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_addr(
+ sd_netlink *rtnl,
+ sd_netlink_message **ret,
+ uint16_t nlmsg_type,
+ int index,
+ int family) {
+
+ struct ifaddrmsg *ifa;
+ int r;
+
+ assert_return(rtnl_message_type_is_addr(nlmsg_type), -EINVAL);
+ assert_return((nlmsg_type == RTM_GETADDR && index == 0) ||
+ index > 0, -EINVAL);
+ assert_return((nlmsg_type == RTM_GETADDR && family == AF_UNSPEC) ||
+ IN_SET(family, AF_INET, AF_INET6), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ ifa = NLMSG_DATA((*ret)->hdr);
+
+ ifa->ifa_index = index;
+ ifa->ifa_family = family;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_addr_update(
+ sd_netlink *rtnl,
+ sd_netlink_message **ret,
+ int index,
+ int family) {
+ int r;
+
+ r = sd_rtnl_message_new_addr(rtnl, ret, RTM_NEWADDR, index, family);
+ if (r < 0)
+ return r;
+
+ (*ret)->hdr->nlmsg_flags |= NLM_F_REPLACE;
+
+ return 0;
+}
+
+int sd_rtnl_message_link_get_ifindex(sd_netlink_message *m, int *ifindex) {
+ struct ifinfomsg *ifi;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(ifindex, -EINVAL);
+
+ ifi = NLMSG_DATA(m->hdr);
+
+ *ifindex = ifi->ifi_index;
+
+ return 0;
+}
+
+int sd_rtnl_message_link_get_flags(sd_netlink_message *m, unsigned *flags) {
+ struct ifinfomsg *ifi;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(flags, -EINVAL);
+
+ ifi = NLMSG_DATA(m->hdr);
+
+ *flags = ifi->ifi_flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_link_get_type(sd_netlink_message *m, unsigned short *type) {
+ struct ifinfomsg *ifi;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(type, -EINVAL);
+
+ ifi = NLMSG_DATA(m->hdr);
+
+ *type = ifi->ifi_type;
+
+ return 0;
+}
+
+int sd_rtnl_message_get_family(sd_netlink_message *m, int *family) {
+ assert_return(m, -EINVAL);
+ assert_return(family, -EINVAL);
+
+ assert(m->hdr);
+
+ if (rtnl_message_type_is_link(m->hdr->nlmsg_type)) {
+ struct ifinfomsg *ifi;
+
+ ifi = NLMSG_DATA(m->hdr);
+
+ *family = ifi->ifi_family;
+
+ return 0;
+ } else if (rtnl_message_type_is_route(m->hdr->nlmsg_type)) {
+ struct rtmsg *rtm;
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *family = rtm->rtm_family;
+
+ return 0;
+ } else if (rtnl_message_type_is_neigh(m->hdr->nlmsg_type)) {
+ struct ndmsg *ndm;
+
+ ndm = NLMSG_DATA(m->hdr);
+
+ *family = ndm->ndm_family;
+
+ return 0;
+ } else if (rtnl_message_type_is_addr(m->hdr->nlmsg_type)) {
+ struct ifaddrmsg *ifa;
+
+ ifa = NLMSG_DATA(m->hdr);
+
+ *family = ifa->ifa_family;
+
+ return 0;
+ } else if (rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type)) {
+ struct rtmsg *rtm;
+
+ rtm = NLMSG_DATA(m->hdr);
+
+ *family = rtm->rtm_family;
+
+ return 0;
+ } else if (rtnl_message_type_is_nexthop(m->hdr->nlmsg_type)) {
+ struct nhmsg *nhm;
+
+ nhm = NLMSG_DATA(m->hdr);
+
+ *family = nhm->nh_family;
+
+ return 0;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+int sd_rtnl_message_new_addrlabel(
+ sd_netlink *rtnl,
+ sd_netlink_message **ret,
+ uint16_t nlmsg_type,
+ int ifindex,
+ int ifal_family) {
+
+ struct ifaddrlblmsg *addrlabel;
+ int r;
+
+ assert_return(rtnl_message_type_is_addrlabel(nlmsg_type), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_NEWADDRLABEL)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
+
+ addrlabel = NLMSG_DATA((*ret)->hdr);
+
+ addrlabel->ifal_family = ifal_family;
+ addrlabel->ifal_index = ifindex;
+
+ return 0;
+}
+
+int sd_rtnl_message_addrlabel_set_prefixlen(sd_netlink_message *m, unsigned char prefixlen) {
+ struct ifaddrlblmsg *addrlabel;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addrlabel(m->hdr->nlmsg_type), -EINVAL);
+
+ addrlabel = NLMSG_DATA(m->hdr);
+
+ if (prefixlen > 128)
+ return -ERANGE;
+
+ addrlabel->ifal_prefixlen = prefixlen;
+
+ return 0;
+}
+
+int sd_rtnl_message_addrlabel_get_prefixlen(sd_netlink_message *m, unsigned char *prefixlen) {
+ struct ifaddrlblmsg *addrlabel;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_addrlabel(m->hdr->nlmsg_type), -EINVAL);
+
+ addrlabel = NLMSG_DATA(m->hdr);
+
+ *prefixlen = addrlabel->ifal_prefixlen;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_routing_policy_rule(
+ sd_netlink *rtnl,
+ sd_netlink_message **ret,
+ uint16_t nlmsg_type,
+ int ifal_family) {
+
+ struct fib_rule_hdr *frh;
+ int r;
+
+ assert_return(rtnl_message_type_is_routing_policy_rule(nlmsg_type), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_NEWRULE)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
+
+ frh = NLMSG_DATA((*ret)->hdr);
+ frh->family = ifal_family;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_set_tos(sd_netlink_message *m, uint8_t tos) {
+ struct fib_rule_hdr *frh;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ frh = NLMSG_DATA(m->hdr);
+
+ frh->tos = tos;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_get_tos(sd_netlink_message *m, uint8_t *tos) {
+ struct fib_rule_hdr *frh;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ frh = NLMSG_DATA(m->hdr);
+
+ *tos = frh->tos;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_set_table(sd_netlink_message *m, uint8_t table) {
+ struct fib_rule_hdr *frh;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ frh = NLMSG_DATA(m->hdr);
+
+ frh->table = table;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_get_table(sd_netlink_message *m, uint8_t *table) {
+ struct fib_rule_hdr *frh;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ frh = NLMSG_DATA(m->hdr);
+
+ *table = frh->table;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_set_flags(sd_netlink_message *m, uint32_t flags) {
+ struct fib_rule_hdr *frh;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ frh = NLMSG_DATA(m->hdr);
+ frh->flags = flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_get_flags(sd_netlink_message *m, uint32_t *flags) {
+ struct fib_rule_hdr *frh;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ frh = NLMSG_DATA(m->hdr);
+ *flags = frh->flags;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_set_fib_type(sd_netlink_message *m, uint8_t type) {
+ struct fib_rule_hdr *frh;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ frh = NLMSG_DATA(m->hdr);
+
+ frh->action = type;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_get_fib_type(sd_netlink_message *m, uint8_t *type) {
+ struct fib_rule_hdr *frh;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ frh = NLMSG_DATA(m->hdr);
+
+ *type = frh->action;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_set_fib_dst_prefixlen(sd_netlink_message *m, uint8_t len) {
+ struct fib_rule_hdr *frh;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ frh = NLMSG_DATA(m->hdr);
+
+ frh->dst_len = len;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_get_fib_dst_prefixlen(sd_netlink_message *m, uint8_t *len) {
+ struct fib_rule_hdr *frh;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ frh = NLMSG_DATA(m->hdr);
+
+ *len = frh->dst_len;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_set_fib_src_prefixlen(sd_netlink_message *m, uint8_t len) {
+ struct fib_rule_hdr *frh;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ frh = NLMSG_DATA(m->hdr);
+
+ frh->src_len = len;
+
+ return 0;
+}
+
+int sd_rtnl_message_routing_policy_rule_get_fib_src_prefixlen(sd_netlink_message *m, uint8_t *len) {
+ struct fib_rule_hdr *frh;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL);
+
+ frh = NLMSG_DATA(m->hdr);
+
+ *len = frh->src_len;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_traffic_control(
+ sd_netlink *rtnl,
+ sd_netlink_message **ret,
+ uint16_t nlmsg_type,
+ int ifindex,
+ uint32_t handle,
+ uint32_t parent) {
+
+ struct tcmsg *tcm;
+ int r;
+
+ assert_return(rtnl_message_type_is_traffic_control(nlmsg_type), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (IN_SET(nlmsg_type, RTM_NEWQDISC, RTM_NEWTCLASS))
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
+
+ tcm = NLMSG_DATA((*ret)->hdr);
+ tcm->tcm_ifindex = ifindex;
+ tcm->tcm_handle = handle;
+ tcm->tcm_parent = parent;
+
+ return 0;
+}
+
+int sd_rtnl_message_traffic_control_get_ifindex(sd_netlink_message *m, int *ret) {
+ struct tcmsg *tcm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_traffic_control(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ tcm = NLMSG_DATA(m->hdr);
+ *ret = tcm->tcm_ifindex;
+
+ return 0;
+}
+
+int sd_rtnl_message_traffic_control_get_handle(sd_netlink_message *m, uint32_t *ret) {
+ struct tcmsg *tcm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_traffic_control(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ tcm = NLMSG_DATA(m->hdr);
+ *ret = tcm->tcm_handle;
+
+ return 0;
+}
+
+int sd_rtnl_message_traffic_control_get_parent(sd_netlink_message *m, uint32_t *ret) {
+ struct tcmsg *tcm;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(rtnl_message_type_is_traffic_control(m->hdr->nlmsg_type), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ tcm = NLMSG_DATA(m->hdr);
+ *ret = tcm->tcm_parent;
+
+ return 0;
+}
+
+int sd_rtnl_message_new_mdb(
+ sd_netlink *rtnl,
+ sd_netlink_message **ret,
+ uint16_t nlmsg_type,
+ int mdb_ifindex) {
+
+ struct br_port_msg *bpm;
+ int r;
+
+ assert_return(rtnl_message_type_is_mdb(nlmsg_type), -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = message_new(rtnl, ret, nlmsg_type);
+ if (r < 0)
+ return r;
+
+ if (nlmsg_type == RTM_NEWMDB)
+ (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
+
+ bpm = NLMSG_DATA((*ret)->hdr);
+ bpm->family = AF_BRIDGE;
+ bpm->ifindex = mdb_ifindex;
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-netlink/netlink-message.c b/src/libsystemd/sd-netlink/netlink-message.c
new file mode 100644
index 0000000..000a50e
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-message.c
@@ -0,0 +1,1421 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <unistd.h>
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "format-util.h"
+#include "memory-util.h"
+#include "netlink-internal.h"
+#include "netlink-types.h"
+#include "netlink-util.h"
+#include "socket-util.h"
+#include "strv.h"
+
+#define GET_CONTAINER(m, i) ((struct rtattr*)((uint8_t*)(m)->hdr + (m)->containers[i].offset))
+
+#define RTA_TYPE(rta) ((rta)->rta_type & NLA_TYPE_MASK)
+#define RTA_FLAGS(rta) ((rta)->rta_type & ~NLA_TYPE_MASK)
+
+int message_new_empty(sd_netlink *nl, sd_netlink_message **ret) {
+ sd_netlink_message *m;
+
+ assert(nl);
+ assert(ret);
+
+ /* Note that 'nl' is currently unused, if we start using it internally we must take care to
+ * avoid problems due to mutual references between buses and their queued messages. See sd-bus. */
+
+ m = new(sd_netlink_message, 1);
+ if (!m)
+ return -ENOMEM;
+
+ *m = (sd_netlink_message) {
+ .n_ref = 1,
+ .protocol = nl->protocol,
+ .sealed = false,
+ };
+
+ *ret = m;
+ return 0;
+}
+
+int message_new_full(
+ sd_netlink *nl,
+ uint16_t nlmsg_type,
+ const NLAPolicySet *policy_set,
+ size_t header_size,
+ sd_netlink_message **ret) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ size_t size;
+ int r;
+
+ assert(nl);
+ assert(policy_set);
+ assert(ret);
+
+ size = NLMSG_SPACE(header_size);
+ assert(size >= sizeof(struct nlmsghdr));
+
+ r = message_new_empty(nl, &m);
+ if (r < 0)
+ return r;
+
+ m->containers[0].policy_set = policy_set;
+
+ m->hdr = malloc0(size);
+ if (!m->hdr)
+ return -ENOMEM;
+
+ m->hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ m->hdr->nlmsg_len = size;
+ m->hdr->nlmsg_type = nlmsg_type;
+
+ *ret = TAKE_PTR(m);
+ return 0;
+}
+
+int message_new(sd_netlink *nl, sd_netlink_message **ret, uint16_t nlmsg_type) {
+ const NLAPolicySet *policy_set;
+ size_t size;
+ int r;
+
+ assert_return(nl, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = netlink_get_policy_set_and_header_size(nl, nlmsg_type, &policy_set, &size);
+ if (r < 0)
+ return r;
+
+ return message_new_full(nl, nlmsg_type, policy_set, size, ret);
+}
+
+int message_new_synthetic_error(sd_netlink *nl, int error, uint32_t serial, sd_netlink_message **ret) {
+ struct nlmsgerr *err;
+ int r;
+
+ assert(error <= 0);
+
+ r = message_new(nl, ret, NLMSG_ERROR);
+ if (r < 0)
+ return r;
+
+ message_seal(*ret);
+ (*ret)->hdr->nlmsg_seq = serial;
+
+ err = NLMSG_DATA((*ret)->hdr);
+ err->error = error;
+
+ return 0;
+}
+
+int sd_netlink_message_set_request_dump(sd_netlink_message *m, int dump) {
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+ assert_return(m->protocol != NETLINK_ROUTE ||
+ IN_SET(m->hdr->nlmsg_type,
+ RTM_GETLINK, RTM_GETLINKPROP, RTM_GETADDR, RTM_GETROUTE, RTM_GETNEIGH,
+ RTM_GETRULE, RTM_GETADDRLABEL, RTM_GETNEXTHOP, RTM_GETQDISC, RTM_GETTCLASS),
+ -EINVAL);
+
+ SET_FLAG(m->hdr->nlmsg_flags, NLM_F_DUMP, dump);
+
+ return 0;
+}
+
+DEFINE_TRIVIAL_REF_FUNC(sd_netlink_message, sd_netlink_message);
+
+sd_netlink_message* sd_netlink_message_unref(sd_netlink_message *m) {
+ while (m && --m->n_ref == 0) {
+ unsigned i;
+
+ free(m->hdr);
+
+ for (i = 0; i <= m->n_containers; i++)
+ free(m->containers[i].attributes);
+
+ sd_netlink_message *t = m;
+ m = m->next;
+ free(t);
+ }
+
+ return NULL;
+}
+
+int sd_netlink_message_get_type(sd_netlink_message *m, uint16_t *ret) {
+ assert_return(m, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ *ret = m->hdr->nlmsg_type;
+
+ return 0;
+}
+
+int sd_netlink_message_set_flags(sd_netlink_message *m, uint16_t flags) {
+ assert_return(m, -EINVAL);
+ assert_return(flags != 0, -EINVAL);
+
+ m->hdr->nlmsg_flags = flags;
+
+ return 0;
+}
+
+int sd_netlink_message_is_broadcast(sd_netlink_message *m) {
+ assert_return(m, -EINVAL);
+
+ return m->multicast_group != 0;
+}
+
+/* If successful the updated message will be correctly aligned, if unsuccessful the old message is untouched. */
+static int add_rtattr(sd_netlink_message *m, uint16_t attr_type, const void *data, size_t data_length) {
+ size_t message_length;
+ struct nlmsghdr *new_hdr;
+ struct rtattr *rta;
+ int offset;
+
+ assert(m);
+ assert(m->hdr);
+ assert(!m->sealed);
+ assert(NLMSG_ALIGN(m->hdr->nlmsg_len) == m->hdr->nlmsg_len);
+ assert(!data || data_length > 0);
+
+ /* get the new message size (with padding at the end) */
+ message_length = m->hdr->nlmsg_len + RTA_SPACE(data_length);
+
+ /* buffer should be smaller than both one page or 8K to be accepted by the kernel */
+ if (message_length > MIN(page_size(), 8192UL))
+ return -ENOBUFS;
+
+ /* realloc to fit the new attribute */
+ new_hdr = realloc(m->hdr, message_length);
+ if (!new_hdr)
+ return -ENOMEM;
+ m->hdr = new_hdr;
+
+ /* get pointer to the attribute we are about to add */
+ rta = (struct rtattr *) ((uint8_t *) m->hdr + m->hdr->nlmsg_len);
+
+ rtattr_append_attribute_internal(rta, attr_type, data, data_length);
+
+ /* if we are inside containers, extend them */
+ for (unsigned i = 0; i < m->n_containers; i++)
+ GET_CONTAINER(m, i)->rta_len += RTA_SPACE(data_length);
+
+ /* update message size */
+ offset = m->hdr->nlmsg_len;
+ m->hdr->nlmsg_len = message_length;
+
+ /* return old message size */
+ return offset;
+}
+
+static int message_attribute_has_type(sd_netlink_message *m, size_t *ret_size, uint16_t attr_type, NLAType type) {
+ const NLAPolicy *policy;
+
+ assert(m);
+
+ policy = policy_set_get_policy(m->containers[m->n_containers].policy_set, attr_type);
+ if (!policy)
+ return -EOPNOTSUPP;
+
+ if (policy_get_type(policy) != type)
+ return -EINVAL;
+
+ if (ret_size)
+ *ret_size = policy_get_size(policy);
+ return 0;
+}
+
+int sd_netlink_message_append_string(sd_netlink_message *m, uint16_t attr_type, const char *data) {
+ size_t length, size;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(data, -EINVAL);
+
+ r = message_attribute_has_type(m, &size, attr_type, NETLINK_TYPE_STRING);
+ if (r < 0)
+ return r;
+
+ if (size) {
+ length = strnlen(data, size+1);
+ if (length > size)
+ return -EINVAL;
+ } else
+ length = strlen(data);
+
+ r = add_rtattr(m, attr_type, data, length + 1);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_strv(sd_netlink_message *m, uint16_t attr_type, const char* const *data) {
+ size_t length, size;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(data, -EINVAL);
+
+ r = message_attribute_has_type(m, &size, attr_type, NETLINK_TYPE_STRING);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, data) {
+ if (size) {
+ length = strnlen(*p, size+1);
+ if (length > size)
+ return -EINVAL;
+ } else
+ length = strlen(*p);
+
+ r = add_rtattr(m, attr_type, *p, length + 1);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int sd_netlink_message_append_flag(sd_netlink_message *m, uint16_t attr_type) {
+ size_t size;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, &size, attr_type, NETLINK_TYPE_FLAG);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, attr_type, NULL, 0);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_u8(sd_netlink_message *m, uint16_t attr_type, uint8_t data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_U8);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, attr_type, &data, sizeof(uint8_t));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_u16(sd_netlink_message *m, uint16_t attr_type, uint16_t data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_U16);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, attr_type, &data, sizeof(uint16_t));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_u32(sd_netlink_message *m, uint16_t attr_type, uint32_t data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_U32);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, attr_type, &data, sizeof(uint32_t));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_u64(sd_netlink_message *m, uint16_t attr_type, uint64_t data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_U64);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, attr_type, &data, sizeof(uint64_t));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_s8(sd_netlink_message *m, uint16_t attr_type, int8_t data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_S8);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, attr_type, &data, sizeof(int8_t));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_s16(sd_netlink_message *m, uint16_t attr_type, int16_t data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_S16);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, attr_type, &data, sizeof(int16_t));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_s32(sd_netlink_message *m, uint16_t attr_type, int32_t data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_S32);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, attr_type, &data, sizeof(int32_t));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_s64(sd_netlink_message *m, uint16_t attr_type, int64_t data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_S64);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, attr_type, &data, sizeof(int64_t));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_data(sd_netlink_message *m, uint16_t attr_type, const void *data, size_t len) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = add_rtattr(m, attr_type, data, len);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_container_data(
+ sd_netlink_message *m,
+ uint16_t container_type,
+ uint16_t attr_type,
+ const void *data,
+ size_t len) {
+
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+
+ r = sd_netlink_message_open_container(m, container_type);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_data(m, attr_type, data, len);
+ if (r < 0)
+ return r;
+
+ return sd_netlink_message_close_container(m);
+}
+
+int netlink_message_append_in_addr_union(sd_netlink_message *m, uint16_t attr_type, int family, const union in_addr_union *data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(data, -EINVAL);
+ assert_return(IN_SET(family, AF_INET, AF_INET6), -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_IN_ADDR);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, attr_type, data, FAMILY_ADDRESS_SIZE(family));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_in_addr(sd_netlink_message *m, uint16_t attr_type, const struct in_addr *data) {
+ return netlink_message_append_in_addr_union(m, attr_type, AF_INET, (const union in_addr_union *) data);
+}
+
+int sd_netlink_message_append_in6_addr(sd_netlink_message *m, uint16_t attr_type, const struct in6_addr *data) {
+ return netlink_message_append_in_addr_union(m, attr_type, AF_INET6, (const union in_addr_union *) data);
+}
+
+int netlink_message_append_sockaddr_union(sd_netlink_message *m, uint16_t attr_type, const union sockaddr_union *data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(data, -EINVAL);
+ assert_return(IN_SET(data->sa.sa_family, AF_INET, AF_INET6), -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_SOCKADDR);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, attr_type, data, data->sa.sa_family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_sockaddr_in(sd_netlink_message *m, uint16_t attr_type, const struct sockaddr_in *data) {
+ return netlink_message_append_sockaddr_union(m, attr_type, (const union sockaddr_union *) data);
+}
+
+int sd_netlink_message_append_sockaddr_in6(sd_netlink_message *m, uint16_t attr_type, const struct sockaddr_in6 *data) {
+ return netlink_message_append_sockaddr_union(m, attr_type, (const union sockaddr_union *) data);
+}
+
+int sd_netlink_message_append_ether_addr(sd_netlink_message *m, uint16_t attr_type, const struct ether_addr *data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(data, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_ETHER_ADDR);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, attr_type, data, ETH_ALEN);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int netlink_message_append_hw_addr(sd_netlink_message *m, uint16_t attr_type, const struct hw_addr_data *data) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(data, -EINVAL);
+ assert_return(data->length > 0, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_ETHER_ADDR);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, attr_type, data->bytes, data->length);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_append_cache_info(sd_netlink_message *m, uint16_t attr_type, const struct ifa_cacheinfo *info) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(info, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_CACHE_INFO);
+ if (r < 0)
+ return r;
+
+ r = add_rtattr(m, attr_type, info, sizeof(struct ifa_cacheinfo));
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int sd_netlink_message_open_container(sd_netlink_message *m, uint16_t attr_type) {
+ size_t size;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ /* m->containers[m->n_containers + 1] is accessed both in read and write. Prevent access out of bound */
+ assert_return(m->n_containers < (NETLINK_CONTAINER_DEPTH - 1), -ERANGE);
+
+ r = message_attribute_has_type(m, &size, attr_type, NETLINK_TYPE_NESTED);
+ if (r < 0) {
+ const NLAPolicySetUnion *policy_set_union;
+ int family;
+
+ r = message_attribute_has_type(m, &size, attr_type, NETLINK_TYPE_NESTED_UNION_BY_FAMILY);
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_get_family(m, &family);
+ if (r < 0)
+ return r;
+
+ policy_set_union = policy_set_get_policy_set_union(
+ m->containers[m->n_containers].policy_set,
+ attr_type);
+ if (!policy_set_union)
+ return -EOPNOTSUPP;
+
+ m->containers[m->n_containers + 1].policy_set =
+ policy_set_union_get_policy_set_by_family(
+ policy_set_union,
+ family);
+ } else
+ m->containers[m->n_containers + 1].policy_set =
+ policy_set_get_policy_set(
+ m->containers[m->n_containers].policy_set,
+ attr_type);
+ if (!m->containers[m->n_containers + 1].policy_set)
+ return -EOPNOTSUPP;
+
+ r = add_rtattr(m, attr_type | NLA_F_NESTED, NULL, size);
+ if (r < 0)
+ return r;
+
+ m->containers[m->n_containers++].offset = r;
+
+ return 0;
+}
+
+int sd_netlink_message_open_container_union(sd_netlink_message *m, uint16_t attr_type, const char *key) {
+ const NLAPolicySetUnion *policy_set_union;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(m->n_containers < (NETLINK_CONTAINER_DEPTH - 1), -ERANGE);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_NESTED_UNION_BY_STRING);
+ if (r < 0)
+ return r;
+
+ policy_set_union = policy_set_get_policy_set_union(
+ m->containers[m->n_containers].policy_set,
+ attr_type);
+ if (!policy_set_union)
+ return -EOPNOTSUPP;
+
+ m->containers[m->n_containers + 1].policy_set =
+ policy_set_union_get_policy_set_by_string(
+ policy_set_union,
+ key);
+ if (!m->containers[m->n_containers + 1].policy_set)
+ return -EOPNOTSUPP;
+
+ r = sd_netlink_message_append_string(m, policy_set_union_get_match_attribute(policy_set_union), key);
+ if (r < 0)
+ return r;
+
+ /* do we ever need non-null size */
+ r = add_rtattr(m, attr_type | NLA_F_NESTED, NULL, 0);
+ if (r < 0)
+ return r;
+
+ m->containers[m->n_containers++].offset = r;
+
+ return 0;
+}
+
+int sd_netlink_message_close_container(sd_netlink_message *m) {
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(m->n_containers > 0, -EINVAL);
+
+ m->containers[m->n_containers].policy_set = NULL;
+ m->containers[m->n_containers].offset = 0;
+ m->n_containers--;
+
+ return 0;
+}
+
+int sd_netlink_message_open_array(sd_netlink_message *m, uint16_t attr_type) {
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(m->n_containers < (NETLINK_CONTAINER_DEPTH - 1), -ERANGE);
+
+ r = add_rtattr(m, attr_type | NLA_F_NESTED, NULL, 0);
+ if (r < 0)
+ return r;
+
+ m->containers[m->n_containers].offset = r;
+ m->n_containers++;
+ m->containers[m->n_containers].policy_set = m->containers[m->n_containers - 1].policy_set;
+
+ return 0;
+}
+
+int sd_netlink_message_cancel_array(sd_netlink_message *m) {
+ uint32_t rta_len;
+
+ assert_return(m, -EINVAL);
+ assert_return(!m->sealed, -EPERM);
+ assert_return(m->n_containers > 1, -EINVAL);
+
+ rta_len = GET_CONTAINER(m, (m->n_containers - 1))->rta_len;
+
+ for (unsigned i = 0; i < m->n_containers; i++)
+ GET_CONTAINER(m, i)->rta_len -= rta_len;
+
+ m->hdr->nlmsg_len -= rta_len;
+
+ m->n_containers--;
+ m->containers[m->n_containers].policy_set = NULL;
+
+ return 0;
+}
+
+static int netlink_message_read_internal(
+ sd_netlink_message *m,
+ uint16_t attr_type,
+ void **ret_data,
+ bool *ret_net_byteorder) {
+
+ struct netlink_attribute *attribute;
+ struct rtattr *rta;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EPERM);
+
+ assert(m->n_containers < NETLINK_CONTAINER_DEPTH);
+
+ if (!m->containers[m->n_containers].attributes)
+ return -ENODATA;
+
+ if (attr_type > m->containers[m->n_containers].max_attribute)
+ return -ENODATA;
+
+ attribute = &m->containers[m->n_containers].attributes[attr_type];
+
+ if (attribute->offset == 0)
+ return -ENODATA;
+
+ rta = (struct rtattr*)((uint8_t *) m->hdr + attribute->offset);
+
+ if (ret_data)
+ *ret_data = RTA_DATA(rta);
+
+ if (ret_net_byteorder)
+ *ret_net_byteorder = attribute->net_byteorder;
+
+ return RTA_PAYLOAD(rta);
+}
+
+int sd_netlink_message_read(sd_netlink_message *m, uint16_t attr_type, size_t size, void *data) {
+ void *attr_data;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = netlink_message_read_internal(m, attr_type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+
+ if ((size_t) r > size)
+ return -ENOBUFS;
+
+ if (data)
+ memcpy(data, attr_data, r);
+
+ return r;
+}
+
+int sd_netlink_message_read_data(sd_netlink_message *m, uint16_t attr_type, size_t *ret_size, void **ret_data) {
+ void *attr_data;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = netlink_message_read_internal(m, attr_type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+
+ if (ret_data) {
+ void *data;
+
+ data = memdup(attr_data, r);
+ if (!data)
+ return -ENOMEM;
+
+ *ret_data = data;
+ }
+
+ if (ret_size)
+ *ret_size = r;
+
+ return r;
+}
+
+int sd_netlink_message_read_data_suffix0(sd_netlink_message *m, uint16_t attr_type, size_t *ret_size, void **ret_data) {
+ void *attr_data;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = netlink_message_read_internal(m, attr_type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+
+ if (ret_data) {
+ void *data;
+
+ data = memdup_suffix0(attr_data, r);
+ if (!data)
+ return -ENOMEM;
+
+ *ret_data = data;
+ }
+
+ if (ret_size)
+ *ret_size = r;
+
+ return r;
+}
+
+int sd_netlink_message_read_string_strdup(sd_netlink_message *m, uint16_t attr_type, char **data) {
+ void *attr_data;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_STRING);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, attr_type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+
+ if (data) {
+ char *str;
+
+ str = strndup(attr_data, r);
+ if (!str)
+ return -ENOMEM;
+
+ *data = str;
+ }
+
+ return 0;
+}
+
+int sd_netlink_message_read_string(sd_netlink_message *m, uint16_t attr_type, const char **data) {
+ void *attr_data;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_STRING);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, attr_type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+
+ if (strnlen(attr_data, r) >= (size_t) r)
+ return -EIO;
+
+ if (data)
+ *data = (const char *) attr_data;
+
+ return 0;
+}
+
+int sd_netlink_message_read_u8(sd_netlink_message *m, uint16_t attr_type, uint8_t *data) {
+ void *attr_data;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_U8);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, attr_type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+
+ if ((size_t) r < sizeof(uint8_t))
+ return -EIO;
+
+ if (data)
+ *data = *(uint8_t *) attr_data;
+
+ return 0;
+}
+
+int sd_netlink_message_read_u16(sd_netlink_message *m, uint16_t attr_type, uint16_t *data) {
+ void *attr_data;
+ bool net_byteorder;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_U16);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, attr_type, &attr_data, &net_byteorder);
+ if (r < 0)
+ return r;
+
+ if ((size_t) r < sizeof(uint16_t))
+ return -EIO;
+
+ if (data) {
+ if (net_byteorder)
+ *data = be16toh(*(uint16_t *) attr_data);
+ else
+ *data = *(uint16_t *) attr_data;
+ }
+
+ return 0;
+}
+
+int sd_netlink_message_read_u32(sd_netlink_message *m, uint16_t attr_type, uint32_t *data) {
+ void *attr_data;
+ bool net_byteorder;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_U32);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, attr_type, &attr_data, &net_byteorder);
+ if (r < 0)
+ return r;
+
+ if ((size_t) r < sizeof(uint32_t))
+ return -EIO;
+
+ if (data) {
+ if (net_byteorder)
+ *data = be32toh(*(uint32_t *) attr_data);
+ else
+ *data = *(uint32_t *) attr_data;
+ }
+
+ return 0;
+}
+
+int sd_netlink_message_read_ether_addr(sd_netlink_message *m, uint16_t attr_type, struct ether_addr *data) {
+ void *attr_data;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_ETHER_ADDR);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, attr_type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+
+ if ((size_t) r < sizeof(struct ether_addr))
+ return -EIO;
+
+ if (data)
+ memcpy(data, attr_data, sizeof(struct ether_addr));
+
+ return 0;
+}
+
+int netlink_message_read_hw_addr(sd_netlink_message *m, uint16_t attr_type, struct hw_addr_data *data) {
+ void *attr_data;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_ETHER_ADDR);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, attr_type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+
+ if (r > HW_ADDR_MAX_SIZE)
+ return -EIO;
+
+ if (data) {
+ memcpy(data->bytes, attr_data, r);
+ data->length = r;
+ }
+
+ return 0;
+}
+
+int sd_netlink_message_read_cache_info(sd_netlink_message *m, uint16_t attr_type, struct ifa_cacheinfo *info) {
+ void *attr_data;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_CACHE_INFO);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, attr_type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+
+ if ((size_t) r < sizeof(struct ifa_cacheinfo))
+ return -EIO;
+
+ if (info)
+ memcpy(info, attr_data, sizeof(struct ifa_cacheinfo));
+
+ return 0;
+}
+
+int netlink_message_read_in_addr_union(sd_netlink_message *m, uint16_t attr_type, int family, union in_addr_union *data) {
+ void *attr_data;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(IN_SET(family, AF_INET, AF_INET6), -EINVAL);
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_IN_ADDR);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, attr_type, &attr_data, NULL);
+ if (r < 0)
+ return r;
+
+ if ((size_t) r < FAMILY_ADDRESS_SIZE(family))
+ return -EIO;
+
+ if (data)
+ memcpy(data, attr_data, FAMILY_ADDRESS_SIZE(family));
+
+ return 0;
+}
+
+int sd_netlink_message_read_in_addr(sd_netlink_message *m, uint16_t attr_type, struct in_addr *data) {
+ union in_addr_union u;
+ int r;
+
+ r = netlink_message_read_in_addr_union(m, attr_type, AF_INET, &u);
+ if (r >= 0 && data)
+ *data = u.in;
+
+ return r;
+}
+
+int sd_netlink_message_read_in6_addr(sd_netlink_message *m, uint16_t attr_type, struct in6_addr *data) {
+ union in_addr_union u;
+ int r;
+
+ r = netlink_message_read_in_addr_union(m, attr_type, AF_INET6, &u);
+ if (r >= 0 && data)
+ *data = u.in6;
+
+ return r;
+}
+
+int sd_netlink_message_has_flag(sd_netlink_message *m, uint16_t attr_type) {
+ void *attr_data;
+ int r;
+
+ assert_return(m, -EINVAL);
+
+ /* This returns 1 when the flag is set, 0 when not set, negative errno on error. */
+
+ r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_FLAG);
+ if (r < 0)
+ return r;
+
+ r = netlink_message_read_internal(m, attr_type, &attr_data, NULL);
+ if (r == -ENODATA)
+ return 0;
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int sd_netlink_message_read_strv(sd_netlink_message *m, uint16_t container_type, uint16_t attr_type, char ***ret) {
+ _cleanup_strv_free_ char **s = NULL;
+ const NLAPolicySet *policy_set;
+ const NLAPolicy *policy;
+ struct rtattr *rta;
+ void *container;
+ size_t rt_len;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->n_containers < NETLINK_CONTAINER_DEPTH, -EINVAL);
+
+ policy = policy_set_get_policy(
+ m->containers[m->n_containers].policy_set,
+ container_type);
+ if (!policy)
+ return -EOPNOTSUPP;
+
+ if (policy_get_type(policy) != NETLINK_TYPE_NESTED)
+ return -EINVAL;
+
+ policy_set = policy_set_get_policy_set(
+ m->containers[m->n_containers].policy_set,
+ container_type);
+ if (!policy_set)
+ return -EOPNOTSUPP;
+
+ policy = policy_set_get_policy(policy_set, attr_type);
+ if (!policy)
+ return -EOPNOTSUPP;
+
+ if (policy_get_type(policy) != NETLINK_TYPE_STRING)
+ return -EINVAL;
+
+ r = netlink_message_read_internal(m, container_type, &container, NULL);
+ if (r < 0)
+ return r;
+
+ rt_len = (size_t) r;
+ rta = container;
+
+ /* RTA_OK() macro compares with rta->rt_len, which is unsigned short, and
+ * LGTM.com analysis does not like the type difference. Hence, here we
+ * introduce an unsigned short variable as a workaround. */
+ unsigned short len = rt_len;
+ for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
+ uint16_t type;
+
+ type = RTA_TYPE(rta);
+ if (type != attr_type)
+ continue;
+
+ r = strv_extend(&s, RTA_DATA(rta));
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(s);
+ return 0;
+}
+
+static int netlink_container_parse(
+ sd_netlink_message *m,
+ struct netlink_container *container,
+ struct rtattr *rta,
+ size_t rt_len) {
+
+ _cleanup_free_ struct netlink_attribute *attributes = NULL;
+ uint16_t max_attr = 0;
+
+ /* RTA_OK() macro compares with rta->rt_len, which is unsigned short, and
+ * LGTM.com analysis does not like the type difference. Hence, here we
+ * introduce an unsigned short variable as a workaround. */
+ unsigned short len = rt_len;
+ for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
+ uint16_t attr;
+
+ attr = RTA_TYPE(rta);
+ max_attr = MAX(max_attr, attr);
+
+ if (!GREEDY_REALLOC0(attributes, (size_t) max_attr + 1))
+ return -ENOMEM;
+
+ if (attributes[attr].offset != 0)
+ log_debug("sd-netlink: message parse - overwriting repeated attribute");
+
+ attributes[attr].offset = (uint8_t *) rta - (uint8_t *) m->hdr;
+ attributes[attr].nested = RTA_FLAGS(rta) & NLA_F_NESTED;
+ attributes[attr].net_byteorder = RTA_FLAGS(rta) & NLA_F_NET_BYTEORDER;
+ }
+
+ container->attributes = TAKE_PTR(attributes);
+ container->max_attribute = max_attr;
+
+ return 0;
+}
+
+int sd_netlink_message_enter_container(sd_netlink_message *m, uint16_t attr_type) {
+ const NLAPolicy *policy;
+ const NLAPolicySet *policy_set;
+ void *container;
+ size_t size;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->n_containers < (NETLINK_CONTAINER_DEPTH - 1), -EINVAL);
+
+ policy = policy_set_get_policy(
+ m->containers[m->n_containers].policy_set,
+ attr_type);
+ if (!policy)
+ return -EOPNOTSUPP;
+
+ switch (policy_get_type(policy)) {
+ case NETLINK_TYPE_NESTED:
+ policy_set = policy_set_get_policy_set(
+ m->containers[m->n_containers].policy_set,
+ attr_type);
+ break;
+
+ case NETLINK_TYPE_NESTED_UNION_BY_STRING: {
+ const NLAPolicySetUnion *policy_set_union;
+ const char *key;
+
+ policy_set_union = policy_get_policy_set_union(policy);
+ if (!policy_set_union)
+ return -EOPNOTSUPP;
+
+ r = sd_netlink_message_read_string(
+ m,
+ policy_set_union_get_match_attribute(policy_set_union),
+ &key);
+ if (r < 0)
+ return r;
+
+ policy_set = policy_set_union_get_policy_set_by_string(
+ policy_set_union,
+ key);
+ break;
+ }
+ case NETLINK_TYPE_NESTED_UNION_BY_FAMILY: {
+ const NLAPolicySetUnion *policy_set_union;
+ int family;
+
+ policy_set_union = policy_get_policy_set_union(policy);
+ if (!policy_set_union)
+ return -EOPNOTSUPP;
+
+ r = sd_rtnl_message_get_family(m, &family);
+ if (r < 0)
+ return r;
+
+ policy_set = policy_set_union_get_policy_set_by_family(
+ policy_set_union,
+ family);
+ break;
+ }
+ default:
+ assert_not_reached();
+ }
+ if (!policy_set)
+ return -EOPNOTSUPP;
+
+ r = netlink_message_read_internal(m, attr_type, &container, NULL);
+ if (r < 0)
+ return r;
+
+ size = (size_t) r;
+ m->n_containers++;
+
+ r = netlink_container_parse(m,
+ &m->containers[m->n_containers],
+ container,
+ size);
+ if (r < 0) {
+ m->n_containers--;
+ return r;
+ }
+
+ m->containers[m->n_containers].policy_set = policy_set;
+
+ return 0;
+}
+
+int sd_netlink_message_enter_array(sd_netlink_message *m, uint16_t attr_type) {
+ void *container;
+ size_t size;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->n_containers < (NETLINK_CONTAINER_DEPTH - 1), -EINVAL);
+
+ r = netlink_message_read_internal(m, attr_type, &container, NULL);
+ if (r < 0)
+ return r;
+
+ size = (size_t) r;
+ m->n_containers++;
+
+ r = netlink_container_parse(m,
+ &m->containers[m->n_containers],
+ container,
+ size);
+ if (r < 0) {
+ m->n_containers--;
+ return r;
+ }
+
+ m->containers[m->n_containers].policy_set = m->containers[m->n_containers - 1].policy_set;
+
+ return 0;
+}
+
+int sd_netlink_message_exit_container(sd_netlink_message *m) {
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EINVAL);
+ assert_return(m->n_containers > 0, -EINVAL);
+
+ m->containers[m->n_containers].attributes = mfree(m->containers[m->n_containers].attributes);
+ m->containers[m->n_containers].max_attribute = 0;
+ m->containers[m->n_containers].policy_set = NULL;
+
+ m->n_containers--;
+
+ return 0;
+}
+
+int sd_netlink_message_get_max_attribute(sd_netlink_message *m, uint16_t *ret) {
+ assert_return(m, -EINVAL);
+ assert_return(m->sealed, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ *ret = m->containers[m->n_containers].max_attribute;
+ return 0;
+}
+
+int sd_netlink_message_is_error(sd_netlink_message *m) {
+ assert_return(m, 0);
+ assert_return(m->hdr, 0);
+
+ return m->hdr->nlmsg_type == NLMSG_ERROR;
+}
+
+int sd_netlink_message_get_errno(sd_netlink_message *m) {
+ struct nlmsgerr *err;
+
+ assert_return(m, -EINVAL);
+ assert_return(m->hdr, -EINVAL);
+
+ if (!sd_netlink_message_is_error(m))
+ return 0;
+
+ err = NLMSG_DATA(m->hdr);
+
+ return err->error;
+}
+
+static int netlink_message_parse_error(sd_netlink_message *m) {
+ struct nlmsgerr *err = NLMSG_DATA(m->hdr);
+ size_t hlen = sizeof(struct nlmsgerr);
+
+ /* no TLVs, nothing to do here */
+ if (!(m->hdr->nlmsg_flags & NLM_F_ACK_TLVS))
+ return 0;
+
+ /* if NLM_F_CAPPED is set then the inner err msg was capped */
+ if (!(m->hdr->nlmsg_flags & NLM_F_CAPPED))
+ hlen += err->msg.nlmsg_len - sizeof(struct nlmsghdr);
+
+ if (m->hdr->nlmsg_len <= NLMSG_SPACE(hlen))
+ return 0;
+
+ return netlink_container_parse(m,
+ &m->containers[m->n_containers],
+ (struct rtattr*)((uint8_t*) NLMSG_DATA(m->hdr) + hlen),
+ NLMSG_PAYLOAD(m->hdr, hlen));
+}
+
+int sd_netlink_message_rewind(sd_netlink_message *m, sd_netlink *nl) {
+ size_t size;
+ int r;
+
+ assert_return(m, -EINVAL);
+ assert_return(nl, -EINVAL);
+
+ /* don't allow appending to message once parsed */
+ message_seal(m);
+
+ for (unsigned i = 1; i <= m->n_containers; i++)
+ m->containers[i].attributes = mfree(m->containers[i].attributes);
+
+ m->n_containers = 0;
+
+ if (m->containers[0].attributes)
+ /* top-level attributes have already been parsed */
+ return 0;
+
+ assert(m->hdr);
+
+ r = netlink_get_policy_set_and_header_size(nl, m->hdr->nlmsg_type,
+ &m->containers[0].policy_set, &size);
+ if (r < 0)
+ return r;
+
+ if (sd_netlink_message_is_error(m))
+ return netlink_message_parse_error(m);
+
+ return netlink_container_parse(m,
+ &m->containers[0],
+ (struct rtattr*)((uint8_t*) NLMSG_DATA(m->hdr) + NLMSG_ALIGN(size)),
+ NLMSG_PAYLOAD(m->hdr, size));
+}
+
+void message_seal(sd_netlink_message *m) {
+ assert(m);
+
+ m->sealed = true;
+}
+
+sd_netlink_message *sd_netlink_message_next(sd_netlink_message *m) {
+ assert_return(m, NULL);
+
+ return m->next;
+}
diff --git a/src/libsystemd/sd-netlink/netlink-slot.c b/src/libsystemd/sd-netlink/netlink-slot.c
new file mode 100644
index 0000000..d85d2cd
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-slot.c
@@ -0,0 +1,188 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "netlink-internal.h"
+#include "netlink-slot.h"
+#include "string-util.h"
+
+int netlink_slot_allocate(
+ sd_netlink *nl,
+ bool floating,
+ NetlinkSlotType type,
+ size_t extra,
+ void *userdata,
+ const char *description,
+ sd_netlink_slot **ret) {
+
+ _cleanup_free_ sd_netlink_slot *slot = NULL;
+
+ assert(nl);
+ assert(ret);
+
+ slot = malloc0(offsetof(sd_netlink_slot, reply_callback) + extra);
+ if (!slot)
+ return -ENOMEM;
+
+ slot->n_ref = 1;
+ slot->netlink = nl;
+ slot->userdata = userdata;
+ slot->type = type;
+ slot->floating = floating;
+
+ if (description) {
+ slot->description = strdup(description);
+ if (!slot->description)
+ return -ENOMEM;
+ }
+
+ if (!floating)
+ sd_netlink_ref(nl);
+
+ LIST_PREPEND(slots, nl->slots, slot);
+
+ *ret = TAKE_PTR(slot);
+
+ return 0;
+}
+
+void netlink_slot_disconnect(sd_netlink_slot *slot, bool unref) {
+ sd_netlink *nl;
+
+ assert(slot);
+
+ nl = slot->netlink;
+ if (!nl)
+ return;
+
+ switch (slot->type) {
+
+ case NETLINK_REPLY_CALLBACK:
+ (void) hashmap_remove(nl->reply_callbacks, &slot->reply_callback.serial);
+
+ if (slot->reply_callback.timeout != USEC_INFINITY)
+ prioq_remove(nl->reply_callbacks_prioq, &slot->reply_callback, &slot->reply_callback.prioq_idx);
+
+ break;
+ case NETLINK_MATCH_CALLBACK:
+ LIST_REMOVE(match_callbacks, nl->match_callbacks, &slot->match_callback);
+
+ for (size_t i = 0; i < slot->match_callback.n_groups; i++)
+ (void) socket_broadcast_group_unref(nl, slot->match_callback.groups[i]);
+
+ slot->match_callback.n_groups = 0;
+ slot->match_callback.groups = mfree(slot->match_callback.groups);
+
+ break;
+ default:
+ assert_not_reached();
+ }
+
+ slot->type = _NETLINK_SLOT_INVALID;
+ slot->netlink = NULL;
+ LIST_REMOVE(slots, nl->slots, slot);
+
+ if (!slot->floating)
+ sd_netlink_unref(nl);
+ else if (unref)
+ sd_netlink_slot_unref(slot);
+}
+
+static sd_netlink_slot* netlink_slot_free(sd_netlink_slot *slot) {
+ assert(slot);
+
+ netlink_slot_disconnect(slot, false);
+
+ if (slot->destroy_callback)
+ slot->destroy_callback(slot->userdata);
+
+ free(slot->description);
+ return mfree(slot);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_netlink_slot, sd_netlink_slot, netlink_slot_free);
+
+sd_netlink *sd_netlink_slot_get_netlink(sd_netlink_slot *slot) {
+ assert_return(slot, NULL);
+
+ return slot->netlink;
+}
+
+void *sd_netlink_slot_get_userdata(sd_netlink_slot *slot) {
+ assert_return(slot, NULL);
+
+ return slot->userdata;
+}
+
+void *sd_netlink_slot_set_userdata(sd_netlink_slot *slot, void *userdata) {
+ void *ret;
+
+ assert_return(slot, NULL);
+
+ ret = slot->userdata;
+ slot->userdata = userdata;
+
+ return ret;
+}
+
+int sd_netlink_slot_get_destroy_callback(sd_netlink_slot *slot, sd_netlink_destroy_t *callback) {
+ assert_return(slot, -EINVAL);
+
+ if (callback)
+ *callback = slot->destroy_callback;
+
+ return !!slot->destroy_callback;
+}
+
+int sd_netlink_slot_set_destroy_callback(sd_netlink_slot *slot, sd_netlink_destroy_t callback) {
+ assert_return(slot, -EINVAL);
+
+ slot->destroy_callback = callback;
+ return 0;
+}
+
+int sd_netlink_slot_get_floating(sd_netlink_slot *slot) {
+ assert_return(slot, -EINVAL);
+
+ return slot->floating;
+}
+
+int sd_netlink_slot_set_floating(sd_netlink_slot *slot, int b) {
+ assert_return(slot, -EINVAL);
+
+ if (slot->floating == !!b)
+ return 0;
+
+ if (!slot->netlink) /* Already disconnected */
+ return -ESTALE;
+
+ slot->floating = b;
+
+ if (b) {
+ sd_netlink_slot_ref(slot);
+ sd_netlink_unref(slot->netlink);
+ } else {
+ sd_netlink_ref(slot->netlink);
+ sd_netlink_slot_unref(slot);
+ }
+
+ return 1;
+}
+
+int sd_netlink_slot_get_description(sd_netlink_slot *slot, const char **description) {
+ assert_return(slot, -EINVAL);
+
+ if (description)
+ *description = slot->description;
+
+ return !!slot->description;
+}
+
+int sd_netlink_slot_set_description(sd_netlink_slot *slot, const char *description) {
+ assert_return(slot, -EINVAL);
+
+ return free_and_strdup(&slot->description, description);
+}
diff --git a/src/libsystemd/sd-netlink/netlink-slot.h b/src/libsystemd/sd-netlink/netlink-slot.h
new file mode 100644
index 0000000..79de817
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-slot.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-netlink.h"
+
+int netlink_slot_allocate(
+ sd_netlink *nl,
+ bool floating,
+ NetlinkSlotType type,
+ size_t extra,
+ void *userdata,
+ const char *description,
+ sd_netlink_slot **ret);
+void netlink_slot_disconnect(sd_netlink_slot *slot, bool unref);
diff --git a/src/libsystemd/sd-netlink/netlink-socket.c b/src/libsystemd/sd-netlink/netlink-socket.c
new file mode 100644
index 0000000..64cde89
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-socket.c
@@ -0,0 +1,459 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <malloc.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <unistd.h>
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "iovec-util.h"
+#include "netlink-internal.h"
+#include "netlink-types.h"
+#include "socket-util.h"
+
+static int broadcast_groups_get(sd_netlink *nl) {
+ _cleanup_free_ uint32_t *groups = NULL;
+ socklen_t len = 0, old_len;
+ int r;
+
+ assert(nl);
+ assert(nl->fd >= 0);
+
+ if (getsockopt(nl->fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, NULL, &len) < 0) {
+ if (errno != ENOPROTOOPT)
+ return -errno;
+
+ nl->broadcast_group_dont_leave = true;
+ return 0;
+ }
+
+ if (len == 0)
+ return 0;
+
+ groups = new0(uint32_t, len);
+ if (!groups)
+ return -ENOMEM;
+
+ old_len = len;
+
+ if (getsockopt(nl->fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, groups, &len) < 0)
+ return -errno;
+
+ if (old_len != len)
+ return -EIO;
+
+ for (unsigned i = 0; i < len; i++)
+ for (unsigned j = 0; j < sizeof(uint32_t) * 8; j++)
+ if (groups[i] & (1U << j)) {
+ unsigned group = i * sizeof(uint32_t) * 8 + j + 1;
+
+ r = hashmap_ensure_put(&nl->broadcast_group_refs, NULL, UINT_TO_PTR(group), UINT_TO_PTR(1));
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int socket_bind(sd_netlink *nl) {
+ socklen_t addrlen;
+ int r;
+
+ r = setsockopt_int(nl->fd, SOL_NETLINK, NETLINK_PKTINFO, true);
+ if (r < 0)
+ return r;
+
+ addrlen = sizeof(nl->sockaddr);
+
+ /* ignore EINVAL to allow binding an already bound socket */
+ if (bind(nl->fd, &nl->sockaddr.sa, addrlen) < 0 && errno != EINVAL)
+ return -errno;
+
+ if (getsockname(nl->fd, &nl->sockaddr.sa, &addrlen) < 0)
+ return -errno;
+
+ return broadcast_groups_get(nl);
+}
+
+static unsigned broadcast_group_get_ref(sd_netlink *nl, unsigned group) {
+ assert(nl);
+
+ return PTR_TO_UINT(hashmap_get(nl->broadcast_group_refs, UINT_TO_PTR(group)));
+}
+
+static int broadcast_group_set_ref(sd_netlink *nl, unsigned group, unsigned n_ref) {
+ int r;
+
+ assert(nl);
+
+ r = hashmap_ensure_allocated(&nl->broadcast_group_refs, NULL);
+ if (r < 0)
+ return r;
+
+ return hashmap_replace(nl->broadcast_group_refs, UINT_TO_PTR(group), UINT_TO_PTR(n_ref));
+}
+
+static int broadcast_group_join(sd_netlink *nl, unsigned group) {
+ assert(nl);
+ assert(nl->fd >= 0);
+ assert(group > 0);
+
+ /* group is "unsigned", but netlink(7) says the argument for NETLINK_ADD_MEMBERSHIP is "int" */
+ return setsockopt_int(nl->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, group);
+}
+
+int socket_broadcast_group_ref(sd_netlink *nl, unsigned group) {
+ unsigned n_ref;
+ int r;
+
+ assert(nl);
+
+ n_ref = broadcast_group_get_ref(nl, group);
+
+ n_ref++;
+
+ r = broadcast_group_set_ref(nl, group, n_ref);
+ if (r < 0)
+ return r;
+
+ if (n_ref > 1)
+ /* already in the group */
+ return 0;
+
+ return broadcast_group_join(nl, group);
+}
+
+static int broadcast_group_leave(sd_netlink *nl, unsigned group) {
+ assert(nl);
+ assert(nl->fd >= 0);
+ assert(group > 0);
+
+ if (nl->broadcast_group_dont_leave)
+ return 0;
+
+ /* group is "unsigned", but netlink(7) says the argument for NETLINK_DROP_MEMBERSHIP is "int" */
+ return setsockopt_int(nl->fd, SOL_NETLINK, NETLINK_DROP_MEMBERSHIP, group);
+}
+
+int socket_broadcast_group_unref(sd_netlink *nl, unsigned group) {
+ unsigned n_ref;
+ int r;
+
+ assert(nl);
+
+ n_ref = broadcast_group_get_ref(nl, group);
+ if (n_ref == 0)
+ return 0;
+
+ n_ref--;
+
+ r = broadcast_group_set_ref(nl, group, n_ref);
+ if (r < 0)
+ return r;
+
+ if (n_ref > 0)
+ /* still refs left */
+ return 0;
+
+ return broadcast_group_leave(nl, group);
+}
+
+/* returns the number of bytes sent, or a negative error code */
+int socket_write_message(sd_netlink *nl, sd_netlink_message *m) {
+ union sockaddr_union addr = {
+ .nl.nl_family = AF_NETLINK,
+ };
+ ssize_t k;
+
+ assert(nl);
+ assert(m);
+ assert(m->hdr);
+
+ k = sendto(nl->fd, m->hdr, m->hdr->nlmsg_len, 0, &addr.sa, sizeof(addr));
+ if (k < 0)
+ return -errno;
+
+ return k;
+}
+
+static int socket_recv_message(int fd, void *buf, size_t buf_size, uint32_t *ret_mcast_group, bool peek) {
+ struct iovec iov = IOVEC_MAKE(buf, buf_size);
+ union sockaddr_union sender;
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct nl_pktinfo))) control;
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_name = &sender,
+ .msg_namelen = sizeof(sender),
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+ ssize_t n;
+
+ assert(fd >= 0);
+ assert(peek || (buf && buf_size > 0));
+
+ n = recvmsg_safe(fd, &msg, MSG_TRUNC | (peek ? MSG_PEEK : 0));
+ if (n == -ENOBUFS)
+ return log_debug_errno(n, "sd-netlink: kernel receive buffer overrun");
+ else if (ERRNO_IS_NEG_TRANSIENT(n)) {
+ if (ret_mcast_group)
+ *ret_mcast_group = 0;
+ return 0;
+ } else if (n < 0)
+ return (int) n;
+
+ if (sender.nl.nl_pid != 0) {
+ /* not from the kernel, ignore */
+ log_debug("sd-netlink: ignoring message from PID %"PRIu32, sender.nl.nl_pid);
+
+ if (peek) {
+ /* drop the message */
+ n = recvmsg_safe(fd, &msg, 0);
+ if (n < 0)
+ return (int) n;
+ }
+
+ if (ret_mcast_group)
+ *ret_mcast_group = 0;
+ return 0;
+ }
+
+ if (!peek && (size_t) n > buf_size) /* message did not fit in read buffer */
+ return -EIO;
+
+ if (ret_mcast_group) {
+ struct nl_pktinfo *pi;
+
+ pi = CMSG_FIND_DATA(&msg, SOL_NETLINK, NETLINK_PKTINFO, struct nl_pktinfo);
+ if (pi)
+ *ret_mcast_group = pi->group;
+ else
+ *ret_mcast_group = 0;
+ }
+
+ return (int) n;
+}
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
+ netlink_message_hash_ops,
+ void, trivial_hash_func, trivial_compare_func,
+ sd_netlink_message, sd_netlink_message_unref);
+
+static int netlink_queue_received_message(sd_netlink *nl, sd_netlink_message *m) {
+ uint32_t serial;
+ int r;
+
+ assert(nl);
+ assert(m);
+
+ if (ordered_set_size(nl->rqueue) >= NETLINK_RQUEUE_MAX)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOBUFS),
+ "sd-netlink: exhausted the read queue size (%d)", NETLINK_RQUEUE_MAX);
+
+ r = ordered_set_ensure_put(&nl->rqueue, &netlink_message_hash_ops, m);
+ if (r < 0)
+ return r;
+
+ sd_netlink_message_ref(m);
+
+ if (sd_netlink_message_is_broadcast(m))
+ return 0;
+
+ serial = message_get_serial(m);
+ if (serial == 0)
+ return 0;
+
+ if (sd_netlink_message_get_errno(m) < 0) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *old = NULL;
+
+ old = hashmap_remove(nl->rqueue_by_serial, UINT32_TO_PTR(serial));
+ if (old)
+ log_debug("sd-netlink: received error message with serial %"PRIu32", but another message with "
+ "the same serial is already stored in the read queue, replacing.", serial);
+ }
+
+ r = hashmap_ensure_put(&nl->rqueue_by_serial, &netlink_message_hash_ops, UINT32_TO_PTR(serial), m);
+ if (r == -EEXIST) {
+ if (!sd_netlink_message_is_error(m))
+ log_debug("sd-netlink: received message with serial %"PRIu32", but another message with "
+ "the same serial is already stored in the read queue, ignoring.", serial);
+ return 0;
+ }
+ if (r < 0) {
+ sd_netlink_message_unref(ordered_set_remove(nl->rqueue, m));
+ return r;
+ }
+
+ sd_netlink_message_ref(m);
+ return 0;
+}
+
+static int netlink_queue_partially_received_message(sd_netlink *nl, sd_netlink_message *m) {
+ uint32_t serial;
+ int r;
+
+ assert(nl);
+ assert(m);
+ assert(m->hdr->nlmsg_flags & NLM_F_MULTI);
+
+ if (hashmap_size(nl->rqueue_partial_by_serial) >= NETLINK_RQUEUE_MAX)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOBUFS),
+ "sd-netlink: exhausted the partial read queue size (%d)", NETLINK_RQUEUE_MAX);
+
+ serial = message_get_serial(m);
+ r = hashmap_ensure_put(&nl->rqueue_partial_by_serial, &netlink_message_hash_ops, UINT32_TO_PTR(serial), m);
+ if (r < 0)
+ return r;
+
+ sd_netlink_message_ref(m);
+ return 0;
+}
+
+static int parse_message_one(sd_netlink *nl, uint32_t group, const struct nlmsghdr *hdr, sd_netlink_message **ret) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ size_t size;
+ int r;
+
+ assert(nl);
+ assert(hdr);
+ assert(ret);
+
+ /* not broadcast and not for us */
+ if (group == 0 && hdr->nlmsg_pid != nl->sockaddr.nl.nl_pid)
+ goto finalize;
+
+ /* silently drop noop messages */
+ if (hdr->nlmsg_type == NLMSG_NOOP)
+ goto finalize;
+
+ /* check that we support this message type */
+ r = netlink_get_policy_set_and_header_size(nl, hdr->nlmsg_type, NULL, &size);
+ if (r == -EOPNOTSUPP) {
+ log_debug("sd-netlink: ignored message with unknown type: %i", hdr->nlmsg_type);
+ goto finalize;
+ }
+ if (r < 0)
+ return r;
+
+ /* check that the size matches the message type */
+ if (hdr->nlmsg_len < NLMSG_LENGTH(size)) {
+ log_debug("sd-netlink: message is shorter than expected, dropping.");
+ goto finalize;
+ }
+
+ r = message_new_empty(nl, &m);
+ if (r < 0)
+ return r;
+
+ m->multicast_group = group;
+ m->hdr = memdup(hdr, hdr->nlmsg_len);
+ if (!m->hdr)
+ return -ENOMEM;
+
+ /* seal and parse the top-level message */
+ r = sd_netlink_message_rewind(m, nl);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(m);
+ return 1;
+
+finalize:
+ *ret = NULL;
+ return 0;
+}
+
+/* On success, the number of bytes received is returned and *ret points to the received message
+ * which has a valid header and the correct size.
+ * If nothing useful was received 0 is returned.
+ * On failure, a negative error code is returned.
+ */
+int socket_read_message(sd_netlink *nl) {
+ bool done = false;
+ uint32_t group;
+ size_t len;
+ int r;
+
+ assert(nl);
+
+ /* read nothing, just get the pending message size */
+ r = socket_recv_message(nl->fd, NULL, 0, NULL, true);
+ if (r <= 0)
+ return r;
+ len = (size_t) r;
+
+ /* make room for the pending message */
+ if (!greedy_realloc((void**) &nl->rbuffer, len, sizeof(uint8_t)))
+ return -ENOMEM;
+
+ /* read the pending message */
+ r = socket_recv_message(nl->fd, nl->rbuffer, MALLOC_SIZEOF_SAFE(nl->rbuffer), &group, false);
+ if (r <= 0)
+ return r;
+ len = (size_t) r;
+
+ if (!NLMSG_OK(nl->rbuffer, len)) {
+ log_debug("sd-netlink: received invalid message, discarding %zu bytes of incoming message", len);
+ return 0;
+ }
+
+ for (struct nlmsghdr *hdr = nl->rbuffer; NLMSG_OK(hdr, len); hdr = NLMSG_NEXT(hdr, len)) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+
+ r = parse_message_one(nl, group, hdr, &m);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ if (hdr->nlmsg_flags & NLM_F_MULTI) {
+ if (hdr->nlmsg_type == NLMSG_DONE) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *existing = NULL;
+
+ /* finished reading multi-part message */
+ existing = hashmap_remove(nl->rqueue_partial_by_serial, UINT32_TO_PTR(hdr->nlmsg_seq));
+
+ /* if we receive only NLMSG_DONE, put it into the receive queue. */
+ r = netlink_queue_received_message(nl, existing ?: m);
+ if (r < 0)
+ return r;
+
+ done = true;
+ } else {
+ sd_netlink_message *existing;
+
+ existing = hashmap_get(nl->rqueue_partial_by_serial, UINT32_TO_PTR(hdr->nlmsg_seq));
+ if (existing) {
+ /* This is the continuation of the previously read messages.
+ * Let's append this message at the end. */
+ while (existing->next)
+ existing = existing->next;
+ existing->next = TAKE_PTR(m);
+ } else {
+ /* This is the first message. Put it into the queue for partially
+ * received messages. */
+ r = netlink_queue_partially_received_message(nl, m);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ } else {
+ r = netlink_queue_received_message(nl, m);
+ if (r < 0)
+ return r;
+
+ done = true;
+ }
+ }
+
+ if (len > 0)
+ log_debug("sd-netlink: discarding trailing %zu bytes of incoming message", len);
+
+ return done;
+}
diff --git a/src/libsystemd/sd-netlink/netlink-types-genl.c b/src/libsystemd/sd-netlink/netlink-types-genl.c
new file mode 100644
index 0000000..6fe9adc
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-types-genl.c
@@ -0,0 +1,251 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <linux/batman_adv.h>
+#include <linux/fou.h>
+#include <linux/genetlink.h>
+#include <linux/if.h>
+#include <linux/if_macsec.h>
+#include <linux/l2tp.h>
+#include <linux/nl80211.h>
+#include <linux/wireguard.h>
+
+#include "missing_network.h"
+#include "netlink-genl.h"
+#include "netlink-types-internal.h"
+
+/***************** genl ctrl type systems *****************/
+static const NLAPolicy genl_ctrl_mcast_group_policies[] = {
+ [CTRL_ATTR_MCAST_GRP_NAME] = BUILD_POLICY(STRING),
+ [CTRL_ATTR_MCAST_GRP_ID] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(genl_ctrl_mcast_group);
+
+static const NLAPolicy genl_ctrl_ops_policies[] = {
+ [CTRL_ATTR_OP_ID] = BUILD_POLICY(U32),
+ [CTRL_ATTR_OP_FLAGS] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(genl_ctrl_ops);
+
+static const NLAPolicy genl_ctrl_policies[] = {
+ [CTRL_ATTR_FAMILY_ID] = BUILD_POLICY(U16),
+ [CTRL_ATTR_FAMILY_NAME] = BUILD_POLICY(STRING),
+ [CTRL_ATTR_VERSION] = BUILD_POLICY(U32),
+ [CTRL_ATTR_HDRSIZE] = BUILD_POLICY(U32),
+ [CTRL_ATTR_MAXATTR] = BUILD_POLICY(U32),
+ [CTRL_ATTR_OPS] = BUILD_POLICY_NESTED(genl_ctrl_ops),
+ [CTRL_ATTR_MCAST_GROUPS] = BUILD_POLICY_NESTED(genl_ctrl_mcast_group),
+ /*
+ [CTRL_ATTR_POLICY] = { .type = NETLINK_TYPE_NESTED, },
+ [CTRL_ATTR_OP_POLICY] = { .type = NETLINK_TYPE_NESTED, }
+ */
+ [CTRL_ATTR_OP] = BUILD_POLICY(U32),
+};
+
+/***************** genl batadv type systems *****************/
+static const NLAPolicy genl_batadv_policies[] = {
+ [BATADV_ATTR_VERSION] = BUILD_POLICY(STRING),
+ [BATADV_ATTR_ALGO_NAME] = BUILD_POLICY(STRING),
+ [BATADV_ATTR_MESH_IFINDEX] = BUILD_POLICY(U32),
+ [BATADV_ATTR_MESH_IFNAME] = BUILD_POLICY_WITH_SIZE(STRING, IFNAMSIZ),
+ [BATADV_ATTR_MESH_ADDRESS] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN),
+ [BATADV_ATTR_HARD_IFINDEX] = BUILD_POLICY(U32),
+ [BATADV_ATTR_HARD_IFNAME] = BUILD_POLICY_WITH_SIZE(STRING, IFNAMSIZ),
+ [BATADV_ATTR_HARD_ADDRESS] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN),
+ [BATADV_ATTR_ORIG_ADDRESS] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN),
+ [BATADV_ATTR_TPMETER_RESULT] = BUILD_POLICY(U8),
+ [BATADV_ATTR_TPMETER_TEST_TIME] = BUILD_POLICY(U32),
+ [BATADV_ATTR_TPMETER_BYTES] = BUILD_POLICY(U64),
+ [BATADV_ATTR_TPMETER_COOKIE] = BUILD_POLICY(U32),
+ [BATADV_ATTR_PAD] = BUILD_POLICY(UNSPEC),
+ [BATADV_ATTR_ACTIVE] = BUILD_POLICY(FLAG),
+ [BATADV_ATTR_TT_ADDRESS] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN),
+ [BATADV_ATTR_TT_TTVN] = BUILD_POLICY(U8),
+ [BATADV_ATTR_TT_LAST_TTVN] = BUILD_POLICY(U8),
+ [BATADV_ATTR_TT_CRC32] = BUILD_POLICY(U32),
+ [BATADV_ATTR_TT_VID] = BUILD_POLICY(U16),
+ [BATADV_ATTR_TT_FLAGS] = BUILD_POLICY(U32),
+ [BATADV_ATTR_FLAG_BEST] = BUILD_POLICY(FLAG),
+ [BATADV_ATTR_LAST_SEEN_MSECS] = BUILD_POLICY(U32),
+ [BATADV_ATTR_NEIGH_ADDRESS] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN),
+ [BATADV_ATTR_TQ] = BUILD_POLICY(U8),
+ [BATADV_ATTR_THROUGHPUT] = BUILD_POLICY(U32),
+ [BATADV_ATTR_BANDWIDTH_UP] = BUILD_POLICY(U32),
+ [BATADV_ATTR_BANDWIDTH_DOWN] = BUILD_POLICY(U32),
+ [BATADV_ATTR_ROUTER] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN),
+ [BATADV_ATTR_BLA_OWN] = BUILD_POLICY(FLAG),
+ [BATADV_ATTR_BLA_ADDRESS] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN),
+ [BATADV_ATTR_BLA_VID] = BUILD_POLICY(U16),
+ [BATADV_ATTR_BLA_BACKBONE] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN),
+ [BATADV_ATTR_BLA_CRC] = BUILD_POLICY(U16),
+ [BATADV_ATTR_DAT_CACHE_IP4ADDRESS] = BUILD_POLICY(U32),
+ [BATADV_ATTR_DAT_CACHE_HWADDRESS] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN),
+ [BATADV_ATTR_DAT_CACHE_VID] = BUILD_POLICY(U16),
+ [BATADV_ATTR_MCAST_FLAGS] = BUILD_POLICY(U32),
+ [BATADV_ATTR_MCAST_FLAGS_PRIV] = BUILD_POLICY(U32),
+ [BATADV_ATTR_VLANID] = BUILD_POLICY(U16),
+ [BATADV_ATTR_AGGREGATED_OGMS_ENABLED] = BUILD_POLICY(U8),
+ [BATADV_ATTR_AP_ISOLATION_ENABLED] = BUILD_POLICY(U8),
+ [BATADV_ATTR_ISOLATION_MARK] = BUILD_POLICY(U32),
+ [BATADV_ATTR_ISOLATION_MASK] = BUILD_POLICY(U32),
+ [BATADV_ATTR_BONDING_ENABLED] = BUILD_POLICY(U8),
+ [BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED] = BUILD_POLICY(U8),
+ [BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED] = BUILD_POLICY(U8),
+ [BATADV_ATTR_FRAGMENTATION_ENABLED] = BUILD_POLICY(U8),
+ [BATADV_ATTR_GW_BANDWIDTH_DOWN] = BUILD_POLICY(U32),
+ [BATADV_ATTR_GW_BANDWIDTH_UP] = BUILD_POLICY(U32),
+ [BATADV_ATTR_GW_MODE] = BUILD_POLICY(U8),
+ [BATADV_ATTR_GW_SEL_CLASS] = BUILD_POLICY(U32),
+ [BATADV_ATTR_HOP_PENALTY] = BUILD_POLICY(U8),
+ [BATADV_ATTR_LOG_LEVEL] = BUILD_POLICY(U32),
+ [BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED] = BUILD_POLICY(U8),
+ [BATADV_ATTR_MULTICAST_FANOUT] = BUILD_POLICY(U32),
+ [BATADV_ATTR_NETWORK_CODING_ENABLED] = BUILD_POLICY(U8),
+ [BATADV_ATTR_ORIG_INTERVAL] = BUILD_POLICY(U32),
+ [BATADV_ATTR_ELP_INTERVAL] = BUILD_POLICY(U32),
+ [BATADV_ATTR_THROUGHPUT_OVERRIDE] = BUILD_POLICY(U32),
+};
+
+/***************** genl fou type systems *****************/
+static const NLAPolicy genl_fou_policies[] = {
+ [FOU_ATTR_PORT] = BUILD_POLICY(U16),
+ [FOU_ATTR_AF] = BUILD_POLICY(U8),
+ [FOU_ATTR_IPPROTO] = BUILD_POLICY(U8),
+ [FOU_ATTR_TYPE] = BUILD_POLICY(U8),
+ [FOU_ATTR_REMCSUM_NOPARTIAL] = BUILD_POLICY(FLAG),
+ [FOU_ATTR_LOCAL_V4] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in_addr)),
+ [FOU_ATTR_PEER_V4] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in_addr)),
+ [FOU_ATTR_LOCAL_V6] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)),
+ [FOU_ATTR_PEER_V6] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)),
+ [FOU_ATTR_PEER_PORT] = BUILD_POLICY(U16),
+ [FOU_ATTR_IFINDEX] = BUILD_POLICY(U32),
+};
+
+/***************** genl l2tp type systems *****************/
+static const NLAPolicy genl_l2tp_policies[] = {
+ [L2TP_ATTR_PW_TYPE] = BUILD_POLICY(U16),
+ [L2TP_ATTR_ENCAP_TYPE] = BUILD_POLICY(U16),
+ [L2TP_ATTR_OFFSET] = BUILD_POLICY(U16),
+ [L2TP_ATTR_DATA_SEQ] = BUILD_POLICY(U16),
+ [L2TP_ATTR_L2SPEC_TYPE] = BUILD_POLICY(U8),
+ [L2TP_ATTR_L2SPEC_LEN] = BUILD_POLICY(U8),
+ [L2TP_ATTR_PROTO_VERSION] = BUILD_POLICY(U8),
+ [L2TP_ATTR_IFNAME] = BUILD_POLICY(STRING),
+ [L2TP_ATTR_CONN_ID] = BUILD_POLICY(U32),
+ [L2TP_ATTR_PEER_CONN_ID] = BUILD_POLICY(U32),
+ [L2TP_ATTR_SESSION_ID] = BUILD_POLICY(U32),
+ [L2TP_ATTR_PEER_SESSION_ID] = BUILD_POLICY(U32),
+ [L2TP_ATTR_UDP_CSUM] = BUILD_POLICY(U8),
+ [L2TP_ATTR_VLAN_ID] = BUILD_POLICY(U16),
+ [L2TP_ATTR_RECV_SEQ] = BUILD_POLICY(U8),
+ [L2TP_ATTR_SEND_SEQ] = BUILD_POLICY(U8),
+ [L2TP_ATTR_LNS_MODE] = BUILD_POLICY(U8),
+ [L2TP_ATTR_USING_IPSEC] = BUILD_POLICY(U8),
+ [L2TP_ATTR_FD] = BUILD_POLICY(U32),
+ [L2TP_ATTR_IP_SADDR] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in_addr)),
+ [L2TP_ATTR_IP_DADDR] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in_addr)),
+ [L2TP_ATTR_UDP_SPORT] = BUILD_POLICY(U16),
+ [L2TP_ATTR_UDP_DPORT] = BUILD_POLICY(U16),
+ [L2TP_ATTR_IP6_SADDR] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)),
+ [L2TP_ATTR_IP6_DADDR] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)),
+ [L2TP_ATTR_UDP_ZERO_CSUM6_TX] = BUILD_POLICY(FLAG),
+ [L2TP_ATTR_UDP_ZERO_CSUM6_RX] = BUILD_POLICY(FLAG),
+};
+
+/***************** genl macsec type systems *****************/
+static const NLAPolicy genl_macsec_rxsc_policies[] = {
+ [MACSEC_RXSC_ATTR_SCI] = BUILD_POLICY(U64),
+};
+
+DEFINE_POLICY_SET(genl_macsec_rxsc);
+
+static const NLAPolicy genl_macsec_sa_policies[] = {
+ [MACSEC_SA_ATTR_AN] = BUILD_POLICY(U8),
+ [MACSEC_SA_ATTR_ACTIVE] = BUILD_POLICY(U8),
+ [MACSEC_SA_ATTR_PN] = BUILD_POLICY(U32),
+ [MACSEC_SA_ATTR_KEYID] = BUILD_POLICY_WITH_SIZE(BINARY, MACSEC_KEYID_LEN),
+ [MACSEC_SA_ATTR_KEY] = BUILD_POLICY_WITH_SIZE(BINARY, MACSEC_MAX_KEY_LEN),
+};
+
+DEFINE_POLICY_SET(genl_macsec_sa);
+
+static const NLAPolicy genl_macsec_policies[] = {
+ [MACSEC_ATTR_IFINDEX] = BUILD_POLICY(U32),
+ [MACSEC_ATTR_RXSC_CONFIG] = BUILD_POLICY_NESTED(genl_macsec_rxsc),
+ [MACSEC_ATTR_SA_CONFIG] = BUILD_POLICY_NESTED(genl_macsec_sa),
+};
+
+/***************** genl NetLabel type systems *****************/
+static const NLAPolicy genl_netlabel_policies[] = {
+ [NLBL_UNLABEL_A_IPV4ADDR] = BUILD_POLICY(IN_ADDR),
+ [NLBL_UNLABEL_A_IPV4MASK] = BUILD_POLICY(IN_ADDR),
+ [NLBL_UNLABEL_A_IPV6ADDR] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)),
+ [NLBL_UNLABEL_A_IPV6MASK] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)),
+ [NLBL_UNLABEL_A_IFACE] = BUILD_POLICY_WITH_SIZE(STRING, IFNAMSIZ-1),
+ [NLBL_UNLABEL_A_SECCTX] = BUILD_POLICY(STRING),
+};
+
+/***************** genl nl80211 type systems *****************/
+static const NLAPolicy genl_nl80211_policies[] = {
+ [NL80211_ATTR_WIPHY] = BUILD_POLICY(U32),
+ [NL80211_ATTR_WIPHY_NAME] = BUILD_POLICY(STRING),
+ [NL80211_ATTR_IFINDEX] = BUILD_POLICY(U32),
+ [NL80211_ATTR_IFNAME] = BUILD_POLICY_WITH_SIZE(STRING, IFNAMSIZ-1),
+ [NL80211_ATTR_IFTYPE] = BUILD_POLICY(U32),
+ [NL80211_ATTR_MAC] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN),
+ [NL80211_ATTR_SSID] = BUILD_POLICY_WITH_SIZE(BINARY, IEEE80211_MAX_SSID_LEN),
+ [NL80211_ATTR_STATUS_CODE] = BUILD_POLICY(U16),
+ [NL80211_ATTR_4ADDR] = BUILD_POLICY(U8),
+};
+
+/***************** genl wireguard type systems *****************/
+static const NLAPolicy genl_wireguard_allowedip_policies[] = {
+ [WGALLOWEDIP_A_FAMILY] = BUILD_POLICY(U16),
+ [WGALLOWEDIP_A_IPADDR] = BUILD_POLICY(IN_ADDR),
+ [WGALLOWEDIP_A_CIDR_MASK] = BUILD_POLICY(U8),
+};
+
+DEFINE_POLICY_SET(genl_wireguard_allowedip);
+
+static const NLAPolicy genl_wireguard_peer_policies[] = {
+ [WGPEER_A_PUBLIC_KEY] = BUILD_POLICY_WITH_SIZE(BINARY, WG_KEY_LEN),
+ [WGPEER_A_FLAGS] = BUILD_POLICY(U32),
+ [WGPEER_A_PRESHARED_KEY] = BUILD_POLICY_WITH_SIZE(BINARY, WG_KEY_LEN),
+ [WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL] = BUILD_POLICY(U16),
+ [WGPEER_A_ENDPOINT] = BUILD_POLICY(SOCKADDR),
+ [WGPEER_A_ALLOWEDIPS] = BUILD_POLICY_NESTED(genl_wireguard_allowedip),
+};
+
+DEFINE_POLICY_SET(genl_wireguard_peer);
+
+static const NLAPolicy genl_wireguard_policies[] = {
+ [WGDEVICE_A_IFINDEX] = BUILD_POLICY(U32),
+ [WGDEVICE_A_IFNAME] = BUILD_POLICY_WITH_SIZE(STRING, IFNAMSIZ-1),
+ [WGDEVICE_A_FLAGS] = BUILD_POLICY(U32),
+ [WGDEVICE_A_PRIVATE_KEY] = BUILD_POLICY_WITH_SIZE(BINARY, WG_KEY_LEN),
+ [WGDEVICE_A_LISTEN_PORT] = BUILD_POLICY(U16),
+ [WGDEVICE_A_FWMARK] = BUILD_POLICY(U32),
+ [WGDEVICE_A_PEERS] = BUILD_POLICY_NESTED(genl_wireguard_peer),
+};
+
+/***************** genl families *****************/
+static const NLAPolicySetUnionElement genl_policy_set_union_elements[] = {
+ BUILD_UNION_ELEMENT_BY_STRING(CTRL_GENL_NAME, genl_ctrl),
+ BUILD_UNION_ELEMENT_BY_STRING(BATADV_NL_NAME, genl_batadv),
+ BUILD_UNION_ELEMENT_BY_STRING(FOU_GENL_NAME, genl_fou),
+ BUILD_UNION_ELEMENT_BY_STRING(L2TP_GENL_NAME, genl_l2tp),
+ BUILD_UNION_ELEMENT_BY_STRING(MACSEC_GENL_NAME, genl_macsec),
+ BUILD_UNION_ELEMENT_BY_STRING(NETLBL_NLTYPE_UNLABELED_NAME, genl_netlabel),
+ BUILD_UNION_ELEMENT_BY_STRING(NL80211_GENL_NAME, genl_nl80211),
+ BUILD_UNION_ELEMENT_BY_STRING(WG_GENL_NAME, genl_wireguard),
+};
+
+/* This is the root type system union, so match_attribute is not necessary. */
+DEFINE_POLICY_SET_UNION(genl, 0);
+
+const NLAPolicySet *genl_get_policy_set_by_name(const char *name) {
+ return policy_set_union_get_policy_set_by_string(&genl_policy_set_union, name);
+}
diff --git a/src/libsystemd/sd-netlink/netlink-types-internal.h b/src/libsystemd/sd-netlink/netlink-types-internal.h
new file mode 100644
index 0000000..1412514
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-types-internal.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "macro.h"
+#include "netlink-types.h"
+
+/* C.f. see 'struct nla_policy' at include/net/netlink.h. */
+struct NLAPolicy {
+ NLAType type;
+ size_t size;
+ union {
+ const NLAPolicySet *policy_set;
+ const NLAPolicySetUnion *policy_set_union;
+ };
+};
+
+struct NLAPolicySet {
+ uint16_t count;
+ const NLAPolicy *policies;
+};
+
+typedef struct NLAPolicySetUnionElement {
+ union {
+ int family; /* used by NETLINK_TYPE_NESTED_UNION_BY_FAMILY */
+ const char *string; /* used by NETLINK_TYPE_NESTED_UNION_BY_STRING */
+ };
+ NLAPolicySet policy_set;
+} NLAPolicySetUnionElement;
+
+struct NLAPolicySetUnion {
+ size_t count;
+ const NLAPolicySetUnionElement *elements;
+ uint16_t match_attribute; /* used by NETLINK_TYPE_NESTED_UNION_BY_STRING */
+};
+
+#define BUILD_POLICY_WITH_SIZE(t, n) \
+ { .type = NETLINK_TYPE_##t, .size = n }
+#define BUILD_POLICY(t) \
+ BUILD_POLICY_WITH_SIZE(t, 0)
+#define BUILD_POLICY_NESTED_WITH_SIZE(name, n) \
+ { .type = NETLINK_TYPE_NESTED, .size = n, .policy_set = &name##_policy_set }
+#define BUILD_POLICY_NESTED(name) \
+ BUILD_POLICY_NESTED_WITH_SIZE(name, 0)
+#define _BUILD_POLICY_NESTED_UNION(name, by) \
+ { .type = NETLINK_TYPE_NESTED_UNION_BY_##by, .policy_set_union = &name##_policy_set_union }
+#define BUILD_POLICY_NESTED_UNION_BY_STRING(name) \
+ _BUILD_POLICY_NESTED_UNION(name, STRING)
+#define BUILD_POLICY_NESTED_UNION_BY_FAMILY(name) \
+ _BUILD_POLICY_NESTED_UNION(name, FAMILY)
+
+#define _BUILD_POLICY_SET(name) \
+ { .count = ELEMENTSOF(name##_policies), .policies = name##_policies }
+#define DEFINE_POLICY_SET(name) \
+ static const NLAPolicySet name##_policy_set = _BUILD_POLICY_SET(name)
+
+# define BUILD_UNION_ELEMENT_BY_STRING(s, name) \
+ { .string = s, .policy_set = _BUILD_POLICY_SET(name) }
+# define BUILD_UNION_ELEMENT_BY_FAMILY(f, name) \
+ { .family = f, .policy_set = _BUILD_POLICY_SET(name) }
+
+#define DEFINE_POLICY_SET_UNION(name, attr) \
+ static const NLAPolicySetUnion name##_policy_set_union = { \
+ .count = ELEMENTSOF(name##_policy_set_union_elements), \
+ .elements = name##_policy_set_union_elements, \
+ .match_attribute = attr, \
+ }
diff --git a/src/libsystemd/sd-netlink/netlink-types-nfnl.c b/src/libsystemd/sd-netlink/netlink-types-nfnl.c
new file mode 100644
index 0000000..8ef4d45
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-types-nfnl.c
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/if.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/netfilter/nfnetlink.h>
+
+#include "netlink-types-internal.h"
+
+static const NLAPolicy nfnl_nft_table_policies[] = {
+ [NFTA_TABLE_NAME] = BUILD_POLICY_WITH_SIZE(STRING, NFT_TABLE_MAXNAMELEN - 1),
+ [NFTA_TABLE_FLAGS] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(nfnl_nft_table);
+
+static const NLAPolicy nfnl_nft_chain_hook_policies[] = {
+ [NFTA_HOOK_HOOKNUM] = BUILD_POLICY(U32),
+ [NFTA_HOOK_PRIORITY] = BUILD_POLICY(U32),
+ [NFTA_HOOK_DEV] = BUILD_POLICY_WITH_SIZE(STRING, IFNAMSIZ - 1),
+};
+
+DEFINE_POLICY_SET(nfnl_nft_chain_hook);
+
+static const NLAPolicy nfnl_nft_chain_policies[] = {
+ [NFTA_CHAIN_TABLE] = BUILD_POLICY_WITH_SIZE(STRING, NFT_TABLE_MAXNAMELEN - 1),
+ [NFTA_CHAIN_NAME] = BUILD_POLICY_WITH_SIZE(STRING, NFT_TABLE_MAXNAMELEN - 1),
+ [NFTA_CHAIN_HOOK] = BUILD_POLICY_NESTED(nfnl_nft_chain_hook),
+ [NFTA_CHAIN_TYPE] = BUILD_POLICY_WITH_SIZE(STRING, 16),
+ [NFTA_CHAIN_FLAGS] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(nfnl_nft_chain);
+
+static const NLAPolicy nfnl_nft_expr_meta_policies[] = {
+ [NFTA_META_DREG] = BUILD_POLICY(U32),
+ [NFTA_META_KEY] = BUILD_POLICY(U32),
+ [NFTA_META_SREG] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicy nfnl_nft_expr_payload_policies[] = {
+ [NFTA_PAYLOAD_DREG] = BUILD_POLICY(U32),
+ [NFTA_PAYLOAD_BASE] = BUILD_POLICY(U32),
+ [NFTA_PAYLOAD_OFFSET] = BUILD_POLICY(U32),
+ [NFTA_PAYLOAD_LEN] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicy nfnl_nft_expr_nat_policies[] = {
+ [NFTA_NAT_TYPE] = BUILD_POLICY(U32),
+ [NFTA_NAT_FAMILY] = BUILD_POLICY(U32),
+ [NFTA_NAT_REG_ADDR_MIN] = BUILD_POLICY(U32),
+ [NFTA_NAT_REG_ADDR_MAX] = BUILD_POLICY(U32),
+ [NFTA_NAT_REG_PROTO_MIN] = BUILD_POLICY(U32),
+ [NFTA_NAT_REG_PROTO_MAX] = BUILD_POLICY(U32),
+ [NFTA_NAT_FLAGS] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicy nfnl_nft_data_policies[] = {
+ [NFTA_DATA_VALUE] = { .type = NETLINK_TYPE_BINARY },
+};
+
+DEFINE_POLICY_SET(nfnl_nft_data);
+
+static const NLAPolicy nfnl_nft_expr_bitwise_policies[] = {
+ [NFTA_BITWISE_SREG] = BUILD_POLICY(U32),
+ [NFTA_BITWISE_DREG] = BUILD_POLICY(U32),
+ [NFTA_BITWISE_LEN] = BUILD_POLICY(U32),
+ [NFTA_BITWISE_MASK] = BUILD_POLICY_NESTED(nfnl_nft_data),
+ [NFTA_BITWISE_XOR] = BUILD_POLICY_NESTED(nfnl_nft_data),
+};
+
+static const NLAPolicy nfnl_nft_expr_cmp_policies[] = {
+ [NFTA_CMP_SREG] = BUILD_POLICY(U32),
+ [NFTA_CMP_OP] = BUILD_POLICY(U32),
+ [NFTA_CMP_DATA] = BUILD_POLICY_NESTED(nfnl_nft_data),
+};
+
+static const NLAPolicy nfnl_nft_expr_fib_policies[] = {
+ [NFTA_FIB_DREG] = BUILD_POLICY(U32),
+ [NFTA_FIB_RESULT] = BUILD_POLICY(U32),
+ [NFTA_FIB_FLAGS] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicy nfnl_nft_expr_lookup_policies[] = {
+ [NFTA_LOOKUP_SET] = { .type = NETLINK_TYPE_STRING },
+ [NFTA_LOOKUP_SREG] = BUILD_POLICY(U32),
+ [NFTA_LOOKUP_DREG] = BUILD_POLICY(U32),
+ [NFTA_LOOKUP_FLAGS] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicy nfnl_nft_expr_masq_policies[] = {
+ [NFTA_MASQ_FLAGS] = BUILD_POLICY(U32),
+ [NFTA_MASQ_REG_PROTO_MIN] = BUILD_POLICY(U32),
+ [NFTA_MASQ_REG_PROTO_MAX] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicySetUnionElement nfnl_expr_data_policy_set_union_elements[] = {
+ BUILD_UNION_ELEMENT_BY_STRING("bitwise", nfnl_nft_expr_bitwise),
+ BUILD_UNION_ELEMENT_BY_STRING("cmp", nfnl_nft_expr_cmp),
+ BUILD_UNION_ELEMENT_BY_STRING("fib", nfnl_nft_expr_fib),
+ BUILD_UNION_ELEMENT_BY_STRING("lookup", nfnl_nft_expr_lookup),
+ BUILD_UNION_ELEMENT_BY_STRING("masq", nfnl_nft_expr_masq),
+ BUILD_UNION_ELEMENT_BY_STRING("meta", nfnl_nft_expr_meta),
+ BUILD_UNION_ELEMENT_BY_STRING("nat", nfnl_nft_expr_nat),
+ BUILD_UNION_ELEMENT_BY_STRING("payload", nfnl_nft_expr_payload),
+};
+
+DEFINE_POLICY_SET_UNION(nfnl_expr_data, NFTA_EXPR_NAME);
+
+static const NLAPolicy nfnl_nft_rule_expr_policies[] = {
+ [NFTA_EXPR_NAME] = BUILD_POLICY_WITH_SIZE(STRING, 16),
+ [NFTA_EXPR_DATA] = BUILD_POLICY_NESTED_UNION_BY_STRING(nfnl_expr_data),
+};
+
+DEFINE_POLICY_SET(nfnl_nft_rule_expr);
+
+static const NLAPolicy nfnl_nft_rule_policies[] = {
+ [NFTA_RULE_TABLE] = BUILD_POLICY_WITH_SIZE(STRING, NFT_TABLE_MAXNAMELEN - 1),
+ [NFTA_RULE_CHAIN] = BUILD_POLICY_WITH_SIZE(STRING, NFT_TABLE_MAXNAMELEN - 1),
+ [NFTA_RULE_EXPRESSIONS] = BUILD_POLICY_NESTED(nfnl_nft_rule_expr),
+};
+
+DEFINE_POLICY_SET(nfnl_nft_rule);
+
+static const NLAPolicy nfnl_nft_set_policies[] = {
+ [NFTA_SET_TABLE] = BUILD_POLICY_WITH_SIZE(STRING, NFT_TABLE_MAXNAMELEN - 1),
+ [NFTA_SET_NAME] = BUILD_POLICY_WITH_SIZE(STRING, NFT_TABLE_MAXNAMELEN - 1),
+ [NFTA_SET_FLAGS] = BUILD_POLICY(U32),
+ [NFTA_SET_KEY_TYPE] = BUILD_POLICY(U32),
+ [NFTA_SET_KEY_LEN] = BUILD_POLICY(U32),
+ [NFTA_SET_DATA_TYPE] = BUILD_POLICY(U32),
+ [NFTA_SET_DATA_LEN] = BUILD_POLICY(U32),
+ [NFTA_SET_POLICY] = BUILD_POLICY(U32),
+ [NFTA_SET_ID] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(nfnl_nft_set);
+
+static const NLAPolicy nfnl_nft_setelem_policies[] = {
+ [NFTA_SET_ELEM_KEY] = BUILD_POLICY_NESTED(nfnl_nft_data),
+ [NFTA_SET_ELEM_DATA] = BUILD_POLICY_NESTED(nfnl_nft_data),
+ [NFTA_SET_ELEM_FLAGS] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(nfnl_nft_setelem);
+
+static const NLAPolicy nfnl_nft_setelem_list_policies[] = {
+ [NFTA_SET_ELEM_LIST_TABLE] = BUILD_POLICY_WITH_SIZE(STRING, NFT_TABLE_MAXNAMELEN - 1),
+ [NFTA_SET_ELEM_LIST_SET] = BUILD_POLICY_WITH_SIZE(STRING, NFT_TABLE_MAXNAMELEN - 1),
+ [NFTA_SET_ELEM_LIST_ELEMENTS] = BUILD_POLICY_NESTED(nfnl_nft_setelem),
+};
+
+DEFINE_POLICY_SET(nfnl_nft_setelem_list);
+
+static const NLAPolicy nfnl_subsys_nft_policies[] = {
+ [NFT_MSG_DELTABLE] = BUILD_POLICY_NESTED_WITH_SIZE(nfnl_nft_table, sizeof(struct nfgenmsg)),
+ [NFT_MSG_NEWTABLE] = BUILD_POLICY_NESTED_WITH_SIZE(nfnl_nft_table, sizeof(struct nfgenmsg)),
+ [NFT_MSG_NEWCHAIN] = BUILD_POLICY_NESTED_WITH_SIZE(nfnl_nft_chain, sizeof(struct nfgenmsg)),
+ [NFT_MSG_NEWRULE] = BUILD_POLICY_NESTED_WITH_SIZE(nfnl_nft_rule, sizeof(struct nfgenmsg)),
+ [NFT_MSG_NEWSET] = BUILD_POLICY_NESTED_WITH_SIZE(nfnl_nft_set, sizeof(struct nfgenmsg)),
+ [NFT_MSG_NEWSETELEM] = BUILD_POLICY_NESTED_WITH_SIZE(nfnl_nft_setelem_list, sizeof(struct nfgenmsg)),
+ [NFT_MSG_DELSETELEM] = BUILD_POLICY_NESTED_WITH_SIZE(nfnl_nft_setelem_list, sizeof(struct nfgenmsg)),
+};
+
+DEFINE_POLICY_SET(nfnl_subsys_nft);
+
+static const NLAPolicy nfnl_msg_batch_policies[] = {
+ [NFNL_BATCH_GENID] = BUILD_POLICY(U32)
+};
+
+DEFINE_POLICY_SET(nfnl_msg_batch);
+
+static const NLAPolicy nfnl_subsys_none_policies[] = {
+ [NFNL_MSG_BATCH_BEGIN] = BUILD_POLICY_NESTED_WITH_SIZE(nfnl_msg_batch, sizeof(struct nfgenmsg)),
+ [NFNL_MSG_BATCH_END] = BUILD_POLICY_NESTED_WITH_SIZE(nfnl_msg_batch, sizeof(struct nfgenmsg)),
+};
+
+DEFINE_POLICY_SET(nfnl_subsys_none);
+
+static const NLAPolicy nfnl_policies[] = {
+ [NFNL_SUBSYS_NONE] = BUILD_POLICY_NESTED(nfnl_subsys_none),
+ [NFNL_SUBSYS_NFTABLES] = BUILD_POLICY_NESTED(nfnl_subsys_nft),
+};
+
+DEFINE_POLICY_SET(nfnl);
+
+const NLAPolicy *nfnl_get_policy(uint16_t nlmsg_type) {
+ const NLAPolicySet *subsys;
+
+ subsys = policy_set_get_policy_set(&nfnl_policy_set, NFNL_SUBSYS_ID(nlmsg_type));
+ if (!subsys)
+ return NULL;
+
+ return policy_set_get_policy(subsys, NFNL_MSG_TYPE(nlmsg_type));
+}
diff --git a/src/libsystemd/sd-netlink/netlink-types-rtnl.c b/src/libsystemd/sd-netlink/netlink-types-rtnl.c
new file mode 100644
index 0000000..0153456
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-types-rtnl.c
@@ -0,0 +1,1229 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <linux/batman_adv.h>
+#include <linux/can/netlink.h>
+#include <linux/can/vxcan.h>
+#include <linux/cfm_bridge.h>
+#include <linux/fib_rules.h>
+#include <linux/fou.h>
+#include <linux/if.h>
+#include <linux/if_addr.h>
+#include <linux/if_addrlabel.h>
+#include <linux/if_bridge.h>
+#include <linux/if_link.h>
+#include <linux/if_macsec.h>
+#include <linux/if_tunnel.h>
+#include <linux/ip.h>
+#include <linux/l2tp.h>
+#include <linux/netlink.h>
+#include <linux/nexthop.h>
+#include <linux/nl80211.h>
+#include <linux/pkt_sched.h>
+#include <linux/rtnetlink.h>
+#include <linux/veth.h>
+#include <linux/wireguard.h>
+
+#include "missing_network.h"
+#include "netlink-types-internal.h"
+
+enum {
+ BOND_ARP_TARGETS_0,
+ BOND_ARP_TARGETS_1,
+ BOND_ARP_TARGETS_2,
+ BOND_ARP_TARGETS_3,
+ BOND_ARP_TARGETS_4,
+ BOND_ARP_TARGETS_5,
+ BOND_ARP_TARGETS_6,
+ BOND_ARP_TARGETS_7,
+ BOND_ARP_TARGETS_8,
+ BOND_ARP_TARGETS_9,
+ BOND_ARP_TARGETS_10,
+ BOND_ARP_TARGETS_11,
+ BOND_ARP_TARGETS_12,
+ BOND_ARP_TARGETS_13,
+ BOND_ARP_TARGETS_14,
+ BOND_ARP_TARGETS_15,
+ _BOND_ARP_TARGETS_MAX,
+};
+
+assert_cc(_BOND_ARP_TARGETS_MAX == BOND_MAX_ARP_TARGETS);
+
+static const NLAPolicySet rtnl_link_policy_set;
+
+static const NLAPolicy rtnl_link_info_data_bareudp_policies[] = {
+ [IFLA_BAREUDP_PORT] = BUILD_POLICY(U16),
+ [IFLA_BAREUDP_ETHERTYPE] = BUILD_POLICY(U16),
+ [IFLA_BAREUDP_SRCPORT_MIN] = BUILD_POLICY(U16),
+ [IFLA_BAREUDP_MULTIPROTO_MODE] = BUILD_POLICY(FLAG),
+};
+
+static const NLAPolicy rtnl_link_info_data_batadv_policies[] = {
+ [IFLA_BATADV_ALGO_NAME] = BUILD_POLICY_WITH_SIZE(STRING, 20),
+};
+
+static const NLAPolicy rtnl_bond_arp_ip_target_policies[] = {
+ [BOND_ARP_TARGETS_0] = BUILD_POLICY(U32),
+ [BOND_ARP_TARGETS_1] = BUILD_POLICY(U32),
+ [BOND_ARP_TARGETS_2] = BUILD_POLICY(U32),
+ [BOND_ARP_TARGETS_3] = BUILD_POLICY(U32),
+ [BOND_ARP_TARGETS_4] = BUILD_POLICY(U32),
+ [BOND_ARP_TARGETS_5] = BUILD_POLICY(U32),
+ [BOND_ARP_TARGETS_6] = BUILD_POLICY(U32),
+ [BOND_ARP_TARGETS_7] = BUILD_POLICY(U32),
+ [BOND_ARP_TARGETS_8] = BUILD_POLICY(U32),
+ [BOND_ARP_TARGETS_9] = BUILD_POLICY(U32),
+ [BOND_ARP_TARGETS_10] = BUILD_POLICY(U32),
+ [BOND_ARP_TARGETS_11] = BUILD_POLICY(U32),
+ [BOND_ARP_TARGETS_12] = BUILD_POLICY(U32),
+ [BOND_ARP_TARGETS_13] = BUILD_POLICY(U32),
+ [BOND_ARP_TARGETS_14] = BUILD_POLICY(U32),
+ [BOND_ARP_TARGETS_15] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_bond_arp_ip_target);
+
+static const NLAPolicy rtnl_bond_ad_info_policies[] = {
+ [IFLA_BOND_AD_INFO_AGGREGATOR] = BUILD_POLICY(U16),
+ [IFLA_BOND_AD_INFO_NUM_PORTS] = BUILD_POLICY(U16),
+ [IFLA_BOND_AD_INFO_ACTOR_KEY] = BUILD_POLICY(U16),
+ [IFLA_BOND_AD_INFO_PARTNER_KEY] = BUILD_POLICY(U16),
+ [IFLA_BOND_AD_INFO_PARTNER_MAC] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN),
+};
+
+DEFINE_POLICY_SET(rtnl_bond_ad_info);
+
+static const NLAPolicy rtnl_link_info_data_bond_policies[] = {
+ [IFLA_BOND_MODE] = BUILD_POLICY(U8),
+ [IFLA_BOND_ACTIVE_SLAVE] = BUILD_POLICY(U32),
+ [IFLA_BOND_MIIMON] = BUILD_POLICY(U32),
+ [IFLA_BOND_UPDELAY] = BUILD_POLICY(U32),
+ [IFLA_BOND_DOWNDELAY] = BUILD_POLICY(U32),
+ [IFLA_BOND_USE_CARRIER] = BUILD_POLICY(U8),
+ [IFLA_BOND_ARP_INTERVAL] = BUILD_POLICY(U32),
+ [IFLA_BOND_ARP_IP_TARGET] = BUILD_POLICY_NESTED(rtnl_bond_arp_ip_target),
+ [IFLA_BOND_ARP_VALIDATE] = BUILD_POLICY(U32),
+ [IFLA_BOND_ARP_ALL_TARGETS] = BUILD_POLICY(U32),
+ [IFLA_BOND_PRIMARY] = BUILD_POLICY(U32),
+ [IFLA_BOND_PRIMARY_RESELECT] = BUILD_POLICY(U8),
+ [IFLA_BOND_FAIL_OVER_MAC] = BUILD_POLICY(U8),
+ [IFLA_BOND_XMIT_HASH_POLICY] = BUILD_POLICY(U8),
+ [IFLA_BOND_RESEND_IGMP] = BUILD_POLICY(U32),
+ [IFLA_BOND_NUM_PEER_NOTIF] = BUILD_POLICY(U8),
+ [IFLA_BOND_ALL_SLAVES_ACTIVE] = BUILD_POLICY(U8),
+ [IFLA_BOND_MIN_LINKS] = BUILD_POLICY(U32),
+ [IFLA_BOND_LP_INTERVAL] = BUILD_POLICY(U32),
+ [IFLA_BOND_PACKETS_PER_SLAVE] = BUILD_POLICY(U32),
+ [IFLA_BOND_AD_LACP_RATE] = BUILD_POLICY(U8),
+ [IFLA_BOND_AD_SELECT] = BUILD_POLICY(U8),
+ [IFLA_BOND_AD_INFO] = BUILD_POLICY_NESTED(rtnl_bond_ad_info),
+ [IFLA_BOND_AD_ACTOR_SYS_PRIO] = BUILD_POLICY(U16),
+ [IFLA_BOND_AD_USER_PORT_KEY] = BUILD_POLICY(U16),
+ [IFLA_BOND_AD_ACTOR_SYSTEM] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN),
+ [IFLA_BOND_TLB_DYNAMIC_LB] = BUILD_POLICY(U8),
+ [IFLA_BOND_PEER_NOTIF_DELAY] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicy rtnl_link_info_data_bridge_policies[] = {
+ [IFLA_BR_FORWARD_DELAY] = BUILD_POLICY(U32),
+ [IFLA_BR_HELLO_TIME] = BUILD_POLICY(U32),
+ [IFLA_BR_MAX_AGE] = BUILD_POLICY(U32),
+ [IFLA_BR_AGEING_TIME] = BUILD_POLICY(U32),
+ [IFLA_BR_STP_STATE] = BUILD_POLICY(U32),
+ [IFLA_BR_PRIORITY] = BUILD_POLICY(U16),
+ [IFLA_BR_VLAN_FILTERING] = BUILD_POLICY(U8),
+ [IFLA_BR_VLAN_PROTOCOL] = BUILD_POLICY(U16),
+ [IFLA_BR_GROUP_FWD_MASK] = BUILD_POLICY(U16),
+ [IFLA_BR_ROOT_ID] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_bridge_id)),
+ [IFLA_BR_BRIDGE_ID] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_bridge_id)),
+ [IFLA_BR_ROOT_PORT] = BUILD_POLICY(U16),
+ [IFLA_BR_ROOT_PATH_COST] = BUILD_POLICY(U32),
+ [IFLA_BR_TOPOLOGY_CHANGE] = BUILD_POLICY(U8),
+ [IFLA_BR_TOPOLOGY_CHANGE_DETECTED] = BUILD_POLICY(U8),
+ [IFLA_BR_HELLO_TIMER] = BUILD_POLICY(U64),
+ [IFLA_BR_TCN_TIMER] = BUILD_POLICY(U64),
+ [IFLA_BR_TOPOLOGY_CHANGE_TIMER] = BUILD_POLICY(U64),
+ [IFLA_BR_GC_TIMER] = BUILD_POLICY(U64),
+ [IFLA_BR_GROUP_ADDR] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN),
+ [IFLA_BR_FDB_FLUSH] = BUILD_POLICY(FLAG),
+ [IFLA_BR_MCAST_ROUTER] = BUILD_POLICY(U8),
+ [IFLA_BR_MCAST_SNOOPING] = BUILD_POLICY(U8),
+ [IFLA_BR_MCAST_QUERY_USE_IFADDR] = BUILD_POLICY(U8),
+ [IFLA_BR_MCAST_QUERIER] = BUILD_POLICY(U8),
+ [IFLA_BR_MCAST_HASH_ELASTICITY] = BUILD_POLICY(U32),
+ [IFLA_BR_MCAST_HASH_MAX] = BUILD_POLICY(U32),
+ [IFLA_BR_MCAST_LAST_MEMBER_CNT] = BUILD_POLICY(U32),
+ [IFLA_BR_MCAST_STARTUP_QUERY_CNT] = BUILD_POLICY(U32),
+ [IFLA_BR_MCAST_LAST_MEMBER_INTVL] = BUILD_POLICY(U64),
+ [IFLA_BR_MCAST_MEMBERSHIP_INTVL] = BUILD_POLICY(U64),
+ [IFLA_BR_MCAST_QUERIER_INTVL] = BUILD_POLICY(U64),
+ [IFLA_BR_MCAST_QUERY_INTVL] = BUILD_POLICY(U64),
+ [IFLA_BR_MCAST_QUERY_RESPONSE_INTVL] = BUILD_POLICY(U64),
+ [IFLA_BR_MCAST_STARTUP_QUERY_INTVL] = BUILD_POLICY(U64),
+ [IFLA_BR_NF_CALL_IPTABLES] = BUILD_POLICY(U8),
+ [IFLA_BR_NF_CALL_IP6TABLES] = BUILD_POLICY(U8),
+ [IFLA_BR_NF_CALL_ARPTABLES] = BUILD_POLICY(U8),
+ [IFLA_BR_VLAN_DEFAULT_PVID] = BUILD_POLICY(U16),
+ [IFLA_BR_VLAN_STATS_ENABLED] = BUILD_POLICY(U8),
+ [IFLA_BR_MCAST_STATS_ENABLED] = BUILD_POLICY(U8),
+ [IFLA_BR_MCAST_IGMP_VERSION] = BUILD_POLICY(U8),
+ [IFLA_BR_MCAST_MLD_VERSION] = BUILD_POLICY(U8),
+ [IFLA_BR_VLAN_STATS_PER_PORT] = BUILD_POLICY(U8),
+ [IFLA_BR_MULTI_BOOLOPT] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct br_boolopt_multi)),
+};
+
+static const NLAPolicy rtnl_link_info_data_can_policies[] = {
+ [IFLA_CAN_BITTIMING] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct can_bittiming)),
+ [IFLA_CAN_BITTIMING_CONST] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct can_bittiming_const)),
+ [IFLA_CAN_CLOCK] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct can_clock)),
+ [IFLA_CAN_STATE] = BUILD_POLICY(U32),
+ [IFLA_CAN_CTRLMODE] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct can_ctrlmode)),
+ [IFLA_CAN_RESTART_MS] = BUILD_POLICY(U32),
+ [IFLA_CAN_RESTART] = BUILD_POLICY(U32),
+ [IFLA_CAN_BERR_COUNTER] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct can_berr_counter)),
+ [IFLA_CAN_DATA_BITTIMING] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct can_bittiming)),
+ [IFLA_CAN_DATA_BITTIMING_CONST] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct can_bittiming_const)),
+ [IFLA_CAN_TERMINATION] = BUILD_POLICY(U16),
+ [IFLA_CAN_TERMINATION_CONST] = BUILD_POLICY(BINARY), /* size = termination_const_cnt * sizeof(u16) */
+ [IFLA_CAN_BITRATE_CONST] = BUILD_POLICY(BINARY), /* size = bitrate_const_cnt * sizeof(u32) */
+ [IFLA_CAN_DATA_BITRATE_CONST] = BUILD_POLICY(BINARY), /* size = data_bitrate_const_cnt * sizeof(u32) */
+ [IFLA_CAN_BITRATE_MAX] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicy rtnl_link_info_data_geneve_policies[] = {
+ [IFLA_GENEVE_ID] = BUILD_POLICY(U32),
+ [IFLA_GENEVE_REMOTE] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in_addr)),
+ [IFLA_GENEVE_TTL] = BUILD_POLICY(U8),
+ [IFLA_GENEVE_TOS] = BUILD_POLICY(U8),
+ [IFLA_GENEVE_PORT] = BUILD_POLICY(U16),
+ [IFLA_GENEVE_COLLECT_METADATA] = BUILD_POLICY(FLAG),
+ [IFLA_GENEVE_REMOTE6] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)),
+ [IFLA_GENEVE_UDP_CSUM] = BUILD_POLICY(U8),
+ [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = BUILD_POLICY(U8),
+ [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = BUILD_POLICY(U8),
+ [IFLA_GENEVE_LABEL] = BUILD_POLICY(U32),
+ [IFLA_GENEVE_TTL_INHERIT] = BUILD_POLICY(U8),
+ [IFLA_GENEVE_DF] = BUILD_POLICY(U8),
+ [IFLA_GENEVE_INNER_PROTO_INHERIT] = BUILD_POLICY(FLAG),
+};
+
+static const NLAPolicy rtnl_link_info_data_gre_policies[] = {
+ [IFLA_GRE_LINK] = BUILD_POLICY(U32),
+ [IFLA_GRE_IFLAGS] = BUILD_POLICY(U16),
+ [IFLA_GRE_OFLAGS] = BUILD_POLICY(U16),
+ [IFLA_GRE_IKEY] = BUILD_POLICY(U32),
+ [IFLA_GRE_OKEY] = BUILD_POLICY(U32),
+ [IFLA_GRE_LOCAL] = BUILD_POLICY(IN_ADDR),
+ [IFLA_GRE_REMOTE] = BUILD_POLICY(IN_ADDR),
+ [IFLA_GRE_TTL] = BUILD_POLICY(U8),
+ [IFLA_GRE_TOS] = BUILD_POLICY(U8),
+ [IFLA_GRE_PMTUDISC] = BUILD_POLICY(U8),
+ [IFLA_GRE_ENCAP_LIMIT] = BUILD_POLICY(U8),
+ [IFLA_GRE_FLOWINFO] = BUILD_POLICY(U32),
+ [IFLA_GRE_FLAGS] = BUILD_POLICY(U32),
+ [IFLA_GRE_ENCAP_TYPE] = BUILD_POLICY(U16),
+ [IFLA_GRE_ENCAP_FLAGS] = BUILD_POLICY(U16),
+ [IFLA_GRE_ENCAP_SPORT] = BUILD_POLICY(U16),
+ [IFLA_GRE_ENCAP_DPORT] = BUILD_POLICY(U16),
+ [IFLA_GRE_COLLECT_METADATA] = BUILD_POLICY(FLAG),
+ [IFLA_GRE_IGNORE_DF] = BUILD_POLICY(U8),
+ [IFLA_GRE_FWMARK] = BUILD_POLICY(U32),
+ [IFLA_GRE_ERSPAN_INDEX] = BUILD_POLICY(U32),
+ [IFLA_GRE_ERSPAN_VER] = BUILD_POLICY(U8),
+ [IFLA_GRE_ERSPAN_DIR] = BUILD_POLICY(U8),
+ [IFLA_GRE_ERSPAN_HWID] = BUILD_POLICY(U16),
+};
+
+static const NLAPolicy rtnl_link_info_data_ipoib_policies[] = {
+ [IFLA_IPOIB_PKEY] = BUILD_POLICY(U16),
+ [IFLA_IPOIB_MODE] = BUILD_POLICY(U16),
+ [IFLA_IPOIB_UMCAST] = BUILD_POLICY(U16),
+};
+
+/* IFLA_IPTUN_ attributes are used in ipv4/ipip.c, ipv6/ip6_tunnel.c, and ipv6/sit.c. And unfortunately,
+ * IFLA_IPTUN_FLAGS is used with different types, ugh... */
+#define DEFINE_IPTUN_TYPES(name, flags_type) \
+ static const NLAPolicy rtnl_link_info_data_##name##_policies[] = { \
+ [IFLA_IPTUN_LINK] = BUILD_POLICY(U32), \
+ [IFLA_IPTUN_LOCAL] = BUILD_POLICY(IN_ADDR), \
+ [IFLA_IPTUN_REMOTE] = BUILD_POLICY(IN_ADDR), \
+ [IFLA_IPTUN_TTL] = BUILD_POLICY(U8), \
+ [IFLA_IPTUN_TOS] = BUILD_POLICY(U8), \
+ [IFLA_IPTUN_ENCAP_LIMIT] = BUILD_POLICY(U8), \
+ [IFLA_IPTUN_FLOWINFO] = BUILD_POLICY(U32), \
+ [IFLA_IPTUN_FLAGS] = BUILD_POLICY(flags_type), \
+ [IFLA_IPTUN_PROTO] = BUILD_POLICY(U8), \
+ [IFLA_IPTUN_PMTUDISC] = BUILD_POLICY(U8), \
+ [IFLA_IPTUN_6RD_PREFIX] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)), \
+ [IFLA_IPTUN_6RD_RELAY_PREFIX] = BUILD_POLICY(U32), \
+ [IFLA_IPTUN_6RD_PREFIXLEN] = BUILD_POLICY(U16), \
+ [IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = BUILD_POLICY(U16), \
+ [IFLA_IPTUN_ENCAP_TYPE] = BUILD_POLICY(U16), \
+ [IFLA_IPTUN_ENCAP_FLAGS] = BUILD_POLICY(U16), \
+ [IFLA_IPTUN_ENCAP_SPORT] = BUILD_POLICY(U16), \
+ [IFLA_IPTUN_ENCAP_DPORT] = BUILD_POLICY(U16), \
+ [IFLA_IPTUN_COLLECT_METADATA] = BUILD_POLICY(FLAG), \
+ [IFLA_IPTUN_FWMARK] = BUILD_POLICY(U32), \
+ }
+
+DEFINE_IPTUN_TYPES(iptun, U32); /* for ipip and ip6tnl */
+DEFINE_IPTUN_TYPES(sit, U16); /* for sit */
+
+static const NLAPolicy rtnl_link_info_data_ipvlan_policies[] = {
+ [IFLA_IPVLAN_MODE] = BUILD_POLICY(U16),
+ [IFLA_IPVLAN_FLAGS] = BUILD_POLICY(U16),
+};
+
+static const NLAPolicy rtnl_link_info_data_macsec_policies[] = {
+ [IFLA_MACSEC_SCI] = BUILD_POLICY(U64),
+ [IFLA_MACSEC_PORT] = BUILD_POLICY(U16),
+ [IFLA_MACSEC_ICV_LEN] = BUILD_POLICY(U8),
+ [IFLA_MACSEC_CIPHER_SUITE] = BUILD_POLICY(U64),
+ [IFLA_MACSEC_WINDOW] = BUILD_POLICY(U32),
+ [IFLA_MACSEC_ENCODING_SA] = BUILD_POLICY(U8),
+ [IFLA_MACSEC_ENCRYPT] = BUILD_POLICY(U8),
+ [IFLA_MACSEC_PROTECT] = BUILD_POLICY(U8),
+ [IFLA_MACSEC_INC_SCI] = BUILD_POLICY(U8),
+ [IFLA_MACSEC_ES] = BUILD_POLICY(U8),
+ [IFLA_MACSEC_SCB] = BUILD_POLICY(U8),
+ [IFLA_MACSEC_REPLAY_PROTECT] = BUILD_POLICY(U8),
+ [IFLA_MACSEC_VALIDATION] = BUILD_POLICY(U8),
+ [IFLA_MACSEC_OFFLOAD] = BUILD_POLICY(U8),
+};
+
+static const NLAPolicy rtnl_macvlan_macaddr_policies[] = {
+ [IFLA_MACVLAN_MACADDR] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN),
+};
+
+DEFINE_POLICY_SET(rtnl_macvlan_macaddr);
+
+static const NLAPolicy rtnl_link_info_data_macvlan_policies[] = {
+ [IFLA_MACVLAN_MODE] = BUILD_POLICY(U32),
+ [IFLA_MACVLAN_FLAGS] = BUILD_POLICY(U16),
+ [IFLA_MACVLAN_MACADDR_MODE] = BUILD_POLICY(U32),
+ [IFLA_MACVLAN_MACADDR_DATA] = BUILD_POLICY_NESTED(rtnl_macvlan_macaddr),
+ [IFLA_MACVLAN_MACADDR_COUNT] = BUILD_POLICY(U32),
+ [IFLA_MACVLAN_BC_QUEUE_LEN] = BUILD_POLICY(U32),
+ [IFLA_MACVLAN_BC_QUEUE_LEN_USED] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicy rtnl_link_info_data_tun_policies[] = {
+ [IFLA_TUN_OWNER] = BUILD_POLICY(U32),
+ [IFLA_TUN_GROUP] = BUILD_POLICY(U32),
+ [IFLA_TUN_TYPE] = BUILD_POLICY(U8),
+ [IFLA_TUN_PI] = BUILD_POLICY(U8),
+ [IFLA_TUN_VNET_HDR] = BUILD_POLICY(U8),
+ [IFLA_TUN_PERSIST] = BUILD_POLICY(U8),
+ [IFLA_TUN_MULTI_QUEUE] = BUILD_POLICY(U8),
+ [IFLA_TUN_NUM_QUEUES] = BUILD_POLICY(U32),
+ [IFLA_TUN_NUM_DISABLED_QUEUES] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicy rtnl_link_info_data_veth_policies[] = {
+ [VETH_INFO_PEER] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_link, sizeof(struct ifinfomsg)),
+};
+
+static const NLAPolicy rtnl_vlan_qos_map_policies[] = {
+ [IFLA_VLAN_QOS_MAPPING] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vlan_qos_mapping)),
+};
+
+DEFINE_POLICY_SET(rtnl_vlan_qos_map);
+
+static const NLAPolicy rtnl_link_info_data_vlan_policies[] = {
+ [IFLA_VLAN_ID] = BUILD_POLICY(U16),
+ [IFLA_VLAN_FLAGS] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vlan_flags)),
+ [IFLA_VLAN_EGRESS_QOS] = BUILD_POLICY_NESTED(rtnl_vlan_qos_map),
+ [IFLA_VLAN_INGRESS_QOS] = BUILD_POLICY_NESTED(rtnl_vlan_qos_map),
+ [IFLA_VLAN_PROTOCOL] = BUILD_POLICY(U16),
+};
+
+static const NLAPolicy rtnl_link_info_data_vrf_policies[] = {
+ [IFLA_VRF_TABLE] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicy rtnl_link_info_data_vti_policies[] = {
+ [IFLA_VTI_LINK] = BUILD_POLICY(U32),
+ [IFLA_VTI_IKEY] = BUILD_POLICY(U32),
+ [IFLA_VTI_OKEY] = BUILD_POLICY(U32),
+ [IFLA_VTI_LOCAL] = BUILD_POLICY(IN_ADDR),
+ [IFLA_VTI_REMOTE] = BUILD_POLICY(IN_ADDR),
+ [IFLA_VTI_FWMARK] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicy rtnl_link_info_data_vxcan_policies[] = {
+ [VXCAN_INFO_PEER] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_link, sizeof(struct ifinfomsg)),
+};
+
+static const NLAPolicy rtnl_link_info_data_vxlan_policies[] = {
+ [IFLA_VXLAN_ID] = BUILD_POLICY(U32),
+ [IFLA_VXLAN_GROUP] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in_addr)),
+ [IFLA_VXLAN_LINK] = BUILD_POLICY(U32),
+ [IFLA_VXLAN_LOCAL] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in_addr)),
+ [IFLA_VXLAN_TTL] = BUILD_POLICY(U8),
+ [IFLA_VXLAN_TOS] = BUILD_POLICY(U8),
+ [IFLA_VXLAN_LEARNING] = BUILD_POLICY(U8),
+ [IFLA_VXLAN_AGEING] = BUILD_POLICY(U32),
+ [IFLA_VXLAN_LIMIT] = BUILD_POLICY(U32),
+ [IFLA_VXLAN_PORT_RANGE] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vxlan_port_range)),
+ [IFLA_VXLAN_PROXY] = BUILD_POLICY(U8),
+ [IFLA_VXLAN_RSC] = BUILD_POLICY(U8),
+ [IFLA_VXLAN_L2MISS] = BUILD_POLICY(U8),
+ [IFLA_VXLAN_L3MISS] = BUILD_POLICY(U8),
+ [IFLA_VXLAN_PORT] = BUILD_POLICY(U16),
+ [IFLA_VXLAN_GROUP6] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)),
+ [IFLA_VXLAN_LOCAL6] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)),
+ [IFLA_VXLAN_UDP_CSUM] = BUILD_POLICY(U8),
+ [IFLA_VXLAN_UDP_ZERO_CSUM6_TX] = BUILD_POLICY(U8),
+ [IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = BUILD_POLICY(U8),
+ [IFLA_VXLAN_REMCSUM_TX] = BUILD_POLICY(U8),
+ [IFLA_VXLAN_REMCSUM_RX] = BUILD_POLICY(U8),
+ [IFLA_VXLAN_GBP] = BUILD_POLICY(FLAG),
+ [IFLA_VXLAN_REMCSUM_NOPARTIAL] = BUILD_POLICY(FLAG),
+ [IFLA_VXLAN_COLLECT_METADATA] = BUILD_POLICY(U8),
+ [IFLA_VXLAN_LABEL] = BUILD_POLICY(U32),
+ [IFLA_VXLAN_GPE] = BUILD_POLICY(FLAG),
+ [IFLA_VXLAN_TTL_INHERIT] = BUILD_POLICY(FLAG),
+ [IFLA_VXLAN_DF] = BUILD_POLICY(U8),
+};
+
+static const NLAPolicy rtnl_link_info_data_xfrm_policies[] = {
+ [IFLA_XFRM_LINK] = BUILD_POLICY(U32),
+ [IFLA_XFRM_IF_ID] = BUILD_POLICY(U32)
+};
+
+static const NLAPolicySetUnionElement rtnl_link_info_data_policy_set_union_elements[] = {
+ BUILD_UNION_ELEMENT_BY_STRING("bareudp", rtnl_link_info_data_bareudp),
+ BUILD_UNION_ELEMENT_BY_STRING("batadv", rtnl_link_info_data_batadv),
+ BUILD_UNION_ELEMENT_BY_STRING("bond", rtnl_link_info_data_bond),
+ BUILD_UNION_ELEMENT_BY_STRING("bridge", rtnl_link_info_data_bridge),
+/*
+ BUILD_UNION_ELEMENT_BY_STRING("caif", rtnl_link_info_data_caif),
+*/
+ BUILD_UNION_ELEMENT_BY_STRING("can", rtnl_link_info_data_can),
+ BUILD_UNION_ELEMENT_BY_STRING("erspan", rtnl_link_info_data_gre),
+ BUILD_UNION_ELEMENT_BY_STRING("geneve", rtnl_link_info_data_geneve),
+ BUILD_UNION_ELEMENT_BY_STRING("gre", rtnl_link_info_data_gre),
+ BUILD_UNION_ELEMENT_BY_STRING("gretap", rtnl_link_info_data_gre),
+/*
+ BUILD_UNION_ELEMENT_BY_STRING("gtp", rtnl_link_info_data_gtp),
+ BUILD_UNION_ELEMENT_BY_STRING("hsr", rtnl_link_info_data_hsr),
+*/
+ BUILD_UNION_ELEMENT_BY_STRING("ip6erspan", rtnl_link_info_data_gre),
+ BUILD_UNION_ELEMENT_BY_STRING("ip6gre", rtnl_link_info_data_gre),
+ BUILD_UNION_ELEMENT_BY_STRING("ip6gretap", rtnl_link_info_data_gre),
+ BUILD_UNION_ELEMENT_BY_STRING("ip6tnl", rtnl_link_info_data_iptun),
+ BUILD_UNION_ELEMENT_BY_STRING("ipoib", rtnl_link_info_data_ipoib),
+ BUILD_UNION_ELEMENT_BY_STRING("ipip", rtnl_link_info_data_iptun),
+ BUILD_UNION_ELEMENT_BY_STRING("ipvlan", rtnl_link_info_data_ipvlan),
+ BUILD_UNION_ELEMENT_BY_STRING("ipvtap", rtnl_link_info_data_ipvlan),
+ BUILD_UNION_ELEMENT_BY_STRING("macsec", rtnl_link_info_data_macsec),
+ BUILD_UNION_ELEMENT_BY_STRING("macvlan", rtnl_link_info_data_macvlan),
+ BUILD_UNION_ELEMENT_BY_STRING("macvtap", rtnl_link_info_data_macvlan),
+/*
+ BUILD_UNION_ELEMENT_BY_STRING("ppp", rtnl_link_info_data_ppp),
+ BUILD_UNION_ELEMENT_BY_STRING("rmnet", rtnl_link_info_data_rmnet),
+*/
+ BUILD_UNION_ELEMENT_BY_STRING("sit", rtnl_link_info_data_sit),
+ BUILD_UNION_ELEMENT_BY_STRING("tun", rtnl_link_info_data_tun),
+ BUILD_UNION_ELEMENT_BY_STRING("veth", rtnl_link_info_data_veth),
+ BUILD_UNION_ELEMENT_BY_STRING("vlan", rtnl_link_info_data_vlan),
+ BUILD_UNION_ELEMENT_BY_STRING("vrf", rtnl_link_info_data_vrf),
+ BUILD_UNION_ELEMENT_BY_STRING("vti", rtnl_link_info_data_vti),
+ BUILD_UNION_ELEMENT_BY_STRING("vti6", rtnl_link_info_data_vti),
+ BUILD_UNION_ELEMENT_BY_STRING("vxcan", rtnl_link_info_data_vxcan),
+ BUILD_UNION_ELEMENT_BY_STRING("vxlan", rtnl_link_info_data_vxlan),
+/*
+ BUILD_UNION_ELEMENT_BY_STRING("wwan", rtnl_link_info_data_wwan),
+*/
+ BUILD_UNION_ELEMENT_BY_STRING("xfrm", rtnl_link_info_data_xfrm),
+};
+
+DEFINE_POLICY_SET_UNION(rtnl_link_info_data, IFLA_INFO_KIND);
+
+static const struct NLAPolicy rtnl_bridge_port_policies[] = {
+ [IFLA_BRPORT_STATE] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_COST] = BUILD_POLICY(U32),
+ [IFLA_BRPORT_PRIORITY] = BUILD_POLICY(U16),
+ [IFLA_BRPORT_MODE] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_GUARD] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_PROTECT] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_FAST_LEAVE] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_LEARNING] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_UNICAST_FLOOD] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_PROXYARP] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_LEARNING_SYNC] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_PROXYARP_WIFI] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_ROOT_ID] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_BRIDGE_ID] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_DESIGNATED_PORT] = BUILD_POLICY(U16),
+ [IFLA_BRPORT_DESIGNATED_COST] = BUILD_POLICY(U16),
+ [IFLA_BRPORT_ID] = BUILD_POLICY(U16),
+ [IFLA_BRPORT_NO] = BUILD_POLICY(U16),
+ [IFLA_BRPORT_TOPOLOGY_CHANGE_ACK] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_CONFIG_PENDING] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_MESSAGE_AGE_TIMER] = BUILD_POLICY(U64),
+ [IFLA_BRPORT_FORWARD_DELAY_TIMER] = BUILD_POLICY(U64),
+ [IFLA_BRPORT_HOLD_TIMER] = BUILD_POLICY(U64),
+ [IFLA_BRPORT_FLUSH] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_MULTICAST_ROUTER] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_PAD] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_MCAST_FLOOD] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_MCAST_TO_UCAST] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_VLAN_TUNNEL] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_BCAST_FLOOD] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_GROUP_FWD_MASK] = BUILD_POLICY(U16),
+ [IFLA_BRPORT_NEIGH_SUPPRESS] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_ISOLATED] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_BACKUP_PORT] = BUILD_POLICY(U32),
+ [IFLA_BRPORT_MRP_RING_OPEN] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_MRP_IN_OPEN] = BUILD_POLICY(U8),
+ [IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT] = BUILD_POLICY(U32),
+ [IFLA_BRPORT_MCAST_EHT_HOSTS_CNT] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicySetUnionElement rtnl_link_info_slave_data_policy_set_union_elements[] = {
+ BUILD_UNION_ELEMENT_BY_STRING("bridge", rtnl_bridge_port),
+};
+
+DEFINE_POLICY_SET_UNION(rtnl_link_info_slave_data, IFLA_INFO_SLAVE_KIND);
+
+static const NLAPolicy rtnl_link_info_policies[] = {
+ [IFLA_INFO_KIND] = BUILD_POLICY(STRING),
+ [IFLA_INFO_DATA] = BUILD_POLICY_NESTED_UNION_BY_STRING(rtnl_link_info_data),
+ /* TODO: Currently IFLA_INFO_XSTATS is used only when IFLA_INFO_KIND is "can". In the future,
+ * when multiple kinds of netdevs use this attribute, convert its type to NETLINK_TYPE_UNION. */
+ [IFLA_INFO_XSTATS] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct can_device_stats)),
+ [IFLA_INFO_SLAVE_KIND] = BUILD_POLICY(STRING),
+ [IFLA_INFO_SLAVE_DATA] = BUILD_POLICY_NESTED_UNION_BY_STRING(rtnl_link_info_slave_data),
+};
+
+DEFINE_POLICY_SET(rtnl_link_info);
+
+static const struct NLAPolicy rtnl_inet_policies[] = {
+ [IFLA_INET_CONF] = BUILD_POLICY(BINARY), /* size = IPV4_DEVCONF_MAX * 4 */
+};
+
+DEFINE_POLICY_SET(rtnl_inet);
+
+static const struct NLAPolicy rtnl_inet6_policies[] = {
+ [IFLA_INET6_FLAGS] = BUILD_POLICY(U32),
+ [IFLA_INET6_CONF] = BUILD_POLICY(BINARY), /* size = DEVCONF_MAX * sizeof(s32) */
+ [IFLA_INET6_STATS] = BUILD_POLICY(BINARY), /* size = IPSTATS_MIB_MAX * sizeof(u64) */
+ [IFLA_INET6_MCAST] = {}, /* unused. */
+ [IFLA_INET6_CACHEINFO] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_cacheinfo)),
+ [IFLA_INET6_ICMP6STATS] = BUILD_POLICY(BINARY), /* size = ICMP6_MIB_MAX * sizeof(u64) */
+ [IFLA_INET6_TOKEN] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)),
+ [IFLA_INET6_ADDR_GEN_MODE] = BUILD_POLICY(U8),
+};
+
+DEFINE_POLICY_SET(rtnl_inet6);
+
+static const NLAPolicySetUnionElement rtnl_prot_info_policy_set_union_elements[] = {
+ BUILD_UNION_ELEMENT_BY_FAMILY(AF_BRIDGE, rtnl_bridge_port),
+ BUILD_UNION_ELEMENT_BY_FAMILY(AF_INET6, rtnl_inet6),
+};
+
+DEFINE_POLICY_SET_UNION(rtnl_prot_info, 0);
+
+static const NLAPolicy rtnl_af_spec_unspec_policies[] = {
+ [AF_INET] = BUILD_POLICY_NESTED(rtnl_inet),
+ [AF_INET6] = BUILD_POLICY_NESTED(rtnl_inet6),
+};
+
+static const NLAPolicy rtnl_bridge_vlan_tunnel_info_policies[] = {
+ [IFLA_BRIDGE_VLAN_TUNNEL_ID] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_VLAN_TUNNEL_VID] = BUILD_POLICY(U16),
+ [IFLA_BRIDGE_VLAN_TUNNEL_FLAGS] = BUILD_POLICY(U16),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_vlan_tunnel_info);
+
+static const NLAPolicy rtnl_bridge_mrp_instance_policies[] = {
+ [IFLA_BRIDGE_MRP_INSTANCE_RING_ID] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_INSTANCE_PRIO] = BUILD_POLICY(U16),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_mrp_instance);
+
+static const NLAPolicy rtnl_bridge_mrp_port_state_policies[] = {
+ [IFLA_BRIDGE_MRP_PORT_STATE_STATE] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_mrp_port_state);
+
+static const NLAPolicy rtnl_bridge_mrp_port_role_policies[] = {
+ [IFLA_BRIDGE_MRP_PORT_ROLE_ROLE] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_mrp_port_role);
+
+static const NLAPolicy rtnl_bridge_mrp_ring_state_policies[] = {
+ [IFLA_BRIDGE_MRP_RING_STATE_RING_ID] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_RING_STATE_STATE] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_mrp_ring_state);
+
+static const NLAPolicy rtnl_bridge_mrp_ring_role_policies[] = {
+ [IFLA_BRIDGE_MRP_RING_ROLE_RING_ID] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_RING_ROLE_ROLE] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_mrp_ring_role);
+
+static const NLAPolicy rtnl_bridge_mrp_start_test_policies[] = {
+ [IFLA_BRIDGE_MRP_START_TEST_RING_ID] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_START_TEST_INTERVAL] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_START_TEST_MAX_MISS] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_START_TEST_PERIOD] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_START_TEST_MONITOR] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_mrp_start_test);
+
+static const NLAPolicy rtnl_bridge_mrp_info_policies[] = {
+ [IFLA_BRIDGE_MRP_INFO_RING_ID] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_INFO_P_IFINDEX] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_INFO_S_IFINDEX] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_INFO_PRIO] = BUILD_POLICY(U16),
+ [IFLA_BRIDGE_MRP_INFO_RING_STATE] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_INFO_RING_ROLE] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_INFO_TEST_INTERVAL] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_INFO_TEST_MAX_MISS] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_INFO_TEST_MONITOR] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_INFO_I_IFINDEX] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_INFO_IN_STATE] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_INFO_IN_ROLE] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_INFO_IN_TEST_INTERVAL] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_INFO_IN_TEST_MAX_MISS] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_mrp_info);
+
+static const NLAPolicy rtnl_bridge_mrp_in_role_policies[] = {
+ [IFLA_BRIDGE_MRP_IN_ROLE_RING_ID] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_IN_ROLE_IN_ID] = BUILD_POLICY(U16),
+ [IFLA_BRIDGE_MRP_IN_ROLE_ROLE] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_mrp_in_role);
+
+static const NLAPolicy rtnl_bridge_mrp_in_state_policies[] = {
+ [IFLA_BRIDGE_MRP_IN_STATE_IN_ID] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_IN_STATE_STATE] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_mrp_in_state);
+
+static const NLAPolicy rtnl_bridge_mrp_start_in_test_policies[] = {
+ [IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_mrp_start_in_test);
+
+static const NLAPolicy rtnl_bridge_mrp_policies[] = {
+ [IFLA_BRIDGE_MRP_INSTANCE] = BUILD_POLICY_NESTED(rtnl_bridge_mrp_instance),
+ [IFLA_BRIDGE_MRP_PORT_STATE] = BUILD_POLICY_NESTED(rtnl_bridge_mrp_port_state),
+ [IFLA_BRIDGE_MRP_PORT_ROLE] = BUILD_POLICY_NESTED(rtnl_bridge_mrp_port_role),
+ [IFLA_BRIDGE_MRP_RING_STATE] = BUILD_POLICY_NESTED(rtnl_bridge_mrp_ring_state),
+ [IFLA_BRIDGE_MRP_RING_ROLE] = BUILD_POLICY_NESTED(rtnl_bridge_mrp_ring_role),
+ [IFLA_BRIDGE_MRP_START_TEST] = BUILD_POLICY_NESTED(rtnl_bridge_mrp_start_test),
+ [IFLA_BRIDGE_MRP_INFO] = BUILD_POLICY_NESTED(rtnl_bridge_mrp_info),
+ [IFLA_BRIDGE_MRP_IN_ROLE] = BUILD_POLICY_NESTED(rtnl_bridge_mrp_in_role),
+ [IFLA_BRIDGE_MRP_IN_STATE] = BUILD_POLICY_NESTED(rtnl_bridge_mrp_in_state),
+ [IFLA_BRIDGE_MRP_START_IN_TEST] = BUILD_POLICY_NESTED(rtnl_bridge_mrp_start_in_test),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_mrp);
+
+static const NLAPolicy rtnl_bridge_cfm_mep_create_policies[] = {
+ [IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_cfm_mep_create);
+
+static const NLAPolicy rtnl_bridge_cfm_mep_delete_policies[] = {
+ [IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_cfm_mep_delete);
+
+static const NLAPolicy rtnl_bridge_cfm_mep_config_policies[] = {
+ [IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN),
+ [IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_cfm_mep_config);
+
+static const NLAPolicy rtnl_bridge_cfm_cc_config_policies[] = {
+ [IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID] = BUILD_POLICY_WITH_SIZE(BINARY, CFM_MAID_LENGTH),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_cfm_cc_config);
+
+static const NLAPolicy rtnl_bridge_cfm_cc_peer_mep_policies[] = {
+ [IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_CC_PEER_MEPID] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_cfm_cc_peer_mep);
+
+static const NLAPolicy rtnl_bridge_cfm_cc_rdi_policies[] = {
+ [IFLA_BRIDGE_CFM_CC_RDI_INSTANCE] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_CC_RDI_RDI] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_cfm_cc_rdi);
+
+static const NLAPolicy rtnl_bridge_cfm_cc_ccm_tx_policies[] = {
+ [IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN),
+ [IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE] = BUILD_POLICY(U8),
+ [IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE] = BUILD_POLICY(U8),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_cfm_cc_ccm_tx);
+
+static const NLAPolicy rtnl_bridge_cfm_mep_status_policies[] = {
+ [IFLA_BRIDGE_CFM_MEP_STATUS_INSTANCE] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_MEP_STATUS_OPCODE_UNEXP_SEEN] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_MEP_STATUS_VERSION_UNEXP_SEEN] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_MEP_STATUS_RX_LEVEL_LOW_SEEN] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_cfm_mep_status);
+
+static const NLAPolicy rtnl_bridge_cfm_cc_peer_status_policies[] = {
+ [IFLA_BRIDGE_CFM_CC_PEER_STATUS_INSTANCE] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_CC_PEER_STATUS_PEER_MEPID] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_CC_PEER_STATUS_CCM_DEFECT] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_CC_PEER_STATUS_RDI] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_CC_PEER_STATUS_PORT_TLV_VALUE] = BUILD_POLICY(U8),
+ [IFLA_BRIDGE_CFM_CC_PEER_STATUS_IF_TLV_VALUE] = BUILD_POLICY(U8),
+ [IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEEN] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_CC_PEER_STATUS_TLV_SEEN] = BUILD_POLICY(U32),
+ [IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEQ_UNEXP_SEEN] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_cfm_cc_peer_status);
+
+static const NLAPolicy rtnl_bridge_cfm_policies[] = {
+ [IFLA_BRIDGE_CFM_MEP_CREATE] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_mep_create),
+ [IFLA_BRIDGE_CFM_MEP_DELETE] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_mep_delete),
+ [IFLA_BRIDGE_CFM_MEP_CONFIG] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_mep_config),
+ [IFLA_BRIDGE_CFM_CC_CONFIG] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_cc_config),
+ [IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_cc_peer_mep),
+ [IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_cc_peer_mep),
+ [IFLA_BRIDGE_CFM_CC_RDI] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_cc_rdi),
+ [IFLA_BRIDGE_CFM_CC_CCM_TX] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_cc_ccm_tx),
+ [IFLA_BRIDGE_CFM_MEP_CREATE_INFO] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_mep_create),
+ [IFLA_BRIDGE_CFM_MEP_CONFIG_INFO] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_mep_config),
+ [IFLA_BRIDGE_CFM_CC_CONFIG_INFO] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_cc_config),
+ [IFLA_BRIDGE_CFM_CC_RDI_INFO] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_cc_rdi),
+ [IFLA_BRIDGE_CFM_CC_CCM_TX_INFO] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_cc_ccm_tx),
+ [IFLA_BRIDGE_CFM_CC_PEER_MEP_INFO] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_cc_peer_mep),
+ [IFLA_BRIDGE_CFM_MEP_STATUS_INFO] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_mep_status),
+ [IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_cc_peer_status),
+};
+
+DEFINE_POLICY_SET(rtnl_bridge_cfm);
+
+static const NLAPolicy rtnl_af_spec_bridge_policies[] = {
+ [IFLA_BRIDGE_FLAGS] = BUILD_POLICY(U16),
+ [IFLA_BRIDGE_MODE] = BUILD_POLICY(U16),
+ [IFLA_BRIDGE_VLAN_INFO] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct bridge_vlan_info)),
+ [IFLA_BRIDGE_VLAN_TUNNEL_INFO] = BUILD_POLICY_NESTED(rtnl_bridge_vlan_tunnel_info),
+ [IFLA_BRIDGE_MRP] = BUILD_POLICY_NESTED(rtnl_bridge_mrp),
+ [IFLA_BRIDGE_CFM] = BUILD_POLICY_NESTED(rtnl_bridge_cfm),
+};
+
+static const NLAPolicySetUnionElement rtnl_af_spec_policy_set_union_elements[] = {
+ BUILD_UNION_ELEMENT_BY_FAMILY(AF_UNSPEC, rtnl_af_spec_unspec),
+ BUILD_UNION_ELEMENT_BY_FAMILY(AF_BRIDGE, rtnl_af_spec_bridge),
+};
+
+DEFINE_POLICY_SET_UNION(rtnl_af_spec, 0);
+
+static const NLAPolicy rtnl_prop_list_policies[] = {
+ [IFLA_ALT_IFNAME] = BUILD_POLICY_WITH_SIZE(STRING, ALTIFNAMSIZ - 1),
+};
+
+DEFINE_POLICY_SET(rtnl_prop_list);
+
+static const NLAPolicy rtnl_vf_vlan_list_policies[] = {
+ [IFLA_VF_VLAN_INFO] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_vlan_info)),
+};
+
+DEFINE_POLICY_SET(rtnl_vf_vlan_list);
+
+static const NLAPolicy rtnl_vf_info_policies[] = {
+ [IFLA_VF_MAC] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_mac)),
+ [IFLA_VF_VLAN] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_vlan)),
+ [IFLA_VF_VLAN_LIST] = BUILD_POLICY_NESTED(rtnl_vf_vlan_list),
+ [IFLA_VF_TX_RATE] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_tx_rate)),
+ [IFLA_VF_SPOOFCHK] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_spoofchk)),
+ [IFLA_VF_RATE] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_rate)),
+ [IFLA_VF_LINK_STATE] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_link_state)),
+ [IFLA_VF_RSS_QUERY_EN] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_rss_query_en)),
+ [IFLA_VF_TRUST] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_trust)),
+ [IFLA_VF_IB_NODE_GUID] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_guid)),
+ [IFLA_VF_IB_PORT_GUID] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_guid)),
+};
+
+DEFINE_POLICY_SET(rtnl_vf_info);
+
+static const NLAPolicy rtnl_vfinfo_list_policies[] = {
+ [IFLA_VF_INFO] = BUILD_POLICY_NESTED(rtnl_vf_info),
+};
+
+DEFINE_POLICY_SET(rtnl_vfinfo_list);
+
+static const NLAPolicy rtnl_vf_port_policies[] = {
+ [IFLA_PORT_VF] = BUILD_POLICY(U32),
+ [IFLA_PORT_PROFILE] = BUILD_POLICY(STRING),
+ [IFLA_PORT_VSI_TYPE] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_port_vsi)),
+ [IFLA_PORT_INSTANCE_UUID] = BUILD_POLICY_WITH_SIZE(BINARY, PORT_UUID_MAX),
+ [IFLA_PORT_HOST_UUID] = BUILD_POLICY_WITH_SIZE(BINARY, PORT_UUID_MAX),
+ [IFLA_PORT_REQUEST] = BUILD_POLICY(U8),
+ [IFLA_PORT_RESPONSE] = BUILD_POLICY(U16),
+};
+
+DEFINE_POLICY_SET(rtnl_vf_port);
+
+static const NLAPolicy rtnl_vf_ports_policies[] = {
+ [IFLA_VF_PORT] = BUILD_POLICY_NESTED(rtnl_vf_port),
+};
+
+DEFINE_POLICY_SET(rtnl_vf_ports);
+
+static const NLAPolicy rtnl_xdp_policies[] = {
+ [IFLA_XDP_FD] = BUILD_POLICY(S32),
+ [IFLA_XDP_ATTACHED] = BUILD_POLICY(U8),
+ [IFLA_XDP_FLAGS] = BUILD_POLICY(U32),
+ [IFLA_XDP_PROG_ID] = BUILD_POLICY(U32),
+ [IFLA_XDP_DRV_PROG_ID] = BUILD_POLICY(U32),
+ [IFLA_XDP_SKB_PROG_ID] = BUILD_POLICY(U32),
+ [IFLA_XDP_HW_PROG_ID] = BUILD_POLICY(U32),
+ [IFLA_XDP_EXPECTED_FD] = BUILD_POLICY(S32),
+};
+
+DEFINE_POLICY_SET(rtnl_xdp);
+
+static const NLAPolicy rtnl_proto_down_reason_policies[] = {
+ [IFLA_PROTO_DOWN_REASON_MASK] = BUILD_POLICY(U32),
+ [IFLA_PROTO_DOWN_REASON_VALUE] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_proto_down_reason);
+
+static const NLAPolicy rtnl_link_policies[] = {
+ [IFLA_ADDRESS] = BUILD_POLICY(ETHER_ADDR),
+ [IFLA_BROADCAST] = BUILD_POLICY(ETHER_ADDR),
+ [IFLA_IFNAME] = BUILD_POLICY_WITH_SIZE(STRING, IFNAMSIZ - 1),
+ [IFLA_MTU] = BUILD_POLICY(U32),
+ [IFLA_LINK] = BUILD_POLICY(U32),
+ [IFLA_QDISC] = BUILD_POLICY(STRING),
+ [IFLA_STATS] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct rtnl_link_stats)),
+ [IFLA_COST] = { /* Not used. */ },
+ [IFLA_PRIORITY] = { /* Not used. */ },
+ [IFLA_MASTER] = BUILD_POLICY(U32),
+ [IFLA_WIRELESS] = { /* Used only by wext. */ },
+ [IFLA_PROTINFO] = BUILD_POLICY_NESTED_UNION_BY_FAMILY(rtnl_prot_info),
+ [IFLA_TXQLEN] = BUILD_POLICY(U32),
+ [IFLA_MAP] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct rtnl_link_ifmap)),
+ [IFLA_WEIGHT] = BUILD_POLICY(U32),
+ [IFLA_OPERSTATE] = BUILD_POLICY(U8),
+ [IFLA_LINKMODE] = BUILD_POLICY(U8),
+ [IFLA_LINKINFO] = BUILD_POLICY_NESTED(rtnl_link_info),
+ [IFLA_NET_NS_PID] = BUILD_POLICY(U32),
+ [IFLA_IFALIAS] = BUILD_POLICY_WITH_SIZE(STRING, IFALIASZ - 1),
+ [IFLA_NUM_VF] = BUILD_POLICY(U32),
+ [IFLA_VFINFO_LIST] = BUILD_POLICY_NESTED(rtnl_vfinfo_list),
+ [IFLA_STATS64] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct rtnl_link_stats64)),
+ [IFLA_VF_PORTS] = BUILD_POLICY_NESTED(rtnl_vf_ports),
+ [IFLA_PORT_SELF] = BUILD_POLICY_NESTED(rtnl_vf_port),
+ [IFLA_AF_SPEC] = BUILD_POLICY_NESTED_UNION_BY_FAMILY(rtnl_af_spec),
+ [IFLA_GROUP] = BUILD_POLICY(U32),
+ [IFLA_NET_NS_FD] = BUILD_POLICY(U32),
+ [IFLA_EXT_MASK] = BUILD_POLICY(U32),
+ [IFLA_PROMISCUITY] = BUILD_POLICY(U32),
+ [IFLA_NUM_TX_QUEUES] = BUILD_POLICY(U32),
+ [IFLA_NUM_RX_QUEUES] = BUILD_POLICY(U32),
+ [IFLA_CARRIER] = BUILD_POLICY(U8),
+ [IFLA_PHYS_PORT_ID] = BUILD_POLICY_WITH_SIZE(BINARY, MAX_PHYS_ITEM_ID_LEN),
+ [IFLA_CARRIER_CHANGES] = BUILD_POLICY(U32),
+ [IFLA_PHYS_SWITCH_ID] = BUILD_POLICY_WITH_SIZE(BINARY, MAX_PHYS_ITEM_ID_LEN),
+ [IFLA_LINK_NETNSID] = BUILD_POLICY(S32),
+ [IFLA_PHYS_PORT_NAME] = BUILD_POLICY_WITH_SIZE(STRING, IFNAMSIZ - 1),
+ [IFLA_PROTO_DOWN] = BUILD_POLICY(U8),
+ [IFLA_GSO_MAX_SEGS] = BUILD_POLICY(U32),
+ [IFLA_GSO_MAX_SIZE] = BUILD_POLICY(U32),
+ [IFLA_XDP] = BUILD_POLICY_NESTED(rtnl_xdp),
+ [IFLA_EVENT] = BUILD_POLICY(U32),
+ [IFLA_NEW_NETNSID] = BUILD_POLICY(S32),
+ [IFLA_TARGET_NETNSID] = BUILD_POLICY(S32),
+ [IFLA_CARRIER_UP_COUNT] = BUILD_POLICY(U32),
+ [IFLA_CARRIER_DOWN_COUNT] = BUILD_POLICY(U32),
+ [IFLA_NEW_IFINDEX] = BUILD_POLICY(S32),
+ [IFLA_MIN_MTU] = BUILD_POLICY(U32),
+ [IFLA_MAX_MTU] = BUILD_POLICY(U32),
+ [IFLA_PROP_LIST] = BUILD_POLICY_NESTED(rtnl_prop_list),
+ [IFLA_ALT_IFNAME] = BUILD_POLICY_WITH_SIZE(STRING, ALTIFNAMSIZ - 1),
+ [IFLA_PERM_ADDRESS] = BUILD_POLICY(ETHER_ADDR),
+ [IFLA_PROTO_DOWN_REASON] = BUILD_POLICY_NESTED(rtnl_proto_down_reason),
+ [IFLA_PARENT_DEV_NAME] = BUILD_POLICY(STRING),
+ [IFLA_PARENT_DEV_BUS_NAME] = BUILD_POLICY(STRING),
+};
+
+DEFINE_POLICY_SET(rtnl_link);
+
+/* IFA_FLAGS was defined in kernel 3.14, but we still support older
+ * kernels where IFA_MAX is lower. */
+static const NLAPolicy rtnl_address_policies[] = {
+ [IFA_ADDRESS] = BUILD_POLICY(IN_ADDR),
+ [IFA_LOCAL] = BUILD_POLICY(IN_ADDR),
+ [IFA_LABEL] = BUILD_POLICY_WITH_SIZE(STRING, IFNAMSIZ - 1),
+ [IFA_BROADCAST] = BUILD_POLICY(IN_ADDR),
+ [IFA_ANYCAST] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)),
+ [IFA_CACHEINFO] = BUILD_POLICY_WITH_SIZE(CACHE_INFO, sizeof(struct ifa_cacheinfo)),
+ [IFA_MULTICAST] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)),
+ [IFA_FLAGS] = BUILD_POLICY(U32),
+ [IFA_RT_PRIORITY] = BUILD_POLICY(U32),
+ [IFA_TARGET_NETNSID] = BUILD_POLICY(S32),
+};
+
+DEFINE_POLICY_SET(rtnl_address);
+
+/* RTM_METRICS --- array of struct rtattr with types of RTAX_* */
+
+static const NLAPolicy rtnl_route_metrics_policies[] = {
+ [RTAX_MTU] = BUILD_POLICY(U32),
+ [RTAX_WINDOW] = BUILD_POLICY(U32),
+ [RTAX_RTT] = BUILD_POLICY(U32),
+ [RTAX_RTTVAR] = BUILD_POLICY(U32),
+ [RTAX_SSTHRESH] = BUILD_POLICY(U32),
+ [RTAX_CWND] = BUILD_POLICY(U32),
+ [RTAX_ADVMSS] = BUILD_POLICY(U32),
+ [RTAX_REORDERING] = BUILD_POLICY(U32),
+ [RTAX_HOPLIMIT] = BUILD_POLICY(U32),
+ [RTAX_INITCWND] = BUILD_POLICY(U32),
+ [RTAX_FEATURES] = BUILD_POLICY(U32),
+ [RTAX_RTO_MIN] = BUILD_POLICY(U32),
+ [RTAX_INITRWND] = BUILD_POLICY(U32),
+ [RTAX_QUICKACK] = BUILD_POLICY(U32),
+ [RTAX_CC_ALGO] = BUILD_POLICY(STRING),
+ [RTAX_FASTOPEN_NO_COOKIE] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_route_metrics);
+
+static const NLAPolicy rtnl_route_policies[] = {
+ [RTA_DST] = BUILD_POLICY(IN_ADDR),
+ [RTA_SRC] = BUILD_POLICY(IN_ADDR),
+ [RTA_IIF] = BUILD_POLICY(U32),
+ [RTA_OIF] = BUILD_POLICY(U32),
+ [RTA_GATEWAY] = BUILD_POLICY(IN_ADDR),
+ [RTA_PRIORITY] = BUILD_POLICY(U32),
+ [RTA_PREFSRC] = BUILD_POLICY(IN_ADDR),
+ [RTA_METRICS] = BUILD_POLICY_NESTED(rtnl_route_metrics),
+ [RTA_MULTIPATH] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct rtnexthop)),
+ [RTA_FLOW] = BUILD_POLICY(U32),
+ [RTA_CACHEINFO] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct rta_cacheinfo)),
+ [RTA_TABLE] = BUILD_POLICY(U32),
+ [RTA_MARK] = BUILD_POLICY(U32),
+ [RTA_MFC_STATS] = BUILD_POLICY(U64),
+ [RTA_VIA] = BUILD_POLICY(BINARY), /* See struct rtvia */
+ [RTA_NEWDST] = BUILD_POLICY(U32),
+ [RTA_PREF] = BUILD_POLICY(U8),
+ [RTA_ENCAP_TYPE] = BUILD_POLICY(U16),
+ [RTA_ENCAP] = { .type = NETLINK_TYPE_NESTED }, /* Multiple type systems i.e. LWTUNNEL_ENCAP_MPLS/LWTUNNEL_ENCAP_IP/LWTUNNEL_ENCAP_ILA etc... */
+ [RTA_EXPIRES] = BUILD_POLICY(U32),
+ [RTA_UID] = BUILD_POLICY(U32),
+ [RTA_TTL_PROPAGATE] = BUILD_POLICY(U8),
+ [RTA_IP_PROTO] = BUILD_POLICY(U8),
+ [RTA_SPORT] = BUILD_POLICY(U16),
+ [RTA_DPORT] = BUILD_POLICY(U16),
+ [RTA_NH_ID] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_route);
+
+static const NLAPolicy rtnl_neigh_policies[] = {
+ [NDA_DST] = BUILD_POLICY(IN_ADDR),
+ [NDA_LLADDR] = BUILD_POLICY(ETHER_ADDR),
+ [NDA_CACHEINFO] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct nda_cacheinfo)),
+ [NDA_PROBES] = BUILD_POLICY(U32),
+ [NDA_VLAN] = BUILD_POLICY(U16),
+ [NDA_PORT] = BUILD_POLICY(U16),
+ [NDA_VNI] = BUILD_POLICY(U32),
+ [NDA_IFINDEX] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_neigh);
+
+static const NLAPolicy rtnl_addrlabel_policies[] = {
+ [IFAL_ADDRESS] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)),
+ [IFAL_LABEL] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_addrlabel);
+
+static const NLAPolicy rtnl_routing_policy_rule_policies[] = {
+ [FRA_DST] = BUILD_POLICY(IN_ADDR),
+ [FRA_SRC] = BUILD_POLICY(IN_ADDR),
+ [FRA_IIFNAME] = BUILD_POLICY(STRING),
+ [FRA_GOTO] = BUILD_POLICY(U32),
+ [FRA_PRIORITY] = BUILD_POLICY(U32),
+ [FRA_FWMARK] = BUILD_POLICY(U32),
+ [FRA_FLOW] = BUILD_POLICY(U32),
+ [FRA_TUN_ID] = BUILD_POLICY(U64),
+ [FRA_SUPPRESS_IFGROUP] = BUILD_POLICY(U32),
+ [FRA_SUPPRESS_PREFIXLEN] = BUILD_POLICY(U32),
+ [FRA_TABLE] = BUILD_POLICY(U32),
+ [FRA_FWMASK] = BUILD_POLICY(U32),
+ [FRA_OIFNAME] = BUILD_POLICY(STRING),
+ [FRA_PAD] = BUILD_POLICY(U32),
+ [FRA_L3MDEV] = BUILD_POLICY(U8),
+ [FRA_UID_RANGE] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct fib_rule_uid_range)),
+ [FRA_PROTOCOL] = BUILD_POLICY(U8),
+ [FRA_IP_PROTO] = BUILD_POLICY(U8),
+ [FRA_SPORT_RANGE] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct fib_rule_port_range)),
+ [FRA_DPORT_RANGE] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct fib_rule_port_range)),
+};
+
+DEFINE_POLICY_SET(rtnl_routing_policy_rule);
+
+static const NLAPolicy rtnl_nexthop_policies[] = {
+ [NHA_ID] = BUILD_POLICY(U32),
+ [NHA_GROUP] = { /* array of struct nexthop_grp */ },
+ [NHA_GROUP_TYPE] = BUILD_POLICY(U16),
+ [NHA_BLACKHOLE] = BUILD_POLICY(FLAG),
+ [NHA_OIF] = BUILD_POLICY(U32),
+ [NHA_GATEWAY] = BUILD_POLICY(IN_ADDR),
+ [NHA_ENCAP_TYPE] = BUILD_POLICY(U16),
+ [NHA_ENCAP] = { .type = NETLINK_TYPE_NESTED },
+ [NHA_GROUPS] = BUILD_POLICY(FLAG),
+ [NHA_MASTER] = BUILD_POLICY(U32),
+ [NHA_FDB] = BUILD_POLICY(FLAG),
+};
+
+DEFINE_POLICY_SET(rtnl_nexthop);
+
+static const NLAPolicy rtnl_tca_option_data_cake_policies[] = {
+ [TCA_CAKE_BASE_RATE64] = BUILD_POLICY(U64),
+ [TCA_CAKE_DIFFSERV_MODE] = BUILD_POLICY(U32),
+ [TCA_CAKE_ATM] = BUILD_POLICY(U32),
+ [TCA_CAKE_FLOW_MODE] = BUILD_POLICY(U32),
+ [TCA_CAKE_OVERHEAD] = BUILD_POLICY(S32),
+ [TCA_CAKE_RTT] = BUILD_POLICY(U32),
+ [TCA_CAKE_TARGET] = BUILD_POLICY(U32),
+ [TCA_CAKE_AUTORATE] = BUILD_POLICY(U32),
+ [TCA_CAKE_MEMORY] = BUILD_POLICY(U32),
+ [TCA_CAKE_NAT] = BUILD_POLICY(U32),
+ [TCA_CAKE_RAW] = BUILD_POLICY(U32),
+ [TCA_CAKE_WASH] = BUILD_POLICY(U32),
+ [TCA_CAKE_MPU] = BUILD_POLICY(U32),
+ [TCA_CAKE_INGRESS] = BUILD_POLICY(U32),
+ [TCA_CAKE_ACK_FILTER] = BUILD_POLICY(U32),
+ [TCA_CAKE_SPLIT_GSO] = BUILD_POLICY(U32),
+ [TCA_CAKE_FWMARK] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicy rtnl_tca_option_data_codel_policies[] = {
+ [TCA_CODEL_TARGET] = BUILD_POLICY(U32),
+ [TCA_CODEL_LIMIT] = BUILD_POLICY(U32),
+ [TCA_CODEL_INTERVAL] = BUILD_POLICY(U32),
+ [TCA_CODEL_ECN] = BUILD_POLICY(U32),
+ [TCA_CODEL_CE_THRESHOLD] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicy rtnl_tca_option_data_drr_policies[] = {
+ [TCA_DRR_QUANTUM] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicy rtnl_tca_option_data_ets_quanta_policies[] = {
+ [TCA_ETS_QUANTA_BAND] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_tca_option_data_ets_quanta);
+
+static const NLAPolicy rtnl_tca_option_data_ets_prio_policies[] = {
+ [TCA_ETS_PRIOMAP_BAND] = BUILD_POLICY(U8),
+};
+
+DEFINE_POLICY_SET(rtnl_tca_option_data_ets_prio);
+
+static const NLAPolicy rtnl_tca_option_data_ets_policies[] = {
+ [TCA_ETS_NBANDS] = BUILD_POLICY(U8),
+ [TCA_ETS_NSTRICT] = BUILD_POLICY(U8),
+ [TCA_ETS_QUANTA] = BUILD_POLICY_NESTED(rtnl_tca_option_data_ets_quanta),
+ [TCA_ETS_PRIOMAP] = BUILD_POLICY_NESTED(rtnl_tca_option_data_ets_prio),
+ [TCA_ETS_QUANTA_BAND] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicy rtnl_tca_option_data_fq_policies[] = {
+ [TCA_FQ_PLIMIT] = BUILD_POLICY(U32),
+ [TCA_FQ_FLOW_PLIMIT] = BUILD_POLICY(U32),
+ [TCA_FQ_QUANTUM] = BUILD_POLICY(U32),
+ [TCA_FQ_INITIAL_QUANTUM] = BUILD_POLICY(U32),
+ [TCA_FQ_RATE_ENABLE] = BUILD_POLICY(U32),
+ [TCA_FQ_FLOW_DEFAULT_RATE] = BUILD_POLICY(U32),
+ [TCA_FQ_FLOW_MAX_RATE] = BUILD_POLICY(U32),
+ [TCA_FQ_BUCKETS_LOG] = BUILD_POLICY(U32),
+ [TCA_FQ_FLOW_REFILL_DELAY] = BUILD_POLICY(U32),
+ [TCA_FQ_LOW_RATE_THRESHOLD] = BUILD_POLICY(U32),
+ [TCA_FQ_CE_THRESHOLD] = BUILD_POLICY(U32),
+ [TCA_FQ_ORPHAN_MASK] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicy rtnl_tca_option_data_fq_codel_policies[] = {
+ [TCA_FQ_CODEL_TARGET] = BUILD_POLICY(U32),
+ [TCA_FQ_CODEL_LIMIT] = BUILD_POLICY(U32),
+ [TCA_FQ_CODEL_INTERVAL] = BUILD_POLICY(U32),
+ [TCA_FQ_CODEL_ECN] = BUILD_POLICY(U32),
+ [TCA_FQ_CODEL_FLOWS] = BUILD_POLICY(U32),
+ [TCA_FQ_CODEL_QUANTUM] = BUILD_POLICY(U32),
+ [TCA_FQ_CODEL_CE_THRESHOLD] = BUILD_POLICY(U32),
+ [TCA_FQ_CODEL_DROP_BATCH_SIZE] = BUILD_POLICY(U32),
+ [TCA_FQ_CODEL_MEMORY_LIMIT] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicy rtnl_tca_option_data_fq_pie_policies[] = {
+ [TCA_FQ_PIE_LIMIT] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicy rtnl_tca_option_data_gred_policies[] = {
+ [TCA_GRED_DPS] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct tc_gred_sopt)),
+};
+
+static const NLAPolicy rtnl_tca_option_data_hhf_policies[] = {
+ [TCA_HHF_BACKLOG_LIMIT] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicy rtnl_tca_option_data_htb_policies[] = {
+ [TCA_HTB_PARMS] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct tc_htb_opt)),
+ [TCA_HTB_INIT] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct tc_htb_glob)),
+ [TCA_HTB_CTAB] = BUILD_POLICY_WITH_SIZE(BINARY, TC_RTAB_SIZE),
+ [TCA_HTB_RTAB] = BUILD_POLICY_WITH_SIZE(BINARY, TC_RTAB_SIZE),
+ [TCA_HTB_RATE64] = BUILD_POLICY(U64),
+ [TCA_HTB_CEIL64] = BUILD_POLICY(U64),
+};
+
+static const NLAPolicy rtnl_tca_option_data_pie_policies[] = {
+ [TCA_PIE_LIMIT] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicy rtnl_tca_option_data_qfq_policies[] = {
+ [TCA_QFQ_WEIGHT] = BUILD_POLICY(U32),
+ [TCA_QFQ_LMAX] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicy rtnl_tca_option_data_sfb_policies[] = {
+ [TCA_SFB_PARMS] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct tc_sfb_qopt)),
+};
+
+static const NLAPolicy rtnl_tca_option_data_tbf_policies[] = {
+ [TCA_TBF_PARMS] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct tc_tbf_qopt)),
+ [TCA_TBF_RTAB] = BUILD_POLICY_WITH_SIZE(BINARY, TC_RTAB_SIZE),
+ [TCA_TBF_PTAB] = BUILD_POLICY_WITH_SIZE(BINARY, TC_RTAB_SIZE),
+ [TCA_TBF_RATE64] = BUILD_POLICY(U64),
+ [TCA_TBF_PRATE64] = BUILD_POLICY(U64),
+ [TCA_TBF_BURST] = BUILD_POLICY(U32),
+ [TCA_TBF_PBURST] = BUILD_POLICY(U32),
+};
+
+static const NLAPolicySetUnionElement rtnl_tca_option_data_policy_set_union_elements[] = {
+ BUILD_UNION_ELEMENT_BY_STRING("cake", rtnl_tca_option_data_cake),
+ BUILD_UNION_ELEMENT_BY_STRING("codel", rtnl_tca_option_data_codel),
+ BUILD_UNION_ELEMENT_BY_STRING("drr", rtnl_tca_option_data_drr),
+ BUILD_UNION_ELEMENT_BY_STRING("ets", rtnl_tca_option_data_ets),
+ BUILD_UNION_ELEMENT_BY_STRING("fq", rtnl_tca_option_data_fq),
+ BUILD_UNION_ELEMENT_BY_STRING("fq_codel", rtnl_tca_option_data_fq_codel),
+ BUILD_UNION_ELEMENT_BY_STRING("fq_pie", rtnl_tca_option_data_fq_pie),
+ BUILD_UNION_ELEMENT_BY_STRING("gred", rtnl_tca_option_data_gred),
+ BUILD_UNION_ELEMENT_BY_STRING("hhf", rtnl_tca_option_data_hhf),
+ BUILD_UNION_ELEMENT_BY_STRING("htb", rtnl_tca_option_data_htb),
+ BUILD_UNION_ELEMENT_BY_STRING("pie", rtnl_tca_option_data_pie),
+ BUILD_UNION_ELEMENT_BY_STRING("qfq", rtnl_tca_option_data_qfq),
+ BUILD_UNION_ELEMENT_BY_STRING("sfb", rtnl_tca_option_data_sfb),
+ BUILD_UNION_ELEMENT_BY_STRING("tbf", rtnl_tca_option_data_tbf),
+};
+
+DEFINE_POLICY_SET_UNION(rtnl_tca_option_data, TCA_KIND);
+
+static const NLAPolicy rtnl_tca_policies[] = {
+ [TCA_KIND] = BUILD_POLICY(STRING),
+ [TCA_OPTIONS] = BUILD_POLICY_NESTED_UNION_BY_STRING(rtnl_tca_option_data),
+ [TCA_INGRESS_BLOCK] = BUILD_POLICY(U32),
+ [TCA_EGRESS_BLOCK] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(rtnl_tca);
+
+static const NLAPolicy rtnl_mdb_policies[] = {
+ [MDBA_SET_ENTRY] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct br_port_msg)),
+};
+
+DEFINE_POLICY_SET(rtnl_mdb);
+
+static const NLAPolicy rtnl_policies[] = {
+ [RTM_NEWLINK] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_link, sizeof(struct ifinfomsg)),
+ [RTM_DELLINK] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_link, sizeof(struct ifinfomsg)),
+ [RTM_GETLINK] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_link, sizeof(struct ifinfomsg)),
+ [RTM_SETLINK] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_link, sizeof(struct ifinfomsg)),
+ [RTM_NEWLINKPROP] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_link, sizeof(struct ifinfomsg)),
+ [RTM_DELLINKPROP] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_link, sizeof(struct ifinfomsg)),
+ [RTM_GETLINKPROP] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_link, sizeof(struct ifinfomsg)),
+ [RTM_NEWADDR] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_address, sizeof(struct ifaddrmsg)),
+ [RTM_DELADDR] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_address, sizeof(struct ifaddrmsg)),
+ [RTM_GETADDR] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_address, sizeof(struct ifaddrmsg)),
+ [RTM_NEWROUTE] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_route, sizeof(struct rtmsg)),
+ [RTM_DELROUTE] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_route, sizeof(struct rtmsg)),
+ [RTM_GETROUTE] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_route, sizeof(struct rtmsg)),
+ [RTM_NEWNEIGH] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_neigh, sizeof(struct ndmsg)),
+ [RTM_DELNEIGH] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_neigh, sizeof(struct ndmsg)),
+ [RTM_GETNEIGH] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_neigh, sizeof(struct ndmsg)),
+ [RTM_NEWADDRLABEL] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_addrlabel, sizeof(struct ifaddrlblmsg)),
+ [RTM_DELADDRLABEL] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_addrlabel, sizeof(struct ifaddrlblmsg)),
+ [RTM_GETADDRLABEL] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_addrlabel, sizeof(struct ifaddrlblmsg)),
+ [RTM_NEWRULE] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_routing_policy_rule, sizeof(struct fib_rule_hdr)),
+ [RTM_DELRULE] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_routing_policy_rule, sizeof(struct fib_rule_hdr)),
+ [RTM_GETRULE] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_routing_policy_rule, sizeof(struct fib_rule_hdr)),
+ [RTM_NEWNEXTHOP] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_nexthop, sizeof(struct nhmsg)),
+ [RTM_DELNEXTHOP] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_nexthop, sizeof(struct nhmsg)),
+ [RTM_GETNEXTHOP] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_nexthop, sizeof(struct nhmsg)),
+ [RTM_NEWQDISC] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_tca, sizeof(struct tcmsg)),
+ [RTM_DELQDISC] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_tca, sizeof(struct tcmsg)),
+ [RTM_GETQDISC] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_tca, sizeof(struct tcmsg)),
+ [RTM_NEWTCLASS] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_tca, sizeof(struct tcmsg)),
+ [RTM_DELTCLASS] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_tca, sizeof(struct tcmsg)),
+ [RTM_GETTCLASS] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_tca, sizeof(struct tcmsg)),
+ [RTM_NEWMDB] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_mdb, sizeof(struct br_port_msg)),
+ [RTM_DELMDB] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_mdb, sizeof(struct br_port_msg)),
+ [RTM_GETMDB] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_mdb, sizeof(struct br_port_msg)),
+};
+
+DEFINE_POLICY_SET(rtnl);
+
+const NLAPolicy *rtnl_get_policy(uint16_t nlmsg_type) {
+ return policy_set_get_policy(&rtnl_policy_set, nlmsg_type);
+}
diff --git a/src/libsystemd/sd-netlink/netlink-types.c b/src/libsystemd/sd-netlink/netlink-types.c
new file mode 100644
index 0000000..21ef80c
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-types.c
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/netlink.h>
+
+#include "netlink-genl.h"
+#include "netlink-internal.h"
+#include "netlink-types-internal.h"
+
+static const NLAPolicy empty_policies[1] = {
+ /* fake array to avoid .types==NULL, which denotes invalid type-systems */
+};
+
+DEFINE_POLICY_SET(empty);
+
+static const NLAPolicy error_policies[] = {
+ [NLMSGERR_ATTR_MSG] = BUILD_POLICY(STRING),
+ [NLMSGERR_ATTR_OFFS] = BUILD_POLICY(U32),
+};
+
+DEFINE_POLICY_SET(error);
+
+static const NLAPolicy basic_policies[] = {
+ [NLMSG_DONE] = BUILD_POLICY_NESTED(empty),
+ [NLMSG_ERROR] = BUILD_POLICY_NESTED_WITH_SIZE(error, sizeof(struct nlmsgerr)),
+};
+
+DEFINE_POLICY_SET(basic);
+
+NLAType policy_get_type(const NLAPolicy *policy) {
+ return ASSERT_PTR(policy)->type;
+}
+
+size_t policy_get_size(const NLAPolicy *policy) {
+ return ASSERT_PTR(policy)->size;
+}
+
+const NLAPolicySet *policy_get_policy_set(const NLAPolicy *policy) {
+ assert(policy);
+ assert(policy->type == NETLINK_TYPE_NESTED);
+
+ return ASSERT_PTR(policy->policy_set);
+}
+
+const NLAPolicySetUnion *policy_get_policy_set_union(const NLAPolicy *policy) {
+ assert(policy);
+ assert(IN_SET(policy->type, NETLINK_TYPE_NESTED_UNION_BY_STRING, NETLINK_TYPE_NESTED_UNION_BY_FAMILY));
+
+ return ASSERT_PTR(policy->policy_set_union);
+}
+
+int netlink_get_policy_set_and_header_size(
+ sd_netlink *nl,
+ uint16_t type,
+ const NLAPolicySet **ret_policy_set,
+ size_t *ret_header_size) {
+
+ const NLAPolicy *policy;
+
+ assert(nl);
+
+ if (IN_SET(type, NLMSG_DONE, NLMSG_ERROR))
+ policy = policy_set_get_policy(&basic_policy_set, type);
+ else
+ switch (nl->protocol) {
+ case NETLINK_ROUTE:
+ policy = rtnl_get_policy(type);
+ break;
+ case NETLINK_NETFILTER:
+ policy = nfnl_get_policy(type);
+ break;
+ case NETLINK_GENERIC:
+ return genl_get_policy_set_and_header_size(nl, type, ret_policy_set, ret_header_size);
+ default:
+ return -EOPNOTSUPP;
+ }
+ if (!policy)
+ return -EOPNOTSUPP;
+
+ if (policy_get_type(policy) != NETLINK_TYPE_NESTED)
+ return -EOPNOTSUPP;
+
+ if (ret_policy_set)
+ *ret_policy_set = policy_get_policy_set(policy);
+ if (ret_header_size)
+ *ret_header_size = policy_get_size(policy);
+ return 0;
+}
+
+const NLAPolicy *policy_set_get_policy(const NLAPolicySet *policy_set, uint16_t attr_type) {
+ const NLAPolicy *policy;
+
+ assert(policy_set);
+ assert(policy_set->policies);
+
+ if (attr_type >= policy_set->count)
+ return NULL;
+
+ policy = &policy_set->policies[attr_type];
+
+ if (policy->type == NETLINK_TYPE_UNSPEC)
+ return NULL;
+
+ return policy;
+}
+
+const NLAPolicySet *policy_set_get_policy_set(const NLAPolicySet *policy_set, uint16_t attr_type) {
+ const NLAPolicy *policy;
+
+ policy = policy_set_get_policy(policy_set, attr_type);
+ if (!policy)
+ return NULL;
+
+ return policy_get_policy_set(policy);
+}
+
+const NLAPolicySetUnion *policy_set_get_policy_set_union(const NLAPolicySet *policy_set, uint16_t attr_type) {
+ const NLAPolicy *policy;
+
+ policy = policy_set_get_policy(policy_set, attr_type);
+ if (!policy)
+ return NULL;
+
+ return policy_get_policy_set_union(policy);
+}
+
+uint16_t policy_set_union_get_match_attribute(const NLAPolicySetUnion *policy_set_union) {
+ assert(policy_set_union->match_attribute != 0);
+
+ return policy_set_union->match_attribute;
+}
+
+const NLAPolicySet *policy_set_union_get_policy_set_by_string(const NLAPolicySetUnion *policy_set_union, const char *string) {
+ assert(policy_set_union);
+ assert(policy_set_union->elements);
+ assert(string);
+
+ for (size_t i = 0; i < policy_set_union->count; i++)
+ if (streq(policy_set_union->elements[i].string, string))
+ return &policy_set_union->elements[i].policy_set;
+
+ return NULL;
+}
+
+const NLAPolicySet *policy_set_union_get_policy_set_by_family(const NLAPolicySetUnion *policy_set_union, int family) {
+ assert(policy_set_union);
+ assert(policy_set_union->elements);
+
+ for (size_t i = 0; i < policy_set_union->count; i++)
+ if (policy_set_union->elements[i].family == family)
+ return &policy_set_union->elements[i].policy_set;
+
+ return NULL;
+}
diff --git a/src/libsystemd/sd-netlink/netlink-types.h b/src/libsystemd/sd-netlink/netlink-types.h
new file mode 100644
index 0000000..e034a98
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-types.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <errno.h>
+
+#include "sd-netlink.h"
+
+typedef enum NLAType {
+ NETLINK_TYPE_UNSPEC, /* NLA_UNSPEC */
+ NETLINK_TYPE_BINARY, /* NLA_BINARY */
+ NETLINK_TYPE_FLAG, /* NLA_FLAG */
+ NETLINK_TYPE_U8, /* NLA_U8 */
+ NETLINK_TYPE_U16, /* NLA_U16 */
+ NETLINK_TYPE_U32, /* NLA_U32 */
+ NETLINK_TYPE_U64, /* NLA_U64 */
+ NETLINK_TYPE_S8, /* NLA_S8 */
+ NETLINK_TYPE_S16, /* NLA_S16 */
+ NETLINK_TYPE_S32, /* NLA_S32 */
+ NETLINK_TYPE_S64, /* NLA_S64 */
+ NETLINK_TYPE_STRING, /* NLA_STRING */
+ NETLINK_TYPE_BITFIELD32, /* NLA_BITFIELD32 */
+ NETLINK_TYPE_REJECT, /* NLA_REJECT */
+ NETLINK_TYPE_IN_ADDR,
+ NETLINK_TYPE_ETHER_ADDR,
+ NETLINK_TYPE_CACHE_INFO,
+ NETLINK_TYPE_SOCKADDR,
+ NETLINK_TYPE_NESTED, /* NLA_NESTED */
+ NETLINK_TYPE_NESTED_UNION_BY_STRING,
+ NETLINK_TYPE_NESTED_UNION_BY_FAMILY,
+ _NETLINK_TYPE_MAX,
+ _NETLINK_TYPE_INVALID = -EINVAL,
+} NLAType;
+
+typedef struct NLAPolicy NLAPolicy;
+typedef struct NLAPolicySet NLAPolicySet;
+typedef struct NLAPolicySetUnion NLAPolicySetUnion;
+
+const NLAPolicy *rtnl_get_policy(uint16_t nlmsg_type);
+const NLAPolicy *nfnl_get_policy(uint16_t nlmsg_type);
+const NLAPolicySet *genl_get_policy_set_by_name(const char *name);
+int genl_get_policy_set_and_header_size(
+ sd_netlink *nl,
+ uint16_t id,
+ const NLAPolicySet **ret_policy_set,
+ size_t *ret_header_size);
+
+NLAType policy_get_type(const NLAPolicy *policy);
+size_t policy_get_size(const NLAPolicy *policy);
+const NLAPolicySet *policy_get_policy_set(const NLAPolicy *policy);
+const NLAPolicySetUnion *policy_get_policy_set_union(const NLAPolicy *policy);
+
+int netlink_get_policy_set_and_header_size(
+ sd_netlink *nl,
+ uint16_t type,
+ const NLAPolicySet **ret_policy_set,
+ size_t *ret_header_size);
+
+const NLAPolicy *policy_set_get_policy(const NLAPolicySet *policy_set, uint16_t attr_type);
+const NLAPolicySet *policy_set_get_policy_set(const NLAPolicySet *type_system, uint16_t attr_type);
+const NLAPolicySetUnion *policy_set_get_policy_set_union(const NLAPolicySet *type_system, uint16_t attr_type);
+uint16_t policy_set_union_get_match_attribute(const NLAPolicySetUnion *policy_set_union);
+const NLAPolicySet *policy_set_union_get_policy_set_by_string(const NLAPolicySetUnion *type_system_union, const char *string);
+const NLAPolicySet *policy_set_union_get_policy_set_by_family(const NLAPolicySetUnion *type_system_union, int family);
diff --git a/src/libsystemd/sd-netlink/netlink-util.c b/src/libsystemd/sd-netlink/netlink-util.c
new file mode 100644
index 0000000..832159a
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-util.c
@@ -0,0 +1,818 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-netlink.h"
+
+#include "fd-util.h"
+#include "iovec-util.h"
+#include "memory-util.h"
+#include "netlink-internal.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "strv.h"
+
+static int set_link_name(sd_netlink **rtnl, int ifindex, const char *name) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL;
+ int r;
+
+ assert(rtnl);
+ assert(ifindex > 0);
+ assert(name);
+
+ /* Assign the requested name. */
+ r = sd_rtnl_message_new_link(*rtnl, &message, RTM_SETLINK, ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(message, IFLA_IFNAME, name);
+ if (r < 0)
+ return r;
+
+ return sd_netlink_call(*rtnl, message, 0, NULL);
+}
+
+int rtnl_set_link_name(sd_netlink **rtnl, int ifindex, const char *name, char* const *alternative_names) {
+ _cleanup_strv_free_ char **original_altnames = NULL, **new_altnames = NULL;
+ bool altname_deleted = false;
+ int r;
+
+ assert(rtnl);
+ assert(ifindex > 0);
+
+ if (isempty(name) && strv_isempty(alternative_names))
+ return 0;
+
+ if (name && !ifname_valid(name))
+ return -EINVAL;
+
+ /* If the requested name is already assigned as an alternative name, then first drop it. */
+ r = rtnl_get_link_alternative_names(rtnl, ifindex, &original_altnames);
+ if (r < 0)
+ log_debug_errno(r, "Failed to get alternative names on network interface %i, ignoring: %m",
+ ifindex);
+
+ if (name) {
+ if (strv_contains(original_altnames, name)) {
+ r = rtnl_delete_link_alternative_names(rtnl, ifindex, STRV_MAKE(name));
+ if (r < 0)
+ return log_debug_errno(r, "Failed to remove '%s' from alternative names on network interface %i: %m",
+ name, ifindex);
+
+ altname_deleted = true;
+ }
+
+ r = set_link_name(rtnl, ifindex, name);
+ if (r < 0)
+ goto fail;
+ }
+
+ /* Filter out already assigned names from requested alternative names. Also, dedup the request. */
+ STRV_FOREACH(a, alternative_names) {
+ if (streq_ptr(name, *a))
+ continue;
+
+ if (strv_contains(original_altnames, *a))
+ continue;
+
+ if (strv_contains(new_altnames, *a))
+ continue;
+
+ if (!ifname_valid_full(*a, IFNAME_VALID_ALTERNATIVE))
+ continue;
+
+ r = strv_extend(&new_altnames, *a);
+ if (r < 0)
+ return r;
+ }
+
+ strv_sort(new_altnames);
+
+ /* Finally, assign alternative names. */
+ r = rtnl_set_link_alternative_names(rtnl, ifindex, new_altnames);
+ if (r == -EEXIST) /* Already assigned to another interface? */
+ STRV_FOREACH(a, new_altnames) {
+ r = rtnl_set_link_alternative_names(rtnl, ifindex, STRV_MAKE(*a));
+ if (r < 0)
+ log_debug_errno(r, "Failed to assign '%s' as an alternative name on network interface %i, ignoring: %m",
+ *a, ifindex);
+ }
+ else if (r < 0)
+ log_debug_errno(r, "Failed to assign alternative names on network interface %i, ignoring: %m", ifindex);
+
+ return 0;
+
+fail:
+ if (altname_deleted) {
+ int q = rtnl_set_link_alternative_names(rtnl, ifindex, STRV_MAKE(name));
+ if (q < 0)
+ log_debug_errno(q, "Failed to restore '%s' as an alternative name on network interface %i, ignoring: %m",
+ name, ifindex);
+ }
+
+ return r;
+}
+
+int rtnl_set_link_properties(
+ sd_netlink **rtnl,
+ int ifindex,
+ const char *alias,
+ const struct hw_addr_data *hw_addr,
+ uint32_t txqueues,
+ uint32_t rxqueues,
+ uint32_t txqueuelen,
+ uint32_t mtu,
+ uint32_t gso_max_size,
+ size_t gso_max_segments) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL;
+ int r;
+
+ assert(rtnl);
+ assert(ifindex > 0);
+
+ if (!alias &&
+ (!hw_addr || hw_addr->length == 0) &&
+ txqueues == 0 &&
+ rxqueues == 0 &&
+ txqueuelen == UINT32_MAX &&
+ mtu == 0 &&
+ gso_max_size == 0 &&
+ gso_max_segments == 0)
+ return 0;
+
+ if (!*rtnl) {
+ r = sd_netlink_open(rtnl);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_rtnl_message_new_link(*rtnl, &message, RTM_SETLINK, ifindex);
+ if (r < 0)
+ return r;
+
+ if (alias) {
+ r = sd_netlink_message_append_string(message, IFLA_IFALIAS, alias);
+ if (r < 0)
+ return r;
+ }
+
+ if (hw_addr && hw_addr->length > 0) {
+ r = netlink_message_append_hw_addr(message, IFLA_ADDRESS, hw_addr);
+ if (r < 0)
+ return r;
+ }
+
+ if (txqueues > 0) {
+ r = sd_netlink_message_append_u32(message, IFLA_NUM_TX_QUEUES, txqueues);
+ if (r < 0)
+ return r;
+ }
+
+ if (rxqueues > 0) {
+ r = sd_netlink_message_append_u32(message, IFLA_NUM_RX_QUEUES, rxqueues);
+ if (r < 0)
+ return r;
+ }
+
+ if (txqueuelen < UINT32_MAX) {
+ r = sd_netlink_message_append_u32(message, IFLA_TXQLEN, txqueuelen);
+ if (r < 0)
+ return r;
+ }
+
+ if (mtu != 0) {
+ r = sd_netlink_message_append_u32(message, IFLA_MTU, mtu);
+ if (r < 0)
+ return r;
+ }
+
+ if (gso_max_size > 0) {
+ r = sd_netlink_message_append_u32(message, IFLA_GSO_MAX_SIZE, gso_max_size);
+ if (r < 0)
+ return r;
+ }
+
+ if (gso_max_segments > 0) {
+ r = sd_netlink_message_append_u32(message, IFLA_GSO_MAX_SEGS, gso_max_segments);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_netlink_call(*rtnl, message, 0, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int rtnl_get_link_alternative_names(sd_netlink **rtnl, int ifindex, char ***ret) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL, *reply = NULL;
+ _cleanup_strv_free_ char **names = NULL;
+ int r;
+
+ assert(rtnl);
+ assert(ifindex > 0);
+ assert(ret);
+
+ if (!*rtnl) {
+ r = sd_netlink_open(rtnl);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_rtnl_message_new_link(*rtnl, &message, RTM_GETLINK, ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(*rtnl, message, 0, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_read_strv(reply, IFLA_PROP_LIST, IFLA_ALT_IFNAME, &names);
+ if (r < 0 && r != -ENODATA)
+ return r;
+
+ *ret = TAKE_PTR(names);
+
+ return 0;
+}
+
+static int rtnl_update_link_alternative_names(
+ sd_netlink **rtnl,
+ uint16_t nlmsg_type,
+ int ifindex,
+ char* const *alternative_names) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL;
+ int r;
+
+ assert(rtnl);
+ assert(ifindex > 0);
+ assert(IN_SET(nlmsg_type, RTM_NEWLINKPROP, RTM_DELLINKPROP));
+
+ if (strv_isempty(alternative_names))
+ return 0;
+
+ if (!*rtnl) {
+ r = sd_netlink_open(rtnl);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_rtnl_message_new_link(*rtnl, &message, nlmsg_type, ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_open_container(message, IFLA_PROP_LIST);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_strv(message, IFLA_ALT_IFNAME, (const char**) alternative_names);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_close_container(message);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(*rtnl, message, 0, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int rtnl_set_link_alternative_names(sd_netlink **rtnl, int ifindex, char* const *alternative_names) {
+ return rtnl_update_link_alternative_names(rtnl, RTM_NEWLINKPROP, ifindex, alternative_names);
+}
+
+int rtnl_delete_link_alternative_names(sd_netlink **rtnl, int ifindex, char* const *alternative_names) {
+ return rtnl_update_link_alternative_names(rtnl, RTM_DELLINKPROP, ifindex, alternative_names);
+}
+
+int rtnl_set_link_alternative_names_by_ifname(
+ sd_netlink **rtnl,
+ const char *ifname,
+ char* const *alternative_names) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL;
+ int r;
+
+ assert(rtnl);
+ assert(ifname);
+
+ if (strv_isempty(alternative_names))
+ return 0;
+
+ if (!*rtnl) {
+ r = sd_netlink_open(rtnl);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_rtnl_message_new_link(*rtnl, &message, RTM_NEWLINKPROP, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(message, IFLA_IFNAME, ifname);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_open_container(message, IFLA_PROP_LIST);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_strv(message, IFLA_ALT_IFNAME, (const char**) alternative_names);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_close_container(message);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(*rtnl, message, 0, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int rtnl_resolve_link_alternative_name(sd_netlink **rtnl, const char *name, char **ret) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *our_rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL, *reply = NULL;
+ int r, ifindex;
+
+ assert(name);
+
+ /* This returns ifindex and the main interface name. */
+
+ if (!ifname_valid_full(name, IFNAME_VALID_ALTERNATIVE))
+ return -EINVAL;
+
+ if (!rtnl)
+ rtnl = &our_rtnl;
+ if (!*rtnl) {
+ r = sd_netlink_open(rtnl);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_rtnl_message_new_link(*rtnl, &message, RTM_GETLINK, 0);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_append_string(message, IFLA_ALT_IFNAME, name);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(*rtnl, message, 0, &reply);
+ if (r == -EINVAL)
+ return -ENODEV; /* The device doesn't exist */
+ if (r < 0)
+ return r;
+
+ r = sd_rtnl_message_link_get_ifindex(reply, &ifindex);
+ if (r < 0)
+ return r;
+ assert(ifindex > 0);
+
+ if (ret) {
+ r = sd_netlink_message_read_string_strdup(reply, IFLA_IFNAME, ret);
+ if (r < 0)
+ return r;
+ }
+
+ return ifindex;
+}
+
+int rtnl_resolve_ifname(sd_netlink **rtnl, const char *name) {
+ int r;
+
+ /* Like if_nametoindex, but resolves "alternative names" too. */
+
+ assert(name);
+
+ r = if_nametoindex(name);
+ if (r > 0)
+ return r;
+
+ return rtnl_resolve_link_alternative_name(rtnl, name, NULL);
+}
+
+int rtnl_resolve_interface(sd_netlink **rtnl, const char *name) {
+ int r;
+
+ /* Like rtnl_resolve_ifname, but resolves interface numbers too. */
+
+ assert(name);
+
+ r = parse_ifindex(name);
+ if (r > 0)
+ return r;
+ assert(r < 0);
+
+ return rtnl_resolve_ifname(rtnl, name);
+}
+
+int rtnl_resolve_interface_or_warn(sd_netlink **rtnl, const char *name) {
+ int r;
+
+ r = rtnl_resolve_interface(rtnl, name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve interface \"%s\": %m", name);
+ return r;
+}
+
+int rtnl_get_link_info(
+ sd_netlink **rtnl,
+ int ifindex,
+ unsigned short *ret_iftype,
+ unsigned *ret_flags,
+ char **ret_kind,
+ struct hw_addr_data *ret_hw_addr,
+ struct hw_addr_data *ret_permanent_hw_addr) {
+
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL, *reply = NULL;
+ struct hw_addr_data addr = HW_ADDR_NULL, perm_addr = HW_ADDR_NULL;
+ _cleanup_free_ char *kind = NULL;
+ unsigned short iftype;
+ unsigned flags;
+ int r;
+
+ assert(rtnl);
+ assert(ifindex > 0);
+
+ if (!ret_iftype && !ret_flags)
+ return 0;
+
+ if (!*rtnl) {
+ r = sd_netlink_open(rtnl);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_rtnl_message_new_link(*rtnl, &message, RTM_GETLINK, ifindex);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(*rtnl, message, 0, &reply);
+ if (r == -EINVAL)
+ return -ENODEV; /* The device does not exist */
+ if (r < 0)
+ return r;
+
+ if (ret_iftype) {
+ r = sd_rtnl_message_link_get_type(reply, &iftype);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret_flags) {
+ r = sd_rtnl_message_link_get_flags(reply, &flags);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret_kind) {
+ r = sd_netlink_message_enter_container(reply, IFLA_LINKINFO);
+ if (r >= 0) {
+ r = sd_netlink_message_read_string_strdup(reply, IFLA_INFO_KIND, &kind);
+ if (r < 0 && r != -ENODATA)
+ return r;
+
+ r = sd_netlink_message_exit_container(reply);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (ret_hw_addr) {
+ r = netlink_message_read_hw_addr(reply, IFLA_ADDRESS, &addr);
+ if (r < 0 && r != -ENODATA)
+ return r;
+ }
+
+ if (ret_permanent_hw_addr) {
+ r = netlink_message_read_hw_addr(reply, IFLA_PERM_ADDRESS, &perm_addr);
+ if (r < 0 && r != -ENODATA)
+ return r;
+ }
+
+ if (ret_iftype)
+ *ret_iftype = iftype;
+ if (ret_flags)
+ *ret_flags = flags;
+ if (ret_kind)
+ *ret_kind = TAKE_PTR(kind);
+ if (ret_hw_addr)
+ *ret_hw_addr = addr;
+ if (ret_permanent_hw_addr)
+ *ret_permanent_hw_addr = perm_addr;
+ return 0;
+}
+
+int rtnl_log_parse_error(int r) {
+ return log_error_errno(r, "Failed to parse netlink message: %m");
+}
+
+int rtnl_log_create_error(int r) {
+ return log_error_errno(r, "Failed to create netlink message: %m");
+}
+
+void rtattr_append_attribute_internal(struct rtattr *rta, unsigned short type, const void *data, size_t data_length) {
+ size_t padding_length;
+ uint8_t *padding;
+
+ assert(rta);
+ assert(!data || data_length > 0);
+
+ /* fill in the attribute */
+ rta->rta_type = type;
+ rta->rta_len = RTA_LENGTH(data_length);
+ if (data)
+ /* we don't deal with the case where the user lies about the type
+ * and gives us too little data (so don't do that)
+ */
+ padding = mempcpy(RTA_DATA(rta), data, data_length);
+
+ else
+ /* if no data was passed, make sure we still initialize the padding
+ note that we can have data_length > 0 (used by some containers) */
+ padding = RTA_DATA(rta);
+
+ /* make sure also the padding at the end of the message is initialized */
+ padding_length = (uint8_t *) rta + RTA_SPACE(data_length) - padding;
+ memzero(padding, padding_length);
+}
+
+int rtattr_append_attribute(struct rtattr **rta, unsigned short type, const void *data, size_t data_length) {
+ struct rtattr *new_rta, *sub_rta;
+ size_t message_length;
+
+ assert(rta);
+ assert(!data || data_length > 0);
+
+ /* get the new message size (with padding at the end) */
+ message_length = RTA_ALIGN(rta ? (*rta)->rta_len : 0) + RTA_SPACE(data_length);
+
+ /* buffer should be smaller than both one page or 8K to be accepted by the kernel */
+ if (message_length > MIN(page_size(), 8192UL))
+ return -ENOBUFS;
+
+ /* realloc to fit the new attribute */
+ new_rta = realloc(*rta, message_length);
+ if (!new_rta)
+ return -ENOMEM;
+ *rta = new_rta;
+
+ /* get pointer to the attribute we are about to add */
+ sub_rta = (struct rtattr *) ((uint8_t *) *rta + RTA_ALIGN((*rta)->rta_len));
+
+ rtattr_append_attribute_internal(sub_rta, type, data, data_length);
+
+ /* update rta_len */
+ (*rta)->rta_len = message_length;
+
+ return 0;
+}
+
+MultipathRoute *multipath_route_free(MultipathRoute *m) {
+ if (!m)
+ return NULL;
+
+ free(m->ifname);
+
+ return mfree(m);
+}
+
+int multipath_route_dup(const MultipathRoute *m, MultipathRoute **ret) {
+ _cleanup_(multipath_route_freep) MultipathRoute *n = NULL;
+ _cleanup_free_ char *ifname = NULL;
+
+ assert(m);
+ assert(ret);
+
+ if (m->ifname) {
+ ifname = strdup(m->ifname);
+ if (!ifname)
+ return -ENOMEM;
+ }
+
+ n = new(MultipathRoute, 1);
+ if (!n)
+ return -ENOMEM;
+
+ *n = (MultipathRoute) {
+ .gateway = m->gateway,
+ .weight = m->weight,
+ .ifindex = m->ifindex,
+ .ifname = TAKE_PTR(ifname),
+ };
+
+ *ret = TAKE_PTR(n);
+
+ return 0;
+}
+
+int rtattr_read_nexthop(const struct rtnexthop *rtnh, size_t size, int family, OrderedSet **ret) {
+ _cleanup_ordered_set_free_free_ OrderedSet *set = NULL;
+ int r;
+
+ assert(rtnh);
+ assert(IN_SET(family, AF_INET, AF_INET6));
+
+ if (size < sizeof(struct rtnexthop))
+ return -EBADMSG;
+
+ for (; size >= sizeof(struct rtnexthop); ) {
+ _cleanup_(multipath_route_freep) MultipathRoute *m = NULL;
+
+ if (NLMSG_ALIGN(rtnh->rtnh_len) > size)
+ return -EBADMSG;
+
+ if (rtnh->rtnh_len < sizeof(struct rtnexthop))
+ return -EBADMSG;
+
+ m = new(MultipathRoute, 1);
+ if (!m)
+ return -ENOMEM;
+
+ *m = (MultipathRoute) {
+ .ifindex = rtnh->rtnh_ifindex,
+ .weight = rtnh->rtnh_hops,
+ };
+
+ if (rtnh->rtnh_len > sizeof(struct rtnexthop)) {
+ size_t len = rtnh->rtnh_len - sizeof(struct rtnexthop);
+
+ for (struct rtattr *attr = RTNH_DATA(rtnh); RTA_OK(attr, len); attr = RTA_NEXT(attr, len)) {
+ if (attr->rta_type == RTA_GATEWAY) {
+ if (attr->rta_len != RTA_LENGTH(FAMILY_ADDRESS_SIZE(family)))
+ return -EBADMSG;
+
+ m->gateway.family = family;
+ memcpy(&m->gateway.address, RTA_DATA(attr), FAMILY_ADDRESS_SIZE(family));
+ break;
+ } else if (attr->rta_type == RTA_VIA) {
+ uint16_t gw_family;
+
+ if (family != AF_INET)
+ return -EINVAL;
+
+ if (attr->rta_len < RTA_LENGTH(sizeof(uint16_t)))
+ return -EBADMSG;
+
+ gw_family = *(uint16_t *) RTA_DATA(attr);
+
+ if (gw_family != AF_INET6)
+ return -EBADMSG;
+
+ if (attr->rta_len != RTA_LENGTH(FAMILY_ADDRESS_SIZE(gw_family) + sizeof(gw_family)))
+ return -EBADMSG;
+
+ memcpy(&m->gateway, RTA_DATA(attr), FAMILY_ADDRESS_SIZE(gw_family) + sizeof(gw_family));
+ break;
+ }
+ }
+ }
+
+ r = ordered_set_ensure_put(&set, NULL, m);
+ if (r < 0)
+ return r;
+
+ TAKE_PTR(m);
+
+ size -= NLMSG_ALIGN(rtnh->rtnh_len);
+ rtnh = RTNH_NEXT(rtnh);
+ }
+
+ if (ret)
+ *ret = TAKE_PTR(set);
+ return 0;
+}
+
+bool netlink_pid_changed(sd_netlink *nl) {
+ /* We don't support people creating an nl connection and
+ * keeping it around over a fork(). Let's complain. */
+ return ASSERT_PTR(nl)->original_pid != getpid_cached();
+}
+
+static int socket_open(int family) {
+ int fd;
+
+ fd = socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, family);
+ if (fd < 0)
+ return -errno;
+
+ return fd_move_above_stdio(fd);
+}
+
+int netlink_open_family(sd_netlink **ret, int family) {
+ _cleanup_close_ int fd = -EBADF;
+ int r;
+
+ fd = socket_open(family);
+ if (fd < 0)
+ return fd;
+
+ r = sd_netlink_open_fd(ret, fd);
+ if (r < 0)
+ return r;
+ TAKE_FD(fd);
+
+ return 0;
+}
+
+static bool serial_used(sd_netlink *nl, uint32_t serial) {
+ assert(nl);
+
+ return
+ hashmap_contains(nl->reply_callbacks, UINT32_TO_PTR(serial)) ||
+ hashmap_contains(nl->rqueue_by_serial, UINT32_TO_PTR(serial)) ||
+ hashmap_contains(nl->rqueue_partial_by_serial, UINT32_TO_PTR(serial));
+}
+
+void netlink_seal_message(sd_netlink *nl, sd_netlink_message *m) {
+ uint32_t picked;
+
+ assert(nl);
+ assert(!netlink_pid_changed(nl));
+ assert(m);
+ assert(m->hdr);
+
+ /* Avoid collisions with outstanding requests */
+ do {
+ picked = nl->serial;
+
+ /* Don't use seq == 0, as that is used for broadcasts, so we would get confused by replies to
+ such messages */
+ nl->serial = nl->serial == UINT32_MAX ? 1 : nl->serial + 1;
+
+ } while (serial_used(nl, picked));
+
+ m->hdr->nlmsg_seq = picked;
+ message_seal(m);
+}
+
+static int socket_writev_message(sd_netlink *nl, sd_netlink_message **m, size_t msgcount) {
+ _cleanup_free_ struct iovec *iovs = NULL;
+ ssize_t k;
+
+ assert(nl);
+ assert(m);
+ assert(msgcount > 0);
+
+ iovs = new(struct iovec, msgcount);
+ if (!iovs)
+ return -ENOMEM;
+
+ for (size_t i = 0; i < msgcount; i++) {
+ assert(m[i]->hdr);
+ assert(m[i]->hdr->nlmsg_len > 0);
+
+ iovs[i] = IOVEC_MAKE(m[i]->hdr, m[i]->hdr->nlmsg_len);
+ }
+
+ k = writev(nl->fd, iovs, msgcount);
+ if (k < 0)
+ return -errno;
+
+ return k;
+}
+
+int sd_netlink_sendv(
+ sd_netlink *nl,
+ sd_netlink_message **messages,
+ size_t msgcount,
+ uint32_t **ret_serial) {
+
+ _cleanup_free_ uint32_t *serials = NULL;
+ int r;
+
+ assert_return(nl, -EINVAL);
+ assert_return(!netlink_pid_changed(nl), -ECHILD);
+ assert_return(messages, -EINVAL);
+ assert_return(msgcount > 0, -EINVAL);
+
+ if (ret_serial) {
+ serials = new(uint32_t, msgcount);
+ if (!serials)
+ return -ENOMEM;
+ }
+
+ for (size_t i = 0; i < msgcount; i++) {
+ assert_return(!messages[i]->sealed, -EPERM);
+
+ netlink_seal_message(nl, messages[i]);
+ if (serials)
+ serials[i] = message_get_serial(messages[i]);
+ }
+
+ r = socket_writev_message(nl, messages, msgcount);
+ if (r < 0)
+ return r;
+
+ if (ret_serial)
+ *ret_serial = TAKE_PTR(serials);
+
+ return r;
+}
diff --git a/src/libsystemd/sd-netlink/netlink-util.h b/src/libsystemd/sd-netlink/netlink-util.h
new file mode 100644
index 0000000..369f5d5
--- /dev/null
+++ b/src/libsystemd/sd-netlink/netlink-util.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/rtnetlink.h>
+
+#include "sd-netlink.h"
+
+#include "ether-addr-util.h"
+#include "in-addr-util.h"
+#include "ordered-set.h"
+#include "socket-util.h"
+
+/* See struct rtvia in rtnetlink.h */
+typedef struct RouteVia {
+ uint16_t family;
+ union in_addr_union address;
+} _packed_ RouteVia;
+
+typedef struct MultipathRoute {
+ RouteVia gateway;
+ uint32_t weight;
+ int ifindex;
+ char *ifname;
+} MultipathRoute;
+
+MultipathRoute *multipath_route_free(MultipathRoute *m);
+DEFINE_TRIVIAL_CLEANUP_FUNC(MultipathRoute*, multipath_route_free);
+
+int multipath_route_dup(const MultipathRoute *m, MultipathRoute **ret);
+
+int rtnl_set_link_name(sd_netlink **rtnl, int ifindex, const char *name, char* const* alternative_names);
+static inline int rtnl_append_link_alternative_names(sd_netlink **rtnl, int ifindex, char* const *alternative_names) {
+ return rtnl_set_link_name(rtnl, ifindex, NULL, alternative_names);
+}
+int rtnl_set_link_properties(
+ sd_netlink **rtnl,
+ int ifindex,
+ const char *alias,
+ const struct hw_addr_data *hw_addr,
+ uint32_t txqueues,
+ uint32_t rxqueues,
+ uint32_t txqueuelen,
+ uint32_t mtu,
+ uint32_t gso_max_size,
+ size_t gso_max_segments);
+int rtnl_get_link_alternative_names(sd_netlink **rtnl, int ifindex, char ***ret);
+int rtnl_set_link_alternative_names(sd_netlink **rtnl, int ifindex, char* const *alternative_names);
+int rtnl_set_link_alternative_names_by_ifname(sd_netlink **rtnl, const char *ifname, char* const *alternative_names);
+int rtnl_delete_link_alternative_names(sd_netlink **rtnl, int ifindex, char* const *alternative_names);
+int rtnl_resolve_link_alternative_name(sd_netlink **rtnl, const char *name, char **ret);
+int rtnl_resolve_ifname(sd_netlink **rtnl, const char *name);
+int rtnl_resolve_interface(sd_netlink **rtnl, const char *name);
+int rtnl_resolve_interface_or_warn(sd_netlink **rtnl, const char *name);
+int rtnl_get_link_info(
+ sd_netlink **rtnl,
+ int ifindex,
+ unsigned short *ret_iftype,
+ unsigned *ret_flags,
+ char **ret_kind,
+ struct hw_addr_data *ret_hw_addr,
+ struct hw_addr_data *ret_permanent_hw_addr);
+
+int rtnl_log_parse_error(int r);
+int rtnl_log_create_error(int r);
+
+#define netlink_call_async(nl, ret_slot, message, callback, destroy_callback, userdata) \
+ ({ \
+ int (*_callback_)(sd_netlink *, sd_netlink_message *, typeof(userdata)) = callback; \
+ void (*_destroy_)(typeof(userdata)) = destroy_callback; \
+ sd_netlink_call_async(nl, ret_slot, message, \
+ (sd_netlink_message_handler_t) _callback_, \
+ (sd_netlink_destroy_t) _destroy_, \
+ userdata, 0, __func__); \
+ })
+
+#define netlink_add_match(nl, ret_slot, match, callback, destroy_callback, userdata, description) \
+ ({ \
+ int (*_callback_)(sd_netlink *, sd_netlink_message *, typeof(userdata)) = callback; \
+ void (*_destroy_)(typeof(userdata)) = destroy_callback; \
+ sd_netlink_add_match(nl, ret_slot, match, \
+ (sd_netlink_message_handler_t) _callback_, \
+ (sd_netlink_destroy_t) _destroy_, \
+ userdata, description); \
+ })
+
+#define genl_add_match(nl, ret_slot, family, group, cmd, callback, destroy_callback, userdata, description) \
+ ({ \
+ int (*_callback_)(sd_netlink *, sd_netlink_message *, typeof(userdata)) = callback; \
+ void (*_destroy_)(typeof(userdata)) = destroy_callback; \
+ sd_genl_add_match(nl, ret_slot, family, group, cmd, \
+ (sd_netlink_message_handler_t) _callback_, \
+ (sd_netlink_destroy_t) _destroy_, \
+ userdata, description); \
+ })
+
+int netlink_message_append_hw_addr(sd_netlink_message *m, unsigned short type, const struct hw_addr_data *data);
+int netlink_message_append_in_addr_union(sd_netlink_message *m, unsigned short type, int family, const union in_addr_union *data);
+int netlink_message_append_sockaddr_union(sd_netlink_message *m, unsigned short type, const union sockaddr_union *data);
+
+int netlink_message_read_hw_addr(sd_netlink_message *m, unsigned short type, struct hw_addr_data *data);
+int netlink_message_read_in_addr_union(sd_netlink_message *m, unsigned short type, int family, union in_addr_union *data);
+
+void rtattr_append_attribute_internal(struct rtattr *rta, unsigned short type, const void *data, size_t data_length);
+int rtattr_append_attribute(struct rtattr **rta, unsigned short type, const void *data, size_t data_length);
+
+int rtattr_read_nexthop(const struct rtnexthop *rtnh, size_t size, int family, OrderedSet **ret);
+
+void netlink_seal_message(sd_netlink *nl, sd_netlink_message *m);
+
+size_t netlink_get_reply_callback_count(sd_netlink *nl);
+
+/* TODO: to be exported later */
+int sd_netlink_sendv(sd_netlink *nl, sd_netlink_message **messages, size_t msgcnt, uint32_t **ret_serial);
diff --git a/src/libsystemd/sd-netlink/sd-netlink.c b/src/libsystemd/sd-netlink/sd-netlink.c
new file mode 100644
index 0000000..b6730b7
--- /dev/null
+++ b/src/libsystemd/sd-netlink/sd-netlink.c
@@ -0,0 +1,909 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <poll.h>
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "hashmap.h"
+#include "io-util.h"
+#include "macro.h"
+#include "netlink-genl.h"
+#include "netlink-internal.h"
+#include "netlink-slot.h"
+#include "netlink-util.h"
+#include "process-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+
+/* Some really high limit, to catch programming errors */
+#define REPLY_CALLBACKS_MAX UINT16_MAX
+
+static int netlink_new(sd_netlink **ret) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *nl = NULL;
+
+ assert_return(ret, -EINVAL);
+
+ nl = new(sd_netlink, 1);
+ if (!nl)
+ return -ENOMEM;
+
+ *nl = (sd_netlink) {
+ .n_ref = 1,
+ .fd = -EBADF,
+ .sockaddr.nl.nl_family = AF_NETLINK,
+ .original_pid = getpid_cached(),
+ .protocol = -1,
+
+ /* Kernel change notification messages have sequence number 0. We want to avoid that with our
+ * own serials, in order not to get confused when matching up kernel replies to our earlier
+ * requests.
+ *
+ * Moreover, when using netlink socket activation (i.e. where PID 1 binds an AF_NETLINK
+ * socket for us and passes it to us across execve()) and we get restarted multiple times
+ * while the socket sticks around we might get confused by replies from earlier runs coming
+ * in late — which is pretty likely if we'd start our sequence numbers always from 1. Hence,
+ * let's start with a value based on the system clock. This should make collisions much less
+ * likely (though still theoretically possible). We use a 32 bit μs counter starting at boot
+ * for this (and explicitly exclude the zero, see above). This counter will wrap around after
+ * a bit more than 1h, but that's hopefully OK as the kernel shouldn't take that long to
+ * reply to our requests.
+ *
+ * We only pick the initial start value this way. For each message we simply increase the
+ * sequence number by 1. This means we could enqueue 1 netlink message per μs without risking
+ * collisions, which should be OK.
+ *
+ * Note this means the serials will be in the range 1…UINT32_MAX here.
+ *
+ * (In an ideal world we'd attach the current serial counter to the netlink socket itself
+ * somehow, to avoid all this, but I couldn't come up with a nice way to do this) */
+ .serial = (uint32_t) (now(CLOCK_MONOTONIC) % UINT32_MAX) + 1,
+ };
+
+ *ret = TAKE_PTR(nl);
+ return 0;
+}
+
+int sd_netlink_open_fd(sd_netlink **ret, int fd) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *nl = NULL;
+ int r, protocol;
+
+ assert_return(ret, -EINVAL);
+ assert_return(fd >= 0, -EBADF);
+
+ r = netlink_new(&nl);
+ if (r < 0)
+ return r;
+
+ r = getsockopt_int(fd, SOL_SOCKET, SO_PROTOCOL, &protocol);
+ if (r < 0)
+ return r;
+
+ nl->fd = fd;
+ nl->protocol = protocol;
+
+ r = setsockopt_int(fd, SOL_NETLINK, NETLINK_EXT_ACK, true);
+ if (r < 0)
+ log_debug_errno(r, "sd-netlink: Failed to enable NETLINK_EXT_ACK option, ignoring: %m");
+
+ r = setsockopt_int(fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, true);
+ if (r < 0)
+ log_debug_errno(r, "sd-netlink: Failed to enable NETLINK_GET_STRICT_CHK option, ignoring: %m");
+
+ r = socket_bind(nl);
+ if (r < 0) {
+ nl->fd = -EBADF; /* on failure, the caller remains owner of the fd, hence don't close it here */
+ nl->protocol = -1;
+ return r;
+ }
+
+ *ret = TAKE_PTR(nl);
+
+ return 0;
+}
+
+int sd_netlink_open(sd_netlink **ret) {
+ return netlink_open_family(ret, NETLINK_ROUTE);
+}
+
+int sd_netlink_increase_rxbuf(sd_netlink *nl, size_t size) {
+ assert_return(nl, -EINVAL);
+ assert_return(!netlink_pid_changed(nl), -ECHILD);
+
+ return fd_increase_rxbuf(nl->fd, size);
+}
+
+static sd_netlink *netlink_free(sd_netlink *nl) {
+ sd_netlink_slot *s;
+
+ assert(nl);
+
+ ordered_set_free(nl->rqueue);
+ hashmap_free(nl->rqueue_by_serial);
+ hashmap_free(nl->rqueue_partial_by_serial);
+ free(nl->rbuffer);
+
+ while ((s = nl->slots)) {
+ assert(s->floating);
+ netlink_slot_disconnect(s, true);
+ }
+ hashmap_free(nl->reply_callbacks);
+ prioq_free(nl->reply_callbacks_prioq);
+
+ sd_event_source_unref(nl->io_event_source);
+ sd_event_source_unref(nl->time_event_source);
+ sd_event_unref(nl->event);
+
+ hashmap_free(nl->broadcast_group_refs);
+
+ genl_clear_family(nl);
+
+ safe_close(nl->fd);
+ return mfree(nl);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_netlink, sd_netlink, netlink_free);
+
+int sd_netlink_send(
+ sd_netlink *nl,
+ sd_netlink_message *message,
+ uint32_t *serial) {
+
+ int r;
+
+ assert_return(nl, -EINVAL);
+ assert_return(!netlink_pid_changed(nl), -ECHILD);
+ assert_return(message, -EINVAL);
+ assert_return(!message->sealed, -EPERM);
+
+ netlink_seal_message(nl, message);
+
+ r = socket_write_message(nl, message);
+ if (r < 0)
+ return r;
+
+ if (serial)
+ *serial = message_get_serial(message);
+
+ return 1;
+}
+
+static int dispatch_rqueue(sd_netlink *nl, sd_netlink_message **ret) {
+ sd_netlink_message *m;
+ int r;
+
+ assert(nl);
+ assert(ret);
+
+ if (ordered_set_size(nl->rqueue) <= 0) {
+ /* Try to read a new message */
+ r = socket_read_message(nl);
+ if (r == -ENOBUFS) /* FIXME: ignore buffer overruns for now */
+ log_debug_errno(r, "sd-netlink: Got ENOBUFS from netlink socket, ignoring.");
+ else if (r < 0)
+ return r;
+ }
+
+ /* Dispatch a queued message */
+ m = ordered_set_steal_first(nl->rqueue);
+ if (m)
+ sd_netlink_message_unref(hashmap_remove_value(nl->rqueue_by_serial, UINT32_TO_PTR(message_get_serial(m)), m));
+ *ret = m;
+ return !!m;
+}
+
+static int process_timeout(sd_netlink *nl) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ struct reply_callback *c;
+ sd_netlink_slot *slot;
+ usec_t n;
+ int r;
+
+ assert(nl);
+
+ c = prioq_peek(nl->reply_callbacks_prioq);
+ if (!c)
+ return 0;
+
+ n = now(CLOCK_MONOTONIC);
+ if (c->timeout > n)
+ return 0;
+
+ r = message_new_synthetic_error(nl, -ETIMEDOUT, c->serial, &m);
+ if (r < 0)
+ return r;
+
+ assert_se(prioq_pop(nl->reply_callbacks_prioq) == c);
+ hashmap_remove(nl->reply_callbacks, UINT32_TO_PTR(c->serial));
+
+ slot = container_of(c, sd_netlink_slot, reply_callback);
+
+ r = c->callback(nl, m, slot->userdata);
+ if (r < 0)
+ log_debug_errno(r, "sd-netlink: timedout callback %s%s%sfailed: %m",
+ slot->description ? "'" : "",
+ strempty(slot->description),
+ slot->description ? "' " : "");
+
+ if (slot->floating)
+ netlink_slot_disconnect(slot, true);
+
+ return 1;
+}
+
+static int process_reply(sd_netlink *nl, sd_netlink_message *m) {
+ struct reply_callback *c;
+ sd_netlink_slot *slot;
+ uint32_t serial;
+ uint16_t type;
+ int r;
+
+ assert(nl);
+ assert(m);
+
+ serial = message_get_serial(m);
+ c = hashmap_remove(nl->reply_callbacks, UINT32_TO_PTR(serial));
+ if (!c)
+ return 0;
+
+ if (c->timeout != USEC_INFINITY)
+ prioq_remove(nl->reply_callbacks_prioq, c, &c->prioq_idx);
+
+ r = sd_netlink_message_get_type(m, &type);
+ if (r < 0)
+ return r;
+
+ if (type == NLMSG_DONE)
+ m = NULL;
+
+ slot = container_of(c, sd_netlink_slot, reply_callback);
+
+ r = c->callback(nl, m, slot->userdata);
+ if (r < 0)
+ log_debug_errno(r, "sd-netlink: reply callback %s%s%sfailed: %m",
+ slot->description ? "'" : "",
+ strempty(slot->description),
+ slot->description ? "' " : "");
+
+ if (slot->floating)
+ netlink_slot_disconnect(slot, true);
+
+ return 1;
+}
+
+static int process_match(sd_netlink *nl, sd_netlink_message *m) {
+ uint16_t type;
+ uint8_t cmd;
+ int r;
+
+ assert(nl);
+ assert(m);
+
+ r = sd_netlink_message_get_type(m, &type);
+ if (r < 0)
+ return r;
+
+ if (m->protocol == NETLINK_GENERIC) {
+ r = sd_genl_message_get_command(nl, m, &cmd);
+ if (r < 0)
+ return r;
+ } else
+ cmd = 0;
+
+ LIST_FOREACH(match_callbacks, c, nl->match_callbacks) {
+ sd_netlink_slot *slot;
+ bool found = false;
+
+ if (c->type != type)
+ continue;
+ if (c->cmd != 0 && c->cmd != cmd)
+ continue;
+
+ for (size_t i = 0; i < c->n_groups; i++)
+ if (c->groups[i] == m->multicast_group) {
+ found = true;
+ break;
+ }
+
+ if (!found)
+ continue;
+
+ slot = container_of(c, sd_netlink_slot, match_callback);
+
+ r = c->callback(nl, m, slot->userdata);
+ if (r < 0)
+ log_debug_errno(r, "sd-netlink: match callback %s%s%sfailed: %m",
+ slot->description ? "'" : "",
+ strempty(slot->description),
+ slot->description ? "' " : "");
+ if (r != 0)
+ break;
+ }
+
+ return 1;
+}
+
+static int process_running(sd_netlink *nl, sd_netlink_message **ret) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ int r;
+
+ assert(nl);
+
+ r = process_timeout(nl);
+ if (r != 0)
+ goto null_message;
+
+ r = dispatch_rqueue(nl, &m);
+ if (r < 0)
+ return r;
+ if (!m)
+ goto null_message;
+
+ if (sd_netlink_message_is_broadcast(m))
+ r = process_match(nl, m);
+ else
+ r = process_reply(nl, m);
+ if (r != 0)
+ goto null_message;
+
+ if (ret) {
+ *ret = TAKE_PTR(m);
+
+ return 1;
+ }
+
+ return 1;
+
+null_message:
+ if (r >= 0 && ret)
+ *ret = NULL;
+
+ return r;
+}
+
+int sd_netlink_process(sd_netlink *nl, sd_netlink_message **ret) {
+ NETLINK_DONT_DESTROY(nl);
+ int r;
+
+ assert_return(nl, -EINVAL);
+ assert_return(!netlink_pid_changed(nl), -ECHILD);
+ assert_return(!nl->processing, -EBUSY);
+
+ nl->processing = true;
+ r = process_running(nl, ret);
+ nl->processing = false;
+
+ return r;
+}
+
+static usec_t timespan_to_timestamp(usec_t usec) {
+ static bool default_timeout_set = false;
+ static usec_t default_timeout;
+ int r;
+
+ if (usec == 0) {
+ if (!default_timeout_set) {
+ const char *e;
+
+ default_timeout_set = true;
+ default_timeout = NETLINK_DEFAULT_TIMEOUT_USEC;
+
+ e = secure_getenv("SYSTEMD_NETLINK_DEFAULT_TIMEOUT");
+ if (e) {
+ r = parse_sec(e, &default_timeout);
+ if (r < 0)
+ log_debug_errno(r, "sd-netlink: Failed to parse $SYSTEMD_NETLINK_DEFAULT_TIMEOUT environment variable, ignoring: %m");
+ }
+ }
+
+ usec = default_timeout;
+ }
+
+ return usec_add(now(CLOCK_MONOTONIC), usec);
+}
+
+static int netlink_poll(sd_netlink *nl, bool need_more, usec_t timeout_usec) {
+ usec_t m = USEC_INFINITY;
+ int r, e;
+
+ assert(nl);
+
+ e = sd_netlink_get_events(nl);
+ if (e < 0)
+ return e;
+
+ if (need_more)
+ /* Caller wants more data, and doesn't care about
+ * what's been read or any other timeouts. */
+ e |= POLLIN;
+ else {
+ usec_t until;
+
+ /* Caller wants to process if there is something to
+ * process, but doesn't care otherwise */
+
+ r = sd_netlink_get_timeout(nl, &until);
+ if (r < 0)
+ return r;
+
+ m = usec_sub_unsigned(until, now(CLOCK_MONOTONIC));
+ }
+
+ r = fd_wait_for_event(nl->fd, e, MIN(m, timeout_usec));
+ if (r <= 0)
+ return r;
+
+ return 1;
+}
+
+int sd_netlink_wait(sd_netlink *nl, uint64_t timeout_usec) {
+ int r;
+
+ assert_return(nl, -EINVAL);
+ assert_return(!netlink_pid_changed(nl), -ECHILD);
+
+ if (ordered_set_size(nl->rqueue) > 0)
+ return 0;
+
+ r = netlink_poll(nl, false, timeout_usec);
+ if (ERRNO_IS_NEG_TRANSIENT(r)) /* Convert EINTR to "something happened" and give user a chance to run some code before calling back into us */
+ return 1;
+ return r;
+}
+
+static int timeout_compare(const void *a, const void *b) {
+ const struct reply_callback *x = a, *y = b;
+
+ return CMP(x->timeout, y->timeout);
+}
+
+size_t netlink_get_reply_callback_count(sd_netlink *nl) {
+ assert(nl);
+
+ return hashmap_size(nl->reply_callbacks);
+}
+
+int sd_netlink_call_async(
+ sd_netlink *nl,
+ sd_netlink_slot **ret_slot,
+ sd_netlink_message *m,
+ sd_netlink_message_handler_t callback,
+ sd_netlink_destroy_t destroy_callback,
+ void *userdata,
+ uint64_t usec,
+ const char *description) {
+
+ _cleanup_free_ sd_netlink_slot *slot = NULL;
+ int r, k;
+
+ assert_return(nl, -EINVAL);
+ assert_return(m, -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(!netlink_pid_changed(nl), -ECHILD);
+
+ if (hashmap_size(nl->reply_callbacks) >= REPLY_CALLBACKS_MAX)
+ return -EXFULL;
+
+ r = hashmap_ensure_allocated(&nl->reply_callbacks, &trivial_hash_ops);
+ if (r < 0)
+ return r;
+
+ if (usec != UINT64_MAX) {
+ r = prioq_ensure_allocated(&nl->reply_callbacks_prioq, timeout_compare);
+ if (r < 0)
+ return r;
+ }
+
+ r = netlink_slot_allocate(nl, !ret_slot, NETLINK_REPLY_CALLBACK, sizeof(struct reply_callback), userdata, description, &slot);
+ if (r < 0)
+ return r;
+
+ slot->reply_callback.callback = callback;
+ slot->reply_callback.timeout = timespan_to_timestamp(usec);
+
+ k = sd_netlink_send(nl, m, &slot->reply_callback.serial);
+ if (k < 0)
+ return k;
+
+ r = hashmap_put(nl->reply_callbacks, UINT32_TO_PTR(slot->reply_callback.serial), &slot->reply_callback);
+ if (r < 0)
+ return r;
+
+ if (slot->reply_callback.timeout != USEC_INFINITY) {
+ r = prioq_put(nl->reply_callbacks_prioq, &slot->reply_callback, &slot->reply_callback.prioq_idx);
+ if (r < 0) {
+ (void) hashmap_remove(nl->reply_callbacks, UINT32_TO_PTR(slot->reply_callback.serial));
+ return r;
+ }
+ }
+
+ /* Set this at last. Otherwise, some failures in above would call destroy_callback but some would not. */
+ slot->destroy_callback = destroy_callback;
+
+ if (ret_slot)
+ *ret_slot = slot;
+
+ TAKE_PTR(slot);
+
+ return k;
+}
+
+int sd_netlink_read(
+ sd_netlink *nl,
+ uint32_t serial,
+ uint64_t usec,
+ sd_netlink_message **ret) {
+
+ usec_t timeout;
+ int r;
+
+ assert_return(nl, -EINVAL);
+ assert_return(!netlink_pid_changed(nl), -ECHILD);
+
+ timeout = timespan_to_timestamp(usec);
+
+ for (;;) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ usec_t left;
+
+ m = hashmap_remove(nl->rqueue_by_serial, UINT32_TO_PTR(serial));
+ if (m) {
+ uint16_t type;
+
+ /* found a match, remove from rqueue and return it */
+ sd_netlink_message_unref(ordered_set_remove(nl->rqueue, m));
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_get_type(m, &type);
+ if (r < 0)
+ return r;
+
+ if (type == NLMSG_DONE) {
+ if (ret)
+ *ret = NULL;
+ return 0;
+ }
+
+ if (ret)
+ *ret = TAKE_PTR(m);
+ return 1;
+ }
+
+ r = socket_read_message(nl);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ /* received message, so try to process straight away */
+ continue;
+
+ if (timeout != USEC_INFINITY) {
+ usec_t n;
+
+ n = now(CLOCK_MONOTONIC);
+ if (n >= timeout)
+ return -ETIMEDOUT;
+
+ left = usec_sub_unsigned(timeout, n);
+ } else
+ left = USEC_INFINITY;
+
+ r = netlink_poll(nl, true, left);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ETIMEDOUT;
+ }
+}
+
+int sd_netlink_call(
+ sd_netlink *nl,
+ sd_netlink_message *message,
+ uint64_t usec,
+ sd_netlink_message **ret) {
+
+ uint32_t serial;
+ int r;
+
+ assert_return(nl, -EINVAL);
+ assert_return(!netlink_pid_changed(nl), -ECHILD);
+ assert_return(message, -EINVAL);
+
+ r = sd_netlink_send(nl, message, &serial);
+ if (r < 0)
+ return r;
+
+ return sd_netlink_read(nl, serial, usec, ret);
+}
+
+int sd_netlink_get_events(sd_netlink *nl) {
+ assert_return(nl, -EINVAL);
+ assert_return(!netlink_pid_changed(nl), -ECHILD);
+
+ return ordered_set_size(nl->rqueue) == 0 ? POLLIN : 0;
+}
+
+int sd_netlink_get_timeout(sd_netlink *nl, uint64_t *timeout_usec) {
+ struct reply_callback *c;
+
+ assert_return(nl, -EINVAL);
+ assert_return(timeout_usec, -EINVAL);
+ assert_return(!netlink_pid_changed(nl), -ECHILD);
+
+ if (ordered_set_size(nl->rqueue) > 0) {
+ *timeout_usec = 0;
+ return 1;
+ }
+
+ c = prioq_peek(nl->reply_callbacks_prioq);
+ if (!c) {
+ *timeout_usec = UINT64_MAX;
+ return 0;
+ }
+
+ *timeout_usec = c->timeout;
+ return 1;
+}
+
+static int io_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ sd_netlink *nl = ASSERT_PTR(userdata);
+ int r;
+
+ r = sd_netlink_process(nl, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int time_callback(sd_event_source *s, uint64_t usec, void *userdata) {
+ sd_netlink *nl = ASSERT_PTR(userdata);
+ int r;
+
+ r = sd_netlink_process(nl, NULL);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int prepare_callback(sd_event_source *s, void *userdata) {
+ sd_netlink *nl = ASSERT_PTR(userdata);
+ int r, enabled;
+ usec_t until;
+
+ assert(s);
+
+ r = sd_netlink_get_events(nl);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_io_events(nl->io_event_source, r);
+ if (r < 0)
+ return r;
+
+ enabled = sd_netlink_get_timeout(nl, &until);
+ if (enabled < 0)
+ return enabled;
+ if (enabled > 0) {
+ r = sd_event_source_set_time(nl->time_event_source, until);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_source_set_enabled(nl->time_event_source,
+ enabled > 0 ? SD_EVENT_ONESHOT : SD_EVENT_OFF);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int sd_netlink_attach_event(sd_netlink *nl, sd_event *event, int64_t priority) {
+ int r;
+
+ assert_return(nl, -EINVAL);
+ assert_return(!nl->event, -EBUSY);
+
+ assert(!nl->io_event_source);
+ assert(!nl->time_event_source);
+
+ if (event)
+ nl->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&nl->event);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_add_io(nl->event, &nl->io_event_source, nl->fd, 0, io_callback, nl);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(nl->io_event_source, priority);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_description(nl->io_event_source, "netlink-receive-message");
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_prepare(nl->io_event_source, prepare_callback);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_add_time(nl->event, &nl->time_event_source, CLOCK_MONOTONIC, 0, 0, time_callback, nl);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(nl->time_event_source, priority);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_description(nl->time_event_source, "netlink-timer");
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ sd_netlink_detach_event(nl);
+ return r;
+}
+
+int sd_netlink_detach_event(sd_netlink *nl) {
+ assert_return(nl, -EINVAL);
+ assert_return(nl->event, -ENXIO);
+
+ nl->io_event_source = sd_event_source_unref(nl->io_event_source);
+
+ nl->time_event_source = sd_event_source_unref(nl->time_event_source);
+
+ nl->event = sd_event_unref(nl->event);
+
+ return 0;
+}
+
+sd_event* sd_netlink_get_event(sd_netlink *nl) {
+ assert_return(nl, NULL);
+
+ return nl->event;
+}
+
+int netlink_add_match_internal(
+ sd_netlink *nl,
+ sd_netlink_slot **ret_slot,
+ const uint32_t *groups,
+ size_t n_groups,
+ uint16_t type,
+ uint8_t cmd,
+ sd_netlink_message_handler_t callback,
+ sd_netlink_destroy_t destroy_callback,
+ void *userdata,
+ const char *description) {
+
+ _cleanup_free_ sd_netlink_slot *slot = NULL;
+ int r;
+
+ assert(groups);
+ assert(n_groups > 0);
+
+ for (size_t i = 0; i < n_groups; i++) {
+ r = socket_broadcast_group_ref(nl, groups[i]);
+ if (r < 0)
+ return r;
+ }
+
+ r = netlink_slot_allocate(nl, !ret_slot, NETLINK_MATCH_CALLBACK, sizeof(struct match_callback),
+ userdata, description, &slot);
+ if (r < 0)
+ return r;
+
+ slot->match_callback.groups = newdup(uint32_t, groups, n_groups);
+ if (!slot->match_callback.groups)
+ return -ENOMEM;
+
+ slot->match_callback.n_groups = n_groups;
+ slot->match_callback.callback = callback;
+ slot->match_callback.type = type;
+ slot->match_callback.cmd = cmd;
+
+ LIST_PREPEND(match_callbacks, nl->match_callbacks, &slot->match_callback);
+
+ /* Set this at last. Otherwise, some failures in above call the destroy callback but some do not. */
+ slot->destroy_callback = destroy_callback;
+
+ if (ret_slot)
+ *ret_slot = slot;
+
+ TAKE_PTR(slot);
+ return 0;
+}
+
+int sd_netlink_add_match(
+ sd_netlink *rtnl,
+ sd_netlink_slot **ret_slot,
+ uint16_t type,
+ sd_netlink_message_handler_t callback,
+ sd_netlink_destroy_t destroy_callback,
+ void *userdata,
+ const char *description) {
+
+ static const uint32_t
+ address_groups[] = { RTNLGRP_IPV4_IFADDR, RTNLGRP_IPV6_IFADDR, },
+ link_groups[] = { RTNLGRP_LINK, },
+ neighbor_groups[] = { RTNLGRP_NEIGH, },
+ nexthop_groups[] = { RTNLGRP_NEXTHOP, },
+ route_groups[] = { RTNLGRP_IPV4_ROUTE, RTNLGRP_IPV6_ROUTE, },
+ rule_groups[] = { RTNLGRP_IPV4_RULE, RTNLGRP_IPV6_RULE, },
+ tc_groups[] = { RTNLGRP_TC };
+ const uint32_t *groups;
+ size_t n_groups;
+
+ assert_return(rtnl, -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(!netlink_pid_changed(rtnl), -ECHILD);
+
+ switch (type) {
+ case RTM_NEWLINK:
+ case RTM_DELLINK:
+ groups = link_groups;
+ n_groups = ELEMENTSOF(link_groups);
+ break;
+ case RTM_NEWADDR:
+ case RTM_DELADDR:
+ groups = address_groups;
+ n_groups = ELEMENTSOF(address_groups);
+ break;
+ case RTM_NEWNEIGH:
+ case RTM_DELNEIGH:
+ groups = neighbor_groups;
+ n_groups = ELEMENTSOF(neighbor_groups);
+ break;
+ case RTM_NEWROUTE:
+ case RTM_DELROUTE:
+ groups = route_groups;
+ n_groups = ELEMENTSOF(route_groups);
+ break;
+ case RTM_NEWRULE:
+ case RTM_DELRULE:
+ groups = rule_groups;
+ n_groups = ELEMENTSOF(rule_groups);
+ break;
+ case RTM_NEWNEXTHOP:
+ case RTM_DELNEXTHOP:
+ groups = nexthop_groups;
+ n_groups = ELEMENTSOF(nexthop_groups);
+ break;
+ case RTM_NEWQDISC:
+ case RTM_DELQDISC:
+ case RTM_NEWTCLASS:
+ case RTM_DELTCLASS:
+ groups = tc_groups;
+ n_groups = ELEMENTSOF(tc_groups);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return netlink_add_match_internal(rtnl, ret_slot, groups, n_groups, type, 0, callback,
+ destroy_callback, userdata, description);
+}
+
+int sd_netlink_attach_filter(sd_netlink *nl, size_t len, const struct sock_filter *filter) {
+ assert_return(nl, -EINVAL);
+ assert_return(len == 0 || filter, -EINVAL);
+
+ if (setsockopt(nl->fd, SOL_SOCKET,
+ len == 0 ? SO_DETACH_FILTER : SO_ATTACH_FILTER,
+ &(struct sock_fprog) {
+ .len = len,
+ .filter = (struct sock_filter*) filter,
+ }, sizeof(struct sock_fprog)) < 0)
+ return -errno;
+
+ return 0;
+}
diff --git a/src/libsystemd/sd-netlink/test-netlink.c b/src/libsystemd/sd-netlink/test-netlink.c
new file mode 100644
index 0000000..13aedc4
--- /dev/null
+++ b/src/libsystemd/sd-netlink/test-netlink.c
@@ -0,0 +1,686 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+#include <netinet/ether.h>
+#include <netinet/in.h>
+#include <linux/fou.h>
+#include <linux/genetlink.h>
+#include <linux/if_macsec.h>
+#include <linux/l2tp.h>
+#include <linux/nl80211.h>
+#include <unistd.h>
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "ether-addr-util.h"
+#include "macro.h"
+#include "netlink-genl.h"
+#include "netlink-internal.h"
+#include "netlink-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tests.h"
+
+TEST(message_newlink_bridge) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL;
+ uint32_t cost;
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &message, RTM_NEWLINK, 1) >= 0);
+ assert_se(sd_rtnl_message_link_set_family(message, AF_BRIDGE) >= 0);
+ assert_se(sd_netlink_message_open_container(message, IFLA_PROTINFO) >= 0);
+ assert_se(sd_netlink_message_append_u32(message, IFLA_BRPORT_COST, 10) >= 0);
+ assert_se(sd_netlink_message_close_container(message) >= 0);
+
+ assert_se(sd_netlink_message_rewind(message, rtnl) >= 0);
+
+ assert_se(sd_netlink_message_enter_container(message, IFLA_PROTINFO) >= 0);
+ assert_se(sd_netlink_message_read_u32(message, IFLA_BRPORT_COST, &cost) >= 0);
+ assert_se(cost == 10);
+ assert_se(sd_netlink_message_exit_container(message) >= 0);
+}
+
+TEST(message_getlink) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL, *reply = NULL;
+ int ifindex;
+ uint8_t u8_data;
+ uint16_t u16_data;
+ uint32_t u32_data;
+ const char *str_data;
+ struct ether_addr eth_data;
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+ ifindex = (int) if_nametoindex("lo");
+
+ /* we'd really like to test NEWLINK, but let's not mess with the running kernel */
+ assert_se(sd_rtnl_message_new_link(rtnl, &message, RTM_GETLINK, ifindex) >= 0);
+ assert_se(sd_netlink_call(rtnl, message, 0, &reply) == 1);
+
+ /* u8 */
+ assert_se(sd_netlink_message_read_u8(reply, IFLA_CARRIER, &u8_data) >= 0);
+ assert_se(sd_netlink_message_read_u8(reply, IFLA_OPERSTATE, &u8_data) >= 0);
+ assert_se(sd_netlink_message_read_u8(reply, IFLA_LINKMODE, &u8_data) >= 0);
+
+ /* u16 */
+ assert_se(sd_netlink_message_get_type(reply, &u16_data) >= 0);
+ assert_se(u16_data == RTM_NEWLINK);
+
+ /* u32 */
+ assert_se(sd_netlink_message_read_u32(reply, IFLA_MTU, &u32_data) >= 0);
+ assert_se(sd_netlink_message_read_u32(reply, IFLA_GROUP, &u32_data) >= 0);
+ assert_se(sd_netlink_message_read_u32(reply, IFLA_TXQLEN, &u32_data) >= 0);
+ assert_se(sd_netlink_message_read_u32(reply, IFLA_NUM_TX_QUEUES, &u32_data) >= 0);
+
+ /* string */
+ assert_se(sd_netlink_message_read_string(reply, IFLA_IFNAME, &str_data) >= 0);
+
+ /* ether_addr */
+ assert_se(sd_netlink_message_read_ether_addr(reply, IFLA_ADDRESS, &eth_data) >= 0);
+}
+
+TEST(message_address) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL, *reply = NULL;
+ int ifindex;
+ struct in_addr in_data;
+ struct ifa_cacheinfo cache;
+ const char *label;
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+ ifindex = (int) if_nametoindex("lo");
+
+ assert_se(sd_rtnl_message_new_addr(rtnl, &message, RTM_GETADDR, ifindex, AF_INET) >= 0);
+ assert_se(sd_netlink_message_set_request_dump(message, true) >= 0);
+ assert_se(sd_netlink_call(rtnl, message, 0, &reply) == 1);
+
+ assert_se(sd_netlink_message_read_in_addr(reply, IFA_LOCAL, &in_data) >= 0);
+ assert_se(sd_netlink_message_read_in_addr(reply, IFA_ADDRESS, &in_data) >= 0);
+ assert_se(sd_netlink_message_read_string(reply, IFA_LABEL, &label) >= 0);
+ assert_se(sd_netlink_message_read_cache_info(reply, IFA_CACHEINFO, &cache) == 0);
+}
+
+TEST(message_route) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL;
+ struct in_addr addr, addr_data;
+ uint32_t index = 2, u32_data;
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+
+ assert_se(sd_rtnl_message_new_route(rtnl, &req, RTM_NEWROUTE, AF_INET, RTPROT_STATIC) >= 0);
+
+ addr.s_addr = htobe32(INADDR_LOOPBACK);
+
+ assert_se(sd_netlink_message_append_in_addr(req, RTA_GATEWAY, &addr) >= 0);
+ assert_se(sd_netlink_message_append_u32(req, RTA_OIF, index) >= 0);
+
+ assert_se(sd_netlink_message_rewind(req, rtnl) >= 0);
+
+ assert_se(sd_netlink_message_read_in_addr(req, RTA_GATEWAY, &addr_data) >= 0);
+ assert_se(addr_data.s_addr == addr.s_addr);
+
+ assert_se(sd_netlink_message_read_u32(req, RTA_OIF, &u32_data) >= 0);
+ assert_se(u32_data == index);
+
+ assert_se((req = sd_netlink_message_unref(req)) == NULL);
+}
+
+static int link_handler(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) {
+ const char *data;
+
+ assert_se(rtnl);
+ assert_se(m);
+
+ assert_se(streq_ptr(userdata, "foo"));
+
+ assert_se(sd_netlink_message_read_string(m, IFLA_IFNAME, &data) >= 0);
+ assert_se(streq(data, "lo"));
+
+ log_info("%s: got link info about %s", __func__, data);
+ return 1;
+}
+
+TEST(netlink_event_loop) {
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ _cleanup_free_ char *userdata = NULL;
+ int ifindex;
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+ ifindex = (int) if_nametoindex("lo");
+
+ assert_se(userdata = strdup("foo"));
+
+ assert_se(sd_event_default(&event) >= 0);
+ assert_se(sd_netlink_attach_event(rtnl, event, 0) >= 0);
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0);
+ assert_se(sd_netlink_call_async(rtnl, NULL, m, link_handler, NULL, userdata, 0, NULL) >= 0);
+
+ assert_se(sd_event_run(event, 0) >= 0);
+
+ assert_se(sd_netlink_detach_event(rtnl) >= 0);
+ assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL);
+}
+
+static void test_async_destroy(void *userdata) {
+}
+
+TEST(netlink_call_async) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_netlink_slot_unrefp) sd_netlink_slot *slot = NULL;
+ _cleanup_free_ char *userdata = NULL;
+ sd_netlink_destroy_t destroy_callback;
+ const char *description;
+ int ifindex;
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+ ifindex = (int) if_nametoindex("lo");
+
+ assert_se(userdata = strdup("foo"));
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0);
+ assert_se(sd_netlink_call_async(rtnl, &slot, m, link_handler, test_async_destroy, userdata, 0, "hogehoge") >= 0);
+
+ assert_se(sd_netlink_slot_get_netlink(slot) == rtnl);
+
+ assert_se(sd_netlink_slot_get_userdata(slot) == userdata);
+ assert_se(sd_netlink_slot_set_userdata(slot, NULL) == userdata);
+ assert_se(sd_netlink_slot_get_userdata(slot) == NULL);
+ assert_se(sd_netlink_slot_set_userdata(slot, userdata) == NULL);
+ assert_se(sd_netlink_slot_get_userdata(slot) == userdata);
+
+ assert_se(sd_netlink_slot_get_destroy_callback(slot, &destroy_callback) == 1);
+ assert_se(destroy_callback == test_async_destroy);
+ assert_se(sd_netlink_slot_set_destroy_callback(slot, NULL) >= 0);
+ assert_se(sd_netlink_slot_get_destroy_callback(slot, &destroy_callback) == 0);
+ assert_se(destroy_callback == NULL);
+ assert_se(sd_netlink_slot_set_destroy_callback(slot, test_async_destroy) >= 0);
+ assert_se(sd_netlink_slot_get_destroy_callback(slot, &destroy_callback) == 1);
+ assert_se(destroy_callback == test_async_destroy);
+
+ assert_se(sd_netlink_slot_get_floating(slot) == 0);
+ assert_se(sd_netlink_slot_set_floating(slot, 1) == 1);
+ assert_se(sd_netlink_slot_get_floating(slot) == 1);
+
+ assert_se(sd_netlink_slot_get_description(slot, &description) == 1);
+ assert_se(streq(description, "hogehoge"));
+ assert_se(sd_netlink_slot_set_description(slot, NULL) >= 0);
+ assert_se(sd_netlink_slot_get_description(slot, &description) == 0);
+ assert_se(description == NULL);
+
+ assert_se(sd_netlink_wait(rtnl, 0) >= 0);
+ assert_se(sd_netlink_process(rtnl, &reply) >= 0);
+
+ assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL);
+}
+
+struct test_async_object {
+ unsigned n_ref;
+ char *ifname;
+};
+
+static struct test_async_object *test_async_object_free(struct test_async_object *t) {
+ assert_se(t);
+
+ free(t->ifname);
+ return mfree(t);
+}
+
+DEFINE_PRIVATE_TRIVIAL_REF_UNREF_FUNC(struct test_async_object, test_async_object, test_async_object_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct test_async_object *, test_async_object_unref);
+
+static int link_handler2(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) {
+ struct test_async_object *t = userdata;
+ const char *data;
+
+ assert_se(rtnl);
+ assert_se(m);
+ assert_se(userdata);
+
+ log_info("%s: got link info about %s", __func__, t->ifname);
+
+ assert_se(sd_netlink_message_read_string(m, IFLA_IFNAME, &data) >= 0);
+ assert_se(streq(data, "lo"));
+
+ return 1;
+}
+
+static void test_async_object_destroy(void *userdata) {
+ struct test_async_object *t = userdata;
+
+ assert_se(userdata);
+
+ log_info("%s: n_ref=%u", __func__, t->n_ref);
+ test_async_object_unref(t);
+}
+
+TEST(async_destroy_callback) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL, *reply = NULL;
+ _cleanup_(test_async_object_unrefp) struct test_async_object *t = NULL;
+ _cleanup_(sd_netlink_slot_unrefp) sd_netlink_slot *slot = NULL;
+ int ifindex;
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+ ifindex = (int) if_nametoindex("lo");
+
+ assert_se(t = new(struct test_async_object, 1));
+ *t = (struct test_async_object) {
+ .n_ref = 1,
+ };
+ assert_se(t->ifname = strdup("lo"));
+
+ /* destroy callback is called after processing message */
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0);
+ assert_se(sd_netlink_call_async(rtnl, NULL, m, link_handler2, test_async_object_destroy, t, 0, NULL) >= 0);
+
+ assert_se(t->n_ref == 1);
+ assert_se(test_async_object_ref(t));
+ assert_se(t->n_ref == 2);
+
+ assert_se(sd_netlink_wait(rtnl, 0) >= 0);
+ assert_se(sd_netlink_process(rtnl, &reply) == 1);
+ assert_se(t->n_ref == 1);
+
+ assert_se(!sd_netlink_message_unref(m));
+
+ /* destroy callback is called when asynchronous call is cancelled, that is, slot is freed. */
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0);
+ assert_se(sd_netlink_call_async(rtnl, &slot, m, link_handler2, test_async_object_destroy, t, 0, NULL) >= 0);
+
+ assert_se(t->n_ref == 1);
+ assert_se(test_async_object_ref(t));
+ assert_se(t->n_ref == 2);
+
+ assert_se(!(slot = sd_netlink_slot_unref(slot)));
+ assert_se(t->n_ref == 1);
+
+ assert_se(!sd_netlink_message_unref(m));
+
+ /* destroy callback is also called by sd_netlink_unref() */
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0);
+ assert_se(sd_netlink_call_async(rtnl, NULL, m, link_handler2, test_async_object_destroy, t, 0, NULL) >= 0);
+
+ assert_se(t->n_ref == 1);
+ assert_se(test_async_object_ref(t));
+ assert_se(t->n_ref == 2);
+
+ assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL);
+ assert_se(t->n_ref == 1);
+}
+
+static int pipe_handler(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) {
+ int *counter = userdata;
+ int r;
+
+ (*counter)--;
+
+ r = sd_netlink_message_get_errno(m);
+
+ log_info_errno(r, "%d left in pipe. got reply: %m", *counter);
+
+ assert_se(r >= 0);
+
+ return 1;
+}
+
+TEST(pipe) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m1 = NULL, *m2 = NULL;
+ int ifindex, counter = 0;
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+ ifindex = (int) if_nametoindex("lo");
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m1, RTM_GETLINK, ifindex) >= 0);
+ assert_se(sd_rtnl_message_new_link(rtnl, &m2, RTM_GETLINK, ifindex) >= 0);
+
+ counter++;
+ assert_se(sd_netlink_call_async(rtnl, NULL, m1, pipe_handler, NULL, &counter, 0, NULL) >= 0);
+
+ counter++;
+ assert_se(sd_netlink_call_async(rtnl, NULL, m2, pipe_handler, NULL, &counter, 0, NULL) >= 0);
+
+ while (counter > 0) {
+ assert_se(sd_netlink_wait(rtnl, 0) >= 0);
+ assert_se(sd_netlink_process(rtnl, NULL) >= 0);
+ }
+
+ assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL);
+}
+
+TEST(message_container) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ uint16_t u16_data;
+ uint32_t u32_data;
+ const char *string_data;
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0) >= 0);
+
+ assert_se(sd_netlink_message_open_container(m, IFLA_LINKINFO) >= 0);
+ assert_se(sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, "vlan") >= 0);
+ assert_se(sd_netlink_message_append_u16(m, IFLA_VLAN_ID, 100) >= 0);
+ assert_se(sd_netlink_message_close_container(m) >= 0);
+ assert_se(sd_netlink_message_close_container(m) >= 0);
+
+ assert_se(sd_netlink_message_rewind(m, rtnl) >= 0);
+
+ assert_se(sd_netlink_message_enter_container(m, IFLA_LINKINFO) >= 0);
+ assert_se(sd_netlink_message_read_string(m, IFLA_INFO_KIND, &string_data) >= 0);
+ assert_se(streq("vlan", string_data));
+
+ assert_se(sd_netlink_message_enter_container(m, IFLA_INFO_DATA) >= 0);
+ assert_se(sd_netlink_message_read_u16(m, IFLA_VLAN_ID, &u16_data) >= 0);
+ assert_se(sd_netlink_message_exit_container(m) >= 0);
+
+ assert_se(sd_netlink_message_read_string(m, IFLA_INFO_KIND, &string_data) >= 0);
+ assert_se(streq("vlan", string_data));
+ assert_se(sd_netlink_message_exit_container(m) >= 0);
+
+ assert_se(sd_netlink_message_read_u32(m, IFLA_LINKINFO, &u32_data) < 0);
+}
+
+TEST(sd_netlink_add_match) {
+ _cleanup_(sd_netlink_slot_unrefp) sd_netlink_slot *s1 = NULL, *s2 = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+
+ assert_se(sd_netlink_add_match(rtnl, &s1, RTM_NEWLINK, link_handler, NULL, NULL, NULL) >= 0);
+ assert_se(sd_netlink_add_match(rtnl, &s2, RTM_NEWLINK, link_handler, NULL, NULL, NULL) >= 0);
+ assert_se(sd_netlink_add_match(rtnl, NULL, RTM_NEWLINK, link_handler, NULL, NULL, NULL) >= 0);
+
+ assert_se(!(s1 = sd_netlink_slot_unref(s1)));
+ assert_se(!(s2 = sd_netlink_slot_unref(s2)));
+
+ assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL);
+}
+
+TEST(dump_addresses) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+
+ assert_se(sd_rtnl_message_new_addr(rtnl, &req, RTM_GETADDR, 0, AF_UNSPEC) >= 0);
+ assert_se(sd_netlink_message_set_request_dump(req, true) >= 0);
+ assert_se(sd_netlink_call(rtnl, req, 0, &reply) >= 0);
+
+ for (sd_netlink_message *m = reply; m; m = sd_netlink_message_next(m)) {
+ uint16_t type;
+ unsigned char scope, flags;
+ int family, ifindex;
+
+ assert_se(sd_netlink_message_get_type(m, &type) >= 0);
+ assert_se(type == RTM_NEWADDR);
+
+ assert_se(sd_rtnl_message_addr_get_ifindex(m, &ifindex) >= 0);
+ assert_se(sd_rtnl_message_addr_get_family(m, &family) >= 0);
+ assert_se(sd_rtnl_message_addr_get_scope(m, &scope) >= 0);
+ assert_se(sd_rtnl_message_addr_get_flags(m, &flags) >= 0);
+
+ assert_se(ifindex > 0);
+ assert_se(IN_SET(family, AF_INET, AF_INET6));
+
+ log_info("got IPv%i address on ifindex %i", family == AF_INET ? 4 : 6, ifindex);
+ }
+}
+
+TEST(sd_netlink_message_get_errno) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+
+ assert_se(message_new_synthetic_error(rtnl, -ETIMEDOUT, 1, &m) >= 0);
+ assert_se(sd_netlink_message_get_errno(m) == -ETIMEDOUT);
+}
+
+TEST(message_array) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *genl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+
+ assert_se(sd_genl_socket_open(&genl) >= 0);
+ assert_se(sd_genl_message_new(genl, CTRL_GENL_NAME, CTRL_CMD_GETFAMILY, &m) >= 0);
+
+ assert_se(sd_netlink_message_open_container(m, CTRL_ATTR_MCAST_GROUPS) >= 0);
+ for (unsigned i = 0; i < 10; i++) {
+ char name[STRLEN("hoge") + DECIMAL_STR_MAX(uint32_t)];
+ uint32_t id = i + 1000;
+
+ xsprintf(name, "hoge%" PRIu32, id);
+ assert_se(sd_netlink_message_open_array(m, i + 1) >= 0);
+ assert_se(sd_netlink_message_append_u32(m, CTRL_ATTR_MCAST_GRP_ID, id) >= 0);
+ assert_se(sd_netlink_message_append_string(m, CTRL_ATTR_MCAST_GRP_NAME, name) >= 0);
+ assert_se(sd_netlink_message_close_container(m) >= 0);
+ }
+ assert_se(sd_netlink_message_close_container(m) >= 0);
+
+ message_seal(m);
+ assert_se(sd_netlink_message_rewind(m, genl) >= 0);
+
+ assert_se(sd_netlink_message_enter_container(m, CTRL_ATTR_MCAST_GROUPS) >= 0);
+ for (unsigned i = 0; i < 10; i++) {
+ char expected[STRLEN("hoge") + DECIMAL_STR_MAX(uint32_t)];
+ const char *name;
+ uint32_t id;
+
+ assert_se(sd_netlink_message_enter_array(m, i + 1) >= 0);
+ assert_se(sd_netlink_message_read_u32(m, CTRL_ATTR_MCAST_GRP_ID, &id) >= 0);
+ assert_se(sd_netlink_message_read_string(m, CTRL_ATTR_MCAST_GRP_NAME, &name) >= 0);
+ assert_se(sd_netlink_message_exit_container(m) >= 0);
+
+ assert_se(id == i + 1000);
+ xsprintf(expected, "hoge%" PRIu32, id);
+ assert_se(streq(name, expected));
+ }
+ assert_se(sd_netlink_message_exit_container(m) >= 0);
+}
+
+TEST(message_strv) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ _cleanup_strv_free_ char **names_in = NULL, **names_out;
+ const char *p;
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINKPROP, 1) >= 0);
+
+ for (unsigned i = 0; i < 10; i++) {
+ char name[STRLEN("hoge") + DECIMAL_STR_MAX(uint32_t)];
+
+ xsprintf(name, "hoge%" PRIu32, i + 1000);
+ assert_se(strv_extend(&names_in, name) >= 0);
+ }
+
+ assert_se(sd_netlink_message_open_container(m, IFLA_PROP_LIST) >= 0);
+ assert_se(sd_netlink_message_append_strv(m, IFLA_ALT_IFNAME, (const char**) names_in) >= 0);
+ assert_se(sd_netlink_message_close_container(m) >= 0);
+
+ message_seal(m);
+ assert_se(sd_netlink_message_rewind(m, rtnl) >= 0);
+
+ assert_se(sd_netlink_message_read_strv(m, IFLA_PROP_LIST, IFLA_ALT_IFNAME, &names_out) >= 0);
+ assert_se(strv_equal(names_in, names_out));
+
+ assert_se(sd_netlink_message_enter_container(m, IFLA_PROP_LIST) >= 0);
+ assert_se(sd_netlink_message_read_string(m, IFLA_ALT_IFNAME, &p) >= 0);
+ assert_se(streq(p, "hoge1009"));
+ assert_se(sd_netlink_message_exit_container(m) >= 0);
+}
+
+static int genl_ctrl_match_callback(sd_netlink *genl, sd_netlink_message *m, void *userdata) {
+ const char *name;
+ uint16_t id;
+ uint8_t cmd;
+
+ assert_se(genl);
+ assert_se(m);
+
+ assert_se(sd_genl_message_get_family_name(genl, m, &name) >= 0);
+ assert_se(streq(name, CTRL_GENL_NAME));
+
+ assert_se(sd_genl_message_get_command(genl, m, &cmd) >= 0);
+
+ switch (cmd) {
+ case CTRL_CMD_NEWFAMILY:
+ case CTRL_CMD_DELFAMILY:
+ assert_se(sd_netlink_message_read_string(m, CTRL_ATTR_FAMILY_NAME, &name) >= 0);
+ assert_se(sd_netlink_message_read_u16(m, CTRL_ATTR_FAMILY_ID, &id) >= 0);
+ log_debug("%s: %s (id=%"PRIu16") family is %s.",
+ __func__, name, id, cmd == CTRL_CMD_NEWFAMILY ? "added" : "removed");
+ break;
+ case CTRL_CMD_NEWMCAST_GRP:
+ case CTRL_CMD_DELMCAST_GRP:
+ assert_se(sd_netlink_message_read_string(m, CTRL_ATTR_FAMILY_NAME, &name) >= 0);
+ assert_se(sd_netlink_message_read_u16(m, CTRL_ATTR_FAMILY_ID, &id) >= 0);
+ log_debug("%s: multicast group for %s (id=%"PRIu16") family is %s.",
+ __func__, name, id, cmd == CTRL_CMD_NEWMCAST_GRP ? "added" : "removed");
+ break;
+ default:
+ log_debug("%s: received nlctrl message with unknown command '%"PRIu8"'.", __func__, cmd);
+ }
+
+ return 0;
+}
+
+TEST(genl) {
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *genl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
+ const char *name;
+ uint8_t cmd;
+ int r;
+
+ assert_se(sd_genl_socket_open(&genl) >= 0);
+ assert_se(sd_event_default(&event) >= 0);
+ assert_se(sd_netlink_attach_event(genl, event, 0) >= 0);
+
+ assert_se(sd_genl_message_new(genl, CTRL_GENL_NAME, CTRL_CMD_GETFAMILY, &m) >= 0);
+ assert_se(sd_genl_message_get_family_name(genl, m, &name) >= 0);
+ assert_se(streq(name, CTRL_GENL_NAME));
+ assert_se(sd_genl_message_get_command(genl, m, &cmd) >= 0);
+ assert_se(cmd == CTRL_CMD_GETFAMILY);
+
+ assert_se(sd_genl_add_match(genl, NULL, CTRL_GENL_NAME, "notify", 0, genl_ctrl_match_callback, NULL, NULL, "genl-ctrl-notify") >= 0);
+
+ m = sd_netlink_message_unref(m);
+ assert_se(sd_genl_message_new(genl, "should-not-exist", CTRL_CMD_GETFAMILY, &m) < 0);
+ assert_se(sd_genl_message_new(genl, "should-not-exist", CTRL_CMD_GETFAMILY, &m) == -EOPNOTSUPP);
+
+ /* These families may not be supported by kernel. Hence, ignore results. */
+ (void) sd_genl_message_new(genl, FOU_GENL_NAME, 0, &m);
+ m = sd_netlink_message_unref(m);
+ (void) sd_genl_message_new(genl, L2TP_GENL_NAME, 0, &m);
+ m = sd_netlink_message_unref(m);
+ (void) sd_genl_message_new(genl, MACSEC_GENL_NAME, 0, &m);
+ m = sd_netlink_message_unref(m);
+ (void) sd_genl_message_new(genl, NL80211_GENL_NAME, 0, &m);
+ m = sd_netlink_message_unref(m);
+ (void) sd_genl_message_new(genl, NETLBL_NLTYPE_UNLABELED_NAME, 0, &m);
+
+ for (;;) {
+ r = sd_event_run(event, 500 * USEC_PER_MSEC);
+ assert_se(r >= 0);
+ if (r == 0)
+ return;
+ }
+}
+
+static void remove_dummy_interfacep(int *ifindex) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL;
+
+ if (!ifindex || *ifindex <= 0)
+ return;
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &message, RTM_DELLINK, *ifindex) >= 0);
+ assert_se(sd_netlink_call(rtnl, message, 0, NULL) == 1);
+}
+
+TEST(rtnl_set_link_name) {
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL, *reply = NULL;
+ _cleanup_(remove_dummy_interfacep) int ifindex = 0;
+ _cleanup_strv_free_ char **alternative_names = NULL;
+ int r;
+
+ if (geteuid() != 0)
+ return (void) log_tests_skipped("not root");
+
+ assert_se(sd_netlink_open(&rtnl) >= 0);
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &message, RTM_NEWLINK, 0) >= 0);
+ assert_se(sd_netlink_message_append_string(message, IFLA_IFNAME, "test-netlink") >= 0);
+ assert_se(sd_netlink_message_open_container(message, IFLA_LINKINFO) >= 0);
+ assert_se(sd_netlink_message_append_string(message, IFLA_INFO_KIND, "dummy") >= 0);
+ r = sd_netlink_call(rtnl, message, 0, &reply);
+ if (r == -EPERM)
+ return (void) log_tests_skipped("missing required capabilities");
+ if (r == -EOPNOTSUPP)
+ return (void) log_tests_skipped("dummy network interface is not supported");
+ assert_se(r >= 0);
+
+ message = sd_netlink_message_unref(message);
+ reply = sd_netlink_message_unref(reply);
+
+ assert_se(sd_rtnl_message_new_link(rtnl, &message, RTM_GETLINK, 0) >= 0);
+ assert_se(sd_netlink_message_append_string(message, IFLA_IFNAME, "test-netlink") >= 0);
+ assert_se(sd_netlink_call(rtnl, message, 0, &reply) == 1);
+
+ assert_se(sd_rtnl_message_link_get_ifindex(reply, &ifindex) >= 0);
+ assert_se(ifindex > 0);
+
+ /* Test that the new name (which is currently an alternative name) is
+ * restored as an alternative name on error. Create an error by using
+ * an invalid device name, namely one that exceeds IFNAMSIZ
+ * (alternative names can exceed IFNAMSIZ, but not regular names). */
+ r = rtnl_set_link_alternative_names(&rtnl, ifindex, STRV_MAKE("testlongalternativename", "test-shortname"));
+ if (r == -EPERM)
+ return (void) log_tests_skipped("missing required capabilities");
+ if (r == -EOPNOTSUPP)
+ return (void) log_tests_skipped("alternative name is not supported");
+ assert_se(r >= 0);
+
+ assert_se(rtnl_get_link_alternative_names(&rtnl, ifindex, &alternative_names) >= 0);
+ assert_se(strv_contains(alternative_names, "testlongalternativename"));
+ assert_se(strv_contains(alternative_names, "test-shortname"));
+
+ assert_se(rtnl_set_link_name(&rtnl, ifindex, "testlongalternativename", NULL) == -EINVAL);
+ assert_se(rtnl_set_link_name(&rtnl, ifindex, "test-shortname", STRV_MAKE("testlongalternativename", "test-shortname", "test-additional-name")) >= 0);
+
+ alternative_names = strv_free(alternative_names);
+ assert_se(rtnl_get_link_alternative_names(&rtnl, ifindex, &alternative_names) >= 0);
+ assert_se(strv_contains(alternative_names, "testlongalternativename"));
+ assert_se(strv_contains(alternative_names, "test-additional-name"));
+ assert_se(!strv_contains(alternative_names, "test-shortname"));
+
+ assert_se(rtnl_delete_link_alternative_names(&rtnl, ifindex, STRV_MAKE("testlongalternativename")) >= 0);
+
+ alternative_names = strv_free(alternative_names);
+ assert_se(rtnl_get_link_alternative_names(&rtnl, ifindex, &alternative_names) >= 0);
+ assert_se(!strv_contains(alternative_names, "testlongalternativename"));
+ assert_se(strv_contains(alternative_names, "test-additional-name"));
+ assert_se(!strv_contains(alternative_names, "test-shortname"));
+
+ _cleanup_free_ char *resolved = NULL;
+ assert_se(rtnl_resolve_link_alternative_name(&rtnl, "test-additional-name", &resolved) == ifindex);
+ assert_se(streq_ptr(resolved, "test-shortname"));
+}
+
+DEFINE_TEST_MAIN(LOG_DEBUG);
diff --git a/src/libsystemd/sd-network/network-util.c b/src/libsystemd/sd-network/network-util.c
new file mode 100644
index 0000000..2059567
--- /dev/null
+++ b/src/libsystemd/sd-network/network-util.c
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-network.h"
+
+#include "alloc-util.h"
+#include "network-util.h"
+#include "string-table.h"
+#include "strv.h"
+
+bool network_is_online(void) {
+ _cleanup_free_ char *online_state = NULL;
+ LinkOnlineState state;
+ int r;
+
+ r = sd_network_get_online_state(&online_state);
+ if (r < 0)
+ state = _LINK_ONLINE_STATE_INVALID;
+ else
+ state = link_online_state_from_string(online_state);
+
+ if (state >= LINK_ONLINE_STATE_PARTIAL)
+ return true;
+ else if (state < 0) {
+ _cleanup_free_ char *carrier_state = NULL, *addr_state = NULL;
+
+ r = sd_network_get_carrier_state(&carrier_state);
+ if (r < 0) /* if we don't know anything, we consider the system online */
+ return true;
+
+ r = sd_network_get_address_state(&addr_state);
+ if (r < 0) /* if we don't know anything, we consider the system online */
+ return true;
+
+ /* we don't know the online state for certain, so make an educated guess */
+ if (STR_IN_SET(carrier_state, "degraded-carrier", "carrier") &&
+ STR_IN_SET(addr_state, "routable", "degraded"))
+ return true;
+ }
+
+ return false;
+}
+
+static const char* const link_operstate_table[_LINK_OPERSTATE_MAX] = {
+ [LINK_OPERSTATE_MISSING] = "missing",
+ [LINK_OPERSTATE_OFF] = "off",
+ [LINK_OPERSTATE_NO_CARRIER] = "no-carrier",
+ [LINK_OPERSTATE_DORMANT] = "dormant",
+ [LINK_OPERSTATE_DEGRADED_CARRIER] = "degraded-carrier",
+ [LINK_OPERSTATE_CARRIER] = "carrier",
+ [LINK_OPERSTATE_DEGRADED] = "degraded",
+ [LINK_OPERSTATE_ENSLAVED] = "enslaved",
+ [LINK_OPERSTATE_ROUTABLE] = "routable",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(link_operstate, LinkOperationalState);
+
+static const char* const link_carrier_state_table[_LINK_CARRIER_STATE_MAX] = {
+ [LINK_CARRIER_STATE_OFF] = "off",
+ [LINK_CARRIER_STATE_NO_CARRIER] = "no-carrier",
+ [LINK_CARRIER_STATE_DORMANT] = "dormant",
+ [LINK_CARRIER_STATE_DEGRADED_CARRIER] = "degraded-carrier",
+ [LINK_CARRIER_STATE_CARRIER] = "carrier",
+ [LINK_CARRIER_STATE_ENSLAVED] = "enslaved",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(link_carrier_state, LinkCarrierState);
+
+static const char* const link_required_address_family_table[_ADDRESS_FAMILY_MAX] = {
+ [ADDRESS_FAMILY_NO] = "any",
+ [ADDRESS_FAMILY_IPV4] = "ipv4",
+ [ADDRESS_FAMILY_IPV6] = "ipv6",
+ [ADDRESS_FAMILY_YES] = "both",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(link_required_address_family, AddressFamily);
+
+static const char* const link_address_state_table[_LINK_ADDRESS_STATE_MAX] = {
+ [LINK_ADDRESS_STATE_OFF] = "off",
+ [LINK_ADDRESS_STATE_DEGRADED] = "degraded",
+ [LINK_ADDRESS_STATE_ROUTABLE] = "routable",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(link_address_state, LinkAddressState);
+
+static const char *const link_online_state_table[_LINK_ONLINE_STATE_MAX] = {
+ [LINK_ONLINE_STATE_OFFLINE] = "offline",
+ [LINK_ONLINE_STATE_PARTIAL] = "partial",
+ [LINK_ONLINE_STATE_ONLINE] = "online",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(link_online_state, LinkOnlineState);
+
+int parse_operational_state_range(const char *str, LinkOperationalStateRange *out) {
+ LinkOperationalStateRange range = { _LINK_OPERSTATE_INVALID, _LINK_OPERSTATE_INVALID };
+ _cleanup_free_ const char *min = NULL;
+ const char *p;
+
+ assert(str);
+ assert(out);
+
+ p = strchr(str, ':');
+ if (p) {
+ min = strndup(str, p - str);
+
+ if (!isempty(p + 1)) {
+ range.max = link_operstate_from_string(p + 1);
+ if (range.max < 0)
+ return -EINVAL;
+ }
+ } else
+ min = strdup(str);
+
+ if (!min)
+ return -ENOMEM;
+
+ if (!isempty(min)) {
+ range.min = link_operstate_from_string(min);
+ if (range.min < 0)
+ return -EINVAL;
+ }
+
+ /* Fail on empty strings. */
+ if (range.min == _LINK_OPERSTATE_INVALID && range.max == _LINK_OPERSTATE_INVALID)
+ return -EINVAL;
+
+ if (range.min == _LINK_OPERSTATE_INVALID)
+ range.min = LINK_OPERSTATE_MISSING;
+ if (range.max == _LINK_OPERSTATE_INVALID)
+ range.max = LINK_OPERSTATE_ROUTABLE;
+
+ if (range.min > range.max)
+ return -EINVAL;
+
+ *out = range;
+
+ return 0;
+}
+
+int network_link_get_operational_state(int ifindex, LinkOperationalState *ret) {
+ _cleanup_free_ char *str = NULL;
+ LinkOperationalState s;
+ int r;
+
+ assert(ifindex > 0);
+ assert(ret);
+
+ r = sd_network_link_get_operational_state(ifindex, &str);
+ if (r < 0)
+ return r;
+
+ s = link_operstate_from_string(str);
+ if (s < 0)
+ return s;
+
+ *ret = s;
+ return 0;
+}
diff --git a/src/libsystemd/sd-network/network-util.h b/src/libsystemd/sd-network/network-util.h
new file mode 100644
index 0000000..c47e271
--- /dev/null
+++ b/src/libsystemd/sd-network/network-util.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <errno.h>
+#include <stdbool.h>
+
+#include "macro.h"
+
+bool network_is_online(void);
+
+typedef enum AddressFamily {
+ /* This is a bitmask, though it usually doesn't feel that way! */
+ ADDRESS_FAMILY_NO = 0,
+ ADDRESS_FAMILY_IPV4 = 1 << 0,
+ ADDRESS_FAMILY_IPV6 = 1 << 1,
+ ADDRESS_FAMILY_YES = ADDRESS_FAMILY_IPV4 | ADDRESS_FAMILY_IPV6,
+ _ADDRESS_FAMILY_MAX,
+ _ADDRESS_FAMILY_INVALID = -EINVAL,
+} AddressFamily;
+
+typedef enum LinkOperationalState {
+ LINK_OPERSTATE_MISSING,
+ LINK_OPERSTATE_OFF,
+ LINK_OPERSTATE_NO_CARRIER,
+ LINK_OPERSTATE_DORMANT,
+ LINK_OPERSTATE_DEGRADED_CARRIER,
+ LINK_OPERSTATE_CARRIER,
+ LINK_OPERSTATE_DEGRADED,
+ LINK_OPERSTATE_ENSLAVED,
+ LINK_OPERSTATE_ROUTABLE,
+ _LINK_OPERSTATE_MAX,
+ _LINK_OPERSTATE_INVALID = -EINVAL,
+} LinkOperationalState;
+
+typedef enum LinkCarrierState {
+ LINK_CARRIER_STATE_OFF = LINK_OPERSTATE_OFF,
+ LINK_CARRIER_STATE_NO_CARRIER = LINK_OPERSTATE_NO_CARRIER,
+ LINK_CARRIER_STATE_DORMANT = LINK_OPERSTATE_DORMANT,
+ LINK_CARRIER_STATE_DEGRADED_CARRIER = LINK_OPERSTATE_DEGRADED_CARRIER,
+ LINK_CARRIER_STATE_CARRIER = LINK_OPERSTATE_CARRIER,
+ LINK_CARRIER_STATE_ENSLAVED = LINK_OPERSTATE_ENSLAVED,
+ _LINK_CARRIER_STATE_MAX,
+ _LINK_CARRIER_STATE_INVALID = -EINVAL,
+} LinkCarrierState;
+
+typedef enum LinkAddressState {
+ LINK_ADDRESS_STATE_OFF,
+ LINK_ADDRESS_STATE_DEGRADED,
+ LINK_ADDRESS_STATE_ROUTABLE,
+ _LINK_ADDRESS_STATE_MAX,
+ _LINK_ADDRESS_STATE_INVALID = -EINVAL,
+} LinkAddressState;
+
+typedef enum LinkOnlineState {
+ LINK_ONLINE_STATE_OFFLINE,
+ LINK_ONLINE_STATE_PARTIAL,
+ LINK_ONLINE_STATE_ONLINE,
+ _LINK_ONLINE_STATE_MAX,
+ _LINK_ONLINE_STATE_INVALID = -EINVAL,
+} LinkOnlineState;
+
+const char* link_operstate_to_string(LinkOperationalState s) _const_;
+LinkOperationalState link_operstate_from_string(const char *s) _pure_;
+
+const char* link_carrier_state_to_string(LinkCarrierState s) _const_;
+LinkCarrierState link_carrier_state_from_string(const char *s) _pure_;
+
+const char* link_required_address_family_to_string(AddressFamily s) _const_;
+AddressFamily link_required_address_family_from_string(const char *s) _pure_;
+
+const char* link_address_state_to_string(LinkAddressState s) _const_;
+LinkAddressState link_address_state_from_string(const char *s) _pure_;
+
+const char* link_online_state_to_string(LinkOnlineState s) _const_;
+LinkOnlineState link_online_state_from_string(const char *s) _pure_;
+
+typedef struct LinkOperationalStateRange {
+ LinkOperationalState min;
+ LinkOperationalState max;
+} LinkOperationalStateRange;
+
+#define LINK_OPERSTATE_RANGE_DEFAULT (LinkOperationalStateRange) { LINK_OPERSTATE_DEGRADED, \
+ LINK_OPERSTATE_ROUTABLE }
+
+int parse_operational_state_range(const char *str, LinkOperationalStateRange *out);
+int network_link_get_operational_state(int ifindex, LinkOperationalState *ret);
diff --git a/src/libsystemd/sd-network/sd-network.c b/src/libsystemd/sd-network/sd-network.c
new file mode 100644
index 0000000..cf3c400
--- /dev/null
+++ b/src/libsystemd/sd-network/sd-network.c
@@ -0,0 +1,462 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <poll.h>
+#include <sys/inotify.h>
+
+#include "sd-network.h"
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "inotify-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+static int network_get_string(const char *field, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ r = parse_env_file(NULL, "/run/systemd/netif/state", field, &s);
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -ENODATA;
+
+ *ret = TAKE_PTR(s);
+ return 0;
+}
+
+int sd_network_get_operational_state(char **ret) {
+ return network_get_string("OPER_STATE", ret);
+}
+
+int sd_network_get_carrier_state(char **ret) {
+ return network_get_string("CARRIER_STATE", ret);
+}
+
+int sd_network_get_address_state(char **ret) {
+ return network_get_string("ADDRESS_STATE", ret);
+}
+
+int sd_network_get_ipv4_address_state(char **ret) {
+ return network_get_string("IPV4_ADDRESS_STATE", ret);
+}
+
+int sd_network_get_ipv6_address_state(char **ret) {
+ return network_get_string("IPV6_ADDRESS_STATE", ret);
+}
+
+int sd_network_get_online_state(char **ret) {
+ return network_get_string("ONLINE_STATE", ret);
+}
+
+static int network_get_strv(const char *key, char ***ret) {
+ _cleanup_strv_free_ char **a = NULL;
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ r = parse_env_file(NULL, "/run/systemd/netif/state", key, &s);
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -ENODATA;
+
+ a = strv_split(s, NULL);
+ if (!a)
+ return -ENOMEM;
+
+ strv_uniq(a);
+ r = (int) strv_length(a);
+
+ *ret = TAKE_PTR(a);
+ return r;
+}
+
+int sd_network_get_dns(char ***ret) {
+ return network_get_strv("DNS", ret);
+}
+
+int sd_network_get_ntp(char ***ret) {
+ return network_get_strv("NTP", ret);
+}
+
+int sd_network_get_search_domains(char ***ret) {
+ return network_get_strv("DOMAINS", ret);
+}
+
+int sd_network_get_route_domains(char ***ret) {
+ return network_get_strv("ROUTE_DOMAINS", ret);
+}
+
+static int network_link_get_string(int ifindex, const char *field, char **ret) {
+ char path[STRLEN("/run/systemd/netif/links/") + DECIMAL_STR_MAX(ifindex)];
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert_return(ifindex > 0, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ xsprintf(path, "/run/systemd/netif/links/%i", ifindex);
+
+ r = parse_env_file(NULL, path, field, &s);
+ if (r < 0)
+ return r;
+ if (isempty(s))
+ return -ENODATA;
+
+ *ret = TAKE_PTR(s);
+ return 0;
+}
+
+static int network_link_get_boolean(int ifindex, const char *key) {
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ r = network_link_get_string(ifindex, key, &s);
+ if (r < 0)
+ return r;
+
+ return parse_boolean(s);
+}
+
+static int network_link_get_strv(int ifindex, const char *key, char ***ret) {
+ _cleanup_strv_free_ char **a = NULL;
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert_return(ifindex > 0, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = network_link_get_string(ifindex, key, &s);
+ if (r < 0)
+ return r;
+
+ a = strv_split(s, NULL);
+ if (!a)
+ return -ENOMEM;
+
+ strv_uniq(a);
+ r = (int) strv_length(a);
+
+ *ret = TAKE_PTR(a);
+ return r;
+}
+
+int sd_network_link_get_setup_state(int ifindex, char **ret) {
+ return network_link_get_string(ifindex, "ADMIN_STATE", ret);
+}
+
+int sd_network_link_get_network_file(int ifindex, char **ret) {
+ return network_link_get_string(ifindex, "NETWORK_FILE", ret);
+}
+
+int sd_network_link_get_network_file_dropins(int ifindex, char ***ret) {
+ _cleanup_free_ char **sv = NULL, *joined = NULL;
+ int r;
+
+ assert_return(ifindex > 0, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = network_link_get_string(ifindex, "NETWORK_FILE_DROPINS", &joined);
+ if (r < 0)
+ return r;
+
+ r = strv_split_full(&sv, joined, ":", EXTRACT_CUNESCAPE);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(sv);
+ return 0;
+}
+
+int sd_network_link_get_operational_state(int ifindex, char **ret) {
+ return network_link_get_string(ifindex, "OPER_STATE", ret);
+}
+
+int sd_network_link_get_required_family_for_online(int ifindex, char **ret) {
+ return network_link_get_string(ifindex, "REQUIRED_FAMILY_FOR_ONLINE", ret);
+}
+
+int sd_network_link_get_carrier_state(int ifindex, char **ret) {
+ return network_link_get_string(ifindex, "CARRIER_STATE", ret);
+}
+
+int sd_network_link_get_address_state(int ifindex, char **ret) {
+ return network_link_get_string(ifindex, "ADDRESS_STATE", ret);
+}
+
+int sd_network_link_get_ipv4_address_state(int ifindex, char **ret) {
+ return network_link_get_string(ifindex, "IPV4_ADDRESS_STATE", ret);
+}
+
+int sd_network_link_get_ipv6_address_state(int ifindex, char **ret) {
+ return network_link_get_string(ifindex, "IPV6_ADDRESS_STATE", ret);
+}
+
+int sd_network_link_get_online_state(int ifindex, char **ret) {
+ return network_link_get_string(ifindex, "ONLINE_STATE", ret);
+}
+
+int sd_network_link_get_dhcp6_client_iaid_string(int ifindex, char **ret) {
+ return network_link_get_string(ifindex, "DHCP6_CLIENT_IAID", ret);
+}
+
+int sd_network_link_get_dhcp6_client_duid_string(int ifindex, char **ret) {
+ return network_link_get_string(ifindex, "DHCP6_CLIENT_DUID", ret);
+}
+
+int sd_network_link_get_required_for_online(int ifindex) {
+ return network_link_get_boolean(ifindex, "REQUIRED_FOR_ONLINE");
+}
+
+int sd_network_link_get_required_operstate_for_online(int ifindex, char **ret) {
+ return network_link_get_string(ifindex, "REQUIRED_OPER_STATE_FOR_ONLINE", ret);
+}
+
+int sd_network_link_get_activation_policy(int ifindex, char **ret) {
+ return network_link_get_string(ifindex, "ACTIVATION_POLICY", ret);
+}
+
+int sd_network_link_get_llmnr(int ifindex, char **ret) {
+ return network_link_get_string(ifindex, "LLMNR", ret);
+}
+
+int sd_network_link_get_mdns(int ifindex, char **ret) {
+ return network_link_get_string(ifindex, "MDNS", ret);
+}
+
+int sd_network_link_get_dns_over_tls(int ifindex, char **ret) {
+ return network_link_get_string(ifindex, "DNS_OVER_TLS", ret);
+}
+
+int sd_network_link_get_dnssec(int ifindex, char **ret) {
+ return network_link_get_string(ifindex, "DNSSEC", ret);
+}
+
+int sd_network_link_get_dnssec_negative_trust_anchors(int ifindex, char ***ret) {
+ return network_link_get_strv(ifindex, "DNSSEC_NTA", ret);
+}
+
+int sd_network_link_get_dns(int ifindex, char ***ret) {
+ return network_link_get_strv(ifindex, "DNS", ret);
+}
+
+int sd_network_link_get_ntp(int ifindex, char ***ret) {
+ return network_link_get_strv(ifindex, "NTP", ret);
+}
+
+int sd_network_link_get_sip(int ifindex, char ***ret) {
+ return network_link_get_strv(ifindex, "SIP", ret);
+}
+
+int sd_network_link_get_captive_portal(int ifindex, char **ret) {
+ return network_link_get_string(ifindex, "CAPTIVE_PORTAL", ret);
+}
+
+int sd_network_link_get_search_domains(int ifindex, char ***ret) {
+ return network_link_get_strv(ifindex, "DOMAINS", ret);
+}
+
+int sd_network_link_get_route_domains(int ifindex, char ***ret) {
+ return network_link_get_strv(ifindex, "ROUTE_DOMAINS", ret);
+}
+
+int sd_network_link_get_dns_default_route(int ifindex) {
+ return network_link_get_boolean(ifindex, "DNS_DEFAULT_ROUTE");
+}
+
+static int network_link_get_ifindexes(int ifindex, const char *key, int **ret) {
+ _cleanup_free_ int *ifis = NULL;
+ _cleanup_free_ char *s = NULL;
+ size_t c = 0;
+ int r;
+
+ assert_return(ifindex > 0, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = network_link_get_string(ifindex, key, &s);
+ if (r < 0)
+ return r;
+
+ for (const char *x = s;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&x, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (!GREEDY_REALLOC(ifis, c + 2))
+ return -ENOMEM;
+
+ r = ifis[c++] = parse_ifindex(word);
+ if (r < 0)
+ return r;
+ }
+
+ if (ifis)
+ ifis[c] = 0; /* Let's add a 0 ifindex to the end, to be nice */
+
+ *ret = TAKE_PTR(ifis);
+ return c;
+}
+
+int sd_network_link_get_carrier_bound_to(int ifindex, int **ret) {
+ return network_link_get_ifindexes(ifindex, "CARRIER_BOUND_TO", ret);
+}
+
+int sd_network_link_get_carrier_bound_by(int ifindex, int **ret) {
+ return network_link_get_ifindexes(ifindex, "CARRIER_BOUND_BY", ret);
+}
+
+int sd_network_link_get_stat(int ifindex, struct stat *ret) {
+ char path[STRLEN("/run/systemd/netif/links/") + DECIMAL_STR_MAX(ifindex)];
+ struct stat st;
+
+ assert_return(ifindex > 0, -EINVAL);
+
+ xsprintf(path, "/run/systemd/netif/links/%i", ifindex);
+
+ if (stat(path, &st) < 0)
+ return -errno;
+
+ if (ret)
+ *ret = st;
+
+ return 0;
+}
+
+static int MONITOR_TO_FD(sd_network_monitor *m) {
+ return (int) (unsigned long) m - 1;
+}
+
+static sd_network_monitor* FD_TO_MONITOR(int fd) {
+ return (sd_network_monitor*) (unsigned long) (fd + 1);
+}
+
+static int monitor_add_inotify_watch(int fd) {
+ int wd;
+
+ wd = inotify_add_watch(fd, "/run/systemd/netif/links/", IN_MOVED_TO|IN_DELETE);
+ if (wd >= 0)
+ return wd;
+ else if (errno != ENOENT)
+ return -errno;
+
+ wd = inotify_add_watch(fd, "/run/systemd/netif/", IN_CREATE|IN_ISDIR);
+ if (wd >= 0)
+ return wd;
+ else if (errno != ENOENT)
+ return -errno;
+
+ wd = inotify_add_watch(fd, "/run/systemd/", IN_CREATE|IN_ISDIR);
+ if (wd < 0)
+ return -errno;
+
+ return wd;
+}
+
+int sd_network_monitor_new(sd_network_monitor **m, const char *category) {
+ _cleanup_close_ int fd = -EBADF;
+ int k;
+ bool good = false;
+
+ assert_return(m, -EINVAL);
+
+ fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (!category || streq(category, "links")) {
+ k = monitor_add_inotify_watch(fd);
+ if (k < 0)
+ return k;
+
+ good = true;
+ }
+
+ if (!good)
+ return -EINVAL;
+
+ *m = FD_TO_MONITOR(TAKE_FD(fd));
+ return 0;
+}
+
+sd_network_monitor* sd_network_monitor_unref(sd_network_monitor *m) {
+ if (m)
+ (void) close_nointr(MONITOR_TO_FD(m));
+
+ return NULL;
+}
+
+int sd_network_monitor_flush(sd_network_monitor *m) {
+ union inotify_event_buffer buffer;
+ ssize_t l;
+ int fd;
+
+ assert_return(m, -EINVAL);
+
+ fd = MONITOR_TO_FD(m);
+
+ l = read(fd, &buffer, sizeof(buffer));
+ if (l < 0) {
+ if (ERRNO_IS_TRANSIENT(errno))
+ return 0;
+
+ return -errno;
+ }
+
+ FOREACH_INOTIFY_EVENT(e, buffer, l) {
+ if (e->mask & IN_ISDIR) {
+ int wd;
+
+ wd = monitor_add_inotify_watch(fd);
+ if (wd < 0)
+ return wd;
+
+ if (wd != e->wd) {
+ if (inotify_rm_watch(fd, e->wd) < 0)
+ return -errno;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int sd_network_monitor_get_fd(sd_network_monitor *m) {
+ assert_return(m, -EINVAL);
+
+ return MONITOR_TO_FD(m);
+}
+
+int sd_network_monitor_get_events(sd_network_monitor *m) {
+ assert_return(m, -EINVAL);
+
+ /* For now we will only return POLLIN here, since we don't
+ * need anything else ever for inotify. However, let's have
+ * this API to keep our options open should we later on need
+ * it. */
+ return POLLIN;
+}
+
+int sd_network_monitor_get_timeout(sd_network_monitor *m, uint64_t *ret_usec) {
+ assert_return(m, -EINVAL);
+ assert_return(ret_usec, -EINVAL);
+
+ /* For now we will only return UINT64_MAX, since we don't
+ * need any timeout. However, let's have this API to keep our
+ * options open should we later on need it. */
+ *ret_usec = UINT64_MAX;
+ return 0;
+}
diff --git a/src/libsystemd/sd-path/sd-path.c b/src/libsystemd/sd-path/sd-path.c
new file mode 100644
index 0000000..7290d1c
--- /dev/null
+++ b/src/libsystemd/sd-path/sd-path.c
@@ -0,0 +1,693 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-path.h"
+
+#include "alloc-util.h"
+#include "architecture.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "nulstr-util.h"
+#include "path-lookup.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+static int from_environment(const char *envname, const char *fallback, const char **ret) {
+ assert(ret);
+
+ if (envname) {
+ const char *e;
+
+ e = secure_getenv(envname);
+ if (e && path_is_absolute(e)) {
+ *ret = e;
+ return 0;
+ }
+ }
+
+ if (fallback) {
+ *ret = fallback;
+ return 0;
+ }
+
+ return -ENXIO;
+}
+
+static int from_home_dir(const char *envname, const char *suffix, char **buffer, const char **ret) {
+ _cleanup_free_ char *h = NULL;
+ int r;
+
+ assert(suffix);
+ assert(buffer);
+ assert(ret);
+
+ if (envname) {
+ const char *e = NULL;
+
+ e = secure_getenv(envname);
+ if (e && path_is_absolute(e)) {
+ *ret = e;
+ return 0;
+ }
+ }
+
+ r = get_home_dir(&h);
+ if (r < 0)
+ return r;
+
+ if (!path_extend(&h, suffix))
+ return -ENOMEM;
+
+ *buffer = h;
+ *ret = TAKE_PTR(h);
+ return 0;
+}
+
+static int from_user_dir(const char *field, char **buffer, const char **ret) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *b = NULL;
+ _cleanup_free_ const char *fn = NULL;
+ const char *c = NULL;
+ int r;
+
+ assert(field);
+ assert(buffer);
+ assert(ret);
+
+ r = from_home_dir("XDG_CONFIG_HOME", ".config", &b, &c);
+ if (r < 0)
+ return r;
+
+ fn = path_join(c, "user-dirs.dirs");
+ if (!fn)
+ return -ENOMEM;
+
+ f = fopen(fn, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ goto fallback;
+
+ return -errno;
+ }
+
+ /* This is an awful parse, but it follows closely what xdg-user-dirs does upstream */
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *p, *e;
+
+ r = read_stripped_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ p = startswith(line, field);
+ if (!p)
+ continue;
+
+ p += strspn(p, WHITESPACE);
+
+ if (*p != '=')
+ continue;
+ p++;
+
+ p += strspn(p, WHITESPACE);
+
+ if (*p != '"')
+ continue;
+ p++;
+
+ e = strrchr(p, '"');
+ if (!e)
+ continue;
+ *e = 0;
+
+ /* Three syntaxes permitted: relative to $HOME, $HOME itself, and absolute path */
+ if (startswith(p, "$HOME/")) {
+ _cleanup_free_ char *h = NULL;
+
+ r = get_home_dir(&h);
+ if (r < 0)
+ return r;
+
+ if (!path_extend(&h, p+5))
+ return -ENOMEM;
+
+ *buffer = h;
+ *ret = TAKE_PTR(h);
+ return 0;
+ } else if (streq(p, "$HOME")) {
+
+ r = get_home_dir(buffer);
+ if (r < 0)
+ return r;
+
+ *ret = *buffer;
+ return 0;
+ } else if (path_is_absolute(p)) {
+ char *copy;
+
+ copy = strdup(p);
+ if (!copy)
+ return -ENOMEM;
+
+ *buffer = copy;
+ *ret = copy;
+ return 0;
+ }
+ }
+
+fallback:
+ /* The desktop directory defaults to $HOME/Desktop, the others to $HOME */
+ if (streq(field, "XDG_DESKTOP_DIR")) {
+ _cleanup_free_ char *h = NULL;
+
+ r = get_home_dir(&h);
+ if (r < 0)
+ return r;
+
+ if (!path_extend(&h, "Desktop"))
+ return -ENOMEM;
+
+ *buffer = h;
+ *ret = TAKE_PTR(h);
+ } else {
+ r = get_home_dir(buffer);
+ if (r < 0)
+ return r;
+
+ *ret = *buffer;
+ }
+
+ return 0;
+}
+
+static int get_path(uint64_t type, char **buffer, const char **ret) {
+ int r;
+
+ assert(buffer);
+ assert(ret);
+
+ switch (type) {
+
+ case SD_PATH_TEMPORARY:
+ return tmp_dir(ret);
+
+ case SD_PATH_TEMPORARY_LARGE:
+ return var_tmp_dir(ret);
+
+ case SD_PATH_SYSTEM_BINARIES:
+ *ret = "/usr/bin";
+ return 0;
+
+ case SD_PATH_SYSTEM_INCLUDE:
+ *ret = "/usr/include";
+ return 0;
+
+ case SD_PATH_SYSTEM_LIBRARY_PRIVATE:
+ *ret = "/usr/lib";
+ return 0;
+
+ case SD_PATH_SYSTEM_LIBRARY_ARCH:
+ *ret = LIBDIR;
+ return 0;
+
+ case SD_PATH_SYSTEM_SHARED:
+ *ret = "/usr/share";
+ return 0;
+
+ case SD_PATH_SYSTEM_CONFIGURATION_FACTORY:
+ *ret = "/usr/share/factory/etc";
+ return 0;
+
+ case SD_PATH_SYSTEM_STATE_FACTORY:
+ *ret = "/usr/share/factory/var";
+ return 0;
+
+ case SD_PATH_SYSTEM_CONFIGURATION:
+ *ret = "/etc";
+ return 0;
+
+ case SD_PATH_SYSTEM_RUNTIME:
+ *ret = "/run";
+ return 0;
+
+ case SD_PATH_SYSTEM_RUNTIME_LOGS:
+ *ret = "/run/log";
+ return 0;
+
+ case SD_PATH_SYSTEM_STATE_PRIVATE:
+ *ret = "/var/lib";
+ return 0;
+
+ case SD_PATH_SYSTEM_STATE_LOGS:
+ *ret = "/var/log";
+ return 0;
+
+ case SD_PATH_SYSTEM_STATE_CACHE:
+ *ret = "/var/cache";
+ return 0;
+
+ case SD_PATH_SYSTEM_STATE_SPOOL:
+ *ret = "/var/spool";
+ return 0;
+
+ case SD_PATH_USER_BINARIES:
+ return from_home_dir(NULL, ".local/bin", buffer, ret);
+
+ case SD_PATH_USER_LIBRARY_PRIVATE:
+ return from_home_dir(NULL, ".local/lib", buffer, ret);
+
+ case SD_PATH_USER_LIBRARY_ARCH:
+ return from_home_dir(NULL, ".local/lib/" LIB_ARCH_TUPLE, buffer, ret);
+
+ case SD_PATH_USER_SHARED:
+ return from_home_dir("XDG_DATA_HOME", ".local/share", buffer, ret);
+
+ case SD_PATH_USER_CONFIGURATION:
+ return from_home_dir("XDG_CONFIG_HOME", ".config", buffer, ret);
+
+ case SD_PATH_USER_RUNTIME:
+ return from_environment("XDG_RUNTIME_DIR", NULL, ret);
+
+ case SD_PATH_USER_STATE_CACHE:
+ return from_home_dir("XDG_CACHE_HOME", ".cache", buffer, ret);
+
+ case SD_PATH_USER_STATE_PRIVATE:
+ return from_home_dir("XDG_STATE_HOME", ".local/state", buffer, ret);
+
+ case SD_PATH_USER:
+ r = get_home_dir(buffer);
+ if (r < 0)
+ return r;
+
+ *ret = *buffer;
+ return 0;
+
+ case SD_PATH_USER_DOCUMENTS:
+ return from_user_dir("XDG_DOCUMENTS_DIR", buffer, ret);
+
+ case SD_PATH_USER_MUSIC:
+ return from_user_dir("XDG_MUSIC_DIR", buffer, ret);
+
+ case SD_PATH_USER_PICTURES:
+ return from_user_dir("XDG_PICTURES_DIR", buffer, ret);
+
+ case SD_PATH_USER_VIDEOS:
+ return from_user_dir("XDG_VIDEOS_DIR", buffer, ret);
+
+ case SD_PATH_USER_DOWNLOAD:
+ return from_user_dir("XDG_DOWNLOAD_DIR", buffer, ret);
+
+ case SD_PATH_USER_PUBLIC:
+ return from_user_dir("XDG_PUBLICSHARE_DIR", buffer, ret);
+
+ case SD_PATH_USER_TEMPLATES:
+ return from_user_dir("XDG_TEMPLATES_DIR", buffer, ret);
+
+ case SD_PATH_USER_DESKTOP:
+ return from_user_dir("XDG_DESKTOP_DIR", buffer, ret);
+
+ case SD_PATH_SYSTEMD_UTIL:
+ *ret = PREFIX_NOSLASH "/lib/systemd";
+ return 0;
+
+ case SD_PATH_SYSTEMD_SYSTEM_UNIT:
+ *ret = SYSTEM_DATA_UNIT_DIR;
+ return 0;
+
+ case SD_PATH_SYSTEMD_SYSTEM_PRESET:
+ *ret = PREFIX_NOSLASH "/lib/systemd/system-preset";
+ return 0;
+
+ case SD_PATH_SYSTEMD_USER_UNIT:
+ *ret = USER_DATA_UNIT_DIR;
+ return 0;
+
+ case SD_PATH_SYSTEMD_USER_PRESET:
+ *ret = PREFIX_NOSLASH "/lib/systemd/user-preset";
+ return 0;
+
+ case SD_PATH_SYSTEMD_SYSTEM_CONF:
+ *ret = SYSTEM_CONFIG_UNIT_DIR;
+ return 0;
+
+ case SD_PATH_SYSTEMD_USER_CONF:
+ *ret = USER_CONFIG_UNIT_DIR;
+ return 0;
+
+ case SD_PATH_SYSTEMD_SYSTEM_GENERATOR:
+ *ret = SYSTEM_GENERATOR_DIR;
+ return 0;
+
+ case SD_PATH_SYSTEMD_USER_GENERATOR:
+ *ret = USER_GENERATOR_DIR;
+ return 0;
+
+ case SD_PATH_SYSTEMD_SLEEP:
+ *ret = PREFIX_NOSLASH "/lib/systemd/system-sleep";
+ return 0;
+
+ case SD_PATH_SYSTEMD_SHUTDOWN:
+ *ret = PREFIX_NOSLASH "/lib/systemd/system-shutdown";
+ return 0;
+
+ case SD_PATH_TMPFILES:
+ *ret = "/usr/lib/tmpfiles.d";
+ return 0;
+
+ case SD_PATH_SYSUSERS:
+ *ret = PREFIX_NOSLASH "/lib/sysusers.d";
+ return 0;
+
+ case SD_PATH_SYSCTL:
+ *ret = PREFIX_NOSLASH "/lib/sysctl.d";
+ return 0;
+
+ case SD_PATH_BINFMT:
+ *ret = PREFIX_NOSLASH "/lib/binfmt.d";
+ return 0;
+
+ case SD_PATH_MODULES_LOAD:
+ *ret = PREFIX_NOSLASH "/lib/modules-load.d";
+ return 0;
+
+ case SD_PATH_CATALOG:
+ *ret = "/usr/lib/systemd/catalog";
+ return 0;
+
+ case SD_PATH_SYSTEMD_SYSTEM_ENVIRONMENT_GENERATOR:
+ *ret = SYSTEM_ENV_GENERATOR_DIR;
+ return 0;
+
+ case SD_PATH_SYSTEMD_USER_ENVIRONMENT_GENERATOR:
+ *ret = USER_ENV_GENERATOR_DIR;
+ return 0;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int get_path_alloc(uint64_t type, const char *suffix, char **path) {
+ _cleanup_free_ char *buffer = NULL;
+ char *buffer2 = NULL;
+ const char *ret;
+ int r;
+
+ assert(path);
+
+ r = get_path(type, &buffer, &ret);
+ if (r < 0)
+ return r;
+
+ if (suffix) {
+ suffix += strspn(suffix, "/");
+ buffer2 = path_join(ret, suffix);
+ if (!buffer2)
+ return -ENOMEM;
+ } else if (!buffer) {
+ buffer = strdup(ret);
+ if (!buffer)
+ return -ENOMEM;
+ }
+
+ *path = buffer2 ?: TAKE_PTR(buffer);
+ return 0;
+}
+
+_public_ int sd_path_lookup(uint64_t type, const char *suffix, char **path) {
+ int r;
+
+ assert_return(path, -EINVAL);
+
+ r = get_path_alloc(type, suffix, path);
+ if (r != -EOPNOTSUPP)
+ return r;
+
+ /* Fall back to sd_path_lookup_strv */
+ _cleanup_strv_free_ char **l = NULL;
+ char *buffer;
+
+ r = sd_path_lookup_strv(type, suffix, &l);
+ if (r < 0)
+ return r;
+
+ buffer = strv_join(l, ":");
+ if (!buffer)
+ return -ENOMEM;
+
+ *path = buffer;
+ return 0;
+}
+
+static int search_from_environment(
+ char ***list,
+ const char *env_home,
+ const char *home_suffix,
+ const char *env_search,
+ bool env_search_sufficient,
+ const char *first, ...) {
+
+ _cleanup_strv_free_ char **l = NULL;
+ const char *e;
+ char *h = NULL;
+ int r;
+
+ assert(list);
+
+ if (env_search) {
+ e = secure_getenv(env_search);
+ if (e) {
+ l = strv_split(e, ":");
+ if (!l)
+ return -ENOMEM;
+
+ if (env_search_sufficient) {
+ *list = TAKE_PTR(l);
+ return 0;
+ }
+ }
+ }
+
+ if (!l && first) {
+ va_list ap;
+
+ va_start(ap, first);
+ l = strv_new_ap(first, ap);
+ va_end(ap);
+
+ if (!l)
+ return -ENOMEM;
+ }
+
+ if (env_home) {
+ e = secure_getenv(env_home);
+ if (e && path_is_absolute(e)) {
+ h = strdup(e);
+ if (!h)
+ return -ENOMEM;
+ }
+ }
+
+ if (!h && home_suffix) {
+ e = secure_getenv("HOME");
+ if (e && path_is_absolute(e)) {
+ h = path_join(e, home_suffix);
+ if (!h)
+ return -ENOMEM;
+ }
+ }
+
+ if (h) {
+ r = strv_consume_prepend(&l, h);
+ if (r < 0)
+ return -ENOMEM;
+ }
+
+ *list = TAKE_PTR(l);
+ return 0;
+}
+
+#if HAVE_SPLIT_BIN
+# define ARRAY_SBIN_BIN(x) x "sbin", x "bin"
+#else
+# define ARRAY_SBIN_BIN(x) x "bin"
+#endif
+
+static int get_search(uint64_t type, char ***list) {
+ int r;
+
+ assert(list);
+
+ switch (type) {
+
+ case SD_PATH_SEARCH_BINARIES:
+ return search_from_environment(list,
+ NULL,
+ ".local/bin",
+ "PATH",
+ true,
+ ARRAY_SBIN_BIN("/usr/local/"),
+ ARRAY_SBIN_BIN("/usr/"),
+ NULL);
+
+ case SD_PATH_SEARCH_LIBRARY_PRIVATE:
+ return search_from_environment(list,
+ NULL,
+ ".local/lib",
+ NULL,
+ false,
+ "/usr/local/lib",
+ "/usr/lib",
+ NULL);
+
+ case SD_PATH_SEARCH_LIBRARY_ARCH:
+ return search_from_environment(list,
+ NULL,
+ ".local/lib/" LIB_ARCH_TUPLE,
+ "LD_LIBRARY_PATH",
+ true,
+ LIBDIR,
+ NULL);
+
+ case SD_PATH_SEARCH_SHARED:
+ return search_from_environment(list,
+ "XDG_DATA_HOME",
+ ".local/share",
+ "XDG_DATA_DIRS",
+ false,
+ "/usr/local/share",
+ "/usr/share",
+ NULL);
+
+ case SD_PATH_SEARCH_CONFIGURATION_FACTORY:
+ return search_from_environment(list,
+ NULL,
+ NULL,
+ NULL,
+ false,
+ "/usr/local/share/factory/etc",
+ "/usr/share/factory/etc",
+ NULL);
+
+ case SD_PATH_SEARCH_STATE_FACTORY:
+ return search_from_environment(list,
+ NULL,
+ NULL,
+ NULL,
+ false,
+ "/usr/local/share/factory/var",
+ "/usr/share/factory/var",
+ NULL);
+
+ case SD_PATH_SEARCH_CONFIGURATION:
+ return search_from_environment(list,
+ "XDG_CONFIG_HOME",
+ ".config",
+ "XDG_CONFIG_DIRS",
+ false,
+ "/etc",
+ NULL);
+
+ case SD_PATH_SEARCH_BINARIES_DEFAULT:
+ return strv_from_nulstr(list, DEFAULT_PATH_NULSTR);
+
+ case SD_PATH_SYSTEMD_SEARCH_SYSTEM_UNIT:
+ case SD_PATH_SYSTEMD_SEARCH_USER_UNIT: {
+ _cleanup_(lookup_paths_free) LookupPaths lp = {};
+ RuntimeScope scope = type == SD_PATH_SYSTEMD_SEARCH_SYSTEM_UNIT ?
+ RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER;
+
+ r = lookup_paths_init(&lp, scope, 0, NULL);
+ if (r < 0)
+ return r;
+
+ *list = TAKE_PTR(lp.search_path);
+ return 0;
+ }
+
+ case SD_PATH_SYSTEMD_SEARCH_SYSTEM_GENERATOR:
+ case SD_PATH_SYSTEMD_SEARCH_USER_GENERATOR: {
+ RuntimeScope scope = type == SD_PATH_SYSTEMD_SEARCH_SYSTEM_GENERATOR ?
+ RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER;
+ char **t;
+
+ t = generator_binary_paths(scope);
+ if (!t)
+ return -ENOMEM;
+
+ *list = t;
+ return 0;
+ }
+
+ case SD_PATH_SYSTEMD_SEARCH_SYSTEM_ENVIRONMENT_GENERATOR:
+ case SD_PATH_SYSTEMD_SEARCH_USER_ENVIRONMENT_GENERATOR: {
+ char **t;
+
+ t = env_generator_binary_paths(type == SD_PATH_SYSTEMD_SEARCH_SYSTEM_ENVIRONMENT_GENERATOR ?
+ RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER);
+ if (!t)
+ return -ENOMEM;
+
+ *list = t;
+ return 0;
+ }
+
+ case SD_PATH_SYSTEMD_SEARCH_NETWORK:
+ return strv_from_nulstr(list, NETWORK_DIRS_NULSTR);
+
+ }
+
+ return -EOPNOTSUPP;
+}
+
+_public_ int sd_path_lookup_strv(uint64_t type, const char *suffix, char ***paths) {
+ _cleanup_strv_free_ char **l = NULL, **n = NULL;
+ int r;
+
+ assert_return(paths, -EINVAL);
+
+ r = get_search(type, &l);
+ if (r == -EOPNOTSUPP) {
+ _cleanup_free_ char *t = NULL;
+
+ r = get_path_alloc(type, suffix, &t);
+ if (r < 0)
+ return r;
+
+ l = new(char*, 2);
+ if (!l)
+ return -ENOMEM;
+ l[0] = TAKE_PTR(t);
+ l[1] = NULL;
+
+ *paths = TAKE_PTR(l);
+ return 0;
+
+ } else if (r < 0)
+ return r;
+
+ if (!suffix) {
+ *paths = TAKE_PTR(l);
+ return 0;
+ }
+
+ n = new(char*, strv_length(l)+1);
+ if (!n)
+ return -ENOMEM;
+
+ char **j = n;
+ STRV_FOREACH(i, l) {
+ *j = path_join(*i, suffix);
+ if (!*j)
+ return -ENOMEM;
+
+ j++;
+ }
+ *j = NULL;
+
+ *paths = TAKE_PTR(n);
+ return 0;
+}
diff --git a/src/libsystemd/sd-resolve/resolve-private.h b/src/libsystemd/sd-resolve/resolve-private.h
new file mode 100644
index 0000000..7a339f7
--- /dev/null
+++ b/src/libsystemd/sd-resolve/resolve-private.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-resolve.h"
+
+int resolve_getaddrinfo_with_destroy_callback(
+ sd_resolve *resolve, sd_resolve_query **q,
+ const char *node, const char *service, const struct addrinfo *hints,
+ sd_resolve_getaddrinfo_handler_t callback,
+ sd_resolve_destroy_t destroy_callback, void *userdata);
+int resolve_getnameinfo_with_destroy_callback(
+ sd_resolve *resolve, sd_resolve_query **q,
+ const struct sockaddr *sa, socklen_t salen, int flags, uint64_t get,
+ sd_resolve_getnameinfo_handler_t callback,
+ sd_resolve_destroy_t destroy_callback, void *userdata);
+
+#define resolve_getaddrinfo(resolve, ret_query, node, service, hints, callback, destroy_callback, userdata) \
+ ({ \
+ int (*_callback_)(sd_resolve_query*, int, const struct addrinfo*, typeof(userdata)) = callback; \
+ void (*_destroy_)(typeof(userdata)) = destroy_callback; \
+ resolve_getaddrinfo_with_destroy_callback( \
+ resolve, ret_query, \
+ node, service, hints, \
+ (sd_resolve_getaddrinfo_handler_t) _callback_, \
+ (sd_resolve_destroy_t) _destroy_, \
+ userdata); \
+ })
+
+#define resolve_getnameinfo(resolve, ret_query, sa, salen, flags, get, callback, destroy_callback, userdata) \
+ ({ \
+ int (*_callback_)(sd_resolve_query*, int, const char*, const char*, typeof(userdata)) = callback; \
+ void (*_destroy_)(typeof(userdata)) = destroy_callback; \
+ resolve_getaddrinfo_with_destroy_callback( \
+ resolve, ret_query, \
+ sa, salen, flags, get, \
+ (sd_resolve_getnameinfo_handler_t) _callback_, \
+ (sd_resolve_destroy_t) _destroy_, \
+ userdata); \
+ })
diff --git a/src/libsystemd/sd-resolve/sd-resolve.c b/src/libsystemd/sd-resolve/sd-resolve.c
new file mode 100644
index 0000000..2000f86
--- /dev/null
+++ b/src/libsystemd/sd-resolve/sd-resolve.c
@@ -0,0 +1,1296 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <poll.h>
+#include <pthread.h>
+#include <resolv.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "sd-resolve.h"
+
+#include "alloc-util.h"
+#include "dns-def.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "iovec-util.h"
+#include "list.h"
+#include "memory-util.h"
+#include "missing_syscall.h"
+#include "missing_threads.h"
+#include "process-util.h"
+#include "resolve-private.h"
+#include "socket-util.h"
+
+#define WORKERS_MIN 1U
+#define WORKERS_MAX 16U
+#define QUERIES_MAX 256U
+#define BUFSIZE 10240U
+
+typedef enum {
+ REQUEST_ADDRINFO,
+ RESPONSE_ADDRINFO,
+ REQUEST_NAMEINFO,
+ RESPONSE_NAMEINFO,
+ REQUEST_TERMINATE,
+ RESPONSE_DIED
+} QueryType;
+
+enum {
+ REQUEST_RECV_FD,
+ REQUEST_SEND_FD,
+ RESPONSE_RECV_FD,
+ RESPONSE_SEND_FD,
+ _FD_MAX
+};
+
+struct sd_resolve {
+ unsigned n_ref;
+
+ bool dead:1;
+ pid_t original_pid;
+
+ int fds[_FD_MAX];
+
+ pthread_t workers[WORKERS_MAX];
+ unsigned n_valid_workers;
+
+ unsigned current_id;
+ sd_resolve_query* query_array[QUERIES_MAX];
+ unsigned n_queries, n_done, n_outstanding;
+
+ sd_event_source *event_source;
+ sd_event *event;
+
+ sd_resolve_query *current;
+
+ sd_resolve **default_resolve_ptr;
+ pid_t tid;
+
+ LIST_HEAD(sd_resolve_query, queries);
+};
+
+struct sd_resolve_query {
+ unsigned n_ref;
+
+ sd_resolve *resolve;
+
+ QueryType type:4;
+ bool done:1;
+ bool floating:1;
+ unsigned id;
+
+ int ret;
+ int _errno;
+ int _h_errno;
+ struct addrinfo *addrinfo;
+ char *serv, *host;
+
+ union {
+ sd_resolve_getaddrinfo_handler_t getaddrinfo_handler;
+ sd_resolve_getnameinfo_handler_t getnameinfo_handler;
+ };
+
+ void *userdata;
+ sd_resolve_destroy_t destroy_callback;
+
+ LIST_FIELDS(sd_resolve_query, queries);
+};
+
+typedef struct RHeader {
+ QueryType type;
+ unsigned id;
+ size_t length;
+} RHeader;
+
+typedef struct AddrInfoRequest {
+ struct RHeader header;
+ bool hints_valid;
+ int ai_flags;
+ int ai_family;
+ int ai_socktype;
+ int ai_protocol;
+ size_t node_len, service_len;
+} AddrInfoRequest;
+
+typedef struct AddrInfoResponse {
+ struct RHeader header;
+ int ret;
+ int _errno;
+ int _h_errno;
+ /* followed by addrinfo_serialization[] */
+} AddrInfoResponse;
+
+typedef struct AddrInfoSerialization {
+ int ai_flags;
+ int ai_family;
+ int ai_socktype;
+ int ai_protocol;
+ size_t ai_addrlen;
+ size_t canonname_len;
+ /* Followed by ai_addr amd ai_canonname with variable lengths */
+} AddrInfoSerialization;
+
+typedef struct NameInfoRequest {
+ struct RHeader header;
+ int flags;
+ socklen_t sockaddr_len;
+ bool gethost:1, getserv:1;
+} NameInfoRequest;
+
+typedef struct NameInfoResponse {
+ struct RHeader header;
+ size_t hostlen, servlen;
+ int ret;
+ int _errno;
+ int _h_errno;
+} NameInfoResponse;
+
+typedef union Packet {
+ RHeader rheader;
+ AddrInfoRequest addrinfo_request;
+ AddrInfoResponse addrinfo_response;
+ NameInfoRequest nameinfo_request;
+ NameInfoResponse nameinfo_response;
+} Packet;
+
+static int getaddrinfo_done(sd_resolve_query* q);
+static int getnameinfo_done(sd_resolve_query *q);
+
+static void resolve_query_disconnect(sd_resolve_query *q);
+
+#define RESOLVE_DONT_DESTROY(resolve) \
+ _cleanup_(sd_resolve_unrefp) _unused_ sd_resolve *_dont_destroy_##resolve = sd_resolve_ref(resolve)
+
+static void query_assign_errno(sd_resolve_query *q, int ret, int error, int h_error) {
+ assert(q);
+
+ q->ret = ret;
+ q->_errno = abs(error);
+ q->_h_errno = h_error;
+}
+
+static int send_died(int out_fd) {
+ RHeader rh = {
+ .type = RESPONSE_DIED,
+ .length = sizeof(RHeader),
+ };
+
+ assert(out_fd >= 0);
+
+ if (send(out_fd, &rh, rh.length, MSG_NOSIGNAL) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static void *serialize_addrinfo(void *p, const struct addrinfo *ai, size_t *length, size_t maxlength) {
+ AddrInfoSerialization s;
+ size_t cnl, l;
+
+ assert(p);
+ assert(ai);
+ assert(length);
+ assert(*length <= maxlength);
+
+ cnl = ai->ai_canonname ? strlen(ai->ai_canonname)+1 : 0;
+ l = sizeof(AddrInfoSerialization) + ai->ai_addrlen + cnl;
+
+ if (*length + l > maxlength)
+ return NULL;
+
+ s = (AddrInfoSerialization) {
+ .ai_flags = ai->ai_flags,
+ .ai_family = ai->ai_family,
+ .ai_socktype = ai->ai_socktype,
+ .ai_protocol = ai->ai_protocol,
+ .ai_addrlen = ai->ai_addrlen,
+ .canonname_len = cnl,
+ };
+
+ memcpy((uint8_t*) p, &s, sizeof(AddrInfoSerialization));
+ memcpy((uint8_t*) p + sizeof(AddrInfoSerialization), ai->ai_addr, ai->ai_addrlen);
+ memcpy_safe((char*) p + sizeof(AddrInfoSerialization) + ai->ai_addrlen,
+ ai->ai_canonname, cnl);
+
+ *length += l;
+ return (uint8_t*) p + l;
+}
+
+static int send_addrinfo_reply(
+ int out_fd,
+ unsigned id,
+ int ret,
+ struct addrinfo *ai,
+ int _errno,
+ int _h_errno) {
+
+ AddrInfoResponse resp = {};
+ union {
+ AddrInfoSerialization ais;
+ uint8_t space[BUFSIZE];
+ } buffer;
+ struct iovec iov[2];
+ struct msghdr mh;
+
+ assert(out_fd >= 0);
+
+ resp = (AddrInfoResponse) {
+ .header.type = RESPONSE_ADDRINFO,
+ .header.id = id,
+ .header.length = sizeof(AddrInfoResponse),
+ .ret = ret,
+ ._errno = _errno,
+ ._h_errno = _h_errno,
+ };
+
+ msan_unpoison(&resp, sizeof(resp));
+
+ if (ret == 0 && ai) {
+ void *p = &buffer;
+ struct addrinfo *k;
+
+ for (k = ai; k; k = k->ai_next) {
+ p = serialize_addrinfo(p, k, &resp.header.length, (uint8_t*) &buffer + BUFSIZE - (uint8_t*) p);
+ if (!p) {
+ freeaddrinfo(ai);
+ return -ENOBUFS;
+ }
+ }
+ }
+
+ if (ai)
+ freeaddrinfo(ai);
+
+ iov[0] = IOVEC_MAKE(&resp, sizeof(AddrInfoResponse));
+ iov[1] = IOVEC_MAKE(&buffer, resp.header.length - sizeof(AddrInfoResponse));
+
+ mh = (struct msghdr) {
+ .msg_iov = iov,
+ .msg_iovlen = ELEMENTSOF(iov)
+ };
+
+ if (sendmsg(out_fd, &mh, MSG_NOSIGNAL) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int send_nameinfo_reply(
+ int out_fd,
+ unsigned id,
+ int ret,
+ const char *host,
+ const char *serv,
+ int _errno,
+ int _h_errno) {
+
+ NameInfoResponse resp = {};
+ struct iovec iov[3];
+ struct msghdr mh;
+ size_t hl, sl;
+
+ assert(out_fd >= 0);
+
+ sl = serv ? strlen(serv)+1 : 0;
+ hl = host ? strlen(host)+1 : 0;
+
+ resp = (NameInfoResponse) {
+ .header.type = RESPONSE_NAMEINFO,
+ .header.id = id,
+ .header.length = sizeof(NameInfoResponse) + hl + sl,
+ .hostlen = hl,
+ .servlen = sl,
+ .ret = ret,
+ ._errno = _errno,
+ ._h_errno = _h_errno,
+ };
+
+ msan_unpoison(&resp, sizeof(resp));
+
+ iov[0] = IOVEC_MAKE(&resp, sizeof(NameInfoResponse));
+ iov[1] = IOVEC_MAKE((void*) host, hl);
+ iov[2] = IOVEC_MAKE((void*) serv, sl);
+
+ mh = (struct msghdr) {
+ .msg_iov = iov,
+ .msg_iovlen = ELEMENTSOF(iov)
+ };
+
+ if (sendmsg(out_fd, &mh, MSG_NOSIGNAL) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int handle_request(int out_fd, const Packet *packet, size_t length) {
+ const RHeader *req;
+
+ assert(out_fd >= 0);
+ assert(packet);
+
+ req = &packet->rheader;
+
+ assert_return(length >= sizeof(RHeader), -EIO);
+ assert_return(length == req->length, -EIO);
+
+ switch (req->type) {
+
+ case REQUEST_ADDRINFO: {
+ const AddrInfoRequest *ai_req = &packet->addrinfo_request;
+ struct addrinfo hints, *result = NULL;
+ const char *node, *service;
+ int ret;
+
+ assert_return(length >= sizeof(AddrInfoRequest), -EBADMSG);
+ assert_return(length == sizeof(AddrInfoRequest) + ai_req->node_len + ai_req->service_len, -EBADMSG);
+
+ hints = (struct addrinfo) {
+ .ai_flags = ai_req->ai_flags,
+ .ai_family = ai_req->ai_family,
+ .ai_socktype = ai_req->ai_socktype,
+ .ai_protocol = ai_req->ai_protocol,
+ };
+
+ msan_unpoison(&hints, sizeof(hints));
+
+ node = ai_req->node_len ? (const char*) ai_req + sizeof(AddrInfoRequest) : NULL;
+ service = ai_req->service_len ? (const char*) ai_req + sizeof(AddrInfoRequest) + ai_req->node_len : NULL;
+
+ ret = getaddrinfo(node, service,
+ ai_req->hints_valid ? &hints : NULL,
+ &result);
+
+ /* send_addrinfo_reply() frees result */
+ return send_addrinfo_reply(out_fd, req->id, ret, result, errno, h_errno);
+ }
+
+ case REQUEST_NAMEINFO: {
+ const NameInfoRequest *ni_req = &packet->nameinfo_request;
+ char hostbuf[NI_MAXHOST], servbuf[NI_MAXSERV];
+ union sockaddr_union sa;
+ int ret;
+
+ assert_return(length >= sizeof(NameInfoRequest), -EBADMSG);
+ assert_return(length == sizeof(NameInfoRequest) + ni_req->sockaddr_len, -EBADMSG);
+ assert_return(ni_req->sockaddr_len <= sizeof(sa), -EBADMSG);
+
+ memcpy(&sa, (const uint8_t *) ni_req + sizeof(NameInfoRequest), ni_req->sockaddr_len);
+
+ ret = getnameinfo(&sa.sa, ni_req->sockaddr_len,
+ ni_req->gethost ? hostbuf : NULL, ni_req->gethost ? sizeof(hostbuf) : 0,
+ ni_req->getserv ? servbuf : NULL, ni_req->getserv ? sizeof(servbuf) : 0,
+ ni_req->flags);
+
+ return send_nameinfo_reply(out_fd, req->id, ret,
+ ret == 0 && ni_req->gethost ? hostbuf : NULL,
+ ret == 0 && ni_req->getserv ? servbuf : NULL,
+ errno, h_errno);
+ }
+
+ case REQUEST_TERMINATE:
+ /* Quit */
+ return -ECONNRESET;
+
+ default:
+ assert_not_reached();
+ }
+
+ return 0;
+}
+
+static void* thread_worker(void *p) {
+ sd_resolve *resolve = p;
+
+ /* Assign a pretty name to this thread */
+ (void) pthread_setname_np(pthread_self(), "sd-resolve");
+
+ while (!resolve->dead) {
+ union {
+ Packet packet;
+ uint8_t space[BUFSIZE];
+ } buf;
+ ssize_t length;
+
+ length = recv(resolve->fds[REQUEST_RECV_FD], &buf, sizeof buf, 0);
+ if (length < 0) {
+ if (ERRNO_IS_TRANSIENT(errno))
+ continue;
+
+ break;
+ }
+ if (length == 0)
+ break;
+
+ if (handle_request(resolve->fds[RESPONSE_SEND_FD], &buf.packet, (size_t) length) < 0)
+ break;
+ }
+
+ send_died(resolve->fds[RESPONSE_SEND_FD]);
+
+ return NULL;
+}
+
+static int start_threads(sd_resolve *resolve, unsigned extra) {
+ sigset_t ss, saved_ss;
+ unsigned n;
+ int r, k;
+
+ assert_se(sigfillset(&ss) >= 0);
+
+ /* No signals in forked off threads please. We set the mask before forking, so that the threads never exist
+ * with a different mask than a fully blocked one */
+ r = pthread_sigmask(SIG_BLOCK, &ss, &saved_ss);
+ if (r > 0)
+ return -r;
+
+ n = resolve->n_outstanding + extra;
+ n = CLAMP(n, WORKERS_MIN, WORKERS_MAX);
+
+ while (resolve->n_valid_workers < n) {
+ r = pthread_create(&resolve->workers[resolve->n_valid_workers], NULL, thread_worker, resolve);
+ if (r > 0) {
+ r = -r;
+ goto finish;
+ }
+
+ resolve->n_valid_workers++;
+ }
+
+ r = 0;
+
+finish:
+ k = pthread_sigmask(SIG_SETMASK, &saved_ss, NULL);
+ if (k > 0 && r >= 0)
+ r = -k;
+
+ return r;
+}
+
+static bool resolve_pid_changed(sd_resolve *r) {
+ assert(r);
+
+ /* We don't support people creating a resolver and keeping it
+ * around after fork(). Let's complain. */
+
+ return r->original_pid != getpid_cached();
+}
+
+int sd_resolve_new(sd_resolve **ret) {
+ _cleanup_(sd_resolve_unrefp) sd_resolve *resolve = NULL;
+ int i;
+
+ assert_return(ret, -EINVAL);
+
+ resolve = new0(sd_resolve, 1);
+ if (!resolve)
+ return -ENOMEM;
+
+ resolve->n_ref = 1;
+ resolve->original_pid = getpid_cached();
+
+ for (i = 0; i < _FD_MAX; i++)
+ resolve->fds[i] = -EBADF;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, resolve->fds + REQUEST_RECV_FD) < 0)
+ return -errno;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, resolve->fds + RESPONSE_RECV_FD) < 0)
+ return -errno;
+
+ for (i = 0; i < _FD_MAX; i++)
+ resolve->fds[i] = fd_move_above_stdio(resolve->fds[i]);
+
+ (void) fd_inc_sndbuf(resolve->fds[REQUEST_SEND_FD], QUERIES_MAX * BUFSIZE);
+ (void) fd_increase_rxbuf(resolve->fds[REQUEST_RECV_FD], QUERIES_MAX * BUFSIZE);
+ (void) fd_inc_sndbuf(resolve->fds[RESPONSE_SEND_FD], QUERIES_MAX * BUFSIZE);
+ (void) fd_increase_rxbuf(resolve->fds[RESPONSE_RECV_FD], QUERIES_MAX * BUFSIZE);
+
+ (void) fd_nonblock(resolve->fds[RESPONSE_RECV_FD], true);
+
+ *ret = TAKE_PTR(resolve);
+ return 0;
+}
+
+int sd_resolve_default(sd_resolve **ret) {
+ static thread_local sd_resolve *default_resolve = NULL;
+ sd_resolve *e = NULL;
+ int r;
+
+ if (!ret)
+ return !!default_resolve;
+
+ if (default_resolve) {
+ *ret = sd_resolve_ref(default_resolve);
+ return 0;
+ }
+
+ r = sd_resolve_new(&e);
+ if (r < 0)
+ return r;
+
+ e->default_resolve_ptr = &default_resolve;
+ e->tid = gettid();
+ default_resolve = e;
+
+ *ret = e;
+ return 1;
+}
+
+int sd_resolve_get_tid(sd_resolve *resolve, pid_t *tid) {
+ assert_return(resolve, -EINVAL);
+ assert_return(tid, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ if (resolve->tid != 0) {
+ *tid = resolve->tid;
+ return 0;
+ }
+
+ if (resolve->event)
+ return sd_event_get_tid(resolve->event, tid);
+
+ return -ENXIO;
+}
+
+static sd_resolve *resolve_free(sd_resolve *resolve) {
+ PROTECT_ERRNO;
+ sd_resolve_query *q;
+ unsigned i;
+
+ assert(resolve);
+
+ while ((q = resolve->queries)) {
+ assert(q->floating);
+ resolve_query_disconnect(q);
+ sd_resolve_query_unref(q);
+ }
+
+ if (resolve->default_resolve_ptr)
+ *(resolve->default_resolve_ptr) = NULL;
+
+ resolve->dead = true;
+
+ sd_resolve_detach_event(resolve);
+
+ if (resolve->fds[REQUEST_SEND_FD] >= 0) {
+
+ RHeader req = {
+ .type = REQUEST_TERMINATE,
+ .length = sizeof req,
+ };
+
+ /* Send one termination packet for each worker */
+ for (i = 0; i < resolve->n_valid_workers; i++)
+ (void) send(resolve->fds[REQUEST_SEND_FD], &req, req.length, MSG_NOSIGNAL);
+ }
+
+ /* Now terminate them and wait until they are gone.
+ If we get an error than most likely the thread already exited. */
+ for (i = 0; i < resolve->n_valid_workers; i++)
+ (void) pthread_join(resolve->workers[i], NULL);
+
+ /* Close all communication channels */
+ close_many(resolve->fds, _FD_MAX);
+
+ return mfree(resolve);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_resolve, sd_resolve, resolve_free);
+
+int sd_resolve_get_fd(sd_resolve *resolve) {
+ assert_return(resolve, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ return resolve->fds[RESPONSE_RECV_FD];
+}
+
+int sd_resolve_get_events(sd_resolve *resolve) {
+ assert_return(resolve, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ return resolve->n_queries > resolve->n_done ? POLLIN : 0;
+}
+
+int sd_resolve_get_timeout(sd_resolve *resolve, uint64_t *usec) {
+ assert_return(resolve, -EINVAL);
+ assert_return(usec, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ *usec = UINT64_MAX;
+ return 0;
+}
+
+static sd_resolve_query *lookup_query(sd_resolve *resolve, unsigned id) {
+ sd_resolve_query *q;
+
+ assert(resolve);
+
+ q = resolve->query_array[id % QUERIES_MAX];
+ if (q)
+ if (q->id == id)
+ return q;
+
+ return NULL;
+}
+
+static int complete_query(sd_resolve *resolve, sd_resolve_query *q) {
+ int r;
+
+ assert(q);
+ assert(!q->done);
+ assert(q->resolve == resolve);
+
+ q->done = true;
+ resolve->n_done++;
+
+ resolve->current = sd_resolve_query_ref(q);
+
+ switch (q->type) {
+
+ case REQUEST_ADDRINFO:
+ r = getaddrinfo_done(q);
+ break;
+
+ case REQUEST_NAMEINFO:
+ r = getnameinfo_done(q);
+ break;
+
+ default:
+ assert_not_reached();
+ }
+
+ resolve->current = NULL;
+
+ if (q->floating) {
+ resolve_query_disconnect(q);
+ sd_resolve_query_unref(q);
+ }
+
+ sd_resolve_query_unref(q);
+
+ return r;
+}
+
+static int unserialize_addrinfo(const void **p, size_t *length, struct addrinfo **ret_ai) {
+ AddrInfoSerialization s;
+ struct addrinfo *ai;
+ size_t l;
+
+ assert(p);
+ assert(*p);
+ assert(ret_ai);
+ assert(length);
+
+ if (*length < sizeof(AddrInfoSerialization))
+ return -EBADMSG;
+
+ memcpy(&s, *p, sizeof(s));
+
+ l = sizeof(AddrInfoSerialization) + s.ai_addrlen + s.canonname_len;
+ if (*length < l)
+ return -EBADMSG;
+
+ ai = new(struct addrinfo, 1);
+ if (!ai)
+ return -ENOMEM;
+
+ *ai = (struct addrinfo) {
+ .ai_flags = s.ai_flags,
+ .ai_family = s.ai_family,
+ .ai_socktype = s.ai_socktype,
+ .ai_protocol = s.ai_protocol,
+ .ai_addrlen = s.ai_addrlen,
+ };
+
+ if (s.ai_addrlen > 0) {
+ ai->ai_addr = memdup((const uint8_t*) *p + sizeof(AddrInfoSerialization), s.ai_addrlen);
+ if (!ai->ai_addr) {
+ free(ai);
+ return -ENOMEM;
+ }
+ }
+
+ if (s.canonname_len > 0) {
+ ai->ai_canonname = memdup((const uint8_t*) *p + sizeof(AddrInfoSerialization) + s.ai_addrlen, s.canonname_len);
+ if (!ai->ai_canonname) {
+ free(ai->ai_addr);
+ free(ai);
+ return -ENOMEM;
+ }
+ }
+
+ *length -= l;
+ *ret_ai = ai;
+ *p = ((const uint8_t*) *p) + l;
+
+ return 0;
+}
+
+static int handle_response(sd_resolve *resolve, const Packet *packet, size_t length) {
+ const RHeader *resp;
+ sd_resolve_query *q;
+ int r;
+
+ assert(resolve);
+ assert(packet);
+
+ resp = &packet->rheader;
+ assert_return(length >= sizeof(RHeader), -EIO);
+ assert_return(length == resp->length, -EIO);
+
+ if (resp->type == RESPONSE_DIED) {
+ resolve->dead = true;
+ return 0;
+ }
+
+ assert(resolve->n_outstanding > 0);
+ resolve->n_outstanding--;
+
+ q = lookup_query(resolve, resp->id);
+ if (!q)
+ return 0;
+
+ switch (resp->type) {
+
+ case RESPONSE_ADDRINFO: {
+ const AddrInfoResponse *ai_resp = &packet->addrinfo_response;
+ const void *p;
+ size_t l;
+ struct addrinfo *prev = NULL;
+
+ assert_return(length >= sizeof(AddrInfoResponse), -EBADMSG);
+ assert_return(q->type == REQUEST_ADDRINFO, -EBADMSG);
+
+ query_assign_errno(q, ai_resp->ret, ai_resp->_errno, ai_resp->_h_errno);
+
+ l = length - sizeof(AddrInfoResponse);
+ p = (const uint8_t*) resp + sizeof(AddrInfoResponse);
+
+ while (l > 0 && p) {
+ struct addrinfo *ai = NULL;
+
+ r = unserialize_addrinfo(&p, &l, &ai);
+ if (r < 0) {
+ query_assign_errno(q, EAI_SYSTEM, r, 0);
+ freeaddrinfo(q->addrinfo);
+ q->addrinfo = NULL;
+ break;
+ }
+
+ if (prev)
+ prev->ai_next = ai;
+ else
+ q->addrinfo = ai;
+
+ prev = ai;
+ }
+
+ return complete_query(resolve, q);
+ }
+
+ case RESPONSE_NAMEINFO: {
+ const NameInfoResponse *ni_resp = &packet->nameinfo_response;
+
+ assert_return(length >= sizeof(NameInfoResponse), -EBADMSG);
+ assert_return(q->type == REQUEST_NAMEINFO, -EBADMSG);
+
+ if (ni_resp->hostlen > DNS_HOSTNAME_MAX ||
+ ni_resp->servlen > DNS_HOSTNAME_MAX ||
+ sizeof(NameInfoResponse) + ni_resp->hostlen + ni_resp->servlen > length)
+ query_assign_errno(q, EAI_SYSTEM, EIO, 0);
+ else {
+ query_assign_errno(q, ni_resp->ret, ni_resp->_errno, ni_resp->_h_errno);
+
+ if (ni_resp->hostlen > 0) {
+ q->host = strndup((const char*) ni_resp + sizeof(NameInfoResponse),
+ ni_resp->hostlen-1);
+ if (!q->host)
+ query_assign_errno(q, EAI_MEMORY, ENOMEM, 0);
+ }
+
+ if (ni_resp->servlen > 0) {
+ q->serv = strndup((const char*) ni_resp + sizeof(NameInfoResponse) + ni_resp->hostlen,
+ ni_resp->servlen-1);
+ if (!q->serv)
+ query_assign_errno(q, EAI_MEMORY, ENOMEM, 0);
+ }
+ }
+
+ return complete_query(resolve, q);
+ }
+
+ default:
+ return 0;
+ }
+}
+
+int sd_resolve_process(sd_resolve *resolve) {
+ RESOLVE_DONT_DESTROY(resolve);
+
+ union {
+ Packet packet;
+ uint8_t space[BUFSIZE];
+ } buf;
+ ssize_t l;
+ int r;
+
+ assert_return(resolve, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ /* We don't allow recursively invoking sd_resolve_process(). */
+ assert_return(!resolve->current, -EBUSY);
+
+ l = recv(resolve->fds[RESPONSE_RECV_FD], &buf, sizeof buf, 0);
+ if (l < 0) {
+ if (ERRNO_IS_TRANSIENT(errno))
+ return 0;
+
+ return -errno;
+ }
+ if (l == 0)
+ return -ECONNREFUSED;
+
+ r = handle_response(resolve, &buf.packet, (size_t) l);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int sd_resolve_wait(sd_resolve *resolve, uint64_t timeout_usec) {
+ int r;
+
+ assert_return(resolve, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ if (resolve->n_done >= resolve->n_queries)
+ return 0;
+
+ do {
+ r = fd_wait_for_event(resolve->fds[RESPONSE_RECV_FD], POLLIN, timeout_usec);
+ } while (r == -EINTR);
+
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ETIMEDOUT;
+
+ return sd_resolve_process(resolve);
+}
+
+static int alloc_query(sd_resolve *resolve, bool floating, sd_resolve_query **_q) {
+ sd_resolve_query *q;
+ int r;
+
+ assert(resolve);
+ assert(_q);
+
+ if (resolve->n_queries >= QUERIES_MAX)
+ return -ENOBUFS;
+
+ r = start_threads(resolve, 1);
+ if (r < 0)
+ return r;
+
+ while (resolve->query_array[resolve->current_id % QUERIES_MAX])
+ resolve->current_id++;
+
+ q = resolve->query_array[resolve->current_id % QUERIES_MAX] = new0(sd_resolve_query, 1);
+ if (!q)
+ return -ENOMEM;
+
+ q->n_ref = 1;
+ q->resolve = resolve;
+ q->floating = floating;
+ q->id = resolve->current_id++;
+
+ if (!floating)
+ sd_resolve_ref(resolve);
+
+ LIST_PREPEND(queries, resolve->queries, q);
+ resolve->n_queries++;
+
+ *_q = q;
+ return 0;
+}
+
+int resolve_getaddrinfo_with_destroy_callback(
+ sd_resolve *resolve,
+ sd_resolve_query **ret_query,
+ const char *node, const char *service,
+ const struct addrinfo *hints,
+ sd_resolve_getaddrinfo_handler_t callback,
+ sd_resolve_destroy_t destroy_callback,
+ void *userdata) {
+
+ _cleanup_(sd_resolve_query_unrefp) sd_resolve_query *q = NULL;
+ size_t node_len, service_len;
+ AddrInfoRequest req = {};
+ struct iovec iov[3];
+ struct msghdr mh = {};
+ int r;
+
+ assert_return(resolve, -EINVAL);
+ assert_return(node || service, -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ r = alloc_query(resolve, !ret_query, &q);
+ if (r < 0)
+ return r;
+
+ q->type = REQUEST_ADDRINFO;
+ q->getaddrinfo_handler = callback;
+ q->userdata = userdata;
+
+ node_len = node ? strlen(node) + 1 : 0;
+ service_len = service ? strlen(service) + 1 : 0;
+
+ req = (AddrInfoRequest) {
+ .node_len = node_len,
+ .service_len = service_len,
+
+ .header.id = q->id,
+ .header.type = REQUEST_ADDRINFO,
+ .header.length = sizeof(AddrInfoRequest) + node_len + service_len,
+
+ .hints_valid = hints,
+ .ai_flags = hints ? hints->ai_flags : 0,
+ .ai_family = hints ? hints->ai_family : 0,
+ .ai_socktype = hints ? hints->ai_socktype : 0,
+ .ai_protocol = hints ? hints->ai_protocol : 0,
+ };
+
+ msan_unpoison(&req, sizeof(req));
+
+ iov[mh.msg_iovlen++] = IOVEC_MAKE(&req, sizeof(AddrInfoRequest));
+ if (node)
+ iov[mh.msg_iovlen++] = IOVEC_MAKE((void*) node, req.node_len);
+ if (service)
+ iov[mh.msg_iovlen++] = IOVEC_MAKE((void*) service, req.service_len);
+ mh.msg_iov = iov;
+
+ if (sendmsg(resolve->fds[REQUEST_SEND_FD], &mh, MSG_NOSIGNAL) < 0)
+ return -errno;
+
+ resolve->n_outstanding++;
+ q->destroy_callback = destroy_callback;
+
+ if (ret_query)
+ *ret_query = q;
+
+ TAKE_PTR(q);
+
+ return 0;
+}
+
+int sd_resolve_getaddrinfo(
+ sd_resolve *resolve,
+ sd_resolve_query **ret_query,
+ const char *node, const char *service,
+ const struct addrinfo *hints,
+ sd_resolve_getaddrinfo_handler_t callback,
+ void *userdata) {
+
+ return resolve_getaddrinfo_with_destroy_callback(resolve, ret_query, node, service, hints, callback, NULL, userdata);
+}
+
+static int getaddrinfo_done(sd_resolve_query* q) {
+ assert(q);
+ assert(q->done);
+ assert(q->getaddrinfo_handler);
+
+ errno = q->_errno;
+ h_errno = q->_h_errno;
+
+ return q->getaddrinfo_handler(q, q->ret, q->addrinfo, q->userdata);
+}
+
+int resolve_getnameinfo_with_destroy_callback(
+ sd_resolve *resolve,
+ sd_resolve_query **ret_query,
+ const struct sockaddr *sa, socklen_t salen,
+ int flags,
+ uint64_t get,
+ sd_resolve_getnameinfo_handler_t callback,
+ sd_resolve_destroy_t destroy_callback,
+ void *userdata) {
+
+ _cleanup_(sd_resolve_query_unrefp) sd_resolve_query *q = NULL;
+ NameInfoRequest req = {};
+ struct iovec iov[2];
+ struct msghdr mh;
+ int r;
+
+ assert_return(resolve, -EINVAL);
+ assert_return(sa, -EINVAL);
+ assert_return(salen >= sizeof(struct sockaddr), -EINVAL);
+ assert_return(salen <= sizeof(union sockaddr_union), -EINVAL);
+ assert_return((get & ~SD_RESOLVE_GET_BOTH) == 0, -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(!resolve_pid_changed(resolve), -ECHILD);
+
+ r = alloc_query(resolve, !ret_query, &q);
+ if (r < 0)
+ return r;
+
+ q->type = REQUEST_NAMEINFO;
+ q->getnameinfo_handler = callback;
+ q->userdata = userdata;
+
+ req = (NameInfoRequest) {
+ .header.id = q->id,
+ .header.type = REQUEST_NAMEINFO,
+ .header.length = sizeof(NameInfoRequest) + salen,
+
+ .flags = flags,
+ .sockaddr_len = salen,
+ .gethost = !!(get & SD_RESOLVE_GET_HOST),
+ .getserv = !!(get & SD_RESOLVE_GET_SERVICE),
+ };
+
+ msan_unpoison(&req, sizeof(req));
+
+ iov[0] = IOVEC_MAKE(&req, sizeof(NameInfoRequest));
+ iov[1] = IOVEC_MAKE((void*) sa, salen);
+
+ mh = (struct msghdr) {
+ .msg_iov = iov,
+ .msg_iovlen = ELEMENTSOF(iov)
+ };
+
+ if (sendmsg(resolve->fds[REQUEST_SEND_FD], &mh, MSG_NOSIGNAL) < 0)
+ return -errno;
+
+ resolve->n_outstanding++;
+ q->destroy_callback = destroy_callback;
+
+ if (ret_query)
+ *ret_query = q;
+
+ TAKE_PTR(q);
+
+ return 0;
+}
+
+int sd_resolve_getnameinfo(
+ sd_resolve *resolve,
+ sd_resolve_query **ret_query,
+ const struct sockaddr *sa, socklen_t salen,
+ int flags,
+ uint64_t get,
+ sd_resolve_getnameinfo_handler_t callback,
+ void *userdata) {
+
+ return resolve_getnameinfo_with_destroy_callback(resolve, ret_query, sa, salen, flags, get, callback, NULL, userdata);
+}
+
+static int getnameinfo_done(sd_resolve_query *q) {
+
+ assert(q);
+ assert(q->done);
+ assert(q->getnameinfo_handler);
+
+ errno = q->_errno;
+ h_errno = q->_h_errno;
+
+ return q->getnameinfo_handler(q, q->ret, q->host, q->serv, q->userdata);
+}
+
+static void resolve_freeaddrinfo(struct addrinfo *ai) {
+ while (ai) {
+ struct addrinfo *next = ai->ai_next;
+
+ free(ai->ai_addr);
+ free(ai->ai_canonname);
+ free_and_replace(ai, next);
+ }
+}
+
+static void resolve_query_disconnect(sd_resolve_query *q) {
+ sd_resolve *resolve;
+ unsigned i;
+
+ assert(q);
+
+ if (!q->resolve)
+ return;
+
+ resolve = q->resolve;
+ assert(resolve->n_queries > 0);
+
+ if (q->done) {
+ assert(resolve->n_done > 0);
+ resolve->n_done--;
+ }
+
+ i = q->id % QUERIES_MAX;
+ assert(resolve->query_array[i] == q);
+ resolve->query_array[i] = NULL;
+ LIST_REMOVE(queries, resolve->queries, q);
+ resolve->n_queries--;
+
+ q->resolve = NULL;
+ if (!q->floating)
+ sd_resolve_unref(resolve);
+}
+
+static sd_resolve_query *resolve_query_free(sd_resolve_query *q) {
+ assert(q);
+
+ resolve_query_disconnect(q);
+
+ if (q->destroy_callback)
+ q->destroy_callback(q->userdata);
+
+ resolve_freeaddrinfo(q->addrinfo);
+ free(q->host);
+ free(q->serv);
+
+ return mfree(q);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_resolve_query, sd_resolve_query, resolve_query_free);
+
+int sd_resolve_query_is_done(sd_resolve_query *q) {
+ assert_return(q, -EINVAL);
+ assert_return(!resolve_pid_changed(q->resolve), -ECHILD);
+
+ return q->done;
+}
+
+void* sd_resolve_query_set_userdata(sd_resolve_query *q, void *userdata) {
+ void *ret;
+
+ assert_return(q, NULL);
+ assert_return(!resolve_pid_changed(q->resolve), NULL);
+
+ ret = q->userdata;
+ q->userdata = userdata;
+
+ return ret;
+}
+
+void* sd_resolve_query_get_userdata(sd_resolve_query *q) {
+ assert_return(q, NULL);
+ assert_return(!resolve_pid_changed(q->resolve), NULL);
+
+ return q->userdata;
+}
+
+sd_resolve *sd_resolve_query_get_resolve(sd_resolve_query *q) {
+ assert_return(q, NULL);
+ assert_return(!resolve_pid_changed(q->resolve), NULL);
+
+ return q->resolve;
+}
+
+int sd_resolve_query_get_destroy_callback(sd_resolve_query *q, sd_resolve_destroy_t *destroy_callback) {
+ assert_return(q, -EINVAL);
+
+ if (destroy_callback)
+ *destroy_callback = q->destroy_callback;
+
+ return !!q->destroy_callback;
+}
+
+int sd_resolve_query_set_destroy_callback(sd_resolve_query *q, sd_resolve_destroy_t destroy_callback) {
+ assert_return(q, -EINVAL);
+
+ q->destroy_callback = destroy_callback;
+ return 0;
+}
+
+int sd_resolve_query_get_floating(sd_resolve_query *q) {
+ assert_return(q, -EINVAL);
+
+ return q->floating;
+}
+
+int sd_resolve_query_set_floating(sd_resolve_query *q, int b) {
+ assert_return(q, -EINVAL);
+
+ if (q->floating == !!b)
+ return 0;
+
+ if (!q->resolve) /* Already disconnected */
+ return -ESTALE;
+
+ q->floating = b;
+
+ if (b) {
+ sd_resolve_query_ref(q);
+ sd_resolve_unref(q->resolve);
+ } else {
+ sd_resolve_ref(q->resolve);
+ sd_resolve_query_unref(q);
+ }
+
+ return 1;
+}
+
+static int io_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ sd_resolve *resolve = ASSERT_PTR(userdata);
+ int r;
+
+ r = sd_resolve_process(resolve);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int sd_resolve_attach_event(sd_resolve *resolve, sd_event *event, int64_t priority) {
+ int r;
+
+ assert_return(resolve, -EINVAL);
+ assert_return(!resolve->event, -EBUSY);
+
+ assert(!resolve->event_source);
+
+ if (event)
+ resolve->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&resolve->event);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_add_io(resolve->event, &resolve->event_source, resolve->fds[RESPONSE_RECV_FD], POLLIN, io_callback, resolve);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(resolve->event_source, priority);
+ if (r < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ sd_resolve_detach_event(resolve);
+ return r;
+}
+
+ int sd_resolve_detach_event(sd_resolve *resolve) {
+ assert_return(resolve, -EINVAL);
+
+ if (!resolve->event)
+ return 0;
+
+ resolve->event_source = sd_event_source_disable_unref(resolve->event_source);
+ resolve->event = sd_event_unref(resolve->event);
+ return 1;
+}
+
+sd_event *sd_resolve_get_event(sd_resolve *resolve) {
+ assert_return(resolve, NULL);
+
+ return resolve->event;
+}
diff --git a/src/libsystemd/sd-resolve/test-resolve.c b/src/libsystemd/sd-resolve/test-resolve.c
new file mode 100644
index 0000000..829e13e
--- /dev/null
+++ b/src/libsystemd/sd-resolve/test-resolve.c
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <net/if_arp.h>
+#include <netinet/in.h>
+#include <resolv.h>
+#include <stdio.h>
+
+#include "sd-resolve.h"
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "tests.h"
+#include "time-util.h"
+
+#define TEST_TIMEOUT_USEC (20*USEC_PER_SEC)
+
+static int getaddrinfo_handler(sd_resolve_query *q, int ret, const struct addrinfo *ai, void *userdata) {
+ const struct addrinfo *i;
+
+ assert_se(q);
+
+ if (ret != 0) {
+ log_error("getaddrinfo error: %s %i", gai_strerror(ret), ret);
+ return 0;
+ }
+
+ for (i = ai; i; i = i->ai_next) {
+ _cleanup_free_ char *addr = NULL;
+
+ assert_se(sockaddr_pretty(i->ai_addr, i->ai_addrlen, false, true, &addr) == 0);
+ puts(addr);
+ }
+
+ printf("canonical name: %s\n", strna(ai->ai_canonname));
+
+ return 0;
+}
+
+static int getnameinfo_handler(sd_resolve_query *q, int ret, const char *host, const char *serv, void *userdata) {
+ assert_se(q);
+
+ if (ret != 0) {
+ log_error("getnameinfo error: %s %i", gai_strerror(ret), ret);
+ return 0;
+ }
+
+ printf("Host: %s — Serv: %s\n", strna(host), strna(serv));
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ _cleanup_(sd_resolve_query_unrefp) sd_resolve_query *q1 = NULL, *q2 = NULL;
+ _cleanup_(sd_resolve_unrefp) sd_resolve *resolve = NULL;
+ int r;
+
+ struct addrinfo hints = {
+ .ai_family = AF_UNSPEC,
+ .ai_socktype = SOCK_STREAM,
+ .ai_flags = AI_CANONNAME,
+ };
+
+ union sockaddr_union sa = {
+ .in.sin_family = AF_INET,
+ .in.sin_port = htobe16(80),
+ };
+
+ test_setup_logging(LOG_DEBUG);
+
+ assert_se(sd_resolve_default(&resolve) >= 0);
+
+ /* Test a floating resolver query */
+ r = sd_resolve_getaddrinfo(resolve, NULL, "redhat.com", "http", NULL, getaddrinfo_handler, NULL);
+ if (r < 0)
+ log_error_errno(r, "sd_resolve_getaddrinfo(): %m");
+
+ /* Make a name -> address query */
+ r = sd_resolve_getaddrinfo(resolve, &q1, argc >= 2 ? argv[1] : "www.heise.de", NULL, &hints, getaddrinfo_handler, NULL);
+ if (r < 0)
+ log_error_errno(r, "sd_resolve_getaddrinfo(): %m");
+
+ /* Make an address -> name query */
+ sa.in.sin_addr.s_addr = inet_addr(argc >= 3 ? argv[2] : "193.99.144.71");
+ r = sd_resolve_getnameinfo(resolve, &q2, &sa.sa, SOCKADDR_LEN(sa), 0, SD_RESOLVE_GET_BOTH, getnameinfo_handler, NULL);
+ if (r < 0)
+ log_error_errno(r, "sd_resolve_getnameinfo(): %m");
+
+ /* Wait until all queries are completed */
+ for (;;) {
+ r = sd_resolve_wait(resolve, TEST_TIMEOUT_USEC);
+ if (r == 0)
+ break;
+ if (r == -ETIMEDOUT) {
+ /* Let's catch timeouts here, so that we can run safely in a CI that has no reliable DNS. Note
+ * that we invoke exit() directly here, as the stuck NSS call will not allow us to exit
+ * cleanly. */
+
+ log_notice_errno(r, "sd_resolve_wait() timed out, but that's OK");
+ exit(EXIT_SUCCESS);
+ }
+ if (r < 0) {
+ log_error_errno(r, "sd_resolve_wait(): %m");
+ assert_not_reached();
+ }
+ }
+
+ return 0;
+}