diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 20:49:52 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 20:49:52 +0000 |
commit | 55944e5e40b1be2afc4855d8d2baf4b73d1876b5 (patch) | |
tree | 33f869f55a1b149e9b7c2b7e201867ca5dd52992 /src/libsystemd | |
parent | Initial commit. (diff) | |
download | systemd-55944e5e40b1be2afc4855d8d2baf4b73d1876b5.tar.xz systemd-55944e5e40b1be2afc4855d8d2baf4b73d1876b5.zip |
Adding upstream version 255.4.upstream/255.4
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/libsystemd')
156 files changed, 74259 insertions, 0 deletions
diff --git a/src/libsystemd/libsystemd.pc.in b/src/libsystemd/libsystemd.pc.in new file mode 100644 index 0000000..3a43ef6 --- /dev/null +++ b/src/libsystemd/libsystemd.pc.in @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. + +prefix={{PREFIX}} +exec_prefix={{PREFIX}} +libdir={{LIBDIR}} +includedir={{INCLUDE_DIR}} + +Name: systemd +Description: systemd Library +URL: {{PROJECT_URL}} +Version: {{PROJECT_VERSION}} +Libs: -L${libdir} -lsystemd +Cflags: -I${includedir} diff --git a/src/libsystemd/libsystemd.sym b/src/libsystemd/libsystemd.sym new file mode 100644 index 0000000..4113920 --- /dev/null +++ b/src/libsystemd/libsystemd.sym @@ -0,0 +1,836 @@ +/*** + SPDX-License-Identifier: LGPL-2.1-or-later + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. +***/ + +LIBSYSTEMD_209 { +global: + /* sd-journal */ + sd_journal_print; + sd_journal_printv; + sd_journal_send; + sd_journal_sendv; + sd_journal_stream_fd; + sd_journal_open; + sd_journal_close; + sd_journal_previous; + sd_journal_next; + sd_journal_previous_skip; + sd_journal_next_skip; + sd_journal_get_realtime_usec; + sd_journal_get_monotonic_usec; + sd_journal_get_data; + sd_journal_enumerate_data; + sd_journal_restart_data; + sd_journal_add_match; + sd_journal_flush_matches; + sd_journal_seek_head; + sd_journal_seek_tail; + sd_journal_seek_monotonic_usec; + sd_journal_seek_realtime_usec; + sd_journal_seek_cursor; + sd_journal_get_cursor; + sd_journal_get_fd; + sd_journal_process; + sd_journal_print_with_location; + sd_journal_printv_with_location; + sd_journal_send_with_location; + sd_journal_sendv_with_location; + sd_journal_get_cutoff_realtime_usec; + sd_journal_get_cutoff_monotonic_usec; + sd_journal_wait; + sd_journal_open_directory; + sd_journal_add_disjunction; + sd_journal_perror; + sd_journal_perror_with_location; + sd_journal_get_usage; + sd_journal_test_cursor; + sd_journal_query_unique; + sd_journal_enumerate_unique; + sd_journal_restart_unique; + sd_journal_get_catalog; + sd_journal_get_catalog_for_message_id; + sd_journal_set_data_threshold; + sd_journal_get_data_threshold; + sd_journal_reliable_fd; + sd_journal_get_events; + sd_journal_get_timeout; + sd_journal_add_conjunction; + sd_journal_open_files; + sd_journal_open_container; + + /* sd-daemon */ + sd_booted; + sd_is_fifo; + sd_is_mq; + sd_is_socket; + sd_is_socket_inet; + sd_is_socket_unix; + sd_is_special; + sd_listen_fds; + sd_notify; + sd_notifyf; + sd_watchdog_enabled; + + /* sd-id128 */ + sd_id128_to_string; + sd_id128_from_string; + sd_id128_randomize; + sd_id128_get_machine; + sd_id128_get_boot; + + /* sd-login */ + sd_get_seats; + sd_get_sessions; + sd_get_uids; + sd_login_monitor_flush; + sd_login_monitor_get_fd; + sd_login_monitor_new; + sd_login_monitor_unref; + sd_pid_get_owner_uid; + sd_pid_get_session; + sd_seat_can_multi_session; + sd_seat_get_active; + sd_seat_get_sessions; + sd_session_get_seat; + sd_session_get_uid; + sd_session_is_active; + sd_uid_get_seats; + sd_uid_get_sessions; + sd_uid_get_state; + sd_uid_is_on_seat; + sd_pid_get_unit; + sd_session_get_service; + sd_session_get_type; + sd_session_get_class; + sd_session_get_display; + sd_session_get_state; + sd_seat_can_tty; + sd_seat_can_graphical; + sd_session_get_tty; + sd_login_monitor_get_events; + sd_login_monitor_get_timeout; + sd_pid_get_user_unit; + sd_pid_get_machine_name; + sd_get_machine_names; + sd_pid_get_slice; + sd_session_get_vt; + sd_session_is_remote; + sd_session_get_remote_user; + sd_session_get_remote_host; +local: + *; +}; + +LIBSYSTEMD_211 { +global: + sd_machine_get_class; + sd_peer_get_session; + sd_peer_get_owner_uid; + sd_peer_get_unit; + sd_peer_get_user_unit; + sd_peer_get_machine_name; + sd_peer_get_slice; +} LIBSYSTEMD_209; + +LIBSYSTEMD_213 { +global: + sd_uid_get_display; +} LIBSYSTEMD_211; + +LIBSYSTEMD_214 { +global: + sd_pid_notify; + sd_pid_notifyf; +} LIBSYSTEMD_213; + +LIBSYSTEMD_216 { +global: + sd_machine_get_ifindices; +} LIBSYSTEMD_214; + +LIBSYSTEMD_217 { +global: + sd_session_get_desktop; +} LIBSYSTEMD_216; + +LIBSYSTEMD_219 { +global: + sd_pid_notify_with_fds; +} LIBSYSTEMD_217; + +LIBSYSTEMD_220 { +global: + sd_pid_get_user_slice; + sd_peer_get_user_slice; +} LIBSYSTEMD_219; + +LIBSYSTEMD_221 { +global: + /* sd-bus */ + sd_bus_default; + sd_bus_default_user; + sd_bus_default_system; + sd_bus_open; + sd_bus_open_user; + sd_bus_open_system; + sd_bus_open_system_remote; + sd_bus_open_system_machine; + sd_bus_new; + sd_bus_set_address; + sd_bus_set_fd; + sd_bus_set_exec; + sd_bus_get_address; + sd_bus_set_bus_client; + sd_bus_is_bus_client; + sd_bus_set_server; + sd_bus_is_server; + sd_bus_set_anonymous; + sd_bus_is_anonymous; + sd_bus_set_trusted; + sd_bus_is_trusted; + sd_bus_set_monitor; + sd_bus_is_monitor; + sd_bus_set_description; + sd_bus_get_description; + sd_bus_negotiate_creds; + sd_bus_negotiate_timestamp; + sd_bus_negotiate_fds; + sd_bus_can_send; + sd_bus_get_creds_mask; + sd_bus_set_allow_interactive_authorization; + sd_bus_get_allow_interactive_authorization; + sd_bus_start; + sd_bus_close; + sd_bus_try_close; + sd_bus_ref; + sd_bus_unref; + sd_bus_is_open; + sd_bus_get_bus_id; + sd_bus_get_scope; + sd_bus_get_tid; + sd_bus_get_owner_creds; + sd_bus_send; + sd_bus_send_to; + sd_bus_call; + sd_bus_call_async; + sd_bus_get_fd; + sd_bus_get_events; + sd_bus_get_timeout; + sd_bus_process; + sd_bus_process_priority; + sd_bus_wait; + sd_bus_flush; + sd_bus_get_current_slot; + sd_bus_get_current_message; + sd_bus_get_current_handler; + sd_bus_get_current_userdata; + sd_bus_attach_event; + sd_bus_detach_event; + sd_bus_get_event; + sd_bus_add_filter; + sd_bus_add_match; + sd_bus_add_object; + sd_bus_add_fallback; + sd_bus_add_object_vtable; + sd_bus_add_fallback_vtable; + sd_bus_add_node_enumerator; + sd_bus_add_object_manager; + sd_bus_slot_ref; + sd_bus_slot_unref; + sd_bus_slot_get_bus; + sd_bus_slot_get_userdata; + sd_bus_slot_set_userdata; + sd_bus_slot_get_description; + sd_bus_slot_set_description; + sd_bus_slot_get_current_message; + sd_bus_slot_get_current_handler; + sd_bus_slot_get_current_userdata; + sd_bus_message_new_signal; + sd_bus_message_new_method_call; + sd_bus_message_new_method_return; + sd_bus_message_new_method_error; + sd_bus_message_new_method_errorf; + sd_bus_message_new_method_errno; + sd_bus_message_new_method_errnof; + sd_bus_message_ref; + sd_bus_message_unref; + sd_bus_message_get_type; + sd_bus_message_get_cookie; + sd_bus_message_get_reply_cookie; + sd_bus_message_get_priority; + sd_bus_message_get_expect_reply; + sd_bus_message_get_auto_start; + sd_bus_message_get_allow_interactive_authorization; + sd_bus_message_get_signature; + sd_bus_message_get_path; + sd_bus_message_get_interface; + sd_bus_message_get_member; + sd_bus_message_get_destination; + sd_bus_message_get_sender; + sd_bus_message_get_error; + sd_bus_message_get_errno; + sd_bus_message_get_monotonic_usec; + sd_bus_message_get_realtime_usec; + sd_bus_message_get_seqnum; + sd_bus_message_get_bus; + sd_bus_message_get_creds; + sd_bus_message_is_signal; + sd_bus_message_is_method_call; + sd_bus_message_is_method_error; + sd_bus_message_is_empty; + sd_bus_message_has_signature; + sd_bus_message_set_expect_reply; + sd_bus_message_set_auto_start; + sd_bus_message_set_allow_interactive_authorization; + sd_bus_message_set_destination; + sd_bus_message_set_priority; + sd_bus_message_append; + sd_bus_message_append_basic; + sd_bus_message_append_array; + sd_bus_message_append_array_space; + sd_bus_message_append_array_iovec; + sd_bus_message_append_array_memfd; + sd_bus_message_append_string_space; + sd_bus_message_append_string_iovec; + sd_bus_message_append_string_memfd; + sd_bus_message_append_strv; + sd_bus_message_open_container; + sd_bus_message_close_container; + sd_bus_message_copy; + sd_bus_message_read; + sd_bus_message_read_basic; + sd_bus_message_read_array; + sd_bus_message_read_strv; + sd_bus_message_skip; + sd_bus_message_enter_container; + sd_bus_message_exit_container; + sd_bus_message_peek_type; + sd_bus_message_verify_type; + sd_bus_message_at_end; + sd_bus_message_rewind; + sd_bus_get_unique_name; + sd_bus_request_name; + sd_bus_release_name; + sd_bus_list_names; + sd_bus_get_name_creds; + sd_bus_get_name_machine_id; + sd_bus_call_method; + sd_bus_call_method_async; + sd_bus_get_property; + sd_bus_get_property_trivial; + sd_bus_get_property_string; + sd_bus_get_property_strv; + sd_bus_set_property; + sd_bus_reply_method_return; + sd_bus_reply_method_error; + sd_bus_reply_method_errorf; + sd_bus_reply_method_errno; + sd_bus_reply_method_errnof; + sd_bus_emit_signal; + sd_bus_emit_properties_changed_strv; + sd_bus_emit_properties_changed; + sd_bus_emit_interfaces_added_strv; + sd_bus_emit_interfaces_added; + sd_bus_emit_interfaces_removed_strv; + sd_bus_emit_interfaces_removed; + sd_bus_query_sender_creds; + sd_bus_query_sender_privilege; + sd_bus_creds_new_from_pid; + sd_bus_creds_ref; + sd_bus_creds_unref; + sd_bus_creds_get_mask; + sd_bus_creds_get_augmented_mask; + sd_bus_creds_get_pid; + sd_bus_creds_get_ppid; + sd_bus_creds_get_tid; + sd_bus_creds_get_uid; + sd_bus_creds_get_euid; + sd_bus_creds_get_suid; + sd_bus_creds_get_fsuid; + sd_bus_creds_get_gid; + sd_bus_creds_get_egid; + sd_bus_creds_get_sgid; + sd_bus_creds_get_fsgid; + sd_bus_creds_get_supplementary_gids; + sd_bus_creds_get_comm; + sd_bus_creds_get_tid_comm; + sd_bus_creds_get_exe; + sd_bus_creds_get_cmdline; + sd_bus_creds_get_cgroup; + sd_bus_creds_get_unit; + sd_bus_creds_get_slice; + sd_bus_creds_get_user_unit; + sd_bus_creds_get_user_slice; + sd_bus_creds_get_session; + sd_bus_creds_get_owner_uid; + sd_bus_creds_has_effective_cap; + sd_bus_creds_has_permitted_cap; + sd_bus_creds_has_inheritable_cap; + sd_bus_creds_has_bounding_cap; + sd_bus_creds_get_selinux_context; + sd_bus_creds_get_audit_session_id; + sd_bus_creds_get_audit_login_uid; + sd_bus_creds_get_tty; + sd_bus_creds_get_unique_name; + sd_bus_creds_get_well_known_names; + sd_bus_creds_get_description; + sd_bus_error_free; + sd_bus_error_set; + sd_bus_error_setf; + sd_bus_error_set_const; + sd_bus_error_set_errno; + sd_bus_error_set_errnof; + sd_bus_error_set_errnofv; + sd_bus_error_get_errno; + sd_bus_error_copy; + sd_bus_error_is_set; + sd_bus_error_has_name; + sd_bus_error_add_map; + sd_bus_path_encode; + sd_bus_path_decode; + sd_bus_track_new; + sd_bus_track_ref; + sd_bus_track_unref; + sd_bus_track_get_bus; + sd_bus_track_get_userdata; + sd_bus_track_set_userdata; + sd_bus_track_add_sender; + sd_bus_track_remove_sender; + sd_bus_track_add_name; + sd_bus_track_remove_name; + sd_bus_track_count; + sd_bus_track_contains; + sd_bus_track_first; + sd_bus_track_next; + + /* sd-event */ + sd_event_default; + sd_event_new; + sd_event_ref; + sd_event_unref; + sd_event_add_io; + sd_event_add_time; + sd_event_add_signal; + sd_event_add_child; + sd_event_add_defer; + sd_event_add_post; + sd_event_add_exit; + sd_event_prepare; + sd_event_wait; + sd_event_dispatch; + sd_event_run; + sd_event_loop; + sd_event_exit; + sd_event_now; + sd_event_get_fd; + sd_event_get_state; + sd_event_get_tid; + sd_event_get_exit_code; + sd_event_set_watchdog; + sd_event_get_watchdog; + sd_event_source_ref; + sd_event_source_unref; + sd_event_source_get_event; + sd_event_source_get_userdata; + sd_event_source_set_userdata; + sd_event_source_set_description; + sd_event_source_get_description; + sd_event_source_set_prepare; + sd_event_source_get_pending; + sd_event_source_get_priority; + sd_event_source_set_priority; + sd_event_source_get_enabled; + sd_event_source_set_enabled; + sd_event_source_get_io_fd; + sd_event_source_set_io_fd; + sd_event_source_get_io_events; + sd_event_source_set_io_events; + sd_event_source_get_io_revents; + sd_event_source_get_time; + sd_event_source_set_time; + sd_event_source_set_time_accuracy; + sd_event_source_get_time_accuracy; + sd_event_source_get_time_clock; + sd_event_source_get_signal; + sd_event_source_get_child_pid; +} LIBSYSTEMD_220; + +LIBSYSTEMD_222 { +global: + /* sd-bus */ + sd_bus_emit_object_added; + sd_bus_emit_object_removed; + sd_bus_flush_close_unref; +} LIBSYSTEMD_221; + +LIBSYSTEMD_226 { +global: + sd_pid_get_cgroup; + sd_peer_get_cgroup; +} LIBSYSTEMD_222; + +LIBSYSTEMD_227 { +global: + sd_bus_default_flush_close; + sd_bus_path_decode_many; + sd_bus_path_encode_many; + sd_listen_fds_with_names; +} LIBSYSTEMD_226; + +LIBSYSTEMD_229 { +global: + sd_journal_has_runtime_files; + sd_journal_has_persistent_files; + sd_journal_enumerate_fields; + sd_journal_restart_fields; +} LIBSYSTEMD_227; + +LIBSYSTEMD_230 { +global: + sd_journal_open_directory_fd; + sd_journal_open_files_fd; +} LIBSYSTEMD_229; + +LIBSYSTEMD_231 { +global: + sd_event_get_iteration; +} LIBSYSTEMD_230; + +LIBSYSTEMD_232 { +global: + sd_bus_track_set_recursive; + sd_bus_track_get_recursive; + sd_bus_track_count_name; + sd_bus_track_count_sender; + sd_bus_set_exit_on_disconnect; + sd_bus_get_exit_on_disconnect; + sd_id128_get_invocation; +} LIBSYSTEMD_231; + +LIBSYSTEMD_233 { +global: + sd_id128_get_machine_app_specific; + sd_is_socket_sockaddr; +} LIBSYSTEMD_232; + +LIBSYSTEMD_234 { +global: + sd_bus_message_appendv; +} LIBSYSTEMD_233; + +LIBSYSTEMD_236 { +global: + sd_bus_message_new; + sd_bus_message_seal; +} LIBSYSTEMD_234; + +LIBSYSTEMD_237 { +global: + sd_bus_set_watch_bind; + sd_bus_get_watch_bind; + sd_bus_request_name_async; + sd_bus_release_name_async; + sd_bus_add_match_async; + sd_bus_match_signal; + sd_bus_match_signal_async; + sd_bus_is_ready; + sd_bus_set_connected_signal; + sd_bus_get_connected_signal; + sd_bus_set_sender; + sd_bus_get_sender; + sd_bus_message_set_sender; + sd_event_source_get_io_fd_own; + sd_event_source_set_io_fd_own; +} LIBSYSTEMD_236; + +LIBSYSTEMD_238 { +global: + sd_bus_get_n_queued_read; + sd_bus_get_n_queued_write; +} LIBSYSTEMD_237; + +LIBSYSTEMD_239 { +global: + sd_bus_open_with_description; + sd_bus_open_user_with_description; + sd_bus_open_system_with_description; + sd_bus_slot_get_floating; + sd_bus_slot_set_floating; + sd_bus_slot_get_destroy_callback; + sd_bus_slot_set_destroy_callback; + sd_bus_track_get_destroy_callback; + sd_bus_track_set_destroy_callback; + sd_event_add_inotify; + sd_event_source_get_inotify_mask; + sd_event_source_set_destroy_callback; + sd_event_source_get_destroy_callback; +} LIBSYSTEMD_238; + +LIBSYSTEMD_240 { +global: + sd_bus_message_readv; + sd_bus_set_method_call_timeout; + sd_bus_get_method_call_timeout; + + sd_bus_error_move; + + sd_bus_set_close_on_exit; + sd_bus_get_close_on_exit; + + sd_device_ref; + sd_device_unref; + + sd_device_new_from_syspath; + sd_device_new_from_devnum; + sd_device_new_from_subsystem_sysname; + sd_device_new_from_device_id; + + sd_device_get_parent; + sd_device_get_parent_with_subsystem_devtype; + + sd_device_get_syspath; + sd_device_get_subsystem; + sd_device_get_devtype; + sd_device_get_devnum; + sd_device_get_ifindex; + sd_device_get_driver; + sd_device_get_devpath; + sd_device_get_devname; + sd_device_get_sysname; + sd_device_get_sysnum; + + sd_device_get_is_initialized; + sd_device_get_usec_since_initialized; + + sd_device_get_tag_first; + sd_device_get_tag_next; + sd_device_get_devlink_first; + sd_device_get_devlink_next; + sd_device_get_property_first; + sd_device_get_property_next; + sd_device_get_sysattr_first; + sd_device_get_sysattr_next; + + sd_device_has_tag; + sd_device_get_property_value; + sd_device_get_sysattr_value; + + sd_device_set_sysattr_value; + + sd_device_enumerator_new; + sd_device_enumerator_ref; + sd_device_enumerator_unref; + + sd_device_enumerator_get_device_first; + sd_device_enumerator_get_device_next; + sd_device_enumerator_get_subsystem_first; + sd_device_enumerator_get_subsystem_next; + + sd_device_enumerator_add_match_subsystem; + sd_device_enumerator_add_match_sysattr; + sd_device_enumerator_add_match_property; + sd_device_enumerator_add_match_sysname; + sd_device_enumerator_add_match_tag; + sd_device_enumerator_add_match_parent; + sd_device_enumerator_allow_uninitialized; + + sd_hwdb_ref; + sd_hwdb_unref; + + sd_hwdb_new; + + sd_hwdb_get; + + sd_hwdb_seek; + sd_hwdb_enumerate; + + sd_id128_get_boot_app_specific; + + sd_device_monitor_new; + sd_device_monitor_ref; + sd_device_monitor_unref; + + sd_device_monitor_set_receive_buffer_size; + sd_device_monitor_attach_event; + sd_device_monitor_detach_event; + sd_device_monitor_get_event; + sd_device_monitor_get_event_source; + sd_device_monitor_start; + sd_device_monitor_stop; + + sd_device_monitor_filter_add_match_subsystem_devtype; + sd_device_monitor_filter_add_match_tag; + sd_device_monitor_filter_update; + sd_device_monitor_filter_remove; + + sd_event_source_get_floating; + sd_event_source_set_floating; +} LIBSYSTEMD_239; + +LIBSYSTEMD_241 { +global: + sd_bus_close_unref; +} LIBSYSTEMD_240; + +LIBSYSTEMD_243 { +global: + sd_bus_object_vtable_format; + sd_event_source_disable_unref; +} LIBSYSTEMD_241; + +LIBSYSTEMD_245 { +global: + sd_bus_enqueue_for_read; + sd_bus_message_dump; + sd_bus_message_sensitive; + sd_event_add_child_pidfd; + sd_event_source_get_child_pidfd; + sd_event_source_get_child_pidfd_own; + sd_event_source_set_child_pidfd_own; + sd_event_source_get_child_process_own; + sd_event_source_set_child_process_own; + sd_event_source_send_child_signal; + sd_journal_open_namespace; +} LIBSYSTEMD_243; + +LIBSYSTEMD_246 { +global: + sd_bus_interface_name_is_valid; + sd_bus_service_name_is_valid; + sd_bus_member_name_is_valid; + sd_bus_object_path_is_valid; + + sd_bus_call_methodv; + sd_bus_call_method_asyncv; + sd_bus_emit_signalv; + sd_bus_reply_method_errnofv; + sd_bus_reply_method_errorfv; + sd_bus_reply_method_returnv; + sd_bus_set_propertyv; + + sd_path_lookup; + sd_path_lookup_strv; + + sd_notify_barrier; + + sd_journal_enumerate_available_data; + sd_journal_enumerate_available_unique; +} LIBSYSTEMD_245; + +LIBSYSTEMD_247 { +global: + sd_event_add_time_relative; + sd_event_source_set_time_relative; + sd_event_source_get_exit_on_failure; + sd_event_source_set_exit_on_failure; + + sd_bus_error_has_names_sentinel; + + sd_device_get_current_tag_first; + sd_device_get_current_tag_next; + sd_device_has_current_tag; + sd_device_set_sysattr_valuef; +} LIBSYSTEMD_246; + +LIBSYSTEMD_248 { +global: + sd_bus_open_user_machine; + sd_bus_message_send; + + sd_event_source_set_ratelimit; + sd_event_source_get_ratelimit; + sd_event_source_is_ratelimited; + + sd_device_get_action; + sd_device_get_seqnum; + sd_device_new_from_stat_rdev; + sd_device_trigger; +} LIBSYSTEMD_247; + +LIBSYSTEMD_249 { +global: + sd_device_monitor_filter_add_match_sysattr; + sd_device_monitor_filter_add_match_parent; + sd_device_get_usec_initialized; + sd_device_trigger_with_uuid; + sd_device_get_trigger_uuid; + sd_device_new_from_ifname; + sd_device_new_from_ifindex; +} LIBSYSTEMD_248; + +LIBSYSTEMD_250 { +global: + sd_device_get_diskseq; + sd_event_add_inotify_fd; + sd_event_source_set_ratelimit_expire_callback; +} LIBSYSTEMD_249; + +LIBSYSTEMD_251 { +global: + sd_id128_to_uuid_string; + sd_device_new_from_devname; + sd_device_new_from_path; + sd_device_open; + sd_device_enumerator_add_nomatch_sysname; +} LIBSYSTEMD_250; + +LIBSYSTEMD_252 { +global: + sd_bus_message_read_strv_extend; + sd_bus_error_setfv; + + sd_device_new_child; + sd_device_get_child_first; + sd_device_get_child_next; + sd_device_monitor_set_description; + sd_device_monitor_get_description; + + sd_event_set_signal_exit; + + sd_id128_string_equal; + + sd_hwdb_new_from_path; +} LIBSYSTEMD_251; + +LIBSYSTEMD_253 { +global: + sd_bus_emit_signal_to; + sd_bus_emit_signal_tov; + sd_bus_message_new_signal_to; + sd_pidfd_get_cgroup; + sd_pidfd_get_machine_name; + sd_pidfd_get_owner_uid; + sd_pidfd_get_session; + sd_pidfd_get_slice; + sd_pidfd_get_unit; + sd_pidfd_get_user_slice; + sd_pidfd_get_user_unit; +} LIBSYSTEMD_252; + +LIBSYSTEMD_254 { +global: + sd_journal_get_seqnum; + sd_session_get_username; + sd_session_get_start_time; + sd_uid_get_login_time; + sd_pid_notifyf_with_fds; + sd_event_add_memory_pressure; + sd_event_source_set_memory_pressure_type; + sd_event_source_set_memory_pressure_period; + sd_event_trim_memory; + sd_pid_notify_barrier; + sd_event_source_leave_ratelimit; + sd_journal_step_one; + sd_session_get_leader; +} LIBSYSTEMD_253; + +LIBSYSTEMD_255 { +global: + sd_id128_get_app_specific; + sd_device_enumerator_add_match_property_required; +} LIBSYSTEMD_254; diff --git a/src/libsystemd/meson.build b/src/libsystemd/meson.build new file mode 100644 index 0000000..5d18f97 --- /dev/null +++ b/src/libsystemd/meson.build @@ -0,0 +1,265 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later + +sd_journal_sources = files( + 'sd-journal/audit-type.c', + 'sd-journal/catalog.c', + 'sd-journal/journal-file.c', + 'sd-journal/journal-send.c', + 'sd-journal/journal-vacuum.c', + 'sd-journal/journal-verify.c', + 'sd-journal/lookup3.c', + 'sd-journal/mmap-cache.c', + 'sd-journal/sd-journal.c', +) + +if conf.get('HAVE_GCRYPT') == 1 + sd_journal_sources += files( + 'sd-journal/fsprg.c', + 'sd-journal/journal-authenticate.c', + ) +endif + +audit_type_includes = [config_h, + missing_audit_h, + 'linux/audit.h'] +if conf.get('HAVE_AUDIT') == 1 + audit_type_includes += 'libaudit.h' +endif + +generate_audit_type_list = find_program('sd-journal/generate-audit_type-list.sh') +audit_type_list_txt = custom_target( + 'audit_type-list.txt', + output : 'audit_type-list.txt', + command : [generate_audit_type_list, cpp] + audit_type_includes, + capture : true) + +audit_type_to_name = custom_target( + 'audit_type-to-name.h', + input : ['sd-journal/audit_type-to-name.awk', audit_type_list_txt], + output : 'audit_type-to-name.h', + command : [awk, '-f', '@INPUT0@', '@INPUT1@'], + capture : true) + +sd_journal_sources += [audit_type_to_name] + +############################################################ + +id128_sources = files( + 'sd-id128/id128-util.c', + 'sd-id128/sd-id128.c', +) + +############################################################ + +sd_daemon_sources = files('sd-daemon/sd-daemon.c') + +############################################################ + +sd_event_sources = files( + 'sd-event/event-util.c', + 'sd-event/sd-event.c', +) + +############################################################ + +sd_login_sources = files('sd-login/sd-login.c') + +############################################################ + +libsystemd_sources = files( + 'sd-bus/bus-common-errors.c', + 'sd-bus/bus-container.c', + 'sd-bus/bus-control.c', + 'sd-bus/bus-convenience.c', + 'sd-bus/bus-creds.c', + 'sd-bus/bus-dump.c', + 'sd-bus/bus-error.c', + 'sd-bus/bus-internal.c', + 'sd-bus/bus-introspect.c', + 'sd-bus/bus-kernel.c', + 'sd-bus/bus-match.c', + 'sd-bus/bus-message.c', + 'sd-bus/bus-objects.c', + 'sd-bus/bus-signature.c', + 'sd-bus/bus-slot.c', + 'sd-bus/bus-socket.c', + 'sd-bus/bus-track.c', + 'sd-bus/bus-type.c', + 'sd-bus/sd-bus.c', + 'sd-device/device-enumerator.c', + 'sd-device/device-filter.c', + 'sd-device/device-monitor.c', + 'sd-device/device-private.c', + 'sd-device/device-util.c', + 'sd-device/sd-device.c', + 'sd-hwdb/sd-hwdb.c', + 'sd-netlink/netlink-genl.c', + 'sd-netlink/netlink-message-nfnl.c', + 'sd-netlink/netlink-message-rtnl.c', + 'sd-netlink/netlink-message.c', + 'sd-netlink/netlink-slot.c', + 'sd-netlink/netlink-socket.c', + 'sd-netlink/netlink-types-genl.c', + 'sd-netlink/netlink-types-nfnl.c', + 'sd-netlink/netlink-types-rtnl.c', + 'sd-netlink/netlink-types.c', + 'sd-netlink/netlink-util.c', + 'sd-netlink/sd-netlink.c', + 'sd-network/network-util.c', + 'sd-network/sd-network.c', + 'sd-path/sd-path.c', + 'sd-resolve/sd-resolve.c', +) + sd_journal_sources + id128_sources + sd_daemon_sources + sd_event_sources + sd_login_sources + +libsystemd_c_args = ['-fvisibility=default'] + +libsystemd_static = static_library( + 'systemd_static', + libsystemd_sources, + include_directories : libsystemd_includes, + c_args : libsystemd_c_args, + link_with : [libbasic, + libbasic_compress], + dependencies : [threads, + librt, + userspace], + build_by_default : false) + +libsystemd_dir_path = meson.current_source_dir() + +libsystemd_sym = files('libsystemd.sym') +libsystemd_sym_path = libsystemd_dir_path / 'libsystemd.sym' + +static_libsystemd = get_option('static-libsystemd') +static_libsystemd_pic = static_libsystemd == 'true' or static_libsystemd == 'pic' + +libsystemd_pc = custom_target( + 'libsystemd.pc', + input : 'libsystemd.pc.in', + output : 'libsystemd.pc', + command : [jinja2_cmdline, '@INPUT@', '@OUTPUT@'], + install : pkgconfiglibdir != 'no', + install_tag : 'devel', + install_dir : pkgconfiglibdir) + +############################################################ + +simple_tests += files( + 'sd-journal/test-audit-type.c', + 'sd-journal/test-catalog.c', + 'sd-journal/test-journal-file.c', + 'sd-journal/test-journal-init.c', + 'sd-journal/test-journal-match.c', + 'sd-journal/test-journal-send.c', + 'sd-journal/test-mmap-cache.c', +) + +libsystemd_tests += [ + { + 'sources' : files('sd-journal/test-journal-enum.c'), + 'timeout' : 360, + }, +] + +############################################################ + +simple_tests += files( + 'sd-bus/test-bus-creds.c', + 'sd-bus/test-bus-introspect.c', + 'sd-bus/test-bus-match.c', + 'sd-bus/test-bus-vtable.c', + 'sd-device/test-device-util.c', + 'sd-device/test-sd-device-monitor.c', + 'sd-device/test-sd-device.c', + 'sd-event/test-event.c', + 'sd-journal/test-journal-flush.c', + 'sd-journal/test-journal-interleaving.c', + 'sd-journal/test-journal-stream.c', + 'sd-journal/test-journal.c', + 'sd-login/test-login.c', + 'sd-netlink/test-netlink.c', +) + +libsystemd_tests += [ + { + 'sources' : files('sd-bus/test-bus-address.c'), + 'dependencies' : threads + }, + { + 'sources' : files('sd-bus/test-bus-benchmark.c'), + 'dependencies' : threads, + 'type' : 'manual', + }, + { + 'sources' : files('sd-bus/test-bus-chat.c'), + 'dependencies' : threads, + }, + { + 'sources' : files('sd-bus/test-bus-cleanup.c'), + 'dependencies' : [threads, libseccomp], + }, + { + 'sources' : files('sd-bus/test-bus-marshal.c'), + 'dependencies' : [ + libdbus, + libgio, + libglib, + libgobject, + libm, + threads, + ], + }, + { + 'sources' : files('sd-bus/test-bus-objects.c'), + 'dependencies' : threads, + }, + { + 'sources' : files('sd-bus/test-bus-peersockaddr.c'), + 'dependencies' : threads, + }, + { + 'sources' : files('sd-bus/test-bus-queue-ref-cycle.c'), + 'dependencies' : threads, + }, + { + 'sources' : files('sd-bus/test-bus-server.c'), + 'dependencies' : threads, + }, + { + 'sources' : files('sd-bus/test-bus-signature.c'), + 'dependencies' : threads, + }, + { + 'sources' : files('sd-bus/test-bus-track.c'), + 'dependencies' : libseccomp, + }, + { + 'sources' : files('sd-bus/test-bus-watch-bind.c'), + 'dependencies' : threads, + 'timeout' : 120, + }, + { + 'sources' : files('sd-journal/test-journal-append.c'), + 'type' : 'manual', + }, + { + 'sources' : files('sd-journal/test-journal-verify.c'), + 'timeout' : 90, + }, + { + 'sources' : files('sd-resolve/test-resolve.c'), + 'dependencies' : threads, + 'timeout' : 120, + }, +] + +if cxx_cmd != '' + simple_tests += files('sd-bus/test-bus-vtable-cc.cc') +endif + +############################################################ + +simple_fuzzers += files( + 'sd-bus/fuzz-bus-match.c', + 'sd-bus/fuzz-bus-message.c', +) diff --git a/src/libsystemd/sd-bus/bus-common-errors.c b/src/libsystemd/sd-bus/bus-common-errors.c new file mode 100644 index 0000000..df26fd7 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-common-errors.c @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> + +#include "sd-bus.h" + +#include "bus-common-errors.h" +#include "bus-error.h" + +BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map bus_common_errors[] = { + SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_UNIT, ENOENT), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_UNIT_FOR_PID, ESRCH), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID, ENOENT), + SD_BUS_ERROR_MAP(BUS_ERROR_UNIT_EXISTS, EEXIST), + SD_BUS_ERROR_MAP(BUS_ERROR_LOAD_FAILED, EIO), + SD_BUS_ERROR_MAP(BUS_ERROR_BAD_UNIT_SETTING, ENOEXEC), + SD_BUS_ERROR_MAP(BUS_ERROR_JOB_FAILED, EREMOTEIO), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_JOB, ENOENT), + SD_BUS_ERROR_MAP(BUS_ERROR_NOT_SUBSCRIBED, EINVAL), + SD_BUS_ERROR_MAP(BUS_ERROR_ALREADY_SUBSCRIBED, EINVAL), + SD_BUS_ERROR_MAP(BUS_ERROR_ONLY_BY_DEPENDENCY, EINVAL), + SD_BUS_ERROR_MAP(BUS_ERROR_TRANSACTION_JOBS_CONFLICTING, EDEADLK), + SD_BUS_ERROR_MAP(BUS_ERROR_TRANSACTION_ORDER_IS_CYCLIC, EDEADLK), + SD_BUS_ERROR_MAP(BUS_ERROR_TRANSACTION_IS_DESTRUCTIVE, EDEADLK), + SD_BUS_ERROR_MAP(BUS_ERROR_UNIT_MASKED, ERFKILL), + SD_BUS_ERROR_MAP(BUS_ERROR_UNIT_GENERATED, EADDRNOTAVAIL), + SD_BUS_ERROR_MAP(BUS_ERROR_UNIT_LINKED, ELOOP), + SD_BUS_ERROR_MAP(BUS_ERROR_JOB_TYPE_NOT_APPLICABLE, EBADR), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_ISOLATION, EPERM), + SD_BUS_ERROR_MAP(BUS_ERROR_SHUTTING_DOWN, ECANCELED), + SD_BUS_ERROR_MAP(BUS_ERROR_SCOPE_NOT_RUNNING, EHOSTDOWN), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_DYNAMIC_USER, ESRCH), + SD_BUS_ERROR_MAP(BUS_ERROR_NOT_REFERENCED, EUNATCH), + SD_BUS_ERROR_MAP(BUS_ERROR_DISK_FULL, ENOSPC), + SD_BUS_ERROR_MAP(BUS_ERROR_FILE_DESCRIPTOR_STORE_DISABLED, + EHOSTDOWN), + + SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_MACHINE, ENXIO), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_IMAGE, ENOENT), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_MACHINE_FOR_PID, ENXIO), + SD_BUS_ERROR_MAP(BUS_ERROR_MACHINE_EXISTS, EEXIST), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_PRIVATE_NETWORKING, ENOSYS), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_USER_MAPPING, ENXIO), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_GROUP_MAPPING, ENXIO), + + SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_PORTABLE_IMAGE, ENOENT), + SD_BUS_ERROR_MAP(BUS_ERROR_BAD_PORTABLE_IMAGE_TYPE, EMEDIUMTYPE), + + SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_SESSION, ENXIO), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_SESSION_FOR_PID, ENXIO), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_USER, ENXIO), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_USER_FOR_PID, ENXIO), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_SEAT, ENXIO), + SD_BUS_ERROR_MAP(BUS_ERROR_SESSION_NOT_ON_SEAT, EINVAL), + SD_BUS_ERROR_MAP(BUS_ERROR_NOT_IN_CONTROL, EINVAL), + SD_BUS_ERROR_MAP(BUS_ERROR_DEVICE_IS_TAKEN, EINVAL), + SD_BUS_ERROR_MAP(BUS_ERROR_DEVICE_NOT_TAKEN, EINVAL), + SD_BUS_ERROR_MAP(BUS_ERROR_OPERATION_IN_PROGRESS, EINPROGRESS), + SD_BUS_ERROR_MAP(BUS_ERROR_SLEEP_VERB_NOT_SUPPORTED, EOPNOTSUPP), + SD_BUS_ERROR_MAP(BUS_ERROR_SESSION_BUSY, EBUSY), + SD_BUS_ERROR_MAP(BUS_ERROR_NOT_YOUR_DEVICE, EPERM), + + SD_BUS_ERROR_MAP(BUS_ERROR_AUTOMATIC_TIME_SYNC_ENABLED, EALREADY), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_NTP_SUPPORT, EOPNOTSUPP), + + SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_PROCESS, ESRCH), + + SD_BUS_ERROR_MAP(BUS_ERROR_NO_NAME_SERVERS, ESRCH), + SD_BUS_ERROR_MAP(BUS_ERROR_INVALID_REPLY, EINVAL), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_RR, ENOENT), + SD_BUS_ERROR_MAP(BUS_ERROR_CNAME_LOOP, EDEADLK), + SD_BUS_ERROR_MAP(BUS_ERROR_ABORTED, ECANCELED), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_SERVICE, EUNATCH), + SD_BUS_ERROR_MAP(BUS_ERROR_DNSSEC_FAILED, EHOSTUNREACH), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_TRUST_ANCHOR, EHOSTUNREACH), + SD_BUS_ERROR_MAP(BUS_ERROR_RR_TYPE_UNSUPPORTED, EOPNOTSUPP), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_LINK, ENXIO), + SD_BUS_ERROR_MAP(BUS_ERROR_LINK_BUSY, EBUSY), + SD_BUS_ERROR_MAP(BUS_ERROR_NETWORK_DOWN, ENETDOWN), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_SOURCE, ESRCH), + SD_BUS_ERROR_MAP(BUS_ERROR_STUB_LOOP, ELOOP), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_DNSSD_SERVICE, ENOENT), + SD_BUS_ERROR_MAP(BUS_ERROR_DNSSD_SERVICE_EXISTS, EEXIST), + + SD_BUS_ERROR_MAP(BUS_ERROR_DNS_FORMERR, EBADMSG), + SD_BUS_ERROR_MAP(BUS_ERROR_DNS_SERVFAIL, EHOSTDOWN), + SD_BUS_ERROR_MAP(BUS_ERROR_DNS_NXDOMAIN, ENXIO), + SD_BUS_ERROR_MAP(BUS_ERROR_DNS_NOTIMP, ENOSYS), + SD_BUS_ERROR_MAP(BUS_ERROR_DNS_REFUSED, EACCES), + SD_BUS_ERROR_MAP(BUS_ERROR_DNS_YXDOMAIN, EEXIST), + SD_BUS_ERROR_MAP(BUS_ERROR_DNS_YRRSET, EEXIST), + SD_BUS_ERROR_MAP(BUS_ERROR_DNS_NXRRSET, ENOENT), + SD_BUS_ERROR_MAP(BUS_ERROR_DNS_NOTAUTH, EACCES), + SD_BUS_ERROR_MAP(BUS_ERROR_DNS_NOTZONE, EREMOTE), + SD_BUS_ERROR_MAP(BUS_ERROR_DNS_BADVERS, EBADMSG), + SD_BUS_ERROR_MAP(BUS_ERROR_DNS_BADKEY, EKEYREJECTED), + SD_BUS_ERROR_MAP(BUS_ERROR_DNS_BADTIME, EBADMSG), + SD_BUS_ERROR_MAP(BUS_ERROR_DNS_BADMODE, EBADMSG), + SD_BUS_ERROR_MAP(BUS_ERROR_DNS_BADNAME, EBADMSG), + SD_BUS_ERROR_MAP(BUS_ERROR_DNS_BADALG, EBADMSG), + SD_BUS_ERROR_MAP(BUS_ERROR_DNS_BADTRUNC, EBADMSG), + SD_BUS_ERROR_MAP(BUS_ERROR_DNS_BADCOOKIE, EBADR), + + SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_TRANSFER, ENXIO), + SD_BUS_ERROR_MAP(BUS_ERROR_TRANSFER_IN_PROGRESS, EBUSY), + + SD_BUS_ERROR_MAP(BUS_ERROR_NO_PRODUCT_UUID, EOPNOTSUPP), + SD_BUS_ERROR_MAP(BUS_ERROR_FILE_IS_PROTECTED, EACCES), + SD_BUS_ERROR_MAP(BUS_ERROR_READ_ONLY_FILESYSTEM, EROFS), + + SD_BUS_ERROR_MAP(BUS_ERROR_SPEED_METER_INACTIVE, EOPNOTSUPP), + SD_BUS_ERROR_MAP(BUS_ERROR_UNMANAGED_INTERFACE, EOPNOTSUPP), + + SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_HOME, EEXIST), + SD_BUS_ERROR_MAP(BUS_ERROR_UID_IN_USE, EEXIST), + SD_BUS_ERROR_MAP(BUS_ERROR_USER_NAME_EXISTS, EEXIST), + SD_BUS_ERROR_MAP(BUS_ERROR_HOME_EXISTS, EEXIST), + SD_BUS_ERROR_MAP(BUS_ERROR_HOME_ALREADY_ACTIVE, EALREADY), + SD_BUS_ERROR_MAP(BUS_ERROR_HOME_ALREADY_FIXATED, EALREADY), + SD_BUS_ERROR_MAP(BUS_ERROR_HOME_UNFIXATED, EADDRNOTAVAIL), + SD_BUS_ERROR_MAP(BUS_ERROR_HOME_NOT_ACTIVE, EALREADY), + SD_BUS_ERROR_MAP(BUS_ERROR_HOME_ABSENT, EREMOTE), + SD_BUS_ERROR_MAP(BUS_ERROR_HOME_BUSY, EBUSY), + SD_BUS_ERROR_MAP(BUS_ERROR_BAD_PASSWORD, ENOKEY), + SD_BUS_ERROR_MAP(BUS_ERROR_LOW_PASSWORD_QUALITY, EUCLEAN), + SD_BUS_ERROR_MAP(BUS_ERROR_BAD_PASSWORD_AND_NO_TOKEN, EBADSLT), + SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_PIN_NEEDED, ENOANO), + SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_PROTECTED_AUTHENTICATION_PATH_NEEDED, ERFKILL), + SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_USER_PRESENCE_NEEDED, EMEDIUMTYPE), + SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_USER_VERIFICATION_NEEDED, ENOCSI), + SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_ACTION_TIMEOUT, ENOSTR), + SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_PIN_LOCKED, EOWNERDEAD), + SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_BAD_PIN, ENOLCK), + SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_BAD_PIN_FEW_TRIES_LEFT, ETOOMANYREFS), + SD_BUS_ERROR_MAP(BUS_ERROR_TOKEN_BAD_PIN_ONE_TRY_LEFT, EUCLEAN), + SD_BUS_ERROR_MAP(BUS_ERROR_BAD_SIGNATURE, EKEYREJECTED), + SD_BUS_ERROR_MAP(BUS_ERROR_HOME_RECORD_MISMATCH, EUCLEAN), + SD_BUS_ERROR_MAP(BUS_ERROR_HOME_RECORD_DOWNGRADE, ESTALE), + SD_BUS_ERROR_MAP(BUS_ERROR_HOME_RECORD_SIGNED, EROFS), + SD_BUS_ERROR_MAP(BUS_ERROR_BAD_HOME_SIZE, ERANGE), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_PRIVATE_KEY, ENOPKG), + SD_BUS_ERROR_MAP(BUS_ERROR_HOME_LOCKED, ENOEXEC), + SD_BUS_ERROR_MAP(BUS_ERROR_HOME_NOT_LOCKED, ENOEXEC), + SD_BUS_ERROR_MAP(BUS_ERROR_TOO_MANY_OPERATIONS, ENOBUFS), + SD_BUS_ERROR_MAP(BUS_ERROR_AUTHENTICATION_LIMIT_HIT, ETOOMANYREFS), + SD_BUS_ERROR_MAP(BUS_ERROR_HOME_CANT_AUTHENTICATE, EKEYREVOKED), + SD_BUS_ERROR_MAP(BUS_ERROR_HOME_IN_USE, EADDRINUSE), + SD_BUS_ERROR_MAP(BUS_ERROR_REBALANCE_NOT_NEEDED, EALREADY), + + SD_BUS_ERROR_MAP_END +}; diff --git a/src/libsystemd/sd-bus/bus-common-errors.h b/src/libsystemd/sd-bus/bus-common-errors.h new file mode 100644 index 0000000..3a0eef4 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-common-errors.h @@ -0,0 +1,155 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "bus-error.h" + +#define BUS_ERROR_NO_SUCH_UNIT "org.freedesktop.systemd1.NoSuchUnit" +#define BUS_ERROR_NO_UNIT_FOR_PID "org.freedesktop.systemd1.NoUnitForPID" +#define BUS_ERROR_NO_UNIT_FOR_INVOCATION_ID "org.freedesktop.systemd1.NoUnitForInvocationID" +#define BUS_ERROR_UNIT_EXISTS "org.freedesktop.systemd1.UnitExists" +#define BUS_ERROR_LOAD_FAILED "org.freedesktop.systemd1.LoadFailed" +#define BUS_ERROR_BAD_UNIT_SETTING "org.freedesktop.systemd1.BadUnitSetting" +#define BUS_ERROR_JOB_FAILED "org.freedesktop.systemd1.JobFailed" +#define BUS_ERROR_NO_SUCH_JOB "org.freedesktop.systemd1.NoSuchJob" +#define BUS_ERROR_NOT_SUBSCRIBED "org.freedesktop.systemd1.NotSubscribed" +#define BUS_ERROR_ALREADY_SUBSCRIBED "org.freedesktop.systemd1.AlreadySubscribed" +#define BUS_ERROR_ONLY_BY_DEPENDENCY "org.freedesktop.systemd1.OnlyByDependency" +#define BUS_ERROR_TRANSACTION_JOBS_CONFLICTING "org.freedesktop.systemd1.TransactionJobsConflicting" +#define BUS_ERROR_TRANSACTION_ORDER_IS_CYCLIC "org.freedesktop.systemd1.TransactionOrderIsCyclic" +#define BUS_ERROR_TRANSACTION_IS_DESTRUCTIVE "org.freedesktop.systemd1.TransactionIsDestructive" +#define BUS_ERROR_UNIT_MASKED "org.freedesktop.systemd1.UnitMasked" +#define BUS_ERROR_UNIT_GENERATED "org.freedesktop.systemd1.UnitGenerated" +#define BUS_ERROR_UNIT_LINKED "org.freedesktop.systemd1.UnitLinked" +#define BUS_ERROR_UNIT_BAD_PATH "org.freedesktop.systemd1.UnitBadPath" +#define BUS_ERROR_JOB_TYPE_NOT_APPLICABLE "org.freedesktop.systemd1.JobTypeNotApplicable" +#define BUS_ERROR_NO_ISOLATION "org.freedesktop.systemd1.NoIsolation" +#define BUS_ERROR_SHUTTING_DOWN "org.freedesktop.systemd1.ShuttingDown" +#define BUS_ERROR_SCOPE_NOT_RUNNING "org.freedesktop.systemd1.ScopeNotRunning" +#define BUS_ERROR_NO_SUCH_DYNAMIC_USER "org.freedesktop.systemd1.NoSuchDynamicUser" +#define BUS_ERROR_NOT_REFERENCED "org.freedesktop.systemd1.NotReferenced" +#define BUS_ERROR_DISK_FULL "org.freedesktop.systemd1.DiskFull" +#define BUS_ERROR_NOTHING_TO_CLEAN "org.freedesktop.systemd1.NothingToClean" +#define BUS_ERROR_UNIT_BUSY "org.freedesktop.systemd1.UnitBusy" +#define BUS_ERROR_UNIT_INACTIVE "org.freedesktop.systemd1.UnitInactive" +#define BUS_ERROR_FREEZE_CANCELLED "org.freedesktop.systemd1.FreezeCancelled" +#define BUS_ERROR_FILE_DESCRIPTOR_STORE_DISABLED \ + "org.freedesktop.systemd1.FileDescriptorStoreDisabled" + +#define BUS_ERROR_NO_SUCH_MACHINE "org.freedesktop.machine1.NoSuchMachine" +#define BUS_ERROR_NO_SUCH_IMAGE "org.freedesktop.machine1.NoSuchImage" +#define BUS_ERROR_NO_MACHINE_FOR_PID "org.freedesktop.machine1.NoMachineForPID" +#define BUS_ERROR_MACHINE_EXISTS "org.freedesktop.machine1.MachineExists" +#define BUS_ERROR_NO_PRIVATE_NETWORKING "org.freedesktop.machine1.NoPrivateNetworking" +#define BUS_ERROR_NO_SUCH_USER_MAPPING "org.freedesktop.machine1.NoSuchUserMapping" +#define BUS_ERROR_NO_SUCH_GROUP_MAPPING "org.freedesktop.machine1.NoSuchGroupMapping" + +#define BUS_ERROR_NO_SUCH_PORTABLE_IMAGE "org.freedesktop.portable1.NoSuchImage" +#define BUS_ERROR_BAD_PORTABLE_IMAGE_TYPE "org.freedesktop.portable1.BadImageType" + +#define BUS_ERROR_NO_SUCH_SESSION "org.freedesktop.login1.NoSuchSession" +#define BUS_ERROR_NO_SESSION_FOR_PID "org.freedesktop.login1.NoSessionForPID" +#define BUS_ERROR_NO_SUCH_USER "org.freedesktop.login1.NoSuchUser" +#define BUS_ERROR_NO_USER_FOR_PID "org.freedesktop.login1.NoUserForPID" +#define BUS_ERROR_NO_SUCH_SEAT "org.freedesktop.login1.NoSuchSeat" +#define BUS_ERROR_SESSION_NOT_ON_SEAT "org.freedesktop.login1.SessionNotOnSeat" +#define BUS_ERROR_NOT_IN_CONTROL "org.freedesktop.login1.NotInControl" +#define BUS_ERROR_DEVICE_IS_TAKEN "org.freedesktop.login1.DeviceIsTaken" +#define BUS_ERROR_DEVICE_NOT_TAKEN "org.freedesktop.login1.DeviceNotTaken" +#define BUS_ERROR_OPERATION_IN_PROGRESS "org.freedesktop.login1.OperationInProgress" +#define BUS_ERROR_SLEEP_VERB_NOT_SUPPORTED "org.freedesktop.login1.SleepVerbNotSupported" +#define BUS_ERROR_SESSION_BUSY "org.freedesktop.login1.SessionBusy" +#define BUS_ERROR_NOT_YOUR_DEVICE "org.freedesktop.login1.NotYourDevice" + +#define BUS_ERROR_AUTOMATIC_TIME_SYNC_ENABLED "org.freedesktop.timedate1.AutomaticTimeSyncEnabled" +#define BUS_ERROR_NO_NTP_SUPPORT "org.freedesktop.timedate1.NoNTPSupport" + +#define BUS_ERROR_NO_SUCH_PROCESS "org.freedesktop.systemd1.NoSuchProcess" + +#define BUS_ERROR_NO_NAME_SERVERS "org.freedesktop.resolve1.NoNameServers" +#define BUS_ERROR_INVALID_REPLY "org.freedesktop.resolve1.InvalidReply" +#define BUS_ERROR_NO_SUCH_RR "org.freedesktop.resolve1.NoSuchRR" +#define BUS_ERROR_CNAME_LOOP "org.freedesktop.resolve1.CNameLoop" +#define BUS_ERROR_ABORTED "org.freedesktop.resolve1.Aborted" +#define BUS_ERROR_NO_SUCH_SERVICE "org.freedesktop.resolve1.NoSuchService" +#define BUS_ERROR_DNSSEC_FAILED "org.freedesktop.resolve1.DnssecFailed" +#define BUS_ERROR_NO_TRUST_ANCHOR "org.freedesktop.resolve1.NoTrustAnchor" +#define BUS_ERROR_RR_TYPE_UNSUPPORTED "org.freedesktop.resolve1.ResourceRecordTypeUnsupported" +#define BUS_ERROR_NO_SUCH_LINK "org.freedesktop.resolve1.NoSuchLink" +#define BUS_ERROR_LINK_BUSY "org.freedesktop.resolve1.LinkBusy" +#define BUS_ERROR_NETWORK_DOWN "org.freedesktop.resolve1.NetworkDown" +#define BUS_ERROR_NO_SOURCE "org.freedesktop.resolve1.NoSource" +#define BUS_ERROR_STUB_LOOP "org.freedesktop.resolve1.StubLoop" +#define BUS_ERROR_NO_SUCH_DNSSD_SERVICE "org.freedesktop.resolve1.NoSuchDnssdService" +#define BUS_ERROR_DNSSD_SERVICE_EXISTS "org.freedesktop.resolve1.DnssdServiceExists" + +#define _BUS_ERROR_DNS "org.freedesktop.resolve1.DnsError." +#define BUS_ERROR_DNS_FORMERR _BUS_ERROR_DNS "FORMERR" +#define BUS_ERROR_DNS_SERVFAIL _BUS_ERROR_DNS "SERVFAIL" +#define BUS_ERROR_DNS_NXDOMAIN _BUS_ERROR_DNS "NXDOMAIN" +#define BUS_ERROR_DNS_NOTIMP _BUS_ERROR_DNS "NOTIMP" +#define BUS_ERROR_DNS_REFUSED _BUS_ERROR_DNS "REFUSED" +#define BUS_ERROR_DNS_YXDOMAIN _BUS_ERROR_DNS "YXDOMAIN" +#define BUS_ERROR_DNS_YRRSET _BUS_ERROR_DNS "YRRSET" +#define BUS_ERROR_DNS_NXRRSET _BUS_ERROR_DNS "NXRRSET" +#define BUS_ERROR_DNS_NOTAUTH _BUS_ERROR_DNS "NOTAUTH" +#define BUS_ERROR_DNS_NOTZONE _BUS_ERROR_DNS "NOTZONE" +#define BUS_ERROR_DNS_BADVERS _BUS_ERROR_DNS "BADVERS" +#define BUS_ERROR_DNS_BADKEY _BUS_ERROR_DNS "BADKEY" +#define BUS_ERROR_DNS_BADTIME _BUS_ERROR_DNS "BADTIME" +#define BUS_ERROR_DNS_BADMODE _BUS_ERROR_DNS "BADMODE" +#define BUS_ERROR_DNS_BADNAME _BUS_ERROR_DNS "BADNAME" +#define BUS_ERROR_DNS_BADALG _BUS_ERROR_DNS "BADALG" +#define BUS_ERROR_DNS_BADTRUNC _BUS_ERROR_DNS "BADTRUNC" +#define BUS_ERROR_DNS_BADCOOKIE _BUS_ERROR_DNS "BADCOOKIE" + +#define BUS_ERROR_NO_SUCH_TRANSFER "org.freedesktop.import1.NoSuchTransfer" +#define BUS_ERROR_TRANSFER_IN_PROGRESS "org.freedesktop.import1.TransferInProgress" + +#define BUS_ERROR_NO_PRODUCT_UUID "org.freedesktop.hostname1.NoProductUUID" +#define BUS_ERROR_FILE_IS_PROTECTED "org.freedesktop.hostname1.FileIsProtected" +#define BUS_ERROR_READ_ONLY_FILESYSTEM "org.freedesktop.hostname1.ReadOnlyFilesystem" + +#define BUS_ERROR_SPEED_METER_INACTIVE "org.freedesktop.network1.SpeedMeterInactive" +#define BUS_ERROR_UNMANAGED_INTERFACE "org.freedesktop.network1.UnmanagedInterface" + +#define BUS_ERROR_NO_SUCH_HOME "org.freedesktop.home1.NoSuchHome" +#define BUS_ERROR_UID_IN_USE "org.freedesktop.home1.UIDInUse" +#define BUS_ERROR_USER_NAME_EXISTS "org.freedesktop.home1.UserNameExists" +#define BUS_ERROR_HOME_EXISTS "org.freedesktop.home1.HomeExists" +#define BUS_ERROR_HOME_ALREADY_ACTIVE "org.freedesktop.home1.HomeAlreadyActive" +#define BUS_ERROR_HOME_ALREADY_FIXATED "org.freedesktop.home1.HomeAlreadyFixated" +#define BUS_ERROR_HOME_UNFIXATED "org.freedesktop.home1.HomeUnfixated" +#define BUS_ERROR_HOME_NOT_ACTIVE "org.freedesktop.home1.HomeNotActive" +#define BUS_ERROR_HOME_ABSENT "org.freedesktop.home1.HomeAbsent" +#define BUS_ERROR_HOME_BUSY "org.freedesktop.home1.HomeBusy" +#define BUS_ERROR_BAD_PASSWORD "org.freedesktop.home1.BadPassword" +#define BUS_ERROR_BAD_RECOVERY_KEY "org.freedesktop.home1.BadRecoveryKey" +#define BUS_ERROR_LOW_PASSWORD_QUALITY "org.freedesktop.home1.LowPasswordQuality" +#define BUS_ERROR_BAD_PASSWORD_AND_NO_TOKEN "org.freedesktop.home1.BadPasswordAndNoToken" +#define BUS_ERROR_TOKEN_PIN_NEEDED "org.freedesktop.home1.TokenPinNeeded" +#define BUS_ERROR_TOKEN_PROTECTED_AUTHENTICATION_PATH_NEEDED \ + "org.freedesktop.home1.TokenProtectedAuthenticationPathNeeded" +#define BUS_ERROR_TOKEN_USER_PRESENCE_NEEDED "org.freedesktop.home1.TokenUserPresenceNeeded" +#define BUS_ERROR_TOKEN_USER_VERIFICATION_NEEDED \ + "org.freedesktop.home1.TokenUserVerificationNeeded" +#define BUS_ERROR_TOKEN_ACTION_TIMEOUT "org.freedesktop.home1.TokenActionTimeout" +#define BUS_ERROR_TOKEN_PIN_LOCKED "org.freedesktop.home1.TokenPinLocked" +#define BUS_ERROR_TOKEN_BAD_PIN "org.freedesktop.home1.BadPin" +#define BUS_ERROR_TOKEN_BAD_PIN_FEW_TRIES_LEFT "org.freedesktop.home1.BadPinFewTriesLeft" +#define BUS_ERROR_TOKEN_BAD_PIN_ONE_TRY_LEFT "org.freedesktop.home1.BadPinOneTryLeft" +#define BUS_ERROR_BAD_SIGNATURE "org.freedesktop.home1.BadSignature" +#define BUS_ERROR_HOME_RECORD_MISMATCH "org.freedesktop.home1.RecordMismatch" +#define BUS_ERROR_HOME_RECORD_DOWNGRADE "org.freedesktop.home1.RecordDowngrade" +#define BUS_ERROR_HOME_RECORD_SIGNED "org.freedesktop.home1.RecordSigned" +#define BUS_ERROR_BAD_HOME_SIZE "org.freedesktop.home1.BadHomeSize" +#define BUS_ERROR_NO_PRIVATE_KEY "org.freedesktop.home1.NoPrivateKey" +#define BUS_ERROR_HOME_LOCKED "org.freedesktop.home1.HomeLocked" +#define BUS_ERROR_HOME_NOT_LOCKED "org.freedesktop.home1.HomeNotLocked" +#define BUS_ERROR_NO_DISK_SPACE "org.freedesktop.home1.NoDiskSpace" +#define BUS_ERROR_TOO_MANY_OPERATIONS "org.freedesktop.home1.TooManyOperations" +#define BUS_ERROR_AUTHENTICATION_LIMIT_HIT "org.freedesktop.home1.AuthenticationLimitHit" +#define BUS_ERROR_HOME_CANT_AUTHENTICATE "org.freedesktop.home1.HomeCantAuthenticate" +#define BUS_ERROR_HOME_IN_USE "org.freedesktop.home1.HomeInUse" +#define BUS_ERROR_REBALANCE_NOT_NEEDED "org.freedesktop.home1.RebalanceNotNeeded" + +BUS_ERROR_MAP_ELF_USE(bus_common_errors); diff --git a/src/libsystemd/sd-bus/bus-container.c b/src/libsystemd/sd-bus/bus-container.c new file mode 100644 index 0000000..4146a6e --- /dev/null +++ b/src/libsystemd/sd-bus/bus-container.c @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <unistd.h> + +#include "bus-container.h" +#include "bus-internal.h" +#include "bus-socket.h" +#include "fd-util.h" +#include "namespace-util.h" +#include "process-util.h" +#include "string-util.h" + +int bus_container_connect_socket(sd_bus *b) { + _cleanup_close_pair_ int pair[2] = EBADF_PAIR; + _cleanup_close_ int pidnsfd = -EBADF, mntnsfd = -EBADF, usernsfd = -EBADF, rootfd = -EBADF; + int r, error_buf = 0; + pid_t child; + ssize_t n; + + assert(b); + assert(b->input_fd < 0); + assert(b->output_fd < 0); + assert(b->nspid > 0 || b->machine); + + if (b->nspid <= 0) { + log_debug("sd-bus: connecting bus%s%s to machine %s...", + b->description ? " " : "", strempty(b->description), b->machine); + + r = container_get_leader(b->machine, &b->nspid); + if (r < 0) + return r; + } else + log_debug("sd-bus: connecting bus%s%s to namespace of PID "PID_FMT"...", + b->description ? " " : "", strempty(b->description), b->nspid); + + r = namespace_open(b->nspid, &pidnsfd, &mntnsfd, NULL, &usernsfd, &rootfd); + if (r < 0) + return log_debug_errno(r, "Failed to open namespace of PID "PID_FMT": %m", b->nspid); + + b->input_fd = socket(b->sockaddr.sa.sa_family, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); + if (b->input_fd < 0) + return log_debug_errno(errno, "Failed to create a socket: %m"); + + b->input_fd = fd_move_above_stdio(b->input_fd); + + b->output_fd = b->input_fd; + + bus_socket_setup(b); + + if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0) + return log_debug_errno(errno, "Failed to create a socket pair: %m"); + + r = namespace_fork("(sd-buscntrns)", "(sd-buscntr)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGKILL, + pidnsfd, mntnsfd, -1, usernsfd, rootfd, &child); + if (r < 0) + return log_debug_errno(r, "Failed to create namespace for (sd-buscntr): %m"); + if (r == 0) { + pair[0] = safe_close(pair[0]); + + r = connect(b->input_fd, &b->sockaddr.sa, b->sockaddr_size); + if (r < 0) { + /* Try to send error up */ + error_buf = errno; + (void) write(pair[1], &error_buf, sizeof(error_buf)); + _exit(EXIT_FAILURE); + } + + _exit(EXIT_SUCCESS); + } + + pair[1] = safe_close(pair[1]); + + r = wait_for_terminate_and_check("(sd-buscntrns)", child, 0); + if (r < 0) + return r; + bool nonzero_exit_status = r != EXIT_SUCCESS; + + n = read(pair[0], &error_buf, sizeof(error_buf)); + if (n < 0) + return log_debug_errno(errno, "Failed to read error status from (sd-buscntr): %m"); + + if (n > 0) { + if (n != sizeof(error_buf)) + return log_debug_errno(SYNTHETIC_ERRNO(EIO), + "Read error status of unexpected length %zd from (sd-buscntr): %m", n); + + if (error_buf < 0) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Got unexpected error status from (sd-buscntr): %m"); + + if (error_buf == EINPROGRESS) + return 1; + + if (error_buf > 0) + return log_debug_errno(error_buf, "(sd-buscntr) failed to connect to D-Bus socket: %m"); + } + + if (nonzero_exit_status) + return -EPROTO; + + return bus_socket_start_auth(b); +} diff --git a/src/libsystemd/sd-bus/bus-container.h b/src/libsystemd/sd-bus/bus-container.h new file mode 100644 index 0000000..cb503a5 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-container.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-bus.h" + +int bus_container_connect_socket(sd_bus *b); diff --git a/src/libsystemd/sd-bus/bus-control.c b/src/libsystemd/sd-bus/bus-control.c new file mode 100644 index 0000000..1355e41 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-control.c @@ -0,0 +1,1038 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#if HAVE_VALGRIND_MEMCHECK_H +#include <valgrind/memcheck.h> +#endif + +#include <errno.h> +#include <stddef.h> + +#include "sd-bus.h" + +#include "alloc-util.h" +#include "bus-control.h" +#include "bus-internal.h" +#include "bus-message.h" +#include "capability-util.h" +#include "process-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strv.h" +#include "user-util.h" + +_public_ int sd_bus_get_unique_name(sd_bus *bus, const char **unique) { + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(unique, -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (!bus->bus_client) + return -EINVAL; + + r = bus_ensure_running(bus); + if (r < 0) + return r; + + *unique = bus->unique_name; + return 0; +} + +static int validate_request_name_parameters( + sd_bus *bus, + const char *name, + uint64_t flags, + uint32_t *ret_param) { + + uint32_t param = 0; + + assert(bus); + assert(name); + assert(ret_param); + + assert_return(!(flags & ~(SD_BUS_NAME_ALLOW_REPLACEMENT|SD_BUS_NAME_REPLACE_EXISTING|SD_BUS_NAME_QUEUE)), -EINVAL); + assert_return(service_name_is_valid(name), -EINVAL); + assert_return(name[0] != ':', -EINVAL); + + if (!bus->bus_client) + return -EINVAL; + + /* Don't allow requesting the special driver and local names */ + if (STR_IN_SET(name, "org.freedesktop.DBus", "org.freedesktop.DBus.Local")) + return -EINVAL; + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + if (flags & SD_BUS_NAME_ALLOW_REPLACEMENT) + param |= BUS_NAME_ALLOW_REPLACEMENT; + if (flags & SD_BUS_NAME_REPLACE_EXISTING) + param |= BUS_NAME_REPLACE_EXISTING; + if (!(flags & SD_BUS_NAME_QUEUE)) + param |= BUS_NAME_DO_NOT_QUEUE; + + *ret_param = param; + + return 0; +} + +_public_ int sd_bus_request_name( + sd_bus *bus, + const char *name, + uint64_t flags) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + uint32_t ret, param = 0; + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(name, -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + r = validate_request_name_parameters(bus, name, flags, ¶m); + if (r < 0) + return r; + + r = sd_bus_call_method( + bus, + "org.freedesktop.DBus", + "/org/freedesktop/DBus", + "org.freedesktop.DBus", + "RequestName", + NULL, + &reply, + "su", + name, + param); + if (r < 0) + return r; + + r = sd_bus_message_read(reply, "u", &ret); + if (r < 0) + return r; + + switch (ret) { + + case BUS_NAME_ALREADY_OWNER: + return -EALREADY; + + case BUS_NAME_EXISTS: + return -EEXIST; + + case BUS_NAME_IN_QUEUE: + return 0; + + case BUS_NAME_PRIMARY_OWNER: + return 1; + } + + return -EIO; +} + +static int default_request_name_handler( + sd_bus_message *m, + void *userdata, + sd_bus_error *ret_error) { + + uint32_t ret; + int r; + + assert(m); + + if (sd_bus_message_is_method_error(m, NULL)) { + log_debug_errno(sd_bus_message_get_errno(m), + "Unable to request name, failing connection: %s", + sd_bus_message_get_error(m)->message); + + bus_enter_closing(sd_bus_message_get_bus(m)); + return 1; + } + + r = sd_bus_message_read(m, "u", &ret); + if (r < 0) + return r; + + switch (ret) { + + case BUS_NAME_ALREADY_OWNER: + log_debug("Already owner of requested service name, ignoring."); + return 1; + + case BUS_NAME_IN_QUEUE: + log_debug("In queue for requested service name."); + return 1; + + case BUS_NAME_PRIMARY_OWNER: + log_debug("Successfully acquired requested service name."); + return 1; + + case BUS_NAME_EXISTS: + log_debug("Requested service name already owned, failing connection."); + bus_enter_closing(sd_bus_message_get_bus(m)); + return 1; + } + + log_debug("Unexpected response from RequestName(), failing connection."); + bus_enter_closing(sd_bus_message_get_bus(m)); + return 1; +} + +_public_ int sd_bus_request_name_async( + sd_bus *bus, + sd_bus_slot **ret_slot, + const char *name, + uint64_t flags, + sd_bus_message_handler_t callback, + void *userdata) { + + uint32_t param = 0; + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(name, -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + r = validate_request_name_parameters(bus, name, flags, ¶m); + if (r < 0) + return r; + + return sd_bus_call_method_async( + bus, + ret_slot, + "org.freedesktop.DBus", + "/org/freedesktop/DBus", + "org.freedesktop.DBus", + "RequestName", + callback ?: default_request_name_handler, + userdata, + "su", + name, + param); +} + +static int validate_release_name_parameters( + sd_bus *bus, + const char *name) { + + assert(bus); + assert(name); + + assert_return(service_name_is_valid(name), -EINVAL); + assert_return(name[0] != ':', -EINVAL); + + if (!bus->bus_client) + return -EINVAL; + + /* Don't allow releasing the special driver and local names */ + if (STR_IN_SET(name, "org.freedesktop.DBus", "org.freedesktop.DBus.Local")) + return -EINVAL; + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + return 0; +} + +_public_ int sd_bus_release_name( + sd_bus *bus, + const char *name) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + uint32_t ret; + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(name, -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + r = validate_release_name_parameters(bus, name); + if (r < 0) + return r; + + r = sd_bus_call_method( + bus, + "org.freedesktop.DBus", + "/org/freedesktop/DBus", + "org.freedesktop.DBus", + "ReleaseName", + NULL, + &reply, + "s", + name); + if (r < 0) + return r; + + r = sd_bus_message_read(reply, "u", &ret); + if (r < 0) + return r; + + switch (ret) { + + case BUS_NAME_NON_EXISTENT: + return -ESRCH; + + case BUS_NAME_NOT_OWNER: + return -EADDRINUSE; + + case BUS_NAME_RELEASED: + return 0; + } + + return -EIO; +} + +static int default_release_name_handler( + sd_bus_message *m, + void *userdata, + sd_bus_error *ret_error) { + + uint32_t ret; + int r; + + assert(m); + + if (sd_bus_message_is_method_error(m, NULL)) { + log_debug_errno(sd_bus_message_get_errno(m), + "Unable to release name, failing connection: %s", + sd_bus_message_get_error(m)->message); + + bus_enter_closing(sd_bus_message_get_bus(m)); + return 1; + } + + r = sd_bus_message_read(m, "u", &ret); + if (r < 0) + return r; + + switch (ret) { + + case BUS_NAME_NON_EXISTENT: + log_debug("Name asked to release is not taken currently, ignoring."); + return 1; + + case BUS_NAME_NOT_OWNER: + log_debug("Name asked to release is owned by somebody else, ignoring."); + return 1; + + case BUS_NAME_RELEASED: + log_debug("Name successfully released."); + return 1; + } + + log_debug("Unexpected response from ReleaseName(), failing connection."); + bus_enter_closing(sd_bus_message_get_bus(m)); + return 1; +} + +_public_ int sd_bus_release_name_async( + sd_bus *bus, + sd_bus_slot **ret_slot, + const char *name, + sd_bus_message_handler_t callback, + void *userdata) { + + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(name, -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + r = validate_release_name_parameters(bus, name); + if (r < 0) + return r; + + return sd_bus_call_method_async( + bus, + ret_slot, + "org.freedesktop.DBus", + "/org/freedesktop/DBus", + "org.freedesktop.DBus", + "ReleaseName", + callback ?: default_release_name_handler, + userdata, + "s", + name); +} + +_public_ int sd_bus_list_names(sd_bus *bus, char ***acquired, char ***activatable) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + _cleanup_strv_free_ char **x = NULL, **y = NULL; + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(acquired || activatable, -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (!bus->bus_client) + return -EINVAL; + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + if (acquired) { + r = sd_bus_call_method( + bus, + "org.freedesktop.DBus", + "/org/freedesktop/DBus", + "org.freedesktop.DBus", + "ListNames", + NULL, + &reply, + NULL); + if (r < 0) + return r; + + r = sd_bus_message_read_strv(reply, &x); + if (r < 0) + return r; + + reply = sd_bus_message_unref(reply); + } + + if (activatable) { + r = sd_bus_call_method( + bus, + "org.freedesktop.DBus", + "/org/freedesktop/DBus", + "org.freedesktop.DBus", + "ListActivatableNames", + NULL, + &reply, + NULL); + if (r < 0) + return r; + + r = sd_bus_message_read_strv(reply, &y); + if (r < 0) + return r; + + *activatable = TAKE_PTR(y); + } + + if (acquired) + *acquired = TAKE_PTR(x); + + return 0; +} + +_public_ int sd_bus_get_name_creds( + sd_bus *bus, + const char *name, + uint64_t mask, + sd_bus_creds **creds) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply_unique = NULL, *reply = NULL; + _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *c = NULL; + const char *unique; + pid_t pid = 0; + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(name, -EINVAL); + assert_return((mask & ~SD_BUS_CREDS_AUGMENT) <= _SD_BUS_CREDS_ALL, -EOPNOTSUPP); + assert_return(mask == 0 || creds, -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + assert_return(service_name_is_valid(name), -EINVAL); + + if (!bus->bus_client) + return -EINVAL; + + /* Turn off augmenting if this isn't a local connection. If the connection is not local, then /proc is not + * going to match. */ + if (!bus->is_local) + mask &= ~SD_BUS_CREDS_AUGMENT; + + if (streq(name, "org.freedesktop.DBus.Local")) + return -EINVAL; + + if (streq(name, "org.freedesktop.DBus")) + return sd_bus_get_owner_creds(bus, mask, creds); + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + /* If the name is unique anyway, we can use it directly */ + unique = name[0] == ':' ? name : NULL; + + /* Only query the owner if the caller wants to know it and the name is not unique anyway, or if the caller just + * wants to check whether a name exists */ + if ((FLAGS_SET(mask, SD_BUS_CREDS_UNIQUE_NAME) && !unique) || mask == 0) { + r = sd_bus_call_method( + bus, + "org.freedesktop.DBus", + "/org/freedesktop/DBus", + "org.freedesktop.DBus", + "GetNameOwner", + NULL, + &reply_unique, + "s", + name); + if (r < 0) + return r; + + r = sd_bus_message_read(reply_unique, "s", &unique); + if (r < 0) + return r; + } + + if (mask != 0) { + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + bool need_pid, need_uid, need_selinux, need_separate_calls; + + c = bus_creds_new(); + if (!c) + return -ENOMEM; + + if ((mask & SD_BUS_CREDS_UNIQUE_NAME) && unique) { + c->unique_name = strdup(unique); + if (!c->unique_name) + return -ENOMEM; + + c->mask |= SD_BUS_CREDS_UNIQUE_NAME; + } + + need_pid = (mask & SD_BUS_CREDS_PID) || + ((mask & SD_BUS_CREDS_AUGMENT) && + (mask & (SD_BUS_CREDS_UID|SD_BUS_CREDS_SUID|SD_BUS_CREDS_FSUID| + SD_BUS_CREDS_GID|SD_BUS_CREDS_EGID|SD_BUS_CREDS_SGID|SD_BUS_CREDS_FSGID| + SD_BUS_CREDS_SUPPLEMENTARY_GIDS| + SD_BUS_CREDS_COMM|SD_BUS_CREDS_EXE|SD_BUS_CREDS_CMDLINE| + SD_BUS_CREDS_CGROUP|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_USER_UNIT|SD_BUS_CREDS_SLICE|SD_BUS_CREDS_SESSION|SD_BUS_CREDS_OWNER_UID| + SD_BUS_CREDS_EFFECTIVE_CAPS|SD_BUS_CREDS_PERMITTED_CAPS|SD_BUS_CREDS_INHERITABLE_CAPS|SD_BUS_CREDS_BOUNDING_CAPS| + SD_BUS_CREDS_SELINUX_CONTEXT| + SD_BUS_CREDS_AUDIT_SESSION_ID|SD_BUS_CREDS_AUDIT_LOGIN_UID))); + need_uid = mask & SD_BUS_CREDS_EUID; + need_selinux = mask & SD_BUS_CREDS_SELINUX_CONTEXT; + + if (need_pid + need_uid + need_selinux > 1) { + + /* If we need more than one of the credentials, then use GetConnectionCredentials() */ + + r = sd_bus_call_method( + bus, + "org.freedesktop.DBus", + "/org/freedesktop/DBus", + "org.freedesktop.DBus", + "GetConnectionCredentials", + &error, + &reply, + "s", + unique ?: name); + + if (r < 0) { + + if (!sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_METHOD)) + return r; + + /* If we got an unknown method error, fall back to the individual calls... */ + need_separate_calls = true; + sd_bus_error_free(&error); + + } else { + need_separate_calls = false; + + r = sd_bus_message_enter_container(reply, 'a', "{sv}"); + if (r < 0) + return r; + + for (;;) { + const char *m; + + r = sd_bus_message_enter_container(reply, 'e', "sv"); + if (r < 0) + return r; + if (r == 0) + break; + + r = sd_bus_message_read(reply, "s", &m); + if (r < 0) + return r; + + if (need_uid && streq(m, "UnixUserID")) { + uint32_t u; + + r = sd_bus_message_read(reply, "v", "u", &u); + if (r < 0) + return r; + + c->euid = u; + c->mask |= SD_BUS_CREDS_EUID; + + } else if (need_pid && streq(m, "ProcessID")) { + uint32_t p; + + r = sd_bus_message_read(reply, "v", "u", &p); + if (r < 0) + return r; + + pid = p; + if (mask & SD_BUS_CREDS_PID) { + c->pid = p; + c->mask |= SD_BUS_CREDS_PID; + } + + } else if (need_selinux && streq(m, "LinuxSecurityLabel")) { + const void *p = NULL; + size_t sz = 0; + + r = sd_bus_message_enter_container(reply, 'v', "ay"); + if (r < 0) + return r; + + r = sd_bus_message_read_array(reply, 'y', &p, &sz); + if (r < 0) + return r; + + r = free_and_strndup(&c->label, p, sz); + if (r < 0) + return r; + + c->mask |= SD_BUS_CREDS_SELINUX_CONTEXT; + + r = sd_bus_message_exit_container(reply); + if (r < 0) + return r; + } else { + r = sd_bus_message_skip(reply, "v"); + if (r < 0) + return r; + } + + r = sd_bus_message_exit_container(reply); + if (r < 0) + return r; + } + + r = sd_bus_message_exit_container(reply); + if (r < 0) + return r; + + if (need_pid && pid == 0) + return -EPROTO; + } + + } else /* When we only need a single field, then let's use separate calls */ + need_separate_calls = true; + + if (need_separate_calls) { + if (need_pid) { + uint32_t u; + + r = sd_bus_call_method( + bus, + "org.freedesktop.DBus", + "/org/freedesktop/DBus", + "org.freedesktop.DBus", + "GetConnectionUnixProcessID", + NULL, + &reply, + "s", + unique ?: name); + if (r < 0) + return r; + + r = sd_bus_message_read(reply, "u", &u); + if (r < 0) + return r; + + pid = u; + if (mask & SD_BUS_CREDS_PID) { + c->pid = u; + c->mask |= SD_BUS_CREDS_PID; + } + + reply = sd_bus_message_unref(reply); + } + + if (need_uid) { + uint32_t u; + + r = sd_bus_call_method( + bus, + "org.freedesktop.DBus", + "/org/freedesktop/DBus", + "org.freedesktop.DBus", + "GetConnectionUnixUser", + NULL, + &reply, + "s", + unique ?: name); + if (r < 0) + return r; + + r = sd_bus_message_read(reply, "u", &u); + if (r < 0) + return r; + + c->euid = u; + c->mask |= SD_BUS_CREDS_EUID; + + reply = sd_bus_message_unref(reply); + } + + if (need_selinux) { + const void *p = NULL; + size_t sz = 0; + + r = sd_bus_call_method( + bus, + "org.freedesktop.DBus", + "/org/freedesktop/DBus", + "org.freedesktop.DBus", + "GetConnectionSELinuxSecurityContext", + &error, + &reply, + "s", + unique ?: name); + if (r < 0) { + if (!sd_bus_error_has_name(&error, SD_BUS_ERROR_SELINUX_SECURITY_CONTEXT_UNKNOWN)) + return r; + + /* no data is fine */ + } else { + r = sd_bus_message_read_array(reply, 'y', &p, &sz); + if (r < 0) + return r; + + c->label = memdup_suffix0(p, sz); + if (!c->label) + return -ENOMEM; + + c->mask |= SD_BUS_CREDS_SELINUX_CONTEXT; + } + } + } + + r = bus_creds_add_more(c, mask, pid, 0); + if (r < 0 && r != -ESRCH) /* Return the error, but ignore ESRCH which just means the process is already gone */ + return r; + } + + if (creds) + *creds = TAKE_PTR(c); + + return 0; +} + +static int parse_sockaddr_string(const char *t, char **ret_comm, char **ret_description) { + _cleanup_free_ char *comm = NULL, *description = NULL; + const char *e, *sl; + + assert(t); + assert(ret_comm); + assert(ret_description); + + e = strstrafter(t, "/bus/"); + if (!e) { + log_debug("Didn't find /bus/ substring in peer socket address, ignoring."); + goto not_found; + } + + sl = strchr(e, '/'); + if (!sl) { + log_debug("Didn't find / substring after /bus/ in peer socket address, ignoring."); + goto not_found; + } + + if (sl - e > 0) { + comm = strndup(e, sl - e); + if (!comm) + return -ENOMEM; + } + + sl++; + if (!isempty(sl)) { + description = strdup(sl); + if (!description) + return -ENOMEM; + } + + *ret_comm = TAKE_PTR(comm); + *ret_description = TAKE_PTR(description); + return 0; + +not_found: + *ret_comm = *ret_description = NULL; + return 0; +} + +_public_ int sd_bus_get_owner_creds(sd_bus *bus, uint64_t mask, sd_bus_creds **ret) { + _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *c = NULL; + bool do_label, do_groups, do_sockaddr_peer; + pid_t pid = 0; + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return((mask & ~SD_BUS_CREDS_AUGMENT) <= _SD_BUS_CREDS_ALL, -EOPNOTSUPP); + assert_return(ret, -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + if (!bus->is_local) + mask &= ~SD_BUS_CREDS_AUGMENT; + + do_label = bus->label && (mask & SD_BUS_CREDS_SELINUX_CONTEXT); + do_groups = bus->n_groups != SIZE_MAX && (mask & SD_BUS_CREDS_SUPPLEMENTARY_GIDS); + do_sockaddr_peer = bus->sockaddr_size_peer >= offsetof(struct sockaddr_un, sun_path) + 1 && + bus->sockaddr_peer.sa.sa_family == AF_UNIX && + bus->sockaddr_peer.un.sun_path[0] == 0; + + /* Avoid allocating anything if we have no chance of returning useful data */ + if (!bus->ucred_valid && !do_label && !do_groups && !do_sockaddr_peer) + return -ENODATA; + + c = bus_creds_new(); + if (!c) + return -ENOMEM; + + if (bus->ucred_valid) { + if (pid_is_valid(bus->ucred.pid)) { + pid = c->pid = bus->ucred.pid; + c->mask |= SD_BUS_CREDS_PID & mask; + } + + if (uid_is_valid(bus->ucred.uid)) { + c->euid = bus->ucred.uid; + c->mask |= SD_BUS_CREDS_EUID & mask; + } + + if (gid_is_valid(bus->ucred.gid)) { + c->egid = bus->ucred.gid; + c->mask |= SD_BUS_CREDS_EGID & mask; + } + } + + if (do_label) { + c->label = strdup(bus->label); + if (!c->label) + return -ENOMEM; + + c->mask |= SD_BUS_CREDS_SELINUX_CONTEXT; + } + + if (do_groups) { + c->supplementary_gids = newdup(gid_t, bus->groups, bus->n_groups); + if (!c->supplementary_gids) + return -ENOMEM; + + c->n_supplementary_gids = bus->n_groups; + + c->mask |= SD_BUS_CREDS_SUPPLEMENTARY_GIDS; + } + + if (do_sockaddr_peer) { + _cleanup_free_ char *t = NULL; + + assert(bus->sockaddr_size_peer >= offsetof(struct sockaddr_un, sun_path) + 1); + assert(bus->sockaddr_peer.sa.sa_family == AF_UNIX); + assert(bus->sockaddr_peer.un.sun_path[0] == 0); + + /* So this is an abstract namespace socket, good. Now let's find the data we are interested in */ + r = make_cstring(bus->sockaddr_peer.un.sun_path + 1, + bus->sockaddr_size_peer - offsetof(struct sockaddr_un, sun_path) - 1, + MAKE_CSTRING_ALLOW_TRAILING_NUL, + &t); + if (r == -ENOMEM) + return r; + if (r < 0) + log_debug_errno(r, "Can't extract string from peer socket address, ignoring: %m"); + else { + r = parse_sockaddr_string(t, &c->comm, &c->description); + if (r < 0) + return r; + + if (c->comm) + c->mask |= SD_BUS_CREDS_COMM & mask; + + if (c->description) + c->mask |= SD_BUS_CREDS_DESCRIPTION & mask; + } + } + + r = bus_creds_add_more(c, mask, pid, 0); + if (r < 0 && r != -ESRCH) /* If the process vanished, then don't complain, just return what we got */ + return r; + + *ret = TAKE_PTR(c); + + return 0; +} + +#define append_eavesdrop(bus, m) \ + ((bus)->is_monitor \ + ? (isempty(m) ? "eavesdrop='true'" : strjoina((m), ",eavesdrop='true'")) \ + : (m)) + +int bus_add_match_internal( + sd_bus *bus, + const char *match, + uint64_t timeout_usec, + uint64_t *ret_counter) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL; + const char *e; + int r; + + assert(bus); + + if (!bus->bus_client) + return -EINVAL; + + e = append_eavesdrop(bus, match); + + r = sd_bus_message_new_method_call( + bus, + &m, + "org.freedesktop.DBus", + "/org/freedesktop/DBus", + "org.freedesktop.DBus", + "AddMatch"); + if (r < 0) + return r; + + r = sd_bus_message_append(m, "s", e); + if (r < 0) + return r; + + r = sd_bus_call( + bus, + m, + timeout_usec, + NULL, + &reply); + if (r < 0) + return r; + + /* If the caller asked for it, return the read counter of the reply */ + if (ret_counter) + *ret_counter = reply->read_counter; + + return r; +} + +int bus_add_match_internal_async( + sd_bus *bus, + sd_bus_slot **ret_slot, + const char *match, + sd_bus_message_handler_t callback, + void *userdata, + uint64_t timeout_usec) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + const char *e; + int r; + + assert(bus); + + if (!bus->bus_client) + return -EINVAL; + + e = append_eavesdrop(bus, match); + + r = sd_bus_message_new_method_call( + bus, + &m, + "org.freedesktop.DBus", + "/org/freedesktop/DBus", + "org.freedesktop.DBus", + "AddMatch"); + if (r < 0) + return r; + + r = sd_bus_message_append(m, "s", e); + if (r < 0) + return r; + + return sd_bus_call_async( + bus, + ret_slot, + m, + callback, + userdata, + timeout_usec); +} + +int bus_remove_match_internal( + sd_bus *bus, + const char *match) { + + const char *e; + + assert(bus); + assert(match); + + if (!bus->bus_client) + return -EINVAL; + + e = append_eavesdrop(bus, match); + + /* Fire and forget */ + + return sd_bus_call_method_async( + bus, + NULL, + "org.freedesktop.DBus", + "/org/freedesktop/DBus", + "org.freedesktop.DBus", + "RemoveMatch", + NULL, + NULL, + "s", + e); +} + +_public_ int sd_bus_get_name_machine_id(sd_bus *bus, const char *name, sd_id128_t *machine) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL, *m = NULL; + const char *mid; + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(name, -EINVAL); + assert_return(machine, -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + assert_return(service_name_is_valid(name), -EINVAL); + + if (!bus->bus_client) + return -EINVAL; + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + if (streq_ptr(name, bus->unique_name)) + return sd_id128_get_machine(machine); + + r = sd_bus_message_new_method_call( + bus, + &m, + name, + "/", + "org.freedesktop.DBus.Peer", + "GetMachineId"); + if (r < 0) + return r; + + r = sd_bus_message_set_auto_start(m, false); + if (r < 0) + return r; + + r = sd_bus_call(bus, m, 0, NULL, &reply); + if (r < 0) + return r; + + r = sd_bus_message_read(reply, "s", &mid); + if (r < 0) + return r; + + return sd_id128_from_string(mid, machine); +} diff --git a/src/libsystemd/sd-bus/bus-control.h b/src/libsystemd/sd-bus/bus-control.h new file mode 100644 index 0000000..1cd4fb8 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-control.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-bus.h" + +int bus_add_match_internal(sd_bus *bus, const char *match, uint64_t timeout_usec, uint64_t *ret_counter); +int bus_add_match_internal_async(sd_bus *bus, sd_bus_slot **ret, const char *match, sd_bus_message_handler_t callback, void *userdata, uint64_t timeout_usec); + +int bus_remove_match_internal(sd_bus *bus, const char *match); diff --git a/src/libsystemd/sd-bus/bus-convenience.c b/src/libsystemd/sd-bus/bus-convenience.c new file mode 100644 index 0000000..989e577 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-convenience.c @@ -0,0 +1,824 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <unistd.h> +#include <sys/types.h> + +#include "bus-internal.h" +#include "bus-message.h" +#include "bus-signature.h" +#include "bus-type.h" +#include "string-util.h" + +_public_ int sd_bus_message_send(sd_bus_message *reply) { + assert_return(reply, -EINVAL); + assert_return(reply->bus, -EINVAL); + assert_return(!bus_origin_changed(reply->bus), -ECHILD); + + return sd_bus_send(reply->bus, reply, NULL); +} + +_public_ int sd_bus_emit_signal_tov( + sd_bus *bus, + const char *destination, + const char *path, + const char *interface, + const char *member, + const char *types, va_list ap) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + r = sd_bus_message_new_signal_to(bus, &m, destination, path, interface, member); + if (r < 0) + return r; + + if (!isempty(types)) { + r = sd_bus_message_appendv(m, types, ap); + if (r < 0) + return r; + } + + return sd_bus_send(bus, m, NULL); +} + +_public_ int sd_bus_emit_signal_to( + sd_bus *bus, + const char *destination, + const char *path, + const char *interface, + const char *member, + const char *types, ...) { + + va_list ap; + int r; + + va_start(ap, types); + r = sd_bus_emit_signal_tov(bus, destination, path, interface, member, types, ap); + va_end(ap); + + return r; +} + +_public_ int sd_bus_emit_signalv( + sd_bus *bus, + const char *path, + const char *interface, + const char *member, + const char *types, va_list ap) { + + return sd_bus_emit_signal_tov(bus, NULL, path, interface, member, types, ap); +} + +_public_ int sd_bus_emit_signal( + sd_bus *bus, + const char *path, + const char *interface, + const char *member, + const char *types, ...) { + + va_list ap; + int r; + + va_start(ap, types); + r = sd_bus_emit_signalv(bus, path, interface, member, types, ap); + va_end(ap); + + return r; +} + +_public_ int sd_bus_call_method_asyncv( + sd_bus *bus, + sd_bus_slot **slot, + const char *destination, + const char *path, + const char *interface, + const char *member, + sd_bus_message_handler_t callback, + void *userdata, + const char *types, va_list ap) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + r = sd_bus_message_new_method_call(bus, &m, destination, path, interface, member); + if (r < 0) + return r; + + if (!isempty(types)) { + r = sd_bus_message_appendv(m, types, ap); + if (r < 0) + return r; + } + + return sd_bus_call_async(bus, slot, m, callback, userdata, 0); +} + +_public_ int sd_bus_call_method_async( + sd_bus *bus, + sd_bus_slot **slot, + const char *destination, + const char *path, + const char *interface, + const char *member, + sd_bus_message_handler_t callback, + void *userdata, + const char *types, ...) { + + va_list ap; + int r; + + va_start(ap, types); + r = sd_bus_call_method_asyncv(bus, slot, destination, path, interface, member, callback, userdata, types, ap); + va_end(ap); + + return r; +} + +_public_ int sd_bus_call_methodv( + sd_bus *bus, + const char *destination, + const char *path, + const char *interface, + const char *member, + sd_bus_error *error, + sd_bus_message **reply, + const char *types, va_list ap) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + int r; + + bus_assert_return(bus, -EINVAL, error); + bus_assert_return(bus = bus_resolve(bus), -ENOPKG, error); + bus_assert_return(!bus_origin_changed(bus), -ECHILD, error); + + if (!BUS_IS_OPEN(bus->state)) { + r = -ENOTCONN; + goto fail; + } + + r = sd_bus_message_new_method_call(bus, &m, destination, path, interface, member); + if (r < 0) + goto fail; + + if (!isempty(types)) { + r = sd_bus_message_appendv(m, types, ap); + if (r < 0) + goto fail; + } + + return sd_bus_call(bus, m, 0, error, reply); + +fail: + return sd_bus_error_set_errno(error, r); +} + +_public_ int sd_bus_call_method( + sd_bus *bus, + const char *destination, + const char *path, + const char *interface, + const char *member, + sd_bus_error *error, + sd_bus_message **reply, + const char *types, ...) { + + va_list ap; + int r; + + va_start(ap, types); + r = sd_bus_call_methodv(bus, destination, path, interface, member, error, reply, types, ap); + va_end(ap); + + return r; +} + +_public_ int sd_bus_reply_method_returnv( + sd_bus_message *call, + const char *types, va_list ap) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + int r; + + assert_return(call, -EINVAL); + assert_return(call->sealed, -EPERM); + assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL); + assert_return(call->bus, -EINVAL); + assert_return(!bus_origin_changed(call->bus), -ECHILD); + + if (!BUS_IS_OPEN(call->bus->state)) + return -ENOTCONN; + + if (call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED) + return 0; + + r = sd_bus_message_new_method_return(call, &m); + if (r < 0) + return r; + + if (!isempty(types)) { + r = sd_bus_message_appendv(m, types, ap); + if (r < 0) + return r; + } + + return sd_bus_message_send(m); +} + +_public_ int sd_bus_reply_method_return( + sd_bus_message *call, + const char *types, ...) { + + va_list ap; + int r; + + va_start(ap, types); + r = sd_bus_reply_method_returnv(call, types, ap); + va_end(ap); + + return r; +} + +_public_ int sd_bus_reply_method_error( + sd_bus_message *call, + const sd_bus_error *e) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + int r; + + assert_return(call, -EINVAL); + assert_return(call->sealed, -EPERM); + assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL); + assert_return(sd_bus_error_is_set(e), -EINVAL); + assert_return(call->bus, -EINVAL); + assert_return(!bus_origin_changed(call->bus), -ECHILD); + + if (!BUS_IS_OPEN(call->bus->state)) + return -ENOTCONN; + + if (call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED) + return 0; + + r = sd_bus_message_new_method_error(call, &m, e); + if (r < 0) + return r; + + return sd_bus_message_send(m); +} + +_public_ int sd_bus_reply_method_errorfv( + sd_bus_message *call, + const char *name, + const char *format, + va_list ap) { + + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + + assert_return(call, -EINVAL); + assert_return(call->sealed, -EPERM); + assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL); + assert_return(call->bus, -EINVAL); + assert_return(!bus_origin_changed(call->bus), -ECHILD); + + if (!BUS_IS_OPEN(call->bus->state)) + return -ENOTCONN; + + if (call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED) + return 0; + + sd_bus_error_setfv(&error, name, format, ap); + + return sd_bus_reply_method_error(call, &error); +} + +_public_ int sd_bus_reply_method_errorf( + sd_bus_message *call, + const char *name, + const char *format, + ...) { + + va_list ap; + int r; + + va_start(ap, format); + r = sd_bus_reply_method_errorfv(call, name, format, ap); + va_end(ap); + + return r; +} + +_public_ int sd_bus_reply_method_errno( + sd_bus_message *call, + int error, + const sd_bus_error *p) { + + _cleanup_(sd_bus_error_free) sd_bus_error berror = SD_BUS_ERROR_NULL; + + assert_return(call, -EINVAL); + assert_return(call->sealed, -EPERM); + assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL); + assert_return(call->bus, -EINVAL); + assert_return(!bus_origin_changed(call->bus), -ECHILD); + + if (!BUS_IS_OPEN(call->bus->state)) + return -ENOTCONN; + + if (call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED) + return 0; + + if (sd_bus_error_is_set(p)) + return sd_bus_reply_method_error(call, p); + + sd_bus_error_set_errno(&berror, error); + + return sd_bus_reply_method_error(call, &berror); +} + +_public_ int sd_bus_reply_method_errnofv( + sd_bus_message *call, + int error, + const char *format, + va_list ap) { + + _cleanup_(sd_bus_error_free) sd_bus_error berror = SD_BUS_ERROR_NULL; + + assert_return(call, -EINVAL); + assert_return(call->sealed, -EPERM); + assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL); + assert_return(call->bus, -EINVAL); + assert_return(!bus_origin_changed(call->bus), -ECHILD); + + if (!BUS_IS_OPEN(call->bus->state)) + return -ENOTCONN; + + if (call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED) + return 0; + + sd_bus_error_set_errnofv(&berror, error, format, ap); + + return sd_bus_reply_method_error(call, &berror); +} + +_public_ int sd_bus_reply_method_errnof( + sd_bus_message *call, + int error, + const char *format, + ...) { + + va_list ap; + int r; + + va_start(ap, format); + r = sd_bus_reply_method_errnofv(call, error, format, ap); + va_end(ap); + + return r; +} + +_public_ int sd_bus_get_property( + sd_bus *bus, + const char *destination, + const char *path, + const char *interface, + const char *member, + sd_bus_error *error, + sd_bus_message **reply, + const char *type) { + + sd_bus_message *rep = NULL; + int r; + + bus_assert_return(bus, -EINVAL, error); + bus_assert_return(bus = bus_resolve(bus), -ENOPKG, error); + bus_assert_return(isempty(interface) || interface_name_is_valid(interface), -EINVAL, error); + bus_assert_return(member_name_is_valid(member), -EINVAL, error); + bus_assert_return(reply, -EINVAL, error); + bus_assert_return(signature_is_single(type, false), -EINVAL, error); + bus_assert_return(!bus_origin_changed(bus), -ECHILD, error); + + if (!BUS_IS_OPEN(bus->state)) { + r = -ENOTCONN; + goto fail; + } + + r = sd_bus_call_method(bus, destination, path, + "org.freedesktop.DBus.Properties", "Get", + error, &rep, + "ss", strempty(interface), member); + if (r < 0) + return r; + + r = sd_bus_message_enter_container(rep, 'v', type); + if (r < 0) { + sd_bus_message_unref(rep); + goto fail; + } + + *reply = rep; + return 0; + +fail: + return sd_bus_error_set_errno(error, r); +} + +_public_ int sd_bus_get_property_trivial( + sd_bus *bus, + const char *destination, + const char *path, + const char *interface, + const char *member, + sd_bus_error *error, + char type, void *ptr) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + int r; + + bus_assert_return(bus, -EINVAL, error); + bus_assert_return(bus = bus_resolve(bus), -ENOPKG, error); + bus_assert_return(isempty(interface) || interface_name_is_valid(interface), -EINVAL, error); + bus_assert_return(member_name_is_valid(member), -EINVAL, error); + bus_assert_return(bus_type_is_trivial(type), -EINVAL, error); + bus_assert_return(ptr, -EINVAL, error); + bus_assert_return(!bus_origin_changed(bus), -ECHILD, error); + + if (!BUS_IS_OPEN(bus->state)) { + r = -ENOTCONN; + goto fail; + } + + r = sd_bus_call_method(bus, destination, path, "org.freedesktop.DBus.Properties", "Get", error, &reply, "ss", strempty(interface), member); + if (r < 0) + return r; + + r = sd_bus_message_enter_container(reply, 'v', CHAR_TO_STR(type)); + if (r < 0) + goto fail; + + r = sd_bus_message_read_basic(reply, type, ptr); + if (r < 0) + goto fail; + + return 0; + +fail: + return sd_bus_error_set_errno(error, r); +} + +_public_ int sd_bus_get_property_string( + sd_bus *bus, + const char *destination, + const char *path, + const char *interface, + const char *member, + sd_bus_error *error, + char **ret) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + const char *s; + char *n; + int r; + + bus_assert_return(bus, -EINVAL, error); + bus_assert_return(bus = bus_resolve(bus), -ENOPKG, error); + bus_assert_return(isempty(interface) || interface_name_is_valid(interface), -EINVAL, error); + bus_assert_return(member_name_is_valid(member), -EINVAL, error); + bus_assert_return(ret, -EINVAL, error); + bus_assert_return(!bus_origin_changed(bus), -ECHILD, error); + + if (!BUS_IS_OPEN(bus->state)) { + r = -ENOTCONN; + goto fail; + } + + r = sd_bus_call_method(bus, destination, path, "org.freedesktop.DBus.Properties", "Get", error, &reply, "ss", strempty(interface), member); + if (r < 0) + return r; + + r = sd_bus_message_enter_container(reply, 'v', "s"); + if (r < 0) + goto fail; + + r = sd_bus_message_read_basic(reply, 's', &s); + if (r < 0) + goto fail; + + n = strdup(s); + if (!n) { + r = -ENOMEM; + goto fail; + } + + *ret = n; + return 0; + +fail: + return sd_bus_error_set_errno(error, r); +} + +_public_ int sd_bus_get_property_strv( + sd_bus *bus, + const char *destination, + const char *path, + const char *interface, + const char *member, + sd_bus_error *error, + char ***ret) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + int r; + + bus_assert_return(bus, -EINVAL, error); + bus_assert_return(bus = bus_resolve(bus), -ENOPKG, error); + bus_assert_return(isempty(interface) || interface_name_is_valid(interface), -EINVAL, error); + bus_assert_return(member_name_is_valid(member), -EINVAL, error); + bus_assert_return(ret, -EINVAL, error); + bus_assert_return(!bus_origin_changed(bus), -ECHILD, error); + + if (!BUS_IS_OPEN(bus->state)) { + r = -ENOTCONN; + goto fail; + } + + r = sd_bus_call_method(bus, destination, path, "org.freedesktop.DBus.Properties", "Get", error, &reply, "ss", strempty(interface), member); + if (r < 0) + return r; + + r = sd_bus_message_enter_container(reply, 'v', NULL); + if (r < 0) + goto fail; + + r = sd_bus_message_read_strv(reply, ret); + if (r < 0) + goto fail; + + return 0; + +fail: + return sd_bus_error_set_errno(error, r); +} + +_public_ int sd_bus_set_propertyv( + sd_bus *bus, + const char *destination, + const char *path, + const char *interface, + const char *member, + sd_bus_error *error, + const char *type, va_list ap) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + int r; + + bus_assert_return(bus, -EINVAL, error); + bus_assert_return(bus = bus_resolve(bus), -ENOPKG, error); + bus_assert_return(isempty(interface) || interface_name_is_valid(interface), -EINVAL, error); + bus_assert_return(member_name_is_valid(member), -EINVAL, error); + bus_assert_return(signature_is_single(type, false), -EINVAL, error); + bus_assert_return(!bus_origin_changed(bus), -ECHILD, error); + + if (!BUS_IS_OPEN(bus->state)) { + r = -ENOTCONN; + goto fail; + } + + r = sd_bus_message_new_method_call(bus, &m, destination, path, "org.freedesktop.DBus.Properties", "Set"); + if (r < 0) + goto fail; + + r = sd_bus_message_append(m, "ss", strempty(interface), member); + if (r < 0) + goto fail; + + r = sd_bus_message_open_container(m, 'v', type); + if (r < 0) + goto fail; + + r = sd_bus_message_appendv(m, type, ap); + if (r < 0) + goto fail; + + r = sd_bus_message_close_container(m); + if (r < 0) + goto fail; + + return sd_bus_call(bus, m, 0, error, NULL); + +fail: + return sd_bus_error_set_errno(error, r); +} + +_public_ int sd_bus_set_property( + sd_bus *bus, + const char *destination, + const char *path, + const char *interface, + const char *member, + sd_bus_error *error, + const char *type, ...) { + + va_list ap; + int r; + + va_start(ap, type); + r = sd_bus_set_propertyv(bus, destination, path, interface, member, error, type, ap); + va_end(ap); + + return r; +} + +_public_ int sd_bus_query_sender_creds(sd_bus_message *call, uint64_t mask, sd_bus_creds **ret) { + sd_bus_creds *c; + int r; + + assert_return(call, -EINVAL); + assert_return(call->sealed, -EPERM); + assert_return(call->bus, -EINVAL); + assert_return(!bus_origin_changed(call->bus), -ECHILD); + assert_return(ret, -EINVAL); + + if (!BUS_IS_OPEN(call->bus->state)) + return -ENOTCONN; + + c = sd_bus_message_get_creds(call); + + /* All data we need? */ + if (c && (mask & ~SD_BUS_CREDS_AUGMENT & ~c->mask) == 0) { + *ret = sd_bus_creds_ref(c); + return 0; + } + + /* No data passed? Or not enough data passed to retrieve the missing bits? */ + if (!c || !(c->mask & SD_BUS_CREDS_PID)) { + /* We couldn't read anything from the call, let's try + * to get it from the sender or peer. */ + + if (call->sender) + /* There's a sender, but the creds are missing. */ + return sd_bus_get_name_creds(call->bus, call->sender, mask, ret); + else + /* There's no sender. For direct connections + * the credentials of the AF_UNIX peer matter, + * which may be queried via sd_bus_get_owner_creds(). */ + return sd_bus_get_owner_creds(call->bus, mask, ret); + } + + r = bus_creds_extend_by_pid(c, mask, ret); + if (r == -ESRCH) { + /* Process doesn't exist anymore? propagate the few things we have */ + *ret = sd_bus_creds_ref(c); + return 0; + } + + return r; +} + +_public_ int sd_bus_query_sender_privilege(sd_bus_message *call, int capability) { + _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL; + uid_t our_uid; + bool know_caps = false; + int r; + + assert_return(call, -EINVAL); + assert_return(call->sealed, -EPERM); + assert_return(call->bus, -EINVAL); + assert_return(!bus_origin_changed(call->bus), -ECHILD); + + if (!BUS_IS_OPEN(call->bus->state)) + return -ENOTCONN; + + if (capability >= 0) { + + r = sd_bus_query_sender_creds(call, SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_EFFECTIVE_CAPS, &creds); + if (r < 0) + return r; + + /* We cannot use augmented caps for authorization, + * since then data is acquired raceful from + * /proc. This can never actually happen, but let's + * better be safe than sorry, and do an extra check + * here. */ + assert_return((sd_bus_creds_get_augmented_mask(creds) & SD_BUS_CREDS_EFFECTIVE_CAPS) == 0, -EPERM); + + r = sd_bus_creds_has_effective_cap(creds, capability); + if (r > 0) + return 1; + if (r == 0) + know_caps = true; + } else { + r = sd_bus_query_sender_creds(call, SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID, &creds); + if (r < 0) + return r; + } + + /* Now, check the UID, but only if the capability check wasn't + * sufficient */ + our_uid = getuid(); + if (our_uid != 0 || !know_caps || capability < 0) { + uid_t sender_uid; + + /* We cannot use augmented uid/euid for authorization, + * since then data is acquired raceful from + * /proc. This can never actually happen, but let's + * better be safe than sorry, and do an extra check + * here. */ + assert_return((sd_bus_creds_get_augmented_mask(creds) & (SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID)) == 0, -EPERM); + + /* Try to use the EUID, if we have it. */ + r = sd_bus_creds_get_euid(creds, &sender_uid); + if (r < 0) + r = sd_bus_creds_get_uid(creds, &sender_uid); + + if (r >= 0) { + /* Sender has same UID as us, then let's grant access */ + if (sender_uid == our_uid) + return 1; + + /* Sender is root, we are not root. */ + if (our_uid != 0 && sender_uid == 0) + return 1; + } + } + + return 0; +} + +#define make_expression(sender, path, interface, member) \ + strjoina( \ + "type='signal'", \ + sender ? ",sender='" : "", \ + sender ?: "", \ + sender ? "'" : "", \ + path ? ",path='" : "", \ + path ?: "", \ + path ? "'" : "", \ + interface ? ",interface='" : "", \ + interface ?: "", \ + interface ? "'" : "", \ + member ? ",member='" : "", \ + member ?: "", \ + member ? "'" : "" \ + ) + +_public_ int sd_bus_match_signal( + sd_bus *bus, + sd_bus_slot **ret, + const char *sender, + const char *path, + const char *interface, + const char *member, + sd_bus_message_handler_t callback, + void *userdata) { + + const char *expression; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + assert_return(!sender || service_name_is_valid(sender), -EINVAL); + assert_return(!path || object_path_is_valid(path), -EINVAL); + assert_return(!interface || interface_name_is_valid(interface), -EINVAL); + assert_return(!member || member_name_is_valid(member), -EINVAL); + + expression = make_expression(sender, path, interface, member); + + return sd_bus_add_match(bus, ret, expression, callback, userdata); +} + +_public_ int sd_bus_match_signal_async( + sd_bus *bus, + sd_bus_slot **ret, + const char *sender, + const char *path, + const char *interface, + const char *member, + sd_bus_message_handler_t callback, + sd_bus_message_handler_t install_callback, + void *userdata) { + + const char *expression; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + assert_return(!sender || service_name_is_valid(sender), -EINVAL); + assert_return(!path || object_path_is_valid(path), -EINVAL); + assert_return(!interface || interface_name_is_valid(interface), -EINVAL); + assert_return(!member || member_name_is_valid(member), -EINVAL); + + expression = make_expression(sender, path, interface, member); + + return sd_bus_add_match_async(bus, ret, expression, callback, install_callback, userdata); +} diff --git a/src/libsystemd/sd-bus/bus-creds.c b/src/libsystemd/sd-bus/bus-creds.c new file mode 100644 index 0000000..c6d8caa --- /dev/null +++ b/src/libsystemd/sd-bus/bus-creds.c @@ -0,0 +1,1337 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <linux/capability.h> +#include <stdlib.h> + +#include "alloc-util.h" +#include "audit-util.h" +#include "bus-creds.h" +#include "bus-label.h" +#include "bus-message.h" +#include "capability-util.h" +#include "cgroup-util.h" +#include "errno-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "format-util.h" +#include "hexdecoct.h" +#include "nulstr-util.h" +#include "parse-util.h" +#include "process-util.h" +#include "string-util.h" +#include "strv.h" +#include "terminal-util.h" +#include "user-util.h" + +enum { + CAP_OFFSET_INHERITABLE = 0, + CAP_OFFSET_PERMITTED = 1, + CAP_OFFSET_EFFECTIVE = 2, + CAP_OFFSET_BOUNDING = 3 +}; + +void bus_creds_done(sd_bus_creds *c) { + assert(c); + + /* For internal bus cred structures that are allocated by + * something else */ + + free(c->session); + free(c->unit); + free(c->user_unit); + free(c->slice); + free(c->user_slice); + free(c->unescaped_description); + free(c->supplementary_gids); + free(c->tty); + + free(c->well_known_names); /* note that this is an strv, but + * we only free the array, not the + * strings the array points to. The + * full strv we only free if + * c->allocated is set, see + * below. */ + + strv_free(c->cmdline_array); +} + +_public_ sd_bus_creds *sd_bus_creds_ref(sd_bus_creds *c) { + + if (!c) + return NULL; + + if (c->allocated) { + assert(c->n_ref > 0); + c->n_ref++; + } else { + sd_bus_message *m; + + /* If this is an embedded creds structure, then + * forward ref counting to the message */ + m = container_of(c, sd_bus_message, creds); + sd_bus_message_ref(m); + } + + return c; +} + +_public_ sd_bus_creds *sd_bus_creds_unref(sd_bus_creds *c) { + + if (!c) + return NULL; + + if (c->allocated) { + assert(c->n_ref > 0); + c->n_ref--; + + if (c->n_ref == 0) { + free(c->comm); + free(c->tid_comm); + free(c->exe); + free(c->cmdline); + free(c->cgroup); + free(c->capability); + free(c->label); + free(c->unique_name); + free(c->cgroup_root); + free(c->description); + + c->supplementary_gids = mfree(c->supplementary_gids); + + c->well_known_names = strv_free(c->well_known_names); + + bus_creds_done(c); + + free(c); + } + } else { + sd_bus_message *m; + + m = container_of(c, sd_bus_message, creds); + sd_bus_message_unref(m); + } + + return NULL; +} + +_public_ uint64_t sd_bus_creds_get_mask(const sd_bus_creds *c) { + assert_return(c, 0); + + return c->mask; +} + +_public_ uint64_t sd_bus_creds_get_augmented_mask(const sd_bus_creds *c) { + assert_return(c, 0); + + return c->augmented; +} + +sd_bus_creds* bus_creds_new(void) { + sd_bus_creds *c; + + c = new0(sd_bus_creds, 1); + if (!c) + return NULL; + + c->allocated = true; + c->n_ref = 1; + return c; +} + +_public_ int sd_bus_creds_new_from_pid(sd_bus_creds **ret, pid_t pid, uint64_t mask) { + _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *c = NULL; + int r; + + assert_return(pid >= 0, -EINVAL); + assert_return(mask <= _SD_BUS_CREDS_ALL, -EOPNOTSUPP); + assert_return(ret, -EINVAL); + + if (pid == 0) + pid = getpid_cached(); + + c = bus_creds_new(); + if (!c) + return -ENOMEM; + + r = bus_creds_add_more(c, mask | SD_BUS_CREDS_AUGMENT, pid, 0); + if (r < 0) + return r; + + /* Check if the process existed at all, in case we haven't + * figured that out already */ + r = pid_is_alive(pid); + if (r < 0) + return r; + if (r == 0) + return -ESRCH; + + *ret = TAKE_PTR(c); + return 0; +} + +_public_ int sd_bus_creds_get_uid(sd_bus_creds *c, uid_t *uid) { + assert_return(c, -EINVAL); + assert_return(uid, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_UID)) + return -ENODATA; + + *uid = c->uid; + return 0; +} + +_public_ int sd_bus_creds_get_euid(sd_bus_creds *c, uid_t *euid) { + assert_return(c, -EINVAL); + assert_return(euid, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_EUID)) + return -ENODATA; + + *euid = c->euid; + return 0; +} + +_public_ int sd_bus_creds_get_suid(sd_bus_creds *c, uid_t *suid) { + assert_return(c, -EINVAL); + assert_return(suid, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_SUID)) + return -ENODATA; + + *suid = c->suid; + return 0; +} + +_public_ int sd_bus_creds_get_fsuid(sd_bus_creds *c, uid_t *fsuid) { + assert_return(c, -EINVAL); + assert_return(fsuid, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_FSUID)) + return -ENODATA; + + *fsuid = c->fsuid; + return 0; +} + +_public_ int sd_bus_creds_get_gid(sd_bus_creds *c, gid_t *gid) { + assert_return(c, -EINVAL); + assert_return(gid, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_GID)) + return -ENODATA; + + *gid = c->gid; + return 0; +} + +_public_ int sd_bus_creds_get_egid(sd_bus_creds *c, gid_t *egid) { + assert_return(c, -EINVAL); + assert_return(egid, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_EGID)) + return -ENODATA; + + *egid = c->egid; + return 0; +} + +_public_ int sd_bus_creds_get_sgid(sd_bus_creds *c, gid_t *sgid) { + assert_return(c, -EINVAL); + assert_return(sgid, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_SGID)) + return -ENODATA; + + *sgid = c->sgid; + return 0; +} + +_public_ int sd_bus_creds_get_fsgid(sd_bus_creds *c, gid_t *fsgid) { + assert_return(c, -EINVAL); + assert_return(fsgid, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_FSGID)) + return -ENODATA; + + *fsgid = c->fsgid; + return 0; +} + +_public_ int sd_bus_creds_get_supplementary_gids(sd_bus_creds *c, const gid_t **gids) { + assert_return(c, -EINVAL); + assert_return(gids, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_SUPPLEMENTARY_GIDS)) + return -ENODATA; + + *gids = c->supplementary_gids; + return (int) c->n_supplementary_gids; +} + +_public_ int sd_bus_creds_get_pid(sd_bus_creds *c, pid_t *pid) { + assert_return(c, -EINVAL); + assert_return(pid, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_PID)) + return -ENODATA; + + assert(c->pid > 0); + *pid = c->pid; + return 0; +} + +_public_ int sd_bus_creds_get_ppid(sd_bus_creds *c, pid_t *ppid) { + assert_return(c, -EINVAL); + assert_return(ppid, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_PPID)) + return -ENODATA; + + /* PID 1 has no parent process. Let's distinguish the case of + * not knowing and not having a parent process by the returned + * error code. */ + if (c->ppid == 0) + return -ENXIO; + + *ppid = c->ppid; + return 0; +} + +_public_ int sd_bus_creds_get_tid(sd_bus_creds *c, pid_t *tid) { + assert_return(c, -EINVAL); + assert_return(tid, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_TID)) + return -ENODATA; + + assert(c->tid > 0); + *tid = c->tid; + return 0; +} + +_public_ int sd_bus_creds_get_selinux_context(sd_bus_creds *c, const char **ret) { + assert_return(c, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_SELINUX_CONTEXT)) + return -ENODATA; + + assert(c->label); + *ret = c->label; + return 0; +} + +_public_ int sd_bus_creds_get_comm(sd_bus_creds *c, const char **ret) { + assert_return(c, -EINVAL); + assert_return(ret, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_COMM)) + return -ENODATA; + + assert(c->comm); + *ret = c->comm; + return 0; +} + +_public_ int sd_bus_creds_get_tid_comm(sd_bus_creds *c, const char **ret) { + assert_return(c, -EINVAL); + assert_return(ret, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_TID_COMM)) + return -ENODATA; + + assert(c->tid_comm); + *ret = c->tid_comm; + return 0; +} + +_public_ int sd_bus_creds_get_exe(sd_bus_creds *c, const char **ret) { + assert_return(c, -EINVAL); + assert_return(ret, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_EXE)) + return -ENODATA; + + if (!c->exe) + return -ENXIO; + + *ret = c->exe; + return 0; +} + +_public_ int sd_bus_creds_get_cgroup(sd_bus_creds *c, const char **ret) { + assert_return(c, -EINVAL); + assert_return(ret, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_CGROUP)) + return -ENODATA; + + assert(c->cgroup); + *ret = c->cgroup; + return 0; +} + +_public_ int sd_bus_creds_get_unit(sd_bus_creds *c, const char **ret) { + int r; + + assert_return(c, -EINVAL); + assert_return(ret, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_UNIT)) + return -ENODATA; + + assert(c->cgroup); + + if (!c->unit) { + const char *shifted; + + r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted); + if (r < 0) + return r; + + r = cg_path_get_unit(shifted, (char**) &c->unit); + if (r < 0) + return r; + } + + *ret = c->unit; + return 0; +} + +_public_ int sd_bus_creds_get_user_unit(sd_bus_creds *c, const char **ret) { + int r; + + assert_return(c, -EINVAL); + assert_return(ret, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_USER_UNIT)) + return -ENODATA; + + assert(c->cgroup); + + if (!c->user_unit) { + const char *shifted; + + r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted); + if (r < 0) + return r; + + r = cg_path_get_user_unit(shifted, (char**) &c->user_unit); + if (r < 0) + return r; + } + + *ret = c->user_unit; + return 0; +} + +_public_ int sd_bus_creds_get_slice(sd_bus_creds *c, const char **ret) { + int r; + + assert_return(c, -EINVAL); + assert_return(ret, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_SLICE)) + return -ENODATA; + + assert(c->cgroup); + + if (!c->slice) { + const char *shifted; + + r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted); + if (r < 0) + return r; + + r = cg_path_get_slice(shifted, (char**) &c->slice); + if (r < 0) + return r; + } + + *ret = c->slice; + return 0; +} + +_public_ int sd_bus_creds_get_user_slice(sd_bus_creds *c, const char **ret) { + int r; + + assert_return(c, -EINVAL); + assert_return(ret, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_USER_SLICE)) + return -ENODATA; + + assert(c->cgroup); + + if (!c->user_slice) { + const char *shifted; + + r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted); + if (r < 0) + return r; + + r = cg_path_get_user_slice(shifted, (char**) &c->user_slice); + if (r < 0) + return r; + } + + *ret = c->user_slice; + return 0; +} + +_public_ int sd_bus_creds_get_session(sd_bus_creds *c, const char **ret) { + int r; + + assert_return(c, -EINVAL); + assert_return(ret, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_SESSION)) + return -ENODATA; + + assert(c->cgroup); + + if (!c->session) { + const char *shifted; + + r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted); + if (r < 0) + return r; + + r = cg_path_get_session(shifted, (char**) &c->session); + if (r < 0) + return r; + } + + *ret = c->session; + return 0; +} + +_public_ int sd_bus_creds_get_owner_uid(sd_bus_creds *c, uid_t *uid) { + const char *shifted; + int r; + + assert_return(c, -EINVAL); + assert_return(uid, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_OWNER_UID)) + return -ENODATA; + + assert(c->cgroup); + + r = cg_shift_path(c->cgroup, c->cgroup_root, &shifted); + if (r < 0) + return r; + + return cg_path_get_owner_uid(shifted, uid); +} + +_public_ int sd_bus_creds_get_cmdline(sd_bus_creds *c, char ***cmdline) { + assert_return(c, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_CMDLINE)) + return -ENODATA; + + if (!c->cmdline) + return -ENXIO; + + if (!c->cmdline_array) { + c->cmdline_array = strv_parse_nulstr(c->cmdline, c->cmdline_size); + if (!c->cmdline_array) + return -ENOMEM; + } + + *cmdline = c->cmdline_array; + return 0; +} + +_public_ int sd_bus_creds_get_audit_session_id(sd_bus_creds *c, uint32_t *sessionid) { + assert_return(c, -EINVAL); + assert_return(sessionid, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_AUDIT_SESSION_ID)) + return -ENODATA; + + if (!audit_session_is_valid(c->audit_session_id)) + return -ENXIO; + + *sessionid = c->audit_session_id; + return 0; +} + +_public_ int sd_bus_creds_get_audit_login_uid(sd_bus_creds *c, uid_t *uid) { + assert_return(c, -EINVAL); + assert_return(uid, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_AUDIT_LOGIN_UID)) + return -ENODATA; + + if (!uid_is_valid(c->audit_login_uid)) + return -ENXIO; + + *uid = c->audit_login_uid; + return 0; +} + +_public_ int sd_bus_creds_get_tty(sd_bus_creds *c, const char **ret) { + assert_return(c, -EINVAL); + assert_return(ret, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_TTY)) + return -ENODATA; + + if (!c->tty) + return -ENXIO; + + *ret = c->tty; + return 0; +} + +_public_ int sd_bus_creds_get_unique_name(sd_bus_creds *c, const char **unique_name) { + assert_return(c, -EINVAL); + assert_return(unique_name, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_UNIQUE_NAME)) + return -ENODATA; + + *unique_name = c->unique_name; + return 0; +} + +_public_ int sd_bus_creds_get_well_known_names(sd_bus_creds *c, char ***well_known_names) { + assert_return(c, -EINVAL); + assert_return(well_known_names, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_WELL_KNOWN_NAMES)) + return -ENODATA; + + /* As a special hack we return the bus driver as well-known + * names list when this is requested. */ + if (c->well_known_names_driver) { + static const char* const wkn[] = { + "org.freedesktop.DBus", + NULL + }; + + *well_known_names = (char**) wkn; + return 0; + } + + if (c->well_known_names_local) { + static const char* const wkn[] = { + "org.freedesktop.DBus.Local", + NULL + }; + + *well_known_names = (char**) wkn; + return 0; + } + + *well_known_names = c->well_known_names; + return 0; +} + +_public_ int sd_bus_creds_get_description(sd_bus_creds *c, const char **ret) { + assert_return(c, -EINVAL); + assert_return(ret, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_DESCRIPTION)) + return -ENODATA; + + assert(c->description); + + if (!c->unescaped_description) { + c->unescaped_description = bus_label_unescape(c->description); + if (!c->unescaped_description) + return -ENOMEM; + } + + *ret = c->unescaped_description; + return 0; +} + +static int has_cap(sd_bus_creds *c, size_t offset, int capability) { + size_t sz; + + assert(c); + assert(capability >= 0); + assert(c->capability); + + unsigned lc = cap_last_cap(); + + if ((unsigned) capability > lc) + return 0; + + /* If the last cap is 63, then there are 64 caps defined, and we need 2 entries à 32-bit hence. * + * If the last cap is 64, then there are 65 caps defined, and we need 3 entries à 32-bit hence. */ + sz = DIV_ROUND_UP(lc+1, 32LU); + + return !!(c->capability[offset * sz + CAP_TO_INDEX((uint32_t) capability)] & CAP_TO_MASK_CORRECTED((uint32_t) capability)); +} + +_public_ int sd_bus_creds_has_effective_cap(sd_bus_creds *c, int capability) { + assert_return(c, -EINVAL); + assert_return(capability >= 0, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_EFFECTIVE_CAPS)) + return -ENODATA; + + return has_cap(c, CAP_OFFSET_EFFECTIVE, capability); +} + +_public_ int sd_bus_creds_has_permitted_cap(sd_bus_creds *c, int capability) { + assert_return(c, -EINVAL); + assert_return(capability >= 0, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_PERMITTED_CAPS)) + return -ENODATA; + + return has_cap(c, CAP_OFFSET_PERMITTED, capability); +} + +_public_ int sd_bus_creds_has_inheritable_cap(sd_bus_creds *c, int capability) { + assert_return(c, -EINVAL); + assert_return(capability >= 0, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_INHERITABLE_CAPS)) + return -ENODATA; + + return has_cap(c, CAP_OFFSET_INHERITABLE, capability); +} + +_public_ int sd_bus_creds_has_bounding_cap(sd_bus_creds *c, int capability) { + assert_return(c, -EINVAL); + assert_return(capability >= 0, -EINVAL); + + if (!(c->mask & SD_BUS_CREDS_BOUNDING_CAPS)) + return -ENODATA; + + return has_cap(c, CAP_OFFSET_BOUNDING, capability); +} + +static int parse_caps(sd_bus_creds *c, unsigned offset, const char *p) { + size_t sz, max; + unsigned i, j; + + assert(c); + assert(p); + + max = DIV_ROUND_UP(cap_last_cap()+1, 32U); + p += strspn(p, WHITESPACE); + + sz = strlen(p); + if (sz % 8 != 0) + return -EINVAL; + + sz /= 8; + if (sz > max) + return -EINVAL; + + if (!c->capability) { + c->capability = new0(uint32_t, max * 4); + if (!c->capability) + return -ENOMEM; + } + + for (i = 0; i < sz; i ++) { + uint32_t v = 0; + + for (j = 0; j < 8; ++j) { + int t; + + t = unhexchar(*p++); + if (t < 0) + return -EINVAL; + + v = (v << 4) | t; + } + + c->capability[offset * max + (sz - i - 1)] = v; + } + + return 0; +} + +int bus_creds_add_more(sd_bus_creds *c, uint64_t mask, pid_t pid, pid_t tid) { + uint64_t missing; + int r; + + assert(c); + assert(c->allocated); + + if (!(mask & SD_BUS_CREDS_AUGMENT)) + return 0; + + /* Try to retrieve PID from creds if it wasn't passed to us */ + if (pid > 0) { + c->pid = pid; + c->mask |= SD_BUS_CREDS_PID; + } else if (c->mask & SD_BUS_CREDS_PID) + pid = c->pid; + else + /* Without pid we cannot do much... */ + return 0; + + /* Try to retrieve TID from creds if it wasn't passed to us */ + if (tid <= 0 && (c->mask & SD_BUS_CREDS_TID)) + tid = c->tid; + + /* Calculate what we shall and can add */ + missing = mask & ~(c->mask|SD_BUS_CREDS_PID|SD_BUS_CREDS_TID|SD_BUS_CREDS_UNIQUE_NAME|SD_BUS_CREDS_WELL_KNOWN_NAMES|SD_BUS_CREDS_DESCRIPTION|SD_BUS_CREDS_AUGMENT); + if (missing == 0) + return 0; + + if (tid > 0) { + c->tid = tid; + c->mask |= SD_BUS_CREDS_TID; + } + + if (missing & (SD_BUS_CREDS_PPID | + SD_BUS_CREDS_UID | SD_BUS_CREDS_EUID | SD_BUS_CREDS_SUID | SD_BUS_CREDS_FSUID | + SD_BUS_CREDS_GID | SD_BUS_CREDS_EGID | SD_BUS_CREDS_SGID | SD_BUS_CREDS_FSGID | + SD_BUS_CREDS_SUPPLEMENTARY_GIDS | + SD_BUS_CREDS_EFFECTIVE_CAPS | SD_BUS_CREDS_INHERITABLE_CAPS | + SD_BUS_CREDS_PERMITTED_CAPS | SD_BUS_CREDS_BOUNDING_CAPS)) { + + _cleanup_fclose_ FILE *f = NULL; + const char *p; + + p = procfs_file_alloca(pid, "status"); + + f = fopen(p, "re"); + if (!f) { + if (errno == ENOENT) + return -ESRCH; + else if (!ERRNO_IS_PRIVILEGE(errno)) + return -errno; + } else { + + for (;;) { + _cleanup_free_ char *line = NULL; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + break; + + if (missing & SD_BUS_CREDS_PPID) { + p = startswith(line, "PPid:"); + if (p) { + p += strspn(p, WHITESPACE); + + /* Explicitly check for PPID 0 (which is the case for PID 1) */ + if (!streq(p, "0")) { + r = parse_pid(p, &c->ppid); + if (r < 0) + return r; + + } else + c->ppid = 0; + + c->mask |= SD_BUS_CREDS_PPID; + continue; + } + } + + if (missing & (SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_SUID|SD_BUS_CREDS_FSUID)) { + p = startswith(line, "Uid:"); + if (p) { + unsigned long uid, euid, suid, fsuid; + + p += strspn(p, WHITESPACE); + if (sscanf(p, "%lu %lu %lu %lu", &uid, &euid, &suid, &fsuid) != 4) + return -EIO; + + if (missing & SD_BUS_CREDS_UID) + c->uid = (uid_t) uid; + if (missing & SD_BUS_CREDS_EUID) + c->euid = (uid_t) euid; + if (missing & SD_BUS_CREDS_SUID) + c->suid = (uid_t) suid; + if (missing & SD_BUS_CREDS_FSUID) + c->fsuid = (uid_t) fsuid; + + c->mask |= missing & (SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_SUID|SD_BUS_CREDS_FSUID); + continue; + } + } + + if (missing & (SD_BUS_CREDS_GID|SD_BUS_CREDS_EGID|SD_BUS_CREDS_SGID|SD_BUS_CREDS_FSGID)) { + p = startswith(line, "Gid:"); + if (p) { + unsigned long gid, egid, sgid, fsgid; + + p += strspn(p, WHITESPACE); + if (sscanf(p, "%lu %lu %lu %lu", &gid, &egid, &sgid, &fsgid) != 4) + return -EIO; + + if (missing & SD_BUS_CREDS_GID) + c->gid = (gid_t) gid; + if (missing & SD_BUS_CREDS_EGID) + c->egid = (gid_t) egid; + if (missing & SD_BUS_CREDS_SGID) + c->sgid = (gid_t) sgid; + if (missing & SD_BUS_CREDS_FSGID) + c->fsgid = (gid_t) fsgid; + + c->mask |= missing & (SD_BUS_CREDS_GID|SD_BUS_CREDS_EGID|SD_BUS_CREDS_SGID|SD_BUS_CREDS_FSGID); + continue; + } + } + + if (missing & SD_BUS_CREDS_SUPPLEMENTARY_GIDS) { + p = startswith(line, "Groups:"); + if (p) { + for (;;) { + unsigned long g; + int n = 0; + + p += strspn(p, WHITESPACE); + if (*p == 0) + break; + + if (sscanf(p, "%lu%n", &g, &n) != 1) + return -EIO; + + if (!GREEDY_REALLOC(c->supplementary_gids, c->n_supplementary_gids+1)) + return -ENOMEM; + + c->supplementary_gids[c->n_supplementary_gids++] = (gid_t) g; + p += n; + } + + c->mask |= SD_BUS_CREDS_SUPPLEMENTARY_GIDS; + continue; + } + } + + if (missing & SD_BUS_CREDS_EFFECTIVE_CAPS) { + p = startswith(line, "CapEff:"); + if (p) { + r = parse_caps(c, CAP_OFFSET_EFFECTIVE, p); + if (r < 0) + return r; + + c->mask |= SD_BUS_CREDS_EFFECTIVE_CAPS; + continue; + } + } + + if (missing & SD_BUS_CREDS_PERMITTED_CAPS) { + p = startswith(line, "CapPrm:"); + if (p) { + r = parse_caps(c, CAP_OFFSET_PERMITTED, p); + if (r < 0) + return r; + + c->mask |= SD_BUS_CREDS_PERMITTED_CAPS; + continue; + } + } + + if (missing & SD_BUS_CREDS_INHERITABLE_CAPS) { + p = startswith(line, "CapInh:"); + if (p) { + r = parse_caps(c, CAP_OFFSET_INHERITABLE, p); + if (r < 0) + return r; + + c->mask |= SD_BUS_CREDS_INHERITABLE_CAPS; + continue; + } + } + + if (missing & SD_BUS_CREDS_BOUNDING_CAPS) { + p = startswith(line, "CapBnd:"); + if (p) { + r = parse_caps(c, CAP_OFFSET_BOUNDING, p); + if (r < 0) + return r; + + c->mask |= SD_BUS_CREDS_BOUNDING_CAPS; + continue; + } + } + } + } + } + + if (missing & SD_BUS_CREDS_SELINUX_CONTEXT) { + const char *p; + + p = procfs_file_alloca(pid, "attr/current"); + r = read_one_line_file(p, &c->label); + if (r < 0) { + if (!IN_SET(r, -ENOENT, -EINVAL, -EPERM, -EACCES)) + return r; + } else + c->mask |= SD_BUS_CREDS_SELINUX_CONTEXT; + } + + if (missing & SD_BUS_CREDS_COMM) { + r = pid_get_comm(pid, &c->comm); + if (r < 0) { + if (!ERRNO_IS_PRIVILEGE(r)) + return r; + } else + c->mask |= SD_BUS_CREDS_COMM; + } + + if (missing & SD_BUS_CREDS_EXE) { + r = get_process_exe(pid, &c->exe); + if (r == -ESRCH) { + /* Unfortunately we cannot really distinguish + * the case here where the process does not + * exist, and /proc/$PID/exe being unreadable + * because $PID is a kernel thread. Hence, + * assume it is a kernel thread, and rely on + * that this case is caught with a later + * call. */ + c->exe = NULL; + c->mask |= SD_BUS_CREDS_EXE; + } else if (r < 0) { + if (!ERRNO_IS_PRIVILEGE(r)) + return r; + } else + c->mask |= SD_BUS_CREDS_EXE; + } + + if (missing & SD_BUS_CREDS_CMDLINE) { + const char *p; + + p = procfs_file_alloca(pid, "cmdline"); + r = read_full_virtual_file(p, &c->cmdline, &c->cmdline_size); + if (r == -ENOENT) + return -ESRCH; + if (r < 0) { + if (!ERRNO_IS_PRIVILEGE(r)) + return r; + } else { + if (c->cmdline_size == 0) + c->cmdline = mfree(c->cmdline); + + c->mask |= SD_BUS_CREDS_CMDLINE; + } + } + + if (tid > 0 && (missing & SD_BUS_CREDS_TID_COMM)) { + _cleanup_free_ char *p = NULL; + + if (asprintf(&p, "/proc/"PID_FMT"/task/"PID_FMT"/comm", pid, tid) < 0) + return -ENOMEM; + + r = read_one_line_file(p, &c->tid_comm); + if (r == -ENOENT) + return -ESRCH; + if (r < 0) { + if (!ERRNO_IS_PRIVILEGE(r)) + return r; + } else + c->mask |= SD_BUS_CREDS_TID_COMM; + } + + if (missing & (SD_BUS_CREDS_CGROUP|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_USER_UNIT|SD_BUS_CREDS_SLICE|SD_BUS_CREDS_USER_SLICE|SD_BUS_CREDS_SESSION|SD_BUS_CREDS_OWNER_UID)) { + + if (!c->cgroup) { + r = cg_pid_get_path(NULL, pid, &c->cgroup); + if (r < 0) { + if (!ERRNO_IS_PRIVILEGE(r)) + return r; + } + } + + if (!c->cgroup_root) { + r = cg_get_root_path(&c->cgroup_root); + if (r < 0) + return r; + } + + if (c->cgroup) + c->mask |= missing & (SD_BUS_CREDS_CGROUP|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_USER_UNIT|SD_BUS_CREDS_SLICE|SD_BUS_CREDS_USER_SLICE|SD_BUS_CREDS_SESSION|SD_BUS_CREDS_OWNER_UID); + } + + if (missing & SD_BUS_CREDS_AUDIT_SESSION_ID) { + r = audit_session_from_pid(pid, &c->audit_session_id); + if (r == -ENODATA) { + /* ENODATA means: no audit session id assigned */ + c->audit_session_id = AUDIT_SESSION_INVALID; + c->mask |= SD_BUS_CREDS_AUDIT_SESSION_ID; + } else if (r < 0) { + if (!IN_SET(r, -EOPNOTSUPP, -ENOENT, -EPERM, -EACCES)) + return r; + } else + c->mask |= SD_BUS_CREDS_AUDIT_SESSION_ID; + } + + if (missing & SD_BUS_CREDS_AUDIT_LOGIN_UID) { + r = audit_loginuid_from_pid(pid, &c->audit_login_uid); + if (r == -ENODATA) { + /* ENODATA means: no audit login uid assigned */ + c->audit_login_uid = UID_INVALID; + c->mask |= SD_BUS_CREDS_AUDIT_LOGIN_UID; + } else if (r < 0) { + if (!IN_SET(r, -EOPNOTSUPP, -ENOENT, -EPERM, -EACCES)) + return r; + } else + c->mask |= SD_BUS_CREDS_AUDIT_LOGIN_UID; + } + + if (missing & SD_BUS_CREDS_TTY) { + r = get_ctty(pid, NULL, &c->tty); + if (r == -ENXIO) { + /* ENXIO means: process has no controlling TTY */ + c->tty = NULL; + c->mask |= SD_BUS_CREDS_TTY; + } else if (r < 0) { + if (!IN_SET(r, -EPERM, -EACCES, -ENOENT)) + return r; + } else + c->mask |= SD_BUS_CREDS_TTY; + } + + /* In case only the exe path was to be read we cannot distinguish the case where the exe path was + * unreadable because the process was a kernel thread, or when the process didn't exist at + * all. Hence, let's do a final check, to be sure. */ + r = pid_is_alive(pid); + if (r < 0) + return r; + if (r == 0) + return -ESRCH; + + if (tid > 0 && tid != pid && pid_is_unwaited(tid) == 0) + return -ESRCH; + + c->augmented = missing & c->mask; + + return 0; +} + +int bus_creds_extend_by_pid(sd_bus_creds *c, uint64_t mask, sd_bus_creds **ret) { + _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *n = NULL; + int r; + + assert(c); + assert(ret); + + if ((mask & ~c->mask) == 0 || (!(mask & SD_BUS_CREDS_AUGMENT))) { + /* There's already all data we need, or augmentation + * wasn't turned on. */ + + *ret = sd_bus_creds_ref(c); + return 0; + } + + n = bus_creds_new(); + if (!n) + return -ENOMEM; + + /* Copy the original data over */ + + if (c->mask & mask & SD_BUS_CREDS_PID) { + n->pid = c->pid; + n->mask |= SD_BUS_CREDS_PID; + } + + if (c->mask & mask & SD_BUS_CREDS_TID) { + n->tid = c->tid; + n->mask |= SD_BUS_CREDS_TID; + } + + if (c->mask & mask & SD_BUS_CREDS_PPID) { + n->ppid = c->ppid; + n->mask |= SD_BUS_CREDS_PPID; + } + + if (c->mask & mask & SD_BUS_CREDS_UID) { + n->uid = c->uid; + n->mask |= SD_BUS_CREDS_UID; + } + + if (c->mask & mask & SD_BUS_CREDS_EUID) { + n->euid = c->euid; + n->mask |= SD_BUS_CREDS_EUID; + } + + if (c->mask & mask & SD_BUS_CREDS_SUID) { + n->suid = c->suid; + n->mask |= SD_BUS_CREDS_SUID; + } + + if (c->mask & mask & SD_BUS_CREDS_FSUID) { + n->fsuid = c->fsuid; + n->mask |= SD_BUS_CREDS_FSUID; + } + + if (c->mask & mask & SD_BUS_CREDS_GID) { + n->gid = c->gid; + n->mask |= SD_BUS_CREDS_GID; + } + + if (c->mask & mask & SD_BUS_CREDS_EGID) { + n->egid = c->egid; + n->mask |= SD_BUS_CREDS_EGID; + } + + if (c->mask & mask & SD_BUS_CREDS_SGID) { + n->sgid = c->sgid; + n->mask |= SD_BUS_CREDS_SGID; + } + + if (c->mask & mask & SD_BUS_CREDS_FSGID) { + n->fsgid = c->fsgid; + n->mask |= SD_BUS_CREDS_FSGID; + } + + if (c->mask & mask & SD_BUS_CREDS_SUPPLEMENTARY_GIDS) { + if (c->supplementary_gids) { + n->supplementary_gids = newdup(gid_t, c->supplementary_gids, c->n_supplementary_gids); + if (!n->supplementary_gids) + return -ENOMEM; + n->n_supplementary_gids = c->n_supplementary_gids; + } else { + n->supplementary_gids = NULL; + n->n_supplementary_gids = 0; + } + + n->mask |= SD_BUS_CREDS_SUPPLEMENTARY_GIDS; + } + + if (c->mask & mask & SD_BUS_CREDS_COMM) { + assert(c->comm); + + n->comm = strdup(c->comm); + if (!n->comm) + return -ENOMEM; + + n->mask |= SD_BUS_CREDS_COMM; + } + + if (c->mask & mask & SD_BUS_CREDS_TID_COMM) { + assert(c->tid_comm); + + n->tid_comm = strdup(c->tid_comm); + if (!n->tid_comm) + return -ENOMEM; + + n->mask |= SD_BUS_CREDS_TID_COMM; + } + + if (c->mask & mask & SD_BUS_CREDS_EXE) { + if (c->exe) { + n->exe = strdup(c->exe); + if (!n->exe) + return -ENOMEM; + } else + n->exe = NULL; + + n->mask |= SD_BUS_CREDS_EXE; + } + + if (c->mask & mask & SD_BUS_CREDS_CMDLINE) { + if (c->cmdline) { + n->cmdline = memdup(c->cmdline, c->cmdline_size); + if (!n->cmdline) + return -ENOMEM; + + n->cmdline_size = c->cmdline_size; + } else { + n->cmdline = NULL; + n->cmdline_size = 0; + } + + n->mask |= SD_BUS_CREDS_CMDLINE; + } + + if (c->mask & mask & (SD_BUS_CREDS_CGROUP|SD_BUS_CREDS_SESSION|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_USER_UNIT|SD_BUS_CREDS_SLICE|SD_BUS_CREDS_USER_SLICE|SD_BUS_CREDS_OWNER_UID)) { + assert(c->cgroup); + + n->cgroup = strdup(c->cgroup); + if (!n->cgroup) + return -ENOMEM; + + n->cgroup_root = strdup(c->cgroup_root); + if (!n->cgroup_root) + return -ENOMEM; + + n->mask |= mask & (SD_BUS_CREDS_CGROUP|SD_BUS_CREDS_SESSION|SD_BUS_CREDS_UNIT|SD_BUS_CREDS_USER_UNIT|SD_BUS_CREDS_SLICE|SD_BUS_CREDS_USER_SLICE|SD_BUS_CREDS_OWNER_UID); + } + + if (c->mask & mask & (SD_BUS_CREDS_EFFECTIVE_CAPS|SD_BUS_CREDS_PERMITTED_CAPS|SD_BUS_CREDS_INHERITABLE_CAPS|SD_BUS_CREDS_BOUNDING_CAPS)) { + assert(c->capability); + + n->capability = memdup(c->capability, DIV_ROUND_UP(cap_last_cap()+1, 32U) * 4 * 4); + if (!n->capability) + return -ENOMEM; + + n->mask |= c->mask & mask & (SD_BUS_CREDS_EFFECTIVE_CAPS|SD_BUS_CREDS_PERMITTED_CAPS|SD_BUS_CREDS_INHERITABLE_CAPS|SD_BUS_CREDS_BOUNDING_CAPS); + } + + if (c->mask & mask & SD_BUS_CREDS_SELINUX_CONTEXT) { + assert(c->label); + + n->label = strdup(c->label); + if (!n->label) + return -ENOMEM; + n->mask |= SD_BUS_CREDS_SELINUX_CONTEXT; + } + + if (c->mask & mask & SD_BUS_CREDS_AUDIT_SESSION_ID) { + n->audit_session_id = c->audit_session_id; + n->mask |= SD_BUS_CREDS_AUDIT_SESSION_ID; + } + if (c->mask & mask & SD_BUS_CREDS_AUDIT_LOGIN_UID) { + n->audit_login_uid = c->audit_login_uid; + n->mask |= SD_BUS_CREDS_AUDIT_LOGIN_UID; + } + + if (c->mask & mask & SD_BUS_CREDS_TTY) { + if (c->tty) { + n->tty = strdup(c->tty); + if (!n->tty) + return -ENOMEM; + } else + n->tty = NULL; + n->mask |= SD_BUS_CREDS_TTY; + } + + if (c->mask & mask & SD_BUS_CREDS_UNIQUE_NAME) { + assert(c->unique_name); + + n->unique_name = strdup(c->unique_name); + if (!n->unique_name) + return -ENOMEM; + n->mask |= SD_BUS_CREDS_UNIQUE_NAME; + } + + if (c->mask & mask & SD_BUS_CREDS_WELL_KNOWN_NAMES) { + if (strv_isempty(c->well_known_names)) + n->well_known_names = NULL; + else { + n->well_known_names = strv_copy(c->well_known_names); + if (!n->well_known_names) + return -ENOMEM; + } + n->well_known_names_driver = c->well_known_names_driver; + n->well_known_names_local = c->well_known_names_local; + n->mask |= SD_BUS_CREDS_WELL_KNOWN_NAMES; + } + + if (c->mask & mask & SD_BUS_CREDS_DESCRIPTION) { + assert(c->description); + n->description = strdup(c->description); + if (!n->description) + return -ENOMEM; + n->mask |= SD_BUS_CREDS_DESCRIPTION; + } + + n->augmented = c->augmented & n->mask; + + /* Get more data */ + + r = bus_creds_add_more(n, mask, 0, 0); + if (r < 0) + return r; + + *ret = TAKE_PTR(n); + + return 0; +} diff --git a/src/libsystemd/sd-bus/bus-creds.h b/src/libsystemd/sd-bus/bus-creds.h new file mode 100644 index 0000000..7806d9e --- /dev/null +++ b/src/libsystemd/sd-bus/bus-creds.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "sd-bus.h" + +struct sd_bus_creds { + bool allocated; + unsigned n_ref; + + uint64_t mask; + uint64_t augmented; + + uid_t uid; + uid_t euid; + uid_t suid; + uid_t fsuid; + gid_t gid; + gid_t egid; + gid_t sgid; + gid_t fsgid; + + gid_t *supplementary_gids; + unsigned n_supplementary_gids; + + pid_t ppid; + pid_t pid; + pid_t tid; + + char *comm; + char *tid_comm; + char *exe; + + char *cmdline; + size_t cmdline_size; + char **cmdline_array; + + char *cgroup; + char *session; + char *unit; + char *user_unit; + char *slice; + char *user_slice; + + char *tty; + + uint32_t *capability; + + uint32_t audit_session_id; + uid_t audit_login_uid; + + char *label; + + char *unique_name; + + char **well_known_names; + bool well_known_names_driver:1; + bool well_known_names_local:1; + + char *cgroup_root; + + char *description, *unescaped_description; +}; + +sd_bus_creds* bus_creds_new(void); + +void bus_creds_done(sd_bus_creds *c); + +int bus_creds_add_more(sd_bus_creds *c, uint64_t mask, pid_t pid, pid_t tid); + +int bus_creds_extend_by_pid(sd_bus_creds *c, uint64_t mask, sd_bus_creds **ret); diff --git a/src/libsystemd/sd-bus/bus-dump.c b/src/libsystemd/sd-bus/bus-dump.c new file mode 100644 index 0000000..6d24f3b --- /dev/null +++ b/src/libsystemd/sd-bus/bus-dump.c @@ -0,0 +1,649 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <sys/time.h> + +#include "alloc-util.h" +#include "bus-dump.h" +#include "bus-internal.h" +#include "bus-message.h" +#include "bus-type.h" +#include "cap-list.h" +#include "capability-util.h" +#include "fileio.h" +#include "format-util.h" +#include "glyph-util.h" +#include "macro.h" +#include "pcapng.h" +#include "string-util.h" +#include "strv.h" +#include "terminal-util.h" + +static char *indent(unsigned level, uint64_t flags) { + char *p; + unsigned n, i = 0; + + n = 0; + + if (flags & SD_BUS_MESSAGE_DUMP_SUBTREE_ONLY && level > 0) + level -= 1; + + if (flags & SD_BUS_MESSAGE_DUMP_WITH_HEADER) + n += 2; + + p = new(char, n + level*8 + 1); + if (!p) + return NULL; + + if (flags & SD_BUS_MESSAGE_DUMP_WITH_HEADER) { + p[i++] = ' '; + p[i++] = ' '; + } + + memset(p + i, ' ', level*8); + p[i + level*8] = 0; + + return p; +} + +_public_ int sd_bus_message_dump(sd_bus_message *m, FILE *f, uint64_t flags) { + unsigned level = 1; + int r; + + assert_return(m, -EINVAL); + assert_return((flags & ~_SD_BUS_MESSAGE_DUMP_KNOWN_FLAGS) == 0, -EINVAL); + + if (!f) + f = stdout; + + if (flags & SD_BUS_MESSAGE_DUMP_WITH_HEADER) { + usec_t ts = m->realtime; + + if (ts == 0) + ts = now(CLOCK_REALTIME); + + fprintf(f, + "%s%s%s Type=%s%s%s Endian=%c Flags=%u Version=%u", + m->header->type == SD_BUS_MESSAGE_METHOD_ERROR ? ansi_highlight_red() : + m->header->type == SD_BUS_MESSAGE_METHOD_RETURN ? ansi_highlight_green() : + m->header->type != SD_BUS_MESSAGE_SIGNAL ? ansi_highlight() : "", + special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET), + ansi_normal(), + + ansi_highlight(), + bus_message_type_to_string(m->header->type) ?: "(unknown)", + ansi_normal(), + + m->header->endian, + m->header->flags, + m->header->version); + + /* Display synthetic message serial number in a more readable + * format than UINT32_MAX */ + if (BUS_MESSAGE_COOKIE(m) == UINT32_MAX) + fprintf(f, " Cookie=-1"); + else + fprintf(f, " Cookie=%" PRIu64, BUS_MESSAGE_COOKIE(m)); + + if (m->reply_cookie != 0) + fprintf(f, " ReplyCookie=%" PRIu64, m->reply_cookie); + + fprintf(f, " Timestamp=\"%s\"\n", strna(FORMAT_TIMESTAMP_STYLE(ts, TIMESTAMP_US_UTC))); + + if (m->sender) + fprintf(f, " Sender=%s%s%s", ansi_highlight(), m->sender, ansi_normal()); + if (m->destination) + fprintf(f, " Destination=%s%s%s", ansi_highlight(), m->destination, ansi_normal()); + if (m->path) + fprintf(f, " Path=%s%s%s", ansi_highlight(), m->path, ansi_normal()); + if (m->interface) + fprintf(f, " Interface=%s%s%s", ansi_highlight(), m->interface, ansi_normal()); + if (m->member) + fprintf(f, " Member=%s%s%s", ansi_highlight(), m->member, ansi_normal()); + + if (m->sender || m->destination || m->path || m->interface || m->member) + fputs("\n", f); + + if (sd_bus_error_is_set(&m->error)) + fprintf(f, + " ErrorName=%s%s%s" + " ErrorMessage=%s\"%s\"%s\n", + ansi_highlight_red(), strna(m->error.name), ansi_normal(), + ansi_highlight_red(), strna(m->error.message), ansi_normal()); + + if (m->monotonic != 0) + fprintf(f, " Monotonic="USEC_FMT, m->monotonic); + if (m->realtime != 0) + fprintf(f, " Realtime="USEC_FMT, m->realtime); + if (m->seqnum != 0) + fprintf(f, " SequenceNumber=%"PRIu64, m->seqnum); + + if (m->monotonic != 0 || m->realtime != 0 || m->seqnum != 0) + fputs("\n", f); + + bus_creds_dump(&m->creds, f, true); + } + + r = sd_bus_message_rewind(m, !(flags & SD_BUS_MESSAGE_DUMP_SUBTREE_ONLY)); + if (r < 0) + return log_error_errno(r, "Failed to rewind: %m"); + + if (!(flags & SD_BUS_MESSAGE_DUMP_SUBTREE_ONLY)) { + _cleanup_free_ char *prefix = NULL; + + prefix = indent(0, flags); + if (!prefix) + return log_oom(); + + fprintf(f, "%sMESSAGE \"%s\" {\n", prefix, strempty(m->root_container.signature)); + } + + for (;;) { + _cleanup_free_ char *prefix = NULL; + const char *contents = NULL; + char type; + union { + uint8_t u8; + uint16_t u16; + int16_t s16; + uint32_t u32; + int32_t s32; + uint64_t u64; + int64_t s64; + double d64; + const char *string; + int i; + } basic; + + r = sd_bus_message_peek_type(m, &type, &contents); + if (r < 0) + return log_error_errno(r, "Failed to peek type: %m"); + + if (r == 0) { + if (level <= 1) + break; + + r = sd_bus_message_exit_container(m); + if (r < 0) + return log_error_errno(r, "Failed to exit container: %m"); + + level--; + + prefix = indent(level, flags); + if (!prefix) + return log_oom(); + + fprintf(f, "%s};\n", prefix); + continue; + } + + prefix = indent(level, flags); + if (!prefix) + return log_oom(); + + if (bus_type_is_container(type) > 0) { + r = sd_bus_message_enter_container(m, type, contents); + if (r < 0) + return log_error_errno(r, "Failed to enter container: %m"); + + if (type == SD_BUS_TYPE_ARRAY) + fprintf(f, "%sARRAY \"%s\" {\n", prefix, contents); + else if (type == SD_BUS_TYPE_VARIANT) + fprintf(f, "%sVARIANT \"%s\" {\n", prefix, contents); + else if (type == SD_BUS_TYPE_STRUCT) + fprintf(f, "%sSTRUCT \"%s\" {\n", prefix, contents); + else if (type == SD_BUS_TYPE_DICT_ENTRY) + fprintf(f, "%sDICT_ENTRY \"%s\" {\n", prefix, contents); + + level++; + + continue; + } + + r = sd_bus_message_read_basic(m, type, &basic); + if (r < 0) + return log_error_errno(r, "Failed to get basic: %m"); + + assert(r > 0); + + switch (type) { + + case SD_BUS_TYPE_BYTE: + fprintf(f, "%sBYTE %s%u%s;\n", prefix, ansi_highlight(), basic.u8, ansi_normal()); + break; + + case SD_BUS_TYPE_BOOLEAN: + fprintf(f, "%sBOOLEAN %s%s%s;\n", prefix, ansi_highlight(), true_false(basic.i), ansi_normal()); + break; + + case SD_BUS_TYPE_INT16: + fprintf(f, "%sINT16 %s%i%s;\n", prefix, ansi_highlight(), basic.s16, ansi_normal()); + break; + + case SD_BUS_TYPE_UINT16: + fprintf(f, "%sUINT16 %s%u%s;\n", prefix, ansi_highlight(), basic.u16, ansi_normal()); + break; + + case SD_BUS_TYPE_INT32: + fprintf(f, "%sINT32 %s%i%s;\n", prefix, ansi_highlight(), basic.s32, ansi_normal()); + break; + + case SD_BUS_TYPE_UINT32: + fprintf(f, "%sUINT32 %s%u%s;\n", prefix, ansi_highlight(), basic.u32, ansi_normal()); + break; + + case SD_BUS_TYPE_INT64: + fprintf(f, "%sINT64 %s%"PRIi64"%s;\n", prefix, ansi_highlight(), basic.s64, ansi_normal()); + break; + + case SD_BUS_TYPE_UINT64: + fprintf(f, "%sUINT64 %s%"PRIu64"%s;\n", prefix, ansi_highlight(), basic.u64, ansi_normal()); + break; + + case SD_BUS_TYPE_DOUBLE: + fprintf(f, "%sDOUBLE %s%g%s;\n", prefix, ansi_highlight(), basic.d64, ansi_normal()); + break; + + case SD_BUS_TYPE_STRING: + fprintf(f, "%sSTRING \"%s%s%s\";\n", prefix, ansi_highlight(), basic.string, ansi_normal()); + break; + + case SD_BUS_TYPE_OBJECT_PATH: + fprintf(f, "%sOBJECT_PATH \"%s%s%s\";\n", prefix, ansi_highlight(), basic.string, ansi_normal()); + break; + + case SD_BUS_TYPE_SIGNATURE: + fprintf(f, "%sSIGNATURE \"%s%s%s\";\n", prefix, ansi_highlight(), basic.string, ansi_normal()); + break; + + case SD_BUS_TYPE_UNIX_FD: + fprintf(f, "%sUNIX_FD %s%i%s;\n", prefix, ansi_highlight(), basic.i, ansi_normal()); + break; + + default: + assert_not_reached(); + } + } + + if (!(flags & SD_BUS_MESSAGE_DUMP_SUBTREE_ONLY)) { + _cleanup_free_ char *prefix = NULL; + + prefix = indent(0, flags); + if (!prefix) + return log_oom(); + + fprintf(f, "%s};\n\n", prefix); + } + + return 0; +} + +static void dump_capabilities( + sd_bus_creds *c, + FILE *f, + const char *name, + bool terse, + int (*has)(sd_bus_creds *c, int capability)) { + + unsigned long i, last_cap; + unsigned n = 0; + int r; + + assert(c); + assert(f); + assert(name); + assert(has); + + i = 0; + r = has(c, i); + if (r < 0) + return; + + fprintf(f, "%s%s=%s", terse ? " " : "", name, terse ? "" : ansi_highlight()); + last_cap = cap_last_cap(); + + for (;;) { + if (r > 0) { + + if (n > 0) + fputc(' ', f); + if (n % 4 == 3) + fprintf(f, terse ? "\n " : "\n "); + + fprintf(f, "%s", strna(capability_to_name(i))); + n++; + } + + i++; + + if (i > last_cap) + break; + + r = has(c, i); + } + + fputs("\n", f); + + if (!terse) + fputs(ansi_normal(), f); +} + +int bus_creds_dump(sd_bus_creds *c, FILE *f, bool terse) { + uid_t owner, audit_loginuid; + uint32_t audit_sessionid; + char **cmdline = NULL, **well_known = NULL; + const char *prefix, *color, *suffix, *s; + int r, q, v, w, z; + + assert(c); + + if (!f) + f = stdout; + + if (terse) { + prefix = " "; + suffix = ""; + color = ""; + } else { + const char *off; + + prefix = ""; + color = ansi_highlight(); + + off = ansi_normal(); + suffix = strjoina(off, "\n"); + } + + if (c->mask & SD_BUS_CREDS_PID) + fprintf(f, "%sPID=%s"PID_FMT"%s", prefix, color, c->pid, suffix); + if (c->mask & SD_BUS_CREDS_TID) + fprintf(f, "%sTID=%s"PID_FMT"%s", prefix, color, c->tid, suffix); + if (c->mask & SD_BUS_CREDS_PPID) { + if (c->ppid == 0) + fprintf(f, "%sPPID=%sn/a%s", prefix, color, suffix); + else + fprintf(f, "%sPPID=%s"PID_FMT"%s", prefix, color, c->ppid, suffix); + } + if (c->mask & SD_BUS_CREDS_TTY) + fprintf(f, "%sTTY=%s%s%s", prefix, color, strna(c->tty), suffix); + + if (terse && ((c->mask & (SD_BUS_CREDS_PID|SD_BUS_CREDS_TID|SD_BUS_CREDS_PPID|SD_BUS_CREDS_TTY)))) + fputs("\n", f); + + if (c->mask & SD_BUS_CREDS_UID) + fprintf(f, "%sUID=%s"UID_FMT"%s", prefix, color, c->uid, suffix); + if (c->mask & SD_BUS_CREDS_EUID) + fprintf(f, "%sEUID=%s"UID_FMT"%s", prefix, color, c->euid, suffix); + if (c->mask & SD_BUS_CREDS_SUID) + fprintf(f, "%sSUID=%s"UID_FMT"%s", prefix, color, c->suid, suffix); + if (c->mask & SD_BUS_CREDS_FSUID) + fprintf(f, "%sFSUID=%s"UID_FMT"%s", prefix, color, c->fsuid, suffix); + r = sd_bus_creds_get_owner_uid(c, &owner); + if (r >= 0) + fprintf(f, "%sOwnerUID=%s"UID_FMT"%s", prefix, color, owner, suffix); + if (c->mask & SD_BUS_CREDS_GID) + fprintf(f, "%sGID=%s"GID_FMT"%s", prefix, color, c->gid, suffix); + if (c->mask & SD_BUS_CREDS_EGID) + fprintf(f, "%sEGID=%s"GID_FMT"%s", prefix, color, c->egid, suffix); + if (c->mask & SD_BUS_CREDS_SGID) + fprintf(f, "%sSGID=%s"GID_FMT"%s", prefix, color, c->sgid, suffix); + if (c->mask & SD_BUS_CREDS_FSGID) + fprintf(f, "%sFSGID=%s"GID_FMT"%s", prefix, color, c->fsgid, suffix); + + if (c->mask & SD_BUS_CREDS_SUPPLEMENTARY_GIDS) { + fprintf(f, "%sSupplementaryGIDs=%s", prefix, color); + for (unsigned i = 0; i < c->n_supplementary_gids; i++) + fprintf(f, "%s" GID_FMT, i > 0 ? " " : "", c->supplementary_gids[i]); + fprintf(f, "%s", suffix); + } + + if (terse && ((c->mask & (SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_SUID|SD_BUS_CREDS_FSUID| + SD_BUS_CREDS_GID|SD_BUS_CREDS_EGID|SD_BUS_CREDS_SGID|SD_BUS_CREDS_FSGID| + SD_BUS_CREDS_SUPPLEMENTARY_GIDS)) || r >= 0)) + fputs("\n", f); + + if (c->mask & SD_BUS_CREDS_COMM) + fprintf(f, "%sComm=%s%s%s", prefix, color, c->comm, suffix); + if (c->mask & SD_BUS_CREDS_TID_COMM) + fprintf(f, "%sTIDComm=%s%s%s", prefix, color, c->tid_comm, suffix); + if (c->mask & SD_BUS_CREDS_EXE) + fprintf(f, "%sExe=%s%s%s", prefix, color, strna(c->exe), suffix); + + if (terse && (c->mask & (SD_BUS_CREDS_EXE|SD_BUS_CREDS_COMM|SD_BUS_CREDS_TID_COMM))) + fputs("\n", f); + + r = sd_bus_creds_get_cmdline(c, &cmdline); + if (r >= 0) { + fprintf(f, "%sCommandLine=%s", prefix, color); + STRV_FOREACH(i, cmdline) { + if (i != cmdline) + fputc(' ', f); + + fputs(*i, f); + } + + fprintf(f, "%s", suffix); + } else if (r != -ENODATA) + fprintf(f, "%sCommandLine=%sn/a%s", prefix, color, suffix); + + if (c->mask & SD_BUS_CREDS_SELINUX_CONTEXT) + fprintf(f, "%sLabel=%s%s%s", prefix, color, c->label, suffix); + if (c->mask & SD_BUS_CREDS_DESCRIPTION) + fprintf(f, "%sDescription=%s%s%s", prefix, color, c->description, suffix); + + if (terse && (c->mask & (SD_BUS_CREDS_SELINUX_CONTEXT|SD_BUS_CREDS_DESCRIPTION))) + fputs("\n", f); + + if (c->mask & SD_BUS_CREDS_CGROUP) + fprintf(f, "%sCGroup=%s%s%s", prefix, color, c->cgroup, suffix); + s = NULL; + r = sd_bus_creds_get_unit(c, &s); + if (r != -ENODATA) + fprintf(f, "%sUnit=%s%s%s", prefix, color, strna(s), suffix); + s = NULL; + v = sd_bus_creds_get_slice(c, &s); + if (v != -ENODATA) + fprintf(f, "%sSlice=%s%s%s", prefix, color, strna(s), suffix); + s = NULL; + q = sd_bus_creds_get_user_unit(c, &s); + if (q != -ENODATA) + fprintf(f, "%sUserUnit=%s%s%s", prefix, color, strna(s), suffix); + s = NULL; + w = sd_bus_creds_get_user_slice(c, &s); + if (w != -ENODATA) + fprintf(f, "%sUserSlice=%s%s%s", prefix, color, strna(s), suffix); + s = NULL; + z = sd_bus_creds_get_session(c, &s); + if (z != -ENODATA) + fprintf(f, "%sSession=%s%s%s", prefix, color, strna(s), suffix); + + if (terse && ((c->mask & SD_BUS_CREDS_CGROUP) || r != -ENODATA || q != -ENODATA || v != -ENODATA || w != -ENODATA || z != -ENODATA)) + fputs("\n", f); + + r = sd_bus_creds_get_audit_login_uid(c, &audit_loginuid); + if (r >= 0) + fprintf(f, "%sAuditLoginUID=%s"UID_FMT"%s", prefix, color, audit_loginuid, suffix); + else if (r != -ENODATA) + fprintf(f, "%sAuditLoginUID=%sn/a%s", prefix, color, suffix); + q = sd_bus_creds_get_audit_session_id(c, &audit_sessionid); + if (q >= 0) + fprintf(f, "%sAuditSessionID=%s%"PRIu32"%s", prefix, color, audit_sessionid, suffix); + else if (q != -ENODATA) + fprintf(f, "%sAuditSessionID=%sn/a%s", prefix, color, suffix); + + if (terse && (r != -ENODATA || q != -ENODATA)) + fputs("\n", f); + + if (c->mask & SD_BUS_CREDS_UNIQUE_NAME) + fprintf(f, "%sUniqueName=%s%s%s", prefix, color, c->unique_name, suffix); + + if (sd_bus_creds_get_well_known_names(c, &well_known) >= 0) { + fprintf(f, "%sWellKnownNames=%s", prefix, color); + STRV_FOREACH(i, well_known) { + if (i != well_known) + fputc(' ', f); + + fputs(*i, f); + } + + fprintf(f, "%s", suffix); + } + + if (terse && (c->mask & SD_BUS_CREDS_UNIQUE_NAME || well_known)) + fputc('\n', f); + + dump_capabilities(c, f, "EffectiveCapabilities", terse, sd_bus_creds_has_effective_cap); + dump_capabilities(c, f, "PermittedCapabilities", terse, sd_bus_creds_has_permitted_cap); + dump_capabilities(c, f, "InheritableCapabilities", terse, sd_bus_creds_has_inheritable_cap); + dump_capabilities(c, f, "BoundingCapabilities", terse, sd_bus_creds_has_bounding_cap); + + return 0; +} + +static uint16_t pcapng_optlen(size_t len) { + return ALIGN4(len + sizeof(struct pcapng_option)); +} + +static void pcapng_putopt(FILE *f, uint16_t code, const void *data, size_t len) { + struct pcapng_option opt = { + .code = code, + .length = len, + }; + + assert(f); + assert((uint16_t) len == len); + assert(data || len == 0); + + fwrite(&opt, 1, sizeof(opt), f); + if (len > 0) { + size_t pad = ALIGN4(len) - len; + + fwrite(data, 1, len, f); + + assert(pad < sizeof(uint32_t)); + while (pad-- > 0) + fputc('\0', f); + } +} + +static void pcapng_section_header(FILE *f, const char *os, const char *app) { + uint32_t len; + + assert(f); + + /* determine length of section header and options */ + len = sizeof(struct pcapng_section); + if (os) + len += pcapng_optlen(strlen(os)); + if (app) + len += pcapng_optlen(strlen(app)); + len += pcapng_optlen(0); /* OPT_END */ + len += sizeof(uint32_t); /* trailer length */ + + struct pcapng_section hdr = { + .block_type = PCAPNG_SECTION_BLOCK, + .block_length = len, + .byte_order_magic = PCAPNG_BYTE_ORDER_MAGIC, + .major_version = PCAPNG_MAJOR_VERS, + .minor_version = PCAPNG_MINOR_VERS, + .section_length = UINT64_MAX, + }; + + fwrite(&hdr, 1, sizeof(hdr), f); + if (os) + pcapng_putopt(f, PCAPNG_SHB_OS, os, strlen(os)); + if (app) + pcapng_putopt(f, PCAPNG_SHB_USERAPPL, app, strlen(app)); + pcapng_putopt(f, PCAPNG_OPT_END, NULL, 0); + fwrite(&len, 1, sizeof(uint32_t), f); +} + +/* Only have a single instance of dbus pseudo interface */ +static void pcapng_interface_header(FILE *f, size_t snaplen) { + uint32_t len; + + assert(f); + assert(snaplen > 0); + assert((size_t) (uint32_t) snaplen == snaplen); + + /* no options (yet) */ + len = sizeof(struct pcapng_interface_block) + sizeof(uint32_t); + struct pcapng_interface_block hdr = { + .block_type = PCAPNG_INTERFACE_BLOCK, + .block_length = len, + .link_type = 231, /* D-Bus */ + .snap_len = snaplen, + }; + + fwrite(&hdr, 1, sizeof(hdr), f); + fwrite(&len, 1, sizeof(uint32_t), f); +} + +int bus_pcap_header(size_t snaplen, const char *os, const char *info, FILE *f) { + if (!f) + f = stdout; + + pcapng_section_header(f, os, info); + pcapng_interface_header(f, snaplen); + return fflush_and_check(f); +} + +int bus_message_pcap_frame(sd_bus_message *m, size_t snaplen, FILE *f) { + struct bus_body_part *part; + size_t msglen, caplen, pad; + uint32_t length; + uint64_t ts; + unsigned i; + size_t w; + + if (!f) + f = stdout; + + assert(m); + assert(snaplen > 0); + assert((size_t) (uint32_t) snaplen == snaplen); + + ts = m->realtime ?: now(CLOCK_REALTIME); + msglen = BUS_MESSAGE_SIZE(m); + caplen = MIN(msglen, snaplen); + pad = ALIGN4(caplen) - caplen; + + /* packet block has no options */ + length = sizeof(struct pcapng_enhance_packet_block) + + caplen + pad + sizeof(uint32_t); + + struct pcapng_enhance_packet_block epb = { + .block_type = PCAPNG_ENHANCED_PACKET_BLOCK, + .block_length = length, + .interface_id = 0, + .timestamp_hi = (uint32_t)(ts >> 32), + .timestamp_lo = (uint32_t)ts, + .original_length = msglen, + .capture_length = caplen, + }; + + /* write the pcapng enhanced packet block header */ + fwrite(&epb, 1, sizeof(epb), f); + + /* write the dbus header */ + w = MIN(BUS_MESSAGE_BODY_BEGIN(m), snaplen); + fwrite(m->header, 1, w, f); + snaplen -= w; + + /* write the dbus body */ + MESSAGE_FOREACH_PART(part, i, m) { + if (snaplen <= 0) + break; + + w = MIN(part->size, snaplen); + fwrite(part->data, 1, w, f); + snaplen -= w; + } + + while (pad-- > 0) + fputc('\0', f); + + /* trailing block length */ + fwrite(&length, 1, sizeof(uint32_t), f); + + return fflush_and_check(f); +} diff --git a/src/libsystemd/sd-bus/bus-dump.h b/src/libsystemd/sd-bus/bus-dump.h new file mode 100644 index 0000000..e7470ba --- /dev/null +++ b/src/libsystemd/sd-bus/bus-dump.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> +#include <stdio.h> + +#include "sd-bus.h" + +int bus_creds_dump(sd_bus_creds *c, FILE *f, bool terse); + +int bus_pcap_header(size_t snaplen, const char *os, const char *app, FILE *f); +int bus_message_pcap_frame(sd_bus_message *m, size_t snaplen, FILE *f); diff --git a/src/libsystemd/sd-bus/bus-error.c b/src/libsystemd/sd-bus/bus-error.c new file mode 100644 index 0000000..77b2e1a --- /dev/null +++ b/src/libsystemd/sd-bus/bus-error.c @@ -0,0 +1,628 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> + +#include "sd-bus.h" + +#include "alloc-util.h" +#include "bus-error.h" +#include "errno-list.h" +#include "errno-util.h" +#include "string-util.h" +#include "strv.h" + +BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map bus_standard_errors[] = { + SD_BUS_ERROR_MAP(SD_BUS_ERROR_FAILED, EACCES), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_NO_MEMORY, ENOMEM), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_SERVICE_UNKNOWN, EHOSTUNREACH), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_NAME_HAS_NO_OWNER, ENXIO), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_NO_REPLY, ETIMEDOUT), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_IO_ERROR, EIO), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_BAD_ADDRESS, EADDRNOTAVAIL), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_NOT_SUPPORTED, EOPNOTSUPP), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_LIMITS_EXCEEDED, ENOBUFS), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_ACCESS_DENIED, EACCES), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_AUTH_FAILED, EACCES), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_NO_SERVER, EHOSTDOWN), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_TIMEOUT, ETIMEDOUT), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_NO_NETWORK, ENONET), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_ADDRESS_IN_USE, EADDRINUSE), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_DISCONNECTED, ECONNRESET), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_INVALID_ARGS, EINVAL), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_FILE_NOT_FOUND, ENOENT), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_FILE_EXISTS, EEXIST), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_UNKNOWN_METHOD, EBADR), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_UNKNOWN_OBJECT, EBADR), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_UNKNOWN_INTERFACE, EBADR), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_UNKNOWN_PROPERTY, EBADR), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_PROPERTY_READ_ONLY, EROFS), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_UNIX_PROCESS_ID_UNKNOWN, ESRCH), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_INVALID_SIGNATURE, EINVAL), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_INCONSISTENT_MESSAGE, EBADMSG), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_TIMED_OUT, ETIMEDOUT), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_MATCH_RULE_NOT_FOUND, ENOENT), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_MATCH_RULE_INVALID, EINVAL), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_INTERACTIVE_AUTHORIZATION_REQUIRED, EACCES), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_INVALID_FILE_CONTENT, EINVAL), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_SELINUX_SECURITY_CONTEXT_UNKNOWN, ESRCH), + SD_BUS_ERROR_MAP(SD_BUS_ERROR_OBJECT_PATH_IN_USE, EBUSY), + SD_BUS_ERROR_MAP_END +}; + +/* GCC maps this magically to the beginning and end of the BUS_ERROR_MAP section */ +extern const sd_bus_error_map __start_SYSTEMD_BUS_ERROR_MAP[]; +extern const sd_bus_error_map __stop_SYSTEMD_BUS_ERROR_MAP[]; + +/* Additional maps registered with sd_bus_error_add_map() are in this + * NULL terminated array */ +static const sd_bus_error_map **additional_error_maps = NULL; + +static int bus_error_name_to_errno(const char *name) { + const sd_bus_error_map **map, *m; + const char *p; + int r; + + if (!name) + return EINVAL; + + p = startswith(name, "System.Error."); + if (p) { + r = errno_from_name(p); + if (r < 0) + return EIO; + + return r; + } + + if (additional_error_maps) + for (map = additional_error_maps; *map; map++) + for (m = *map;; m++) { + /* For additional error maps the end marker is actually the end marker */ + if (m->code == BUS_ERROR_MAP_END_MARKER) + break; + + if (streq(m->name, name)) { + assert(m->code > 0); + return m->code; + } + } + + m = ALIGN_PTR(__start_SYSTEMD_BUS_ERROR_MAP); + while (m < __stop_SYSTEMD_BUS_ERROR_MAP) { + /* For magic ELF error maps, the end marker might + * appear in the middle of things, since multiple maps + * might appear in the same section. Hence, let's skip + * over it, but realign the pointer to the next 8 byte + * boundary, which is the selected alignment for the + * arrays. */ + if (m->code == BUS_ERROR_MAP_END_MARKER) { + m = ALIGN_PTR(m + 1); + continue; + } + + if (streq(m->name, name)) { + assert(m->code > 0); + return m->code; + } + + m++; + } + + return EIO; +} + +static sd_bus_error errno_to_bus_error_const(int error) { + + if (error < 0) + error = -error; + + switch (error) { + + case ENOMEM: + return BUS_ERROR_OOM; + + case EPERM: + case EACCES: + return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_ACCESS_DENIED, "Access denied"); + + case EINVAL: + return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_INVALID_ARGS, "Invalid argument"); + + case ESRCH: + return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_UNIX_PROCESS_ID_UNKNOWN, "No such process"); + + case ENOENT: + return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_FILE_NOT_FOUND, "File not found"); + + case EEXIST: + return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_FILE_EXISTS, "File exists"); + + case ETIMEDOUT: + case ETIME: + return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_TIMEOUT, "Timed out"); + + case EIO: + return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_IO_ERROR, "Input/output error"); + + case ENETRESET: + case ECONNABORTED: + case ECONNRESET: + return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_DISCONNECTED, "Disconnected"); + + case EOPNOTSUPP: + return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_NOT_SUPPORTED, "Not supported"); + + case EADDRNOTAVAIL: + return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_BAD_ADDRESS, "Address not available"); + + case ENOBUFS: + return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_LIMITS_EXCEEDED, "Limits exceeded"); + + case EADDRINUSE: + return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_ADDRESS_IN_USE, "Address in use"); + + case EBADMSG: + return SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_INCONSISTENT_MESSAGE, "Inconsistent message"); + } + + return SD_BUS_ERROR_NULL; +} + +static int errno_to_bus_error_name_new(int error, char **ret) { + const char *name; + char *n; + + if (error < 0) + error = -error; + + name = errno_to_name(error); + if (!name) + return 0; + + n = strjoin("System.Error.", name); + if (!n) + return -ENOMEM; + + *ret = n; + return 1; +} + +bool bus_error_is_dirty(sd_bus_error *e) { + if (!e) + return false; + + return e->name || e->message || e->_need_free != 0; +} + +_public_ void sd_bus_error_free(sd_bus_error *e) { + if (!e) + return; + + if (e->_need_free > 0) { + free((void*) e->name); + free((void*) e->message); + } + + *e = SD_BUS_ERROR_NULL; +} + +_public_ int sd_bus_error_set(sd_bus_error *e, const char *name, const char *message) { + int r; + + if (!name) + return 0; + + if (e) { + assert_return(!bus_error_is_dirty(e), -EINVAL); + + e->name = strdup(name); + if (!e->name) { + *e = BUS_ERROR_OOM; + return -ENOMEM; + } + + if (message) + e->message = strdup(message); + + e->_need_free = 1; + } + + r = bus_error_name_to_errno(name); + assert(r > 0); + return -r; +} + +_public_ int sd_bus_error_setfv(sd_bus_error *e, const char *name, const char *format, va_list ap) { + int r; + + if (!name) + return 0; + + if (e) { + assert_return(!bus_error_is_dirty(e), -EINVAL); + + e->name = strdup(name); + if (!e->name) { + *e = BUS_ERROR_OOM; + return -ENOMEM; + } + + if (format) { + _cleanup_free_ char *mesg = NULL; + + /* If we hit OOM on formatting the pretty message, we ignore + * this, since we at least managed to write the error name */ + + if (vasprintf(&mesg, format, ap) >= 0) + e->message = TAKE_PTR(mesg); + } + + e->_need_free = 1; + } + + r = bus_error_name_to_errno(name); + assert(r > 0); + return -r; +} + +_public_ int sd_bus_error_setf(sd_bus_error *e, const char *name, const char *format, ...) { + int r; + + if (format) { + va_list ap; + + va_start(ap, format); + r = sd_bus_error_setfv(e, name, format, ap); + assert(!name || r < 0); + va_end(ap); + + return r; + } + + r = sd_bus_error_set(e, name, NULL); + assert(!name || r < 0); + return r; +} + +_public_ int sd_bus_error_copy(sd_bus_error *dest, const sd_bus_error *e) { + + if (!sd_bus_error_is_set(e)) + return 0; + if (!dest) + goto finish; + + assert_return(!bus_error_is_dirty(dest), -EINVAL); + + /* + * _need_free < 0 indicates that the error is temporarily const, needs deep copying + * _need_free == 0 indicates that the error is perpetually const, needs no deep copying + * _need_free > 0 indicates that the error is fully dynamic, needs deep copying + */ + + if (e->_need_free == 0) + *dest = *e; + else { + dest->name = strdup(e->name); + if (!dest->name) { + *dest = BUS_ERROR_OOM; + return -ENOMEM; + } + + if (e->message) + dest->message = strdup(e->message); + + dest->_need_free = 1; + } + +finish: + return -bus_error_name_to_errno(e->name); +} + +_public_ int sd_bus_error_move(sd_bus_error *dest, sd_bus_error *e) { + int r; + + if (!sd_bus_error_is_set(e)) { + + if (dest) + *dest = SD_BUS_ERROR_NULL; + + return 0; + } + + r = -bus_error_name_to_errno(e->name); + + if (dest) { + *dest = *e; + *e = SD_BUS_ERROR_NULL; + } else + sd_bus_error_free(e); + + return r; +} + +_public_ int sd_bus_error_set_const(sd_bus_error *e, const char *name, const char *message) { + if (!name) + return 0; + if (!e) + goto finish; + + assert_return(!bus_error_is_dirty(e), -EINVAL); + + *e = SD_BUS_ERROR_MAKE_CONST(name, message); + +finish: + return -bus_error_name_to_errno(name); +} + +_public_ int sd_bus_error_is_set(const sd_bus_error *e) { + if (!e) + return 0; + + return !!e->name; +} + +_public_ int sd_bus_error_has_name(const sd_bus_error *e, const char *name) { + if (!e) + return 0; + + return streq_ptr(e->name, name); +} + +_public_ int sd_bus_error_has_names_sentinel(const sd_bus_error *e, ...) { + if (!e || !e->name) + return 0; + + va_list ap; + const char *p; + + va_start(ap, e); + while ((p = va_arg(ap, const char *))) + if (streq(p, e->name)) + break; + va_end(ap); + return !!p; +} + +_public_ int sd_bus_error_get_errno(const sd_bus_error* e) { + if (!e || !e->name) + return 0; + + return bus_error_name_to_errno(e->name); +} + +static void bus_error_strerror(sd_bus_error *e, int error) { + size_t k = 64; + char *m; + + assert(e); + + for (;;) { + char *x; + + m = new(char, k); + if (!m) + return; + + errno = 0; + x = strerror_r(error, m, k); + if (errno == ERANGE || strlen(x) >= k - 1) { + free(m); + k *= 2; + continue; + } + + if (errno) { + free(m); + return; + } + + if (x == m) { + if (e->_need_free > 0) { + /* Error is already dynamic, let's just update the message */ + free((char*) e->message); + e->message = x; + + } else { + char *t; + /* Error was const so far, let's make it dynamic, if we can */ + + t = strdup(e->name); + if (!t) { + free(m); + return; + } + + e->_need_free = 1; + e->name = t; + e->message = x; + } + } else { + free(m); + + if (e->_need_free > 0) { + char *t; + + /* Error is dynamic, let's hence make the message also dynamic */ + t = strdup(x); + if (!t) + return; + + free((char*) e->message); + e->message = t; + } else { + /* Error is const, hence we can just override */ + e->message = x; + } + } + + return; + } +} + +_public_ int sd_bus_error_set_errno(sd_bus_error *e, int error) { + + if (error < 0) + error = -error; + + if (!e) + return -error; + if (error == 0) + return 0; + + assert_return(!bus_error_is_dirty(e), -EINVAL); + + /* First, try a const translation */ + *e = errno_to_bus_error_const(error); + + if (!sd_bus_error_is_set(e)) { + int k; + + /* If that didn't work, try a dynamic one. */ + + k = errno_to_bus_error_name_new(error, (char**) &e->name); + if (k > 0) + e->_need_free = 1; + else if (k < 0) { + *e = BUS_ERROR_OOM; + return -error; + } else + *e = BUS_ERROR_FAILED; + } + + /* Now, fill in the message from strerror_r() if we can */ + bus_error_strerror(e, error); + return -error; +} + +_public_ int sd_bus_error_set_errnofv(sd_bus_error *e, int error, const char *format, va_list ap) { + PROTECT_ERRNO; + + if (error < 0) + error = -error; + + if (!e) + return -error; + if (error == 0) + return 0; + + assert_return(!bus_error_is_dirty(e), -EINVAL); + + /* First, try a const translation */ + *e = errno_to_bus_error_const(error); + + if (!sd_bus_error_is_set(e)) { + int k; + + /* If that didn't work, try a dynamic one */ + + k = errno_to_bus_error_name_new(error, (char**) &e->name); + if (k > 0) + e->_need_free = 1; + else if (k < 0) { + *e = BUS_ERROR_OOM; + return -ENOMEM; + } else + *e = BUS_ERROR_FAILED; + } + + if (format) { + _cleanup_free_ char *m = NULL; + + /* Then, let's try to fill in the supplied message */ + + errno = error; /* Make sure that %m resolves to the specified error */ + if (vasprintf(&m, format, ap) < 0) + goto fail; + + if (e->_need_free <= 0) { + char *t; + + t = strdup(e->name); + if (!t) + goto fail; + + e->_need_free = 1; + e->name = t; + } + + e->message = TAKE_PTR(m); + return -error; + } + +fail: + /* If that didn't work, use strerror_r() for the message */ + bus_error_strerror(e, error); + return -error; +} + +_public_ int sd_bus_error_set_errnof(sd_bus_error *e, int error, const char *format, ...) { + int r; + + if (error < 0) + error = -error; + + if (!e) + return -error; + if (error == 0) + return 0; + + assert_return(!bus_error_is_dirty(e), -EINVAL); + + if (format) { + va_list ap; + + va_start(ap, format); + r = sd_bus_error_set_errnofv(e, error, format, ap); + va_end(ap); + + return r; + } + + return sd_bus_error_set_errno(e, error); +} + +const char* _bus_error_message(const sd_bus_error *e, int error, char buf[static ERRNO_BUF_LEN]) { + /* Sometimes, the D-Bus server is a little bit too verbose with + * its error messages, so let's override them here */ + if (sd_bus_error_has_name(e, SD_BUS_ERROR_ACCESS_DENIED)) + return "Access denied"; + + if (e && e->message) + return e->message; + + return strerror_r(abs(error), buf, ERRNO_BUF_LEN); +} + +static bool map_ok(const sd_bus_error_map *map) { + for (; map->code != BUS_ERROR_MAP_END_MARKER; map++) + if (!map->name || map->code <= 0) + return false; + return true; +} + +_public_ int sd_bus_error_add_map(const sd_bus_error_map *map) { + const sd_bus_error_map **maps = NULL; + unsigned n = 0; + + assert_return(map, -EINVAL); + assert_return(map_ok(map), -EINVAL); + + if (additional_error_maps) + for (; additional_error_maps[n] != NULL; n++) + if (additional_error_maps[n] == map) + return 0; + + maps = reallocarray(additional_error_maps, n + 2, sizeof(struct sd_bus_error_map*)); + if (!maps) + return -ENOMEM; + + maps[n] = map; + maps[n+1] = NULL; + + additional_error_maps = maps; + return 1; +} diff --git a/src/libsystemd/sd-bus/bus-error.h b/src/libsystemd/sd-bus/bus-error.h new file mode 100644 index 0000000..c8768c9 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-error.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "sd-bus.h" + +#include "errno-util.h" +#include "macro.h" + +bool bus_error_is_dirty(sd_bus_error *e); + +const char* _bus_error_message(const sd_bus_error *e, int error, char buf[static ERRNO_BUF_LEN]); + +/* Note: the lifetime of the compound literal is the immediately surrounding block, + * see C11 §6.5.2.5, and + * https://stackoverflow.com/questions/34880638/compound-literal-lifetime-and-if-blocks */ +#define bus_error_message(e, error) _bus_error_message(e, error, (char[ERRNO_BUF_LEN]){}) + +#define BUS_ERROR_OOM SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_NO_MEMORY, "Out of memory") +#define BUS_ERROR_FAILED SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_FAILED, "Operation failed") + +/* + * There are two ways to register error maps with the error translation + * logic: by using BUS_ERROR_MAP_ELF_REGISTER, which however only + * works when linked into the same ELF module, or via + * sd_bus_error_add_map() which is the official, external API, that + * works from any module. + * + * Note that BUS_ERROR_MAP_ELF_REGISTER has to be used as decorator in + * the bus error table, and BUS_ERROR_MAP_ELF_USE has to be used at + * least once per compilation unit (i.e. per library), to ensure that + * the error map is really added to the final binary. + * + * In addition, set the retain attribute so that the section cannot be + * discarded by ld --gc-sections -z start-stop-gc. Older compilers would + * warn for the unknown attribute, so just disable -Wattributes. + */ + +#define BUS_ERROR_MAP_ELF_REGISTER \ + _Pragma("GCC diagnostic ignored \"-Wattributes\"") \ + _section_("SYSTEMD_BUS_ERROR_MAP") \ + _used_ \ + _retain_ \ + _alignptr_ \ + _variable_no_sanitize_address_ + +#define BUS_ERROR_MAP_ELF_USE(errors) \ + extern const sd_bus_error_map errors[]; \ + _used_ \ + static const sd_bus_error_map * const CONCATENATE(errors ## _copy_, __COUNTER__) = errors; + +/* We use something exotic as end marker, to ensure people build the + * maps using the macsd-ros. */ +#define BUS_ERROR_MAP_END_MARKER -'x' + +BUS_ERROR_MAP_ELF_USE(bus_standard_errors); diff --git a/src/libsystemd/sd-bus/bus-internal.c b/src/libsystemd/sd-bus/bus-internal.c new file mode 100644 index 0000000..a249b84 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-internal.c @@ -0,0 +1,338 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "alloc-util.h" +#include "bus-internal.h" +#include "bus-message.h" +#include "escape.h" +#include "hexdecoct.h" +#include "string-util.h" + +bool object_path_is_valid(const char *p) { + const char *q; + bool slash; + + if (!p) + return false; + + if (p[0] != '/') + return false; + + if (p[1] == 0) + return true; + + for (slash = true, q = p+1; *q; q++) + if (*q == '/') { + if (slash) + return false; + + slash = true; + } else { + bool good; + + good = ascii_isalpha(*q) || + ascii_isdigit(*q) || + *q == '_'; + + if (!good) + return false; + + slash = false; + } + + if (slash) + return false; + + return (q - p) <= BUS_PATH_SIZE_MAX; +} + +char* object_path_startswith(const char *a, const char *b) { + const char *p; + + if (!object_path_is_valid(a) || + !object_path_is_valid(b)) + return NULL; + + if (streq(b, "/")) + return (char*) a + 1; + + p = startswith(a, b); + if (!p) + return NULL; + + if (*p == 0) + return (char*) p; + + if (*p == '/') + return (char*) p + 1; + + return NULL; +} + +bool interface_name_is_valid(const char *p) { + const char *q; + bool dot, found_dot = false; + + if (isempty(p)) + return false; + + for (dot = true, q = p; *q; q++) + if (*q == '.') { + if (dot) + return false; + + found_dot = dot = true; + } else { + bool good; + + good = + ascii_isalpha(*q) || + (!dot && ascii_isdigit(*q)) || + *q == '_'; + + if (!good) { + if (DEBUG_LOGGING) { + _cleanup_free_ char *iface = cescape(p); + log_debug("The interface %s is invalid as it contains special character", strnull(iface)); + } + return false; + } + + dot = false; + } + + if (q - p > SD_BUS_MAXIMUM_NAME_LENGTH) + return false; + + if (dot) + return false; + + if (!found_dot) + return false; + + return true; +} + +bool service_name_is_valid(const char *p) { + const char *q; + bool dot, found_dot = false, unique; + + if (isempty(p)) + return false; + + unique = p[0] == ':'; + + for (dot = true, q = unique ? p+1 : p; *q; q++) + if (*q == '.') { + if (dot) + return false; + + found_dot = dot = true; + } else { + bool good; + + good = + ascii_isalpha(*q) || + ((!dot || unique) && ascii_isdigit(*q)) || + IN_SET(*q, '_', '-'); + + if (!good) + return false; + + dot = false; + } + + if (q - p > SD_BUS_MAXIMUM_NAME_LENGTH) + return false; + + if (dot) + return false; + + if (!found_dot) + return false; + + return true; +} + +bool member_name_is_valid(const char *p) { + const char *q; + + if (isempty(p)) + return false; + + for (q = p; *q; q++) { + bool good; + + good = + ascii_isalpha(*q) || + ascii_isdigit(*q) || + *q == '_'; + + if (!good) + return false; + } + + if (q - p > SD_BUS_MAXIMUM_NAME_LENGTH) + return false; + + return true; +} + +/* + * Complex pattern match + * This checks whether @a is a 'complex-prefix' of @b, or @b is a + * 'complex-prefix' of @a, based on strings that consist of labels with @c as + * separator. This function returns true if: + * - both strings are equal + * - either is a prefix of the other and ends with @c + * The second rule makes sure that either string needs to be fully included in + * the other, and the string which is considered the prefix needs to end with a + * separator. + */ +static bool complex_pattern_check(char c, const char *a, const char *b) { + bool separator = false; + + if (!a && !b) + return true; + + if (!a || !b) + return false; + + for (;;) { + if (*a != *b) + return (separator && (*a == 0 || *b == 0)); + + if (*a == 0) + return true; + + separator = *a == c; + + a++, b++; + } +} + +bool namespace_complex_pattern(const char *pattern, const char *value) { + return complex_pattern_check('.', pattern, value); +} + +bool path_complex_pattern(const char *pattern, const char *value) { + return complex_pattern_check('/', pattern, value); +} + +/* + * Simple pattern match + * This checks whether @a is a 'simple-prefix' of @b, based on strings that + * consist of labels with @c as separator. This function returns true, if: + * - if @a and @b are equal + * - if @a is a prefix of @b, and the first following character in @b (or the + * last character in @a) is @c + * The second rule basically makes sure that if @a is a prefix of @b, then @b + * must follow with a new label separated by @c. It cannot extend the label. + */ +static bool simple_pattern_check(char c, const char *a, const char *b) { + bool separator = false; + + if (!a && !b) + return true; + + if (!a || !b) + return false; + + for (;;) { + if (*a != *b) + return *a == 0 && (*b == c || separator); + + if (*a == 0) + return true; + + separator = *a == c; + + a++, b++; + } +} + +bool namespace_simple_pattern(const char *pattern, const char *value) { + return simple_pattern_check('.', pattern, value); +} + +bool path_simple_pattern(const char *pattern, const char *value) { + return simple_pattern_check('/', pattern, value); +} + +int bus_message_type_from_string(const char *s, uint8_t *u) { + if (streq(s, "signal")) + *u = SD_BUS_MESSAGE_SIGNAL; + else if (streq(s, "method_call")) + *u = SD_BUS_MESSAGE_METHOD_CALL; + else if (streq(s, "error")) + *u = SD_BUS_MESSAGE_METHOD_ERROR; + else if (streq(s, "method_return")) + *u = SD_BUS_MESSAGE_METHOD_RETURN; + else + return -EINVAL; + + return 0; +} + +const char *bus_message_type_to_string(uint8_t u) { + if (u == SD_BUS_MESSAGE_SIGNAL) + return "signal"; + else if (u == SD_BUS_MESSAGE_METHOD_CALL) + return "method_call"; + else if (u == SD_BUS_MESSAGE_METHOD_ERROR) + return "error"; + else if (u == SD_BUS_MESSAGE_METHOD_RETURN) + return "method_return"; + else + return NULL; +} + +char *bus_address_escape(const char *v) { + const char *a; + char *r, *b; + + r = new(char, strlen(v)*3+1); + if (!r) + return NULL; + + for (a = v, b = r; *a; a++) { + + if (ascii_isdigit(*a) || + ascii_isalpha(*a) || + strchr("_-/.", *a)) + *(b++) = *a; + else { + *(b++) = '%'; + *(b++) = hexchar(*a >> 4); + *(b++) = hexchar(*a & 0xF); + } + } + + *b = 0; + return r; +} + +int bus_maybe_reply_error(sd_bus_message *m, int r, sd_bus_error *error) { + assert(m); + + if (sd_bus_error_is_set(error) || r < 0) { + if (m->header->type == SD_BUS_MESSAGE_METHOD_CALL) + sd_bus_reply_method_errno(m, r, error); + } else + return r; + + log_debug("Failed to process message type=%s sender=%s destination=%s path=%s interface=%s member=%s cookie=%" PRIu64 " reply_cookie=%" PRIu64 " signature=%s error-name=%s error-message=%s: %s", + bus_message_type_to_string(m->header->type), + strna(sd_bus_message_get_sender(m)), + strna(sd_bus_message_get_destination(m)), + strna(sd_bus_message_get_path(m)), + strna(sd_bus_message_get_interface(m)), + strna(sd_bus_message_get_member(m)), + BUS_MESSAGE_COOKIE(m), + m->reply_cookie, + strna(m->root_container.signature), + strna(m->error.name), + strna(m->error.message), + bus_error_message(error, r)); + + return 1; +} diff --git a/src/libsystemd/sd-bus/bus-internal.h b/src/libsystemd/sd-bus/bus-internal.h new file mode 100644 index 0000000..098a518 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-internal.h @@ -0,0 +1,427 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <pthread.h> + +#include "sd-bus.h" + +#include "bus-error.h" +#include "bus-kernel.h" +#include "bus-match.h" +#include "constants.h" +#include "hashmap.h" +#include "list.h" +#include "prioq.h" +#include "runtime-scope.h" +#include "socket-util.h" +#include "time-util.h" + +/* Note that we use the new /run prefix here (instead of /var/run) since we require them to be aliases and + * that way we become independent of /var being mounted */ +#define DEFAULT_SYSTEM_BUS_ADDRESS "unix:path=/run/dbus/system_bus_socket" +#define DEFAULT_USER_BUS_ADDRESS_FMT "unix:path=%s/bus" + +struct reply_callback { + sd_bus_message_handler_t callback; + usec_t timeout_usec; /* this is a relative timeout until we reach the BUS_HELLO state, and an absolute one right after */ + uint64_t cookie; + unsigned prioq_idx; +}; + +struct filter_callback { + sd_bus_message_handler_t callback; + + unsigned last_iteration; + + LIST_FIELDS(struct filter_callback, callbacks); +}; + +struct match_callback { + sd_bus_message_handler_t callback; + sd_bus_message_handler_t install_callback; + + sd_bus_slot *install_slot; /* The AddMatch() call */ + + unsigned last_iteration; + + /* Don't dispatch this slot with messages that arrived in any iteration before or at the this + * one. We use this to ensure that matches don't apply "retroactively" and confuse the caller: + * only messages received after the match was installed will be considered. */ + uint64_t after; + + char *match_string; + + struct bus_match_node *match_node; +}; + +struct node { + char *path; + struct node *parent; + LIST_HEAD(struct node, child); + LIST_FIELDS(struct node, siblings); + + LIST_HEAD(struct node_callback, callbacks); + LIST_HEAD(struct node_vtable, vtables); + LIST_HEAD(struct node_enumerator, enumerators); + LIST_HEAD(struct node_object_manager, object_managers); +}; + +struct node_callback { + struct node *node; + + bool is_fallback:1; + unsigned last_iteration; + + sd_bus_message_handler_t callback; + + LIST_FIELDS(struct node_callback, callbacks); +}; + +struct node_enumerator { + struct node *node; + + sd_bus_node_enumerator_t callback; + + unsigned last_iteration; + + LIST_FIELDS(struct node_enumerator, enumerators); +}; + +struct node_object_manager { + struct node *node; + + LIST_FIELDS(struct node_object_manager, object_managers); +}; + +struct node_vtable { + struct node *node; + + bool is_fallback:1; + unsigned last_iteration; + + char *interface; + const sd_bus_vtable *vtable; + sd_bus_object_find_t find; + + LIST_FIELDS(struct node_vtable, vtables); +}; + +struct vtable_member { + const char *path; + const char *interface; + const char *member; + struct node_vtable *parent; + unsigned last_iteration; + const sd_bus_vtable *vtable; +}; + +typedef enum BusSlotType { + BUS_REPLY_CALLBACK, + BUS_FILTER_CALLBACK, + BUS_MATCH_CALLBACK, + BUS_NODE_CALLBACK, + BUS_NODE_ENUMERATOR, + BUS_NODE_VTABLE, + BUS_NODE_OBJECT_MANAGER, + _BUS_SLOT_INVALID = -EINVAL, +} BusSlotType; + +struct sd_bus_slot { + unsigned n_ref; + BusSlotType type:8; + + /* Slots can be "floating" or not. If they are not floating (the usual case) then they reference the + * bus object they are associated with. This means the bus object stays allocated at least as long as + * there is a slot around associated with it. If it is floating, then the slot's lifecycle is bound + * to the lifecycle of the bus: it will be disconnected from the bus when the bus is destroyed, and + * it keeping the slot reffed hence won't mean the bus stays reffed too. Internally this means the + * reference direction is reversed: floating slots objects are referenced by the bus object, and not + * vice versa. */ + bool floating; + bool match_added; + + sd_bus *bus; + void *userdata; + sd_bus_destroy_t destroy_callback; + + char *description; + + LIST_FIELDS(sd_bus_slot, slots); + + union { + struct reply_callback reply_callback; + struct filter_callback filter_callback; + struct match_callback match_callback; + struct node_callback node_callback; + struct node_enumerator node_enumerator; + struct node_object_manager node_object_manager; + struct node_vtable node_vtable; + }; +}; + +enum bus_state { + BUS_UNSET, + BUS_WATCH_BIND, /* waiting for the socket to appear via inotify */ + BUS_OPENING, /* the kernel's connect() is still not ready */ + BUS_AUTHENTICATING, /* we are currently in the "SASL" authorization phase of dbus */ + BUS_HELLO, /* we are waiting for the Hello() response */ + BUS_RUNNING, + BUS_CLOSING, + BUS_CLOSED, + _BUS_STATE_MAX, +}; + +static inline bool BUS_IS_OPEN(enum bus_state state) { + return state > BUS_UNSET && state < BUS_CLOSING; +} + +enum bus_auth { + _BUS_AUTH_INVALID, + BUS_AUTH_EXTERNAL, + BUS_AUTH_ANONYMOUS +}; + +struct sd_bus { + unsigned n_ref; + + enum bus_state state; + int input_fd, output_fd; + int inotify_fd; + int message_version; + int message_endian; + + bool can_fds:1; + bool bus_client:1; + bool ucred_valid:1; + bool is_server:1; + bool anonymous_auth:1; + bool prefer_readv:1; + bool prefer_writev:1; + bool match_callbacks_modified:1; + bool filter_callbacks_modified:1; + bool nodes_modified:1; + bool trusted:1; + bool manual_peer_interface:1; + bool allow_interactive_authorization:1; + bool exit_on_disconnect:1; + bool exited:1; + bool exit_triggered:1; + bool is_local:1; + bool watch_bind:1; + bool is_monitor:1; + bool accept_fd:1; + bool attach_timestamp:1; + bool connected_signal:1; + bool close_on_exit:1; + + RuntimeScope runtime_scope; + + signed int use_memfd:2; + + void *rbuffer; + size_t rbuffer_size; + + sd_bus_message **rqueue; + size_t rqueue_size; + + sd_bus_message **wqueue; + size_t wqueue_size; + size_t windex; + + uint64_t cookie; + uint64_t read_counter; /* A counter for each incoming msg */ + + char *unique_name; + uint64_t unique_id; + + struct bus_match_node match_callbacks; + Prioq *reply_callbacks_prioq; + OrderedHashmap *reply_callbacks; + LIST_HEAD(struct filter_callback, filter_callbacks); + + Hashmap *nodes; + Hashmap *vtable_methods; + Hashmap *vtable_properties; + + union sockaddr_union sockaddr; + socklen_t sockaddr_size; + + pid_t nspid; + char *machine; + + sd_id128_t server_id; + + char *address; + unsigned address_index; + + int last_connect_error; + + enum bus_auth auth; + unsigned auth_index; + struct iovec auth_iovec[3]; + size_t auth_rbegin; + char *auth_buffer; + usec_t auth_timeout; + + struct ucred ucred; + char *label; + gid_t *groups; + size_t n_groups; + union sockaddr_union sockaddr_peer; + socklen_t sockaddr_size_peer; + + uint64_t creds_mask; + + int *fds; + size_t n_fds; + + char *exec_path; + char **exec_argv; + + /* We do locking around the memfd cache, since we want to + * allow people to process a sd_bus_message in a different + * thread then it was generated on and free it there. Since + * adding something to the memfd cache might happen when a + * message is released, we hence need to protect this bit with + * a mutex. */ + pthread_mutex_t memfd_cache_mutex; + struct memfd_cache memfd_cache[MEMFD_CACHE_MAX]; + unsigned n_memfd_cache; + + uint64_t origin_id; + pid_t busexec_pid; + + unsigned iteration_counter; + + sd_event_source *input_io_event_source; + sd_event_source *output_io_event_source; + sd_event_source *time_event_source; + sd_event_source *quit_event_source; + sd_event_source *inotify_event_source; + sd_event *event; + int event_priority; + + pid_t tid; + + sd_bus_message *current_message; + sd_bus_slot *current_slot; + sd_bus_message_handler_t current_handler; + void *current_userdata; + + sd_bus **default_bus_ptr; + + char *description; + char *patch_sender; + + sd_bus_track *track_queue; + + LIST_HEAD(sd_bus_slot, slots); + LIST_HEAD(sd_bus_track, tracks); + + int *inotify_watches; + size_t n_inotify_watches; + + /* zero means use value specified by $SYSTEMD_BUS_TIMEOUT= environment variable or built-in default */ + usec_t method_call_timeout; +}; + +/* For method calls we timeout at 25s, like in the D-Bus reference implementation */ +#define BUS_DEFAULT_TIMEOUT ((usec_t) (25 * USEC_PER_SEC)) + +/* For the authentication phase we grant 90s, to provide extra room during boot, when RNGs and such are not filled up + * with enough entropy yet and might delay the boot */ +#define BUS_AUTH_TIMEOUT ((usec_t) DEFAULT_TIMEOUT_USEC) + +#define BUS_WQUEUE_MAX (384*1024) +#define BUS_RQUEUE_MAX (384*1024) + +#define BUS_MESSAGE_SIZE_MAX (128*1024*1024) +#define BUS_AUTH_SIZE_MAX (64*1024) +/* Note that the D-Bus specification states that bus paths shall have no size limit. We enforce here one + * anyway, since truly unbounded strings are a security problem. The limit we pick is relatively large however, + * to not clash unnecessarily with real-life applications. */ +#define BUS_PATH_SIZE_MAX (64*1024) + +#define BUS_CONTAINER_DEPTH 128 + +/* Defined by the specification as maximum size of an array in bytes */ +#define BUS_ARRAY_MAX_SIZE 67108864 + +#define BUS_FDS_MAX 1024 + +#define BUS_EXEC_ARGV_MAX 256 + +bool interface_name_is_valid(const char *p) _pure_; +bool service_name_is_valid(const char *p) _pure_; +bool member_name_is_valid(const char *p) _pure_; +bool object_path_is_valid(const char *p) _pure_; + +char *object_path_startswith(const char *a, const char *b) _pure_; + +bool namespace_complex_pattern(const char *pattern, const char *value) _pure_; +bool path_complex_pattern(const char *pattern, const char *value) _pure_; + +bool namespace_simple_pattern(const char *pattern, const char *value) _pure_; +bool path_simple_pattern(const char *pattern, const char *value) _pure_; + +int bus_message_type_from_string(const char *s, uint8_t *u); +const char *bus_message_type_to_string(uint8_t u) _pure_; + +#define error_name_is_valid interface_name_is_valid + +sd_bus *bus_resolve(sd_bus *bus); + +int bus_ensure_running(sd_bus *bus); +int bus_start_running(sd_bus *bus); +int bus_next_address(sd_bus *bus); + +int bus_seal_synthetic_message(sd_bus *b, sd_bus_message *m); + +int bus_rqueue_make_room(sd_bus *bus); + +bool bus_origin_changed(sd_bus *bus); + +char *bus_address_escape(const char *v); + +int bus_attach_io_events(sd_bus *b); +int bus_attach_inotify_event(sd_bus *b); + +void bus_close_inotify_fd(sd_bus *b); +void bus_close_io_fds(sd_bus *b); + +int bus_add_match_full( + sd_bus *bus, + sd_bus_slot **slot, + bool asynchronous, + const char *match, + sd_bus_message_handler_t callback, + sd_bus_message_handler_t install_callback, + void *userdata, + uint64_t timeout_usec); + +#define OBJECT_PATH_FOREACH_PREFIX(prefix, path) \ + for (char *_slash = ({ strcpy((prefix), (path)); streq((prefix), "/") ? NULL : strrchr((prefix), '/'); }) ; \ + _slash && ((_slash[(_slash) == (prefix)] = 0), true); \ + _slash = streq((prefix), "/") ? NULL : strrchr((prefix), '/')) + +/* If we are invoking callbacks of a bus object, ensure unreffing the + * bus from the callback doesn't destroy the object we are working on */ +#define BUS_DONT_DESTROY(bus) \ + _cleanup_(sd_bus_unrefp) _unused_ sd_bus *_dont_destroy_##bus = sd_bus_ref(bus) + +int bus_set_address_system(sd_bus *bus); +int bus_set_address_user(sd_bus *bus); +int bus_set_address_system_remote(sd_bus *b, const char *host); +int bus_set_address_machine(sd_bus *b, RuntimeScope runtime_scope, const char *machine); + +int bus_maybe_reply_error(sd_bus_message *m, int r, sd_bus_error *error); + +#define bus_assert_return(expr, r, error) \ + do { \ + if (!assert_log(expr, #expr)) \ + return sd_bus_error_set_errno(error, r); \ + } while (false) + +void bus_enter_closing(sd_bus *bus); + +void bus_set_state(sd_bus *bus, enum bus_state state); diff --git a/src/libsystemd/sd-bus/bus-introspect.c b/src/libsystemd/sd-bus/bus-introspect.c new file mode 100644 index 0000000..84c8774 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-introspect.c @@ -0,0 +1,290 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "bus-internal.h" +#include "bus-introspect.h" +#include "bus-objects.h" +#include "bus-protocol.h" +#include "bus-signature.h" +#include "fd-util.h" +#include "fileio.h" +#include "memory-util.h" +#include "memstream-util.h" +#include "string-util.h" + +#define BUS_INTROSPECT_DOCTYPE \ + "<!DOCTYPE node PUBLIC \"-//freedesktop//DTD D-BUS Object Introspection 1.0//EN\"\n" \ + "\"https://www.freedesktop.org/standards/dbus/1.0/introspect.dtd\">\n" + +#define BUS_INTROSPECT_INTERFACE_PEER \ + " <interface name=\"org.freedesktop.DBus.Peer\">\n" \ + " <method name=\"Ping\"/>\n" \ + " <method name=\"GetMachineId\">\n" \ + " <arg type=\"s\" name=\"machine_uuid\" direction=\"out\"/>\n" \ + " </method>\n" \ + " </interface>\n" + +#define BUS_INTROSPECT_INTERFACE_INTROSPECTABLE \ + " <interface name=\"org.freedesktop.DBus.Introspectable\">\n" \ + " <method name=\"Introspect\">\n" \ + " <arg name=\"xml_data\" type=\"s\" direction=\"out\"/>\n" \ + " </method>\n" \ + " </interface>\n" + +#define BUS_INTROSPECT_INTERFACE_PROPERTIES \ + " <interface name=\"org.freedesktop.DBus.Properties\">\n" \ + " <method name=\"Get\">\n" \ + " <arg name=\"interface_name\" direction=\"in\" type=\"s\"/>\n" \ + " <arg name=\"property_name\" direction=\"in\" type=\"s\"/>\n" \ + " <arg name=\"value\" direction=\"out\" type=\"v\"/>\n" \ + " </method>\n" \ + " <method name=\"GetAll\">\n" \ + " <arg name=\"interface_name\" direction=\"in\" type=\"s\"/>\n" \ + " <arg name=\"props\" direction=\"out\" type=\"a{sv}\"/>\n" \ + " </method>\n" \ + " <method name=\"Set\">\n" \ + " <arg name=\"interface_name\" direction=\"in\" type=\"s\"/>\n" \ + " <arg name=\"property_name\" direction=\"in\" type=\"s\"/>\n" \ + " <arg name=\"value\" direction=\"in\" type=\"v\"/>\n" \ + " </method>\n" \ + " <signal name=\"PropertiesChanged\">\n" \ + " <arg type=\"s\" name=\"interface_name\"/>\n" \ + " <arg type=\"a{sv}\" name=\"changed_properties\"/>\n" \ + " <arg type=\"as\" name=\"invalidated_properties\"/>\n" \ + " </signal>\n" \ + " </interface>\n" + +#define BUS_INTROSPECT_INTERFACE_OBJECT_MANAGER \ + " <interface name=\"org.freedesktop.DBus.ObjectManager\">\n" \ + " <method name=\"GetManagedObjects\">\n" \ + " <arg type=\"a{oa{sa{sv}}}\" name=\"object_paths_interfaces_and_properties\" direction=\"out\"/>\n" \ + " </method>\n" \ + " <signal name=\"InterfacesAdded\">\n" \ + " <arg type=\"o\" name=\"object_path\"/>\n" \ + " <arg type=\"a{sa{sv}}\" name=\"interfaces_and_properties\"/>\n" \ + " </signal>\n" \ + " <signal name=\"InterfacesRemoved\">\n" \ + " <arg type=\"o\" name=\"object_path\"/>\n" \ + " <arg type=\"as\" name=\"interfaces\"/>\n" \ + " </signal>\n" \ + " </interface>\n" + +int introspect_begin(struct introspect *i, bool trusted) { + FILE *f; + + assert(i); + + *i = (struct introspect) { + .trusted = trusted, + }; + + f = memstream_init(&i->m); + if (!f) + return -ENOMEM; + + fputs(BUS_INTROSPECT_DOCTYPE + "<node>\n", f); + + return 0; +} + +int introspect_write_default_interfaces(struct introspect *i, bool object_manager) { + assert(i); + assert(i->m.f); + + fputs(BUS_INTROSPECT_INTERFACE_PEER + BUS_INTROSPECT_INTERFACE_INTROSPECTABLE + BUS_INTROSPECT_INTERFACE_PROPERTIES, i->m.f); + + if (object_manager) + fputs(BUS_INTROSPECT_INTERFACE_OBJECT_MANAGER, i->m.f); + + return 0; +} + +static int set_interface_name(struct introspect *i, const char *interface_name) { + assert(i); + assert(i->m.f); + + if (streq_ptr(i->interface_name, interface_name)) + return 0; + + if (i->interface_name) + fputs(" </interface>\n", i->m.f); + + if (interface_name) + fprintf(i->m.f, " <interface name=\"%s\">\n", interface_name); + + return free_and_strdup(&i->interface_name, interface_name); +} + +int introspect_write_child_nodes(struct introspect *i, OrderedSet *s, const char *prefix) { + char *node; + + assert(i); + assert(i->m.f); + assert(prefix); + + assert_se(set_interface_name(i, NULL) >= 0); + + while ((node = ordered_set_steal_first(s))) { + const char *e; + + e = object_path_startswith(node, prefix); + if (e && e[0]) + fprintf(i->m.f, " <node name=\"%s\"/>\n", e); + + free(node); + } + + return 0; +} + +static void introspect_write_flags(struct introspect *i, int type, uint64_t flags) { + assert(i); + assert(i->m.f); + + if (flags & SD_BUS_VTABLE_DEPRECATED) + fputs(" <annotation name=\"org.freedesktop.DBus.Deprecated\" value=\"true\"/>\n", i->m.f); + + if (type == _SD_BUS_VTABLE_METHOD && (flags & SD_BUS_VTABLE_METHOD_NO_REPLY)) + fputs(" <annotation name=\"org.freedesktop.DBus.Method.NoReply\" value=\"true\"/>\n", i->m.f); + + if (IN_SET(type, _SD_BUS_VTABLE_PROPERTY, _SD_BUS_VTABLE_WRITABLE_PROPERTY)) { + if (flags & SD_BUS_VTABLE_PROPERTY_EXPLICIT) + fputs(" <annotation name=\"org.freedesktop.systemd1.Explicit\" value=\"true\"/>\n", i->m.f); + + if (flags & SD_BUS_VTABLE_PROPERTY_CONST) + fputs(" <annotation name=\"org.freedesktop.DBus.Property.EmitsChangedSignal\" value=\"const\"/>\n", i->m.f); + else if (flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION) + fputs(" <annotation name=\"org.freedesktop.DBus.Property.EmitsChangedSignal\" value=\"invalidates\"/>\n", i->m.f); + else if (!(flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE)) + fputs(" <annotation name=\"org.freedesktop.DBus.Property.EmitsChangedSignal\" value=\"false\"/>\n", i->m.f); + } + + if (!i->trusted && + IN_SET(type, _SD_BUS_VTABLE_METHOD, _SD_BUS_VTABLE_WRITABLE_PROPERTY) && + !(flags & SD_BUS_VTABLE_UNPRIVILEGED)) + fputs(" <annotation name=\"org.freedesktop.systemd1.Privileged\" value=\"true\"/>\n", i->m.f); +} + +/* Note that "names" is both an input and an output parameter. It initially points to the first argument name in a + NULL-separated list of strings, and is then advanced with each argument, and the resulting pointer is returned. */ +static int introspect_write_arguments(struct introspect *i, const char *signature, const char **names, const char *direction) { + int r; + + assert(i); + assert(i->m.f); + + for (;;) { + size_t l; + + if (!*signature) + return 0; + + r = signature_element_length(signature, &l); + if (r < 0) + return r; + + fprintf(i->m.f, " <arg type=\"%.*s\"", (int) l, signature); + + if (**names != '\0') { + fprintf(i->m.f, " name=\"%s\"", *names); + *names += strlen(*names) + 1; + } + + if (direction) + fprintf(i->m.f, " direction=\"%s\"/>\n", direction); + else + fputs("/>\n", i->m.f); + + signature += l; + } +} + +int introspect_write_interface( + struct introspect *i, + const char *interface_name, + const sd_bus_vtable *v) { + + const sd_bus_vtable *vtable = ASSERT_PTR(v); + const char *names = ""; + int r; + + assert(i); + assert(i->m.f); + assert(interface_name); + + r = set_interface_name(i, interface_name); + if (r < 0) + return r; + + for (; v->type != _SD_BUS_VTABLE_END; v = bus_vtable_next(vtable, v)) { + + /* Ignore methods, signals and properties that are + * marked "hidden", but do show the interface + * itself */ + + if (v->type != _SD_BUS_VTABLE_START && (v->flags & SD_BUS_VTABLE_HIDDEN)) + continue; + + switch (v->type) { + + case _SD_BUS_VTABLE_START: + if (v->flags & SD_BUS_VTABLE_DEPRECATED) + fputs(" <annotation name=\"org.freedesktop.DBus.Deprecated\" value=\"true\"/>\n", i->m.f); + break; + + case _SD_BUS_VTABLE_METHOD: + fprintf(i->m.f, " <method name=\"%s\">\n", v->x.method.member); + if (bus_vtable_has_names(vtable)) + names = strempty(v->x.method.names); + introspect_write_arguments(i, strempty(v->x.method.signature), &names, "in"); + introspect_write_arguments(i, strempty(v->x.method.result), &names, "out"); + introspect_write_flags(i, v->type, v->flags); + fputs(" </method>\n", i->m.f); + break; + + case _SD_BUS_VTABLE_PROPERTY: + case _SD_BUS_VTABLE_WRITABLE_PROPERTY: + fprintf(i->m.f, " <property name=\"%s\" type=\"%s\" access=\"%s\">\n", + v->x.property.member, + v->x.property.signature, + v->type == _SD_BUS_VTABLE_WRITABLE_PROPERTY ? "readwrite" : "read"); + introspect_write_flags(i, v->type, v->flags); + fputs(" </property>\n", i->m.f); + break; + + case _SD_BUS_VTABLE_SIGNAL: + fprintf(i->m.f, " <signal name=\"%s\">\n", v->x.signal.member); + if (bus_vtable_has_names(vtable)) + names = strempty(v->x.signal.names); + introspect_write_arguments(i, strempty(v->x.signal.signature), &names, NULL); + introspect_write_flags(i, v->type, v->flags); + fputs(" </signal>\n", i->m.f); + break; + } + + } + + return 0; +} + +int introspect_finish(struct introspect *i, char **ret) { + assert(i); + assert(i->m.f); + + assert_se(set_interface_name(i, NULL) >= 0); + + fputs("</node>\n", i->m.f); + + return memstream_finalize(&i->m, ret, NULL); +} + +void introspect_done(struct introspect *i) { + assert(i); + + /* Normally introspect_finish() does all the work, this is just a backup for error paths */ + + memstream_done(&i->m); + free(i->interface_name); +} diff --git a/src/libsystemd/sd-bus/bus-introspect.h b/src/libsystemd/sd-bus/bus-introspect.h new file mode 100644 index 0000000..83bcfb2 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-introspect.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdio.h> + +#include "sd-bus.h" + +#include "memstream-util.h" +#include "ordered-set.h" + +struct introspect { + MemStream m; + char *interface_name; + bool trusted; +}; + +int introspect_begin(struct introspect *i, bool trusted); +int introspect_write_default_interfaces(struct introspect *i, bool object_manager); +int introspect_write_child_nodes(struct introspect *i, OrderedSet *s, const char *prefix); +int introspect_write_interface( + struct introspect *i, + const char *interface_name, + const sd_bus_vtable *v); +int introspect_finish(struct introspect *i, char **ret); +void introspect_done(struct introspect *i); diff --git a/src/libsystemd/sd-bus/bus-kernel.c b/src/libsystemd/sd-bus/bus-kernel.c new file mode 100644 index 0000000..d7ff834 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-kernel.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#if HAVE_VALGRIND_MEMCHECK_H +#include <valgrind/memcheck.h> +#endif + +#include <fcntl.h> +#include <malloc.h> +#include <sys/mman.h> +#include <sys/prctl.h> + +#include "alloc-util.h" +#include "bus-internal.h" +#include "bus-kernel.h" +#include "bus-label.h" +#include "bus-message.h" +#include "capability-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "format-util.h" +#include "memfd-util.h" +#include "parse-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strv.h" +#include "user-util.h" +#include "memory-util.h" + +void close_and_munmap(int fd, void *address, size_t size) { + if (size > 0) { + size = PAGE_ALIGN(size); + assert(size < SIZE_MAX); + assert_se(munmap(address, size) >= 0); + } + + safe_close(fd); +} + +void bus_flush_memfd(sd_bus *b) { + assert(b); + + for (unsigned i = 0; i < b->n_memfd_cache; i++) + close_and_munmap(b->memfd_cache[i].fd, b->memfd_cache[i].address, b->memfd_cache[i].mapped); +} diff --git a/src/libsystemd/sd-bus/bus-kernel.h b/src/libsystemd/sd-bus/bus-kernel.h new file mode 100644 index 0000000..be8e0ce --- /dev/null +++ b/src/libsystemd/sd-bus/bus-kernel.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-bus.h" + +#define MEMFD_CACHE_MAX 32 + +/* When we cache a memfd block for reuse, we will truncate blocks + * longer than this in order not to keep too much data around. */ +#define MEMFD_CACHE_ITEM_SIZE_MAX (128*1024) + +/* This determines at which minimum size we prefer sending memfds over + * sending vectors */ +#define MEMFD_MIN_SIZE (512*1024) + +struct memfd_cache { + int fd; + void *address; + size_t mapped; + size_t allocated; +}; + +void close_and_munmap(int fd, void *address, size_t size); +void bus_flush_memfd(sd_bus *bus); diff --git a/src/libsystemd/sd-bus/bus-match.c b/src/libsystemd/sd-bus/bus-match.c new file mode 100644 index 0000000..606304d --- /dev/null +++ b/src/libsystemd/sd-bus/bus-match.c @@ -0,0 +1,1058 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "alloc-util.h" +#include "bus-internal.h" +#include "bus-match.h" +#include "bus-message.h" +#include "fd-util.h" +#include "fileio.h" +#include "hexdecoct.h" +#include "memstream-util.h" +#include "sort-util.h" +#include "string-util.h" +#include "strv.h" + +/* Example: + * + * A: type=signal,sender=foo,interface=bar + * B: type=signal,sender=quux,interface=fips + * C: type=signal,sender=quux,interface=waldo + * D: type=signal,member=test + * E: sender=miau + * F: type=signal + * G: type=signal + * + * results in this tree: + * + * BUS_MATCH_ROOT + * + BUS_MATCH_MESSAGE_TYPE + * | ` BUS_MATCH_VALUE: value == signal + * | + DBUS_MATCH_SENDER + * | | + BUS_MATCH_VALUE: value == foo + * | | | ` DBUS_MATCH_INTERFACE + * | | | ` BUS_MATCH_VALUE: value == bar + * | | | ` BUS_MATCH_LEAF: A + * | | ` BUS_MATCH_VALUE: value == quux + * | | ` DBUS_MATCH_INTERFACE + * | | | BUS_MATCH_VALUE: value == fips + * | | | ` BUS_MATCH_LEAF: B + * | | ` BUS_MATCH_VALUE: value == waldo + * | | ` BUS_MATCH_LEAF: C + * | + DBUS_MATCH_MEMBER + * | | ` BUS_MATCH_VALUE: value == test + * | | ` BUS_MATCH_LEAF: D + * | + BUS_MATCH_LEAF: F + * | ` BUS_MATCH_LEAF: G + * ` BUS_MATCH_SENDER + * ` BUS_MATCH_VALUE: value == miau + * ` BUS_MATCH_LEAF: E + */ + +static bool BUS_MATCH_IS_COMPARE(enum bus_match_node_type t) { + return t >= BUS_MATCH_SENDER && t <= BUS_MATCH_ARG_HAS_LAST; +} + +static bool BUS_MATCH_CAN_HASH(enum bus_match_node_type t) { + return (t >= BUS_MATCH_MESSAGE_TYPE && t <= BUS_MATCH_PATH) || + (t >= BUS_MATCH_ARG && t <= BUS_MATCH_ARG_LAST) || + (t >= BUS_MATCH_ARG_HAS && t <= BUS_MATCH_ARG_HAS_LAST); +} + +static void bus_match_node_free(struct bus_match_node *node) { + assert(node); + assert(node->parent); + assert(!node->child); + assert(node->type != BUS_MATCH_ROOT); + assert(node->type < _BUS_MATCH_NODE_TYPE_MAX); + + if (node->parent->child) { + /* We are apparently linked into the parent's child + * list. Let's remove us from there. */ + if (node->prev) { + assert(node->prev->next == node); + node->prev->next = node->next; + } else { + assert(node->parent->child == node); + node->parent->child = node->next; + } + + if (node->next) + node->next->prev = node->prev; + } + + if (node->type == BUS_MATCH_VALUE) { + /* We might be in the parent's hash table, so clean + * this up */ + + if (node->parent->type == BUS_MATCH_MESSAGE_TYPE) + hashmap_remove(node->parent->compare.children, UINT_TO_PTR(node->value.u8)); + else if (BUS_MATCH_CAN_HASH(node->parent->type) && node->value.str) + hashmap_remove(node->parent->compare.children, node->value.str); + + free(node->value.str); + } + + if (BUS_MATCH_IS_COMPARE(node->type)) { + assert(hashmap_isempty(node->compare.children)); + hashmap_free(node->compare.children); + } + + free(node); +} + +static bool bus_match_node_maybe_free(struct bus_match_node *node) { + assert(node); + + if (node->type == BUS_MATCH_ROOT) + return false; + + if (node->child) + return false; + + if (BUS_MATCH_IS_COMPARE(node->type) && !hashmap_isempty(node->compare.children)) + return true; + + bus_match_node_free(node); + return true; +} + +static bool value_node_test( + struct bus_match_node *node, + enum bus_match_node_type parent_type, + uint8_t value_u8, + const char *value_str, + char **value_strv, + sd_bus_message *m) { + + assert(node); + assert(node->type == BUS_MATCH_VALUE); + + /* Tests parameters against this value node, doing prefix + * magic and stuff. */ + + switch (parent_type) { + + case BUS_MATCH_MESSAGE_TYPE: + return node->value.u8 == value_u8; + + case BUS_MATCH_SENDER: + if (streq_ptr(node->value.str, value_str)) + return true; + + if (m->creds.mask & SD_BUS_CREDS_WELL_KNOWN_NAMES) { + /* on kdbus we have the well known names list + * in the credentials, let's make use of that + * for an accurate match */ + + STRV_FOREACH(i, m->creds.well_known_names) + if (streq_ptr(node->value.str, *i)) + return true; + + } else { + + /* If we don't have kdbus, we don't know the + * well-known names of the senders. In that, + * let's just hope that dbus-daemon doesn't + * send us stuff we didn't want. */ + + if (node->value.str[0] != ':' && value_str && value_str[0] == ':') + return true; + } + + return false; + + case BUS_MATCH_DESTINATION: + case BUS_MATCH_INTERFACE: + case BUS_MATCH_MEMBER: + case BUS_MATCH_PATH: + case BUS_MATCH_ARG ... BUS_MATCH_ARG_LAST: + + if (value_str) + return streq_ptr(node->value.str, value_str); + + return false; + + case BUS_MATCH_ARG_HAS ... BUS_MATCH_ARG_HAS_LAST: { + STRV_FOREACH(i, value_strv) + if (streq_ptr(node->value.str, *i)) + return true; + + return false; + } + + case BUS_MATCH_ARG_NAMESPACE ... BUS_MATCH_ARG_NAMESPACE_LAST: + if (value_str) + return namespace_simple_pattern(node->value.str, value_str); + + return false; + + case BUS_MATCH_PATH_NAMESPACE: + return path_simple_pattern(node->value.str, value_str); + + case BUS_MATCH_ARG_PATH ... BUS_MATCH_ARG_PATH_LAST: + if (value_str) + return path_complex_pattern(node->value.str, value_str); + + return false; + + default: + assert_not_reached(); + } +} + +static bool value_node_same( + struct bus_match_node *node, + enum bus_match_node_type parent_type, + uint8_t value_u8, + const char *value_str) { + + /* Tests parameters against this value node, not doing prefix + * magic and stuff, i.e. this one actually compares the match + * itself. */ + + assert(node); + assert(node->type == BUS_MATCH_VALUE); + + switch (parent_type) { + + case BUS_MATCH_MESSAGE_TYPE: + return node->value.u8 == value_u8; + + case BUS_MATCH_SENDER: + case BUS_MATCH_DESTINATION: + case BUS_MATCH_INTERFACE: + case BUS_MATCH_MEMBER: + case BUS_MATCH_PATH: + case BUS_MATCH_ARG ... BUS_MATCH_ARG_LAST: + case BUS_MATCH_ARG_HAS ... BUS_MATCH_ARG_HAS_LAST: + case BUS_MATCH_ARG_NAMESPACE ... BUS_MATCH_ARG_NAMESPACE_LAST: + case BUS_MATCH_PATH_NAMESPACE: + case BUS_MATCH_ARG_PATH ... BUS_MATCH_ARG_PATH_LAST: + return streq(node->value.str, value_str); + + default: + assert_not_reached(); + } +} + +int bus_match_run( + sd_bus *bus, + struct bus_match_node *node, + sd_bus_message *m) { + + _cleanup_strv_free_ char **test_strv = NULL; + const char *test_str = NULL; + uint8_t test_u8 = 0; + int r; + + assert(m); + + if (!node) + return 0; + + if (bus && bus->match_callbacks_modified) + return 0; + + /* Not these special semantics: when traversing the tree we + * usually let bus_match_run() when called for a node + * recursively invoke bus_match_run(). There's are two + * exceptions here though, which are BUS_NODE_ROOT (which + * cannot have a sibling), and BUS_NODE_VALUE (whose siblings + * are invoked anyway by its parent. */ + + switch (node->type) { + + case BUS_MATCH_ROOT: + + /* Run all children. Since we cannot have any siblings + * we won't call any. The children of the root node + * are compares or leaves, they will automatically + * call their siblings. */ + return bus_match_run(bus, node->child, m); + + case BUS_MATCH_VALUE: + + /* Run all children. We don't execute any siblings, we + * assume our caller does that. The children of value + * nodes are compares or leaves, they will + * automatically call their siblings */ + + assert(node->child); + return bus_match_run(bus, node->child, m); + + case BUS_MATCH_LEAF: + + if (bus) { + /* Don't run this match as long as the AddMatch() call is not complete yet. + * + * Don't run this match unless the 'after' counter has been reached. + * + * Don't run this match more than once per iteration */ + + if (node->leaf.callback->install_slot || + m->read_counter <= node->leaf.callback->after || + node->leaf.callback->last_iteration == bus->iteration_counter) + return bus_match_run(bus, node->next, m); + + node->leaf.callback->last_iteration = bus->iteration_counter; + } + + r = sd_bus_message_rewind(m, true); + if (r < 0) + return r; + + /* Run the callback. And then invoke siblings. */ + if (node->leaf.callback->callback) { + _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL; + sd_bus_slot *slot; + + slot = container_of(node->leaf.callback, sd_bus_slot, match_callback); + if (bus) { + bus->current_slot = sd_bus_slot_ref(slot); + bus->current_handler = node->leaf.callback->callback; + bus->current_userdata = slot->userdata; + } + r = node->leaf.callback->callback(m, slot->userdata, &error_buffer); + if (bus) { + bus->current_userdata = NULL; + bus->current_handler = NULL; + bus->current_slot = sd_bus_slot_unref(slot); + } + + r = bus_maybe_reply_error(m, r, &error_buffer); + if (r != 0) + return r; + + if (bus && bus->match_callbacks_modified) + return 0; + } + + return bus_match_run(bus, node->next, m); + + case BUS_MATCH_MESSAGE_TYPE: + test_u8 = m->header->type; + break; + + case BUS_MATCH_SENDER: + test_str = m->sender; + /* FIXME: resolve test_str from a well-known to a unique name first */ + break; + + case BUS_MATCH_DESTINATION: + test_str = m->destination; + break; + + case BUS_MATCH_INTERFACE: + test_str = m->interface; + break; + + case BUS_MATCH_MEMBER: + test_str = m->member; + break; + + case BUS_MATCH_PATH: + case BUS_MATCH_PATH_NAMESPACE: + test_str = m->path; + break; + + case BUS_MATCH_ARG ... BUS_MATCH_ARG_LAST: + (void) bus_message_get_arg(m, node->type - BUS_MATCH_ARG, &test_str); + break; + + case BUS_MATCH_ARG_PATH ... BUS_MATCH_ARG_PATH_LAST: + (void) bus_message_get_arg(m, node->type - BUS_MATCH_ARG_PATH, &test_str); + break; + + case BUS_MATCH_ARG_NAMESPACE ... BUS_MATCH_ARG_NAMESPACE_LAST: + (void) bus_message_get_arg(m, node->type - BUS_MATCH_ARG_NAMESPACE, &test_str); + break; + + case BUS_MATCH_ARG_HAS ... BUS_MATCH_ARG_HAS_LAST: + (void) bus_message_get_arg_strv(m, node->type - BUS_MATCH_ARG_HAS, &test_strv); + break; + + default: + assert_not_reached(); + } + + if (BUS_MATCH_CAN_HASH(node->type)) { + struct bus_match_node *found; + + /* Lookup via hash table, nice! So let's jump directly. */ + + if (test_str) + found = hashmap_get(node->compare.children, test_str); + else if (test_strv) { + STRV_FOREACH(i, test_strv) { + found = hashmap_get(node->compare.children, *i); + if (found) { + r = bus_match_run(bus, found, m); + if (r != 0) + return r; + } + } + + found = NULL; + } else if (node->type == BUS_MATCH_MESSAGE_TYPE) + found = hashmap_get(node->compare.children, UINT_TO_PTR(test_u8)); + else + found = NULL; + + if (found) { + r = bus_match_run(bus, found, m); + if (r != 0) + return r; + } + } else + /* No hash table, so let's iterate manually... */ + for (struct bus_match_node *c = node->child; c; c = c->next) { + if (!value_node_test(c, node->type, test_u8, test_str, test_strv, m)) + continue; + + r = bus_match_run(bus, c, m); + if (r != 0) + return r; + + if (bus && bus->match_callbacks_modified) + return 0; + } + + if (bus && bus->match_callbacks_modified) + return 0; + + /* And now, let's invoke our siblings */ + return bus_match_run(bus, node->next, m); +} + +static int bus_match_add_compare_value( + struct bus_match_node *where, + enum bus_match_node_type t, + uint8_t value_u8, + const char *value_str, + struct bus_match_node **ret) { + + struct bus_match_node *c, *n = NULL; + int r; + + assert(where); + assert(IN_SET(where->type, BUS_MATCH_ROOT, BUS_MATCH_VALUE)); + assert(BUS_MATCH_IS_COMPARE(t)); + assert(ret); + + for (c = where->child; c && c->type != t; c = c->next) + ; + + if (c) { + /* Comparison node already exists? Then let's see if the value node exists too. */ + + if (t == BUS_MATCH_MESSAGE_TYPE) + n = hashmap_get(c->compare.children, UINT_TO_PTR(value_u8)); + else if (BUS_MATCH_CAN_HASH(t)) + n = hashmap_get(c->compare.children, value_str); + else + for (n = c->child; n && !value_node_same(n, t, value_u8, value_str); n = n->next) + ; + + if (n) { + *ret = n; + return 0; + } + } else { + /* Comparison node, doesn't exist yet? Then let's create it. */ + + c = new0(struct bus_match_node, 1); + if (!c) { + r = -ENOMEM; + goto fail; + } + + c->type = t; + c->parent = where; + c->next = where->child; + if (c->next) + c->next->prev = c; + where->child = c; + + if (t == BUS_MATCH_MESSAGE_TYPE) { + c->compare.children = hashmap_new(NULL); + if (!c->compare.children) { + r = -ENOMEM; + goto fail; + } + } else if (BUS_MATCH_CAN_HASH(t)) { + c->compare.children = hashmap_new(&string_hash_ops); + if (!c->compare.children) { + r = -ENOMEM; + goto fail; + } + } + } + + n = new0(struct bus_match_node, 1); + if (!n) { + r = -ENOMEM; + goto fail; + } + + n->type = BUS_MATCH_VALUE; + n->value.u8 = value_u8; + if (value_str) { + n->value.str = strdup(value_str); + if (!n->value.str) { + r = -ENOMEM; + goto fail; + } + } + + n->parent = c; + if (c->compare.children) { + + if (t == BUS_MATCH_MESSAGE_TYPE) + r = hashmap_put(c->compare.children, UINT_TO_PTR(value_u8), n); + else + r = hashmap_put(c->compare.children, n->value.str, n); + + if (r < 0) + goto fail; + } else { + n->next = c->child; + if (n->next) + n->next->prev = n; + c->child = n; + } + + *ret = n; + return 1; + +fail: + if (c) + bus_match_node_maybe_free(c); + + if (n) { + free(n->value.str); + free(n); + } + + return r; +} + +static int bus_match_add_leaf( + struct bus_match_node *where, + struct match_callback *callback) { + + struct bus_match_node *n; + + assert(where); + assert(IN_SET(where->type, BUS_MATCH_ROOT, BUS_MATCH_VALUE)); + assert(callback); + + n = new0(struct bus_match_node, 1); + if (!n) + return -ENOMEM; + + n->type = BUS_MATCH_LEAF; + n->parent = where; + n->next = where->child; + if (n->next) + n->next->prev = n; + + n->leaf.callback = callback; + callback->match_node = n; + + where->child = n; + + return 1; +} + +enum bus_match_node_type bus_match_node_type_from_string(const char *k, size_t n) { + assert(k); + + if (n == 4 && startswith(k, "type")) + return BUS_MATCH_MESSAGE_TYPE; + if (n == 6 && startswith(k, "sender")) + return BUS_MATCH_SENDER; + if (n == 11 && startswith(k, "destination")) + return BUS_MATCH_DESTINATION; + if (n == 9 && startswith(k, "interface")) + return BUS_MATCH_INTERFACE; + if (n == 6 && startswith(k, "member")) + return BUS_MATCH_MEMBER; + if (n == 4 && startswith(k, "path")) + return BUS_MATCH_PATH; + if (n == 14 && startswith(k, "path_namespace")) + return BUS_MATCH_PATH_NAMESPACE; + + if (n == 4 && startswith(k, "arg")) { + int j; + + j = undecchar(k[3]); + if (j < 0) + return -EINVAL; + + return BUS_MATCH_ARG + j; + } + + if (n == 5 && startswith(k, "arg")) { + int a, b; + enum bus_match_node_type t; + + a = undecchar(k[3]); + b = undecchar(k[4]); + if (a <= 0 || b < 0) + return -EINVAL; + + t = BUS_MATCH_ARG + a * 10 + b; + if (t > BUS_MATCH_ARG_LAST) + return -EINVAL; + + return t; + } + + if (n == 8 && startswith(k, "arg") && startswith(k + 4, "path")) { + int j; + + j = undecchar(k[3]); + if (j < 0) + return -EINVAL; + + return BUS_MATCH_ARG_PATH + j; + } + + if (n == 9 && startswith(k, "arg") && startswith(k + 5, "path")) { + enum bus_match_node_type t; + int a, b; + + a = undecchar(k[3]); + b = undecchar(k[4]); + if (a <= 0 || b < 0) + return -EINVAL; + + t = BUS_MATCH_ARG_PATH + a * 10 + b; + if (t > BUS_MATCH_ARG_PATH_LAST) + return -EINVAL; + + return t; + } + + if (n == 13 && startswith(k, "arg") && startswith(k + 4, "namespace")) { + int j; + + j = undecchar(k[3]); + if (j < 0) + return -EINVAL; + + return BUS_MATCH_ARG_NAMESPACE + j; + } + + if (n == 14 && startswith(k, "arg") && startswith(k + 5, "namespace")) { + enum bus_match_node_type t; + int a, b; + + a = undecchar(k[3]); + b = undecchar(k[4]); + if (a <= 0 || b < 0) + return -EINVAL; + + t = BUS_MATCH_ARG_NAMESPACE + a * 10 + b; + if (t > BUS_MATCH_ARG_NAMESPACE_LAST) + return -EINVAL; + + return t; + } + + if (n == 7 && startswith(k, "arg") && startswith(k + 4, "has")) { + int j; + + j = undecchar(k[3]); + if (j < 0) + return -EINVAL; + + return BUS_MATCH_ARG_HAS + j; + } + + if (n == 8 && startswith(k, "arg") && startswith(k + 5, "has")) { + enum bus_match_node_type t; + int a, b; + + a = undecchar(k[3]); + b = undecchar(k[4]); + if (a <= 0 || b < 0) + return -EINVAL; + + t = BUS_MATCH_ARG_HAS + a * 10 + b; + if (t > BUS_MATCH_ARG_HAS_LAST) + return -EINVAL; + + return t; + } + + return -EINVAL; +} + +static int match_component_compare(const struct bus_match_component *a, const struct bus_match_component *b) { + return CMP(a->type, b->type); +} + +void bus_match_parse_free(struct bus_match_component *components, size_t n_components) { + for (size_t i = 0; i < n_components; i++) + free(components[i].value_str); + + free(components); +} + +int bus_match_parse( + const char *match, + struct bus_match_component **ret_components, + size_t *ret_n_components) { + + struct bus_match_component *components = NULL; + size_t n_components = 0; + int r; + + assert(match); + assert(ret_components); + assert(ret_n_components); + + CLEANUP_ARRAY(components, n_components, bus_match_parse_free); + + while (*match != '\0') { + const char *eq, *q; + enum bus_match_node_type t; + size_t j = 0; + _cleanup_free_ char *value = NULL; + bool escaped = false, quoted; + uint8_t u; + + /* Avahi's match rules appear to include whitespace, skip over it */ + match += strspn(match, " "); + + eq = strchr(match, '='); + if (!eq) + return -EINVAL; + + t = bus_match_node_type_from_string(match, eq - match); + if (t < 0) + return -EINVAL; + + quoted = eq[1] == '\''; + + for (q = eq + 1 + quoted;; q++) { + + if (*q == '\0') { + + if (quoted) + return -EINVAL; + + if (value) + value[j] = '\0'; + break; + } + + if (!escaped) { + if (*q == '\\') { + escaped = true; + continue; + } + + if (quoted) { + if (*q == '\'') { + if (value) + value[j] = '\0'; + break; + } + } else { + if (*q == ',') { + if (value) + value[j] = '\0'; + break; + } + } + } + + if (!GREEDY_REALLOC(value, j + 2)) + return -ENOMEM; + + value[j++] = *q; + escaped = false; + } + + if (!value) { + value = strdup(""); + if (!value) + return -ENOMEM; + } + + if (t == BUS_MATCH_MESSAGE_TYPE) { + r = bus_message_type_from_string(value, &u); + if (r < 0) + return r; + + value = mfree(value); + } else + u = 0; + + if (!GREEDY_REALLOC(components, n_components + 1)) + return -ENOMEM; + + components[n_components++] = (struct bus_match_component) { + .type = t, + .value_str = TAKE_PTR(value), + .value_u8 = u, + }; + + if (q[quoted] == 0) + break; + + if (q[quoted] != ',') + return -EINVAL; + + match = q + 1 + quoted; + } + + /* Order the whole thing, so that we always generate the same tree */ + typesafe_qsort(components, n_components, match_component_compare); + + /* Check for duplicates */ + for (size_t i = 0; i+1 < n_components; i++) + if (components[i].type == components[i+1].type) + return -EINVAL; + + *ret_components = TAKE_PTR(components); + *ret_n_components = n_components; + + return 0; +} + +char *bus_match_to_string(struct bus_match_component *components, size_t n_components) { + _cleanup_(memstream_done) MemStream m = {}; + FILE *f; + int r; + + if (n_components <= 0) + return strdup(""); + + assert(components); + + f = memstream_init(&m); + if (!f) + return NULL; + + for (size_t i = 0; i < n_components; i++) { + char buf[32]; + + if (i != 0) + fputc(',', f); + + fputs(bus_match_node_type_to_string(components[i].type, buf, sizeof(buf)), f); + fputc('=', f); + fputc('\'', f); + + if (components[i].type == BUS_MATCH_MESSAGE_TYPE) + fputs(bus_message_type_to_string(components[i].value_u8), f); + else + fputs(components[i].value_str, f); + + fputc('\'', f); + } + + char *buffer; + r = memstream_finalize(&m, &buffer, NULL); + if (r < 0) + return NULL; + + return buffer; +} + +int bus_match_add( + struct bus_match_node *root, + struct bus_match_component *components, + size_t n_components, + struct match_callback *callback) { + + int r; + + assert(root); + assert(callback); + + for (size_t i = 0; i < n_components; i++) { + r = bus_match_add_compare_value(root, + components[i].type, + components[i].value_u8, + components[i].value_str, + &root); + if (r < 0) + return r; + } + + return bus_match_add_leaf(root, callback); +} + +int bus_match_remove( + struct bus_match_node *root, + struct match_callback *callback) { + + struct bus_match_node *node, *pp; + + assert(root); + assert(callback); + + node = callback->match_node; + if (!node) + return 0; + + assert(node->type == BUS_MATCH_LEAF); + + callback->match_node = NULL; + + /* Free the leaf */ + pp = node->parent; + bus_match_node_free(node); + + /* Prune the tree above */ + while (pp) { + node = pp; + pp = node->parent; + + if (!bus_match_node_maybe_free(node)) + break; + } + + return 1; +} + +void bus_match_free(struct bus_match_node *node) { + struct bus_match_node *c; + + if (!node) + return; + + if (BUS_MATCH_CAN_HASH(node->type)) { + + HASHMAP_FOREACH(c, node->compare.children) + bus_match_free(c); + + assert(hashmap_isempty(node->compare.children)); + } + + while ((c = node->child)) + bus_match_free(c); + + if (node->type != BUS_MATCH_ROOT) + bus_match_node_free(node); +} + +const char* bus_match_node_type_to_string(enum bus_match_node_type t, char buf[], size_t l) { + switch (t) { + + case BUS_MATCH_ROOT: + return "root"; + + case BUS_MATCH_VALUE: + return "value"; + + case BUS_MATCH_LEAF: + return "leaf"; + + case BUS_MATCH_MESSAGE_TYPE: + return "type"; + + case BUS_MATCH_SENDER: + return "sender"; + + case BUS_MATCH_DESTINATION: + return "destination"; + + case BUS_MATCH_INTERFACE: + return "interface"; + + case BUS_MATCH_MEMBER: + return "member"; + + case BUS_MATCH_PATH: + return "path"; + + case BUS_MATCH_PATH_NAMESPACE: + return "path_namespace"; + + case BUS_MATCH_ARG ... BUS_MATCH_ARG_LAST: + return snprintf_ok(buf, l, "arg%i", t - BUS_MATCH_ARG); + + case BUS_MATCH_ARG_PATH ... BUS_MATCH_ARG_PATH_LAST: + return snprintf_ok(buf, l, "arg%ipath", t - BUS_MATCH_ARG_PATH); + + case BUS_MATCH_ARG_NAMESPACE ... BUS_MATCH_ARG_NAMESPACE_LAST: + return snprintf_ok(buf, l, "arg%inamespace", t - BUS_MATCH_ARG_NAMESPACE); + + case BUS_MATCH_ARG_HAS ... BUS_MATCH_ARG_HAS_LAST: + return snprintf_ok(buf, l, "arg%ihas", t - BUS_MATCH_ARG_HAS); + + default: + return NULL; + } +} + +void bus_match_dump(FILE *out, struct bus_match_node *node, unsigned level) { + char buf[32]; + + if (!node) + return; + + fprintf(out, "%*s[%s]", 2 * (int) level, "", bus_match_node_type_to_string(node->type, buf, sizeof(buf))); + + if (node->type == BUS_MATCH_VALUE) { + if (node->parent->type == BUS_MATCH_MESSAGE_TYPE) + fprintf(out, " <%u>\n", node->value.u8); + else + fprintf(out, " <%s>\n", node->value.str); + } else if (node->type == BUS_MATCH_ROOT) + fputs(" root\n", out); + else if (node->type == BUS_MATCH_LEAF) + fprintf(out, " %p/%p\n", node->leaf.callback->callback, + container_of(node->leaf.callback, sd_bus_slot, match_callback)->userdata); + else + putc('\n', out); + + if (BUS_MATCH_CAN_HASH(node->type)) { + struct bus_match_node *c; + HASHMAP_FOREACH(c, node->compare.children) + bus_match_dump(out, c, level + 1); + } + + for (struct bus_match_node *c = node->child; c; c = c->next) + bus_match_dump(out, c, level + 1); +} + +enum bus_match_scope bus_match_get_scope(const struct bus_match_component *components, size_t n_components) { + bool found_driver = false; + + if (n_components <= 0) + return BUS_MATCH_GENERIC; + + assert(components); + + /* Checks whether the specified match can only match the + * pseudo-service for local messages, which we detect by + * sender, interface or path. If a match is not restricted to + * local messages, then we check if it only matches on the + * driver. */ + + for (size_t i = 0; i < n_components; i++) { + const struct bus_match_component *c = components + i; + + if (c->type == BUS_MATCH_SENDER) { + if (streq_ptr(c->value_str, "org.freedesktop.DBus.Local")) + return BUS_MATCH_LOCAL; + + if (streq_ptr(c->value_str, "org.freedesktop.DBus")) + found_driver = true; + } + + if (c->type == BUS_MATCH_INTERFACE && streq_ptr(c->value_str, "org.freedesktop.DBus.Local")) + return BUS_MATCH_LOCAL; + + if (c->type == BUS_MATCH_PATH && streq_ptr(c->value_str, "/org/freedesktop/DBus/Local")) + return BUS_MATCH_LOCAL; + } + + return found_driver ? BUS_MATCH_DRIVER : BUS_MATCH_GENERIC; +} diff --git a/src/libsystemd/sd-bus/bus-match.h b/src/libsystemd/sd-bus/bus-match.h new file mode 100644 index 0000000..ccb6aae --- /dev/null +++ b/src/libsystemd/sd-bus/bus-match.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdio.h> + +#include "sd-bus.h" + +#include "hashmap.h" + +enum bus_match_node_type { + BUS_MATCH_ROOT, + BUS_MATCH_VALUE, + BUS_MATCH_LEAF, + + /* The following are all different kinds of compare nodes */ + BUS_MATCH_SENDER, + BUS_MATCH_MESSAGE_TYPE, + BUS_MATCH_DESTINATION, + BUS_MATCH_INTERFACE, + BUS_MATCH_MEMBER, + BUS_MATCH_PATH, + BUS_MATCH_PATH_NAMESPACE, + BUS_MATCH_ARG, + BUS_MATCH_ARG_LAST = BUS_MATCH_ARG + 63, + BUS_MATCH_ARG_PATH, + BUS_MATCH_ARG_PATH_LAST = BUS_MATCH_ARG_PATH + 63, + BUS_MATCH_ARG_NAMESPACE, + BUS_MATCH_ARG_NAMESPACE_LAST = BUS_MATCH_ARG_NAMESPACE + 63, + BUS_MATCH_ARG_HAS, + BUS_MATCH_ARG_HAS_LAST = BUS_MATCH_ARG_HAS + 63, + _BUS_MATCH_NODE_TYPE_MAX, + _BUS_MATCH_NODE_TYPE_INVALID = -EINVAL, +}; + +struct bus_match_node { + enum bus_match_node_type type; + struct bus_match_node *parent, *next, *prev, *child; + + union { + struct { + char *str; + uint8_t u8; + } value; + struct { + struct match_callback *callback; + } leaf; + struct { + /* If this is set, then the child is NULL */ + Hashmap *children; + } compare; + }; +}; + +struct bus_match_component { + enum bus_match_node_type type; + uint8_t value_u8; + char *value_str; +}; + +enum bus_match_scope { + BUS_MATCH_GENERIC, + BUS_MATCH_LOCAL, + BUS_MATCH_DRIVER, +}; + +int bus_match_run(sd_bus *bus, struct bus_match_node *root, sd_bus_message *m); + +int bus_match_add(struct bus_match_node *root, struct bus_match_component *components, size_t n_components, struct match_callback *callback); +int bus_match_remove(struct bus_match_node *root, struct match_callback *callback); + +void bus_match_free(struct bus_match_node *node); + +void bus_match_dump(FILE *out, struct bus_match_node *node, unsigned level); + +const char* bus_match_node_type_to_string(enum bus_match_node_type t, char buf[], size_t l); +enum bus_match_node_type bus_match_node_type_from_string(const char *k, size_t n); + +int bus_match_parse(const char *match, struct bus_match_component **ret_components, size_t *ret_n_components); +void bus_match_parse_free(struct bus_match_component *components, size_t n_components); +char *bus_match_to_string(struct bus_match_component *components, size_t n_components); + +enum bus_match_scope bus_match_get_scope(const struct bus_match_component *components, size_t n_components); diff --git a/src/libsystemd/sd-bus/bus-message.c b/src/libsystemd/sd-bus/bus-message.c new file mode 100644 index 0000000..ab8b068 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-message.c @@ -0,0 +1,4712 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <sys/mman.h> + +#include "sd-bus.h" + +#include "alloc-util.h" +#include "bus-internal.h" +#include "bus-message.h" +#include "bus-signature.h" +#include "bus-type.h" +#include "fd-util.h" +#include "iovec-util.h" +#include "memfd-util.h" +#include "memory-util.h" +#include "string-util.h" +#include "strv.h" +#include "time-util.h" +#include "utf8.h" + +static int message_append_basic(sd_bus_message *m, char type, const void *p, const void **stored); +static int message_parse_fields(sd_bus_message *m); + +static void *adjust_pointer(const void *p, void *old_base, size_t sz, void *new_base) { + + if (!p) + return NULL; + + if (old_base == new_base) + return (void*) p; + + if ((uint8_t*) p < (uint8_t*) old_base) + return (void*) p; + + if ((uint8_t*) p >= (uint8_t*) old_base + sz) + return (void*) p; + + return (uint8_t*) new_base + ((uint8_t*) p - (uint8_t*) old_base); +} + +static void message_free_part(sd_bus_message *m, struct bus_body_part *part) { + assert(m); + assert(part); + + if (part->memfd >= 0) { + /* erase if requested, but only if the memfd is not sealed yet, i.e. is writable */ + if (m->sensitive && !m->sealed) + explicit_bzero_safe(part->data, part->size); + + close_and_munmap(part->memfd, part->mmap_begin, part->mapped); + } else if (part->munmap_this) + /* We don't erase sensitive data here, since the data is memory mapped from someone else, and + * we just don't know if it's OK to write to it */ + munmap(part->mmap_begin, part->mapped); + else { + /* Erase this if that is requested. Since this is regular memory we know we can write it. */ + if (m->sensitive) + explicit_bzero_safe(part->data, part->size); + + if (part->free_this) + free(part->data); + } + + if (part != &m->body) + free(part); +} + +static void message_reset_parts(sd_bus_message *m) { + struct bus_body_part *part; + + assert(m); + + part = &m->body; + while (m->n_body_parts > 0) { + struct bus_body_part *next = part->next; + message_free_part(m, part); + part = next; + m->n_body_parts--; + } + + m->body_end = NULL; + + m->cached_rindex_part = NULL; + m->cached_rindex_part_begin = 0; +} + +static struct bus_container *message_get_last_container(sd_bus_message *m) { + assert(m); + + if (m->n_containers == 0) + return &m->root_container; + + assert(m->containers); + return m->containers + m->n_containers - 1; +} + +static void message_free_last_container(sd_bus_message *m) { + struct bus_container *c; + + c = message_get_last_container(m); + + free(c->signature); + free(c->peeked_signature); + + /* Move to previous container, but not if we are on root container */ + if (m->n_containers > 0) + m->n_containers--; +} + +static void message_reset_containers(sd_bus_message *m) { + assert(m); + + while (m->n_containers > 0) + message_free_last_container(m); + + m->containers = mfree(m->containers); + m->root_container.index = 0; +} + +static sd_bus_message* message_free(sd_bus_message *m) { + assert(m); + + message_reset_parts(m); + + if (m->free_header) + free(m->header); + + /* Note that we don't unref m->bus here. That's already done by sd_bus_message_unref() as each user + * reference to the bus message also is considered a reference to the bus connection itself. */ + + if (m->free_fds) { + close_many(m->fds, m->n_fds); + free(m->fds); + } + + if (m->iovec != m->iovec_fixed) + free(m->iovec); + + message_reset_containers(m); + assert(m->n_containers == 0); + message_free_last_container(m); + + bus_creds_done(&m->creds); + return mfree(m); +} + +static void *message_extend_fields(sd_bus_message *m, size_t sz, bool add_offset) { + void *op, *np; + size_t old_size, new_size, start; + + assert(m); + + if (m->poisoned) + return NULL; + + old_size = sizeof(struct bus_header) + m->fields_size; + start = ALIGN8(old_size); + new_size = start + sz; + + if (new_size < start || new_size > UINT32_MAX) + goto poison; + + if (old_size == new_size) + return (uint8_t*) m->header + old_size; + + if (m->free_header) { + np = realloc(m->header, ALIGN8(new_size)); + if (!np) + goto poison; + } else { + /* Initially, the header is allocated as part of + * the sd_bus_message itself, let's replace it by + * dynamic data */ + + np = malloc(ALIGN8(new_size)); + if (!np) + goto poison; + + memcpy(np, m->header, sizeof(struct bus_header)); + } + + /* Zero out padding */ + if (start > old_size) + memzero((uint8_t*) np + old_size, start - old_size); + + op = m->header; + m->header = np; + m->fields_size = new_size - sizeof(struct bus_header); + + /* Adjust quick access pointers */ + m->path = adjust_pointer(m->path, op, old_size, m->header); + m->interface = adjust_pointer(m->interface, op, old_size, m->header); + m->member = adjust_pointer(m->member, op, old_size, m->header); + m->destination = adjust_pointer(m->destination, op, old_size, m->header); + m->sender = adjust_pointer(m->sender, op, old_size, m->header); + m->error.name = adjust_pointer(m->error.name, op, old_size, m->header); + + m->free_header = true; + + if (add_offset) { + if (m->n_header_offsets >= ELEMENTSOF(m->header_offsets)) + goto poison; + + m->header_offsets[m->n_header_offsets++] = new_size - sizeof(struct bus_header); + } + + return (uint8_t*) np + start; + +poison: + m->poisoned = true; + return NULL; +} + +static int message_append_field_string( + sd_bus_message *m, + uint64_t h, + char type, + const char *s, + const char **ret) { + + size_t l; + uint8_t *p; + + assert(m); + + /* dbus only allows 8-bit header field ids */ + if (h > 0xFF) + return -EINVAL; + + /* dbus doesn't allow strings over 32-bit */ + l = strlen(s); + if (l > UINT32_MAX) + return -EINVAL; + + /* Signature "(yv)" where the variant contains "s" */ + + /* (field id byte + (signature length + signature 's' + NUL) + (string length + string + NUL)) */ + p = message_extend_fields(m, 4 + 4 + l + 1, false); + if (!p) + return -ENOMEM; + + p[0] = (uint8_t) h; + p[1] = 1; + p[2] = type; + p[3] = 0; + + ((uint32_t*) p)[1] = l; + memcpy(p + 8, s, l + 1); + + if (ret) + *ret = (char*) p + 8; + + return 0; +} + +static int message_append_field_signature( + sd_bus_message *m, + uint64_t h, + const char *s, + const char **ret) { + + size_t l; + uint8_t *p; + + assert(m); + + /* dbus only allows 8-bit header field ids */ + if (h > 0xFF) + return -EINVAL; + + /* dbus doesn't allow signatures over 8-bit */ + l = strlen(s); + if (l > SD_BUS_MAXIMUM_SIGNATURE_LENGTH) + return -EINVAL; + + /* Signature "(yv)" where the variant contains "g" */ + + /* (field id byte + (signature length + signature 'g' + NUL) + (string length + string + NUL)) */ + p = message_extend_fields(m, 4 + 1 + l + 1, false); + if (!p) + return -ENOMEM; + + p[0] = (uint8_t) h; + p[1] = 1; + p[2] = SD_BUS_TYPE_SIGNATURE; + p[3] = 0; + p[4] = l; + memcpy(p + 5, s, l + 1); + + if (ret) + *ret = (const char*) p + 5; + + return 0; +} + +static int message_append_field_uint32(sd_bus_message *m, uint64_t h, uint32_t x) { + uint8_t *p; + + assert(m); + + /* dbus only allows 8-bit header field ids */ + if (h > 0xFF) + return -EINVAL; + + /* (field id byte + (signature length + signature 'u' + NUL) + value) */ + p = message_extend_fields(m, 4 + 4, false); + if (!p) + return -ENOMEM; + + p[0] = (uint8_t) h; + p[1] = 1; + p[2] = 'u'; + p[3] = 0; + + ((uint32_t*) p)[1] = x; + + return 0; +} + +static int message_append_reply_cookie(sd_bus_message *m, uint64_t cookie) { + assert(m); + + /* 64-bit cookies are not supported */ + if (cookie > UINT32_MAX) + return -EOPNOTSUPP; + + return message_append_field_uint32(m, BUS_MESSAGE_HEADER_REPLY_SERIAL, (uint32_t) cookie); +} + +static int message_from_header( + sd_bus *bus, + void *buffer, + size_t message_size, + int *fds, + size_t n_fds, + const char *label, + sd_bus_message **ret) { + + _cleanup_free_ sd_bus_message *m = NULL; + struct bus_header *h; + size_t a, label_sz = 0; /* avoid false maybe-uninitialized warning */ + + assert(bus); + assert(buffer || message_size <= 0); + assert(fds || n_fds <= 0); + assert(ret); + + if (message_size < sizeof(struct bus_header)) + return -EBADMSG; + + h = buffer; + if (!IN_SET(h->version, 1, 2)) + return -EBADMSG; + + if (h->type == _SD_BUS_MESSAGE_TYPE_INVALID) + return -EBADMSG; + + if (!IN_SET(h->endian, BUS_LITTLE_ENDIAN, BUS_BIG_ENDIAN)) + return -EBADMSG; + + /* Note that we are happy with unknown flags in the flags header! */ + + a = ALIGN(sizeof(sd_bus_message)); + + if (label) { + label_sz = strlen(label); + a += label_sz + 1; + } + + m = malloc0(a); + if (!m) + return -ENOMEM; + + m->sealed = true; + m->header = buffer; + + if (h->serial == 0) + return -EBADMSG; + + m->fields_size = BUS_MESSAGE_BSWAP32(m, h->fields_size); + m->body_size = BUS_MESSAGE_BSWAP32(m, h->body_size); + + assert(message_size >= sizeof(struct bus_header)); + if (ALIGN8(m->fields_size) > message_size - sizeof(struct bus_header) || + m->body_size != message_size - sizeof(struct bus_header) - ALIGN8(m->fields_size)) + return -EBADMSG; + + m->fds = fds; + m->n_fds = n_fds; + + if (label) { + m->creds.label = (char*) m + ALIGN(sizeof(sd_bus_message)); + memcpy(m->creds.label, label, label_sz + 1); + + m->creds.mask |= SD_BUS_CREDS_SELINUX_CONTEXT; + } + + m->n_ref = 1; + m->bus = sd_bus_ref(bus); + + *ret = TAKE_PTR(m); + + return 0; +} + +int bus_message_from_malloc( + sd_bus *bus, + void *buffer, + size_t length, + int *fds, + size_t n_fds, + const char *label, + sd_bus_message **ret) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + size_t sz; + int r; + + r = message_from_header( + bus, + buffer, length, + fds, n_fds, + label, + &m); + if (r < 0) + return r; + + sz = length - sizeof(struct bus_header) - ALIGN8(m->fields_size); + if (sz > 0) { + m->n_body_parts = 1; + m->body.data = (uint8_t*) buffer + sizeof(struct bus_header) + ALIGN8(m->fields_size); + m->body.size = sz; + m->body.sealed = true; + m->body.memfd = -EBADF; + } + + m->n_iovec = 1; + m->iovec = m->iovec_fixed; + m->iovec[0] = IOVEC_MAKE(buffer, length); + + r = message_parse_fields(m); + if (r < 0) + return r; + + /* We take possession of the memory and fds now */ + m->free_header = true; + m->free_fds = true; + + *ret = TAKE_PTR(m); + return 0; +} + +_public_ int sd_bus_message_new( + sd_bus *bus, + sd_bus_message **m, + uint8_t type) { + + assert_return(bus, -ENOTCONN); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(bus->state != BUS_UNSET, -ENOTCONN); + assert_return(m, -EINVAL); + /* Creation of messages with _SD_BUS_MESSAGE_TYPE_INVALID is allowed. */ + assert_return(type < _SD_BUS_MESSAGE_TYPE_MAX, -EINVAL); + + sd_bus_message *t = malloc0(ALIGN(sizeof(sd_bus_message)) + sizeof(struct bus_header)); + if (!t) + return -ENOMEM; + + t->n_ref = 1; + t->bus = sd_bus_ref(bus); + t->header = (struct bus_header*) ((uint8_t*) t + ALIGN(sizeof(struct sd_bus_message))); + t->header->endian = BUS_NATIVE_ENDIAN; + t->header->type = type; + t->header->version = bus->message_version; + t->allow_fds = bus->can_fds || !IN_SET(bus->state, BUS_HELLO, BUS_RUNNING); + + if (bus->allow_interactive_authorization) + t->header->flags |= BUS_MESSAGE_ALLOW_INTERACTIVE_AUTHORIZATION; + + *m = t; + return 0; +} + +_public_ int sd_bus_message_new_signal_to( + sd_bus *bus, + sd_bus_message **m, + const char *destination, + const char *path, + const char *interface, + const char *member) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *t = NULL; + int r; + + assert_return(bus, -ENOTCONN); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(bus->state != BUS_UNSET, -ENOTCONN); + assert_return(!destination || service_name_is_valid(destination), -EINVAL); + assert_return(object_path_is_valid(path), -EINVAL); + assert_return(interface_name_is_valid(interface), -EINVAL); + assert_return(member_name_is_valid(member), -EINVAL); + assert_return(m, -EINVAL); + + r = sd_bus_message_new(bus, &t, SD_BUS_MESSAGE_SIGNAL); + if (r < 0) + return -ENOMEM; + + assert(t); + + t->header->flags |= BUS_MESSAGE_NO_REPLY_EXPECTED; + + r = message_append_field_string(t, BUS_MESSAGE_HEADER_PATH, SD_BUS_TYPE_OBJECT_PATH, path, &t->path); + if (r < 0) + return r; + r = message_append_field_string(t, BUS_MESSAGE_HEADER_INTERFACE, SD_BUS_TYPE_STRING, interface, &t->interface); + if (r < 0) + return r; + r = message_append_field_string(t, BUS_MESSAGE_HEADER_MEMBER, SD_BUS_TYPE_STRING, member, &t->member); + if (r < 0) + return r; + + if (destination) { + r = message_append_field_string(t, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, destination, &t->destination); + if (r < 0) + return r; + } + + *m = TAKE_PTR(t); + return 0; +} + +_public_ int sd_bus_message_new_signal( + sd_bus *bus, + sd_bus_message **m, + const char *path, + const char *interface, + const char *member) { + + return sd_bus_message_new_signal_to(bus, m, NULL, path, interface, member); +} + +_public_ int sd_bus_message_new_method_call( + sd_bus *bus, + sd_bus_message **m, + const char *destination, + const char *path, + const char *interface, + const char *member) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *t = NULL; + int r; + + assert_return(bus, -ENOTCONN); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(bus->state != BUS_UNSET, -ENOTCONN); + assert_return(!destination || service_name_is_valid(destination), -EINVAL); + assert_return(object_path_is_valid(path), -EINVAL); + assert_return(!interface || interface_name_is_valid(interface), -EINVAL); + assert_return(member_name_is_valid(member), -EINVAL); + assert_return(m, -EINVAL); + + r = sd_bus_message_new(bus, &t, SD_BUS_MESSAGE_METHOD_CALL); + if (r < 0) + return -ENOMEM; + + assert(t); + + r = message_append_field_string(t, BUS_MESSAGE_HEADER_PATH, SD_BUS_TYPE_OBJECT_PATH, path, &t->path); + if (r < 0) + return r; + r = message_append_field_string(t, BUS_MESSAGE_HEADER_MEMBER, SD_BUS_TYPE_STRING, member, &t->member); + if (r < 0) + return r; + + if (interface) { + r = message_append_field_string(t, BUS_MESSAGE_HEADER_INTERFACE, SD_BUS_TYPE_STRING, interface, &t->interface); + if (r < 0) + return r; + } + + if (destination) { + r = message_append_field_string(t, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, destination, &t->destination); + if (r < 0) + return r; + } + + *m = TAKE_PTR(t); + return 0; +} + +static int message_new_reply( + sd_bus_message *call, + uint8_t type, + sd_bus_message **m) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *t = NULL; + uint64_t cookie; + int r; + + assert_return(call, -EINVAL); + assert_return(call->sealed, -EPERM); + assert_return(call->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL); + assert_return(call->bus->state != BUS_UNSET, -ENOTCONN); + assert_return(m, -EINVAL); + + cookie = BUS_MESSAGE_COOKIE(call); + if (cookie == 0) + return -EOPNOTSUPP; + + r = sd_bus_message_new(call->bus, &t, type); + if (r < 0) + return -ENOMEM; + + assert(t); + + t->header->flags |= BUS_MESSAGE_NO_REPLY_EXPECTED; + t->reply_cookie = cookie; + r = message_append_reply_cookie(t, t->reply_cookie); + if (r < 0) + return r; + + if (call->sender) { + r = message_append_field_string(t, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, call->sender, &t->destination); + if (r < 0) + return r; + } + + t->dont_send = !!(call->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED); + t->enforced_reply_signature = call->enforced_reply_signature; + + /* let's copy the sensitive flag over. Let's do that as a safety precaution to keep a transaction + * wholly sensitive if already the incoming message was sensitive. This is particularly useful when a + * vtable record sets the SD_BUS_VTABLE_SENSITIVE flag on a method call, since this means it applies + * to both the message call and the reply. */ + t->sensitive = call->sensitive; + + *m = TAKE_PTR(t); + return 0; +} + +_public_ int sd_bus_message_new_method_return( + sd_bus_message *call, + sd_bus_message **m) { + + return message_new_reply(call, SD_BUS_MESSAGE_METHOD_RETURN, m); +} + +_public_ int sd_bus_message_new_method_error( + sd_bus_message *call, + sd_bus_message **m, + const sd_bus_error *e) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *t = NULL; + int r; + + assert_return(sd_bus_error_is_set(e), -EINVAL); + assert_return(m, -EINVAL); + + r = message_new_reply(call, SD_BUS_MESSAGE_METHOD_ERROR, &t); + if (r < 0) + return r; + + r = message_append_field_string(t, BUS_MESSAGE_HEADER_ERROR_NAME, SD_BUS_TYPE_STRING, e->name, &t->error.name); + if (r < 0) + return r; + + if (e->message) { + r = message_append_basic(t, SD_BUS_TYPE_STRING, e->message, (const void**) &t->error.message); + if (r < 0) + return r; + } + + t->error._need_free = -1; + + *m = TAKE_PTR(t); + return 0; +} + +_public_ int sd_bus_message_new_method_errorf( + sd_bus_message *call, + sd_bus_message **m, + const char *name, + const char *format, + ...) { + + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + va_list ap; + + assert_return(name, -EINVAL); + assert_return(m, -EINVAL); + + va_start(ap, format); + sd_bus_error_setfv(&error, name, format, ap); + va_end(ap); + + return sd_bus_message_new_method_error(call, m, &error); +} + +_public_ int sd_bus_message_new_method_errno( + sd_bus_message *call, + sd_bus_message **m, + int error, + const sd_bus_error *p) { + + _cleanup_(sd_bus_error_free) sd_bus_error berror = SD_BUS_ERROR_NULL; + + if (sd_bus_error_is_set(p)) + return sd_bus_message_new_method_error(call, m, p); + + sd_bus_error_set_errno(&berror, error); + + return sd_bus_message_new_method_error(call, m, &berror); +} + +_public_ int sd_bus_message_new_method_errnof( + sd_bus_message *call, + sd_bus_message **m, + int error, + const char *format, + ...) { + + _cleanup_(sd_bus_error_free) sd_bus_error berror = SD_BUS_ERROR_NULL; + va_list ap; + + va_start(ap, format); + sd_bus_error_set_errnofv(&berror, error, format, ap); + va_end(ap); + + return sd_bus_message_new_method_error(call, m, &berror); +} + +void bus_message_set_sender_local(sd_bus *bus, sd_bus_message *m) { + assert(bus); + assert(m); + + m->sender = m->creds.unique_name = (char*) "org.freedesktop.DBus.Local"; + m->creds.well_known_names_local = true; + m->creds.mask |= (SD_BUS_CREDS_UNIQUE_NAME|SD_BUS_CREDS_WELL_KNOWN_NAMES) & bus->creds_mask; +} + +void bus_message_set_sender_driver(sd_bus *bus, sd_bus_message *m) { + assert(bus); + assert(m); + + m->sender = m->creds.unique_name = (char*) "org.freedesktop.DBus"; + m->creds.well_known_names_driver = true; + m->creds.mask |= (SD_BUS_CREDS_UNIQUE_NAME|SD_BUS_CREDS_WELL_KNOWN_NAMES) & bus->creds_mask; +} + +int bus_message_new_synthetic_error( + sd_bus *bus, + uint64_t cookie, + const sd_bus_error *e, + sd_bus_message **m) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *t = NULL; + int r; + + assert(bus); + assert(sd_bus_error_is_set(e)); + assert(m); + + r = sd_bus_message_new(bus, &t, SD_BUS_MESSAGE_METHOD_ERROR); + if (r < 0) + return -ENOMEM; + + assert(t); + + t->header->flags |= BUS_MESSAGE_NO_REPLY_EXPECTED; + t->reply_cookie = cookie; + + r = message_append_reply_cookie(t, t->reply_cookie); + if (r < 0) + return r; + + if (bus && bus->unique_name) { + r = message_append_field_string(t, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, bus->unique_name, &t->destination); + if (r < 0) + return r; + } + + r = message_append_field_string(t, BUS_MESSAGE_HEADER_ERROR_NAME, SD_BUS_TYPE_STRING, e->name, &t->error.name); + if (r < 0) + return r; + + if (e->message) { + r = message_append_basic(t, SD_BUS_TYPE_STRING, e->message, (const void**) &t->error.message); + if (r < 0) + return r; + } + + t->error._need_free = -1; + + bus_message_set_sender_driver(bus, t); + + *m = TAKE_PTR(t); + return 0; +} + +_public_ sd_bus_message* sd_bus_message_ref(sd_bus_message *m) { + if (!m) + return NULL; + + /* We are fine if this message so far was either explicitly reffed or not reffed but queued into at + * least one bus connection object. */ + assert(m->n_ref > 0 || m->n_queued > 0); + + m->n_ref++; + + /* Each user reference to a bus message shall also be considered a ref on the bus */ + sd_bus_ref(m->bus); + return m; +} + +_public_ sd_bus_message* sd_bus_message_unref(sd_bus_message *m) { + if (!m) + return NULL; + + assert(m->n_ref > 0); + + sd_bus_unref(m->bus); /* Each regular ref is also a ref on the bus connection. Let's hence drop it + * here. Note we have to do this before decrementing our own n_ref here, since + * otherwise, if this message is currently queued sd_bus_unref() might call + * bus_message_unref_queued() for this which might then destroy the message + * while we are still processing it. */ + m->n_ref--; + + if (m->n_ref > 0 || m->n_queued > 0) + return NULL; + + /* Unset the bus field if neither the user has a reference nor this message is queued. We are careful + * to reset the field only after the last reference to the bus is dropped, after all we might keep + * multiple references to the bus, once for each reference kept on ourselves. */ + m->bus = NULL; + + return message_free(m); +} + +sd_bus_message* bus_message_ref_queued(sd_bus_message *m, sd_bus *bus) { + if (!m) + return NULL; + + /* If this is a different bus than the message is associated with, then implicitly turn this into a + * regular reference. This means that you can create a memory leak by enqueuing a message generated + * on one bus onto another at the same time as enqueueing a message from the second one on the first, + * as we'll not detect the cyclic references there. */ + if (bus != m->bus) + return sd_bus_message_ref(m); + + assert(m->n_ref > 0 || m->n_queued > 0); + m->n_queued++; + + return m; +} + +sd_bus_message* bus_message_unref_queued(sd_bus_message *m, sd_bus *bus) { + if (!m) + return NULL; + + if (bus != m->bus) + return sd_bus_message_unref(m); + + assert(m->n_queued > 0); + m->n_queued--; + + if (m->n_ref > 0 || m->n_queued > 0) + return NULL; + + m->bus = NULL; + + return message_free(m); +} + +_public_ int sd_bus_message_get_type(sd_bus_message *m, uint8_t *type) { + assert_return(m, -EINVAL); + assert_return(type, -EINVAL); + + *type = m->header->type; + return 0; +} + +_public_ int sd_bus_message_get_cookie(sd_bus_message *m, uint64_t *cookie) { + uint64_t c; + + assert_return(m, -EINVAL); + assert_return(cookie, -EINVAL); + + c = BUS_MESSAGE_COOKIE(m); + if (c == 0) + return -ENODATA; + + *cookie = BUS_MESSAGE_COOKIE(m); + return 0; +} + +_public_ int sd_bus_message_get_reply_cookie(sd_bus_message *m, uint64_t *cookie) { + assert_return(m, -EINVAL); + assert_return(cookie, -EINVAL); + + if (m->reply_cookie == 0) + return -ENODATA; + + *cookie = m->reply_cookie; + return 0; +} + +_public_ int sd_bus_message_get_expect_reply(sd_bus_message *m) { + assert_return(m, -EINVAL); + + return m->header->type == SD_BUS_MESSAGE_METHOD_CALL && + !(m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED); +} + +_public_ int sd_bus_message_get_auto_start(sd_bus_message *m) { + assert_return(m, -EINVAL); + + return !(m->header->flags & BUS_MESSAGE_NO_AUTO_START); +} + +_public_ int sd_bus_message_get_allow_interactive_authorization(sd_bus_message *m) { + assert_return(m, -EINVAL); + + return m->header->type == SD_BUS_MESSAGE_METHOD_CALL && + (m->header->flags & BUS_MESSAGE_ALLOW_INTERACTIVE_AUTHORIZATION); +} + +_public_ const char *sd_bus_message_get_path(sd_bus_message *m) { + assert_return(m, NULL); + + return m->path; +} + +_public_ const char *sd_bus_message_get_interface(sd_bus_message *m) { + assert_return(m, NULL); + + return m->interface; +} + +_public_ const char *sd_bus_message_get_member(sd_bus_message *m) { + assert_return(m, NULL); + + return m->member; +} + +_public_ const char *sd_bus_message_get_destination(sd_bus_message *m) { + assert_return(m, NULL); + + return m->destination; +} + +_public_ const char *sd_bus_message_get_sender(sd_bus_message *m) { + assert_return(m, NULL); + + return m->sender; +} + +_public_ const sd_bus_error *sd_bus_message_get_error(sd_bus_message *m) { + assert_return(m, NULL); + + if (!sd_bus_error_is_set(&m->error)) + return NULL; + + return &m->error; +} + +_public_ int sd_bus_message_get_monotonic_usec(sd_bus_message *m, uint64_t *usec) { + assert_return(m, -EINVAL); + assert_return(usec, -EINVAL); + + if (m->monotonic <= 0) + return -ENODATA; + + *usec = m->monotonic; + return 0; +} + +_public_ int sd_bus_message_get_realtime_usec(sd_bus_message *m, uint64_t *usec) { + assert_return(m, -EINVAL); + assert_return(usec, -EINVAL); + + if (m->realtime <= 0) + return -ENODATA; + + *usec = m->realtime; + return 0; +} + +_public_ int sd_bus_message_get_seqnum(sd_bus_message *m, uint64_t *seqnum) { + assert_return(m, -EINVAL); + assert_return(seqnum, -EINVAL); + + if (m->seqnum <= 0) + return -ENODATA; + + *seqnum = m->seqnum; + return 0; +} + +_public_ sd_bus_creds *sd_bus_message_get_creds(sd_bus_message *m) { + assert_return(m, NULL); + + if (m->creds.mask == 0) + return NULL; + + return &m->creds; +} + +_public_ int sd_bus_message_is_signal( + sd_bus_message *m, + const char *interface, + const char *member) { + + assert_return(m, -EINVAL); + + if (m->header->type != SD_BUS_MESSAGE_SIGNAL) + return 0; + + if (interface && !streq_ptr(m->interface, interface)) + return 0; + + if (member && !streq_ptr(m->member, member)) + return 0; + + return 1; +} + +_public_ int sd_bus_message_is_method_call( + sd_bus_message *m, + const char *interface, + const char *member) { + + assert_return(m, -EINVAL); + + if (m->header->type != SD_BUS_MESSAGE_METHOD_CALL) + return 0; + + if (interface && !streq_ptr(m->interface, interface)) + return 0; + + if (member && !streq_ptr(m->member, member)) + return 0; + + return 1; +} + +_public_ int sd_bus_message_is_method_error(sd_bus_message *m, const char *name) { + assert_return(m, -EINVAL); + + if (m->header->type != SD_BUS_MESSAGE_METHOD_ERROR) + return 0; + + if (name && !streq_ptr(m->error.name, name)) + return 0; + + return 1; +} + +_public_ int sd_bus_message_set_expect_reply(sd_bus_message *m, int b) { + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(m->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EPERM); + + SET_FLAG(m->header->flags, BUS_MESSAGE_NO_REPLY_EXPECTED, !b); + + return 0; +} + +_public_ int sd_bus_message_set_auto_start(sd_bus_message *m, int b) { + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + + SET_FLAG(m->header->flags, BUS_MESSAGE_NO_AUTO_START, !b); + + return 0; +} + +_public_ int sd_bus_message_set_allow_interactive_authorization(sd_bus_message *m, int b) { + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + + SET_FLAG(m->header->flags, BUS_MESSAGE_ALLOW_INTERACTIVE_AUTHORIZATION, b); + + return 0; +} + +static struct bus_body_part *message_append_part(sd_bus_message *m) { + struct bus_body_part *part; + + assert(m); + + if (m->poisoned) + return NULL; + + if (m->n_body_parts <= 0) { + part = &m->body; + zero(*part); + } else { + assert(m->body_end); + + part = new0(struct bus_body_part, 1); + if (!part) { + m->poisoned = true; + return NULL; + } + + m->body_end->next = part; + } + + part->memfd = -EBADF; + m->body_end = part; + m->n_body_parts++; + + return part; +} + +static void part_zero(struct bus_body_part *part, size_t sz) { + assert(part); + assert(sz > 0); + assert(sz < 8); + + /* All other fields can be left in their defaults */ + assert(!part->data); + assert(part->memfd < 0); + + part->size = sz; + part->is_zero = true; + part->sealed = true; +} + +static int part_make_space( + struct sd_bus_message *m, + struct bus_body_part *part, + size_t sz, + void **q) { + + void *n; + + assert(m); + assert(part); + assert(!part->sealed); + + if (m->poisoned) + return -ENOMEM; + + if (part->allocated == 0 || sz > part->allocated) { + size_t new_allocated; + + new_allocated = sz > 0 ? 2 * sz : 64; + n = realloc(part->data, new_allocated); + if (!n) { + m->poisoned = true; + return -ENOMEM; + } + + part->data = n; + part->allocated = new_allocated; + part->free_this = true; + } + + if (q) + *q = part->data ? (uint8_t*) part->data + part->size : NULL; + + part->size = sz; + return 0; +} + +static void message_extend_containers(sd_bus_message *m, size_t expand) { + assert(m); + + if (expand <= 0) + return; + + if (m->n_containers <= 0) + return; + + /* Update counters */ + for (struct bus_container *c = m->containers; c < m->containers + m->n_containers; c++) + if (c->array_size) + *c->array_size += expand; +} + +static void *message_extend_body( + sd_bus_message *m, + size_t align, + size_t sz) { + + size_t start_body, end_body, padding, added; + void *p; + int r; + + assert(m); + assert(align > 0); + assert(!m->sealed); + + if (m->poisoned) + return NULL; + + start_body = ALIGN_TO(m->body_size, align); + end_body = start_body + sz; + + padding = start_body - m->body_size; + added = padding + sz; + + /* Check for 32-bit overflows */ + if (end_body < start_body || end_body > UINT32_MAX) { + m->poisoned = true; + return NULL; + } + + if (added > 0) { + struct bus_body_part *part = NULL; + bool add_new_part; + + add_new_part = + m->n_body_parts <= 0 || + m->body_end->sealed || + (padding != ALIGN_TO(m->body_end->size, align) - m->body_end->size); + /* If this must be an inlined extension, let's create a new part if + * the previous part is large enough to be inlined. */ + + if (add_new_part) { + if (padding > 0) { + part = message_append_part(m); + if (!part) + return NULL; + + part_zero(part, padding); + } + + part = message_append_part(m); + if (!part) + return NULL; + + r = part_make_space(m, part, sz, &p); + if (r < 0) + return NULL; + } else { + void *op; + size_t os, start_part, end_part; + + part = m->body_end; + op = part->data; + os = part->size; + + start_part = ALIGN_TO(part->size, align); + end_part = start_part + sz; + + r = part_make_space(m, part, end_part, &p); + if (r < 0) + return NULL; + + if (padding > 0) { + memzero(p, padding); + p = (uint8_t*) p + padding; + } + + /* Readjust pointers */ + if (m->n_containers > 0) + for (struct bus_container *c = m->containers; c < m->containers + m->n_containers; c++) + c->array_size = adjust_pointer(c->array_size, op, os, part->data); + + m->error.message = (const char*) adjust_pointer(m->error.message, op, os, part->data); + } + } else + /* Return something that is not NULL and is aligned */ + p = (uint8_t*) align; + + m->body_size = end_body; + message_extend_containers(m, added); + + return p; +} + +static int message_push_fd(sd_bus_message *m, int fd) { + int *f, copy; + + assert(m); + + if (fd < 0) + return -EINVAL; + + if (!m->allow_fds) + return -EOPNOTSUPP; + + copy = fcntl(fd, F_DUPFD_CLOEXEC, 3); + if (copy < 0) + return -errno; + + f = reallocarray(m->fds, m->n_fds + 1, sizeof(int)); + if (!f) { + m->poisoned = true; + safe_close(copy); + return -ENOMEM; + } + + m->fds = f; + m->fds[m->n_fds] = copy; + m->free_fds = true; + + return copy; +} + +int message_append_basic(sd_bus_message *m, char type, const void *p, const void **stored) { + _cleanup_close_ int fd = -EBADF; + struct bus_container *c; + ssize_t align, sz; + uint32_t u32; + void *a; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(bus_type_is_basic(type), -EINVAL); + assert_return(!m->poisoned, -ESTALE); + + c = message_get_last_container(m); + + if (c->signature && c->signature[c->index]) { + /* Container signature is already set */ + + if (c->signature[c->index] != type) + return -ENXIO; + } else { + char *e; + + /* Maybe we can append to the signature? But only if this is the top-level container */ + if (c->enclosing != 0) + return -ENXIO; + + e = strextend(&c->signature, CHAR_TO_STR(type)); + if (!e) { + m->poisoned = true; + return -ENOMEM; + } + } + + switch (type) { + + case SD_BUS_TYPE_STRING: + /* To make things easy we'll serialize a NULL string + * into the empty string */ + p = strempty(p); + + if (!utf8_is_valid(p)) + return -EINVAL; + + align = 4; + sz = 4 + strlen(p) + 1; + break; + + case SD_BUS_TYPE_OBJECT_PATH: + + if (!p) + return -EINVAL; + + if (!object_path_is_valid(p)) + return -EINVAL; + + align = 4; + sz = 4 + strlen(p) + 1; + break; + + case SD_BUS_TYPE_SIGNATURE: + + p = strempty(p); + + if (!signature_is_valid(p, /* allow_dict_entry = */ true)) + return -EINVAL; + + align = 1; + sz = 1 + strlen(p) + 1; + break; + + case SD_BUS_TYPE_BOOLEAN: + + u32 = p && *(int*) p; + p = &u32; + + align = sz = 4; + break; + + case SD_BUS_TYPE_UNIX_FD: + + if (!p) + return -EINVAL; + + fd = message_push_fd(m, *(int*) p); + if (fd < 0) + return fd; + + u32 = m->n_fds; + p = &u32; + + align = sz = 4; + break; + + default: + align = bus_type_get_alignment(type); + sz = bus_type_get_size(type); + break; + } + + assert(align > 0); + assert(sz > 0); + + a = message_extend_body(m, align, sz); + if (!a) + return -ENOMEM; + + if (IN_SET(type, SD_BUS_TYPE_STRING, SD_BUS_TYPE_OBJECT_PATH)) { + *(uint32_t*) a = sz - 5; + memcpy((uint8_t*) a + 4, p, sz - 4); + + if (stored) + *stored = (const uint8_t*) a + 4; + + } else if (type == SD_BUS_TYPE_SIGNATURE) { + *(uint8_t*) a = sz - 2; + memcpy((uint8_t*) a + 1, p, sz - 1); + + if (stored) + *stored = (const uint8_t*) a + 1; + } else { + memcpy(a, p, sz); + + if (stored) + *stored = a; + } + + if (type == SD_BUS_TYPE_UNIX_FD) + m->n_fds++; + + if (c->enclosing != SD_BUS_TYPE_ARRAY) + c->index++; + + fd = -EBADF; + return 0; +} + +_public_ int sd_bus_message_append_basic(sd_bus_message *m, char type, const void *p) { + return message_append_basic(m, type, p, NULL); +} + +_public_ int sd_bus_message_append_string_space( + sd_bus_message *m, + size_t size, + char **s) { + + struct bus_container *c; + void *a; + + assert_return(m, -EINVAL); + assert_return(s, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(!m->poisoned, -ESTALE); + + c = message_get_last_container(m); + + if (c->signature && c->signature[c->index]) { + /* Container signature is already set */ + + if (c->signature[c->index] != SD_BUS_TYPE_STRING) + return -ENXIO; + } else { + char *e; + + /* Maybe we can append to the signature? But only if this is the top-level container */ + if (c->enclosing != 0) + return -ENXIO; + + e = strextend(&c->signature, CHAR_TO_STR(SD_BUS_TYPE_STRING)); + if (!e) { + m->poisoned = true; + return -ENOMEM; + } + } + + a = message_extend_body(m, 4, 4 + size + 1); + if (!a) + return -ENOMEM; + + *(uint32_t*) a = size; + *s = (char*) a + 4; + + (*s)[size] = 0; + + if (c->enclosing != SD_BUS_TYPE_ARRAY) + c->index++; + + return 0; +} + +_public_ int sd_bus_message_append_string_iovec( + sd_bus_message *m, + const struct iovec *iov, + unsigned n /* should be size_t, but is API now… 😞 */) { + + size_t size; + unsigned i; + char *p; + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(iov || n == 0, -EINVAL); + assert_return(!m->poisoned, -ESTALE); + + size = iovec_total_size(iov, n); + + r = sd_bus_message_append_string_space(m, size, &p); + if (r < 0) + return r; + + for (i = 0; i < n; i++) { + + if (iov[i].iov_base) + memcpy(p, iov[i].iov_base, iov[i].iov_len); + else + memset(p, ' ', iov[i].iov_len); + + p += iov[i].iov_len; + } + + return 0; +} + +static int bus_message_open_array( + sd_bus_message *m, + struct bus_container *c, + const char *contents, + uint32_t **array_size, + size_t *begin) { + + unsigned nindex; + int alignment; + void *a, *op; + size_t os; + struct bus_body_part *o; + + assert(m); + assert(c); + assert(contents); + assert(array_size); + assert(begin); + + if (!signature_is_single(contents, true)) + return -EINVAL; + + if (c->signature && c->signature[c->index]) { + + /* Verify the existing signature */ + + if (c->signature[c->index] != SD_BUS_TYPE_ARRAY) + return -ENXIO; + + if (!startswith(c->signature + c->index + 1, contents)) + return -ENXIO; + + nindex = c->index + 1 + strlen(contents); + } else { + char *e; + + if (c->enclosing != 0) + return -ENXIO; + + /* Extend the existing signature */ + + e = strextend(&c->signature, CHAR_TO_STR(SD_BUS_TYPE_ARRAY), contents); + if (!e) { + m->poisoned = true; + return -ENOMEM; + } + + nindex = e - c->signature; + } + + alignment = bus_type_get_alignment(contents[0]); + if (alignment < 0) + return alignment; + + a = message_extend_body(m, 4, 4); + if (!a) + return -ENOMEM; + + o = m->body_end; + op = m->body_end->data; + os = m->body_end->size; + + /* Add alignment between size and first element */ + if (!message_extend_body(m, alignment, 0)) + return -ENOMEM; + + /* location of array size might have changed so let's readjust a */ + if (o == m->body_end) + a = adjust_pointer(a, op, os, m->body_end->data); + + *(uint32_t*) a = 0; + *array_size = a; + + if (c->enclosing != SD_BUS_TYPE_ARRAY) + c->index = nindex; + + return 0; +} + +static int bus_message_open_variant( + sd_bus_message *m, + struct bus_container *c, + const char *contents) { + + size_t l; + void *a; + + assert(m); + assert(c); + assert(contents); + + if (!signature_is_single(contents, false)) + return -EINVAL; + + if (*contents == SD_BUS_TYPE_DICT_ENTRY_BEGIN) + return -EINVAL; + + if (c->signature && c->signature[c->index]) { + + if (c->signature[c->index] != SD_BUS_TYPE_VARIANT) + return -ENXIO; + + } else { + char *e; + + if (c->enclosing != 0) + return -ENXIO; + + e = strextend(&c->signature, CHAR_TO_STR(SD_BUS_TYPE_VARIANT)); + if (!e) { + m->poisoned = true; + return -ENOMEM; + } + } + + l = strlen(contents); + a = message_extend_body(m, 1, 1 + l + 1); + if (!a) + return -ENOMEM; + + *(uint8_t*) a = l; + memcpy((uint8_t*) a + 1, contents, l + 1); + + if (c->enclosing != SD_BUS_TYPE_ARRAY) + c->index++; + + return 0; +} + +static int bus_message_open_struct( + sd_bus_message *m, + struct bus_container *c, + const char *contents, + size_t *begin) { + + size_t nindex; + + assert(m); + assert(c); + assert(contents); + assert(begin); + + if (!signature_is_valid(contents, false)) + return -EINVAL; + + if (c->signature && c->signature[c->index]) { + size_t l; + + l = strlen(contents); + + if (c->signature[c->index] != SD_BUS_TYPE_STRUCT_BEGIN || + !startswith(c->signature + c->index + 1, contents) || + c->signature[c->index + 1 + l] != SD_BUS_TYPE_STRUCT_END) + return -ENXIO; + + nindex = c->index + 1 + l + 1; + } else { + char *e; + + if (c->enclosing != 0) + return -ENXIO; + + e = strextend(&c->signature, CHAR_TO_STR(SD_BUS_TYPE_STRUCT_BEGIN), contents, CHAR_TO_STR(SD_BUS_TYPE_STRUCT_END)); + if (!e) { + m->poisoned = true; + return -ENOMEM; + } + + nindex = e - c->signature; + } + + /* Align contents to 8 byte boundary */ + if (!message_extend_body(m, 8, 0)) + return -ENOMEM; + + if (c->enclosing != SD_BUS_TYPE_ARRAY) + c->index = nindex; + + return 0; +} + +static int bus_message_open_dict_entry( + sd_bus_message *m, + struct bus_container *c, + const char *contents, + size_t *begin) { + + assert(m); + assert(c); + assert(contents); + assert(begin); + + if (!signature_is_pair(contents)) + return -EINVAL; + + if (c->enclosing != SD_BUS_TYPE_ARRAY) + return -ENXIO; + + if (c->signature && c->signature[c->index]) { + size_t l; + + l = strlen(contents); + + if (c->signature[c->index] != SD_BUS_TYPE_DICT_ENTRY_BEGIN || + !startswith(c->signature + c->index + 1, contents) || + c->signature[c->index + 1 + l] != SD_BUS_TYPE_DICT_ENTRY_END) + return -ENXIO; + } else + return -ENXIO; + + /* Align contents to 8 byte boundary */ + if (!message_extend_body(m, 8, 0)) + return -ENOMEM; + + return 0; +} + +_public_ int sd_bus_message_open_container( + sd_bus_message *m, + char type, + const char *contents) { + + struct bus_container *c; + uint32_t *array_size = NULL; + _cleanup_free_ char *signature = NULL; + size_t before, begin = 0; + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(contents, -EINVAL); + assert_return(!m->poisoned, -ESTALE); + + /* Make sure we have space for one more container */ + if (!GREEDY_REALLOC(m->containers, m->n_containers + 1)) { + m->poisoned = true; + return -ENOMEM; + } + + c = message_get_last_container(m); + + signature = strdup(contents); + if (!signature) { + m->poisoned = true; + return -ENOMEM; + } + + /* Save old index in the parent container, in case we have to + * abort this container */ + c->saved_index = c->index; + before = m->body_size; + + if (type == SD_BUS_TYPE_ARRAY) + r = bus_message_open_array(m, c, contents, &array_size, &begin); + else if (type == SD_BUS_TYPE_VARIANT) + r = bus_message_open_variant(m, c, contents); + else if (type == SD_BUS_TYPE_STRUCT) + r = bus_message_open_struct(m, c, contents, &begin); + else if (type == SD_BUS_TYPE_DICT_ENTRY) + r = bus_message_open_dict_entry(m, c, contents, &begin); + else + r = -EINVAL; + if (r < 0) + return r; + + /* OK, let's fill it in */ + m->containers[m->n_containers++] = (struct bus_container) { + .enclosing = type, + .signature = TAKE_PTR(signature), + .array_size = array_size, + .before = before, + .begin = begin, + }; + + return 0; +} + +_public_ int sd_bus_message_close_container(sd_bus_message *m) { + struct bus_container *c; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(m->n_containers > 0, -EINVAL); + assert_return(!m->poisoned, -ESTALE); + + c = message_get_last_container(m); + + if (c->enclosing != SD_BUS_TYPE_ARRAY) + if (c->signature && c->signature[c->index] != 0) + return -EINVAL; + + m->n_containers--; + + free(c->signature); + + return 0; +} + +typedef struct { + const char *types; + unsigned n_struct; + unsigned n_array; +} TypeStack; + +static int type_stack_push(TypeStack *stack, unsigned max, unsigned *i, const char *types, unsigned n_struct, unsigned n_array) { + assert(stack); + assert(max > 0); + + if (*i >= max) + return -EINVAL; + + stack[*i].types = types; + stack[*i].n_struct = n_struct; + stack[*i].n_array = n_array; + (*i)++; + + return 0; +} + +static int type_stack_pop(TypeStack *stack, unsigned max, unsigned *i, const char **types, unsigned *n_struct, unsigned *n_array) { + assert(stack); + assert(max > 0); + assert(types); + assert(n_struct); + assert(n_array); + + if (*i <= 0) + return 0; + + (*i)--; + *types = stack[*i].types; + *n_struct = stack[*i].n_struct; + *n_array = stack[*i].n_array; + + return 1; +} + +_public_ int sd_bus_message_appendv( + sd_bus_message *m, + const char *types, + va_list ap) { + + unsigned n_array, n_struct; + TypeStack stack[BUS_CONTAINER_DEPTH]; + unsigned stack_ptr = 0; + int r; + + assert_return(m, -EINVAL); + assert_return(types, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(!m->poisoned, -ESTALE); + + n_array = UINT_MAX; + n_struct = strlen(types); + + for (;;) { + const char *t; + + if (n_array == 0 || (n_array == UINT_MAX && n_struct == 0)) { + r = type_stack_pop(stack, ELEMENTSOF(stack), &stack_ptr, &types, &n_struct, &n_array); + if (r < 0) + return r; + if (r == 0) + break; + + r = sd_bus_message_close_container(m); + if (r < 0) + return r; + + continue; + } + + t = types; + if (n_array != UINT_MAX) + n_array--; + else { + types++; + n_struct--; + } + + switch (*t) { + + case SD_BUS_TYPE_BYTE: { + uint8_t x; + + x = (uint8_t) va_arg(ap, int); + r = sd_bus_message_append_basic(m, *t, &x); + break; + } + + case SD_BUS_TYPE_BOOLEAN: + case SD_BUS_TYPE_INT32: + case SD_BUS_TYPE_UINT32: + case SD_BUS_TYPE_UNIX_FD: { + uint32_t x; + + /* We assume a boolean is the same as int32_t */ + assert_cc(sizeof(int32_t) == sizeof(int)); + + x = va_arg(ap, uint32_t); + r = sd_bus_message_append_basic(m, *t, &x); + break; + } + + case SD_BUS_TYPE_INT16: + case SD_BUS_TYPE_UINT16: { + uint16_t x; + + x = (uint16_t) va_arg(ap, int); + r = sd_bus_message_append_basic(m, *t, &x); + break; + } + + case SD_BUS_TYPE_INT64: + case SD_BUS_TYPE_UINT64: { + uint64_t x; + + x = va_arg(ap, uint64_t); + r = sd_bus_message_append_basic(m, *t, &x); + break; + } + + case SD_BUS_TYPE_DOUBLE: { + double x; + + x = va_arg(ap, double); + r = sd_bus_message_append_basic(m, *t, &x); + break; + } + + case SD_BUS_TYPE_STRING: + case SD_BUS_TYPE_OBJECT_PATH: + case SD_BUS_TYPE_SIGNATURE: { + const char *x; + + x = va_arg(ap, const char*); + r = sd_bus_message_append_basic(m, *t, x); + break; + } + + case SD_BUS_TYPE_ARRAY: { + size_t k; + + r = signature_element_length(t + 1, &k); + if (r < 0) + return r; + + { + char s[k + 1]; + memcpy(s, t + 1, k); + s[k] = 0; + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_ARRAY, s); + if (r < 0) + return r; + } + + if (n_array == UINT_MAX) { + types += k; + n_struct -= k; + } + + r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array); + if (r < 0) + return r; + + types = t + 1; + n_struct = k; + n_array = va_arg(ap, unsigned); + + break; + } + + case SD_BUS_TYPE_VARIANT: { + const char *s; + + s = va_arg(ap, const char*); + if (!s) + return -EINVAL; + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_VARIANT, s); + if (r < 0) + return r; + + r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array); + if (r < 0) + return r; + + types = s; + n_struct = strlen(s); + n_array = UINT_MAX; + + break; + } + + case SD_BUS_TYPE_STRUCT_BEGIN: + case SD_BUS_TYPE_DICT_ENTRY_BEGIN: { + size_t k; + + r = signature_element_length(t, &k); + if (r < 0) + return r; + if (k < 2) + return -ERANGE; + + { + char s[k - 1]; + + memcpy(s, t + 1, k - 2); + s[k - 2] = 0; + + r = sd_bus_message_open_container(m, *t == SD_BUS_TYPE_STRUCT_BEGIN ? SD_BUS_TYPE_STRUCT : SD_BUS_TYPE_DICT_ENTRY, s); + if (r < 0) + return r; + } + + if (n_array == UINT_MAX) { + types += k - 1; + n_struct -= k - 1; + } + + r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array); + if (r < 0) + return r; + + types = t + 1; + n_struct = k - 2; + n_array = UINT_MAX; + + break; + } + + default: + r = -EINVAL; + } + + if (r < 0) + return r; + } + + return 1; +} + +_public_ int sd_bus_message_append(sd_bus_message *m, const char *types, ...) { + va_list ap; + int r; + + va_start(ap, types); + r = sd_bus_message_appendv(m, types, ap); + va_end(ap); + + return r; +} + +_public_ int sd_bus_message_append_array_space( + sd_bus_message *m, + char type, + size_t size, + void **ptr) { + + ssize_t align, sz; + void *a; + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(bus_type_is_trivial(type) && type != SD_BUS_TYPE_BOOLEAN, -EINVAL); + assert_return(ptr || size == 0, -EINVAL); + assert_return(!m->poisoned, -ESTALE); + + align = bus_type_get_alignment(type); + sz = bus_type_get_size(type); + + assert_se(align > 0); + assert_se(sz > 0); + + if (size % sz != 0) + return -EINVAL; + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_ARRAY, CHAR_TO_STR(type)); + if (r < 0) + return r; + + a = message_extend_body(m, align, size); + if (!a) + return -ENOMEM; + + r = sd_bus_message_close_container(m); + if (r < 0) + return r; + + *ptr = a; + return 0; +} + +_public_ int sd_bus_message_append_array( + sd_bus_message *m, + char type, + const void *ptr, + size_t size) { + int r; + void *p; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(bus_type_is_trivial(type), -EINVAL); + assert_return(ptr || size == 0, -EINVAL); + assert_return(!m->poisoned, -ESTALE); + + r = sd_bus_message_append_array_space(m, type, size, &p); + if (r < 0) + return r; + + memcpy_safe(p, ptr, size); + + return 0; +} + +_public_ int sd_bus_message_append_array_iovec( + sd_bus_message *m, + char type, + const struct iovec *iov, + unsigned n /* should be size_t, but is API now… 😞 */) { + + size_t size; + unsigned i; + void *p; + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(bus_type_is_trivial(type), -EINVAL); + assert_return(iov || n == 0, -EINVAL); + assert_return(!m->poisoned, -ESTALE); + + size = iovec_total_size(iov, n); + + r = sd_bus_message_append_array_space(m, type, size, &p); + if (r < 0) + return r; + + for (i = 0; i < n; i++) { + + if (iov[i].iov_base) + memcpy(p, iov[i].iov_base, iov[i].iov_len); + else + memzero(p, iov[i].iov_len); + + p = (uint8_t*) p + iov[i].iov_len; + } + + return 0; +} + +_public_ int sd_bus_message_append_array_memfd( + sd_bus_message *m, + char type, + int memfd, + uint64_t offset, + uint64_t size) { + + _cleanup_close_ int copy_fd = -EBADF; + struct bus_body_part *part; + ssize_t align, sz; + uint64_t real_size; + void *a; + int r; + + assert_return(m, -EINVAL); + assert_return(memfd >= 0, -EBADF); + assert_return(bus_type_is_trivial(type), -EINVAL); + assert_return(size > 0, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(!m->poisoned, -ESTALE); + + r = memfd_set_sealed(memfd); + if (r < 0) + return r; + + copy_fd = fcntl(memfd, F_DUPFD_CLOEXEC, 3); + if (copy_fd < 0) + return copy_fd; + + r = memfd_get_size(memfd, &real_size); + if (r < 0) + return r; + + if (offset == 0 && size == UINT64_MAX) + size = real_size; + else if (offset + size > real_size) + return -EMSGSIZE; + + align = bus_type_get_alignment(type); + sz = bus_type_get_size(type); + + assert_se(align > 0); + assert_se(sz > 0); + + if (offset % align != 0) + return -EINVAL; + + if (size % sz != 0) + return -EINVAL; + + if (size > (uint64_t) UINT32_MAX) + return -EINVAL; + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_ARRAY, CHAR_TO_STR(type)); + if (r < 0) + return r; + + a = message_extend_body(m, align, 0); + if (!a) + return -ENOMEM; + + part = message_append_part(m); + if (!part) + return -ENOMEM; + + part->memfd = copy_fd; + part->memfd_offset = offset; + part->sealed = true; + part->size = size; + copy_fd = -EBADF; + + m->body_size += size; + message_extend_containers(m, size); + + return sd_bus_message_close_container(m); +} + +_public_ int sd_bus_message_append_string_memfd( + sd_bus_message *m, + int memfd, + uint64_t offset, + uint64_t size) { + + _cleanup_close_ int copy_fd = -EBADF; + struct bus_body_part *part; + struct bus_container *c; + uint64_t real_size; + void *a; + int r; + + assert_return(m, -EINVAL); + assert_return(memfd >= 0, -EBADF); + assert_return(size > 0, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(!m->poisoned, -ESTALE); + + r = memfd_set_sealed(memfd); + if (r < 0) + return r; + + copy_fd = fcntl(memfd, FD_CLOEXEC, 3); + if (copy_fd < 0) + return copy_fd; + + r = memfd_get_size(memfd, &real_size); + if (r < 0) + return r; + + if (offset == 0 && size == UINT64_MAX) + size = real_size; + else if (offset + size > real_size) + return -EMSGSIZE; + + /* We require this to be NUL terminated */ + if (size == 0) + return -EINVAL; + + if (size > (uint64_t) UINT32_MAX) + return -EINVAL; + + c = message_get_last_container(m); + if (c->signature && c->signature[c->index]) { + /* Container signature is already set */ + + if (c->signature[c->index] != SD_BUS_TYPE_STRING) + return -ENXIO; + } else { + char *e; + + /* Maybe we can append to the signature? But only if this is the top-level container */ + if (c->enclosing != 0) + return -ENXIO; + + e = strextend(&c->signature, CHAR_TO_STR(SD_BUS_TYPE_STRING)); + if (!e) { + m->poisoned = true; + return -ENOMEM; + } + } + + a = message_extend_body(m, 4, 4); + if (!a) + return -ENOMEM; + + *(uint32_t*) a = size - 1; + + part = message_append_part(m); + if (!part) + return -ENOMEM; + + part->memfd = copy_fd; + part->memfd_offset = offset; + part->sealed = true; + part->size = size; + copy_fd = -EBADF; + + m->body_size += size; + message_extend_containers(m, size); + + if (c->enclosing != SD_BUS_TYPE_ARRAY) + c->index++; + + return 0; +} + +_public_ int sd_bus_message_append_strv(sd_bus_message *m, char **l) { + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(!m->poisoned, -ESTALE); + + r = sd_bus_message_open_container(m, 'a', "s"); + if (r < 0) + return r; + + STRV_FOREACH(i, l) { + r = sd_bus_message_append_basic(m, 's', *i); + if (r < 0) + return r; + } + + return sd_bus_message_close_container(m); +} + +static int bus_message_close_header(sd_bus_message *m) { + assert(m); + + /* The actual user data is finished now, we just complete the variant and struct now. Remember + * this position, so that during parsing we know where to put the outer container end. */ + m->user_body_size = m->body_size; + + m->header->fields_size = m->fields_size; + m->header->body_size = m->body_size; + + return 0; +} + +_public_ int sd_bus_message_seal(sd_bus_message *m, uint64_t cookie, uint64_t timeout_usec) { + struct bus_body_part *part; + size_t a; + unsigned i; + int r; + + assert_return(m, -EINVAL); + + if (m->sealed) + return -EPERM; + + if (m->n_containers > 0) + return -EBADMSG; + + if (m->poisoned) + return -ESTALE; + + if (cookie > UINT32_MAX) + return -EOPNOTSUPP; + + /* In vtables the return signature of method calls is listed, + * let's check if they match if this is a response */ + if (m->header->type == SD_BUS_MESSAGE_METHOD_RETURN && + m->enforced_reply_signature && + !streq(strempty(m->root_container.signature), m->enforced_reply_signature)) + return -ENOMSG; + + /* If there's a non-trivial signature set, then add it in here */ + if (!isempty(m->root_container.signature)) { + r = message_append_field_signature(m, BUS_MESSAGE_HEADER_SIGNATURE, m->root_container.signature, NULL); + if (r < 0) + return r; + } + + if (m->n_fds > 0) { + r = message_append_field_uint32(m, BUS_MESSAGE_HEADER_UNIX_FDS, m->n_fds); + if (r < 0) + return r; + } + + r = bus_message_close_header(m); + if (r < 0) + return r; + + m->header->serial = (uint32_t) cookie; + + m->timeout = m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED ? 0 : timeout_usec; + + /* Add padding at the end of the fields part, since we know + * the body needs to start at an 8 byte alignment. We made + * sure we allocated enough space for this, so all we need to + * do here is to zero it out. */ + a = ALIGN8(m->fields_size) - m->fields_size; + if (a > 0) + memzero((uint8_t*) BUS_MESSAGE_FIELDS(m) + m->fields_size, a); + + /* If this is something we can send as memfd, then let's seal + the memfd now. Note that we can send memfds as payload only + for directed messages, and not for broadcasts. */ + if (m->destination && m->bus->use_memfd) { + MESSAGE_FOREACH_PART(part, i, m) + if (part->memfd >= 0 && + !part->sealed && + (part->size > MEMFD_MIN_SIZE || m->bus->use_memfd < 0) && + part != m->body_end) { /* The last part may never be sent as memfd */ + uint64_t sz; + + /* Try to seal it if that makes + * sense. First, unmap our own map to + * make sure we don't keep it busy. */ + bus_body_part_unmap(part); + + /* Then, sync up real memfd size */ + sz = part->size; + r = memfd_set_size(part->memfd, sz); + if (r < 0) + return r; + + /* Finally, try to seal */ + if (memfd_set_sealed(part->memfd) >= 0) + part->sealed = true; + } + } + + m->root_container.end = m->user_body_size; + m->root_container.index = 0; + + m->sealed = true; + + return 0; +} + +int bus_body_part_map(struct bus_body_part *part) { + void *p; + size_t psz, shift; + + assert_se(part); + + if (part->data) + return 0; + + if (part->size <= 0) + return 0; + + /* For smaller zero parts (as used for padding) we don't need to map anything... */ + if (part->memfd < 0 && part->is_zero && part->size < 8) { + static const uint8_t zeroes[7] = { }; + part->data = (void*) zeroes; + return 0; + } + + shift = PAGE_OFFSET(part->memfd_offset); + psz = PAGE_ALIGN(part->size + shift); + if (psz >= SIZE_MAX) + return -EFBIG; + + if (part->memfd >= 0) + p = mmap(NULL, psz, PROT_READ, MAP_PRIVATE, part->memfd, part->memfd_offset - shift); + else if (part->is_zero) + p = mmap(NULL, psz, PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + else + return -EINVAL; + + if (p == MAP_FAILED) + return -errno; + + part->mapped = psz; + part->mmap_begin = p; + part->data = (uint8_t*) p + shift; + part->munmap_this = true; + + return 0; +} + +void bus_body_part_unmap(struct bus_body_part *part) { + + assert_se(part); + + if (part->memfd < 0) + return; + + if (!part->mmap_begin) + return; + + if (!part->munmap_this) + return; + + assert_se(munmap(part->mmap_begin, part->mapped) == 0); + + part->mmap_begin = NULL; + part->data = NULL; + part->mapped = 0; + part->munmap_this = false; + + return; +} + +static bool message_end_of_signature(sd_bus_message *m) { + struct bus_container *c; + + assert(m); + + c = message_get_last_container(m); + return !c->signature || c->signature[c->index] == 0; +} + +static bool message_end_of_array(sd_bus_message *m, size_t index) { + struct bus_container *c; + + assert(m); + + c = message_get_last_container(m); + if (c->enclosing != SD_BUS_TYPE_ARRAY) + return false; + + assert(c->array_size); + return index >= c->begin + BUS_MESSAGE_BSWAP32(m, *c->array_size); +} + +_public_ int sd_bus_message_at_end(sd_bus_message *m, int complete) { + assert_return(m, -EINVAL); + assert_return(m->sealed, -EPERM); + + if (complete && m->n_containers > 0) + return false; + + if (message_end_of_signature(m)) + return true; + + if (message_end_of_array(m, m->rindex)) + return true; + + return false; +} + +static struct bus_body_part* find_part(sd_bus_message *m, size_t index, size_t sz, void **p) { + struct bus_body_part *part; + size_t begin; + int r; + + assert(m); + + if (m->cached_rindex_part && index >= m->cached_rindex_part_begin) { + part = m->cached_rindex_part; + begin = m->cached_rindex_part_begin; + } else { + part = &m->body; + begin = 0; + } + + while (part) { + if (index < begin) + return NULL; + + if (index + sz <= begin + part->size) { + + r = bus_body_part_map(part); + if (r < 0) + return NULL; + + if (p) + *p = part->data ? (uint8_t*) part->data + index - begin + : NULL; /* Avoid dereferencing a NULL pointer. */ + + m->cached_rindex_part = part; + m->cached_rindex_part_begin = begin; + + return part; + } + + begin += part->size; + part = part->next; + } + + return NULL; +} + +static int message_peek_body( + sd_bus_message *m, + size_t *rindex, + size_t align, + size_t nbytes, + void **ret) { + + size_t k, start, end, padding; + struct bus_body_part *part; + uint8_t *q; + + assert(m); + assert(rindex); + assert(align > 0); + + start = ALIGN_TO(*rindex, align); + if (start > m->user_body_size) + return -EBADMSG; + + padding = start - *rindex; + + /* Avoid overflow below */ + if (nbytes > SIZE_MAX - start) + return -EBADMSG; + + end = start + nbytes; + if (end > m->user_body_size) + return -EBADMSG; + + part = find_part(m, *rindex, padding, (void**) &q); + if (!part) + return -EBADMSG; + + if (q) { + /* Verify padding */ + for (k = 0; k < padding; k++) + if (q[k] != 0) + return -EBADMSG; + } + + part = find_part(m, start, nbytes, (void**) &q); + if (!part || (nbytes > 0 && !q)) + return -EBADMSG; + + *rindex = end; + + if (ret) + *ret = q; + + return 0; +} + +static bool validate_nul(const char *s, size_t l) { + + /* Check for NUL chars in the string */ + if (memchr(s, 0, l)) + return false; + + /* Check for NUL termination */ + if (s[l] != 0) + return false; + + return true; +} + +static bool validate_string(const char *s, size_t l) { + + if (!validate_nul(s, l)) + return false; + + /* Check if valid UTF8 */ + if (!utf8_is_valid(s)) + return false; + + return true; +} + +static bool validate_signature(const char *s, size_t l) { + + if (!validate_nul(s, l)) + return false; + + /* Check if valid signature */ + if (!signature_is_valid(s, true)) + return false; + + return true; +} + +static bool validate_object_path(const char *s, size_t l) { + + if (!validate_nul(s, l)) + return false; + + if (!object_path_is_valid(s)) + return false; + + return true; +} + +_public_ int sd_bus_message_read_basic(sd_bus_message *m, char type, void *p) { + struct bus_container *c; + size_t rindex; + void *q; + int r; + + assert_return(m, -EINVAL); + assert_return(m->sealed, -EPERM); + assert_return(bus_type_is_basic(type), -EINVAL); + + if (message_end_of_signature(m)) + return -ENXIO; + + if (message_end_of_array(m, m->rindex)) + return 0; + + c = message_get_last_container(m); + if (c->signature[c->index] != type) + return -ENXIO; + + rindex = m->rindex; + + if (IN_SET(type, SD_BUS_TYPE_STRING, SD_BUS_TYPE_OBJECT_PATH)) { + uint32_t l; + bool ok; + + r = message_peek_body(m, &rindex, 4, 4, &q); + if (r < 0) + return r; + + l = BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q); + if (l == UINT32_MAX) + /* avoid overflow right below */ + return -EBADMSG; + + r = message_peek_body(m, &rindex, 1, l+1, &q); + if (r < 0) + return r; + + if (type == SD_BUS_TYPE_OBJECT_PATH) + ok = validate_object_path(q, l); + else + ok = validate_string(q, l); + if (!ok) + return -EBADMSG; + + if (p) + *(const char**) p = q; + + } else if (type == SD_BUS_TYPE_SIGNATURE) { + uint8_t l; + + r = message_peek_body(m, &rindex, 1, 1, &q); + if (r < 0) + return r; + + l = *(uint8_t*) q; + if (l == UINT8_MAX) + /* avoid overflow right below */ + return -EBADMSG; + + r = message_peek_body(m, &rindex, 1, l+1, &q); + if (r < 0) + return r; + + if (!validate_signature(q, l)) + return -EBADMSG; + + if (p) + *(const char**) p = q; + + } else { + ssize_t sz, align; + + align = bus_type_get_alignment(type); + assert(align > 0); + + sz = bus_type_get_size(type); + assert(sz > 0); + + r = message_peek_body(m, &rindex, align, sz, &q); + if (r < 0) + return r; + + switch (type) { + + case SD_BUS_TYPE_BYTE: + if (p) + *(uint8_t*) p = *(uint8_t*) q; + break; + + case SD_BUS_TYPE_BOOLEAN: + if (p) + *(int*) p = !!*(uint32_t*) q; + break; + + case SD_BUS_TYPE_INT16: + case SD_BUS_TYPE_UINT16: + if (p) + *(uint16_t*) p = BUS_MESSAGE_BSWAP16(m, *(uint16_t*) q); + break; + + case SD_BUS_TYPE_INT32: + case SD_BUS_TYPE_UINT32: + if (p) + *(uint32_t*) p = BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q); + break; + + case SD_BUS_TYPE_INT64: + case SD_BUS_TYPE_UINT64: + case SD_BUS_TYPE_DOUBLE: + if (p) + *(uint64_t*) p = BUS_MESSAGE_BSWAP64(m, *(uint64_t*) q); + break; + + case SD_BUS_TYPE_UNIX_FD: { + uint32_t j; + + j = BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q); + if (j >= m->n_fds) + return -EBADMSG; + + if (p) + *(int*) p = m->fds[j]; + break; + } + + default: + assert_not_reached(); + } + } + + m->rindex = rindex; + + if (c->enclosing != SD_BUS_TYPE_ARRAY) + c->index++; + + return 1; +} + +static int bus_message_enter_array( + sd_bus_message *m, + struct bus_container *c, + const char *contents, + uint32_t **array_size) { + + size_t rindex; + void *q; + int alignment, r; + + assert(m); + assert(c); + assert(contents); + assert(array_size); + + if (!signature_is_single(contents, true)) + return -EINVAL; + + if (!c->signature || c->signature[c->index] == 0) + return -ENXIO; + + if (c->signature[c->index] != SD_BUS_TYPE_ARRAY) + return -ENXIO; + + if (!startswith(c->signature + c->index + 1, contents)) + return -ENXIO; + + rindex = m->rindex; + + r = message_peek_body(m, &rindex, 4, 4, &q); + if (r < 0) + return r; + + if (BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q) > BUS_ARRAY_MAX_SIZE) + return -EBADMSG; + + alignment = bus_type_get_alignment(contents[0]); + if (alignment < 0) + return alignment; + + r = message_peek_body(m, &rindex, alignment, 0, NULL); + if (r < 0) + return r; + + *array_size = (uint32_t*) q; + + m->rindex = rindex; + + if (c->enclosing != SD_BUS_TYPE_ARRAY) + c->index += 1 + strlen(contents); + + return 1; +} + +static int bus_message_enter_variant( + sd_bus_message *m, + struct bus_container *c, + const char *contents) { + + size_t rindex; + uint8_t l; + void *q; + int r; + + assert(m); + assert(c); + assert(contents); + + if (!signature_is_single(contents, false)) + return -EINVAL; + + if (*contents == SD_BUS_TYPE_DICT_ENTRY_BEGIN) + return -EINVAL; + + if (!c->signature || c->signature[c->index] == 0) + return -ENXIO; + + if (c->signature[c->index] != SD_BUS_TYPE_VARIANT) + return -ENXIO; + + rindex = m->rindex; + + r = message_peek_body(m, &rindex, 1, 1, &q); + if (r < 0) + return r; + + l = *(uint8_t*) q; + if (l == UINT8_MAX) + /* avoid overflow right below */ + return -EBADMSG; + + r = message_peek_body(m, &rindex, 1, l+1, &q); + if (r < 0) + return r; + + if (!validate_signature(q, l)) + return -EBADMSG; + + if (!streq(q, contents)) + return -ENXIO; + + m->rindex = rindex; + + if (c->enclosing != SD_BUS_TYPE_ARRAY) + c->index++; + + return 1; +} + +static int bus_message_enter_struct( + sd_bus_message *m, + struct bus_container *c, + const char *contents) { + + size_t l; + int r; + + assert(m); + assert(c); + assert(contents); + + if (!signature_is_valid(contents, false)) + return -EINVAL; + + if (!c->signature || c->signature[c->index] == 0) + return -ENXIO; + + l = strlen(contents); + + if (c->signature[c->index] != SD_BUS_TYPE_STRUCT_BEGIN || + !startswith(c->signature + c->index + 1, contents) || + c->signature[c->index + 1 + l] != SD_BUS_TYPE_STRUCT_END) + return -ENXIO; + + r = message_peek_body(m, &m->rindex, 8, 0, NULL); + if (r < 0) + return r; + + if (c->enclosing != SD_BUS_TYPE_ARRAY) + c->index += 1 + l + 1; + + return 1; +} + +static int bus_message_enter_dict_entry( + sd_bus_message *m, + struct bus_container *c, + const char *contents) { + + size_t l; + int r; + + assert(m); + assert(c); + assert(contents); + + if (!signature_is_pair(contents)) + return -EINVAL; + + if (c->enclosing != SD_BUS_TYPE_ARRAY) + return -ENXIO; + + if (!c->signature || c->signature[c->index] == 0) + return 0; + + l = strlen(contents); + + if (c->signature[c->index] != SD_BUS_TYPE_DICT_ENTRY_BEGIN || + !startswith(c->signature + c->index + 1, contents) || + c->signature[c->index + 1 + l] != SD_BUS_TYPE_DICT_ENTRY_END) + return -ENXIO; + + r = message_peek_body(m, &m->rindex, 8, 0, NULL); + if (r < 0) + return r; + + if (c->enclosing != SD_BUS_TYPE_ARRAY) + c->index += 1 + l + 1; + + return 1; +} + +_public_ int sd_bus_message_enter_container(sd_bus_message *m, + char type, + const char *contents) { + struct bus_container *c; + uint32_t *array_size = NULL; + _cleanup_free_ char *signature = NULL; + size_t before; + int r; + + assert_return(m, -EINVAL); + assert_return(m->sealed, -EPERM); + assert_return(type != 0 || !contents, -EINVAL); + + if (type == 0 || !contents) { + const char *cc; + char tt; + + /* Allow entering into anonymous containers */ + r = sd_bus_message_peek_type(m, &tt, &cc); + if (r < 0) + return r; + + if (type != 0 && type != tt) + return -ENXIO; + + if (contents && !streq(contents, cc)) + return -ENXIO; + + type = tt; + contents = cc; + } + + /* + * We enforce a global limit on container depth, that is much + * higher than the 32 structs and 32 arrays the specification + * mandates. This is simpler to implement for us, and we need + * this only to ensure our container array doesn't grow + * without bounds. We are happy to return any data from a + * message as long as the data itself is valid, even if the + * overall message might be not. + * + * Note that the message signature is validated when + * parsing the headers, and that validation does check the + * 32/32 limit. + * + * Note that the specification defines no limits on the depth + * of stacked variants, but we do. + */ + if (m->n_containers >= BUS_CONTAINER_DEPTH) + return -EBADMSG; + + if (!GREEDY_REALLOC(m->containers, m->n_containers + 1)) + return -ENOMEM; + + if (message_end_of_signature(m)) + return -ENXIO; + + if (message_end_of_array(m, m->rindex)) + return 0; + + c = message_get_last_container(m); + + signature = strdup(contents); + if (!signature) + return -ENOMEM; + + c->saved_index = c->index; + before = m->rindex; + + if (type == SD_BUS_TYPE_ARRAY) + r = bus_message_enter_array(m, c, contents, &array_size); + else if (type == SD_BUS_TYPE_VARIANT) + r = bus_message_enter_variant(m, c, contents); + else if (type == SD_BUS_TYPE_STRUCT) + r = bus_message_enter_struct(m, c, contents); + else if (type == SD_BUS_TYPE_DICT_ENTRY) + r = bus_message_enter_dict_entry(m, c, contents); + else + r = -EINVAL; + if (r <= 0) + return r; + + /* OK, let's fill it in */ + m->containers[m->n_containers++] = (struct bus_container) { + .enclosing = type, + .signature = TAKE_PTR(signature), + + .before = before, + .begin = m->rindex, + /* Unary type has fixed size of 1, but virtual size of 0 */ + .end = m->rindex, + .array_size = array_size, + }; + + return 1; +} + +_public_ int sd_bus_message_exit_container(sd_bus_message *m) { + struct bus_container *c; + + assert_return(m, -EINVAL); + assert_return(m->sealed, -EPERM); + assert_return(m->n_containers > 0, -ENXIO); + + c = message_get_last_container(m); + + if (c->enclosing != SD_BUS_TYPE_ARRAY) { + if (c->signature && c->signature[c->index] != 0) + return -EBUSY; + } + + if (c->enclosing == SD_BUS_TYPE_ARRAY) { + uint32_t l; + + l = BUS_MESSAGE_BSWAP32(m, *c->array_size); + if (c->begin + l != m->rindex) + return -EBUSY; + } + + message_free_last_container(m); + + return 1; +} + +static void message_quit_container(sd_bus_message *m) { + struct bus_container *c; + + assert(m); + assert(m->sealed); + assert(m->n_containers > 0); + + /* Undo seeks */ + c = message_get_last_container(m); + assert(m->rindex >= c->before); + m->rindex = c->before; + + /* Free container */ + message_free_last_container(m); + + /* Correct index of new top-level container */ + c = message_get_last_container(m); + c->index = c->saved_index; +} + +_public_ int sd_bus_message_peek_type(sd_bus_message *m, char *type, const char **contents) { + struct bus_container *c; + int r; + + assert_return(m, -EINVAL); + assert_return(m->sealed, -EPERM); + + if (message_end_of_signature(m)) + goto eof; + + if (message_end_of_array(m, m->rindex)) + goto eof; + + c = message_get_last_container(m); + + if (bus_type_is_basic(c->signature[c->index])) { + if (contents) + *contents = NULL; + if (type) + *type = c->signature[c->index]; + return 1; + } + + if (c->signature[c->index] == SD_BUS_TYPE_ARRAY) { + + if (contents) { + size_t l; + + r = signature_element_length(c->signature+c->index+1, &l); + if (r < 0) + return r; + + /* signature_element_length does verification internally */ + + /* The array element must not be empty */ + assert(l >= 1); + if (free_and_strndup(&c->peeked_signature, + c->signature + c->index + 1, l) < 0) + return -ENOMEM; + + *contents = c->peeked_signature; + } + + if (type) + *type = SD_BUS_TYPE_ARRAY; + + return 1; + } + + if (IN_SET(c->signature[c->index], SD_BUS_TYPE_STRUCT_BEGIN, SD_BUS_TYPE_DICT_ENTRY_BEGIN)) { + + if (contents) { + size_t l; + + r = signature_element_length(c->signature+c->index, &l); + if (r < 0) + return r; + + assert(l >= 3); + if (free_and_strndup(&c->peeked_signature, + c->signature + c->index + 1, l - 2) < 0) + return -ENOMEM; + + *contents = c->peeked_signature; + } + + if (type) + *type = c->signature[c->index] == SD_BUS_TYPE_STRUCT_BEGIN ? SD_BUS_TYPE_STRUCT : SD_BUS_TYPE_DICT_ENTRY; + + return 1; + } + + if (c->signature[c->index] == SD_BUS_TYPE_VARIANT) { + if (contents) { + size_t rindex, l; + void *q; + + rindex = m->rindex; + r = message_peek_body(m, &rindex, 1, 1, &q); + if (r < 0) + return r; + + l = *(uint8_t*) q; + if (l == UINT8_MAX) + /* avoid overflow right below */ + return -EBADMSG; + + r = message_peek_body(m, &rindex, 1, l+1, &q); + if (r < 0) + return r; + + if (!validate_signature(q, l)) + return -EBADMSG; + + *contents = q; + } + + if (type) + *type = SD_BUS_TYPE_VARIANT; + + return 1; + } + + return -EINVAL; + +eof: + if (type) + *type = 0; + if (contents) + *contents = NULL; + return 0; +} + +_public_ int sd_bus_message_rewind(sd_bus_message *m, int complete) { + struct bus_container *c; + + assert_return(m, -EINVAL); + assert_return(m->sealed, -EPERM); + + if (complete) { + message_reset_containers(m); + m->rindex = 0; + + c = message_get_last_container(m); + } else { + c = message_get_last_container(m); + + c->index = 0; + m->rindex = c->begin; + } + + return !isempty(c->signature); +} + +_public_ int sd_bus_message_readv( + sd_bus_message *m, + const char *types, + va_list ap) { + + unsigned n_array, n_struct; + TypeStack stack[BUS_CONTAINER_DEPTH]; + unsigned stack_ptr = 0; + unsigned n_loop = 0; + int r; + + assert_return(m, -EINVAL); + assert_return(m->sealed, -EPERM); + assert_return(types, -EINVAL); + + if (isempty(types)) + return 0; + + /* Ideally, we'd just call ourselves recursively on every + * complex type. However, the state of a va_list that is + * passed to a function is undefined after that function + * returns. This means we need to decode the va_list linearly + * in a single stackframe. We hence implement our own + * home-grown stack in an array. */ + + n_array = UINT_MAX; /* length of current array entries */ + n_struct = strlen(types); /* length of current struct contents signature */ + + for (;;) { + const char *t; + + n_loop++; + + if (n_array == 0 || (n_array == UINT_MAX && n_struct == 0)) { + r = type_stack_pop(stack, ELEMENTSOF(stack), &stack_ptr, &types, &n_struct, &n_array); + if (r < 0) + return r; + if (r == 0) + break; + + r = sd_bus_message_exit_container(m); + if (r < 0) + return r; + + continue; + } + + t = types; + if (n_array != UINT_MAX) + n_array--; + else { + types++; + n_struct--; + } + + switch (*t) { + + case SD_BUS_TYPE_BYTE: + case SD_BUS_TYPE_BOOLEAN: + case SD_BUS_TYPE_INT16: + case SD_BUS_TYPE_UINT16: + case SD_BUS_TYPE_INT32: + case SD_BUS_TYPE_UINT32: + case SD_BUS_TYPE_INT64: + case SD_BUS_TYPE_UINT64: + case SD_BUS_TYPE_DOUBLE: + case SD_BUS_TYPE_STRING: + case SD_BUS_TYPE_OBJECT_PATH: + case SD_BUS_TYPE_SIGNATURE: + case SD_BUS_TYPE_UNIX_FD: { + void *p; + + p = va_arg(ap, void*); + r = sd_bus_message_read_basic(m, *t, p); + if (r < 0) + return r; + if (r == 0) { + if (n_loop <= 1) + return 0; + + return -ENXIO; + } + + break; + } + + case SD_BUS_TYPE_ARRAY: { + size_t k; + + r = signature_element_length(t + 1, &k); + if (r < 0) + return r; + + { + char s[k + 1]; + memcpy(s, t + 1, k); + s[k] = 0; + + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, s); + if (r < 0) + return r; + if (r == 0) { + if (n_loop <= 1) + return 0; + + return -ENXIO; + } + } + + if (n_array == UINT_MAX) { + types += k; + n_struct -= k; + } + + r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array); + if (r < 0) + return r; + + types = t + 1; + n_struct = k; + n_array = va_arg(ap, unsigned); + + break; + } + + case SD_BUS_TYPE_VARIANT: { + const char *s; + + s = va_arg(ap, const char *); + if (!s) + return -EINVAL; + + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, s); + if (r < 0) + return r; + if (r == 0) { + if (n_loop <= 1) + return 0; + + return -ENXIO; + } + + r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array); + if (r < 0) + return r; + + types = s; + n_struct = strlen(s); + n_array = UINT_MAX; + + break; + } + + case SD_BUS_TYPE_STRUCT_BEGIN: + case SD_BUS_TYPE_DICT_ENTRY_BEGIN: { + size_t k; + + r = signature_element_length(t, &k); + if (r < 0) + return r; + if (k < 2) + return -ERANGE; + + { + char s[k - 1]; + memcpy(s, t + 1, k - 2); + s[k - 2] = 0; + + r = sd_bus_message_enter_container(m, *t == SD_BUS_TYPE_STRUCT_BEGIN ? SD_BUS_TYPE_STRUCT : SD_BUS_TYPE_DICT_ENTRY, s); + if (r < 0) + return r; + if (r == 0) { + if (n_loop <= 1) + return 0; + return -ENXIO; + } + } + + if (n_array == UINT_MAX) { + types += k - 1; + n_struct -= k - 1; + } + + r = type_stack_push(stack, ELEMENTSOF(stack), &stack_ptr, types, n_struct, n_array); + if (r < 0) + return r; + + types = t + 1; + n_struct = k - 2; + n_array = UINT_MAX; + + break; + } + + default: + return -EINVAL; + } + } + + return 1; +} + +_public_ int sd_bus_message_read(sd_bus_message *m, const char *types, ...) { + va_list ap; + int r; + + va_start(ap, types); + r = sd_bus_message_readv(m, types, ap); + va_end(ap); + + return r; +} + +_public_ int sd_bus_message_skip(sd_bus_message *m, const char *types) { + int r; + + assert_return(m, -EINVAL); + assert_return(m->sealed, -EPERM); + + /* If types is NULL, read exactly one element */ + if (!types) { + struct bus_container *c; + size_t l; + + if (message_end_of_signature(m)) + return -ENXIO; + + if (message_end_of_array(m, m->rindex)) + return 0; + + c = message_get_last_container(m); + + r = signature_element_length(c->signature + c->index, &l); + if (r < 0) + return r; + + types = strndupa_safe(c->signature + c->index, l); + } + + switch (*types) { + + case 0: /* Nothing to drop */ + return 0; + + case SD_BUS_TYPE_BYTE: + case SD_BUS_TYPE_BOOLEAN: + case SD_BUS_TYPE_INT16: + case SD_BUS_TYPE_UINT16: + case SD_BUS_TYPE_INT32: + case SD_BUS_TYPE_UINT32: + case SD_BUS_TYPE_INT64: + case SD_BUS_TYPE_UINT64: + case SD_BUS_TYPE_DOUBLE: + case SD_BUS_TYPE_STRING: + case SD_BUS_TYPE_OBJECT_PATH: + case SD_BUS_TYPE_SIGNATURE: + case SD_BUS_TYPE_UNIX_FD: + + r = sd_bus_message_read_basic(m, *types, NULL); + if (r <= 0) + return r; + + r = sd_bus_message_skip(m, types + 1); + if (r < 0) + return r; + + return 1; + + case SD_BUS_TYPE_ARRAY: { + size_t k; + + r = signature_element_length(types + 1, &k); + if (r < 0) + return r; + + { + char s[k+1]; + memcpy(s, types+1, k); + s[k] = 0; + + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, s); + if (r <= 0) + return r; + + for (;;) { + r = sd_bus_message_skip(m, s); + if (r < 0) + return r; + if (r == 0) + break; + } + + r = sd_bus_message_exit_container(m); + if (r < 0) + return r; + } + + r = sd_bus_message_skip(m, types + 1 + k); + if (r < 0) + return r; + + return 1; + } + + case SD_BUS_TYPE_VARIANT: { + const char *contents; + char x; + + r = sd_bus_message_peek_type(m, &x, &contents); + if (r <= 0) + return r; + + if (x != SD_BUS_TYPE_VARIANT) + return -ENXIO; + + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, contents); + if (r <= 0) + return r; + + r = sd_bus_message_skip(m, contents); + if (r < 0) + return r; + assert(r != 0); + + r = sd_bus_message_exit_container(m); + if (r < 0) + return r; + + r = sd_bus_message_skip(m, types + 1); + if (r < 0) + return r; + + return 1; + } + + case SD_BUS_TYPE_STRUCT_BEGIN: + case SD_BUS_TYPE_DICT_ENTRY_BEGIN: { + size_t k; + + r = signature_element_length(types, &k); + if (r < 0) + return r; + if (k < 2) + return -ERANGE; + + { + char s[k-1]; + memcpy(s, types+1, k-2); + s[k-2] = 0; + + r = sd_bus_message_enter_container(m, *types == SD_BUS_TYPE_STRUCT_BEGIN ? SD_BUS_TYPE_STRUCT : SD_BUS_TYPE_DICT_ENTRY, s); + if (r <= 0) + return r; + + r = sd_bus_message_skip(m, s); + if (r < 0) + return r; + + r = sd_bus_message_exit_container(m); + if (r < 0) + return r; + } + + r = sd_bus_message_skip(m, types + k); + if (r < 0) + return r; + + return 1; + } + + default: + return -EINVAL; + } +} + +_public_ int sd_bus_message_read_array( + sd_bus_message *m, + char type, + const void **ptr, + size_t *size) { + + struct bus_container *c; + void *p; + size_t sz; + ssize_t align; + int r; + + assert_return(m, -EINVAL); + assert_return(m->sealed, -EPERM); + assert_return(bus_type_is_trivial(type), -EINVAL); + assert_return(ptr, -EINVAL); + assert_return(size, -EINVAL); + assert_return(!BUS_MESSAGE_NEED_BSWAP(m), -EOPNOTSUPP); + + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, CHAR_TO_STR(type)); + if (r < 0) + return r; + if (r == 0) { + *ptr = NULL; + *size = 0; + return 0; + } + + c = message_get_last_container(m); + + align = bus_type_get_alignment(type); + if (align < 0) + return align; + + sz = BUS_MESSAGE_BSWAP32(m, *c->array_size); + + if (sz == 0) + /* Zero length array, let's return some aligned + * pointer that is not NULL */ + p = (uint8_t*) align; + else { + r = message_peek_body(m, &m->rindex, align, sz, &p); + if (r < 0) + goto fail; + } + + r = sd_bus_message_exit_container(m); + if (r < 0) + goto fail; + + *ptr = (const void*) p; + *size = sz; + + return 1; + +fail: + message_quit_container(m); + return r; +} + +static int message_peek_fields( + sd_bus_message *m, + size_t *rindex, + size_t align, + size_t nbytes, + void **ret) { + + size_t start, end; + + assert(m); + assert(rindex); + assert(align > 0); + + start = ALIGN_TO(*rindex, align); + if (start > m->fields_size) + return -EBADMSG; + + /* Avoid overflow below */ + if (nbytes > SIZE_MAX - start) + return -EBADMSG; + + end = start + nbytes; + if (end > m->fields_size) + return -EBADMSG; + + /* Verify that padding is 0 */ + uint8_t *p = BUS_MESSAGE_FIELDS(m); + for (size_t k = *rindex; k < start; k++) + if (p[k] != 0) + return -EBADMSG; + + if (ret) + *ret = p + start; + + *rindex = end; + return 1; +} + +static int message_peek_field_uint32( + sd_bus_message *m, + size_t *ri, + size_t item_size, + uint32_t *ret) { + + int r; + void *q; + + assert(m); + assert(ri); + + r = message_peek_fields(m, ri, 4, 4, &q); + if (r < 0) + return r; + + if (ret) + *ret = BUS_MESSAGE_BSWAP32(m, *(uint32_t*) q); + + return 0; +} + +static int message_peek_field_string( + sd_bus_message *m, + bool (*validate)(const char *p), + size_t *ri, + size_t item_size, + const char **ret) { + + uint32_t l; + int r; + void *q; + + assert(m); + assert(ri); + + r = message_peek_field_uint32(m, ri, 4, &l); + if (r < 0) + return r; + + if (l == UINT32_MAX) + /* avoid overflow right below */ + return -EBADMSG; + + r = message_peek_fields(m, ri, 1, l+1, &q); + if (r < 0) + return r; + + if (validate) { + if (!validate_nul(q, l)) + return -EBADMSG; + + if (!validate(q)) + return -EBADMSG; + } else { + if (!validate_string(q, l)) + return -EBADMSG; + } + + if (ret) + *ret = q; + + return 0; +} + +static int message_peek_field_signature( + sd_bus_message *m, + size_t *ri, + size_t item_size, + const char **ret) { + + size_t l; + int r; + void *q; + + assert(m); + assert(ri); + + r = message_peek_fields(m, ri, 1, 1, &q); + if (r < 0) + return r; + + l = *(uint8_t*) q; + if (l == UINT8_MAX) + /* avoid overflow right below */ + return -EBADMSG; + + r = message_peek_fields(m, ri, 1, l+1, &q); + if (r < 0) + return r; + + if (!validate_signature(q, l)) + return -EBADMSG; + + if (ret) + *ret = q; + + return 0; +} + +static int message_skip_fields( + sd_bus_message *m, + size_t *ri, + uint32_t array_size, + const char **signature) { + + size_t original_index; + int r; + + assert(m); + assert(ri); + assert(signature); + + original_index = *ri; + + for (;;) { + char t; + size_t l; + + if (array_size != UINT32_MAX && + array_size <= *ri - original_index) + return 0; + + t = **signature; + if (!t) + return 0; + + if (t == SD_BUS_TYPE_STRING) { + + r = message_peek_field_string(m, NULL, ri, 0, NULL); + if (r < 0) + return r; + + (*signature)++; + + } else if (t == SD_BUS_TYPE_OBJECT_PATH) { + + r = message_peek_field_string(m, object_path_is_valid, ri, 0, NULL); + if (r < 0) + return r; + + (*signature)++; + + } else if (t == SD_BUS_TYPE_SIGNATURE) { + + r = message_peek_field_signature(m, ri, 0, NULL); + if (r < 0) + return r; + + (*signature)++; + + } else if (bus_type_is_basic(t)) { + ssize_t align, k; + + align = bus_type_get_alignment(t); + k = bus_type_get_size(t); + assert(align > 0 && k > 0); + + r = message_peek_fields(m, ri, align, k, NULL); + if (r < 0) + return r; + + (*signature)++; + + } else if (t == SD_BUS_TYPE_ARRAY) { + + r = signature_element_length(*signature + 1, &l); + if (r < 0) + return r; + + assert(l >= 1); + { + char sig[l + 1], *s = sig; + uint32_t nas; + int alignment; + + strncpy(sig, *signature + 1, l); + sig[l] = '\0'; + + alignment = bus_type_get_alignment(sig[0]); + if (alignment < 0) + return alignment; + + r = message_peek_field_uint32(m, ri, 0, &nas); + if (r < 0) + return r; + if (nas > BUS_ARRAY_MAX_SIZE) + return -EBADMSG; + + r = message_peek_fields(m, ri, alignment, 0, NULL); + if (r < 0) + return r; + + r = message_skip_fields(m, ri, nas, (const char**) &s); + if (r < 0) + return r; + } + + (*signature) += 1 + l; + + } else if (t == SD_BUS_TYPE_VARIANT) { + const char *s; + + r = message_peek_field_signature(m, ri, 0, &s); + if (r < 0) + return r; + + r = message_skip_fields(m, ri, UINT32_MAX, (const char**) &s); + if (r < 0) + return r; + + (*signature)++; + + } else if (IN_SET(t, SD_BUS_TYPE_STRUCT, SD_BUS_TYPE_DICT_ENTRY)) { + + r = signature_element_length(*signature, &l); + if (r < 0) + return r; + + assert(l >= 2); + { + char sig[l + 1], *s = sig; + strncpy(sig, *signature + 1, l); + sig[l] = '\0'; + + r = message_skip_fields(m, ri, UINT32_MAX, (const char**) &s); + if (r < 0) + return r; + } + + *signature += l; + } else + return -EBADMSG; + } +} + +static int message_parse_fields(sd_bus_message *m) { + uint32_t unix_fds = 0; + bool unix_fds_set = false; + int r; + + assert(m); + + m->user_body_size = m->body_size; + + for (size_t ri = 0; ri < m->fields_size; ) { + const char *signature; + uint64_t field_type; + size_t item_size = SIZE_MAX; + uint8_t *u8; + + r = message_peek_fields(m, &ri, 8, 1, (void**) &u8); + if (r < 0) + return r; + + field_type = *u8; + + r = message_peek_field_signature(m, &ri, 0, &signature); + if (r < 0) + return r; + + switch (field_type) { + + case _BUS_MESSAGE_HEADER_INVALID: + return -EBADMSG; + + case BUS_MESSAGE_HEADER_PATH: + + if (m->path) + return -EBADMSG; + + if (!streq(signature, "o")) + return -EBADMSG; + + r = message_peek_field_string(m, object_path_is_valid, &ri, item_size, &m->path); + break; + + case BUS_MESSAGE_HEADER_INTERFACE: + + if (m->interface) + return -EBADMSG; + + if (!streq(signature, "s")) + return -EBADMSG; + + r = message_peek_field_string(m, interface_name_is_valid, &ri, item_size, &m->interface); + break; + + case BUS_MESSAGE_HEADER_MEMBER: + + if (m->member) + return -EBADMSG; + + if (!streq(signature, "s")) + return -EBADMSG; + + r = message_peek_field_string(m, member_name_is_valid, &ri, item_size, &m->member); + break; + + case BUS_MESSAGE_HEADER_ERROR_NAME: + + if (m->error.name) + return -EBADMSG; + + if (!streq(signature, "s")) + return -EBADMSG; + + r = message_peek_field_string(m, error_name_is_valid, &ri, item_size, &m->error.name); + if (r >= 0) + m->error._need_free = -1; + + break; + + case BUS_MESSAGE_HEADER_DESTINATION: + + if (m->destination) + return -EBADMSG; + + if (!streq(signature, "s")) + return -EBADMSG; + + r = message_peek_field_string(m, service_name_is_valid, &ri, item_size, &m->destination); + break; + + case BUS_MESSAGE_HEADER_SENDER: + + if (m->sender) + return -EBADMSG; + + if (!streq(signature, "s")) + return -EBADMSG; + + r = message_peek_field_string(m, service_name_is_valid, &ri, item_size, &m->sender); + + if (r >= 0 && m->sender[0] == ':' && m->bus->bus_client) { + m->creds.unique_name = (char*) m->sender; + m->creds.mask |= SD_BUS_CREDS_UNIQUE_NAME & m->bus->creds_mask; + } + + break; + + case BUS_MESSAGE_HEADER_SIGNATURE: { + const char *s; + char *c; + + if (m->root_container.signature) + return -EBADMSG; + + if (!streq(signature, "g")) + return -EBADMSG; + + r = message_peek_field_signature(m, &ri, item_size, &s); + if (r < 0) + return r; + + c = strdup(s); + if (!c) + return -ENOMEM; + + free_and_replace(m->root_container.signature, c); + break; + } + + case BUS_MESSAGE_HEADER_REPLY_SERIAL: { + uint32_t serial; + + if (m->reply_cookie != 0) + return -EBADMSG; + + + if (!streq(signature, "u")) + return -EBADMSG; + + r = message_peek_field_uint32(m, &ri, item_size, &serial); + if (r < 0) + return r; + + m->reply_cookie = serial; + + if (m->reply_cookie == 0) + return -EBADMSG; + + break; + } + case BUS_MESSAGE_HEADER_UNIX_FDS: + if (unix_fds_set) + return -EBADMSG; + + if (!streq(signature, "u")) + return -EBADMSG; + + r = message_peek_field_uint32(m, &ri, item_size, &unix_fds); + if (r < 0) + return -EBADMSG; + + unix_fds_set = true; + break; + + default: + r = message_skip_fields(m, &ri, UINT32_MAX, (const char **) &signature); + } + if (r < 0) + return r; + } + + if (m->n_fds != unix_fds) + return -EBADMSG; + + switch (m->header->type) { + + case SD_BUS_MESSAGE_SIGNAL: + if (!m->path || !m->interface || !m->member) + return -EBADMSG; + + if (m->reply_cookie != 0) + return -EBADMSG; + + break; + + case SD_BUS_MESSAGE_METHOD_CALL: + + if (!m->path || !m->member) + return -EBADMSG; + + if (m->reply_cookie != 0) + return -EBADMSG; + + break; + + case SD_BUS_MESSAGE_METHOD_RETURN: + + if (m->reply_cookie == 0) + return -EBADMSG; + break; + + case SD_BUS_MESSAGE_METHOD_ERROR: + + if (m->reply_cookie == 0 || !m->error.name) + return -EBADMSG; + break; + } + + /* Refuse non-local messages that claim they are local */ + if (streq_ptr(m->path, "/org/freedesktop/DBus/Local")) + return -EBADMSG; + if (streq_ptr(m->interface, "org.freedesktop.DBus.Local")) + return -EBADMSG; + if (streq_ptr(m->sender, "org.freedesktop.DBus.Local")) + return -EBADMSG; + + m->root_container.end = m->user_body_size; + + /* Try to read the error message, but if we can't it's a non-issue */ + if (m->header->type == SD_BUS_MESSAGE_METHOD_ERROR) + (void) sd_bus_message_read(m, "s", &m->error.message); + + return 0; +} + +_public_ int sd_bus_message_set_destination(sd_bus_message *m, const char *destination) { + assert_return(m, -EINVAL); + assert_return(destination, -EINVAL); + assert_return(service_name_is_valid(destination), -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(!m->destination, -EEXIST); + + return message_append_field_string(m, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, destination, &m->destination); +} + +_public_ int sd_bus_message_set_sender(sd_bus_message *m, const char *sender) { + assert_return(m, -EINVAL); + assert_return(sender, -EINVAL); + assert_return(service_name_is_valid(sender), -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(!m->sender, -EEXIST); + + return message_append_field_string(m, BUS_MESSAGE_HEADER_SENDER, SD_BUS_TYPE_STRING, sender, &m->sender); +} + +int bus_message_get_blob(sd_bus_message *m, void **buffer, size_t *sz) { + size_t total; + void *p, *e; + size_t i; + struct bus_body_part *part; + + assert(m); + assert(buffer); + assert(sz); + + total = BUS_MESSAGE_SIZE(m); + + p = malloc(total); + if (!p) + return -ENOMEM; + + e = mempcpy(p, m->header, BUS_MESSAGE_BODY_BEGIN(m)); + MESSAGE_FOREACH_PART(part, i, m) + e = mempcpy(e, part->data, part->size); + + assert(total == (size_t) ((uint8_t*) e - (uint8_t*) p)); + + *buffer = p; + *sz = total; + + return 0; +} + +_public_ int sd_bus_message_read_strv_extend(sd_bus_message *m, char ***l) { + char type; + const char *contents, *s; + int r; + + assert(m); + assert(l); + + r = sd_bus_message_peek_type(m, &type, &contents); + if (r < 0) + return r; + + if (type != SD_BUS_TYPE_ARRAY || !STR_IN_SET(contents, "s", "o", "g")) + return -ENXIO; + + r = sd_bus_message_enter_container(m, 'a', NULL); + if (r <= 0) + return r; + + /* sd_bus_message_read_basic() does content validation for us. */ + while ((r = sd_bus_message_read_basic(m, *contents, &s)) > 0) { + r = strv_extend(l, s); + if (r < 0) + return r; + } + if (r < 0) + return r; + + r = sd_bus_message_exit_container(m); + if (r < 0) + return r; + + return 1; +} + +_public_ int sd_bus_message_read_strv(sd_bus_message *m, char ***l) { + _cleanup_strv_free_ char **strv = NULL; + int r; + + assert_return(m, -EINVAL); + assert_return(m->sealed, -EPERM); + assert_return(l, -EINVAL); + + r = sd_bus_message_read_strv_extend(m, &strv); + if (r <= 0) + return r; + + *l = TAKE_PTR(strv); + return 1; +} + +static int bus_message_get_arg_skip( + sd_bus_message *m, + unsigned i, + char *_type, + const char **_contents) { + + unsigned j; + int r; + + r = sd_bus_message_rewind(m, true); + if (r < 0) + return r; + + for (j = 0;; j++) { + const char *contents; + char type; + + r = sd_bus_message_peek_type(m, &type, &contents); + if (r < 0) + return r; + if (r == 0) + return -ENXIO; + + /* Don't match against arguments after the first one we don't understand */ + if (!IN_SET(type, SD_BUS_TYPE_STRING, SD_BUS_TYPE_OBJECT_PATH, SD_BUS_TYPE_SIGNATURE) && + !(type == SD_BUS_TYPE_ARRAY && STR_IN_SET(contents, "s", "o", "g"))) + return -ENXIO; + + if (j >= i) { + if (_contents) + *_contents = contents; + if (_type) + *_type = type; + return 0; + } + + r = sd_bus_message_skip(m, NULL); + if (r < 0) + return r; + } + +} + +int bus_message_get_arg(sd_bus_message *m, unsigned i, const char **str) { + char type; + int r; + + assert(m); + assert(str); + + r = bus_message_get_arg_skip(m, i, &type, NULL); + if (r < 0) + return r; + + if (!IN_SET(type, SD_BUS_TYPE_STRING, SD_BUS_TYPE_OBJECT_PATH, SD_BUS_TYPE_SIGNATURE)) + return -ENXIO; + + return sd_bus_message_read_basic(m, type, str); +} + +int bus_message_get_arg_strv(sd_bus_message *m, unsigned i, char ***strv) { + const char *contents; + char type; + int r; + + assert(m); + assert(strv); + + r = bus_message_get_arg_skip(m, i, &type, &contents); + if (r < 0) + return r; + + if (type != SD_BUS_TYPE_ARRAY) + return -ENXIO; + if (!STR_IN_SET(contents, "s", "o", "g")) + return -ENXIO; + + return sd_bus_message_read_strv(m, strv); +} + +_public_ int sd_bus_message_get_errno(sd_bus_message *m) { + assert_return(m, EINVAL); + + if (m->header->type != SD_BUS_MESSAGE_METHOD_ERROR) + return 0; + + return sd_bus_error_get_errno(&m->error); +} + +_public_ const char* sd_bus_message_get_signature(sd_bus_message *m, int complete) { + struct bus_container *c; + + assert_return(m, NULL); + + c = complete ? &m->root_container : message_get_last_container(m); + return strempty(c->signature); +} + +_public_ int sd_bus_message_is_empty(sd_bus_message *m) { + assert_return(m, -EINVAL); + + return isempty(m->root_container.signature); +} + +_public_ int sd_bus_message_has_signature(sd_bus_message *m, const char *signature) { + assert_return(m, -EINVAL); + + return streq(strempty(m->root_container.signature), strempty(signature)); +} + +_public_ int sd_bus_message_copy(sd_bus_message *m, sd_bus_message *source, int all) { + bool done_something = false; + int r; + + assert_return(m, -EINVAL); + assert_return(source, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(source->sealed, -EPERM); + + do { + const char *contents; + char type; + union { + uint8_t u8; + uint16_t u16; + int16_t s16; + uint32_t u32; + int32_t s32; + uint64_t u64; + int64_t s64; + double d64; + const char *string; + int i; + } basic; + + r = sd_bus_message_peek_type(source, &type, &contents); + if (r < 0) + return r; + if (r == 0) + break; + + done_something = true; + + if (bus_type_is_container(type) > 0) { + + r = sd_bus_message_enter_container(source, type, contents); + if (r < 0) + return r; + + r = sd_bus_message_open_container(m, type, contents); + if (r < 0) + return r; + + r = sd_bus_message_copy(m, source, true); + if (r < 0) + return r; + + r = sd_bus_message_close_container(m); + if (r < 0) + return r; + + r = sd_bus_message_exit_container(source); + if (r < 0) + return r; + + continue; + } + + r = sd_bus_message_read_basic(source, type, &basic); + if (r < 0) + return r; + + assert(r > 0); + + if (IN_SET(type, SD_BUS_TYPE_OBJECT_PATH, SD_BUS_TYPE_SIGNATURE, SD_BUS_TYPE_STRING)) + r = sd_bus_message_append_basic(m, type, basic.string); + else + r = sd_bus_message_append_basic(m, type, &basic); + + if (r < 0) + return r; + + } while (all); + + return done_something; +} + +_public_ int sd_bus_message_verify_type(sd_bus_message *m, char type, const char *contents) { + const char *c; + char t; + int r; + + assert_return(m, -EINVAL); + assert_return(m->sealed, -EPERM); + assert_return(!type || bus_type_is_valid(type), -EINVAL); + assert_return(!contents || signature_is_valid(contents, true), -EINVAL); + assert_return(type || contents, -EINVAL); + assert_return(!contents || !type || bus_type_is_container(type), -EINVAL); + + r = sd_bus_message_peek_type(m, &t, &c); + if (r <= 0) + return r; + + if (type != 0 && type != t) + return 0; + + if (contents && !streq_ptr(contents, c)) + return 0; + + return 1; +} + +_public_ sd_bus *sd_bus_message_get_bus(sd_bus_message *m) { + assert_return(m, NULL); + + return m->bus; +} + +int bus_message_remarshal(sd_bus *bus, sd_bus_message **m) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *n = NULL; + usec_t timeout; + int r; + + assert(bus); + assert(m); + assert(*m); + + switch ((*m)->header->type) { + + case SD_BUS_MESSAGE_SIGNAL: + r = sd_bus_message_new_signal(bus, &n, (*m)->path, (*m)->interface, (*m)->member); + if (r < 0) + return r; + + break; + + case SD_BUS_MESSAGE_METHOD_CALL: + r = sd_bus_message_new_method_call(bus, &n, (*m)->destination, (*m)->path, (*m)->interface, (*m)->member); + if (r < 0) + return r; + + break; + + case SD_BUS_MESSAGE_METHOD_RETURN: + case SD_BUS_MESSAGE_METHOD_ERROR: + + r = sd_bus_message_new(bus, &n, (*m)->header->type); + if (r < 0) + return -ENOMEM; + + assert(n); + + n->reply_cookie = (*m)->reply_cookie; + + r = message_append_reply_cookie(n, n->reply_cookie); + if (r < 0) + return r; + + if ((*m)->header->type == SD_BUS_MESSAGE_METHOD_ERROR && (*m)->error.name) { + r = message_append_field_string(n, BUS_MESSAGE_HEADER_ERROR_NAME, SD_BUS_TYPE_STRING, (*m)->error.name, &n->error.message); + if (r < 0) + return r; + + n->error._need_free = -1; + } + + break; + + default: + return -EINVAL; + } + + if ((*m)->destination && !n->destination) { + r = message_append_field_string(n, BUS_MESSAGE_HEADER_DESTINATION, SD_BUS_TYPE_STRING, (*m)->destination, &n->destination); + if (r < 0) + return r; + } + + if ((*m)->sender && !n->sender) { + r = message_append_field_string(n, BUS_MESSAGE_HEADER_SENDER, SD_BUS_TYPE_STRING, (*m)->sender, &n->sender); + if (r < 0) + return r; + } + + n->header->flags |= (*m)->header->flags & (BUS_MESSAGE_NO_REPLY_EXPECTED|BUS_MESSAGE_NO_AUTO_START); + + r = sd_bus_message_copy(n, *m, true); + if (r < 0) + return r; + + timeout = (*m)->timeout; + if (timeout == 0 && !((*m)->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)) { + r = sd_bus_get_method_call_timeout(bus, &timeout); + if (r < 0) + return r; + } + + r = sd_bus_message_seal(n, BUS_MESSAGE_COOKIE(*m), timeout); + if (r < 0) + return r; + + sd_bus_message_unref(*m); + *m = TAKE_PTR(n); + + return 0; +} + +_public_ int sd_bus_message_get_priority(sd_bus_message *m, int64_t *priority) { + static bool warned = false; + + assert_return(m, -EINVAL); + assert_return(priority, -EINVAL); + + if (!warned) { + log_debug("sd_bus_message_get_priority() is deprecated and always returns 0."); + warned = true; + } + + *priority = 0; + return 0; +} + +_public_ int sd_bus_message_set_priority(sd_bus_message *m, int64_t priority) { + static bool warned = false; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + + if (!warned) { + log_debug("sd_bus_message_set_priority() is deprecated and does nothing."); + warned = true; + } + + return 0; +} + +_public_ int sd_bus_message_sensitive(sd_bus_message *m) { + assert_return(m, -EINVAL); + + m->sensitive = true; + return 0; +} + +char** bus_message_make_log_fields(sd_bus_message *m) { + _cleanup_strv_free_ char **strv = NULL; + + assert(m); + + (void) strv_extend_assignment(&strv, "DBUS_MESSAGE_TYPE", bus_message_type_to_string(m->header->type)); + (void) strv_extend_assignment(&strv, "DBUS_SENDER", sd_bus_message_get_sender(m)); + (void) strv_extend_assignment(&strv, "DBUS_DESTINATION", sd_bus_message_get_destination(m)); + (void) strv_extend_assignment(&strv, "DBUS_PATH", sd_bus_message_get_path(m)); + (void) strv_extend_assignment(&strv, "DBUS_INTERFACE", sd_bus_message_get_interface(m)); + (void) strv_extend_assignment(&strv, "DBUS_MEMBER", sd_bus_message_get_member(m)); + + (void) strv_extendf(&strv, "DBUS_MESSAGE_COOKIE=%" PRIu64, BUS_MESSAGE_COOKIE(m)); + if (m->reply_cookie != 0) + (void) strv_extendf(&strv, "DBUS_MESSAGE_REPLY_COOKIE=%" PRIu64, m->reply_cookie); + + (void) strv_extend_assignment(&strv, "DBUS_SIGNATURE", m->root_container.signature); + (void) strv_extend_assignment(&strv, "DBUS_ERROR_NAME", m->error.name); + (void) strv_extend_assignment(&strv, "DBUS_ERROR_MESSAGE", m->error.message); + + return TAKE_PTR(strv); +} diff --git a/src/libsystemd/sd-bus/bus-message.h b/src/libsystemd/sd-bus/bus-message.h new file mode 100644 index 0000000..76f0d85 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-message.h @@ -0,0 +1,191 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <byteswap.h> +#include <stdbool.h> +#include <sys/socket.h> + +#include "sd-bus.h" + +#include "bus-creds.h" +#include "bus-protocol.h" +#include "macro.h" +#include "time-util.h" + +struct bus_container { + char enclosing; + + /* Indexes into the signature string */ + unsigned index, saved_index; + char *signature; + + size_t before, begin, end; + + /* pointer to the array size value, if this is a value */ + uint32_t *array_size; + + char *peeked_signature; +}; + +struct bus_body_part { + struct bus_body_part *next; + void *data; + void *mmap_begin; + size_t size; + size_t mapped; + size_t allocated; + uint64_t memfd_offset; + int memfd; + bool free_this:1; + bool munmap_this:1; + bool sealed:1; + bool is_zero:1; +}; + +struct sd_bus_message { + /* Caveat: a message can be referenced in two different ways: the main (user-facing) way will also + * pin the bus connection object the message is associated with. The secondary way ("queued") is used + * when a message is in the read or write queues of the bus connection object, which will not pin the + * bus connection object. This is necessary so that we don't have to have a pair of cyclic references + * between a message that is queued and its connection: as soon as a message is only referenced by + * the connection (by means of being queued) and the connection itself has no other references it + * will be freed. */ + + unsigned n_ref; /* Counter of references that pin the connection */ + unsigned n_queued; /* Counter of references that do not pin the connection */ + + sd_bus *bus; + + uint64_t reply_cookie; + + const char *path; + const char *interface; + const char *member; + const char *destination; + const char *sender; + + sd_bus_error error; + + sd_bus_creds creds; + + usec_t monotonic; + usec_t realtime; + uint64_t seqnum; + uint64_t verify_destination_id; + + bool sealed:1; + bool dont_send:1; + bool allow_fds:1; + bool free_header:1; + bool free_fds:1; + bool poisoned:1; + bool sensitive:1; + + /* The first bytes of the message */ + struct bus_header *header; + + size_t fields_size; + size_t body_size; + size_t user_body_size; + + struct bus_body_part body; + struct bus_body_part *body_end; + unsigned n_body_parts; + + size_t rindex; + struct bus_body_part *cached_rindex_part; + size_t cached_rindex_part_begin; + + uint32_t n_fds; + int *fds; + + struct bus_container root_container, *containers; + size_t n_containers; + + struct iovec *iovec; + struct iovec iovec_fixed[2]; + unsigned n_iovec; + + char *peeked_signature; + + /* If set replies to this message must carry the signature + * specified here to successfully seal. This is initialized + * from the vtable data */ + const char *enforced_reply_signature; + + usec_t timeout; + + size_t header_offsets[_BUS_MESSAGE_HEADER_MAX]; + unsigned n_header_offsets; + + uint64_t read_counter; +}; + +static inline bool BUS_MESSAGE_NEED_BSWAP(sd_bus_message *m) { + return m->header->endian != BUS_NATIVE_ENDIAN; +} + +static inline uint16_t BUS_MESSAGE_BSWAP16(sd_bus_message *m, uint16_t u) { + return BUS_MESSAGE_NEED_BSWAP(m) ? bswap_16(u) : u; +} + +static inline uint32_t BUS_MESSAGE_BSWAP32(sd_bus_message *m, uint32_t u) { + return BUS_MESSAGE_NEED_BSWAP(m) ? bswap_32(u) : u; +} + +static inline uint64_t BUS_MESSAGE_BSWAP64(sd_bus_message *m, uint64_t u) { + return BUS_MESSAGE_NEED_BSWAP(m) ? bswap_64(u) : u; +} + +static inline uint64_t BUS_MESSAGE_COOKIE(sd_bus_message *m) { + return BUS_MESSAGE_BSWAP32(m, m->header->serial); +} + +static inline size_t BUS_MESSAGE_SIZE(sd_bus_message *m) { + return + sizeof(struct bus_header) + + ALIGN8(m->fields_size) + + m->body_size; +} + +static inline size_t BUS_MESSAGE_BODY_BEGIN(sd_bus_message *m) { + return + sizeof(struct bus_header) + + ALIGN8(m->fields_size); +} + +static inline void* BUS_MESSAGE_FIELDS(sd_bus_message *m) { + return (uint8_t*) m->header + sizeof(struct bus_header); +} + +int bus_message_get_blob(sd_bus_message *m, void **buffer, size_t *sz); + +int bus_message_from_malloc( + sd_bus *bus, + void *buffer, + size_t length, + int *fds, + size_t n_fds, + const char *label, + sd_bus_message **ret); + +int bus_message_get_arg(sd_bus_message *m, unsigned i, const char **str); +int bus_message_get_arg_strv(sd_bus_message *m, unsigned i, char ***strv); + +#define MESSAGE_FOREACH_PART(part, i, m) \ + for ((i) = 0, (part) = &(m)->body; (i) < (m)->n_body_parts; (i)++, (part) = (part)->next) + +int bus_body_part_map(struct bus_body_part *part); +void bus_body_part_unmap(struct bus_body_part *part); + +int bus_message_new_synthetic_error(sd_bus *bus, uint64_t serial, const sd_bus_error *e, sd_bus_message **m); + +int bus_message_remarshal(sd_bus *bus, sd_bus_message **m); + +void bus_message_set_sender_driver(sd_bus *bus, sd_bus_message *m); +void bus_message_set_sender_local(sd_bus *bus, sd_bus_message *m); + +sd_bus_message* bus_message_ref_queued(sd_bus_message *m, sd_bus *bus); +sd_bus_message* bus_message_unref_queued(sd_bus_message *m, sd_bus *bus); + +char** bus_message_make_log_fields(sd_bus_message *m); diff --git a/src/libsystemd/sd-bus/bus-objects.c b/src/libsystemd/sd-bus/bus-objects.c new file mode 100644 index 0000000..c25c40f --- /dev/null +++ b/src/libsystemd/sd-bus/bus-objects.c @@ -0,0 +1,3033 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "alloc-util.h" +#include "bus-internal.h" +#include "bus-introspect.h" +#include "bus-message.h" +#include "bus-objects.h" +#include "bus-signature.h" +#include "bus-slot.h" +#include "bus-type.h" +#include "missing_capability.h" +#include "string-util.h" +#include "strv.h" + +static int node_vtable_get_userdata( + sd_bus *bus, + const char *path, + struct node_vtable *c, + void **userdata, + sd_bus_error *error) { + + sd_bus_slot *s; + void *u, *found_u = NULL; + int r; + + assert(bus); + assert(path); + assert(c); + + s = container_of(c, sd_bus_slot, node_vtable); + u = s->userdata; + if (c->find) { + bus->current_slot = sd_bus_slot_ref(s); + bus->current_userdata = u; + r = c->find(bus, path, c->interface, u, &found_u, error); + bus->current_userdata = NULL; + bus->current_slot = sd_bus_slot_unref(s); + + if (r < 0) + return r; + if (sd_bus_error_is_set(error)) + return -sd_bus_error_get_errno(error); + if (r == 0) + return r; + } else + found_u = u; + + if (userdata) + *userdata = found_u; + + return 1; +} + +static void *vtable_method_convert_userdata(const sd_bus_vtable *p, void *u) { + assert(p); + + if (!u || FLAGS_SET(p->flags, SD_BUS_VTABLE_ABSOLUTE_OFFSET)) + return SIZE_TO_PTR(p->x.method.offset); /* don't add offset on NULL, to make ubsan happy */ + + return (uint8_t*) u + p->x.method.offset; +} + +static void *vtable_property_convert_userdata(const sd_bus_vtable *p, void *u) { + assert(p); + + if (!u || FLAGS_SET(p->flags, SD_BUS_VTABLE_ABSOLUTE_OFFSET)) + return SIZE_TO_PTR(p->x.property.offset); /* as above */ + + return (uint8_t*) u + p->x.property.offset; +} + +static int vtable_property_get_userdata( + sd_bus *bus, + const char *path, + struct vtable_member *p, + void **userdata, + sd_bus_error *error) { + + void *u; + int r; + + assert(bus); + assert(path); + assert(p); + assert(userdata); + + r = node_vtable_get_userdata(bus, path, p->parent, &u, error); + if (r <= 0) + return r; + if (bus->nodes_modified) + return 0; + + *userdata = vtable_property_convert_userdata(p->vtable, u); + return 1; +} + +static int add_enumerated_to_set( + sd_bus *bus, + const char *prefix, + struct node_enumerator *first, + OrderedSet *s, + sd_bus_error *error) { + + int r; + + assert(bus); + assert(prefix); + assert(s); + + LIST_FOREACH(enumerators, c, first) { + char **children = NULL; + sd_bus_slot *slot; + + if (bus->nodes_modified) + return 0; + + slot = container_of(c, sd_bus_slot, node_enumerator); + + bus->current_slot = sd_bus_slot_ref(slot); + bus->current_userdata = slot->userdata; + r = c->callback(bus, prefix, slot->userdata, &children, error); + bus->current_userdata = NULL; + bus->current_slot = sd_bus_slot_unref(slot); + + if (r < 0) + return r; + if (sd_bus_error_is_set(error)) + return -sd_bus_error_get_errno(error); + + STRV_FOREACH(k, children) { + if (r < 0) { + free(*k); + continue; + } + + if (!object_path_is_valid(*k)) { + free(*k); + r = -EINVAL; + continue; + } + + if (!object_path_startswith(*k, prefix)) { + free(*k); + continue; + } + + r = ordered_set_consume(s, *k); + if (r == -EEXIST) + r = 0; + } + + free(children); + if (r < 0) + return r; + } + + return 0; +} + +enum { + /* if set, add_subtree() works recursively */ + CHILDREN_RECURSIVE = 1 << 0, + /* if set, add_subtree() scans object-manager hierarchies recursively */ + CHILDREN_SUBHIERARCHIES = 1 << 1, +}; + +static int add_subtree_to_set( + sd_bus *bus, + const char *prefix, + struct node *n, + unsigned flags, + OrderedSet *s, + sd_bus_error *error) { + + int r; + + assert(bus); + assert(prefix); + assert(n); + assert(s); + + r = add_enumerated_to_set(bus, prefix, n->enumerators, s, error); + if (r < 0) + return r; + if (bus->nodes_modified) + return 0; + + LIST_FOREACH(siblings, i, n->child) { + char *t; + + if (!object_path_startswith(i->path, prefix)) + continue; + + t = strdup(i->path); + if (!t) + return -ENOMEM; + + r = ordered_set_consume(s, t); + if (r < 0 && r != -EEXIST) + return r; + + if ((flags & CHILDREN_RECURSIVE) && + ((flags & CHILDREN_SUBHIERARCHIES) || !i->object_managers)) { + r = add_subtree_to_set(bus, prefix, i, flags, s, error); + if (r < 0) + return r; + if (bus->nodes_modified) + return 0; + } + } + + return 0; +} + +static int get_child_nodes( + sd_bus *bus, + const char *prefix, + struct node *n, + unsigned flags, + OrderedSet **ret, + sd_bus_error *error) { + + _cleanup_ordered_set_free_free_ OrderedSet *s = NULL; + int r; + + assert(bus); + assert(prefix); + assert(n); + assert(ret); + + s = ordered_set_new(&string_hash_ops); + if (!s) + return -ENOMEM; + + r = add_subtree_to_set(bus, prefix, n, flags, s, error); + if (r < 0) + return r; + + *ret = TAKE_PTR(s); + return 0; +} + +static int node_callbacks_run( + sd_bus *bus, + sd_bus_message *m, + struct node_callback *first, + bool require_fallback, + bool *found_object) { + + int r; + + assert(bus); + assert(m); + assert(found_object); + + LIST_FOREACH(callbacks, c, first) { + _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL; + sd_bus_slot *slot; + + if (bus->nodes_modified) + return 0; + + if (require_fallback && !c->is_fallback) + continue; + + *found_object = true; + + if (c->last_iteration == bus->iteration_counter) + continue; + + c->last_iteration = bus->iteration_counter; + + r = sd_bus_message_rewind(m, true); + if (r < 0) + return r; + + slot = container_of(c, sd_bus_slot, node_callback); + + bus->current_slot = sd_bus_slot_ref(slot); + bus->current_handler = c->callback; + bus->current_userdata = slot->userdata; + r = c->callback(m, slot->userdata, &error_buffer); + bus->current_userdata = NULL; + bus->current_handler = NULL; + bus->current_slot = sd_bus_slot_unref(slot); + + r = bus_maybe_reply_error(m, r, &error_buffer); + if (r != 0) + return r; + } + + return 0; +} + +#define CAPABILITY_SHIFT(x) (((x) >> __builtin_ctzll(_SD_BUS_VTABLE_CAPABILITY_MASK)) & 0xFFFF) + +static int check_access(sd_bus *bus, sd_bus_message *m, struct vtable_member *c, sd_bus_error *error) { + uint64_t cap; + int r; + + assert(bus); + assert(m); + assert(c); + + /* If the entire bus is trusted let's grant access */ + if (bus->trusted) + return 0; + + /* If the member is marked UNPRIVILEGED let's grant access */ + if (c->vtable->flags & SD_BUS_VTABLE_UNPRIVILEGED) + return 0; + + /* Check that the caller has the requested capability set. Note that the flags value contains the + * capability number plus one, which we need to subtract here. We do this so that we have 0 as + * special value for the default. */ + cap = CAPABILITY_SHIFT(c->vtable->flags); + if (cap == 0) + cap = CAPABILITY_SHIFT(c->parent->vtable[0].flags); + if (cap == 0) + cap = CAP_SYS_ADMIN; + else + cap--; + + r = sd_bus_query_sender_privilege(m, cap); + if (r < 0) + return r; + if (r > 0) + return 0; + + return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Access to %s.%s() not permitted.", c->interface, c->member); +} + +static int method_callbacks_run( + sd_bus *bus, + sd_bus_message *m, + struct vtable_member *c, + bool require_fallback, + bool *found_object) { + + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + const char *signature; + void *u; + int r; + + assert(bus); + assert(m); + assert(c); + assert(found_object); + + if (require_fallback && !c->parent->is_fallback) + return 0; + + if (FLAGS_SET(c->vtable->flags, SD_BUS_VTABLE_SENSITIVE)) { + r = sd_bus_message_sensitive(m); + if (r < 0) + return r; + } + + r = check_access(bus, m, c, &error); + if (r < 0) + return bus_maybe_reply_error(m, r, &error); + + r = node_vtable_get_userdata(bus, m->path, c->parent, &u, &error); + if (r <= 0) + return bus_maybe_reply_error(m, r, &error); + if (bus->nodes_modified) + return 0; + + u = vtable_method_convert_userdata(c->vtable, u); + + *found_object = true; + + if (c->last_iteration == bus->iteration_counter) + return 0; + + c->last_iteration = bus->iteration_counter; + + r = sd_bus_message_rewind(m, true); + if (r < 0) + return r; + + signature = sd_bus_message_get_signature(m, true); + if (!signature) + return -EINVAL; + + if (!streq(strempty(c->vtable->x.method.signature), signature)) + return sd_bus_reply_method_errorf( + m, + SD_BUS_ERROR_INVALID_ARGS, + "Invalid arguments '%s' to call %s.%s(), expecting '%s'.", + signature, c->interface, c->member, strempty(c->vtable->x.method.signature)); + + /* Keep track what the signature of the reply to this message + * should be, so that this can be enforced when sealing the + * reply. */ + m->enforced_reply_signature = strempty(c->vtable->x.method.result); + + if (c->vtable->x.method.handler) { + sd_bus_slot *slot; + + slot = container_of(c->parent, sd_bus_slot, node_vtable); + + bus->current_slot = sd_bus_slot_ref(slot); + bus->current_handler = c->vtable->x.method.handler; + bus->current_userdata = u; + r = c->vtable->x.method.handler(m, u, &error); + bus->current_userdata = NULL; + bus->current_handler = NULL; + bus->current_slot = sd_bus_slot_unref(slot); + + return bus_maybe_reply_error(m, r, &error); + } + + /* If the method callback is NULL, make this a successful NOP */ + r = sd_bus_reply_method_return(m, NULL); + if (r < 0) + return r; + + return 1; +} + +static int invoke_property_get( + sd_bus *bus, + sd_bus_slot *slot, + const sd_bus_vtable *v, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + const void *p; + int r; + + assert(bus); + assert(slot); + assert(v); + assert(path); + assert(interface); + assert(property); + assert(reply); + + if (v->x.property.get) { + + bus->current_slot = sd_bus_slot_ref(slot); + bus->current_userdata = userdata; + r = v->x.property.get(bus, path, interface, property, reply, userdata, error); + bus->current_userdata = NULL; + bus->current_slot = sd_bus_slot_unref(slot); + + if (r < 0) + return r; + if (sd_bus_error_is_set(error)) + return -sd_bus_error_get_errno(error); + return r; + } + + /* Automatic handling if no callback is defined. */ + + if (streq(v->x.property.signature, "as")) + return sd_bus_message_append_strv(reply, *(char***) userdata); + + assert(signature_is_single(v->x.property.signature, false)); + assert(bus_type_is_basic(v->x.property.signature[0])); + + switch (v->x.property.signature[0]) { + + case SD_BUS_TYPE_STRING: + case SD_BUS_TYPE_SIGNATURE: + p = strempty(*(char**) userdata); + break; + + case SD_BUS_TYPE_OBJECT_PATH: + p = *(char**) userdata; + assert(p); + break; + + default: + p = userdata; + break; + } + + return sd_bus_message_append_basic(reply, v->x.property.signature[0], p); +} + +static int invoke_property_set( + sd_bus *bus, + sd_bus_slot *slot, + const sd_bus_vtable *v, + const char *path, + const char *interface, + const char *property, + sd_bus_message *value, + void *userdata, + sd_bus_error *error) { + + int r; + + assert(bus); + assert(slot); + assert(v); + assert(path); + assert(interface); + assert(property); + assert(value); + + if (v->x.property.set) { + + bus->current_slot = sd_bus_slot_ref(slot); + bus->current_userdata = userdata; + r = v->x.property.set(bus, path, interface, property, value, userdata, error); + bus->current_userdata = NULL; + bus->current_slot = sd_bus_slot_unref(slot); + + if (r < 0) + return r; + if (sd_bus_error_is_set(error)) + return -sd_bus_error_get_errno(error); + return r; + } + + /* Automatic handling if no callback is defined. */ + + assert(signature_is_single(v->x.property.signature, false)); + assert(bus_type_is_basic(v->x.property.signature[0])); + + switch (v->x.property.signature[0]) { + + case SD_BUS_TYPE_STRING: + case SD_BUS_TYPE_OBJECT_PATH: + case SD_BUS_TYPE_SIGNATURE: { + const char *p; + char *n; + + r = sd_bus_message_read_basic(value, v->x.property.signature[0], &p); + if (r < 0) + return r; + + n = strdup(p); + if (!n) + return -ENOMEM; + + free(*(char**) userdata); + *(char**) userdata = n; + + break; + } + + default: + r = sd_bus_message_read_basic(value, v->x.property.signature[0], userdata); + if (r < 0) + return r; + + break; + } + + return 1; +} + +static int property_get_set_callbacks_run( + sd_bus *bus, + sd_bus_message *m, + struct vtable_member *c, + bool require_fallback, + bool is_get, + bool *found_object) { + + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + sd_bus_slot *slot; + void *u = NULL; + int r; + + assert(bus); + assert(m); + assert(c); + assert(found_object); + + if (require_fallback && !c->parent->is_fallback) + return 0; + + if (FLAGS_SET(c->vtable->flags, SD_BUS_VTABLE_SENSITIVE)) { + r = sd_bus_message_sensitive(m); + if (r < 0) + return r; + } + + r = vtable_property_get_userdata(bus, m->path, c, &u, &error); + if (r <= 0) + return bus_maybe_reply_error(m, r, &error); + if (bus->nodes_modified) + return 0; + + slot = container_of(c->parent, sd_bus_slot, node_vtable); + + *found_object = true; + + r = sd_bus_message_new_method_return(m, &reply); + if (r < 0) + return r; + + if (FLAGS_SET(c->vtable->flags, SD_BUS_VTABLE_SENSITIVE)) { + r = sd_bus_message_sensitive(reply); + if (r < 0) + return r; + } + + if (is_get) { + /* Note that we do not protect against reexecution + * here (using the last_iteration check, see below), + * should the node tree have changed and we got called + * again. We assume that property Get() calls are + * ultimately without side-effects or if they aren't + * then at least idempotent. */ + + r = sd_bus_message_open_container(reply, 'v', c->vtable->x.property.signature); + if (r < 0) + return r; + + /* Note that we do not do an access check here. Read + * access to properties is always unrestricted, since + * PropertiesChanged signals broadcast contents + * anyway. */ + + r = invoke_property_get(bus, slot, c->vtable, m->path, c->interface, c->member, reply, u, &error); + if (r < 0) + return bus_maybe_reply_error(m, r, &error); + + if (bus->nodes_modified) + return 0; + + r = sd_bus_message_close_container(reply); + if (r < 0) + return r; + + } else { + const char *signature = NULL; + char type = 0; + + if (c->vtable->type != _SD_BUS_VTABLE_WRITABLE_PROPERTY) + return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_PROPERTY_READ_ONLY, "Property '%s' is not writable.", c->member); + + /* Avoid that we call the set routine more than once + * if the processing of this message got restarted + * because the node tree changed. */ + if (c->last_iteration == bus->iteration_counter) + return 0; + + c->last_iteration = bus->iteration_counter; + + r = sd_bus_message_peek_type(m, &type, &signature); + if (r < 0) + return r; + + if (type != 'v') + return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_SIGNATURE, + "Incorrect signature when setting property '%s', expected 'v', got '%c'.", + c->member, type); + if (!streq(strempty(signature), strempty(c->vtable->x.property.signature))) + return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_ARGS, + "Incorrect parameters for property '%s', expected '%s', got '%s'.", + c->member, strempty(c->vtable->x.property.signature), strempty(signature)); + + r = sd_bus_message_enter_container(m, 'v', c->vtable->x.property.signature); + if (r < 0) + return r; + + r = check_access(bus, m, c, &error); + if (r < 0) + return bus_maybe_reply_error(m, r, &error); + + r = invoke_property_set(bus, slot, c->vtable, m->path, c->interface, c->member, m, u, &error); + if (r < 0) + return bus_maybe_reply_error(m, r, &error); + + if (bus->nodes_modified) + return 0; + + r = sd_bus_message_exit_container(m); + if (r < 0) + return r; + } + + r = sd_bus_send(bus, reply, NULL); + if (r < 0) + return r; + + return 1; +} + +static int vtable_append_one_property( + sd_bus *bus, + sd_bus_message *reply, + const char *path, + struct node_vtable *c, + const sd_bus_vtable *v, + void *userdata, + sd_bus_error *error) { + + sd_bus_slot *slot; + int r; + + assert(bus); + assert(reply); + assert(path); + assert(c); + assert(v); + + if (FLAGS_SET(c->vtable->flags, SD_BUS_VTABLE_SENSITIVE)) { + r = sd_bus_message_sensitive(reply); + if (r < 0) + return r; + } + + r = sd_bus_message_open_container(reply, 'e', "sv"); + if (r < 0) + return r; + + r = sd_bus_message_append(reply, "s", v->x.property.member); + if (r < 0) + return r; + + r = sd_bus_message_open_container(reply, 'v', v->x.property.signature); + if (r < 0) + return r; + + slot = container_of(c, sd_bus_slot, node_vtable); + + r = invoke_property_get(bus, slot, v, path, c->interface, v->x.property.member, reply, vtable_property_convert_userdata(v, userdata), error); + if (r < 0) + return r; + if (bus->nodes_modified) + return 0; + + r = sd_bus_message_close_container(reply); + if (r < 0) + return r; + + r = sd_bus_message_close_container(reply); + if (r < 0) + return r; + + return 0; +} + +static int vtable_append_all_properties( + sd_bus *bus, + sd_bus_message *reply, + const char *path, + struct node_vtable *c, + void *userdata, + sd_bus_error *error) { + + const sd_bus_vtable *v; + int r; + + assert(bus); + assert(reply); + assert(path); + assert(c); + + if (c->vtable[0].flags & SD_BUS_VTABLE_HIDDEN) + return 1; + + v = c->vtable; + for (v = bus_vtable_next(c->vtable, v); v->type != _SD_BUS_VTABLE_END; v = bus_vtable_next(c->vtable, v)) { + if (!IN_SET(v->type, _SD_BUS_VTABLE_PROPERTY, _SD_BUS_VTABLE_WRITABLE_PROPERTY)) + continue; + + if (v->flags & SD_BUS_VTABLE_HIDDEN) + continue; + + /* Let's not include properties marked as "explicit" in any message that contains a generic + * dump of properties, but only in those generated as a response to an explicit request. */ + if (v->flags & SD_BUS_VTABLE_PROPERTY_EXPLICIT) + continue; + + /* Let's not include properties marked only for invalidation on change (i.e. in contrast to + * those whose new values are included in PropertiesChanges message) in any signals. This is + * useful to ensure they aren't included in InterfacesAdded messages. */ + if (reply->header->type != SD_BUS_MESSAGE_METHOD_RETURN && + FLAGS_SET(v->flags, SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION)) + continue; + + r = vtable_append_one_property(bus, reply, path, c, v, userdata, error); + if (r < 0) + return r; + if (bus->nodes_modified) + return 0; + } + + return 1; +} + +static int property_get_all_callbacks_run( + sd_bus *bus, + sd_bus_message *m, + struct node_vtable *first, + bool require_fallback, + const char *iface, + bool *found_object) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + bool found_interface; + int r; + + assert(bus); + assert(m); + assert(found_object); + + r = sd_bus_message_new_method_return(m, &reply); + if (r < 0) + return r; + + r = sd_bus_message_open_container(reply, 'a', "{sv}"); + if (r < 0) + return r; + + found_interface = !iface || STR_IN_SET(iface, + "org.freedesktop.DBus.Properties", + "org.freedesktop.DBus.Peer", + "org.freedesktop.DBus.Introspectable"); + + LIST_FOREACH(vtables, c, first) { + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + void *u; + + if (require_fallback && !c->is_fallback) + continue; + + r = node_vtable_get_userdata(bus, m->path, c, &u, &error); + if (r < 0) + return bus_maybe_reply_error(m, r, &error); + if (bus->nodes_modified) + return 0; + if (r == 0) + continue; + + *found_object = true; + + if (iface && !streq(c->interface, iface)) + continue; + found_interface = true; + + r = vtable_append_all_properties(bus, reply, m->path, c, u, &error); + if (r < 0) + return bus_maybe_reply_error(m, r, &error); + if (bus->nodes_modified) + return 0; + } + + if (!*found_object) + return 0; + + if (!found_interface) { + r = sd_bus_reply_method_errorf( + m, + SD_BUS_ERROR_UNKNOWN_INTERFACE, + "Unknown interface '%s'.", iface); + if (r < 0) + return r; + + return 1; + } + + r = sd_bus_message_close_container(reply); + if (r < 0) + return r; + + r = sd_bus_send(bus, reply, NULL); + if (r < 0) + return r; + + return 1; +} + +static int bus_node_exists( + sd_bus *bus, + struct node *n, + const char *path, + bool require_fallback) { + + int r; + + assert(bus); + assert(n); + assert(path); + + /* Tests if there's anything attached directly to this node + * for the specified path */ + + if (!require_fallback && (n->enumerators || n->object_managers)) + return true; + + LIST_FOREACH(callbacks, k, n->callbacks) { + if (require_fallback && !k->is_fallback) + continue; + + return 1; + } + + LIST_FOREACH(vtables, c, n->vtables) { + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + + if (require_fallback && !c->is_fallback) + continue; + + r = node_vtable_get_userdata(bus, path, c, NULL, &error); + if (r != 0) + return r; + if (bus->nodes_modified) + return 0; + } + + return 0; +} + +int introspect_path( + sd_bus *bus, + const char *path, + struct node *n, + bool require_fallback, + bool ignore_nodes_modified, + bool *found_object, + char **ret, + sd_bus_error *error) { + + _cleanup_ordered_set_free_free_ OrderedSet *s = NULL; + _cleanup_(introspect_done) struct introspect intro = {}; + bool empty; + int r; + + if (!n) { + n = hashmap_get(bus->nodes, path); + if (!n) + return -ENOENT; + } + + r = get_child_nodes(bus, path, n, 0, &s, error); + if (r < 0) + return r; + if (bus->nodes_modified && !ignore_nodes_modified) + return 0; + + r = introspect_begin(&intro, bus->trusted); + if (r < 0) + return r; + + r = introspect_write_default_interfaces(&intro, !require_fallback && n->object_managers); + if (r < 0) + return r; + + empty = ordered_set_isempty(s); + + LIST_FOREACH(vtables, c, n->vtables) { + if (require_fallback && !c->is_fallback) + continue; + + r = node_vtable_get_userdata(bus, path, c, NULL, error); + if (r < 0) + return r; + if (bus->nodes_modified && !ignore_nodes_modified) + return 0; + if (r == 0) + continue; + + empty = false; + + if (c->vtable[0].flags & SD_BUS_VTABLE_HIDDEN) + continue; + + r = introspect_write_interface(&intro, c->interface, c->vtable); + if (r < 0) + return r; + } + + if (empty) { + /* Nothing?, let's see if we exist at all, and if not + * refuse to do anything */ + r = bus_node_exists(bus, n, path, require_fallback); + if (r <= 0) + return r; + if (bus->nodes_modified && !ignore_nodes_modified) + return 0; + } + + if (found_object) + *found_object = true; + + r = introspect_write_child_nodes(&intro, s, path); + if (r < 0) + return r; + + r = introspect_finish(&intro, ret); + if (r < 0) + return r; + + return 1; +} + +static int process_introspect( + sd_bus *bus, + sd_bus_message *m, + struct node *n, + bool require_fallback, + bool *found_object) { + + _cleanup_free_ char *s = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + int r; + + assert(bus); + assert(m); + assert(n); + assert(found_object); + + r = introspect_path(bus, m->path, n, require_fallback, false, found_object, &s, &error); + if (r < 0) + return bus_maybe_reply_error(m, r, &error); + if (r == 0) + /* nodes_modified == true */ + return 0; + + r = sd_bus_message_new_method_return(m, &reply); + if (r < 0) + return r; + + r = sd_bus_message_append(reply, "s", s); + if (r < 0) + return r; + + r = sd_bus_send(bus, reply, NULL); + if (r < 0) + return r; + + return 1; +} + +static int object_manager_serialize_path( + sd_bus *bus, + sd_bus_message *reply, + const char *prefix, + const char *path, + bool require_fallback, + bool *found_object_manager, + sd_bus_error *error) { + + const char *previous_interface = NULL; + bool found_something = false; + struct node *n; + int r; + + assert(bus); + assert(reply); + assert(prefix); + assert(path); + assert(found_object_manager); + assert(error); + + n = hashmap_get(bus->nodes, prefix); + if (!n) + return 0; + + if (!require_fallback && n->object_managers) + *found_object_manager = true; + + LIST_FOREACH(vtables, i, n->vtables) { + void *u; + + if (require_fallback && !i->is_fallback) + continue; + + r = node_vtable_get_userdata(bus, path, i, &u, error); + if (r < 0) + return r; + if (bus->nodes_modified) + return 0; + if (r == 0) + continue; + + if (!found_something) { + + /* Open the object part */ + + r = sd_bus_message_open_container(reply, 'e', "oa{sa{sv}}"); + if (r < 0) + return r; + + r = sd_bus_message_append(reply, "o", path); + if (r < 0) + return r; + + r = sd_bus_message_open_container(reply, 'a', "{sa{sv}}"); + if (r < 0) + return r; + + r = sd_bus_message_append(reply, "{sa{sv}}", "org.freedesktop.DBus.Peer", 0); + if (r < 0) + return r; + + r = sd_bus_message_append(reply, "{sa{sv}}", "org.freedesktop.DBus.Introspectable", 0); + if (r < 0) + return r; + + r = sd_bus_message_append(reply, "{sa{sv}}", "org.freedesktop.DBus.Properties", 0); + if (r < 0) + return r; + + if (*found_object_manager) { + r = sd_bus_message_append( + reply, "{sa{sv}}", "org.freedesktop.DBus.ObjectManager", 0); + if (r < 0) + return r; + } + + found_something = true; + } + + if (!streq_ptr(previous_interface, i->interface)) { + + /* Maybe close the previous interface part */ + + if (previous_interface) { + r = sd_bus_message_close_container(reply); + if (r < 0) + return r; + + r = sd_bus_message_close_container(reply); + if (r < 0) + return r; + } + + /* Open the new interface part */ + + r = sd_bus_message_open_container(reply, 'e', "sa{sv}"); + if (r < 0) + return r; + + r = sd_bus_message_append(reply, "s", i->interface); + if (r < 0) + return r; + + r = sd_bus_message_open_container(reply, 'a', "{sv}"); + if (r < 0) + return r; + } + + r = vtable_append_all_properties(bus, reply, path, i, u, error); + if (r < 0) + return r; + if (bus->nodes_modified) + return 0; + + previous_interface = i->interface; + } + + if (previous_interface) { + r = sd_bus_message_close_container(reply); + if (r < 0) + return r; + + r = sd_bus_message_close_container(reply); + if (r < 0) + return r; + } + + if (found_something) { + r = sd_bus_message_close_container(reply); + if (r < 0) + return r; + + r = sd_bus_message_close_container(reply); + if (r < 0) + return r; + } + + return 1; +} + +static int object_manager_serialize_path_and_fallbacks( + sd_bus *bus, + sd_bus_message *reply, + const char *path, + sd_bus_error *error) { + + _cleanup_free_ char *prefix = NULL; + size_t pl; + int r; + bool found_object_manager = false; + + assert(bus); + assert(reply); + assert(path); + assert(error); + + /* First, add all vtables registered for this path */ + r = object_manager_serialize_path(bus, reply, path, path, false, &found_object_manager, error); + if (r < 0) + return r; + if (bus->nodes_modified) + return 0; + + /* Second, add fallback vtables registered for any of the prefixes */ + pl = strlen(path); + assert(pl <= BUS_PATH_SIZE_MAX); + prefix = new(char, pl + 1); + if (!prefix) + return -ENOMEM; + + OBJECT_PATH_FOREACH_PREFIX(prefix, path) { + r = object_manager_serialize_path(bus, reply, prefix, path, true, &found_object_manager, error); + if (r < 0) + return r; + if (bus->nodes_modified) + return 0; + } + + return 0; +} + +static int process_get_managed_objects( + sd_bus *bus, + sd_bus_message *m, + struct node *n, + bool require_fallback, + bool *found_object) { + + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + _cleanup_ordered_set_free_free_ OrderedSet *s = NULL; + char *path; + int r; + + assert(bus); + assert(m); + assert(n); + assert(found_object); + + /* Spec says, GetManagedObjects() is only implemented on the root of a + * sub-tree. Therefore, we require a registered object-manager on + * exactly the queried path, otherwise, we refuse to respond. */ + + if (require_fallback || !n->object_managers) + return 0; + + r = get_child_nodes(bus, m->path, n, CHILDREN_RECURSIVE, &s, &error); + if (r < 0) + return bus_maybe_reply_error(m, r, &error); + if (bus->nodes_modified) + return 0; + + r = sd_bus_message_new_method_return(m, &reply); + if (r < 0) + return r; + + r = sd_bus_message_open_container(reply, 'a', "{oa{sa{sv}}}"); + if (r < 0) + return r; + + ORDERED_SET_FOREACH(path, s) { + r = object_manager_serialize_path_and_fallbacks(bus, reply, path, &error); + if (r < 0) + return bus_maybe_reply_error(m, r, &error); + + if (bus->nodes_modified) + return 0; + } + + r = sd_bus_message_close_container(reply); + if (r < 0) + return r; + + r = sd_bus_send(bus, reply, NULL); + if (r < 0) + return r; + + return 1; +} + +static int object_find_and_run( + sd_bus *bus, + sd_bus_message *m, + const char *p, + bool require_fallback, + bool *found_object) { + + struct node *n; + struct vtable_member vtable_key, *v; + int r; + + assert(bus); + assert(m); + assert(p); + assert(found_object); + + n = hashmap_get(bus->nodes, p); + if (!n) + return 0; + + /* First, try object callbacks */ + r = node_callbacks_run(bus, m, n->callbacks, require_fallback, found_object); + if (r != 0) + return r; + if (bus->nodes_modified) + return 0; + + if (!m->interface || !m->member) + return 0; + + /* Then, look for a known method */ + vtable_key.path = (char*) p; + vtable_key.interface = m->interface; + vtable_key.member = m->member; + + v = hashmap_get(bus->vtable_methods, &vtable_key); + if (v) { + r = method_callbacks_run(bus, m, v, require_fallback, found_object); + if (r != 0) + return r; + if (bus->nodes_modified) + return 0; + } + + /* Then, look for a known property */ + if (streq(m->interface, "org.freedesktop.DBus.Properties")) { + bool get = false; + + get = streq(m->member, "Get"); + + if (get || streq(m->member, "Set")) { + + r = sd_bus_message_rewind(m, true); + if (r < 0) + return r; + + vtable_key.path = (char*) p; + + r = sd_bus_message_read(m, "ss", &vtable_key.interface, &vtable_key.member); + if (r < 0) + return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_ARGS, "Expected interface and member parameters"); + + v = hashmap_get(bus->vtable_properties, &vtable_key); + if (v) { + r = property_get_set_callbacks_run(bus, m, v, require_fallback, get, found_object); + if (r != 0) + return r; + } + + } else if (streq(m->member, "GetAll")) { + const char *iface; + + r = sd_bus_message_rewind(m, true); + if (r < 0) + return r; + + r = sd_bus_message_read(m, "s", &iface); + if (r < 0) + return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_ARGS, "Expected interface parameter"); + + if (iface[0] == 0) + iface = NULL; + + r = property_get_all_callbacks_run(bus, m, n->vtables, require_fallback, iface, found_object); + if (r != 0) + return r; + } + + } else if (sd_bus_message_is_method_call(m, "org.freedesktop.DBus.Introspectable", "Introspect")) { + + if (!isempty(sd_bus_message_get_signature(m, true))) + return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_ARGS, "Expected no parameters"); + + r = process_introspect(bus, m, n, require_fallback, found_object); + if (r != 0) + return r; + + } else if (sd_bus_message_is_method_call(m, "org.freedesktop.DBus.ObjectManager", "GetManagedObjects")) { + + if (!isempty(sd_bus_message_get_signature(m, true))) + return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INVALID_ARGS, "Expected no parameters"); + + r = process_get_managed_objects(bus, m, n, require_fallback, found_object); + if (r != 0) + return r; + } + + if (bus->nodes_modified) + return 0; + + if (!*found_object) { + r = bus_node_exists(bus, n, m->path, require_fallback); + if (r < 0) + return bus_maybe_reply_error(m, r, NULL); + if (bus->nodes_modified) + return 0; + if (r > 0) + *found_object = true; + } + + return 0; +} + +int bus_process_object(sd_bus *bus, sd_bus_message *m) { + _cleanup_free_ char *prefix = NULL; + int r; + size_t pl; + bool found_object = false; + + assert(bus); + assert(m); + + if (bus->is_monitor) + return 0; + + if (m->header->type != SD_BUS_MESSAGE_METHOD_CALL) + return 0; + + if (hashmap_isempty(bus->nodes)) + return 0; + + /* Never respond to broadcast messages */ + if (bus->bus_client && !m->destination) + return 0; + + assert(m->path); + assert(m->member); + + pl = strlen(m->path); + assert(pl <= BUS_PATH_SIZE_MAX); + prefix = new(char, pl + 1); + if (!prefix) + return -ENOMEM; + + do { + bus->nodes_modified = false; + + r = object_find_and_run(bus, m, m->path, false, &found_object); + if (r != 0) + return r; + + /* Look for fallback prefixes */ + OBJECT_PATH_FOREACH_PREFIX(prefix, m->path) { + + if (bus->nodes_modified) + break; + + r = object_find_and_run(bus, m, prefix, true, &found_object); + if (r != 0) + return r; + } + + } while (bus->nodes_modified); + + if (!found_object) + return 0; + + if (sd_bus_message_is_method_call(m, "org.freedesktop.DBus.Properties", "Get") || + sd_bus_message_is_method_call(m, "org.freedesktop.DBus.Properties", "Set")) { + const char *interface = NULL, *property = NULL; + + (void) sd_bus_message_rewind(m, true); + (void) sd_bus_message_read_basic(m, 's', &interface); + (void) sd_bus_message_read_basic(m, 's', &property); + + r = sd_bus_reply_method_errorf( + m, + SD_BUS_ERROR_UNKNOWN_PROPERTY, + "Unknown interface %s or property %s.", strnull(interface), strnull(property)); + } else + r = sd_bus_reply_method_errorf( + m, + SD_BUS_ERROR_UNKNOWN_METHOD, + "Unknown method %s or interface %s.", m->member, m->interface); + + if (r < 0) + return r; + + return 1; +} + +static struct node* bus_node_allocate(sd_bus *bus, const char *path) { + struct node *n, *parent; + const char *e; + _cleanup_free_ char *s = NULL; + char *p; + int r; + + assert(bus); + assert(path); + assert(path[0] == '/'); + + n = hashmap_get(bus->nodes, path); + if (n) + return n; + + r = hashmap_ensure_allocated(&bus->nodes, &string_hash_ops); + if (r < 0) + return NULL; + + s = strdup(path); + if (!s) + return NULL; + + if (streq(path, "/")) + parent = NULL; + else { + assert_se(e = strrchr(path, '/')); + + p = strndupa_safe(path, MAX(1, e - path)); + + parent = bus_node_allocate(bus, p); + if (!parent) + return NULL; + } + + n = new0(struct node, 1); + if (!n) + return NULL; + + n->parent = parent; + n->path = TAKE_PTR(s); + + r = hashmap_put(bus->nodes, n->path, n); + if (r < 0) { + free(n->path); + return mfree(n); + } + + if (parent) + LIST_PREPEND(siblings, parent->child, n); + + return n; +} + +void bus_node_gc(sd_bus *b, struct node *n) { + assert(b); + + if (!n) + return; + + if (n->child || + n->callbacks || + n->vtables || + n->enumerators || + n->object_managers) + return; + + assert_se(hashmap_remove(b->nodes, n->path) == n); + + if (n->parent) + LIST_REMOVE(siblings, n->parent->child, n); + + free(n->path); + bus_node_gc(b, n->parent); + free(n); +} + +static int bus_find_parent_object_manager(sd_bus *bus, struct node **out, const char *path, bool* path_has_object_manager) { + struct node *n; + + assert(bus); + assert(path); + assert(path_has_object_manager); + + n = hashmap_get(bus->nodes, path); + + if (n) + *path_has_object_manager = n->object_managers; + + if (!n) { + _cleanup_free_ char *prefix = NULL; + size_t pl; + + pl = strlen(path); + assert(pl <= BUS_PATH_SIZE_MAX); + prefix = new(char, pl + 1); + if (!prefix) + return -ENOMEM; + + OBJECT_PATH_FOREACH_PREFIX(prefix, path) { + n = hashmap_get(bus->nodes, prefix); + if (n) + break; + } + } + + while (n && !n->object_managers) + n = n->parent; + + if (out) + *out = n; + return !!n; +} + +static int bus_add_object( + sd_bus *bus, + sd_bus_slot **slot, + bool fallback, + const char *path, + sd_bus_message_handler_t callback, + void *userdata) { + + sd_bus_slot *s; + struct node *n; + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(object_path_is_valid(path), -EINVAL); + assert_return(callback, -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + n = bus_node_allocate(bus, path); + if (!n) + return -ENOMEM; + + s = bus_slot_allocate(bus, !slot, BUS_NODE_CALLBACK, sizeof(struct node_callback), userdata); + if (!s) { + r = -ENOMEM; + goto fail; + } + + s->node_callback.callback = callback; + s->node_callback.is_fallback = fallback; + + s->node_callback.node = n; + LIST_PREPEND(callbacks, n->callbacks, &s->node_callback); + bus->nodes_modified = true; + + if (slot) + *slot = s; + + return 0; + +fail: + sd_bus_slot_unref(s); + bus_node_gc(bus, n); + + return r; +} + +_public_ int sd_bus_add_object( + sd_bus *bus, + sd_bus_slot **slot, + const char *path, + sd_bus_message_handler_t callback, + void *userdata) { + + return bus_add_object(bus, slot, false, path, callback, userdata); +} + +_public_ int sd_bus_add_fallback( + sd_bus *bus, + sd_bus_slot **slot, + const char *prefix, + sd_bus_message_handler_t callback, + void *userdata) { + + return bus_add_object(bus, slot, true, prefix, callback, userdata); +} + +static void vtable_member_hash_func(const struct vtable_member *m, struct siphash *state) { + assert(m); + + string_hash_func(m->path, state); + string_hash_func(m->interface, state); + string_hash_func(m->member, state); +} + +static int vtable_member_compare_func(const struct vtable_member *x, const struct vtable_member *y) { + int r; + + assert(x); + assert(y); + + r = strcmp(x->path, y->path); + if (r != 0) + return r; + + r = strcmp(x->interface, y->interface); + if (r != 0) + return r; + + return strcmp(x->member, y->member); +} + +DEFINE_PRIVATE_HASH_OPS(vtable_member_hash_ops, struct vtable_member, vtable_member_hash_func, vtable_member_compare_func); + +typedef enum { + NAMES_FIRST_PART = 1 << 0, /* first part of argument name list (input names). It is reset by names_are_valid() */ + NAMES_PRESENT = 1 << 1, /* at least one argument name is present, so the names will checked. + This flag is set and used internally by names_are_valid(), but needs to be stored across calls for 2-parts list */ + NAMES_SINGLE_PART = 1 << 2, /* argument name list consisting of a single part */ +} names_flags; + +static bool names_are_valid(const char *signature, const char **names, names_flags *flags) { + int r; + + if ((*flags & NAMES_FIRST_PART || *flags & NAMES_SINGLE_PART) && **names != '\0') + *flags |= NAMES_PRESENT; + + for (;*flags & NAMES_PRESENT;) { + size_t l; + + if (!*signature) + break; + + r = signature_element_length(signature, &l); + if (r < 0) + return false; + + if (**names != '\0') { + if (!member_name_is_valid(*names)) + return false; + *names += strlen(*names) + 1; + } else if (*flags & NAMES_PRESENT) + return false; + + signature += l; + } + /* let's check if there are more argument names specified than the signature allows */ + if (*flags & NAMES_PRESENT && **names != '\0' && !(*flags & NAMES_FIRST_PART)) + return false; + *flags &= ~NAMES_FIRST_PART; + return true; +} + +/* the current version of this struct is defined in sd-bus-vtable.h, but we need to list here the historical versions + to make sure the calling code is compatible with one of these */ +struct sd_bus_vtable_221 { + uint8_t type:8; + uint64_t flags:56; + union { + struct { + size_t element_size; + } start; + struct { + const char *member; + const char *signature; + const char *result; + sd_bus_message_handler_t handler; + size_t offset; + } method; + struct { + const char *member; + const char *signature; + } signal; + struct { + const char *member; + const char *signature; + sd_bus_property_get_t get; + sd_bus_property_set_t set; + size_t offset; + } property; + } x; +}; +/* Structure size up to v241 */ +#define VTABLE_ELEMENT_SIZE_221 sizeof(struct sd_bus_vtable_221) + +/* Size of the structure when "features" field was added. If the structure definition is augmented, a copy of + * the structure definition will need to be made (similarly to the sd_bus_vtable_221 above), and this + * definition updated to refer to it. */ +#define VTABLE_ELEMENT_SIZE_242 sizeof(struct sd_bus_vtable) + +static int vtable_features(const sd_bus_vtable *vtable) { + if (vtable[0].x.start.element_size < VTABLE_ELEMENT_SIZE_242 || + !vtable[0].x.start.vtable_format_reference) + return 0; + return vtable[0].x.start.features; +} + +bool bus_vtable_has_names(const sd_bus_vtable *vtable) { + return vtable_features(vtable) & _SD_BUS_VTABLE_PARAM_NAMES; +} + +const sd_bus_vtable* bus_vtable_next(const sd_bus_vtable *vtable, const sd_bus_vtable *v) { + return (const sd_bus_vtable*) ((char*) v + vtable[0].x.start.element_size); +} + +static int add_object_vtable_internal( + sd_bus *bus, + sd_bus_slot **slot, + const char *path, + const char *interface, + const sd_bus_vtable *vtable, + bool fallback, + sd_bus_object_find_t find, + void *userdata) { + + sd_bus_slot *s = NULL; + struct node_vtable *existing = NULL; + const sd_bus_vtable *v; + struct node *n; + int r; + const char *names = ""; + names_flags nf; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(object_path_is_valid(path), -EINVAL); + assert_return(interface_name_is_valid(interface), -EINVAL); + assert_return(vtable, -EINVAL); + assert_return(vtable[0].type == _SD_BUS_VTABLE_START, -EINVAL); + assert_return(vtable[0].x.start.element_size == VTABLE_ELEMENT_SIZE_221 || + vtable[0].x.start.element_size >= VTABLE_ELEMENT_SIZE_242, + -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + assert_return(!streq(interface, "org.freedesktop.DBus.Properties") && + !streq(interface, "org.freedesktop.DBus.Introspectable") && + !streq(interface, "org.freedesktop.DBus.Peer") && + !streq(interface, "org.freedesktop.DBus.ObjectManager"), -EINVAL); + + r = hashmap_ensure_allocated(&bus->vtable_methods, &vtable_member_hash_ops); + if (r < 0) + return r; + + r = hashmap_ensure_allocated(&bus->vtable_properties, &vtable_member_hash_ops); + if (r < 0) + return r; + + n = bus_node_allocate(bus, path); + if (!n) + return -ENOMEM; + + LIST_FOREACH(vtables, i, n->vtables) { + if (i->is_fallback != fallback) { + r = -EPROTOTYPE; + goto fail; + } + + if (streq(i->interface, interface)) { + + if (i->vtable == vtable) { + r = -EEXIST; + goto fail; + } + + existing = i; + } + } + + s = bus_slot_allocate(bus, !slot, BUS_NODE_VTABLE, sizeof(struct node_vtable), userdata); + if (!s) { + r = -ENOMEM; + goto fail; + } + + s->node_vtable.is_fallback = fallback; + s->node_vtable.vtable = vtable; + s->node_vtable.find = find; + + s->node_vtable.interface = strdup(interface); + if (!s->node_vtable.interface) { + r = -ENOMEM; + goto fail; + } + + v = s->node_vtable.vtable; + for (v = bus_vtable_next(vtable, v); v->type != _SD_BUS_VTABLE_END; v = bus_vtable_next(vtable, v)) { + + switch (v->type) { + + case _SD_BUS_VTABLE_METHOD: { + struct vtable_member *m; + nf = NAMES_FIRST_PART; + + if (bus_vtable_has_names(vtable)) + names = strempty(v->x.method.names); + + if (!member_name_is_valid(v->x.method.member) || + !signature_is_valid(strempty(v->x.method.signature), false) || + !signature_is_valid(strempty(v->x.method.result), false) || + !names_are_valid(strempty(v->x.method.signature), &names, &nf) || + !names_are_valid(strempty(v->x.method.result), &names, &nf) || + !(v->x.method.handler || (isempty(v->x.method.signature) && isempty(v->x.method.result))) || + v->flags & (SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE|SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION)) { + r = -EINVAL; + goto fail; + } + + m = new0(struct vtable_member, 1); + if (!m) { + r = -ENOMEM; + goto fail; + } + + m->parent = &s->node_vtable; + m->path = n->path; + m->interface = s->node_vtable.interface; + m->member = v->x.method.member; + m->vtable = v; + + r = hashmap_put(bus->vtable_methods, m, m); + if (r < 0) { + free(m); + goto fail; + } + + break; + } + + case _SD_BUS_VTABLE_WRITABLE_PROPERTY: + + if (!(v->x.property.set || bus_type_is_basic(v->x.property.signature[0]))) { + r = -EINVAL; + goto fail; + } + + if (v->flags & SD_BUS_VTABLE_PROPERTY_CONST) { + r = -EINVAL; + goto fail; + } + + _fallthrough_; + case _SD_BUS_VTABLE_PROPERTY: { + struct vtable_member *m; + + if (!member_name_is_valid(v->x.property.member) || + !signature_is_single(v->x.property.signature, false) || + !(v->x.property.get || bus_type_is_basic(v->x.property.signature[0]) || streq(v->x.property.signature, "as")) || + (v->flags & SD_BUS_VTABLE_METHOD_NO_REPLY) || + (!!(v->flags & SD_BUS_VTABLE_PROPERTY_CONST) + !!(v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE) + !!(v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION)) > 1 || + ((v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE) && (v->flags & SD_BUS_VTABLE_PROPERTY_EXPLICIT)) || + (v->flags & SD_BUS_VTABLE_UNPRIVILEGED && v->type == _SD_BUS_VTABLE_PROPERTY)) { + r = -EINVAL; + goto fail; + } + + m = new0(struct vtable_member, 1); + if (!m) { + r = -ENOMEM; + goto fail; + } + + m->parent = &s->node_vtable; + m->path = n->path; + m->interface = s->node_vtable.interface; + m->member = v->x.property.member; + m->vtable = v; + + r = hashmap_put(bus->vtable_properties, m, m); + if (r < 0) { + free(m); + goto fail; + } + + break; + } + + case _SD_BUS_VTABLE_SIGNAL: + nf = NAMES_SINGLE_PART; + + if (bus_vtable_has_names(vtable)) + names = strempty(v->x.signal.names); + + if (!member_name_is_valid(v->x.signal.member) || + !signature_is_valid(strempty(v->x.signal.signature), false) || + !names_are_valid(strempty(v->x.signal.signature), &names, &nf) || + v->flags & SD_BUS_VTABLE_UNPRIVILEGED) { + r = -EINVAL; + goto fail; + } + + break; + + default: + r = -EINVAL; + goto fail; + } + } + + s->node_vtable.node = n; + LIST_INSERT_AFTER(vtables, n->vtables, existing, &s->node_vtable); + bus->nodes_modified = true; + + if (slot) + *slot = s; + + return 0; + +fail: + sd_bus_slot_unref(s); + bus_node_gc(bus, n); + + return r; +} + +/* This symbol exists solely to tell the linker that the "new" vtable format is used. */ +_public_ const unsigned sd_bus_object_vtable_format = 242; + +_public_ int sd_bus_add_object_vtable( + sd_bus *bus, + sd_bus_slot **slot, + const char *path, + const char *interface, + const sd_bus_vtable *vtable, + void *userdata) { + + return add_object_vtable_internal(bus, slot, path, interface, vtable, false, NULL, userdata); +} + +_public_ int sd_bus_add_fallback_vtable( + sd_bus *bus, + sd_bus_slot **slot, + const char *prefix, + const char *interface, + const sd_bus_vtable *vtable, + sd_bus_object_find_t find, + void *userdata) { + + return add_object_vtable_internal(bus, slot, prefix, interface, vtable, true, find, userdata); +} + +_public_ int sd_bus_add_node_enumerator( + sd_bus *bus, + sd_bus_slot **slot, + const char *path, + sd_bus_node_enumerator_t callback, + void *userdata) { + + sd_bus_slot *s; + struct node *n; + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(object_path_is_valid(path), -EINVAL); + assert_return(callback, -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + n = bus_node_allocate(bus, path); + if (!n) + return -ENOMEM; + + s = bus_slot_allocate(bus, !slot, BUS_NODE_ENUMERATOR, sizeof(struct node_enumerator), userdata); + if (!s) { + r = -ENOMEM; + goto fail; + } + + s->node_enumerator.callback = callback; + + s->node_enumerator.node = n; + LIST_PREPEND(enumerators, n->enumerators, &s->node_enumerator); + bus->nodes_modified = true; + + if (slot) + *slot = s; + + return 0; + +fail: + sd_bus_slot_unref(s); + bus_node_gc(bus, n); + + return r; +} + +static int emit_properties_changed_on_interface( + sd_bus *bus, + const char *prefix, + const char *path, + const char *interface, + bool require_fallback, + bool *found_interface, + char **names) { + + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + bool has_invalidating = false, has_changing = false; + struct vtable_member key = {}; + struct node *n; + void *u = NULL; + int r; + + assert(bus); + assert(prefix); + assert(path); + assert(interface); + assert(found_interface); + + n = hashmap_get(bus->nodes, prefix); + if (!n) + return 0; + + r = sd_bus_message_new_signal(bus, &m, path, "org.freedesktop.DBus.Properties", "PropertiesChanged"); + if (r < 0) + return r; + + r = sd_bus_message_append(m, "s", interface); + if (r < 0) + return r; + + r = sd_bus_message_open_container(m, 'a', "{sv}"); + if (r < 0) + return r; + + key.path = prefix; + key.interface = interface; + + LIST_FOREACH(vtables, c, n->vtables) { + if (require_fallback && !c->is_fallback) + continue; + + if (!streq(c->interface, interface)) + continue; + + r = node_vtable_get_userdata(bus, path, c, &u, &error); + if (r < 0) + return r; + if (bus->nodes_modified) + return 0; + if (r == 0) + continue; + + *found_interface = true; + + if (names) { + /* If the caller specified a list of + * properties we include exactly those in the + * PropertiesChanged message */ + + STRV_FOREACH(property, names) { + struct vtable_member *v; + + assert_return(member_name_is_valid(*property), -EINVAL); + + key.member = *property; + v = hashmap_get(bus->vtable_properties, &key); + if (!v) + return -ENOENT; + + /* If there are two vtables for the same + * interface, let's handle this property when + * we come to that vtable. */ + if (c != v->parent) + continue; + + assert_return(v->vtable->flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE || + v->vtable->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION, -EDOM); + + assert_return(!(v->vtable->flags & SD_BUS_VTABLE_HIDDEN), -EDOM); + + if (v->vtable->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION) { + has_invalidating = true; + continue; + } + + has_changing = true; + + r = vtable_append_one_property(bus, m, m->path, c, v->vtable, u, &error); + if (r < 0) + return r; + if (bus->nodes_modified) + return 0; + } + } else { + const sd_bus_vtable *v; + + /* If the caller specified no properties list + * we include all properties that are marked + * as changing in the message. */ + + v = c->vtable; + for (v = bus_vtable_next(c->vtable, v); v->type != _SD_BUS_VTABLE_END; v = bus_vtable_next(c->vtable, v)) { + if (!IN_SET(v->type, _SD_BUS_VTABLE_PROPERTY, _SD_BUS_VTABLE_WRITABLE_PROPERTY)) + continue; + + if (v->flags & SD_BUS_VTABLE_HIDDEN) + continue; + + if (v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION) { + has_invalidating = true; + continue; + } + + if (!(v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE)) + continue; + + has_changing = true; + + r = vtable_append_one_property(bus, m, m->path, c, v, u, &error); + if (r < 0) + return r; + if (bus->nodes_modified) + return 0; + } + } + } + + if (!has_invalidating && !has_changing) + return 0; + + r = sd_bus_message_close_container(m); + if (r < 0) + return r; + + r = sd_bus_message_open_container(m, 'a', "s"); + if (r < 0) + return r; + + if (has_invalidating) { + LIST_FOREACH(vtables, c, n->vtables) { + if (require_fallback && !c->is_fallback) + continue; + + if (!streq(c->interface, interface)) + continue; + + r = node_vtable_get_userdata(bus, path, c, &u, &error); + if (r < 0) + return r; + if (bus->nodes_modified) + return 0; + if (r == 0) + continue; + + if (names) { + STRV_FOREACH(property, names) { + struct vtable_member *v; + + key.member = *property; + assert_se(v = hashmap_get(bus->vtable_properties, &key)); + assert(c == v->parent); + + if (!(v->vtable->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION)) + continue; + + r = sd_bus_message_append(m, "s", *property); + if (r < 0) + return r; + } + } else { + const sd_bus_vtable *v; + + v = c->vtable; + for (v = bus_vtable_next(c->vtable, v); v->type != _SD_BUS_VTABLE_END; v = bus_vtable_next(c->vtable, v)) { + if (!IN_SET(v->type, _SD_BUS_VTABLE_PROPERTY, _SD_BUS_VTABLE_WRITABLE_PROPERTY)) + continue; + + if (v->flags & SD_BUS_VTABLE_HIDDEN) + continue; + + if (!(v->flags & SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION)) + continue; + + r = sd_bus_message_append(m, "s", v->x.property.member); + if (r < 0) + return r; + } + } + } + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return r; + + r = sd_bus_send(bus, m, NULL); + if (r < 0) + return r; + + return 1; +} + +_public_ int sd_bus_emit_properties_changed_strv( + sd_bus *bus, + const char *path, + const char *interface, + char **names) { + + _cleanup_free_ char *prefix = NULL; + bool found_interface = false; + size_t pl; + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(object_path_is_valid(path), -EINVAL); + assert_return(interface_name_is_valid(interface), -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + /* A non-NULL but empty names list means nothing needs to be + generated. A NULL list OTOH indicates that all properties + that are set to EMITS_CHANGE or EMITS_INVALIDATION shall be + included in the PropertiesChanged message. */ + if (names && names[0] == NULL) + return 0; + + BUS_DONT_DESTROY(bus); + + pl = strlen(path); + assert(pl <= BUS_PATH_SIZE_MAX); + prefix = new(char, pl + 1); + if (!prefix) + return -ENOMEM; + + do { + bus->nodes_modified = false; + + r = emit_properties_changed_on_interface(bus, path, path, interface, false, &found_interface, names); + if (r != 0) + return r; + if (bus->nodes_modified) + continue; + + OBJECT_PATH_FOREACH_PREFIX(prefix, path) { + r = emit_properties_changed_on_interface(bus, prefix, path, interface, true, &found_interface, names); + if (r != 0) + return r; + if (bus->nodes_modified) + break; + } + + } while (bus->nodes_modified); + + return found_interface ? 0 : -ENOENT; +} + +_public_ int sd_bus_emit_properties_changed( + sd_bus *bus, + const char *path, + const char *interface, + const char *name, ...) { + + char **names; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(object_path_is_valid(path), -EINVAL); + assert_return(interface_name_is_valid(interface), -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + if (!name) + return 0; + + names = strv_from_stdarg_alloca(name); + + return sd_bus_emit_properties_changed_strv(bus, path, interface, names); +} + +static int object_added_append_all_prefix( + sd_bus *bus, + sd_bus_message *m, + OrderedSet *s, + const char *prefix, + const char *path, + bool require_fallback) { + + const char *previous_interface = NULL; + struct node *n; + int r; + + assert(bus); + assert(m); + assert(s); + assert(prefix); + assert(path); + + n = hashmap_get(bus->nodes, prefix); + if (!n) + return 0; + + LIST_FOREACH(vtables, c, n->vtables) { + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + void *u = NULL; + + if (require_fallback && !c->is_fallback) + continue; + + r = node_vtable_get_userdata(bus, path, c, &u, &error); + if (r < 0) + return r; + if (bus->nodes_modified) + return 0; + if (r == 0) + continue; + + if (!streq_ptr(c->interface, previous_interface)) { + /* If a child-node already handled this interface, we + * skip it on any of its parents. The child vtables + * always fully override any conflicting vtables of + * any parent node. */ + if (ordered_set_get(s, c->interface)) + continue; + + r = ordered_set_put(s, c->interface); + if (r < 0) + return r; + + if (previous_interface) { + r = sd_bus_message_close_container(m); + if (r < 0) + return r; + r = sd_bus_message_close_container(m); + if (r < 0) + return r; + } + + r = sd_bus_message_open_container(m, 'e', "sa{sv}"); + if (r < 0) + return r; + r = sd_bus_message_append(m, "s", c->interface); + if (r < 0) + return r; + r = sd_bus_message_open_container(m, 'a', "{sv}"); + if (r < 0) + return r; + + previous_interface = c->interface; + } + + r = vtable_append_all_properties(bus, m, path, c, u, &error); + if (r < 0) + return r; + if (bus->nodes_modified) + return 0; + } + + if (previous_interface) { + r = sd_bus_message_close_container(m); + if (r < 0) + return r; + r = sd_bus_message_close_container(m); + if (r < 0) + return r; + } + + return 0; +} + +static int object_added_append_all(sd_bus *bus, sd_bus_message *m, const char *path, bool path_has_object_manager) { + _cleanup_ordered_set_free_ OrderedSet *s = NULL; + _cleanup_free_ char *prefix = NULL; + size_t pl; + int r; + + assert(bus); + assert(m); + assert(path); + + /* + * This appends all interfaces registered on path @path. We first add + * the builtin interfaces, which are always available and handled by + * sd-bus. Then, we add all interfaces registered on the exact node, + * followed by all fallback interfaces registered on any parent prefix. + * + * If an interface is registered multiple times on the same node with + * different vtables, we merge all the properties across all vtables. + * However, if a child node has the same interface registered as one of + * its parent nodes has as fallback, we make the child overwrite the + * parent instead of extending it. Therefore, we keep a "Set" of all + * handled interfaces during parent traversal, so we skip interfaces on + * a parent that were overwritten by a child. + */ + + s = ordered_set_new(&string_hash_ops); + if (!s) + return -ENOMEM; + + r = sd_bus_message_append(m, "{sa{sv}}", "org.freedesktop.DBus.Peer", 0); + if (r < 0) + return r; + r = sd_bus_message_append(m, "{sa{sv}}", "org.freedesktop.DBus.Introspectable", 0); + if (r < 0) + return r; + r = sd_bus_message_append(m, "{sa{sv}}", "org.freedesktop.DBus.Properties", 0); + if (r < 0) + return r; + if (path_has_object_manager){ + r = sd_bus_message_append(m, "{sa{sv}}", "org.freedesktop.DBus.ObjectManager", 0); + if (r < 0) + return r; + } + + r = object_added_append_all_prefix(bus, m, s, path, path, false); + if (r < 0) + return r; + if (bus->nodes_modified) + return 0; + + pl = strlen(path); + assert(pl <= BUS_PATH_SIZE_MAX); + prefix = new(char, pl + 1); + if (!prefix) + return -ENOMEM; + + OBJECT_PATH_FOREACH_PREFIX(prefix, path) { + r = object_added_append_all_prefix(bus, m, s, prefix, path, true); + if (r < 0) + return r; + if (bus->nodes_modified) + return 0; + } + + return 0; +} + +_public_ int sd_bus_emit_object_added(sd_bus *bus, const char *path) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + struct node *object_manager; + int r; + + /* + * This emits an InterfacesAdded signal on the given path, by iterating + * all registered vtables and fallback vtables on the path. All + * properties are queried and included in the signal. + * This call is equivalent to sd_bus_emit_interfaces_added() with an + * explicit list of registered interfaces. However, unlike + * interfaces_added(), this call can figure out the list of supported + * interfaces itself. Furthermore, it properly adds the builtin + * org.freedesktop.DBus.* interfaces. + */ + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(object_path_is_valid(path), -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + bool path_has_object_manager = false; + r = bus_find_parent_object_manager(bus, &object_manager, path, &path_has_object_manager); + if (r < 0) + return r; + if (r == 0) + return -ESRCH; + + BUS_DONT_DESTROY(bus); + + do { + bus->nodes_modified = false; + m = sd_bus_message_unref(m); + + r = sd_bus_message_new_signal(bus, &m, object_manager->path, "org.freedesktop.DBus.ObjectManager", "InterfacesAdded"); + if (r < 0) + return r; + + r = sd_bus_message_append_basic(m, 'o', path); + if (r < 0) + return r; + + r = sd_bus_message_open_container(m, 'a', "{sa{sv}}"); + if (r < 0) + return r; + + r = object_added_append_all(bus, m, path, path_has_object_manager); + if (r < 0) + return r; + + if (bus->nodes_modified) + continue; + + r = sd_bus_message_close_container(m); + if (r < 0) + return r; + + } while (bus->nodes_modified); + + return sd_bus_send(bus, m, NULL); +} + +static int object_removed_append_all_prefix( + sd_bus *bus, + sd_bus_message *m, + OrderedSet *s, + const char *prefix, + const char *path, + bool require_fallback) { + + const char *previous_interface = NULL; + struct node *n; + int r; + + assert(bus); + assert(m); + assert(s); + assert(prefix); + assert(path); + + n = hashmap_get(bus->nodes, prefix); + if (!n) + return 0; + + LIST_FOREACH(vtables, c, n->vtables) { + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + void *u = NULL; + + if (require_fallback && !c->is_fallback) + continue; + if (streq_ptr(c->interface, previous_interface)) + continue; + + /* If a child-node already handled this interface, we + * skip it on any of its parents. The child vtables + * always fully override any conflicting vtables of + * any parent node. */ + if (ordered_set_get(s, c->interface)) + continue; + + r = node_vtable_get_userdata(bus, path, c, &u, &error); + if (r < 0) + return r; + if (bus->nodes_modified) + return 0; + if (r == 0) + continue; + + r = ordered_set_put(s, c->interface); + if (r < 0) + return r; + + r = sd_bus_message_append(m, "s", c->interface); + if (r < 0) + return r; + + previous_interface = c->interface; + } + + return 0; +} + +static int object_removed_append_all(sd_bus *bus, sd_bus_message *m, const char *path, bool path_has_object_manager) { + _cleanup_ordered_set_free_ OrderedSet *s = NULL; + _cleanup_free_ char *prefix = NULL; + size_t pl; + int r; + + assert(bus); + assert(m); + assert(path); + + /* see sd_bus_emit_object_added() for details */ + + s = ordered_set_new(&string_hash_ops); + if (!s) + return -ENOMEM; + + r = sd_bus_message_append(m, "s", "org.freedesktop.DBus.Peer"); + if (r < 0) + return r; + r = sd_bus_message_append(m, "s", "org.freedesktop.DBus.Introspectable"); + if (r < 0) + return r; + r = sd_bus_message_append(m, "s", "org.freedesktop.DBus.Properties"); + if (r < 0) + return r; + + if (path_has_object_manager){ + r = sd_bus_message_append(m, "s", "org.freedesktop.DBus.ObjectManager"); + if (r < 0) + return r; + } + + r = object_removed_append_all_prefix(bus, m, s, path, path, false); + if (r < 0) + return r; + if (bus->nodes_modified) + return 0; + + pl = strlen(path); + assert(pl <= BUS_PATH_SIZE_MAX); + prefix = new(char, pl + 1); + if (!prefix) + return -ENOMEM; + + OBJECT_PATH_FOREACH_PREFIX(prefix, path) { + r = object_removed_append_all_prefix(bus, m, s, prefix, path, true); + if (r < 0) + return r; + if (bus->nodes_modified) + return 0; + } + + return 0; +} + +_public_ int sd_bus_emit_object_removed(sd_bus *bus, const char *path) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + struct node *object_manager; + int r; + + /* + * This is like sd_bus_emit_object_added(), but emits an + * InterfacesRemoved signal on the given path. This only includes any + * registered interfaces but skips the properties. Note that this will + * call into the find() callbacks of any registered vtable. Therefore, + * you must call this function before destroying/unlinking your object. + * Otherwise, the list of interfaces will be incomplete. However, note + * that this will *NOT* call into any property callback. Therefore, the + * object might be in an "destructed" state, as long as we can find it. + */ + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(object_path_is_valid(path), -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + bool path_has_object_manager = false; + r = bus_find_parent_object_manager(bus, &object_manager, path, &path_has_object_manager); + if (r < 0) + return r; + if (r == 0) + return -ESRCH; + + BUS_DONT_DESTROY(bus); + + do { + bus->nodes_modified = false; + m = sd_bus_message_unref(m); + + r = sd_bus_message_new_signal(bus, &m, object_manager->path, "org.freedesktop.DBus.ObjectManager", "InterfacesRemoved"); + if (r < 0) + return r; + + r = sd_bus_message_append_basic(m, 'o', path); + if (r < 0) + return r; + + r = sd_bus_message_open_container(m, 'a', "s"); + if (r < 0) + return r; + + r = object_removed_append_all(bus, m, path, path_has_object_manager); + if (r < 0) + return r; + + if (bus->nodes_modified) + continue; + + r = sd_bus_message_close_container(m); + if (r < 0) + return r; + + } while (bus->nodes_modified); + + return sd_bus_send(bus, m, NULL); +} + +static int interfaces_added_append_one_prefix( + sd_bus *bus, + sd_bus_message *m, + const char *prefix, + const char *path, + const char *interface, + bool require_fallback) { + + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + bool found_interface = false; + struct node *n; + void *u = NULL; + int r; + + assert(bus); + assert(m); + assert(prefix); + assert(path); + assert(interface); + + n = hashmap_get(bus->nodes, prefix); + if (!n) + return 0; + + LIST_FOREACH(vtables, c, n->vtables) { + if (require_fallback && !c->is_fallback) + continue; + + if (!streq(c->interface, interface)) + continue; + + r = node_vtable_get_userdata(bus, path, c, &u, &error); + if (r < 0) + return r; + if (bus->nodes_modified) + return 0; + if (r == 0) + continue; + + if (!found_interface) { + r = sd_bus_message_append_basic(m, 's', interface); + if (r < 0) + return r; + + r = sd_bus_message_open_container(m, 'a', "{sv}"); + if (r < 0) + return r; + + found_interface = true; + } + + r = vtable_append_all_properties(bus, m, path, c, u, &error); + if (r < 0) + return r; + if (bus->nodes_modified) + return 0; + } + + if (found_interface) { + r = sd_bus_message_close_container(m); + if (r < 0) + return r; + } + + return found_interface; +} + +static int interfaces_added_append_one( + sd_bus *bus, + sd_bus_message *m, + const char *path, + const char *interface) { + + _cleanup_free_ char *prefix = NULL; + size_t pl; + int r; + + assert(bus); + assert(m); + assert(path); + assert(interface); + + r = interfaces_added_append_one_prefix(bus, m, path, path, interface, false); + if (r != 0) + return r; + if (bus->nodes_modified) + return 0; + + pl = strlen(path); + assert(pl <= BUS_PATH_SIZE_MAX); + prefix = new(char, pl + 1); + if (!prefix) + return -ENOMEM; + + OBJECT_PATH_FOREACH_PREFIX(prefix, path) { + r = interfaces_added_append_one_prefix(bus, m, prefix, path, interface, true); + if (r != 0) + return r; + if (bus->nodes_modified) + return 0; + } + + return -ENOENT; +} + +_public_ int sd_bus_emit_interfaces_added_strv(sd_bus *bus, const char *path, char **interfaces) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + struct node *object_manager; + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(object_path_is_valid(path), -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + if (strv_isempty(interfaces)) + return 0; + + bool path_has_object_manager = false; + r = bus_find_parent_object_manager(bus, &object_manager, path, &path_has_object_manager); + if (r < 0) + return r; + if (r == 0) + return -ESRCH; + + BUS_DONT_DESTROY(bus); + + do { + bus->nodes_modified = false; + m = sd_bus_message_unref(m); + + r = sd_bus_message_new_signal(bus, &m, object_manager->path, "org.freedesktop.DBus.ObjectManager", "InterfacesAdded"); + if (r < 0) + return r; + + r = sd_bus_message_append_basic(m, 'o', path); + if (r < 0) + return r; + + r = sd_bus_message_open_container(m, 'a', "{sa{sv}}"); + if (r < 0) + return r; + + STRV_FOREACH(i, interfaces) { + assert_return(interface_name_is_valid(*i), -EINVAL); + + r = sd_bus_message_open_container(m, 'e', "sa{sv}"); + if (r < 0) + return r; + + r = interfaces_added_append_one(bus, m, path, *i); + if (r < 0) + return r; + + if (bus->nodes_modified) + break; + + r = sd_bus_message_close_container(m); + if (r < 0) + return r; + } + + if (bus->nodes_modified) + continue; + + r = sd_bus_message_close_container(m); + if (r < 0) + return r; + + } while (bus->nodes_modified); + + return sd_bus_send(bus, m, NULL); +} + +_public_ int sd_bus_emit_interfaces_added(sd_bus *bus, const char *path, const char *interface, ...) { + char **interfaces; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(object_path_is_valid(path), -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + interfaces = strv_from_stdarg_alloca(interface); + + return sd_bus_emit_interfaces_added_strv(bus, path, interfaces); +} + +_public_ int sd_bus_emit_interfaces_removed_strv(sd_bus *bus, const char *path, char **interfaces) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + struct node *object_manager; + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(object_path_is_valid(path), -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + if (strv_isempty(interfaces)) + return 0; + + bool path_has_object_manager = false; + r = bus_find_parent_object_manager(bus, &object_manager, path, &path_has_object_manager); + if (r < 0) + return r; + if (r == 0) + return -ESRCH; + + r = sd_bus_message_new_signal(bus, &m, object_manager->path, "org.freedesktop.DBus.ObjectManager", "InterfacesRemoved"); + if (r < 0) + return r; + + r = sd_bus_message_append_basic(m, 'o', path); + if (r < 0) + return r; + + r = sd_bus_message_append_strv(m, interfaces); + if (r < 0) + return r; + + return sd_bus_send(bus, m, NULL); +} + +_public_ int sd_bus_emit_interfaces_removed(sd_bus *bus, const char *path, const char *interface, ...) { + char **interfaces; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(object_path_is_valid(path), -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + interfaces = strv_from_stdarg_alloca(interface); + + return sd_bus_emit_interfaces_removed_strv(bus, path, interfaces); +} + +_public_ int sd_bus_add_object_manager(sd_bus *bus, sd_bus_slot **slot, const char *path) { + sd_bus_slot *s; + struct node *n; + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(object_path_is_valid(path), -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + n = bus_node_allocate(bus, path); + if (!n) + return -ENOMEM; + + s = bus_slot_allocate(bus, !slot, BUS_NODE_OBJECT_MANAGER, sizeof(struct node_object_manager), NULL); + if (!s) { + r = -ENOMEM; + goto fail; + } + + s->node_object_manager.node = n; + LIST_PREPEND(object_managers, n->object_managers, &s->node_object_manager); + bus->nodes_modified = true; + + if (slot) + *slot = s; + + return 0; + +fail: + sd_bus_slot_unref(s); + bus_node_gc(bus, n); + + return r; +} diff --git a/src/libsystemd/sd-bus/bus-objects.h b/src/libsystemd/sd-bus/bus-objects.h new file mode 100644 index 0000000..20fccfa --- /dev/null +++ b/src/libsystemd/sd-bus/bus-objects.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "bus-internal.h" +#include "bus-introspect.h" + +const sd_bus_vtable* bus_vtable_next(const sd_bus_vtable *vtable, const sd_bus_vtable *v); +bool bus_vtable_has_names(const sd_bus_vtable *vtable); +int bus_process_object(sd_bus *bus, sd_bus_message *m); +void bus_node_gc(sd_bus *b, struct node *n); + +int introspect_path( + sd_bus *bus, + const char *path, + struct node *n, + bool require_fallback, + bool ignore_nodes_modified, + bool *found_object, + char **ret, + sd_bus_error *error); diff --git a/src/libsystemd/sd-bus/bus-protocol.h b/src/libsystemd/sd-bus/bus-protocol.h new file mode 100644 index 0000000..be46b5f --- /dev/null +++ b/src/libsystemd/sd-bus/bus-protocol.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <endian.h> + +#include "macro.h" + +/* Packet header */ + +struct _packed_ bus_header { + uint8_t endian; + uint8_t type; + uint8_t flags; + uint8_t version; + uint32_t body_size; + /* Note that what the bus spec calls "serial" we'll call "cookie" instead, because we don't + * want to imply that the cookie was in any way monotonically increasing. */ + uint32_t serial; + uint32_t fields_size; +}; + +/* Endianness */ + +enum { + _BUS_INVALID_ENDIAN = 0, + BUS_LITTLE_ENDIAN = 'l', + BUS_BIG_ENDIAN = 'B', +#if __BYTE_ORDER == __BIG_ENDIAN + BUS_NATIVE_ENDIAN = BUS_BIG_ENDIAN, + BUS_REVERSE_ENDIAN = BUS_LITTLE_ENDIAN +#else + BUS_NATIVE_ENDIAN = BUS_LITTLE_ENDIAN, + BUS_REVERSE_ENDIAN = BUS_BIG_ENDIAN +#endif +}; + +/* Flags */ + +enum { + BUS_MESSAGE_NO_REPLY_EXPECTED = 1 << 0, + BUS_MESSAGE_NO_AUTO_START = 1 << 1, + BUS_MESSAGE_ALLOW_INTERACTIVE_AUTHORIZATION = 1 << 2, +}; + +/* Header fields */ + +enum { + _BUS_MESSAGE_HEADER_INVALID = 0, + BUS_MESSAGE_HEADER_PATH, + BUS_MESSAGE_HEADER_INTERFACE, + BUS_MESSAGE_HEADER_MEMBER, + BUS_MESSAGE_HEADER_ERROR_NAME, + BUS_MESSAGE_HEADER_REPLY_SERIAL, + BUS_MESSAGE_HEADER_DESTINATION, + BUS_MESSAGE_HEADER_SENDER, + BUS_MESSAGE_HEADER_SIGNATURE, + BUS_MESSAGE_HEADER_UNIX_FDS, + _BUS_MESSAGE_HEADER_MAX +}; + +/* RequestName parameters */ + +enum { + BUS_NAME_ALLOW_REPLACEMENT = 1 << 0, + BUS_NAME_REPLACE_EXISTING = 1 << 1, + BUS_NAME_DO_NOT_QUEUE = 1 << 2, +}; + +/* RequestName returns */ +enum { + BUS_NAME_PRIMARY_OWNER = 1, + BUS_NAME_IN_QUEUE = 2, + BUS_NAME_EXISTS = 3, + BUS_NAME_ALREADY_OWNER = 4 +}; + +/* ReleaseName returns */ +enum { + BUS_NAME_RELEASED = 1, + BUS_NAME_NON_EXISTENT = 2, + BUS_NAME_NOT_OWNER = 3, +}; + +/* StartServiceByName returns */ +enum { + BUS_START_REPLY_SUCCESS = 1, + BUS_START_REPLY_ALREADY_RUNNING = 2, +}; diff --git a/src/libsystemd/sd-bus/bus-signature.c b/src/libsystemd/sd-bus/bus-signature.c new file mode 100644 index 0000000..78c7436 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-signature.c @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "sd-bus.h" + +#include "bus-signature.h" +#include "bus-type.h" + +static int signature_element_length_internal( + const char *s, + bool allow_dict_entry, + unsigned array_depth, + unsigned struct_depth, + size_t *l) { + + int r; + + if (!s) + return -EINVAL; + + assert(l); + + if (bus_type_is_basic(*s) || *s == SD_BUS_TYPE_VARIANT) { + *l = 1; + return 0; + } + + if (*s == SD_BUS_TYPE_ARRAY) { + size_t t; + + if (array_depth >= 32) + return -EINVAL; + + r = signature_element_length_internal(s + 1, true, array_depth+1, struct_depth, &t); + if (r < 0) + return r; + + *l = t + 1; + return 0; + } + + if (*s == SD_BUS_TYPE_STRUCT_BEGIN) { + const char *p = s + 1; + + if (struct_depth >= 32) + return -EINVAL; + + while (*p != SD_BUS_TYPE_STRUCT_END) { + size_t t; + + r = signature_element_length_internal(p, false, array_depth, struct_depth+1, &t); + if (r < 0) + return r; + + p += t; + } + + if (p - s < 2) + /* D-Bus spec: Empty structures are not allowed; there + * must be at least one type code between the parentheses. + */ + return -EINVAL; + + *l = p - s + 1; + return 0; + } + + if (*s == SD_BUS_TYPE_DICT_ENTRY_BEGIN && allow_dict_entry) { + const char *p = s + 1; + unsigned n = 0; + + if (struct_depth >= 32) + return -EINVAL; + + while (*p != SD_BUS_TYPE_DICT_ENTRY_END) { + size_t t; + + if (n == 0 && !bus_type_is_basic(*p)) + return -EINVAL; + + r = signature_element_length_internal(p, false, array_depth, struct_depth+1, &t); + if (r < 0) + return r; + + p += t; + n++; + } + + if (n != 2) + return -EINVAL; + + *l = p - s + 1; + return 0; + } + + return -EINVAL; +} + +int signature_element_length(const char *s, size_t *l) { + return signature_element_length_internal(s, true, 0, 0, l); +} + +bool signature_is_single(const char *s, bool allow_dict_entry) { + int r; + size_t t; + + if (!s) + return false; + + r = signature_element_length_internal(s, allow_dict_entry, 0, 0, &t); + if (r < 0) + return false; + + return s[t] == 0; +} + +bool signature_is_pair(const char *s) { + + if (!s) + return false; + + if (!bus_type_is_basic(*s)) + return false; + + return signature_is_single(s + 1, false); +} + +bool signature_is_valid(const char *s, bool allow_dict_entry) { + const char *p; + int r; + + if (!s) + return false; + + p = s; + while (*p) { + size_t t; + + r = signature_element_length_internal(p, allow_dict_entry, 0, 0, &t); + if (r < 0) + return false; + + p += t; + } + + return p - s <= SD_BUS_MAXIMUM_SIGNATURE_LENGTH; +} diff --git a/src/libsystemd/sd-bus/bus-signature.h b/src/libsystemd/sd-bus/bus-signature.h new file mode 100644 index 0000000..314fcc2 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-signature.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +bool signature_is_single(const char *s, bool allow_dict_entry); +bool signature_is_pair(const char *s); +bool signature_is_valid(const char *s, bool allow_dict_entry); + +int signature_element_length(const char *s, size_t *l); diff --git a/src/libsystemd/sd-bus/bus-slot.c b/src/libsystemd/sd-bus/bus-slot.c new file mode 100644 index 0000000..9f28957 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-slot.c @@ -0,0 +1,311 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "sd-bus.h" + +#include "alloc-util.h" +#include "bus-control.h" +#include "bus-objects.h" +#include "bus-slot.h" +#include "string-util.h" + +sd_bus_slot *bus_slot_allocate( + sd_bus *bus, + bool floating, + BusSlotType type, + size_t extra, + void *userdata) { + + sd_bus_slot *slot; + + assert(bus); + + slot = malloc0(offsetof(sd_bus_slot, reply_callback) + extra); + if (!slot) + return NULL; + + slot->n_ref = 1; + slot->type = type; + slot->bus = bus; + slot->floating = floating; + slot->userdata = userdata; + + if (!floating) + sd_bus_ref(bus); + + LIST_PREPEND(slots, bus->slots, slot); + + return slot; +} + +void bus_slot_disconnect(sd_bus_slot *slot, bool unref) { + sd_bus *bus; + + assert(slot); + + if (!slot->bus) + return; + + switch (slot->type) { + + case BUS_REPLY_CALLBACK: + + if (slot->reply_callback.cookie != 0) + ordered_hashmap_remove(slot->bus->reply_callbacks, &slot->reply_callback.cookie); + + if (slot->reply_callback.timeout_usec != 0) + prioq_remove(slot->bus->reply_callbacks_prioq, &slot->reply_callback, &slot->reply_callback.prioq_idx); + + break; + + case BUS_FILTER_CALLBACK: + slot->bus->filter_callbacks_modified = true; + LIST_REMOVE(callbacks, slot->bus->filter_callbacks, &slot->filter_callback); + break; + + case BUS_MATCH_CALLBACK: + + if (slot->match_added) + (void) bus_remove_match_internal(slot->bus, slot->match_callback.match_string); + + if (slot->match_callback.install_slot) { + bus_slot_disconnect(slot->match_callback.install_slot, true); + slot->match_callback.install_slot = sd_bus_slot_unref(slot->match_callback.install_slot); + } + + slot->bus->match_callbacks_modified = true; + bus_match_remove(&slot->bus->match_callbacks, &slot->match_callback); + + slot->match_callback.match_string = mfree(slot->match_callback.match_string); + + break; + + case BUS_NODE_CALLBACK: + + if (slot->node_callback.node) { + LIST_REMOVE(callbacks, slot->node_callback.node->callbacks, &slot->node_callback); + slot->bus->nodes_modified = true; + + bus_node_gc(slot->bus, slot->node_callback.node); + } + + break; + + case BUS_NODE_ENUMERATOR: + + if (slot->node_enumerator.node) { + LIST_REMOVE(enumerators, slot->node_enumerator.node->enumerators, &slot->node_enumerator); + slot->bus->nodes_modified = true; + + bus_node_gc(slot->bus, slot->node_enumerator.node); + } + + break; + + case BUS_NODE_OBJECT_MANAGER: + + if (slot->node_object_manager.node) { + LIST_REMOVE(object_managers, slot->node_object_manager.node->object_managers, &slot->node_object_manager); + slot->bus->nodes_modified = true; + + bus_node_gc(slot->bus, slot->node_object_manager.node); + } + + break; + + case BUS_NODE_VTABLE: + + if (slot->node_vtable.node && slot->node_vtable.interface && slot->node_vtable.vtable) { + const sd_bus_vtable *v; + + for (v = slot->node_vtable.vtable; v->type != _SD_BUS_VTABLE_END; v = bus_vtable_next(slot->node_vtable.vtable, v)) { + struct vtable_member *x = NULL; + + switch (v->type) { + + case _SD_BUS_VTABLE_METHOD: { + struct vtable_member key; + + key.path = slot->node_vtable.node->path; + key.interface = slot->node_vtable.interface; + key.member = v->x.method.member; + + x = hashmap_remove(slot->bus->vtable_methods, &key); + break; + } + + case _SD_BUS_VTABLE_PROPERTY: + case _SD_BUS_VTABLE_WRITABLE_PROPERTY: { + struct vtable_member key; + + key.path = slot->node_vtable.node->path; + key.interface = slot->node_vtable.interface; + key.member = v->x.method.member; + + x = hashmap_remove(slot->bus->vtable_properties, &key); + break; + }} + + free(x); + } + } + + slot->node_vtable.interface = mfree(slot->node_vtable.interface); + + if (slot->node_vtable.node) { + LIST_REMOVE(vtables, slot->node_vtable.node->vtables, &slot->node_vtable); + slot->bus->nodes_modified = true; + + bus_node_gc(slot->bus, slot->node_vtable.node); + } + + break; + + default: + assert_not_reached(); + } + + bus = slot->bus; + + slot->type = _BUS_SLOT_INVALID; + slot->bus = NULL; + LIST_REMOVE(slots, bus->slots, slot); + + if (!slot->floating) + sd_bus_unref(bus); + else if (unref) + sd_bus_slot_unref(slot); +} + +static sd_bus_slot* bus_slot_free(sd_bus_slot *slot) { + assert(slot); + + bus_slot_disconnect(slot, false); + + if (slot->destroy_callback) + slot->destroy_callback(slot->userdata); + + free(slot->description); + return mfree(slot); +} + +DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_bus_slot, sd_bus_slot, bus_slot_free); + +_public_ sd_bus* sd_bus_slot_get_bus(sd_bus_slot *slot) { + assert_return(slot, NULL); + + return slot->bus; +} + +_public_ void *sd_bus_slot_get_userdata(sd_bus_slot *slot) { + assert_return(slot, NULL); + + return slot->userdata; +} + +_public_ void *sd_bus_slot_set_userdata(sd_bus_slot *slot, void *userdata) { + void *ret; + + assert_return(slot, NULL); + + ret = slot->userdata; + slot->userdata = userdata; + + return ret; +} + +_public_ int sd_bus_slot_set_destroy_callback(sd_bus_slot *slot, sd_bus_destroy_t callback) { + assert_return(slot, -EINVAL); + + slot->destroy_callback = callback; + return 0; +} + +_public_ int sd_bus_slot_get_destroy_callback(sd_bus_slot *slot, sd_bus_destroy_t *callback) { + assert_return(slot, -EINVAL); + + if (callback) + *callback = slot->destroy_callback; + + return !!slot->destroy_callback; +} + +_public_ sd_bus_message *sd_bus_slot_get_current_message(sd_bus_slot *slot) { + assert_return(slot, NULL); + assert_return(slot->type >= 0, NULL); + + if (slot->bus->current_slot != slot) + return NULL; + + return slot->bus->current_message; +} + +_public_ sd_bus_message_handler_t sd_bus_slot_get_current_handler(sd_bus_slot *slot) { + assert_return(slot, NULL); + assert_return(slot->type >= 0, NULL); + + if (slot->bus->current_slot != slot) + return NULL; + + return slot->bus->current_handler; +} + +_public_ void* sd_bus_slot_get_current_userdata(sd_bus_slot *slot) { + assert_return(slot, NULL); + assert_return(slot->type >= 0, NULL); + + if (slot->bus->current_slot != slot) + return NULL; + + return slot->bus->current_userdata; +} + +_public_ int sd_bus_slot_get_floating(sd_bus_slot *slot) { + assert_return(slot, -EINVAL); + + return slot->floating; +} + +_public_ int sd_bus_slot_set_floating(sd_bus_slot *slot, int b) { + assert_return(slot, -EINVAL); + + if (slot->floating == !!b) + return 0; + + if (!slot->bus) /* already disconnected slots can't be reconnected */ + return -ESTALE; + + slot->floating = b; + + /* When a slot is "floating" then the bus references the slot. Otherwise the slot references the bus. Hence, + * when we move from one to the other, let's increase one reference and decrease the other. */ + + if (b) { + sd_bus_slot_ref(slot); + sd_bus_unref(slot->bus); + } else { + sd_bus_ref(slot->bus); + sd_bus_slot_unref(slot); + } + + return 1; +} + +_public_ int sd_bus_slot_set_description(sd_bus_slot *slot, const char *description) { + assert_return(slot, -EINVAL); + + return free_and_strdup(&slot->description, description); +} + +_public_ int sd_bus_slot_get_description(sd_bus_slot *slot, const char **description) { + assert_return(slot, -EINVAL); + assert_return(description, -EINVAL); + + if (slot->description) + *description = slot->description; + else if (slot->type == BUS_MATCH_CALLBACK) + *description = slot->match_callback.match_string; + else + return -ENXIO; + + return 0; +} diff --git a/src/libsystemd/sd-bus/bus-slot.h b/src/libsystemd/sd-bus/bus-slot.h new file mode 100644 index 0000000..8116195 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-slot.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-bus.h" + +#include "bus-internal.h" + +sd_bus_slot *bus_slot_allocate(sd_bus *bus, bool floating, BusSlotType type, size_t extra, void *userdata); + +void bus_slot_disconnect(sd_bus_slot *slot, bool unref); diff --git a/src/libsystemd/sd-bus/bus-socket.c b/src/libsystemd/sd-bus/bus-socket.c new file mode 100644 index 0000000..5ade8e9 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-socket.c @@ -0,0 +1,1428 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <endian.h> +#include <poll.h> +#include <stdlib.h> +#include <unistd.h> + +#include "sd-bus.h" +#include "sd-daemon.h" + +#include "alloc-util.h" +#include "bus-internal.h" +#include "bus-message.h" +#include "bus-socket.h" +#include "escape.h" +#include "fd-util.h" +#include "format-util.h" +#include "fs-util.h" +#include "hexdecoct.h" +#include "io-util.h" +#include "iovec-util.h" +#include "macro.h" +#include "memory-util.h" +#include "path-util.h" +#include "process-util.h" +#include "random-util.h" +#include "signal-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "user-util.h" +#include "utf8.h" + +#define SNDBUF_SIZE (8*1024*1024) + +static void iovec_advance(struct iovec iov[], unsigned *idx, size_t size) { + + while (size > 0) { + struct iovec *i = iov + *idx; + + if (i->iov_len > size) { + i->iov_base = (uint8_t*) i->iov_base + size; + i->iov_len -= size; + return; + } + + size -= i->iov_len; + + *i = IOVEC_MAKE(NULL, 0); + + (*idx)++; + } +} + +static int append_iovec(sd_bus_message *m, const void *p, size_t sz) { + assert(m); + assert(p); + assert(sz > 0); + + m->iovec[m->n_iovec++] = IOVEC_MAKE((void*) p, sz); + + return 0; +} + +static int bus_message_setup_iovec(sd_bus_message *m) { + struct bus_body_part *part; + unsigned n, i; + int r; + + assert(m); + assert(m->sealed); + + if (m->n_iovec > 0) + return 0; + + assert(!m->iovec); + + n = 1 + m->n_body_parts; + if (n < ELEMENTSOF(m->iovec_fixed)) + m->iovec = m->iovec_fixed; + else { + m->iovec = new(struct iovec, n); + if (!m->iovec) { + r = -ENOMEM; + goto fail; + } + } + + r = append_iovec(m, m->header, BUS_MESSAGE_BODY_BEGIN(m)); + if (r < 0) + goto fail; + + MESSAGE_FOREACH_PART(part, i, m) { + r = bus_body_part_map(part); + if (r < 0) + goto fail; + + r = append_iovec(m, part->data, part->size); + if (r < 0) + goto fail; + } + + assert(n == m->n_iovec); + + return 0; + +fail: + m->poisoned = true; + return r; +} + +bool bus_socket_auth_needs_write(sd_bus *b) { + + unsigned i; + + if (b->auth_index >= ELEMENTSOF(b->auth_iovec)) + return false; + + for (i = b->auth_index; i < ELEMENTSOF(b->auth_iovec); i++) { + struct iovec *j = b->auth_iovec + i; + + if (j->iov_len > 0) + return true; + } + + return false; +} + +static int bus_socket_auth_verify_client(sd_bus *b) { + char *l, *lines[4] = {}; + sd_id128_t peer; + size_t i, n; + int r; + + assert(b); + + /* + * We expect up to three response lines: + * "DATA\r\n" (optional) + * "OK <server-id>\r\n" + * "AGREE_UNIX_FD\r\n" (optional) + */ + + n = 0; + lines[n] = b->rbuffer; + for (i = 0; i < 3; ++i) { + l = memmem_safe(lines[n], b->rbuffer_size - (lines[n] - (char*) b->rbuffer), "\r\n", 2); + if (l) + lines[++n] = l + 2; + else + break; + } + + /* + * If we sent a non-empty initial response, then we just expect an OK + * reply. We currently do this if, and only if, we picked ANONYMOUS. + * If we did not send an initial response, then we expect a DATA + * challenge, reply with our own DATA, and expect an OK reply. We do + * this for EXTERNAL. + * If FD negotiation was requested, we additionally expect + * an AGREE_UNIX_FD response in all cases. + */ + if (n < (b->anonymous_auth ? 1U : 2U) + !!b->accept_fd) + return 0; /* wait for more data */ + + i = 0; + + /* In case of EXTERNAL, verify the first response was DATA. */ + if (!b->anonymous_auth) { + l = lines[i++]; + if (lines[i] - l == 4 + 2) { + if (memcmp(l, "DATA", 4)) + return -EPERM; + } else if (lines[i] - l == 3 + 32 + 2) { + /* + * Old versions of the server-side implementation of + * `sd-bus` replied with "OK <id>" to "AUTH" requests + * from a client, even if the "AUTH" line did not + * contain inlined arguments. Therefore, we also accept + * "OK <id>" here, even though it is technically the + * wrong reply. We ignore the "<id>" parameter, though, + * since it has no real value. + */ + if (memcmp(l, "OK ", 3)) + return -EPERM; + } else + return -EPERM; + } + + /* Now check the OK line. */ + l = lines[i++]; + + if (lines[i] - l != 3 + 32 + 2) + return -EPERM; + if (memcmp(l, "OK ", 3)) + return -EPERM; + + b->auth = b->anonymous_auth ? BUS_AUTH_ANONYMOUS : BUS_AUTH_EXTERNAL; + + for (unsigned j = 0; j < 32; j += 2) { + int x, y; + + x = unhexchar(l[3 + j]); + y = unhexchar(l[3 + j + 1]); + + if (x < 0 || y < 0) + return -EINVAL; + + peer.bytes[j/2] = ((uint8_t) x << 4 | (uint8_t) y); + } + + if (!sd_id128_is_null(b->server_id) && + !sd_id128_equal(b->server_id, peer)) + return -EPERM; + + b->server_id = peer; + + /* And possibly check the third line, too */ + if (b->accept_fd) { + l = lines[i++]; + b->can_fds = !!memory_startswith(l, lines[i] - l, "AGREE_UNIX_FD"); + } + + assert(i == n); + + b->rbuffer_size -= (lines[i] - (char*) b->rbuffer); + memmove(b->rbuffer, lines[i], b->rbuffer_size); + + r = bus_start_running(b); + if (r < 0) + return r; + + return 1; +} + +static bool line_equals(const char *s, size_t m, const char *line) { + size_t l; + + l = strlen(line); + if (l != m) + return false; + + return memcmp(s, line, l) == 0; +} + +static bool line_begins(const char *s, size_t m, const char *word) { + const char *p; + + p = memory_startswith(s, m, word); + return p && (p == (s + m) || *p == ' '); +} + +static int verify_anonymous_token(sd_bus *b, const char *p, size_t l) { + _cleanup_free_ char *token = NULL; + size_t len; + int r; + + if (!b->anonymous_auth) + return 0; + + if (l <= 0) + return 1; + + assert(p[0] == ' '); + p++; l--; + + if (l % 2 != 0) + return 0; + + r = unhexmem(p, l, (void **) &token, &len); + if (r < 0) + return 0; + + if (memchr(token, 0, len)) + return 0; + + return !!utf8_is_valid(token); +} + +static int verify_external_token(sd_bus *b, const char *p, size_t l) { + _cleanup_free_ char *token = NULL; + size_t len; + uid_t u; + int r; + + /* We don't do any real authentication here. Instead, if + * the owner of this bus wanted authentication they should have + * checked SO_PEERCRED before even creating the bus object. */ + + if (!b->anonymous_auth && !b->ucred_valid) + return 0; + + if (l <= 0) + return 1; + + assert(p[0] == ' '); + p++; l--; + + if (l % 2 != 0) + return 0; + + r = unhexmem(p, l, (void**) &token, &len); + if (r < 0) + return 0; + + if (memchr(token, 0, len)) + return 0; + + r = parse_uid(token, &u); + if (r < 0) + return 0; + + /* We ignore the passed value if anonymous authentication is + * on anyway. */ + if (!b->anonymous_auth && u != b->ucred.uid) + return 0; + + return 1; +} + +static int bus_socket_auth_write(sd_bus *b, const char *t) { + char *p; + size_t l; + + assert(b); + assert(t); + + /* We only make use of the first iovec */ + assert(IN_SET(b->auth_index, 0, 1)); + + l = strlen(t); + p = malloc(b->auth_iovec[0].iov_len + l); + if (!p) + return -ENOMEM; + + memcpy_safe(p, b->auth_iovec[0].iov_base, b->auth_iovec[0].iov_len); + memcpy(p + b->auth_iovec[0].iov_len, t, l); + + b->auth_iovec[0].iov_base = p; + b->auth_iovec[0].iov_len += l; + + free_and_replace(b->auth_buffer, p); + b->auth_index = 0; + return 0; +} + +static int bus_socket_auth_write_ok(sd_bus *b) { + char t[3 + 32 + 2 + 1]; + + assert(b); + + xsprintf(t, "OK " SD_ID128_FORMAT_STR "\r\n", SD_ID128_FORMAT_VAL(b->server_id)); + + return bus_socket_auth_write(b, t); +} + +static int bus_socket_auth_verify_server(sd_bus *b) { + char *e; + const char *line; + size_t l; + bool processed = false; + int r; + + assert(b); + + if (b->rbuffer_size < 1) + return 0; + + /* First char must be a NUL byte */ + if (*(char*) b->rbuffer != 0) + return -EIO; + + if (b->rbuffer_size < 3) + return 0; + + /* Begin with the first line */ + if (b->auth_rbegin <= 0) + b->auth_rbegin = 1; + + for (;;) { + /* Check if line is complete */ + line = (char*) b->rbuffer + b->auth_rbegin; + e = memmem_safe(line, b->rbuffer_size - b->auth_rbegin, "\r\n", 2); + if (!e) + return processed; + + l = e - line; + + if (line_begins(line, l, "AUTH ANONYMOUS")) { + + r = verify_anonymous_token(b, + line + strlen("AUTH ANONYMOUS"), + l - strlen("AUTH ANONYMOUS")); + if (r < 0) + return r; + if (r == 0) + r = bus_socket_auth_write(b, "REJECTED\r\n"); + else { + b->auth = BUS_AUTH_ANONYMOUS; + if (l <= strlen("AUTH ANONYMOUS")) + r = bus_socket_auth_write(b, "DATA\r\n"); + else + r = bus_socket_auth_write_ok(b); + } + + } else if (line_begins(line, l, "AUTH EXTERNAL")) { + + r = verify_external_token(b, + line + strlen("AUTH EXTERNAL"), + l - strlen("AUTH EXTERNAL")); + if (r < 0) + return r; + if (r == 0) + r = bus_socket_auth_write(b, "REJECTED\r\n"); + else { + b->auth = BUS_AUTH_EXTERNAL; + if (l <= strlen("AUTH EXTERNAL")) + r = bus_socket_auth_write(b, "DATA\r\n"); + else + r = bus_socket_auth_write_ok(b); + } + + } else if (line_begins(line, l, "AUTH")) + r = bus_socket_auth_write(b, "REJECTED EXTERNAL ANONYMOUS\r\n"); + else if (line_equals(line, l, "CANCEL") || + line_begins(line, l, "ERROR")) { + + b->auth = _BUS_AUTH_INVALID; + r = bus_socket_auth_write(b, "REJECTED\r\n"); + + } else if (line_equals(line, l, "BEGIN")) { + + if (b->auth == _BUS_AUTH_INVALID) + r = bus_socket_auth_write(b, "ERROR\r\n"); + else { + /* We can't leave from the auth phase + * before we haven't written + * everything queued, so let's check + * that */ + + if (bus_socket_auth_needs_write(b)) + return 1; + + b->rbuffer_size -= (e + 2 - (char*) b->rbuffer); + memmove(b->rbuffer, e + 2, b->rbuffer_size); + return bus_start_running(b); + } + + } else if (line_begins(line, l, "DATA")) { + + if (b->auth == _BUS_AUTH_INVALID) + r = bus_socket_auth_write(b, "ERROR\r\n"); + else { + if (b->auth == BUS_AUTH_ANONYMOUS) + r = verify_anonymous_token(b, line + 4, l - 4); + else + r = verify_external_token(b, line + 4, l - 4); + + if (r < 0) + return r; + if (r == 0) { + b->auth = _BUS_AUTH_INVALID; + r = bus_socket_auth_write(b, "REJECTED\r\n"); + } else + r = bus_socket_auth_write_ok(b); + } + } else if (line_equals(line, l, "NEGOTIATE_UNIX_FD")) { + if (b->auth == _BUS_AUTH_INVALID || !b->accept_fd) + r = bus_socket_auth_write(b, "ERROR\r\n"); + else { + b->can_fds = true; + r = bus_socket_auth_write(b, "AGREE_UNIX_FD\r\n"); + } + } else + r = bus_socket_auth_write(b, "ERROR\r\n"); + + if (r < 0) + return r; + + b->auth_rbegin = e + 2 - (char*) b->rbuffer; + + processed = true; + } +} + +static int bus_socket_auth_verify(sd_bus *b) { + assert(b); + + if (b->is_server) + return bus_socket_auth_verify_server(b); + else + return bus_socket_auth_verify_client(b); +} + +static int bus_socket_write_auth(sd_bus *b) { + ssize_t k; + + assert(b); + assert(b->state == BUS_AUTHENTICATING); + + if (!bus_socket_auth_needs_write(b)) + return 0; + + if (b->prefer_writev) + k = writev(b->output_fd, b->auth_iovec + b->auth_index, ELEMENTSOF(b->auth_iovec) - b->auth_index); + else { + struct msghdr mh = { + .msg_iov = b->auth_iovec + b->auth_index, + .msg_iovlen = ELEMENTSOF(b->auth_iovec) - b->auth_index, + }; + + k = sendmsg(b->output_fd, &mh, MSG_DONTWAIT|MSG_NOSIGNAL); + if (k < 0 && errno == ENOTSOCK) { + b->prefer_writev = true; + k = writev(b->output_fd, b->auth_iovec + b->auth_index, ELEMENTSOF(b->auth_iovec) - b->auth_index); + } + } + + if (k < 0) + return ERRNO_IS_TRANSIENT(errno) ? 0 : -errno; + + iovec_advance(b->auth_iovec, &b->auth_index, (size_t) k); + + /* Now crank the state machine since we might be able to make progress after writing. For example, + * the server only processes "BEGIN" when the write buffer is empty. + */ + return bus_socket_auth_verify(b); +} + +static int bus_socket_read_auth(sd_bus *b) { + struct msghdr mh; + struct iovec iov = {}; + size_t n; + ssize_t k; + int r; + void *p; + CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int) * BUS_FDS_MAX)) control; + bool handle_cmsg = false; + + assert(b); + assert(b->state == BUS_AUTHENTICATING); + + r = bus_socket_auth_verify(b); + if (r != 0) + return r; + + n = MAX(256u, b->rbuffer_size * 2); + + if (n > BUS_AUTH_SIZE_MAX) + n = BUS_AUTH_SIZE_MAX; + + if (b->rbuffer_size >= n) + return -ENOBUFS; + + p = realloc(b->rbuffer, n); + if (!p) + return -ENOMEM; + + b->rbuffer = p; + + iov = IOVEC_MAKE((uint8_t *)b->rbuffer + b->rbuffer_size, n - b->rbuffer_size); + + if (b->prefer_readv) { + k = readv(b->input_fd, &iov, 1); + if (k < 0) + k = -errno; + } else { + mh = (struct msghdr) { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = &control, + .msg_controllen = sizeof(control), + }; + + k = recvmsg_safe(b->input_fd, &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC); + if (k == -ENOTSOCK) { + b->prefer_readv = true; + k = readv(b->input_fd, &iov, 1); + if (k < 0) + k = -errno; + } else + handle_cmsg = true; + } + if (ERRNO_IS_NEG_TRANSIENT(k)) + return 0; + if (k < 0) + return (int) k; + if (k == 0) { + if (handle_cmsg) + cmsg_close_all(&mh); /* paranoia, we shouldn't have gotten any fds on EOF */ + return -ECONNRESET; + } + + b->rbuffer_size += k; + + if (handle_cmsg) { + struct cmsghdr *cmsg; + + CMSG_FOREACH(cmsg, &mh) + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_RIGHTS) { + int j; + + /* Whut? We received fds during the auth + * protocol? Somebody is playing games with + * us. Close them all, and fail */ + j = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int); + close_many(CMSG_TYPED_DATA(cmsg, int), j); + return -EIO; + } else + log_debug("Got unexpected auxiliary data with level=%d and type=%d", + cmsg->cmsg_level, cmsg->cmsg_type); + } + + r = bus_socket_auth_verify(b); + if (r != 0) + return r; + + return 1; +} + +void bus_socket_setup(sd_bus *b) { + assert(b); + + /* Increase the buffers to 8 MB */ + (void) fd_increase_rxbuf(b->input_fd, SNDBUF_SIZE); + (void) fd_inc_sndbuf(b->output_fd, SNDBUF_SIZE); + + b->message_version = 1; + b->message_endian = 0; +} + +static void bus_get_peercred(sd_bus *b) { + int r; + + assert(b); + assert(!b->ucred_valid); + assert(!b->label); + assert(b->n_groups == SIZE_MAX); + + /* Get the peer for socketpair() sockets */ + b->ucred_valid = getpeercred(b->input_fd, &b->ucred) >= 0; + + /* Get the SELinux context of the peer */ + r = getpeersec(b->input_fd, &b->label); + if (r < 0 && !IN_SET(r, -EOPNOTSUPP, -ENOPROTOOPT)) + log_debug_errno(r, "Failed to determine peer security context: %m"); + + /* Get the list of auxiliary groups of the peer */ + r = getpeergroups(b->input_fd, &b->groups); + if (r >= 0) + b->n_groups = (size_t) r; + else if (!IN_SET(r, -EOPNOTSUPP, -ENOPROTOOPT)) + log_debug_errno(r, "Failed to determine peer's group list: %m"); + + /* Let's query the peers socket address, it might carry information such as the peer's comm or + * description string */ + zero(b->sockaddr_peer); + b->sockaddr_size_peer = 0; + + socklen_t l = sizeof(b->sockaddr_peer) - 1; /* Leave space for a NUL */ + if (getpeername(b->input_fd, &b->sockaddr_peer.sa, &l) < 0) + log_debug_errno(errno, "Failed to get peer's socket address, ignoring: %m"); + else + b->sockaddr_size_peer = l; +} + +static int bus_socket_start_auth_client(sd_bus *b) { + static const char sasl_auth_anonymous[] = { + /* + * We use an arbitrary trace-string for the ANONYMOUS authentication. It can be used by the + * message broker to aid debugging of clients. We fully anonymize the connection and use a + * static default. + */ + /* HEX a n o n y m o u s */ + "\0AUTH ANONYMOUS 616e6f6e796d6f7573\r\n" + }; + static const char sasl_auth_external[] = { + "\0AUTH EXTERNAL\r\n" + "DATA\r\n" + }; + static const char sasl_negotiate_unix_fd[] = { + "NEGOTIATE_UNIX_FD\r\n" + }; + static const char sasl_begin[] = { + "BEGIN\r\n" + }; + size_t i = 0; + + assert(b); + + if (b->anonymous_auth) + b->auth_iovec[i++] = IOVEC_MAKE((char*) sasl_auth_anonymous, sizeof(sasl_auth_anonymous) - 1); + else + b->auth_iovec[i++] = IOVEC_MAKE((char*) sasl_auth_external, sizeof(sasl_auth_external) - 1); + + if (b->accept_fd) + b->auth_iovec[i++] = IOVEC_MAKE_STRING(sasl_negotiate_unix_fd); + + b->auth_iovec[i++] = IOVEC_MAKE_STRING(sasl_begin); + + return bus_socket_write_auth(b); +} + +int bus_socket_start_auth(sd_bus *b) { + assert(b); + + bus_get_peercred(b); + + bus_set_state(b, BUS_AUTHENTICATING); + b->auth_timeout = now(CLOCK_MONOTONIC) + BUS_AUTH_TIMEOUT; + + if (sd_is_socket(b->input_fd, AF_UNIX, 0, 0) <= 0) + b->accept_fd = false; + + if (b->output_fd != b->input_fd) + if (sd_is_socket(b->output_fd, AF_UNIX, 0, 0) <= 0) + b->accept_fd = false; + + if (b->is_server) + return bus_socket_read_auth(b); + else + return bus_socket_start_auth_client(b); +} + +static int bus_socket_inotify_setup(sd_bus *b) { + _cleanup_free_ int *new_watches = NULL; + _cleanup_free_ char *absolute = NULL; + size_t n = 0, done = 0, i; + unsigned max_follow = 32; + const char *p; + int wd, r; + + assert(b); + assert(b->watch_bind); + assert(b->sockaddr.sa.sa_family == AF_UNIX); + assert(b->sockaddr.un.sun_path[0] != 0); + + /* Sets up an inotify fd in case watch_bind is enabled: wait until the configured AF_UNIX file system + * socket appears before connecting to it. The implemented is pretty simplistic: we just subscribe to + * relevant changes to all components of the path, and every time we get an event for that we try to + * reconnect again, without actually caring what precisely the event we got told us. If we still + * can't connect we re-subscribe to all relevant changes of anything in the path, so that our watches + * include any possibly newly created path components. */ + + if (b->inotify_fd < 0) { + b->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC); + if (b->inotify_fd < 0) + return -errno; + + b->inotify_fd = fd_move_above_stdio(b->inotify_fd); + } + + /* Make sure the path is NUL terminated */ + p = strndupa_safe(b->sockaddr.un.sun_path, + sizeof(b->sockaddr.un.sun_path)); + + /* Make sure the path is absolute */ + r = path_make_absolute_cwd(p, &absolute); + if (r < 0) + goto fail; + + /* Watch all components of the path, and don't mind any prefix that doesn't exist yet. For the + * innermost directory that exists we want to know when files are created or moved into it. For all + * parents of it we just care if they are removed or renamed. */ + + if (!GREEDY_REALLOC(new_watches, n + 1)) { + r = -ENOMEM; + goto fail; + } + + /* Start with the top-level directory, which is a bit simpler than the rest, since it can't be a + * symlink, and always exists */ + wd = inotify_add_watch(b->inotify_fd, "/", IN_CREATE|IN_MOVED_TO); + if (wd < 0) { + r = log_debug_errno(errno, "Failed to add inotify watch on /: %m"); + goto fail; + } else + new_watches[n++] = wd; + + for (;;) { + _cleanup_free_ char *component = NULL, *prefix = NULL, *destination = NULL; + size_t n_slashes, n_component; + char *c = NULL; + + n_slashes = strspn(absolute + done, "/"); + n_component = n_slashes + strcspn(absolute + done + n_slashes, "/"); + + if (n_component == 0) /* The end */ + break; + + component = strndup(absolute + done, n_component); + if (!component) { + r = -ENOMEM; + goto fail; + } + + /* A trailing slash? That's a directory, and not a socket then */ + if (path_equal(component, "/")) { + r = -EISDIR; + goto fail; + } + + /* A single dot? Let's eat this up */ + if (path_equal(component, "/.")) { + done += n_component; + continue; + } + + prefix = strndup(absolute, done + n_component); + if (!prefix) { + r = -ENOMEM; + goto fail; + } + + if (!GREEDY_REALLOC(new_watches, n + 1)) { + r = -ENOMEM; + goto fail; + } + + wd = inotify_add_watch(b->inotify_fd, prefix, IN_DELETE_SELF|IN_MOVE_SELF|IN_ATTRIB|IN_CREATE|IN_MOVED_TO|IN_DONT_FOLLOW); + log_debug("Added inotify watch for %s on bus %s: %i", prefix, strna(b->description), wd); + + if (wd < 0) { + if (IN_SET(errno, ENOENT, ELOOP)) + break; /* This component doesn't exist yet, or the path contains a cyclic symlink right now */ + + r = log_debug_errno(errno, "Failed to add inotify watch on %s: %m", empty_to_root(prefix)); + goto fail; + } else + new_watches[n++] = wd; + + /* Check if this is possibly a symlink. If so, let's follow it and watch it too. */ + r = readlink_malloc(prefix, &destination); + if (r == -EINVAL) { /* not a symlink */ + done += n_component; + continue; + } + if (r < 0) + goto fail; + + if (isempty(destination)) { /* Empty symlink target? Yuck! */ + r = -EINVAL; + goto fail; + } + + if (max_follow <= 0) { /* Let's make sure we don't follow symlinks forever */ + r = -ELOOP; + goto fail; + } + + if (path_is_absolute(destination)) { + /* For absolute symlinks we build the new path and start anew */ + c = strjoin(destination, absolute + done + n_component); + done = 0; + } else { + _cleanup_free_ char *t = NULL; + + /* For relative symlinks we replace the last component, and try again */ + t = strndup(absolute, done); + if (!t) + return -ENOMEM; + + c = strjoin(t, "/", destination, absolute + done + n_component); + } + if (!c) { + r = -ENOMEM; + goto fail; + } + + free_and_replace(absolute, c); + + max_follow--; + } + + /* And now, let's remove all watches from the previous iteration we don't need anymore */ + for (i = 0; i < b->n_inotify_watches; i++) { + bool found = false; + size_t j; + + for (j = 0; j < n; j++) + if (new_watches[j] == b->inotify_watches[i]) { + found = true; + break; + } + + if (found) + continue; + + (void) inotify_rm_watch(b->inotify_fd, b->inotify_watches[i]); + } + + free_and_replace(b->inotify_watches, new_watches); + b->n_inotify_watches = n; + + return 0; + +fail: + bus_close_inotify_fd(b); + return r; +} + +static int bind_description(sd_bus *b, int fd, int family) { + _cleanup_free_ char *bind_name = NULL, *comm = NULL; + union sockaddr_union bsa; + const char *d = NULL; + int r; + + assert(b); + assert(fd >= 0); + + /* If this is an AF_UNIX socket, let's set our client's socket address to carry the description + * string for this bus connection. This is useful for debugging things, as the connection name is + * visible in various socket-related tools, and can even be queried by the server side. */ + + if (family != AF_UNIX) + return 0; + + (void) sd_bus_get_description(b, &d); + + /* Generate a recognizable source address in the abstract namespace. We'll include: + * - a random 64-bit value (to avoid collisions) + * - our "comm" process name (suppressed if contains "/" to avoid parsing issues) + * - the description string of the bus connection. */ + (void) pid_get_comm(0, &comm); + if (comm && strchr(comm, '/')) + comm = mfree(comm); + + if (!d && !comm) /* skip if we don't have either field, rely on kernel autobind instead */ + return 0; + + if (asprintf(&bind_name, "@%" PRIx64 "/bus/%s/%s", random_u64(), strempty(comm), strempty(d)) < 0) + return -ENOMEM; + + strshorten(bind_name, sizeof_field(struct sockaddr_un, sun_path)); + + r = sockaddr_un_set_path(&bsa.un, bind_name); + if (r < 0) + return r; + + if (bind(fd, &bsa.sa, r) < 0) + return -errno; + + return 0; +} + +int bus_socket_connect(sd_bus *b) { + bool inotify_done = false; + int r; + + assert(b); + + for (;;) { + assert(b->input_fd < 0); + assert(b->output_fd < 0); + assert(b->sockaddr.sa.sa_family != AF_UNSPEC); + + if (DEBUG_LOGGING) { + _cleanup_free_ char *pretty = NULL; + (void) sockaddr_pretty(&b->sockaddr.sa, b->sockaddr_size, false, true, &pretty); + log_debug("sd-bus: starting bus%s%s by connecting to %s...", + b->description ? " " : "", strempty(b->description), strnull(pretty)); + } + + b->input_fd = socket(b->sockaddr.sa.sa_family, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); + if (b->input_fd < 0) + return -errno; + + r = bind_description(b, b->input_fd, b->sockaddr.sa.sa_family); + if (r < 0) + return r; + + b->input_fd = fd_move_above_stdio(b->input_fd); + + b->output_fd = b->input_fd; + bus_socket_setup(b); + + if (connect(b->input_fd, &b->sockaddr.sa, b->sockaddr_size) < 0) { + if (errno == EINPROGRESS) { + + /* If we have any inotify watches open, close them now, we don't need them anymore, as + * we have successfully initiated a connection */ + bus_close_inotify_fd(b); + + /* Note that very likely we are already in BUS_OPENING state here, as we enter it when + * we start parsing the address string. The only reason we set the state explicitly + * here, is to undo BUS_WATCH_BIND, in case we did the inotify magic. */ + bus_set_state(b, BUS_OPENING); + return 1; + } + + if (IN_SET(errno, ENOENT, ECONNREFUSED) && /* ENOENT → unix socket doesn't exist at all; ECONNREFUSED → unix socket stale */ + b->watch_bind && + b->sockaddr.sa.sa_family == AF_UNIX && + b->sockaddr.un.sun_path[0] != 0) { + + /* This connection attempt failed, let's release the socket for now, and start with a + * fresh one when reconnecting. */ + bus_close_io_fds(b); + + if (inotify_done) { + /* inotify set up already, don't do it again, just return now, and remember + * that we are waiting for inotify events now. */ + bus_set_state(b, BUS_WATCH_BIND); + return 1; + } + + /* This is a file system socket, and the inotify logic is enabled. Let's create the necessary inotify fd. */ + r = bus_socket_inotify_setup(b); + if (r < 0) + return r; + + /* Let's now try to connect a second time, because in theory there's otherwise a race + * here: the socket might have been created in the time between our first connect() and + * the time we set up the inotify logic. But let's remember that we set up inotify now, + * so that we don't do the connect() more than twice. */ + inotify_done = true; + + } else + return -errno; + } else + break; + } + + /* Yay, established, we don't need no inotify anymore! */ + bus_close_inotify_fd(b); + + return bus_socket_start_auth(b); +} + +int bus_socket_exec(sd_bus *b) { + int s[2], r; + + assert(b); + assert(b->input_fd < 0); + assert(b->output_fd < 0); + assert(b->exec_path); + assert(b->busexec_pid == 0); + + if (DEBUG_LOGGING) { + _cleanup_free_ char *line = NULL; + + if (b->exec_argv) + line = quote_command_line(b->exec_argv, SHELL_ESCAPE_EMPTY); + + log_debug("sd-bus: starting bus%s%s with %s%s", + b->description ? " " : "", strempty(b->description), + line ?: b->exec_path, + b->exec_argv && !line ? "…" : ""); + } + + r = socketpair(AF_UNIX, SOCK_STREAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, s); + if (r < 0) + return -errno; + + r = safe_fork_full("(sd-busexec)", + (int[]) { s[1], s[1], STDERR_FILENO }, + NULL, 0, + FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_REARRANGE_STDIO|FORK_RLIMIT_NOFILE_SAFE, &b->busexec_pid); + if (r < 0) { + safe_close_pair(s); + return r; + } + if (r == 0) { + /* Child */ + + if (b->exec_argv) + execvp(b->exec_path, b->exec_argv); + else + execvp(b->exec_path, STRV_MAKE(b->exec_path)); + + _exit(EXIT_FAILURE); + } + + safe_close(s[1]); + b->output_fd = b->input_fd = fd_move_above_stdio(s[0]); + + bus_socket_setup(b); + + return bus_socket_start_auth(b); +} + +int bus_socket_take_fd(sd_bus *b) { + assert(b); + + bus_socket_setup(b); + + return bus_socket_start_auth(b); +} + +int bus_socket_write_message(sd_bus *bus, sd_bus_message *m, size_t *idx) { + struct iovec *iov; + ssize_t k; + size_t n; + unsigned j; + int r; + + assert(bus); + assert(m); + assert(idx); + assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO)); + + if (*idx >= BUS_MESSAGE_SIZE(m)) + return 0; + + r = bus_message_setup_iovec(m); + if (r < 0) + return r; + + n = m->n_iovec * sizeof(struct iovec); + iov = newa(struct iovec, n); + memcpy_safe(iov, m->iovec, n); + + j = 0; + iovec_advance(iov, &j, *idx); + + if (bus->prefer_writev) + k = writev(bus->output_fd, iov, m->n_iovec); + else { + struct msghdr mh = { + .msg_iov = iov, + .msg_iovlen = m->n_iovec, + }; + + if (m->n_fds > 0 && *idx == 0) { + struct cmsghdr *control; + + mh.msg_controllen = CMSG_SPACE(sizeof(int) * m->n_fds); + mh.msg_control = alloca0(mh.msg_controllen); + control = CMSG_FIRSTHDR(&mh); + control->cmsg_len = CMSG_LEN(sizeof(int) * m->n_fds); + control->cmsg_level = SOL_SOCKET; + control->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(control), m->fds, sizeof(int) * m->n_fds); + } + + k = sendmsg(bus->output_fd, &mh, MSG_DONTWAIT|MSG_NOSIGNAL); + if (k < 0 && errno == ENOTSOCK) { + bus->prefer_writev = true; + k = writev(bus->output_fd, iov, m->n_iovec); + } + } + + if (k < 0) + return ERRNO_IS_TRANSIENT(errno) ? 0 : -errno; + + *idx += (size_t) k; + return 1; +} + +static int bus_socket_read_message_need(sd_bus *bus, size_t *need) { + uint32_t a, b; + uint8_t e; + uint64_t sum; + + assert(bus); + assert(need); + assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO)); + + if (bus->rbuffer_size < sizeof(struct bus_header)) { + *need = sizeof(struct bus_header) + 8; + + /* Minimum message size: + * + * Header + + * + * Method Call: +2 string headers + * Signal: +3 string headers + * Method Error: +1 string headers + * +1 uint32 headers + * Method Reply: +1 uint32 headers + * + * A string header is at least 9 bytes + * A uint32 header is at least 8 bytes + * + * Hence the minimum message size of a valid message + * is header + 8 bytes */ + + return 0; + } + + a = ((const uint32_t*) bus->rbuffer)[1]; + b = ((const uint32_t*) bus->rbuffer)[3]; + + e = ((const uint8_t*) bus->rbuffer)[0]; + if (e == BUS_LITTLE_ENDIAN) { + a = le32toh(a); + b = le32toh(b); + } else if (e == BUS_BIG_ENDIAN) { + a = be32toh(a); + b = be32toh(b); + } else + return -EBADMSG; + + sum = (uint64_t) sizeof(struct bus_header) + (uint64_t) ALIGN8(b) + (uint64_t) a; + if (sum >= BUS_MESSAGE_SIZE_MAX) + return -ENOBUFS; + + *need = (size_t) sum; + return 0; +} + +static int bus_socket_make_message(sd_bus *bus, size_t size) { + sd_bus_message *t = NULL; + void *b; + int r; + + assert(bus); + assert(bus->rbuffer_size >= size); + assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO)); + + r = bus_rqueue_make_room(bus); + if (r < 0) + return r; + + if (bus->rbuffer_size > size) { + b = memdup((const uint8_t*) bus->rbuffer + size, + bus->rbuffer_size - size); + if (!b) + return -ENOMEM; + } else + b = NULL; + + r = bus_message_from_malloc(bus, + bus->rbuffer, size, + bus->fds, bus->n_fds, + NULL, + &t); + if (r == -EBADMSG) { + log_debug_errno(r, "Received invalid message from connection %s, dropping.", strna(bus->description)); + free(bus->rbuffer); /* We want to drop current rbuffer and proceed with whatever remains in b */ + } else if (r < 0) { + free(b); + return r; + } + + /* rbuffer ownership was either transferred to t, or we got EBADMSG and dropped it. */ + bus->rbuffer = b; + bus->rbuffer_size -= size; + + bus->fds = NULL; + bus->n_fds = 0; + + if (t) { + t->read_counter = ++bus->read_counter; + bus->rqueue[bus->rqueue_size++] = bus_message_ref_queued(t, bus); + sd_bus_message_unref(t); + } + + return 1; +} + +int bus_socket_read_message(sd_bus *bus) { + struct msghdr mh; + struct iovec iov = {}; + ssize_t k; + size_t need; + int r; + void *b; + CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int) * BUS_FDS_MAX)) control; + bool handle_cmsg = false; + + assert(bus); + assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO)); + + r = bus_socket_read_message_need(bus, &need); + if (r < 0) + return r; + + if (bus->rbuffer_size >= need) + return bus_socket_make_message(bus, need); + + b = realloc(bus->rbuffer, need); + if (!b) + return -ENOMEM; + + bus->rbuffer = b; + + iov = IOVEC_MAKE((uint8_t *)bus->rbuffer + bus->rbuffer_size, need - bus->rbuffer_size); + + if (bus->prefer_readv) { + k = readv(bus->input_fd, &iov, 1); + if (k < 0) + k = -errno; + } else { + mh = (struct msghdr) { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = &control, + .msg_controllen = sizeof(control), + }; + + k = recvmsg_safe(bus->input_fd, &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC); + if (k == -ENOTSOCK) { + bus->prefer_readv = true; + k = readv(bus->input_fd, &iov, 1); + if (k < 0) + k = -errno; + } else + handle_cmsg = true; + } + if (ERRNO_IS_NEG_TRANSIENT(k)) + return 0; + if (k < 0) + return (int) k; + if (k == 0) { + if (handle_cmsg) + cmsg_close_all(&mh); /* On EOF we shouldn't have gotten an fd, but let's make sure */ + return -ECONNRESET; + } + + bus->rbuffer_size += k; + + if (handle_cmsg) { + struct cmsghdr *cmsg; + + CMSG_FOREACH(cmsg, &mh) + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_RIGHTS) { + int n, *f, i; + + n = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int); + + if (!bus->can_fds) { + /* Whut? We received fds but this + * isn't actually enabled? Close them, + * and fail */ + + close_many(CMSG_TYPED_DATA(cmsg, int), n); + return -EIO; + } + + f = reallocarray(bus->fds, bus->n_fds + n, sizeof(int)); + if (!f) { + close_many(CMSG_TYPED_DATA(cmsg, int), n); + return -ENOMEM; + } + + for (i = 0; i < n; i++) + f[bus->n_fds++] = fd_move_above_stdio(CMSG_TYPED_DATA(cmsg, int)[i]); + bus->fds = f; + } else + log_debug("Got unexpected auxiliary data with level=%d and type=%d", + cmsg->cmsg_level, cmsg->cmsg_type); + } + + r = bus_socket_read_message_need(bus, &need); + if (r < 0) + return r; + + if (bus->rbuffer_size >= need) + return bus_socket_make_message(bus, need); + + return 1; +} + +int bus_socket_process_opening(sd_bus *b) { + int error = 0, events, r; + socklen_t slen = sizeof(error); + + assert(b->state == BUS_OPENING); + + events = fd_wait_for_event(b->output_fd, POLLOUT, 0); + if (ERRNO_IS_NEG_TRANSIENT(events)) + return 0; + if (events < 0) + return events; + if (!(events & (POLLOUT|POLLERR|POLLHUP))) + return 0; + + r = getsockopt(b->output_fd, SOL_SOCKET, SO_ERROR, &error, &slen); + if (r < 0) + b->last_connect_error = errno; + else if (error != 0) + b->last_connect_error = error; + else if (events & (POLLERR|POLLHUP)) + b->last_connect_error = ECONNREFUSED; + else + return bus_socket_start_auth(b); + + return bus_next_address(b); +} + +int bus_socket_process_authenticating(sd_bus *b) { + int r; + + assert(b); + assert(b->state == BUS_AUTHENTICATING); + + if (now(CLOCK_MONOTONIC) >= b->auth_timeout) + return -ETIMEDOUT; + + r = bus_socket_write_auth(b); + if (r != 0) + return r; + + return bus_socket_read_auth(b); +} + +int bus_socket_process_watch_bind(sd_bus *b) { + int r, q; + + assert(b); + assert(b->state == BUS_WATCH_BIND); + assert(b->inotify_fd >= 0); + + r = flush_fd(b->inotify_fd); + if (r <= 0) + return r; + + log_debug("Got inotify event on bus %s.", strna(b->description)); + + /* We flushed events out of the inotify fd. In that case, maybe the socket is valid now? Let's try to connect + * to it again */ + + r = bus_socket_connect(b); + if (r < 0) + return r; + + q = bus_attach_io_events(b); + if (q < 0) + return q; + + q = bus_attach_inotify_event(b); + if (q < 0) + return q; + + return r; +} diff --git a/src/libsystemd/sd-bus/bus-socket.h b/src/libsystemd/sd-bus/bus-socket.h new file mode 100644 index 0000000..52bc404 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-socket.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-bus.h" + +void bus_socket_setup(sd_bus *b); + +int bus_socket_connect(sd_bus *b); +int bus_socket_exec(sd_bus *b); +int bus_socket_take_fd(sd_bus *b); +int bus_socket_start_auth(sd_bus *b); + +int bus_socket_write_message(sd_bus *bus, sd_bus_message *m, size_t *idx); +int bus_socket_read_message(sd_bus *bus); + +int bus_socket_process_opening(sd_bus *b); +int bus_socket_process_authenticating(sd_bus *b); +int bus_socket_process_watch_bind(sd_bus *b); + +bool bus_socket_auth_needs_write(sd_bus *b); diff --git a/src/libsystemd/sd-bus/bus-track.c b/src/libsystemd/sd-bus/bus-track.c new file mode 100644 index 0000000..f9c59a1 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-track.c @@ -0,0 +1,495 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "sd-bus.h" + +#include "alloc-util.h" +#include "bus-internal.h" +#include "bus-track.h" +#include "string-util.h" + +struct track_item { + unsigned n_ref; + char *name; + sd_bus_slot *slot; +}; + +struct sd_bus_track { + unsigned n_ref; + unsigned n_adding; /* are we in the process of adding a new name? */ + sd_bus *bus; + sd_bus_track_handler_t handler; + void *userdata; + Hashmap *names; + LIST_FIELDS(sd_bus_track, queue); + Iterator iterator; + bool in_list:1; /* In bus->tracks? */ + bool in_queue:1; /* In bus->track_queue? */ + bool modified:1; + bool recursive:1; + sd_bus_destroy_t destroy_callback; + + LIST_FIELDS(sd_bus_track, tracks); +}; + +#define MATCH_FOR_NAME(name) \ + strjoina("type='signal'," \ + "sender='org.freedesktop.DBus'," \ + "path='/org/freedesktop/DBus'," \ + "interface='org.freedesktop.DBus'," \ + "member='NameOwnerChanged'," \ + "arg0='", name, "'") + +static struct track_item* track_item_free(struct track_item *i) { + if (!i) + return NULL; + + sd_bus_slot_unref(i->slot); + free(i->name); + return mfree(i); +} + +DEFINE_PRIVATE_TRIVIAL_UNREF_FUNC(struct track_item, track_item, track_item_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(struct track_item*, track_item_unref); +DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(track_item_hash_ops, char, string_hash_func, string_compare_func, + struct track_item, track_item_free); + +static void bus_track_add_to_queue(sd_bus_track *track) { + assert(track); + + /* Adds the bus track object to the queue of objects we should dispatch next, subject to a number of + * conditions. */ + + /* Already in the queue? */ + if (track->in_queue) + return; + + /* if we are currently in the process of adding a new name, then let's not enqueue this just yet, let's wait + * until the addition is complete. */ + if (track->n_adding > 0) + return; + + /* still referenced? */ + if (hashmap_size(track->names) > 0) + return; + + /* Nothing to call? */ + if (!track->handler) + return; + + /* Already closed? */ + if (!track->in_list) + return; + + LIST_PREPEND(queue, track->bus->track_queue, track); + track->in_queue = true; +} + +static void bus_track_remove_from_queue(sd_bus_track *track) { + assert(track); + + if (!track->in_queue) + return; + + LIST_REMOVE(queue, track->bus->track_queue, track); + track->in_queue = false; +} + +static int bus_track_remove_name_fully(sd_bus_track *track, const char *name) { + struct track_item *i; + + assert(track); + assert(name); + + i = hashmap_remove(track->names, name); + if (!i) + return 0; + + track_item_free(i); + + bus_track_add_to_queue(track); + + track->modified = true; + return 1; +} + +_public_ int sd_bus_track_new( + sd_bus *bus, + sd_bus_track **track, + sd_bus_track_handler_t handler, + void *userdata) { + + sd_bus_track *t; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(track, -EINVAL); + + if (!bus->bus_client) + return -EINVAL; + + t = new0(sd_bus_track, 1); + if (!t) + return -ENOMEM; + + t->n_ref = 1; + t->handler = handler; + t->userdata = userdata; + t->bus = sd_bus_ref(bus); + + LIST_PREPEND(tracks, bus->tracks, t); + t->in_list = true; + + bus_track_add_to_queue(t); + + *track = t; + return 0; +} + +static sd_bus_track *track_free(sd_bus_track *track) { + assert(track); + + if (track->in_list) + LIST_REMOVE(tracks, track->bus->tracks, track); + + bus_track_remove_from_queue(track); + track->names = hashmap_free(track->names); + track->bus = sd_bus_unref(track->bus); + + if (track->destroy_callback) + track->destroy_callback(track->userdata); + + return mfree(track); +} + +DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_bus_track, sd_bus_track, track_free); + +static int on_name_owner_changed(sd_bus_message *message, void *userdata, sd_bus_error *error) { + sd_bus_track *track = ASSERT_PTR(userdata); + const char *name; + int r; + + assert(message); + + r = sd_bus_message_read(message, "sss", &name, NULL, NULL); + if (r < 0) + return 0; + + bus_track_remove_name_fully(track, name); + return 0; +} + +_public_ int sd_bus_track_add_name(sd_bus_track *track, const char *name) { + _cleanup_(track_item_unrefp) struct track_item *n = NULL; + struct track_item *i; + const char *match; + int r; + + assert_return(track, -EINVAL); + assert_return(service_name_is_valid(name), -EINVAL); + + i = hashmap_get(track->names, name); + if (i) { + if (track->recursive) { + assert(i->n_ref > 0); + + /* Manual overflow check (instead of a DEFINE_TRIVIAL_REF_FUNC() helper or so), so + * that we can return a proper error, given this is almost always called in a + * directly client controllable way, and thus better should never hit an assertion + * here. */ + if (i->n_ref >= UINT_MAX) + return -EOVERFLOW; + + i->n_ref++; + } + + bus_track_remove_from_queue(track); + return 0; + } + + r = hashmap_ensure_allocated(&track->names, &track_item_hash_ops); + if (r < 0) + return r; + + n = new(struct track_item, 1); + if (!n) + return -ENOMEM; + + *n = (struct track_item) { + .n_ref = 1, + }; + + n->name = strdup(name); + if (!n->name) + return -ENOMEM; + + /* First, subscribe to this name */ + match = MATCH_FOR_NAME(name); + + bus_track_remove_from_queue(track); /* don't dispatch this while we work in it */ + + r = sd_bus_add_match_async(track->bus, &n->slot, match, on_name_owner_changed, NULL, track); + if (r < 0) { + bus_track_add_to_queue(track); + return r; + } + + r = hashmap_put(track->names, n->name, n); + if (r < 0) { + bus_track_add_to_queue(track); + return r; + } + + /* Second, check if it is currently existing, or maybe doesn't, or maybe disappeared already. */ + track->n_adding++; /* again, make sure this isn't dispatch while we are working in it */ + r = sd_bus_get_name_creds(track->bus, name, 0, NULL); + track->n_adding--; + if (r < 0) { + hashmap_remove(track->names, name); + bus_track_add_to_queue(track); + return r; + } + + TAKE_PTR(n); + + bus_track_remove_from_queue(track); + track->modified = true; + + return 1; +} + +_public_ int sd_bus_track_remove_name(sd_bus_track *track, const char *name) { + struct track_item *i; + + assert_return(name, -EINVAL); + + if (!track) /* Treat a NULL track object as an empty track object */ + return 0; + + i = hashmap_get(track->names, name); + if (!i) + return 0; + + assert(i->n_ref >= 1); + if (i->n_ref <= 1) + return bus_track_remove_name_fully(track, name); + + track_item_unref(i); + + return 1; +} + +_public_ unsigned sd_bus_track_count(sd_bus_track *track) { + + if (!track) /* Let's consider a NULL object equivalent to an empty object */ + return 0; + + /* This signature really should have returned an int, so that we can propagate errors. But well, ... Also, note + * that this returns the number of names being watched, and multiple references to the same name are not + * counted. */ + + return hashmap_size(track->names); +} + +_public_ const char* sd_bus_track_contains(sd_bus_track *track, const char *name) { + assert_return(name, NULL); + + if (!track) /* Let's consider a NULL object equivalent to an empty object */ + return NULL; + + return hashmap_contains(track->names, name) ? name : NULL; +} + +_public_ const char* sd_bus_track_first(sd_bus_track *track) { + const char *n = NULL; + + if (!track) + return NULL; + + track->modified = false; + track->iterator = ITERATOR_FIRST; + + (void) hashmap_iterate(track->names, &track->iterator, NULL, (const void**) &n); + return n; +} + +_public_ const char* sd_bus_track_next(sd_bus_track *track) { + const char *n = NULL; + + if (!track) + return NULL; + + if (track->modified) + return NULL; + + (void) hashmap_iterate(track->names, &track->iterator, NULL, (const void**) &n); + return n; +} + +_public_ int sd_bus_track_add_sender(sd_bus_track *track, sd_bus_message *m) { + const char *sender; + + assert_return(track, -EINVAL); + assert_return(m, -EINVAL); + + if (sd_bus_message_get_bus(m) != track->bus) + return -EINVAL; + + sender = sd_bus_message_get_sender(m); + if (!sender) + return -EINVAL; + + return sd_bus_track_add_name(track, sender); +} + +_public_ int sd_bus_track_remove_sender(sd_bus_track *track, sd_bus_message *m) { + const char *sender; + + assert_return(m, -EINVAL); + + if (!track) /* Treat a NULL track object as an empty track object */ + return 0; + + if (sd_bus_message_get_bus(m) != track->bus) + return -EINVAL; + + sender = sd_bus_message_get_sender(m); + if (!sender) + return -EINVAL; + + return sd_bus_track_remove_name(track, sender); +} + +_public_ sd_bus* sd_bus_track_get_bus(sd_bus_track *track) { + assert_return(track, NULL); + + return track->bus; +} + +void bus_track_dispatch(sd_bus_track *track) { + int r; + + assert(track); + assert(track->handler); + + bus_track_remove_from_queue(track); + + sd_bus_track_ref(track); + + r = track->handler(track, track->userdata); + if (r < 0) + log_debug_errno(r, "Failed to process track handler: %m"); + else if (r == 0) + bus_track_add_to_queue(track); + + sd_bus_track_unref(track); +} + +void bus_track_close(sd_bus_track *track) { + assert(track); + + /* Called whenever our bus connected is closed. If so, and our track object is non-empty, dispatch it + * immediately, as we are closing now, but first flush out all names. */ + + if (!track->in_list) + return; /* We already closed this one, don't close it again. */ + + /* Remember that this one is closed now */ + LIST_REMOVE(tracks, track->bus->tracks, track); + track->in_list = false; + + /* If there's no name in this one anyway, we don't have to dispatch */ + if (hashmap_isempty(track->names)) + return; + + /* Let's flush out all names */ + hashmap_clear(track->names); + + /* Invoke handler */ + if (track->handler) + bus_track_dispatch(track); +} + +_public_ void *sd_bus_track_get_userdata(sd_bus_track *track) { + assert_return(track, NULL); + + return track->userdata; +} + +_public_ void *sd_bus_track_set_userdata(sd_bus_track *track, void *userdata) { + void *ret; + + assert_return(track, NULL); + + ret = track->userdata; + track->userdata = userdata; + + return ret; +} + +_public_ int sd_bus_track_set_destroy_callback(sd_bus_track *track, sd_bus_destroy_t callback) { + assert_return(track, -EINVAL); + + track->destroy_callback = callback; + return 0; +} + +_public_ int sd_bus_track_get_destroy_callback(sd_bus_track *track, sd_bus_destroy_t *ret) { + assert_return(track, -EINVAL); + + if (ret) + *ret = track->destroy_callback; + + return !!track->destroy_callback; +} + +_public_ int sd_bus_track_set_recursive(sd_bus_track *track, int b) { + assert_return(track, -EINVAL); + + if (track->recursive == !!b) + return 0; + + if (!hashmap_isempty(track->names)) + return -EBUSY; + + track->recursive = b; + return 0; +} + +_public_ int sd_bus_track_get_recursive(sd_bus_track *track) { + assert_return(track, -EINVAL); + + return track->recursive; +} + +_public_ int sd_bus_track_count_sender(sd_bus_track *track, sd_bus_message *m) { + const char *sender; + + assert_return(m, -EINVAL); + + if (!track) /* Let's consider a NULL object equivalent to an empty object */ + return 0; + + if (sd_bus_message_get_bus(m) != track->bus) + return -EINVAL; + + sender = sd_bus_message_get_sender(m); + if (!sender) + return -EINVAL; + + return sd_bus_track_count_name(track, sender); +} + +_public_ int sd_bus_track_count_name(sd_bus_track *track, const char *name) { + struct track_item *i; + + assert_return(service_name_is_valid(name), -EINVAL); + + if (!track) /* Let's consider a NULL object equivalent to an empty object */ + return 0; + + i = hashmap_get(track->names, name); + if (!i) + return 0; + + return i->n_ref; +} diff --git a/src/libsystemd/sd-bus/bus-track.h b/src/libsystemd/sd-bus/bus-track.h new file mode 100644 index 0000000..8dae1f3 --- /dev/null +++ b/src/libsystemd/sd-bus/bus-track.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +void bus_track_dispatch(sd_bus_track *track); +void bus_track_close(sd_bus_track *track); diff --git a/src/libsystemd/sd-bus/bus-type.c b/src/libsystemd/sd-bus/bus-type.c new file mode 100644 index 0000000..6a0f53d --- /dev/null +++ b/src/libsystemd/sd-bus/bus-type.c @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> + +#include "sd-bus.h" + +#include "bus-internal.h" +#include "bus-type.h" + +bool bus_type_is_valid(char c) { + static const char valid[] = { + SD_BUS_TYPE_BYTE, + SD_BUS_TYPE_BOOLEAN, + SD_BUS_TYPE_INT16, + SD_BUS_TYPE_UINT16, + SD_BUS_TYPE_INT32, + SD_BUS_TYPE_UINT32, + SD_BUS_TYPE_INT64, + SD_BUS_TYPE_UINT64, + SD_BUS_TYPE_DOUBLE, + SD_BUS_TYPE_STRING, + SD_BUS_TYPE_OBJECT_PATH, + SD_BUS_TYPE_SIGNATURE, + SD_BUS_TYPE_ARRAY, + SD_BUS_TYPE_VARIANT, + SD_BUS_TYPE_STRUCT, + SD_BUS_TYPE_DICT_ENTRY, + SD_BUS_TYPE_UNIX_FD + }; + + return !!memchr(valid, c, sizeof(valid)); +} + +bool bus_type_is_basic(char c) { + static const char valid[] = { + SD_BUS_TYPE_BYTE, + SD_BUS_TYPE_BOOLEAN, + SD_BUS_TYPE_INT16, + SD_BUS_TYPE_UINT16, + SD_BUS_TYPE_INT32, + SD_BUS_TYPE_UINT32, + SD_BUS_TYPE_INT64, + SD_BUS_TYPE_UINT64, + SD_BUS_TYPE_DOUBLE, + SD_BUS_TYPE_STRING, + SD_BUS_TYPE_OBJECT_PATH, + SD_BUS_TYPE_SIGNATURE, + SD_BUS_TYPE_UNIX_FD + }; + + return !!memchr(valid, c, sizeof(valid)); +} + +bool bus_type_is_trivial(char c) { + static const char valid[] = { + SD_BUS_TYPE_BYTE, + SD_BUS_TYPE_BOOLEAN, + SD_BUS_TYPE_INT16, + SD_BUS_TYPE_UINT16, + SD_BUS_TYPE_INT32, + SD_BUS_TYPE_UINT32, + SD_BUS_TYPE_INT64, + SD_BUS_TYPE_UINT64, + SD_BUS_TYPE_DOUBLE + }; + + return !!memchr(valid, c, sizeof(valid)); +} + +bool bus_type_is_container(char c) { + static const char valid[] = { + SD_BUS_TYPE_ARRAY, + SD_BUS_TYPE_VARIANT, + SD_BUS_TYPE_STRUCT, + SD_BUS_TYPE_DICT_ENTRY + }; + + return !!memchr(valid, c, sizeof(valid)); +} + +int bus_type_get_alignment(char c) { + + switch (c) { + case SD_BUS_TYPE_BYTE: + case SD_BUS_TYPE_SIGNATURE: + case SD_BUS_TYPE_VARIANT: + return 1; + + case SD_BUS_TYPE_INT16: + case SD_BUS_TYPE_UINT16: + return 2; + + case SD_BUS_TYPE_BOOLEAN: + case SD_BUS_TYPE_INT32: + case SD_BUS_TYPE_UINT32: + case SD_BUS_TYPE_STRING: + case SD_BUS_TYPE_OBJECT_PATH: + case SD_BUS_TYPE_ARRAY: + case SD_BUS_TYPE_UNIX_FD: + return 4; + + case SD_BUS_TYPE_INT64: + case SD_BUS_TYPE_UINT64: + case SD_BUS_TYPE_DOUBLE: + case SD_BUS_TYPE_STRUCT: + case SD_BUS_TYPE_STRUCT_BEGIN: + case SD_BUS_TYPE_DICT_ENTRY: + case SD_BUS_TYPE_DICT_ENTRY_BEGIN: + return 8; + } + + return -EINVAL; +} + +int bus_type_get_size(char c) { + + switch (c) { + case SD_BUS_TYPE_BYTE: + return 1; + + case SD_BUS_TYPE_INT16: + case SD_BUS_TYPE_UINT16: + return 2; + + case SD_BUS_TYPE_BOOLEAN: + case SD_BUS_TYPE_INT32: + case SD_BUS_TYPE_UINT32: + case SD_BUS_TYPE_UNIX_FD: + return 4; + + case SD_BUS_TYPE_INT64: + case SD_BUS_TYPE_UINT64: + case SD_BUS_TYPE_DOUBLE: + return 8; + } + + return -EINVAL; +} + +_public_ int sd_bus_interface_name_is_valid(const char *p) { + assert_return(p, -EINVAL); + + return interface_name_is_valid(p); +} + +_public_ int sd_bus_service_name_is_valid(const char *p) { + assert_return(p, -EINVAL); + + return service_name_is_valid(p); +} + +_public_ int sd_bus_member_name_is_valid(const char *p) { + assert_return(p, -EINVAL); + + return member_name_is_valid(p); +} + +_public_ int sd_bus_object_path_is_valid(const char *p) { + assert_return(p, -EINVAL); + + return object_path_is_valid(p); +} diff --git a/src/libsystemd/sd-bus/bus-type.h b/src/libsystemd/sd-bus/bus-type.h new file mode 100644 index 0000000..490108a --- /dev/null +++ b/src/libsystemd/sd-bus/bus-type.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "macro.h" + +bool bus_type_is_valid(char c) _const_; +bool bus_type_is_basic(char c) _const_; +/* "trivial" is systemd's term for what the D-Bus Specification calls + * a "fixed type": that is, a basic type of fixed length */ +bool bus_type_is_trivial(char c) _const_; +bool bus_type_is_container(char c) _const_; + +int bus_type_get_alignment(char c) _const_; +int bus_type_get_size(char c) _const_; diff --git a/src/libsystemd/sd-bus/fuzz-bus-match.c b/src/libsystemd/sd-bus/fuzz-bus-match.c new file mode 100644 index 0000000..16da534 --- /dev/null +++ b/src/libsystemd/sd-bus/fuzz-bus-match.c @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "alloc-util.h" +#include "bus-internal.h" +#include "bus-match.h" +#include "env-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "fuzz.h" +#include "memstream-util.h" + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + _cleanup_(memstream_done) MemStream m = {}; + _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL; + FILE *g = NULL; + int r; + + if (outside_size_range(size, 0, 65536)) + return 0; + + fuzz_setup_logging(); + + r = sd_bus_new(&bus); + assert_se(r >= 0); + + _cleanup_(bus_match_free) struct bus_match_node root = { + .type = BUS_MATCH_ROOT, + }; + + /* Note that we use the pointer to match_callback substructure, but the code + * uses container_of() to access outside of the passed-in type. */ + sd_bus_slot slot = { + .type = BUS_MATCH_CALLBACK, + .match_callback = {}, + }; + + if (getenv_bool("SYSTEMD_FUZZ_OUTPUT") <= 0) + assert_se(g = memstream_init(&m)); + + for (size_t offset = 0; offset < size; ) { + _cleanup_free_ char *line = NULL; + char *end; + + end = memchr((char*) data + offset, '\n', size - offset); + + line = memdup_suffix0((char*) data + offset, + end ? end - (char*) data - offset : size - offset); + if (!line) + return log_oom_debug(); + + offset = end ? (size_t) (end - (char*) data + 1) : size; + + struct bus_match_component *components; + size_t n_components; + r = bus_match_parse(line, &components, &n_components); + if (IN_SET(r, -EINVAL, -ENOMEM)) { + log_debug_errno(r, "Failed to parse line: %m"); + continue; + } + assert_se(r >= 0); /* We only expect EINVAL and ENOMEM errors, or success. */ + + CLEANUP_ARRAY(components, n_components, bus_match_parse_free); + + log_debug("Parsed %zu components.", n_components); + + _cleanup_free_ char *again = bus_match_to_string(components, n_components); + if (!again) { + log_oom(); + break; + } + + if (g) + fprintf(g, "%s\n", again); + + r = bus_match_add(&root, components, n_components, &slot.match_callback); + if (r < 0) { + log_error_errno(r, "Failed to add match: %m"); + break; + } + } + + bus_match_dump(g ?: stdout, &root, 0); /* We do this even on failure, to check consistency after error. */ + bus_match_free(&root); + + return 0; +} diff --git a/src/libsystemd/sd-bus/fuzz-bus-match.options b/src/libsystemd/sd-bus/fuzz-bus-match.options new file mode 100644 index 0000000..678d526 --- /dev/null +++ b/src/libsystemd/sd-bus/fuzz-bus-match.options @@ -0,0 +1,2 @@ +[libfuzzer] +max_len = 65536 diff --git a/src/libsystemd/sd-bus/fuzz-bus-message.c b/src/libsystemd/sd-bus/fuzz-bus-message.c new file mode 100644 index 0000000..ca7091e --- /dev/null +++ b/src/libsystemd/sd-bus/fuzz-bus-message.c @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "alloc-util.h" +#include "bus-dump.h" +#include "bus-message.h" +#include "env-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "fuzz.h" +#include "memstream-util.h" + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + _cleanup_(memstream_done) MemStream ms = {}; + _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + _cleanup_free_ void *buffer = NULL; + FILE *g = NULL; + int r; + + fuzz_setup_logging(); + + r = sd_bus_new(&bus); + assert_se(r >= 0); + + assert_se(buffer = memdup(data, size)); + + r = bus_message_from_malloc(bus, buffer, size, NULL, 0, NULL, &m); + if (r == -EBADMSG) + return 0; + assert_se(r >= 0); + TAKE_PTR(buffer); + + if (getenv_bool("SYSTEMD_FUZZ_OUTPUT") <= 0) + assert_se(g = memstream_init(&ms)); + + sd_bus_message_dump(m, g ?: stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER); + + r = sd_bus_message_rewind(m, true); + assert_se(r >= 0); + + return 0; +} diff --git a/src/libsystemd/sd-bus/sd-bus.c b/src/libsystemd/sd-bus/sd-bus.c new file mode 100644 index 0000000..8befc97 --- /dev/null +++ b/src/libsystemd/sd-bus/sd-bus.c @@ -0,0 +1,4441 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <endian.h> +#include <netdb.h> +#include <pthread.h> +#include <signal.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "sd-bus.h" + +#include "af-list.h" +#include "alloc-util.h" +#include "bus-container.h" +#include "bus-control.h" +#include "bus-internal.h" +#include "bus-kernel.h" +#include "bus-label.h" +#include "bus-message.h" +#include "bus-objects.h" +#include "bus-protocol.h" +#include "bus-slot.h" +#include "bus-socket.h" +#include "bus-track.h" +#include "bus-type.h" +#include "cgroup-util.h" +#include "constants.h" +#include "errno-util.h" +#include "fd-util.h" +#include "glyph-util.h" +#include "hexdecoct.h" +#include "hostname-util.h" +#include "io-util.h" +#include "macro.h" +#include "memory-util.h" +#include "missing_syscall.h" +#include "missing_threads.h" +#include "origin-id.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strv.h" +#include "user-util.h" + +#define log_debug_bus_message(m) \ + do { \ + sd_bus_message *_mm = (m); \ + log_debug("Got message type=%s sender=%s destination=%s path=%s interface=%s member=%s " \ + " cookie=%" PRIu64 " reply_cookie=%" PRIu64 \ + " signature=%s error-name=%s error-message=%s", \ + strna(bus_message_type_to_string(_mm->header->type)), \ + strna(sd_bus_message_get_sender(_mm)), \ + strna(sd_bus_message_get_destination(_mm)), \ + strna(sd_bus_message_get_path(_mm)), \ + strna(sd_bus_message_get_interface(_mm)), \ + strna(sd_bus_message_get_member(_mm)), \ + BUS_MESSAGE_COOKIE(_mm), \ + _mm->reply_cookie, \ + strna(_mm->root_container.signature), \ + strna(_mm->error.name), \ + strna(_mm->error.message)); \ + } while (false) + +static int bus_poll(sd_bus *bus, bool need_more, uint64_t timeout_usec); +static void bus_detach_io_events(sd_bus *b); + +static thread_local sd_bus *default_system_bus = NULL; +static thread_local sd_bus *default_user_bus = NULL; +static thread_local sd_bus *default_starter_bus = NULL; + +static sd_bus **bus_choose_default(int (**bus_open)(sd_bus **)) { + const char *e; + + /* Let's try our best to reuse another cached connection. If + * the starter bus type is set, connect via our normal + * connection logic, ignoring $DBUS_STARTER_ADDRESS, so that + * we can share the connection with the user/system default + * bus. */ + + e = secure_getenv("DBUS_STARTER_BUS_TYPE"); + if (e) { + if (streq(e, "system")) { + if (bus_open) + *bus_open = sd_bus_open_system; + return &default_system_bus; + } else if (STR_IN_SET(e, "user", "session")) { + if (bus_open) + *bus_open = sd_bus_open_user; + return &default_user_bus; + } + } + + /* No type is specified, so we have not other option than to + * use the starter address if it is set. */ + e = secure_getenv("DBUS_STARTER_ADDRESS"); + if (e) { + if (bus_open) + *bus_open = sd_bus_open; + return &default_starter_bus; + } + + /* Finally, if nothing is set use the cached connection for + * the right scope */ + + if (cg_pid_get_owner_uid(0, NULL) >= 0) { + if (bus_open) + *bus_open = sd_bus_open_user; + return &default_user_bus; + } else { + if (bus_open) + *bus_open = sd_bus_open_system; + return &default_system_bus; + } +} + +sd_bus *bus_resolve(sd_bus *bus) { + switch ((uintptr_t) bus) { + case (uintptr_t) SD_BUS_DEFAULT: + return *(bus_choose_default(NULL)); + case (uintptr_t) SD_BUS_DEFAULT_USER: + return default_user_bus; + case (uintptr_t) SD_BUS_DEFAULT_SYSTEM: + return default_system_bus; + default: + return bus; + } +} + +void bus_close_io_fds(sd_bus *b) { + assert(b); + + bus_detach_io_events(b); + + if (b->input_fd != b->output_fd) + safe_close(b->output_fd); + b->output_fd = b->input_fd = safe_close(b->input_fd); +} + +void bus_close_inotify_fd(sd_bus *b) { + assert(b); + + b->inotify_event_source = sd_event_source_disable_unref(b->inotify_event_source); + + b->inotify_fd = safe_close(b->inotify_fd); + b->inotify_watches = mfree(b->inotify_watches); + b->n_inotify_watches = 0; +} + +static void bus_reset_queues(sd_bus *b) { + assert(b); + + while (b->rqueue_size > 0) + bus_message_unref_queued(b->rqueue[--b->rqueue_size], b); + + b->rqueue = mfree(b->rqueue); + + while (b->wqueue_size > 0) + bus_message_unref_queued(b->wqueue[--b->wqueue_size], b); + + b->wqueue = mfree(b->wqueue); +} + +static sd_bus* bus_free(sd_bus *b) { + sd_bus_slot *s; + + assert(b); + assert(!b->track_queue); + assert(!b->tracks); + + b->state = BUS_CLOSED; + + sd_bus_detach_event(b); + + while ((s = b->slots)) { + /* At this point only floating slots can still be + * around, because the non-floating ones keep a + * reference to the bus, and we thus couldn't be + * destructing right now... We forcibly disconnect the + * slots here, so that they still can be referenced by + * apps, but are dead. */ + + assert(s->floating); + bus_slot_disconnect(s, true); + } + + if (b->default_bus_ptr) + *b->default_bus_ptr = NULL; + + bus_close_io_fds(b); + bus_close_inotify_fd(b); + + free(b->label); + free(b->groups); + free(b->rbuffer); + free(b->unique_name); + free(b->auth_buffer); + free(b->address); + free(b->machine); + free(b->description); + free(b->patch_sender); + + free(b->exec_path); + strv_free(b->exec_argv); + + close_many(b->fds, b->n_fds); + free(b->fds); + + bus_reset_queues(b); + + ordered_hashmap_free_free(b->reply_callbacks); + prioq_free(b->reply_callbacks_prioq); + + assert(b->match_callbacks.type == BUS_MATCH_ROOT); + bus_match_free(&b->match_callbacks); + + hashmap_free_free(b->vtable_methods); + hashmap_free_free(b->vtable_properties); + + assert(hashmap_isempty(b->nodes)); + hashmap_free(b->nodes); + + bus_flush_memfd(b); + + assert_se(pthread_mutex_destroy(&b->memfd_cache_mutex) == 0); + + return mfree(b); +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(sd_bus*, bus_free); + +DEFINE_ORIGIN_ID_HELPERS(sd_bus, bus); + +_public_ int sd_bus_new(sd_bus **ret) { + _cleanup_free_ sd_bus *b = NULL; + + assert_return(ret, -EINVAL); + + b = new(sd_bus, 1); + if (!b) + return -ENOMEM; + + *b = (sd_bus) { + .n_ref = 1, + .input_fd = -EBADF, + .output_fd = -EBADF, + .inotify_fd = -EBADF, + .message_version = 1, + .creds_mask = SD_BUS_CREDS_WELL_KNOWN_NAMES|SD_BUS_CREDS_UNIQUE_NAME, + .accept_fd = true, + .origin_id = origin_id_query(), + .n_groups = SIZE_MAX, + .close_on_exit = true, + .ucred = UCRED_INVALID, + .runtime_scope = _RUNTIME_SCOPE_INVALID, + }; + + /* We guarantee that wqueue always has space for at least one entry */ + if (!GREEDY_REALLOC(b->wqueue, 1)) + return -ENOMEM; + + assert_se(pthread_mutex_init(&b->memfd_cache_mutex, NULL) == 0); + + *ret = TAKE_PTR(b); + return 0; +} + +_public_ int sd_bus_set_address(sd_bus *bus, const char *address) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(bus->state == BUS_UNSET, -EPERM); + assert_return(address, -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + return free_and_strdup(&bus->address, address); +} + +_public_ int sd_bus_set_fd(sd_bus *bus, int input_fd, int output_fd) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(bus->state == BUS_UNSET, -EPERM); + assert_return(input_fd >= 0, -EBADF); + assert_return(output_fd >= 0, -EBADF); + assert_return(!bus_origin_changed(bus), -ECHILD); + + bus->input_fd = input_fd; + bus->output_fd = output_fd; + return 0; +} + +_public_ int sd_bus_set_exec(sd_bus *bus, const char *path, char *const *argv) { + _cleanup_strv_free_ char **a = NULL; + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(bus->state == BUS_UNSET, -EPERM); + assert_return(path, -EINVAL); + assert_return(!strv_isempty(argv), -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + a = strv_copy(argv); + if (!a) + return -ENOMEM; + + r = free_and_strdup(&bus->exec_path, path); + if (r < 0) + return r; + + return strv_free_and_replace(bus->exec_argv, a); +} + +_public_ int sd_bus_set_bus_client(sd_bus *bus, int b) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(bus->state == BUS_UNSET, -EPERM); + assert_return(!bus->patch_sender, -EPERM); + assert_return(!bus_origin_changed(bus), -ECHILD); + + bus->bus_client = !!b; + return 0; +} + +_public_ int sd_bus_set_monitor(sd_bus *bus, int b) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(bus->state == BUS_UNSET, -EPERM); + assert_return(!bus_origin_changed(bus), -ECHILD); + + bus->is_monitor = !!b; + return 0; +} + +_public_ int sd_bus_negotiate_fds(sd_bus *bus, int b) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(bus->state == BUS_UNSET, -EPERM); + assert_return(!bus_origin_changed(bus), -ECHILD); + + bus->accept_fd = !!b; + return 0; +} + +_public_ int sd_bus_negotiate_timestamp(sd_bus *bus, int b) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!IN_SET(bus->state, BUS_CLOSING, BUS_CLOSED), -EPERM); + assert_return(!bus_origin_changed(bus), -ECHILD); + + /* This is not actually supported by any of our transports these days, but we do honour it for synthetic + * replies, and maybe one day classic D-Bus learns this too */ + bus->attach_timestamp = !!b; + + return 0; +} + +_public_ int sd_bus_negotiate_creds(sd_bus *bus, int b, uint64_t mask) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(mask <= _SD_BUS_CREDS_ALL, -EINVAL); + assert_return(!IN_SET(bus->state, BUS_CLOSING, BUS_CLOSED), -EPERM); + assert_return(!bus_origin_changed(bus), -ECHILD); + + SET_FLAG(bus->creds_mask, mask, b); + + /* The well knowns we need unconditionally, so that matches can work */ + bus->creds_mask |= SD_BUS_CREDS_WELL_KNOWN_NAMES|SD_BUS_CREDS_UNIQUE_NAME; + + return 0; +} + +_public_ int sd_bus_set_server(sd_bus *bus, int b, sd_id128_t server_id) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(b || sd_id128_equal(server_id, SD_ID128_NULL), -EINVAL); + assert_return(bus->state == BUS_UNSET, -EPERM); + assert_return(!bus_origin_changed(bus), -ECHILD); + + bus->is_server = !!b; + bus->server_id = server_id; + return 0; +} + +_public_ int sd_bus_set_anonymous(sd_bus *bus, int b) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(bus->state == BUS_UNSET, -EPERM); + assert_return(!bus_origin_changed(bus), -ECHILD); + + bus->anonymous_auth = !!b; + return 0; +} + +_public_ int sd_bus_set_trusted(sd_bus *bus, int b) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(bus->state == BUS_UNSET, -EPERM); + assert_return(!bus_origin_changed(bus), -ECHILD); + + bus->trusted = !!b; + return 0; +} + +_public_ int sd_bus_set_description(sd_bus *bus, const char *description) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(bus->state == BUS_UNSET, -EPERM); + assert_return(!bus_origin_changed(bus), -ECHILD); + + return free_and_strdup(&bus->description, description); +} + +_public_ int sd_bus_set_allow_interactive_authorization(sd_bus *bus, int b) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + + bus->allow_interactive_authorization = !!b; + return 0; +} + +_public_ int sd_bus_get_allow_interactive_authorization(sd_bus *bus) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + + return bus->allow_interactive_authorization; +} + +_public_ int sd_bus_set_watch_bind(sd_bus *bus, int b) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(bus->state == BUS_UNSET, -EPERM); + assert_return(!bus_origin_changed(bus), -ECHILD); + + bus->watch_bind = !!b; + return 0; +} + +_public_ int sd_bus_get_watch_bind(sd_bus *bus) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + + return bus->watch_bind; +} + +_public_ int sd_bus_set_connected_signal(sd_bus *bus, int b) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(bus->state == BUS_UNSET, -EPERM); + assert_return(!bus_origin_changed(bus), -ECHILD); + + bus->connected_signal = !!b; + return 0; +} + +_public_ int sd_bus_get_connected_signal(sd_bus *bus) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + + return bus->connected_signal; +} + +static int synthesize_connected_signal(sd_bus *bus) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + int r; + + assert(bus); + + /* If enabled, synthesizes a local "Connected" signal mirroring the local "Disconnected" signal. This is called + * whenever we fully established a connection, i.e. after the authorization phase, and after receiving the + * Hello() reply. Or in other words, whenever we enter BUS_RUNNING state. + * + * This is useful so that clients can start doing stuff whenever the connection is fully established in a way + * that works independently from whether we connected to a full bus or just a direct connection. */ + + if (!bus->connected_signal) + return 0; + + r = sd_bus_message_new_signal( + bus, + &m, + "/org/freedesktop/DBus/Local", + "org.freedesktop.DBus.Local", + "Connected"); + if (r < 0) + return r; + + bus_message_set_sender_local(bus, m); + m->read_counter = ++bus->read_counter; + + r = bus_seal_synthetic_message(bus, m); + if (r < 0) + return r; + + r = bus_rqueue_make_room(bus); + if (r < 0) + return r; + + /* Insert at the very front */ + memmove(bus->rqueue + 1, bus->rqueue, sizeof(sd_bus_message*) * bus->rqueue_size); + bus->rqueue[0] = bus_message_ref_queued(m, bus); + bus->rqueue_size++; + + return 0; +} + +void bus_set_state(sd_bus *bus, enum bus_state state) { + static const char* const table[_BUS_STATE_MAX] = { + [BUS_UNSET] = "UNSET", + [BUS_WATCH_BIND] = "WATCH_BIND", + [BUS_OPENING] = "OPENING", + [BUS_AUTHENTICATING] = "AUTHENTICATING", + [BUS_HELLO] = "HELLO", + [BUS_RUNNING] = "RUNNING", + [BUS_CLOSING] = "CLOSING", + [BUS_CLOSED] = "CLOSED", + }; + + assert(bus); + assert(state < _BUS_STATE_MAX); + + if (state == bus->state) + return; + + log_debug("Bus %s: changing state %s %s %s", strna(bus->description), + table[bus->state], special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), table[state]); + bus->state = state; +} + +static int hello_callback(sd_bus_message *reply, void *userdata, sd_bus_error *error) { + const char *s; + sd_bus *bus; + int r; + + assert(reply); + bus = reply->bus; + assert(bus); + assert(IN_SET(bus->state, BUS_HELLO, BUS_CLOSING)); + + r = sd_bus_message_get_errno(reply); + if (r > 0) { + r = -r; + goto fail; + } + + r = sd_bus_message_read(reply, "s", &s); + if (r < 0) + goto fail; + + if (!service_name_is_valid(s) || s[0] != ':') { + r = -EBADMSG; + goto fail; + } + + r = free_and_strdup(&bus->unique_name, s); + if (r < 0) + goto fail; + + if (bus->state == BUS_HELLO) { + bus_set_state(bus, BUS_RUNNING); + + r = synthesize_connected_signal(bus); + if (r < 0) + goto fail; + } + + return 1; + +fail: + /* When Hello() failed, let's propagate this in two ways: first we return the error immediately here, + * which is the propagated up towards the event loop. Let's also invalidate the connection, so that + * if the user then calls back into us again we won't wait any longer. */ + + bus_set_state(bus, BUS_CLOSING); + return r; +} + +static int bus_send_hello(sd_bus *bus) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + int r; + + assert(bus); + + if (!bus->bus_client) + return 0; + + r = sd_bus_message_new_method_call( + bus, + &m, + "org.freedesktop.DBus", + "/org/freedesktop/DBus", + "org.freedesktop.DBus", + "Hello"); + if (r < 0) + return r; + + return sd_bus_call_async(bus, NULL, m, hello_callback, NULL, 0); +} + +int bus_start_running(sd_bus *bus) { + struct reply_callback *c; + usec_t n; + int r; + + assert(bus); + assert(bus->state < BUS_HELLO); + + /* We start all method call timeouts when we enter BUS_HELLO or BUS_RUNNING mode. At this point let's convert + * all relative to absolute timestamps. Note that we do not reshuffle the reply callback priority queue since + * adding a fixed value to all entries should not alter the internal order. */ + + n = now(CLOCK_MONOTONIC); + ORDERED_HASHMAP_FOREACH(c, bus->reply_callbacks) { + if (c->timeout_usec == 0) + continue; + + c->timeout_usec = usec_add(n, c->timeout_usec); + } + + if (bus->bus_client) { + bus_set_state(bus, BUS_HELLO); + return 1; + } + + bus_set_state(bus, BUS_RUNNING); + + r = synthesize_connected_signal(bus); + if (r < 0) + return r; + + return 1; +} + +static int parse_address_key(const char **p, const char *key, char **value) { + _cleanup_free_ char *r = NULL; + size_t l, n = 0; + const char *a; + + assert(p); + assert(*p); + assert(value); + + if (key) { + l = strlen(key); + if (strncmp(*p, key, l) != 0) + return 0; + + if ((*p)[l] != '=') + return 0; + + if (*value) + return -EINVAL; + + a = *p + l + 1; + } else + a = *p; + + while (!IN_SET(*a, ';', ',', 0)) { + char c; + + if (*a == '%') { + int x, y; + + x = unhexchar(a[1]); + if (x < 0) + return x; + + y = unhexchar(a[2]); + if (y < 0) + return y; + + c = (char) ((x << 4) | y); + a += 3; + } else { + c = *a; + a++; + } + + if (!GREEDY_REALLOC(r, n + 2)) + return -ENOMEM; + + r[n++] = c; + } + + if (!r) { + r = strdup(""); + if (!r) + return -ENOMEM; + } else + r[n] = 0; + + if (*a == ',') + a++; + + *p = a; + + free_and_replace(*value, r); + + return 1; +} + +static void skip_address_key(const char **p) { + assert(p); + assert(*p); + + *p += strcspn(*p, ","); + + if (**p == ',') + (*p)++; +} + +static int parse_unix_address(sd_bus *b, const char **p, char **guid) { + _cleanup_free_ char *path = NULL, *abstract = NULL; + size_t l; + int r; + + assert(b); + assert(p); + assert(*p); + assert(guid); + + while (!IN_SET(**p, 0, ';')) { + r = parse_address_key(p, "guid", guid); + if (r < 0) + return r; + else if (r > 0) + continue; + + r = parse_address_key(p, "path", &path); + if (r < 0) + return r; + else if (r > 0) + continue; + + r = parse_address_key(p, "abstract", &abstract); + if (r < 0) + return r; + else if (r > 0) + continue; + + skip_address_key(p); + } + + if (!path && !abstract) + return -EINVAL; + + if (path && abstract) + return -EINVAL; + + if (path) { + l = strlen(path); + if (l >= sizeof(b->sockaddr.un.sun_path)) /* We insist on NUL termination */ + return -E2BIG; + + b->sockaddr.un = (struct sockaddr_un) { + .sun_family = AF_UNIX, + }; + + memcpy(b->sockaddr.un.sun_path, path, l); + b->sockaddr_size = offsetof(struct sockaddr_un, sun_path) + l + 1; + + } else { + assert(abstract); + + l = strlen(abstract); + if (l >= sizeof(b->sockaddr.un.sun_path) - 1) /* We insist on NUL termination */ + return -E2BIG; + + b->sockaddr.un = (struct sockaddr_un) { + .sun_family = AF_UNIX, + }; + + memcpy(b->sockaddr.un.sun_path+1, abstract, l); + b->sockaddr_size = offsetof(struct sockaddr_un, sun_path) + 1 + l; + } + + b->is_local = true; + + return 0; +} + +static int parse_tcp_address(sd_bus *b, const char **p, char **guid) { + _cleanup_free_ char *host = NULL, *port = NULL, *family = NULL; + int r; + struct addrinfo *result, hints = { + .ai_socktype = SOCK_STREAM, + }; + + assert(b); + assert(p); + assert(*p); + assert(guid); + + while (!IN_SET(**p, 0, ';')) { + r = parse_address_key(p, "guid", guid); + if (r < 0) + return r; + else if (r > 0) + continue; + + r = parse_address_key(p, "host", &host); + if (r < 0) + return r; + else if (r > 0) + continue; + + r = parse_address_key(p, "port", &port); + if (r < 0) + return r; + else if (r > 0) + continue; + + r = parse_address_key(p, "family", &family); + if (r < 0) + return r; + else if (r > 0) + continue; + + skip_address_key(p); + } + + if (!host || !port) + return -EINVAL; + + if (family) { + hints.ai_family = af_from_ipv4_ipv6(family); + if (hints.ai_family == AF_UNSPEC) + return -EINVAL; + } + + r = getaddrinfo(host, port, &hints, &result); + if (r == EAI_SYSTEM) + return -errno; + else if (r != 0) + return -EADDRNOTAVAIL; + + memcpy(&b->sockaddr, result->ai_addr, result->ai_addrlen); + b->sockaddr_size = result->ai_addrlen; + + freeaddrinfo(result); + + b->is_local = false; + + return 0; +} + +static int parse_exec_address(sd_bus *b, const char **p, char **guid) { + char *path = NULL; + unsigned n_argv = 0, j; + char **argv = NULL; + int r; + + assert(b); + assert(p); + assert(*p); + assert(guid); + + while (!IN_SET(**p, 0, ';')) { + r = parse_address_key(p, "guid", guid); + if (r < 0) + goto fail; + else if (r > 0) + continue; + + r = parse_address_key(p, "path", &path); + if (r < 0) + goto fail; + else if (r > 0) + continue; + + if (startswith(*p, "argv")) { + unsigned ul; + + errno = 0; + ul = strtoul(*p + 4, (char**) p, 10); + if (errno > 0 || **p != '=' || ul > 256) { + r = -EINVAL; + goto fail; + } + + (*p)++; + + if (ul >= n_argv) { + if (!GREEDY_REALLOC0(argv, ul + 2)) { + r = -ENOMEM; + goto fail; + } + + n_argv = ul + 1; + } + + r = parse_address_key(p, NULL, argv + ul); + if (r < 0) + goto fail; + + continue; + } + + skip_address_key(p); + } + + if (!path) { + r = -EINVAL; + goto fail; + } + + /* Make sure there are no holes in the array, with the + * exception of argv[0] */ + for (j = 1; j < n_argv; j++) + if (!argv[j]) { + r = -EINVAL; + goto fail; + } + + if (argv && argv[0] == NULL) { + argv[0] = strdup(path); + if (!argv[0]) { + r = -ENOMEM; + goto fail; + } + } + + b->exec_path = path; + b->exec_argv = argv; + + b->is_local = false; + + return 0; + +fail: + for (j = 0; j < n_argv; j++) + free(argv[j]); + + free(argv); + free(path); + return r; +} + +static int parse_container_unix_address(sd_bus *b, const char **p, char **guid) { + _cleanup_free_ char *machine = NULL, *pid = NULL; + int r; + + assert(b); + assert(p); + assert(*p); + assert(guid); + + while (!IN_SET(**p, 0, ';')) { + r = parse_address_key(p, "guid", guid); + if (r < 0) + return r; + else if (r > 0) + continue; + + r = parse_address_key(p, "machine", &machine); + if (r < 0) + return r; + else if (r > 0) + continue; + + r = parse_address_key(p, "pid", &pid); + if (r < 0) + return r; + else if (r > 0) + continue; + + skip_address_key(p); + } + + if (!machine == !pid) + return -EINVAL; + + if (machine) { + if (!hostname_is_valid(machine, VALID_HOSTNAME_DOT_HOST)) + return -EINVAL; + + free_and_replace(b->machine, machine); + } else + b->machine = mfree(b->machine); + + if (pid) { + r = parse_pid(pid, &b->nspid); + if (r < 0) + return r; + } else + b->nspid = 0; + + b->sockaddr.un = (struct sockaddr_un) { + .sun_family = AF_UNIX, + /* Note that we use the old /var/run prefix here, to increase compatibility with really old containers */ + .sun_path = "/var/run/dbus/system_bus_socket", + }; + b->sockaddr_size = SOCKADDR_UN_LEN(b->sockaddr.un); + b->is_local = false; + + return 0; +} + +static void bus_reset_parsed_address(sd_bus *b) { + assert(b); + + zero(b->sockaddr); + b->sockaddr_size = 0; + b->exec_argv = strv_free(b->exec_argv); + b->exec_path = mfree(b->exec_path); + b->server_id = SD_ID128_NULL; + b->machine = mfree(b->machine); + b->nspid = 0; +} + +static int bus_parse_next_address(sd_bus *b) { + _cleanup_free_ char *guid = NULL; + const char *a; + int r; + + assert(b); + + if (!b->address) + return 0; + if (b->address[b->address_index] == 0) + return 0; + + bus_reset_parsed_address(b); + + a = b->address + b->address_index; + + while (*a != 0) { + + if (*a == ';') { + a++; + continue; + } + + if (startswith(a, "unix:")) { + a += 5; + + r = parse_unix_address(b, &a, &guid); + if (r < 0) + return r; + break; + + } else if (startswith(a, "tcp:")) { + + a += 4; + r = parse_tcp_address(b, &a, &guid); + if (r < 0) + return r; + + break; + + } else if (startswith(a, "unixexec:")) { + + a += 9; + r = parse_exec_address(b, &a, &guid); + if (r < 0) + return r; + + break; + + } else if (startswith(a, "x-machine-unix:")) { + + a += 15; + r = parse_container_unix_address(b, &a, &guid); + if (r < 0) + return r; + + break; + } + + a = strchr(a, ';'); + if (!a) + return 0; + } + + if (guid) { + r = sd_id128_from_string(guid, &b->server_id); + if (r < 0) + return r; + } + + b->address_index = a - b->address; + return 1; +} + +static void bus_kill_exec(sd_bus *bus) { + if (!pid_is_valid(bus->busexec_pid)) + return; + + sigterm_wait(TAKE_PID(bus->busexec_pid)); +} + +static int bus_start_address(sd_bus *b) { + int r; + + assert(b); + + for (;;) { + bus_close_io_fds(b); + bus_close_inotify_fd(b); + + bus_kill_exec(b); + + /* If you provide multiple different bus-addresses, we + * try all of them in order and use the first one that + * succeeds. */ + + if (b->exec_path) + r = bus_socket_exec(b); + else if ((b->nspid > 0 || b->machine) && b->sockaddr.sa.sa_family != AF_UNSPEC) + r = bus_container_connect_socket(b); + else if (b->sockaddr.sa.sa_family != AF_UNSPEC) + r = bus_socket_connect(b); + else + goto next; + + if (r >= 0) { + int q; + + q = bus_attach_io_events(b); + if (q < 0) + return q; + + q = bus_attach_inotify_event(b); + if (q < 0) + return q; + + return r; + } + + b->last_connect_error = -r; + + next: + r = bus_parse_next_address(b); + if (r < 0) + return r; + if (r == 0) + return b->last_connect_error > 0 ? -b->last_connect_error : -ECONNREFUSED; + } +} + +int bus_next_address(sd_bus *b) { + assert(b); + + bus_reset_parsed_address(b); + return bus_start_address(b); +} + +static int bus_start_fd(sd_bus *b) { + struct stat st; + int r; + + assert(b); + assert(b->input_fd >= 0); + assert(b->output_fd >= 0); + + if (DEBUG_LOGGING) { + _cleanup_free_ char *pi = NULL, *po = NULL; + (void) fd_get_path(b->input_fd, &pi); + (void) fd_get_path(b->output_fd, &po); + log_debug("sd-bus: starting bus%s%s on fds %d/%d (%s, %s)...", + b->description ? " " : "", strempty(b->description), + b->input_fd, b->output_fd, + pi ?: "???", po ?: "???"); + } + + r = fd_nonblock(b->input_fd, true); + if (r < 0) + return r; + + r = fd_cloexec(b->input_fd, true); + if (r < 0) + return r; + + if (b->input_fd != b->output_fd) { + r = fd_nonblock(b->output_fd, true); + if (r < 0) + return r; + + r = fd_cloexec(b->output_fd, true); + if (r < 0) + return r; + } + + if (fstat(b->input_fd, &st) < 0) + return -errno; + + return bus_socket_take_fd(b); +} + +_public_ int sd_bus_start(sd_bus *bus) { + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(bus->state == BUS_UNSET, -EPERM); + assert_return(!bus_origin_changed(bus), -ECHILD); + + bus_set_state(bus, BUS_OPENING); + + if (bus->is_server && bus->bus_client) + return -EINVAL; + + if (bus->input_fd >= 0) + r = bus_start_fd(bus); + else if (bus->address || bus->sockaddr.sa.sa_family != AF_UNSPEC || bus->exec_path || bus->machine) + r = bus_start_address(bus); + else + return -EINVAL; + + if (r < 0) { + sd_bus_close(bus); + return r; + } + + return bus_send_hello(bus); +} + +_public_ int sd_bus_open_with_description(sd_bus **ret, const char *description) { + const char *e; + _cleanup_(bus_freep) sd_bus *b = NULL; + int r; + + assert_return(ret, -EINVAL); + + /* Let's connect to the starter bus if it is set, and + * otherwise to the bus that is appropriate for the scope + * we are running in */ + + e = secure_getenv("DBUS_STARTER_BUS_TYPE"); + if (e) { + if (streq(e, "system")) + return sd_bus_open_system_with_description(ret, description); + else if (STR_IN_SET(e, "session", "user")) + return sd_bus_open_user_with_description(ret, description); + } + + e = secure_getenv("DBUS_STARTER_ADDRESS"); + if (!e) { + if (cg_pid_get_owner_uid(0, NULL) >= 0) + return sd_bus_open_user_with_description(ret, description); + else + return sd_bus_open_system_with_description(ret, description); + } + + r = sd_bus_new(&b); + if (r < 0) + return r; + + r = sd_bus_set_address(b, e); + if (r < 0) + return r; + + b->bus_client = true; + + /* We don't know whether the bus is trusted or not, so better + * be safe, and authenticate everything */ + b->trusted = false; + b->is_local = false; + b->creds_mask |= SD_BUS_CREDS_UID | SD_BUS_CREDS_EUID | SD_BUS_CREDS_EFFECTIVE_CAPS; + + r = sd_bus_start(b); + if (r < 0) + return r; + + *ret = TAKE_PTR(b); + return 0; +} + +_public_ int sd_bus_open(sd_bus **ret) { + return sd_bus_open_with_description(ret, NULL); +} + +int bus_set_address_system(sd_bus *b) { + const char *e; + int r; + + assert(b); + + e = secure_getenv("DBUS_SYSTEM_BUS_ADDRESS"); + + r = sd_bus_set_address(b, e ?: DEFAULT_SYSTEM_BUS_ADDRESS); + if (r < 0) + return r; + + b->runtime_scope = RUNTIME_SCOPE_SYSTEM; + return r; +} + +_public_ int sd_bus_open_system_with_description(sd_bus **ret, const char *description) { + _cleanup_(bus_freep) sd_bus *b = NULL; + int r; + + assert_return(ret, -EINVAL); + + r = sd_bus_new(&b); + if (r < 0) + return r; + + if (description) { + r = sd_bus_set_description(b, description); + if (r < 0) + return r; + } + + r = bus_set_address_system(b); + if (r < 0) + return r; + + b->bus_client = true; + + /* Let's do per-method access control on the system bus. We + * need the caller's UID and capability set for that. */ + b->trusted = false; + b->creds_mask |= SD_BUS_CREDS_UID | SD_BUS_CREDS_EUID | SD_BUS_CREDS_EFFECTIVE_CAPS; + b->is_local = true; + + r = sd_bus_start(b); + if (r < 0) + return r; + + *ret = TAKE_PTR(b); + return 0; +} + +_public_ int sd_bus_open_system(sd_bus **ret) { + return sd_bus_open_system_with_description(ret, NULL); +} + +int bus_set_address_user(sd_bus *b) { + const char *a; + _cleanup_free_ char *_a = NULL; + int r; + + assert(b); + + a = secure_getenv("DBUS_SESSION_BUS_ADDRESS"); + if (!a) { + const char *e; + _cleanup_free_ char *ee = NULL; + + e = secure_getenv("XDG_RUNTIME_DIR"); + if (!e) + return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM), + "sd-bus: $XDG_RUNTIME_DIR not set, cannot connect to user bus."); + + ee = bus_address_escape(e); + if (!ee) + return -ENOMEM; + + if (asprintf(&_a, DEFAULT_USER_BUS_ADDRESS_FMT, ee) < 0) + return -ENOMEM; + a = _a; + } + + r = sd_bus_set_address(b, a); + if (r < 0) + return r; + + b->runtime_scope = RUNTIME_SCOPE_USER; + return r; +} + +_public_ int sd_bus_open_user_with_description(sd_bus **ret, const char *description) { + _cleanup_(bus_freep) sd_bus *b = NULL; + int r; + + assert_return(ret, -EINVAL); + + r = sd_bus_new(&b); + if (r < 0) + return r; + + if (description) { + r = sd_bus_set_description(b, description); + if (r < 0) + return r; + } + + r = bus_set_address_user(b); + if (r < 0) + return r; + + b->bus_client = true; + + /* We don't do any per-method access control on the user bus. */ + b->trusted = true; + b->is_local = true; + + r = sd_bus_start(b); + if (r < 0) + return r; + + *ret = TAKE_PTR(b); + return 0; +} + +_public_ int sd_bus_open_user(sd_bus **ret) { + return sd_bus_open_user_with_description(ret, NULL); +} + +int bus_set_address_system_remote(sd_bus *b, const char *host) { + _cleanup_free_ char *e = NULL; + char *m = NULL, *c = NULL, *a, *rbracket = NULL, *p = NULL; + + assert(b); + assert(host); + + /* Skip ":"s in ipv6 addresses */ + if (*host == '[') { + char *t; + + rbracket = strchr(host, ']'); + if (!rbracket) + return -EINVAL; + t = strndupa_safe(host + 1, rbracket - host - 1); + e = bus_address_escape(t); + if (!e) + return -ENOMEM; + } else if ((a = strchr(host, '@'))) { + if (*(a + 1) == '[') { + _cleanup_free_ char *t = NULL; + + rbracket = strchr(a + 1, ']'); + if (!rbracket) + return -EINVAL; + t = new0(char, strlen(host)); + if (!t) + return -ENOMEM; + strncat(t, host, a - host + 1); + strncat(t, a + 2, rbracket - a - 2); + e = bus_address_escape(t); + if (!e) + return -ENOMEM; + } else if (*(a + 1) == '\0' || strchr(a + 1, '@')) + return -EINVAL; + } + + /* Let's see if a port was given */ + m = strchr(rbracket ? rbracket + 1 : host, ':'); + if (m) { + char *t; + bool got_forward_slash = false; + + p = m + 1; + + t = strchr(p, '/'); + if (t) { + p = strndupa_safe(p, t - p); + got_forward_slash = true; + } + + if (!in_charset(p, "0123456789") || *p == '\0') { + if (!hostname_is_valid(p, 0) || got_forward_slash) + return -EINVAL; + + m = TAKE_PTR(p); + goto interpret_port_as_machine_old_syntax; + } + } + + /* Let's see if a machine was given */ + m = strchr(rbracket ? rbracket + 1 : host, '/'); + if (m) { + m++; +interpret_port_as_machine_old_syntax: + /* Let's make sure this is not a port of some kind, + * and is a valid machine name. */ + if (!in_charset(m, "0123456789") && hostname_is_valid(m, 0)) + c = strjoina(",argv", p ? "7" : "5", "=--machine=", m); + } + + if (!e) { + char *t; + + t = strndupa_safe(host, strcspn(host, ":/")); + + e = bus_address_escape(t); + if (!e) + return -ENOMEM; + } + + a = strjoin("unixexec:path=ssh,argv1=-xT", p ? ",argv2=-p,argv3=" : "", strempty(p), + ",argv", p ? "4" : "2", "=--,argv", p ? "5" : "3", "=", e, + ",argv", p ? "6" : "4", "=systemd-stdio-bridge", c); + if (!a) + return -ENOMEM; + + return free_and_replace(b->address, a); +} + +_public_ int sd_bus_open_system_remote(sd_bus **ret, const char *host) { + _cleanup_(bus_freep) sd_bus *b = NULL; + int r; + + assert_return(host, -EINVAL); + assert_return(ret, -EINVAL); + + r = sd_bus_new(&b); + if (r < 0) + return r; + + r = bus_set_address_system_remote(b, host); + if (r < 0) + return r; + + b->bus_client = true; + b->trusted = false; + b->runtime_scope = RUNTIME_SCOPE_SYSTEM; + b->is_local = false; + + r = sd_bus_start(b); + if (r < 0) + return r; + + *ret = TAKE_PTR(b); + return 0; +} + +int bus_set_address_machine(sd_bus *b, RuntimeScope runtime_scope, const char *machine) { + _cleanup_free_ char *a = NULL; + const char *rhs; + + assert(b); + assert(machine); + + rhs = strchr(machine, '@'); + if (rhs || runtime_scope == RUNTIME_SCOPE_USER) { + _cleanup_free_ char *u = NULL, *eu = NULL, *erhs = NULL; + + /* If there's an "@" in the container specification, we'll connect as a user specified at its + * left hand side, which is useful in combination with user=true. This isn't as trivial as it + * might sound: it's not sufficient to enter the container and connect to some socket there, + * since the --user socket path depends on $XDG_RUNTIME_DIR which is set via PAM. Thus, to be + * able to connect, we need to have a PAM session. Our way out? We use systemd-run to get + * into the container and acquire a PAM session there, and then invoke systemd-stdio-bridge + * in it, which propagates the bus transport to us. */ + + if (rhs) { + if (rhs > machine) + u = strndup(machine, rhs - machine); + else + u = getusername_malloc(); /* Empty user name, let's use the local one */ + if (!u) + return -ENOMEM; + + eu = bus_address_escape(u); + if (!eu) + return -ENOMEM; + + rhs++; + } else { + /* No "@" specified but we shall connect to the user instance? Then assume root (and + * not a user named identically to the calling one). This means: + * + * --machine=foobar --user → connect to user bus of root user in container "foobar" + * --machine=@foobar --user → connect to user bus of user named like the calling user in container "foobar" + * + * Why? so that behaviour for "--machine=foobar --system" is roughly similar to + * "--machine=foobar --user": both times we unconditionally connect as root user + * regardless what the calling user is. */ + + rhs = machine; + } + + if (!isempty(rhs)) { + erhs = bus_address_escape(rhs); + if (!erhs) + return -ENOMEM; + } + + /* systemd-run -M… -PGq --wait -pUser=… -pPAMName=login systemd-stdio-bridge */ + + a = strjoin("unixexec:path=systemd-run," + "argv1=-M", erhs ?: ".host", "," + "argv2=-PGq," + "argv3=--wait," + "argv4=-pUser%3d", eu ?: "root", ",", + "argv5=-pPAMName%3dlogin," + "argv6=systemd-stdio-bridge"); + if (!a) + return -ENOMEM; + + if (runtime_scope == RUNTIME_SCOPE_USER) { + /* Ideally we'd use the "--user" switch to systemd-stdio-bridge here, but it's only + * available in recent systemd versions. Using the "-p" switch with the explicit path + * is a working alternative, and is compatible with older versions, hence that's what + * we use here. */ + if (!strextend(&a, ",argv7=-punix:path%3d%24%7bXDG_RUNTIME_DIR%7d/bus")) + return -ENOMEM; + } + } else { + _cleanup_free_ char *e = NULL; + + /* Just a container name, we can go the simple way, and just join the container, and connect + * to the well-known path of the system bus there. */ + + e = bus_address_escape(machine); + if (!e) + return -ENOMEM; + + a = strjoin("x-machine-unix:machine=", e); + if (!a) + return -ENOMEM; + } + + return free_and_replace(b->address, a); +} + +static int user_and_machine_valid(const char *user_and_machine) { + const char *h; + + /* Checks if a container specification in the form "user@container" or just "container" is valid. + * + * If the "@" syntax is used we'll allow either the "user" or the "container" part to be omitted, but + * not both. */ + + h = strchr(user_and_machine, '@'); + if (!h) + h = user_and_machine; + else { + _cleanup_free_ char *user = NULL; + + user = strndup(user_and_machine, h - user_and_machine); + if (!user) + return -ENOMEM; + + if (!isempty(user) && !valid_user_group_name(user, VALID_USER_RELAX | VALID_USER_ALLOW_NUMERIC)) + return false; + + h++; + + if (isempty(h)) + return !isempty(user); + } + + return hostname_is_valid(h, VALID_HOSTNAME_DOT_HOST); +} + +static int user_and_machine_equivalent(const char *user_and_machine) { + _cleanup_free_ char *un = NULL; + const char *f; + + /* Returns true if the specified user+machine name are actually equivalent to our own identity and + * our own host. If so we can shortcut things. Why bother? Because that way we don't have to fork + * off short-lived worker processes that are then unavailable for authentication and logging in the + * peer. Moreover joining a namespace requires privileges. If we are in the right namespace anyway, + * we can avoid permission problems thus. */ + + assert(user_and_machine); + + /* Omitting the user name means that we shall use the same user name as we run as locally, which + * means we'll end up on the same host, let's shortcut */ + if (streq(user_and_machine, "@.host")) + return true; + + /* Otherwise, if we are root, then we can also allow the ".host" syntax, as that's the user this + * would connect to. */ + uid_t uid = geteuid(); + + if (uid == 0 && STR_IN_SET(user_and_machine, ".host", "root@.host", "0@.host")) + return true; + + /* Otherwise, we have to figure out our user id and name, and compare things with that. */ + char buf[DECIMAL_STR_MAX(uid_t)]; + xsprintf(buf, UID_FMT, uid); + + f = startswith(user_and_machine, buf); + if (!f) { + un = getusername_malloc(); + if (!un) + return -ENOMEM; + + f = startswith(user_and_machine, un); + if (!f) + return false; + } + + return STR_IN_SET(f, "@", "@.host"); +} + +_public_ int sd_bus_open_system_machine(sd_bus **ret, const char *user_and_machine) { + _cleanup_(bus_freep) sd_bus *b = NULL; + int r; + + assert_return(user_and_machine, -EINVAL); + assert_return(ret, -EINVAL); + + if (user_and_machine_equivalent(user_and_machine)) + return sd_bus_open_system(ret); + + r = user_and_machine_valid(user_and_machine); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; + + r = sd_bus_new(&b); + if (r < 0) + return r; + + r = bus_set_address_machine(b, RUNTIME_SCOPE_SYSTEM, user_and_machine); + if (r < 0) + return r; + + b->bus_client = true; + b->runtime_scope = RUNTIME_SCOPE_SYSTEM; + + r = sd_bus_start(b); + if (r < 0) + return r; + + *ret = TAKE_PTR(b); + return 0; +} + +_public_ int sd_bus_open_user_machine(sd_bus **ret, const char *user_and_machine) { + _cleanup_(bus_freep) sd_bus *b = NULL; + int r; + + assert_return(user_and_machine, -EINVAL); + assert_return(ret, -EINVAL); + + /* Shortcut things if we'd end up on this host and as the same user. */ + if (user_and_machine_equivalent(user_and_machine)) + return sd_bus_open_user(ret); + + r = user_and_machine_valid(user_and_machine); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; + + r = sd_bus_new(&b); + if (r < 0) + return r; + + r = bus_set_address_machine(b, RUNTIME_SCOPE_USER, user_and_machine); + if (r < 0) + return r; + + b->bus_client = true; + b->trusted = true; + + r = sd_bus_start(b); + if (r < 0) + return r; + + *ret = TAKE_PTR(b); + return 0; +} + +_public_ void sd_bus_close(sd_bus *bus) { + if (!bus) + return; + if (bus->state == BUS_CLOSED) + return; + if (bus_origin_changed(bus)) + return; + + /* Don't leave ssh hanging around */ + bus_kill_exec(bus); + + bus_set_state(bus, BUS_CLOSED); + + sd_bus_detach_event(bus); + + /* Drop all queued messages so that they drop references to + * the bus object and the bus may be freed */ + bus_reset_queues(bus); + + bus_close_io_fds(bus); + bus_close_inotify_fd(bus); +} + +_public_ sd_bus *sd_bus_close_unref(sd_bus *bus) { + if (!bus) + return NULL; + if (bus_origin_changed(bus)) + return NULL; + + sd_bus_close(bus); + + return sd_bus_unref(bus); +} + +_public_ sd_bus* sd_bus_flush_close_unref(sd_bus *bus) { + if (!bus) + return NULL; + if (bus_origin_changed(bus)) + return NULL; + + /* Have to do this before flush() to prevent hang */ + bus_kill_exec(bus); + sd_bus_flush(bus); + + return sd_bus_close_unref(bus); +} + +void bus_enter_closing(sd_bus *bus) { + assert(bus); + + if (!IN_SET(bus->state, BUS_WATCH_BIND, BUS_OPENING, BUS_AUTHENTICATING, BUS_HELLO, BUS_RUNNING)) + return; + + bus_set_state(bus, BUS_CLOSING); +} + +/* Define manually so we can add the PID check */ +_public_ sd_bus *sd_bus_ref(sd_bus *bus) { + if (!bus) + return NULL; + if (bus_origin_changed(bus)) + return NULL; + + bus->n_ref++; + + return bus; +} + +_public_ sd_bus* sd_bus_unref(sd_bus *bus) { + if (!bus) + return NULL; + if (bus_origin_changed(bus)) + return NULL; + + assert(bus->n_ref > 0); + if (--bus->n_ref > 0) + return NULL; + + return bus_free(bus); +} + +_public_ int sd_bus_is_open(sd_bus *bus) { + if (!bus) + return 0; + + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + + return BUS_IS_OPEN(bus->state); +} + +_public_ int sd_bus_is_ready(sd_bus *bus) { + if (!bus) + return 0; + + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + + return bus->state == BUS_RUNNING; +} + +_public_ int sd_bus_can_send(sd_bus *bus, char type) { + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(bus->state != BUS_UNSET, -ENOTCONN); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (bus->is_monitor) + return 0; + + if (type == SD_BUS_TYPE_UNIX_FD) { + if (!bus->accept_fd) + return 0; + + r = bus_ensure_running(bus); + if (r < 0) + return r; + + return bus->can_fds; + } + + return bus_type_is_valid(type); +} + +_public_ int sd_bus_get_bus_id(sd_bus *bus, sd_id128_t *id) { + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(id, -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + r = bus_ensure_running(bus); + if (r < 0) + return r; + + *id = bus->server_id; + return 0; +} + +#define COOKIE_CYCLED (UINT32_C(1) << 31) + +static uint64_t cookie_inc(uint64_t cookie) { + + /* Stay within the 32-bit range, since classic D-Bus can't deal with more */ + if (cookie >= UINT32_MAX) + return COOKIE_CYCLED; /* Don't go back to zero, but use the highest bit for checking + * whether we are looping. */ + + return cookie + 1; +} + +static int next_cookie(sd_bus *b) { + uint64_t new_cookie; + + assert(b); + + new_cookie = cookie_inc(b->cookie); + + /* Small optimization: don't bother with checking for cookie reuse until we overran cookiespace at + * least once, but then do it thorougly. */ + if (FLAGS_SET(new_cookie, COOKIE_CYCLED)) { + uint32_t i; + + /* Check if the cookie is currently in use. If so, pick the next one */ + for (i = 0; i < COOKIE_CYCLED; i++) { + if (!ordered_hashmap_contains(b->reply_callbacks, &new_cookie)) + goto good; + + new_cookie = cookie_inc(new_cookie); + } + + /* Can't fulfill request */ + return -EBUSY; + } + +good: + b->cookie = new_cookie; + return 0; +} + +static int bus_seal_message(sd_bus *b, sd_bus_message *m, usec_t timeout) { + int r; + + assert(b); + assert(m); + + if (m->sealed) { + /* If we copy the same message to multiple + * destinations, avoid using the same cookie + * numbers. */ + b->cookie = MAX(b->cookie, BUS_MESSAGE_COOKIE(m)); + return 0; + } + + if (timeout == 0) { + r = sd_bus_get_method_call_timeout(b, &timeout); + if (r < 0) + return r; + } + + if (!m->sender && b->patch_sender) { + r = sd_bus_message_set_sender(m, b->patch_sender); + if (r < 0) + return r; + } + + r = next_cookie(b); + if (r < 0) + return r; + + return sd_bus_message_seal(m, b->cookie, timeout); +} + +static int bus_remarshal_message(sd_bus *b, sd_bus_message **m) { + bool remarshal = false; + + assert(b); + + /* wrong packet version */ + if (b->message_version != 0 && b->message_version != (*m)->header->version) + remarshal = true; + + /* wrong packet endianness */ + if (b->message_endian != 0 && b->message_endian != (*m)->header->endian) + remarshal = true; + + return remarshal ? bus_message_remarshal(b, m) : 0; +} + +int bus_seal_synthetic_message(sd_bus *b, sd_bus_message *m) { + assert(b); + assert(m); + + /* Fake some timestamps, if they were requested, and not + * already initialized */ + if (b->attach_timestamp) { + if (m->realtime <= 0) + m->realtime = now(CLOCK_REALTIME); + + if (m->monotonic <= 0) + m->monotonic = now(CLOCK_MONOTONIC); + } + + /* The bus specification says the serial number cannot be 0, + * hence let's fill something in for synthetic messages. Since + * synthetic messages might have a fake sender and we don't + * want to interfere with the real sender's serial numbers we + * pick a fixed, artificial one. */ + return sd_bus_message_seal(m, UINT32_MAX, 0); +} + +static int bus_write_message(sd_bus *bus, sd_bus_message *m, size_t *idx) { + int r; + + assert(bus); + assert(m); + + r = bus_socket_write_message(bus, m, idx); + if (r <= 0) + return r; + + if (*idx >= BUS_MESSAGE_SIZE(m)) + log_debug("Sent message type=%s sender=%s destination=%s path=%s interface=%s member=%s" + " cookie=%" PRIu64 " reply_cookie=%" PRIu64 + " signature=%s error-name=%s error-message=%s", + bus_message_type_to_string(m->header->type), + strna(sd_bus_message_get_sender(m)), + strna(sd_bus_message_get_destination(m)), + strna(sd_bus_message_get_path(m)), + strna(sd_bus_message_get_interface(m)), + strna(sd_bus_message_get_member(m)), + BUS_MESSAGE_COOKIE(m), + m->reply_cookie, + strna(m->root_container.signature), + strna(m->error.name), + strna(m->error.message)); + + return r; +} + +static int dispatch_wqueue(sd_bus *bus) { + int r, ret = 0; + + assert(bus); + assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO)); + + while (bus->wqueue_size > 0) { + + r = bus_write_message(bus, bus->wqueue[0], &bus->windex); + if (r < 0) + return r; + else if (r == 0) + /* Didn't do anything this time */ + return ret; + else if (bus->windex >= BUS_MESSAGE_SIZE(bus->wqueue[0])) { + /* Fully written. Let's drop the entry from + * the queue. + * + * This isn't particularly optimized, but + * well, this is supposed to be our worst-case + * buffer only, and the socket buffer is + * supposed to be our primary buffer, and if + * it got full, then all bets are off + * anyway. */ + + bus->wqueue_size--; + bus_message_unref_queued(bus->wqueue[0], bus); + memmove(bus->wqueue, bus->wqueue + 1, sizeof(sd_bus_message*) * bus->wqueue_size); + bus->windex = 0; + + ret = 1; + } + } + + return ret; +} + +static int bus_read_message(sd_bus *bus) { + assert(bus); + + return bus_socket_read_message(bus); +} + +int bus_rqueue_make_room(sd_bus *bus) { + assert(bus); + + if (bus->rqueue_size >= BUS_RQUEUE_MAX) + return -ENOBUFS; + + if (!GREEDY_REALLOC(bus->rqueue, bus->rqueue_size + 1)) + return -ENOMEM; + + return 0; +} + +static void rqueue_drop_one(sd_bus *bus, size_t i) { + assert(bus); + assert(i < bus->rqueue_size); + + bus_message_unref_queued(bus->rqueue[i], bus); + memmove(bus->rqueue + i, bus->rqueue + i + 1, sizeof(sd_bus_message*) * (bus->rqueue_size - i - 1)); + bus->rqueue_size--; +} + +static int dispatch_rqueue(sd_bus *bus, sd_bus_message **m) { + int r, ret = 0; + + assert(bus); + assert(m); + assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO)); + + for (;;) { + if (bus->rqueue_size > 0) { + /* Dispatch a queued message */ + *m = sd_bus_message_ref(bus->rqueue[0]); + rqueue_drop_one(bus, 0); + return 1; + } + + /* Try to read a new message */ + r = bus_read_message(bus); + if (r < 0) + return r; + if (r == 0) { + *m = NULL; + return ret; + } + + ret = 1; + } +} + +_public_ int sd_bus_send(sd_bus *bus, sd_bus_message *_m, uint64_t *cookie) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = sd_bus_message_ref(_m); + int r; + + assert_return(m, -EINVAL); + + if (bus) + assert_return(bus = bus_resolve(bus), -ENOPKG); + else + assert_return(bus = m->bus, -ENOTCONN); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + if (m->n_fds > 0) { + r = sd_bus_can_send(bus, SD_BUS_TYPE_UNIX_FD); + if (r < 0) + return r; + if (r == 0) + return -EOPNOTSUPP; + } + + /* If the cookie number isn't kept, then we know that no reply + * is expected */ + if (!cookie && !m->sealed) + m->header->flags |= BUS_MESSAGE_NO_REPLY_EXPECTED; + + r = bus_seal_message(bus, m, 0); + if (r < 0) + return r; + + /* Remarshall if we have to. This will possibly unref the + * message and place a replacement in m */ + r = bus_remarshal_message(bus, &m); + if (r < 0) + return r; + + /* If this is a reply and no reply was requested, then let's + * suppress this, if we can */ + if (m->dont_send) + goto finish; + + if (IN_SET(bus->state, BUS_RUNNING, BUS_HELLO) && bus->wqueue_size <= 0) { + size_t idx = 0; + + r = bus_write_message(bus, m, &idx); + if (ERRNO_IS_NEG_DISCONNECT(r)) { + bus_enter_closing(bus); + return -ECONNRESET; + } else if (r < 0) + return r; + + if (idx < BUS_MESSAGE_SIZE(m)) { + /* Wasn't fully written. So let's remember how + * much was written. Note that the first entry + * of the wqueue array is always allocated so + * that we always can remember how much was + * written. */ + bus->wqueue[0] = bus_message_ref_queued(m, bus); + bus->wqueue_size = 1; + bus->windex = idx; + } + + } else { + /* Just append it to the queue. */ + + if (bus->wqueue_size >= BUS_WQUEUE_MAX) + return -ENOBUFS; + + if (!GREEDY_REALLOC(bus->wqueue, bus->wqueue_size + 1)) + return -ENOMEM; + + bus->wqueue[bus->wqueue_size++] = bus_message_ref_queued(m, bus); + } + +finish: + if (cookie) + *cookie = BUS_MESSAGE_COOKIE(m); + + return 1; +} + +_public_ int sd_bus_send_to(sd_bus *bus, sd_bus_message *m, const char *destination, uint64_t *cookie) { + int r; + + assert_return(m, -EINVAL); + + if (bus) + assert_return(bus = bus_resolve(bus), -ENOPKG); + else + assert_return(bus = m->bus, -ENOTCONN); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + if (!streq_ptr(m->destination, destination)) { + + if (!destination) + return -EEXIST; + + r = sd_bus_message_set_destination(m, destination); + if (r < 0) + return r; + } + + return sd_bus_send(bus, m, cookie); +} + +static usec_t calc_elapse(sd_bus *bus, uint64_t usec) { + assert(bus); + + assert_cc(sizeof(usec_t) == sizeof(uint64_t)); + + if (usec == USEC_INFINITY) + return 0; + + /* We start all timeouts the instant we enter BUS_HELLO/BUS_RUNNING state, so that the don't run in parallel + * with any connection setup states. Hence, if a method callback is started earlier than that we just store the + * relative timestamp, and afterwards the absolute one. */ + + if (IN_SET(bus->state, BUS_WATCH_BIND, BUS_OPENING, BUS_AUTHENTICATING)) + return usec; + else + return usec_add(now(CLOCK_MONOTONIC), usec); +} + +static int timeout_compare(const void *a, const void *b) { + const struct reply_callback *x = a, *y = b; + + if (x->timeout_usec != 0 && y->timeout_usec == 0) + return -1; + + if (x->timeout_usec == 0 && y->timeout_usec != 0) + return 1; + + return CMP(x->timeout_usec, y->timeout_usec); +} + +_public_ int sd_bus_call_async( + sd_bus *bus, + sd_bus_slot **slot, + sd_bus_message *_m, + sd_bus_message_handler_t callback, + void *userdata, + uint64_t usec) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = sd_bus_message_ref(_m); + _cleanup_(sd_bus_slot_unrefp) sd_bus_slot *s = NULL; + int r; + + assert_return(m, -EINVAL); + assert_return(m->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL); + assert_return(!m->sealed || (!!callback == !(m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED)), -EINVAL); + + if (bus) + assert_return(bus = bus_resolve(bus), -ENOPKG); + else + assert_return(bus = m->bus, -ENOTCONN); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + /* If no callback is specified and there's no interest in a slot, then there's no reason to ask for a reply */ + if (!callback && !slot && !m->sealed) + m->header->flags |= BUS_MESSAGE_NO_REPLY_EXPECTED; + + r = ordered_hashmap_ensure_allocated(&bus->reply_callbacks, &uint64_hash_ops); + if (r < 0) + return r; + + r = prioq_ensure_allocated(&bus->reply_callbacks_prioq, timeout_compare); + if (r < 0) + return r; + + r = bus_seal_message(bus, m, usec); + if (r < 0) + return r; + + r = bus_remarshal_message(bus, &m); + if (r < 0) + return r; + + if (slot || callback) { + s = bus_slot_allocate(bus, !slot, BUS_REPLY_CALLBACK, sizeof(struct reply_callback), userdata); + if (!s) + return -ENOMEM; + + s->reply_callback.callback = callback; + + s->reply_callback.cookie = BUS_MESSAGE_COOKIE(m); + r = ordered_hashmap_put(bus->reply_callbacks, &s->reply_callback.cookie, &s->reply_callback); + if (r < 0) { + s->reply_callback.cookie = 0; + return r; + } + + s->reply_callback.timeout_usec = calc_elapse(bus, m->timeout); + if (s->reply_callback.timeout_usec != 0) { + r = prioq_put(bus->reply_callbacks_prioq, &s->reply_callback, &s->reply_callback.prioq_idx); + if (r < 0) { + s->reply_callback.timeout_usec = 0; + return r; + } + } + } + + r = sd_bus_send(bus, m, s ? &s->reply_callback.cookie : NULL); + if (r < 0) + return r; + + if (slot) + *slot = s; + s = NULL; + + return r; +} + +int bus_ensure_running(sd_bus *bus) { + int r; + + assert(bus); + + if (bus->state == BUS_RUNNING) + return 1; + + for (;;) { + if (IN_SET(bus->state, BUS_UNSET, BUS_CLOSED, BUS_CLOSING)) + return -ENOTCONN; + + r = sd_bus_process(bus, NULL); + if (r < 0) + return r; + if (bus->state == BUS_RUNNING) + return 1; + if (r > 0) + continue; + + r = sd_bus_wait(bus, UINT64_MAX); + if (r < 0) + return r; + } +} + +_public_ int sd_bus_call( + sd_bus *bus, + sd_bus_message *_m, + uint64_t usec, + sd_bus_error *error, + sd_bus_message **reply) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = sd_bus_message_ref(_m); + usec_t timeout; + uint64_t cookie; + size_t i; + int r; + + bus_assert_return(m, -EINVAL, error); + bus_assert_return(m->header->type == SD_BUS_MESSAGE_METHOD_CALL, -EINVAL, error); + bus_assert_return(!(m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED), -EINVAL, error); + bus_assert_return(!bus_error_is_dirty(error), -EINVAL, error); + + if (bus) + assert_return(bus = bus_resolve(bus), -ENOPKG); + else + assert_return(bus = m->bus, -ENOTCONN); + bus_assert_return(!bus_origin_changed(bus), -ECHILD, error); + + if (!BUS_IS_OPEN(bus->state)) { + r = -ENOTCONN; + goto fail; + } + + r = bus_ensure_running(bus); + if (r < 0) + goto fail; + + i = bus->rqueue_size; + + r = bus_seal_message(bus, m, usec); + if (r < 0) + goto fail; + + r = bus_remarshal_message(bus, &m); + if (r < 0) + goto fail; + + r = sd_bus_send(bus, m, &cookie); + if (r < 0) + goto fail; + + timeout = calc_elapse(bus, m->timeout); + + for (;;) { + usec_t left; + + while (i < bus->rqueue_size) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *incoming = NULL; + + incoming = sd_bus_message_ref(bus->rqueue[i]); + + if (incoming->reply_cookie == cookie) { + /* Found a match! */ + + rqueue_drop_one(bus, i); + log_debug_bus_message(incoming); + + if (incoming->header->type == SD_BUS_MESSAGE_METHOD_RETURN) { + + if (incoming->n_fds <= 0 || bus->accept_fd) { + if (reply) + *reply = TAKE_PTR(incoming); + + return 1; + } + + return sd_bus_error_set(error, SD_BUS_ERROR_INCONSISTENT_MESSAGE, + "Reply message contained file descriptors which I couldn't accept. Sorry."); + + } else if (incoming->header->type == SD_BUS_MESSAGE_METHOD_ERROR) + return sd_bus_error_copy(error, &incoming->error); + else { + r = -EIO; + goto fail; + } + + } else if (BUS_MESSAGE_COOKIE(incoming) == cookie && + bus->unique_name && + incoming->sender && + streq(bus->unique_name, incoming->sender)) { + + rqueue_drop_one(bus, i); + + /* Our own message? Somebody is trying to send its own client a message, + * let's not dead-lock, let's fail immediately. */ + + r = -ELOOP; + goto fail; + } + + /* Try to read more, right-away */ + i++; + } + + r = bus_read_message(bus); + if (r < 0) { + if (ERRNO_IS_DISCONNECT(r)) { + bus_enter_closing(bus); + r = -ECONNRESET; + } + + goto fail; + } + if (r > 0) + continue; + + if (timeout > 0) { + usec_t n; + + n = now(CLOCK_MONOTONIC); + if (n >= timeout) { + r = -ETIMEDOUT; + goto fail; + } + + left = timeout - n; + } else + left = UINT64_MAX; + + r = bus_poll(bus, true, left); + if (ERRNO_IS_NEG_TRANSIENT(r)) + continue; + if (r < 0) + goto fail; + if (r == 0) { + r = -ETIMEDOUT; + goto fail; + } + + r = dispatch_wqueue(bus); + if (r < 0) { + if (ERRNO_IS_DISCONNECT(r)) { + bus_enter_closing(bus); + r = -ECONNRESET; + } + + goto fail; + } + } + +fail: + return sd_bus_error_set_errno(error, r); +} + +_public_ int sd_bus_get_fd(sd_bus *bus) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(bus->input_fd == bus->output_fd, -EPERM); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (bus->state == BUS_CLOSED) + return -ENOTCONN; + + if (bus->inotify_fd >= 0) + return bus->inotify_fd; + + if (bus->input_fd >= 0) + return bus->input_fd; + + return -ENOTCONN; +} + +_public_ int sd_bus_get_events(sd_bus *bus) { + int flags = 0; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + + switch (bus->state) { + + case BUS_UNSET: + case BUS_CLOSED: + return -ENOTCONN; + + case BUS_WATCH_BIND: + flags |= POLLIN; + break; + + case BUS_OPENING: + flags |= POLLOUT; + break; + + case BUS_AUTHENTICATING: + if (bus_socket_auth_needs_write(bus)) + flags |= POLLOUT; + + flags |= POLLIN; + break; + + case BUS_RUNNING: + case BUS_HELLO: + if (bus->rqueue_size <= 0) + flags |= POLLIN; + if (bus->wqueue_size > 0) + flags |= POLLOUT; + break; + + case BUS_CLOSING: + break; + + default: + assert_not_reached(); + } + + return flags; +} + +_public_ int sd_bus_get_timeout(sd_bus *bus, uint64_t *timeout_usec) { + struct reply_callback *c; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(timeout_usec, -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (!BUS_IS_OPEN(bus->state) && bus->state != BUS_CLOSING) + return -ENOTCONN; + + if (bus->track_queue) { + *timeout_usec = 0; + return 1; + } + + switch (bus->state) { + + case BUS_AUTHENTICATING: + *timeout_usec = bus->auth_timeout; + return 1; + + case BUS_RUNNING: + case BUS_HELLO: + if (bus->rqueue_size > 0) { + *timeout_usec = 0; + return 1; + } + + c = prioq_peek(bus->reply_callbacks_prioq); + if (!c) { + *timeout_usec = UINT64_MAX; + return 0; + } + + if (c->timeout_usec == 0) { + *timeout_usec = UINT64_MAX; + return 0; + } + + *timeout_usec = c->timeout_usec; + return 1; + + case BUS_CLOSING: + *timeout_usec = 0; + return 1; + + case BUS_WATCH_BIND: + case BUS_OPENING: + *timeout_usec = UINT64_MAX; + return 0; + + default: + assert_not_reached(); + } +} + +static int process_timeout(sd_bus *bus) { + _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message* m = NULL; + struct reply_callback *c; + sd_bus_slot *slot; + bool is_hello; + usec_t n; + int r; + + assert(bus); + assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO)); + + c = prioq_peek(bus->reply_callbacks_prioq); + if (!c) + return 0; + + n = now(CLOCK_MONOTONIC); + if (c->timeout_usec > n) + return 0; + + r = bus_message_new_synthetic_error( + bus, + c->cookie, + &SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_NO_REPLY, "Method call timed out"), + &m); + if (r < 0) + return r; + + m->read_counter = ++bus->read_counter; + + r = bus_seal_synthetic_message(bus, m); + if (r < 0) + return r; + + assert_se(prioq_pop(bus->reply_callbacks_prioq) == c); + c->timeout_usec = 0; + + ordered_hashmap_remove(bus->reply_callbacks, &c->cookie); + c->cookie = 0; + + slot = container_of(c, sd_bus_slot, reply_callback); + + bus->iteration_counter++; + + is_hello = bus->state == BUS_HELLO && c->callback == hello_callback; + + bus->current_message = m; + bus->current_slot = sd_bus_slot_ref(slot); + bus->current_handler = c->callback; + bus->current_userdata = slot->userdata; + r = c->callback(m, slot->userdata, &error_buffer); + bus->current_userdata = NULL; + bus->current_handler = NULL; + bus->current_slot = NULL; + bus->current_message = NULL; + + if (slot->floating) + bus_slot_disconnect(slot, true); + + sd_bus_slot_unref(slot); + + /* When this is the hello message and it timed out, then make sure to propagate the error up, don't just log + * and ignore the callback handler's return value. */ + if (is_hello) + return r; + + return bus_maybe_reply_error(m, r, &error_buffer); +} + +static int process_hello(sd_bus *bus, sd_bus_message *m) { + assert(bus); + assert(m); + + if (bus->state != BUS_HELLO) + return 0; + + /* Let's make sure the first message on the bus is the HELLO + * reply. But note that we don't actually parse the message + * here (we leave that to the usual handling), we just verify + * we don't let any earlier msg through. */ + + if (!IN_SET(m->header->type, SD_BUS_MESSAGE_METHOD_RETURN, SD_BUS_MESSAGE_METHOD_ERROR)) + return -EIO; + + if (m->reply_cookie != 1) + return -EIO; + + return 0; +} + +static int process_reply(sd_bus *bus, sd_bus_message *m) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *synthetic_reply = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL; + struct reply_callback *c; + sd_bus_slot *slot; + bool is_hello; + int r; + + assert(bus); + assert(m); + + if (!IN_SET(m->header->type, SD_BUS_MESSAGE_METHOD_RETURN, SD_BUS_MESSAGE_METHOD_ERROR)) + return 0; + + if (m->destination && bus->unique_name && !streq_ptr(m->destination, bus->unique_name)) + return 0; + + c = ordered_hashmap_remove(bus->reply_callbacks, &m->reply_cookie); + if (!c) + return 0; + + c->cookie = 0; + + slot = container_of(c, sd_bus_slot, reply_callback); + + if (m->n_fds > 0 && !bus->accept_fd) { + + /* If the reply contained a file descriptor which we + * didn't want we pass an error instead. */ + + r = bus_message_new_synthetic_error( + bus, + m->reply_cookie, + &SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_INCONSISTENT_MESSAGE, "Reply message contained file descriptor"), + &synthetic_reply); + if (r < 0) + return r; + + /* Copy over original timestamp */ + synthetic_reply->realtime = m->realtime; + synthetic_reply->monotonic = m->monotonic; + synthetic_reply->seqnum = m->seqnum; + synthetic_reply->read_counter = m->read_counter; + + r = bus_seal_synthetic_message(bus, synthetic_reply); + if (r < 0) + return r; + + m = synthetic_reply; + } else { + r = sd_bus_message_rewind(m, true); + if (r < 0) + return r; + } + + if (c->timeout_usec != 0) { + prioq_remove(bus->reply_callbacks_prioq, c, &c->prioq_idx); + c->timeout_usec = 0; + } + + is_hello = bus->state == BUS_HELLO && c->callback == hello_callback; + + bus->current_slot = sd_bus_slot_ref(slot); + bus->current_handler = c->callback; + bus->current_userdata = slot->userdata; + r = c->callback(m, slot->userdata, &error_buffer); + bus->current_userdata = NULL; + bus->current_handler = NULL; + bus->current_slot = NULL; + + if (slot->floating) + bus_slot_disconnect(slot, true); + + sd_bus_slot_unref(slot); + + /* When this is the hello message and it failed, then make sure to propagate the error up, don't just log and + * ignore the callback handler's return value. */ + if (is_hello) + return r; + + return bus_maybe_reply_error(m, r, &error_buffer); +} + +static int process_filter(sd_bus *bus, sd_bus_message *m) { + _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL; + int r; + + assert(bus); + assert(m); + + do { + bus->filter_callbacks_modified = false; + + LIST_FOREACH(callbacks, l, bus->filter_callbacks) { + sd_bus_slot *slot; + + if (bus->filter_callbacks_modified) + break; + + /* Don't run this more than once per iteration */ + if (l->last_iteration == bus->iteration_counter) + continue; + + l->last_iteration = bus->iteration_counter; + + r = sd_bus_message_rewind(m, true); + if (r < 0) + return r; + + slot = container_of(l, sd_bus_slot, filter_callback); + + bus->current_slot = sd_bus_slot_ref(slot); + bus->current_handler = l->callback; + bus->current_userdata = slot->userdata; + r = l->callback(m, slot->userdata, &error_buffer); + bus->current_userdata = NULL; + bus->current_handler = NULL; + bus->current_slot = sd_bus_slot_unref(slot); + + r = bus_maybe_reply_error(m, r, &error_buffer); + if (r != 0) + return r; + + } + + } while (bus->filter_callbacks_modified); + + return 0; +} + +static int process_match(sd_bus *bus, sd_bus_message *m) { + int r; + + assert(bus); + assert(m); + + do { + bus->match_callbacks_modified = false; + + r = bus_match_run(bus, &bus->match_callbacks, m); + if (r != 0) + return r; + + } while (bus->match_callbacks_modified); + + return 0; +} + +static int process_builtin(sd_bus *bus, sd_bus_message *m) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + int r; + + assert(bus); + assert(m); + + if (bus->is_monitor) + return 0; + + if (bus->manual_peer_interface) + return 0; + + if (m->header->type != SD_BUS_MESSAGE_METHOD_CALL) + return 0; + + if (!streq_ptr(m->interface, "org.freedesktop.DBus.Peer")) + return 0; + + if (m->header->flags & BUS_MESSAGE_NO_REPLY_EXPECTED) + return 1; + + if (streq_ptr(m->member, "Ping")) + r = sd_bus_message_new_method_return(m, &reply); + else if (streq_ptr(m->member, "GetMachineId")) { + sd_id128_t id; + + r = sd_id128_get_machine(&id); + if (r < 0) + return r; + + r = sd_bus_message_new_method_return(m, &reply); + if (r < 0) + return r; + + r = sd_bus_message_append(reply, "s", SD_ID128_TO_STRING(id)); + } else { + r = sd_bus_message_new_method_errorf( + m, &reply, + SD_BUS_ERROR_UNKNOWN_METHOD, + "Unknown method '%s' on interface '%s'.", m->member, m->interface); + } + if (r < 0) + return r; + + r = sd_bus_send(bus, reply, NULL); + if (r < 0) + return r; + + return 1; +} + +static int process_fd_check(sd_bus *bus, sd_bus_message *m) { + assert(bus); + assert(m); + + /* If we got a message with a file descriptor which we didn't + * want to accept, then let's drop it. How can this even + * happen? For example, when the kernel queues a message into + * an activatable names's queue which allows fds, and then is + * delivered to us later even though we ourselves did not + * negotiate it. */ + + if (bus->is_monitor) + return 0; + + if (m->n_fds <= 0) + return 0; + + if (bus->accept_fd) + return 0; + + if (m->header->type != SD_BUS_MESSAGE_METHOD_CALL) + return 1; /* just eat it up */ + + return sd_bus_reply_method_errorf(m, SD_BUS_ERROR_INCONSISTENT_MESSAGE, + "Message contains file descriptors, which I cannot accept. Sorry."); +} + +static int process_message(sd_bus *bus, sd_bus_message *m) { + _unused_ _cleanup_(log_context_unrefp) LogContext *c = NULL; + int r; + + assert(bus); + assert(m); + + bus->current_message = m; + bus->iteration_counter++; + + if (log_context_enabled()) + c = log_context_new_strv_consume(bus_message_make_log_fields(m)); + + log_debug_bus_message(m); + + r = process_hello(bus, m); + if (r != 0) + goto finish; + + r = process_reply(bus, m); + if (r != 0) + goto finish; + + r = process_fd_check(bus, m); + if (r != 0) + goto finish; + + r = process_filter(bus, m); + if (r != 0) + goto finish; + + r = process_match(bus, m); + if (r != 0) + goto finish; + + r = process_builtin(bus, m); + if (r != 0) + goto finish; + + r = bus_process_object(bus, m); + +finish: + bus->current_message = NULL; + return r; +} + +static int dispatch_track(sd_bus *bus) { + assert(bus); + + if (!bus->track_queue) + return 0; + + bus_track_dispatch(bus->track_queue); + return 1; +} + +static int process_running(sd_bus *bus, sd_bus_message **ret) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + int r; + + assert(bus); + assert(IN_SET(bus->state, BUS_RUNNING, BUS_HELLO)); + + r = process_timeout(bus); + if (r != 0) + goto null_message; + + r = dispatch_wqueue(bus); + if (r != 0) + goto null_message; + + r = dispatch_track(bus); + if (r != 0) + goto null_message; + + r = dispatch_rqueue(bus, &m); + if (r < 0) + return r; + if (!m) + goto null_message; + + r = process_message(bus, m); + if (r != 0) + goto null_message; + + if (ret) { + r = sd_bus_message_rewind(m, true); + if (r < 0) + return r; + + *ret = TAKE_PTR(m); + return 1; + } + + if (m->header->type == SD_BUS_MESSAGE_METHOD_CALL) { + + log_debug("Unprocessed message call sender=%s object=%s interface=%s member=%s", + strna(sd_bus_message_get_sender(m)), + strna(sd_bus_message_get_path(m)), + strna(sd_bus_message_get_interface(m)), + strna(sd_bus_message_get_member(m))); + + r = sd_bus_reply_method_errorf( + m, + SD_BUS_ERROR_UNKNOWN_OBJECT, + "Unknown object '%s'.", m->path); + if (r < 0) + return r; + } + + return 1; + +null_message: + if (r >= 0 && ret) + *ret = NULL; + + return r; +} + +static int bus_exit_now(sd_bus *bus, sd_event *event) { + assert(bus); + + /* Exit due to close, if this is requested. If this is bus object is attached to an event source, invokes + * sd_event_exit(), otherwise invokes libc exit(). */ + + if (bus->exited) /* did we already exit? */ + return 0; + if (!bus->exit_triggered) /* was the exit condition triggered? */ + return 0; + if (!bus->exit_on_disconnect) /* Shall we actually exit on disconnection? */ + return 0; + + bus->exited = true; /* never exit more than once */ + + log_debug("Bus connection disconnected, exiting."); + + if (!event) + event = bus->event; + + if (event) + return sd_event_exit(event, EXIT_FAILURE); + else + exit(EXIT_FAILURE); + + assert_not_reached(); +} + +static int process_closing_reply_callback(sd_bus *bus, struct reply_callback *c) { + _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + sd_bus_slot *slot; + int r; + + assert(bus); + assert(c); + + r = bus_message_new_synthetic_error( + bus, + c->cookie, + &SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_NO_REPLY, "Connection terminated"), + &m); + if (r < 0) + return r; + + m->read_counter = ++bus->read_counter; + + r = bus_seal_synthetic_message(bus, m); + if (r < 0) + return r; + + if (c->timeout_usec != 0) { + prioq_remove(bus->reply_callbacks_prioq, c, &c->prioq_idx); + c->timeout_usec = 0; + } + + ordered_hashmap_remove(bus->reply_callbacks, &c->cookie); + c->cookie = 0; + + slot = container_of(c, sd_bus_slot, reply_callback); + + bus->iteration_counter++; + + bus->current_message = m; + bus->current_slot = sd_bus_slot_ref(slot); + bus->current_handler = c->callback; + bus->current_userdata = slot->userdata; + r = c->callback(m, slot->userdata, &error_buffer); + bus->current_userdata = NULL; + bus->current_handler = NULL; + bus->current_slot = NULL; + bus->current_message = NULL; + + if (slot->floating) + bus_slot_disconnect(slot, true); + + sd_bus_slot_unref(slot); + + return bus_maybe_reply_error(m, r, &error_buffer); +} + +static int process_closing(sd_bus *bus, sd_bus_message **ret) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + _cleanup_(sd_event_unrefp) sd_event *event = NULL; + struct reply_callback *c; + int r; + + assert(bus); + assert(bus->state == BUS_CLOSING); + + /* First, fail all outstanding method calls */ + c = ordered_hashmap_first(bus->reply_callbacks); + if (c) + return process_closing_reply_callback(bus, c); + + /* Then, fake-drop all remaining bus tracking references */ + if (bus->tracks) { + bus_track_close(bus->tracks); + return 1; + } + + /* Then, synthesize a Disconnected message */ + r = sd_bus_message_new_signal( + bus, + &m, + "/org/freedesktop/DBus/Local", + "org.freedesktop.DBus.Local", + "Disconnected"); + if (r < 0) + return r; + + bus_message_set_sender_local(bus, m); + m->read_counter = ++bus->read_counter; + + r = bus_seal_synthetic_message(bus, m); + if (r < 0) + return r; + + /* sd_bus_close() will deref the event and set bus->event to NULL. But in bus_exit_now() we use + * bus->event to decide whether to return from the event loop or exit(), but given it's always NULL + * at that point, it always exit(). Ref it here and pass it through further down to avoid that. */ + event = sd_event_ref(bus->event); + sd_bus_close(bus); + + bus->current_message = m; + bus->iteration_counter++; + + r = process_filter(bus, m); + if (r != 0) + goto finish; + + r = process_match(bus, m); + if (r != 0) + goto finish; + + /* Nothing else to do, exit now, if the condition holds */ + bus->exit_triggered = true; + (void) bus_exit_now(bus, event); + + if (ret) + *ret = TAKE_PTR(m); + + r = 1; + +finish: + bus->current_message = NULL; + + return r; +} + +static int bus_process_internal(sd_bus *bus, sd_bus_message **ret) { + int r; + + /* Returns 0 when we didn't do anything. This should cause the + * caller to invoke sd_bus_wait() before returning the next + * time. Returns > 0 when we did something, which possibly + * means *ret is filled in with an unprocessed message. */ + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + + /* We don't allow recursively invoking sd_bus_process(). */ + assert_return(!bus->current_message, -EBUSY); + assert(!bus->current_slot); /* This should be NULL whenever bus->current_message is */ + + BUS_DONT_DESTROY(bus); + + switch (bus->state) { + + case BUS_UNSET: + return -ENOTCONN; + + case BUS_CLOSED: + return -ECONNRESET; + + case BUS_WATCH_BIND: + r = bus_socket_process_watch_bind(bus); + break; + + case BUS_OPENING: + r = bus_socket_process_opening(bus); + break; + + case BUS_AUTHENTICATING: + r = bus_socket_process_authenticating(bus); + break; + + case BUS_RUNNING: + case BUS_HELLO: + r = process_running(bus, ret); + if (r >= 0) + return r; + + /* This branch initializes *ret, hence we don't use the generic error checking below */ + break; + + case BUS_CLOSING: + return process_closing(bus, ret); + + default: + assert_not_reached(); + } + + if (ERRNO_IS_NEG_DISCONNECT(r)) { + bus_enter_closing(bus); + r = 1; + } else if (r < 0) + return r; + + if (ret) + *ret = NULL; + + return r; +} + +_public_ int sd_bus_process(sd_bus *bus, sd_bus_message **ret) { + return bus_process_internal(bus, ret); +} + +_public_ int sd_bus_process_priority(sd_bus *bus, int64_t priority, sd_bus_message **ret) { + return bus_process_internal(bus, ret); +} + +static int bus_poll(sd_bus *bus, bool need_more, uint64_t timeout_usec) { + struct pollfd p[2] = {}; + usec_t m = USEC_INFINITY; + int r, n; + + assert(bus); + + if (bus->state == BUS_CLOSING) + return 1; + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + if (bus->state == BUS_WATCH_BIND) { + assert(bus->inotify_fd >= 0); + + p[0].events = POLLIN; + p[0].fd = bus->inotify_fd; + n = 1; + } else { + int e; + + e = sd_bus_get_events(bus); + if (e < 0) + return e; + + if (need_more) + /* The caller really needs some more data, they don't + * care about what's already read, or any timeouts + * except its own. */ + e |= POLLIN; + else { + usec_t until; + /* The caller wants to process if there's something to + * process, but doesn't care otherwise */ + + r = sd_bus_get_timeout(bus, &until); + if (r < 0) + return r; + if (r > 0) + m = usec_sub_unsigned(until, now(CLOCK_MONOTONIC)); + } + + p[0].fd = bus->input_fd; + if (bus->output_fd == bus->input_fd) { + p[0].events = e; + n = 1; + } else { + p[0].events = e & POLLIN; + p[1].fd = bus->output_fd; + p[1].events = e & POLLOUT; + n = 2; + } + } + + if (timeout_usec != UINT64_MAX && (m == USEC_INFINITY || timeout_usec < m)) + m = timeout_usec; + + r = ppoll_usec(p, n, m); + if (r <= 0) + return r; + + return 1; +} + +_public_ int sd_bus_wait(sd_bus *bus, uint64_t timeout_usec) { + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (bus->state == BUS_CLOSING) + return 0; + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + if (bus->rqueue_size > 0) + return 0; + + r = bus_poll(bus, false, timeout_usec); + if (ERRNO_IS_NEG_TRANSIENT(r)) + return 1; /* treat EINTR as success, but let's exit, so that the caller will call back into us soon. */ + + return r; +} + +_public_ int sd_bus_flush(sd_bus *bus) { + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (bus->state == BUS_CLOSING) + return 0; + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + /* We never were connected? Don't hang in inotify for good, as there's no timeout set for it */ + if (bus->state == BUS_WATCH_BIND) + return -EUNATCH; + + r = bus_ensure_running(bus); + if (r < 0) + return r; + + if (bus->wqueue_size <= 0) + return 0; + + for (;;) { + r = dispatch_wqueue(bus); + if (ERRNO_IS_NEG_DISCONNECT(r)) { + bus_enter_closing(bus); + return -ECONNRESET; + } else if (r < 0) + return r; + + if (bus->wqueue_size <= 0) + return 0; + + r = bus_poll(bus, false, UINT64_MAX); + if (ERRNO_IS_NEG_TRANSIENT(r)) + continue; + if (r < 0) + return r; + } +} + +_public_ int sd_bus_add_filter( + sd_bus *bus, + sd_bus_slot **slot, + sd_bus_message_handler_t callback, + void *userdata) { + + sd_bus_slot *s; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(callback, -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + s = bus_slot_allocate(bus, !slot, BUS_FILTER_CALLBACK, sizeof(struct filter_callback), userdata); + if (!s) + return -ENOMEM; + + s->filter_callback.callback = callback; + + bus->filter_callbacks_modified = true; + LIST_PREPEND(callbacks, bus->filter_callbacks, &s->filter_callback); + + if (slot) + *slot = s; + + return 0; +} + +static int add_match_callback( + sd_bus_message *m, + void *userdata, + sd_bus_error *ret_error) { + + sd_bus_slot *match_slot = ASSERT_PTR(userdata); + bool failed = false; + int r; + + assert(m); + + sd_bus_slot_ref(match_slot); + + if (sd_bus_message_is_method_error(m, NULL)) { + log_debug_errno(sd_bus_message_get_errno(m), + "Unable to add match %s, failing connection: %s", + match_slot->match_callback.match_string, + sd_bus_message_get_error(m)->message); + + failed = true; + } else + log_debug("Match %s successfully installed.", match_slot->match_callback.match_string); + + if (match_slot->match_callback.install_callback) { + sd_bus *bus; + + bus = sd_bus_message_get_bus(m); + + /* This function has been called as slot handler, and we want to call another slot handler. Let's + * update the slot callback metadata temporarily with our own data, and then revert back to the old + * values. */ + + assert(bus->current_slot == match_slot->match_callback.install_slot); + assert(bus->current_handler == add_match_callback); + assert(bus->current_userdata == userdata); + + bus->current_slot = match_slot; + bus->current_handler = match_slot->match_callback.install_callback; + bus->current_userdata = match_slot->userdata; + + r = match_slot->match_callback.install_callback(m, match_slot->userdata, ret_error); + + bus->current_slot = match_slot->match_callback.install_slot; + bus->current_handler = add_match_callback; + bus->current_userdata = userdata; + } else { + if (failed) /* Generic failure handling: destroy the connection */ + bus_enter_closing(sd_bus_message_get_bus(m)); + + r = 1; + } + + /* We don't need the install method reply slot anymore, let's free it */ + match_slot->match_callback.install_slot = sd_bus_slot_unref(match_slot->match_callback.install_slot); + + if (failed && match_slot->floating) + bus_slot_disconnect(match_slot, true); + + sd_bus_slot_unref(match_slot); + + return r; +} + +int bus_add_match_full( + sd_bus *bus, + sd_bus_slot **slot, + bool asynchronous, + const char *match, + sd_bus_message_handler_t callback, + sd_bus_message_handler_t install_callback, + void *userdata, + uint64_t timeout_usec) { + + struct bus_match_component *components = NULL; + size_t n_components = 0; + _cleanup_(sd_bus_slot_unrefp) sd_bus_slot *s = NULL; + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(match, -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + CLEANUP_ARRAY(components, n_components, bus_match_parse_free); + + r = bus_match_parse(match, &components, &n_components); + if (r < 0) + return r; + + s = bus_slot_allocate(bus, !slot, BUS_MATCH_CALLBACK, sizeof(struct match_callback), userdata); + if (!s) + return -ENOMEM; + + s->match_callback.callback = callback; + s->match_callback.install_callback = install_callback; + + if (bus->bus_client) { + enum bus_match_scope scope; + + scope = bus_match_get_scope(components, n_components); + + /* Do not install server-side matches for matches against the local service, interface or bus path. */ + if (scope != BUS_MATCH_LOCAL) { + + /* We store the original match string, so that we can use it to remove the match again. */ + + s->match_callback.match_string = strdup(match); + if (!s->match_callback.match_string) + return -ENOMEM; + + if (asynchronous) { + r = bus_add_match_internal_async(bus, + &s->match_callback.install_slot, + s->match_callback.match_string, + add_match_callback, + s, + timeout_usec); + + if (r < 0) + return r; + + /* Make the slot of the match call floating now. We need the reference, but we don't + * want that this match pins the bus object, hence we first create it non-floating, but + * then make it floating. */ + r = sd_bus_slot_set_floating(s->match_callback.install_slot, true); + } else + r = bus_add_match_internal(bus, + s->match_callback.match_string, + timeout_usec, + &s->match_callback.after); + if (r < 0) + return r; + + s->match_added = true; + } + } + + bus->match_callbacks_modified = true; + r = bus_match_add(&bus->match_callbacks, components, n_components, &s->match_callback); + if (r < 0) + return r; + + if (slot) + *slot = s; + s = NULL; + + return 0; +} + +_public_ int sd_bus_add_match( + sd_bus *bus, + sd_bus_slot **slot, + const char *match, + sd_bus_message_handler_t callback, + void *userdata) { + + return bus_add_match_full(bus, slot, false, match, callback, NULL, userdata, 0); +} + +_public_ int sd_bus_add_match_async( + sd_bus *bus, + sd_bus_slot **slot, + const char *match, + sd_bus_message_handler_t callback, + sd_bus_message_handler_t install_callback, + void *userdata) { + + return bus_add_match_full(bus, slot, true, match, callback, install_callback, userdata, 0); +} + +static int io_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) { + sd_bus *bus = ASSERT_PTR(userdata); + int r; + + /* Note that this is called both on input_fd, output_fd as well as inotify_fd events */ + + r = sd_bus_process(bus, NULL); + if (r < 0) { + log_debug_errno(r, "Processing of bus failed, closing down: %m"); + bus_enter_closing(bus); + } + + return 1; +} + +static int time_callback(sd_event_source *s, uint64_t usec, void *userdata) { + sd_bus *bus = ASSERT_PTR(userdata); + int r; + + r = sd_bus_process(bus, NULL); + if (r < 0) { + log_debug_errno(r, "Processing of bus failed, closing down: %m"); + bus_enter_closing(bus); + } + + return 1; +} + +static int prepare_callback(sd_event_source *s, void *userdata) { + sd_bus *bus = ASSERT_PTR(userdata); + int r, e; + usec_t until; + + assert(s); + + e = sd_bus_get_events(bus); + if (e < 0) { + r = e; + goto fail; + } + + if (bus->output_fd != bus->input_fd) { + + r = sd_event_source_set_io_events(bus->input_io_event_source, e & POLLIN); + if (r < 0) + goto fail; + + r = sd_event_source_set_io_events(bus->output_io_event_source, e & POLLOUT); + } else + r = sd_event_source_set_io_events(bus->input_io_event_source, e); + if (r < 0) + goto fail; + + r = sd_bus_get_timeout(bus, &until); + if (r < 0) + goto fail; + if (r > 0) { + int j; + + j = sd_event_source_set_time(bus->time_event_source, until); + if (j < 0) { + r = j; + goto fail; + } + } + + r = sd_event_source_set_enabled(bus->time_event_source, r > 0 ? SD_EVENT_ONESHOT : SD_EVENT_OFF); + if (r < 0) + goto fail; + + return 1; + +fail: + log_debug_errno(r, "Preparing of bus events failed, closing down: %m"); + bus_enter_closing(bus); + + return 1; +} + +static int quit_callback(sd_event_source *event, void *userdata) { + sd_bus *bus = userdata; + + assert(event); + + if (bus->close_on_exit) { + sd_bus_flush(bus); + sd_bus_close(bus); + } + + return 1; +} + +int bus_attach_io_events(sd_bus *bus) { + int r; + + assert(bus); + + if (bus->input_fd < 0) + return 0; + + if (!bus->event) + return 0; + + if (!bus->input_io_event_source) { + r = sd_event_add_io(bus->event, &bus->input_io_event_source, bus->input_fd, 0, io_callback, bus); + if (r < 0) + return r; + + r = sd_event_source_set_prepare(bus->input_io_event_source, prepare_callback); + if (r < 0) + return r; + + r = sd_event_source_set_priority(bus->input_io_event_source, bus->event_priority); + if (r < 0) + return r; + + r = sd_event_source_set_description(bus->input_io_event_source, "bus-input"); + } else + r = sd_event_source_set_io_fd(bus->input_io_event_source, bus->input_fd); + + if (r < 0) + return r; + + if (bus->output_fd != bus->input_fd) { + assert(bus->output_fd >= 0); + + if (!bus->output_io_event_source) { + r = sd_event_add_io(bus->event, &bus->output_io_event_source, bus->output_fd, 0, io_callback, bus); + if (r < 0) + return r; + + r = sd_event_source_set_priority(bus->output_io_event_source, bus->event_priority); + if (r < 0) + return r; + + r = sd_event_source_set_description(bus->input_io_event_source, "bus-output"); + } else + r = sd_event_source_set_io_fd(bus->output_io_event_source, bus->output_fd); + + if (r < 0) + return r; + } + + return 0; +} + +static void bus_detach_io_events(sd_bus *bus) { + assert(bus); + + bus->input_io_event_source = sd_event_source_disable_unref(bus->input_io_event_source); + bus->output_io_event_source = sd_event_source_disable_unref(bus->output_io_event_source); +} + +int bus_attach_inotify_event(sd_bus *bus) { + int r; + + assert(bus); + + if (bus->inotify_fd < 0) + return 0; + + if (!bus->event) + return 0; + + if (!bus->inotify_event_source) { + r = sd_event_add_io(bus->event, &bus->inotify_event_source, bus->inotify_fd, EPOLLIN, io_callback, bus); + if (r < 0) + return r; + + r = sd_event_source_set_priority(bus->inotify_event_source, bus->event_priority); + if (r < 0) + return r; + + r = sd_event_source_set_description(bus->inotify_event_source, "bus-inotify"); + } else + r = sd_event_source_set_io_fd(bus->inotify_event_source, bus->inotify_fd); + if (r < 0) + return r; + + return 0; +} + +_public_ int sd_bus_attach_event(sd_bus *bus, sd_event *event, int priority) { + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus->event, -EBUSY); + + assert(!bus->input_io_event_source); + assert(!bus->output_io_event_source); + assert(!bus->time_event_source); + + if (event) + bus->event = sd_event_ref(event); + else { + r = sd_event_default(&bus->event); + if (r < 0) + return r; + } + + bus->event_priority = priority; + + r = sd_event_add_time(bus->event, &bus->time_event_source, CLOCK_MONOTONIC, 0, 0, time_callback, bus); + if (r < 0) + goto fail; + + r = sd_event_source_set_priority(bus->time_event_source, priority); + if (r < 0) + goto fail; + + r = sd_event_source_set_description(bus->time_event_source, "bus-time"); + if (r < 0) + goto fail; + + r = sd_event_add_exit(bus->event, &bus->quit_event_source, quit_callback, bus); + if (r < 0) + goto fail; + + r = sd_event_source_set_description(bus->quit_event_source, "bus-exit"); + if (r < 0) + goto fail; + + r = bus_attach_io_events(bus); + if (r < 0) + goto fail; + + r = bus_attach_inotify_event(bus); + if (r < 0) + goto fail; + + return 0; + +fail: + sd_bus_detach_event(bus); + return r; +} + +_public_ int sd_bus_detach_event(sd_bus *bus) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + + if (!bus->event) + return 0; + + bus_detach_io_events(bus); + bus->inotify_event_source = sd_event_source_disable_unref(bus->inotify_event_source); + bus->time_event_source = sd_event_source_disable_unref(bus->time_event_source); + bus->quit_event_source = sd_event_source_disable_unref(bus->quit_event_source); + + bus->event = sd_event_unref(bus->event); + return 1; +} + +_public_ sd_event* sd_bus_get_event(sd_bus *bus) { + assert_return(bus = bus_resolve(bus), NULL); + + return bus->event; +} + +_public_ sd_bus_message* sd_bus_get_current_message(sd_bus *bus) { + assert_return(bus = bus_resolve(bus), NULL); + + return bus->current_message; +} + +_public_ sd_bus_slot* sd_bus_get_current_slot(sd_bus *bus) { + assert_return(bus = bus_resolve(bus), NULL); + + return bus->current_slot; +} + +_public_ sd_bus_message_handler_t sd_bus_get_current_handler(sd_bus *bus) { + assert_return(bus = bus_resolve(bus), NULL); + + return bus->current_handler; +} + +_public_ void* sd_bus_get_current_userdata(sd_bus *bus) { + assert_return(bus = bus_resolve(bus), NULL); + + return bus->current_userdata; +} + +static int bus_default(int (*bus_open)(sd_bus **), sd_bus **default_bus, sd_bus **ret) { + sd_bus *b = NULL; + int r; + + assert(bus_open); + assert(default_bus); + + if (!ret) + return !!*default_bus; + + if (*default_bus) { + *ret = sd_bus_ref(*default_bus); + return 0; + } + + r = bus_open(&b); + if (r < 0) + return r; + + b->default_bus_ptr = default_bus; + b->tid = gettid(); + *default_bus = b; + + *ret = b; + return 1; +} + +_public_ int sd_bus_default_system(sd_bus **ret) { + return bus_default(sd_bus_open_system, &default_system_bus, ret); +} + +_public_ int sd_bus_default_user(sd_bus **ret) { + return bus_default(sd_bus_open_user, &default_user_bus, ret); +} + +_public_ int sd_bus_default(sd_bus **ret) { + int (*bus_open)(sd_bus **) = NULL; + sd_bus **busp; + + busp = bus_choose_default(&bus_open); + return bus_default(bus_open, busp, ret); +} + +_public_ int sd_bus_get_tid(sd_bus *b, pid_t *tid) { + assert_return(b, -EINVAL); + assert_return(tid, -EINVAL); + assert_return(!bus_origin_changed(b), -ECHILD); + + if (b->tid != 0) { + *tid = b->tid; + return 0; + } + + if (b->event) + return sd_event_get_tid(b->event, tid); + + return -ENXIO; +} + +_public_ int sd_bus_path_encode(const char *prefix, const char *external_id, char **ret_path) { + _cleanup_free_ char *e = NULL; + char *ret; + + assert_return(object_path_is_valid(prefix), -EINVAL); + assert_return(external_id, -EINVAL); + assert_return(ret_path, -EINVAL); + + e = bus_label_escape(external_id); + if (!e) + return -ENOMEM; + + ret = path_join(prefix, e); + if (!ret) + return -ENOMEM; + + *ret_path = ret; + return 0; +} + +_public_ int sd_bus_path_decode(const char *path, const char *prefix, char **external_id) { + const char *e; + char *ret; + + assert_return(object_path_is_valid(path), -EINVAL); + assert_return(object_path_is_valid(prefix), -EINVAL); + assert_return(external_id, -EINVAL); + + e = object_path_startswith(path, prefix); + if (!e) { + *external_id = NULL; + return 0; + } + + /* Note that 'e' might be an empty string here. That's expected. E.g. a case where the subtree + * corresponds to a subtree on a disk, and we want to return something that represents the root + * of the filesystem. */ + + ret = bus_label_unescape(e); + if (!ret) + return -ENOMEM; + + *external_id = ret; + return 1; +} + +_public_ int sd_bus_path_encode_many(char **out, const char *path_template, ...) { + _cleanup_strv_free_ char **labels = NULL; + char *path, *path_pos, **label_pos; + const char *sep, *template_pos; + size_t path_length; + va_list list; + int r; + + assert_return(out, -EINVAL); + assert_return(path_template, -EINVAL); + + path_length = strlen(path_template); + + va_start(list, path_template); + for (sep = strchr(path_template, '%'); sep; sep = strchr(sep + 1, '%')) { + const char *arg; + char *label; + + arg = va_arg(list, const char *); + if (!arg) { + va_end(list); + return -EINVAL; + } + + label = bus_label_escape(arg); + if (!label) { + va_end(list); + return -ENOMEM; + } + + r = strv_consume(&labels, label); + if (r < 0) { + va_end(list); + return r; + } + + /* add label length, but account for the format character */ + path_length += strlen(label) - 1; + } + va_end(list); + + path = malloc(path_length + 1); + if (!path) + return -ENOMEM; + + path_pos = path; + label_pos = labels; + + for (template_pos = path_template; *template_pos; ) { + sep = strchrnul(template_pos, '%'); + path_pos = mempcpy(path_pos, template_pos, sep - template_pos); + if (!*sep) + break; + + path_pos = stpcpy(path_pos, *label_pos++); + template_pos = sep + 1; + } + + *path_pos = 0; + *out = path; + return 0; +} + +_public_ int sd_bus_path_decode_many(const char *path, const char *path_template, ...) { + _cleanup_strv_free_ char **labels = NULL; + const char *template_pos, *path_pos; + char **label_pos; + va_list list; + int r; + + /* + * This decodes an object-path based on a template argument. The + * template consists of a verbatim path, optionally including special + * directives: + * + * - Each occurrence of '%' in the template matches an arbitrary + * substring of a label in the given path. At most one such + * directive is allowed per label. For each such directive, the + * caller must provide an output parameter (char **) via va_arg. If + * NULL is passed, the given label is verified, but not returned. + * For each matched label, the *decoded* label is stored in the + * passed output argument, and the caller is responsible to free + * it. Note that the output arguments are only modified if the + * actually path matched the template. Otherwise, they're left + * untouched. + * + * This function returns <0 on error, 0 if the path does not match the + * template, 1 if it matched. + */ + + assert_return(path, -EINVAL); + assert_return(path_template, -EINVAL); + + path_pos = path; + + for (template_pos = path_template; *template_pos; ) { + const char *sep; + size_t length; + char *label; + + /* verify everything until the next '%' matches verbatim */ + sep = strchrnul(template_pos, '%'); + length = sep - template_pos; + if (strncmp(path_pos, template_pos, length)) + return 0; + + path_pos += length; + template_pos += length; + + if (!*template_pos) + break; + + /* We found the next '%' character. Everything up until here + * matched. We now skip ahead to the end of this label and make + * sure it matches the tail of the label in the path. Then we + * decode the string in-between and save it for later use. */ + + ++template_pos; /* skip over '%' */ + + sep = strchrnul(template_pos, '/'); + length = sep - template_pos; /* length of suffix to match verbatim */ + + /* verify the suffixes match */ + sep = strchrnul(path_pos, '/'); + if (sep - path_pos < (ssize_t)length || + strncmp(sep - length, template_pos, length)) + return 0; + + template_pos += length; /* skip over matched label */ + length = sep - path_pos - length; /* length of sub-label to decode */ + + /* store unescaped label for later use */ + label = bus_label_unescape_n(path_pos, length); + if (!label) + return -ENOMEM; + + r = strv_consume(&labels, label); + if (r < 0) + return r; + + path_pos = sep; /* skip decoded label and suffix */ + } + + /* end of template must match end of path */ + if (*path_pos) + return 0; + + /* copy the labels over to the caller */ + va_start(list, path_template); + for (label_pos = labels; label_pos && *label_pos; ++label_pos) { + char **arg; + + arg = va_arg(list, char **); + if (arg) + *arg = *label_pos; + else + free(*label_pos); + } + va_end(list); + + labels = mfree(labels); + return 1; +} + +_public_ int sd_bus_try_close(sd_bus *bus) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + + return -EOPNOTSUPP; +} + +_public_ int sd_bus_get_description(sd_bus *bus, const char **description) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(description, -EINVAL); + + const char *d = bus->description; + if (!d) + d = runtime_scope_to_string(bus->runtime_scope); + if (!d) + return -ENXIO; + + *description = d; + return 0; +} + +_public_ int sd_bus_get_scope(sd_bus *bus, const char **scope) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(scope, -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (bus->runtime_scope < 0) + return -ENODATA; + + *scope = runtime_scope_to_string(bus->runtime_scope); + return 0; +} + +_public_ int sd_bus_get_address(sd_bus *bus, const char **address) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(address, -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (bus->address) { + *address = bus->address; + return 0; + } + + return -ENODATA; +} + +_public_ int sd_bus_get_creds_mask(sd_bus *bus, uint64_t *mask) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(mask, -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + *mask = bus->creds_mask; + return 0; +} + +_public_ int sd_bus_is_bus_client(sd_bus *bus) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + + return bus->bus_client; +} + +_public_ int sd_bus_is_server(sd_bus *bus) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + + return bus->is_server; +} + +_public_ int sd_bus_is_anonymous(sd_bus *bus) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + + return bus->anonymous_auth; +} + +_public_ int sd_bus_is_trusted(sd_bus *bus) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + + return bus->trusted; +} + +_public_ int sd_bus_is_monitor(sd_bus *bus) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + + return bus->is_monitor; +} + +static void flush_close(sd_bus *bus) { + if (!bus) + return; + + /* Flushes and closes the specified bus. We take a ref before, + * to ensure the flushing does not cause the bus to be + * unreferenced. */ + + sd_bus_flush_close_unref(sd_bus_ref(bus)); +} + +_public_ void sd_bus_default_flush_close(void) { + flush_close(default_starter_bus); + flush_close(default_user_bus); + flush_close(default_system_bus); +} + +_public_ int sd_bus_set_exit_on_disconnect(sd_bus *bus, int b) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + + /* Turns on exit-on-disconnect, and triggers it immediately if the bus connection was already + * disconnected. Note that this is triggered exclusively on disconnections triggered by the server side, never + * from the client side. */ + bus->exit_on_disconnect = b; + + /* If the exit condition was triggered already, exit immediately. */ + return bus_exit_now(bus, /* event= */ NULL); +} + +_public_ int sd_bus_get_exit_on_disconnect(sd_bus *bus) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + + return bus->exit_on_disconnect; +} + +_public_ int sd_bus_set_sender(sd_bus *bus, const char *sender) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus->bus_client, -EPERM); + assert_return(!sender || service_name_is_valid(sender), -EINVAL); + + return free_and_strdup(&bus->patch_sender, sender); +} + +_public_ int sd_bus_get_sender(sd_bus *bus, const char **ret) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(ret, -EINVAL); + + if (!bus->patch_sender) + return -ENODATA; + + *ret = bus->patch_sender; + return 0; +} + +_public_ int sd_bus_get_n_queued_read(sd_bus *bus, uint64_t *ret) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + assert_return(ret, -EINVAL); + + *ret = bus->rqueue_size; + return 0; +} + +_public_ int sd_bus_get_n_queued_write(sd_bus *bus, uint64_t *ret) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(!bus_origin_changed(bus), -ECHILD); + assert_return(ret, -EINVAL); + + *ret = bus->wqueue_size; + return 0; +} + +_public_ int sd_bus_set_method_call_timeout(sd_bus *bus, uint64_t usec) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + + bus->method_call_timeout = usec; + return 0; +} + +_public_ int sd_bus_get_method_call_timeout(sd_bus *bus, uint64_t *ret) { + const char *e; + usec_t usec; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(ret, -EINVAL); + + if (bus->method_call_timeout != 0) { + *ret = bus->method_call_timeout; + return 0; + } + + e = secure_getenv("SYSTEMD_BUS_TIMEOUT"); + if (e && parse_sec(e, &usec) >= 0 && usec != 0) { + /* Save the parsed value to avoid multiple parsing. To change the timeout value, + * use sd_bus_set_method_call_timeout() instead of setenv(). */ + *ret = bus->method_call_timeout = usec; + return 0; + } + + *ret = bus->method_call_timeout = BUS_DEFAULT_TIMEOUT; + return 0; +} + +_public_ int sd_bus_set_close_on_exit(sd_bus *bus, int b) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + + bus->close_on_exit = b; + return 0; +} + +_public_ int sd_bus_get_close_on_exit(sd_bus *bus) { + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + + return bus->close_on_exit; +} + +_public_ int sd_bus_enqueue_for_read(sd_bus *bus, sd_bus_message *m) { + int r; + + assert_return(bus, -EINVAL); + assert_return(bus = bus_resolve(bus), -ENOPKG); + assert_return(m, -EINVAL); + assert_return(m->sealed, -EINVAL); + assert_return(!bus_origin_changed(bus), -ECHILD); + + if (!BUS_IS_OPEN(bus->state)) + return -ENOTCONN; + + /* Re-enqueue a message for reading. This is primarily useful for PolicyKit-style authentication, + * where we accept a message, then determine we need to interactively authenticate the user, and then + * we want to process the message again. */ + + r = bus_rqueue_make_room(bus); + if (r < 0) + return r; + + bus->rqueue[bus->rqueue_size++] = bus_message_ref_queued(m, bus); + return 0; +} diff --git a/src/libsystemd/sd-bus/test-bus-address.c b/src/libsystemd/sd-bus/test-bus-address.c new file mode 100644 index 0000000..347ba1a --- /dev/null +++ b/src/libsystemd/sd-bus/test-bus-address.c @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "sd-bus.h" + +#include "bus-internal.h" +#include "log.h" +#include "string-util.h" +#include "strv.h" +#include "tests.h" + +static void test_one_address(sd_bus *b, + const char *host, + int result, const char *expected) { + int r; + + r = bus_set_address_system_remote(b, host); + log_info("\"%s\" → %d, \"%s\"", host, r, strna(r >= 0 ? b->address : NULL)); + assert_se(r == result); + if (r >= 0) + assert_se(streq_ptr(b->address, expected)); +} + +TEST(bus_set_address_system_remote) { + _cleanup_(sd_bus_unrefp) sd_bus *b = NULL; + + assert_se(sd_bus_new(&b) >= 0); + if (!strv_isempty(saved_argv + 1)) { + STRV_FOREACH(a, saved_argv + 1) + test_one_address(b, *a, 0, NULL); + return; + }; + + test_one_address(b, "host", + 0, "unixexec:path=ssh,argv1=-xT,argv2=--,argv3=host,argv4=systemd-stdio-bridge"); + test_one_address(b, "host:123", + 0, "unixexec:path=ssh,argv1=-xT,argv2=-p,argv3=123,argv4=--,argv5=host,argv6=systemd-stdio-bridge"); + test_one_address(b, "host:123:123", + -EINVAL, NULL); + test_one_address(b, "host:", + -EINVAL, NULL); + test_one_address(b, "user@host", + 0, "unixexec:path=ssh,argv1=-xT,argv2=--,argv3=user%40host,argv4=systemd-stdio-bridge"); + test_one_address(b, "user@host@host", + -EINVAL, NULL); + test_one_address(b, "[::1]", + 0, "unixexec:path=ssh,argv1=-xT,argv2=--,argv3=%3a%3a1,argv4=systemd-stdio-bridge"); + test_one_address(b, "user@[::1]", + 0, "unixexec:path=ssh,argv1=-xT,argv2=--,argv3=user%40%3a%3a1,argv4=systemd-stdio-bridge"); + test_one_address(b, "user@[::1]:99", + 0, "unixexec:path=ssh,argv1=-xT,argv2=-p,argv3=99,argv4=--,argv5=user%40%3a%3a1,argv6=systemd-stdio-bridge"); + test_one_address(b, "user@[::1]:", + -EINVAL, NULL); + test_one_address(b, "user@[::1:", + -EINVAL, NULL); + test_one_address(b, "user@", + -EINVAL, NULL); + test_one_address(b, "user@@", + -EINVAL, NULL); +} + +DEFINE_TEST_MAIN(LOG_INFO); diff --git a/src/libsystemd/sd-bus/test-bus-benchmark.c b/src/libsystemd/sd-bus/test-bus-benchmark.c new file mode 100644 index 0000000..d988588 --- /dev/null +++ b/src/libsystemd/sd-bus/test-bus-benchmark.c @@ -0,0 +1,326 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <sys/wait.h> +#include <unistd.h> + +#include "sd-bus.h" + +#include "alloc-util.h" +#include "bus-internal.h" +#include "bus-kernel.h" +#include "constants.h" +#include "fd-util.h" +#include "missing_resource.h" +#include "string-util.h" +#include "tests.h" +#include "time-util.h" + +#define MAX_SIZE (2*1024*1024) + +static usec_t arg_loop_usec = 100 * USEC_PER_MSEC; + +typedef enum Type { + TYPE_LEGACY, + TYPE_DIRECT, +} Type; + +static void server(sd_bus *b, size_t *result) { + int r; + + for (;;) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + + r = sd_bus_process(b, &m); + assert_se(r >= 0); + + if (r == 0) + assert_se(sd_bus_wait(b, USEC_INFINITY) >= 0); + if (!m) + continue; + + if (sd_bus_message_is_method_call(m, "benchmark.server", "Ping")) + assert_se(sd_bus_reply_method_return(m, NULL) >= 0); + else if (sd_bus_message_is_method_call(m, "benchmark.server", "Work")) { + const void *p; + size_t sz; + + /* Make sure the mmap is mapped */ + assert_se(sd_bus_message_read_array(m, 'y', &p, &sz) > 0); + + r = sd_bus_reply_method_return(m, NULL); + assert_se(r >= 0); + } else if (sd_bus_message_is_method_call(m, "benchmark.server", "Exit")) { + uint64_t res; + assert_se(sd_bus_message_read(m, "t", &res) > 0); + + *result = res; + return; + + } else if (!sd_bus_message_is_signal(m, NULL, NULL)) + assert_not_reached(); + } +} + +static void transaction(sd_bus *b, size_t sz, const char *server_name) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL; + uint8_t *p; + + assert_se(sd_bus_message_new_method_call(b, &m, server_name, "/", "benchmark.server", "Work") >= 0); + assert_se(sd_bus_message_append_array_space(m, 'y', sz, (void**) &p) >= 0); + + memset(p, 0x80, sz); + + assert_se(sd_bus_call(b, m, 0, NULL, &reply) >= 0); +} + +static void client_bisect(const char *address, const char *server_name) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *x = NULL; + size_t lsize, rsize, csize; + sd_bus *b; + int r; + + r = sd_bus_new(&b); + assert_se(r >= 0); + + r = sd_bus_set_address(b, address); + assert_se(r >= 0); + + r = sd_bus_start(b); + assert_se(r >= 0); + + r = sd_bus_call_method(b, server_name, "/", "benchmark.server", "Ping", NULL, NULL, NULL); + assert_se(r >= 0); + + lsize = 1; + rsize = MAX_SIZE; + + printf("SIZE\tCOPY\tMEMFD\n"); + + for (;;) { + usec_t t; + unsigned n_copying, n_memfd; + + csize = (lsize + rsize) / 2; + + if (csize <= lsize) + break; + + if (csize <= 0) + break; + + printf("%zu\t", csize); + + b->use_memfd = 0; + + t = now(CLOCK_MONOTONIC); + for (n_copying = 0;; n_copying++) { + transaction(b, csize, server_name); + if (now(CLOCK_MONOTONIC) >= t + arg_loop_usec) + break; + } + printf("%u\t", (unsigned) ((n_copying * USEC_PER_SEC) / arg_loop_usec)); + + b->use_memfd = -1; + + t = now(CLOCK_MONOTONIC); + for (n_memfd = 0;; n_memfd++) { + transaction(b, csize, server_name); + if (now(CLOCK_MONOTONIC) >= t + arg_loop_usec) + break; + } + printf("%u\n", (unsigned) ((n_memfd * USEC_PER_SEC) / arg_loop_usec)); + + if (n_copying == n_memfd) + break; + + if (n_copying > n_memfd) + lsize = csize; + else + rsize = csize; + } + + b->use_memfd = 1; + assert_se(sd_bus_message_new_method_call(b, &x, server_name, "/", "benchmark.server", "Exit") >= 0); + assert_se(sd_bus_message_append(x, "t", csize) >= 0); + assert_se(sd_bus_send(b, x, NULL) >= 0); + + sd_bus_unref(b); +} + +static void client_chart(Type type, const char *address, const char *server_name, int fd) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *x = NULL; + size_t csize; + sd_bus *b; + int r; + + r = sd_bus_new(&b); + assert_se(r >= 0); + + if (type == TYPE_DIRECT) { + r = sd_bus_set_fd(b, fd, fd); + assert_se(r >= 0); + } else { + r = sd_bus_set_address(b, address); + assert_se(r >= 0); + + r = sd_bus_set_bus_client(b, true); + assert_se(r >= 0); + } + + r = sd_bus_start(b); + assert_se(r >= 0); + + r = sd_bus_call_method(b, server_name, "/", "benchmark.server", "Ping", NULL, NULL, NULL); + assert_se(r >= 0); + + switch (type) { + case TYPE_LEGACY: + printf("SIZE\tLEGACY\n"); + break; + case TYPE_DIRECT: + printf("SIZE\tDIRECT\n"); + break; + } + + for (csize = 1; csize <= MAX_SIZE; csize *= 2) { + usec_t t; + unsigned n_memfd; + + printf("%zu\t", csize); + + t = now(CLOCK_MONOTONIC); + for (n_memfd = 0;; n_memfd++) { + transaction(b, csize, server_name); + if (now(CLOCK_MONOTONIC) >= t + arg_loop_usec) + break; + } + + printf("%u\n", (unsigned) ((n_memfd * USEC_PER_SEC) / arg_loop_usec)); + } + + b->use_memfd = 1; + assert_se(sd_bus_message_new_method_call(b, &x, server_name, "/", "benchmark.server", "Exit") >= 0); + assert_se(sd_bus_message_append(x, "t", csize) >= 0); + assert_se(sd_bus_send(b, x, NULL) >= 0); + + sd_bus_unref(b); +} + +int main(int argc, char *argv[]) { + enum { + MODE_BISECT, + MODE_CHART, + } mode = MODE_BISECT; + Type type = TYPE_LEGACY; + int i, pair[2] = EBADF_PAIR; + _cleanup_free_ char *address = NULL, *server_name = NULL; + _cleanup_close_ int bus_ref = -EBADF; + const char *unique; + cpu_set_t cpuset; + size_t result; + sd_bus *b; + pid_t pid; + int r; + + test_setup_logging(LOG_DEBUG); + + for (i = 1; i < argc; i++) { + if (streq(argv[i], "chart")) { + mode = MODE_CHART; + continue; + } else if (streq(argv[i], "legacy")) { + type = TYPE_LEGACY; + continue; + } else if (streq(argv[i], "direct")) { + type = TYPE_DIRECT; + continue; + } + + assert_se(parse_sec(argv[i], &arg_loop_usec) >= 0); + } + + assert_se(arg_loop_usec > 0); + + if (type == TYPE_LEGACY) { + const char *e; + + e = secure_getenv("DBUS_SESSION_BUS_ADDRESS"); + assert_se(e); + + address = strdup(e); + assert_se(address); + } + + r = sd_bus_new(&b); + assert_se(r >= 0); + + if (type == TYPE_DIRECT) { + assert_se(socketpair(AF_UNIX, SOCK_STREAM, 0, pair) >= 0); + + r = sd_bus_set_fd(b, pair[0], pair[0]); + assert_se(r >= 0); + + r = sd_bus_set_server(b, true, SD_ID128_NULL); + assert_se(r >= 0); + } else { + r = sd_bus_set_address(b, address); + assert_se(r >= 0); + + r = sd_bus_set_bus_client(b, true); + assert_se(r >= 0); + } + + r = sd_bus_start(b); + assert_se(r >= 0); + + if (type != TYPE_DIRECT) { + r = sd_bus_get_unique_name(b, &unique); + assert_se(r >= 0); + + server_name = strdup(unique); + assert_se(server_name); + } + + sync(); + setpriority(PRIO_PROCESS, 0, -19); + + pid = fork(); + assert_se(pid >= 0); + + if (pid == 0) { + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); + + safe_close(bus_ref); + sd_bus_unref(b); + + switch (mode) { + case MODE_BISECT: + client_bisect(address, server_name); + break; + + case MODE_CHART: + client_chart(type, address, server_name, pair[1]); + break; + } + + _exit(EXIT_SUCCESS); + } + + CPU_ZERO(&cpuset); + CPU_SET(1, &cpuset); + pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); + + server(b, &result); + + if (mode == MODE_BISECT) + printf("Copying/memfd are equally fast at %zu bytes\n", result); + + assert_se(waitpid(pid, NULL, 0) == pid); + + safe_close(pair[1]); + sd_bus_unref(b); + + return 0; +} diff --git a/src/libsystemd/sd-bus/test-bus-chat.c b/src/libsystemd/sd-bus/test-bus-chat.c new file mode 100644 index 0000000..da1340f --- /dev/null +++ b/src/libsystemd/sd-bus/test-bus-chat.c @@ -0,0 +1,539 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <pthread.h> +#include <stdlib.h> +#include <unistd.h> + +#include "sd-bus.h" + +#include "alloc-util.h" +#include "bus-error.h" +#include "bus-internal.h" +#include "bus-match.h" +#include "errno-util.h" +#include "fd-util.h" +#include "format-util.h" +#include "log.h" +#include "macro.h" +#include "string-util.h" +#include "tests.h" + +static int match_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) { + log_info("Match triggered! destination=%s interface=%s member=%s", + strna(sd_bus_message_get_destination(m)), + strna(sd_bus_message_get_interface(m)), + strna(sd_bus_message_get_member(m))); + return 0; +} + +static int object_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) { + int r; + + if (sd_bus_message_is_method_error(m, NULL)) + return 0; + + if (sd_bus_message_is_method_call(m, "org.object.test", "Foobar")) { + log_info("Invoked Foobar() on %s", sd_bus_message_get_path(m)); + + r = sd_bus_reply_method_return(m, NULL); + if (r < 0) + return log_error_errno(r, "Failed to send reply: %m"); + + return 1; + } + + return 0; +} + +static int server_init(sd_bus **ret_bus) { + _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL; + const char *unique, *desc; + sd_id128_t id; + int r; + + assert_se(ret_bus); + + r = sd_bus_open_user_with_description(&bus, "my bus!"); + if (r < 0) + return log_error_errno(r, "Failed to connect to user bus: %m"); + + r = sd_bus_get_bus_id(bus, &id); + if (r < 0) + return log_error_errno(r, "Failed to get server ID: %m"); + + r = sd_bus_get_unique_name(bus, &unique); + if (r < 0) + return log_error_errno(r, "Failed to get unique name: %m"); + + assert_se(sd_bus_get_description(bus, &desc) >= 0); + assert_se(streq(desc, "my bus!")); + + log_info("Peer ID is " SD_ID128_FORMAT_STR ".", SD_ID128_FORMAT_VAL(id)); + log_info("Unique ID: %s", unique); + log_info("Can send file handles: %i", sd_bus_can_send(bus, 'h')); + + r = sd_bus_request_name(bus, "org.freedesktop.systemd.test", 0); + if (r < 0) + return log_error_errno(r, "Failed to acquire name: %m"); + + r = sd_bus_add_fallback(bus, NULL, "/foo/bar", object_callback, NULL); + if (r < 0) + return log_error_errno(r, "Failed to add object: %m"); + + r = sd_bus_match_signal(bus, NULL, NULL, NULL, "foo.bar", "Notify", match_callback, NULL); + if (r < 0) + return log_error_errno(r, "Failed to request match: %m"); + + r = sd_bus_match_signal(bus, NULL, NULL, NULL, "foo.bar", "NotifyTo", match_callback, NULL); + if (r < 0) + return log_error_errno(r, "Failed to request match: %m"); + + r = sd_bus_add_match(bus, NULL, "type='signal',interface='org.freedesktop.DBus',member='NameOwnerChanged'", match_callback, NULL); + if (r < 0) + return log_error_errno(r, "Failed to add match: %m"); + + bus_match_dump(stdout, &bus->match_callbacks, 0); + + *ret_bus = TAKE_PTR(bus); + return 0; +} + +static int server(sd_bus *_bus) { + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = ASSERT_PTR(_bus); + bool client1_gone = false, client2_gone = false; + int r; + + while (!client1_gone || !client2_gone) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL; + pid_t pid = 0; + const char *label = NULL; + + r = sd_bus_process(bus, &m); + if (r < 0) + return log_error_errno(r, "Failed to process requests: %m"); + if (r == 0) { + r = sd_bus_wait(bus, UINT64_MAX); + if (r < 0) + return log_error_errno(r, "Failed to wait: %m"); + + continue; + } + if (!m) + continue; + + r = sd_bus_query_sender_creds(m, SD_BUS_CREDS_AUGMENT | SD_BUS_CREDS_PID | SD_BUS_CREDS_SELINUX_CONTEXT, &creds); + if (r < 0) + log_debug_errno(r, "Failed to query sender credentials, ignoring: %m"); + else { + r = sd_bus_creds_get_pid(creds, &pid); + if (r < 0) + return log_error_errno(r, "Failed to get sender pid: %m"); + + (void) sd_bus_creds_get_selinux_context(creds, &label); + } + + log_info("Got message! member=%s pid="PID_FMT" label=%s", + strna(sd_bus_message_get_member(m)), + pid, + strna(label)); + + /* sd_bus_message_dump(m); */ + /* sd_bus_message_rewind(m, true); */ + + if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "LowerCase")) { + const char *hello; + _cleanup_free_ char *lowercase = NULL; + + r = sd_bus_message_read(m, "s", &hello); + if (r < 0) + return log_error_errno(r, "Failed to get parameter: %m"); + + lowercase = strdup(hello); + if (!lowercase) + return log_oom(); + + ascii_strlower(lowercase); + + r = sd_bus_reply_method_return(m, "s", lowercase); + if (r < 0) + return log_error_errno(r, "Failed to send reply: %m"); + + } else if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "ExitClient1")) { + + r = sd_bus_reply_method_return(m, NULL); + if (r < 0) + return log_error_errno(r, "Failed to send reply: %m"); + + client1_gone = true; + } else if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "ExitClient2")) { + + r = sd_bus_reply_method_return(m, NULL); + if (r < 0) + return log_error_errno(r, "Failed to send reply: %m"); + + client2_gone = true; + } else if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "Slow")) { + + sleep(1); + + r = sd_bus_reply_method_return(m, NULL); + if (r < 0) + return log_error_errno(r, "Failed to send reply: %m"); + + } else if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "FileDescriptor")) { + int fd; + static const char x = 'X'; + + r = sd_bus_message_read(m, "h", &fd); + if (r < 0) + return log_error_errno(r, "Failed to get parameter: %m"); + + log_info("Received fd=%d", fd); + + if (write(fd, &x, 1) < 0) { + r = log_error_errno(errno, "Failed to write to fd: %m"); + safe_close(fd); + return r; + } + + r = sd_bus_reply_method_return(m, NULL); + if (r < 0) + return log_error_errno(r, "Failed to send reply: %m"); + + } else if (sd_bus_message_is_method_call(m, NULL, NULL)) { + + r = sd_bus_reply_method_error( + m, + &SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_UNKNOWN_METHOD, "Unknown method.")); + if (r < 0) + return log_error_errno(r, "Failed to send reply: %m"); + } + } + + return 0; +} + +static void* client1(void *p) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + const char *hello; + int r; + _cleanup_close_pair_ int pp[2] = EBADF_PAIR; + char x; + + r = sd_bus_open_user(&bus); + if (r < 0) { + log_error_errno(r, "Failed to connect to user bus: %m"); + goto finish; + } + + r = sd_bus_call_method( + bus, + "org.freedesktop.systemd.test", + "/", + "org.freedesktop.systemd.test", + "LowerCase", + &error, + &reply, + "s", + "HELLO"); + if (r < 0) { + log_error_errno(r, "Failed to issue method call: %m"); + goto finish; + } + + r = sd_bus_message_read(reply, "s", &hello); + if (r < 0) { + log_error_errno(r, "Failed to get string: %m"); + goto finish; + } + + assert_se(streq(hello, "hello")); + + if (pipe2(pp, O_CLOEXEC|O_NONBLOCK) < 0) { + r = log_error_errno(errno, "Failed to allocate pipe: %m"); + goto finish; + } + + log_info("Sending fd=%d", pp[1]); + + r = sd_bus_call_method( + bus, + "org.freedesktop.systemd.test", + "/", + "org.freedesktop.systemd.test", + "FileDescriptor", + &error, + NULL, + "h", + pp[1]); + if (r < 0) { + log_error_errno(r, "Failed to issue method call: %m"); + goto finish; + } + + errno = 0; + if (read(pp[0], &x, 1) <= 0) { + log_error("Failed to read from pipe: %s", STRERROR_OR_EOF(errno)); + goto finish; + } + + r = 0; + +finish: + if (bus) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *q = NULL; + + r = sd_bus_message_new_method_call( + bus, + &q, + "org.freedesktop.systemd.test", + "/", + "org.freedesktop.systemd.test", + "ExitClient1"); + if (r < 0) + log_error_errno(r, "Failed to allocate method call: %m"); + else + sd_bus_send(bus, q, NULL); + + } + + return INT_TO_PTR(r); +} + +static int quit_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) { + bool *x = userdata; + + log_error_errno(sd_bus_message_get_errno(m), "Quit callback: %m"); + + *x = 1; + return 1; +} + +static void* client2(void *p) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL; + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + bool quit = false; + const char *mid; + int r; + + r = sd_bus_open_user(&bus); + if (r < 0) { + log_error_errno(r, "Failed to connect to user bus: %m"); + goto finish; + } + + r = sd_bus_message_new_method_call( + bus, + &m, + "org.freedesktop.systemd.test", + "/foo/bar/waldo/piep", + "org.object.test", + "Foobar"); + if (r < 0) { + log_error_errno(r, "Failed to allocate method call: %m"); + goto finish; + } + + r = sd_bus_send(bus, m, NULL); + if (r < 0) { + log_error("Failed to issue method call: %s", bus_error_message(&error, r)); + goto finish; + } + + m = sd_bus_message_unref(m); + + r = sd_bus_message_new_signal( + bus, + &m, + "/foobar", + "foo.bar", + "Notify"); + if (r < 0) { + log_error_errno(r, "Failed to allocate signal: %m"); + goto finish; + } + + r = sd_bus_send(bus, m, NULL); + if (r < 0) { + log_error("Failed to issue signal: %s", bus_error_message(&error, r)); + goto finish; + } + + m = sd_bus_message_unref(m); + + r = sd_bus_message_new_signal_to( + bus, + &m, + "org.freedesktop.systemd.test", + "/foobar", + "foo.bar", + "NotifyTo"); + if (r < 0) { + log_error_errno(r, "Failed to allocate signal to: %m"); + goto finish; + } + + r = sd_bus_send(bus, m, NULL); + if (r < 0) { + log_error("Failed to issue signal to: %s", bus_error_message(&error, r)); + goto finish; + } + + m = sd_bus_message_unref(m); + + r = sd_bus_message_new_method_call( + bus, + &m, + "org.freedesktop.systemd.test", + "/", + "org.freedesktop.DBus.Peer", + "GetMachineId"); + if (r < 0) { + log_error_errno(r, "Failed to allocate method call: %m"); + goto finish; + } + + r = sd_bus_call(bus, m, 0, &error, &reply); + if (r < 0) { + log_error("Failed to issue method call: %s", bus_error_message(&error, r)); + goto finish; + } + + r = sd_bus_message_read(reply, "s", &mid); + if (r < 0) { + log_error_errno(r, "Failed to parse machine ID: %m"); + goto finish; + } + + log_info("Machine ID is %s.", mid); + + m = sd_bus_message_unref(m); + + r = sd_bus_message_new_method_call( + bus, + &m, + "org.freedesktop.systemd.test", + "/", + "org.freedesktop.systemd.test", + "Slow"); + if (r < 0) { + log_error_errno(r, "Failed to allocate method call: %m"); + goto finish; + } + + reply = sd_bus_message_unref(reply); + + r = sd_bus_call(bus, m, 200 * USEC_PER_MSEC, &error, &reply); + if (r < 0) + log_debug("Failed to issue method call: %s", bus_error_message(&error, r)); + else { + log_error("Slow call unexpectedly succeed."); + r = -ENOANO; + goto finish; + } + + m = sd_bus_message_unref(m); + + r = sd_bus_message_new_method_call( + bus, + &m, + "org.freedesktop.systemd.test", + "/", + "org.freedesktop.systemd.test", + "Slow"); + if (r < 0) { + log_error_errno(r, "Failed to allocate method call: %m"); + goto finish; + } + + r = sd_bus_call_async(bus, NULL, m, quit_callback, &quit, 200 * USEC_PER_MSEC); + if (r < 0) { + log_info("Failed to issue method call: %s", bus_error_message(&error, r)); + goto finish; + } + + while (!quit) { + r = sd_bus_process(bus, NULL); + if (r < 0) { + log_error_errno(r, "Failed to process requests: %m"); + goto finish; + } + if (r == 0) { + r = sd_bus_wait(bus, UINT64_MAX); + if (r < 0) { + log_error_errno(r, "Failed to wait: %m"); + goto finish; + } + } + } + + r = 0; + +finish: + if (bus) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *q = NULL; + + r = sd_bus_message_new_method_call( + bus, + &q, + "org.freedesktop.systemd.test", + "/", + "org.freedesktop.systemd.test", + "ExitClient2"); + if (r < 0) { + log_error_errno(r, "Failed to allocate method call: %m"); + goto finish; + } + + (void) sd_bus_send(bus, q, NULL); + } + + return INT_TO_PTR(r); +} + +int main(int argc, char *argv[]) { + pthread_t c1, c2; + sd_bus *bus; + void *p; + int q, r; + + test_setup_logging(LOG_INFO); + + r = server_init(&bus); + if (r < 0) + return log_tests_skipped("Failed to connect to bus"); + + log_info("Initialized..."); + + r = pthread_create(&c1, NULL, client1, bus); + if (r != 0) + return EXIT_FAILURE; + + r = pthread_create(&c2, NULL, client2, bus); + if (r != 0) + return EXIT_FAILURE; + + r = server(bus); + + q = pthread_join(c1, &p); + if (q != 0) + return EXIT_FAILURE; + if (PTR_TO_INT(p) < 0) + return EXIT_FAILURE; + + q = pthread_join(c2, &p); + if (q != 0) + return EXIT_FAILURE; + if (PTR_TO_INT(p) < 0) + return EXIT_FAILURE; + + if (r < 0) + return EXIT_FAILURE; + + return EXIT_SUCCESS; +} diff --git a/src/libsystemd/sd-bus/test-bus-cleanup.c b/src/libsystemd/sd-bus/test-bus-cleanup.c new file mode 100644 index 0000000..3e14627 --- /dev/null +++ b/src/libsystemd/sd-bus/test-bus-cleanup.c @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <stdio.h> +#include <unistd.h> + +#include "sd-bus.h" + +#include "bus-internal.h" +#include "bus-message.h" +#include "process-util.h" +#include "tests.h" + +static bool use_system_bus = false; + +static void test_bus_new(void) { + _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL; + + assert_se(sd_bus_new(&bus) == 0); + assert_se(bus->n_ref == 1); +} + +static void test_bus_fork(void) { + _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL; + int r; + + assert_se(sd_bus_new(&bus) == 0); + assert_se(bus->n_ref == 1); + + /* Check that after a fork the cleanup functions return NULL */ + r = safe_fork("(bus-fork-test)", FORK_WAIT|FORK_LOG, NULL); + if (r == 0) { + assert_se(bus); + assert_se(sd_bus_is_ready(bus) == -ECHILD); + assert_se(sd_bus_flush_close_unref(bus) == NULL); + assert_se(sd_bus_close_unref(bus) == NULL); + assert_se(sd_bus_unref(bus) == NULL); + sd_bus_close(bus); + assert_se(bus->n_ref == 1); + _exit(EXIT_SUCCESS); + } + + assert_se(r >= 0); + assert_se(bus->n_ref == 1); +} + +static int test_bus_open(void) { + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + int r; + + r = sd_bus_open_user(&bus); + if (IN_SET(r, -ECONNREFUSED, -ENOENT, -ENOMEDIUM)) { + r = sd_bus_open_system(&bus); + if (IN_SET(r, -ECONNREFUSED, -ENOENT)) + return r; + use_system_bus = true; + } + + assert_se(r >= 0); + assert_se(bus->n_ref >= 1); /* we send a hello message when opening, so the count is above 1 */ + + return 0; +} + +static void test_bus_new_method_call(void) { + sd_bus *bus = NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + + assert_se(use_system_bus ? sd_bus_open_system(&bus) >= 0 : sd_bus_open_user(&bus) >= 0); + + assert_se(sd_bus_message_new_method_call(bus, &m, "a.service.name", "/an/object/path", "an.interface.name", "AMethodName") >= 0); + + assert_se(m->n_ref == 1); /* We hold the only reference to the message */ + assert_se(bus->n_ref >= 2); + sd_bus_flush_close_unref(bus); + assert_se(m->n_ref == 1); +} + +static void test_bus_new_signal(void) { + sd_bus *bus = NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + + assert_se(use_system_bus ? sd_bus_open_system(&bus) >= 0 : sd_bus_open_user(&bus) >= 0); + + assert_se(sd_bus_message_new_signal(bus, &m, "/an/object/path", "an.interface.name", "Name") >= 0); + + assert_se(m->n_ref == 1); /* We hold the only reference to the message */ + assert_se(bus->n_ref >= 2); + sd_bus_flush_close_unref(bus); + assert_se(m->n_ref == 1); +} + +int main(int argc, char **argv) { + test_setup_logging(LOG_INFO); + + test_bus_new(); + test_bus_fork(); + + if (test_bus_open() < 0) + return log_tests_skipped("Failed to connect to bus"); + + test_bus_new_method_call(); + test_bus_new_signal(); + + return EXIT_SUCCESS; +} diff --git a/src/libsystemd/sd-bus/test-bus-creds.c b/src/libsystemd/sd-bus/test-bus-creds.c new file mode 100644 index 0000000..13801be --- /dev/null +++ b/src/libsystemd/sd-bus/test-bus-creds.c @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "sd-bus.h" + +#include "bus-dump.h" +#include "cgroup-util.h" +#include "tests.h" + +int main(int argc, char *argv[]) { + _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL; + int r; + + test_setup_logging(LOG_DEBUG); + + if (cg_unified() == -ENOMEDIUM) + return log_tests_skipped("/sys/fs/cgroup/ not available"); + + r = sd_bus_creds_new_from_pid(&creds, 0, _SD_BUS_CREDS_ALL); + log_full_errno(r < 0 ? LOG_ERR : LOG_DEBUG, r, "sd_bus_creds_new_from_pid: %m"); + assert_se(r >= 0); + + bus_creds_dump(creds, NULL, true); + + creds = sd_bus_creds_unref(creds); + + r = sd_bus_creds_new_from_pid(&creds, 1, _SD_BUS_CREDS_ALL); + if (r != -EACCES) { + assert_se(r >= 0); + putchar('\n'); + bus_creds_dump(creds, NULL, true); + } + + return 0; +} diff --git a/src/libsystemd/sd-bus/test-bus-error.c b/src/libsystemd/sd-bus/test-bus-error.c new file mode 100644 index 0000000..a55f3f9 --- /dev/null +++ b/src/libsystemd/sd-bus/test-bus-error.c @@ -0,0 +1,294 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "sd-bus.h" + +#include "alloc-util.h" +#include "bus-common-errors.h" +#include "bus-error.h" +#include "errno-list.h" +#include "errno-util.h" +#include "string-util.h" +#include "tests.h" + +TEST(error) { + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL, second = SD_BUS_ERROR_NULL; + const sd_bus_error const_error = SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_FILE_EXISTS, "const error"); + const sd_bus_error temporarily_const_error = { + .name = SD_BUS_ERROR_ACCESS_DENIED, + .message = "oh! no", + ._need_free = -1, + }; + + assert_se(!sd_bus_error_is_set(&error)); + assert_se(sd_bus_error_set(&error, SD_BUS_ERROR_NOT_SUPPORTED, "xxx") == -EOPNOTSUPP); + assert_se(streq(error.name, SD_BUS_ERROR_NOT_SUPPORTED)); + assert_se(streq(error.message, "xxx")); + assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_NOT_SUPPORTED)); + assert_se(sd_bus_error_has_names_sentinel(&error, SD_BUS_ERROR_NOT_SUPPORTED, NULL)); + assert_se(sd_bus_error_has_names(&error, SD_BUS_ERROR_NOT_SUPPORTED)); + assert_se(sd_bus_error_has_names(&error, SD_BUS_ERROR_NOT_SUPPORTED, SD_BUS_ERROR_FILE_NOT_FOUND)); + assert_se(sd_bus_error_has_names(&error, SD_BUS_ERROR_FILE_NOT_FOUND, SD_BUS_ERROR_NOT_SUPPORTED, NULL)); + assert_se(!sd_bus_error_has_names(&error, SD_BUS_ERROR_FILE_NOT_FOUND)); + assert_se(sd_bus_error_get_errno(&error) == EOPNOTSUPP); + assert_se(sd_bus_error_is_set(&error)); + sd_bus_error_free(&error); + + /* Check with no error */ + assert_se(!sd_bus_error_is_set(&error)); + assert_se(sd_bus_error_setf(&error, NULL, "yyy %i", -1) == 0); + assert_se(error.name == NULL); + assert_se(error.message == NULL); + assert_se(!sd_bus_error_has_name(&error, SD_BUS_ERROR_FILE_NOT_FOUND)); + assert_se(!sd_bus_error_has_names(&error, SD_BUS_ERROR_FILE_NOT_FOUND)); + assert_se(sd_bus_error_get_errno(&error) == 0); + assert_se(!sd_bus_error_is_set(&error)); + + assert_se(sd_bus_error_setf(&error, SD_BUS_ERROR_FILE_NOT_FOUND, "yyy %i", -1) == -ENOENT); + assert_se(streq(error.name, SD_BUS_ERROR_FILE_NOT_FOUND)); + assert_se(streq(error.message, "yyy -1")); + assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_FILE_NOT_FOUND)); + assert_se(sd_bus_error_has_names(&error, SD_BUS_ERROR_FILE_NOT_FOUND)); + assert_se(sd_bus_error_get_errno(&error) == ENOENT); + assert_se(sd_bus_error_is_set(&error)); + + assert_se(!sd_bus_error_is_set(&second)); + assert_se(second._need_free == 0); + assert_se(error._need_free > 0); + assert_se(sd_bus_error_copy(&second, &error) == -ENOENT); + assert_se(second._need_free > 0); + assert_se(streq(error.name, second.name)); + assert_se(streq(error.message, second.message)); + assert_se(sd_bus_error_get_errno(&second) == ENOENT); + assert_se(sd_bus_error_has_name(&second, SD_BUS_ERROR_FILE_NOT_FOUND)); + assert_se(sd_bus_error_has_names(&second, SD_BUS_ERROR_FILE_NOT_FOUND)); + assert_se(sd_bus_error_is_set(&second)); + + sd_bus_error_free(&error); + sd_bus_error_free(&second); + + assert_se(!sd_bus_error_is_set(&second)); + assert_se(const_error._need_free == 0); + assert_se(sd_bus_error_copy(&second, &const_error) == -EEXIST); + assert_se(second._need_free == 0); + assert_se(streq(const_error.name, second.name)); + assert_se(streq(const_error.message, second.message)); + assert_se(sd_bus_error_get_errno(&second) == EEXIST); + assert_se(sd_bus_error_has_name(&second, SD_BUS_ERROR_FILE_EXISTS)); + assert_se(sd_bus_error_is_set(&second)); + sd_bus_error_free(&second); + + assert_se(!sd_bus_error_is_set(&second)); + assert_se(temporarily_const_error._need_free < 0); + assert_se(sd_bus_error_copy(&second, &temporarily_const_error) == -EACCES); + assert_se(second._need_free > 0); + assert_se(streq(temporarily_const_error.name, second.name)); + assert_se(streq(temporarily_const_error.message, second.message)); + assert_se(sd_bus_error_get_errno(&second) == EACCES); + assert_se(sd_bus_error_has_name(&second, SD_BUS_ERROR_ACCESS_DENIED)); + assert_se(sd_bus_error_is_set(&second)); + + assert_se(!sd_bus_error_is_set(&error)); + assert_se(sd_bus_error_set_const(&error, "System.Error.EUCLEAN", "Hallo") == -EUCLEAN); + assert_se(streq(error.name, "System.Error.EUCLEAN")); + assert_se(streq(error.message, "Hallo")); + assert_se(sd_bus_error_has_name(&error, "System.Error.EUCLEAN")); + assert_se(sd_bus_error_get_errno(&error) == EUCLEAN); + assert_se(sd_bus_error_is_set(&error)); + sd_bus_error_free(&error); + + assert_se(!sd_bus_error_is_set(&error)); + assert_se(sd_bus_error_set_errno(&error, EBUSY) == -EBUSY); + assert_se(streq(error.name, "System.Error.EBUSY")); + assert_se(streq(error.message, STRERROR(EBUSY))); + assert_se(sd_bus_error_has_name(&error, "System.Error.EBUSY")); + assert_se(sd_bus_error_get_errno(&error) == EBUSY); + assert_se(sd_bus_error_is_set(&error)); + sd_bus_error_free(&error); + + assert_se(!sd_bus_error_is_set(&error)); + assert_se(sd_bus_error_set_errnof(&error, EIO, "Waldi %c", 'X') == -EIO); + assert_se(streq(error.name, SD_BUS_ERROR_IO_ERROR)); + assert_se(streq(error.message, "Waldi X")); + assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_IO_ERROR)); + assert_se(sd_bus_error_get_errno(&error) == EIO); + assert_se(sd_bus_error_is_set(&error)); + sd_bus_error_free(&error); + + /* Check with no error */ + assert_se(!sd_bus_error_is_set(&error)); + assert_se(sd_bus_error_set_errnof(&error, 0, "Waldi %c", 'X') == 0); + assert_se(error.name == NULL); + assert_se(error.message == NULL); + assert_se(!sd_bus_error_has_name(&error, SD_BUS_ERROR_IO_ERROR)); + assert_se(sd_bus_error_get_errno(&error) == 0); + assert_se(!sd_bus_error_is_set(&error)); +} + +extern const sd_bus_error_map __start_SYSTEMD_BUS_ERROR_MAP[]; +extern const sd_bus_error_map __stop_SYSTEMD_BUS_ERROR_MAP[]; + +static int dump_mapping_table(void) { + const sd_bus_error_map *m; + + printf("----- errno mappings ------\n"); + m = ALIGN_PTR(__start_SYSTEMD_BUS_ERROR_MAP); + while (m < __stop_SYSTEMD_BUS_ERROR_MAP) { + + if (m->code == BUS_ERROR_MAP_END_MARKER) { + m = ALIGN_PTR(m + 1); + continue; + } + + printf("%s -> %i/%s\n", strna(m->name), m->code, strna(errno_to_name(m->code))); + m++; + } + printf("---------------------------\n"); + + return EXIT_SUCCESS; +} + +TEST(errno_mapping_standard) { + assert_se(sd_bus_error_set(NULL, "System.Error.EUCLEAN", NULL) == -EUCLEAN); + assert_se(sd_bus_error_set(NULL, "System.Error.EBUSY", NULL) == -EBUSY); + assert_se(sd_bus_error_set(NULL, "System.Error.EINVAL", NULL) == -EINVAL); + assert_se(sd_bus_error_set(NULL, "System.Error.WHATSIT", NULL) == -EIO); +} + +BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map test_errors[] = { + SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error", 5), + SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-2", 52), + SD_BUS_ERROR_MAP_END +}; + +BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map test_errors2[] = { + SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-3", 33), + SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-4", 44), + SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-33", 333), + SD_BUS_ERROR_MAP_END +}; + +static const sd_bus_error_map test_errors3[] = { + SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-88", 888), + SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-99", 999), + SD_BUS_ERROR_MAP_END +}; + +static const sd_bus_error_map test_errors4[] = { + SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-77", 777), + SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-78", 778), + SD_BUS_ERROR_MAP_END +}; + +static const sd_bus_error_map test_errors_bad1[] = { + SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-1", 0), + SD_BUS_ERROR_MAP_END +}; + +static const sd_bus_error_map test_errors_bad2[] = { + SD_BUS_ERROR_MAP("org.freedesktop.custom-dbus-error-1", -1), + SD_BUS_ERROR_MAP_END +}; + +TEST(errno_mapping_custom) { + assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error", NULL) == -5); + assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-2", NULL) == -52); + assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-x", NULL) == -EIO); + assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-33", NULL) == -333); + + assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-88", NULL) == -EIO); + assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-99", NULL) == -EIO); + assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-77", NULL) == -EIO); + + assert_se(sd_bus_error_add_map(test_errors3) > 0); + assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-88", NULL) == -888); + assert_se(sd_bus_error_add_map(test_errors4) > 0); + assert_se(sd_bus_error_add_map(test_errors4) == 0); + assert_se(sd_bus_error_add_map(test_errors3) == 0); + + assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-99", NULL) == -999); + assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-77", NULL) == -777); + assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-78", NULL) == -778); + assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-2", NULL) == -52); + assert_se(sd_bus_error_set(NULL, "org.freedesktop.custom-dbus-error-y", NULL) == -EIO); + + assert_se(sd_bus_error_set(NULL, BUS_ERROR_NO_SUCH_UNIT, NULL) == -ENOENT); + + assert_se(sd_bus_error_add_map(test_errors_bad1) == -EINVAL); + assert_se(sd_bus_error_add_map(test_errors_bad2) == -EINVAL); +} + +TEST(sd_bus_error_set_errnof) { + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_free_ char *str = NULL; + + assert_se(sd_bus_error_set_errnof(NULL, 0, NULL) == 0); + assert_se(sd_bus_error_set_errnof(NULL, ENOANO, NULL) == -ENOANO); + + assert_se(sd_bus_error_set_errnof(&error, 0, NULL) == 0); + assert_se(!bus_error_is_dirty(&error)); + + assert_se(sd_bus_error_set_errnof(&error, EACCES, NULL) == -EACCES); + assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_ACCESS_DENIED)); + errno = EACCES; + assert_se(asprintf(&str, "%m") >= 0); + assert_se(streq(error.message, str)); + assert_se(error._need_free == 0); + + str = mfree(str); + sd_bus_error_free(&error); + + assert_se(sd_bus_error_set_errnof(&error, ENOANO, NULL) == -ENOANO); + assert_se(sd_bus_error_has_name(&error, "System.Error.ENOANO")); + errno = ENOANO; + assert_se(asprintf(&str, "%m") >= 0); + assert_se(streq(error.message, str)); + assert_se(error._need_free == 1); + + str = mfree(str); + sd_bus_error_free(&error); + + assert_se(sd_bus_error_set_errnof(&error, 100000, NULL) == -100000); + assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_FAILED)); + errno = 100000; + assert_se(asprintf(&str, "%m") >= 0); + assert_se(streq(error.message, str)); + assert_se(error._need_free == 1); + + str = mfree(str); + sd_bus_error_free(&error); + + assert_se(sd_bus_error_set_errnof(NULL, 0, "hoge %s: %m", "foo") == 0); + assert_se(sd_bus_error_set_errnof(NULL, ENOANO, "hoge %s: %m", "foo") == -ENOANO); + + assert_se(sd_bus_error_set_errnof(&error, 0, "hoge %s: %m", "foo") == 0); + assert_se(!bus_error_is_dirty(&error)); + + assert_se(sd_bus_error_set_errnof(&error, EACCES, "hoge %s: %m", "foo") == -EACCES); + assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_ACCESS_DENIED)); + errno = EACCES; + assert_se(asprintf(&str, "hoge %s: %m", "foo") >= 0); + assert_se(streq(error.message, str)); + assert_se(error._need_free == 1); + + str = mfree(str); + sd_bus_error_free(&error); + + assert_se(sd_bus_error_set_errnof(&error, ENOANO, "hoge %s: %m", "foo") == -ENOANO); + assert_se(sd_bus_error_has_name(&error, "System.Error.ENOANO")); + errno = ENOANO; + assert_se(asprintf(&str, "hoge %s: %m", "foo") >= 0); + assert_se(streq(error.message, str)); + assert_se(error._need_free == 1); + + str = mfree(str); + sd_bus_error_free(&error); + + assert_se(sd_bus_error_set_errnof(&error, 100000, "hoge %s: %m", "foo") == -100000); + assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_FAILED)); + errno = 100000; + assert_se(asprintf(&str, "hoge %s: %m", "foo") >= 0); + assert_se(streq(error.message, str)); + assert_se(error._need_free == 1); +} + +DEFINE_TEST_MAIN_WITH_INTRO(LOG_INFO, dump_mapping_table); diff --git a/src/libsystemd/sd-bus/test-bus-introspect.c b/src/libsystemd/sd-bus/test-bus-introspect.c new file mode 100644 index 0000000..3c026ae --- /dev/null +++ b/src/libsystemd/sd-bus/test-bus-introspect.c @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "bus-introspect.h" +#include "log.h" +#include "tests.h" + +#include "test-vtable-data.h" + +static void test_manual_introspection_one(const sd_bus_vtable vtable[]) { + struct introspect intro = {}; + _cleanup_free_ char *s = NULL; + + log_info("/* %s */", __func__); + + assert_se(introspect_begin(&intro, false) >= 0); + + assert_se(introspect_write_interface(&intro, "org.foo", vtable) >= 0); + /* write again to check if output looks OK for a different interface */ + assert_se(introspect_write_interface(&intro, "org.foo.bar", vtable) >= 0); + assert_se(introspect_finish(&intro, &s) == 0); + + fputs(s, stdout); + fputs("\n", stdout); +} + +TEST(manual_introspection) { + test_manual_introspection_one(test_vtable_1); + test_manual_introspection_one(test_vtable_2); + test_manual_introspection_one(test_vtable_deprecated); + test_manual_introspection_one((const sd_bus_vtable *) vtable_format_221); +} + +DEFINE_TEST_MAIN(LOG_DEBUG); diff --git a/src/libsystemd/sd-bus/test-bus-marshal.c b/src/libsystemd/sd-bus/test-bus-marshal.c new file mode 100644 index 0000000..0044d33 --- /dev/null +++ b/src/libsystemd/sd-bus/test-bus-marshal.c @@ -0,0 +1,418 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <math.h> +#include <stdlib.h> + +#if HAVE_GLIB +#include <gio/gio.h> +#endif + +#if HAVE_DBUS +#include <dbus/dbus.h> +#endif + +#include "sd-bus.h" + +#include "alloc-util.h" +#include "bus-dump.h" +#include "bus-label.h" +#include "bus-message.h" +#include "bus-util.h" +#include "escape.h" +#include "fd-util.h" +#include "fileio.h" +#include "log.h" +#include "memstream-util.h" +#include "tests.h" + +static void test_bus_path_encode_unique(void) { + _cleanup_free_ char *a = NULL, *b = NULL, *c = NULL, *d = NULL, *e = NULL; + + assert_se(bus_path_encode_unique(NULL, "/foo/bar", "some.sender", "a.suffix", &a) >= 0 && streq_ptr(a, "/foo/bar/some_2esender/a_2esuffix")); + assert_se(bus_path_decode_unique(a, "/foo/bar", &b, &c) > 0 && streq_ptr(b, "some.sender") && streq_ptr(c, "a.suffix")); + assert_se(bus_path_decode_unique(a, "/bar/foo", &d, &d) == 0 && !d); + assert_se(bus_path_decode_unique("/foo/bar/onlyOneSuffix", "/foo/bar", &d, &d) == 0 && !d); + assert_se(bus_path_decode_unique("/foo/bar/_/_", "/foo/bar", &d, &e) > 0 && streq_ptr(d, "") && streq_ptr(e, "")); +} + +static void test_bus_path_encode(void) { + _cleanup_free_ char *a = NULL, *b = NULL, *c = NULL, *d = NULL, *e = NULL, *f = NULL, *g = NULL; + + assert_se(sd_bus_path_encode("/foo/bar", "waldo", &a) >= 0 && streq(a, "/foo/bar/waldo")); + assert_se(sd_bus_path_decode(a, "/waldo", &b) == 0 && b == NULL); + assert_se(sd_bus_path_decode(a, "/foo/bar", &b) > 0 && streq(b, "waldo")); + + assert_se(sd_bus_path_encode("xxxx", "waldo", &c) < 0); + assert_se(sd_bus_path_encode("/foo/", "waldo", &c) < 0); + + assert_se(sd_bus_path_encode("/foo/bar", "", &c) >= 0 && streq(c, "/foo/bar/_")); + assert_se(sd_bus_path_decode(c, "/foo/bar", &d) > 0 && streq(d, "")); + + assert_se(sd_bus_path_encode("/foo/bar", "foo.bar", &e) >= 0 && streq(e, "/foo/bar/foo_2ebar")); + assert_se(sd_bus_path_decode(e, "/foo/bar", &f) > 0 && streq(f, "foo.bar")); + + assert_se(sd_bus_path_decode("/waldo", "/waldo", &g) > 0 && streq(g, "")); +} + +static void test_bus_path_encode_many(void) { + _cleanup_free_ char *a = NULL, *b = NULL, *c = NULL, *d = NULL, *e = NULL, *f = NULL; + + assert_se(sd_bus_path_decode_many("/foo/bar", "/prefix/%", NULL) == 0); + assert_se(sd_bus_path_decode_many("/prefix/bar", "/prefix/%bar", NULL) == 1); + assert_se(sd_bus_path_decode_many("/foo/bar", "/prefix/%/suffix", NULL) == 0); + assert_se(sd_bus_path_decode_many("/prefix/foobar/suffix", "/prefix/%/suffix", &a) == 1 && streq_ptr(a, "foobar")); + assert_se(sd_bus_path_decode_many("/prefix/one_foo_two/mid/three_bar_four/suffix", "/prefix/one_%_two/mid/three_%_four/suffix", &b, &c) == 1 && streq_ptr(b, "foo") && streq_ptr(c, "bar")); + assert_se(sd_bus_path_decode_many("/prefix/one_foo_two/mid/three_bar_four/suffix", "/prefix/one_%_two/mid/three_%_four/suffix", NULL, &d) == 1 && streq_ptr(d, "bar")); + + assert_se(sd_bus_path_decode_many("/foo/bar", "/foo/bar/%", NULL) == 0); + assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/bar%", NULL) == 0); + assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%/bar", NULL) == 0); + assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%bar", NULL) == 0); + assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/bar/suffix") == 1); + assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%%/suffix", NULL, NULL) == 0); /* multiple '%' are treated verbatim */ + assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%/suffi", NULL) == 0); + assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%/suffix", &e) == 1 && streq_ptr(e, "bar")); + assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/foo/%/%", NULL, NULL) == 1); + assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/%/%/%", NULL, NULL, NULL) == 1); + assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "%/%/%", NULL, NULL, NULL) == 0); + assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/%/%", NULL, NULL) == 0); + assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/%/%/", NULL, NULL) == 0); + assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/%/", NULL) == 0); + assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "/%", NULL) == 0); + assert_se(sd_bus_path_decode_many("/foo/bar/suffix", "%", NULL) == 0); + + assert_se(sd_bus_path_encode_many(&f, "/prefix/one_%_two/mid/three_%_four/suffix", "foo", "bar") >= 0 && streq_ptr(f, "/prefix/one_foo_two/mid/three_bar_four/suffix")); +} + +static void test_bus_label_escape_one(const char *a, const char *b) { + _cleanup_free_ char *t = NULL, *x = NULL, *y = NULL; + + assert_se(t = bus_label_escape(a)); + assert_se(streq(t, b)); + + assert_se(x = bus_label_unescape(t)); + assert_se(streq(a, x)); + + assert_se(y = bus_label_unescape(b)); + assert_se(streq(a, y)); +} + +static void test_bus_label_escape(void) { + test_bus_label_escape_one("foo123bar", "foo123bar"); + test_bus_label_escape_one("foo.bar", "foo_2ebar"); + test_bus_label_escape_one("foo_2ebar", "foo_5f2ebar"); + test_bus_label_escape_one("", "_"); + test_bus_label_escape_one("_", "_5f"); + test_bus_label_escape_one("1", "_31"); + test_bus_label_escape_one(":1", "_3a1"); +} + +int main(int argc, char *argv[]) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *copy = NULL; + _cleanup_free_ char *h = NULL, *first = NULL, *second = NULL, *third = NULL; + const int32_t integer_array[] = { -1, -2, 0, 1, 2 }, *return_array; + const char *x, *x2, *y, *z, *a, *b, *c, *d, *a_signature; + size_t sz, first_size, second_size = 0, third_size = 0; + _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL; + _cleanup_(memstream_done) MemStream ms = {}; + void *buffer = NULL; + int r, boolean; + uint64_t u64; + uint8_t u, v; + double dbl; + FILE *mf; + char *s; + + test_setup_logging(LOG_INFO); + + r = sd_bus_default_user(&bus); + if (r < 0) + r = sd_bus_default_system(&bus); + if (r < 0) + return log_tests_skipped("Failed to connect to bus"); + + r = sd_bus_message_new_method_call(bus, &m, "foobar.waldo", "/", "foobar.waldo", "Piep"); + assert_se(r >= 0); + + r = sd_bus_message_append(m, ""); + assert_se(r >= 0); + + r = sd_bus_message_append(m, "s", "a string"); + assert_se(r >= 0); + + r = sd_bus_message_append(m, "s", NULL); + assert_se(r >= 0); + + r = sd_bus_message_append(m, "asg", 2, "string #1", "string #2", "sba(tt)ss"); + assert_se(r >= 0); + + r = sd_bus_message_append(m, "sass", "foobar", 5, "foo", "bar", "waldo", "piep", "pap", "after"); + assert_se(r >= 0); + + r = sd_bus_message_append(m, "a{yv}", 2, 3, "s", "foo", 5, "s", "waldo"); + assert_se(r >= 0); + + r = sd_bus_message_append(m, "y(ty)y(yt)y", 8, 777ULL, 7, 9, 77, 7777ULL, 10); + assert_se(r >= 0); + + r = sd_bus_message_append(m, "()"); + assert_se(r == -EINVAL); + + r = sd_bus_message_append(m, "ba(ss)", 255, 3, "aaa", "1", "bbb", "2", "ccc", "3"); + assert_se(r >= 0); + + r = sd_bus_message_open_container(m, 'a', "s"); + assert_se(r >= 0); + + r = sd_bus_message_append_basic(m, 's', "foobar"); + assert_se(r >= 0); + + r = sd_bus_message_append_basic(m, 's', "waldo"); + assert_se(r >= 0); + + r = sd_bus_message_close_container(m); + assert_se(r >= 0); + + r = sd_bus_message_append_string_space(m, 5, &s); + assert_se(r >= 0); + strcpy(s, "hallo"); + + r = sd_bus_message_append_array(m, 'i', integer_array, sizeof(integer_array)); + assert_se(r >= 0); + + r = sd_bus_message_append_array(m, 'u', NULL, 0); + assert_se(r >= 0); + + r = sd_bus_message_append(m, "a(stdo)", 1, "foo", 815ULL, 47.0, "/"); + assert_se(r >= 0); + + r = sd_bus_message_seal(m, 4711, 0); + assert_se(r >= 0); + + sd_bus_message_dump(m, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER); + + assert_se(mf = memstream_init(&ms)); + sd_bus_message_dump(m, mf, 0); + assert_se(memstream_finalize(&ms, &first, &first_size) >= 0); + + r = bus_message_get_blob(m, &buffer, &sz); + assert_se(r >= 0); + + h = cescape_length(buffer, sz); + assert_se(h); + log_info("message size = %zu, contents =\n%s", sz, h); + +#if HAVE_GLIB + /* Work-around for asan bug. See c8d980a3e962aba2ea3a4cedf75fa94890a6d746. */ +#if !HAS_FEATURE_ADDRESS_SANITIZER + { + GDBusMessage *g; + char *p; + +#if !defined(GLIB_VERSION_2_36) + g_type_init(); +#endif + + g = g_dbus_message_new_from_blob(buffer, sz, 0, NULL); + p = g_dbus_message_print(g, 0); + log_info("%s", p); + g_free(p); + g_object_unref(g); + } +#endif +#endif + +#if HAVE_DBUS + { + DBusMessage *w; + DBusError error; + + dbus_error_init(&error); + + w = dbus_message_demarshal(buffer, sz, &error); + if (!w) + log_error("%s", error.message); + else + dbus_message_unref(w); + + dbus_error_free(&error); + } +#endif + + m = sd_bus_message_unref(m); + + r = bus_message_from_malloc(bus, buffer, sz, NULL, 0, NULL, &m); + assert_se(r >= 0); + + sd_bus_message_dump(m, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER); + + assert_se(mf = memstream_init(&ms)); + sd_bus_message_dump(m, mf, 0); + assert_se(memstream_finalize(&ms, &second, &second_size) >= 0); + assert_se(first_size == second_size); + assert_se(memcmp(first, second, first_size) == 0); + + assert_se(sd_bus_message_rewind(m, true) >= 0); + + r = sd_bus_message_read(m, "ssasg", &x, &x2, 2, &y, &z, &a_signature); + assert_se(r > 0); + assert_se(streq(x, "a string")); + assert_se(streq(x2, "")); + assert_se(streq(y, "string #1")); + assert_se(streq(z, "string #2")); + assert_se(streq(a_signature, "sba(tt)ss")); + + r = sd_bus_message_read(m, "sass", &x, 5, &y, &z, &a, &b, &c, &d); + assert_se(r > 0); + assert_se(streq(x, "foobar")); + assert_se(streq(y, "foo")); + assert_se(streq(z, "bar")); + assert_se(streq(a, "waldo")); + assert_se(streq(b, "piep")); + assert_se(streq(c, "pap")); + assert_se(streq(d, "after")); + + r = sd_bus_message_read(m, "a{yv}", 2, &u, "s", &x, &v, "s", &y); + assert_se(r > 0); + assert_se(u == 3); + assert_se(streq(x, "foo")); + assert_se(v == 5); + assert_se(streq(y, "waldo")); + + r = sd_bus_message_read(m, "y(ty)", &v, &u64, &u); + assert_se(r > 0); + assert_se(v == 8); + assert_se(u64 == 777); + assert_se(u == 7); + + r = sd_bus_message_read(m, "y(yt)", &v, &u, &u64); + assert_se(r > 0); + assert_se(v == 9); + assert_se(u == 77); + assert_se(u64 == 7777); + + r = sd_bus_message_read(m, "y", &v); + assert_se(r > 0); + assert_se(v == 10); + + r = sd_bus_message_read(m, "()"); + assert_se(r < 0); + + r = sd_bus_message_read(m, "ba(ss)", &boolean, 3, &x, &y, &a, &b, &c, &d); + assert_se(r > 0); + assert_se(boolean); + assert_se(streq(x, "aaa")); + assert_se(streq(y, "1")); + assert_se(streq(a, "bbb")); + assert_se(streq(b, "2")); + assert_se(streq(c, "ccc")); + assert_se(streq(d, "3")); + + assert_se(sd_bus_message_verify_type(m, 'a', "s") > 0); + + r = sd_bus_message_read(m, "as", 2, &x, &y); + assert_se(r > 0); + assert_se(streq(x, "foobar")); + assert_se(streq(y, "waldo")); + + r = sd_bus_message_read_basic(m, 's', &s); + assert_se(r > 0); + assert_se(streq(s, "hallo")); + + r = sd_bus_message_read_array(m, 'i', (const void**) &return_array, &sz); + assert_se(r > 0); + assert_se(sz == sizeof(integer_array)); + assert_se(memcmp(integer_array, return_array, sz) == 0); + + r = sd_bus_message_read_array(m, 'u', (const void**) &return_array, &sz); + assert_se(r > 0); + assert_se(sz == 0); + + r = sd_bus_message_read(m, "a(stdo)", 1, &x, &u64, &dbl, &y); + assert_se(r > 0); + assert_se(streq(x, "foo")); + assert_se(u64 == 815ULL); + assert_se(fabs(dbl - 47.0) < 0.1); + assert_se(streq(y, "/")); + + r = sd_bus_message_peek_type(m, NULL, NULL); + assert_se(r == 0); + + r = sd_bus_message_new_method_call(bus, ©, "foobar.waldo", "/", "foobar.waldo", "Piep"); + assert_se(r >= 0); + + r = sd_bus_message_rewind(m, true); + assert_se(r >= 0); + + r = sd_bus_message_copy(copy, m, true); + assert_se(r >= 0); + + r = sd_bus_message_seal(copy, 4712, 0); + assert_se(r >= 0); + + assert_se(mf = memstream_init(&ms)); + sd_bus_message_dump(copy, mf, 0); + assert_se(memstream_finalize(&ms, &third, &third_size) >= 0); + + printf("<%.*s>\n", (int) first_size, first); + printf("<%.*s>\n", (int) third_size, third); + + assert_se(first_size == third_size); + assert_se(memcmp(first, third, third_size) == 0); + + r = sd_bus_message_rewind(m, true); + assert_se(r >= 0); + + assert_se(sd_bus_message_verify_type(m, 's', NULL) > 0); + + r = sd_bus_message_skip(m, "ssasg"); + assert_se(r > 0); + + assert_se(sd_bus_message_verify_type(m, 's', NULL) > 0); + + r = sd_bus_message_skip(m, "sass"); + assert_se(r >= 0); + + assert_se(sd_bus_message_verify_type(m, 'a', "{yv}") > 0); + + r = sd_bus_message_skip(m, "a{yv}y(ty)y(yt)y"); + assert_se(r >= 0); + + assert_se(sd_bus_message_verify_type(m, 'b', NULL) > 0); + + r = sd_bus_message_read(m, "b", &boolean); + assert_se(r > 0); + assert_se(boolean); + + r = sd_bus_message_enter_container(m, 0, NULL); + assert_se(r > 0); + + r = sd_bus_message_read(m, "(ss)", &x, &y); + assert_se(r > 0); + + r = sd_bus_message_read(m, "(ss)", &a, &b); + assert_se(r > 0); + + r = sd_bus_message_read(m, "(ss)", &c, &d); + assert_se(r > 0); + + r = sd_bus_message_read(m, "(ss)", &x, &y); + assert_se(r == 0); + + r = sd_bus_message_exit_container(m); + assert_se(r >= 0); + + assert_se(streq(x, "aaa")); + assert_se(streq(y, "1")); + assert_se(streq(a, "bbb")); + assert_se(streq(b, "2")); + assert_se(streq(c, "ccc")); + assert_se(streq(d, "3")); + + test_bus_label_escape(); + test_bus_path_encode(); + test_bus_path_encode_unique(); + test_bus_path_encode_many(); + + return 0; +} diff --git a/src/libsystemd/sd-bus/test-bus-match.c b/src/libsystemd/sd-bus/test-bus-match.c new file mode 100644 index 0000000..2d77557 --- /dev/null +++ b/src/libsystemd/sd-bus/test-bus-match.c @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "bus-match.h" +#include "bus-message.h" +#include "bus-slot.h" +#include "log.h" +#include "macro.h" +#include "memory-util.h" +#include "tests.h" + +static bool mask[32]; + +static int filter(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) { + log_info("Ran %u", PTR_TO_UINT(userdata)); + assert_se(PTR_TO_UINT(userdata) < ELEMENTSOF(mask)); + mask[PTR_TO_UINT(userdata)] = true; + return 0; +} + +static bool mask_contains(unsigned a[], unsigned n) { + unsigned i, j; + + for (i = 0; i < ELEMENTSOF(mask); i++) { + bool found = false; + + for (j = 0; j < n; j++) + if (a[j] == i) { + found = true; + break; + } + + if (found != mask[i]) + return false; + } + + return true; +} + +static int match_add(sd_bus_slot *slots, struct bus_match_node *root, const char *match, int value) { + struct bus_match_component *components; + size_t n_components; + sd_bus_slot *s; + int r; + + s = slots + value; + + r = bus_match_parse(match, &components, &n_components); + if (r < 0) + return r; + + CLEANUP_ARRAY(components, n_components, bus_match_parse_free); + + s->userdata = INT_TO_PTR(value); + s->match_callback.callback = filter; + + return bus_match_add(root, components, n_components, &s->match_callback); +} + +static void test_match_scope(const char *match, enum bus_match_scope scope) { + struct bus_match_component *components = NULL; + size_t n_components = 0; + + CLEANUP_ARRAY(components, n_components, bus_match_parse_free); + + assert_se(bus_match_parse(match, &components, &n_components) >= 0); + assert_se(bus_match_get_scope(components, n_components) == scope); +} + +int main(int argc, char *argv[]) { + struct bus_match_node root = { + .type = BUS_MATCH_ROOT, + }; + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + sd_bus_slot slots[19] = {}; + int r; + + test_setup_logging(LOG_INFO); + + r = sd_bus_open_user(&bus); + if (r < 0) + r = sd_bus_open_system(&bus); + if (r < 0) + return log_tests_skipped("Failed to connect to bus"); + + assert_se(match_add(slots, &root, "arg2='wal\\'do',sender='foo',type='signal',interface='bar.x',", 1) >= 0); + assert_se(match_add(slots, &root, "arg2='wal\\'do2',sender='foo',type='signal',interface='bar.x',", 2) >= 0); + assert_se(match_add(slots, &root, "arg3='test',sender='foo',type='signal',interface='bar.x',", 3) >= 0); + assert_se(match_add(slots, &root, "arg3='test',sender='foo',type='method_call',interface='bar.x',", 4) >= 0); + assert_se(match_add(slots, &root, "", 5) >= 0); + assert_se(match_add(slots, &root, "interface='quux.x'", 6) >= 0); + assert_se(match_add(slots, &root, "interface='bar.x'", 7) >= 0); + assert_se(match_add(slots, &root, "member='waldo',path='/foo/bar'", 8) >= 0); + assert_se(match_add(slots, &root, "path='/foo/bar'", 9) >= 0); + assert_se(match_add(slots, &root, "path_namespace='/foo'", 10) >= 0); + assert_se(match_add(slots, &root, "path_namespace='/foo/quux'", 11) >= 0); + assert_se(match_add(slots, &root, "arg1='two'", 12) >= 0); + assert_se(match_add(slots, &root, "member='waldo',arg2path='/prefix/'", 13) >= 0); + assert_se(match_add(slots, &root, "member=waldo,path='/foo/bar',arg3namespace='prefix'", 14) >= 0); + assert_se(match_add(slots, &root, "arg4has='pi'", 15) >= 0); + assert_se(match_add(slots, &root, "arg4has='pa'", 16) >= 0); + assert_se(match_add(slots, &root, "arg4has='po'", 17) >= 0); + assert_se(match_add(slots, &root, "arg4='pi'", 18) >= 0); + + bus_match_dump(stdout, &root, 0); + + assert_se(sd_bus_message_new_signal(bus, &m, "/foo/bar", "bar.x", "waldo") >= 0); + assert_se(sd_bus_message_append(m, "ssssas", "one", "two", "/prefix/three", "prefix.four", 3, "pi", "pa", "po") >= 0); + assert_se(sd_bus_message_seal(m, 1, 0) >= 0); + + zero(mask); + assert_se(bus_match_run(NULL, &root, m) == 0); + assert_se(mask_contains((unsigned[]) { 9, 8, 7, 5, 10, 12, 13, 14, 15, 16, 17 }, 11)); + + assert_se(bus_match_remove(&root, &slots[8].match_callback) >= 0); + assert_se(bus_match_remove(&root, &slots[13].match_callback) >= 0); + + bus_match_dump(stdout, &root, 0); + + zero(mask); + assert_se(bus_match_run(NULL, &root, m) == 0); + assert_se(mask_contains((unsigned[]) { 9, 5, 10, 12, 14, 7, 15, 16, 17 }, 9)); + + for (enum bus_match_node_type i = 0; i < _BUS_MATCH_NODE_TYPE_MAX; i++) { + char buf[32]; + const char *x; + + assert_se(x = bus_match_node_type_to_string(i, buf, sizeof(buf))); + + if (i >= BUS_MATCH_MESSAGE_TYPE) + assert_se(bus_match_node_type_from_string(x, strlen(x)) == i); + } + + bus_match_free(&root); + + test_match_scope("interface='foobar'", BUS_MATCH_GENERIC); + test_match_scope("", BUS_MATCH_GENERIC); + test_match_scope("interface='org.freedesktop.DBus.Local'", BUS_MATCH_LOCAL); + test_match_scope("sender='org.freedesktop.DBus.Local'", BUS_MATCH_LOCAL); + test_match_scope("member='gurke',path='/org/freedesktop/DBus/Local'", BUS_MATCH_LOCAL); + test_match_scope("arg2='piep',sender='org.freedesktop.DBus',member='waldo'", BUS_MATCH_DRIVER); + + return 0; +} diff --git a/src/libsystemd/sd-bus/test-bus-objects.c b/src/libsystemd/sd-bus/test-bus-objects.c new file mode 100644 index 0000000..ccdd0d5 --- /dev/null +++ b/src/libsystemd/sd-bus/test-bus-objects.c @@ -0,0 +1,677 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <pthread.h> +#include <stdlib.h> + +#include "sd-bus.h" + +#include "alloc-util.h" +#include "bus-dump.h" +#include "bus-internal.h" +#include "bus-message.h" +#include "log.h" +#include "macro.h" +#include "strv.h" +#include "tests.h" + +struct context { + int fds[2]; + bool quit; + char *something; + char *automatic_string_property; + uint32_t automatic_integer_property; +}; + +static int something_handler(sd_bus_message *m, void *userdata, sd_bus_error *error) { + struct context *c = userdata; + const char *s; + char *n = NULL; + int r; + + r = sd_bus_message_read(m, "s", &s); + assert_se(r > 0); + + n = strjoin("<<<", s, ">>>"); + assert_se(n); + + free(c->something); + c->something = n; + + log_info("AlterSomething() called, got %s, returning %s", s, n); + + /* This should fail, since the return type doesn't match */ + assert_se(sd_bus_reply_method_return(m, "u", 4711) == -ENOMSG); + + r = sd_bus_reply_method_return(m, "s", n); + assert_se(r >= 0); + + return 1; +} + +static int exit_handler(sd_bus_message *m, void *userdata, sd_bus_error *error) { + struct context *c = userdata; + int r; + + c->quit = true; + + log_info("Exit called"); + + r = sd_bus_reply_method_return(m, ""); + assert_se(r >= 0); + + return 1; +} + +static int get_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error) { + struct context *c = userdata; + int r; + + log_info("property get for %s called, returning \"%s\".", property, c->something); + + r = sd_bus_message_append(reply, "s", c->something); + assert_se(r >= 0); + + return 1; +} + +static int set_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *error) { + struct context *c = userdata; + const char *s; + char *n; + int r; + + log_info("property set for %s called", property); + + r = sd_bus_message_read(value, "s", &s); + assert_se(r >= 0); + + n = strdup(s); + assert_se(n); + + free(c->something); + c->something = n; + + return 1; +} + +static int value_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error) { + _cleanup_free_ char *s = NULL; + const char *x; + int r; + + assert_se(asprintf(&s, "object %p, path %s", userdata, path) >= 0); + r = sd_bus_message_append(reply, "s", s); + assert_se(r >= 0); + + assert_se(x = startswith(path, "/value/")); + + assert_se(PTR_TO_UINT(userdata) == 30); + + return 1; +} + +static int notify_test(sd_bus_message *m, void *userdata, sd_bus_error *error) { + int r; + + assert_se(sd_bus_emit_properties_changed(sd_bus_message_get_bus(m), m->path, "org.freedesktop.systemd.ValueTest", "Value", NULL) >= 0); + + r = sd_bus_reply_method_return(m, NULL); + assert_se(r >= 0); + + return 1; +} + +static int notify_test2(sd_bus_message *m, void *userdata, sd_bus_error *error) { + int r; + + assert_se(sd_bus_emit_properties_changed_strv(sd_bus_message_get_bus(m), m->path, "org.freedesktop.systemd.ValueTest", NULL) >= 0); + + r = sd_bus_reply_method_return(m, NULL); + assert_se(r >= 0); + + return 1; +} + +static int emit_interfaces_added(sd_bus_message *m, void *userdata, sd_bus_error *error) { + int r; + + assert_se(sd_bus_emit_interfaces_added(sd_bus_message_get_bus(m), "/value/a/x", "org.freedesktop.systemd.ValueTest", NULL) >= 0); + + r = sd_bus_reply_method_return(m, NULL); + assert_se(r >= 0); + + return 1; +} + +static int emit_interfaces_removed(sd_bus_message *m, void *userdata, sd_bus_error *error) { + int r; + + assert_se(sd_bus_emit_interfaces_removed(sd_bus_message_get_bus(m), "/value/a/x", "org.freedesktop.systemd.ValueTest", NULL) >= 0); + + r = sd_bus_reply_method_return(m, NULL); + assert_se(r >= 0); + + return 1; +} + +static int emit_object_added(sd_bus_message *m, void *userdata, sd_bus_error *error) { + int r; + + assert_se(sd_bus_emit_object_added(sd_bus_message_get_bus(m), "/value/a/x") >= 0); + + r = sd_bus_reply_method_return(m, NULL); + assert_se(r >= 0); + + return 1; +} + +static int emit_object_with_manager_added(sd_bus_message *m, void *userdata, sd_bus_error *error) { + assert_se(sd_bus_emit_object_added(sd_bus_message_get_bus(m), "/value/a") >= 0); + + return ASSERT_SE_NONNEG(sd_bus_reply_method_return(m, NULL)); +} + +static int emit_object_removed(sd_bus_message *m, void *userdata, sd_bus_error *error) { + int r; + + assert_se(sd_bus_emit_object_removed(sd_bus_message_get_bus(m), "/value/a/x") >= 0); + + r = sd_bus_reply_method_return(m, NULL); + assert_se(r >= 0); + + return 1; +} + +static const sd_bus_vtable vtable[] = { + SD_BUS_VTABLE_START(0), + SD_BUS_METHOD("AlterSomething", "s", "s", something_handler, 0), + SD_BUS_METHOD("Exit", "", "", exit_handler, 0), + SD_BUS_WRITABLE_PROPERTY("Something", "s", get_handler, set_handler, 0, 0), + SD_BUS_WRITABLE_PROPERTY("AutomaticStringProperty", "s", NULL, NULL, offsetof(struct context, automatic_string_property), 0), + SD_BUS_WRITABLE_PROPERTY("AutomaticIntegerProperty", "u", NULL, NULL, offsetof(struct context, automatic_integer_property), 0), + SD_BUS_METHOD("NoOperation", NULL, NULL, NULL, 0), + SD_BUS_METHOD("EmitInterfacesAdded", NULL, NULL, emit_interfaces_added, 0), + SD_BUS_METHOD("EmitInterfacesRemoved", NULL, NULL, emit_interfaces_removed, 0), + SD_BUS_METHOD("EmitObjectAdded", NULL, NULL, emit_object_added, 0), + SD_BUS_METHOD("EmitObjectWithManagerAdded", NULL, NULL, emit_object_with_manager_added, 0), + SD_BUS_METHOD("EmitObjectRemoved", NULL, NULL, emit_object_removed, 0), + SD_BUS_VTABLE_END +}; + +static const sd_bus_vtable vtable2[] = { + SD_BUS_VTABLE_START(0), + SD_BUS_METHOD("NotifyTest", "", "", notify_test, 0), + SD_BUS_METHOD("NotifyTest2", "", "", notify_test2, 0), + SD_BUS_PROPERTY("Value", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("Value2", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), + SD_BUS_PROPERTY("Value3", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("Value4", "s", value_handler, 10, 0), + SD_BUS_PROPERTY("AnExplicitProperty", "s", NULL, offsetof(struct context, something), SD_BUS_VTABLE_PROPERTY_EXPLICIT|SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), + SD_BUS_VTABLE_END +}; + +static int enumerator_callback(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) { + + if (object_path_startswith("/value", path)) + assert_se(*nodes = strv_new("/value/a", "/value/b", "/value/c")); + + return 1; +} + +static int enumerator2_callback(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) { + + if (object_path_startswith("/value/a", path)) + assert_se(*nodes = strv_new("/value/a/x", "/value/a/y", "/value/a/z")); + + return 1; +} + +static int enumerator3_callback(sd_bus *bus, const char *path, void *userdata, char ***nodes, sd_bus_error *error) { + _cleanup_strv_free_ char **v = NULL; + + if (!object_path_startswith("/value/b", path)) + return 1; + + for (unsigned i = 0; i < 30; i++) + assert_se(strv_extendf(&v, "/value/b/%u", i) >= 0); + + *nodes = TAKE_PTR(v); + return 1; +} + +static void *server(void *p) { + struct context *c = p; + sd_bus *bus = NULL; + sd_id128_t id; + int r; + + c->quit = false; + + assert_se(sd_id128_randomize(&id) >= 0); + + assert_se(sd_bus_new(&bus) >= 0); + assert_se(sd_bus_set_fd(bus, c->fds[0], c->fds[0]) >= 0); + assert_se(sd_bus_set_server(bus, 1, id) >= 0); + + assert_se(sd_bus_add_object_vtable(bus, NULL, "/foo", "org.freedesktop.systemd.test", vtable, c) >= 0); + assert_se(sd_bus_add_object_vtable(bus, NULL, "/foo", "org.freedesktop.systemd.test2", vtable, c) >= 0); + assert_se(sd_bus_add_fallback_vtable(bus, NULL, "/value", "org.freedesktop.systemd.ValueTest", vtable2, NULL, UINT_TO_PTR(20)) >= 0); + assert_se(sd_bus_add_node_enumerator(bus, NULL, "/value", enumerator_callback, NULL) >= 0); + assert_se(sd_bus_add_node_enumerator(bus, NULL, "/value/a", enumerator2_callback, NULL) >= 0); + assert_se(sd_bus_add_node_enumerator(bus, NULL, "/value/b", enumerator3_callback, NULL) >= 0); + assert_se(sd_bus_add_object_manager(bus, NULL, "/value") >= 0); + assert_se(sd_bus_add_object_manager(bus, NULL, "/value/a") >= 0); + + assert_se(sd_bus_start(bus) >= 0); + + log_error("Entering event loop on server"); + + while (!c->quit) { + log_error("Loop!"); + + r = sd_bus_process(bus, NULL); + if (r < 0) { + log_error_errno(r, "Failed to process requests: %m"); + goto fail; + } + + if (r == 0) { + r = sd_bus_wait(bus, UINT64_MAX); + if (r < 0) { + log_error_errno(r, "Failed to wait: %m"); + goto fail; + } + + continue; + } + } + + r = 0; + +fail: + if (bus) { + sd_bus_flush(bus); + sd_bus_unref(bus); + } + + return INT_TO_PTR(r); +} + +static int client(struct context *c) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_strv_free_ char **lines = NULL; + const char *s; + int r; + + assert_se(sd_bus_new(&bus) >= 0); + assert_se(sd_bus_set_fd(bus, c->fds[1], c->fds[1]) >= 0); + assert_se(sd_bus_start(bus) >= 0); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "NoOperation", &error, NULL, NULL); + assert_se(r >= 0); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "AlterSomething", &error, &reply, "s", "hallo"); + assert_se(r >= 0); + + r = sd_bus_message_read(reply, "s", &s); + assert_se(r >= 0); + assert_se(streq(s, "<<<hallo>>>")); + + reply = sd_bus_message_unref(reply); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Doesntexist", &error, &reply, ""); + assert_se(r < 0); + assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_METHOD)); + + sd_bus_error_free(&error); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Doesntexist", &error, &reply, NULL); /* NULL and "" are equivalent */ + assert_se(r < 0); + assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_METHOD)); + + sd_bus_error_free(&error); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "AlterSomething", &error, &reply, "as", 1, "hallo"); + assert_se(r < 0); + assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_INVALID_ARGS)); + + sd_bus_error_free(&error); + + r = sd_bus_get_property(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Something", &error, &reply, "s"); + assert_se(r >= 0); + + r = sd_bus_message_read(reply, "s", &s); + assert_se(r >= 0); + assert_se(streq(s, "<<<hallo>>>")); + + reply = sd_bus_message_unref(reply); + + r = sd_bus_set_property(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Something", &error, "s", "test"); + assert_se(r >= 0); + + r = sd_bus_get_property(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Something", &error, &reply, "s"); + assert_se(r >= 0); + + r = sd_bus_message_read(reply, "s", &s); + assert_se(r >= 0); + assert_se(streq(s, "test")); + + reply = sd_bus_message_unref(reply); + + r = sd_bus_set_property(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "AutomaticIntegerProperty", &error, "u", 815); + assert_se(r >= 0); + + assert_se(c->automatic_integer_property == 815); + + r = sd_bus_set_property(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "AutomaticStringProperty", &error, "s", "Du Dödel, Du!"); + assert_se(r >= 0); + + assert_se(streq(c->automatic_string_property, "Du Dödel, Du!")); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply, ""); + assert_se(r >= 0); + + r = sd_bus_message_read(reply, "s", &s); + assert_se(r >= 0); + fputs(s, stdout); + + reply = sd_bus_message_unref(reply); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply, NULL); /* NULL and "" are equivalent */ + assert_se(r >= 0); + + r = sd_bus_message_read(reply, "s", &s); + assert_se(r >= 0); + fputs(s, stdout); + + reply = sd_bus_message_unref(reply); + + r = sd_bus_get_property(bus, "org.freedesktop.systemd.test", "/value/xuzz", "org.freedesktop.systemd.ValueTest", "Value", &error, &reply, "s"); + assert_se(r >= 0); + + r = sd_bus_message_read(reply, "s", &s); + assert_se(r >= 0); + log_info("read %s", s); + + reply = sd_bus_message_unref(reply); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/", "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply, NULL); + assert_se(r >= 0); + + r = sd_bus_message_read(reply, "s", &s); + assert_se(r >= 0); + fputs(s, stdout); + + reply = sd_bus_message_unref(reply); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value", "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply, NULL); + assert_se(r >= 0); + + r = sd_bus_message_read(reply, "s", &s); + assert_se(r >= 0); + fputs(s, stdout); + + reply = sd_bus_message_unref(reply); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value/a", "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply, NULL); + assert_se(r >= 0); + + r = sd_bus_message_read(reply, "s", &s); + assert_se(r >= 0); + fputs(s, stdout); + + assert_se(lines = strv_split_newlines(s)); + assert_se(strv_contains(lines, " <node name=\"x\"/>")); + assert_se(strv_contains(lines, " <node name=\"y\"/>")); + assert_se(strv_contains(lines, " <node name=\"z\"/>")); + lines = strv_free(lines); + + reply = sd_bus_message_unref(reply); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value/b", "org.freedesktop.DBus.Introspectable", "Introspect", &error, &reply, NULL); + assert_se(r >= 0); + + r = sd_bus_message_read(reply, "s", &s); + assert_se(r >= 0); + fputs(s, stdout); + + assert_se(lines = strv_split_newlines(s)); + for (unsigned i = 0; i < 30; i++) { + _cleanup_free_ char *n = NULL; + + assert_se(asprintf(&n, " <node name=\"%u\"/>", i) >= 0); + assert_se(strv_contains(lines, n)); + } + lines = strv_free(lines); + + reply = sd_bus_message_unref(reply); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.DBus.Properties", "GetAll", &error, &reply, "s", NULL); + assert_se(r >= 0); + + sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER); + + reply = sd_bus_message_unref(reply); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value/a", "org.freedesktop.DBus.Properties", "GetAll", &error, &reply, "s", "org.freedesktop.systemd.ValueTest2"); + assert_se(r < 0); + assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_INTERFACE)); + sd_bus_error_free(&error); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.DBus.ObjectManager", "GetManagedObjects", &error, &reply, NULL); + assert_se(r < 0); + assert_se(sd_bus_error_has_name(&error, SD_BUS_ERROR_UNKNOWN_METHOD)); + sd_bus_error_free(&error); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value", "org.freedesktop.DBus.ObjectManager", "GetManagedObjects", &error, &reply, NULL); + assert_se(r >= 0); + + sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER); + + /* Check that /value/b does not have ObjectManager interface but /value/a does */ + assert_se(sd_bus_message_rewind(reply, 1) > 0); + assert_se(sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "{oa{sa{sv}}}") > 0); + while (ASSERT_SE_NONNEG(sd_bus_message_enter_container(reply, SD_BUS_TYPE_DICT_ENTRY, "oa{sa{sv}}")) > 0) { + const char *path = NULL; + assert_se(sd_bus_message_read_basic(reply, 'o', &path) > 0); + if (STR_IN_SET(path, "/value/b", "/value/a")) { + /* Check that there is no object manager interface here */ + bool found_object_manager_interface = false; + assert_se(sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "{sa{sv}}") > 0); + while (ASSERT_SE_NONNEG(sd_bus_message_enter_container(reply, SD_BUS_TYPE_DICT_ENTRY, "sa{sv}")) > 0) { + const char *interface_name = NULL; + assert_se(sd_bus_message_read_basic(reply, 's', &interface_name) > 0); + + if (streq(interface_name, "org.freedesktop.DBus.ObjectManager")) { + assert_se(!streq(path, "/value/b")); + found_object_manager_interface = true; + } + + assert_se(sd_bus_message_skip(reply, "a{sv}") >= 0); + assert_se(sd_bus_message_exit_container(reply) >= 0); + } + assert_se(sd_bus_message_exit_container(reply) >= 0); + + if (streq(path, "/value/a")) { + /* ObjectManager must be here */ + assert_se(found_object_manager_interface); + } + + } else + assert_se(sd_bus_message_skip(reply, "a{sa{sv}}") >= 0); + + assert_se(sd_bus_message_exit_container(reply) >= 0); + } + + reply = sd_bus_message_unref(reply); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value/a", "org.freedesktop.systemd.ValueTest", "NotifyTest", &error, NULL, NULL); + assert_se(r >= 0); + + r = sd_bus_process(bus, &reply); + assert_se(r > 0); + + assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.Properties", "PropertiesChanged")); + sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER); + + reply = sd_bus_message_unref(reply); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/value/a", "org.freedesktop.systemd.ValueTest", "NotifyTest2", &error, NULL, NULL); + assert_se(r >= 0); + + r = sd_bus_process(bus, &reply); + assert_se(r > 0); + + assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.Properties", "PropertiesChanged")); + sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER); + + reply = sd_bus_message_unref(reply); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "EmitInterfacesAdded", &error, NULL, NULL); + assert_se(r >= 0); + + r = sd_bus_process(bus, &reply); + assert_se(r > 0); + + assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.ObjectManager", "InterfacesAdded")); + sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER); + + reply = sd_bus_message_unref(reply); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "EmitInterfacesRemoved", &error, NULL, NULL); + assert_se(r >= 0); + + r = sd_bus_process(bus, &reply); + assert_se(r > 0); + + assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.ObjectManager", "InterfacesRemoved")); + sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER); + + reply = sd_bus_message_unref(reply); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "EmitObjectAdded", &error, NULL, NULL); + assert_se(r >= 0); + + r = sd_bus_process(bus, &reply); + assert_se(r > 0); + + assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.ObjectManager", "InterfacesAdded")); + sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER); + + /* Check if /value/a/x does not have org.freedesktop.DBus.ObjectManager */ + assert_se(sd_bus_message_rewind(reply, 1) >= 0); + const char* should_be_value_a_x = NULL; + assert_se(sd_bus_message_read_basic(reply, 'o', &should_be_value_a_x) > 0); + assert_se(streq(should_be_value_a_x, "/value/a/x")); + assert_se(sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "{sa{sv}}") > 0); + while (ASSERT_SE_NONNEG(sd_bus_message_enter_container(reply, SD_BUS_TYPE_DICT_ENTRY, "sa{sv}")) > 0) { + const char* interface_name = NULL; + assert_se(sd_bus_message_read_basic(reply, 's', &interface_name) > 0); + + assert(!streq(interface_name, "org.freedesktop.DBus.ObjectManager")); + + assert_se(sd_bus_message_skip(reply, "a{sv}") >= 0); + + assert_se(sd_bus_message_exit_container(reply) >= 0); + } + + reply = sd_bus_message_unref(reply); + + assert_se(sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "EmitObjectWithManagerAdded", &error, NULL, NULL) >= 0); + + assert_se(sd_bus_process(bus, &reply) > 0); + + assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.ObjectManager", "InterfacesAdded")); + sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER); + + /* Check if /value/a has org.freedesktop.DBus.ObjectManager */ + assert_se(sd_bus_message_rewind(reply, 1) >= 0); + const char* should_be_value_a = NULL; + bool found_object_manager = false; + assert_se(sd_bus_message_read_basic(reply, 'o', &should_be_value_a) > 0); + assert_se(streq(should_be_value_a, "/value/a")); + assert_se(sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "{sa{sv}}") > 0); + while (ASSERT_SE_NONNEG(sd_bus_message_enter_container(reply, SD_BUS_TYPE_DICT_ENTRY, "sa{sv}")) > 0) { + const char* interface_name = NULL; + assert_se(sd_bus_message_read_basic(reply, 's', &interface_name)); + + if (streq(interface_name, "org.freedesktop.DBus.ObjectManager")) { + found_object_manager = true; + break; + } + + assert_se(sd_bus_message_skip(reply, "a{sv}") >= 0); + + assert_se(sd_bus_message_exit_container(reply) >= 0); + } + assert_se(found_object_manager); + + reply = sd_bus_message_unref(reply); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "EmitObjectRemoved", &error, NULL, NULL); + assert_se(r >= 0); + + r = sd_bus_process(bus, &reply); + assert_se(r > 0); + + assert_se(sd_bus_message_is_signal(reply, "org.freedesktop.DBus.ObjectManager", "InterfacesRemoved")); + sd_bus_message_dump(reply, stdout, SD_BUS_MESSAGE_DUMP_WITH_HEADER); + + /* Check if /value/a/x does not have org.freedesktop.DBus.ObjectManager */ + assert_se(sd_bus_message_rewind(reply, 1) >= 0); + should_be_value_a_x = NULL; + assert_se(sd_bus_message_read_basic(reply, 'o', &should_be_value_a_x) > 0); + assert_se(streq(should_be_value_a_x, "/value/a/x")); + assert_se(sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, "s") > 0); + const char* deleted_interface_name = NULL; + while (ASSERT_SE_NONNEG(sd_bus_message_read_basic(reply, 's', &deleted_interface_name)) > 0) { + assert(!streq(deleted_interface_name, "org.freedesktop.DBus.ObjectManager")); + } + assert_se(sd_bus_message_exit_container(reply) >= 0); + + reply = sd_bus_message_unref(reply); + + r = sd_bus_call_method(bus, "org.freedesktop.systemd.test", "/foo", "org.freedesktop.systemd.test", "Exit", &error, NULL, NULL); + assert_se(r >= 0); + + sd_bus_flush(bus); + + return 0; +} + +int main(int argc, char *argv[]) { + struct context c = {}; + pthread_t s; + void *p; + int r, q; + + test_setup_logging(LOG_DEBUG); + + c.automatic_integer_property = 4711; + assert_se(c.automatic_string_property = strdup("dudeldu")); + + assert_se(socketpair(AF_UNIX, SOCK_STREAM, 0, c.fds) >= 0); + + r = pthread_create(&s, NULL, server, &c); + if (r != 0) + return -r; + + r = client(&c); + + q = pthread_join(s, &p); + if (q != 0) + return -q; + + if (r < 0) + return r; + + if (PTR_TO_INT(p) < 0) + return PTR_TO_INT(p); + + free(c.something); + free(c.automatic_string_property); + + return EXIT_SUCCESS; +} diff --git a/src/libsystemd/sd-bus/test-bus-peersockaddr.c b/src/libsystemd/sd-bus/test-bus-peersockaddr.c new file mode 100644 index 0000000..79556e8 --- /dev/null +++ b/src/libsystemd/sd-bus/test-bus-peersockaddr.c @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <pthread.h> +#include <unistd.h> + +#include "sd-bus.h" + +#include "fd-util.h" +#include "process-util.h" +#include "socket-util.h" +#include "tests.h" + +static void *server(void *p) { + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + _cleanup_close_ int listen_fd = PTR_TO_INT(p), fd = -EBADF; + _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *c = NULL; + _cleanup_free_ char *our_comm = NULL; + sd_id128_t id; + int r; + + assert_se(sd_id128_randomize(&id) >= 0); + + fd = accept4(listen_fd, NULL, NULL, SOCK_CLOEXEC|SOCK_NONBLOCK); + assert_se(fd >= 0); + + assert_se(sd_bus_new(&bus) >= 0); + assert_se(sd_bus_set_fd(bus, fd, fd) >= 0); + TAKE_FD(fd); + assert_se(sd_bus_set_server(bus, true, id) >= 0); + assert_se(sd_bus_negotiate_creds(bus, 1, SD_BUS_CREDS_EUID|SD_BUS_CREDS_EGID|SD_BUS_CREDS_PID|SD_BUS_CREDS_COMM|SD_BUS_CREDS_DESCRIPTION) >= 0); + + assert_se(sd_bus_start(bus) >= 0); + + assert_se(sd_bus_get_owner_creds(bus, SD_BUS_CREDS_EUID|SD_BUS_CREDS_EGID|SD_BUS_CREDS_PID|SD_BUS_CREDS_COMM|SD_BUS_CREDS_DESCRIPTION, &c) >= 0); + + uid_t u; + assert_se(sd_bus_creds_get_euid(c, &u) >= 0); + assert_se(u == getuid()); + + gid_t g; + assert_se(sd_bus_creds_get_egid(c, &g) >= 0); + assert_se(g == getgid()); + + pid_t pid; + assert_se(sd_bus_creds_get_pid(c, &pid) >= 0); + assert_se(pid == getpid_cached()); + + const char *comm; + assert_se(sd_bus_creds_get_comm(c, &comm) >= 0); + assert_se(pid_get_comm(0, &our_comm) >= 0); + assert_se(streq_ptr(comm, our_comm)); + + const char *description; + assert_se(sd_bus_creds_get_description(c, &description) >= 0); + assert_se(streq_ptr(description, "wuffwuff")); + + for (;;) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + + r = sd_bus_process(bus, &m); + assert_se(r >= 0); + + if (r == 0) { + assert_se(sd_bus_wait(bus, UINT64_MAX) >= 0); + continue; + } + + if (m && sd_bus_message_is_method_call(m, "foo.foo", "Foo") > 0) { + assert_se(sd_bus_reply_method_return(m, "s", "bar") >= 0); + break; + } + } + + return NULL; +} + +static void* client(void *p) { + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + const char *z; + + assert_se(sd_bus_new(&bus) >= 0); + assert_se(sd_bus_set_description(bus, "wuffwuff") >= 0); + assert_se(sd_bus_set_address(bus, p) >= 0); + assert_se(sd_bus_start(bus) >= 0); + + assert_se(sd_bus_call_method(bus, "foo.foo", "/foo", "foo.foo", "Foo", NULL, &reply, "s", "foo") >= 0); + + assert_se(sd_bus_message_read(reply, "s", &z) >= 0); + assert_se(streq_ptr(z, "bar")); + + return NULL; +} + +TEST(description) { + _cleanup_free_ char *a = NULL; + _cleanup_close_ int fd = -EBADF; + union sockaddr_union sa = { + .un.sun_family = AF_UNIX, + }; + socklen_t salen; + pthread_t s, c; + + fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); + assert_se(fd >= 0); + + assert_se(bind(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path)) >= 0); /* force auto-bind */ + + assert_se(listen(fd, 1) >= 0); + + salen = sizeof(sa); + assert_se(getsockname(fd, &sa.sa, &salen) >= 0); + assert_se(salen >= offsetof(struct sockaddr_un, sun_path)); + assert_se(sa.un.sun_path[0] == 0); + + assert_se(asprintf(&a, "unix:abstract=%s", sa.un.sun_path + 1) >= 0); + + assert_se(pthread_create(&s, NULL, server, INT_TO_PTR(fd)) == 0); + TAKE_FD(fd); + + assert_se(pthread_create(&c, NULL, client, a) == 0); + + assert_se(pthread_join(s, NULL) == 0); + assert_se(pthread_join(c, NULL) == 0); +} + +DEFINE_TEST_MAIN(LOG_INFO); diff --git a/src/libsystemd/sd-bus/test-bus-queue-ref-cycle.c b/src/libsystemd/sd-bus/test-bus-queue-ref-cycle.c new file mode 100644 index 0000000..7c2fa72 --- /dev/null +++ b/src/libsystemd/sd-bus/test-bus-queue-ref-cycle.c @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#include "sd-bus.h" + +#include "main-func.h" +#include "tests.h" + +static int test_ref_unref(void) { + sd_bus_message *m = NULL; + sd_bus *bus = NULL; + int r; + + /* This test will result in a memory leak in <= v240, but not on v241. Hence to be really useful it + * should be run through a leak tracker such as valgrind. */ + + r = sd_bus_open_system(&bus); + if (r < 0) + return log_tests_skipped("Failed to connect to bus"); + + /* Create a message and enqueue it (this shouldn't send it though as the connection setup is not complete yet) */ + assert_se(sd_bus_message_new_method_call(bus, &m, "foo.bar", "/foo", "quux.quux", "waldo") >= 0); + assert_se(sd_bus_send(bus, m, NULL) >= 0); + + /* Let's now unref the message first and the bus second. */ + m = sd_bus_message_unref(m); + bus = sd_bus_unref(bus); + + /* We should have a memory leak now on <= v240. Let's do this again, but destroy in the opposite + * order. On v240 that too should be a leak. */ + + r = sd_bus_open_system(&bus); + if (r < 0) + return log_tests_skipped("Failed to connect to bus"); + + assert_se(sd_bus_message_new_method_call(bus, &m, "foo.bar", "/foo", "quux.quux", "waldo") >= 0); + assert_se(sd_bus_send(bus, m, NULL) >= 0); + + /* Let's now unref things in the opposite order */ + bus = sd_bus_unref(bus); + m = sd_bus_message_unref(m); + + return 0; +} + +static int run(int argc, char *argv[]) { + int r; + + test_setup_logging(LOG_INFO); + + r = test_ref_unref(); + if (r < 0) + return r; + + return 0; +} + +DEFINE_MAIN_FUNCTION(run); diff --git a/src/libsystemd/sd-bus/test-bus-server.c b/src/libsystemd/sd-bus/test-bus-server.c new file mode 100644 index 0000000..8049e33 --- /dev/null +++ b/src/libsystemd/sd-bus/test-bus-server.c @@ -0,0 +1,185 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <pthread.h> +#include <stdlib.h> + +#include "sd-bus.h" + +#include "bus-internal.h" +#include "log.h" +#include "macro.h" +#include "memory-util.h" +#include "string-util.h" +#include "tests.h" + +struct context { + int fds[2]; + + bool client_negotiate_unix_fds; + bool server_negotiate_unix_fds; + + bool client_anonymous_auth; + bool server_anonymous_auth; +}; + +static int _server(struct context *c) { + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + sd_id128_t id; + bool quit = false; + int r; + + assert_se(sd_id128_randomize(&id) >= 0); + + assert_se(sd_bus_new(&bus) >= 0); + assert_se(sd_bus_set_fd(bus, c->fds[0], c->fds[0]) >= 0); + assert_se(sd_bus_set_server(bus, 1, id) >= 0); + assert_se(sd_bus_set_anonymous(bus, c->server_anonymous_auth) >= 0); + assert_se(sd_bus_negotiate_fds(bus, c->server_negotiate_unix_fds) >= 0); + assert_se(sd_bus_start(bus) >= 0); + + while (!quit) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL; + + r = sd_bus_process(bus, &m); + if (r < 0) + return log_error_errno(r, "Failed to process requests: %m"); + + if (r == 0) { + r = sd_bus_wait(bus, UINT64_MAX); + if (r < 0) + return log_error_errno(r, "Failed to wait: %m"); + continue; + } + + if (!m) + continue; + + log_info("Got message! member=%s", strna(sd_bus_message_get_member(m))); + + if (sd_bus_message_is_method_call(m, "org.freedesktop.systemd.test", "Exit")) { + + assert_se((sd_bus_can_send(bus, 'h') >= 1) == + (c->server_negotiate_unix_fds && c->client_negotiate_unix_fds)); + + r = sd_bus_message_new_method_return(m, &reply); + if (r < 0) + return log_error_errno(r, "Failed to allocate return: %m"); + + quit = true; + + } else if (sd_bus_message_is_method_call(m, NULL, NULL)) { + r = sd_bus_message_new_method_error( + m, + &reply, + &SD_BUS_ERROR_MAKE_CONST(SD_BUS_ERROR_UNKNOWN_METHOD, "Unknown method.")); + if (r < 0) + return log_error_errno(r, "Failed to allocate return: %m"); + } + + if (reply) { + r = sd_bus_send(bus, reply, NULL); + if (r < 0) + return log_error_errno(r, "Failed to send reply: %m"); + } + } + + return 0; +} + +static void* server(void *p) { + return INT_TO_PTR(_server(p)); +} + +static int client(struct context *c) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL; + _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + int r; + + assert_se(sd_bus_new(&bus) >= 0); + assert_se(sd_bus_set_fd(bus, c->fds[1], c->fds[1]) >= 0); + assert_se(sd_bus_negotiate_fds(bus, c->client_negotiate_unix_fds) >= 0); + assert_se(sd_bus_set_anonymous(bus, c->client_anonymous_auth) >= 0); + assert_se(sd_bus_start(bus) >= 0); + + r = sd_bus_message_new_method_call( + bus, + &m, + "org.freedesktop.systemd.test", + "/", + "org.freedesktop.systemd.test", + "Exit"); + if (r < 0) + return log_error_errno(r, "Failed to allocate method call: %m"); + + r = sd_bus_call(bus, m, 0, &error, &reply); + if (r < 0) + return log_error_errno(r, "Failed to issue method call: %s", bus_error_message(&error, r)); + + return 0; +} + +static int test_one(bool client_negotiate_unix_fds, bool server_negotiate_unix_fds, + bool client_anonymous_auth, bool server_anonymous_auth) { + + struct context c; + pthread_t s; + void *p; + int r, q; + + zero(c); + + assert_se(socketpair(AF_UNIX, SOCK_STREAM, 0, c.fds) >= 0); + + c.client_negotiate_unix_fds = client_negotiate_unix_fds; + c.server_negotiate_unix_fds = server_negotiate_unix_fds; + c.client_anonymous_auth = client_anonymous_auth; + c.server_anonymous_auth = server_anonymous_auth; + + r = pthread_create(&s, NULL, server, &c); + if (r != 0) + return -r; + + r = client(&c); + + q = pthread_join(s, &p); + if (q != 0) + return -q; + + if (r < 0) + return r; + + if (PTR_TO_INT(p) < 0) + return PTR_TO_INT(p); + + return 0; +} + +int main(int argc, char *argv[]) { + int r; + + test_setup_logging(LOG_DEBUG); + + r = test_one(true, true, false, false); + assert_se(r >= 0); + + r = test_one(true, false, false, false); + assert_se(r >= 0); + + r = test_one(false, true, false, false); + assert_se(r >= 0); + + r = test_one(false, false, false, false); + assert_se(r >= 0); + + r = test_one(true, true, true, true); + assert_se(r >= 0); + + r = test_one(true, true, false, true); + assert_se(r >= 0); + + r = test_one(true, true, true, false); + assert_se(r == -EPERM); + + return EXIT_SUCCESS; +} diff --git a/src/libsystemd/sd-bus/test-bus-signature.c b/src/libsystemd/sd-bus/test-bus-signature.c new file mode 100644 index 0000000..5a4c811 --- /dev/null +++ b/src/libsystemd/sd-bus/test-bus-signature.c @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "bus-internal.h" +#include "bus-signature.h" +#include "log.h" +#include "string-util.h" +#include "tests.h" + +int main(int argc, char *argv[]) { + char prefix[256]; + int r; + + test_setup_logging(LOG_DEBUG); + + assert_se(signature_is_single("y", false)); + assert_se(signature_is_single("u", false)); + assert_se(signature_is_single("v", false)); + assert_se(signature_is_single("as", false)); + assert_se(signature_is_single("(ss)", false)); + assert_se(!signature_is_single("()", false)); + assert_se(!signature_is_single("(()()()()())", false)); + assert_se(!signature_is_single("(((())))", false)); + assert_se(signature_is_single("((((s))))", false)); + assert_se(signature_is_single("{ss}", true)); + assert_se(signature_is_single("a{ss}", false)); + assert_se(!signature_is_single("uu", false)); + assert_se(!signature_is_single("", false)); + assert_se(!signature_is_single("(", false)); + assert_se(!signature_is_single(")", false)); + assert_se(!signature_is_single("())", false)); + assert_se(!signature_is_single("((())", false)); + assert_se(!signature_is_single("{)", false)); + assert_se(!signature_is_single("{}", true)); + assert_se(!signature_is_single("{sss}", true)); + assert_se(!signature_is_single("{s}", true)); + assert_se(!signature_is_single("{ss}", false)); + assert_se(!signature_is_single("{ass}", true)); + assert_se(!signature_is_single("a}", true)); + + assert_se(signature_is_pair("yy")); + assert_se(signature_is_pair("ss")); + assert_se(signature_is_pair("sas")); + assert_se(signature_is_pair("sv")); + assert_se(signature_is_pair("sa(vs)")); + assert_se(!signature_is_pair("")); + assert_se(!signature_is_pair("va")); + assert_se(!signature_is_pair("sss")); + assert_se(!signature_is_pair("{s}ss")); + + assert_se(signature_is_valid("ssa{ss}sssub", true)); + assert_se(signature_is_valid("ssa{ss}sssub", false)); + assert_se(signature_is_valid("{ss}", true)); + assert_se(!signature_is_valid("{ss}", false)); + assert_se(signature_is_valid("", true)); + assert_se(signature_is_valid("", false)); + + assert_se(signature_is_valid("sssusa(uuubbba(uu)uuuu)a{u(uuuvas)}", false)); + + assert_se(!signature_is_valid("a", false)); + assert_se(signature_is_valid("as", false)); + assert_se(signature_is_valid("aas", false)); + assert_se(signature_is_valid("aaas", false)); + assert_se(signature_is_valid("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaad", false)); + assert_se(signature_is_valid("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaas", false)); + assert_se(!signature_is_valid("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaau", false)); + + assert_se(signature_is_valid("((((((((((((((((((((((((((((((((s))))))))))))))))))))))))))))))))", false)); + assert_se(!signature_is_valid("((((((((((((((((((((((((((((((((()))))))))))))))))))))))))))))))))", false)); + + assert_se(namespace_complex_pattern("", "")); + assert_se(namespace_complex_pattern("foobar", "foobar")); + assert_se(namespace_complex_pattern("foobar.waldo", "foobar.waldo")); + assert_se(namespace_complex_pattern("foobar.", "foobar.waldo")); + assert_se(namespace_complex_pattern("foobar.waldo", "foobar.")); + assert_se(!namespace_complex_pattern("foobar.waldo", "foobar")); + assert_se(!namespace_complex_pattern("foobar", "foobar.waldo")); + assert_se(!namespace_complex_pattern("", "foo")); + assert_se(!namespace_complex_pattern("foo", "")); + assert_se(!namespace_complex_pattern("foo.", "")); + + assert_se(path_complex_pattern("", "")); + assert_se(!path_complex_pattern("", "/")); + assert_se(!path_complex_pattern("/", "")); + assert_se(path_complex_pattern("/", "/")); + assert_se(path_complex_pattern("/foobar/", "/")); + assert_se(!path_complex_pattern("/foobar/", "/foobar")); + assert_se(path_complex_pattern("/foobar", "/foobar")); + assert_se(!path_complex_pattern("/foobar", "/foobar/")); + assert_se(!path_complex_pattern("/foobar", "/foobar/waldo")); + assert_se(path_complex_pattern("/foobar/", "/foobar/waldo")); + assert_se(path_complex_pattern("/foobar/waldo", "/foobar/")); + + assert_se(path_simple_pattern("/foo/", "/foo/bar/waldo")); + + assert_se(namespace_simple_pattern("", "")); + assert_se(namespace_simple_pattern("", ".foobar")); + assert_se(namespace_simple_pattern("foobar", "foobar")); + assert_se(namespace_simple_pattern("foobar.waldo", "foobar.waldo")); + assert_se(namespace_simple_pattern("foobar", "foobar.waldo")); + assert_se(!namespace_simple_pattern("foobar.waldo", "foobar")); + assert_se(!namespace_simple_pattern("", "foo")); + assert_se(!namespace_simple_pattern("foo", "")); + assert_se(namespace_simple_pattern("foo.", "foo.bar.waldo")); + + assert_se(streq(object_path_startswith("/foo/bar", "/foo"), "bar")); + assert_se(streq(object_path_startswith("/foo", "/foo"), "")); + assert_se(streq(object_path_startswith("/foo", "/"), "foo")); + assert_se(streq(object_path_startswith("/", "/"), "")); + assert_se(!object_path_startswith("/foo", "/bar")); + assert_se(!object_path_startswith("/", "/bar")); + assert_se(!object_path_startswith("/foo", "")); + + assert_se(object_path_is_valid("/foo/bar")); + assert_se(object_path_is_valid("/foo")); + assert_se(object_path_is_valid("/")); + assert_se(object_path_is_valid("/foo5")); + assert_se(object_path_is_valid("/foo_5")); + assert_se(!object_path_is_valid("")); + assert_se(!object_path_is_valid("/foo/")); + assert_se(!object_path_is_valid("//")); + assert_se(!object_path_is_valid("//foo")); + assert_se(!object_path_is_valid("/foo//bar")); + assert_se(!object_path_is_valid("/foo/aaaäöä")); + + OBJECT_PATH_FOREACH_PREFIX(prefix, "/") { + log_info("<%s>", prefix); + assert_not_reached(); + } + + r = 0; + OBJECT_PATH_FOREACH_PREFIX(prefix, "/xxx") { + log_info("<%s>", prefix); + assert_se(streq(prefix, "/")); + assert_se(r == 0); + r++; + } + assert_se(r == 1); + + r = 0; + OBJECT_PATH_FOREACH_PREFIX(prefix, "/xxx/yyy/zzz") { + log_info("<%s>", prefix); + assert_se(r != 0 || streq(prefix, "/xxx/yyy")); + assert_se(r != 1 || streq(prefix, "/xxx")); + assert_se(r != 2 || streq(prefix, "/")); + r++; + } + assert_se(r == 3); + + return 0; +} diff --git a/src/libsystemd/sd-bus/test-bus-track.c b/src/libsystemd/sd-bus/test-bus-track.c new file mode 100644 index 0000000..5604e84 --- /dev/null +++ b/src/libsystemd/sd-bus/test-bus-track.c @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <sys/socket.h> + +#include "sd-bus.h" + +#include "macro.h" +#include "tests.h" + +static bool track_cb_called_x = false; +static bool track_cb_called_y = false; +static bool track_destroy_called_z = false; + +static int track_cb_x(sd_bus_track *t, void *userdata) { + + log_error("TRACK CB X"); + + assert_se(!track_cb_called_x); + track_cb_called_x = true; + + /* This means b's name disappeared. Let's now disconnect, to make sure the track handling on disconnect works + * as it should. */ + + assert_se(shutdown(sd_bus_get_fd(sd_bus_track_get_bus(t)), SHUT_RDWR) >= 0); + return 1; +} + +static int track_cb_y(sd_bus_track *t, void *userdata) { + + log_error("TRACK CB Y"); + + assert_se(!track_cb_called_y); + track_cb_called_y = true; + + /* We got disconnected, let's close everything */ + + assert_se(sd_event_exit(sd_bus_get_event(sd_bus_track_get_bus(t)), EXIT_SUCCESS) >= 0); + + return 0; +} + +static int track_cb_z(sd_bus_track *t, void *userdata) { + assert_not_reached(); +} + +static void track_destroy_z(void *userdata) { + track_destroy_called_z = true; +} + +int main(int argc, char *argv[]) { + _cleanup_(sd_event_unrefp) sd_event *event = NULL; + _cleanup_(sd_bus_track_unrefp) sd_bus_track *x = NULL, *y = NULL, *z = NULL; + _cleanup_(sd_bus_unrefp) sd_bus *a = NULL, *b = NULL; + bool use_system_bus = false; + const char *unique; + int r; + + test_setup_logging(LOG_INFO); + + assert_se(sd_event_default(&event) >= 0); + + r = sd_bus_open_user(&a); + if (IN_SET(r, -ECONNREFUSED, -ENOENT, -ENOMEDIUM)) { + r = sd_bus_open_system(&a); + if (IN_SET(r, -ECONNREFUSED, -ENOENT)) + return log_tests_skipped("Failed to connect to bus"); + use_system_bus = true; + } + assert_se(r >= 0); + + assert_se(sd_bus_attach_event(a, event, SD_EVENT_PRIORITY_NORMAL) >= 0); + + if (use_system_bus) + assert_se(sd_bus_open_system(&b) >= 0); + else + assert_se(sd_bus_open_user(&b) >= 0); + + assert_se(sd_bus_attach_event(b, event, SD_EVENT_PRIORITY_NORMAL) >= 0); + + /* Watch b's name from a */ + assert_se(sd_bus_track_new(a, &x, track_cb_x, NULL) >= 0); + + assert_se(sd_bus_get_unique_name(b, &unique) >= 0); + + assert_se(sd_bus_track_add_name(x, unique) >= 0); + + /* Watch's a's own name from a */ + assert_se(sd_bus_track_new(a, &y, track_cb_y, NULL) >= 0); + + assert_se(sd_bus_get_unique_name(a, &unique) >= 0); + + assert_se(sd_bus_track_add_name(y, unique) >= 0); + + /* Basic tests. */ + assert_se(sd_bus_track_new(a, &z, track_cb_z, NULL) >= 0); + + /* non-recursive case */ + assert_se(sd_bus_track_set_recursive(z, false) >= 0); + assert_se(sd_bus_track_get_recursive(z) == 0); + assert_se(!sd_bus_track_contains(z, unique)); + assert_se(sd_bus_track_count_name(z, unique) == 0); + assert_se(sd_bus_track_remove_name(z, unique) == 0); + assert_se(sd_bus_track_add_name(z, unique) >= 0); + assert_se(sd_bus_track_add_name(z, unique) >= 0); + assert_se(sd_bus_track_add_name(z, unique) >= 0); + assert_se(sd_bus_track_set_recursive(z, true) == -EBUSY); + assert_se(sd_bus_track_contains(z, unique)); + assert_se(sd_bus_track_count_name(z, unique) == 1); + assert_se(sd_bus_track_remove_name(z, unique) == 1); + assert_se(!sd_bus_track_contains(z, unique)); + assert_se(sd_bus_track_count_name(z, unique) == 0); + assert_se(sd_bus_track_remove_name(z, unique) == 0); + + /* recursive case */ + assert_se(sd_bus_track_set_recursive(z, true) >= 0); + assert_se(sd_bus_track_get_recursive(z) == 1); + assert_se(!sd_bus_track_contains(z, unique)); + assert_se(sd_bus_track_count_name(z, unique) == 0); + assert_se(sd_bus_track_remove_name(z, unique) == 0); + assert_se(sd_bus_track_add_name(z, unique) >= 0); + assert_se(sd_bus_track_add_name(z, unique) >= 0); + assert_se(sd_bus_track_add_name(z, unique) >= 0); + assert_se(sd_bus_track_set_recursive(z, false) == -EBUSY); + assert_se(sd_bus_track_contains(z, unique)); + assert_se(sd_bus_track_count_name(z, unique) == 3); + assert_se(sd_bus_track_remove_name(z, unique) == 1); + assert_se(sd_bus_track_contains(z, unique)); + assert_se(sd_bus_track_count_name(z, unique) == 2); + assert_se(sd_bus_track_remove_name(z, unique) == 1); + assert_se(sd_bus_track_contains(z, unique)); + assert_se(sd_bus_track_count_name(z, unique) == 1); + assert_se(sd_bus_track_remove_name(z, unique) == 1); + assert_se(!sd_bus_track_contains(z, unique)); + assert_se(sd_bus_track_count_name(z, unique) == 0); + assert_se(sd_bus_track_remove_name(z, unique) == 0); + + assert_se(sd_bus_track_set_destroy_callback(z, track_destroy_z) >= 0); + z = sd_bus_track_unref(z); + assert_se(track_destroy_called_z); + + /* Now make b's name disappear */ + sd_bus_close(b); + + assert_se(sd_event_loop(event) >= 0); + + assert_se(track_cb_called_x); + assert_se(track_cb_called_y); + + return 0; +} diff --git a/src/libsystemd/sd-bus/test-bus-vtable-cc.cc b/src/libsystemd/sd-bus/test-bus-vtable-cc.cc new file mode 120000 index 0000000..abee398 --- /dev/null +++ b/src/libsystemd/sd-bus/test-bus-vtable-cc.cc @@ -0,0 +1 @@ +test-bus-vtable.c
\ No newline at end of file diff --git a/src/libsystemd/sd-bus/test-bus-vtable.c b/src/libsystemd/sd-bus/test-bus-vtable.c new file mode 100644 index 0000000..fe12238 --- /dev/null +++ b/src/libsystemd/sd-bus/test-bus-vtable.c @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <stdbool.h> +#include <stddef.h> + +/* We use system assert.h here, because we don't want to keep macro.h and log.h C++ compatible */ +#undef NDEBUG +#include <assert.h> +#include <errno.h> +#include <stdio.h> + +#include "sd-bus-vtable.h" + +#ifndef __cplusplus +# include "bus-objects.h" +#endif + +#include "test-vtable-data.h" + +#define DEFAULT_BUS_PATH "unix:path=/run/dbus/system_bus_socket" + +static struct context c = {}; +static int happy_finder_object = 0; + +static int happy_finder(sd_bus *bus, const char *path, const char *interface, void *userdata, void **found, sd_bus_error *error) { + assert(userdata); + assert(userdata == &c); + +#ifndef __cplusplus + log_info("%s called", __func__); +#endif + + happy_finder_object++; + *found = &happy_finder_object; + return 1; /* found */ +} + +static void test_vtable(void) { + sd_bus *bus = NULL; + int r; + + assert(sd_bus_new(&bus) >= 0); + + assert(sd_bus_add_object_vtable(bus, NULL, "/foo", "org.freedesktop.systemd.testVtable", test_vtable_2, &c) >= 0); + assert(sd_bus_add_object_vtable(bus, NULL, "/foo", "org.freedesktop.systemd.testVtable2", test_vtable_2, &c) >= 0); + /* the cast on the line below is needed to test with the old version of the table */ + assert(sd_bus_add_object_vtable(bus, NULL, "/foo", "org.freedesktop.systemd.testVtable221", + (const sd_bus_vtable *)vtable_format_221, &c) >= 0); + + assert(sd_bus_add_fallback_vtable(bus, NULL, "/fallback", "org.freedesktop.systemd.testVtable2", test_vtable_2, happy_finder, &c) >= 0); + + assert(sd_bus_set_address(bus, DEFAULT_BUS_PATH) >= 0); + r = sd_bus_start(bus); + assert(r == 0 || /* success */ + r == -ENOENT /* dbus is inactive */ ); + +#ifndef __cplusplus + _cleanup_free_ char *s, *s2; + + assert_se(introspect_path(bus, "/foo", NULL, false, true, NULL, &s, NULL) == 1); + fputs(s, stdout); + + assert_se(introspect_path(bus, "/fallback", NULL, false, true, NULL, &s2, NULL) == 1); + fputs(s2, stdout); + + assert_se(happy_finder_object == 1); +#endif + + sd_bus_unref(bus); +} + +int main(int argc, char **argv) { + test_vtable(); + + return 0; +} diff --git a/src/libsystemd/sd-bus/test-bus-watch-bind.c b/src/libsystemd/sd-bus/test-bus-watch-bind.c new file mode 100644 index 0000000..7f73c6e --- /dev/null +++ b/src/libsystemd/sd-bus/test-bus-watch-bind.c @@ -0,0 +1,228 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <pthread.h> + +#include "sd-bus.h" +#include "sd-event.h" +#include "sd-id128.h" + +#include "alloc-util.h" +#include "bus-internal.h" +#include "fd-util.h" +#include "fs-util.h" +#include "mkdir.h" +#include "path-util.h" +#include "random-util.h" +#include "rm-rf.h" +#include "socket-util.h" +#include "string-util.h" +#include "tmpfile-util.h" +#include "tests.h" + +static int method_foobar(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) { + log_info("Got Foobar() call."); + + assert_se(sd_event_exit(sd_bus_get_event(sd_bus_message_get_bus(m)), 0) >= 0); + return sd_bus_reply_method_return(m, NULL); +} + +static int method_exit(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) { + log_info("Got Exit() call"); + + assert_se(sd_bus_reply_method_return(m, NULL) >= 0); + /* Simulate D-Bus going away to test the bus_exit_now() path with exit_on_disconnect set */ + bus_enter_closing(sd_bus_message_get_bus(m)); + return 0; +} + +static const sd_bus_vtable vtable[] = { + SD_BUS_VTABLE_START(0), + SD_BUS_METHOD("Foobar", NULL, NULL, method_foobar, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("Exit", NULL, NULL, method_exit, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_VTABLE_END, +}; + +static void* thread_server(void *p) { + _cleanup_free_ char *suffixed = NULL, *suffixed2 = NULL, *d = NULL; + _cleanup_close_ int fd = -EBADF; + union sockaddr_union u; + const char *path = p; + int r; + + log_debug("Initializing server"); + + /* Let's play some games, by slowly creating the socket directory, and renaming it in the middle */ + usleep_safe(100 * USEC_PER_MSEC); + + assert_se(mkdir_parents(path, 0755) >= 0); + usleep_safe(100 * USEC_PER_MSEC); + + assert_se(path_extract_directory(path, &d) >= 0); + assert_se(asprintf(&suffixed, "%s.%" PRIx64, d, random_u64()) >= 0); + assert_se(rename(d, suffixed) >= 0); + usleep_safe(100 * USEC_PER_MSEC); + + assert_se(asprintf(&suffixed2, "%s.%" PRIx64, d, random_u64()) >= 0); + assert_se(symlink(suffixed2, d) >= 0); + usleep_safe(100 * USEC_PER_MSEC); + + assert_se(symlink(basename(suffixed), suffixed2) >= 0); + usleep_safe(100 * USEC_PER_MSEC); + + socklen_t sa_len; + r = sockaddr_un_set_path(&u.un, path); + assert_se(r >= 0); + sa_len = r; + + fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); + assert_se(fd >= 0); + + assert_se(bind(fd, &u.sa, sa_len) >= 0); + usleep_safe(100 * USEC_PER_MSEC); + + assert_se(listen(fd, SOMAXCONN_DELUXE) >= 0); + usleep_safe(100 * USEC_PER_MSEC); + + assert_se(touch(path) >= 0); + usleep_safe(100 * USEC_PER_MSEC); + + log_debug("Initialized server"); + + for (;;) { + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + _cleanup_(sd_event_unrefp) sd_event *event = NULL; + sd_id128_t id; + int bus_fd, code; + + assert_se(sd_id128_randomize(&id) >= 0); + + assert_se(sd_event_new(&event) >= 0); + + bus_fd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC); + assert_se(bus_fd >= 0); + + log_debug("Accepted server connection"); + + assert_se(sd_bus_new(&bus) >= 0); + assert_se(sd_bus_set_exit_on_disconnect(bus, true) >= 0); + assert_se(sd_bus_set_description(bus, "server") >= 0); + assert_se(sd_bus_set_fd(bus, bus_fd, bus_fd) >= 0); + assert_se(sd_bus_set_server(bus, true, id) >= 0); + /* assert_se(sd_bus_set_anonymous(bus, true) >= 0); */ + + assert_se(sd_bus_attach_event(bus, event, 0) >= 0); + + assert_se(sd_bus_add_object_vtable(bus, NULL, "/foo", "foo.TestInterface", vtable, NULL) >= 0); + + assert_se(sd_bus_start(bus) >= 0); + + assert_se(sd_event_loop(event) >= 0); + + assert_se(sd_event_get_exit_code(event, &code) >= 0); + + if (code > 0) + break; + } + + log_debug("Server done"); + + return NULL; +} + +static void* thread_client1(void *p) { + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + const char *path = p, *t; + int r; + + log_debug("Initializing client1"); + + assert_se(sd_bus_new(&bus) >= 0); + assert_se(sd_bus_set_description(bus, "client1") >= 0); + + t = strjoina("unix:path=", path); + assert_se(sd_bus_set_address(bus, t) >= 0); + assert_se(sd_bus_set_watch_bind(bus, true) >= 0); + assert_se(sd_bus_start(bus) >= 0); + + r = sd_bus_call_method(bus, "foo.bar", "/foo", "foo.TestInterface", "Foobar", &error, NULL, NULL); + assert_se(r >= 0); + + log_debug("Client1 done"); + + return NULL; +} + +static int client2_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) { + assert_se(sd_bus_message_is_method_error(m, NULL) == 0); + assert_se(sd_event_exit(sd_bus_get_event(sd_bus_message_get_bus(m)), 0) >= 0); + return 0; +} + +static void* thread_client2(void *p) { + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + _cleanup_(sd_event_unrefp) sd_event *event = NULL; + const char *path = p, *t; + + log_debug("Initializing client2"); + + assert_se(sd_event_new(&event) >= 0); + assert_se(sd_bus_new(&bus) >= 0); + assert_se(sd_bus_set_description(bus, "client2") >= 0); + + t = strjoina("unix:path=", path); + assert_se(sd_bus_set_address(bus, t) >= 0); + assert_se(sd_bus_set_watch_bind(bus, true) >= 0); + assert_se(sd_bus_attach_event(bus, event, 0) >= 0); + assert_se(sd_bus_start(bus) >= 0); + + assert_se(sd_bus_call_method_async(bus, NULL, "foo.bar", "/foo", "foo.TestInterface", "Foobar", client2_callback, NULL, NULL) >= 0); + + assert_se(sd_event_loop(event) >= 0); + + log_debug("Client2 done"); + + return NULL; +} + +static void request_exit(const char *path) { + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + const char *t; + + assert_se(sd_bus_new(&bus) >= 0); + + t = strjoina("unix:path=", path); + assert_se(sd_bus_set_address(bus, t) >= 0); + assert_se(sd_bus_set_watch_bind(bus, true) >= 0); + assert_se(sd_bus_set_description(bus, "request-exit") >= 0); + assert_se(sd_bus_start(bus) >= 0); + + assert_se(sd_bus_call_method(bus, "foo.bar", "/foo", "foo.TestInterface", "Exit", NULL, NULL, NULL) >= 0); +} + +int main(int argc, char *argv[]) { + _cleanup_(rm_rf_physical_and_freep) char *d = NULL; + pthread_t server, client1, client2; + char *path; + + test_setup_logging(LOG_DEBUG); + + /* We use /dev/shm here rather than /tmp, since some weird distros might set up /tmp as some weird fs that + * doesn't support inotify properly. */ + assert_se(mkdtemp_malloc("/dev/shm/systemd-watch-bind-XXXXXX", &d) >= 0); + + path = strjoina(d, "/this/is/a/socket"); + + assert_se(pthread_create(&server, NULL, thread_server, path) == 0); + assert_se(pthread_create(&client1, NULL, thread_client1, path) == 0); + assert_se(pthread_create(&client2, NULL, thread_client2, path) == 0); + + assert_se(pthread_join(client1, NULL) == 0); + assert_se(pthread_join(client2, NULL) == 0); + + request_exit(path); + + assert_se(pthread_join(server, NULL) == 0); + + return 0; +} diff --git a/src/libsystemd/sd-bus/test-vtable-data.h b/src/libsystemd/sd-bus/test-vtable-data.h new file mode 100644 index 0000000..7269a49 --- /dev/null +++ b/src/libsystemd/sd-bus/test-vtable-data.h @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +/* This is meant to be included in other files, hence no headers */ + +struct context { + bool quit; + char *something; + char *automatic_string_property; + uint32_t automatic_integer_property; +}; + +static int handler(sd_bus_message *m, void *userdata, sd_bus_error *error) { + return 1; +} + +static int value_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error) { + return 1; +} + +static int get_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error) { + return 1; +} + +static int set_handler(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *error) { + return 1; +} + +static const sd_bus_vtable test_vtable_1[] = { + SD_BUS_VTABLE_START(0), + SD_BUS_METHOD("Hello", "ssas", "a(uu)", NULL, 0), + SD_BUS_METHOD("DeprecatedHello", "", "", NULL, SD_BUS_VTABLE_DEPRECATED), + SD_BUS_METHOD("DeprecatedHelloNoReply", "", "", NULL, SD_BUS_VTABLE_DEPRECATED|SD_BUS_VTABLE_METHOD_NO_REPLY), + SD_BUS_SIGNAL("Wowza", "sss", 0), + SD_BUS_SIGNAL("DeprecatedWowza", "ut", SD_BUS_VTABLE_DEPRECATED), + SD_BUS_WRITABLE_PROPERTY("AProperty", "s", get_handler, set_handler, 0, 0), + SD_BUS_PROPERTY("AReadOnlyDeprecatedProperty", "(ut)", get_handler, 0, SD_BUS_VTABLE_DEPRECATED), + SD_BUS_PROPERTY("ChangingProperty", "t", get_handler, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("Invalidating", "t", get_handler, 0, SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), + SD_BUS_PROPERTY("Constant", "t", get_handler, 0, SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_PROPERTY_EXPLICIT), + SD_BUS_VTABLE_END +}; + +static const sd_bus_vtable test_vtable_2[] = { + SD_BUS_VTABLE_START(0), + SD_BUS_METHOD("AlterSomething", "s", "s", handler, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("Exit", "", "", handler, 0), + SD_BUS_METHOD_WITH_OFFSET("AlterSomething2", "s", "s", handler, 200, 0), + SD_BUS_METHOD_WITH_OFFSET("Exit2", "", "", handler, 200, 0), + SD_BUS_METHOD_WITH_NAMES_OFFSET("AlterSomething3", "so", SD_BUS_PARAM(string) SD_BUS_PARAM(path), + "s", SD_BUS_PARAM(returnstring), handler, 200, 0), + SD_BUS_METHOD_WITH_NAMES("Exit3", "bx", SD_BUS_PARAM(with_confirmation) SD_BUS_PARAM(after_msec), + "bb", SD_BUS_PARAM(accepted) SD_BUS_PARAM(scheduled), handler, 0), + SD_BUS_PROPERTY("Value", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("Value2", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), + SD_BUS_PROPERTY("Value3", "s", value_handler, 10, SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("Value4", "s", value_handler, 10, 0), + SD_BUS_PROPERTY("AnExplicitProperty", "s", NULL, offsetof(struct context, something), + SD_BUS_VTABLE_PROPERTY_EXPLICIT|SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), + SD_BUS_WRITABLE_PROPERTY("Something", "s", get_handler, set_handler, 0, 0), + SD_BUS_WRITABLE_PROPERTY("AutomaticStringProperty", "s", NULL, NULL, + offsetof(struct context, automatic_string_property), 0), + SD_BUS_WRITABLE_PROPERTY("AutomaticIntegerProperty", "u", NULL, NULL, + offsetof(struct context, automatic_integer_property), 0), + SD_BUS_METHOD("NoOperation", NULL, NULL, NULL, 0), + SD_BUS_SIGNAL("DummySignal", "b", 0), + SD_BUS_SIGNAL("DummySignal2", "so", 0), + SD_BUS_SIGNAL_WITH_NAMES("DummySignal3", "so", SD_BUS_PARAM(string) SD_BUS_PARAM(path), 0), + SD_BUS_VTABLE_END +}; + +static const sd_bus_vtable test_vtable_deprecated[] = { + SD_BUS_VTABLE_START(SD_BUS_VTABLE_DEPRECATED), + SD_BUS_VTABLE_END +}; + +struct sd_bus_vtable_221 { + uint8_t type:8; + uint64_t flags:56; + union { + struct { + size_t element_size; + } start; + struct { + const char *member; + const char *signature; + const char *result; + sd_bus_message_handler_t handler; + size_t offset; + } method; + struct { + const char *member; + const char *signature; + } signal; + struct { + const char *member; + const char *signature; + sd_bus_property_get_t get; + sd_bus_property_set_t set; + size_t offset; + } property; + } x; +}; + +static const struct sd_bus_vtable_221 vtable_format_221[] = { + { + .type = _SD_BUS_VTABLE_START, + .flags = 0, + .x = { + .start = { + .element_size = sizeof(struct sd_bus_vtable_221) + }, + }, + }, + { + .type = _SD_BUS_VTABLE_METHOD, + .flags = 0, + .x = { + .method = { + .member = "Exit", + .signature = "", + .result = "", + .handler = handler, + .offset = 0, + }, + }, + }, + { + .type = _SD_BUS_VTABLE_END, + .flags = 0, + .x = { { 0 } }, + } +}; diff --git a/src/libsystemd/sd-daemon/sd-daemon.c b/src/libsystemd/sd-daemon/sd-daemon.c new file mode 100644 index 0000000..6a60cde --- /dev/null +++ b/src/libsystemd/sd-daemon/sd-daemon.c @@ -0,0 +1,775 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <limits.h> +#include <mqueue.h> +#include <netinet/in.h> +#include <poll.h> +#include <stdarg.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <sys/un.h> +#include <unistd.h> + +#include "sd-daemon.h" + +#include "alloc-util.h" +#include "fd-util.h" +#include "fs-util.h" +#include "io-util.h" +#include "iovec-util.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "socket-util.h" +#include "stat-util.h" +#include "strv.h" +#include "time-util.h" + +#define SNDBUF_SIZE (8*1024*1024) + +static void unsetenv_all(bool unset_environment) { + if (!unset_environment) + return; + + assert_se(unsetenv("LISTEN_PID") == 0); + assert_se(unsetenv("LISTEN_FDS") == 0); + assert_se(unsetenv("LISTEN_FDNAMES") == 0); +} + +_public_ int sd_listen_fds(int unset_environment) { + const char *e; + int n, r; + pid_t pid; + + e = getenv("LISTEN_PID"); + if (!e) { + r = 0; + goto finish; + } + + r = parse_pid(e, &pid); + if (r < 0) + goto finish; + + /* Is this for us? */ + if (getpid_cached() != pid) { + r = 0; + goto finish; + } + + e = getenv("LISTEN_FDS"); + if (!e) { + r = 0; + goto finish; + } + + r = safe_atoi(e, &n); + if (r < 0) + goto finish; + + assert_cc(SD_LISTEN_FDS_START < INT_MAX); + if (n <= 0 || n > INT_MAX - SD_LISTEN_FDS_START) { + r = -EINVAL; + goto finish; + } + + for (int fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd ++) { + r = fd_cloexec(fd, true); + if (r < 0) + goto finish; + } + + r = n; + +finish: + unsetenv_all(unset_environment); + return r; +} + +_public_ int sd_listen_fds_with_names(int unset_environment, char ***names) { + _cleanup_strv_free_ char **l = NULL; + bool have_names; + int n_names = 0, n_fds; + const char *e; + int r; + + if (!names) + return sd_listen_fds(unset_environment); + + e = getenv("LISTEN_FDNAMES"); + if (e) { + n_names = strv_split_full(&l, e, ":", EXTRACT_DONT_COALESCE_SEPARATORS); + if (n_names < 0) { + unsetenv_all(unset_environment); + return n_names; + } + + have_names = true; + } else + have_names = false; + + n_fds = sd_listen_fds(unset_environment); + if (n_fds <= 0) + return n_fds; + + if (have_names) { + if (n_names != n_fds) + return -EINVAL; + } else { + r = strv_extend_n(&l, "unknown", n_fds); + if (r < 0) + return r; + } + + *names = TAKE_PTR(l); + + return n_fds; +} + +_public_ int sd_is_fifo(int fd, const char *path) { + struct stat st_fd; + + assert_return(fd >= 0, -EBADF); + + if (fstat(fd, &st_fd) < 0) + return -errno; + + if (!S_ISFIFO(st_fd.st_mode)) + return 0; + + if (path) { + struct stat st_path; + + if (stat(path, &st_path) < 0) { + + if (IN_SET(errno, ENOENT, ENOTDIR)) + return 0; + + return -errno; + } + + return stat_inode_same(&st_path, &st_fd); + } + + return 1; +} + +_public_ int sd_is_special(int fd, const char *path) { + struct stat st_fd; + + assert_return(fd >= 0, -EBADF); + + if (fstat(fd, &st_fd) < 0) + return -errno; + + if (!S_ISREG(st_fd.st_mode) && !S_ISCHR(st_fd.st_mode)) + return 0; + + if (path) { + struct stat st_path; + + if (stat(path, &st_path) < 0) { + + if (IN_SET(errno, ENOENT, ENOTDIR)) + return 0; + + return -errno; + } + + if (S_ISREG(st_fd.st_mode) && S_ISREG(st_path.st_mode)) + return stat_inode_same(&st_path, &st_fd); + else if (S_ISCHR(st_fd.st_mode) && S_ISCHR(st_path.st_mode)) + return st_path.st_rdev == st_fd.st_rdev; + else + return 0; + } + + return 1; +} + +static int is_socket_internal(int fd, int type, int listening) { + struct stat st_fd; + + assert_return(fd >= 0, -EBADF); + assert_return(type >= 0, -EINVAL); + + if (fstat(fd, &st_fd) < 0) + return -errno; + + if (!S_ISSOCK(st_fd.st_mode)) + return 0; + + if (type != 0) { + int other_type = 0; + socklen_t l = sizeof(other_type); + + if (getsockopt(fd, SOL_SOCKET, SO_TYPE, &other_type, &l) < 0) + return -errno; + + if (l != sizeof(other_type)) + return -EINVAL; + + if (other_type != type) + return 0; + } + + if (listening >= 0) { + int accepting = 0; + socklen_t l = sizeof(accepting); + + if (getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &accepting, &l) < 0) + return -errno; + + if (l != sizeof(accepting)) + return -EINVAL; + + if (!accepting != !listening) + return 0; + } + + return 1; +} + +_public_ int sd_is_socket(int fd, int family, int type, int listening) { + int r; + + assert_return(fd >= 0, -EBADF); + assert_return(family >= 0, -EINVAL); + + r = is_socket_internal(fd, type, listening); + if (r <= 0) + return r; + + if (family > 0) { + union sockaddr_union sockaddr = {}; + socklen_t l = sizeof(sockaddr); + + if (getsockname(fd, &sockaddr.sa, &l) < 0) + return -errno; + + if (l < sizeof(sa_family_t)) + return -EINVAL; + + return sockaddr.sa.sa_family == family; + } + + return 1; +} + +_public_ int sd_is_socket_inet(int fd, int family, int type, int listening, uint16_t port) { + union sockaddr_union sockaddr = {}; + socklen_t l = sizeof(sockaddr); + int r; + + assert_return(fd >= 0, -EBADF); + assert_return(IN_SET(family, 0, AF_INET, AF_INET6), -EINVAL); + + r = is_socket_internal(fd, type, listening); + if (r <= 0) + return r; + + if (getsockname(fd, &sockaddr.sa, &l) < 0) + return -errno; + + if (l < sizeof(sa_family_t)) + return -EINVAL; + + if (!IN_SET(sockaddr.sa.sa_family, AF_INET, AF_INET6)) + return 0; + + if (family != 0) + if (sockaddr.sa.sa_family != family) + return 0; + + if (port > 0) { + unsigned sa_port; + + r = sockaddr_port(&sockaddr.sa, &sa_port); + if (r < 0) + return r; + + return port == sa_port; + } + + return 1; +} + +_public_ int sd_is_socket_sockaddr(int fd, int type, const struct sockaddr* addr, unsigned addr_len, int listening) { + union sockaddr_union sockaddr = {}; + socklen_t l = sizeof(sockaddr); + int r; + + assert_return(fd >= 0, -EBADF); + assert_return(addr, -EINVAL); + assert_return(addr_len >= sizeof(sa_family_t), -ENOBUFS); + assert_return(IN_SET(addr->sa_family, AF_INET, AF_INET6), -EPFNOSUPPORT); + + r = is_socket_internal(fd, type, listening); + if (r <= 0) + return r; + + if (getsockname(fd, &sockaddr.sa, &l) < 0) + return -errno; + + if (l < sizeof(sa_family_t)) + return -EINVAL; + + if (sockaddr.sa.sa_family != addr->sa_family) + return 0; + + if (sockaddr.sa.sa_family == AF_INET) { + const struct sockaddr_in *in = (const struct sockaddr_in *) addr; + + if (l < sizeof(struct sockaddr_in) || addr_len < sizeof(struct sockaddr_in)) + return -EINVAL; + + if (in->sin_port != 0 && + sockaddr.in.sin_port != in->sin_port) + return false; + + return sockaddr.in.sin_addr.s_addr == in->sin_addr.s_addr; + + } else { + const struct sockaddr_in6 *in = (const struct sockaddr_in6 *) addr; + + if (l < sizeof(struct sockaddr_in6) || addr_len < sizeof(struct sockaddr_in6)) + return -EINVAL; + + if (in->sin6_port != 0 && + sockaddr.in6.sin6_port != in->sin6_port) + return false; + + if (in->sin6_flowinfo != 0 && + sockaddr.in6.sin6_flowinfo != in->sin6_flowinfo) + return false; + + if (in->sin6_scope_id != 0 && + sockaddr.in6.sin6_scope_id != in->sin6_scope_id) + return false; + + return memcmp(sockaddr.in6.sin6_addr.s6_addr, in->sin6_addr.s6_addr, + sizeof(in->sin6_addr.s6_addr)) == 0; + } +} + +_public_ int sd_is_socket_unix(int fd, int type, int listening, const char *path, size_t length) { + union sockaddr_union sockaddr = {}; + socklen_t l = sizeof(sockaddr); + int r; + + assert_return(fd >= 0, -EBADF); + + r = is_socket_internal(fd, type, listening); + if (r <= 0) + return r; + + if (getsockname(fd, &sockaddr.sa, &l) < 0) + return -errno; + + if (l < sizeof(sa_family_t)) + return -EINVAL; + + if (sockaddr.sa.sa_family != AF_UNIX) + return 0; + + if (path) { + if (length == 0) + length = strlen(path); + + if (length == 0) + /* Unnamed socket */ + return l == offsetof(struct sockaddr_un, sun_path); + + if (path[0]) + /* Normal path socket */ + return + (l >= offsetof(struct sockaddr_un, sun_path) + length + 1) && + memcmp(path, sockaddr.un.sun_path, length+1) == 0; + else + /* Abstract namespace socket */ + return + (l == offsetof(struct sockaddr_un, sun_path) + length) && + memcmp(path, sockaddr.un.sun_path, length) == 0; + } + + return 1; +} + +_public_ int sd_is_mq(int fd, const char *path) { + struct mq_attr attr; + + /* Check that the fd is valid */ + assert_return(fcntl(fd, F_GETFD) >= 0, -errno); + + if (mq_getattr(fd, &attr) < 0) { + if (errno == EBADF) + /* A non-mq fd (or an invalid one, but we ruled that out above) */ + return 0; + return -errno; + } + + if (path) { + _cleanup_free_ char *fpath = NULL; + struct stat a, b; + + assert_return(path_is_absolute(path), -EINVAL); + + if (fstat(fd, &a) < 0) + return -errno; + + fpath = path_join("/dev/mqueue", path); + if (!fpath) + return -ENOMEM; + + if (stat(fpath, &b) < 0) + return -errno; + + if (!stat_inode_same(&a, &b)) + return 0; + } + + return 1; +} + +static int vsock_bind_privileged_port(int fd) { + union sockaddr_union sa = { + .vm.svm_family = AF_VSOCK, + .vm.svm_cid = VMADDR_CID_ANY, + .vm.svm_port = 1023, + }; + int r; + + assert(fd >= 0); + + do + r = RET_NERRNO(bind(fd, &sa.sa, sizeof(sa.vm))); + while (r == -EADDRINUSE && --sa.vm.svm_port > 0); + + return r; +} + +static int pid_notify_with_fds_internal( + pid_t pid, + const char *state, + const int *fds, + unsigned n_fds) { + SocketAddress address; + struct iovec iovec; + struct msghdr msghdr = { + .msg_iov = &iovec, + .msg_iovlen = 1, + .msg_name = &address.sockaddr, + }; + _cleanup_close_ int fd = -EBADF; + struct cmsghdr *cmsg = NULL; + const char *e; + bool send_ucred; + ssize_t n; + int type, r; + + if (!state) + return -EINVAL; + + if (n_fds > 0 && !fds) + return -EINVAL; + + e = getenv("NOTIFY_SOCKET"); + if (!e) + return 0; + + /* Allow AF_UNIX and AF_VSOCK, reject the rest. */ + r = socket_address_parse_unix(&address, e); + if (r == -EPROTO) + r = socket_address_parse_vsock(&address, e); + if (r < 0) + return r; + msghdr.msg_namelen = address.size; + + /* If we didn't get an address (which is a normal pattern when specifying VSOCK tuples) error out, + * we always require a specific CID. */ + if (address.sockaddr.vm.svm_family == AF_VSOCK && address.sockaddr.vm.svm_cid == VMADDR_CID_ANY) + return -EINVAL; + + type = address.type == 0 ? SOCK_DGRAM : address.type; + + /* At the time of writing QEMU does not yet support AF_VSOCK + SOCK_DGRAM and returns + * ENODEV. Fallback to SOCK_SEQPACKET in that case. */ + fd = socket(address.sockaddr.sa.sa_family, type|SOCK_CLOEXEC, 0); + if (fd < 0) { + if (!(ERRNO_IS_NOT_SUPPORTED(errno) || errno == ENODEV) || address.sockaddr.sa.sa_family != AF_VSOCK || address.type > 0) + return log_debug_errno(errno, "Failed to open %s notify socket to '%s': %m", socket_address_type_to_string(type), e); + + type = SOCK_SEQPACKET; + fd = socket(address.sockaddr.sa.sa_family, type|SOCK_CLOEXEC, 0); + if (fd < 0 && ERRNO_IS_NOT_SUPPORTED(errno)) { + type = SOCK_STREAM; + fd = socket(address.sockaddr.sa.sa_family, type|SOCK_CLOEXEC, 0); + } + if (fd < 0) + return log_debug_errno(errno, "Failed to open %s socket to '%s': %m", socket_address_type_to_string(type), e); + } + + if (address.sockaddr.sa.sa_family == AF_VSOCK) { + r = vsock_bind_privileged_port(fd); + if (r < 0 && !ERRNO_IS_PRIVILEGE(r)) + return log_debug_errno(r, "Failed to bind socket to privileged port: %m"); + } + + if (IN_SET(type, SOCK_STREAM, SOCK_SEQPACKET)) { + if (connect(fd, &address.sockaddr.sa, address.size) < 0) + return log_debug_errno(errno, "Failed to connect socket to '%s': %m", e); + + msghdr.msg_name = NULL; + msghdr.msg_namelen = 0; + } + + (void) fd_inc_sndbuf(fd, SNDBUF_SIZE); + + iovec = IOVEC_MAKE_STRING(state); + + send_ucred = + (pid != 0 && pid != getpid_cached()) || + getuid() != geteuid() || + getgid() != getegid(); + + if (n_fds > 0 || send_ucred) { + /* CMSG_SPACE(0) may return value different than zero, which results in miscalculated controllen. */ + msghdr.msg_controllen = + (n_fds > 0 ? CMSG_SPACE(sizeof(int) * n_fds) : 0) + + (send_ucred ? CMSG_SPACE(sizeof(struct ucred)) : 0); + + msghdr.msg_control = alloca0(msghdr.msg_controllen); + + cmsg = CMSG_FIRSTHDR(&msghdr); + if (n_fds > 0) { + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(int) * n_fds); + + memcpy(CMSG_DATA(cmsg), fds, sizeof(int) * n_fds); + + if (send_ucred) + assert_se(cmsg = CMSG_NXTHDR(&msghdr, cmsg)); + } + + if (send_ucred) { + struct ucred *ucred; + + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_CREDENTIALS; + cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred)); + + ucred = CMSG_TYPED_DATA(cmsg, struct ucred); + ucred->pid = pid != 0 ? pid : getpid_cached(); + ucred->uid = getuid(); + ucred->gid = getgid(); + } + } + + do { + /* First try with fake ucred data, as requested */ + n = sendmsg(fd, &msghdr, MSG_NOSIGNAL); + if (n < 0) { + if (!send_ucred) + return log_debug_errno(errno, "Failed to send notify message to '%s': %m", e); + + /* If that failed, try with our own ucred instead */ + msghdr.msg_controllen -= CMSG_SPACE(sizeof(struct ucred)); + if (msghdr.msg_controllen == 0) + msghdr.msg_control = NULL; + + n = 0; + send_ucred = false; + } else { + /* Unless we're using SOCK_STREAM, we expect to write all the contents immediately. */ + if (type != SOCK_STREAM && (size_t) n < iovec_total_size(msghdr.msg_iov, msghdr.msg_iovlen)) + return -EIO; + + /* Make sure we only send fds and ucred once, even if we're using SOCK_STREAM. */ + msghdr.msg_control = NULL; + msghdr.msg_controllen = 0; + } + } while (!iovec_increment(msghdr.msg_iov, msghdr.msg_iovlen, n)); + + return 1; +} + +_public_ int sd_pid_notify_with_fds( + pid_t pid, + int unset_environment, + const char *state, + const int *fds, + unsigned n_fds) { + + int r; + + r = pid_notify_with_fds_internal(pid, state, fds, n_fds); + + if (unset_environment) + assert_se(unsetenv("NOTIFY_SOCKET") == 0); + + return r; +} + +_public_ int sd_pid_notify_barrier(pid_t pid, int unset_environment, uint64_t timeout) { + _cleanup_close_pair_ int pipe_fd[2] = EBADF_PAIR; + int r; + + if (pipe2(pipe_fd, O_CLOEXEC) < 0) + return -errno; + + r = sd_pid_notify_with_fds(pid, unset_environment, "BARRIER=1", &pipe_fd[1], 1); + if (r <= 0) + return r; + + pipe_fd[1] = safe_close(pipe_fd[1]); + + r = fd_wait_for_event(pipe_fd[0], 0 /* POLLHUP is implicit */, timeout); + if (r < 0) + return r; + if (r == 0) + return -ETIMEDOUT; + + return 1; +} + +_public_ int sd_notify_barrier(int unset_environment, uint64_t timeout) { + return sd_pid_notify_barrier(0, unset_environment, timeout); +} + +_public_ int sd_pid_notify(pid_t pid, int unset_environment, const char *state) { + return sd_pid_notify_with_fds(pid, unset_environment, state, NULL, 0); +} + +_public_ int sd_notify(int unset_environment, const char *state) { + return sd_pid_notify_with_fds(0, unset_environment, state, NULL, 0); +} + +_public_ int sd_pid_notifyf(pid_t pid, int unset_environment, const char *format, ...) { + _cleanup_free_ char *p = NULL; + int r; + + if (format) { + va_list ap; + + va_start(ap, format); + r = vasprintf(&p, format, ap); + va_end(ap); + + if (r < 0 || !p) + return -ENOMEM; + } + + return sd_pid_notify(pid, unset_environment, p); +} + +_public_ int sd_notifyf(int unset_environment, const char *format, ...) { + _cleanup_free_ char *p = NULL; + int r; + + if (format) { + va_list ap; + + va_start(ap, format); + r = vasprintf(&p, format, ap); + va_end(ap); + + if (r < 0 || !p) + return -ENOMEM; + } + + return sd_pid_notify(0, unset_environment, p); +} + +_public_ int sd_pid_notifyf_with_fds( + pid_t pid, + int unset_environment, + const int *fds, size_t n_fds, + const char *format, ...) { + + _cleanup_free_ char *p = NULL; + int r; + + /* Paranoia check: we traditionally used 'unsigned' as array size, but we nowadays more correctly use + * 'size_t'. sd_pid_notifyf_with_fds() and sd_pid_notify_with_fds() are from different eras, hence + * differ in this. Let's catch resulting incompatibilites early, even though they are pretty much + * theoretic only */ + if (n_fds > UINT_MAX) + return -E2BIG; + + if (format) { + va_list ap; + + va_start(ap, format); + r = vasprintf(&p, format, ap); + va_end(ap); + + if (r < 0 || !p) + return -ENOMEM; + } + + return sd_pid_notify_with_fds(pid, unset_environment, p, fds, n_fds); +} + +_public_ int sd_booted(void) { + /* We test whether the runtime unit file directory has been + * created. This takes place in mount-setup.c, so is + * guaranteed to happen very early during boot. */ + + if (laccess("/run/systemd/system/", F_OK) >= 0) + return true; + + if (errno == ENOENT) + return false; + + return -errno; +} + +_public_ int sd_watchdog_enabled(int unset_environment, uint64_t *usec) { + const char *s, *p = ""; /* p is set to dummy value to do unsetting */ + uint64_t u; + int r = 0; + + s = getenv("WATCHDOG_USEC"); + if (!s) + goto finish; + + r = safe_atou64(s, &u); + if (r < 0) + goto finish; + if (!timestamp_is_set(u)) { + r = -EINVAL; + goto finish; + } + + p = getenv("WATCHDOG_PID"); + if (p) { + pid_t pid; + + r = parse_pid(p, &pid); + if (r < 0) + goto finish; + + /* Is this for us? */ + if (getpid_cached() != pid) { + r = 0; + goto finish; + } + } + + if (usec) + *usec = u; + + r = 1; + +finish: + if (unset_environment && s) + assert_se(unsetenv("WATCHDOG_USEC") == 0); + if (unset_environment && p) + assert_se(unsetenv("WATCHDOG_PID") == 0); + + return r; +} diff --git a/src/libsystemd/sd-device/device-enumerator-private.h b/src/libsystemd/sd-device/device-enumerator-private.h new file mode 100644 index 0000000..cf62fab --- /dev/null +++ b/src/libsystemd/sd-device/device-enumerator-private.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "sd-device.h" + +typedef enum MatchInitializedType { + MATCH_INITIALIZED_NO, /* only devices without a db entry */ + MATCH_INITIALIZED_YES, /* only devices with a db entry */ + MATCH_INITIALIZED_ALL, /* all devices */ + MATCH_INITIALIZED_COMPAT, /* only devices that have no devnode/ifindex or have a db entry */ + _MATCH_INITIALIZED_MAX, + _MATCH_INITIALIZED_INVALID = -EINVAL, +} MatchInitializedType; + +int device_enumerator_scan_devices(sd_device_enumerator *enumerator); +int device_enumerator_scan_subsystems(sd_device_enumerator *enumerator); +int device_enumerator_scan_devices_and_subsystems(sd_device_enumerator *enumerator); +int device_enumerator_add_device(sd_device_enumerator *enumerator, sd_device *device); +int device_enumerator_add_parent_devices(sd_device_enumerator *enumerator, sd_device *device); +int device_enumerator_add_match_is_initialized(sd_device_enumerator *enumerator, MatchInitializedType type); +int device_enumerator_add_match_parent_incremental(sd_device_enumerator *enumerator, sd_device *parent); +int device_enumerator_add_prioritized_subsystem(sd_device_enumerator *enumerator, const char *subsystem); +sd_device *device_enumerator_get_first(sd_device_enumerator *enumerator); +sd_device *device_enumerator_get_next(sd_device_enumerator *enumerator); +sd_device **device_enumerator_get_devices(sd_device_enumerator *enumerator, size_t *ret_n_devices); + +#define FOREACH_DEVICE_AND_SUBSYSTEM(enumerator, device) \ + for (device = device_enumerator_get_first(enumerator); \ + device; \ + device = device_enumerator_get_next(enumerator)) diff --git a/src/libsystemd/sd-device/device-enumerator.c b/src/libsystemd/sd-device/device-enumerator.c new file mode 100644 index 0000000..15c5c42 --- /dev/null +++ b/src/libsystemd/sd-device/device-enumerator.c @@ -0,0 +1,1194 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <unistd.h> + +#include "sd-device.h" + +#include "alloc-util.h" +#include "device-enumerator-private.h" +#include "device-filter.h" +#include "device-util.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "set.h" +#include "sort-util.h" +#include "string-util.h" +#include "strv.h" + +typedef enum DeviceEnumerationType { + DEVICE_ENUMERATION_TYPE_DEVICES, + DEVICE_ENUMERATION_TYPE_SUBSYSTEMS, + DEVICE_ENUMERATION_TYPE_ALL, + _DEVICE_ENUMERATION_TYPE_MAX, + _DEVICE_ENUMERATION_TYPE_INVALID = -EINVAL, +} DeviceEnumerationType; + +struct sd_device_enumerator { + unsigned n_ref; + + DeviceEnumerationType type; + Hashmap *devices_by_syspath; + sd_device **devices; + size_t n_devices, current_device_index; + bool scan_uptodate; + bool sorted; + + char **prioritized_subsystems; + Set *match_subsystem; + Set *nomatch_subsystem; + Hashmap *match_sysattr; + Hashmap *nomatch_sysattr; + Hashmap *match_property; + Hashmap *match_property_required; + Set *match_sysname; + Set *nomatch_sysname; + Set *match_tag; + Set *match_parent; + MatchInitializedType match_initialized; +}; + +_public_ int sd_device_enumerator_new(sd_device_enumerator **ret) { + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *enumerator = NULL; + + assert(ret); + + enumerator = new(sd_device_enumerator, 1); + if (!enumerator) + return -ENOMEM; + + *enumerator = (sd_device_enumerator) { + .n_ref = 1, + .type = _DEVICE_ENUMERATION_TYPE_INVALID, + .match_initialized = MATCH_INITIALIZED_COMPAT, + }; + + *ret = TAKE_PTR(enumerator); + + return 0; +} + +static void device_unref_many(sd_device **devices, size_t n) { + assert(devices || n == 0); + + for (size_t i = 0; i < n; i++) + sd_device_unref(devices[i]); +} + +static void device_enumerator_unref_devices(sd_device_enumerator *enumerator) { + assert(enumerator); + + hashmap_clear_with_destructor(enumerator->devices_by_syspath, sd_device_unref); + device_unref_many(enumerator->devices, enumerator->n_devices); + enumerator->devices = mfree(enumerator->devices); + enumerator->n_devices = 0; +} + +static sd_device_enumerator *device_enumerator_free(sd_device_enumerator *enumerator) { + assert(enumerator); + + device_enumerator_unref_devices(enumerator); + + hashmap_free(enumerator->devices_by_syspath); + strv_free(enumerator->prioritized_subsystems); + set_free(enumerator->match_subsystem); + set_free(enumerator->nomatch_subsystem); + hashmap_free(enumerator->match_sysattr); + hashmap_free(enumerator->nomatch_sysattr); + hashmap_free(enumerator->match_property); + hashmap_free(enumerator->match_property_required); + set_free(enumerator->match_sysname); + set_free(enumerator->nomatch_sysname); + set_free(enumerator->match_tag); + set_free(enumerator->match_parent); + + return mfree(enumerator); +} + +DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_device_enumerator, sd_device_enumerator, device_enumerator_free); + +int device_enumerator_add_prioritized_subsystem(sd_device_enumerator *enumerator, const char *subsystem) { + int r; + + assert(enumerator); + assert(subsystem); + + if (strv_contains(enumerator->prioritized_subsystems, subsystem)) + return 0; + + r = strv_extend(&enumerator->prioritized_subsystems, subsystem); + if (r < 0) + return r; + + enumerator->scan_uptodate = false; + + return 1; +} + +_public_ int sd_device_enumerator_add_match_subsystem(sd_device_enumerator *enumerator, const char *subsystem, int match) { + Set **set; + int r; + + assert_return(enumerator, -EINVAL); + assert_return(subsystem, -EINVAL); + + if (match) + set = &enumerator->match_subsystem; + else + set = &enumerator->nomatch_subsystem; + + r = set_put_strdup(set, subsystem); + if (r <= 0) + return r; + + enumerator->scan_uptodate = false; + + return 1; +} + +_public_ int sd_device_enumerator_add_match_sysattr(sd_device_enumerator *enumerator, const char *sysattr, const char *value, int match) { + Hashmap **hashmap; + int r; + + assert_return(enumerator, -EINVAL); + assert_return(sysattr, -EINVAL); + + if (match) + hashmap = &enumerator->match_sysattr; + else + hashmap = &enumerator->nomatch_sysattr; + + r = update_match_strv(hashmap, sysattr, value, /* clear_on_null = */ true); + if (r <= 0) + return r; + + enumerator->scan_uptodate = false; + + return 1; +} + +_public_ int sd_device_enumerator_add_match_property(sd_device_enumerator *enumerator, const char *property, const char *value) { + int r; + + assert_return(enumerator, -EINVAL); + assert_return(property, -EINVAL); + + r = update_match_strv(&enumerator->match_property, property, value, /* clear_on_null = */ false); + if (r <= 0) + return r; + + enumerator->scan_uptodate = false; + + return 1; +} + +_public_ int sd_device_enumerator_add_match_property_required(sd_device_enumerator *enumerator, const char *property, const char *value) { + int r; + + assert_return(enumerator, -EINVAL); + assert_return(property, -EINVAL); + + r = update_match_strv(&enumerator->match_property_required, property, value, /* clear_on_null = */ false); + if (r <= 0) + return r; + + enumerator->scan_uptodate = false; + + return 1; +} + +static int device_enumerator_add_match_sysname(sd_device_enumerator *enumerator, const char *sysname, bool match) { + int r; + + assert_return(enumerator, -EINVAL); + assert_return(sysname, -EINVAL); + + r = set_put_strdup(match ? &enumerator->match_sysname : &enumerator->nomatch_sysname, sysname); + if (r <= 0) + return r; + + enumerator->scan_uptodate = false; + + return 1; +} + +_public_ int sd_device_enumerator_add_match_sysname(sd_device_enumerator *enumerator, const char *sysname) { + return device_enumerator_add_match_sysname(enumerator, sysname, true); +} + +_public_ int sd_device_enumerator_add_nomatch_sysname(sd_device_enumerator *enumerator, const char *sysname) { + return device_enumerator_add_match_sysname(enumerator, sysname, false); +} + +_public_ int sd_device_enumerator_add_match_tag(sd_device_enumerator *enumerator, const char *tag) { + int r; + + assert_return(enumerator, -EINVAL); + assert_return(tag, -EINVAL); + + r = set_put_strdup(&enumerator->match_tag, tag); + if (r <= 0) + return r; + + enumerator->scan_uptodate = false; + + return 1; +} + +int device_enumerator_add_match_parent_incremental(sd_device_enumerator *enumerator, sd_device *parent) { + const char *path; + int r; + + assert(enumerator); + assert(parent); + + r = sd_device_get_syspath(parent, &path); + if (r < 0) + return r; + + r = set_put_strdup(&enumerator->match_parent, path); + if (r <= 0) + return r; + + enumerator->scan_uptodate = false; + + return 1; +} + +_public_ int sd_device_enumerator_add_match_parent(sd_device_enumerator *enumerator, sd_device *parent) { + assert_return(enumerator, -EINVAL); + assert_return(parent, -EINVAL); + + set_clear(enumerator->match_parent); + + return device_enumerator_add_match_parent_incremental(enumerator, parent); +} + +_public_ int sd_device_enumerator_allow_uninitialized(sd_device_enumerator *enumerator) { + assert_return(enumerator, -EINVAL); + + enumerator->match_initialized = MATCH_INITIALIZED_ALL; + + enumerator->scan_uptodate = false; + + return 1; +} + +int device_enumerator_add_match_is_initialized(sd_device_enumerator *enumerator, MatchInitializedType type) { + assert_return(enumerator, -EINVAL); + assert_return(type >= 0 && type < _MATCH_INITIALIZED_MAX, -EINVAL); + + enumerator->match_initialized = type; + + enumerator->scan_uptodate = false; + + return 1; +} + +static int sound_device_compare(const char *devpath_a, const char *devpath_b) { + const char *sound_a, *sound_b; + size_t prefix_len; + + assert(devpath_a); + assert(devpath_b); + + /* For sound cards the control device must be enumerated last to make sure it's the final + * device node that gets ACLs applied. Applications rely on this fact and use ACL changes on + * the control node as an indicator that the ACL change of the entire sound card completed. The + * kernel makes this guarantee when creating those devices, and hence we should too when + * enumerating them. */ + + sound_a = strstrafter(devpath_a, "/sound/card"); + if (!sound_a) + return 0; + + sound_a = strchr(devpath_a, '/'); + if (!sound_a) + return 0; + + prefix_len = sound_a - devpath_a; + + if (!strneq(devpath_a, devpath_b, prefix_len)) + return 0; + + sound_b = devpath_b + prefix_len; + + return CMP(!!startswith(sound_a, "/controlC"), + !!startswith(sound_b, "/controlC")); +} + +static bool devpath_is_late_block(const char *devpath) { + assert(devpath); + + return strstr(devpath, "/block/md") || strstr(devpath, "/block/dm-"); +} + +static int device_compare(sd_device * const *a, sd_device * const *b) { + const char *devpath_a, *devpath_b; + int r; + + assert(a); + assert(b); + assert(*a); + assert(*b); + + assert_se(sd_device_get_devpath(*(sd_device**) a, &devpath_a) >= 0); + assert_se(sd_device_get_devpath(*(sd_device**) b, &devpath_b) >= 0); + + r = sound_device_compare(devpath_a, devpath_b); + if (r != 0) + return r; + + /* md and dm devices are enumerated after all other devices */ + r = CMP(devpath_is_late_block(devpath_a), devpath_is_late_block(devpath_b)); + if (r != 0) + return r; + + return path_compare(devpath_a, devpath_b); +} + +static int enumerator_sort_devices(sd_device_enumerator *enumerator) { + size_t n_sorted = 0, n = 0; + sd_device **devices; + sd_device *device; + int r; + + assert(enumerator); + + if (enumerator->sorted) + return 0; + + devices = new(sd_device*, hashmap_size(enumerator->devices_by_syspath)); + if (!devices) + return -ENOMEM; + + STRV_FOREACH(prioritized_subsystem, enumerator->prioritized_subsystems) { + + for (;;) { + const char *syspath; + size_t m = n; + + HASHMAP_FOREACH_KEY(device, syspath, enumerator->devices_by_syspath) { + _cleanup_free_ char *p = NULL; + const char *subsys; + + if (sd_device_get_subsystem(device, &subsys) < 0) + continue; + + if (!streq(subsys, *prioritized_subsystem)) + continue; + + devices[n++] = sd_device_ref(device); + + for (;;) { + _cleanup_free_ char *q = NULL; + + r = path_extract_directory(p ?: syspath, &q); + if (r == -EADDRNOTAVAIL) + break; + if (r < 0) + goto failed; + + device = hashmap_get(enumerator->devices_by_syspath, q); + if (device) + devices[n++] = sd_device_ref(device); + + free_and_replace(p, q); + } + + break; + } + + /* We cannot remove multiple entries in the loop HASHMAP_FOREACH_KEY() above. */ + for (size_t i = m; i < n; i++) { + r = sd_device_get_syspath(devices[i], &syspath); + if (r < 0) + goto failed; + + assert_se(hashmap_remove(enumerator->devices_by_syspath, syspath) == devices[i]); + sd_device_unref(devices[i]); + } + + if (m == n) + break; + } + + typesafe_qsort(devices + n_sorted, n - n_sorted, device_compare); + n_sorted = n; + } + + HASHMAP_FOREACH(device, enumerator->devices_by_syspath) + devices[n++] = sd_device_ref(device); + + /* Move all devices back to the hashmap. Otherwise, devices added by + * udev_enumerate_add_syspath() -> device_enumerator_add_device() may not be listed. */ + for (size_t i = 0; i < n_sorted; i++) { + const char *syspath; + + r = sd_device_get_syspath(devices[i], &syspath); + if (r < 0) + goto failed; + + r = hashmap_put(enumerator->devices_by_syspath, syspath, devices[i]); + if (r < 0) + goto failed; + assert(r > 0); + + sd_device_ref(devices[i]); + } + + typesafe_qsort(devices + n_sorted, n - n_sorted, device_compare); + + device_unref_many(enumerator->devices, enumerator->n_devices); + + enumerator->n_devices = n; + free_and_replace(enumerator->devices, devices); + + enumerator->sorted = true; + return 0; + +failed: + device_unref_many(devices, n); + free(devices); + return r; +} + +int device_enumerator_add_device(sd_device_enumerator *enumerator, sd_device *device) { + const char *syspath; + int r; + + assert_return(enumerator, -EINVAL); + assert_return(device, -EINVAL); + + r = sd_device_get_syspath(device, &syspath); + if (r < 0) + return r; + + r = hashmap_ensure_put(&enumerator->devices_by_syspath, &string_hash_ops, syspath, device); + if (IN_SET(r, -EEXIST, 0)) + return 0; + if (r < 0) + return r; + + sd_device_ref(device); + + enumerator->sorted = false; + return 1; +} + +static bool match_property(Hashmap *properties, sd_device *device, bool match_all) { + const char *property_pattern; + char * const *value_patterns; + + assert(device); + + /* Unlike device_match_sysattr(), this accepts device that has at least one matching property. */ + + if (hashmap_isempty(properties)) + return true; + + HASHMAP_FOREACH_KEY(value_patterns, property_pattern, properties) { + bool match = false; + + FOREACH_DEVICE_PROPERTY(device, property, value) { + if (fnmatch(property_pattern, property, 0) != 0) + continue; + + match = strv_fnmatch(value_patterns, value); + if (match) { + if (!match_all) + return true; + + break; + } + } + + if (!match && match_all) + return false; + } + + return match_all; +} + +static bool match_tag(sd_device_enumerator *enumerator, sd_device *device) { + const char *tag; + + assert(enumerator); + assert(device); + + SET_FOREACH(tag, enumerator->match_tag) + if (!sd_device_has_tag(device, tag)) + return false; + + return true; +} + +static bool match_sysname(sd_device_enumerator *enumerator, const char *sysname) { + assert(enumerator); + assert(sysname); + + return set_fnmatch(enumerator->match_sysname, enumerator->nomatch_sysname, sysname); +} + +static int match_initialized(sd_device_enumerator *enumerator, sd_device *device) { + int r; + + assert(enumerator); + assert(device); + + if (enumerator->match_initialized == MATCH_INITIALIZED_ALL) + return true; + + r = sd_device_get_is_initialized(device); + if (r == -ENOENT) /* this is necessarily racey, so ignore missing devices */ + return false; + if (r < 0) + return r; + + if (enumerator->match_initialized == MATCH_INITIALIZED_COMPAT) { + /* only devices that have no devnode/ifindex or have a db entry are accepted. */ + if (r > 0) + return true; + + if (sd_device_get_devnum(device, NULL) >= 0) + return false; + + if (sd_device_get_ifindex(device, NULL) >= 0) + return false; + + return true; + } + + return (enumerator->match_initialized == MATCH_INITIALIZED_NO) == (r == 0); +} + +static bool match_subsystem(sd_device_enumerator *enumerator, const char *subsystem) { + assert(enumerator); + + if (!subsystem) + return false; + + return set_fnmatch(enumerator->match_subsystem, enumerator->nomatch_subsystem, subsystem); +} + +typedef enum MatchFlag { + MATCH_SYSNAME = 1u << 0, + MATCH_SUBSYSTEM = 1u << 1, + MATCH_PARENT = 1u << 2, + MATCH_TAG = 1u << 3, + + MATCH_ALL = (1u << 4) - 1, +} MatchFlag; + +static int test_matches( + sd_device_enumerator *enumerator, + sd_device *device, + MatchFlag flags) { + + int r; + + assert(enumerator); + assert(device); + + if (FLAGS_SET(flags, MATCH_SYSNAME)) { + const char *sysname; + + r = sd_device_get_sysname(device, &sysname); + if (r < 0) + return r; + + if (!match_sysname(enumerator, sysname)) + return false; + } + + if (FLAGS_SET(flags, MATCH_SUBSYSTEM)) { + const char *subsystem; + + r = sd_device_get_subsystem(device, &subsystem); + if (r == -ENOENT) + return false; + if (r < 0) + return r; + + if (!match_subsystem(enumerator, subsystem)) + return false; + } + + if (FLAGS_SET(flags, MATCH_PARENT) && + !device_match_parent(device, enumerator->match_parent, NULL)) + return false; + + if (FLAGS_SET(flags, MATCH_TAG) && + !match_tag(enumerator, device)) + return false; + + r = match_initialized(enumerator, device); + if (r <= 0) + return r; + + if (!match_property(enumerator->match_property, device, /* match_all = */ false)) + return false; + + if (!match_property(enumerator->match_property_required, device, /* match_all = */ true)) + return false; + + if (!device_match_sysattr(device, enumerator->match_sysattr, enumerator->nomatch_sysattr)) + return false; + + return true; +} + +static int enumerator_add_parent_devices( + sd_device_enumerator *enumerator, + sd_device *device, + MatchFlag flags) { + + int r; + + assert(enumerator); + assert(device); + + for (;;) { + r = sd_device_get_parent(device, &device); + if (r == -ENOENT) /* Reached the top? */ + return 0; + if (r < 0) + return r; + + r = test_matches(enumerator, device, flags); + if (r < 0) + return r; + if (r == 0) + continue; + + r = device_enumerator_add_device(enumerator, device); + if (r < 0) + return r; + if (r == 0) /* Exists already? Then no need to go further up. */ + return 0; + } +} + +int device_enumerator_add_parent_devices(sd_device_enumerator *enumerator, sd_device *device) { + return enumerator_add_parent_devices(enumerator, device, MATCH_ALL & (~MATCH_PARENT)); +} + +static bool relevant_sysfs_subdir(const struct dirent *de) { + assert(de); + + if (de->d_name[0] == '.') + return false; + + /* Also filter out regular files and such, i.e. stuff that definitely isn't a kobject path. (Note + * that we rely on the fact that sysfs fills in d_type here, i.e. doesn't do DT_UNKNOWN) */ + return IN_SET(de->d_type, DT_DIR, DT_LNK); +} + +static int enumerator_scan_dir_and_add_devices( + sd_device_enumerator *enumerator, + const char *basedir, + const char *subdir1, + const char *subdir2) { + + _cleanup_closedir_ DIR *dir = NULL; + char *path; + int k, r = 0; + + assert(enumerator); + assert(basedir); + + path = strjoina("/sys/", basedir, "/"); + + if (subdir1) + path = strjoina(path, subdir1, "/"); + + if (subdir2) + path = strjoina(path, subdir2, "/"); + + dir = opendir(path); + if (!dir) { + bool ignore = errno == ENOENT; + + /* this is necessarily racey, so ignore missing directories */ + log_debug_errno(errno, + "sd-device-enumerator: Failed to open directory %s%s: %m", + path, ignore ? ", ignoring" : ""); + return ignore ? 0 : -errno; + } + + FOREACH_DIRENT_ALL(de, dir, return -errno) { + _cleanup_(sd_device_unrefp) sd_device *device = NULL; + char syspath[strlen(path) + 1 + strlen(de->d_name) + 1]; + + if (!relevant_sysfs_subdir(de)) + continue; + + if (!match_sysname(enumerator, de->d_name)) + continue; + + (void) sprintf(syspath, "%s%s", path, de->d_name); + + k = sd_device_new_from_syspath(&device, syspath); + if (k < 0) { + if (k != -ENODEV) + /* this is necessarily racey, so ignore missing devices */ + r = k; + + continue; + } + + k = test_matches(enumerator, device, MATCH_ALL & (~MATCH_SYSNAME)); /* sysname is already tested. */ + if (k <= 0) { + if (k < 0) + r = k; + continue; + } + + k = device_enumerator_add_device(enumerator, device); + if (k < 0) + r = k; + + /* Also include all potentially matching parent devices in the enumeration. These are things + * like root busses — e.g. /sys/devices/pci0000:00/ or /sys/devices/pnp0/, which ar not + * linked from /sys/class/ or /sys/bus/, hence pick them up explicitly here. */ + k = enumerator_add_parent_devices(enumerator, device, MATCH_ALL); + if (k < 0) + r = k; + } + + return r; +} + +static int enumerator_scan_dir( + sd_device_enumerator *enumerator, + const char *basedir, + const char *subdir, + const char *subsystem) { + + _cleanup_closedir_ DIR *dir = NULL; + char *path; + int r = 0; + + path = strjoina("/sys/", basedir); + + dir = opendir(path); + if (!dir) { + bool ignore = errno == ENOENT; + + log_debug_errno(errno, + "sd-device-enumerator: Failed to open directory %s%s: %m", + path, ignore ? ", ignoring" : ""); + return ignore ? 0 : -errno; + } + + FOREACH_DIRENT_ALL(de, dir, return -errno) { + int k; + + if (!relevant_sysfs_subdir(de)) + continue; + + if (!match_subsystem(enumerator, subsystem ?: de->d_name)) + continue; + + k = enumerator_scan_dir_and_add_devices(enumerator, basedir, de->d_name, subdir); + if (k < 0) + r = k; + } + + return r; +} + +static int enumerator_scan_devices_tag(sd_device_enumerator *enumerator, const char *tag) { + _cleanup_closedir_ DIR *dir = NULL; + char *path; + int r = 0; + + assert(enumerator); + assert(tag); + + path = strjoina("/run/udev/tags/", tag); + + dir = opendir(path); + if (!dir) { + bool ignore = errno == ENOENT; + + log_debug_errno(errno, + "sd-device-enumerator: Failed to open directory %s%s: %m", + path, ignore ? ", ignoring" : ""); + return ignore ? 0 : -errno; + } + + /* TODO: filter away subsystems? */ + + FOREACH_DIRENT_ALL(de, dir, return -errno) { + _cleanup_(sd_device_unrefp) sd_device *device = NULL; + int k; + + if (de->d_name[0] == '.') + continue; + + k = sd_device_new_from_device_id(&device, de->d_name); + if (k < 0) { + if (k != -ENODEV) + /* this is necessarily racy, so ignore missing devices */ + r = k; + + continue; + } + + /* Generated from tag, hence not necessary to check tag again. */ + k = test_matches(enumerator, device, MATCH_ALL & (~MATCH_TAG)); + if (k < 0) + r = k; + if (k <= 0) + continue; + + k = device_enumerator_add_device(enumerator, device); + if (k < 0) { + r = k; + continue; + } + } + + return r; +} + +static int enumerator_scan_devices_tags(sd_device_enumerator *enumerator) { + const char *tag; + int r = 0; + + assert(enumerator); + + SET_FOREACH(tag, enumerator->match_tag) { + int k; + + k = enumerator_scan_devices_tag(enumerator, tag); + if (k < 0) + r = k; + } + + return r; +} + +static int parent_add_child(sd_device_enumerator *enumerator, const char *path, MatchFlag flags) { + _cleanup_(sd_device_unrefp) sd_device *device = NULL; + int r; + + r = sd_device_new_from_syspath(&device, path); + if (r == -ENODEV) + /* this is necessarily racy, so ignore missing devices */ + return 0; + else if (r < 0) + return r; + + r = test_matches(enumerator, device, flags); + if (r <= 0) + return r; + + return device_enumerator_add_device(enumerator, device); +} + +static int parent_crawl_children(sd_device_enumerator *enumerator, const char *path, Set **stack) { + _cleanup_closedir_ DIR *dir = NULL; + int r = 0; + + assert(enumerator); + assert(path); + assert(stack); + + dir = opendir(path); + if (!dir) { + bool ignore = errno == ENOENT; + + log_debug_errno(errno, + "sd-device-enumerator: Failed to open directory %s%s: %m", + path, ignore ? ", ignoring" : ""); + return ignore ? 0 : -errno; + } + + FOREACH_DIRENT_ALL(de, dir, return -errno) { + _cleanup_free_ char *child = NULL; + int k; + + if (de->d_name[0] == '.') + continue; + + if (de->d_type != DT_DIR) + continue; + + child = path_join(path, de->d_name); + if (!child) + return -ENOMEM; + + /* Let's check sysname filter earlier. The other tests require the sd-device object created + * from the path, thus much costly. */ + if (match_sysname(enumerator, de->d_name)) { + k = parent_add_child(enumerator, child, MATCH_ALL & (~(MATCH_SYSNAME|MATCH_PARENT))); + if (k < 0) + r = k; + } + + k = set_ensure_consume(stack, &path_hash_ops_free, TAKE_PTR(child)); + if (k < 0) + r = k; + } + + return r; +} + +static int enumerator_scan_devices_children(sd_device_enumerator *enumerator) { + _cleanup_set_free_ Set *stack = NULL; + const char *path; + int r = 0, k; + + assert(enumerator); + + SET_FOREACH(path, enumerator->match_parent) { + k = parent_add_child(enumerator, path, MATCH_ALL & (~MATCH_PARENT)); + if (k < 0) + r = k; + + k = parent_crawl_children(enumerator, path, &stack); + if (k < 0) + r = k; + } + + for (;;) { + _cleanup_free_ char *p = NULL; + + p = set_steal_first(stack); + if (!p) + return r; + + k = parent_crawl_children(enumerator, p, &stack); + if (k < 0) + r = k; + } +} + +static int enumerator_scan_devices_all(sd_device_enumerator *enumerator) { + int k, r = 0; + + k = enumerator_scan_dir(enumerator, "bus", "devices", NULL); + if (k < 0) + r = log_debug_errno(k, "sd-device-enumerator: Failed to scan /sys/bus: %m"); + + k = enumerator_scan_dir(enumerator, "class", NULL, NULL); + if (k < 0) + r = log_debug_errno(k, "sd-device-enumerator: Failed to scan /sys/class: %m"); + + return r; +} + +int device_enumerator_scan_devices(sd_device_enumerator *enumerator) { + int r = 0, k; + + assert(enumerator); + + if (enumerator->scan_uptodate && + enumerator->type == DEVICE_ENUMERATION_TYPE_DEVICES) + return 0; + + device_enumerator_unref_devices(enumerator); + + if (!set_isempty(enumerator->match_tag)) { + k = enumerator_scan_devices_tags(enumerator); + if (k < 0) + r = k; + } else if (enumerator->match_parent) { + k = enumerator_scan_devices_children(enumerator); + if (k < 0) + r = k; + } else { + k = enumerator_scan_devices_all(enumerator); + if (k < 0) + r = k; + } + + enumerator->scan_uptodate = true; + enumerator->type = DEVICE_ENUMERATION_TYPE_DEVICES; + + return r; +} + +_public_ sd_device *sd_device_enumerator_get_device_first(sd_device_enumerator *enumerator) { + assert_return(enumerator, NULL); + + if (device_enumerator_scan_devices(enumerator) < 0) + return NULL; + + if (enumerator_sort_devices(enumerator) < 0) + return NULL; + + enumerator->current_device_index = 0; + + if (enumerator->n_devices == 0) + return NULL; + + return enumerator->devices[0]; +} + +_public_ sd_device *sd_device_enumerator_get_device_next(sd_device_enumerator *enumerator) { + assert_return(enumerator, NULL); + + if (!enumerator->scan_uptodate || + !enumerator->sorted || + enumerator->type != DEVICE_ENUMERATION_TYPE_DEVICES || + enumerator->current_device_index + 1 >= enumerator->n_devices) + return NULL; + + return enumerator->devices[++enumerator->current_device_index]; +} + +int device_enumerator_scan_subsystems(sd_device_enumerator *enumerator) { + int r = 0, k; + + assert(enumerator); + + if (enumerator->scan_uptodate && + enumerator->type == DEVICE_ENUMERATION_TYPE_SUBSYSTEMS) + return 0; + + device_enumerator_unref_devices(enumerator); + + /* modules */ + if (match_subsystem(enumerator, "module")) { + k = enumerator_scan_dir_and_add_devices(enumerator, "module", NULL, NULL); + if (k < 0) + r = log_debug_errno(k, "sd-device-enumerator: Failed to scan modules: %m"); + } + + /* subsystems (only buses support coldplug) */ + if (match_subsystem(enumerator, "subsystem")) { + k = enumerator_scan_dir_and_add_devices(enumerator, "bus", NULL, NULL); + if (k < 0) + r = log_debug_errno(k, "sd-device-enumerator: Failed to scan subsystems: %m"); + } + + /* subsystem drivers */ + if (match_subsystem(enumerator, "drivers")) { + k = enumerator_scan_dir(enumerator, "bus", "drivers", "drivers"); + if (k < 0) + r = log_debug_errno(k, "sd-device-enumerator: Failed to scan drivers: %m"); + } + + enumerator->scan_uptodate = true; + enumerator->type = DEVICE_ENUMERATION_TYPE_SUBSYSTEMS; + + return r; +} + +_public_ sd_device *sd_device_enumerator_get_subsystem_first(sd_device_enumerator *enumerator) { + assert_return(enumerator, NULL); + + if (device_enumerator_scan_subsystems(enumerator) < 0) + return NULL; + + if (enumerator_sort_devices(enumerator) < 0) + return NULL; + + enumerator->current_device_index = 0; + + if (enumerator->n_devices == 0) + return NULL; + + return enumerator->devices[0]; +} + +_public_ sd_device *sd_device_enumerator_get_subsystem_next(sd_device_enumerator *enumerator) { + assert_return(enumerator, NULL); + + if (!enumerator->scan_uptodate || + !enumerator->sorted || + enumerator->type != DEVICE_ENUMERATION_TYPE_SUBSYSTEMS || + enumerator->current_device_index + 1 >= enumerator->n_devices) + return NULL; + + return enumerator->devices[++enumerator->current_device_index]; +} + +int device_enumerator_scan_devices_and_subsystems(sd_device_enumerator *enumerator) { + int r; + + assert(enumerator); + + if (enumerator->scan_uptodate && + enumerator->type == DEVICE_ENUMERATION_TYPE_ALL) + return 0; + + device_enumerator_unref_devices(enumerator); + + if (!set_isempty(enumerator->match_tag)) + r = enumerator_scan_devices_tags(enumerator); + else if (enumerator->match_parent) + r = enumerator_scan_devices_children(enumerator); + else { + int k; + + r = enumerator_scan_devices_all(enumerator); + + if (match_subsystem(enumerator, "module")) { + k = enumerator_scan_dir_and_add_devices(enumerator, "module", NULL, NULL); + if (k < 0) + r = log_debug_errno(k, "sd-device-enumerator: Failed to scan modules: %m"); + } + if (match_subsystem(enumerator, "subsystem")) { + k = enumerator_scan_dir_and_add_devices(enumerator, "bus", NULL, NULL); + if (k < 0) + r = log_debug_errno(k, "sd-device-enumerator: Failed to scan subsystems: %m"); + } + + if (match_subsystem(enumerator, "drivers")) { + k = enumerator_scan_dir(enumerator, "bus", "drivers", "drivers"); + if (k < 0) + r = log_debug_errno(k, "sd-device-enumerator: Failed to scan drivers: %m"); + } + } + + enumerator->scan_uptodate = true; + enumerator->type = DEVICE_ENUMERATION_TYPE_ALL; + + return r; +} + +sd_device *device_enumerator_get_first(sd_device_enumerator *enumerator) { + assert_return(enumerator, NULL); + + if (!enumerator->scan_uptodate) + return NULL; + + if (enumerator_sort_devices(enumerator) < 0) + return NULL; + + enumerator->current_device_index = 0; + + if (enumerator->n_devices == 0) + return NULL; + + return enumerator->devices[0]; +} + +sd_device *device_enumerator_get_next(sd_device_enumerator *enumerator) { + assert_return(enumerator, NULL); + + if (!enumerator->scan_uptodate || + !enumerator->sorted || + enumerator->current_device_index + 1 >= enumerator->n_devices) + return NULL; + + return enumerator->devices[++enumerator->current_device_index]; +} + +sd_device **device_enumerator_get_devices(sd_device_enumerator *enumerator, size_t *ret_n_devices) { + assert(enumerator); + assert(ret_n_devices); + + if (!enumerator->scan_uptodate) + return NULL; + + if (enumerator_sort_devices(enumerator) < 0) + return NULL; + + *ret_n_devices = enumerator->n_devices; + return enumerator->devices; +} diff --git a/src/libsystemd/sd-device/device-filter.c b/src/libsystemd/sd-device/device-filter.c new file mode 100644 index 0000000..4101e7d --- /dev/null +++ b/src/libsystemd/sd-device/device-filter.c @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fnmatch.h> + +#include "device-filter.h" +#include "path-util.h" + +int update_match_strv(Hashmap **match_strv, const char *key, const char *value, bool clear_on_null) { + char **strv; + int r; + + assert(match_strv); + assert(key); + + strv = hashmap_get(*match_strv, key); + if (strv) { + if (!value) { + char **v; + + if (strv_isempty(strv) || !clear_on_null) + return 0; + + /* Accept all value. Clear previous assignment. */ + + v = new0(char*, 1); + if (!v) + return -ENOMEM; + + strv_free_and_replace(strv, v); + } else { + if (strv_contains(strv, value)) + return 0; + + r = strv_extend(&strv, value); + if (r < 0) + return r; + } + + r = hashmap_update(*match_strv, key, strv); + if (r < 0) + return r; + + } else { + _cleanup_strv_free_ char **strv_alloc = NULL; + _cleanup_free_ char *key_alloc = NULL; + + key_alloc = strdup(key); + if (!key_alloc) + return -ENOMEM; + + strv_alloc = strv_new(value); + if (!strv_alloc) + return -ENOMEM; + + r = hashmap_ensure_put(match_strv, &string_hash_ops_free_strv_free, key_alloc, strv_alloc); + if (r < 0) + return r; + + TAKE_PTR(key_alloc); + TAKE_PTR(strv_alloc); + } + + return 1; +} + +static bool device_match_sysattr_value(sd_device *device, const char *sysattr, char * const *patterns) { + const char *value; + + assert(device); + assert(sysattr); + + if (sd_device_get_sysattr_value(device, sysattr, &value) < 0) + return false; + + return strv_fnmatch_or_empty(patterns, value, 0); +} + +bool device_match_sysattr(sd_device *device, Hashmap *match_sysattr, Hashmap *nomatch_sysattr) { + char * const *patterns; + const char *sysattr; + + assert(device); + + HASHMAP_FOREACH_KEY(patterns, sysattr, match_sysattr) + if (!device_match_sysattr_value(device, sysattr, patterns)) + return false; + + HASHMAP_FOREACH_KEY(patterns, sysattr, nomatch_sysattr) + if (device_match_sysattr_value(device, sysattr, patterns)) + return false; + + return true; +} + +bool device_match_parent(sd_device *device, Set *match_parent, Set *nomatch_parent) { + const char *syspath_parent, *syspath; + + assert(device); + + if (sd_device_get_syspath(device, &syspath) < 0) + return false; + + SET_FOREACH(syspath_parent, nomatch_parent) + if (path_startswith(syspath, syspath_parent)) + return false; + + if (set_isempty(match_parent)) + return true; + + SET_FOREACH(syspath_parent, match_parent) + if (path_startswith(syspath, syspath_parent)) + return true; + + return false; +} diff --git a/src/libsystemd/sd-device/device-filter.h b/src/libsystemd/sd-device/device-filter.h new file mode 100644 index 0000000..0c5f34e --- /dev/null +++ b/src/libsystemd/sd-device/device-filter.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "sd-device.h" + +#include "hashmap.h" +#include "set.h" + +int update_match_strv(Hashmap **match_strv, const char *key, const char *value, bool clear_on_null); +bool device_match_sysattr(sd_device *device, Hashmap *match_sysattr, Hashmap *nomatch_sysattr); +bool device_match_parent(sd_device *device, Set *match_parent, Set *nomatch_parent); diff --git a/src/libsystemd/sd-device/device-internal.h b/src/libsystemd/sd-device/device-internal.h new file mode 100644 index 0000000..a465eb2 --- /dev/null +++ b/src/libsystemd/sd-device/device-internal.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-device.h" + +#include "device-private.h" +#include "hashmap.h" +#include "set.h" +#include "time-util.h" + +#define LATEST_UDEV_DATABASE_VERSION 1 + +struct sd_device { + unsigned n_ref; + + /* The database version indicates the supported features by the udev database. + * This is saved and parsed in V field. + * + * 0: None of the following features are supported (systemd version <= 246). + * 1: The current tags (Q) and the database version (V) features are implemented (>= 247). + */ + unsigned database_version; + + sd_device *parent; + + OrderedHashmap *properties; + Iterator properties_iterator; + uint64_t properties_generation; /* changes whenever the properties are changed */ + uint64_t properties_iterator_generation; /* generation when iteration was started */ + + /* the subset of the properties that should be written to the db */ + OrderedHashmap *properties_db; + + Hashmap *sysattr_values; /* cached sysattr values */ + + Set *sysattrs; /* names of sysattrs */ + Iterator sysattrs_iterator; + + Set *all_tags, *current_tags; + Iterator all_tags_iterator, current_tags_iterator; + uint64_t all_tags_iterator_generation, current_tags_iterator_generation; /* generation when iteration was started */ + uint64_t tags_generation; /* changes whenever the tags are changed */ + + Set *devlinks; + Iterator devlinks_iterator; + uint64_t devlinks_generation; /* changes whenever the devlinks are changed */ + uint64_t devlinks_iterator_generation; /* generation when iteration was started */ + int devlink_priority; + + Hashmap *children; + Iterator children_iterator; + bool children_enumerated; + + int ifindex; + char *devtype; + char *devname; + dev_t devnum; + + char **properties_strv; /* the properties hashmap as a strv */ + char *properties_nulstr; /* the same as a nulstr */ + size_t properties_nulstr_len; + + char *syspath; + const char *devpath; + const char *sysnum; + char *sysname; + + char *subsystem; + char *driver_subsystem; /* only set for the 'drivers' subsystem */ + char *driver; + + char *device_id; + + usec_t usec_initialized; + + mode_t devmode; + uid_t devuid; + gid_t devgid; + + uint64_t diskseq; /* Block device sequence number, monothonically incremented by the kernel on create/attach */ + + /* only set when device is passed through netlink */ + sd_device_action_t action; + uint64_t seqnum; + + bool parent_set:1; /* no need to try to reload parent */ + bool sysattrs_read:1; /* don't try to re-read sysattrs once read */ + bool property_tags_outdated:1; /* need to update TAGS= or CURRENT_TAGS= property */ + bool property_devlinks_outdated:1; /* need to update DEVLINKS= property */ + bool properties_buf_outdated:1; /* need to reread hashmap */ + bool subsystem_set:1; /* don't reread subsystem */ + bool driver_set:1; /* don't reread driver */ + bool uevent_loaded:1; /* don't reread uevent */ + bool db_loaded; /* don't reread db */ + + bool is_initialized:1; + bool sealed:1; /* don't read more information from uevent/db */ + bool db_persist:1; /* don't clean up the db when switching from initrd to real root */ +}; + +int device_new_aux(sd_device **ret); +int device_add_property_aux(sd_device *device, const char *key, const char *value, bool db); +static inline int device_add_property_internal(sd_device *device, const char *key, const char *value) { + return device_add_property_aux(device, key, value, false); +} + +int device_set_syspath(sd_device *device, const char *_syspath, bool verify); +int device_set_ifindex(sd_device *device, const char *ifindex); +int device_set_devmode(sd_device *device, const char *devmode); +int device_set_devname(sd_device *device, const char *devname); +int device_set_devtype(sd_device *device, const char *devtype); +int device_set_devnum(sd_device *device, const char *major, const char *minor); +int device_set_subsystem(sd_device *device, const char *subsystem); +int device_set_diskseq(sd_device *device, const char *str); +int device_set_drivers_subsystem(sd_device *device); +int device_set_driver(sd_device *device, const char *driver); +int device_set_usec_initialized(sd_device *device, usec_t when); diff --git a/src/libsystemd/sd-device/device-monitor-private.h b/src/libsystemd/sd-device/device-monitor-private.h new file mode 100644 index 0000000..33e2714 --- /dev/null +++ b/src/libsystemd/sd-device/device-monitor-private.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <errno.h> + +#include "sd-device.h" + +typedef enum MonitorNetlinkGroup { + MONITOR_GROUP_NONE, + MONITOR_GROUP_KERNEL, + MONITOR_GROUP_UDEV, + _MONITOR_NETLINK_GROUP_MAX, + _MONITOR_NETLINK_GROUP_INVALID = -EINVAL, +} MonitorNetlinkGroup; + +int device_monitor_new_full(sd_device_monitor **ret, MonitorNetlinkGroup group, int fd); +int device_monitor_disconnect(sd_device_monitor *m); +int device_monitor_allow_unicast_sender(sd_device_monitor *m, sd_device_monitor *sender); +int device_monitor_enable_receiving(sd_device_monitor *m); +int device_monitor_get_fd(sd_device_monitor *m); +int device_monitor_send_device(sd_device_monitor *m, sd_device_monitor *destination, sd_device *device); +int device_monitor_receive_device(sd_device_monitor *m, sd_device **ret); diff --git a/src/libsystemd/sd-device/device-monitor.c b/src/libsystemd/sd-device/device-monitor.c new file mode 100644 index 0000000..bb4f9bd --- /dev/null +++ b/src/libsystemd/sd-device/device-monitor.c @@ -0,0 +1,929 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <linux/filter.h> +#include <linux/netlink.h> +#include <linux/sockios.h> +#include <sys/ioctl.h> +#include <unistd.h> + +#include "sd-device.h" +#include "sd-event.h" + +#include "MurmurHash2.h" +#include "alloc-util.h" +#include "device-filter.h" +#include "device-monitor-private.h" +#include "device-private.h" +#include "device-util.h" +#include "errno-util.h" +#include "fd-util.h" +#include "format-util.h" +#include "hashmap.h" +#include "iovec-util.h" +#include "missing_socket.h" +#include "mountpoint-util.h" +#include "set.h" +#include "socket-util.h" +#include "stat-util.h" +#include "string-util.h" +#include "strv.h" +#include "uid-range.h" + +#define log_monitor(m, format, ...) \ + log_debug("sd-device-monitor(%s): " format, strna(m ? m->description : NULL), ##__VA_ARGS__) +#define log_monitor_errno(m, r, format, ...) \ + log_debug_errno(r, "sd-device-monitor(%s): " format, strna(m ? m->description : NULL), ##__VA_ARGS__) +#define log_device_monitor(d, m, format, ...) \ + log_device_debug(d, "sd-device-monitor(%s): " format, strna(m ? m->description : NULL), ##__VA_ARGS__) +#define log_device_monitor_errno(d, m, r, format, ...) \ + log_device_debug_errno(d, r, "sd-device-monitor(%s): " format, strna(m ? m->description : NULL), ##__VA_ARGS__) + +struct sd_device_monitor { + unsigned n_ref; + + int sock; + union sockaddr_union snl; + union sockaddr_union snl_trusted_sender; + bool bound; + + UidRange *mapped_userns_uid_range; + + Hashmap *subsystem_filter; + Set *tag_filter; + Hashmap *match_sysattr_filter; + Hashmap *nomatch_sysattr_filter; + Set *match_parent_filter; + Set *nomatch_parent_filter; + bool filter_uptodate; + + sd_event *event; + sd_event_source *event_source; + char *description; + sd_device_monitor_handler_t callback; + void *userdata; +}; + +#define UDEV_MONITOR_MAGIC 0xfeedcafe + +typedef struct monitor_netlink_header { + /* "libudev" prefix to distinguish libudev and kernel messages */ + char prefix[8]; + /* Magic to protect against daemon <-> Library message format mismatch + * Used in the kernel from socket filter rules; needs to be stored in network order */ + unsigned magic; + /* Total length of header structure known to the sender */ + unsigned header_size; + /* Properties string buffer */ + unsigned properties_off; + unsigned properties_len; + /* Hashes of primary device properties strings, to let libudev subscribers + * use in-kernel socket filters; values need to be stored in network order */ + unsigned filter_subsystem_hash; + unsigned filter_devtype_hash; + unsigned filter_tag_bloom_hi; + unsigned filter_tag_bloom_lo; +} monitor_netlink_header; + +static int monitor_set_nl_address(sd_device_monitor *m) { + union sockaddr_union snl; + socklen_t addrlen; + + assert(m); + + /* Get the address the kernel has assigned us. + * It is usually, but not necessarily the pid. */ + addrlen = sizeof(struct sockaddr_nl); + if (getsockname(m->sock, &snl.sa, &addrlen) < 0) + return -errno; + + m->snl.nl.nl_pid = snl.nl.nl_pid; + return 0; +} + +int device_monitor_allow_unicast_sender(sd_device_monitor *m, sd_device_monitor *sender) { + assert(m); + assert(sender); + + m->snl_trusted_sender.nl.nl_pid = sender->snl.nl.nl_pid; + return 0; +} + +_public_ int sd_device_monitor_set_receive_buffer_size(sd_device_monitor *m, size_t size) { + assert_return(m, -EINVAL); + + return fd_set_rcvbuf(m->sock, size, false); +} + +int device_monitor_disconnect(sd_device_monitor *m) { + assert(m); + + m->sock = safe_close(m->sock); + return 0; +} + +int device_monitor_get_fd(sd_device_monitor *m) { + assert(m); + + return m->sock; +} + +int device_monitor_new_full(sd_device_monitor **ret, MonitorNetlinkGroup group, int fd) { + _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *m = NULL; + _cleanup_close_ int sock = -EBADF; + int r; + + assert(group >= 0 && group < _MONITOR_NETLINK_GROUP_MAX); + assert_return(ret, -EINVAL); + + if (group == MONITOR_GROUP_UDEV && + access("/run/udev/control", F_OK) < 0 && + dev_is_devtmpfs() <= 0) { + + /* + * We do not support subscribing to uevents if no instance of + * udev is running. Uevents would otherwise broadcast the + * processing data of the host into containers, which is not + * desired. + * + * Containers will currently not get any udev uevents, until + * a supporting infrastructure is available. + * + * We do not set a netlink multicast group here, so the socket + * will not receive any messages. + */ + + log_monitor(m, "The udev service seems not to be active, disabling the monitor."); + group = MONITOR_GROUP_NONE; + } + + if (fd < 0) { + sock = socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_KOBJECT_UEVENT); + if (sock < 0) + return log_monitor_errno(m, errno, "Failed to create socket: %m"); + } + + m = new(sd_device_monitor, 1); + if (!m) + return -ENOMEM; + + *m = (sd_device_monitor) { + .n_ref = 1, + .sock = fd >= 0 ? fd : TAKE_FD(sock), + .bound = fd >= 0, + .snl.nl.nl_family = AF_NETLINK, + .snl.nl.nl_groups = group, + }; + + if (fd >= 0) { + r = monitor_set_nl_address(m); + if (r < 0) { + log_monitor_errno(m, r, "Failed to set netlink address: %m"); + goto fail; + } + } + + if (DEBUG_LOGGING) { + _cleanup_close_ int netns = -EBADF; + + /* So here's the thing: only AF_NETLINK sockets from the main network namespace will get + * hardware events. Let's check if ours is from there, and if not generate a debug message, + * since we cannot possibly work correctly otherwise. This is just a safety check to make + * things easier to debug. */ + + netns = ioctl(m->sock, SIOCGSKNS); + if (netns < 0) + log_monitor_errno(m, errno, "Unable to get network namespace of udev netlink socket, unable to determine if we are in host netns, ignoring: %m"); + else { + struct stat a, b; + + if (fstat(netns, &a) < 0) { + r = log_monitor_errno(m, errno, "Failed to stat netns of udev netlink socket: %m"); + goto fail; + } + + if (stat("/proc/1/ns/net", &b) < 0) { + if (ERRNO_IS_PRIVILEGE(errno)) + /* If we can't access PID1's netns info due to permissions, it's fine, this is a + * safety check only after all. */ + log_monitor_errno(m, errno, "No permission to stat PID1's netns, unable to determine if we are in host netns, ignoring: %m"); + else + log_monitor_errno(m, errno, "Failed to stat PID1's netns, ignoring: %m"); + + } else if (!stat_inode_same(&a, &b)) + log_monitor(m, "Netlink socket we listen on is not from host netns, we won't see device events."); + } + } + + /* Let's bump the receive buffer size, but only if we are not called via socket activation, as in + * that case the service manager sets the receive buffer size for us, and the value in the .socket + * unit should take full effect. */ + if (fd < 0) { + r = sd_device_monitor_set_receive_buffer_size(m, 128*1024*1024); + if (r < 0) + log_monitor_errno(m, r, "Failed to increase receive buffer size, ignoring: %m"); + } + + *ret = TAKE_PTR(m); + return 0; + +fail: + /* Let's unset the socket fd in the monitor object before we destroy it so that the fd passed in is + * not closed on failure. */ + if (fd >= 0) + m->sock = -1; + + return r; +} + +_public_ int sd_device_monitor_new(sd_device_monitor **ret) { + return device_monitor_new_full(ret, MONITOR_GROUP_UDEV, -1); +} + +_public_ int sd_device_monitor_stop(sd_device_monitor *m) { + assert_return(m, -EINVAL); + + m->event_source = sd_event_source_unref(m->event_source); + (void) device_monitor_disconnect(m); + + return 0; +} + +static int device_monitor_event_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) { + _cleanup_(sd_device_unrefp) sd_device *device = NULL; + _unused_ _cleanup_(log_context_unrefp) LogContext *c = NULL; + sd_device_monitor *m = ASSERT_PTR(userdata); + + if (device_monitor_receive_device(m, &device) <= 0) + return 0; + + if (log_context_enabled()) + c = log_context_new_strv_consume(device_make_log_fields(device)); + + if (m->callback) + return m->callback(m, device, m->userdata); + + return 0; +} + +_public_ int sd_device_monitor_start(sd_device_monitor *m, sd_device_monitor_handler_t callback, void *userdata) { + int r; + + assert_return(m, -EINVAL); + + if (!m->event) { + r = sd_device_monitor_attach_event(m, NULL); + if (r < 0) + return r; + } + + r = device_monitor_enable_receiving(m); + if (r < 0) + return r; + + m->callback = callback; + m->userdata = userdata; + + r = sd_event_add_io(m->event, &m->event_source, m->sock, EPOLLIN, device_monitor_event_handler, m); + if (r < 0) + return r; + + (void) sd_event_source_set_description(m->event_source, m->description ?: "sd-device-monitor"); + + return 0; +} + +_public_ int sd_device_monitor_detach_event(sd_device_monitor *m) { + assert_return(m, -EINVAL); + + (void) sd_device_monitor_stop(m); + m->event = sd_event_unref(m->event); + + return 0; +} + +_public_ int sd_device_monitor_attach_event(sd_device_monitor *m, sd_event *event) { + int r; + + assert_return(m, -EINVAL); + assert_return(!m->event, -EBUSY); + + if (event) + m->event = sd_event_ref(event); + else { + r = sd_event_default(&m->event); + if (r < 0) + return r; + } + + return 0; +} + +_public_ sd_event *sd_device_monitor_get_event(sd_device_monitor *m) { + assert_return(m, NULL); + + return m->event; +} + +_public_ sd_event_source *sd_device_monitor_get_event_source(sd_device_monitor *m) { + assert_return(m, NULL); + + return m->event_source; +} + +_public_ int sd_device_monitor_set_description(sd_device_monitor *m, const char *description) { + int r; + + assert_return(m, -EINVAL); + + r = free_and_strdup(&m->description, description); + if (r <= 0) + return r; + + if (m->event_source) + (void) sd_event_source_set_description(m->event_source, description); + + return r; +} + +_public_ int sd_device_monitor_get_description(sd_device_monitor *m, const char **ret) { + assert_return(m, -EINVAL); + assert_return(ret, -EINVAL); + + *ret = m->description; + return 0; +} + +int device_monitor_enable_receiving(sd_device_monitor *m) { + int r; + + assert(m); + + r = sd_device_monitor_filter_update(m); + if (r < 0) + return log_monitor_errno(m, r, "Failed to update filter: %m"); + + if (!m->bound) { + /* enable receiving of sender credentials */ + r = setsockopt_int(m->sock, SOL_SOCKET, SO_PASSCRED, true); + if (r < 0) + return log_monitor_errno(m, r, "Failed to set socket option SO_PASSCRED: %m"); + + if (bind(m->sock, &m->snl.sa, sizeof(struct sockaddr_nl)) < 0) + return log_monitor_errno(m, errno, "Failed to bind monitoring socket: %m"); + + m->bound = true; + + r = monitor_set_nl_address(m); + if (r < 0) + return log_monitor_errno(m, r, "Failed to set address: %m"); + } + + return 0; +} + +static sd_device_monitor *device_monitor_free(sd_device_monitor *m) { + assert(m); + + (void) sd_device_monitor_detach_event(m); + + uid_range_free(m->mapped_userns_uid_range); + free(m->description); + hashmap_free(m->subsystem_filter); + set_free(m->tag_filter); + hashmap_free(m->match_sysattr_filter); + hashmap_free(m->nomatch_sysattr_filter); + set_free(m->match_parent_filter); + set_free(m->nomatch_parent_filter); + + return mfree(m); +} + +DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_device_monitor, sd_device_monitor, device_monitor_free); + +static int check_subsystem_filter(sd_device_monitor *m, sd_device *device) { + const char *s, *subsystem, *d, *devtype = NULL; + int r; + + assert(m); + assert(device); + + if (hashmap_isempty(m->subsystem_filter)) + return true; + + r = sd_device_get_subsystem(device, &subsystem); + if (r < 0) + return r; + + r = sd_device_get_devtype(device, &devtype); + if (r < 0 && r != -ENOENT) + return r; + + HASHMAP_FOREACH_KEY(d, s, m->subsystem_filter) { + if (!streq(s, subsystem)) + continue; + + if (!d || streq_ptr(d, devtype)) + return true; + } + + return false; +} + +static bool check_tag_filter(sd_device_monitor *m, sd_device *device) { + const char *tag; + + assert(m); + assert(device); + + if (set_isempty(m->tag_filter)) + return true; + + SET_FOREACH(tag, m->tag_filter) + if (sd_device_has_tag(device, tag) > 0) + return true; + + return false; +} + +static int passes_filter(sd_device_monitor *m, sd_device *device) { + int r; + + assert(m); + assert(device); + + r = check_subsystem_filter(m, device); + if (r <= 0) + return r; + + if (!check_tag_filter(m, device)) + return false; + + if (!device_match_sysattr(device, m->match_sysattr_filter, m->nomatch_sysattr_filter)) + return false; + + return device_match_parent(device, m->match_parent_filter, m->nomatch_parent_filter); +} + +static bool check_sender_uid(sd_device_monitor *m, uid_t uid) { + int r; + + assert(m); + + /* Always trust messages from uid 0. */ + if (uid == 0) + return true; + + /* Trust messages sent by the same UID we are running. Currently, such situation happens only for + * unicast messages. */ + if (uid == getuid() || uid == geteuid()) + return true; + + if (!m->mapped_userns_uid_range) { + r = uid_range_load_userns(&m->mapped_userns_uid_range, NULL); + if (r < 0) + log_monitor_errno(m, r, "Failed to load UID ranges mapped to the current user namespace, ignoring: %m"); + } + + /* Trust messages come from outside of the current user namespace. */ + if (!uid_range_contains(m->mapped_userns_uid_range, uid)) + return true; + + /* Otherwise, refuse messages. */ + return false; +} + +int device_monitor_receive_device(sd_device_monitor *m, sd_device **ret) { + _cleanup_(sd_device_unrefp) sd_device *device = NULL; + _cleanup_free_ uint8_t *buf_alloc = NULL; + union { + monitor_netlink_header *nlh; + char *nulstr; + uint8_t *buf; + } message; + struct iovec iov; + CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control; + union sockaddr_union snl; + struct msghdr smsg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = &control, + .msg_controllen = sizeof(control), + .msg_name = &snl, + .msg_namelen = sizeof(snl), + }; + struct ucred *cred; + size_t offset; + ssize_t n; + bool is_initialized = false; + int r; + + assert(m); + assert(ret); + + n = next_datagram_size_fd(m->sock); + if (n < 0) { + if (!ERRNO_IS_TRANSIENT(n)) + log_monitor_errno(m, n, "Failed to get the received message size: %m"); + return n; + } + + if ((size_t) n < ALLOCA_MAX / sizeof(uint8_t) / 2) + message.buf = newa(uint8_t, n); + else { + buf_alloc = new(uint8_t, n); + if (!buf_alloc) + return log_oom_debug(); + + message.buf = buf_alloc; + } + + iov = IOVEC_MAKE(message.buf, n); + + n = recvmsg(m->sock, &smsg, 0); + if (n < 0) { + if (!ERRNO_IS_TRANSIENT(errno)) + log_monitor_errno(m, errno, "Failed to receive message: %m"); + return -errno; + } + + if (smsg.msg_flags & MSG_TRUNC) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EINVAL), "Received truncated message, ignoring message."); + + if (n < 32) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EINVAL), "Invalid message length (%zi), ignoring message.", n); + + if (snl.nl.nl_groups == MONITOR_GROUP_NONE) { + /* unicast message, check if we trust the sender */ + if (m->snl_trusted_sender.nl.nl_pid == 0 || + snl.nl.nl_pid != m->snl_trusted_sender.nl.nl_pid) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN), + "Unicast netlink message ignored."); + + } else if (snl.nl.nl_groups == MONITOR_GROUP_KERNEL) { + if (snl.nl.nl_pid > 0) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN), + "Multicast kernel netlink message from PID %"PRIu32" ignored.", + snl.nl.nl_pid); + } + + cred = CMSG_FIND_DATA(&smsg, SOL_SOCKET, SCM_CREDENTIALS, struct ucred); + if (!cred) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN), + "No sender credentials received, ignoring message."); + + if (!check_sender_uid(m, cred->uid)) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN), + "Sender uid="UID_FMT", message ignored.", cred->uid); + + if (!memchr(message.buf, 0, n)) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN), "Received message without NUL, ignoring message."); + + if (streq(message.nulstr, "libudev")) { + /* udev message needs proper version magic */ + if (message.nlh->magic != htobe32(UDEV_MONITOR_MAGIC)) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN), + "Invalid message signature (%x != %x).", + message.nlh->magic, htobe32(UDEV_MONITOR_MAGIC)); + + if (message.nlh->properties_off + 32 > (size_t) n) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN), + "Invalid offset for properties (%u > %zi).", + message.nlh->properties_off + 32, n); + + offset = message.nlh->properties_off; + + /* devices received from udev are always initialized */ + is_initialized = true; + + } else { + /* check kernel message header */ + if (!strstr(message.nulstr, "@/")) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN), "Invalid message header."); + + offset = strlen(message.nulstr) + 1; + if (offset >= (size_t) n) + return log_monitor_errno(m, SYNTHETIC_ERRNO(EAGAIN), "Invalid message length."); + } + + r = device_new_from_nulstr(&device, message.nulstr + offset, n - offset); + if (r < 0) + return log_monitor_errno(m, r, "Failed to create device from received message: %m"); + + if (is_initialized) + device_set_is_initialized(device); + + /* Skip device, if it does not pass the current filter */ + r = passes_filter(m, device); + if (r < 0) + return log_device_monitor_errno(device, m, r, "Failed to check received device passing filter: %m"); + if (r == 0) + log_device_monitor(device, m, "Received device does not pass filter, ignoring."); + else + *ret = TAKE_PTR(device); + + return r; +} + +static uint32_t string_hash32(const char *str) { + return MurmurHash2(str, strlen(str), 0); +} + +/* Get a bunch of bit numbers out of the hash, and set the bits in our bit field */ +static uint64_t string_bloom64(const char *str) { + uint64_t bits = 0; + uint32_t hash = string_hash32(str); + + bits |= UINT64_C(1) << (hash & 63); + bits |= UINT64_C(1) << ((hash >> 6) & 63); + bits |= UINT64_C(1) << ((hash >> 12) & 63); + bits |= UINT64_C(1) << ((hash >> 18) & 63); + return bits; +} + +int device_monitor_send_device( + sd_device_monitor *m, + sd_device_monitor *destination, + sd_device *device) { + + monitor_netlink_header nlh = { + .prefix = "libudev", + .magic = htobe32(UDEV_MONITOR_MAGIC), + .header_size = sizeof nlh, + }; + struct iovec iov[2] = { + { .iov_base = &nlh, .iov_len = sizeof nlh }, + }; + struct msghdr smsg = { + .msg_iov = iov, + .msg_iovlen = 2, + }; + /* default destination for sending */ + union sockaddr_union default_destination = { + .nl.nl_family = AF_NETLINK, + .nl.nl_groups = MONITOR_GROUP_UDEV, + }; + uint64_t tag_bloom_bits; + const char *buf, *val; + ssize_t count; + size_t blen; + int r; + + assert(m); + assert(device); + + r = device_get_properties_nulstr(device, &buf, &blen); + if (r < 0) + return log_device_monitor_errno(device, m, r, "Failed to get device properties: %m"); + if (blen < 32) + return log_device_monitor_errno(device, m, SYNTHETIC_ERRNO(EINVAL), + "Length of device property nulstr is too small to contain valid device information."); + + /* fill in versioned header */ + r = sd_device_get_subsystem(device, &val); + if (r < 0) + return log_device_monitor_errno(device, m, r, "Failed to get device subsystem: %m"); + nlh.filter_subsystem_hash = htobe32(string_hash32(val)); + + if (sd_device_get_devtype(device, &val) >= 0) + nlh.filter_devtype_hash = htobe32(string_hash32(val)); + + /* add tag bloom filter */ + tag_bloom_bits = 0; + FOREACH_DEVICE_TAG(device, tag) + tag_bloom_bits |= string_bloom64(tag); + + if (tag_bloom_bits > 0) { + nlh.filter_tag_bloom_hi = htobe32(tag_bloom_bits >> 32); + nlh.filter_tag_bloom_lo = htobe32(tag_bloom_bits & 0xffffffff); + } + + /* add properties list */ + nlh.properties_off = iov[0].iov_len; + nlh.properties_len = blen; + iov[1] = IOVEC_MAKE((char*) buf, blen); + + /* + * Use custom address for target, or the default one. + * + * If we send to a multicast group, we will get + * ECONNREFUSED, which is expected. + */ + smsg.msg_name = destination ? &destination->snl : &default_destination; + smsg.msg_namelen = sizeof(struct sockaddr_nl); + count = sendmsg(m->sock, &smsg, 0); + if (count < 0) { + if (!destination && errno == ECONNREFUSED) { + log_device_monitor(device, m, "Passed to netlink monitor."); + return 0; + } else + return log_device_monitor_errno(device, m, errno, "Failed to send device to netlink monitor: %m"); + } + + log_device_monitor(device, m, "Passed %zi byte to netlink monitor.", count); + return count; +} + +static void bpf_stmt(struct sock_filter *ins, unsigned *i, + unsigned short code, unsigned data) { + ins[(*i)++] = (struct sock_filter) { + .code = code, + .k = data, + }; +} + +static void bpf_jmp(struct sock_filter *ins, unsigned *i, + unsigned short code, unsigned data, + unsigned short jt, unsigned short jf) { + ins[(*i)++] = (struct sock_filter) { + .code = code, + .jt = jt, + .jf = jf, + .k = data, + }; +} + +_public_ int sd_device_monitor_filter_update(sd_device_monitor *m) { + struct sock_filter ins[512] = {}; + struct sock_fprog filter; + const char *subsystem, *devtype, *tag; + unsigned i = 0; + + assert_return(m, -EINVAL); + + if (m->filter_uptodate) + return 0; + + if (m->snl.nl.nl_groups == MONITOR_GROUP_KERNEL || + (hashmap_isempty(m->subsystem_filter) && + set_isempty(m->tag_filter))) { + m->filter_uptodate = true; + return 0; + } + + /* load magic in A */ + bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, magic)); + /* jump if magic matches */ + bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, UDEV_MONITOR_MAGIC, 1, 0); + /* wrong magic, pass packet */ + bpf_stmt(ins, &i, BPF_RET|BPF_K, 0xffffffff); + + if (!set_isempty(m->tag_filter)) { + int tag_matches = set_size(m->tag_filter); + + /* add all tags matches */ + SET_FOREACH(tag, m->tag_filter) { + uint64_t tag_bloom_bits = string_bloom64(tag); + uint32_t tag_bloom_hi = tag_bloom_bits >> 32; + uint32_t tag_bloom_lo = tag_bloom_bits & 0xffffffff; + + /* load device bloom bits in A */ + bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, filter_tag_bloom_hi)); + /* clear bits (tag bits & bloom bits) */ + bpf_stmt(ins, &i, BPF_ALU|BPF_AND|BPF_K, tag_bloom_hi); + /* jump to next tag if it does not match */ + bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, tag_bloom_hi, 0, 3); + + /* load device bloom bits in A */ + bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, filter_tag_bloom_lo)); + /* clear bits (tag bits & bloom bits) */ + bpf_stmt(ins, &i, BPF_ALU|BPF_AND|BPF_K, tag_bloom_lo); + /* jump behind end of tag match block if tag matches */ + tag_matches--; + bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, tag_bloom_lo, 1 + (tag_matches * 6), 0); + } + + /* nothing matched, drop packet */ + bpf_stmt(ins, &i, BPF_RET|BPF_K, 0); + } + + /* add all subsystem matches */ + if (!hashmap_isempty(m->subsystem_filter)) { + HASHMAP_FOREACH_KEY(devtype, subsystem, m->subsystem_filter) { + uint32_t hash = string_hash32(subsystem); + + /* load device subsystem value in A */ + bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, filter_subsystem_hash)); + if (!devtype) { + /* jump if subsystem does not match */ + bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, hash, 0, 1); + } else { + /* jump if subsystem does not match */ + bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, hash, 0, 3); + /* load device devtype value in A */ + bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(monitor_netlink_header, filter_devtype_hash)); + /* jump if value does not match */ + hash = string_hash32(devtype); + bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, hash, 0, 1); + } + + /* matched, pass packet */ + bpf_stmt(ins, &i, BPF_RET|BPF_K, 0xffffffff); + + if (i+1 >= ELEMENTSOF(ins)) + return -E2BIG; + } + + /* nothing matched, drop packet */ + bpf_stmt(ins, &i, BPF_RET|BPF_K, 0); + } + + /* matched, pass packet */ + bpf_stmt(ins, &i, BPF_RET|BPF_K, 0xffffffff); + + /* install filter */ + filter = (struct sock_fprog) { + .len = i, + .filter = ins, + }; + if (setsockopt(m->sock, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter)) < 0) + return -errno; + + m->filter_uptodate = true; + return 0; +} + +_public_ int sd_device_monitor_filter_add_match_subsystem_devtype(sd_device_monitor *m, const char *subsystem, const char *devtype) { + int r; + + assert_return(m, -EINVAL); + assert_return(subsystem, -EINVAL); + + /* Do not use string_has_ops_free_free or hashmap_put_strdup() here, as this may be called + * multiple times with the same subsystem but different devtypes. */ + r = hashmap_put_strdup_full(&m->subsystem_filter, &trivial_hash_ops_free_free, subsystem, devtype); + if (r <= 0) + return r; + + m->filter_uptodate = false; + return r; +} + +_public_ int sd_device_monitor_filter_add_match_tag(sd_device_monitor *m, const char *tag) { + int r; + + assert_return(m, -EINVAL); + assert_return(tag, -EINVAL); + + r = set_put_strdup(&m->tag_filter, tag); + if (r <= 0) + return r; + + m->filter_uptodate = false; + return r; +} + +_public_ int sd_device_monitor_filter_add_match_sysattr(sd_device_monitor *m, const char *sysattr, const char *value, int match) { + Hashmap **hashmap; + + assert_return(m, -EINVAL); + assert_return(sysattr, -EINVAL); + + if (match) + hashmap = &m->match_sysattr_filter; + else + hashmap = &m->nomatch_sysattr_filter; + + /* TODO: unset m->filter_uptodate on success when we support this filter on BPF. */ + return update_match_strv(hashmap, sysattr, value, /* clear_on_null = */ true); +} + +_public_ int sd_device_monitor_filter_add_match_parent(sd_device_monitor *m, sd_device *device, int match) { + const char *syspath; + Set **set; + int r; + + assert_return(m, -EINVAL); + assert_return(device, -EINVAL); + + r = sd_device_get_syspath(device, &syspath); + if (r < 0) + return r; + + if (match) + set = &m->match_parent_filter; + else + set = &m->nomatch_parent_filter; + + /* TODO: unset m->filter_uptodate on success when we support this filter on BPF. */ + return set_put_strdup(set, syspath); +} + +_public_ int sd_device_monitor_filter_remove(sd_device_monitor *m) { + static const struct sock_fprog filter = { 0, NULL }; + + assert_return(m, -EINVAL); + + m->subsystem_filter = hashmap_free(m->subsystem_filter); + m->tag_filter = set_free(m->tag_filter); + m->match_sysattr_filter = hashmap_free(m->match_sysattr_filter); + m->nomatch_sysattr_filter = hashmap_free(m->nomatch_sysattr_filter); + m->match_parent_filter = set_free(m->match_parent_filter); + m->nomatch_parent_filter = set_free(m->nomatch_parent_filter); + + if (setsockopt(m->sock, SOL_SOCKET, SO_DETACH_FILTER, &filter, sizeof(filter)) < 0) + return -errno; + + m->filter_uptodate = true; + return 0; +} diff --git a/src/libsystemd/sd-device/device-private.c b/src/libsystemd/sd-device/device-private.c new file mode 100644 index 0000000..0edabfb --- /dev/null +++ b/src/libsystemd/sd-device/device-private.c @@ -0,0 +1,903 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <ctype.h> +#include <net/if.h> +#include <sys/types.h> + +#include "sd-device.h" + +#include "alloc-util.h" +#include "device-internal.h" +#include "device-private.h" +#include "device-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "hashmap.h" +#include "macro.h" +#include "mkdir.h" +#include "nulstr-util.h" +#include "parse-util.h" +#include "path-util.h" +#include "set.h" +#include "stdio-util.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "strxcpyx.h" +#include "tmpfile-util.h" +#include "user-util.h" + +int device_add_property(sd_device *device, const char *key, const char *value) { + int r; + + assert(device); + assert(key); + + r = device_add_property_aux(device, key, value, false); + if (r < 0) + return r; + + if (key[0] != '.') { + r = device_add_property_aux(device, key, value, true); + if (r < 0) + return r; + } + + return 0; +} + +int device_add_propertyf(sd_device *device, const char *key, const char *format, ...) { + _cleanup_free_ char *value = NULL; + va_list ap; + int r; + + assert(device); + assert(key); + + if (!format) + return device_add_property(device, key, NULL); + + va_start(ap, format); + r = vasprintf(&value, format, ap); + va_end(ap); + + if (r < 0) + return -ENOMEM; + + return device_add_property(device, key, value); +} + +void device_set_devlink_priority(sd_device *device, int priority) { + assert(device); + + device->devlink_priority = priority; +} + +void device_set_is_initialized(sd_device *device) { + assert(device); + + device->is_initialized = true; +} + +int device_ensure_usec_initialized(sd_device *device, sd_device *device_old) { + usec_t when; + + assert(device); + + if (device_old && device_old->usec_initialized > 0) + when = device_old->usec_initialized; + else + when = now(CLOCK_MONOTONIC); + + return device_set_usec_initialized(device, when); +} + +uint64_t device_get_properties_generation(sd_device *device) { + assert(device); + + return device->properties_generation; +} + +uint64_t device_get_tags_generation(sd_device *device) { + assert(device); + + return device->tags_generation; +} + +uint64_t device_get_devlinks_generation(sd_device *device) { + assert(device); + + return device->devlinks_generation; +} + +int device_get_devnode_mode(sd_device *device, mode_t *ret) { + int r; + + assert(device); + + r = device_read_db(device); + if (r < 0) + return r; + + if (device->devmode == MODE_INVALID) + return -ENOENT; + + if (ret) + *ret = device->devmode; + + return 0; +} + +int device_get_devnode_uid(sd_device *device, uid_t *ret) { + int r; + + assert(device); + + r = device_read_db(device); + if (r < 0) + return r; + + if (device->devuid == UID_INVALID) + return -ENOENT; + + if (ret) + *ret = device->devuid; + + return 0; +} + +static int device_set_devuid(sd_device *device, const char *uid) { + uid_t u; + int r; + + assert(device); + assert(uid); + + r = parse_uid(uid, &u); + if (r < 0) + return r; + + r = device_add_property_internal(device, "DEVUID", uid); + if (r < 0) + return r; + + device->devuid = u; + + return 0; +} + +int device_get_devnode_gid(sd_device *device, gid_t *ret) { + int r; + + assert(device); + + r = device_read_db(device); + if (r < 0) + return r; + + if (device->devgid == GID_INVALID) + return -ENOENT; + + if (ret) + *ret = device->devgid; + + return 0; +} + +static int device_set_devgid(sd_device *device, const char *gid) { + gid_t g; + int r; + + assert(device); + assert(gid); + + r = parse_gid(gid, &g); + if (r < 0) + return r; + + r = device_add_property_internal(device, "DEVGID", gid); + if (r < 0) + return r; + + device->devgid = g; + + return 0; +} + +int device_set_action(sd_device *device, sd_device_action_t a) { + int r; + + assert(device); + assert(a >= 0 && a < _SD_DEVICE_ACTION_MAX); + + r = device_add_property_internal(device, "ACTION", device_action_to_string(a)); + if (r < 0) + return r; + + device->action = a; + + return 0; +} + +static int device_set_action_from_string(sd_device *device, const char *action) { + sd_device_action_t a; + + assert(device); + assert(action); + + a = device_action_from_string(action); + if (a < 0) + return a; + + return device_set_action(device, a); +} + +static int device_set_seqnum(sd_device *device, const char *str) { + uint64_t seqnum; + int r; + + assert(device); + assert(str); + + r = safe_atou64(str, &seqnum); + if (r < 0) + return r; + if (seqnum == 0) + return -EINVAL; + + r = device_add_property_internal(device, "SEQNUM", str); + if (r < 0) + return r; + + device->seqnum = seqnum; + + return 0; +} + +static int device_amend(sd_device *device, const char *key, const char *value) { + int r; + + assert(device); + assert(key); + assert(value); + + if (streq(key, "DEVPATH")) { + char *path; + + path = strjoina("/sys", value); + + /* the caller must verify or trust this data (e.g., if it comes from the kernel) */ + r = device_set_syspath(device, path, false); + if (r < 0) + return log_device_debug_errno(device, r, "sd-device: Failed to set syspath to '%s': %m", path); + } else if (streq(key, "SUBSYSTEM")) { + r = device_set_subsystem(device, value); + if (r < 0) + return log_device_debug_errno(device, r, "sd-device: Failed to set subsystem to '%s': %m", value); + } else if (streq(key, "DEVTYPE")) { + r = device_set_devtype(device, value); + if (r < 0) + return log_device_debug_errno(device, r, "sd-device: Failed to set devtype to '%s': %m", value); + } else if (streq(key, "DEVNAME")) { + r = device_set_devname(device, value); + if (r < 0) + return log_device_debug_errno(device, r, "sd-device: Failed to set devname to '%s': %m", value); + } else if (streq(key, "USEC_INITIALIZED")) { + usec_t t; + + r = safe_atou64(value, &t); + if (r < 0) + return log_device_debug_errno(device, r, "sd-device: Failed to parse timestamp '%s': %m", value); + + r = device_set_usec_initialized(device, t); + if (r < 0) + return log_device_debug_errno(device, r, "sd-device: Failed to set usec-initialized to '%s': %m", value); + } else if (streq(key, "DRIVER")) { + r = device_set_driver(device, value); + if (r < 0) + return log_device_debug_errno(device, r, "sd-device: Failed to set driver to '%s': %m", value); + } else if (streq(key, "IFINDEX")) { + r = device_set_ifindex(device, value); + if (r < 0) + return log_device_debug_errno(device, r, "sd-device: Failed to set ifindex to '%s': %m", value); + } else if (streq(key, "DEVMODE")) { + r = device_set_devmode(device, value); + if (r < 0) + return log_device_debug_errno(device, r, "sd-device: Failed to set devmode to '%s': %m", value); + } else if (streq(key, "DEVUID")) { + r = device_set_devuid(device, value); + if (r < 0) + return log_device_debug_errno(device, r, "sd-device: Failed to set devuid to '%s': %m", value); + } else if (streq(key, "DEVGID")) { + r = device_set_devgid(device, value); + if (r < 0) + return log_device_debug_errno(device, r, "sd-device: Failed to set devgid to '%s': %m", value); + } else if (streq(key, "ACTION")) { + r = device_set_action_from_string(device, value); + if (r < 0) + return log_device_debug_errno(device, r, "sd-device: Failed to set action to '%s': %m", value); + } else if (streq(key, "SEQNUM")) { + r = device_set_seqnum(device, value); + if (r < 0) + return log_device_debug_errno(device, r, "sd-device: Failed to set SEQNUM to '%s': %m", value); + } else if (streq(key, "DISKSEQ")) { + r = device_set_diskseq(device, value); + if (r < 0) + return log_device_debug_errno(device, r, "sd-device: Failed to set DISKSEQ to '%s': %m", value); + } else if (streq(key, "DEVLINKS")) { + for (const char *p = value;;) { + _cleanup_free_ char *word = NULL; + + /* udev rules may set escaped strings, and sd-device does not modify the input + * strings. So, it is also necessary to keep the strings received through + * sd-device-monitor. */ + r = extract_first_word(&p, &word, NULL, EXTRACT_RETAIN_ESCAPE); + if (r < 0) + return r; + if (r == 0) + break; + + r = device_add_devlink(device, word); + if (r < 0) + return log_device_debug_errno(device, r, "sd-device: Failed to add devlink '%s': %m", word); + } + } else if (STR_IN_SET(key, "TAGS", "CURRENT_TAGS")) { + for (const char *p = value;;) { + _cleanup_free_ char *word = NULL; + + r = extract_first_word(&p, &word, ":", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return r; + if (r == 0) + break; + if (isempty(word)) + continue; + + r = device_add_tag(device, word, streq(key, "CURRENT_TAGS")); + if (r < 0) + return log_device_debug_errno(device, r, "sd-device: Failed to add tag '%s': %m", word); + } + } else if (streq(key, "UDEV_DATABASE_VERSION")) { + r = safe_atou(value, &device->database_version); + if (r < 0) + return log_device_debug_errno(device, r, "sd-device: Failed to parse udev database version '%s': %m", value); + } else { + r = device_add_property_internal(device, key, value); + if (r < 0) + return log_device_debug_errno(device, r, "sd-device: Failed to add property '%s=%s': %m", key, value); + } + + return 0; +} + +static int device_append( + sd_device *device, + char *key, + const char **_major, + const char **_minor) { + + const char *major = NULL, *minor = NULL; + char *value; + int r; + + assert(device); + assert(key); + assert(_major); + assert(_minor); + + value = strchr(key, '='); + if (!value) + return log_device_debug_errno(device, SYNTHETIC_ERRNO(EINVAL), + "sd-device: Not a key-value pair: '%s'", key); + + *value = '\0'; + + value++; + + if (streq(key, "MAJOR")) + major = value; + else if (streq(key, "MINOR")) + minor = value; + else { + r = device_amend(device, key, value); + if (r < 0) + return r; + } + + if (major) + *_major = major; + + if (minor) + *_minor = minor; + + return 0; +} + +void device_seal(sd_device *device) { + assert(device); + + device->sealed = true; +} + +static int device_verify(sd_device *device) { + int r; + + assert(device); + + if (!device->devpath || !device->subsystem || device->action < 0 || device->seqnum == 0) + return log_device_debug_errno(device, SYNTHETIC_ERRNO(EINVAL), + "sd-device: Device created from strv or nulstr lacks devpath, subsystem, action or seqnum."); + + if (streq(device->subsystem, "drivers")) { + r = device_set_drivers_subsystem(device); + if (r < 0) + return r; + } + + device->sealed = true; + + return 0; +} + +int device_new_from_strv(sd_device **ret, char **strv) { + _cleanup_(sd_device_unrefp) sd_device *device = NULL; + const char *major = NULL, *minor = NULL; + int r; + + assert(ret); + assert(strv); + + r = device_new_aux(&device); + if (r < 0) + return r; + + STRV_FOREACH(key, strv) { + r = device_append(device, *key, &major, &minor); + if (r < 0) + return r; + } + + if (major) { + r = device_set_devnum(device, major, minor); + if (r < 0) + return log_device_debug_errno(device, r, "sd-device: Failed to set devnum %s:%s: %m", major, minor); + } + + r = device_verify(device); + if (r < 0) + return r; + + *ret = TAKE_PTR(device); + + return 0; +} + +int device_new_from_nulstr(sd_device **ret, char *nulstr, size_t len) { + _cleanup_(sd_device_unrefp) sd_device *device = NULL; + const char *major = NULL, *minor = NULL; + int r; + + assert(ret); + assert(nulstr); + assert(len); + + r = device_new_aux(&device); + if (r < 0) + return r; + + for (size_t i = 0; i < len; ) { + char *key; + const char *end; + + key = nulstr + i; + end = memchr(key, '\0', len - i); + if (!end) + return log_device_debug_errno(device, SYNTHETIC_ERRNO(EINVAL), + "sd-device: Failed to parse nulstr"); + + i += end - key + 1; + + /* netlink messages for some devices contain an unwanted newline at the end of value. + * Let's drop the newline and remaining characters after the newline. */ + truncate_nl(key); + + r = device_append(device, key, &major, &minor); + if (r < 0) + return r; + } + + if (major) { + r = device_set_devnum(device, major, minor); + if (r < 0) + return log_device_debug_errno(device, r, "sd-device: Failed to set devnum %s:%s: %m", major, minor); + } + + r = device_verify(device); + if (r < 0) + return r; + + *ret = TAKE_PTR(device); + + return 0; +} + +static int device_update_properties_bufs(sd_device *device) { + _cleanup_free_ char **buf_strv = NULL, *buf_nulstr = NULL; + size_t nulstr_len = 0, num = 0; + + assert(device); + + if (!device->properties_buf_outdated) + return 0; + + /* append udev database version */ + buf_nulstr = newdup(char, "UDEV_DATABASE_VERSION=" STRINGIFY(LATEST_UDEV_DATABASE_VERSION) "\0", + STRLEN("UDEV_DATABASE_VERSION=" STRINGIFY(LATEST_UDEV_DATABASE_VERSION)) + 2); + if (!buf_nulstr) + return -ENOMEM; + + nulstr_len += STRLEN("UDEV_DATABASE_VERSION=" STRINGIFY(LATEST_UDEV_DATABASE_VERSION)) + 1; + num++; + + FOREACH_DEVICE_PROPERTY(device, prop, val) { + size_t len = 0; + + len = strlen(prop) + 1 + strlen(val); + + buf_nulstr = GREEDY_REALLOC0(buf_nulstr, nulstr_len + len + 2); + if (!buf_nulstr) + return -ENOMEM; + + strscpyl(buf_nulstr + nulstr_len, len + 1, prop, "=", val, NULL); + nulstr_len += len + 1; + num++; + } + + /* build buf_strv from buf_nulstr */ + buf_strv = new0(char*, num + 1); + if (!buf_strv) + return -ENOMEM; + + size_t i = 0; + NULSTR_FOREACH(p, buf_nulstr) + buf_strv[i++] = p; + assert(i == num); + + free_and_replace(device->properties_nulstr, buf_nulstr); + device->properties_nulstr_len = nulstr_len; + free_and_replace(device->properties_strv, buf_strv); + + device->properties_buf_outdated = false; + return 0; +} + +int device_get_properties_nulstr(sd_device *device, const char **ret_nulstr, size_t *ret_len) { + int r; + + assert(device); + + r = device_update_properties_bufs(device); + if (r < 0) + return r; + + if (ret_nulstr) + *ret_nulstr = device->properties_nulstr; + if (ret_len) + *ret_len = device->properties_nulstr_len; + + return 0; +} + +int device_get_properties_strv(sd_device *device, char ***ret) { + int r; + + assert(device); + + r = device_update_properties_bufs(device); + if (r < 0) + return r; + + if (ret) + *ret = device->properties_strv; + + return 0; +} + +int device_get_devlink_priority(sd_device *device, int *ret) { + int r; + + assert(device); + + r = device_read_db(device); + if (r < 0) + return r; + + if (ret) + *ret = device->devlink_priority; + + return 0; +} + +int device_clone_with_db(sd_device *device, sd_device **ret) { + _cleanup_(sd_device_unrefp) sd_device *dest = NULL; + const char *key, *val; + int r; + + assert(device); + assert(ret); + + /* The device may be already removed. Let's copy minimal set of information that was obtained through + * netlink socket. */ + + r = device_new_aux(&dest); + if (r < 0) + return r; + + /* Seal device to prevent reading the uevent file, as the device may have been already removed. */ + dest->sealed = true; + + /* Copy syspath, then also devname, sysname or sysnum can be obtained. */ + r = device_set_syspath(dest, device->syspath, false); + if (r < 0) + return r; + + /* Copy other information stored in database. Here, do not use FOREACH_DEVICE_PROPERTY() and + * sd_device_get_property_value(), as they calls device_properties_prepare() -> + * device_read_uevent_file(), but as commented in the above, the device may be already removed and + * reading uevent file may fail. */ + ORDERED_HASHMAP_FOREACH_KEY(val, key, device->properties) { + if (streq(key, "MINOR")) + continue; + + if (streq(key, "MAJOR")) { + const char *minor = NULL; + + minor = ordered_hashmap_get(device->properties, "MINOR"); + r = device_set_devnum(dest, val, minor); + } else + r = device_amend(dest, key, val); + if (r < 0) + return r; + + if (streq(key, "SUBSYSTEM") && streq(val, "drivers")) { + r = free_and_strdup(&dest->driver_subsystem, device->driver_subsystem); + if (r < 0) + return r; + } + } + + /* Finally, read the udev database. */ + r = device_read_db_internal(dest, /* force = */ true); + if (r < 0) + return r; + + *ret = TAKE_PTR(dest); + return 0; +} + +void device_cleanup_tags(sd_device *device) { + assert(device); + + device->all_tags = set_free_free(device->all_tags); + device->current_tags = set_free_free(device->current_tags); + device->property_tags_outdated = true; + device->tags_generation++; +} + +void device_cleanup_devlinks(sd_device *device) { + assert(device); + + set_free_free(device->devlinks); + device->devlinks = NULL; + device->property_devlinks_outdated = true; + device->devlinks_generation++; +} + +void device_remove_tag(sd_device *device, const char *tag) { + assert(device); + assert(tag); + + free(set_remove(device->current_tags, tag)); + device->property_tags_outdated = true; + device->tags_generation++; +} + +static int device_tag(sd_device *device, const char *tag, bool add) { + const char *id; + char *path; + int r; + + assert(device); + assert(tag); + + r = device_get_device_id(device, &id); + if (r < 0) + return r; + + path = strjoina("/run/udev/tags/", tag, "/", id); + + if (add) + return touch_file(path, true, USEC_INFINITY, UID_INVALID, GID_INVALID, 0444); + + if (unlink(path) < 0 && errno != ENOENT) + return -errno; + + return 0; +} + +int device_tag_index(sd_device *device, sd_device *device_old, bool add) { + int r = 0, k; + + if (add && device_old) + /* delete possible left-over tags */ + FOREACH_DEVICE_TAG(device_old, tag) + if (!sd_device_has_tag(device, tag)) { + k = device_tag(device_old, tag, false); + if (r >= 0 && k < 0) + r = k; + } + + FOREACH_DEVICE_TAG(device, tag) { + k = device_tag(device, tag, add); + if (r >= 0 && k < 0) + r = k; + } + + return r; +} + +static bool device_has_info(sd_device *device) { + assert(device); + + if (!set_isempty(device->devlinks)) + return true; + + if (device->devlink_priority != 0) + return true; + + if (!ordered_hashmap_isempty(device->properties_db)) + return true; + + if (!set_isempty(device->all_tags)) + return true; + + if (!set_isempty(device->current_tags)) + return true; + + return false; +} + +void device_set_db_persist(sd_device *device) { + assert(device); + + device->db_persist = true; +} + +int device_update_db(sd_device *device) { + const char *id; + char *path; + _cleanup_fclose_ FILE *f = NULL; + _cleanup_(unlink_and_freep) char *path_tmp = NULL; + bool has_info; + int r; + + assert(device); + + has_info = device_has_info(device); + + r = device_get_device_id(device, &id); + if (r < 0) + return r; + + path = strjoina("/run/udev/data/", id); + + /* do not store anything for otherwise empty devices */ + if (!has_info && major(device->devnum) == 0 && device->ifindex == 0) { + if (unlink(path) < 0 && errno != ENOENT) + return -errno; + + return 0; + } + + /* write a database file */ + r = mkdir_parents(path, 0755); + if (r < 0) + return r; + + r = fopen_temporary(path, &f, &path_tmp); + if (r < 0) + return r; + + /* set 'sticky' bit to indicate that we should not clean the database when we transition from initrd + * to the real root */ + if (fchmod(fileno(f), device->db_persist ? 01644 : 0644) < 0) { + r = -errno; + goto fail; + } + + if (has_info) { + const char *property, *value, *ct; + + if (major(device->devnum) > 0) { + FOREACH_DEVICE_DEVLINK(device, devlink) + fprintf(f, "S:%s\n", devlink + STRLEN("/dev/")); + + if (device->devlink_priority != 0) + fprintf(f, "L:%i\n", device->devlink_priority); + } + + if (device->usec_initialized > 0) + fprintf(f, "I:"USEC_FMT"\n", device->usec_initialized); + + ORDERED_HASHMAP_FOREACH_KEY(value, property, device->properties_db) + fprintf(f, "E:%s=%s\n", property, value); + + FOREACH_DEVICE_TAG(device, tag) + fprintf(f, "G:%s\n", tag); /* Any tag */ + + SET_FOREACH(ct, device->current_tags) + fprintf(f, "Q:%s\n", ct); /* Current tag */ + + /* Always write the latest database version here, instead of the value stored in + * device->database_version, as which may be 0. */ + fputs("V:" STRINGIFY(LATEST_UDEV_DATABASE_VERSION) "\n", f); + } + + r = fflush_and_check(f); + if (r < 0) + goto fail; + + if (rename(path_tmp, path) < 0) { + r = -errno; + goto fail; + } + + path_tmp = mfree(path_tmp); + + log_device_debug(device, "sd-device: Created %s file '%s' for '%s'", has_info ? "db" : "empty", + path, device->devpath); + + return 0; + +fail: + (void) unlink(path); + + return log_device_debug_errno(device, r, "sd-device: Failed to create %s file '%s' for '%s'", has_info ? "db" : "empty", path, device->devpath); +} + +int device_delete_db(sd_device *device) { + const char *id; + char *path; + int r; + + assert(device); + + r = device_get_device_id(device, &id); + if (r < 0) + return r; + + path = strjoina("/run/udev/data/", id); + + if (unlink(path) < 0 && errno != ENOENT) + return -errno; + + return 0; +} + +static const char* const device_action_table[_SD_DEVICE_ACTION_MAX] = { + [SD_DEVICE_ADD] = "add", + [SD_DEVICE_REMOVE] = "remove", + [SD_DEVICE_CHANGE] = "change", + [SD_DEVICE_MOVE] = "move", + [SD_DEVICE_ONLINE] = "online", + [SD_DEVICE_OFFLINE] = "offline", + [SD_DEVICE_BIND] = "bind", + [SD_DEVICE_UNBIND] = "unbind", +}; + +DEFINE_STRING_TABLE_LOOKUP(device_action, sd_device_action_t); + +void dump_device_action_table(void) { + DUMP_STRING_TABLE(device_action, sd_device_action_t, _SD_DEVICE_ACTION_MAX); +} diff --git a/src/libsystemd/sd-device/device-private.h b/src/libsystemd/sd-device/device-private.h new file mode 100644 index 0000000..b903d1a --- /dev/null +++ b/src/libsystemd/sd-device/device-private.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <dirent.h> +#include <inttypes.h> +#include <stdbool.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "sd-device.h" + +#include "macro.h" + +int device_new_from_mode_and_devnum(sd_device **ret, mode_t mode, dev_t devnum); +int device_new_from_nulstr(sd_device **ret, char *nulstr, size_t len); +int device_new_from_strv(sd_device **ret, char **strv); + +int device_opendir(sd_device *device, const char *subdir, DIR **ret); + +int device_get_property_bool(sd_device *device, const char *key); +int device_get_property_int(sd_device *device, const char *key, int *ret); +int device_get_sysattr_int(sd_device *device, const char *sysattr, int *ret_value); +int device_get_sysattr_unsigned(sd_device *device, const char *sysattr, unsigned *ret_value); +int device_get_sysattr_bool(sd_device *device, const char *sysattr); +int device_get_device_id(sd_device *device, const char **ret); +int device_get_devlink_priority(sd_device *device, int *ret); +int device_get_devnode_mode(sd_device *device, mode_t *ret); +int device_get_devnode_uid(sd_device *device, uid_t *ret); +int device_get_devnode_gid(sd_device *device, gid_t *ret); + +void device_clear_sysattr_cache(sd_device *device); +int device_cache_sysattr_value(sd_device *device, const char *key, char *value); +int device_get_cached_sysattr_value(sd_device *device, const char *key, const char **ret_value); + +void device_seal(sd_device *device); +void device_set_is_initialized(sd_device *device); +void device_set_db_persist(sd_device *device); +void device_set_devlink_priority(sd_device *device, int priority); +int device_ensure_usec_initialized(sd_device *device, sd_device *device_old); +int device_add_devlink(sd_device *device, const char *devlink); +int device_remove_devlink(sd_device *device, const char *devlink); +bool device_has_devlink(sd_device *device, const char *devlink); +int device_add_property(sd_device *device, const char *property, const char *value); +int device_add_propertyf(sd_device *device, const char *key, const char *format, ...) _printf_(3, 4); +int device_add_tag(sd_device *device, const char *tag, bool both); +void device_remove_tag(sd_device *device, const char *tag); +void device_cleanup_tags(sd_device *device); +void device_cleanup_devlinks(sd_device *device); + +uint64_t device_get_properties_generation(sd_device *device); +uint64_t device_get_tags_generation(sd_device *device); +uint64_t device_get_devlinks_generation(sd_device *device); + +int device_properties_prepare(sd_device *device); +int device_get_properties_nulstr(sd_device *device, const char **ret_nulstr, size_t *ret_len); +int device_get_properties_strv(sd_device *device, char ***ret); + +int device_clone_with_db(sd_device *device, sd_device **ret); + +int device_tag_index(sd_device *dev, sd_device *dev_old, bool add); +int device_update_db(sd_device *device); +int device_delete_db(sd_device *device); +int device_read_db_internal_filename(sd_device *device, const char *filename); /* For fuzzer */ +int device_read_db_internal(sd_device *device, bool force); +static inline int device_read_db(sd_device *device) { + return device_read_db_internal(device, false); +} + +int device_read_uevent_file(sd_device *device); + +int device_set_action(sd_device *device, sd_device_action_t a); +sd_device_action_t device_action_from_string(const char *s) _pure_; +const char *device_action_to_string(sd_device_action_t a) _const_; +void dump_device_action_table(void); diff --git a/src/libsystemd/sd-device/device-util.c b/src/libsystemd/sd-device/device-util.c new file mode 100644 index 0000000..529eff2 --- /dev/null +++ b/src/libsystemd/sd-device/device-util.c @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "device-private.h" +#include "device-util.h" +#include "devnum-util.h" +#include "fd-util.h" +#include "string-util.h" +#include "strv.h" + +int devname_from_devnum(mode_t mode, dev_t devnum, char **ret) { + _cleanup_(sd_device_unrefp) sd_device *dev = NULL; + _cleanup_free_ char *s = NULL; + const char *devname; + int r; + + assert(ret); + + if (devnum_is_zero(devnum)) + return device_path_make_inaccessible(mode, ret); + + r = device_new_from_mode_and_devnum(&dev, mode, devnum); + if (r < 0) + return r; + + r = sd_device_get_devname(dev, &devname); + if (r < 0) + return r; + + s = strdup(devname); + if (!s) + return -ENOMEM; + + *ret = TAKE_PTR(s); + return 0; +} + +int device_open_from_devnum(mode_t mode, dev_t devnum, int flags, char **ret) { + _cleanup_(sd_device_unrefp) sd_device *dev = NULL; + _cleanup_close_ int fd = -EBADF; + int r; + + r = device_new_from_mode_and_devnum(&dev, mode, devnum); + if (r < 0) + return r; + + fd = sd_device_open(dev, flags); + if (fd < 0) + return fd; + + if (ret) { + const char *devname; + char *s; + + r = sd_device_get_devname(dev, &devname); + if (r < 0) + return r; + + s = strdup(devname); + if (!s) + return -ENOMEM; + + *ret = s; + } + + return TAKE_FD(fd); +} + +static int add_string_field( + sd_device *device, + const char *field, + int (*func)(sd_device *dev, const char **s), + char ***strv) { + + const char *s; + int r; + + assert(device); + assert(field); + assert(func); + assert(strv); + + r = func(device, &s); + if (r < 0 && r != -ENOENT) + log_device_debug_errno(device, r, "Failed to get device \"%s\" property, ignoring: %m", field); + if (r >= 0) + (void) strv_extend_assignment(strv, field, s); + + return 0; +} + +char** device_make_log_fields(sd_device *device) { + _cleanup_strv_free_ char **strv = NULL; + dev_t devnum; + int ifindex; + sd_device_action_t action; + uint64_t seqnum, diskseq; + int r; + + assert(device); + + (void) add_string_field(device, "SYSPATH", sd_device_get_syspath, &strv); + (void) add_string_field(device, "SUBSYSTEM", sd_device_get_subsystem, &strv); + (void) add_string_field(device, "DEVTYPE", sd_device_get_devtype, &strv); + (void) add_string_field(device, "DRIVER", sd_device_get_driver, &strv); + (void) add_string_field(device, "DEVPATH", sd_device_get_devpath, &strv); + (void) add_string_field(device, "DEVNAME", sd_device_get_devname, &strv); + (void) add_string_field(device, "SYSNAME", sd_device_get_sysname, &strv); + (void) add_string_field(device, "SYSNUM", sd_device_get_sysnum, &strv); + + r = sd_device_get_devnum(device, &devnum); + if (r < 0 && r != -ENOENT) + log_device_debug_errno(device, r, "Failed to get device \"DEVNUM\" property, ignoring: %m"); + if (r >= 0) + (void) strv_extendf(&strv, "DEVNUM="DEVNUM_FORMAT_STR, DEVNUM_FORMAT_VAL(devnum)); + + r = sd_device_get_ifindex(device, &ifindex); + if (r < 0 && r != -ENOENT) + log_device_debug_errno(device, r, "Failed to get device \"IFINDEX\" property, ignoring: %m"); + if (r >= 0) + (void) strv_extendf(&strv, "IFINDEX=%i", ifindex); + + r = sd_device_get_action(device, &action); + if (r < 0 && r != -ENOENT) + log_device_debug_errno(device, r, "Failed to get device \"ACTION\" property, ignoring: %m"); + if (r >= 0) + (void) strv_extendf(&strv, "ACTION=%s", device_action_to_string(action)); + + r = sd_device_get_seqnum(device, &seqnum); + if (r < 0 && r != -ENOENT) + log_device_debug_errno(device, r, "Failed to get device \"SEQNUM\" property, ignoring: %m"); + if (r >= 0) + (void) strv_extendf(&strv, "SEQNUM=%"PRIu64, seqnum); + + r = sd_device_get_diskseq(device, &diskseq); + if (r < 0 && r != -ENOENT) + log_device_debug_errno(device, r, "Failed to get device \"DISKSEQ\" property, ignoring: %m"); + if (r >= 0) + (void) strv_extendf(&strv, "DISKSEQ=%"PRIu64, diskseq); + + return TAKE_PTR(strv); +} diff --git a/src/libsystemd/sd-device/device-util.h b/src/libsystemd/sd-device/device-util.h new file mode 100644 index 0000000..bf86ddc --- /dev/null +++ b/src/libsystemd/sd-device/device-util.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "sd-device.h" + +#include "alloc-util.h" +#include "log.h" +#include "macro.h" + +#define device_unref_and_replace(a, b) \ + unref_and_replace_full(a, b, sd_device_ref, sd_device_unref) + +#define FOREACH_DEVICE_PROPERTY(device, key, value) \ + for (const char *value, *key = sd_device_get_property_first(device, &value); \ + key; \ + key = sd_device_get_property_next(device, &value)) + +#define FOREACH_DEVICE_TAG(device, tag) \ + for (const char *tag = sd_device_get_tag_first(device); \ + tag; \ + tag = sd_device_get_tag_next(device)) + +#define FOREACH_DEVICE_CURRENT_TAG(device, tag) \ + for (const char *tag = sd_device_get_current_tag_first(device); \ + tag; \ + tag = sd_device_get_current_tag_next(device)) + +#define FOREACH_DEVICE_SYSATTR(device, attr) \ + for (const char *attr = sd_device_get_sysattr_first(device); \ + attr; \ + attr = sd_device_get_sysattr_next(device)) + +#define FOREACH_DEVICE_DEVLINK(device, devlink) \ + for (const char *devlink = sd_device_get_devlink_first(device); \ + devlink; \ + devlink = sd_device_get_devlink_next(device)) + +#define _FOREACH_DEVICE_CHILD(device, child, suffix_ptr) \ + for (sd_device *child = sd_device_get_child_first(device, suffix_ptr); \ + child; \ + child = sd_device_get_child_next(device, suffix_ptr)) + +#define FOREACH_DEVICE_CHILD(device, child) \ + _FOREACH_DEVICE_CHILD(device, child, NULL) + +#define FOREACH_DEVICE_CHILD_WITH_SUFFIX(device, child, suffix) \ + _FOREACH_DEVICE_CHILD(device, child, &suffix) + +#define FOREACH_DEVICE(enumerator, device) \ + for (sd_device *device = sd_device_enumerator_get_device_first(enumerator); \ + device; \ + device = sd_device_enumerator_get_device_next(enumerator)) + +#define FOREACH_SUBSYSTEM(enumerator, device) \ + for (sd_device *device = sd_device_enumerator_get_subsystem_first(enumerator); \ + device; \ + device = sd_device_enumerator_get_subsystem_next(enumerator)) + +#define log_device_full_errno_zerook(device, level, error, ...) \ + ({ \ + const char *_sysname = NULL; \ + sd_device *_d = (device); \ + int _level = (level), _e = (error); \ + \ + if (_d && _unlikely_(log_get_max_level() >= LOG_PRI(_level))) \ + (void) sd_device_get_sysname(_d, &_sysname); \ + log_object_internal(_level, _e, PROJECT_FILE, __LINE__, __func__, \ + _sysname ? "DEVICE=" : NULL, _sysname, \ + NULL, NULL, __VA_ARGS__); \ + }) + +#define log_device_full_errno(device, level, error, ...) \ + ({ \ + int _error = (error); \ + ASSERT_NON_ZERO(_error); \ + log_device_full_errno_zerook(device, level, _error, __VA_ARGS__); \ + }) + +#define log_device_full(device, level, ...) (void) log_device_full_errno_zerook(device, level, 0, __VA_ARGS__) + +#define log_device_debug(device, ...) log_device_full(device, LOG_DEBUG, __VA_ARGS__) +#define log_device_info(device, ...) log_device_full(device, LOG_INFO, __VA_ARGS__) +#define log_device_notice(device, ...) log_device_full(device, LOG_NOTICE, __VA_ARGS__) +#define log_device_warning(device, ...) log_device_full(device, LOG_WARNING, __VA_ARGS__) +#define log_device_error(device, ...) log_device_full(device, LOG_ERR, __VA_ARGS__) + +#define log_device_debug_errno(device, error, ...) log_device_full_errno(device, LOG_DEBUG, error, __VA_ARGS__) +#define log_device_info_errno(device, error, ...) log_device_full_errno(device, LOG_INFO, error, __VA_ARGS__) +#define log_device_notice_errno(device, error, ...) log_device_full_errno(device, LOG_NOTICE, error, __VA_ARGS__) +#define log_device_warning_errno(device, error, ...) log_device_full_errno(device, LOG_WARNING, error, __VA_ARGS__) +#define log_device_error_errno(device, error, ...) log_device_full_errno(device, LOG_ERR, error, __VA_ARGS__) + +int devname_from_devnum(mode_t mode, dev_t devnum, char **ret); +static inline int devname_from_stat_rdev(const struct stat *st, char **ret) { + assert(st); + return devname_from_devnum(st->st_mode, st->st_rdev, ret); +} +int device_open_from_devnum(mode_t mode, dev_t devnum, int flags, char **ret); + +char** device_make_log_fields(sd_device *device); diff --git a/src/libsystemd/sd-device/sd-device.c b/src/libsystemd/sd-device/sd-device.c new file mode 100644 index 0000000..2fbc619 --- /dev/null +++ b/src/libsystemd/sd-device/sd-device.c @@ -0,0 +1,2715 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <ctype.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <sys/types.h> + +#include "sd-device.h" + +#include "alloc-util.h" +#include "chase.h" +#include "device-internal.h" +#include "device-private.h" +#include "device-util.h" +#include "devnum-util.h" +#include "dirent-util.h" +#include "env-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "format-util.h" +#include "fs-util.h" +#include "hashmap.h" +#include "id128-util.h" +#include "macro.h" +#include "missing_magic.h" +#include "netlink-util.h" +#include "parse-util.h" +#include "path-util.h" +#include "set.h" +#include "socket-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strv.h" +#include "strxcpyx.h" +#include "user-util.h" + +int device_new_aux(sd_device **ret) { + sd_device *device; + + assert(ret); + + device = new(sd_device, 1); + if (!device) + return -ENOMEM; + + *device = (sd_device) { + .n_ref = 1, + .devmode = MODE_INVALID, + .devuid = UID_INVALID, + .devgid = GID_INVALID, + .action = _SD_DEVICE_ACTION_INVALID, + }; + + *ret = device; + return 0; +} + +static sd_device *device_free(sd_device *device) { + assert(device); + + sd_device_unref(device->parent); + free(device->syspath); + free(device->sysname); + free(device->devtype); + free(device->devname); + free(device->subsystem); + free(device->driver_subsystem); + free(device->driver); + free(device->device_id); + free(device->properties_strv); + free(device->properties_nulstr); + + ordered_hashmap_free(device->properties); + ordered_hashmap_free(device->properties_db); + hashmap_free(device->sysattr_values); + set_free(device->sysattrs); + set_free(device->all_tags); + set_free(device->current_tags); + set_free(device->devlinks); + hashmap_free(device->children); + + return mfree(device); +} + +DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_device, sd_device, device_free); + +int device_add_property_aux(sd_device *device, const char *key, const char *value, bool db) { + OrderedHashmap **properties; + + assert(device); + assert(key); + + if (db) + properties = &device->properties_db; + else + properties = &device->properties; + + if (value) { + _unused_ _cleanup_free_ char *old_value = NULL; + _cleanup_free_ char *new_key = NULL, *new_value = NULL, *old_key = NULL; + int r; + + r = ordered_hashmap_ensure_allocated(properties, &string_hash_ops_free_free); + if (r < 0) + return r; + + new_key = strdup(key); + if (!new_key) + return -ENOMEM; + + new_value = strdup(value); + if (!new_value) + return -ENOMEM; + + old_value = ordered_hashmap_get2(*properties, key, (void**) &old_key); + + /* ordered_hashmap_replace() does not fail when the hashmap already has the entry. */ + r = ordered_hashmap_replace(*properties, new_key, new_value); + if (r < 0) + return r; + + TAKE_PTR(new_key); + TAKE_PTR(new_value); + } else { + _unused_ _cleanup_free_ char *old_value = NULL; + _cleanup_free_ char *old_key = NULL; + + old_value = ordered_hashmap_remove2(*properties, key, (void**) &old_key); + } + + if (!db) { + device->properties_generation++; + device->properties_buf_outdated = true; + } + + return 0; +} + +int device_set_syspath(sd_device *device, const char *_syspath, bool verify) { + _cleanup_free_ char *syspath = NULL; + const char *devpath; + int r; + + assert(device); + assert(_syspath); + + if (verify) { + _cleanup_close_ int fd = -EBADF; + + /* The input path maybe a symlink located outside of /sys. Let's try to chase the symlink at first. + * The primary use case is that e.g. /proc/device-tree is a symlink to /sys/firmware/devicetree/base. + * By chasing symlinks in the path at first, we can call sd_device_new_from_path() with such path. */ + r = chase(_syspath, NULL, 0, &syspath, &fd); + if (r == -ENOENT) + /* the device does not exist (any more?) */ + return log_debug_errno(SYNTHETIC_ERRNO(ENODEV), + "sd-device: Failed to chase symlinks in \"%s\".", _syspath); + if (r < 0) + return log_debug_errno(r, "sd-device: Failed to get target of '%s': %m", _syspath); + + if (!path_startswith(syspath, "/sys")) { + _cleanup_free_ char *real_sys = NULL, *new_syspath = NULL; + char *p; + + /* /sys is a symlink to somewhere sysfs is mounted on? In that case, we convert the path to real sysfs to "/sys". */ + r = chase("/sys", NULL, 0, &real_sys, NULL); + if (r < 0) + return log_debug_errno(r, "sd-device: Failed to chase symlink /sys: %m"); + + p = path_startswith(syspath, real_sys); + if (!p) + return log_debug_errno(SYNTHETIC_ERRNO(ENODEV), + "sd-device: Canonicalized path '%s' does not starts with sysfs mount point '%s'", + syspath, real_sys); + + new_syspath = path_join("/sys", p); + if (!new_syspath) + return log_oom_debug(); + + free_and_replace(syspath, new_syspath); + path_simplify(syspath); + } + + if (path_startswith(syspath, "/sys/devices/")) { + /* For proper devices, stricter rules apply: they must have a 'uevent' file, + * otherwise we won't allow them */ + + if (faccessat(fd, "uevent", F_OK, 0) < 0) { + if (errno == ENOENT) + /* This is not a valid device. Note, this condition is quite often + * satisfied when enumerating devices or finding a parent device. + * Hence, use log_trace_errno() here. */ + return log_trace_errno(SYNTHETIC_ERRNO(ENODEV), + "sd-device: the uevent file \"%s/uevent\" does not exist.", syspath); + if (errno == ENOTDIR) + /* Not actually a directory. */ + return log_debug_errno(SYNTHETIC_ERRNO(ENODEV), + "sd-device: the syspath \"%s\" is not a directory.", syspath); + + return log_debug_errno(errno, "sd-device: cannot find uevent file for %s: %m", syspath); + } + } else { + struct stat st; + + /* For everything else lax rules apply: they just need to be a directory */ + + if (fstat(fd, &st) < 0) + return log_debug_errno(errno, "sd-device: failed to check if syspath \"%s\" is a directory: %m", syspath); + if (!S_ISDIR(st.st_mode)) + return log_debug_errno(SYNTHETIC_ERRNO(ENODEV), + "sd-device: the syspath \"%s\" is not a directory.", syspath); + } + + /* Only operate on sysfs, i.e. refuse going down into /sys/fs/cgroup/ or similar places where + * things are not arranged as kobjects in kernel, and hence don't necessarily have + * kobject/attribute structure. */ + r = getenv_bool_secure("SYSTEMD_DEVICE_VERIFY_SYSFS"); + if (r < 0 && r != -ENXIO) + log_debug_errno(r, "Failed to parse $SYSTEMD_DEVICE_VERIFY_SYSFS value: %m"); + if (r != 0) { + r = fd_is_fs_type(fd, SYSFS_MAGIC); + if (r < 0) + return log_debug_errno(r, "sd-device: failed to check if syspath \"%s\" is backed by sysfs.", syspath); + if (r == 0) + return log_debug_errno(SYNTHETIC_ERRNO(ENODEV), + "sd-device: the syspath \"%s\" is outside of sysfs, refusing.", syspath); + } + } else { + /* must be a subdirectory of /sys */ + if (!path_startswith(_syspath, "/sys/")) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), + "sd-device: Syspath '%s' is not a subdirectory of /sys", + _syspath); + + r = path_simplify_alloc(_syspath, &syspath); + if (r < 0) + return r; + } + + assert_se(devpath = startswith(syspath, "/sys")); + if (devpath[0] != '/') + return log_debug_errno(SYNTHETIC_ERRNO(ENODEV), "sd-device: \"/sys\" alone is not a valid device path."); + + r = device_add_property_internal(device, "DEVPATH", devpath); + if (r < 0) + return log_debug_errno(r, "sd-device: Failed to add \"DEVPATH\" property for device \"%s\": %m", syspath); + + free_and_replace(device->syspath, syspath); + device->devpath = devpath; + + /* Unset sysname and sysnum, they will be assigned when requested. */ + device->sysnum = NULL; + device->sysname = mfree(device->sysname); + return 0; +} + +static int device_new_from_syspath(sd_device **ret, const char *syspath, bool strict) { + _cleanup_(sd_device_unrefp) sd_device *device = NULL; + int r; + + assert_return(ret, -EINVAL); + assert_return(syspath, -EINVAL); + + if (strict && !path_startswith(syspath, "/sys/")) + return -EINVAL; + + r = device_new_aux(&device); + if (r < 0) + return r; + + r = device_set_syspath(device, syspath, /* verify= */ true); + if (r < 0) + return r; + + *ret = TAKE_PTR(device); + return 0; +} + +_public_ int sd_device_new_from_syspath(sd_device **ret, const char *syspath) { + return device_new_from_syspath(ret, syspath, /* strict = */ true); +} + +int device_new_from_mode_and_devnum(sd_device **ret, mode_t mode, dev_t devnum) { + _cleanup_(sd_device_unrefp) sd_device *dev = NULL; + _cleanup_free_ char *syspath = NULL; + const char *t, *subsystem = NULL; + dev_t n; + int r; + + assert(ret); + + if (S_ISCHR(mode)) + t = "char"; + else if (S_ISBLK(mode)) + t = "block"; + else + return -ENOTTY; + + if (major(devnum) == 0) + return -ENODEV; + + if (asprintf(&syspath, "/sys/dev/%s/" DEVNUM_FORMAT_STR, t, DEVNUM_FORMAT_VAL(devnum)) < 0) + return -ENOMEM; + + r = sd_device_new_from_syspath(&dev, syspath); + if (r < 0) + return r; + + r = sd_device_get_devnum(dev, &n); + if (r == -ENOENT) + return -ENXIO; + if (r < 0) + return r; + if (n != devnum) + return -ENXIO; + + r = sd_device_get_subsystem(dev, &subsystem); + if (r < 0 && r != -ENOENT) + return r; + if (streq_ptr(subsystem, "block") != !!S_ISBLK(mode)) + return -ENXIO; + + *ret = TAKE_PTR(dev); + return 0; +} + +_public_ int sd_device_new_from_devnum(sd_device **ret, char type, dev_t devnum) { + assert_return(ret, -EINVAL); + assert_return(IN_SET(type, 'b', 'c'), -EINVAL); + + return device_new_from_mode_and_devnum(ret, type == 'b' ? S_IFBLK : S_IFCHR, devnum); +} + +static int device_new_from_main_ifname(sd_device **ret, const char *ifname) { + const char *syspath; + + assert(ret); + assert(ifname); + + syspath = strjoina("/sys/class/net/", ifname); + return sd_device_new_from_syspath(ret, syspath); +} + +_public_ int sd_device_new_from_ifname(sd_device **ret, const char *ifname) { + _cleanup_free_ char *main_name = NULL; + int r; + + assert_return(ret, -EINVAL); + assert_return(ifname, -EINVAL); + + r = parse_ifindex(ifname); + if (r > 0) + return sd_device_new_from_ifindex(ret, r); + + if (ifname_valid(ifname)) { + r = device_new_from_main_ifname(ret, ifname); + if (r >= 0) + return r; + } + + r = rtnl_resolve_link_alternative_name(NULL, ifname, &main_name); + if (r < 0) + return r; + + return device_new_from_main_ifname(ret, main_name); +} + +_public_ int sd_device_new_from_ifindex(sd_device **ret, int ifindex) { + _cleanup_(sd_device_unrefp) sd_device *dev = NULL; + char ifname[IF_NAMESIZE]; + int r, i; + + assert_return(ret, -EINVAL); + assert_return(ifindex > 0, -EINVAL); + + if (format_ifname(ifindex, ifname) < 0) + return -ENODEV; + + r = device_new_from_main_ifname(&dev, ifname); + if (r < 0) + return r; + + r = sd_device_get_ifindex(dev, &i); + if (r == -ENOENT) + return -ENXIO; + if (r < 0) + return r; + if (i != ifindex) + return -ENXIO; + + *ret = TAKE_PTR(dev); + return 0; +} + +static int device_strjoin_new( + const char *a, + const char *b, + const char *c, + const char *d, + sd_device **ret) { + + const char *p; + int r; + + p = strjoina(a, b, c, d); + if (access(p, F_OK) < 0) + return IN_SET(errno, ENOENT, ENAMETOOLONG) ? 0 : -errno; /* If this sysfs is too long then it doesn't exist either */ + + r = sd_device_new_from_syspath(ret, p); + if (r < 0) + return r; + + return 1; +} + +_public_ int sd_device_new_from_subsystem_sysname( + sd_device **ret, + const char *subsystem, + const char *sysname) { + + char *name; + int r; + + assert_return(ret, -EINVAL); + assert_return(subsystem, -EINVAL); + assert_return(sysname, -EINVAL); + + if (!path_is_normalized(subsystem)) + return -EINVAL; + if (!path_is_normalized(sysname)) + return -EINVAL; + + /* translate sysname back to sysfs filename */ + name = strdupa_safe(sysname); + string_replace_char(name, '/', '!'); + + if (streq(subsystem, "subsystem")) { + FOREACH_STRING(s, "/sys/bus/", "/sys/class/") { + r = device_strjoin_new(s, name, NULL, NULL, ret); + if (r < 0) + return r; + if (r > 0) + return 0; + } + + } else if (streq(subsystem, "module")) { + r = device_strjoin_new("/sys/module/", name, NULL, NULL, ret); + if (r < 0) + return r; + if (r > 0) + return 0; + + } else if (streq(subsystem, "drivers")) { + const char *sep; + + sep = strchr(name, ':'); + if (sep && sep[1] != '\0') { /* Require ":" and something non-empty after that. */ + + const char *subsys = memdupa_suffix0(name, sep - name); + sep++; + + if (streq(sep, "drivers")) /* If the sysname is "drivers", then it's the drivers directory itself that is meant. */ + r = device_strjoin_new("/sys/bus/", subsys, "/drivers", NULL, ret); + else + r = device_strjoin_new("/sys/bus/", subsys, "/drivers/", sep, ret); + if (r < 0) + return r; + if (r > 0) + return 0; + } + } + + r = device_strjoin_new("/sys/bus/", subsystem, "/devices/", name, ret); + if (r < 0) + return r; + if (r > 0) + return 0; + + r = device_strjoin_new("/sys/class/", subsystem, "/", name, ret); + if (r < 0) + return r; + if (r > 0) + return 0; + + r = device_strjoin_new("/sys/firmware/", subsystem, "/", name, ret); + if (r < 0) + return r; + if (r > 0) + return 0; + + return -ENODEV; +} + +_public_ int sd_device_new_from_stat_rdev(sd_device **ret, const struct stat *st) { + assert_return(ret, -EINVAL); + assert_return(st, -EINVAL); + + return device_new_from_mode_and_devnum(ret, st->st_mode, st->st_rdev); +} + +_public_ int sd_device_new_from_devname(sd_device **ret, const char *devname) { + struct stat st; + dev_t devnum; + mode_t mode; + + assert_return(ret, -EINVAL); + assert_return(devname, -EINVAL); + + /* This function actually accepts both devlinks and devnames, i.e. both symlinks and device + * nodes below /dev/. */ + + /* Also ignore when the specified path is "/dev". */ + if (isempty(path_startswith(devname, "/dev"))) + return -EINVAL; + + if (device_path_parse_major_minor(devname, &mode, &devnum) >= 0) + /* Let's shortcut when "/dev/block/maj:min" or "/dev/char/maj:min" is specified. + * In that case, we can directly convert the path to syspath, hence it is not necessary + * that the specified path exists. So, this works fine without udevd being running. */ + return device_new_from_mode_and_devnum(ret, mode, devnum); + + if (stat(devname, &st) < 0) + return ERRNO_IS_DEVICE_ABSENT(errno) ? -ENODEV : -errno; + + return sd_device_new_from_stat_rdev(ret, &st); +} + +_public_ int sd_device_new_from_path(sd_device **ret, const char *path) { + assert_return(ret, -EINVAL); + assert_return(path, -EINVAL); + + if (path_startswith(path, "/dev")) + return sd_device_new_from_devname(ret, path); + + return device_new_from_syspath(ret, path, /* strict = */ false); +} + +int device_set_devtype(sd_device *device, const char *devtype) { + _cleanup_free_ char *t = NULL; + int r; + + assert(device); + assert(devtype); + + t = strdup(devtype); + if (!t) + return -ENOMEM; + + r = device_add_property_internal(device, "DEVTYPE", t); + if (r < 0) + return r; + + return free_and_replace(device->devtype, t); +} + +int device_set_ifindex(sd_device *device, const char *name) { + int r, ifindex; + + assert(device); + assert(name); + + ifindex = parse_ifindex(name); + if (ifindex < 0) + return ifindex; + + r = device_add_property_internal(device, "IFINDEX", name); + if (r < 0) + return r; + + device->ifindex = ifindex; + + return 0; +} + +static int mangle_devname(const char *p, char **ret) { + char *q; + + assert(p); + assert(ret); + + if (!path_is_safe(p)) + return -EINVAL; + + /* When the path is absolute, it must start with "/dev/", but ignore "/dev/" itself. */ + if (path_is_absolute(p)) { + if (isempty(path_startswith(p, "/dev/"))) + return -EINVAL; + + q = strdup(p); + } else + q = path_join("/dev/", p); + if (!q) + return -ENOMEM; + + path_simplify(q); + + *ret = q; + return 0; +} + +int device_set_devname(sd_device *device, const char *devname) { + _cleanup_free_ char *t = NULL; + int r; + + assert(device); + assert(devname); + + r = mangle_devname(devname, &t); + if (r < 0) + return r; + + r = device_add_property_internal(device, "DEVNAME", t); + if (r < 0) + return r; + + return free_and_replace(device->devname, t); +} + +int device_set_devmode(sd_device *device, const char *_devmode) { + unsigned devmode; + int r; + + assert(device); + assert(_devmode); + + r = safe_atou(_devmode, &devmode); + if (r < 0) + return r; + + if (devmode > 07777) + return -EINVAL; + + r = device_add_property_internal(device, "DEVMODE", _devmode); + if (r < 0) + return r; + + device->devmode = devmode; + + return 0; +} + +int device_set_devnum(sd_device *device, const char *major, const char *minor) { + unsigned maj, min = 0; + int r; + + assert(device); + assert(major); + + r = safe_atou(major, &maj); + if (r < 0) + return r; + if (maj == 0) + return 0; + if (!DEVICE_MAJOR_VALID(maj)) + return -EINVAL; + + if (minor) { + r = safe_atou(minor, &min); + if (r < 0) + return r; + if (!DEVICE_MINOR_VALID(min)) + return -EINVAL; + } + + r = device_add_property_internal(device, "MAJOR", major); + if (r < 0) + return r; + + if (minor) { + r = device_add_property_internal(device, "MINOR", minor); + if (r < 0) + return r; + } + + device->devnum = makedev(maj, min); + + return 0; +} + +int device_set_diskseq(sd_device *device, const char *str) { + uint64_t diskseq; + int r; + + assert(device); + assert(str); + + r = safe_atou64(str, &diskseq); + if (r < 0) + return r; + if (diskseq == 0) + return -EINVAL; + + r = device_add_property_internal(device, "DISKSEQ", str); + if (r < 0) + return r; + + device->diskseq = diskseq; + + return 0; +} + +static int handle_uevent_line( + sd_device *device, + const char *key, + const char *value, + const char **major, + const char **minor) { + + assert(device); + assert(key); + assert(value); + assert(major); + assert(minor); + + if (streq(key, "DEVTYPE")) + return device_set_devtype(device, value); + if (streq(key, "IFINDEX")) + return device_set_ifindex(device, value); + if (streq(key, "DEVNAME")) + return device_set_devname(device, value); + if (streq(key, "DEVMODE")) + return device_set_devmode(device, value); + if (streq(key, "DISKSEQ")) + return device_set_diskseq(device, value); + if (streq(key, "MAJOR")) + *major = value; + else if (streq(key, "MINOR")) + *minor = value; + else + return device_add_property_internal(device, key, value); + + return 0; +} + +int device_read_uevent_file(sd_device *device) { + _cleanup_free_ char *uevent = NULL; + const char *syspath, *key = NULL, *value = NULL, *major = NULL, *minor = NULL; + char *path; + size_t uevent_len; + int r; + + enum { + PRE_KEY, + KEY, + PRE_VALUE, + VALUE, + INVALID_LINE, + } state = PRE_KEY; + + assert(device); + + if (device->uevent_loaded || device->sealed) + return 0; + + r = sd_device_get_syspath(device, &syspath); + if (r < 0) + return r; + + device->uevent_loaded = true; + + path = strjoina(syspath, "/uevent"); + + r = read_full_virtual_file(path, &uevent, &uevent_len); + if (r == -EACCES || ERRNO_IS_NEG_DEVICE_ABSENT(r)) + /* The uevent files may be write-only, the device may be already removed, or the device + * may not have the uevent file. */ + return 0; + if (r < 0) + return log_device_debug_errno(device, r, "sd-device: Failed to read uevent file '%s': %m", path); + + for (size_t i = 0; i < uevent_len; i++) + switch (state) { + case PRE_KEY: + if (!strchr(NEWLINE, uevent[i])) { + key = &uevent[i]; + + state = KEY; + } + + break; + case KEY: + if (uevent[i] == '=') { + uevent[i] = '\0'; + + state = PRE_VALUE; + } else if (strchr(NEWLINE, uevent[i])) { + uevent[i] = '\0'; + log_device_debug(device, "sd-device: Invalid uevent line '%s', ignoring", key); + + state = PRE_KEY; + } + + break; + case PRE_VALUE: + value = &uevent[i]; + state = VALUE; + + _fallthrough_; /* to handle empty property */ + case VALUE: + if (strchr(NEWLINE, uevent[i])) { + uevent[i] = '\0'; + + r = handle_uevent_line(device, key, value, &major, &minor); + if (r < 0) + log_device_debug_errno(device, r, "sd-device: Failed to handle uevent entry '%s=%s', ignoring: %m", key, value); + + state = PRE_KEY; + } + + break; + default: + assert_not_reached(); + } + + if (major) { + r = device_set_devnum(device, major, minor); + if (r < 0) + log_device_debug_errno(device, r, "sd-device: Failed to set 'MAJOR=%s' or 'MINOR=%s' from '%s', ignoring: %m", major, strna(minor), path); + } + + return 0; +} + +_public_ int sd_device_get_ifindex(sd_device *device, int *ifindex) { + int r; + + assert_return(device, -EINVAL); + + r = device_read_uevent_file(device); + if (r < 0) + return r; + + if (device->ifindex <= 0) + return -ENOENT; + + if (ifindex) + *ifindex = device->ifindex; + + return 0; +} + +_public_ int sd_device_new_from_device_id(sd_device **ret, const char *id) { + int r; + + assert_return(ret, -EINVAL); + assert_return(id, -EINVAL); + + switch (id[0]) { + case 'b': + case 'c': { + dev_t devt; + + if (isempty(id)) + return -EINVAL; + + r = parse_devnum(id + 1, &devt); + if (r < 0) + return r; + + return sd_device_new_from_devnum(ret, id[0], devt); + } + + case 'n': { + int ifindex; + + ifindex = parse_ifindex(id + 1); + if (ifindex < 0) + return ifindex; + + return sd_device_new_from_ifindex(ret, ifindex); + } + + case '+': { + const char *subsys, *sep; + + sep = strchr(id + 1, ':'); + if (!sep || sep - id - 1 > NAME_MAX) + return -EINVAL; + + subsys = memdupa_suffix0(id + 1, sep - id - 1); + + return sd_device_new_from_subsystem_sysname(ret, subsys, sep + 1); + } + + default: + return -EINVAL; + } +} + +_public_ int sd_device_get_syspath(sd_device *device, const char **ret) { + assert_return(device, -EINVAL); + + assert(path_startswith(device->syspath, "/sys/")); + + if (ret) + *ret = device->syspath; + + return 0; +} + +DEFINE_PRIVATE_HASH_OPS_FULL( + device_by_path_hash_ops, + char, path_hash_func, path_compare, free, + sd_device, sd_device_unref); + +static int device_enumerate_children_internal(sd_device *device, const char *subdir, Set **stack, Hashmap **children) { + _cleanup_closedir_ DIR *dir = NULL; + int r; + + assert(device); + assert(stack); + assert(children); + + r = device_opendir(device, subdir, &dir); + if (r < 0) + return r; + + FOREACH_DIRENT_ALL(de, dir, return -errno) { + _cleanup_(sd_device_unrefp) sd_device *child = NULL; + _cleanup_free_ char *p = NULL; + + if (dot_or_dot_dot(de->d_name)) + continue; + + if (!IN_SET(de->d_type, DT_LNK, DT_DIR)) + continue; + + if (subdir) + p = path_join(subdir, de->d_name); + else + p = strdup(de->d_name); + if (!p) + return -ENOMEM; + + /* Try to create child device. */ + r = sd_device_new_child(&child, device, p); + if (r >= 0) { + /* OK, this is a child device, saving it. */ + r = hashmap_ensure_put(children, &device_by_path_hash_ops, p, child); + if (r < 0) + return r; + + TAKE_PTR(p); + TAKE_PTR(child); + } else if (r == -ENODEV) { + /* This is not a child device. Push the sub-directory into stack, and read it later. */ + + if (de->d_type == DT_LNK) + /* Do not follow symlinks, otherwise, we will enter an infinite loop, e.g., + * /sys/class/block/nvme0n1/subsystem/nvme0n1/subsystem/nvme0n1/subsystem/… */ + continue; + + r = set_ensure_consume(stack, &path_hash_ops_free, TAKE_PTR(p)); + if (r < 0) + return r; + } else + return r; + } + + return 0; +} + +static int device_enumerate_children(sd_device *device) { + _cleanup_hashmap_free_ Hashmap *children = NULL; + _cleanup_set_free_ Set *stack = NULL; + int r; + + assert(device); + + if (device->children_enumerated) + return 0; /* Already enumerated. */ + + r = device_enumerate_children_internal(device, NULL, &stack, &children); + if (r < 0) + return r; + + for (;;) { + _cleanup_free_ char *subdir = NULL; + + subdir = set_steal_first(stack); + if (!subdir) + break; + + r = device_enumerate_children_internal(device, subdir, &stack, &children); + if (r < 0) + return r; + } + + device->children_enumerated = true; + device->children = TAKE_PTR(children); + return 1; /* Enumerated. */ +} + +_public_ sd_device *sd_device_get_child_first(sd_device *device, const char **ret_suffix) { + int r; + + assert(device); + + r = device_enumerate_children(device); + if (r < 0) { + log_device_debug_errno(device, r, "sd-device: failed to enumerate child devices: %m"); + if (ret_suffix) + *ret_suffix = NULL; + return NULL; + } + + device->children_iterator = ITERATOR_FIRST; + + return sd_device_get_child_next(device, ret_suffix); +} + +_public_ sd_device *sd_device_get_child_next(sd_device *device, const char **ret_suffix) { + sd_device *child; + + assert(device); + + (void) hashmap_iterate(device->children, &device->children_iterator, (void**) &child, (const void**) ret_suffix); + return child; +} + +_public_ int sd_device_new_child(sd_device **ret, sd_device *device, const char *suffix) { + _cleanup_free_ char *path = NULL; + sd_device *child; + const char *s; + int r; + + assert_return(ret, -EINVAL); + assert_return(device, -EINVAL); + assert_return(suffix, -EINVAL); + + if (!path_is_safe(suffix)) + return -EINVAL; + + /* If we have already enumerated children, try to find the child from the cache. */ + child = hashmap_get(device->children, suffix); + if (child) { + *ret = sd_device_ref(child); + return 0; + } + + r = sd_device_get_syspath(device, &s); + if (r < 0) + return r; + + path = path_join(s, suffix); + if (!path) + return -ENOMEM; + + return sd_device_new_from_syspath(ret, path); +} + +static int device_new_from_child(sd_device **ret, sd_device *child) { + _cleanup_free_ char *path = NULL; + const char *syspath; + int r; + + assert(ret); + assert(child); + + r = sd_device_get_syspath(child, &syspath); + if (r < 0) + return r; + + for (;;) { + _cleanup_free_ char *p = NULL; + + r = path_extract_directory(path ?: syspath, &p); + if (r < 0) + return r; + + if (path_equal(p, "/sys")) + return -ENODEV; + + r = sd_device_new_from_syspath(ret, p); + if (r != -ENODEV) + return r; + + free_and_replace(path, p); + } +} + +_public_ int sd_device_get_parent(sd_device *child, sd_device **ret) { + int r; + + assert_return(child, -EINVAL); + + if (!child->parent_set) { + r = device_new_from_child(&child->parent, child); + if (r < 0 && r != -ENODEV) + return r; + + child->parent_set = true; + } + + if (!child->parent) + return -ENOENT; + + if (ret) + *ret = child->parent; + return 0; +} + +int device_set_subsystem(sd_device *device, const char *subsystem) { + _cleanup_free_ char *s = NULL; + int r; + + assert(device); + + if (subsystem) { + s = strdup(subsystem); + if (!s) + return -ENOMEM; + } + + r = device_add_property_internal(device, "SUBSYSTEM", s); + if (r < 0) + return r; + + device->subsystem_set = true; + return free_and_replace(device->subsystem, s); +} + +int device_set_drivers_subsystem(sd_device *device) { + _cleanup_free_ char *subsystem = NULL; + const char *devpath, *drivers, *p; + int r; + + assert(device); + + r = sd_device_get_devpath(device, &devpath); + if (r < 0) + return r; + + drivers = strstr(devpath, "/drivers/"); + if (!drivers) + drivers = endswith(devpath, "/drivers"); + if (!drivers) + return -EINVAL; + + /* Find the path component immediately before the "/drivers/" string */ + r = path_find_last_component(devpath, /* accept_dot_dot= */ false, &drivers, &p); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; + + subsystem = strndup(p, r); + if (!subsystem) + return -ENOMEM; + + r = device_set_subsystem(device, "drivers"); + if (r < 0) + return r; + + return free_and_replace(device->driver_subsystem, subsystem); +} + +_public_ int sd_device_get_subsystem(sd_device *device, const char **ret) { + int r; + + assert_return(device, -EINVAL); + + if (!device->subsystem_set) { + _cleanup_free_ char *subsystem = NULL; + const char *syspath; + char *path; + + r = sd_device_get_syspath(device, &syspath); + if (r < 0) + return r; + + /* read 'subsystem' link */ + path = strjoina(syspath, "/subsystem"); + r = readlink_value(path, &subsystem); + if (r < 0 && r != -ENOENT) + return log_device_debug_errno(device, r, + "sd-device: Failed to read subsystem for %s: %m", + device->devpath); + + if (subsystem) + r = device_set_subsystem(device, subsystem); + /* use implicit names */ + else if (!isempty(path_startswith(device->devpath, "/module/"))) + r = device_set_subsystem(device, "module"); + else if (strstr(syspath, "/drivers/") || endswith(syspath, "/drivers")) + r = device_set_drivers_subsystem(device); + else if (!isempty(PATH_STARTSWITH_SET(device->devpath, "/class/", "/bus/"))) + r = device_set_subsystem(device, "subsystem"); + else { + device->subsystem_set = true; + r = 0; + } + if (r < 0) + return log_device_debug_errno(device, r, + "sd-device: Failed to set subsystem for %s: %m", + device->devpath); + } + + if (!device->subsystem) + return -ENOENT; + + if (ret) + *ret = device->subsystem; + return 0; +} + +_public_ int sd_device_get_devtype(sd_device *device, const char **devtype) { + int r; + + assert_return(device, -EINVAL); + + r = device_read_uevent_file(device); + if (r < 0) + return r; + + if (!device->devtype) + return -ENOENT; + + if (devtype) + *devtype = device->devtype; + + return !!device->devtype; +} + +_public_ int sd_device_get_parent_with_subsystem_devtype(sd_device *child, const char *subsystem, const char *devtype, sd_device **ret) { + sd_device *parent = NULL; + int r; + + assert_return(child, -EINVAL); + assert_return(subsystem, -EINVAL); + + r = sd_device_get_parent(child, &parent); + while (r >= 0) { + const char *parent_subsystem = NULL; + + (void) sd_device_get_subsystem(parent, &parent_subsystem); + if (streq_ptr(parent_subsystem, subsystem)) { + const char *parent_devtype = NULL; + + if (!devtype) + break; + + (void) sd_device_get_devtype(parent, &parent_devtype); + if (streq_ptr(parent_devtype, devtype)) + break; + } + r = sd_device_get_parent(parent, &parent); + } + + if (r < 0) + return r; + + if (ret) + *ret = parent; + return 0; +} + +_public_ int sd_device_get_devnum(sd_device *device, dev_t *devnum) { + int r; + + assert_return(device, -EINVAL); + + r = device_read_uevent_file(device); + if (r < 0) + return r; + + if (major(device->devnum) <= 0) + return -ENOENT; + + if (devnum) + *devnum = device->devnum; + + return 0; +} + +int device_set_driver(sd_device *device, const char *driver) { + _cleanup_free_ char *d = NULL; + int r; + + assert(device); + + if (driver) { + d = strdup(driver); + if (!d) + return -ENOMEM; + } + + r = device_add_property_internal(device, "DRIVER", d); + if (r < 0) + return r; + + device->driver_set = true; + return free_and_replace(device->driver, d); +} + +_public_ int sd_device_get_driver(sd_device *device, const char **ret) { + assert_return(device, -EINVAL); + + if (!device->driver_set) { + _cleanup_free_ char *driver = NULL; + const char *syspath; + char *path; + int r; + + r = sd_device_get_syspath(device, &syspath); + if (r < 0) + return r; + + path = strjoina(syspath, "/driver"); + r = readlink_value(path, &driver); + if (r < 0 && r != -ENOENT) + return log_device_debug_errno(device, r, + "sd-device: readlink(\"%s\") failed: %m", path); + + r = device_set_driver(device, driver); + if (r < 0) + return log_device_debug_errno(device, r, + "sd-device: Failed to set driver \"%s\": %m", driver); + } + + if (!device->driver) + return -ENOENT; + + if (ret) + *ret = device->driver; + return 0; +} + +_public_ int sd_device_get_devpath(sd_device *device, const char **ret) { + assert_return(device, -EINVAL); + + assert(device->devpath); + assert(device->devpath[0] == '/'); + + if (ret) + *ret = device->devpath; + + return 0; +} + +_public_ int sd_device_get_devname(sd_device *device, const char **devname) { + int r; + + assert_return(device, -EINVAL); + + r = device_read_uevent_file(device); + if (r < 0) + return r; + + if (!device->devname) + return -ENOENT; + + assert(!isempty(path_startswith(device->devname, "/dev/"))); + + if (devname) + *devname = device->devname; + return 0; +} + +static int device_set_sysname_and_sysnum(sd_device *device) { + _cleanup_free_ char *sysname = NULL; + size_t len, n; + int r; + + assert(device); + + r = path_extract_filename(device->devpath, &sysname); + if (r < 0) + return r; + if (r == O_DIRECTORY) + return -EINVAL; + + /* some devices have '!' in their name, change that to '/' */ + string_replace_char(sysname, '!', '/'); + + n = strspn_from_end(sysname, DIGITS); + len = strlen(sysname); + assert(n <= len); + if (n == len) + n = 0; /* Do not set sysnum for number only sysname. */ + + device->sysnum = n > 0 ? sysname + len - n : NULL; + return free_and_replace(device->sysname, sysname); +} + +_public_ int sd_device_get_sysname(sd_device *device, const char **ret) { + int r; + + assert_return(device, -EINVAL); + + if (!device->sysname) { + r = device_set_sysname_and_sysnum(device); + if (r < 0) + return r; + } + + if (ret) + *ret = device->sysname; + return 0; +} + +_public_ int sd_device_get_sysnum(sd_device *device, const char **ret) { + int r; + + assert_return(device, -EINVAL); + + if (!device->sysname) { + r = device_set_sysname_and_sysnum(device); + if (r < 0) + return r; + } + + if (!device->sysnum) + return -ENOENT; + + if (ret) + *ret = device->sysnum; + return 0; +} + +_public_ int sd_device_get_action(sd_device *device, sd_device_action_t *ret) { + assert_return(device, -EINVAL); + + if (device->action < 0) + return -ENOENT; + + if (ret) + *ret = device->action; + + return 0; +} + +_public_ int sd_device_get_seqnum(sd_device *device, uint64_t *ret) { + assert_return(device, -EINVAL); + + if (device->seqnum == 0) + return -ENOENT; + + if (ret) + *ret = device->seqnum; + + return 0; +} + +_public_ int sd_device_get_diskseq(sd_device *device, uint64_t *ret) { + int r; + + assert_return(device, -EINVAL); + + r = device_read_uevent_file(device); + if (r < 0) + return r; + + if (device->diskseq == 0) + return -ENOENT; + + if (ret) + *ret = device->diskseq; + + return 0; +} + +static bool is_valid_tag(const char *tag) { + assert(tag); + + return in_charset(tag, ALPHANUMERICAL "-_") && filename_is_valid(tag); +} + +int device_add_tag(sd_device *device, const char *tag, bool both) { + int r, added; + + assert(device); + assert(tag); + + if (!is_valid_tag(tag)) + return -EINVAL; + + /* Definitely add to the "all" list of tags (i.e. the sticky list) */ + added = set_put_strdup(&device->all_tags, tag); + if (added < 0) + return added; + + /* And optionally, also add it to the current list of tags */ + if (both) { + r = set_put_strdup(&device->current_tags, tag); + if (r < 0) { + if (added > 0) + (void) set_remove(device->all_tags, tag); + + return r; + } + } + + device->tags_generation++; + device->property_tags_outdated = true; + + return 0; +} + +int device_add_devlink(sd_device *device, const char *devlink) { + char *p; + int r; + + assert(device); + assert(devlink); + + r = mangle_devname(devlink, &p); + if (r < 0) + return r; + + r = set_ensure_consume(&device->devlinks, &path_hash_ops_free, p); + if (r < 0) + return r; + + device->devlinks_generation++; + device->property_devlinks_outdated = true; + + return r; /* return 1 when newly added, 0 when already exists */ +} + +int device_remove_devlink(sd_device *device, const char *devlink) { + _cleanup_free_ char *p = NULL, *s = NULL; + int r; + + assert(device); + assert(devlink); + + r = mangle_devname(devlink, &p); + if (r < 0) + return r; + + s = set_remove(device->devlinks, p); + if (!s) + return 0; /* does not exist */ + + device->devlinks_generation++; + device->property_devlinks_outdated = true; + return 1; /* removed */ +} + +bool device_has_devlink(sd_device *device, const char *devlink) { + assert(device); + assert(devlink); + + return set_contains(device->devlinks, devlink); +} + +static int device_add_property_internal_from_string(sd_device *device, const char *str) { + _cleanup_free_ char *key = NULL; + char *value; + int r; + + assert(device); + assert(str); + + key = strdup(str); + if (!key) + return -ENOMEM; + + value = strchr(key, '='); + if (!value) + return -EINVAL; + + *value = '\0'; + + if (isempty(++value)) + value = NULL; + + /* Add the property to both sd_device::properties and sd_device::properties_db, + * as this is called by only handle_db_line(). */ + r = device_add_property_aux(device, key, value, false); + if (r < 0) + return r; + + return device_add_property_aux(device, key, value, true); +} + +int device_set_usec_initialized(sd_device *device, usec_t when) { + char s[DECIMAL_STR_MAX(usec_t)]; + int r; + + assert(device); + + xsprintf(s, USEC_FMT, when); + + r = device_add_property_internal(device, "USEC_INITIALIZED", s); + if (r < 0) + return r; + + device->usec_initialized = when; + return 0; +} + +static int handle_db_line(sd_device *device, char key, const char *value) { + int r; + + assert(device); + assert(value); + + switch (key) { + case 'G': /* Any tag */ + case 'Q': /* Current tag */ + return device_add_tag(device, value, key == 'Q'); + + case 'S': { + const char *path; + + path = strjoina("/dev/", value); + return device_add_devlink(device, path); + } + case 'E': + return device_add_property_internal_from_string(device, value); + + case 'I': { + usec_t t; + + r = safe_atou64(value, &t); + if (r < 0) + return r; + + return device_set_usec_initialized(device, t); + } + case 'L': + return safe_atoi(value, &device->devlink_priority); + + case 'W': + /* Deprecated. Previously, watch handle is both saved in database and /run/udev/watch. + * However, the handle saved in database may not be updated when the handle is updated + * or removed. Moreover, it is not necessary to store the handle within the database, + * as its value becomes meaningless when udevd is restarted. */ + return 0; + + case 'V': + return safe_atou(value, &device->database_version); + + default: + log_device_debug(device, "sd-device: Unknown key '%c' in device db, ignoring", key); + return 0; + } +} + +int device_get_device_id(sd_device *device, const char **ret) { + assert(device); + assert(ret); + + if (!device->device_id) { + _cleanup_free_ char *id = NULL; + const char *subsystem; + dev_t devnum; + int ifindex, r; + + r = sd_device_get_subsystem(device, &subsystem); + if (r < 0) + return r; + + if (sd_device_get_devnum(device, &devnum) >= 0) { + /* use dev_t — b259:131072, c254:0 */ + if (asprintf(&id, "%c" DEVNUM_FORMAT_STR, + streq(subsystem, "block") ? 'b' : 'c', + DEVNUM_FORMAT_VAL(devnum)) < 0) + return -ENOMEM; + } else if (sd_device_get_ifindex(device, &ifindex) >= 0) { + /* use netdev ifindex — n3 */ + if (asprintf(&id, "n%u", (unsigned) ifindex) < 0) + return -ENOMEM; + } else { + _cleanup_free_ char *sysname = NULL; + + /* use $subsys:$sysname — pci:0000:00:1f.2 + * sd_device_get_sysname() has '!' translated, get it from devpath */ + r = path_extract_filename(device->devpath, &sysname); + if (r < 0) + return r; + if (r == O_DIRECTORY) + return -EINVAL; + + if (streq(subsystem, "drivers")) { + /* the 'drivers' pseudo-subsystem is special, and needs the real + * subsystem encoded as well */ + assert(device->driver_subsystem); + id = strjoin("+drivers:", device->driver_subsystem, ":", sysname); + } else + id = strjoin("+", subsystem, ":", sysname); + if (!id) + return -ENOMEM; + } + + if (!filename_is_valid(id)) + return -EINVAL; + + device->device_id = TAKE_PTR(id); + } + + *ret = device->device_id; + return 0; +} + +int device_read_db_internal_filename(sd_device *device, const char *filename) { + _cleanup_free_ char *db = NULL; + const char *value; + size_t db_len; + char key = '\0'; /* Unnecessary initialization to appease gcc-12.0.0-0.4.fc36 */ + int r; + + enum { + PRE_KEY, + KEY, + PRE_VALUE, + VALUE, + INVALID_LINE, + } state = PRE_KEY; + + assert(device); + assert(filename); + + r = read_full_file(filename, &db, &db_len); + if (r < 0) { + if (r == -ENOENT) + return 0; + + return log_device_debug_errno(device, r, "sd-device: Failed to read db '%s': %m", filename); + } + + /* devices with a database entry are initialized */ + device->is_initialized = true; + + device->db_loaded = true; + + for (size_t i = 0; i < db_len; i++) + switch (state) { + case PRE_KEY: + if (!strchr(NEWLINE, db[i])) { + key = db[i]; + + state = KEY; + } + + break; + case KEY: + if (db[i] != ':') { + log_device_debug(device, "sd-device: Invalid db entry with key '%c', ignoring", key); + + state = INVALID_LINE; + } else { + db[i] = '\0'; + + state = PRE_VALUE; + } + + break; + case PRE_VALUE: + value = &db[i]; + + state = VALUE; + + break; + case INVALID_LINE: + if (strchr(NEWLINE, db[i])) + state = PRE_KEY; + + break; + case VALUE: + if (strchr(NEWLINE, db[i])) { + db[i] = '\0'; + r = handle_db_line(device, key, value); + if (r < 0) + log_device_debug_errno(device, r, "sd-device: Failed to handle db entry '%c:%s', ignoring: %m", + key, value); + + state = PRE_KEY; + } + + break; + default: + return log_device_debug_errno(device, SYNTHETIC_ERRNO(EINVAL), "sd-device: invalid db syntax."); + } + + return 0; +} + +int device_read_db_internal(sd_device *device, bool force) { + const char *id, *path; + int r; + + assert(device); + + if (device->db_loaded || (!force && device->sealed)) + return 0; + + r = device_get_device_id(device, &id); + if (r < 0) + return r; + + path = strjoina("/run/udev/data/", id); + + return device_read_db_internal_filename(device, path); +} + +_public_ int sd_device_get_is_initialized(sd_device *device) { + int r; + + assert_return(device, -EINVAL); + + r = device_read_db(device); + if (r == -ENOENT) + /* The device may be already removed or renamed. */ + return false; + if (r < 0) + return r; + + return device->is_initialized; +} + +_public_ int sd_device_get_usec_initialized(sd_device *device, uint64_t *ret) { + int r; + + assert_return(device, -EINVAL); + + r = sd_device_get_is_initialized(device); + if (r < 0) + return r; + if (r == 0) + return -EBUSY; + + if (device->usec_initialized == 0) + return -ENODATA; + + if (ret) + *ret = device->usec_initialized; + + return 0; +} + +_public_ int sd_device_get_usec_since_initialized(sd_device *device, uint64_t *ret) { + usec_t now_ts, ts; + int r; + + assert_return(device, -EINVAL); + + r = sd_device_get_usec_initialized(device, &ts); + if (r < 0) + return r; + + now_ts = now(CLOCK_MONOTONIC); + + if (now_ts < ts) + return -EIO; + + if (ret) + *ret = usec_sub_unsigned(now_ts, ts); + + return 0; +} + +_public_ const char *sd_device_get_tag_first(sd_device *device) { + void *v; + + assert_return(device, NULL); + + (void) device_read_db(device); + + device->all_tags_iterator_generation = device->tags_generation; + device->all_tags_iterator = ITERATOR_FIRST; + + (void) set_iterate(device->all_tags, &device->all_tags_iterator, &v); + return v; +} + +_public_ const char *sd_device_get_tag_next(sd_device *device) { + void *v; + + assert_return(device, NULL); + + (void) device_read_db(device); + + if (device->all_tags_iterator_generation != device->tags_generation) + return NULL; + + (void) set_iterate(device->all_tags, &device->all_tags_iterator, &v); + return v; +} + +static bool device_database_supports_current_tags(sd_device *device) { + assert(device); + + (void) device_read_db(device); + + /* The current tags (saved in Q field) feature is implemented in database version 1. + * If the database version is 0, then the tags (NOT current tags, saved in G field) are not + * sticky. Thus, we can safely bypass the operations for the current tags (Q) to tags (G). */ + + return device->database_version >= 1; +} + +_public_ const char *sd_device_get_current_tag_first(sd_device *device) { + void *v; + + assert_return(device, NULL); + + if (!device_database_supports_current_tags(device)) + return sd_device_get_tag_first(device); + + (void) device_read_db(device); + + device->current_tags_iterator_generation = device->tags_generation; + device->current_tags_iterator = ITERATOR_FIRST; + + (void) set_iterate(device->current_tags, &device->current_tags_iterator, &v); + return v; +} + +_public_ const char *sd_device_get_current_tag_next(sd_device *device) { + void *v; + + assert_return(device, NULL); + + if (!device_database_supports_current_tags(device)) + return sd_device_get_tag_next(device); + + (void) device_read_db(device); + + if (device->current_tags_iterator_generation != device->tags_generation) + return NULL; + + (void) set_iterate(device->current_tags, &device->current_tags_iterator, &v); + return v; +} + +_public_ const char *sd_device_get_devlink_first(sd_device *device) { + void *v; + + assert_return(device, NULL); + + (void) device_read_db(device); + + device->devlinks_iterator_generation = device->devlinks_generation; + device->devlinks_iterator = ITERATOR_FIRST; + + (void) set_iterate(device->devlinks, &device->devlinks_iterator, &v); + return v; +} + +_public_ const char *sd_device_get_devlink_next(sd_device *device) { + void *v; + + assert_return(device, NULL); + + (void) device_read_db(device); + + if (device->devlinks_iterator_generation != device->devlinks_generation) + return NULL; + + (void) set_iterate(device->devlinks, &device->devlinks_iterator, &v); + return v; +} + +int device_properties_prepare(sd_device *device) { + int r; + + assert(device); + + r = device_read_uevent_file(device); + if (r < 0) + return r; + + r = device_read_db(device); + if (r < 0) + return r; + + if (device->property_devlinks_outdated) { + _cleanup_free_ char *devlinks = NULL; + + r = set_strjoin(device->devlinks, " ", false, &devlinks); + if (r < 0) + return r; + + if (!isempty(devlinks)) { + r = device_add_property_internal(device, "DEVLINKS", devlinks); + if (r < 0) + return r; + } + + device->property_devlinks_outdated = false; + } + + if (device->property_tags_outdated) { + _cleanup_free_ char *tags = NULL; + + r = set_strjoin(device->all_tags, ":", true, &tags); + if (r < 0) + return r; + + if (!isempty(tags)) { + r = device_add_property_internal(device, "TAGS", tags); + if (r < 0) + return r; + } + + tags = mfree(tags); + r = set_strjoin(device->current_tags, ":", true, &tags); + if (r < 0) + return r; + + if (!isempty(tags)) { + r = device_add_property_internal(device, "CURRENT_TAGS", tags); + if (r < 0) + return r; + } + + device->property_tags_outdated = false; + } + + return 0; +} + +_public_ const char *sd_device_get_property_first(sd_device *device, const char **_value) { + const char *key; + int r; + + assert_return(device, NULL); + + r = device_properties_prepare(device); + if (r < 0) + return NULL; + + device->properties_iterator_generation = device->properties_generation; + device->properties_iterator = ITERATOR_FIRST; + + (void) ordered_hashmap_iterate(device->properties, &device->properties_iterator, (void**)_value, (const void**)&key); + return key; +} + +_public_ const char *sd_device_get_property_next(sd_device *device, const char **_value) { + const char *key; + int r; + + assert_return(device, NULL); + + r = device_properties_prepare(device); + if (r < 0) + return NULL; + + if (device->properties_iterator_generation != device->properties_generation) + return NULL; + + (void) ordered_hashmap_iterate(device->properties, &device->properties_iterator, (void**)_value, (const void**)&key); + return key; +} + +static int device_sysattrs_read_all_internal(sd_device *device, const char *subdir, Set **stack) { + _cleanup_closedir_ DIR *dir = NULL; + int r; + + assert(device); + assert(stack); + + r = device_opendir(device, subdir, &dir); + if (r == -ENOENT && subdir) + return 0; /* Maybe, this is a child device, and is already removed. */ + if (r < 0) + return r; + + if (subdir) { + if (faccessat(dirfd(dir), "uevent", F_OK, 0) >= 0) + return 0; /* this is a child device, skipping */ + if (errno != ENOENT) { + log_device_debug_errno(device, errno, + "sd-device: Failed to access %s/uevent, ignoring sub-directory %s: %m", + subdir, subdir); + return 0; + } + } + + FOREACH_DIRENT_ALL(de, dir, return -errno) { + _cleanup_free_ char *p = NULL; + struct stat statbuf; + + if (dot_or_dot_dot(de->d_name)) + continue; + + /* only handle symlinks, regular files, and directories */ + if (!IN_SET(de->d_type, DT_LNK, DT_REG, DT_DIR)) + continue; + + if (subdir) { + p = path_join(subdir, de->d_name); + if (!p) + return -ENOMEM; + } + + if (de->d_type == DT_DIR) { + /* push the sub-directory into the stack, and read it later. */ + if (p) + r = set_ensure_consume(stack, &path_hash_ops_free, TAKE_PTR(p)); + else + r = set_put_strdup_full(stack, &path_hash_ops_free, de->d_name); + if (r < 0) + return r; + + continue; + } + + if (fstatat(dirfd(dir), de->d_name, &statbuf, AT_SYMLINK_NOFOLLOW) < 0) + continue; + + if ((statbuf.st_mode & (S_IRUSR | S_IWUSR)) == 0) + continue; + + if (p) + r = set_ensure_consume(&device->sysattrs, &path_hash_ops_free, TAKE_PTR(p)); + else + r = set_put_strdup_full(&device->sysattrs, &path_hash_ops_free, de->d_name); + if (r < 0) + return r; + } + + return 0; +} + +static int device_sysattrs_read_all(sd_device *device) { + _cleanup_set_free_ Set *stack = NULL; + int r; + + assert(device); + + if (device->sysattrs_read) + return 0; + + r = device_sysattrs_read_all_internal(device, NULL, &stack); + if (r < 0) + return r; + + for (;;) { + _cleanup_free_ char *subdir = NULL; + + subdir = set_steal_first(stack); + if (!subdir) + break; + + r = device_sysattrs_read_all_internal(device, subdir, &stack); + if (r < 0) + return r; + } + + device->sysattrs_read = true; + + return 0; +} + +_public_ const char *sd_device_get_sysattr_first(sd_device *device) { + void *v; + int r; + + assert_return(device, NULL); + + if (!device->sysattrs_read) { + r = device_sysattrs_read_all(device); + if (r < 0) { + errno = -r; + return NULL; + } + } + + device->sysattrs_iterator = ITERATOR_FIRST; + + (void) set_iterate(device->sysattrs, &device->sysattrs_iterator, &v); + return v; +} + +_public_ const char *sd_device_get_sysattr_next(sd_device *device) { + void *v; + + assert_return(device, NULL); + + if (!device->sysattrs_read) + return NULL; + + (void) set_iterate(device->sysattrs, &device->sysattrs_iterator, &v); + return v; +} + +_public_ int sd_device_has_tag(sd_device *device, const char *tag) { + assert_return(device, -EINVAL); + assert_return(tag, -EINVAL); + + (void) device_read_db(device); + + return set_contains(device->all_tags, tag); +} + +_public_ int sd_device_has_current_tag(sd_device *device, const char *tag) { + assert_return(device, -EINVAL); + assert_return(tag, -EINVAL); + + if (!device_database_supports_current_tags(device)) + return sd_device_has_tag(device, tag); + + (void) device_read_db(device); + + return set_contains(device->current_tags, tag); +} + +_public_ int sd_device_get_property_value(sd_device *device, const char *key, const char **ret_value) { + const char *value; + int r; + + assert_return(device, -EINVAL); + assert_return(key, -EINVAL); + + r = device_properties_prepare(device); + if (r < 0) + return r; + + value = ordered_hashmap_get(device->properties, key); + if (!value) + return -ENOENT; + + if (ret_value) + *ret_value = value; + return 0; +} + +int device_get_property_bool(sd_device *device, const char *key) { + const char *value; + int r; + + assert(device); + assert(key); + + r = sd_device_get_property_value(device, key, &value); + if (r < 0) + return r; + + return parse_boolean(value); +} + +int device_get_property_int(sd_device *device, const char *key, int *ret) { + const char *value; + int r, v; + + assert(device); + assert(key); + + r = sd_device_get_property_value(device, key, &value); + if (r < 0) + return r; + + r = safe_atoi(value, &v); + if (r < 0) + return r; + + if (ret) + *ret = v; + return 0; +} + +_public_ int sd_device_get_trigger_uuid(sd_device *device, sd_id128_t *ret) { + const char *s; + sd_id128_t id; + int r; + + assert_return(device, -EINVAL); + + /* Retrieves the UUID attached to a uevent when triggering it from userspace via + * sd_device_trigger_with_uuid() or an equivalent interface. Returns -ENOENT if the record is not + * caused by a synthetic event and -ENODATA if it was but no UUID was specified */ + + r = sd_device_get_property_value(device, "SYNTH_UUID", &s); + if (r < 0) + return r; + + if (streq(s, "0")) /* SYNTH_UUID=0 is set whenever a device is triggered by userspace without specifying a UUID */ + return -ENODATA; + + r = sd_id128_from_string(s, &id); + if (r < 0) + return r; + + if (ret) + *ret = id; + + return 0; +} + +void device_clear_sysattr_cache(sd_device *device) { + device->sysattr_values = hashmap_free(device->sysattr_values); +} + +int device_cache_sysattr_value(sd_device *device, const char *key, char *value) { + _unused_ _cleanup_free_ char *old_value = NULL; + _cleanup_free_ char *new_key = NULL; + int r; + + assert(device); + assert(key); + + /* This takes the reference of the input value. The input value may be NULL. + * This replaces the value if it already exists. */ + + /* First, remove the old cache entry. So, we do not need to clear cache on error. */ + old_value = hashmap_remove2(device->sysattr_values, key, (void **) &new_key); + if (!new_key) { + new_key = strdup(key); + if (!new_key) + return -ENOMEM; + } + + r = hashmap_ensure_put(&device->sysattr_values, &path_hash_ops_free_free, new_key, value); + if (r < 0) + return r; + + TAKE_PTR(new_key); + + return 0; +} + +int device_get_cached_sysattr_value(sd_device *device, const char *key, const char **ret_value) { + const char *k = NULL, *value; + + assert(device); + assert(key); + + value = hashmap_get2(device->sysattr_values, key, (void **) &k); + if (!k) + return -ESTALE; /* We have not read the attribute. */ + if (!value) + return -ENOENT; /* We have looked up the attribute before and it did not exist. */ + if (ret_value) + *ret_value = value; + return 0; +} + +/* We cache all sysattr lookups. If an attribute does not exist, it is stored + * with a NULL value in the cache, otherwise the returned string is stored */ +_public_ int sd_device_get_sysattr_value(sd_device *device, const char *sysattr, const char **ret_value) { + _cleanup_free_ char *value = NULL, *path = NULL; + const char *syspath; + struct stat statbuf; + int r; + + assert_return(device, -EINVAL); + assert_return(sysattr, -EINVAL); + + /* look for possibly already cached result */ + r = device_get_cached_sysattr_value(device, sysattr, ret_value); + if (r != -ESTALE) + return r; + + r = sd_device_get_syspath(device, &syspath); + if (r < 0) + return r; + + path = path_join(syspath, sysattr); + if (!path) + return -ENOMEM; + + if (lstat(path, &statbuf) < 0) { + int k; + + r = -errno; + + /* remember that we could not access the sysattr */ + k = device_cache_sysattr_value(device, sysattr, NULL); + if (k < 0) + log_device_debug_errno(device, k, + "sd-device: failed to cache attribute '%s' with NULL, ignoring: %m", + sysattr); + + return r; + } else if (S_ISLNK(statbuf.st_mode)) { + /* Some core links return only the last element of the target path, + * these are just values, the paths should not be exposed. */ + if (STR_IN_SET(sysattr, "driver", "subsystem", "module")) { + r = readlink_value(path, &value); + if (r < 0) + return r; + } else + return -EINVAL; + } else if (S_ISDIR(statbuf.st_mode)) + /* skip directories */ + return -EISDIR; + else if (!(statbuf.st_mode & S_IRUSR)) + /* skip non-readable files */ + return -EPERM; + else { + size_t size; + + /* Read attribute value, Some attributes contain embedded '\0'. So, it is necessary to + * also get the size of the result. See issue #20025. */ + r = read_full_virtual_file(path, &value, &size); + if (r < 0) + return r; + + /* drop trailing newlines */ + while (size > 0 && strchr(NEWLINE, value[--size])) + value[size] = '\0'; + } + + /* Unfortunately, we need to return 'const char*' instead of 'char*'. Hence, failure in caching + * sysattr value is critical unlike the other places. */ + r = device_cache_sysattr_value(device, sysattr, value); + if (r < 0) { + log_device_debug_errno(device, r, + "sd-device: failed to cache attribute '%s' with '%s'%s: %m", + sysattr, value, ret_value ? "" : ", ignoring"); + if (ret_value) + return r; + + return 0; + } + + if (ret_value) + *ret_value = value; + + TAKE_PTR(value); + return 0; +} + +int device_get_sysattr_int(sd_device *device, const char *sysattr, int *ret_value) { + const char *value; + int r; + + r = sd_device_get_sysattr_value(device, sysattr, &value); + if (r < 0) + return r; + + int v; + r = safe_atoi(value, &v); + if (r < 0) + return log_device_debug_errno(device, r, "Failed to parse '%s' attribute: %m", sysattr); + + if (ret_value) + *ret_value = v; + /* We return "true" if the value is positive. */ + return v > 0; +} + +int device_get_sysattr_unsigned(sd_device *device, const char *sysattr, unsigned *ret_value) { + const char *value; + int r; + + r = sd_device_get_sysattr_value(device, sysattr, &value); + if (r < 0) + return r; + + unsigned v; + r = safe_atou(value, &v); + if (r < 0) + return log_device_debug_errno(device, r, "Failed to parse '%s' attribute: %m", sysattr); + + if (ret_value) + *ret_value = v; + /* We return "true" if the value is positive. */ + return v > 0; +} + +int device_get_sysattr_bool(sd_device *device, const char *sysattr) { + const char *value; + int r; + + assert(device); + assert(sysattr); + + r = sd_device_get_sysattr_value(device, sysattr, &value); + if (r < 0) + return r; + + return parse_boolean(value); +} + +static void device_remove_cached_sysattr_value(sd_device *device, const char *_key) { + _cleanup_free_ char *key = NULL; + + assert(device); + assert(_key); + + free(hashmap_remove2(device->sysattr_values, _key, (void **) &key)); +} + +_public_ int sd_device_set_sysattr_value(sd_device *device, const char *sysattr, const char *_value) { + _cleanup_free_ char *value = NULL, *path = NULL; + const char *syspath; + size_t len; + int r; + + assert_return(device, -EINVAL); + assert_return(sysattr, -EINVAL); + + /* Set the attribute and save it in the cache. */ + + if (!_value) { + /* If input value is NULL, then clear cache and not write anything. */ + device_remove_cached_sysattr_value(device, sysattr); + return 0; + } + + r = sd_device_get_syspath(device, &syspath); + if (r < 0) + return r; + + path = path_join(syspath, sysattr); + if (!path) + return -ENOMEM; + + len = strlen(_value); + + /* drop trailing newlines */ + while (len > 0 && strchr(NEWLINE, _value[len - 1])) + len --; + + /* value length is limited to 4k */ + if (len > 4096) + return -EINVAL; + + value = strndup(_value, len); + if (!value) + return -ENOMEM; + + r = write_string_file(path, value, WRITE_STRING_FILE_DISABLE_BUFFER | WRITE_STRING_FILE_NOFOLLOW); + if (r < 0) { + /* On failure, clear cache entry, as we do not know how it fails. */ + device_remove_cached_sysattr_value(device, sysattr); + return r; + } + + /* Do not cache action string written into uevent file. */ + if (streq(sysattr, "uevent")) + return 0; + + r = device_cache_sysattr_value(device, sysattr, value); + if (r < 0) + log_device_debug_errno(device, r, + "sd-device: failed to cache attribute '%s' with '%s', ignoring: %m", + sysattr, value); + else + TAKE_PTR(value); + + return 0; +} + +_public_ int sd_device_set_sysattr_valuef(sd_device *device, const char *sysattr, const char *format, ...) { + _cleanup_free_ char *value = NULL; + va_list ap; + int r; + + assert_return(device, -EINVAL); + assert_return(sysattr, -EINVAL); + + if (!format) { + device_remove_cached_sysattr_value(device, sysattr); + return 0; + } + + va_start(ap, format); + r = vasprintf(&value, format, ap); + va_end(ap); + + if (r < 0) + return -ENOMEM; + + return sd_device_set_sysattr_value(device, sysattr, value); +} + +_public_ int sd_device_trigger(sd_device *device, sd_device_action_t action) { + const char *s; + + assert_return(device, -EINVAL); + + s = device_action_to_string(action); + if (!s) + return -EINVAL; + + /* This uses the simple no-UUID interface of kernel < 4.13 */ + return sd_device_set_sysattr_value(device, "uevent", s); +} + +_public_ int sd_device_trigger_with_uuid( + sd_device *device, + sd_device_action_t action, + sd_id128_t *ret_uuid) { + + const char *s, *j; + sd_id128_t u; + int r; + + assert_return(device, -EINVAL); + + /* If no one wants to know the UUID, use the simple interface from pre-4.13 times */ + if (!ret_uuid) + return sd_device_trigger(device, action); + + s = device_action_to_string(action); + if (!s) + return -EINVAL; + + r = sd_id128_randomize(&u); + if (r < 0) + return r; + + j = strjoina(s, " ", SD_ID128_TO_UUID_STRING(u)); + + r = sd_device_set_sysattr_value(device, "uevent", j); + if (r < 0) + return r; + + *ret_uuid = u; + return 0; +} + +_public_ int sd_device_open(sd_device *device, int flags) { + _cleanup_close_ int fd = -EBADF, fd2 = -EBADF; + const char *devname, *subsystem = NULL; + uint64_t q, diskseq = 0; + struct stat st; + dev_t devnum; + int r; + + assert_return(device, -EINVAL); + assert_return(FLAGS_SET(flags, O_PATH) || !FLAGS_SET(flags, O_NOFOLLOW), -EINVAL); + + r = sd_device_get_devname(device, &devname); + if (r == -ENOENT) + return -ENOEXEC; + if (r < 0) + return r; + + r = sd_device_get_devnum(device, &devnum); + if (r == -ENOENT) + return -ENOEXEC; + if (r < 0) + return r; + + r = sd_device_get_subsystem(device, &subsystem); + if (r < 0 && r != -ENOENT) + return r; + + fd = open(devname, FLAGS_SET(flags, O_PATH) ? flags : O_CLOEXEC|O_NOFOLLOW|O_PATH); + if (fd < 0) + return -errno; + + if (fstat(fd, &st) < 0) + return -errno; + + if (st.st_rdev != devnum) + return -ENXIO; + + if (streq_ptr(subsystem, "block") ? !S_ISBLK(st.st_mode) : !S_ISCHR(st.st_mode)) + return -ENXIO; + + /* If flags has O_PATH, then we cannot check diskseq. Let's return earlier. */ + if (FLAGS_SET(flags, O_PATH)) + return TAKE_FD(fd); + + /* If the device is not initialized, then we cannot determine if we should check diskseq through + * ID_IGNORE_DISKSEQ property. Let's skip to check diskseq in that case. */ + r = sd_device_get_is_initialized(device); + if (r < 0) + return r; + if (r > 0) { + r = device_get_property_bool(device, "ID_IGNORE_DISKSEQ"); + if (r < 0 && r != -ENOENT) + return r; + if (r <= 0) { + r = sd_device_get_diskseq(device, &diskseq); + if (r < 0 && r != -ENOENT) + return r; + } + } + + fd2 = fd_reopen(fd, flags); + if (fd2 < 0) + return fd2; + + if (diskseq == 0) + return TAKE_FD(fd2); + + r = fd_get_diskseq(fd2, &q); + if (r < 0) + return r; + + if (q != diskseq) + return -ENXIO; + + return TAKE_FD(fd2); +} + +int device_opendir(sd_device *device, const char *subdir, DIR **ret) { + _cleanup_closedir_ DIR *d = NULL; + _cleanup_free_ char *path = NULL; + const char *syspath; + int r; + + assert(device); + assert(ret); + + r = sd_device_get_syspath(device, &syspath); + if (r < 0) + return r; + + if (subdir) { + if (!path_is_safe(subdir)) + return -EINVAL; + + path = path_join(syspath, subdir); + if (!path) + return -ENOMEM; + } + + d = opendir(path ?: syspath); + if (!d) + return -errno; + + *ret = TAKE_PTR(d); + return 0; +} diff --git a/src/libsystemd/sd-device/test-device-util.c b/src/libsystemd/sd-device/test-device-util.c new file mode 100644 index 0000000..bc8ab66 --- /dev/null +++ b/src/libsystemd/sd-device/test-device-util.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "device-util.h" +#include "tests.h" + +TEST(log_device_full) { + int r; + + for (int level = LOG_ERR; level <= LOG_DEBUG; level++) { + log_device_full(NULL, level, "test level=%d: %m", level); + + r = log_device_full_errno(NULL, level, EUCLEAN, "test level=%d errno=EUCLEAN: %m", level); + assert_se(r == -EUCLEAN); + + r = log_device_full_errno(NULL, level, 0, "test level=%d errno=0: %m", level); + assert_se(r == 0); + + r = log_device_full_errno(NULL, level, SYNTHETIC_ERRNO(ENODATA), "test level=%d errno=S(ENODATA): %m", level); + assert_se(r == -ENODATA); + } +} + +DEFINE_TEST_MAIN(LOG_INFO); diff --git a/src/libsystemd/sd-device/test-sd-device-monitor.c b/src/libsystemd/sd-device/test-sd-device-monitor.c new file mode 100644 index 0000000..e124e00 --- /dev/null +++ b/src/libsystemd/sd-device/test-sd-device-monitor.c @@ -0,0 +1,344 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <stdbool.h> +#include <unistd.h> + +#include "sd-device.h" +#include "sd-event.h" + +#include "device-monitor-private.h" +#include "device-private.h" +#include "device-util.h" +#include "macro.h" +#include "path-util.h" +#include "stat-util.h" +#include "string-util.h" +#include "tests.h" +#include "virt.h" + +static int monitor_handler(sd_device_monitor *m, sd_device *d, void *userdata) { + const char *s, *syspath = userdata; + + assert_se(sd_device_get_syspath(d, &s) >= 0); + assert_se(streq(s, syspath)); + + return sd_event_exit(sd_device_monitor_get_event(m), 100); +} + +static void test_receive_device_fail(void) { + _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL; + _cleanup_(sd_device_unrefp) sd_device *loopback = NULL; + const char *syspath; + + log_info("/* %s */", __func__); + + /* Try to send device with invalid action and without seqnum. */ + assert_se(sd_device_new_from_syspath(&loopback, "/sys/class/net/lo") >= 0); + assert_se(device_add_property(loopback, "ACTION", "hoge") >= 0); + + assert_se(sd_device_get_syspath(loopback, &syspath) >= 0); + + assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0); + assert_se(sd_device_monitor_set_description(monitor_server, "sender") >= 0); + assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0); + + assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0); + assert_se(sd_device_monitor_set_description(monitor_client, "receiver") >= 0); + assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0); + assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0); + + assert_se(device_monitor_send_device(monitor_server, monitor_client, loopback) >= 0); + assert_se(sd_event_run(sd_device_monitor_get_event(monitor_client), 0) >= 0); +} + +static void test_send_receive_one(sd_device *device, bool subsystem_filter, bool tag_filter, bool use_bpf) { + _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL; + const char *syspath, *subsystem, *devtype = NULL; + + log_device_info(device, "/* %s(subsystem_filter=%s, tag_filter=%s, use_bpf=%s) */", __func__, + true_false(subsystem_filter), true_false(tag_filter), true_false(use_bpf)); + + assert_se(sd_device_get_syspath(device, &syspath) >= 0); + + assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0); + assert_se(sd_device_monitor_set_description(monitor_server, "sender") >= 0); + assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0); + + assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0); + assert_se(sd_device_monitor_set_description(monitor_client, "receiver") >= 0); + assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0); + assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0); + + if (subsystem_filter) { + assert_se(sd_device_get_subsystem(device, &subsystem) >= 0); + (void) sd_device_get_devtype(device, &devtype); + assert_se(sd_device_monitor_filter_add_match_subsystem_devtype(monitor_client, subsystem, devtype) >= 0); + } + + if (tag_filter) + FOREACH_DEVICE_TAG(device, tag) + assert_se(sd_device_monitor_filter_add_match_tag(monitor_client, tag) >= 0); + + if ((subsystem_filter || tag_filter) && use_bpf) + assert_se(sd_device_monitor_filter_update(monitor_client) >= 0); + + assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0); + assert_se(sd_event_loop(sd_device_monitor_get_event(monitor_client)) == 100); +} + +static void test_subsystem_filter(sd_device *device) { + _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL; + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + const char *syspath, *subsystem; + + log_device_info(device, "/* %s */", __func__); + + assert_se(sd_device_get_syspath(device, &syspath) >= 0); + assert_se(sd_device_get_subsystem(device, &subsystem) >= 0); + + assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0); + assert_se(sd_device_monitor_set_description(monitor_server, "sender") >= 0); + assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0); + + assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0); + assert_se(sd_device_monitor_set_description(monitor_client, "receiver") >= 0); + assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0); + assert_se(sd_device_monitor_filter_add_match_subsystem_devtype(monitor_client, subsystem, NULL) >= 0); + assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0); + + assert_se(sd_device_enumerator_new(&e) >= 0); + assert_se(sd_device_enumerator_add_match_subsystem(e, subsystem, false) >= 0); + FOREACH_DEVICE(e, d) { + const char *p, *s; + + assert_se(sd_device_get_syspath(d, &p) >= 0); + assert_se(sd_device_get_subsystem(d, &s) >= 0); + + assert_se(device_add_property(d, "ACTION", "add") >= 0); + assert_se(device_add_property(d, "SEQNUM", "10") >= 0); + + log_device_debug(d, "Sending device subsystem:%s syspath:%s", s, p); + assert_se(device_monitor_send_device(monitor_server, monitor_client, d) >= 0); + } + + log_device_info(device, "Sending device subsystem:%s syspath:%s", subsystem, syspath); + assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0); + assert_se(sd_event_loop(sd_device_monitor_get_event(monitor_client)) == 100); +} + +static void test_tag_filter(sd_device *device) { + _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL; + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + const char *syspath; + + log_device_info(device, "/* %s */", __func__); + + assert_se(sd_device_get_syspath(device, &syspath) >= 0); + + assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0); + assert_se(sd_device_monitor_set_description(monitor_server, "sender") >= 0); + assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0); + + assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0); + assert_se(sd_device_monitor_set_description(monitor_client, "receiver") >= 0); + assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0); + assert_se(sd_device_monitor_filter_add_match_tag(monitor_client, "TEST_SD_DEVICE_MONITOR") >= 0); + assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0); + + assert_se(sd_device_enumerator_new(&e) >= 0); + FOREACH_DEVICE(e, d) { + const char *p; + + assert_se(sd_device_get_syspath(d, &p) >= 0); + + assert_se(device_add_property(d, "ACTION", "add") >= 0); + assert_se(device_add_property(d, "SEQNUM", "10") >= 0); + + log_device_debug(d, "Sending device syspath:%s", p); + assert_se(device_monitor_send_device(monitor_server, monitor_client, d) >= 0); + } + + log_device_info(device, "Sending device syspath:%s", syspath); + assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0); + assert_se(sd_event_loop(sd_device_monitor_get_event(monitor_client)) == 100); + +} + +static void test_sysattr_filter(sd_device *device, const char *sysattr) { + _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL; + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + const char *syspath, *sysattr_value; + + log_device_info(device, "/* %s(%s) */", __func__, sysattr); + + assert_se(sd_device_get_syspath(device, &syspath) >= 0); + assert_se(sd_device_get_sysattr_value(device, sysattr, &sysattr_value) >= 0); + + assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0); + assert_se(sd_device_monitor_set_description(monitor_server, "sender") >= 0); + assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0); + + assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0); + assert_se(sd_device_monitor_set_description(monitor_client, "receiver") >= 0); + assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0); + assert_se(sd_device_monitor_filter_add_match_sysattr(monitor_client, sysattr, sysattr_value, true) >= 0); + assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0); + + assert_se(sd_device_enumerator_new(&e) >= 0); + assert_se(sd_device_enumerator_add_match_sysattr(e, sysattr, sysattr_value, false) >= 0); + FOREACH_DEVICE(e, d) { + const char *p; + + assert_se(sd_device_get_syspath(d, &p) >= 0); + + assert_se(device_add_property(d, "ACTION", "add") >= 0); + assert_se(device_add_property(d, "SEQNUM", "10") >= 0); + + log_device_debug(d, "Sending device syspath:%s", p); + assert_se(device_monitor_send_device(monitor_server, monitor_client, d) >= 0); + + /* The sysattr filter is not implemented in BPF yet. So, sending multiple devices may fills up + * buffer and device_monitor_send_device() may return EAGAIN. Let's send one device here, + * which should be filtered out by the receiver. */ + break; + } + + log_device_info(device, "Sending device syspath:%s", syspath); + assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0); + assert_se(sd_event_loop(sd_device_monitor_get_event(monitor_client)) == 100); + +} + +static void test_parent_filter(sd_device *device) { + _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL; + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + const char *syspath, *parent_syspath; + sd_device *parent; + int r; + + log_device_info(device, "/* %s */", __func__); + + assert_se(sd_device_get_syspath(device, &syspath) >= 0); + r = sd_device_get_parent(device, &parent); + if (r < 0) + return (void) log_device_info(device, "Device does not have parent, skipping."); + assert_se(sd_device_get_syspath(parent, &parent_syspath) >= 0); + + assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0); + assert_se(sd_device_monitor_set_description(monitor_server, "sender") >= 0); + assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0); + + assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0); + assert_se(sd_device_monitor_set_description(monitor_client, "receiver") >= 0); + assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0); + assert_se(sd_device_monitor_filter_add_match_parent(monitor_client, parent, true) >= 0); + assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0); + + assert_se(sd_device_enumerator_new(&e) >= 0); + FOREACH_DEVICE(e, d) { + const char *p; + + assert_se(sd_device_get_syspath(d, &p) >= 0); + if (path_startswith(p, parent_syspath)) + continue; + + assert_se(device_add_property(d, "ACTION", "add") >= 0); + assert_se(device_add_property(d, "SEQNUM", "10") >= 0); + + log_device_debug(d, "Sending device syspath:%s", p); + assert_se(device_monitor_send_device(monitor_server, monitor_client, d) >= 0); + + /* The parent filter is not implemented in BPF yet. So, sending multiple devices may fills up + * buffer and device_monitor_send_device() may return EAGAIN. Let's send one device here, + * which should be filtered out by the receiver. */ + break; + } + + log_device_info(device, "Sending device syspath:%s", syspath); + assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0); + assert_se(sd_event_loop(sd_device_monitor_get_event(monitor_client)) == 100); + +} + +static void test_sd_device_monitor_filter_remove(sd_device *device) { + _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor_server = NULL, *monitor_client = NULL; + const char *syspath; + + log_device_info(device, "/* %s */", __func__); + + assert_se(sd_device_get_syspath(device, &syspath) >= 0); + + assert_se(device_monitor_new_full(&monitor_server, MONITOR_GROUP_NONE, -1) >= 0); + assert_se(sd_device_monitor_set_description(monitor_server, "sender") >= 0); + assert_se(sd_device_monitor_start(monitor_server, NULL, NULL) >= 0); + + assert_se(device_monitor_new_full(&monitor_client, MONITOR_GROUP_NONE, -1) >= 0); + assert_se(sd_device_monitor_set_description(monitor_client, "receiver") >= 0); + assert_se(device_monitor_allow_unicast_sender(monitor_client, monitor_server) >= 0); + assert_se(sd_device_monitor_start(monitor_client, monitor_handler, (void *) syspath) >= 0); + + assert_se(sd_device_monitor_filter_add_match_subsystem_devtype(monitor_client, "hoge", NULL) >= 0); + assert_se(sd_device_monitor_filter_update(monitor_client) >= 0); + + assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0); + assert_se(sd_event_run(sd_device_monitor_get_event(monitor_client), 0) >= 0); + + assert_se(sd_device_monitor_filter_remove(monitor_client) >= 0); + + assert_se(device_monitor_send_device(monitor_server, monitor_client, device) >= 0); + assert_se(sd_event_loop(sd_device_monitor_get_event(monitor_client)) == 100); +} + +int main(int argc, char *argv[]) { + _cleanup_(sd_device_unrefp) sd_device *loopback = NULL, *sda = NULL; + int r; + + test_setup_logging(LOG_INFO); + + if (getuid() != 0) + return log_tests_skipped("not root"); + + if (path_is_read_only_fs("/sys") > 0) + return log_tests_skipped("Running in container"); + + test_receive_device_fail(); + + assert_se(sd_device_new_from_syspath(&loopback, "/sys/class/net/lo") >= 0); + assert_se(device_add_property(loopback, "ACTION", "add") >= 0); + assert_se(device_add_property(loopback, "SEQNUM", "10") >= 0); + assert_se(device_add_tag(loopback, "TEST_SD_DEVICE_MONITOR", true) >= 0); + + test_send_receive_one(loopback, false, false, false); + test_send_receive_one(loopback, true, false, false); + test_send_receive_one(loopback, false, true, false); + test_send_receive_one(loopback, true, true, false); + test_send_receive_one(loopback, true, false, true); + test_send_receive_one(loopback, false, true, true); + test_send_receive_one(loopback, true, true, true); + + test_subsystem_filter(loopback); + test_tag_filter(loopback); + test_sysattr_filter(loopback, "ifindex"); + test_sd_device_monitor_filter_remove(loopback); + + r = sd_device_new_from_subsystem_sysname(&sda, "block", "sda"); + if (r < 0) { + log_info_errno(r, "Failed to create sd_device for sda, skipping remaining tests: %m"); + return 0; + } + + assert_se(device_add_property(sda, "ACTION", "change") >= 0); + assert_se(device_add_property(sda, "SEQNUM", "11") >= 0); + + test_send_receive_one(sda, false, false, false); + test_send_receive_one(sda, true, false, false); + test_send_receive_one(sda, false, true, false); + test_send_receive_one(sda, true, true, false); + test_send_receive_one(sda, true, false, true); + test_send_receive_one(sda, false, true, true); + test_send_receive_one(sda, true, true, true); + + test_parent_filter(sda); + + return 0; +} diff --git a/src/libsystemd/sd-device/test-sd-device-thread.c b/src/libsystemd/sd-device/test-sd-device-thread.c new file mode 100644 index 0000000..c99d179 --- /dev/null +++ b/src/libsystemd/sd-device/test-sd-device-thread.c @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> + +#include "sd-device.h" + +#include "device-util.h" + +#define handle_error_errno(error, msg) \ + ({ \ + errno = abs(error); \ + perror(msg); \ + EXIT_FAILURE; \ + }) + +static void* thread(void *p) { + sd_device **d = p; + + *d = sd_device_unref(*d); + + return NULL; +} + +int main(int argc, char *argv[]) { + sd_device *loopback; + pthread_t t; + int r; + + r = sd_device_new_from_syspath(&loopback, "/sys/class/net/lo"); + if (r < 0) + return handle_error_errno(r, "Failed to create loopback device object"); + + FOREACH_DEVICE_PROPERTY(loopback, key, value) + printf("%s=%s\n", key, value); + + r = pthread_create(&t, NULL, thread, &loopback); + if (r != 0) + return handle_error_errno(r, "Failed to create thread"); + + r = pthread_join(t, NULL); + if (r != 0) + return handle_error_errno(r, "Failed to wait thread finished"); + + if (loopback) + return handle_error_errno(r, "loopback device is not unref()ed"); + + return 0; +} diff --git a/src/libsystemd/sd-device/test-sd-device.c b/src/libsystemd/sd-device/test-sd-device.c new file mode 100644 index 0000000..bce99b5 --- /dev/null +++ b/src/libsystemd/sd-device/test-sd-device.c @@ -0,0 +1,678 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <ctype.h> +#include <fcntl.h> +#include <unistd.h> + +#include "device-enumerator-private.h" +#include "device-internal.h" +#include "device-private.h" +#include "device-util.h" +#include "errno-util.h" +#include "fd-util.h" +#include "hashmap.h" +#include "nulstr-util.h" +#include "path-util.h" +#include "rm-rf.h" +#include "stat-util.h" +#include "string-util.h" +#include "tests.h" +#include "time-util.h" +#include "tmpfile-util.h" +#include "udev-util.h" + +static void test_sd_device_one(sd_device *d) { + _cleanup_(sd_device_unrefp) sd_device *dev = NULL; + const char *syspath, *sysname, *subsystem = NULL, *devname, *val; + bool is_block = false; + dev_t devnum; + usec_t usec; + int ifindex, r; + + assert_se(sd_device_get_syspath(d, &syspath) >= 0); + assert_se(path_startswith(syspath, "/sys")); + assert_se(sd_device_get_sysname(d, &sysname) >= 0); + + log_info("%s(%s)", __func__, syspath); + + assert_se(sd_device_new_from_syspath(&dev, syspath) >= 0); + assert_se(sd_device_get_syspath(dev, &val) >= 0); + assert_se(streq(syspath, val)); + dev = sd_device_unref(dev); + + assert_se(sd_device_new_from_path(&dev, syspath) >= 0); + assert_se(sd_device_get_syspath(dev, &val) >= 0); + assert_se(streq(syspath, val)); + dev = sd_device_unref(dev); + + r = sd_device_get_ifindex(d, &ifindex); + if (r >= 0) { + assert_se(ifindex > 0); + + r = sd_device_new_from_ifindex(&dev, ifindex); + if (r == -ENODEV) + log_device_warning_errno(d, r, + "Failed to create sd-device object from ifindex %i. " + "Maybe running on a non-host network namespace.", ifindex); + else { + assert_se(r >= 0); + assert_se(sd_device_get_syspath(dev, &val) >= 0); + assert_se(streq(syspath, val)); + dev = sd_device_unref(dev); + } + + /* This does not require the interface really exists on the network namespace. + * Hence, this should always succeed. */ + assert_se(sd_device_new_from_ifname(&dev, sysname) >= 0); + assert_se(sd_device_get_syspath(dev, &val) >= 0); + assert_se(streq(syspath, val)); + dev = sd_device_unref(dev); + } else + assert_se(r == -ENOENT); + + r = sd_device_get_subsystem(d, &subsystem); + if (r < 0) + assert_se(r == -ENOENT); + else if (!streq(subsystem, "gpio")) { /* Unfortunately, there exist /sys/class/gpio and /sys/bus/gpio. + * Hence, sd_device_new_from_subsystem_sysname() and + * sd_device_new_from_device_id() may not work as expected. */ + const char *name, *id; + + if (streq(subsystem, "drivers")) + name = strjoina(d->driver_subsystem, ":", sysname); + else + name = sysname; + assert_se(sd_device_new_from_subsystem_sysname(&dev, subsystem, name) >= 0); + assert_se(sd_device_get_syspath(dev, &val) >= 0); + assert_se(streq(syspath, val)); + dev = sd_device_unref(dev); + + /* The device ID depends on subsystem. */ + assert_se(device_get_device_id(d, &id) >= 0); + r = sd_device_new_from_device_id(&dev, id); + if (r == -ENODEV && ifindex > 0) + log_device_warning_errno(d, r, + "Failed to create sd-device object from device ID \"%s\". " + "Maybe running on a non-host network namespace.", id); + else { + assert_se(r >= 0); + assert_se(sd_device_get_syspath(dev, &val) >= 0); + assert_se(streq(syspath, val)); + dev = sd_device_unref(dev); + } + + /* These require udev database, and reading database requires device ID. */ + r = sd_device_get_is_initialized(d); + if (r > 0) { + r = sd_device_get_usec_since_initialized(d, &usec); + assert_se((r >= 0 && usec > 0) || r == -ENODATA); + } else + assert(r == 0); + + r = sd_device_get_property_value(d, "ID_NET_DRIVER", &val); + assert_se(r >= 0 || r == -ENOENT); + } + + is_block = streq_ptr(subsystem, "block"); + + r = sd_device_get_devname(d, &devname); + if (r >= 0) { + r = sd_device_new_from_devname(&dev, devname); + if (r >= 0) { + assert_se(sd_device_get_syspath(dev, &val) >= 0); + assert_se(streq(syspath, val)); + dev = sd_device_unref(dev); + } else + assert_se(r == -ENODEV || ERRNO_IS_PRIVILEGE(r)); + + r = sd_device_new_from_path(&dev, devname); + if (r >= 0) { + assert_se(sd_device_get_syspath(dev, &val) >= 0); + assert_se(streq(syspath, val)); + dev = sd_device_unref(dev); + + _cleanup_close_ int fd = -EBADF; + fd = sd_device_open(d, O_CLOEXEC| O_NONBLOCK | (is_block ? O_RDONLY : O_NOCTTY | O_PATH)); + assert_se(fd >= 0 || ERRNO_IS_PRIVILEGE(fd)); + } else + assert_se(r == -ENODEV || ERRNO_IS_PRIVILEGE(r)); + } else + assert_se(r == -ENOENT); + + r = sd_device_get_devnum(d, &devnum); + if (r >= 0) { + _cleanup_free_ char *p = NULL; + + assert_se(major(devnum) > 0); + + assert_se(sd_device_new_from_devnum(&dev, is_block ? 'b' : 'c', devnum) >= 0); + assert_se(sd_device_get_syspath(dev, &val) >= 0); + assert_se(streq(syspath, val)); + dev = sd_device_unref(dev); + + assert_se(asprintf(&p, "/dev/%s/%u:%u", is_block ? "block" : "char", major(devnum), minor(devnum)) >= 0); + assert_se(sd_device_new_from_devname(&dev, p) >= 0); + assert_se(sd_device_get_syspath(dev, &val) >= 0); + assert_se(streq(syspath, val)); + dev = sd_device_unref(dev); + + assert_se(sd_device_new_from_path(&dev, p) >= 0); + assert_se(sd_device_get_syspath(dev, &val) >= 0); + assert_se(streq(syspath, val)); + dev = sd_device_unref(dev); + } else + assert_se(r == -ENOENT); + + assert_se(sd_device_get_devpath(d, &val) >= 0); + + r = sd_device_get_devtype(d, &val); + assert_se(r >= 0 || r == -ENOENT); + + r = sd_device_get_driver(d, &val); + assert_se(r >= 0 || r == -ENOENT); + + r = sd_device_get_sysnum(d, &val); + if (r >= 0) { + assert_se(val > sysname); + assert_se(val < sysname + strlen(sysname)); + assert_se(in_charset(val, DIGITS)); + assert_se(!ascii_isdigit(val[-1])); + } else + assert_se(r == -ENOENT); + + r = sd_device_get_sysattr_value(d, "nsid", NULL); + if (r >= 0) { + unsigned x; + + assert_se(device_get_sysattr_unsigned(d, "nsid", NULL) >= 0); + r = device_get_sysattr_unsigned(d, "nsid", &x); + assert_se(r >= 0); + assert_se((x > 0) == (r > 0)); + } else + assert_se(ERRNO_IS_PRIVILEGE(r) || IN_SET(r, -ENOENT, -EINVAL)); +} + +TEST(sd_device_enumerator_devices) { + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + + assert_se(sd_device_enumerator_new(&e) >= 0); + assert_se(sd_device_enumerator_allow_uninitialized(e) >= 0); + /* On some CI environments, it seems some loop block devices and corresponding bdi devices sometimes + * disappear during running this test. Let's exclude them here for stability. */ + assert_se(sd_device_enumerator_add_match_subsystem(e, "bdi", false) >= 0); + assert_se(sd_device_enumerator_add_nomatch_sysname(e, "loop*") >= 0); + /* On CentOS CI, systemd-networkd-tests.py may be running when this test is invoked. The networkd + * test creates and removes many network interfaces, and may interfere with this test. */ + assert_se(sd_device_enumerator_add_match_subsystem(e, "net", false) >= 0); + FOREACH_DEVICE(e, d) + test_sd_device_one(d); +} + +TEST(sd_device_enumerator_subsystems) { + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + + assert_se(sd_device_enumerator_new(&e) >= 0); + assert_se(sd_device_enumerator_allow_uninitialized(e) >= 0); + FOREACH_SUBSYSTEM(e, d) + test_sd_device_one(d); +} + +static void test_sd_device_enumerator_filter_subsystem_one( + const char *subsystem, + Hashmap *h, + unsigned *ret_n_new_dev, + unsigned *ret_n_removed_dev) { + + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + unsigned n_new_dev = 0, n_removed_dev = 0; + sd_device *dev; + + assert_se(sd_device_enumerator_new(&e) >= 0); + assert_se(sd_device_enumerator_add_match_subsystem(e, subsystem, true) >= 0); + assert_se(sd_device_enumerator_add_nomatch_sysname(e, "loop*") >= 0); + + FOREACH_DEVICE(e, d) { + const char *syspath; + sd_device *t; + + assert_se(sd_device_get_syspath(d, &syspath) >= 0); + t = hashmap_remove(h, syspath); + + if (!t) { + log_warning("New device found: subsystem:%s syspath:%s", subsystem, syspath); + n_new_dev++; + } + + assert_se(!sd_device_unref(t)); + } + + HASHMAP_FOREACH(dev, h) { + const char *syspath; + + assert_se(sd_device_get_syspath(dev, &syspath) >= 0); + log_warning("Device removed: subsystem:%s syspath:%s", subsystem, syspath); + n_removed_dev++; + + assert_se(!sd_device_unref(dev)); + } + + hashmap_free(h); + + *ret_n_new_dev = n_new_dev; + *ret_n_removed_dev = n_removed_dev; +} + +static bool test_sd_device_enumerator_filter_subsystem_trial(void) { + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + _cleanup_hashmap_free_ Hashmap *subsystems = NULL; + unsigned n_new_dev = 0, n_removed_dev = 0; + Hashmap *h; + char *s; + + assert_se(subsystems = hashmap_new(&string_hash_ops)); + assert_se(sd_device_enumerator_new(&e) >= 0); + /* See comments in TEST(sd_device_enumerator_devices). */ + assert_se(sd_device_enumerator_add_match_subsystem(e, "bdi", false) >= 0); + assert_se(sd_device_enumerator_add_nomatch_sysname(e, "loop*") >= 0); + assert_se(sd_device_enumerator_add_match_subsystem(e, "net", false) >= 0); + + FOREACH_DEVICE(e, d) { + const char *syspath, *subsystem; + int r; + + assert_se(sd_device_get_syspath(d, &syspath) >= 0); + + r = sd_device_get_subsystem(d, &subsystem); + assert_se(r >= 0 || r == -ENOENT); + if (r < 0) + continue; + + h = hashmap_get(subsystems, subsystem); + if (!h) { + char *str; + assert_se(str = strdup(subsystem)); + assert_se(h = hashmap_new(&string_hash_ops)); + assert_se(hashmap_put(subsystems, str, h) >= 0); + } + + assert_se(hashmap_put(h, syspath, d) >= 0); + assert_se(sd_device_ref(d)); + + log_debug("Added subsystem:%s syspath:%s", subsystem, syspath); + } + + while ((h = hashmap_steal_first_key_and_value(subsystems, (void**) &s))) { + unsigned n, m; + + test_sd_device_enumerator_filter_subsystem_one(s, TAKE_PTR(h), &n, &m); + free(s); + + n_new_dev += n; + n_removed_dev += m; + } + + if (n_new_dev > 0) + log_warning("%u new devices are found in re-scan", n_new_dev); + if (n_removed_dev > 0) + log_warning("%u devices removed in re-scan", n_removed_dev); + + return n_new_dev + n_removed_dev == 0; +} + +static bool test_sd_device_enumerator_filter_subsystem_trial_many(void) { + for (unsigned i = 0; i < 20; i++) { + log_debug("%s(): trial %u", __func__, i); + if (test_sd_device_enumerator_filter_subsystem_trial()) + return true; + } + + return false; +} + +static int on_inotify(sd_event_source *s, const struct inotify_event *event, void *userdata) { + if (test_sd_device_enumerator_filter_subsystem_trial_many()) + return sd_event_exit(sd_event_source_get_event(s), 0); + + return sd_event_exit(sd_event_source_get_event(s), -EBUSY); +} + +TEST(sd_device_enumerator_filter_subsystem) { + /* The test test_sd_device_enumerator_filter_subsystem_trial() is quite racy. Let's run the function + * several times after the udev queue becomes empty. */ + + if (!udev_available() || (access("/run/udev", F_OK) < 0 && errno == ENOENT)) { + assert_se(test_sd_device_enumerator_filter_subsystem_trial_many()); + return; + } + + _cleanup_(sd_event_unrefp) sd_event *event = NULL; + assert_se(sd_event_default(&event) >= 0); + assert_se(sd_event_add_inotify(event, NULL, "/run/udev" , IN_DELETE, on_inotify, NULL) >= 0); + + if (udev_queue_is_empty() == 0) { + log_debug("udev queue is not empty, waiting for all queued events to be processed."); + assert_se(sd_event_loop(event) >= 0); + } else + assert_se(test_sd_device_enumerator_filter_subsystem_trial_many()); +} + +TEST(sd_device_enumerator_add_match_sysattr) { + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + sd_device *dev; + int ifindex; + + assert_se(sd_device_enumerator_new(&e) >= 0); + assert_se(sd_device_enumerator_allow_uninitialized(e) >= 0); + assert_se(sd_device_enumerator_add_match_subsystem(e, "net", true) >= 0); + assert_se(sd_device_enumerator_add_match_sysattr(e, "ifindex", "1", true) >= 0); + assert_se(sd_device_enumerator_add_match_sysattr(e, "ifindex", "hoge", true) >= 0); + assert_se(sd_device_enumerator_add_match_sysattr(e, "ifindex", "foo", true) >= 0); + assert_se(sd_device_enumerator_add_match_sysattr(e, "ifindex", "bar", false) >= 0); + assert_se(sd_device_enumerator_add_match_sysattr(e, "ifindex", "baz", false) >= 0); + + dev = sd_device_enumerator_get_device_first(e); + assert_se(dev); + assert_se(sd_device_get_ifindex(dev, &ifindex) >= 0); + assert_se(ifindex == 1); + + assert_se(!sd_device_enumerator_get_device_next(e)); +} + +TEST(sd_device_enumerator_add_match_property) { + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + sd_device *dev; + int ifindex; + + assert_se(sd_device_enumerator_new(&e) >= 0); + assert_se(sd_device_enumerator_allow_uninitialized(e) >= 0); + assert_se(sd_device_enumerator_add_match_subsystem(e, "net", true) >= 0); + assert_se(sd_device_enumerator_add_match_sysattr(e, "ifindex", "1", true) >= 0); + assert_se(sd_device_enumerator_add_match_property(e, "IFINDE*", "1*") >= 0); + assert_se(sd_device_enumerator_add_match_property(e, "IFINDE*", "hoge") >= 0); + assert_se(sd_device_enumerator_add_match_property(e, "IFINDE*", NULL) >= 0); + assert_se(sd_device_enumerator_add_match_property(e, "AAAAA", "BBBB") >= 0); + assert_se(sd_device_enumerator_add_match_property(e, "FOOOO", NULL) >= 0); + + dev = sd_device_enumerator_get_device_first(e); + assert_se(dev); + assert_se(sd_device_get_ifindex(dev, &ifindex) >= 0); + assert_se(ifindex == 1); +} + +TEST(sd_device_enumerator_add_match_property_required) { + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + sd_device *dev; + int ifindex; + + assert_se(sd_device_enumerator_new(&e) >= 0); + assert_se(sd_device_enumerator_allow_uninitialized(e) >= 0); + assert_se(sd_device_enumerator_add_match_subsystem(e, "net", true) >= 0); + assert_se(sd_device_enumerator_add_match_sysattr(e, "ifindex", "1", true) >= 0); + assert_se(sd_device_enumerator_add_match_property_required(e, "IFINDE*", "1*") >= 0); + + /* Only one required match which should be satisfied. */ + dev = sd_device_enumerator_get_device_first(e); + assert_se(dev); + assert_se(sd_device_get_ifindex(dev, &ifindex) >= 0); + assert_se(ifindex == 1); + + /* Now let's add a bunch of garbage properties which should not be satisfied. */ + assert_se(sd_device_enumerator_add_match_property_required(e, "IFINDE*", "hoge") >= 0); + assert_se(sd_device_enumerator_add_match_property_required(e, "IFINDE*", NULL) >= 0); + assert_se(sd_device_enumerator_add_match_property_required(e, "AAAAA", "BBBB") >= 0); + assert_se(sd_device_enumerator_add_match_property_required(e, "FOOOO", NULL) >= 0); + + assert_se(!sd_device_enumerator_get_device_first(e)); +} + +static void check_parent_match(sd_device_enumerator *e, sd_device *dev) { + const char *syspath; + bool found = false; + + assert_se(sd_device_get_syspath(dev, &syspath) >= 0); + + FOREACH_DEVICE(e, d) { + const char *s; + + assert_se(sd_device_get_syspath(d, &s) >= 0); + if (streq(s, syspath)) { + found = true; + break; + } + } + + if (!found) { + log_device_debug(dev, "not enumerated, already removed??"); + /* If the original device not found, then the device should be already removed. */ + assert_se(access(syspath, F_OK) < 0); + assert_se(errno == ENOENT); + } +} + +TEST(sd_device_enumerator_add_match_parent) { + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + int r; + + assert_se(sd_device_enumerator_new(&e) >= 0); + assert_se(sd_device_enumerator_allow_uninitialized(e) >= 0); + /* See comments in TEST(sd_device_enumerator_devices). */ + assert_se(sd_device_enumerator_add_match_subsystem(e, "bdi", false) >= 0); + assert_se(sd_device_enumerator_add_nomatch_sysname(e, "loop*") >= 0); + assert_se(sd_device_enumerator_add_match_subsystem(e, "net", false) >= 0); + + if (!slow_tests_enabled()) + assert_se(sd_device_enumerator_add_match_subsystem(e, "block", true) >= 0); + + FOREACH_DEVICE(e, dev) { + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *p = NULL; + const char *syspath; + sd_device *parent; + + assert_se(sd_device_get_syspath(dev, &syspath) >= 0); + + r = sd_device_get_parent(dev, &parent); + if (r < 0) { + assert_se(ERRNO_IS_DEVICE_ABSENT(r)); + continue; + } + + log_debug("> %s", syspath); + + assert_se(sd_device_enumerator_new(&p) >= 0); + assert_se(sd_device_enumerator_allow_uninitialized(p) >= 0); + assert_se(sd_device_enumerator_add_match_parent(p, parent) >= 0); + + check_parent_match(p, dev); + + /* If the device does not have subsystem, then it is not enumerated. */ + r = sd_device_get_subsystem(parent, NULL); + if (r < 0) { + assert_se(r == -ENOENT); + continue; + } + check_parent_match(p, parent); + } +} + +TEST(sd_device_get_child) { + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + int r; + + assert_se(sd_device_enumerator_new(&e) >= 0); + assert_se(sd_device_enumerator_allow_uninitialized(e) >= 0); + /* See comments in TEST(sd_device_enumerator_devices). */ + assert_se(sd_device_enumerator_add_match_subsystem(e, "bdi", false) >= 0); + assert_se(sd_device_enumerator_add_nomatch_sysname(e, "loop*") >= 0); + assert_se(sd_device_enumerator_add_match_subsystem(e, "net", false) >= 0); + + if (!slow_tests_enabled()) + assert_se(sd_device_enumerator_add_match_subsystem(e, "block", true) >= 0); + + FOREACH_DEVICE(e, dev) { + const char *syspath, *parent_syspath, *expected_suffix, *suffix; + sd_device *parent; + bool found = false; + + assert_se(sd_device_get_syspath(dev, &syspath) >= 0); + + r = sd_device_get_parent(dev, &parent); + if (r < 0) { + assert_se(ERRNO_IS_DEVICE_ABSENT(r)); + continue; + } + + assert_se(sd_device_get_syspath(parent, &parent_syspath) >= 0); + assert_se(expected_suffix = path_startswith(syspath, parent_syspath)); + + log_debug("> %s", syspath); + + FOREACH_DEVICE_CHILD_WITH_SUFFIX(parent, child, suffix) { + const char *s; + + assert_se(child); + assert_se(suffix); + + if (!streq(suffix, expected_suffix)) + continue; + + assert_se(sd_device_get_syspath(child, &s) >= 0); + assert_se(streq(s, syspath)); + found = true; + break; + } + assert_se(found); + } +} + +TEST(sd_device_new_from_nulstr) { + const char *devlinks = + "/dev/disk/by-partuuid/1290d63a-42cc-4c71-b87c-xxxxxxxxxxxx\0" + "/dev/disk/by-path/pci-0000:00:0f.0-scsi-0:0:0:0-part3\0" + "/dev/disk/by-label/Arch\\x20Linux\0" + "/dev/disk/by-uuid/a07b87e5-4af5-4a59-bde9-yyyyyyyyyyyy\0" + "/dev/disk/by-partlabel/Arch\\x20Linux\0" + "\0"; + + _cleanup_(sd_device_unrefp) sd_device *device = NULL, *from_nulstr = NULL; + _cleanup_free_ char *nulstr_copy = NULL; + const char *nulstr; + size_t len; + + assert_se(sd_device_new_from_syspath(&device, "/sys/class/net/lo") >= 0); + + /* Yeah, of course, setting devlink to the loopback interface is nonsense. But this is just a + * test for generating and parsing nulstr. For issue #17772. */ + NULSTR_FOREACH(devlink, devlinks) { + log_device_info(device, "setting devlink: %s", devlink); + assert_se(device_add_devlink(device, devlink) >= 0); + assert_se(set_contains(device->devlinks, devlink)); + } + + /* For issue #23799 */ + assert_se(device_add_tag(device, "tag1", false) >= 0); + assert_se(device_add_tag(device, "tag2", false) >= 0); + assert_se(device_add_tag(device, "current-tag1", true) >= 0); + assert_se(device_add_tag(device, "current-tag2", true) >= 0); + + /* These properties are necessary for device_new_from_nulstr(). See device_verify(). */ + assert_se(device_add_property_internal(device, "SEQNUM", "1") >= 0); + assert_se(device_add_property_internal(device, "ACTION", "change") >= 0); + + assert_se(device_get_properties_nulstr(device, &nulstr, &len) >= 0); + assert_se(nulstr_copy = newdup(char, nulstr, len)); + assert_se(device_new_from_nulstr(&from_nulstr, nulstr_copy, len) >= 0); + + assert_se(sd_device_has_tag(from_nulstr, "tag1") == 1); + assert_se(sd_device_has_tag(from_nulstr, "tag2") == 1); + assert_se(sd_device_has_tag(from_nulstr, "current-tag1") == 1); + assert_se(sd_device_has_tag(from_nulstr, "current-tag2") == 1); + assert_se(sd_device_has_current_tag(from_nulstr, "tag1") == 0); + assert_se(sd_device_has_current_tag(from_nulstr, "tag2") == 0); + assert_se(sd_device_has_current_tag(from_nulstr, "current-tag1") == 1); + assert_se(sd_device_has_current_tag(from_nulstr, "current-tag2") == 1); + + NULSTR_FOREACH(devlink, devlinks) { + log_device_info(from_nulstr, "checking devlink: %s", devlink); + assert_se(set_contains(from_nulstr->devlinks, devlink)); + } +} + +TEST(sd_device_new_from_path) { + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + _cleanup_(rm_rf_physical_and_freep) char *tmpdir = NULL; + int r; + + assert_se(mkdtemp_malloc("/tmp/test-sd-device.XXXXXXX", &tmpdir) >= 0); + + assert_se(sd_device_enumerator_new(&e) >= 0); + assert_se(sd_device_enumerator_allow_uninitialized(e) >= 0); + assert_se(sd_device_enumerator_add_match_subsystem(e, "block", true) >= 0); + assert_se(sd_device_enumerator_add_nomatch_sysname(e, "loop*") >= 0); + assert_se(sd_device_enumerator_add_match_property(e, "DEVNAME", "*") >= 0); + + FOREACH_DEVICE(e, dev) { + _cleanup_(sd_device_unrefp) sd_device *d = NULL; + const char *syspath, *devpath, *sysname, *s; + _cleanup_free_ char *path = NULL; + + assert_se(sd_device_get_sysname(dev, &sysname) >= 0); + + log_debug("%s(%s)", __func__, sysname); + + assert_se(sd_device_get_syspath(dev, &syspath) >= 0); + assert_se(sd_device_new_from_path(&d, syspath) >= 0); + assert_se(sd_device_get_syspath(d, &s) >= 0); + assert_se(streq(s, syspath)); + d = sd_device_unref(d); + + assert_se(sd_device_get_devname(dev, &devpath) >= 0); + r = sd_device_new_from_path(&d, devpath); + if (r >= 0) { + assert_se(sd_device_get_syspath(d, &s) >= 0); + assert_se(streq(s, syspath)); + d = sd_device_unref(d); + } else + assert_se(r == -ENODEV || ERRNO_IS_PRIVILEGE(r)); + + assert_se(path = path_join(tmpdir, sysname)); + assert_se(symlink(syspath, path) >= 0); + assert_se(sd_device_new_from_path(&d, path) >= 0); + assert_se(sd_device_get_syspath(d, &s) >= 0); + assert_se(streq(s, syspath)); + } +} + +static void test_devname_from_devnum_one(const char *path) { + _cleanup_free_ char *resolved = NULL; + struct stat st; + + log_debug("> %s", path); + + if (stat(path, &st) < 0) { + assert_se(errno == ENOENT); + log_notice("Path %s not found, skipping test", path); + return; + } + + assert_se(devname_from_devnum(st.st_mode, st.st_rdev, &resolved) >= 0); + assert_se(path_equal(path, resolved)); + resolved = mfree(resolved); + assert_se(devname_from_stat_rdev(&st, &resolved) >= 0); + assert_se(path_equal(path, resolved)); +} + +TEST(devname_from_devnum) { + test_devname_from_devnum_one("/dev/null"); + test_devname_from_devnum_one("/dev/zero"); + test_devname_from_devnum_one("/dev/full"); + test_devname_from_devnum_one("/dev/random"); + test_devname_from_devnum_one("/dev/urandom"); + test_devname_from_devnum_one("/dev/tty"); + + if (is_device_node("/run/systemd/inaccessible/blk") > 0) { + test_devname_from_devnum_one("/run/systemd/inaccessible/chr"); + test_devname_from_devnum_one("/run/systemd/inaccessible/blk"); + } +} + +DEFINE_TEST_MAIN(LOG_INFO); diff --git a/src/libsystemd/sd-event/event-source.h b/src/libsystemd/sd-event/event-source.h new file mode 100644 index 0000000..f4e38d7 --- /dev/null +++ b/src/libsystemd/sd-event/event-source.h @@ -0,0 +1,239 @@ +#pragma once +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <sys/epoll.h> +#include <sys/timerfd.h> +#include <sys/wait.h> + +#include "sd-event.h" + +#include "hashmap.h" +#include "inotify-util.h" +#include "list.h" +#include "prioq.h" +#include "ratelimit.h" + +typedef enum EventSourceType { + SOURCE_IO, + SOURCE_TIME_REALTIME, + SOURCE_TIME_BOOTTIME, + SOURCE_TIME_MONOTONIC, + SOURCE_TIME_REALTIME_ALARM, + SOURCE_TIME_BOOTTIME_ALARM, + SOURCE_SIGNAL, + SOURCE_CHILD, + SOURCE_DEFER, + SOURCE_POST, + SOURCE_EXIT, + SOURCE_WATCHDOG, + SOURCE_INOTIFY, + SOURCE_MEMORY_PRESSURE, + _SOURCE_EVENT_SOURCE_TYPE_MAX, + _SOURCE_EVENT_SOURCE_TYPE_INVALID = -EINVAL, +} EventSourceType; + +/* All objects we use in epoll events start with this value, so that + * we know how to dispatch it */ +typedef enum WakeupType { + WAKEUP_NONE, + WAKEUP_EVENT_SOURCE, /* either I/O or pidfd wakeup */ + WAKEUP_CLOCK_DATA, + WAKEUP_SIGNAL_DATA, + WAKEUP_INOTIFY_DATA, + _WAKEUP_TYPE_MAX, + _WAKEUP_TYPE_INVALID = -EINVAL, +} WakeupType; + +struct inode_data; + +struct sd_event_source { + WakeupType wakeup; + + unsigned n_ref; + + sd_event *event; + void *userdata; + sd_event_handler_t prepare; + + char *description; + + EventSourceType type; + signed int enabled:3; + bool pending:1; + bool dispatching:1; + bool floating:1; + bool exit_on_failure:1; + bool ratelimited:1; + + int64_t priority; + unsigned pending_index; + unsigned prepare_index; + uint64_t pending_iteration; + uint64_t prepare_iteration; + + sd_event_destroy_t destroy_callback; + sd_event_handler_t ratelimit_expire_callback; + + LIST_FIELDS(sd_event_source, sources); + + RateLimit rate_limit; + + /* These are primarily fields relevant for time event sources, but since any event source can + * effectively become one when rate-limited, this is part of the common fields. */ + unsigned earliest_index; + unsigned latest_index; + + union { + struct { + sd_event_io_handler_t callback; + int fd; + uint32_t events; + uint32_t revents; + bool registered:1; + bool owned:1; + } io; + struct { + sd_event_time_handler_t callback; + usec_t next, accuracy; + } time; + struct { + sd_event_signal_handler_t callback; + struct signalfd_siginfo siginfo; + int sig; + bool unblock; + } signal; + struct { + sd_event_child_handler_t callback; + siginfo_t siginfo; + pid_t pid; + int options; + int pidfd; + bool registered:1; /* whether the pidfd is registered in the epoll */ + bool pidfd_owned:1; /* close pidfd when event source is freed */ + bool process_owned:1; /* kill+reap process when event source is freed */ + bool exited:1; /* true if process exited (i.e. if there's value in SIGKILLing it if we want to get rid of it) */ + bool waited:1; /* true if process was waited for (i.e. if there's value in waitid(P_PID)'ing it if we want to get rid of it) */ + } child; + struct { + sd_event_handler_t callback; + } defer; + struct { + sd_event_handler_t callback; + } post; + struct { + sd_event_handler_t callback; + unsigned prioq_index; + } exit; + struct { + sd_event_inotify_handler_t callback; + uint32_t mask; + struct inode_data *inode_data; + LIST_FIELDS(sd_event_source, by_inode_data); + } inotify; + struct { + int fd; + sd_event_handler_t callback; + void *write_buffer; + size_t write_buffer_size; + uint32_t events, revents; + LIST_FIELDS(sd_event_source, write_list); + bool registered:1; + bool locked:1; + bool in_write_list:1; + } memory_pressure; + }; +}; + +struct clock_data { + WakeupType wakeup; + int fd; + + /* For all clocks we maintain two priority queues each, one + * ordered for the earliest times the events may be + * dispatched, and one ordered by the latest times they must + * have been dispatched. The range between the top entries in + * the two prioqs is the time window we can freely schedule + * wakeups in */ + + Prioq *earliest; + Prioq *latest; + usec_t next; + + bool needs_rearm:1; +}; + +struct signal_data { + WakeupType wakeup; + + /* For each priority we maintain one signal fd, so that we + * only have to dequeue a single event per priority at a + * time. */ + + int fd; + int64_t priority; + sigset_t sigset; + sd_event_source *current; +}; + +/* A structure listing all event sources currently watching a specific inode */ +struct inode_data { + /* The identifier for the inode, the combination of the .st_dev + .st_ino fields of the file */ + ino_t ino; + dev_t dev; + + /* An fd of the inode to watch. The fd is kept open until the next iteration of the loop, so that we can + * rearrange the priority still until then, as we need the original inode to change the priority as we need to + * add a watch descriptor to the right inotify for the priority which we can only do if we have a handle to the + * original inode. We keep a list of all inode_data objects with an open fd in the to_close list (see below) of + * the sd-event object, so that it is efficient to close everything, before entering the next event loop + * iteration. */ + int fd; + + /* The inotify "watch descriptor" */ + int wd; + + /* The combination of the mask of all inotify watches on this inode we manage. This is also the mask that has + * most recently been set on the watch descriptor. */ + uint32_t combined_mask; + + /* All event sources subscribed to this inode */ + LIST_HEAD(sd_event_source, event_sources); + + /* The inotify object we watch this inode with */ + struct inotify_data *inotify_data; + + /* A linked list of all inode data objects with fds to close (see above) */ + LIST_FIELDS(struct inode_data, to_close); +}; + +/* A structure encapsulating an inotify fd */ +struct inotify_data { + WakeupType wakeup; + + /* For each priority we maintain one inotify fd, so that we only have to dequeue a single event per priority at + * a time */ + + int fd; + int64_t priority; + + Hashmap *inodes; /* The inode_data structures keyed by dev+ino */ + Hashmap *wd; /* The inode_data structures keyed by the watch descriptor for each */ + + /* The buffer we read inotify events into */ + union inotify_event_buffer buffer; + size_t buffer_filled; /* fill level of the buffer */ + + /* How many event sources are currently marked pending for this inotify. We won't read new events off the + * inotify fd as long as there are still pending events on the inotify (because we have no strategy of queuing + * the events locally if they can't be coalesced). */ + unsigned n_pending; + + /* If this counter is non-zero, don't GC the inotify data object even if not used to watch any inode + * anymore. This is useful to pin the object for a bit longer, after the last event source needing it + * is gone. */ + unsigned n_busy; + + /* A linked list of all inotify objects with data already read, that still need processing. We keep this list + * to make it efficient to figure out what inotify objects to process data on next. */ + LIST_FIELDS(struct inotify_data, buffered); +}; diff --git a/src/libsystemd/sd-event/event-util.c b/src/libsystemd/sd-event/event-util.c new file mode 100644 index 0000000..a310122 --- /dev/null +++ b/src/libsystemd/sd-event/event-util.c @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> + +#include "event-source.h" +#include "event-util.h" +#include "fd-util.h" +#include "log.h" +#include "string-util.h" + +int event_reset_time( + sd_event *e, + sd_event_source **s, + clockid_t clock, + uint64_t usec, + uint64_t accuracy, + sd_event_time_handler_t callback, + void *userdata, + int64_t priority, + const char *description, + bool force_reset) { + + bool created = false; + int enabled, r; + clockid_t c; + + assert(e); + assert(s); + + if (*s) { + if (!force_reset) { + r = sd_event_source_get_enabled(*s, &enabled); + if (r < 0) + return log_debug_errno(r, "sd-event: Failed to query whether event source \"%s\" is enabled or not: %m", + strna((*s)->description ?: description)); + + if (enabled != SD_EVENT_OFF) + return 0; + } + + r = sd_event_source_get_time_clock(*s, &c); + if (r < 0) + return log_debug_errno(r, "sd-event: Failed to get clock id of event source \"%s\": %m", strna((*s)->description ?: description)); + + if (c != clock) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), + "sd-event: Current clock id %i of event source \"%s\" is different from specified one %i.", + (int)c, + strna((*s)->description ?: description), + (int)clock); + + r = sd_event_source_set_time(*s, usec); + if (r < 0) + return log_debug_errno(r, "sd-event: Failed to set time for event source \"%s\": %m", strna((*s)->description ?: description)); + + r = sd_event_source_set_time_accuracy(*s, accuracy); + if (r < 0) + return log_debug_errno(r, "sd-event: Failed to set accuracy for event source \"%s\": %m", strna((*s)->description ?: description)); + + /* callback function is not updated, as we do not have sd_event_source_set_time_callback(). */ + + (void) sd_event_source_set_userdata(*s, userdata); + + r = sd_event_source_set_enabled(*s, SD_EVENT_ONESHOT); + if (r < 0) + return log_debug_errno(r, "sd-event: Failed to enable event source \"%s\": %m", strna((*s)->description ?: description)); + } else { + r = sd_event_add_time(e, s, clock, usec, accuracy, callback, userdata); + if (r < 0) + return log_debug_errno(r, "sd-event: Failed to create timer event \"%s\": %m", strna(description)); + + created = true; + } + + r = sd_event_source_set_priority(*s, priority); + if (r < 0) + return log_debug_errno(r, "sd-event: Failed to set priority for event source \"%s\": %m", strna((*s)->description ?: description)); + + if (description) { + r = sd_event_source_set_description(*s, description); + if (r < 0) + return log_debug_errno(r, "sd-event: Failed to set description for event source \"%s\": %m", description); + } + + return created; +} + +int event_reset_time_relative( + sd_event *e, + sd_event_source **s, + clockid_t clock, + uint64_t usec, + uint64_t accuracy, + sd_event_time_handler_t callback, + void *userdata, + int64_t priority, + const char *description, + bool force_reset) { + + int r; + + assert(e); + + if (usec > 0) { + usec_t usec_now; + + r = sd_event_now(e, clock, &usec_now); + if (r < 0) + return log_debug_errno(r, "sd-event: Failed to get the current time: %m"); + + usec = usec_add(usec_now, usec); + } + + return event_reset_time(e, s, clock, usec, accuracy, callback, userdata, priority, description, force_reset); +} + +int event_add_time_change(sd_event *e, sd_event_source **ret, sd_event_io_handler_t callback, void *userdata) { + _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL; + _cleanup_close_ int fd = -EBADF; + int r; + + assert(e); + + /* Allocates an IO event source that gets woken up whenever the clock changes. Needs to be recreated on each event */ + + fd = time_change_fd(); + if (fd < 0) + return fd; + + r = sd_event_add_io(e, &s, fd, EPOLLIN, callback, userdata); + if (r < 0) + return r; + + r = sd_event_source_set_io_fd_own(s, true); + if (r < 0) + return r; + + TAKE_FD(fd); + + r = sd_event_source_set_description(s, "time-change"); + if (r < 0) + return r; + + if (ret) + *ret = TAKE_PTR(s); + else { + r = sd_event_source_set_floating(s, true); + if (r < 0) + return r; + } + + return 0; +} diff --git a/src/libsystemd/sd-event/event-util.h b/src/libsystemd/sd-event/event-util.h new file mode 100644 index 0000000..c185584 --- /dev/null +++ b/src/libsystemd/sd-event/event-util.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "sd-event.h" + +int event_reset_time( + sd_event *e, + sd_event_source **s, + clockid_t clock, + uint64_t usec, + uint64_t accuracy, + sd_event_time_handler_t callback, + void *userdata, + int64_t priority, + const char *description, + bool force_reset); +int event_reset_time_relative( + sd_event *e, + sd_event_source **s, + clockid_t clock, + uint64_t usec, + uint64_t accuracy, + sd_event_time_handler_t callback, + void *userdata, + int64_t priority, + const char *description, + bool force_reset); +static inline int event_source_disable(sd_event_source *s) { + return sd_event_source_set_enabled(s, SD_EVENT_OFF); +} + +int event_add_time_change(sd_event *e, sd_event_source **ret, sd_event_io_handler_t callback, void *userdata); diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c new file mode 100644 index 0000000..288798a --- /dev/null +++ b/src/libsystemd/sd-event/sd-event.c @@ -0,0 +1,5357 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <sys/epoll.h> +#include <sys/timerfd.h> +#include <sys/wait.h> + +#include "sd-daemon.h" +#include "sd-event.h" +#include "sd-id128.h" +#include "sd-messages.h" + +#include "alloc-util.h" +#include "env-util.h" +#include "event-source.h" +#include "fd-util.h" +#include "fs-util.h" +#include "glyph-util.h" +#include "hashmap.h" +#include "hexdecoct.h" +#include "list.h" +#include "logarithm.h" +#include "macro.h" +#include "mallinfo-util.h" +#include "memory-util.h" +#include "missing_magic.h" +#include "missing_syscall.h" +#include "missing_threads.h" +#include "origin-id.h" +#include "path-util.h" +#include "prioq.h" +#include "process-util.h" +#include "psi-util.h" +#include "set.h" +#include "signal-util.h" +#include "socket-util.h" +#include "stat-util.h" +#include "string-table.h" +#include "string-util.h" +#include "strxcpyx.h" +#include "time-util.h" + +#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC) + +static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) { + /* Returns true if this is a PID event source and can be implemented by watching EPOLLIN */ + return s && + s->type == SOURCE_CHILD && + s->child.pidfd >= 0 && + s->child.options == WEXITED; +} + +static bool event_source_is_online(sd_event_source *s) { + assert(s); + return s->enabled != SD_EVENT_OFF && !s->ratelimited; +} + +static bool event_source_is_offline(sd_event_source *s) { + assert(s); + return s->enabled == SD_EVENT_OFF || s->ratelimited; +} + +static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = { + [SOURCE_IO] = "io", + [SOURCE_TIME_REALTIME] = "realtime", + [SOURCE_TIME_BOOTTIME] = "boottime", + [SOURCE_TIME_MONOTONIC] = "monotonic", + [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm", + [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm", + [SOURCE_SIGNAL] = "signal", + [SOURCE_CHILD] = "child", + [SOURCE_DEFER] = "defer", + [SOURCE_POST] = "post", + [SOURCE_EXIT] = "exit", + [SOURCE_WATCHDOG] = "watchdog", + [SOURCE_INOTIFY] = "inotify", + [SOURCE_MEMORY_PRESSURE] = "memory-pressure", +}; + +DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int); + +#define EVENT_SOURCE_IS_TIME(t) \ + IN_SET((t), \ + SOURCE_TIME_REALTIME, \ + SOURCE_TIME_BOOTTIME, \ + SOURCE_TIME_MONOTONIC, \ + SOURCE_TIME_REALTIME_ALARM, \ + SOURCE_TIME_BOOTTIME_ALARM) + +#define EVENT_SOURCE_CAN_RATE_LIMIT(t) \ + IN_SET((t), \ + SOURCE_IO, \ + SOURCE_TIME_REALTIME, \ + SOURCE_TIME_BOOTTIME, \ + SOURCE_TIME_MONOTONIC, \ + SOURCE_TIME_REALTIME_ALARM, \ + SOURCE_TIME_BOOTTIME_ALARM, \ + SOURCE_SIGNAL, \ + SOURCE_DEFER, \ + SOURCE_INOTIFY, \ + SOURCE_MEMORY_PRESSURE) + +/* This is used to assert that we didn't pass an unexpected source type to event_source_time_prioq_put(). + * Time sources and ratelimited sources can be passed, so effectively this is the same as the + * EVENT_SOURCE_CAN_RATE_LIMIT() macro. */ +#define EVENT_SOURCE_USES_TIME_PRIOQ(t) EVENT_SOURCE_CAN_RATE_LIMIT(t) + +struct sd_event { + unsigned n_ref; + + int epoll_fd; + int watchdog_fd; + + Prioq *pending; + Prioq *prepare; + + /* timerfd_create() only supports these five clocks so far. We + * can add support for more clocks when the kernel learns to + * deal with them, too. */ + struct clock_data realtime; + struct clock_data boottime; + struct clock_data monotonic; + struct clock_data realtime_alarm; + struct clock_data boottime_alarm; + + usec_t perturb; + + sd_event_source **signal_sources; /* indexed by signal number */ + Hashmap *signal_data; /* indexed by priority */ + + Hashmap *child_sources; + unsigned n_online_child_sources; + + Set *post_sources; + + Prioq *exit; + + Hashmap *inotify_data; /* indexed by priority */ + + /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */ + LIST_HEAD(struct inode_data, inode_data_to_close_list); + + /* A list of inotify objects that already have events buffered which aren't processed yet */ + LIST_HEAD(struct inotify_data, buffered_inotify_data_list); + + /* A list of memory pressure event sources that still need their subscription string written */ + LIST_HEAD(sd_event_source, memory_pressure_write_list); + + uint64_t origin_id; + + uint64_t iteration; + triple_timestamp timestamp; + int state; + + bool exit_requested:1; + bool need_process_child:1; + bool watchdog:1; + bool profile_delays:1; + + int exit_code; + + pid_t tid; + sd_event **default_event_ptr; + + usec_t watchdog_last, watchdog_period; + + unsigned n_sources; + + struct epoll_event *event_queue; + + LIST_HEAD(sd_event_source, sources); + + sd_event_source *sigint_event_source, *sigterm_event_source; + + usec_t last_run_usec, last_log_usec; + unsigned delays[sizeof(usec_t) * 8]; +}; + +DEFINE_PRIVATE_ORIGIN_ID_HELPERS(sd_event, event); + +static thread_local sd_event *default_event = NULL; + +static void source_disconnect(sd_event_source *s); +static void event_gc_inode_data(sd_event *e, struct inode_data *d); + +static sd_event *event_resolve(sd_event *e) { + return e == SD_EVENT_DEFAULT ? default_event : e; +} + +static int pending_prioq_compare(const void *a, const void *b) { + const sd_event_source *x = a, *y = b; + int r; + + assert(x->pending); + assert(y->pending); + + /* Enabled ones first */ + r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF); + if (r != 0) + return r; + + /* Non rate-limited ones first. */ + r = CMP(!!x->ratelimited, !!y->ratelimited); + if (r != 0) + return r; + + /* Lower priority values first */ + r = CMP(x->priority, y->priority); + if (r != 0) + return r; + + /* Older entries first */ + return CMP(x->pending_iteration, y->pending_iteration); +} + +static int prepare_prioq_compare(const void *a, const void *b) { + const sd_event_source *x = a, *y = b; + int r; + + assert(x->prepare); + assert(y->prepare); + + /* Enabled ones first */ + r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF); + if (r != 0) + return r; + + /* Non rate-limited ones first. */ + r = CMP(!!x->ratelimited, !!y->ratelimited); + if (r != 0) + return r; + + /* Move most recently prepared ones last, so that we can stop + * preparing as soon as we hit one that has already been + * prepared in the current iteration */ + r = CMP(x->prepare_iteration, y->prepare_iteration); + if (r != 0) + return r; + + /* Lower priority values first */ + return CMP(x->priority, y->priority); +} + +static usec_t time_event_source_next(const sd_event_source *s) { + assert(s); + + /* We have two kinds of event sources that have elapsation times associated with them: the actual + * time based ones and the ones for which a ratelimit can be in effect (where we want to be notified + * once the ratelimit time window ends). Let's return the next elapsing time depending on what we are + * looking at here. */ + + if (s->ratelimited) { /* If rate-limited the next elapsation is when the ratelimit time window ends */ + assert(s->rate_limit.begin != 0); + assert(s->rate_limit.interval != 0); + return usec_add(s->rate_limit.begin, s->rate_limit.interval); + } + + /* Otherwise this must be a time event source, if not ratelimited */ + if (EVENT_SOURCE_IS_TIME(s->type)) + return s->time.next; + + return USEC_INFINITY; +} + +static usec_t time_event_source_latest(const sd_event_source *s) { + assert(s); + + if (s->ratelimited) { /* For ratelimited stuff the earliest and the latest time shall actually be the + * same, as we should avoid adding additional inaccuracy on an inaccuracy time + * window */ + assert(s->rate_limit.begin != 0); + assert(s->rate_limit.interval != 0); + return usec_add(s->rate_limit.begin, s->rate_limit.interval); + } + + /* Must be a time event source, if not ratelimited */ + if (EVENT_SOURCE_IS_TIME(s->type)) + return usec_add(s->time.next, s->time.accuracy); + + return USEC_INFINITY; +} + +static bool event_source_timer_candidate(const sd_event_source *s) { + assert(s); + + /* Returns true for event sources that either are not pending yet (i.e. where it's worth to mark them pending) + * or which are currently ratelimited (i.e. where it's worth leaving the ratelimited state) */ + return !s->pending || s->ratelimited; +} + +static int time_prioq_compare(const void *a, const void *b, usec_t (*time_func)(const sd_event_source *s)) { + const sd_event_source *x = a, *y = b; + int r; + + /* Enabled ones first */ + r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF); + if (r != 0) + return r; + + /* Order "non-pending OR ratelimited" before "pending AND not-ratelimited" */ + r = CMP(!event_source_timer_candidate(x), !event_source_timer_candidate(y)); + if (r != 0) + return r; + + /* Order by time */ + return CMP(time_func(x), time_func(y)); +} + +static int earliest_time_prioq_compare(const void *a, const void *b) { + return time_prioq_compare(a, b, time_event_source_next); +} + +static int latest_time_prioq_compare(const void *a, const void *b) { + return time_prioq_compare(a, b, time_event_source_latest); +} + +static int exit_prioq_compare(const void *a, const void *b) { + const sd_event_source *x = a, *y = b; + int r; + + assert(x->type == SOURCE_EXIT); + assert(y->type == SOURCE_EXIT); + + /* Enabled ones first */ + r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF); + if (r != 0) + return r; + + /* Lower priority values first */ + return CMP(x->priority, y->priority); +} + +static void free_clock_data(struct clock_data *d) { + assert(d); + assert(d->wakeup == WAKEUP_CLOCK_DATA); + + safe_close(d->fd); + prioq_free(d->earliest); + prioq_free(d->latest); +} + +static sd_event *event_free(sd_event *e) { + sd_event_source *s; + + assert(e); + + e->sigterm_event_source = sd_event_source_unref(e->sigterm_event_source); + e->sigint_event_source = sd_event_source_unref(e->sigint_event_source); + + while ((s = e->sources)) { + assert(s->floating); + source_disconnect(s); + sd_event_source_unref(s); + } + + assert(e->n_sources == 0); + + if (e->default_event_ptr) + *(e->default_event_ptr) = NULL; + + safe_close(e->epoll_fd); + safe_close(e->watchdog_fd); + + free_clock_data(&e->realtime); + free_clock_data(&e->boottime); + free_clock_data(&e->monotonic); + free_clock_data(&e->realtime_alarm); + free_clock_data(&e->boottime_alarm); + + prioq_free(e->pending); + prioq_free(e->prepare); + prioq_free(e->exit); + + free(e->signal_sources); + hashmap_free(e->signal_data); + + hashmap_free(e->inotify_data); + + hashmap_free(e->child_sources); + set_free(e->post_sources); + + free(e->event_queue); + + return mfree(e); +} + +_public_ int sd_event_new(sd_event** ret) { + sd_event *e; + int r; + + assert_return(ret, -EINVAL); + + e = new(sd_event, 1); + if (!e) + return -ENOMEM; + + *e = (sd_event) { + .n_ref = 1, + .epoll_fd = -EBADF, + .watchdog_fd = -EBADF, + .realtime.wakeup = WAKEUP_CLOCK_DATA, + .realtime.fd = -EBADF, + .realtime.next = USEC_INFINITY, + .boottime.wakeup = WAKEUP_CLOCK_DATA, + .boottime.fd = -EBADF, + .boottime.next = USEC_INFINITY, + .monotonic.wakeup = WAKEUP_CLOCK_DATA, + .monotonic.fd = -EBADF, + .monotonic.next = USEC_INFINITY, + .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA, + .realtime_alarm.fd = -EBADF, + .realtime_alarm.next = USEC_INFINITY, + .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA, + .boottime_alarm.fd = -EBADF, + .boottime_alarm.next = USEC_INFINITY, + .perturb = USEC_INFINITY, + .origin_id = origin_id_query(), + }; + + r = prioq_ensure_allocated(&e->pending, pending_prioq_compare); + if (r < 0) + goto fail; + + e->epoll_fd = epoll_create1(EPOLL_CLOEXEC); + if (e->epoll_fd < 0) { + r = -errno; + goto fail; + } + + e->epoll_fd = fd_move_above_stdio(e->epoll_fd); + + if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) { + log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 %s 2^63 us will be logged every 5s.", + special_glyph(SPECIAL_GLYPH_ELLIPSIS)); + e->profile_delays = true; + } + + *ret = e; + return 0; + +fail: + event_free(e); + return r; +} + +/* Define manually so we can add the origin check */ +_public_ sd_event *sd_event_ref(sd_event *e) { + if (!e) + return NULL; + if (event_origin_changed(e)) + return NULL; + + e->n_ref++; + + return e; +} + +_public_ sd_event* sd_event_unref(sd_event *e) { + if (!e) + return NULL; + if (event_origin_changed(e)) + return NULL; + + assert(e->n_ref > 0); + if (--e->n_ref > 0) + return NULL; + + return event_free(e); +} + +#define PROTECT_EVENT(e) \ + _unused_ _cleanup_(sd_event_unrefp) sd_event *_ref = sd_event_ref(e); + +_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) { + if (s) + (void) sd_event_source_set_enabled(s, SD_EVENT_OFF); + return sd_event_source_unref(s); +} + +static void source_io_unregister(sd_event_source *s) { + assert(s); + assert(s->type == SOURCE_IO); + + if (event_origin_changed(s->event)) + return; + + if (!s->io.registered) + return; + + if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0) + log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m", + strna(s->description), event_source_type_to_string(s->type)); + + s->io.registered = false; +} + +static int source_io_register( + sd_event_source *s, + int enabled, + uint32_t events) { + + assert(s); + assert(s->type == SOURCE_IO); + assert(enabled != SD_EVENT_OFF); + + struct epoll_event ev = { + .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0), + .data.ptr = s, + }; + + if (epoll_ctl(s->event->epoll_fd, + s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD, + s->io.fd, &ev) < 0) + return -errno; + + s->io.registered = true; + + return 0; +} + +static void source_child_pidfd_unregister(sd_event_source *s) { + assert(s); + assert(s->type == SOURCE_CHILD); + + if (event_origin_changed(s->event)) + return; + + if (!s->child.registered) + return; + + if (EVENT_SOURCE_WATCH_PIDFD(s)) + if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0) + log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m", + strna(s->description), event_source_type_to_string(s->type)); + + s->child.registered = false; +} + +static int source_child_pidfd_register(sd_event_source *s, int enabled) { + assert(s); + assert(s->type == SOURCE_CHILD); + assert(enabled != SD_EVENT_OFF); + + if (EVENT_SOURCE_WATCH_PIDFD(s)) { + struct epoll_event ev = { + .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0), + .data.ptr = s, + }; + + if (epoll_ctl(s->event->epoll_fd, + s->child.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD, + s->child.pidfd, &ev) < 0) + return -errno; + } + + s->child.registered = true; + return 0; +} + +static void source_memory_pressure_unregister(sd_event_source *s) { + assert(s); + assert(s->type == SOURCE_MEMORY_PRESSURE); + + if (event_origin_changed(s->event)) + return; + + if (!s->memory_pressure.registered) + return; + + if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->memory_pressure.fd, NULL) < 0) + log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m", + strna(s->description), event_source_type_to_string(s->type)); + + s->memory_pressure.registered = false; +} + +static int source_memory_pressure_register(sd_event_source *s, int enabled) { + assert(s); + assert(s->type == SOURCE_MEMORY_PRESSURE); + assert(enabled != SD_EVENT_OFF); + + struct epoll_event ev = { + .events = s->memory_pressure.write_buffer_size > 0 ? EPOLLOUT : + (s->memory_pressure.events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0)), + .data.ptr = s, + }; + + if (epoll_ctl(s->event->epoll_fd, + s->memory_pressure.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD, + s->memory_pressure.fd, &ev) < 0) + return -errno; + + s->memory_pressure.registered = true; + return 0; +} + +static void source_memory_pressure_add_to_write_list(sd_event_source *s) { + assert(s); + assert(s->type == SOURCE_MEMORY_PRESSURE); + + if (s->memory_pressure.in_write_list) + return; + + LIST_PREPEND(memory_pressure.write_list, s->event->memory_pressure_write_list, s); + s->memory_pressure.in_write_list = true; +} + +static void source_memory_pressure_remove_from_write_list(sd_event_source *s) { + assert(s); + assert(s->type == SOURCE_MEMORY_PRESSURE); + + if (!s->memory_pressure.in_write_list) + return; + + LIST_REMOVE(memory_pressure.write_list, s->event->memory_pressure_write_list, s); + s->memory_pressure.in_write_list = false; +} + +static clockid_t event_source_type_to_clock(EventSourceType t) { + + switch (t) { + + case SOURCE_TIME_REALTIME: + return CLOCK_REALTIME; + + case SOURCE_TIME_BOOTTIME: + return CLOCK_BOOTTIME; + + case SOURCE_TIME_MONOTONIC: + return CLOCK_MONOTONIC; + + case SOURCE_TIME_REALTIME_ALARM: + return CLOCK_REALTIME_ALARM; + + case SOURCE_TIME_BOOTTIME_ALARM: + return CLOCK_BOOTTIME_ALARM; + + default: + return (clockid_t) -1; + } +} + +static EventSourceType clock_to_event_source_type(clockid_t clock) { + + switch (clock) { + + case CLOCK_REALTIME: + return SOURCE_TIME_REALTIME; + + case CLOCK_BOOTTIME: + return SOURCE_TIME_BOOTTIME; + + case CLOCK_MONOTONIC: + return SOURCE_TIME_MONOTONIC; + + case CLOCK_REALTIME_ALARM: + return SOURCE_TIME_REALTIME_ALARM; + + case CLOCK_BOOTTIME_ALARM: + return SOURCE_TIME_BOOTTIME_ALARM; + + default: + return _SOURCE_EVENT_SOURCE_TYPE_INVALID; + } +} + +static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) { + assert(e); + + switch (t) { + + case SOURCE_TIME_REALTIME: + return &e->realtime; + + case SOURCE_TIME_BOOTTIME: + return &e->boottime; + + case SOURCE_TIME_MONOTONIC: + return &e->monotonic; + + case SOURCE_TIME_REALTIME_ALARM: + return &e->realtime_alarm; + + case SOURCE_TIME_BOOTTIME_ALARM: + return &e->boottime_alarm; + + default: + return NULL; + } +} + +static void event_free_signal_data(sd_event *e, struct signal_data *d) { + assert(e); + + if (!d) + return; + + hashmap_remove(e->signal_data, &d->priority); + safe_close(d->fd); + free(d); +} + +static int event_make_signal_data( + sd_event *e, + int sig, + struct signal_data **ret) { + + struct signal_data *d; + bool added = false; + sigset_t ss_copy; + int64_t priority; + int r; + + assert(e); + + if (event_origin_changed(e)) + return -ECHILD; + + if (e->signal_sources && e->signal_sources[sig]) + priority = e->signal_sources[sig]->priority; + else + priority = SD_EVENT_PRIORITY_NORMAL; + + d = hashmap_get(e->signal_data, &priority); + if (d) { + if (sigismember(&d->sigset, sig) > 0) { + if (ret) + *ret = d; + return 0; + } + } else { + d = new(struct signal_data, 1); + if (!d) + return -ENOMEM; + + *d = (struct signal_data) { + .wakeup = WAKEUP_SIGNAL_DATA, + .fd = -EBADF, + .priority = priority, + }; + + r = hashmap_ensure_put(&e->signal_data, &uint64_hash_ops, &d->priority, d); + if (r < 0) { + free(d); + return r; + } + + added = true; + } + + ss_copy = d->sigset; + assert_se(sigaddset(&ss_copy, sig) >= 0); + + r = signalfd(d->fd >= 0 ? d->fd : -1, /* the first arg must be -1 or a valid signalfd */ + &ss_copy, + SFD_NONBLOCK|SFD_CLOEXEC); + if (r < 0) { + r = -errno; + goto fail; + } + + d->sigset = ss_copy; + + if (d->fd >= 0) { + if (ret) + *ret = d; + return 0; + } + + d->fd = fd_move_above_stdio(r); + + struct epoll_event ev = { + .events = EPOLLIN, + .data.ptr = d, + }; + + if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) { + r = -errno; + goto fail; + } + + if (ret) + *ret = d; + + return 0; + +fail: + if (added) + event_free_signal_data(e, d); + + return r; +} + +static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) { + assert(e); + assert(d); + + /* Turns off the specified signal in the signal data + * object. If the signal mask of the object becomes empty that + * way removes it. */ + + if (sigismember(&d->sigset, sig) == 0) + return; + + assert_se(sigdelset(&d->sigset, sig) >= 0); + + if (sigisemptyset(&d->sigset)) { + /* If all the mask is all-zero we can get rid of the structure */ + event_free_signal_data(e, d); + return; + } + + if (event_origin_changed(e)) + return; + + assert(d->fd >= 0); + + if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0) + log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m"); +} + +static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) { + struct signal_data *d; + static const int64_t zero_priority = 0; + + assert(e); + + /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it, + * and possibly drop the signalfd for it. */ + + if (sig == SIGCHLD && + e->n_online_child_sources > 0) + return; + + if (e->signal_sources && + e->signal_sources[sig] && + event_source_is_online(e->signal_sources[sig])) + return; + + /* + * The specified signal might be enabled in three different queues: + * + * 1) the one that belongs to the priority passed (if it is non-NULL) + * 2) the one that belongs to the priority of the event source of the signal (if there is one) + * 3) the 0 priority (to cover the SIGCHLD case) + * + * Hence, let's remove it from all three here. + */ + + if (priority) { + d = hashmap_get(e->signal_data, priority); + if (d) + event_unmask_signal_data(e, d, sig); + } + + if (e->signal_sources && e->signal_sources[sig]) { + d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority); + if (d) + event_unmask_signal_data(e, d, sig); + } + + d = hashmap_get(e->signal_data, &zero_priority); + if (d) + event_unmask_signal_data(e, d, sig); +} + +static void event_source_pp_prioq_reshuffle(sd_event_source *s) { + assert(s); + + /* Reshuffles the pending + prepare prioqs. Called whenever the dispatch order changes, i.e. when + * they are enabled/disabled or marked pending and such. */ + + if (s->pending) + prioq_reshuffle(s->event->pending, s, &s->pending_index); + + if (s->prepare) + prioq_reshuffle(s->event->prepare, s, &s->prepare_index); +} + +static void event_source_time_prioq_reshuffle(sd_event_source *s) { + struct clock_data *d; + + assert(s); + + /* Called whenever the event source's timer ordering properties changed, i.e. time, accuracy, + * pending, enable state, and ratelimiting state. Makes sure the two prioq's are ordered + * properly again. */ + + if (s->ratelimited) + d = &s->event->monotonic; + else if (EVENT_SOURCE_IS_TIME(s->type)) + assert_se(d = event_get_clock_data(s->event, s->type)); + else + return; /* no-op for an event source which is neither a timer nor ratelimited. */ + + prioq_reshuffle(d->earliest, s, &s->earliest_index); + prioq_reshuffle(d->latest, s, &s->latest_index); + d->needs_rearm = true; +} + +static void event_source_time_prioq_remove( + sd_event_source *s, + struct clock_data *d) { + + assert(s); + assert(d); + + prioq_remove(d->earliest, s, &s->earliest_index); + prioq_remove(d->latest, s, &s->latest_index); + s->earliest_index = s->latest_index = PRIOQ_IDX_NULL; + d->needs_rearm = true; +} + +static void source_disconnect(sd_event_source *s) { + sd_event *event; + int r; + + assert(s); + + if (!s->event) + return; + + assert(s->event->n_sources > 0); + + switch (s->type) { + + case SOURCE_IO: + if (s->io.fd >= 0) + source_io_unregister(s); + + break; + + case SOURCE_TIME_REALTIME: + case SOURCE_TIME_BOOTTIME: + case SOURCE_TIME_MONOTONIC: + case SOURCE_TIME_REALTIME_ALARM: + case SOURCE_TIME_BOOTTIME_ALARM: + /* Only remove this event source from the time event source here if it is not ratelimited. If + * it is ratelimited, we'll remove it below, separately. Why? Because the clock used might + * differ: ratelimiting always uses CLOCK_MONOTONIC, but timer events might use any clock */ + + if (!s->ratelimited) { + struct clock_data *d; + assert_se(d = event_get_clock_data(s->event, s->type)); + event_source_time_prioq_remove(s, d); + } + + break; + + case SOURCE_SIGNAL: + if (s->signal.sig > 0) { + + if (s->event->signal_sources) + s->event->signal_sources[s->signal.sig] = NULL; + + event_gc_signal_data(s->event, &s->priority, s->signal.sig); + + if (s->signal.unblock) { + sigset_t new_ss; + + if (sigemptyset(&new_ss) < 0) + log_debug_errno(errno, "Failed to reset signal set, ignoring: %m"); + else if (sigaddset(&new_ss, s->signal.sig) < 0) + log_debug_errno(errno, "Failed to add signal %i to signal mask, ignoring: %m", s->signal.sig); + else { + r = pthread_sigmask(SIG_UNBLOCK, &new_ss, NULL); + if (r != 0) + log_debug_errno(r, "Failed to unblock signal %i, ignoring: %m", s->signal.sig); + } + } + } + + break; + + case SOURCE_CHILD: + if (event_origin_changed(s->event)) + s->child.process_owned = false; + + if (s->child.pid > 0) { + if (event_source_is_online(s)) { + assert(s->event->n_online_child_sources > 0); + s->event->n_online_child_sources--; + } + + (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid)); + } + + if (EVENT_SOURCE_WATCH_PIDFD(s)) + source_child_pidfd_unregister(s); + else + event_gc_signal_data(s->event, &s->priority, SIGCHLD); + + break; + + case SOURCE_DEFER: + /* nothing */ + break; + + case SOURCE_POST: + set_remove(s->event->post_sources, s); + break; + + case SOURCE_EXIT: + prioq_remove(s->event->exit, s, &s->exit.prioq_index); + break; + + case SOURCE_INOTIFY: { + struct inode_data *inode_data; + + inode_data = s->inotify.inode_data; + if (inode_data) { + struct inotify_data *inotify_data; + assert_se(inotify_data = inode_data->inotify_data); + + /* Detach this event source from the inode object */ + LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s); + s->inotify.inode_data = NULL; + + if (s->pending) { + assert(inotify_data->n_pending > 0); + inotify_data->n_pending--; + } + + /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode is + * continued to being watched. That's because inotify doesn't really have an API for that: we + * can only change watch masks with access to the original inode either by fd or by path. But + * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd + * continuously and keeping the mount busy which we can't really do. We could reconstruct the + * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed + * there), but given the need for open_by_handle_at() which is privileged and not universally + * available this would be quite an incomplete solution. Hence we go the other way, leave the + * mask set, even if it is not minimized now, and ignore all events we aren't interested in + * anymore after reception. Yes, this sucks, but … Linux … */ + + /* Maybe release the inode data (and its inotify) */ + event_gc_inode_data(s->event, inode_data); + } + + break; + } + + case SOURCE_MEMORY_PRESSURE: + source_memory_pressure_remove_from_write_list(s); + source_memory_pressure_unregister(s); + break; + + default: + assert_not_reached(); + } + + if (s->pending) + prioq_remove(s->event->pending, s, &s->pending_index); + + if (s->prepare) + prioq_remove(s->event->prepare, s, &s->prepare_index); + + if (s->ratelimited) + event_source_time_prioq_remove(s, &s->event->monotonic); + + event = TAKE_PTR(s->event); + LIST_REMOVE(sources, event->sources, s); + event->n_sources--; + + /* Note that we don't invalidate the type here, since we still need it in order to close the fd or + * pidfd associated with this event source, which we'll do only on source_free(). */ + + if (!s->floating) + sd_event_unref(event); +} + +static sd_event_source* source_free(sd_event_source *s) { + assert(s); + + source_disconnect(s); + + if (s->type == SOURCE_IO && s->io.owned) + s->io.fd = safe_close(s->io.fd); + + if (s->type == SOURCE_CHILD) { + /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */ + + if (s->child.process_owned) { + + if (!s->child.exited) { + bool sent = false; + + if (s->child.pidfd >= 0) { + if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) { + if (errno == ESRCH) /* Already dead */ + sent = true; + else if (!ERRNO_IS_NOT_SUPPORTED(errno)) + log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m", + s->child.pid); + } else + sent = true; + } + + if (!sent) + if (kill(s->child.pid, SIGKILL) < 0) + if (errno != ESRCH) /* Already dead */ + log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m", + s->child.pid); + } + + if (!s->child.waited) { + siginfo_t si = {}; + + /* Reap the child if we can */ + (void) waitid(P_PID, s->child.pid, &si, WEXITED); + } + } + + if (s->child.pidfd_owned) + s->child.pidfd = safe_close(s->child.pidfd); + } + + if (s->type == SOURCE_MEMORY_PRESSURE) { + s->memory_pressure.fd = safe_close(s->memory_pressure.fd); + s->memory_pressure.write_buffer = mfree(s->memory_pressure.write_buffer); + } + + if (s->destroy_callback) + s->destroy_callback(s->userdata); + + free(s->description); + return mfree(s); +} +DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free); + +static int source_set_pending(sd_event_source *s, bool b) { + int r; + + assert(s); + assert(s->type != SOURCE_EXIT); + + if (s->pending == b) + return 0; + + s->pending = b; + + if (b) { + s->pending_iteration = s->event->iteration; + + r = prioq_put(s->event->pending, s, &s->pending_index); + if (r < 0) { + s->pending = false; + return r; + } + } else + assert_se(prioq_remove(s->event->pending, s, &s->pending_index)); + + if (EVENT_SOURCE_IS_TIME(s->type)) + event_source_time_prioq_reshuffle(s); + + if (s->type == SOURCE_SIGNAL && !b) { + struct signal_data *d; + + d = hashmap_get(s->event->signal_data, &s->priority); + if (d && d->current == s) + d->current = NULL; + } + + if (s->type == SOURCE_INOTIFY) { + + assert(s->inotify.inode_data); + assert(s->inotify.inode_data->inotify_data); + + if (b) + s->inotify.inode_data->inotify_data->n_pending ++; + else { + assert(s->inotify.inode_data->inotify_data->n_pending > 0); + s->inotify.inode_data->inotify_data->n_pending --; + } + } + + return 1; +} + +static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) { + + /* Let's allocate exactly what we need. Note that the difference of the smallest event source + * structure to the largest is 144 bytes on x86-64 at the time of writing, i.e. more than two cache + * lines. */ + static const size_t size_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = { + [SOURCE_IO] = endoffsetof_field(sd_event_source, io), + [SOURCE_TIME_REALTIME] = endoffsetof_field(sd_event_source, time), + [SOURCE_TIME_BOOTTIME] = endoffsetof_field(sd_event_source, time), + [SOURCE_TIME_MONOTONIC] = endoffsetof_field(sd_event_source, time), + [SOURCE_TIME_REALTIME_ALARM] = endoffsetof_field(sd_event_source, time), + [SOURCE_TIME_BOOTTIME_ALARM] = endoffsetof_field(sd_event_source, time), + [SOURCE_SIGNAL] = endoffsetof_field(sd_event_source, signal), + [SOURCE_CHILD] = endoffsetof_field(sd_event_source, child), + [SOURCE_DEFER] = endoffsetof_field(sd_event_source, defer), + [SOURCE_POST] = endoffsetof_field(sd_event_source, post), + [SOURCE_EXIT] = endoffsetof_field(sd_event_source, exit), + [SOURCE_INOTIFY] = endoffsetof_field(sd_event_source, inotify), + [SOURCE_MEMORY_PRESSURE] = endoffsetof_field(sd_event_source, memory_pressure), + }; + + sd_event_source *s; + + assert(e); + assert(type >= 0); + assert(type < _SOURCE_EVENT_SOURCE_TYPE_MAX); + assert(size_table[type] > 0); + + s = malloc0(size_table[type]); + if (!s) + return NULL; + /* We use expand_to_usable() here to tell gcc that it should consider this an object of the full + * size, even if we only allocate the initial part we need. */ + s = expand_to_usable(s, sizeof(sd_event_source)); + + /* Note: we cannot use compound initialization here, because sizeof(sd_event_source) is likely larger + * than what we allocated here. */ + s->n_ref = 1; + s->event = e; + s->floating = floating; + s->type = type; + s->pending_index = PRIOQ_IDX_NULL; + s->prepare_index = PRIOQ_IDX_NULL; + + if (!floating) + sd_event_ref(e); + + LIST_PREPEND(sources, e->sources, s); + e->n_sources++; + + return s; +} + +static int io_exit_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) { + assert(s); + + return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata)); +} + +_public_ int sd_event_add_io( + sd_event *e, + sd_event_source **ret, + int fd, + uint32_t events, + sd_event_io_handler_t callback, + void *userdata) { + + _cleanup_(source_freep) sd_event_source *s = NULL; + int r; + + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(fd >= 0, -EBADF); + assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL); + assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(!event_origin_changed(e), -ECHILD); + + if (!callback) + callback = io_exit_callback; + + s = source_new(e, !ret, SOURCE_IO); + if (!s) + return -ENOMEM; + + s->wakeup = WAKEUP_EVENT_SOURCE; + s->io.fd = fd; + s->io.events = events; + s->io.callback = callback; + s->userdata = userdata; + s->enabled = SD_EVENT_ON; + + r = source_io_register(s, s->enabled, events); + if (r < 0) + return r; + + if (ret) + *ret = s; + TAKE_PTR(s); + + return 0; +} + +static void initialize_perturb(sd_event *e) { + sd_id128_t id = {}; + + /* When we sleep for longer, we try to realign the wakeup to the same time within each + * minute/second/250ms, so that events all across the system can be coalesced into a single CPU + * wakeup. However, let's take some system-specific randomness for this value, so that in a network + * of systems with synced clocks timer events are distributed a bit. Here, we calculate a + * perturbation usec offset from the boot ID (or machine ID if failed, e.g. /proc is not mounted). */ + + if (_likely_(e->perturb != USEC_INFINITY)) + return; + + if (sd_id128_get_boot(&id) >= 0 || sd_id128_get_machine(&id) >= 0) + e->perturb = (id.qwords[0] ^ id.qwords[1]) % USEC_PER_MINUTE; + else + e->perturb = 0; /* This is a super early process without /proc and /etc ?? */ +} + +static int event_setup_timer_fd( + sd_event *e, + struct clock_data *d, + clockid_t clock) { + + assert(e); + assert(d); + + if (_likely_(d->fd >= 0)) + return 0; + + _cleanup_close_ int fd = -EBADF; + + fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC); + if (fd < 0) + return -errno; + + fd = fd_move_above_stdio(fd); + + struct epoll_event ev = { + .events = EPOLLIN, + .data.ptr = d, + }; + + if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) + return -errno; + + d->fd = TAKE_FD(fd); + return 0; +} + +static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) { + assert(s); + + return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata)); +} + +static int setup_clock_data(sd_event *e, struct clock_data *d, clockid_t clock) { + int r; + + assert(d); + + if (d->fd < 0) { + r = event_setup_timer_fd(e, d, clock); + if (r < 0) + return r; + } + + r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare); + if (r < 0) + return r; + + r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare); + if (r < 0) + return r; + + return 0; +} + +static int event_source_time_prioq_put( + sd_event_source *s, + struct clock_data *d) { + + int r; + + assert(s); + assert(d); + assert(EVENT_SOURCE_USES_TIME_PRIOQ(s->type)); + + r = prioq_put(d->earliest, s, &s->earliest_index); + if (r < 0) + return r; + + r = prioq_put(d->latest, s, &s->latest_index); + if (r < 0) { + assert_se(prioq_remove(d->earliest, s, &s->earliest_index) > 0); + s->earliest_index = PRIOQ_IDX_NULL; + return r; + } + + d->needs_rearm = true; + return 0; +} + +_public_ int sd_event_add_time( + sd_event *e, + sd_event_source **ret, + clockid_t clock, + uint64_t usec, + uint64_t accuracy, + sd_event_time_handler_t callback, + void *userdata) { + + EventSourceType type; + _cleanup_(source_freep) sd_event_source *s = NULL; + struct clock_data *d; + int r; + + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(accuracy != UINT64_MAX, -EINVAL); + assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(!event_origin_changed(e), -ECHILD); + + if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */ + return -EOPNOTSUPP; + + type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */ + if (type < 0) + return -EOPNOTSUPP; + + if (!callback) + callback = time_exit_callback; + + assert_se(d = event_get_clock_data(e, type)); + + r = setup_clock_data(e, d, clock); + if (r < 0) + return r; + + s = source_new(e, !ret, type); + if (!s) + return -ENOMEM; + + s->time.next = usec; + s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy; + s->time.callback = callback; + s->earliest_index = s->latest_index = PRIOQ_IDX_NULL; + s->userdata = userdata; + s->enabled = SD_EVENT_ONESHOT; + + r = event_source_time_prioq_put(s, d); + if (r < 0) + return r; + + if (ret) + *ret = s; + TAKE_PTR(s); + + return 0; +} + +_public_ int sd_event_add_time_relative( + sd_event *e, + sd_event_source **ret, + clockid_t clock, + uint64_t usec, + uint64_t accuracy, + sd_event_time_handler_t callback, + void *userdata) { + + usec_t t; + int r; + + /* Same as sd_event_add_time() but operates relative to the event loop's current point in time, and + * checks for overflow. */ + + r = sd_event_now(e, clock, &t); + if (r < 0) + return r; + + if (usec >= USEC_INFINITY - t) + return -EOVERFLOW; + + return sd_event_add_time(e, ret, clock, t + usec, accuracy, callback, userdata); +} + +static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) { + assert(s); + + return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata)); +} + +_public_ int sd_event_add_signal( + sd_event *e, + sd_event_source **ret, + int sig, + sd_event_signal_handler_t callback, + void *userdata) { + + _cleanup_(source_freep) sd_event_source *s = NULL; + struct signal_data *d; + sigset_t new_ss; + bool block_it; + int r; + + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(!event_origin_changed(e), -ECHILD); + + /* Let's make sure our special flag stays outside of the valid signal range */ + assert_cc(_NSIG < SD_EVENT_SIGNAL_PROCMASK); + + if (sig & SD_EVENT_SIGNAL_PROCMASK) { + sig &= ~SD_EVENT_SIGNAL_PROCMASK; + assert_return(SIGNAL_VALID(sig), -EINVAL); + + block_it = true; + } else { + assert_return(SIGNAL_VALID(sig), -EINVAL); + + r = signal_is_blocked(sig); + if (r < 0) + return r; + if (r == 0) + return -EBUSY; + + block_it = false; + } + + if (!callback) + callback = signal_exit_callback; + + if (!e->signal_sources) { + e->signal_sources = new0(sd_event_source*, _NSIG); + if (!e->signal_sources) + return -ENOMEM; + } else if (e->signal_sources[sig]) + return -EBUSY; + + s = source_new(e, !ret, SOURCE_SIGNAL); + if (!s) + return -ENOMEM; + + s->signal.sig = sig; + s->signal.callback = callback; + s->userdata = userdata; + s->enabled = SD_EVENT_ON; + + e->signal_sources[sig] = s; + + if (block_it) { + sigset_t old_ss; + + if (sigemptyset(&new_ss) < 0) + return -errno; + + if (sigaddset(&new_ss, sig) < 0) + return -errno; + + r = pthread_sigmask(SIG_BLOCK, &new_ss, &old_ss); + if (r != 0) + return -r; + + r = sigismember(&old_ss, sig); + if (r < 0) + return -errno; + + s->signal.unblock = !r; + } else + s->signal.unblock = false; + + r = event_make_signal_data(e, sig, &d); + if (r < 0) { + if (s->signal.unblock) + (void) pthread_sigmask(SIG_UNBLOCK, &new_ss, NULL); + + return r; + } + + /* Use the signal name as description for the event source by default */ + (void) sd_event_source_set_description(s, signal_to_string(sig)); + + if (ret) + *ret = s; + TAKE_PTR(s); + + return 0; +} + +static int child_exit_callback(sd_event_source *s, const siginfo_t *si, void *userdata) { + assert(s); + + return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata)); +} + +static bool shall_use_pidfd(void) { + /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */ + return getenv_bool_secure("SYSTEMD_PIDFD") != 0; +} + +_public_ int sd_event_add_child( + sd_event *e, + sd_event_source **ret, + pid_t pid, + int options, + sd_event_child_handler_t callback, + void *userdata) { + + _cleanup_(source_freep) sd_event_source *s = NULL; + int r; + + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(pid > 1, -EINVAL); + assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL); + assert_return(options != 0, -EINVAL); + assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(!event_origin_changed(e), -ECHILD); + + if (!callback) + callback = child_exit_callback; + + if (e->n_online_child_sources == 0) { + /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available, + * for compatibility with pre-pidfd and because we don't want the reap the child processes + * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to + * take effect. + * + * (As an optimization we only do this check on the first child event source created.) */ + r = signal_is_blocked(SIGCHLD); + if (r < 0) + return r; + if (r == 0) + return -EBUSY; + } + + r = hashmap_ensure_allocated(&e->child_sources, NULL); + if (r < 0) + return r; + + if (hashmap_contains(e->child_sources, PID_TO_PTR(pid))) + return -EBUSY; + + s = source_new(e, !ret, SOURCE_CHILD); + if (!s) + return -ENOMEM; + + s->wakeup = WAKEUP_EVENT_SOURCE; + s->child.options = options; + s->child.callback = callback; + s->userdata = userdata; + s->enabled = SD_EVENT_ONESHOT; + + /* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we + * pin the PID, and make regular waitid() handling race-free. */ + + if (shall_use_pidfd()) { + s->child.pidfd = pidfd_open(pid, 0); + if (s->child.pidfd < 0) { + /* Propagate errors unless the syscall is not supported or blocked */ + if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) + return -errno; + } else + s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */ + } else + s->child.pidfd = -EBADF; + + if (EVENT_SOURCE_WATCH_PIDFD(s)) { + /* We have a pidfd and we only want to watch for exit */ + r = source_child_pidfd_register(s, s->enabled); + if (r < 0) + return r; + + } else { + /* We have no pidfd or we shall wait for some other event than WEXITED */ + r = event_make_signal_data(e, SIGCHLD, NULL); + if (r < 0) + return r; + + e->need_process_child = true; + } + + r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s); + if (r < 0) + return r; + + /* These must be done after everything succeeds. */ + s->child.pid = pid; + e->n_online_child_sources++; + + if (ret) + *ret = s; + TAKE_PTR(s); + return 0; +} + +_public_ int sd_event_add_child_pidfd( + sd_event *e, + sd_event_source **ret, + int pidfd, + int options, + sd_event_child_handler_t callback, + void *userdata) { + + + _cleanup_(source_freep) sd_event_source *s = NULL; + pid_t pid; + int r; + + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(pidfd >= 0, -EBADF); + assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL); + assert_return(options != 0, -EINVAL); + assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(!event_origin_changed(e), -ECHILD); + + if (!callback) + callback = child_exit_callback; + + if (e->n_online_child_sources == 0) { + r = signal_is_blocked(SIGCHLD); + if (r < 0) + return r; + if (r == 0) + return -EBUSY; + } + + r = hashmap_ensure_allocated(&e->child_sources, NULL); + if (r < 0) + return r; + + r = pidfd_get_pid(pidfd, &pid); + if (r < 0) + return r; + + if (hashmap_contains(e->child_sources, PID_TO_PTR(pid))) + return -EBUSY; + + s = source_new(e, !ret, SOURCE_CHILD); + if (!s) + return -ENOMEM; + + s->wakeup = WAKEUP_EVENT_SOURCE; + s->child.pidfd = pidfd; + s->child.pid = pid; + s->child.options = options; + s->child.callback = callback; + s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */ + s->userdata = userdata; + s->enabled = SD_EVENT_ONESHOT; + + r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s); + if (r < 0) + return r; + + if (EVENT_SOURCE_WATCH_PIDFD(s)) { + /* We only want to watch for WEXITED */ + r = source_child_pidfd_register(s, s->enabled); + if (r < 0) + return r; + } else { + /* We shall wait for some other event than WEXITED */ + r = event_make_signal_data(e, SIGCHLD, NULL); + if (r < 0) + return r; + + e->need_process_child = true; + } + + e->n_online_child_sources++; + + if (ret) + *ret = s; + TAKE_PTR(s); + return 0; +} + +static int generic_exit_callback(sd_event_source *s, void *userdata) { + assert(s); + + return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata)); +} + +_public_ int sd_event_add_defer( + sd_event *e, + sd_event_source **ret, + sd_event_handler_t callback, + void *userdata) { + + _cleanup_(source_freep) sd_event_source *s = NULL; + int r; + + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(!event_origin_changed(e), -ECHILD); + + if (!callback) + callback = generic_exit_callback; + + s = source_new(e, !ret, SOURCE_DEFER); + if (!s) + return -ENOMEM; + + s->defer.callback = callback; + s->userdata = userdata; + s->enabled = SD_EVENT_ONESHOT; + + r = source_set_pending(s, true); + if (r < 0) + return r; + + if (ret) + *ret = s; + TAKE_PTR(s); + + return 0; +} + +_public_ int sd_event_add_post( + sd_event *e, + sd_event_source **ret, + sd_event_handler_t callback, + void *userdata) { + + _cleanup_(source_freep) sd_event_source *s = NULL; + int r; + + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(!event_origin_changed(e), -ECHILD); + + if (!callback) + callback = generic_exit_callback; + + s = source_new(e, !ret, SOURCE_POST); + if (!s) + return -ENOMEM; + + s->post.callback = callback; + s->userdata = userdata; + s->enabled = SD_EVENT_ON; + + r = set_ensure_put(&e->post_sources, NULL, s); + if (r < 0) + return r; + assert(r > 0); + + if (ret) + *ret = s; + TAKE_PTR(s); + + return 0; +} + +_public_ int sd_event_add_exit( + sd_event *e, + sd_event_source **ret, + sd_event_handler_t callback, + void *userdata) { + + _cleanup_(source_freep) sd_event_source *s = NULL; + int r; + + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(callback, -EINVAL); + assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(!event_origin_changed(e), -ECHILD); + + r = prioq_ensure_allocated(&e->exit, exit_prioq_compare); + if (r < 0) + return r; + + s = source_new(e, !ret, SOURCE_EXIT); + if (!s) + return -ENOMEM; + + s->exit.callback = callback; + s->userdata = userdata; + s->exit.prioq_index = PRIOQ_IDX_NULL; + s->enabled = SD_EVENT_ONESHOT; + + r = prioq_put(s->event->exit, s, &s->exit.prioq_index); + if (r < 0) + return r; + + if (ret) + *ret = s; + TAKE_PTR(s); + + return 0; +} + +_public_ int sd_event_trim_memory(void) { + int r; + + /* A default implementation of a memory pressure callback. Simply releases our own allocation caches + * and glibc's. This is automatically used when people call sd_event_add_memory_pressure() with a + * NULL callback parameter. */ + + log_debug("Memory pressure event, trimming malloc() memory."); + +#if HAVE_GENERIC_MALLINFO + generic_mallinfo before_mallinfo = generic_mallinfo_get(); +#endif + + usec_t before_timestamp = now(CLOCK_MONOTONIC); + hashmap_trim_pools(); + r = malloc_trim(0); + usec_t after_timestamp = now(CLOCK_MONOTONIC); + + if (r > 0) + log_debug("Successfully trimmed some memory."); + else + log_debug("Couldn't trim any memory."); + + usec_t period = after_timestamp - before_timestamp; + +#if HAVE_GENERIC_MALLINFO + generic_mallinfo after_mallinfo = generic_mallinfo_get(); + size_t l = LESS_BY((size_t) before_mallinfo.hblkhd, (size_t) after_mallinfo.hblkhd) + + LESS_BY((size_t) before_mallinfo.arena, (size_t) after_mallinfo.arena); + log_struct(LOG_DEBUG, + LOG_MESSAGE("Memory trimming took %s, returned %s to OS.", + FORMAT_TIMESPAN(period, 0), + FORMAT_BYTES(l)), + "MESSAGE_ID=" SD_MESSAGE_MEMORY_TRIM_STR, + "TRIMMED_BYTES=%zu", l, + "TRIMMED_USEC=" USEC_FMT, period); +#else + log_struct(LOG_DEBUG, + LOG_MESSAGE("Memory trimming took %s.", + FORMAT_TIMESPAN(period, 0)), + "MESSAGE_ID=" SD_MESSAGE_MEMORY_TRIM_STR, + "TRIMMED_USEC=" USEC_FMT, period); +#endif + + return 0; +} + +static int memory_pressure_callback(sd_event_source *s, void *userdata) { + assert(s); + + sd_event_trim_memory(); + return 0; +} + +_public_ int sd_event_add_memory_pressure( + sd_event *e, + sd_event_source **ret, + sd_event_handler_t callback, + void *userdata) { + + _cleanup_free_ char *w = NULL; + _cleanup_(source_freep) sd_event_source *s = NULL; + _cleanup_close_ int path_fd = -EBADF, fd = -EBADF; + _cleanup_free_ void *write_buffer = NULL; + const char *watch, *watch_fallback = NULL, *env; + size_t write_buffer_size = 0; + struct stat st; + uint32_t events; + bool locked; + int r; + + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(!event_origin_changed(e), -ECHILD); + + if (!callback) + callback = memory_pressure_callback; + + s = source_new(e, !ret, SOURCE_MEMORY_PRESSURE); + if (!s) + return -ENOMEM; + + s->wakeup = WAKEUP_EVENT_SOURCE; + s->memory_pressure.callback = callback; + s->userdata = userdata; + s->enabled = SD_EVENT_ON; + s->memory_pressure.fd = -EBADF; + + env = secure_getenv("MEMORY_PRESSURE_WATCH"); + if (env) { + if (isempty(env) || path_equal(env, "/dev/null")) + return log_debug_errno(SYNTHETIC_ERRNO(EHOSTDOWN), + "Memory pressure logic is explicitly disabled via $MEMORY_PRESSURE_WATCH."); + + if (!path_is_absolute(env) || !path_is_normalized(env)) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "$MEMORY_PRESSURE_WATCH set to invalid path: %s", env); + + watch = env; + + env = secure_getenv("MEMORY_PRESSURE_WRITE"); + if (env) { + r = unbase64mem(env, SIZE_MAX, &write_buffer, &write_buffer_size); + if (r < 0) + return r; + } + + locked = true; + } else { + + r = is_pressure_supported(); + if (r < 0) + return r; + if (r == 0) + return -EOPNOTSUPP; + + /* By default we want to watch memory pressure on the local cgroup, but we'll fall back on + * the system wide pressure if for some reason we cannot (which could be: memory controller + * not delegated to us, or PSI simply not available in the kernel). On legacy cgroupv1 we'll + * only use the system-wide logic. */ + r = cg_all_unified(); + if (r < 0) + return r; + if (r == 0) + watch = "/proc/pressure/memory"; + else { + _cleanup_free_ char *cg = NULL; + + r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cg); + if (r < 0) + return r; + + w = path_join("/sys/fs/cgroup", cg, "memory.pressure"); + if (!w) + return -ENOMEM; + + watch = w; + watch_fallback = "/proc/pressure/memory"; + } + + /* Android uses three levels in its userspace low memory killer logic: + * some 70000 1000000 + * some 100000 1000000 + * full 70000 1000000 + * + * GNOME's low memory monitor uses: + * some 70000 1000000 + * some 100000 1000000 + * full 100000 1000000 + * + * We'll default to the middle level that both agree on. Except we do it on a 2s window + * (i.e. 200ms per 2s, rather than 100ms per 1s), because that's the window duration the + * kernel will allow us to do unprivileged, also in the future. */ + if (asprintf((char**) &write_buffer, + "%s " USEC_FMT " " USEC_FMT, + MEMORY_PRESSURE_DEFAULT_TYPE, + MEMORY_PRESSURE_DEFAULT_THRESHOLD_USEC, + MEMORY_PRESSURE_DEFAULT_WINDOW_USEC) < 0) + return -ENOMEM; + + write_buffer_size = strlen(write_buffer) + 1; + locked = false; + } + + path_fd = open(watch, O_PATH|O_CLOEXEC); + if (path_fd < 0) { + if (errno != ENOENT) + return -errno; + + /* We got ENOENT. Three options now: try the fallback if we have one, or return the error as + * is (if based on user/env config), or return -EOPNOTSUPP (because we picked the path, and + * the PSI service apparently is not supported) */ + if (!watch_fallback) + return locked ? -ENOENT : -EOPNOTSUPP; + + path_fd = open(watch_fallback, O_PATH|O_CLOEXEC); + if (path_fd < 0) { + if (errno == ENOENT) /* PSI is not available in the kernel even under the fallback path? */ + return -EOPNOTSUPP; + return -errno; + } + } + + if (fstat(path_fd, &st) < 0) + return -errno; + + if (S_ISSOCK(st.st_mode)) { + fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); + if (fd < 0) + return -errno; + + r = connect_unix_path(fd, path_fd, NULL); + if (r < 0) + return r; + + events = EPOLLIN; + + } else if (S_ISREG(st.st_mode) || S_ISFIFO(st.st_mode) || S_ISCHR(st.st_mode)) { + fd = fd_reopen(path_fd, (write_buffer_size > 0 ? O_RDWR : O_RDONLY) |O_CLOEXEC|O_NONBLOCK|O_NOCTTY); + if (fd < 0) + return fd; + + if (S_ISREG(st.st_mode)) { + struct statfs sfs; + + /* If this is a regular file validate this is a procfs or cgroupfs file, where we look for EPOLLPRI */ + + if (fstatfs(fd, &sfs) < 0) + return -errno; + + if (!is_fs_type(&sfs, PROC_SUPER_MAGIC) && + !is_fs_type(&sfs, CGROUP2_SUPER_MAGIC)) + return -ENOTTY; + + events = EPOLLPRI; + } else + /* For fifos and char devices just watch for EPOLLIN */ + events = EPOLLIN; + + } else if (S_ISDIR(st.st_mode)) + return -EISDIR; + else + return -EBADF; + + s->memory_pressure.fd = TAKE_FD(fd); + s->memory_pressure.write_buffer = TAKE_PTR(write_buffer); + s->memory_pressure.write_buffer_size = write_buffer_size; + s->memory_pressure.events = events; + s->memory_pressure.locked = locked; + + /* So here's the thing: if we are talking to PSI we need to write the watch string before adding the + * fd to epoll (if we ignore this, then the watch won't work). Hence we'll not actually register the + * fd with the epoll right-away. Instead, we just add the event source to a list of memory pressure + * event sources on which writes must be executed before the first event loop iteration is + * executed. (We could also write the data here, right away, but we want to give the caller the + * freedom to call sd_event_source_set_memory_pressure_type() and + * sd_event_source_set_memory_pressure_rate() before we write it. */ + + if (s->memory_pressure.write_buffer_size > 0) + source_memory_pressure_add_to_write_list(s); + else { + r = source_memory_pressure_register(s, s->enabled); + if (r < 0) + return r; + } + + if (ret) + *ret = s; + TAKE_PTR(s); + + return 0; +} + +static void event_free_inotify_data(sd_event *e, struct inotify_data *d) { + assert(e); + + if (!d) + return; + + assert(hashmap_isempty(d->inodes)); + assert(hashmap_isempty(d->wd)); + + if (d->buffer_filled > 0) + LIST_REMOVE(buffered, e->buffered_inotify_data_list, d); + + hashmap_free(d->inodes); + hashmap_free(d->wd); + + assert_se(hashmap_remove(e->inotify_data, &d->priority) == d); + + if (d->fd >= 0) { + if (!event_origin_changed(e) && + epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0) + log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m"); + + safe_close(d->fd); + } + free(d); +} + +static int event_make_inotify_data( + sd_event *e, + int64_t priority, + struct inotify_data **ret) { + + _cleanup_close_ int fd = -EBADF; + struct inotify_data *d; + int r; + + assert(e); + + d = hashmap_get(e->inotify_data, &priority); + if (d) { + if (ret) + *ret = d; + return 0; + } + + fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC); + if (fd < 0) + return -errno; + + fd = fd_move_above_stdio(fd); + + d = new(struct inotify_data, 1); + if (!d) + return -ENOMEM; + + *d = (struct inotify_data) { + .wakeup = WAKEUP_INOTIFY_DATA, + .fd = TAKE_FD(fd), + .priority = priority, + }; + + r = hashmap_ensure_put(&e->inotify_data, &uint64_hash_ops, &d->priority, d); + if (r < 0) { + d->fd = safe_close(d->fd); + free(d); + return r; + } + + struct epoll_event ev = { + .events = EPOLLIN, + .data.ptr = d, + }; + + if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) { + r = -errno; + d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise + * remove the fd from the epoll first, which we don't want as we couldn't + * add it in the first place. */ + event_free_inotify_data(e, d); + return r; + } + + if (ret) + *ret = d; + + return 1; +} + +static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) { + int r; + + assert(x); + assert(y); + + r = CMP(x->dev, y->dev); + if (r != 0) + return r; + + return CMP(x->ino, y->ino); +} + +static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) { + assert(d); + + siphash24_compress(&d->dev, sizeof(d->dev), state); + siphash24_compress(&d->ino, sizeof(d->ino), state); +} + +DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare); + +static void event_free_inode_data( + sd_event *e, + struct inode_data *d) { + + assert(e); + + if (!d) + return; + + assert(!d->event_sources); + + if (d->fd >= 0) { + LIST_REMOVE(to_close, e->inode_data_to_close_list, d); + safe_close(d->fd); + } + + if (d->inotify_data) { + + if (d->wd >= 0) { + if (d->inotify_data->fd >= 0 && !event_origin_changed(e)) { + /* So here's a problem. At the time this runs the watch descriptor might already be + * invalidated, because an IN_IGNORED event might be queued right the moment we enter + * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very + * likely case to happen. */ + + if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL) + log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd); + } + + assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d); + } + + assert_se(hashmap_remove(d->inotify_data->inodes, d) == d); + } + + free(d); +} + +static void event_gc_inotify_data( + sd_event *e, + struct inotify_data *d) { + + assert(e); + + /* GCs the inotify data object if we don't need it anymore. That's the case if we don't want to watch + * any inode with it anymore, which in turn happens if no event source of this priority is interested + * in any inode any longer. That said, we maintain an extra busy counter: if non-zero we'll delay GC + * (under the expectation that the GC is called again once the counter is decremented). */ + + if (!d) + return; + + if (!hashmap_isempty(d->inodes)) + return; + + if (d->n_busy > 0) + return; + + event_free_inotify_data(e, d); +} + +static void event_gc_inode_data( + sd_event *e, + struct inode_data *d) { + + struct inotify_data *inotify_data; + + assert(e); + + if (!d) + return; + + if (d->event_sources) + return; + + inotify_data = d->inotify_data; + event_free_inode_data(e, d); + + event_gc_inotify_data(e, inotify_data); +} + +static int event_make_inode_data( + sd_event *e, + struct inotify_data *inotify_data, + dev_t dev, + ino_t ino, + struct inode_data **ret) { + + struct inode_data *d, key; + int r; + + assert(e); + assert(inotify_data); + + key = (struct inode_data) { + .ino = ino, + .dev = dev, + }; + + d = hashmap_get(inotify_data->inodes, &key); + if (d) { + if (ret) + *ret = d; + + return 0; + } + + r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops); + if (r < 0) + return r; + + d = new(struct inode_data, 1); + if (!d) + return -ENOMEM; + + *d = (struct inode_data) { + .dev = dev, + .ino = ino, + .wd = -1, + .fd = -EBADF, + .inotify_data = inotify_data, + }; + + r = hashmap_put(inotify_data->inodes, d, d); + if (r < 0) { + free(d); + return r; + } + + if (ret) + *ret = d; + + return 1; +} + +static uint32_t inode_data_determine_mask(struct inode_data *d) { + bool excl_unlink = true; + uint32_t combined = 0; + + assert(d); + + /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but + * the IN_EXCL_UNLINK flag is ANDed instead. + * + * Note that we add all sources to the mask here, regardless whether enabled, disabled or oneshot. That's + * because we cannot change the mask anymore after the event source was created once, since the kernel has no + * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress + * events we don't care for client-side. */ + + LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) { + + if ((s->inotify.mask & IN_EXCL_UNLINK) == 0) + excl_unlink = false; + + combined |= s->inotify.mask; + } + + return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0); +} + +static int inode_data_realize_watch(sd_event *e, struct inode_data *d) { + uint32_t combined_mask; + int wd, r; + + assert(d); + assert(d->fd >= 0); + + combined_mask = inode_data_determine_mask(d); + + if (d->wd >= 0 && combined_mask == d->combined_mask) + return 0; + + r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL); + if (r < 0) + return r; + + wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask); + if (wd < 0) + return -errno; + + if (d->wd < 0) { + r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d); + if (r < 0) { + (void) inotify_rm_watch(d->inotify_data->fd, wd); + return r; + } + + d->wd = wd; + + } else if (d->wd != wd) { + + log_debug("Weird, the watch descriptor we already knew for this inode changed?"); + (void) inotify_rm_watch(d->fd, wd); + return -EINVAL; + } + + d->combined_mask = combined_mask; + return 1; +} + +static int inotify_exit_callback(sd_event_source *s, const struct inotify_event *event, void *userdata) { + assert(s); + + return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata)); +} + +static int event_add_inotify_fd_internal( + sd_event *e, + sd_event_source **ret, + int fd, + bool donate, + uint32_t mask, + sd_event_inotify_handler_t callback, + void *userdata) { + + _cleanup_close_ int donated_fd = donate ? fd : -EBADF; + _cleanup_(source_freep) sd_event_source *s = NULL; + struct inotify_data *inotify_data = NULL; + struct inode_data *inode_data = NULL; + struct stat st; + int r; + + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(fd >= 0, -EBADF); + assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(!event_origin_changed(e), -ECHILD); + + if (!callback) + callback = inotify_exit_callback; + + /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge + * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence + * the user can't use them for us. */ + if (mask & IN_MASK_ADD) + return -EINVAL; + + if (fstat(fd, &st) < 0) + return -errno; + + s = source_new(e, !ret, SOURCE_INOTIFY); + if (!s) + return -ENOMEM; + + s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON; + s->inotify.mask = mask; + s->inotify.callback = callback; + s->userdata = userdata; + + /* Allocate an inotify object for this priority, and an inode object within it */ + r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data); + if (r < 0) + return r; + + r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data); + if (r < 0) { + event_gc_inotify_data(e, inotify_data); + return r; + } + + /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of + * the event source, until then, for which we need the original inode. */ + if (inode_data->fd < 0) { + if (donated_fd >= 0) + inode_data->fd = TAKE_FD(donated_fd); + else { + inode_data->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); + if (inode_data->fd < 0) { + r = -errno; + event_gc_inode_data(e, inode_data); + return r; + } + } + + LIST_PREPEND(to_close, e->inode_data_to_close_list, inode_data); + } + + /* Link our event source to the inode data object */ + LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s); + s->inotify.inode_data = inode_data; + + /* Actually realize the watch now */ + r = inode_data_realize_watch(e, inode_data); + if (r < 0) + return r; + + if (ret) + *ret = s; + TAKE_PTR(s); + + return 0; +} + +_public_ int sd_event_add_inotify_fd( + sd_event *e, + sd_event_source **ret, + int fd, + uint32_t mask, + sd_event_inotify_handler_t callback, + void *userdata) { + + return event_add_inotify_fd_internal(e, ret, fd, /* donate= */ false, mask, callback, userdata); +} + +_public_ int sd_event_add_inotify( + sd_event *e, + sd_event_source **ret, + const char *path, + uint32_t mask, + sd_event_inotify_handler_t callback, + void *userdata) { + + sd_event_source *s = NULL; /* avoid false maybe-uninitialized warning */ + int fd, r; + + assert_return(path, -EINVAL); + + fd = open(path, O_PATH | O_CLOEXEC | + (mask & IN_ONLYDIR ? O_DIRECTORY : 0) | + (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0)); + if (fd < 0) + return -errno; + + r = event_add_inotify_fd_internal(e, &s, fd, /* donate= */ true, mask, callback, userdata); + if (r < 0) + return r; + + (void) sd_event_source_set_description(s, path); + + if (ret) + *ret = s; + + return r; +} + +static sd_event_source* event_source_free(sd_event_source *s) { + if (!s) + return NULL; + + /* Here's a special hack: when we are called from a + * dispatch handler we won't free the event source + * immediately, but we will detach the fd from the + * epoll. This way it is safe for the caller to unref + * the event source and immediately close the fd, but + * we still retain a valid event source object after + * the callback. */ + + if (s->dispatching) + source_disconnect(s); + else + source_free(s); + + return NULL; +} + +DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free); + +_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) { + assert_return(s, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); + + return free_and_strdup(&s->description, description); +} + +_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) { + assert_return(s, -EINVAL); + assert_return(description, -EINVAL); + + if (!s->description) + return -ENXIO; + + *description = s->description; + return 0; +} + +_public_ sd_event *sd_event_source_get_event(sd_event_source *s) { + assert_return(s, NULL); + assert_return(!event_origin_changed(s->event), NULL); + + return s->event; +} + +_public_ int sd_event_source_get_pending(sd_event_source *s) { + assert_return(s, -EINVAL); + assert_return(s->type != SOURCE_EXIT, -EDOM); + assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(!event_origin_changed(s->event), -ECHILD); + + return s->pending; +} + +_public_ int sd_event_source_get_io_fd(sd_event_source *s) { + assert_return(s, -EINVAL); + assert_return(s->type == SOURCE_IO, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + + return s->io.fd; +} + +_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) { + int r; + + assert_return(s, -EINVAL); + assert_return(fd >= 0, -EBADF); + assert_return(s->type == SOURCE_IO, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + + if (s->io.fd == fd) + return 0; + + if (event_source_is_offline(s)) { + s->io.fd = fd; + s->io.registered = false; + } else { + int saved_fd; + + saved_fd = s->io.fd; + assert(s->io.registered); + + s->io.fd = fd; + s->io.registered = false; + + r = source_io_register(s, s->enabled, s->io.events); + if (r < 0) { + s->io.fd = saved_fd; + s->io.registered = true; + return r; + } + + (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL); + } + + return 0; +} + +_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) { + assert_return(s, -EINVAL); + assert_return(s->type == SOURCE_IO, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + + return s->io.owned; +} + +_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) { + assert_return(s, -EINVAL); + assert_return(s->type == SOURCE_IO, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + + s->io.owned = own; + return 0; +} + +_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) { + assert_return(s, -EINVAL); + assert_return(events, -EINVAL); + assert_return(s->type == SOURCE_IO, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + + *events = s->io.events; + return 0; +} + +_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) { + int r; + + assert_return(s, -EINVAL); + assert_return(s->type == SOURCE_IO, -EDOM); + assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL); + assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(!event_origin_changed(s->event), -ECHILD); + + /* edge-triggered updates are never skipped, so we can reset edges */ + if (s->io.events == events && !(events & EPOLLET)) + return 0; + + r = source_set_pending(s, false); + if (r < 0) + return r; + + if (event_source_is_online(s)) { + r = source_io_register(s, s->enabled, events); + if (r < 0) + return r; + } + + s->io.events = events; + + return 0; +} + +_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) { + assert_return(s, -EINVAL); + assert_return(revents, -EINVAL); + assert_return(s->type == SOURCE_IO, -EDOM); + assert_return(s->pending, -ENODATA); + assert_return(!event_origin_changed(s->event), -ECHILD); + + *revents = s->io.revents; + return 0; +} + +_public_ int sd_event_source_get_signal(sd_event_source *s) { + assert_return(s, -EINVAL); + assert_return(s->type == SOURCE_SIGNAL, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + + return s->signal.sig; +} + +_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) { + assert_return(s, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); + + *priority = s->priority; + return 0; +} + +_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) { + bool rm_inotify = false, rm_inode = false; + struct inotify_data *new_inotify_data = NULL; + struct inode_data *new_inode_data = NULL; + int r; + + assert_return(s, -EINVAL); + assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(!event_origin_changed(s->event), -ECHILD); + + if (s->priority == priority) + return 0; + + if (s->type == SOURCE_INOTIFY) { + struct inode_data *old_inode_data; + + assert(s->inotify.inode_data); + old_inode_data = s->inotify.inode_data; + + /* We need the original fd to change the priority. If we don't have it we can't change the priority, + * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify + * events we allow priority changes only until the first following iteration. */ + if (old_inode_data->fd < 0) + return -EOPNOTSUPP; + + r = event_make_inotify_data(s->event, priority, &new_inotify_data); + if (r < 0) + return r; + rm_inotify = r > 0; + + r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data); + if (r < 0) + goto fail; + rm_inode = r > 0; + + if (new_inode_data->fd < 0) { + /* Duplicate the fd for the new inode object if we don't have any yet */ + new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3); + if (new_inode_data->fd < 0) { + r = -errno; + goto fail; + } + + LIST_PREPEND(to_close, s->event->inode_data_to_close_list, new_inode_data); + } + + /* Move the event source to the new inode data structure */ + LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s); + LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s); + s->inotify.inode_data = new_inode_data; + + /* Now create the new watch */ + r = inode_data_realize_watch(s->event, new_inode_data); + if (r < 0) { + /* Move it back */ + LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s); + LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s); + s->inotify.inode_data = old_inode_data; + goto fail; + } + + s->priority = priority; + + event_gc_inode_data(s->event, old_inode_data); + + } else if (s->type == SOURCE_SIGNAL && event_source_is_online(s)) { + struct signal_data *old, *d; + + /* Move us from the signalfd belonging to the old + * priority to the signalfd of the new priority */ + + assert_se(old = hashmap_get(s->event->signal_data, &s->priority)); + + s->priority = priority; + + r = event_make_signal_data(s->event, s->signal.sig, &d); + if (r < 0) { + s->priority = old->priority; + return r; + } + + event_unmask_signal_data(s->event, old, s->signal.sig); + } else + s->priority = priority; + + event_source_pp_prioq_reshuffle(s); + + if (s->type == SOURCE_EXIT) + prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index); + + return 0; + +fail: + if (rm_inode) + event_free_inode_data(s->event, new_inode_data); + + if (rm_inotify) + event_free_inotify_data(s->event, new_inotify_data); + + return r; +} + +_public_ int sd_event_source_get_enabled(sd_event_source *s, int *ret) { + /* Quick mode: the event source doesn't exist and we only want to query boolean enablement state. */ + if (!s && !ret) + return false; + + assert_return(s, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); + + if (ret) + *ret = s->enabled; + + return s->enabled != SD_EVENT_OFF; +} + +static int event_source_offline( + sd_event_source *s, + int enabled, + bool ratelimited) { + + bool was_offline; + int r; + + assert(s); + assert(enabled == SD_EVENT_OFF || ratelimited); + + /* Unset the pending flag when this event source is disabled */ + if (s->enabled != SD_EVENT_OFF && + enabled == SD_EVENT_OFF && + !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) { + r = source_set_pending(s, false); + if (r < 0) + return r; + } + + was_offline = event_source_is_offline(s); + s->enabled = enabled; + s->ratelimited = ratelimited; + + switch (s->type) { + + case SOURCE_IO: + source_io_unregister(s); + break; + + case SOURCE_SIGNAL: + event_gc_signal_data(s->event, &s->priority, s->signal.sig); + break; + + case SOURCE_CHILD: + if (!was_offline) { + assert(s->event->n_online_child_sources > 0); + s->event->n_online_child_sources--; + } + + if (EVENT_SOURCE_WATCH_PIDFD(s)) + source_child_pidfd_unregister(s); + else + event_gc_signal_data(s->event, &s->priority, SIGCHLD); + break; + + case SOURCE_EXIT: + prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index); + break; + + case SOURCE_MEMORY_PRESSURE: + source_memory_pressure_unregister(s); + break; + + case SOURCE_TIME_REALTIME: + case SOURCE_TIME_BOOTTIME: + case SOURCE_TIME_MONOTONIC: + case SOURCE_TIME_REALTIME_ALARM: + case SOURCE_TIME_BOOTTIME_ALARM: + case SOURCE_DEFER: + case SOURCE_POST: + case SOURCE_INOTIFY: + break; + + default: + assert_not_reached(); + } + + /* Always reshuffle time prioq, as the ratelimited flag may be changed. */ + event_source_time_prioq_reshuffle(s); + + return 1; +} + +static int event_source_online( + sd_event_source *s, + int enabled, + bool ratelimited) { + + bool was_online; + int r; + + assert(s); + assert(enabled != SD_EVENT_OFF || !ratelimited); + + /* Unset the pending flag when this event source is enabled */ + if (s->enabled == SD_EVENT_OFF && + enabled != SD_EVENT_OFF && + !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) { + r = source_set_pending(s, false); + if (r < 0) + return r; + } + + /* Are we really ready for onlining? */ + if (enabled == SD_EVENT_OFF || ratelimited) { + /* Nope, we are not ready for onlining, then just update the precise state and exit */ + s->enabled = enabled; + s->ratelimited = ratelimited; + return 0; + } + + was_online = event_source_is_online(s); + + switch (s->type) { + case SOURCE_IO: + r = source_io_register(s, enabled, s->io.events); + if (r < 0) + return r; + break; + + case SOURCE_SIGNAL: + r = event_make_signal_data(s->event, s->signal.sig, NULL); + if (r < 0) { + event_gc_signal_data(s->event, &s->priority, s->signal.sig); + return r; + } + + break; + + case SOURCE_CHILD: + if (EVENT_SOURCE_WATCH_PIDFD(s)) { + /* yes, we have pidfd */ + + r = source_child_pidfd_register(s, enabled); + if (r < 0) + return r; + } else { + /* no pidfd, or something other to watch for than WEXITED */ + + r = event_make_signal_data(s->event, SIGCHLD, NULL); + if (r < 0) { + event_gc_signal_data(s->event, &s->priority, SIGCHLD); + return r; + } + } + + if (!was_online) + s->event->n_online_child_sources++; + break; + + case SOURCE_MEMORY_PRESSURE: + r = source_memory_pressure_register(s, enabled); + if (r < 0) + return r; + + break; + + case SOURCE_TIME_REALTIME: + case SOURCE_TIME_BOOTTIME: + case SOURCE_TIME_MONOTONIC: + case SOURCE_TIME_REALTIME_ALARM: + case SOURCE_TIME_BOOTTIME_ALARM: + case SOURCE_EXIT: + case SOURCE_DEFER: + case SOURCE_POST: + case SOURCE_INOTIFY: + break; + + default: + assert_not_reached(); + } + + s->enabled = enabled; + s->ratelimited = ratelimited; + + /* Non-failing operations below */ + if (s->type == SOURCE_EXIT) + prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index); + + /* Always reshuffle time prioq, as the ratelimited flag may be changed. */ + event_source_time_prioq_reshuffle(s); + + return 1; +} + +_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) { + int r; + + assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL); + + /* Quick mode: if the source doesn't exist, SD_EVENT_OFF is a noop. */ + if (m == SD_EVENT_OFF && !s) + return 0; + + assert_return(s, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); + + /* If we are dead anyway, we are fine with turning off sources, but everything else needs to fail. */ + if (s->event->state == SD_EVENT_FINISHED) + return m == SD_EVENT_OFF ? 0 : -ESTALE; + + if (s->enabled == m) /* No change? */ + return 0; + + if (m == SD_EVENT_OFF) + r = event_source_offline(s, m, s->ratelimited); + else { + if (s->enabled != SD_EVENT_OFF) { + /* Switching from "on" to "oneshot" or back? If that's the case, we can take a shortcut, the + * event source is already enabled after all. */ + s->enabled = m; + return 0; + } + + r = event_source_online(s, m, s->ratelimited); + } + if (r < 0) + return r; + + event_source_pp_prioq_reshuffle(s); + return 0; +} + +_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) { + assert_return(s, -EINVAL); + assert_return(usec, -EINVAL); + assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + + *usec = s->time.next; + return 0; +} + +_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) { + int r; + + assert_return(s, -EINVAL); + assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM); + assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(!event_origin_changed(s->event), -ECHILD); + + r = source_set_pending(s, false); + if (r < 0) + return r; + + s->time.next = usec; + + event_source_time_prioq_reshuffle(s); + return 0; +} + +_public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec) { + usec_t t; + int r; + + assert_return(s, -EINVAL); + assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + + if (usec == USEC_INFINITY) + return sd_event_source_set_time(s, USEC_INFINITY); + + r = sd_event_now(s->event, event_source_type_to_clock(s->type), &t); + if (r < 0) + return r; + + usec = usec_add(t, usec); + if (usec == USEC_INFINITY) + return -EOVERFLOW; + + return sd_event_source_set_time(s, usec); +} + +_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) { + assert_return(s, -EINVAL); + assert_return(usec, -EINVAL); + assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + + *usec = s->time.accuracy; + return 0; +} + +_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) { + int r; + + assert_return(s, -EINVAL); + assert_return(usec != UINT64_MAX, -EINVAL); + assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM); + assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(!event_origin_changed(s->event), -ECHILD); + + r = source_set_pending(s, false); + if (r < 0) + return r; + + if (usec == 0) + usec = DEFAULT_ACCURACY_USEC; + + s->time.accuracy = usec; + + event_source_time_prioq_reshuffle(s); + return 0; +} + +_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) { + assert_return(s, -EINVAL); + assert_return(clock, -EINVAL); + assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + + *clock = event_source_type_to_clock(s->type); + return 0; +} + +_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) { + assert_return(s, -EINVAL); + assert_return(pid, -EINVAL); + assert_return(s->type == SOURCE_CHILD, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + + *pid = s->child.pid; + return 0; +} + +_public_ int sd_event_source_get_child_pidfd(sd_event_source *s) { + assert_return(s, -EINVAL); + assert_return(s->type == SOURCE_CHILD, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + + if (s->child.pidfd < 0) + return -EOPNOTSUPP; + + return s->child.pidfd; +} + +_public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) { + assert_return(s, -EINVAL); + assert_return(s->type == SOURCE_CHILD, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + assert_return(SIGNAL_VALID(sig), -EINVAL); + + /* If we already have seen indication the process exited refuse sending a signal early. This way we + * can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not + * available. */ + if (s->child.exited) + return -ESRCH; + + if (s->child.pidfd >= 0) { + siginfo_t copy; + + /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the + * structure here */ + if (si) + copy = *si; + + if (pidfd_send_signal(s->child.pidfd, sig, si ? © : NULL, 0) < 0) { + /* Let's propagate the error only if the system call is not implemented or prohibited */ + if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) + return -errno; + } else + return 0; + } + + /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse + * this here. */ + if (flags != 0) + return -EOPNOTSUPP; + + if (si) { + /* We use rt_sigqueueinfo() only if siginfo_t is specified. */ + siginfo_t copy = *si; + + if (rt_sigqueueinfo(s->child.pid, sig, ©) < 0) + return -errno; + } else if (kill(s->child.pid, sig) < 0) + return -errno; + + return 0; +} + +_public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) { + assert_return(s, -EINVAL); + assert_return(s->type == SOURCE_CHILD, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + + if (s->child.pidfd < 0) + return -EOPNOTSUPP; + + return s->child.pidfd_owned; +} + +_public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) { + assert_return(s, -EINVAL); + assert_return(s->type == SOURCE_CHILD, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + + if (s->child.pidfd < 0) + return -EOPNOTSUPP; + + s->child.pidfd_owned = own; + return 0; +} + +_public_ int sd_event_source_get_child_process_own(sd_event_source *s) { + assert_return(s, -EINVAL); + assert_return(s->type == SOURCE_CHILD, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + + return s->child.process_owned; +} + +_public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) { + assert_return(s, -EINVAL); + assert_return(s->type == SOURCE_CHILD, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + + s->child.process_owned = own; + return 0; +} + +_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) { + assert_return(s, -EINVAL); + assert_return(mask, -EINVAL); + assert_return(s->type == SOURCE_INOTIFY, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + + *mask = s->inotify.mask; + return 0; +} + +_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) { + int r; + + assert_return(s, -EINVAL); + assert_return(s->type != SOURCE_EXIT, -EDOM); + assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(!event_origin_changed(s->event), -ECHILD); + + if (s->prepare == callback) + return 0; + + if (callback && s->prepare) { + s->prepare = callback; + return 0; + } + + r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare); + if (r < 0) + return r; + + s->prepare = callback; + + if (callback) { + r = prioq_put(s->event->prepare, s, &s->prepare_index); + if (r < 0) + return r; + } else + prioq_remove(s->event->prepare, s, &s->prepare_index); + + return 0; +} + +_public_ void* sd_event_source_get_userdata(sd_event_source *s) { + assert_return(s, NULL); + assert_return(!event_origin_changed(s->event), NULL); + + return s->userdata; +} + +_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) { + void *ret; + + assert_return(s, NULL); + assert_return(!event_origin_changed(s->event), NULL); + + ret = s->userdata; + s->userdata = userdata; + + return ret; +} + +static int event_source_enter_ratelimited(sd_event_source *s) { + int r; + + assert(s); + + /* When an event source becomes ratelimited, we place it in the CLOCK_MONOTONIC priority queue, with + * the end of the rate limit time window, much as if it was a timer event source. */ + + if (s->ratelimited) + return 0; /* Already ratelimited, this is a NOP hence */ + + /* Make sure we can install a CLOCK_MONOTONIC event further down. */ + r = setup_clock_data(s->event, &s->event->monotonic, CLOCK_MONOTONIC); + if (r < 0) + return r; + + /* Timer event sources are already using the earliest/latest queues for the timer scheduling. Let's + * first remove them from the prioq appropriate for their own clock, so that we can use the prioq + * fields of the event source then for adding it to the CLOCK_MONOTONIC prioq instead. */ + if (EVENT_SOURCE_IS_TIME(s->type)) + event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type)); + + /* Now, let's add the event source to the monotonic clock instead */ + r = event_source_time_prioq_put(s, &s->event->monotonic); + if (r < 0) + goto fail; + + /* And let's take the event source officially offline */ + r = event_source_offline(s, s->enabled, /* ratelimited= */ true); + if (r < 0) { + event_source_time_prioq_remove(s, &s->event->monotonic); + goto fail; + } + + event_source_pp_prioq_reshuffle(s); + + log_debug("Event source %p (%s) entered rate limit state.", s, strna(s->description)); + return 0; + +fail: + /* Reinstall time event sources in the priority queue as before. This shouldn't fail, since the queue + * space for it should already be allocated. */ + if (EVENT_SOURCE_IS_TIME(s->type)) + assert_se(event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type)) >= 0); + + return r; +} + +static int event_source_leave_ratelimit(sd_event_source *s, bool run_callback) { + int r; + + assert(s); + + if (!s->ratelimited) + return 0; + + /* Let's take the event source out of the monotonic prioq first. */ + event_source_time_prioq_remove(s, &s->event->monotonic); + + /* Let's then add the event source to its native clock prioq again — if this is a timer event source */ + if (EVENT_SOURCE_IS_TIME(s->type)) { + r = event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type)); + if (r < 0) + goto fail; + } + + /* Let's try to take it online again. */ + r = event_source_online(s, s->enabled, /* ratelimited= */ false); + if (r < 0) { + /* Do something roughly sensible when this failed: undo the two prioq ops above */ + if (EVENT_SOURCE_IS_TIME(s->type)) + event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type)); + + goto fail; + } + + event_source_pp_prioq_reshuffle(s); + ratelimit_reset(&s->rate_limit); + + log_debug("Event source %p (%s) left rate limit state.", s, strna(s->description)); + + if (run_callback && s->ratelimit_expire_callback) { + s->dispatching = true; + r = s->ratelimit_expire_callback(s, s->userdata); + s->dispatching = false; + + if (r < 0) { + log_debug_errno(r, "Ratelimit expiry callback of event source %s (type %s) returned error, %s: %m", + strna(s->description), + event_source_type_to_string(s->type), + s->exit_on_failure ? "exiting" : "disabling"); + + if (s->exit_on_failure) + (void) sd_event_exit(s->event, r); + } + + if (s->n_ref == 0) + source_free(s); + else if (r < 0) + assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0); + + return 1; + } + + return 0; + +fail: + /* Do something somewhat reasonable when we cannot move an event sources out of ratelimited mode: + * simply put it back in it, maybe we can then process it more successfully next iteration. */ + assert_se(event_source_time_prioq_put(s, &s->event->monotonic) >= 0); + + return r; +} + +static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) { + usec_t c; + assert(e); + assert(a <= b); + + if (a <= 0) + return 0; + if (a >= USEC_INFINITY) + return USEC_INFINITY; + + if (b <= a + 1) + return a; + + initialize_perturb(e); + + /* + Find a good time to wake up again between times a and b. We + have two goals here: + + a) We want to wake up as seldom as possible, hence prefer + later times over earlier times. + + b) But if we have to wake up, then let's make sure to + dispatch as much as possible on the entire system. + + We implement this by waking up everywhere at the same time + within any given minute if we can, synchronised via the + perturbation value determined from the boot ID. If we can't, + then we try to find the same spot in every 10s, then 1s and + then 250ms step. Otherwise, we pick the last possible time + to wake up. + */ + + c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb; + if (c >= b) { + if (_unlikely_(c < USEC_PER_MINUTE)) + return b; + + c -= USEC_PER_MINUTE; + } + + if (c >= a) + return c; + + c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10)); + if (c >= b) { + if (_unlikely_(c < USEC_PER_SEC*10)) + return b; + + c -= USEC_PER_SEC*10; + } + + if (c >= a) + return c; + + c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC); + if (c >= b) { + if (_unlikely_(c < USEC_PER_SEC)) + return b; + + c -= USEC_PER_SEC; + } + + if (c >= a) + return c; + + c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250)); + if (c >= b) { + if (_unlikely_(c < USEC_PER_MSEC*250)) + return b; + + c -= USEC_PER_MSEC*250; + } + + if (c >= a) + return c; + + return b; +} + +static int event_arm_timer( + sd_event *e, + struct clock_data *d) { + + struct itimerspec its = {}; + sd_event_source *a, *b; + usec_t t; + + assert(e); + assert(d); + + if (!d->needs_rearm) + return 0; + + d->needs_rearm = false; + + a = prioq_peek(d->earliest); + assert(!a || EVENT_SOURCE_USES_TIME_PRIOQ(a->type)); + if (!a || a->enabled == SD_EVENT_OFF || time_event_source_next(a) == USEC_INFINITY) { + + if (d->fd < 0) + return 0; + + if (d->next == USEC_INFINITY) + return 0; + + /* disarm */ + if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0) + return -errno; + + d->next = USEC_INFINITY; + return 0; + } + + b = prioq_peek(d->latest); + assert(!b || EVENT_SOURCE_USES_TIME_PRIOQ(b->type)); + assert(b && b->enabled != SD_EVENT_OFF); + + t = sleep_between(e, time_event_source_next(a), time_event_source_latest(b)); + if (d->next == t) + return 0; + + assert_se(d->fd >= 0); + + if (t == 0) { + /* We don't want to disarm here, just mean some time looooong ago. */ + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 1; + } else + timespec_store(&its.it_value, t); + + if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0) + return -errno; + + d->next = t; + return 0; +} + +static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) { + assert(e); + assert(s); + assert(s->type == SOURCE_IO); + + /* If the event source was already pending, we just OR in the + * new revents, otherwise we reset the value. The ORing is + * necessary to handle EPOLLONESHOT events properly where + * readability might happen independently of writability, and + * we need to keep track of both */ + + if (s->pending) + s->io.revents |= revents; + else + s->io.revents = revents; + + return source_set_pending(s, true); +} + +static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) { + uint64_t x; + ssize_t ss; + + assert(e); + assert(fd >= 0); + + assert_return(events == EPOLLIN, -EIO); + + ss = read(fd, &x, sizeof(x)); + if (ss < 0) { + if (ERRNO_IS_TRANSIENT(errno)) + return 0; + + return -errno; + } + + if (_unlikely_(ss != sizeof(x))) + return -EIO; + + if (next) + *next = USEC_INFINITY; + + return 0; +} + +static int process_timer( + sd_event *e, + usec_t n, + struct clock_data *d) { + + sd_event_source *s; + bool callback_invoked = false; + int r; + + assert(e); + assert(d); + + for (;;) { + s = prioq_peek(d->earliest); + assert(!s || EVENT_SOURCE_USES_TIME_PRIOQ(s->type)); + + if (!s || time_event_source_next(s) > n) + break; + + if (s->ratelimited) { + /* This is an event sources whose ratelimit window has ended. Let's turn it on + * again. */ + assert(s->ratelimited); + + r = event_source_leave_ratelimit(s, /* run_callback */ true); + if (r < 0) + return r; + else if (r == 1) + callback_invoked = true; + + continue; + } + + if (s->enabled == SD_EVENT_OFF || s->pending) + break; + + r = source_set_pending(s, true); + if (r < 0) + return r; + + event_source_time_prioq_reshuffle(s); + } + + return callback_invoked; +} + +static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priority) { + int64_t min_priority = threshold; + bool something_new = false; + sd_event_source *s; + int r; + + assert(e); + assert(ret_min_priority); + + if (!e->need_process_child) { + *ret_min_priority = min_priority; + return 0; + } + + e->need_process_child = false; + + /* So, this is ugly. We iteratively invoke waitid() with P_PID + WNOHANG for each PID we wait + * for, instead of using P_ALL. This is because we only want to get child information of very + * specific child processes, and not all of them. We might not have processed the SIGCHLD event + * of a previous invocation and we don't want to maintain a unbounded *per-child* event queue, + * hence we really don't want anything flushed out of the kernel's queue that we don't care + * about. Since this is O(n) this means that if you have a lot of processes you probably want + * to handle SIGCHLD yourself. + * + * We do not reap the children here (by using WNOWAIT), this is only done after the event + * source is dispatched so that the callback still sees the process as a zombie. */ + + HASHMAP_FOREACH(s, e->child_sources) { + assert(s->type == SOURCE_CHILD); + + if (s->priority > threshold) + continue; + + if (s->pending) + continue; + + if (event_source_is_offline(s)) + continue; + + if (s->child.exited) + continue; + + if (EVENT_SOURCE_WATCH_PIDFD(s)) + /* There's a usable pidfd known for this event source? Then don't waitid() for + * it here */ + continue; + + zero(s->child.siginfo); + if (waitid(P_PID, s->child.pid, &s->child.siginfo, + WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options) < 0) + return negative_errno(); + + if (s->child.siginfo.si_pid != 0) { + bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED); + + if (zombie) + s->child.exited = true; + + if (!zombie && (s->child.options & WEXITED)) { + /* If the child isn't dead then let's immediately remove the state + * change from the queue, since there's no benefit in leaving it + * queued. */ + + assert(s->child.options & (WSTOPPED|WCONTINUED)); + (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED))); + } + + r = source_set_pending(s, true); + if (r < 0) + return r; + if (r > 0) { + something_new = true; + min_priority = MIN(min_priority, s->priority); + } + } + } + + *ret_min_priority = min_priority; + return something_new; +} + +static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) { + assert(e); + assert(s); + assert(s->type == SOURCE_CHILD); + + if (s->pending) + return 0; + + if (event_source_is_offline(s)) + return 0; + + if (!EVENT_SOURCE_WATCH_PIDFD(s)) + return 0; + + zero(s->child.siginfo); + if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0) + return -errno; + + if (s->child.siginfo.si_pid == 0) + return 0; + + if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED)) + s->child.exited = true; + + return source_set_pending(s, true); +} + +static int process_signal(sd_event *e, struct signal_data *d, uint32_t events, int64_t *min_priority) { + int r; + + assert(e); + assert(d); + assert_return(events == EPOLLIN, -EIO); + assert(min_priority); + + /* If there's a signal queued on this priority and SIGCHLD is on this priority too, then make + * sure to recheck the children we watch. This is because we only ever dequeue the first signal + * per priority, and if we dequeue one, and SIGCHLD might be enqueued later we wouldn't know, + * but we might have higher priority children we care about hence we need to check that + * explicitly. */ + + if (sigismember(&d->sigset, SIGCHLD)) + e->need_process_child = true; + + /* If there's already an event source pending for this priority we don't read another */ + if (d->current) + return 0; + + for (;;) { + struct signalfd_siginfo si; + ssize_t n; + sd_event_source *s = NULL; + + n = read(d->fd, &si, sizeof(si)); + if (n < 0) { + if (ERRNO_IS_TRANSIENT(errno)) + return 0; + + return -errno; + } + + if (_unlikely_(n != sizeof(si))) + return -EIO; + + assert(SIGNAL_VALID(si.ssi_signo)); + + if (e->signal_sources) + s = e->signal_sources[si.ssi_signo]; + if (!s) + continue; + if (s->pending) + continue; + + s->signal.siginfo = si; + d->current = s; + + r = source_set_pending(s, true); + if (r < 0) + return r; + if (r > 0 && *min_priority >= s->priority) { + *min_priority = s->priority; + return 1; /* an event source with smaller priority is queued. */ + } + + return 0; + } +} + +static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents, int64_t threshold) { + ssize_t n; + + assert(e); + assert(d); + + assert_return(revents == EPOLLIN, -EIO); + + /* If there's already an event source pending for this priority, don't read another */ + if (d->n_pending > 0) + return 0; + + /* Is the read buffer non-empty? If so, let's not read more */ + if (d->buffer_filled > 0) + return 0; + + if (d->priority > threshold) + return 0; + + n = read(d->fd, &d->buffer, sizeof(d->buffer)); + if (n < 0) { + if (ERRNO_IS_TRANSIENT(errno)) + return 0; + + return -errno; + } + + assert(n > 0); + d->buffer_filled = (size_t) n; + LIST_PREPEND(buffered, e->buffered_inotify_data_list, d); + + return 1; +} + +static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) { + assert(e); + assert(d); + assert(sz <= d->buffer_filled); + + if (sz == 0) + return; + + /* Move the rest to the buffer to the front, in order to get things properly aligned again */ + memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz); + d->buffer_filled -= sz; + + if (d->buffer_filled == 0) + LIST_REMOVE(buffered, e->buffered_inotify_data_list, d); +} + +static int event_inotify_data_process(sd_event *e, struct inotify_data *d) { + int r; + + assert(e); + assert(d); + + /* If there's already an event source pending for this priority, don't read another */ + if (d->n_pending > 0) + return 0; + + while (d->buffer_filled > 0) { + size_t sz; + + /* Let's validate that the event structures are complete */ + if (d->buffer_filled < offsetof(struct inotify_event, name)) + return -EIO; + + sz = offsetof(struct inotify_event, name) + d->buffer.ev.len; + if (d->buffer_filled < sz) + return -EIO; + + if (d->buffer.ev.mask & IN_Q_OVERFLOW) { + struct inode_data *inode_data; + + /* The queue overran, let's pass this event to all event sources connected to this inotify + * object */ + + HASHMAP_FOREACH(inode_data, d->inodes) + LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) { + + if (event_source_is_offline(s)) + continue; + + r = source_set_pending(s, true); + if (r < 0) + return r; + } + } else { + struct inode_data *inode_data; + + /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from + * our watch descriptor table. */ + if (d->buffer.ev.mask & IN_IGNORED) { + + inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd)); + if (!inode_data) { + event_inotify_data_drop(e, d, sz); + continue; + } + + /* The watch descriptor was removed by the kernel, let's drop it here too */ + inode_data->wd = -1; + } else { + inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd)); + if (!inode_data) { + event_inotify_data_drop(e, d, sz); + continue; + } + } + + /* Trigger all event sources that are interested in these events. Also trigger all event + * sources if IN_IGNORED or IN_UNMOUNT is set. */ + LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) { + + if (event_source_is_offline(s)) + continue; + + if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 && + (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0) + continue; + + r = source_set_pending(s, true); + if (r < 0) + return r; + } + } + + /* Something pending now? If so, let's finish, otherwise let's read more. */ + if (d->n_pending > 0) + return 1; + } + + return 0; +} + +static int process_inotify(sd_event *e) { + int r, done = 0; + + assert(e); + + LIST_FOREACH(buffered, d, e->buffered_inotify_data_list) { + r = event_inotify_data_process(e, d); + if (r < 0) + return r; + if (r > 0) + done ++; + } + + return done; +} + +static int process_memory_pressure(sd_event_source *s, uint32_t revents) { + assert(s); + assert(s->type == SOURCE_MEMORY_PRESSURE); + + if (s->pending) + s->memory_pressure.revents |= revents; + else + s->memory_pressure.revents = revents; + + return source_set_pending(s, true); +} + +static int source_memory_pressure_write(sd_event_source *s) { + ssize_t n; + int r; + + assert(s); + assert(s->type == SOURCE_MEMORY_PRESSURE); + + /* once we start writing, the buffer is locked, we allow no further changes. */ + s->memory_pressure.locked = true; + + if (s->memory_pressure.write_buffer_size > 0) { + n = write(s->memory_pressure.fd, s->memory_pressure.write_buffer, s->memory_pressure.write_buffer_size); + if (n < 0) { + if (!ERRNO_IS_TRANSIENT(errno)) { + /* If kernel is built with CONFIG_PSI_DEFAULT_DISABLED it will expose PSI + * files, but then generates EOPNOSUPP on read() and write() (instead of on + * open()!). This sucks hard, since we can only detect this kind of failure + * so late. Let's make the best of it, and turn off the event source like we + * do for failed event source handlers. */ + + log_debug_errno(errno, "Writing memory pressure settings to kernel failed, disabling memory pressure event source: %m"); + assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0); + return 0; + } + + n = 0; + } + } else + n = 0; + + assert(n >= 0); + + if ((size_t) n == s->memory_pressure.write_buffer_size) { + s->memory_pressure.write_buffer = mfree(s->memory_pressure.write_buffer); + + if (n > 0) { + s->memory_pressure.write_buffer_size = 0; + + /* Update epoll events mask, since we have now written everything and don't care for EPOLLOUT anymore */ + r = source_memory_pressure_register(s, s->enabled); + if (r < 0) + return r; + } + } else if (n > 0) { + _cleanup_free_ void *c = NULL; + + assert((size_t) n < s->memory_pressure.write_buffer_size); + + c = memdup((uint8_t*) s->memory_pressure.write_buffer + n, s->memory_pressure.write_buffer_size - n); + if (!c) + return -ENOMEM; + + free_and_replace(s->memory_pressure.write_buffer, c); + s->memory_pressure.write_buffer_size -= n; + return 1; + } + + return 0; +} + +static int source_memory_pressure_initiate_dispatch(sd_event_source *s) { + int r; + + assert(s); + assert(s->type == SOURCE_MEMORY_PRESSURE); + + r = source_memory_pressure_write(s); + if (r < 0) + return r; + if (r > 0) + return 1; /* if we wrote something, then don't continue with dispatching user dispatch + * function. Instead, shortcut it so that we wait for next EPOLLOUT immediately. */ + + /* No pending incoming IO? Then let's not continue further */ + if ((s->memory_pressure.revents & (EPOLLIN|EPOLLPRI)) == 0) { + + /* Treat IO errors on the notifier the same ways errors returned from a callback */ + if ((s->memory_pressure.revents & (EPOLLHUP|EPOLLERR|EPOLLRDHUP)) != 0) + return -EIO; + + return 1; /* leave dispatch, we already processed everything */ + } + + if (s->memory_pressure.revents & EPOLLIN) { + uint8_t pipe_buf[PIPE_BUF]; + ssize_t n; + + /* If the fd is readable, then flush out anything that might be queued */ + + n = read(s->memory_pressure.fd, pipe_buf, sizeof(pipe_buf)); + if (n < 0 && !ERRNO_IS_TRANSIENT(errno)) + return -errno; + } + + return 0; /* go on, dispatch to user callback */ +} + +static int source_dispatch(sd_event_source *s) { + EventSourceType saved_type; + sd_event *saved_event; + int r = 0; + + assert(s); + assert(s->pending || s->type == SOURCE_EXIT); + + /* Save the event source type, here, so that we still know it after the event callback which might + * invalidate the event. */ + saved_type = s->type; + + /* Similarly, store a reference to the event loop object, so that we can still access it after the + * callback might have invalidated/disconnected the event source. */ + saved_event = s->event; + PROTECT_EVENT(saved_event); + + /* Check if we hit the ratelimit for this event source, and if so, let's disable it. */ + assert(!s->ratelimited); + if (!ratelimit_below(&s->rate_limit)) { + r = event_source_enter_ratelimited(s); + if (r < 0) + return r; + + return 1; + } + + if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) { + r = source_set_pending(s, false); + if (r < 0) + return r; + } + + if (s->type != SOURCE_POST) { + sd_event_source *z; + + /* If we execute a non-post source, let's mark all post sources as pending. */ + + SET_FOREACH(z, s->event->post_sources) { + if (event_source_is_offline(z)) + continue; + + r = source_set_pending(z, true); + if (r < 0) + return r; + } + } + + if (s->type == SOURCE_MEMORY_PRESSURE) { + r = source_memory_pressure_initiate_dispatch(s); + if (r == -EIO) /* handle EIO errors similar to callback errors */ + goto finish; + if (r < 0) + return r; + if (r > 0) /* already handled */ + return 1; + } + + if (s->enabled == SD_EVENT_ONESHOT) { + r = sd_event_source_set_enabled(s, SD_EVENT_OFF); + if (r < 0) + return r; + } + + s->dispatching = true; + + switch (s->type) { + + case SOURCE_IO: + r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata); + break; + + case SOURCE_TIME_REALTIME: + case SOURCE_TIME_BOOTTIME: + case SOURCE_TIME_MONOTONIC: + case SOURCE_TIME_REALTIME_ALARM: + case SOURCE_TIME_BOOTTIME_ALARM: + r = s->time.callback(s, s->time.next, s->userdata); + break; + + case SOURCE_SIGNAL: + r = s->signal.callback(s, &s->signal.siginfo, s->userdata); + break; + + case SOURCE_CHILD: { + bool zombie; + + zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED); + + r = s->child.callback(s, &s->child.siginfo, s->userdata); + + /* Now, reap the PID for good. */ + if (zombie) { + (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED); + s->child.waited = true; + } + + break; + } + + case SOURCE_DEFER: + r = s->defer.callback(s, s->userdata); + break; + + case SOURCE_POST: + r = s->post.callback(s, s->userdata); + break; + + case SOURCE_EXIT: + r = s->exit.callback(s, s->userdata); + break; + + case SOURCE_INOTIFY: { + struct sd_event *e = s->event; + struct inotify_data *d; + size_t sz; + + assert(s->inotify.inode_data); + assert_se(d = s->inotify.inode_data->inotify_data); + + assert(d->buffer_filled >= offsetof(struct inotify_event, name)); + sz = offsetof(struct inotify_event, name) + d->buffer.ev.len; + assert(d->buffer_filled >= sz); + + /* If the inotify callback destroys the event source then this likely means we don't need to + * watch the inode anymore, and thus also won't need the inotify object anymore. But if we'd + * free it immediately, then we couldn't drop the event from the inotify event queue without + * memory corruption anymore, as below. Hence, let's not free it immediately, but mark it + * "busy" with a counter (which will ensure it's not GC'ed away prematurely). Let's then + * explicitly GC it after we are done dropping the inotify event from the buffer. */ + d->n_busy++; + r = s->inotify.callback(s, &d->buffer.ev, s->userdata); + d->n_busy--; + + /* When no event is pending anymore on this inotify object, then let's drop the event from + * the inotify event queue buffer. */ + if (d->n_pending == 0) + event_inotify_data_drop(e, d, sz); + + /* Now we don't want to access 'd' anymore, it's OK to GC now. */ + event_gc_inotify_data(e, d); + break; + } + + case SOURCE_MEMORY_PRESSURE: + r = s->memory_pressure.callback(s, s->userdata); + break; + + case SOURCE_WATCHDOG: + case _SOURCE_EVENT_SOURCE_TYPE_MAX: + case _SOURCE_EVENT_SOURCE_TYPE_INVALID: + assert_not_reached(); + } + + s->dispatching = false; + +finish: + if (r < 0) { + log_debug_errno(r, "Event source %s (type %s) returned error, %s: %m", + strna(s->description), + event_source_type_to_string(saved_type), + s->exit_on_failure ? "exiting" : "disabling"); + + if (s->exit_on_failure) + (void) sd_event_exit(saved_event, r); + } + + if (s->n_ref == 0) + source_free(s); + else if (r < 0) + assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0); + + return 1; +} + +static int event_prepare(sd_event *e) { + int r; + + assert(e); + + for (;;) { + sd_event_source *s; + + s = prioq_peek(e->prepare); + if (!s || s->prepare_iteration == e->iteration || event_source_is_offline(s)) + break; + + s->prepare_iteration = e->iteration; + prioq_reshuffle(e->prepare, s, &s->prepare_index); + + assert(s->prepare); + s->dispatching = true; + r = s->prepare(s, s->userdata); + s->dispatching = false; + + if (r < 0) { + log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, %s: %m", + strna(s->description), + event_source_type_to_string(s->type), + s->exit_on_failure ? "exiting" : "disabling"); + + if (s->exit_on_failure) + (void) sd_event_exit(e, r); + } + + if (s->n_ref == 0) + source_free(s); + else if (r < 0) + assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0); + } + + return 0; +} + +static int dispatch_exit(sd_event *e) { + sd_event_source *p; + int r; + + assert(e); + + p = prioq_peek(e->exit); + assert(!p || p->type == SOURCE_EXIT); + + if (!p || event_source_is_offline(p)) { + e->state = SD_EVENT_FINISHED; + return 0; + } + + PROTECT_EVENT(e); + e->iteration++; + e->state = SD_EVENT_EXITING; + r = source_dispatch(p); + e->state = SD_EVENT_INITIAL; + return r; +} + +static sd_event_source* event_next_pending(sd_event *e) { + sd_event_source *p; + + assert(e); + + p = prioq_peek(e->pending); + if (!p) + return NULL; + + if (event_source_is_offline(p)) + return NULL; + + return p; +} + +static int arm_watchdog(sd_event *e) { + struct itimerspec its = {}; + usec_t t; + + assert(e); + assert(e->watchdog_fd >= 0); + + t = sleep_between(e, + usec_add(e->watchdog_last, (e->watchdog_period / 2)), + usec_add(e->watchdog_last, (e->watchdog_period * 3 / 4))); + + timespec_store(&its.it_value, t); + + /* Make sure we never set the watchdog to 0, which tells the + * kernel to disable it. */ + if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0) + its.it_value.tv_nsec = 1; + + return RET_NERRNO(timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL)); +} + +static int process_watchdog(sd_event *e) { + assert(e); + + if (!e->watchdog) + return 0; + + /* Don't notify watchdog too often */ + if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic) + return 0; + + sd_notify(false, "WATCHDOG=1"); + e->watchdog_last = e->timestamp.monotonic; + + return arm_watchdog(e); +} + +static void event_close_inode_data_fds(sd_event *e) { + struct inode_data *d; + + assert(e); + + /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin + * filesystems. But we can't close them right-away as we need them as long as the user still wants to make + * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch + * for the inode). Hence, let's close them when entering the first iteration after they were added, as a + * compromise. */ + + while ((d = e->inode_data_to_close_list)) { + assert(d->fd >= 0); + d->fd = safe_close(d->fd); + + LIST_REMOVE(to_close, e->inode_data_to_close_list, d); + } +} + +static int event_memory_pressure_write_list(sd_event *e) { + int r; + + assert(e); + + for (;;) { + sd_event_source *s; + + s = LIST_POP(memory_pressure.write_list, e->memory_pressure_write_list); + if (!s) + break; + + assert(s->type == SOURCE_MEMORY_PRESSURE); + assert(s->memory_pressure.write_buffer_size > 0); + s->memory_pressure.in_write_list = false; + + r = source_memory_pressure_write(s); + if (r < 0) + return r; + } + + return 0; +} + +_public_ int sd_event_prepare(sd_event *e) { + int r; + + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(!event_origin_changed(e), -ECHILD); + assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(e->state == SD_EVENT_INITIAL, -EBUSY); + + /* Let's check that if we are a default event loop we are executed in the correct thread. We only do + * this check here once, since gettid() is typically not cached, and thus want to minimize + * syscalls */ + assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO); + + /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */ + PROTECT_EVENT(e); + + if (e->exit_requested) + goto pending; + + e->iteration++; + + e->state = SD_EVENT_PREPARING; + r = event_prepare(e); + e->state = SD_EVENT_INITIAL; + if (r < 0) + return r; + + r = event_memory_pressure_write_list(e); + if (r < 0) + return r; + + r = event_arm_timer(e, &e->realtime); + if (r < 0) + return r; + + r = event_arm_timer(e, &e->boottime); + if (r < 0) + return r; + + r = event_arm_timer(e, &e->monotonic); + if (r < 0) + return r; + + r = event_arm_timer(e, &e->realtime_alarm); + if (r < 0) + return r; + + r = event_arm_timer(e, &e->boottime_alarm); + if (r < 0) + return r; + + event_close_inode_data_fds(e); + + if (event_next_pending(e) || e->need_process_child || e->buffered_inotify_data_list) + goto pending; + + e->state = SD_EVENT_ARMED; + + return 0; + +pending: + e->state = SD_EVENT_ARMED; + r = sd_event_wait(e, 0); + if (r == 0) + e->state = SD_EVENT_ARMED; + + return r; +} + +static int epoll_wait_usec( + int fd, + struct epoll_event *events, + int maxevents, + usec_t timeout) { + + int msec; + /* A wrapper that uses epoll_pwait2() if available, and falls back to epoll_wait() if not. */ + +#if HAVE_EPOLL_PWAIT2 + static bool epoll_pwait2_absent = false; + int r; + + /* epoll_pwait2() was added to Linux 5.11 (2021-02-14) and to glibc in 2.35 (2022-02-03). In contrast + * to other syscalls we don't bother with our own fallback syscall wrappers on old libcs, since this + * is not that obvious to implement given the libc and kernel definitions differ in the last + * argument. Moreover, the only reason to use it is the more accurate time-outs (which is not a + * biggie), let's hence rely on glibc's definitions, and fallback to epoll_pwait() when that's + * missing. */ + + if (!epoll_pwait2_absent && timeout != USEC_INFINITY) { + r = epoll_pwait2(fd, + events, + maxevents, + TIMESPEC_STORE(timeout), + NULL); + if (r >= 0) + return r; + if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) + return -errno; /* Only fallback to old epoll_wait() if the syscall is masked or not + * supported. */ + + epoll_pwait2_absent = true; + } +#endif + + if (timeout == USEC_INFINITY) + msec = -1; + else { + usec_t k; + + k = DIV_ROUND_UP(timeout, USEC_PER_MSEC); + if (k >= INT_MAX) + msec = INT_MAX; /* Saturate */ + else + msec = (int) k; + } + + return RET_NERRNO(epoll_wait(fd, events, maxevents, msec)); +} + +static int process_epoll(sd_event *e, usec_t timeout, int64_t threshold, int64_t *ret_min_priority) { + size_t n_event_queue, m, n_event_max; + int64_t min_priority = threshold; + bool something_new = false; + int r; + + assert(e); + assert(ret_min_priority); + + n_event_queue = MAX(e->n_sources, 1u); + if (!GREEDY_REALLOC(e->event_queue, n_event_queue)) + return -ENOMEM; + + n_event_max = MALLOC_ELEMENTSOF(e->event_queue); + + /* If we still have inotify data buffered, then query the other fds, but don't wait on it */ + if (e->buffered_inotify_data_list) + timeout = 0; + + for (;;) { + r = epoll_wait_usec( + e->epoll_fd, + e->event_queue, + n_event_max, + timeout); + if (r < 0) + return r; + + m = (size_t) r; + + if (m < n_event_max) + break; + + if (n_event_max >= n_event_queue * 10) + break; + + if (!GREEDY_REALLOC(e->event_queue, n_event_max + n_event_queue)) + return -ENOMEM; + + n_event_max = MALLOC_ELEMENTSOF(e->event_queue); + timeout = 0; + } + + /* Set timestamp only when this is called first time. */ + if (threshold == INT64_MAX) + triple_timestamp_now(&e->timestamp); + + for (size_t i = 0; i < m; i++) { + + if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG)) + r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL); + else { + WakeupType *t = e->event_queue[i].data.ptr; + + switch (*t) { + + case WAKEUP_EVENT_SOURCE: { + sd_event_source *s = e->event_queue[i].data.ptr; + + assert(s); + + if (s->priority > threshold) + continue; + + min_priority = MIN(min_priority, s->priority); + + switch (s->type) { + + case SOURCE_IO: + r = process_io(e, s, e->event_queue[i].events); + break; + + case SOURCE_CHILD: + r = process_pidfd(e, s, e->event_queue[i].events); + break; + + case SOURCE_MEMORY_PRESSURE: + r = process_memory_pressure(s, e->event_queue[i].events); + break; + + default: + assert_not_reached(); + } + + break; + } + + case WAKEUP_CLOCK_DATA: { + struct clock_data *d = e->event_queue[i].data.ptr; + + assert(d); + + r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next); + break; + } + + case WAKEUP_SIGNAL_DATA: + r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events, &min_priority); + break; + + case WAKEUP_INOTIFY_DATA: + r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events, threshold); + break; + + default: + assert_not_reached(); + } + } + if (r < 0) + return r; + if (r > 0) + something_new = true; + } + + *ret_min_priority = min_priority; + return something_new; +} + +_public_ int sd_event_wait(sd_event *e, uint64_t timeout) { + int r; + + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(!event_origin_changed(e), -ECHILD); + assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(e->state == SD_EVENT_ARMED, -EBUSY); + + if (e->exit_requested) { + e->state = SD_EVENT_PENDING; + return 1; + } + + for (int64_t threshold = INT64_MAX; ; threshold--) { + int64_t epoll_min_priority, child_min_priority; + + /* There may be a possibility that new epoll (especially IO) and child events are + * triggered just after process_epoll() call but before process_child(), and the new IO + * events may have higher priority than the child events. To salvage these events, + * let's call epoll_wait() again, but accepts only events with higher priority than the + * previous. See issue https://github.com/systemd/systemd/issues/18190 and comments + * https://github.com/systemd/systemd/pull/18750#issuecomment-785801085 + * https://github.com/systemd/systemd/pull/18922#issuecomment-792825226 */ + + r = process_epoll(e, timeout, threshold, &epoll_min_priority); + if (r == -EINTR) { + e->state = SD_EVENT_PENDING; + return 1; + } + if (r < 0) + goto finish; + if (r == 0 && threshold < INT64_MAX) + /* No new epoll event. */ + break; + + r = process_child(e, threshold, &child_min_priority); + if (r < 0) + goto finish; + if (r == 0) + /* No new child event. */ + break; + + threshold = MIN(epoll_min_priority, child_min_priority); + if (threshold == INT64_MIN) + break; + + timeout = 0; + } + + r = process_watchdog(e); + if (r < 0) + goto finish; + + r = process_inotify(e); + if (r < 0) + goto finish; + + r = process_timer(e, e->timestamp.realtime, &e->realtime); + if (r < 0) + goto finish; + + r = process_timer(e, e->timestamp.boottime, &e->boottime); + if (r < 0) + goto finish; + + r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm); + if (r < 0) + goto finish; + + r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm); + if (r < 0) + goto finish; + + r = process_timer(e, e->timestamp.monotonic, &e->monotonic); + if (r < 0) + goto finish; + else if (r == 1) { + /* Ratelimit expiry callback was called. Let's postpone processing pending sources and + * put loop in the initial state in order to evaluate (in the next iteration) also sources + * there were potentially re-enabled by the callback. + * + * Wondering why we treat only this invocation of process_timer() differently? Once event + * source is ratelimited we essentially transform it into CLOCK_MONOTONIC timer hence + * ratelimit expiry callback is never called for any other timer type. */ + r = 0; + goto finish; + } + + if (event_next_pending(e)) { + e->state = SD_EVENT_PENDING; + return 1; + } + + r = 0; + +finish: + e->state = SD_EVENT_INITIAL; + + return r; +} + +_public_ int sd_event_dispatch(sd_event *e) { + sd_event_source *p; + int r; + + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(!event_origin_changed(e), -ECHILD); + assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(e->state == SD_EVENT_PENDING, -EBUSY); + + if (e->exit_requested) + return dispatch_exit(e); + + p = event_next_pending(e); + if (p) { + PROTECT_EVENT(e); + + e->state = SD_EVENT_RUNNING; + r = source_dispatch(p); + e->state = SD_EVENT_INITIAL; + return r; + } + + e->state = SD_EVENT_INITIAL; + + return 1; +} + +static void event_log_delays(sd_event *e) { + char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p; + size_t l, i; + + p = b; + l = sizeof(b); + for (i = 0; i < ELEMENTSOF(e->delays); i++) { + l = strpcpyf(&p, l, "%u ", e->delays[i]); + e->delays[i] = 0; + } + log_debug("Event loop iterations: %s", b); +} + +_public_ int sd_event_run(sd_event *e, uint64_t timeout) { + int r; + + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(!event_origin_changed(e), -ECHILD); + assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(e->state == SD_EVENT_INITIAL, -EBUSY); + + if (e->profile_delays && e->last_run_usec != 0) { + usec_t this_run; + unsigned l; + + this_run = now(CLOCK_MONOTONIC); + + l = log2u64(this_run - e->last_run_usec); + assert(l < ELEMENTSOF(e->delays)); + e->delays[l]++; + + if (this_run - e->last_log_usec >= 5*USEC_PER_SEC) { + event_log_delays(e); + e->last_log_usec = this_run; + } + } + + /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */ + PROTECT_EVENT(e); + + r = sd_event_prepare(e); + if (r == 0) + /* There was nothing? Then wait... */ + r = sd_event_wait(e, timeout); + + if (e->profile_delays) + e->last_run_usec = now(CLOCK_MONOTONIC); + + if (r > 0) { + /* There's something now, then let's dispatch it */ + r = sd_event_dispatch(e); + if (r < 0) + return r; + + return 1; + } + + return r; +} + +_public_ int sd_event_loop(sd_event *e) { + int r; + + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(!event_origin_changed(e), -ECHILD); + assert_return(e->state == SD_EVENT_INITIAL, -EBUSY); + + + PROTECT_EVENT(e); + + while (e->state != SD_EVENT_FINISHED) { + r = sd_event_run(e, UINT64_MAX); + if (r < 0) + return r; + } + + return e->exit_code; +} + +_public_ int sd_event_get_fd(sd_event *e) { + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(!event_origin_changed(e), -ECHILD); + + return e->epoll_fd; +} + +_public_ int sd_event_get_state(sd_event *e) { + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(!event_origin_changed(e), -ECHILD); + + return e->state; +} + +_public_ int sd_event_get_exit_code(sd_event *e, int *code) { + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(code, -EINVAL); + assert_return(!event_origin_changed(e), -ECHILD); + + if (!e->exit_requested) + return -ENODATA; + + *code = e->exit_code; + return 0; +} + +_public_ int sd_event_exit(sd_event *e, int code) { + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(!event_origin_changed(e), -ECHILD); + + e->exit_requested = true; + e->exit_code = code; + + return 0; +} + +_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) { + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(usec, -EINVAL); + assert_return(!event_origin_changed(e), -ECHILD); + + if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock)) + return -EOPNOTSUPP; + + if (!triple_timestamp_is_set(&e->timestamp)) { + /* Implicitly fall back to now() if we never ran before and thus have no cached time. */ + *usec = now(clock); + return 1; + } + + *usec = triple_timestamp_by_clock(&e->timestamp, clock); + return 0; +} + +_public_ int sd_event_default(sd_event **ret) { + sd_event *e = NULL; + int r; + + if (!ret) + return !!default_event; + + if (default_event) { + *ret = sd_event_ref(default_event); + return 0; + } + + r = sd_event_new(&e); + if (r < 0) + return r; + + e->default_event_ptr = &default_event; + e->tid = gettid(); + default_event = e; + + *ret = e; + return 1; +} + +_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) { + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(tid, -EINVAL); + assert_return(!event_origin_changed(e), -ECHILD); + + if (e->tid != 0) { + *tid = e->tid; + return 0; + } + + return -ENXIO; +} + +_public_ int sd_event_set_watchdog(sd_event *e, int b) { + int r; + + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(!event_origin_changed(e), -ECHILD); + + if (e->watchdog == !!b) + return e->watchdog; + + if (b) { + r = sd_watchdog_enabled(false, &e->watchdog_period); + if (r <= 0) + return r; + + /* Issue first ping immediately */ + sd_notify(false, "WATCHDOG=1"); + e->watchdog_last = now(CLOCK_MONOTONIC); + + e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC); + if (e->watchdog_fd < 0) + return -errno; + + r = arm_watchdog(e); + if (r < 0) + goto fail; + + struct epoll_event ev = { + .events = EPOLLIN, + .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG), + }; + + if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev) < 0) { + r = -errno; + goto fail; + } + + } else { + if (e->watchdog_fd >= 0) { + (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL); + e->watchdog_fd = safe_close(e->watchdog_fd); + } + } + + e->watchdog = !!b; + return e->watchdog; + +fail: + e->watchdog_fd = safe_close(e->watchdog_fd); + return r; +} + +_public_ int sd_event_get_watchdog(sd_event *e) { + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(!event_origin_changed(e), -ECHILD); + + return e->watchdog; +} + +_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) { + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(!event_origin_changed(e), -ECHILD); + + *ret = e->iteration; + return 0; +} + +_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) { + assert_return(s, -EINVAL); + assert_return(s->event, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); + + s->destroy_callback = callback; + return 0; +} + +_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) { + assert_return(s, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); + + if (ret) + *ret = s->destroy_callback; + + return !!s->destroy_callback; +} + +_public_ int sd_event_source_get_floating(sd_event_source *s) { + assert_return(s, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); + + return s->floating; +} + +_public_ int sd_event_source_set_floating(sd_event_source *s, int b) { + assert_return(s, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); + + if (s->floating == !!b) + return 0; + + if (!s->event) /* Already disconnected */ + return -ESTALE; + + s->floating = b; + + if (b) { + sd_event_source_ref(s); + sd_event_unref(s->event); + } else { + sd_event_ref(s->event); + sd_event_source_unref(s); + } + + return 1; +} + +_public_ int sd_event_source_get_exit_on_failure(sd_event_source *s) { + assert_return(s, -EINVAL); + assert_return(s->type != SOURCE_EXIT, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + + return s->exit_on_failure; +} + +_public_ int sd_event_source_set_exit_on_failure(sd_event_source *s, int b) { + assert_return(s, -EINVAL); + assert_return(s->type != SOURCE_EXIT, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + + if (s->exit_on_failure == !!b) + return 0; + + s->exit_on_failure = b; + return 1; +} + +_public_ int sd_event_source_set_ratelimit(sd_event_source *s, uint64_t interval, unsigned burst) { + int r; + + assert_return(s, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); + + /* Turning on ratelimiting on event source types that don't support it, is a loggable offense. Doing + * so is a programming error. */ + assert_return(EVENT_SOURCE_CAN_RATE_LIMIT(s->type), -EDOM); + + /* When ratelimiting is configured we'll always reset the rate limit state first and start fresh, + * non-ratelimited. */ + r = event_source_leave_ratelimit(s, /* run_callback */ false); + if (r < 0) + return r; + + s->rate_limit = (RateLimit) { interval, burst }; + return 0; +} + +_public_ int sd_event_source_set_ratelimit_expire_callback(sd_event_source *s, sd_event_handler_t callback) { + assert_return(s, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); + + s->ratelimit_expire_callback = callback; + return 0; +} + +_public_ int sd_event_source_get_ratelimit(sd_event_source *s, uint64_t *ret_interval, unsigned *ret_burst) { + assert_return(s, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); + + /* Querying whether an event source has ratelimiting configured is not a loggable offense, hence + * don't use assert_return(). Unlike turning on ratelimiting it's not really a programming error. */ + if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type)) + return -EDOM; + + if (!ratelimit_configured(&s->rate_limit)) + return -ENOEXEC; + + if (ret_interval) + *ret_interval = s->rate_limit.interval; + if (ret_burst) + *ret_burst = s->rate_limit.burst; + + return 0; +} + +_public_ int sd_event_source_is_ratelimited(sd_event_source *s) { + assert_return(s, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); + + if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type)) + return false; + + if (!ratelimit_configured(&s->rate_limit)) + return false; + + return s->ratelimited; +} + +_public_ int sd_event_source_leave_ratelimit(sd_event_source *s) { + int r; + + assert_return(s, -EINVAL); + + if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type)) + return 0; + + if (!ratelimit_configured(&s->rate_limit)) + return 0; + + if (!s->ratelimited) + return 0; + + r = event_source_leave_ratelimit(s, /* run_callback */ false); + if (r < 0) + return r; + + return 1; /* tell caller that we indeed just left the ratelimit state */ +} + +_public_ int sd_event_set_signal_exit(sd_event *e, int b) { + bool change = false; + int r; + + assert_return(e, -EINVAL); + + if (b) { + /* We want to maintain pointers to these event sources, so that we can destroy them when told + * so. But we also don't want them to pin the event loop itself. Hence we mark them as + * floating after creation (and undo this before deleting them again). */ + + if (!e->sigint_event_source) { + r = sd_event_add_signal(e, &e->sigint_event_source, SIGINT | SD_EVENT_SIGNAL_PROCMASK, NULL, NULL); + if (r < 0) + return r; + + assert(sd_event_source_set_floating(e->sigint_event_source, true) >= 0); + change = true; + } + + if (!e->sigterm_event_source) { + r = sd_event_add_signal(e, &e->sigterm_event_source, SIGTERM | SD_EVENT_SIGNAL_PROCMASK, NULL, NULL); + if (r < 0) { + if (change) { + assert(sd_event_source_set_floating(e->sigint_event_source, false) >= 0); + e->sigint_event_source = sd_event_source_unref(e->sigint_event_source); + } + + return r; + } + + assert(sd_event_source_set_floating(e->sigterm_event_source, true) >= 0); + change = true; + } + + } else { + if (e->sigint_event_source) { + assert(sd_event_source_set_floating(e->sigint_event_source, false) >= 0); + e->sigint_event_source = sd_event_source_unref(e->sigint_event_source); + change = true; + } + + if (e->sigterm_event_source) { + assert(sd_event_source_set_floating(e->sigterm_event_source, false) >= 0); + e->sigterm_event_source = sd_event_source_unref(e->sigterm_event_source); + change = true; + } + } + + return change; +} + +_public_ int sd_event_source_set_memory_pressure_type(sd_event_source *s, const char *ty) { + _cleanup_free_ char *b = NULL; + _cleanup_free_ void *w = NULL; + + assert_return(s, -EINVAL); + assert_return(s->type == SOURCE_MEMORY_PRESSURE, -EDOM); + assert_return(ty, -EINVAL); + assert_return(!event_origin_changed(s->event), -ECHILD); + + if (!STR_IN_SET(ty, "some", "full")) + return -EINVAL; + + if (s->memory_pressure.locked) /* Refuse adjusting parameters, if caller told us how to watch for events */ + return -EBUSY; + + char* space = memchr(s->memory_pressure.write_buffer, ' ', s->memory_pressure.write_buffer_size); + if (!space) + return -EINVAL; + + size_t l = (char*) space - (char*) s->memory_pressure.write_buffer; + b = memdup_suffix0(s->memory_pressure.write_buffer, l); + if (!b) + return -ENOMEM; + if (!STR_IN_SET(b, "some", "full")) + return -EINVAL; + + if (streq(b, ty)) + return 0; + + size_t nl = strlen(ty) + (s->memory_pressure.write_buffer_size - l); + w = new(char, nl); + if (!w) + return -ENOMEM; + + memcpy(stpcpy(w, ty), space, (s->memory_pressure.write_buffer_size - l)); + + free_and_replace(s->memory_pressure.write_buffer, w); + s->memory_pressure.write_buffer_size = nl; + s->memory_pressure.locked = false; + + return 1; +} + +_public_ int sd_event_source_set_memory_pressure_period(sd_event_source *s, uint64_t threshold_usec, uint64_t window_usec) { + _cleanup_free_ char *b = NULL; + _cleanup_free_ void *w = NULL; + + assert_return(s, -EINVAL); + assert_return(s->type == SOURCE_MEMORY_PRESSURE, -EDOM); + assert_return(!event_origin_changed(s->event), -ECHILD); + + if (threshold_usec <= 0 || threshold_usec >= UINT64_MAX) + return -ERANGE; + if (window_usec <= 0 || window_usec >= UINT64_MAX) + return -ERANGE; + if (threshold_usec > window_usec) + return -EINVAL; + + if (s->memory_pressure.locked) /* Refuse adjusting parameters, if caller told us how to watch for events */ + return -EBUSY; + + char* space = memchr(s->memory_pressure.write_buffer, ' ', s->memory_pressure.write_buffer_size); + if (!space) + return -EINVAL; + + size_t l = (char*) space - (char*) s->memory_pressure.write_buffer; + b = memdup_suffix0(s->memory_pressure.write_buffer, l); + if (!b) + return -ENOMEM; + if (!STR_IN_SET(b, "some", "full")) + return -EINVAL; + + if (asprintf((char**) &w, + "%s " USEC_FMT " " USEC_FMT "", + b, + threshold_usec, + window_usec) < 0) + return -EINVAL; + + l = strlen(w) + 1; + if (memcmp_nn(s->memory_pressure.write_buffer, s->memory_pressure.write_buffer_size, w, l) == 0) + return 0; + + free_and_replace(s->memory_pressure.write_buffer, w); + s->memory_pressure.write_buffer_size = l; + s->memory_pressure.locked = false; + + return 1; +} diff --git a/src/libsystemd/sd-event/test-event.c b/src/libsystemd/sd-event/test-event.c new file mode 100644 index 0000000..63d3ee7 --- /dev/null +++ b/src/libsystemd/sd-event/test-event.c @@ -0,0 +1,902 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <sys/wait.h> +#include <unistd.h> + +#include "sd-event.h" + +#include "alloc-util.h" +#include "exec-util.h" +#include "fd-util.h" +#include "fs-util.h" +#include "log.h" +#include "macro.h" +#include "missing_syscall.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "random-util.h" +#include "rm-rf.h" +#include "signal-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "tests.h" +#include "tmpfile-util.h" + +static int prepare_handler(sd_event_source *s, void *userdata) { + log_info("preparing %c", PTR_TO_INT(userdata)); + return 1; +} + +static bool got_a, got_b, got_c, got_unref; +static unsigned got_d; + +static int unref_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) { + sd_event_source_unref(s); + got_unref = true; + return 0; +} + +static int io_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) { + + log_info("got IO on %c", PTR_TO_INT(userdata)); + + if (userdata == INT_TO_PTR('a')) { + assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0); + assert_se(!got_a); + got_a = true; + } else if (userdata == INT_TO_PTR('b')) { + assert_se(!got_b); + got_b = true; + } else if (userdata == INT_TO_PTR('d')) { + got_d++; + if (got_d < 2) + assert_se(sd_event_source_set_enabled(s, SD_EVENT_ONESHOT) >= 0); + else + assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0); + } else + assert_not_reached(); + + return 1; +} + +static int child_handler(sd_event_source *s, const siginfo_t *si, void *userdata) { + + assert_se(s); + assert_se(si); + + assert_se(si->si_uid == getuid()); + assert_se(si->si_signo == SIGCHLD); + assert_se(si->si_code == CLD_EXITED); + assert_se(si->si_status == 78); + + log_info("got child on %c", PTR_TO_INT(userdata)); + + assert_se(userdata == INT_TO_PTR('f')); + + assert_se(sd_event_exit(sd_event_source_get_event(s), 0) >= 0); + sd_event_source_unref(s); + + return 1; +} + +static int signal_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) { + sd_event_source *p = NULL; + pid_t pid; + siginfo_t plain_si; + + assert_se(s); + assert_se(si); + + log_info("got signal on %c", PTR_TO_INT(userdata)); + + assert_se(userdata == INT_TO_PTR('e')); + + assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, SIGUSR2, -1) >= 0); + + pid = fork(); + assert_se(pid >= 0); + + if (pid == 0) { + sigset_t ss; + + assert_se(sigemptyset(&ss) >= 0); + assert_se(sigaddset(&ss, SIGUSR2) >= 0); + + zero(plain_si); + assert_se(sigwaitinfo(&ss, &plain_si) >= 0); + + assert_se(plain_si.si_signo == SIGUSR2); + assert_se(plain_si.si_value.sival_int == 4711); + + _exit(78); + } + + assert_se(sd_event_add_child(sd_event_source_get_event(s), &p, pid, WEXITED, child_handler, INT_TO_PTR('f')) >= 0); + assert_se(sd_event_source_set_enabled(p, SD_EVENT_ONESHOT) >= 0); + assert_se(sd_event_source_set_child_process_own(p, true) >= 0); + + /* We can't use structured initialization here, since the structure contains various unions and these + * fields lie in overlapping (carefully aligned) unions that LLVM is allergic to allow assignments + * to */ + zero(plain_si); + plain_si.si_signo = SIGUSR2; + plain_si.si_code = SI_QUEUE; + plain_si.si_pid = getpid_cached(); + plain_si.si_uid = getuid(); + plain_si.si_value.sival_int = 4711; + + assert_se(sd_event_source_send_child_signal(p, SIGUSR2, &plain_si, 0) >= 0); + + sd_event_source_unref(s); + + return 1; +} + +static int defer_handler(sd_event_source *s, void *userdata) { + sd_event_source *p = NULL; + + assert_se(s); + + log_info("got defer on %c", PTR_TO_INT(userdata)); + + assert_se(userdata == INT_TO_PTR('d')); + + assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGUSR1, -1) >= 0); + + assert_se(sd_event_add_signal(sd_event_source_get_event(s), &p, SIGUSR1, signal_handler, INT_TO_PTR('e')) >= 0); + assert_se(sd_event_source_set_enabled(p, SD_EVENT_ONESHOT) >= 0); + raise(SIGUSR1); + + sd_event_source_unref(s); + + return 1; +} + +static bool do_quit; + +static int time_handler(sd_event_source *s, uint64_t usec, void *userdata) { + log_info("got timer on %c", PTR_TO_INT(userdata)); + + if (userdata == INT_TO_PTR('c')) { + + if (do_quit) { + sd_event_source *p; + + assert_se(sd_event_add_defer(sd_event_source_get_event(s), &p, defer_handler, INT_TO_PTR('d')) >= 0); + assert_se(sd_event_source_set_enabled(p, SD_EVENT_ONESHOT) >= 0); + } else { + assert_se(!got_c); + got_c = true; + } + } else + assert_not_reached(); + + return 2; +} + +static bool got_exit = false; + +static int exit_handler(sd_event_source *s, void *userdata) { + log_info("got quit handler on %c", PTR_TO_INT(userdata)); + + got_exit = true; + + return 3; +} + +static bool got_post = false; + +static int post_handler(sd_event_source *s, void *userdata) { + log_info("got post handler"); + + got_post = true; + + return 2; +} + +static void test_basic_one(bool with_pidfd) { + sd_event *e = NULL; + sd_event_source *w = NULL, *x = NULL, *y = NULL, *z = NULL, *q = NULL, *t = NULL; + static const char ch = 'x'; + int a[2] = EBADF_PAIR, b[2] = EBADF_PAIR, + d[2] = EBADF_PAIR, k[2] = EBADF_PAIR; + uint64_t event_now; + int64_t priority; + + log_info("/* %s(pidfd=%s) */", __func__, yes_no(with_pidfd)); + + assert_se(setenv("SYSTEMD_PIDFD", yes_no(with_pidfd), 1) >= 0); + + assert_se(pipe(a) >= 0); + assert_se(pipe(b) >= 0); + assert_se(pipe(d) >= 0); + assert_se(pipe(k) >= 0); + + assert_se(sd_event_default(&e) >= 0); + assert_se(sd_event_now(e, CLOCK_MONOTONIC, &event_now) > 0); + + assert_se(sd_event_set_watchdog(e, true) >= 0); + + /* Test whether we cleanly can destroy an io event source from its own handler */ + got_unref = false; + assert_se(sd_event_add_io(e, &t, k[0], EPOLLIN, unref_handler, NULL) >= 0); + assert_se(write(k[1], &ch, 1) == 1); + assert_se(sd_event_run(e, UINT64_MAX) >= 1); + assert_se(got_unref); + + got_a = false, got_b = false, got_c = false, got_d = 0; + + /* Add a oneshot handler, trigger it, reenable it, and trigger it again. */ + assert_se(sd_event_add_io(e, &w, d[0], EPOLLIN, io_handler, INT_TO_PTR('d')) >= 0); + assert_se(sd_event_source_set_enabled(w, SD_EVENT_ONESHOT) >= 0); + assert_se(write(d[1], &ch, 1) >= 0); + assert_se(sd_event_run(e, UINT64_MAX) >= 1); + assert_se(got_d == 1); + assert_se(write(d[1], &ch, 1) >= 0); + assert_se(sd_event_run(e, UINT64_MAX) >= 1); + assert_se(got_d == 2); + + assert_se(sd_event_add_io(e, &x, a[0], EPOLLIN, io_handler, INT_TO_PTR('a')) >= 0); + assert_se(sd_event_add_io(e, &y, b[0], EPOLLIN, io_handler, INT_TO_PTR('b')) >= 0); + + do_quit = false; + assert_se(sd_event_add_time(e, &z, CLOCK_MONOTONIC, 0, 0, time_handler, INT_TO_PTR('c')) >= 0); + assert_se(sd_event_add_exit(e, &q, exit_handler, INT_TO_PTR('g')) >= 0); + + assert_se(sd_event_source_set_priority(x, 99) >= 0); + assert_se(sd_event_source_get_priority(x, &priority) >= 0); + assert_se(priority == 99); + assert_se(sd_event_source_set_enabled(y, SD_EVENT_ONESHOT) >= 0); + assert_se(sd_event_source_set_prepare(x, prepare_handler) >= 0); + assert_se(sd_event_source_set_priority(z, 50) >= 0); + assert_se(sd_event_source_set_enabled(z, SD_EVENT_ONESHOT) >= 0); + assert_se(sd_event_source_set_prepare(z, prepare_handler) >= 0); + + /* Test for floating event sources */ + assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGRTMIN+1, -1) >= 0); + assert_se(sd_event_add_signal(e, NULL, SIGRTMIN+1, NULL, NULL) >= 0); + + assert_se(write(a[1], &ch, 1) >= 0); + assert_se(write(b[1], &ch, 1) >= 0); + + assert_se(!got_a && !got_b && !got_c); + + assert_se(sd_event_run(e, UINT64_MAX) >= 1); + + assert_se(!got_a && got_b && !got_c); + + assert_se(sd_event_run(e, UINT64_MAX) >= 1); + + assert_se(!got_a && got_b && got_c); + + assert_se(sd_event_run(e, UINT64_MAX) >= 1); + + assert_se(got_a && got_b && got_c); + + sd_event_source_unref(x); + sd_event_source_unref(y); + + do_quit = true; + assert_se(sd_event_add_post(e, NULL, post_handler, NULL) >= 0); + assert_se(sd_event_now(e, CLOCK_MONOTONIC, &event_now) == 0); + assert_se(sd_event_source_set_time(z, event_now + 200 * USEC_PER_MSEC) >= 0); + assert_se(sd_event_source_set_enabled(z, SD_EVENT_ONESHOT) >= 0); + + assert_se(sd_event_loop(e) >= 0); + assert_se(got_post); + assert_se(got_exit); + + sd_event_source_unref(z); + sd_event_source_unref(q); + + sd_event_source_unref(w); + + sd_event_unref(e); + + safe_close_pair(a); + safe_close_pair(b); + safe_close_pair(d); + safe_close_pair(k); + + assert_se(unsetenv("SYSTEMD_PIDFD") >= 0); +} + +TEST(basic) { + test_basic_one(true); /* test with pidfd */ + test_basic_one(false); /* test without pidfd */ +} + +TEST(sd_event_now) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + uint64_t event_now; + + assert_se(sd_event_new(&e) >= 0); + assert_se(sd_event_now(e, CLOCK_MONOTONIC, &event_now) > 0); + assert_se(sd_event_now(e, CLOCK_REALTIME, &event_now) > 0); + assert_se(sd_event_now(e, CLOCK_REALTIME_ALARM, &event_now) > 0); + assert_se(sd_event_now(e, CLOCK_BOOTTIME, &event_now) > 0); + assert_se(sd_event_now(e, CLOCK_BOOTTIME_ALARM, &event_now) > 0); + assert_se(sd_event_now(e, -1, &event_now) == -EOPNOTSUPP); + assert_se(sd_event_now(e, 900 /* arbitrary big number */, &event_now) == -EOPNOTSUPP); + + assert_se(sd_event_run(e, 0) == 0); + + assert_se(sd_event_now(e, CLOCK_MONOTONIC, &event_now) == 0); + assert_se(sd_event_now(e, CLOCK_REALTIME, &event_now) == 0); + assert_se(sd_event_now(e, CLOCK_REALTIME_ALARM, &event_now) == 0); + assert_se(sd_event_now(e, CLOCK_BOOTTIME, &event_now) == 0); + assert_se(sd_event_now(e, CLOCK_BOOTTIME_ALARM, &event_now) == 0); + assert_se(sd_event_now(e, -1, &event_now) == -EOPNOTSUPP); + assert_se(sd_event_now(e, 900 /* arbitrary big number */, &event_now) == -EOPNOTSUPP); +} + +static int last_rtqueue_sigval = 0; +static int n_rtqueue = 0; + +static int rtqueue_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) { + last_rtqueue_sigval = si->ssi_int; + n_rtqueue++; + return 0; +} + +TEST(rtqueue) { + sd_event_source *u = NULL, *v = NULL, *s = NULL; + sd_event *e = NULL; + + assert_se(sd_event_default(&e) >= 0); + + assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGRTMIN+2, SIGRTMIN+3, SIGUSR2, -1) >= 0); + assert_se(sd_event_add_signal(e, &u, SIGRTMIN+2, rtqueue_handler, NULL) >= 0); + assert_se(sd_event_add_signal(e, &v, SIGRTMIN+3, rtqueue_handler, NULL) >= 0); + assert_se(sd_event_add_signal(e, &s, SIGUSR2, rtqueue_handler, NULL) >= 0); + + assert_se(sd_event_source_set_priority(v, -10) >= 0); + + assert_se(sigqueue(getpid_cached(), SIGRTMIN+2, (union sigval) { .sival_int = 1 }) >= 0); + assert_se(sigqueue(getpid_cached(), SIGRTMIN+3, (union sigval) { .sival_int = 2 }) >= 0); + assert_se(sigqueue(getpid_cached(), SIGUSR2, (union sigval) { .sival_int = 3 }) >= 0); + assert_se(sigqueue(getpid_cached(), SIGRTMIN+3, (union sigval) { .sival_int = 4 }) >= 0); + assert_se(sigqueue(getpid_cached(), SIGUSR2, (union sigval) { .sival_int = 5 }) >= 0); + + assert_se(n_rtqueue == 0); + assert_se(last_rtqueue_sigval == 0); + + assert_se(sd_event_run(e, UINT64_MAX) >= 1); + assert_se(n_rtqueue == 1); + assert_se(last_rtqueue_sigval == 2); /* first SIGRTMIN+3 */ + + assert_se(sd_event_run(e, UINT64_MAX) >= 1); + assert_se(n_rtqueue == 2); + assert_se(last_rtqueue_sigval == 4); /* second SIGRTMIN+3 */ + + assert_se(sd_event_run(e, UINT64_MAX) >= 1); + assert_se(n_rtqueue == 3); + assert_se(last_rtqueue_sigval == 3); /* first SIGUSR2 */ + + assert_se(sd_event_run(e, UINT64_MAX) >= 1); + assert_se(n_rtqueue == 4); + assert_se(last_rtqueue_sigval == 1); /* SIGRTMIN+2 */ + + assert_se(sd_event_run(e, 0) == 0); /* the other SIGUSR2 is dropped, because the first one was still queued */ + assert_se(n_rtqueue == 4); + assert_se(last_rtqueue_sigval == 1); + + sd_event_source_unref(u); + sd_event_source_unref(v); + sd_event_source_unref(s); + + sd_event_unref(e); +} + +#define CREATE_EVENTS_MAX (70000U) + +struct inotify_context { + bool delete_self_handler_called; + unsigned create_called[CREATE_EVENTS_MAX]; + unsigned create_overflow; + unsigned n_create_events; +}; + +static void maybe_exit(sd_event_source *s, struct inotify_context *c) { + unsigned n; + + assert_se(s); + assert_se(c); + + if (!c->delete_self_handler_called) + return; + + for (n = 0; n < 3; n++) { + unsigned i; + + if (c->create_overflow & (1U << n)) + continue; + + for (i = 0; i < c->n_create_events; i++) + if (!(c->create_called[i] & (1U << n))) + return; + } + + sd_event_exit(sd_event_source_get_event(s), 0); +} + +static int inotify_handler(sd_event_source *s, const struct inotify_event *ev, void *userdata) { + struct inotify_context *c = userdata; + const char *description; + unsigned bit, n; + + assert_se(sd_event_source_get_description(s, &description) >= 0); + assert_se(safe_atou(description, &n) >= 0); + + assert_se(n <= 3); + bit = 1U << n; + + if (ev->mask & IN_Q_OVERFLOW) { + log_info("inotify-handler <%s>: overflow", description); + c->create_overflow |= bit; + } else if (ev->mask & IN_CREATE) { + if (streq(ev->name, "sub")) + log_debug("inotify-handler <%s>: create on %s", description, ev->name); + else { + unsigned i; + + assert_se(safe_atou(ev->name, &i) >= 0); + assert_se(i < c->n_create_events); + c->create_called[i] |= bit; + } + } else if (ev->mask & IN_DELETE) { + log_info("inotify-handler <%s>: delete of %s", description, ev->name); + assert_se(streq(ev->name, "sub")); + } else + assert_not_reached(); + + maybe_exit(s, c); + return 1; +} + +static int delete_self_handler(sd_event_source *s, const struct inotify_event *ev, void *userdata) { + struct inotify_context *c = userdata; + + if (ev->mask & IN_Q_OVERFLOW) { + log_info("delete-self-handler: overflow"); + c->delete_self_handler_called = true; + } else if (ev->mask & IN_DELETE_SELF) { + log_info("delete-self-handler: delete-self"); + c->delete_self_handler_called = true; + } else if (ev->mask & IN_IGNORED) { + log_info("delete-self-handler: ignore"); + } else + assert_not_reached(); + + maybe_exit(s, c); + return 1; +} + +static void test_inotify_one(unsigned n_create_events) { + _cleanup_(rm_rf_physical_and_freep) char *p = NULL; + sd_event_source *a = NULL, *b = NULL, *c = NULL, *d = NULL; + struct inotify_context context = { + .n_create_events = n_create_events, + }; + sd_event *e = NULL; + const char *q; + unsigned i; + + log_info("/* %s(%u) */", __func__, n_create_events); + + assert_se(sd_event_default(&e) >= 0); + + assert_se(mkdtemp_malloc("/tmp/test-inotify-XXXXXX", &p) >= 0); + + assert_se(sd_event_add_inotify(e, &a, p, IN_CREATE|IN_ONLYDIR, inotify_handler, &context) >= 0); + assert_se(sd_event_add_inotify(e, &b, p, IN_CREATE|IN_DELETE|IN_DONT_FOLLOW, inotify_handler, &context) >= 0); + assert_se(sd_event_source_set_priority(b, SD_EVENT_PRIORITY_IDLE) >= 0); + assert_se(sd_event_source_set_priority(b, SD_EVENT_PRIORITY_NORMAL) >= 0); + assert_se(sd_event_add_inotify(e, &c, p, IN_CREATE|IN_DELETE|IN_EXCL_UNLINK, inotify_handler, &context) >= 0); + assert_se(sd_event_source_set_priority(c, SD_EVENT_PRIORITY_IDLE) >= 0); + + assert_se(sd_event_source_set_description(a, "0") >= 0); + assert_se(sd_event_source_set_description(b, "1") >= 0); + assert_se(sd_event_source_set_description(c, "2") >= 0); + + q = strjoina(p, "/sub"); + assert_se(touch(q) >= 0); + assert_se(sd_event_add_inotify(e, &d, q, IN_DELETE_SELF, delete_self_handler, &context) >= 0); + + for (i = 0; i < n_create_events; i++) { + char buf[DECIMAL_STR_MAX(unsigned)+1]; + _cleanup_free_ char *z = NULL; + + xsprintf(buf, "%u", i); + assert_se(z = path_join(p, buf)); + + assert_se(touch(z) >= 0); + } + + assert_se(unlink(q) >= 0); + + assert_se(sd_event_loop(e) >= 0); + + sd_event_source_unref(a); + sd_event_source_unref(b); + sd_event_source_unref(c); + sd_event_source_unref(d); + + sd_event_unref(e); +} + +TEST(inotify) { + test_inotify_one(100); /* should work without overflow */ + test_inotify_one(33000); /* should trigger a q overflow */ +} + +static int pidfd_handler(sd_event_source *s, const siginfo_t *si, void *userdata) { + assert_se(s); + assert_se(si); + + assert_se(si->si_uid == getuid()); + assert_se(si->si_signo == SIGCHLD); + assert_se(si->si_code == CLD_EXITED); + assert_se(si->si_status == 66); + + log_info("got pidfd on %c", PTR_TO_INT(userdata)); + + assert_se(userdata == INT_TO_PTR('p')); + + assert_se(sd_event_exit(sd_event_source_get_event(s), 0) >= 0); + sd_event_source_unref(s); + + return 0; +} + +TEST(pidfd) { + sd_event_source *s = NULL, *t = NULL; + sd_event *e = NULL; + int pidfd; + pid_t pid, pid2; + + assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1) >= 0); + + pid = fork(); + if (pid == 0) + /* child */ + _exit(66); + + assert_se(pid > 1); + + pidfd = pidfd_open(pid, 0); + if (pidfd < 0) { + /* No pidfd_open() supported or blocked? */ + assert_se(ERRNO_IS_NOT_SUPPORTED(errno) || ERRNO_IS_PRIVILEGE(errno)); + (void) wait_for_terminate(pid, NULL); + return; + } + + pid2 = fork(); + if (pid2 == 0) + freeze(); + + assert_se(pid > 2); + + assert_se(sd_event_default(&e) >= 0); + assert_se(sd_event_add_child_pidfd(e, &s, pidfd, WEXITED, pidfd_handler, INT_TO_PTR('p')) >= 0); + assert_se(sd_event_source_set_child_pidfd_own(s, true) >= 0); + + /* This one should never trigger, since our second child lives forever */ + assert_se(sd_event_add_child(e, &t, pid2, WEXITED, pidfd_handler, INT_TO_PTR('q')) >= 0); + assert_se(sd_event_source_set_child_process_own(t, true) >= 0); + + assert_se(sd_event_loop(e) >= 0); + + /* Child should still be alive */ + assert_se(kill(pid2, 0) >= 0); + + t = sd_event_source_unref(t); + + /* Child should now be dead, since we dropped the ref */ + assert_se(kill(pid2, 0) < 0 && errno == ESRCH); + + sd_event_unref(e); +} + +static int ratelimit_io_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) { + unsigned *c = (unsigned*) userdata; + *c += 1; + return 0; +} + +static int ratelimit_time_handler(sd_event_source *s, uint64_t usec, void *userdata) { + int r; + + r = sd_event_source_set_enabled(s, SD_EVENT_ON); + if (r < 0) + log_warning_errno(r, "Failed to turn on notify event source: %m"); + + r = sd_event_source_set_time(s, usec + 1000); + if (r < 0) + log_error_errno(r, "Failed to restart watchdog event source: %m"); + + unsigned *c = (unsigned*) userdata; + *c += 1; + + return 0; +} + +static int expired = -1; +static int ratelimit_expired(sd_event_source *s, void *userdata) { + return ++expired; +} + +TEST(ratelimit) { + _cleanup_close_pair_ int p[2] = EBADF_PAIR; + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL; + uint64_t interval; + unsigned count, burst; + + assert_se(sd_event_default(&e) >= 0); + assert_se(pipe2(p, O_CLOEXEC|O_NONBLOCK) >= 0); + + assert_se(sd_event_add_io(e, &s, p[0], EPOLLIN, ratelimit_io_handler, &count) >= 0); + assert_se(sd_event_source_set_description(s, "test-ratelimit-io") >= 0); + assert_se(sd_event_source_set_ratelimit(s, 1 * USEC_PER_SEC, 5) >= 0); + assert_se(sd_event_source_get_ratelimit(s, &interval, &burst) >= 0); + assert_se(interval == 1 * USEC_PER_SEC && burst == 5); + + assert_se(write(p[1], "1", 1) == 1); + + count = 0; + for (unsigned i = 0; i < 10; i++) { + log_debug("slow loop iteration %u", i); + assert_se(sd_event_run(e, UINT64_MAX) >= 0); + assert_se(usleep_safe(250 * USEC_PER_MSEC) >= 0); + } + + assert_se(sd_event_source_is_ratelimited(s) == 0); + assert_se(count == 10); + log_info("ratelimit_io_handler: called %u times, event source not ratelimited", count); + + assert_se(sd_event_source_set_ratelimit(s, 0, 0) >= 0); + assert_se(sd_event_source_set_ratelimit(s, 1 * USEC_PER_SEC, 5) >= 0); + + count = 0; + for (unsigned i = 0; i < 10; i++) { + log_debug("fast event loop iteration %u", i); + assert_se(sd_event_run(e, UINT64_MAX) >= 0); + assert_se(usleep_safe(10) >= 0); + } + log_info("ratelimit_io_handler: called %u times, event source got ratelimited", count); + assert_se(count < 10); + + s = sd_event_source_unref(s); + safe_close_pair(p); + + count = 0; + assert_se(sd_event_add_time_relative(e, &s, CLOCK_MONOTONIC, 1000, 1, ratelimit_time_handler, &count) >= 0); + assert_se(sd_event_source_set_ratelimit(s, 1 * USEC_PER_SEC, 10) == 0); + + do { + assert_se(sd_event_run(e, UINT64_MAX) >= 0); + } while (!sd_event_source_is_ratelimited(s)); + + log_info("ratelimit_time_handler: called %u times, event source got ratelimited", count); + assert_se(count == 10); + + /* In order to get rid of active rate limit client needs to disable it explicitly */ + assert_se(sd_event_source_set_ratelimit(s, 0, 0) >= 0); + assert_se(!sd_event_source_is_ratelimited(s)); + + assert_se(sd_event_source_set_ratelimit(s, 1 * USEC_PER_SEC, 10) >= 0); + + /* Set callback that will be invoked when we leave rate limited state. */ + assert_se(sd_event_source_set_ratelimit_expire_callback(s, ratelimit_expired) >= 0); + + do { + assert_se(sd_event_run(e, UINT64_MAX) >= 0); + } while (!sd_event_source_is_ratelimited(s)); + + log_info("ratelimit_time_handler: called 10 more times, event source got ratelimited"); + assert_se(count == 20); + + /* Dispatch the event loop once more and check that ratelimit expiration callback got called */ + assert_se(sd_event_run(e, UINT64_MAX) >= 0); + assert_se(expired == 0); +} + +TEST(simple_timeout) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + usec_t f, t, some_time; + + some_time = random_u64_range(2 * USEC_PER_SEC); + + assert_se(sd_event_default(&e) >= 0); + + assert_se(sd_event_prepare(e) == 0); + + f = now(CLOCK_MONOTONIC); + assert_se(sd_event_wait(e, some_time) >= 0); + t = now(CLOCK_MONOTONIC); + + /* The event loop may sleep longer than the specified time (timer accuracy, scheduling latencies, …), + * but never shorter. Let's check that. */ + assert_se(t >= usec_add(f, some_time)); +} + +static int inotify_self_destroy_handler(sd_event_source *s, const struct inotify_event *ev, void *userdata) { + sd_event_source **p = userdata; + + assert_se(ev); + assert_se(p); + assert_se(*p == s); + + assert_se(FLAGS_SET(ev->mask, IN_ATTRIB)); + + assert_se(sd_event_exit(sd_event_source_get_event(s), 0) >= 0); + + *p = sd_event_source_unref(*p); /* here's what we actually intend to test: we destroy the event + * source from inside the event source handler */ + return 1; +} + +TEST(inotify_self_destroy) { + _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL; + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + char path[] = "/tmp/inotifyXXXXXX"; + _cleanup_close_ int fd = -EBADF; + + /* Tests that destroying an inotify event source from its own handler is safe */ + + assert_se(sd_event_default(&e) >= 0); + + fd = mkostemp_safe(path); + assert_se(fd >= 0); + assert_se(sd_event_add_inotify_fd(e, &s, fd, IN_ATTRIB, inotify_self_destroy_handler, &s) >= 0); + fd = safe_close(fd); + assert_se(unlink(path) >= 0); /* This will trigger IN_ATTRIB because link count goes to zero */ + assert_se(sd_event_loop(e) >= 0); +} + +struct inotify_process_buffered_data_context { + const char *path[2]; + unsigned i; +}; + +static int inotify_process_buffered_data_handler(sd_event_source *s, const struct inotify_event *ev, void *userdata) { + struct inotify_process_buffered_data_context *c = ASSERT_PTR(userdata); + const char *description; + + assert_se(sd_event_source_get_description(s, &description) >= 0); + + assert_se(c->i < 2); + assert_se(streq(c->path[c->i], description)); + c->i++; + + return 1; +} + +TEST(inotify_process_buffered_data) { + _cleanup_(rm_rf_physical_and_freep) char *p = NULL, *q = NULL; + _cleanup_(sd_event_source_unrefp) sd_event_source *a = NULL, *b = NULL; + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + _cleanup_free_ char *z = NULL; + + /* For issue #23826 */ + + assert_se(sd_event_default(&e) >= 0); + + assert_se(mkdtemp_malloc("/tmp/test-inotify-XXXXXX", &p) >= 0); + assert_se(mkdtemp_malloc("/tmp/test-inotify-XXXXXX", &q) >= 0); + + struct inotify_process_buffered_data_context context = { + .path = { p, q }, + }; + + assert_se(sd_event_add_inotify(e, &a, p, IN_CREATE, inotify_process_buffered_data_handler, &context) >= 0); + assert_se(sd_event_add_inotify(e, &b, q, IN_CREATE, inotify_process_buffered_data_handler, &context) >= 0); + + assert_se(z = path_join(p, "aaa")); + assert_se(touch(z) >= 0); + z = mfree(z); + assert_se(z = path_join(q, "bbb")); + assert_se(touch(z) >= 0); + z = mfree(z); + + assert_se(sd_event_run(e, 10 * USEC_PER_SEC) > 0); + assert_se(sd_event_prepare(e) > 0); /* issue #23826: this was 0. */ + assert_se(sd_event_dispatch(e) > 0); + assert_se(sd_event_prepare(e) == 0); + assert_se(sd_event_wait(e, 0) == 0); +} + +TEST(fork) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + int r; + + assert_se(sd_event_default(&e) >= 0); + assert_se(sd_event_prepare(e) == 0); + + /* Check that after a fork the cleanup functions return NULL */ + r = safe_fork("(bus-fork-test)", FORK_WAIT|FORK_LOG, NULL); + if (r == 0) { + assert_se(e); + assert_se(sd_event_ref(e) == NULL); + assert_se(sd_event_unref(e) == NULL); + _exit(EXIT_SUCCESS); + } + + assert_se(r >= 0); +} + +static int hup_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) { + unsigned *c = userdata; + + assert_se(revents == EPOLLHUP); + + (*c)++; + return 0; +} + +TEST(leave_ratelimit) { + bool expect_ratelimit = false, manually_left_ratelimit = false; + _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL; + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + _cleanup_close_pair_ int pfd[2] = EBADF_PAIR; + unsigned c = 0; + int r; + + assert_se(sd_event_default(&e) >= 0); + + /* Create an event source that will continuously fire by creating a pipe whose write side is closed, + * and which hence will only see EOF and constant EPOLLHUP */ + assert_se(pipe2(pfd, O_CLOEXEC) >= 0); + assert_se(sd_event_add_io(e, &s, pfd[0], EPOLLIN, hup_callback, &c) >= 0); + assert_se(sd_event_source_set_io_fd_own(s, true) >= 0); + assert_se(sd_event_source_set_ratelimit(s, 5*USEC_PER_MINUTE, 5) >= 0); + + pfd[0] = -EBADF; + pfd[1] = safe_close(pfd[1]); /* Trigger continuous EOF */ + + for (;;) { + r = sd_event_prepare(e); + assert_se(r >= 0); + + if (r == 0) { + r = sd_event_wait(e, UINT64_MAX); + assert_se(r > 0); + } + + r = sd_event_dispatch(e); + assert_se(r > 0); + + r = sd_event_source_is_ratelimited(s); + assert_se(r >= 0); + + if (c < 5) + /* First four dispatches should just work */ + assert_se(!r); + else if (c == 5) { + /* The fifth dispatch should still work, but we now expect the ratelimit to be hit subsequently */ + if (!expect_ratelimit) { + assert_se(!r); + assert_se(sd_event_source_leave_ratelimit(s) == 0); /* this should be a NOP, and return 0 hence */ + expect_ratelimit = true; + } else { + /* We expected the ratelimit, let's leave it manually, and verify it */ + assert_se(r); + assert_se(sd_event_source_leave_ratelimit(s) > 0); /* we are ratelimited, hence should return > 0 */ + assert_se(sd_event_source_is_ratelimited(s) == 0); + + manually_left_ratelimit = true; + } + + } else if (c == 6) + /* On the sixth iteration let's just exit */ + break; + } + + /* Verify we definitely hit the ratelimit and left it manually again */ + assert_se(manually_left_ratelimit); +} + +DEFINE_TEST_MAIN(LOG_DEBUG); diff --git a/src/libsystemd/sd-hwdb/hwdb-internal.h b/src/libsystemd/sd-hwdb/hwdb-internal.h new file mode 100644 index 0000000..9db3b31 --- /dev/null +++ b/src/libsystemd/sd-hwdb/hwdb-internal.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdint.h> +#include <sys/stat.h> + +#include "constants.h" +#include "hashmap.h" +#include "sparse-endian.h" + +#define HWDB_SIG { 'K', 'S', 'L', 'P', 'H', 'H', 'R', 'H' } + +struct sd_hwdb { + unsigned n_ref; + + FILE *f; + struct stat st; + union { + struct trie_header_f *head; + const char *map; + }; + + OrderedHashmap *properties; + Iterator properties_iterator; + bool properties_modified; +}; + +/* on-disk trie objects */ +struct trie_header_f { + uint8_t signature[8]; + + /* version of tool which created the file */ + le64_t tool_version; + le64_t file_size; + + /* size of structures to allow them to grow */ + le64_t header_size; + le64_t node_size; + le64_t child_entry_size; + le64_t value_entry_size; + + /* offset of the root trie node */ + le64_t nodes_root_off; + + /* size of the nodes and string section */ + le64_t nodes_len; + le64_t strings_len; +} _packed_; + +struct trie_node_f { + /* prefix of lookup string, shared by all children */ + le64_t prefix_off; + /* size of children entry array appended to the node */ + uint8_t children_count; + uint8_t padding[7]; + /* size of value entry array appended to the node */ + le64_t values_count; +} _packed_; + +/* array of child entries, follows directly the node record */ +struct trie_child_entry_f { + /* index of the child node */ + uint8_t c; + uint8_t padding[7]; + /* offset of the child node */ + le64_t child_off; +} _packed_; + +/* array of value entries, follows directly the node record/child array */ +struct trie_value_entry_f { + le64_t key_off; + le64_t value_off; +} _packed_; + +/* v2 extends v1 with filename and line-number */ +struct trie_value_entry2_f { + le64_t key_off; + le64_t value_off; + le64_t filename_off; + le32_t line_number; + le16_t file_priority; + le16_t padding; +} _packed_; + +#define hwdb_bin_paths \ + "/etc/systemd/hwdb/hwdb.bin\0" \ + "/etc/udev/hwdb.bin\0" \ + "/usr/lib/systemd/hwdb/hwdb.bin\0" \ + UDEVLIBEXECDIR "/hwdb.bin\0" diff --git a/src/libsystemd/sd-hwdb/sd-hwdb.c b/src/libsystemd/sd-hwdb/sd-hwdb.c new file mode 100644 index 0000000..f163314 --- /dev/null +++ b/src/libsystemd/sd-hwdb/sd-hwdb.c @@ -0,0 +1,436 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/*** + Copyright © 2008 Alan Jenkins <alan.christopher.jenkins@googlemail.com> +***/ + +#include <errno.h> +#include <fnmatch.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include "sd-hwdb.h" + +#include "alloc-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "hashmap.h" +#include "hwdb-internal.h" +#include "nulstr-util.h" +#include "string-util.h" +#include "time-util.h" + +struct linebuf { + char bytes[LINE_MAX]; + size_t size; + size_t len; +}; + +static void linebuf_init(struct linebuf *buf) { + buf->size = 0; + buf->len = 0; +} + +static const char *linebuf_get(struct linebuf *buf) { + if (buf->len + 1 >= sizeof(buf->bytes)) + return NULL; + buf->bytes[buf->len] = '\0'; + return buf->bytes; +} + +static bool linebuf_add(struct linebuf *buf, const char *s, size_t len) { + if (buf->len + len >= sizeof(buf->bytes)) + return false; + memcpy(buf->bytes + buf->len, s, len); + buf->len += len; + return true; +} + +static bool linebuf_add_char(struct linebuf *buf, char c) { + if (buf->len + 1 >= sizeof(buf->bytes)) + return false; + buf->bytes[buf->len++] = c; + return true; +} + +static void linebuf_rem(struct linebuf *buf, size_t count) { + assert(buf->len >= count); + buf->len -= count; +} + +static void linebuf_rem_char(struct linebuf *buf) { + linebuf_rem(buf, 1); +} + +static const struct trie_child_entry_f *trie_node_child(sd_hwdb *hwdb, const struct trie_node_f *node, size_t idx) { + const char *base = (const char *)node; + + base += le64toh(hwdb->head->node_size); + base += idx * le64toh(hwdb->head->child_entry_size); + return (const struct trie_child_entry_f *)base; +} + +static const struct trie_value_entry_f *trie_node_value(sd_hwdb *hwdb, const struct trie_node_f *node, size_t idx) { + const char *base = (const char *)node; + + base += le64toh(hwdb->head->node_size); + base += node->children_count * le64toh(hwdb->head->child_entry_size); + base += idx * le64toh(hwdb->head->value_entry_size); + return (const struct trie_value_entry_f *)base; +} + +static const struct trie_node_f *trie_node_from_off(sd_hwdb *hwdb, le64_t off) { + return (const struct trie_node_f *)(hwdb->map + le64toh(off)); +} + +static const char *trie_string(sd_hwdb *hwdb, le64_t off) { + return hwdb->map + le64toh(off); +} + +static int trie_children_cmp_f(const void *v1, const void *v2) { + const struct trie_child_entry_f *n1 = v1; + const struct trie_child_entry_f *n2 = v2; + + return n1->c - n2->c; +} + +static const struct trie_node_f *node_lookup_f(sd_hwdb *hwdb, const struct trie_node_f *node, uint8_t c) { + struct trie_child_entry_f *child; + struct trie_child_entry_f search; + + search.c = c; + child = bsearch(&search, (const char *)node + le64toh(hwdb->head->node_size), node->children_count, + le64toh(hwdb->head->child_entry_size), trie_children_cmp_f); + if (child) + return trie_node_from_off(hwdb, child->child_off); + return NULL; +} + +static int hwdb_add_property(sd_hwdb *hwdb, const struct trie_value_entry_f *entry) { + const char *key; + int r; + + assert(hwdb); + + key = trie_string(hwdb, entry->key_off); + + /* + * Silently ignore all properties which do not start with a + * space; future extensions might use additional prefixes. + */ + if (key[0] != ' ') + return 0; + + key++; + + if (le64toh(hwdb->head->value_entry_size) >= sizeof(struct trie_value_entry2_f)) { + const struct trie_value_entry2_f *old, *entry2; + + entry2 = (const struct trie_value_entry2_f *)entry; + old = ordered_hashmap_get(hwdb->properties, key); + if (old) { + /* On duplicates, we order by filename priority and line-number. + * + * v2 of the format had 64 bits for the line number. + * v3 reuses top 32 bits of line_number to store the priority. + * We check the top bits — if they are zero we have v2 format. + * This means that v2 clients will print wrong line numbers with + * v3 data. + * + * For v3 data: we compare the priority (of the source file) + * and the line number. + * + * For v2 data: we rely on the fact that the filenames in the hwdb + * are added in the order of priority (higher later), because they + * are *processed* in the order of priority. So we compare the + * indices to determine which file had higher priority. Comparing + * the strings alphabetically would be useless, because those are + * full paths, and e.g. /usr/lib would sort after /etc, even + * though it has lower priority. This is not reliable because of + * suffix compression, but should work for the most common case of + * /usr/lib/udev/hwbd.d and /etc/udev/hwdb.d, and is better than + * not doing the comparison at all. + */ + bool lower; + + if (entry2->file_priority == 0) + lower = entry2->filename_off < old->filename_off || + (entry2->filename_off == old->filename_off && entry2->line_number < old->line_number); + else + lower = entry2->file_priority < old->file_priority || + (entry2->file_priority == old->file_priority && entry2->line_number < old->line_number); + if (lower) + return 0; + } + } + + r = ordered_hashmap_ensure_allocated(&hwdb->properties, &string_hash_ops); + if (r < 0) + return r; + + r = ordered_hashmap_replace(hwdb->properties, key, (void *)entry); + if (r < 0) + return r; + + hwdb->properties_modified = true; + + return 0; +} + +static int trie_fnmatch_f(sd_hwdb *hwdb, const struct trie_node_f *node, size_t p, + struct linebuf *buf, const char *search) { + size_t len; + size_t i; + const char *prefix; + int err; + + prefix = trie_string(hwdb, node->prefix_off); + len = strlen(prefix + p); + linebuf_add(buf, prefix + p, len); + + for (i = 0; i < node->children_count; i++) { + const struct trie_child_entry_f *child = trie_node_child(hwdb, node, i); + + linebuf_add_char(buf, child->c); + err = trie_fnmatch_f(hwdb, trie_node_from_off(hwdb, child->child_off), 0, buf, search); + if (err < 0) + return err; + linebuf_rem_char(buf); + } + + if (le64toh(node->values_count) && fnmatch(linebuf_get(buf), search, 0) == 0) + for (i = 0; i < le64toh(node->values_count); i++) { + err = hwdb_add_property(hwdb, trie_node_value(hwdb, node, i)); + if (err < 0) + return err; + } + + linebuf_rem(buf, len); + return 0; +} + +static int trie_search_f(sd_hwdb *hwdb, const char *search) { + struct linebuf buf; + const struct trie_node_f *node; + size_t i = 0; + int err; + + linebuf_init(&buf); + + node = trie_node_from_off(hwdb, hwdb->head->nodes_root_off); + while (node) { + const struct trie_node_f *child; + size_t p = 0; + + if (node->prefix_off) { + char c; + + for (; (c = trie_string(hwdb, node->prefix_off)[p]); p++) { + if (IN_SET(c, '*', '?', '[')) + return trie_fnmatch_f(hwdb, node, p, &buf, search + i + p); + if (c != search[i + p]) + return 0; + } + i += p; + } + + child = node_lookup_f(hwdb, node, '*'); + if (child) { + linebuf_add_char(&buf, '*'); + err = trie_fnmatch_f(hwdb, child, 0, &buf, search + i); + if (err < 0) + return err; + linebuf_rem_char(&buf); + } + + child = node_lookup_f(hwdb, node, '?'); + if (child) { + linebuf_add_char(&buf, '?'); + err = trie_fnmatch_f(hwdb, child, 0, &buf, search + i); + if (err < 0) + return err; + linebuf_rem_char(&buf); + } + + child = node_lookup_f(hwdb, node, '['); + if (child) { + linebuf_add_char(&buf, '['); + err = trie_fnmatch_f(hwdb, child, 0, &buf, search + i); + if (err < 0) + return err; + linebuf_rem_char(&buf); + } + + if (search[i] == '\0') { + size_t n; + + for (n = 0; n < le64toh(node->values_count); n++) { + err = hwdb_add_property(hwdb, trie_node_value(hwdb, node, n)); + if (err < 0) + return err; + } + return 0; + } + + child = node_lookup_f(hwdb, node, search[i]); + node = child; + i++; + } + return 0; +} + +static int hwdb_new(const char *path, sd_hwdb **ret) { + _cleanup_(sd_hwdb_unrefp) sd_hwdb *hwdb = NULL; + const char sig[] = HWDB_SIG; + + assert_return(ret, -EINVAL); + + hwdb = new0(sd_hwdb, 1); + if (!hwdb) + return -ENOMEM; + + hwdb->n_ref = 1; + + /* Find hwdb.bin in the explicit path if provided, or iterate over hwdb_bin_paths otherwise */ + if (!isempty(path)) { + log_debug("Trying to open \"%s\"...", path); + hwdb->f = fopen(path, "re"); + if (!hwdb->f) + return log_debug_errno(errno, "Failed to open %s: %m", path); + } else { + NULSTR_FOREACH(p, hwdb_bin_paths) { + log_debug("Trying to open \"%s\"...", p); + hwdb->f = fopen(p, "re"); + if (hwdb->f) { + path = p; + break; + } + if (errno != ENOENT) + return log_debug_errno(errno, "Failed to open %s: %m", p); + } + + if (!hwdb->f) + return log_debug_errno(SYNTHETIC_ERRNO(ENOENT), + "hwdb.bin does not exist, please run 'systemd-hwdb update'"); + } + + if (fstat(fileno(hwdb->f), &hwdb->st) < 0) + return log_debug_errno(errno, "Failed to stat %s: %m", path); + if (hwdb->st.st_size < (off_t) offsetof(struct trie_header_f, strings_len) + 8) + return log_debug_errno(SYNTHETIC_ERRNO(EIO), "File %s is too short: %m", path); + if (file_offset_beyond_memory_size(hwdb->st.st_size)) + return log_debug_errno(SYNTHETIC_ERRNO(EFBIG), "File %s is too long: %m", path); + + hwdb->map = mmap(0, hwdb->st.st_size, PROT_READ, MAP_SHARED, fileno(hwdb->f), 0); + if (hwdb->map == MAP_FAILED) + return log_debug_errno(errno, "Failed to map %s: %m", path); + + if (memcmp(hwdb->map, sig, sizeof(hwdb->head->signature)) != 0 || + (size_t) hwdb->st.st_size != le64toh(hwdb->head->file_size)) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to recognize the format of %s", path); + + log_debug("=== trie on-disk ==="); + log_debug("tool version: %"PRIu64, le64toh(hwdb->head->tool_version)); + log_debug("file size: %8"PRIi64" bytes", hwdb->st.st_size); + log_debug("header size %8"PRIu64" bytes", le64toh(hwdb->head->header_size)); + log_debug("strings %8"PRIu64" bytes", le64toh(hwdb->head->strings_len)); + log_debug("nodes %8"PRIu64" bytes", le64toh(hwdb->head->nodes_len)); + + *ret = TAKE_PTR(hwdb); + + return 0; +} + +_public_ int sd_hwdb_new_from_path(const char *path, sd_hwdb **ret) { + assert_return(!isempty(path), -EINVAL); + + return hwdb_new(path, ret); +} + +_public_ int sd_hwdb_new(sd_hwdb **ret) { + return hwdb_new(NULL, ret); +} + +static sd_hwdb *hwdb_free(sd_hwdb *hwdb) { + assert(hwdb); + + if (hwdb->map) + munmap((void *)hwdb->map, hwdb->st.st_size); + safe_fclose(hwdb->f); + ordered_hashmap_free(hwdb->properties); + return mfree(hwdb); +} + +DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_hwdb, sd_hwdb, hwdb_free) + +static int properties_prepare(sd_hwdb *hwdb, const char *modalias) { + assert(hwdb); + assert(modalias); + + ordered_hashmap_clear(hwdb->properties); + hwdb->properties_modified = true; + + return trie_search_f(hwdb, modalias); +} + +_public_ int sd_hwdb_get(sd_hwdb *hwdb, const char *modalias, const char *key, const char **_value) { + const struct trie_value_entry_f *entry; + int r; + + assert_return(hwdb, -EINVAL); + assert_return(hwdb->f, -EINVAL); + assert_return(modalias, -EINVAL); + assert_return(_value, -EINVAL); + + r = properties_prepare(hwdb, modalias); + if (r < 0) + return r; + + entry = ordered_hashmap_get(hwdb->properties, key); + if (!entry) + return -ENOENT; + + *_value = trie_string(hwdb, entry->value_off); + + return 0; +} + +_public_ int sd_hwdb_seek(sd_hwdb *hwdb, const char *modalias) { + int r; + + assert_return(hwdb, -EINVAL); + assert_return(hwdb->f, -EINVAL); + assert_return(modalias, -EINVAL); + + r = properties_prepare(hwdb, modalias); + if (r < 0) + return r; + + hwdb->properties_modified = false; + hwdb->properties_iterator = ITERATOR_FIRST; + + return 0; +} + +_public_ int sd_hwdb_enumerate(sd_hwdb *hwdb, const char **key, const char **value) { + const struct trie_value_entry_f *entry; + const void *k; + + assert_return(hwdb, -EINVAL); + assert_return(key, -EINVAL); + assert_return(value, -EINVAL); + + if (hwdb->properties_modified) + return -EAGAIN; + + if (!ordered_hashmap_iterate(hwdb->properties, &hwdb->properties_iterator, (void **)&entry, &k)) + return 0; + + *key = k; + *value = trie_string(hwdb, entry->value_off); + + return 1; +} diff --git a/src/libsystemd/sd-id128/id128-util.c b/src/libsystemd/sd-id128/id128-util.c new file mode 100644 index 0000000..94bfd70 --- /dev/null +++ b/src/libsystemd/sd-id128/id128-util.c @@ -0,0 +1,265 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> + +#include "fd-util.h" +#include "fs-util.h" +#include "hexdecoct.h" +#include "id128-util.h" +#include "io-util.h" +#include "sha256.h" +#include "stdio-util.h" +#include "string-util.h" +#include "sync-util.h" +#include "virt.h" + +int id128_from_string_nonzero(const char *s, sd_id128_t *ret) { + sd_id128_t t; + int r; + + assert(ret); + + r = sd_id128_from_string(ASSERT_PTR(s), &t); + if (r < 0) + return r; + + if (sd_id128_is_null(t)) + return -ENXIO; + + *ret = t; + return 0; +} + +bool id128_is_valid(const char *s) { + size_t l; + + assert(s); + + l = strlen(s); + + if (l == SD_ID128_STRING_MAX - 1) + /* Plain formatted 128-bit hex string */ + return in_charset(s, HEXDIGITS); + + if (l == SD_ID128_UUID_STRING_MAX - 1) { + /* Formatted UUID */ + for (size_t i = 0; i < l; i++) { + char c = s[i]; + + if (IN_SET(i, 8, 13, 18, 23)) { + if (c != '-') + return false; + } else if (!ascii_ishex(c)) + return false; + } + return true; + } + + return false; +} + +int id128_read_fd(int fd, Id128Flag f, sd_id128_t *ret) { + char buffer[SD_ID128_UUID_STRING_MAX + 1]; /* +1 is for trailing newline */ + sd_id128_t id; + ssize_t l; + int r; + + assert(fd >= 0); + + /* Reads an 128-bit ID from a file, which may either be in plain format (32 hex digits), or in UUID format, both + * optionally followed by a newline and nothing else. ID files should really be newline terminated, but if they + * aren't that's OK too, following the rule of "Be conservative in what you send, be liberal in what you + * accept". + * + * This returns the following: + * -ENOMEDIUM: an empty string, + * -ENOPKG: "uninitialized" or "uninitialized\n", + * -EUCLEAN: other invalid strings. */ + + l = loop_read(fd, buffer, sizeof(buffer), false); /* we expect a short read of either 32/33 or 36/37 chars */ + if (l < 0) + return (int) l; + if (l == 0) /* empty? */ + return -ENOMEDIUM; + + switch (l) { + + case STRLEN("uninitialized"): + case STRLEN("uninitialized\n"): + return strneq(buffer, "uninitialized\n", l) ? -ENOPKG : -EINVAL; + + case SD_ID128_STRING_MAX: /* plain UUID with trailing newline */ + if (buffer[SD_ID128_STRING_MAX-1] != '\n') + return -EUCLEAN; + + _fallthrough_; + case SD_ID128_STRING_MAX-1: /* plain UUID without trailing newline */ + if (!FLAGS_SET(f, ID128_FORMAT_PLAIN)) + return -EUCLEAN; + + buffer[SD_ID128_STRING_MAX-1] = 0; + break; + + case SD_ID128_UUID_STRING_MAX: /* RFC UUID with trailing newline */ + if (buffer[SD_ID128_UUID_STRING_MAX-1] != '\n') + return -EUCLEAN; + + _fallthrough_; + case SD_ID128_UUID_STRING_MAX-1: /* RFC UUID without trailing newline */ + if (!FLAGS_SET(f, ID128_FORMAT_UUID)) + return -EUCLEAN; + + buffer[SD_ID128_UUID_STRING_MAX-1] = 0; + break; + + default: + return -EUCLEAN; + } + + r = sd_id128_from_string(buffer, &id); + if (r == -EINVAL) + return -EUCLEAN; + if (r < 0) + return r; + + if (FLAGS_SET(f, ID128_REFUSE_NULL) && sd_id128_is_null(id)) + return -ENOMEDIUM; + + if (ret) + *ret = id; + return 0; +} + +int id128_read_at(int dir_fd, const char *path, Id128Flag f, sd_id128_t *ret) { + _cleanup_close_ int fd = -EBADF; + + assert(dir_fd >= 0 || dir_fd == AT_FDCWD); + assert(path); + + fd = xopenat(dir_fd, path, O_RDONLY|O_CLOEXEC|O_NOCTTY, /* xopen_flags = */ 0, /* mode = */ 0); + if (fd < 0) + return fd; + + return id128_read_fd(fd, f, ret); +} + +int id128_write_fd(int fd, Id128Flag f, sd_id128_t id) { + char buffer[SD_ID128_UUID_STRING_MAX + 1]; /* +1 is for trailing newline */ + size_t sz; + int r; + + assert(fd >= 0); + assert(IN_SET((f & ID128_FORMAT_ANY), ID128_FORMAT_PLAIN, ID128_FORMAT_UUID)); + + if (FLAGS_SET(f, ID128_REFUSE_NULL) && sd_id128_is_null(id)) + return -ENOMEDIUM; + + if (FLAGS_SET(f, ID128_FORMAT_PLAIN)) { + assert_se(sd_id128_to_string(id, buffer)); + sz = SD_ID128_STRING_MAX; + } else { + assert_se(sd_id128_to_uuid_string(id, buffer)); + sz = SD_ID128_UUID_STRING_MAX; + } + + buffer[sz - 1] = '\n'; + r = loop_write(fd, buffer, sz); + if (r < 0) + return r; + + if (FLAGS_SET(f, ID128_SYNC_ON_WRITE)) { + r = fsync_full(fd); + if (r < 0) + return r; + } + + return 0; +} + +int id128_write_at(int dir_fd, const char *path, Id128Flag f, sd_id128_t id) { + _cleanup_close_ int fd = -EBADF; + + assert(dir_fd >= 0 || dir_fd == AT_FDCWD); + assert(path); + + fd = xopenat(dir_fd, path, O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY|O_TRUNC, /* xopen_flags = */ 0, 0444); + if (fd < 0) + return fd; + + return id128_write_fd(fd, f, id); +} + +void id128_hash_func(const sd_id128_t *p, struct siphash *state) { + siphash24_compress(p, sizeof(sd_id128_t), state); +} + +int id128_compare_func(const sd_id128_t *a, const sd_id128_t *b) { + return memcmp(a, b, 16); +} + +sd_id128_t id128_make_v4_uuid(sd_id128_t id) { + /* Stolen from generate_random_uuid() of drivers/char/random.c + * in the kernel sources */ + + /* Set UUID version to 4 --- truly random generation */ + id.bytes[6] = (id.bytes[6] & 0x0F) | 0x40; + + /* Set the UUID variant to DCE */ + id.bytes[8] = (id.bytes[8] & 0x3F) | 0x80; + + return id; +} + +DEFINE_HASH_OPS(id128_hash_ops, sd_id128_t, id128_hash_func, id128_compare_func); +DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(id128_hash_ops_free, sd_id128_t, id128_hash_func, id128_compare_func, free); + +int id128_get_product(sd_id128_t *ret) { + sd_id128_t uuid; + int r; + + assert(ret); + + /* Reads the systems product UUID from DMI or devicetree (where it is located on POWER). This is + * particularly relevant in VM environments, where VM managers typically place a VM uuid there. */ + + r = detect_container(); + if (r < 0) + return r; + if (r > 0) /* Refuse returning this in containers, as this is not a property of our system then, but + * of the host */ + return -ENOENT; + + r = id128_read("/sys/class/dmi/id/product_uuid", ID128_FORMAT_UUID, &uuid); + if (r == -ENOENT) + r = id128_read("/proc/device-tree/vm,uuid", ID128_FORMAT_UUID, &uuid); + if (r == -ENOENT) + r = id128_read("/sys/hypervisor/uuid", ID128_FORMAT_UUID, &uuid); + if (r < 0) + return r; + + if (sd_id128_is_null(uuid) || sd_id128_is_allf(uuid)) + return -EADDRNOTAVAIL; /* Recognizable error */ + + *ret = uuid; + return 0; +} + +sd_id128_t id128_digest(const void *data, size_t size) { + assert(data || size == 0); + + /* Hashes a UUID from some arbitrary data */ + + if (size == SIZE_MAX) + size = strlen(data); + + uint8_t h[SHA256_DIGEST_SIZE]; + sd_id128_t id; + + /* Take the first half of the SHA256 result */ + assert_cc(sizeof(h) >= sizeof(id.bytes)); + memcpy(id.bytes, sha256_direct(data, size, h), sizeof(id.bytes)); + + return id128_make_v4_uuid(id); +} diff --git a/src/libsystemd/sd-id128/id128-util.h b/src/libsystemd/sd-id128/id128-util.h new file mode 100644 index 0000000..53ba50a --- /dev/null +++ b/src/libsystemd/sd-id128/id128-util.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <fcntl.h> +#include <stdbool.h> + +#include "sd-id128.h" + +#include "errno-util.h" +#include "hash-funcs.h" +#include "macro.h" + +bool id128_is_valid(const char *s) _pure_; + +typedef enum Id128Flag { + ID128_FORMAT_PLAIN = 1 << 0, /* formatted as 32 hex chars as-is */ + ID128_FORMAT_UUID = 1 << 1, /* formatted as 36 character uuid string */ + ID128_FORMAT_ANY = ID128_FORMAT_PLAIN | ID128_FORMAT_UUID, + + ID128_SYNC_ON_WRITE = 1 << 2, /* Sync the file after write. Used only when writing an ID. */ + ID128_REFUSE_NULL = 1 << 3, /* Refuse all zero ID with -ENOMEDIUM. */ +} Id128Flag; + +int id128_from_string_nonzero(const char *s, sd_id128_t *ret); + +int id128_read_fd(int fd, Id128Flag f, sd_id128_t *ret); +int id128_read_at(int dir_fd, const char *path, Id128Flag f, sd_id128_t *ret); +static inline int id128_read(const char *path, Id128Flag f, sd_id128_t *ret) { + return id128_read_at(AT_FDCWD, path, f, ret); +} + +int id128_write_fd(int fd, Id128Flag f, sd_id128_t id); +int id128_write_at(int dir_fd, const char *path, Id128Flag f, sd_id128_t id); +static inline int id128_write(const char *path, Id128Flag f, sd_id128_t id) { + return id128_write_at(AT_FDCWD, path, f, id); +} + +int id128_get_machine(const char *root, sd_id128_t *ret); +int id128_get_machine_at(int rfd, sd_id128_t *ret); + +void id128_hash_func(const sd_id128_t *p, struct siphash *state); +int id128_compare_func(const sd_id128_t *a, const sd_id128_t *b) _pure_; +extern const struct hash_ops id128_hash_ops; +extern const struct hash_ops id128_hash_ops_free; + +sd_id128_t id128_make_v4_uuid(sd_id128_t id); + +int id128_get_product(sd_id128_t *ret); + +sd_id128_t id128_digest(const void *data, size_t size); + +/* A helper to check for the three relevant cases of "machine ID not initialized" */ +#define ERRNO_IS_NEG_MACHINE_ID_UNSET(r) \ + IN_SET(r, \ + -ENOENT, \ + -ENOMEDIUM, \ + -ENOPKG) +_DEFINE_ABS_WRAPPER(MACHINE_ID_UNSET); diff --git a/src/libsystemd/sd-id128/sd-id128.c b/src/libsystemd/sd-id128/sd-id128.c new file mode 100644 index 0000000..9fda79a --- /dev/null +++ b/src/libsystemd/sd-id128/sd-id128.c @@ -0,0 +1,382 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> + +#include "sd-id128.h" + +#include "alloc-util.h" +#include "chase.h" +#include "fd-util.h" +#include "hexdecoct.h" +#include "hmac.h" +#include "id128-util.h" +#include "io-util.h" +#include "macro.h" +#include "missing_syscall.h" +#include "missing_threads.h" +#include "path-util.h" +#include "random-util.h" +#include "stat-util.h" +#include "user-util.h" + +_public_ char *sd_id128_to_string(sd_id128_t id, char s[_SD_ARRAY_STATIC SD_ID128_STRING_MAX]) { + size_t k = 0; + + assert_return(s, NULL); + + for (size_t n = 0; n < sizeof(sd_id128_t); n++) { + s[k++] = hexchar(id.bytes[n] >> 4); + s[k++] = hexchar(id.bytes[n] & 0xF); + } + + assert(k == SD_ID128_STRING_MAX - 1); + s[k] = 0; + + return s; +} + +_public_ char *sd_id128_to_uuid_string(sd_id128_t id, char s[_SD_ARRAY_STATIC SD_ID128_UUID_STRING_MAX]) { + size_t k = 0; + + assert_return(s, NULL); + + /* Similar to sd_id128_to_string() but formats the result as UUID instead of plain hex chars */ + + for (size_t n = 0; n < sizeof(sd_id128_t); n++) { + + if (IN_SET(n, 4, 6, 8, 10)) + s[k++] = '-'; + + s[k++] = hexchar(id.bytes[n] >> 4); + s[k++] = hexchar(id.bytes[n] & 0xF); + } + + assert(k == SD_ID128_UUID_STRING_MAX - 1); + s[k] = 0; + + return s; +} + +_public_ int sd_id128_from_string(const char *s, sd_id128_t *ret) { + size_t n, i; + sd_id128_t t; + bool is_guid = false; + + assert_return(s, -EINVAL); + + for (n = 0, i = 0; n < sizeof(sd_id128_t);) { + int a, b; + + if (s[i] == '-') { + /* Is this a GUID? Then be nice, and skip over + * the dashes */ + + if (i == 8) + is_guid = true; + else if (IN_SET(i, 13, 18, 23)) { + if (!is_guid) + return -EINVAL; + } else + return -EINVAL; + + i++; + continue; + } + + a = unhexchar(s[i++]); + if (a < 0) + return -EINVAL; + + b = unhexchar(s[i++]); + if (b < 0) + return -EINVAL; + + t.bytes[n++] = (a << 4) | b; + } + + if (i != (is_guid ? SD_ID128_UUID_STRING_MAX : SD_ID128_STRING_MAX) - 1) + return -EINVAL; + + if (s[i] != 0) + return -EINVAL; + + if (ret) + *ret = t; + return 0; +} + +_public_ int sd_id128_string_equal(const char *s, sd_id128_t id) { + sd_id128_t parsed; + int r; + + if (!s) + return false; + + /* Checks if the specified string matches a valid string representation of the specified 128 bit ID/uuid */ + + r = sd_id128_from_string(s, &parsed); + if (r < 0) + return r; + + return sd_id128_equal(parsed, id); +} + +_public_ int sd_id128_get_machine(sd_id128_t *ret) { + static thread_local sd_id128_t saved_machine_id = {}; + int r; + + if (sd_id128_is_null(saved_machine_id)) { + r = id128_read("/etc/machine-id", ID128_FORMAT_PLAIN | ID128_REFUSE_NULL, &saved_machine_id); + if (r < 0) + return r; + } + + if (ret) + *ret = saved_machine_id; + return 0; +} + +int id128_get_machine_at(int rfd, sd_id128_t *ret) { + _cleanup_close_ int fd = -EBADF; + int r; + + assert(rfd >= 0 || rfd == AT_FDCWD); + + r = dir_fd_is_root_or_cwd(rfd); + if (r < 0) + return r; + if (r > 0) + return sd_id128_get_machine(ret); + + fd = chase_and_openat(rfd, "/etc/machine-id", CHASE_AT_RESOLVE_IN_ROOT, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL); + if (fd < 0) + return fd; + + return id128_read_fd(fd, ID128_FORMAT_PLAIN | ID128_REFUSE_NULL, ret); +} + +int id128_get_machine(const char *root, sd_id128_t *ret) { + _cleanup_close_ int fd = -EBADF; + + if (empty_or_root(root)) + return sd_id128_get_machine(ret); + + fd = chase_and_open("/etc/machine-id", root, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL); + if (fd < 0) + return fd; + + return id128_read_fd(fd, ID128_FORMAT_PLAIN | ID128_REFUSE_NULL, ret); +} + +_public_ int sd_id128_get_boot(sd_id128_t *ret) { + static thread_local sd_id128_t saved_boot_id = {}; + int r; + + if (sd_id128_is_null(saved_boot_id)) { + r = id128_read("/proc/sys/kernel/random/boot_id", ID128_FORMAT_UUID | ID128_REFUSE_NULL, &saved_boot_id); + if (r == -ENOENT && proc_mounted() == 0) + return -ENOSYS; + if (r < 0) + return r; + } + + if (ret) + *ret = saved_boot_id; + return 0; +} + +static int get_invocation_from_keyring(sd_id128_t *ret) { + _cleanup_free_ char *description = NULL; + char *d, *p, *g, *u, *e; + unsigned long perms; + key_serial_t key; + size_t sz = 256; + uid_t uid; + gid_t gid; + int r, c; + +#define MAX_PERMS ((unsigned long) (KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH| \ + KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH)) + + assert(ret); + + key = request_key("user", "invocation_id", NULL, 0); + if (key == -1) { + /* Keyring support not available? No invocation key stored? */ + if (IN_SET(errno, ENOSYS, ENOKEY)) + return -ENXIO; + + return -errno; + } + + for (;;) { + description = new(char, sz); + if (!description) + return -ENOMEM; + + c = keyctl(KEYCTL_DESCRIBE, key, (unsigned long) description, sz, 0); + if (c < 0) + return -errno; + + if ((size_t) c <= sz) + break; + + sz = c; + free(description); + } + + /* The kernel returns a final NUL in the string, verify that. */ + assert(description[c-1] == 0); + + /* Chop off the final description string */ + d = strrchr(description, ';'); + if (!d) + return -EUCLEAN; + *d = 0; + + /* Look for the permissions */ + p = strrchr(description, ';'); + if (!p) + return -EUCLEAN; + + errno = 0; + perms = strtoul(p + 1, &e, 16); + if (errno > 0) + return -errno; + if (e == p + 1) /* Read at least one character */ + return -EUCLEAN; + if (e != d) /* Must reached the end */ + return -EUCLEAN; + + if ((perms & ~MAX_PERMS) != 0) + return -EPERM; + + *p = 0; + + /* Look for the group ID */ + g = strrchr(description, ';'); + if (!g) + return -EUCLEAN; + r = parse_gid(g + 1, &gid); + if (r < 0) + return r; + if (gid != 0) + return -EPERM; + *g = 0; + + /* Look for the user ID */ + u = strrchr(description, ';'); + if (!u) + return -EUCLEAN; + r = parse_uid(u + 1, &uid); + if (r < 0) + return r; + if (uid != 0) + return -EPERM; + + c = keyctl(KEYCTL_READ, key, (unsigned long) ret, sizeof(sd_id128_t), 0); + if (c < 0) + return -errno; + if (c != sizeof(sd_id128_t)) + return -EUCLEAN; + + return 0; +} + +static int get_invocation_from_environment(sd_id128_t *ret) { + const char *e; + int r; + + assert(ret); + + e = secure_getenv("INVOCATION_ID"); + if (!e) + return -ENXIO; + + r = sd_id128_from_string(e, ret); + return r == -EINVAL ? -EUCLEAN : r; +} + +_public_ int sd_id128_get_invocation(sd_id128_t *ret) { + static thread_local sd_id128_t saved_invocation_id = {}; + int r; + + if (sd_id128_is_null(saved_invocation_id)) { + /* We first check the environment. The environment variable is primarily relevant for user + * services, and sufficiently safe as long as no privilege boundary is involved. */ + r = get_invocation_from_environment(&saved_invocation_id); + if (r == -ENXIO) + /* The kernel keyring is relevant for system services (as for user services we don't + * store the invocation ID in the keyring, as there'd be no trust benefit in that). */ + r = get_invocation_from_keyring(&saved_invocation_id); + if (r < 0) + return r; + + if (sd_id128_is_null(saved_invocation_id)) + return -ENOMEDIUM; + } + + if (ret) + *ret = saved_invocation_id; + return 0; +} + +_public_ int sd_id128_randomize(sd_id128_t *ret) { + sd_id128_t t; + + assert_return(ret, -EINVAL); + + random_bytes(&t, sizeof(t)); + + /* Turn this into a valid v4 UUID, to be nice. Note that we + * only guarantee this for newly generated UUIDs, not for + * pre-existing ones. */ + + *ret = id128_make_v4_uuid(t); + return 0; +} + +_public_ int sd_id128_get_app_specific(sd_id128_t base, sd_id128_t app_id, sd_id128_t *ret) { + assert_cc(sizeof(sd_id128_t) < SHA256_DIGEST_SIZE); /* Check that we don't need to pad with zeros. */ + union { + uint8_t hmac[SHA256_DIGEST_SIZE]; + sd_id128_t result; + } buf; + + assert_return(ret, -EINVAL); + assert_return(!sd_id128_is_null(app_id), -ENXIO); + + hmac_sha256(&base, sizeof(base), &app_id, sizeof(app_id), buf.hmac); + + /* Take only the first half. */ + *ret = id128_make_v4_uuid(buf.result); + return 0; +} + +_public_ int sd_id128_get_machine_app_specific(sd_id128_t app_id, sd_id128_t *ret) { + sd_id128_t id; + int r; + + assert_return(ret, -EINVAL); + + r = sd_id128_get_machine(&id); + if (r < 0) + return r; + + return sd_id128_get_app_specific(id, app_id, ret); +} + +_public_ int sd_id128_get_boot_app_specific(sd_id128_t app_id, sd_id128_t *ret) { + sd_id128_t id; + int r; + + assert_return(ret, -EINVAL); + + r = sd_id128_get_boot(&id); + if (r < 0) + return r; + + return sd_id128_get_app_specific(id, app_id, ret); +} diff --git a/src/libsystemd/sd-journal/audit-type.c b/src/libsystemd/sd-journal/audit-type.c new file mode 100644 index 0000000..122cdf5 --- /dev/null +++ b/src/libsystemd/sd-journal/audit-type.c @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "audit-type.h" +#include "missing_audit.h" + +#include "audit_type-to-name.h" diff --git a/src/libsystemd/sd-journal/audit-type.h b/src/libsystemd/sd-journal/audit-type.h new file mode 100644 index 0000000..f2c4898 --- /dev/null +++ b/src/libsystemd/sd-journal/audit-type.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdio.h> + +#include "alloc-util.h" +#include "macro.h" + +const char *audit_type_to_string(int type); +int audit_type_from_string(const char *s); + +/* This is inspired by DNS TYPEnnn formatting */ +#define audit_type_name_alloca(type) \ + ({ \ + const char *_s_; \ + _s_ = audit_type_to_string(type); \ + if (!_s_) { \ + _s_ = newa(char, STRLEN("AUDIT") + DECIMAL_STR_MAX(int)); \ + sprintf((char*) _s_, "AUDIT%04i", type); \ + } \ + _s_; \ + }) diff --git a/src/libsystemd/sd-journal/audit_type-to-name.awk b/src/libsystemd/sd-journal/audit_type-to-name.awk new file mode 100644 index 0000000..a859c44 --- /dev/null +++ b/src/libsystemd/sd-journal/audit_type-to-name.awk @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later + +BEGIN{ + print "const char *audit_type_to_string(int type) {" + print " switch (type) {" +} +{ + printf " case AUDIT_%s: return \"%s\";\n", $1, $1 +} +END{ + print " default: return NULL;" + print " }" + print "}" +} diff --git a/src/libsystemd/sd-journal/catalog.c b/src/libsystemd/sd-journal/catalog.c new file mode 100644 index 0000000..ae91534 --- /dev/null +++ b/src/libsystemd/sd-journal/catalog.c @@ -0,0 +1,743 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <locale.h> +#include <stdio.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "sd-id128.h" + +#include "alloc-util.h" +#include "catalog.h" +#include "conf-files.h" +#include "fd-util.h" +#include "fileio.h" +#include "hashmap.h" +#include "log.h" +#include "memory-util.h" +#include "mkdir.h" +#include "path-util.h" +#include "siphash24.h" +#include "sort-util.h" +#include "sparse-endian.h" +#include "strbuf.h" +#include "string-util.h" +#include "strv.h" +#include "tmpfile-util.h" + +const char * const catalog_file_dirs[] = { + "/usr/local/lib/systemd/catalog/", + "/usr/lib/systemd/catalog/", + NULL +}; + +#define CATALOG_SIGNATURE { 'R', 'H', 'H', 'H', 'K', 'S', 'L', 'P' } + +typedef struct CatalogHeader { + uint8_t signature[8]; /* "RHHHKSLP" */ + le32_t compatible_flags; + le32_t incompatible_flags; + le64_t header_size; + le64_t n_items; + le64_t catalog_item_size; +} CatalogHeader; + +typedef struct CatalogItem { + sd_id128_t id; + char language[32]; /* One byte is used for termination, so the maximum allowed + * length of the string is actually 31 bytes. */ + le64_t offset; +} CatalogItem; + +static void catalog_hash_func(const CatalogItem *i, struct siphash *state) { + siphash24_compress(&i->id, sizeof(i->id), state); + siphash24_compress_string(i->language, state); +} + +static int catalog_compare_func(const CatalogItem *a, const CatalogItem *b) { + unsigned k; + int r; + + for (k = 0; k < ELEMENTSOF(b->id.bytes); k++) { + r = CMP(a->id.bytes[k], b->id.bytes[k]); + if (r != 0) + return r; + } + + return strcmp(a->language, b->language); +} + +DEFINE_HASH_OPS(catalog_hash_ops, CatalogItem, catalog_hash_func, catalog_compare_func); + +static bool next_header(const char **s) { + const char *e; + + e = strchr(*s, '\n'); + + /* Unexpected end */ + if (!e) + return false; + + /* End of headers */ + if (e == *s) + return false; + + *s = e + 1; + return true; +} + +static const char *skip_header(const char *s) { + while (next_header(&s)) + ; + return s; +} + +static char *combine_entries(const char *one, const char *two) { + const char *b1, *b2; + size_t l1, l2, n; + char *dest, *p; + + /* Find split point of headers to body */ + b1 = skip_header(one); + b2 = skip_header(two); + + l1 = strlen(one); + l2 = strlen(two); + dest = new(char, l1 + l2 + 1); + if (!dest) { + log_oom(); + return NULL; + } + + p = dest; + + /* Headers from @one */ + n = b1 - one; + p = mempcpy(p, one, n); + + /* Headers from @two, these will only be found if not present above */ + n = b2 - two; + p = mempcpy(p, two, n); + + /* Body from @one */ + n = l1 - (b1 - one); + if (n > 0) + p = mempcpy(p, b1, n); + /* Body from @two */ + else { + n = l2 - (b2 - two); + p = mempcpy(p, b2, n); + } + + assert(p - dest <= (ptrdiff_t)(l1 + l2)); + p[0] = '\0'; + return dest; +} + +static int finish_item( + OrderedHashmap *h, + sd_id128_t id, + const char *language, + char *payload, size_t payload_size) { + + _cleanup_free_ CatalogItem *i = NULL; + _cleanup_free_ char *combined = NULL; + char *prev; + int r; + + assert(h); + assert(payload); + assert(payload_size > 0); + + i = new0(CatalogItem, 1); + if (!i) + return log_oom(); + + i->id = id; + if (language) { + assert(strlen(language) > 1 && strlen(language) < 32); + strcpy(i->language, language); + } + + prev = ordered_hashmap_get(h, i); + if (prev) { + /* Already have such an item, combine them */ + combined = combine_entries(payload, prev); + if (!combined) + return log_oom(); + + r = ordered_hashmap_update(h, i, combined); + if (r < 0) + return log_error_errno(r, "Failed to update catalog item: %m"); + + TAKE_PTR(combined); + free(prev); + } else { + /* A new item */ + combined = memdup(payload, payload_size + 1); + if (!combined) + return log_oom(); + + r = ordered_hashmap_put(h, i, combined); + if (r < 0) + return log_error_errno(r, "Failed to insert catalog item: %m"); + + TAKE_PTR(i); + TAKE_PTR(combined); + } + + return 0; +} + +int catalog_file_lang(const char* filename, char **lang) { + char *beg, *end, *_lang; + + end = endswith(filename, ".catalog"); + if (!end) + return 0; + + beg = end - 1; + while (beg > filename && !IN_SET(*beg, '.', '/') && end - beg < 32) + beg--; + + if (*beg != '.' || end <= beg + 1) + return 0; + + _lang = strndup(beg + 1, end - beg - 1); + if (!_lang) + return -ENOMEM; + + *lang = _lang; + return 1; +} + +static int catalog_entry_lang( + const char* filename, + unsigned line, + const char* t, + const char* deflang, + char **ret) { + + size_t c; + char *z; + + c = strlen(t); + if (c < 2) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "[%s:%u] Language too short.", filename, line); + if (c > 31) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "[%s:%u] language too long.", filename, line); + + if (deflang) { + if (streq(t, deflang)) { + log_warning("[%s:%u] language specified unnecessarily", filename, line); + return 0; + } + + log_warning("[%s:%u] language differs from default for file", filename, line); + } + + z = strdup(t); + if (!z) + return -ENOMEM; + + *ret = z; + return 0; +} + +int catalog_import_file(OrderedHashmap *h, const char *path) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *payload = NULL; + size_t payload_size = 0; + unsigned n = 0; + sd_id128_t id; + _cleanup_free_ char *deflang = NULL, *lang = NULL; + bool got_id = false, empty_line = true; + int r; + + assert(h); + assert(path); + + f = fopen(path, "re"); + if (!f) + return log_error_errno(errno, "Failed to open file %s: %m", path); + + r = catalog_file_lang(path, &deflang); + if (r < 0) + log_error_errno(r, "Failed to determine language for file %s: %m", path); + if (r == 1) + log_debug("File %s has language %s.", path, deflang); + + for (;;) { + _cleanup_free_ char *line = NULL; + size_t line_len; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return log_error_errno(r, "Failed to read file %s: %m", path); + if (r == 0) + break; + + n++; + + if (isempty(line)) { + empty_line = true; + continue; + } + + if (strchr(COMMENTS, line[0])) + continue; + + if (empty_line && + strlen(line) >= 2+1+32 && + line[0] == '-' && + line[1] == '-' && + line[2] == ' ' && + IN_SET(line[2+1+32], ' ', '\0')) { + + bool with_language; + sd_id128_t jd; + + /* New entry */ + + with_language = line[2+1+32] != '\0'; + line[2+1+32] = '\0'; + + if (sd_id128_from_string(line + 2 + 1, &jd) >= 0) { + + if (got_id) { + if (payload_size == 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "[%s:%u] No payload text.", + path, + n); + + r = finish_item(h, id, lang ?: deflang, payload, payload_size); + if (r < 0) + return r; + + lang = mfree(lang); + payload_size = 0; + } + + if (with_language) { + char *t; + + t = strstrip(line + 2 + 1 + 32 + 1); + r = catalog_entry_lang(path, n, t, deflang, &lang); + if (r < 0) + return r; + } + + got_id = true; + empty_line = false; + id = jd; + + continue; + } + } + + /* Payload */ + if (!got_id) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "[%s:%u] Got payload before ID.", + path, n); + + line_len = strlen(line); + if (!GREEDY_REALLOC(payload, payload_size + (empty_line ? 1 : 0) + line_len + 1 + 1)) + return log_oom(); + + if (empty_line) + payload[payload_size++] = '\n'; + memcpy(payload + payload_size, line, line_len); + payload_size += line_len; + payload[payload_size++] = '\n'; + payload[payload_size] = '\0'; + + empty_line = false; + } + + if (got_id) { + if (payload_size == 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "[%s:%u] No payload text.", + path, n); + + r = finish_item(h, id, lang ?: deflang, payload, payload_size); + if (r < 0) + return r; + } + + return 0; +} + +static int64_t write_catalog( + const char *database, + struct strbuf *sb, + CatalogItem *items, + size_t n) { + + _cleanup_fclose_ FILE *w = NULL; + _cleanup_free_ char *p = NULL; + CatalogHeader header; + size_t k; + int r; + + r = mkdir_parents(database, 0755); + if (r < 0) + return log_error_errno(r, "Failed to create parent directories of %s: %m", database); + + r = fopen_temporary(database, &w, &p); + if (r < 0) + return log_error_errno(r, "Failed to open database for writing: %s: %m", + database); + + header = (CatalogHeader) { + .signature = CATALOG_SIGNATURE, + .header_size = htole64(CONST_ALIGN_TO(sizeof(CatalogHeader), 8)), + .catalog_item_size = htole64(sizeof(CatalogItem)), + .n_items = htole64(n), + }; + + r = -EIO; + + k = fwrite(&header, 1, sizeof(header), w); + if (k != sizeof(header)) { + log_error("%s: failed to write header.", p); + goto error; + } + + k = fwrite(items, 1, n * sizeof(CatalogItem), w); + if (k != n * sizeof(CatalogItem)) { + log_error("%s: failed to write database.", p); + goto error; + } + + k = fwrite(sb->buf, 1, sb->len, w); + if (k != sb->len) { + log_error("%s: failed to write strings.", p); + goto error; + } + + r = fflush_and_check(w); + if (r < 0) { + log_error_errno(r, "%s: failed to write database: %m", p); + goto error; + } + + (void) fchmod(fileno(w), 0644); + + if (rename(p, database) < 0) { + r = log_error_errno(errno, "rename (%s -> %s) failed: %m", p, database); + goto error; + } + + return ftello(w); + +error: + (void) unlink(p); + return r; +} + +int catalog_update(const char* database, const char* root, const char* const* dirs) { + _cleanup_strv_free_ char **files = NULL; + _cleanup_(strbuf_freep) struct strbuf *sb = NULL; + _cleanup_ordered_hashmap_free_free_free_ OrderedHashmap *h = NULL; + _cleanup_free_ CatalogItem *items = NULL; + ssize_t offset; + char *payload; + CatalogItem *i; + unsigned n; + int r; + int64_t sz; + + h = ordered_hashmap_new(&catalog_hash_ops); + sb = strbuf_new(); + if (!h || !sb) + return log_oom(); + + r = conf_files_list_strv(&files, ".catalog", root, 0, dirs); + if (r < 0) + return log_error_errno(r, "Failed to get catalog files: %m"); + + STRV_FOREACH(f, files) { + log_debug("Reading file '%s'", *f); + r = catalog_import_file(h, *f); + if (r < 0) + return log_error_errno(r, "Failed to import file '%s': %m", *f); + } + + if (ordered_hashmap_size(h) <= 0) { + log_info("No items in catalog."); + return 0; + } else + log_debug("Found %u items in catalog.", ordered_hashmap_size(h)); + + items = new(CatalogItem, ordered_hashmap_size(h)); + if (!items) + return log_oom(); + + n = 0; + ORDERED_HASHMAP_FOREACH_KEY(payload, i, h) { + log_trace("Found " SD_ID128_FORMAT_STR ", language %s", + SD_ID128_FORMAT_VAL(i->id), + isempty(i->language) ? "C" : i->language); + + offset = strbuf_add_string(sb, payload, strlen(payload)); + if (offset < 0) + return log_oom(); + + i->offset = htole64((uint64_t) offset); + items[n++] = *i; + } + + assert(n == ordered_hashmap_size(h)); + typesafe_qsort(items, n, catalog_compare_func); + + strbuf_complete(sb); + + sz = write_catalog(database, sb, items, n); + if (sz < 0) + return log_error_errno(sz, "Failed to write %s: %m", database); + + log_debug("%s: wrote %u items, with %zu bytes of strings, %"PRIi64" total size.", + database, n, sb->len, sz); + return 0; +} + +static int open_mmap(const char *database, int *_fd, struct stat *_st, void **_p) { + _cleanup_close_ int fd = -EBADF; + const CatalogHeader *h; + struct stat st; + void *p; + + assert(_fd); + assert(_st); + assert(_p); + + fd = open(database, O_RDONLY|O_CLOEXEC); + if (fd < 0) + return -errno; + + if (fstat(fd, &st) < 0) + return -errno; + + if (st.st_size < (off_t) sizeof(CatalogHeader) || file_offset_beyond_memory_size(st.st_size)) + return -EINVAL; + + p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); + if (p == MAP_FAILED) + return -errno; + + h = p; + if (memcmp(h->signature, (const uint8_t[]) CATALOG_SIGNATURE, sizeof(h->signature)) != 0 || + le64toh(h->header_size) < sizeof(CatalogHeader) || + le64toh(h->catalog_item_size) < sizeof(CatalogItem) || + h->incompatible_flags != 0 || + le64toh(h->n_items) <= 0 || + st.st_size < (off_t) (le64toh(h->header_size) + le64toh(h->catalog_item_size) * le64toh(h->n_items))) { + munmap(p, st.st_size); + return -EBADMSG; + } + + *_fd = TAKE_FD(fd); + *_st = st; + *_p = p; + + return 0; +} + +static const char *find_id(void *p, sd_id128_t id) { + CatalogItem *f = NULL, key = { .id = id }; + const CatalogHeader *h = p; + const char *loc; + + loc = setlocale(LC_MESSAGES, NULL); + if (!isempty(loc) && !STR_IN_SET(loc, "C", "POSIX")) { + size_t len; + + len = strcspn(loc, ".@"); + if (len > sizeof(key.language) - 1) + log_debug("LC_MESSAGES value too long, ignoring: \"%.*s\"", (int) len, loc); + else { + strncpy(key.language, loc, len); + key.language[len] = '\0'; + + f = bsearch(&key, + (const uint8_t*) p + le64toh(h->header_size), + le64toh(h->n_items), + le64toh(h->catalog_item_size), + (comparison_fn_t) catalog_compare_func); + if (!f) { + char *e; + + e = strchr(key.language, '_'); + if (e) { + *e = 0; + f = bsearch(&key, + (const uint8_t*) p + le64toh(h->header_size), + le64toh(h->n_items), + le64toh(h->catalog_item_size), + (comparison_fn_t) catalog_compare_func); + } + } + } + } + + if (!f) { + zero(key.language); + f = bsearch(&key, + (const uint8_t*) p + le64toh(h->header_size), + le64toh(h->n_items), + le64toh(h->catalog_item_size), + (comparison_fn_t) catalog_compare_func); + } + + if (!f) + return NULL; + + return (const char*) p + + le64toh(h->header_size) + + le64toh(h->n_items) * le64toh(h->catalog_item_size) + + le64toh(f->offset); +} + +int catalog_get(const char* database, sd_id128_t id, char **_text) { + _cleanup_close_ int fd = -EBADF; + void *p = NULL; + struct stat st = {}; + char *text = NULL; + int r; + const char *s; + + assert(_text); + + r = open_mmap(database, &fd, &st, &p); + if (r < 0) + return r; + + s = find_id(p, id); + if (!s) { + r = -ENOENT; + goto finish; + } + + text = strdup(s); + if (!text) { + r = -ENOMEM; + goto finish; + } + + *_text = text; + r = 0; + +finish: + if (p) + munmap(p, st.st_size); + + return r; +} + +static char *find_header(const char *s, const char *header) { + + for (;;) { + const char *v; + + v = startswith(s, header); + if (v) { + v += strspn(v, WHITESPACE); + return strndup(v, strcspn(v, NEWLINE)); + } + + if (!next_header(&s)) + return NULL; + } +} + +static void dump_catalog_entry(FILE *f, sd_id128_t id, const char *s, bool oneline) { + if (oneline) { + _cleanup_free_ char *subject = NULL, *defined_by = NULL; + + subject = find_header(s, "Subject:"); + defined_by = find_header(s, "Defined-By:"); + + fprintf(f, SD_ID128_FORMAT_STR " %s: %s\n", + SD_ID128_FORMAT_VAL(id), + strna(defined_by), strna(subject)); + } else + fprintf(f, "-- " SD_ID128_FORMAT_STR "\n%s\n", + SD_ID128_FORMAT_VAL(id), s); +} + +int catalog_list(FILE *f, const char *database, bool oneline) { + _cleanup_close_ int fd = -EBADF; + void *p = NULL; + struct stat st; + const CatalogHeader *h; + const CatalogItem *items; + int r; + unsigned n; + sd_id128_t last_id; + bool last_id_set = false; + + r = open_mmap(database, &fd, &st, &p); + if (r < 0) + return r; + + h = p; + items = (const CatalogItem*) ((const uint8_t*) p + le64toh(h->header_size)); + + for (n = 0; n < le64toh(h->n_items); n++) { + const char *s; + + if (last_id_set && sd_id128_equal(last_id, items[n].id)) + continue; + + assert_se(s = find_id(p, items[n].id)); + + dump_catalog_entry(f, items[n].id, s, oneline); + + last_id_set = true; + last_id = items[n].id; + } + + munmap(p, st.st_size); + + return 0; +} + +int catalog_list_items(FILE *f, const char *database, bool oneline, char **items) { + int r = 0; + + STRV_FOREACH(item, items) { + sd_id128_t id; + int k; + _cleanup_free_ char *msg = NULL; + + k = sd_id128_from_string(*item, &id); + if (k < 0) { + log_error_errno(k, "Failed to parse id128 '%s': %m", *item); + if (r == 0) + r = k; + continue; + } + + k = catalog_get(database, id, &msg); + if (k < 0) { + log_full_errno(k == -ENOENT ? LOG_NOTICE : LOG_ERR, k, + "Failed to retrieve catalog entry for '%s': %m", *item); + if (r == 0) + r = k; + continue; + } + + dump_catalog_entry(f, id, msg, oneline); + } + + return r; +} diff --git a/src/libsystemd/sd-journal/catalog.h b/src/libsystemd/sd-journal/catalog.h new file mode 100644 index 0000000..df27869 --- /dev/null +++ b/src/libsystemd/sd-journal/catalog.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> +#include <stdio.h> + +#include "sd-id128.h" + +#include "hashmap.h" +#include "strbuf.h" + +int catalog_import_file(OrderedHashmap *h, const char *path); +int catalog_update(const char* database, const char* root, const char* const* dirs); +int catalog_get(const char* database, sd_id128_t id, char **data); +int catalog_list(FILE *f, const char* database, bool oneline); +int catalog_list_items(FILE *f, const char* database, bool oneline, char **items); +int catalog_file_lang(const char *filename, char **lang); +extern const char * const catalog_file_dirs[]; +extern const struct hash_ops catalog_hash_ops; diff --git a/src/libsystemd/sd-journal/fsprg.c b/src/libsystemd/sd-journal/fsprg.c new file mode 100644 index 0000000..e86be6a --- /dev/null +++ b/src/libsystemd/sd-journal/fsprg.c @@ -0,0 +1,381 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later + * + * fsprg v0.1 - (seekable) forward-secure pseudorandom generator + * Copyright © 2012 B. Poettering + * Contact: fsprg@point-at-infinity.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +/* + * See "Practical Secure Logging: Seekable Sequential Key Generators" + * by G. A. Marson, B. Poettering for details: + * + * http://eprint.iacr.org/2013/397 + */ + +#include <string.h> + +#include "fsprg.h" +#include "gcrypt-util.h" +#include "memory-util.h" + +#define ISVALID_SECPAR(secpar) (((secpar) % 16 == 0) && ((secpar) >= 16) && ((secpar) <= 16384)) +#define VALIDATE_SECPAR(secpar) assert(ISVALID_SECPAR(secpar)); + +#define RND_HASH GCRY_MD_SHA256 +#define RND_GEN_P 0x01 +#define RND_GEN_Q 0x02 +#define RND_GEN_X 0x03 + +#pragma GCC diagnostic ignored "-Wpointer-arith" +/* TODO: remove void* arithmetic and this work-around */ + +/******************************************************************************/ + +static void mpi_export(void *buf, size_t buflen, const gcry_mpi_t x) { + unsigned len; + size_t nwritten; + + assert(gcry_mpi_cmp_ui(x, 0) >= 0); + len = (gcry_mpi_get_nbits(x) + 7) / 8; + assert(len <= buflen); + memzero(buf, buflen); + gcry_mpi_print(GCRYMPI_FMT_USG, buf + (buflen - len), len, &nwritten, x); + assert(nwritten == len); +} + +static gcry_mpi_t mpi_import(const void *buf, size_t buflen) { + gcry_mpi_t h; + _unused_ unsigned len; + + assert_se(gcry_mpi_scan(&h, GCRYMPI_FMT_USG, buf, buflen, NULL) == 0); + len = (gcry_mpi_get_nbits(h) + 7) / 8; + assert(len <= buflen); + assert(gcry_mpi_cmp_ui(h, 0) >= 0); + + return h; +} + +static void uint64_export(void *buf, size_t buflen, uint64_t x) { + assert(buflen == 8); + ((uint8_t*) buf)[0] = (x >> 56) & 0xff; + ((uint8_t*) buf)[1] = (x >> 48) & 0xff; + ((uint8_t*) buf)[2] = (x >> 40) & 0xff; + ((uint8_t*) buf)[3] = (x >> 32) & 0xff; + ((uint8_t*) buf)[4] = (x >> 24) & 0xff; + ((uint8_t*) buf)[5] = (x >> 16) & 0xff; + ((uint8_t*) buf)[6] = (x >> 8) & 0xff; + ((uint8_t*) buf)[7] = (x >> 0) & 0xff; +} + +static uint64_t uint64_import(const void *buf, size_t buflen) { + assert(buflen == 8); + return + (uint64_t)(((uint8_t*) buf)[0]) << 56 | + (uint64_t)(((uint8_t*) buf)[1]) << 48 | + (uint64_t)(((uint8_t*) buf)[2]) << 40 | + (uint64_t)(((uint8_t*) buf)[3]) << 32 | + (uint64_t)(((uint8_t*) buf)[4]) << 24 | + (uint64_t)(((uint8_t*) buf)[5]) << 16 | + (uint64_t)(((uint8_t*) buf)[6]) << 8 | + (uint64_t)(((uint8_t*) buf)[7]) << 0; +} + +/* deterministically generate from seed/idx a string of buflen pseudorandom bytes */ +static void det_randomize(void *buf, size_t buflen, const void *seed, size_t seedlen, uint32_t idx) { + gcry_md_hd_t hd, hd2; + size_t olen, cpylen; + gcry_error_t err; + uint32_t ctr; + + olen = gcry_md_get_algo_dlen(RND_HASH); + err = gcry_md_open(&hd, RND_HASH, 0); + assert_se(gcry_err_code(err) == GPG_ERR_NO_ERROR); /* This shouldn't happen */ + gcry_md_write(hd, seed, seedlen); + gcry_md_putc(hd, (idx >> 24) & 0xff); + gcry_md_putc(hd, (idx >> 16) & 0xff); + gcry_md_putc(hd, (idx >> 8) & 0xff); + gcry_md_putc(hd, (idx >> 0) & 0xff); + + for (ctr = 0; buflen; ctr++) { + err = gcry_md_copy(&hd2, hd); + assert_se(gcry_err_code(err) == GPG_ERR_NO_ERROR); /* This shouldn't happen */ + gcry_md_putc(hd2, (ctr >> 24) & 0xff); + gcry_md_putc(hd2, (ctr >> 16) & 0xff); + gcry_md_putc(hd2, (ctr >> 8) & 0xff); + gcry_md_putc(hd2, (ctr >> 0) & 0xff); + gcry_md_final(hd2); + cpylen = (buflen < olen) ? buflen : olen; + memcpy(buf, gcry_md_read(hd2, RND_HASH), cpylen); + gcry_md_close(hd2); + buf += cpylen; + buflen -= cpylen; + } + gcry_md_close(hd); +} + +/* deterministically generate from seed/idx a prime of length `bits' that is 3 (mod 4) */ +static gcry_mpi_t genprime3mod4(int bits, const void *seed, size_t seedlen, uint32_t idx) { + size_t buflen = bits / 8; + uint8_t buf[buflen]; + gcry_mpi_t p; + + assert(bits % 8 == 0); + assert(buflen > 0); + + det_randomize(buf, buflen, seed, seedlen, idx); + buf[0] |= 0xc0; /* set upper two bits, so that n=pq has maximum size */ + buf[buflen - 1] |= 0x03; /* set lower two bits, to have result 3 (mod 4) */ + + p = mpi_import(buf, buflen); + while (gcry_prime_check(p, 0)) + gcry_mpi_add_ui(p, p, 4); + + return p; +} + +/* deterministically generate from seed/idx a quadratic residue (mod n) */ +static gcry_mpi_t gensquare(const gcry_mpi_t n, const void *seed, size_t seedlen, uint32_t idx, unsigned secpar) { + size_t buflen = secpar / 8; + uint8_t buf[buflen]; + gcry_mpi_t x; + + det_randomize(buf, buflen, seed, seedlen, idx); + buf[0] &= 0x7f; /* clear upper bit, so that we have x < n */ + x = mpi_import(buf, buflen); + assert(gcry_mpi_cmp(x, n) < 0); + gcry_mpi_mulm(x, x, x, n); + return x; +} + +/* compute 2^m (mod phi(p)), for a prime p */ +static gcry_mpi_t twopowmodphi(uint64_t m, const gcry_mpi_t p) { + gcry_mpi_t phi, r; + int n; + + phi = gcry_mpi_new(0); + gcry_mpi_sub_ui(phi, p, 1); + + /* count number of used bits in m */ + for (n = 0; (1ULL << n) <= m; n++) + ; + + r = gcry_mpi_new(0); + gcry_mpi_set_ui(r, 1); + while (n) { /* square and multiply algorithm for fast exponentiation */ + n--; + gcry_mpi_mulm(r, r, r, phi); + if (m & ((uint64_t)1 << n)) { + gcry_mpi_add(r, r, r); + if (gcry_mpi_cmp(r, phi) >= 0) + gcry_mpi_sub(r, r, phi); + } + } + + gcry_mpi_release(phi); + return r; +} + +/* Decompose $x \in Z_n$ into $(xp,xq) \in Z_p \times Z_q$ using Chinese Remainder Theorem */ +static void CRT_decompose(gcry_mpi_t *xp, gcry_mpi_t *xq, const gcry_mpi_t x, const gcry_mpi_t p, const gcry_mpi_t q) { + *xp = gcry_mpi_new(0); + *xq = gcry_mpi_new(0); + gcry_mpi_mod(*xp, x, p); + gcry_mpi_mod(*xq, x, q); +} + +/* Compose $(xp,xq) \in Z_p \times Z_q$ into $x \in Z_n$ using Chinese Remainder Theorem */ +static void CRT_compose(gcry_mpi_t *x, const gcry_mpi_t xp, const gcry_mpi_t xq, const gcry_mpi_t p, const gcry_mpi_t q) { + gcry_mpi_t a, u; + + a = gcry_mpi_new(0); + u = gcry_mpi_new(0); + *x = gcry_mpi_new(0); + gcry_mpi_subm(a, xq, xp, q); + gcry_mpi_invm(u, p, q); + gcry_mpi_mulm(a, a, u, q); /* a = (xq - xp) / p (mod q) */ + gcry_mpi_mul(*x, p, a); + gcry_mpi_add(*x, *x, xp); /* x = p * ((xq - xp) / p mod q) + xp */ + gcry_mpi_release(a); + gcry_mpi_release(u); +} + +/******************************************************************************/ + +size_t FSPRG_mskinbytes(unsigned _secpar) { + VALIDATE_SECPAR(_secpar); + return 2 + 2 * (_secpar / 2) / 8; /* to store header,p,q */ +} + +size_t FSPRG_mpkinbytes(unsigned _secpar) { + VALIDATE_SECPAR(_secpar); + return 2 + _secpar / 8; /* to store header,n */ +} + +size_t FSPRG_stateinbytes(unsigned _secpar) { + VALIDATE_SECPAR(_secpar); + return 2 + 2 * _secpar / 8 + 8; /* to store header,n,x,epoch */ +} + +static void store_secpar(void *buf, uint16_t secpar) { + secpar = secpar / 16 - 1; + ((uint8_t*) buf)[0] = (secpar >> 8) & 0xff; + ((uint8_t*) buf)[1] = (secpar >> 0) & 0xff; +} + +static uint16_t read_secpar(const void *buf) { + uint16_t secpar; + secpar = + (uint16_t)(((uint8_t*) buf)[0]) << 8 | + (uint16_t)(((uint8_t*) buf)[1]) << 0; + return 16 * (secpar + 1); +} + +void FSPRG_GenMK(void *msk, void *mpk, const void *seed, size_t seedlen, unsigned _secpar) { + uint8_t iseed[FSPRG_RECOMMENDED_SEEDLEN]; + gcry_mpi_t n, p, q; + uint16_t secpar; + + VALIDATE_SECPAR(_secpar); + secpar = _secpar; + + initialize_libgcrypt(false); + + if (!seed) { + gcry_randomize(iseed, FSPRG_RECOMMENDED_SEEDLEN, GCRY_STRONG_RANDOM); + seed = iseed; + seedlen = FSPRG_RECOMMENDED_SEEDLEN; + } + + p = genprime3mod4(secpar / 2, seed, seedlen, RND_GEN_P); + q = genprime3mod4(secpar / 2, seed, seedlen, RND_GEN_Q); + + if (msk) { + store_secpar(msk + 0, secpar); + mpi_export(msk + 2 + 0 * (secpar / 2) / 8, (secpar / 2) / 8, p); + mpi_export(msk + 2 + 1 * (secpar / 2) / 8, (secpar / 2) / 8, q); + } + + if (mpk) { + n = gcry_mpi_new(0); + gcry_mpi_mul(n, p, q); + assert(gcry_mpi_get_nbits(n) == secpar); + + store_secpar(mpk + 0, secpar); + mpi_export(mpk + 2, secpar / 8, n); + + gcry_mpi_release(n); + } + + gcry_mpi_release(p); + gcry_mpi_release(q); +} + +void FSPRG_GenState0(void *state, const void *mpk, const void *seed, size_t seedlen) { + gcry_mpi_t n, x; + uint16_t secpar; + + initialize_libgcrypt(false); + + secpar = read_secpar(mpk + 0); + n = mpi_import(mpk + 2, secpar / 8); + x = gensquare(n, seed, seedlen, RND_GEN_X, secpar); + + memcpy(state, mpk, 2 + secpar / 8); + mpi_export(state + 2 + 1 * secpar / 8, secpar / 8, x); + memzero(state + 2 + 2 * secpar / 8, 8); + + gcry_mpi_release(n); + gcry_mpi_release(x); +} + +void FSPRG_Evolve(void *state) { + gcry_mpi_t n, x; + uint16_t secpar; + uint64_t epoch; + + initialize_libgcrypt(false); + + secpar = read_secpar(state + 0); + n = mpi_import(state + 2 + 0 * secpar / 8, secpar / 8); + x = mpi_import(state + 2 + 1 * secpar / 8, secpar / 8); + epoch = uint64_import(state + 2 + 2 * secpar / 8, 8); + + gcry_mpi_mulm(x, x, x, n); + epoch++; + + mpi_export(state + 2 + 1 * secpar / 8, secpar / 8, x); + uint64_export(state + 2 + 2 * secpar / 8, 8, epoch); + + gcry_mpi_release(n); + gcry_mpi_release(x); +} + +uint64_t FSPRG_GetEpoch(const void *state) { + uint16_t secpar; + secpar = read_secpar(state + 0); + return uint64_import(state + 2 + 2 * secpar / 8, 8); +} + +void FSPRG_Seek(void *state, uint64_t epoch, const void *msk, const void *seed, size_t seedlen) { + gcry_mpi_t p, q, n, x, xp, xq, kp, kq, xm; + uint16_t secpar; + + initialize_libgcrypt(false); + + secpar = read_secpar(msk + 0); + p = mpi_import(msk + 2 + 0 * (secpar / 2) / 8, (secpar / 2) / 8); + q = mpi_import(msk + 2 + 1 * (secpar / 2) / 8, (secpar / 2) / 8); + + n = gcry_mpi_new(0); + gcry_mpi_mul(n, p, q); + + x = gensquare(n, seed, seedlen, RND_GEN_X, secpar); + CRT_decompose(&xp, &xq, x, p, q); /* split (mod n) into (mod p) and (mod q) using CRT */ + + kp = twopowmodphi(epoch, p); /* compute 2^epoch (mod phi(p)) */ + kq = twopowmodphi(epoch, q); /* compute 2^epoch (mod phi(q)) */ + + gcry_mpi_powm(xp, xp, kp, p); /* compute x^(2^epoch) (mod p) */ + gcry_mpi_powm(xq, xq, kq, q); /* compute x^(2^epoch) (mod q) */ + + CRT_compose(&xm, xp, xq, p, q); /* combine (mod p) and (mod q) to (mod n) using CRT */ + + store_secpar(state + 0, secpar); + mpi_export(state + 2 + 0 * secpar / 8, secpar / 8, n); + mpi_export(state + 2 + 1 * secpar / 8, secpar / 8, xm); + uint64_export(state + 2 + 2 * secpar / 8, 8, epoch); + + gcry_mpi_release(p); + gcry_mpi_release(q); + gcry_mpi_release(n); + gcry_mpi_release(x); + gcry_mpi_release(xp); + gcry_mpi_release(xq); + gcry_mpi_release(kp); + gcry_mpi_release(kq); + gcry_mpi_release(xm); +} + +void FSPRG_GetKey(const void *state, void *key, size_t keylen, uint32_t idx) { + uint16_t secpar; + + initialize_libgcrypt(false); + + secpar = read_secpar(state + 0); + det_randomize(key, keylen, state + 2, 2 * secpar / 8 + 8, idx); +} diff --git a/src/libsystemd/sd-journal/fsprg.h b/src/libsystemd/sd-journal/fsprg.h new file mode 100644 index 0000000..d3d88aa --- /dev/null +++ b/src/libsystemd/sd-journal/fsprg.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +/* + * fsprg v0.1 - (seekable) forward-secure pseudorandom generator + * Copyright © 2012 B. Poettering + * Contact: fsprg@point-at-infinity.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include <inttypes.h> +#include <sys/types.h> + +#include "macro.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define FSPRG_RECOMMENDED_SECPAR 1536 +#define FSPRG_RECOMMENDED_SEEDLEN (96/8) + +size_t FSPRG_mskinbytes(unsigned secpar) _const_; +size_t FSPRG_mpkinbytes(unsigned secpar) _const_; +size_t FSPRG_stateinbytes(unsigned secpar) _const_; + +/* Setup msk and mpk. Providing seed != NULL makes this algorithm deterministic. */ +void FSPRG_GenMK(void *msk, void *mpk, const void *seed, size_t seedlen, unsigned secpar); + +/* Initialize state deterministically in dependence on seed. */ +/* Note: in case one wants to run only one GenState0 per GenMK it is safe to use + the same seed for both GenMK and GenState0. +*/ +void FSPRG_GenState0(void *state, const void *mpk, const void *seed, size_t seedlen); + +void FSPRG_Evolve(void *state); + +uint64_t FSPRG_GetEpoch(const void *state) _pure_; + +/* Seek to any arbitrary state (by providing msk together with seed from GenState0). */ +void FSPRG_Seek(void *state, uint64_t epoch, const void *msk, const void *seed, size_t seedlen); + +void FSPRG_GetKey(const void *state, void *key, size_t keylen, uint32_t idx); + +#ifdef __cplusplus +} +#endif diff --git a/src/libsystemd/sd-journal/generate-audit_type-list.sh b/src/libsystemd/sd-journal/generate-audit_type-list.sh new file mode 100755 index 0000000..3851ea1 --- /dev/null +++ b/src/libsystemd/sd-journal/generate-audit_type-list.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: LGPL-2.1-or-later +set -eu +set -o pipefail + +cpp="${1:?}" +shift + +includes=() +for i in "$@"; do + includes+=(-include "$i") +done + +$cpp -dM "${includes[@]}" - </dev/null | \ + grep -vE 'AUDIT_.*(FIRST|LAST)_' | \ + sed -r -n 's/^#define\s+AUDIT_(\w+)\s+([0-9]{4})\s*$$/\1\t\2/p' | \ + sort -k2 diff --git a/src/libsystemd/sd-journal/journal-authenticate.c b/src/libsystemd/sd-journal/journal-authenticate.c new file mode 100644 index 0000000..8e7533e --- /dev/null +++ b/src/libsystemd/sd-journal/journal-authenticate.c @@ -0,0 +1,525 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <sys/mman.h> + +#include "fd-util.h" +#include "fsprg.h" +#include "gcrypt-util.h" +#include "hexdecoct.h" +#include "journal-authenticate.h" +#include "journal-def.h" +#include "journal-file.h" +#include "memory-util.h" +#include "time-util.h" + +static void* fssheader_free(FSSHeader *p) { + /* mmap() returns MAP_FAILED on error and sets the errno */ + if (!p || p == MAP_FAILED) + return NULL; + + assert_se(munmap(p, PAGE_ALIGN(sizeof(FSSHeader))) >= 0); + return NULL; +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(FSSHeader*, fssheader_free); + +static uint64_t journal_file_tag_seqnum(JournalFile *f) { + uint64_t r; + + assert(f); + + r = le64toh(f->header->n_tags) + 1; + f->header->n_tags = htole64(r); + + return r; +} + +int journal_file_append_tag(JournalFile *f) { + Object *o; + uint64_t p; + int r; + + assert(f); + + if (!JOURNAL_HEADER_SEALED(f->header)) + return 0; + + if (!f->hmac_running) { + r = journal_file_hmac_start(f); + if (r < 0) + return r; + } + + assert(f->hmac); + + r = journal_file_append_object(f, OBJECT_TAG, sizeof(struct TagObject), &o, &p); + if (r < 0) + return r; + + o->tag.seqnum = htole64(journal_file_tag_seqnum(f)); + o->tag.epoch = htole64(FSPRG_GetEpoch(f->fsprg_state)); + + log_debug("Writing tag %"PRIu64" for epoch %"PRIu64"", + le64toh(o->tag.seqnum), + FSPRG_GetEpoch(f->fsprg_state)); + + /* Add the tag object itself, so that we can protect its + * header. This will exclude the actual hash value in it */ + r = journal_file_hmac_put_object(f, OBJECT_TAG, o, p); + if (r < 0) + return r; + + /* Get the HMAC tag and store it in the object */ + memcpy(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH); + f->hmac_running = false; + + return 0; +} + +int journal_file_hmac_start(JournalFile *f) { + uint8_t key[256 / 8]; /* Let's pass 256 bit from FSPRG to HMAC */ + gcry_error_t err; + + assert(f); + + if (!JOURNAL_HEADER_SEALED(f->header)) + return 0; + + if (f->hmac_running) + return 0; + + /* Prepare HMAC for next cycle */ + gcry_md_reset(f->hmac); + FSPRG_GetKey(f->fsprg_state, key, sizeof(key), 0); + err = gcry_md_setkey(f->hmac, key, sizeof(key)); + if (gcry_err_code(err) != GPG_ERR_NO_ERROR) + return log_debug_errno(SYNTHETIC_ERRNO(EIO), + "gcry_md_setkey() failed with error code: %s", + gcry_strerror(err)); + + f->hmac_running = true; + + return 0; +} + +static int journal_file_get_epoch(JournalFile *f, uint64_t realtime, uint64_t *epoch) { + uint64_t t; + + assert(f); + assert(epoch); + assert(JOURNAL_HEADER_SEALED(f->header)); + + if (f->fss_start_usec == 0 || f->fss_interval_usec == 0) + return -EOPNOTSUPP; + + if (realtime < f->fss_start_usec) + return -ESTALE; + + t = realtime - f->fss_start_usec; + t = t / f->fss_interval_usec; + + *epoch = t; + + return 0; +} + +static int journal_file_fsprg_need_evolve(JournalFile *f, uint64_t realtime) { + uint64_t goal, epoch; + int r; + + assert(f); + + if (!JOURNAL_HEADER_SEALED(f->header)) + return 0; + + r = journal_file_get_epoch(f, realtime, &goal); + if (r < 0) + return r; + + epoch = FSPRG_GetEpoch(f->fsprg_state); + if (epoch > goal) + return -ESTALE; + + return epoch != goal; +} + +int journal_file_fsprg_evolve(JournalFile *f, uint64_t realtime) { + uint64_t goal, epoch; + int r; + + assert(f); + + if (!JOURNAL_HEADER_SEALED(f->header)) + return 0; + + r = journal_file_get_epoch(f, realtime, &goal); + if (r < 0) + return r; + + epoch = FSPRG_GetEpoch(f->fsprg_state); + if (epoch < goal) + log_debug("Evolving FSPRG key from epoch %"PRIu64" to %"PRIu64".", epoch, goal); + + for (;;) { + if (epoch > goal) + return -ESTALE; + if (epoch == goal) + return 0; + + FSPRG_Evolve(f->fsprg_state); + epoch = FSPRG_GetEpoch(f->fsprg_state); + if (epoch < goal) { + r = journal_file_append_tag(f); + if (r < 0) + return r; + } + } +} + +int journal_file_fsprg_seek(JournalFile *f, uint64_t goal) { + void *msk; + uint64_t epoch; + + assert(f); + + if (!JOURNAL_HEADER_SEALED(f->header)) + return 0; + + assert(f->fsprg_seed); + + if (f->fsprg_state) { + /* Cheaper... */ + + epoch = FSPRG_GetEpoch(f->fsprg_state); + if (goal == epoch) + return 0; + + if (goal == epoch + 1) { + FSPRG_Evolve(f->fsprg_state); + return 0; + } + } else { + f->fsprg_state_size = FSPRG_stateinbytes(FSPRG_RECOMMENDED_SECPAR); + f->fsprg_state = malloc(f->fsprg_state_size); + if (!f->fsprg_state) + return -ENOMEM; + } + + log_debug("Seeking FSPRG key to %"PRIu64".", goal); + + msk = alloca_safe(FSPRG_mskinbytes(FSPRG_RECOMMENDED_SECPAR)); + FSPRG_GenMK(msk, NULL, f->fsprg_seed, f->fsprg_seed_size, FSPRG_RECOMMENDED_SECPAR); + FSPRG_Seek(f->fsprg_state, goal, msk, f->fsprg_seed, f->fsprg_seed_size); + + return 0; +} + +int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime) { + int r; + + assert(f); + + if (!JOURNAL_HEADER_SEALED(f->header)) + return 0; + + if (realtime <= 0) + realtime = now(CLOCK_REALTIME); + + r = journal_file_fsprg_need_evolve(f, realtime); + if (r <= 0) + return 0; + + r = journal_file_append_tag(f); + if (r < 0) + return r; + + r = journal_file_fsprg_evolve(f, realtime); + if (r < 0) + return r; + + return 0; +} + +int journal_file_hmac_put_object(JournalFile *f, ObjectType type, Object *o, uint64_t p) { + int r; + + assert(f); + + if (!JOURNAL_HEADER_SEALED(f->header)) + return 0; + + r = journal_file_hmac_start(f); + if (r < 0) + return r; + + if (!o) { + r = journal_file_move_to_object(f, type, p, &o); + if (r < 0) + return r; + } else if (type > OBJECT_UNUSED && o->object.type != type) + return -EBADMSG; + + gcry_md_write(f->hmac, o, offsetof(ObjectHeader, payload)); + + switch (o->object.type) { + + case OBJECT_DATA: + /* All but hash and payload are mutable */ + gcry_md_write(f->hmac, &o->data.hash, sizeof(o->data.hash)); + gcry_md_write(f->hmac, journal_file_data_payload_field(f, o), le64toh(o->object.size) - journal_file_data_payload_offset(f)); + break; + + case OBJECT_FIELD: + /* Same here */ + gcry_md_write(f->hmac, &o->field.hash, sizeof(o->field.hash)); + gcry_md_write(f->hmac, o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload)); + break; + + case OBJECT_ENTRY: + /* All */ + gcry_md_write(f->hmac, &o->entry.seqnum, le64toh(o->object.size) - offsetof(Object, entry.seqnum)); + break; + + case OBJECT_FIELD_HASH_TABLE: + case OBJECT_DATA_HASH_TABLE: + case OBJECT_ENTRY_ARRAY: + /* Nothing: everything is mutable */ + break; + + case OBJECT_TAG: + /* All but the tag itself */ + gcry_md_write(f->hmac, &o->tag.seqnum, sizeof(o->tag.seqnum)); + gcry_md_write(f->hmac, &o->tag.epoch, sizeof(o->tag.epoch)); + break; + default: + return -EINVAL; + } + + return 0; +} + +int journal_file_hmac_put_header(JournalFile *f) { + int r; + + assert(f); + + if (!JOURNAL_HEADER_SEALED(f->header)) + return 0; + + r = journal_file_hmac_start(f); + if (r < 0) + return r; + + /* All but state+reserved, boot_id, arena_size, + * tail_object_offset, n_objects, n_entries, + * tail_entry_seqnum, head_entry_seqnum, entry_array_offset, + * head_entry_realtime, tail_entry_realtime, + * tail_entry_monotonic, n_data, n_fields, n_tags, + * n_entry_arrays. */ + + gcry_md_write(f->hmac, f->header->signature, offsetof(Header, state) - offsetof(Header, signature)); + gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, tail_entry_boot_id) - offsetof(Header, file_id)); + gcry_md_write(f->hmac, &f->header->seqnum_id, offsetof(Header, arena_size) - offsetof(Header, seqnum_id)); + gcry_md_write(f->hmac, &f->header->data_hash_table_offset, offsetof(Header, tail_object_offset) - offsetof(Header, data_hash_table_offset)); + + return 0; +} + +int journal_file_fss_load(JournalFile *f) { + _cleanup_close_ int fd = -EBADF; + _cleanup_free_ char *path = NULL; + _cleanup_(fssheader_freep) FSSHeader *header = NULL; + struct stat st; + sd_id128_t machine; + int r; + + assert(f); + + /* This function is used to determine whether sealing should be enabled in the journal header so we + * can't check the header to check if sealing is enabled here. */ + + r = sd_id128_get_machine(&machine); + if (r < 0) + return r; + + if (asprintf(&path, "/var/log/journal/" SD_ID128_FORMAT_STR "/fss", + SD_ID128_FORMAT_VAL(machine)) < 0) + return -ENOMEM; + + fd = open(path, O_RDWR|O_CLOEXEC|O_NOCTTY, 0600); + if (fd < 0) { + if (errno != ENOENT) + log_error_errno(errno, "Failed to open %s: %m", path); + + return -errno; + } + + if (fstat(fd, &st) < 0) + return -errno; + + if (st.st_size < (off_t) sizeof(FSSHeader)) + return -ENODATA; + + header = mmap(NULL, PAGE_ALIGN(sizeof(FSSHeader)), PROT_READ, MAP_SHARED, fd, 0); + if (header == MAP_FAILED) + return -errno; + + if (memcmp(header->signature, FSS_HEADER_SIGNATURE, 8) != 0) + return -EBADMSG; + + if (header->incompatible_flags != 0) + return -EPROTONOSUPPORT; + + if (le64toh(header->header_size) < sizeof(FSSHeader)) + return -EBADMSG; + + if (le64toh(header->fsprg_state_size) != FSPRG_stateinbytes(le16toh(header->fsprg_secpar))) + return -EBADMSG; + + f->fss_file_size = le64toh(header->header_size) + le64toh(header->fsprg_state_size); + if ((uint64_t) st.st_size < f->fss_file_size) + return -ENODATA; + + if (!sd_id128_equal(machine, header->machine_id)) + return -EHOSTDOWN; + + if (le64toh(header->start_usec) <= 0 || le64toh(header->interval_usec) <= 0) + return -EBADMSG; + + size_t sz = PAGE_ALIGN(f->fss_file_size); + assert(sz < SIZE_MAX); + f->fss_file = mmap(NULL, sz, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (f->fss_file == MAP_FAILED) { + f->fss_file = NULL; + return -errno; + } + + f->fss_start_usec = le64toh(f->fss_file->start_usec); + f->fss_interval_usec = le64toh(f->fss_file->interval_usec); + + f->fsprg_state = (uint8_t*) f->fss_file + le64toh(f->fss_file->header_size); + f->fsprg_state_size = le64toh(f->fss_file->fsprg_state_size); + + return 0; +} + +int journal_file_hmac_setup(JournalFile *f) { + gcry_error_t e; + + if (!JOURNAL_HEADER_SEALED(f->header)) + return 0; + + initialize_libgcrypt(true); + + e = gcry_md_open(&f->hmac, GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC); + if (e != 0) + return -EOPNOTSUPP; + + return 0; +} + +int journal_file_append_first_tag(JournalFile *f) { + uint64_t p; + int r; + + if (!JOURNAL_HEADER_SEALED(f->header)) + return 0; + + log_debug("Calculating first tag..."); + + r = journal_file_hmac_put_header(f); + if (r < 0) + return r; + + p = le64toh(f->header->field_hash_table_offset); + if (p < offsetof(Object, hash_table.items)) + return -EINVAL; + p -= offsetof(Object, hash_table.items); + + r = journal_file_hmac_put_object(f, OBJECT_FIELD_HASH_TABLE, NULL, p); + if (r < 0) + return r; + + p = le64toh(f->header->data_hash_table_offset); + if (p < offsetof(Object, hash_table.items)) + return -EINVAL; + p -= offsetof(Object, hash_table.items); + + r = journal_file_hmac_put_object(f, OBJECT_DATA_HASH_TABLE, NULL, p); + if (r < 0) + return r; + + r = journal_file_append_tag(f); + if (r < 0) + return r; + + return 0; +} + +int journal_file_parse_verification_key(JournalFile *f, const char *key) { + _cleanup_free_ uint8_t *seed = NULL; + size_t seed_size; + const char *k; + unsigned long long start, interval; + int r; + + assert(f); + assert(key); + + seed_size = FSPRG_RECOMMENDED_SEEDLEN; + seed = malloc(seed_size); + if (!seed) + return -ENOMEM; + + k = key; + for (size_t c = 0; c < seed_size; c++) { + int x, y; + + k = skip_leading_chars(k, "-"); + + x = unhexchar(*k); + if (x < 0) + return -EINVAL; + k++; + + y = unhexchar(*k); + if (y < 0) + return -EINVAL; + k++; + + seed[c] = (uint8_t) (x * 16 + y); + } + + if (*k != '/') + return -EINVAL; + k++; + + r = sscanf(k, "%llx-%llx", &start, &interval); + if (r != 2) + return -EINVAL; + + f->fsprg_seed = TAKE_PTR(seed); + f->fsprg_seed_size = seed_size; + + f->fss_start_usec = start * interval; + f->fss_interval_usec = interval; + + return 0; +} + +bool journal_file_next_evolve_usec(JournalFile *f, usec_t *u) { + uint64_t epoch; + + assert(f); + assert(u); + + if (!JOURNAL_HEADER_SEALED(f->header)) + return false; + + epoch = FSPRG_GetEpoch(f->fsprg_state); + + *u = (usec_t) (f->fss_start_usec + f->fss_interval_usec * epoch + f->fss_interval_usec); + + return true; +} diff --git a/src/libsystemd/sd-journal/journal-authenticate.h b/src/libsystemd/sd-journal/journal-authenticate.h new file mode 100644 index 0000000..e895722 --- /dev/null +++ b/src/libsystemd/sd-journal/journal-authenticate.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "journal-file.h" + +int journal_file_append_tag(JournalFile *f); +int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime); +int journal_file_append_first_tag(JournalFile *f); + +int journal_file_hmac_setup(JournalFile *f); +int journal_file_hmac_start(JournalFile *f); +int journal_file_hmac_put_header(JournalFile *f); +int journal_file_hmac_put_object(JournalFile *f, ObjectType type, Object *o, uint64_t p); + +int journal_file_fss_load(JournalFile *f); +int journal_file_parse_verification_key(JournalFile *f, const char *key); + +int journal_file_fsprg_evolve(JournalFile *f, uint64_t realtime); +int journal_file_fsprg_seek(JournalFile *f, uint64_t epoch); + +bool journal_file_next_evolve_usec(JournalFile *f, usec_t *u); diff --git a/src/libsystemd/sd-journal/journal-def.h b/src/libsystemd/sd-journal/journal-def.h new file mode 100644 index 0000000..1b10f24 --- /dev/null +++ b/src/libsystemd/sd-journal/journal-def.h @@ -0,0 +1,269 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-id128.h" + +#include "macro.h" +#include "sparse-endian.h" + +/* + * If you change this file you probably should also change its documentation: + * + * https://systemd.io/JOURNAL_FILE_FORMAT + */ + +typedef struct Header Header; + +typedef struct ObjectHeader ObjectHeader; +typedef union Object Object; + +typedef struct DataObject DataObject; +typedef struct FieldObject FieldObject; +typedef struct EntryObject EntryObject; +typedef struct HashTableObject HashTableObject; +typedef struct EntryArrayObject EntryArrayObject; +typedef struct TagObject TagObject; + +typedef struct HashItem HashItem; + +typedef struct FSSHeader FSSHeader; + +/* Object types */ +typedef enum ObjectType { + OBJECT_UNUSED, /* also serves as "any type" or "additional category" */ + OBJECT_DATA, + OBJECT_FIELD, + OBJECT_ENTRY, + OBJECT_DATA_HASH_TABLE, + OBJECT_FIELD_HASH_TABLE, + OBJECT_ENTRY_ARRAY, + OBJECT_TAG, + _OBJECT_TYPE_MAX, + _OBJECT_TYPE_INVALID = -EINVAL, +} ObjectType; + +/* Object flags (note that src/basic/compress.h uses the same values for the compression types) */ +enum { + OBJECT_COMPRESSED_XZ = 1 << 0, + OBJECT_COMPRESSED_LZ4 = 1 << 1, + OBJECT_COMPRESSED_ZSTD = 1 << 2, + _OBJECT_COMPRESSED_MASK = OBJECT_COMPRESSED_XZ | OBJECT_COMPRESSED_LZ4 | OBJECT_COMPRESSED_ZSTD, +}; + +struct ObjectHeader { + uint8_t type; + uint8_t flags; + uint8_t reserved[6]; + le64_t size; + uint8_t payload[]; +} _packed_; + +#define DataObject__contents { \ + ObjectHeader object; \ + le64_t hash; \ + le64_t next_hash_offset; \ + le64_t next_field_offset; \ + le64_t entry_offset; /* the first array entry we store inline */ \ + le64_t entry_array_offset; \ + le64_t n_entries; \ + union { \ + struct { \ + uint8_t payload[0]; \ + } regular; \ + struct { \ + le32_t tail_entry_array_offset; \ + le32_t tail_entry_array_n_entries; \ + uint8_t payload[0]; \ + } compact; \ + }; \ +} + +struct DataObject DataObject__contents; +struct DataObject__packed DataObject__contents _packed_; +assert_cc(sizeof(struct DataObject) == sizeof(struct DataObject__packed)); + +#define FieldObject__contents { \ + ObjectHeader object; \ + le64_t hash; \ + le64_t next_hash_offset; \ + le64_t head_data_offset; \ + uint8_t payload[]; \ +} + +struct FieldObject FieldObject__contents; +struct FieldObject__packed FieldObject__contents _packed_; +assert_cc(sizeof(struct FieldObject) == sizeof(struct FieldObject__packed)); + +#define EntryObject__contents { \ + ObjectHeader object; \ + le64_t seqnum; \ + le64_t realtime; \ + le64_t monotonic; \ + sd_id128_t boot_id; \ + le64_t xor_hash; \ + union { \ + struct { \ + dummy_t __empty__regular; \ + struct { \ + le64_t object_offset; \ + le64_t hash; \ + } regular[]; \ + }; \ + struct { \ + dummy_t __empty_compact; \ + struct { \ + le32_t object_offset; \ + } compact[]; \ + }; \ + } items; \ +} + +struct EntryObject EntryObject__contents; +struct EntryObject__packed EntryObject__contents _packed_; +assert_cc(sizeof(struct EntryObject) == sizeof(struct EntryObject__packed)); + +struct HashItem { + le64_t head_hash_offset; + le64_t tail_hash_offset; +} _packed_; + +struct HashTableObject { + ObjectHeader object; + HashItem items[]; +} _packed_; + +struct EntryArrayObject { + ObjectHeader object; + le64_t next_entry_array_offset; + union { + DECLARE_FLEX_ARRAY(le64_t, regular); + DECLARE_FLEX_ARRAY(le32_t, compact); + } items; +} _packed_; + +#define TAG_LENGTH (256/8) + +struct TagObject { + ObjectHeader object; + le64_t seqnum; + le64_t epoch; + uint8_t tag[TAG_LENGTH]; /* SHA-256 HMAC */ +} _packed_; + +union Object { + ObjectHeader object; + DataObject data; + FieldObject field; + EntryObject entry; + HashTableObject hash_table; + EntryArrayObject entry_array; + TagObject tag; +}; + +enum { + STATE_OFFLINE = 0, + STATE_ONLINE = 1, + STATE_ARCHIVED = 2, + _STATE_MAX +}; + +/* Header flags */ +enum { + HEADER_INCOMPATIBLE_COMPRESSED_XZ = 1 << 0, + HEADER_INCOMPATIBLE_COMPRESSED_LZ4 = 1 << 1, + HEADER_INCOMPATIBLE_KEYED_HASH = 1 << 2, + HEADER_INCOMPATIBLE_COMPRESSED_ZSTD = 1 << 3, + HEADER_INCOMPATIBLE_COMPACT = 1 << 4, + + HEADER_INCOMPATIBLE_ANY = HEADER_INCOMPATIBLE_COMPRESSED_XZ | + HEADER_INCOMPATIBLE_COMPRESSED_LZ4 | + HEADER_INCOMPATIBLE_KEYED_HASH | + HEADER_INCOMPATIBLE_COMPRESSED_ZSTD | + HEADER_INCOMPATIBLE_COMPACT, + + HEADER_INCOMPATIBLE_SUPPORTED = (HAVE_XZ ? HEADER_INCOMPATIBLE_COMPRESSED_XZ : 0) | + (HAVE_LZ4 ? HEADER_INCOMPATIBLE_COMPRESSED_LZ4 : 0) | + (HAVE_ZSTD ? HEADER_INCOMPATIBLE_COMPRESSED_ZSTD : 0) | + HEADER_INCOMPATIBLE_KEYED_HASH | + HEADER_INCOMPATIBLE_COMPACT, +}; + + +enum { + HEADER_COMPATIBLE_SEALED = 1 << 0, + HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID = 1 << 1, /* if set, the last_entry_boot_id field in the header is exclusively refreshed when an entry is appended */ + HEADER_COMPATIBLE_SEALED_CONTINUOUS = 1 << 2, + HEADER_COMPATIBLE_ANY = HEADER_COMPATIBLE_SEALED | + HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID | + HEADER_COMPATIBLE_SEALED_CONTINUOUS, + + HEADER_COMPATIBLE_SUPPORTED = (HAVE_GCRYPT ? HEADER_COMPATIBLE_SEALED | HEADER_COMPATIBLE_SEALED_CONTINUOUS : 0) | + HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID, +}; + + +#define HEADER_SIGNATURE \ + ((const uint8_t[]) { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' }) + +#define struct_Header__contents { \ + uint8_t signature[8]; /* "LPKSHHRH" */ \ + le32_t compatible_flags; \ + le32_t incompatible_flags; \ + uint8_t state; \ + uint8_t reserved[7]; \ + sd_id128_t file_id; \ + sd_id128_t machine_id; \ + sd_id128_t tail_entry_boot_id; \ + sd_id128_t seqnum_id; \ + le64_t header_size; \ + le64_t arena_size; \ + le64_t data_hash_table_offset; \ + le64_t data_hash_table_size; \ + le64_t field_hash_table_offset; \ + le64_t field_hash_table_size; \ + le64_t tail_object_offset; \ + le64_t n_objects; \ + le64_t n_entries; \ + le64_t tail_entry_seqnum; \ + le64_t head_entry_seqnum; \ + le64_t entry_array_offset; \ + le64_t head_entry_realtime; \ + le64_t tail_entry_realtime; \ + le64_t tail_entry_monotonic; \ + /* Added in 187 */ \ + le64_t n_data; \ + le64_t n_fields; \ + /* Added in 189 */ \ + le64_t n_tags; \ + le64_t n_entry_arrays; \ + /* Added in 246 */ \ + le64_t data_hash_chain_depth; \ + le64_t field_hash_chain_depth; \ + /* Added in 252 */ \ + le32_t tail_entry_array_offset; \ + le32_t tail_entry_array_n_entries; \ + /* Added in 254 */ \ + le64_t tail_entry_offset; \ + } + +struct Header struct_Header__contents; +struct Header__packed struct_Header__contents _packed_; +assert_cc(sizeof(struct Header) == sizeof(struct Header__packed)); +assert_cc(sizeof(struct Header) == 272); + +#define FSS_HEADER_SIGNATURE \ + ((const char[]) { 'K', 'S', 'H', 'H', 'R', 'H', 'L', 'P' }) + +struct FSSHeader { + uint8_t signature[8]; /* "KSHHRHLP" */ + le32_t compatible_flags; + le32_t incompatible_flags; + sd_id128_t machine_id; + sd_id128_t boot_id; /* last writer */ + le64_t header_size; + le64_t start_usec; + le64_t interval_usec; + le16_t fsprg_secpar; + le16_t reserved[3]; + le64_t fsprg_state_size; +} _packed_; diff --git a/src/libsystemd/sd-journal/journal-file.c b/src/libsystemd/sd-journal/journal-file.c new file mode 100644 index 0000000..d2493a0 --- /dev/null +++ b/src/libsystemd/sd-journal/journal-file.c @@ -0,0 +1,4696 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <linux/fs.h> +#include <linux/magic.h> +#include <pthread.h> +#include <stddef.h> +#include <sys/mman.h> +#include <sys/statvfs.h> +#include <sys/uio.h> +#include <unistd.h> + +#include "sd-event.h" + +#include "alloc-util.h" +#include "chattr-util.h" +#include "compress.h" +#include "env-util.h" +#include "fd-util.h" +#include "format-util.h" +#include "fs-util.h" +#include "id128-util.h" +#include "journal-authenticate.h" +#include "journal-def.h" +#include "journal-file.h" +#include "journal-internal.h" +#include "lookup3.h" +#include "memory-util.h" +#include "missing_threads.h" +#include "path-util.h" +#include "prioq.h" +#include "random-util.h" +#include "set.h" +#include "sort-util.h" +#include "stat-util.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "sync-util.h" +#include "user-util.h" +#include "xattr-util.h" + +#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem)) +#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem)) + +#define DEFAULT_COMPRESS_THRESHOLD (512ULL) +#define MIN_COMPRESS_THRESHOLD (8ULL) + +#define U64_KB UINT64_C(1024) +#define U64_MB (UINT64_C(1024) * U64_KB) +#define U64_GB (UINT64_C(1024) * U64_MB) + +/* This is the minimum journal file size */ +#define JOURNAL_FILE_SIZE_MIN (512 * U64_KB) /* 512 KiB */ +#define JOURNAL_COMPACT_SIZE_MAX ((uint64_t) UINT32_MAX) /* 4 GiB */ + +/* These are the lower and upper bounds if we deduce the max_use value from the file system size */ +#define MAX_USE_LOWER (1 * U64_MB) /* 1 MiB */ +#define MAX_USE_UPPER (4 * U64_GB) /* 4 GiB */ + +/* Those are the lower and upper bounds for the minimal use limit, + * i.e. how much we'll use even if keep_free suggests otherwise. */ +#define MIN_USE_LOW (1 * U64_MB) /* 1 MiB */ +#define MIN_USE_HIGH (16 * U64_MB) /* 16 MiB */ + +/* This is the upper bound if we deduce max_size from max_use */ +#define MAX_SIZE_UPPER (128 * U64_MB) /* 128 MiB */ + +/* This is the upper bound if we deduce the keep_free value from the file system size */ +#define KEEP_FREE_UPPER (4 * U64_GB) /* 4 GiB */ + +/* This is the keep_free value when we can't determine the system size */ +#define DEFAULT_KEEP_FREE (1 * U64_MB) /* 1 MB */ + +/* This is the default maximum number of journal files to keep around. */ +#define DEFAULT_N_MAX_FILES 100 + +/* n_data was the first entry we added after the initial file format design */ +#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data)) + +/* How many entries to keep in the entry array chain cache at max */ +#define CHAIN_CACHE_MAX 20 + +/* How much to increase the journal file size at once each time we allocate something new. */ +#define FILE_SIZE_INCREASE (8 * U64_MB) /* 8MB */ + +/* Reread fstat() of the file for detecting deletions at least this often */ +#define LAST_STAT_REFRESH_USEC (5*USEC_PER_SEC) + +/* Longest hash chain to rotate after */ +#define HASH_CHAIN_DEPTH_MAX 100 + +#ifdef __clang__ +# pragma GCC diagnostic ignored "-Waddress-of-packed-member" +#endif + +static int mmap_prot_from_open_flags(int flags) { + switch (flags & O_ACCMODE) { + case O_RDONLY: + return PROT_READ; + case O_WRONLY: + return PROT_WRITE; + case O_RDWR: + return PROT_READ|PROT_WRITE; + default: + assert_not_reached(); + } +} + +int journal_file_tail_end_by_pread(JournalFile *f, uint64_t *ret_offset) { + uint64_t p; + int r; + + assert(f); + assert(f->header); + assert(ret_offset); + + /* Same as journal_file_tail_end_by_mmap() below, but operates with pread() to avoid the mmap cache + * (and thus is thread safe) */ + + p = le64toh(f->header->tail_object_offset); + if (p == 0) + p = le64toh(f->header->header_size); + else { + Object tail; + uint64_t sz; + + r = journal_file_read_object_header(f, OBJECT_UNUSED, p, &tail); + if (r < 0) + return r; + + sz = le64toh(tail.object.size); + if (sz > UINT64_MAX - sizeof(uint64_t) + 1) + return -EBADMSG; + + sz = ALIGN64(sz); + if (p > UINT64_MAX - sz) + return -EBADMSG; + + p += sz; + } + + *ret_offset = p; + + return 0; +} + +int journal_file_tail_end_by_mmap(JournalFile *f, uint64_t *ret_offset) { + uint64_t p; + int r; + + assert(f); + assert(f->header); + assert(ret_offset); + + /* Same as journal_file_tail_end_by_pread() above, but operates with the usual mmap logic */ + + p = le64toh(f->header->tail_object_offset); + if (p == 0) + p = le64toh(f->header->header_size); + else { + Object *tail; + uint64_t sz; + + r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &tail); + if (r < 0) + return r; + + sz = le64toh(READ_NOW(tail->object.size)); + if (sz > UINT64_MAX - sizeof(uint64_t) + 1) + return -EBADMSG; + + sz = ALIGN64(sz); + if (p > UINT64_MAX - sz) + return -EBADMSG; + + p += sz; + } + + *ret_offset = p; + + return 0; +} + +int journal_file_set_offline_thread_join(JournalFile *f) { + int r; + + assert(f); + + if (f->offline_state == OFFLINE_JOINED) + return 0; + + r = pthread_join(f->offline_thread, NULL); + if (r) + return -r; + + f->offline_state = OFFLINE_JOINED; + + if (mmap_cache_fd_got_sigbus(f->cache_fd)) + return -EIO; + + return 0; +} + +static int journal_file_set_online(JournalFile *f) { + bool wait = true; + + assert(f); + + if (!journal_file_writable(f)) + return -EPERM; + + if (f->fd < 0 || !f->header) + return -EINVAL; + + while (wait) { + switch (f->offline_state) { + case OFFLINE_JOINED: + /* No offline thread, no need to wait. */ + wait = false; + break; + + case OFFLINE_SYNCING: { + OfflineState tmp_state = OFFLINE_SYNCING; + if (!__atomic_compare_exchange_n(&f->offline_state, &tmp_state, OFFLINE_CANCEL, + false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + continue; + } + /* Canceled syncing prior to offlining, no need to wait. */ + wait = false; + break; + + case OFFLINE_AGAIN_FROM_SYNCING: { + OfflineState tmp_state = OFFLINE_AGAIN_FROM_SYNCING; + if (!__atomic_compare_exchange_n(&f->offline_state, &tmp_state, OFFLINE_CANCEL, + false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + continue; + } + /* Canceled restart from syncing, no need to wait. */ + wait = false; + break; + + case OFFLINE_AGAIN_FROM_OFFLINING: { + OfflineState tmp_state = OFFLINE_AGAIN_FROM_OFFLINING; + if (!__atomic_compare_exchange_n(&f->offline_state, &tmp_state, OFFLINE_CANCEL, + false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + continue; + } + /* Canceled restart from offlining, must wait for offlining to complete however. */ + _fallthrough_; + default: { + int r; + + r = journal_file_set_offline_thread_join(f); + if (r < 0) + return r; + + wait = false; + break; + } + } + } + + if (mmap_cache_fd_got_sigbus(f->cache_fd)) + return -EIO; + + switch (f->header->state) { + case STATE_ONLINE: + return 0; + + case STATE_OFFLINE: + f->header->state = STATE_ONLINE; + (void) fsync(f->fd); + return 0; + + default: + return -EINVAL; + } +} + +JournalFile* journal_file_close(JournalFile *f) { + if (!f) + return NULL; + + assert(f->newest_boot_id_prioq_idx == PRIOQ_IDX_NULL); + + if (f->cache_fd) + mmap_cache_fd_free(f->cache_fd); + + if (f->close_fd) + safe_close(f->fd); + free(f->path); + + ordered_hashmap_free_free(f->chain_cache); + +#if HAVE_COMPRESSION + free(f->compress_buffer); +#endif + +#if HAVE_GCRYPT + if (f->fss_file) { + size_t sz = PAGE_ALIGN(f->fss_file_size); + assert(sz < SIZE_MAX); + munmap(f->fss_file, sz); + } else + free(f->fsprg_state); + + free(f->fsprg_seed); + + if (f->hmac) + gcry_md_close(f->hmac); +#endif + + return mfree(f); +} + +static bool keyed_hash_requested(void) { + static thread_local int cached = -1; + int r; + + if (cached < 0) { + r = getenv_bool("SYSTEMD_JOURNAL_KEYED_HASH"); + if (r < 0) { + if (r != -ENXIO) + log_debug_errno(r, "Failed to parse $SYSTEMD_JOURNAL_KEYED_HASH environment variable, ignoring: %m"); + cached = true; + } else + cached = r; + } + + return cached; +} + +static bool compact_mode_requested(void) { + static thread_local int cached = -1; + int r; + + if (cached < 0) { + r = getenv_bool("SYSTEMD_JOURNAL_COMPACT"); + if (r < 0) { + if (r != -ENXIO) + log_debug_errno(r, "Failed to parse $SYSTEMD_JOURNAL_COMPACT environment variable, ignoring: %m"); + cached = true; + } else + cached = r; + } + + return cached; +} + +#if HAVE_COMPRESSION +static Compression getenv_compression(void) { + Compression c; + const char *e; + int r; + + e = getenv("SYSTEMD_JOURNAL_COMPRESS"); + if (!e) + return DEFAULT_COMPRESSION; + + r = parse_boolean(e); + if (r >= 0) + return r ? DEFAULT_COMPRESSION : COMPRESSION_NONE; + + c = compression_from_string(e); + if (c < 0) { + log_debug_errno(c, "Failed to parse SYSTEMD_JOURNAL_COMPRESS value, ignoring: %s", e); + return DEFAULT_COMPRESSION; + } + + if (!compression_supported(c)) { + log_debug("Unsupported compression algorithm specified, ignoring: %s", e); + return DEFAULT_COMPRESSION; + } + + return c; +} +#endif + +static Compression compression_requested(void) { +#if HAVE_COMPRESSION + static thread_local Compression cached = _COMPRESSION_INVALID; + + if (cached < 0) + cached = getenv_compression(); + + return cached; +#else + return COMPRESSION_NONE; +#endif +} + +static int journal_file_init_header( + JournalFile *f, + JournalFileFlags file_flags, + JournalFile *template) { + + bool seal = false; + ssize_t k; + int r; + + assert(f); + +#if HAVE_GCRYPT + /* Try to load the FSPRG state, and if we can't, then just don't do sealing */ + seal = FLAGS_SET(file_flags, JOURNAL_SEAL) && journal_file_fss_load(f) >= 0; +#endif + + Header h = { + .header_size = htole64(ALIGN64(sizeof(h))), + .incompatible_flags = htole32( + FLAGS_SET(file_flags, JOURNAL_COMPRESS) * COMPRESSION_TO_HEADER_INCOMPATIBLE_FLAG(compression_requested()) | + keyed_hash_requested() * HEADER_INCOMPATIBLE_KEYED_HASH | + compact_mode_requested() * HEADER_INCOMPATIBLE_COMPACT), + .compatible_flags = htole32( + (seal * (HEADER_COMPATIBLE_SEALED | HEADER_COMPATIBLE_SEALED_CONTINUOUS) ) | + HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID), + }; + + assert_cc(sizeof(h.signature) == sizeof(HEADER_SIGNATURE)); + memcpy(h.signature, HEADER_SIGNATURE, sizeof(HEADER_SIGNATURE)); + + r = sd_id128_randomize(&h.file_id); + if (r < 0) + return r; + + r = sd_id128_get_machine(&h.machine_id); + if (r < 0 && !ERRNO_IS_MACHINE_ID_UNSET(r)) + return r; /* If we have no valid machine ID (test environment?), let's simply leave the + * machine ID field all zeroes. */ + + if (template) { + h.seqnum_id = template->header->seqnum_id; + h.tail_entry_seqnum = template->header->tail_entry_seqnum; + } else + h.seqnum_id = h.file_id; + + k = pwrite(f->fd, &h, sizeof(h), 0); + if (k < 0) + return -errno; + if (k != sizeof(h)) + return -EIO; + + return 0; +} + +static int journal_file_refresh_header(JournalFile *f) { + int r; + + assert(f); + assert(f->header); + + /* We used to update the header's boot ID field here, but we don't do that anymore, as per + * HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID */ + + r = journal_file_set_online(f); + + /* Sync the online state to disk; likely just created a new file, also sync the directory this file + * is located in. */ + (void) fsync_full(f->fd); + + return r; +} + +static bool warn_wrong_flags(const JournalFile *f, bool compatible) { + const uint32_t any = compatible ? HEADER_COMPATIBLE_ANY : HEADER_INCOMPATIBLE_ANY, + supported = compatible ? HEADER_COMPATIBLE_SUPPORTED : HEADER_INCOMPATIBLE_SUPPORTED; + const char *type = compatible ? "compatible" : "incompatible"; + uint32_t flags; + + assert(f); + assert(f->header); + + flags = le32toh(compatible ? f->header->compatible_flags : f->header->incompatible_flags); + + if (flags & ~supported) { + if (flags & ~any) + log_debug("Journal file %s has unknown %s flags 0x%"PRIx32, + f->path, type, flags & ~any); + flags = (flags & any) & ~supported; + if (flags) { + const char* strv[6]; + size_t n = 0; + _cleanup_free_ char *t = NULL; + + if (compatible) { + if (flags & HEADER_COMPATIBLE_SEALED) + strv[n++] = "sealed"; + if (flags & HEADER_COMPATIBLE_SEALED_CONTINUOUS) + strv[n++] = "sealed-continuous"; + } else { + if (flags & HEADER_INCOMPATIBLE_COMPRESSED_XZ) + strv[n++] = "xz-compressed"; + if (flags & HEADER_INCOMPATIBLE_COMPRESSED_LZ4) + strv[n++] = "lz4-compressed"; + if (flags & HEADER_INCOMPATIBLE_COMPRESSED_ZSTD) + strv[n++] = "zstd-compressed"; + if (flags & HEADER_INCOMPATIBLE_KEYED_HASH) + strv[n++] = "keyed-hash"; + if (flags & HEADER_INCOMPATIBLE_COMPACT) + strv[n++] = "compact"; + } + strv[n] = NULL; + assert(n < ELEMENTSOF(strv)); + + t = strv_join((char**) strv, ", "); + log_debug("Journal file %s uses %s %s %s disabled at compilation time.", + f->path, type, n > 1 ? "flags" : "flag", strnull(t)); + } + return true; + } + + return false; +} + +static bool offset_is_valid(uint64_t offset, uint64_t header_size, uint64_t tail_object_offset) { + if (offset == 0) + return true; + if (!VALID64(offset)) + return false; + if (offset < header_size) + return false; + if (offset > tail_object_offset) + return false; + return true; +} + +static bool hash_table_is_valid(uint64_t offset, uint64_t size, uint64_t header_size, uint64_t arena_size, uint64_t tail_object_offset) { + if ((offset == 0) != (size == 0)) + return false; + if (offset == 0) + return true; + if (offset <= offsetof(Object, hash_table.items)) + return false; + offset -= offsetof(Object, hash_table.items); + if (!offset_is_valid(offset, header_size, tail_object_offset)) + return false; + assert(offset <= header_size + arena_size); + if (size > header_size + arena_size - offset) + return false; + return true; +} + +static int journal_file_verify_header(JournalFile *f) { + uint64_t arena_size, header_size; + + assert(f); + assert(f->header); + + if (memcmp(f->header->signature, HEADER_SIGNATURE, 8)) + return -EBADMSG; + + /* In both read and write mode we refuse to open files with incompatible + * flags we don't know. */ + if (warn_wrong_flags(f, false)) + return -EPROTONOSUPPORT; + + /* When open for writing we refuse to open files with compatible flags, too. */ + if (journal_file_writable(f) && warn_wrong_flags(f, true)) + return -EPROTONOSUPPORT; + + if (f->header->state >= _STATE_MAX) + return -EBADMSG; + + header_size = le64toh(READ_NOW(f->header->header_size)); + + /* The first addition was n_data, so check that we are at least this large */ + if (header_size < HEADER_SIZE_MIN) + return -EBADMSG; + + /* When open for writing we refuse to open files with a mismatch of the header size, i.e. writing to + * files implementing older or new header structures. */ + if (journal_file_writable(f) && header_size != sizeof(Header)) + return -EPROTONOSUPPORT; + + /* Don't write to journal files without the new boot ID update behavior guarantee. */ + if (journal_file_writable(f) && !JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(f->header)) + return -EPROTONOSUPPORT; + + if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays)) + return -EBADMSG; + + arena_size = le64toh(READ_NOW(f->header->arena_size)); + + if (UINT64_MAX - header_size < arena_size || header_size + arena_size > (uint64_t) f->last_stat.st_size) + return -ENODATA; + + uint64_t tail_object_offset = le64toh(f->header->tail_object_offset); + if (!offset_is_valid(tail_object_offset, header_size, UINT64_MAX)) + return -ENODATA; + if (header_size + arena_size < tail_object_offset) + return -ENODATA; + if (header_size + arena_size - tail_object_offset < sizeof(ObjectHeader)) + return -ENODATA; + + if (!hash_table_is_valid(le64toh(f->header->data_hash_table_offset), + le64toh(f->header->data_hash_table_size), + header_size, arena_size, tail_object_offset)) + return -ENODATA; + + if (!hash_table_is_valid(le64toh(f->header->field_hash_table_offset), + le64toh(f->header->field_hash_table_size), + header_size, arena_size, tail_object_offset)) + return -ENODATA; + + uint64_t entry_array_offset = le64toh(f->header->entry_array_offset); + if (!offset_is_valid(entry_array_offset, header_size, tail_object_offset)) + return -ENODATA; + + if (JOURNAL_HEADER_CONTAINS(f->header, tail_entry_array_offset)) { + uint32_t offset = le32toh(f->header->tail_entry_array_offset); + uint32_t n = le32toh(f->header->tail_entry_array_n_entries); + + if (!offset_is_valid(offset, header_size, tail_object_offset)) + return -ENODATA; + if (entry_array_offset > offset) + return -ENODATA; + if (entry_array_offset == 0 && offset != 0) + return -ENODATA; + if ((offset == 0) != (n == 0)) + return -ENODATA; + assert(offset <= header_size + arena_size); + if ((uint64_t) n * journal_file_entry_array_item_size(f) > header_size + arena_size - offset) + return -ENODATA; + } + + if (JOURNAL_HEADER_CONTAINS(f->header, tail_entry_offset)) { + uint64_t offset = le64toh(f->header->tail_entry_offset); + + if (!offset_is_valid(offset, header_size, tail_object_offset)) + return -ENODATA; + + if (offset > 0) { + /* When there is an entry object, then these fields must be filled. */ + if (sd_id128_is_null(f->header->tail_entry_boot_id)) + return -ENODATA; + if (!VALID_REALTIME(le64toh(f->header->head_entry_realtime))) + return -ENODATA; + if (!VALID_REALTIME(le64toh(f->header->tail_entry_realtime))) + return -ENODATA; + if (!VALID_MONOTONIC(le64toh(f->header->tail_entry_realtime))) + return -ENODATA; + } else { + /* Otherwise, the fields must be zero. */ + if (JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(f->header) && + !sd_id128_is_null(f->header->tail_entry_boot_id)) + return -ENODATA; + if (f->header->head_entry_realtime != 0) + return -ENODATA; + if (f->header->tail_entry_realtime != 0) + return -ENODATA; + if (f->header->tail_entry_realtime != 0) + return -ENODATA; + } + } + + /* Verify number of objects */ + uint64_t n_objects = le64toh(f->header->n_objects); + if (n_objects > arena_size / sizeof(ObjectHeader)) + return -ENODATA; + + uint64_t n_entries = le64toh(f->header->n_entries); + if (n_entries > n_objects) + return -ENODATA; + + if (JOURNAL_HEADER_CONTAINS(f->header, n_data) && + le64toh(f->header->n_data) > n_objects) + return -ENODATA; + + if (JOURNAL_HEADER_CONTAINS(f->header, n_fields) && + le64toh(f->header->n_fields) > n_objects) + return -ENODATA; + + if (JOURNAL_HEADER_CONTAINS(f->header, n_tags) && + le64toh(f->header->n_tags) > n_objects) + return -ENODATA; + + if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays) && + le64toh(f->header->n_entry_arrays) > n_objects) + return -ENODATA; + + if (JOURNAL_HEADER_CONTAINS(f->header, tail_entry_array_n_entries) && + le32toh(f->header->tail_entry_array_n_entries) > n_entries) + return -ENODATA; + + if (journal_file_writable(f)) { + sd_id128_t machine_id; + uint8_t state; + int r; + + r = sd_id128_get_machine(&machine_id); + if (ERRNO_IS_NEG_MACHINE_ID_UNSET(r)) /* Gracefully handle the machine ID not being initialized yet */ + machine_id = SD_ID128_NULL; + else if (r < 0) + return r; + + if (!sd_id128_equal(machine_id, f->header->machine_id)) + return log_debug_errno(SYNTHETIC_ERRNO(EHOSTDOWN), + "Trying to open journal file from different host for writing, refusing."); + + state = f->header->state; + + if (state == STATE_ARCHIVED) + return -ESHUTDOWN; /* Already archived */ + if (state == STATE_ONLINE) + return log_debug_errno(SYNTHETIC_ERRNO(EBUSY), + "Journal file %s is already online. Assuming unclean closing.", + f->path); + if (state != STATE_OFFLINE) + return log_debug_errno(SYNTHETIC_ERRNO(EBUSY), + "Journal file %s has unknown state %i.", + f->path, state); + + if (f->header->field_hash_table_size == 0 || f->header->data_hash_table_size == 0) + return -EBADMSG; + } + + return 0; +} + +int journal_file_fstat(JournalFile *f) { + int r; + + assert(f); + assert(f->fd >= 0); + + if (fstat(f->fd, &f->last_stat) < 0) + return -errno; + + f->last_stat_usec = now(CLOCK_MONOTONIC); + + /* Refuse dealing with files that aren't regular */ + r = stat_verify_regular(&f->last_stat); + if (r < 0) + return r; + + /* Refuse appending to files that are already deleted */ + if (f->last_stat.st_nlink <= 0) + return -EIDRM; + + return 0; +} + +static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) { + uint64_t old_size, new_size, old_header_size, old_arena_size; + int r; + + assert(f); + assert(f->header); + + /* We assume that this file is not sparse, and we know that for sure, since we always call + * posix_fallocate() ourselves */ + + if (size > PAGE_ALIGN_DOWN_U64(UINT64_MAX) - offset) + return -EINVAL; + + if (mmap_cache_fd_got_sigbus(f->cache_fd)) + return -EIO; + + old_header_size = le64toh(READ_NOW(f->header->header_size)); + old_arena_size = le64toh(READ_NOW(f->header->arena_size)); + if (old_arena_size > PAGE_ALIGN_DOWN_U64(UINT64_MAX) - old_header_size) + return -EBADMSG; + + old_size = old_header_size + old_arena_size; + + new_size = MAX(PAGE_ALIGN_U64(offset + size), old_header_size); + + if (new_size <= old_size) { + + /* We already pre-allocated enough space, but before + * we write to it, let's check with fstat() if the + * file got deleted, in order make sure we don't throw + * away the data immediately. Don't check fstat() for + * all writes though, but only once ever 10s. */ + + if (f->last_stat_usec + LAST_STAT_REFRESH_USEC > now(CLOCK_MONOTONIC)) + return 0; + + return journal_file_fstat(f); + } + + /* Allocate more space. */ + + if (f->metrics.max_size > 0 && new_size > f->metrics.max_size) + return -E2BIG; + + /* Refuse to go over 4G in compact mode so offsets can be stored in 32-bit. */ + if (JOURNAL_HEADER_COMPACT(f->header) && new_size > UINT32_MAX) + return -E2BIG; + + if (new_size > f->metrics.min_size && f->metrics.keep_free > 0) { + struct statvfs svfs; + + if (fstatvfs(f->fd, &svfs) >= 0) { + uint64_t available; + + available = LESS_BY(u64_multiply_safe(svfs.f_bfree, svfs.f_bsize), f->metrics.keep_free); + + if (new_size - old_size > available) + return -E2BIG; + } + } + + /* Increase by larger blocks at once */ + new_size = ROUND_UP(new_size, FILE_SIZE_INCREASE); + if (f->metrics.max_size > 0 && new_size > f->metrics.max_size) + new_size = f->metrics.max_size; + + /* Note that the glibc fallocate() fallback is very + inefficient, hence we try to minimize the allocation area + as we can. */ + r = posix_fallocate_loop(f->fd, old_size, new_size - old_size); + if (r < 0) + return r; + + f->header->arena_size = htole64(new_size - old_header_size); + + return journal_file_fstat(f); +} + +static int journal_file_move_to( + JournalFile *f, + ObjectType type, + bool keep_always, + uint64_t offset, + uint64_t size, + void **ret) { + + int r; + + assert(f); + assert(ret); + + /* This function may clear, overwrite, or alter previously cached entries with the same type. After + * this function has been called, all previously read objects with the same type may be invalidated, + * hence must be re-read before use. */ + + if (size <= 0) + return -EINVAL; + + if (size > UINT64_MAX - offset) + return -EBADMSG; + + /* Avoid SIGBUS on invalid accesses */ + if (offset + size > (uint64_t) f->last_stat.st_size) { + /* Hmm, out of range? Let's refresh the fstat() data + * first, before we trust that check. */ + + r = journal_file_fstat(f); + if (r < 0) + return r; + + if (offset + size > (uint64_t) f->last_stat.st_size) + return -EADDRNOTAVAIL; + } + + return mmap_cache_fd_get(f->cache_fd, type_to_category(type), keep_always, offset, size, &f->last_stat, ret); +} + +static uint64_t minimum_header_size(JournalFile *f, Object *o) { + + static const uint64_t table[] = { + [OBJECT_DATA] = sizeof(DataObject), + [OBJECT_FIELD] = sizeof(FieldObject), + [OBJECT_ENTRY] = sizeof(EntryObject), + [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject), + [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject), + [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject), + [OBJECT_TAG] = sizeof(TagObject), + }; + + assert(f); + assert(o); + + if (o->object.type == OBJECT_DATA) + return journal_file_data_payload_offset(f); + + if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0) + return sizeof(ObjectHeader); + + return table[o->object.type]; +} + +static int check_object_header(JournalFile *f, Object *o, ObjectType type, uint64_t offset) { + uint64_t s; + + assert(f); + assert(o); + + s = le64toh(READ_NOW(o->object.size)); + if (s == 0) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Attempt to move to uninitialized object: %" PRIu64, + offset); + + if (s < sizeof(ObjectHeader)) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Attempt to move to overly short object with size %"PRIu64": %" PRIu64, + s, offset); + + if (o->object.type <= OBJECT_UNUSED || o->object.type >= _OBJECT_TYPE_MAX) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Attempt to move to object with invalid type (%u): %" PRIu64, + o->object.type, offset); + + if (type > OBJECT_UNUSED && o->object.type != type) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Found %s object while expecting %s object: %" PRIu64, + journal_object_type_to_string(o->object.type), + journal_object_type_to_string(type), + offset); + + if (s < minimum_header_size(f, o)) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Size of %s object (%"PRIu64") is smaller than the minimum object size (%"PRIu64"): %" PRIu64, + journal_object_type_to_string(o->object.type), + s, + minimum_header_size(f, o), + offset); + + return 0; +} + +/* Lightweight object checks. We want this to be fast, so that we won't + * slowdown every journal_file_move_to_object() call too much. */ +static int check_object(JournalFile *f, Object *o, uint64_t offset) { + assert(f); + assert(o); + + switch (o->object.type) { + + case OBJECT_DATA: + if ((le64toh(o->data.entry_offset) == 0) ^ (le64toh(o->data.n_entries) == 0)) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Bad data n_entries: %" PRIu64 ": %" PRIu64, + le64toh(o->data.n_entries), + offset); + + if (le64toh(o->object.size) <= journal_file_data_payload_offset(f)) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Bad data size (<= %zu): %" PRIu64 ": %" PRIu64, + journal_file_data_payload_offset(f), + le64toh(o->object.size), + offset); + + if (!VALID64(le64toh(o->data.next_hash_offset)) || + !VALID64(le64toh(o->data.next_field_offset)) || + !VALID64(le64toh(o->data.entry_offset)) || + !VALID64(le64toh(o->data.entry_array_offset))) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Invalid offset, next_hash_offset=" OFSfmt ", next_field_offset=" OFSfmt ", entry_offset=" OFSfmt ", entry_array_offset=" OFSfmt ": %" PRIu64, + le64toh(o->data.next_hash_offset), + le64toh(o->data.next_field_offset), + le64toh(o->data.entry_offset), + le64toh(o->data.entry_array_offset), + offset); + + break; + + case OBJECT_FIELD: + if (le64toh(o->object.size) <= offsetof(Object, field.payload)) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Bad field size (<= %zu): %" PRIu64 ": %" PRIu64, + offsetof(Object, field.payload), + le64toh(o->object.size), + offset); + + if (!VALID64(le64toh(o->field.next_hash_offset)) || + !VALID64(le64toh(o->field.head_data_offset))) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Invalid offset, next_hash_offset=" OFSfmt ", head_data_offset=" OFSfmt ": %" PRIu64, + le64toh(o->field.next_hash_offset), + le64toh(o->field.head_data_offset), + offset); + break; + + case OBJECT_ENTRY: { + uint64_t sz; + + sz = le64toh(READ_NOW(o->object.size)); + if (sz < offsetof(Object, entry.items) || + (sz - offsetof(Object, entry.items)) % journal_file_entry_item_size(f) != 0) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Bad entry size (<= %zu): %" PRIu64 ": %" PRIu64, + offsetof(Object, entry.items), + sz, + offset); + + if ((sz - offsetof(Object, entry.items)) / journal_file_entry_item_size(f) <= 0) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Invalid number items in entry: %" PRIu64 ": %" PRIu64, + (sz - offsetof(Object, entry.items)) / journal_file_entry_item_size(f), + offset); + + if (le64toh(o->entry.seqnum) <= 0) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Invalid entry seqnum: %" PRIx64 ": %" PRIu64, + le64toh(o->entry.seqnum), + offset); + + if (!VALID_REALTIME(le64toh(o->entry.realtime))) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Invalid entry realtime timestamp: %" PRIu64 ": %" PRIu64, + le64toh(o->entry.realtime), + offset); + + if (!VALID_MONOTONIC(le64toh(o->entry.monotonic))) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Invalid entry monotonic timestamp: %" PRIu64 ": %" PRIu64, + le64toh(o->entry.monotonic), + offset); + + if (sd_id128_is_null(o->entry.boot_id)) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Invalid object entry with an empty boot ID: %" PRIu64, + offset); + + break; + } + + case OBJECT_DATA_HASH_TABLE: + case OBJECT_FIELD_HASH_TABLE: { + uint64_t sz; + + sz = le64toh(READ_NOW(o->object.size)); + if (sz < offsetof(Object, hash_table.items) || + (sz - offsetof(Object, hash_table.items)) % sizeof(HashItem) != 0 || + (sz - offsetof(Object, hash_table.items)) / sizeof(HashItem) <= 0) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Invalid %s hash table size: %" PRIu64 ": %" PRIu64, + journal_object_type_to_string(o->object.type), + sz, + offset); + + break; + } + + case OBJECT_ENTRY_ARRAY: { + uint64_t sz, next; + + sz = le64toh(READ_NOW(o->object.size)); + if (sz < offsetof(Object, entry_array.items) || + (sz - offsetof(Object, entry_array.items)) % journal_file_entry_array_item_size(f) != 0 || + (sz - offsetof(Object, entry_array.items)) / journal_file_entry_array_item_size(f) <= 0) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Invalid object entry array size: %" PRIu64 ": %" PRIu64, + sz, + offset); + /* Here, we request that the offset of each entry array object is in strictly increasing order. */ + next = le64toh(o->entry_array.next_entry_array_offset); + if (!VALID64(next) || (next > 0 && next <= offset)) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Invalid object entry array next_entry_array_offset: %" PRIu64 ": %" PRIu64, + next, + offset); + + break; + } + + case OBJECT_TAG: + if (le64toh(o->object.size) != sizeof(TagObject)) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Invalid object tag size: %" PRIu64 ": %" PRIu64, + le64toh(o->object.size), + offset); + + if (!VALID_EPOCH(le64toh(o->tag.epoch))) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Invalid object tag epoch: %" PRIu64 ": %" PRIu64, + le64toh(o->tag.epoch), offset); + + break; + } + + return 0; +} + +int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset, Object **ret) { + int r; + Object *o; + + assert(f); + + /* Even if this function fails, it may clear, overwrite, or alter previously cached entries with the + * same type. After this function has been called, all previously read objects with the same type may + * be invalidated, hence must be re-read before use. */ + + /* Objects may only be located at multiple of 64 bit */ + if (!VALID64(offset)) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Attempt to move to %s object at non-64-bit boundary: %" PRIu64, + journal_object_type_to_string(type), + offset); + + /* Object may not be located in the file header */ + if (offset < le64toh(f->header->header_size)) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Attempt to move to %s object located in file header: %" PRIu64, + journal_object_type_to_string(type), + offset); + + r = journal_file_move_to(f, type, false, offset, sizeof(ObjectHeader), (void**) &o); + if (r < 0) + return r; + + r = check_object_header(f, o, type, offset); + if (r < 0) + return r; + + r = journal_file_move_to(f, type, false, offset, le64toh(READ_NOW(o->object.size)), (void**) &o); + if (r < 0) + return r; + + r = check_object_header(f, o, type, offset); + if (r < 0) + return r; + + r = check_object(f, o, offset); + if (r < 0) + return r; + + if (ret) + *ret = o; + + return 0; +} + +int journal_file_pin_object(JournalFile *f, Object *o) { + assert(f); + assert(o); + + /* This attaches the mmap window that provides the object to the 'pinning' category. So, reading + * another object with the same type will not invalidate the object, until this function is called + * for another object. */ + return mmap_cache_fd_pin(f->cache_fd, type_to_category(o->object.type), o, le64toh(o->object.size)); +} + +int journal_file_read_object_header(JournalFile *f, ObjectType type, uint64_t offset, Object *ret) { + ssize_t n; + Object o; + int r; + + assert(f); + + /* Objects may only be located at multiple of 64 bit */ + if (!VALID64(offset)) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Attempt to read %s object at non-64-bit boundary: %" PRIu64, + journal_object_type_to_string(type), offset); + + /* Object may not be located in the file header */ + if (offset < le64toh(f->header->header_size)) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Attempt to read %s object located in file header: %" PRIu64, + journal_object_type_to_string(type), offset); + + /* This will likely read too much data but it avoids having to call pread() twice. */ + n = pread(f->fd, &o, sizeof(o), offset); + if (n < 0) + return log_debug_errno(errno, "Failed to read journal %s object at offset: %" PRIu64, + journal_object_type_to_string(type), offset); + + if ((size_t) n < sizeof(o.object)) + return log_debug_errno(SYNTHETIC_ERRNO(EIO), + "Failed to read short %s object at offset: %" PRIu64, + journal_object_type_to_string(type), offset); + + r = check_object_header(f, &o, type, offset); + if (r < 0) + return r; + + if ((size_t) n < minimum_header_size(f, &o)) + return log_debug_errno(SYNTHETIC_ERRNO(EIO), + "Short read while reading %s object: %" PRIu64, + journal_object_type_to_string(type), offset); + + r = check_object(f, &o, offset); + if (r < 0) + return r; + + if (ret) + *ret = o; + + return 0; +} + +static uint64_t inc_seqnum(uint64_t seqnum) { + if (seqnum < UINT64_MAX-1) + return seqnum + 1; + + return 1; /* skip over UINT64_MAX and 0 when we run out of seqnums and start again */ +} + +static uint64_t journal_file_entry_seqnum( + JournalFile *f, + uint64_t *seqnum) { + + uint64_t next_seqnum; + + assert(f); + assert(f->header); + + /* Picks a new sequence number for the entry we are about to add and returns it. */ + + next_seqnum = inc_seqnum(le64toh(f->header->tail_entry_seqnum)); + + /* If an external seqnum counter was passed, we update both the local and the external one, and set + * it to the maximum of both */ + if (seqnum) + *seqnum = next_seqnum = MAX(inc_seqnum(*seqnum), next_seqnum); + + f->header->tail_entry_seqnum = htole64(next_seqnum); + + if (f->header->head_entry_seqnum == 0) + f->header->head_entry_seqnum = htole64(next_seqnum); + + return next_seqnum; +} + +int journal_file_append_object( + JournalFile *f, + ObjectType type, + uint64_t size, + Object **ret_object, + uint64_t *ret_offset) { + + int r; + uint64_t p; + Object *o; + + assert(f); + assert(f->header); + assert(type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX); + assert(size >= sizeof(ObjectHeader)); + + r = journal_file_set_online(f); + if (r < 0) + return r; + + r = journal_file_tail_end_by_mmap(f, &p); + if (r < 0) + return r; + + r = journal_file_allocate(f, p, size); + if (r < 0) + return r; + + r = journal_file_move_to(f, type, false, p, size, (void**) &o); + if (r < 0) + return r; + + o->object = (ObjectHeader) { + .type = type, + .size = htole64(size), + }; + + f->header->tail_object_offset = htole64(p); + f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1); + + if (ret_object) + *ret_object = o; + + if (ret_offset) + *ret_offset = p; + + return 0; +} + +static int journal_file_setup_data_hash_table(JournalFile *f) { + uint64_t s, p; + Object *o; + int r; + + assert(f); + assert(f->header); + + /* We estimate that we need 1 hash table entry per 768 bytes + of journal file and we want to make sure we never get + beyond 75% fill level. Calculate the hash table size for + the maximum file size based on these metrics. */ + + s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem); + if (s < DEFAULT_DATA_HASH_TABLE_SIZE) + s = DEFAULT_DATA_HASH_TABLE_SIZE; + + log_debug("Reserving %"PRIu64" entries in data hash table.", s / sizeof(HashItem)); + + r = journal_file_append_object(f, + OBJECT_DATA_HASH_TABLE, + offsetof(Object, hash_table.items) + s, + &o, &p); + if (r < 0) + return r; + + memzero(o->hash_table.items, s); + + f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items)); + f->header->data_hash_table_size = htole64(s); + + return 0; +} + +static int journal_file_setup_field_hash_table(JournalFile *f) { + uint64_t s, p; + Object *o; + int r; + + assert(f); + assert(f->header); + + /* We use a fixed size hash table for the fields as this + * number should grow very slowly only */ + + s = DEFAULT_FIELD_HASH_TABLE_SIZE; + log_debug("Reserving %"PRIu64" entries in field hash table.", s / sizeof(HashItem)); + + r = journal_file_append_object(f, + OBJECT_FIELD_HASH_TABLE, + offsetof(Object, hash_table.items) + s, + &o, &p); + if (r < 0) + return r; + + memzero(o->hash_table.items, s); + + f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items)); + f->header->field_hash_table_size = htole64(s); + + return 0; +} + +int journal_file_map_data_hash_table(JournalFile *f) { + uint64_t s, p; + void *t; + int r; + + assert(f); + assert(f->header); + + if (f->data_hash_table) + return 0; + + p = le64toh(f->header->data_hash_table_offset); + s = le64toh(f->header->data_hash_table_size); + + r = journal_file_move_to(f, + OBJECT_DATA_HASH_TABLE, + true, + p, s, + &t); + if (r < 0) + return r; + + f->data_hash_table = t; + return 0; +} + +int journal_file_map_field_hash_table(JournalFile *f) { + uint64_t s, p; + void *t; + int r; + + assert(f); + assert(f->header); + + if (f->field_hash_table) + return 0; + + p = le64toh(f->header->field_hash_table_offset); + s = le64toh(f->header->field_hash_table_size); + + r = journal_file_move_to(f, + OBJECT_FIELD_HASH_TABLE, + true, + p, s, + &t); + if (r < 0) + return r; + + f->field_hash_table = t; + return 0; +} + +static int journal_file_link_field( + JournalFile *f, + Object *o, + uint64_t offset, + uint64_t hash) { + + uint64_t p, h, m; + int r; + + assert(f); + assert(f->header); + assert(f->field_hash_table); + assert(o); + assert(offset > 0); + + if (o->object.type != OBJECT_FIELD) + return -EINVAL; + + m = le64toh(READ_NOW(f->header->field_hash_table_size)) / sizeof(HashItem); + if (m <= 0) + return -EBADMSG; + + /* This might alter the window we are looking at */ + o->field.next_hash_offset = o->field.head_data_offset = 0; + + h = hash % m; + p = le64toh(f->field_hash_table[h].tail_hash_offset); + if (p == 0) + f->field_hash_table[h].head_hash_offset = htole64(offset); + else { + r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o); + if (r < 0) + return r; + + o->field.next_hash_offset = htole64(offset); + } + + f->field_hash_table[h].tail_hash_offset = htole64(offset); + + if (JOURNAL_HEADER_CONTAINS(f->header, n_fields)) + f->header->n_fields = htole64(le64toh(f->header->n_fields) + 1); + + return 0; +} + +static int journal_file_link_data( + JournalFile *f, + Object *o, + uint64_t offset, + uint64_t hash) { + + uint64_t p, h, m; + int r; + + assert(f); + assert(f->header); + assert(f->data_hash_table); + assert(o); + assert(offset > 0); + + if (o->object.type != OBJECT_DATA) + return -EINVAL; + + m = le64toh(READ_NOW(f->header->data_hash_table_size)) / sizeof(HashItem); + if (m <= 0) + return -EBADMSG; + + /* This might alter the window we are looking at */ + o->data.next_hash_offset = o->data.next_field_offset = 0; + o->data.entry_offset = o->data.entry_array_offset = 0; + o->data.n_entries = 0; + + h = hash % m; + p = le64toh(f->data_hash_table[h].tail_hash_offset); + if (p == 0) + /* Only entry in the hash table is easy */ + f->data_hash_table[h].head_hash_offset = htole64(offset); + else { + /* Move back to the previous data object, to patch in + * pointer */ + + r = journal_file_move_to_object(f, OBJECT_DATA, p, &o); + if (r < 0) + return r; + + o->data.next_hash_offset = htole64(offset); + } + + f->data_hash_table[h].tail_hash_offset = htole64(offset); + + if (JOURNAL_HEADER_CONTAINS(f->header, n_data)) + f->header->n_data = htole64(le64toh(f->header->n_data) + 1); + + return 0; +} + +static int get_next_hash_offset( + JournalFile *f, + uint64_t *p, + le64_t *next_hash_offset, + uint64_t *depth, + le64_t *header_max_depth) { + + uint64_t nextp; + + assert(f); + assert(p); + assert(next_hash_offset); + assert(depth); + + nextp = le64toh(READ_NOW(*next_hash_offset)); + if (nextp > 0) { + if (nextp <= *p) /* Refuse going in loops */ + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Detected hash item loop in %s, refusing.", f->path); + + (*depth)++; + + /* If the depth of this hash chain is larger than all others we have seen so far, record it */ + if (header_max_depth && journal_file_writable(f)) + *header_max_depth = htole64(MAX(*depth, le64toh(*header_max_depth))); + } + + *p = nextp; + return 0; +} + +int journal_file_find_field_object_with_hash( + JournalFile *f, + const void *field, + uint64_t size, + uint64_t hash, + Object **ret_object, + uint64_t *ret_offset) { + + uint64_t p, osize, h, m, depth = 0; + int r; + + assert(f); + assert(f->header); + assert(field); + assert(size > 0); + + /* If the field hash table is empty, we can't find anything */ + if (le64toh(f->header->field_hash_table_size) <= 0) + return 0; + + /* Map the field hash table, if it isn't mapped yet. */ + r = journal_file_map_field_hash_table(f); + if (r < 0) + return r; + + osize = offsetof(Object, field.payload) + size; + + m = le64toh(READ_NOW(f->header->field_hash_table_size)) / sizeof(HashItem); + if (m <= 0) + return -EBADMSG; + + h = hash % m; + p = le64toh(f->field_hash_table[h].head_hash_offset); + while (p > 0) { + Object *o; + + r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o); + if (r < 0) + return r; + + if (le64toh(o->field.hash) == hash && + le64toh(o->object.size) == osize && + memcmp(o->field.payload, field, size) == 0) { + + if (ret_object) + *ret_object = o; + if (ret_offset) + *ret_offset = p; + + return 1; + } + + r = get_next_hash_offset( + f, + &p, + &o->field.next_hash_offset, + &depth, + JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth) ? &f->header->field_hash_chain_depth : NULL); + if (r < 0) + return r; + } + + return 0; +} + +uint64_t journal_file_hash_data( + JournalFile *f, + const void *data, + size_t sz) { + + assert(f); + assert(f->header); + assert(data || sz == 0); + + /* We try to unify our codebase on siphash, hence new-styled journal files utilizing the keyed hash + * function use siphash. Old journal files use the Jenkins hash. */ + + if (JOURNAL_HEADER_KEYED_HASH(f->header)) + return siphash24(data, sz, f->header->file_id.bytes); + + return jenkins_hash64(data, sz); +} + +int journal_file_find_field_object( + JournalFile *f, + const void *field, + uint64_t size, + Object **ret_object, + uint64_t *ret_offset) { + + assert(f); + assert(field); + assert(size > 0); + + return journal_file_find_field_object_with_hash( + f, + field, size, + journal_file_hash_data(f, field, size), + ret_object, ret_offset); +} + +int journal_file_find_data_object_with_hash( + JournalFile *f, + const void *data, + uint64_t size, + uint64_t hash, + Object **ret_object, + uint64_t *ret_offset) { + + uint64_t p, h, m, depth = 0; + int r; + + assert(f); + assert(f->header); + assert(data || size == 0); + + /* If there's no data hash table, then there's no entry. */ + if (le64toh(f->header->data_hash_table_size) <= 0) + return 0; + + /* Map the data hash table, if it isn't mapped yet. */ + r = journal_file_map_data_hash_table(f); + if (r < 0) + return r; + + m = le64toh(READ_NOW(f->header->data_hash_table_size)) / sizeof(HashItem); + if (m <= 0) + return -EBADMSG; + + h = hash % m; + p = le64toh(f->data_hash_table[h].head_hash_offset); + + while (p > 0) { + Object *o; + void *d; + size_t rsize; + + r = journal_file_move_to_object(f, OBJECT_DATA, p, &o); + if (r < 0) + return r; + + if (le64toh(o->data.hash) != hash) + goto next; + + r = journal_file_data_payload(f, o, p, NULL, 0, 0, &d, &rsize); + if (r < 0) + return r; + assert(r > 0); /* journal_file_data_payload() always returns > 0 if no field is provided. */ + + if (memcmp_nn(data, size, d, rsize) == 0) { + if (ret_object) + *ret_object = o; + + if (ret_offset) + *ret_offset = p; + + return 1; + } + + next: + r = get_next_hash_offset( + f, + &p, + &o->data.next_hash_offset, + &depth, + JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth) ? &f->header->data_hash_chain_depth : NULL); + if (r < 0) + return r; + } + + return 0; +} + +int journal_file_find_data_object( + JournalFile *f, + const void *data, + uint64_t size, + Object **ret_object, + uint64_t *ret_offset) { + + assert(f); + assert(data || size == 0); + + return journal_file_find_data_object_with_hash( + f, + data, size, + journal_file_hash_data(f, data, size), + ret_object, ret_offset); +} + +bool journal_field_valid(const char *p, size_t l, bool allow_protected) { + /* We kinda enforce POSIX syntax recommendations for + environment variables here, but make a couple of additional + requirements. + + http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html */ + + assert(p); + + if (l == SIZE_MAX) + l = strlen(p); + + /* No empty field names */ + if (l <= 0) + return false; + + /* Don't allow names longer than 64 chars */ + if (l > 64) + return false; + + /* Variables starting with an underscore are protected */ + if (!allow_protected && p[0] == '_') + return false; + + /* Don't allow digits as first character */ + if (ascii_isdigit(p[0])) + return false; + + /* Only allow A-Z0-9 and '_' */ + for (const char *a = p; a < p + l; a++) + if ((*a < 'A' || *a > 'Z') && + !ascii_isdigit(*a) && + *a != '_') + return false; + + return true; +} + +static int journal_file_append_field( + JournalFile *f, + const void *field, + uint64_t size, + Object **ret_object, + uint64_t *ret_offset) { + + uint64_t hash, p; + uint64_t osize; + Object *o; + int r; + + assert(f); + assert(field); + assert(size > 0); + + if (!journal_field_valid(field, size, true)) + return -EBADMSG; + + hash = journal_file_hash_data(f, field, size); + + r = journal_file_find_field_object_with_hash(f, field, size, hash, ret_object, ret_offset); + if (r < 0) + return r; + if (r > 0) + return 0; + + osize = offsetof(Object, field.payload) + size; + r = journal_file_append_object(f, OBJECT_FIELD, osize, &o, &p); + if (r < 0) + return r; + + o->field.hash = htole64(hash); + memcpy(o->field.payload, field, size); + + r = journal_file_link_field(f, o, p, hash); + if (r < 0) + return r; + + /* The linking might have altered the window, so let's only pass the offset to hmac which will + * move to the object again if needed. */ + +#if HAVE_GCRYPT + r = journal_file_hmac_put_object(f, OBJECT_FIELD, NULL, p); + if (r < 0) + return r; +#endif + + if (ret_object) { + r = journal_file_move_to_object(f, OBJECT_FIELD, p, ret_object); + if (r < 0) + return r; + } + + if (ret_offset) + *ret_offset = p; + + return 0; +} + +static int maybe_compress_payload(JournalFile *f, uint8_t *dst, const uint8_t *src, uint64_t size, size_t *rsize) { + assert(f); + assert(f->header); + +#if HAVE_COMPRESSION + Compression c; + int r; + + c = JOURNAL_FILE_COMPRESSION(f); + if (c == COMPRESSION_NONE || size < f->compress_threshold_bytes) + return 0; + + r = compress_blob(c, src, size, dst, size - 1, rsize); + if (r < 0) + return log_debug_errno(r, "Failed to compress data object using %s, ignoring: %m", compression_to_string(c)); + + log_debug("Compressed data object %"PRIu64" -> %zu using %s", size, *rsize, compression_to_string(c)); + + return 1; /* compressed */ +#else + return 0; +#endif +} + +static int journal_file_append_data( + JournalFile *f, + const void *data, + uint64_t size, + Object **ret_object, + uint64_t *ret_offset) { + + uint64_t hash, p, osize; + Object *o, *fo; + size_t rsize = 0; + const void *eq; + int r; + + assert(f); + + if (!data || size == 0) + return -EINVAL; + + hash = journal_file_hash_data(f, data, size); + + r = journal_file_find_data_object_with_hash(f, data, size, hash, ret_object, ret_offset); + if (r < 0) + return r; + if (r > 0) + return 0; + + eq = memchr(data, '=', size); + if (!eq) + return -EINVAL; + + osize = journal_file_data_payload_offset(f) + size; + r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p); + if (r < 0) + return r; + + o->data.hash = htole64(hash); + + r = maybe_compress_payload(f, journal_file_data_payload_field(f, o), data, size, &rsize); + if (r <= 0) + /* We don't really care failures, let's continue without compression */ + memcpy_safe(journal_file_data_payload_field(f, o), data, size); + else { + Compression c = JOURNAL_FILE_COMPRESSION(f); + + assert(c >= 0 && c < _COMPRESSION_MAX && c != COMPRESSION_NONE); + + o->object.size = htole64(journal_file_data_payload_offset(f) + rsize); + o->object.flags |= COMPRESSION_TO_OBJECT_FLAG(c); + } + + r = journal_file_link_data(f, o, p, hash); + if (r < 0) + return r; + + /* The linking might have altered the window, so let's refresh our pointer. */ + r = journal_file_move_to_object(f, OBJECT_DATA, p, &o); + if (r < 0) + return r; + +#if HAVE_GCRYPT + r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p); + if (r < 0) + return r; +#endif + + /* Create field object ... */ + r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, NULL); + if (r < 0) + return r; + + /* ... and link it in. */ + o->data.next_field_offset = fo->field.head_data_offset; + fo->field.head_data_offset = le64toh(p); + + if (ret_object) + *ret_object = o; + + if (ret_offset) + *ret_offset = p; + + return 0; +} + +static int maybe_decompress_payload( + JournalFile *f, + uint8_t *payload, + uint64_t size, + Compression compression, + const char *field, + size_t field_length, + size_t data_threshold, + void **ret_data, + size_t *ret_size) { + + assert(f); + + /* We can't read objects larger than 4G on a 32-bit machine */ + if ((uint64_t) (size_t) size != size) + return -E2BIG; + + if (compression != COMPRESSION_NONE) { +#if HAVE_COMPRESSION + size_t rsize; + int r; + + if (field) { + r = decompress_startswith(compression, payload, size, &f->compress_buffer, field, + field_length, '='); + if (r < 0) + return log_debug_errno(r, + "Cannot decompress %s object of length %" PRIu64 ": %m", + compression_to_string(compression), + size); + if (r == 0) { + if (ret_data) + *ret_data = NULL; + if (ret_size) + *ret_size = 0; + return 0; + } + } + + r = decompress_blob(compression, payload, size, &f->compress_buffer, &rsize, 0); + if (r < 0) + return r; + + if (ret_data) + *ret_data = f->compress_buffer; + if (ret_size) + *ret_size = rsize; +#else + return -EPROTONOSUPPORT; +#endif + } else { + if (field && (size < field_length + 1 || memcmp(payload, field, field_length) != 0 || payload[field_length] != '=')) { + if (ret_data) + *ret_data = NULL; + if (ret_size) + *ret_size = 0; + return 0; + } + + if (ret_data) + *ret_data = payload; + if (ret_size) + *ret_size = (size_t) size; + } + + return 1; +} + +int journal_file_data_payload( + JournalFile *f, + Object *o, + uint64_t offset, + const char *field, + size_t field_length, + size_t data_threshold, + void **ret_data, + size_t *ret_size) { + + uint64_t size; + Compression c; + int r; + + assert(f); + assert(!field == (field_length == 0)); /* These must be specified together. */ + + if (!o) { + r = journal_file_move_to_object(f, OBJECT_DATA, offset, &o); + if (r < 0) + return r; + } + + size = le64toh(READ_NOW(o->object.size)); + if (size < journal_file_data_payload_offset(f)) + return -EBADMSG; + + size -= journal_file_data_payload_offset(f); + + c = COMPRESSION_FROM_OBJECT(o); + if (c < 0) + return -EPROTONOSUPPORT; + + return maybe_decompress_payload(f, journal_file_data_payload_field(f, o), size, c, field, + field_length, data_threshold, ret_data, ret_size); +} + +uint64_t journal_file_entry_n_items(JournalFile *f, Object *o) { + uint64_t sz; + + assert(f); + assert(o); + + if (o->object.type != OBJECT_ENTRY) + return 0; + + sz = le64toh(READ_NOW(o->object.size)); + if (sz < offsetof(Object, entry.items)) + return 0; + + return (sz - offsetof(Object, entry.items)) / journal_file_entry_item_size(f); +} + +uint64_t journal_file_entry_array_n_items(JournalFile *f, Object *o) { + uint64_t sz; + + assert(f); + assert(o); + + if (o->object.type != OBJECT_ENTRY_ARRAY) + return 0; + + sz = le64toh(READ_NOW(o->object.size)); + if (sz < offsetof(Object, entry_array.items)) + return 0; + + return (sz - offsetof(Object, entry_array.items)) / journal_file_entry_array_item_size(f); +} + +uint64_t journal_file_hash_table_n_items(Object *o) { + uint64_t sz; + + assert(o); + + if (!IN_SET(o->object.type, OBJECT_DATA_HASH_TABLE, OBJECT_FIELD_HASH_TABLE)) + return 0; + + sz = le64toh(READ_NOW(o->object.size)); + if (sz < offsetof(Object, hash_table.items)) + return 0; + + return (sz - offsetof(Object, hash_table.items)) / sizeof(HashItem); +} + +static void write_entry_array_item(JournalFile *f, Object *o, uint64_t i, uint64_t p) { + assert(f); + assert(o); + + if (JOURNAL_HEADER_COMPACT(f->header)) { + assert(p <= UINT32_MAX); + o->entry_array.items.compact[i] = htole32(p); + } else + o->entry_array.items.regular[i] = htole64(p); +} + +static int link_entry_into_array( + JournalFile *f, + le64_t *first, + le64_t *idx, + le32_t *tail, + le32_t *tidx, + uint64_t p) { + + uint64_t n = 0, ap = 0, q, i, a, hidx; + Object *o; + int r; + + assert(f); + assert(f->header); + assert(first); + assert(idx); + assert(p > 0); + + a = tail ? le32toh(*tail) : le64toh(*first); + hidx = le64toh(READ_NOW(*idx)); + i = tidx ? le32toh(READ_NOW(*tidx)) : hidx; + + while (a > 0) { + r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o); + if (r < 0) + return r; + + n = journal_file_entry_array_n_items(f, o); + if (i < n) { + write_entry_array_item(f, o, i, p); + *idx = htole64(hidx + 1); + if (tidx) + *tidx = htole32(le32toh(*tidx) + 1); + return 0; + } + + i -= n; + ap = a; + a = le64toh(o->entry_array.next_entry_array_offset); + } + + if (hidx > n) + n = (hidx+1) * 2; + else + n = n * 2; + + if (n < 4) + n = 4; + + r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY, + offsetof(Object, entry_array.items) + n * journal_file_entry_array_item_size(f), + &o, &q); + if (r < 0) + return r; + +#if HAVE_GCRYPT + r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q); + if (r < 0) + return r; +#endif + + write_entry_array_item(f, o, i, p); + + if (ap == 0) + *first = htole64(q); + else { + r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o); + if (r < 0) + return r; + + o->entry_array.next_entry_array_offset = htole64(q); + } + + if (tail) + *tail = htole32(q); + + if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays)) + f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1); + + *idx = htole64(hidx + 1); + if (tidx) + *tidx = htole32(1); + + return 0; +} + +static int link_entry_into_array_plus_one( + JournalFile *f, + le64_t *extra, + le64_t *first, + le64_t *idx, + le32_t *tail, + le32_t *tidx, + uint64_t p) { + + uint64_t hidx; + int r; + + assert(f); + assert(extra); + assert(first); + assert(idx); + assert(p > 0); + + hidx = le64toh(READ_NOW(*idx)); + if (hidx == UINT64_MAX) + return -EBADMSG; + if (hidx == 0) + *extra = htole64(p); + else { + le64_t i; + + i = htole64(hidx - 1); + r = link_entry_into_array(f, first, &i, tail, tidx, p); + if (r < 0) + return r; + } + + *idx = htole64(hidx + 1); + return 0; +} + +static int journal_file_link_entry_item(JournalFile *f, uint64_t offset, uint64_t p) { + Object *o; + int r; + + assert(f); + assert(offset > 0); + + r = journal_file_move_to_object(f, OBJECT_DATA, p, &o); + if (r < 0) + return r; + + return link_entry_into_array_plus_one(f, + &o->data.entry_offset, + &o->data.entry_array_offset, + &o->data.n_entries, + JOURNAL_HEADER_COMPACT(f->header) ? &o->data.compact.tail_entry_array_offset : NULL, + JOURNAL_HEADER_COMPACT(f->header) ? &o->data.compact.tail_entry_array_n_entries : NULL, + offset); +} + +static int journal_file_link_entry( + JournalFile *f, + Object *o, + uint64_t offset, + const EntryItem items[], + size_t n_items) { + + int r; + + assert(f); + assert(f->header); + assert(o); + assert(offset > 0); + + if (o->object.type != OBJECT_ENTRY) + return -EINVAL; + + __atomic_thread_fence(__ATOMIC_SEQ_CST); + + /* Link up the entry itself */ + r = link_entry_into_array(f, + &f->header->entry_array_offset, + &f->header->n_entries, + JOURNAL_HEADER_CONTAINS(f->header, tail_entry_array_offset) ? &f->header->tail_entry_array_offset : NULL, + JOURNAL_HEADER_CONTAINS(f->header, tail_entry_array_n_entries) ? &f->header->tail_entry_array_n_entries : NULL, + offset); + if (r < 0) + return r; + + /* log_debug("=> %s seqnr=%"PRIu64" n_entries=%"PRIu64, f->path, o->entry.seqnum, f->header->n_entries); */ + + if (f->header->head_entry_realtime == 0) + f->header->head_entry_realtime = o->entry.realtime; + + f->header->tail_entry_realtime = o->entry.realtime; + f->header->tail_entry_monotonic = o->entry.monotonic; + if (JOURNAL_HEADER_CONTAINS(f->header, tail_entry_offset)) + f->header->tail_entry_offset = htole64(offset); + f->newest_mtime = 0; /* we have a new tail entry now, explicitly invalidate newest boot id/timestamp info */ + + /* Link up the items */ + for (uint64_t i = 0; i < n_items; i++) { + int k; + + /* If we fail to link an entry item because we can't allocate a new entry array, don't fail + * immediately but try to link the other entry items since it might still be possible to link + * those if they don't require a new entry array to be allocated. */ + + k = journal_file_link_entry_item(f, offset, items[i].object_offset); + if (k == -E2BIG) + r = k; + else if (k < 0) + return k; + } + + return r; +} + +static void write_entry_item(JournalFile *f, Object *o, uint64_t i, const EntryItem *item) { + assert(f); + assert(o); + assert(item); + + if (JOURNAL_HEADER_COMPACT(f->header)) { + assert(item->object_offset <= UINT32_MAX); + o->entry.items.compact[i].object_offset = htole32(item->object_offset); + } else { + o->entry.items.regular[i].object_offset = htole64(item->object_offset); + o->entry.items.regular[i].hash = htole64(item->hash); + } +} + +static int journal_file_append_entry_internal( + JournalFile *f, + const dual_timestamp *ts, + const sd_id128_t *boot_id, + const sd_id128_t *machine_id, + uint64_t xor_hash, + const EntryItem items[], + size_t n_items, + uint64_t *seqnum, + sd_id128_t *seqnum_id, + Object **ret_object, + uint64_t *ret_offset) { + + uint64_t np; + uint64_t osize; + Object *o; + int r; + + assert(f); + assert(f->header); + assert(ts); + assert(boot_id); + assert(!sd_id128_is_null(*boot_id)); + assert(items || n_items == 0); + + if (f->strict_order) { + /* If requested be stricter with ordering in this journal file, to make searching via + * bisection fully deterministic. This is an optional feature, so that if desired journal + * files can be written where the ordering is not strictly enforced (in which case bisection + * will yield *a* result, but not the *only* result, when searching for points in + * time). Strict ordering mode is enabled when journald originally writes the files, but + * might not necessarily be if other tools (the remoting tools for example) write journal + * files from combined sources. + * + * Typically, if any of the errors generated here are seen journald will just rotate the + * journal files and start anew. */ + + if (ts->realtime < le64toh(f->header->tail_entry_realtime)) + return log_debug_errno(SYNTHETIC_ERRNO(EREMCHG), + "Realtime timestamp %" PRIu64 " smaller than previous realtime " + "timestamp %" PRIu64 ", refusing entry.", + ts->realtime, le64toh(f->header->tail_entry_realtime)); + + if (sd_id128_equal(*boot_id, f->header->tail_entry_boot_id) && + ts->monotonic < le64toh(f->header->tail_entry_monotonic)) + return log_debug_errno( + SYNTHETIC_ERRNO(ENOTNAM), + "Monotonic timestamp %" PRIu64 + " smaller than previous monotonic timestamp %" PRIu64 + " while having the same boot ID, refusing entry.", + ts->monotonic, + le64toh(f->header->tail_entry_monotonic)); + } + + if (seqnum_id) { + /* Settle the passed in sequence number ID */ + + if (sd_id128_is_null(*seqnum_id)) + *seqnum_id = f->header->seqnum_id; /* Caller has none assigned, then copy the one from the file */ + else if (!sd_id128_equal(*seqnum_id, f->header->seqnum_id)) { + /* Different seqnum IDs? We can't allow entries from multiple IDs end up in the same journal.*/ + if (le64toh(f->header->n_entries) == 0) + f->header->seqnum_id = *seqnum_id; /* Caller has one, and file so far has no entries, then copy the one from the caller */ + else + return log_debug_errno(SYNTHETIC_ERRNO(EILSEQ), + "Sequence number IDs don't match, refusing entry."); + } + } + + if (machine_id && sd_id128_is_null(f->header->machine_id)) + /* Initialize machine ID when not set yet */ + f->header->machine_id = *machine_id; + + osize = offsetof(Object, entry.items) + (n_items * journal_file_entry_item_size(f)); + + r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np); + if (r < 0) + return r; + + o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum)); + o->entry.realtime = htole64(ts->realtime); + o->entry.monotonic = htole64(ts->monotonic); + o->entry.xor_hash = htole64(xor_hash); + o->entry.boot_id = f->header->tail_entry_boot_id = *boot_id; + + for (size_t i = 0; i < n_items; i++) + write_entry_item(f, o, i, &items[i]); + +#if HAVE_GCRYPT + r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np); + if (r < 0) + return r; +#endif + + r = journal_file_link_entry(f, o, np, items, n_items); + if (r < 0) + return r; + + if (ret_object) + *ret_object = o; + + if (ret_offset) + *ret_offset = np; + + return r; +} + +void journal_file_post_change(JournalFile *f) { + assert(f); + + if (f->fd < 0) + return; + + /* inotify() does not receive IN_MODIFY events from file + * accesses done via mmap(). After each access we hence + * trigger IN_MODIFY by truncating the journal file to its + * current size which triggers IN_MODIFY. */ + + __atomic_thread_fence(__ATOMIC_SEQ_CST); + + if (ftruncate(f->fd, f->last_stat.st_size) < 0) + log_debug_errno(errno, "Failed to truncate file to its own size: %m"); +} + +static int post_change_thunk(sd_event_source *timer, uint64_t usec, void *userdata) { + assert(userdata); + + journal_file_post_change(userdata); + + return 1; +} + +static void schedule_post_change(JournalFile *f) { + sd_event *e; + int r; + + assert(f); + assert(f->post_change_timer); + + assert_se(e = sd_event_source_get_event(f->post_change_timer)); + + /* If we are already going down, post the change immediately. */ + if (IN_SET(sd_event_get_state(e), SD_EVENT_EXITING, SD_EVENT_FINISHED)) + goto fail; + + r = sd_event_source_get_enabled(f->post_change_timer, NULL); + if (r < 0) { + log_debug_errno(r, "Failed to get ftruncate timer state: %m"); + goto fail; + } + if (r > 0) + return; + + r = sd_event_source_set_time_relative(f->post_change_timer, f->post_change_timer_period); + if (r < 0) { + log_debug_errno(r, "Failed to set time for scheduling ftruncate: %m"); + goto fail; + } + + r = sd_event_source_set_enabled(f->post_change_timer, SD_EVENT_ONESHOT); + if (r < 0) { + log_debug_errno(r, "Failed to enable scheduled ftruncate: %m"); + goto fail; + } + + return; + +fail: + /* On failure, let's simply post the change immediately. */ + journal_file_post_change(f); +} + +/* Enable coalesced change posting in a timer on the provided sd_event instance */ +int journal_file_enable_post_change_timer(JournalFile *f, sd_event *e, usec_t t) { + _cleanup_(sd_event_source_unrefp) sd_event_source *timer = NULL; + int r; + + assert(f); + assert_return(!f->post_change_timer, -EINVAL); + assert(e); + assert(t); + + /* If we are already going down, we cannot install the timer. + * In such case, the caller needs to call journal_file_post_change() explicitly. */ + if (IN_SET(sd_event_get_state(e), SD_EVENT_EXITING, SD_EVENT_FINISHED)) + return 0; + + r = sd_event_add_time(e, &timer, CLOCK_MONOTONIC, 0, 0, post_change_thunk, f); + if (r < 0) + return r; + + r = sd_event_source_set_enabled(timer, SD_EVENT_OFF); + if (r < 0) + return r; + + f->post_change_timer = TAKE_PTR(timer); + f->post_change_timer_period = t; + + return 1; +} + +static int entry_item_cmp(const EntryItem *a, const EntryItem *b) { + return CMP(ASSERT_PTR(a)->object_offset, ASSERT_PTR(b)->object_offset); +} + +static size_t remove_duplicate_entry_items(EntryItem items[], size_t n) { + size_t j = 1; + + assert(items || n == 0); + + if (n <= 1) + return n; + + for (size_t i = 1; i < n; i++) + if (items[i].object_offset != items[j - 1].object_offset) + items[j++] = items[i]; + + return j; +} + +int journal_file_append_entry( + JournalFile *f, + const dual_timestamp *ts, + const sd_id128_t *boot_id, + const struct iovec iovec[], + size_t n_iovec, + uint64_t *seqnum, + sd_id128_t *seqnum_id, + Object **ret_object, + uint64_t *ret_offset) { + + _cleanup_free_ EntryItem *items_alloc = NULL; + EntryItem *items; + uint64_t xor_hash = 0; + struct dual_timestamp _ts; + sd_id128_t _boot_id, _machine_id, *machine_id; + int r; + + assert(f); + assert(f->header); + assert(iovec); + assert(n_iovec > 0); + + if (ts) { + if (!VALID_REALTIME(ts->realtime)) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Invalid realtime timestamp %" PRIu64 ", refusing entry.", + ts->realtime); + if (!VALID_MONOTONIC(ts->monotonic)) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "Invalid monotomic timestamp %" PRIu64 ", refusing entry.", + ts->monotonic); + } else { + dual_timestamp_now(&_ts); + ts = &_ts; + } + + if (boot_id) { + if (sd_id128_is_null(*boot_id)) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "Empty boot ID, refusing entry."); + } else { + r = sd_id128_get_boot(&_boot_id); + if (r < 0) + return r; + + boot_id = &_boot_id; + } + + r = sd_id128_get_machine(&_machine_id); + if (ERRNO_IS_NEG_MACHINE_ID_UNSET(r)) + /* Gracefully handle the machine ID not being initialized yet */ + machine_id = NULL; + else if (r < 0) + return r; + else + machine_id = &_machine_id; + +#if HAVE_GCRYPT + r = journal_file_maybe_append_tag(f, ts->realtime); + if (r < 0) + return r; +#endif + + if (n_iovec < ALLOCA_MAX / sizeof(EntryItem) / 2) + items = newa(EntryItem, n_iovec); + else { + items_alloc = new(EntryItem, n_iovec); + if (!items_alloc) + return -ENOMEM; + + items = items_alloc; + } + + for (size_t i = 0; i < n_iovec; i++) { + uint64_t p; + Object *o; + + r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p); + if (r < 0) + return r; + + /* When calculating the XOR hash field, we need to take special care if the "keyed-hash" + * journal file flag is on. We use the XOR hash field to quickly determine the identity of a + * specific record, and give records with otherwise identical position (i.e. match in seqno, + * timestamp, …) a stable ordering. But for that we can't have it that the hash of the + * objects in each file is different since they are keyed. Hence let's calculate the Jenkins + * hash here for that. This also has the benefit that cursors for old and new journal files + * are completely identical (they include the XOR hash after all). For classic Jenkins-hash + * files things are easier, we can just take the value from the stored record directly. */ + + if (JOURNAL_HEADER_KEYED_HASH(f->header)) + xor_hash ^= jenkins_hash64(iovec[i].iov_base, iovec[i].iov_len); + else + xor_hash ^= le64toh(o->data.hash); + + items[i] = (EntryItem) { + .object_offset = p, + .hash = le64toh(o->data.hash), + }; + } + + /* Order by the position on disk, in order to improve seek + * times for rotating media. */ + typesafe_qsort(items, n_iovec, entry_item_cmp); + n_iovec = remove_duplicate_entry_items(items, n_iovec); + + r = journal_file_append_entry_internal( + f, + ts, + boot_id, + machine_id, + xor_hash, + items, + n_iovec, + seqnum, + seqnum_id, + ret_object, + ret_offset); + + /* If the memory mapping triggered a SIGBUS then we return an + * IO error and ignore the error code passed down to us, since + * it is very likely just an effect of a nullified replacement + * mapping page */ + + if (mmap_cache_fd_got_sigbus(f->cache_fd)) + r = -EIO; + + if (f->post_change_timer) + schedule_post_change(f); + else + journal_file_post_change(f); + + return r; +} + +typedef struct ChainCacheItem { + uint64_t first; /* The offset of the entry array object at the beginning of the chain, + * i.e., le64toh(f->header->entry_array_offset), or le64toh(o->data.entry_offset). */ + uint64_t array; /* The offset of the cached entry array object. */ + uint64_t begin; /* The offset of the first item in the cached array. */ + uint64_t total; /* The total number of items in all arrays before the cached one in the chain. */ + uint64_t last_index; /* The last index we looked at in the cached array, to optimize locality when bisecting. */ +} ChainCacheItem; + +static void chain_cache_put( + OrderedHashmap *h, + ChainCacheItem *ci, + uint64_t first, + uint64_t array, + uint64_t begin, + uint64_t total, + uint64_t last_index) { + + assert(h); + + if (!ci) { + /* If the chain item to cache for this chain is the + * first one it's not worth caching anything */ + if (array == first) + return; + + if (ordered_hashmap_size(h) >= CHAIN_CACHE_MAX) { + ci = ordered_hashmap_steal_first(h); + assert(ci); + } else { + ci = new(ChainCacheItem, 1); + if (!ci) + return; + } + + ci->first = first; + + if (ordered_hashmap_put(h, &ci->first, ci) < 0) { + free(ci); + return; + } + } else + assert(ci->first == first); + + ci->array = array; + ci->begin = begin; + ci->total = total; + ci->last_index = last_index; +} + +static int bump_array_index(uint64_t *i, direction_t direction, uint64_t n) { + assert(i); + + /* Increase or decrease the specified index, in the right direction. */ + + if (direction == DIRECTION_DOWN) { + if (*i >= n - 1) + return 0; + + (*i)++; + } else { + if (*i <= 0) + return 0; + + (*i)--; + } + + return 1; +} + +static int bump_entry_array( + JournalFile *f, + Object *o, /* the current entry array object. */ + uint64_t offset, /* the offset of the entry array object. */ + uint64_t first, /* The offset of the first entry array object in the chain. */ + direction_t direction, + uint64_t *ret) { + + int r; + + assert(f); + assert(ret); + + if (direction == DIRECTION_DOWN) { + assert(o); + assert(o->object.type == OBJECT_ENTRY_ARRAY); + + *ret = le64toh(o->entry_array.next_entry_array_offset); + } else { + + /* Entry array chains are a singly linked list, so to find the previous array in the chain, we have + * to start iterating from the top. */ + + assert(offset > 0); + + uint64_t p = first, q = 0; + while (p > 0 && p != offset) { + r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, p, &o); + if (r < 0) + return r; + + q = p; + p = le64toh(o->entry_array.next_entry_array_offset); + } + + /* If we can't find the previous entry array in the entry array chain, we're likely dealing with a + * corrupted journal file. */ + if (p == 0) + return -EBADMSG; + + *ret = q; + } + + return *ret > 0; +} + +static int generic_array_get( + JournalFile *f, + uint64_t first, /* The offset of the first entry array object in the chain. */ + uint64_t i, /* The index of the target object counted from the beginning of the entry array chain. */ + direction_t direction, + Object **ret_object, /* The found object. */ + uint64_t *ret_offset) { /* The offset of the found object. */ + + uint64_t a, t = 0, k; + ChainCacheItem *ci; + Object *o = NULL; + int r; + + assert(f); + + /* FIXME: fix return value assignment on success. */ + + a = first; + + /* Try the chain cache first */ + ci = ordered_hashmap_get(f->chain_cache, &first); + if (ci && i > ci->total) { + a = ci->array; + i -= ci->total; + t = ci->total; + } + + while (a > 0) { + r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o); + if (IN_SET(r, -EBADMSG, -EADDRNOTAVAIL)) { + /* If there's corruption and we're going downwards, let's pretend we reached the + * final entry in the entry array chain. */ + + if (direction == DIRECTION_DOWN) + return 0; + + /* If there's corruption and we're going upwards, move back to the previous entry + * array and start iterating entries from there. */ + + i = UINT64_MAX; + break; + } + if (r < 0) + return r; + + k = journal_file_entry_array_n_items(f, o); + if (k == 0) + return 0; + + if (i < k) + break; + + /* The index is larger than the number of elements in the array. Let's move to the next array. */ + i -= k; + t += k; + a = le64toh(o->entry_array.next_entry_array_offset); + } + + /* If we've found the right location, now look for the first non-corrupt entry object (in the right + * direction). */ + + while (a > 0) { + if (i == UINT64_MAX) { + r = bump_entry_array(f, o, a, first, direction, &a); + if (r <= 0) + return r; + + r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o); + if (r < 0) + return r; + + k = journal_file_entry_array_n_items(f, o); + if (k == 0) + break; + + if (direction == DIRECTION_DOWN) + i = 0; + else { + /* We moved to the previous array. The total must be decreased. */ + if (t < k) + return -EBADMSG; /* chain cache is broken ? */ + + i = k - 1; + t -= k; + } + } + + do { + uint64_t p; + + p = journal_file_entry_array_item(f, o, i); + + r = journal_file_move_to_object(f, OBJECT_ENTRY, p, ret_object); + if (r >= 0) { + /* Let's cache this item for the next invocation */ + chain_cache_put(f->chain_cache, ci, first, a, journal_file_entry_array_item(f, o, 0), t, i); + + if (ret_offset) + *ret_offset = p; + + return 1; + } + if (!IN_SET(r, -EADDRNOTAVAIL, -EBADMSG)) + return r; + + /* OK, so this entry is borked. Most likely some entry didn't get synced to + * disk properly, let's see if the next one might work for us instead. */ + log_debug_errno(r, "Entry item %" PRIu64 " is bad, skipping over it.", i); + + } while (bump_array_index(&i, direction, k) > 0); + + /* All entries tried in the above do-while loop are broken. Let's move to the next (or previous) array. */ + + if (direction == DIRECTION_DOWN) + /* We are going to the next array, the total must be incremented. */ + t += k; + + i = UINT64_MAX; + } + + return 0; +} + +enum { + TEST_FOUND, /* The current object passes the test. */ + TEST_LEFT, /* The current object is in an earlier position, and the object we are looking + * for should exist in a later position. */ + TEST_RIGHT, /* The current object is in a later position, and the object we are looking for + * should exist in an earlier position. */ + TEST_GOTO_NEXT, /* No matching object exists in this array and earlier arrays, go to the next array. */ + TEST_GOTO_PREVIOUS, /* No matching object exists in this array and later arrays, go to the previous array. */ +}; + +static int generic_array_bisect_step( + JournalFile *f, + Object *array, /* entry array object */ + uint64_t i, /* index of the entry item in the array we will test. */ + uint64_t needle, + int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle), + direction_t direction, + uint64_t *m, /* The maximum number of the entries we will check in the array. */ + uint64_t *left, /* The index of the left boundary in the array. */ + uint64_t *right) { /* The index of the right boundary in the array. */ + + uint64_t p; + int r; + + assert(f); + assert(array); + assert(test_object); + assert(m); + assert(left); + assert(right); + assert(*left <= i); + assert(i <= *right); + assert(*right < *m); + + p = journal_file_entry_array_item(f, array, i); + if (p <= 0) + r = -EBADMSG; + else + r = test_object(f, p, needle); + if (IN_SET(r, -EBADMSG, -EADDRNOTAVAIL)) { + log_debug_errno(r, "Encountered invalid entry while bisecting, cutting algorithm short."); + + if (i == *left) { + /* This happens on two situations: + * + * a) i == 0 (hence, *left == 0): + * The first entry in the array is corrupted, let's go back to the previous array. + * + * b) *right == *left or *left + 1, and we are going to downwards: + * In that case, the (i-1)-th object has been already tested in the previous call, + * which returned TEST_LEFT. See below. So, there is no matching entry in this + * array nor in the whole entry array chain. */ + assert(i == 0 || (*right - *left <= 1 && direction == DIRECTION_DOWN)); + return TEST_GOTO_PREVIOUS; + } + + /* Otherwise, cutting the array short. So, here we limit the number of elements we will see + * in this array, and set the right boundary to the last possibly non-corrupted object. */ + *m = i; + *right = i - 1; + return TEST_RIGHT; + } + if (r < 0) + return r; + + if (r == TEST_FOUND) + /* There may be multiple entries that match with the needle. When the direction is down, we + * need to find the first matching entry, hence the right boundary can be moved, but the left + * one cannot. Similarly, when the direction is up, we need to find the last matching entry, + * hence the left boundary can be moved, but the right one cannot. */ + r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT; + + if (r == TEST_RIGHT) { + /* Currently, left --- needle --- i --- right, hence we can move the right boundary to i. */ + if (direction == DIRECTION_DOWN) + *right = i; + else { + if (i == 0) + return TEST_GOTO_PREVIOUS; + *right = i - 1; + } + } else { + /* Currently, left --- i --- needle --- right, hence we can move the left boundary to i. */ + if (direction == DIRECTION_DOWN) { + /* Note, here *m is always positive, as by the assertions at the beginning, we have + * 0 <= *left <= i <= *right < m */ + if (i == *m - 1) + return TEST_GOTO_NEXT; + + *left = i + 1; + } else + *left = i; + } + + return r; +} + +static int generic_array_bisect( + JournalFile *f, + uint64_t first, /* The offset of the first entry array object in the chain. */ + uint64_t n, /* The total number of elements in the chain of the entry array. */ + uint64_t needle, /* The target value (e.g. seqnum, monotonic, realtime, ...). */ + int (*test_object)(JournalFile *f, + uint64_t p, /* the offset of the (data or entry) object that will be tested. */ + uint64_t needle), + direction_t direction, + Object **ret_object, /* The found object. */ + uint64_t *ret_offset, /* The offset of the found object. */ + uint64_t *ret_idx) { /* The index of the found object counted from the beginning of the entry array chain. */ + + /* Given an entry array chain, this function finds the object "closest" to the given needle in the + * chain, taking into account the provided direction. A function can be provided to determine how + * an object is matched against the given needle. + * + * Given a journal file, the offset of an object and the needle, the test_object() function should + * return TEST_RIGHT if the needle is located earlier in the entry array chain, TEST_LEFT if the + * needle is located later in the entry array chain, and TEST_FOUND if the object matches the needle. + * If test_object() returns TEST_FOUND for a specific object, that object's information will be used + * to populate the return values of this function. If test_object() never returns TEST_FOUND, the + * return values are populated with the details of one of the objects closest to the needle. If the + * direction is DIRECTION_UP, the earlier object is used. Otherwise, the later object is used. + * If there are multiple objects that test_object() return TEST_FOUND for, then the first matching + * object returned when direction is DIRECTION_DOWN. Otherwise the last object is returned. */ + + uint64_t a, p, t = 0, i, last_index = UINT64_MAX; + ChainCacheItem *ci; + Object *array; + int r; + + assert(f); + assert(test_object); + + if (n <= 0) + return 0; + + /* Start with the first array in the chain */ + a = first; + + ci = ordered_hashmap_get(f->chain_cache, &first); + if (ci && n > ci->total && ci->begin != 0) { + /* Ah, we have iterated this bisection array chain previously! Let's see if we can skip ahead + * in the chain, as far as the last time. But we can't jump backwards in the chain, so let's + * check that first. */ + + r = test_object(f, ci->begin, needle); + if (IN_SET(r, -EBADMSG, -EADDRNOTAVAIL)) + log_debug_errno(r, "Cached entry is corrupted, ignoring: %m"); + else if (r < 0) + return r; + else if (r == TEST_LEFT) { + /* OK, what we are looking for is right of the begin of this EntryArray, so let's + * jump straight to previously cached array in the chain */ + + a = ci->array; + n -= ci->total; + t = ci->total; + last_index = ci->last_index; + } + } + + while (a > 0) { + uint64_t left, right, k, m, m_original; + + r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array); + if (r < 0) + return r; + + k = journal_file_entry_array_n_items(f, array); + m = m_original = MIN(k, n); + if (m <= 0) + return 0; + + left = 0; + right = m - 1; + + if (direction == DIRECTION_UP) { + /* If we're going upwards, the last entry of the previous array may pass the test, + * and the first entry of the current array may not pass. In that case, the last + * entry of the previous array must be returned. Hence, we need to test the first + * entry of the current array. */ + r = generic_array_bisect_step(f, array, 0, needle, test_object, direction, &m, &left, &right); + if (r < 0) + return r; + if (r == TEST_GOTO_PREVIOUS) + goto previous; + } + + /* Test the last entry of this array, to determine if we should go to the next array. */ + r = generic_array_bisect_step(f, array, right, needle, test_object, direction, &m, &left, &right); + if (r < 0) + return r; + if (r == TEST_GOTO_PREVIOUS) + goto previous; + + /* The expected entry should be in this array, (or the last entry of the previous array). */ + if (r == TEST_RIGHT) { + + /* If we cached the last index we looked at, let's try to not to jump too wildly + * around and see if we can limit the range to look at early to the immediate + * neighbors of the last index we looked at. */ + + if (last_index > 0 && left < last_index - 1 && last_index - 1 < right) { + r = generic_array_bisect_step(f, array, last_index - 1, needle, test_object, direction, &m, &left, &right); + if (r < 0) + return r; + if (r == TEST_GOTO_PREVIOUS) + goto previous; + } + + if (last_index < UINT64_MAX && left < last_index + 1 && last_index + 1 < right) { + r = generic_array_bisect_step(f, array, last_index + 1, needle, test_object, direction, &m, &left, &right); + if (r < 0) + return r; + if (r == TEST_GOTO_PREVIOUS) + goto previous; + } + + for (;;) { + if (left == right) { + /* We found one or more corrupted entries in generic_array_bisect_step(). + * In that case, the entry pointed by 'right' may not be tested. + * + * When we are going to downwards, the entry object pointed by 'left' + * has not been tested yet, Hence, even if left == right, we still + * have to check the final entry to see if it actually matches. + * + * On the other hand, when we are going to upwards, the entry pointed + * by 'left' is always tested, So, it is not necessary to test the + * final entry again. */ + if (m != m_original && direction == DIRECTION_DOWN) { + r = generic_array_bisect_step(f, array, left, needle, test_object, direction, &m, &left, &right); + if (r < 0) + return r; + if (IN_SET(r, TEST_GOTO_PREVIOUS, TEST_GOTO_NEXT)) + return 0; /* The entry does not pass the test, or is corrupted */ + + assert(TEST_RIGHT); + assert(left == right); + } + + i = left; + goto found; + } + + assert(left < right); + i = (left + right + (direction == DIRECTION_UP)) / 2; + + r = generic_array_bisect_step(f, array, i, needle, test_object, direction, &m, &left, &right); + if (r < 0) + return r; + if (r == TEST_GOTO_PREVIOUS) + goto previous; + if (r == TEST_GOTO_NEXT) + return 0; /* Found a corrupt entry, and the array was cut short. */ + } + } + + /* Not found in this array (or the last entry of this array should be returned), go to the next array. */ + assert(r == (direction == DIRECTION_DOWN ? TEST_GOTO_NEXT : TEST_LEFT)); + + if (k >= n) { + if (direction == DIRECTION_UP) { + assert(n > 0); + i = n - 1; + goto found; + } + + return 0; + } + + n -= k; + t += k; + last_index = UINT64_MAX; + a = le64toh(array->entry_array.next_entry_array_offset); + } + + return 0; + +previous: + /* Not found in the current array, return the last entry of the previous array. */ + assert(r == TEST_GOTO_PREVIOUS); + + /* The current array is the first in the chain. no previous array. */ + if (t == 0) + return 0; + + /* When we are going downwards, there is no matching entries in the previous array. */ + if (direction == DIRECTION_DOWN) + return 0; + + /* Indicate to go to the previous array later. Note, do not move to the previous array here, + * as that may invalidate the current array object in the mmap cache and + * journal_file_entry_array_item() below may read invalid address. */ + i = UINT64_MAX; + +found: + p = journal_file_entry_array_item(f, array, 0); + if (p <= 0) + return -EBADMSG; + + /* Let's cache this item for the next invocation */ + chain_cache_put(f->chain_cache, ci, first, a, p, t, i); + + if (i == UINT64_MAX) { + uint64_t m; + + /* Get the last entry of the previous array. */ + + r = bump_entry_array(f, NULL, a, first, DIRECTION_UP, &a); + if (r <= 0) + return r; + + r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array); + if (r < 0) + return r; + + m = journal_file_entry_array_n_items(f, array); + if (m == 0 || t < m) + return -EBADMSG; + + t -= m; + i = m - 1; + } + + p = journal_file_entry_array_item(f, array, i); + if (p == 0) + return -EBADMSG; + + if (ret_object) { + r = journal_file_move_to_object(f, OBJECT_ENTRY, p, ret_object); + if (r < 0) + return r; + } + + if (ret_offset) + *ret_offset = p; + + if (ret_idx) + *ret_idx = t + i; + + return 1; +} + +static int generic_array_bisect_for_data( + JournalFile *f, + Object *d, + uint64_t needle, + int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle), + direction_t direction, + Object **ret_object, + uint64_t *ret_offset) { + + uint64_t extra, first, n; + int r; + + assert(f); + assert(d); + assert(d->object.type == OBJECT_DATA); + assert(test_object); + + n = le64toh(d->data.n_entries); + if (n <= 0) + return 0; + n--; /* n_entries is the number of entries linked to the data object, including the 'extra' entry. */ + + extra = le64toh(d->data.entry_offset); + first = le64toh(d->data.entry_array_offset); + + /* This bisects the array in object 'first', but first checks an extra. */ + r = test_object(f, extra, needle); + if (r < 0) + return r; + + if (direction == DIRECTION_DOWN) { + /* If we are going downwards, then we need to return the first object that passes the test. + * When there is no object that passes the test, we need to return the first object that + * test_object() returns TEST_RIGHT for. */ + if (IN_SET(r, + TEST_FOUND, /* The 'extra' object passes the test. Hence, this is the first + * object that passes the test. */ + TEST_RIGHT)) /* The 'extra' object is the first object that test_object() returns + * TEST_RIGHT for, and no object exists even in the chained arrays + * that passes the test. */ + goto use_extra; /* The 'extra' object is exactly the one we are looking for. It is + * not necessary to bisect the chained arrays. */ + + /* Otherwise, the 'extra' object is not the one we are looking for. Search in the arrays. */ + + } else { + /* If we are going upwards, then we need to return the last object that passes the test. + * When there is no object that passes the test, we need to return the the last object that + * test_object() returns TEST_LEFT for. */ + if (r == TEST_RIGHT) + return 0; /* Not only the 'extra' object, but also all objects in the chained arrays + * will never get TEST_FOUND or TEST_LEFT. The object we are looking for + * does not exist. */ + + /* Even if the 'extra' object passes the test, there may be multiple objects in the arrays + * that also pass the test. Hence, we need to bisect the arrays for finding the last matching + * object. */ + } + + r = generic_array_bisect(f, first, n, needle, test_object, direction, ret_object, ret_offset, NULL); + if (r != 0) + return r; /* When > 0, the found object is the first (or last, when DIRECTION_UP) object. + * Hence, return the found object now. */ + + /* No matching object found in the chained arrays. + * DIRECTION_DOWN : the 'extra' object neither matches the condition. There is no matching object. + * DIRECTION_UP : the 'extra' object matches the condition. So, return it. */ + if (direction == DIRECTION_DOWN) + return 0; + +use_extra: + if (ret_object) { + r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, ret_object); + if (r < 0) + return r; + } + + if (ret_offset) + *ret_offset = extra; + + return 1; +} + +static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) { + assert(f); + assert(p > 0); + + if (p == needle) + return TEST_FOUND; + else if (p < needle) + return TEST_LEFT; + else + return TEST_RIGHT; +} + +int journal_file_move_to_entry_by_offset( + JournalFile *f, + uint64_t p, + direction_t direction, + Object **ret_object, + uint64_t *ret_offset) { + + assert(f); + assert(f->header); + + return generic_array_bisect( + f, + le64toh(f->header->entry_array_offset), + le64toh(f->header->n_entries), + p, + test_object_offset, + direction, + ret_object, ret_offset, NULL); +} + +static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) { + uint64_t sq; + Object *o; + int r; + + assert(f); + assert(p > 0); + + r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o); + if (r < 0) + return r; + + sq = le64toh(READ_NOW(o->entry.seqnum)); + if (sq == needle) + return TEST_FOUND; + else if (sq < needle) + return TEST_LEFT; + else + return TEST_RIGHT; +} + +int journal_file_move_to_entry_by_seqnum( + JournalFile *f, + uint64_t seqnum, + direction_t direction, + Object **ret_object, + uint64_t *ret_offset) { + + assert(f); + assert(f->header); + + return generic_array_bisect( + f, + le64toh(f->header->entry_array_offset), + le64toh(f->header->n_entries), + seqnum, + test_object_seqnum, + direction, + ret_object, ret_offset, NULL); +} + +static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) { + Object *o; + uint64_t rt; + int r; + + assert(f); + assert(p > 0); + + r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o); + if (r < 0) + return r; + + rt = le64toh(READ_NOW(o->entry.realtime)); + if (rt == needle) + return TEST_FOUND; + else if (rt < needle) + return TEST_LEFT; + else + return TEST_RIGHT; +} + +int journal_file_move_to_entry_by_realtime( + JournalFile *f, + uint64_t realtime, + direction_t direction, + Object **ret_object, + uint64_t *ret_offset) { + + assert(f); + assert(f->header); + + return generic_array_bisect( + f, + le64toh(f->header->entry_array_offset), + le64toh(f->header->n_entries), + realtime, + test_object_realtime, + direction, + ret_object, ret_offset, NULL); +} + +static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) { + Object *o; + uint64_t m; + int r; + + assert(f); + assert(p > 0); + + r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o); + if (r < 0) + return r; + + m = le64toh(READ_NOW(o->entry.monotonic)); + if (m == needle) + return TEST_FOUND; + else if (m < needle) + return TEST_LEFT; + else + return TEST_RIGHT; +} + +static int find_data_object_by_boot_id( + JournalFile *f, + sd_id128_t boot_id, + Object **ret_object, + uint64_t *ret_offset) { + + char t[STRLEN("_BOOT_ID=") + 32 + 1] = "_BOOT_ID="; + + assert(f); + + sd_id128_to_string(boot_id, t + 9); + return journal_file_find_data_object(f, t, sizeof(t) - 1, ret_object, ret_offset); +} + +int journal_file_move_to_entry_by_monotonic( + JournalFile *f, + sd_id128_t boot_id, + uint64_t monotonic, + direction_t direction, + Object **ret_object, + uint64_t *ret_offset) { + + Object *o; + int r; + + assert(f); + + r = find_data_object_by_boot_id(f, boot_id, &o, NULL); + if (r <= 0) + return r; + + return generic_array_bisect_for_data( + f, + o, + monotonic, + test_object_monotonic, + direction, + ret_object, ret_offset); +} + +void journal_file_reset_location(JournalFile *f) { + assert(f); + + f->location_type = LOCATION_HEAD; + f->current_offset = 0; + f->current_seqnum = 0; + f->current_realtime = 0; + f->current_monotonic = 0; + zero(f->current_boot_id); + f->current_xor_hash = 0; + + /* Also reset the previous reading direction. Otherwise, next_beyond_location() may wrongly handle we + * already hit EOF. See issue #29216. */ + f->last_direction = _DIRECTION_INVALID; +} + +void journal_file_save_location(JournalFile *f, Object *o, uint64_t offset) { + assert(f); + assert(o); + + f->location_type = LOCATION_SEEK; + f->current_offset = offset; + f->current_seqnum = le64toh(o->entry.seqnum); + f->current_realtime = le64toh(o->entry.realtime); + f->current_monotonic = le64toh(o->entry.monotonic); + f->current_boot_id = o->entry.boot_id; + f->current_xor_hash = le64toh(o->entry.xor_hash); +} + +static bool check_properly_ordered(uint64_t new_offset, uint64_t old_offset, direction_t direction) { + + /* Consider it an error if any of the two offsets is uninitialized */ + if (old_offset == 0 || new_offset == 0) + return false; + + /* If we go down, the new offset must be larger than the old one. */ + return direction == DIRECTION_DOWN ? + new_offset > old_offset : + new_offset < old_offset; +} + +int journal_file_next_entry( + JournalFile *f, + uint64_t p, + direction_t direction, + Object **ret_object, + uint64_t *ret_offset) { + + uint64_t i, n, q; + Object *o; + int r; + + assert(f); + assert(f->header); + + /* FIXME: fix return value assignment. */ + + n = le64toh(READ_NOW(f->header->n_entries)); + if (n <= 0) + return 0; + + /* When the input offset 'p' is zero, return the first (or last on DIRECTION_UP) entry. */ + if (p == 0) + return generic_array_get(f, + le64toh(f->header->entry_array_offset), + direction == DIRECTION_DOWN ? 0 : n - 1, + direction, + ret_object, ret_offset); + + /* Otherwise, first find the nearest entry object. */ + r = generic_array_bisect(f, + le64toh(f->header->entry_array_offset), + le64toh(f->header->n_entries), + p, + test_object_offset, + direction, + ret_object ? &o : NULL, &q, &i); + if (r <= 0) + return r; + + assert(direction == DIRECTION_DOWN ? p <= q : q <= p); + + /* If the input offset 'p' points to an entry object, generic_array_bisect() should provides + * the same offset, and the index needs to be shifted. Otherwise, use the found object as is, + * as it is the nearest entry object from the input offset 'p'. */ + + if (p != q) + goto found; + + r = bump_array_index(&i, direction, n); + if (r <= 0) + return r; + + /* And jump to it */ + r = generic_array_get(f, le64toh(f->header->entry_array_offset), i, direction, ret_object ? &o : NULL, &q); + if (r <= 0) + return r; + + /* Ensure our array is properly ordered. */ + if (!check_properly_ordered(q, p, direction)) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "%s: entry array not properly ordered at entry index %" PRIu64, + f->path, i); +found: + if (ret_object) + *ret_object = o; + if (ret_offset) + *ret_offset = q; + + return 1; +} + +int journal_file_move_to_entry_for_data( + JournalFile *f, + Object *d, + direction_t direction, + Object **ret_object, + uint64_t *ret_offset) { + + uint64_t extra, first, n; + int r = 0; + + assert(f); + assert(d); + assert(d->object.type == OBJECT_DATA); + assert(IN_SET(direction, DIRECTION_DOWN, DIRECTION_UP)); + + /* FIXME: fix return value assignment. */ + + /* This returns the first (when the direction is down, otherwise the last) entry linked to the + * specified data object. */ + + n = le64toh(d->data.n_entries); + if (n <= 0) + return 0; + n--; /* n_entries is the number of entries linked to the data object, including the 'extra' entry. */ + + extra = le64toh(d->data.entry_offset); + first = le64toh(d->data.entry_array_offset); + + if (direction == DIRECTION_DOWN && extra > 0) { + /* When we are going downwards, first try to read the extra entry. */ + r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, ret_object); + if (r >= 0) + goto use_extra; + if (!IN_SET(r, -EADDRNOTAVAIL, -EBADMSG)) + return r; + } + + if (n > 0) { + /* DIRECTION_DOWN : The extra entry is broken, falling back to the entries in the array. + * DIRECTION_UP : Try to find a valid entry in the array from the tail. */ + r = generic_array_get(f, + first, + direction == DIRECTION_DOWN ? 0 : n - 1, + direction, + ret_object, ret_offset); + if (!IN_SET(r, 0, -EADDRNOTAVAIL, -EBADMSG)) + return r; /* found or critical error. */ + } + + if (direction == DIRECTION_UP && extra > 0) { + /* No valid entry exists in the chained array, falling back to the extra entry. */ + r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, ret_object); + if (r >= 0) + goto use_extra; + } + + return r; + +use_extra: + if (ret_offset) + *ret_offset = extra; + + return 1; +} + +int journal_file_move_to_entry_by_offset_for_data( + JournalFile *f, + Object *d, + uint64_t p, + direction_t direction, + Object **ret, uint64_t *ret_offset) { + + assert(f); + assert(d); + assert(d->object.type == OBJECT_DATA); + + return generic_array_bisect_for_data( + f, + d, + p, + test_object_offset, + direction, + ret, ret_offset); +} + +int journal_file_move_to_entry_by_monotonic_for_data( + JournalFile *f, + Object *d, + sd_id128_t boot_id, + uint64_t monotonic, + direction_t direction, + Object **ret_object, + uint64_t *ret_offset) { + + Object *o, *entry; + uint64_t z; + int r; + + assert(f); + assert(d); + assert(d->object.type == OBJECT_DATA); + + /* First, pin the given data object, before reading the _BOOT_ID= data object below. */ + r = journal_file_pin_object(f, d); + if (r < 0) + return r; + + /* Then, read a data object for _BOOT_ID= and seek by time. */ + r = find_data_object_by_boot_id(f, boot_id, &o, NULL); + if (r <= 0) + return r; + + r = generic_array_bisect_for_data(f, + o, + monotonic, + test_object_monotonic, + direction, + NULL, &z); + if (r <= 0) + return r; + + /* And now, continue seeking until we find an entry that exists in both bisection arrays. */ + for (;;) { + uint64_t p; + + /* The journal entry found by the above bisect_plus_one() may not have the specified data, + * that is, it may not be linked in the data object. So, we need to check that. */ + + r = journal_file_move_to_entry_by_offset_for_data( + f, d, z, direction, ret_object ? &entry : NULL, &p); + if (r <= 0) + return r; + if (p == z) + break; /* The journal entry has the specified data. Yay! */ + + /* If the entry does not have the data, then move to the next (or previous, depends on the + * 'direction') entry linked to the data object. But, the next entry may be in another boot. + * So, we need to check that the entry has the matching boot ID. */ + + r = journal_file_move_to_entry_by_offset_for_data( + f, o, p, direction, ret_object ? &entry : NULL, &z); + if (r <= 0) + return r; + if (p == z) + break; /* The journal entry has the specified boot ID. Yay! */ + + /* If not, let's try to the next entry... */ + } + + if (ret_object) + *ret_object = entry; + if (ret_offset) + *ret_offset = z; + return 1; +} + +int journal_file_move_to_entry_by_seqnum_for_data( + JournalFile *f, + Object *d, + uint64_t seqnum, + direction_t direction, + Object **ret_object, + uint64_t *ret_offset) { + + assert(f); + assert(d); + assert(d->object.type == OBJECT_DATA); + + return generic_array_bisect_for_data( + f, + d, + seqnum, + test_object_seqnum, + direction, + ret_object, ret_offset); +} + +int journal_file_move_to_entry_by_realtime_for_data( + JournalFile *f, + Object *d, + uint64_t realtime, + direction_t direction, + Object **ret, uint64_t *ret_offset) { + + assert(f); + assert(d); + assert(d->object.type == OBJECT_DATA); + + return generic_array_bisect_for_data( + f, + d, + realtime, + test_object_realtime, + direction, + ret, ret_offset); +} + +void journal_file_dump(JournalFile *f) { + Object *o; + uint64_t p; + int r; + + assert(f); + assert(f->header); + + journal_file_print_header(f); + + p = le64toh(READ_NOW(f->header->header_size)); + while (p != 0) { + const char *s; + Compression c; + + r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o); + if (r < 0) + goto fail; + + s = journal_object_type_to_string(o->object.type); + + switch (o->object.type) { + + case OBJECT_ENTRY: + assert(s); + + printf("Type: %s seqnum=%"PRIu64" monotonic=%"PRIu64" realtime=%"PRIu64"\n", + s, + le64toh(o->entry.seqnum), + le64toh(o->entry.monotonic), + le64toh(o->entry.realtime)); + break; + + case OBJECT_TAG: + assert(s); + + printf("Type: %s seqnum=%"PRIu64" epoch=%"PRIu64"\n", + s, + le64toh(o->tag.seqnum), + le64toh(o->tag.epoch)); + break; + + default: + if (s) + printf("Type: %s \n", s); + else + printf("Type: unknown (%i)", o->object.type); + + break; + } + + c = COMPRESSION_FROM_OBJECT(o); + if (c > COMPRESSION_NONE) + printf("Flags: %s\n", + compression_to_string(c)); + + if (p == le64toh(f->header->tail_object_offset)) + p = 0; + else + p += ALIGN64(le64toh(o->object.size)); + } + + return; +fail: + log_error("File corrupt"); +} + +/* Note: the lifetime of the compound literal is the immediately surrounding block. */ +#define FORMAT_TIMESTAMP_SAFE(t) (FORMAT_TIMESTAMP(t) ?: " --- ") + +void journal_file_print_header(JournalFile *f) { + struct stat st; + + assert(f); + assert(f->header); + + printf("File path: %s\n" + "File ID: %s\n" + "Machine ID: %s\n" + "Boot ID: %s\n" + "Sequential number ID: %s\n" + "State: %s\n" + "Compatible flags:%s%s%s%s\n" + "Incompatible flags:%s%s%s%s%s%s\n" + "Header size: %"PRIu64"\n" + "Arena size: %"PRIu64"\n" + "Data hash table size: %"PRIu64"\n" + "Field hash table size: %"PRIu64"\n" + "Rotate suggested: %s\n" + "Head sequential number: %"PRIu64" (%"PRIx64")\n" + "Tail sequential number: %"PRIu64" (%"PRIx64")\n" + "Head realtime timestamp: %s (%"PRIx64")\n" + "Tail realtime timestamp: %s (%"PRIx64")\n" + "Tail monotonic timestamp: %s (%"PRIx64")\n" + "Objects: %"PRIu64"\n" + "Entry objects: %"PRIu64"\n", + f->path, + SD_ID128_TO_STRING(f->header->file_id), + SD_ID128_TO_STRING(f->header->machine_id), + SD_ID128_TO_STRING(f->header->tail_entry_boot_id), + SD_ID128_TO_STRING(f->header->seqnum_id), + f->header->state == STATE_OFFLINE ? "OFFLINE" : + f->header->state == STATE_ONLINE ? "ONLINE" : + f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN", + JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "", + JOURNAL_HEADER_SEALED_CONTINUOUS(f->header) ? " SEALED_CONTINUOUS" : "", + JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(f->header) ? " TAIL_ENTRY_BOOT_ID" : "", + (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ???" : "", + JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? " COMPRESSED-XZ" : "", + JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4" : "", + JOURNAL_HEADER_COMPRESSED_ZSTD(f->header) ? " COMPRESSED-ZSTD" : "", + JOURNAL_HEADER_KEYED_HASH(f->header) ? " KEYED-HASH" : "", + JOURNAL_HEADER_COMPACT(f->header) ? " COMPACT" : "", + (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_ANY) ? " ???" : "", + le64toh(f->header->header_size), + le64toh(f->header->arena_size), + le64toh(f->header->data_hash_table_size) / sizeof(HashItem), + le64toh(f->header->field_hash_table_size) / sizeof(HashItem), + yes_no(journal_file_rotate_suggested(f, 0, LOG_DEBUG)), + le64toh(f->header->head_entry_seqnum), le64toh(f->header->head_entry_seqnum), + le64toh(f->header->tail_entry_seqnum), le64toh(f->header->tail_entry_seqnum), + FORMAT_TIMESTAMP_SAFE(le64toh(f->header->head_entry_realtime)), le64toh(f->header->head_entry_realtime), + FORMAT_TIMESTAMP_SAFE(le64toh(f->header->tail_entry_realtime)), le64toh(f->header->tail_entry_realtime), + FORMAT_TIMESPAN(le64toh(f->header->tail_entry_monotonic), USEC_PER_MSEC), le64toh(f->header->tail_entry_monotonic), + le64toh(f->header->n_objects), + le64toh(f->header->n_entries)); + + if (JOURNAL_HEADER_CONTAINS(f->header, n_data)) + printf("Data objects: %"PRIu64"\n" + "Data hash table fill: %.1f%%\n", + le64toh(f->header->n_data), + 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)))); + + if (JOURNAL_HEADER_CONTAINS(f->header, n_fields)) + printf("Field objects: %"PRIu64"\n" + "Field hash table fill: %.1f%%\n", + le64toh(f->header->n_fields), + 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)))); + + if (JOURNAL_HEADER_CONTAINS(f->header, n_tags)) + printf("Tag objects: %"PRIu64"\n", + le64toh(f->header->n_tags)); + if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays)) + printf("Entry array objects: %"PRIu64"\n", + le64toh(f->header->n_entry_arrays)); + + if (JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth)) + printf("Deepest field hash chain: %" PRIu64"\n", + f->header->field_hash_chain_depth); + + if (JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth)) + printf("Deepest data hash chain: %" PRIu64"\n", + f->header->data_hash_chain_depth); + + if (fstat(f->fd, &st) >= 0) + printf("Disk usage: %s\n", FORMAT_BYTES((uint64_t) st.st_blocks * 512ULL)); +} + +static int journal_file_warn_btrfs(JournalFile *f) { + unsigned attrs; + int r; + + assert(f); + + /* Before we write anything, check if the COW logic is turned + * off on btrfs. Given our write pattern that is quite + * unfriendly to COW file systems this should greatly improve + * performance on COW file systems, such as btrfs, at the + * expense of data integrity features (which shouldn't be too + * bad, given that we do our own checksumming). */ + + r = fd_is_fs_type(f->fd, BTRFS_SUPER_MAGIC); + if (r < 0) + return log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to determine if journal is on btrfs: %m"); + if (r == 0) + return 0; + + r = read_attr_fd(f->fd, &attrs); + if (r < 0) + return log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, "Failed to read file attributes: %m"); + + if (attrs & FS_NOCOW_FL) { + log_debug("Detected btrfs file system with copy-on-write disabled, all is good."); + return 0; + } + + log_ratelimit_notice(JOURNAL_LOG_RATELIMIT, + "Creating journal file %s on a btrfs file system, and copy-on-write is enabled. " + "This is likely to slow down journal access substantially, please consider turning " + "off the copy-on-write file attribute on the journal directory, using chattr +C.", + f->path); + + return 1; +} + +static void journal_default_metrics(JournalMetrics *m, int fd, bool compact) { + struct statvfs ss; + uint64_t fs_size = 0; + + assert(m); + assert(fd >= 0); + + if (fstatvfs(fd, &ss) >= 0) + fs_size = u64_multiply_safe(ss.f_frsize, ss.f_blocks); + else + log_debug_errno(errno, "Failed to determine disk size: %m"); + + if (m->max_use == UINT64_MAX) { + + if (fs_size > 0) + m->max_use = CLAMP(PAGE_ALIGN_U64(fs_size / 10), /* 10% of file system size */ + MAX_USE_LOWER, MAX_USE_UPPER); + else + m->max_use = MAX_USE_LOWER; + } else { + m->max_use = PAGE_ALIGN_U64(m->max_use); + + if (m->max_use != 0 && m->max_use < JOURNAL_FILE_SIZE_MIN*2) + m->max_use = JOURNAL_FILE_SIZE_MIN*2; + } + + if (m->min_use == UINT64_MAX) { + if (fs_size > 0) + m->min_use = CLAMP(PAGE_ALIGN_U64(fs_size / 50), /* 2% of file system size */ + MIN_USE_LOW, MIN_USE_HIGH); + else + m->min_use = MIN_USE_LOW; + } + + if (m->min_use > m->max_use) + m->min_use = m->max_use; + + if (m->max_size == UINT64_MAX) + m->max_size = MIN(PAGE_ALIGN_U64(m->max_use / 8), /* 8 chunks */ + MAX_SIZE_UPPER); + else + m->max_size = PAGE_ALIGN_U64(m->max_size); + + if (compact && m->max_size > JOURNAL_COMPACT_SIZE_MAX) + m->max_size = JOURNAL_COMPACT_SIZE_MAX; + + if (m->max_size != 0) { + if (m->max_size < JOURNAL_FILE_SIZE_MIN) + m->max_size = JOURNAL_FILE_SIZE_MIN; + + if (m->max_use != 0 && m->max_size*2 > m->max_use) + m->max_use = m->max_size*2; + } + + if (m->min_size == UINT64_MAX) + m->min_size = JOURNAL_FILE_SIZE_MIN; + else + m->min_size = CLAMP(PAGE_ALIGN_U64(m->min_size), + JOURNAL_FILE_SIZE_MIN, + m->max_size ?: UINT64_MAX); + + if (m->keep_free == UINT64_MAX) { + if (fs_size > 0) + m->keep_free = MIN(PAGE_ALIGN_U64(fs_size / 20), /* 5% of file system size */ + KEEP_FREE_UPPER); + else + m->keep_free = DEFAULT_KEEP_FREE; + } + + if (m->n_max_files == UINT64_MAX) + m->n_max_files = DEFAULT_N_MAX_FILES; + + log_debug("Fixed min_use=%s max_use=%s max_size=%s min_size=%s keep_free=%s n_max_files=%" PRIu64, + FORMAT_BYTES(m->min_use), + FORMAT_BYTES(m->max_use), + FORMAT_BYTES(m->max_size), + FORMAT_BYTES(m->min_size), + FORMAT_BYTES(m->keep_free), + m->n_max_files); +} + +int journal_file_open( + int fd, + const char *fname, + int open_flags, + JournalFileFlags file_flags, + mode_t mode, + uint64_t compress_threshold_bytes, + JournalMetrics *metrics, + MMapCache *mmap_cache, + JournalFile *template, + JournalFile **ret) { + + bool newly_created = false; + JournalFile *f; + void *h; + int r; + + assert(fd >= 0 || fname); + assert(file_flags >= 0); + assert(file_flags <= _JOURNAL_FILE_FLAGS_MAX); + assert(mmap_cache); + assert(ret); + + if (!IN_SET((open_flags & O_ACCMODE), O_RDONLY, O_RDWR)) + return -EINVAL; + + if ((open_flags & O_ACCMODE) == O_RDONLY && FLAGS_SET(open_flags, O_CREAT)) + return -EINVAL; + + if (fname && (open_flags & O_CREAT) && !endswith(fname, ".journal")) + return -EINVAL; + + f = new(JournalFile, 1); + if (!f) + return -ENOMEM; + + *f = (JournalFile) { + .fd = fd, + .mode = mode, + .open_flags = open_flags, + .compress_threshold_bytes = compress_threshold_bytes == UINT64_MAX ? + DEFAULT_COMPRESS_THRESHOLD : + MAX(MIN_COMPRESS_THRESHOLD, compress_threshold_bytes), + .strict_order = FLAGS_SET(file_flags, JOURNAL_STRICT_ORDER), + .newest_boot_id_prioq_idx = PRIOQ_IDX_NULL, + .last_direction = _DIRECTION_INVALID, + }; + + if (fname) { + f->path = strdup(fname); + if (!f->path) { + r = -ENOMEM; + goto fail; + } + } else { + assert(fd >= 0); + + /* If we don't know the path, fill in something explanatory and vaguely useful */ + if (asprintf(&f->path, "/proc/self/%i", fd) < 0) { + r = -ENOMEM; + goto fail; + } + } + + f->chain_cache = ordered_hashmap_new(&uint64_hash_ops); + if (!f->chain_cache) { + r = -ENOMEM; + goto fail; + } + + if (f->fd < 0) { + /* We pass O_NONBLOCK here, so that in case somebody pointed us to some character device node or FIFO + * or so, we likely fail quickly than block for long. For regular files O_NONBLOCK has no effect, hence + * it doesn't hurt in that case. */ + + f->fd = openat_report_new(AT_FDCWD, f->path, f->open_flags|O_CLOEXEC|O_NONBLOCK, f->mode, &newly_created); + if (f->fd < 0) { + r = f->fd; + goto fail; + } + + /* fds we opened here by us should also be closed by us. */ + f->close_fd = true; + + r = fd_nonblock(f->fd, false); + if (r < 0) + goto fail; + + if (!newly_created) { + r = journal_file_fstat(f); + if (r < 0) + goto fail; + } + } else { + r = journal_file_fstat(f); + if (r < 0) + goto fail; + + /* If we just got the fd passed in, we don't really know if we created the file anew */ + newly_created = f->last_stat.st_size == 0 && journal_file_writable(f); + } + + r = mmap_cache_add_fd(mmap_cache, f->fd, mmap_prot_from_open_flags(open_flags), &f->cache_fd); + if (r < 0) + goto fail; + + if (newly_created) { + (void) journal_file_warn_btrfs(f); + + /* Let's attach the creation time to the journal file, so that the vacuuming code knows the age of this + * file even if the file might end up corrupted one day... Ideally we'd just use the creation time many + * file systems maintain for each file, but the API to query this is very new, hence let's emulate this + * via extended attributes. If extended attributes are not supported we'll just skip this, and rely + * solely on mtime/atime/ctime of the file. */ + (void) fd_setcrtime(f->fd, 0); + + r = journal_file_init_header(f, file_flags, template); + if (r < 0) + goto fail; + + r = journal_file_fstat(f); + if (r < 0) + goto fail; + } + + if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) { + r = -ENODATA; + goto fail; + } + + r = mmap_cache_fd_get(f->cache_fd, MMAP_CACHE_CATEGORY_HEADER, true, 0, PAGE_ALIGN(sizeof(Header)), &f->last_stat, &h); + if (r == -EINVAL) { + /* Some file systems (jffs2 or p9fs) don't support mmap() properly (or only read-only + * mmap()), and return EINVAL in that case. Let's propagate that as a more recognizable error + * code. */ + r = -EAFNOSUPPORT; + goto fail; + } + if (r < 0) + goto fail; + + f->header = h; + + if (!newly_created) { + r = journal_file_verify_header(f); + if (r < 0) + goto fail; + } + +#if HAVE_GCRYPT + if (!newly_created && journal_file_writable(f) && JOURNAL_HEADER_SEALED(f->header)) { + r = journal_file_fss_load(f); + if (r < 0) + goto fail; + } +#endif + + if (journal_file_writable(f)) { + if (metrics) { + journal_default_metrics(metrics, f->fd, JOURNAL_HEADER_COMPACT(f->header)); + f->metrics = *metrics; + } else if (template) + f->metrics = template->metrics; + + r = journal_file_refresh_header(f); + if (r < 0) + goto fail; + } + +#if HAVE_GCRYPT + r = journal_file_hmac_setup(f); + if (r < 0) + goto fail; +#endif + + if (newly_created) { + r = journal_file_setup_field_hash_table(f); + if (r < 0) + goto fail; + + r = journal_file_setup_data_hash_table(f); + if (r < 0) + goto fail; + +#if HAVE_GCRYPT + r = journal_file_append_first_tag(f); + if (r < 0) + goto fail; +#endif + } + + if (mmap_cache_fd_got_sigbus(f->cache_fd)) { + r = -EIO; + goto fail; + } + + if (template && template->post_change_timer) { + r = journal_file_enable_post_change_timer( + f, + sd_event_source_get_event(template->post_change_timer), + template->post_change_timer_period); + + if (r < 0) + goto fail; + } + + /* The file is opened now successfully, thus we take possession of any passed in fd. */ + f->close_fd = true; + + if (DEBUG_LOGGING) { + static int last_seal = -1, last_keyed_hash = -1; + static Compression last_compression = _COMPRESSION_INVALID; + static uint64_t last_bytes = UINT64_MAX; + + if (last_seal != JOURNAL_HEADER_SEALED(f->header) || + last_keyed_hash != JOURNAL_HEADER_KEYED_HASH(f->header) || + last_compression != JOURNAL_FILE_COMPRESSION(f) || + last_bytes != f->compress_threshold_bytes) { + + log_debug("Journal effective settings seal=%s keyed_hash=%s compress=%s compress_threshold_bytes=%s", + yes_no(JOURNAL_HEADER_SEALED(f->header)), yes_no(JOURNAL_HEADER_KEYED_HASH(f->header)), + compression_to_string(JOURNAL_FILE_COMPRESSION(f)), FORMAT_BYTES(f->compress_threshold_bytes)); + last_seal = JOURNAL_HEADER_SEALED(f->header); + last_keyed_hash = JOURNAL_HEADER_KEYED_HASH(f->header); + last_compression = JOURNAL_FILE_COMPRESSION(f); + last_bytes = f->compress_threshold_bytes; + } + } + + *ret = f; + return 0; + +fail: + if (f->cache_fd && mmap_cache_fd_got_sigbus(f->cache_fd)) + r = -EIO; + + (void) journal_file_close(f); + + if (newly_created && fd < 0) + (void) unlink(fname); + + return r; +} + +int journal_file_parse_uid_from_filename(const char *path, uid_t *ret_uid) { + _cleanup_free_ char *buf = NULL, *p = NULL; + const char *a, *b, *at; + int r; + + /* This helper returns -EREMOTE when the filename doesn't match user online/offline journal + * pattern. Hence it currently doesn't parse archived or disposed user journals. */ + + assert(path); + assert(ret_uid); + + r = path_extract_filename(path, &p); + if (r < 0) + return r; + if (r == O_DIRECTORY) + return -EISDIR; + + a = startswith(p, "user-"); + if (!a) + return -EREMOTE; + b = endswith(p, ".journal"); + if (!b) + return -EREMOTE; + + at = strchr(a, '@'); + if (at) + return -EREMOTE; + + buf = strndup(a, b-a); + if (!buf) + return -ENOMEM; + + return parse_uid(buf, ret_uid); +} + +int journal_file_archive(JournalFile *f, char **ret_previous_path) { + _cleanup_free_ char *p = NULL; + + assert(f); + + if (!journal_file_writable(f)) + return -EINVAL; + + /* Is this a journal file that was passed to us as fd? If so, we synthesized a path name for it, and we refuse + * rotation, since we don't know the actual path, and couldn't rename the file hence. */ + if (path_startswith(f->path, "/proc/self/fd")) + return -EINVAL; + + if (!endswith(f->path, ".journal")) + return -EINVAL; + + if (asprintf(&p, "%.*s@" SD_ID128_FORMAT_STR "-%016"PRIx64"-%016"PRIx64".journal", + (int) strlen(f->path) - 8, f->path, + SD_ID128_FORMAT_VAL(f->header->seqnum_id), + le64toh(f->header->head_entry_seqnum), + le64toh(f->header->head_entry_realtime)) < 0) + return -ENOMEM; + + /* Try to rename the file to the archived version. If the file already was deleted, we'll get ENOENT, let's + * ignore that case. */ + if (rename(f->path, p) < 0 && errno != ENOENT) + return -errno; + + /* Sync the rename to disk */ + (void) fsync_directory_of_file(f->fd); + + if (ret_previous_path) + *ret_previous_path = f->path; + else + free(f->path); + + f->path = TAKE_PTR(p); + + /* Set as archive so offlining commits w/state=STATE_ARCHIVED. Previously we would set old_file->header->state + * to STATE_ARCHIVED directly here, but journal_file_set_offline() short-circuits when state != STATE_ONLINE, + * which would result in the rotated journal never getting fsync() called before closing. Now we simply queue + * the archive state by setting an archive bit, leaving the state as STATE_ONLINE so proper offlining + * occurs. */ + f->archive = true; + + return 0; +} + +int journal_file_dispose(int dir_fd, const char *fname) { + _cleanup_free_ char *p = NULL; + + assert(fname); + + /* Renames a journal file to *.journal~, i.e. to mark it as corrupted or otherwise uncleanly shutdown. Note that + * this is done without looking into the file or changing any of its contents. The idea is that this is called + * whenever something is suspicious and we want to move the file away and make clear that it is not accessed + * for writing anymore. */ + + if (!endswith(fname, ".journal")) + return -EINVAL; + + if (asprintf(&p, "%.*s@%016" PRIx64 "-%016" PRIx64 ".journal~", + (int) strlen(fname) - 8, fname, + now(CLOCK_REALTIME), + random_u64()) < 0) + return -ENOMEM; + + if (renameat(dir_fd, fname, dir_fd, p) < 0) + return -errno; + + return 0; +} + +int journal_file_copy_entry( + JournalFile *from, + JournalFile *to, + Object *o, + uint64_t p, + uint64_t *seqnum, + sd_id128_t *seqnum_id) { + + _cleanup_free_ EntryItem *items_alloc = NULL; + EntryItem *items; + uint64_t n, m = 0, xor_hash = 0; + sd_id128_t boot_id; + dual_timestamp ts; + int r; + + assert(from); + assert(to); + assert(o); + assert(p > 0); + + if (!journal_file_writable(to)) + return -EPERM; + + ts = (dual_timestamp) { + .monotonic = le64toh(o->entry.monotonic), + .realtime = le64toh(o->entry.realtime), + }; + boot_id = o->entry.boot_id; + + n = journal_file_entry_n_items(from, o); + if (n == 0) + return 0; + + if (n < ALLOCA_MAX / sizeof(EntryItem) / 2) + items = newa(EntryItem, n); + else { + items_alloc = new(EntryItem, n); + if (!items_alloc) + return -ENOMEM; + + items = items_alloc; + } + + for (uint64_t i = 0; i < n; i++) { + uint64_t h, q; + void *data; + size_t l; + Object *u; + + q = journal_file_entry_item_object_offset(from, o, i); + r = journal_file_data_payload(from, NULL, q, NULL, 0, 0, &data, &l); + if (IN_SET(r, -EADDRNOTAVAIL, -EBADMSG)) { + log_debug_errno(r, "Entry item %"PRIu64" data object is bad, skipping over it: %m", i); + continue; + } + if (r < 0) + return r; + assert(r > 0); + + if (l == 0) + return -EBADMSG; + + r = journal_file_append_data(to, data, l, &u, &h); + if (r < 0) + return r; + + if (JOURNAL_HEADER_KEYED_HASH(to->header)) + xor_hash ^= jenkins_hash64(data, l); + else + xor_hash ^= le64toh(u->data.hash); + + items[m++] = (EntryItem) { + .object_offset = h, + .hash = le64toh(u->data.hash), + }; + } + + if (m == 0) + return 0; + + r = journal_file_append_entry_internal( + to, + &ts, + &boot_id, + &from->header->machine_id, + xor_hash, + items, + m, + seqnum, + seqnum_id, + /* ret_object= */ NULL, + /* ret_offset= */ NULL); + + if (mmap_cache_fd_got_sigbus(to->cache_fd)) + return -EIO; + + return r; +} + +void journal_reset_metrics(JournalMetrics *m) { + assert(m); + + /* Set everything to "pick automatic values". */ + + *m = (JournalMetrics) { + .min_use = UINT64_MAX, + .max_use = UINT64_MAX, + .min_size = UINT64_MAX, + .max_size = UINT64_MAX, + .keep_free = UINT64_MAX, + .n_max_files = UINT64_MAX, + }; +} + +int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *ret_from, usec_t *ret_to) { + assert(f); + assert(f->header); + assert(ret_from || ret_to); + + if (ret_from) { + if (f->header->head_entry_realtime == 0) + return -ENOENT; + + *ret_from = le64toh(f->header->head_entry_realtime); + } + + if (ret_to) { + if (f->header->tail_entry_realtime == 0) + return -ENOENT; + + *ret_to = le64toh(f->header->tail_entry_realtime); + } + + return 1; +} + +int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *ret_from, usec_t *ret_to) { + Object *o; + uint64_t p; + int r; + + assert(f); + assert(ret_from || ret_to); + + /* FIXME: fix return value assignment on success with 0. */ + + r = find_data_object_by_boot_id(f, boot_id, &o, &p); + if (r <= 0) + return r; + + if (le64toh(o->data.n_entries) <= 0) + return 0; + + if (ret_from) { + r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o); + if (r < 0) + return r; + + *ret_from = le64toh(o->entry.monotonic); + } + + if (ret_to) { + r = journal_file_move_to_object(f, OBJECT_DATA, p, &o); + if (r < 0) + return r; + + r = journal_file_move_to_entry_for_data(f, o, DIRECTION_UP, &o, NULL); + if (r <= 0) + return r; + + *ret_to = le64toh(o->entry.monotonic); + } + + return 1; +} + +bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec, int log_level) { + assert(f); + assert(f->header); + + /* If we gained new header fields we gained new features, + * hence suggest a rotation */ + if (le64toh(f->header->header_size) < sizeof(Header)) { + log_ratelimit_full(log_level, JOURNAL_LOG_RATELIMIT, + "%s uses an outdated header, suggesting rotation.", f->path); + return true; + } + + /* Let's check if the hash tables grew over a certain fill level (75%, borrowing this value from + * Java's hash table implementation), and if so suggest a rotation. To calculate the fill level we + * need the n_data field, which only exists in newer versions. */ + + if (JOURNAL_HEADER_CONTAINS(f->header, n_data)) + if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) { + log_ratelimit_full( + log_level, JOURNAL_LOG_RATELIMIT, + "Data hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items, %"PRIu64" file size, %"PRIu64" bytes per hash table item), suggesting rotation.", + f->path, + 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))), + le64toh(f->header->n_data), + le64toh(f->header->data_hash_table_size) / sizeof(HashItem), + (uint64_t) f->last_stat.st_size, + f->last_stat.st_size / le64toh(f->header->n_data)); + return true; + } + + if (JOURNAL_HEADER_CONTAINS(f->header, n_fields)) + if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) { + log_ratelimit_full( + log_level, JOURNAL_LOG_RATELIMIT, + "Field hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items), suggesting rotation.", + f->path, + 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))), + le64toh(f->header->n_fields), + le64toh(f->header->field_hash_table_size) / sizeof(HashItem)); + return true; + } + + /* If there are too many hash collisions somebody is most likely playing games with us. Hence, if our + * longest chain is longer than some threshold, let's suggest rotation. */ + if (JOURNAL_HEADER_CONTAINS(f->header, data_hash_chain_depth) && + le64toh(f->header->data_hash_chain_depth) > HASH_CHAIN_DEPTH_MAX) { + log_ratelimit_full( + log_level, JOURNAL_LOG_RATELIMIT, + "Data hash table of %s has deepest hash chain of length %" PRIu64 ", suggesting rotation.", + f->path, le64toh(f->header->data_hash_chain_depth)); + return true; + } + + if (JOURNAL_HEADER_CONTAINS(f->header, field_hash_chain_depth) && + le64toh(f->header->field_hash_chain_depth) > HASH_CHAIN_DEPTH_MAX) { + log_ratelimit_full( + log_level, JOURNAL_LOG_RATELIMIT, + "Field hash table of %s has deepest hash chain of length at %" PRIu64 ", suggesting rotation.", + f->path, le64toh(f->header->field_hash_chain_depth)); + return true; + } + + /* Are the data objects properly indexed by field objects? */ + if (JOURNAL_HEADER_CONTAINS(f->header, n_data) && + JOURNAL_HEADER_CONTAINS(f->header, n_fields) && + le64toh(f->header->n_data) > 0 && + le64toh(f->header->n_fields) == 0) { + log_ratelimit_full( + log_level, JOURNAL_LOG_RATELIMIT, + "Data objects of %s are not indexed by field objects, suggesting rotation.", + f->path); + return true; + } + + if (max_file_usec > 0) { + usec_t t, h; + + h = le64toh(f->header->head_entry_realtime); + t = now(CLOCK_REALTIME); + + if (h > 0 && t > h + max_file_usec) { + log_ratelimit_full( + log_level, JOURNAL_LOG_RATELIMIT, + "Oldest entry in %s is older than the configured file retention duration (%s), suggesting rotation.", + f->path, FORMAT_TIMESPAN(max_file_usec, USEC_PER_SEC)); + return true; + } + } + + return false; +} + +static const char * const journal_object_type_table[] = { + [OBJECT_UNUSED] = "unused", + [OBJECT_DATA] = "data", + [OBJECT_FIELD] = "field", + [OBJECT_ENTRY] = "entry", + [OBJECT_DATA_HASH_TABLE] = "data hash table", + [OBJECT_FIELD_HASH_TABLE] = "field hash table", + [OBJECT_ENTRY_ARRAY] = "entry array", + [OBJECT_TAG] = "tag", +}; + +DEFINE_STRING_TABLE_LOOKUP_TO_STRING(journal_object_type, ObjectType); diff --git a/src/libsystemd/sd-journal/journal-file.h b/src/libsystemd/sd-journal/journal-file.h new file mode 100644 index 0000000..81fafb9 --- /dev/null +++ b/src/libsystemd/sd-journal/journal-file.h @@ -0,0 +1,393 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <fcntl.h> +#include <inttypes.h> +#include <sys/uio.h> + +#if HAVE_GCRYPT +# include <gcrypt.h> +#endif + +#include "sd-event.h" +#include "sd-id128.h" + +#include "compress.h" +#include "hashmap.h" +#include "journal-def.h" +#include "mmap-cache.h" +#include "sparse-endian.h" +#include "time-util.h" + +typedef struct JournalMetrics { + /* For all these: UINT64_MAX means "pick automatically", and 0 means "no limit enforced" */ + uint64_t max_size; /* how large journal files grow at max */ + uint64_t min_size; /* how large journal files grow at least */ + uint64_t max_use; /* how much disk space to use in total at max, keep_free permitting */ + uint64_t min_use; /* how much disk space to use in total at least, even if keep_free says not to */ + uint64_t keep_free; /* how much to keep free on disk */ + uint64_t n_max_files; /* how many files to keep around at max */ +} JournalMetrics; + +typedef enum direction { + DIRECTION_UP, + DIRECTION_DOWN, + _DIRECTION_INVALID = -EINVAL, +} direction_t; + +typedef enum LocationType { + /* The first and last entries, resp. */ + LOCATION_HEAD, + LOCATION_TAIL, + + /* We already read the entry we currently point to, and the + * next one to read should probably not be this one again. */ + LOCATION_DISCRETE, + + /* We should seek to the precise location specified, and + * return it, as we haven't read it yet. */ + LOCATION_SEEK +} LocationType; + +typedef enum OfflineState { + OFFLINE_JOINED, + OFFLINE_SYNCING, + OFFLINE_OFFLINING, + OFFLINE_CANCEL, + OFFLINE_AGAIN_FROM_SYNCING, + OFFLINE_AGAIN_FROM_OFFLINING, + OFFLINE_DONE +} OfflineState; + +typedef struct JournalFile { + int fd; + MMapFileDescriptor *cache_fd; + + mode_t mode; + + int open_flags; + bool close_fd:1; + bool archive:1; + bool strict_order:1; + + direction_t last_direction; + LocationType location_type; + uint64_t last_n_entries; + + char *path; + struct stat last_stat; + usec_t last_stat_usec; + + Header *header; + HashItem *data_hash_table; + HashItem *field_hash_table; + + uint64_t current_offset; + uint64_t current_seqnum; + uint64_t current_realtime; + uint64_t current_monotonic; + sd_id128_t current_boot_id; + uint64_t current_xor_hash; + + JournalMetrics metrics; + + sd_event_source *post_change_timer; + usec_t post_change_timer_period; + + OrderedHashmap *chain_cache; + + pthread_t offline_thread; + volatile OfflineState offline_state; + + unsigned last_seen_generation; + + uint64_t compress_threshold_bytes; +#if HAVE_COMPRESSION + void *compress_buffer; +#endif + +#if HAVE_GCRYPT + gcry_md_hd_t hmac; + bool hmac_running; + + FSSHeader *fss_file; + size_t fss_file_size; + + uint64_t fss_start_usec; + uint64_t fss_interval_usec; + + void *fsprg_state; + size_t fsprg_state_size; + + void *fsprg_seed; + size_t fsprg_seed_size; +#endif + + /* When we insert this file into the per-boot priority queue 'newest_by_boot_id' in sd_journal, then by these keys */ + sd_id128_t newest_boot_id; + sd_id128_t newest_machine_id; + uint64_t newest_monotonic_usec; + uint64_t newest_realtime_usec; + unsigned newest_boot_id_prioq_idx; + usec_t newest_mtime; +} JournalFile; + +typedef enum JournalFileFlags { + JOURNAL_COMPRESS = 1 << 0, + JOURNAL_SEAL = 1 << 1, + JOURNAL_STRICT_ORDER = 1 << 2, + _JOURNAL_FILE_FLAGS_MAX = JOURNAL_COMPRESS|JOURNAL_SEAL|JOURNAL_STRICT_ORDER, +} JournalFileFlags; + +typedef struct { + uint64_t object_offset; + uint64_t hash; +} EntryItem; + +int journal_file_open( + int fd, + const char *fname, + int open_flags, + JournalFileFlags file_flags, + mode_t mode, + uint64_t compress_threshold_bytes, + JournalMetrics *metrics, + MMapCache *mmap_cache, + JournalFile *template, + JournalFile **ret); + +int journal_file_set_offline_thread_join(JournalFile *f); +JournalFile* journal_file_close(JournalFile *j); +int journal_file_fstat(JournalFile *f); +DEFINE_TRIVIAL_CLEANUP_FUNC(JournalFile*, journal_file_close); + +#define ALIGN64(x) (((x) + 7ULL) & ~7ULL) +#define VALID64(x) (((x) & 7ULL) == 0ULL) + +/* Use six characters to cover the offsets common in smallish journal + * files without adding too many zeros. */ +#define OFSfmt "%06"PRIx64 + +static inline bool VALID_REALTIME(uint64_t u) { + /* This considers timestamps until the year 3112 valid. That should be plenty room... */ + return u > 0 && u < (1ULL << 55); +} + +static inline bool VALID_MONOTONIC(uint64_t u) { + /* This considers timestamps until 1142 years of runtime valid. */ + return u < (1ULL << 55); +} + +static inline bool VALID_EPOCH(uint64_t u) { + /* This allows changing the key for 1142 years, every usec. */ + return u < (1ULL << 55); +} + +#define JOURNAL_HEADER_CONTAINS(h, field) \ + (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field)) + +#define JOURNAL_HEADER_SEALED(h) \ + FLAGS_SET(le32toh((h)->compatible_flags), HEADER_COMPATIBLE_SEALED) + +#define JOURNAL_HEADER_SEALED_CONTINUOUS(h) \ + FLAGS_SET(le32toh((h)->compatible_flags), HEADER_COMPATIBLE_SEALED_CONTINUOUS) + +#define JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(h) \ + FLAGS_SET(le32toh((h)->compatible_flags), HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID) + +#define JOURNAL_HEADER_COMPRESSED_XZ(h) \ + FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_COMPRESSED_XZ) + +#define JOURNAL_HEADER_COMPRESSED_LZ4(h) \ + FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_COMPRESSED_LZ4) + +#define JOURNAL_HEADER_COMPRESSED_ZSTD(h) \ + FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_COMPRESSED_ZSTD) + +#define JOURNAL_HEADER_KEYED_HASH(h) \ + FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_KEYED_HASH) + +#define JOURNAL_HEADER_COMPACT(h) \ + FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_COMPACT) + +int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset, Object **ret); +int journal_file_pin_object(JournalFile *f, Object *o); +int journal_file_read_object_header(JournalFile *f, ObjectType type, uint64_t offset, Object *ret); + +int journal_file_tail_end_by_pread(JournalFile *f, uint64_t *ret_offset); +int journal_file_tail_end_by_mmap(JournalFile *f, uint64_t *ret_offset); + +static inline uint64_t journal_file_entry_item_object_offset(JournalFile *f, Object *o, size_t i) { + assert(f); + assert(o); + return JOURNAL_HEADER_COMPACT(f->header) ? le32toh(o->entry.items.compact[i].object_offset) : + le64toh(o->entry.items.regular[i].object_offset); +} + +static inline size_t journal_file_entry_item_size(JournalFile *f) { + assert(f); + return JOURNAL_HEADER_COMPACT(f->header) ? sizeof_field(Object, entry.items.compact[0]) : + sizeof_field(Object, entry.items.regular[0]); +} + +uint64_t journal_file_entry_n_items(JournalFile *f, Object *o) _pure_; + +int journal_file_data_payload( + JournalFile *f, + Object *o, + uint64_t offset, + const char *field, + size_t field_length, + size_t data_threshold, + void **ret_data, + size_t *ret_size); + +static inline size_t journal_file_data_payload_offset(JournalFile *f) { + return JOURNAL_HEADER_COMPACT(f->header) + ? offsetof(Object, data.compact.payload) + : offsetof(Object, data.regular.payload); +} + +static inline uint8_t* journal_file_data_payload_field(JournalFile *f, Object *o) { + return JOURNAL_HEADER_COMPACT(f->header) ? o->data.compact.payload : o->data.regular.payload; +} + +uint64_t journal_file_entry_array_n_items(JournalFile *f, Object *o) _pure_; + +static inline uint64_t journal_file_entry_array_item(JournalFile *f, Object *o, size_t i) { + assert(f); + assert(o); + return JOURNAL_HEADER_COMPACT(f->header) ? le32toh(o->entry_array.items.compact[i]) : + le64toh(o->entry_array.items.regular[i]); +} + +static inline size_t journal_file_entry_array_item_size(JournalFile *f) { + assert(f); + return JOURNAL_HEADER_COMPACT(f->header) ? sizeof(le32_t) : sizeof(le64_t); +} + +uint64_t journal_file_hash_table_n_items(Object *o) _pure_; + +int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, Object **ret_object, uint64_t *ret_offset); +int journal_file_append_entry( + JournalFile *f, + const dual_timestamp *ts, + const sd_id128_t *boot_id, + const struct iovec iovec[], + size_t n_iovec, + uint64_t *seqnum, + sd_id128_t *seqnum_id, + Object **ret_object, + uint64_t *ret_offset); + +int journal_file_find_data_object(JournalFile *f, const void *data, uint64_t size, Object **ret_object, uint64_t *ret_offset); +int journal_file_find_data_object_with_hash(JournalFile *f, const void *data, uint64_t size, uint64_t hash, Object **ret_object, uint64_t *ret_offset); + +int journal_file_find_field_object(JournalFile *f, const void *field, uint64_t size, Object **ret_object, uint64_t *ret_offset); +int journal_file_find_field_object_with_hash(JournalFile *f, const void *field, uint64_t size, uint64_t hash, Object **ret_object, uint64_t *ret_offset); + +void journal_file_reset_location(JournalFile *f); +void journal_file_save_location(JournalFile *f, Object *o, uint64_t offset); +int journal_file_next_entry(JournalFile *f, uint64_t p, direction_t direction, Object **ret_object, uint64_t *ret_offset); + +int journal_file_move_to_entry_by_offset(JournalFile *f, uint64_t p, direction_t direction, Object **ret_object, uint64_t *ret_offset); +int journal_file_move_to_entry_by_seqnum(JournalFile *f, uint64_t seqnum, direction_t direction, Object **ret_object, uint64_t *ret_offset); +int journal_file_move_to_entry_by_realtime(JournalFile *f, uint64_t realtime, direction_t direction, Object **ret_object, uint64_t *ret_offset); +int journal_file_move_to_entry_by_monotonic(JournalFile *f, sd_id128_t boot_id, uint64_t monotonic, direction_t direction, Object **ret_object, uint64_t *ret_offset); + +int journal_file_move_to_entry_for_data(JournalFile *f, Object *d, direction_t direction, Object **ret_object, uint64_t *ret_offset); + +int journal_file_move_to_entry_by_offset_for_data(JournalFile *f, Object *d, uint64_t p, direction_t direction, Object **ret_object, uint64_t *ret_offset); +int journal_file_move_to_entry_by_seqnum_for_data(JournalFile *f, Object *d, uint64_t seqnum, direction_t direction, Object **ret_object, uint64_t *ret_offset); +int journal_file_move_to_entry_by_realtime_for_data(JournalFile *f, Object *d, uint64_t realtime, direction_t direction, Object **ret_object, uint64_t *ret_offset); +int journal_file_move_to_entry_by_monotonic_for_data(JournalFile *f, Object *d, sd_id128_t boot_id, uint64_t monotonic, direction_t direction, Object **ret_object, uint64_t *ret_offset); + +int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, sd_id128_t *seqnum_id); + +void journal_file_dump(JournalFile *f); +void journal_file_print_header(JournalFile *f); + +int journal_file_archive(JournalFile *f, char **ret_previous_path); +int journal_file_parse_uid_from_filename(const char *path, uid_t *uid); +JournalFile* journal_initiate_close(JournalFile *f, Set *deferred_closes); + +int journal_file_dispose(int dir_fd, const char *fname); + +void journal_file_post_change(JournalFile *f); +int journal_file_enable_post_change_timer(JournalFile *f, sd_event *e, usec_t t); + +void journal_reset_metrics(JournalMetrics *m); + +int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *ret_from, usec_t *ret_to); +int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot, usec_t *ret_from, usec_t *ret_to); + +bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec, int log_level); + +int journal_file_map_data_hash_table(JournalFile *f); +int journal_file_map_field_hash_table(JournalFile *f); + +static inline Compression JOURNAL_FILE_COMPRESSION(JournalFile *f) { + assert(f); + + if (JOURNAL_HEADER_COMPRESSED_XZ(f->header)) + return COMPRESSION_XZ; + if (JOURNAL_HEADER_COMPRESSED_LZ4(f->header)) + return COMPRESSION_LZ4; + if (JOURNAL_HEADER_COMPRESSED_ZSTD(f->header)) + return COMPRESSION_ZSTD; + return COMPRESSION_NONE; +} + +uint64_t journal_file_hash_data(JournalFile *f, const void *data, size_t sz); + +bool journal_field_valid(const char *p, size_t l, bool allow_protected); + +const char* journal_object_type_to_string(ObjectType type) _const_; + +static inline Compression COMPRESSION_FROM_OBJECT(const Object *o) { + assert(o); + + switch (o->object.flags & _OBJECT_COMPRESSED_MASK) { + case 0: + return COMPRESSION_NONE; + case OBJECT_COMPRESSED_XZ: + return COMPRESSION_XZ; + case OBJECT_COMPRESSED_LZ4: + return COMPRESSION_LZ4; + case OBJECT_COMPRESSED_ZSTD: + return COMPRESSION_ZSTD; + default: + return _COMPRESSION_INVALID; + } +} + +static inline uint8_t COMPRESSION_TO_OBJECT_FLAG(Compression c) { + switch (c) { + case COMPRESSION_XZ: + return OBJECT_COMPRESSED_XZ; + case COMPRESSION_LZ4: + return OBJECT_COMPRESSED_LZ4; + case COMPRESSION_ZSTD: + return OBJECT_COMPRESSED_ZSTD; + default: + return 0; + } +} + +static inline uint32_t COMPRESSION_TO_HEADER_INCOMPATIBLE_FLAG(Compression c) { + switch (c) { + case COMPRESSION_XZ: + return HEADER_INCOMPATIBLE_COMPRESSED_XZ; + case COMPRESSION_LZ4: + return HEADER_INCOMPATIBLE_COMPRESSED_LZ4; + case COMPRESSION_ZSTD: + return HEADER_INCOMPATIBLE_COMPRESSED_ZSTD; + default: + return 0; + } +} + +static inline bool journal_file_writable(JournalFile *f) { + assert(f); + return (f->open_flags & O_ACCMODE) != O_RDONLY; +} diff --git a/src/libsystemd/sd-journal/journal-internal.h b/src/libsystemd/sd-journal/journal-internal.h new file mode 100644 index 0000000..259aac8 --- /dev/null +++ b/src/libsystemd/sd-journal/journal-internal.h @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <inttypes.h> +#include <stdbool.h> +#include <sys/types.h> + +#include "sd-id128.h" +#include "sd-journal.h" + +#include "hashmap.h" +#include "journal-def.h" +#include "journal-file.h" +#include "list.h" +#include "set.h" + +#define JOURNAL_FILES_MAX 7168u + +#define JOURNAL_LOG_RATELIMIT ((const RateLimit) { .interval = 60 * USEC_PER_SEC, .burst = 3 }) + +typedef struct Match Match; +typedef struct Location Location; +typedef struct Directory Directory; + +typedef enum MatchType { + MATCH_DISCRETE, + MATCH_OR_TERM, + MATCH_AND_TERM +} MatchType; + +struct Match { + MatchType type; + Match *parent; + LIST_FIELDS(Match, matches); + + /* For concrete matches */ + char *data; + size_t size; + uint64_t hash; /* old-style jenkins hash. New-style siphash is different per file, hence won't be cached here */ + + /* For terms */ + LIST_HEAD(Match, matches); +}; + +struct Location { + LocationType type; + + bool seqnum_set:1; + bool realtime_set:1; + bool monotonic_set:1; + bool xor_hash_set:1; + + uint64_t seqnum; + sd_id128_t seqnum_id; + + uint64_t realtime; + + uint64_t monotonic; + sd_id128_t boot_id; + + uint64_t xor_hash; +}; + +struct Directory { + char *path; + int wd; + bool is_root; + unsigned last_seen_generation; +}; + +struct sd_journal { + int toplevel_fd; + + char *path; + char *prefix; + char *namespace; + + OrderedHashmap *files; + IteratedCache *files_cache; + MMapCache *mmap; + Hashmap *newest_by_boot_id; /* key: boot_id, value: prioq, ordered by monotonic timestamp of last update */ + + Location current_location; + + JournalFile *current_file; + uint64_t current_field; + + Match *level0, *level1, *level2; + + uint64_t origin_id; + + int inotify_fd; + unsigned current_invalidate_counter, last_invalidate_counter; + usec_t last_process_usec; + unsigned generation; + + /* Iterating through unique fields and their data values */ + char *unique_field; + JournalFile *unique_file; + uint64_t unique_offset; + + /* Iterating through known fields */ + JournalFile *fields_file; + uint64_t fields_offset; + uint64_t fields_hash_table_index; + char *fields_buffer; + + int flags; + + bool on_network:1; + bool no_new_files:1; + bool no_inotify:1; + bool unique_file_lost:1; /* File we were iterating over got + removed, and there were no more + files, so sd_j_enumerate_unique + will return a value equal to 0. */ + bool fields_file_lost:1; + bool has_runtime_files:1; + bool has_persistent_files:1; + + size_t data_threshold; + + Hashmap *directories_by_path; + Hashmap *directories_by_wd; + + Hashmap *errors; +}; + +char *journal_make_match_string(sd_journal *j); +void journal_print_header(sd_journal *j); + +#define JOURNAL_FOREACH_DATA_RETVAL(j, data, l, retval) \ + for (sd_journal_restart_data(j); ((retval) = sd_journal_enumerate_data((j), &(data), &(l))) > 0; ) + +/* All errors that we might encounter while extracting a field that are not real errors, + * but only mean that the field is too large or we don't support the compression. */ +static inline bool JOURNAL_ERRNO_IS_UNAVAILABLE_FIELD(int r) { + return IN_SET(abs(r), + ENOBUFS, /* Field or decompressed field too large */ + E2BIG, /* Field too large for pointer width */ + EPROTONOSUPPORT); /* Unsupported compression */ +} diff --git a/src/libsystemd/sd-journal/journal-send.c b/src/libsystemd/sd-journal/journal-send.c new file mode 100644 index 0000000..be23b2f --- /dev/null +++ b/src/libsystemd/sd-journal/journal-send.c @@ -0,0 +1,576 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <printf.h> +#include <stddef.h> +#include <sys/un.h> +#include <unistd.h> +#if HAVE_VALGRIND_VALGRIND_H +# include <valgrind/valgrind.h> +#endif + +#define SD_JOURNAL_SUPPRESS_LOCATION + +#include "sd-journal.h" + +#include "alloc-util.h" +#include "errno-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "io-util.h" +#include "iovec-util.h" +#include "journal-send.h" +#include "memfd-util.h" +#include "missing_syscall.h" +#include "process-util.h" +#include "socket-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "tmpfile-util.h" + +#define SNDBUF_SIZE (8*1024*1024) + +#define ALLOCA_CODE_FUNC(f, func) \ + do { \ + size_t _fl; \ + const char *_func = (func); \ + char **_f = &(f); \ + _fl = strlen(_func) + 1; \ + *_f = newa(char, _fl + 10); \ + memcpy(*_f, "CODE_FUNC=", 10); \ + memcpy(*_f + 10, _func, _fl); \ + } while (false) + +/* We open a single fd, and we'll share it with the current process, + * all its threads, and all its subprocesses. This means we need to + * initialize it atomically, and need to operate on it atomically + * never assuming we are the only user */ +static int fd_plus_one = 0; + +static int journal_fd(void) { + int fd; + +retry: + if (fd_plus_one > 0) + return fd_plus_one - 1; + + fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0); + if (fd < 0) + return -errno; + + fd_inc_sndbuf(fd, SNDBUF_SIZE); + + if (!__atomic_compare_exchange_n(&fd_plus_one, &(int){0}, fd+1, + false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { + safe_close(fd); + goto retry; + } + + return fd; +} + +int journal_fd_nonblock(bool nonblock) { + int r; + + r = journal_fd(); + if (r < 0) + return r; + + return fd_nonblock(r, nonblock); +} + +void close_journal_fd(void) { +#if HAVE_VALGRIND_VALGRIND_H + /* Be nice to valgrind. This is not atomic, so it is useful mainly for debugging. */ + + if (!RUNNING_ON_VALGRIND) + return; + + if (getpid_cached() != gettid()) + return; + + if (fd_plus_one <= 0) + return; + + safe_close(fd_plus_one - 1); + fd_plus_one = 0; +#endif +} + +_public_ int sd_journal_print(int priority, const char *format, ...) { + int r; + va_list ap; + + va_start(ap, format); + r = sd_journal_printv(priority, format, ap); + va_end(ap); + + return r; +} + +_public_ int sd_journal_printv(int priority, const char *format, va_list ap) { + char p[STRLEN("PRIORITY=") + DECIMAL_STR_MAX(int) + 1]; + char sbuf[LINE_MAX + 8] = "MESSAGE="; + struct iovec iov[2]; + int len; + va_list aq; + char *buffer = sbuf; + + assert_return(priority >= 0, -EINVAL); + assert_return(priority <= 7, -EINVAL); + assert_return(format, -EINVAL); + + xsprintf(p, "PRIORITY=%i", priority & LOG_PRIMASK); + + va_copy(aq, ap); + len = vsnprintf(buffer + 8, LINE_MAX, format, aq); + va_end(aq); + + if (len >= (int)LONG_LINE_MAX - 8) + return -ENOBUFS; + + /* Allocate large buffer to accommodate big message */ + if (len >= LINE_MAX) { + buffer = alloca_safe(len + 9); + memcpy(buffer, "MESSAGE=", 8); + assert_se(vsnprintf(buffer + 8, len + 1, format, ap) == len); + } + + /* Strip trailing whitespace, keep prefix whitespace. */ + (void) strstrip(buffer); + + /* Suppress empty lines */ + if (isempty(buffer + 8)) + return 0; + + iov[0] = IOVEC_MAKE_STRING(buffer); + iov[1] = IOVEC_MAKE_STRING(p); + + return sd_journal_sendv(iov, 2); +} + +_printf_(1, 0) static int fill_iovec_sprintf( + const char *format, + va_list ap, + size_t extra, + struct iovec **ret_iov, + size_t *ret_n_iov) { + + PROTECT_ERRNO; + struct iovec *iov = NULL; + size_t n = 0; + + assert(ret_iov); + assert(ret_n_iov); + + if (extra > 0) { + if (!GREEDY_REALLOC0(iov, extra)) + return -ENOMEM; + + n = extra; + } + + CLEANUP_ARRAY(iov, n, iovec_array_free); + + while (format) { + _cleanup_free_ char *buffer = NULL; + va_list aq; + + va_copy(aq, ap); + if (vasprintf(&buffer, format, aq) < 0) { + va_end(aq); + return -ENOMEM; + } + va_end(aq); + + VA_FORMAT_ADVANCE(format, ap); + format = va_arg(ap, char *); + + if (!GREEDY_REALLOC(iov, n + 1)) + return -ENOMEM; + + /* strip trailing whitespace, keep prefixing whitespace */ + iov[n++] = IOVEC_MAKE_STRING(delete_trailing_chars(TAKE_PTR(buffer), NULL)); + } + + *ret_iov = TAKE_PTR(iov); + *ret_n_iov = n; + return 0; +} + +_public_ int sd_journal_send(const char *format, ...) { + struct iovec *iov = NULL; + size_t n_iov = 0; + va_list ap; + int r; + + CLEANUP_ARRAY(iov, n_iov, iovec_array_free); + + va_start(ap, format); + r = fill_iovec_sprintf(format, ap, 0, &iov, &n_iov); + va_end(ap); + if (r < 0) + return r; + + return sd_journal_sendv(iov, n_iov); +} + +_public_ int sd_journal_sendv(const struct iovec *iov, int n) { + PROTECT_ERRNO; + int fd, r; + _cleanup_close_ int buffer_fd = -EBADF; + struct iovec *w; + uint64_t *l; + int i, j = 0; + static const union sockaddr_union sa = { + .un.sun_family = AF_UNIX, + .un.sun_path = "/run/systemd/journal/socket", + }; + struct msghdr mh = { + .msg_name = (struct sockaddr*) &sa.sa, + .msg_namelen = SOCKADDR_UN_LEN(sa.un), + }; + ssize_t k; + bool have_syslog_identifier = false; + bool seal = true; + + assert_return(iov, -EINVAL); + assert_return(n > 0, -EINVAL); + + w = newa(struct iovec, n * 5 + 3); + l = newa(uint64_t, n); + + for (i = 0; i < n; i++) { + char *c, *nl; + + if (_unlikely_(!iov[i].iov_base || iov[i].iov_len <= 1)) + return -EINVAL; + + c = memchr(iov[i].iov_base, '=', iov[i].iov_len); + if (_unlikely_(!c || c == iov[i].iov_base)) + return -EINVAL; + + have_syslog_identifier = have_syslog_identifier || + (c == (char *) iov[i].iov_base + 17 && + startswith(iov[i].iov_base, "SYSLOG_IDENTIFIER")); + + nl = memchr(iov[i].iov_base, '\n', iov[i].iov_len); + if (nl) { + if (_unlikely_(nl < c)) + return -EINVAL; + + /* Already includes a newline? Bummer, then + * let's write the variable name, then a + * newline, then the size (64-bit LE), followed + * by the data and a final newline */ + + w[j++] = IOVEC_MAKE(iov[i].iov_base, c - (char*) iov[i].iov_base); + w[j++] = IOVEC_MAKE_STRING("\n"); + + l[i] = htole64(iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1); + w[j++] = IOVEC_MAKE(&l[i], sizeof(uint64_t)); + + w[j++] = IOVEC_MAKE(c + 1, iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1); + } else + /* Nothing special? Then just add the line and + * append a newline */ + w[j++] = iov[i]; + + w[j++] = IOVEC_MAKE_STRING("\n"); + } + + if (!have_syslog_identifier && + string_is_safe(program_invocation_short_name)) { + + /* Implicitly add program_invocation_short_name, if it + * is not set explicitly. We only do this for + * program_invocation_short_name, and nothing else + * since everything else is much nicer to retrieve + * from the outside. */ + + w[j++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER="); + w[j++] = IOVEC_MAKE_STRING(program_invocation_short_name); + w[j++] = IOVEC_MAKE_STRING("\n"); + } + + fd = journal_fd(); + if (_unlikely_(fd < 0)) + return fd; + + mh.msg_iov = w; + mh.msg_iovlen = j; + + k = sendmsg(fd, &mh, MSG_NOSIGNAL); + if (k >= 0) + return 0; + + /* Fail silently if the journal is not available */ + if (errno == ENOENT) + return 0; + + if (!IN_SET(errno, EMSGSIZE, ENOBUFS, EAGAIN)) + return -errno; + + /* Message doesn't fit... Let's dump the data in a memfd or + * temporary file and just pass a file descriptor of it to the + * other side. + * + * For the temporary files we use /dev/shm instead of /tmp + * here, since we want this to be a tmpfs, and one that is + * available from early boot on and where unprivileged users + * can create files. */ + buffer_fd = memfd_new(NULL); + if (buffer_fd < 0) { + if (buffer_fd == -ENOSYS) { + buffer_fd = open_tmpfile_unlinkable("/dev/shm", O_RDWR | O_CLOEXEC); + if (buffer_fd < 0) + return buffer_fd; + + seal = false; + } else + return buffer_fd; + } + + n = writev(buffer_fd, w, j); + if (n < 0) + return -errno; + + if (seal) { + r = memfd_set_sealed(buffer_fd); + if (r < 0) + return r; + } + + r = send_one_fd_sa(fd, buffer_fd, mh.msg_name, mh.msg_namelen, 0); + if (r == -ENOENT) + /* Fail silently if the journal is not available */ + return 0; + return r; +} + +static int fill_iovec_perror_and_send(const char *message, int skip, struct iovec iov[]) { + PROTECT_ERRNO; + size_t n, k; + + k = isempty(message) ? 0 : strlen(message) + 2; + n = 8 + k + 256 + 1; + + for (;;) { + char buffer[n]; + char* j; + + errno = 0; + j = strerror_r(_saved_errno_, buffer + 8 + k, n - 8 - k); + if (errno == 0) { + char error[STRLEN("ERRNO=") + DECIMAL_STR_MAX(int) + 1]; + + if (j != buffer + 8 + k) + memmove(buffer + 8 + k, j, strlen(j)+1); + + memcpy(buffer, "MESSAGE=", 8); + + if (k > 0) { + memcpy(buffer + 8, message, k - 2); + memcpy(buffer + 8 + k - 2, ": ", 2); + } + + xsprintf(error, "ERRNO=%i", _saved_errno_); + + assert_cc(3 == LOG_ERR); + iov[skip+0] = IOVEC_MAKE_STRING("PRIORITY=3"); + iov[skip+1] = IOVEC_MAKE_STRING(buffer); + iov[skip+2] = IOVEC_MAKE_STRING(error); + + return sd_journal_sendv(iov, skip + 3); + } + + if (errno != ERANGE) + return -errno; + + n *= 2; + } +} + +_public_ int sd_journal_perror(const char *message) { + struct iovec iovec[3]; + + return fill_iovec_perror_and_send(message, 0, iovec); +} + +_public_ int sd_journal_stream_fd(const char *identifier, int priority, int level_prefix) { + _cleanup_close_ int fd = -EBADF; + char *header; + size_t l; + int r; + + assert_return(priority >= 0, -EINVAL); + assert_return(priority <= 7, -EINVAL); + + fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); + if (fd < 0) + return -errno; + + r = connect_unix_path(fd, AT_FDCWD, "/run/systemd/journal/stdout"); + if (r < 0) + return r; + + if (shutdown(fd, SHUT_RD) < 0) + return -errno; + + (void) fd_inc_sndbuf(fd, SNDBUF_SIZE); + + identifier = strempty(identifier); + + l = strlen(identifier); + header = newa(char, l + 1 + 1 + 2 + 2 + 2 + 2 + 2); + + memcpy(header, identifier, l); + header[l++] = '\n'; + header[l++] = '\n'; /* unit id */ + header[l++] = '0' + priority; + header[l++] = '\n'; + header[l++] = '0' + !!level_prefix; + header[l++] = '\n'; + header[l++] = '0'; + header[l++] = '\n'; + header[l++] = '0'; + header[l++] = '\n'; + header[l++] = '0'; + header[l++] = '\n'; + + r = loop_write(fd, header, l); + if (r < 0) + return r; + + return TAKE_FD(fd); +} + +_public_ int sd_journal_print_with_location(int priority, const char *file, const char *line, const char *func, const char *format, ...) { + int r; + va_list ap; + + va_start(ap, format); + r = sd_journal_printv_with_location(priority, file, line, func, format, ap); + va_end(ap); + + return r; +} + +_public_ int sd_journal_printv_with_location(int priority, const char *file, const char *line, const char *func, const char *format, va_list ap) { + char p[STRLEN("PRIORITY=") + DECIMAL_STR_MAX(int) + 1]; + char sbuf[LINE_MAX + 8] = "MESSAGE="; + struct iovec iov[5]; + char *f; + int len; + char *buffer = sbuf; + va_list aq; + + assert_return(priority >= 0, -EINVAL); + assert_return(priority <= 7, -EINVAL); + assert_return(format, -EINVAL); + + xsprintf(p, "PRIORITY=%i", priority & LOG_PRIMASK); + + va_copy(aq, ap); + len = vsnprintf(buffer + 8, LINE_MAX, format, aq); + va_end(aq); + + if (len >= (int)LONG_LINE_MAX - 8) + return -ENOBUFS; + + /* Allocate large buffer to accommodate big message */ + if (len >= LINE_MAX) { + buffer = alloca_safe(len + 9); + memcpy(buffer, "MESSAGE=", 8); + assert_se(vsnprintf(buffer + 8, len + 1, format, ap) == len); + } + + /* Strip trailing whitespace, keep prefixing whitespace */ + (void) strstrip(buffer); + + /* Suppress empty lines */ + if (isempty(buffer + 8)) + return 0; + + /* func is initialized from __func__ which is not a macro, but + * a static const char[], hence cannot easily be prefixed with + * CODE_FUNC=, hence let's do it manually here. */ + ALLOCA_CODE_FUNC(f, func); + + iov[0] = IOVEC_MAKE_STRING(buffer); + iov[1] = IOVEC_MAKE_STRING(p); + iov[2] = IOVEC_MAKE_STRING(file); + iov[3] = IOVEC_MAKE_STRING(line); + iov[4] = IOVEC_MAKE_STRING(f); + + return sd_journal_sendv(iov, ELEMENTSOF(iov)); +} + +_public_ int sd_journal_send_with_location(const char *file, const char *line, const char *func, const char *format, ...) { + struct iovec *iov = NULL; + size_t n_iov = 0; + va_list ap; + char *f; + int r; + + CLEANUP_ARRAY(iov, n_iov, iovec_array_free); + + va_start(ap, format); + r = fill_iovec_sprintf(format, ap, 3, &iov, &n_iov); + va_end(ap); + if (r < 0) + return r; + + ALLOCA_CODE_FUNC(f, func); + + iov[0] = IOVEC_MAKE_STRING(file); + iov[1] = IOVEC_MAKE_STRING(line); + iov[2] = IOVEC_MAKE_STRING(f); + + r = sd_journal_sendv(iov, n_iov); + + iov[0] = iov[1] = iov[2] = (struct iovec) {}; + + return r; +} + +_public_ int sd_journal_sendv_with_location( + const char *file, const char *line, + const char *func, + const struct iovec *iov, int n) { + + struct iovec *niov; + char *f; + + assert_return(iov, -EINVAL); + assert_return(n > 0, -EINVAL); + + niov = newa(struct iovec, n + 3); + memcpy(niov, iov, sizeof(struct iovec) * n); + + ALLOCA_CODE_FUNC(f, func); + + niov[n++] = IOVEC_MAKE_STRING(file); + niov[n++] = IOVEC_MAKE_STRING(line); + niov[n++] = IOVEC_MAKE_STRING(f); + + return sd_journal_sendv(niov, n); +} + +_public_ int sd_journal_perror_with_location( + const char *file, const char *line, + const char *func, + const char *message) { + + struct iovec iov[6]; + char *f; + + ALLOCA_CODE_FUNC(f, func); + + iov[0] = IOVEC_MAKE_STRING(file); + iov[1] = IOVEC_MAKE_STRING(line); + iov[2] = IOVEC_MAKE_STRING(f); + + return fill_iovec_perror_and_send(message, 3, iov); +} diff --git a/src/libsystemd/sd-journal/journal-send.h b/src/libsystemd/sd-journal/journal-send.h new file mode 100644 index 0000000..24315e2 --- /dev/null +++ b/src/libsystemd/sd-journal/journal-send.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +int journal_fd_nonblock(bool nonblock); +void close_journal_fd(void); diff --git a/src/libsystemd/sd-journal/journal-vacuum.c b/src/libsystemd/sd-journal/journal-vacuum.c new file mode 100644 index 0000000..829edb3 --- /dev/null +++ b/src/libsystemd/sd-journal/journal-vacuum.c @@ -0,0 +1,330 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "sd-id128.h" + +#include "alloc-util.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "format-util.h" +#include "fs-util.h" +#include "journal-def.h" +#include "journal-file.h" +#include "journal-internal.h" +#include "journal-vacuum.h" +#include "sort-util.h" +#include "string-util.h" +#include "time-util.h" +#include "xattr-util.h" + +typedef struct vacuum_info { + uint64_t usage; + char *filename; + + uint64_t realtime; + + sd_id128_t seqnum_id; + uint64_t seqnum; + bool have_seqnum; +} vacuum_info; + +static int vacuum_info_compare(const vacuum_info *a, const vacuum_info *b) { + int r; + + if (a->have_seqnum && b->have_seqnum && + sd_id128_equal(a->seqnum_id, b->seqnum_id)) + return CMP(a->seqnum, b->seqnum); + + r = CMP(a->realtime, b->realtime); + if (r != 0) + return r; + + if (a->have_seqnum && b->have_seqnum) + return memcmp(&a->seqnum_id, &b->seqnum_id, 16); + + return strcmp(a->filename, b->filename); +} + +static void vacuum_info_array_free(vacuum_info *list, size_t n) { + if (!list) + return; + + FOREACH_ARRAY(i, list, n) + free(i->filename); + + free(list); +} + +static void patch_realtime( + int fd, + const char *fn, + const struct stat *st, + unsigned long long *realtime) { + + usec_t x; + + /* The timestamp was determined by the file name, but let's see if the file might actually be older + * than the file name suggested... */ + + assert(fd >= 0); + assert(fn); + assert(st); + assert(realtime); + + x = timespec_load(&st->st_ctim); + if (timestamp_is_set(x) && x < *realtime) + *realtime = x; + + x = timespec_load(&st->st_atim); + if (timestamp_is_set(x) && x < *realtime) + *realtime = x; + + x = timespec_load(&st->st_mtim); + if (timestamp_is_set(x) && x < *realtime) + *realtime = x; + + /* Let's read the original creation time, if possible. Ideally we'd just query the creation time the + * FS might provide, but unfortunately there's currently no sane API to query it. Hence let's + * implement this manually... */ + + if (fd_getcrtime_at(fd, fn, AT_SYMLINK_FOLLOW, &x) >= 0 && x < *realtime) + *realtime = x; +} + +static int journal_file_empty(int dir_fd, const char *name) { + _cleanup_close_ int fd = -EBADF; + struct stat st; + le64_t n_entries; + ssize_t n; + + fd = openat(dir_fd, name, O_RDONLY|O_CLOEXEC|O_NOFOLLOW|O_NONBLOCK|O_NOATIME); + if (fd < 0) { + /* Maybe failed due to O_NOATIME and lack of privileges? */ + fd = openat(dir_fd, name, O_RDONLY|O_CLOEXEC|O_NOFOLLOW|O_NONBLOCK); + if (fd < 0) + return -errno; + } + + if (fstat(fd, &st) < 0) + return -errno; + + /* If an offline file doesn't even have a header we consider it empty */ + if (st.st_size < (off_t) sizeof(Header)) + return 1; + + /* If the number of entries is empty, we consider it empty, too */ + n = pread(fd, &n_entries, sizeof(n_entries), offsetof(Header, n_entries)); + if (n < 0) + return -errno; + if (n != sizeof(n_entries)) + return -EIO; + + return le64toh(n_entries) <= 0; +} + +int journal_directory_vacuum( + const char *directory, + uint64_t max_use, + uint64_t n_max_files, + usec_t max_retention_usec, + usec_t *oldest_usec, + bool verbose) { + + uint64_t sum = 0, freed = 0, n_active_files = 0; + size_t n_list = 0, i; + _cleanup_closedir_ DIR *d = NULL; + vacuum_info *list = NULL; + usec_t retention_limit = 0; + int r; + + CLEANUP_ARRAY(list, n_list, vacuum_info_array_free); + + assert(directory); + + if (max_use <= 0 && max_retention_usec <= 0 && n_max_files <= 0) + return 0; + + if (max_retention_usec > 0) + retention_limit = usec_sub_unsigned(now(CLOCK_REALTIME), max_retention_usec); + + d = opendir(directory); + if (!d) + return -errno; + + FOREACH_DIRENT_ALL(de, d, return -errno) { + unsigned long long seqnum = 0, realtime; + _cleanup_free_ char *p = NULL; + sd_id128_t seqnum_id; + bool have_seqnum; + uint64_t size; + struct stat st; + size_t q; + + if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) { + log_debug_errno(errno, "Failed to stat file %s while vacuuming, ignoring: %m", de->d_name); + continue; + } + + if (!S_ISREG(st.st_mode)) + continue; + + size = 512UL * (uint64_t) st.st_blocks; + + q = strlen(de->d_name); + + if (endswith(de->d_name, ".journal")) { + + /* Vacuum archived files. Active files are + * left around */ + + if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8) { + n_active_files++; + sum += size; + continue; + } + + if (de->d_name[q-8-16-1] != '-' || + de->d_name[q-8-16-1-16-1] != '-' || + de->d_name[q-8-16-1-16-1-32-1] != '@') { + n_active_files++; + sum += size; + continue; + } + + p = strdup(de->d_name); + if (!p) + return -ENOMEM; + + de->d_name[q-8-16-1-16-1] = 0; + if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) { + n_active_files++; + sum += size; + continue; + } + + if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) { + n_active_files++; + sum += size; + continue; + } + + have_seqnum = true; + + } else if (endswith(de->d_name, ".journal~")) { + unsigned long long tmp; + + /* seqnum_id won't be initialised before use below, so set to 0 */ + seqnum_id = SD_ID128_NULL; + + /* Vacuum corrupted files */ + + if (q < 1 + 16 + 1 + 16 + 8 + 1) { + n_active_files++; + sum += size; + continue; + } + + if (de->d_name[q-1-8-16-1] != '-' || + de->d_name[q-1-8-16-1-16-1] != '@') { + n_active_files++; + sum += size; + continue; + } + + p = strdup(de->d_name); + if (!p) + return -ENOMEM; + + if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) { + n_active_files++; + sum += size; + continue; + } + + have_seqnum = false; + } else { + /* We do not vacuum unknown files! */ + log_debug("Not vacuuming unknown file %s.", de->d_name); + continue; + } + + r = journal_file_empty(dirfd(d), p); + if (r < 0) { + log_debug_errno(r, "Failed check if %s is empty, ignoring: %m", p); + continue; + } + if (r > 0) { + /* Always vacuum empty non-online files. */ + + r = unlinkat_deallocate(dirfd(d), p, 0); + if (r >= 0) { + + log_full(verbose ? LOG_INFO : LOG_DEBUG, + "Deleted empty archived journal %s/%s (%s).", directory, p, FORMAT_BYTES(size)); + + freed += size; + } else if (r != -ENOENT) + log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, + "Failed to delete empty archived journal %s/%s: %m", + directory, p); + + continue; + } + + patch_realtime(dirfd(d), p, &st, &realtime); + + if (!GREEDY_REALLOC(list, n_list + 1)) + return -ENOMEM; + + list[n_list++] = (vacuum_info) { + .filename = TAKE_PTR(p), + .usage = size, + .seqnum = seqnum, + .realtime = realtime, + .seqnum_id = seqnum_id, + .have_seqnum = have_seqnum, + }; + + sum += size; + } + + typesafe_qsort(list, n_list, vacuum_info_compare); + + for (i = 0; i < n_list; i++) { + uint64_t left; + + left = n_active_files + n_list - i; + + if ((max_retention_usec <= 0 || list[i].realtime >= retention_limit) && + (max_use <= 0 || sum <= max_use) && + (n_max_files <= 0 || left <= n_max_files)) + break; + + r = unlinkat_deallocate(dirfd(d), list[i].filename, 0); + if (r >= 0) { + log_full(verbose ? LOG_INFO : LOG_DEBUG, "Deleted archived journal %s/%s (%s).", + directory, list[i].filename, FORMAT_BYTES(list[i].usage)); + freed += list[i].usage; + + if (list[i].usage < sum) + sum -= list[i].usage; + else + sum = 0; + + } else if (r != -ENOENT) + log_ratelimit_warning_errno(r, JOURNAL_LOG_RATELIMIT, + "Failed to delete archived journal %s/%s: %m", + directory, list[i].filename); + } + + if (oldest_usec && i < n_list && (*oldest_usec == 0 || list[i].realtime < *oldest_usec)) + *oldest_usec = list[i].realtime; + + log_full(verbose ? LOG_INFO : LOG_DEBUG, "Vacuuming done, freed %s of archived journals from %s.", + FORMAT_BYTES(freed), directory); + + return 0; +} diff --git a/src/libsystemd/sd-journal/journal-vacuum.h b/src/libsystemd/sd-journal/journal-vacuum.h new file mode 100644 index 0000000..d87c847 --- /dev/null +++ b/src/libsystemd/sd-journal/journal-vacuum.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <inttypes.h> +#include <stdbool.h> + +#include "time-util.h" + +int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t n_max_files, usec_t max_retention_usec, usec_t *oldest_usec, bool verbose); diff --git a/src/libsystemd/sd-journal/journal-verify.c b/src/libsystemd/sd-journal/journal-verify.c new file mode 100644 index 0000000..bdaa01d --- /dev/null +++ b/src/libsystemd/sd-journal/journal-verify.c @@ -0,0 +1,1436 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <stddef.h> +#include <sys/mman.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "compress.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "journal-authenticate.h" +#include "journal-def.h" +#include "journal-file.h" +#include "journal-verify.h" +#include "lookup3.h" +#include "macro.h" +#include "terminal-util.h" +#include "tmpfile-util.h" + +static void draw_progress(uint64_t p, usec_t *last_usec) { + unsigned n, i, j, k; + usec_t z, x; + + if (!on_tty()) + return; + + z = now(CLOCK_MONOTONIC); + x = *last_usec; + + if (x != 0 && x + 40 * USEC_PER_MSEC > z) + return; + + *last_usec = z; + + n = (3 * columns()) / 4; + j = (n * (unsigned) p) / 65535ULL; + k = n - j; + + fputs("\r", stdout); + if (colors_enabled()) + fputs("\x1B[?25l", stdout); + + fputs(ansi_highlight_green(), stdout); + + for (i = 0; i < j; i++) + fputs("\xe2\x96\x88", stdout); + + fputs(ansi_normal(), stdout); + + for (i = 0; i < k; i++) + fputs("\xe2\x96\x91", stdout); + + printf(" %3"PRIu64"%%", 100U * p / 65535U); + + fputs("\r", stdout); + if (colors_enabled()) + fputs("\x1B[?25h", stdout); + + fflush(stdout); +} + +static uint64_t scale_progress(uint64_t scale, uint64_t p, uint64_t m) { + /* Calculates scale * p / m, but handles m == 0 safely, and saturates. + * Currently all callers use m >= 1, but we keep the check to be defensive. + */ + + if (p >= m || m == 0) + return scale; + + return scale * p / m; +} + +static void flush_progress(void) { + unsigned n, i; + + if (!on_tty()) + return; + + n = (3 * columns()) / 4; + + putchar('\r'); + + for (i = 0; i < n + 5; i++) + putchar(' '); + + putchar('\r'); + fflush(stdout); +} + +#define debug(_offset, _fmt, ...) do { \ + flush_progress(); \ + log_debug(OFSfmt": " _fmt, _offset, ##__VA_ARGS__); \ + } while (0) + +#define warning(_offset, _fmt, ...) do { \ + flush_progress(); \ + log_warning(OFSfmt": " _fmt, _offset, ##__VA_ARGS__); \ + } while (0) + +#define error(_offset, _fmt, ...) do { \ + flush_progress(); \ + log_error(OFSfmt": " _fmt, (uint64_t)_offset, ##__VA_ARGS__); \ + } while (0) + +#define error_errno(_offset, error, _fmt, ...) do { \ + flush_progress(); \ + log_error_errno(error, OFSfmt": " _fmt, (uint64_t)_offset, ##__VA_ARGS__); \ + } while (0) + +static int hash_payload(JournalFile *f, Object *o, uint64_t offset, const uint8_t *src, uint64_t size, uint64_t *res_hash) { + Compression c; + int r; + + assert(o); + assert(src); + assert(res_hash); + + c = COMPRESSION_FROM_OBJECT(o); + if (c < 0) + return -EBADMSG; + if (c != COMPRESSION_NONE) { + _cleanup_free_ void *b = NULL; + size_t b_size; + + r = decompress_blob(c, src, size, &b, &b_size, 0); + if (r < 0) { + error_errno(offset, r, "%s decompression failed: %m", + compression_to_string(c)); + return r; + } + + *res_hash = journal_file_hash_data(f, b, b_size); + } else + *res_hash = journal_file_hash_data(f, src, size); + + return 0; +} + +static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o) { + assert(f); + assert(offset); + assert(o); + + /* This does various superficial tests about the length an + * possible field values. It does not follow any references to + * other objects. */ + + if ((o->object.flags & _OBJECT_COMPRESSED_MASK) != 0 && + o->object.type != OBJECT_DATA) { + error(offset, + "Found compressed object of type %s that isn't of type data, which is not allowed.", + journal_object_type_to_string(o->object.type)); + return -EBADMSG; + } + + switch (o->object.type) { + + case OBJECT_DATA: { + uint64_t h1, h2; + int r; + + if (le64toh(o->data.entry_offset) == 0) + warning(offset, "Unused data (entry_offset==0)"); + + if ((le64toh(o->data.entry_offset) == 0) ^ (le64toh(o->data.n_entries) == 0)) { + error(offset, "Bad n_entries: %"PRIu64, le64toh(o->data.n_entries)); + return -EBADMSG; + } + + if (le64toh(o->object.size) - journal_file_data_payload_offset(f) <= 0) { + error(offset, "Bad object size (<= %zu): %"PRIu64, + journal_file_data_payload_offset(f), + le64toh(o->object.size)); + return -EBADMSG; + } + + h1 = le64toh(o->data.hash); + r = hash_payload(f, o, offset, journal_file_data_payload_field(f, o), + le64toh(o->object.size) - journal_file_data_payload_offset(f), + &h2); + if (r < 0) + return r; + + if (h1 != h2) { + error(offset, "Invalid hash (%08" PRIx64 " vs. %08" PRIx64 ")", h1, h2); + return -EBADMSG; + } + + if (!VALID64(le64toh(o->data.next_hash_offset)) || + !VALID64(le64toh(o->data.next_field_offset)) || + !VALID64(le64toh(o->data.entry_offset)) || + !VALID64(le64toh(o->data.entry_array_offset))) { + error(offset, "Invalid offset (next_hash_offset="OFSfmt", next_field_offset="OFSfmt", entry_offset="OFSfmt", entry_array_offset="OFSfmt, + le64toh(o->data.next_hash_offset), + le64toh(o->data.next_field_offset), + le64toh(o->data.entry_offset), + le64toh(o->data.entry_array_offset)); + return -EBADMSG; + } + + break; + } + + case OBJECT_FIELD: { + uint64_t h1, h2; + int r; + + if (le64toh(o->object.size) - offsetof(Object, field.payload) <= 0) { + error(offset, + "Bad field size (<= %zu): %"PRIu64, + offsetof(Object, field.payload), + le64toh(o->object.size)); + return -EBADMSG; + } + + h1 = le64toh(o->field.hash); + r = hash_payload(f, o, offset, o->field.payload, + le64toh(o->object.size) - offsetof(Object, field.payload), + &h2); + if (r < 0) + return r; + + if (h1 != h2) { + error(offset, "Invalid hash (%08" PRIx64 " vs. %08" PRIx64 ")", h1, h2); + return -EBADMSG; + } + + if (!VALID64(le64toh(o->field.next_hash_offset)) || + !VALID64(le64toh(o->field.head_data_offset))) { + error(offset, + "Invalid offset (next_hash_offset="OFSfmt", head_data_offset="OFSfmt, + le64toh(o->field.next_hash_offset), + le64toh(o->field.head_data_offset)); + return -EBADMSG; + } + break; + } + + case OBJECT_ENTRY: + if ((le64toh(o->object.size) - offsetof(Object, entry.items)) % journal_file_entry_item_size(f) != 0) { + error(offset, + "Bad entry size (<= %zu): %"PRIu64, + offsetof(Object, entry.items), + le64toh(o->object.size)); + return -EBADMSG; + } + + if ((le64toh(o->object.size) - offsetof(Object, entry.items)) / journal_file_entry_item_size(f) <= 0) { + error(offset, + "Invalid number items in entry: %"PRIu64, + (le64toh(o->object.size) - offsetof(Object, entry.items)) / journal_file_entry_item_size(f)); + return -EBADMSG; + } + + if (le64toh(o->entry.seqnum) <= 0) { + error(offset, + "Invalid entry seqnum: %"PRIx64, + le64toh(o->entry.seqnum)); + return -EBADMSG; + } + + if (!VALID_REALTIME(le64toh(o->entry.realtime))) { + error(offset, + "Invalid entry realtime timestamp: %"PRIu64, + le64toh(o->entry.realtime)); + return -EBADMSG; + } + + if (!VALID_MONOTONIC(le64toh(o->entry.monotonic))) { + error(offset, + "Invalid entry monotonic timestamp: %"PRIu64, + le64toh(o->entry.monotonic)); + return -EBADMSG; + } + + for (uint64_t i = 0; i < journal_file_entry_n_items(f, o); i++) { + if (journal_file_entry_item_object_offset(f, o, i) == 0 || + !VALID64(journal_file_entry_item_object_offset(f, o, i))) { + error(offset, + "Invalid entry item (%"PRIu64"/%"PRIu64") offset: "OFSfmt, + i, journal_file_entry_n_items(f, o), + journal_file_entry_item_object_offset(f, o, i)); + return -EBADMSG; + } + } + + break; + + case OBJECT_DATA_HASH_TABLE: + case OBJECT_FIELD_HASH_TABLE: + if ((le64toh(o->object.size) - offsetof(Object, hash_table.items)) % sizeof(HashItem) != 0 || + (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem) <= 0) { + error(offset, + "Invalid %s size: %"PRIu64, + journal_object_type_to_string(o->object.type), + le64toh(o->object.size)); + return -EBADMSG; + } + + for (uint64_t i = 0; i < journal_file_hash_table_n_items(o); i++) { + if (o->hash_table.items[i].head_hash_offset != 0 && + !VALID64(le64toh(o->hash_table.items[i].head_hash_offset))) { + error(offset, + "Invalid %s hash table item (%"PRIu64"/%"PRIu64") head_hash_offset: "OFSfmt, + journal_object_type_to_string(o->object.type), + i, journal_file_hash_table_n_items(o), + le64toh(o->hash_table.items[i].head_hash_offset)); + return -EBADMSG; + } + if (o->hash_table.items[i].tail_hash_offset != 0 && + !VALID64(le64toh(o->hash_table.items[i].tail_hash_offset))) { + error(offset, + "Invalid %s hash table item (%"PRIu64"/%"PRIu64") tail_hash_offset: "OFSfmt, + journal_object_type_to_string(o->object.type), + i, journal_file_hash_table_n_items(o), + le64toh(o->hash_table.items[i].tail_hash_offset)); + return -EBADMSG; + } + + if ((o->hash_table.items[i].head_hash_offset != 0) != + (o->hash_table.items[i].tail_hash_offset != 0)) { + error(offset, + "Invalid %s hash table item (%"PRIu64"/%"PRIu64"): head_hash_offset="OFSfmt" tail_hash_offset="OFSfmt, + journal_object_type_to_string(o->object.type), + i, journal_file_hash_table_n_items(o), + le64toh(o->hash_table.items[i].head_hash_offset), + le64toh(o->hash_table.items[i].tail_hash_offset)); + return -EBADMSG; + } + } + + break; + + case OBJECT_ENTRY_ARRAY: + if ((le64toh(o->object.size) - offsetof(Object, entry_array.items)) % journal_file_entry_array_item_size(f) != 0 || + (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / journal_file_entry_array_item_size(f) <= 0) { + error(offset, + "Invalid object entry array size: %"PRIu64, + le64toh(o->object.size)); + return -EBADMSG; + } + + if (!VALID64(le64toh(o->entry_array.next_entry_array_offset))) { + error(offset, + "Invalid object entry array next_entry_array_offset: "OFSfmt, + le64toh(o->entry_array.next_entry_array_offset)); + return -EBADMSG; + } + + for (uint64_t i = 0; i < journal_file_entry_array_n_items(f, o); i++) { + uint64_t q = journal_file_entry_array_item(f, o, i); + if (q != 0 && !VALID64(q)) { + error(offset, + "Invalid object entry array item (%"PRIu64"/%"PRIu64"): "OFSfmt, + i, journal_file_entry_array_n_items(f, o), q); + return -EBADMSG; + } + } + + break; + + case OBJECT_TAG: + if (le64toh(o->object.size) != sizeof(TagObject)) { + error(offset, + "Invalid object tag size: %"PRIu64, + le64toh(o->object.size)); + return -EBADMSG; + } + + if (!VALID_EPOCH(le64toh(o->tag.epoch))) { + error(offset, + "Invalid object tag epoch: %"PRIu64, + le64toh(o->tag.epoch)); + return -EBADMSG; + } + + break; + } + + return 0; +} + +static int write_uint64(FILE *fp, uint64_t p) { + if (fwrite(&p, sizeof(p), 1, fp) != 1) + return -EIO; + + return 0; +} + +static int contains_uint64(MMapFileDescriptor *f, uint64_t n, uint64_t p) { + uint64_t a, b; + int r; + + assert(f); + + /* Bisection ... */ + + a = 0; b = n; + while (a < b) { + uint64_t c, *z; + + c = (a + b) / 2; + + r = mmap_cache_fd_get(f, 0, false, c * sizeof(uint64_t), sizeof(uint64_t), NULL, (void **) &z); + if (r < 0) + return r; + + if (*z == p) + return 1; + + if (a + 1 >= b) + return 0; + + if (p < *z) + b = c; + else + a = c; + } + + return 0; +} + +static int verify_data( + JournalFile *f, + Object *o, uint64_t p, + MMapFileDescriptor *cache_entry_fd, uint64_t n_entries, + MMapFileDescriptor *cache_entry_array_fd, uint64_t n_entry_arrays) { + + uint64_t i, n, a, last, q; + int r; + + assert(f); + assert(o); + assert(cache_entry_fd); + assert(cache_entry_array_fd); + + n = le64toh(o->data.n_entries); + a = le64toh(o->data.entry_array_offset); + + /* Entry array means at least two objects */ + if (a && n < 2) { + error(p, "Entry array present (entry_array_offset="OFSfmt", but n_entries=%"PRIu64")", a, n); + return -EBADMSG; + } + + if (n == 0) + return 0; + + /* We already checked that earlier */ + assert(o->data.entry_offset); + + last = q = le64toh(o->data.entry_offset); + if (!contains_uint64(cache_entry_fd, n_entries, q)) { + error(p, "Data object references invalid entry at "OFSfmt, q); + return -EBADMSG; + } + + r = journal_file_move_to_entry_by_offset(f, q, DIRECTION_DOWN, NULL, NULL); + if (r < 0) + return r; + if (r == 0) { + error(q, "Entry object doesn't exist in the main entry array"); + return -EBADMSG; + } + + i = 1; + while (i < n) { + uint64_t next, m, j; + + if (a == 0) { + error(p, "Array chain too short"); + return -EBADMSG; + } + + if (!contains_uint64(cache_entry_array_fd, n_entry_arrays, a)) { + error(p, "Invalid array offset "OFSfmt, a); + return -EBADMSG; + } + + r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o); + if (r < 0) + return r; + + next = le64toh(o->entry_array.next_entry_array_offset); + if (next != 0 && next <= a) { + error(p, "Array chain has cycle (jumps back from "OFSfmt" to "OFSfmt")", a, next); + return -EBADMSG; + } + + m = journal_file_entry_array_n_items(f, o); + for (j = 0; i < n && j < m; i++, j++) { + + q = journal_file_entry_array_item(f, o, j); + if (q <= last) { + error(p, "Data object's entry array not sorted (%"PRIu64" <= %"PRIu64")", q, last); + return -EBADMSG; + } + last = q; + + if (!contains_uint64(cache_entry_fd, n_entries, q)) { + error(p, "Data object references invalid entry at "OFSfmt, q); + return -EBADMSG; + } + + r = journal_file_move_to_entry_by_offset(f, q, DIRECTION_DOWN, NULL, NULL); + if (r < 0) + return r; + if (r == 0) { + error(q, "Entry object doesn't exist in the main entry array"); + return -EBADMSG; + } + + /* Pointer might have moved, reposition */ + r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o); + if (r < 0) + return r; + } + + a = next; + } + + return 0; +} + +static int verify_data_hash_table( + JournalFile *f, + MMapFileDescriptor *cache_data_fd, uint64_t n_data, + MMapFileDescriptor *cache_entry_fd, uint64_t n_entries, + MMapFileDescriptor *cache_entry_array_fd, uint64_t n_entry_arrays, + usec_t *last_usec, + bool show_progress) { + + uint64_t i, n; + int r; + + assert(f); + assert(cache_data_fd); + assert(cache_entry_fd); + assert(cache_entry_array_fd); + assert(last_usec); + + n = le64toh(f->header->data_hash_table_size) / sizeof(HashItem); + if (n <= 0) + return 0; + + r = journal_file_map_data_hash_table(f); + if (r < 0) + return log_error_errno(r, "Failed to map data hash table: %m"); + + for (i = 0; i < n; i++) { + uint64_t last = 0, p; + + if (show_progress) + draw_progress(0xC000 + scale_progress(0x3FFF, i, n), last_usec); + + p = le64toh(f->data_hash_table[i].head_hash_offset); + while (p != 0) { + Object *o; + uint64_t next; + + if (!contains_uint64(cache_data_fd, n_data, p)) { + error(p, "Invalid data object at hash entry %"PRIu64" of %"PRIu64, i, n); + return -EBADMSG; + } + + r = journal_file_move_to_object(f, OBJECT_DATA, p, &o); + if (r < 0) + return r; + + next = le64toh(o->data.next_hash_offset); + if (next != 0 && next <= p) { + error(p, "Hash chain has a cycle in hash entry %"PRIu64" of %"PRIu64, i, n); + return -EBADMSG; + } + + if (le64toh(o->data.hash) % n != i) { + error(p, "Hash value mismatch in hash entry %"PRIu64" of %"PRIu64, i, n); + return -EBADMSG; + } + + r = verify_data(f, o, p, cache_entry_fd, n_entries, cache_entry_array_fd, n_entry_arrays); + if (r < 0) + return r; + + last = p; + p = next; + } + + if (last != le64toh(f->data_hash_table[i].tail_hash_offset)) { + error(p, + "Tail hash pointer mismatch in hash table (%"PRIu64" != %"PRIu64")", + last, + le64toh(f->data_hash_table[i].tail_hash_offset)); + return -EBADMSG; + } + } + + return 0; +} + +static int data_object_in_hash_table(JournalFile *f, uint64_t hash, uint64_t p) { + uint64_t n, h, q; + int r; + assert(f); + + n = le64toh(f->header->data_hash_table_size) / sizeof(HashItem); + if (n <= 0) + return 0; + + r = journal_file_map_data_hash_table(f); + if (r < 0) + return log_error_errno(r, "Failed to map data hash table: %m"); + + h = hash % n; + + q = le64toh(f->data_hash_table[h].head_hash_offset); + while (q != 0) { + Object *o; + + if (p == q) + return 1; + + r = journal_file_move_to_object(f, OBJECT_DATA, q, &o); + if (r < 0) + return r; + + q = le64toh(o->data.next_hash_offset); + } + + return 0; +} + +static int verify_entry( + JournalFile *f, + Object *o, uint64_t p, + MMapFileDescriptor *cache_data_fd, uint64_t n_data, + bool last) { + + uint64_t i, n; + int r; + + assert(f); + assert(o); + assert(cache_data_fd); + + n = journal_file_entry_n_items(f, o); + for (i = 0; i < n; i++) { + uint64_t q; + Object *u; + + q = journal_file_entry_item_object_offset(f, o, i); + + if (!contains_uint64(cache_data_fd, n_data, q)) { + error(p, "Invalid data object of entry"); + return -EBADMSG; + } + + r = journal_file_move_to_object(f, OBJECT_DATA, q, &u); + if (r < 0) + return r; + + r = data_object_in_hash_table(f, le64toh(u->data.hash), q); + if (r < 0) + return r; + if (r == 0) { + error(p, "Data object missing from hash table"); + return -EBADMSG; + } + + /* Pointer might have moved, reposition */ + r = journal_file_move_to_object(f, OBJECT_DATA, q, &u); + if (r < 0) + return r; + + r = journal_file_move_to_entry_by_offset_for_data(f, u, p, DIRECTION_DOWN, NULL, NULL); + if (r < 0) + return r; + + /* The last entry object has a very high chance of not being referenced as journal files + * almost always run out of space during linking of entry items when trying to add a new + * entry array so let's not error in that scenario. */ + if (r == 0 && !last) { + error(p, "Entry object not referenced by linked data object at "OFSfmt, q); + return -EBADMSG; + } + } + + return 0; +} + +static int verify_entry_array( + JournalFile *f, + MMapFileDescriptor *cache_data_fd, uint64_t n_data, + MMapFileDescriptor *cache_entry_fd, uint64_t n_entries, + MMapFileDescriptor *cache_entry_array_fd, uint64_t n_entry_arrays, + usec_t *last_usec, + bool show_progress) { + + uint64_t i = 0, a, n, last = 0; + int r; + + assert(f); + assert(cache_data_fd); + assert(cache_entry_fd); + assert(cache_entry_array_fd); + assert(last_usec); + + n = le64toh(f->header->n_entries); + a = le64toh(f->header->entry_array_offset); + while (i < n) { + uint64_t next, m, j; + Object *o; + + if (show_progress) + draw_progress(0x8000 + scale_progress(0x3FFF, i, n), last_usec); + + if (a == 0) { + error(a, "Array chain too short at %"PRIu64" of %"PRIu64, i, n); + return -EBADMSG; + } + + if (!contains_uint64(cache_entry_array_fd, n_entry_arrays, a)) { + error(a, "Invalid array %"PRIu64" of %"PRIu64, i, n); + return -EBADMSG; + } + + r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o); + if (r < 0) + return r; + + next = le64toh(o->entry_array.next_entry_array_offset); + if (next != 0 && next <= a) { + error(a, "Array chain has cycle at %"PRIu64" of %"PRIu64" (jumps back from to "OFSfmt")", i, n, next); + return -EBADMSG; + } + + m = journal_file_entry_array_n_items(f, o); + for (j = 0; i < n && j < m; i++, j++) { + uint64_t p; + + p = journal_file_entry_array_item(f, o, j); + if (p <= last) { + error(a, "Entry array not sorted at %"PRIu64" of %"PRIu64, i, n); + return -EBADMSG; + } + last = p; + + if (!contains_uint64(cache_entry_fd, n_entries, p)) { + error(a, "Invalid array entry at %"PRIu64" of %"PRIu64, i, n); + return -EBADMSG; + } + + r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o); + if (r < 0) + return r; + + r = verify_entry(f, o, p, cache_data_fd, n_data, /*last=*/ i + 1 == n); + if (r < 0) + return r; + + /* Pointer might have moved, reposition */ + r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o); + if (r < 0) + return r; + } + + a = next; + } + + return 0; +} + +static int verify_hash_table( + Object *o, uint64_t p, uint64_t *n_hash_tables, uint64_t header_offset, uint64_t header_size) { + + assert(o); + assert(n_hash_tables); + + if (*n_hash_tables > 1) { + error(p, + "More than one %s: %" PRIu64, + journal_object_type_to_string(o->object.type), + *n_hash_tables); + return -EBADMSG; + } + + if (header_offset != p + offsetof(Object, hash_table.items)) { + error(p, + "Header offset for %s invalid (%" PRIu64 " != %" PRIu64 ")", + journal_object_type_to_string(o->object.type), + header_offset, + p + offsetof(Object, hash_table.items)); + return -EBADMSG; + } + + if (header_size != le64toh(o->object.size) - offsetof(Object, hash_table.items)) { + error(p, + "Header size for %s invalid (%" PRIu64 " != %" PRIu64 ")", + journal_object_type_to_string(o->object.type), + header_size, + le64toh(o->object.size) - offsetof(Object, hash_table.items)); + return -EBADMSG; + } + + (*n_hash_tables)++; + + return 0; +} + +int journal_file_verify( + JournalFile *f, + const char *key, + usec_t *first_contained, usec_t *last_validated, usec_t *last_contained, + bool show_progress) { + int r; + Object *o; + uint64_t p = 0, last_epoch = 0, last_tag_realtime = 0; + + uint64_t entry_seqnum = 0, entry_monotonic = 0, entry_realtime = 0; + usec_t min_entry_realtime = USEC_INFINITY, max_entry_realtime = 0; + sd_id128_t entry_boot_id = {}; /* Unnecessary initialization to appease gcc */ + bool entry_seqnum_set = false, entry_monotonic_set = false, entry_realtime_set = false, found_main_entry_array = false; + uint64_t n_objects = 0, n_entries = 0, n_data = 0, n_fields = 0, n_data_hash_tables = 0, n_field_hash_tables = 0, n_entry_arrays = 0, n_tags = 0; + usec_t last_usec = 0; + _cleanup_close_ int data_fd = -EBADF, entry_fd = -EBADF, entry_array_fd = -EBADF; + _cleanup_fclose_ FILE *data_fp = NULL, *entry_fp = NULL, *entry_array_fp = NULL; + MMapFileDescriptor *cache_data_fd = NULL, *cache_entry_fd = NULL, *cache_entry_array_fd = NULL; + unsigned i; + bool found_last = false; + const char *tmp_dir = NULL; + MMapCache *m; + +#if HAVE_GCRYPT + uint64_t last_tag = 0; +#endif + assert(f); + + if (key) { +#if HAVE_GCRYPT + r = journal_file_parse_verification_key(f, key); + if (r < 0) { + log_error("Failed to parse seed."); + return r; + } +#else + return -EOPNOTSUPP; +#endif + } else if (JOURNAL_HEADER_SEALED(f->header)) + return -ENOKEY; + + r = var_tmp_dir(&tmp_dir); + if (r < 0) { + log_error_errno(r, "Failed to determine temporary directory: %m"); + goto fail; + } + + data_fd = open_tmpfile_unlinkable(tmp_dir, O_RDWR | O_CLOEXEC); + if (data_fd < 0) { + r = log_error_errno(data_fd, "Failed to create data file: %m"); + goto fail; + } + + entry_fd = open_tmpfile_unlinkable(tmp_dir, O_RDWR | O_CLOEXEC); + if (entry_fd < 0) { + r = log_error_errno(entry_fd, "Failed to create entry file: %m"); + goto fail; + } + + entry_array_fd = open_tmpfile_unlinkable(tmp_dir, O_RDWR | O_CLOEXEC); + if (entry_array_fd < 0) { + r = log_error_errno(entry_array_fd, + "Failed to create entry array file: %m"); + goto fail; + } + + m = mmap_cache_fd_cache(f->cache_fd); + r = mmap_cache_add_fd(m, data_fd, PROT_READ|PROT_WRITE, &cache_data_fd); + if (r < 0) { + log_error_errno(r, "Failed to cache data file: %m"); + goto fail; + } + + r = mmap_cache_add_fd(m, entry_fd, PROT_READ|PROT_WRITE, &cache_entry_fd); + if (r < 0) { + log_error_errno(r, "Failed to cache entry file: %m"); + goto fail; + } + + r = mmap_cache_add_fd(m, entry_array_fd, PROT_READ|PROT_WRITE, &cache_entry_array_fd); + if (r < 0) { + log_error_errno(r, "Failed to cache entry array file: %m"); + goto fail; + } + + r = take_fdopen_unlocked(&data_fd, "w+", &data_fp); + if (r < 0) { + log_error_errno(r, "Failed to open data file stream: %m"); + goto fail; + } + + r = take_fdopen_unlocked(&entry_fd, "w+", &entry_fp); + if (r < 0) { + log_error_errno(r, "Failed to open entry file stream: %m"); + goto fail; + } + + r = take_fdopen_unlocked(&entry_array_fd, "w+", &entry_array_fp); + if (r < 0) { + log_error_errno(r, "Failed to open entry array file stream: %m"); + goto fail; + } + + if (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SUPPORTED) { + log_error("Cannot verify file with unknown extensions."); + r = -EOPNOTSUPP; + goto fail; + } + + for (i = 0; i < sizeof(f->header->reserved); i++) + if (f->header->reserved[i] != 0) { + error(offsetof(Header, reserved[i]), "Reserved field is non-zero"); + r = -EBADMSG; + goto fail; + } + + if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_SEALED_CONTINUOUS(f->header)) + warning(p, + "This log file was sealed with an old journald version where the sequence of seals might not be continuous. We cannot guarantee completeness."); + + /* First iteration: we go through all objects, verify the + * superficial structure, headers, hashes. */ + + p = le64toh(f->header->header_size); + for (;;) { + /* Early exit if there are no objects in the file, at all */ + if (le64toh(f->header->tail_object_offset) == 0) + break; + + if (show_progress) + draw_progress(scale_progress(0x7FFF, p, le64toh(f->header->tail_object_offset)), &last_usec); + + r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o); + if (r < 0) { + error_errno(p, r, "Invalid object: %m"); + goto fail; + } + + if (p > le64toh(f->header->tail_object_offset)) { + error(offsetof(Header, tail_object_offset), + "Invalid tail object pointer (%"PRIu64" > %"PRIu64")", + p, + le64toh(f->header->tail_object_offset)); + r = -EBADMSG; + goto fail; + } + + n_objects++; + + r = journal_file_object_verify(f, p, o); + if (r < 0) { + error_errno(p, r, "Invalid object contents: %m"); + goto fail; + } + + if (!!(o->object.flags & OBJECT_COMPRESSED_XZ) + + !!(o->object.flags & OBJECT_COMPRESSED_LZ4) + + !!(o->object.flags & OBJECT_COMPRESSED_ZSTD) > 1) { + error(p, "Object has multiple compression flags set (flags: 0x%x)", o->object.flags); + r = -EINVAL; + goto fail; + } + + if ((o->object.flags & OBJECT_COMPRESSED_XZ) && !JOURNAL_HEADER_COMPRESSED_XZ(f->header)) { + error(p, "XZ compressed object in file without XZ compression"); + r = -EBADMSG; + goto fail; + } + + if ((o->object.flags & OBJECT_COMPRESSED_LZ4) && !JOURNAL_HEADER_COMPRESSED_LZ4(f->header)) { + error(p, "LZ4 compressed object in file without LZ4 compression"); + r = -EBADMSG; + goto fail; + } + + if ((o->object.flags & OBJECT_COMPRESSED_ZSTD) && !JOURNAL_HEADER_COMPRESSED_ZSTD(f->header)) { + error(p, "ZSTD compressed object in file without ZSTD compression"); + r = -EBADMSG; + goto fail; + } + + switch (o->object.type) { + + case OBJECT_DATA: + r = write_uint64(data_fp, p); + if (r < 0) + goto fail; + + n_data++; + break; + + case OBJECT_FIELD: + n_fields++; + break; + + case OBJECT_ENTRY: + if (JOURNAL_HEADER_SEALED(f->header) && n_tags <= 0) { + error(p, "First entry before first tag"); + r = -EBADMSG; + goto fail; + } + + r = write_uint64(entry_fp, p); + if (r < 0) + goto fail; + + if (le64toh(o->entry.realtime) < last_tag_realtime) { + error(p, + "Older entry after newer tag (%"PRIu64" < %"PRIu64")", + le64toh(o->entry.realtime), + last_tag_realtime); + r = -EBADMSG; + goto fail; + } + + if (!entry_seqnum_set && + le64toh(o->entry.seqnum) != le64toh(f->header->head_entry_seqnum)) { + error(p, + "Head entry sequence number incorrect (%"PRIu64" != %"PRIu64")", + le64toh(o->entry.seqnum), + le64toh(f->header->head_entry_seqnum)); + r = -EBADMSG; + goto fail; + } + + if (entry_seqnum_set && + entry_seqnum >= le64toh(o->entry.seqnum)) { + error(p, + "Entry sequence number out of synchronization (%"PRIu64" >= %"PRIu64")", + entry_seqnum, + le64toh(o->entry.seqnum)); + r = -EBADMSG; + goto fail; + } + + entry_seqnum = le64toh(o->entry.seqnum); + entry_seqnum_set = true; + + if (entry_monotonic_set && + sd_id128_equal(entry_boot_id, o->entry.boot_id) && + entry_monotonic > le64toh(o->entry.monotonic)) { + error(p, + "Entry timestamp out of synchronization (%"PRIu64" > %"PRIu64")", + entry_monotonic, + le64toh(o->entry.monotonic)); + r = -EBADMSG; + goto fail; + } + + entry_monotonic = le64toh(o->entry.monotonic); + entry_boot_id = o->entry.boot_id; + entry_monotonic_set = true; + + if (!entry_realtime_set && + le64toh(o->entry.realtime) != le64toh(f->header->head_entry_realtime)) { + error(p, + "Head entry realtime timestamp incorrect (%"PRIu64" != %"PRIu64")", + le64toh(o->entry.realtime), + le64toh(f->header->head_entry_realtime)); + r = -EBADMSG; + goto fail; + } + + entry_realtime = le64toh(o->entry.realtime); + entry_realtime_set = true; + + max_entry_realtime = MAX(max_entry_realtime, le64toh(o->entry.realtime)); + min_entry_realtime = MIN(min_entry_realtime, le64toh(o->entry.realtime)); + + n_entries++; + break; + + case OBJECT_DATA_HASH_TABLE: + r = verify_hash_table(o, p, &n_data_hash_tables, + le64toh(f->header->data_hash_table_offset), + le64toh(f->header->data_hash_table_size)); + if (r < 0) + goto fail; + break; + + case OBJECT_FIELD_HASH_TABLE: + r = verify_hash_table(o, p, &n_field_hash_tables, + le64toh(f->header->field_hash_table_offset), + le64toh(f->header->field_hash_table_size)); + if (r < 0) + goto fail; + + break; + + case OBJECT_ENTRY_ARRAY: + r = write_uint64(entry_array_fp, p); + if (r < 0) + goto fail; + + if (p == le64toh(f->header->entry_array_offset)) { + if (found_main_entry_array) { + error(p, "More than one main entry array"); + r = -EBADMSG; + goto fail; + } + + found_main_entry_array = true; + } + + n_entry_arrays++; + break; + + case OBJECT_TAG: + if (!JOURNAL_HEADER_SEALED(f->header)) { + error(p, "Tag object in file without sealing"); + r = -EBADMSG; + goto fail; + } + + if (le64toh(o->tag.seqnum) != n_tags + 1) { + error(p, + "Tag sequence number out of synchronization (%"PRIu64" != %"PRIu64")", + le64toh(o->tag.seqnum), + n_tags + 1); + r = -EBADMSG; + goto fail; + } + + if (JOURNAL_HEADER_SEALED_CONTINUOUS(f->header)) { + if (!(n_tags == 0 || (n_tags == 1 && le64toh(o->tag.epoch) == last_epoch) + || le64toh(o->tag.epoch) == last_epoch + 1)) { + error(p, + "Epoch sequence not continuous (%"PRIu64" vs %"PRIu64")", + le64toh(o->tag.epoch), + last_epoch); + r = -EBADMSG; + goto fail; + } + } else { + if (le64toh(o->tag.epoch) < last_epoch) { + error(p, + "Epoch sequence out of synchronization (%"PRIu64" < %"PRIu64")", + le64toh(o->tag.epoch), + last_epoch); + r = -EBADMSG; + goto fail; + } + } + +#if HAVE_GCRYPT + if (JOURNAL_HEADER_SEALED(f->header)) { + uint64_t q, rt, rt_end; + + debug(p, "Checking tag %"PRIu64"...", le64toh(o->tag.seqnum)); + + rt = f->fss_start_usec + le64toh(o->tag.epoch) * f->fss_interval_usec; + rt_end = usec_add(rt, f->fss_interval_usec); + if (entry_realtime_set && entry_realtime >= rt_end) { + error(p, + "tag/entry realtime timestamp out of synchronization (%"PRIu64" >= %"PRIu64")", + entry_realtime, + rt + f->fss_interval_usec); + r = -EBADMSG; + goto fail; + } + if (max_entry_realtime >= rt_end) { + error(p, + "Entry realtime (%"PRIu64", %s) is too late with respect to tag (%"PRIu64", %s)", + max_entry_realtime, FORMAT_TIMESTAMP(max_entry_realtime), + rt_end, FORMAT_TIMESTAMP(rt_end)); + r = -EBADMSG; + goto fail; + } + if (min_entry_realtime < rt) { + error(p, + "Entry realtime (%"PRIu64", %s) is too early with respect to tag (%"PRIu64", %s)", + min_entry_realtime, FORMAT_TIMESTAMP(min_entry_realtime), + rt, FORMAT_TIMESTAMP(rt)); + r = -EBADMSG; + goto fail; + } + min_entry_realtime = USEC_INFINITY; + + /* OK, now we know the epoch. So let's now set + * it, and calculate the HMAC for everything + * since the last tag. */ + r = journal_file_fsprg_seek(f, le64toh(o->tag.epoch)); + if (r < 0) + goto fail; + + r = journal_file_hmac_start(f); + if (r < 0) + goto fail; + + if (last_tag == 0) { + r = journal_file_hmac_put_header(f); + if (r < 0) + goto fail; + + q = le64toh(f->header->header_size); + } else + q = last_tag; + + while (q <= p) { + r = journal_file_move_to_object(f, OBJECT_UNUSED, q, &o); + if (r < 0) + goto fail; + + r = journal_file_hmac_put_object(f, OBJECT_UNUSED, o, q); + if (r < 0) + goto fail; + + q = q + ALIGN64(le64toh(o->object.size)); + } + + /* Position might have changed, let's reposition things */ + r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o); + if (r < 0) + goto fail; + + if (memcmp(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH) != 0) { + error(p, "Tag failed verification"); + r = -EBADMSG; + goto fail; + } + + f->hmac_running = false; + last_tag_realtime = rt; + } + + last_tag = p + ALIGN64(le64toh(o->object.size)); +#endif + + last_epoch = le64toh(o->tag.epoch); + + n_tags++; + break; + } + + if (p == le64toh(f->header->tail_object_offset)) { + found_last = true; + break; + } + + p = p + ALIGN64(le64toh(o->object.size)); + }; + + if (!found_last && le64toh(f->header->tail_object_offset) != 0) { + error(le64toh(f->header->tail_object_offset), + "Tail object pointer dead (%"PRIu64" != 0)", + le64toh(f->header->tail_object_offset)); + r = -EBADMSG; + goto fail; + } + + if (n_objects != le64toh(f->header->n_objects)) { + error(offsetof(Header, n_objects), + "Object number mismatch (%"PRIu64" != %"PRIu64")", + n_objects, + le64toh(f->header->n_objects)); + r = -EBADMSG; + goto fail; + } + + if (n_entries != le64toh(f->header->n_entries)) { + error(offsetof(Header, n_entries), + "Entry number mismatch (%"PRIu64" != %"PRIu64")", + n_entries, + le64toh(f->header->n_entries)); + r = -EBADMSG; + goto fail; + } + + if (JOURNAL_HEADER_CONTAINS(f->header, n_data) && + n_data != le64toh(f->header->n_data)) { + error(offsetof(Header, n_data), + "Data number mismatch (%"PRIu64" != %"PRIu64")", + n_data, + le64toh(f->header->n_data)); + r = -EBADMSG; + goto fail; + } + + if (JOURNAL_HEADER_CONTAINS(f->header, n_fields) && + n_fields != le64toh(f->header->n_fields)) { + error(offsetof(Header, n_fields), + "Field number mismatch (%"PRIu64" != %"PRIu64")", + n_fields, + le64toh(f->header->n_fields)); + r = -EBADMSG; + goto fail; + } + + if (JOURNAL_HEADER_CONTAINS(f->header, n_tags) && + n_tags != le64toh(f->header->n_tags)) { + error(offsetof(Header, n_tags), + "Tag number mismatch (%"PRIu64" != %"PRIu64")", + n_tags, + le64toh(f->header->n_tags)); + r = -EBADMSG; + goto fail; + } + + if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays) && + n_entry_arrays != le64toh(f->header->n_entry_arrays)) { + error(offsetof(Header, n_entry_arrays), + "Entry array number mismatch (%"PRIu64" != %"PRIu64")", + n_entry_arrays, + le64toh(f->header->n_entry_arrays)); + r = -EBADMSG; + goto fail; + } + + if (!found_main_entry_array && le64toh(f->header->entry_array_offset) != 0) { + error(0, "Missing main entry array"); + r = -EBADMSG; + goto fail; + } + + if (entry_seqnum_set && + entry_seqnum != le64toh(f->header->tail_entry_seqnum)) { + error(offsetof(Header, tail_entry_seqnum), + "Tail entry sequence number incorrect (%"PRIu64" != %"PRIu64")", + entry_seqnum, + le64toh(f->header->tail_entry_seqnum)); + r = -EBADMSG; + goto fail; + } + + if (entry_monotonic_set && + (sd_id128_equal(entry_boot_id, f->header->tail_entry_boot_id) && + JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(f->header) && + entry_monotonic != le64toh(f->header->tail_entry_monotonic))) { + error(0, + "Invalid tail monotonic timestamp (%"PRIu64" != %"PRIu64")", + entry_monotonic, + le64toh(f->header->tail_entry_monotonic)); + r = -EBADMSG; + goto fail; + } + + if (entry_realtime_set && entry_realtime != le64toh(f->header->tail_entry_realtime)) { + error(0, + "Invalid tail realtime timestamp (%"PRIu64" != %"PRIu64")", + entry_realtime, + le64toh(f->header->tail_entry_realtime)); + r = -EBADMSG; + goto fail; + } + + if (fflush(data_fp) != 0) { + r = log_error_errno(errno, "Failed to flush data file stream: %m"); + goto fail; + } + + if (fflush(entry_fp) != 0) { + r = log_error_errno(errno, "Failed to flush entry file stream: %m"); + goto fail; + } + + if (fflush(entry_array_fp) != 0) { + r = log_error_errno(errno, "Failed to flush entry array file stream: %m"); + goto fail; + } + + /* Second iteration: we follow all objects referenced from the + * two entry points: the object hash table and the entry + * array. We also check that everything referenced (directly + * or indirectly) in the data hash table also exists in the + * entry array, and vice versa. Note that we do not care for + * unreferenced objects. We only care that everything that is + * referenced is consistent. */ + + r = verify_entry_array(f, + cache_data_fd, n_data, + cache_entry_fd, n_entries, + cache_entry_array_fd, n_entry_arrays, + &last_usec, + show_progress); + if (r < 0) + goto fail; + + r = verify_data_hash_table(f, + cache_data_fd, n_data, + cache_entry_fd, n_entries, + cache_entry_array_fd, n_entry_arrays, + &last_usec, + show_progress); + if (r < 0) + goto fail; + + if (show_progress) + flush_progress(); + + mmap_cache_fd_free(cache_data_fd); + mmap_cache_fd_free(cache_entry_fd); + mmap_cache_fd_free(cache_entry_array_fd); + + if (first_contained) + *first_contained = le64toh(f->header->head_entry_realtime); +#if HAVE_GCRYPT + if (last_validated) + *last_validated = last_tag_realtime + f->fss_interval_usec; +#endif + if (last_contained) + *last_contained = le64toh(f->header->tail_entry_realtime); + + return 0; + +fail: + if (show_progress) + flush_progress(); + + log_error("File corruption detected at %s:%"PRIu64" (of %"PRIu64" bytes, %"PRIu64"%%).", + f->path, + p, + (uint64_t) f->last_stat.st_size, + 100U * p / (uint64_t) f->last_stat.st_size); + + if (cache_data_fd) + mmap_cache_fd_free(cache_data_fd); + + if (cache_entry_fd) + mmap_cache_fd_free(cache_entry_fd); + + if (cache_entry_array_fd) + mmap_cache_fd_free(cache_entry_array_fd); + + return r; +} diff --git a/src/libsystemd/sd-journal/journal-verify.h b/src/libsystemd/sd-journal/journal-verify.h new file mode 100644 index 0000000..5790330 --- /dev/null +++ b/src/libsystemd/sd-journal/journal-verify.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "journal-file.h" + +int journal_file_verify(JournalFile *f, const char *key, usec_t *first_contained, usec_t *last_validated, usec_t *last_contained, bool show_progress); diff --git a/src/libsystemd/sd-journal/lookup3.c b/src/libsystemd/sd-journal/lookup3.c new file mode 100644 index 0000000..c2a6406 --- /dev/null +++ b/src/libsystemd/sd-journal/lookup3.c @@ -0,0 +1,1002 @@ +/* SPDX-License-Identifier: LicenseRef-lookup3-public-domain */ +/* Slightly modified by Lennart Poettering, to avoid name clashes, and + * unexport a few functions. */ + +#include "lookup3.h" + +#if HAVE_VALGRIND_VALGRIND_H +# include <valgrind/valgrind.h> +#else +# define RUNNING_ON_VALGRIND 0 +#endif + +/* +------------------------------------------------------------------------------- +lookup3.c, by Bob Jenkins, May 2006, Public Domain. + +These are functions for producing 32-bit hashes for hash table lookup. +hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() +are externally useful functions. Routines to test the hash are included +if SELF_TEST is defined. You can use this free for any purpose. It's in +the public domain. It has no warranty. + +You probably want to use hashlittle(). hashlittle() and hashbig() +hash byte arrays. hashlittle() is faster than hashbig() on +little-endian machines. Intel and AMD are little-endian machines. +On second thought, you probably want hashlittle2(), which is identical to +hashlittle() except it returns two 32-bit hashes for the price of one. +You could implement hashbig2() if you wanted but I haven't bothered here. + +If you want to find a hash of, say, exactly 7 integers, do + a = i1; b = i2; c = i3; + mix(a,b,c); + a += i4; b += i5; c += i6; + mix(a,b,c); + a += i7; + final(a,b,c); +then use c as the hash value. If you have a variable length array of +4-byte integers to hash, use hashword(). If you have a byte array (like +a character string), use hashlittle(). If you have several byte arrays, or +a mix of things, see the comments above hashlittle(). + +Why is this so big? I read 12 bytes at a time into 3 4-byte integers, +then mix those integers. This is fast (you can do a lot more thorough +mixing with 12*3 instructions on 3 integers than you can with 3 instructions +on 1 byte), but shoehorning those bytes into integers efficiently is messy. +------------------------------------------------------------------------------- +*/ +/* #define SELF_TEST 1 */ + +#include <stdint.h> /* defines uint32_t etc */ +#include <stdio.h> /* defines printf for tests */ +#include <sys/param.h> /* attempt to define endianness */ +#include <time.h> /* defines time_t for timings in the test */ +#ifdef linux +# include <endian.h> /* attempt to define endianness */ +#endif + +#if __GNUC__ >= 7 +_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"") +#endif + +/* + * My best guess at if you are big-endian or little-endian. This may + * need adjustment. + */ +#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \ + __BYTE_ORDER == __LITTLE_ENDIAN) || \ + (defined(i386) || defined(__i386__) || defined(__i486__) || \ + defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL)) +# define HASH_LITTLE_ENDIAN 1 +# define HASH_BIG_ENDIAN 0 +#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \ + __BYTE_ORDER == __BIG_ENDIAN) || \ + (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel)) +# define HASH_LITTLE_ENDIAN 0 +# define HASH_BIG_ENDIAN 1 +#else +# define HASH_LITTLE_ENDIAN 0 +# define HASH_BIG_ENDIAN 0 +#endif + +#define hashsize(n) ((uint32_t)1<<(n)) +#define hashmask(n) (hashsize(n)-1) +#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) + +/* +------------------------------------------------------------------------------- +mix -- mix 3 32-bit values reversibly. + +This is reversible, so any information in (a,b,c) before mix() is +still in (a,b,c) after mix(). + +If four pairs of (a,b,c) inputs are run through mix(), or through +mix() in reverse, there are at least 32 bits of the output that +are sometimes the same for one pair and different for another pair. +This was tested for: +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that +satisfy this are + 4 6 8 16 19 4 + 9 15 3 18 27 15 + 14 9 3 7 17 3 +Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing +for "differ" defined as + with a one-bit base and a two-bit delta. I +used http://burtleburtle.net/bob/hash/avalanche.html to choose +the operations, constants, and arrangements of the variables. + +This does not achieve avalanche. There are input bits of (a,b,c) +that fail to affect some output bits of (a,b,c), especially of a. The +most thoroughly mixed value is c, but it doesn't really even achieve +avalanche in c. + +This allows some parallelism. Read-after-writes are good at doubling +the number of bits affected, so the goal of mixing pulls in the opposite +direction as the goal of parallelism. I did what I could. Rotates +seem to cost as much as shifts on every machine I could lay my hands +on, and rotates are much kinder to the top and bottom bits, so I used +rotates. +------------------------------------------------------------------------------- +*/ +#define mix(a,b,c) \ +{ \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c,16); c += b; \ + b -= a; b ^= rot(a,19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} + +/* +------------------------------------------------------------------------------- +final -- final mixing of 3 32-bit values (a,b,c) into c + +Pairs of (a,b,c) values differing in only a few bits will usually +produce values of c that look totally different. This was tested for +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +These constants passed: + 14 11 25 16 4 14 24 + 12 14 25 16 4 14 24 +and these came close: + 4 8 15 26 3 22 24 + 10 8 15 26 3 22 24 + 11 8 15 26 3 22 24 +------------------------------------------------------------------------------- +*/ +#define final(a,b,c) \ +{ \ + c ^= b; c -= rot(b,14); \ + a ^= c; a -= rot(c,11); \ + b ^= a; b -= rot(a,25); \ + c ^= b; c -= rot(b,16); \ + a ^= c; a -= rot(c,4); \ + b ^= a; b -= rot(a,14); \ + c ^= b; c -= rot(b,24); \ +} + +/* +-------------------------------------------------------------------- + This works on all machines. To be useful, it requires + -- that the key be an array of uint32_t's, and + -- that the length be the number of uint32_t's in the key + + The function hashword() is identical to hashlittle() on little-endian + machines, and identical to hashbig() on big-endian machines, + except that the length has to be measured in uint32_ts rather than in + bytes. hashlittle() is more complicated than hashword() only because + hashlittle() has to dance around fitting the key bytes into registers. +-------------------------------------------------------------------- +*/ +uint32_t jenkins_hashword( +const uint32_t *k, /* the key, an array of uint32_t values */ +size_t length, /* the length of the key, in uint32_ts */ +uint32_t initval) /* the previous hash, or an arbitrary value */ +{ + uint32_t a,b,c; + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval; + + /*------------------------------------------------- handle most of the key */ + while (length > 3) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 3; + k += 3; + } + + /*------------------------------------------- handle the last 3 uint32_t's */ + switch(length) /* all the case statements fall through */ + { + case 3 : c+=k[2]; + case 2 : b+=k[1]; + case 1 : a+=k[0]; + final(a,b,c); + case 0: /* case 0: nothing left to add */ + break; + } + /*------------------------------------------------------ report the result */ + return c; +} + +/* +-------------------------------------------------------------------- +hashword2() -- same as hashword(), but take two seeds and return two +32-bit values. pc and pb must both be nonnull, and *pc and *pb must +both be initialized with seeds. If you pass in (*pb)==0, the output +(*pc) will be the same as the return value from hashword(). +-------------------------------------------------------------------- +*/ +void jenkins_hashword2 ( +const uint32_t *k, /* the key, an array of uint32_t values */ +size_t length, /* the length of the key, in uint32_ts */ +uint32_t *pc, /* IN: seed OUT: primary hash value */ +uint32_t *pb) /* IN: more seed OUT: secondary hash value */ +{ + uint32_t a,b,c; + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc; + c += *pb; + + /*------------------------------------------------- handle most of the key */ + while (length > 3) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 3; + k += 3; + } + + /*------------------------------------------- handle the last 3 uint32_t's */ + switch(length) /* all the case statements fall through */ + { + case 3 : c+=k[2]; + case 2 : b+=k[1]; + case 1 : a+=k[0]; + final(a,b,c); + case 0: /* case 0: nothing left to add */ + break; + } + /*------------------------------------------------------ report the result */ + *pc=c; *pb=b; +} + +/* +------------------------------------------------------------------------------- +hashlittle() -- hash a variable-length key into a 32-bit value + k : the key (the unaligned variable-length array of bytes) + length : the length of the key, counting by bytes + initval : can be any 4-byte value +Returns a 32-bit value. Every bit of the key affects every bit of +the return value. Two keys differing by one or two bits will have +totally different hash values. + +The best hash table sizes are powers of 2. There is no need to do +mod a prime (mod is sooo slow!). If you need less than 32 bits, +use a bitmask. For example, if you need only 10 bits, do + h = (h & hashmask(10)); +In which case, the hash table should have hashsize(10) elements. + +If you are hashing n strings (uint8_t **)k, do it like this: + for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h); + +By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this +code any way you wish, private, educational, or commercial. It's free. + +Use for hash table lookup, or anything where one collision in 2^^32 is +acceptable. Do NOT use for cryptographic purposes. +------------------------------------------------------------------------------- +*/ + +uint32_t jenkins_hashlittle( const void *key, size_t length, uint32_t initval) +{ + uint32_t a,b,c; /* internal state */ + union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; + + u.ptr = key; + if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { + const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ + + /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]&0xffffff" actually reads beyond the end of the string, but + * then masks off the part it's not allowed to read. Because the + * string is aligned, the masked-off tail is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But valgrind will + * still catch it and complain. The masking trick does make the hash + * noticeably faster for short strings (like English words). + */ +#define VALGRIND_LIKE (_unlikely_(HAS_FEATURE_ADDRESS_SANITIZER || \ + HAS_FEATURE_MEMORY_SANITIZER || \ + RUNNING_ON_VALGRIND)) + + if (!VALGRIND_LIKE) { + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff; a+=k[0]; break; + case 6 : b+=k[1]&0xffff; a+=k[0]; break; + case 5 : b+=k[1]&0xff; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff; break; + case 2 : a+=k[0]&0xffff; break; + case 1 : a+=k[0]&0xff; break; + case 0 : return c; /* zero length strings require no mixing */ + } + } else { + const uint8_t *k8 = (const uint8_t *) k; + + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ + case 1 : a+=k8[0]; break; + case 0 : return c; + } + } + + } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { + const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ + const uint8_t *k8; + + /*--------------- all but last block: aligned reads and different mixing */ + while (length > 12) + { + a += k[0] + (((uint32_t)k[1])<<16); + b += k[2] + (((uint32_t)k[3])<<16); + c += k[4] + (((uint32_t)k[5])<<16); + mix(a,b,c); + length -= 12; + k += 6; + } + + /*----------------------------- handle the last (probably partial) block */ + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[4]+(((uint32_t)k[5])<<16); + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=k[4]; + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=k[2]; + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=k[0]; + break; + case 1 : a+=k8[0]; + break; + case 0 : return c; /* zero length requires no mixing */ + } + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + a += ((uint32_t)k[1])<<8; + a += ((uint32_t)k[2])<<16; + a += ((uint32_t)k[3])<<24; + b += k[4]; + b += ((uint32_t)k[5])<<8; + b += ((uint32_t)k[6])<<16; + b += ((uint32_t)k[7])<<24; + c += k[8]; + c += ((uint32_t)k[9])<<8; + c += ((uint32_t)k[10])<<16; + c += ((uint32_t)k[11])<<24; + mix(a,b,c); + length -= 12; + k += 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=((uint32_t)k[11])<<24; + case 11: c+=((uint32_t)k[10])<<16; + case 10: c+=((uint32_t)k[9])<<8; + case 9 : c+=k[8]; + case 8 : b+=((uint32_t)k[7])<<24; + case 7 : b+=((uint32_t)k[6])<<16; + case 6 : b+=((uint32_t)k[5])<<8; + case 5 : b+=k[4]; + case 4 : a+=((uint32_t)k[3])<<24; + case 3 : a+=((uint32_t)k[2])<<16; + case 2 : a+=((uint32_t)k[1])<<8; + case 1 : a+=k[0]; + break; + case 0 : return c; + } + } + + final(a,b,c); + return c; +} + +/* + * hashlittle2: return 2 32-bit hash values + * + * This is identical to hashlittle(), except it returns two 32-bit hash + * values instead of just one. This is good enough for hash table + * lookup with 2^^64 buckets, or if you want a second hash if you're not + * happy with the first, or if you want a probably-unique 64-bit ID for + * the key. *pc is better mixed than *pb, so use *pc first. If you want + * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)". + */ +void jenkins_hashlittle2( + const void *key, /* the key to hash */ + size_t length, /* length of the key */ + uint32_t *pc, /* IN: primary initval, OUT: primary hash */ + uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */ +{ + uint32_t a,b,c; /* internal state */ + union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc; + c += *pb; + + u.ptr = key; + if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { + const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ + + /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]&0xffffff" actually reads beyond the end of the string, but + * then masks off the part it's not allowed to read. Because the + * string is aligned, the masked-off tail is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But valgrind will + * still catch it and complain. The masking trick does make the hash + * noticeably faster for short strings (like English words). + */ + if (!VALGRIND_LIKE) { + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff; a+=k[0]; break; + case 6 : b+=k[1]&0xffff; a+=k[0]; break; + case 5 : b+=k[1]&0xff; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff; break; + case 2 : a+=k[0]&0xffff; break; + case 1 : a+=k[0]&0xff; break; + case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ + } + } else { + const uint8_t *k8 = (const uint8_t *)k; + + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ + case 1 : a+=k8[0]; break; + case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ + } + } + + } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { + const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ + const uint8_t *k8; + + /*--------------- all but last block: aligned reads and different mixing */ + while (length > 12) + { + a += k[0] + (((uint32_t)k[1])<<16); + b += k[2] + (((uint32_t)k[3])<<16); + c += k[4] + (((uint32_t)k[5])<<16); + mix(a,b,c); + length -= 12; + k += 6; + } + + /*----------------------------- handle the last (probably partial) block */ + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[4]+(((uint32_t)k[5])<<16); + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=k[4]; + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=k[2]; + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=k[0]; + break; + case 1 : a+=k8[0]; + break; + case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ + } + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + a += ((uint32_t)k[1])<<8; + a += ((uint32_t)k[2])<<16; + a += ((uint32_t)k[3])<<24; + b += k[4]; + b += ((uint32_t)k[5])<<8; + b += ((uint32_t)k[6])<<16; + b += ((uint32_t)k[7])<<24; + c += k[8]; + c += ((uint32_t)k[9])<<8; + c += ((uint32_t)k[10])<<16; + c += ((uint32_t)k[11])<<24; + mix(a,b,c); + length -= 12; + k += 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=((uint32_t)k[11])<<24; + case 11: c+=((uint32_t)k[10])<<16; + case 10: c+=((uint32_t)k[9])<<8; + case 9 : c+=k[8]; + case 8 : b+=((uint32_t)k[7])<<24; + case 7 : b+=((uint32_t)k[6])<<16; + case 6 : b+=((uint32_t)k[5])<<8; + case 5 : b+=k[4]; + case 4 : a+=((uint32_t)k[3])<<24; + case 3 : a+=((uint32_t)k[2])<<16; + case 2 : a+=((uint32_t)k[1])<<8; + case 1 : a+=k[0]; + break; + case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ + } + } + + final(a,b,c); + *pc=c; *pb=b; +} + +/* + * hashbig(): + * This is the same as hashword() on big-endian machines. It is different + * from hashlittle() on all machines. hashbig() takes advantage of + * big-endian byte ordering. + */ +uint32_t jenkins_hashbig( const void *key, size_t length, uint32_t initval) +{ + uint32_t a,b,c; + union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */ + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; + + u.ptr = key; + if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) { + const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ + + /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]<<8" actually reads beyond the end of the string, but + * then shifts out the part it's not allowed to read. Because the + * string is aligned, the illegal read is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But valgrind will + * still catch it and complain. The masking trick does make the hash + * noticeably faster for short strings (like English words). + */ + + if (!VALGRIND_LIKE) { + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff00; a+=k[0]; break; + case 6 : b+=k[1]&0xffff0000; a+=k[0]; break; + case 5 : b+=k[1]&0xff000000; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff00; break; + case 2 : a+=k[0]&0xffff0000; break; + case 1 : a+=k[0]&0xff000000; break; + case 0 : return c; /* zero length strings require no mixing */ + } + } else { + const uint8_t *k8 = (const uint8_t *)k; + + switch(length) /* all the case statements fall through */ + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<8; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<16; /* fall through */ + case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */ + case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */ + case 1 : a+=((uint32_t)k8[0])<<24; break; + case 0 : return c; + } + } + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += ((uint32_t)k[0])<<24; + a += ((uint32_t)k[1])<<16; + a += ((uint32_t)k[2])<<8; + a += ((uint32_t)k[3]); + b += ((uint32_t)k[4])<<24; + b += ((uint32_t)k[5])<<16; + b += ((uint32_t)k[6])<<8; + b += ((uint32_t)k[7]); + c += ((uint32_t)k[8])<<24; + c += ((uint32_t)k[9])<<16; + c += ((uint32_t)k[10])<<8; + c += ((uint32_t)k[11]); + mix(a,b,c); + length -= 12; + k += 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=k[11]; + case 11: c+=((uint32_t)k[10])<<8; + case 10: c+=((uint32_t)k[9])<<16; + case 9 : c+=((uint32_t)k[8])<<24; + case 8 : b+=k[7]; + case 7 : b+=((uint32_t)k[6])<<8; + case 6 : b+=((uint32_t)k[5])<<16; + case 5 : b+=((uint32_t)k[4])<<24; + case 4 : a+=k[3]; + case 3 : a+=((uint32_t)k[2])<<8; + case 2 : a+=((uint32_t)k[1])<<16; + case 1 : a+=((uint32_t)k[0])<<24; + break; + case 0 : return c; + } + } + + final(a,b,c); + return c; +} + +#ifdef SELF_TEST + +/* used for timings */ +void driver1() +{ + uint8_t buf[256]; + uint32_t i; + uint32_t h=0; + time_t a,z; + + time(&a); + for (i=0; i<256; ++i) buf[i] = 'x'; + for (i=0; i<1; ++i) + { + h = hashlittle(&buf[0],1,h); + } + time(&z); + if (z-a > 0) printf("time %d %.8x\n", z-a, h); +} + +/* check that every input bit changes every output bit half the time */ +#define HASHSTATE 1 +#define HASHLEN 1 +#define MAXPAIR 60 +#define MAXLEN 70 +void driver2() +{ + uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1]; + uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z; + uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE]; + uint32_t x[HASHSTATE],y[HASHSTATE]; + uint32_t hlen; + + printf("No more than %d trials should ever be needed \n",MAXPAIR/2); + for (hlen=0; hlen < MAXLEN; ++hlen) + { + z=0; + for (i=0; i<hlen; ++i) /*----------------------- for each input byte, */ + { + for (j=0; j<8; ++j) /*------------------------ for each input bit, */ + { + for (m=1; m<8; ++m) /*------------- for several possible initvals, */ + { + for (l=0; l<HASHSTATE; ++l) + e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0); + + /*---- check that every output bit is affected by that input bit */ + for (k=0; k<MAXPAIR; k+=2) + { + uint32_t finished=1; + /* keys have one bit different */ + for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;} + /* have a and b be two keys differing in only one bit */ + a[i] ^= (k<<j); + a[i] ^= (k>>(8-j)); + c[0] = hashlittle(a, hlen, m); + b[i] ^= ((k+1)<<j); + b[i] ^= ((k+1)>>(8-j)); + d[0] = hashlittle(b, hlen, m); + /* check every bit is 1, 0, set, and not set at least once */ + for (l=0; l<HASHSTATE; ++l) + { + e[l] &= (c[l]^d[l]); + f[l] &= ~(c[l]^d[l]); + g[l] &= c[l]; + h[l] &= ~c[l]; + x[l] &= d[l]; + y[l] &= ~d[l]; + if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0; + } + if (finished) break; + } + if (k>z) z=k; + if (k==MAXPAIR) + { + printf("Some bit didn't change: "); + printf("%.8x %.8x %.8x %.8x %.8x %.8x ", + e[0],f[0],g[0],h[0],x[0],y[0]); + printf("i %d j %d m %d len %d\n", i, j, m, hlen); + } + if (z==MAXPAIR) goto done; + } + } + } + done: + if (z < MAXPAIR) + { + printf("Mix success %2d bytes %2d initvals ",i,m); + printf("required %d trials\n", z/2); + } + } + printf("\n"); +} + +/* Check for reading beyond the end of the buffer and alignment problems */ +void driver3() +{ + uint8_t buf[MAXLEN+20], *b; + uint32_t len; + uint8_t q[] = "This is the time for all good men to come to the aid of their country..."; + uint32_t h; + uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country..."; + uint32_t i; + uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country..."; + uint32_t j; + uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country..."; + uint32_t ref,x,y; + uint8_t *p; + + printf("Endianness. These lines should all be the same (for values filled in):\n"); + printf("%.8x %.8x %.8x\n", + hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13), + hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13), + hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13)); + p = q; + printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", + hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13), + hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13), + hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13), + hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13), + hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13), + hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13)); + p = &qq[1]; + printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", + hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13), + hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13), + hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13), + hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13), + hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13), + hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13)); + p = &qqq[2]; + printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", + hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13), + hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13), + hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13), + hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13), + hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13), + hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13)); + p = &qqqq[3]; + printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", + hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13), + hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13), + hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13), + hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13), + hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13), + hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13)); + printf("\n"); + + /* check that hashlittle2 and hashlittle produce the same results */ + i=47; j=0; + hashlittle2(q, sizeof(q), &i, &j); + if (hashlittle(q, sizeof(q), 47) != i) + printf("hashlittle2 and hashlittle mismatch\n"); + + /* check that hashword2 and hashword produce the same results */ + len = 0xdeadbeef; + i=47, j=0; + hashword2(&len, 1, &i, &j); + if (hashword(&len, 1, 47) != i) + printf("hashword2 and hashword mismatch %x %x\n", + i, hashword(&len, 1, 47)); + + /* check hashlittle doesn't read before or after the ends of the string */ + for (h=0, b=buf+1; h<8; ++h, ++b) + { + for (i=0; i<MAXLEN; ++i) + { + len = i; + for (j=0; j<i; ++j) *(b+j)=0; + + /* these should all be equal */ + ref = hashlittle(b, len, (uint32_t)1); + *(b+i)=(uint8_t)~0; + *(b-1)=(uint8_t)~0; + x = hashlittle(b, len, (uint32_t)1); + y = hashlittle(b, len, (uint32_t)1); + if ((ref != x) || (ref != y)) + { + printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y, + h, i); + } + } + } +} + +/* check for problems with nulls */ + void driver4() +{ + uint8_t buf[1]; + uint32_t h,i,state[HASHSTATE]; + + buf[0] = ~0; + for (i=0; i<HASHSTATE; ++i) state[i] = 1; + printf("These should all be different\n"); + for (i=0, h=0; i<8; ++i) + { + h = hashlittle(buf, 0, h); + printf("%2ld 0-byte strings, hash is %.8x\n", i, h); + } +} + +void driver5() +{ + uint32_t b,c; + b=0, c=0, hashlittle2("", 0, &c, &b); + printf("hash is %.8lx %.8lx\n", c, b); /* deadbeef deadbeef */ + b=0xdeadbeef, c=0, hashlittle2("", 0, &c, &b); + printf("hash is %.8lx %.8lx\n", c, b); /* bd5b7dde deadbeef */ + b=0xdeadbeef, c=0xdeadbeef, hashlittle2("", 0, &c, &b); + printf("hash is %.8lx %.8lx\n", c, b); /* 9c093ccd bd5b7dde */ + b=0, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b); + printf("hash is %.8lx %.8lx\n", c, b); /* 17770551 ce7226e6 */ + b=1, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b); + printf("hash is %.8lx %.8lx\n", c, b); /* e3607cae bd371de4 */ + b=0, c=1, hashlittle2("Four score and seven years ago", 30, &c, &b); + printf("hash is %.8lx %.8lx\n", c, b); /* cd628161 6cbea4b3 */ + c = hashlittle("Four score and seven years ago", 30, 0); + printf("hash is %.8lx\n", c); /* 17770551 */ + c = hashlittle("Four score and seven years ago", 30, 1); + printf("hash is %.8lx\n", c); /* cd628161 */ +} + +int main() +{ + driver1(); /* test that the key is hashed: used for timings */ + driver2(); /* test that whole key is hashed thoroughly */ + driver3(); /* test that nothing but the key is hashed */ + driver4(); /* test hashing multiple buffers (all buffers are null) */ + driver5(); /* test the hash against known vectors */ + return 1; +} + +#endif /* SELF_TEST */ diff --git a/src/libsystemd/sd-journal/lookup3.h b/src/libsystemd/sd-journal/lookup3.h new file mode 100644 index 0000000..04e493e --- /dev/null +++ b/src/libsystemd/sd-journal/lookup3.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: LicenseRef-lookup3-public-domain */ +#pragma once + +#include <inttypes.h> +#include <sys/types.h> + +#include "macro.h" + +uint32_t jenkins_hashword(const uint32_t *k, size_t length, uint32_t initval) _pure_; +void jenkins_hashword2(const uint32_t *k, size_t length, uint32_t *pc, uint32_t *pb); + +uint32_t jenkins_hashlittle(const void *key, size_t length, uint32_t initval) _pure_; +void jenkins_hashlittle2(const void *key, size_t length, uint32_t *pc, uint32_t *pb); + +uint32_t jenkins_hashbig(const void *key, size_t length, uint32_t initval) _pure_; + +static inline uint64_t jenkins_hash64(const void *data, size_t length) { + uint32_t a = 0, b = 0; + + jenkins_hashlittle2(data, length, &a, &b); + + return ((uint64_t) a << 32ULL) | (uint64_t) b; +} diff --git a/src/libsystemd/sd-journal/mmap-cache.c b/src/libsystemd/sd-journal/mmap-cache.c new file mode 100644 index 0000000..973ade6 --- /dev/null +++ b/src/libsystemd/sd-journal/mmap-cache.c @@ -0,0 +1,562 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <stdlib.h> +#include <sys/mman.h> + +#include "alloc-util.h" +#include "errno-util.h" +#include "fd-util.h" +#include "hashmap.h" +#include "list.h" +#include "log.h" +#include "macro.h" +#include "memory-util.h" +#include "mmap-cache.h" +#include "sigbus.h" + +typedef struct Window Window; + +typedef enum WindowFlags { + WINDOW_KEEP_ALWAYS = 1u << (_MMAP_CACHE_CATEGORY_MAX + 0), + WINDOW_IN_UNUSED = 1u << (_MMAP_CACHE_CATEGORY_MAX + 1), + WINDOW_INVALIDATED = 1u << (_MMAP_CACHE_CATEGORY_MAX + 2), + + _WINDOW_USED_MASK = WINDOW_IN_UNUSED - 1, /* The mask contains all bits that indicate the windows + * is currently in use. Covers the all the object types + * and the additional WINDOW_KEEP_ALWAYS flag. */ +} WindowFlags; + +#define WINDOW_IS_UNUSED(w) (((w)->flags & _WINDOW_USED_MASK) == 0) + +struct Window { + MMapFileDescriptor *fd; + + WindowFlags flags; + + void *ptr; + uint64_t offset; + size_t size; + + LIST_FIELDS(Window, windows); + LIST_FIELDS(Window, unused); +}; + +struct MMapFileDescriptor { + MMapCache *cache; + + int fd; + int prot; + bool sigbus; + + LIST_HEAD(Window, windows); +}; + +struct MMapCache { + unsigned n_ref; + unsigned n_windows; + + unsigned n_category_cache_hit; + unsigned n_window_list_hit; + unsigned n_missed; + + Hashmap *fds; + + LIST_HEAD(Window, unused); + Window *last_unused; + + Window *windows_by_category[_MMAP_CACHE_CATEGORY_MAX]; +}; + +#define WINDOWS_MIN 64 + +#if ENABLE_DEBUG_MMAP_CACHE +/* Tiny windows increase mmap activity and the chance of exposing unsafe use. */ +# define WINDOW_SIZE (page_size()) +#else +# define WINDOW_SIZE ((size_t) (UINT64_C(8) * UINT64_C(1024) * UINT64_C(1024))) +#endif + +MMapCache* mmap_cache_new(void) { + MMapCache *m; + + m = new(MMapCache, 1); + if (!m) + return NULL; + + *m = (MMapCache) { + .n_ref = 1, + }; + + return m; +} + +static Window* window_unlink(Window *w) { + assert(w); + + MMapCache *m = mmap_cache_fd_cache(w->fd); + + if (w->ptr) + munmap(w->ptr, w->size); + + if (FLAGS_SET(w->flags, WINDOW_IN_UNUSED)) { + if (m->last_unused == w) + m->last_unused = w->unused_prev; + LIST_REMOVE(unused, m->unused, w); + } + + for (unsigned i = 0; i < _MMAP_CACHE_CATEGORY_MAX; i++) + if (FLAGS_SET(w->flags, 1u << i)) + assert_se(TAKE_PTR(m->windows_by_category[i]) == w); + + return LIST_REMOVE(windows, w->fd->windows, w); +} + +static void window_invalidate(Window *w) { + assert(w); + assert(w->fd); + + if (FLAGS_SET(w->flags, WINDOW_INVALIDATED)) + return; + + /* Replace the window with anonymous pages. This is useful when we hit a SIGBUS and want to make sure + * the file cannot trigger any further SIGBUS, possibly overrunning the sigbus queue. */ + + assert_se(mmap(w->ptr, w->size, w->fd->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr); + w->flags |= WINDOW_INVALIDATED; +} + +static Window* window_free(Window *w) { + if (!w) + return NULL; + + window_unlink(w); + w->fd->cache->n_windows--; + + return mfree(w); +} + +static bool window_matches(Window *w, MMapFileDescriptor *f, uint64_t offset, size_t size) { + assert(size > 0); + + return + w && + f == w->fd && + offset >= w->offset && + offset + size <= w->offset + w->size; +} + +static bool window_matches_by_addr(Window *w, MMapFileDescriptor *f, void *addr, size_t size) { + assert(size > 0); + + return + w && + f == w->fd && + (uint8_t*) addr >= (uint8_t*) w->ptr && + (uint8_t*) addr + size <= (uint8_t*) w->ptr + w->size; +} + +static Window* window_add(MMapFileDescriptor *f, uint64_t offset, size_t size, void *ptr) { + MMapCache *m = mmap_cache_fd_cache(f); + Window *w; + + if (!m->last_unused || m->n_windows <= WINDOWS_MIN) { + /* Allocate a new window */ + w = new(Window, 1); + if (!w) + return NULL; + m->n_windows++; + } else + /* Reuse an existing one */ + w = window_unlink(m->last_unused); + + *w = (Window) { + .fd = f, + .offset = offset, + .size = size, + .ptr = ptr, + }; + + return LIST_PREPEND(windows, f->windows, w); +} + +static void category_detach_window(MMapCache *m, MMapCacheCategory c) { + Window *w; + + assert(m); + assert(c >= 0 && c < _MMAP_CACHE_CATEGORY_MAX); + + w = TAKE_PTR(m->windows_by_category[c]); + if (!w) + return; /* Nothing attached. */ + + assert(FLAGS_SET(w->flags, 1u << c)); + w->flags &= ~(1u << c); + + if (WINDOW_IS_UNUSED(w)) { + /* Not used anymore? */ +#if ENABLE_DEBUG_MMAP_CACHE + /* Unmap unused windows immediately to expose use-after-unmap by SIGSEGV. */ + window_free(w); +#else + LIST_PREPEND(unused, m->unused, w); + if (!m->last_unused) + m->last_unused = w; + w->flags |= WINDOW_IN_UNUSED; +#endif + } +} + +static void category_attach_window(MMapCache *m, MMapCacheCategory c, Window *w) { + assert(m); + assert(c >= 0 && c < _MMAP_CACHE_CATEGORY_MAX); + assert(w); + + if (m->windows_by_category[c] == w) + return; /* Already attached. */ + + category_detach_window(m, c); + + if (FLAGS_SET(w->flags, WINDOW_IN_UNUSED)) { + /* Used again? */ + if (m->last_unused == w) + m->last_unused = w->unused_prev; + LIST_REMOVE(unused, m->unused, w); + w->flags &= ~WINDOW_IN_UNUSED; + } + + m->windows_by_category[c] = w; + w->flags |= (1u << c); +} + +static MMapCache* mmap_cache_free(MMapCache *m) { + if (!m) + return NULL; + + /* All windows are owned by fds, and each fd takes a reference of MMapCache. So, when this is called, + * all fds are already freed, and hence there is no window. */ + + assert(hashmap_isempty(m->fds)); + hashmap_free(m->fds); + + assert(!m->unused); + assert(m->n_windows == 0); + + return mfree(m); +} + +DEFINE_TRIVIAL_REF_UNREF_FUNC(MMapCache, mmap_cache, mmap_cache_free); + +static int mmap_try_harder(MMapFileDescriptor *f, void *addr, int flags, uint64_t offset, size_t size, void **ret) { + MMapCache *m = mmap_cache_fd_cache(f); + + assert(ret); + + for (;;) { + void *ptr; + + ptr = mmap(addr, size, f->prot, flags, f->fd, offset); + if (ptr != MAP_FAILED) { + *ret = ptr; + return 0; + } + if (errno != ENOMEM) + return negative_errno(); + + /* When failed with ENOMEM, try again after making a room by freeing an unused window. */ + + if (!m->last_unused) + return -ENOMEM; /* no free window, propagate the original error. */ + + window_free(m->last_unused); + } +} + +static int add_mmap( + MMapFileDescriptor *f, + uint64_t offset, + size_t size, + struct stat *st, + Window **ret) { + + Window *w; + void *d; + int r; + + assert(f); + assert(size > 0); + assert(ret); + + /* overflow check */ + if (size > SIZE_MAX - PAGE_OFFSET_U64(offset)) + return -EADDRNOTAVAIL; + + size = PAGE_ALIGN(size + PAGE_OFFSET_U64(offset)); + offset = PAGE_ALIGN_DOWN_U64(offset); + + if (size < WINDOW_SIZE) { + uint64_t delta; + + delta = PAGE_ALIGN((WINDOW_SIZE - size) / 2); + offset = LESS_BY(offset, delta); + size = WINDOW_SIZE; + } + + if (st) { + /* Memory maps that are larger then the files underneath have undefined behavior. Hence, + * clamp things to the file size if we know it */ + + if (offset >= (uint64_t) st->st_size) + return -EADDRNOTAVAIL; + + if (size > (uint64_t) st->st_size - offset) + size = PAGE_ALIGN((uint64_t) st->st_size - offset); + } + + if (size >= SIZE_MAX) + return -EADDRNOTAVAIL; + + r = mmap_try_harder(f, NULL, MAP_SHARED, offset, size, &d); + if (r < 0) + return r; + + w = window_add(f, offset, size, d); + if (!w) { + (void) munmap(d, size); + return -ENOMEM; + } + + *ret = w; + return 0; +} + +int mmap_cache_fd_get( + MMapFileDescriptor *f, + MMapCacheCategory c, + bool keep_always, + uint64_t offset, + size_t size, + struct stat *st, + void **ret) { + + MMapCache *m = mmap_cache_fd_cache(f); + Window *w; + int r; + + assert(size > 0); + assert(c >= 0 && c < _MMAP_CACHE_CATEGORY_MAX); + assert(ret); + + if (f->sigbus) + return -EIO; + + /* Check whether the current category is the right one already */ + if (window_matches(m->windows_by_category[c], f, offset, size)) { + m->n_category_cache_hit++; + w = m->windows_by_category[c]; + goto found; + } + + /* Drop the reference to the window, since it's unnecessary now */ + category_detach_window(m, c); + + /* Search for a matching mmap */ + LIST_FOREACH(windows, i, f->windows) + if (window_matches(i, f, offset, size)) { + m->n_window_list_hit++; + w = i; + goto found; + } + + m->n_missed++; + + /* Create a new mmap */ + r = add_mmap(f, offset, size, st, &w); + if (r < 0) + return r; + +found: + if (keep_always) + w->flags |= WINDOW_KEEP_ALWAYS; + + category_attach_window(m, c, w); + *ret = (uint8_t*) w->ptr + (offset - w->offset); + return 0; +} + +int mmap_cache_fd_pin( + MMapFileDescriptor *f, + MMapCacheCategory c, + void *addr, + size_t size) { + + MMapCache *m = mmap_cache_fd_cache(f); + Window *w; + + assert(addr); + assert(c >= 0 && c < _MMAP_CACHE_CATEGORY_MAX); + assert(size > 0); + + if (f->sigbus) + return -EIO; + + /* Check if the current category is the right one. */ + if (window_matches_by_addr(m->windows_by_category[c], f, addr, size)) { + m->n_category_cache_hit++; + w = m->windows_by_category[c]; + goto found; + } + + /* Search for a matching mmap. */ + LIST_FOREACH(windows, i, f->windows) + if (window_matches_by_addr(i, f, addr, size)) { + m->n_window_list_hit++; + w = i; + goto found; + } + + m->n_missed++; + return -EADDRNOTAVAIL; /* Not found. */ + +found: + if (FLAGS_SET(w->flags, WINDOW_KEEP_ALWAYS)) + return 0; /* The window will never unmapped. */ + + /* Attach the window to the 'pinning' category. */ + category_attach_window(m, MMAP_CACHE_CATEGORY_PIN, w); + return 1; +} + +void mmap_cache_stats_log_debug(MMapCache *m) { + assert(m); + + log_debug("mmap cache statistics: %u category cache hit, %u window list hit, %u miss", + m->n_category_cache_hit, m->n_window_list_hit, m->n_missed); +} + +static void mmap_cache_process_sigbus(MMapCache *m) { + bool found = false; + MMapFileDescriptor *f; + int r; + + assert(m); + + /* Iterate through all triggered pages and mark their files as invalidated. */ + for (;;) { + bool ours; + void *addr; + + r = sigbus_pop(&addr); + if (_likely_(r == 0)) + break; + if (r < 0) { + log_error_errno(r, "SIGBUS handling failed: %m"); + abort(); + } + + ours = false; + HASHMAP_FOREACH(f, m->fds) { + LIST_FOREACH(windows, w, f->windows) + if (window_matches_by_addr(w, f, addr, 1)) { + found = ours = f->sigbus = true; + break; + } + + if (ours) + break; + } + + /* Didn't find a matching window, give up. */ + if (!ours) { + log_error("Unknown SIGBUS page, aborting."); + abort(); + } + } + + /* The list of triggered pages is now empty. Now, let's remap all windows of the triggered file to + * anonymous maps, so that no page of the file in question is triggered again, so that we can be sure + * not to hit the queue size limit. */ + if (_likely_(!found)) + return; + + HASHMAP_FOREACH(f, m->fds) { + if (!f->sigbus) + continue; + + LIST_FOREACH(windows, w, f->windows) + window_invalidate(w); + } +} + +bool mmap_cache_fd_got_sigbus(MMapFileDescriptor *f) { + assert(f); + + mmap_cache_process_sigbus(f->cache); + + return f->sigbus; +} + +int mmap_cache_add_fd(MMapCache *m, int fd, int prot, MMapFileDescriptor **ret) { + _cleanup_free_ MMapFileDescriptor *f = NULL; + MMapFileDescriptor *existing; + int r; + + assert(m); + assert(fd >= 0); + + existing = hashmap_get(m->fds, FD_TO_PTR(fd)); + if (existing) { + if (existing->prot != prot) + return -EEXIST; + if (ret) + *ret = existing; + return 0; + } + + f = new(MMapFileDescriptor, 1); + if (!f) + return -ENOMEM; + + *f = (MMapFileDescriptor) { + .fd = fd, + .prot = prot, + }; + + r = hashmap_ensure_put(&m->fds, NULL, FD_TO_PTR(fd), f); + if (r < 0) + return r; + assert(r > 0); + + f->cache = mmap_cache_ref(m); + + if (ret) + *ret = f; + + TAKE_PTR(f); + return 1; +} + +MMapFileDescriptor* mmap_cache_fd_free(MMapFileDescriptor *f) { + if (!f) + return NULL; + + /* Make sure that any queued SIGBUS are first dispatched, so that we don't end up with a SIGBUS entry + * we cannot relate to any existing memory map. */ + + mmap_cache_process_sigbus(f->cache); + + while (f->windows) + window_free(f->windows); + + assert_se(hashmap_remove(f->cache->fds, FD_TO_PTR(f->fd)) == f); + + /* Unref the cache at the end. Otherwise, the assertions in mmap_cache_free() may be triggered. */ + f->cache = mmap_cache_unref(f->cache); + + return mfree(f); +} + +MMapCache* mmap_cache_fd_cache(MMapFileDescriptor *f) { + assert(f); + return ASSERT_PTR(f->cache); +} diff --git a/src/libsystemd/sd-journal/mmap-cache.h b/src/libsystemd/sd-journal/mmap-cache.h new file mode 100644 index 0000000..1fbc236 --- /dev/null +++ b/src/libsystemd/sd-journal/mmap-cache.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <errno.h> +#include <stdbool.h> +#include <sys/stat.h> + +#include "journal-def.h" + +typedef struct MMapCache MMapCache; +typedef struct MMapFileDescriptor MMapFileDescriptor; + +typedef enum MMapCacheCategory { + MMAP_CACHE_CATEGORY_ANY = OBJECT_UNUSED, + MMAP_CACHE_CATEGORY_DATA = OBJECT_DATA, + MMAP_CACHE_CATEGORY_FIELD = OBJECT_FIELD, + MMAP_CACHE_CATEGORY_ENTRY = OBJECT_ENTRY, + MMAP_CACHE_CATEGORY_DATA_HASH_TABLE = OBJECT_DATA_HASH_TABLE, + MMAP_CACHE_CATEGORY_FIELD_HASH_TABLE = OBJECT_FIELD_HASH_TABLE, + MMAP_CACHE_CATEGORY_ENTRY_ARRAY = OBJECT_ENTRY_ARRAY, + MMAP_CACHE_CATEGORY_TAG = OBJECT_TAG, + MMAP_CACHE_CATEGORY_HEADER, /* for reading file header */ + MMAP_CACHE_CATEGORY_PIN, /* for temporary pinning a object */ + _MMAP_CACHE_CATEGORY_MAX, + _MMAP_CACHE_CATEGORY_INVALID = -EINVAL, +} MMapCacheCategory; + +assert_cc((int) _OBJECT_TYPE_MAX < (int) _MMAP_CACHE_CATEGORY_MAX); + +static inline MMapCacheCategory type_to_category(ObjectType type) { + return type >= 0 && type < _OBJECT_TYPE_MAX ? (MMapCacheCategory) type : MMAP_CACHE_CATEGORY_ANY; +} + +MMapCache* mmap_cache_new(void); +MMapCache* mmap_cache_ref(MMapCache *m); +MMapCache* mmap_cache_unref(MMapCache *m); +DEFINE_TRIVIAL_CLEANUP_FUNC(MMapCache*, mmap_cache_unref); + +int mmap_cache_fd_get( + MMapFileDescriptor *f, + MMapCacheCategory c, + bool keep_always, + uint64_t offset, + size_t size, + struct stat *st, + void **ret); + +int mmap_cache_fd_pin( + MMapFileDescriptor *f, + MMapCacheCategory c, + void *addr, + size_t size); + +int mmap_cache_add_fd(MMapCache *m, int fd, int prot, MMapFileDescriptor **ret); +MMapCache* mmap_cache_fd_cache(MMapFileDescriptor *f); +MMapFileDescriptor* mmap_cache_fd_free(MMapFileDescriptor *f); + +void mmap_cache_stats_log_debug(MMapCache *m); + +bool mmap_cache_fd_got_sigbus(MMapFileDescriptor *f); diff --git a/src/libsystemd/sd-journal/sd-journal.c b/src/libsystemd/sd-journal/sd-journal.c new file mode 100644 index 0000000..6b9ff0a --- /dev/null +++ b/src/libsystemd/sd-journal/sd-journal.c @@ -0,0 +1,3528 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <inttypes.h> +#include <linux/magic.h> +#include <poll.h> +#include <stddef.h> +#include <sys/inotify.h> +#include <sys/vfs.h> +#include <unistd.h> + +#include "sd-journal.h" + +#include "alloc-util.h" +#include "catalog.h" +#include "compress.h" +#include "dirent-util.h" +#include "env-file.h" +#include "escape.h" +#include "fd-util.h" +#include "fileio.h" +#include "format-util.h" +#include "fs-util.h" +#include "hashmap.h" +#include "hostname-util.h" +#include "id128-util.h" +#include "inotify-util.h" +#include "io-util.h" +#include "journal-def.h" +#include "journal-file.h" +#include "journal-internal.h" +#include "list.h" +#include "lookup3.h" +#include "nulstr-util.h" +#include "origin-id.h" +#include "path-util.h" +#include "prioq.h" +#include "process-util.h" +#include "replace-var.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strv.h" +#include "syslog-util.h" +#include "uid-alloc-range.h" + +#define JOURNAL_FILES_RECHECK_USEC (2 * USEC_PER_SEC) + +/* The maximum size of variable values we'll expand in catalog entries. We bind this to PATH_MAX for now, as + * we want to be able to show all officially valid paths at least */ +#define REPLACE_VAR_MAX PATH_MAX + +#define DEFAULT_DATA_THRESHOLD (64*1024) + +DEFINE_PRIVATE_ORIGIN_ID_HELPERS(sd_journal, journal); + +static void remove_file_real(sd_journal *j, JournalFile *f); +static int journal_file_read_tail_timestamp(sd_journal *j, JournalFile *f); +static void journal_file_unlink_newest_by_boot_id(sd_journal *j, JournalFile *f); + +static int journal_put_error(sd_journal *j, int r, const char *path) { + _cleanup_free_ char *copy = NULL; + int k; + + /* Memorize an error we encountered, and store which + * file/directory it was generated from. Note that we store + * only *one* path per error code, as the error code is the + * key into the hashmap, and the path is the value. This means + * we keep track only of all error kinds, but not of all error + * locations. This has the benefit that the hashmap cannot + * grow beyond bounds. + * + * We return an error here only if we didn't manage to + * memorize the real error. */ + + if (r >= 0) + return r; + + if (path) { + copy = strdup(path); + if (!copy) + return -ENOMEM; + } + + k = hashmap_ensure_put(&j->errors, NULL, INT_TO_PTR(r), copy); + if (k < 0) { + if (k == -EEXIST) + return 0; + + return k; + } + + TAKE_PTR(copy); + return 0; +} + +static void detach_location(sd_journal *j) { + JournalFile *f; + + assert(j); + + j->current_file = NULL; + j->current_field = 0; + + ORDERED_HASHMAP_FOREACH(f, j->files) + journal_file_reset_location(f); +} + +static void init_location(Location *l, LocationType type, JournalFile *f, Object *o) { + assert(l); + assert(IN_SET(type, LOCATION_DISCRETE, LOCATION_SEEK)); + assert(f); + + *l = (Location) { + .type = type, + .seqnum = le64toh(o->entry.seqnum), + .seqnum_id = f->header->seqnum_id, + .realtime = le64toh(o->entry.realtime), + .monotonic = le64toh(o->entry.monotonic), + .boot_id = o->entry.boot_id, + .xor_hash = le64toh(o->entry.xor_hash), + .seqnum_set = true, + .realtime_set = true, + .monotonic_set = true, + .xor_hash_set = true, + }; +} + +static void set_location(sd_journal *j, JournalFile *f, Object *o) { + assert(j); + assert(f); + assert(o); + + init_location(&j->current_location, LOCATION_DISCRETE, f, o); + + j->current_file = f; + j->current_field = 0; + + /* Let f know its candidate entry was picked. */ + assert(f->location_type == LOCATION_SEEK); + f->location_type = LOCATION_DISCRETE; +} + +static int match_is_valid(const void *data, size_t size) { + const char *b = ASSERT_PTR(data); + + if (size < 2) + return false; + + if (((char*) data)[0] == '_' && ((char*) data)[1] == '_') + return false; + + for (const char *p = b; p < b + size; p++) { + + if (*p == '=') + return p > b; + + if (*p == '_') + continue; + + if (*p >= 'A' && *p <= 'Z') + continue; + + if (ascii_isdigit(*p)) + continue; + + return false; + } + + return false; +} + +static bool same_field(const void *_a, size_t s, const void *_b, size_t t) { + const uint8_t *a = _a, *b = _b; + + for (size_t j = 0; j < s && j < t; j++) { + + if (a[j] != b[j]) + return false; + + if (a[j] == '=') + return true; + } + + assert_not_reached(); +} + +static Match *match_new(Match *p, MatchType t) { + Match *m; + + m = new(Match, 1); + if (!m) + return NULL; + + *m = (Match) { + .type = t, + .parent = p, + }; + + if (p) + LIST_PREPEND(matches, p->matches, m); + + return m; +} + +static Match *match_free(Match *m) { + assert(m); + + while (m->matches) + match_free(m->matches); + + if (m->parent) + LIST_REMOVE(matches, m->parent->matches, m); + + free(m->data); + return mfree(m); +} + +static Match *match_free_if_empty(Match *m) { + if (!m || m->matches) + return m; + + return match_free(m); +} + +_public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) { + Match *add_here = NULL, *m = NULL; + uint64_t hash; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + assert_return(data, -EINVAL); + + if (size == 0) + size = strlen(data); + + if (!match_is_valid(data, size)) + return -EINVAL; + + /* level 0: AND term + * level 1: OR terms + * level 2: AND terms + * level 3: OR terms + * level 4: concrete matches */ + + if (!j->level0) { + j->level0 = match_new(NULL, MATCH_AND_TERM); + if (!j->level0) + return -ENOMEM; + } + + if (!j->level1) { + j->level1 = match_new(j->level0, MATCH_OR_TERM); + if (!j->level1) + return -ENOMEM; + } + + if (!j->level2) { + j->level2 = match_new(j->level1, MATCH_AND_TERM); + if (!j->level2) + return -ENOMEM; + } + + assert(j->level0->type == MATCH_AND_TERM); + assert(j->level1->type == MATCH_OR_TERM); + assert(j->level2->type == MATCH_AND_TERM); + + /* Old-style Jenkins (unkeyed) hashing only here. We do not cover new-style siphash (keyed) hashing + * here, since it's different for each file, and thus can't be pre-calculated in the Match object. */ + hash = jenkins_hash64(data, size); + + LIST_FOREACH(matches, l3, j->level2->matches) { + assert(l3->type == MATCH_OR_TERM); + + LIST_FOREACH(matches, l4, l3->matches) { + assert(l4->type == MATCH_DISCRETE); + + /* Exactly the same match already? Then ignore + * this addition */ + if (l4->hash == hash && + l4->size == size && + memcmp(l4->data, data, size) == 0) + return 0; + + /* Same field? Then let's add this to this OR term */ + if (same_field(data, size, l4->data, l4->size)) { + add_here = l3; + break; + } + } + + if (add_here) + break; + } + + if (!add_here) { + add_here = match_new(j->level2, MATCH_OR_TERM); + if (!add_here) + goto fail; + } + + m = match_new(add_here, MATCH_DISCRETE); + if (!m) + goto fail; + + m->hash = hash; + m->size = size; + m->data = memdup(data, size); + if (!m->data) + goto fail; + + detach_location(j); + + return 0; + +fail: + match_free(m); + match_free_if_empty(add_here); + j->level2 = match_free_if_empty(j->level2); + j->level1 = match_free_if_empty(j->level1); + j->level0 = match_free_if_empty(j->level0); + + return -ENOMEM; +} + +_public_ int sd_journal_add_conjunction(sd_journal *j) { + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + + if (!j->level0) + return 0; + + if (!j->level1) + return 0; + + if (!j->level1->matches) + return 0; + + j->level1 = NULL; + j->level2 = NULL; + + return 0; +} + +_public_ int sd_journal_add_disjunction(sd_journal *j) { + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + + if (!j->level0) + return 0; + + if (!j->level1) + return 0; + + if (!j->level2) + return 0; + + if (!j->level2->matches) + return 0; + + j->level2 = NULL; + return 0; +} + +static char *match_make_string(Match *m) { + _cleanup_free_ char *p = NULL; + bool enclose = false; + + if (!m) + return strdup("none"); + + if (m->type == MATCH_DISCRETE) + return cescape_length(m->data, m->size); + + LIST_FOREACH(matches, i, m->matches) { + _cleanup_free_ char *t = NULL; + + t = match_make_string(i); + if (!t) + return NULL; + + if (p) { + if (!strextend(&p, m->type == MATCH_OR_TERM ? " OR " : " AND ", t)) + return NULL; + + enclose = true; + } else + p = TAKE_PTR(t); + } + + if (enclose) + return strjoin("(", p, ")"); + + return TAKE_PTR(p); +} + +char *journal_make_match_string(sd_journal *j) { + assert(j); + + return match_make_string(j->level0); +} + +_public_ void sd_journal_flush_matches(sd_journal *j) { + if (!j || journal_origin_changed(j)) + return; + + if (j->level0) + match_free(j->level0); + + j->level0 = j->level1 = j->level2 = NULL; + + detach_location(j); +} + +static int journal_file_find_newest_for_boot_id( + sd_journal *j, + sd_id128_t id, + JournalFile **ret) { + + JournalFile *prev = NULL; + int r; + + assert(j); + assert(ret); + + /* Before we use it, let's refresh the timestamp from the header, and reshuffle our prioq + * accordingly. We do this only a bunch of times, to not be caught in some update loop. */ + for (unsigned n_tries = 0;; n_tries++) { + JournalFile *f; + Prioq *q; + + q = hashmap_get(j->newest_by_boot_id, &id); + if (!q) + return log_debug_errno(SYNTHETIC_ERRNO(ENODATA), + "Requested delta for boot ID %s, but we have no information about that boot ID.", SD_ID128_TO_STRING(id)); + + assert_se(f = prioq_peek(q)); /* we delete hashmap entries once the prioq is empty, so this must hold */ + + if (f == prev || n_tries >= 5) { + /* This was already the best answer in the previous run, or we tried too often, use it */ + *ret = f; + return 0; + } + + prev = f; + + /* Let's read the journal file's current timestamp once, before we return it, maybe it has changed. */ + r = journal_file_read_tail_timestamp(j, f); + if (r < 0) + return log_debug_errno(r, "Failed to read tail timestamp while trying to find newest journal file for boot ID %s.", SD_ID128_TO_STRING(id)); + + /* Refreshing the timestamp we read might have reshuffled the prioq, hence let's check the + * prioq again and only use the information once we reached an equilibrium or hit a limit */ + } +} + +static int compare_boot_ids(sd_journal *j, sd_id128_t a, sd_id128_t b) { + JournalFile *x, *y; + + assert(j); + + /* Try to find the newest open journal file for the two boot ids */ + if (journal_file_find_newest_for_boot_id(j, a, &x) < 0 || + journal_file_find_newest_for_boot_id(j, b, &y) < 0) + return 0; + + /* Only compare the boot id timestamps if they originate from the same machine. If they are from + * different machines, then we timestamps of the boot ids might be as off as the timestamps on the + * entries and hence not useful for comparing. */ + if (!sd_id128_equal(x->newest_machine_id, y->newest_machine_id)) + return 0; + + return CMP(x->newest_realtime_usec, y->newest_realtime_usec); +} + +static int compare_with_location( + sd_journal *j, + const JournalFile *f, + const Location *l, + const JournalFile *current_file) { + int r; + + assert(j); + assert(f); + assert(l); + assert(f->location_type == LOCATION_SEEK); + assert(IN_SET(l->type, LOCATION_DISCRETE, LOCATION_SEEK)); + + if (l->monotonic_set && + sd_id128_equal(f->current_boot_id, l->boot_id) && + l->realtime_set && + f->current_realtime == l->realtime && + l->xor_hash_set && + f->current_xor_hash == l->xor_hash && + l->seqnum_set && + sd_id128_equal(f->header->seqnum_id, l->seqnum_id) && + f->current_seqnum == l->seqnum && + f != current_file) + return 0; + + if (l->seqnum_set && + sd_id128_equal(f->header->seqnum_id, l->seqnum_id)) { + r = CMP(f->current_seqnum, l->seqnum); + if (r != 0) + return r; + } + + if (l->monotonic_set) { + /* If both arguments have the same boot ID, then we can compare the monotonic timestamps. If + * they are distinct, then we might able to lookup the timestamps of those boot IDs (if they + * are from the same machine) and order by that. */ + if (sd_id128_equal(f->current_boot_id, l->boot_id)) + r = CMP(f->current_monotonic, l->monotonic); + else + r = compare_boot_ids(j, f->current_boot_id, l->boot_id); + if (r != 0) + return r; + } + + if (l->realtime_set) { + r = CMP(f->current_realtime, l->realtime); + if (r != 0) + return r; + } + + if (l->xor_hash_set) { + r = CMP(f->current_xor_hash, l->xor_hash); + if (r != 0) + return r; + } + + return 0; +} + +static int next_for_match( + sd_journal *j, + Match *m, + JournalFile *f, + uint64_t after_offset, + direction_t direction, + Object **ret, + uint64_t *offset) { + + int r; + uint64_t np = 0; + + assert(j); + assert(m); + assert(f); + + if (m->type == MATCH_DISCRETE) { + Object *d; + uint64_t hash; + + /* If the keyed hash logic is used, we need to calculate the hash fresh per file. Otherwise + * we can use what we pre-calculated. */ + if (JOURNAL_HEADER_KEYED_HASH(f->header)) + hash = journal_file_hash_data(f, m->data, m->size); + else + hash = m->hash; + + r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, &d, NULL); + if (r <= 0) + return r; + + return journal_file_move_to_entry_by_offset_for_data(f, d, after_offset, direction, ret, offset); + + } else if (m->type == MATCH_OR_TERM) { + + /* Find the earliest match beyond after_offset */ + + LIST_FOREACH(matches, i, m->matches) { + uint64_t cp; + + r = next_for_match(j, i, f, after_offset, direction, NULL, &cp); + if (r < 0) + return r; + else if (r > 0) { + if (np == 0 || (direction == DIRECTION_DOWN ? cp < np : cp > np)) + np = cp; + } + } + + if (np == 0) + return 0; + + } else if (m->type == MATCH_AND_TERM) { + Match *last_moved; + + /* Always jump to the next matching entry and repeat + * this until we find an offset that matches for all + * matches. */ + + if (!m->matches) + return 0; + + r = next_for_match(j, m->matches, f, after_offset, direction, NULL, &np); + if (r <= 0) + return r; + + assert(direction == DIRECTION_DOWN ? np >= after_offset : np <= after_offset); + last_moved = m->matches; + + LIST_LOOP_BUT_ONE(matches, i, m->matches, last_moved) { + uint64_t cp; + + r = next_for_match(j, i, f, np, direction, NULL, &cp); + if (r <= 0) + return r; + + assert(direction == DIRECTION_DOWN ? cp >= np : cp <= np); + if (direction == DIRECTION_DOWN ? cp > np : cp < np) { + np = cp; + last_moved = i; + } + } + } + + assert(np > 0); + + if (ret) { + r = journal_file_move_to_object(f, OBJECT_ENTRY, np, ret); + if (r < 0) + return r; + } + + if (offset) + *offset = np; + + return 1; +} + +static int find_location_for_match( + sd_journal *j, + Match *m, + JournalFile *f, + direction_t direction, + Object **ret, + uint64_t *offset) { + + int r; + + assert(j); + assert(m); + assert(f); + + if (m->type == MATCH_DISCRETE) { + Object *d; + uint64_t dp, hash; + + if (JOURNAL_HEADER_KEYED_HASH(f->header)) + hash = journal_file_hash_data(f, m->data, m->size); + else + hash = m->hash; + + r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, &d, &dp); + if (r <= 0) + return r; + + /* FIXME: missing: find by monotonic */ + + if (j->current_location.type == LOCATION_HEAD) + return direction == DIRECTION_DOWN ? journal_file_move_to_entry_for_data(f, d, DIRECTION_DOWN, ret, offset) : 0; + if (j->current_location.type == LOCATION_TAIL) + return direction == DIRECTION_UP ? journal_file_move_to_entry_for_data(f, d, DIRECTION_UP, ret, offset) : 0; + if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id)) + return journal_file_move_to_entry_by_seqnum_for_data(f, d, j->current_location.seqnum, direction, ret, offset); + if (j->current_location.monotonic_set) { + r = journal_file_move_to_entry_by_monotonic_for_data(f, d, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset); + if (r != 0) + return r; + + /* The data object might have been invalidated. */ + r = journal_file_move_to_object(f, OBJECT_DATA, dp, &d); + if (r < 0) + return r; + } + if (j->current_location.realtime_set) + return journal_file_move_to_entry_by_realtime_for_data(f, d, j->current_location.realtime, direction, ret, offset); + + return journal_file_move_to_entry_for_data(f, d, direction, ret, offset); + + } else if (m->type == MATCH_OR_TERM) { + uint64_t np = 0; + + /* Find the earliest match */ + + LIST_FOREACH(matches, i, m->matches) { + uint64_t cp; + + r = find_location_for_match(j, i, f, direction, NULL, &cp); + if (r < 0) + return r; + else if (r > 0) { + if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp)) + np = cp; + } + } + + if (np == 0) + return 0; + + if (ret) { + r = journal_file_move_to_object(f, OBJECT_ENTRY, np, ret); + if (r < 0) + return r; + } + + if (offset) + *offset = np; + + return 1; + + } else { + uint64_t np = 0; + + assert(m->type == MATCH_AND_TERM); + + /* First jump to the last match, and then find the + * next one where all matches match */ + + if (!m->matches) + return 0; + + LIST_FOREACH(matches, i, m->matches) { + uint64_t cp; + + r = find_location_for_match(j, i, f, direction, NULL, &cp); + if (r <= 0) + return r; + + if (np == 0 || (direction == DIRECTION_DOWN ? cp > np : cp < np)) + np = cp; + } + + return next_for_match(j, m, f, np, direction, ret, offset); + } +} + +static int find_location_with_matches( + sd_journal *j, + JournalFile *f, + direction_t direction, + Object **ret, + uint64_t *offset) { + + int r; + + assert(j); + assert(f); + assert(ret); + assert(offset); + + if (!j->level0) { + /* No matches is simple */ + + if (j->current_location.type == LOCATION_HEAD) + return direction == DIRECTION_DOWN ? journal_file_next_entry(f, 0, DIRECTION_DOWN, ret, offset) : 0; + if (j->current_location.type == LOCATION_TAIL) + return direction == DIRECTION_UP ? journal_file_next_entry(f, 0, DIRECTION_UP, ret, offset) : 0; + if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id)) + return journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, ret, offset); + if (j->current_location.monotonic_set) { + r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset); + if (r != 0) + return r; + } + if (j->current_location.realtime_set) + return journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, ret, offset); + + return journal_file_next_entry(f, 0, direction, ret, offset); + } else + return find_location_for_match(j, j->level0, f, direction, ret, offset); +} + +static int next_with_matches( + sd_journal *j, + JournalFile *f, + direction_t direction, + Object **ret, + uint64_t *offset) { + + assert(j); + assert(f); + assert(ret); + assert(offset); + + /* No matches is easy. We simple advance the file + * pointer by one. */ + if (!j->level0) + return journal_file_next_entry(f, f->current_offset, direction, ret, offset); + + /* If we have a match then we look for the next matching entry + * with an offset at least one step larger */ + return next_for_match(j, j->level0, f, + direction == DIRECTION_DOWN ? f->current_offset + 1 + : f->current_offset - 1, + direction, ret, offset); +} + +static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction) { + Object *c; + uint64_t cp, n_entries; + int r; + + assert(j); + assert(f); + + (void) journal_file_read_tail_timestamp(j, f); + + n_entries = le64toh(f->header->n_entries); + + /* If we hit EOF before, we don't need to look into this file again + * unless direction changed or new entries appeared. */ + if (f->last_direction == direction && + f->location_type == (direction == DIRECTION_DOWN ? LOCATION_TAIL : LOCATION_HEAD) && + n_entries == f->last_n_entries) + return 0; + + f->last_n_entries = n_entries; + + if (f->last_direction == direction && f->current_offset > 0) { + /* LOCATION_SEEK here means we did the work in a previous + * iteration and the current location already points to a + * candidate entry. */ + if (f->location_type != LOCATION_SEEK) { + r = next_with_matches(j, f, direction, &c, &cp); + if (r <= 0) + return r; + + journal_file_save_location(f, c, cp); + } + } else { + f->last_direction = direction; + + r = find_location_with_matches(j, f, direction, &c, &cp); + if (r <= 0) + return r; + + journal_file_save_location(f, c, cp); + } + + /* OK, we found the spot, now let's advance until an entry + * that is actually different from what we were previously + * looking at. This is necessary to handle entries which exist + * in two (or more) journal files, and which shall all be + * suppressed but one. */ + + for (;;) { + bool found; + + if (j->current_location.type == LOCATION_DISCRETE) { + int k; + + k = compare_with_location(j, f, &j->current_location, j->current_file); + + found = direction == DIRECTION_DOWN ? k > 0 : k < 0; + } else + found = true; + + if (found) + return 1; + + r = next_with_matches(j, f, direction, &c, &cp); + if (r <= 0) + return r; + + journal_file_save_location(f, c, cp); + } +} + +static int compare_locations(sd_journal *j, JournalFile *af, JournalFile *bf) { + int r; + + assert(j); + assert(af); + assert(af->header); + assert(bf); + assert(bf->header); + assert(af->location_type == LOCATION_SEEK); + assert(bf->location_type == LOCATION_SEEK); + + /* If contents, timestamps and seqnum match, these entries are identical. */ + if (sd_id128_equal(af->current_boot_id, bf->current_boot_id) && + af->current_monotonic == bf->current_monotonic && + af->current_realtime == bf->current_realtime && + af->current_xor_hash == bf->current_xor_hash && + sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id) && + af->current_seqnum == bf->current_seqnum) + return 0; + + if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id)) { + /* If this is from the same seqnum source, compare seqnums */ + r = CMP(af->current_seqnum, bf->current_seqnum); + if (r != 0) + return r; + + /* Wow! This is weird, different data but the same seqnums? Something is borked, but let's + * make the best of it and compare by time. */ + } + + if (sd_id128_equal(af->current_boot_id, bf->current_boot_id)) + /* If the boot id matches, compare monotonic time */ + r = CMP(af->current_monotonic, bf->current_monotonic); + else + /* If they don't match try to compare boot IDs */ + r = compare_boot_ids(j, af->current_boot_id, bf->current_boot_id); + if (r != 0) + return r; + + /* Otherwise, compare UTC time */ + r = CMP(af->current_realtime, bf->current_realtime); + if (r != 0) + return r; + + /* Finally, compare by contents */ + return CMP(af->current_xor_hash, bf->current_xor_hash); +} + +static int real_journal_next(sd_journal *j, direction_t direction) { + JournalFile *new_file = NULL; + unsigned n_files; + const void **files; + Object *o; + int r; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + + r = iterated_cache_get(j->files_cache, NULL, &files, &n_files); + if (r < 0) + return r; + + for (unsigned i = 0; i < n_files; i++) { + JournalFile *f = (JournalFile *)files[i]; + bool found; + + r = next_beyond_location(j, f, direction); + if (r < 0) { + log_debug_errno(r, "Can't iterate through %s, ignoring: %m", f->path); + remove_file_real(j, f); + continue; + } else if (r == 0) { + f->location_type = direction == DIRECTION_DOWN ? LOCATION_TAIL : LOCATION_HEAD; + continue; + } + + if (!new_file) + found = true; + else { + int k; + + k = compare_locations(j, f, new_file); + + found = direction == DIRECTION_DOWN ? k < 0 : k > 0; + } + + if (found) + new_file = f; + } + + if (!new_file) + return 0; + + r = journal_file_move_to_object(new_file, OBJECT_ENTRY, new_file->current_offset, &o); + if (r < 0) + return r; + + set_location(j, new_file, o); + + return 1; +} + +_public_ int sd_journal_next(sd_journal *j) { + return real_journal_next(j, DIRECTION_DOWN); +} + +_public_ int sd_journal_previous(sd_journal *j) { + return real_journal_next(j, DIRECTION_UP); +} + +_public_ int sd_journal_step_one(sd_journal *j, int advanced) { + assert_return(j, -EINVAL); + + if (j->current_location.type == LOCATION_HEAD) + return sd_journal_next(j); + if (j->current_location.type == LOCATION_TAIL) + return sd_journal_previous(j); + return real_journal_next(j, advanced ? DIRECTION_DOWN : DIRECTION_UP); +} + +static int real_journal_next_skip(sd_journal *j, direction_t direction, uint64_t skip) { + int c = 0, r; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + assert_return(skip <= INT_MAX, -ERANGE); + + if (skip == 0) { + /* If this is not a discrete skip, then at least + * resolve the current location */ + if (j->current_location.type != LOCATION_DISCRETE) { + r = real_journal_next(j, direction); + if (r < 0) + return r; + } + + return 0; + } + + do { + r = real_journal_next(j, direction); + if (r < 0) + return r; + + if (r == 0) + return c; + + skip--; + c++; + } while (skip > 0); + + return c; +} + +_public_ int sd_journal_next_skip(sd_journal *j, uint64_t skip) { + return real_journal_next_skip(j, DIRECTION_DOWN, skip); +} + +_public_ int sd_journal_previous_skip(sd_journal *j, uint64_t skip) { + return real_journal_next_skip(j, DIRECTION_UP, skip); +} + +_public_ int sd_journal_get_cursor(sd_journal *j, char **cursor) { + Object *o; + int r; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + assert_return(cursor, -EINVAL); + + if (!j->current_file || j->current_file->current_offset <= 0) + return -EADDRNOTAVAIL; + + r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o); + if (r < 0) + return r; + + if (asprintf(cursor, + "s=%s;i=%"PRIx64";b=%s;m=%"PRIx64";t=%"PRIx64";x=%"PRIx64, + SD_ID128_TO_STRING(j->current_file->header->seqnum_id), le64toh(o->entry.seqnum), + SD_ID128_TO_STRING(o->entry.boot_id), le64toh(o->entry.monotonic), + le64toh(o->entry.realtime), + le64toh(o->entry.xor_hash)) < 0) + return -ENOMEM; + + return 0; +} + +_public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) { + unsigned long long seqnum, monotonic, realtime, xor_hash; + bool seqnum_id_set = false, + seqnum_set = false, + boot_id_set = false, + monotonic_set = false, + realtime_set = false, + xor_hash_set = false; + sd_id128_t seqnum_id, boot_id; + int r; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + assert_return(!isempty(cursor), -EINVAL); + + for (const char *p = cursor;;) { + _cleanup_free_ char *word = NULL; + + r = extract_first_word(&p, &word, ";", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return r; + if (r == 0) + break; + + if (word[0] == '\0' || word[1] != '=') + return -EINVAL; + + switch (word[0]) { + case 's': + seqnum_id_set = true; + r = sd_id128_from_string(word + 2, &seqnum_id); + if (r < 0) + return r; + break; + + case 'i': + seqnum_set = true; + if (sscanf(word + 2, "%llx", &seqnum) != 1) + return -EINVAL; + break; + + case 'b': + boot_id_set = true; + r = sd_id128_from_string(word + 2, &boot_id); + if (r < 0) + return r; + break; + + case 'm': + monotonic_set = true; + if (sscanf(word + 2, "%llx", &monotonic) != 1) + return -EINVAL; + break; + + case 't': + realtime_set = true; + if (sscanf(word + 2, "%llx", &realtime) != 1) + return -EINVAL; + break; + + case 'x': + xor_hash_set = true; + if (sscanf(word + 2, "%llx", &xor_hash) != 1) + return -EINVAL; + break; + } + } + + if ((!seqnum_set || !seqnum_id_set) && + (!monotonic_set || !boot_id_set) && + !realtime_set) + return -EINVAL; + + detach_location(j); + j->current_location = (Location) { + .type = LOCATION_SEEK, + }; + + if (realtime_set) { + j->current_location.realtime = (uint64_t) realtime; + j->current_location.realtime_set = true; + } + + if (seqnum_set && seqnum_id_set) { + j->current_location.seqnum = (uint64_t) seqnum; + j->current_location.seqnum_id = seqnum_id; + j->current_location.seqnum_set = true; + } + + if (monotonic_set && boot_id_set) { + j->current_location.monotonic = (uint64_t) monotonic; + j->current_location.boot_id = boot_id; + j->current_location.monotonic_set = true; + } + + if (xor_hash_set) { + j->current_location.xor_hash = (uint64_t) xor_hash; + j->current_location.xor_hash_set = true; + } + + return 0; +} + +_public_ int sd_journal_test_cursor(sd_journal *j, const char *cursor) { + int r; + Object *o; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + assert_return(!isempty(cursor), -EINVAL); + + if (!j->current_file || j->current_file->current_offset <= 0) + return -EADDRNOTAVAIL; + + r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o); + if (r < 0) + return r; + + for (;;) { + _cleanup_free_ char *item = NULL; + unsigned long long ll; + sd_id128_t id; + int k = 0; + + r = extract_first_word(&cursor, &item, ";", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return r; + + if (r == 0) + break; + + if (strlen(item) < 2 || item[1] != '=') + return -EINVAL; + + switch (item[0]) { + + case 's': + k = sd_id128_from_string(item+2, &id); + if (k < 0) + return k; + if (!sd_id128_equal(id, j->current_file->header->seqnum_id)) + return 0; + break; + + case 'i': + if (sscanf(item+2, "%llx", &ll) != 1) + return -EINVAL; + if (ll != le64toh(o->entry.seqnum)) + return 0; + break; + + case 'b': + k = sd_id128_from_string(item+2, &id); + if (k < 0) + return k; + if (!sd_id128_equal(id, o->entry.boot_id)) + return 0; + break; + + case 'm': + if (sscanf(item+2, "%llx", &ll) != 1) + return -EINVAL; + if (ll != le64toh(o->entry.monotonic)) + return 0; + break; + + case 't': + if (sscanf(item+2, "%llx", &ll) != 1) + return -EINVAL; + if (ll != le64toh(o->entry.realtime)) + return 0; + break; + + case 'x': + if (sscanf(item+2, "%llx", &ll) != 1) + return -EINVAL; + if (ll != le64toh(o->entry.xor_hash)) + return 0; + break; + } + } + + return 1; +} + +_public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) { + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + + detach_location(j); + + j->current_location = (Location) { + .type = LOCATION_SEEK, + .boot_id = boot_id, + .monotonic = usec, + .monotonic_set = true, + }; + + return 0; +} + +_public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) { + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + + detach_location(j); + + j->current_location = (Location) { + .type = LOCATION_SEEK, + .realtime = usec, + .realtime_set = true, + }; + + return 0; +} + +_public_ int sd_journal_seek_head(sd_journal *j) { + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + + detach_location(j); + + j->current_location = (Location) { + .type = LOCATION_HEAD, + }; + + return 0; +} + +_public_ int sd_journal_seek_tail(sd_journal *j) { + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + + detach_location(j); + + j->current_location = (Location) { + .type = LOCATION_TAIL, + }; + + return 0; +} + +static void check_network(sd_journal *j, int fd) { + assert(j); + + if (j->on_network) + return; + + j->on_network = fd_is_network_fs(fd); +} + +static bool file_has_type_prefix(const char *prefix, const char *filename) { + const char *full, *tilded, *atted; + + full = strjoina(prefix, ".journal"); + tilded = strjoina(full, "~"); + atted = strjoina(prefix, "@"); + + return STR_IN_SET(filename, full, tilded) || + startswith(filename, atted); +} + +static bool file_type_wanted(int flags, const char *filename) { + assert(filename); + + if (!ENDSWITH_SET(filename, ".journal", ".journal~")) + return false; + + /* no flags set → every type is OK */ + if (!(flags & (SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER))) + return true; + + if (FLAGS_SET(flags, SD_JOURNAL_CURRENT_USER)) { + char prefix[5 + DECIMAL_STR_MAX(uid_t) + 1]; + + xsprintf(prefix, "user-" UID_FMT, getuid()); + + if (file_has_type_prefix(prefix, filename)) + return true; + + /* If SD_JOURNAL_CURRENT_USER is specified and we are invoked under a system UID, then + * automatically enable SD_JOURNAL_SYSTEM too, because journald will actually put system user + * data into the system journal. */ + + if (uid_for_system_journal(getuid())) + flags |= SD_JOURNAL_SYSTEM; + } + + if (FLAGS_SET(flags, SD_JOURNAL_SYSTEM) && file_has_type_prefix("system", filename)) + return true; + + return false; +} + +static bool path_has_prefix(sd_journal *j, const char *path, const char *prefix) { + assert(j); + assert(path); + assert(prefix); + + if (j->toplevel_fd >= 0) + return false; + + return path_startswith(path, prefix); +} + +static void track_file_disposition(sd_journal *j, JournalFile *f) { + assert(j); + assert(f); + + if (!j->has_runtime_files && path_has_prefix(j, f->path, "/run")) + j->has_runtime_files = true; + else if (!j->has_persistent_files && path_has_prefix(j, f->path, "/var")) + j->has_persistent_files = true; +} + +static const char *skip_slash(const char *p) { + + if (!p) + return NULL; + + while (*p == '/') + p++; + + return p; +} + +static int add_any_file( + sd_journal *j, + int fd, + const char *path) { + + _cleanup_close_ int our_fd = -EBADF; + JournalFile *f; + struct stat st; + int r; + + assert(j); + assert(fd >= 0 || path); + + if (fd < 0) { + assert(path); /* For gcc. */ + if (j->toplevel_fd >= 0) + /* If there's a top-level fd defined make the path relative, explicitly, since otherwise + * openat() ignores the first argument. */ + + fd = our_fd = openat(j->toplevel_fd, skip_slash(path), O_RDONLY|O_CLOEXEC|O_NONBLOCK); + else + fd = our_fd = open(path, O_RDONLY|O_CLOEXEC|O_NONBLOCK); + if (fd < 0) { + r = log_debug_errno(errno, "Failed to open journal file %s: %m", path); + goto error; + } + + r = fd_nonblock(fd, false); + if (r < 0) { + r = log_debug_errno(errno, "Failed to turn off O_NONBLOCK for %s: %m", path); + goto error; + } + } + + if (fstat(fd, &st) < 0) { + r = log_debug_errno(errno, "Failed to fstat %s: %m", path ?: "fd"); + goto error; + } + + r = stat_verify_regular(&st); + if (r < 0) { + log_debug_errno(r, "Refusing to open %s: %m", path ?: "fd"); + goto error; + } + + if (path) { + f = ordered_hashmap_get(j->files, path); + if (f) { + if (stat_inode_same(&f->last_stat, &st)) { + /* We already track this file, under the same path and with the same + * device/inode numbers, it's hence really the same. Mark this file as seen + * in this generation. This is used to GC old files in process_q_overflow() + * to detect journal files that are still there and discern them from those + * which are gone. */ + + f->last_seen_generation = j->generation; + (void) journal_file_read_tail_timestamp(j, f); + return 0; + } + + /* So we tracked a file under this name, but it has a different inode/device. In that + * case, it got replaced (probably due to rotation?), let's drop it hence from our + * list. */ + remove_file_real(j, f); + f = NULL; + } + } + + if (ordered_hashmap_size(j->files) >= JOURNAL_FILES_MAX) { + r = log_debug_errno(SYNTHETIC_ERRNO(ETOOMANYREFS), + "Too many open journal files, not adding %s.", path ?: "fd"); + goto error; + } + + r = journal_file_open(fd, path, O_RDONLY, 0, 0, 0, NULL, j->mmap, NULL, &f); + if (r < 0) { + log_debug_errno(r, "Failed to open journal file %s: %m", path ?: "from fd"); + goto error; + } + + /* journal_file_dump(f); */ + + /* journal_file_open() generates an replacement fname if necessary, so we can use f->path. */ + r = ordered_hashmap_put(j->files, f->path, f); + if (r < 0) { + f->close_fd = false; /* Make sure journal_file_close() doesn't close the caller's fd + * (or our own). The caller or we will do that ourselves. */ + (void) journal_file_close(f); + goto error; + } + + TAKE_FD(our_fd); /* the fd is now owned by the JournalFile object */ + + f->last_seen_generation = j->generation; + + track_file_disposition(j, f); + check_network(j, f->fd); + (void) journal_file_read_tail_timestamp(j, f); + + j->current_invalidate_counter++; + + log_debug("File %s added.", f->path); + + return 0; + +error: + (void) journal_put_error(j, r, path); /* path==NULL is OK. */ + return r; +} + +static int add_file_by_name( + sd_journal *j, + const char *prefix, + const char *filename) { + + _cleanup_free_ char *path = NULL; + + assert(j); + assert(prefix); + assert(filename); + + if (j->no_new_files) + return 0; + + if (!file_type_wanted(j->flags, filename)) + return 0; + + path = path_join(prefix, filename); + if (!path) + return -ENOMEM; + + return add_any_file(j, -1, path); +} + +static int remove_file_by_name( + sd_journal *j, + const char *prefix, + const char *filename) { + + _cleanup_free_ char *path = NULL; + JournalFile *f; + + assert(j); + assert(prefix); + assert(filename); + + path = path_join(prefix, filename); + if (!path) + return -ENOMEM; + + f = ordered_hashmap_get(j->files, path); + if (!f) + return 0; + + remove_file_real(j, f); + return 1; +} + +static void remove_file_real(sd_journal *j, JournalFile *f) { + assert(j); + assert(f); + + (void) ordered_hashmap_remove(j->files, f->path); + + log_debug("File %s removed.", f->path); + + if (j->current_file == f) { + j->current_file = NULL; + j->current_field = 0; + } + + if (j->unique_file == f) { + /* Jump to the next unique_file or NULL if that one was last */ + j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path); + j->unique_offset = 0; + if (!j->unique_file) + j->unique_file_lost = true; + } + + if (j->fields_file == f) { + j->fields_file = ordered_hashmap_next(j->files, j->fields_file->path); + j->fields_offset = 0; + if (!j->fields_file) + j->fields_file_lost = true; + } + + journal_file_unlink_newest_by_boot_id(j, f); + (void) journal_file_close(f); + + j->current_invalidate_counter++; +} + +static int dirname_is_machine_id(const char *fn) { + sd_id128_t id, machine; + const char *e; + int r; + + /* Returns true if the specified directory name matches the local machine ID */ + + r = sd_id128_get_machine(&machine); + if (r < 0) + return r; + + e = strchr(fn, '.'); + if (e) { + const char *k; + + /* Looks like it has a namespace suffix. Verify that. */ + if (!log_namespace_name_valid(e + 1)) + return false; + + k = strndupa_safe(fn, e - fn); + r = sd_id128_from_string(k, &id); + } else + r = sd_id128_from_string(fn, &id); + if (r < 0) + return r; + + return sd_id128_equal(id, machine); +} + +static int dirname_has_namespace(const char *fn, const char *namespace) { + const char *e; + + /* Returns true if the specified directory name matches the specified namespace */ + + e = strchr(fn, '.'); + if (e) { + const char *k; + + if (!namespace) + return false; + + if (!streq(e + 1, namespace)) + return false; + + k = strndupa_safe(fn, e - fn); + return id128_is_valid(k); + } + + if (namespace) + return false; + + return id128_is_valid(fn); +} + +static bool dirent_is_journal_file(const struct dirent *de) { + assert(de); + + /* Returns true if the specified directory entry looks like a journal file we might be interested in */ + + if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN)) + return false; + + return endswith(de->d_name, ".journal") || + endswith(de->d_name, ".journal~"); +} + +static bool dirent_is_journal_subdir(const struct dirent *de) { + const char *e, *n; + assert(de); + + /* returns true if the specified directory entry looks like a directory that might contain journal + * files we might be interested in, i.e. is either a 128-bit ID or a 128-bit ID suffixed by a + * namespace. */ + + if (!IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN)) + return false; + + e = strchr(de->d_name, '.'); + if (!e) + return id128_is_valid(de->d_name); /* No namespace */ + + n = strndupa_safe(de->d_name, e - de->d_name); + if (!id128_is_valid(n)) + return false; + + return log_namespace_name_valid(e + 1); +} + +static int directory_open(sd_journal *j, const char *path, DIR **ret) { + DIR *d; + + assert(j); + assert(path); + assert(ret); + + if (j->toplevel_fd < 0) + d = opendir(path); + else + /* Open the specified directory relative to the toplevel fd. Enforce that the path specified is + * relative, by dropping the initial slash */ + d = xopendirat(j->toplevel_fd, skip_slash(path), 0); + if (!d) + return -errno; + + *ret = d; + return 0; +} + +static int add_directory(sd_journal *j, const char *prefix, const char *dirname); + +static void directory_enumerate(sd_journal *j, Directory *m, DIR *d) { + assert(j); + assert(m); + assert(d); + + FOREACH_DIRENT_ALL(de, d, goto fail) { + if (dirent_is_journal_file(de)) + (void) add_file_by_name(j, m->path, de->d_name); + + if (m->is_root && dirent_is_journal_subdir(de)) + (void) add_directory(j, m->path, de->d_name); + } + + return; +fail: + log_debug_errno(errno, "Failed to enumerate directory %s, ignoring: %m", m->path); +} + +static void directory_watch(sd_journal *j, Directory *m, int fd, uint32_t mask) { + int r; + + assert(j); + assert(m); + assert(fd >= 0); + + /* Watch this directory if that's enabled and if it not being watched yet. */ + + if (m->wd > 0) /* Already have a watch? */ + return; + if (j->inotify_fd < 0) /* Not watching at all? */ + return; + + m->wd = inotify_add_watch_fd(j->inotify_fd, fd, mask); + if (m->wd < 0) { + log_debug_errno(errno, "Failed to watch journal directory '%s', ignoring: %m", m->path); + return; + } + + r = hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m); + if (r == -EEXIST) + log_debug_errno(r, "Directory '%s' already being watched under a different path, ignoring: %m", m->path); + if (r < 0) { + log_debug_errno(r, "Failed to add watch for journal directory '%s' to hashmap, ignoring: %m", m->path); + (void) inotify_rm_watch(j->inotify_fd, m->wd); + m->wd = -1; + } +} + +static int add_directory( + sd_journal *j, + const char *prefix, + const char *dirname) { + + _cleanup_free_ char *path = NULL; + _cleanup_closedir_ DIR *d = NULL; + Directory *m; + int r, k; + + assert(j); + assert(prefix); + + /* Adds a journal file directory to watch. If the directory is already tracked this updates the inotify watch + * and reenumerates directory contents */ + + path = path_join(prefix, dirname); + if (!path) { + r = -ENOMEM; + goto fail; + } + + log_debug("Considering directory '%s'.", path); + + /* We consider everything local that is in a directory for the local machine ID, or that is stored in /run */ + if ((j->flags & SD_JOURNAL_LOCAL_ONLY) && + !((dirname && dirname_is_machine_id(dirname) > 0) || path_has_prefix(j, path, "/run"))) + return 0; + + if (dirname && + (!(FLAGS_SET(j->flags, SD_JOURNAL_ALL_NAMESPACES) || + dirname_has_namespace(dirname, j->namespace) > 0 || + (FLAGS_SET(j->flags, SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE) && dirname_has_namespace(dirname, NULL) > 0)))) + return 0; + + r = directory_open(j, path, &d); + if (r < 0) { + log_debug_errno(r, "Failed to open directory '%s': %m", path); + goto fail; + } + + m = hashmap_get(j->directories_by_path, path); + if (!m) { + m = new(Directory, 1); + if (!m) { + r = -ENOMEM; + goto fail; + } + + *m = (Directory) { + .is_root = false, + .path = path, + }; + + if (hashmap_put(j->directories_by_path, m->path, m) < 0) { + free(m); + r = -ENOMEM; + goto fail; + } + + path = NULL; /* avoid freeing in cleanup */ + j->current_invalidate_counter++; + + log_debug("Directory %s added.", m->path); + + } else if (m->is_root) + return 0; /* Don't 'downgrade' from root directory */ + + m->last_seen_generation = j->generation; + + directory_watch(j, m, dirfd(d), + IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE| + IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM| + IN_ONLYDIR); + + if (!j->no_new_files) + directory_enumerate(j, m, d); + + check_network(j, dirfd(d)); + + return 0; + +fail: + k = journal_put_error(j, r, path ?: prefix); + if (k < 0) + return k; + + return r; +} + +static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) { + + _cleanup_closedir_ DIR *d = NULL; + Directory *m; + int r, k; + + assert(j); + + /* Adds a root directory to our set of directories to use. If the root directory is already in the set, we + * update the inotify logic, and renumerate the directory entries. This call may hence be called to initially + * populate the set, as well as to update it later. */ + + if (p) { + /* If there's a path specified, use it. */ + + log_debug("Considering root directory '%s'.", p); + + if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) && + !path_has_prefix(j, p, "/run")) + return -EINVAL; + + if (j->prefix) + p = strjoina(j->prefix, p); + + r = directory_open(j, p, &d); + if (r == -ENOENT && missing_ok) + return 0; + if (r < 0) { + log_debug_errno(r, "Failed to open root directory %s: %m", p); + goto fail; + } + } else { + _cleanup_close_ int dfd = -EBADF; + + /* If there's no path specified, then we use the top-level fd itself. We duplicate the fd here, since + * opendir() will take possession of the fd, and close it, which we don't want. */ + + p = "."; /* store this as "." in the directories hashmap */ + + dfd = fcntl(j->toplevel_fd, F_DUPFD_CLOEXEC, 3); + if (dfd < 0) { + r = -errno; + goto fail; + } + + d = take_fdopendir(&dfd); + if (!d) { + r = -errno; + goto fail; + } + + rewinddir(d); + } + + m = hashmap_get(j->directories_by_path, p); + if (!m) { + m = new0(Directory, 1); + if (!m) { + r = -ENOMEM; + goto fail; + } + + m->is_root = true; + + m->path = strdup(p); + if (!m->path) { + free(m); + r = -ENOMEM; + goto fail; + } + + if (hashmap_put(j->directories_by_path, m->path, m) < 0) { + free(m->path); + free(m); + r = -ENOMEM; + goto fail; + } + + j->current_invalidate_counter++; + + log_debug("Root directory %s added.", m->path); + + } else if (!m->is_root) + return 0; + + directory_watch(j, m, dirfd(d), + IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE| + IN_ONLYDIR); + + if (!j->no_new_files) + directory_enumerate(j, m, d); + + check_network(j, dirfd(d)); + + return 0; + +fail: + k = journal_put_error(j, r, p); + if (k < 0) + return k; + + return r; +} + +static void remove_directory(sd_journal *j, Directory *d) { + assert(j); + + if (d->wd > 0) { + hashmap_remove(j->directories_by_wd, INT_TO_PTR(d->wd)); + + if (j->inotify_fd >= 0) + (void) inotify_rm_watch(j->inotify_fd, d->wd); + } + + hashmap_remove(j->directories_by_path, d->path); + + if (d->is_root) + log_debug("Root directory %s removed.", d->path); + else + log_debug("Directory %s removed.", d->path); + + free(d->path); + free(d); +} + +static int add_search_paths(sd_journal *j) { + + static const char search_paths[] = + "/run/log/journal\0" + "/var/log/journal\0"; + + assert(j); + + /* We ignore most errors here, since the idea is to only open + * what's actually accessible, and ignore the rest. */ + + NULSTR_FOREACH(p, search_paths) + (void) add_root_directory(j, p, true); + + if (!(j->flags & SD_JOURNAL_LOCAL_ONLY)) + (void) add_root_directory(j, "/var/log/journal/remote", true); + + return 0; +} + +static int add_current_paths(sd_journal *j) { + JournalFile *f; + + assert(j); + assert(j->no_new_files); + + /* Simply adds all directories for files we have open as directories. We don't expect errors here, so we + * treat them as fatal. */ + + ORDERED_HASHMAP_FOREACH(f, j->files) { + _cleanup_free_ char *dir = NULL; + int r; + + r = path_extract_directory(f->path, &dir); + if (r < 0) + return r; + + r = add_directory(j, dir, NULL); + if (r < 0) + return r; + } + + return 0; +} + +static int allocate_inotify(sd_journal *j) { + assert(j); + + if (j->inotify_fd < 0) { + j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC); + if (j->inotify_fd < 0) + return -errno; + } + + return hashmap_ensure_allocated(&j->directories_by_wd, NULL); +} + +static sd_journal *journal_new(int flags, const char *path, const char *namespace) { + _cleanup_(sd_journal_closep) sd_journal *j = NULL; + + j = new(sd_journal, 1); + if (!j) + return NULL; + + *j = (sd_journal) { + .origin_id = origin_id_query(), + .toplevel_fd = -EBADF, + .inotify_fd = -EBADF, + .flags = flags, + .data_threshold = DEFAULT_DATA_THRESHOLD, + }; + + if (path) { + char *t; + + t = strdup(path); + if (!t) + return NULL; + + if (flags & SD_JOURNAL_OS_ROOT) + j->prefix = t; + else + j->path = t; + } + + if (namespace) { + j->namespace = strdup(namespace); + if (!j->namespace) + return NULL; + } + + j->files = ordered_hashmap_new(&path_hash_ops); + if (!j->files) + return NULL; + + j->files_cache = ordered_hashmap_iterated_cache_new(j->files); + j->directories_by_path = hashmap_new(&path_hash_ops); + j->mmap = mmap_cache_new(); + if (!j->files_cache || !j->directories_by_path || !j->mmap) + return NULL; + + return TAKE_PTR(j); +} + +#define OPEN_ALLOWED_FLAGS \ + (SD_JOURNAL_LOCAL_ONLY | \ + SD_JOURNAL_RUNTIME_ONLY | \ + SD_JOURNAL_SYSTEM | \ + SD_JOURNAL_CURRENT_USER | \ + SD_JOURNAL_ALL_NAMESPACES | \ + SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE) + +_public_ int sd_journal_open_namespace(sd_journal **ret, const char *namespace, int flags) { + _cleanup_(sd_journal_closep) sd_journal *j = NULL; + int r; + + assert_return(ret, -EINVAL); + assert_return((flags & ~OPEN_ALLOWED_FLAGS) == 0, -EINVAL); + + j = journal_new(flags, NULL, namespace); + if (!j) + return -ENOMEM; + + r = add_search_paths(j); + if (r < 0) + return r; + + *ret = TAKE_PTR(j); + return 0; +} + +_public_ int sd_journal_open(sd_journal **ret, int flags) { + return sd_journal_open_namespace(ret, NULL, flags); +} + +#define OPEN_CONTAINER_ALLOWED_FLAGS \ + (SD_JOURNAL_LOCAL_ONLY | SD_JOURNAL_SYSTEM) + +_public_ int sd_journal_open_container(sd_journal **ret, const char *machine, int flags) { + _cleanup_free_ char *root = NULL, *class = NULL; + _cleanup_(sd_journal_closep) sd_journal *j = NULL; + char *p; + int r; + + /* This is deprecated, people should use machined's OpenMachineRootDirectory() call instead in + * combination with sd_journal_open_directory_fd(). */ + + assert_return(machine, -EINVAL); + assert_return(ret, -EINVAL); + assert_return((flags & ~OPEN_CONTAINER_ALLOWED_FLAGS) == 0, -EINVAL); + assert_return(hostname_is_valid(machine, 0), -EINVAL); + + p = strjoina("/run/systemd/machines/", machine); + r = parse_env_file(NULL, p, + "ROOT", &root, + "CLASS", &class); + if (r == -ENOENT) + return -EHOSTDOWN; + if (r < 0) + return r; + if (!root) + return -ENODATA; + + if (!streq_ptr(class, "container")) + return -EIO; + + j = journal_new(flags, root, NULL); + if (!j) + return -ENOMEM; + + r = add_search_paths(j); + if (r < 0) + return r; + + *ret = TAKE_PTR(j); + return 0; +} + +#define OPEN_DIRECTORY_ALLOWED_FLAGS \ + (SD_JOURNAL_OS_ROOT | \ + SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER ) + +_public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int flags) { + _cleanup_(sd_journal_closep) sd_journal *j = NULL; + int r; + + assert_return(ret, -EINVAL); + assert_return(path, -EINVAL); + assert_return((flags & ~OPEN_DIRECTORY_ALLOWED_FLAGS) == 0, -EINVAL); + + j = journal_new(flags, path, NULL); + if (!j) + return -ENOMEM; + + if (flags & SD_JOURNAL_OS_ROOT) + r = add_search_paths(j); + else + r = add_root_directory(j, path, false); + if (r < 0) + return r; + + *ret = TAKE_PTR(j); + return 0; +} + +_public_ int sd_journal_open_files(sd_journal **ret, const char **paths, int flags) { + _cleanup_(sd_journal_closep) sd_journal *j = NULL; + int r; + + assert_return(ret, -EINVAL); + assert_return(flags == 0, -EINVAL); + + j = journal_new(flags, NULL, NULL); + if (!j) + return -ENOMEM; + + STRV_FOREACH(path, paths) { + r = add_any_file(j, -1, *path); + if (r < 0) + return r; + } + + j->no_new_files = true; + + *ret = TAKE_PTR(j); + return 0; +} + +#define OPEN_DIRECTORY_FD_ALLOWED_FLAGS \ + (SD_JOURNAL_OS_ROOT | \ + SD_JOURNAL_SYSTEM | \ + SD_JOURNAL_CURRENT_USER | \ + SD_JOURNAL_TAKE_DIRECTORY_FD) + +_public_ int sd_journal_open_directory_fd(sd_journal **ret, int fd, int flags) { + _cleanup_(sd_journal_closep) sd_journal *j = NULL; + struct stat st; + bool take_fd; + int r; + + assert_return(ret, -EINVAL); + assert_return(fd >= 0, -EBADF); + assert_return((flags & ~OPEN_DIRECTORY_FD_ALLOWED_FLAGS) == 0, -EINVAL); + + if (fstat(fd, &st) < 0) + return -errno; + + if (!S_ISDIR(st.st_mode)) + return -EBADFD; + + take_fd = FLAGS_SET(flags, SD_JOURNAL_TAKE_DIRECTORY_FD); + j = journal_new(flags & ~SD_JOURNAL_TAKE_DIRECTORY_FD, NULL, NULL); + if (!j) + return -ENOMEM; + + j->toplevel_fd = fd; + + if (flags & SD_JOURNAL_OS_ROOT) + r = add_search_paths(j); + else + r = add_root_directory(j, NULL, false); + if (r < 0) + return r; + + SET_FLAG(j->flags, SD_JOURNAL_TAKE_DIRECTORY_FD, take_fd); + + *ret = TAKE_PTR(j); + return 0; +} + +_public_ int sd_journal_open_files_fd(sd_journal **ret, int fds[], unsigned n_fds, int flags) { + JournalFile *f; + _cleanup_(sd_journal_closep) sd_journal *j = NULL; + int r; + + assert_return(ret, -EINVAL); + assert_return(n_fds > 0, -EBADF); + assert_return(flags == 0, -EINVAL); + + j = journal_new(flags, NULL, NULL); + if (!j) + return -ENOMEM; + + for (unsigned i = 0; i < n_fds; i++) { + struct stat st; + + if (fds[i] < 0) { + r = -EBADF; + goto fail; + } + + if (fstat(fds[i], &st) < 0) { + r = -errno; + goto fail; + } + + r = stat_verify_regular(&st); + if (r < 0) + goto fail; + + r = add_any_file(j, fds[i], NULL); + if (r < 0) + goto fail; + } + + j->no_new_files = true; + j->no_inotify = true; + + *ret = TAKE_PTR(j); + return 0; + +fail: + /* If we fail, make sure we don't take possession of the files we managed to make use of successfully, and they + * remain open */ + ORDERED_HASHMAP_FOREACH(f, j->files) + f->close_fd = false; + + return r; +} + +_public_ void sd_journal_close(sd_journal *j) { + Directory *d; + Prioq *p; + + if (!j || journal_origin_changed(j)) + return; + + while ((p = hashmap_first(j->newest_by_boot_id))) + journal_file_unlink_newest_by_boot_id(j, prioq_peek(p)); + hashmap_free(j->newest_by_boot_id); + + sd_journal_flush_matches(j); + + ordered_hashmap_free_with_destructor(j->files, journal_file_close); + iterated_cache_free(j->files_cache); + + while ((d = hashmap_first(j->directories_by_path))) + remove_directory(j, d); + + while ((d = hashmap_first(j->directories_by_wd))) + remove_directory(j, d); + + hashmap_free(j->directories_by_path); + hashmap_free(j->directories_by_wd); + + if (FLAGS_SET(j->flags, SD_JOURNAL_TAKE_DIRECTORY_FD)) + safe_close(j->toplevel_fd); + + safe_close(j->inotify_fd); + + if (j->mmap) { + mmap_cache_stats_log_debug(j->mmap); + mmap_cache_unref(j->mmap); + } + + hashmap_free_free(j->errors); + + free(j->path); + free(j->prefix); + free(j->namespace); + free(j->unique_field); + free(j->fields_buffer); + free(j); +} + +static void journal_file_unlink_newest_by_boot_id(sd_journal *j, JournalFile *f) { + JournalFile *nf; + Prioq *p; + + assert(j); + assert(f); + + if (f->newest_boot_id_prioq_idx == PRIOQ_IDX_NULL) /* not linked currently, hence this is a NOP */ + return; + + assert_se(p = hashmap_get(j->newest_by_boot_id, &f->newest_boot_id)); + assert_se(prioq_remove(p, f, &f->newest_boot_id_prioq_idx) > 0); + + nf = prioq_peek(p); + if (nf) + /* There's still a member in the prioq? Then make sure the hashmap key now points to its + * .newest_boot_id field (and not ours!). Not we only replace the memory of the key here, the + * value of the key (and the data associated with it) remain the same. */ + assert_se(hashmap_replace(j->newest_by_boot_id, &nf->newest_boot_id, p) >= 0); + else { + assert_se(hashmap_remove(j->newest_by_boot_id, &f->newest_boot_id) == p); + prioq_free(p); + } + + f->newest_boot_id_prioq_idx = PRIOQ_IDX_NULL; +} + +static int journal_file_newest_monotonic_compare(const void *a, const void *b) { + const JournalFile *x = a, *y = b; + + return -CMP(x->newest_monotonic_usec, y->newest_monotonic_usec); /* Invert order, we want newest first! */ +} + +static int journal_file_reshuffle_newest_by_boot_id(sd_journal *j, JournalFile *f) { + Prioq *p; + int r; + + assert(j); + assert(f); + + p = hashmap_get(j->newest_by_boot_id, &f->newest_boot_id); + if (p) { + /* There's already a priority queue for this boot ID */ + + if (f->newest_boot_id_prioq_idx == PRIOQ_IDX_NULL) { + r = prioq_put(p, f, &f->newest_boot_id_prioq_idx); /* Insert if we aren't in there yet */ + if (r < 0) + return r; + } else + prioq_reshuffle(p, f, &f->newest_boot_id_prioq_idx); /* Reshuffle otherwise */ + + } else { + _cleanup_(prioq_freep) Prioq *q = NULL; + + /* No priority queue yet, then allocate one */ + + assert(f->newest_boot_id_prioq_idx == PRIOQ_IDX_NULL); /* we can't be a member either */ + + q = prioq_new(journal_file_newest_monotonic_compare); + if (!q) + return -ENOMEM; + + r = prioq_put(q, f, &f->newest_boot_id_prioq_idx); + if (r < 0) + return r; + + r = hashmap_ensure_put(&j->newest_by_boot_id, &id128_hash_ops, &f->newest_boot_id, q); + if (r < 0) { + f->newest_boot_id_prioq_idx = PRIOQ_IDX_NULL; + return r; + } + + TAKE_PTR(q); + } + + return 0; +} + +static int journal_file_read_tail_timestamp(sd_journal *j, JournalFile *f) { + uint64_t offset, mo, rt; + sd_id128_t id; + ObjectType type; + Object *o; + int r; + + assert(j); + assert(f); + assert(f->header); + + /* Tries to read the timestamp of the most recently written entry. */ + + r = journal_file_fstat(f); + if (r < 0) + return r; + if (f->newest_mtime == timespec_load(&f->last_stat.st_mtim)) + return 0; /* mtime didn't change since last time, don't bother */ + + if (JOURNAL_HEADER_CONTAINS(f->header, tail_entry_offset)) { + offset = le64toh(READ_NOW(f->header->tail_entry_offset)); + type = OBJECT_ENTRY; + } else { + offset = le64toh(READ_NOW(f->header->tail_object_offset)); + type = OBJECT_UNUSED; + } + if (offset == 0) + return -ENODATA; /* not a single object/entry, hence no tail timestamp */ + + /* Move to the last object in the journal file, in the hope it is an entry (which it usually will + * be). If we lack the "tail_entry_offset" field in the header, we specify the type as OBJECT_UNUSED + * here, since we cannot be sure what the last object will be, and want no noisy logging if it isn't + * an entry. We instead check after figuring out the pointer. */ + r = journal_file_move_to_object(f, type, offset, &o); + if (r < 0) { + log_debug_errno(r, "Failed to move to last object in journal file, ignoring: %m"); + o = NULL; + } + if (o && o->object.type == OBJECT_ENTRY) { + /* Yay, last object is an entry, let's use the data. */ + id = o->entry.boot_id; + mo = le64toh(o->entry.monotonic); + rt = le64toh(o->entry.realtime); + } else { + /* So the object is not an entry or we couldn't access it? In that case, let's read the most + * recent entry timestamps from the header. It's equally good. Unfortunately though, in old + * versions of the journal the boot ID in the header doesn't have to match the monotonic + * timestamp of the header. Let's check the header flag that indicates whether this strictly + * matches first hence, before using the data. */ + + if (JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(f->header) && f->header->state == STATE_ARCHIVED) { + mo = le64toh(f->header->tail_entry_monotonic); + rt = le64toh(f->header->tail_entry_realtime); + id = f->header->tail_entry_boot_id; + } else { + /* Otherwise let's find the last entry manually (this possibly means traversing the + * chain of entry arrays, till the end */ + r = journal_file_next_entry(f, 0, DIRECTION_UP, &o, NULL); + if (r < 0) + return r; + if (r == 0) + return -ENODATA; + + id = o->entry.boot_id; + mo = le64toh(o->entry.monotonic); + rt = le64toh(o->entry.realtime); + } + } + + if (mo > rt) /* monotonic clock is further ahead than realtime? that's weird, refuse to use the data */ + return -ENODATA; + + if (!sd_id128_equal(f->newest_boot_id, id)) + journal_file_unlink_newest_by_boot_id(j, f); + + f->newest_boot_id = id; + f->newest_monotonic_usec = mo; + f->newest_realtime_usec = rt; + f->newest_machine_id = f->header->machine_id; + f->newest_mtime = timespec_load(&f->last_stat.st_mtim); + + r = journal_file_reshuffle_newest_by_boot_id(j, f); + if (r < 0) + return r; + + return 0; +} + +_public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) { + JournalFile *f; + Object *o; + int r; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + + f = j->current_file; + if (!f) + return -EADDRNOTAVAIL; + if (f->current_offset <= 0) + return -EADDRNOTAVAIL; + + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); + if (r < 0) + return r; + + uint64_t t = le64toh(o->entry.realtime); + if (!VALID_REALTIME(t)) + return -EBADMSG; + + if (ret) + *ret = t; + + return 0; +} + +_public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) { + JournalFile *f; + Object *o; + int r; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + + f = j->current_file; + if (!f) + return -EADDRNOTAVAIL; + if (f->current_offset <= 0) + return -EADDRNOTAVAIL; + + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); + if (r < 0) + return r; + + if (ret_boot_id) + *ret_boot_id = o->entry.boot_id; + else { + sd_id128_t id; + + r = sd_id128_get_boot(&id); + if (r < 0) + return r; + + if (!sd_id128_equal(id, o->entry.boot_id)) + return -ESTALE; + } + + uint64_t t = le64toh(o->entry.monotonic); + if (!VALID_MONOTONIC(t)) + return -EBADMSG; + + if (ret) + *ret = t; + + return 0; +} + +_public_ int sd_journal_get_seqnum( + sd_journal *j, + uint64_t *ret_seqnum, + sd_id128_t *ret_seqnum_id) { + + JournalFile *f; + Object *o; + int r; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + + f = j->current_file; + if (!f) + return -EADDRNOTAVAIL; + + if (f->current_offset <= 0) + return -EADDRNOTAVAIL; + + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); + if (r < 0) + return r; + + if (ret_seqnum_id) + *ret_seqnum_id = f->header->seqnum_id; + if (ret_seqnum) + *ret_seqnum = le64toh(o->entry.seqnum); + + return 0; +} + +static bool field_is_valid(const char *field) { + assert(field); + + if (isempty(field)) + return false; + + if (startswith(field, "__")) + return false; + + for (const char *p = field; *p; p++) { + + if (*p == '_') + continue; + + if (*p >= 'A' && *p <= 'Z') + continue; + + if (ascii_isdigit(*p)) + continue; + + return false; + } + + return true; +} + +_public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) { + JournalFile *f; + size_t field_length; + Object *o; + int r; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + assert_return(field, -EINVAL); + assert_return(data, -EINVAL); + assert_return(size, -EINVAL); + assert_return(field_is_valid(field), -EINVAL); + + f = j->current_file; + if (!f) + return -EADDRNOTAVAIL; + + if (f->current_offset <= 0) + return -EADDRNOTAVAIL; + + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); + if (r < 0) + return r; + + field_length = strlen(field); + + uint64_t n = journal_file_entry_n_items(f, o); + for (uint64_t i = 0; i < n; i++) { + uint64_t p; + void *d; + size_t l; + + p = journal_file_entry_item_object_offset(f, o, i); + r = journal_file_data_payload(f, NULL, p, field, field_length, j->data_threshold, &d, &l); + if (r == 0) + continue; + if (IN_SET(r, -EADDRNOTAVAIL, -EBADMSG)) { + log_debug_errno(r, "Entry item %"PRIu64" data object is bad, skipping over it: %m", i); + continue; + } + if (r < 0) + return r; + + *data = d; + *size = l; + + return 0; + } + + return -ENOENT; +} + +_public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) { + JournalFile *f; + Object *o; + int r; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + assert_return(data, -EINVAL); + assert_return(size, -EINVAL); + + f = j->current_file; + if (!f) + return -EADDRNOTAVAIL; + + if (f->current_offset <= 0) + return -EADDRNOTAVAIL; + + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); + if (r < 0) + return r; + + for (uint64_t n = journal_file_entry_n_items(f, o); j->current_field < n; j->current_field++) { + uint64_t p; + void *d; + size_t l; + + p = journal_file_entry_item_object_offset(f, o, j->current_field); + r = journal_file_data_payload(f, NULL, p, NULL, 0, j->data_threshold, &d, &l); + if (IN_SET(r, -EADDRNOTAVAIL, -EBADMSG)) { + log_debug_errno(r, "Entry item %"PRIu64" data object is bad, skipping over it: %m", j->current_field); + continue; + } + if (r < 0) + return r; + assert(r > 0); + + *data = d; + *size = l; + + j->current_field++; + + return 1; + } + + return 0; +} + +_public_ int sd_journal_enumerate_available_data(sd_journal *j, const void **data, size_t *size) { + for (;;) { + int r; + + r = sd_journal_enumerate_data(j, data, size); + if (r >= 0) + return r; + if (!JOURNAL_ERRNO_IS_UNAVAILABLE_FIELD(r)) + return r; + j->current_field++; /* Try with the next field */ + } +} + +_public_ void sd_journal_restart_data(sd_journal *j) { + if (!j || journal_origin_changed(j)) + return; + + j->current_field = 0; +} + +static int reiterate_all_paths(sd_journal *j) { + assert(j); + + if (j->no_new_files) + return add_current_paths(j); + + if (j->flags & SD_JOURNAL_OS_ROOT) + return add_search_paths(j); + + if (j->toplevel_fd >= 0) + return add_root_directory(j, NULL, false); + + if (j->path) + return add_root_directory(j, j->path, true); + + return add_search_paths(j); +} + +_public_ int sd_journal_get_fd(sd_journal *j) { + int r; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + + if (j->no_inotify) + return -EMEDIUMTYPE; + + if (j->inotify_fd >= 0) + return j->inotify_fd; + + r = allocate_inotify(j); + if (r < 0) + return r; + + log_debug("Reiterating files to get inotify watches established."); + + /* Iterate through all dirs again, to add them to the inotify */ + r = reiterate_all_paths(j); + if (r < 0) + return r; + + return j->inotify_fd; +} + +_public_ int sd_journal_get_events(sd_journal *j) { + int fd; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + + fd = sd_journal_get_fd(j); + if (fd < 0) + return fd; + + return POLLIN; +} + +_public_ int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec) { + int fd; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + assert_return(timeout_usec, -EINVAL); + + fd = sd_journal_get_fd(j); + if (fd < 0) + return fd; + + if (!j->on_network) { + *timeout_usec = UINT64_MAX; + return 0; + } + + /* If we are on the network we need to regularly check for + * changes manually */ + + *timeout_usec = j->last_process_usec + JOURNAL_FILES_RECHECK_USEC; + return 1; +} + +static void process_q_overflow(sd_journal *j) { + JournalFile *f; + Directory *m; + + assert(j); + + /* When the inotify queue overruns we need to enumerate and re-validate all journal files to bring our list + * back in sync with what's on disk. For this we pick a new generation counter value. It'll be assigned to all + * journal files we encounter. All journal files and all directories that don't carry it after reenumeration + * are subject for unloading. */ + + log_debug("Inotify queue overrun, reiterating everything."); + + j->generation++; + (void) reiterate_all_paths(j); + + ORDERED_HASHMAP_FOREACH(f, j->files) { + + if (f->last_seen_generation == j->generation) + continue; + + log_debug("File '%s' hasn't been seen in this enumeration, removing.", f->path); + remove_file_real(j, f); + } + + HASHMAP_FOREACH(m, j->directories_by_path) { + + if (m->last_seen_generation == j->generation) + continue; + + if (m->is_root) /* Never GC root directories */ + continue; + + log_debug("Directory '%s' hasn't been seen in this enumeration, removing.", f->path); + remove_directory(j, m); + } + + log_debug("Reiteration complete."); +} + +static void process_inotify_event(sd_journal *j, const struct inotify_event *e) { + Directory *d; + + assert(j); + assert(e); + + if (e->mask & IN_Q_OVERFLOW) { + process_q_overflow(j); + return; + } + + /* Is this a subdirectory we watch? */ + d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd)); + if (d) { + if (!(e->mask & IN_ISDIR) && e->len > 0 && + (endswith(e->name, ".journal") || + endswith(e->name, ".journal~"))) { + + /* Event for a journal file */ + + if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) + (void) add_file_by_name(j, d->path, e->name); + else if (e->mask & (IN_DELETE|IN_MOVED_FROM|IN_UNMOUNT)) + (void) remove_file_by_name(j, d->path, e->name); + + } else if (!d->is_root && e->len == 0) { + + /* Event for a subdirectory */ + + if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT)) + remove_directory(j, d); + + } else if (d->is_root && (e->mask & IN_ISDIR) && e->len > 0 && id128_is_valid(e->name)) { + + /* Event for root directory */ + + if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) + (void) add_directory(j, d->path, e->name); + } + + return; + } + + if (e->mask & IN_IGNORED) + return; + + log_debug("Unexpected inotify event."); +} + +static int determine_change(sd_journal *j) { + bool b; + + assert(j); + + b = j->current_invalidate_counter != j->last_invalidate_counter; + j->last_invalidate_counter = j->current_invalidate_counter; + + return b ? SD_JOURNAL_INVALIDATE : SD_JOURNAL_APPEND; +} + +_public_ int sd_journal_process(sd_journal *j) { + bool got_something = false; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + + if (j->inotify_fd < 0) /* We have no inotify fd yet? Then there's noting to process. */ + return 0; + + j->last_process_usec = now(CLOCK_MONOTONIC); + j->last_invalidate_counter = j->current_invalidate_counter; + + for (;;) { + union inotify_event_buffer buffer; + ssize_t l; + + l = read(j->inotify_fd, &buffer, sizeof(buffer)); + if (l < 0) { + if (ERRNO_IS_TRANSIENT(errno)) + return got_something ? determine_change(j) : SD_JOURNAL_NOP; + + return -errno; + } + + got_something = true; + + FOREACH_INOTIFY_EVENT(e, buffer, l) + process_inotify_event(j, e); + } +} + +_public_ int sd_journal_wait(sd_journal *j, uint64_t timeout_usec) { + int r; + uint64_t t; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + + if (j->inotify_fd < 0) { + JournalFile *f; + + /* This is the first invocation, hence create the inotify watch */ + r = sd_journal_get_fd(j); + if (r < 0) + return r; + + /* Server might have done some vacuuming while we weren't watching. Get rid of the deleted + * files now so they don't stay around indefinitely. */ + ORDERED_HASHMAP_FOREACH(f, j->files) { + r = journal_file_fstat(f); + if (r == -EIDRM) + remove_file_real(j, f); + else if (r < 0) + log_debug_errno(r, "Failed to fstat() journal file '%s', ignoring: %m", f->path); + } + + /* The journal might have changed since the context object was created and we weren't + * watching before, hence don't wait for anything, and return immediately. */ + return determine_change(j); + } + + r = sd_journal_get_timeout(j, &t); + if (r < 0) + return r; + + if (t != UINT64_MAX) { + t = usec_sub_unsigned(t, now(CLOCK_MONOTONIC)); + + if (timeout_usec == UINT64_MAX || timeout_usec > t) + timeout_usec = t; + } + + do { + r = fd_wait_for_event(j->inotify_fd, POLLIN, timeout_usec); + } while (r == -EINTR); + + if (r < 0) + return r; + + return sd_journal_process(j); +} + +_public_ int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to) { + JournalFile *f; + bool first = true; + uint64_t fmin = 0, tmax = 0; + int r; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + assert_return(from || to, -EINVAL); + assert_return(from != to, -EINVAL); + + ORDERED_HASHMAP_FOREACH(f, j->files) { + usec_t fr, t; + + r = journal_file_get_cutoff_realtime_usec(f, &fr, &t); + if (r == -ENOENT) + continue; + if (r < 0) + return r; + if (r == 0) + continue; + + if (first) { + fmin = fr; + tmax = t; + first = false; + } else { + fmin = MIN(fr, fmin); + tmax = MAX(t, tmax); + } + } + + if (from) + *from = fmin; + if (to) + *to = tmax; + + return first ? 0 : 1; +} + +_public_ int sd_journal_get_cutoff_monotonic_usec( + sd_journal *j, + sd_id128_t boot_id, + uint64_t *ret_from, + uint64_t *ret_to) { + + uint64_t from = UINT64_MAX, to = UINT64_MAX; + bool found = false; + JournalFile *f; + int r; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + assert_return(ret_from != ret_to, -EINVAL); + + ORDERED_HASHMAP_FOREACH(f, j->files) { + usec_t ff, tt; + + r = journal_file_get_cutoff_monotonic_usec(f, boot_id, &ff, &tt); + if (r == -ENOENT) + continue; + if (r < 0) + return r; + if (r == 0) + continue; + + if (found) { + from = MIN(ff, from); + to = MAX(tt, to); + } else { + from = ff; + to = tt; + found = true; + } + } + + if (ret_from) + *ret_from = from; + if (ret_to) + *ret_to = to; + + return found; +} + +void journal_print_header(sd_journal *j) { + JournalFile *f; + bool newline = false; + + assert(j); + + ORDERED_HASHMAP_FOREACH(f, j->files) { + if (newline) + putchar('\n'); + else + newline = true; + + journal_file_print_header(f); + } +} + +_public_ int sd_journal_get_usage(sd_journal *j, uint64_t *ret) { + JournalFile *f; + uint64_t sum = 0; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + assert_return(ret, -EINVAL); + + ORDERED_HASHMAP_FOREACH(f, j->files) { + struct stat st; + uint64_t b; + + if (fstat(f->fd, &st) < 0) + return -errno; + + b = (uint64_t) st.st_blocks; + if (b > UINT64_MAX / 512) + return -EOVERFLOW; + b *= 512; + + if (sum > UINT64_MAX - b) + return -EOVERFLOW; + sum += b; + } + + *ret = sum; + return 0; +} + +_public_ int sd_journal_query_unique(sd_journal *j, const char *field) { + int r; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + + if (!field_is_valid(field)) + return -EINVAL; + + r = free_and_strdup(&j->unique_field, field); + if (r < 0) + return r; + + j->unique_file = NULL; + j->unique_offset = 0; + j->unique_file_lost = false; + + return 0; +} + +_public_ int sd_journal_enumerate_unique( + sd_journal *j, + const void **ret_data, + size_t *ret_size) { + + size_t k; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + assert_return(j->unique_field, -EINVAL); + + k = strlen(j->unique_field); + + if (!j->unique_file) { + if (j->unique_file_lost) + return 0; + + j->unique_file = ordered_hashmap_first(j->files); + if (!j->unique_file) + return 0; + + j->unique_offset = 0; + } + + for (;;) { + JournalFile *of; + Object *o; + void *odata; + size_t ol; + bool found; + int r; + + /* Proceed to next data object in the field's linked list */ + if (j->unique_offset == 0) { + r = journal_file_find_field_object(j->unique_file, j->unique_field, k, &o, NULL); + if (r < 0) + return r; + + j->unique_offset = r > 0 ? le64toh(o->field.head_data_offset) : 0; + } else { + r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o); + if (r < 0) + return r; + + j->unique_offset = le64toh(o->data.next_field_offset); + } + + /* We reached the end of the list? Then start again, with the next file */ + if (j->unique_offset == 0) { + j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path); + if (!j->unique_file) + return 0; + + continue; + } + + r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o); + if (r < 0) + return r; + + /* Let's pin the data object, so we can look at it at the same time as one on another file. */ + r = journal_file_pin_object(j->unique_file, o); + if (r < 0) + return r; + + r = journal_file_data_payload(j->unique_file, o, j->unique_offset, NULL, 0, + j->data_threshold, &odata, &ol); + if (r < 0) + return r; + + /* Check if we have at least the field name and "=". */ + if (ol <= k) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "%s:offset " OFSfmt ": object has size %zu, expected at least %zu", + j->unique_file->path, + j->unique_offset, ol, k + 1); + + if (memcmp(odata, j->unique_field, k) != 0 || ((const char*) odata)[k] != '=') + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "%s:offset " OFSfmt ": object does not start with \"%s=\"", + j->unique_file->path, + j->unique_offset, + j->unique_field); + + /* OK, now let's see if we already returned this data object by checking if it exists in the + * earlier traversed files. */ + found = false; + ORDERED_HASHMAP_FOREACH(of, j->files) { + if (of == j->unique_file) + break; + + /* Skip this file it didn't have any fields indexed */ + if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0) + continue; + + /* We can reuse the hash from our current file only on old-style journal files + * without keyed hashes. On new-style files we have to calculate the hash anew, to + * take the per-file hash seed into consideration. */ + if (!JOURNAL_HEADER_KEYED_HASH(j->unique_file->header) && !JOURNAL_HEADER_KEYED_HASH(of->header)) + r = journal_file_find_data_object_with_hash(of, odata, ol, le64toh(o->data.hash), NULL, NULL); + else + r = journal_file_find_data_object(of, odata, ol, NULL, NULL); + if (r < 0) + return r; + if (r > 0) { + found = true; + break; + } + } + + if (found) + continue; + + *ret_data = odata; + *ret_size = ol; + + return 1; + } +} + +_public_ int sd_journal_enumerate_available_unique(sd_journal *j, const void **data, size_t *size) { + for (;;) { + int r; + + r = sd_journal_enumerate_unique(j, data, size); + if (r >= 0) + return r; + if (!JOURNAL_ERRNO_IS_UNAVAILABLE_FIELD(r)) + return r; + /* Try with the next field. sd_journal_enumerate_unique() modifies state, so on the next try + * we will access the next field. */ + } +} + +_public_ void sd_journal_restart_unique(sd_journal *j) { + if (!j || journal_origin_changed(j)) + return; + + j->unique_file = NULL; + j->unique_offset = 0; + j->unique_file_lost = false; +} + +_public_ int sd_journal_enumerate_fields(sd_journal *j, const char **field) { + int r; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + assert_return(field, -EINVAL); + + if (!j->fields_file) { + if (j->fields_file_lost) + return 0; + + j->fields_file = ordered_hashmap_first(j->files); + if (!j->fields_file) + return 0; + + j->fields_hash_table_index = 0; + j->fields_offset = 0; + } + + for (;;) { + JournalFile *f, *of; + uint64_t m; + Object *o; + size_t sz; + bool found; + + f = j->fields_file; + + if (j->fields_offset == 0) { + bool eof = false; + + /* We are not yet positioned at any field. Let's pick the first one */ + r = journal_file_map_field_hash_table(f); + if (r < 0) + return r; + + m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem); + for (;;) { + if (j->fields_hash_table_index >= m) { + /* Reached the end of the hash table, go to the next file. */ + eof = true; + break; + } + + j->fields_offset = le64toh(f->field_hash_table[j->fields_hash_table_index].head_hash_offset); + + if (j->fields_offset != 0) + break; + + /* Empty hash table bucket, go to next one */ + j->fields_hash_table_index++; + } + + if (eof) { + /* Proceed with next file */ + j->fields_file = ordered_hashmap_next(j->files, f->path); + if (!j->fields_file) { + *field = NULL; + return 0; + } + + j->fields_offset = 0; + j->fields_hash_table_index = 0; + continue; + } + + } else { + /* We are already positioned at a field. If so, let's figure out the next field from it */ + + r = journal_file_move_to_object(f, OBJECT_FIELD, j->fields_offset, &o); + if (r < 0) + return r; + + j->fields_offset = le64toh(o->field.next_hash_offset); + if (j->fields_offset == 0) { + /* Reached the end of the hash table chain */ + j->fields_hash_table_index++; + continue; + } + } + + /* We use OBJECT_UNUSED here, so that the iterator below doesn't remove our mmap window */ + r = journal_file_move_to_object(f, OBJECT_UNUSED, j->fields_offset, &o); + if (r < 0) + return r; + + /* Because we used OBJECT_UNUSED above, we need to do our type check manually */ + if (o->object.type != OBJECT_FIELD) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "%s:offset " OFSfmt ": object has type %i, expected %i", + f->path, j->fields_offset, + o->object.type, OBJECT_FIELD); + + sz = le64toh(o->object.size) - offsetof(Object, field.payload); + + /* Let's see if we already returned this field name before. */ + found = false; + ORDERED_HASHMAP_FOREACH(of, j->files) { + if (of == f) + break; + + /* Skip this file it didn't have any fields indexed */ + if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0) + continue; + + if (!JOURNAL_HEADER_KEYED_HASH(f->header) && !JOURNAL_HEADER_KEYED_HASH(of->header)) + r = journal_file_find_field_object_with_hash(of, o->field.payload, sz, + le64toh(o->field.hash), NULL, NULL); + else + r = journal_file_find_field_object(of, o->field.payload, sz, NULL, NULL); + if (r < 0) + return r; + if (r > 0) { + found = true; + break; + } + } + + if (found) + continue; + + /* Check if this is really a valid string containing no NUL byte */ + if (memchr(o->field.payload, 0, sz)) + return -EBADMSG; + + if (j->data_threshold > 0 && sz > j->data_threshold) + sz = j->data_threshold; + + if (!GREEDY_REALLOC(j->fields_buffer, sz + 1)) + return -ENOMEM; + + memcpy(j->fields_buffer, o->field.payload, sz); + j->fields_buffer[sz] = 0; + + if (!field_is_valid(j->fields_buffer)) + return -EBADMSG; + + *field = j->fields_buffer; + return 1; + } +} + +_public_ void sd_journal_restart_fields(sd_journal *j) { + if (!j || journal_origin_changed(j)) + return; + + j->fields_file = NULL; + j->fields_hash_table_index = 0; + j->fields_offset = 0; + j->fields_file_lost = false; +} + +_public_ int sd_journal_reliable_fd(sd_journal *j) { + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + + return !j->on_network; +} + +static char *lookup_field(const char *field, void *userdata) { + sd_journal *j = ASSERT_PTR(userdata); + const void *data; + size_t size, d; + int r; + + assert(field); + + r = sd_journal_get_data(j, field, &data, &size); + if (r < 0 || + size > REPLACE_VAR_MAX) + return strdup(field); + + d = strlen(field) + 1; + + return strndup((const char*) data + d, size - d); +} + +_public_ int sd_journal_get_catalog(sd_journal *j, char **ret) { + const void *data; + size_t size; + sd_id128_t id; + _cleanup_free_ char *text = NULL, *cid = NULL; + char *t; + int r; + + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + assert_return(ret, -EINVAL); + + r = sd_journal_get_data(j, "MESSAGE_ID", &data, &size); + if (r < 0) + return r; + + cid = strndup((const char*) data + 11, size - 11); + if (!cid) + return -ENOMEM; + + r = sd_id128_from_string(cid, &id); + if (r < 0) + return r; + + r = catalog_get(secure_getenv("SYSTEMD_CATALOG") ?: CATALOG_DATABASE, id, &text); + if (r < 0) + return r; + + t = replace_var(text, lookup_field, j); + if (!t) + return -ENOMEM; + + *ret = t; + return 0; +} + +_public_ int sd_journal_get_catalog_for_message_id(sd_id128_t id, char **ret) { + assert_return(ret, -EINVAL); + + return catalog_get(CATALOG_DATABASE, id, ret); +} + +_public_ int sd_journal_set_data_threshold(sd_journal *j, size_t sz) { + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + + j->data_threshold = sz; + return 0; +} + +_public_ int sd_journal_get_data_threshold(sd_journal *j, size_t *sz) { + assert_return(j, -EINVAL); + assert_return(!journal_origin_changed(j), -ECHILD); + assert_return(sz, -EINVAL); + + *sz = j->data_threshold; + return 0; +} + +_public_ int sd_journal_has_runtime_files(sd_journal *j) { + assert_return(j, -EINVAL); + + return j->has_runtime_files; +} + +_public_ int sd_journal_has_persistent_files(sd_journal *j) { + assert_return(j, -EINVAL); + + return j->has_persistent_files; +} diff --git a/src/libsystemd/sd-journal/test-audit-type.c b/src/libsystemd/sd-journal/test-audit-type.c new file mode 100644 index 0000000..1d5003b --- /dev/null +++ b/src/libsystemd/sd-journal/test-audit-type.c @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <stdio.h> +#include <linux/audit.h> + +#include "audit-type.h" +#include "tests.h" + +static void print_audit_label(int i) { + const char *name; + + name = audit_type_name_alloca(i); + /* This is a separate function only because of alloca */ + printf("%i → %s → %s\n", i, audit_type_to_string(i), name); +} + +TEST(audit_type) { + int i; + + for (i = 0; i <= AUDIT_KERNEL; i++) + print_audit_label(i); +} + +DEFINE_TEST_MAIN(LOG_INFO); diff --git a/src/libsystemd/sd-journal/test-catalog.c b/src/libsystemd/sd-journal/test-catalog.c new file mode 100644 index 0000000..603952e --- /dev/null +++ b/src/libsystemd/sd-journal/test-catalog.c @@ -0,0 +1,235 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <locale.h> +#include <unistd.h> + +#include "sd-messages.h" + +#include "alloc-util.h" +#include "catalog.h" +#include "fd-util.h" +#include "fs-util.h" +#include "log.h" +#include "macro.h" +#include "path-util.h" +#include "string-util.h" +#include "strv.h" +#include "tests.h" +#include "tmpfile-util.h" + +static char** catalog_dirs = NULL; +static const char *no_catalog_dirs[] = { + "/bin/hopefully/with/no/catalog", + NULL +}; + +static OrderedHashmap* test_import(const char* contents, ssize_t size, int code) { + _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-catalog.XXXXXX"; + _cleanup_close_ int fd = -EBADF; + OrderedHashmap *h; + + if (size < 0) + size = strlen(contents); + + assert_se(h = ordered_hashmap_new(&catalog_hash_ops)); + + fd = mkostemp_safe(name); + assert_se(fd >= 0); + assert_se(write(fd, contents, size) == size); + + assert_se(catalog_import_file(h, name) == code); + + return h; +} + +static void test_catalog_import_invalid(void) { + _cleanup_ordered_hashmap_free_free_free_ OrderedHashmap *h = NULL; + + h = test_import("xxx", -1, -EINVAL); + assert_se(ordered_hashmap_isempty(h)); +} + +static void test_catalog_import_badid(void) { + _unused_ _cleanup_ordered_hashmap_free_free_free_ OrderedHashmap *h = NULL; + const char *input = +"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededede\n" \ +"Subject: message\n" \ +"\n" \ +"payload\n"; + h = test_import(input, -1, -EINVAL); +} + +static void test_catalog_import_one(void) { + _cleanup_ordered_hashmap_free_free_free_ OrderedHashmap *h = NULL; + char *payload; + + const char *input = +"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededed\n" \ +"Subject: message\n" \ +"\n" \ +"payload\n"; + const char *expect = +"Subject: message\n" \ +"\n" \ +"payload\n"; + + h = test_import(input, -1, 0); + assert_se(ordered_hashmap_size(h) == 1); + + ORDERED_HASHMAP_FOREACH(payload, h) { + printf("expect: %s\n", expect); + printf("actual: %s\n", payload); + assert_se(streq(expect, payload)); + } +} + +static void test_catalog_import_merge(void) { + _cleanup_ordered_hashmap_free_free_free_ OrderedHashmap *h = NULL; + char *payload; + + const char *input = +"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededed\n" \ +"Subject: message\n" \ +"Defined-By: me\n" \ +"\n" \ +"payload\n" \ +"\n" \ +"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededed\n" \ +"Subject: override subject\n" \ +"X-Header: hello\n" \ +"\n" \ +"override payload\n"; + + const char *combined = +"Subject: override subject\n" \ +"X-Header: hello\n" \ +"Subject: message\n" \ +"Defined-By: me\n" \ +"\n" \ +"override payload\n"; + + h = test_import(input, -1, 0); + assert_se(ordered_hashmap_size(h) == 1); + + ORDERED_HASHMAP_FOREACH(payload, h) + assert_se(streq(combined, payload)); +} + +static void test_catalog_import_merge_no_body(void) { + _cleanup_ordered_hashmap_free_free_free_ OrderedHashmap *h = NULL; + char *payload; + + const char *input = +"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededed\n" \ +"Subject: message\n" \ +"Defined-By: me\n" \ +"\n" \ +"payload\n" \ +"\n" \ +"-- 0027229ca0644181a76c4e92458afaff dededededededededededededededed\n" \ +"Subject: override subject\n" \ +"X-Header: hello\n" \ +"\n"; + + const char *combined = +"Subject: override subject\n" \ +"X-Header: hello\n" \ +"Subject: message\n" \ +"Defined-By: me\n" \ +"\n" \ +"payload\n"; + + h = test_import(input, -1, 0); + assert_se(ordered_hashmap_size(h) == 1); + + ORDERED_HASHMAP_FOREACH(payload, h) + assert_se(streq(combined, payload)); +} + +static void test_catalog_update(const char *database) { + int r; + + /* Test what happens if there are no files. */ + r = catalog_update(database, NULL, NULL); + assert_se(r == 0); + + /* Test what happens if there are no files in the directory. */ + r = catalog_update(database, NULL, no_catalog_dirs); + assert_se(r == 0); + + /* Make sure that we at least have some files loaded or the + * catalog_list below will fail. */ + r = catalog_update(database, NULL, (const char * const *) catalog_dirs); + assert_se(r == 0); +} + +static void test_catalog_file_lang(void) { + _cleanup_free_ char *lang = NULL, *lang2 = NULL, *lang3 = NULL, *lang4 = NULL; + + assert_se(catalog_file_lang("systemd.de_DE.catalog", &lang) == 1); + assert_se(streq(lang, "de_DE")); + + assert_se(catalog_file_lang("systemd..catalog", &lang2) == 0); + assert_se(lang2 == NULL); + + assert_se(catalog_file_lang("systemd.fr.catalog", &lang2) == 1); + assert_se(streq(lang2, "fr")); + + assert_se(catalog_file_lang("systemd.fr.catalog.gz", &lang3) == 0); + assert_se(lang3 == NULL); + + assert_se(catalog_file_lang("systemd.01234567890123456789012345678901.catalog", &lang3) == 0); + assert_se(lang3 == NULL); + + assert_se(catalog_file_lang("systemd.0123456789012345678901234567890.catalog", &lang3) == 1); + assert_se(streq(lang3, "0123456789012345678901234567890")); + + assert_se(catalog_file_lang("/x/y/systemd.catalog", &lang4) == 0); + assert_se(lang4 == NULL); + + assert_se(catalog_file_lang("/x/y/systemd.ru_RU.catalog", &lang4) == 1); + assert_se(streq(lang4, "ru_RU")); +} + +int main(int argc, char *argv[]) { + _cleanup_(unlink_tempfilep) char database[] = "/tmp/test-catalog.XXXXXX"; + _cleanup_close_ int fd = -EBADF; + _cleanup_free_ char *text = NULL; + int r; + + setlocale(LC_ALL, "de_DE.UTF-8"); + + test_setup_logging(LOG_DEBUG); + + /* If test-catalog is located at the build directory, then use catalogs in that. + * If it is not, e.g. installed by systemd-tests package, then use installed catalogs. */ + catalog_dirs = STRV_MAKE(get_catalog_dir()); + + assert_se(access(catalog_dirs[0], F_OK) >= 0); + log_notice("Using catalog directory '%s'", catalog_dirs[0]); + + test_catalog_file_lang(); + + test_catalog_import_invalid(); + test_catalog_import_badid(); + test_catalog_import_one(); + test_catalog_import_merge(); + test_catalog_import_merge_no_body(); + + assert_se((fd = mkostemp_safe(database)) >= 0); + + test_catalog_update(database); + + r = catalog_list(stdout, database, true); + assert_se(r >= 0); + + r = catalog_list(stdout, database, false); + assert_se(r >= 0); + + assert_se(catalog_get(database, SD_MESSAGE_COREDUMP, &text) >= 0); + printf(">>>%s<<<\n", text); + + return 0; +} diff --git a/src/libsystemd/sd-journal/test-journal-append.c b/src/libsystemd/sd-journal/test-journal-append.c new file mode 100644 index 0000000..24b98c8 --- /dev/null +++ b/src/libsystemd/sd-journal/test-journal-append.c @@ -0,0 +1,269 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <getopt.h> +#include <stdio.h> +#include <unistd.h> + +#include "chattr-util.h" +#include "fd-util.h" +#include "fs-util.h" +#include "iovec-util.h" +#include "journal-file-util.h" +#include "log.h" +#include "mmap-cache.h" +#include "parse-util.h" +#include "random-util.h" +#include "rm-rf.h" +#include "strv.h" +#include "terminal-util.h" +#include "tests.h" +#include "tmpfile-util.h" + +static int journal_append_message(JournalFile *mj, const char *message) { + struct iovec iovec; + struct dual_timestamp ts; + + assert(mj); + assert(message); + + dual_timestamp_now(&ts); + iovec = IOVEC_MAKE_STRING(message); + return journal_file_append_entry( + mj, + &ts, + /* boot_id= */ NULL, + &iovec, + /* n_iovec= */ 1, + /* seqnum= */ NULL, + /* seqnum_id= */ NULL, + /* ret_object= */ NULL, + /* ret_offset= */ NULL); +} + +static int journal_corrupt_and_append(uint64_t start_offset, uint64_t step) { + _cleanup_(mmap_cache_unrefp) MMapCache *mmap_cache = NULL; + _cleanup_(rm_rf_physical_and_freep) char *tempdir = NULL; + _cleanup_(journal_file_offline_closep) JournalFile *mj = NULL; + uint64_t start, end; + int r; + + mmap_cache = mmap_cache_new(); + assert_se(mmap_cache); + + /* journal_file_open() requires a valid machine id */ + if (sd_id128_get_machine(NULL) < 0) + return log_tests_skipped("No valid machine ID found"); + + assert_se(mkdtemp_malloc("/tmp/journal-append-XXXXXX", &tempdir) >= 0); + assert_se(chdir(tempdir) >= 0); + (void) chattr_path(tempdir, FS_NOCOW_FL, FS_NOCOW_FL, NULL); + + log_debug("Opening journal %s/system.journal", tempdir); + + r = journal_file_open( + /* fd= */ -1, + "system.journal", + O_RDWR|O_CREAT, + JOURNAL_COMPRESS, + 0644, + /* compress_threshold_bytes= */ UINT64_MAX, + /* metrics= */ NULL, + mmap_cache, + /* template= */ NULL, + &mj); + if (r < 0) + return log_error_errno(r, "Failed to open the journal: %m"); + + assert_se(mj); + + /* Add a couple of initial messages */ + for (int i = 0; i < 10; i++) { + _cleanup_free_ char *message = NULL; + + assert_se(asprintf(&message, "MESSAGE=Initial message %d", i) >= 0); + r = journal_append_message(mj, message); + if (r < 0) + return log_error_errno(r, "Failed to write to the journal: %m"); + } + + start = start_offset == UINT64_MAX ? random_u64() % mj->last_stat.st_size : start_offset; + end = (uint64_t) mj->last_stat.st_size; + + /* Print the initial offset at which we start flipping bits, which can be + * later used to reproduce a potential fail */ + log_info("Start offset: %" PRIu64 ", corrupt-step: %" PRIu64, start, step); + fflush(stdout); + + if (start >= end) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Start offset >= journal size, can't continue"); + + for (uint64_t offset = start; offset < end; offset += step) { + _cleanup_free_ char *message = NULL; + uint8_t b; + + /* Flip a bit in the journal file */ + r = pread(mj->fd, &b, 1, offset); + assert_se(r == 1); + b |= 0x1; + r = pwrite(mj->fd, &b, 1, offset); + assert_se(r == 1); + + /* Close and reopen the journal to flush all caches and remap + * the corrupted journal */ + mj = journal_file_offline_close(mj); + r = journal_file_open( + /* fd= */ -1, + "system.journal", + O_RDWR|O_CREAT, + JOURNAL_COMPRESS, + 0644, + /* compress_threshold_bytes= */ UINT64_MAX, + /* metrics= */ NULL, + mmap_cache, + /* template= */ NULL, + &mj); + if (r < 0) { + /* The corrupted journal might get rejected during reopening + * if it's corrupted enough (especially its header), so + * treat this as a success if it doesn't crash */ + log_info_errno(r, "Failed to reopen the journal: %m"); + break; + } + + /* Try to write something to the (possibly corrupted) journal */ + assert_se(asprintf(&message, "MESSAGE=Hello world %" PRIu64, offset) >= 0); + r = journal_append_message(mj, message); + if (r < 0) { + /* We care only about crashes or sanitizer errors, + * failed write without any crash is a success */ + log_info_errno(r, "Failed to write to the journal: %m"); + break; + } + } + + return 0; +} + +int main(int argc, char *argv[]) { + uint64_t start_offset = UINT64_MAX; + uint64_t iterations = 100; + uint64_t iteration_step = 1; + uint64_t corrupt_step = 31; + bool sequential = false, run_one = false; + int c, r; + + test_setup_logging(LOG_DEBUG); + + enum { + ARG_START_OFFSET = 0x1000, + ARG_ITERATIONS, + ARG_ITERATION_STEP, + ARG_CORRUPT_STEP, + ARG_SEQUENTIAL, + ARG_RUN_ONE, + }; + + static const struct option options[] = { + { "help", no_argument, NULL, 'h' }, + { "start-offset", required_argument, NULL, ARG_START_OFFSET }, + { "iterations", required_argument, NULL, ARG_ITERATIONS }, + { "iteration-step", required_argument, NULL, ARG_ITERATION_STEP }, + { "corrupt-step", required_argument, NULL, ARG_CORRUPT_STEP }, + { "sequential", no_argument, NULL, ARG_SEQUENTIAL }, + { "run-one", required_argument, NULL, ARG_RUN_ONE }, + {} + }; + + assert_se(argc >= 0); + assert_se(argv); + + while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0) + switch (c) { + + case 'h': + printf("Syntax:\n" + " %s [OPTION...]\n" + "Options:\n" + " --start-offset=OFFSET Offset at which to start corrupting the journal\n" + " (default: random offset is picked, unless\n" + " --sequential is used - in that case we use 0 + iteration)\n" + " --iterations=ITER Number of iterations to perform before exiting\n" + " (default: 100)\n" + " --iteration-step=STEP Iteration step (default: 1)\n" + " --corrupt-step=STEP Corrupt every n-th byte starting from OFFSET (default: 31)\n" + " --sequential Go through offsets sequentially instead of picking\n" + " a random one on each iteration. If set, we go through\n" + " offsets <0; ITER), or <OFFSET, ITER) if --start-offset=\n" + " is set (default: false)\n" + " --run-one=OFFSET Single shot mode for reproducing issues. Takes the same\n" + " offset as --start-offset= and does only one iteration\n" + , program_invocation_short_name); + return 0; + + case ARG_START_OFFSET: + r = safe_atou64(optarg, &start_offset); + if (r < 0) + return log_error_errno(r, "Invalid starting offset: %m"); + break; + + case ARG_ITERATIONS: + r = safe_atou64(optarg, &iterations); + if (r < 0) + return log_error_errno(r, "Invalid value for iterations: %m"); + break; + + case ARG_CORRUPT_STEP: + r = safe_atou64(optarg, &corrupt_step); + if (r < 0) + return log_error_errno(r, "Invalid value for corrupt-step: %m"); + break; + + case ARG_ITERATION_STEP: + r = safe_atou64(optarg, &iteration_step); + if (r < 0) + return log_error_errno(r, "Invalid value for iteration-step: %m"); + break; + + case ARG_SEQUENTIAL: + sequential = true; + break; + + case ARG_RUN_ONE: + r = safe_atou64(optarg, &start_offset); + if (r < 0) + return log_error_errno(r, "Invalid offset: %m"); + + run_one = true; + break; + + case '?': + return -EINVAL; + + default: + assert_not_reached(); + } + + if (run_one) + /* Reproducer mode */ + return journal_corrupt_and_append(start_offset, corrupt_step); + + for (uint64_t i = 0; i < iterations; i++) { + uint64_t offset = UINT64_MAX; + + log_info("Iteration #%" PRIu64 ", step: %" PRIu64, i, iteration_step); + + if (sequential) + offset = (start_offset == UINT64_MAX ? 0 : start_offset) + i * iteration_step; + + r = journal_corrupt_and_append(offset, corrupt_step); + if (r < 0) + return EXIT_FAILURE; + if (r > 0) + /* Reached the end of the journal file */ + break; + } + + return EXIT_SUCCESS; +} diff --git a/src/libsystemd/sd-journal/test-journal-enum.c b/src/libsystemd/sd-journal/test-journal-enum.c new file mode 100644 index 0000000..03fe8e2 --- /dev/null +++ b/src/libsystemd/sd-journal/test-journal-enum.c @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <stdio.h> + +#include "sd-journal.h" + +#include "journal-internal.h" +#include "log.h" +#include "macro.h" +#include "tests.h" + +int main(int argc, char *argv[]) { + unsigned n = 0; + _cleanup_(sd_journal_closep) sd_journal *j = NULL; + + test_setup_logging(LOG_DEBUG); + + assert_se(sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY) >= 0); + + assert_se(sd_journal_add_match(j, "_TRANSPORT=syslog", 0) >= 0); + assert_se(sd_journal_add_match(j, "_UID=0", 0) >= 0); + + SD_JOURNAL_FOREACH_BACKWARDS(j) { + const void *d; + size_t l; + + assert_se(sd_journal_get_data(j, "MESSAGE", &d, &l) >= 0); + + printf("%.*s\n", (int) l, (char*) d); + + n++; + if (n >= 10) + break; + } + + return 0; +} diff --git a/src/libsystemd/sd-journal/test-journal-file.c b/src/libsystemd/sd-journal/test-journal-file.c new file mode 100644 index 0000000..729de1f --- /dev/null +++ b/src/libsystemd/sd-journal/test-journal-file.c @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <stdlib.h> +#include <unistd.h> + +#include "journal-file.h" +#include "tests.h" +#include "user-util.h" + +static void test_journal_file_parse_uid_from_filename_simple( + const char *path, + uid_t expected_uid, + int expected_error) { + + uid_t uid = UID_INVALID; + int r; + + log_info("testing %s", path); + + r = journal_file_parse_uid_from_filename(path, &uid); + assert_se(r == expected_error); + if (r < 0) + assert_se(uid == UID_INVALID); + else + assert_se(uid == expected_uid); +} + +TEST(journal_file_parse_uid_from_filename) { + + test_journal_file_parse_uid_from_filename_simple("/var/log/journal/", 0, -EISDIR); + + /* The helper should return -EREMOTE for any filenames that don't look like an online or offline user + * journals. This includes archived and disposed journal files. */ + test_journal_file_parse_uid_from_filename_simple("/etc/password", 0, -EREMOTE); + test_journal_file_parse_uid_from_filename_simple("system.journal", 0, -EREMOTE); + test_journal_file_parse_uid_from_filename_simple("user-1000@0005d26980bdce6e-2f2a4939583822ef.journal~", 0, -EREMOTE); + test_journal_file_parse_uid_from_filename_simple("user-1000@xxx-yyy-zzz.journal", 0, -EREMOTE); + + test_journal_file_parse_uid_from_filename_simple("user-1000.journal", 1000, 0); + test_journal_file_parse_uid_from_filename_simple("user-foo.journal", 0, -EINVAL); + test_journal_file_parse_uid_from_filename_simple("user-65535.journal", 0, -ENXIO); +} + +DEFINE_TEST_MAIN(LOG_INFO); diff --git a/src/libsystemd/sd-journal/test-journal-flush.c b/src/libsystemd/sd-journal/test-journal-flush.c new file mode 100644 index 0000000..3f07835 --- /dev/null +++ b/src/libsystemd/sd-journal/test-journal-flush.c @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <unistd.h> + +#include "sd-journal.h" + +#include "alloc-util.h" +#include "chattr-util.h" +#include "journal-file-util.h" +#include "journal-internal.h" +#include "logs-show.h" +#include "macro.h" +#include "path-util.h" +#include "rm-rf.h" +#include "string-util.h" +#include "tests.h" +#include "tmpfile-util.h" + +static void test_journal_flush_one(int argc, char *argv[]) { + _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL; + _cleanup_free_ char *fn = NULL; + _cleanup_(rm_rf_physical_and_freep) char *dn = NULL; + _cleanup_(journal_file_offline_closep) JournalFile *new_journal = NULL; + _cleanup_(sd_journal_closep) sd_journal *j = NULL; + unsigned n, limit; + int r; + + assert_se(m = mmap_cache_new()); + assert_se(mkdtemp_malloc("/var/tmp/test-journal-flush.XXXXXX", &dn) >= 0); + (void) chattr_path(dn, FS_NOCOW_FL, FS_NOCOW_FL, NULL); + + assert_se(fn = path_join(dn, "test.journal")); + + r = journal_file_open(-1, fn, O_CREAT|O_RDWR, 0, 0644, 0, NULL, m, NULL, &new_journal); + assert_se(r >= 0); + + if (argc > 1) + r = sd_journal_open_files(&j, (const char **) strv_skip(argv, 1), 0); + else + r = sd_journal_open(&j, 0); + assert_se(r == 0); + + sd_journal_set_data_threshold(j, 0); + + n = 0; + limit = slow_tests_enabled() ? 10000 : 1000; + SD_JOURNAL_FOREACH(j) { + Object *o; + JournalFile *f; + + f = j->current_file; + assert_se(f && f->current_offset > 0); + + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); + if (r < 0) + log_error_errno(r, "journal_file_move_to_object failed: %m"); + assert_se(r >= 0); + + r = journal_file_copy_entry(f, new_journal, o, f->current_offset, NULL, NULL); + if (r < 0) + log_warning_errno(r, "journal_file_copy_entry failed: %m"); + assert_se(r >= 0 || + IN_SET(r, -EBADMSG, /* corrupted file */ + -EPROTONOSUPPORT, /* unsupported compression */ + -EIO, /* file rotated */ + -EREMCHG)); /* clock rollback */ + + if (++n >= limit) + break; + } + + if (n == 0) + return (void) log_tests_skipped("No journal entry found"); + + /* Open the new journal before archiving and offlining the file. */ + sd_journal_close(j); + assert_se(sd_journal_open_directory(&j, dn, 0) >= 0); + + /* Read the online journal. */ + assert_se(sd_journal_seek_tail(j) >= 0); + assert_se(sd_journal_step_one(j, 0) > 0); + printf("current_journal: %s (%i)\n", j->current_file->path, j->current_file->fd); + assert_se(show_journal_entry(stdout, j, OUTPUT_EXPORT, 0, 0, NULL, NULL, NULL, &(dual_timestamp) {}, &(sd_id128_t) {}) >= 0); + + uint64_t p; + assert_se(journal_file_tail_end_by_mmap(j->current_file, &p) >= 0); + for (uint64_t q = ALIGN64(p + 1); q < (uint64_t) j->current_file->last_stat.st_size; q = ALIGN64(q + 1)) { + Object *o; + + r = journal_file_move_to_object(j->current_file, OBJECT_UNUSED, q, &o); + assert_se(IN_SET(r, -EBADMSG, -EADDRNOTAVAIL)); + } + + /* Archive and offline file. */ + assert_se(journal_file_archive(new_journal, NULL) >= 0); + assert_se(journal_file_set_offline(new_journal, /* wait = */ true) >= 0); + + /* Read the archived and offline journal. */ + for (uint64_t q = ALIGN64(p + 1); q < (uint64_t) j->current_file->last_stat.st_size; q = ALIGN64(q + 1)) { + Object *o; + + r = journal_file_move_to_object(j->current_file, OBJECT_UNUSED, q, &o); + assert_se(IN_SET(r, -EBADMSG, -EADDRNOTAVAIL, -EIDRM)); + } +} + +TEST(journal_flush) { + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0); + test_journal_flush_one(saved_argc, saved_argv); +} + +TEST(journal_flush_compact) { + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0); + test_journal_flush_one(saved_argc, saved_argv); +} + +DEFINE_TEST_MAIN(LOG_INFO); diff --git a/src/libsystemd/sd-journal/test-journal-init.c b/src/libsystemd/sd-journal/test-journal-init.c new file mode 100644 index 0000000..c8a1977 --- /dev/null +++ b/src/libsystemd/sd-journal/test-journal-init.c @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <unistd.h> + +#include "sd-journal.h" + +#include "chattr-util.h" +#include "journal-internal.h" +#include "log.h" +#include "parse-util.h" +#include "process-util.h" +#include "rm-rf.h" +#include "tests.h" + +int main(int argc, char *argv[]) { + sd_journal *j; + int r, i, I = 100; + char t[] = "/var/tmp/journal-stream-XXXXXX"; + + test_setup_logging(LOG_DEBUG); + + if (argc >= 2) { + r = safe_atoi(argv[1], &I); + if (r < 0) + log_info("Could not parse loop count argument. Using default."); + } + + log_info("Running %d loops", I); + + assert_se(mkdtemp(t)); + (void) chattr_path(t, FS_NOCOW_FL, FS_NOCOW_FL, NULL); + + for (i = 0; i < I; i++) { + r = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY); + assert_se(r == 0); + + sd_journal_close(j); + + r = sd_journal_open_directory(&j, t, 0); + assert_se(r == 0); + + assert_se(sd_journal_seek_head(j) == 0); + assert_se(j->current_location.type == LOCATION_HEAD); + + r = safe_fork("(journal-fork-test)", FORK_WAIT|FORK_LOG, NULL); + if (r == 0) { + assert_se(j); + assert_se(sd_journal_get_realtime_usec(j, NULL) == -ECHILD); + assert_se(sd_journal_seek_tail(j) == -ECHILD); + assert_se(j->current_location.type == LOCATION_HEAD); + sd_journal_close(j); + _exit(EXIT_SUCCESS); + } + + assert_se(r >= 0); + + sd_journal_close(j); + + j = NULL; + r = sd_journal_open_directory(&j, t, SD_JOURNAL_LOCAL_ONLY); + assert_se(r == -EINVAL); + assert_se(j == NULL); + } + + assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0); + + return 0; +} diff --git a/src/libsystemd/sd-journal/test-journal-interleaving.c b/src/libsystemd/sd-journal/test-journal-interleaving.c new file mode 100644 index 0000000..8aeef8f --- /dev/null +++ b/src/libsystemd/sd-journal/test-journal-interleaving.c @@ -0,0 +1,737 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <unistd.h> + +#include "sd-id128.h" +#include "sd-journal.h" + +#include "alloc-util.h" +#include "chattr-util.h" +#include "iovec-util.h" +#include "journal-file-util.h" +#include "journal-vacuum.h" +#include "log.h" +#include "logs-show.h" +#include "parse-util.h" +#include "random-util.h" +#include "rm-rf.h" +#include "tests.h" + +/* This program tests skipping around in a multi-file journal. */ + +static bool arg_keep = false; +static dual_timestamp previous_ts = {}; + +_noreturn_ static void log_assert_errno(const char *text, int error, const char *file, unsigned line, const char *func) { + log_internal(LOG_CRIT, error, file, line, func, + "'%s' failed at %s:%u (%s): %m", text, file, line, func); + abort(); +} + +#define assert_ret(expr) \ + do { \ + int _r_ = (expr); \ + if (_unlikely_(_r_ < 0)) \ + log_assert_errno(#expr, -_r_, PROJECT_FILE, __LINE__, __func__); \ + } while (false) + +static JournalFile *test_open_internal(const char *name, JournalFileFlags flags) { + _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL; + JournalFile *f; + + m = mmap_cache_new(); + assert_se(m != NULL); + + assert_ret(journal_file_open(-1, name, O_RDWR|O_CREAT, flags, 0644, UINT64_MAX, NULL, m, NULL, &f)); + return f; +} + +static JournalFile *test_open(const char *name) { + return test_open_internal(name, JOURNAL_COMPRESS); +} + +static JournalFile *test_open_strict(const char *name) { + return test_open_internal(name, JOURNAL_COMPRESS | JOURNAL_STRICT_ORDER); +} + +static void test_close(JournalFile *f) { + (void) journal_file_offline_close(f); +} + +static void test_done(const char *t) { + log_info("Done..."); + + if (arg_keep) + log_info("Not removing %s", t); + else { + journal_directory_vacuum(".", 3000000, 0, 0, NULL, true); + + assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0); + } + + log_info("------------------------------------------------------------"); +} + +static void append_number(JournalFile *f, int n, const sd_id128_t *boot_id, uint64_t *seqnum, uint64_t *ret_offset) { + _cleanup_free_ char *p = NULL, *q = NULL; + dual_timestamp ts; + struct iovec iovec[2]; + size_t n_iov = 0; + + dual_timestamp_now(&ts); + + if (ts.monotonic <= previous_ts.monotonic) + ts.monotonic = previous_ts.monotonic + 1; + + if (ts.realtime <= previous_ts.realtime) + ts.realtime = previous_ts.realtime + 1; + + previous_ts = ts; + + assert_se(asprintf(&p, "NUMBER=%d", n) >= 0); + iovec[n_iov++] = IOVEC_MAKE_STRING(p); + + if (boot_id) { + assert_se(q = strjoin("_BOOT_ID=", SD_ID128_TO_STRING(*boot_id))); + iovec[n_iov++] = IOVEC_MAKE_STRING(q); + } + + assert_ret(journal_file_append_entry(f, &ts, boot_id, iovec, n_iov, seqnum, NULL, NULL, ret_offset)); +} + +static void append_unreferenced_data(JournalFile *f, const sd_id128_t *boot_id) { + _cleanup_free_ char *q = NULL; + dual_timestamp ts; + struct iovec iovec; + + assert(boot_id); + + ts.monotonic = usec_sub_unsigned(previous_ts.monotonic, 10); + ts.realtime = usec_sub_unsigned(previous_ts.realtime, 10); + + assert_se(q = strjoin("_BOOT_ID=", SD_ID128_TO_STRING(*boot_id))); + iovec = IOVEC_MAKE_STRING(q); + + assert_se(journal_file_append_entry(f, &ts, boot_id, &iovec, 1, NULL, NULL, NULL, NULL) == -EREMCHG); +} + +static void test_check_number(sd_journal *j, int n) { + sd_id128_t boot_id; + const void *d; + _cleanup_free_ char *k = NULL; + size_t l; + int x; + + assert_se(sd_journal_get_monotonic_usec(j, NULL, &boot_id) >= 0); + assert_ret(sd_journal_get_data(j, "NUMBER", &d, &l)); + assert_se(k = strndup(d, l)); + printf("%s %s (expected=%i)\n", SD_ID128_TO_STRING(boot_id), k, n); + + assert_se(safe_atoi(k + STRLEN("NUMBER="), &x) >= 0); + assert_se(n == x); +} + +static void test_check_numbers_down(sd_journal *j, int count) { + int i; + + for (i = 1; i <= count; i++) { + int r; + test_check_number(j, i); + assert_ret(r = sd_journal_next(j)); + if (i == count) + assert_se(r == 0); + else + assert_se(r == 1); + } + +} + +static void test_check_numbers_up(sd_journal *j, int count) { + for (int i = count; i >= 1; i--) { + int r; + test_check_number(j, i); + assert_ret(r = sd_journal_previous(j)); + if (i == 1) + assert_se(r == 0); + else + assert_se(r == 1); + } + +} + +static void setup_sequential(void) { + JournalFile *f1, *f2, *f3; + sd_id128_t id; + + f1 = test_open("one.journal"); + f2 = test_open("two.journal"); + f3 = test_open("three.journal"); + assert_se(sd_id128_randomize(&id) >= 0); + log_info("boot_id: %s", SD_ID128_TO_STRING(id)); + append_number(f1, 1, &id, NULL, NULL); + append_number(f1, 2, &id, NULL, NULL); + append_number(f1, 3, &id, NULL, NULL); + append_number(f2, 4, &id, NULL, NULL); + assert_se(sd_id128_randomize(&id) >= 0); + log_info("boot_id: %s", SD_ID128_TO_STRING(id)); + append_number(f2, 5, &id, NULL, NULL); + append_number(f2, 6, &id, NULL, NULL); + append_number(f3, 7, &id, NULL, NULL); + append_number(f3, 8, &id, NULL, NULL); + assert_se(sd_id128_randomize(&id) >= 0); + log_info("boot_id: %s", SD_ID128_TO_STRING(id)); + append_number(f3, 9, &id, NULL, NULL); + test_close(f1); + test_close(f2); + test_close(f3); +} + +static void setup_interleaved(void) { + JournalFile *f1, *f2, *f3; + sd_id128_t id; + + f1 = test_open("one.journal"); + f2 = test_open("two.journal"); + f3 = test_open("three.journal"); + assert_se(sd_id128_randomize(&id) >= 0); + log_info("boot_id: %s", SD_ID128_TO_STRING(id)); + append_number(f1, 1, &id, NULL, NULL); + append_number(f2, 2, &id, NULL, NULL); + append_number(f3, 3, &id, NULL, NULL); + append_number(f1, 4, &id, NULL, NULL); + append_number(f2, 5, &id, NULL, NULL); + append_number(f3, 6, &id, NULL, NULL); + append_number(f1, 7, &id, NULL, NULL); + append_number(f2, 8, &id, NULL, NULL); + append_number(f3, 9, &id, NULL, NULL); + test_close(f1); + test_close(f2); + test_close(f3); +} + +static void setup_unreferenced_data(void) { + JournalFile *f1, *f2, *f3; + sd_id128_t id; + + /* For issue #29275. */ + + f1 = test_open_strict("one.journal"); + f2 = test_open_strict("two.journal"); + f3 = test_open_strict("three.journal"); + assert_se(sd_id128_randomize(&id) >= 0); + log_info("boot_id: %s", SD_ID128_TO_STRING(id)); + append_number(f1, 1, &id, NULL, NULL); + append_number(f1, 2, &id, NULL, NULL); + append_number(f1, 3, &id, NULL, NULL); + assert_se(sd_id128_randomize(&id) >= 0); + log_info("boot_id: %s", SD_ID128_TO_STRING(id)); + append_unreferenced_data(f1, &id); + append_number(f2, 4, &id, NULL, NULL); + append_number(f2, 5, &id, NULL, NULL); + append_number(f2, 6, &id, NULL, NULL); + assert_se(sd_id128_randomize(&id) >= 0); + log_info("boot_id: %s", SD_ID128_TO_STRING(id)); + append_unreferenced_data(f2, &id); + append_number(f3, 7, &id, NULL, NULL); + append_number(f3, 8, &id, NULL, NULL); + append_number(f3, 9, &id, NULL, NULL); + test_close(f1); + test_close(f2); + test_close(f3); +} + +static void mkdtemp_chdir_chattr(char *path) { + assert_se(mkdtemp(path)); + assert_se(chdir(path) >= 0); + + /* Speed up things a bit on btrfs, ensuring that CoW is turned off for all files created in our + * directory during the test run */ + (void) chattr_path(path, FS_NOCOW_FL, FS_NOCOW_FL, NULL); +} + +static void test_skip_one(void (*setup)(void)) { + char t[] = "/var/tmp/journal-skip-XXXXXX"; + sd_journal *j; + int r; + + mkdtemp_chdir_chattr(t); + + setup(); + + /* Seek to head, iterate down. */ + assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_seek_head(j)); + assert_se(sd_journal_next(j) == 1); /* pointing to the first entry */ + test_check_numbers_down(j, 9); + sd_journal_close(j); + + /* Seek to head, iterate down. */ + assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_seek_head(j)); + assert_se(sd_journal_next(j) == 1); /* pointing to the first entry */ + assert_se(sd_journal_previous(j) == 0); /* no-op */ + test_check_numbers_down(j, 9); + sd_journal_close(j); + + /* Seek to head twice, iterate down. */ + assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_seek_head(j)); + assert_se(sd_journal_next(j) == 1); /* pointing to the first entry */ + assert_ret(sd_journal_seek_head(j)); + assert_se(sd_journal_next(j) == 1); /* pointing to the first entry */ + test_check_numbers_down(j, 9); + sd_journal_close(j); + + /* Seek to head, move to previous, then iterate down. */ + assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_seek_head(j)); + assert_se(sd_journal_previous(j) == 0); /* no-op */ + assert_se(sd_journal_next(j) == 1); /* pointing to the first entry */ + test_check_numbers_down(j, 9); + sd_journal_close(j); + + /* Seek to head, walk several steps, then iterate down. */ + assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_seek_head(j)); + assert_se(sd_journal_previous(j) == 0); /* no-op */ + assert_se(sd_journal_previous(j) == 0); /* no-op */ + assert_se(sd_journal_previous(j) == 0); /* no-op */ + assert_se(sd_journal_next(j) == 1); /* pointing to the first entry */ + assert_se(sd_journal_previous(j) == 0); /* no-op */ + assert_se(sd_journal_previous(j) == 0); /* no-op */ + test_check_numbers_down(j, 9); + sd_journal_close(j); + + /* Seek to tail, iterate up. */ + assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_seek_tail(j)); + assert_se(sd_journal_previous(j) == 1); /* pointing to the last entry */ + test_check_numbers_up(j, 9); + sd_journal_close(j); + + /* Seek to tail twice, iterate up. */ + assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_seek_tail(j)); + assert_se(sd_journal_previous(j) == 1); /* pointing to the last entry */ + assert_ret(sd_journal_seek_tail(j)); + assert_se(sd_journal_previous(j) == 1); /* pointing to the last entry */ + test_check_numbers_up(j, 9); + sd_journal_close(j); + + /* Seek to tail, move to next, then iterate up. */ + assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_seek_tail(j)); + assert_se(sd_journal_next(j) == 0); /* no-op */ + assert_se(sd_journal_previous(j) == 1); /* pointing to the last entry */ + test_check_numbers_up(j, 9); + sd_journal_close(j); + + /* Seek to tail, walk several steps, then iterate up. */ + assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_seek_tail(j)); + assert_se(sd_journal_next(j) == 0); /* no-op */ + assert_se(sd_journal_next(j) == 0); /* no-op */ + assert_se(sd_journal_next(j) == 0); /* no-op */ + assert_se(sd_journal_previous(j) == 1); /* pointing to the last entry. */ + assert_se(sd_journal_next(j) == 0); /* no-op */ + assert_se(sd_journal_next(j) == 0); /* no-op */ + test_check_numbers_up(j, 9); + sd_journal_close(j); + + /* Seek to tail, skip to head, iterate down. */ + assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_seek_tail(j)); + assert_se(sd_journal_previous_skip(j, 9) == 9); /* pointing to the first entry. */ + test_check_numbers_down(j, 9); + sd_journal_close(j); + + /* Seek to tail, skip to head in a more complex way, then iterate down. */ + assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_seek_tail(j)); + assert_se(sd_journal_next(j) == 0); + assert_se(sd_journal_previous_skip(j, 4) == 4); + assert_se(sd_journal_previous_skip(j, 5) == 5); + assert_se(sd_journal_previous(j) == 0); + assert_se(sd_journal_previous_skip(j, 5) == 0); + assert_se(sd_journal_next(j) == 1); + assert_se(sd_journal_previous_skip(j, 5) == 1); + assert_se(sd_journal_next(j) == 1); + assert_se(sd_journal_next(j) == 1); + assert_se(sd_journal_previous(j) == 1); + assert_se(sd_journal_next(j) == 1); + assert_se(sd_journal_next(j) == 1); + assert_se(sd_journal_previous_skip(j, 5) == 3); + test_check_numbers_down(j, 9); + sd_journal_close(j); + + /* Seek to head, skip to tail, iterate up. */ + assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_seek_head(j)); + assert_se(sd_journal_next_skip(j, 9) == 9); + test_check_numbers_up(j, 9); + sd_journal_close(j); + + /* Seek to head, skip to tail in a more complex way, then iterate up. */ + assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_seek_head(j)); + assert_se(sd_journal_previous(j) == 0); + assert_se(sd_journal_next_skip(j, 4) == 4); + assert_se(sd_journal_next_skip(j, 5) == 5); + assert_se(sd_journal_next(j) == 0); + assert_se(sd_journal_next_skip(j, 5) == 0); + assert_se(sd_journal_previous(j) == 1); + assert_se(sd_journal_next_skip(j, 5) == 1); + assert_se(sd_journal_previous(j) == 1); + assert_se(sd_journal_previous(j) == 1); + assert_se(sd_journal_next(j) == 1); + assert_se(sd_journal_previous(j) == 1); + assert_se(sd_journal_previous(j) == 1); + assert_se(r = sd_journal_next_skip(j, 5) == 3); + test_check_numbers_up(j, 9); + sd_journal_close(j); + + test_done(t); +} + +TEST(skip) { + test_skip_one(setup_sequential); + test_skip_one(setup_interleaved); +} + +static void test_boot_id_one(void (*setup)(void), size_t n_boots_expected) { + char t[] = "/var/tmp/journal-boot-id-XXXXXX"; + sd_journal *j; + _cleanup_free_ BootId *boots = NULL; + size_t n_boots; + + mkdtemp_chdir_chattr(t); + + setup(); + + assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_se(journal_get_boots(j, &boots, &n_boots) >= 0); + assert_se(boots); + assert_se(n_boots == n_boots_expected); + sd_journal_close(j); + + FOREACH_ARRAY(b, boots, n_boots) { + assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_se(journal_find_boot_by_id(j, b->id) == 1); + sd_journal_close(j); + } + + for (int i = - (int) n_boots + 1; i <= (int) n_boots; i++) { + sd_id128_t id; + + assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_se(journal_find_boot_by_offset(j, i, &id) == 1); + if (i <= 0) + assert_se(sd_id128_equal(id, boots[n_boots + i - 1].id)); + else + assert_se(sd_id128_equal(id, boots[i - 1].id)); + sd_journal_close(j); + } + + test_done(t); +} + +TEST(boot_id) { + test_boot_id_one(setup_sequential, 3); + test_boot_id_one(setup_unreferenced_data, 3); +} + +static void test_sequence_numbers_one(void) { + _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL; + char t[] = "/var/tmp/journal-seq-XXXXXX"; + JournalFile *one, *two; + uint64_t seqnum = 0; + sd_id128_t seqnum_id; + + m = mmap_cache_new(); + assert_se(m != NULL); + + mkdtemp_chdir_chattr(t); + + assert_se(journal_file_open(-1, "one.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS, 0644, + UINT64_MAX, NULL, m, NULL, &one) == 0); + + append_number(one, 1, NULL, &seqnum, NULL); + printf("seqnum=%"PRIu64"\n", seqnum); + assert_se(seqnum == 1); + append_number(one, 2, NULL, &seqnum, NULL); + printf("seqnum=%"PRIu64"\n", seqnum); + assert_se(seqnum == 2); + + assert_se(one->header->state == STATE_ONLINE); + assert_se(!sd_id128_equal(one->header->file_id, one->header->machine_id)); + assert_se(!sd_id128_equal(one->header->file_id, one->header->tail_entry_boot_id)); + assert_se(sd_id128_equal(one->header->file_id, one->header->seqnum_id)); + + memcpy(&seqnum_id, &one->header->seqnum_id, sizeof(sd_id128_t)); + + assert_se(journal_file_open(-1, "two.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS, 0644, + UINT64_MAX, NULL, m, one, &two) == 0); + + assert_se(two->header->state == STATE_ONLINE); + assert_se(!sd_id128_equal(two->header->file_id, one->header->file_id)); + assert_se(sd_id128_equal(two->header->machine_id, one->header->machine_id)); + assert_se(sd_id128_is_null(two->header->tail_entry_boot_id)); /* Not written yet. */ + assert_se(sd_id128_equal(two->header->seqnum_id, one->header->seqnum_id)); + + append_number(two, 3, NULL, &seqnum, NULL); + printf("seqnum=%"PRIu64"\n", seqnum); + assert_se(seqnum == 3); + append_number(two, 4, NULL, &seqnum, NULL); + printf("seqnum=%"PRIu64"\n", seqnum); + assert_se(seqnum == 4); + + /* Verify tail_entry_boot_id. */ + assert_se(sd_id128_equal(two->header->tail_entry_boot_id, one->header->tail_entry_boot_id)); + + test_close(two); + + append_number(one, 5, NULL, &seqnum, NULL); + printf("seqnum=%"PRIu64"\n", seqnum); + assert_se(seqnum == 5); + + append_number(one, 6, NULL, &seqnum, NULL); + printf("seqnum=%"PRIu64"\n", seqnum); + assert_se(seqnum == 6); + + test_close(one); + + /* If the machine-id is not initialized, the header file verification + * (which happens when re-opening a journal file) will fail. */ + if (sd_id128_get_machine(NULL) >= 0) { + /* restart server */ + seqnum = 0; + + assert_se(journal_file_open(-1, "two.journal", O_RDWR, JOURNAL_COMPRESS, 0, + UINT64_MAX, NULL, m, NULL, &two) == 0); + + assert_se(sd_id128_equal(two->header->seqnum_id, seqnum_id)); + + append_number(two, 7, NULL, &seqnum, NULL); + printf("seqnum=%"PRIu64"\n", seqnum); + assert_se(seqnum == 5); + + /* So..., here we have the same seqnum in two files with the + * same seqnum_id. */ + + test_close(two); + } + + test_done(t); +} + +TEST(sequence_numbers) { + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0); + test_sequence_numbers_one(); + + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0); + test_sequence_numbers_one(); +} + +static int expected_result(uint64_t needle, const uint64_t *candidates, const uint64_t *offset, size_t n, direction_t direction, uint64_t *ret) { + switch (direction) { + case DIRECTION_DOWN: + for (size_t i = 0; i < n; i++) { + if (candidates[i] == 0) { + *ret = 0; + return 0; + } + if (needle <= candidates[i]) { + *ret = offset[i]; + return 1; + } + } + *ret = 0; + return 0; + + case DIRECTION_UP: + for (size_t i = 0; i < n; i++) + if (needle < candidates[i] || candidates[i] == 0) { + if (i == 0) { + *ret = 0; + return 0; + } + *ret = offset[i - 1]; + return 1; + } + *ret = offset[n - 1]; + return 1; + + default: + assert_not_reached(); + } +} + +static void verify(JournalFile *f, const uint64_t *seqnum, const uint64_t *offset, size_t n) { + uint64_t p, q; + int r, e; + + /* by seqnum (sequential) */ + for (uint64_t i = 0; i < n + 2; i++) { + p = 0; + r = journal_file_move_to_entry_by_seqnum(f, i, DIRECTION_DOWN, NULL, &p); + e = expected_result(i, seqnum, offset, n, DIRECTION_DOWN, &q); + assert_se(r == e); + assert_se(p == q); + + p = 0; + r = journal_file_move_to_entry_by_seqnum(f, i, DIRECTION_UP, NULL, &p); + e = expected_result(i, seqnum, offset, n, DIRECTION_UP, &q); + assert_se(r == e); + assert_se(p == q); + } + + /* by seqnum (random) */ + for (size_t trial = 0; trial < 3 * n; trial++) { + uint64_t i = random_u64_range(n + 2); + + p = 0; + r = journal_file_move_to_entry_by_seqnum(f, i, DIRECTION_DOWN, NULL, &p); + e = expected_result(i, seqnum, offset, n, DIRECTION_DOWN, &q); + assert_se(r == e); + assert_se(p == q); + } + for (size_t trial = 0; trial < 3 * n; trial++) { + uint64_t i = random_u64_range(n + 2); + + p = 0; + r = journal_file_move_to_entry_by_seqnum(f, i, DIRECTION_UP, NULL, &p); + e = expected_result(i, seqnum, offset, n, DIRECTION_UP, &q); + assert_se(r == e); + assert_se(p == q); + } + + /* by offset (sequential) */ + for (size_t i = 0; i < n; i++) { + p = 0; + r = journal_file_move_to_entry_by_offset(f, offset[i] - 1, DIRECTION_DOWN, NULL, &p); + e = expected_result(offset[i] - 1, offset, offset, n, DIRECTION_DOWN, &q); + assert_se(r == e); + assert_se(p == q); + + p = 0; + r = journal_file_move_to_entry_by_offset(f, offset[i], DIRECTION_DOWN, NULL, &p); + e = expected_result(offset[i], offset, offset, n, DIRECTION_DOWN, &q); + assert_se(r == e); + assert_se(p == q); + + p = 0; + r = journal_file_move_to_entry_by_offset(f, offset[i] + 1, DIRECTION_DOWN, NULL, &p); + e = expected_result(offset[i] + 1, offset, offset, n, DIRECTION_DOWN, &q); + assert_se(r == e); + assert_se(p == q); + + p = 0; + r = journal_file_move_to_entry_by_offset(f, offset[i] - 1, DIRECTION_UP, NULL, &p); + e = expected_result(offset[i] - 1, offset, offset, n, DIRECTION_UP, &q); + assert_se(r == e); + assert_se(p == q); + + p = 0; + r = journal_file_move_to_entry_by_offset(f, offset[i], DIRECTION_UP, NULL, &p); + e = expected_result(offset[i], offset, offset, n, DIRECTION_UP, &q); + assert_se(r == e); + assert_se(p == q); + + p = 0; + r = journal_file_move_to_entry_by_offset(f, offset[i] + 1, DIRECTION_UP, NULL, &p); + e = expected_result(offset[i] + 1, offset, offset, n, DIRECTION_UP, &q); + assert_se(r == e); + assert_se(p == q); + } + + /* by offset (random) */ + for (size_t trial = 0; trial < 3 * n; trial++) { + uint64_t i = offset[0] - 1 + random_u64_range(offset[n-1] - offset[0] + 2); + + p = 0; + r = journal_file_move_to_entry_by_offset(f, i, DIRECTION_DOWN, NULL, &p); + e = expected_result(i, offset, offset, n, DIRECTION_DOWN, &q); + assert_se(r == e); + assert_se(p == q); + } + for (size_t trial = 0; trial < 3 * n; trial++) { + uint64_t i = offset[0] - 1 + random_u64_range(offset[n-1] - offset[0] + 2); + + p = 0; + r = journal_file_move_to_entry_by_offset(f, i, DIRECTION_UP, NULL, &p); + e = expected_result(i, offset, offset, n, DIRECTION_UP, &q); + assert_se(r == e); + assert_se(p == q); + } +} + +static void test_generic_array_bisect_one(size_t n, size_t num_corrupted) { + _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL; + char t[] = "/var/tmp/journal-seq-XXXXXX"; + _cleanup_free_ uint64_t *seqnum = NULL, *offset = NULL; + JournalFile *f; + + log_info("/* %s(%zu, %zu) */", __func__, n, num_corrupted); + + assert_se(m = mmap_cache_new()); + + mkdtemp_chdir_chattr(t); + + assert_se(journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS, 0644, + UINT64_MAX, NULL, m, NULL, &f) == 0); + + assert_se(seqnum = new0(uint64_t, n)); + assert_se(offset = new0(uint64_t, n)); + + for (size_t i = 0; i < n; i++) { + append_number(f, i, NULL, seqnum + i, offset + i); + if (i == 0) { + assert_se(seqnum[i] > 0); + assert_se(offset[i] > 0); + } else { + assert_se(seqnum[i] > seqnum[i-1]); + assert_se(offset[i] > offset[i-1]); + } + } + + verify(f, seqnum, offset, n); + + /* Reset chain cache. */ + assert_se(journal_file_move_to_entry_by_offset(f, offset[0], DIRECTION_DOWN, NULL, NULL) > 0); + + /* make journal corrupted by clearing seqnum. */ + for (size_t i = n - num_corrupted; i < n; i++) { + Object *o; + + assert_se(journal_file_move_to_object(f, OBJECT_ENTRY, offset[i], &o) >= 0); + assert_se(o); + o->entry.seqnum = 0; + seqnum[i] = 0; + } + + verify(f, seqnum, offset, n); + + test_close(f); + test_done(t); +} + +TEST(generic_array_bisect) { + for (size_t n = 1; n < 10; n++) + for (size_t m = 1; m <= n; m++) + test_generic_array_bisect_one(n, m); + + test_generic_array_bisect_one(100, 40); +} + +static int intro(void) { + /* journal_file_open() requires a valid machine id */ + if (access("/etc/machine-id", F_OK) != 0) + return log_tests_skipped("/etc/machine-id not found"); + + arg_keep = saved_argc > 1; + + return EXIT_SUCCESS; +} + +DEFINE_TEST_MAIN_WITH_INTRO(LOG_DEBUG, intro); diff --git a/src/libsystemd/sd-journal/test-journal-match.c b/src/libsystemd/sd-journal/test-journal-match.c new file mode 100644 index 0000000..571a88c --- /dev/null +++ b/src/libsystemd/sd-journal/test-journal-match.c @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <stdio.h> + +#include "sd-journal.h" + +#include "alloc-util.h" +#include "journal-internal.h" +#include "log.h" +#include "string-util.h" +#include "tests.h" + +int main(int argc, char *argv[]) { + _cleanup_(sd_journal_closep) sd_journal *j = NULL; + _cleanup_free_ char *t; + + test_setup_logging(LOG_DEBUG); + + assert_se(sd_journal_open(&j, 0) >= 0); + + assert_se(sd_journal_add_match(j, "foobar", 0) < 0); + assert_se(sd_journal_add_match(j, "foobar=waldo", 0) < 0); + assert_se(sd_journal_add_match(j, "", 0) < 0); + assert_se(sd_journal_add_match(j, "=", 0) < 0); + assert_se(sd_journal_add_match(j, "=xxxxx", 0) < 0); + assert_se(sd_journal_add_match(j, (uint8_t[4]){'A', '=', '\1', '\2'}, 4) >= 0); + assert_se(sd_journal_add_match(j, (uint8_t[5]){'B', '=', 'C', '\0', 'D'}, 5) >= 0); + assert_se(sd_journal_add_match(j, "HALLO=WALDO", 0) >= 0); + assert_se(sd_journal_add_match(j, "QUUX=mmmm", 0) >= 0); + assert_se(sd_journal_add_match(j, "QUUX=xxxxx", 0) >= 0); + assert_se(sd_journal_add_match(j, "HALLO=", 0) >= 0); + assert_se(sd_journal_add_match(j, "QUUX=xxxxx", 0) >= 0); + assert_se(sd_journal_add_match(j, "QUUX=yyyyy", 0) >= 0); + assert_se(sd_journal_add_match(j, "PIFF=paff", 0) >= 0); + + assert_se(sd_journal_add_disjunction(j) >= 0); + + assert_se(sd_journal_add_match(j, "ONE=one", 0) >= 0); + assert_se(sd_journal_add_match(j, "ONE=two", 0) >= 0); + assert_se(sd_journal_add_match(j, "TWO=two", 0) >= 0); + + assert_se(sd_journal_add_conjunction(j) >= 0); + + assert_se(sd_journal_add_match(j, "L4_1=yes", 0) >= 0); + assert_se(sd_journal_add_match(j, "L4_1=ok", 0) >= 0); + assert_se(sd_journal_add_match(j, "L4_2=yes", 0) >= 0); + assert_se(sd_journal_add_match(j, "L4_2=ok", 0) >= 0); + + assert_se(sd_journal_add_disjunction(j) >= 0); + + assert_se(sd_journal_add_match(j, "L3=yes", 0) >= 0); + assert_se(sd_journal_add_match(j, "L3=ok", 0) >= 0); + + assert_se(t = journal_make_match_string(j)); + + printf("resulting match expression is: %s\n", t); + + assert_se(streq(t, "(((L3=ok OR L3=yes) OR ((L4_2=ok OR L4_2=yes) AND (L4_1=ok OR L4_1=yes))) AND ((TWO=two AND (ONE=two OR ONE=one)) OR (PIFF=paff AND (QUUX=yyyyy OR QUUX=xxxxx OR QUUX=mmmm) AND (HALLO= OR HALLO=WALDO) AND B=C\\000D AND A=\\001\\002)))")); + + return 0; +} diff --git a/src/libsystemd/sd-journal/test-journal-send.c b/src/libsystemd/sd-journal/test-journal-send.c new file mode 100644 index 0000000..ca1fe7c --- /dev/null +++ b/src/libsystemd/sd-journal/test-journal-send.c @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <stdlib.h> +#include <unistd.h> + +#include "sd-journal.h" + +#include "fileio.h" +#include "journal-send.h" +#include "macro.h" +#include "memory-util.h" +#include "tests.h" + +TEST(journal_print) { + assert_se(sd_journal_print(LOG_INFO, "XXX") == 0); + assert_se(sd_journal_print(LOG_INFO, "%s", "YYY") == 0); + assert_se(sd_journal_print(LOG_INFO, "X%4094sY", "ZZZ") == 0); + assert_se(sd_journal_print(LOG_INFO, "X%*sY", (int) LONG_LINE_MAX - 8 - 3, "ZZZ") == 0); + assert_se(sd_journal_print(LOG_INFO, "X%*sY", (int) LONG_LINE_MAX - 8 - 2, "ZZZ") == -ENOBUFS); +} + +TEST(journal_send) { + _cleanup_free_ char *huge = NULL; + +#define HUGE_SIZE (4096*1024) + assert_se(huge = malloc(HUGE_SIZE)); + + /* utf-8 and non-utf-8, message-less and message-ful iovecs */ + struct iovec graph1[] = { + {(char*) "GRAPH=graph", STRLEN("GRAPH=graph")} + }; + struct iovec graph2[] = { + {(char*) "GRAPH=graph\n", STRLEN("GRAPH=graph\n")} + }; + struct iovec message1[] = { + {(char*) "MESSAGE=graph", STRLEN("MESSAGE=graph")} + }; + struct iovec message2[] = { + {(char*) "MESSAGE=graph\n", STRLEN("MESSAGE=graph\n")} + }; + + assert_se(sd_journal_print(LOG_INFO, "piepapo") == 0); + + assert_se(sd_journal_send("MESSAGE=foobar", + "VALUE=%i", 7, + NULL) == 0); + + errno = ENOENT; + assert_se(sd_journal_perror("Foobar") == 0); + + assert_se(sd_journal_perror("") == 0); + + memcpy(huge, "HUGE=", STRLEN("HUGE=")); + memset(&huge[STRLEN("HUGE=")], 'x', HUGE_SIZE - STRLEN("HUGE=") - 1); + huge[HUGE_SIZE - 1] = '\0'; + + assert_se(sd_journal_send("MESSAGE=Huge field attached", + huge, + NULL) == 0); + + assert_se(sd_journal_send("MESSAGE=uiui", + "VALUE=A", + "VALUE=B", + "VALUE=C", + "SINGLETON=1", + "OTHERVALUE=X", + "OTHERVALUE=Y", + "WITH_BINARY=this is a binary value \a", + NULL) == 0); + + syslog(LOG_NOTICE, "Hello World!"); + + assert_se(sd_journal_print(LOG_NOTICE, "Hello World") == 0); + + assert_se(sd_journal_send("MESSAGE=Hello World!", + "MESSAGE_ID=52fb62f99e2c49d89cfbf9d6de5e3555", + "PRIORITY=5", + "HOME=%s", getenv("HOME"), + "TERM=%s", getenv("TERM"), + "PAGE_SIZE=%li", sysconf(_SC_PAGESIZE), + "N_CPUS=%li", sysconf(_SC_NPROCESSORS_ONLN), + NULL) == 0); + + assert_se(sd_journal_sendv(graph1, 1) == 0); + assert_se(sd_journal_sendv(graph2, 1) == 0); + assert_se(sd_journal_sendv(message1, 1) == 0); + assert_se(sd_journal_sendv(message2, 1) == 0); + + /* test without location fields */ +#undef sd_journal_sendv + assert_se(sd_journal_sendv(graph1, 1) == 0); + assert_se(sd_journal_sendv(graph2, 1) == 0); + assert_se(sd_journal_sendv(message1, 1) == 0); + assert_se(sd_journal_sendv(message2, 1) == 0); + + /* The above syslog() opens a fd which is stored in libc, and the valgrind reports the fd is + * leaked when we do not call closelog(). */ + closelog(); +} + +static int outro(void) { + /* Sleep a bit to make it easy for journald to collect metadata. */ + sleep(1); + + close_journal_fd(); + + return EXIT_SUCCESS; +} + +DEFINE_TEST_MAIN_FULL(LOG_INFO, NULL, outro); diff --git a/src/libsystemd/sd-journal/test-journal-stream.c b/src/libsystemd/sd-journal/test-journal-stream.c new file mode 100644 index 0000000..3a370ef --- /dev/null +++ b/src/libsystemd/sd-journal/test-journal-stream.c @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <unistd.h> + +#include "sd-journal.h" + +#include "alloc-util.h" +#include "chattr-util.h" +#include "iovec-util.h" +#include "journal-file-util.h" +#include "journal-internal.h" +#include "log.h" +#include "macro.h" +#include "parse-util.h" +#include "rm-rf.h" +#include "tests.h" + +#define N_ENTRIES 200 + +static void verify_contents(sd_journal *j, unsigned skip) { + unsigned i; + + assert_se(j); + + i = 0; + SD_JOURNAL_FOREACH(j) { + const void *d; + char *k, *c; + size_t l; + unsigned u = 0; + + assert_se(sd_journal_get_cursor(j, &k) >= 0); + printf("cursor: %s\n", k); + free(k); + + assert_se(sd_journal_get_data(j, "MAGIC", &d, &l) >= 0); + printf("\t%.*s\n", (int) l, (const char*) d); + + assert_se(sd_journal_get_data(j, "NUMBER", &d, &l) >= 0); + assert_se(k = strndup(d, l)); + printf("\t%s\n", k); + + if (skip > 0) { + assert_se(safe_atou(k + 7, &u) >= 0); + assert_se(i == u); + i += skip; + } + + free(k); + + assert_se(sd_journal_get_cursor(j, &c) >= 0); + assert_se(sd_journal_test_cursor(j, c) > 0); + free(c); + } + + if (skip > 0) + assert_se(i == N_ENTRIES); +} + +static void run_test(void) { + _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL; + JournalFile *one, *two, *three; + char t[] = "/var/tmp/journal-stream-XXXXXX"; + unsigned i; + _cleanup_(sd_journal_closep) sd_journal *j = NULL; + char *z; + const void *data; + size_t l; + dual_timestamp previous_ts = DUAL_TIMESTAMP_NULL; + + m = mmap_cache_new(); + assert_se(m != NULL); + + assert_se(mkdtemp(t)); + assert_se(chdir(t) >= 0); + (void) chattr_path(t, FS_NOCOW_FL, FS_NOCOW_FL, NULL); + + assert_se(journal_file_open(-1, "one.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS, 0666, UINT64_MAX, NULL, m, NULL, &one) == 0); + assert_se(journal_file_open(-1, "two.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS, 0666, UINT64_MAX, NULL, m, NULL, &two) == 0); + assert_se(journal_file_open(-1, "three.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS, 0666, UINT64_MAX, NULL, m, NULL, &three) == 0); + + for (i = 0; i < N_ENTRIES; i++) { + char *p, *q; + dual_timestamp ts; + struct iovec iovec[2]; + + dual_timestamp_now(&ts); + + if (ts.monotonic <= previous_ts.monotonic) + ts.monotonic = previous_ts.monotonic + 1; + + if (ts.realtime <= previous_ts.realtime) + ts.realtime = previous_ts.realtime + 1; + + previous_ts = ts; + + assert_se(asprintf(&p, "NUMBER=%u", i) >= 0); + iovec[0] = IOVEC_MAKE(p, strlen(p)); + + assert_se(asprintf(&q, "MAGIC=%s", i % 5 == 0 ? "quux" : "waldo") >= 0); + + iovec[1] = IOVEC_MAKE(q, strlen(q)); + + if (i % 10 == 0) + assert_se(journal_file_append_entry(three, &ts, NULL, iovec, 2, NULL, NULL, NULL, NULL) == 0); + else { + if (i % 3 == 0) + assert_se(journal_file_append_entry(two, &ts, NULL, iovec, 2, NULL, NULL, NULL, NULL) == 0); + + assert_se(journal_file_append_entry(one, &ts, NULL, iovec, 2, NULL, NULL, NULL, NULL) == 0); + } + + free(p); + free(q); + } + + (void) journal_file_offline_close(one); + (void) journal_file_offline_close(two); + (void) journal_file_offline_close(three); + + assert_se(sd_journal_open_directory(&j, t, 0) >= 0); + + assert_se(sd_journal_add_match(j, "MAGIC=quux", 0) >= 0); + SD_JOURNAL_FOREACH_BACKWARDS(j) { + _cleanup_free_ char *c; + + assert_se(sd_journal_get_data(j, "NUMBER", &data, &l) >= 0); + printf("\t%.*s\n", (int) l, (const char*) data); + + assert_se(sd_journal_get_cursor(j, &c) >= 0); + assert_se(sd_journal_test_cursor(j, c) > 0); + } + + SD_JOURNAL_FOREACH(j) { + _cleanup_free_ char *c; + + assert_se(sd_journal_get_data(j, "NUMBER", &data, &l) >= 0); + printf("\t%.*s\n", (int) l, (const char*) data); + + assert_se(sd_journal_get_cursor(j, &c) >= 0); + assert_se(sd_journal_test_cursor(j, c) > 0); + } + + sd_journal_flush_matches(j); + + verify_contents(j, 1); + + printf("NEXT TEST\n"); + assert_se(sd_journal_add_match(j, "MAGIC=quux", 0) >= 0); + + assert_se(z = journal_make_match_string(j)); + printf("resulting match expression is: %s\n", z); + free(z); + + verify_contents(j, 5); + + printf("NEXT TEST\n"); + sd_journal_flush_matches(j); + assert_se(sd_journal_add_match(j, "MAGIC=waldo", 0) >= 0); + assert_se(sd_journal_add_match(j, "NUMBER=10", 0) >= 0); + assert_se(sd_journal_add_match(j, "NUMBER=11", 0) >= 0); + assert_se(sd_journal_add_match(j, "NUMBER=12", 0) >= 0); + + assert_se(z = journal_make_match_string(j)); + printf("resulting match expression is: %s\n", z); + free(z); + + verify_contents(j, 0); + + assert_se(sd_journal_query_unique(j, "NUMBER") >= 0); + SD_JOURNAL_FOREACH_UNIQUE(j, data, l) + printf("%.*s\n", (int) l, (const char*) data); + + assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0); +} + +int main(int argc, char *argv[]) { + + /* journal_file_open() requires a valid machine id */ + if (access("/etc/machine-id", F_OK) != 0) + return log_tests_skipped("/etc/machine-id not found"); + + test_setup_logging(LOG_DEBUG); + + /* Run this test multiple times with different configurations of features. */ + + assert_se(setenv("SYSTEMD_JOURNAL_KEYED_HASH", "0", 1) >= 0); + run_test(); + + assert_se(setenv("SYSTEMD_JOURNAL_KEYED_HASH", "1", 1) >= 0); + run_test(); + + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0); + run_test(); + + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0); + run_test(); + + return 0; +} diff --git a/src/libsystemd/sd-journal/test-journal-verify.c b/src/libsystemd/sd-journal/test-journal-verify.c new file mode 100644 index 0000000..edce440 --- /dev/null +++ b/src/libsystemd/sd-journal/test-journal-verify.c @@ -0,0 +1,210 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <stdio.h> +#include <unistd.h> + +#include "chattr-util.h" +#include "fd-util.h" +#include "iovec-util.h" +#include "journal-file-util.h" +#include "journal-verify.h" +#include "log.h" +#include "mmap-cache.h" +#include "rm-rf.h" +#include "strv.h" +#include "terminal-util.h" +#include "tests.h" + +#define N_ENTRIES 6000 +#define RANDOM_RANGE 77 + +static void bit_toggle(const char *fn, uint64_t p) { + uint8_t b; + ssize_t r; + int fd; + + fd = open(fn, O_RDWR|O_CLOEXEC); + assert_se(fd >= 0); + + r = pread(fd, &b, 1, p/8); + assert_se(r == 1); + + b ^= 1 << (p % 8); + + r = pwrite(fd, &b, 1, p/8); + assert_se(r == 1); + + safe_close(fd); +} + +static int raw_verify(const char *fn, const char *verification_key) { + _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL; + JournalFile *f; + int r; + + m = mmap_cache_new(); + assert_se(m != NULL); + + r = journal_file_open( + /* fd= */ -1, + fn, + O_RDONLY, + JOURNAL_COMPRESS|(verification_key ? JOURNAL_SEAL : 0), + 0666, + /* compress_threshold_bytes= */ UINT64_MAX, + /* metrics= */ NULL, + m, + /* template= */ NULL, + &f); + if (r < 0) + return r; + + r = journal_file_verify(f, verification_key, NULL, NULL, NULL, false); + (void) journal_file_close(f); + + return r; +} + +static int run_test(const char *verification_key, ssize_t max_iterations) { + _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL; + char t[] = "/var/tmp/journal-XXXXXX"; + struct stat st; + JournalFile *f; + JournalFile *df; + usec_t from = 0, to = 0, total = 0; + uint64_t start, end; + int r; + + m = mmap_cache_new(); + assert_se(m != NULL); + + /* journal_file_open() requires a valid machine id */ + if (sd_id128_get_machine(NULL) < 0) + return log_tests_skipped("No valid machine ID found"); + + test_setup_logging(LOG_DEBUG); + + assert_se(mkdtemp(t)); + assert_se(chdir(t) >= 0); + (void) chattr_path(t, FS_NOCOW_FL, FS_NOCOW_FL, NULL); + + log_info("Generating a test journal"); + + assert_se(journal_file_open( + /* fd= */ -1, + "test.journal", + O_RDWR|O_CREAT, + JOURNAL_COMPRESS|(verification_key ? JOURNAL_SEAL : 0), + 0666, + /* compress_threshold_bytes= */ UINT64_MAX, + /* metrics= */ NULL, + m, + /* template= */ NULL, + &df) == 0); + + for (size_t n = 0; n < N_ENTRIES; n++) { + _cleanup_free_ char *test = NULL; + struct iovec iovec; + struct dual_timestamp ts; + + dual_timestamp_now(&ts); + assert_se(asprintf(&test, "RANDOM=%li", random() % RANDOM_RANGE)); + iovec = IOVEC_MAKE_STRING(test); + assert_se(journal_file_append_entry( + df, + &ts, + /* boot_id= */ NULL, + &iovec, + /* n_iovec= */ 1, + /* seqnum= */ NULL, + /* seqnum_id= */ NULL, + /* ret_object= */ NULL, + /* ret_offset= */ NULL) == 0); + } + + (void) journal_file_offline_close(df); + + log_info("Verifying with key: %s", strna(verification_key)); + + assert_se(journal_file_open( + /* fd= */ -1, + "test.journal", + O_RDONLY, + JOURNAL_COMPRESS|(verification_key ? JOURNAL_SEAL : 0), + 0666, + /* compress_threshold_bytes= */ UINT64_MAX, + /* metrics= */ NULL, + m, + /* template= */ NULL, + &f) == 0); + journal_file_print_header(f); + journal_file_dump(f); + + assert_se(journal_file_verify(f, verification_key, &from, &to, &total, true) >= 0); + + if (verification_key && JOURNAL_HEADER_SEALED(f->header)) + log_info("=> Validated from %s to %s, %s missing", + FORMAT_TIMESTAMP(from), + FORMAT_TIMESTAMP(to), + FORMAT_TIMESPAN(total > to ? total - to : 0, 0)); + + (void) journal_file_close(f); + assert_se(stat("test.journal", &st) >= 0); + + start = 38448 * 8 + 0; + end = max_iterations < 0 ? (uint64_t)st.st_size * 8 : start + max_iterations; + log_info("Toggling bits %"PRIu64 " to %"PRIu64, start, end); + + for (uint64_t p = start; p < end; p++) { + bit_toggle("test.journal", p); + + if (max_iterations < 0) + log_info("[ %"PRIu64"+%"PRIu64"]", p / 8, p % 8); + + r = raw_verify("test.journal", verification_key); + /* Suppress the notice when running in the limited (CI) mode */ + if (verification_key && max_iterations < 0 && r >= 0) + log_notice(ANSI_HIGHLIGHT_RED ">>>> %"PRIu64" (bit %"PRIu64") can be toggled without detection." ANSI_NORMAL, p / 8, p % 8); + + bit_toggle("test.journal", p); + } + + assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0); + + return 0; +} + +int main(int argc, char *argv[]) { + const char *verification_key = NULL; + int max_iterations = 512; + + if (argc > 1) { + /* Don't limit the number of iterations when the verification key + * is provided on the command line, we want to do that only in CIs */ + verification_key = argv[1]; + max_iterations = -1; + } + + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0); + run_test(verification_key, max_iterations); + + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0); + run_test(verification_key, max_iterations); + +#if HAVE_GCRYPT + /* If we're running without any arguments and we're compiled with gcrypt + * check the journal verification stuff with a valid key as well */ + if (argc <= 1) { + verification_key = "c262bd-85187f-0b1b04-877cc5/1c7af8-35a4e900"; + + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0); + run_test(verification_key, max_iterations); + + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0); + run_test(verification_key, max_iterations); + } +#endif + + return 0; +} diff --git a/src/libsystemd/sd-journal/test-journal.c b/src/libsystemd/sd-journal/test-journal.c new file mode 100644 index 0000000..96f2b67 --- /dev/null +++ b/src/libsystemd/sd-journal/test-journal.c @@ -0,0 +1,280 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <unistd.h> + +#include "chattr-util.h" +#include "iovec-util.h" +#include "journal-authenticate.h" +#include "journal-file-util.h" +#include "journal-vacuum.h" +#include "log.h" +#include "rm-rf.h" +#include "tests.h" + +static bool arg_keep = false; + +static void mkdtemp_chdir_chattr(char *path) { + assert_se(mkdtemp(path)); + assert_se(chdir(path) >= 0); + + /* Speed up things a bit on btrfs, ensuring that CoW is turned off for all files created in our + * directory during the test run */ + (void) chattr_path(path, FS_NOCOW_FL, FS_NOCOW_FL, NULL); +} + +static void test_non_empty_one(void) { + _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL; + dual_timestamp ts; + JournalFile *f; + struct iovec iovec; + static const char test[] = "TEST1=1", test2[] = "TEST2=2"; + Object *o, *d; + uint64_t p; + sd_id128_t fake_boot_id; + char t[] = "/var/tmp/journal-XXXXXX"; + + m = mmap_cache_new(); + assert_se(m != NULL); + + mkdtemp_chdir_chattr(t); + + assert_se(journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS|JOURNAL_SEAL, 0666, UINT64_MAX, NULL, m, NULL, &f) == 0); + + assert_se(dual_timestamp_now(&ts)); + assert_se(sd_id128_randomize(&fake_boot_id) == 0); + + iovec = IOVEC_MAKE_STRING(test); + assert_se(journal_file_append_entry(f, &ts, NULL, &iovec, 1, NULL, NULL, NULL, NULL) == 0); + + iovec = IOVEC_MAKE_STRING(test2); + assert_se(journal_file_append_entry(f, &ts, NULL, &iovec, 1, NULL, NULL, NULL, NULL) == 0); + + iovec = IOVEC_MAKE_STRING(test); + assert_se(journal_file_append_entry(f, &ts, &fake_boot_id, &iovec, 1, NULL, NULL, NULL, NULL) == 0); + +#if HAVE_GCRYPT + journal_file_append_tag(f); +#endif + journal_file_dump(f); + + assert_se(journal_file_next_entry(f, 0, DIRECTION_DOWN, &o, &p) == 1); + assert_se(le64toh(o->entry.seqnum) == 1); + + assert_se(journal_file_next_entry(f, p, DIRECTION_DOWN, &o, &p) == 1); + assert_se(le64toh(o->entry.seqnum) == 2); + + assert_se(journal_file_next_entry(f, p, DIRECTION_DOWN, &o, &p) == 1); + assert_se(le64toh(o->entry.seqnum) == 3); + assert_se(sd_id128_equal(o->entry.boot_id, fake_boot_id)); + + assert_se(journal_file_next_entry(f, p, DIRECTION_DOWN, &o, &p) == 0); + + assert_se(journal_file_next_entry(f, 0, DIRECTION_DOWN, &o, &p) == 1); + assert_se(le64toh(o->entry.seqnum) == 1); + + assert_se(journal_file_find_data_object(f, test, strlen(test), &d, NULL) == 1); + assert_se(journal_file_move_to_entry_for_data(f, d, DIRECTION_DOWN, &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 1); + + assert_se(journal_file_move_to_entry_for_data(f, d, DIRECTION_UP, &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 3); + + assert_se(journal_file_find_data_object(f, test2, strlen(test2), &d, NULL) == 1); + assert_se(journal_file_move_to_entry_for_data(f, d, DIRECTION_UP, &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 2); + + assert_se(journal_file_move_to_entry_for_data(f, d, DIRECTION_DOWN, &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 2); + + assert_se(journal_file_find_data_object(f, "quux", 4, &d, NULL) == 0); + + assert_se(journal_file_move_to_entry_by_seqnum(f, 1, DIRECTION_DOWN, &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 1); + + assert_se(journal_file_move_to_entry_by_seqnum(f, 3, DIRECTION_DOWN, &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 3); + + assert_se(journal_file_move_to_entry_by_seqnum(f, 2, DIRECTION_DOWN, &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 2); + + assert_se(journal_file_move_to_entry_by_seqnum(f, 10, DIRECTION_DOWN, &o, NULL) == 0); + + journal_file_rotate(&f, m, JOURNAL_SEAL|JOURNAL_COMPRESS, UINT64_MAX, NULL); + journal_file_rotate(&f, m, JOURNAL_SEAL|JOURNAL_COMPRESS, UINT64_MAX, NULL); + + (void) journal_file_offline_close(f); + + log_info("Done..."); + + if (arg_keep) + log_info("Not removing %s", t); + else { + journal_directory_vacuum(".", 3000000, 0, 0, NULL, true); + + assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0); + } + + puts("------------------------------------------------------------"); +} + +TEST(non_empty) { + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0); + test_non_empty_one(); + + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0); + test_non_empty_one(); +} + +static void test_empty_one(void) { + _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL; + JournalFile *f1, *f2, *f3, *f4; + char t[] = "/var/tmp/journal-XXXXXX"; + + m = mmap_cache_new(); + assert_se(m != NULL); + + mkdtemp_chdir_chattr(t); + + assert_se(journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, 0, 0666, UINT64_MAX, NULL, m, NULL, &f1) == 0); + assert_se(journal_file_open(-1, "test-compress.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS, 0666, UINT64_MAX, NULL, m, NULL, &f2) == 0); + assert_se(journal_file_open(-1, "test-seal.journal", O_RDWR|O_CREAT, JOURNAL_SEAL, 0666, UINT64_MAX, NULL, m, NULL, &f3) == 0); + assert_se(journal_file_open(-1, "test-seal-compress.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS|JOURNAL_SEAL, 0666, UINT64_MAX, NULL, m, NULL, &f4) == 0); + + journal_file_print_header(f1); + puts(""); + journal_file_print_header(f2); + puts(""); + journal_file_print_header(f3); + puts(""); + journal_file_print_header(f4); + puts(""); + + log_info("Done..."); + + if (arg_keep) + log_info("Not removing %s", t); + else { + journal_directory_vacuum(".", 3000000, 0, 0, NULL, true); + + assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0); + } + + (void) journal_file_offline_close(f1); + (void) journal_file_offline_close(f2); + (void) journal_file_offline_close(f3); + (void) journal_file_offline_close(f4); +} + +TEST(empty) { + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0); + test_empty_one(); + + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0); + test_empty_one(); +} + +#if HAVE_COMPRESSION +static bool check_compressed(uint64_t compress_threshold, uint64_t data_size) { + _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL; + dual_timestamp ts; + JournalFile *f; + struct iovec iovec; + Object *o; + uint64_t p; + char t[] = "/var/tmp/journal-XXXXXX"; + char data[2048] = "FIELD="; + bool is_compressed; + int r; + + assert_se(data_size <= sizeof(data)); + + m = mmap_cache_new(); + assert_se(m != NULL); + + mkdtemp_chdir_chattr(t); + + assert_se(journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS|JOURNAL_SEAL, 0666, compress_threshold, NULL, m, NULL, &f) == 0); + + dual_timestamp_now(&ts); + + iovec = IOVEC_MAKE(data, data_size); + assert_se(journal_file_append_entry(f, &ts, NULL, &iovec, 1, NULL, NULL, NULL, NULL) == 0); + +#if HAVE_GCRYPT + journal_file_append_tag(f); +#endif + journal_file_dump(f); + + /* We have to partially reimplement some of the dump logic, because the normal next_entry does the + * decompression for us. */ + p = le64toh(f->header->header_size); + for (;;) { + r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o); + assert_se(r == 0); + if (o->object.type == OBJECT_DATA) + break; + + assert_se(p < le64toh(f->header->tail_object_offset)); + p = p + ALIGN64(le64toh(o->object.size)); + } + + is_compressed = COMPRESSION_FROM_OBJECT(o) != COMPRESSION_NONE; + + (void) journal_file_offline_close(f); + + log_info("Done..."); + + if (arg_keep) + log_info("Not removing %s", t); + else { + journal_directory_vacuum(".", 3000000, 0, 0, NULL, true); + + assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0); + } + + puts("------------------------------------------------------------"); + + return is_compressed; +} + +static void test_min_compress_size_one(void) { + /* Note that XZ will actually fail to compress anything under 80 bytes, so you have to choose the limits + * carefully */ + + /* DEFAULT_MIN_COMPRESS_SIZE is 512 */ + assert_se(!check_compressed(UINT64_MAX, 255)); + assert_se(check_compressed(UINT64_MAX, 513)); + + /* compress everything */ + assert_se(check_compressed(0, 96)); + assert_se(check_compressed(8, 96)); + + /* Ensure we don't try to compress less than 8 bytes */ + assert_se(!check_compressed(0, 7)); + + /* check boundary conditions */ + assert_se(check_compressed(256, 256)); + assert_se(!check_compressed(256, 255)); +} + +TEST(min_compress_size) { + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0); + test_min_compress_size_one(); + + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0); + test_min_compress_size_one(); +} +#endif + +static int intro(void) { + arg_keep = saved_argc > 1; + + /* journal_file_open() requires a valid machine id */ + if (access("/etc/machine-id", F_OK) != 0) + return log_tests_skipped("/etc/machine-id not found"); + + return EXIT_SUCCESS; +} + +DEFINE_TEST_MAIN_WITH_INTRO(LOG_DEBUG, intro); diff --git a/src/libsystemd/sd-journal/test-mmap-cache.c b/src/libsystemd/sd-journal/test-mmap-cache.c new file mode 100644 index 0000000..ce5ea12 --- /dev/null +++ b/src/libsystemd/sd-journal/test-mmap-cache.c @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <unistd.h> + +#include "fd-util.h" +#include "macro.h" +#include "mmap-cache.h" +#include "tests.h" +#include "tmpfile-util.h" + +int main(int argc, char *argv[]) { + MMapFileDescriptor *fx; + int x, y, z, r; + char px[] = "/tmp/testmmapXXXXXXX", py[] = "/tmp/testmmapYXXXXXX", pz[] = "/tmp/testmmapZXXXXXX"; + MMapCache *m; + void *p, *q; + + test_setup_logging(LOG_DEBUG); + + assert_se(m = mmap_cache_new()); + + x = mkostemp_safe(px); + assert_se(x >= 0); + (void) unlink(px); + + assert_se(mmap_cache_add_fd(m, x, PROT_READ, &fx) > 0); + + y = mkostemp_safe(py); + assert_se(y >= 0); + (void) unlink(py); + + z = mkostemp_safe(pz); + assert_se(z >= 0); + (void) unlink(pz); + + r = mmap_cache_fd_get(fx, 0, false, 1, 2, NULL, &p); + assert_se(r >= 0); + + r = mmap_cache_fd_get(fx, 0, false, 2, 2, NULL, &q); + assert_se(r >= 0); + + assert_se((uint8_t*) p + 1 == (uint8_t*) q); + + r = mmap_cache_fd_get(fx, 1, false, 3, 2, NULL, &q); + assert_se(r >= 0); + + assert_se((uint8_t*) p + 2 == (uint8_t*) q); + + r = mmap_cache_fd_get(fx, 0, false, 16ULL*1024ULL*1024ULL, 2, NULL, &p); + assert_se(r >= 0); + + r = mmap_cache_fd_get(fx, 1, false, 16ULL*1024ULL*1024ULL+1, 2, NULL, &q); + assert_se(r >= 0); + + assert_se((uint8_t*) p + 1 == (uint8_t*) q); + + mmap_cache_fd_free(fx); + mmap_cache_unref(m); + + safe_close(x); + safe_close(y); + safe_close(z); + + return 0; +} diff --git a/src/libsystemd/sd-login/sd-login.c b/src/libsystemd/sd-login/sd-login.c new file mode 100644 index 0000000..f9e86c6 --- /dev/null +++ b/src/libsystemd/sd-login/sd-login.c @@ -0,0 +1,1323 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <poll.h> +#include <sys/inotify.h> +#include <unistd.h> + +#include "sd-login.h" + +#include "alloc-util.h" +#include "cgroup-util.h" +#include "dirent-util.h" +#include "env-file.h" +#include "escape.h" +#include "extract-word.h" +#include "fd-util.h" +#include "format-util.h" +#include "fs-util.h" +#include "hostname-util.h" +#include "io-util.h" +#include "login-util.h" +#include "macro.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "socket-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strv.h" +#include "user-util.h" + +/* Error codes: + * + * invalid input parameters → -EINVAL + * invalid fd → -EBADF + * process does not exist → -ESRCH + * cgroup does not exist → -ENOENT + * machine, session does not exist → -ENXIO + * requested metadata on object is missing → -ENODATA + */ + +_public_ int sd_pid_get_session(pid_t pid, char **session) { + int r; + + assert_return(pid >= 0, -EINVAL); + assert_return(session, -EINVAL); + + r = cg_pid_get_session(pid, session); + return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r; +} + +_public_ int sd_pid_get_unit(pid_t pid, char **unit) { + int r; + + assert_return(pid >= 0, -EINVAL); + assert_return(unit, -EINVAL); + + r = cg_pid_get_unit(pid, unit); + return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r; +} + +_public_ int sd_pid_get_user_unit(pid_t pid, char **unit) { + int r; + + assert_return(pid >= 0, -EINVAL); + assert_return(unit, -EINVAL); + + r = cg_pid_get_user_unit(pid, unit); + return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r; +} + +_public_ int sd_pid_get_machine_name(pid_t pid, char **name) { + int r; + + assert_return(pid >= 0, -EINVAL); + assert_return(name, -EINVAL); + + r = cg_pid_get_machine_name(pid, name); + return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r; +} + +_public_ int sd_pid_get_slice(pid_t pid, char **slice) { + int r; + + assert_return(pid >= 0, -EINVAL); + assert_return(slice, -EINVAL); + + r = cg_pid_get_slice(pid, slice); + return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r; +} + +_public_ int sd_pid_get_user_slice(pid_t pid, char **slice) { + int r; + + assert_return(pid >= 0, -EINVAL); + assert_return(slice, -EINVAL); + + r = cg_pid_get_user_slice(pid, slice); + return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r; +} + +_public_ int sd_pid_get_owner_uid(pid_t pid, uid_t *uid) { + int r; + + assert_return(pid >= 0, -EINVAL); + assert_return(uid, -EINVAL); + + r = cg_pid_get_owner_uid(pid, uid); + return IN_SET(r, -ENXIO, -ENOMEDIUM) ? -ENODATA : r; +} + +_public_ int sd_pid_get_cgroup(pid_t pid, char **cgroup) { + char *c; + int r; + + assert_return(pid >= 0, -EINVAL); + assert_return(cgroup, -EINVAL); + + r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &c); + if (r < 0) + return r; + + /* The internal APIs return the empty string for the root + * cgroup, let's return the "/" in the public APIs instead, as + * that's easier and less ambiguous for people to grok. */ + if (isempty(c)) { + r = free_and_strdup(&c, "/"); + if (r < 0) + return r; + + } + + *cgroup = c; + return 0; +} + +_public_ int sd_pidfd_get_session(int pidfd, char **ret_session) { + _cleanup_free_ char *session = NULL; + pid_t pid; + int r; + + assert_return(pidfd >= 0, -EBADF); + assert_return(ret_session, -EINVAL); + + r = pidfd_get_pid(pidfd, &pid); + if (r < 0) + return r; + + r = sd_pid_get_session(pid, &session); + if (r < 0) + return r; + + r = pidfd_verify_pid(pidfd, pid); + if (r < 0) + return r; + + *ret_session = TAKE_PTR(session); + + return 0; +} + +_public_ int sd_pidfd_get_unit(int pidfd, char **ret_unit) { + _cleanup_free_ char *unit = NULL; + pid_t pid; + int r; + + assert_return(pidfd >= 0, -EBADF); + assert_return(ret_unit, -EINVAL); + + r = pidfd_get_pid(pidfd, &pid); + if (r < 0) + return r; + + r = sd_pid_get_unit(pid, &unit); + if (r < 0) + return r; + + r = pidfd_verify_pid(pidfd, pid); + if (r < 0) + return r; + + *ret_unit = TAKE_PTR(unit); + + return 0; +} + +_public_ int sd_pidfd_get_user_unit(int pidfd, char **ret_unit) { + _cleanup_free_ char *unit = NULL; + pid_t pid; + int r; + + assert_return(pidfd >= 0, -EBADF); + assert_return(ret_unit, -EINVAL); + + r = pidfd_get_pid(pidfd, &pid); + if (r < 0) + return r; + + r = sd_pid_get_user_unit(pid, &unit); + if (r < 0) + return r; + + r = pidfd_verify_pid(pidfd, pid); + if (r < 0) + return r; + + *ret_unit = TAKE_PTR(unit); + + return 0; +} + +_public_ int sd_pidfd_get_machine_name(int pidfd, char **ret_name) { + _cleanup_free_ char *name = NULL; + pid_t pid; + int r; + + assert_return(pidfd >= 0, -EBADF); + assert_return(ret_name, -EINVAL); + + r = pidfd_get_pid(pidfd, &pid); + if (r < 0) + return r; + + r = sd_pid_get_machine_name(pid, &name); + if (r < 0) + return r; + + r = pidfd_verify_pid(pidfd, pid); + if (r < 0) + return r; + + *ret_name = TAKE_PTR(name); + + return 0; +} + +_public_ int sd_pidfd_get_slice(int pidfd, char **ret_slice) { + _cleanup_free_ char *slice = NULL; + pid_t pid; + int r; + + assert_return(pidfd >= 0, -EBADF); + assert_return(ret_slice, -EINVAL); + + r = pidfd_get_pid(pidfd, &pid); + if (r < 0) + return r; + + r = sd_pid_get_slice(pid, &slice); + if (r < 0) + return r; + + r = pidfd_verify_pid(pidfd, pid); + if (r < 0) + return r; + + *ret_slice = TAKE_PTR(slice); + + return 0; +} + +_public_ int sd_pidfd_get_user_slice(int pidfd, char **ret_slice) { + _cleanup_free_ char *slice = NULL; + pid_t pid; + int r; + + assert_return(pidfd >= 0, -EBADF); + assert_return(ret_slice, -EINVAL); + + r = pidfd_get_pid(pidfd, &pid); + if (r < 0) + return r; + + r = sd_pid_get_user_slice(pid, &slice); + if (r < 0) + return r; + + r = pidfd_verify_pid(pidfd, pid); + if (r < 0) + return r; + + *ret_slice = TAKE_PTR(slice); + + return 0; +} + +_public_ int sd_pidfd_get_owner_uid(int pidfd, uid_t *ret_uid) { + uid_t uid; + pid_t pid; + int r; + + assert_return(pidfd >= 0, -EINVAL); + assert_return(ret_uid, -EINVAL); + + r = pidfd_get_pid(pidfd, &pid); + if (r < 0) + return r; + + r = sd_pid_get_owner_uid(pid, &uid); + if (r < 0) + return r; + + r = pidfd_verify_pid(pidfd, pid); + if (r < 0) + return r; + + *ret_uid = uid; + + return 0; +} + +_public_ int sd_pidfd_get_cgroup(int pidfd, char **ret_cgroup) { + _cleanup_free_ char *cgroup = NULL; + pid_t pid; + int r; + + assert_return(pidfd >= 0, -EBADF); + assert_return(ret_cgroup, -EINVAL); + + r = pidfd_get_pid(pidfd, &pid); + if (r < 0) + return r; + + r = sd_pid_get_cgroup(pid, &cgroup); + if (r < 0) + return r; + + r = pidfd_verify_pid(pidfd, pid); + if (r < 0) + return r; + + *ret_cgroup = TAKE_PTR(cgroup); + + return 0; +} + +_public_ int sd_peer_get_session(int fd, char **session) { + struct ucred ucred = UCRED_INVALID; + int r; + + assert_return(fd >= 0, -EBADF); + assert_return(session, -EINVAL); + + r = getpeercred(fd, &ucred); + if (r < 0) + return r; + + return cg_pid_get_session(ucred.pid, session); +} + +_public_ int sd_peer_get_owner_uid(int fd, uid_t *uid) { + struct ucred ucred; + int r; + + assert_return(fd >= 0, -EBADF); + assert_return(uid, -EINVAL); + + r = getpeercred(fd, &ucred); + if (r < 0) + return r; + + return cg_pid_get_owner_uid(ucred.pid, uid); +} + +_public_ int sd_peer_get_unit(int fd, char **unit) { + struct ucred ucred; + int r; + + assert_return(fd >= 0, -EBADF); + assert_return(unit, -EINVAL); + + r = getpeercred(fd, &ucred); + if (r < 0) + return r; + + return cg_pid_get_unit(ucred.pid, unit); +} + +_public_ int sd_peer_get_user_unit(int fd, char **unit) { + struct ucred ucred; + int r; + + assert_return(fd >= 0, -EBADF); + assert_return(unit, -EINVAL); + + r = getpeercred(fd, &ucred); + if (r < 0) + return r; + + return cg_pid_get_user_unit(ucred.pid, unit); +} + +_public_ int sd_peer_get_machine_name(int fd, char **machine) { + struct ucred ucred; + int r; + + assert_return(fd >= 0, -EBADF); + assert_return(machine, -EINVAL); + + r = getpeercred(fd, &ucred); + if (r < 0) + return r; + + return cg_pid_get_machine_name(ucred.pid, machine); +} + +_public_ int sd_peer_get_slice(int fd, char **slice) { + struct ucred ucred; + int r; + + assert_return(fd >= 0, -EBADF); + assert_return(slice, -EINVAL); + + r = getpeercred(fd, &ucred); + if (r < 0) + return r; + + return cg_pid_get_slice(ucred.pid, slice); +} + +_public_ int sd_peer_get_user_slice(int fd, char **slice) { + struct ucred ucred; + int r; + + assert_return(fd >= 0, -EBADF); + assert_return(slice, -EINVAL); + + r = getpeercred(fd, &ucred); + if (r < 0) + return r; + + return cg_pid_get_user_slice(ucred.pid, slice); +} + +_public_ int sd_peer_get_cgroup(int fd, char **cgroup) { + struct ucred ucred; + int r; + + assert_return(fd >= 0, -EBADF); + assert_return(cgroup, -EINVAL); + + r = getpeercred(fd, &ucred); + if (r < 0) + return r; + + return sd_pid_get_cgroup(ucred.pid, cgroup); +} + +static int file_of_uid(uid_t uid, char **p) { + + assert_return(uid_is_valid(uid), -EINVAL); + assert(p); + + if (asprintf(p, "/run/systemd/users/" UID_FMT, uid) < 0) + return -ENOMEM; + + return 0; +} + +_public_ int sd_uid_get_state(uid_t uid, char**state) { + _cleanup_free_ char *p = NULL, *s = NULL; + int r; + + assert_return(state, -EINVAL); + + r = file_of_uid(uid, &p); + if (r < 0) + return r; + + r = parse_env_file(NULL, p, "STATE", &s); + if (r == -ENOENT) + r = free_and_strdup(&s, "offline"); + if (r < 0) + return r; + if (isempty(s)) + return -EIO; + + *state = TAKE_PTR(s); + return 0; +} + +_public_ int sd_uid_get_display(uid_t uid, char **session) { + _cleanup_free_ char *p = NULL, *s = NULL; + int r; + + assert_return(session, -EINVAL); + + r = file_of_uid(uid, &p); + if (r < 0) + return r; + + r = parse_env_file(NULL, p, "DISPLAY", &s); + if (r == -ENOENT) + return -ENODATA; + if (r < 0) + return r; + if (isempty(s)) + return -ENODATA; + + *session = TAKE_PTR(s); + + return 0; +} + +_public_ int sd_uid_get_login_time(uid_t uid, uint64_t *usec) { + _cleanup_free_ char *p = NULL, *s = NULL, *rt = NULL; + usec_t t; + int r; + + assert_return(usec, -EINVAL); + + r = file_of_uid(uid, &p); + if (r < 0) + return r; + + r = parse_env_file(NULL, p, "STATE", &s, "REALTIME", &rt); + if (r == -ENOENT) + return -ENXIO; + if (r < 0) + return r; + if (isempty(s) || isempty(rt)) + return -EIO; + + if (!STR_IN_SET(s, "active", "online")) + return -ENXIO; + + r = safe_atou64(rt, &t); + if (r < 0) + return r; + + *usec = t; + return 0; +} + +static int file_of_seat(const char *seat, char **_p) { + char *p; + int r; + + assert(_p); + + if (seat) { + if (!filename_is_valid(seat)) + return -EINVAL; + + p = path_join("/run/systemd/seats", seat); + } else { + _cleanup_free_ char *buf = NULL; + + r = sd_session_get_seat(NULL, &buf); + if (r < 0) + return r; + + p = path_join("/run/systemd/seats", buf); + } + if (!p) + return -ENOMEM; + + *_p = TAKE_PTR(p); + return 0; +} + +_public_ int sd_uid_is_on_seat(uid_t uid, int require_active, const char *seat) { + _cleanup_free_ char *filename = NULL, *content = NULL; + int r; + + assert_return(uid_is_valid(uid), -EINVAL); + + r = file_of_seat(seat, &filename); + if (r < 0) + return r; + + r = parse_env_file(NULL, filename, + require_active ? "ACTIVE_UID" : "UIDS", + &content); + if (r == -ENOENT) + return 0; + if (r < 0) + return r; + if (isempty(content)) + return 0; + + char t[DECIMAL_STR_MAX(uid_t)]; + xsprintf(t, UID_FMT, uid); + + return string_contains_word(content, NULL, t); +} + +static int uid_get_array(uid_t uid, const char *variable, char ***array) { + _cleanup_free_ char *p = NULL, *s = NULL; + char **a; + int r; + + assert(variable); + + r = file_of_uid(uid, &p); + if (r < 0) + return r; + + r = parse_env_file(NULL, p, variable, &s); + if (r == -ENOENT || (r >= 0 && isempty(s))) { + if (array) + *array = NULL; + return 0; + } + if (r < 0) + return r; + + a = strv_split(s, NULL); + if (!a) + return -ENOMEM; + + strv_uniq(a); + r = (int) strv_length(a); + + if (array) + *array = a; + else + strv_free(a); + + return r; +} + +_public_ int sd_uid_get_sessions(uid_t uid, int require_active, char ***sessions) { + return uid_get_array( + uid, + require_active == 0 ? "ONLINE_SESSIONS" : + require_active > 0 ? "ACTIVE_SESSIONS" : + "SESSIONS", + sessions); +} + +_public_ int sd_uid_get_seats(uid_t uid, int require_active, char ***seats) { + return uid_get_array( + uid, + require_active == 0 ? "ONLINE_SEATS" : + require_active > 0 ? "ACTIVE_SEATS" : + "SEATS", + seats); +} + +static int file_of_session(const char *session, char **_p) { + char *p; + int r; + + assert(_p); + + if (session) { + if (!session_id_valid(session)) + return -EINVAL; + + p = path_join("/run/systemd/sessions", session); + } else { + _cleanup_free_ char *buf = NULL; + + r = sd_pid_get_session(0, &buf); + if (r < 0) + return r; + + p = path_join("/run/systemd/sessions", buf); + } + + if (!p) + return -ENOMEM; + + *_p = p; + return 0; +} + +_public_ int sd_session_is_active(const char *session) { + _cleanup_free_ char *p = NULL, *s = NULL; + int r; + + r = file_of_session(session, &p); + if (r < 0) + return r; + + r = parse_env_file(NULL, p, "ACTIVE", &s); + if (r == -ENOENT) + return -ENXIO; + if (r < 0) + return r; + if (isempty(s)) + return -EIO; + + return parse_boolean(s); +} + +_public_ int sd_session_is_remote(const char *session) { + _cleanup_free_ char *p = NULL, *s = NULL; + int r; + + r = file_of_session(session, &p); + if (r < 0) + return r; + + r = parse_env_file(NULL, p, "REMOTE", &s); + if (r == -ENOENT) + return -ENXIO; + if (r < 0) + return r; + if (isempty(s)) + return -ENODATA; + + return parse_boolean(s); +} + +_public_ int sd_session_get_state(const char *session, char **state) { + _cleanup_free_ char *p = NULL, *s = NULL; + int r; + + assert_return(state, -EINVAL); + + r = file_of_session(session, &p); + if (r < 0) + return r; + + r = parse_env_file(NULL, p, "STATE", &s); + if (r == -ENOENT) + return -ENXIO; + if (r < 0) + return r; + if (isempty(s)) + return -EIO; + + *state = TAKE_PTR(s); + + return 0; +} + +_public_ int sd_session_get_uid(const char *session, uid_t *uid) { + int r; + _cleanup_free_ char *p = NULL, *s = NULL; + + assert_return(uid, -EINVAL); + + r = file_of_session(session, &p); + if (r < 0) + return r; + + r = parse_env_file(NULL, p, "UID", &s); + if (r == -ENOENT) + return -ENXIO; + if (r < 0) + return r; + if (isempty(s)) + return -EIO; + + return parse_uid(s, uid); +} + +static int session_get_string(const char *session, const char *field, char **value) { + _cleanup_free_ char *p = NULL, *s = NULL; + int r; + + assert_return(value, -EINVAL); + assert(field); + + r = file_of_session(session, &p); + if (r < 0) + return r; + + r = parse_env_file(NULL, p, field, &s); + if (r == -ENOENT) + return -ENXIO; + if (r < 0) + return r; + if (isempty(s)) + return -ENODATA; + + *value = TAKE_PTR(s); + return 0; +} + +_public_ int sd_session_get_username(const char *session, char **username) { + return session_get_string(session, "USER", username); +} + +_public_ int sd_session_get_seat(const char *session, char **seat) { + return session_get_string(session, "SEAT", seat); +} + +_public_ int sd_session_get_start_time(const char *session, uint64_t *usec) { + _cleanup_free_ char *p = NULL, *s = NULL; + usec_t t; + int r; + + assert_return(usec, -EINVAL); + + r = file_of_session(session, &p); + if (r < 0) + return r; + + r = parse_env_file(NULL, p, "REALTIME", &s); + if (r == -ENOENT) + return -ENXIO; + if (r < 0) + return r; + if (isempty(s)) + return -EIO; + + r = safe_atou64(s, &t); + if (r < 0) + return r; + + *usec = t; + return 0; +} + +_public_ int sd_session_get_tty(const char *session, char **tty) { + return session_get_string(session, "TTY", tty); +} + +_public_ int sd_session_get_vt(const char *session, unsigned *vtnr) { + _cleanup_free_ char *vtnr_string = NULL; + unsigned u; + int r; + + assert_return(vtnr, -EINVAL); + + r = session_get_string(session, "VTNR", &vtnr_string); + if (r < 0) + return r; + + r = safe_atou(vtnr_string, &u); + if (r < 0) + return r; + + *vtnr = u; + return 0; +} + +_public_ int sd_session_get_service(const char *session, char **service) { + return session_get_string(session, "SERVICE", service); +} + +_public_ int sd_session_get_type(const char *session, char **type) { + return session_get_string(session, "TYPE", type); +} + +_public_ int sd_session_get_class(const char *session, char **class) { + return session_get_string(session, "CLASS", class); +} + +_public_ int sd_session_get_desktop(const char *session, char **desktop) { + _cleanup_free_ char *escaped = NULL; + int r; + ssize_t l; + + assert_return(desktop, -EINVAL); + + r = session_get_string(session, "DESKTOP", &escaped); + if (r < 0) + return r; + + l = cunescape(escaped, 0, desktop); + if (l < 0) + return l; + return 0; +} + +_public_ int sd_session_get_display(const char *session, char **display) { + return session_get_string(session, "DISPLAY", display); +} + +_public_ int sd_session_get_remote_user(const char *session, char **remote_user) { + return session_get_string(session, "REMOTE_USER", remote_user); +} + +_public_ int sd_session_get_remote_host(const char *session, char **remote_host) { + return session_get_string(session, "REMOTE_HOST", remote_host); +} + +_public_ int sd_session_get_leader(const char *session, pid_t *leader) { + _cleanup_free_ char *leader_string = NULL; + pid_t pid; + int r; + + assert_return(leader, -EINVAL); + + r = session_get_string(session, "LEADER", &leader_string); + if (r < 0) + return r; + + r = parse_pid(leader_string, &pid); + if (r < 0) + return r; + + *leader = pid; + return 0; +} + +_public_ int sd_seat_get_active(const char *seat, char **session, uid_t *uid) { + _cleanup_free_ char *p = NULL, *s = NULL, *t = NULL; + int r; + + assert_return(session || uid, -EINVAL); + + r = file_of_seat(seat, &p); + if (r < 0) + return r; + + r = parse_env_file(NULL, p, + "ACTIVE", &s, + "ACTIVE_UID", &t); + if (r == -ENOENT) + return -ENXIO; + if (r < 0) + return r; + + if (session && !s) + return -ENODATA; + + if (uid && !t) + return -ENODATA; + + if (uid && t) { + r = parse_uid(t, uid); + if (r < 0) + return r; + } + + if (session && s) + *session = TAKE_PTR(s); + + return 0; +} + +_public_ int sd_seat_get_sessions( + const char *seat, + char ***ret_sessions, + uid_t **ret_uids, + unsigned *ret_n_uids) { + + _cleanup_free_ char *fname = NULL, *session_line = NULL, *uid_line = NULL; + _cleanup_strv_free_ char **sessions = NULL; + _cleanup_free_ uid_t *uids = NULL; + unsigned n_sessions = 0; + int r; + + r = file_of_seat(seat, &fname); + if (r < 0) + return r; + + r = parse_env_file(NULL, fname, + "SESSIONS", &session_line, + "UIDS", &uid_line); + if (r == -ENOENT) + return -ENXIO; + if (r < 0) + return r; + + if (session_line) { + sessions = strv_split(session_line, NULL); + if (!sessions) + return -ENOMEM; + + n_sessions = strv_length(sessions); + }; + + if (ret_uids && uid_line) { + uids = new(uid_t, n_sessions); + if (!uids) + return -ENOMEM; + + size_t n = 0; + for (const char *p = uid_line;;) { + _cleanup_free_ char *word = NULL; + + r = extract_first_word(&p, &word, NULL, 0); + if (r < 0) + return r; + if (r == 0) + break; + + r = parse_uid(word, &uids[n++]); + if (r < 0) + return r; + } + + if (n != n_sessions) + return -EUCLEAN; + } + + if (ret_sessions) + *ret_sessions = TAKE_PTR(sessions); + if (ret_uids) + *ret_uids = TAKE_PTR(uids); + if (ret_n_uids) + *ret_n_uids = n_sessions; + + return n_sessions; +} + +static int seat_get_can(const char *seat, const char *variable) { + _cleanup_free_ char *p = NULL, *s = NULL; + int r; + + assert(variable); + + r = file_of_seat(seat, &p); + if (r < 0) + return r; + + r = parse_env_file(NULL, p, + variable, &s); + if (r == -ENOENT) + return -ENXIO; + if (r < 0) + return r; + if (isempty(s)) + return -ENODATA; + + return parse_boolean(s); +} + +_public_ int sd_seat_can_multi_session(const char *seat) { + return true; +} + +_public_ int sd_seat_can_tty(const char *seat) { + return seat_get_can(seat, "CAN_TTY"); +} + +_public_ int sd_seat_can_graphical(const char *seat) { + return seat_get_can(seat, "CAN_GRAPHICAL"); +} + +_public_ int sd_get_seats(char ***seats) { + int r; + + r = get_files_in_directory("/run/systemd/seats/", seats); + if (r == -ENOENT) { + if (seats) + *seats = NULL; + return 0; + } + return r; +} + +_public_ int sd_get_sessions(char ***sessions) { + int r; + + r = get_files_in_directory("/run/systemd/sessions/", sessions); + if (r == -ENOENT) { + if (sessions) + *sessions = NULL; + return 0; + } + return r; +} + +_public_ int sd_get_uids(uid_t **users) { + _cleanup_closedir_ DIR *d = NULL; + int r = 0; + unsigned n = 0; + _cleanup_free_ uid_t *l = NULL; + + d = opendir("/run/systemd/users/"); + if (!d) { + if (errno == ENOENT) { + if (users) + *users = NULL; + return 0; + } + return -errno; + } + + FOREACH_DIRENT_ALL(de, d, return -errno) { + int k; + uid_t uid; + + if (!dirent_is_file(de)) + continue; + + k = parse_uid(de->d_name, &uid); + if (k < 0) + continue; + + if (users) { + if ((unsigned) r >= n) { + uid_t *t; + + n = MAX(16, 2*r); + t = reallocarray(l, n, sizeof(uid_t)); + if (!t) + return -ENOMEM; + + l = t; + } + + assert((unsigned) r < n); + l[r++] = uid; + } else + r++; + } + + if (users) + *users = TAKE_PTR(l); + + return r; +} + +_public_ int sd_get_machine_names(char ***machines) { + _cleanup_strv_free_ char **l = NULL; + char **a, **b; + int r; + + r = get_files_in_directory("/run/systemd/machines/", &l); + if (r == -ENOENT) { + if (machines) + *machines = NULL; + return 0; + } + if (r < 0) + return r; + + if (l) { + r = 0; + + /* Filter out the unit: symlinks */ + for (a = b = l; *a; a++) { + if (startswith(*a, "unit:") || !hostname_is_valid(*a, 0)) + free(*a); + else { + *b = *a; + b++; + r++; + } + } + + *b = NULL; + } + + if (machines) + *machines = TAKE_PTR(l); + + return r; +} + +_public_ int sd_machine_get_class(const char *machine, char **class) { + _cleanup_free_ char *c = NULL; + const char *p; + int r; + + assert_return(class, -EINVAL); + + if (streq(machine, ".host")) { + c = strdup("host"); + if (!c) + return -ENOMEM; + } else { + if (!hostname_is_valid(machine, 0)) + return -EINVAL; + + p = strjoina("/run/systemd/machines/", machine); + r = parse_env_file(NULL, p, "CLASS", &c); + if (r == -ENOENT) + return -ENXIO; + if (r < 0) + return r; + if (!c) + return -EIO; + } + + *class = TAKE_PTR(c); + return 0; +} + +_public_ int sd_machine_get_ifindices(const char *machine, int **ret_ifindices) { + _cleanup_free_ char *netif_line = NULL; + const char *p; + int r; + + assert_return(hostname_is_valid(machine, 0), -EINVAL); + + p = strjoina("/run/systemd/machines/", machine); + r = parse_env_file(NULL, p, "NETIF", &netif_line); + if (r == -ENOENT) + return -ENXIO; + if (r < 0) + return r; + if (!netif_line) { + *ret_ifindices = NULL; + return 0; + } + + _cleanup_strv_free_ char **tt = strv_split(netif_line, NULL); + if (!tt) + return -ENOMEM; + + _cleanup_free_ int *ifindices = NULL; + if (ret_ifindices) { + ifindices = new(int, strv_length(tt)); + if (!ifindices) + return -ENOMEM; + } + + size_t n = 0; + for (size_t i = 0; tt[i]; i++) { + int ind; + + ind = parse_ifindex(tt[i]); + if (ind < 0) + /* Return -EUCLEAN to distinguish from -EINVAL for invalid args */ + return ind == -EINVAL ? -EUCLEAN : ind; + + if (ret_ifindices) + ifindices[n] = ind; + n++; + } + + if (ret_ifindices) + *ret_ifindices = TAKE_PTR(ifindices); + + return n; +} + +static int MONITOR_TO_FD(sd_login_monitor *m) { + return (int) (unsigned long) m - 1; +} + +static sd_login_monitor* FD_TO_MONITOR(int fd) { + return (sd_login_monitor*) (unsigned long) (fd + 1); +} + +_public_ int sd_login_monitor_new(const char *category, sd_login_monitor **m) { + _cleanup_close_ int fd = -EBADF; + bool good = false; + int k; + + assert_return(m, -EINVAL); + + fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC); + if (fd < 0) + return -errno; + + if (!category || streq(category, "seat")) { + k = inotify_add_watch(fd, "/run/systemd/seats/", IN_MOVED_TO|IN_DELETE); + if (k < 0) + return -errno; + + good = true; + } + + if (!category || streq(category, "session")) { + k = inotify_add_watch(fd, "/run/systemd/sessions/", IN_MOVED_TO|IN_DELETE); + if (k < 0) + return -errno; + + good = true; + } + + if (!category || streq(category, "uid")) { + k = inotify_add_watch(fd, "/run/systemd/users/", IN_MOVED_TO|IN_DELETE); + if (k < 0) + return -errno; + + good = true; + } + + if (!category || streq(category, "machine")) { + k = inotify_add_watch(fd, "/run/systemd/machines/", IN_MOVED_TO|IN_DELETE); + if (k < 0) + return -errno; + + good = true; + } + + if (!good) + return -EINVAL; + + *m = FD_TO_MONITOR(TAKE_FD(fd)); + return 0; +} + +_public_ sd_login_monitor* sd_login_monitor_unref(sd_login_monitor *m) { + if (m) + (void) close_nointr(MONITOR_TO_FD(m)); + + return NULL; +} + +_public_ int sd_login_monitor_flush(sd_login_monitor *m) { + int r; + + assert_return(m, -EINVAL); + + r = flush_fd(MONITOR_TO_FD(m)); + if (r < 0) + return r; + + return 0; +} + +_public_ int sd_login_monitor_get_fd(sd_login_monitor *m) { + + assert_return(m, -EINVAL); + + return MONITOR_TO_FD(m); +} + +_public_ int sd_login_monitor_get_events(sd_login_monitor *m) { + + assert_return(m, -EINVAL); + + /* For now we will only return POLLIN here, since we don't + * need anything else ever for inotify. However, let's have + * this API to keep our options open should we later on need + * it. */ + return POLLIN; +} + +_public_ int sd_login_monitor_get_timeout(sd_login_monitor *m, uint64_t *timeout_usec) { + + assert_return(m, -EINVAL); + assert_return(timeout_usec, -EINVAL); + + /* For now we will only return UINT64_MAX, since we don't + * need any timeout. However, let's have this API to keep our + * options open should we later on need it. */ + *timeout_usec = UINT64_MAX; + return 0; +} diff --git a/src/libsystemd/sd-login/test-login.c b/src/libsystemd/sd-login/test-login.c new file mode 100644 index 0000000..819f86f --- /dev/null +++ b/src/libsystemd/sd-login/test-login.c @@ -0,0 +1,334 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <poll.h> + +#include "sd-login.h" + +#include "alloc-util.h" +#include "errno-list.h" +#include "fd-util.h" +#include "format-util.h" +#include "log.h" +#include "missing_syscall.h" +#include "process-util.h" +#include "string-util.h" +#include "strv.h" +#include "tests.h" +#include "time-util.h" +#include "user-util.h" + +static char* format_uids(char **buf, uid_t* uids, int count) { + int pos = 0, inc; + size_t size = (DECIMAL_STR_MAX(uid_t) + 1) * count + 1; + + assert_se(*buf = malloc(size)); + + for (int k = 0; k < count; k++) { + sprintf(*buf + pos, "%s"UID_FMT"%n", k > 0 ? " " : "", uids[k], &inc); + pos += inc; + } + + assert_se(pos < (ssize_t)size); + (*buf)[pos] = '\0'; + + return *buf; +} + +static const char *e(int r) { + return r == 0 ? "OK" : errno_to_name(r); +} + +TEST(login) { + _cleanup_close_pair_ int pair[2] = EBADF_PAIR; + _cleanup_free_ char *pp = NULL, *qq = NULL, + *display_session = NULL, *cgroup = NULL, + *display = NULL, *remote_user = NULL, *remote_host = NULL, + *type = NULL, *class = NULL, *state = NULL, *state2 = NULL, + *seat = NULL, *session = NULL, + *unit = NULL, *user_unit = NULL, *slice = NULL; + _cleanup_close_ int pidfd = -EBADF; + int r; + uid_t u, u2 = UID_INVALID; + char *t, **seats = NULL, **sessions = NULL; + + r = sd_pid_get_unit(0, &unit); + log_info("sd_pid_get_unit(0, …) → %s / \"%s\"", e(r), strnull(unit)); + assert_se(IN_SET(r, 0, -ENODATA)); + + r = sd_pid_get_user_unit(0, &user_unit); + log_info("sd_pid_get_user_unit(0, …) → %s / \"%s\"", e(r), strnull(user_unit)); + assert_se(IN_SET(r, 0, -ENODATA)); + + r = sd_pid_get_slice(0, &slice); + log_info("sd_pid_get_slice(0, …) → %s / \"%s\"", e(r), strnull(slice)); + assert_se(IN_SET(r, 0, -ENODATA)); + + r = sd_pid_get_owner_uid(0, &u2); + log_info("sd_pid_get_owner_uid(0, …) → %s / "UID_FMT, e(r), u2); + assert_se(IN_SET(r, 0, -ENODATA)); + + r = sd_pid_get_session(0, &session); + log_info("sd_pid_get_session(0, …) → %s / \"%s\"", e(r), strnull(session)); + + r = sd_pid_get_cgroup(0, &cgroup); + log_info("sd_pid_get_cgroup(0, …) → %s / \"%s\"", e(r), strnull(cgroup)); + assert_se(IN_SET(r, 0, -ENOMEDIUM)); + + pidfd = pidfd_open(getpid_cached(), 0); + if (pidfd >= 0) { + _cleanup_free_ char *cgroup2 = NULL, *session2 = NULL, + *unit2 = NULL, *user_unit2 = NULL, *slice2 = NULL; + + r = sd_pidfd_get_unit(pidfd, &unit2); + log_info("sd_pidfd_get_unit(pidfd, …) → %s / \"%s\"", e(r), strnull(unit2)); + assert_se(IN_SET(r, 0, -ENODATA)); + + r = sd_pidfd_get_user_unit(pidfd, &user_unit2); + log_info("sd_pidfd_get_user_unit(pidfd, …) → %s / \"%s\"", e(r), strnull(user_unit2)); + assert_se(IN_SET(r, 0, -ENODATA)); + + r = sd_pidfd_get_slice(pidfd, &slice2); + log_info("sd_pidfd_get_slice(pidfd, …) → %s / \"%s\"", e(r), strnull(slice2)); + assert_se(IN_SET(r, 0, -ENODATA)); + + r = sd_pidfd_get_owner_uid(pidfd, &u2); + log_info("sd_pidfd_get_owner_uid(pidfd, …) → %s / "UID_FMT, e(r), u2); + assert_se(IN_SET(r, 0, -ENODATA)); + + r = sd_pidfd_get_session(pidfd, &session2); + log_info("sd_pidfd_get_session(pidfd, …) → %s / \"%s\"", e(r), strnull(session2)); + + r = sd_pidfd_get_cgroup(pidfd, &cgroup2); + log_info("sd_pidfd_get_cgroup(pidfd, …) → %s / \"%s\"", e(r), strnull(cgroup2)); + assert_se(IN_SET(r, 0, -ENOMEDIUM)); + } + + r = sd_uid_get_display(u2, &display_session); + log_info("sd_uid_get_display("UID_FMT", …) → %s / \"%s\"", u2, e(r), strnull(display_session)); + if (u2 == UID_INVALID) + assert_se(r == -EINVAL); + else + assert_se(IN_SET(r, 0, -ENODATA)); + + assert_se(socketpair(AF_UNIX, SOCK_STREAM, 0, pair) == 0); + sd_peer_get_session(pair[0], &pp); + sd_peer_get_session(pair[1], &qq); + assert_se(streq_ptr(pp, qq)); + + r = sd_uid_get_sessions(u2, false, &sessions); + assert_se(t = strv_join(sessions, " ")); + log_info("sd_uid_get_sessions("UID_FMT", …) → %s \"%s\"", u2, e(r), t); + if (u2 == UID_INVALID) + assert_se(r == -EINVAL); + else { + assert_se(r >= 0); + assert_se(r == (int) strv_length(sessions)); + } + sessions = strv_free(sessions); + free(t); + + assert_se(r == sd_uid_get_sessions(u2, false, NULL)); + + r = sd_uid_get_seats(u2, false, &seats); + assert_se(t = strv_join(seats, " ")); + log_info("sd_uid_get_seats("UID_FMT", …) → %s \"%s\"", u2, e(r), t); + if (u2 == UID_INVALID) + assert_se(r == -EINVAL); + else { + assert_se(r >= 0); + assert_se(r == (int) strv_length(seats)); + } + seats = strv_free(seats); + free(t); + + assert_se(r == sd_uid_get_seats(u2, false, NULL)); + + if (session) { + r = sd_session_is_active(session); + if (r == -ENXIO) + log_notice("sd_session_is_active() failed with ENXIO, it seems logind is not running."); + else { + /* All those tests will fail with ENXIO, so let's skip them. */ + + assert_se(r >= 0); + log_info("sd_session_is_active(\"%s\") → %s", session, yes_no(r)); + + r = sd_session_is_remote(session); + assert_se(r >= 0); + log_info("sd_session_is_remote(\"%s\") → %s", session, yes_no(r)); + + r = sd_session_get_state(session, &state); + assert_se(r == 0); + log_info("sd_session_get_state(\"%s\") → \"%s\"", session, state); + + assert_se(sd_session_get_uid(session, &u) >= 0); + log_info("sd_session_get_uid(\"%s\") → "UID_FMT, session, u); + assert_se(u == u2); + + assert_se(sd_session_get_type(session, &type) >= 0); + log_info("sd_session_get_type(\"%s\") → \"%s\"", session, type); + + assert_se(sd_session_get_class(session, &class) >= 0); + log_info("sd_session_get_class(\"%s\") → \"%s\"", session, class); + + r = sd_session_get_display(session, &display); + assert_se(IN_SET(r, 0, -ENODATA)); + log_info("sd_session_get_display(\"%s\") → \"%s\"", session, strna(display)); + + r = sd_session_get_remote_user(session, &remote_user); + assert_se(IN_SET(r, 0, -ENODATA)); + log_info("sd_session_get_remote_user(\"%s\") → \"%s\"", + session, strna(remote_user)); + + r = sd_session_get_remote_host(session, &remote_host); + assert_se(IN_SET(r, 0, -ENODATA)); + log_info("sd_session_get_remote_host(\"%s\") → \"%s\"", + session, strna(remote_host)); + + r = sd_session_get_seat(session, &seat); + if (r >= 0) { + assert_se(seat); + + log_info("sd_session_get_seat(\"%s\") → \"%s\"", session, seat); + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + r = sd_seat_can_multi_session(seat); +#pragma GCC diagnostic pop + assert_se(r == 1); + log_info("sd_session_can_multi_seat(\"%s\") → %s", seat, yes_no(r)); + + r = sd_seat_can_tty(seat); + assert_se(r >= 0); + log_info("sd_session_can_tty(\"%s\") → %s", seat, yes_no(r)); + + r = sd_seat_can_graphical(seat); + assert_se(r >= 0); + log_info("sd_session_can_graphical(\"%s\") → %s", seat, yes_no(r)); + } else { + log_info_errno(r, "sd_session_get_seat(\"%s\"): %m", session); + assert_se(r == -ENODATA); + } + + assert_se(sd_uid_get_state(u, &state2) == 0); + log_info("sd_uid_get_state("UID_FMT", …) → %s", u, state2); + } + } + + if (seat) { + _cleanup_free_ char *session2 = NULL, *buf = NULL; + _cleanup_free_ uid_t *uids = NULL; + unsigned n; + + assert_se(sd_uid_is_on_seat(u, 0, seat) > 0); + + r = sd_seat_get_active(seat, &session2, &u2); + assert_se(r == 0); + log_info("sd_seat_get_active(\"%s\", …) → \"%s\", "UID_FMT, seat, session2, u2); + + r = sd_uid_is_on_seat(u, 1, seat); + assert_se(IN_SET(r, 0, 1)); + assert_se(!!r == streq(session, session2)); + + r = sd_seat_get_sessions(seat, &sessions, &uids, &n); + assert_se(r >= 0); + assert_se(r == (int) strv_length(sessions)); + assert_se(t = strv_join(sessions, " ")); + strv_free(sessions); + log_info("sd_seat_get_sessions(\"%s\", …) → %s, \"%s\", [%u] {%s}", + seat, e(r), t, n, format_uids(&buf, uids, n)); + free(t); + + assert_se(sd_seat_get_sessions(seat, NULL, NULL, NULL) == r); + } + + r = sd_get_seats(&seats); + assert_se(r >= 0); + assert_se(r == (int) strv_length(seats)); + assert_se(t = strv_join(seats, ", ")); + strv_free(seats); + log_info("sd_get_seats(…) → [%i] \"%s\"", r, t); + t = mfree(t); + + assert_se(sd_get_seats(NULL) == r); + + r = sd_seat_get_active(NULL, &t, NULL); + assert_se(IN_SET(r, 0, -ENODATA, -ENXIO)); + log_info("sd_seat_get_active(NULL, …) (active session on current seat) → %s / \"%s\"", e(r), strnull(t)); + free(t); + + r = sd_get_sessions(&sessions); + assert_se(r >= 0); + assert_se(r == (int) strv_length(sessions)); + assert_se(t = strv_join(sessions, ", ")); + strv_free(sessions); + log_info("sd_get_sessions(…) → [%i] \"%s\"", r, t); + free(t); + + assert_se(sd_get_sessions(NULL) == r); + + { + _cleanup_free_ uid_t *uids = NULL; + _cleanup_free_ char *buf = NULL; + + r = sd_get_uids(&uids); + assert_se(r >= 0); + log_info("sd_get_uids(…) → [%i] {%s}", r, format_uids(&buf, uids, r)); + + assert_se(sd_get_uids(NULL) == r); + } + + { + _cleanup_strv_free_ char **machines = NULL; + _cleanup_free_ char *buf = NULL; + + r = sd_get_machine_names(&machines); + assert_se(r >= 0); + assert_se(r == (int) strv_length(machines)); + assert_se(buf = strv_join(machines, " ")); + log_info("sd_get_machines(…) → [%i] \"%s\"", r, buf); + + assert_se(sd_get_machine_names(NULL) == r); + } +} + +TEST(monitor) { + sd_login_monitor *m = NULL; + int r; + + if (!streq_ptr(saved_argv[1], "-m")) + return; + + assert_se(sd_login_monitor_new("session", &m) == 0); + + for (unsigned n = 0; n < 5; n++) { + struct pollfd pollfd = {}; + usec_t timeout, nw; + + assert_se((pollfd.fd = sd_login_monitor_get_fd(m)) >= 0); + assert_se((pollfd.events = sd_login_monitor_get_events(m)) >= 0); + + assert_se(sd_login_monitor_get_timeout(m, &timeout) >= 0); + + nw = now(CLOCK_MONOTONIC); + + r = poll(&pollfd, 1, + timeout == UINT64_MAX ? -1 : + timeout > nw ? (int) ((timeout - nw) / 1000) : + 0); + + assert_se(r >= 0); + + sd_login_monitor_flush(m); + printf("Wake!\n"); + } + + sd_login_monitor_unref(m); +} + +static int intro(void) { + log_info("/* Information printed is from the live system */"); + return EXIT_SUCCESS; +} + +DEFINE_TEST_MAIN_WITH_INTRO(LOG_INFO, intro); diff --git a/src/libsystemd/sd-netlink/netlink-genl.c b/src/libsystemd/sd-netlink/netlink-genl.c new file mode 100644 index 0000000..1dc62e8 --- /dev/null +++ b/src/libsystemd/sd-netlink/netlink-genl.c @@ -0,0 +1,488 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <linux/genetlink.h> + +#include "sd-netlink.h" + +#include "alloc-util.h" +#include "netlink-genl.h" +#include "netlink-internal.h" +#include "netlink-types.h" + +typedef struct GenericNetlinkFamily { + sd_netlink *genl; + + const NLAPolicySet *policy_set; + + uint16_t id; /* a.k.a nlmsg_type */ + char *name; + uint32_t version; + uint32_t additional_header_size; + Hashmap *multicast_group_by_name; +} GenericNetlinkFamily; + +static const GenericNetlinkFamily nlctrl_static = { + .id = GENL_ID_CTRL, + .name = (char*) CTRL_GENL_NAME, + .version = 0x01, +}; + +static GenericNetlinkFamily *genl_family_free(GenericNetlinkFamily *f) { + if (!f) + return NULL; + + if (f->genl) { + if (f->id > 0) + hashmap_remove(f->genl->genl_family_by_id, UINT_TO_PTR(f->id)); + if (f->name) + hashmap_remove(f->genl->genl_family_by_name, f->name); + } + + free(f->name); + hashmap_free(f->multicast_group_by_name); + + return mfree(f); +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(GenericNetlinkFamily*, genl_family_free); + +void genl_clear_family(sd_netlink *nl) { + assert(nl); + + nl->genl_family_by_name = hashmap_free_with_destructor(nl->genl_family_by_name, genl_family_free); + nl->genl_family_by_id = hashmap_free_with_destructor(nl->genl_family_by_id, genl_family_free); +} + +static int genl_family_new_unsupported( + sd_netlink *nl, + const char *family_name, + const NLAPolicySet *policy_set) { + + _cleanup_(genl_family_freep) GenericNetlinkFamily *f = NULL; + int r; + + assert(nl); + assert(family_name); + assert(policy_set); + + /* Kernel does not support the genl family? To prevent from resolving the family name again, + * let's store the family with zero id to indicate that. */ + + f = new(GenericNetlinkFamily, 1); + if (!f) + return -ENOMEM; + + *f = (GenericNetlinkFamily) { + .policy_set = policy_set, + }; + + f->name = strdup(family_name); + if (!f->name) + return -ENOMEM; + + r = hashmap_ensure_put(&nl->genl_family_by_name, &string_hash_ops, f->name, f); + if (r < 0) + return r; + + f->genl = nl; + TAKE_PTR(f); + return 0; +} + +static int genl_family_new( + sd_netlink *nl, + const char *expected_family_name, + const NLAPolicySet *policy_set, + sd_netlink_message *message, + const GenericNetlinkFamily **ret) { + + _cleanup_(genl_family_freep) GenericNetlinkFamily *f = NULL; + const char *family_name; + uint8_t cmd; + int r; + + assert(nl); + assert(expected_family_name); + assert(policy_set); + assert(message); + assert(ret); + + f = new(GenericNetlinkFamily, 1); + if (!f) + return -ENOMEM; + + *f = (GenericNetlinkFamily) { + .policy_set = policy_set, + }; + + r = sd_genl_message_get_family_name(nl, message, &family_name); + if (r < 0) + return r; + + if (!streq(family_name, CTRL_GENL_NAME)) + return -EINVAL; + + r = sd_genl_message_get_command(nl, message, &cmd); + if (r < 0) + return r; + + if (cmd != CTRL_CMD_NEWFAMILY) + return -EINVAL; + + r = sd_netlink_message_read_u16(message, CTRL_ATTR_FAMILY_ID, &f->id); + if (r < 0) + return r; + + r = sd_netlink_message_read_string_strdup(message, CTRL_ATTR_FAMILY_NAME, &f->name); + if (r < 0) + return r; + + if (!streq(f->name, expected_family_name)) + return -EINVAL; + + r = sd_netlink_message_read_u32(message, CTRL_ATTR_VERSION, &f->version); + if (r < 0) + return r; + + r = sd_netlink_message_read_u32(message, CTRL_ATTR_HDRSIZE, &f->additional_header_size); + if (r < 0) + return r; + + r = sd_netlink_message_enter_container(message, CTRL_ATTR_MCAST_GROUPS); + if (r >= 0) { + for (uint16_t i = 0; i < UINT16_MAX; i++) { + _cleanup_free_ char *group_name = NULL; + uint32_t group_id; + + r = sd_netlink_message_enter_array(message, i + 1); + if (r == -ENODATA) + break; + if (r < 0) + return r; + + r = sd_netlink_message_read_u32(message, CTRL_ATTR_MCAST_GRP_ID, &group_id); + if (r < 0) + return r; + + r = sd_netlink_message_read_string_strdup(message, CTRL_ATTR_MCAST_GRP_NAME, &group_name); + if (r < 0) + return r; + + r = sd_netlink_message_exit_container(message); + if (r < 0) + return r; + + if (group_id == 0) { + log_debug("sd-netlink: received multicast group '%s' for generic netlink family '%s' with id == 0, ignoring", + group_name, f->name); + continue; + } + + r = hashmap_ensure_put(&f->multicast_group_by_name, &string_hash_ops_free, group_name, UINT32_TO_PTR(group_id)); + if (r < 0) + return r; + + TAKE_PTR(group_name); + } + + r = sd_netlink_message_exit_container(message); + if (r < 0) + return r; + } + + r = hashmap_ensure_put(&nl->genl_family_by_id, NULL, UINT_TO_PTR(f->id), f); + if (r < 0) + return r; + + r = hashmap_ensure_put(&nl->genl_family_by_name, &string_hash_ops, f->name, f); + if (r < 0) { + hashmap_remove(nl->genl_family_by_id, UINT_TO_PTR(f->id)); + return r; + } + + f->genl = nl; + *ret = TAKE_PTR(f); + return 0; +} + +static const NLAPolicySet *genl_family_get_policy_set(const GenericNetlinkFamily *family) { + assert(family); + + if (family->policy_set) + return family->policy_set; + + return genl_get_policy_set_by_name(family->name); +} + +static int genl_message_new( + sd_netlink *nl, + const GenericNetlinkFamily *family, + uint8_t cmd, + sd_netlink_message **ret) { + + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + const NLAPolicySet *policy_set; + int r; + + assert(nl); + assert(nl->protocol == NETLINK_GENERIC); + assert(family); + assert(ret); + + policy_set = genl_family_get_policy_set(family); + if (!policy_set) + return -EOPNOTSUPP; + + r = message_new_full(nl, family->id, policy_set, + sizeof(struct genlmsghdr) + family->additional_header_size, &m); + if (r < 0) + return r; + + *(struct genlmsghdr *) NLMSG_DATA(m->hdr) = (struct genlmsghdr) { + .cmd = cmd, + .version = family->version, + }; + + *ret = TAKE_PTR(m); + return 0; +} + +static int genl_family_get_by_name_internal( + sd_netlink *nl, + const GenericNetlinkFamily *ctrl, + const char *name, + const GenericNetlinkFamily **ret) { + + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL; + const NLAPolicySet *policy_set; + int r; + + assert(nl); + assert(nl->protocol == NETLINK_GENERIC); + assert(ctrl); + assert(name); + assert(ret); + + policy_set = genl_get_policy_set_by_name(name); + if (!policy_set) + return -EOPNOTSUPP; + + r = genl_message_new(nl, ctrl, CTRL_CMD_GETFAMILY, &req); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(req, CTRL_ATTR_FAMILY_NAME, name); + if (r < 0) + return r; + + if (sd_netlink_call(nl, req, 0, &reply) < 0) { + (void) genl_family_new_unsupported(nl, name, policy_set); + return -EOPNOTSUPP; + } + + return genl_family_new(nl, name, policy_set, reply, ret); +} + +static int genl_family_get_by_name(sd_netlink *nl, const char *name, const GenericNetlinkFamily **ret) { + const GenericNetlinkFamily *f, *ctrl; + int r; + + assert(nl); + assert(nl->protocol == NETLINK_GENERIC); + assert(name); + assert(ret); + + f = hashmap_get(nl->genl_family_by_name, name); + if (f) { + if (f->id == 0) /* kernel does not support the family. */ + return -EOPNOTSUPP; + + *ret = f; + return 0; + } + + if (streq(name, CTRL_GENL_NAME)) + return genl_family_get_by_name_internal(nl, &nlctrl_static, CTRL_GENL_NAME, ret); + + ctrl = hashmap_get(nl->genl_family_by_name, CTRL_GENL_NAME); + if (!ctrl) { + r = genl_family_get_by_name_internal(nl, &nlctrl_static, CTRL_GENL_NAME, &ctrl); + if (r < 0) + return r; + } + + return genl_family_get_by_name_internal(nl, ctrl, name, ret); +} + +static int genl_family_get_by_id(sd_netlink *nl, uint16_t id, const GenericNetlinkFamily **ret) { + const GenericNetlinkFamily *f; + + assert(nl); + assert(nl->protocol == NETLINK_GENERIC); + assert(ret); + + f = hashmap_get(nl->genl_family_by_id, UINT_TO_PTR(id)); + if (f) { + *ret = f; + return 0; + } + + if (id == GENL_ID_CTRL) { + *ret = &nlctrl_static; + return 0; + } + + return -ENOENT; +} + +int genl_get_policy_set_and_header_size( + sd_netlink *nl, + uint16_t id, + const NLAPolicySet **ret_policy_set, + size_t *ret_header_size) { + + const GenericNetlinkFamily *f; + int r; + + assert(nl); + assert(nl->protocol == NETLINK_GENERIC); + + r = genl_family_get_by_id(nl, id, &f); + if (r < 0) + return r; + + if (ret_policy_set) { + const NLAPolicySet *p; + + p = genl_family_get_policy_set(f); + if (!p) + return -EOPNOTSUPP; + + *ret_policy_set = p; + } + if (ret_header_size) + *ret_header_size = sizeof(struct genlmsghdr) + f->additional_header_size; + return 0; +} + +int sd_genl_message_new(sd_netlink *nl, const char *family_name, uint8_t cmd, sd_netlink_message **ret) { + const GenericNetlinkFamily *family; + int r; + + assert_return(nl, -EINVAL); + assert_return(nl->protocol == NETLINK_GENERIC, -EINVAL); + assert_return(family_name, -EINVAL); + assert_return(ret, -EINVAL); + + r = genl_family_get_by_name(nl, family_name, &family); + if (r < 0) + return r; + + return genl_message_new(nl, family, cmd, ret); +} + +int sd_genl_message_get_family_name(sd_netlink *nl, sd_netlink_message *m, const char **ret) { + const GenericNetlinkFamily *family; + uint16_t nlmsg_type; + int r; + + assert_return(nl, -EINVAL); + assert_return(nl->protocol == NETLINK_GENERIC, -EINVAL); + assert_return(m, -EINVAL); + assert_return(ret, -EINVAL); + + r = sd_netlink_message_get_type(m, &nlmsg_type); + if (r < 0) + return r; + + r = genl_family_get_by_id(nl, nlmsg_type, &family); + if (r < 0) + return r; + + *ret = family->name; + return 0; +} + +int sd_genl_message_get_command(sd_netlink *nl, sd_netlink_message *m, uint8_t *ret) { + struct genlmsghdr *h; + uint16_t nlmsg_type; + size_t size; + int r; + + assert_return(nl, -EINVAL); + assert_return(nl->protocol == NETLINK_GENERIC, -EINVAL); + assert_return(m, -EINVAL); + assert_return(m->protocol == NETLINK_GENERIC, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(ret, -EINVAL); + + r = sd_netlink_message_get_type(m, &nlmsg_type); + if (r < 0) + return r; + + r = genl_get_policy_set_and_header_size(nl, nlmsg_type, NULL, &size); + if (r < 0) + return r; + + if (m->hdr->nlmsg_len < NLMSG_LENGTH(size)) + return -EBADMSG; + + h = NLMSG_DATA(m->hdr); + + *ret = h->cmd; + return 0; +} + +static int genl_family_get_multicast_group_id_by_name(const GenericNetlinkFamily *f, const char *name, uint32_t *ret) { + void *p; + + assert(f); + assert(name); + + p = hashmap_get(f->multicast_group_by_name, name); + if (!p) + return -ENOENT; + + if (ret) + *ret = PTR_TO_UINT32(p); + return 0; +} + +int sd_genl_add_match( + sd_netlink *nl, + sd_netlink_slot **ret_slot, + const char *family_name, + const char *multicast_group_name, + uint8_t command, + sd_netlink_message_handler_t callback, + sd_netlink_destroy_t destroy_callback, + void *userdata, + const char *description) { + + const GenericNetlinkFamily *f; + uint32_t multicast_group_id; + int r; + + assert_return(nl, -EINVAL); + assert_return(nl->protocol == NETLINK_GENERIC, -EINVAL); + assert_return(callback, -EINVAL); + assert_return(family_name, -EINVAL); + assert_return(multicast_group_name, -EINVAL); + + /* If command == 0, then all commands belonging to the multicast group trigger the callback. */ + + r = genl_family_get_by_name(nl, family_name, &f); + if (r < 0) + return r; + + r = genl_family_get_multicast_group_id_by_name(f, multicast_group_name, &multicast_group_id); + if (r < 0) + return r; + + return netlink_add_match_internal(nl, ret_slot, &multicast_group_id, 1, f->id, command, + callback, destroy_callback, userdata, description); +} + +int sd_genl_socket_open(sd_netlink **ret) { + return netlink_open_family(ret, NETLINK_GENERIC); +} diff --git a/src/libsystemd/sd-netlink/netlink-genl.h b/src/libsystemd/sd-netlink/netlink-genl.h new file mode 100644 index 0000000..b06be05 --- /dev/null +++ b/src/libsystemd/sd-netlink/netlink-genl.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-netlink.h" + +#define CTRL_GENL_NAME "nlctrl" + +void genl_clear_family(sd_netlink *nl); diff --git a/src/libsystemd/sd-netlink/netlink-internal.h b/src/libsystemd/sd-netlink/netlink-internal.h new file mode 100644 index 0000000..891d3e8 --- /dev/null +++ b/src/libsystemd/sd-netlink/netlink-internal.h @@ -0,0 +1,212 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <linux/netlink.h> + +#include "sd-netlink.h" + +#include "list.h" +#include "netlink-types.h" +#include "ordered-set.h" +#include "prioq.h" +#include "time-util.h" + +#define NETLINK_DEFAULT_TIMEOUT_USEC ((usec_t) (25 * USEC_PER_SEC)) + +#define NETLINK_RQUEUE_MAX 64*1024 + +#define NETLINK_CONTAINER_DEPTH 32 + +struct reply_callback { + sd_netlink_message_handler_t callback; + usec_t timeout; + uint32_t serial; + unsigned prioq_idx; +}; + +struct match_callback { + sd_netlink_message_handler_t callback; + uint32_t *groups; + size_t n_groups; + uint16_t type; + uint8_t cmd; /* used by genl */ + + LIST_FIELDS(struct match_callback, match_callbacks); +}; + +typedef enum NetlinkSlotType { + NETLINK_REPLY_CALLBACK, + NETLINK_MATCH_CALLBACK, + _NETLINK_SLOT_INVALID = -EINVAL, +} NetlinkSlotType; + +struct sd_netlink_slot { + unsigned n_ref; + NetlinkSlotType type:8; + bool floating; + sd_netlink *netlink; + void *userdata; + sd_netlink_destroy_t destroy_callback; + + char *description; + + LIST_FIELDS(sd_netlink_slot, slots); + + union { + struct reply_callback reply_callback; + struct match_callback match_callback; + }; +}; + +struct sd_netlink { + unsigned n_ref; + + int fd; + + union { + struct sockaddr sa; + struct sockaddr_nl nl; + } sockaddr; + + int protocol; + + Hashmap *broadcast_group_refs; + bool broadcast_group_dont_leave:1; /* until we can rely on 4.2 */ + + OrderedSet *rqueue; + Hashmap *rqueue_by_serial; + Hashmap *rqueue_partial_by_serial; + + struct nlmsghdr *rbuffer; + + bool processing:1; + + uint32_t serial; + + struct Prioq *reply_callbacks_prioq; + Hashmap *reply_callbacks; + + LIST_HEAD(struct match_callback, match_callbacks); + + LIST_HEAD(sd_netlink_slot, slots); + + pid_t original_pid; + + sd_event_source *io_event_source; + sd_event_source *time_event_source; + sd_event_source *exit_event_source; + sd_event *event; + + Hashmap *genl_family_by_name; + Hashmap *genl_family_by_id; +}; + +struct netlink_attribute { + size_t offset; /* offset from hdr to attribute */ + bool nested:1; + bool net_byteorder:1; +}; + +struct netlink_container { + const struct NLAPolicySet *policy_set; /* the policy set of the container */ + size_t offset; /* offset from hdr to the start of the container */ + struct netlink_attribute *attributes; + uint16_t max_attribute; /* the maximum attribute in container */ +}; + +struct sd_netlink_message { + unsigned n_ref; + + int protocol; + + struct nlmsghdr *hdr; + struct netlink_container containers[NETLINK_CONTAINER_DEPTH]; + unsigned n_containers; /* number of containers */ + uint32_t multicast_group; + bool sealed:1; + + sd_netlink_message *next; /* next in a chain of multi-part messages */ +}; + +int message_new_empty(sd_netlink *nl, sd_netlink_message **ret); +int message_new_full( + sd_netlink *nl, + uint16_t nlmsg_type, + const NLAPolicySet *policy_set, + size_t header_size, + sd_netlink_message **ret); +int message_new(sd_netlink *nl, sd_netlink_message **ret, uint16_t type); +int message_new_synthetic_error(sd_netlink *nl, int error, uint32_t serial, sd_netlink_message **ret); + +static inline uint32_t message_get_serial(sd_netlink_message *m) { + assert(m); + return ASSERT_PTR(m->hdr)->nlmsg_seq; +} + +void message_seal(sd_netlink_message *m); + +int netlink_open_family(sd_netlink **ret, int family); +bool netlink_pid_changed(sd_netlink *nl); + +int socket_bind(sd_netlink *nl); +int socket_broadcast_group_ref(sd_netlink *nl, unsigned group); +int socket_broadcast_group_unref(sd_netlink *nl, unsigned group); +int socket_write_message(sd_netlink *nl, sd_netlink_message *m); +int socket_read_message(sd_netlink *nl); + +int netlink_add_match_internal( + sd_netlink *nl, + sd_netlink_slot **ret_slot, + const uint32_t *groups, + size_t n_groups, + uint16_t type, + uint8_t cmd, + sd_netlink_message_handler_t callback, + sd_netlink_destroy_t destroy_callback, + void *userdata, + const char *description); + +/* Make sure callbacks don't destroy the netlink connection */ +#define NETLINK_DONT_DESTROY(nl) \ + _cleanup_(sd_netlink_unrefp) _unused_ sd_netlink *_dont_destroy_##nl = sd_netlink_ref(nl) + +bool nfproto_is_valid(int nfproto); + +/* nfnl */ +/* TODO: to be exported later */ +int sd_nfnl_socket_open(sd_netlink **ret); +int sd_nfnl_send_batch( + sd_netlink *nfnl, + sd_netlink_message **messages, + size_t msgcount, + uint32_t **ret_serials); +int sd_nfnl_call_batch( + sd_netlink *nfnl, + sd_netlink_message **messages, + size_t n_messages, + uint64_t usec, + sd_netlink_message ***ret_messages); +int sd_nfnl_message_new( + sd_netlink *nfnl, + sd_netlink_message **ret, + int nfproto, + uint16_t subsys, + uint16_t msg_type, + uint16_t flags); +int sd_nfnl_nft_message_new_table(sd_netlink *nfnl, sd_netlink_message **ret, + int nfproto, const char *table); +int sd_nfnl_nft_message_new_basechain(sd_netlink *nfnl, sd_netlink_message **ret, + int nfproto, const char *table, const char *chain, + const char *type, uint8_t hook, int prio); +int sd_nfnl_nft_message_new_rule(sd_netlink *nfnl, sd_netlink_message **ret, + int nfproto, const char *table, const char *chain); +int sd_nfnl_nft_message_new_set(sd_netlink *nfnl, sd_netlink_message **ret, + int nfproto, const char *table, const char *set_name, + uint32_t setid, uint32_t klen); +int sd_nfnl_nft_message_new_setelems(sd_netlink *nfnl, sd_netlink_message **ret, + int add, int nfproto, const char *table, const char *set_name); +int sd_nfnl_nft_message_append_setelem(sd_netlink_message *m, + uint32_t index, + const void *key, size_t key_len, + const void *data, size_t data_len, + uint32_t flags); diff --git a/src/libsystemd/sd-netlink/netlink-message-nfnl.c b/src/libsystemd/sd-netlink/netlink-message-nfnl.c new file mode 100644 index 0000000..fd3055d --- /dev/null +++ b/src/libsystemd/sd-netlink/netlink-message-nfnl.c @@ -0,0 +1,417 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <netinet/in.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nf_tables.h> +#include <linux/netfilter.h> + +#include "sd-netlink.h" + +#include "iovec-util.h" +#include "netlink-internal.h" +#include "netlink-types.h" +#include "netlink-util.h" + +bool nfproto_is_valid(int nfproto) { + return IN_SET(nfproto, + NFPROTO_UNSPEC, + NFPROTO_INET, + NFPROTO_IPV4, + NFPROTO_ARP, + NFPROTO_NETDEV, + NFPROTO_BRIDGE, + NFPROTO_IPV6); +} + +int sd_nfnl_message_new(sd_netlink *nfnl, sd_netlink_message **ret, int nfproto, uint16_t subsys, uint16_t msg_type, uint16_t flags) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + assert_return(nfnl, -EINVAL); + assert_return(ret, -EINVAL); + assert_return(nfproto_is_valid(nfproto), -EINVAL); + assert_return(NFNL_MSG_TYPE(msg_type) == msg_type, -EINVAL); + + r = message_new(nfnl, &m, subsys << 8 | msg_type); + if (r < 0) + return r; + + m->hdr->nlmsg_flags |= flags; + + *(struct nfgenmsg*) NLMSG_DATA(m->hdr) = (struct nfgenmsg) { + .nfgen_family = nfproto, + .version = NFNETLINK_V0, + }; + + *ret = TAKE_PTR(m); + return 0; +} + +static int nfnl_message_set_res_id(sd_netlink_message *m, uint16_t res_id) { + struct nfgenmsg *nfgen; + + assert(m); + assert(m->hdr); + + nfgen = NLMSG_DATA(m->hdr); + nfgen->res_id = htobe16(res_id); + + return 0; +} + +static int nfnl_message_get_subsys(sd_netlink_message *m, uint16_t *ret) { + uint16_t t; + int r; + + assert(m); + assert(ret); + + r = sd_netlink_message_get_type(m, &t); + if (r < 0) + return r; + + *ret = NFNL_SUBSYS_ID(t); + return 0; +} + +static int nfnl_message_new_batch(sd_netlink *nfnl, sd_netlink_message **ret, uint16_t subsys, uint16_t msg_type) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + assert_return(nfnl, -EINVAL); + assert_return(ret, -EINVAL); + assert_return(NFNL_MSG_TYPE(msg_type) == msg_type, -EINVAL); + + r = sd_nfnl_message_new(nfnl, &m, NFPROTO_UNSPEC, NFNL_SUBSYS_NONE, msg_type, 0); + if (r < 0) + return r; + + r = nfnl_message_set_res_id(m, subsys); + if (r < 0) + return r; + + *ret = TAKE_PTR(m); + return 0; +} + +int sd_nfnl_send_batch( + sd_netlink *nfnl, + sd_netlink_message **messages, + size_t n_messages, + uint32_t **ret_serials) { + + /* iovs refs batch_begin and batch_end, hence, free iovs first, then free batch_begin and batch_end. */ + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *batch_begin = NULL, *batch_end = NULL; + _cleanup_free_ struct iovec *iovs = NULL; + _cleanup_free_ uint32_t *serials = NULL; + uint16_t subsys; + ssize_t k; + size_t c = 0; + int r; + + assert_return(nfnl, -EINVAL); + assert_return(!netlink_pid_changed(nfnl), -ECHILD); + assert_return(messages, -EINVAL); + assert_return(n_messages > 0, -EINVAL); + + iovs = new(struct iovec, n_messages + 2); + if (!iovs) + return -ENOMEM; + + if (ret_serials) { + serials = new(uint32_t, n_messages); + if (!serials) + return -ENOMEM; + } + + r = nfnl_message_get_subsys(messages[0], &subsys); + if (r < 0) + return r; + + r = nfnl_message_new_batch(nfnl, &batch_begin, subsys, NFNL_MSG_BATCH_BEGIN); + if (r < 0) + return r; + + netlink_seal_message(nfnl, batch_begin); + iovs[c++] = IOVEC_MAKE(batch_begin->hdr, batch_begin->hdr->nlmsg_len); + + for (size_t i = 0; i < n_messages; i++) { + uint16_t s; + + r = nfnl_message_get_subsys(messages[i], &s); + if (r < 0) + return r; + + if (s != subsys) + return -EINVAL; + + netlink_seal_message(nfnl, messages[i]); + if (serials) + serials[i] = message_get_serial(messages[i]); + + /* It seems that the kernel accepts an arbitrary number. Let's set the lower 16 bits of the + * serial of the first message. */ + nfnl_message_set_res_id(messages[i], (uint16_t) (message_get_serial(batch_begin) & UINT16_MAX)); + + iovs[c++] = IOVEC_MAKE(messages[i]->hdr, messages[i]->hdr->nlmsg_len); + } + + r = nfnl_message_new_batch(nfnl, &batch_end, subsys, NFNL_MSG_BATCH_END); + if (r < 0) + return r; + + netlink_seal_message(nfnl, batch_end); + iovs[c++] = IOVEC_MAKE(batch_end->hdr, batch_end->hdr->nlmsg_len); + + assert(c == n_messages + 2); + k = writev(nfnl->fd, iovs, n_messages + 2); + if (k < 0) + return -errno; + + if (ret_serials) + *ret_serials = TAKE_PTR(serials); + + return 0; +} + +int sd_nfnl_call_batch( + sd_netlink *nfnl, + sd_netlink_message **messages, + size_t n_messages, + uint64_t usec, + sd_netlink_message ***ret_messages) { + + _cleanup_free_ sd_netlink_message **replies = NULL; + _cleanup_free_ uint32_t *serials = NULL; + int r; + + assert_return(nfnl, -EINVAL); + assert_return(!netlink_pid_changed(nfnl), -ECHILD); + assert_return(messages, -EINVAL); + assert_return(n_messages > 0, -EINVAL); + + if (ret_messages) { + replies = new0(sd_netlink_message*, n_messages); + if (!replies) + return -ENOMEM; + } + + r = sd_nfnl_send_batch(nfnl, messages, n_messages, &serials); + if (r < 0) + return r; + + for (size_t i = 0; i < n_messages; i++) + RET_GATHER(r, + sd_netlink_read(nfnl, serials[i], usec, ret_messages ? replies + i : NULL)); + if (r < 0) + return r; + + if (ret_messages) + *ret_messages = TAKE_PTR(replies); + + return 0; +} + +int sd_nfnl_nft_message_new_basechain( + sd_netlink *nfnl, + sd_netlink_message **ret, + int nfproto, + const char *table, + const char *chain, + const char *type, + uint8_t hook, + int prio) { + + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + r = sd_nfnl_message_new(nfnl, &m, nfproto, NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWCHAIN, NLM_F_CREATE); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_CHAIN_TABLE, table); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_CHAIN_NAME, chain); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_CHAIN_TYPE, type); + if (r < 0) + return r; + + r = sd_netlink_message_open_container(m, NFTA_CHAIN_HOOK); + if (r < 0) + return r; + + r = sd_netlink_message_append_u32(m, NFTA_HOOK_HOOKNUM, htobe32(hook)); + if (r < 0) + return r; + + r = sd_netlink_message_append_u32(m, NFTA_HOOK_PRIORITY, htobe32(prio)); + if (r < 0) + return r; + + r = sd_netlink_message_close_container(m); + if (r < 0) + return r; + + *ret = TAKE_PTR(m); + return 0; +} + +int sd_nfnl_nft_message_new_table( + sd_netlink *nfnl, + sd_netlink_message **ret, + int nfproto, + const char *table) { + + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + r = sd_nfnl_message_new(nfnl, &m, nfproto, NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWTABLE, NLM_F_CREATE | NLM_F_EXCL); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_TABLE_NAME, table); + if (r < 0) + return r; + + *ret = TAKE_PTR(m); + return r; +} + +int sd_nfnl_nft_message_new_rule( + sd_netlink *nfnl, + sd_netlink_message **ret, + int nfproto, + const char *table, + const char *chain) { + + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + r = sd_nfnl_message_new(nfnl, &m, nfproto, NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWRULE, NLM_F_CREATE); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_RULE_TABLE, table); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_RULE_CHAIN, chain); + if (r < 0) + return r; + + *ret = TAKE_PTR(m); + return r; +} + +int sd_nfnl_nft_message_new_set( + sd_netlink *nfnl, + sd_netlink_message **ret, + int nfproto, + const char *table, + const char *set_name, + uint32_t set_id, + uint32_t klen) { + + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + r = sd_nfnl_message_new(nfnl, &m, nfproto, NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWSET, NLM_F_CREATE); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_SET_TABLE, table); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_SET_NAME, set_name); + if (r < 0) + return r; + + r = sd_netlink_message_append_u32(m, NFTA_SET_ID, ++set_id); + if (r < 0) + return r; + + r = sd_netlink_message_append_u32(m, NFTA_SET_KEY_LEN, htobe32(klen)); + if (r < 0) + return r; + + *ret = TAKE_PTR(m); + return r; +} + +int sd_nfnl_nft_message_new_setelems( + sd_netlink *nfnl, + sd_netlink_message **ret, + int add, /* boolean */ + int nfproto, + const char *table, + const char *set_name) { + + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + if (add) + r = sd_nfnl_message_new(nfnl, &m, nfproto, NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWSETELEM, NLM_F_CREATE); + else + r = sd_nfnl_message_new(nfnl, &m, nfproto, NFNL_SUBSYS_NFTABLES, NFT_MSG_DELSETELEM, 0); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_SET_ELEM_LIST_TABLE, table); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(m, NFTA_SET_ELEM_LIST_SET, set_name); + if (r < 0) + return r; + + *ret = TAKE_PTR(m); + return r; +} + +int sd_nfnl_nft_message_append_setelem( + sd_netlink_message *m, + uint32_t index, + const void *key, + size_t key_len, + const void *data, + size_t data_len, + uint32_t flags) { + + int r; + + r = sd_netlink_message_open_array(m, index); + if (r < 0) + return r; + + r = sd_netlink_message_append_container_data(m, NFTA_SET_ELEM_KEY, NFTA_DATA_VALUE, key, key_len); + if (r < 0) + goto cancel; + + if (data) { + r = sd_netlink_message_append_container_data(m, NFTA_SET_ELEM_DATA, NFTA_DATA_VALUE, data, data_len); + if (r < 0) + goto cancel; + } + + if (flags != 0) { + r = sd_netlink_message_append_u32(m, NFTA_SET_ELEM_FLAGS, htobe32(flags)); + if (r < 0) + goto cancel; + } + + return sd_netlink_message_close_container(m); /* array */ + +cancel: + (void) sd_netlink_message_cancel_array(m); + return r; +} + +int sd_nfnl_socket_open(sd_netlink **ret) { + return netlink_open_family(ret, NETLINK_NETFILTER); +} diff --git a/src/libsystemd/sd-netlink/netlink-message-rtnl.c b/src/libsystemd/sd-netlink/netlink-message-rtnl.c new file mode 100644 index 0000000..008e802 --- /dev/null +++ b/src/libsystemd/sd-netlink/netlink-message-rtnl.c @@ -0,0 +1,1204 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <netinet/in.h> +#include <linux/fib_rules.h> +#include <linux/if_addrlabel.h> +#include <linux/if_bridge.h> +#include <linux/nexthop.h> +#include <stdbool.h> +#include <unistd.h> + +#include "sd-netlink.h" + +#include "format-util.h" +#include "netlink-internal.h" +#include "netlink-types.h" +#include "netlink-util.h" +#include "socket-util.h" + +static bool rtnl_message_type_is_neigh(uint16_t type) { + return IN_SET(type, RTM_NEWNEIGH, RTM_GETNEIGH, RTM_DELNEIGH); +} + +static bool rtnl_message_type_is_route(uint16_t type) { + return IN_SET(type, RTM_NEWROUTE, RTM_GETROUTE, RTM_DELROUTE); +} + +static bool rtnl_message_type_is_nexthop(uint16_t type) { + return IN_SET(type, RTM_NEWNEXTHOP, RTM_GETNEXTHOP, RTM_DELNEXTHOP); +} + +static bool rtnl_message_type_is_link(uint16_t type) { + return IN_SET(type, + RTM_NEWLINK, RTM_SETLINK, RTM_GETLINK, RTM_DELLINK, + RTM_NEWLINKPROP, RTM_DELLINKPROP, RTM_GETLINKPROP); +} + +static bool rtnl_message_type_is_addr(uint16_t type) { + return IN_SET(type, RTM_NEWADDR, RTM_GETADDR, RTM_DELADDR); +} + +static bool rtnl_message_type_is_addrlabel(uint16_t type) { + return IN_SET(type, RTM_NEWADDRLABEL, RTM_DELADDRLABEL, RTM_GETADDRLABEL); +} + +static bool rtnl_message_type_is_routing_policy_rule(uint16_t type) { + return IN_SET(type, RTM_NEWRULE, RTM_DELRULE, RTM_GETRULE); +} + +static bool rtnl_message_type_is_traffic_control(uint16_t type) { + return IN_SET(type, + RTM_NEWQDISC, RTM_DELQDISC, RTM_GETQDISC, + RTM_NEWTCLASS, RTM_DELTCLASS, RTM_GETTCLASS); +} + +static bool rtnl_message_type_is_mdb(uint16_t type) { + return IN_SET(type, RTM_NEWMDB, RTM_DELMDB, RTM_GETMDB); +} + +int sd_rtnl_message_route_set_dst_prefixlen(sd_netlink_message *m, unsigned char prefixlen) { + struct rtmsg *rtm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL); + + rtm = NLMSG_DATA(m->hdr); + + if ((rtm->rtm_family == AF_INET && prefixlen > 32) || + (rtm->rtm_family == AF_INET6 && prefixlen > 128)) + return -ERANGE; + + rtm->rtm_dst_len = prefixlen; + + return 0; +} + +int sd_rtnl_message_route_set_src_prefixlen(sd_netlink_message *m, unsigned char prefixlen) { + struct rtmsg *rtm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL); + + rtm = NLMSG_DATA(m->hdr); + + if ((rtm->rtm_family == AF_INET && prefixlen > 32) || + (rtm->rtm_family == AF_INET6 && prefixlen > 128)) + return -ERANGE; + + rtm->rtm_src_len = prefixlen; + + return 0; +} + +int sd_rtnl_message_route_set_scope(sd_netlink_message *m, unsigned char scope) { + struct rtmsg *rtm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL); + + rtm = NLMSG_DATA(m->hdr); + + rtm->rtm_scope = scope; + + return 0; +} + +int sd_rtnl_message_route_set_flags(sd_netlink_message *m, unsigned flags) { + struct rtmsg *rtm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL); + + rtm = NLMSG_DATA(m->hdr); + + rtm->rtm_flags = flags; + + return 0; +} + +int sd_rtnl_message_route_get_flags(sd_netlink_message *m, unsigned *flags) { + struct rtmsg *rtm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL); + assert_return(flags, -EINVAL); + + rtm = NLMSG_DATA(m->hdr); + + *flags = rtm->rtm_flags; + + return 0; +} + +int sd_rtnl_message_route_set_table(sd_netlink_message *m, unsigned char table) { + struct rtmsg *rtm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL); + + rtm = NLMSG_DATA(m->hdr); + + rtm->rtm_table = table; + + return 0; +} + +int sd_rtnl_message_route_get_family(sd_netlink_message *m, int *family) { + struct rtmsg *rtm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL); + assert_return(family, -EINVAL); + + rtm = NLMSG_DATA(m->hdr); + + *family = rtm->rtm_family; + + return 0; +} + +int sd_rtnl_message_route_get_type(sd_netlink_message *m, unsigned char *type) { + struct rtmsg *rtm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL); + assert_return(type, -EINVAL); + + rtm = NLMSG_DATA(m->hdr); + + *type = rtm->rtm_type; + + return 0; +} + +int sd_rtnl_message_route_set_type(sd_netlink_message *m, unsigned char type) { + struct rtmsg *rtm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL); + + rtm = NLMSG_DATA(m->hdr); + + rtm->rtm_type = type; + + return 0; +} + +int sd_rtnl_message_route_get_protocol(sd_netlink_message *m, unsigned char *protocol) { + struct rtmsg *rtm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL); + assert_return(protocol, -EINVAL); + + rtm = NLMSG_DATA(m->hdr); + + *protocol = rtm->rtm_protocol; + + return 0; +} + +int sd_rtnl_message_route_get_scope(sd_netlink_message *m, unsigned char *scope) { + struct rtmsg *rtm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL); + assert_return(scope, -EINVAL); + + rtm = NLMSG_DATA(m->hdr); + + *scope = rtm->rtm_scope; + + return 0; +} + +int sd_rtnl_message_route_get_tos(sd_netlink_message *m, uint8_t *tos) { + struct rtmsg *rtm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL); + assert_return(tos, -EINVAL); + + rtm = NLMSG_DATA(m->hdr); + + *tos = rtm->rtm_tos; + + return 0; +} + +int sd_rtnl_message_route_get_table(sd_netlink_message *m, unsigned char *table) { + struct rtmsg *rtm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL); + assert_return(table, -EINVAL); + + rtm = NLMSG_DATA(m->hdr); + + *table = rtm->rtm_table; + + return 0; +} + +int sd_rtnl_message_route_get_dst_prefixlen(sd_netlink_message *m, unsigned char *dst_len) { + struct rtmsg *rtm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL); + assert_return(dst_len, -EINVAL); + + rtm = NLMSG_DATA(m->hdr); + + *dst_len = rtm->rtm_dst_len; + + return 0; +} + +int sd_rtnl_message_route_get_src_prefixlen(sd_netlink_message *m, unsigned char *src_len) { + struct rtmsg *rtm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_route(m->hdr->nlmsg_type), -EINVAL); + assert_return(src_len, -EINVAL); + + rtm = NLMSG_DATA(m->hdr); + + *src_len = rtm->rtm_src_len; + + return 0; +} + +int sd_rtnl_message_new_route(sd_netlink *rtnl, sd_netlink_message **ret, + uint16_t nlmsg_type, int rtm_family, + unsigned char rtm_protocol) { + struct rtmsg *rtm; + int r; + + assert_return(rtnl_message_type_is_route(nlmsg_type), -EINVAL); + assert_return((nlmsg_type == RTM_GETROUTE && rtm_family == AF_UNSPEC) || + IN_SET(rtm_family, AF_INET, AF_INET6), -EINVAL); + assert_return(ret, -EINVAL); + + r = message_new(rtnl, ret, nlmsg_type); + if (r < 0) + return r; + + if (nlmsg_type == RTM_NEWROUTE) + (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_APPEND; + + rtm = NLMSG_DATA((*ret)->hdr); + + rtm->rtm_family = rtm_family; + rtm->rtm_protocol = rtm_protocol; + + return 0; +} + +int sd_rtnl_message_new_nexthop(sd_netlink *rtnl, sd_netlink_message **ret, + uint16_t nlmsg_type, int nh_family, + unsigned char nh_protocol) { + struct nhmsg *nhm; + int r; + + assert_return(rtnl_message_type_is_nexthop(nlmsg_type), -EINVAL); + switch (nlmsg_type) { + case RTM_DELNEXTHOP: + assert_return(nh_family == AF_UNSPEC, -EINVAL); + _fallthrough_; + case RTM_GETNEXTHOP: + assert_return(nh_protocol == RTPROT_UNSPEC, -EINVAL); + break; + case RTM_NEWNEXTHOP: + assert_return(IN_SET(nh_family, AF_UNSPEC, AF_INET, AF_INET6), -EINVAL); + break; + default: + assert_not_reached(); + } + assert_return(ret, -EINVAL); + + r = message_new(rtnl, ret, nlmsg_type); + if (r < 0) + return r; + + if (nlmsg_type == RTM_NEWNEXTHOP) + (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_APPEND; + + nhm = NLMSG_DATA((*ret)->hdr); + + nhm->nh_family = nh_family; + nhm->nh_scope = RT_SCOPE_UNIVERSE; + nhm->nh_protocol = nh_protocol; + + return 0; +} + +int sd_rtnl_message_nexthop_set_flags(sd_netlink_message *m, uint8_t flags) { + struct nhmsg *nhm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(m->hdr->nlmsg_type == RTM_NEWNEXTHOP, -EINVAL); + + nhm = NLMSG_DATA(m->hdr); + nhm->nh_flags = flags; + + return 0; +} + +int sd_rtnl_message_nexthop_get_flags(sd_netlink_message *m, uint8_t *ret) { + struct nhmsg *nhm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_nexthop(m->hdr->nlmsg_type), -EINVAL); + assert_return(ret, -EINVAL); + + nhm = NLMSG_DATA(m->hdr); + *ret = nhm->nh_flags; + + return 0; +} + +int sd_rtnl_message_nexthop_get_family(sd_netlink_message *m, uint8_t *family) { + struct nhmsg *nhm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_nexthop(m->hdr->nlmsg_type), -EINVAL); + assert_return(family, -EINVAL); + + nhm = NLMSG_DATA(m->hdr); + *family = nhm->nh_family; + + return 0; +} + +int sd_rtnl_message_nexthop_get_protocol(sd_netlink_message *m, uint8_t *protocol) { + struct nhmsg *nhm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_nexthop(m->hdr->nlmsg_type), -EINVAL); + assert_return(protocol, -EINVAL); + + nhm = NLMSG_DATA(m->hdr); + *protocol = nhm->nh_protocol; + + return 0; +} + +int sd_rtnl_message_neigh_set_flags(sd_netlink_message *m, uint8_t flags) { + struct ndmsg *ndm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL); + + ndm = NLMSG_DATA(m->hdr); + ndm->ndm_flags = flags; + + return 0; +} + +int sd_rtnl_message_neigh_set_state(sd_netlink_message *m, uint16_t state) { + struct ndmsg *ndm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL); + + ndm = NLMSG_DATA(m->hdr); + ndm->ndm_state = state; + + return 0; +} + +int sd_rtnl_message_neigh_get_flags(sd_netlink_message *m, uint8_t *flags) { + struct ndmsg *ndm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL); + + ndm = NLMSG_DATA(m->hdr); + *flags = ndm->ndm_flags; + + return 0; +} + +int sd_rtnl_message_neigh_get_state(sd_netlink_message *m, uint16_t *state) { + struct ndmsg *ndm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL); + + ndm = NLMSG_DATA(m->hdr); + *state = ndm->ndm_state; + + return 0; +} + +int sd_rtnl_message_neigh_get_family(sd_netlink_message *m, int *family) { + struct ndmsg *ndm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL); + assert_return(family, -EINVAL); + + ndm = NLMSG_DATA(m->hdr); + + *family = ndm->ndm_family; + + return 0; +} + +int sd_rtnl_message_neigh_get_ifindex(sd_netlink_message *m, int *index) { + struct ndmsg *ndm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_neigh(m->hdr->nlmsg_type), -EINVAL); + assert_return(index, -EINVAL); + + ndm = NLMSG_DATA(m->hdr); + + *index = ndm->ndm_ifindex; + + return 0; +} + +int sd_rtnl_message_new_neigh( + sd_netlink *rtnl, + sd_netlink_message **ret, + uint16_t nlmsg_type, + int index, + int ndm_family) { + + struct ndmsg *ndm; + int r; + + assert_return(rtnl_message_type_is_neigh(nlmsg_type), -EINVAL); + assert_return(IN_SET(ndm_family, AF_UNSPEC, AF_INET, AF_INET6, AF_BRIDGE), -EINVAL); + assert_return(ret, -EINVAL); + + r = message_new(rtnl, ret, nlmsg_type); + if (r < 0) + return r; + + if (nlmsg_type == RTM_NEWNEIGH) { + if (ndm_family == AF_BRIDGE) + (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_APPEND; + else + (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_REPLACE; + } + + ndm = NLMSG_DATA((*ret)->hdr); + + ndm->ndm_family = ndm_family; + ndm->ndm_ifindex = index; + + return 0; +} + +int sd_rtnl_message_link_set_flags(sd_netlink_message *m, unsigned flags, unsigned change) { + struct ifinfomsg *ifi; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL); + assert_return(change != 0, -EINVAL); + + ifi = NLMSG_DATA(m->hdr); + + ifi->ifi_flags = flags; + ifi->ifi_change = change; + + return 0; +} + +int sd_rtnl_message_link_set_type(sd_netlink_message *m, unsigned type) { + struct ifinfomsg *ifi; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL); + + ifi = NLMSG_DATA(m->hdr); + + ifi->ifi_type = type; + + return 0; +} + +int sd_rtnl_message_link_set_family(sd_netlink_message *m, unsigned family) { + struct ifinfomsg *ifi; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL); + + ifi = NLMSG_DATA(m->hdr); + + ifi->ifi_family = family; + + return 0; +} + +int sd_rtnl_message_new_link(sd_netlink *rtnl, sd_netlink_message **ret, + uint16_t nlmsg_type, int index) { + struct ifinfomsg *ifi; + int r; + + assert_return(rtnl_message_type_is_link(nlmsg_type), -EINVAL); + assert_return(ret, -EINVAL); + + r = message_new(rtnl, ret, nlmsg_type); + if (r < 0) + return r; + + if (nlmsg_type == RTM_NEWLINK) + (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL; + else if (nlmsg_type == RTM_NEWLINKPROP) + (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL | NLM_F_APPEND; + + ifi = NLMSG_DATA((*ret)->hdr); + + ifi->ifi_family = AF_UNSPEC; + ifi->ifi_index = index; + + return 0; +} + +int sd_rtnl_message_addr_set_prefixlen(sd_netlink_message *m, unsigned char prefixlen) { + struct ifaddrmsg *ifa; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL); + + ifa = NLMSG_DATA(m->hdr); + + if ((ifa->ifa_family == AF_INET && prefixlen > 32) || + (ifa->ifa_family == AF_INET6 && prefixlen > 128)) + return -ERANGE; + + ifa->ifa_prefixlen = prefixlen; + + return 0; +} + +int sd_rtnl_message_addr_set_flags(sd_netlink_message *m, unsigned char flags) { + struct ifaddrmsg *ifa; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL); + + ifa = NLMSG_DATA(m->hdr); + + ifa->ifa_flags = flags; + + return 0; +} + +int sd_rtnl_message_addr_set_scope(sd_netlink_message *m, unsigned char scope) { + struct ifaddrmsg *ifa; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL); + + ifa = NLMSG_DATA(m->hdr); + + ifa->ifa_scope = scope; + + return 0; +} + +int sd_rtnl_message_addr_get_family(sd_netlink_message *m, int *ret_family) { + struct ifaddrmsg *ifa; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL); + assert_return(ret_family, -EINVAL); + + ifa = NLMSG_DATA(m->hdr); + + *ret_family = ifa->ifa_family; + + return 0; +} + +int sd_rtnl_message_addr_get_prefixlen(sd_netlink_message *m, unsigned char *ret_prefixlen) { + struct ifaddrmsg *ifa; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL); + assert_return(ret_prefixlen, -EINVAL); + + ifa = NLMSG_DATA(m->hdr); + + *ret_prefixlen = ifa->ifa_prefixlen; + + return 0; +} + +int sd_rtnl_message_addr_get_scope(sd_netlink_message *m, unsigned char *ret_scope) { + struct ifaddrmsg *ifa; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL); + assert_return(ret_scope, -EINVAL); + + ifa = NLMSG_DATA(m->hdr); + + *ret_scope = ifa->ifa_scope; + + return 0; +} + +int sd_rtnl_message_addr_get_flags(sd_netlink_message *m, unsigned char *ret_flags) { + struct ifaddrmsg *ifa; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL); + assert_return(ret_flags, -EINVAL); + + ifa = NLMSG_DATA(m->hdr); + + *ret_flags = ifa->ifa_flags; + + return 0; +} + +int sd_rtnl_message_addr_get_ifindex(sd_netlink_message *m, int *ret_ifindex) { + struct ifaddrmsg *ifa; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_addr(m->hdr->nlmsg_type), -EINVAL); + assert_return(ret_ifindex, -EINVAL); + + ifa = NLMSG_DATA(m->hdr); + + *ret_ifindex = ifa->ifa_index; + + return 0; +} + +int sd_rtnl_message_new_addr( + sd_netlink *rtnl, + sd_netlink_message **ret, + uint16_t nlmsg_type, + int index, + int family) { + + struct ifaddrmsg *ifa; + int r; + + assert_return(rtnl_message_type_is_addr(nlmsg_type), -EINVAL); + assert_return((nlmsg_type == RTM_GETADDR && index == 0) || + index > 0, -EINVAL); + assert_return((nlmsg_type == RTM_GETADDR && family == AF_UNSPEC) || + IN_SET(family, AF_INET, AF_INET6), -EINVAL); + assert_return(ret, -EINVAL); + + r = message_new(rtnl, ret, nlmsg_type); + if (r < 0) + return r; + + ifa = NLMSG_DATA((*ret)->hdr); + + ifa->ifa_index = index; + ifa->ifa_family = family; + + return 0; +} + +int sd_rtnl_message_new_addr_update( + sd_netlink *rtnl, + sd_netlink_message **ret, + int index, + int family) { + int r; + + r = sd_rtnl_message_new_addr(rtnl, ret, RTM_NEWADDR, index, family); + if (r < 0) + return r; + + (*ret)->hdr->nlmsg_flags |= NLM_F_REPLACE; + + return 0; +} + +int sd_rtnl_message_link_get_ifindex(sd_netlink_message *m, int *ifindex) { + struct ifinfomsg *ifi; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL); + assert_return(ifindex, -EINVAL); + + ifi = NLMSG_DATA(m->hdr); + + *ifindex = ifi->ifi_index; + + return 0; +} + +int sd_rtnl_message_link_get_flags(sd_netlink_message *m, unsigned *flags) { + struct ifinfomsg *ifi; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL); + assert_return(flags, -EINVAL); + + ifi = NLMSG_DATA(m->hdr); + + *flags = ifi->ifi_flags; + + return 0; +} + +int sd_rtnl_message_link_get_type(sd_netlink_message *m, unsigned short *type) { + struct ifinfomsg *ifi; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_link(m->hdr->nlmsg_type), -EINVAL); + assert_return(type, -EINVAL); + + ifi = NLMSG_DATA(m->hdr); + + *type = ifi->ifi_type; + + return 0; +} + +int sd_rtnl_message_get_family(sd_netlink_message *m, int *family) { + assert_return(m, -EINVAL); + assert_return(family, -EINVAL); + + assert(m->hdr); + + if (rtnl_message_type_is_link(m->hdr->nlmsg_type)) { + struct ifinfomsg *ifi; + + ifi = NLMSG_DATA(m->hdr); + + *family = ifi->ifi_family; + + return 0; + } else if (rtnl_message_type_is_route(m->hdr->nlmsg_type)) { + struct rtmsg *rtm; + + rtm = NLMSG_DATA(m->hdr); + + *family = rtm->rtm_family; + + return 0; + } else if (rtnl_message_type_is_neigh(m->hdr->nlmsg_type)) { + struct ndmsg *ndm; + + ndm = NLMSG_DATA(m->hdr); + + *family = ndm->ndm_family; + + return 0; + } else if (rtnl_message_type_is_addr(m->hdr->nlmsg_type)) { + struct ifaddrmsg *ifa; + + ifa = NLMSG_DATA(m->hdr); + + *family = ifa->ifa_family; + + return 0; + } else if (rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type)) { + struct rtmsg *rtm; + + rtm = NLMSG_DATA(m->hdr); + + *family = rtm->rtm_family; + + return 0; + } else if (rtnl_message_type_is_nexthop(m->hdr->nlmsg_type)) { + struct nhmsg *nhm; + + nhm = NLMSG_DATA(m->hdr); + + *family = nhm->nh_family; + + return 0; + } + + return -EOPNOTSUPP; +} + +int sd_rtnl_message_new_addrlabel( + sd_netlink *rtnl, + sd_netlink_message **ret, + uint16_t nlmsg_type, + int ifindex, + int ifal_family) { + + struct ifaddrlblmsg *addrlabel; + int r; + + assert_return(rtnl_message_type_is_addrlabel(nlmsg_type), -EINVAL); + assert_return(ret, -EINVAL); + + r = message_new(rtnl, ret, nlmsg_type); + if (r < 0) + return r; + + if (nlmsg_type == RTM_NEWADDRLABEL) + (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL; + + addrlabel = NLMSG_DATA((*ret)->hdr); + + addrlabel->ifal_family = ifal_family; + addrlabel->ifal_index = ifindex; + + return 0; +} + +int sd_rtnl_message_addrlabel_set_prefixlen(sd_netlink_message *m, unsigned char prefixlen) { + struct ifaddrlblmsg *addrlabel; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_addrlabel(m->hdr->nlmsg_type), -EINVAL); + + addrlabel = NLMSG_DATA(m->hdr); + + if (prefixlen > 128) + return -ERANGE; + + addrlabel->ifal_prefixlen = prefixlen; + + return 0; +} + +int sd_rtnl_message_addrlabel_get_prefixlen(sd_netlink_message *m, unsigned char *prefixlen) { + struct ifaddrlblmsg *addrlabel; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_addrlabel(m->hdr->nlmsg_type), -EINVAL); + + addrlabel = NLMSG_DATA(m->hdr); + + *prefixlen = addrlabel->ifal_prefixlen; + + return 0; +} + +int sd_rtnl_message_new_routing_policy_rule( + sd_netlink *rtnl, + sd_netlink_message **ret, + uint16_t nlmsg_type, + int ifal_family) { + + struct fib_rule_hdr *frh; + int r; + + assert_return(rtnl_message_type_is_routing_policy_rule(nlmsg_type), -EINVAL); + assert_return(ret, -EINVAL); + + r = message_new(rtnl, ret, nlmsg_type); + if (r < 0) + return r; + + if (nlmsg_type == RTM_NEWRULE) + (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL; + + frh = NLMSG_DATA((*ret)->hdr); + frh->family = ifal_family; + + return 0; +} + +int sd_rtnl_message_routing_policy_rule_set_tos(sd_netlink_message *m, uint8_t tos) { + struct fib_rule_hdr *frh; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL); + + frh = NLMSG_DATA(m->hdr); + + frh->tos = tos; + + return 0; +} + +int sd_rtnl_message_routing_policy_rule_get_tos(sd_netlink_message *m, uint8_t *tos) { + struct fib_rule_hdr *frh; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL); + + frh = NLMSG_DATA(m->hdr); + + *tos = frh->tos; + + return 0; +} + +int sd_rtnl_message_routing_policy_rule_set_table(sd_netlink_message *m, uint8_t table) { + struct fib_rule_hdr *frh; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL); + + frh = NLMSG_DATA(m->hdr); + + frh->table = table; + + return 0; +} + +int sd_rtnl_message_routing_policy_rule_get_table(sd_netlink_message *m, uint8_t *table) { + struct fib_rule_hdr *frh; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL); + + frh = NLMSG_DATA(m->hdr); + + *table = frh->table; + + return 0; +} + +int sd_rtnl_message_routing_policy_rule_set_flags(sd_netlink_message *m, uint32_t flags) { + struct fib_rule_hdr *frh; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL); + + frh = NLMSG_DATA(m->hdr); + frh->flags = flags; + + return 0; +} + +int sd_rtnl_message_routing_policy_rule_get_flags(sd_netlink_message *m, uint32_t *flags) { + struct fib_rule_hdr *frh; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL); + + frh = NLMSG_DATA(m->hdr); + *flags = frh->flags; + + return 0; +} + +int sd_rtnl_message_routing_policy_rule_set_fib_type(sd_netlink_message *m, uint8_t type) { + struct fib_rule_hdr *frh; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL); + + frh = NLMSG_DATA(m->hdr); + + frh->action = type; + + return 0; +} + +int sd_rtnl_message_routing_policy_rule_get_fib_type(sd_netlink_message *m, uint8_t *type) { + struct fib_rule_hdr *frh; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL); + + frh = NLMSG_DATA(m->hdr); + + *type = frh->action; + + return 0; +} + +int sd_rtnl_message_routing_policy_rule_set_fib_dst_prefixlen(sd_netlink_message *m, uint8_t len) { + struct fib_rule_hdr *frh; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL); + + frh = NLMSG_DATA(m->hdr); + + frh->dst_len = len; + + return 0; +} + +int sd_rtnl_message_routing_policy_rule_get_fib_dst_prefixlen(sd_netlink_message *m, uint8_t *len) { + struct fib_rule_hdr *frh; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL); + + frh = NLMSG_DATA(m->hdr); + + *len = frh->dst_len; + + return 0; +} + +int sd_rtnl_message_routing_policy_rule_set_fib_src_prefixlen(sd_netlink_message *m, uint8_t len) { + struct fib_rule_hdr *frh; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL); + + frh = NLMSG_DATA(m->hdr); + + frh->src_len = len; + + return 0; +} + +int sd_rtnl_message_routing_policy_rule_get_fib_src_prefixlen(sd_netlink_message *m, uint8_t *len) { + struct fib_rule_hdr *frh; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_routing_policy_rule(m->hdr->nlmsg_type), -EINVAL); + + frh = NLMSG_DATA(m->hdr); + + *len = frh->src_len; + + return 0; +} + +int sd_rtnl_message_new_traffic_control( + sd_netlink *rtnl, + sd_netlink_message **ret, + uint16_t nlmsg_type, + int ifindex, + uint32_t handle, + uint32_t parent) { + + struct tcmsg *tcm; + int r; + + assert_return(rtnl_message_type_is_traffic_control(nlmsg_type), -EINVAL); + assert_return(ret, -EINVAL); + + r = message_new(rtnl, ret, nlmsg_type); + if (r < 0) + return r; + + if (IN_SET(nlmsg_type, RTM_NEWQDISC, RTM_NEWTCLASS)) + (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL; + + tcm = NLMSG_DATA((*ret)->hdr); + tcm->tcm_ifindex = ifindex; + tcm->tcm_handle = handle; + tcm->tcm_parent = parent; + + return 0; +} + +int sd_rtnl_message_traffic_control_get_ifindex(sd_netlink_message *m, int *ret) { + struct tcmsg *tcm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_traffic_control(m->hdr->nlmsg_type), -EINVAL); + assert_return(ret, -EINVAL); + + tcm = NLMSG_DATA(m->hdr); + *ret = tcm->tcm_ifindex; + + return 0; +} + +int sd_rtnl_message_traffic_control_get_handle(sd_netlink_message *m, uint32_t *ret) { + struct tcmsg *tcm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_traffic_control(m->hdr->nlmsg_type), -EINVAL); + assert_return(ret, -EINVAL); + + tcm = NLMSG_DATA(m->hdr); + *ret = tcm->tcm_handle; + + return 0; +} + +int sd_rtnl_message_traffic_control_get_parent(sd_netlink_message *m, uint32_t *ret) { + struct tcmsg *tcm; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(rtnl_message_type_is_traffic_control(m->hdr->nlmsg_type), -EINVAL); + assert_return(ret, -EINVAL); + + tcm = NLMSG_DATA(m->hdr); + *ret = tcm->tcm_parent; + + return 0; +} + +int sd_rtnl_message_new_mdb( + sd_netlink *rtnl, + sd_netlink_message **ret, + uint16_t nlmsg_type, + int mdb_ifindex) { + + struct br_port_msg *bpm; + int r; + + assert_return(rtnl_message_type_is_mdb(nlmsg_type), -EINVAL); + assert_return(ret, -EINVAL); + + r = message_new(rtnl, ret, nlmsg_type); + if (r < 0) + return r; + + if (nlmsg_type == RTM_NEWMDB) + (*ret)->hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL; + + bpm = NLMSG_DATA((*ret)->hdr); + bpm->family = AF_BRIDGE; + bpm->ifindex = mdb_ifindex; + + return 0; +} diff --git a/src/libsystemd/sd-netlink/netlink-message.c b/src/libsystemd/sd-netlink/netlink-message.c new file mode 100644 index 0000000..000a50e --- /dev/null +++ b/src/libsystemd/sd-netlink/netlink-message.c @@ -0,0 +1,1421 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <netinet/in.h> +#include <stdbool.h> +#include <unistd.h> + +#include "sd-netlink.h" + +#include "alloc-util.h" +#include "format-util.h" +#include "memory-util.h" +#include "netlink-internal.h" +#include "netlink-types.h" +#include "netlink-util.h" +#include "socket-util.h" +#include "strv.h" + +#define GET_CONTAINER(m, i) ((struct rtattr*)((uint8_t*)(m)->hdr + (m)->containers[i].offset)) + +#define RTA_TYPE(rta) ((rta)->rta_type & NLA_TYPE_MASK) +#define RTA_FLAGS(rta) ((rta)->rta_type & ~NLA_TYPE_MASK) + +int message_new_empty(sd_netlink *nl, sd_netlink_message **ret) { + sd_netlink_message *m; + + assert(nl); + assert(ret); + + /* Note that 'nl' is currently unused, if we start using it internally we must take care to + * avoid problems due to mutual references between buses and their queued messages. See sd-bus. */ + + m = new(sd_netlink_message, 1); + if (!m) + return -ENOMEM; + + *m = (sd_netlink_message) { + .n_ref = 1, + .protocol = nl->protocol, + .sealed = false, + }; + + *ret = m; + return 0; +} + +int message_new_full( + sd_netlink *nl, + uint16_t nlmsg_type, + const NLAPolicySet *policy_set, + size_t header_size, + sd_netlink_message **ret) { + + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + size_t size; + int r; + + assert(nl); + assert(policy_set); + assert(ret); + + size = NLMSG_SPACE(header_size); + assert(size >= sizeof(struct nlmsghdr)); + + r = message_new_empty(nl, &m); + if (r < 0) + return r; + + m->containers[0].policy_set = policy_set; + + m->hdr = malloc0(size); + if (!m->hdr) + return -ENOMEM; + + m->hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + m->hdr->nlmsg_len = size; + m->hdr->nlmsg_type = nlmsg_type; + + *ret = TAKE_PTR(m); + return 0; +} + +int message_new(sd_netlink *nl, sd_netlink_message **ret, uint16_t nlmsg_type) { + const NLAPolicySet *policy_set; + size_t size; + int r; + + assert_return(nl, -EINVAL); + assert_return(ret, -EINVAL); + + r = netlink_get_policy_set_and_header_size(nl, nlmsg_type, &policy_set, &size); + if (r < 0) + return r; + + return message_new_full(nl, nlmsg_type, policy_set, size, ret); +} + +int message_new_synthetic_error(sd_netlink *nl, int error, uint32_t serial, sd_netlink_message **ret) { + struct nlmsgerr *err; + int r; + + assert(error <= 0); + + r = message_new(nl, ret, NLMSG_ERROR); + if (r < 0) + return r; + + message_seal(*ret); + (*ret)->hdr->nlmsg_seq = serial; + + err = NLMSG_DATA((*ret)->hdr); + err->error = error; + + return 0; +} + +int sd_netlink_message_set_request_dump(sd_netlink_message *m, int dump) { + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + assert_return(m->protocol != NETLINK_ROUTE || + IN_SET(m->hdr->nlmsg_type, + RTM_GETLINK, RTM_GETLINKPROP, RTM_GETADDR, RTM_GETROUTE, RTM_GETNEIGH, + RTM_GETRULE, RTM_GETADDRLABEL, RTM_GETNEXTHOP, RTM_GETQDISC, RTM_GETTCLASS), + -EINVAL); + + SET_FLAG(m->hdr->nlmsg_flags, NLM_F_DUMP, dump); + + return 0; +} + +DEFINE_TRIVIAL_REF_FUNC(sd_netlink_message, sd_netlink_message); + +sd_netlink_message* sd_netlink_message_unref(sd_netlink_message *m) { + while (m && --m->n_ref == 0) { + unsigned i; + + free(m->hdr); + + for (i = 0; i <= m->n_containers; i++) + free(m->containers[i].attributes); + + sd_netlink_message *t = m; + m = m->next; + free(t); + } + + return NULL; +} + +int sd_netlink_message_get_type(sd_netlink_message *m, uint16_t *ret) { + assert_return(m, -EINVAL); + assert_return(ret, -EINVAL); + + *ret = m->hdr->nlmsg_type; + + return 0; +} + +int sd_netlink_message_set_flags(sd_netlink_message *m, uint16_t flags) { + assert_return(m, -EINVAL); + assert_return(flags != 0, -EINVAL); + + m->hdr->nlmsg_flags = flags; + + return 0; +} + +int sd_netlink_message_is_broadcast(sd_netlink_message *m) { + assert_return(m, -EINVAL); + + return m->multicast_group != 0; +} + +/* If successful the updated message will be correctly aligned, if unsuccessful the old message is untouched. */ +static int add_rtattr(sd_netlink_message *m, uint16_t attr_type, const void *data, size_t data_length) { + size_t message_length; + struct nlmsghdr *new_hdr; + struct rtattr *rta; + int offset; + + assert(m); + assert(m->hdr); + assert(!m->sealed); + assert(NLMSG_ALIGN(m->hdr->nlmsg_len) == m->hdr->nlmsg_len); + assert(!data || data_length > 0); + + /* get the new message size (with padding at the end) */ + message_length = m->hdr->nlmsg_len + RTA_SPACE(data_length); + + /* buffer should be smaller than both one page or 8K to be accepted by the kernel */ + if (message_length > MIN(page_size(), 8192UL)) + return -ENOBUFS; + + /* realloc to fit the new attribute */ + new_hdr = realloc(m->hdr, message_length); + if (!new_hdr) + return -ENOMEM; + m->hdr = new_hdr; + + /* get pointer to the attribute we are about to add */ + rta = (struct rtattr *) ((uint8_t *) m->hdr + m->hdr->nlmsg_len); + + rtattr_append_attribute_internal(rta, attr_type, data, data_length); + + /* if we are inside containers, extend them */ + for (unsigned i = 0; i < m->n_containers; i++) + GET_CONTAINER(m, i)->rta_len += RTA_SPACE(data_length); + + /* update message size */ + offset = m->hdr->nlmsg_len; + m->hdr->nlmsg_len = message_length; + + /* return old message size */ + return offset; +} + +static int message_attribute_has_type(sd_netlink_message *m, size_t *ret_size, uint16_t attr_type, NLAType type) { + const NLAPolicy *policy; + + assert(m); + + policy = policy_set_get_policy(m->containers[m->n_containers].policy_set, attr_type); + if (!policy) + return -EOPNOTSUPP; + + if (policy_get_type(policy) != type) + return -EINVAL; + + if (ret_size) + *ret_size = policy_get_size(policy); + return 0; +} + +int sd_netlink_message_append_string(sd_netlink_message *m, uint16_t attr_type, const char *data) { + size_t length, size; + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(data, -EINVAL); + + r = message_attribute_has_type(m, &size, attr_type, NETLINK_TYPE_STRING); + if (r < 0) + return r; + + if (size) { + length = strnlen(data, size+1); + if (length > size) + return -EINVAL; + } else + length = strlen(data); + + r = add_rtattr(m, attr_type, data, length + 1); + if (r < 0) + return r; + + return 0; +} + +int sd_netlink_message_append_strv(sd_netlink_message *m, uint16_t attr_type, const char* const *data) { + size_t length, size; + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(data, -EINVAL); + + r = message_attribute_has_type(m, &size, attr_type, NETLINK_TYPE_STRING); + if (r < 0) + return r; + + STRV_FOREACH(p, data) { + if (size) { + length = strnlen(*p, size+1); + if (length > size) + return -EINVAL; + } else + length = strlen(*p); + + r = add_rtattr(m, attr_type, *p, length + 1); + if (r < 0) + return r; + } + + return 0; +} + +int sd_netlink_message_append_flag(sd_netlink_message *m, uint16_t attr_type) { + size_t size; + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + + r = message_attribute_has_type(m, &size, attr_type, NETLINK_TYPE_FLAG); + if (r < 0) + return r; + + r = add_rtattr(m, attr_type, NULL, 0); + if (r < 0) + return r; + + return 0; +} + +int sd_netlink_message_append_u8(sd_netlink_message *m, uint16_t attr_type, uint8_t data) { + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_U8); + if (r < 0) + return r; + + r = add_rtattr(m, attr_type, &data, sizeof(uint8_t)); + if (r < 0) + return r; + + return 0; +} + +int sd_netlink_message_append_u16(sd_netlink_message *m, uint16_t attr_type, uint16_t data) { + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_U16); + if (r < 0) + return r; + + r = add_rtattr(m, attr_type, &data, sizeof(uint16_t)); + if (r < 0) + return r; + + return 0; +} + +int sd_netlink_message_append_u32(sd_netlink_message *m, uint16_t attr_type, uint32_t data) { + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_U32); + if (r < 0) + return r; + + r = add_rtattr(m, attr_type, &data, sizeof(uint32_t)); + if (r < 0) + return r; + + return 0; +} + +int sd_netlink_message_append_u64(sd_netlink_message *m, uint16_t attr_type, uint64_t data) { + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_U64); + if (r < 0) + return r; + + r = add_rtattr(m, attr_type, &data, sizeof(uint64_t)); + if (r < 0) + return r; + + return 0; +} + +int sd_netlink_message_append_s8(sd_netlink_message *m, uint16_t attr_type, int8_t data) { + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_S8); + if (r < 0) + return r; + + r = add_rtattr(m, attr_type, &data, sizeof(int8_t)); + if (r < 0) + return r; + + return 0; +} + +int sd_netlink_message_append_s16(sd_netlink_message *m, uint16_t attr_type, int16_t data) { + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_S16); + if (r < 0) + return r; + + r = add_rtattr(m, attr_type, &data, sizeof(int16_t)); + if (r < 0) + return r; + + return 0; +} + +int sd_netlink_message_append_s32(sd_netlink_message *m, uint16_t attr_type, int32_t data) { + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_S32); + if (r < 0) + return r; + + r = add_rtattr(m, attr_type, &data, sizeof(int32_t)); + if (r < 0) + return r; + + return 0; +} + +int sd_netlink_message_append_s64(sd_netlink_message *m, uint16_t attr_type, int64_t data) { + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_S64); + if (r < 0) + return r; + + r = add_rtattr(m, attr_type, &data, sizeof(int64_t)); + if (r < 0) + return r; + + return 0; +} + +int sd_netlink_message_append_data(sd_netlink_message *m, uint16_t attr_type, const void *data, size_t len) { + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + + r = add_rtattr(m, attr_type, data, len); + if (r < 0) + return r; + + return 0; +} + +int sd_netlink_message_append_container_data( + sd_netlink_message *m, + uint16_t container_type, + uint16_t attr_type, + const void *data, + size_t len) { + + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + + r = sd_netlink_message_open_container(m, container_type); + if (r < 0) + return r; + + r = sd_netlink_message_append_data(m, attr_type, data, len); + if (r < 0) + return r; + + return sd_netlink_message_close_container(m); +} + +int netlink_message_append_in_addr_union(sd_netlink_message *m, uint16_t attr_type, int family, const union in_addr_union *data) { + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(data, -EINVAL); + assert_return(IN_SET(family, AF_INET, AF_INET6), -EINVAL); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_IN_ADDR); + if (r < 0) + return r; + + r = add_rtattr(m, attr_type, data, FAMILY_ADDRESS_SIZE(family)); + if (r < 0) + return r; + + return 0; +} + +int sd_netlink_message_append_in_addr(sd_netlink_message *m, uint16_t attr_type, const struct in_addr *data) { + return netlink_message_append_in_addr_union(m, attr_type, AF_INET, (const union in_addr_union *) data); +} + +int sd_netlink_message_append_in6_addr(sd_netlink_message *m, uint16_t attr_type, const struct in6_addr *data) { + return netlink_message_append_in_addr_union(m, attr_type, AF_INET6, (const union in_addr_union *) data); +} + +int netlink_message_append_sockaddr_union(sd_netlink_message *m, uint16_t attr_type, const union sockaddr_union *data) { + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(data, -EINVAL); + assert_return(IN_SET(data->sa.sa_family, AF_INET, AF_INET6), -EINVAL); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_SOCKADDR); + if (r < 0) + return r; + + r = add_rtattr(m, attr_type, data, data->sa.sa_family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6)); + if (r < 0) + return r; + + return 0; +} + +int sd_netlink_message_append_sockaddr_in(sd_netlink_message *m, uint16_t attr_type, const struct sockaddr_in *data) { + return netlink_message_append_sockaddr_union(m, attr_type, (const union sockaddr_union *) data); +} + +int sd_netlink_message_append_sockaddr_in6(sd_netlink_message *m, uint16_t attr_type, const struct sockaddr_in6 *data) { + return netlink_message_append_sockaddr_union(m, attr_type, (const union sockaddr_union *) data); +} + +int sd_netlink_message_append_ether_addr(sd_netlink_message *m, uint16_t attr_type, const struct ether_addr *data) { + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(data, -EINVAL); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_ETHER_ADDR); + if (r < 0) + return r; + + r = add_rtattr(m, attr_type, data, ETH_ALEN); + if (r < 0) + return r; + + return 0; +} + +int netlink_message_append_hw_addr(sd_netlink_message *m, uint16_t attr_type, const struct hw_addr_data *data) { + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(data, -EINVAL); + assert_return(data->length > 0, -EINVAL); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_ETHER_ADDR); + if (r < 0) + return r; + + r = add_rtattr(m, attr_type, data->bytes, data->length); + if (r < 0) + return r; + + return 0; +} + +int sd_netlink_message_append_cache_info(sd_netlink_message *m, uint16_t attr_type, const struct ifa_cacheinfo *info) { + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(info, -EINVAL); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_CACHE_INFO); + if (r < 0) + return r; + + r = add_rtattr(m, attr_type, info, sizeof(struct ifa_cacheinfo)); + if (r < 0) + return r; + + return 0; +} + +int sd_netlink_message_open_container(sd_netlink_message *m, uint16_t attr_type) { + size_t size; + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + /* m->containers[m->n_containers + 1] is accessed both in read and write. Prevent access out of bound */ + assert_return(m->n_containers < (NETLINK_CONTAINER_DEPTH - 1), -ERANGE); + + r = message_attribute_has_type(m, &size, attr_type, NETLINK_TYPE_NESTED); + if (r < 0) { + const NLAPolicySetUnion *policy_set_union; + int family; + + r = message_attribute_has_type(m, &size, attr_type, NETLINK_TYPE_NESTED_UNION_BY_FAMILY); + if (r < 0) + return r; + + r = sd_rtnl_message_get_family(m, &family); + if (r < 0) + return r; + + policy_set_union = policy_set_get_policy_set_union( + m->containers[m->n_containers].policy_set, + attr_type); + if (!policy_set_union) + return -EOPNOTSUPP; + + m->containers[m->n_containers + 1].policy_set = + policy_set_union_get_policy_set_by_family( + policy_set_union, + family); + } else + m->containers[m->n_containers + 1].policy_set = + policy_set_get_policy_set( + m->containers[m->n_containers].policy_set, + attr_type); + if (!m->containers[m->n_containers + 1].policy_set) + return -EOPNOTSUPP; + + r = add_rtattr(m, attr_type | NLA_F_NESTED, NULL, size); + if (r < 0) + return r; + + m->containers[m->n_containers++].offset = r; + + return 0; +} + +int sd_netlink_message_open_container_union(sd_netlink_message *m, uint16_t attr_type, const char *key) { + const NLAPolicySetUnion *policy_set_union; + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(m->n_containers < (NETLINK_CONTAINER_DEPTH - 1), -ERANGE); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_NESTED_UNION_BY_STRING); + if (r < 0) + return r; + + policy_set_union = policy_set_get_policy_set_union( + m->containers[m->n_containers].policy_set, + attr_type); + if (!policy_set_union) + return -EOPNOTSUPP; + + m->containers[m->n_containers + 1].policy_set = + policy_set_union_get_policy_set_by_string( + policy_set_union, + key); + if (!m->containers[m->n_containers + 1].policy_set) + return -EOPNOTSUPP; + + r = sd_netlink_message_append_string(m, policy_set_union_get_match_attribute(policy_set_union), key); + if (r < 0) + return r; + + /* do we ever need non-null size */ + r = add_rtattr(m, attr_type | NLA_F_NESTED, NULL, 0); + if (r < 0) + return r; + + m->containers[m->n_containers++].offset = r; + + return 0; +} + +int sd_netlink_message_close_container(sd_netlink_message *m) { + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(m->n_containers > 0, -EINVAL); + + m->containers[m->n_containers].policy_set = NULL; + m->containers[m->n_containers].offset = 0; + m->n_containers--; + + return 0; +} + +int sd_netlink_message_open_array(sd_netlink_message *m, uint16_t attr_type) { + int r; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(m->n_containers < (NETLINK_CONTAINER_DEPTH - 1), -ERANGE); + + r = add_rtattr(m, attr_type | NLA_F_NESTED, NULL, 0); + if (r < 0) + return r; + + m->containers[m->n_containers].offset = r; + m->n_containers++; + m->containers[m->n_containers].policy_set = m->containers[m->n_containers - 1].policy_set; + + return 0; +} + +int sd_netlink_message_cancel_array(sd_netlink_message *m) { + uint32_t rta_len; + + assert_return(m, -EINVAL); + assert_return(!m->sealed, -EPERM); + assert_return(m->n_containers > 1, -EINVAL); + + rta_len = GET_CONTAINER(m, (m->n_containers - 1))->rta_len; + + for (unsigned i = 0; i < m->n_containers; i++) + GET_CONTAINER(m, i)->rta_len -= rta_len; + + m->hdr->nlmsg_len -= rta_len; + + m->n_containers--; + m->containers[m->n_containers].policy_set = NULL; + + return 0; +} + +static int netlink_message_read_internal( + sd_netlink_message *m, + uint16_t attr_type, + void **ret_data, + bool *ret_net_byteorder) { + + struct netlink_attribute *attribute; + struct rtattr *rta; + + assert_return(m, -EINVAL); + assert_return(m->sealed, -EPERM); + + assert(m->n_containers < NETLINK_CONTAINER_DEPTH); + + if (!m->containers[m->n_containers].attributes) + return -ENODATA; + + if (attr_type > m->containers[m->n_containers].max_attribute) + return -ENODATA; + + attribute = &m->containers[m->n_containers].attributes[attr_type]; + + if (attribute->offset == 0) + return -ENODATA; + + rta = (struct rtattr*)((uint8_t *) m->hdr + attribute->offset); + + if (ret_data) + *ret_data = RTA_DATA(rta); + + if (ret_net_byteorder) + *ret_net_byteorder = attribute->net_byteorder; + + return RTA_PAYLOAD(rta); +} + +int sd_netlink_message_read(sd_netlink_message *m, uint16_t attr_type, size_t size, void *data) { + void *attr_data; + int r; + + assert_return(m, -EINVAL); + + r = netlink_message_read_internal(m, attr_type, &attr_data, NULL); + if (r < 0) + return r; + + if ((size_t) r > size) + return -ENOBUFS; + + if (data) + memcpy(data, attr_data, r); + + return r; +} + +int sd_netlink_message_read_data(sd_netlink_message *m, uint16_t attr_type, size_t *ret_size, void **ret_data) { + void *attr_data; + int r; + + assert_return(m, -EINVAL); + + r = netlink_message_read_internal(m, attr_type, &attr_data, NULL); + if (r < 0) + return r; + + if (ret_data) { + void *data; + + data = memdup(attr_data, r); + if (!data) + return -ENOMEM; + + *ret_data = data; + } + + if (ret_size) + *ret_size = r; + + return r; +} + +int sd_netlink_message_read_data_suffix0(sd_netlink_message *m, uint16_t attr_type, size_t *ret_size, void **ret_data) { + void *attr_data; + int r; + + assert_return(m, -EINVAL); + + r = netlink_message_read_internal(m, attr_type, &attr_data, NULL); + if (r < 0) + return r; + + if (ret_data) { + void *data; + + data = memdup_suffix0(attr_data, r); + if (!data) + return -ENOMEM; + + *ret_data = data; + } + + if (ret_size) + *ret_size = r; + + return r; +} + +int sd_netlink_message_read_string_strdup(sd_netlink_message *m, uint16_t attr_type, char **data) { + void *attr_data; + int r; + + assert_return(m, -EINVAL); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_STRING); + if (r < 0) + return r; + + r = netlink_message_read_internal(m, attr_type, &attr_data, NULL); + if (r < 0) + return r; + + if (data) { + char *str; + + str = strndup(attr_data, r); + if (!str) + return -ENOMEM; + + *data = str; + } + + return 0; +} + +int sd_netlink_message_read_string(sd_netlink_message *m, uint16_t attr_type, const char **data) { + void *attr_data; + int r; + + assert_return(m, -EINVAL); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_STRING); + if (r < 0) + return r; + + r = netlink_message_read_internal(m, attr_type, &attr_data, NULL); + if (r < 0) + return r; + + if (strnlen(attr_data, r) >= (size_t) r) + return -EIO; + + if (data) + *data = (const char *) attr_data; + + return 0; +} + +int sd_netlink_message_read_u8(sd_netlink_message *m, uint16_t attr_type, uint8_t *data) { + void *attr_data; + int r; + + assert_return(m, -EINVAL); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_U8); + if (r < 0) + return r; + + r = netlink_message_read_internal(m, attr_type, &attr_data, NULL); + if (r < 0) + return r; + + if ((size_t) r < sizeof(uint8_t)) + return -EIO; + + if (data) + *data = *(uint8_t *) attr_data; + + return 0; +} + +int sd_netlink_message_read_u16(sd_netlink_message *m, uint16_t attr_type, uint16_t *data) { + void *attr_data; + bool net_byteorder; + int r; + + assert_return(m, -EINVAL); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_U16); + if (r < 0) + return r; + + r = netlink_message_read_internal(m, attr_type, &attr_data, &net_byteorder); + if (r < 0) + return r; + + if ((size_t) r < sizeof(uint16_t)) + return -EIO; + + if (data) { + if (net_byteorder) + *data = be16toh(*(uint16_t *) attr_data); + else + *data = *(uint16_t *) attr_data; + } + + return 0; +} + +int sd_netlink_message_read_u32(sd_netlink_message *m, uint16_t attr_type, uint32_t *data) { + void *attr_data; + bool net_byteorder; + int r; + + assert_return(m, -EINVAL); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_U32); + if (r < 0) + return r; + + r = netlink_message_read_internal(m, attr_type, &attr_data, &net_byteorder); + if (r < 0) + return r; + + if ((size_t) r < sizeof(uint32_t)) + return -EIO; + + if (data) { + if (net_byteorder) + *data = be32toh(*(uint32_t *) attr_data); + else + *data = *(uint32_t *) attr_data; + } + + return 0; +} + +int sd_netlink_message_read_ether_addr(sd_netlink_message *m, uint16_t attr_type, struct ether_addr *data) { + void *attr_data; + int r; + + assert_return(m, -EINVAL); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_ETHER_ADDR); + if (r < 0) + return r; + + r = netlink_message_read_internal(m, attr_type, &attr_data, NULL); + if (r < 0) + return r; + + if ((size_t) r < sizeof(struct ether_addr)) + return -EIO; + + if (data) + memcpy(data, attr_data, sizeof(struct ether_addr)); + + return 0; +} + +int netlink_message_read_hw_addr(sd_netlink_message *m, uint16_t attr_type, struct hw_addr_data *data) { + void *attr_data; + int r; + + assert_return(m, -EINVAL); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_ETHER_ADDR); + if (r < 0) + return r; + + r = netlink_message_read_internal(m, attr_type, &attr_data, NULL); + if (r < 0) + return r; + + if (r > HW_ADDR_MAX_SIZE) + return -EIO; + + if (data) { + memcpy(data->bytes, attr_data, r); + data->length = r; + } + + return 0; +} + +int sd_netlink_message_read_cache_info(sd_netlink_message *m, uint16_t attr_type, struct ifa_cacheinfo *info) { + void *attr_data; + int r; + + assert_return(m, -EINVAL); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_CACHE_INFO); + if (r < 0) + return r; + + r = netlink_message_read_internal(m, attr_type, &attr_data, NULL); + if (r < 0) + return r; + + if ((size_t) r < sizeof(struct ifa_cacheinfo)) + return -EIO; + + if (info) + memcpy(info, attr_data, sizeof(struct ifa_cacheinfo)); + + return 0; +} + +int netlink_message_read_in_addr_union(sd_netlink_message *m, uint16_t attr_type, int family, union in_addr_union *data) { + void *attr_data; + int r; + + assert_return(m, -EINVAL); + assert_return(IN_SET(family, AF_INET, AF_INET6), -EINVAL); + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_IN_ADDR); + if (r < 0) + return r; + + r = netlink_message_read_internal(m, attr_type, &attr_data, NULL); + if (r < 0) + return r; + + if ((size_t) r < FAMILY_ADDRESS_SIZE(family)) + return -EIO; + + if (data) + memcpy(data, attr_data, FAMILY_ADDRESS_SIZE(family)); + + return 0; +} + +int sd_netlink_message_read_in_addr(sd_netlink_message *m, uint16_t attr_type, struct in_addr *data) { + union in_addr_union u; + int r; + + r = netlink_message_read_in_addr_union(m, attr_type, AF_INET, &u); + if (r >= 0 && data) + *data = u.in; + + return r; +} + +int sd_netlink_message_read_in6_addr(sd_netlink_message *m, uint16_t attr_type, struct in6_addr *data) { + union in_addr_union u; + int r; + + r = netlink_message_read_in_addr_union(m, attr_type, AF_INET6, &u); + if (r >= 0 && data) + *data = u.in6; + + return r; +} + +int sd_netlink_message_has_flag(sd_netlink_message *m, uint16_t attr_type) { + void *attr_data; + int r; + + assert_return(m, -EINVAL); + + /* This returns 1 when the flag is set, 0 when not set, negative errno on error. */ + + r = message_attribute_has_type(m, NULL, attr_type, NETLINK_TYPE_FLAG); + if (r < 0) + return r; + + r = netlink_message_read_internal(m, attr_type, &attr_data, NULL); + if (r == -ENODATA) + return 0; + if (r < 0) + return r; + + return 1; +} + +int sd_netlink_message_read_strv(sd_netlink_message *m, uint16_t container_type, uint16_t attr_type, char ***ret) { + _cleanup_strv_free_ char **s = NULL; + const NLAPolicySet *policy_set; + const NLAPolicy *policy; + struct rtattr *rta; + void *container; + size_t rt_len; + int r; + + assert_return(m, -EINVAL); + assert_return(m->n_containers < NETLINK_CONTAINER_DEPTH, -EINVAL); + + policy = policy_set_get_policy( + m->containers[m->n_containers].policy_set, + container_type); + if (!policy) + return -EOPNOTSUPP; + + if (policy_get_type(policy) != NETLINK_TYPE_NESTED) + return -EINVAL; + + policy_set = policy_set_get_policy_set( + m->containers[m->n_containers].policy_set, + container_type); + if (!policy_set) + return -EOPNOTSUPP; + + policy = policy_set_get_policy(policy_set, attr_type); + if (!policy) + return -EOPNOTSUPP; + + if (policy_get_type(policy) != NETLINK_TYPE_STRING) + return -EINVAL; + + r = netlink_message_read_internal(m, container_type, &container, NULL); + if (r < 0) + return r; + + rt_len = (size_t) r; + rta = container; + + /* RTA_OK() macro compares with rta->rt_len, which is unsigned short, and + * LGTM.com analysis does not like the type difference. Hence, here we + * introduce an unsigned short variable as a workaround. */ + unsigned short len = rt_len; + for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) { + uint16_t type; + + type = RTA_TYPE(rta); + if (type != attr_type) + continue; + + r = strv_extend(&s, RTA_DATA(rta)); + if (r < 0) + return r; + } + + *ret = TAKE_PTR(s); + return 0; +} + +static int netlink_container_parse( + sd_netlink_message *m, + struct netlink_container *container, + struct rtattr *rta, + size_t rt_len) { + + _cleanup_free_ struct netlink_attribute *attributes = NULL; + uint16_t max_attr = 0; + + /* RTA_OK() macro compares with rta->rt_len, which is unsigned short, and + * LGTM.com analysis does not like the type difference. Hence, here we + * introduce an unsigned short variable as a workaround. */ + unsigned short len = rt_len; + for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) { + uint16_t attr; + + attr = RTA_TYPE(rta); + max_attr = MAX(max_attr, attr); + + if (!GREEDY_REALLOC0(attributes, (size_t) max_attr + 1)) + return -ENOMEM; + + if (attributes[attr].offset != 0) + log_debug("sd-netlink: message parse - overwriting repeated attribute"); + + attributes[attr].offset = (uint8_t *) rta - (uint8_t *) m->hdr; + attributes[attr].nested = RTA_FLAGS(rta) & NLA_F_NESTED; + attributes[attr].net_byteorder = RTA_FLAGS(rta) & NLA_F_NET_BYTEORDER; + } + + container->attributes = TAKE_PTR(attributes); + container->max_attribute = max_attr; + + return 0; +} + +int sd_netlink_message_enter_container(sd_netlink_message *m, uint16_t attr_type) { + const NLAPolicy *policy; + const NLAPolicySet *policy_set; + void *container; + size_t size; + int r; + + assert_return(m, -EINVAL); + assert_return(m->n_containers < (NETLINK_CONTAINER_DEPTH - 1), -EINVAL); + + policy = policy_set_get_policy( + m->containers[m->n_containers].policy_set, + attr_type); + if (!policy) + return -EOPNOTSUPP; + + switch (policy_get_type(policy)) { + case NETLINK_TYPE_NESTED: + policy_set = policy_set_get_policy_set( + m->containers[m->n_containers].policy_set, + attr_type); + break; + + case NETLINK_TYPE_NESTED_UNION_BY_STRING: { + const NLAPolicySetUnion *policy_set_union; + const char *key; + + policy_set_union = policy_get_policy_set_union(policy); + if (!policy_set_union) + return -EOPNOTSUPP; + + r = sd_netlink_message_read_string( + m, + policy_set_union_get_match_attribute(policy_set_union), + &key); + if (r < 0) + return r; + + policy_set = policy_set_union_get_policy_set_by_string( + policy_set_union, + key); + break; + } + case NETLINK_TYPE_NESTED_UNION_BY_FAMILY: { + const NLAPolicySetUnion *policy_set_union; + int family; + + policy_set_union = policy_get_policy_set_union(policy); + if (!policy_set_union) + return -EOPNOTSUPP; + + r = sd_rtnl_message_get_family(m, &family); + if (r < 0) + return r; + + policy_set = policy_set_union_get_policy_set_by_family( + policy_set_union, + family); + break; + } + default: + assert_not_reached(); + } + if (!policy_set) + return -EOPNOTSUPP; + + r = netlink_message_read_internal(m, attr_type, &container, NULL); + if (r < 0) + return r; + + size = (size_t) r; + m->n_containers++; + + r = netlink_container_parse(m, + &m->containers[m->n_containers], + container, + size); + if (r < 0) { + m->n_containers--; + return r; + } + + m->containers[m->n_containers].policy_set = policy_set; + + return 0; +} + +int sd_netlink_message_enter_array(sd_netlink_message *m, uint16_t attr_type) { + void *container; + size_t size; + int r; + + assert_return(m, -EINVAL); + assert_return(m->n_containers < (NETLINK_CONTAINER_DEPTH - 1), -EINVAL); + + r = netlink_message_read_internal(m, attr_type, &container, NULL); + if (r < 0) + return r; + + size = (size_t) r; + m->n_containers++; + + r = netlink_container_parse(m, + &m->containers[m->n_containers], + container, + size); + if (r < 0) { + m->n_containers--; + return r; + } + + m->containers[m->n_containers].policy_set = m->containers[m->n_containers - 1].policy_set; + + return 0; +} + +int sd_netlink_message_exit_container(sd_netlink_message *m) { + assert_return(m, -EINVAL); + assert_return(m->sealed, -EINVAL); + assert_return(m->n_containers > 0, -EINVAL); + + m->containers[m->n_containers].attributes = mfree(m->containers[m->n_containers].attributes); + m->containers[m->n_containers].max_attribute = 0; + m->containers[m->n_containers].policy_set = NULL; + + m->n_containers--; + + return 0; +} + +int sd_netlink_message_get_max_attribute(sd_netlink_message *m, uint16_t *ret) { + assert_return(m, -EINVAL); + assert_return(m->sealed, -EINVAL); + assert_return(ret, -EINVAL); + + *ret = m->containers[m->n_containers].max_attribute; + return 0; +} + +int sd_netlink_message_is_error(sd_netlink_message *m) { + assert_return(m, 0); + assert_return(m->hdr, 0); + + return m->hdr->nlmsg_type == NLMSG_ERROR; +} + +int sd_netlink_message_get_errno(sd_netlink_message *m) { + struct nlmsgerr *err; + + assert_return(m, -EINVAL); + assert_return(m->hdr, -EINVAL); + + if (!sd_netlink_message_is_error(m)) + return 0; + + err = NLMSG_DATA(m->hdr); + + return err->error; +} + +static int netlink_message_parse_error(sd_netlink_message *m) { + struct nlmsgerr *err = NLMSG_DATA(m->hdr); + size_t hlen = sizeof(struct nlmsgerr); + + /* no TLVs, nothing to do here */ + if (!(m->hdr->nlmsg_flags & NLM_F_ACK_TLVS)) + return 0; + + /* if NLM_F_CAPPED is set then the inner err msg was capped */ + if (!(m->hdr->nlmsg_flags & NLM_F_CAPPED)) + hlen += err->msg.nlmsg_len - sizeof(struct nlmsghdr); + + if (m->hdr->nlmsg_len <= NLMSG_SPACE(hlen)) + return 0; + + return netlink_container_parse(m, + &m->containers[m->n_containers], + (struct rtattr*)((uint8_t*) NLMSG_DATA(m->hdr) + hlen), + NLMSG_PAYLOAD(m->hdr, hlen)); +} + +int sd_netlink_message_rewind(sd_netlink_message *m, sd_netlink *nl) { + size_t size; + int r; + + assert_return(m, -EINVAL); + assert_return(nl, -EINVAL); + + /* don't allow appending to message once parsed */ + message_seal(m); + + for (unsigned i = 1; i <= m->n_containers; i++) + m->containers[i].attributes = mfree(m->containers[i].attributes); + + m->n_containers = 0; + + if (m->containers[0].attributes) + /* top-level attributes have already been parsed */ + return 0; + + assert(m->hdr); + + r = netlink_get_policy_set_and_header_size(nl, m->hdr->nlmsg_type, + &m->containers[0].policy_set, &size); + if (r < 0) + return r; + + if (sd_netlink_message_is_error(m)) + return netlink_message_parse_error(m); + + return netlink_container_parse(m, + &m->containers[0], + (struct rtattr*)((uint8_t*) NLMSG_DATA(m->hdr) + NLMSG_ALIGN(size)), + NLMSG_PAYLOAD(m->hdr, size)); +} + +void message_seal(sd_netlink_message *m) { + assert(m); + + m->sealed = true; +} + +sd_netlink_message *sd_netlink_message_next(sd_netlink_message *m) { + assert_return(m, NULL); + + return m->next; +} diff --git a/src/libsystemd/sd-netlink/netlink-slot.c b/src/libsystemd/sd-netlink/netlink-slot.c new file mode 100644 index 0000000..d85d2cd --- /dev/null +++ b/src/libsystemd/sd-netlink/netlink-slot.c @@ -0,0 +1,188 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> + +#include "sd-netlink.h" + +#include "alloc-util.h" +#include "netlink-internal.h" +#include "netlink-slot.h" +#include "string-util.h" + +int netlink_slot_allocate( + sd_netlink *nl, + bool floating, + NetlinkSlotType type, + size_t extra, + void *userdata, + const char *description, + sd_netlink_slot **ret) { + + _cleanup_free_ sd_netlink_slot *slot = NULL; + + assert(nl); + assert(ret); + + slot = malloc0(offsetof(sd_netlink_slot, reply_callback) + extra); + if (!slot) + return -ENOMEM; + + slot->n_ref = 1; + slot->netlink = nl; + slot->userdata = userdata; + slot->type = type; + slot->floating = floating; + + if (description) { + slot->description = strdup(description); + if (!slot->description) + return -ENOMEM; + } + + if (!floating) + sd_netlink_ref(nl); + + LIST_PREPEND(slots, nl->slots, slot); + + *ret = TAKE_PTR(slot); + + return 0; +} + +void netlink_slot_disconnect(sd_netlink_slot *slot, bool unref) { + sd_netlink *nl; + + assert(slot); + + nl = slot->netlink; + if (!nl) + return; + + switch (slot->type) { + + case NETLINK_REPLY_CALLBACK: + (void) hashmap_remove(nl->reply_callbacks, &slot->reply_callback.serial); + + if (slot->reply_callback.timeout != USEC_INFINITY) + prioq_remove(nl->reply_callbacks_prioq, &slot->reply_callback, &slot->reply_callback.prioq_idx); + + break; + case NETLINK_MATCH_CALLBACK: + LIST_REMOVE(match_callbacks, nl->match_callbacks, &slot->match_callback); + + for (size_t i = 0; i < slot->match_callback.n_groups; i++) + (void) socket_broadcast_group_unref(nl, slot->match_callback.groups[i]); + + slot->match_callback.n_groups = 0; + slot->match_callback.groups = mfree(slot->match_callback.groups); + + break; + default: + assert_not_reached(); + } + + slot->type = _NETLINK_SLOT_INVALID; + slot->netlink = NULL; + LIST_REMOVE(slots, nl->slots, slot); + + if (!slot->floating) + sd_netlink_unref(nl); + else if (unref) + sd_netlink_slot_unref(slot); +} + +static sd_netlink_slot* netlink_slot_free(sd_netlink_slot *slot) { + assert(slot); + + netlink_slot_disconnect(slot, false); + + if (slot->destroy_callback) + slot->destroy_callback(slot->userdata); + + free(slot->description); + return mfree(slot); +} + +DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_netlink_slot, sd_netlink_slot, netlink_slot_free); + +sd_netlink *sd_netlink_slot_get_netlink(sd_netlink_slot *slot) { + assert_return(slot, NULL); + + return slot->netlink; +} + +void *sd_netlink_slot_get_userdata(sd_netlink_slot *slot) { + assert_return(slot, NULL); + + return slot->userdata; +} + +void *sd_netlink_slot_set_userdata(sd_netlink_slot *slot, void *userdata) { + void *ret; + + assert_return(slot, NULL); + + ret = slot->userdata; + slot->userdata = userdata; + + return ret; +} + +int sd_netlink_slot_get_destroy_callback(sd_netlink_slot *slot, sd_netlink_destroy_t *callback) { + assert_return(slot, -EINVAL); + + if (callback) + *callback = slot->destroy_callback; + + return !!slot->destroy_callback; +} + +int sd_netlink_slot_set_destroy_callback(sd_netlink_slot *slot, sd_netlink_destroy_t callback) { + assert_return(slot, -EINVAL); + + slot->destroy_callback = callback; + return 0; +} + +int sd_netlink_slot_get_floating(sd_netlink_slot *slot) { + assert_return(slot, -EINVAL); + + return slot->floating; +} + +int sd_netlink_slot_set_floating(sd_netlink_slot *slot, int b) { + assert_return(slot, -EINVAL); + + if (slot->floating == !!b) + return 0; + + if (!slot->netlink) /* Already disconnected */ + return -ESTALE; + + slot->floating = b; + + if (b) { + sd_netlink_slot_ref(slot); + sd_netlink_unref(slot->netlink); + } else { + sd_netlink_ref(slot->netlink); + sd_netlink_slot_unref(slot); + } + + return 1; +} + +int sd_netlink_slot_get_description(sd_netlink_slot *slot, const char **description) { + assert_return(slot, -EINVAL); + + if (description) + *description = slot->description; + + return !!slot->description; +} + +int sd_netlink_slot_set_description(sd_netlink_slot *slot, const char *description) { + assert_return(slot, -EINVAL); + + return free_and_strdup(&slot->description, description); +} diff --git a/src/libsystemd/sd-netlink/netlink-slot.h b/src/libsystemd/sd-netlink/netlink-slot.h new file mode 100644 index 0000000..79de817 --- /dev/null +++ b/src/libsystemd/sd-netlink/netlink-slot.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-netlink.h" + +int netlink_slot_allocate( + sd_netlink *nl, + bool floating, + NetlinkSlotType type, + size_t extra, + void *userdata, + const char *description, + sd_netlink_slot **ret); +void netlink_slot_disconnect(sd_netlink_slot *slot, bool unref); diff --git a/src/libsystemd/sd-netlink/netlink-socket.c b/src/libsystemd/sd-netlink/netlink-socket.c new file mode 100644 index 0000000..64cde89 --- /dev/null +++ b/src/libsystemd/sd-netlink/netlink-socket.c @@ -0,0 +1,459 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <malloc.h> +#include <netinet/in.h> +#include <stdbool.h> +#include <unistd.h> + +#include "sd-netlink.h" + +#include "alloc-util.h" +#include "fd-util.h" +#include "format-util.h" +#include "iovec-util.h" +#include "netlink-internal.h" +#include "netlink-types.h" +#include "socket-util.h" + +static int broadcast_groups_get(sd_netlink *nl) { + _cleanup_free_ uint32_t *groups = NULL; + socklen_t len = 0, old_len; + int r; + + assert(nl); + assert(nl->fd >= 0); + + if (getsockopt(nl->fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, NULL, &len) < 0) { + if (errno != ENOPROTOOPT) + return -errno; + + nl->broadcast_group_dont_leave = true; + return 0; + } + + if (len == 0) + return 0; + + groups = new0(uint32_t, len); + if (!groups) + return -ENOMEM; + + old_len = len; + + if (getsockopt(nl->fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, groups, &len) < 0) + return -errno; + + if (old_len != len) + return -EIO; + + for (unsigned i = 0; i < len; i++) + for (unsigned j = 0; j < sizeof(uint32_t) * 8; j++) + if (groups[i] & (1U << j)) { + unsigned group = i * sizeof(uint32_t) * 8 + j + 1; + + r = hashmap_ensure_put(&nl->broadcast_group_refs, NULL, UINT_TO_PTR(group), UINT_TO_PTR(1)); + if (r < 0) + return r; + } + + return 0; +} + +int socket_bind(sd_netlink *nl) { + socklen_t addrlen; + int r; + + r = setsockopt_int(nl->fd, SOL_NETLINK, NETLINK_PKTINFO, true); + if (r < 0) + return r; + + addrlen = sizeof(nl->sockaddr); + + /* ignore EINVAL to allow binding an already bound socket */ + if (bind(nl->fd, &nl->sockaddr.sa, addrlen) < 0 && errno != EINVAL) + return -errno; + + if (getsockname(nl->fd, &nl->sockaddr.sa, &addrlen) < 0) + return -errno; + + return broadcast_groups_get(nl); +} + +static unsigned broadcast_group_get_ref(sd_netlink *nl, unsigned group) { + assert(nl); + + return PTR_TO_UINT(hashmap_get(nl->broadcast_group_refs, UINT_TO_PTR(group))); +} + +static int broadcast_group_set_ref(sd_netlink *nl, unsigned group, unsigned n_ref) { + int r; + + assert(nl); + + r = hashmap_ensure_allocated(&nl->broadcast_group_refs, NULL); + if (r < 0) + return r; + + return hashmap_replace(nl->broadcast_group_refs, UINT_TO_PTR(group), UINT_TO_PTR(n_ref)); +} + +static int broadcast_group_join(sd_netlink *nl, unsigned group) { + assert(nl); + assert(nl->fd >= 0); + assert(group > 0); + + /* group is "unsigned", but netlink(7) says the argument for NETLINK_ADD_MEMBERSHIP is "int" */ + return setsockopt_int(nl->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, group); +} + +int socket_broadcast_group_ref(sd_netlink *nl, unsigned group) { + unsigned n_ref; + int r; + + assert(nl); + + n_ref = broadcast_group_get_ref(nl, group); + + n_ref++; + + r = broadcast_group_set_ref(nl, group, n_ref); + if (r < 0) + return r; + + if (n_ref > 1) + /* already in the group */ + return 0; + + return broadcast_group_join(nl, group); +} + +static int broadcast_group_leave(sd_netlink *nl, unsigned group) { + assert(nl); + assert(nl->fd >= 0); + assert(group > 0); + + if (nl->broadcast_group_dont_leave) + return 0; + + /* group is "unsigned", but netlink(7) says the argument for NETLINK_DROP_MEMBERSHIP is "int" */ + return setsockopt_int(nl->fd, SOL_NETLINK, NETLINK_DROP_MEMBERSHIP, group); +} + +int socket_broadcast_group_unref(sd_netlink *nl, unsigned group) { + unsigned n_ref; + int r; + + assert(nl); + + n_ref = broadcast_group_get_ref(nl, group); + if (n_ref == 0) + return 0; + + n_ref--; + + r = broadcast_group_set_ref(nl, group, n_ref); + if (r < 0) + return r; + + if (n_ref > 0) + /* still refs left */ + return 0; + + return broadcast_group_leave(nl, group); +} + +/* returns the number of bytes sent, or a negative error code */ +int socket_write_message(sd_netlink *nl, sd_netlink_message *m) { + union sockaddr_union addr = { + .nl.nl_family = AF_NETLINK, + }; + ssize_t k; + + assert(nl); + assert(m); + assert(m->hdr); + + k = sendto(nl->fd, m->hdr, m->hdr->nlmsg_len, 0, &addr.sa, sizeof(addr)); + if (k < 0) + return -errno; + + return k; +} + +static int socket_recv_message(int fd, void *buf, size_t buf_size, uint32_t *ret_mcast_group, bool peek) { + struct iovec iov = IOVEC_MAKE(buf, buf_size); + union sockaddr_union sender; + CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct nl_pktinfo))) control; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_name = &sender, + .msg_namelen = sizeof(sender), + .msg_control = &control, + .msg_controllen = sizeof(control), + }; + ssize_t n; + + assert(fd >= 0); + assert(peek || (buf && buf_size > 0)); + + n = recvmsg_safe(fd, &msg, MSG_TRUNC | (peek ? MSG_PEEK : 0)); + if (n == -ENOBUFS) + return log_debug_errno(n, "sd-netlink: kernel receive buffer overrun"); + else if (ERRNO_IS_NEG_TRANSIENT(n)) { + if (ret_mcast_group) + *ret_mcast_group = 0; + return 0; + } else if (n < 0) + return (int) n; + + if (sender.nl.nl_pid != 0) { + /* not from the kernel, ignore */ + log_debug("sd-netlink: ignoring message from PID %"PRIu32, sender.nl.nl_pid); + + if (peek) { + /* drop the message */ + n = recvmsg_safe(fd, &msg, 0); + if (n < 0) + return (int) n; + } + + if (ret_mcast_group) + *ret_mcast_group = 0; + return 0; + } + + if (!peek && (size_t) n > buf_size) /* message did not fit in read buffer */ + return -EIO; + + if (ret_mcast_group) { + struct nl_pktinfo *pi; + + pi = CMSG_FIND_DATA(&msg, SOL_NETLINK, NETLINK_PKTINFO, struct nl_pktinfo); + if (pi) + *ret_mcast_group = pi->group; + else + *ret_mcast_group = 0; + } + + return (int) n; +} + +DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR( + netlink_message_hash_ops, + void, trivial_hash_func, trivial_compare_func, + sd_netlink_message, sd_netlink_message_unref); + +static int netlink_queue_received_message(sd_netlink *nl, sd_netlink_message *m) { + uint32_t serial; + int r; + + assert(nl); + assert(m); + + if (ordered_set_size(nl->rqueue) >= NETLINK_RQUEUE_MAX) + return log_debug_errno(SYNTHETIC_ERRNO(ENOBUFS), + "sd-netlink: exhausted the read queue size (%d)", NETLINK_RQUEUE_MAX); + + r = ordered_set_ensure_put(&nl->rqueue, &netlink_message_hash_ops, m); + if (r < 0) + return r; + + sd_netlink_message_ref(m); + + if (sd_netlink_message_is_broadcast(m)) + return 0; + + serial = message_get_serial(m); + if (serial == 0) + return 0; + + if (sd_netlink_message_get_errno(m) < 0) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *old = NULL; + + old = hashmap_remove(nl->rqueue_by_serial, UINT32_TO_PTR(serial)); + if (old) + log_debug("sd-netlink: received error message with serial %"PRIu32", but another message with " + "the same serial is already stored in the read queue, replacing.", serial); + } + + r = hashmap_ensure_put(&nl->rqueue_by_serial, &netlink_message_hash_ops, UINT32_TO_PTR(serial), m); + if (r == -EEXIST) { + if (!sd_netlink_message_is_error(m)) + log_debug("sd-netlink: received message with serial %"PRIu32", but another message with " + "the same serial is already stored in the read queue, ignoring.", serial); + return 0; + } + if (r < 0) { + sd_netlink_message_unref(ordered_set_remove(nl->rqueue, m)); + return r; + } + + sd_netlink_message_ref(m); + return 0; +} + +static int netlink_queue_partially_received_message(sd_netlink *nl, sd_netlink_message *m) { + uint32_t serial; + int r; + + assert(nl); + assert(m); + assert(m->hdr->nlmsg_flags & NLM_F_MULTI); + + if (hashmap_size(nl->rqueue_partial_by_serial) >= NETLINK_RQUEUE_MAX) + return log_debug_errno(SYNTHETIC_ERRNO(ENOBUFS), + "sd-netlink: exhausted the partial read queue size (%d)", NETLINK_RQUEUE_MAX); + + serial = message_get_serial(m); + r = hashmap_ensure_put(&nl->rqueue_partial_by_serial, &netlink_message_hash_ops, UINT32_TO_PTR(serial), m); + if (r < 0) + return r; + + sd_netlink_message_ref(m); + return 0; +} + +static int parse_message_one(sd_netlink *nl, uint32_t group, const struct nlmsghdr *hdr, sd_netlink_message **ret) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + size_t size; + int r; + + assert(nl); + assert(hdr); + assert(ret); + + /* not broadcast and not for us */ + if (group == 0 && hdr->nlmsg_pid != nl->sockaddr.nl.nl_pid) + goto finalize; + + /* silently drop noop messages */ + if (hdr->nlmsg_type == NLMSG_NOOP) + goto finalize; + + /* check that we support this message type */ + r = netlink_get_policy_set_and_header_size(nl, hdr->nlmsg_type, NULL, &size); + if (r == -EOPNOTSUPP) { + log_debug("sd-netlink: ignored message with unknown type: %i", hdr->nlmsg_type); + goto finalize; + } + if (r < 0) + return r; + + /* check that the size matches the message type */ + if (hdr->nlmsg_len < NLMSG_LENGTH(size)) { + log_debug("sd-netlink: message is shorter than expected, dropping."); + goto finalize; + } + + r = message_new_empty(nl, &m); + if (r < 0) + return r; + + m->multicast_group = group; + m->hdr = memdup(hdr, hdr->nlmsg_len); + if (!m->hdr) + return -ENOMEM; + + /* seal and parse the top-level message */ + r = sd_netlink_message_rewind(m, nl); + if (r < 0) + return r; + + *ret = TAKE_PTR(m); + return 1; + +finalize: + *ret = NULL; + return 0; +} + +/* On success, the number of bytes received is returned and *ret points to the received message + * which has a valid header and the correct size. + * If nothing useful was received 0 is returned. + * On failure, a negative error code is returned. + */ +int socket_read_message(sd_netlink *nl) { + bool done = false; + uint32_t group; + size_t len; + int r; + + assert(nl); + + /* read nothing, just get the pending message size */ + r = socket_recv_message(nl->fd, NULL, 0, NULL, true); + if (r <= 0) + return r; + len = (size_t) r; + + /* make room for the pending message */ + if (!greedy_realloc((void**) &nl->rbuffer, len, sizeof(uint8_t))) + return -ENOMEM; + + /* read the pending message */ + r = socket_recv_message(nl->fd, nl->rbuffer, MALLOC_SIZEOF_SAFE(nl->rbuffer), &group, false); + if (r <= 0) + return r; + len = (size_t) r; + + if (!NLMSG_OK(nl->rbuffer, len)) { + log_debug("sd-netlink: received invalid message, discarding %zu bytes of incoming message", len); + return 0; + } + + for (struct nlmsghdr *hdr = nl->rbuffer; NLMSG_OK(hdr, len); hdr = NLMSG_NEXT(hdr, len)) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + + r = parse_message_one(nl, group, hdr, &m); + if (r < 0) + return r; + if (r == 0) + continue; + + if (hdr->nlmsg_flags & NLM_F_MULTI) { + if (hdr->nlmsg_type == NLMSG_DONE) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *existing = NULL; + + /* finished reading multi-part message */ + existing = hashmap_remove(nl->rqueue_partial_by_serial, UINT32_TO_PTR(hdr->nlmsg_seq)); + + /* if we receive only NLMSG_DONE, put it into the receive queue. */ + r = netlink_queue_received_message(nl, existing ?: m); + if (r < 0) + return r; + + done = true; + } else { + sd_netlink_message *existing; + + existing = hashmap_get(nl->rqueue_partial_by_serial, UINT32_TO_PTR(hdr->nlmsg_seq)); + if (existing) { + /* This is the continuation of the previously read messages. + * Let's append this message at the end. */ + while (existing->next) + existing = existing->next; + existing->next = TAKE_PTR(m); + } else { + /* This is the first message. Put it into the queue for partially + * received messages. */ + r = netlink_queue_partially_received_message(nl, m); + if (r < 0) + return r; + } + } + + } else { + r = netlink_queue_received_message(nl, m); + if (r < 0) + return r; + + done = true; + } + } + + if (len > 0) + log_debug("sd-netlink: discarding trailing %zu bytes of incoming message", len); + + return done; +} diff --git a/src/libsystemd/sd-netlink/netlink-types-genl.c b/src/libsystemd/sd-netlink/netlink-types-genl.c new file mode 100644 index 0000000..6fe9adc --- /dev/null +++ b/src/libsystemd/sd-netlink/netlink-types-genl.c @@ -0,0 +1,251 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <netinet/in.h> +#include <sys/socket.h> +#include <linux/batman_adv.h> +#include <linux/fou.h> +#include <linux/genetlink.h> +#include <linux/if.h> +#include <linux/if_macsec.h> +#include <linux/l2tp.h> +#include <linux/nl80211.h> +#include <linux/wireguard.h> + +#include "missing_network.h" +#include "netlink-genl.h" +#include "netlink-types-internal.h" + +/***************** genl ctrl type systems *****************/ +static const NLAPolicy genl_ctrl_mcast_group_policies[] = { + [CTRL_ATTR_MCAST_GRP_NAME] = BUILD_POLICY(STRING), + [CTRL_ATTR_MCAST_GRP_ID] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(genl_ctrl_mcast_group); + +static const NLAPolicy genl_ctrl_ops_policies[] = { + [CTRL_ATTR_OP_ID] = BUILD_POLICY(U32), + [CTRL_ATTR_OP_FLAGS] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(genl_ctrl_ops); + +static const NLAPolicy genl_ctrl_policies[] = { + [CTRL_ATTR_FAMILY_ID] = BUILD_POLICY(U16), + [CTRL_ATTR_FAMILY_NAME] = BUILD_POLICY(STRING), + [CTRL_ATTR_VERSION] = BUILD_POLICY(U32), + [CTRL_ATTR_HDRSIZE] = BUILD_POLICY(U32), + [CTRL_ATTR_MAXATTR] = BUILD_POLICY(U32), + [CTRL_ATTR_OPS] = BUILD_POLICY_NESTED(genl_ctrl_ops), + [CTRL_ATTR_MCAST_GROUPS] = BUILD_POLICY_NESTED(genl_ctrl_mcast_group), + /* + [CTRL_ATTR_POLICY] = { .type = NETLINK_TYPE_NESTED, }, + [CTRL_ATTR_OP_POLICY] = { .type = NETLINK_TYPE_NESTED, } + */ + [CTRL_ATTR_OP] = BUILD_POLICY(U32), +}; + +/***************** genl batadv type systems *****************/ +static const NLAPolicy genl_batadv_policies[] = { + [BATADV_ATTR_VERSION] = BUILD_POLICY(STRING), + [BATADV_ATTR_ALGO_NAME] = BUILD_POLICY(STRING), + [BATADV_ATTR_MESH_IFINDEX] = BUILD_POLICY(U32), + [BATADV_ATTR_MESH_IFNAME] = BUILD_POLICY_WITH_SIZE(STRING, IFNAMSIZ), + [BATADV_ATTR_MESH_ADDRESS] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN), + [BATADV_ATTR_HARD_IFINDEX] = BUILD_POLICY(U32), + [BATADV_ATTR_HARD_IFNAME] = BUILD_POLICY_WITH_SIZE(STRING, IFNAMSIZ), + [BATADV_ATTR_HARD_ADDRESS] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN), + [BATADV_ATTR_ORIG_ADDRESS] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN), + [BATADV_ATTR_TPMETER_RESULT] = BUILD_POLICY(U8), + [BATADV_ATTR_TPMETER_TEST_TIME] = BUILD_POLICY(U32), + [BATADV_ATTR_TPMETER_BYTES] = BUILD_POLICY(U64), + [BATADV_ATTR_TPMETER_COOKIE] = BUILD_POLICY(U32), + [BATADV_ATTR_PAD] = BUILD_POLICY(UNSPEC), + [BATADV_ATTR_ACTIVE] = BUILD_POLICY(FLAG), + [BATADV_ATTR_TT_ADDRESS] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN), + [BATADV_ATTR_TT_TTVN] = BUILD_POLICY(U8), + [BATADV_ATTR_TT_LAST_TTVN] = BUILD_POLICY(U8), + [BATADV_ATTR_TT_CRC32] = BUILD_POLICY(U32), + [BATADV_ATTR_TT_VID] = BUILD_POLICY(U16), + [BATADV_ATTR_TT_FLAGS] = BUILD_POLICY(U32), + [BATADV_ATTR_FLAG_BEST] = BUILD_POLICY(FLAG), + [BATADV_ATTR_LAST_SEEN_MSECS] = BUILD_POLICY(U32), + [BATADV_ATTR_NEIGH_ADDRESS] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN), + [BATADV_ATTR_TQ] = BUILD_POLICY(U8), + [BATADV_ATTR_THROUGHPUT] = BUILD_POLICY(U32), + [BATADV_ATTR_BANDWIDTH_UP] = BUILD_POLICY(U32), + [BATADV_ATTR_BANDWIDTH_DOWN] = BUILD_POLICY(U32), + [BATADV_ATTR_ROUTER] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN), + [BATADV_ATTR_BLA_OWN] = BUILD_POLICY(FLAG), + [BATADV_ATTR_BLA_ADDRESS] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN), + [BATADV_ATTR_BLA_VID] = BUILD_POLICY(U16), + [BATADV_ATTR_BLA_BACKBONE] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN), + [BATADV_ATTR_BLA_CRC] = BUILD_POLICY(U16), + [BATADV_ATTR_DAT_CACHE_IP4ADDRESS] = BUILD_POLICY(U32), + [BATADV_ATTR_DAT_CACHE_HWADDRESS] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN), + [BATADV_ATTR_DAT_CACHE_VID] = BUILD_POLICY(U16), + [BATADV_ATTR_MCAST_FLAGS] = BUILD_POLICY(U32), + [BATADV_ATTR_MCAST_FLAGS_PRIV] = BUILD_POLICY(U32), + [BATADV_ATTR_VLANID] = BUILD_POLICY(U16), + [BATADV_ATTR_AGGREGATED_OGMS_ENABLED] = BUILD_POLICY(U8), + [BATADV_ATTR_AP_ISOLATION_ENABLED] = BUILD_POLICY(U8), + [BATADV_ATTR_ISOLATION_MARK] = BUILD_POLICY(U32), + [BATADV_ATTR_ISOLATION_MASK] = BUILD_POLICY(U32), + [BATADV_ATTR_BONDING_ENABLED] = BUILD_POLICY(U8), + [BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED] = BUILD_POLICY(U8), + [BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED] = BUILD_POLICY(U8), + [BATADV_ATTR_FRAGMENTATION_ENABLED] = BUILD_POLICY(U8), + [BATADV_ATTR_GW_BANDWIDTH_DOWN] = BUILD_POLICY(U32), + [BATADV_ATTR_GW_BANDWIDTH_UP] = BUILD_POLICY(U32), + [BATADV_ATTR_GW_MODE] = BUILD_POLICY(U8), + [BATADV_ATTR_GW_SEL_CLASS] = BUILD_POLICY(U32), + [BATADV_ATTR_HOP_PENALTY] = BUILD_POLICY(U8), + [BATADV_ATTR_LOG_LEVEL] = BUILD_POLICY(U32), + [BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED] = BUILD_POLICY(U8), + [BATADV_ATTR_MULTICAST_FANOUT] = BUILD_POLICY(U32), + [BATADV_ATTR_NETWORK_CODING_ENABLED] = BUILD_POLICY(U8), + [BATADV_ATTR_ORIG_INTERVAL] = BUILD_POLICY(U32), + [BATADV_ATTR_ELP_INTERVAL] = BUILD_POLICY(U32), + [BATADV_ATTR_THROUGHPUT_OVERRIDE] = BUILD_POLICY(U32), +}; + +/***************** genl fou type systems *****************/ +static const NLAPolicy genl_fou_policies[] = { + [FOU_ATTR_PORT] = BUILD_POLICY(U16), + [FOU_ATTR_AF] = BUILD_POLICY(U8), + [FOU_ATTR_IPPROTO] = BUILD_POLICY(U8), + [FOU_ATTR_TYPE] = BUILD_POLICY(U8), + [FOU_ATTR_REMCSUM_NOPARTIAL] = BUILD_POLICY(FLAG), + [FOU_ATTR_LOCAL_V4] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in_addr)), + [FOU_ATTR_PEER_V4] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in_addr)), + [FOU_ATTR_LOCAL_V6] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)), + [FOU_ATTR_PEER_V6] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)), + [FOU_ATTR_PEER_PORT] = BUILD_POLICY(U16), + [FOU_ATTR_IFINDEX] = BUILD_POLICY(U32), +}; + +/***************** genl l2tp type systems *****************/ +static const NLAPolicy genl_l2tp_policies[] = { + [L2TP_ATTR_PW_TYPE] = BUILD_POLICY(U16), + [L2TP_ATTR_ENCAP_TYPE] = BUILD_POLICY(U16), + [L2TP_ATTR_OFFSET] = BUILD_POLICY(U16), + [L2TP_ATTR_DATA_SEQ] = BUILD_POLICY(U16), + [L2TP_ATTR_L2SPEC_TYPE] = BUILD_POLICY(U8), + [L2TP_ATTR_L2SPEC_LEN] = BUILD_POLICY(U8), + [L2TP_ATTR_PROTO_VERSION] = BUILD_POLICY(U8), + [L2TP_ATTR_IFNAME] = BUILD_POLICY(STRING), + [L2TP_ATTR_CONN_ID] = BUILD_POLICY(U32), + [L2TP_ATTR_PEER_CONN_ID] = BUILD_POLICY(U32), + [L2TP_ATTR_SESSION_ID] = BUILD_POLICY(U32), + [L2TP_ATTR_PEER_SESSION_ID] = BUILD_POLICY(U32), + [L2TP_ATTR_UDP_CSUM] = BUILD_POLICY(U8), + [L2TP_ATTR_VLAN_ID] = BUILD_POLICY(U16), + [L2TP_ATTR_RECV_SEQ] = BUILD_POLICY(U8), + [L2TP_ATTR_SEND_SEQ] = BUILD_POLICY(U8), + [L2TP_ATTR_LNS_MODE] = BUILD_POLICY(U8), + [L2TP_ATTR_USING_IPSEC] = BUILD_POLICY(U8), + [L2TP_ATTR_FD] = BUILD_POLICY(U32), + [L2TP_ATTR_IP_SADDR] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in_addr)), + [L2TP_ATTR_IP_DADDR] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in_addr)), + [L2TP_ATTR_UDP_SPORT] = BUILD_POLICY(U16), + [L2TP_ATTR_UDP_DPORT] = BUILD_POLICY(U16), + [L2TP_ATTR_IP6_SADDR] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)), + [L2TP_ATTR_IP6_DADDR] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)), + [L2TP_ATTR_UDP_ZERO_CSUM6_TX] = BUILD_POLICY(FLAG), + [L2TP_ATTR_UDP_ZERO_CSUM6_RX] = BUILD_POLICY(FLAG), +}; + +/***************** genl macsec type systems *****************/ +static const NLAPolicy genl_macsec_rxsc_policies[] = { + [MACSEC_RXSC_ATTR_SCI] = BUILD_POLICY(U64), +}; + +DEFINE_POLICY_SET(genl_macsec_rxsc); + +static const NLAPolicy genl_macsec_sa_policies[] = { + [MACSEC_SA_ATTR_AN] = BUILD_POLICY(U8), + [MACSEC_SA_ATTR_ACTIVE] = BUILD_POLICY(U8), + [MACSEC_SA_ATTR_PN] = BUILD_POLICY(U32), + [MACSEC_SA_ATTR_KEYID] = BUILD_POLICY_WITH_SIZE(BINARY, MACSEC_KEYID_LEN), + [MACSEC_SA_ATTR_KEY] = BUILD_POLICY_WITH_SIZE(BINARY, MACSEC_MAX_KEY_LEN), +}; + +DEFINE_POLICY_SET(genl_macsec_sa); + +static const NLAPolicy genl_macsec_policies[] = { + [MACSEC_ATTR_IFINDEX] = BUILD_POLICY(U32), + [MACSEC_ATTR_RXSC_CONFIG] = BUILD_POLICY_NESTED(genl_macsec_rxsc), + [MACSEC_ATTR_SA_CONFIG] = BUILD_POLICY_NESTED(genl_macsec_sa), +}; + +/***************** genl NetLabel type systems *****************/ +static const NLAPolicy genl_netlabel_policies[] = { + [NLBL_UNLABEL_A_IPV4ADDR] = BUILD_POLICY(IN_ADDR), + [NLBL_UNLABEL_A_IPV4MASK] = BUILD_POLICY(IN_ADDR), + [NLBL_UNLABEL_A_IPV6ADDR] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)), + [NLBL_UNLABEL_A_IPV6MASK] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)), + [NLBL_UNLABEL_A_IFACE] = BUILD_POLICY_WITH_SIZE(STRING, IFNAMSIZ-1), + [NLBL_UNLABEL_A_SECCTX] = BUILD_POLICY(STRING), +}; + +/***************** genl nl80211 type systems *****************/ +static const NLAPolicy genl_nl80211_policies[] = { + [NL80211_ATTR_WIPHY] = BUILD_POLICY(U32), + [NL80211_ATTR_WIPHY_NAME] = BUILD_POLICY(STRING), + [NL80211_ATTR_IFINDEX] = BUILD_POLICY(U32), + [NL80211_ATTR_IFNAME] = BUILD_POLICY_WITH_SIZE(STRING, IFNAMSIZ-1), + [NL80211_ATTR_IFTYPE] = BUILD_POLICY(U32), + [NL80211_ATTR_MAC] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN), + [NL80211_ATTR_SSID] = BUILD_POLICY_WITH_SIZE(BINARY, IEEE80211_MAX_SSID_LEN), + [NL80211_ATTR_STATUS_CODE] = BUILD_POLICY(U16), + [NL80211_ATTR_4ADDR] = BUILD_POLICY(U8), +}; + +/***************** genl wireguard type systems *****************/ +static const NLAPolicy genl_wireguard_allowedip_policies[] = { + [WGALLOWEDIP_A_FAMILY] = BUILD_POLICY(U16), + [WGALLOWEDIP_A_IPADDR] = BUILD_POLICY(IN_ADDR), + [WGALLOWEDIP_A_CIDR_MASK] = BUILD_POLICY(U8), +}; + +DEFINE_POLICY_SET(genl_wireguard_allowedip); + +static const NLAPolicy genl_wireguard_peer_policies[] = { + [WGPEER_A_PUBLIC_KEY] = BUILD_POLICY_WITH_SIZE(BINARY, WG_KEY_LEN), + [WGPEER_A_FLAGS] = BUILD_POLICY(U32), + [WGPEER_A_PRESHARED_KEY] = BUILD_POLICY_WITH_SIZE(BINARY, WG_KEY_LEN), + [WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL] = BUILD_POLICY(U16), + [WGPEER_A_ENDPOINT] = BUILD_POLICY(SOCKADDR), + [WGPEER_A_ALLOWEDIPS] = BUILD_POLICY_NESTED(genl_wireguard_allowedip), +}; + +DEFINE_POLICY_SET(genl_wireguard_peer); + +static const NLAPolicy genl_wireguard_policies[] = { + [WGDEVICE_A_IFINDEX] = BUILD_POLICY(U32), + [WGDEVICE_A_IFNAME] = BUILD_POLICY_WITH_SIZE(STRING, IFNAMSIZ-1), + [WGDEVICE_A_FLAGS] = BUILD_POLICY(U32), + [WGDEVICE_A_PRIVATE_KEY] = BUILD_POLICY_WITH_SIZE(BINARY, WG_KEY_LEN), + [WGDEVICE_A_LISTEN_PORT] = BUILD_POLICY(U16), + [WGDEVICE_A_FWMARK] = BUILD_POLICY(U32), + [WGDEVICE_A_PEERS] = BUILD_POLICY_NESTED(genl_wireguard_peer), +}; + +/***************** genl families *****************/ +static const NLAPolicySetUnionElement genl_policy_set_union_elements[] = { + BUILD_UNION_ELEMENT_BY_STRING(CTRL_GENL_NAME, genl_ctrl), + BUILD_UNION_ELEMENT_BY_STRING(BATADV_NL_NAME, genl_batadv), + BUILD_UNION_ELEMENT_BY_STRING(FOU_GENL_NAME, genl_fou), + BUILD_UNION_ELEMENT_BY_STRING(L2TP_GENL_NAME, genl_l2tp), + BUILD_UNION_ELEMENT_BY_STRING(MACSEC_GENL_NAME, genl_macsec), + BUILD_UNION_ELEMENT_BY_STRING(NETLBL_NLTYPE_UNLABELED_NAME, genl_netlabel), + BUILD_UNION_ELEMENT_BY_STRING(NL80211_GENL_NAME, genl_nl80211), + BUILD_UNION_ELEMENT_BY_STRING(WG_GENL_NAME, genl_wireguard), +}; + +/* This is the root type system union, so match_attribute is not necessary. */ +DEFINE_POLICY_SET_UNION(genl, 0); + +const NLAPolicySet *genl_get_policy_set_by_name(const char *name) { + return policy_set_union_get_policy_set_by_string(&genl_policy_set_union, name); +} diff --git a/src/libsystemd/sd-netlink/netlink-types-internal.h b/src/libsystemd/sd-netlink/netlink-types-internal.h new file mode 100644 index 0000000..1412514 --- /dev/null +++ b/src/libsystemd/sd-netlink/netlink-types-internal.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "macro.h" +#include "netlink-types.h" + +/* C.f. see 'struct nla_policy' at include/net/netlink.h. */ +struct NLAPolicy { + NLAType type; + size_t size; + union { + const NLAPolicySet *policy_set; + const NLAPolicySetUnion *policy_set_union; + }; +}; + +struct NLAPolicySet { + uint16_t count; + const NLAPolicy *policies; +}; + +typedef struct NLAPolicySetUnionElement { + union { + int family; /* used by NETLINK_TYPE_NESTED_UNION_BY_FAMILY */ + const char *string; /* used by NETLINK_TYPE_NESTED_UNION_BY_STRING */ + }; + NLAPolicySet policy_set; +} NLAPolicySetUnionElement; + +struct NLAPolicySetUnion { + size_t count; + const NLAPolicySetUnionElement *elements; + uint16_t match_attribute; /* used by NETLINK_TYPE_NESTED_UNION_BY_STRING */ +}; + +#define BUILD_POLICY_WITH_SIZE(t, n) \ + { .type = NETLINK_TYPE_##t, .size = n } +#define BUILD_POLICY(t) \ + BUILD_POLICY_WITH_SIZE(t, 0) +#define BUILD_POLICY_NESTED_WITH_SIZE(name, n) \ + { .type = NETLINK_TYPE_NESTED, .size = n, .policy_set = &name##_policy_set } +#define BUILD_POLICY_NESTED(name) \ + BUILD_POLICY_NESTED_WITH_SIZE(name, 0) +#define _BUILD_POLICY_NESTED_UNION(name, by) \ + { .type = NETLINK_TYPE_NESTED_UNION_BY_##by, .policy_set_union = &name##_policy_set_union } +#define BUILD_POLICY_NESTED_UNION_BY_STRING(name) \ + _BUILD_POLICY_NESTED_UNION(name, STRING) +#define BUILD_POLICY_NESTED_UNION_BY_FAMILY(name) \ + _BUILD_POLICY_NESTED_UNION(name, FAMILY) + +#define _BUILD_POLICY_SET(name) \ + { .count = ELEMENTSOF(name##_policies), .policies = name##_policies } +#define DEFINE_POLICY_SET(name) \ + static const NLAPolicySet name##_policy_set = _BUILD_POLICY_SET(name) + +# define BUILD_UNION_ELEMENT_BY_STRING(s, name) \ + { .string = s, .policy_set = _BUILD_POLICY_SET(name) } +# define BUILD_UNION_ELEMENT_BY_FAMILY(f, name) \ + { .family = f, .policy_set = _BUILD_POLICY_SET(name) } + +#define DEFINE_POLICY_SET_UNION(name, attr) \ + static const NLAPolicySetUnion name##_policy_set_union = { \ + .count = ELEMENTSOF(name##_policy_set_union_elements), \ + .elements = name##_policy_set_union_elements, \ + .match_attribute = attr, \ + } diff --git a/src/libsystemd/sd-netlink/netlink-types-nfnl.c b/src/libsystemd/sd-netlink/netlink-types-nfnl.c new file mode 100644 index 0000000..8ef4d45 --- /dev/null +++ b/src/libsystemd/sd-netlink/netlink-types-nfnl.c @@ -0,0 +1,194 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <linux/if.h> +#include <linux/netfilter/nf_tables.h> +#include <linux/netfilter/nfnetlink.h> + +#include "netlink-types-internal.h" + +static const NLAPolicy nfnl_nft_table_policies[] = { + [NFTA_TABLE_NAME] = BUILD_POLICY_WITH_SIZE(STRING, NFT_TABLE_MAXNAMELEN - 1), + [NFTA_TABLE_FLAGS] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(nfnl_nft_table); + +static const NLAPolicy nfnl_nft_chain_hook_policies[] = { + [NFTA_HOOK_HOOKNUM] = BUILD_POLICY(U32), + [NFTA_HOOK_PRIORITY] = BUILD_POLICY(U32), + [NFTA_HOOK_DEV] = BUILD_POLICY_WITH_SIZE(STRING, IFNAMSIZ - 1), +}; + +DEFINE_POLICY_SET(nfnl_nft_chain_hook); + +static const NLAPolicy nfnl_nft_chain_policies[] = { + [NFTA_CHAIN_TABLE] = BUILD_POLICY_WITH_SIZE(STRING, NFT_TABLE_MAXNAMELEN - 1), + [NFTA_CHAIN_NAME] = BUILD_POLICY_WITH_SIZE(STRING, NFT_TABLE_MAXNAMELEN - 1), + [NFTA_CHAIN_HOOK] = BUILD_POLICY_NESTED(nfnl_nft_chain_hook), + [NFTA_CHAIN_TYPE] = BUILD_POLICY_WITH_SIZE(STRING, 16), + [NFTA_CHAIN_FLAGS] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(nfnl_nft_chain); + +static const NLAPolicy nfnl_nft_expr_meta_policies[] = { + [NFTA_META_DREG] = BUILD_POLICY(U32), + [NFTA_META_KEY] = BUILD_POLICY(U32), + [NFTA_META_SREG] = BUILD_POLICY(U32), +}; + +static const NLAPolicy nfnl_nft_expr_payload_policies[] = { + [NFTA_PAYLOAD_DREG] = BUILD_POLICY(U32), + [NFTA_PAYLOAD_BASE] = BUILD_POLICY(U32), + [NFTA_PAYLOAD_OFFSET] = BUILD_POLICY(U32), + [NFTA_PAYLOAD_LEN] = BUILD_POLICY(U32), +}; + +static const NLAPolicy nfnl_nft_expr_nat_policies[] = { + [NFTA_NAT_TYPE] = BUILD_POLICY(U32), + [NFTA_NAT_FAMILY] = BUILD_POLICY(U32), + [NFTA_NAT_REG_ADDR_MIN] = BUILD_POLICY(U32), + [NFTA_NAT_REG_ADDR_MAX] = BUILD_POLICY(U32), + [NFTA_NAT_REG_PROTO_MIN] = BUILD_POLICY(U32), + [NFTA_NAT_REG_PROTO_MAX] = BUILD_POLICY(U32), + [NFTA_NAT_FLAGS] = BUILD_POLICY(U32), +}; + +static const NLAPolicy nfnl_nft_data_policies[] = { + [NFTA_DATA_VALUE] = { .type = NETLINK_TYPE_BINARY }, +}; + +DEFINE_POLICY_SET(nfnl_nft_data); + +static const NLAPolicy nfnl_nft_expr_bitwise_policies[] = { + [NFTA_BITWISE_SREG] = BUILD_POLICY(U32), + [NFTA_BITWISE_DREG] = BUILD_POLICY(U32), + [NFTA_BITWISE_LEN] = BUILD_POLICY(U32), + [NFTA_BITWISE_MASK] = BUILD_POLICY_NESTED(nfnl_nft_data), + [NFTA_BITWISE_XOR] = BUILD_POLICY_NESTED(nfnl_nft_data), +}; + +static const NLAPolicy nfnl_nft_expr_cmp_policies[] = { + [NFTA_CMP_SREG] = BUILD_POLICY(U32), + [NFTA_CMP_OP] = BUILD_POLICY(U32), + [NFTA_CMP_DATA] = BUILD_POLICY_NESTED(nfnl_nft_data), +}; + +static const NLAPolicy nfnl_nft_expr_fib_policies[] = { + [NFTA_FIB_DREG] = BUILD_POLICY(U32), + [NFTA_FIB_RESULT] = BUILD_POLICY(U32), + [NFTA_FIB_FLAGS] = BUILD_POLICY(U32), +}; + +static const NLAPolicy nfnl_nft_expr_lookup_policies[] = { + [NFTA_LOOKUP_SET] = { .type = NETLINK_TYPE_STRING }, + [NFTA_LOOKUP_SREG] = BUILD_POLICY(U32), + [NFTA_LOOKUP_DREG] = BUILD_POLICY(U32), + [NFTA_LOOKUP_FLAGS] = BUILD_POLICY(U32), +}; + +static const NLAPolicy nfnl_nft_expr_masq_policies[] = { + [NFTA_MASQ_FLAGS] = BUILD_POLICY(U32), + [NFTA_MASQ_REG_PROTO_MIN] = BUILD_POLICY(U32), + [NFTA_MASQ_REG_PROTO_MAX] = BUILD_POLICY(U32), +}; + +static const NLAPolicySetUnionElement nfnl_expr_data_policy_set_union_elements[] = { + BUILD_UNION_ELEMENT_BY_STRING("bitwise", nfnl_nft_expr_bitwise), + BUILD_UNION_ELEMENT_BY_STRING("cmp", nfnl_nft_expr_cmp), + BUILD_UNION_ELEMENT_BY_STRING("fib", nfnl_nft_expr_fib), + BUILD_UNION_ELEMENT_BY_STRING("lookup", nfnl_nft_expr_lookup), + BUILD_UNION_ELEMENT_BY_STRING("masq", nfnl_nft_expr_masq), + BUILD_UNION_ELEMENT_BY_STRING("meta", nfnl_nft_expr_meta), + BUILD_UNION_ELEMENT_BY_STRING("nat", nfnl_nft_expr_nat), + BUILD_UNION_ELEMENT_BY_STRING("payload", nfnl_nft_expr_payload), +}; + +DEFINE_POLICY_SET_UNION(nfnl_expr_data, NFTA_EXPR_NAME); + +static const NLAPolicy nfnl_nft_rule_expr_policies[] = { + [NFTA_EXPR_NAME] = BUILD_POLICY_WITH_SIZE(STRING, 16), + [NFTA_EXPR_DATA] = BUILD_POLICY_NESTED_UNION_BY_STRING(nfnl_expr_data), +}; + +DEFINE_POLICY_SET(nfnl_nft_rule_expr); + +static const NLAPolicy nfnl_nft_rule_policies[] = { + [NFTA_RULE_TABLE] = BUILD_POLICY_WITH_SIZE(STRING, NFT_TABLE_MAXNAMELEN - 1), + [NFTA_RULE_CHAIN] = BUILD_POLICY_WITH_SIZE(STRING, NFT_TABLE_MAXNAMELEN - 1), + [NFTA_RULE_EXPRESSIONS] = BUILD_POLICY_NESTED(nfnl_nft_rule_expr), +}; + +DEFINE_POLICY_SET(nfnl_nft_rule); + +static const NLAPolicy nfnl_nft_set_policies[] = { + [NFTA_SET_TABLE] = BUILD_POLICY_WITH_SIZE(STRING, NFT_TABLE_MAXNAMELEN - 1), + [NFTA_SET_NAME] = BUILD_POLICY_WITH_SIZE(STRING, NFT_TABLE_MAXNAMELEN - 1), + [NFTA_SET_FLAGS] = BUILD_POLICY(U32), + [NFTA_SET_KEY_TYPE] = BUILD_POLICY(U32), + [NFTA_SET_KEY_LEN] = BUILD_POLICY(U32), + [NFTA_SET_DATA_TYPE] = BUILD_POLICY(U32), + [NFTA_SET_DATA_LEN] = BUILD_POLICY(U32), + [NFTA_SET_POLICY] = BUILD_POLICY(U32), + [NFTA_SET_ID] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(nfnl_nft_set); + +static const NLAPolicy nfnl_nft_setelem_policies[] = { + [NFTA_SET_ELEM_KEY] = BUILD_POLICY_NESTED(nfnl_nft_data), + [NFTA_SET_ELEM_DATA] = BUILD_POLICY_NESTED(nfnl_nft_data), + [NFTA_SET_ELEM_FLAGS] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(nfnl_nft_setelem); + +static const NLAPolicy nfnl_nft_setelem_list_policies[] = { + [NFTA_SET_ELEM_LIST_TABLE] = BUILD_POLICY_WITH_SIZE(STRING, NFT_TABLE_MAXNAMELEN - 1), + [NFTA_SET_ELEM_LIST_SET] = BUILD_POLICY_WITH_SIZE(STRING, NFT_TABLE_MAXNAMELEN - 1), + [NFTA_SET_ELEM_LIST_ELEMENTS] = BUILD_POLICY_NESTED(nfnl_nft_setelem), +}; + +DEFINE_POLICY_SET(nfnl_nft_setelem_list); + +static const NLAPolicy nfnl_subsys_nft_policies[] = { + [NFT_MSG_DELTABLE] = BUILD_POLICY_NESTED_WITH_SIZE(nfnl_nft_table, sizeof(struct nfgenmsg)), + [NFT_MSG_NEWTABLE] = BUILD_POLICY_NESTED_WITH_SIZE(nfnl_nft_table, sizeof(struct nfgenmsg)), + [NFT_MSG_NEWCHAIN] = BUILD_POLICY_NESTED_WITH_SIZE(nfnl_nft_chain, sizeof(struct nfgenmsg)), + [NFT_MSG_NEWRULE] = BUILD_POLICY_NESTED_WITH_SIZE(nfnl_nft_rule, sizeof(struct nfgenmsg)), + [NFT_MSG_NEWSET] = BUILD_POLICY_NESTED_WITH_SIZE(nfnl_nft_set, sizeof(struct nfgenmsg)), + [NFT_MSG_NEWSETELEM] = BUILD_POLICY_NESTED_WITH_SIZE(nfnl_nft_setelem_list, sizeof(struct nfgenmsg)), + [NFT_MSG_DELSETELEM] = BUILD_POLICY_NESTED_WITH_SIZE(nfnl_nft_setelem_list, sizeof(struct nfgenmsg)), +}; + +DEFINE_POLICY_SET(nfnl_subsys_nft); + +static const NLAPolicy nfnl_msg_batch_policies[] = { + [NFNL_BATCH_GENID] = BUILD_POLICY(U32) +}; + +DEFINE_POLICY_SET(nfnl_msg_batch); + +static const NLAPolicy nfnl_subsys_none_policies[] = { + [NFNL_MSG_BATCH_BEGIN] = BUILD_POLICY_NESTED_WITH_SIZE(nfnl_msg_batch, sizeof(struct nfgenmsg)), + [NFNL_MSG_BATCH_END] = BUILD_POLICY_NESTED_WITH_SIZE(nfnl_msg_batch, sizeof(struct nfgenmsg)), +}; + +DEFINE_POLICY_SET(nfnl_subsys_none); + +static const NLAPolicy nfnl_policies[] = { + [NFNL_SUBSYS_NONE] = BUILD_POLICY_NESTED(nfnl_subsys_none), + [NFNL_SUBSYS_NFTABLES] = BUILD_POLICY_NESTED(nfnl_subsys_nft), +}; + +DEFINE_POLICY_SET(nfnl); + +const NLAPolicy *nfnl_get_policy(uint16_t nlmsg_type) { + const NLAPolicySet *subsys; + + subsys = policy_set_get_policy_set(&nfnl_policy_set, NFNL_SUBSYS_ID(nlmsg_type)); + if (!subsys) + return NULL; + + return policy_set_get_policy(subsys, NFNL_MSG_TYPE(nlmsg_type)); +} diff --git a/src/libsystemd/sd-netlink/netlink-types-rtnl.c b/src/libsystemd/sd-netlink/netlink-types-rtnl.c new file mode 100644 index 0000000..0153456 --- /dev/null +++ b/src/libsystemd/sd-netlink/netlink-types-rtnl.c @@ -0,0 +1,1229 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <netinet/in.h> +#include <sys/socket.h> +#include <linux/batman_adv.h> +#include <linux/can/netlink.h> +#include <linux/can/vxcan.h> +#include <linux/cfm_bridge.h> +#include <linux/fib_rules.h> +#include <linux/fou.h> +#include <linux/if.h> +#include <linux/if_addr.h> +#include <linux/if_addrlabel.h> +#include <linux/if_bridge.h> +#include <linux/if_link.h> +#include <linux/if_macsec.h> +#include <linux/if_tunnel.h> +#include <linux/ip.h> +#include <linux/l2tp.h> +#include <linux/netlink.h> +#include <linux/nexthop.h> +#include <linux/nl80211.h> +#include <linux/pkt_sched.h> +#include <linux/rtnetlink.h> +#include <linux/veth.h> +#include <linux/wireguard.h> + +#include "missing_network.h" +#include "netlink-types-internal.h" + +enum { + BOND_ARP_TARGETS_0, + BOND_ARP_TARGETS_1, + BOND_ARP_TARGETS_2, + BOND_ARP_TARGETS_3, + BOND_ARP_TARGETS_4, + BOND_ARP_TARGETS_5, + BOND_ARP_TARGETS_6, + BOND_ARP_TARGETS_7, + BOND_ARP_TARGETS_8, + BOND_ARP_TARGETS_9, + BOND_ARP_TARGETS_10, + BOND_ARP_TARGETS_11, + BOND_ARP_TARGETS_12, + BOND_ARP_TARGETS_13, + BOND_ARP_TARGETS_14, + BOND_ARP_TARGETS_15, + _BOND_ARP_TARGETS_MAX, +}; + +assert_cc(_BOND_ARP_TARGETS_MAX == BOND_MAX_ARP_TARGETS); + +static const NLAPolicySet rtnl_link_policy_set; + +static const NLAPolicy rtnl_link_info_data_bareudp_policies[] = { + [IFLA_BAREUDP_PORT] = BUILD_POLICY(U16), + [IFLA_BAREUDP_ETHERTYPE] = BUILD_POLICY(U16), + [IFLA_BAREUDP_SRCPORT_MIN] = BUILD_POLICY(U16), + [IFLA_BAREUDP_MULTIPROTO_MODE] = BUILD_POLICY(FLAG), +}; + +static const NLAPolicy rtnl_link_info_data_batadv_policies[] = { + [IFLA_BATADV_ALGO_NAME] = BUILD_POLICY_WITH_SIZE(STRING, 20), +}; + +static const NLAPolicy rtnl_bond_arp_ip_target_policies[] = { + [BOND_ARP_TARGETS_0] = BUILD_POLICY(U32), + [BOND_ARP_TARGETS_1] = BUILD_POLICY(U32), + [BOND_ARP_TARGETS_2] = BUILD_POLICY(U32), + [BOND_ARP_TARGETS_3] = BUILD_POLICY(U32), + [BOND_ARP_TARGETS_4] = BUILD_POLICY(U32), + [BOND_ARP_TARGETS_5] = BUILD_POLICY(U32), + [BOND_ARP_TARGETS_6] = BUILD_POLICY(U32), + [BOND_ARP_TARGETS_7] = BUILD_POLICY(U32), + [BOND_ARP_TARGETS_8] = BUILD_POLICY(U32), + [BOND_ARP_TARGETS_9] = BUILD_POLICY(U32), + [BOND_ARP_TARGETS_10] = BUILD_POLICY(U32), + [BOND_ARP_TARGETS_11] = BUILD_POLICY(U32), + [BOND_ARP_TARGETS_12] = BUILD_POLICY(U32), + [BOND_ARP_TARGETS_13] = BUILD_POLICY(U32), + [BOND_ARP_TARGETS_14] = BUILD_POLICY(U32), + [BOND_ARP_TARGETS_15] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_bond_arp_ip_target); + +static const NLAPolicy rtnl_bond_ad_info_policies[] = { + [IFLA_BOND_AD_INFO_AGGREGATOR] = BUILD_POLICY(U16), + [IFLA_BOND_AD_INFO_NUM_PORTS] = BUILD_POLICY(U16), + [IFLA_BOND_AD_INFO_ACTOR_KEY] = BUILD_POLICY(U16), + [IFLA_BOND_AD_INFO_PARTNER_KEY] = BUILD_POLICY(U16), + [IFLA_BOND_AD_INFO_PARTNER_MAC] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN), +}; + +DEFINE_POLICY_SET(rtnl_bond_ad_info); + +static const NLAPolicy rtnl_link_info_data_bond_policies[] = { + [IFLA_BOND_MODE] = BUILD_POLICY(U8), + [IFLA_BOND_ACTIVE_SLAVE] = BUILD_POLICY(U32), + [IFLA_BOND_MIIMON] = BUILD_POLICY(U32), + [IFLA_BOND_UPDELAY] = BUILD_POLICY(U32), + [IFLA_BOND_DOWNDELAY] = BUILD_POLICY(U32), + [IFLA_BOND_USE_CARRIER] = BUILD_POLICY(U8), + [IFLA_BOND_ARP_INTERVAL] = BUILD_POLICY(U32), + [IFLA_BOND_ARP_IP_TARGET] = BUILD_POLICY_NESTED(rtnl_bond_arp_ip_target), + [IFLA_BOND_ARP_VALIDATE] = BUILD_POLICY(U32), + [IFLA_BOND_ARP_ALL_TARGETS] = BUILD_POLICY(U32), + [IFLA_BOND_PRIMARY] = BUILD_POLICY(U32), + [IFLA_BOND_PRIMARY_RESELECT] = BUILD_POLICY(U8), + [IFLA_BOND_FAIL_OVER_MAC] = BUILD_POLICY(U8), + [IFLA_BOND_XMIT_HASH_POLICY] = BUILD_POLICY(U8), + [IFLA_BOND_RESEND_IGMP] = BUILD_POLICY(U32), + [IFLA_BOND_NUM_PEER_NOTIF] = BUILD_POLICY(U8), + [IFLA_BOND_ALL_SLAVES_ACTIVE] = BUILD_POLICY(U8), + [IFLA_BOND_MIN_LINKS] = BUILD_POLICY(U32), + [IFLA_BOND_LP_INTERVAL] = BUILD_POLICY(U32), + [IFLA_BOND_PACKETS_PER_SLAVE] = BUILD_POLICY(U32), + [IFLA_BOND_AD_LACP_RATE] = BUILD_POLICY(U8), + [IFLA_BOND_AD_SELECT] = BUILD_POLICY(U8), + [IFLA_BOND_AD_INFO] = BUILD_POLICY_NESTED(rtnl_bond_ad_info), + [IFLA_BOND_AD_ACTOR_SYS_PRIO] = BUILD_POLICY(U16), + [IFLA_BOND_AD_USER_PORT_KEY] = BUILD_POLICY(U16), + [IFLA_BOND_AD_ACTOR_SYSTEM] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN), + [IFLA_BOND_TLB_DYNAMIC_LB] = BUILD_POLICY(U8), + [IFLA_BOND_PEER_NOTIF_DELAY] = BUILD_POLICY(U32), +}; + +static const NLAPolicy rtnl_link_info_data_bridge_policies[] = { + [IFLA_BR_FORWARD_DELAY] = BUILD_POLICY(U32), + [IFLA_BR_HELLO_TIME] = BUILD_POLICY(U32), + [IFLA_BR_MAX_AGE] = BUILD_POLICY(U32), + [IFLA_BR_AGEING_TIME] = BUILD_POLICY(U32), + [IFLA_BR_STP_STATE] = BUILD_POLICY(U32), + [IFLA_BR_PRIORITY] = BUILD_POLICY(U16), + [IFLA_BR_VLAN_FILTERING] = BUILD_POLICY(U8), + [IFLA_BR_VLAN_PROTOCOL] = BUILD_POLICY(U16), + [IFLA_BR_GROUP_FWD_MASK] = BUILD_POLICY(U16), + [IFLA_BR_ROOT_ID] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_bridge_id)), + [IFLA_BR_BRIDGE_ID] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_bridge_id)), + [IFLA_BR_ROOT_PORT] = BUILD_POLICY(U16), + [IFLA_BR_ROOT_PATH_COST] = BUILD_POLICY(U32), + [IFLA_BR_TOPOLOGY_CHANGE] = BUILD_POLICY(U8), + [IFLA_BR_TOPOLOGY_CHANGE_DETECTED] = BUILD_POLICY(U8), + [IFLA_BR_HELLO_TIMER] = BUILD_POLICY(U64), + [IFLA_BR_TCN_TIMER] = BUILD_POLICY(U64), + [IFLA_BR_TOPOLOGY_CHANGE_TIMER] = BUILD_POLICY(U64), + [IFLA_BR_GC_TIMER] = BUILD_POLICY(U64), + [IFLA_BR_GROUP_ADDR] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN), + [IFLA_BR_FDB_FLUSH] = BUILD_POLICY(FLAG), + [IFLA_BR_MCAST_ROUTER] = BUILD_POLICY(U8), + [IFLA_BR_MCAST_SNOOPING] = BUILD_POLICY(U8), + [IFLA_BR_MCAST_QUERY_USE_IFADDR] = BUILD_POLICY(U8), + [IFLA_BR_MCAST_QUERIER] = BUILD_POLICY(U8), + [IFLA_BR_MCAST_HASH_ELASTICITY] = BUILD_POLICY(U32), + [IFLA_BR_MCAST_HASH_MAX] = BUILD_POLICY(U32), + [IFLA_BR_MCAST_LAST_MEMBER_CNT] = BUILD_POLICY(U32), + [IFLA_BR_MCAST_STARTUP_QUERY_CNT] = BUILD_POLICY(U32), + [IFLA_BR_MCAST_LAST_MEMBER_INTVL] = BUILD_POLICY(U64), + [IFLA_BR_MCAST_MEMBERSHIP_INTVL] = BUILD_POLICY(U64), + [IFLA_BR_MCAST_QUERIER_INTVL] = BUILD_POLICY(U64), + [IFLA_BR_MCAST_QUERY_INTVL] = BUILD_POLICY(U64), + [IFLA_BR_MCAST_QUERY_RESPONSE_INTVL] = BUILD_POLICY(U64), + [IFLA_BR_MCAST_STARTUP_QUERY_INTVL] = BUILD_POLICY(U64), + [IFLA_BR_NF_CALL_IPTABLES] = BUILD_POLICY(U8), + [IFLA_BR_NF_CALL_IP6TABLES] = BUILD_POLICY(U8), + [IFLA_BR_NF_CALL_ARPTABLES] = BUILD_POLICY(U8), + [IFLA_BR_VLAN_DEFAULT_PVID] = BUILD_POLICY(U16), + [IFLA_BR_VLAN_STATS_ENABLED] = BUILD_POLICY(U8), + [IFLA_BR_MCAST_STATS_ENABLED] = BUILD_POLICY(U8), + [IFLA_BR_MCAST_IGMP_VERSION] = BUILD_POLICY(U8), + [IFLA_BR_MCAST_MLD_VERSION] = BUILD_POLICY(U8), + [IFLA_BR_VLAN_STATS_PER_PORT] = BUILD_POLICY(U8), + [IFLA_BR_MULTI_BOOLOPT] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct br_boolopt_multi)), +}; + +static const NLAPolicy rtnl_link_info_data_can_policies[] = { + [IFLA_CAN_BITTIMING] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct can_bittiming)), + [IFLA_CAN_BITTIMING_CONST] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct can_bittiming_const)), + [IFLA_CAN_CLOCK] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct can_clock)), + [IFLA_CAN_STATE] = BUILD_POLICY(U32), + [IFLA_CAN_CTRLMODE] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct can_ctrlmode)), + [IFLA_CAN_RESTART_MS] = BUILD_POLICY(U32), + [IFLA_CAN_RESTART] = BUILD_POLICY(U32), + [IFLA_CAN_BERR_COUNTER] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct can_berr_counter)), + [IFLA_CAN_DATA_BITTIMING] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct can_bittiming)), + [IFLA_CAN_DATA_BITTIMING_CONST] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct can_bittiming_const)), + [IFLA_CAN_TERMINATION] = BUILD_POLICY(U16), + [IFLA_CAN_TERMINATION_CONST] = BUILD_POLICY(BINARY), /* size = termination_const_cnt * sizeof(u16) */ + [IFLA_CAN_BITRATE_CONST] = BUILD_POLICY(BINARY), /* size = bitrate_const_cnt * sizeof(u32) */ + [IFLA_CAN_DATA_BITRATE_CONST] = BUILD_POLICY(BINARY), /* size = data_bitrate_const_cnt * sizeof(u32) */ + [IFLA_CAN_BITRATE_MAX] = BUILD_POLICY(U32), +}; + +static const NLAPolicy rtnl_link_info_data_geneve_policies[] = { + [IFLA_GENEVE_ID] = BUILD_POLICY(U32), + [IFLA_GENEVE_REMOTE] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in_addr)), + [IFLA_GENEVE_TTL] = BUILD_POLICY(U8), + [IFLA_GENEVE_TOS] = BUILD_POLICY(U8), + [IFLA_GENEVE_PORT] = BUILD_POLICY(U16), + [IFLA_GENEVE_COLLECT_METADATA] = BUILD_POLICY(FLAG), + [IFLA_GENEVE_REMOTE6] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)), + [IFLA_GENEVE_UDP_CSUM] = BUILD_POLICY(U8), + [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = BUILD_POLICY(U8), + [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = BUILD_POLICY(U8), + [IFLA_GENEVE_LABEL] = BUILD_POLICY(U32), + [IFLA_GENEVE_TTL_INHERIT] = BUILD_POLICY(U8), + [IFLA_GENEVE_DF] = BUILD_POLICY(U8), + [IFLA_GENEVE_INNER_PROTO_INHERIT] = BUILD_POLICY(FLAG), +}; + +static const NLAPolicy rtnl_link_info_data_gre_policies[] = { + [IFLA_GRE_LINK] = BUILD_POLICY(U32), + [IFLA_GRE_IFLAGS] = BUILD_POLICY(U16), + [IFLA_GRE_OFLAGS] = BUILD_POLICY(U16), + [IFLA_GRE_IKEY] = BUILD_POLICY(U32), + [IFLA_GRE_OKEY] = BUILD_POLICY(U32), + [IFLA_GRE_LOCAL] = BUILD_POLICY(IN_ADDR), + [IFLA_GRE_REMOTE] = BUILD_POLICY(IN_ADDR), + [IFLA_GRE_TTL] = BUILD_POLICY(U8), + [IFLA_GRE_TOS] = BUILD_POLICY(U8), + [IFLA_GRE_PMTUDISC] = BUILD_POLICY(U8), + [IFLA_GRE_ENCAP_LIMIT] = BUILD_POLICY(U8), + [IFLA_GRE_FLOWINFO] = BUILD_POLICY(U32), + [IFLA_GRE_FLAGS] = BUILD_POLICY(U32), + [IFLA_GRE_ENCAP_TYPE] = BUILD_POLICY(U16), + [IFLA_GRE_ENCAP_FLAGS] = BUILD_POLICY(U16), + [IFLA_GRE_ENCAP_SPORT] = BUILD_POLICY(U16), + [IFLA_GRE_ENCAP_DPORT] = BUILD_POLICY(U16), + [IFLA_GRE_COLLECT_METADATA] = BUILD_POLICY(FLAG), + [IFLA_GRE_IGNORE_DF] = BUILD_POLICY(U8), + [IFLA_GRE_FWMARK] = BUILD_POLICY(U32), + [IFLA_GRE_ERSPAN_INDEX] = BUILD_POLICY(U32), + [IFLA_GRE_ERSPAN_VER] = BUILD_POLICY(U8), + [IFLA_GRE_ERSPAN_DIR] = BUILD_POLICY(U8), + [IFLA_GRE_ERSPAN_HWID] = BUILD_POLICY(U16), +}; + +static const NLAPolicy rtnl_link_info_data_ipoib_policies[] = { + [IFLA_IPOIB_PKEY] = BUILD_POLICY(U16), + [IFLA_IPOIB_MODE] = BUILD_POLICY(U16), + [IFLA_IPOIB_UMCAST] = BUILD_POLICY(U16), +}; + +/* IFLA_IPTUN_ attributes are used in ipv4/ipip.c, ipv6/ip6_tunnel.c, and ipv6/sit.c. And unfortunately, + * IFLA_IPTUN_FLAGS is used with different types, ugh... */ +#define DEFINE_IPTUN_TYPES(name, flags_type) \ + static const NLAPolicy rtnl_link_info_data_##name##_policies[] = { \ + [IFLA_IPTUN_LINK] = BUILD_POLICY(U32), \ + [IFLA_IPTUN_LOCAL] = BUILD_POLICY(IN_ADDR), \ + [IFLA_IPTUN_REMOTE] = BUILD_POLICY(IN_ADDR), \ + [IFLA_IPTUN_TTL] = BUILD_POLICY(U8), \ + [IFLA_IPTUN_TOS] = BUILD_POLICY(U8), \ + [IFLA_IPTUN_ENCAP_LIMIT] = BUILD_POLICY(U8), \ + [IFLA_IPTUN_FLOWINFO] = BUILD_POLICY(U32), \ + [IFLA_IPTUN_FLAGS] = BUILD_POLICY(flags_type), \ + [IFLA_IPTUN_PROTO] = BUILD_POLICY(U8), \ + [IFLA_IPTUN_PMTUDISC] = BUILD_POLICY(U8), \ + [IFLA_IPTUN_6RD_PREFIX] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)), \ + [IFLA_IPTUN_6RD_RELAY_PREFIX] = BUILD_POLICY(U32), \ + [IFLA_IPTUN_6RD_PREFIXLEN] = BUILD_POLICY(U16), \ + [IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = BUILD_POLICY(U16), \ + [IFLA_IPTUN_ENCAP_TYPE] = BUILD_POLICY(U16), \ + [IFLA_IPTUN_ENCAP_FLAGS] = BUILD_POLICY(U16), \ + [IFLA_IPTUN_ENCAP_SPORT] = BUILD_POLICY(U16), \ + [IFLA_IPTUN_ENCAP_DPORT] = BUILD_POLICY(U16), \ + [IFLA_IPTUN_COLLECT_METADATA] = BUILD_POLICY(FLAG), \ + [IFLA_IPTUN_FWMARK] = BUILD_POLICY(U32), \ + } + +DEFINE_IPTUN_TYPES(iptun, U32); /* for ipip and ip6tnl */ +DEFINE_IPTUN_TYPES(sit, U16); /* for sit */ + +static const NLAPolicy rtnl_link_info_data_ipvlan_policies[] = { + [IFLA_IPVLAN_MODE] = BUILD_POLICY(U16), + [IFLA_IPVLAN_FLAGS] = BUILD_POLICY(U16), +}; + +static const NLAPolicy rtnl_link_info_data_macsec_policies[] = { + [IFLA_MACSEC_SCI] = BUILD_POLICY(U64), + [IFLA_MACSEC_PORT] = BUILD_POLICY(U16), + [IFLA_MACSEC_ICV_LEN] = BUILD_POLICY(U8), + [IFLA_MACSEC_CIPHER_SUITE] = BUILD_POLICY(U64), + [IFLA_MACSEC_WINDOW] = BUILD_POLICY(U32), + [IFLA_MACSEC_ENCODING_SA] = BUILD_POLICY(U8), + [IFLA_MACSEC_ENCRYPT] = BUILD_POLICY(U8), + [IFLA_MACSEC_PROTECT] = BUILD_POLICY(U8), + [IFLA_MACSEC_INC_SCI] = BUILD_POLICY(U8), + [IFLA_MACSEC_ES] = BUILD_POLICY(U8), + [IFLA_MACSEC_SCB] = BUILD_POLICY(U8), + [IFLA_MACSEC_REPLAY_PROTECT] = BUILD_POLICY(U8), + [IFLA_MACSEC_VALIDATION] = BUILD_POLICY(U8), + [IFLA_MACSEC_OFFLOAD] = BUILD_POLICY(U8), +}; + +static const NLAPolicy rtnl_macvlan_macaddr_policies[] = { + [IFLA_MACVLAN_MACADDR] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN), +}; + +DEFINE_POLICY_SET(rtnl_macvlan_macaddr); + +static const NLAPolicy rtnl_link_info_data_macvlan_policies[] = { + [IFLA_MACVLAN_MODE] = BUILD_POLICY(U32), + [IFLA_MACVLAN_FLAGS] = BUILD_POLICY(U16), + [IFLA_MACVLAN_MACADDR_MODE] = BUILD_POLICY(U32), + [IFLA_MACVLAN_MACADDR_DATA] = BUILD_POLICY_NESTED(rtnl_macvlan_macaddr), + [IFLA_MACVLAN_MACADDR_COUNT] = BUILD_POLICY(U32), + [IFLA_MACVLAN_BC_QUEUE_LEN] = BUILD_POLICY(U32), + [IFLA_MACVLAN_BC_QUEUE_LEN_USED] = BUILD_POLICY(U32), +}; + +static const NLAPolicy rtnl_link_info_data_tun_policies[] = { + [IFLA_TUN_OWNER] = BUILD_POLICY(U32), + [IFLA_TUN_GROUP] = BUILD_POLICY(U32), + [IFLA_TUN_TYPE] = BUILD_POLICY(U8), + [IFLA_TUN_PI] = BUILD_POLICY(U8), + [IFLA_TUN_VNET_HDR] = BUILD_POLICY(U8), + [IFLA_TUN_PERSIST] = BUILD_POLICY(U8), + [IFLA_TUN_MULTI_QUEUE] = BUILD_POLICY(U8), + [IFLA_TUN_NUM_QUEUES] = BUILD_POLICY(U32), + [IFLA_TUN_NUM_DISABLED_QUEUES] = BUILD_POLICY(U32), +}; + +static const NLAPolicy rtnl_link_info_data_veth_policies[] = { + [VETH_INFO_PEER] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_link, sizeof(struct ifinfomsg)), +}; + +static const NLAPolicy rtnl_vlan_qos_map_policies[] = { + [IFLA_VLAN_QOS_MAPPING] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vlan_qos_mapping)), +}; + +DEFINE_POLICY_SET(rtnl_vlan_qos_map); + +static const NLAPolicy rtnl_link_info_data_vlan_policies[] = { + [IFLA_VLAN_ID] = BUILD_POLICY(U16), + [IFLA_VLAN_FLAGS] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vlan_flags)), + [IFLA_VLAN_EGRESS_QOS] = BUILD_POLICY_NESTED(rtnl_vlan_qos_map), + [IFLA_VLAN_INGRESS_QOS] = BUILD_POLICY_NESTED(rtnl_vlan_qos_map), + [IFLA_VLAN_PROTOCOL] = BUILD_POLICY(U16), +}; + +static const NLAPolicy rtnl_link_info_data_vrf_policies[] = { + [IFLA_VRF_TABLE] = BUILD_POLICY(U32), +}; + +static const NLAPolicy rtnl_link_info_data_vti_policies[] = { + [IFLA_VTI_LINK] = BUILD_POLICY(U32), + [IFLA_VTI_IKEY] = BUILD_POLICY(U32), + [IFLA_VTI_OKEY] = BUILD_POLICY(U32), + [IFLA_VTI_LOCAL] = BUILD_POLICY(IN_ADDR), + [IFLA_VTI_REMOTE] = BUILD_POLICY(IN_ADDR), + [IFLA_VTI_FWMARK] = BUILD_POLICY(U32), +}; + +static const NLAPolicy rtnl_link_info_data_vxcan_policies[] = { + [VXCAN_INFO_PEER] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_link, sizeof(struct ifinfomsg)), +}; + +static const NLAPolicy rtnl_link_info_data_vxlan_policies[] = { + [IFLA_VXLAN_ID] = BUILD_POLICY(U32), + [IFLA_VXLAN_GROUP] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in_addr)), + [IFLA_VXLAN_LINK] = BUILD_POLICY(U32), + [IFLA_VXLAN_LOCAL] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in_addr)), + [IFLA_VXLAN_TTL] = BUILD_POLICY(U8), + [IFLA_VXLAN_TOS] = BUILD_POLICY(U8), + [IFLA_VXLAN_LEARNING] = BUILD_POLICY(U8), + [IFLA_VXLAN_AGEING] = BUILD_POLICY(U32), + [IFLA_VXLAN_LIMIT] = BUILD_POLICY(U32), + [IFLA_VXLAN_PORT_RANGE] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vxlan_port_range)), + [IFLA_VXLAN_PROXY] = BUILD_POLICY(U8), + [IFLA_VXLAN_RSC] = BUILD_POLICY(U8), + [IFLA_VXLAN_L2MISS] = BUILD_POLICY(U8), + [IFLA_VXLAN_L3MISS] = BUILD_POLICY(U8), + [IFLA_VXLAN_PORT] = BUILD_POLICY(U16), + [IFLA_VXLAN_GROUP6] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)), + [IFLA_VXLAN_LOCAL6] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)), + [IFLA_VXLAN_UDP_CSUM] = BUILD_POLICY(U8), + [IFLA_VXLAN_UDP_ZERO_CSUM6_TX] = BUILD_POLICY(U8), + [IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = BUILD_POLICY(U8), + [IFLA_VXLAN_REMCSUM_TX] = BUILD_POLICY(U8), + [IFLA_VXLAN_REMCSUM_RX] = BUILD_POLICY(U8), + [IFLA_VXLAN_GBP] = BUILD_POLICY(FLAG), + [IFLA_VXLAN_REMCSUM_NOPARTIAL] = BUILD_POLICY(FLAG), + [IFLA_VXLAN_COLLECT_METADATA] = BUILD_POLICY(U8), + [IFLA_VXLAN_LABEL] = BUILD_POLICY(U32), + [IFLA_VXLAN_GPE] = BUILD_POLICY(FLAG), + [IFLA_VXLAN_TTL_INHERIT] = BUILD_POLICY(FLAG), + [IFLA_VXLAN_DF] = BUILD_POLICY(U8), +}; + +static const NLAPolicy rtnl_link_info_data_xfrm_policies[] = { + [IFLA_XFRM_LINK] = BUILD_POLICY(U32), + [IFLA_XFRM_IF_ID] = BUILD_POLICY(U32) +}; + +static const NLAPolicySetUnionElement rtnl_link_info_data_policy_set_union_elements[] = { + BUILD_UNION_ELEMENT_BY_STRING("bareudp", rtnl_link_info_data_bareudp), + BUILD_UNION_ELEMENT_BY_STRING("batadv", rtnl_link_info_data_batadv), + BUILD_UNION_ELEMENT_BY_STRING("bond", rtnl_link_info_data_bond), + BUILD_UNION_ELEMENT_BY_STRING("bridge", rtnl_link_info_data_bridge), +/* + BUILD_UNION_ELEMENT_BY_STRING("caif", rtnl_link_info_data_caif), +*/ + BUILD_UNION_ELEMENT_BY_STRING("can", rtnl_link_info_data_can), + BUILD_UNION_ELEMENT_BY_STRING("erspan", rtnl_link_info_data_gre), + BUILD_UNION_ELEMENT_BY_STRING("geneve", rtnl_link_info_data_geneve), + BUILD_UNION_ELEMENT_BY_STRING("gre", rtnl_link_info_data_gre), + BUILD_UNION_ELEMENT_BY_STRING("gretap", rtnl_link_info_data_gre), +/* + BUILD_UNION_ELEMENT_BY_STRING("gtp", rtnl_link_info_data_gtp), + BUILD_UNION_ELEMENT_BY_STRING("hsr", rtnl_link_info_data_hsr), +*/ + BUILD_UNION_ELEMENT_BY_STRING("ip6erspan", rtnl_link_info_data_gre), + BUILD_UNION_ELEMENT_BY_STRING("ip6gre", rtnl_link_info_data_gre), + BUILD_UNION_ELEMENT_BY_STRING("ip6gretap", rtnl_link_info_data_gre), + BUILD_UNION_ELEMENT_BY_STRING("ip6tnl", rtnl_link_info_data_iptun), + BUILD_UNION_ELEMENT_BY_STRING("ipoib", rtnl_link_info_data_ipoib), + BUILD_UNION_ELEMENT_BY_STRING("ipip", rtnl_link_info_data_iptun), + BUILD_UNION_ELEMENT_BY_STRING("ipvlan", rtnl_link_info_data_ipvlan), + BUILD_UNION_ELEMENT_BY_STRING("ipvtap", rtnl_link_info_data_ipvlan), + BUILD_UNION_ELEMENT_BY_STRING("macsec", rtnl_link_info_data_macsec), + BUILD_UNION_ELEMENT_BY_STRING("macvlan", rtnl_link_info_data_macvlan), + BUILD_UNION_ELEMENT_BY_STRING("macvtap", rtnl_link_info_data_macvlan), +/* + BUILD_UNION_ELEMENT_BY_STRING("ppp", rtnl_link_info_data_ppp), + BUILD_UNION_ELEMENT_BY_STRING("rmnet", rtnl_link_info_data_rmnet), +*/ + BUILD_UNION_ELEMENT_BY_STRING("sit", rtnl_link_info_data_sit), + BUILD_UNION_ELEMENT_BY_STRING("tun", rtnl_link_info_data_tun), + BUILD_UNION_ELEMENT_BY_STRING("veth", rtnl_link_info_data_veth), + BUILD_UNION_ELEMENT_BY_STRING("vlan", rtnl_link_info_data_vlan), + BUILD_UNION_ELEMENT_BY_STRING("vrf", rtnl_link_info_data_vrf), + BUILD_UNION_ELEMENT_BY_STRING("vti", rtnl_link_info_data_vti), + BUILD_UNION_ELEMENT_BY_STRING("vti6", rtnl_link_info_data_vti), + BUILD_UNION_ELEMENT_BY_STRING("vxcan", rtnl_link_info_data_vxcan), + BUILD_UNION_ELEMENT_BY_STRING("vxlan", rtnl_link_info_data_vxlan), +/* + BUILD_UNION_ELEMENT_BY_STRING("wwan", rtnl_link_info_data_wwan), +*/ + BUILD_UNION_ELEMENT_BY_STRING("xfrm", rtnl_link_info_data_xfrm), +}; + +DEFINE_POLICY_SET_UNION(rtnl_link_info_data, IFLA_INFO_KIND); + +static const struct NLAPolicy rtnl_bridge_port_policies[] = { + [IFLA_BRPORT_STATE] = BUILD_POLICY(U8), + [IFLA_BRPORT_COST] = BUILD_POLICY(U32), + [IFLA_BRPORT_PRIORITY] = BUILD_POLICY(U16), + [IFLA_BRPORT_MODE] = BUILD_POLICY(U8), + [IFLA_BRPORT_GUARD] = BUILD_POLICY(U8), + [IFLA_BRPORT_PROTECT] = BUILD_POLICY(U8), + [IFLA_BRPORT_FAST_LEAVE] = BUILD_POLICY(U8), + [IFLA_BRPORT_LEARNING] = BUILD_POLICY(U8), + [IFLA_BRPORT_UNICAST_FLOOD] = BUILD_POLICY(U8), + [IFLA_BRPORT_PROXYARP] = BUILD_POLICY(U8), + [IFLA_BRPORT_LEARNING_SYNC] = BUILD_POLICY(U8), + [IFLA_BRPORT_PROXYARP_WIFI] = BUILD_POLICY(U8), + [IFLA_BRPORT_ROOT_ID] = BUILD_POLICY(U8), + [IFLA_BRPORT_BRIDGE_ID] = BUILD_POLICY(U8), + [IFLA_BRPORT_DESIGNATED_PORT] = BUILD_POLICY(U16), + [IFLA_BRPORT_DESIGNATED_COST] = BUILD_POLICY(U16), + [IFLA_BRPORT_ID] = BUILD_POLICY(U16), + [IFLA_BRPORT_NO] = BUILD_POLICY(U16), + [IFLA_BRPORT_TOPOLOGY_CHANGE_ACK] = BUILD_POLICY(U8), + [IFLA_BRPORT_CONFIG_PENDING] = BUILD_POLICY(U8), + [IFLA_BRPORT_MESSAGE_AGE_TIMER] = BUILD_POLICY(U64), + [IFLA_BRPORT_FORWARD_DELAY_TIMER] = BUILD_POLICY(U64), + [IFLA_BRPORT_HOLD_TIMER] = BUILD_POLICY(U64), + [IFLA_BRPORT_FLUSH] = BUILD_POLICY(U8), + [IFLA_BRPORT_MULTICAST_ROUTER] = BUILD_POLICY(U8), + [IFLA_BRPORT_PAD] = BUILD_POLICY(U8), + [IFLA_BRPORT_MCAST_FLOOD] = BUILD_POLICY(U8), + [IFLA_BRPORT_MCAST_TO_UCAST] = BUILD_POLICY(U8), + [IFLA_BRPORT_VLAN_TUNNEL] = BUILD_POLICY(U8), + [IFLA_BRPORT_BCAST_FLOOD] = BUILD_POLICY(U8), + [IFLA_BRPORT_GROUP_FWD_MASK] = BUILD_POLICY(U16), + [IFLA_BRPORT_NEIGH_SUPPRESS] = BUILD_POLICY(U8), + [IFLA_BRPORT_ISOLATED] = BUILD_POLICY(U8), + [IFLA_BRPORT_BACKUP_PORT] = BUILD_POLICY(U32), + [IFLA_BRPORT_MRP_RING_OPEN] = BUILD_POLICY(U8), + [IFLA_BRPORT_MRP_IN_OPEN] = BUILD_POLICY(U8), + [IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT] = BUILD_POLICY(U32), + [IFLA_BRPORT_MCAST_EHT_HOSTS_CNT] = BUILD_POLICY(U32), +}; + +static const NLAPolicySetUnionElement rtnl_link_info_slave_data_policy_set_union_elements[] = { + BUILD_UNION_ELEMENT_BY_STRING("bridge", rtnl_bridge_port), +}; + +DEFINE_POLICY_SET_UNION(rtnl_link_info_slave_data, IFLA_INFO_SLAVE_KIND); + +static const NLAPolicy rtnl_link_info_policies[] = { + [IFLA_INFO_KIND] = BUILD_POLICY(STRING), + [IFLA_INFO_DATA] = BUILD_POLICY_NESTED_UNION_BY_STRING(rtnl_link_info_data), + /* TODO: Currently IFLA_INFO_XSTATS is used only when IFLA_INFO_KIND is "can". In the future, + * when multiple kinds of netdevs use this attribute, convert its type to NETLINK_TYPE_UNION. */ + [IFLA_INFO_XSTATS] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct can_device_stats)), + [IFLA_INFO_SLAVE_KIND] = BUILD_POLICY(STRING), + [IFLA_INFO_SLAVE_DATA] = BUILD_POLICY_NESTED_UNION_BY_STRING(rtnl_link_info_slave_data), +}; + +DEFINE_POLICY_SET(rtnl_link_info); + +static const struct NLAPolicy rtnl_inet_policies[] = { + [IFLA_INET_CONF] = BUILD_POLICY(BINARY), /* size = IPV4_DEVCONF_MAX * 4 */ +}; + +DEFINE_POLICY_SET(rtnl_inet); + +static const struct NLAPolicy rtnl_inet6_policies[] = { + [IFLA_INET6_FLAGS] = BUILD_POLICY(U32), + [IFLA_INET6_CONF] = BUILD_POLICY(BINARY), /* size = DEVCONF_MAX * sizeof(s32) */ + [IFLA_INET6_STATS] = BUILD_POLICY(BINARY), /* size = IPSTATS_MIB_MAX * sizeof(u64) */ + [IFLA_INET6_MCAST] = {}, /* unused. */ + [IFLA_INET6_CACHEINFO] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_cacheinfo)), + [IFLA_INET6_ICMP6STATS] = BUILD_POLICY(BINARY), /* size = ICMP6_MIB_MAX * sizeof(u64) */ + [IFLA_INET6_TOKEN] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)), + [IFLA_INET6_ADDR_GEN_MODE] = BUILD_POLICY(U8), +}; + +DEFINE_POLICY_SET(rtnl_inet6); + +static const NLAPolicySetUnionElement rtnl_prot_info_policy_set_union_elements[] = { + BUILD_UNION_ELEMENT_BY_FAMILY(AF_BRIDGE, rtnl_bridge_port), + BUILD_UNION_ELEMENT_BY_FAMILY(AF_INET6, rtnl_inet6), +}; + +DEFINE_POLICY_SET_UNION(rtnl_prot_info, 0); + +static const NLAPolicy rtnl_af_spec_unspec_policies[] = { + [AF_INET] = BUILD_POLICY_NESTED(rtnl_inet), + [AF_INET6] = BUILD_POLICY_NESTED(rtnl_inet6), +}; + +static const NLAPolicy rtnl_bridge_vlan_tunnel_info_policies[] = { + [IFLA_BRIDGE_VLAN_TUNNEL_ID] = BUILD_POLICY(U32), + [IFLA_BRIDGE_VLAN_TUNNEL_VID] = BUILD_POLICY(U16), + [IFLA_BRIDGE_VLAN_TUNNEL_FLAGS] = BUILD_POLICY(U16), +}; + +DEFINE_POLICY_SET(rtnl_bridge_vlan_tunnel_info); + +static const NLAPolicy rtnl_bridge_mrp_instance_policies[] = { + [IFLA_BRIDGE_MRP_INSTANCE_RING_ID] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_INSTANCE_PRIO] = BUILD_POLICY(U16), +}; + +DEFINE_POLICY_SET(rtnl_bridge_mrp_instance); + +static const NLAPolicy rtnl_bridge_mrp_port_state_policies[] = { + [IFLA_BRIDGE_MRP_PORT_STATE_STATE] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_bridge_mrp_port_state); + +static const NLAPolicy rtnl_bridge_mrp_port_role_policies[] = { + [IFLA_BRIDGE_MRP_PORT_ROLE_ROLE] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_bridge_mrp_port_role); + +static const NLAPolicy rtnl_bridge_mrp_ring_state_policies[] = { + [IFLA_BRIDGE_MRP_RING_STATE_RING_ID] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_RING_STATE_STATE] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_bridge_mrp_ring_state); + +static const NLAPolicy rtnl_bridge_mrp_ring_role_policies[] = { + [IFLA_BRIDGE_MRP_RING_ROLE_RING_ID] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_RING_ROLE_ROLE] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_bridge_mrp_ring_role); + +static const NLAPolicy rtnl_bridge_mrp_start_test_policies[] = { + [IFLA_BRIDGE_MRP_START_TEST_RING_ID] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_START_TEST_INTERVAL] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_START_TEST_MAX_MISS] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_START_TEST_PERIOD] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_START_TEST_MONITOR] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_bridge_mrp_start_test); + +static const NLAPolicy rtnl_bridge_mrp_info_policies[] = { + [IFLA_BRIDGE_MRP_INFO_RING_ID] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_INFO_P_IFINDEX] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_INFO_S_IFINDEX] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_INFO_PRIO] = BUILD_POLICY(U16), + [IFLA_BRIDGE_MRP_INFO_RING_STATE] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_INFO_RING_ROLE] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_INFO_TEST_INTERVAL] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_INFO_TEST_MAX_MISS] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_INFO_TEST_MONITOR] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_INFO_I_IFINDEX] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_INFO_IN_STATE] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_INFO_IN_ROLE] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_INFO_IN_TEST_INTERVAL] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_INFO_IN_TEST_MAX_MISS] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_bridge_mrp_info); + +static const NLAPolicy rtnl_bridge_mrp_in_role_policies[] = { + [IFLA_BRIDGE_MRP_IN_ROLE_RING_ID] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_IN_ROLE_IN_ID] = BUILD_POLICY(U16), + [IFLA_BRIDGE_MRP_IN_ROLE_ROLE] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_bridge_mrp_in_role); + +static const NLAPolicy rtnl_bridge_mrp_in_state_policies[] = { + [IFLA_BRIDGE_MRP_IN_STATE_IN_ID] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_IN_STATE_STATE] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_bridge_mrp_in_state); + +static const NLAPolicy rtnl_bridge_mrp_start_in_test_policies[] = { + [IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS] = BUILD_POLICY(U32), + [IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_bridge_mrp_start_in_test); + +static const NLAPolicy rtnl_bridge_mrp_policies[] = { + [IFLA_BRIDGE_MRP_INSTANCE] = BUILD_POLICY_NESTED(rtnl_bridge_mrp_instance), + [IFLA_BRIDGE_MRP_PORT_STATE] = BUILD_POLICY_NESTED(rtnl_bridge_mrp_port_state), + [IFLA_BRIDGE_MRP_PORT_ROLE] = BUILD_POLICY_NESTED(rtnl_bridge_mrp_port_role), + [IFLA_BRIDGE_MRP_RING_STATE] = BUILD_POLICY_NESTED(rtnl_bridge_mrp_ring_state), + [IFLA_BRIDGE_MRP_RING_ROLE] = BUILD_POLICY_NESTED(rtnl_bridge_mrp_ring_role), + [IFLA_BRIDGE_MRP_START_TEST] = BUILD_POLICY_NESTED(rtnl_bridge_mrp_start_test), + [IFLA_BRIDGE_MRP_INFO] = BUILD_POLICY_NESTED(rtnl_bridge_mrp_info), + [IFLA_BRIDGE_MRP_IN_ROLE] = BUILD_POLICY_NESTED(rtnl_bridge_mrp_in_role), + [IFLA_BRIDGE_MRP_IN_STATE] = BUILD_POLICY_NESTED(rtnl_bridge_mrp_in_state), + [IFLA_BRIDGE_MRP_START_IN_TEST] = BUILD_POLICY_NESTED(rtnl_bridge_mrp_start_in_test), +}; + +DEFINE_POLICY_SET(rtnl_bridge_mrp); + +static const NLAPolicy rtnl_bridge_cfm_mep_create_policies[] = { + [IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_bridge_cfm_mep_create); + +static const NLAPolicy rtnl_bridge_cfm_mep_delete_policies[] = { + [IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_bridge_cfm_mep_delete); + +static const NLAPolicy rtnl_bridge_cfm_mep_config_policies[] = { + [IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN), + [IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_bridge_cfm_mep_config); + +static const NLAPolicy rtnl_bridge_cfm_cc_config_policies[] = { + [IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID] = BUILD_POLICY_WITH_SIZE(BINARY, CFM_MAID_LENGTH), +}; + +DEFINE_POLICY_SET(rtnl_bridge_cfm_cc_config); + +static const NLAPolicy rtnl_bridge_cfm_cc_peer_mep_policies[] = { + [IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_CC_PEER_MEPID] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_bridge_cfm_cc_peer_mep); + +static const NLAPolicy rtnl_bridge_cfm_cc_rdi_policies[] = { + [IFLA_BRIDGE_CFM_CC_RDI_INSTANCE] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_CC_RDI_RDI] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_bridge_cfm_cc_rdi); + +static const NLAPolicy rtnl_bridge_cfm_cc_ccm_tx_policies[] = { + [IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC] = BUILD_POLICY_WITH_SIZE(ETHER_ADDR, ETH_ALEN), + [IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE] = BUILD_POLICY(U8), + [IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE] = BUILD_POLICY(U8), +}; + +DEFINE_POLICY_SET(rtnl_bridge_cfm_cc_ccm_tx); + +static const NLAPolicy rtnl_bridge_cfm_mep_status_policies[] = { + [IFLA_BRIDGE_CFM_MEP_STATUS_INSTANCE] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_MEP_STATUS_OPCODE_UNEXP_SEEN] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_MEP_STATUS_VERSION_UNEXP_SEEN] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_MEP_STATUS_RX_LEVEL_LOW_SEEN] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_bridge_cfm_mep_status); + +static const NLAPolicy rtnl_bridge_cfm_cc_peer_status_policies[] = { + [IFLA_BRIDGE_CFM_CC_PEER_STATUS_INSTANCE] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_CC_PEER_STATUS_PEER_MEPID] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_CC_PEER_STATUS_CCM_DEFECT] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_CC_PEER_STATUS_RDI] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_CC_PEER_STATUS_PORT_TLV_VALUE] = BUILD_POLICY(U8), + [IFLA_BRIDGE_CFM_CC_PEER_STATUS_IF_TLV_VALUE] = BUILD_POLICY(U8), + [IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEEN] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_CC_PEER_STATUS_TLV_SEEN] = BUILD_POLICY(U32), + [IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEQ_UNEXP_SEEN] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_bridge_cfm_cc_peer_status); + +static const NLAPolicy rtnl_bridge_cfm_policies[] = { + [IFLA_BRIDGE_CFM_MEP_CREATE] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_mep_create), + [IFLA_BRIDGE_CFM_MEP_DELETE] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_mep_delete), + [IFLA_BRIDGE_CFM_MEP_CONFIG] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_mep_config), + [IFLA_BRIDGE_CFM_CC_CONFIG] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_cc_config), + [IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_cc_peer_mep), + [IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_cc_peer_mep), + [IFLA_BRIDGE_CFM_CC_RDI] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_cc_rdi), + [IFLA_BRIDGE_CFM_CC_CCM_TX] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_cc_ccm_tx), + [IFLA_BRIDGE_CFM_MEP_CREATE_INFO] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_mep_create), + [IFLA_BRIDGE_CFM_MEP_CONFIG_INFO] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_mep_config), + [IFLA_BRIDGE_CFM_CC_CONFIG_INFO] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_cc_config), + [IFLA_BRIDGE_CFM_CC_RDI_INFO] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_cc_rdi), + [IFLA_BRIDGE_CFM_CC_CCM_TX_INFO] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_cc_ccm_tx), + [IFLA_BRIDGE_CFM_CC_PEER_MEP_INFO] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_cc_peer_mep), + [IFLA_BRIDGE_CFM_MEP_STATUS_INFO] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_mep_status), + [IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO] = BUILD_POLICY_NESTED(rtnl_bridge_cfm_cc_peer_status), +}; + +DEFINE_POLICY_SET(rtnl_bridge_cfm); + +static const NLAPolicy rtnl_af_spec_bridge_policies[] = { + [IFLA_BRIDGE_FLAGS] = BUILD_POLICY(U16), + [IFLA_BRIDGE_MODE] = BUILD_POLICY(U16), + [IFLA_BRIDGE_VLAN_INFO] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct bridge_vlan_info)), + [IFLA_BRIDGE_VLAN_TUNNEL_INFO] = BUILD_POLICY_NESTED(rtnl_bridge_vlan_tunnel_info), + [IFLA_BRIDGE_MRP] = BUILD_POLICY_NESTED(rtnl_bridge_mrp), + [IFLA_BRIDGE_CFM] = BUILD_POLICY_NESTED(rtnl_bridge_cfm), +}; + +static const NLAPolicySetUnionElement rtnl_af_spec_policy_set_union_elements[] = { + BUILD_UNION_ELEMENT_BY_FAMILY(AF_UNSPEC, rtnl_af_spec_unspec), + BUILD_UNION_ELEMENT_BY_FAMILY(AF_BRIDGE, rtnl_af_spec_bridge), +}; + +DEFINE_POLICY_SET_UNION(rtnl_af_spec, 0); + +static const NLAPolicy rtnl_prop_list_policies[] = { + [IFLA_ALT_IFNAME] = BUILD_POLICY_WITH_SIZE(STRING, ALTIFNAMSIZ - 1), +}; + +DEFINE_POLICY_SET(rtnl_prop_list); + +static const NLAPolicy rtnl_vf_vlan_list_policies[] = { + [IFLA_VF_VLAN_INFO] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_vlan_info)), +}; + +DEFINE_POLICY_SET(rtnl_vf_vlan_list); + +static const NLAPolicy rtnl_vf_info_policies[] = { + [IFLA_VF_MAC] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_mac)), + [IFLA_VF_VLAN] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_vlan)), + [IFLA_VF_VLAN_LIST] = BUILD_POLICY_NESTED(rtnl_vf_vlan_list), + [IFLA_VF_TX_RATE] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_tx_rate)), + [IFLA_VF_SPOOFCHK] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_spoofchk)), + [IFLA_VF_RATE] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_rate)), + [IFLA_VF_LINK_STATE] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_link_state)), + [IFLA_VF_RSS_QUERY_EN] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_rss_query_en)), + [IFLA_VF_TRUST] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_trust)), + [IFLA_VF_IB_NODE_GUID] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_guid)), + [IFLA_VF_IB_PORT_GUID] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_vf_guid)), +}; + +DEFINE_POLICY_SET(rtnl_vf_info); + +static const NLAPolicy rtnl_vfinfo_list_policies[] = { + [IFLA_VF_INFO] = BUILD_POLICY_NESTED(rtnl_vf_info), +}; + +DEFINE_POLICY_SET(rtnl_vfinfo_list); + +static const NLAPolicy rtnl_vf_port_policies[] = { + [IFLA_PORT_VF] = BUILD_POLICY(U32), + [IFLA_PORT_PROFILE] = BUILD_POLICY(STRING), + [IFLA_PORT_VSI_TYPE] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct ifla_port_vsi)), + [IFLA_PORT_INSTANCE_UUID] = BUILD_POLICY_WITH_SIZE(BINARY, PORT_UUID_MAX), + [IFLA_PORT_HOST_UUID] = BUILD_POLICY_WITH_SIZE(BINARY, PORT_UUID_MAX), + [IFLA_PORT_REQUEST] = BUILD_POLICY(U8), + [IFLA_PORT_RESPONSE] = BUILD_POLICY(U16), +}; + +DEFINE_POLICY_SET(rtnl_vf_port); + +static const NLAPolicy rtnl_vf_ports_policies[] = { + [IFLA_VF_PORT] = BUILD_POLICY_NESTED(rtnl_vf_port), +}; + +DEFINE_POLICY_SET(rtnl_vf_ports); + +static const NLAPolicy rtnl_xdp_policies[] = { + [IFLA_XDP_FD] = BUILD_POLICY(S32), + [IFLA_XDP_ATTACHED] = BUILD_POLICY(U8), + [IFLA_XDP_FLAGS] = BUILD_POLICY(U32), + [IFLA_XDP_PROG_ID] = BUILD_POLICY(U32), + [IFLA_XDP_DRV_PROG_ID] = BUILD_POLICY(U32), + [IFLA_XDP_SKB_PROG_ID] = BUILD_POLICY(U32), + [IFLA_XDP_HW_PROG_ID] = BUILD_POLICY(U32), + [IFLA_XDP_EXPECTED_FD] = BUILD_POLICY(S32), +}; + +DEFINE_POLICY_SET(rtnl_xdp); + +static const NLAPolicy rtnl_proto_down_reason_policies[] = { + [IFLA_PROTO_DOWN_REASON_MASK] = BUILD_POLICY(U32), + [IFLA_PROTO_DOWN_REASON_VALUE] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_proto_down_reason); + +static const NLAPolicy rtnl_link_policies[] = { + [IFLA_ADDRESS] = BUILD_POLICY(ETHER_ADDR), + [IFLA_BROADCAST] = BUILD_POLICY(ETHER_ADDR), + [IFLA_IFNAME] = BUILD_POLICY_WITH_SIZE(STRING, IFNAMSIZ - 1), + [IFLA_MTU] = BUILD_POLICY(U32), + [IFLA_LINK] = BUILD_POLICY(U32), + [IFLA_QDISC] = BUILD_POLICY(STRING), + [IFLA_STATS] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct rtnl_link_stats)), + [IFLA_COST] = { /* Not used. */ }, + [IFLA_PRIORITY] = { /* Not used. */ }, + [IFLA_MASTER] = BUILD_POLICY(U32), + [IFLA_WIRELESS] = { /* Used only by wext. */ }, + [IFLA_PROTINFO] = BUILD_POLICY_NESTED_UNION_BY_FAMILY(rtnl_prot_info), + [IFLA_TXQLEN] = BUILD_POLICY(U32), + [IFLA_MAP] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct rtnl_link_ifmap)), + [IFLA_WEIGHT] = BUILD_POLICY(U32), + [IFLA_OPERSTATE] = BUILD_POLICY(U8), + [IFLA_LINKMODE] = BUILD_POLICY(U8), + [IFLA_LINKINFO] = BUILD_POLICY_NESTED(rtnl_link_info), + [IFLA_NET_NS_PID] = BUILD_POLICY(U32), + [IFLA_IFALIAS] = BUILD_POLICY_WITH_SIZE(STRING, IFALIASZ - 1), + [IFLA_NUM_VF] = BUILD_POLICY(U32), + [IFLA_VFINFO_LIST] = BUILD_POLICY_NESTED(rtnl_vfinfo_list), + [IFLA_STATS64] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct rtnl_link_stats64)), + [IFLA_VF_PORTS] = BUILD_POLICY_NESTED(rtnl_vf_ports), + [IFLA_PORT_SELF] = BUILD_POLICY_NESTED(rtnl_vf_port), + [IFLA_AF_SPEC] = BUILD_POLICY_NESTED_UNION_BY_FAMILY(rtnl_af_spec), + [IFLA_GROUP] = BUILD_POLICY(U32), + [IFLA_NET_NS_FD] = BUILD_POLICY(U32), + [IFLA_EXT_MASK] = BUILD_POLICY(U32), + [IFLA_PROMISCUITY] = BUILD_POLICY(U32), + [IFLA_NUM_TX_QUEUES] = BUILD_POLICY(U32), + [IFLA_NUM_RX_QUEUES] = BUILD_POLICY(U32), + [IFLA_CARRIER] = BUILD_POLICY(U8), + [IFLA_PHYS_PORT_ID] = BUILD_POLICY_WITH_SIZE(BINARY, MAX_PHYS_ITEM_ID_LEN), + [IFLA_CARRIER_CHANGES] = BUILD_POLICY(U32), + [IFLA_PHYS_SWITCH_ID] = BUILD_POLICY_WITH_SIZE(BINARY, MAX_PHYS_ITEM_ID_LEN), + [IFLA_LINK_NETNSID] = BUILD_POLICY(S32), + [IFLA_PHYS_PORT_NAME] = BUILD_POLICY_WITH_SIZE(STRING, IFNAMSIZ - 1), + [IFLA_PROTO_DOWN] = BUILD_POLICY(U8), + [IFLA_GSO_MAX_SEGS] = BUILD_POLICY(U32), + [IFLA_GSO_MAX_SIZE] = BUILD_POLICY(U32), + [IFLA_XDP] = BUILD_POLICY_NESTED(rtnl_xdp), + [IFLA_EVENT] = BUILD_POLICY(U32), + [IFLA_NEW_NETNSID] = BUILD_POLICY(S32), + [IFLA_TARGET_NETNSID] = BUILD_POLICY(S32), + [IFLA_CARRIER_UP_COUNT] = BUILD_POLICY(U32), + [IFLA_CARRIER_DOWN_COUNT] = BUILD_POLICY(U32), + [IFLA_NEW_IFINDEX] = BUILD_POLICY(S32), + [IFLA_MIN_MTU] = BUILD_POLICY(U32), + [IFLA_MAX_MTU] = BUILD_POLICY(U32), + [IFLA_PROP_LIST] = BUILD_POLICY_NESTED(rtnl_prop_list), + [IFLA_ALT_IFNAME] = BUILD_POLICY_WITH_SIZE(STRING, ALTIFNAMSIZ - 1), + [IFLA_PERM_ADDRESS] = BUILD_POLICY(ETHER_ADDR), + [IFLA_PROTO_DOWN_REASON] = BUILD_POLICY_NESTED(rtnl_proto_down_reason), + [IFLA_PARENT_DEV_NAME] = BUILD_POLICY(STRING), + [IFLA_PARENT_DEV_BUS_NAME] = BUILD_POLICY(STRING), +}; + +DEFINE_POLICY_SET(rtnl_link); + +/* IFA_FLAGS was defined in kernel 3.14, but we still support older + * kernels where IFA_MAX is lower. */ +static const NLAPolicy rtnl_address_policies[] = { + [IFA_ADDRESS] = BUILD_POLICY(IN_ADDR), + [IFA_LOCAL] = BUILD_POLICY(IN_ADDR), + [IFA_LABEL] = BUILD_POLICY_WITH_SIZE(STRING, IFNAMSIZ - 1), + [IFA_BROADCAST] = BUILD_POLICY(IN_ADDR), + [IFA_ANYCAST] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)), + [IFA_CACHEINFO] = BUILD_POLICY_WITH_SIZE(CACHE_INFO, sizeof(struct ifa_cacheinfo)), + [IFA_MULTICAST] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)), + [IFA_FLAGS] = BUILD_POLICY(U32), + [IFA_RT_PRIORITY] = BUILD_POLICY(U32), + [IFA_TARGET_NETNSID] = BUILD_POLICY(S32), +}; + +DEFINE_POLICY_SET(rtnl_address); + +/* RTM_METRICS --- array of struct rtattr with types of RTAX_* */ + +static const NLAPolicy rtnl_route_metrics_policies[] = { + [RTAX_MTU] = BUILD_POLICY(U32), + [RTAX_WINDOW] = BUILD_POLICY(U32), + [RTAX_RTT] = BUILD_POLICY(U32), + [RTAX_RTTVAR] = BUILD_POLICY(U32), + [RTAX_SSTHRESH] = BUILD_POLICY(U32), + [RTAX_CWND] = BUILD_POLICY(U32), + [RTAX_ADVMSS] = BUILD_POLICY(U32), + [RTAX_REORDERING] = BUILD_POLICY(U32), + [RTAX_HOPLIMIT] = BUILD_POLICY(U32), + [RTAX_INITCWND] = BUILD_POLICY(U32), + [RTAX_FEATURES] = BUILD_POLICY(U32), + [RTAX_RTO_MIN] = BUILD_POLICY(U32), + [RTAX_INITRWND] = BUILD_POLICY(U32), + [RTAX_QUICKACK] = BUILD_POLICY(U32), + [RTAX_CC_ALGO] = BUILD_POLICY(STRING), + [RTAX_FASTOPEN_NO_COOKIE] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_route_metrics); + +static const NLAPolicy rtnl_route_policies[] = { + [RTA_DST] = BUILD_POLICY(IN_ADDR), + [RTA_SRC] = BUILD_POLICY(IN_ADDR), + [RTA_IIF] = BUILD_POLICY(U32), + [RTA_OIF] = BUILD_POLICY(U32), + [RTA_GATEWAY] = BUILD_POLICY(IN_ADDR), + [RTA_PRIORITY] = BUILD_POLICY(U32), + [RTA_PREFSRC] = BUILD_POLICY(IN_ADDR), + [RTA_METRICS] = BUILD_POLICY_NESTED(rtnl_route_metrics), + [RTA_MULTIPATH] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct rtnexthop)), + [RTA_FLOW] = BUILD_POLICY(U32), + [RTA_CACHEINFO] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct rta_cacheinfo)), + [RTA_TABLE] = BUILD_POLICY(U32), + [RTA_MARK] = BUILD_POLICY(U32), + [RTA_MFC_STATS] = BUILD_POLICY(U64), + [RTA_VIA] = BUILD_POLICY(BINARY), /* See struct rtvia */ + [RTA_NEWDST] = BUILD_POLICY(U32), + [RTA_PREF] = BUILD_POLICY(U8), + [RTA_ENCAP_TYPE] = BUILD_POLICY(U16), + [RTA_ENCAP] = { .type = NETLINK_TYPE_NESTED }, /* Multiple type systems i.e. LWTUNNEL_ENCAP_MPLS/LWTUNNEL_ENCAP_IP/LWTUNNEL_ENCAP_ILA etc... */ + [RTA_EXPIRES] = BUILD_POLICY(U32), + [RTA_UID] = BUILD_POLICY(U32), + [RTA_TTL_PROPAGATE] = BUILD_POLICY(U8), + [RTA_IP_PROTO] = BUILD_POLICY(U8), + [RTA_SPORT] = BUILD_POLICY(U16), + [RTA_DPORT] = BUILD_POLICY(U16), + [RTA_NH_ID] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_route); + +static const NLAPolicy rtnl_neigh_policies[] = { + [NDA_DST] = BUILD_POLICY(IN_ADDR), + [NDA_LLADDR] = BUILD_POLICY(ETHER_ADDR), + [NDA_CACHEINFO] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct nda_cacheinfo)), + [NDA_PROBES] = BUILD_POLICY(U32), + [NDA_VLAN] = BUILD_POLICY(U16), + [NDA_PORT] = BUILD_POLICY(U16), + [NDA_VNI] = BUILD_POLICY(U32), + [NDA_IFINDEX] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_neigh); + +static const NLAPolicy rtnl_addrlabel_policies[] = { + [IFAL_ADDRESS] = BUILD_POLICY_WITH_SIZE(IN_ADDR, sizeof(struct in6_addr)), + [IFAL_LABEL] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_addrlabel); + +static const NLAPolicy rtnl_routing_policy_rule_policies[] = { + [FRA_DST] = BUILD_POLICY(IN_ADDR), + [FRA_SRC] = BUILD_POLICY(IN_ADDR), + [FRA_IIFNAME] = BUILD_POLICY(STRING), + [FRA_GOTO] = BUILD_POLICY(U32), + [FRA_PRIORITY] = BUILD_POLICY(U32), + [FRA_FWMARK] = BUILD_POLICY(U32), + [FRA_FLOW] = BUILD_POLICY(U32), + [FRA_TUN_ID] = BUILD_POLICY(U64), + [FRA_SUPPRESS_IFGROUP] = BUILD_POLICY(U32), + [FRA_SUPPRESS_PREFIXLEN] = BUILD_POLICY(U32), + [FRA_TABLE] = BUILD_POLICY(U32), + [FRA_FWMASK] = BUILD_POLICY(U32), + [FRA_OIFNAME] = BUILD_POLICY(STRING), + [FRA_PAD] = BUILD_POLICY(U32), + [FRA_L3MDEV] = BUILD_POLICY(U8), + [FRA_UID_RANGE] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct fib_rule_uid_range)), + [FRA_PROTOCOL] = BUILD_POLICY(U8), + [FRA_IP_PROTO] = BUILD_POLICY(U8), + [FRA_SPORT_RANGE] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct fib_rule_port_range)), + [FRA_DPORT_RANGE] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct fib_rule_port_range)), +}; + +DEFINE_POLICY_SET(rtnl_routing_policy_rule); + +static const NLAPolicy rtnl_nexthop_policies[] = { + [NHA_ID] = BUILD_POLICY(U32), + [NHA_GROUP] = { /* array of struct nexthop_grp */ }, + [NHA_GROUP_TYPE] = BUILD_POLICY(U16), + [NHA_BLACKHOLE] = BUILD_POLICY(FLAG), + [NHA_OIF] = BUILD_POLICY(U32), + [NHA_GATEWAY] = BUILD_POLICY(IN_ADDR), + [NHA_ENCAP_TYPE] = BUILD_POLICY(U16), + [NHA_ENCAP] = { .type = NETLINK_TYPE_NESTED }, + [NHA_GROUPS] = BUILD_POLICY(FLAG), + [NHA_MASTER] = BUILD_POLICY(U32), + [NHA_FDB] = BUILD_POLICY(FLAG), +}; + +DEFINE_POLICY_SET(rtnl_nexthop); + +static const NLAPolicy rtnl_tca_option_data_cake_policies[] = { + [TCA_CAKE_BASE_RATE64] = BUILD_POLICY(U64), + [TCA_CAKE_DIFFSERV_MODE] = BUILD_POLICY(U32), + [TCA_CAKE_ATM] = BUILD_POLICY(U32), + [TCA_CAKE_FLOW_MODE] = BUILD_POLICY(U32), + [TCA_CAKE_OVERHEAD] = BUILD_POLICY(S32), + [TCA_CAKE_RTT] = BUILD_POLICY(U32), + [TCA_CAKE_TARGET] = BUILD_POLICY(U32), + [TCA_CAKE_AUTORATE] = BUILD_POLICY(U32), + [TCA_CAKE_MEMORY] = BUILD_POLICY(U32), + [TCA_CAKE_NAT] = BUILD_POLICY(U32), + [TCA_CAKE_RAW] = BUILD_POLICY(U32), + [TCA_CAKE_WASH] = BUILD_POLICY(U32), + [TCA_CAKE_MPU] = BUILD_POLICY(U32), + [TCA_CAKE_INGRESS] = BUILD_POLICY(U32), + [TCA_CAKE_ACK_FILTER] = BUILD_POLICY(U32), + [TCA_CAKE_SPLIT_GSO] = BUILD_POLICY(U32), + [TCA_CAKE_FWMARK] = BUILD_POLICY(U32), +}; + +static const NLAPolicy rtnl_tca_option_data_codel_policies[] = { + [TCA_CODEL_TARGET] = BUILD_POLICY(U32), + [TCA_CODEL_LIMIT] = BUILD_POLICY(U32), + [TCA_CODEL_INTERVAL] = BUILD_POLICY(U32), + [TCA_CODEL_ECN] = BUILD_POLICY(U32), + [TCA_CODEL_CE_THRESHOLD] = BUILD_POLICY(U32), +}; + +static const NLAPolicy rtnl_tca_option_data_drr_policies[] = { + [TCA_DRR_QUANTUM] = BUILD_POLICY(U32), +}; + +static const NLAPolicy rtnl_tca_option_data_ets_quanta_policies[] = { + [TCA_ETS_QUANTA_BAND] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_tca_option_data_ets_quanta); + +static const NLAPolicy rtnl_tca_option_data_ets_prio_policies[] = { + [TCA_ETS_PRIOMAP_BAND] = BUILD_POLICY(U8), +}; + +DEFINE_POLICY_SET(rtnl_tca_option_data_ets_prio); + +static const NLAPolicy rtnl_tca_option_data_ets_policies[] = { + [TCA_ETS_NBANDS] = BUILD_POLICY(U8), + [TCA_ETS_NSTRICT] = BUILD_POLICY(U8), + [TCA_ETS_QUANTA] = BUILD_POLICY_NESTED(rtnl_tca_option_data_ets_quanta), + [TCA_ETS_PRIOMAP] = BUILD_POLICY_NESTED(rtnl_tca_option_data_ets_prio), + [TCA_ETS_QUANTA_BAND] = BUILD_POLICY(U32), +}; + +static const NLAPolicy rtnl_tca_option_data_fq_policies[] = { + [TCA_FQ_PLIMIT] = BUILD_POLICY(U32), + [TCA_FQ_FLOW_PLIMIT] = BUILD_POLICY(U32), + [TCA_FQ_QUANTUM] = BUILD_POLICY(U32), + [TCA_FQ_INITIAL_QUANTUM] = BUILD_POLICY(U32), + [TCA_FQ_RATE_ENABLE] = BUILD_POLICY(U32), + [TCA_FQ_FLOW_DEFAULT_RATE] = BUILD_POLICY(U32), + [TCA_FQ_FLOW_MAX_RATE] = BUILD_POLICY(U32), + [TCA_FQ_BUCKETS_LOG] = BUILD_POLICY(U32), + [TCA_FQ_FLOW_REFILL_DELAY] = BUILD_POLICY(U32), + [TCA_FQ_LOW_RATE_THRESHOLD] = BUILD_POLICY(U32), + [TCA_FQ_CE_THRESHOLD] = BUILD_POLICY(U32), + [TCA_FQ_ORPHAN_MASK] = BUILD_POLICY(U32), +}; + +static const NLAPolicy rtnl_tca_option_data_fq_codel_policies[] = { + [TCA_FQ_CODEL_TARGET] = BUILD_POLICY(U32), + [TCA_FQ_CODEL_LIMIT] = BUILD_POLICY(U32), + [TCA_FQ_CODEL_INTERVAL] = BUILD_POLICY(U32), + [TCA_FQ_CODEL_ECN] = BUILD_POLICY(U32), + [TCA_FQ_CODEL_FLOWS] = BUILD_POLICY(U32), + [TCA_FQ_CODEL_QUANTUM] = BUILD_POLICY(U32), + [TCA_FQ_CODEL_CE_THRESHOLD] = BUILD_POLICY(U32), + [TCA_FQ_CODEL_DROP_BATCH_SIZE] = BUILD_POLICY(U32), + [TCA_FQ_CODEL_MEMORY_LIMIT] = BUILD_POLICY(U32), +}; + +static const NLAPolicy rtnl_tca_option_data_fq_pie_policies[] = { + [TCA_FQ_PIE_LIMIT] = BUILD_POLICY(U32), +}; + +static const NLAPolicy rtnl_tca_option_data_gred_policies[] = { + [TCA_GRED_DPS] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct tc_gred_sopt)), +}; + +static const NLAPolicy rtnl_tca_option_data_hhf_policies[] = { + [TCA_HHF_BACKLOG_LIMIT] = BUILD_POLICY(U32), +}; + +static const NLAPolicy rtnl_tca_option_data_htb_policies[] = { + [TCA_HTB_PARMS] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct tc_htb_opt)), + [TCA_HTB_INIT] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct tc_htb_glob)), + [TCA_HTB_CTAB] = BUILD_POLICY_WITH_SIZE(BINARY, TC_RTAB_SIZE), + [TCA_HTB_RTAB] = BUILD_POLICY_WITH_SIZE(BINARY, TC_RTAB_SIZE), + [TCA_HTB_RATE64] = BUILD_POLICY(U64), + [TCA_HTB_CEIL64] = BUILD_POLICY(U64), +}; + +static const NLAPolicy rtnl_tca_option_data_pie_policies[] = { + [TCA_PIE_LIMIT] = BUILD_POLICY(U32), +}; + +static const NLAPolicy rtnl_tca_option_data_qfq_policies[] = { + [TCA_QFQ_WEIGHT] = BUILD_POLICY(U32), + [TCA_QFQ_LMAX] = BUILD_POLICY(U32), +}; + +static const NLAPolicy rtnl_tca_option_data_sfb_policies[] = { + [TCA_SFB_PARMS] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct tc_sfb_qopt)), +}; + +static const NLAPolicy rtnl_tca_option_data_tbf_policies[] = { + [TCA_TBF_PARMS] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct tc_tbf_qopt)), + [TCA_TBF_RTAB] = BUILD_POLICY_WITH_SIZE(BINARY, TC_RTAB_SIZE), + [TCA_TBF_PTAB] = BUILD_POLICY_WITH_SIZE(BINARY, TC_RTAB_SIZE), + [TCA_TBF_RATE64] = BUILD_POLICY(U64), + [TCA_TBF_PRATE64] = BUILD_POLICY(U64), + [TCA_TBF_BURST] = BUILD_POLICY(U32), + [TCA_TBF_PBURST] = BUILD_POLICY(U32), +}; + +static const NLAPolicySetUnionElement rtnl_tca_option_data_policy_set_union_elements[] = { + BUILD_UNION_ELEMENT_BY_STRING("cake", rtnl_tca_option_data_cake), + BUILD_UNION_ELEMENT_BY_STRING("codel", rtnl_tca_option_data_codel), + BUILD_UNION_ELEMENT_BY_STRING("drr", rtnl_tca_option_data_drr), + BUILD_UNION_ELEMENT_BY_STRING("ets", rtnl_tca_option_data_ets), + BUILD_UNION_ELEMENT_BY_STRING("fq", rtnl_tca_option_data_fq), + BUILD_UNION_ELEMENT_BY_STRING("fq_codel", rtnl_tca_option_data_fq_codel), + BUILD_UNION_ELEMENT_BY_STRING("fq_pie", rtnl_tca_option_data_fq_pie), + BUILD_UNION_ELEMENT_BY_STRING("gred", rtnl_tca_option_data_gred), + BUILD_UNION_ELEMENT_BY_STRING("hhf", rtnl_tca_option_data_hhf), + BUILD_UNION_ELEMENT_BY_STRING("htb", rtnl_tca_option_data_htb), + BUILD_UNION_ELEMENT_BY_STRING("pie", rtnl_tca_option_data_pie), + BUILD_UNION_ELEMENT_BY_STRING("qfq", rtnl_tca_option_data_qfq), + BUILD_UNION_ELEMENT_BY_STRING("sfb", rtnl_tca_option_data_sfb), + BUILD_UNION_ELEMENT_BY_STRING("tbf", rtnl_tca_option_data_tbf), +}; + +DEFINE_POLICY_SET_UNION(rtnl_tca_option_data, TCA_KIND); + +static const NLAPolicy rtnl_tca_policies[] = { + [TCA_KIND] = BUILD_POLICY(STRING), + [TCA_OPTIONS] = BUILD_POLICY_NESTED_UNION_BY_STRING(rtnl_tca_option_data), + [TCA_INGRESS_BLOCK] = BUILD_POLICY(U32), + [TCA_EGRESS_BLOCK] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(rtnl_tca); + +static const NLAPolicy rtnl_mdb_policies[] = { + [MDBA_SET_ENTRY] = BUILD_POLICY_WITH_SIZE(BINARY, sizeof(struct br_port_msg)), +}; + +DEFINE_POLICY_SET(rtnl_mdb); + +static const NLAPolicy rtnl_policies[] = { + [RTM_NEWLINK] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_link, sizeof(struct ifinfomsg)), + [RTM_DELLINK] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_link, sizeof(struct ifinfomsg)), + [RTM_GETLINK] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_link, sizeof(struct ifinfomsg)), + [RTM_SETLINK] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_link, sizeof(struct ifinfomsg)), + [RTM_NEWLINKPROP] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_link, sizeof(struct ifinfomsg)), + [RTM_DELLINKPROP] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_link, sizeof(struct ifinfomsg)), + [RTM_GETLINKPROP] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_link, sizeof(struct ifinfomsg)), + [RTM_NEWADDR] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_address, sizeof(struct ifaddrmsg)), + [RTM_DELADDR] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_address, sizeof(struct ifaddrmsg)), + [RTM_GETADDR] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_address, sizeof(struct ifaddrmsg)), + [RTM_NEWROUTE] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_route, sizeof(struct rtmsg)), + [RTM_DELROUTE] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_route, sizeof(struct rtmsg)), + [RTM_GETROUTE] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_route, sizeof(struct rtmsg)), + [RTM_NEWNEIGH] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_neigh, sizeof(struct ndmsg)), + [RTM_DELNEIGH] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_neigh, sizeof(struct ndmsg)), + [RTM_GETNEIGH] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_neigh, sizeof(struct ndmsg)), + [RTM_NEWADDRLABEL] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_addrlabel, sizeof(struct ifaddrlblmsg)), + [RTM_DELADDRLABEL] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_addrlabel, sizeof(struct ifaddrlblmsg)), + [RTM_GETADDRLABEL] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_addrlabel, sizeof(struct ifaddrlblmsg)), + [RTM_NEWRULE] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_routing_policy_rule, sizeof(struct fib_rule_hdr)), + [RTM_DELRULE] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_routing_policy_rule, sizeof(struct fib_rule_hdr)), + [RTM_GETRULE] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_routing_policy_rule, sizeof(struct fib_rule_hdr)), + [RTM_NEWNEXTHOP] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_nexthop, sizeof(struct nhmsg)), + [RTM_DELNEXTHOP] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_nexthop, sizeof(struct nhmsg)), + [RTM_GETNEXTHOP] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_nexthop, sizeof(struct nhmsg)), + [RTM_NEWQDISC] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_tca, sizeof(struct tcmsg)), + [RTM_DELQDISC] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_tca, sizeof(struct tcmsg)), + [RTM_GETQDISC] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_tca, sizeof(struct tcmsg)), + [RTM_NEWTCLASS] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_tca, sizeof(struct tcmsg)), + [RTM_DELTCLASS] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_tca, sizeof(struct tcmsg)), + [RTM_GETTCLASS] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_tca, sizeof(struct tcmsg)), + [RTM_NEWMDB] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_mdb, sizeof(struct br_port_msg)), + [RTM_DELMDB] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_mdb, sizeof(struct br_port_msg)), + [RTM_GETMDB] = BUILD_POLICY_NESTED_WITH_SIZE(rtnl_mdb, sizeof(struct br_port_msg)), +}; + +DEFINE_POLICY_SET(rtnl); + +const NLAPolicy *rtnl_get_policy(uint16_t nlmsg_type) { + return policy_set_get_policy(&rtnl_policy_set, nlmsg_type); +} diff --git a/src/libsystemd/sd-netlink/netlink-types.c b/src/libsystemd/sd-netlink/netlink-types.c new file mode 100644 index 0000000..21ef80c --- /dev/null +++ b/src/libsystemd/sd-netlink/netlink-types.c @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <linux/netlink.h> + +#include "netlink-genl.h" +#include "netlink-internal.h" +#include "netlink-types-internal.h" + +static const NLAPolicy empty_policies[1] = { + /* fake array to avoid .types==NULL, which denotes invalid type-systems */ +}; + +DEFINE_POLICY_SET(empty); + +static const NLAPolicy error_policies[] = { + [NLMSGERR_ATTR_MSG] = BUILD_POLICY(STRING), + [NLMSGERR_ATTR_OFFS] = BUILD_POLICY(U32), +}; + +DEFINE_POLICY_SET(error); + +static const NLAPolicy basic_policies[] = { + [NLMSG_DONE] = BUILD_POLICY_NESTED(empty), + [NLMSG_ERROR] = BUILD_POLICY_NESTED_WITH_SIZE(error, sizeof(struct nlmsgerr)), +}; + +DEFINE_POLICY_SET(basic); + +NLAType policy_get_type(const NLAPolicy *policy) { + return ASSERT_PTR(policy)->type; +} + +size_t policy_get_size(const NLAPolicy *policy) { + return ASSERT_PTR(policy)->size; +} + +const NLAPolicySet *policy_get_policy_set(const NLAPolicy *policy) { + assert(policy); + assert(policy->type == NETLINK_TYPE_NESTED); + + return ASSERT_PTR(policy->policy_set); +} + +const NLAPolicySetUnion *policy_get_policy_set_union(const NLAPolicy *policy) { + assert(policy); + assert(IN_SET(policy->type, NETLINK_TYPE_NESTED_UNION_BY_STRING, NETLINK_TYPE_NESTED_UNION_BY_FAMILY)); + + return ASSERT_PTR(policy->policy_set_union); +} + +int netlink_get_policy_set_and_header_size( + sd_netlink *nl, + uint16_t type, + const NLAPolicySet **ret_policy_set, + size_t *ret_header_size) { + + const NLAPolicy *policy; + + assert(nl); + + if (IN_SET(type, NLMSG_DONE, NLMSG_ERROR)) + policy = policy_set_get_policy(&basic_policy_set, type); + else + switch (nl->protocol) { + case NETLINK_ROUTE: + policy = rtnl_get_policy(type); + break; + case NETLINK_NETFILTER: + policy = nfnl_get_policy(type); + break; + case NETLINK_GENERIC: + return genl_get_policy_set_and_header_size(nl, type, ret_policy_set, ret_header_size); + default: + return -EOPNOTSUPP; + } + if (!policy) + return -EOPNOTSUPP; + + if (policy_get_type(policy) != NETLINK_TYPE_NESTED) + return -EOPNOTSUPP; + + if (ret_policy_set) + *ret_policy_set = policy_get_policy_set(policy); + if (ret_header_size) + *ret_header_size = policy_get_size(policy); + return 0; +} + +const NLAPolicy *policy_set_get_policy(const NLAPolicySet *policy_set, uint16_t attr_type) { + const NLAPolicy *policy; + + assert(policy_set); + assert(policy_set->policies); + + if (attr_type >= policy_set->count) + return NULL; + + policy = &policy_set->policies[attr_type]; + + if (policy->type == NETLINK_TYPE_UNSPEC) + return NULL; + + return policy; +} + +const NLAPolicySet *policy_set_get_policy_set(const NLAPolicySet *policy_set, uint16_t attr_type) { + const NLAPolicy *policy; + + policy = policy_set_get_policy(policy_set, attr_type); + if (!policy) + return NULL; + + return policy_get_policy_set(policy); +} + +const NLAPolicySetUnion *policy_set_get_policy_set_union(const NLAPolicySet *policy_set, uint16_t attr_type) { + const NLAPolicy *policy; + + policy = policy_set_get_policy(policy_set, attr_type); + if (!policy) + return NULL; + + return policy_get_policy_set_union(policy); +} + +uint16_t policy_set_union_get_match_attribute(const NLAPolicySetUnion *policy_set_union) { + assert(policy_set_union->match_attribute != 0); + + return policy_set_union->match_attribute; +} + +const NLAPolicySet *policy_set_union_get_policy_set_by_string(const NLAPolicySetUnion *policy_set_union, const char *string) { + assert(policy_set_union); + assert(policy_set_union->elements); + assert(string); + + for (size_t i = 0; i < policy_set_union->count; i++) + if (streq(policy_set_union->elements[i].string, string)) + return &policy_set_union->elements[i].policy_set; + + return NULL; +} + +const NLAPolicySet *policy_set_union_get_policy_set_by_family(const NLAPolicySetUnion *policy_set_union, int family) { + assert(policy_set_union); + assert(policy_set_union->elements); + + for (size_t i = 0; i < policy_set_union->count; i++) + if (policy_set_union->elements[i].family == family) + return &policy_set_union->elements[i].policy_set; + + return NULL; +} diff --git a/src/libsystemd/sd-netlink/netlink-types.h b/src/libsystemd/sd-netlink/netlink-types.h new file mode 100644 index 0000000..e034a98 --- /dev/null +++ b/src/libsystemd/sd-netlink/netlink-types.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <errno.h> + +#include "sd-netlink.h" + +typedef enum NLAType { + NETLINK_TYPE_UNSPEC, /* NLA_UNSPEC */ + NETLINK_TYPE_BINARY, /* NLA_BINARY */ + NETLINK_TYPE_FLAG, /* NLA_FLAG */ + NETLINK_TYPE_U8, /* NLA_U8 */ + NETLINK_TYPE_U16, /* NLA_U16 */ + NETLINK_TYPE_U32, /* NLA_U32 */ + NETLINK_TYPE_U64, /* NLA_U64 */ + NETLINK_TYPE_S8, /* NLA_S8 */ + NETLINK_TYPE_S16, /* NLA_S16 */ + NETLINK_TYPE_S32, /* NLA_S32 */ + NETLINK_TYPE_S64, /* NLA_S64 */ + NETLINK_TYPE_STRING, /* NLA_STRING */ + NETLINK_TYPE_BITFIELD32, /* NLA_BITFIELD32 */ + NETLINK_TYPE_REJECT, /* NLA_REJECT */ + NETLINK_TYPE_IN_ADDR, + NETLINK_TYPE_ETHER_ADDR, + NETLINK_TYPE_CACHE_INFO, + NETLINK_TYPE_SOCKADDR, + NETLINK_TYPE_NESTED, /* NLA_NESTED */ + NETLINK_TYPE_NESTED_UNION_BY_STRING, + NETLINK_TYPE_NESTED_UNION_BY_FAMILY, + _NETLINK_TYPE_MAX, + _NETLINK_TYPE_INVALID = -EINVAL, +} NLAType; + +typedef struct NLAPolicy NLAPolicy; +typedef struct NLAPolicySet NLAPolicySet; +typedef struct NLAPolicySetUnion NLAPolicySetUnion; + +const NLAPolicy *rtnl_get_policy(uint16_t nlmsg_type); +const NLAPolicy *nfnl_get_policy(uint16_t nlmsg_type); +const NLAPolicySet *genl_get_policy_set_by_name(const char *name); +int genl_get_policy_set_and_header_size( + sd_netlink *nl, + uint16_t id, + const NLAPolicySet **ret_policy_set, + size_t *ret_header_size); + +NLAType policy_get_type(const NLAPolicy *policy); +size_t policy_get_size(const NLAPolicy *policy); +const NLAPolicySet *policy_get_policy_set(const NLAPolicy *policy); +const NLAPolicySetUnion *policy_get_policy_set_union(const NLAPolicy *policy); + +int netlink_get_policy_set_and_header_size( + sd_netlink *nl, + uint16_t type, + const NLAPolicySet **ret_policy_set, + size_t *ret_header_size); + +const NLAPolicy *policy_set_get_policy(const NLAPolicySet *policy_set, uint16_t attr_type); +const NLAPolicySet *policy_set_get_policy_set(const NLAPolicySet *type_system, uint16_t attr_type); +const NLAPolicySetUnion *policy_set_get_policy_set_union(const NLAPolicySet *type_system, uint16_t attr_type); +uint16_t policy_set_union_get_match_attribute(const NLAPolicySetUnion *policy_set_union); +const NLAPolicySet *policy_set_union_get_policy_set_by_string(const NLAPolicySetUnion *type_system_union, const char *string); +const NLAPolicySet *policy_set_union_get_policy_set_by_family(const NLAPolicySetUnion *type_system_union, int family); diff --git a/src/libsystemd/sd-netlink/netlink-util.c b/src/libsystemd/sd-netlink/netlink-util.c new file mode 100644 index 0000000..832159a --- /dev/null +++ b/src/libsystemd/sd-netlink/netlink-util.c @@ -0,0 +1,818 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "sd-netlink.h" + +#include "fd-util.h" +#include "iovec-util.h" +#include "memory-util.h" +#include "netlink-internal.h" +#include "netlink-util.h" +#include "parse-util.h" +#include "process-util.h" +#include "strv.h" + +static int set_link_name(sd_netlink **rtnl, int ifindex, const char *name) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL; + int r; + + assert(rtnl); + assert(ifindex > 0); + assert(name); + + /* Assign the requested name. */ + r = sd_rtnl_message_new_link(*rtnl, &message, RTM_SETLINK, ifindex); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(message, IFLA_IFNAME, name); + if (r < 0) + return r; + + return sd_netlink_call(*rtnl, message, 0, NULL); +} + +int rtnl_set_link_name(sd_netlink **rtnl, int ifindex, const char *name, char* const *alternative_names) { + _cleanup_strv_free_ char **original_altnames = NULL, **new_altnames = NULL; + bool altname_deleted = false; + int r; + + assert(rtnl); + assert(ifindex > 0); + + if (isempty(name) && strv_isempty(alternative_names)) + return 0; + + if (name && !ifname_valid(name)) + return -EINVAL; + + /* If the requested name is already assigned as an alternative name, then first drop it. */ + r = rtnl_get_link_alternative_names(rtnl, ifindex, &original_altnames); + if (r < 0) + log_debug_errno(r, "Failed to get alternative names on network interface %i, ignoring: %m", + ifindex); + + if (name) { + if (strv_contains(original_altnames, name)) { + r = rtnl_delete_link_alternative_names(rtnl, ifindex, STRV_MAKE(name)); + if (r < 0) + return log_debug_errno(r, "Failed to remove '%s' from alternative names on network interface %i: %m", + name, ifindex); + + altname_deleted = true; + } + + r = set_link_name(rtnl, ifindex, name); + if (r < 0) + goto fail; + } + + /* Filter out already assigned names from requested alternative names. Also, dedup the request. */ + STRV_FOREACH(a, alternative_names) { + if (streq_ptr(name, *a)) + continue; + + if (strv_contains(original_altnames, *a)) + continue; + + if (strv_contains(new_altnames, *a)) + continue; + + if (!ifname_valid_full(*a, IFNAME_VALID_ALTERNATIVE)) + continue; + + r = strv_extend(&new_altnames, *a); + if (r < 0) + return r; + } + + strv_sort(new_altnames); + + /* Finally, assign alternative names. */ + r = rtnl_set_link_alternative_names(rtnl, ifindex, new_altnames); + if (r == -EEXIST) /* Already assigned to another interface? */ + STRV_FOREACH(a, new_altnames) { + r = rtnl_set_link_alternative_names(rtnl, ifindex, STRV_MAKE(*a)); + if (r < 0) + log_debug_errno(r, "Failed to assign '%s' as an alternative name on network interface %i, ignoring: %m", + *a, ifindex); + } + else if (r < 0) + log_debug_errno(r, "Failed to assign alternative names on network interface %i, ignoring: %m", ifindex); + + return 0; + +fail: + if (altname_deleted) { + int q = rtnl_set_link_alternative_names(rtnl, ifindex, STRV_MAKE(name)); + if (q < 0) + log_debug_errno(q, "Failed to restore '%s' as an alternative name on network interface %i, ignoring: %m", + name, ifindex); + } + + return r; +} + +int rtnl_set_link_properties( + sd_netlink **rtnl, + int ifindex, + const char *alias, + const struct hw_addr_data *hw_addr, + uint32_t txqueues, + uint32_t rxqueues, + uint32_t txqueuelen, + uint32_t mtu, + uint32_t gso_max_size, + size_t gso_max_segments) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL; + int r; + + assert(rtnl); + assert(ifindex > 0); + + if (!alias && + (!hw_addr || hw_addr->length == 0) && + txqueues == 0 && + rxqueues == 0 && + txqueuelen == UINT32_MAX && + mtu == 0 && + gso_max_size == 0 && + gso_max_segments == 0) + return 0; + + if (!*rtnl) { + r = sd_netlink_open(rtnl); + if (r < 0) + return r; + } + + r = sd_rtnl_message_new_link(*rtnl, &message, RTM_SETLINK, ifindex); + if (r < 0) + return r; + + if (alias) { + r = sd_netlink_message_append_string(message, IFLA_IFALIAS, alias); + if (r < 0) + return r; + } + + if (hw_addr && hw_addr->length > 0) { + r = netlink_message_append_hw_addr(message, IFLA_ADDRESS, hw_addr); + if (r < 0) + return r; + } + + if (txqueues > 0) { + r = sd_netlink_message_append_u32(message, IFLA_NUM_TX_QUEUES, txqueues); + if (r < 0) + return r; + } + + if (rxqueues > 0) { + r = sd_netlink_message_append_u32(message, IFLA_NUM_RX_QUEUES, rxqueues); + if (r < 0) + return r; + } + + if (txqueuelen < UINT32_MAX) { + r = sd_netlink_message_append_u32(message, IFLA_TXQLEN, txqueuelen); + if (r < 0) + return r; + } + + if (mtu != 0) { + r = sd_netlink_message_append_u32(message, IFLA_MTU, mtu); + if (r < 0) + return r; + } + + if (gso_max_size > 0) { + r = sd_netlink_message_append_u32(message, IFLA_GSO_MAX_SIZE, gso_max_size); + if (r < 0) + return r; + } + + if (gso_max_segments > 0) { + r = sd_netlink_message_append_u32(message, IFLA_GSO_MAX_SEGS, gso_max_segments); + if (r < 0) + return r; + } + + r = sd_netlink_call(*rtnl, message, 0, NULL); + if (r < 0) + return r; + + return 0; +} + +int rtnl_get_link_alternative_names(sd_netlink **rtnl, int ifindex, char ***ret) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL, *reply = NULL; + _cleanup_strv_free_ char **names = NULL; + int r; + + assert(rtnl); + assert(ifindex > 0); + assert(ret); + + if (!*rtnl) { + r = sd_netlink_open(rtnl); + if (r < 0) + return r; + } + + r = sd_rtnl_message_new_link(*rtnl, &message, RTM_GETLINK, ifindex); + if (r < 0) + return r; + + r = sd_netlink_call(*rtnl, message, 0, &reply); + if (r < 0) + return r; + + r = sd_netlink_message_read_strv(reply, IFLA_PROP_LIST, IFLA_ALT_IFNAME, &names); + if (r < 0 && r != -ENODATA) + return r; + + *ret = TAKE_PTR(names); + + return 0; +} + +static int rtnl_update_link_alternative_names( + sd_netlink **rtnl, + uint16_t nlmsg_type, + int ifindex, + char* const *alternative_names) { + + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL; + int r; + + assert(rtnl); + assert(ifindex > 0); + assert(IN_SET(nlmsg_type, RTM_NEWLINKPROP, RTM_DELLINKPROP)); + + if (strv_isempty(alternative_names)) + return 0; + + if (!*rtnl) { + r = sd_netlink_open(rtnl); + if (r < 0) + return r; + } + + r = sd_rtnl_message_new_link(*rtnl, &message, nlmsg_type, ifindex); + if (r < 0) + return r; + + r = sd_netlink_message_open_container(message, IFLA_PROP_LIST); + if (r < 0) + return r; + + r = sd_netlink_message_append_strv(message, IFLA_ALT_IFNAME, (const char**) alternative_names); + if (r < 0) + return r; + + r = sd_netlink_message_close_container(message); + if (r < 0) + return r; + + r = sd_netlink_call(*rtnl, message, 0, NULL); + if (r < 0) + return r; + + return 0; +} + +int rtnl_set_link_alternative_names(sd_netlink **rtnl, int ifindex, char* const *alternative_names) { + return rtnl_update_link_alternative_names(rtnl, RTM_NEWLINKPROP, ifindex, alternative_names); +} + +int rtnl_delete_link_alternative_names(sd_netlink **rtnl, int ifindex, char* const *alternative_names) { + return rtnl_update_link_alternative_names(rtnl, RTM_DELLINKPROP, ifindex, alternative_names); +} + +int rtnl_set_link_alternative_names_by_ifname( + sd_netlink **rtnl, + const char *ifname, + char* const *alternative_names) { + + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL; + int r; + + assert(rtnl); + assert(ifname); + + if (strv_isempty(alternative_names)) + return 0; + + if (!*rtnl) { + r = sd_netlink_open(rtnl); + if (r < 0) + return r; + } + + r = sd_rtnl_message_new_link(*rtnl, &message, RTM_NEWLINKPROP, 0); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(message, IFLA_IFNAME, ifname); + if (r < 0) + return r; + + r = sd_netlink_message_open_container(message, IFLA_PROP_LIST); + if (r < 0) + return r; + + r = sd_netlink_message_append_strv(message, IFLA_ALT_IFNAME, (const char**) alternative_names); + if (r < 0) + return r; + + r = sd_netlink_message_close_container(message); + if (r < 0) + return r; + + r = sd_netlink_call(*rtnl, message, 0, NULL); + if (r < 0) + return r; + + return 0; +} + +int rtnl_resolve_link_alternative_name(sd_netlink **rtnl, const char *name, char **ret) { + _cleanup_(sd_netlink_unrefp) sd_netlink *our_rtnl = NULL; + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL, *reply = NULL; + int r, ifindex; + + assert(name); + + /* This returns ifindex and the main interface name. */ + + if (!ifname_valid_full(name, IFNAME_VALID_ALTERNATIVE)) + return -EINVAL; + + if (!rtnl) + rtnl = &our_rtnl; + if (!*rtnl) { + r = sd_netlink_open(rtnl); + if (r < 0) + return r; + } + + r = sd_rtnl_message_new_link(*rtnl, &message, RTM_GETLINK, 0); + if (r < 0) + return r; + + r = sd_netlink_message_append_string(message, IFLA_ALT_IFNAME, name); + if (r < 0) + return r; + + r = sd_netlink_call(*rtnl, message, 0, &reply); + if (r == -EINVAL) + return -ENODEV; /* The device doesn't exist */ + if (r < 0) + return r; + + r = sd_rtnl_message_link_get_ifindex(reply, &ifindex); + if (r < 0) + return r; + assert(ifindex > 0); + + if (ret) { + r = sd_netlink_message_read_string_strdup(reply, IFLA_IFNAME, ret); + if (r < 0) + return r; + } + + return ifindex; +} + +int rtnl_resolve_ifname(sd_netlink **rtnl, const char *name) { + int r; + + /* Like if_nametoindex, but resolves "alternative names" too. */ + + assert(name); + + r = if_nametoindex(name); + if (r > 0) + return r; + + return rtnl_resolve_link_alternative_name(rtnl, name, NULL); +} + +int rtnl_resolve_interface(sd_netlink **rtnl, const char *name) { + int r; + + /* Like rtnl_resolve_ifname, but resolves interface numbers too. */ + + assert(name); + + r = parse_ifindex(name); + if (r > 0) + return r; + assert(r < 0); + + return rtnl_resolve_ifname(rtnl, name); +} + +int rtnl_resolve_interface_or_warn(sd_netlink **rtnl, const char *name) { + int r; + + r = rtnl_resolve_interface(rtnl, name); + if (r < 0) + return log_error_errno(r, "Failed to resolve interface \"%s\": %m", name); + return r; +} + +int rtnl_get_link_info( + sd_netlink **rtnl, + int ifindex, + unsigned short *ret_iftype, + unsigned *ret_flags, + char **ret_kind, + struct hw_addr_data *ret_hw_addr, + struct hw_addr_data *ret_permanent_hw_addr) { + + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL, *reply = NULL; + struct hw_addr_data addr = HW_ADDR_NULL, perm_addr = HW_ADDR_NULL; + _cleanup_free_ char *kind = NULL; + unsigned short iftype; + unsigned flags; + int r; + + assert(rtnl); + assert(ifindex > 0); + + if (!ret_iftype && !ret_flags) + return 0; + + if (!*rtnl) { + r = sd_netlink_open(rtnl); + if (r < 0) + return r; + } + + r = sd_rtnl_message_new_link(*rtnl, &message, RTM_GETLINK, ifindex); + if (r < 0) + return r; + + r = sd_netlink_call(*rtnl, message, 0, &reply); + if (r == -EINVAL) + return -ENODEV; /* The device does not exist */ + if (r < 0) + return r; + + if (ret_iftype) { + r = sd_rtnl_message_link_get_type(reply, &iftype); + if (r < 0) + return r; + } + + if (ret_flags) { + r = sd_rtnl_message_link_get_flags(reply, &flags); + if (r < 0) + return r; + } + + if (ret_kind) { + r = sd_netlink_message_enter_container(reply, IFLA_LINKINFO); + if (r >= 0) { + r = sd_netlink_message_read_string_strdup(reply, IFLA_INFO_KIND, &kind); + if (r < 0 && r != -ENODATA) + return r; + + r = sd_netlink_message_exit_container(reply); + if (r < 0) + return r; + } + } + + if (ret_hw_addr) { + r = netlink_message_read_hw_addr(reply, IFLA_ADDRESS, &addr); + if (r < 0 && r != -ENODATA) + return r; + } + + if (ret_permanent_hw_addr) { + r = netlink_message_read_hw_addr(reply, IFLA_PERM_ADDRESS, &perm_addr); + if (r < 0 && r != -ENODATA) + return r; + } + + if (ret_iftype) + *ret_iftype = iftype; + if (ret_flags) + *ret_flags = flags; + if (ret_kind) + *ret_kind = TAKE_PTR(kind); + if (ret_hw_addr) + *ret_hw_addr = addr; + if (ret_permanent_hw_addr) + *ret_permanent_hw_addr = perm_addr; + return 0; +} + +int rtnl_log_parse_error(int r) { + return log_error_errno(r, "Failed to parse netlink message: %m"); +} + +int rtnl_log_create_error(int r) { + return log_error_errno(r, "Failed to create netlink message: %m"); +} + +void rtattr_append_attribute_internal(struct rtattr *rta, unsigned short type, const void *data, size_t data_length) { + size_t padding_length; + uint8_t *padding; + + assert(rta); + assert(!data || data_length > 0); + + /* fill in the attribute */ + rta->rta_type = type; + rta->rta_len = RTA_LENGTH(data_length); + if (data) + /* we don't deal with the case where the user lies about the type + * and gives us too little data (so don't do that) + */ + padding = mempcpy(RTA_DATA(rta), data, data_length); + + else + /* if no data was passed, make sure we still initialize the padding + note that we can have data_length > 0 (used by some containers) */ + padding = RTA_DATA(rta); + + /* make sure also the padding at the end of the message is initialized */ + padding_length = (uint8_t *) rta + RTA_SPACE(data_length) - padding; + memzero(padding, padding_length); +} + +int rtattr_append_attribute(struct rtattr **rta, unsigned short type, const void *data, size_t data_length) { + struct rtattr *new_rta, *sub_rta; + size_t message_length; + + assert(rta); + assert(!data || data_length > 0); + + /* get the new message size (with padding at the end) */ + message_length = RTA_ALIGN(rta ? (*rta)->rta_len : 0) + RTA_SPACE(data_length); + + /* buffer should be smaller than both one page or 8K to be accepted by the kernel */ + if (message_length > MIN(page_size(), 8192UL)) + return -ENOBUFS; + + /* realloc to fit the new attribute */ + new_rta = realloc(*rta, message_length); + if (!new_rta) + return -ENOMEM; + *rta = new_rta; + + /* get pointer to the attribute we are about to add */ + sub_rta = (struct rtattr *) ((uint8_t *) *rta + RTA_ALIGN((*rta)->rta_len)); + + rtattr_append_attribute_internal(sub_rta, type, data, data_length); + + /* update rta_len */ + (*rta)->rta_len = message_length; + + return 0; +} + +MultipathRoute *multipath_route_free(MultipathRoute *m) { + if (!m) + return NULL; + + free(m->ifname); + + return mfree(m); +} + +int multipath_route_dup(const MultipathRoute *m, MultipathRoute **ret) { + _cleanup_(multipath_route_freep) MultipathRoute *n = NULL; + _cleanup_free_ char *ifname = NULL; + + assert(m); + assert(ret); + + if (m->ifname) { + ifname = strdup(m->ifname); + if (!ifname) + return -ENOMEM; + } + + n = new(MultipathRoute, 1); + if (!n) + return -ENOMEM; + + *n = (MultipathRoute) { + .gateway = m->gateway, + .weight = m->weight, + .ifindex = m->ifindex, + .ifname = TAKE_PTR(ifname), + }; + + *ret = TAKE_PTR(n); + + return 0; +} + +int rtattr_read_nexthop(const struct rtnexthop *rtnh, size_t size, int family, OrderedSet **ret) { + _cleanup_ordered_set_free_free_ OrderedSet *set = NULL; + int r; + + assert(rtnh); + assert(IN_SET(family, AF_INET, AF_INET6)); + + if (size < sizeof(struct rtnexthop)) + return -EBADMSG; + + for (; size >= sizeof(struct rtnexthop); ) { + _cleanup_(multipath_route_freep) MultipathRoute *m = NULL; + + if (NLMSG_ALIGN(rtnh->rtnh_len) > size) + return -EBADMSG; + + if (rtnh->rtnh_len < sizeof(struct rtnexthop)) + return -EBADMSG; + + m = new(MultipathRoute, 1); + if (!m) + return -ENOMEM; + + *m = (MultipathRoute) { + .ifindex = rtnh->rtnh_ifindex, + .weight = rtnh->rtnh_hops, + }; + + if (rtnh->rtnh_len > sizeof(struct rtnexthop)) { + size_t len = rtnh->rtnh_len - sizeof(struct rtnexthop); + + for (struct rtattr *attr = RTNH_DATA(rtnh); RTA_OK(attr, len); attr = RTA_NEXT(attr, len)) { + if (attr->rta_type == RTA_GATEWAY) { + if (attr->rta_len != RTA_LENGTH(FAMILY_ADDRESS_SIZE(family))) + return -EBADMSG; + + m->gateway.family = family; + memcpy(&m->gateway.address, RTA_DATA(attr), FAMILY_ADDRESS_SIZE(family)); + break; + } else if (attr->rta_type == RTA_VIA) { + uint16_t gw_family; + + if (family != AF_INET) + return -EINVAL; + + if (attr->rta_len < RTA_LENGTH(sizeof(uint16_t))) + return -EBADMSG; + + gw_family = *(uint16_t *) RTA_DATA(attr); + + if (gw_family != AF_INET6) + return -EBADMSG; + + if (attr->rta_len != RTA_LENGTH(FAMILY_ADDRESS_SIZE(gw_family) + sizeof(gw_family))) + return -EBADMSG; + + memcpy(&m->gateway, RTA_DATA(attr), FAMILY_ADDRESS_SIZE(gw_family) + sizeof(gw_family)); + break; + } + } + } + + r = ordered_set_ensure_put(&set, NULL, m); + if (r < 0) + return r; + + TAKE_PTR(m); + + size -= NLMSG_ALIGN(rtnh->rtnh_len); + rtnh = RTNH_NEXT(rtnh); + } + + if (ret) + *ret = TAKE_PTR(set); + return 0; +} + +bool netlink_pid_changed(sd_netlink *nl) { + /* We don't support people creating an nl connection and + * keeping it around over a fork(). Let's complain. */ + return ASSERT_PTR(nl)->original_pid != getpid_cached(); +} + +static int socket_open(int family) { + int fd; + + fd = socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, family); + if (fd < 0) + return -errno; + + return fd_move_above_stdio(fd); +} + +int netlink_open_family(sd_netlink **ret, int family) { + _cleanup_close_ int fd = -EBADF; + int r; + + fd = socket_open(family); + if (fd < 0) + return fd; + + r = sd_netlink_open_fd(ret, fd); + if (r < 0) + return r; + TAKE_FD(fd); + + return 0; +} + +static bool serial_used(sd_netlink *nl, uint32_t serial) { + assert(nl); + + return + hashmap_contains(nl->reply_callbacks, UINT32_TO_PTR(serial)) || + hashmap_contains(nl->rqueue_by_serial, UINT32_TO_PTR(serial)) || + hashmap_contains(nl->rqueue_partial_by_serial, UINT32_TO_PTR(serial)); +} + +void netlink_seal_message(sd_netlink *nl, sd_netlink_message *m) { + uint32_t picked; + + assert(nl); + assert(!netlink_pid_changed(nl)); + assert(m); + assert(m->hdr); + + /* Avoid collisions with outstanding requests */ + do { + picked = nl->serial; + + /* Don't use seq == 0, as that is used for broadcasts, so we would get confused by replies to + such messages */ + nl->serial = nl->serial == UINT32_MAX ? 1 : nl->serial + 1; + + } while (serial_used(nl, picked)); + + m->hdr->nlmsg_seq = picked; + message_seal(m); +} + +static int socket_writev_message(sd_netlink *nl, sd_netlink_message **m, size_t msgcount) { + _cleanup_free_ struct iovec *iovs = NULL; + ssize_t k; + + assert(nl); + assert(m); + assert(msgcount > 0); + + iovs = new(struct iovec, msgcount); + if (!iovs) + return -ENOMEM; + + for (size_t i = 0; i < msgcount; i++) { + assert(m[i]->hdr); + assert(m[i]->hdr->nlmsg_len > 0); + + iovs[i] = IOVEC_MAKE(m[i]->hdr, m[i]->hdr->nlmsg_len); + } + + k = writev(nl->fd, iovs, msgcount); + if (k < 0) + return -errno; + + return k; +} + +int sd_netlink_sendv( + sd_netlink *nl, + sd_netlink_message **messages, + size_t msgcount, + uint32_t **ret_serial) { + + _cleanup_free_ uint32_t *serials = NULL; + int r; + + assert_return(nl, -EINVAL); + assert_return(!netlink_pid_changed(nl), -ECHILD); + assert_return(messages, -EINVAL); + assert_return(msgcount > 0, -EINVAL); + + if (ret_serial) { + serials = new(uint32_t, msgcount); + if (!serials) + return -ENOMEM; + } + + for (size_t i = 0; i < msgcount; i++) { + assert_return(!messages[i]->sealed, -EPERM); + + netlink_seal_message(nl, messages[i]); + if (serials) + serials[i] = message_get_serial(messages[i]); + } + + r = socket_writev_message(nl, messages, msgcount); + if (r < 0) + return r; + + if (ret_serial) + *ret_serial = TAKE_PTR(serials); + + return r; +} diff --git a/src/libsystemd/sd-netlink/netlink-util.h b/src/libsystemd/sd-netlink/netlink-util.h new file mode 100644 index 0000000..369f5d5 --- /dev/null +++ b/src/libsystemd/sd-netlink/netlink-util.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <linux/rtnetlink.h> + +#include "sd-netlink.h" + +#include "ether-addr-util.h" +#include "in-addr-util.h" +#include "ordered-set.h" +#include "socket-util.h" + +/* See struct rtvia in rtnetlink.h */ +typedef struct RouteVia { + uint16_t family; + union in_addr_union address; +} _packed_ RouteVia; + +typedef struct MultipathRoute { + RouteVia gateway; + uint32_t weight; + int ifindex; + char *ifname; +} MultipathRoute; + +MultipathRoute *multipath_route_free(MultipathRoute *m); +DEFINE_TRIVIAL_CLEANUP_FUNC(MultipathRoute*, multipath_route_free); + +int multipath_route_dup(const MultipathRoute *m, MultipathRoute **ret); + +int rtnl_set_link_name(sd_netlink **rtnl, int ifindex, const char *name, char* const* alternative_names); +static inline int rtnl_append_link_alternative_names(sd_netlink **rtnl, int ifindex, char* const *alternative_names) { + return rtnl_set_link_name(rtnl, ifindex, NULL, alternative_names); +} +int rtnl_set_link_properties( + sd_netlink **rtnl, + int ifindex, + const char *alias, + const struct hw_addr_data *hw_addr, + uint32_t txqueues, + uint32_t rxqueues, + uint32_t txqueuelen, + uint32_t mtu, + uint32_t gso_max_size, + size_t gso_max_segments); +int rtnl_get_link_alternative_names(sd_netlink **rtnl, int ifindex, char ***ret); +int rtnl_set_link_alternative_names(sd_netlink **rtnl, int ifindex, char* const *alternative_names); +int rtnl_set_link_alternative_names_by_ifname(sd_netlink **rtnl, const char *ifname, char* const *alternative_names); +int rtnl_delete_link_alternative_names(sd_netlink **rtnl, int ifindex, char* const *alternative_names); +int rtnl_resolve_link_alternative_name(sd_netlink **rtnl, const char *name, char **ret); +int rtnl_resolve_ifname(sd_netlink **rtnl, const char *name); +int rtnl_resolve_interface(sd_netlink **rtnl, const char *name); +int rtnl_resolve_interface_or_warn(sd_netlink **rtnl, const char *name); +int rtnl_get_link_info( + sd_netlink **rtnl, + int ifindex, + unsigned short *ret_iftype, + unsigned *ret_flags, + char **ret_kind, + struct hw_addr_data *ret_hw_addr, + struct hw_addr_data *ret_permanent_hw_addr); + +int rtnl_log_parse_error(int r); +int rtnl_log_create_error(int r); + +#define netlink_call_async(nl, ret_slot, message, callback, destroy_callback, userdata) \ + ({ \ + int (*_callback_)(sd_netlink *, sd_netlink_message *, typeof(userdata)) = callback; \ + void (*_destroy_)(typeof(userdata)) = destroy_callback; \ + sd_netlink_call_async(nl, ret_slot, message, \ + (sd_netlink_message_handler_t) _callback_, \ + (sd_netlink_destroy_t) _destroy_, \ + userdata, 0, __func__); \ + }) + +#define netlink_add_match(nl, ret_slot, match, callback, destroy_callback, userdata, description) \ + ({ \ + int (*_callback_)(sd_netlink *, sd_netlink_message *, typeof(userdata)) = callback; \ + void (*_destroy_)(typeof(userdata)) = destroy_callback; \ + sd_netlink_add_match(nl, ret_slot, match, \ + (sd_netlink_message_handler_t) _callback_, \ + (sd_netlink_destroy_t) _destroy_, \ + userdata, description); \ + }) + +#define genl_add_match(nl, ret_slot, family, group, cmd, callback, destroy_callback, userdata, description) \ + ({ \ + int (*_callback_)(sd_netlink *, sd_netlink_message *, typeof(userdata)) = callback; \ + void (*_destroy_)(typeof(userdata)) = destroy_callback; \ + sd_genl_add_match(nl, ret_slot, family, group, cmd, \ + (sd_netlink_message_handler_t) _callback_, \ + (sd_netlink_destroy_t) _destroy_, \ + userdata, description); \ + }) + +int netlink_message_append_hw_addr(sd_netlink_message *m, unsigned short type, const struct hw_addr_data *data); +int netlink_message_append_in_addr_union(sd_netlink_message *m, unsigned short type, int family, const union in_addr_union *data); +int netlink_message_append_sockaddr_union(sd_netlink_message *m, unsigned short type, const union sockaddr_union *data); + +int netlink_message_read_hw_addr(sd_netlink_message *m, unsigned short type, struct hw_addr_data *data); +int netlink_message_read_in_addr_union(sd_netlink_message *m, unsigned short type, int family, union in_addr_union *data); + +void rtattr_append_attribute_internal(struct rtattr *rta, unsigned short type, const void *data, size_t data_length); +int rtattr_append_attribute(struct rtattr **rta, unsigned short type, const void *data, size_t data_length); + +int rtattr_read_nexthop(const struct rtnexthop *rtnh, size_t size, int family, OrderedSet **ret); + +void netlink_seal_message(sd_netlink *nl, sd_netlink_message *m); + +size_t netlink_get_reply_callback_count(sd_netlink *nl); + +/* TODO: to be exported later */ +int sd_netlink_sendv(sd_netlink *nl, sd_netlink_message **messages, size_t msgcnt, uint32_t **ret_serial); diff --git a/src/libsystemd/sd-netlink/sd-netlink.c b/src/libsystemd/sd-netlink/sd-netlink.c new file mode 100644 index 0000000..b6730b7 --- /dev/null +++ b/src/libsystemd/sd-netlink/sd-netlink.c @@ -0,0 +1,909 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <poll.h> + +#include "sd-netlink.h" + +#include "alloc-util.h" +#include "fd-util.h" +#include "hashmap.h" +#include "io-util.h" +#include "macro.h" +#include "netlink-genl.h" +#include "netlink-internal.h" +#include "netlink-slot.h" +#include "netlink-util.h" +#include "process-util.h" +#include "socket-util.h" +#include "string-util.h" + +/* Some really high limit, to catch programming errors */ +#define REPLY_CALLBACKS_MAX UINT16_MAX + +static int netlink_new(sd_netlink **ret) { + _cleanup_(sd_netlink_unrefp) sd_netlink *nl = NULL; + + assert_return(ret, -EINVAL); + + nl = new(sd_netlink, 1); + if (!nl) + return -ENOMEM; + + *nl = (sd_netlink) { + .n_ref = 1, + .fd = -EBADF, + .sockaddr.nl.nl_family = AF_NETLINK, + .original_pid = getpid_cached(), + .protocol = -1, + + /* Kernel change notification messages have sequence number 0. We want to avoid that with our + * own serials, in order not to get confused when matching up kernel replies to our earlier + * requests. + * + * Moreover, when using netlink socket activation (i.e. where PID 1 binds an AF_NETLINK + * socket for us and passes it to us across execve()) and we get restarted multiple times + * while the socket sticks around we might get confused by replies from earlier runs coming + * in late — which is pretty likely if we'd start our sequence numbers always from 1. Hence, + * let's start with a value based on the system clock. This should make collisions much less + * likely (though still theoretically possible). We use a 32 bit μs counter starting at boot + * for this (and explicitly exclude the zero, see above). This counter will wrap around after + * a bit more than 1h, but that's hopefully OK as the kernel shouldn't take that long to + * reply to our requests. + * + * We only pick the initial start value this way. For each message we simply increase the + * sequence number by 1. This means we could enqueue 1 netlink message per μs without risking + * collisions, which should be OK. + * + * Note this means the serials will be in the range 1…UINT32_MAX here. + * + * (In an ideal world we'd attach the current serial counter to the netlink socket itself + * somehow, to avoid all this, but I couldn't come up with a nice way to do this) */ + .serial = (uint32_t) (now(CLOCK_MONOTONIC) % UINT32_MAX) + 1, + }; + + *ret = TAKE_PTR(nl); + return 0; +} + +int sd_netlink_open_fd(sd_netlink **ret, int fd) { + _cleanup_(sd_netlink_unrefp) sd_netlink *nl = NULL; + int r, protocol; + + assert_return(ret, -EINVAL); + assert_return(fd >= 0, -EBADF); + + r = netlink_new(&nl); + if (r < 0) + return r; + + r = getsockopt_int(fd, SOL_SOCKET, SO_PROTOCOL, &protocol); + if (r < 0) + return r; + + nl->fd = fd; + nl->protocol = protocol; + + r = setsockopt_int(fd, SOL_NETLINK, NETLINK_EXT_ACK, true); + if (r < 0) + log_debug_errno(r, "sd-netlink: Failed to enable NETLINK_EXT_ACK option, ignoring: %m"); + + r = setsockopt_int(fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, true); + if (r < 0) + log_debug_errno(r, "sd-netlink: Failed to enable NETLINK_GET_STRICT_CHK option, ignoring: %m"); + + r = socket_bind(nl); + if (r < 0) { + nl->fd = -EBADF; /* on failure, the caller remains owner of the fd, hence don't close it here */ + nl->protocol = -1; + return r; + } + + *ret = TAKE_PTR(nl); + + return 0; +} + +int sd_netlink_open(sd_netlink **ret) { + return netlink_open_family(ret, NETLINK_ROUTE); +} + +int sd_netlink_increase_rxbuf(sd_netlink *nl, size_t size) { + assert_return(nl, -EINVAL); + assert_return(!netlink_pid_changed(nl), -ECHILD); + + return fd_increase_rxbuf(nl->fd, size); +} + +static sd_netlink *netlink_free(sd_netlink *nl) { + sd_netlink_slot *s; + + assert(nl); + + ordered_set_free(nl->rqueue); + hashmap_free(nl->rqueue_by_serial); + hashmap_free(nl->rqueue_partial_by_serial); + free(nl->rbuffer); + + while ((s = nl->slots)) { + assert(s->floating); + netlink_slot_disconnect(s, true); + } + hashmap_free(nl->reply_callbacks); + prioq_free(nl->reply_callbacks_prioq); + + sd_event_source_unref(nl->io_event_source); + sd_event_source_unref(nl->time_event_source); + sd_event_unref(nl->event); + + hashmap_free(nl->broadcast_group_refs); + + genl_clear_family(nl); + + safe_close(nl->fd); + return mfree(nl); +} + +DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_netlink, sd_netlink, netlink_free); + +int sd_netlink_send( + sd_netlink *nl, + sd_netlink_message *message, + uint32_t *serial) { + + int r; + + assert_return(nl, -EINVAL); + assert_return(!netlink_pid_changed(nl), -ECHILD); + assert_return(message, -EINVAL); + assert_return(!message->sealed, -EPERM); + + netlink_seal_message(nl, message); + + r = socket_write_message(nl, message); + if (r < 0) + return r; + + if (serial) + *serial = message_get_serial(message); + + return 1; +} + +static int dispatch_rqueue(sd_netlink *nl, sd_netlink_message **ret) { + sd_netlink_message *m; + int r; + + assert(nl); + assert(ret); + + if (ordered_set_size(nl->rqueue) <= 0) { + /* Try to read a new message */ + r = socket_read_message(nl); + if (r == -ENOBUFS) /* FIXME: ignore buffer overruns for now */ + log_debug_errno(r, "sd-netlink: Got ENOBUFS from netlink socket, ignoring."); + else if (r < 0) + return r; + } + + /* Dispatch a queued message */ + m = ordered_set_steal_first(nl->rqueue); + if (m) + sd_netlink_message_unref(hashmap_remove_value(nl->rqueue_by_serial, UINT32_TO_PTR(message_get_serial(m)), m)); + *ret = m; + return !!m; +} + +static int process_timeout(sd_netlink *nl) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + struct reply_callback *c; + sd_netlink_slot *slot; + usec_t n; + int r; + + assert(nl); + + c = prioq_peek(nl->reply_callbacks_prioq); + if (!c) + return 0; + + n = now(CLOCK_MONOTONIC); + if (c->timeout > n) + return 0; + + r = message_new_synthetic_error(nl, -ETIMEDOUT, c->serial, &m); + if (r < 0) + return r; + + assert_se(prioq_pop(nl->reply_callbacks_prioq) == c); + hashmap_remove(nl->reply_callbacks, UINT32_TO_PTR(c->serial)); + + slot = container_of(c, sd_netlink_slot, reply_callback); + + r = c->callback(nl, m, slot->userdata); + if (r < 0) + log_debug_errno(r, "sd-netlink: timedout callback %s%s%sfailed: %m", + slot->description ? "'" : "", + strempty(slot->description), + slot->description ? "' " : ""); + + if (slot->floating) + netlink_slot_disconnect(slot, true); + + return 1; +} + +static int process_reply(sd_netlink *nl, sd_netlink_message *m) { + struct reply_callback *c; + sd_netlink_slot *slot; + uint32_t serial; + uint16_t type; + int r; + + assert(nl); + assert(m); + + serial = message_get_serial(m); + c = hashmap_remove(nl->reply_callbacks, UINT32_TO_PTR(serial)); + if (!c) + return 0; + + if (c->timeout != USEC_INFINITY) + prioq_remove(nl->reply_callbacks_prioq, c, &c->prioq_idx); + + r = sd_netlink_message_get_type(m, &type); + if (r < 0) + return r; + + if (type == NLMSG_DONE) + m = NULL; + + slot = container_of(c, sd_netlink_slot, reply_callback); + + r = c->callback(nl, m, slot->userdata); + if (r < 0) + log_debug_errno(r, "sd-netlink: reply callback %s%s%sfailed: %m", + slot->description ? "'" : "", + strempty(slot->description), + slot->description ? "' " : ""); + + if (slot->floating) + netlink_slot_disconnect(slot, true); + + return 1; +} + +static int process_match(sd_netlink *nl, sd_netlink_message *m) { + uint16_t type; + uint8_t cmd; + int r; + + assert(nl); + assert(m); + + r = sd_netlink_message_get_type(m, &type); + if (r < 0) + return r; + + if (m->protocol == NETLINK_GENERIC) { + r = sd_genl_message_get_command(nl, m, &cmd); + if (r < 0) + return r; + } else + cmd = 0; + + LIST_FOREACH(match_callbacks, c, nl->match_callbacks) { + sd_netlink_slot *slot; + bool found = false; + + if (c->type != type) + continue; + if (c->cmd != 0 && c->cmd != cmd) + continue; + + for (size_t i = 0; i < c->n_groups; i++) + if (c->groups[i] == m->multicast_group) { + found = true; + break; + } + + if (!found) + continue; + + slot = container_of(c, sd_netlink_slot, match_callback); + + r = c->callback(nl, m, slot->userdata); + if (r < 0) + log_debug_errno(r, "sd-netlink: match callback %s%s%sfailed: %m", + slot->description ? "'" : "", + strempty(slot->description), + slot->description ? "' " : ""); + if (r != 0) + break; + } + + return 1; +} + +static int process_running(sd_netlink *nl, sd_netlink_message **ret) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + int r; + + assert(nl); + + r = process_timeout(nl); + if (r != 0) + goto null_message; + + r = dispatch_rqueue(nl, &m); + if (r < 0) + return r; + if (!m) + goto null_message; + + if (sd_netlink_message_is_broadcast(m)) + r = process_match(nl, m); + else + r = process_reply(nl, m); + if (r != 0) + goto null_message; + + if (ret) { + *ret = TAKE_PTR(m); + + return 1; + } + + return 1; + +null_message: + if (r >= 0 && ret) + *ret = NULL; + + return r; +} + +int sd_netlink_process(sd_netlink *nl, sd_netlink_message **ret) { + NETLINK_DONT_DESTROY(nl); + int r; + + assert_return(nl, -EINVAL); + assert_return(!netlink_pid_changed(nl), -ECHILD); + assert_return(!nl->processing, -EBUSY); + + nl->processing = true; + r = process_running(nl, ret); + nl->processing = false; + + return r; +} + +static usec_t timespan_to_timestamp(usec_t usec) { + static bool default_timeout_set = false; + static usec_t default_timeout; + int r; + + if (usec == 0) { + if (!default_timeout_set) { + const char *e; + + default_timeout_set = true; + default_timeout = NETLINK_DEFAULT_TIMEOUT_USEC; + + e = secure_getenv("SYSTEMD_NETLINK_DEFAULT_TIMEOUT"); + if (e) { + r = parse_sec(e, &default_timeout); + if (r < 0) + log_debug_errno(r, "sd-netlink: Failed to parse $SYSTEMD_NETLINK_DEFAULT_TIMEOUT environment variable, ignoring: %m"); + } + } + + usec = default_timeout; + } + + return usec_add(now(CLOCK_MONOTONIC), usec); +} + +static int netlink_poll(sd_netlink *nl, bool need_more, usec_t timeout_usec) { + usec_t m = USEC_INFINITY; + int r, e; + + assert(nl); + + e = sd_netlink_get_events(nl); + if (e < 0) + return e; + + if (need_more) + /* Caller wants more data, and doesn't care about + * what's been read or any other timeouts. */ + e |= POLLIN; + else { + usec_t until; + + /* Caller wants to process if there is something to + * process, but doesn't care otherwise */ + + r = sd_netlink_get_timeout(nl, &until); + if (r < 0) + return r; + + m = usec_sub_unsigned(until, now(CLOCK_MONOTONIC)); + } + + r = fd_wait_for_event(nl->fd, e, MIN(m, timeout_usec)); + if (r <= 0) + return r; + + return 1; +} + +int sd_netlink_wait(sd_netlink *nl, uint64_t timeout_usec) { + int r; + + assert_return(nl, -EINVAL); + assert_return(!netlink_pid_changed(nl), -ECHILD); + + if (ordered_set_size(nl->rqueue) > 0) + return 0; + + r = netlink_poll(nl, false, timeout_usec); + if (ERRNO_IS_NEG_TRANSIENT(r)) /* Convert EINTR to "something happened" and give user a chance to run some code before calling back into us */ + return 1; + return r; +} + +static int timeout_compare(const void *a, const void *b) { + const struct reply_callback *x = a, *y = b; + + return CMP(x->timeout, y->timeout); +} + +size_t netlink_get_reply_callback_count(sd_netlink *nl) { + assert(nl); + + return hashmap_size(nl->reply_callbacks); +} + +int sd_netlink_call_async( + sd_netlink *nl, + sd_netlink_slot **ret_slot, + sd_netlink_message *m, + sd_netlink_message_handler_t callback, + sd_netlink_destroy_t destroy_callback, + void *userdata, + uint64_t usec, + const char *description) { + + _cleanup_free_ sd_netlink_slot *slot = NULL; + int r, k; + + assert_return(nl, -EINVAL); + assert_return(m, -EINVAL); + assert_return(callback, -EINVAL); + assert_return(!netlink_pid_changed(nl), -ECHILD); + + if (hashmap_size(nl->reply_callbacks) >= REPLY_CALLBACKS_MAX) + return -EXFULL; + + r = hashmap_ensure_allocated(&nl->reply_callbacks, &trivial_hash_ops); + if (r < 0) + return r; + + if (usec != UINT64_MAX) { + r = prioq_ensure_allocated(&nl->reply_callbacks_prioq, timeout_compare); + if (r < 0) + return r; + } + + r = netlink_slot_allocate(nl, !ret_slot, NETLINK_REPLY_CALLBACK, sizeof(struct reply_callback), userdata, description, &slot); + if (r < 0) + return r; + + slot->reply_callback.callback = callback; + slot->reply_callback.timeout = timespan_to_timestamp(usec); + + k = sd_netlink_send(nl, m, &slot->reply_callback.serial); + if (k < 0) + return k; + + r = hashmap_put(nl->reply_callbacks, UINT32_TO_PTR(slot->reply_callback.serial), &slot->reply_callback); + if (r < 0) + return r; + + if (slot->reply_callback.timeout != USEC_INFINITY) { + r = prioq_put(nl->reply_callbacks_prioq, &slot->reply_callback, &slot->reply_callback.prioq_idx); + if (r < 0) { + (void) hashmap_remove(nl->reply_callbacks, UINT32_TO_PTR(slot->reply_callback.serial)); + return r; + } + } + + /* Set this at last. Otherwise, some failures in above would call destroy_callback but some would not. */ + slot->destroy_callback = destroy_callback; + + if (ret_slot) + *ret_slot = slot; + + TAKE_PTR(slot); + + return k; +} + +int sd_netlink_read( + sd_netlink *nl, + uint32_t serial, + uint64_t usec, + sd_netlink_message **ret) { + + usec_t timeout; + int r; + + assert_return(nl, -EINVAL); + assert_return(!netlink_pid_changed(nl), -ECHILD); + + timeout = timespan_to_timestamp(usec); + + for (;;) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + usec_t left; + + m = hashmap_remove(nl->rqueue_by_serial, UINT32_TO_PTR(serial)); + if (m) { + uint16_t type; + + /* found a match, remove from rqueue and return it */ + sd_netlink_message_unref(ordered_set_remove(nl->rqueue, m)); + + r = sd_netlink_message_get_errno(m); + if (r < 0) + return r; + + r = sd_netlink_message_get_type(m, &type); + if (r < 0) + return r; + + if (type == NLMSG_DONE) { + if (ret) + *ret = NULL; + return 0; + } + + if (ret) + *ret = TAKE_PTR(m); + return 1; + } + + r = socket_read_message(nl); + if (r < 0) + return r; + if (r > 0) + /* received message, so try to process straight away */ + continue; + + if (timeout != USEC_INFINITY) { + usec_t n; + + n = now(CLOCK_MONOTONIC); + if (n >= timeout) + return -ETIMEDOUT; + + left = usec_sub_unsigned(timeout, n); + } else + left = USEC_INFINITY; + + r = netlink_poll(nl, true, left); + if (r < 0) + return r; + if (r == 0) + return -ETIMEDOUT; + } +} + +int sd_netlink_call( + sd_netlink *nl, + sd_netlink_message *message, + uint64_t usec, + sd_netlink_message **ret) { + + uint32_t serial; + int r; + + assert_return(nl, -EINVAL); + assert_return(!netlink_pid_changed(nl), -ECHILD); + assert_return(message, -EINVAL); + + r = sd_netlink_send(nl, message, &serial); + if (r < 0) + return r; + + return sd_netlink_read(nl, serial, usec, ret); +} + +int sd_netlink_get_events(sd_netlink *nl) { + assert_return(nl, -EINVAL); + assert_return(!netlink_pid_changed(nl), -ECHILD); + + return ordered_set_size(nl->rqueue) == 0 ? POLLIN : 0; +} + +int sd_netlink_get_timeout(sd_netlink *nl, uint64_t *timeout_usec) { + struct reply_callback *c; + + assert_return(nl, -EINVAL); + assert_return(timeout_usec, -EINVAL); + assert_return(!netlink_pid_changed(nl), -ECHILD); + + if (ordered_set_size(nl->rqueue) > 0) { + *timeout_usec = 0; + return 1; + } + + c = prioq_peek(nl->reply_callbacks_prioq); + if (!c) { + *timeout_usec = UINT64_MAX; + return 0; + } + + *timeout_usec = c->timeout; + return 1; +} + +static int io_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) { + sd_netlink *nl = ASSERT_PTR(userdata); + int r; + + r = sd_netlink_process(nl, NULL); + if (r < 0) + return r; + + return 1; +} + +static int time_callback(sd_event_source *s, uint64_t usec, void *userdata) { + sd_netlink *nl = ASSERT_PTR(userdata); + int r; + + r = sd_netlink_process(nl, NULL); + if (r < 0) + return r; + + return 1; +} + +static int prepare_callback(sd_event_source *s, void *userdata) { + sd_netlink *nl = ASSERT_PTR(userdata); + int r, enabled; + usec_t until; + + assert(s); + + r = sd_netlink_get_events(nl); + if (r < 0) + return r; + + r = sd_event_source_set_io_events(nl->io_event_source, r); + if (r < 0) + return r; + + enabled = sd_netlink_get_timeout(nl, &until); + if (enabled < 0) + return enabled; + if (enabled > 0) { + r = sd_event_source_set_time(nl->time_event_source, until); + if (r < 0) + return r; + } + + r = sd_event_source_set_enabled(nl->time_event_source, + enabled > 0 ? SD_EVENT_ONESHOT : SD_EVENT_OFF); + if (r < 0) + return r; + + return 1; +} + +int sd_netlink_attach_event(sd_netlink *nl, sd_event *event, int64_t priority) { + int r; + + assert_return(nl, -EINVAL); + assert_return(!nl->event, -EBUSY); + + assert(!nl->io_event_source); + assert(!nl->time_event_source); + + if (event) + nl->event = sd_event_ref(event); + else { + r = sd_event_default(&nl->event); + if (r < 0) + return r; + } + + r = sd_event_add_io(nl->event, &nl->io_event_source, nl->fd, 0, io_callback, nl); + if (r < 0) + goto fail; + + r = sd_event_source_set_priority(nl->io_event_source, priority); + if (r < 0) + goto fail; + + r = sd_event_source_set_description(nl->io_event_source, "netlink-receive-message"); + if (r < 0) + goto fail; + + r = sd_event_source_set_prepare(nl->io_event_source, prepare_callback); + if (r < 0) + goto fail; + + r = sd_event_add_time(nl->event, &nl->time_event_source, CLOCK_MONOTONIC, 0, 0, time_callback, nl); + if (r < 0) + goto fail; + + r = sd_event_source_set_priority(nl->time_event_source, priority); + if (r < 0) + goto fail; + + r = sd_event_source_set_description(nl->time_event_source, "netlink-timer"); + if (r < 0) + goto fail; + + return 0; + +fail: + sd_netlink_detach_event(nl); + return r; +} + +int sd_netlink_detach_event(sd_netlink *nl) { + assert_return(nl, -EINVAL); + assert_return(nl->event, -ENXIO); + + nl->io_event_source = sd_event_source_unref(nl->io_event_source); + + nl->time_event_source = sd_event_source_unref(nl->time_event_source); + + nl->event = sd_event_unref(nl->event); + + return 0; +} + +sd_event* sd_netlink_get_event(sd_netlink *nl) { + assert_return(nl, NULL); + + return nl->event; +} + +int netlink_add_match_internal( + sd_netlink *nl, + sd_netlink_slot **ret_slot, + const uint32_t *groups, + size_t n_groups, + uint16_t type, + uint8_t cmd, + sd_netlink_message_handler_t callback, + sd_netlink_destroy_t destroy_callback, + void *userdata, + const char *description) { + + _cleanup_free_ sd_netlink_slot *slot = NULL; + int r; + + assert(groups); + assert(n_groups > 0); + + for (size_t i = 0; i < n_groups; i++) { + r = socket_broadcast_group_ref(nl, groups[i]); + if (r < 0) + return r; + } + + r = netlink_slot_allocate(nl, !ret_slot, NETLINK_MATCH_CALLBACK, sizeof(struct match_callback), + userdata, description, &slot); + if (r < 0) + return r; + + slot->match_callback.groups = newdup(uint32_t, groups, n_groups); + if (!slot->match_callback.groups) + return -ENOMEM; + + slot->match_callback.n_groups = n_groups; + slot->match_callback.callback = callback; + slot->match_callback.type = type; + slot->match_callback.cmd = cmd; + + LIST_PREPEND(match_callbacks, nl->match_callbacks, &slot->match_callback); + + /* Set this at last. Otherwise, some failures in above call the destroy callback but some do not. */ + slot->destroy_callback = destroy_callback; + + if (ret_slot) + *ret_slot = slot; + + TAKE_PTR(slot); + return 0; +} + +int sd_netlink_add_match( + sd_netlink *rtnl, + sd_netlink_slot **ret_slot, + uint16_t type, + sd_netlink_message_handler_t callback, + sd_netlink_destroy_t destroy_callback, + void *userdata, + const char *description) { + + static const uint32_t + address_groups[] = { RTNLGRP_IPV4_IFADDR, RTNLGRP_IPV6_IFADDR, }, + link_groups[] = { RTNLGRP_LINK, }, + neighbor_groups[] = { RTNLGRP_NEIGH, }, + nexthop_groups[] = { RTNLGRP_NEXTHOP, }, + route_groups[] = { RTNLGRP_IPV4_ROUTE, RTNLGRP_IPV6_ROUTE, }, + rule_groups[] = { RTNLGRP_IPV4_RULE, RTNLGRP_IPV6_RULE, }, + tc_groups[] = { RTNLGRP_TC }; + const uint32_t *groups; + size_t n_groups; + + assert_return(rtnl, -EINVAL); + assert_return(callback, -EINVAL); + assert_return(!netlink_pid_changed(rtnl), -ECHILD); + + switch (type) { + case RTM_NEWLINK: + case RTM_DELLINK: + groups = link_groups; + n_groups = ELEMENTSOF(link_groups); + break; + case RTM_NEWADDR: + case RTM_DELADDR: + groups = address_groups; + n_groups = ELEMENTSOF(address_groups); + break; + case RTM_NEWNEIGH: + case RTM_DELNEIGH: + groups = neighbor_groups; + n_groups = ELEMENTSOF(neighbor_groups); + break; + case RTM_NEWROUTE: + case RTM_DELROUTE: + groups = route_groups; + n_groups = ELEMENTSOF(route_groups); + break; + case RTM_NEWRULE: + case RTM_DELRULE: + groups = rule_groups; + n_groups = ELEMENTSOF(rule_groups); + break; + case RTM_NEWNEXTHOP: + case RTM_DELNEXTHOP: + groups = nexthop_groups; + n_groups = ELEMENTSOF(nexthop_groups); + break; + case RTM_NEWQDISC: + case RTM_DELQDISC: + case RTM_NEWTCLASS: + case RTM_DELTCLASS: + groups = tc_groups; + n_groups = ELEMENTSOF(tc_groups); + break; + default: + return -EOPNOTSUPP; + } + + return netlink_add_match_internal(rtnl, ret_slot, groups, n_groups, type, 0, callback, + destroy_callback, userdata, description); +} + +int sd_netlink_attach_filter(sd_netlink *nl, size_t len, const struct sock_filter *filter) { + assert_return(nl, -EINVAL); + assert_return(len == 0 || filter, -EINVAL); + + if (setsockopt(nl->fd, SOL_SOCKET, + len == 0 ? SO_DETACH_FILTER : SO_ATTACH_FILTER, + &(struct sock_fprog) { + .len = len, + .filter = (struct sock_filter*) filter, + }, sizeof(struct sock_fprog)) < 0) + return -errno; + + return 0; +} diff --git a/src/libsystemd/sd-netlink/test-netlink.c b/src/libsystemd/sd-netlink/test-netlink.c new file mode 100644 index 0000000..13aedc4 --- /dev/null +++ b/src/libsystemd/sd-netlink/test-netlink.c @@ -0,0 +1,686 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <net/if.h> +#include <netinet/ether.h> +#include <netinet/in.h> +#include <linux/fou.h> +#include <linux/genetlink.h> +#include <linux/if_macsec.h> +#include <linux/l2tp.h> +#include <linux/nl80211.h> +#include <unistd.h> + +#include "sd-netlink.h" + +#include "alloc-util.h" +#include "ether-addr-util.h" +#include "macro.h" +#include "netlink-genl.h" +#include "netlink-internal.h" +#include "netlink-util.h" +#include "socket-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strv.h" +#include "tests.h" + +TEST(message_newlink_bridge) { + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL; + uint32_t cost; + + assert_se(sd_netlink_open(&rtnl) >= 0); + + assert_se(sd_rtnl_message_new_link(rtnl, &message, RTM_NEWLINK, 1) >= 0); + assert_se(sd_rtnl_message_link_set_family(message, AF_BRIDGE) >= 0); + assert_se(sd_netlink_message_open_container(message, IFLA_PROTINFO) >= 0); + assert_se(sd_netlink_message_append_u32(message, IFLA_BRPORT_COST, 10) >= 0); + assert_se(sd_netlink_message_close_container(message) >= 0); + + assert_se(sd_netlink_message_rewind(message, rtnl) >= 0); + + assert_se(sd_netlink_message_enter_container(message, IFLA_PROTINFO) >= 0); + assert_se(sd_netlink_message_read_u32(message, IFLA_BRPORT_COST, &cost) >= 0); + assert_se(cost == 10); + assert_se(sd_netlink_message_exit_container(message) >= 0); +} + +TEST(message_getlink) { + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL, *reply = NULL; + int ifindex; + uint8_t u8_data; + uint16_t u16_data; + uint32_t u32_data; + const char *str_data; + struct ether_addr eth_data; + + assert_se(sd_netlink_open(&rtnl) >= 0); + ifindex = (int) if_nametoindex("lo"); + + /* we'd really like to test NEWLINK, but let's not mess with the running kernel */ + assert_se(sd_rtnl_message_new_link(rtnl, &message, RTM_GETLINK, ifindex) >= 0); + assert_se(sd_netlink_call(rtnl, message, 0, &reply) == 1); + + /* u8 */ + assert_se(sd_netlink_message_read_u8(reply, IFLA_CARRIER, &u8_data) >= 0); + assert_se(sd_netlink_message_read_u8(reply, IFLA_OPERSTATE, &u8_data) >= 0); + assert_se(sd_netlink_message_read_u8(reply, IFLA_LINKMODE, &u8_data) >= 0); + + /* u16 */ + assert_se(sd_netlink_message_get_type(reply, &u16_data) >= 0); + assert_se(u16_data == RTM_NEWLINK); + + /* u32 */ + assert_se(sd_netlink_message_read_u32(reply, IFLA_MTU, &u32_data) >= 0); + assert_se(sd_netlink_message_read_u32(reply, IFLA_GROUP, &u32_data) >= 0); + assert_se(sd_netlink_message_read_u32(reply, IFLA_TXQLEN, &u32_data) >= 0); + assert_se(sd_netlink_message_read_u32(reply, IFLA_NUM_TX_QUEUES, &u32_data) >= 0); + + /* string */ + assert_se(sd_netlink_message_read_string(reply, IFLA_IFNAME, &str_data) >= 0); + + /* ether_addr */ + assert_se(sd_netlink_message_read_ether_addr(reply, IFLA_ADDRESS, ð_data) >= 0); +} + +TEST(message_address) { + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL, *reply = NULL; + int ifindex; + struct in_addr in_data; + struct ifa_cacheinfo cache; + const char *label; + + assert_se(sd_netlink_open(&rtnl) >= 0); + ifindex = (int) if_nametoindex("lo"); + + assert_se(sd_rtnl_message_new_addr(rtnl, &message, RTM_GETADDR, ifindex, AF_INET) >= 0); + assert_se(sd_netlink_message_set_request_dump(message, true) >= 0); + assert_se(sd_netlink_call(rtnl, message, 0, &reply) == 1); + + assert_se(sd_netlink_message_read_in_addr(reply, IFA_LOCAL, &in_data) >= 0); + assert_se(sd_netlink_message_read_in_addr(reply, IFA_ADDRESS, &in_data) >= 0); + assert_se(sd_netlink_message_read_string(reply, IFA_LABEL, &label) >= 0); + assert_se(sd_netlink_message_read_cache_info(reply, IFA_CACHEINFO, &cache) == 0); +} + +TEST(message_route) { + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL; + struct in_addr addr, addr_data; + uint32_t index = 2, u32_data; + + assert_se(sd_netlink_open(&rtnl) >= 0); + + assert_se(sd_rtnl_message_new_route(rtnl, &req, RTM_NEWROUTE, AF_INET, RTPROT_STATIC) >= 0); + + addr.s_addr = htobe32(INADDR_LOOPBACK); + + assert_se(sd_netlink_message_append_in_addr(req, RTA_GATEWAY, &addr) >= 0); + assert_se(sd_netlink_message_append_u32(req, RTA_OIF, index) >= 0); + + assert_se(sd_netlink_message_rewind(req, rtnl) >= 0); + + assert_se(sd_netlink_message_read_in_addr(req, RTA_GATEWAY, &addr_data) >= 0); + assert_se(addr_data.s_addr == addr.s_addr); + + assert_se(sd_netlink_message_read_u32(req, RTA_OIF, &u32_data) >= 0); + assert_se(u32_data == index); + + assert_se((req = sd_netlink_message_unref(req)) == NULL); +} + +static int link_handler(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) { + const char *data; + + assert_se(rtnl); + assert_se(m); + + assert_se(streq_ptr(userdata, "foo")); + + assert_se(sd_netlink_message_read_string(m, IFLA_IFNAME, &data) >= 0); + assert_se(streq(data, "lo")); + + log_info("%s: got link info about %s", __func__, data); + return 1; +} + +TEST(netlink_event_loop) { + _cleanup_(sd_event_unrefp) sd_event *event = NULL; + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + _cleanup_free_ char *userdata = NULL; + int ifindex; + + assert_se(sd_netlink_open(&rtnl) >= 0); + ifindex = (int) if_nametoindex("lo"); + + assert_se(userdata = strdup("foo")); + + assert_se(sd_event_default(&event) >= 0); + assert_se(sd_netlink_attach_event(rtnl, event, 0) >= 0); + + assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0); + assert_se(sd_netlink_call_async(rtnl, NULL, m, link_handler, NULL, userdata, 0, NULL) >= 0); + + assert_se(sd_event_run(event, 0) >= 0); + + assert_se(sd_netlink_detach_event(rtnl) >= 0); + assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL); +} + +static void test_async_destroy(void *userdata) { +} + +TEST(netlink_call_async) { + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL, *reply = NULL; + _cleanup_(sd_netlink_slot_unrefp) sd_netlink_slot *slot = NULL; + _cleanup_free_ char *userdata = NULL; + sd_netlink_destroy_t destroy_callback; + const char *description; + int ifindex; + + assert_se(sd_netlink_open(&rtnl) >= 0); + ifindex = (int) if_nametoindex("lo"); + + assert_se(userdata = strdup("foo")); + + assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0); + assert_se(sd_netlink_call_async(rtnl, &slot, m, link_handler, test_async_destroy, userdata, 0, "hogehoge") >= 0); + + assert_se(sd_netlink_slot_get_netlink(slot) == rtnl); + + assert_se(sd_netlink_slot_get_userdata(slot) == userdata); + assert_se(sd_netlink_slot_set_userdata(slot, NULL) == userdata); + assert_se(sd_netlink_slot_get_userdata(slot) == NULL); + assert_se(sd_netlink_slot_set_userdata(slot, userdata) == NULL); + assert_se(sd_netlink_slot_get_userdata(slot) == userdata); + + assert_se(sd_netlink_slot_get_destroy_callback(slot, &destroy_callback) == 1); + assert_se(destroy_callback == test_async_destroy); + assert_se(sd_netlink_slot_set_destroy_callback(slot, NULL) >= 0); + assert_se(sd_netlink_slot_get_destroy_callback(slot, &destroy_callback) == 0); + assert_se(destroy_callback == NULL); + assert_se(sd_netlink_slot_set_destroy_callback(slot, test_async_destroy) >= 0); + assert_se(sd_netlink_slot_get_destroy_callback(slot, &destroy_callback) == 1); + assert_se(destroy_callback == test_async_destroy); + + assert_se(sd_netlink_slot_get_floating(slot) == 0); + assert_se(sd_netlink_slot_set_floating(slot, 1) == 1); + assert_se(sd_netlink_slot_get_floating(slot) == 1); + + assert_se(sd_netlink_slot_get_description(slot, &description) == 1); + assert_se(streq(description, "hogehoge")); + assert_se(sd_netlink_slot_set_description(slot, NULL) >= 0); + assert_se(sd_netlink_slot_get_description(slot, &description) == 0); + assert_se(description == NULL); + + assert_se(sd_netlink_wait(rtnl, 0) >= 0); + assert_se(sd_netlink_process(rtnl, &reply) >= 0); + + assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL); +} + +struct test_async_object { + unsigned n_ref; + char *ifname; +}; + +static struct test_async_object *test_async_object_free(struct test_async_object *t) { + assert_se(t); + + free(t->ifname); + return mfree(t); +} + +DEFINE_PRIVATE_TRIVIAL_REF_UNREF_FUNC(struct test_async_object, test_async_object, test_async_object_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(struct test_async_object *, test_async_object_unref); + +static int link_handler2(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) { + struct test_async_object *t = userdata; + const char *data; + + assert_se(rtnl); + assert_se(m); + assert_se(userdata); + + log_info("%s: got link info about %s", __func__, t->ifname); + + assert_se(sd_netlink_message_read_string(m, IFLA_IFNAME, &data) >= 0); + assert_se(streq(data, "lo")); + + return 1; +} + +static void test_async_object_destroy(void *userdata) { + struct test_async_object *t = userdata; + + assert_se(userdata); + + log_info("%s: n_ref=%u", __func__, t->n_ref); + test_async_object_unref(t); +} + +TEST(async_destroy_callback) { + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL, *reply = NULL; + _cleanup_(test_async_object_unrefp) struct test_async_object *t = NULL; + _cleanup_(sd_netlink_slot_unrefp) sd_netlink_slot *slot = NULL; + int ifindex; + + assert_se(sd_netlink_open(&rtnl) >= 0); + ifindex = (int) if_nametoindex("lo"); + + assert_se(t = new(struct test_async_object, 1)); + *t = (struct test_async_object) { + .n_ref = 1, + }; + assert_se(t->ifname = strdup("lo")); + + /* destroy callback is called after processing message */ + assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0); + assert_se(sd_netlink_call_async(rtnl, NULL, m, link_handler2, test_async_object_destroy, t, 0, NULL) >= 0); + + assert_se(t->n_ref == 1); + assert_se(test_async_object_ref(t)); + assert_se(t->n_ref == 2); + + assert_se(sd_netlink_wait(rtnl, 0) >= 0); + assert_se(sd_netlink_process(rtnl, &reply) == 1); + assert_se(t->n_ref == 1); + + assert_se(!sd_netlink_message_unref(m)); + + /* destroy callback is called when asynchronous call is cancelled, that is, slot is freed. */ + assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0); + assert_se(sd_netlink_call_async(rtnl, &slot, m, link_handler2, test_async_object_destroy, t, 0, NULL) >= 0); + + assert_se(t->n_ref == 1); + assert_se(test_async_object_ref(t)); + assert_se(t->n_ref == 2); + + assert_se(!(slot = sd_netlink_slot_unref(slot))); + assert_se(t->n_ref == 1); + + assert_se(!sd_netlink_message_unref(m)); + + /* destroy callback is also called by sd_netlink_unref() */ + assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_GETLINK, ifindex) >= 0); + assert_se(sd_netlink_call_async(rtnl, NULL, m, link_handler2, test_async_object_destroy, t, 0, NULL) >= 0); + + assert_se(t->n_ref == 1); + assert_se(test_async_object_ref(t)); + assert_se(t->n_ref == 2); + + assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL); + assert_se(t->n_ref == 1); +} + +static int pipe_handler(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) { + int *counter = userdata; + int r; + + (*counter)--; + + r = sd_netlink_message_get_errno(m); + + log_info_errno(r, "%d left in pipe. got reply: %m", *counter); + + assert_se(r >= 0); + + return 1; +} + +TEST(pipe) { + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m1 = NULL, *m2 = NULL; + int ifindex, counter = 0; + + assert_se(sd_netlink_open(&rtnl) >= 0); + ifindex = (int) if_nametoindex("lo"); + + assert_se(sd_rtnl_message_new_link(rtnl, &m1, RTM_GETLINK, ifindex) >= 0); + assert_se(sd_rtnl_message_new_link(rtnl, &m2, RTM_GETLINK, ifindex) >= 0); + + counter++; + assert_se(sd_netlink_call_async(rtnl, NULL, m1, pipe_handler, NULL, &counter, 0, NULL) >= 0); + + counter++; + assert_se(sd_netlink_call_async(rtnl, NULL, m2, pipe_handler, NULL, &counter, 0, NULL) >= 0); + + while (counter > 0) { + assert_se(sd_netlink_wait(rtnl, 0) >= 0); + assert_se(sd_netlink_process(rtnl, NULL) >= 0); + } + + assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL); +} + +TEST(message_container) { + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + uint16_t u16_data; + uint32_t u32_data; + const char *string_data; + + assert_se(sd_netlink_open(&rtnl) >= 0); + + assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0) >= 0); + + assert_se(sd_netlink_message_open_container(m, IFLA_LINKINFO) >= 0); + assert_se(sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, "vlan") >= 0); + assert_se(sd_netlink_message_append_u16(m, IFLA_VLAN_ID, 100) >= 0); + assert_se(sd_netlink_message_close_container(m) >= 0); + assert_se(sd_netlink_message_close_container(m) >= 0); + + assert_se(sd_netlink_message_rewind(m, rtnl) >= 0); + + assert_se(sd_netlink_message_enter_container(m, IFLA_LINKINFO) >= 0); + assert_se(sd_netlink_message_read_string(m, IFLA_INFO_KIND, &string_data) >= 0); + assert_se(streq("vlan", string_data)); + + assert_se(sd_netlink_message_enter_container(m, IFLA_INFO_DATA) >= 0); + assert_se(sd_netlink_message_read_u16(m, IFLA_VLAN_ID, &u16_data) >= 0); + assert_se(sd_netlink_message_exit_container(m) >= 0); + + assert_se(sd_netlink_message_read_string(m, IFLA_INFO_KIND, &string_data) >= 0); + assert_se(streq("vlan", string_data)); + assert_se(sd_netlink_message_exit_container(m) >= 0); + + assert_se(sd_netlink_message_read_u32(m, IFLA_LINKINFO, &u32_data) < 0); +} + +TEST(sd_netlink_add_match) { + _cleanup_(sd_netlink_slot_unrefp) sd_netlink_slot *s1 = NULL, *s2 = NULL; + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; + + assert_se(sd_netlink_open(&rtnl) >= 0); + + assert_se(sd_netlink_add_match(rtnl, &s1, RTM_NEWLINK, link_handler, NULL, NULL, NULL) >= 0); + assert_se(sd_netlink_add_match(rtnl, &s2, RTM_NEWLINK, link_handler, NULL, NULL, NULL) >= 0); + assert_se(sd_netlink_add_match(rtnl, NULL, RTM_NEWLINK, link_handler, NULL, NULL, NULL) >= 0); + + assert_se(!(s1 = sd_netlink_slot_unref(s1))); + assert_se(!(s2 = sd_netlink_slot_unref(s2))); + + assert_se((rtnl = sd_netlink_unref(rtnl)) == NULL); +} + +TEST(dump_addresses) { + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL; + + assert_se(sd_netlink_open(&rtnl) >= 0); + + assert_se(sd_rtnl_message_new_addr(rtnl, &req, RTM_GETADDR, 0, AF_UNSPEC) >= 0); + assert_se(sd_netlink_message_set_request_dump(req, true) >= 0); + assert_se(sd_netlink_call(rtnl, req, 0, &reply) >= 0); + + for (sd_netlink_message *m = reply; m; m = sd_netlink_message_next(m)) { + uint16_t type; + unsigned char scope, flags; + int family, ifindex; + + assert_se(sd_netlink_message_get_type(m, &type) >= 0); + assert_se(type == RTM_NEWADDR); + + assert_se(sd_rtnl_message_addr_get_ifindex(m, &ifindex) >= 0); + assert_se(sd_rtnl_message_addr_get_family(m, &family) >= 0); + assert_se(sd_rtnl_message_addr_get_scope(m, &scope) >= 0); + assert_se(sd_rtnl_message_addr_get_flags(m, &flags) >= 0); + + assert_se(ifindex > 0); + assert_se(IN_SET(family, AF_INET, AF_INET6)); + + log_info("got IPv%i address on ifindex %i", family == AF_INET ? 4 : 6, ifindex); + } +} + +TEST(sd_netlink_message_get_errno) { + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + + assert_se(sd_netlink_open(&rtnl) >= 0); + + assert_se(message_new_synthetic_error(rtnl, -ETIMEDOUT, 1, &m) >= 0); + assert_se(sd_netlink_message_get_errno(m) == -ETIMEDOUT); +} + +TEST(message_array) { + _cleanup_(sd_netlink_unrefp) sd_netlink *genl = NULL; + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + + assert_se(sd_genl_socket_open(&genl) >= 0); + assert_se(sd_genl_message_new(genl, CTRL_GENL_NAME, CTRL_CMD_GETFAMILY, &m) >= 0); + + assert_se(sd_netlink_message_open_container(m, CTRL_ATTR_MCAST_GROUPS) >= 0); + for (unsigned i = 0; i < 10; i++) { + char name[STRLEN("hoge") + DECIMAL_STR_MAX(uint32_t)]; + uint32_t id = i + 1000; + + xsprintf(name, "hoge%" PRIu32, id); + assert_se(sd_netlink_message_open_array(m, i + 1) >= 0); + assert_se(sd_netlink_message_append_u32(m, CTRL_ATTR_MCAST_GRP_ID, id) >= 0); + assert_se(sd_netlink_message_append_string(m, CTRL_ATTR_MCAST_GRP_NAME, name) >= 0); + assert_se(sd_netlink_message_close_container(m) >= 0); + } + assert_se(sd_netlink_message_close_container(m) >= 0); + + message_seal(m); + assert_se(sd_netlink_message_rewind(m, genl) >= 0); + + assert_se(sd_netlink_message_enter_container(m, CTRL_ATTR_MCAST_GROUPS) >= 0); + for (unsigned i = 0; i < 10; i++) { + char expected[STRLEN("hoge") + DECIMAL_STR_MAX(uint32_t)]; + const char *name; + uint32_t id; + + assert_se(sd_netlink_message_enter_array(m, i + 1) >= 0); + assert_se(sd_netlink_message_read_u32(m, CTRL_ATTR_MCAST_GRP_ID, &id) >= 0); + assert_se(sd_netlink_message_read_string(m, CTRL_ATTR_MCAST_GRP_NAME, &name) >= 0); + assert_se(sd_netlink_message_exit_container(m) >= 0); + + assert_se(id == i + 1000); + xsprintf(expected, "hoge%" PRIu32, id); + assert_se(streq(name, expected)); + } + assert_se(sd_netlink_message_exit_container(m) >= 0); +} + +TEST(message_strv) { + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + _cleanup_strv_free_ char **names_in = NULL, **names_out; + const char *p; + + assert_se(sd_netlink_open(&rtnl) >= 0); + + assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINKPROP, 1) >= 0); + + for (unsigned i = 0; i < 10; i++) { + char name[STRLEN("hoge") + DECIMAL_STR_MAX(uint32_t)]; + + xsprintf(name, "hoge%" PRIu32, i + 1000); + assert_se(strv_extend(&names_in, name) >= 0); + } + + assert_se(sd_netlink_message_open_container(m, IFLA_PROP_LIST) >= 0); + assert_se(sd_netlink_message_append_strv(m, IFLA_ALT_IFNAME, (const char**) names_in) >= 0); + assert_se(sd_netlink_message_close_container(m) >= 0); + + message_seal(m); + assert_se(sd_netlink_message_rewind(m, rtnl) >= 0); + + assert_se(sd_netlink_message_read_strv(m, IFLA_PROP_LIST, IFLA_ALT_IFNAME, &names_out) >= 0); + assert_se(strv_equal(names_in, names_out)); + + assert_se(sd_netlink_message_enter_container(m, IFLA_PROP_LIST) >= 0); + assert_se(sd_netlink_message_read_string(m, IFLA_ALT_IFNAME, &p) >= 0); + assert_se(streq(p, "hoge1009")); + assert_se(sd_netlink_message_exit_container(m) >= 0); +} + +static int genl_ctrl_match_callback(sd_netlink *genl, sd_netlink_message *m, void *userdata) { + const char *name; + uint16_t id; + uint8_t cmd; + + assert_se(genl); + assert_se(m); + + assert_se(sd_genl_message_get_family_name(genl, m, &name) >= 0); + assert_se(streq(name, CTRL_GENL_NAME)); + + assert_se(sd_genl_message_get_command(genl, m, &cmd) >= 0); + + switch (cmd) { + case CTRL_CMD_NEWFAMILY: + case CTRL_CMD_DELFAMILY: + assert_se(sd_netlink_message_read_string(m, CTRL_ATTR_FAMILY_NAME, &name) >= 0); + assert_se(sd_netlink_message_read_u16(m, CTRL_ATTR_FAMILY_ID, &id) >= 0); + log_debug("%s: %s (id=%"PRIu16") family is %s.", + __func__, name, id, cmd == CTRL_CMD_NEWFAMILY ? "added" : "removed"); + break; + case CTRL_CMD_NEWMCAST_GRP: + case CTRL_CMD_DELMCAST_GRP: + assert_se(sd_netlink_message_read_string(m, CTRL_ATTR_FAMILY_NAME, &name) >= 0); + assert_se(sd_netlink_message_read_u16(m, CTRL_ATTR_FAMILY_ID, &id) >= 0); + log_debug("%s: multicast group for %s (id=%"PRIu16") family is %s.", + __func__, name, id, cmd == CTRL_CMD_NEWMCAST_GRP ? "added" : "removed"); + break; + default: + log_debug("%s: received nlctrl message with unknown command '%"PRIu8"'.", __func__, cmd); + } + + return 0; +} + +TEST(genl) { + _cleanup_(sd_event_unrefp) sd_event *event = NULL; + _cleanup_(sd_netlink_unrefp) sd_netlink *genl = NULL; + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + const char *name; + uint8_t cmd; + int r; + + assert_se(sd_genl_socket_open(&genl) >= 0); + assert_se(sd_event_default(&event) >= 0); + assert_se(sd_netlink_attach_event(genl, event, 0) >= 0); + + assert_se(sd_genl_message_new(genl, CTRL_GENL_NAME, CTRL_CMD_GETFAMILY, &m) >= 0); + assert_se(sd_genl_message_get_family_name(genl, m, &name) >= 0); + assert_se(streq(name, CTRL_GENL_NAME)); + assert_se(sd_genl_message_get_command(genl, m, &cmd) >= 0); + assert_se(cmd == CTRL_CMD_GETFAMILY); + + assert_se(sd_genl_add_match(genl, NULL, CTRL_GENL_NAME, "notify", 0, genl_ctrl_match_callback, NULL, NULL, "genl-ctrl-notify") >= 0); + + m = sd_netlink_message_unref(m); + assert_se(sd_genl_message_new(genl, "should-not-exist", CTRL_CMD_GETFAMILY, &m) < 0); + assert_se(sd_genl_message_new(genl, "should-not-exist", CTRL_CMD_GETFAMILY, &m) == -EOPNOTSUPP); + + /* These families may not be supported by kernel. Hence, ignore results. */ + (void) sd_genl_message_new(genl, FOU_GENL_NAME, 0, &m); + m = sd_netlink_message_unref(m); + (void) sd_genl_message_new(genl, L2TP_GENL_NAME, 0, &m); + m = sd_netlink_message_unref(m); + (void) sd_genl_message_new(genl, MACSEC_GENL_NAME, 0, &m); + m = sd_netlink_message_unref(m); + (void) sd_genl_message_new(genl, NL80211_GENL_NAME, 0, &m); + m = sd_netlink_message_unref(m); + (void) sd_genl_message_new(genl, NETLBL_NLTYPE_UNLABELED_NAME, 0, &m); + + for (;;) { + r = sd_event_run(event, 500 * USEC_PER_MSEC); + assert_se(r >= 0); + if (r == 0) + return; + } +} + +static void remove_dummy_interfacep(int *ifindex) { + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL; + + if (!ifindex || *ifindex <= 0) + return; + + assert_se(sd_netlink_open(&rtnl) >= 0); + + assert_se(sd_rtnl_message_new_link(rtnl, &message, RTM_DELLINK, *ifindex) >= 0); + assert_se(sd_netlink_call(rtnl, message, 0, NULL) == 1); +} + +TEST(rtnl_set_link_name) { + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *message = NULL, *reply = NULL; + _cleanup_(remove_dummy_interfacep) int ifindex = 0; + _cleanup_strv_free_ char **alternative_names = NULL; + int r; + + if (geteuid() != 0) + return (void) log_tests_skipped("not root"); + + assert_se(sd_netlink_open(&rtnl) >= 0); + + assert_se(sd_rtnl_message_new_link(rtnl, &message, RTM_NEWLINK, 0) >= 0); + assert_se(sd_netlink_message_append_string(message, IFLA_IFNAME, "test-netlink") >= 0); + assert_se(sd_netlink_message_open_container(message, IFLA_LINKINFO) >= 0); + assert_se(sd_netlink_message_append_string(message, IFLA_INFO_KIND, "dummy") >= 0); + r = sd_netlink_call(rtnl, message, 0, &reply); + if (r == -EPERM) + return (void) log_tests_skipped("missing required capabilities"); + if (r == -EOPNOTSUPP) + return (void) log_tests_skipped("dummy network interface is not supported"); + assert_se(r >= 0); + + message = sd_netlink_message_unref(message); + reply = sd_netlink_message_unref(reply); + + assert_se(sd_rtnl_message_new_link(rtnl, &message, RTM_GETLINK, 0) >= 0); + assert_se(sd_netlink_message_append_string(message, IFLA_IFNAME, "test-netlink") >= 0); + assert_se(sd_netlink_call(rtnl, message, 0, &reply) == 1); + + assert_se(sd_rtnl_message_link_get_ifindex(reply, &ifindex) >= 0); + assert_se(ifindex > 0); + + /* Test that the new name (which is currently an alternative name) is + * restored as an alternative name on error. Create an error by using + * an invalid device name, namely one that exceeds IFNAMSIZ + * (alternative names can exceed IFNAMSIZ, but not regular names). */ + r = rtnl_set_link_alternative_names(&rtnl, ifindex, STRV_MAKE("testlongalternativename", "test-shortname")); + if (r == -EPERM) + return (void) log_tests_skipped("missing required capabilities"); + if (r == -EOPNOTSUPP) + return (void) log_tests_skipped("alternative name is not supported"); + assert_se(r >= 0); + + assert_se(rtnl_get_link_alternative_names(&rtnl, ifindex, &alternative_names) >= 0); + assert_se(strv_contains(alternative_names, "testlongalternativename")); + assert_se(strv_contains(alternative_names, "test-shortname")); + + assert_se(rtnl_set_link_name(&rtnl, ifindex, "testlongalternativename", NULL) == -EINVAL); + assert_se(rtnl_set_link_name(&rtnl, ifindex, "test-shortname", STRV_MAKE("testlongalternativename", "test-shortname", "test-additional-name")) >= 0); + + alternative_names = strv_free(alternative_names); + assert_se(rtnl_get_link_alternative_names(&rtnl, ifindex, &alternative_names) >= 0); + assert_se(strv_contains(alternative_names, "testlongalternativename")); + assert_se(strv_contains(alternative_names, "test-additional-name")); + assert_se(!strv_contains(alternative_names, "test-shortname")); + + assert_se(rtnl_delete_link_alternative_names(&rtnl, ifindex, STRV_MAKE("testlongalternativename")) >= 0); + + alternative_names = strv_free(alternative_names); + assert_se(rtnl_get_link_alternative_names(&rtnl, ifindex, &alternative_names) >= 0); + assert_se(!strv_contains(alternative_names, "testlongalternativename")); + assert_se(strv_contains(alternative_names, "test-additional-name")); + assert_se(!strv_contains(alternative_names, "test-shortname")); + + _cleanup_free_ char *resolved = NULL; + assert_se(rtnl_resolve_link_alternative_name(&rtnl, "test-additional-name", &resolved) == ifindex); + assert_se(streq_ptr(resolved, "test-shortname")); +} + +DEFINE_TEST_MAIN(LOG_DEBUG); diff --git a/src/libsystemd/sd-network/network-util.c b/src/libsystemd/sd-network/network-util.c new file mode 100644 index 0000000..2059567 --- /dev/null +++ b/src/libsystemd/sd-network/network-util.c @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "sd-network.h" + +#include "alloc-util.h" +#include "network-util.h" +#include "string-table.h" +#include "strv.h" + +bool network_is_online(void) { + _cleanup_free_ char *online_state = NULL; + LinkOnlineState state; + int r; + + r = sd_network_get_online_state(&online_state); + if (r < 0) + state = _LINK_ONLINE_STATE_INVALID; + else + state = link_online_state_from_string(online_state); + + if (state >= LINK_ONLINE_STATE_PARTIAL) + return true; + else if (state < 0) { + _cleanup_free_ char *carrier_state = NULL, *addr_state = NULL; + + r = sd_network_get_carrier_state(&carrier_state); + if (r < 0) /* if we don't know anything, we consider the system online */ + return true; + + r = sd_network_get_address_state(&addr_state); + if (r < 0) /* if we don't know anything, we consider the system online */ + return true; + + /* we don't know the online state for certain, so make an educated guess */ + if (STR_IN_SET(carrier_state, "degraded-carrier", "carrier") && + STR_IN_SET(addr_state, "routable", "degraded")) + return true; + } + + return false; +} + +static const char* const link_operstate_table[_LINK_OPERSTATE_MAX] = { + [LINK_OPERSTATE_MISSING] = "missing", + [LINK_OPERSTATE_OFF] = "off", + [LINK_OPERSTATE_NO_CARRIER] = "no-carrier", + [LINK_OPERSTATE_DORMANT] = "dormant", + [LINK_OPERSTATE_DEGRADED_CARRIER] = "degraded-carrier", + [LINK_OPERSTATE_CARRIER] = "carrier", + [LINK_OPERSTATE_DEGRADED] = "degraded", + [LINK_OPERSTATE_ENSLAVED] = "enslaved", + [LINK_OPERSTATE_ROUTABLE] = "routable", +}; + +DEFINE_STRING_TABLE_LOOKUP(link_operstate, LinkOperationalState); + +static const char* const link_carrier_state_table[_LINK_CARRIER_STATE_MAX] = { + [LINK_CARRIER_STATE_OFF] = "off", + [LINK_CARRIER_STATE_NO_CARRIER] = "no-carrier", + [LINK_CARRIER_STATE_DORMANT] = "dormant", + [LINK_CARRIER_STATE_DEGRADED_CARRIER] = "degraded-carrier", + [LINK_CARRIER_STATE_CARRIER] = "carrier", + [LINK_CARRIER_STATE_ENSLAVED] = "enslaved", +}; + +DEFINE_STRING_TABLE_LOOKUP(link_carrier_state, LinkCarrierState); + +static const char* const link_required_address_family_table[_ADDRESS_FAMILY_MAX] = { + [ADDRESS_FAMILY_NO] = "any", + [ADDRESS_FAMILY_IPV4] = "ipv4", + [ADDRESS_FAMILY_IPV6] = "ipv6", + [ADDRESS_FAMILY_YES] = "both", +}; + +DEFINE_STRING_TABLE_LOOKUP(link_required_address_family, AddressFamily); + +static const char* const link_address_state_table[_LINK_ADDRESS_STATE_MAX] = { + [LINK_ADDRESS_STATE_OFF] = "off", + [LINK_ADDRESS_STATE_DEGRADED] = "degraded", + [LINK_ADDRESS_STATE_ROUTABLE] = "routable", +}; + +DEFINE_STRING_TABLE_LOOKUP(link_address_state, LinkAddressState); + +static const char *const link_online_state_table[_LINK_ONLINE_STATE_MAX] = { + [LINK_ONLINE_STATE_OFFLINE] = "offline", + [LINK_ONLINE_STATE_PARTIAL] = "partial", + [LINK_ONLINE_STATE_ONLINE] = "online", +}; + +DEFINE_STRING_TABLE_LOOKUP(link_online_state, LinkOnlineState); + +int parse_operational_state_range(const char *str, LinkOperationalStateRange *out) { + LinkOperationalStateRange range = { _LINK_OPERSTATE_INVALID, _LINK_OPERSTATE_INVALID }; + _cleanup_free_ const char *min = NULL; + const char *p; + + assert(str); + assert(out); + + p = strchr(str, ':'); + if (p) { + min = strndup(str, p - str); + + if (!isempty(p + 1)) { + range.max = link_operstate_from_string(p + 1); + if (range.max < 0) + return -EINVAL; + } + } else + min = strdup(str); + + if (!min) + return -ENOMEM; + + if (!isempty(min)) { + range.min = link_operstate_from_string(min); + if (range.min < 0) + return -EINVAL; + } + + /* Fail on empty strings. */ + if (range.min == _LINK_OPERSTATE_INVALID && range.max == _LINK_OPERSTATE_INVALID) + return -EINVAL; + + if (range.min == _LINK_OPERSTATE_INVALID) + range.min = LINK_OPERSTATE_MISSING; + if (range.max == _LINK_OPERSTATE_INVALID) + range.max = LINK_OPERSTATE_ROUTABLE; + + if (range.min > range.max) + return -EINVAL; + + *out = range; + + return 0; +} + +int network_link_get_operational_state(int ifindex, LinkOperationalState *ret) { + _cleanup_free_ char *str = NULL; + LinkOperationalState s; + int r; + + assert(ifindex > 0); + assert(ret); + + r = sd_network_link_get_operational_state(ifindex, &str); + if (r < 0) + return r; + + s = link_operstate_from_string(str); + if (s < 0) + return s; + + *ret = s; + return 0; +} diff --git a/src/libsystemd/sd-network/network-util.h b/src/libsystemd/sd-network/network-util.h new file mode 100644 index 0000000..c47e271 --- /dev/null +++ b/src/libsystemd/sd-network/network-util.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <errno.h> +#include <stdbool.h> + +#include "macro.h" + +bool network_is_online(void); + +typedef enum AddressFamily { + /* This is a bitmask, though it usually doesn't feel that way! */ + ADDRESS_FAMILY_NO = 0, + ADDRESS_FAMILY_IPV4 = 1 << 0, + ADDRESS_FAMILY_IPV6 = 1 << 1, + ADDRESS_FAMILY_YES = ADDRESS_FAMILY_IPV4 | ADDRESS_FAMILY_IPV6, + _ADDRESS_FAMILY_MAX, + _ADDRESS_FAMILY_INVALID = -EINVAL, +} AddressFamily; + +typedef enum LinkOperationalState { + LINK_OPERSTATE_MISSING, + LINK_OPERSTATE_OFF, + LINK_OPERSTATE_NO_CARRIER, + LINK_OPERSTATE_DORMANT, + LINK_OPERSTATE_DEGRADED_CARRIER, + LINK_OPERSTATE_CARRIER, + LINK_OPERSTATE_DEGRADED, + LINK_OPERSTATE_ENSLAVED, + LINK_OPERSTATE_ROUTABLE, + _LINK_OPERSTATE_MAX, + _LINK_OPERSTATE_INVALID = -EINVAL, +} LinkOperationalState; + +typedef enum LinkCarrierState { + LINK_CARRIER_STATE_OFF = LINK_OPERSTATE_OFF, + LINK_CARRIER_STATE_NO_CARRIER = LINK_OPERSTATE_NO_CARRIER, + LINK_CARRIER_STATE_DORMANT = LINK_OPERSTATE_DORMANT, + LINK_CARRIER_STATE_DEGRADED_CARRIER = LINK_OPERSTATE_DEGRADED_CARRIER, + LINK_CARRIER_STATE_CARRIER = LINK_OPERSTATE_CARRIER, + LINK_CARRIER_STATE_ENSLAVED = LINK_OPERSTATE_ENSLAVED, + _LINK_CARRIER_STATE_MAX, + _LINK_CARRIER_STATE_INVALID = -EINVAL, +} LinkCarrierState; + +typedef enum LinkAddressState { + LINK_ADDRESS_STATE_OFF, + LINK_ADDRESS_STATE_DEGRADED, + LINK_ADDRESS_STATE_ROUTABLE, + _LINK_ADDRESS_STATE_MAX, + _LINK_ADDRESS_STATE_INVALID = -EINVAL, +} LinkAddressState; + +typedef enum LinkOnlineState { + LINK_ONLINE_STATE_OFFLINE, + LINK_ONLINE_STATE_PARTIAL, + LINK_ONLINE_STATE_ONLINE, + _LINK_ONLINE_STATE_MAX, + _LINK_ONLINE_STATE_INVALID = -EINVAL, +} LinkOnlineState; + +const char* link_operstate_to_string(LinkOperationalState s) _const_; +LinkOperationalState link_operstate_from_string(const char *s) _pure_; + +const char* link_carrier_state_to_string(LinkCarrierState s) _const_; +LinkCarrierState link_carrier_state_from_string(const char *s) _pure_; + +const char* link_required_address_family_to_string(AddressFamily s) _const_; +AddressFamily link_required_address_family_from_string(const char *s) _pure_; + +const char* link_address_state_to_string(LinkAddressState s) _const_; +LinkAddressState link_address_state_from_string(const char *s) _pure_; + +const char* link_online_state_to_string(LinkOnlineState s) _const_; +LinkOnlineState link_online_state_from_string(const char *s) _pure_; + +typedef struct LinkOperationalStateRange { + LinkOperationalState min; + LinkOperationalState max; +} LinkOperationalStateRange; + +#define LINK_OPERSTATE_RANGE_DEFAULT (LinkOperationalStateRange) { LINK_OPERSTATE_DEGRADED, \ + LINK_OPERSTATE_ROUTABLE } + +int parse_operational_state_range(const char *str, LinkOperationalStateRange *out); +int network_link_get_operational_state(int ifindex, LinkOperationalState *ret); diff --git a/src/libsystemd/sd-network/sd-network.c b/src/libsystemd/sd-network/sd-network.c new file mode 100644 index 0000000..cf3c400 --- /dev/null +++ b/src/libsystemd/sd-network/sd-network.c @@ -0,0 +1,462 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <poll.h> +#include <sys/inotify.h> + +#include "sd-network.h" + +#include "alloc-util.h" +#include "env-file.h" +#include "fd-util.h" +#include "fs-util.h" +#include "inotify-util.h" +#include "macro.h" +#include "parse-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strv.h" + +static int network_get_string(const char *field, char **ret) { + _cleanup_free_ char *s = NULL; + int r; + + assert_return(ret, -EINVAL); + + r = parse_env_file(NULL, "/run/systemd/netif/state", field, &s); + if (r < 0) + return r; + if (isempty(s)) + return -ENODATA; + + *ret = TAKE_PTR(s); + return 0; +} + +int sd_network_get_operational_state(char **ret) { + return network_get_string("OPER_STATE", ret); +} + +int sd_network_get_carrier_state(char **ret) { + return network_get_string("CARRIER_STATE", ret); +} + +int sd_network_get_address_state(char **ret) { + return network_get_string("ADDRESS_STATE", ret); +} + +int sd_network_get_ipv4_address_state(char **ret) { + return network_get_string("IPV4_ADDRESS_STATE", ret); +} + +int sd_network_get_ipv6_address_state(char **ret) { + return network_get_string("IPV6_ADDRESS_STATE", ret); +} + +int sd_network_get_online_state(char **ret) { + return network_get_string("ONLINE_STATE", ret); +} + +static int network_get_strv(const char *key, char ***ret) { + _cleanup_strv_free_ char **a = NULL; + _cleanup_free_ char *s = NULL; + int r; + + assert_return(ret, -EINVAL); + + r = parse_env_file(NULL, "/run/systemd/netif/state", key, &s); + if (r < 0) + return r; + if (isempty(s)) + return -ENODATA; + + a = strv_split(s, NULL); + if (!a) + return -ENOMEM; + + strv_uniq(a); + r = (int) strv_length(a); + + *ret = TAKE_PTR(a); + return r; +} + +int sd_network_get_dns(char ***ret) { + return network_get_strv("DNS", ret); +} + +int sd_network_get_ntp(char ***ret) { + return network_get_strv("NTP", ret); +} + +int sd_network_get_search_domains(char ***ret) { + return network_get_strv("DOMAINS", ret); +} + +int sd_network_get_route_domains(char ***ret) { + return network_get_strv("ROUTE_DOMAINS", ret); +} + +static int network_link_get_string(int ifindex, const char *field, char **ret) { + char path[STRLEN("/run/systemd/netif/links/") + DECIMAL_STR_MAX(ifindex)]; + _cleanup_free_ char *s = NULL; + int r; + + assert_return(ifindex > 0, -EINVAL); + assert_return(ret, -EINVAL); + + xsprintf(path, "/run/systemd/netif/links/%i", ifindex); + + r = parse_env_file(NULL, path, field, &s); + if (r < 0) + return r; + if (isempty(s)) + return -ENODATA; + + *ret = TAKE_PTR(s); + return 0; +} + +static int network_link_get_boolean(int ifindex, const char *key) { + _cleanup_free_ char *s = NULL; + int r; + + r = network_link_get_string(ifindex, key, &s); + if (r < 0) + return r; + + return parse_boolean(s); +} + +static int network_link_get_strv(int ifindex, const char *key, char ***ret) { + _cleanup_strv_free_ char **a = NULL; + _cleanup_free_ char *s = NULL; + int r; + + assert_return(ifindex > 0, -EINVAL); + assert_return(ret, -EINVAL); + + r = network_link_get_string(ifindex, key, &s); + if (r < 0) + return r; + + a = strv_split(s, NULL); + if (!a) + return -ENOMEM; + + strv_uniq(a); + r = (int) strv_length(a); + + *ret = TAKE_PTR(a); + return r; +} + +int sd_network_link_get_setup_state(int ifindex, char **ret) { + return network_link_get_string(ifindex, "ADMIN_STATE", ret); +} + +int sd_network_link_get_network_file(int ifindex, char **ret) { + return network_link_get_string(ifindex, "NETWORK_FILE", ret); +} + +int sd_network_link_get_network_file_dropins(int ifindex, char ***ret) { + _cleanup_free_ char **sv = NULL, *joined = NULL; + int r; + + assert_return(ifindex > 0, -EINVAL); + assert_return(ret, -EINVAL); + + r = network_link_get_string(ifindex, "NETWORK_FILE_DROPINS", &joined); + if (r < 0) + return r; + + r = strv_split_full(&sv, joined, ":", EXTRACT_CUNESCAPE); + if (r < 0) + return r; + + *ret = TAKE_PTR(sv); + return 0; +} + +int sd_network_link_get_operational_state(int ifindex, char **ret) { + return network_link_get_string(ifindex, "OPER_STATE", ret); +} + +int sd_network_link_get_required_family_for_online(int ifindex, char **ret) { + return network_link_get_string(ifindex, "REQUIRED_FAMILY_FOR_ONLINE", ret); +} + +int sd_network_link_get_carrier_state(int ifindex, char **ret) { + return network_link_get_string(ifindex, "CARRIER_STATE", ret); +} + +int sd_network_link_get_address_state(int ifindex, char **ret) { + return network_link_get_string(ifindex, "ADDRESS_STATE", ret); +} + +int sd_network_link_get_ipv4_address_state(int ifindex, char **ret) { + return network_link_get_string(ifindex, "IPV4_ADDRESS_STATE", ret); +} + +int sd_network_link_get_ipv6_address_state(int ifindex, char **ret) { + return network_link_get_string(ifindex, "IPV6_ADDRESS_STATE", ret); +} + +int sd_network_link_get_online_state(int ifindex, char **ret) { + return network_link_get_string(ifindex, "ONLINE_STATE", ret); +} + +int sd_network_link_get_dhcp6_client_iaid_string(int ifindex, char **ret) { + return network_link_get_string(ifindex, "DHCP6_CLIENT_IAID", ret); +} + +int sd_network_link_get_dhcp6_client_duid_string(int ifindex, char **ret) { + return network_link_get_string(ifindex, "DHCP6_CLIENT_DUID", ret); +} + +int sd_network_link_get_required_for_online(int ifindex) { + return network_link_get_boolean(ifindex, "REQUIRED_FOR_ONLINE"); +} + +int sd_network_link_get_required_operstate_for_online(int ifindex, char **ret) { + return network_link_get_string(ifindex, "REQUIRED_OPER_STATE_FOR_ONLINE", ret); +} + +int sd_network_link_get_activation_policy(int ifindex, char **ret) { + return network_link_get_string(ifindex, "ACTIVATION_POLICY", ret); +} + +int sd_network_link_get_llmnr(int ifindex, char **ret) { + return network_link_get_string(ifindex, "LLMNR", ret); +} + +int sd_network_link_get_mdns(int ifindex, char **ret) { + return network_link_get_string(ifindex, "MDNS", ret); +} + +int sd_network_link_get_dns_over_tls(int ifindex, char **ret) { + return network_link_get_string(ifindex, "DNS_OVER_TLS", ret); +} + +int sd_network_link_get_dnssec(int ifindex, char **ret) { + return network_link_get_string(ifindex, "DNSSEC", ret); +} + +int sd_network_link_get_dnssec_negative_trust_anchors(int ifindex, char ***ret) { + return network_link_get_strv(ifindex, "DNSSEC_NTA", ret); +} + +int sd_network_link_get_dns(int ifindex, char ***ret) { + return network_link_get_strv(ifindex, "DNS", ret); +} + +int sd_network_link_get_ntp(int ifindex, char ***ret) { + return network_link_get_strv(ifindex, "NTP", ret); +} + +int sd_network_link_get_sip(int ifindex, char ***ret) { + return network_link_get_strv(ifindex, "SIP", ret); +} + +int sd_network_link_get_captive_portal(int ifindex, char **ret) { + return network_link_get_string(ifindex, "CAPTIVE_PORTAL", ret); +} + +int sd_network_link_get_search_domains(int ifindex, char ***ret) { + return network_link_get_strv(ifindex, "DOMAINS", ret); +} + +int sd_network_link_get_route_domains(int ifindex, char ***ret) { + return network_link_get_strv(ifindex, "ROUTE_DOMAINS", ret); +} + +int sd_network_link_get_dns_default_route(int ifindex) { + return network_link_get_boolean(ifindex, "DNS_DEFAULT_ROUTE"); +} + +static int network_link_get_ifindexes(int ifindex, const char *key, int **ret) { + _cleanup_free_ int *ifis = NULL; + _cleanup_free_ char *s = NULL; + size_t c = 0; + int r; + + assert_return(ifindex > 0, -EINVAL); + assert_return(ret, -EINVAL); + + r = network_link_get_string(ifindex, key, &s); + if (r < 0) + return r; + + for (const char *x = s;;) { + _cleanup_free_ char *word = NULL; + + r = extract_first_word(&x, &word, NULL, 0); + if (r < 0) + return r; + if (r == 0) + break; + + if (!GREEDY_REALLOC(ifis, c + 2)) + return -ENOMEM; + + r = ifis[c++] = parse_ifindex(word); + if (r < 0) + return r; + } + + if (ifis) + ifis[c] = 0; /* Let's add a 0 ifindex to the end, to be nice */ + + *ret = TAKE_PTR(ifis); + return c; +} + +int sd_network_link_get_carrier_bound_to(int ifindex, int **ret) { + return network_link_get_ifindexes(ifindex, "CARRIER_BOUND_TO", ret); +} + +int sd_network_link_get_carrier_bound_by(int ifindex, int **ret) { + return network_link_get_ifindexes(ifindex, "CARRIER_BOUND_BY", ret); +} + +int sd_network_link_get_stat(int ifindex, struct stat *ret) { + char path[STRLEN("/run/systemd/netif/links/") + DECIMAL_STR_MAX(ifindex)]; + struct stat st; + + assert_return(ifindex > 0, -EINVAL); + + xsprintf(path, "/run/systemd/netif/links/%i", ifindex); + + if (stat(path, &st) < 0) + return -errno; + + if (ret) + *ret = st; + + return 0; +} + +static int MONITOR_TO_FD(sd_network_monitor *m) { + return (int) (unsigned long) m - 1; +} + +static sd_network_monitor* FD_TO_MONITOR(int fd) { + return (sd_network_monitor*) (unsigned long) (fd + 1); +} + +static int monitor_add_inotify_watch(int fd) { + int wd; + + wd = inotify_add_watch(fd, "/run/systemd/netif/links/", IN_MOVED_TO|IN_DELETE); + if (wd >= 0) + return wd; + else if (errno != ENOENT) + return -errno; + + wd = inotify_add_watch(fd, "/run/systemd/netif/", IN_CREATE|IN_ISDIR); + if (wd >= 0) + return wd; + else if (errno != ENOENT) + return -errno; + + wd = inotify_add_watch(fd, "/run/systemd/", IN_CREATE|IN_ISDIR); + if (wd < 0) + return -errno; + + return wd; +} + +int sd_network_monitor_new(sd_network_monitor **m, const char *category) { + _cleanup_close_ int fd = -EBADF; + int k; + bool good = false; + + assert_return(m, -EINVAL); + + fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC); + if (fd < 0) + return -errno; + + if (!category || streq(category, "links")) { + k = monitor_add_inotify_watch(fd); + if (k < 0) + return k; + + good = true; + } + + if (!good) + return -EINVAL; + + *m = FD_TO_MONITOR(TAKE_FD(fd)); + return 0; +} + +sd_network_monitor* sd_network_monitor_unref(sd_network_monitor *m) { + if (m) + (void) close_nointr(MONITOR_TO_FD(m)); + + return NULL; +} + +int sd_network_monitor_flush(sd_network_monitor *m) { + union inotify_event_buffer buffer; + ssize_t l; + int fd; + + assert_return(m, -EINVAL); + + fd = MONITOR_TO_FD(m); + + l = read(fd, &buffer, sizeof(buffer)); + if (l < 0) { + if (ERRNO_IS_TRANSIENT(errno)) + return 0; + + return -errno; + } + + FOREACH_INOTIFY_EVENT(e, buffer, l) { + if (e->mask & IN_ISDIR) { + int wd; + + wd = monitor_add_inotify_watch(fd); + if (wd < 0) + return wd; + + if (wd != e->wd) { + if (inotify_rm_watch(fd, e->wd) < 0) + return -errno; + } + } + } + + return 0; +} + +int sd_network_monitor_get_fd(sd_network_monitor *m) { + assert_return(m, -EINVAL); + + return MONITOR_TO_FD(m); +} + +int sd_network_monitor_get_events(sd_network_monitor *m) { + assert_return(m, -EINVAL); + + /* For now we will only return POLLIN here, since we don't + * need anything else ever for inotify. However, let's have + * this API to keep our options open should we later on need + * it. */ + return POLLIN; +} + +int sd_network_monitor_get_timeout(sd_network_monitor *m, uint64_t *ret_usec) { + assert_return(m, -EINVAL); + assert_return(ret_usec, -EINVAL); + + /* For now we will only return UINT64_MAX, since we don't + * need any timeout. However, let's have this API to keep our + * options open should we later on need it. */ + *ret_usec = UINT64_MAX; + return 0; +} diff --git a/src/libsystemd/sd-path/sd-path.c b/src/libsystemd/sd-path/sd-path.c new file mode 100644 index 0000000..7290d1c --- /dev/null +++ b/src/libsystemd/sd-path/sd-path.c @@ -0,0 +1,693 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "sd-path.h" + +#include "alloc-util.h" +#include "architecture.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "nulstr-util.h" +#include "path-lookup.h" +#include "path-util.h" +#include "string-util.h" +#include "strv.h" +#include "user-util.h" + +static int from_environment(const char *envname, const char *fallback, const char **ret) { + assert(ret); + + if (envname) { + const char *e; + + e = secure_getenv(envname); + if (e && path_is_absolute(e)) { + *ret = e; + return 0; + } + } + + if (fallback) { + *ret = fallback; + return 0; + } + + return -ENXIO; +} + +static int from_home_dir(const char *envname, const char *suffix, char **buffer, const char **ret) { + _cleanup_free_ char *h = NULL; + int r; + + assert(suffix); + assert(buffer); + assert(ret); + + if (envname) { + const char *e = NULL; + + e = secure_getenv(envname); + if (e && path_is_absolute(e)) { + *ret = e; + return 0; + } + } + + r = get_home_dir(&h); + if (r < 0) + return r; + + if (!path_extend(&h, suffix)) + return -ENOMEM; + + *buffer = h; + *ret = TAKE_PTR(h); + return 0; +} + +static int from_user_dir(const char *field, char **buffer, const char **ret) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *b = NULL; + _cleanup_free_ const char *fn = NULL; + const char *c = NULL; + int r; + + assert(field); + assert(buffer); + assert(ret); + + r = from_home_dir("XDG_CONFIG_HOME", ".config", &b, &c); + if (r < 0) + return r; + + fn = path_join(c, "user-dirs.dirs"); + if (!fn) + return -ENOMEM; + + f = fopen(fn, "re"); + if (!f) { + if (errno == ENOENT) + goto fallback; + + return -errno; + } + + /* This is an awful parse, but it follows closely what xdg-user-dirs does upstream */ + for (;;) { + _cleanup_free_ char *line = NULL; + char *p, *e; + + r = read_stripped_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + break; + + p = startswith(line, field); + if (!p) + continue; + + p += strspn(p, WHITESPACE); + + if (*p != '=') + continue; + p++; + + p += strspn(p, WHITESPACE); + + if (*p != '"') + continue; + p++; + + e = strrchr(p, '"'); + if (!e) + continue; + *e = 0; + + /* Three syntaxes permitted: relative to $HOME, $HOME itself, and absolute path */ + if (startswith(p, "$HOME/")) { + _cleanup_free_ char *h = NULL; + + r = get_home_dir(&h); + if (r < 0) + return r; + + if (!path_extend(&h, p+5)) + return -ENOMEM; + + *buffer = h; + *ret = TAKE_PTR(h); + return 0; + } else if (streq(p, "$HOME")) { + + r = get_home_dir(buffer); + if (r < 0) + return r; + + *ret = *buffer; + return 0; + } else if (path_is_absolute(p)) { + char *copy; + + copy = strdup(p); + if (!copy) + return -ENOMEM; + + *buffer = copy; + *ret = copy; + return 0; + } + } + +fallback: + /* The desktop directory defaults to $HOME/Desktop, the others to $HOME */ + if (streq(field, "XDG_DESKTOP_DIR")) { + _cleanup_free_ char *h = NULL; + + r = get_home_dir(&h); + if (r < 0) + return r; + + if (!path_extend(&h, "Desktop")) + return -ENOMEM; + + *buffer = h; + *ret = TAKE_PTR(h); + } else { + r = get_home_dir(buffer); + if (r < 0) + return r; + + *ret = *buffer; + } + + return 0; +} + +static int get_path(uint64_t type, char **buffer, const char **ret) { + int r; + + assert(buffer); + assert(ret); + + switch (type) { + + case SD_PATH_TEMPORARY: + return tmp_dir(ret); + + case SD_PATH_TEMPORARY_LARGE: + return var_tmp_dir(ret); + + case SD_PATH_SYSTEM_BINARIES: + *ret = "/usr/bin"; + return 0; + + case SD_PATH_SYSTEM_INCLUDE: + *ret = "/usr/include"; + return 0; + + case SD_PATH_SYSTEM_LIBRARY_PRIVATE: + *ret = "/usr/lib"; + return 0; + + case SD_PATH_SYSTEM_LIBRARY_ARCH: + *ret = LIBDIR; + return 0; + + case SD_PATH_SYSTEM_SHARED: + *ret = "/usr/share"; + return 0; + + case SD_PATH_SYSTEM_CONFIGURATION_FACTORY: + *ret = "/usr/share/factory/etc"; + return 0; + + case SD_PATH_SYSTEM_STATE_FACTORY: + *ret = "/usr/share/factory/var"; + return 0; + + case SD_PATH_SYSTEM_CONFIGURATION: + *ret = "/etc"; + return 0; + + case SD_PATH_SYSTEM_RUNTIME: + *ret = "/run"; + return 0; + + case SD_PATH_SYSTEM_RUNTIME_LOGS: + *ret = "/run/log"; + return 0; + + case SD_PATH_SYSTEM_STATE_PRIVATE: + *ret = "/var/lib"; + return 0; + + case SD_PATH_SYSTEM_STATE_LOGS: + *ret = "/var/log"; + return 0; + + case SD_PATH_SYSTEM_STATE_CACHE: + *ret = "/var/cache"; + return 0; + + case SD_PATH_SYSTEM_STATE_SPOOL: + *ret = "/var/spool"; + return 0; + + case SD_PATH_USER_BINARIES: + return from_home_dir(NULL, ".local/bin", buffer, ret); + + case SD_PATH_USER_LIBRARY_PRIVATE: + return from_home_dir(NULL, ".local/lib", buffer, ret); + + case SD_PATH_USER_LIBRARY_ARCH: + return from_home_dir(NULL, ".local/lib/" LIB_ARCH_TUPLE, buffer, ret); + + case SD_PATH_USER_SHARED: + return from_home_dir("XDG_DATA_HOME", ".local/share", buffer, ret); + + case SD_PATH_USER_CONFIGURATION: + return from_home_dir("XDG_CONFIG_HOME", ".config", buffer, ret); + + case SD_PATH_USER_RUNTIME: + return from_environment("XDG_RUNTIME_DIR", NULL, ret); + + case SD_PATH_USER_STATE_CACHE: + return from_home_dir("XDG_CACHE_HOME", ".cache", buffer, ret); + + case SD_PATH_USER_STATE_PRIVATE: + return from_home_dir("XDG_STATE_HOME", ".local/state", buffer, ret); + + case SD_PATH_USER: + r = get_home_dir(buffer); + if (r < 0) + return r; + + *ret = *buffer; + return 0; + + case SD_PATH_USER_DOCUMENTS: + return from_user_dir("XDG_DOCUMENTS_DIR", buffer, ret); + + case SD_PATH_USER_MUSIC: + return from_user_dir("XDG_MUSIC_DIR", buffer, ret); + + case SD_PATH_USER_PICTURES: + return from_user_dir("XDG_PICTURES_DIR", buffer, ret); + + case SD_PATH_USER_VIDEOS: + return from_user_dir("XDG_VIDEOS_DIR", buffer, ret); + + case SD_PATH_USER_DOWNLOAD: + return from_user_dir("XDG_DOWNLOAD_DIR", buffer, ret); + + case SD_PATH_USER_PUBLIC: + return from_user_dir("XDG_PUBLICSHARE_DIR", buffer, ret); + + case SD_PATH_USER_TEMPLATES: + return from_user_dir("XDG_TEMPLATES_DIR", buffer, ret); + + case SD_PATH_USER_DESKTOP: + return from_user_dir("XDG_DESKTOP_DIR", buffer, ret); + + case SD_PATH_SYSTEMD_UTIL: + *ret = PREFIX_NOSLASH "/lib/systemd"; + return 0; + + case SD_PATH_SYSTEMD_SYSTEM_UNIT: + *ret = SYSTEM_DATA_UNIT_DIR; + return 0; + + case SD_PATH_SYSTEMD_SYSTEM_PRESET: + *ret = PREFIX_NOSLASH "/lib/systemd/system-preset"; + return 0; + + case SD_PATH_SYSTEMD_USER_UNIT: + *ret = USER_DATA_UNIT_DIR; + return 0; + + case SD_PATH_SYSTEMD_USER_PRESET: + *ret = PREFIX_NOSLASH "/lib/systemd/user-preset"; + return 0; + + case SD_PATH_SYSTEMD_SYSTEM_CONF: + *ret = SYSTEM_CONFIG_UNIT_DIR; + return 0; + + case SD_PATH_SYSTEMD_USER_CONF: + *ret = USER_CONFIG_UNIT_DIR; + return 0; + + case SD_PATH_SYSTEMD_SYSTEM_GENERATOR: + *ret = SYSTEM_GENERATOR_DIR; + return 0; + + case SD_PATH_SYSTEMD_USER_GENERATOR: + *ret = USER_GENERATOR_DIR; + return 0; + + case SD_PATH_SYSTEMD_SLEEP: + *ret = PREFIX_NOSLASH "/lib/systemd/system-sleep"; + return 0; + + case SD_PATH_SYSTEMD_SHUTDOWN: + *ret = PREFIX_NOSLASH "/lib/systemd/system-shutdown"; + return 0; + + case SD_PATH_TMPFILES: + *ret = "/usr/lib/tmpfiles.d"; + return 0; + + case SD_PATH_SYSUSERS: + *ret = PREFIX_NOSLASH "/lib/sysusers.d"; + return 0; + + case SD_PATH_SYSCTL: + *ret = PREFIX_NOSLASH "/lib/sysctl.d"; + return 0; + + case SD_PATH_BINFMT: + *ret = PREFIX_NOSLASH "/lib/binfmt.d"; + return 0; + + case SD_PATH_MODULES_LOAD: + *ret = PREFIX_NOSLASH "/lib/modules-load.d"; + return 0; + + case SD_PATH_CATALOG: + *ret = "/usr/lib/systemd/catalog"; + return 0; + + case SD_PATH_SYSTEMD_SYSTEM_ENVIRONMENT_GENERATOR: + *ret = SYSTEM_ENV_GENERATOR_DIR; + return 0; + + case SD_PATH_SYSTEMD_USER_ENVIRONMENT_GENERATOR: + *ret = USER_ENV_GENERATOR_DIR; + return 0; + } + + return -EOPNOTSUPP; +} + +static int get_path_alloc(uint64_t type, const char *suffix, char **path) { + _cleanup_free_ char *buffer = NULL; + char *buffer2 = NULL; + const char *ret; + int r; + + assert(path); + + r = get_path(type, &buffer, &ret); + if (r < 0) + return r; + + if (suffix) { + suffix += strspn(suffix, "/"); + buffer2 = path_join(ret, suffix); + if (!buffer2) + return -ENOMEM; + } else if (!buffer) { + buffer = strdup(ret); + if (!buffer) + return -ENOMEM; + } + + *path = buffer2 ?: TAKE_PTR(buffer); + return 0; +} + +_public_ int sd_path_lookup(uint64_t type, const char *suffix, char **path) { + int r; + + assert_return(path, -EINVAL); + + r = get_path_alloc(type, suffix, path); + if (r != -EOPNOTSUPP) + return r; + + /* Fall back to sd_path_lookup_strv */ + _cleanup_strv_free_ char **l = NULL; + char *buffer; + + r = sd_path_lookup_strv(type, suffix, &l); + if (r < 0) + return r; + + buffer = strv_join(l, ":"); + if (!buffer) + return -ENOMEM; + + *path = buffer; + return 0; +} + +static int search_from_environment( + char ***list, + const char *env_home, + const char *home_suffix, + const char *env_search, + bool env_search_sufficient, + const char *first, ...) { + + _cleanup_strv_free_ char **l = NULL; + const char *e; + char *h = NULL; + int r; + + assert(list); + + if (env_search) { + e = secure_getenv(env_search); + if (e) { + l = strv_split(e, ":"); + if (!l) + return -ENOMEM; + + if (env_search_sufficient) { + *list = TAKE_PTR(l); + return 0; + } + } + } + + if (!l && first) { + va_list ap; + + va_start(ap, first); + l = strv_new_ap(first, ap); + va_end(ap); + + if (!l) + return -ENOMEM; + } + + if (env_home) { + e = secure_getenv(env_home); + if (e && path_is_absolute(e)) { + h = strdup(e); + if (!h) + return -ENOMEM; + } + } + + if (!h && home_suffix) { + e = secure_getenv("HOME"); + if (e && path_is_absolute(e)) { + h = path_join(e, home_suffix); + if (!h) + return -ENOMEM; + } + } + + if (h) { + r = strv_consume_prepend(&l, h); + if (r < 0) + return -ENOMEM; + } + + *list = TAKE_PTR(l); + return 0; +} + +#if HAVE_SPLIT_BIN +# define ARRAY_SBIN_BIN(x) x "sbin", x "bin" +#else +# define ARRAY_SBIN_BIN(x) x "bin" +#endif + +static int get_search(uint64_t type, char ***list) { + int r; + + assert(list); + + switch (type) { + + case SD_PATH_SEARCH_BINARIES: + return search_from_environment(list, + NULL, + ".local/bin", + "PATH", + true, + ARRAY_SBIN_BIN("/usr/local/"), + ARRAY_SBIN_BIN("/usr/"), + NULL); + + case SD_PATH_SEARCH_LIBRARY_PRIVATE: + return search_from_environment(list, + NULL, + ".local/lib", + NULL, + false, + "/usr/local/lib", + "/usr/lib", + NULL); + + case SD_PATH_SEARCH_LIBRARY_ARCH: + return search_from_environment(list, + NULL, + ".local/lib/" LIB_ARCH_TUPLE, + "LD_LIBRARY_PATH", + true, + LIBDIR, + NULL); + + case SD_PATH_SEARCH_SHARED: + return search_from_environment(list, + "XDG_DATA_HOME", + ".local/share", + "XDG_DATA_DIRS", + false, + "/usr/local/share", + "/usr/share", + NULL); + + case SD_PATH_SEARCH_CONFIGURATION_FACTORY: + return search_from_environment(list, + NULL, + NULL, + NULL, + false, + "/usr/local/share/factory/etc", + "/usr/share/factory/etc", + NULL); + + case SD_PATH_SEARCH_STATE_FACTORY: + return search_from_environment(list, + NULL, + NULL, + NULL, + false, + "/usr/local/share/factory/var", + "/usr/share/factory/var", + NULL); + + case SD_PATH_SEARCH_CONFIGURATION: + return search_from_environment(list, + "XDG_CONFIG_HOME", + ".config", + "XDG_CONFIG_DIRS", + false, + "/etc", + NULL); + + case SD_PATH_SEARCH_BINARIES_DEFAULT: + return strv_from_nulstr(list, DEFAULT_PATH_NULSTR); + + case SD_PATH_SYSTEMD_SEARCH_SYSTEM_UNIT: + case SD_PATH_SYSTEMD_SEARCH_USER_UNIT: { + _cleanup_(lookup_paths_free) LookupPaths lp = {}; + RuntimeScope scope = type == SD_PATH_SYSTEMD_SEARCH_SYSTEM_UNIT ? + RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER; + + r = lookup_paths_init(&lp, scope, 0, NULL); + if (r < 0) + return r; + + *list = TAKE_PTR(lp.search_path); + return 0; + } + + case SD_PATH_SYSTEMD_SEARCH_SYSTEM_GENERATOR: + case SD_PATH_SYSTEMD_SEARCH_USER_GENERATOR: { + RuntimeScope scope = type == SD_PATH_SYSTEMD_SEARCH_SYSTEM_GENERATOR ? + RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER; + char **t; + + t = generator_binary_paths(scope); + if (!t) + return -ENOMEM; + + *list = t; + return 0; + } + + case SD_PATH_SYSTEMD_SEARCH_SYSTEM_ENVIRONMENT_GENERATOR: + case SD_PATH_SYSTEMD_SEARCH_USER_ENVIRONMENT_GENERATOR: { + char **t; + + t = env_generator_binary_paths(type == SD_PATH_SYSTEMD_SEARCH_SYSTEM_ENVIRONMENT_GENERATOR ? + RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER); + if (!t) + return -ENOMEM; + + *list = t; + return 0; + } + + case SD_PATH_SYSTEMD_SEARCH_NETWORK: + return strv_from_nulstr(list, NETWORK_DIRS_NULSTR); + + } + + return -EOPNOTSUPP; +} + +_public_ int sd_path_lookup_strv(uint64_t type, const char *suffix, char ***paths) { + _cleanup_strv_free_ char **l = NULL, **n = NULL; + int r; + + assert_return(paths, -EINVAL); + + r = get_search(type, &l); + if (r == -EOPNOTSUPP) { + _cleanup_free_ char *t = NULL; + + r = get_path_alloc(type, suffix, &t); + if (r < 0) + return r; + + l = new(char*, 2); + if (!l) + return -ENOMEM; + l[0] = TAKE_PTR(t); + l[1] = NULL; + + *paths = TAKE_PTR(l); + return 0; + + } else if (r < 0) + return r; + + if (!suffix) { + *paths = TAKE_PTR(l); + return 0; + } + + n = new(char*, strv_length(l)+1); + if (!n) + return -ENOMEM; + + char **j = n; + STRV_FOREACH(i, l) { + *j = path_join(*i, suffix); + if (!*j) + return -ENOMEM; + + j++; + } + *j = NULL; + + *paths = TAKE_PTR(n); + return 0; +} diff --git a/src/libsystemd/sd-resolve/resolve-private.h b/src/libsystemd/sd-resolve/resolve-private.h new file mode 100644 index 0000000..7a339f7 --- /dev/null +++ b/src/libsystemd/sd-resolve/resolve-private.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-resolve.h" + +int resolve_getaddrinfo_with_destroy_callback( + sd_resolve *resolve, sd_resolve_query **q, + const char *node, const char *service, const struct addrinfo *hints, + sd_resolve_getaddrinfo_handler_t callback, + sd_resolve_destroy_t destroy_callback, void *userdata); +int resolve_getnameinfo_with_destroy_callback( + sd_resolve *resolve, sd_resolve_query **q, + const struct sockaddr *sa, socklen_t salen, int flags, uint64_t get, + sd_resolve_getnameinfo_handler_t callback, + sd_resolve_destroy_t destroy_callback, void *userdata); + +#define resolve_getaddrinfo(resolve, ret_query, node, service, hints, callback, destroy_callback, userdata) \ + ({ \ + int (*_callback_)(sd_resolve_query*, int, const struct addrinfo*, typeof(userdata)) = callback; \ + void (*_destroy_)(typeof(userdata)) = destroy_callback; \ + resolve_getaddrinfo_with_destroy_callback( \ + resolve, ret_query, \ + node, service, hints, \ + (sd_resolve_getaddrinfo_handler_t) _callback_, \ + (sd_resolve_destroy_t) _destroy_, \ + userdata); \ + }) + +#define resolve_getnameinfo(resolve, ret_query, sa, salen, flags, get, callback, destroy_callback, userdata) \ + ({ \ + int (*_callback_)(sd_resolve_query*, int, const char*, const char*, typeof(userdata)) = callback; \ + void (*_destroy_)(typeof(userdata)) = destroy_callback; \ + resolve_getaddrinfo_with_destroy_callback( \ + resolve, ret_query, \ + sa, salen, flags, get, \ + (sd_resolve_getnameinfo_handler_t) _callback_, \ + (sd_resolve_destroy_t) _destroy_, \ + userdata); \ + }) diff --git a/src/libsystemd/sd-resolve/sd-resolve.c b/src/libsystemd/sd-resolve/sd-resolve.c new file mode 100644 index 0000000..2000f86 --- /dev/null +++ b/src/libsystemd/sd-resolve/sd-resolve.c @@ -0,0 +1,1296 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <poll.h> +#include <pthread.h> +#include <resolv.h> +#include <signal.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/prctl.h> +#include <unistd.h> + +#include "sd-resolve.h" + +#include "alloc-util.h" +#include "dns-def.h" +#include "errno-util.h" +#include "fd-util.h" +#include "io-util.h" +#include "iovec-util.h" +#include "list.h" +#include "memory-util.h" +#include "missing_syscall.h" +#include "missing_threads.h" +#include "process-util.h" +#include "resolve-private.h" +#include "socket-util.h" + +#define WORKERS_MIN 1U +#define WORKERS_MAX 16U +#define QUERIES_MAX 256U +#define BUFSIZE 10240U + +typedef enum { + REQUEST_ADDRINFO, + RESPONSE_ADDRINFO, + REQUEST_NAMEINFO, + RESPONSE_NAMEINFO, + REQUEST_TERMINATE, + RESPONSE_DIED +} QueryType; + +enum { + REQUEST_RECV_FD, + REQUEST_SEND_FD, + RESPONSE_RECV_FD, + RESPONSE_SEND_FD, + _FD_MAX +}; + +struct sd_resolve { + unsigned n_ref; + + bool dead:1; + pid_t original_pid; + + int fds[_FD_MAX]; + + pthread_t workers[WORKERS_MAX]; + unsigned n_valid_workers; + + unsigned current_id; + sd_resolve_query* query_array[QUERIES_MAX]; + unsigned n_queries, n_done, n_outstanding; + + sd_event_source *event_source; + sd_event *event; + + sd_resolve_query *current; + + sd_resolve **default_resolve_ptr; + pid_t tid; + + LIST_HEAD(sd_resolve_query, queries); +}; + +struct sd_resolve_query { + unsigned n_ref; + + sd_resolve *resolve; + + QueryType type:4; + bool done:1; + bool floating:1; + unsigned id; + + int ret; + int _errno; + int _h_errno; + struct addrinfo *addrinfo; + char *serv, *host; + + union { + sd_resolve_getaddrinfo_handler_t getaddrinfo_handler; + sd_resolve_getnameinfo_handler_t getnameinfo_handler; + }; + + void *userdata; + sd_resolve_destroy_t destroy_callback; + + LIST_FIELDS(sd_resolve_query, queries); +}; + +typedef struct RHeader { + QueryType type; + unsigned id; + size_t length; +} RHeader; + +typedef struct AddrInfoRequest { + struct RHeader header; + bool hints_valid; + int ai_flags; + int ai_family; + int ai_socktype; + int ai_protocol; + size_t node_len, service_len; +} AddrInfoRequest; + +typedef struct AddrInfoResponse { + struct RHeader header; + int ret; + int _errno; + int _h_errno; + /* followed by addrinfo_serialization[] */ +} AddrInfoResponse; + +typedef struct AddrInfoSerialization { + int ai_flags; + int ai_family; + int ai_socktype; + int ai_protocol; + size_t ai_addrlen; + size_t canonname_len; + /* Followed by ai_addr amd ai_canonname with variable lengths */ +} AddrInfoSerialization; + +typedef struct NameInfoRequest { + struct RHeader header; + int flags; + socklen_t sockaddr_len; + bool gethost:1, getserv:1; +} NameInfoRequest; + +typedef struct NameInfoResponse { + struct RHeader header; + size_t hostlen, servlen; + int ret; + int _errno; + int _h_errno; +} NameInfoResponse; + +typedef union Packet { + RHeader rheader; + AddrInfoRequest addrinfo_request; + AddrInfoResponse addrinfo_response; + NameInfoRequest nameinfo_request; + NameInfoResponse nameinfo_response; +} Packet; + +static int getaddrinfo_done(sd_resolve_query* q); +static int getnameinfo_done(sd_resolve_query *q); + +static void resolve_query_disconnect(sd_resolve_query *q); + +#define RESOLVE_DONT_DESTROY(resolve) \ + _cleanup_(sd_resolve_unrefp) _unused_ sd_resolve *_dont_destroy_##resolve = sd_resolve_ref(resolve) + +static void query_assign_errno(sd_resolve_query *q, int ret, int error, int h_error) { + assert(q); + + q->ret = ret; + q->_errno = abs(error); + q->_h_errno = h_error; +} + +static int send_died(int out_fd) { + RHeader rh = { + .type = RESPONSE_DIED, + .length = sizeof(RHeader), + }; + + assert(out_fd >= 0); + + if (send(out_fd, &rh, rh.length, MSG_NOSIGNAL) < 0) + return -errno; + + return 0; +} + +static void *serialize_addrinfo(void *p, const struct addrinfo *ai, size_t *length, size_t maxlength) { + AddrInfoSerialization s; + size_t cnl, l; + + assert(p); + assert(ai); + assert(length); + assert(*length <= maxlength); + + cnl = ai->ai_canonname ? strlen(ai->ai_canonname)+1 : 0; + l = sizeof(AddrInfoSerialization) + ai->ai_addrlen + cnl; + + if (*length + l > maxlength) + return NULL; + + s = (AddrInfoSerialization) { + .ai_flags = ai->ai_flags, + .ai_family = ai->ai_family, + .ai_socktype = ai->ai_socktype, + .ai_protocol = ai->ai_protocol, + .ai_addrlen = ai->ai_addrlen, + .canonname_len = cnl, + }; + + memcpy((uint8_t*) p, &s, sizeof(AddrInfoSerialization)); + memcpy((uint8_t*) p + sizeof(AddrInfoSerialization), ai->ai_addr, ai->ai_addrlen); + memcpy_safe((char*) p + sizeof(AddrInfoSerialization) + ai->ai_addrlen, + ai->ai_canonname, cnl); + + *length += l; + return (uint8_t*) p + l; +} + +static int send_addrinfo_reply( + int out_fd, + unsigned id, + int ret, + struct addrinfo *ai, + int _errno, + int _h_errno) { + + AddrInfoResponse resp = {}; + union { + AddrInfoSerialization ais; + uint8_t space[BUFSIZE]; + } buffer; + struct iovec iov[2]; + struct msghdr mh; + + assert(out_fd >= 0); + + resp = (AddrInfoResponse) { + .header.type = RESPONSE_ADDRINFO, + .header.id = id, + .header.length = sizeof(AddrInfoResponse), + .ret = ret, + ._errno = _errno, + ._h_errno = _h_errno, + }; + + msan_unpoison(&resp, sizeof(resp)); + + if (ret == 0 && ai) { + void *p = &buffer; + struct addrinfo *k; + + for (k = ai; k; k = k->ai_next) { + p = serialize_addrinfo(p, k, &resp.header.length, (uint8_t*) &buffer + BUFSIZE - (uint8_t*) p); + if (!p) { + freeaddrinfo(ai); + return -ENOBUFS; + } + } + } + + if (ai) + freeaddrinfo(ai); + + iov[0] = IOVEC_MAKE(&resp, sizeof(AddrInfoResponse)); + iov[1] = IOVEC_MAKE(&buffer, resp.header.length - sizeof(AddrInfoResponse)); + + mh = (struct msghdr) { + .msg_iov = iov, + .msg_iovlen = ELEMENTSOF(iov) + }; + + if (sendmsg(out_fd, &mh, MSG_NOSIGNAL) < 0) + return -errno; + + return 0; +} + +static int send_nameinfo_reply( + int out_fd, + unsigned id, + int ret, + const char *host, + const char *serv, + int _errno, + int _h_errno) { + + NameInfoResponse resp = {}; + struct iovec iov[3]; + struct msghdr mh; + size_t hl, sl; + + assert(out_fd >= 0); + + sl = serv ? strlen(serv)+1 : 0; + hl = host ? strlen(host)+1 : 0; + + resp = (NameInfoResponse) { + .header.type = RESPONSE_NAMEINFO, + .header.id = id, + .header.length = sizeof(NameInfoResponse) + hl + sl, + .hostlen = hl, + .servlen = sl, + .ret = ret, + ._errno = _errno, + ._h_errno = _h_errno, + }; + + msan_unpoison(&resp, sizeof(resp)); + + iov[0] = IOVEC_MAKE(&resp, sizeof(NameInfoResponse)); + iov[1] = IOVEC_MAKE((void*) host, hl); + iov[2] = IOVEC_MAKE((void*) serv, sl); + + mh = (struct msghdr) { + .msg_iov = iov, + .msg_iovlen = ELEMENTSOF(iov) + }; + + if (sendmsg(out_fd, &mh, MSG_NOSIGNAL) < 0) + return -errno; + + return 0; +} + +static int handle_request(int out_fd, const Packet *packet, size_t length) { + const RHeader *req; + + assert(out_fd >= 0); + assert(packet); + + req = &packet->rheader; + + assert_return(length >= sizeof(RHeader), -EIO); + assert_return(length == req->length, -EIO); + + switch (req->type) { + + case REQUEST_ADDRINFO: { + const AddrInfoRequest *ai_req = &packet->addrinfo_request; + struct addrinfo hints, *result = NULL; + const char *node, *service; + int ret; + + assert_return(length >= sizeof(AddrInfoRequest), -EBADMSG); + assert_return(length == sizeof(AddrInfoRequest) + ai_req->node_len + ai_req->service_len, -EBADMSG); + + hints = (struct addrinfo) { + .ai_flags = ai_req->ai_flags, + .ai_family = ai_req->ai_family, + .ai_socktype = ai_req->ai_socktype, + .ai_protocol = ai_req->ai_protocol, + }; + + msan_unpoison(&hints, sizeof(hints)); + + node = ai_req->node_len ? (const char*) ai_req + sizeof(AddrInfoRequest) : NULL; + service = ai_req->service_len ? (const char*) ai_req + sizeof(AddrInfoRequest) + ai_req->node_len : NULL; + + ret = getaddrinfo(node, service, + ai_req->hints_valid ? &hints : NULL, + &result); + + /* send_addrinfo_reply() frees result */ + return send_addrinfo_reply(out_fd, req->id, ret, result, errno, h_errno); + } + + case REQUEST_NAMEINFO: { + const NameInfoRequest *ni_req = &packet->nameinfo_request; + char hostbuf[NI_MAXHOST], servbuf[NI_MAXSERV]; + union sockaddr_union sa; + int ret; + + assert_return(length >= sizeof(NameInfoRequest), -EBADMSG); + assert_return(length == sizeof(NameInfoRequest) + ni_req->sockaddr_len, -EBADMSG); + assert_return(ni_req->sockaddr_len <= sizeof(sa), -EBADMSG); + + memcpy(&sa, (const uint8_t *) ni_req + sizeof(NameInfoRequest), ni_req->sockaddr_len); + + ret = getnameinfo(&sa.sa, ni_req->sockaddr_len, + ni_req->gethost ? hostbuf : NULL, ni_req->gethost ? sizeof(hostbuf) : 0, + ni_req->getserv ? servbuf : NULL, ni_req->getserv ? sizeof(servbuf) : 0, + ni_req->flags); + + return send_nameinfo_reply(out_fd, req->id, ret, + ret == 0 && ni_req->gethost ? hostbuf : NULL, + ret == 0 && ni_req->getserv ? servbuf : NULL, + errno, h_errno); + } + + case REQUEST_TERMINATE: + /* Quit */ + return -ECONNRESET; + + default: + assert_not_reached(); + } + + return 0; +} + +static void* thread_worker(void *p) { + sd_resolve *resolve = p; + + /* Assign a pretty name to this thread */ + (void) pthread_setname_np(pthread_self(), "sd-resolve"); + + while (!resolve->dead) { + union { + Packet packet; + uint8_t space[BUFSIZE]; + } buf; + ssize_t length; + + length = recv(resolve->fds[REQUEST_RECV_FD], &buf, sizeof buf, 0); + if (length < 0) { + if (ERRNO_IS_TRANSIENT(errno)) + continue; + + break; + } + if (length == 0) + break; + + if (handle_request(resolve->fds[RESPONSE_SEND_FD], &buf.packet, (size_t) length) < 0) + break; + } + + send_died(resolve->fds[RESPONSE_SEND_FD]); + + return NULL; +} + +static int start_threads(sd_resolve *resolve, unsigned extra) { + sigset_t ss, saved_ss; + unsigned n; + int r, k; + + assert_se(sigfillset(&ss) >= 0); + + /* No signals in forked off threads please. We set the mask before forking, so that the threads never exist + * with a different mask than a fully blocked one */ + r = pthread_sigmask(SIG_BLOCK, &ss, &saved_ss); + if (r > 0) + return -r; + + n = resolve->n_outstanding + extra; + n = CLAMP(n, WORKERS_MIN, WORKERS_MAX); + + while (resolve->n_valid_workers < n) { + r = pthread_create(&resolve->workers[resolve->n_valid_workers], NULL, thread_worker, resolve); + if (r > 0) { + r = -r; + goto finish; + } + + resolve->n_valid_workers++; + } + + r = 0; + +finish: + k = pthread_sigmask(SIG_SETMASK, &saved_ss, NULL); + if (k > 0 && r >= 0) + r = -k; + + return r; +} + +static bool resolve_pid_changed(sd_resolve *r) { + assert(r); + + /* We don't support people creating a resolver and keeping it + * around after fork(). Let's complain. */ + + return r->original_pid != getpid_cached(); +} + +int sd_resolve_new(sd_resolve **ret) { + _cleanup_(sd_resolve_unrefp) sd_resolve *resolve = NULL; + int i; + + assert_return(ret, -EINVAL); + + resolve = new0(sd_resolve, 1); + if (!resolve) + return -ENOMEM; + + resolve->n_ref = 1; + resolve->original_pid = getpid_cached(); + + for (i = 0; i < _FD_MAX; i++) + resolve->fds[i] = -EBADF; + + if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, resolve->fds + REQUEST_RECV_FD) < 0) + return -errno; + + if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, resolve->fds + RESPONSE_RECV_FD) < 0) + return -errno; + + for (i = 0; i < _FD_MAX; i++) + resolve->fds[i] = fd_move_above_stdio(resolve->fds[i]); + + (void) fd_inc_sndbuf(resolve->fds[REQUEST_SEND_FD], QUERIES_MAX * BUFSIZE); + (void) fd_increase_rxbuf(resolve->fds[REQUEST_RECV_FD], QUERIES_MAX * BUFSIZE); + (void) fd_inc_sndbuf(resolve->fds[RESPONSE_SEND_FD], QUERIES_MAX * BUFSIZE); + (void) fd_increase_rxbuf(resolve->fds[RESPONSE_RECV_FD], QUERIES_MAX * BUFSIZE); + + (void) fd_nonblock(resolve->fds[RESPONSE_RECV_FD], true); + + *ret = TAKE_PTR(resolve); + return 0; +} + +int sd_resolve_default(sd_resolve **ret) { + static thread_local sd_resolve *default_resolve = NULL; + sd_resolve *e = NULL; + int r; + + if (!ret) + return !!default_resolve; + + if (default_resolve) { + *ret = sd_resolve_ref(default_resolve); + return 0; + } + + r = sd_resolve_new(&e); + if (r < 0) + return r; + + e->default_resolve_ptr = &default_resolve; + e->tid = gettid(); + default_resolve = e; + + *ret = e; + return 1; +} + +int sd_resolve_get_tid(sd_resolve *resolve, pid_t *tid) { + assert_return(resolve, -EINVAL); + assert_return(tid, -EINVAL); + assert_return(!resolve_pid_changed(resolve), -ECHILD); + + if (resolve->tid != 0) { + *tid = resolve->tid; + return 0; + } + + if (resolve->event) + return sd_event_get_tid(resolve->event, tid); + + return -ENXIO; +} + +static sd_resolve *resolve_free(sd_resolve *resolve) { + PROTECT_ERRNO; + sd_resolve_query *q; + unsigned i; + + assert(resolve); + + while ((q = resolve->queries)) { + assert(q->floating); + resolve_query_disconnect(q); + sd_resolve_query_unref(q); + } + + if (resolve->default_resolve_ptr) + *(resolve->default_resolve_ptr) = NULL; + + resolve->dead = true; + + sd_resolve_detach_event(resolve); + + if (resolve->fds[REQUEST_SEND_FD] >= 0) { + + RHeader req = { + .type = REQUEST_TERMINATE, + .length = sizeof req, + }; + + /* Send one termination packet for each worker */ + for (i = 0; i < resolve->n_valid_workers; i++) + (void) send(resolve->fds[REQUEST_SEND_FD], &req, req.length, MSG_NOSIGNAL); + } + + /* Now terminate them and wait until they are gone. + If we get an error than most likely the thread already exited. */ + for (i = 0; i < resolve->n_valid_workers; i++) + (void) pthread_join(resolve->workers[i], NULL); + + /* Close all communication channels */ + close_many(resolve->fds, _FD_MAX); + + return mfree(resolve); +} + +DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_resolve, sd_resolve, resolve_free); + +int sd_resolve_get_fd(sd_resolve *resolve) { + assert_return(resolve, -EINVAL); + assert_return(!resolve_pid_changed(resolve), -ECHILD); + + return resolve->fds[RESPONSE_RECV_FD]; +} + +int sd_resolve_get_events(sd_resolve *resolve) { + assert_return(resolve, -EINVAL); + assert_return(!resolve_pid_changed(resolve), -ECHILD); + + return resolve->n_queries > resolve->n_done ? POLLIN : 0; +} + +int sd_resolve_get_timeout(sd_resolve *resolve, uint64_t *usec) { + assert_return(resolve, -EINVAL); + assert_return(usec, -EINVAL); + assert_return(!resolve_pid_changed(resolve), -ECHILD); + + *usec = UINT64_MAX; + return 0; +} + +static sd_resolve_query *lookup_query(sd_resolve *resolve, unsigned id) { + sd_resolve_query *q; + + assert(resolve); + + q = resolve->query_array[id % QUERIES_MAX]; + if (q) + if (q->id == id) + return q; + + return NULL; +} + +static int complete_query(sd_resolve *resolve, sd_resolve_query *q) { + int r; + + assert(q); + assert(!q->done); + assert(q->resolve == resolve); + + q->done = true; + resolve->n_done++; + + resolve->current = sd_resolve_query_ref(q); + + switch (q->type) { + + case REQUEST_ADDRINFO: + r = getaddrinfo_done(q); + break; + + case REQUEST_NAMEINFO: + r = getnameinfo_done(q); + break; + + default: + assert_not_reached(); + } + + resolve->current = NULL; + + if (q->floating) { + resolve_query_disconnect(q); + sd_resolve_query_unref(q); + } + + sd_resolve_query_unref(q); + + return r; +} + +static int unserialize_addrinfo(const void **p, size_t *length, struct addrinfo **ret_ai) { + AddrInfoSerialization s; + struct addrinfo *ai; + size_t l; + + assert(p); + assert(*p); + assert(ret_ai); + assert(length); + + if (*length < sizeof(AddrInfoSerialization)) + return -EBADMSG; + + memcpy(&s, *p, sizeof(s)); + + l = sizeof(AddrInfoSerialization) + s.ai_addrlen + s.canonname_len; + if (*length < l) + return -EBADMSG; + + ai = new(struct addrinfo, 1); + if (!ai) + return -ENOMEM; + + *ai = (struct addrinfo) { + .ai_flags = s.ai_flags, + .ai_family = s.ai_family, + .ai_socktype = s.ai_socktype, + .ai_protocol = s.ai_protocol, + .ai_addrlen = s.ai_addrlen, + }; + + if (s.ai_addrlen > 0) { + ai->ai_addr = memdup((const uint8_t*) *p + sizeof(AddrInfoSerialization), s.ai_addrlen); + if (!ai->ai_addr) { + free(ai); + return -ENOMEM; + } + } + + if (s.canonname_len > 0) { + ai->ai_canonname = memdup((const uint8_t*) *p + sizeof(AddrInfoSerialization) + s.ai_addrlen, s.canonname_len); + if (!ai->ai_canonname) { + free(ai->ai_addr); + free(ai); + return -ENOMEM; + } + } + + *length -= l; + *ret_ai = ai; + *p = ((const uint8_t*) *p) + l; + + return 0; +} + +static int handle_response(sd_resolve *resolve, const Packet *packet, size_t length) { + const RHeader *resp; + sd_resolve_query *q; + int r; + + assert(resolve); + assert(packet); + + resp = &packet->rheader; + assert_return(length >= sizeof(RHeader), -EIO); + assert_return(length == resp->length, -EIO); + + if (resp->type == RESPONSE_DIED) { + resolve->dead = true; + return 0; + } + + assert(resolve->n_outstanding > 0); + resolve->n_outstanding--; + + q = lookup_query(resolve, resp->id); + if (!q) + return 0; + + switch (resp->type) { + + case RESPONSE_ADDRINFO: { + const AddrInfoResponse *ai_resp = &packet->addrinfo_response; + const void *p; + size_t l; + struct addrinfo *prev = NULL; + + assert_return(length >= sizeof(AddrInfoResponse), -EBADMSG); + assert_return(q->type == REQUEST_ADDRINFO, -EBADMSG); + + query_assign_errno(q, ai_resp->ret, ai_resp->_errno, ai_resp->_h_errno); + + l = length - sizeof(AddrInfoResponse); + p = (const uint8_t*) resp + sizeof(AddrInfoResponse); + + while (l > 0 && p) { + struct addrinfo *ai = NULL; + + r = unserialize_addrinfo(&p, &l, &ai); + if (r < 0) { + query_assign_errno(q, EAI_SYSTEM, r, 0); + freeaddrinfo(q->addrinfo); + q->addrinfo = NULL; + break; + } + + if (prev) + prev->ai_next = ai; + else + q->addrinfo = ai; + + prev = ai; + } + + return complete_query(resolve, q); + } + + case RESPONSE_NAMEINFO: { + const NameInfoResponse *ni_resp = &packet->nameinfo_response; + + assert_return(length >= sizeof(NameInfoResponse), -EBADMSG); + assert_return(q->type == REQUEST_NAMEINFO, -EBADMSG); + + if (ni_resp->hostlen > DNS_HOSTNAME_MAX || + ni_resp->servlen > DNS_HOSTNAME_MAX || + sizeof(NameInfoResponse) + ni_resp->hostlen + ni_resp->servlen > length) + query_assign_errno(q, EAI_SYSTEM, EIO, 0); + else { + query_assign_errno(q, ni_resp->ret, ni_resp->_errno, ni_resp->_h_errno); + + if (ni_resp->hostlen > 0) { + q->host = strndup((const char*) ni_resp + sizeof(NameInfoResponse), + ni_resp->hostlen-1); + if (!q->host) + query_assign_errno(q, EAI_MEMORY, ENOMEM, 0); + } + + if (ni_resp->servlen > 0) { + q->serv = strndup((const char*) ni_resp + sizeof(NameInfoResponse) + ni_resp->hostlen, + ni_resp->servlen-1); + if (!q->serv) + query_assign_errno(q, EAI_MEMORY, ENOMEM, 0); + } + } + + return complete_query(resolve, q); + } + + default: + return 0; + } +} + +int sd_resolve_process(sd_resolve *resolve) { + RESOLVE_DONT_DESTROY(resolve); + + union { + Packet packet; + uint8_t space[BUFSIZE]; + } buf; + ssize_t l; + int r; + + assert_return(resolve, -EINVAL); + assert_return(!resolve_pid_changed(resolve), -ECHILD); + + /* We don't allow recursively invoking sd_resolve_process(). */ + assert_return(!resolve->current, -EBUSY); + + l = recv(resolve->fds[RESPONSE_RECV_FD], &buf, sizeof buf, 0); + if (l < 0) { + if (ERRNO_IS_TRANSIENT(errno)) + return 0; + + return -errno; + } + if (l == 0) + return -ECONNREFUSED; + + r = handle_response(resolve, &buf.packet, (size_t) l); + if (r < 0) + return r; + + return 1; +} + +int sd_resolve_wait(sd_resolve *resolve, uint64_t timeout_usec) { + int r; + + assert_return(resolve, -EINVAL); + assert_return(!resolve_pid_changed(resolve), -ECHILD); + + if (resolve->n_done >= resolve->n_queries) + return 0; + + do { + r = fd_wait_for_event(resolve->fds[RESPONSE_RECV_FD], POLLIN, timeout_usec); + } while (r == -EINTR); + + if (r < 0) + return r; + if (r == 0) + return -ETIMEDOUT; + + return sd_resolve_process(resolve); +} + +static int alloc_query(sd_resolve *resolve, bool floating, sd_resolve_query **_q) { + sd_resolve_query *q; + int r; + + assert(resolve); + assert(_q); + + if (resolve->n_queries >= QUERIES_MAX) + return -ENOBUFS; + + r = start_threads(resolve, 1); + if (r < 0) + return r; + + while (resolve->query_array[resolve->current_id % QUERIES_MAX]) + resolve->current_id++; + + q = resolve->query_array[resolve->current_id % QUERIES_MAX] = new0(sd_resolve_query, 1); + if (!q) + return -ENOMEM; + + q->n_ref = 1; + q->resolve = resolve; + q->floating = floating; + q->id = resolve->current_id++; + + if (!floating) + sd_resolve_ref(resolve); + + LIST_PREPEND(queries, resolve->queries, q); + resolve->n_queries++; + + *_q = q; + return 0; +} + +int resolve_getaddrinfo_with_destroy_callback( + sd_resolve *resolve, + sd_resolve_query **ret_query, + const char *node, const char *service, + const struct addrinfo *hints, + sd_resolve_getaddrinfo_handler_t callback, + sd_resolve_destroy_t destroy_callback, + void *userdata) { + + _cleanup_(sd_resolve_query_unrefp) sd_resolve_query *q = NULL; + size_t node_len, service_len; + AddrInfoRequest req = {}; + struct iovec iov[3]; + struct msghdr mh = {}; + int r; + + assert_return(resolve, -EINVAL); + assert_return(node || service, -EINVAL); + assert_return(callback, -EINVAL); + assert_return(!resolve_pid_changed(resolve), -ECHILD); + + r = alloc_query(resolve, !ret_query, &q); + if (r < 0) + return r; + + q->type = REQUEST_ADDRINFO; + q->getaddrinfo_handler = callback; + q->userdata = userdata; + + node_len = node ? strlen(node) + 1 : 0; + service_len = service ? strlen(service) + 1 : 0; + + req = (AddrInfoRequest) { + .node_len = node_len, + .service_len = service_len, + + .header.id = q->id, + .header.type = REQUEST_ADDRINFO, + .header.length = sizeof(AddrInfoRequest) + node_len + service_len, + + .hints_valid = hints, + .ai_flags = hints ? hints->ai_flags : 0, + .ai_family = hints ? hints->ai_family : 0, + .ai_socktype = hints ? hints->ai_socktype : 0, + .ai_protocol = hints ? hints->ai_protocol : 0, + }; + + msan_unpoison(&req, sizeof(req)); + + iov[mh.msg_iovlen++] = IOVEC_MAKE(&req, sizeof(AddrInfoRequest)); + if (node) + iov[mh.msg_iovlen++] = IOVEC_MAKE((void*) node, req.node_len); + if (service) + iov[mh.msg_iovlen++] = IOVEC_MAKE((void*) service, req.service_len); + mh.msg_iov = iov; + + if (sendmsg(resolve->fds[REQUEST_SEND_FD], &mh, MSG_NOSIGNAL) < 0) + return -errno; + + resolve->n_outstanding++; + q->destroy_callback = destroy_callback; + + if (ret_query) + *ret_query = q; + + TAKE_PTR(q); + + return 0; +} + +int sd_resolve_getaddrinfo( + sd_resolve *resolve, + sd_resolve_query **ret_query, + const char *node, const char *service, + const struct addrinfo *hints, + sd_resolve_getaddrinfo_handler_t callback, + void *userdata) { + + return resolve_getaddrinfo_with_destroy_callback(resolve, ret_query, node, service, hints, callback, NULL, userdata); +} + +static int getaddrinfo_done(sd_resolve_query* q) { + assert(q); + assert(q->done); + assert(q->getaddrinfo_handler); + + errno = q->_errno; + h_errno = q->_h_errno; + + return q->getaddrinfo_handler(q, q->ret, q->addrinfo, q->userdata); +} + +int resolve_getnameinfo_with_destroy_callback( + sd_resolve *resolve, + sd_resolve_query **ret_query, + const struct sockaddr *sa, socklen_t salen, + int flags, + uint64_t get, + sd_resolve_getnameinfo_handler_t callback, + sd_resolve_destroy_t destroy_callback, + void *userdata) { + + _cleanup_(sd_resolve_query_unrefp) sd_resolve_query *q = NULL; + NameInfoRequest req = {}; + struct iovec iov[2]; + struct msghdr mh; + int r; + + assert_return(resolve, -EINVAL); + assert_return(sa, -EINVAL); + assert_return(salen >= sizeof(struct sockaddr), -EINVAL); + assert_return(salen <= sizeof(union sockaddr_union), -EINVAL); + assert_return((get & ~SD_RESOLVE_GET_BOTH) == 0, -EINVAL); + assert_return(callback, -EINVAL); + assert_return(!resolve_pid_changed(resolve), -ECHILD); + + r = alloc_query(resolve, !ret_query, &q); + if (r < 0) + return r; + + q->type = REQUEST_NAMEINFO; + q->getnameinfo_handler = callback; + q->userdata = userdata; + + req = (NameInfoRequest) { + .header.id = q->id, + .header.type = REQUEST_NAMEINFO, + .header.length = sizeof(NameInfoRequest) + salen, + + .flags = flags, + .sockaddr_len = salen, + .gethost = !!(get & SD_RESOLVE_GET_HOST), + .getserv = !!(get & SD_RESOLVE_GET_SERVICE), + }; + + msan_unpoison(&req, sizeof(req)); + + iov[0] = IOVEC_MAKE(&req, sizeof(NameInfoRequest)); + iov[1] = IOVEC_MAKE((void*) sa, salen); + + mh = (struct msghdr) { + .msg_iov = iov, + .msg_iovlen = ELEMENTSOF(iov) + }; + + if (sendmsg(resolve->fds[REQUEST_SEND_FD], &mh, MSG_NOSIGNAL) < 0) + return -errno; + + resolve->n_outstanding++; + q->destroy_callback = destroy_callback; + + if (ret_query) + *ret_query = q; + + TAKE_PTR(q); + + return 0; +} + +int sd_resolve_getnameinfo( + sd_resolve *resolve, + sd_resolve_query **ret_query, + const struct sockaddr *sa, socklen_t salen, + int flags, + uint64_t get, + sd_resolve_getnameinfo_handler_t callback, + void *userdata) { + + return resolve_getnameinfo_with_destroy_callback(resolve, ret_query, sa, salen, flags, get, callback, NULL, userdata); +} + +static int getnameinfo_done(sd_resolve_query *q) { + + assert(q); + assert(q->done); + assert(q->getnameinfo_handler); + + errno = q->_errno; + h_errno = q->_h_errno; + + return q->getnameinfo_handler(q, q->ret, q->host, q->serv, q->userdata); +} + +static void resolve_freeaddrinfo(struct addrinfo *ai) { + while (ai) { + struct addrinfo *next = ai->ai_next; + + free(ai->ai_addr); + free(ai->ai_canonname); + free_and_replace(ai, next); + } +} + +static void resolve_query_disconnect(sd_resolve_query *q) { + sd_resolve *resolve; + unsigned i; + + assert(q); + + if (!q->resolve) + return; + + resolve = q->resolve; + assert(resolve->n_queries > 0); + + if (q->done) { + assert(resolve->n_done > 0); + resolve->n_done--; + } + + i = q->id % QUERIES_MAX; + assert(resolve->query_array[i] == q); + resolve->query_array[i] = NULL; + LIST_REMOVE(queries, resolve->queries, q); + resolve->n_queries--; + + q->resolve = NULL; + if (!q->floating) + sd_resolve_unref(resolve); +} + +static sd_resolve_query *resolve_query_free(sd_resolve_query *q) { + assert(q); + + resolve_query_disconnect(q); + + if (q->destroy_callback) + q->destroy_callback(q->userdata); + + resolve_freeaddrinfo(q->addrinfo); + free(q->host); + free(q->serv); + + return mfree(q); +} + +DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_resolve_query, sd_resolve_query, resolve_query_free); + +int sd_resolve_query_is_done(sd_resolve_query *q) { + assert_return(q, -EINVAL); + assert_return(!resolve_pid_changed(q->resolve), -ECHILD); + + return q->done; +} + +void* sd_resolve_query_set_userdata(sd_resolve_query *q, void *userdata) { + void *ret; + + assert_return(q, NULL); + assert_return(!resolve_pid_changed(q->resolve), NULL); + + ret = q->userdata; + q->userdata = userdata; + + return ret; +} + +void* sd_resolve_query_get_userdata(sd_resolve_query *q) { + assert_return(q, NULL); + assert_return(!resolve_pid_changed(q->resolve), NULL); + + return q->userdata; +} + +sd_resolve *sd_resolve_query_get_resolve(sd_resolve_query *q) { + assert_return(q, NULL); + assert_return(!resolve_pid_changed(q->resolve), NULL); + + return q->resolve; +} + +int sd_resolve_query_get_destroy_callback(sd_resolve_query *q, sd_resolve_destroy_t *destroy_callback) { + assert_return(q, -EINVAL); + + if (destroy_callback) + *destroy_callback = q->destroy_callback; + + return !!q->destroy_callback; +} + +int sd_resolve_query_set_destroy_callback(sd_resolve_query *q, sd_resolve_destroy_t destroy_callback) { + assert_return(q, -EINVAL); + + q->destroy_callback = destroy_callback; + return 0; +} + +int sd_resolve_query_get_floating(sd_resolve_query *q) { + assert_return(q, -EINVAL); + + return q->floating; +} + +int sd_resolve_query_set_floating(sd_resolve_query *q, int b) { + assert_return(q, -EINVAL); + + if (q->floating == !!b) + return 0; + + if (!q->resolve) /* Already disconnected */ + return -ESTALE; + + q->floating = b; + + if (b) { + sd_resolve_query_ref(q); + sd_resolve_unref(q->resolve); + } else { + sd_resolve_ref(q->resolve); + sd_resolve_query_unref(q); + } + + return 1; +} + +static int io_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) { + sd_resolve *resolve = ASSERT_PTR(userdata); + int r; + + r = sd_resolve_process(resolve); + if (r < 0) + return r; + + return 1; +} + +int sd_resolve_attach_event(sd_resolve *resolve, sd_event *event, int64_t priority) { + int r; + + assert_return(resolve, -EINVAL); + assert_return(!resolve->event, -EBUSY); + + assert(!resolve->event_source); + + if (event) + resolve->event = sd_event_ref(event); + else { + r = sd_event_default(&resolve->event); + if (r < 0) + return r; + } + + r = sd_event_add_io(resolve->event, &resolve->event_source, resolve->fds[RESPONSE_RECV_FD], POLLIN, io_callback, resolve); + if (r < 0) + goto fail; + + r = sd_event_source_set_priority(resolve->event_source, priority); + if (r < 0) + goto fail; + + return 0; + +fail: + sd_resolve_detach_event(resolve); + return r; +} + + int sd_resolve_detach_event(sd_resolve *resolve) { + assert_return(resolve, -EINVAL); + + if (!resolve->event) + return 0; + + resolve->event_source = sd_event_source_disable_unref(resolve->event_source); + resolve->event = sd_event_unref(resolve->event); + return 1; +} + +sd_event *sd_resolve_get_event(sd_resolve *resolve) { + assert_return(resolve, NULL); + + return resolve->event; +} diff --git a/src/libsystemd/sd-resolve/test-resolve.c b/src/libsystemd/sd-resolve/test-resolve.c new file mode 100644 index 0000000..829e13e --- /dev/null +++ b/src/libsystemd/sd-resolve/test-resolve.c @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <arpa/inet.h> +#include <errno.h> +#include <net/if_arp.h> +#include <netinet/in.h> +#include <resolv.h> +#include <stdio.h> + +#include "sd-resolve.h" + +#include "alloc-util.h" +#include "macro.h" +#include "socket-util.h" +#include "string-util.h" +#include "tests.h" +#include "time-util.h" + +#define TEST_TIMEOUT_USEC (20*USEC_PER_SEC) + +static int getaddrinfo_handler(sd_resolve_query *q, int ret, const struct addrinfo *ai, void *userdata) { + const struct addrinfo *i; + + assert_se(q); + + if (ret != 0) { + log_error("getaddrinfo error: %s %i", gai_strerror(ret), ret); + return 0; + } + + for (i = ai; i; i = i->ai_next) { + _cleanup_free_ char *addr = NULL; + + assert_se(sockaddr_pretty(i->ai_addr, i->ai_addrlen, false, true, &addr) == 0); + puts(addr); + } + + printf("canonical name: %s\n", strna(ai->ai_canonname)); + + return 0; +} + +static int getnameinfo_handler(sd_resolve_query *q, int ret, const char *host, const char *serv, void *userdata) { + assert_se(q); + + if (ret != 0) { + log_error("getnameinfo error: %s %i", gai_strerror(ret), ret); + return 0; + } + + printf("Host: %s — Serv: %s\n", strna(host), strna(serv)); + return 0; +} + +int main(int argc, char *argv[]) { + _cleanup_(sd_resolve_query_unrefp) sd_resolve_query *q1 = NULL, *q2 = NULL; + _cleanup_(sd_resolve_unrefp) sd_resolve *resolve = NULL; + int r; + + struct addrinfo hints = { + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM, + .ai_flags = AI_CANONNAME, + }; + + union sockaddr_union sa = { + .in.sin_family = AF_INET, + .in.sin_port = htobe16(80), + }; + + test_setup_logging(LOG_DEBUG); + + assert_se(sd_resolve_default(&resolve) >= 0); + + /* Test a floating resolver query */ + r = sd_resolve_getaddrinfo(resolve, NULL, "redhat.com", "http", NULL, getaddrinfo_handler, NULL); + if (r < 0) + log_error_errno(r, "sd_resolve_getaddrinfo(): %m"); + + /* Make a name -> address query */ + r = sd_resolve_getaddrinfo(resolve, &q1, argc >= 2 ? argv[1] : "www.heise.de", NULL, &hints, getaddrinfo_handler, NULL); + if (r < 0) + log_error_errno(r, "sd_resolve_getaddrinfo(): %m"); + + /* Make an address -> name query */ + sa.in.sin_addr.s_addr = inet_addr(argc >= 3 ? argv[2] : "193.99.144.71"); + r = sd_resolve_getnameinfo(resolve, &q2, &sa.sa, SOCKADDR_LEN(sa), 0, SD_RESOLVE_GET_BOTH, getnameinfo_handler, NULL); + if (r < 0) + log_error_errno(r, "sd_resolve_getnameinfo(): %m"); + + /* Wait until all queries are completed */ + for (;;) { + r = sd_resolve_wait(resolve, TEST_TIMEOUT_USEC); + if (r == 0) + break; + if (r == -ETIMEDOUT) { + /* Let's catch timeouts here, so that we can run safely in a CI that has no reliable DNS. Note + * that we invoke exit() directly here, as the stuck NSS call will not allow us to exit + * cleanly. */ + + log_notice_errno(r, "sd_resolve_wait() timed out, but that's OK"); + exit(EXIT_SUCCESS); + } + if (r < 0) { + log_error_errno(r, "sd_resolve_wait(): %m"); + assert_not_reached(); + } + } + + return 0; +} |