From 399644e47874bff147afb19c89228901ac39340e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 15 Apr 2024 21:40:15 +0200 Subject: Adding upstream version 6.05.01. Signed-off-by: Daniel Baumann --- man7/netlink.7 | 609 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 609 insertions(+) create mode 100644 man7/netlink.7 (limited to 'man7/netlink.7') diff --git a/man7/netlink.7 b/man7/netlink.7 new file mode 100644 index 0000000..4d6cdbc --- /dev/null +++ b/man7/netlink.7 @@ -0,0 +1,609 @@ +'\" t +.\" This man page is Copyright (c) 1998 by Andi Kleen. +.\" +.\" SPDX-License-Identifier: GPL-1.0-or-later +.\" +.\" Based on the original comments from Alexey Kuznetsov +.\" Modified 2005-12-27 by Hasso Tepper +.\" $Id: netlink.7,v 1.8 2000/06/22 13:23:00 ak Exp $ +.TH netlink 7 2023-07-30 "Linux man-pages 6.05.01" +.SH NAME +netlink \- communication between kernel and user space (AF_NETLINK) +.SH SYNOPSIS +.nf +.B #include +.B #include +.B #include +.PP +.BI "netlink_socket = socket(AF_NETLINK, " socket_type ", " netlink_family ); +.fi +.SH DESCRIPTION +Netlink is used to transfer information between the kernel and +user-space processes. +It consists of a standard sockets-based interface for user space +processes and an internal kernel API for kernel modules. +The internal kernel interface is not documented in this manual page. +There is also an obsolete netlink interface +via netlink character devices; this interface is not documented here +and is provided only for backward compatibility. +.PP +Netlink is a datagram-oriented service. +Both +.B SOCK_RAW +and +.B SOCK_DGRAM +are valid values for +.IR socket_type . +However, the netlink protocol does not distinguish between datagram +and raw sockets. +.PP +.I netlink_family +selects the kernel module or netlink group to communicate with. +The currently assigned netlink families are: +.TP +.B NETLINK_ROUTE +Receives routing and link updates and may be used to modify the routing +tables (both IPv4 and IPv6), IP addresses, link parameters, +neighbor setups, queueing disciplines, traffic classes, and +packet classifiers (see +.BR rtnetlink (7)). +.TP +.BR NETLINK_W1 " (Linux 2.6.13 to Linux 2.16.17)" +Messages from 1-wire subsystem. +.TP +.B NETLINK_USERSOCK +Reserved for user-mode socket protocols. +.TP +.BR NETLINK_FIREWALL " (up to and including Linux 3.4)" +.\" removed by commit d16cf20e2f2f13411eece7f7fb72c17d141c4a84 +Transport IPv4 packets from netfilter to user space. +Used by +.I ip_queue +kernel module. +After a long period of being declared obsolete (in favor of the more advanced +.I nfnetlink_queue +feature), +.B NETLINK_FIREWALL +was removed in Linux 3.5. +.TP +.BR NETLINK_SOCK_DIAG " (since Linux 3.3)" +.\" commit 7f1fb60c4fc9fb29fbb406ac8c4cfb4e59e168d6 +Query information about sockets of various protocol families from the kernel +(see +.BR sock_diag (7)). +.TP +.BR NETLINK_INET_DIAG " (since Linux 2.6.14)" +An obsolete synonym for +.BR NETLINK_SOCK_DIAG . +.TP +.BR NETLINK_NFLOG " (up to and including Linux 3.16)" +Netfilter/iptables ULOG. +.TP +.B NETLINK_XFRM +.\" FIXME More details on NETLINK_XFRM needed. +IPsec. +.TP +.BR NETLINK_SELINUX " (since Linux 2.6.4)" +SELinux event notifications. +.TP +.BR NETLINK_ISCSI " (since Linux 2.6.15)" +.\" FIXME More details on NETLINK_ISCSI needed. +Open-iSCSI. +.TP +.BR NETLINK_AUDIT " (since Linux 2.6.6)" +.\" FIXME More details on NETLINK_AUDIT needed. +Auditing. +.TP +.BR NETLINK_FIB_LOOKUP " (since Linux 2.6.13)" +.\" FIXME More details on NETLINK_FIB_LOOKUP needed. +Access to FIB lookup from user space. +.TP +.BR NETLINK_CONNECTOR " (since Linux 2.6.14)" +Kernel connector. +See +.I Documentation/driver\-api/connector.rst +(or +.I /Documentation/connector/connector.* +.\" commit baa293e9544bea71361950d071579f0e4d5713ed +in Linux 5.2 and earlier) +in the Linux kernel source tree for further information. +.TP +.BR NETLINK_NETFILTER " (since Linux 2.6.14)" +.\" FIXME More details on NETLINK_NETFILTER needed. +Netfilter subsystem. +.TP +.BR NETLINK_SCSITRANSPORT " (since Linux 2.6.19)" +.\" commit 84314fd4740ad73550c76dee4a9578979d84af48 +.\" FIXME More details on NETLINK_SCSITRANSPORT needed. +SCSI Transports. +.TP +.BR NETLINK_RDMA " (since Linux 3.0)" +.\" commit b2cbae2c248776d81cc265ff7d48405b6a4cc463 +.\" FIXME More details on NETLINK_RDMA needed. +Infiniband RDMA. +.TP +.BR NETLINK_IP6_FW " (up to and including Linux 3.4)" +Transport IPv6 packets from netfilter to user space. +Used by +.I ip6_queue +kernel module. +.TP +.B NETLINK_DNRTMSG +DECnet routing messages. +.TP +.BR NETLINK_KOBJECT_UEVENT " (since Linux 2.6.10)" +.\" FIXME More details on NETLINK_KOBJECT_UEVENT needed. +Kernel messages to user space. +.TP +.BR NETLINK_GENERIC " (since Linux 2.6.15)" +Generic netlink family for simplified netlink usage. +.TP +.BR NETLINK_CRYPTO " (since Linux 3.2)" +.\" commit a38f7907b926e4c6c7d389ad96cc38cec2e5a9e9 +.\" Author: Steffen Klassert +Netlink interface to request information about ciphers registered +with the kernel crypto API as well as allow configuration of the +kernel crypto API. +.PP +Netlink messages consist of a byte stream with one or multiple +.I nlmsghdr +headers and associated payload. +The byte stream should be accessed only with the standard +.B NLMSG_* +macros. +See +.BR netlink (3) +for further information. +.PP +In multipart messages (multiple +.I nlmsghdr +headers with associated payload in one byte stream) the first and all +following headers have the +.B NLM_F_MULTI +flag set, except for the last header which has the type +.BR NLMSG_DONE . +.PP +After each +.I nlmsghdr +the payload follows. +.PP +.in +4n +.EX +struct nlmsghdr { + __u32 nlmsg_len; /* Length of message including header */ + __u16 nlmsg_type; /* Type of message content */ + __u16 nlmsg_flags; /* Additional flags */ + __u32 nlmsg_seq; /* Sequence number */ + __u32 nlmsg_pid; /* Sender port ID */ +}; +.EE +.in +.PP +.I nlmsg_type +can be one of the standard message types: +.B NLMSG_NOOP +message is to be ignored, +.B NLMSG_ERROR +message signals an error and the payload contains an +.I nlmsgerr +structure, +.B NLMSG_DONE +message terminates a multipart message. +Error messages get the +original request appended, unless the user requests to cap the +error message, and get extra error data if requested. +.PP +.in +4n +.EX +struct nlmsgerr { + int error; /* Negative errno or 0 for acknowledgements */ + struct nlmsghdr msg; /* Message header that caused the error */ + /* + * followed by the message contents + * unless NETLINK_CAP_ACK was set + * or the ACK indicates success (error == 0). + * For example Generic Netlink message with attributes. + * message length is aligned with NLMSG_ALIGN() + */ + /* + * followed by TLVs defined in enum nlmsgerr_attrs + * if NETLINK_EXT_ACK was set + */ +}; +.EE +.in +.PP +A netlink family usually specifies more message types, see the +appropriate manual pages for that, for example, +.BR rtnetlink (7) +for +.BR NETLINK_ROUTE . +.TS +tab(:); +l s +lB lx. +Standard flag bits in \fInlmsg_flags\fP +_ +NLM_F_REQUEST:T{ +Must be set on all request messages. +T} +NLM_F_MULTI:T{ +The message is part of a multipart message terminated by +.BR NLMSG_DONE . +T} +NLM_F_ACK:T{ +Request for an acknowledgement on success. +T} +NLM_F_ECHO:T{ +Echo this request. +T} +.TE +.\" No right adjustment for text blocks in tables +.TS +tab(:); +l s +lB lx. +Additional flag bits for GET requests +_ +NLM_F_ROOT:T{ +Return the complete table instead of a single entry. +T} +NLM_F_MATCH:T{ +Return all entries matching criteria passed in message content. +Not implemented yet. +T} +NLM_F_ATOMIC:T{ +Return an atomic snapshot of the table. +T} +NLM_F_DUMP:T{ +Convenience macro; equivalent to +(NLM_F_ROOT|NLM_F_MATCH). +T} +.TE +.\" FIXME NLM_F_ATOMIC is not used anymore? +.PP +Note that +.B NLM_F_ATOMIC +requires the +.B CAP_NET_ADMIN +capability or an effective UID of 0. +.TS +tab(:); +l s +lB lx. +Additional flag bits for NEW requests +_ +NLM_F_REPLACE:T{ +Replace existing matching object. +T} +NLM_F_EXCL:T{ +Don't replace if the object already exists. +T} +NLM_F_CREATE:T{ +Create object if it doesn't already exist. +T} +NLM_F_APPEND:T{ +Add to the end of the object list. +T} +.TE +.PP +.I nlmsg_seq +and +.I nlmsg_pid +are used to track messages. +.I nlmsg_pid +shows the origin of the message. +Note that there isn't a 1:1 relationship between +.I nlmsg_pid +and the PID of the process if the message originated from a netlink +socket. +See the +.B ADDRESS FORMATS +section for further information. +.PP +Both +.I nlmsg_seq +and +.I nlmsg_pid +.\" FIXME Explain more about nlmsg_seq and nlmsg_pid. +are opaque to netlink core. +.PP +Netlink is not a reliable protocol. +It tries its best to deliver a message to its destination(s), +but may drop messages when an out-of-memory condition or +other error occurs. +For reliable transfer the sender can request an +acknowledgement from the receiver by setting the +.B NLM_F_ACK +flag. +An acknowledgement is an +.B NLMSG_ERROR +packet with the error field set to 0. +The application must generate acknowledgements for +received messages itself. +The kernel tries to send an +.B NLMSG_ERROR +message for every failed packet. +A user process should follow this convention too. +.PP +However, reliable transmissions from kernel to user are impossible +in any case. +The kernel can't send a netlink message if the socket buffer is full: +the message will be dropped and the kernel and the user-space process will +no longer have the same view of kernel state. +It is up to the application to detect when this happens (via the +.B ENOBUFS +error returned by +.BR recvmsg (2)) +and resynchronize. +.SS Address formats +The +.I sockaddr_nl +structure describes a netlink client in user space or in the kernel. +A +.I sockaddr_nl +can be either unicast (only sent to one peer) or sent to +netlink multicast groups +.RI ( nl_groups +not equal 0). +.PP +.in +4n +.EX +struct sockaddr_nl { + sa_family_t nl_family; /* AF_NETLINK */ + unsigned short nl_pad; /* Zero */ + pid_t nl_pid; /* Port ID */ + __u32 nl_groups; /* Multicast groups mask */ +}; +.EE +.in +.PP +.I nl_pid +is the unicast address of netlink socket. +It's always 0 if the destination is in the kernel. +For a user-space process, +.I nl_pid +is usually the PID of the process owning the destination socket. +However, +.I nl_pid +identifies a netlink socket, not a process. +If a process owns several netlink +sockets, then +.I nl_pid +can be equal to the process ID only for at most one socket. +There are two ways to assign +.I nl_pid +to a netlink socket. +If the application sets +.I nl_pid +before calling +.BR bind (2), +then it is up to the application to make sure that +.I nl_pid +is unique. +If the application sets it to 0, the kernel takes care of assigning it. +The kernel assigns the process ID to the first netlink socket the process +opens and assigns a unique +.I nl_pid +to every netlink socket that the process subsequently creates. +.PP +.I nl_groups +is a bit mask with every bit representing a netlink group number. +Each netlink family has a set of 32 multicast groups. +When +.BR bind (2) +is called on the socket, the +.I nl_groups +field in the +.I sockaddr_nl +should be set to a bit mask of the groups which it wishes to listen to. +The default value for this field is zero which means that no multicasts +will be received. +A socket may multicast messages to any of the multicast groups by setting +.I nl_groups +to a bit mask of the groups it wishes to send to when it calls +.BR sendmsg (2) +or does a +.BR connect (2). +Only processes with an effective UID of 0 or the +.B CAP_NET_ADMIN +capability may send or listen to a netlink multicast group. +Since Linux 2.6.13, +.\" commit d629b836d151d43332492651dd841d32e57ebe3b +messages can't be broadcast to multiple groups. +Any replies to a message received for a multicast group should be +sent back to the sending PID and the multicast group. +Some Linux kernel subsystems may additionally allow other users +to send and/or receive messages. +As at Linux 3.0, the +.BR NETLINK_KOBJECT_UEVENT , +.BR NETLINK_GENERIC , +.BR NETLINK_ROUTE , +and +.B NETLINK_SELINUX +groups allow other users to receive messages. +No groups allow other users to send messages. +.SS Socket options +To set or get a netlink socket option, call +.BR getsockopt (2) +to read or +.BR setsockopt (2) +to write the option with the option level argument set to +.BR SOL_NETLINK . +Unless otherwise noted, +.I optval +is a pointer to an +.IR int . +.TP +.BR NETLINK_PKTINFO " (since Linux 2.6.14)" +.\" commit 9a4595bc7e67962f13232ee55a64e063062c3a99 +.\" Author: Patrick McHardy +Enable +.B nl_pktinfo +control messages for received packets to get the extended +destination group number. +.TP +.BR NETLINK_ADD_MEMBERSHIP ,\ NETLINK_DROP_MEMBERSHIP " (since Linux 2.6.14)" +.\" commit 9a4595bc7e67962f13232ee55a64e063062c3a99 +.\" Author: Patrick McHardy +Join/leave a group specified by +.IR optval . +.TP +.BR NETLINK_LIST_MEMBERSHIPS " (since Linux 4.2)" +.\" commit b42be38b2778eda2237fc759e55e3b698b05b315 +.\" Author: David Herrmann +Retrieve all groups a socket is a member of. +.I optval +is a pointer to +.B __u32 +and +.I optlen +is the size of the array. +The array is filled with the full membership set of the +socket, and the required array size is returned in +.IR optlen . +.TP +.BR NETLINK_BROADCAST_ERROR " (since Linux 2.6.30)" +.\" commit be0c22a46cfb79ab2342bb28fde99afa94ef868e +.\" Author: Pablo Neira Ayuso +When not set, +.B netlink_broadcast() +only reports +.B ESRCH +errors and silently ignore +.B ENOBUFS +errors. +.TP +.BR NETLINK_NO_ENOBUFS " (since Linux 2.6.30)" +.\" commit 38938bfe3489394e2eed5e40c9bb8f66a2ce1405 +.\" Author: Pablo Neira Ayuso +This flag can be used by unicast and broadcast listeners to avoid receiving +.B ENOBUFS +errors. +.TP +.BR NETLINK_LISTEN_ALL_NSID " (since Linux 4.2)" +.\" commit 59324cf35aba5336b611074028777838a963d03b +.\" Author: Nicolas Dichtel +When set, this socket will receive netlink notifications from +all network namespaces that have an +.I nsid +assigned into the network namespace where the socket has been opened. +The +.I nsid +is sent to user space via an ancillary data. +.TP +.BR NETLINK_CAP_ACK " (since Linux 4.3)" +.\" commit 0a6a3a23ea6efde079a5b77688541a98bf202721 +.\" Author: Christophe Ricard +The kernel may fail to allocate the necessary room for the acknowledgement +message back to user space. +This option trims off the payload of the original netlink message. +The netlink message header is still included, so the user can guess from the +sequence number which message triggered the acknowledgement. +.SH VERSIONS +The socket interface to netlink first appeared Linux 2.2. +.PP +Linux 2.0 supported a more primitive device-based netlink interface +(which is still available as a compatibility option). +This obsolete interface is not described here. +.SH NOTES +It is often better to use netlink via +.I libnetlink +or +.I libnl +than via the low-level kernel interface. +.SH BUGS +This manual page is not complete. +.SH EXAMPLES +The following example creates a +.B NETLINK_ROUTE +netlink socket which will listen to the +.B RTMGRP_LINK +(network interface create/delete/up/down events) and +.B RTMGRP_IPV4_IFADDR +(IPv4 addresses add/delete events) multicast groups. +.PP +.in +4n +.EX +struct sockaddr_nl sa; +\& +memset(&sa, 0, sizeof(sa)); +sa.nl_family = AF_NETLINK; +sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR; +\& +fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); +bind(fd, (struct sockaddr *) &sa, sizeof(sa)); +.EE +.in +.PP +The next example demonstrates how to send a netlink message to the +kernel (pid 0). +Note that the application must take care of message sequence numbers +in order to reliably track acknowledgements. +.PP +.in +4n +.EX +struct nlmsghdr *nh; /* The nlmsghdr with payload to send */ +struct sockaddr_nl sa; +struct iovec iov = { nh, nh\->nlmsg_len }; +struct msghdr msg; +\& +msg = { &sa, sizeof(sa), &iov, 1, NULL, 0, 0 }; +memset(&sa, 0, sizeof(sa)); +sa.nl_family = AF_NETLINK; +nh\->nlmsg_pid = 0; +nh\->nlmsg_seq = ++sequence_number; +/* Request an ack from kernel by setting NLM_F_ACK */ +nh\->nlmsg_flags |= NLM_F_ACK; +\& +sendmsg(fd, &msg, 0); +.EE +.in +.PP +And the last example is about reading netlink message. +.PP +.in +4n +.EX +int len; +/* 8192 to avoid message truncation on platforms with + page size > 4096 */ +struct nlmsghdr buf[8192/sizeof(struct nlmsghdr)]; +struct iovec iov = { buf, sizeof(buf) }; +struct sockaddr_nl sa; +struct msghdr msg; +struct nlmsghdr *nh; +\& +msg = { &sa, sizeof(sa), &iov, 1, NULL, 0, 0 }; +len = recvmsg(fd, &msg, 0); +\& +for (nh = (struct nlmsghdr *) buf; NLMSG_OK (nh, len); + nh = NLMSG_NEXT (nh, len)) { + /* The end of multipart message */ + if (nh\->nlmsg_type == NLMSG_DONE) + return; +\& + if (nh\->nlmsg_type == NLMSG_ERROR) + /* Do some error handling */ + ... +\& + /* Continue with parsing payload */ + ... +} +.EE +.in +.SH SEE ALSO +.BR cmsg (3), +.BR netlink (3), +.BR capabilities (7), +.BR rtnetlink (7), +.BR sock_diag (7) +.PP +.UR ftp://ftp.inr.ac.ru\:/ip\-routing\:/iproute2* +information about libnetlink +.UE +.PP +.UR http://www.infradead.org\:/\[ti]tgr\:/libnl/ +information about libnl +.UE +.PP +RFC 3549 "Linux Netlink as an IP Services Protocol" -- cgit v1.2.3