summaryrefslogtreecommitdiffstats
path: root/src/basic/socket-util.h
blob: 9a11df834d113ebd26a76a88458dc62d52eef185 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#pragma once

#include <inttypes.h>
#include <linux/netlink.h>
#include <linux/if_ether.h>
#include <linux/if_infiniband.h>
#include <linux/if_packet.h>
#include <netinet/in.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/un.h>

#include "errno-util.h"
#include "in-addr-util.h"
#include "macro.h"
#include "missing_network.h"
#include "missing_socket.h"
#include "sparse-endian.h"

/* Storage for a socket address of any of the families listed below. All members overlay the same memory;
 * sa.sa_family tells which protocol-specific member is the one in use. */
union sockaddr_union {
        /* The minimal, abstract version */
        struct sockaddr sa;

        /* The libc provided version that allocates "enough room" for every protocol */
        struct sockaddr_storage storage;

        /* Protocol-specific implementations */
        struct sockaddr_in in;
        struct sockaddr_in6 in6;
        struct sockaddr_un un;
        struct sockaddr_nl nl;
        struct sockaddr_ll ll;
        struct sockaddr_vm vm;

        /* Ensure there is enough space to store Infiniband addresses */
        uint8_t ll_buffer[offsetof(struct sockaddr_ll, sll_addr) + CONST_MAX(ETH_ALEN, INFINIBAND_ALEN)];

        /* Ensure there is enough space after the AF_UNIX sun_path for one more NUL byte, just to be sure that the path
         * component is always followed by at least one NUL byte. */
        uint8_t un_buffer[sizeof(struct sockaddr_un) + 1];
};

/* Size in bytes of the sun_path[] array of struct sockaddr_un. The operand of sizeof is never evaluated,
 * hence no object is created and no pointer is dereferenced here. */
#define SUN_PATH_LEN (sizeof(((const struct sockaddr_un *) NULL)->sun_path))

/* A socket address together with its length, socket type and protocol. */
typedef struct SocketAddress {
        /* The address proper; the family is found in sockaddr.sa.sa_family */
        union sockaddr_union sockaddr;

        /* We store the size here explicitly due to the weird
         * sockaddr_un semantics for abstract sockets */
        socklen_t size;

        /* Socket type, i.e. SOCK_STREAM, SOCK_DGRAM, ... */
        int type;

        /* Socket protocol, IPPROTO_xxx, usually 0, except for netlink */
        int protocol;
} SocketAddress;

/* Selects how an IPv6 listening socket treats IPv4 traffic; passed as the 'only' argument of
 * socket_address_listen().
 * NOTE(review): presumably translated into the IPV6_V6ONLY socket option; confirm in the implementation. */
typedef enum SocketAddressBindIPv6Only {
        SOCKET_ADDRESS_DEFAULT,
        SOCKET_ADDRESS_BOTH,
        SOCKET_ADDRESS_IPV6_ONLY,
        /* Number of valid values above */
        _SOCKET_ADDRESS_BIND_IPV6_ONLY_MAX,
        /* Negative, errno-valued marker for "not a valid value" */
        _SOCKET_ADDRESS_BIND_IPV6_ONLY_INVALID = -EINVAL,
} SocketAddressBindIPv6Only;

/* Evaluates to the address family (sa_family) stored in the SocketAddress that 'a' points to. */
#define socket_address_family(a) ((a)->sockaddr.sa.sa_family)

/* Conversion between socket type values and their string names, in both directions.
 * NOTE(review): presumably 't' is one of SOCK_STREAM, SOCK_DGRAM, ...; the accepted set and the values
 * returned on failure are defined by the implementation, which is not part of this header. */
const char* socket_address_type_to_string(int t) _const_;
int socket_address_type_from_string(const char *s) _pure_;

/* NOTE(review): presumably removes the file system node an AF_UNIX address refers to; confirm how
 * abstract and unnamed addresses are treated. */
int sockaddr_un_unlink(const struct sockaddr_un *sa);

/* Forwards AF_UNIX addresses to sockaddr_un_unlink() and returns its result; for every other address
 * family nothing is done and 0 is returned. */
static inline int socket_address_unlink(const SocketAddress *a) {
        if (socket_address_family(a) != AF_UNIX)
                return 0;

        return sockaddr_un_unlink(&a->sockaddr.un);
}

/* Operations on SocketAddress objects; all are implemented out of line.
 * NOTE(review): the per-function remarks below are inferred from names and signatures only and should be
 * confirmed against the implementation. */

/* Presumably tells whether accept() is meaningful for sockets created from 'a'. */
bool socket_address_can_accept(const SocketAddress *a) _pure_;

/* Presumably creates a socket for 'a', applies the given options and puts it into listening state. The
 * meaning of the return value (fd vs. 0, negative errno on failure) is not visible here. */
int socket_address_listen(
                const SocketAddress *a,
                int flags,
                int backlog,
                SocketAddressBindIPv6Only only,
                const char *bind_to_device,
                bool reuse_port,
                bool free_bind,
                bool transparent,
                mode_t directory_mode,
                mode_t socket_mode,
                const char *label);

/* Presumably validates 'a'; what 'strict' additionally enforces is defined by the implementation. */
int socket_address_verify(const SocketAddress *a, bool strict) _pure_;
/* Presumably formats 'a' into a newly allocated string stored in *p. */
int socket_address_print(const SocketAddress *a, char **p);
/* Presumably checks whether the socket 'fd' matches the address 'a'. */
bool socket_address_matches_fd(const SocketAddress *a, int fd);

bool socket_address_equal(const SocketAddress *a, const SocketAddress *b) _pure_;

/* Presumably returns the file system path of an AF_UNIX address, and NULL when there is none. */
const char* socket_address_get_path(const SocketAddress *a);

/* NOTE(review): presumably "supported" refers to kernel support and "enabled" to IPv6 not being turned off
 * at runtime; confirm the exact distinction in the implementation. */
bool socket_ipv6_is_supported(void);
bool socket_ipv6_is_enabled(void);

/* Accessors for the port and IP address stored in a socket address.
 * NOTE(review): presumably only defined for AF_INET/AF_INET6 (plus AF_VSOCK for the port); confirm. */
int sockaddr_port(const struct sockaddr *_sa, unsigned *port);
const union in_addr_union *sockaddr_in_addr(const struct sockaddr *sa);
int sockaddr_set_in_addr(union sockaddr_union *u, int family, const union in_addr_union *a, uint16_t port);

/* Formatting of socket addresses as strings; the result is handed back through the 'char **' parameter.
 * NOTE(review): presumably the string is newly allocated and owned by the caller; confirm. */
int sockaddr_pretty(const struct sockaddr *_sa, socklen_t salen, bool translate_ipv6, bool include_port, char **ret);
int getpeername_pretty(int fd, bool include_port, char **ret);
int getsockname_pretty(int fd, char **ret);

int socknameinfo_pretty(union sockaddr_union *sa, socklen_t salen, char **_ret);

/* String conversions for SocketAddressBindIPv6Only.
 * NOTE(review): the "_or_bool_" variant presumably accepts boolean strings in addition to the enum names;
 * confirm which boolean maps to which value. */
const char* socket_address_bind_ipv6_only_to_string(SocketAddressBindIPv6Only b) _const_;
SocketAddressBindIPv6Only socket_address_bind_ipv6_only_from_string(const char *s) _pure_;
SocketAddressBindIPv6Only socket_address_bind_ipv6_only_or_bool_from_string(const char *s);

/* String conversions for netlink family numbers. The "_alloc" variant returns the string through *s.
 * NOTE(review): presumably newly allocated and to be freed by the caller; confirm. */
int netlink_family_to_string_alloc(int b, char **s);
int netlink_family_from_string(const char *s) _pure_;

bool sockaddr_equal(const union sockaddr_union *a, const union sockaddr_union *b);

int fd_set_sndbuf(int fd, size_t n, bool increase);

/* Shorthand for fd_set_sndbuf() with the 'increase' argument fixed to true. */
static inline int fd_inc_sndbuf(int fd, size_t n) {
        const bool increase = true;

        return fd_set_sndbuf(fd, n, increase);
}
int fd_set_rcvbuf(int fd, size_t n, bool increase);

/* Shorthand for fd_set_rcvbuf() with the 'increase' argument fixed to true. */
static inline int fd_increase_rxbuf(int fd, size_t n) {
        const bool increase = true;

        return fd_set_rcvbuf(fd, n, increase);
}

/* String conversions for IP type-of-service values. The "_alloc" variant returns the string through *s.
 * NOTE(review): presumably newly allocated and to be freed by the caller; confirm. */
int ip_tos_to_string_alloc(int i, char **s);
int ip_tos_from_string(const char *s);

typedef enum {
        IFNAME_VALID_ALTERNATIVE = 1 << 0, /* Allow "altnames" too */
        IFNAME_VALID_NUMERIC     = 1 << 1, /* Allow decimal formatted ifindexes too */
        IFNAME_VALID_SPECIAL     = 1 << 2, /* Allow the special names "all" and "default" */
        _IFNAME_VALID_ALL        = IFNAME_VALID_ALTERNATIVE | IFNAME_VALID_NUMERIC | IFNAME_VALID_SPECIAL,
} IfnameValidFlags;
/* Network interface name validation: ifname_valid_char() takes a single character, ifname_valid_full() a
 * whole name plus IfnameValidFlags that widen what is accepted.
 * NOTE(review): the exact rules (permitted characters, maximum length) live in the implementation. */
bool ifname_valid_char(char a);
bool ifname_valid_full(const char *p, IfnameValidFlags flags);
static inline bool ifname_valid(const char *p) {
        return ifname_valid_full(p, 0);
}
/* NOTE(review): presumably validates an address label string; the rules are in the implementation. */
bool address_label_valid(const char *p);

/* Queries about the peer of a connected socket; results are handed back through the second parameter.
 * NOTE(review): presumably credentials, security label and supplementary groups respectively, with the
 * 'ret' outputs newly allocated for the caller to free; confirm. */
int getpeercred(int fd, struct ucred *ucred);
int getpeersec(int fd, char **ret);
int getpeergroups(int fd, gid_t **ret);

/* Most general of the send_many_fds*() entry points: takes an array of file descriptors, an optional
 * payload (iov/iovlen) and an optional destination address (sa/len).
 * NOTE(review): presumably the descriptors are transmitted over 'transport_fd' as ancillary data; the
 * meaning of the return value is defined by the implementation. */
ssize_t send_many_fds_iov_sa(
                int transport_fd,
                int *fds_array, size_t n_fds_array,
                const struct iovec *iov, size_t iovlen,
                const struct sockaddr *sa, socklen_t len,
                int flags);
/* Same as send_many_fds_iov_sa(), but without a destination address: NULL and a length of 0 are passed
 * for it. The callee's result is returned unchanged. */
static inline ssize_t send_many_fds_iov(
                int transport_fd,
                int *fds_array, size_t n_fds_array,
                const struct iovec *iov, size_t iovlen,
                int flags) {

        const struct sockaddr *no_destination = NULL;

        return send_many_fds_iov_sa(
                        transport_fd,
                        fds_array, n_fds_array,
                        iov, iovlen,
                        no_destination, 0,
                        flags);
}
/* Same as send_many_fds_iov_sa(), but with neither payload (iov = NULL, iovlen = 0) nor destination
 * address (sa = NULL, len = 0). Note that the callee's ssize_t result is converted to int on return. */
static inline int send_many_fds(
                int transport_fd,
                int *fds_array,
                size_t n_fds_array,
                int flags) {

        ssize_t r;

        r = send_many_fds_iov_sa(transport_fd, fds_array, n_fds_array, NULL, 0, NULL, 0, flags);
        return (int) r;
}
/* Single-descriptor counterparts of the send_many_fds*() calls, plus the matching receive side.
 * NOTE(review): ownership of received descriptors and of the array returned via 'ret_fds_array' is
 * presumably transferred to the caller; confirm in the implementation. */
ssize_t send_one_fd_iov_sa(
                int transport_fd,
                int fd,
                const struct iovec *iov, size_t iovlen,
                const struct sockaddr *sa, socklen_t len,
                int flags);
int send_one_fd_sa(int transport_fd,
                   int fd,
                   const struct sockaddr *sa, socklen_t len,
                   int flags);
/* Shorthands for send_one_fd_iov_sa() without a destination address (sa = NULL, len = 0); send_one_fd()
 * additionally passes no payload (iov = NULL, iovlen = 0). Being macros around send_one_fd_iov_sa(), both
 * evaluate to its ssize_t result. */
#define send_one_fd_iov(transport_fd, fd, iov, iovlen, flags) send_one_fd_iov_sa(transport_fd, fd, iov, iovlen, NULL, 0, flags)
#define send_one_fd(transport_fd, fd, flags) send_one_fd_iov_sa(transport_fd, fd, NULL, 0, NULL, 0, flags)
ssize_t receive_one_fd_iov(int transport_fd, struct iovec *iov, size_t iovlen, int flags, int *ret_fd);
int receive_one_fd(int transport_fd, int flags);
ssize_t receive_many_fds_iov(int transport_fd, struct iovec *iov, size_t iovlen, int **ret_fds_array, size_t *ret_n_fds_array, int flags);
int receive_many_fds(int transport_fd, int **ret_fds_array, size_t *ret_n_fds_array, int flags);

/* NOTE(review): presumably returns the size of the next datagram queued on 'fd' without dequeuing it;
 * confirm, including the return value when nothing is queued. */
ssize_t next_datagram_size_fd(int fd);

/* NOTE(review): presumably accepts and discards all connections pending on the listening socket 'fd';
 * confirm in the implementation. */
int flush_accept(int fd);

/* Loop header that walks all control messages of the msghdr 'mh' points to, assigning each one to 'cmsg'
 * in turn: starts at CMSG_FIRSTHDR(), advances with CMSG_NXTHDR() and ends once that yields NULL. Both
 * arguments are expanded more than once, hence they should be free of side effects. */
#define CMSG_FOREACH(cmsg, mh)                                          \
        for ((cmsg) = CMSG_FIRSTHDR(mh);                                \
             (cmsg) != NULL;                                            \
             (cmsg) = CMSG_NXTHDR((mh), (cmsg)))

/* Returns the cmsghdr's data pointer, but safely cast to the specified type. Does two alignment checks: one
 * at compile time, that the requested type has a smaller or same alignment as 'struct cmsghdr', and one
 * during runtime, that the actual pointer matches the alignment too. This is supposed to catch cases where
 * 'struct timeval' is embedded into 'struct cmsghdr' on architectures where the alignment of the former is 8
 * bytes (because of a 64-bit time_t), but of the latter is 4 bytes (because size_t is 32 bits), such as
 * riscv32.
 *
 * 'cmsg' is evaluated exactly once. If it is NULL the result is NULL too (typed as 'type*'). */
#define CMSG_TYPED_DATA(cmsg, type)                                     \
        ({                                                              \
                struct cmsghdr *_cmsg = (cmsg);                         \
                assert_cc(alignof(type) <= alignof(struct cmsghdr));    \
                _cmsg ? CAST_ALIGN_PTR(type, CMSG_DATA(_cmsg)) : (type*) NULL; \
        })

/* Look-up of a control message in 'mh' by level and type.
 * NOTE(review): presumably both return NULL when no matching message exists, and cmsg_find() additionally
 * requires cmsg_len to equal 'length'; confirm in the implementation. */
struct cmsghdr* cmsg_find(struct msghdr *mh, int level, int type, socklen_t length);
void* cmsg_find_and_copy_data(struct msghdr *mh, int level, int type, void *buf, size_t buf_len);

/* Type-safe, dereferencing version of cmsg_find(): looks for a message whose length is exactly
 * CMSG_LEN(sizeof(ctype)) and yields its payload as 'ctype*' via CMSG_TYPED_DATA(). */
#define CMSG_FIND_DATA(mh, level, type, ctype)                          \
        CMSG_TYPED_DATA(cmsg_find(mh, level, type, CMSG_LEN(sizeof(ctype))), ctype)

/* Type-safe version of cmsg_find_and_copy_data(). The destination buffer is an anonymous compound literal
 * of type 'ctype' created at the place the macro is used.
 * NOTE(review): if the callee returns 'buf', the resulting pointer is only valid until the end of the
 * block enclosing the macro invocation (the lifetime of the compound literal). */
#define CMSG_FIND_AND_COPY_DATA(mh, level, type, ctype)             \
        (ctype*) cmsg_find_and_copy_data(mh, level, type, &(ctype){}, sizeof(ctype))

/* Resolves to a type that can carry cmsghdr structures. Make sure things are properly aligned, i.e. the type
 * itself is placed properly in memory and the size is also aligned to what's appropriate for "cmsghdr"
 * structures.
 *
 * The 'cmsghdr' member gives the union the alignment of struct cmsghdr, 'buf' provides the requested number
 * of bytes. 'align_check' is never meant to be accessed: its array size becomes -1, i.e. a compile error,
 * unless 'size' is at least CMSG_SPACE(0) and already a multiple of the cmsg alignment. */
#define CMSG_BUFFER_TYPE(size)                                          \
        union {                                                         \
                struct cmsghdr cmsghdr;                                 \
                uint8_t buf[size];                                      \
                uint8_t align_check[(size) >= CMSG_SPACE(0) &&          \
                                    (size) == CMSG_ALIGN(size) ? 1 : -1]; \
        }

/*
 * Certain hardware address types (e.g. Infiniband) do not fit into sll_addr
 * (8 bytes) and run over the structure. This macro returns the correct size that
 * must be passed to the kernel.
 *
 * 'sa' must be an lvalue of type struct sockaddr_ll (its address is taken) with sll_family set to
 * AF_PACKET, which is asserted. The result is the offset of sll_addr plus the larger of sizeof(sll_addr)
 * and the hardware address length implied by sll_hatype.
 *
 * NOTE(review): sll_hatype is decoded with be16toh(), i.e. the macro expects the field to have been stored
 * in big-endian order; confirm this matches how callers fill it in.
 */
#define SOCKADDR_LL_LEN(sa)                                             \
        ({                                                              \
                const struct sockaddr_ll *_sa = &(sa);                  \
                size_t _mac_len = sizeof(_sa->sll_addr);                \
                assert(_sa->sll_family == AF_PACKET);                   \
                if (be16toh(_sa->sll_hatype) == ARPHRD_ETHER)           \
                        _mac_len = MAX(_mac_len, (size_t) ETH_ALEN);    \
                if (be16toh(_sa->sll_hatype) == ARPHRD_INFINIBAND)      \
                        _mac_len = MAX(_mac_len, (size_t) INFINIBAND_ALEN); \
                offsetof(struct sockaddr_ll, sll_addr) + _mac_len;      \
        })

/* Covers only file system and abstract AF_UNIX socket addresses, but not unnamed socket addresses.
 *
 * 'sa' must be an lvalue of type struct sockaddr_un (its address is taken) with sun_family set to AF_UNIX,
 * which is asserted. The result is the offset of sun_path plus:
 *   - abstract address (sun_path[0] is NUL): 1 for the leading NUL plus the length of the name following
 *     it, where the name ends at the next NUL byte or at the end of sun_path;
 *   - file system address: the length of the path plus 1 for the trailing NUL.
 * If a file system path fills sun_path completely without a NUL, the result hence exceeds
 * sizeof(struct sockaddr_un) by one byte. */
#define SOCKADDR_UN_LEN(sa)                                             \
        ({                                                              \
                const struct sockaddr_un *_sa = &(sa);                  \
                assert(_sa->sun_family == AF_UNIX);                     \
                offsetof(struct sockaddr_un, sun_path) +                \
                        (_sa->sun_path[0] == 0 ?                        \
                         1 + strnlen(_sa->sun_path+1, sizeof(_sa->sun_path)-1) : \
                         strnlen(_sa->sun_path, sizeof(_sa->sun_path))+1); \
        })

/* Evaluates to the address length (as size_t) appropriate for the family stored in the union
 * sockaddr_union 'saddr', which must be an lvalue as its address is taken. Fixed-size families map to the
 * sizeof() of their structure; AF_UNIX and AF_PACKET are delegated to SOCKADDR_UN_LEN() and
 * SOCKADDR_LL_LEN(). Any other family runs into assert_not_reached().
 *
 * The local pointer is deliberately called '__sa' rather than '_sa': the two nested macros declare their
 * own '_sa' initialized from their argument, which would refer to itself if the names were the same. */
#define SOCKADDR_LEN(saddr)                                             \
        ({                                                              \
                const union sockaddr_union *__sa = &(saddr);            \
                size_t _len;                                            \
                switch (__sa->sa.sa_family) {                           \
                case AF_INET:                                           \
                        _len = sizeof(struct sockaddr_in);              \
                        break;                                          \
                case AF_INET6:                                          \
                        _len = sizeof(struct sockaddr_in6);             \
                        break;                                          \
                case AF_UNIX:                                           \
                        _len = SOCKADDR_UN_LEN(__sa->un);               \
                        break;                                          \
                case AF_PACKET:                                         \
                        _len = SOCKADDR_LL_LEN(__sa->ll);               \
                        break;                                          \
                case AF_NETLINK:                                        \
                        _len = sizeof(struct sockaddr_nl);              \
                        break;                                          \
                case AF_VSOCK:                                          \
                        _len = sizeof(struct sockaddr_vm);              \
                        break;                                          \
                default:                                                \
                        assert_not_reached();                           \
                }                                                       \
                _len;                                                   \
        })

/* NOTE(review): presumably returns a newly opened socket fd suitable for issuing network ioctls, or a
 * negative errno; confirm in the implementation. */
int socket_ioctl_fd(void);

/* NOTE(review): presumably initializes *ret as an AF_UNIX address for 'path'; how abstract names and
 * over-long paths are handled, and what is returned on success, is defined by the implementation. */
int sockaddr_un_set_path(struct sockaddr_un *ret, const char *path);

/* Sets an int-valued socket option. Returns 0 on success, or the negated errno left behind by
 * setsockopt() on failure. */
static inline int setsockopt_int(int fd, int level, int optname, int value) {
        int r;

        r = setsockopt(fd, level, optname, &value, sizeof(value));
        return r < 0 ? -errno : 0;
}

/* Reads an int-valued socket option into *ret. Returns 0 on success, the result of negative_errno() if
 * getsockopt() fails, and -EIO if the length reported back is not that of an int. *ret is written only
 * on success. */
static inline int getsockopt_int(int fd, int level, int optname, int *ret) {
        int value;
        socklen_t len = sizeof(value);

        if (getsockopt(fd, level, optname, &value, &len) < 0)
                return negative_errno();

        /* Only accept the value if exactly an int's worth of data was returned. */
        if (len == sizeof(value)) {
                *ret = value;
                return 0;
        }

        return -EIO;
}

/* Bind the socket 'fd' to a network interface, given either by name or by index.
 * NOTE(review): presumably implemented with SO_BINDTODEVICE/SO_BINDTOIFINDEX; confirm, including what a
 * NULL name or a zero index means. */
int socket_bind_to_ifname(int fd, const char *ifname);
int socket_bind_to_ifindex(int fd, int ifindex);

/* Define a 64-bit version of timeval/timespec in any case, even on 32-bit userspace. Both fields of each
 * structure are uint64_t, so either structure is 16 bytes in size regardless of the width of time_t. */
struct timeval_large {
        uint64_t tvl_sec, tvl_usec;
};
struct timespec_large {
        uint64_t tvl_sec, tvl_nsec;
};

/* glibc duplicates timespec/timeval on certain 32-bit arches, once in 32-bit and once in 64-bit.
 * See __convert_scm_timestamps() in glibc source code. Hence, we need additional buffer space for them
 * to prevent recvmsg_safe() from returning -EXFULL.
 *
 * Each macro evaluates to the control buffer space for one timestamp message if the native structure
 * already has the size of the 64-bit variant, and to the space for both variants otherwise. */
#define CMSG_SPACE_TIMEVAL                                              \
        ((sizeof(struct timeval) == sizeof(struct timeval_large)) ?     \
         CMSG_SPACE(sizeof(struct timeval)) :                           \
         CMSG_SPACE(sizeof(struct timeval)) +                           \
         CMSG_SPACE(sizeof(struct timeval_large)))
#define CMSG_SPACE_TIMESPEC                                             \
        ((sizeof(struct timespec) == sizeof(struct timespec_large)) ?   \
         CMSG_SPACE(sizeof(struct timespec)) :                          \
         CMSG_SPACE(sizeof(struct timespec)) +                          \
         CMSG_SPACE(sizeof(struct timespec_large)))

/* NOTE(review): presumably a recvmsg() wrapper that reports failures as negative errno values and detects
 * truncated control data; confirm the exact checks in the implementation. */
ssize_t recvmsg_safe(int sockfd, struct msghdr *msg, int flags);

/* NOTE(review): presumably returns the address family of the socket 'fd', or a negative errno. */
int socket_get_family(int fd);
int socket_set_recvpktinfo(int fd, int af, bool b);
int socket_set_unicast_if(int fd, int af, int ifi);

/* Generic setter used by the inline wrappers in this header: takes the option name to use for IPv4 and
 * the one to use for IPv6, plus the value to set.
 * NOTE(review): presumably 'af' selects between the two and other families are rejected; confirm. */
int socket_set_option(int fd, int af, int opt_ipv4, int opt_ipv6, int val);
/* Calls socket_set_option() with IP_RECVERR as the IPv4 option and IPV6_RECVERR as the IPv6 option; 'b'
 * is passed on as the int value 0 or 1. */
static inline int socket_set_recverr(int fd, int af, bool b) {
        int enable = b;

        return socket_set_option(fd, af, IP_RECVERR, IPV6_RECVERR, enable);
}
/* Calls socket_set_option() with IP_RECVTTL as the IPv4 option and IPV6_RECVHOPLIMIT as the IPv6 option;
 * 'b' is passed on as the int value 0 or 1. */
static inline int socket_set_recvttl(int fd, int af, bool b) {
        int enable = b;

        return socket_set_option(fd, af, IP_RECVTTL, IPV6_RECVHOPLIMIT, enable);
}
/* Calls socket_set_option() with IP_TTL as the IPv4 option and IPV6_UNICAST_HOPS as the IPv6 option,
 * passing 'ttl' through as the value. */
static inline int socket_set_ttl(int fd, int af, int ttl) {
        int hops = ttl;

        return socket_set_option(fd, af, IP_TTL, IPV6_UNICAST_HOPS, hops);
}
/* Calls socket_set_option() with IP_FREEBIND as the IPv4 option and IPV6_FREEBIND as the IPv6 option; 'b'
 * is passed on as the int value 0 or 1. */
static inline int socket_set_freebind(int fd, int af, bool b) {
        int enable = b;

        return socket_set_option(fd, af, IP_FREEBIND, IPV6_FREEBIND, enable);
}
/* Calls socket_set_option() with IP_TRANSPARENT as the IPv4 option and IPV6_TRANSPARENT as the IPv6
 * option; 'b' is passed on as the int value 0 or 1. */
static inline int socket_set_transparent(int fd, int af, bool b) {
        int enable = b;

        return socket_set_option(fd, af, IP_TRANSPARENT, IPV6_TRANSPARENT, enable);
}
/* Calls socket_set_option() with IP_RECVFRAGSIZE as the IPv4 option and IPV6_RECVFRAGSIZE as the IPv6
 * option; 'b' is passed on as the int value 0 or 1. */
static inline int socket_set_recvfragsize(int fd, int af, bool b) {
        int enable = b;

        return socket_set_option(fd, af, IP_RECVFRAGSIZE, IPV6_RECVFRAGSIZE, enable);
}

/* NOTE(review): presumably stores the MTU associated with the socket 'fd' in *ret, picking the socket
 * option to query by 'af'; confirm in the implementation. */
int socket_get_mtu(int fd, int af, size_t *ret);

/* An initializer for struct ucred that initializes all fields to the invalid value appropriate for each:
 * 0 for the PID, UID_INVALID for the UID and GID_INVALID for the GID. As it is a brace-enclosed list it
 * can only be used where an initializer is permitted, e.g. 'struct ucred u = UCRED_INVALID;'. */
#define UCRED_INVALID { .pid = 0, .uid = UID_INVALID, .gid = GID_INVALID }

/* NOTE(review): presumably connects the socket 'fd' to the AF_UNIX socket at 'path', interpreted relative
 * to the directory 'dir_fd'; confirm, including how paths too long for sun_path are handled. */
int connect_unix_path(int fd, int dir_fd, const char *path);

/* Parses AF_UNIX and AF_VSOCK addresses. AF_INET[6] require some netlink calls, so it cannot be in
 * src/basic/ and is done from 'socket_local_address' from src/shared/. Return -EPROTO in case of
 * protocol mismatch. */
int socket_address_parse_unix(SocketAddress *ret_address, const char *s);
int socket_address_parse_vsock(SocketAddress *ret_address, const char *s);

/* libc's SOMAXCONN is defined to 128 or 4096 (at least on glibc). But actually, the value can be much
 * larger. In our codebase we want to set it to the max usually, since nowadays socket memory is properly
 * tracked by memcg, and hence we don't need to enforce extra limits here. Moreover, the kernel caps it to
 * /proc/sys/net/core/somaxconn anyway, thus by setting this to unbounded we just make that sysctl file
 * authoritative. */
#define SOMAXCONN_DELUXE INT_MAX