1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
|
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
* Copyright (C) 2011 New Dream Network
* Copyright (C) 2018 Red Hat, Inc.
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
*
*/
#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#if defined(__linux__)
#include <sys/vfs.h>
#endif
#include "include/compat.h"
#include "include/sock_compat.h"
#include "common/safe_io.h"
// The type-value for a ZFS FS in fstatfs.
#define FS_ZFS_TYPE 0xde
// On FreeBSD, ZFS fallocate always fails since it is considered impossible to
// reserve space on a COW filesystem; posix_fallocate() returns EINVAL.
// On Linux, glibc already emulates the reservation in this case.
// When the space is instead allocated manually (by writing a buffer), that is
// still not a real guarantee that a full buffer is allocated on disk, since
// the data could be compressed. To prevent this, the written buffer would need
// to be filled with random data.
// Emulate space reservation by writing filler bytes over the byte range
// [offset, offset + len) of fd.
//
// Returns 0 on success, otherwise a positive errno value. When len <= 0
// nothing is written and 0 is returned once the seek has succeeded. The file
// position of fd is moved by this call.
int manual_fallocate(int fd, off_t offset, off_t len) {
  // lseek() returns an off_t. Storing it in an int would truncate large
  // positions and could make a valid offset look like the -1 error marker,
  // so compare the result directly.
  if (lseek(fd, offset, SEEK_SET) == -1)
    return errno;
  char data[1024*128];
  // TODO: compressing filesystems would require random data
  // FIPS zeroization audit 20191115: this memset is not security related.
  memset(data, 0x42, sizeof(data));
  const off_t chunk = sizeof(data);
  // Count down the remaining bytes instead of counting an offset up, so no
  // intermediate sum can overflow off_t for very large len.
  off_t remaining = len;
  while (remaining > 0) {
    const off_t todo = remaining < chunk ? remaining : chunk;
    // safe_write() (common/safe_io.h) is expected to report failure as a
    // negative errno value rather than -1 plus errno; treat any negative
    // result as an error and flip the sign to get the positive errno.
    int r = safe_write(fd, data, todo);
    if (r < 0) {
      return -r;
    }
    remaining -= todo;
  }
  return 0;
}
// Report whether the filesystem behind basedir_fd is ZFS.
//
// Returns 1 when fstatfs() succeeds and reports a filesystem type equal to
// FS_ZFS_TYPE, and 0 otherwise (including when fstatfs() itself fails).
int on_zfs(int basedir_fd) {
  struct statfs basefs;
  // A failed fstatfs() leaves basefs unset; reading f_type would then use an
  // indeterminate value, so treat failure as "not ZFS".
  if (fstatfs(basedir_fd, &basefs) != 0) {
    return 0;
  }
  return (basefs.f_type == FS_ZFS_TYPE);
}
// Reserve len bytes starting at offset in the file referred to by fd, using
// whichever mechanism the build platform offers.
//
// Returns 0 on success, otherwise a positive errno value.
int ceph_posix_fallocate(int fd, off_t offset, off_t len) {
#ifdef HAVE_POSIX_FALLOCATE
  // Anything that is not ZFS goes straight to posix_fallocate(); ZFS is
  // handled by writing the range out by hand.
  if (!on_zfs(fd)) {
    return posix_fallocate(fd, offset, len);
  }
  return manual_fallocate(fd, offset, len);
#elif defined(__APPLE__)
  fstore_t store;
  store.fst_offset = offset;
  store.fst_length = len;
  store.fst_posmode = F_PEOFPOSMODE;
  store.fst_flags = F_ALLOCATECONTIG;
  const int rc = fcntl(fd, F_PREALLOCATE, &store);
  // fcntl() signals failure with -1; hand the caller errno in that case.
  return (rc == -1) ? errno : rc;
#else
  return manual_fallocate(fd, offset, len);
#endif
}
// Create a pipe whose two descriptors both carry the close-on-exec flag.
//
// Returns 0 on success and -1 on failure with errno set.
int pipe_cloexec(int pipefd[2])
{
#if defined(HAVE_PIPE2)
  return pipe2(pipefd, O_CLOEXEC);
#else
  if (pipe(pipefd) == -1)
    return -1;

  /*
   * Without pipe2 the flag has to be applied after the fact, which is the
   * old-fashioned, race-condition prone way.
   */
  if (fcntl(pipefd[0], F_SETFD, FD_CLOEXEC) >= 0 &&
      fcntl(pipefd[1], F_SETFD, FD_CLOEXEC) >= 0) {
    return 0;
  }

  // Remember the fcntl() error before cleaning up, then restore it so the
  // caller sees it rather than whatever close() leaves behind.
  const int fcntl_errno = errno;
  VOID_TEMP_FAILURE_RETRY(close(pipefd[0]));
  VOID_TEMP_FAILURE_RETRY(close(pipefd[1]));
  errno = fcntl_errno;
  return -1;
#endif
}
// Create a socket with the close-on-exec flag set.
//
// Returns the new descriptor on success and -1 on failure with errno set.
int socket_cloexec(int domain, int type, int protocol)
{
#ifdef SOCK_CLOEXEC
  // Request close-on-exec as part of the socket() call itself.
  const int flagged_type = type | SOCK_CLOEXEC;
  return socket(domain, flagged_type, protocol);
#else
  const int sock = socket(domain, type, protocol);
  if (sock == -1)
    return -1;
  if (fcntl(sock, F_SETFD, FD_CLOEXEC) >= 0)
    return sock;

  // Setting the flag failed: drop the socket but report the fcntl() error.
  const int fcntl_errno = errno;
  VOID_TEMP_FAILURE_RETRY(close(sock));
  errno = fcntl_errno;
  return -1;
#endif
}
// Create a connected pair of sockets in sv, both with the close-on-exec flag
// set.
//
// Returns 0 on success and -1 on failure with errno set.
int socketpair_cloexec(int domain, int type, int protocol, int sv[2])
{
#ifdef SOCK_CLOEXEC
  // Request close-on-exec as part of the socketpair() call itself.
  const int flagged_type = type | SOCK_CLOEXEC;
  return socketpair(domain, flagged_type, protocol, sv);
#else
  if (socketpair(domain, type, protocol, sv) == -1)
    return -1;
  if (fcntl(sv[0], F_SETFD, FD_CLOEXEC) >= 0 &&
      fcntl(sv[1], F_SETFD, FD_CLOEXEC) >= 0) {
    return 0;
  }

  // Setting the flag failed: close both ends but report the fcntl() error.
  const int fcntl_errno = errno;
  VOID_TEMP_FAILURE_RETRY(close(sv[0]));
  VOID_TEMP_FAILURE_RETRY(close(sv[1]));
  errno = fcntl_errno;
  return -1;
#endif
}
// Accept a connection on sockfd and return a descriptor with the
// close-on-exec flag set.
//
// Returns the new descriptor on success and -1 on failure with errno set.
int accept_cloexec(int sockfd, struct sockaddr* addr, socklen_t* addrlen)
{
#ifdef HAVE_ACCEPT4
  return accept4(sockfd, addr, addrlen, SOCK_CLOEXEC);
#else
  const int conn = accept(sockfd, addr, addrlen);
  if (conn == -1)
    return -1;
  if (fcntl(conn, F_SETFD, FD_CLOEXEC) >= 0)
    return conn;

  // Setting the flag failed: drop the connection but report the fcntl() error.
  const int fcntl_errno = errno;
  VOID_TEMP_FAILURE_RETRY(close(conn));
  errno = fcntl_errno;
  return -1;
#endif
}
#if defined(__FreeBSD__)
// FreeBSD stand-in for sched_setaffinity(): ignores every argument, changes
// nothing, and always reports success (0).
int sched_setaffinity(pid_t pid, size_t cpusetsize,
                      cpu_set_t *mask)
{
  (void)pid;
  (void)cpusetsize;
  (void)mask;
  return 0;
}
#endif
|