1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
|
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
* Copyright (C) 2014 Red Hat
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
*
*/
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#ifdef __linux__
#include <linux/falloc.h>
#endif
#include "FS.h"
#include "acconfig.h"
#ifdef HAVE_LIBXFS
#include "XFS.h"
#endif
#if defined(__APPLE__) || defined(__FreeBSD__)
#include <sys/mount.h>
#else
#include <sys/vfs.h>
#endif
#include "include/compat.h"
// ---------------
/**
 * Factory: build the FS implementation matching a filesystem magic number.
 *
 * @param f_type filesystem type identifier (create_by_fd() passes the
 *               f_type field reported by fstatfs)
 * @return a heap-allocated XFS object when libxfs support is compiled in and
 *         f_type equals XFS_SUPER_MAGIC; otherwise a generic FS object.
 *         The object is allocated with new and handed to the caller.
 */
FS *FS::create(uint64_t f_type)
{
#ifdef HAVE_LIBXFS
  // XFS is the only specialised backend selected here.
  if (f_type == XFS_SUPER_MAGIC) {
    return new XFS;
  }
#endif
  // Anything unrecognised gets the generic implementation.
  return new FS;
}
/**
 * Factory: build the FS implementation for the filesystem backing an open
 * file descriptor.
 *
 * Queries the filesystem type with fstatfs() and forwards it to create().
 *
 * @param fd open file descriptor on the filesystem of interest
 * @return a heap-allocated FS (or subclass) object; if the filesystem type
 *         cannot be determined, the generic FS implementation is returned
 */
FS *FS::create_by_fd(int fd)
{
  struct statfs st;
  if (::fstatfs(fd, &st) < 0) {
    // st is not filled in on failure; reading st.f_type would be an
    // uninitialized read.  Fall back to the generic implementation, which
    // is also what create() returns for any unrecognised type.
    return new FS;
  }
  return create(st.f_type);
}
// ---------------
/**
 * Set an allocation hint on a file.
 *
 * This base-class implementation ignores both arguments and does nothing.
 *
 * @param fd   file descriptor (unused here)
 * @param hint hint value (unused here)
 * @return always 0
 */
int FS::set_alloc_hint(int fd, uint64_t hint)
{
  // Both arguments are intentionally ignored.
  static_cast<void>(fd);
  static_cast<void>(hint);
  return 0;
}
#ifdef HAVE_NAME_TO_HANDLE_AT
/**
 * Obtain an opaque handle for an open file via name_to_handle_at().
 *
 * On success *h holds the raw bytes of the struct file_handle header
 * followed by handle_bytes bytes of handle data, i.e. the layout that
 * open_handle() casts back to a struct file_handle.
 *
 * @param fd open file descriptor to describe
 * @param h  output; overwritten with the serialized handle on success
 * @return 0 on success, -errno if name_to_handle_at() fails
 */
int FS::get_handle(int fd, std::string *h)
{
  // The buffer is accessed through a struct file_handle pointer, so it must
  // be aligned for that type; a plain char array carries no such guarantee.
  alignas(struct file_handle) char buf[sizeof(struct file_handle) + MAX_HANDLE_SZ];
  struct file_handle *fh = reinterpret_cast<struct file_handle *>(buf);

  // Tell the kernel how much room is available for the handle payload.
  fh->handle_bytes = MAX_HANDLE_SZ;

  int mount_id;  // filled in by the kernel; not used by this function
  if (name_to_handle_at(fd, "", fh, &mount_id, AT_EMPTY_PATH) < 0) {
    return -errno;
  }

  // Keep only the header plus the bytes the kernel actually wrote.
  h->assign(buf, sizeof(struct file_handle) + fh->handle_bytes);
  return 0;
}
/**
 * Open a file from a handle previously produced by get_handle().
 *
 * @param mount_fd descriptor passed to open_by_handle_at() as its mount fd
 * @param h        serialized handle: a struct file_handle header followed
 *                 by handle_bytes bytes of handle data
 * @param flags    open flags forwarded to open_by_handle_at()
 * @return the new file descriptor on success;
 *         -EINVAL if h is too short to hold a struct file_handle header;
 *         -ERANGE if the header claims more handle bytes than h contains;
 *         -errno if open_by_handle_at() fails
 */
int FS::open_handle(int mount_fd, const std::string& h, int flags)
{
  if (h.length() < sizeof(struct file_handle)) {
    return -EINVAL;
  }
  struct file_handle *fh = (struct file_handle *)h.data();

  // handle_bytes counts only the payload that follows the header, so it has
  // to fit in what is left of the string after the header.  The subtraction
  // cannot underflow because of the length check above.
  const size_t payload_avail = h.length() - sizeof(struct file_handle);
  if (fh->handle_bytes > payload_avail) {
    return -ERANGE;
  }

  int fd = open_by_handle_at(mount_fd, fh, flags);
  if (fd < 0)
    return -errno;
  return fd;
}
#else // HAVE_NAME_TO_HANDLE_AT
/**
 * Stub used when HAVE_NAME_TO_HANDLE_AT is not defined.
 *
 * @param fd ignored
 * @param h  ignored; never written
 * @return always -EOPNOTSUPP
 */
int FS::get_handle(int fd, std::string *h)
{
  // File handles are unavailable in this build configuration.
  static_cast<void>(fd);
  static_cast<void>(h);
  return -EOPNOTSUPP;
}
/**
 * Stub used when HAVE_NAME_TO_HANDLE_AT is not defined.
 *
 * @param mount_fd ignored
 * @param h        ignored
 * @param flags    ignored
 * @return always -EOPNOTSUPP
 */
int FS::open_handle(int mount_fd, const std::string& h, int flags)
{
  // File handles are unavailable in this build configuration.
  static_cast<void>(mount_fd);
  static_cast<void>(h);
  static_cast<void>(flags);
  return -EOPNOTSUPP;
}
#endif // HAVE_NAME_TO_HANDLE_AT
/**
 * Copy a byte range from one file to another.
 *
 * Placeholder: this implementation unconditionally invokes
 * ceph_abort_msg("write me") and has no return statement, so none of the
 * arguments are used.
 *
 * @param to_fd       destination file descriptor
 * @param to_offset   offset in the destination
 * @param from_fd     source file descriptor
 * @param from_offset offset in the source
 * @param from_len    number of bytes to copy
 */
int FS::copy_file_range(int to_fd,
                        uint64_t to_offset,
                        int from_fd,
                        uint64_t from_offset,
                        uint64_t from_len)
{
  // Not implemented yet.
  ceph_abort_msg("write me");
}
/**
 * Make a byte range of a file read back as zeros.
 *
 * Where available, first tries to punch a hole with fallocate(); if that is
 * not compiled in, or the filesystem reports EOPNOTSUPP, falls back to
 * seeking to the offset and writing a buffer of zeros.
 *
 * @param fd     file descriptor to operate on
 * @param offset start of the range, in bytes
 * @param length size of the range, in bytes
 * @return 0 if fallocate() succeeded; -errno if fallocate() failed with
 *         anything other than EOPNOTSUPP or if the seek failed; otherwise
 *         whatever bufferlist::write_fd() returns
 */
int FS::zero(int fd, uint64_t offset, uint64_t length)
{
  /*
    From the fallocate(2) man page:

      Specifying the FALLOC_FL_PUNCH_HOLE flag (available since Linux 2.6.38)
      in mode deallocates space (i.e., creates a hole) in the byte range
      starting at offset and continuing for len bytes. Within the specified
      range, partial filesystem blocks are zeroed, and whole filesystem
      blocks are removed from the file. After a successful call, subsequent
      reads from this range will return zeroes.

      The FALLOC_FL_PUNCH_HOLE flag must be ORed with FALLOC_FL_KEEP_SIZE in
      mode; in other words, even when punching off the end of the file, the
      file size (as reported by stat(2)) does not change.

      Not all filesystems support FALLOC_FL_PUNCH_HOLE; if a filesystem
      doesn't support the operation, an error is returned. The operation is
      supported on at least the following filesystems:
      * XFS (since Linux 2.6.38)
      * ext4 (since Linux 3.0)
      * Btrfs (since Linux 3.7)
      * tmpfs (since Linux 3.5)

    So: we only do this if PUNCH_HOLE *and* KEEP_SIZE are defined.
  */
#if !defined(__APPLE__) && !defined(__FreeBSD__)
# ifdef CEPH_HAVE_FALLOCATE
#  if defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
  // first try fallocate
  if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                offset, length) >= 0) {
    return 0;
  }
  if (errno != EOPNOTSUPP) {
    return -errno;  // a real error
  }
  // if that failed (-EOPNOTSUPP), fall back to writing zeros.
#  endif
# endif
#endif

  // fall back to writing zeros
  bufferlist bl;
  bl.append_zero(length);

  // Keep the seek result in a 64-bit variable: narrowing it to int would
  // turn valid positions of 2 GiB and above into garbage (possibly negative)
  // values and report a bogus error.
  const int64_t pos = ::lseek64(fd, offset, SEEK_SET);
  if (pos < 0) {
    return -errno;
  }
  return bl.write_fd(fd);
}
// ---------------
|