1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
|
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "basefs_allocator.h"
#include "block_range.h"
#include "hashmap.h"
#include "base_fs.h"
struct base_fs_allocator {
struct ext2fs_hashmap *entries;
struct basefs_entry *cur_entry;
/* The next expected logical block to allocate for cur_entry. */
blk64_t next_lblk;
/* Blocks which are definitely owned by a single inode in BaseFS. */
ext2fs_block_bitmap exclusive_block_map;
/* Blocks which are available to the first inode that requests it. */
ext2fs_block_bitmap dedup_block_map;
};
static errcode_t basefs_block_allocator(ext2_filsys, blk64_t, blk64_t *,
struct blk_alloc_ctx *ctx);
/*
* Free any reserved, but unconsumed block ranges in the allocator. This both
* frees the block_range_list data structure and unreserves exclusive blocks
* from the block map.
*/
static void fs_free_blocks_range(ext2_filsys fs,
struct base_fs_allocator *allocator,
struct block_range_list *list)
{
ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map;
blk64_t block;
while (list->head) {
block = consume_next_block(list);
if (ext2fs_test_block_bitmap2(exclusive_map, block)) {
ext2fs_unmark_block_bitmap2(fs->block_map, block);
ext2fs_unmark_block_bitmap2(exclusive_map, block);
}
}
}
/*
* Free any blocks in the bitmap that were reserved but never used. This is
* needed to free dedup_block_map and ensure the free block bitmap is
* internally consistent.
*/
static void fs_free_blocks_bitmap(ext2_filsys fs, ext2fs_block_bitmap bitmap)
{
blk64_t block = 0;
blk64_t start = fs->super->s_first_data_block;
blk64_t end = ext2fs_blocks_count(fs->super) - 1;
errcode_t retval;
for (;;) {
retval = ext2fs_find_first_set_block_bitmap2(bitmap, start, end,
&block);
if (retval)
break;
ext2fs_unmark_block_bitmap2(fs->block_map, block);
start = block + 1;
}
}
static void basefs_allocator_free(ext2_filsys fs,
struct base_fs_allocator *allocator)
{
struct basefs_entry *e;
struct ext2fs_hashmap_entry *it = NULL;
struct ext2fs_hashmap *entries = allocator->entries;
if (entries) {
while ((e = ext2fs_hashmap_iter_in_order(entries, &it))) {
fs_free_blocks_range(fs, allocator, &e->blocks);
delete_block_ranges(&e->blocks);
}
ext2fs_hashmap_free(entries);
}
fs_free_blocks_bitmap(fs, allocator->dedup_block_map);
ext2fs_free_block_bitmap(allocator->exclusive_block_map);
ext2fs_free_block_bitmap(allocator->dedup_block_map);
free(allocator);
}
/*
* Build a bitmap of which blocks are definitely owned by exactly one file in
* Base FS. Blocks which are not valid or are de-duplicated are skipped. This
* is called during allocator initialization, to ensure that libext2fs does
* not allocate which we want to re-use.
*
* If a block was allocated in the initial filesystem, it can never be re-used,
* so it will appear in neither the exclusive or dedup set. If a block is used
* by multiple files, it will be removed from the owned set and instead added
* to the dedup set.
*
* The dedup set is not removed from fs->block_map. This allows us to re-use
* dedup blocks separately and not have them be allocated outside of file data.
*
* This function returns non-zero if the block was owned, and 0 otherwise.
*/
static int fs_reserve_block(ext2_filsys fs,
struct base_fs_allocator *allocator,
blk64_t block)
{
ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map;
ext2fs_block_bitmap dedup_map = allocator->dedup_block_map;
if (block >= ext2fs_blocks_count(fs->super))
return 0;
if (ext2fs_test_block_bitmap2(fs->block_map, block)) {
if (!ext2fs_test_block_bitmap2(exclusive_map, block))
return 0;
ext2fs_unmark_block_bitmap2(exclusive_map, block);
ext2fs_mark_block_bitmap2(dedup_map, block);
return 0;
} else {
ext2fs_mark_block_bitmap2(fs->block_map, block);
ext2fs_mark_block_bitmap2(exclusive_map, block);
return 1;
}
}
/*
* Walk the requested block list and reserve blocks, either into the owned
* pool or the dedup pool as appropriate. We stop once the file has enough
* owned blocks to satisfy |file_size|. This allows any extra blocks to be
* re-used, since otherwise a large block movement between files could
* trigger block allocation errors.
*/
static void fs_reserve_blocks_range(ext2_filsys fs,
struct base_fs_allocator *allocator,
struct block_range_list *list,
off_t file_size)
{
blk64_t block;
off_t blocks_needed;
off_t blocks_acquired = 0;
struct block_range *blocks = list->head;
blocks_needed = file_size + (fs->blocksize - 1);
blocks_needed /= fs->blocksize;
while (blocks) {
for (block = blocks->start; block <= blocks->end; block++) {
if (fs_reserve_block(fs, allocator, block))
blocks_acquired++;
if (blocks_acquired >= blocks_needed)
return;
}
blocks = blocks->next;
}
}
/*
* For each file in the base FS map, ensure that its blocks are reserved in
* the actual block map. This prevents libext2fs from allocating them for
* general purpose use, and ensures that if the file needs data blocks, they
* can be re-acquired exclusively for that file.
*
* If a file in the base map is missing, or not a regular file in the new
* filesystem, then it's skipped to ensure that its blocks are reusable.
*/
static errcode_t fs_reserve_blocks(ext2_filsys fs,
struct base_fs_allocator *allocator,
const char *src_dir)
{
int nbytes;
char full_path[PATH_MAX];
const char *sep = "/";
struct stat st;
struct basefs_entry *e;
struct ext2fs_hashmap_entry *it = NULL;
struct ext2fs_hashmap *entries = allocator->entries;
if (strlen(src_dir) && src_dir[strlen(src_dir) - 1] == '/')
sep = "";
while ((e = ext2fs_hashmap_iter_in_order(entries, &it))) {
nbytes = snprintf(full_path, sizeof(full_path), "%s%s%s",
src_dir, sep, e->path);
if (nbytes >= sizeof(full_path))
return ENAMETOOLONG;
if (lstat(full_path, &st) || !S_ISREG(st.st_mode))
continue;
fs_reserve_blocks_range(fs, allocator, &e->blocks, st.st_size);
}
return 0;
}
errcode_t base_fs_alloc_load(ext2_filsys fs, const char *file,
const char *mountpoint, const char *src_dir)
{
errcode_t retval = 0;
struct base_fs_allocator *allocator;
allocator = calloc(1, sizeof(*allocator));
if (!allocator) {
retval = ENOMEM;
goto out;
}
retval = ext2fs_read_bitmaps(fs);
if (retval)
goto err_load;
allocator->cur_entry = NULL;
allocator->entries = basefs_parse(file, mountpoint);
if (!allocator->entries) {
retval = EIO;
goto err_load;
}
retval = ext2fs_allocate_block_bitmap(fs, "exclusive map",
&allocator->exclusive_block_map);
if (retval)
goto err_load;
retval = ext2fs_allocate_block_bitmap(fs, "dedup map",
&allocator->dedup_block_map);
if (retval)
goto err_load;
retval = fs_reserve_blocks(fs, allocator, src_dir);
if (retval)
goto err_load;
/* Override the default allocator */
fs->get_alloc_block2 = basefs_block_allocator;
fs->priv_data = allocator;
goto out;
err_load:
basefs_allocator_free(fs, allocator);
out:
return retval;
}
/* Try and acquire the next usable block from the Base FS map. */
static errcode_t get_next_block(ext2_filsys fs, struct base_fs_allocator *allocator,
struct block_range_list* list, blk64_t *ret)
{
blk64_t block;
ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map;
ext2fs_block_bitmap dedup_map = allocator->dedup_block_map;
if (!list->head)
return EXT2_ET_BLOCK_ALLOC_FAIL;
block = consume_next_block(list);
if (block >= ext2fs_blocks_count(fs->super))
return EXT2_ET_BLOCK_ALLOC_FAIL;
if (ext2fs_test_block_bitmap2(exclusive_map, block)) {
ext2fs_unmark_block_bitmap2(exclusive_map, block);
*ret = block;
return 0;
}
if (ext2fs_test_block_bitmap2(dedup_map, block)) {
ext2fs_unmark_block_bitmap2(dedup_map, block);
*ret = block;
return 0;
}
return EXT2_ET_BLOCK_ALLOC_FAIL;
}
/*
* BaseFS lists blocks in logical block order. However, the allocator hook is
* only called if a block needs to be allocated. In the case of a deduplicated
* block, or a hole, the hook is not invoked. This means the next block
* allocation request will be out of sequence. For example, consider if BaseFS
* specifies the following (0 being a hole):
* 1 2 3 0 4 5
*
* If the new file has a hole at logical block 0, we could accidentally
* shift the entire expected block list as follows:
* 0 1 2 0 3 4
*
* To account for this, we track the next expected logical block in the
* allocator. If the current request is for a later logical block, we skip and
* free the intermediate physical blocks that would have been allocated. This
* ensures the original block assignment is respected.
*/
static void skip_blocks(ext2_filsys fs, struct base_fs_allocator *allocator,
struct blk_alloc_ctx *ctx)
{
blk64_t block;
struct block_range_list *list = &allocator->cur_entry->blocks;
ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map;
while (list->head && allocator->next_lblk < ctx->lblk) {
block = consume_next_block(list);
if (block >= ext2fs_blocks_count(fs->super))
continue;
if (ext2fs_test_block_bitmap2(exclusive_map, block)) {
ext2fs_unmark_block_bitmap2(exclusive_map, block);
ext2fs_unmark_block_bitmap2(fs->block_map, block);
}
allocator->next_lblk++;
}
}
static errcode_t basefs_block_allocator(ext2_filsys fs, blk64_t goal,
blk64_t *ret, struct blk_alloc_ctx *ctx)
{
errcode_t retval;
struct base_fs_allocator *allocator = fs->priv_data;
struct basefs_entry *e = allocator->cur_entry;
ext2fs_block_bitmap dedup_map = allocator->dedup_block_map;
if (e && ctx && (ctx->flags & BLOCK_ALLOC_DATA)) {
if (allocator->next_lblk < ctx->lblk)
skip_blocks(fs, allocator, ctx);
allocator->next_lblk = ctx->lblk + 1;
if (!get_next_block(fs, allocator, &e->blocks, ret))
return 0;
}
retval = ext2fs_new_block2(fs, goal, fs->block_map, ret);
if (!retval) {
ext2fs_mark_block_bitmap2(fs->block_map, *ret);
return 0;
}
if (retval != EXT2_ET_BLOCK_ALLOC_FAIL)
return retval;
/* Try to steal a block from the dedup pool. */
retval = ext2fs_find_first_set_block_bitmap2(dedup_map,
fs->super->s_first_data_block,
ext2fs_blocks_count(fs->super) - 1, ret);
if (!retval) {
ext2fs_unmark_block_bitmap2(dedup_map, *ret);
return 0;
}
/*
* As a last resort, take any block from our file's list. This
* risks bloating the diff, but means we are more likely to
* successfully build an image.
*/
while (e->blocks.head) {
if (!get_next_block(fs, allocator, &e->blocks, ret))
return 0;
}
return EXT2_ET_BLOCK_ALLOC_FAIL;
}
void base_fs_alloc_cleanup(ext2_filsys fs)
{
basefs_allocator_free(fs, fs->priv_data);
fs->priv_data = NULL;
fs->get_alloc_block2 = NULL;
}
errcode_t base_fs_alloc_set_target(ext2_filsys fs, const char *target_path,
const char *name EXT2FS_ATTR((unused)),
ext2_ino_t parent_ino EXT2FS_ATTR((unused)),
ext2_ino_t root EXT2FS_ATTR((unused)), mode_t mode)
{
struct base_fs_allocator *allocator = fs->priv_data;
if (mode != S_IFREG)
return 0;
if (allocator) {
allocator->cur_entry = ext2fs_hashmap_lookup(allocator->entries,
target_path,
strlen(target_path));
allocator->next_lblk = 0;
}
return 0;
}
errcode_t base_fs_alloc_unset_target(ext2_filsys fs,
const char *target_path EXT2FS_ATTR((unused)),
const char *name EXT2FS_ATTR((unused)),
ext2_ino_t parent_ino EXT2FS_ATTR((unused)),
ext2_ino_t root EXT2FS_ATTR((unused)), mode_t mode)
{
struct base_fs_allocator *allocator = fs->priv_data;
if (!allocator || !allocator->cur_entry || mode != S_IFREG)
return 0;
fs_free_blocks_range(fs, allocator, &allocator->cur_entry->blocks);
delete_block_ranges(&allocator->cur_entry->blocks);
return 0;
}
|