summaryrefslogtreecommitdiffstats
path: root/fs/xfs/scrub/readdir.c
blob: 01c9a2dc0f2c48357b9a53f4cdb8442d6f284ff9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_trace.h"
#include "xfs_bmap.h"
#include "xfs_trans.h"
#include "xfs_error.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/readdir.h"

/* Call a function for every entry in a shortform directory. */
STATIC int
xchk_dir_walk_sf(
	struct xfs_scrub	*sc,
	struct xfs_inode	*dp,
	xchk_dirent_fn		dirent_fn,
	void			*priv)
{
	struct xfs_name		name = {
		.name		= ".",
		.len		= 1,
		.type		= XFS_DIR3_FT_DIR,
	};
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_da_geometry	*geo = mp->m_dir_geo;
	struct xfs_dir2_sf_entry *sfep;
	struct xfs_dir2_sf_hdr	*sfp = dp->i_df.if_data;
	xfs_ino_t		ino;
	xfs_dir2_dataptr_t	dapos;
	unsigned int		i;
	int			error;

	ASSERT(dp->i_df.if_bytes == dp->i_disk_size);
	ASSERT(sfp != NULL);

	/* dot entry */
	dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
			geo->data_entry_offset);

	error = dirent_fn(sc, dp, dapos, &name, dp->i_ino, priv);
	if (error)
		return error;

	/* dotdot entry */
	dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
			geo->data_entry_offset +
			xfs_dir2_data_entsize(mp, sizeof(".") - 1));
	ino = xfs_dir2_sf_get_parent_ino(sfp);
	name.name = "..";
	name.len = 2;

	error = dirent_fn(sc, dp, dapos, &name, ino, priv);
	if (error)
		return error;

	/* iterate everything else */
	sfep = xfs_dir2_sf_firstentry(sfp);
	for (i = 0; i < sfp->count; i++) {
		dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
				xfs_dir2_sf_get_offset(sfep));
		ino = xfs_dir2_sf_get_ino(mp, sfp, sfep);
		name.name = sfep->name;
		name.len = sfep->namelen;
		name.type = xfs_dir2_sf_get_ftype(mp, sfep);

		error = dirent_fn(sc, dp, dapos, &name, ino, priv);
		if (error)
			return error;

		sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
	}

	return 0;
}

/* Call a function for every entry in a block directory. */
STATIC int
xchk_dir_walk_block(
	struct xfs_scrub	*sc,
	struct xfs_inode	*dp,
	xchk_dirent_fn		dirent_fn,
	void			*priv)
{
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_da_geometry	*geo = mp->m_dir_geo;
	struct xfs_buf		*bp;
	unsigned int		off, next_off, end;
	int			error;

	error = xfs_dir3_block_read(sc->tp, dp, dp->i_ino, &bp);
	if (error)
		return error;

	/* Walk each directory entry. */
	end = xfs_dir3_data_end_offset(geo, bp->b_addr);
	for (off = geo->data_entry_offset; off < end; off = next_off) {
		struct xfs_name			name = { };
		struct xfs_dir2_data_unused	*dup = bp->b_addr + off;
		struct xfs_dir2_data_entry	*dep = bp->b_addr + off;
		xfs_ino_t			ino;
		xfs_dir2_dataptr_t		dapos;

		/* Skip an empty entry. */
		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
			next_off = off + be16_to_cpu(dup->length);
			continue;
		}

		/* Otherwise, find the next entry and report it. */
		next_off = off + xfs_dir2_data_entsize(mp, dep->namelen);
		if (next_off > end)
			break;

		dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, off);
		ino = be64_to_cpu(dep->inumber);
		name.name = dep->name;
		name.len = dep->namelen;
		name.type = xfs_dir2_data_get_ftype(mp, dep);

		error = dirent_fn(sc, dp, dapos, &name, ino, priv);
		if (error)
			break;
	}

	xfs_trans_brelse(sc->tp, bp);
	return error;
}

/* Read a leaf-format directory buffer. */
STATIC int
xchk_read_leaf_dir_buf(
	struct xfs_trans	*tp,
	struct xfs_inode	*dp,
	struct xfs_da_geometry	*geo,
	xfs_dir2_off_t		*curoff,
	struct xfs_buf		**bpp)
{
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	map;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(dp, XFS_DATA_FORK);
	xfs_dablk_t		last_da;
	xfs_dablk_t		map_off;
	xfs_dir2_off_t		new_off;

	*bpp = NULL;

	/*
	 * Look for mapped directory blocks at or above the current offset.
	 * Truncate down to the nearest directory block to start the scanning
	 * operation.
	 */
	last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET);
	map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *curoff));

	if (!xfs_iext_lookup_extent(dp, ifp, map_off, &icur, &map))
		return 0;
	if (map.br_startoff >= last_da)
		return 0;
	xfs_trim_extent(&map, map_off, last_da - map_off);

	/* Read the directory block of that first mapping. */
	new_off = xfs_dir2_da_to_byte(geo, map.br_startoff);
	if (new_off > *curoff)
		*curoff = new_off;

	return xfs_dir3_data_read(tp, dp, dp->i_ino, map.br_startoff, 0, bpp);
}

/* Call a function for every entry in a leaf directory. */
STATIC int
xchk_dir_walk_leaf(
	struct xfs_scrub	*sc,
	struct xfs_inode	*dp,
	xchk_dirent_fn		dirent_fn,
	void			*priv)
{
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_da_geometry	*geo = mp->m_dir_geo;
	struct xfs_buf		*bp = NULL;
	xfs_dir2_off_t		curoff = 0;
	unsigned int		offset = 0;
	int			error;

	/* Iterate every directory offset in this directory. */
	while (curoff < XFS_DIR2_LEAF_OFFSET) {
		struct xfs_name			name = { };
		struct xfs_dir2_data_unused	*dup;
		struct xfs_dir2_data_entry	*dep;
		xfs_ino_t			ino;
		unsigned int			length;
		xfs_dir2_dataptr_t		dapos;

		/*
		 * If we have no buffer, or we're off the end of the
		 * current buffer, need to get another one.
		 */
		if (!bp || offset >= geo->blksize) {
			if (bp) {
				xfs_trans_brelse(sc->tp, bp);
				bp = NULL;
			}

			error = xchk_read_leaf_dir_buf(sc->tp, dp, geo, &curoff,
					&bp);
			if (error || !bp)
				break;

			/*
			 * Find our position in the block.
			 */
			offset = geo->data_entry_offset;
			curoff += geo->data_entry_offset;
		}

		/* Skip an empty entry. */
		dup = bp->b_addr + offset;
		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
			length = be16_to_cpu(dup->length);
			offset += length;
			curoff += length;
			continue;
		}

		/* Otherwise, find the next entry and report it. */
		dep = bp->b_addr + offset;
		length = xfs_dir2_data_entsize(mp, dep->namelen);

		dapos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
		ino = be64_to_cpu(dep->inumber);
		name.name = dep->name;
		name.len = dep->namelen;
		name.type = xfs_dir2_data_get_ftype(mp, dep);

		error = dirent_fn(sc, dp, dapos, &name, ino, priv);
		if (error)
			break;

		/* Advance to the next entry. */
		offset += length;
		curoff += length;
	}

	if (bp)
		xfs_trans_brelse(sc->tp, bp);
	return error;
}

/*
 * Call a function for every entry in a directory.
 *
 * Callers must hold the ILOCK.  File types are XFS_DIR3_FT_*.
 */
int
xchk_dir_walk(
	struct xfs_scrub	*sc,
	struct xfs_inode	*dp,
	xchk_dirent_fn		dirent_fn,
	void			*priv)
{
	struct xfs_da_args	args = {
		.dp		= dp,
		.geo		= dp->i_mount->m_dir_geo,
		.trans		= sc->tp,
		.owner		= dp->i_ino,
	};
	int			error;

	if (xfs_is_shutdown(dp->i_mount))
		return -EIO;

	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
	xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);

	switch (xfs_dir2_format(&args, &error)) {
	case XFS_DIR2_FMT_SF:
		return xchk_dir_walk_sf(sc, dp, dirent_fn, priv);
	case XFS_DIR2_FMT_BLOCK:
		return xchk_dir_walk_block(sc, dp, dirent_fn, priv);
	case XFS_DIR2_FMT_LEAF:
	case XFS_DIR2_FMT_NODE:
		return xchk_dir_walk_leaf(sc, dp, dirent_fn, priv);
	default:
		return error;
	}
}

/*
 * Look up the inode number for an exact name in a directory.
 *
 * Callers must hold the ILOCK.  File types are XFS_DIR3_FT_*.  Names are not
 * checked for correctness.
 */
int
xchk_dir_lookup(
	struct xfs_scrub	*sc,
	struct xfs_inode	*dp,
	const struct xfs_name	*name,
	xfs_ino_t		*ino)
{
	struct xfs_da_args	args = {
		.dp		= dp,
		.geo		= dp->i_mount->m_dir_geo,
		.trans		= sc->tp,
		.name		= name->name,
		.namelen	= name->len,
		.filetype	= name->type,
		.hashval	= xfs_dir2_hashname(dp->i_mount, name),
		.whichfork	= XFS_DATA_FORK,
		.op_flags	= XFS_DA_OP_OKNOENT,
		.owner		= dp->i_ino,
	};
	int			error;

	if (xfs_is_shutdown(dp->i_mount))
		return -EIO;

	/*
	 * A temporary directory's block headers are written with the owner
	 * set to sc->ip, so we must switch the owner here for the lookup.
	 */
	if (dp == sc->tempip)
		args.owner = sc->ip->i_ino;

	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
	xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);

	error = xfs_dir_lookup_args(&args);
	if (!error)
		*ino = args.inumber;
	return error;
}

/*
 * Try to grab the IOLOCK and ILOCK of sc->ip and ip, returning @ip's lock
 * state.  The caller may have a transaction, so we must use trylock for both
 * IOLOCKs.
 */
static inline unsigned int
xchk_dir_trylock_both(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip)
{
	if (!xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL))
		return 0;

	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
		goto parent_iolock;

	xchk_ilock(sc, XFS_ILOCK_EXCL);
	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
		goto parent_ilock;

	return XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL;

parent_ilock:
	xchk_iunlock(sc, XFS_ILOCK_EXCL);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
parent_iolock:
	xchk_iunlock(sc, XFS_IOLOCK_EXCL);
	return 0;
}

/*
 * Try for a limited time to grab the IOLOCK and ILOCK of both the scrub target
 * (@sc->ip) and the inode at the other end (@ip) of a directory or parent
 * pointer link so that we can check that link.
 *
 * We do not know ahead of time that the directory tree is /not/ corrupt, so we
 * cannot use the "lock two inode" functions because we do not know that there
 * is not a racing thread trying to take the locks in opposite order.  First
 * take IOLOCK_EXCL of the scrub target, and then try to take IOLOCK_SHARED
 * of @ip to synchronize with the VFS.  Next, take ILOCK_EXCL of the scrub
 * target and @ip to synchronize with XFS.
 *
 * If the trylocks succeed, *lockmode will be set to the locks held for @ip;
 * @sc->ilock_flags will be set for the locks held for @sc->ip; and zero will
 * be returned.  If not, returns -EDEADLOCK to try again; or -ETIMEDOUT if
 * XCHK_TRY_HARDER was set.  Returns -EINTR if the process has been killed.
 */
int
xchk_dir_trylock_for_pptrs(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip,
	unsigned int		*lockmode)
{
	unsigned int		nr;
	int			error = 0;

	ASSERT(sc->ilock_flags == 0);

	for (nr = 0; nr < HZ; nr++) {
		*lockmode = xchk_dir_trylock_both(sc, ip);
		if (*lockmode)
			return 0;

		if (xchk_should_terminate(sc, &error))
			return error;

		delay(1);
	}

	if (sc->flags & XCHK_TRY_HARDER) {
		xchk_set_incomplete(sc);
		return -ETIMEDOUT;
	}

	return -EDEADLOCK;
}