Diffstat (limited to 'fs/bcachefs/alloc_background.c')
-rw-r--r-- | fs/bcachefs/alloc_background.c | 666
1 file changed, 371 insertions(+), 295 deletions(-)
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 534ba2b02b..658f11aebd 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -3,6 +3,7 @@
 #include "alloc_background.h"
 #include "alloc_foreground.h"
 #include "backpointers.h"
+#include "bkey_buf.h"
 #include "btree_cache.h"
 #include "btree_io.h"
 #include "btree_key_cache.h"
@@ -29,7 +30,7 @@
 #include <linux/sched/task.h>
 #include <linux/sort.h>
 
-static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket);
+static void bch2_discard_one_bucket_fast(struct bch_dev *, u64);
 
 /* Persistent alloc info: */
 
@@ -195,7 +196,7 @@ static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
 }
 
 int bch2_alloc_v1_invalid(struct bch_fs *c, struct bkey_s_c k,
-			  enum bkey_invalid_flags flags,
+			  enum bch_validate_flags flags,
 			  struct printbuf *err)
 {
 	struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
@@ -211,7 +212,7 @@ fsck_err:
 }
 
 int bch2_alloc_v2_invalid(struct bch_fs *c, struct bkey_s_c k,
-			  enum bkey_invalid_flags flags,
+			  enum bch_validate_flags flags,
 			  struct printbuf *err)
 {
 	struct bkey_alloc_unpacked u;
@@ -225,7 +226,7 @@ fsck_err:
 }
 
 int bch2_alloc_v3_invalid(struct bch_fs *c, struct bkey_s_c k,
-			  enum bkey_invalid_flags flags,
+			  enum bch_validate_flags flags,
 			  struct printbuf *err)
 {
 	struct bkey_alloc_unpacked u;
@@ -239,7 +240,7 @@ fsck_err:
 }
 
 int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
-			  enum bkey_invalid_flags flags, struct printbuf *err)
+			  enum bch_validate_flags flags, struct printbuf *err)
 {
 	struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
 	int ret = 0;
@@ -259,11 +260,19 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
 			 "invalid data type (got %u should be %u)",
 			 a.v->data_type, alloc_data_type(*a.v, a.v->data_type));
 
+	for (unsigned i = 0; i < 2; i++)
+		bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX,
+				 c, err,
+				 alloc_key_io_time_bad,
+				 "invalid io_time[%s]: %llu, max %llu",
+				 i == READ ? "read" : "write",
+				 a.v->io_time[i], LRU_TIME_MAX);
+
 	switch (a.v->data_type) {
 	case BCH_DATA_free:
 	case BCH_DATA_need_gc_gens:
 	case BCH_DATA_need_discard:
-		bkey_fsck_err_on(bch2_bucket_sectors(*a.v) || a.v->stripe,
+		bkey_fsck_err_on(bch2_bucket_sectors_total(*a.v) || a.v->stripe,
				 c, err, alloc_key_empty_but_have_data,
				 "empty data type free but have data");
		break;
@@ -330,27 +339,17 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
	prt_printf(out, "gen %u oldest_gen %u data_type ", a->gen, a->oldest_gen);
	bch2_prt_data_type(out, a->data_type);
	prt_newline(out);
-	prt_printf(out, "journal_seq %llu", a->journal_seq);
-	prt_newline(out);
-	prt_printf(out, "need_discard %llu", BCH_ALLOC_V4_NEED_DISCARD(a));
-	prt_newline(out);
-	prt_printf(out, "need_inc_gen %llu", BCH_ALLOC_V4_NEED_INC_GEN(a));
-	prt_newline(out);
-	prt_printf(out, "dirty_sectors %u", a->dirty_sectors);
-	prt_newline(out);
-	prt_printf(out, "cached_sectors %u", a->cached_sectors);
-	prt_newline(out);
-	prt_printf(out, "stripe %u", a->stripe);
-	prt_newline(out);
-	prt_printf(out, "stripe_redundancy %u", a->stripe_redundancy);
-	prt_newline(out);
-	prt_printf(out, "io_time[READ] %llu", a->io_time[READ]);
-	prt_newline(out);
-	prt_printf(out, "io_time[WRITE] %llu", a->io_time[WRITE]);
-	prt_newline(out);
-	prt_printf(out, "fragmentation %llu", a->fragmentation_lru);
-	prt_newline(out);
-	prt_printf(out, "bp_start %llu", BCH_ALLOC_V4_BACKPOINTERS_START(a));
+	prt_printf(out, "journal_seq %llu\n", a->journal_seq);
+	prt_printf(out, "need_discard %llu\n", BCH_ALLOC_V4_NEED_DISCARD(a));
+	prt_printf(out, "need_inc_gen %llu\n", BCH_ALLOC_V4_NEED_INC_GEN(a));
+	prt_printf(out, "dirty_sectors %u\n", a->dirty_sectors);
+	prt_printf(out, "cached_sectors %u\n", a->cached_sectors);
+	prt_printf(out, "stripe %u\n", a->stripe);
+	prt_printf(out, "stripe_redundancy %u\n", a->stripe_redundancy);
+	prt_printf(out, "io_time[READ] %llu\n", a->io_time[READ]);
+	prt_printf(out, "io_time[WRITE] %llu\n", a->io_time[WRITE]);
+	prt_printf(out, "fragmentation %llu\n", a->fragmentation_lru);
+	prt_printf(out, "bp_start %llu\n", BCH_ALLOC_V4_BACKPOINTERS_START(a));
 
	printbuf_indent_sub(out, 2);
 }
@@ -439,22 +438,18 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct b
 }
 
 struct bkey_i_alloc_v4 *
-bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
-			      struct bpos pos)
+bch2_trans_start_alloc_update_noupdate(struct btree_trans *trans, struct btree_iter *iter,
+				       struct bpos pos)
 {
-	struct bkey_s_c k;
-	struct bkey_i_alloc_v4 *a;
-	int ret;
-
-	k = bch2_bkey_get_iter(trans, iter, BTREE_ID_alloc, pos,
-			       BTREE_ITER_WITH_UPDATES|
-			       BTREE_ITER_CACHED|
-			       BTREE_ITER_INTENT);
-	ret = bkey_err(k);
+	struct bkey_s_c k = bch2_bkey_get_iter(trans, iter, BTREE_ID_alloc, pos,
+					       BTREE_ITER_with_updates|
					       BTREE_ITER_cached|
+					       BTREE_ITER_intent);
+	int ret = bkey_err(k);
	if (unlikely(ret))
		return ERR_PTR(ret);
 
-	a = bch2_alloc_to_v4_mut_inlined(trans, k);
+	struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut_inlined(trans, k);
	ret = PTR_ERR_OR_ZERO(a);
	if (unlikely(ret))
		goto err;
@@ -464,6 +459,20 @@ err:
	return ERR_PTR(ret);
 }
 
+__flatten
+struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans, struct bpos pos)
+{
+	struct btree_iter iter;
+	struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update_noupdate(trans, &iter, pos);
+	int ret = PTR_ERR_OR_ZERO(a);
+	if (ret)
+		return ERR_PTR(ret);
+
+	ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
+	bch2_trans_iter_exit(trans, &iter);
+	return unlikely(ret) ? ERR_PTR(ret) : a;
+}
+
 static struct bpos alloc_gens_pos(struct bpos pos, unsigned *offset)
 {
	*offset = pos.offset & KEY_TYPE_BUCKET_GENS_MASK;
@@ -487,7 +496,7 @@ static unsigned alloc_gen(struct bkey_s_c k, unsigned offset)
 }
 
 int bch2_bucket_gens_invalid(struct bch_fs *c, struct bkey_s_c k,
-			     enum bkey_invalid_flags flags,
+			     enum bch_validate_flags flags,
			     struct printbuf *err)
 {
	int ret = 0;
@@ -520,7 +529,7 @@ int bch2_bucket_gens_init(struct bch_fs *c)
	int ret;
 
	ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
-				 BTREE_ITER_PREFETCH, k, ({
+				 BTREE_ITER_prefetch, k, ({
		/*
		 * Not a fsck error because this is checked/repaired by
		 * bch2_check_alloc_key() which runs later:
@@ -567,29 +576,31 @@ iter_err:
 int bch2_alloc_read(struct bch_fs *c)
 {
	struct btree_trans *trans = bch2_trans_get(c);
+	struct bch_dev *ca = NULL;
	int ret;
 
	down_read(&c->gc_lock);
 
	if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_bucket_gens) {
		ret = for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN,
-					 BTREE_ITER_PREFETCH, k, ({
+					 BTREE_ITER_prefetch, k, ({
			u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset;
			u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset;
 
			if (k.k->type != KEY_TYPE_bucket_gens)
				continue;
 
-			const struct bch_bucket_gens *g = bkey_s_c_to_bucket_gens(k).v;
-
+			ca = bch2_dev_iterate(c, ca, k.k->p.inode);
			/*
			 * Not a fsck error because this is checked/repaired by
			 * bch2_check_alloc_key() which runs later:
			 */
-			if (!bch2_dev_exists2(c, k.k->p.inode))
+			if (!ca) {
+				bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
				continue;
+			}
 
-			struct bch_dev *ca = bch_dev_bkey_exists(c, k.k->p.inode);
+			const struct bch_bucket_gens *g = bkey_s_c_to_bucket_gens(k).v;
 
			for (u64 b = max_t(u64, ca->mi.first_bucket, start);
			     b < min_t(u64, ca->mi.nbuckets, end);
@@ -599,15 +610,16 @@ int bch2_alloc_read(struct bch_fs *c)
		}));
	} else {
		ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
-					 BTREE_ITER_PREFETCH, k, ({
+					 BTREE_ITER_prefetch, k, ({
+			ca = bch2_dev_iterate(c, ca, k.k->p.inode);
			/*
			 * Not a fsck error because this is checked/repaired by
			 * bch2_check_alloc_key() which runs later:
			 */
-			if (!bch2_dev_bucket_exists(c, k.k->p))
+			if (!ca) {
+				bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
				continue;
-
-			struct bch_dev *ca = bch_dev_bkey_exists(c, k.k->p.inode);
+			}
 
			struct bch_alloc_v4 a;
			*bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen;
@@ -615,6 +627,7 @@ int bch2_alloc_read(struct bch_fs *c)
		}));
	}
 
+	bch2_dev_put(ca);
	bch2_trans_put(trans);
	up_read(&c->gc_lock);
 
@@ -625,12 +638,12 @@ int bch2_alloc_read(struct bch_fs *c)
 /* Free space/discard btree: */
 
 static int bch2_bucket_do_index(struct btree_trans *trans,
+				struct bch_dev *ca,
				struct bkey_s_c alloc_k,
				const struct bch_alloc_v4 *a,
				bool set)
 {
	struct bch_fs *c = trans->c;
-	struct bch_dev *ca = bch_dev_bkey_exists(c, alloc_k.k->p.inode);
	struct btree_iter iter;
	struct bkey_s_c old;
	struct bkey_i *k;
@@ -667,7 +680,7 @@ static int bch2_bucket_do_index(struct btree_trans *trans,
 
	old = bch2_bkey_get_iter(trans, &iter, btree,
			     bkey_start_pos(&k->k),
-			     BTREE_ITER_INTENT);
+			     BTREE_ITER_intent);
	ret = bkey_err(old);
	if (ret)
		return ret;
@@ -711,8 +724,8 @@ static noinline int bch2_bucket_gen_update(struct btree_trans *trans,
		return ret;
 
	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_bucket_gens, pos,
-			       BTREE_ITER_INTENT|
-			       BTREE_ITER_WITH_UPDATES);
+			       BTREE_ITER_intent|
+			       BTREE_ITER_with_updates);
	ret = bkey_err(k);
	if (ret)
		return ret;
@@ -734,28 +747,27 @@ static noinline int bch2_bucket_gen_update(struct btree_trans *trans,
 
 int bch2_trigger_alloc(struct btree_trans *trans,
		       enum btree_id btree, unsigned level,
		       struct bkey_s_c old, struct bkey_s new,
-		       unsigned flags)
+		       enum btree_iter_update_trigger_flags flags)
 {
	struct bch_fs *c = trans->c;
+	struct printbuf buf = PRINTBUF;
	int ret = 0;
 
-	if (bch2_trans_inconsistent_on(!bch2_dev_bucket_exists(c, new.k->p), trans,
-				       "alloc key for invalid device or bucket"))
+	struct bch_dev *ca = bch2_dev_bucket_tryget(c, new.k->p);
+	if (!ca)
		return -EIO;
 
-	struct bch_dev *ca = bch_dev_bkey_exists(c, new.k->p.inode);
-
	struct bch_alloc_v4 old_a_convert;
	const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert);
 
-	if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+	if (flags & BTREE_TRIGGER_transactional) {
		struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v;
 
-		new_a->data_type = alloc_data_type(*new_a, new_a->data_type);
+		alloc_data_type_set(new_a, new_a->data_type);
 
-		if (bch2_bucket_sectors(*new_a) > bch2_bucket_sectors(*old_a)) {
-			new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
-			new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now));
+		if (bch2_bucket_sectors_total(*new_a) > bch2_bucket_sectors_total(*old_a)) {
+			new_a->io_time[READ] = bch2_current_io_time(c, READ);
+			new_a->io_time[WRITE]= bch2_current_io_time(c, WRITE);
			SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
			SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true);
		}
@@ -765,20 +777,21 @@ int bch2_trigger_alloc(struct btree_trans *trans,
		    !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) {
			new_a->gen++;
			SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false);
+			alloc_data_type_set(new_a, new_a->data_type);
		}
 
		if (old_a->data_type != new_a->data_type ||
		    (new_a->data_type == BCH_DATA_free &&
		     alloc_freespace_genbits(*old_a) != alloc_freespace_genbits(*new_a))) {
-			ret =   bch2_bucket_do_index(trans, old, old_a, false) ?:
-				bch2_bucket_do_index(trans, new.s_c, new_a, true);
+			ret =   bch2_bucket_do_index(trans, ca, old, old_a, false) ?:
+				bch2_bucket_do_index(trans, ca, new.s_c, new_a, true);
			if (ret)
-				return ret;
+				goto err;
		}
 
		if (new_a->data_type == BCH_DATA_cached &&
		    !new_a->io_time[READ])
-			new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
+			new_a->io_time[READ] = bch2_current_io_time(c, READ);
 
		u64 old_lru = alloc_lru_idx_read(*old_a);
		u64 new_lru = alloc_lru_idx_read(*new_a);
@@ -787,24 +800,23 @@ int bch2_trigger_alloc(struct btree_trans *trans,
					bucket_to_u64(new.k->p),
					old_lru, new_lru);
			if (ret)
-				return ret;
+				goto err;
		}
 
-		new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a,
-						bch_dev_bkey_exists(c, new.k->p.inode));
+		new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a, ca);
		if (old_a->fragmentation_lru != new_a->fragmentation_lru) {
			ret = bch2_lru_change(trans,
					BCH_LRU_FRAGMENTATION_START,
					bucket_to_u64(new.k->p),
					old_a->fragmentation_lru, new_a->fragmentation_lru);
			if (ret)
-				return ret;
+				goto err;
		}
 
		if (old_a->gen != new_a->gen) {
			ret = bch2_bucket_gen_update(trans, new.k->p, new_a->gen);
			if (ret)
-				return ret;
+				goto err;
		}
 
		/*
@@ -812,21 +824,21 @@ int bch2_trigger_alloc(struct btree_trans *trans,
		 * not:
		 */
 
-		if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
+		if ((flags & BTREE_TRIGGER_bucket_invalidate) &&
		    old_a->cached_sectors) {
			ret = bch2_update_cached_sectors_list(trans, new.k->p.inode,
							      -((s64) old_a->cached_sectors));
			if (ret)
-				return ret;
+				goto err;
		}
	}
 
-	if ((flags & BTREE_TRIGGER_ATOMIC) && (flags & BTREE_TRIGGER_INSERT)) {
+	if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) {
		struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v;
		u64 journal_seq = trans->journal_res.seq;
		u64 bucket_journal_seq = new_a->journal_seq;
 
-		if ((flags & BTREE_TRIGGER_INSERT) &&
+		if ((flags & BTREE_TRIGGER_insert) &&
		    data_type_is_empty(old_a->data_type) !=
		    data_type_is_empty(new_a->data_type) &&
		    new.k->type == KEY_TYPE_alloc_v4) {
@@ -854,13 +866,19 @@ int bch2_trigger_alloc(struct btree_trans *trans,
			if (ret) {
				bch2_fs_fatal_error(c,
					"setting bucket_needs_journal_commit: %s", bch2_err_str(ret));
-				return ret;
+				goto err;
			}
		}
 
		percpu_down_read(&c->mark_lock);
-		if (new_a->gen != old_a->gen)
-			*bucket_gen(ca, new.k->p.offset) = new_a->gen;
+		if (new_a->gen != old_a->gen) {
+			u8 *gen = bucket_gen(ca, new.k->p.offset);
+			if (unlikely(!gen)) {
+				percpu_up_read(&c->mark_lock);
+				goto invalid_bucket;
+			}
+			*gen = new_a->gen;
+		}
 
		bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, false);
		percpu_up_read(&c->mark_lock);
@@ -874,26 +892,31 @@ int bch2_trigger_alloc(struct btree_trans *trans,
			closure_wake_up(&c->freelist_wait);
 
		if (statechange(a->data_type == BCH_DATA_need_discard) &&
-		    !bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) &&
+		    !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset) &&
		    bucket_flushed(new_a))
-			bch2_discard_one_bucket_fast(c, new.k->p);
+			bch2_discard_one_bucket_fast(ca, new.k->p.offset);
 
		if (statechange(a->data_type == BCH_DATA_cached) &&
		    !bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) &&
		    should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
-			bch2_do_invalidates(c);
+			bch2_dev_do_invalidates(ca);
 
		if (statechange(a->data_type == BCH_DATA_need_gc_gens))
-			bch2_do_gc_gens(c);
+			bch2_gc_gens_async(c);
	}
 
-	if ((flags & BTREE_TRIGGER_GC) &&
-	    (flags & BTREE_TRIGGER_BUCKET_INVALIDATE)) {
+	if ((flags & BTREE_TRIGGER_gc) &&
+	    (flags & BTREE_TRIGGER_bucket_invalidate)) {
		struct bch_alloc_v4 new_a_convert;
		const struct bch_alloc_v4 *new_a = bch2_alloc_to_v4(new.s_c, &new_a_convert);
 
		percpu_down_read(&c->mark_lock);
		struct bucket *g = gc_bucket(ca, new.k->p.offset);
+		if (unlikely(!g)) {
+			percpu_up_read(&c->mark_lock);
+			goto invalid_bucket;
+		}
+		g->gen_valid = 1;
 
		bucket_lock(g);
@@ -908,12 +931,19 @@ int bch2_trigger_alloc(struct btree_trans *trans,
		bucket_unlock(g);
		percpu_up_read(&c->mark_lock);
	}
-
-	return 0;
+err:
+	printbuf_exit(&buf);
+	bch2_dev_put(ca);
+	return ret;
+invalid_bucket:
+	bch2_fs_inconsistent(c, "reference to invalid bucket\n %s",
			     (bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf));
+	ret = -EIO;
+	goto err;
 }
 
 /*
- * This synthesizes deleted extents for holes, similar to BTREE_ITER_SLOTS for
+ * This synthesizes deleted extents for holes, similar to BTREE_ITER_slots for
  * extents style btrees, but works on non-extents btrees:
  */
 static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos end, struct bkey *hole)
@@ -958,35 +988,34 @@ static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos
	}
 }
 
-static bool next_bucket(struct bch_fs *c, struct bpos *bucket)
+static bool next_bucket(struct bch_fs *c, struct bch_dev **ca, struct bpos *bucket)
 {
-	struct bch_dev *ca;
-
-	if (bch2_dev_bucket_exists(c, *bucket))
-		return true;
-
-	if (bch2_dev_exists2(c, bucket->inode)) {
-		ca = bch_dev_bkey_exists(c, bucket->inode);
+	if (*ca) {
+		if (bucket->offset < (*ca)->mi.first_bucket)
+			bucket->offset = (*ca)->mi.first_bucket;
 
-		if (bucket->offset < ca->mi.first_bucket) {
-			bucket->offset = ca->mi.first_bucket;
+		if (bucket->offset < (*ca)->mi.nbuckets)
			return true;
-		}
 
+		bch2_dev_put(*ca);
+		*ca = NULL;
		bucket->inode++;
		bucket->offset = 0;
	}
 
	rcu_read_lock();
-	ca = __bch2_next_dev_idx(c, bucket->inode, NULL);
-	if (ca)
-		*bucket = POS(ca->dev_idx, ca->mi.first_bucket);
+	*ca = __bch2_next_dev_idx(c, bucket->inode, NULL);
+	if (*ca) {
+		*bucket = POS((*ca)->dev_idx, (*ca)->mi.first_bucket);
+		bch2_dev_get(*ca);
+	}
	rcu_read_unlock();
 
-	return ca != NULL;
+	return *ca != NULL;
 }
 
-static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_iter *iter, struct bkey *hole)
+static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_iter *iter,
							struct bch_dev **ca, struct bkey *hole)
 {
	struct bch_fs *c = iter->trans->c;
	struct bkey_s_c k;
@@ -995,22 +1024,21 @@ again:
	if (bkey_err(k))
		return k;
 
+	*ca = bch2_dev_iterate_noerror(c, *ca, k.k->p.inode);
+
	if (!k.k->type) {
-		struct bpos bucket = bkey_start_pos(k.k);
+		struct bpos hole_start = bkey_start_pos(k.k);
 
-		if (!bch2_dev_bucket_exists(c, bucket)) {
-			if (!next_bucket(c, &bucket))
+		if (!*ca || !bucket_valid(*ca, hole_start.offset)) {
+			if (!next_bucket(c, ca, &hole_start))
				return bkey_s_c_null;
 
-			bch2_btree_iter_set_pos(iter, bucket);
+			bch2_btree_iter_set_pos(iter, hole_start);
			goto again;
		}
 
-		if (!bch2_dev_bucket_exists(c, k.k->p)) {
-			struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
-
-			bch2_key_resize(hole, ca->mi.nbuckets - bucket.offset);
-		}
+		if (k.k->p.offset > (*ca)->mi.nbuckets)
+			bch2_key_resize(hole, (*ca)->mi.nbuckets - hole_start.offset);
	}
 
	return k;
@@ -1025,24 +1053,25 @@ int bch2_check_alloc_key(struct btree_trans *trans,
			 struct btree_iter *bucket_gens_iter)
 {
	struct bch_fs *c = trans->c;
-	struct bch_dev *ca;
	struct bch_alloc_v4 a_convert;
	const struct bch_alloc_v4 *a;
	unsigned discard_key_type, freespace_key_type;
	unsigned gens_offset;
	struct bkey_s_c k;
	struct printbuf buf = PRINTBUF;
-	int ret;
+	int ret = 0;
 
-	if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_k.k->p), c,
-			alloc_key_to_missing_dev_bucket,
+	struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, alloc_k.k->p);
+	if (fsck_err_on(!ca,
+			c, alloc_key_to_missing_dev_bucket,
			"alloc key for invalid device:bucket %llu:%llu",
			alloc_k.k->p.inode, alloc_k.k->p.offset))
-		return bch2_btree_delete_at(trans, alloc_iter, 0);
+		ret = bch2_btree_delete_at(trans, alloc_iter, 0);
+	if (!ca)
+		return ret;
 
-	ca = bch_dev_bkey_exists(c, alloc_k.k->p.inode);
	if (!ca->mi.freespace_initialized)
-		return 0;
+		goto out;
 
	a = bch2_alloc_to_v4(alloc_k, &a_convert);
@@ -1141,25 +1170,26 @@ int bch2_check_alloc_key(struct btree_trans *trans,
		if (ret)
			goto err;
	}
+out:
 err:
 fsck_err:
+	bch2_dev_put(ca);
	printbuf_exit(&buf);
	return ret;
 }
 
 static noinline_for_stack
 int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
+				    struct bch_dev *ca,
				    struct bpos start,
				    struct bpos *end,
				    struct btree_iter *freespace_iter)
 {
	struct bch_fs *c = trans->c;
-	struct bch_dev *ca;
	struct bkey_s_c k;
	struct printbuf buf = PRINTBUF;
	int ret;
 
-	ca = bch_dev_bkey_exists(c, start.inode);
	if (!ca->mi.freespace_initialized)
		return 0;
@@ -1313,7 +1343,7 @@ static noinline_for_stack int bch2_check_discard_freespace_key(struct btree_tran
	goto delete;
 out:
 fsck_err:
-	set_btree_iter_dontneed(&alloc_iter);
+	bch2_set_btree_iter_dontneed(&alloc_iter);
	bch2_trans_iter_exit(trans, &alloc_iter);
	printbuf_exit(&buf);
	return ret;
@@ -1337,30 +1367,25 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
 {
	struct bch_fs *c = trans->c;
	struct bkey_i_bucket_gens g;
-	struct bch_dev *ca;
	u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset;
	u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset;
	u64 b;
-	bool need_update = false, dev_exists;
+	bool need_update = false;
	struct printbuf buf = PRINTBUF;
	int ret = 0;
 
	BUG_ON(k.k->type != KEY_TYPE_bucket_gens);
	bkey_reassemble(&g.k_i, k);
 
-	/* if no bch_dev, skip out whether we repair or not */
-	dev_exists = bch2_dev_exists2(c, k.k->p.inode);
-	if (!dev_exists) {
-		if (fsck_err_on(!dev_exists, c,
-				bucket_gens_to_invalid_dev,
-				"bucket_gens key for invalid device:\n %s",
-				(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+	struct bch_dev *ca = bch2_dev_tryget_noerror(c, k.k->p.inode);
+	if (!ca) {
+		if (fsck_err(c, bucket_gens_to_invalid_dev,
+			     "bucket_gens key for invalid device:\n %s",
+			     (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
			ret = bch2_btree_delete_at(trans, iter, 0);
-		}
		goto out;
	}
 
-	ca = bch_dev_bkey_exists(c, k.k->p.inode);
	if (fsck_err_on(end <= ca->mi.first_bucket ||
			start >= ca->mi.nbuckets, c,
			bucket_gens_to_invalid_buckets,
@@ -1398,6 +1423,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
	}
 out:
 fsck_err:
+	bch2_dev_put(ca);
	printbuf_exit(&buf);
	return ret;
 }
@@ -1406,25 +1432,26 @@ int bch2_check_alloc_info(struct bch_fs *c)
 {
	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter iter, discard_iter, freespace_iter, bucket_gens_iter;
+	struct bch_dev *ca = NULL;
	struct bkey hole;
	struct bkey_s_c k;
	int ret = 0;
 
	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS_MIN,
-			     BTREE_ITER_PREFETCH);
+			     BTREE_ITER_prefetch);
	bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard, POS_MIN,
-			     BTREE_ITER_PREFETCH);
+			     BTREE_ITER_prefetch);
	bch2_trans_iter_init(trans, &freespace_iter, BTREE_ID_freespace, POS_MIN,
-			     BTREE_ITER_PREFETCH);
+			     BTREE_ITER_prefetch);
	bch2_trans_iter_init(trans, &bucket_gens_iter, BTREE_ID_bucket_gens, POS_MIN,
-			     BTREE_ITER_PREFETCH);
+			     BTREE_ITER_prefetch);
 
	while (1) {
		struct bpos next;
 
		bch2_trans_begin(trans);
 
-		k = bch2_get_key_or_real_bucket_hole(&iter, &hole);
+		k = bch2_get_key_or_real_bucket_hole(&iter, &ca, &hole);
		ret = bkey_err(k);
		if (ret)
			goto bkey_err;
@@ -1445,7 +1472,7 @@ int bch2_check_alloc_info(struct bch_fs *c)
		} else {
			next = k.k->p;
 
-			ret = bch2_check_alloc_hole_freespace(trans,
+			ret = bch2_check_alloc_hole_freespace(trans, ca,
					bkey_start_pos(k.k),
					&next,
					&freespace_iter) ?:
@@ -1473,19 +1500,21 @@ bkey_err:
	bch2_trans_iter_exit(trans, &freespace_iter);
	bch2_trans_iter_exit(trans, &discard_iter);
	bch2_trans_iter_exit(trans, &iter);
+	bch2_dev_put(ca);
+	ca = NULL;
 
	if (ret < 0)
		goto err;
 
	ret = for_each_btree_key(trans, iter,
				 BTREE_ID_need_discard, POS_MIN,
-				 BTREE_ITER_PREFETCH, k,
+				 BTREE_ITER_prefetch, k,
			bch2_check_discard_freespace_key(trans, &iter));
	if (ret)
		goto err;
 
	bch2_trans_iter_init(trans, &iter, BTREE_ID_freespace, POS_MIN,
-			     BTREE_ITER_PREFETCH);
+			     BTREE_ITER_prefetch);
	while (1) {
		bch2_trans_begin(trans);
		k = bch2_btree_iter_peek(&iter);
@@ -1515,7 +1544,7 @@ bkey_err:
 
	ret = for_each_btree_key_commit(trans, iter,
			BTREE_ID_bucket_gens, POS_MIN,
-			BTREE_ITER_PREFETCH, k,
+			BTREE_ITER_prefetch, k,
			NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		bch2_check_bucket_gens_key(trans, &iter, k));
 err:
@@ -1525,13 +1554,13 @@ err:
 }
 
 static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
-				       struct btree_iter *alloc_iter)
+				       struct btree_iter *alloc_iter,
+				       struct bkey_buf *last_flushed)
 {
	struct bch_fs *c = trans->c;
-	struct btree_iter lru_iter;
	struct bch_alloc_v4 a_convert;
	const struct bch_alloc_v4 *a;
-	struct bkey_s_c alloc_k, lru_k;
+	struct bkey_s_c alloc_k;
	struct printbuf buf = PRINTBUF;
	int ret;
@@ -1545,6 +1574,14 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
 
	a = bch2_alloc_to_v4(alloc_k, &a_convert);
 
+	if (a->fragmentation_lru) {
+		ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START,
+					 a->fragmentation_lru,
+					 alloc_k, last_flushed);
+		if (ret)
+			return ret;
+	}
+
	if (a->data_type != BCH_DATA_cached)
		return 0;
@@ -1560,82 +1597,75 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
		if (ret)
			goto err;
 
-		a_mut->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
+		a_mut->v.io_time[READ] = bch2_current_io_time(c, READ);
		ret = bch2_trans_update(trans, alloc_iter,
-					&a_mut->k_i, BTREE_TRIGGER_NORUN);
+					&a_mut->k_i, BTREE_TRIGGER_norun);
		if (ret)
			goto err;
 
		a = &a_mut->v;
	}
 
-	lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
-				   lru_pos(alloc_k.k->p.inode,
-					   bucket_to_u64(alloc_k.k->p),
-					   a->io_time[READ]), 0);
-	ret = bkey_err(lru_k);
+	ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, a->io_time[READ],
+				 alloc_k, last_flushed);
	if (ret)
-		return ret;
-
-	if (fsck_err_on(lru_k.k->type != KEY_TYPE_set, c,
-			alloc_key_to_missing_lru_entry,
-			"missing lru entry\n"
-			" %s",
-			(printbuf_reset(&buf),
-			 bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
-		ret = bch2_lru_set(trans,
-				   alloc_k.k->p.inode,
-				   bucket_to_u64(alloc_k.k->p),
-				   a->io_time[READ]);
-		if (ret)
-			goto err;
-	}
+		goto err;
 err:
 fsck_err:
-	bch2_trans_iter_exit(trans, &lru_iter);
	printbuf_exit(&buf);
	return ret;
 }
 
 int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
 {
+	struct bkey_buf last_flushed;
+
+	bch2_bkey_buf_init(&last_flushed);
+	bkey_init(&last_flushed.k->k);
+
	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
-				POS_MIN, BTREE_ITER_PREFETCH, k,
+				POS_MIN, BTREE_ITER_prefetch, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
-			bch2_check_alloc_to_lru_ref(trans, &iter)));
+			bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed)));
+
+	bch2_bkey_buf_exit(&last_flushed, c);
	bch_err_fn(c, ret);
	return ret;
 }
 
-static int discard_in_flight_add(struct bch_fs *c, struct bpos bucket)
+static int discard_in_flight_add(struct bch_dev *ca, u64 bucket, bool in_progress)
 {
	int ret;
 
-	mutex_lock(&c->discard_buckets_in_flight_lock);
-	darray_for_each(c->discard_buckets_in_flight, i)
-		if (bkey_eq(*i, bucket)) {
-			ret = -EEXIST;
+	mutex_lock(&ca->discard_buckets_in_flight_lock);
+	darray_for_each(ca->discard_buckets_in_flight, i)
+		if (i->bucket == bucket) {
+			ret = -BCH_ERR_EEXIST_discard_in_flight_add;
			goto out;
		}
 
-	ret = darray_push(&c->discard_buckets_in_flight, bucket);
+	ret = darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) {
+			   .in_progress = in_progress,
+			   .bucket	= bucket,
+	}));
 out:
-	mutex_unlock(&c->discard_buckets_in_flight_lock);
+	mutex_unlock(&ca->discard_buckets_in_flight_lock);
	return ret;
 }
 
-static void discard_in_flight_remove(struct bch_fs *c, struct bpos bucket)
+static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket)
 {
-	mutex_lock(&c->discard_buckets_in_flight_lock);
-	darray_for_each(c->discard_buckets_in_flight, i)
-		if (bkey_eq(*i, bucket)) {
-			darray_remove_item(&c->discard_buckets_in_flight, i);
+	mutex_lock(&ca->discard_buckets_in_flight_lock);
+	darray_for_each(ca->discard_buckets_in_flight, i)
+		if (i->bucket == bucket) {
+			BUG_ON(!i->in_progress);
+			darray_remove_item(&ca->discard_buckets_in_flight, i);
			goto found;
		}
	BUG();
 found:
-	mutex_unlock(&c->discard_buckets_in_flight_lock);
+	mutex_unlock(&ca->discard_buckets_in_flight_lock);
 }
 
@@ -1643,28 +1673,11 @@ struct discard_buckets_state {
	u64		open;
	u64		need_journal_commit;
	u64		discarded;
-	struct bch_dev	*ca;
	u64		need_journal_commit_this_dev;
 };
 
-static void discard_buckets_next_dev(struct bch_fs *c, struct discard_buckets_state *s, struct bch_dev *ca)
-{
-	if (s->ca == ca)
-		return;
-
-	if (s->ca && s->need_journal_commit_this_dev >
-	    bch2_dev_usage_read(s->ca).d[BCH_DATA_free].buckets)
-		bch2_journal_flush_async(&c->journal, NULL);
-
-	if (s->ca)
-		percpu_ref_put(&s->ca->ref);
-	if (ca)
-		percpu_ref_get(&ca->ref);
-	s->ca = ca;
-	s->need_journal_commit_this_dev = 0;
-}
-
 static int bch2_discard_one_bucket(struct btree_trans *trans,
+				   struct bch_dev *ca,
				   struct btree_iter *need_discard_iter,
				   struct bpos *discard_pos_done,
				   struct discard_buckets_state *s)
@@ -1673,21 +1686,11 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
	struct bpos pos = need_discard_iter->pos;
	struct btree_iter iter = { NULL };
	struct bkey_s_c k;
-	struct bch_dev *ca;
	struct bkey_i_alloc_v4 *a;
	struct printbuf buf = PRINTBUF;
	bool discard_locked = false;
	int ret = 0;
 
-	ca = bch_dev_bkey_exists(c, pos.inode);
-
-	if (!percpu_ref_tryget(&ca->io_ref)) {
-		bch2_btree_iter_set_pos(need_discard_iter, POS(pos.inode + 1, 0));
-		return 0;
-	}
-
-	discard_buckets_next_dev(c, s, ca);
-
	if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) {
		s->open++;
		goto out;
@@ -1703,7 +1706,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
 
	k = bch2_bkey_get_iter(trans, &iter,
			       BTREE_ID_alloc, need_discard_iter->pos,
-			       BTREE_ITER_CACHED);
+			       BTREE_ITER_cached);
	ret = bkey_err(k);
	if (ret)
		goto out;
@@ -1713,7 +1716,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
	if (ret)
		goto out;
 
-	if (a->v.dirty_sectors) {
+	if (bch2_bucket_sectors_total(a->v)) {
		if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
					       trans, "attempting to discard bucket with dirty data\n%s",
					       (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
@@ -1747,7 +1750,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
		goto out;
	}
 
-	if (discard_in_flight_add(c, SPOS(iter.pos.inode, iter.pos.offset, true)))
+	if (discard_in_flight_add(ca, iter.pos.offset, true))
		goto out;
 
	discard_locked = true;
@@ -1771,8 +1774,9 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
		}
 
		SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
-		a->v.data_type = alloc_data_type(a->v, a->v.data_type);
 write:
+	alloc_data_type_set(&a->v, a->v.data_type);
+
	ret =   bch2_trans_update(trans, &iter, &a->k_i, 0) ?:
		bch2_trans_commit(trans, NULL, NULL,
				  BCH_WATERMARK_btree|
@@ -1784,17 +1788,17 @@ write:
		s->discarded++;
 out:
	if (discard_locked)
-		discard_in_flight_remove(c, iter.pos);
+		discard_in_flight_remove(ca, iter.pos.offset);
	s->seen++;
	bch2_trans_iter_exit(trans, &iter);
-	percpu_ref_put(&ca->io_ref);
	printbuf_exit(&buf);
	return ret;
 }
 
 static void bch2_do_discards_work(struct work_struct *work)
 {
-	struct bch_fs *c = container_of(work, struct bch_fs, discard_work);
+	struct bch_dev *ca = container_of(work, struct bch_dev, discard_work);
+	struct bch_fs *c = ca->fs;
	struct discard_buckets_state s = {};
	struct bpos discard_pos_done = POS_MAX;
	int ret;
@@ -1805,29 +1809,47 @@ static void bch2_do_discards_work(struct work_struct *work)
	 * successful commit:
	 */
	ret = bch2_trans_run(c,
-		for_each_btree_key(trans, iter,
-				   BTREE_ID_need_discard, POS_MIN, 0, k,
-			bch2_discard_one_bucket(trans, &iter, &discard_pos_done, &s)));
-
-	discard_buckets_next_dev(c, &s, NULL);
+		for_each_btree_key_upto(trans, iter,
				   BTREE_ID_need_discard,
+				   POS(ca->dev_idx, 0),
+				   POS(ca->dev_idx, U64_MAX), 0, k,
+			bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s)));
 
	trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
			      bch2_err_str(ret));
 
	bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+	percpu_ref_put(&ca->io_ref);
+}
+
+void bch2_dev_do_discards(struct bch_dev *ca)
+{
+	struct bch_fs *c = ca->fs;
+
+	if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+		return;
+
+	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard))
+		goto put_ioref;
+
+	if (queue_work(c->write_ref_wq, &ca->discard_work))
+		return;
+
+	bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+put_ioref:
+	percpu_ref_put(&ca->io_ref);
 }
 
 void bch2_do_discards(struct bch_fs *c)
 {
-	if (bch2_write_ref_tryget(c, BCH_WRITE_REF_discard) &&
-	    !queue_work(c->write_ref_wq, &c->discard_work))
-		bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+	for_each_member_device(c, ca)
+		bch2_dev_do_discards(ca);
 }
 
 static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpos bucket)
 {
	struct btree_iter iter;
-	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, bucket, BTREE_ITER_INTENT);
+	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, bucket, BTREE_ITER_intent);
	struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter);
	int ret = bkey_err(k);
	if (ret)
@@ -1840,7 +1862,7 @@ static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpo
	BUG_ON(a->v.dirty_sectors);
	SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
-	a->v.data_type = alloc_data_type(a->v, a->v.data_type);
+	alloc_data_type_set(&a->v, a->v.data_type);
 
	ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
 err:
@@ -1850,66 +1872,69 @@ err:
 
 static void bch2_do_discards_fast_work(struct work_struct *work)
 {
-	struct bch_fs *c = container_of(work, struct bch_fs, discard_fast_work);
+	struct bch_dev *ca = container_of(work, struct bch_dev, discard_fast_work);
+	struct bch_fs *c = ca->fs;
 
	while (1) {
		bool got_bucket = false;
-		struct bpos bucket;
-		struct bch_dev *ca;
+		u64 bucket;
 
-		mutex_lock(&c->discard_buckets_in_flight_lock);
-		darray_for_each(c->discard_buckets_in_flight, i) {
-			if (i->snapshot)
+		mutex_lock(&ca->discard_buckets_in_flight_lock);
+		darray_for_each(ca->discard_buckets_in_flight, i) {
+			if (i->in_progress)
				continue;
 
-			ca = bch_dev_bkey_exists(c, i->inode);
-
-			if (!percpu_ref_tryget(&ca->io_ref)) {
-				darray_remove_item(&c->discard_buckets_in_flight, i);
-				continue;
-			}
-
			got_bucket = true;
-			bucket = *i;
-			i->snapshot = true;
+			bucket = i->bucket;
+			i->in_progress = true;
			break;
		}
-		mutex_unlock(&c->discard_buckets_in_flight_lock);
+		mutex_unlock(&ca->discard_buckets_in_flight_lock);
 
		if (!got_bucket)
			break;
 
		if (ca->mi.discard && !c->opts.nochanges)
			blkdev_issue_discard(ca->disk_sb.bdev,
-					     bucket.offset * ca->mi.bucket_size,
+					     bucket_to_sector(ca, bucket),
					     ca->mi.bucket_size,
					     GFP_KERNEL);
 
		int ret = bch2_trans_do(c, NULL, NULL,
-			BCH_WATERMARK_btree|
-			BCH_TRANS_COMMIT_no_enospc,
-			bch2_clear_bucket_needs_discard(trans, bucket));
+					BCH_WATERMARK_btree|
+					BCH_TRANS_COMMIT_no_enospc,
+					bch2_clear_bucket_needs_discard(trans, POS(ca->dev_idx, bucket)));
		bch_err_fn(c, ret);
 
-		percpu_ref_put(&ca->io_ref);
-		discard_in_flight_remove(c, bucket);
+		discard_in_flight_remove(ca, bucket);
 
		if (ret)
			break;
	}
 
	bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+	percpu_ref_put(&ca->io_ref);
 }
 
-static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket)
+static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
 {
-	struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
+	struct bch_fs *c = ca->fs;
 
-	if (!percpu_ref_is_dying(&ca->io_ref) &&
-	    !discard_in_flight_add(c, bucket) &&
-	    bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast) &&
-	    !queue_work(c->write_ref_wq, &c->discard_fast_work))
-		bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+	if (discard_in_flight_add(ca, bucket, false))
+		return;
+
+	if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+		return;
+
+	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast))
+		goto put_ioref;
+
+	if (queue_work(c->write_ref_wq, &ca->discard_fast_work))
+		return;
+
+	bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+put_ioref:
+	percpu_ref_put(&ca->io_ref);
 }
 
@@ -1918,7 +1943,6 @@ static int invalidate_one_bucket(struct btree_trans *trans,
				 s64 *nr_to_invalidate)
 {
	struct bch_fs *c = trans->c;
-	struct btree_iter alloc_iter = { NULL };
	struct bkey_i_alloc_v4 *a = NULL;
	struct printbuf buf = PRINTBUF;
	struct bpos bucket = u64_to_bucket(lru_k.k->p.offset);
@@ -1936,7 +1960,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
	if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset))
		return 0;
 
-	a = bch2_trans_start_alloc_update(trans, &alloc_iter, bucket);
+	a = bch2_trans_start_alloc_update(trans, bucket);
	ret = PTR_ERR_OR_ZERO(a);
	if (ret)
		goto out;
@@ -1958,21 +1982,18 @@ static int invalidate_one_bucket(struct btree_trans *trans,
	a->v.data_type		= 0;
	a->v.dirty_sectors	= 0;
	a->v.cached_sectors	= 0;
-	a->v.io_time[READ]	= atomic64_read(&c->io_clock[READ].now);
-	a->v.io_time[WRITE]	= atomic64_read(&c->io_clock[WRITE].now);
+	a->v.io_time[READ]	= bch2_current_io_time(c, READ);
+	a->v.io_time[WRITE]	= bch2_current_io_time(c, WRITE);
 
-	ret =   bch2_trans_update(trans, &alloc_iter, &a->k_i,
-				  BTREE_TRIGGER_BUCKET_INVALIDATE) ?:
-		bch2_trans_commit(trans, NULL, NULL,
-				  BCH_WATERMARK_btree|
-				  BCH_TRANS_COMMIT_no_enospc);
+	ret = bch2_trans_commit(trans, NULL, NULL,
				BCH_WATERMARK_btree|
+				BCH_TRANS_COMMIT_no_enospc);
	if (ret)
		goto out;
 
	trace_and_count(c, bucket_invalidate, c, bucket.inode, bucket.offset, cached_sectors);
	--*nr_to_invalidate;
 out:
-	bch2_trans_iter_exit(trans, &alloc_iter);
	printbuf_exit(&buf);
	return ret;
 err:
@@ -1997,9 +2018,25 @@ err:
	goto out;
 }
 
+static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter *iter,
+				    struct bch_dev *ca, bool *wrapped)
+{
+	struct bkey_s_c k;
+again:
+	k = bch2_btree_iter_peek_upto(iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX));
+	if (!k.k && !*wrapped) {
+		bch2_btree_iter_set_pos(iter, lru_pos(ca->dev_idx, 0, 0));
+		*wrapped = true;
+		goto again;
+	}
+
+	return k;
+}
+
 static void bch2_do_invalidates_work(struct work_struct *work)
 {
-	struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work);
+	struct bch_dev *ca = container_of(work, struct bch_dev, invalidate_work);
+	struct bch_fs *c = ca->fs;
	struct btree_trans *trans = bch2_trans_get(c);
	int ret = 0;
@@ -2007,31 +2044,63 @@ static void bch2_do_invalidates_work(struct work_struct *work)
	if (ret)
		goto err;
 
-	for_each_member_device(c, ca) {
-		s64 nr_to_invalidate =
-			should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
+	s64 nr_to_invalidate =
+		should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
+	struct btree_iter iter;
+	bool wrapped = false;
 
-		ret = for_each_btree_key_upto(trans, iter, BTREE_ID_lru,
-				lru_pos(ca->dev_idx, 0, 0),
-				lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX),
-				BTREE_ITER_INTENT, k,
-			invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate));
+	bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
			     lru_pos(ca->dev_idx, 0,
+				     ((bch2_current_io_time(c, READ) + U32_MAX) &
+				      LRU_TIME_MAX)), 0);
 
-		if (ret < 0) {
-			percpu_ref_put(&ca->ref);
+	while (true) {
+		bch2_trans_begin(trans);
+
+		struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped);
+		ret = bkey_err(k);
+		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+			continue;
+		if (ret)
			break;
-		}
+		if (!k.k)
+			break;
+
+		ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate);
+		if (ret)
+			break;
+
+		bch2_btree_iter_advance(&iter);
	}
+	bch2_trans_iter_exit(trans, &iter);
 err:
	bch2_trans_put(trans);
	bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+	percpu_ref_put(&ca->io_ref);
+}
+
+void bch2_dev_do_invalidates(struct bch_dev *ca)
+{
+	struct bch_fs *c = ca->fs;
+
+	if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+		return;
+
+	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate))
+		goto put_ioref;
+
+	if (queue_work(c->write_ref_wq, &ca->invalidate_work))
+		return;
+
+	bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+put_ioref:
+	percpu_ref_put(&ca->io_ref);
 }
 
 void bch2_do_invalidates(struct bch_fs *c)
 {
-	if (bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate) &&
-	    !queue_work(c->write_ref_wq, &c->invalidate_work))
-		bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
+	for_each_member_device(c, ca)
+		bch2_dev_do_invalidates(ca);
 }
 
 int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
@@ -2051,7 +2120,7 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
			     POS(ca->dev_idx, max_t(u64, ca->mi.first_bucket, bucket_start)),
-			     BTREE_ITER_PREFETCH);
+			     BTREE_ITER_prefetch);
	/*
	 * Scan the alloc btree for every bucket on @ca, and add buckets to the
	 * freespace/need_discard/need_gc_gens btrees as needed:
@@ -2083,7 +2152,7 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
		struct bch_alloc_v4 a_convert;
		const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);
 
-		ret = bch2_bucket_do_index(trans, k, a, true) ?:
+		ret = bch2_bucket_do_index(trans, ca, k, a, true) ?:
			bch2_trans_commit(trans, NULL, NULL,
					  BCH_TRANS_COMMIT_no_enospc);
		if (ret)
@@ -2155,7 +2224,7 @@ int bch2_fs_freespace_init(struct bch_fs *c)
		ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets);
		if (ret) {
-			percpu_ref_put(&ca->ref);
+			bch2_dev_put(ca);
			bch_err_fn(c, ret);
			return ret;
		}
@@ -2182,12 +2251,15 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
	u64 now;
	int ret = 0;
 
-	a = bch2_trans_start_alloc_update(trans, &iter, POS(dev, bucket_nr));
+	if (bch2_trans_relock(trans))
+		bch2_trans_begin(trans);
+
+	a = bch2_trans_start_alloc_update_noupdate(trans, &iter, POS(dev, bucket_nr));
	ret = PTR_ERR_OR_ZERO(a);
	if (ret)
		return ret;
 
-	now = atomic64_read(&c->io_clock[rw].now);
+	now = bch2_current_io_time(c, rw);
	if (a->v.io_time[rw] == now)
		goto out;
@@ -2344,16 +2416,20 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
			set_bit(ca->dev_idx, c->rw_devs[i].d);
 }
 
-void bch2_fs_allocator_background_exit(struct bch_fs *c)
+void bch2_dev_allocator_background_exit(struct bch_dev *ca)
+{
+	darray_exit(&ca->discard_buckets_in_flight);
+}
+
+void bch2_dev_allocator_background_init(struct bch_dev *ca)
 {
-	darray_exit(&c->discard_buckets_in_flight);
+	mutex_init(&ca->discard_buckets_in_flight_lock);
+	INIT_WORK(&ca->discard_work, bch2_do_discards_work);
+	INIT_WORK(&ca->discard_fast_work, bch2_do_discards_fast_work);
+	INIT_WORK(&ca->invalidate_work, bch2_do_invalidates_work);
 }
 
 void bch2_fs_allocator_background_init(struct bch_fs *c)
 {
	spin_lock_init(&c->freelist_lock);
-	mutex_init(&c->discard_buckets_in_flight_lock);
-	INIT_WORK(&c->discard_work, bch2_do_discards_work);
-	INIT_WORK(&c->discard_fast_work, bch2_do_discards_fast_work);
-	INIT_WORK(&c->invalidate_work, bch2_do_invalidates_work);
 }