diff options
Diffstat (limited to '')
-rw-r--r-- | fs/bcachefs/move.c | 250 |
1 files changed, 152 insertions, 98 deletions
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index f3dac4511a..bf68ea4944 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -6,9 +6,11 @@ #include "backpointers.h" #include "bkey_buf.h" #include "btree_gc.h" +#include "btree_io.h" #include "btree_update.h" #include "btree_update_interior.h" #include "btree_write_buffer.h" +#include "compress.h" #include "disk_groups.h" #include "ec.h" #include "errcode.h" @@ -27,12 +29,53 @@ #include <linux/ioprio.h> #include <linux/kthread.h> -static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k) +const char * const bch2_data_ops_strs[] = { +#define x(t, n, ...) [n] = #t, + BCH_DATA_OPS() +#undef x + NULL +}; + +static void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, + struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) +{ + printbuf_tabstop_push(out, 20); + prt_str(out, "rewrite ptrs:"); + prt_tab(out); + bch2_prt_u64_base2(out, data_opts->rewrite_ptrs); + prt_newline(out); + + prt_str(out, "kill ptrs: "); + prt_tab(out); + bch2_prt_u64_base2(out, data_opts->kill_ptrs); + prt_newline(out); + + prt_str(out, "target: "); + prt_tab(out); + bch2_target_to_text(out, c, data_opts->target); + prt_newline(out); + + prt_str(out, "compression: "); + prt_tab(out); + bch2_compression_opt_to_text(out, background_compression(*io_opts)); + prt_newline(out); + + prt_str(out, "extra replicas: "); + prt_tab(out); + prt_u64(out, data_opts->extra_replicas); +} + +static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k, + struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) { if (trace_move_extent_enabled()) { struct printbuf buf = PRINTBUF; bch2_bkey_val_to_text(&buf, c, k); + prt_newline(&buf); + bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts); trace_move_extent(c, buf.buf); printbuf_exit(&buf); } @@ -63,7 +106,7 @@ struct moving_io { struct data_update write; /* Must be last since it is variable size */ - struct bio_vec bi_inline_vecs[0]; + struct bio_vec bi_inline_vecs[]; }; static void move_free(struct moving_io *io) @@ -104,6 +147,15 @@ static void move_write(struct moving_io *io) return; } + if (trace_move_extent_write_enabled()) { + struct bch_fs *c = io->write.op.c; + struct printbuf buf = PRINTBUF; + + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(io->write.k.k)); + trace_move_extent_write(c, buf.buf); + printbuf_exit(&buf); + } + closure_get(&io->write.ctxt->cl); atomic_add(io->write_sectors, &io->write.ctxt->write_sectors); atomic_inc(&io->write.ctxt->write_ios); @@ -211,7 +263,7 @@ void bch2_move_stats_exit(struct bch_move_stats *stats, struct bch_fs *c) trace_move_data(c, stats); } -void bch2_move_stats_init(struct bch_move_stats *stats, char *name) +void bch2_move_stats_init(struct bch_move_stats *stats, const char *name) { memset(stats, 0, sizeof(*stats)); stats->data_type = BCH_DATA_user; @@ -234,9 +286,10 @@ int bch2_move_extent(struct moving_context *ctxt, unsigned sectors = k.k->size, pages; int ret = -ENOMEM; + trace_move_extent2(c, k, &io_opts, &data_opts); + if (ctxt->stats) ctxt->stats->pos = BBPOS(iter->btree_id, iter->pos); - trace_move_extent2(c, k); bch2_data_update_opts_normalize(k, &data_opts); @@ -342,7 +395,8 @@ err: bch2_err_matches(ret, BCH_ERR_transaction_restart)) return ret; - this_cpu_inc(c->counters[BCH_COUNTER_move_extent_start_fail]); + count_event(c, move_extent_start_fail); + if (trace_move_extent_start_fail_enabled()) { struct printbuf buf = PRINTBUF; @@ -364,13 +418,10 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, int ret = 0; if (io_opts->cur_inum != extent_k.k->p.inode) { - struct btree_iter iter; - struct bkey_s_c k; - io_opts->d.nr = 0; - for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode), - BTREE_ITER_ALL_SNAPSHOTS, k, ret) { + ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode), + BTREE_ITER_ALL_SNAPSHOTS, k, ({ if (k.k->p.offset != extent_k.k->p.inode) break; @@ -383,11 +434,8 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot }; bch2_inode_opts_get(&e.io_opts, trans->c, &inode); - ret = darray_push(&io_opts->d, e); - if (ret) - break; - } - bch2_trans_iter_exit(trans, &iter); + darray_push(&io_opts->d, e); + })); io_opts->cur_inum = extent_k.k->p.inode; } @@ -395,12 +443,10 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, if (ret) return ERR_PTR(ret); - if (extent_k.k->p.snapshot) { - struct snapshot_io_opts_entry *i; + if (extent_k.k->p.snapshot) darray_for_each(io_opts->d, i) if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot)) return &i->io_opts; - } return &io_opts->fs_io_opts; } @@ -628,7 +674,7 @@ int bch2_move_data(struct bch_fs *c, return ret; } -int __bch2_evacuate_bucket(struct moving_context *ctxt, +int bch2_evacuate_bucket(struct moving_context *ctxt, struct move_bucket_in_flight *bucket_in_flight, struct bpos bucket, int gen, struct data_update_opts _data_opts) @@ -664,21 +710,19 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, bkey_err(k = bch2_btree_iter_peek_slot(&iter))); bch2_trans_iter_exit(trans, &iter); - if (ret) { - bch_err_msg(c, ret, "looking up alloc key"); + bch_err_msg(c, ret, "looking up alloc key"); + if (ret) goto err; - } a = bch2_alloc_to_v4(k, &a_convert); - dirty_sectors = a->dirty_sectors; + dirty_sectors = bch2_bucket_sectors_dirty(*a); bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size; fragmentation = a->fragmentation_lru; - ret = bch2_btree_write_buffer_flush(trans); - if (ret) { - bch_err_msg(c, ret, "flushing btree write buffer"); + ret = bch2_btree_write_buffer_tryflush(trans); + bch_err_msg(c, ret, "flushing btree write buffer"); + if (ret) goto err; - } while (!(ret = bch2_move_ratelimit(ctxt))) { if (is_kthread && kthread_should_stop()) @@ -697,9 +741,6 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, break; if (!bp.level) { - const struct bch_extent_ptr *ptr; - unsigned i = 0; - k = bch2_backpointer_get_key(trans, &iter, bp_pos, bp, 0); ret = bkey_err(k); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) @@ -722,6 +763,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, data_opts.target = io_opts.background_target; data_opts.rewrite_ptrs = 0; + unsigned i = 0; bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) { if (ptr->dev == bucket.inode) { data_opts.rewrite_ptrs |= 1U << i; @@ -763,6 +805,8 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, if (!b) goto next; + unsigned sectors = btree_ptr_sectors_written(&b->key); + ret = bch2_btree_node_rewrite(trans, &iter, b, 0); bch2_trans_iter_exit(trans, &iter); @@ -772,11 +816,10 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, goto err; if (ctxt->rate) - bch2_ratelimit_increment(ctxt->rate, - c->opts.btree_node_size >> 9); + bch2_ratelimit_increment(ctxt->rate, sectors); if (ctxt->stats) { - atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_seen); - atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_moved); + atomic64_add(sectors, &ctxt->stats->sectors_seen); + atomic64_add(sectors, &ctxt->stats->sectors_moved); } } next: @@ -789,31 +832,13 @@ err: return ret; } -int bch2_evacuate_bucket(struct bch_fs *c, - struct bpos bucket, int gen, - struct data_update_opts data_opts, - struct bch_ratelimit *rate, - struct bch_move_stats *stats, - struct write_point_specifier wp, - bool wait_on_copygc) -{ - struct moving_context ctxt; - int ret; - - bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); - ret = __bch2_evacuate_bucket(&ctxt, NULL, bucket, gen, data_opts); - bch2_moving_ctxt_exit(&ctxt); - - return ret; -} - typedef bool (*move_btree_pred)(struct bch_fs *, void *, struct btree *, struct bch_io_opts *, struct data_update_opts *); static int bch2_move_btree(struct bch_fs *c, - enum btree_id start_btree_id, struct bpos start_pos, - enum btree_id end_btree_id, struct bpos end_pos, + struct bbpos start, + struct bbpos end, move_btree_pred pred, void *arg, struct bch_move_stats *stats) { @@ -823,7 +848,7 @@ static int bch2_move_btree(struct bch_fs *c, struct btree_trans *trans; struct btree_iter iter; struct btree *b; - enum btree_id id; + enum btree_id btree; struct data_update_opts data_opts; int ret = 0; @@ -834,15 +859,15 @@ static int bch2_move_btree(struct bch_fs *c, stats->data_type = BCH_DATA_btree; - for (id = start_btree_id; - id <= min_t(unsigned, end_btree_id, btree_id_nr_alive(c) - 1); - id++) { - stats->pos = BBPOS(id, POS_MIN); + for (btree = start.btree; + btree <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1); + btree ++) { + stats->pos = BBPOS(btree, POS_MIN); - if (!bch2_btree_id_root(c, id)->b) + if (!bch2_btree_id_root(c, btree)->b) continue; - bch2_trans_node_iter_init(trans, &iter, id, POS_MIN, 0, 0, + bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN, 0, 0, BTREE_ITER_PREFETCH); retry: ret = 0; @@ -852,8 +877,8 @@ retry: if (kthread && kthread_should_stop()) break; - if ((cmp_int(id, end_btree_id) ?: - bpos_cmp(b->key.k.p, end_pos)) > 0) + if ((cmp_int(btree, end.btree) ?: + bpos_cmp(b->key.k.p, end.pos)) > 0) break; stats->pos = BBPOS(iter.btree_id, iter.pos); @@ -910,7 +935,6 @@ static bool migrate_pred(struct bch_fs *c, void *arg, struct data_update_opts *data_opts) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const struct bch_extent_ptr *ptr; struct bch_ioctl_data *op = arg; unsigned i = 0; @@ -990,8 +1014,8 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats) int ret; ret = bch2_move_btree(c, - 0, POS_MIN, - BTREE_ID_NR, SPOS_MAX, + BBPOS_MIN, + BBPOS_MAX, rewrite_old_nodes_pred, c, stats); if (!ret) { mutex_lock(&c->sb_lock); @@ -1006,79 +1030,109 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats) return ret; } +static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg, + struct bkey_s_c k, + struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) +{ + unsigned durability = bch2_bkey_durability(c, k); + unsigned replicas = bkey_is_btree_ptr(k.k) + ? c->opts.metadata_replicas + : io_opts->data_replicas; + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + unsigned i = 0; + + bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) { + unsigned d = bch2_extent_ptr_durability(c, &p); + + if (d && durability - d >= replicas) { + data_opts->kill_ptrs |= BIT(i); + durability -= d; + } + + i++; + } + + return data_opts->kill_ptrs != 0; +} + +static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg, + struct btree *b, + struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) +{ + return drop_extra_replicas_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); +} + int bch2_data_job(struct bch_fs *c, struct bch_move_stats *stats, struct bch_ioctl_data op) { + struct bbpos start = BBPOS(op.start_btree, op.start_pos); + struct bbpos end = BBPOS(op.end_btree, op.end_pos); int ret = 0; + if (op.op >= BCH_DATA_OP_NR) + return -EINVAL; + + bch2_move_stats_init(stats, bch2_data_ops_strs[op.op]); + switch (op.op) { - case BCH_DATA_OP_REREPLICATE: - bch2_move_stats_init(stats, "rereplicate"); + case BCH_DATA_OP_rereplicate: stats->data_type = BCH_DATA_journal; ret = bch2_journal_flush_device_pins(&c->journal, -1); - - ret = bch2_move_btree(c, - op.start_btree, op.start_pos, - op.end_btree, op.end_pos, + ret = bch2_move_btree(c, start, end, rereplicate_btree_pred, c, stats) ?: ret; - ret = bch2_replicas_gc2(c) ?: ret; - - ret = bch2_move_data(c, - (struct bbpos) { op.start_btree, op.start_pos }, - (struct bbpos) { op.end_btree, op.end_pos }, + ret = bch2_move_data(c, start, end, NULL, stats, writepoint_hashed((unsigned long) current), true, rereplicate_pred, c) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; - - bch2_move_stats_exit(stats, c); break; - case BCH_DATA_OP_MIGRATE: + case BCH_DATA_OP_migrate: if (op.migrate.dev >= c->sb.nr_devices) return -EINVAL; - bch2_move_stats_init(stats, "migrate"); stats->data_type = BCH_DATA_journal; ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev); - - ret = bch2_move_btree(c, - op.start_btree, op.start_pos, - op.end_btree, op.end_pos, + ret = bch2_move_btree(c, start, end, migrate_btree_pred, &op, stats) ?: ret; - ret = bch2_replicas_gc2(c) ?: ret; - - ret = bch2_move_data(c, - (struct bbpos) { op.start_btree, op.start_pos }, - (struct bbpos) { op.end_btree, op.end_pos }, + ret = bch2_move_data(c, start, end, NULL, stats, writepoint_hashed((unsigned long) current), true, migrate_pred, &op) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; - - bch2_move_stats_exit(stats, c); break; - case BCH_DATA_OP_REWRITE_OLD_NODES: - bch2_move_stats_init(stats, "rewrite_old_nodes"); + case BCH_DATA_OP_rewrite_old_nodes: ret = bch2_scan_old_btree_nodes(c, stats); - bch2_move_stats_exit(stats, c); + break; + case BCH_DATA_OP_drop_extra_replicas: + ret = bch2_move_btree(c, start, end, + drop_extra_replicas_btree_pred, c, stats) ?: ret; + ret = bch2_move_data(c, start, end, NULL, stats, + writepoint_hashed((unsigned long) current), + true, + drop_extra_replicas_pred, c) ?: ret; + ret = bch2_replicas_gc2(c) ?: ret; break; default: ret = -EINVAL; } + bch2_move_stats_exit(stats, c); return ret; } void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats) { - prt_printf(out, "%s: data type=%s pos=", - stats->name, - bch2_data_types[stats->data_type]); + prt_printf(out, "%s: data type==", stats->name); + bch2_prt_data_type(out, stats->data_type); + prt_str(out, " pos="); bch2_bbpos_to_text(out, stats->pos); prt_newline(out); printbuf_indent_add(out, 2); |