summaryrefslogtreecommitdiffstats
path: root/src/tools/ceph_objectstore_tool.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/tools/ceph_objectstore_tool.cc 4249
1 files changed, 4249 insertions, 0 deletions
diff --git a/src/tools/ceph_objectstore_tool.cc b/src/tools/ceph_objectstore_tool.cc
new file mode 100644
index 00000000..9ae5750c
--- /dev/null
+++ b/src/tools/ceph_objectstore_tool.cc
@@ -0,0 +1,4249 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Inktank
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <boost/program_options/variables_map.hpp>
+#include <boost/program_options/parsers.hpp>
+#include <boost/scoped_ptr.hpp>
+#include <boost/optional.hpp>
+
+#include <stdlib.h>
+
+#include "common/Formatter.h"
+#include "common/errno.h"
+#include "common/ceph_argparse.h"
+
+#include "global/global_init.h"
+
+#include "os/ObjectStore.h"
+#include "os/filestore/FileJournal.h"
+#include "os/filestore/FileStore.h"
+#ifdef HAVE_LIBFUSE
+#include "os/FuseStore.h"
+#endif
+
+#include "osd/PGLog.h"
+#include "osd/OSD.h"
+#include "osd/PG.h"
+#include "osd/ECUtil.h"
+
+#include "json_spirit/json_spirit_value.h"
+#include "json_spirit/json_spirit_reader.h"
+
+#include "rebuild_mondb.h"
+#include "ceph_objectstore_tool.h"
+#include "include/compat.h"
+#include "include/util.h"
+
+namespace po = boost::program_options;
+
+#ifdef INTERNAL_TEST
+/**
+ * Build the CompatSet used when INTERNAL_TEST is defined.
+ *
+ * The compat and ro_compat feature sets stay empty; only incompat features
+ * are populated. SNAPMAPPER and SHARDS are added only when INTERNAL_TEST2
+ * is defined as well.
+ */
+CompatSet get_test_compat_set() {
+  CompatSet::FeatureSet compat;
+  CompatSet::FeatureSet ro_compat;
+  CompatSet::FeatureSet incompat;
+
+  incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BASE);
+  incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_PGINFO);
+  incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_OLOC);
+  incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEC);
+  incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_CATEGORIES);
+  incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_HOBJECTPOOL);
+  incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BIGINFO);
+  incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBINFO);
+  incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBLOG);
+#ifdef INTERNAL_TEST2
+  incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SNAPMAPPER);
+  incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
+#endif
+
+  return CompatSet(compat, ro_compat, incompat);
+}
+#endif
+
+// Upper bound on the number of bytes requested by a single read call.
+const ssize_t max_read = 1024 * 1024;
+// Sentinel value meaning "no file descriptor has been set".
+const int fd_none = INT_MIN;
+// NOTE(review): presumably true when stdout is a terminal; it is not
+// assigned in this part of the file -- confirm.
+bool outistty;
+// When true, operations skip the steps that would modify the object store.
+bool dry_run;
+
+/// Callback interface invoked once for each object visited by the
+/// action_on_all_objects* helpers.
+struct action_on_object_t {
+  virtual ~action_on_object_t() = default;
+  virtual void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj,
+                    object_info_t &oi) = 0;
+};
+
+/**
+ * Invoke @p action on every object of a single collection.
+ *
+ * Objects are listed in batches and pgmeta objects are skipped. For any
+ * collection other than the meta collection, the OI_ATTR attribute is read
+ * and decoded into the object_info_t passed to the action. A failure to
+ * read or decode that attribute is reported on cerr and the action is still
+ * called with whatever the object_info_t holds at that point.
+ *
+ * @param store   object store to walk
+ * @param coll    collection to list
+ * @param action  callback run for each object
+ * @param debug   not used by this function
+ * @return 0 on success, or the negative result of collection_list
+ */
+int _action_on_all_objects_in_pg(ObjectStore *store, coll_t coll, action_on_object_t &action, bool debug)
+{
+  const unsigned batch_size = 100;
+  auto ch = store->open_collection(coll);
+  ghobject_t cursor;
+  while (!cursor.is_max()) {
+    vector<ghobject_t> batch;
+    int r = store->collection_list(ch, cursor, ghobject_t::get_max(),
+                                   batch_size, &batch, &cursor);
+    if (r < 0) {
+      cerr << "Error listing collection: " << coll << ", "
+           << cpp_strerror(r) << std::endl;
+      return r;
+    }
+    for (ghobject_t &obj : batch) {
+      if (obj.is_pgmeta())
+        continue;
+      object_info_t oi;
+      if (coll != coll_t::meta()) {
+        bufferlist attr;
+        r = store->getattr(ch, obj, OI_ATTR, attr);
+        if (r >= 0) {
+          auto bp = attr.cbegin();
+          try {
+            decode(oi, bp);
+          } catch (...) {
+            r = -EINVAL;
+            cerr << "Error decoding attr on : " << make_pair(coll, obj) << ", "
+                 << cpp_strerror(r) << std::endl;
+          }
+        } else {
+          cerr << "Error getting attr on : " << make_pair(coll, obj) << ", "
+               << cpp_strerror(r) << std::endl;
+        }
+      }
+      // Errors above are reported only; the action still runs.
+      action.call(store, coll, obj, oi);
+    }
+  }
+  return 0;
+}
+
+/**
+ * Invoke @p action on every object of the PG named by @p pgidstr.
+ *
+ * All collections are scanned because the PG may belong to an EC pool while
+ * no shard was specified: when the parsed pgid has no shard, every
+ * collection whose pg matches is visited; otherwise only an exact match is.
+ *
+ * @param store    object store to walk
+ * @param pgidstr  textual pgid (the result of parsing it is not checked)
+ * @param action   callback run for each object
+ * @param debug    when true, progress is reported on cerr
+ * @return the negative error that stopped the scan, otherwise the result of
+ *         the last call made (list_collections when nothing matched)
+ */
+int action_on_all_objects_in_pg(ObjectStore *store, string pgidstr, action_on_object_t &action, bool debug)
+{
+  vector<coll_t> all_colls;
+  int r = store->list_collections(all_colls);
+  if (r < 0) {
+    cerr << "Error listing collections: " << cpp_strerror(r) << std::endl;
+    return r;
+  }
+
+  spg_t wanted;
+  wanted.parse(pgidstr.c_str());
+
+  vector<coll_t> matches;
+  for (const coll_t &candidate : all_colls) {
+    spg_t cand_pgid;
+    if (!candidate.is_pg(&cand_pgid))
+      continue;
+
+    // If an exact match or treat no shard as any shard
+    if (cand_pgid == wanted ||
+        (wanted.is_no_shard() && wanted.pgid == cand_pgid.pgid)) {
+      matches.push_back(candidate);
+    }
+  }
+
+  if (debug)
+    cerr << matches.size() << " pgs to scan" << std::endl;
+
+  unsigned scanned = 0;
+  for (const coll_t &c : matches) {
+    if (debug)
+      cerr << "Scanning " << c << ", " << scanned << "/"
+           << matches.size() << " completed" << std::endl;
+    r = _action_on_all_objects_in_pg(store, c, action, debug);
+    if (r < 0)
+      break;
+    ++scanned;
+  }
+  return r;
+}
+
+/// Run @p action on every object of exactly the collection @p coll; this
+/// forwards to _action_on_all_objects_in_pg and returns its result.
+int action_on_all_objects_in_exact_pg(ObjectStore *store, coll_t coll, action_on_object_t &action, bool debug)
+{
+  return _action_on_all_objects_in_pg(store, coll, action, debug);
+}
+
+/**
+ * Invoke @p action on every object of every PG collection in the store.
+ *
+ * @param store   object store to walk
+ * @param action  callback run for each object
+ * @param debug   when true, progress is reported on cerr
+ * @return 0 on success, or the first negative error encountered
+ */
+int _action_on_all_objects(ObjectStore *store, action_on_object_t &action, bool debug)
+{
+  vector<coll_t> all_colls;
+  int r = store->list_collections(all_colls);
+  if (r < 0) {
+    cerr << "Error listing collections: " << cpp_strerror(r) << std::endl;
+    return r;
+  }
+
+  // Only PG collections are of interest.
+  vector<coll_t> pg_colls;
+  for (const coll_t &c : all_colls) {
+    if (c.is_pg())
+      pg_colls.push_back(c);
+  }
+
+  if (debug)
+    cerr << pg_colls.size() << " pgs to scan" << std::endl;
+
+  unsigned scanned = 0;
+  for (const coll_t &c : pg_colls) {
+    if (debug)
+      cerr << "Scanning " << c << ", " << scanned << "/"
+           << pg_colls.size() << " completed" << std::endl;
+    r = _action_on_all_objects_in_pg(store, c, action, debug);
+    if (r < 0)
+      return r;
+    ++scanned;
+  }
+  return 0;
+}
+
+/// Run @p action on every object of every PG; this forwards to
+/// _action_on_all_objects and returns its result.
+int action_on_all_objects(ObjectStore *store, action_on_object_t &action, bool debug)
+{
+  return _action_on_all_objects(store, action, debug);
+}
+
+/// Ordered list of (collection, object) pairs together with a dumper.
+struct pgid_object_list {
+  list<pair<coll_t, ghobject_t> > _objects;
+
+  /// Append one (collection, object) pair to the list.
+  void insert(coll_t coll, ghobject_t &ghobj) {
+    _objects.emplace_back(coll, ghobj);
+  }
+
+  /**
+   * Write the list to cout through @p f.
+   *
+   * In human-readable mode every entry is flushed on a line of its own and
+   * the "coll" field is emitted only for non-PG collections. Otherwise all
+   * entries are wrapped in one "pgid_objects" array that is flushed once at
+   * the end, and "coll" is always emitted.
+   */
+  void dump(Formatter *f, bool human_readable) const {
+    const bool single_array = !human_readable;
+    if (single_array)
+      f->open_array_section("pgid_objects");
+
+    for (const auto &entry : _objects) {
+      const coll_t &coll = entry.first;
+      f->open_array_section("pgid_object");
+      spg_t pgid;
+      const bool is_pg = coll.is_pg(&pgid);
+      if (is_pg)
+        f->dump_string("pgid", stringify(pgid));
+      if (!is_pg || !human_readable)
+        f->dump_string("coll", coll.to_str());
+      f->open_object_section("ghobject");
+      entry.second.dump(f);
+      f->close_section();  // ghobject
+      f->close_section();  // pgid_object
+      if (human_readable) {
+        f->flush(cout);
+        cout << std::endl;
+      }
+    }
+
+    if (single_array) {
+      f->close_section();
+      f->flush(cout);
+      cout << std::endl;
+    }
+  }
+};
+
+/**
+ * Object action that collects the objects matching a name and, optionally,
+ * a namespace.
+ *
+ * An empty name matches every object name; an unset namespace matches every
+ * namespace. When need_snapset is true, objects for which
+ * hobj.has_snapset() is false are ignored.
+ */
+struct lookup_ghobject : public action_on_object_t {
+  pgid_object_list _objects;
+  const string _name;
+  const boost::optional<std::string> _namespace;
+  bool _need_snapset;
+
+  lookup_ghobject(const string& name, const boost::optional<std::string>& nspace, bool need_snapset = false)
+    : _name(name),
+      _namespace(nspace),
+      _need_snapset(need_snapset) { }
+
+  /// Record (coll, ghobj) when it passes every configured filter.
+  void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) override {
+    if (_need_snapset && !ghobj.hobj.has_snapset())
+      return;
+    const bool name_ok = _name.empty() || ghobj.hobj.oid.name == _name;
+    if (!name_ok)
+      return;
+    if (_namespace && !(ghobj.hobj.nspace == _namespace))
+      return;
+    _objects.insert(coll, ghobj);
+  }
+
+  /// Number of matches collected so far.
+  int size() const {
+    return _objects._objects.size();
+  }
+
+  /// Remove and return the oldest match; the list must not be empty.
+  pair<coll_t, ghobject_t> pop() {
+    auto &pending = _objects._objects;
+    pair<coll_t, ghobject_t> head = pending.front();
+    pending.pop_front();
+    return head;
+  }
+
+  /// Dump all collected matches; see pgid_object_list::dump.
+  void dump(Formatter *f, bool human_readable) const {
+    _objects.dump(f, human_readable);
+  }
+};
+
+// Descriptor handed to write_section()/write_simple() by the export code;
+// starts out as fd_none.
+int file_fd = fd_none;
+// Enables additional diagnostic output on cerr.
+bool debug;
+// NOTE(review): presumably lets the tool proceed past safety checks; it is
+// not consulted in this part of the file -- confirm.
+bool force = false;
+// NOTE(review): super_header is declared elsewhere; how this instance is
+// used is not visible in this part of the file.
+super_header sh;
+
+/**
+ * Read @p fd into @p bl until read_fd reports end of input (0 bytes).
+ *
+ * Each call requests at most max_read bytes. The final assertion compares
+ * bl's total length with the number of bytes read here, so @p bl is
+ * expected to be empty on entry.
+ *
+ * @return 0 on success, or the negative value returned by read_fd
+ */
+static int get_fd_data(int fd, bufferlist &bl)
+{
+  uint64_t total = 0;
+  for (;;) {
+    ssize_t got = bl.read_fd(fd, max_read);
+    if (got < 0) {
+      cerr << "read_fd error " << cpp_strerror(got) << std::endl;
+      return got;
+    }
+    if (got == 0)
+      break;
+    total += got;
+  }
+
+  ceph_assert(bl.length() == total);
+  return 0;
+}
+
+/**
+ * Load the pg log and missing set of @p pgid from @p fs.
+ *
+ * Text that read_log_and_missing writes to its output stream is echoed to
+ * cerr only when the global debug flag is set.
+ *
+ * @param struct_ver  must be greater than 0 (asserted)
+ * @return 0 on success, -ENOENT when the PG's collection cannot be opened,
+ *         -EFAULT when a buffer::error is thrown while reading
+ */
+int get_log(ObjectStore *fs, __u8 struct_ver,
+            spg_t pgid, const pg_info_t &info,
+            PGLog::IndexedLog &log, pg_missing_t &missing)
+{
+  try {
+    auto ch = fs->open_collection(coll_t(pgid));
+    if (!ch)
+      return -ENOENT;
+
+    ostringstream messages;
+    ceph_assert(struct_ver > 0);
+    PGLog::read_log_and_missing(
+      fs, ch,
+      pgid.make_pgmeta_oid(),
+      info, log, missing,
+      messages,
+      g_ceph_context->_conf->osd_ignore_stale_divergent_priors);
+    if (debug) {
+      const string text = messages.str();
+      if (!text.empty())
+        cerr << text << std::endl;
+    }
+  } catch (const buffer::error &e) {
+    cerr << "read_log_and_missing threw exception error " << e.what() << std::endl;
+    return -EFAULT;
+  }
+  return 0;
+}
+
+// Dump a pg log and its missing set through `formatter` to `out`.
+//
+// The output is one "op_log" object holding a "pg_log_t" section followed
+// by a "pg_missing_t" section. The formatter is flushed to `out` once after
+// the log section and once more after everything has been closed.
+void dump_log(Formatter *formatter, ostream &out, pg_log_t &log,
+ pg_missing_t &missing)
+{
+ formatter->open_object_section("op_log");
+ formatter->open_object_section("pg_log_t");
+ log.dump(formatter);
+ formatter->close_section();
+ // Intermediate flush, issued before the missing set is dumped.
+ formatter->flush(out);
+ formatter->open_object_section("pg_missing_t");
+ missing.dump(formatter);
+ formatter->close_section();
+ // Closes the enclosing "op_log" section.
+ formatter->close_section();
+ formatter->flush(out);
+}
+
+/**
+ * Remove every collection that is either a temp collection or a PG
+ * collection for which PG::_has_removal_flag() is true.
+ *
+ * Based on part of OSD::load_pgs(). Collections matching neither condition
+ * are silently left alone.
+ *
+ * @return 0 on success, or the negative result of list_collections
+ */
+int finish_remove_pgs(ObjectStore *store)
+{
+  vector<coll_t> colls;
+  const int r = store->list_collections(colls);
+  if (r < 0) {
+    cerr << "finish_remove_pgs: failed to list pgs: " << cpp_strerror(r)
+         << std::endl;
+    return r;
+  }
+
+  for (const coll_t &c : colls) {
+    spg_t pgid;
+    const bool removable =
+      c.is_temp(&pgid) ||
+      (c.is_pg(&pgid) && PG::_has_removal_flag(store, pgid));
+    if (!removable)
+      continue;
+
+    cout << "finish_remove_pgs " << c << " removing " << pgid << std::endl;
+    OSD::recursive_remove_collection(g_ceph_context, store, pgid, c);
+  }
+  return 0;
+}
+
+#pragma GCC diagnostic ignored "-Wpragmas"
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+
+/**
+ * Add to @p t the operation that flags PG @p pgid for removal by setting
+ * the "_remove" omap key on its pgmeta object.
+ *
+ * A failure of peek_map_epoch only produces a warning. The PG info is read
+ * to obtain its struct version, which must be at least 8 (asserted).
+ *
+ * @return 0 on success, or the negative result of PG::read_info
+ */
+int mark_pg_for_removal(ObjectStore *fs, spg_t pgid, ObjectStore::Transaction *t)
+{
+  pg_info_t info(pgid);
+  coll_t coll(pgid);
+  ghobject_t pgmeta_oid(info.pgid.make_pgmeta_oid());
+
+  epoch_t map_epoch = 0;
+  if (PG::peek_map_epoch(fs, pgid, &map_epoch) < 0)
+    cerr << __func__ << " warning: peek_map_epoch reported error" << std::endl;
+
+  PastIntervals past_intervals;
+  __u8 struct_v;
+  const int r = PG::read_info(fs, pgid, coll, info, past_intervals, struct_v);
+  if (r < 0) {
+    cerr << __func__ << " error on read_info " << cpp_strerror(r) << std::endl;
+    return r;
+  }
+  ceph_assert(struct_v >= 8);
+
+  // new omap key
+  cout << "setting '_remove' omap key" << std::endl;
+  map<string,bufferlist> values;
+  encode(static_cast<char>(1), values["_remove"]);
+  t->omap_setkeys(coll, pgmeta_oid, values);
+  return 0;
+}
+
+#pragma GCC diagnostic pop
+#pragma GCC diagnostic warning "-Wpragmas"
+
+/**
+ * Register a completion callback on @p txn, invoke @p func, then block
+ * until that callback has run.
+ *
+ * @param txn   transaction that receives the on-complete callback
+ * @param func  callable invoked after the callback has been registered
+ */
+template<typename Func>
+void wait_until_done(ObjectStore::Transaction* txn, Func&& func)
+{
+  bool completed = false;
+  std::condition_variable completed_cv;
+  std::mutex guard;
+  txn->register_on_complete(make_lambda_context([&]() {
+    std::unique_lock holder{guard};
+    completed = true;
+    completed_cv.notify_one();
+  }));
+  std::move(func)();
+  std::unique_lock waiter{guard};
+  while (!completed)
+    completed_cv.wait(waiter);
+}
+
+/**
+ * Flag PG @p r_pgid for removal and then carry the removal out.
+ *
+ * Unless dry_run is set, pending removals are finished first. In dry-run
+ * mode the function stops after checking that the collection exists and
+ * printing the message.
+ *
+ * @return 0 on success (or dry run), -ENOENT when the PG's collection does
+ *         not exist, or the negative result of mark_pg_for_removal
+ */
+int initiate_new_remove_pg(ObjectStore *store, spg_t r_pgid)
+{
+  const coll_t target(r_pgid);
+
+  if (!dry_run)
+    finish_remove_pgs(store);
+  if (!store->collection_exists(target))
+    return -ENOENT;
+
+  cout << " marking collection for removal" << std::endl;
+  if (dry_run)
+    return 0;
+
+  ObjectStore::Transaction rmt;
+  const int r = mark_pg_for_removal(store, r_pgid, &rmt);
+  if (r < 0)
+    return r;
+
+  ObjectStore::CollectionHandle ch = store->open_collection(target);
+  store->queue_transaction(ch, std::move(rmt));
+  finish_remove_pgs(store);
+  return r;
+}
+
+/**
+ * Add to @p t the omap keys that persist @p info and @p past_intervals on
+ * the PG's pgmeta object.
+ *
+ * A default-constructed pg_info_t is passed as the "last written" info.
+ * The keys are added to the transaction even when _prepare_write_info
+ * reports a failure; the failure is printed and returned.
+ *
+ * @return the result of PG::_prepare_write_info (0 on success)
+ */
+int write_info(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
+               PastIntervals &past_intervals)
+{
+  coll_t coll(info.pgid);
+  ghobject_t pgmeta_oid(info.pgid.make_pgmeta_oid());
+
+  pg_info_t last_written_info;  // intentionally left empty
+  map<string,bufferlist> km;
+  const int ret = PG::_prepare_write_info(g_ceph_context,
+                                          &km, epoch,
+                                          info,
+                                          last_written_info,
+                                          past_intervals,
+                                          true, true, false);
+  if (ret)
+    cerr << "Failed to write info" << std::endl;
+  t.omap_setkeys(coll, pgmeta_oid, km);
+  return ret;
+}
+
+using divergent_priors_t = map<eversion_t, hobject_t>;
+
+/**
+ * Add to @p t everything needed to persist a PG: its info and
+ * past_intervals (via write_info) followed by its log.
+ *
+ * When @p divergent is non-empty the log is written without a missing set,
+ * and @p missing must then be empty (asserted). Otherwise the log is
+ * written together with @p missing.
+ *
+ * @return 0 on success, or the non-zero result of write_info
+ */
+int write_pg(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
+             pg_log_t &log, PastIntervals &past_intervals,
+             divergent_priors_t &divergent,
+             pg_missing_t &missing)
+{
+  cout << __func__ << " epoch " << epoch << " info " << info << std::endl;
+  if (int ret = write_info(t, epoch, info, past_intervals); ret)
+    return ret;
+
+  coll_t coll(info.pgid);
+  const ghobject_t pgmeta_oid = info.pgid.make_pgmeta_oid();
+  map<string,bufferlist> km;
+
+  if (divergent.empty()) {
+    pg_missing_tracker_t tmissing(missing);
+    bool rebuilt_missing_set_with_deletes = missing.may_include_deletes;
+    PGLog::write_log_and_missing(
+      t, &km, log, coll, pgmeta_oid, tmissing, true,
+      &rebuilt_missing_set_with_deletes);
+  } else {
+    ceph_assert(missing.get_items().empty());
+    PGLog::write_log_and_missing_wo_missing(
+      t, &km, log, coll, pgmeta_oid, divergent, true);
+  }
+  t.omap_setkeys(coll, pgmeta_oid, km);
+  return 0;
+}
+
+/**
+ * Trim the stored pg log of @p pgid so that at most
+ * osd_max_pg_log_entries versions remain behind info.last_update.
+ *
+ * Log entries live as omap keys on the pgmeta object. Keys are gathered in
+ * batches of at most osd_pg_log_trim_max and removed one transaction per
+ * batch; afterwards info.log_tail is advanced to the last trimmed version
+ * and written back, and the store is compacted.
+ *
+ * In dry-run mode nothing is removed or written. Each scan then resumes
+ * after the last key of the previous batch: because the keys are still
+ * present, restarting from the first key would gather the same batch
+ * forever once a batch fills up.
+ *
+ * NOTE(review): an entry that fails to decode is reported but still
+ * evaluated with whatever was decoded so far.
+ *
+ * @param store           object store holding the PG
+ * @param coll            collection of the PG
+ * @param info            PG info; log_tail is updated when entries were trimmed
+ * @param pgid            PG whose log is trimmed
+ * @param map_epoch       epoch passed to write_info
+ * @param past_intervals  passed to write_info
+ * @return 0 on success, or the non-zero result of write_info
+ */
+int do_trim_pg_log(ObjectStore *store, const coll_t &coll,
+                   pg_info_t &info, const spg_t &pgid,
+                   epoch_t map_epoch,
+                   PastIntervals &past_intervals)
+{
+  ghobject_t oid = pgid.make_pgmeta_oid();
+  struct stat st;
+  auto ch = store->open_collection(coll);
+  int r = store->stat(ch, oid, &st);
+  ceph_assert(r == 0);
+  ceph_assert(st.st_size == 0);
+
+  cerr << "Log bounds are: " << "(" << info.log_tail << ","
+       << info.last_update << "]" << std::endl;
+
+  uint64_t max_entries = g_ceph_context->_conf->osd_max_pg_log_entries;
+  if (info.last_update.version - info.log_tail.version <= max_entries) {
+    cerr << "Log not larger than osd_max_pg_log_entries " << max_entries << std::endl;
+    return 0;
+  }
+
+  ceph_assert(info.last_update.version > max_entries);
+  version_t trim_to = info.last_update.version - max_entries;
+  size_t trim_at_once = g_ceph_context->_conf->osd_pg_log_trim_max;
+  eversion_t new_tail;
+  bool done = false;
+  // Last key gathered by the previous batch; only consulted in dry-run mode.
+  string last_gathered_key;
+
+  while (!done) {
+    // gather keys so we can delete them in a batch without
+    // affecting the iterator
+    set<string> keys_to_trim;
+    {
+      ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid);
+      if (!p)
+        break;
+      // A real run deletes each batch, so scanning from the start always
+      // sees fresh keys. A dry run keeps them, so skip past what was
+      // already visited.
+      if (dry_run && !last_gathered_key.empty())
+        p->upper_bound(last_gathered_key);
+      else
+        p->seek_to_first();
+      for (; p->valid(); p->next()) {
+        // Skip the keys that are not pg log entries.
+        if (p->key()[0] == '_')
+          continue;
+        if (p->key() == "can_rollback_to")
+          continue;
+        if (p->key() == "divergent_priors")
+          continue;
+        if (p->key() == "rollback_info_trimmed_to")
+          continue;
+        if (p->key() == "may_include_deletes_in_missing")
+          continue;
+        if (p->key().substr(0, 7) == string("missing"))
+          continue;
+        if (p->key().substr(0, 4) == string("dup_"))
+          continue;
+
+        bufferlist bl = p->value();
+        auto bp = bl.cbegin();
+        pg_log_entry_t e;
+        try {
+          e.decode_with_checksum(bp);
+        } catch (const buffer::error &err) {
+          cerr << "Error reading pg log entry: " << err << std::endl;
+        }
+        if (debug) {
+          cerr << "read entry " << e << std::endl;
+        }
+        if (e.version.version > trim_to) {
+          done = true;
+          break;
+        }
+        keys_to_trim.insert(p->key());
+        last_gathered_key = p->key();
+        new_tail = e.version;
+        if (keys_to_trim.size() >= trim_at_once)
+          break;
+      }
+
+      if (!p->valid())
+        done = true;
+    } // deconstruct ObjectMapIterator
+
+    // delete the keys
+    if (!dry_run && !keys_to_trim.empty()) {
+      cout << "Removing keys " << *keys_to_trim.begin() << " - " << *keys_to_trim.rbegin() << std::endl;
+      ObjectStore::Transaction t;
+      t.omap_rmkeys(coll, oid, keys_to_trim);
+      store->queue_transaction(ch, std::move(t));
+      ch->flush();
+    }
+  }
+
+  // update pg info with new tail
+  if (!dry_run && new_tail != eversion_t()) {
+    info.log_tail = new_tail;
+    ObjectStore::Transaction t;
+    int ret = write_info(t, map_epoch, info, past_intervals);
+    if (ret)
+      return ret;
+    store->queue_transaction(ch, std::move(t));
+    ch->flush();
+  }
+
+  // compact the db since we just removed a bunch of data
+  cerr << "Finished trimming, now compacting..." << std::endl;
+  if (!dry_run)
+    store->compact();
+  return 0;
+}
+
+// Maximum number of omap entries returned by one get_omap_batch() call.
+const int OMAP_BATCH_SIZE = 25;
+
+/**
+ * Replace the contents of @p oset with up to OMAP_BATCH_SIZE key/value
+ * pairs taken from @p iter, advancing the iterator past each pair taken.
+ */
+void get_omap_batch(ObjectMap::ObjectMapIterator &iter, map<string, bufferlist> &oset)
+{
+  oset.clear();
+  int remaining = OMAP_BATCH_SIZE;
+  while (remaining > 0 && iter->valid()) {
+    oset.emplace(iter->key(), iter->value());
+    --remaining;
+    iter->next();
+  }
+}
+
+/**
+ * Export one object to the export stream (file_fd).
+ *
+ * Sections are written in this order: OBJECT_BEGIN (carrying the decoded
+ * object_info_t), zero or more DATA sections, ATTRS, OMAP_HDR, zero or more
+ * OMAP batches and finally OBJECT_END.
+ *
+ * Each DATA section records the number of bytes the store actually
+ * returned, so the section length always agrees with its payload even if a
+ * read comes back shorter than requested.
+ *
+ * @param store  object store to read from
+ * @param cid    collection containing the object
+ * @param obj    object to export
+ * @return 0 on success; -EINVAL when a data read returns no bytes, -ENOENT
+ *         when no omap iterator is available, otherwise the first failing
+ *         result from the store or from writing a section
+ */
+int ObjectStoreTool::export_file(ObjectStore *store, coll_t cid, ghobject_t &obj)
+{
+  struct stat st;
+
+  auto ch = store->open_collection(cid);
+  int ret = store->stat(ch, obj, &st);
+  if (ret < 0)
+    return ret;
+
+  cerr << "Read " << obj << std::endl;
+
+  mysize_t total = st.st_size;
+  if (debug)
+    cerr << "size=" << total << std::endl;
+
+  object_begin objb(obj);
+
+  {
+    bufferptr bp;
+    bufferlist bl;
+    ret = store->getattr(ch, obj, OI_ATTR, bp);
+    if (ret < 0) {
+      cerr << "getattr failure object_info " << ret << std::endl;
+      return ret;
+    }
+    bl.push_back(bp);
+    decode(objb.oi, bl);
+    if (debug)
+      cerr << "object_info: " << objb.oi << std::endl;
+  }
+
+  // NOTE: we include whiteouts, lost, etc.
+
+  ret = write_section(TYPE_OBJECT_BEGIN, objb, file_fd);
+  if (ret < 0)
+    return ret;
+
+  uint64_t offset = 0;
+  bufferlist rawdatabl;
+  while (total > 0) {
+    rawdatabl.clear();
+    mysize_t len = max_read;
+    if (len > total)
+      len = total;
+
+    ret = store->read(ch, obj, offset, len, rawdatabl);
+    if (ret < 0)
+      return ret;
+    if (ret == 0)
+      return -EINVAL;
+
+    // Describe what was actually read rather than what was requested.
+    const mysize_t got = ret;
+    data_section dblock(offset, got, rawdatabl);
+    if (debug)
+      cerr << "data section offset=" << offset << " len=" << got << std::endl;
+
+    total -= got;
+    offset += got;
+
+    ret = write_section(TYPE_DATA, dblock, file_fd);
+    if (ret)
+      return ret;
+  }
+
+  //Handle attrs for this object
+  map<string,bufferptr> aset;
+  ret = store->getattrs(ch, obj, aset);
+  if (ret)
+    return ret;
+  attr_section as(aset);
+  ret = write_section(TYPE_ATTRS, as, file_fd);
+  if (ret)
+    return ret;
+
+  if (debug) {
+    cerr << "attrs size " << aset.size() << std::endl;
+  }
+
+  //Handle omap information
+  bufferlist hdrbuf;
+  ret = store->omap_get_header(ch, obj, &hdrbuf, true);
+  if (ret < 0) {
+    cerr << "omap_get_header: " << cpp_strerror(ret) << std::endl;
+    return ret;
+  }
+
+  omap_hdr_section ohs(hdrbuf);
+  ret = write_section(TYPE_OMAP_HDR, ohs, file_fd);
+  if (ret)
+    return ret;
+
+  ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(ch, obj);
+  if (!iter) {
+    ret = -ENOENT;
+    cerr << "omap_get_iterator: " << cpp_strerror(ret) << std::endl;
+    return ret;
+  }
+  iter->seek_to_first();
+  int mapcount = 0;
+  map<string, bufferlist> out;
+  while (iter->valid()) {
+    get_omap_batch(iter, out);
+
+    if (out.empty())
+      break;
+
+    mapcount += out.size();
+    omap_section oms(out);
+    ret = write_section(TYPE_OMAP, oms, file_fd);
+    if (ret)
+      return ret;
+  }
+  if (debug)
+    cerr << "omap map size " << mapcount << std::endl;
+
+  ret = write_simple(TYPE_OBJECT_END, file_fd);
+  if (ret)
+    return ret;
+
+  return 0;
+}
+
+/**
+ * Export every regular object of collection @p coll.
+ *
+ * The pgmeta object, temp objects and objects for which is_no_gen() is
+ * false are skipped. Meeting a meta object is a fatal assertion.
+ *
+ * @return 0 on success, or the first negative result from listing the
+ *         collection or exporting an object
+ */
+int ObjectStoreTool::export_files(ObjectStore *store, coll_t coll)
+{
+  auto ch = store->open_collection(coll);
+  ghobject_t cursor;
+  while (!cursor.is_max()) {
+    vector<ghobject_t> batch;
+    int r = store->collection_list(ch, cursor, ghobject_t::get_max(), 300,
+                                   &batch, &cursor);
+    if (r < 0)
+      return r;
+    for (ghobject_t &obj : batch) {
+      ceph_assert(!obj.hobj.is_meta());
+      const bool skip =
+        obj.is_pgmeta() || obj.hobj.is_temp() || !obj.is_no_gen();
+      if (skip)
+        continue;
+      r = export_file(store, coll, obj);
+      if (r < 0)
+        return r;
+    }
+  }
+  return 0;
+}
+
+/**
+ * Overwrite the stored incremental osdmap of epoch @p e with @p bl.
+ *
+ * @param e      target epoch; 0 means "use the epoch found in @p bl"
+ * @param bl     encoded OSDMap::Incremental
+ * @param force  continue despite an epoch mismatch and create the object
+ *               when it does not exist yet
+ * @return 0 on success (also in dry-run mode, where nothing is written),
+ *         -EINVAL on an epoch mismatch without force, -ENOENT when the
+ *         object does not exist without force
+ */
+int set_inc_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force) {
+  OSDMap::Incremental inc;
+  auto it = bl.cbegin();
+  inc.decode(it);
+
+  const bool mismatch = (e != 0 && e != inc.epoch);
+  if (e == 0)
+    e = inc.epoch;
+  if (mismatch) {
+    cerr << "incremental.epoch mismatch: "
+         << inc.epoch << " != " << e << std::endl;
+    if (!force)
+      return -EINVAL;
+    cerr << "But will continue anyway." << std::endl;
+  }
+
+  auto ch = store->open_collection(coll_t::meta());
+  const ghobject_t inc_oid = OSD::get_inc_osdmap_pobject_name(e);
+  if (!store->exists(ch, inc_oid)) {
+    cerr << "inc-osdmap (" << inc_oid << ") does not exist." << std::endl;
+    if (!force)
+      return -ENOENT;
+    cout << "Creating a new epoch." << std::endl;
+  }
+
+  if (dry_run)
+    return 0;
+
+  // Write the new content, then cut off anything left beyond it.
+  ObjectStore::Transaction t;
+  t.write(coll_t::meta(), inc_oid, 0, bl.length(), bl);
+  t.truncate(coll_t::meta(), inc_oid, bl.length());
+  store->queue_transaction(ch, std::move(t));
+  return 0;
+}
+
+/**
+ * Read the stored incremental osdmap of epoch @p e into @p bl.
+ *
+ * @return 0 on success, -ENOENT when the read fails for any reason
+ */
+int get_inc_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl)
+{
+  auto ch = store->open_collection(coll_t::meta());
+  const int r = store->read(ch,
+                            OSD::get_inc_osdmap_pobject_name(e),
+                            0, 0, bl);
+  return r < 0 ? -ENOENT : 0;
+}
+
+/**
+ * Overwrite the stored full osdmap of epoch @p e with @p bl.
+ *
+ * @param e      target epoch; 0 means "use the epoch found in @p bl"
+ * @param bl     encoded OSDMap
+ * @param force  continue despite an epoch mismatch and create the object
+ *               when it does not exist yet
+ * @return 0 on success (also in dry-run mode, where nothing is written),
+ *         -EINVAL on an epoch mismatch without force, -ENOENT when the
+ *         object does not exist without force
+ */
+int set_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force) {
+  OSDMap osdmap;
+  osdmap.decode(bl);
+
+  const bool mismatch = (e != 0 && e != osdmap.get_epoch());
+  if (e == 0)
+    e = osdmap.get_epoch();
+  if (mismatch) {
+    cerr << "osdmap.epoch mismatch: "
+         << e << " != " << osdmap.get_epoch() << std::endl;
+    if (!force)
+      return -EINVAL;
+    cerr << "But will continue anyway." << std::endl;
+  }
+
+  auto ch = store->open_collection(coll_t::meta());
+  const ghobject_t full_oid = OSD::get_osdmap_pobject_name(e);
+  if (!store->exists(ch, full_oid)) {
+    cerr << "osdmap (" << full_oid << ") does not exist." << std::endl;
+    if (!force)
+      return -ENOENT;
+    cout << "Creating a new epoch." << std::endl;
+  }
+
+  if (dry_run)
+    return 0;
+
+  // Write the new content, then cut off anything left beyond it.
+  ObjectStore::Transaction t;
+  t.write(coll_t::meta(), full_oid, 0, bl.length(), bl);
+  t.truncate(coll_t::meta(), full_oid, bl.length());
+  store->queue_transaction(ch, std::move(t));
+  return 0;
+}
+
+/**
+ * Read the stored full osdmap of epoch @p e into @p bl and decode it into
+ * @p osdmap. With the global debug flag set, the decoded map is printed.
+ *
+ * @return 0 on success, -ENOENT when the read fails
+ */
+int get_osdmap(ObjectStore *store, epoch_t e, OSDMap &osdmap, bufferlist& bl)
+{
+  ObjectStore::CollectionHandle ch = store->open_collection(coll_t::meta());
+  const int r = store->read(ch, OSD::get_osdmap_pobject_name(e), 0, 0, bl);
+  if (r < 0) {
+    cerr << "Can't find OSDMap for pg epoch " << e << std::endl;
+    return -ENOENT;
+  }
+  osdmap.decode(bl);
+  if (debug)
+    cerr << osdmap << std::endl;
+  return 0;
+}
+
+/**
+ * Load the pool pg_num history object into @p h.
+ *
+ * A failed or empty read leaves @p h untouched. The resulting history is
+ * always printed to cout.
+ *
+ * @return always 0
+ */
+int get_pg_num_history(ObjectStore *store, pool_pg_num_history_t *h)
+{
+  ObjectStore::CollectionHandle ch = store->open_collection(coll_t::meta());
+  bufferlist bl;
+  auto pghist = OSD::make_pg_num_history_oid();
+  const int r = store->read(ch, pghist, 0, 0, bl, 0);
+  const bool have_data = (r >= 0 && bl.length() > 0);
+  if (have_data) {
+    auto p = bl.cbegin();
+    decode(*h, p);
+  }
+  cout << __func__ << " pg_num_history " << *h << std::endl;
+  return 0;
+}
+
+/// Fill ms.osdmap and ms.osdmap_bl with the osdmap of epoch ms.map_epoch;
+/// returns the result of get_osdmap.
+int add_osdmap(ObjectStore *store, metadata_section &ms)
+{
+  const int r = get_osdmap(store, ms.map_epoch, ms.osdmap, ms.osdmap_bl);
+  return r;
+}
+
+/**
+ * Export a whole PG to the export stream (file_fd).
+ *
+ * The stream consists of the super header, PG_BEGIN, PG_METADATA (info,
+ * log, past intervals, missing set and the osdmap of @p map_epoch), every
+ * exported object and finally PG_END.
+ *
+ * get_log() reports failures as negative values, so a negative result
+ * aborts the export here.
+ *
+ * @param fs              object store to read from
+ * @param coll            collection of the PG
+ * @param pgid            PG being exported
+ * @param info            PG info stored in the metadata section
+ * @param map_epoch       epoch whose osdmap is embedded in the metadata
+ * @param struct_ver      PG struct version
+ * @param superblock      OSD superblock copied into PG_BEGIN
+ * @param past_intervals  past intervals stored in the metadata section
+ * @return 0 on success, otherwise the first failing step's result
+ */
+int ObjectStoreTool::do_export(ObjectStore *fs, coll_t coll, spg_t pgid,
+                               pg_info_t &info, epoch_t map_epoch, __u8 struct_ver,
+                               const OSDSuperblock& superblock,
+                               PastIntervals &past_intervals)
+{
+  PGLog::IndexedLog log;
+  pg_missing_t missing;
+
+  cerr << "Exporting " << pgid << " info " << info << std::endl;
+
+  int ret = get_log(fs, struct_ver, pgid, info, log, missing);
+  if (ret < 0)
+    return ret;
+
+  if (debug) {
+    Formatter *formatter = Formatter::create("json-pretty");
+    ceph_assert(formatter);
+    dump_log(formatter, cerr, log, missing);
+    delete formatter;
+  }
+  write_super();
+
+  pg_begin pgb(pgid, superblock);
+  // Special case: If replicated pg don't require the importing OSD to have shard feature
+  if (pgid.is_no_shard()) {
+    pgb.superblock.compat_features.incompat.remove(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
+  }
+  ret = write_section(TYPE_PG_BEGIN, pgb, file_fd);
+  if (ret)
+    return ret;
+
+  // The metadata_section is now before files, so import can detect
+  // errors and abort without wasting time.
+  metadata_section ms(
+    struct_ver,
+    map_epoch,
+    info,
+    log,
+    past_intervals,
+    missing);
+  ret = add_osdmap(fs, ms);
+  if (ret)
+    return ret;
+  ret = write_section(TYPE_PG_METADATA, ms, file_fd);
+  if (ret)
+    return ret;
+
+  ret = export_files(fs, coll);
+  if (ret) {
+    cerr << "export_files error " << ret << std::endl;
+    return ret;
+  }
+
+  ret = write_simple(TYPE_PG_END, file_fd);
+  if (ret)
+    return ret;
+
+  return 0;
+}
+
+/**
+ * Decode a data_section from @p bl and dump its offset and length (not the
+ * payload) as a "data_block" object, flushed to cout.
+ *
+ * @return always 0
+ */
+int dump_data(Formatter *formatter, bufferlist &bl)
+{
+  data_section section;
+  auto cursor = bl.cbegin();
+  section.decode(cursor);
+
+  formatter->open_object_section("data_block");
+  formatter->dump_unsigned("offset", section.offset);
+  formatter->dump_unsigned("len", section.len);
+  // XXX: Add option to dump data like od -cx ?
+  formatter->close_section();
+  formatter->flush(cout);
+  return 0;
+}
+
+/**
+ * Decode a data_section from @p bl and add the matching write of @p hoid
+ * to transaction @p t.
+ *
+ * @param store  not used by this function
+ * @return always 0
+ */
+int get_data(ObjectStore *store, coll_t coll, ghobject_t hoid,
+             ObjectStore::Transaction *t, bufferlist &bl)
+{
+  data_section section;
+  auto cursor = bl.cbegin();
+  section.decode(cursor);
+
+  if (debug)
+    cerr << "\tdata: offset " << section.offset << " len " << section.len << std::endl;
+  t->write(coll, hoid, section.offset, section.len, section.databl);
+  return 0;
+}
+
+/**
+ * Decode an attr_section from @p bl and dump it to cout.
+ *
+ * For a head object without generation the SnapSet stored under SS_ATTR is
+ * dumped first, or an error entry when that attribute is missing. The
+ * attributes are then listed in two arrays: "user" holds names that start
+ * with '_' and are longer than one character (printed without the leading
+ * '_', together with the value), while "system" lists the names of all
+ * others. An empty attribute name is listed as a system attribute.
+ *
+ * @param formatter  output formatter
+ * @param hoid       object the attributes belong to
+ * @param bl         encoded attr_section
+ * @return always 0
+ */
+int dump_attrs(
+  Formatter *formatter, ghobject_t hoid,
+  bufferlist &bl)
+{
+  auto ebliter = bl.cbegin();
+  attr_section as;
+  as.decode(ebliter);
+
+  // This could have been handled in the caller if we didn't need to
+  // support exports that didn't include object_info_t in object_begin.
+  if (hoid.generation == ghobject_t::NO_GEN &&
+      hoid.hobj.is_head()) {
+    map<string,bufferlist>::iterator mi = as.data.find(SS_ATTR);
+    if (mi != as.data.end()) {
+      SnapSet snapset;
+      auto p = mi->second.cbegin();
+      snapset.decode(p);
+      formatter->open_object_section("snapset");
+      snapset.dump(formatter);
+      formatter->close_section();
+    } else {
+      formatter->open_object_section("snapset");
+      formatter->dump_string("error", "missing SS_ATTR");
+      formatter->close_section();
+    }
+  }
+
+  // One predicate for both loops; the size check also keeps an empty name
+  // from being indexed.
+  const auto is_user_attr = [](const string &name) {
+    return name.size() > 1 && name[0] == '_';
+  };
+
+  formatter->open_object_section("attrs");
+  formatter->open_array_section("user");
+  // Iterate by reference: copying every name/bufferlist pair is wasted work.
+  for (auto &kv : as.data) {
+    // Skip system attributes
+    if (!is_user_attr(kv.first))
+      continue;
+    formatter->open_object_section("user_attr");
+    formatter->dump_string("name", kv.first.substr(1));
+    bool b64;
+    formatter->dump_string("value", cleanbin(kv.second, b64));
+    formatter->dump_bool("Base64", b64);
+    formatter->close_section();
+  }
+  formatter->close_section();
+  formatter->open_array_section("system");
+  for (const auto &kv : as.data) {
+    // Skip user attributes
+    if (is_user_attr(kv.first))
+      continue;
+    formatter->open_object_section("sys_attr");
+    formatter->dump_string("name", kv.first);
+    formatter->close_section();
+  }
+  formatter->close_section();
+  formatter->close_section();
+  formatter->flush(cout);
+
+  return 0;
+}
+
+// Decode an attr_section from `bl` and queue its attributes for `hoid` in
+// transaction `t`.
+//
+// For a head object without generation, the SnapSet stored under SS_ATTR is
+// decoded as well and, for every clone in its clone_snaps that exists in the
+// store, the clone's snaps are registered with `snap_mapper`. A missing
+// SS_ATTR on such an object is reported on cerr only.
+//
+// Always returns 0.
+int get_attrs(
+ ObjectStore *store, coll_t coll, ghobject_t hoid,
+ ObjectStore::Transaction *t, bufferlist &bl,
+ OSDriver &driver, SnapMapper &snap_mapper)
+{
+ auto ebliter = bl.cbegin();
+ attr_section as;
+ as.decode(ebliter);
+
+ auto ch = store->open_collection(coll);
+ if (debug)
+ cerr << "\tattrs: len " << as.data.size() << std::endl;
+ t->setattrs(coll, hoid, as.data);
+
+ // This could have been handled in the caller if we didn't need to
+ // support exports that didn't include object_info_t in object_begin.
+ if (hoid.generation == ghobject_t::NO_GEN &&
+ hoid.hobj.is_head()) {
+ map<string,bufferlist>::iterator mi = as.data.find(SS_ATTR);
+ if (mi != as.data.end()) {
+ SnapSet snapset;
+ auto p = mi->second.cbegin();
+ snapset.decode(p);
+ cout << "snapset " << snapset << std::endl;
+ // Note: this loop variable shadows the decode iterator `p` above.
+ for (auto& p : snapset.clone_snaps) {
+ // Build the clone's id from the head object and the clone's snap id.
+ ghobject_t clone = hoid;
+ clone.hobj.snap = p.first;
+ set<snapid_t> snaps(p.second.begin(), p.second.end());
+ if (!store->exists(ch, clone)) {
+ // no clone, skip. this is probably a cache pool. this works
+ // because we use a separate transaction per object and clones
+ // come before head in the archive.
+ if (debug)
+ cerr << "\tskipping missing " << clone << " (snaps "
+ << snaps << ")" << std::endl;
+ continue;
+ }
+ if (debug)
+ cerr << "\tsetting " << clone.hobj << " snaps " << snaps
+ << std::endl;
+ // The snap mapper update is recorded through a driver transaction
+ // obtained from `t`.
+ OSDriver::OSTransaction _t(driver.get_transaction(t));
+ ceph_assert(!snaps.empty());
+ snap_mapper.add_oid(clone.hobj, snaps, &_t);
+ }
+ } else {
+ cerr << "missing SS_ATTR on " << hoid << std::endl;
+ }
+ }
+ return 0;
+}
+
+/**
+ * Decode an omap_hdr_section from @p bl and dump the header bytes as the
+ * "value" string of an "omap_header" object, flushed to cout.
+ *
+ * @return always 0
+ */
+int dump_omap_hdr(Formatter *formatter, bufferlist &bl)
+{
+  omap_hdr_section section;
+  auto cursor = bl.cbegin();
+  section.decode(cursor);
+
+  formatter->open_object_section("omap_header");
+  const string header_text(section.hdr.c_str(), section.hdr.length());
+  formatter->dump_string("value", header_text);
+  formatter->close_section();
+  formatter->flush(cout);
+  return 0;
+}
+
+/**
+ * Decode an omap_hdr_section from @p bl and queue it as the omap header of
+ * @p hoid in transaction @p t.
+ *
+ * @param store  not used by this function
+ * @return always 0
+ */
+int get_omap_hdr(ObjectStore *store, coll_t coll, ghobject_t hoid,
+                 ObjectStore::Transaction *t, bufferlist &bl)
+{
+  omap_hdr_section section;
+  auto cursor = bl.cbegin();
+  section.decode(cursor);
+
+  if (debug) {
+    cerr << "\tomap header: " << string(section.hdr.c_str(), section.hdr.length())
+         << std::endl;
+  }
+  t->omap_setheader(coll, hoid, section.hdr);
+  return 0;
+}
+
+/**
+ * Decode an omap_section from @p bl and dump it to cout as an "omaps"
+ * object holding the entry count and a "data" array with one
+ * name/value/Base64 record per entry.
+ *
+ * @return always 0
+ */
+int dump_omap(Formatter *formatter, bufferlist &bl)
+{
+  auto ebliter = bl.cbegin();
+  omap_section os;
+  os.decode(ebliter);
+
+  formatter->open_object_section("omaps");
+  formatter->dump_unsigned("count", os.omap.size());
+  formatter->open_array_section("data");
+  // Iterate by reference: copying every key/bufferlist pair is wasted work.
+  for (auto &o : os.omap) {
+    formatter->open_object_section("omap");
+    formatter->dump_string("name", o.first);
+    bool b64;
+    formatter->dump_string("value", cleanbin(o.second, b64));
+    formatter->dump_bool("Base64", b64);
+    formatter->close_section();
+  }
+  formatter->close_section();
+  formatter->close_section();
+  formatter->flush(cout);
+  return 0;
+}
+
+/**
+ * Decode an omap_section from @p bl and queue its key/value pairs for
+ * @p hoid in transaction @p t.
+ *
+ * @param store  not used by this function
+ * @return always 0
+ */
+int get_omap(ObjectStore *store, coll_t coll, ghobject_t hoid,
+             ObjectStore::Transaction *t, bufferlist &bl)
+{
+  omap_section section;
+  auto cursor = bl.cbegin();
+  section.decode(cursor);
+
+  if (debug)
+    cerr << "\tomap: size " << section.omap.size() << std::endl;
+  t->omap_setkeys(coll, hoid, section.omap);
+  return 0;
+}
+
+// Dump one exported object: decode its object_begin from `bl`, then read and
+// dump the sections that follow until TYPE_OBJECT_END is seen.
+//
+// Returns 0 on success, -EFAULT when the export contains a temporary object
+// or an unexpected section type, or the non-zero result of read_section or
+// of one of the section dumpers.
+//
+// NOTE(review): the error returns inside the loop leave the "object"
+// formatter section open -- confirm callers do not rely on balanced output
+// after a failure.
+int ObjectStoreTool::dump_object(Formatter *formatter,
+ bufferlist &bl)
+{
+ auto ebliter = bl.cbegin();
+ object_begin ob;
+ ob.decode(ebliter);
+
+ if (ob.hoid.hobj.is_temp()) {
+ cerr << "ERROR: Export contains temporary object '" << ob.hoid << "'" << std::endl;
+ return -EFAULT;
+ }
+
+ formatter->open_object_section("object");
+ formatter->open_object_section("oid");
+ ob.hoid.dump(formatter);
+ formatter->close_section();
+ formatter->open_object_section("object_info");
+ ob.oi.dump(formatter);
+ formatter->close_section();
+
+ bufferlist ebl;
+ bool done = false;
+ while(!done) {
+ sectiontype_t type;
+ int ret = read_section(&type, &ebl);
+ if (ret)
+ return ret;
+
+ //cout << "\tdo_object: Section type " << hex << type << dec << std::endl;
+ //cout << "\t\tsection size " << ebl.length() << std::endl;
+ // Section types at or beyond END_OF_TYPES are skipped rather than
+ // treated as errors.
+ if (type >= END_OF_TYPES) {
+ cout << "Skipping unknown object section type" << std::endl;
+ continue;
+ }
+ // In dry-run mode the sections are still read, but their contents are
+ // not dumped.
+ switch(type) {
+ case TYPE_DATA:
+ if (dry_run) break;
+ ret = dump_data(formatter, ebl);
+ if (ret) return ret;
+ break;
+ case TYPE_ATTRS:
+ if (dry_run) break;
+ ret = dump_attrs(formatter, ob.hoid, ebl);
+ if (ret) return ret;
+ break;
+ case TYPE_OMAP_HDR:
+ if (dry_run) break;
+ ret = dump_omap_hdr(formatter, ebl);
+ if (ret) return ret;
+ break;
+ case TYPE_OMAP:
+ if (dry_run) break;
+ ret = dump_omap(formatter, ebl);
+ if (ret) return ret;
+ break;
+ case TYPE_OBJECT_END:
+ done = true;
+ break;
+ default:
+ // A type below END_OF_TYPES that is not valid inside an object.
+ cerr << "Unknown section type " << type << std::endl;
+ return -EFAULT;
+ }
+ }
+ // Closes the "object" section opened above.
+ formatter->close_section();
+ return 0;
+}
+
+ /*
+ * Import one object of an export stream into collection coll.
+ *
+ * bl holds the encoded object_begin header. Unless the object lives in the
+ * configured osd_hit_set_namespace, its placement is recomputed with origmap;
+ * an object that maps to a different pg than coll is skipped (via
+ * skip_object()) and *skipped_objects is set to true. Otherwise the object's
+ * sections are read with read_section() and staged on one transaction, which
+ * is queued and flushed at the end. With dry_run set nothing is staged or
+ * queued, but the sections are still consumed from the stream.
+ *
+ * Returns 0 on success or skip, -EFAULT for a temporary object, a non-pg
+ * collection, a shard mismatch or an unhandled section type, or the non-zero
+ * result of read_section() / a get_* helper.
+ */ int ObjectStoreTool::get_object(ObjectStore *store,
+ OSDriver& driver,
+ SnapMapper& mapper,
+ coll_t coll,
+ bufferlist &bl, OSDMap &origmap,
+ bool *skipped_objects)
+ {
+ ObjectStore::Transaction tran;
+ ObjectStore::Transaction *t = &tran;
+ auto ebliter = bl.cbegin();
+ object_begin ob;
+ ob.decode(ebliter);
+
+ if (ob.hoid.hobj.is_temp()) {
+ cerr << "ERROR: Export contains temporary object '" << ob.hoid << "'" << std::endl;
+ return -EFAULT;
+ }
+ ceph_assert(g_ceph_context);
+
+ auto ch = store->open_collection(coll);
+ if (ob.hoid.hobj.nspace != g_ceph_context->_conf->osd_hit_set_namespace) { // hit-set namespace objects bypass the placement check
+ object_t oid = ob.hoid.hobj.oid;
+ object_locator_t loc(ob.hoid.hobj);
+ pg_t raw_pgid = origmap.object_locator_to_pg(oid, loc);
+ pg_t pgid = origmap.raw_pg_to_pg(raw_pgid);
+
+ spg_t coll_pgid;
+ if (coll.is_pg(&coll_pgid) == false) {
+ cerr << "INTERNAL ERROR: Bad collection during import" << std::endl;
+ return -EFAULT;
+ }
+ if (coll_pgid.shard != ob.hoid.shard_id) {
+ cerr << "INTERNAL ERROR: Importing shard " << coll_pgid.shard
+ << " but object shard is " << ob.hoid.shard_id << std::endl;
+ return -EFAULT;
+ }
+
+ if (coll_pgid.pgid != pgid) { // object maps to another pg under origmap: skip it
+ cerr << "Skipping object '" << ob.hoid << "' which belongs in pg " << pgid << std::endl;
+ *skipped_objects = true;
+ skip_object(bl);
+ return 0;
+ }
+ }
+
+ if (!dry_run)
+ t->touch(coll, ob.hoid);
+
+ cout << "Write " << ob.hoid << std::endl;
+
+ bufferlist ebl;
+ bool done = false;
+ while(!done) {
+ sectiontype_t type;
+ int ret = read_section(&type, &ebl);
+ if (ret)
+ return ret;
+
+ //cout << "\tdo_object: Section type " << hex << type << dec << std::endl;
+ //cout << "\t\tsection size " << ebl.length() << std::endl;
+ if (type >= END_OF_TYPES) { // section types at or beyond END_OF_TYPES are skipped, not treated as errors
+ cout << "Skipping unknown object section type" << std::endl;
+ continue;
+ }
+ switch(type) {
+ case TYPE_DATA:
+ if (dry_run) break;
+ ret = get_data(store, coll, ob.hoid, t, ebl);
+ if (ret) return ret;
+ break;
+ case TYPE_ATTRS:
+ if (dry_run) break;
+ ret = get_attrs(store, coll, ob.hoid, t, ebl, driver, mapper);
+ if (ret) return ret;
+ break;
+ case TYPE_OMAP_HDR:
+ if (dry_run) break;
+ ret = get_omap_hdr(store, coll, ob.hoid, t, ebl);
+ if (ret) return ret;
+ break;
+ case TYPE_OMAP:
+ if (dry_run) break;
+ ret = get_omap(store, coll, ob.hoid, t, ebl);
+ if (ret) return ret;
+ break;
+ case TYPE_OBJECT_END:
+ done = true;
+ break;
+ default: // a type below END_OF_TYPES that is not valid inside an object
+ cerr << "Unknown section type " << type << std::endl;
+ return -EFAULT;
+ }
+ }
+ if (!dry_run) { // submit everything staged for this object in one transaction
+ wait_until_done(t, [&] {
+ store->queue_transaction(ch, std::move(*t));
+ ch->flush();
+ });
+ }
+ return 0;
+ }
+
+ /**
+  * Decode the PG metadata section in bl into ms and dump it to cout.
+  *
+  * The OSDMap, pg info, log and missing set are each emitted as their own
+  * section inside "metadata_section"; the formatter is flushed to cout
+  * between the larger pieces.
+  *
+  * @return 0 on success, -EFAULT when the export carries an OSDMap whose
+  *         epoch differs from ms.map_epoch.
+  */
+ int dump_pg_metadata(Formatter *formatter, bufferlist &bl, metadata_section &ms)
+ {
+   auto cursor = bl.cbegin();
+   ms.decode(cursor);
+
+   formatter->open_object_section("metadata_section");
+
+   formatter->dump_unsigned("pg_disk_version", static_cast<int>(ms.struct_ver));
+   formatter->dump_unsigned("map_epoch", ms.map_epoch);
+
+   // OSDMap
+   formatter->open_object_section("OSDMap");
+   ms.osdmap.dump(formatter);
+   formatter->close_section();
+   formatter->flush(cout);
+   cout << std::endl;
+
+   // pg info
+   formatter->open_object_section("info");
+   ms.info.dump(formatter);
+   formatter->close_section();
+   formatter->flush(cout);
+
+   // pg log
+   formatter->open_object_section("log");
+   ms.log.dump(formatter);
+   formatter->close_section();
+   formatter->flush(cout);
+
+   // missing set
+   formatter->open_object_section("pg_missing_t");
+   ms.missing.dump(formatter);
+   formatter->close_section();
+
+   // XXX: ms.past_intervals?
+
+   formatter->close_section();
+   formatter->flush(cout);
+
+   // An epoch of 0 means the export carries no OSDMap; nothing to cross-check.
+   const auto osdmap_epoch = ms.osdmap.get_epoch();
+   if (osdmap_epoch == 0 || ms.map_epoch == osdmap_epoch) {
+     return 0;
+   }
+
+   cerr << "FATAL: Invalid OSDMap epoch in export data" << std::endl;
+   return -EFAULT;
+ }
+
+ /**
+  * Decode and validate the PG metadata section of an export during import.
+  *
+  * The section in bl is decoded into ms, and ms.info.pgid is overwritten
+  * with pgid (the pgid recorded in the export is only printed in debug
+  * mode).
+  *
+  * @param store unused here
+  * @param bl    encoded metadata_section
+  * @param ms    receives the decoded metadata
+  * @param sb    superblock of the OSD being imported into
+  * @param pgid  pgid to record in ms.info
+  * @return 0 on success;
+  *         -EFAULT if the embedded OSDMap epoch disagrees with ms.map_epoch;
+  *         -EINVAL if the export is newer than the OSD (map_epoch >
+  *         sb.current_epoch), carries no OSDMap, or its OSDMap is older than
+  *         sb.oldest_map and the global force flag is not set.
+  */
+ int get_pg_metadata(ObjectStore *store, bufferlist &bl, metadata_section &ms,
+                     const OSDSuperblock& sb, spg_t pgid)
+ {
+   auto ebliter = bl.cbegin();
+   ms.decode(ebliter);
+   spg_t old_pgid = ms.info.pgid;
+   ms.info.pgid = pgid;
+
+   if (debug) {
+     cout << "export pgid " << old_pgid << std::endl;
+     cout << "struct_v " << (int)ms.struct_ver << std::endl;
+     cout << "map epoch " << ms.map_epoch << std::endl;
+
+ #ifdef DIAGNOSTIC
+     Formatter *formatter = new JSONFormatter(true);
+     formatter->open_object_section("stuff");
+
+     formatter->open_object_section("importing OSDMap");
+     ms.osdmap.dump(formatter);
+     formatter->close_section();
+     formatter->flush(cout);
+     cout << std::endl;
+
+     cout << "osd current epoch " << sb.current_epoch << std::endl;
+
+     formatter->open_object_section("info");
+     ms.info.dump(formatter);
+     formatter->close_section();
+     formatter->flush(cout);
+     cout << std::endl;
+
+     formatter->open_object_section("log");
+     ms.log.dump(formatter);
+     formatter->close_section();
+     formatter->flush(cout);
+     cout << std::endl;
+
+     formatter->close_section();
+     formatter->flush(cout);
+     cout << std::endl;
+     // The formatter is only needed for this diagnostic dump; release it
+     // here so it is not leaked.
+     delete formatter;
+ #endif
+   }
+
+   if (ms.osdmap.get_epoch() != 0 && ms.map_epoch != ms.osdmap.get_epoch()) {
+     cerr << "FATAL: Invalid OSDMap epoch in export data" << std::endl;
+     return -EFAULT;
+   }
+
+   if (ms.map_epoch > sb.current_epoch) {
+     cerr << "ERROR: Export PG's map_epoch " << ms.map_epoch << " > OSD's epoch " << sb.current_epoch << std::endl;
+     cerr << "The OSD you are using is older than the exported PG" << std::endl;
+     cerr << "Either use another OSD or join selected OSD to cluster to update it first" << std::endl;
+     return -EINVAL;
+   }
+
+   // Old exports didn't include OSDMap
+   if (ms.osdmap.get_epoch() == 0) {
+     cerr << "WARNING: No OSDMap in old export, this is an ancient export."
+       " Not supported." << std::endl;
+     return -EINVAL;
+   }
+
+   if (ms.osdmap.get_epoch() < sb.oldest_map) {
+     cerr << "PG export's map " << ms.osdmap.get_epoch()
+          << " is older than OSD's oldest_map " << sb.oldest_map << std::endl;
+     if (!force) {
+       cerr << " pass --force to proceed anyway (with incomplete PastIntervals)"
+            << std::endl;
+       return -EINVAL;
+     }
+   }
+   if (debug) {
+     cerr << "Import pgid " << ms.info.pgid << std::endl;
+     cerr << "Previous past_intervals " << ms.past_intervals << std::endl;
+     cerr << "history.same_interval_since "
+          << ms.info.history.same_interval_since << std::endl;
+   }
+
+   return 0;
+ }
+
+ // Split the divergent priors in "in" between "out" and "reject".
+ // out: entries that apply to import_pgid according to curmap, plus every
+ //      entry whose object lives in hit_set_namespace (those are not mapped).
+ // reject: entries for temporary objects and entries whose object maps to a
+ //         different pg under curmap.
+ // Both output containers are cleared first.
+ void filter_divergent_priors(spg_t import_pgid, const OSDMap &curmap,
+                              const string &hit_set_namespace, const divergent_priors_t &in,
+                              divergent_priors_t &out, divergent_priors_t &reject)
+ {
+   out.clear();
+   reject.clear();
+
+   for (const auto &prior : in) {
+     const auto &obj = prior.second;
+
+     // Reject divergent priors for temporary objects
+     if (obj.is_temp()) {
+       reject.insert(prior);
+       continue;
+     }
+
+     // Hit-set objects are kept unconditionally; everything else must map
+     // to the pg being imported.
+     bool keep = true;
+     if (obj.nspace != hit_set_namespace) {
+       object_t oid = obj.oid;
+       object_locator_t loc(obj);
+       pg_t raw_pgid = curmap.object_locator_to_pg(oid, loc);
+       pg_t mapped = curmap.raw_pg_to_pg(raw_pgid);
+       keep = (import_pgid.pgid == mapped);
+     }
+
+     if (keep) {
+       out.insert(prior);
+     } else {
+       reject.insert(prior);
+     }
+   }
+ }
+
+ /*
+ * Dump a single-PG export stream through formatter.
+ *
+ * Reads and validates the super header, requires the first section to be
+ * TYPE_PG_BEGIN (pool exports are refused), then walks the remaining sections:
+ * objects are dumped into an "objects" array, the PG metadata section is
+ * dumped via dump_pg_metadata(), and TYPE_PG_END terminates the walk.
+ *
+ * Returns 0 on success; -EFAULT for a bad magic number, an unexpected first
+ * section, an unhandled section type or a missing metadata section; -EINVAL
+ * for an unsupported format version or a pool export; or the non-zero result
+ * of read_super()/read_section()/dump_object()/dump_pg_metadata().
+ */ int ObjectStoreTool::dump_export(Formatter *formatter)
+ {
+ bufferlist ebl;
+ pg_info_t info; // NOTE(review): not referenced in this function
+ PGLog::IndexedLog log; // NOTE(review): not referenced in this function
+ //bool skipped_objects = false;
+
+ int ret = read_super();
+ if (ret)
+ return ret;
+
+ if (sh.magic != super_header::super_magic) {
+ cerr << "Invalid magic number" << std::endl;
+ return -EFAULT;
+ }
+
+ if (sh.version > super_header::super_ver) {
+ cerr << "Can't handle export format version=" << sh.version << std::endl;
+ return -EINVAL;
+ }
+
+ formatter->open_object_section("Export");
+
+ //First section must be TYPE_PG_BEGIN
+ sectiontype_t type;
+ ret = read_section(&type, &ebl);
+ if (ret)
+ return ret;
+ if (type == TYPE_POOL_BEGIN) {
+ cerr << "Dump of pool exports not supported" << std::endl;
+ return -EINVAL;
+ } else if (type != TYPE_PG_BEGIN) {
+ cerr << "Invalid first section type " << std::to_string(type) << std::endl;
+ return -EFAULT;
+ }
+
+ auto ebliter = ebl.cbegin();
+ pg_begin pgb;
+ pgb.decode(ebliter);
+ spg_t pgid = pgb.pgid;
+
+ formatter->dump_string("pgid", stringify(pgid));
+ formatter->dump_string("cluster_fsid", stringify(pgb.superblock.cluster_fsid));
+ formatter->dump_string("features", stringify(pgb.superblock.compat_features));
+
+ bool done = false;
+ bool found_metadata = false;
+ metadata_section ms;
+ bool objects_started = false; // true once the "objects" array section has been opened
+ while(!done) {
+ ret = read_section(&type, &ebl);
+ if (ret)
+ return ret;
+
+ if (debug) {
+ cerr << "dump_export: Section type " << std::to_string(type) << std::endl;
+ }
+ if (type >= END_OF_TYPES) { // section types at or beyond END_OF_TYPES are skipped, not treated as errors
+ cerr << "Skipping unknown section type" << std::endl;
+ continue;
+ }
+ switch(type) {
+ case TYPE_OBJECT_BEGIN:
+ if (!objects_started) { // open the array lazily, on the first object
+ formatter->open_array_section("objects");
+ objects_started = true;
+ }
+ ret = dump_object(formatter, ebl);
+ if (ret) return ret;
+ break;
+ case TYPE_PG_METADATA:
+ if (objects_started)
+ cerr << "WARNING: metadata_section out of order" << std::endl;
+ ret = dump_pg_metadata(formatter, ebl, ms);
+ if (ret) return ret;
+ found_metadata = true;
+ break;
+ case TYPE_PG_END:
+ if (objects_started) {
+ formatter->close_section(); // closes "objects"
+ }
+ done = true;
+ break;
+ default:
+ cerr << "Unknown section type " << std::to_string(type) << std::endl;
+ return -EFAULT;
+ }
+ }
+
+ if (!found_metadata) {
+ cerr << "Missing metadata section" << std::endl;
+ return -EFAULT;
+ }
+
+ formatter->close_section(); // closes "Export"
+ formatter->flush(cout);
+
+ return 0;
+ }
+
+ /*
+ * Import a single-PG export stream into store.
+ *
+ * Outline, as implemented below:
+ * 1. Validate the super header and require a TYPE_PG_BEGIN first section.
+ * 2. Check the export against the target OSD: optional user-supplied pgid,
+ * cluster fsid, compat features, pool existence in the OSD's current
+ * OSDMap, and that the PG collection does not already exist.
+ * 3. Walk the sections. The metadata section is validated, checked against
+ * recorded pg_num changes for conflicting merge targets / split children,
+ * and (unless dry_run) the collection is created with a "_remove" omap
+ * key set on the pgmeta object. Object sections go through get_object().
+ * 4. Unless dry_run, filter log, divergent priors and missing set down to
+ * what belongs to this pg, write the PG metadata with write_pg(), and
+ * remove the "_remove" key in the same final transaction.
+ *
+ * Returns 0 on success or a negative errno-style value on failure. The
+ * positive values 10, 11 and 12 are returned for specific conditions and,
+ * per the comments at their return sites, are used as the exit status.
+ */ int ObjectStoreTool::do_import(ObjectStore *store, OSDSuperblock& sb,
+ bool force, std::string pgidstr)
+ {
+ bufferlist ebl;
+ pg_info_t info; // NOTE(review): not referenced in this function
+ PGLog::IndexedLog log; // NOTE(review): not referenced in this function
+ bool skipped_objects = false;
+
+ if (!dry_run)
+ finish_remove_pgs(store);
+
+ int ret = read_super();
+ if (ret)
+ return ret;
+
+ if (sh.magic != super_header::super_magic) {
+ cerr << "Invalid magic number" << std::endl;
+ return -EFAULT;
+ }
+
+ if (sh.version > super_header::super_ver) {
+ cerr << "Can't handle export format version=" << sh.version << std::endl;
+ return -EINVAL;
+ }
+
+ //First section must be TYPE_PG_BEGIN
+ sectiontype_t type;
+ ret = read_section(&type, &ebl);
+ if (ret)
+ return ret;
+ if (type == TYPE_POOL_BEGIN) {
+ cerr << "Pool exports cannot be imported into a PG" << std::endl;
+ return -EINVAL;
+ } else if (type != TYPE_PG_BEGIN) {
+ cerr << "Invalid first section type " << std::to_string(type) << std::endl;
+ return -EFAULT;
+ }
+
+ auto ebliter = ebl.cbegin();
+ pg_begin pgb;
+ pgb.decode(ebliter);
+ spg_t pgid = pgb.pgid;
+
+ if (pgidstr.length()) {
+ spg_t user_pgid;
+
+ bool ok = user_pgid.parse(pgidstr.c_str());
+ // This succeeded in main() already
+ ceph_assert(ok);
+ if (pgid != user_pgid) {
+ cerr << "specified pgid " << user_pgid
+ << " does not match actual pgid " << pgid << std::endl;
+ return -EINVAL;
+ }
+ }
+
+ if (!pgb.superblock.cluster_fsid.is_zero()
+ && pgb.superblock.cluster_fsid != sb.cluster_fsid) {
+ cerr << "Export came from different cluster with fsid "
+ << pgb.superblock.cluster_fsid << std::endl;
+ return -EINVAL;
+ }
+
+ if (debug) {
+ cerr << "Exported features: " << pgb.superblock.compat_features << std::endl;
+ }
+
+ // Special case: Old export has SHARDS incompat feature on replicated pg, remove it
+ if (pgid.is_no_shard())
+ pgb.superblock.compat_features.incompat.remove(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
+
+ if (sb.compat_features.compare(pgb.superblock.compat_features) == -1) {
+ CompatSet unsupported = sb.compat_features.unsupported(pgb.superblock.compat_features);
+
+ cerr << "Export has incompatible features set " << unsupported << std::endl;
+
+ // Let them import if they specify the --force option
+ if (!force)
+ return 11; // Positive return means exit status
+ }
+
+ // we need the latest OSDMap to check for collisions
+ OSDMap curmap;
+ bufferlist bl;
+ ret = get_osdmap(store, sb.current_epoch, curmap, bl);
+ if (ret) {
+ cerr << "Can't find latest local OSDMap " << sb.current_epoch << std::endl;
+ return ret;
+ }
+ if (!curmap.have_pg_pool(pgid.pgid.m_pool)) {
+ cerr << "Pool " << pgid.pgid.m_pool << " no longer exists" << std::endl;
+ // Special exit code for this error, used by test code
+ return 10; // Positive return means exit status
+ }
+
+ pool_pg_num_history_t pg_num_history;
+ get_pg_num_history(store, &pg_num_history);
+
+ ghobject_t pgmeta_oid = pgid.make_pgmeta_oid(); // NOTE(review): not used below; later code calls pgid.make_pgmeta_oid() again
+
+ // Check for PG already present.
+ coll_t coll(pgid);
+ if (store->collection_exists(coll)) {
+ cerr << "pgid " << pgid << " already exists" << std::endl;
+ return -EEXIST;
+ }
+
+ ObjectStore::CollectionHandle ch; // assigned when the collection is created in the metadata case (not in dry_run)
+
+ OSDriver driver(
+ store,
+ coll_t(),
+ OSD::make_snapmapper_oid());
+ SnapMapper mapper(g_ceph_context, &driver, 0, 0, 0, pgid.shard);
+
+ cout << "Importing pgid " << pgid;
+ cout << std::endl;
+
+ bool done = false;
+ bool found_metadata = false;
+ metadata_section ms;
+ while(!done) {
+ ret = read_section(&type, &ebl);
+ if (ret)
+ return ret;
+
+ if (debug) {
+ cout << __func__ << ": Section type " << std::to_string(type) << std::endl;
+ }
+ if (type >= END_OF_TYPES) { // section types at or beyond END_OF_TYPES are skipped, not treated as errors
+ cout << "Skipping unknown section type" << std::endl;
+ continue;
+ }
+ switch(type) {
+ case TYPE_OBJECT_BEGIN:
+ ceph_assert(found_metadata); // objects are only accepted after the metadata section (ms.osdmap is needed below)
+ ret = get_object(store, driver, mapper, coll, ebl, ms.osdmap,
+ &skipped_objects);
+ if (ret) return ret;
+ break;
+ case TYPE_PG_METADATA:
+ ret = get_pg_metadata(store, ebl, ms, sb, pgid);
+ if (ret) return ret;
+ found_metadata = true;
+
+ if (pgid != ms.info.pgid) {
+ cerr << "specified pgid " << pgid << " does not match import file pgid "
+ << ms.info.pgid << std::endl;
+ return -EINVAL;
+ }
+
+ // make sure there are no conflicting splits or merges
+ if (ms.osdmap.have_pg_pool(pgid.pgid.pool())) {
+ auto p = pg_num_history.pg_nums.find(pgid.pgid.m_pool);
+ if (p != pg_num_history.pg_nums.end() &&
+ !p->second.empty()) {
+ unsigned start_pg_num = ms.osdmap.get_pg_num(pgid.pgid.pool());
+ unsigned pg_num = start_pg_num;
+ for (auto q = p->second.lower_bound(ms.map_epoch); // walk recorded pg_num changes from the export's epoch onward
+ q != p->second.end();
+ ++q) {
+ unsigned new_pg_num = q->second;
+ cout << "pool " << pgid.pgid.pool() << " pg_num " << pg_num
+ << " -> " << new_pg_num << std::endl;
+
+ // check for merge target
+ spg_t target;
+ if (pgid.is_merge_source(pg_num, new_pg_num, &target)) {
+ // FIXME: this checks assumes the OSD's PG is at the OSD's
+ // map epoch; it could be, say, at *our* epoch, pre-merge.
+ coll_t coll(target); // shadows the outer coll for this check only
+ if (store->collection_exists(coll)) {
+ cerr << "pgid " << pgid << " merges to target " << target
+ << " which already exists" << std::endl;
+ return 12; // positive value, like the other positive returns in this function
+ }
+ }
+
+ // check for split children
+ set<spg_t> children;
+ if (pgid.is_split(start_pg_num, new_pg_num, &children)) {
+ cerr << " children are " << children << std::endl;
+ for (auto child : children) {
+ coll_t coll(child); // shadows the outer coll for this check only
+ if (store->collection_exists(coll)) {
+ cerr << "pgid " << pgid << " splits to " << children
+ << " and " << child << " exists" << std::endl;
+ return 12; // positive value, like the other positive returns in this function
+ }
+ }
+ }
+ pg_num = new_pg_num;
+ }
+ }
+ } else {
+ cout << "pool " << pgid.pgid.pool() << " doesn't existing, not checking"
+ << " for splits or mergers" << std::endl;
+ }
+
+ if (!dry_run) {
+ ObjectStore::Transaction t;
+ ch = store->create_new_collection(coll);
+ PG::_create(
+ t, pgid,
+ pgid.get_split_bits(ms.osdmap.get_pg_pool(pgid.pool())->get_pg_num()));
+ PG::_init(t, pgid, NULL);
+
+ // mark this coll for removal until we're done
+ map<string,bufferlist> values;
+ encode((char)1, values["_remove"]);
+ t.omap_setkeys(coll, pgid.make_pgmeta_oid(), values);
+
+ store->queue_transaction(ch, std::move(t));
+ }
+
+ break;
+ case TYPE_PG_END:
+ ceph_assert(found_metadata);
+ done = true;
+ break;
+ default:
+ cerr << "Unknown section type " << std::to_string(type) << std::endl;
+ return -EFAULT;
+ }
+ }
+
+ if (!found_metadata) {
+ cerr << "Missing metadata section" << std::endl;
+ return -EFAULT;
+ }
+
+ ObjectStore::Transaction t; // final transaction: PG metadata written by write_pg() plus removal of the "_remove" key
+ if (!dry_run) {
+ pg_log_t newlog, reject;
+ pg_log_t::filter_log(pgid, ms.osdmap, g_ceph_context->_conf->osd_hit_set_namespace,
+ ms.log, newlog, reject);
+ if (debug) {
+ for (list<pg_log_entry_t>::iterator i = newlog.log.begin();
+ i != newlog.log.end(); ++i)
+ cerr << "Keeping log entry " << *i << std::endl;
+ for (list<pg_log_entry_t>::iterator i = reject.log.begin();
+ i != reject.log.end(); ++i)
+ cerr << "Skipping log entry " << *i << std::endl;
+ }
+
+ divergent_priors_t newdp, rejectdp;
+ filter_divergent_priors(pgid, ms.osdmap, g_ceph_context->_conf->osd_hit_set_namespace,
+ ms.divergent_priors, newdp, rejectdp);
+ ms.divergent_priors = newdp;
+ if (debug) {
+ for (divergent_priors_t::iterator i = newdp.begin();
+ i != newdp.end(); ++i)
+ cerr << "Keeping divergent_prior " << *i << std::endl;
+ for (divergent_priors_t::iterator i = rejectdp.begin();
+ i != rejectdp.end(); ++i)
+ cerr << "Skipping divergent_prior " << *i << std::endl;
+ }
+
+ ms.missing.filter_objects([&](const hobject_t &obj) { // predicate is true for objects that map to a different pg under ms.osdmap
+ if (obj.nspace == g_ceph_context->_conf->osd_hit_set_namespace)
+ return false;
+ ceph_assert(!obj.is_temp());
+ object_t oid = obj.oid;
+ object_locator_t loc(obj);
+ pg_t raw_pgid = ms.osdmap.object_locator_to_pg(oid, loc);
+ pg_t _pgid = ms.osdmap.raw_pg_to_pg(raw_pgid);
+
+ return pgid.pgid != _pgid;
+ });
+
+
+ if (debug) {
+ pg_missing_t missing; // NOTE(review): unused local; ms.missing is what gets dumped
+ Formatter *formatter = Formatter::create("json-pretty");
+ dump_log(formatter, cerr, newlog, ms.missing);
+ delete formatter;
+ }
+
+ // Just like a split invalidate stats since the object count is changed
+ if (skipped_objects)
+ ms.info.stats.stats_invalid = true;
+
+ ret = write_pg(
+ t,
+ ms.map_epoch,
+ ms.info,
+ newlog,
+ ms.past_intervals,
+ ms.divergent_priors,
+ ms.missing);
+ if (ret) return ret;
+ }
+
+ // done, clear removal flag
+ if (debug)
+ cerr << "done, clearing removal flag" << std::endl;
+
+ if (!dry_run) {
+ set<string> remove;
+ remove.insert("_remove");
+ t.omap_rmkeys(coll, pgid.make_pgmeta_oid(), remove);
+ wait_until_done(&t, [&] {
+ store->queue_transaction(ch, std::move(t));
+ // make sure we flush onreadable items before mapper/driver are destroyed.
+ ch->flush();
+ });
+ }
+ return 0;
+ }
+
+ /**
+  * List objects matching (object, nspace, head) and dump them to cout.
+  *
+  * With a non-empty pgidstr only that pg is searched; otherwise every
+  * object in the store is visited.
+  *
+  * @return 0 on success, or the non-zero result of the object walk (in which
+  *         case nothing is dumped).
+  */
+ int do_list(ObjectStore *store, string pgidstr, string object, boost::optional<std::string> nspace,
+             Formatter *formatter, bool debug, bool human_readable, bool head)
+ {
+   lookup_ghobject lookup(object, nspace, head);
+
+   const int r = pgidstr.empty()
+     ? action_on_all_objects(store, lookup, debug)
+     : action_on_all_objects_in_pg(store, pgidstr, lookup, debug);
+   if (r != 0) {
+     return r;
+   }
+
+   lookup.dump(formatter, human_readable);
+   formatter->flush(cout);
+   return 0;
+ }
+
+ /**
+  * List objects named `object` in the meta collection and dump them to cout.
+  *
+  * No namespace filter is applied (the optional namespace is left unset).
+  *
+  * @return 0 on success, or the non-zero result of the object walk.
+  */
+ int do_meta(ObjectStore *store, string object, Formatter *formatter, bool debug, bool human_readable)
+ {
+   const boost::optional<std::string> any_namespace; // Not specified
+   lookup_ghobject lookup(object, any_namespace);
+
+   const int r = action_on_all_objects_in_exact_pg(store, coll_t::meta(), lookup, debug);
+   if (r != 0) {
+     return r;
+   }
+
+   lookup.dump(formatter, human_readable);
+   formatter->flush(cout);
+   return 0;
+ }
+
+ enum rmtype { // selects which parts remove_object() deletes
+ BOTH, // remove the snap mapper entry and the object itself
+ SNAPMAP, // remove only the snap mapper entry
+ NOSNAPMAP // remove only the object, leaving the snap mapper untouched
+ };
+
+ /**
+  * Stage the removal of one object on the given transactions.
+  *
+  * Depending on `type`, the object's snap mapper entry is removed through
+  * mapper/_t, the object itself is removed from coll through t, or both.
+  * A missing snap mapper entry (-ENOENT) is not treated as an error.
+  *
+  * @return 0 on success, or the negative error from mapper.remove_oid().
+  */
+ int remove_object(coll_t coll, ghobject_t &ghobj,
+                   SnapMapper &mapper,
+                   MapCacher::Transaction<std::string, bufferlist> *_t,
+                   ObjectStore::Transaction *t,
+                   enum rmtype type)
+ {
+   const bool drop_snapmap = (type == BOTH || type == SNAPMAP);
+   const bool drop_object = (type == BOTH || type == NOSNAPMAP);
+
+   if (drop_snapmap) {
+     const int rc = mapper.remove_oid(ghobj.hobj, _t);
+     if (rc < 0 && rc != -ENOENT) {
+       cerr << "remove_oid returned " << cpp_strerror(rc) << std::endl;
+       return rc;
+     }
+   }
+
+   if (drop_object) {
+     t->remove(coll, ghobj);
+   }
+   return 0;
+ }
+
+ int get_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj, SnapSet &ss, bool silent); // forward declaration; the definition appears later in this file
+
+ /*
+ * Remove object ghobj from collection coll.
+ *
+ * The object must exist (it is stat'ed first). If the object has a snapset,
+ * the snapset is loaded: when it lists snaps and `all` is false, the call
+ * fails with -EINVAL unless `force` is set, in which case only the object
+ * itself is removed. Otherwise one removal per snap id in the snapset is
+ * staged, followed by the object itself. `type` is passed to remove_object()
+ * to choose between snap mapper entry, object, or both. With dry_run set
+ * the removals are only printed.
+ *
+ * Returns 0 on success or a negative error code.
+ */ int do_remove_object(ObjectStore *store, coll_t coll,
+ ghobject_t &ghobj, bool all, bool force, enum rmtype type)
+ {
+ auto ch = store->open_collection(coll);
+ spg_t pg;
+ coll.is_pg_prefix(&pg); // result ignored; pg.shard is used for the SnapMapper below
+ OSDriver driver(
+ store,
+ coll_t(),
+ OSD::make_snapmapper_oid());
+ SnapMapper mapper(g_ceph_context, &driver, 0, 0, 0, pg.shard);
+ struct stat st;
+
+ int r = store->stat(ch, ghobj, &st);
+ if (r < 0) {
+ cerr << "remove: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+
+ SnapSet ss;
+ if (ghobj.hobj.has_snapset()) {
+ r = get_snapset(store, coll, ghobj, ss, false);
+ if (r < 0) {
+ cerr << "Can't get snapset error " << cpp_strerror(r) << std::endl;
+ // If --force and bad snapset let them remove the head
+ if (!(force && !all))
+ return r;
+ }
+ if (!ss.snaps.empty() && !all) {
+ if (force) {
+ cout << "WARNING: only removing "
+ << (ghobj.hobj.is_head() ? "head" : "snapdir")
+ << " with snapshots present" << std::endl;
+ ss.snaps.clear(); // so the per-snap loop below removes nothing
+ } else {
+ cerr << "Snapshots are present, use removeall to delete everything" << std::endl;
+ return -EINVAL;
+ }
+ }
+ }
+
+ ObjectStore::Transaction t;
+ OSDriver::OSTransaction _t(driver.get_transaction(&t));
+
+ ghobject_t snapobj = ghobj; // copy of ghobj whose snap id is rewritten per iteration
+ for (vector<snapid_t>::iterator i = ss.snaps.begin() ;
+ i != ss.snaps.end() ; ++i) {
+ snapobj.hobj.snap = *i;
+ cout << "remove " << snapobj << std::endl;
+ if (!dry_run) {
+ r = remove_object(coll, snapobj, mapper, &_t, &t, type);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ cout << "remove " << ghobj << std::endl;
+
+ if (!dry_run) {
+ r = remove_object(coll, ghobj, mapper, &_t, &t, type);
+ if (r < 0)
+ return r;
+ }
+
+ if (!dry_run) { // submit all staged removals in one transaction
+ wait_until_done(&t, [&] {
+ store->queue_transaction(ch, std::move(t));
+ ch->flush();
+ });
+ }
+ return 0;
+ }
+
+ /**
+  * Print the name of every xattr on ghobj, one per line, to cout.
+  *
+  * Names are passed through cleanbin() when stdout is a tty (outistty).
+  *
+  * @return 0 on success, or the negative error from getattrs().
+  */
+ int do_list_attrs(ObjectStore *store, coll_t coll, ghobject_t &ghobj)
+ {
+   auto ch = store->open_collection(coll);
+
+   map<string,bufferptr> attrs;
+   const int r = store->getattrs(ch, ghobj, attrs);
+   if (r < 0) {
+     cerr << "getattrs: " << cpp_strerror(r) << std::endl;
+     return r;
+   }
+
+   for (const auto &attr : attrs) {
+     string name(attr.first);
+     if (outistty) {
+       name = cleanbin(name);
+     }
+     cout << name << std::endl;
+   }
+   return 0;
+ }
+
+ /**
+  * Print every omap key of ghobj, one per line, to cout.
+  *
+  * Keys are fetched in batches via get_omap_batch() and passed through
+  * cleanbin() when stdout is a tty (outistty).
+  *
+  * @return 0 on success, -ENOENT if no omap iterator could be obtained.
+  */
+ int do_list_omap(ObjectStore *store, coll_t coll, ghobject_t &ghobj)
+ {
+   auto ch = store->open_collection(coll);
+   ObjectMap::ObjectMapIterator it = store->get_omap_iterator(ch, ghobj);
+   if (!it) {
+     cerr << "omap_get_iterator: " << cpp_strerror(ENOENT) << std::endl;
+     return -ENOENT;
+   }
+
+   map<string, bufferlist> batch;
+   for (it->seek_to_first(); it->valid(); ) {
+     // get_omap_batch() is what moves the iterator forward.
+     get_omap_batch(it, batch);
+
+     for (const auto &kv : batch) {
+       string name(kv.first);
+       if (outistty) {
+         name = cleanbin(name);
+       }
+       cout << name << std::endl;
+     }
+   }
+   return 0;
+ }
+
+ /**
+  * Copy the data of object ghobj to file descriptor fd.
+  *
+  * The object size is taken from stat(); data is then read in chunks of at
+  * most max_read bytes and written to fd. Each chunk is written completely:
+  * short writes are continued and EINTR is retried, so the output is never
+  * silently truncated.
+  *
+  * @return 0 on success; the negative error from stat()/read(); -EINVAL if
+  *         the store returns no data before the expected size is reached;
+  *         -errno if write() fails; -EIO if write() makes no progress.
+  */
+ int do_get_bytes(ObjectStore *store, coll_t coll, ghobject_t &ghobj, int fd)
+ {
+   auto ch = store->open_collection(coll);
+   struct stat st;
+   mysize_t total;
+
+   int ret = store->stat(ch, ghobj, &st);
+   if (ret < 0) {
+     cerr << "get-bytes: " << cpp_strerror(ret) << std::endl;
+     return ret;
+   }
+
+   total = st.st_size;
+   if (debug)
+     cerr << "size=" << total << std::endl;
+
+   uint64_t offset = 0;
+   bufferlist rawdatabl;
+   while(total > 0) {
+     rawdatabl.clear();
+     mysize_t len = max_read;
+     if (len > total)
+       len = total;
+
+     ret = store->read(ch, ghobj, offset, len, rawdatabl);
+     if (ret < 0)
+       return ret;
+     if (ret == 0)
+       return -EINVAL;
+
+     if (debug)
+       cerr << "data section offset=" << offset << " len=" << len << std::endl;
+
+     total -= ret;
+     offset += ret;
+
+     // write() may accept fewer bytes than requested; keep going until the
+     // whole chunk has been written.
+     const char *p = rawdatabl.c_str();
+     size_t left = ret;
+     while (left > 0) {
+       ssize_t written = write(fd, p, left);
+       if (written < 0) {
+         if (errno == EINTR)
+           continue;
+         // Save errno first: perror() itself may change it.
+         int err = errno;
+         perror("write");
+         return -err;
+       }
+       if (written == 0) {
+         // No progress and no error reported; bail out instead of spinning.
+         cerr << "write: no progress writing output" << std::endl;
+         return -EIO;
+       }
+       p += written;
+       left -= written;
+     }
+   }
+
+   return 0;
+ }
+
+ /**
+  * Replace the data of object ghobj with everything readable from fd.
+  *
+  * The object is touched and truncated to zero, then the input is appended
+  * in chunks of at most max_read bytes. All writes are staged on a single
+  * transaction that is queued at the end. With dry_run set the input is
+  * still consumed but nothing is staged or queued.
+  *
+  * @return 0 on success, or the negative error from read_fd().
+  */
+ int do_set_bytes(ObjectStore *store, coll_t coll,
+                  ghobject_t &ghobj, int fd)
+ {
+   ObjectStore::Transaction txn;
+
+   if (debug) {
+     cerr << "Write " << ghobj << std::endl;
+   }
+
+   if (!dry_run) {
+     txn.touch(coll, ghobj);
+     txn.truncate(coll, ghobj, 0);
+   }
+
+   uint64_t pos = 0;
+   bufferlist chunk;
+   for (;;) {
+     chunk.clear();
+     const ssize_t bytes = chunk.read_fd(fd, max_read);
+     if (bytes < 0) {
+       cerr << "read_fd error " << cpp_strerror(bytes) << std::endl;
+       return bytes;
+     }
+     if (bytes == 0) {
+       break;  // end of input
+     }
+
+     if (debug) {
+       cerr << "\tdata: offset " << pos << " bytes " << bytes << std::endl;
+     }
+     if (!dry_run) {
+       txn.write(coll, ghobj, pos, bytes, chunk);
+     }
+
+     pos += bytes;
+     // XXX: Should we queue_transaction() every once in a while for very large files
+   }
+
+   auto ch = store->open_collection(coll);
+   if (!dry_run) {
+     store->queue_transaction(ch, std::move(txn));
+   }
+   return 0;
+ }
+
+ /**
+  * Write the value of xattr `key` on ghobj to cout.
+  *
+  * When stdout is a tty (outistty) the value is passed through cleanbin()
+  * and a newline is appended; otherwise the raw bytes are written.
+  *
+  * @return 0 on success, or the negative error from getattr().
+  */
+ int do_get_attr(ObjectStore *store, coll_t coll, ghobject_t &ghobj, string key)
+ {
+   auto ch = store->open_collection(coll);
+
+   bufferptr raw;
+   const int r = store->getattr(ch, ghobj, key.c_str(), raw);
+   if (r < 0) {
+     cerr << "getattr: " << cpp_strerror(r) << std::endl;
+     return r;
+   }
+
+   string text(raw.c_str(), raw.length());
+   if (outistty) {
+     text = cleanbin(text);
+     text += '\n';
+   }
+   cout << text;
+
+   return 0;
+ }
+
+ /**
+  * Set xattr `key` on ghobj to the data read from fd.
+  *
+  * The input is read even in dry_run mode; in that case nothing is queued.
+  * Otherwise the object is touched and the attribute set in one transaction.
+  *
+  * @return 0 on success, or the negative error from get_fd_data().
+  */
+ int do_set_attr(ObjectStore *store, coll_t coll,
+                 ghobject_t &ghobj, string key, int fd)
+ {
+   if (debug) {
+     cerr << "Setattr " << ghobj << std::endl;
+   }
+
+   bufferlist value;
+   const int ret = get_fd_data(fd, value);
+   if (ret < 0) {
+     return ret;
+   }
+
+   if (dry_run) {
+     return 0;
+   }
+
+   ObjectStore::Transaction txn;
+   txn.touch(coll, ghobj);
+   txn.setattr(coll, ghobj, key, value);
+
+   auto ch = store->open_collection(coll);
+   store->queue_transaction(ch, std::move(txn));
+   return 0;
+ }
+
+ /**
+  * Remove xattr `key` from ghobj.
+  *
+  * Nothing is queued when dry_run is set.
+  *
+  * @return always 0.
+  */
+ int do_rm_attr(ObjectStore *store, coll_t coll,
+                ghobject_t &ghobj, string key)
+ {
+   if (debug) {
+     cerr << "Rmattr " << ghobj << std::endl;
+   }
+
+   if (!dry_run) {
+     ObjectStore::Transaction txn;
+     txn.rmattr(coll, ghobj, key);
+
+     auto ch = store->open_collection(coll);
+     store->queue_transaction(ch, std::move(txn));
+   }
+   return 0;
+ }
+
+ /**
+  * Write the value stored under omap key `key` of ghobj to cout.
+  *
+  * When stdout is a tty (outistty) the value is passed through cleanbin()
+  * and a newline is appended; otherwise the raw bytes are written.
+  *
+  * @return 0 on success, the negative error from omap_get_values(), or
+  *         -ENOENT if the key is not present.
+  */
+ int do_get_omap(ObjectStore *store, coll_t coll, ghobject_t &ghobj, string key)
+ {
+   auto ch = store->open_collection(coll);
+
+   set<string> wanted;
+   wanted.insert(key);
+
+   map<string, bufferlist> found;
+   const int r = store->omap_get_values(ch, ghobj, wanted, &found);
+   if (r < 0) {
+     cerr << "omap_get_values: " << cpp_strerror(r) << std::endl;
+     return r;
+   }
+
+   if (found.empty()) {
+     cerr << "Key not found" << std::endl;
+     return -ENOENT;
+   }
+
+   // Exactly one key was requested, so exactly one value may come back.
+   ceph_assert(found.size() == 1);
+
+   bufferlist data = found.begin()->second;
+   string text(data.c_str(), data.length());
+   if (outistty) {
+     text = cleanbin(text);
+     text += '\n';
+   }
+   cout << text;
+
+   return 0;
+ }
+
+ /**
+  * Set omap key `key` of ghobj to the data read from fd.
+  *
+  * The input is read even in dry_run mode; in that case nothing is queued.
+  * Otherwise the object is touched and the key set in one transaction.
+  *
+  * @return 0 on success, or the negative error from get_fd_data().
+  */
+ int do_set_omap(ObjectStore *store, coll_t coll,
+                 ghobject_t &ghobj, string key, int fd)
+ {
+   if (debug) {
+     cerr << "Set_omap " << ghobj << std::endl;
+   }
+
+   bufferlist value;
+   const int ret = get_fd_data(fd, value);
+   if (ret < 0) {
+     return ret;
+   }
+
+   if (dry_run) {
+     return 0;
+   }
+
+   map<string, bufferlist> entries;
+   entries[key] = value;
+
+   ObjectStore::Transaction txn;
+   txn.touch(coll, ghobj);
+   txn.omap_setkeys(coll, ghobj, entries);
+
+   auto ch = store->open_collection(coll);
+   store->queue_transaction(ch, std::move(txn));
+   return 0;
+ }
+
+ /**
+  * Remove omap key `key` from ghobj.
+  *
+  * Nothing is queued when dry_run is set.
+  *
+  * @return always 0.
+  */
+ int do_rm_omap(ObjectStore *store, coll_t coll,
+                ghobject_t &ghobj, string key)
+ {
+   if (debug) {
+     cerr << "Rm_omap " << ghobj << std::endl;
+   }
+
+   if (!dry_run) {
+     set<string> doomed;
+     doomed.insert(key);
+
+     ObjectStore::Transaction txn;
+     txn.omap_rmkeys(coll, ghobj, doomed);
+
+     auto ch = store->open_collection(coll);
+     store->queue_transaction(ch, std::move(txn));
+   }
+   return 0;
+ }
+
+ /**
+  * Write the omap header of ghobj to cout.
+  *
+  * When stdout is a tty (outistty) the header is passed through cleanbin()
+  * and a newline is appended; otherwise the raw bytes are written.
+  *
+  * @return 0 on success, or the negative error from omap_get_header().
+  */
+ int do_get_omaphdr(ObjectStore *store, coll_t coll, ghobject_t &ghobj)
+ {
+   auto ch = store->open_collection(coll);
+
+   bufferlist raw;
+   const int r = store->omap_get_header(ch, ghobj, &raw, true);
+   if (r < 0) {
+     cerr << "omap_get_header: " << cpp_strerror(r) << std::endl;
+     return r;
+   }
+
+   string text(raw.c_str(), raw.length());
+   if (outistty) {
+     text = cleanbin(text);
+     text += '\n';
+   }
+   cout << text;
+
+   return 0;
+ }
+
+ /**
+  * Set the omap header of ghobj to the data read from fd.
+  *
+  * The input is read even in dry_run mode; in that case nothing is queued.
+  * Otherwise the object is touched and the header set in one transaction.
+  *
+  * @return 0 on success, or the non-zero result of get_fd_data().
+  */
+ int do_set_omaphdr(ObjectStore *store, coll_t coll,
+                    ghobject_t &ghobj, int fd)
+ {
+   if (debug) {
+     cerr << "Omap_setheader " << ghobj << std::endl;
+   }
+
+   bufferlist header;
+   const int ret = get_fd_data(fd, header);
+   if (ret != 0) {
+     return ret;
+   }
+
+   if (dry_run) {
+     return 0;
+   }
+
+   ObjectStore::Transaction txn;
+   txn.touch(coll, ghobj);
+   txn.omap_setheader(coll, ghobj, header);
+
+   auto ch = store->open_collection(coll);
+   store->queue_transaction(ch, std::move(txn));
+   return 0;
+ }
+
+ /**
+  * Per-object action that clears the LOST flag.
+  *
+  * For every object whose object_info_t reports is_lost(), a line is printed
+  * and, unless dry_run is set, the flag is cleared and the re-encoded
+  * object info is written back to the OI_ATTR xattr.
+  */
+ struct do_fix_lost : public action_on_object_t {
+   void call(ObjectStore *store, coll_t coll,
+             ghobject_t &ghobj, object_info_t &oi) override {
+     if (!oi.is_lost()) {
+       return;
+     }
+
+     cout << coll << "/" << ghobj << " is lost"
+          << (dry_run ? "" : ", fixing") << std::endl;
+     if (dry_run) {
+       return;
+     }
+
+     oi.clear_flag(object_info_t::FLAG_LOST);
+
+     bufferlist encoded;
+     encode(oi, encoded, -1);  /* fixme: using full features */
+
+     ObjectStore::Transaction txn;
+     txn.setattr(coll, ghobj, OI_ATTR, encoded);
+
+     auto ch = store->open_collection(coll);
+     store->queue_transaction(ch, std::move(txn));
+   }
+ };
+
+ /**
+  * Load and decode the SnapSet stored in the SS_ATTR xattr of ghobj.
+  *
+  * @param ss     receives the decoded snapset
+  * @param silent when true, a getattr() failure is not reported on cerr
+  *               (decode failures are always reported)
+  * @return 0 on success, the negative error from getattr(), or -EINVAL if
+  *         the attribute cannot be decoded.
+  */
+ int get_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj, SnapSet &ss, bool silent = false)
+ {
+   auto ch = store->open_collection(coll);
+
+   bufferlist raw;
+   int r = store->getattr(ch, ghobj, SS_ATTR, raw);
+   if (r < 0) {
+     if (!silent) {
+       cerr << "Error getting snapset on : " << make_pair(coll, ghobj) << ", "
+            << cpp_strerror(r) << std::endl;
+     }
+     return r;
+   }
+
+   auto cursor = raw.cbegin();
+   try {
+     decode(ss, cursor);
+   } catch (...) {
+     r = -EINVAL;
+     cerr << "Error decoding snapset on : " << make_pair(coll, ghobj) << ", "
+          << cpp_strerror(r) << std::endl;
+     return r;
+   }
+   return 0;
+ }
+
+ /*
+ * Dump what is known about object ghobj to cout through formatter.
+ *
+ * Emits, inside one "obj" section: the object id, the decoded object_info_t
+ * ("info"), selected stat() fields ("stat"), the SnapSet when the object has
+ * one, and the decoded hash info ("hinfo") when that xattr can be read.
+ * A failing step is reported on cerr and the remaining steps still run;
+ * the error of the last failing step is returned.
+ *
+ * Returns 0 if every step succeeded, otherwise a negative error code.
+ */ int print_obj_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter* formatter)
+ {
+ auto ch = store->open_collection(coll);
+ int r = 0; // overall result; overwritten by each failing step
+ formatter->open_object_section("obj");
+ formatter->open_object_section("id");
+ ghobj.dump(formatter);
+ formatter->close_section();
+
+ bufferlist attr;
+ int gr = store->getattr(ch, ghobj, OI_ATTR, attr);
+ if (gr < 0) {
+ r = gr;
+ cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
+ << cpp_strerror(r) << std::endl;
+ } else {
+ object_info_t oi;
+ auto bp = attr.cbegin();
+ try {
+ decode(oi, bp);
+ formatter->open_object_section("info");
+ oi.dump(formatter);
+ formatter->close_section();
+ } catch (...) {
+ r = -EINVAL;
+ cerr << "Error decoding attr on : " << make_pair(coll, ghobj) << ", "
+ << cpp_strerror(r) << std::endl;
+ }
+ }
+ struct stat st;
+ int sr = store->stat(ch, ghobj, &st, true);
+ if (sr < 0) {
+ r = sr;
+ cerr << "Error stat on : " << make_pair(coll, ghobj) << ", "
+ << cpp_strerror(r) << std::endl;
+ } else {
+ formatter->open_object_section("stat");
+ formatter->dump_int("size", st.st_size);
+ formatter->dump_int("blksize", st.st_blksize);
+ formatter->dump_int("blocks", st.st_blocks);
+ formatter->dump_int("nlink", st.st_nlink);
+ formatter->close_section();
+ }
+
+ if (ghobj.hobj.has_snapset()) {
+ SnapSet ss;
+ int snr = get_snapset(store, coll, ghobj, ss); // get_snapset() reports its own errors on cerr
+ if (snr < 0) {
+ r = snr;
+ } else {
+ formatter->open_object_section("SnapSet");
+ ss.dump(formatter);
+ formatter->close_section();
+ }
+ }
+ bufferlist hattr;
+ gr = store->getattr(ch, ghobj, ECUtil::get_hinfo_key(), hattr);
+ if (gr == 0) { // a failure to read this xattr is not reported and does not affect r
+ ECUtil::HashInfo hinfo;
+ auto hp = hattr.cbegin();
+ try {
+ decode(hinfo, hp);
+ formatter->open_object_section("hinfo");
+ hinfo.dump(formatter);
+ formatter->close_section();
+ } catch (...) {
+ r = -EINVAL;
+ cerr << "Error decoding hinfo on : " << make_pair(coll, ghobj) << ", "
+ << cpp_strerror(r) << std::endl;
+ }
+ }
+ formatter->close_section(); // closes "obj"
+ formatter->flush(cout);
+ cout << std::endl;
+ return r;
+ }
+
+/*
+ * Deliberately damage an object's object_info_t: decode the OI_ATTR
+ * attribute, add 0xff to alloc_hint_flags, re-encode it and write it back.
+ * Nothing is written when the global dry_run flag is set.
+ *
+ * The formatter argument is accepted but not used.
+ *
+ * Returns 0 on success, the getattr/queue_transaction error code on failure,
+ * or -EINVAL when the stored attribute cannot be decoded.
+ */
+int corrupt_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter* formatter)
+{
+  auto ch = store->open_collection(coll);
+  bufferlist attr;
+  int r = store->getattr(ch, ghobj, OI_ATTR, attr);
+  if (r < 0) {
+    cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
+         << cpp_strerror(r) << std::endl;
+    return r;
+  }
+  object_info_t oi;
+  auto bp = attr.cbegin();
+  try {
+    decode(oi, bp);
+  } catch (...) {
+    r = -EINVAL;
+    // The attribute was read successfully; it is the decode that failed.
+    cerr << "Error decoding attr on : " << make_pair(coll, ghobj) << ", "
+         << cpp_strerror(r) << std::endl;
+    return r;
+  }
+  if (dry_run)
+    return 0;
+
+  attr.clear();
+  oi.alloc_hint_flags += 0xff;
+  ObjectStore::Transaction t;
+  encode(oi, attr, -1);  /* fixme: using full features */
+  t.setattr(coll, ghobj, OI_ATTR, attr);
+  // Reuse the handle opened above instead of opening the collection again.
+  r = store->queue_transaction(ch, std::move(t));
+  if (r < 0) {
+    cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
+         << cpp_strerror(r) << std::endl;
+    return r;
+  }
+  return 0;
+}
+
+/*
+ * Set the recorded size of an object to setsize.
+ *
+ * Updates object_info_t::size in OI_ATTR and, for a clone, the matching
+ * clone_size entry in the SnapSet of its head (or of its snapdir when the
+ * head has no SnapSet).  Unless corrupt is true, the object data is also
+ * truncated to setsize and the data digest is cleared, so that data and
+ * metadata stay consistent; with corrupt set only the metadata changes.
+ * Nothing is written when the global dry_run flag is set.
+ *
+ * The formatter argument is accepted but not used.
+ *
+ * Returns 0 on success (including "already that size"), -EINVAL for a
+ * snapdir object, an undecodable attribute or a missing clone_size entry,
+ * or the error code of the store call that failed.
+ */
+int set_size(
+  ObjectStore *store, coll_t coll, ghobject_t &ghobj, uint64_t setsize, Formatter* formatter,
+  bool corrupt)
+{
+  auto ch = store->open_collection(coll);
+  if (ghobj.hobj.is_snapdir()) {
+    cerr << "Can't set the size of a snapdir" << std::endl;
+    return -EINVAL;
+  }
+  bufferlist attr;
+  int r = store->getattr(ch, ghobj, OI_ATTR, attr);
+  if (r < 0) {
+    cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
+         << cpp_strerror(r) << std::endl;
+    return r;
+  }
+  object_info_t oi;
+  auto bp = attr.cbegin();
+  try {
+    decode(oi, bp);
+  } catch (...) {
+    r = -EINVAL;
+    // The attribute was read successfully; it is the decode that failed.
+    cerr << "Error decoding attr on : " << make_pair(coll, ghobj) << ", "
+         << cpp_strerror(r) << std::endl;
+    return r;
+  }
+  struct stat st;
+  r = store->stat(ch, ghobj, &st, true);
+  if (r < 0) {
+    cerr << "Error stat on : " << make_pair(coll, ghobj) << ", "
+         << cpp_strerror(r) << std::endl;
+    // st was not filled in; going on would compare against garbage.
+    return r;
+  }
+
+  // For a clone, locate the SnapSet that records its size.
+  ghobject_t head(ghobj);
+  SnapSet ss;
+  bool found_head = true;
+  map<snapid_t, uint64_t>::iterator csi;
+  const bool is_snap = ghobj.hobj.is_snap();
+  if (is_snap) {
+    head.hobj = head.hobj.get_head();
+    r = get_snapset(store, coll, head, ss, true);
+    if (r < 0 && r != -ENOENT) {
+      // Requested get_snapset() silent, so if not -ENOENT show error
+      cerr << "Error getting snapset on : " << make_pair(coll, head) << ", "
+           << cpp_strerror(r) << std::endl;
+      return r;
+    }
+    if (r == -ENOENT) {
+      // No SnapSet on the head: fall back to the snapdir object.
+      head.hobj = head.hobj.get_snapdir();
+      r = get_snapset(store, coll, head, ss);
+      if (r < 0)
+        return r;
+      found_head = false;
+    }
+    csi = ss.clone_size.find(ghobj.hobj.snap);
+    if (csi == ss.clone_size.end()) {
+      cerr << "SnapSet is missing clone_size for snap " << ghobj.hobj.snap << std::endl;
+      return -EINVAL;
+    }
+  }
+
+  if ((uint64_t)st.st_size == setsize && oi.size == setsize
+      && (!is_snap || csi->second == setsize)) {
+    cout << "Size of object is already " << setsize << std::endl;
+    return 0;
+  }
+  cout << "Setting size to " << setsize << ", stat size " << st.st_size
+       << ", obj info size " << oi.size;
+  if (is_snap) {
+    cout << ", " << (found_head ? "head" : "snapdir")
+         << " clone_size " << csi->second;
+    csi->second = setsize;
+  }
+  cout << std::endl;
+
+  if (dry_run)
+    return 0;
+
+  attr.clear();
+  oi.size = setsize;
+  ObjectStore::Transaction t;
+  // When corrupting, only the recorded sizes change.  Otherwise bring the
+  // object data in line with the new size as well.
+  if (!corrupt && (uint64_t)st.st_size != setsize) {
+    t.truncate(coll, ghobj, setsize);
+    // Changing objectstore size will invalidate data_digest, so clear it.
+    oi.clear_data_digest();
+  }
+  encode(oi, attr, -1);  /* fixme: using full features */
+  t.setattr(coll, ghobj, OI_ATTR, attr);
+  if (is_snap) {
+    bufferlist snapattr;
+    encode(ss, snapattr);
+    t.setattr(coll, head, SS_ATTR, snapattr);
+  }
+  // Reuse the handle opened above instead of opening the collection again.
+  r = store->queue_transaction(ch, std::move(t));
+  if (r < 0) {
+    cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
+         << cpp_strerror(r) << std::endl;
+    return r;
+  }
+  return 0;
+}
+
+/*
+ * Clear the data digest recorded in an object's object_info_t: decode
+ * OI_ATTR, call clear_data_digest() on it, re-encode and write it back.
+ * Nothing is written when the global dry_run flag is set.
+ *
+ * Returns 0 on success, the getattr/queue_transaction error code on failure,
+ * or -EINVAL when the stored attribute cannot be decoded.
+ */
+int clear_data_digest(ObjectStore *store, coll_t coll, ghobject_t &ghobj) {
+  auto ch = store->open_collection(coll);
+  bufferlist attr;
+  int r = store->getattr(ch, ghobj, OI_ATTR, attr);
+  if (r < 0) {
+    cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
+         << cpp_strerror(r) << std::endl;
+    return r;
+  }
+  object_info_t oi;
+  auto bp = attr.cbegin();
+  try {
+    decode(oi, bp);
+  } catch (...) {
+    r = -EINVAL;
+    // The attribute was read successfully; it is the decode that failed.
+    cerr << "Error decoding attr on : " << make_pair(coll, ghobj) << ", "
+         << cpp_strerror(r) << std::endl;
+    return r;
+  }
+  if (dry_run)
+    return 0;
+
+  attr.clear();
+  oi.clear_data_digest();
+  encode(oi, attr, -1);  /* fixme: using full features */
+  ObjectStore::Transaction t;
+  t.setattr(coll, ghobj, OI_ATTR, attr);
+  // Reuse the handle opened above instead of opening the collection again.
+  r = store->queue_transaction(ch, std::move(t));
+  if (r < 0) {
+    cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
+         << cpp_strerror(r) << std::endl;
+    return r;
+  }
+  return 0;
+}
+
+/*
+ * Damage selected parts of an object's SnapSet and write it back to SS_ATTR.
+ *
+ * arg selects what is touched:
+ *   "corrupt"        seq reset to 0, snaps, clones, clone_overlap and
+ *                    clone_size all cleared
+ *   "seq"            only seq reset to 0
+ *   "snaps"          only snaps cleared
+ *   "" (default)     clones, clone_overlap and clone_size cleared
+ *   "clones", "clone_overlap", "clone_size"
+ *                    only that one member cleared
+ *   "size"           every clone_size entry incremented by one
+ *
+ * Nothing is written when the global dry_run flag is set.
+ * Returns 0 on success or the error from get_snapset()/queue_transaction().
+ */
+int clear_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj,
+                  string arg)
+{
+  SnapSet ss;
+  const int got = get_snapset(store, coll, ghobj, ss);
+  if (got < 0)
+    return got;
+
+  // "corrupt" implies every individual selector, including the default set.
+  const bool everything = (arg == "corrupt");
+  const bool default_set = everything || arg.empty();
+
+  if (everything || arg == "seq")
+    ss.seq = 0;
+  if (everything || arg == "snaps")
+    ss.snaps.clear();
+  if (default_set || arg == "clones")
+    ss.clones.clear();
+  if (default_set || arg == "clone_overlap")
+    ss.clone_overlap.clear();
+  if (default_set || arg == "clone_size")
+    ss.clone_size.clear();
+
+  // Break all clone sizes by adding 1
+  if (arg == "size") {
+    for (auto &entry : ss.clone_size)
+      ++entry.second;
+  }
+
+  if (dry_run)
+    return 0;
+
+  bufferlist encoded;
+  encode(ss, encoded);
+  ObjectStore::Transaction txn;
+  txn.setattr(coll, ghobj, SS_ATTR, encoded);
+  auto ch = store->open_collection(coll);
+  const int rc = store->queue_transaction(ch, std::move(txn));
+  if (rc < 0) {
+    cerr << "Error setting snapset on : " << make_pair(coll, ghobj) << ", "
+         << cpp_strerror(rc) << std::endl;
+    return rc;
+  }
+  return 0;
+}
+
+// find() overload set: one spelling for "locate clid" across the three
+// container types that remove_from() is instantiated with.  Each overload
+// returns the container's end() when clid is absent.
+
+// Linear search in a vector of clone ids.
+vector<snapid_t>::iterator find(vector<snapid_t> &v, snapid_t clid)
+{
+  auto hit = std::find(v.begin(), v.end(), clid);
+  return hit;
+}
+
+// Keyed lookup in the clone -> overlap intervals map.
+map<snapid_t, interval_set<uint64_t> >::iterator
+find(map<snapid_t, interval_set<uint64_t> > &m, snapid_t clid)
+{
+  auto hit = m.find(clid);
+  return hit;
+}
+
+// Keyed lookup in the clone -> size map.
+map<snapid_t, uint64_t>::iterator find(map<snapid_t, uint64_t> &m,
+                                       snapid_t clid)
+{
+  auto hit = m.find(clid);
+  return hit;
+}
+
+/*
+ * Erase cloneid from mv, locating it with the matching find() overload.
+ *
+ * name is only used in the stderr message printed when cloneid is absent.
+ * An absent clone is an error (-EINVAL) unless force is set, in which case
+ * it is reported as ignored and 0 is returned.  Returns 0 after a
+ * successful erase.
+ */
+template<class T>
+int remove_from(T &mv, string name, snapid_t cloneid, bool force)
+{
+  auto pos = find(mv, cloneid);
+  if (pos != mv.end()) {
+    mv.erase(pos);
+    return 0;
+  }
+
+  cerr << "Clone " << cloneid << " doesn't exist in " << name;
+  if (!force) {
+    cerr << std::endl;
+    return -EINVAL;
+  }
+  cerr << " (ignored)" << std::endl;
+  return 0;
+}
+
+/*
+ * Remove every trace of clone cloneid from the SnapSet stored on ghobj:
+ * its entry in clones, clone_overlap and clone_size.  When the clone is not
+ * the first one in clones, the previous clone's overlap is first narrowed
+ * to the intersection with the removed clone's overlap.
+ *
+ * With force set, a clone missing from one of the three containers is
+ * reported and ignored instead of aborting.  Nothing is written when the
+ * global dry_run flag is set.
+ *
+ * Returns 0 on success, -ENOENT when cloneid is not in clones, -EINVAL from
+ * remove_from(), or the error from get_snapset()/queue_transaction().
+ */
+int remove_clone(
+  ObjectStore *store, coll_t coll, ghobject_t &ghobj, snapid_t cloneid, bool force)
+{
+  // XXX: Don't allow this if in a cache tier or former cache tier
+  // bool allow_incomplete_clones() const {
+  // return cache_mode != CACHEMODE_NONE || has_flag(FLAG_INCOMPLETE_CLONES);
+
+  SnapSet snapset;
+  int ret = get_snapset(store, coll, ghobj, snapset);
+  if (ret < 0)
+    return ret;
+
+  // Derived from trim_object()
+  // ...from snapset
+  auto p = std::find(snapset.clones.begin(), snapset.clones.end(), cloneid);
+  if (p == snapset.clones.end()) {
+    cerr << "Clone " << cloneid << " not present" << std::endl;
+    return -ENOENT;
+  }
+  if (p != snapset.clones.begin()) {
+    // not the oldest... merge overlap into next older clone
+    //
+    // trim_object() also adjusts delta_stats.num_bytes for that older clone
+    // around this step; no stats are touched here, which is why the user is
+    // told below to run a pg repair.
+    auto n = p - 1;
+    snapset.clone_overlap[*n].intersection_of(
+      snapset.clone_overlap[*p]);
+  }
+
+  ret = remove_from(snapset.clones, "clones", cloneid, force);
+  if (ret) return ret;
+  ret = remove_from(snapset.clone_overlap, "clone_overlap", cloneid, force);
+  if (ret) return ret;
+  ret = remove_from(snapset.clone_size, "clone_size", cloneid, force);
+  if (ret) return ret;
+
+  if (dry_run)
+    return 0;
+
+  bufferlist bl;
+  encode(snapset, bl);
+  ObjectStore::Transaction t;
+  t.setattr(coll, ghobj, SS_ATTR, bl);
+  auto ch = store->open_collection(coll);
+  int r = store->queue_transaction(ch, std::move(t));
+  if (r < 0) {
+    cerr << "Error setting snapset on : " << make_pair(coll, ghobj) << ", "
+         << cpp_strerror(r) << std::endl;
+    return r;
+  }
+  cout << "Removal of clone " << cloneid << " complete" << std::endl;
+  cout << "Use pg repair after OSD restarted to correct stat information" << std::endl;
+  return 0;
+}
+
+/*
+ * Copy the complete contents of the store src into the store dst.
+ *
+ * Both stores are mounted here and unmounted again before returning.  dst
+ * must report the same fsid as src and must not contain any collection.
+ * For every collection of src the collection is created on dst and each
+ * object is copied with its attrs, data, omap header and omap keys.
+ * Afterwards the keyring file under srcpath is copied to dstpath (a failure
+ * to read it is reported but not fatal), the "magic", "whoami", "ceph_fsid"
+ * and "fsid" meta values are copied, and dst is marked "ready".
+ *
+ * Progress is written to stdout, errors to stderr.
+ * Returns 0 on success and a negative error code on any fatal failure.
+ */
+int dup(string srcpath, ObjectStore *src, string dstpath, ObjectStore *dst)
+{
+  cout << "dup from " << src->get_type() << ": " << srcpath << "\n"
+       << " to " << dst->get_type() << ": " << dstpath
+       << std::endl;
+  // Function-scope variables are all declared before the first goto so that
+  // no jump to the cleanup labels skips an initialization.
+  int num, i;
+  vector<coll_t> collections;
+  int r;
+
+  r = src->mount();
+  if (r < 0) {
+    cerr << "failed to mount src: " << cpp_strerror(r) << std::endl;
+    return r;
+  }
+  r = dst->mount();
+  if (r < 0) {
+    cerr << "failed to mount dst: " << cpp_strerror(r) << std::endl;
+    goto out_src;
+  }
+
+  if (src->get_fsid() != dst->get_fsid()) {
+    cerr << "src fsid " << src->get_fsid() << " != dest " << dst->get_fsid()
+         << std::endl;
+    // r still holds the successful mount result; report a real failure.
+    r = -EINVAL;
+    goto out;
+  }
+  cout << "fsid " << src->get_fsid() << std::endl;
+
+  // make sure dst is empty
+  r = dst->list_collections(collections);
+  if (r < 0) {
+    cerr << "error listing collections on dst: " << cpp_strerror(r) << std::endl;
+    goto out;
+  }
+  if (!collections.empty()) {
+    cerr << "destination store is not empty" << std::endl;
+    // r still holds the successful listing result; report a real failure.
+    r = -ENOTEMPTY;
+    goto out;
+  }
+
+  r = src->list_collections(collections);
+  if (r < 0) {
+    cerr << "error listing collections on src: " << cpp_strerror(r) << std::endl;
+    goto out;
+  }
+
+  num = collections.size();
+  cout << num << " collections" << std::endl;
+  i = 1;
+  for (auto cid : collections) {
+    cout << i++ << "/" << num << " " << cid << std::endl;
+    auto ch = src->open_collection(cid);
+    auto dch = dst->create_new_collection(cid);
+    {
+      ObjectStore::Transaction t;
+      int bits = src->collection_bits(ch);
+      if (bits < 0) {
+        if (src->get_type() == "filestore" && cid.is_meta()) {
+          // filestore's meta collection: carry on with 0 bits.
+          bits = 0;
+        } else {
+          cerr << "cannot get bit count for collection " << cid << ": "
+               << cpp_strerror(bits) << std::endl;
+          r = bits;
+          goto out;
+        }
+      }
+      t.create_collection(cid, bits);
+      r = dst->queue_transaction(dch, std::move(t));
+      if (r < 0) {
+        cerr << "failed to create collection " << cid << " on dst: "
+             << cpp_strerror(r) << std::endl;
+        goto out;
+      }
+    }
+
+    ghobject_t pos;
+    uint64_t n = 0;
+    uint64_t bytes = 0, keys = 0;
+    while (true) {
+      // List the collection in batches of up to 1000 objects; pos is
+      // advanced by collection_list() to where the next batch starts.
+      vector<ghobject_t> ls;
+      r = src->collection_list(ch, pos, ghobject_t::get_max(), 1000, &ls, &pos);
+      if (r < 0) {
+        cerr << "collection_list on " << cid << " from " << pos << " got: "
+             << cpp_strerror(r) << std::endl;
+        goto out;
+      }
+      if (ls.empty()) {
+        break;
+      }
+
+      for (auto& oid : ls) {
+        //cout << " " << cid << " " << oid << std::endl;
+        if (n % 100 == 0) {
+          cout << " " << std::setw(16) << n << " objects, "
+               << std::setw(16) << bytes << " bytes, "
+               << std::setw(16) << keys << " keys"
+               << std::setw(1) << "\r" << std::flush;
+        }
+        n++;
+
+        // One transaction per object: create, attrs, data, omap.
+        ObjectStore::Transaction t;
+        t.touch(cid, oid);
+
+        map<string,bufferptr> attrs;
+        src->getattrs(ch, oid, attrs);
+        if (!attrs.empty()) {
+          t.setattrs(cid, oid, attrs);
+        }
+
+        bufferlist bl;
+        src->read(ch, oid, 0, 0, bl);
+        if (bl.length()) {
+          t.write(cid, oid, 0, bl.length(), bl);
+          bytes += bl.length();
+        }
+
+        bufferlist header;
+        map<string,bufferlist> omap;
+        src->omap_get(ch, oid, &header, &omap);
+        if (header.length()) {
+          t.omap_setheader(cid, oid, header);
+          ++keys;
+        }
+        if (!omap.empty()) {
+          keys += omap.size();
+          t.omap_setkeys(cid, oid, omap);
+        }
+
+        r = dst->queue_transaction(dch, std::move(t));
+        if (r < 0) {
+          cerr << "failed to write " << oid << " in " << cid << " to dst: "
+               << cpp_strerror(r) << std::endl;
+          goto out;
+        }
+      }
+    }
+    cout << " " << std::setw(16) << n << " objects, "
+         << std::setw(16) << bytes << " bytes, "
+         << std::setw(16) << keys << " keys"
+         << std::setw(1) << std::endl;
+  }
+
+  // keyring
+  cout << "keyring" << std::endl;
+  {
+    bufferlist bl;
+    string s = srcpath + "/keyring";
+    string err;
+    r = bl.read_file(s.c_str(), &err);
+    if (r < 0) {
+      // Not fatal: reported, and the copy continues without a keyring.
+      cerr << "failed to copy " << s << ": " << err << std::endl;
+    } else {
+      string d = dstpath + "/keyring";
+      bl.write_file(d.c_str(), 0600);
+    }
+  }
+
+  // osd metadata
+  cout << "duping osd metadata" << std::endl;
+  {
+    for (auto k : {"magic", "whoami", "ceph_fsid", "fsid"}) {
+      string val;
+      src->read_meta(k, &val);
+      dst->write_meta(k, val);
+    }
+  }
+
+  dst->write_meta("ready", "ready");
+
+  cout << "done." << std::endl;
+  r = 0;
+ out:
+  dst->umount();
+ out_src:
+  src->umount();
+  return r;
+}
+
+/*
+ * Print the help text to stderr: the given option description followed by
+ * the fixed explanation of the positional "<object> <command>" syntax.
+ */
+void usage(po::options_description &desc)
+{
+  // One entry per output line; an empty entry produces a blank line.
+  static const char *const positional_help[] = {
+    "",
+    "Positional syntax:",
+    "",
+    "ceph-objectstore-tool ... <object> (get|set)-bytes [file]",
+    "ceph-objectstore-tool ... <object> set-(attr|omap) <key> [file]",
+    "ceph-objectstore-tool ... <object> (get|rm)-(attr|omap) <key>",
+    "ceph-objectstore-tool ... <object> get-omaphdr",
+    "ceph-objectstore-tool ... <object> set-omaphdr [file]",
+    "ceph-objectstore-tool ... <object> list-attrs",
+    "ceph-objectstore-tool ... <object> list-omap",
+    "ceph-objectstore-tool ... <object> remove|removeall",
+    "ceph-objectstore-tool ... <object> dump",
+    "ceph-objectstore-tool ... <object> set-size",
+    "ceph-objectstore-tool ... <object> clear-data-digest",
+    "ceph-objectstore-tool ... <object> remove-clone-metadata <cloneid>",
+    "",
+    "<object> can be a JSON object description as displayed",
+    "by --op list.",
+    "<object> can be an object name which will be looked up in all",
+    "the OSD's PGs.",
+    "<object> can be the empty string ('') which with a provided pgid ",
+    "specifies the pgmeta object",
+    "",
+    "The optional [file] argument will read stdin or write stdout",
+    "if not specified or if '-' specified.",
+  };
+
+  cerr << std::endl;
+  cerr << desc << std::endl;
+  for (const char *line : positional_help)
+    cerr << line << std::endl;
+}
+
+// True when `check` finishes with `ending`; an empty `ending` always matches.
+bool ends_with(const string& check, const string& ending)
+{
+  if (ending.size() > check.size())
+    return false;
+  // Compare only the tail of `check` that has the same length as `ending`.
+  const string::size_type tail = check.size() - ending.size();
+  return check.compare(tail, ending.size(), ending) == 0;
+}
+
+// Based on FileStore::dump_journal(), set-up enough to only dump
+//
+// Builds a FileJournal for journalpath (direct I/O as given by
+// m_journal_dio) and has it dump itself into the formatter f.
+// Returns -EINVAL for an empty journalpath, otherwise whatever
+// FileJournal::_fdump() returns.
+int mydump_journal(Formatter *f, string journalpath, bool m_journal_dio)
+{
+  if (!journalpath.length())
+    return -EINVAL;
+
+  // Automatic storage: the journal object is destroyed on every way out of
+  // this function, including an exception escaping _fdump().
+  FileJournal journal(g_ceph_context, uuid_d(), nullptr, nullptr,
+                      journalpath.c_str(), m_journal_dio);
+  return journal._fdump(*f, false);
+}
+
+/*
+ * Apply FileStore layout settings to a selection of PG collections.
+ *
+ * Does nothing (and returns 0) when os is not a FileStore.  Otherwise the
+ * OSDMap for superblock.current_epoch is loaded, pool_name (if non-empty)
+ * is resolved to a pool id, and every PG collection that belongs to that
+ * pool or equals pgid is selected.  With dry_run set the selected
+ * collections are only listed; otherwise FileStore::apply_layout_settings()
+ * is called on each with target_level.  Progress goes to stderr.
+ *
+ * Returns a negative error code from the first failing step; otherwise the
+ * result of the last call made (the collection listing, or the last
+ * per-collection apply).
+ */
+int apply_layout_settings(ObjectStore *os, const OSDSuperblock &superblock,
+                          const string &pool_name, const spg_t &pgid, bool dry_run,
+                          int target_level)
+{
+  FileStore *filestore = dynamic_cast<FileStore*>(os);
+  if (filestore == nullptr) {
+    cerr << "Nothing to do for non-filestore backend" << std::endl;
+    return 0; // making this return success makes testing easier
+  }
+
+  OSDMap curmap;
+  bufferlist map_bl;
+  int r = get_osdmap(os, superblock.current_epoch, curmap, map_bl);
+  if (r) {
+    cerr << "Can't find local OSDMap: " << cpp_strerror(r) << std::endl;
+    return r;
+  }
+
+  // Stays negative when no pool name was given.
+  int64_t poolid = -1;
+  if (!pool_name.empty()) {
+    poolid = curmap.lookup_pg_pool_name(pool_name);
+    if (poolid < 0) {
+      cerr << "Couldn't find pool " << pool_name << ": " << cpp_strerror(poolid)
+           << std::endl;
+      return poolid;
+    }
+  }
+
+  vector<coll_t> all_colls;
+  r = os->list_collections(all_colls);
+  if (r < 0) {
+    cerr << "Error listing collections: " << cpp_strerror(r) << std::endl;
+    return r;
+  }
+
+  // Keep the PG collections that are in the requested pool or are the
+  // requested PG.
+  vector<coll_t> targets;
+  for (auto const &coll : all_colls) {
+    spg_t candidate;
+    if (!coll.is_pg(&candidate))
+      continue;
+    const bool in_pool = poolid >= 0 && candidate.pool() == (uint64_t)poolid;
+    if (in_pool || candidate == pgid)
+      targets.push_back(coll);
+  }
+
+  const size_t total = targets.size();
+  size_t done = 0;
+  for (auto const &coll : targets) {
+    if (!dry_run) {
+      cerr << "Finished " << done << "/" << total << " collections" << "\r";
+      r = filestore->apply_layout_settings(coll, target_level);
+      if (r < 0) {
+        cerr << "Error applying layout settings to " << coll << std::endl;
+        return r;
+      }
+    } else {
+      cerr << "Would apply layout settings to " << coll << std::endl;
+    }
+    ++done;
+  }
+
+  cerr << "Finished " << total << "/" << total << " collections" << "\r" << std::endl;
+  return r;
+}
+
+int main(int argc, char **argv)
+{
+ string dpath, jpath, pgidstr, op, file, mountpoint, mon_store_path, object;
+ string target_data_path, fsid;
+ string objcmd, arg1, arg2, type, format, argnspace, pool, rmtypestr;
+ boost::optional<std::string> nspace;
+ spg_t pgid;
+ unsigned epoch = 0;
+ ghobject_t ghobj;
+ bool human_readable;
+ Formatter *formatter;
+ bool head;
+
+ po::options_description desc("Allowed options");
+ desc.add_options()
+ ("help", "produce help message")
+ ("type", po::value<string>(&type),
+ "Arg is one of [bluestore (default), filestore, memstore]")
+ ("data-path", po::value<string>(&dpath),
+ "path to object store, mandatory")
+ ("journal-path", po::value<string>(&jpath),
+ "path to journal, use if tool can't find it")
+ ("pgid", po::value<string>(&pgidstr),
+ "PG id, mandatory for info, log, remove, export, export-remove, mark-complete, trim-pg-log, and mandatory for apply-layout-settings if --pool is not specified")
+ ("pool", po::value<string>(&pool),
+ "Pool name, mandatory for apply-layout-settings if --pgid is not specified")
+ ("op", po::value<string>(&op),
+ "Arg is one of [info, log, remove, mkfs, fsck, repair, fuse, dup, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, "
+ "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, apply-layout-settings, update-mon-db, dump-export, trim-pg-log, statfs]")
+ ("epoch", po::value<unsigned>(&epoch),
+ "epoch# for get-osdmap and get-inc-osdmap, the current epoch in use if not specified")
+ ("file", po::value<string>(&file),
+ "path of file to export, export-remove, import, get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap")
+ ("mon-store-path", po::value<string>(&mon_store_path),
+ "path of monstore to update-mon-db")
+ ("fsid", po::value<string>(&fsid),
+ "fsid for new store created by mkfs")
+ ("target-data-path", po::value<string>(&target_data_path),
+ "path of target object store (for --op dup)")
+ ("mountpoint", po::value<string>(&mountpoint),
+ "fuse mountpoint")
+ ("format", po::value<string>(&format)->default_value("json-pretty"),
+ "Output format which may be json, json-pretty, xml, xml-pretty")
+ ("debug", "Enable diagnostic output to stderr")
+ ("force", "Ignore some types of errors and proceed with operation - USE WITH CAUTION: CORRUPTION POSSIBLE NOW OR IN THE FUTURE")
+ ("skip-journal-replay", "Disable journal replay")
+ ("skip-mount-omap", "Disable mounting of omap")
+ ("head", "Find head/snapdir when searching for objects by name")
+ ("dry-run", "Don't modify the objectstore")
+ ("namespace", po::value<string>(&argnspace), "Specify namespace when searching for objects")
+ ("rmtype", po::value<string>(&rmtypestr), "Specify corrupting object removal 'snapmap' or 'nosnapmap' - TESTING USE ONLY")
+ ;
+
+ po::options_description positional("Positional options");
+ positional.add_options()
+ ("object", po::value<string>(&object), "'' for pgmeta_oid, object name or ghobject in json")
+ ("objcmd", po::value<string>(&objcmd), "command [(get|set)-bytes, (get|set|rm)-(attr|omap), (get|set)-omaphdr, list-attrs, list-omap, remove]")
+ ("arg1", po::value<string>(&arg1), "arg1 based on cmd")
+ ("arg2", po::value<string>(&arg2), "arg2 based on cmd")
+ ;
+
+ po::options_description all;
+ all.add(desc).add(positional);
+
+ po::positional_options_description pd;
+ pd.add("object", 1).add("objcmd", 1).add("arg1", 1).add("arg2", 1);
+
+ vector<string> ceph_option_strings;
+
+ po::variables_map vm;
+ try {
+ po::parsed_options parsed =
+ po::command_line_parser(argc, argv).options(all).allow_unregistered().positional(pd).run();
+ po::store( parsed, vm);
+ po::notify(vm);
+ ceph_option_strings = po::collect_unrecognized(parsed.options,
+ po::include_positional);
+ } catch(po::error &e) {
+ std::cerr << e.what() << std::endl;
+ return 1;
+ }
+
+ if (vm.count("help")) {
+ usage(desc);
+ return 1;
+ }
+
+ // Compatibility with previous option name
+ if (op == "dump-import")
+ op = "dump-export";
+
+ debug = (vm.count("debug") > 0);
+
+ force = (vm.count("force") > 0);
+
+ if (vm.count("namespace"))
+ nspace = argnspace;
+
+ dry_run = (vm.count("dry-run") > 0);
+
+ osflagbits_t flags = 0;
+ if (dry_run || vm.count("skip-journal-replay"))
+ flags |= SKIP_JOURNAL_REPLAY;
+ if (vm.count("skip-mount-omap"))
+ flags |= SKIP_MOUNT_OMAP;
+ if (op == "update-mon-db")
+ flags |= SKIP_JOURNAL_REPLAY;
+
+ head = (vm.count("head") > 0);
+
+ // infer osd id so we can authenticate
+ char fn[PATH_MAX];
+ snprintf(fn, sizeof(fn), "%s/whoami", dpath.c_str());
+ int fd = ::open(fn, O_RDONLY);
+ if (fd >= 0) {
+ bufferlist bl;
+ bl.read_fd(fd, 64);
+ string s(bl.c_str(), bl.length());
+ int whoami = atoi(s.c_str());
+ vector<string> tmp;
+ // identify ourselves as this osd so we can auth and fetch our configs
+ tmp.push_back("-n");
+ tmp.push_back(string("osd.") + stringify(whoami));
+ // populate osd_data so that the default keyring location works
+ tmp.push_back("--osd-data");
+ tmp.push_back(dpath);
+ tmp.insert(tmp.end(), ceph_option_strings.begin(),
+ ceph_option_strings.end());
+ tmp.swap(ceph_option_strings);
+ }
+
+ vector<const char *> ceph_options;
+ ceph_options.reserve(ceph_options.size() + ceph_option_strings.size());
+ for (vector<string>::iterator i = ceph_option_strings.begin();
+ i != ceph_option_strings.end();
+ ++i) {
+ ceph_options.push_back(i->c_str());
+ }
+
+ snprintf(fn, sizeof(fn), "%s/type", dpath.c_str());
+ fd = ::open(fn, O_RDONLY);
+ if (fd >= 0) {
+ bufferlist bl;
+ bl.read_fd(fd, 64);
+ if (bl.length()) {
+ string dp_type = string(bl.c_str(), bl.length() - 1); // drop \n
+ if (vm.count("type") && dp_type != "" && type != dp_type)
+ cerr << "WARNING: Ignoring type \"" << type << "\" - found data-path type \""
+ << dp_type << "\"" << std::endl;
+ type = dp_type;
+ //cout << "object store type is " << type << std::endl;
+ }
+ ::close(fd);
+ }
+
+ if (!vm.count("type") && type == "") {
+ type = "bluestore";
+ }
+ if (!vm.count("data-path") &&
+ op != "dump-export" &&
+ !(op == "dump-journal" && type == "filestore")) {
+ cerr << "Must provide --data-path" << std::endl;
+ usage(desc);
+ return 1;
+ }
+ if (type == "filestore" && !vm.count("journal-path")) {
+ jpath = dpath + "/journal";
+ }
+ if (!vm.count("op") && !vm.count("object")) {
+ cerr << "Must provide --op or object command..." << std::endl;
+ usage(desc);
+ return 1;
+ }
+ if (op != "list" && op != "apply-layout-settings" &&
+ vm.count("op") && vm.count("object")) {
+ cerr << "Can't specify both --op and object command syntax" << std::endl;
+ usage(desc);
+ return 1;
+ }
+ if (op == "apply-layout-settings" && !(vm.count("pool") ^ vm.count("pgid"))) {
+ cerr << "apply-layout-settings requires either --pool or --pgid"
+ << std::endl;
+ usage(desc);
+ return 1;
+ }
+ if (op != "list" && op != "apply-layout-settings" && vm.count("object") && !vm.count("objcmd")) {
+ cerr << "Invalid syntax, missing command" << std::endl;
+ usage(desc);
+ return 1;
+ }
+ if (op == "fuse" && mountpoint.length() == 0) {
+ cerr << "Missing fuse mountpoint" << std::endl;
+ usage(desc);
+ return 1;
+ }
+ outistty = isatty(STDOUT_FILENO);
+
+ file_fd = fd_none;
+ if ((op == "export" || op == "export-remove" || op == "get-osdmap" || op == "get-inc-osdmap") && !dry_run) {
+ if (!vm.count("file") || file == "-") {
+ if (outistty) {
+ cerr << "stdout is a tty and no --file filename specified" << std::endl;
+ return 1;
+ }
+ file_fd = STDOUT_FILENO;
+ } else {
+ file_fd = open(file.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666);
+ }
+ } else if (op == "import" || op == "dump-export" || op == "set-osdmap" || op == "set-inc-osdmap") {
+ if (!vm.count("file") || file == "-") {
+ if (isatty(STDIN_FILENO)) {
+ cerr << "stdin is a tty and no --file filename specified" << std::endl;
+ return 1;
+ }
+ file_fd = STDIN_FILENO;
+ } else {
+ file_fd = open(file.c_str(), O_RDONLY);
+ }
+ }
+
+ ObjectStoreTool tool = ObjectStoreTool(file_fd, dry_run);
+
+ if (vm.count("file") && file_fd == fd_none && !dry_run) {
+ cerr << "--file option only applies to import, dump-export, export, export-remove, "
+ << "get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap" << std::endl;
+ return 1;
+ }
+
+ if (file_fd != fd_none && file_fd < 0) {
+ string err = string("file: ") + file;
+ perror(err.c_str());
+ return 1;
+ }
+
+ auto cct = global_init(
+ NULL, ceph_options,
+ CEPH_ENTITY_TYPE_OSD,
+ CODE_ENVIRONMENT_UTILITY_NODOUT,
+ 0);
+ common_init_finish(g_ceph_context);
+ if (debug) {
+ g_conf().set_val_or_die("log_to_stderr", "true");
+ g_conf().set_val_or_die("err_to_stderr", "true");
+ }
+ g_conf().apply_changes(nullptr);
+
+ // Special list handling. Treating pretty_format as human readable,
+ // with one object per line and not an enclosing array.
+ human_readable = ends_with(format, "-pretty");
+ if ((op == "list" || op == "meta-list") && human_readable) {
+ // Remove -pretty from end of format which we know is there
+ format = format.substr(0, format.size() - strlen("-pretty"));
+ }
+
+ formatter = Formatter::create(format);
+ if (formatter == NULL) {
+ cerr << "unrecognized format: " << format << std::endl;
+ return 1;
+ }
+
+ // Special handling for filestore journal, so we can dump it without mounting
+ if (op == "dump-journal" && type == "filestore") {
+ int ret = mydump_journal(formatter, jpath, g_conf()->journal_dio);
+ if (ret < 0) {
+ cerr << "journal-path: " << jpath << ": "
+ << cpp_strerror(ret) << std::endl;
+ return 1;
+ }
+ formatter->flush(cout);
+ return 0;
+ }
+
+ if (op == "dump-export") {
+ int ret = tool.dump_export(formatter);
+ if (ret < 0) {
+ cerr << "dump-export: "
+ << cpp_strerror(ret) << std::endl;
+ return 1;
+ }
+ return 0;
+ }
+
+ //Verify that data-path really exists
+ struct stat st;
+ if (::stat(dpath.c_str(), &st) == -1) {
+ string err = string("data-path: ") + dpath;
+ perror(err.c_str());
+ return 1;
+ }
+
+ if (pgidstr.length() && !pgid.parse(pgidstr.c_str())) {
+ cerr << "Invalid pgid '" << pgidstr << "' specified" << std::endl;
+ return 1;
+ }
+
+ //Verify that the journal-path really exists
+ if (type == "filestore") {
+ if (::stat(jpath.c_str(), &st) == -1) {
+ string err = string("journal-path: ") + jpath;
+ perror(err.c_str());
+ return 1;
+ }
+ if (S_ISDIR(st.st_mode)) {
+ cerr << "journal-path: " << jpath << ": "
+ << cpp_strerror(EISDIR) << std::endl;
+ return 1;
+ }
+ }
+
+ ObjectStore *fs = ObjectStore::create(g_ceph_context, type, dpath, jpath, flags);
+ if (fs == NULL) {
+ cerr << "Unable to create store of type " << type << std::endl;
+ return 1;
+ }
+
+ if (op == "fsck" || op == "fsck-deep") {
+ int r = fs->fsck(op == "fsck-deep");
+ if (r < 0) {
+ cerr << "fsck failed: " << cpp_strerror(r) << std::endl;
+ return 1;
+ }
+ if (r > 0) {
+ cerr << "fsck found " << r << " errors" << std::endl;
+ return 1;
+ }
+ cout << "fsck found no errors" << std::endl;
+ return 0;
+ }
+ if (op == "repair" || op == "repair-deep") {
+ int r = fs->repair(op == "repair-deep");
+ if (r < 0) {
+ cerr << "repair failed: " << cpp_strerror(r) << std::endl;
+ return 1;
+ }
+ if (r > 0) {
+ cerr << "repair found " << r << " errors" << std::endl;
+ return 1;
+ }
+ cout << "repair found no errors" << std::endl;
+ return 0;
+ }
+ if (op == "mkfs") {
+ if (fsid.length()) {
+ uuid_d f;
+ bool r = f.parse(fsid.c_str());
+ if (!r) {
+ cerr << "failed to parse uuid '" << fsid << "'" << std::endl;
+ return 1;
+ }
+ fs->set_fsid(f);
+ }
+ int r = fs->mkfs();
+ if (r < 0) {
+ cerr << "mkfs failed: " << cpp_strerror(r) << std::endl;
+ return 1;
+ }
+ return 0;
+ }
+ if (op == "dup") {
+ string target_type;
+ char fn[PATH_MAX];
+ snprintf(fn, sizeof(fn), "%s/type", target_data_path.c_str());
+ int fd = ::open(fn, O_RDONLY);
+ if (fd < 0) {
+ cerr << "Unable to open " << target_data_path << "/type" << std::endl;
+ exit(1);
+ }
+ bufferlist bl;
+ bl.read_fd(fd, 64);
+ if (bl.length()) {
+ target_type = string(bl.c_str(), bl.length() - 1); // drop \n
+ }
+ ::close(fd);
+ ObjectStore *targetfs = ObjectStore::create(
+ g_ceph_context, target_type,
+ target_data_path, "", 0);
+ if (targetfs == NULL) {
+ cerr << "Unable to open store of type " << target_type << std::endl;
+ return 1;
+ }
+ int r = dup(dpath, fs, target_data_path, targetfs);
+ if (r < 0) {
+ cerr << "dup failed: " << cpp_strerror(r) << std::endl;
+ return 1;
+ }
+ return 0;
+ }
+
+ int ret = fs->mount();
+ if (ret < 0) {
+ if (ret == -EBUSY) {
+ cerr << "OSD has the store locked" << std::endl;
+ } else {
+ cerr << "Mount failed with '" << cpp_strerror(ret) << "'" << std::endl;
+ }
+ return 1;
+ }
+
+ if (op == "fuse") {
+#ifdef HAVE_LIBFUSE
+ FuseStore fuse(fs, mountpoint);
+ cout << "mounting fuse at " << mountpoint << " ..." << std::endl;
+ int r = fuse.main();
+ if (r < 0) {
+ cerr << "failed to mount fuse: " << cpp_strerror(r) << std::endl;
+ return 1;
+ }
+#else
+ cerr << "fuse support not enabled" << std::endl;
+#endif
+ return 0;
+ }
+
+ vector<coll_t> ls;
+ vector<coll_t>::iterator it;
+ CompatSet supported;
+
+#ifdef INTERNAL_TEST
+ supported = get_test_compat_set();
+#else
+ supported = OSD::get_osd_compat_set();
+#endif
+
+ bufferlist bl;
+ OSDSuperblock superblock;
+ auto ch = fs->open_collection(coll_t::meta());
+ bufferlist::const_iterator p;
+ ret = fs->read(ch, OSD_SUPERBLOCK_GOBJECT, 0, 0, bl);
+ if (ret < 0) {
+ cerr << "Failure to read OSD superblock: " << cpp_strerror(ret) << std::endl;
+ goto out;
+ }
+
+ p = bl.cbegin();
+ decode(superblock, p);
+
+ if (debug) {
+ cerr << "Cluster fsid=" << superblock.cluster_fsid << std::endl;
+ }
+
+ if (debug) {
+ cerr << "Supported features: " << supported << std::endl;
+ cerr << "On-disk features: " << superblock.compat_features << std::endl;
+ }
+ if (supported.compare(superblock.compat_features) == -1) {
+ CompatSet unsupported = supported.unsupported(superblock.compat_features);
+ cerr << "On-disk OSD incompatible features set "
+ << unsupported << std::endl;
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (op == "apply-layout-settings") {
+ int target_level = 0;
+ // Single positional argument with apply-layout-settings
+ // for target_level.
+ if (vm.count("object") && isdigit(object[0])) {
+ target_level = atoi(object.c_str());
+ // This requires --arg1 to be specified since
+ // this is the third positional argument and normally
+ // used with object operations.
+ } else if (vm.count("arg1") && isdigit(arg1[0])) {
+ target_level = atoi(arg1.c_str());
+ }
+ ret = apply_layout_settings(fs, superblock, pool, pgid, dry_run, target_level);
+ goto out;
+ }
+
+ if (op != "list" && vm.count("object")) {
+ // Special case: Create pgmeta_oid if empty string specified
+ // This can't conflict with any actual object names.
+ if (object == "") {
+ ghobj = pgid.make_pgmeta_oid();
+ } else {
+ json_spirit::Value v;
+ try {
+ if (!json_spirit::read(object, v) ||
+ (v.type() != json_spirit::array_type && v.type() != json_spirit::obj_type)) {
+ // Special: Need head/snapdir so set even if user didn't specify
+ if (vm.count("objcmd") && (objcmd == "remove-clone-metadata"))
+ head = true;
+ lookup_ghobject lookup(object, nspace, head);
+ if (pgidstr.length())
+ ret = action_on_all_objects_in_exact_pg(fs, coll_t(pgid), lookup, debug);
+ else
+ ret = action_on_all_objects(fs, lookup, debug);
+ if (ret) {
+ throw std::runtime_error("Internal error");
+ } else {
+ if (lookup.size() != 1) {
+ stringstream ss;
+ if (lookup.size() == 0)
+ ss << "No object id '" << object << "' found or invalid JSON specified";
+ else
+ ss << "Found " << lookup.size() << " objects with id '" << object
+ << "', please use a JSON spec from --op list instead";
+ throw std::runtime_error(ss.str());
+ }
+ pair<coll_t, ghobject_t> found = lookup.pop();
+ pgidstr = found.first.to_str();
+ pgid.parse(pgidstr.c_str());
+ ghobj = found.second;
+ }
+ } else {
+ stringstream ss;
+ if (pgidstr.length() == 0 && v.type() != json_spirit::array_type) {
+ ss << "Without --pgid the object '" << object
+ << "' must be a JSON array";
+ throw std::runtime_error(ss.str());
+ }
+ if (v.type() == json_spirit::array_type) {
+ json_spirit::Array array = v.get_array();
+ if (array.size() != 2) {
+ ss << "Object '" << object
+ << "' must be a JSON array with 2 elements";
+ throw std::runtime_error(ss.str());
+ }
+ vector<json_spirit::Value>::iterator i = array.begin();
+ ceph_assert(i != array.end());
+ if (i->type() != json_spirit::str_type) {
+ ss << "Object '" << object
+ << "' must be a JSON array with the first element a string";
+ throw std::runtime_error(ss.str());
+ }
+ string object_pgidstr = i->get_str();
+ if (object_pgidstr != "meta") {
+ spg_t object_pgid;
+ object_pgid.parse(object_pgidstr.c_str());
+ if (pgidstr.length() > 0) {
+ if (object_pgid != pgid) {
+ ss << "object '" << object
+ << "' has a pgid different from the --pgid="
+ << pgidstr << " option";
+ throw std::runtime_error(ss.str());
+ }
+ } else {
+ pgidstr = object_pgidstr;
+ pgid = object_pgid;
+ }
+ } else {
+ pgidstr = object_pgidstr;
+ }
+ ++i;
+ v = *i;
+ }
+ try {
+ ghobj.decode(v);
+ } catch (std::runtime_error& e) {
+ ss << "Decode object JSON error: " << e.what();
+ throw std::runtime_error(ss.str());
+ }
+ if (pgidstr != "meta" && (uint64_t)pgid.pgid.m_pool != (uint64_t)ghobj.hobj.pool) {
+ cerr << "Object pool and pgid pool don't match" << std::endl;
+ ret = 1;
+ goto out;
+ }
+ }
+ } catch (std::runtime_error& e) {
+ cerr << e.what() << std::endl;
+ ret = 1;
+ goto out;
+ }
+ }
+ }
+
+ // The ops which require --pgid option are checked here and
+ // mentioned in the usage for --pgid.
+ if ((op == "info" || op == "log" || op == "remove" || op == "export"
+ || op == "export-remove" || op == "mark-complete"
+ || op == "reset-last-complete"
+ || op == "trim-pg-log") &&
+ pgidstr.length() == 0) {
+ cerr << "Must provide pgid" << std::endl;
+ usage(desc);
+ ret = 1;
+ goto out;
+ }
+
+ if (op == "import") {
+
+ try {
+ ret = tool.do_import(fs, superblock, force, pgidstr);
+ }
+ catch (const buffer::error &e) {
+ cerr << "do_import threw exception error " << e.what() << std::endl;
+ ret = -EFAULT;
+ }
+ if (ret == -EFAULT) {
+ cerr << "Corrupt input for import" << std::endl;
+ }
+ if (ret == 0)
+ cout << "Import successful" << std::endl;
+ goto out;
+ } else if (op == "dump-journal-mount") {
+ // Undocumented feature to dump journal with mounted fs
+ // This doesn't support the format option, but it uses the
+ // ObjectStore::dump_journal() and mounts to get replay to run.
+ ret = fs->dump_journal(cout);
+ if (ret) {
+ if (ret == -EOPNOTSUPP) {
+ cerr << "Object store type \"" << type << "\" doesn't support journal dump" << std::endl;
+ } else {
+ cerr << "Journal dump failed with error " << cpp_strerror(ret) << std::endl;
+ }
+ }
+ goto out;
+ } else if (op == "get-osdmap") {
+ bufferlist bl;
+ OSDMap osdmap;
+ if (epoch == 0) {
+ epoch = superblock.current_epoch;
+ }
+ ret = get_osdmap(fs, epoch, osdmap, bl);
+ if (ret) {
+ cerr << "Failed to get osdmap#" << epoch << ": "
+ << cpp_strerror(ret) << std::endl;
+ goto out;
+ }
+ ret = bl.write_fd(file_fd);
+ if (ret) {
+ cerr << "Failed to write to " << file << ": " << cpp_strerror(ret) << std::endl;
+ } else {
+ cout << "osdmap#" << epoch << " exported." << std::endl;
+ }
+ goto out;
+ } else if (op == "set-osdmap") {
+ bufferlist bl;
+ ret = get_fd_data(file_fd, bl);
+ if (ret < 0) {
+ cerr << "Failed to read osdmap " << cpp_strerror(ret) << std::endl;
+ } else {
+ ret = set_osdmap(fs, epoch, bl, force);
+ }
+ goto out;
+ } else if (op == "get-inc-osdmap") {
+ bufferlist bl;
+ if (epoch == 0) {
+ epoch = superblock.current_epoch;
+ }
+ ret = get_inc_osdmap(fs, epoch, bl);
+ if (ret < 0) {
+ cerr << "Failed to get incremental osdmap# " << epoch << ": "
+ << cpp_strerror(ret) << std::endl;
+ goto out;
+ }
+ ret = bl.write_fd(file_fd);
+ if (ret) {
+ cerr << "Failed to write to " << file << ": " << cpp_strerror(ret) << std::endl;
+ } else {
+ cout << "inc-osdmap#" << epoch << " exported." << std::endl;
+ }
+ goto out;
+ } else if (op == "set-inc-osdmap") {
+ bufferlist bl;
+ ret = get_fd_data(file_fd, bl);
+ if (ret < 0) {
+ cerr << "Failed to read incremental osdmap " << cpp_strerror(ret) << std::endl;
+ goto out;
+ } else {
+ ret = set_inc_osdmap(fs, epoch, bl, force);
+ }
+ goto out;
+ } else if (op == "update-mon-db") {
+ if (!vm.count("mon-store-path")) {
+ cerr << "Please specify the path to monitor db to update" << std::endl;
+ ret = -EINVAL;
+ } else {
+ ret = update_mon_db(*fs, superblock, dpath + "/keyring", mon_store_path);
+ }
+ goto out;
+ }
+
+ if (op == "remove") {
+ if (!force && !dry_run) {
+ cerr << "Please use export-remove or you must use --force option" << std::endl;
+ ret = -EINVAL;
+ goto out;
+ }
+ ret = initiate_new_remove_pg(fs, pgid);
+ if (ret < 0) {
+ cerr << "PG '" << pgid << "' not found" << std::endl;
+ goto out;
+ }
+ cout << "Remove successful" << std::endl;
+ goto out;
+ }
+
+ if (op == "fix-lost") {
+ boost::scoped_ptr<action_on_object_t> action;
+ action.reset(new do_fix_lost());
+ if (pgidstr.length())
+ ret = action_on_all_objects_in_exact_pg(fs, coll_t(pgid), *action, debug);
+ else
+ ret = action_on_all_objects(fs, *action, debug);
+ goto out;
+ }
+
+ if (op == "list") {
+ ret = do_list(fs, pgidstr, object, nspace, formatter, debug,
+ human_readable, head);
+ if (ret < 0) {
+ cerr << "do_list failed: " << cpp_strerror(ret) << std::endl;
+ }
+ goto out;
+ }
+
+ if (op == "dump-super") {
+ formatter->open_object_section("superblock");
+ superblock.dump(formatter);
+ formatter->close_section();
+ formatter->flush(cout);
+ cout << std::endl;
+ goto out;
+ }
+
+ if (op == "statfs") {
+ store_statfs_t statsbuf;
+ ret = fs->statfs(&statsbuf);
+ if (ret < 0) {
+ cerr << "error from statfs: " << cpp_strerror(ret) << std::endl;
+ goto out;
+ }
+ formatter->open_object_section("statfs");
+ statsbuf.dump(formatter);
+ formatter->close_section();
+ formatter->flush(cout);
+ cout << std::endl;
+ goto out;
+ }
+
+ if (op == "meta-list") {
+ ret = do_meta(fs, object, formatter, debug, human_readable);
+ if (ret < 0) {
+ cerr << "do_meta failed: " << cpp_strerror(ret) << std::endl;
+ }
+ goto out;
+ }
+
+ ret = fs->list_collections(ls);
+ if (ret < 0) {
+ cerr << "failed to list pgs: " << cpp_strerror(ret) << std::endl;
+ goto out;
+ }
+
+ if (debug && op == "list-pgs")
+ cout << "Performing list-pgs operation" << std::endl;
+
+ // Find pg
+ for (it = ls.begin(); it != ls.end(); ++it) {
+ spg_t tmppgid;
+
+ if (pgidstr == "meta") {
+ if (it->to_str() == "meta")
+ break;
+ else
+ continue;
+ }
+
+ if (!it->is_pg(&tmppgid)) {
+ continue;
+ }
+
+ if (it->is_temp(&tmppgid)) {
+ continue;
+ }
+
+ if (op != "list-pgs" && tmppgid != pgid) {
+ continue;
+ }
+
+ if (op != "list-pgs") {
+ //Found!
+ break;
+ }
+
+ cout << tmppgid << std::endl;
+ }
+
+ if (op == "list-pgs") {
+ ret = 0;
+ goto out;
+ }
+
+ // If not an object command nor any of the ops handled below, then output this usage
+ // before complaining about a bad pgid
+ if (!vm.count("objcmd") && op != "export" && op != "export-remove" && op != "info" && op != "log" && op != "mark-complete" && op != "trim-pg-log") {
+ cerr << "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, "
+ "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log, statfs)"
+ << std::endl;
+ usage(desc);
+ ret = 1;
+ goto out;
+ }
+ epoch_t map_epoch;
+// The following code for export, info, log require omap or !skip-mount-omap
+ if (it != ls.end()) {
+
+ coll_t coll = *it;
+
+ if (vm.count("objcmd")) {
+ ret = 0;
+ if (objcmd == "remove" || objcmd == "removeall") {
+ bool all = (objcmd == "removeall");
+ enum rmtype type = BOTH;
+ if (rmtypestr == "nosnapmap")
+ type = NOSNAPMAP;
+ else if (rmtypestr == "snapmap")
+ type = SNAPMAP;
+ ret = do_remove_object(fs, coll, ghobj, all, force, type);
+ goto out;
+ } else if (objcmd == "list-attrs") {
+ ret = do_list_attrs(fs, coll, ghobj);
+ goto out;
+ } else if (objcmd == "list-omap") {
+ ret = do_list_omap(fs, coll, ghobj);
+ goto out;
+ } else if (objcmd == "get-bytes" || objcmd == "set-bytes") {
+ if (objcmd == "get-bytes") {
+ int fd;
+ if (vm.count("arg1") == 0 || arg1 == "-") {
+ fd = STDOUT_FILENO;
+ } else {
+ fd = open(arg1.c_str(), O_WRONLY|O_TRUNC|O_CREAT|O_EXCL|O_LARGEFILE, 0666);
+ if (fd == -1) {
+ cerr << "open " << arg1 << " " << cpp_strerror(errno) << std::endl;
+ ret = 1;
+ goto out;
+ }
+ }
+ ret = do_get_bytes(fs, coll, ghobj, fd);
+ if (fd != STDOUT_FILENO)
+ close(fd);
+ } else {
+ int fd;
+ if (vm.count("arg1") == 0 || arg1 == "-") {
+ // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
+ if (isatty(STDIN_FILENO)) {
+ cerr << "stdin is a tty and no file specified" << std::endl;
+ ret = 1;
+ goto out;
+ }
+ fd = STDIN_FILENO;
+ } else {
+ fd = open(arg1.c_str(), O_RDONLY|O_LARGEFILE, 0666);
+ if (fd == -1) {
+ cerr << "open " << arg1 << " " << cpp_strerror(errno) << std::endl;
+ ret = 1;
+ goto out;
+ }
+ }
+ ret = do_set_bytes(fs, coll, ghobj, fd);
+ if (fd != STDIN_FILENO)
+ close(fd);
+ }
+ goto out;
+ } else if (objcmd == "get-attr") {
+ if (vm.count("arg1") == 0) {
+ usage(desc);
+ ret = 1;
+ goto out;
+ }
+ ret = do_get_attr(fs, coll, ghobj, arg1);
+ goto out;
+ } else if (objcmd == "set-attr") {
+ if (vm.count("arg1") == 0) {
+ usage(desc);
+ ret = 1;
+ }
+
+ int fd;
+ if (vm.count("arg2") == 0 || arg2 == "-") {
+ // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
+ if (isatty(STDIN_FILENO)) {
+ cerr << "stdin is a tty and no file specified" << std::endl;
+ ret = 1;
+ goto out;
+ }
+ fd = STDIN_FILENO;
+ } else {
+ fd = open(arg2.c_str(), O_RDONLY|O_LARGEFILE, 0666);
+ if (fd == -1) {
+ cerr << "open " << arg2 << " " << cpp_strerror(errno) << std::endl;
+ ret = 1;
+ goto out;
+ }
+ }
+ ret = do_set_attr(fs, coll, ghobj, arg1, fd);
+ if (fd != STDIN_FILENO)
+ close(fd);
+ goto out;
+ } else if (objcmd == "rm-attr") {
+ if (vm.count("arg1") == 0) {
+ usage(desc);
+ ret = 1;
+ goto out;
+ }
+ ret = do_rm_attr(fs, coll, ghobj, arg1);
+ goto out;
+ } else if (objcmd == "get-omap") {
+ if (vm.count("arg1") == 0) {
+ usage(desc);
+ ret = 1;
+ goto out;
+ }
+ ret = do_get_omap(fs, coll, ghobj, arg1);
+ goto out;
+ } else if (objcmd == "set-omap") {
+ if (vm.count("arg1") == 0) {
+ usage(desc);
+ ret = 1;
+ goto out;
+ }
+ int fd;
+ if (vm.count("arg2") == 0 || arg2 == "-") {
+ // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
+ if (isatty(STDIN_FILENO)) {
+ cerr << "stdin is a tty and no file specified" << std::endl;
+ ret = 1;
+ goto out;
+ }
+ fd = STDIN_FILENO;
+ } else {
+ fd = open(arg2.c_str(), O_RDONLY|O_LARGEFILE, 0666);
+ if (fd == -1) {
+ cerr << "open " << arg2 << " " << cpp_strerror(errno) << std::endl;
+ ret = 1;
+ goto out;
+ }
+ }
+ ret = do_set_omap(fs, coll, ghobj, arg1, fd);
+ if (fd != STDIN_FILENO)
+ close(fd);
+ goto out;
+ } else if (objcmd == "rm-omap") {
+ if (vm.count("arg1") == 0) {
+ usage(desc);
+ ret = 1;
+ goto out;
+ }
+ ret = do_rm_omap(fs, coll, ghobj, arg1);
+ goto out;
+ } else if (objcmd == "get-omaphdr") {
+ if (vm.count("arg1")) {
+ usage(desc);
+ ret = 1;
+ goto out;
+ }
+ ret = do_get_omaphdr(fs, coll, ghobj);
+ goto out;
+ } else if (objcmd == "set-omaphdr") {
+ // Extra arg
+ if (vm.count("arg2")) {
+ usage(desc);
+ ret = 1;
+ goto out;
+ }
+ int fd;
+ if (vm.count("arg1") == 0 || arg1 == "-") {
+ // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
+ if (isatty(STDIN_FILENO)) {
+ cerr << "stdin is a tty and no file specified" << std::endl;
+ ret = 1;
+ goto out;
+ }
+ fd = STDIN_FILENO;
+ } else {
+ fd = open(arg1.c_str(), O_RDONLY|O_LARGEFILE, 0666);
+ if (fd == -1) {
+ cerr << "open " << arg1 << " " << cpp_strerror(errno) << std::endl;
+ ret = 1;
+ goto out;
+ }
+ }
+ ret = do_set_omaphdr(fs, coll, ghobj, fd);
+ if (fd != STDIN_FILENO)
+ close(fd);
+ goto out;
+ } else if (objcmd == "dump") {
+ // There should not be any other arguments
+ if (vm.count("arg1") || vm.count("arg2")) {
+ usage(desc);
+ ret = 1;
+ goto out;
+ }
+ ret = print_obj_info(fs, coll, ghobj, formatter);
+ goto out;
+ } else if (objcmd == "corrupt-info") { // Undocumented testing feature
+ // There should not be any other arguments
+ if (vm.count("arg1") || vm.count("arg2")) {
+ usage(desc);
+ ret = 1;
+ goto out;
+ }
+ ret = corrupt_info(fs, coll, ghobj, formatter);
+ goto out;
+ } else if (objcmd == "set-size" || objcmd == "corrupt-size") {
+ // Undocumented testing feature
+ bool corrupt = (objcmd == "corrupt-size");
+ // Extra arg
+ if (vm.count("arg1") == 0 || vm.count("arg2")) {
+ usage(desc);
+ ret = 1;
+ goto out;
+ }
+ if (arg1.length() == 0 || !isdigit(arg1.c_str()[0])) {
+ cerr << "Invalid size '" << arg1 << "' specified" << std::endl;
+ ret = 1;
+ goto out;
+ }
+ uint64_t size = atoll(arg1.c_str());
+ ret = set_size(fs, coll, ghobj, size, formatter, corrupt);
+ goto out;
+ } else if (objcmd == "clear-data-digest") {
+ ret = clear_data_digest(fs, coll, ghobj);
+ goto out;
+ } else if (objcmd == "clear-snapset") {
+ // UNDOCUMENTED: For testing zap SnapSet
+ // IGNORE extra args since not in usage anyway
+ if (!ghobj.hobj.has_snapset()) {
+ cerr << "'" << objcmd << "' requires a head or snapdir object" << std::endl;
+ ret = 1;
+ goto out;
+ }
+ ret = clear_snapset(fs, coll, ghobj, arg1);
+ goto out;
+ } else if (objcmd == "remove-clone-metadata") {
+ // Extra arg
+ if (vm.count("arg1") == 0 || vm.count("arg2")) {
+ usage(desc);
+ ret = 1;
+ goto out;
+ }
+ if (!ghobj.hobj.has_snapset()) {
+ cerr << "'" << objcmd << "' requires a head or snapdir object" << std::endl;
+ ret = 1;
+ goto out;
+ }
+ if (arg1.length() == 0 || !isdigit(arg1.c_str()[0])) {
+ cerr << "Invalid cloneid '" << arg1 << "' specified" << std::endl;
+ ret = 1;
+ goto out;
+ }
+ snapid_t cloneid = atoi(arg1.c_str());
+ ret = remove_clone(fs, coll, ghobj, cloneid, force);
+ goto out;
+ }
+ cerr << "Unknown object command '" << objcmd << "'" << std::endl;
+ usage(desc);
+ ret = 1;
+ goto out;
+ }
+
+ map_epoch = 0;
+ ret = PG::peek_map_epoch(fs, pgid, &map_epoch);
+ if (ret < 0)
+ cerr << "peek_map_epoch reports error" << std::endl;
+ if (debug)
+ cerr << "map_epoch " << map_epoch << std::endl;
+
+ pg_info_t info(pgid);
+ PastIntervals past_intervals;
+ __u8 struct_ver;
+ ret = PG::read_info(fs, pgid, coll, info, past_intervals, struct_ver);
+ if (ret < 0) {
+ cerr << "read_info error " << cpp_strerror(ret) << std::endl;
+ goto out;
+ }
+ if (struct_ver < PG::get_compat_struct_v()) {
+ cerr << "PG is too old to upgrade, use older Ceph version" << std::endl;
+ ret = -EFAULT;
+ goto out;
+ }
+ if (debug)
+ cerr << "struct_v " << (int)struct_ver << std::endl;
+
+ if (op == "export" || op == "export-remove") {
+ ret = tool.do_export(fs, coll, pgid, info, map_epoch, struct_ver, superblock, past_intervals);
+ if (ret == 0) {
+ cerr << "Export successful" << std::endl;
+ if (op == "export-remove") {
+ ret = initiate_new_remove_pg(fs, pgid);
+ // Export succeeded, so pgid is there
+ ceph_assert(ret == 0);
+ cerr << "Remove successful" << std::endl;
+ }
+ }
+ } else if (op == "info") {
+ formatter->open_object_section("info");
+ info.dump(formatter);
+ formatter->close_section();
+ formatter->flush(cout);
+ cout << std::endl;
+ } else if (op == "log") {
+ PGLog::IndexedLog log;
+ pg_missing_t missing;
+ ret = get_log(fs, struct_ver, pgid, info, log, missing);
+ if (ret < 0)
+ goto out;
+
+ dump_log(formatter, cout, log, missing);
+ } else if (op == "mark-complete") {
+ ObjectStore::Transaction tran;
+ ObjectStore::Transaction *t = &tran;
+
+ if (struct_ver < PG::get_compat_struct_v()) {
+ cerr << "Can't mark-complete, version mismatch " << (int)struct_ver
+ << " (pg) < compat " << (int)PG::get_compat_struct_v() << " (tool)"
+ << std::endl;
+ ret = 1;
+ goto out;
+ }
+
+ cout << "Marking complete " << std::endl;
+
+ info.last_update = eversion_t(superblock.current_epoch, info.last_update.version + 1);
+ info.last_backfill = hobject_t::get_max();
+ info.last_epoch_started = superblock.current_epoch;
+ info.history.last_epoch_started = superblock.current_epoch;
+ info.history.last_epoch_clean = superblock.current_epoch;
+ past_intervals.clear();
+
+ if (!dry_run) {
+ ret = write_info(*t, map_epoch, info, past_intervals);
+ if (ret != 0)
+ goto out;
+ auto ch = fs->open_collection(coll_t(pgid));
+ fs->queue_transaction(ch, std::move(*t));
+ }
+ cout << "Marking complete succeeded" << std::endl;
+ } else if (op == "trim-pg-log") {
+ ret = do_trim_pg_log(fs, coll, info, pgid,
+ map_epoch, past_intervals);
+ if (ret < 0) {
+ cerr << "Error trimming pg log: " << cpp_strerror(ret) << std::endl;
+ goto out;
+ }
+ cout << "Finished trimming pg log" << std::endl;
+ goto out;
+ } else if (op == "reset-last-complete") {
+ if (!force) {
+ std::cerr << "WARNING: reset-last-complete is extremely dangerous and almost "
+ << "certain to lead to permanent data loss unless you know exactly "
+ << "what you are doing. Pass --force to proceed anyway."
+ << std::endl;
+ ret = -EINVAL;
+ goto out;
+ }
+ ObjectStore::Transaction tran;
+ ObjectStore::Transaction *t = &tran;
+
+ if (struct_ver < PG::get_compat_struct_v()) {
+ cerr << "Can't reset-last-complete, version mismatch " << (int)struct_ver
+ << " (pg) < compat " << (int)PG::get_compat_struct_v() << " (tool)"
+ << std::endl;
+ ret = 1;
+ goto out;
+ }
+
+ cout << "Reseting last_complete " << std::endl;
+
+ info.last_complete = info.last_update;
+
+ if (!dry_run) {
+ ret = write_info(*t, map_epoch, info, past_intervals);
+ if (ret != 0)
+ goto out;
+ fs->queue_transaction(ch, std::move(*t));
+ }
+ cout << "Reseting last_complete succeeded" << std::endl;
+
+ } else {
+ ceph_assert(!"Should have already checked for valid --op");
+ }
+ } else {
+ cerr << "PG '" << pgid << "' not found" << std::endl;
+ ret = -ENOENT;
+ }
+
+out:
+ int r = fs->umount();
+ if (r < 0) {
+ cerr << "umount failed: " << cpp_strerror(r) << std::endl;
+ // If no previous error, then use umount() error
+ if (ret == 0)
+ ret = r;
+ }
+
+ if (dry_run) {
+ // Export output can go to stdout, so put this message on stderr
+ if (op == "export")
+ cerr << "dry-run: Nothing changed" << std::endl;
+ else
+ cout << "dry-run: Nothing changed" << std::endl;
+ }
+
+ if (ret < 0)
+ ret = 1;
+ return ret;
+}