path: root/src/rgw/driver/dbstore/common
Diffstat (limited to 'src/rgw/driver/dbstore/common')
-rw-r--r--  src/rgw/driver/dbstore/common/connection_pool.h   147
-rw-r--r--  src/rgw/driver/dbstore/common/dbstore.cc          2252
-rw-r--r--  src/rgw/driver/dbstore/common/dbstore.h           2016
-rw-r--r--  src/rgw/driver/dbstore/common/dbstore_log.h         15
4 files changed, 4430 insertions, 0 deletions
diff --git a/src/rgw/driver/dbstore/common/connection_pool.h b/src/rgw/driver/dbstore/common/connection_pool.h
new file mode 100644
index 000000000..07f3c81c3
--- /dev/null
+++ b/src/rgw/driver/dbstore/common/connection_pool.h
@@ -0,0 +1,147 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include <concepts>
+#include <condition_variable>
+#include <memory>
+#include <mutex>
+#include <boost/circular_buffer.hpp>
+#include "common/dout.h"
+
+namespace rgw::dbstore {
+
+template <typename Connection>
+class ConnectionHandle;
+
+/// A thread-safe base class that manages a fixed-size pool of generic database
+/// connections and supports the reclamation of ConnectionHandles. This class
+/// is the subset of ConnectionPool which doesn't depend on the Factory type.
+template <typename Connection>
+class ConnectionPoolBase {
+ public:
+ ConnectionPoolBase(std::size_t max_connections)
+ : connections(max_connections)
+ {}
+ private:
+ friend class ConnectionHandle<Connection>;
+
+ // TODO: the caller may detect a connection error that prevents the connection
+ // from being reused. Allow the caller to indicate such errors here
+ void put(std::unique_ptr<Connection> connection)
+ {
+ auto lock = std::scoped_lock{mutex};
+ connections.push_back(std::move(connection));
+
+ if (connections.size() == 1) { // was empty
+ cond.notify_one();
+ }
+ }
+ protected:
+ std::mutex mutex;
+ std::condition_variable cond;
+ boost::circular_buffer<std::unique_ptr<Connection>> connections;
+};
+
+/// Handle to a database connection borrowed from the pool. Automatically
+/// returns the connection to its pool on the handle's destruction.
+template <typename Connection>
+class ConnectionHandle {
+ ConnectionPoolBase<Connection>* pool = nullptr;
+ std::unique_ptr<Connection> conn;
+ public:
+ ConnectionHandle() noexcept = default;
+ ConnectionHandle(ConnectionPoolBase<Connection>* pool,
+ std::unique_ptr<Connection> conn) noexcept
+ : pool(pool), conn(std::move(conn)) {}
+
+ ~ConnectionHandle() {
+ if (conn) {
+ pool->put(std::move(conn));
+ }
+ }
+
+ ConnectionHandle(ConnectionHandle&&) = default;
+ ConnectionHandle& operator=(ConnectionHandle&& o) noexcept {
+ if (conn) {
+ pool->put(std::move(conn));
+ }
+ conn = std::move(o.conn);
+ pool = o.pool;
+ return *this;
+ }
+
+ explicit operator bool() const noexcept { return static_cast<bool>(conn); }
+ Connection& operator*() const noexcept { return *conn; }
+ Connection* operator->() const noexcept { return conn.get(); }
+ Connection* get() const noexcept { return conn.get(); }
+};
+
+
+// factory_of concept requires the function signature:
+// F(const DoutPrefixProvider*) -> std::unique_ptr<T>
+template <typename F, typename T>
+concept factory_of = requires (F factory, const DoutPrefixProvider* dpp) {
+ { factory(dpp) } -> std::same_as<std::unique_ptr<T>>;
+ requires std::move_constructible<F>;
+};
+
+
+/// Generic database connection pool that enforces a limit on open connections.
+template <typename Connection, factory_of<Connection> Factory>
+class ConnectionPool : public ConnectionPoolBase<Connection> {
+ public:
+ ConnectionPool(Factory factory, std::size_t max_connections)
+ : ConnectionPoolBase<Connection>(max_connections),
+ factory(std::move(factory))
+ {}
+
+ /// Borrow a connection from the pool. If all existing connections are in use,
+ /// use the connection factory to create another one. If we've reached the
+ /// limit on open connections, wait on a condition variable for the next one
+ /// returned to the pool.
+ auto get(const DoutPrefixProvider* dpp)
+ -> ConnectionHandle<Connection>
+ {
+ auto lock = std::unique_lock{this->mutex};
+ std::unique_ptr<Connection> conn;
+
+ if (!this->connections.empty()) {
+ // take an existing connection
+ conn = std::move(this->connections.front());
+ this->connections.pop_front();
+ } else if (total < this->connections.capacity()) {
+ // add another connection to the pool
+ conn = factory(dpp);
+ ++total;
+ } else {
+ // wait for the next put()
+ // TODO: support optional_yield
+ ldpp_dout(dpp, 4) << "ConnectionPool waiting on a connection" << dendl;
+ this->cond.wait(lock, [&] { return !this->connections.empty(); });
+ ldpp_dout(dpp, 4) << "ConnectionPool done waiting" << dendl;
+ conn = std::move(this->connections.front());
+ this->connections.pop_front();
+ }
+
+ return {this, std::move(conn)};
+ }
+ private:
+ Factory factory;
+ std::size_t total = 0;
+};
+
+} // namespace rgw::dbstore
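For context, a minimal usage sketch of the pool defined above. It is illustrative only and not part of the patch: FakeConn and FakeConnFactory are hypothetical stand-ins for a real connection type (e.g. one owning a sqlite3 handle) and its factory; any move-constructible callable satisfying factory_of<Connection> works, including a lambda. It assumes the header above is included.

struct FakeConn {
  // hypothetical connection object, e.g. owning a sqlite3* in the real driver
};

struct FakeConnFactory {
  std::unique_ptr<FakeConn> operator()(const DoutPrefixProvider* dpp) {
    return std::make_unique<FakeConn>();  // open a brand-new connection
  }
};

void pool_example(const DoutPrefixProvider* dpp) {
  // at most 8 connections will ever be open at once
  rgw::dbstore::ConnectionPool<FakeConn, FakeConnFactory> pool(FakeConnFactory{}, 8);
  {
    auto conn = pool.get(dpp);  // reuses an idle connection, creates one, or waits at the limit
    // ... run statements through *conn ...
  }                             // ConnectionHandle destructor returns the connection to the pool
}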
diff --git a/src/rgw/driver/dbstore/common/dbstore.cc b/src/rgw/driver/dbstore/common/dbstore.cc
new file mode 100644
index 000000000..dc5a90c31
--- /dev/null
+++ b/src/rgw/driver/dbstore/common/dbstore.cc
@@ -0,0 +1,2252 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "dbstore.h"
+
+using namespace std;
+
+namespace rgw { namespace store {
+
+map<string, class ObjectOp*> DB::objectmap = {};
+
+map<string, class ObjectOp*> DB::getObjectMap() {
+ return DB::objectmap;
+}
+
+int DB::Initialize(string logfile, int loglevel)
+{
+ int ret = -1;
+ const DoutPrefixProvider *dpp = get_def_dpp();
+
+ if (!cct) {
+ cout << "Failed to Initialize. No ceph Context \n";
+ return -1;
+ }
+
+ if (loglevel > 0) {
+ cct->_conf->subsys.set_log_level(ceph_subsys_rgw, loglevel);
+ }
+ if (!logfile.empty()) {
+ cct->_log->set_log_file(logfile);
+ cct->_log->reopen_log_file();
+ }
+
+
+ db = openDB(dpp);
+
+ if (!db) {
+ ldpp_dout(dpp, 0) <<"Failed to open database " << dendl;
+ return ret;
+ }
+
+ ret = InitializeDBOps(dpp);
+
+ if (ret) {
+ ldpp_dout(dpp, 0) <<"InitializeDBOps failed " << dendl;
+ closeDB(dpp);
+ db = NULL;
+ return ret;
+ }
+
+ ldpp_dout(dpp, 0) << "DB successfully initialized - name:" \
+ << db_name << "" << dendl;
+
+ return ret;
+}
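As a lifecycle sketch (illustrative, not part of the patch): store stands for a pointer to a concrete DB subclass such as this driver's SQLite backend. Passing an empty logfile and a non-positive loglevel leaves the existing logging configuration untouched, per the checks above.

int db_lifecycle(rgw::store::DB* store, const DoutPrefixProvider* dpp) {
  int r = store->Initialize("", 0);  // opens the backend DB and sets up the DB ops
  if (r) {
    return r;                        // openDB()/InitializeDBOps() failed
  }
  store->createGC(dpp);              // optional: start the background GC thread
  // ... serve requests ...
  return store->Destroy(dpp);        // stops GC and closes the database
}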
+
+int DB::createGC(const DoutPrefixProvider *dpp) {
+ int ret = 0;
+ /* create gc thread */
+
+ gc_worker = std::make_unique<DB::GC>(dpp, this);
+ gc_worker->create("db_gc");
+
+ return ret;
+}
+
+int DB::stopGC() {
+ if (gc_worker) {
+ gc_worker->signal_stop();
+ gc_worker->join();
+ }
+ return 0;
+}
+
+int DB::Destroy(const DoutPrefixProvider *dpp)
+{
+ if (!db)
+ return 0;
+
+ stopGC();
+
+ closeDB(dpp);
+
+
+ ldpp_dout(dpp, 20)<<"DB successfully destroyed - name:" \
+ <<db_name << dendl;
+
+ return 0;
+}
+
+
+std::shared_ptr<class DBOp> DB::getDBOp(const DoutPrefixProvider *dpp, std::string_view Op,
+ const DBOpParams *params)
+{
+ if (!Op.compare("InsertUser"))
+ return dbops.InsertUser;
+ if (!Op.compare("RemoveUser"))
+ return dbops.RemoveUser;
+ if (!Op.compare("GetUser"))
+ return dbops.GetUser;
+ if (!Op.compare("InsertBucket"))
+ return dbops.InsertBucket;
+ if (!Op.compare("UpdateBucket"))
+ return dbops.UpdateBucket;
+ if (!Op.compare("RemoveBucket"))
+ return dbops.RemoveBucket;
+ if (!Op.compare("GetBucket"))
+ return dbops.GetBucket;
+ if (!Op.compare("ListUserBuckets"))
+ return dbops.ListUserBuckets;
+ if (!Op.compare("InsertLCEntry"))
+ return dbops.InsertLCEntry;
+ if (!Op.compare("RemoveLCEntry"))
+ return dbops.RemoveLCEntry;
+ if (!Op.compare("GetLCEntry"))
+ return dbops.GetLCEntry;
+ if (!Op.compare("ListLCEntries"))
+ return dbops.ListLCEntries;
+ if (!Op.compare("InsertLCHead"))
+ return dbops.InsertLCHead;
+ if (!Op.compare("RemoveLCHead"))
+ return dbops.RemoveLCHead;
+ if (!Op.compare("GetLCHead"))
+ return dbops.GetLCHead;
+
+ /* Object Operations */
+ map<string, class ObjectOp*>::iterator iter;
+ class ObjectOp* Ob;
+
+ {
+ const std::lock_guard<std::mutex> lk(mtx);
+ iter = DB::objectmap.find(params->op.bucket.info.bucket.name);
+ }
+
+ if (iter == DB::objectmap.end()) {
+ ldpp_dout(dpp, 30)<<"No objectmap found for bucket: " \
+ <<params->op.bucket.info.bucket.name << dendl;
+ /* not found */
+ return nullptr;
+ }
+
+ Ob = iter->second;
+
+ if (!Op.compare("PutObject"))
+ return Ob->PutObject;
+ if (!Op.compare("DeleteObject"))
+ return Ob->DeleteObject;
+ if (!Op.compare("GetObject"))
+ return Ob->GetObject;
+ if (!Op.compare("UpdateObject"))
+ return Ob->UpdateObject;
+ if (!Op.compare("ListBucketObjects"))
+ return Ob->ListBucketObjects;
+ if (!Op.compare("ListVersionedObjects"))
+ return Ob->ListVersionedObjects;
+ if (!Op.compare("PutObjectData"))
+ return Ob->PutObjectData;
+ if (!Op.compare("UpdateObjectData"))
+ return Ob->UpdateObjectData;
+ if (!Op.compare("GetObjectData"))
+ return Ob->GetObjectData;
+ if (!Op.compare("DeleteObjectData"))
+ return Ob->DeleteObjectData;
+ if (!Op.compare("DeleteStaleObjectData"))
+ return Ob->DeleteStaleObjectData;
+
+ return nullptr;
+}
+
+int DB::objectmapInsert(const DoutPrefixProvider *dpp, string bucket, class ObjectOp* ptr)
+{
+ map<string, class ObjectOp*>::iterator iter;
+ class ObjectOp *Ob;
+
+ const std::lock_guard<std::mutex> lk(mtx);
+ iter = DB::objectmap.find(bucket);
+
+ if (iter != DB::objectmap.end()) {
+ // entry already exists
+ // return success or replace it or
+ // return error ?
+ //
+ // return success for now & delete the newly allocated ptr
+ ldpp_dout(dpp, 30)<<"Objectmap entry already exists for bucket("\
+ <<bucket<<"). Not inserted " << dendl;
+ delete ptr;
+ return 0;
+ }
+
+ Ob = (class ObjectOp*) ptr;
+ Ob->InitializeObjectOps(getDBname(), dpp);
+
+ DB::objectmap.insert(pair<string, class ObjectOp*>(bucket, Ob));
+
+ return 0;
+}
+
+int DB::objectmapDelete(const DoutPrefixProvider *dpp, string bucket)
+{
+ map<string, class ObjectOp*>::iterator iter;
+
+ const std::lock_guard<std::mutex> lk(mtx);
+ iter = DB::objectmap.find(bucket);
+
+ if (iter == DB::objectmap.end()) {
+ // entry doesn't exist
+ // return success or return error ?
+ // return success for now
+ ldpp_dout(dpp, 20)<<"Objectmap entry for bucket("<<bucket<<") "
+ <<"doesnt exist to delete " << dendl;
+ return 0;
+ }
+
+ DB::objectmap.erase(iter);
+
+ return 0;
+}
+
+int DB::InitializeParams(const DoutPrefixProvider *dpp, DBOpParams *params)
+{
+ int ret = -1;
+
+ if (!params)
+ goto out;
+
+ params->cct = cct;
+
+ //reset params here
+ params->user_table = user_table;
+ params->bucket_table = bucket_table;
+ params->quota_table = quota_table;
+ params->lc_entry_table = lc_entry_table;
+ params->lc_head_table = lc_head_table;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int DB::ProcessOp(const DoutPrefixProvider *dpp, std::string_view Op, DBOpParams *params) {
+ int ret = -1;
+ shared_ptr<class DBOp> db_op;
+
+ db_op = getDBOp(dpp, Op, params);
+
+ if (!db_op) {
+ ldpp_dout(dpp, 0)<<"No db_op found for Op("<<Op<<")" << dendl;
+ return ret;
+ }
+ ret = db_op->Execute(dpp, params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"In Process op Execute failed for fop(" << Op << ")" << dendl;
+ } else {
+ ldpp_dout(dpp, 20)<<"Successfully processed fop(" << Op << ")" << dendl;
+ }
+
+ return ret;
+}
+
+int DB::get_user(const DoutPrefixProvider *dpp,
+ const std::string& query_str, const std::string& query_str_val,
+ RGWUserInfo& uinfo, map<string, bufferlist> *pattrs,
+ RGWObjVersionTracker *pobjv_tracker) {
+ int ret = 0;
+
+ if (query_str.empty() || query_str_val.empty()) {
+ ldpp_dout(dpp, 0)<<"In GetUser - Invalid query(" << query_str <<"), query_str_val(" << query_str_val <<")" << dendl;
+ return -1;
+ }
+
+ DBOpParams params = {};
+ InitializeParams(dpp, &params);
+
+ params.op.query_str = query_str;
+
+ // validate query_str with UserTable entries names
+ if (query_str == "username") {
+ params.op.user.uinfo.display_name = query_str_val;
+ } else if (query_str == "email") {
+ params.op.user.uinfo.user_email = query_str_val;
+ } else if (query_str == "access_key") {
+ RGWAccessKey k(query_str_val, "");
+ map<string, RGWAccessKey> keys;
+ keys[query_str_val] = k;
+ params.op.user.uinfo.access_keys = keys;
+ } else if (query_str == "user_id") {
+ params.op.user.uinfo.user_id = uinfo.user_id;
+ } else {
+ ldpp_dout(dpp, 0)<<"In GetUser Invalid query string :" <<query_str.c_str()<<") " << dendl;
+ return -1;
+ }
+
+ ret = ProcessOp(dpp, "GetUser", &params);
+
+ if (ret)
+ goto out;
+
+ /* Verify if its a valid user */
+ if (params.op.user.uinfo.access_keys.empty() ||
+ params.op.user.uinfo.user_id.id.empty()) {
+ ldpp_dout(dpp, 0)<<"In GetUser - No user with query(" <<query_str.c_str()<<"), user_id(" << uinfo.user_id <<") found" << dendl;
+ return -ENOENT;
+ }
+
+ uinfo = params.op.user.uinfo;
+
+ if (pattrs) {
+ *pattrs = params.op.user.user_attrs;
+ }
+
+ if (pobjv_tracker) {
+ pobjv_tracker->read_version = params.op.user.user_version;
+ }
+
+out:
+ return ret;
+}
+
+int DB::store_user(const DoutPrefixProvider *dpp,
+ RGWUserInfo& uinfo, bool exclusive, map<string, bufferlist> *pattrs,
+ RGWObjVersionTracker *pobjv, RGWUserInfo* pold_info)
+{
+ DBOpParams params = {};
+ InitializeParams(dpp, &params);
+ int ret = 0;
+
+ /* Check if the user already exists and return the old info, caller will have a use for it */
+ RGWUserInfo orig_info;
+ RGWObjVersionTracker objv_tracker = {};
+ obj_version& obj_ver = objv_tracker.read_version;
+
+ orig_info.user_id = uinfo.user_id;
+ ret = get_user(dpp, string("user_id"), uinfo.user_id.id, orig_info, nullptr, &objv_tracker);
+
+ if (!ret && obj_ver.ver) {
+ /* already exists. */
+
+ if (pold_info) {
+ *pold_info = orig_info;
+ }
+
+ if (pobjv && (pobjv->read_version.ver != obj_ver.ver)) {
+ /* Object version mismatch.. return ECANCELED */
+ ret = -ECANCELED;
+ ldpp_dout(dpp, 0)<<"User Read version mismatch err:(" <<ret<<") " << dendl;
+ return ret;
+ }
+
+ if (exclusive) {
+ // return
+ return ret;
+ }
+ obj_ver.ver++;
+ } else {
+ obj_ver.ver = 1;
+ obj_ver.tag = "UserTAG";
+ }
+
+ params.op.user.user_version = obj_ver;
+ params.op.user.uinfo = uinfo;
+
+ if (pattrs) {
+ params.op.user.user_attrs = *pattrs;
+ }
+
+ ret = ProcessOp(dpp, "InsertUser", &params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"store_user failed with err:(" <<ret<<") " << dendl;
+ goto out;
+ }
+ ldpp_dout(dpp, 20)<<"User creation successful - userid:(" <<uinfo.user_id<<") " << dendl;
+
+ if (pobjv) {
+ pobjv->read_version = obj_ver;
+ pobjv->write_version = obj_ver;
+ }
+
+out:
+ return ret;
+}
+
+int DB::remove_user(const DoutPrefixProvider *dpp,
+ RGWUserInfo& uinfo, RGWObjVersionTracker *pobjv)
+{
+ DBOpParams params = {};
+ InitializeParams(dpp, &params);
+ int ret = 0;
+
+ RGWUserInfo orig_info;
+ RGWObjVersionTracker objv_tracker = {};
+
+ orig_info.user_id = uinfo.user_id;
+ ret = get_user(dpp, string("user_id"), uinfo.user_id.id, orig_info, nullptr, &objv_tracker);
+
+ if (ret) {
+ return ret;
+ }
+
+ if (!ret && objv_tracker.read_version.ver) {
+ /* already exists. */
+
+ if (pobjv && (pobjv->read_version.ver != objv_tracker.read_version.ver)) {
+ /* Object version mismatch.. return ECANCELED */
+ ret = -ECANCELED;
+ ldpp_dout(dpp, 0)<<"User Read version mismatch err:(" <<ret<<") " << dendl;
+ return ret;
+ }
+ }
+
+ params.op.user.uinfo.user_id = uinfo.user_id;
+
+ ret = ProcessOp(dpp, "RemoveUser", &params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"remove_user failed with err:(" <<ret<<") " << dendl;
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int DB::get_bucket_info(const DoutPrefixProvider *dpp, const std::string& query_str,
+ const std::string& query_str_val,
+ RGWBucketInfo& info,
+ rgw::sal::Attrs* pattrs, ceph::real_time* pmtime,
+ obj_version* pbucket_version) {
+ int ret = 0;
+
+ if (query_str.empty()) {
+ // not checking for query_str_val as the query can be to fetch
+ // entries with null values
+ return -1;
+ }
+
+ DBOpParams params = {};
+ DBOpParams params2 = {};
+ InitializeParams(dpp, &params);
+
+ if (query_str == "name") {
+ params.op.bucket.info.bucket.name = info.bucket.name;
+ } else {
+ ldpp_dout(dpp, 0)<<"In GetBucket Invalid query string :" <<query_str.c_str()<<") " << dendl;
+ return -1;
+ }
+
+ ret = ProcessOp(dpp, "GetBucket", &params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"In GetBucket failed err:(" <<ret<<") " << dendl;
+ goto out;
+ }
+
+ if (!ret && params.op.bucket.info.bucket.marker.empty()) {
+ return -ENOENT;
+ }
+ info = params.op.bucket.info;
+
+ if (pattrs) {
+ *pattrs = params.op.bucket.bucket_attrs;
+ }
+
+ if (pmtime) {
+ *pmtime = params.op.bucket.mtime;
+ }
+ if (pbucket_version) {
+ *pbucket_version = params.op.bucket.bucket_version;
+ }
+
+out:
+ return ret;
+}
+
+int DB::create_bucket(const DoutPrefixProvider *dpp,
+ const RGWUserInfo& owner, rgw_bucket& bucket,
+ const string& zonegroup_id,
+ const rgw_placement_rule& placement_rule,
+ const string& swift_ver_location,
+ const RGWQuotaInfo * pquota_info,
+ map<std::string, bufferlist>& attrs,
+ RGWBucketInfo& info,
+ obj_version *pobjv,
+ obj_version *pep_objv,
+ real_time creation_time,
+ rgw_bucket *pmaster_bucket,
+ uint32_t *pmaster_num_shards,
+ optional_yield y,
+ bool exclusive)
+{
+ /*
+ * XXX: Simple creation for now.
+ *
+ * Referring to RGWRados::create_bucket(),
+ * Check if bucket already exists, select_bucket_placement,
+ * is explicit put/remove instance info needed? - should not be ideally
+ */
+
+ DBOpParams params = {};
+ InitializeParams(dpp, &params);
+ int ret = 0;
+
+ /* Check if the bucket already exists and return the old info, caller will have a use for it */
+ RGWBucketInfo orig_info;
+ orig_info.bucket.name = bucket.name;
+ ret = get_bucket_info(dpp, string("name"), "", orig_info, nullptr, nullptr, nullptr);
+
+ if (!ret && !orig_info.owner.id.empty() && exclusive) {
+ /* already exists. Return the old info */
+
+ info = std::move(orig_info);
+ return ret;
+ }
+
+ RGWObjVersionTracker& objv_tracker = info.objv_tracker;
+
+ objv_tracker.read_version.clear();
+
+ if (pobjv) {
+ objv_tracker.write_version = *pobjv;
+ } else {
+ objv_tracker.generate_new_write_ver(cct);
+ }
+ params.op.bucket.bucket_version = objv_tracker.write_version;
+ objv_tracker.read_version = params.op.bucket.bucket_version;
+
+ uint64_t bid = next_bucket_id();
+ string s = getDBname() + "." + std::to_string(bid);
+ bucket.marker = bucket.bucket_id = s;
+
+ info.bucket = bucket;
+ info.owner = owner.user_id;
+ info.zonegroup = zonegroup_id;
+ info.placement_rule = placement_rule;
+ info.swift_ver_location = swift_ver_location;
+ info.swift_versioning = (!swift_ver_location.empty());
+
+ info.requester_pays = false;
+ if (real_clock::is_zero(creation_time)) {
+ info.creation_time = ceph::real_clock::now();
+ } else {
+ info.creation_time = creation_time;
+ }
+ if (pquota_info) {
+ info.quota = *pquota_info;
+ }
+
+ params.op.bucket.info = info;
+ params.op.bucket.bucket_attrs = attrs;
+ params.op.bucket.mtime = ceph::real_time();
+ params.op.user.uinfo.user_id.id = owner.user_id.id;
+
+ ret = ProcessOp(dpp, "InsertBucket", &params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"create_bucket failed with err:(" <<ret<<") " << dendl;
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int DB::remove_bucket(const DoutPrefixProvider *dpp, const RGWBucketInfo info) {
+ int ret = 0;
+
+ DBOpParams params = {};
+ InitializeParams(dpp, &params);
+
+ params.op.bucket.info.bucket.name = info.bucket.name;
+
+ ret = ProcessOp(dpp, "RemoveBucket", &params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"In RemoveBucket failed err:(" <<ret<<") " << dendl;
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int DB::list_buckets(const DoutPrefixProvider *dpp, const std::string& query_str,
+ rgw_user& user,
+ const string& marker,
+ const string& end_marker,
+ uint64_t max,
+ bool need_stats,
+ RGWUserBuckets *buckets,
+ bool *is_truncated)
+{
+ int ret = 0;
+
+ DBOpParams params = {};
+ InitializeParams(dpp, &params);
+
+ params.op.user.uinfo.user_id = user;
+ params.op.bucket.min_marker = marker;
+ params.op.bucket.max_marker = end_marker;
+ params.op.list_max_count = max;
+ params.op.query_str = query_str;
+
+ ret = ProcessOp(dpp, "ListUserBuckets", &params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"In ListUserBuckets failed err:(" <<ret<<") " << dendl;
+ goto out;
+ }
+
+ /* need_stats: stats are already part of entries... In case they are maintained in
+ * separate table , maybe use "Inner Join" with stats table for the query.
+ */
+ if (params.op.bucket.list_entries.size() == max)
+ *is_truncated = true;
+
+ for (auto& entry : params.op.bucket.list_entries) {
+ if (!end_marker.empty() &&
+ end_marker.compare(entry.bucket.marker) <= 0) {
+ *is_truncated = false;
+ break;
+ }
+ buckets->add(std::move(entry));
+ }
+
+ if (query_str == "all") {
+ // userID/OwnerID may have changed. Update it.
+ user.id = params.op.bucket.info.owner.id;
+ }
+
+out:
+ return ret;
+}
+
+int DB::update_bucket(const DoutPrefixProvider *dpp, const std::string& query_str,
+ RGWBucketInfo& info,
+ bool exclusive,
+ const rgw_user* powner_id,
+ map<std::string, bufferlist>* pattrs,
+ ceph::real_time* pmtime,
+ RGWObjVersionTracker* pobjv)
+{
+ int ret = 0;
+ DBOpParams params = {};
+ obj_version bucket_version;
+ RGWBucketInfo orig_info;
+
+ /* Check if the bucket already exists and return the old info, caller will have a use for it */
+ orig_info.bucket.name = info.bucket.name;
+ params.op.bucket.info.bucket.name = info.bucket.name;
+ ret = get_bucket_info(dpp, string("name"), "", orig_info, nullptr, nullptr,
+ &bucket_version);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"Failed to read bucket info err:(" <<ret<<") " << dendl;
+ goto out;
+ }
+
+ if (!orig_info.owner.id.empty() && exclusive) {
+ /* already exists. Return the old info */
+
+ info = std::move(orig_info);
+ return ret;
+ }
+
+ /* Verify if the objv read_ver matches current bucket version */
+ if (pobjv) {
+ if (pobjv->read_version.ver != bucket_version.ver) {
+ ldpp_dout(dpp, 0)<<"Read version mismatch err:(" <<ret<<") " << dendl;
+ ret = -ECANCELED;
+ goto out;
+ }
+ } else {
+ pobjv = &info.objv_tracker;
+ }
+
+ InitializeParams(dpp, &params);
+
+ params.op.bucket.info.bucket.name = info.bucket.name;
+
+ if (powner_id) {
+ params.op.user.uinfo.user_id.id = powner_id->id;
+ } else {
+ params.op.user.uinfo.user_id.id = orig_info.owner.id;
+ }
+
+ /* Update version & mtime */
+ params.op.bucket.bucket_version.ver = ++(bucket_version.ver);
+
+ if (pmtime) {
+ params.op.bucket.mtime = *pmtime;
+ } else {
+ params.op.bucket.mtime = ceph::real_time();
+ }
+
+ if (query_str == "attrs") {
+ params.op.query_str = "attrs";
+ params.op.bucket.bucket_attrs = *pattrs;
+ } else if (query_str == "owner") {
+ /* Update only owner i.e, chown.
+ * Update creation_time too */
+ params.op.query_str = "owner";
+ params.op.bucket.info.creation_time = params.op.bucket.mtime;
+ } else if (query_str == "info") {
+ params.op.query_str = "info";
+ params.op.bucket.info = info;
+ } else {
+ ret = -1;
+ ldpp_dout(dpp, 0)<<"In UpdateBucket Invalid query_str : " << query_str << dendl;
+ goto out;
+ }
+
+ ret = ProcessOp(dpp, "UpdateBucket", &params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"In UpdateBucket failed err:(" <<ret<<") " << dendl;
+ goto out;
+ }
+
+ if (pobjv) {
+ pobjv->read_version = params.op.bucket.bucket_version;
+ pobjv->write_version = params.op.bucket.bucket_version;
+ }
+
+out:
+ return ret;
+}
+
+/**
+ * Get ordered listing of the objects in a bucket.
+ *
+ * max_p: maximum number of results to return
+ * bucket: bucket to list contents of
+ * prefix: only return results that match this prefix
+ * delim: do not include results that match this string.
+ * Any skipped results will have the matching portion of their name
+ * inserted in common_prefixes with a "true" mark.
+ * marker: if filled in, begin the listing with this object.
+ * end_marker: if filled in, end the listing with this object.
+ * result: the objects are put in here.
+ * common_prefixes: if delim is filled in, any matching prefixes are
+ * placed here.
+ * is_truncated: if number of objects in the bucket is bigger than
+ * max, then truncated.
+ */
+int DB::Bucket::List::list_objects(const DoutPrefixProvider *dpp, int64_t max,
+ vector<rgw_bucket_dir_entry> *result,
+ map<string, bool> *common_prefixes, bool *is_truncated)
+{
+ int ret = 0;
+ DB *store = target->get_store();
+ int64_t count = 0;
+ std::string prev_obj;
+
+ DBOpParams db_params = {};
+ store->InitializeParams(dpp, &db_params);
+
+ db_params.op.bucket.info = target->get_bucket_info();
+ /* XXX: Handle whole marker? key -> name, instance, ns? */
+ db_params.op.obj.min_marker = params.marker.name;
+ db_params.op.obj.max_marker = params.end_marker.name;
+ db_params.op.obj.prefix = params.prefix + "%";
+ db_params.op.list_max_count = max + 1; /* +1 for next_marker */
+
+ ret = store->ProcessOp(dpp, "ListBucketObjects", &db_params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"In ListBucketObjects failed err:(" <<ret<<") " << dendl;
+ goto out;
+ }
+
+ for (auto& entry : db_params.op.obj.list_entries) {
+
+ if (!params.list_versions) {
+ if (entry.flags & rgw_bucket_dir_entry::FLAG_DELETE_MARKER) {
+ prev_obj = entry.key.name;
+ // skip all non-current entries and delete_marker
+ continue;
+ }
+ if (entry.key.name == prev_obj) {
+ // non current versions..skip the entry
+ continue;
+ }
+ entry.flags |= rgw_bucket_dir_entry::FLAG_CURRENT;
+ } else {
+ if (entry.key.name != prev_obj) {
+ // current version
+ entry.flags |= rgw_bucket_dir_entry::FLAG_CURRENT;
+ } else {
+ entry.flags &= ~(rgw_bucket_dir_entry::FLAG_CURRENT);
+ entry.flags |= rgw_bucket_dir_entry::FLAG_VER;
+ }
+ }
+
+ prev_obj = entry.key.name;
+
+ if (count >= max) {
+ *is_truncated = true;
+ next_marker.name = entry.key.name;
+ next_marker.instance = entry.key.instance;
+ break;
+ }
+
+ if (!params.delim.empty()) {
+ const std::string& objname = entry.key.name;
+ const int delim_pos = objname.find(params.delim, params.prefix.size());
+ if (delim_pos >= 0) {
+ /* extract key -with trailing delimiter- for CommonPrefix */
+ const std::string& prefix_key =
+ objname.substr(0, delim_pos + params.delim.length());
+
+ if (common_prefixes &&
+ common_prefixes->find(prefix_key) == common_prefixes->end()) {
+ next_marker = prefix_key;
+ (*common_prefixes)[prefix_key] = true;
+ count++;
+ }
+ continue;
+ }
+ }
+
+ if (!params.end_marker.name.empty() &&
+ params.end_marker.name.compare(entry.key.name) <= 0) {
+ // should not include end_marker
+ *is_truncated = false;
+ break;
+ }
+ count++;
+ result->push_back(std::move(entry));
+ }
+out:
+ return ret;
+}
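The delimiter handling above is the usual S3 CommonPrefixes roll-up: every key that contains the delimiter past the requested prefix is collapsed into a single common-prefix entry. A standalone sketch of that computation (illustrative only; the helper name is hypothetical):

#include <string>

// Return the CommonPrefixes entry for objname, or "" when the delimiter does
// not occur after the requested prefix (the key is then listed directly).
std::string common_prefix_for(const std::string& objname,
                              const std::string& prefix,
                              const std::string& delim) {
  const auto delim_pos = objname.find(delim, prefix.size());
  if (delim_pos == std::string::npos)
    return {};
  return objname.substr(0, delim_pos + delim.length());
}
// e.g. prefix "photos/", delim "/": "photos/2024/a.jpg" -> "photos/2024/",
// so all keys under "photos/2024/" become one listing entry.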
+
+int DB::raw_obj::InitializeParamsfromRawObj(const DoutPrefixProvider *dpp,
+ DBOpParams* params) {
+ int ret = 0;
+
+ if (!params)
+ return -1;
+
+ params->op.bucket.info.bucket.name = bucket_name;
+ params->op.obj.state.obj.key.name = obj_name;
+ params->op.obj.state.obj.key.instance = obj_instance;
+ params->op.obj.state.obj.key.ns = obj_ns;
+ params->op.obj.obj_id = obj_id;
+
+ if (multipart_part_str != "0.0") {
+ params->op.obj.is_multipart = true;
+ } else {
+ params->op.obj.is_multipart = false;
+ }
+
+ params->op.obj_data.multipart_part_str = multipart_part_str;
+ params->op.obj_data.part_num = part_num;
+
+ return ret;
+}
+
+int DB::Object::InitializeParamsfromObject(const DoutPrefixProvider *dpp,
+ DBOpParams* params) {
+ int ret = 0;
+ string bucket = bucket_info.bucket.name;
+
+ if (!params)
+ return -1;
+
+ params->op.bucket.info.bucket.name = bucket;
+ params->op.obj.state.obj = obj;
+ params->op.obj.obj_id = obj_id;
+
+ return ret;
+}
+
+int DB::Object::get_object_impl(const DoutPrefixProvider *dpp, DBOpParams& params) {
+ int ret = 0;
+
+ if (params.op.obj.state.obj.key.name.empty()) {
+ /* Initialize */
+ store->InitializeParams(dpp, &params);
+ InitializeParamsfromObject(dpp, &params);
+ }
+
+ ret = store->ProcessOp(dpp, "GetObject", &params);
+
+ /* pick one field check if object exists */
+ if (!ret && !params.op.obj.state.exists) {
+ ldpp_dout(dpp, 0)<<"Object(bucket:" << bucket_info.bucket.name << ", Object:"<< obj.key.name << ") doesn't exist" << dendl;
+ ret = -ENOENT;
+ }
+
+ return ret;
+}
+
+int DB::Object::obj_omap_set_val_by_key(const DoutPrefixProvider *dpp,
+ const std::string& key, bufferlist& val,
+ bool must_exist) {
+ int ret = 0;
+
+ DBOpParams params = {};
+
+ ret = get_object_impl(dpp, params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0) <<"get_object_impl failed err:(" <<ret<<")" << dendl;
+ goto out;
+ }
+
+ params.op.obj.omap[key] = val;
+ params.op.query_str = "omap";
+ params.op.obj.state.mtime = real_clock::now();
+
+ ret = store->ProcessOp(dpp, "UpdateObject", &params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"In UpdateObject failed err:(" <<ret<<") " << dendl;
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int DB::Object::obj_omap_get_vals_by_keys(const DoutPrefixProvider *dpp,
+ const std::string& oid,
+ const std::set<std::string>& keys,
+ std::map<std::string, bufferlist>* vals)
+{
+ int ret = 0;
+ DBOpParams params = {};
+ std::map<std::string, bufferlist> omap;
+
+ if (!vals)
+ return -1;
+
+ ret = get_object_impl(dpp, params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0) <<"get_object_impl failed err:(" <<ret<<")" << dendl;
+ goto out;
+ }
+
+ omap = params.op.obj.omap;
+
+ for (const auto& k : keys) {
+ (*vals)[k] = omap[k];
+ }
+
+out:
+ return ret;
+}
+
+int DB::Object::add_mp_part(const DoutPrefixProvider *dpp,
+ RGWUploadPartInfo info) {
+ int ret = 0;
+
+ DBOpParams params = {};
+
+ ret = get_object_impl(dpp, params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0) <<"get_object_impl failed err:(" <<ret<<")" << dendl;
+ goto out;
+ }
+
+ params.op.obj.mp_parts.push_back(info);
+ params.op.query_str = "mp";
+ params.op.obj.state.mtime = real_clock::now();
+
+ ret = store->ProcessOp(dpp, "UpdateObject", &params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"In UpdateObject failed err:(" <<ret<<") " << dendl;
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int DB::Object::get_mp_parts_list(const DoutPrefixProvider *dpp,
+ std::list<RGWUploadPartInfo>& info)
+{
+ int ret = 0;
+ DBOpParams params = {};
+ std::map<std::string, bufferlist> omap;
+
+ ret = get_object_impl(dpp, params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0) <<"get_object_impl failed err:(" <<ret<<")" << dendl;
+ goto out;
+ }
+
+ info = params.op.obj.mp_parts;
+
+out:
+ return ret;
+}
+
+/* Taken from rgw_rados.cc */
+void DB::gen_rand_obj_instance_name(rgw_obj_key *target_key)
+{
+#define OBJ_INSTANCE_LEN 32
+ char buf[OBJ_INSTANCE_LEN + 1];
+
+ gen_rand_alphanumeric_no_underscore(cct, buf, OBJ_INSTANCE_LEN); /* don't want it to get url escaped,
+ no underscore for instance name due to the way we encode the raw keys */
+
+ target_key->set_instance(buf);
+}
+
+int DB::Object::obj_omap_get_all(const DoutPrefixProvider *dpp,
+ std::map<std::string, bufferlist> *m)
+{
+ int ret = 0;
+ DBOpParams params = {};
+ std::map<std::string, bufferlist> omap;
+
+ if (!m)
+ return -1;
+
+ ret = get_object_impl(dpp, params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0) <<"get_object_impl failed err:(" <<ret<<")" << dendl;
+ goto out;
+ }
+
+ (*m) = params.op.obj.omap;
+
+out:
+ return ret;
+}
+
+int DB::Object::obj_omap_get_vals(const DoutPrefixProvider *dpp,
+ const std::string& marker,
+ uint64_t max_count,
+ std::map<std::string, bufferlist> *m, bool* pmore)
+{
+ int ret = 0;
+ DBOpParams params = {};
+ std::map<std::string, bufferlist> omap;
+ map<string, bufferlist>::iterator iter;
+ uint64_t count = 0;
+
+ if (!m)
+ return -1;
+
+ ret = get_object_impl(dpp, params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0) <<"get_object_impl failed err:(" <<ret<<")" << dendl;
+ goto out;
+ }
+
+ omap = params.op.obj.omap;
+
+ for (iter = omap.begin(); iter != omap.end(); ++iter) {
+
+ if (iter->first < marker)
+ continue;
+
+ if ((++count) > max_count) {
+ *pmore = true;
+ break;
+ }
+
+ (*m)[iter->first] = iter->second;
+ }
+
+out:
+ return ret;
+}
+
+int DB::Object::set_attrs(const DoutPrefixProvider *dpp,
+ map<string, bufferlist>& setattrs,
+ map<string, bufferlist>* rmattrs)
+{
+ int ret = 0;
+
+ DBOpParams params = {};
+ rgw::sal::Attrs *attrs;
+ map<string, bufferlist>::iterator iter;
+ RGWObjState* state;
+
+ store->InitializeParams(dpp, &params);
+ InitializeParamsfromObject(dpp, &params);
+ ret = get_state(dpp, &state, true);
+
+ if (ret && !state->exists) {
+ ldpp_dout(dpp, 0) <<"get_state failed err:(" <<ret<<")" << dendl;
+ goto out;
+ }
+
+ /* For now lets keep it simple..rmattrs & setattrs ..
+ * XXX: Check rgw_rados::set_attrs
+ */
+ params.op.obj.state = *state;
+ attrs = &params.op.obj.state.attrset;
+ if (rmattrs) {
+ for (iter = rmattrs->begin(); iter != rmattrs->end(); ++iter) {
+ (*attrs).erase(iter->first);
+ }
+ }
+ for (iter = setattrs.begin(); iter != setattrs.end(); ++iter) {
+ (*attrs)[iter->first] = iter->second;
+ }
+
+ params.op.query_str = "attrs";
+ /* As per https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingMetadata.html,
+ * the only way for users to modify object metadata is to make a copy of the object and
+ * set the metadata.
+ * Hence do not update mtime for any other attr changes */
+
+ ret = store->ProcessOp(dpp, "UpdateObject", &params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"In UpdateObject failed err:(" <<ret<<") " << dendl;
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int DB::Object::transition(const DoutPrefixProvider *dpp,
+ const rgw_placement_rule& rule,
+ const real_time& mtime,
+ uint64_t olh_epoch)
+{
+ int ret = 0;
+
+ DBOpParams params = {};
+ map<string, bufferlist> *attrset;
+
+ store->InitializeParams(dpp, &params);
+ InitializeParamsfromObject(dpp, &params);
+
+ ret = store->ProcessOp(dpp, "GetObject", &params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0) <<"In GetObject failed err:(" <<ret<<")" << dendl;
+ goto out;
+ }
+
+ /* pick one field check if object exists */
+ if (!params.op.obj.state.exists) {
+ ldpp_dout(dpp, 0)<<"Object(bucket:" << bucket_info.bucket.name << ", Object:"<< obj.key.name << ") doesn't exist" << dendl;
+ return -1;
+ }
+
+ params.op.query_str = "meta";
+ params.op.obj.state.mtime = real_clock::now();
+ params.op.obj.storage_class = rule.storage_class;
+ attrset = &params.op.obj.state.attrset;
+ if (!rule.storage_class.empty()) {
+ bufferlist bl;
+ bl.append(rule.storage_class);
+ (*attrset)[RGW_ATTR_STORAGE_CLASS] = bl;
+ }
+ params.op.obj.versioned_epoch = olh_epoch; // XXX: not sure if needed
+
+ /* Unlike Rados, in dbstore for now, both head and tail objects
+ * refer to same storage class
+ */
+ params.op.obj.head_placement_rule = rule;
+ params.op.obj.tail_placement.placement_rule = rule;
+
+ ret = store->ProcessOp(dpp, "UpdateObject", &params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"In UpdateObject failed err:(" <<ret<<") " << dendl;
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int DB::raw_obj::read(const DoutPrefixProvider *dpp, int64_t ofs,
+ uint64_t len, bufferlist& bl)
+{
+ int ret = 0;
+ DBOpParams params = {};
+
+ db->InitializeParams(dpp, &params);
+ InitializeParamsfromRawObj(dpp, &params);
+
+ ret = db->ProcessOp(dpp, "GetObjectData", &params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"In GetObjectData failed err:(" <<ret<<")" << dendl;
+ return ret;
+ }
+
+ /* Verify if its valid obj */
+ if (!params.op.obj_data.size) {
+ ret = -ENOENT;
+ ldpp_dout(dpp, 0)<<"In GetObjectData failed err:(" <<ret<<")" << dendl;
+ return ret;
+ }
+
+ bufferlist& read_bl = params.op.obj_data.data;
+
+ unsigned copy_len;
+ copy_len = std::min((uint64_t)read_bl.length() - ofs, len);
+ read_bl.begin(ofs).copy(copy_len, bl);
+ return bl.length();
+}
+
+int DB::raw_obj::write(const DoutPrefixProvider *dpp, int64_t ofs, int64_t write_ofs,
+ uint64_t len, bufferlist& bl)
+{
+ int ret = 0;
+ DBOpParams params = {};
+
+ db->InitializeParams(dpp, &params);
+ InitializeParamsfromRawObj(dpp, &params);
+
+ /* XXX: Check for chunk_size ?? */
+ params.op.obj_data.offset = ofs;
+ unsigned write_len = std::min((uint64_t)bl.length() - write_ofs, len);
+ bl.begin(write_ofs).copy(write_len, params.op.obj_data.data);
+ params.op.obj_data.size = params.op.obj_data.data.length();
+ params.op.obj.state.mtime = real_clock::now();
+
+ ret = db->ProcessOp(dpp, "PutObjectData", &params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"In PutObjectData failed err:(" <<ret<<")" << dendl;
+ return ret;
+ }
+
+ return write_len;
+}
+
+int DB::Object::list_versioned_objects(const DoutPrefixProvider *dpp,
+ std::list<rgw_bucket_dir_entry>& list_entries) {
+ int ret = 0;
+ store = get_store();
+ DBOpParams db_params = {};
+
+ store->InitializeParams(dpp, &db_params);
+ InitializeParamsfromObject(dpp, &db_params);
+
+ db_params.op.list_max_count = MAX_VERSIONED_OBJECTS;
+
+ ret = store->ProcessOp(dpp, "ListVersionedObjects", &db_params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"In ListVersionedObjects failed err:(" <<ret<<") " << dendl;
+ } else {
+ list_entries = db_params.op.obj.list_entries;
+ }
+
+ return ret;
+}
+
+int DB::Object::get_obj_state(const DoutPrefixProvider *dpp,
+ const RGWBucketInfo& bucket_info, const rgw_obj& obj,
+ bool follow_olh, RGWObjState** state)
+{
+ int ret = 0;
+
+ DBOpParams params = {};
+ RGWObjState* s;
+
+ if (!obj.key.instance.empty()) {
+ /* Versionid provided. Fetch the object */
+ ret = get_object_impl(dpp, params);
+
+ if (ret && ret != -ENOENT) {
+ ldpp_dout(dpp, 0) <<"get_object_impl failed err:(" <<ret<<")" << dendl;
+ goto out;
+ }
+ } else {
+ /* Instance is empty. May or may not be versioned object.
+ * List all the versions and read the most recent entry */
+ ret = list_versioned_objects(dpp, params.op.obj.list_entries);
+
+ if (params.op.obj.list_entries.size() != 0) {
+ /* Ensure its not a delete marker */
+ auto& ent = params.op.obj.list_entries.front();
+ if (ent.flags & rgw_bucket_dir_entry::FLAG_DELETE_MARKER) {
+ ret = -ENOENT;
+ goto out;
+ }
+ store->InitializeParams(dpp, &params);
+ InitializeParamsfromObject(dpp, &params);
+ params.op.obj.state.obj.key = ent.key;
+
+ ret = get_object_impl(dpp, params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0) <<"get_object_impl of versioned object failed err:(" <<ret<<")" << dendl;
+ goto out;
+ }
+ } else {
+ ret = -ENOENT;
+ return ret;
+ }
+ }
+
+ s = &params.op.obj.state;
+ /* XXX: For now use state->shadow_obj to store ObjectID string */
+ s->shadow_obj = params.op.obj.obj_id;
+
+ *state = &obj_state;
+ **state = *s;
+
+out:
+ return ret;
+
+}
+
+int DB::Object::get_state(const DoutPrefixProvider *dpp, RGWObjState** pstate, bool follow_olh)
+{
+ return get_obj_state(dpp, bucket_info, obj, follow_olh, pstate);
+}
+
+int DB::Object::Read::get_attr(const DoutPrefixProvider *dpp, const char *name, bufferlist& dest)
+{
+ RGWObjState* state;
+ int r = source->get_state(dpp, &state, true);
+ if (r < 0)
+ return r;
+ if (!state->exists)
+ return -ENOENT;
+ if (!state->get_attr(name, dest))
+ return -ENODATA;
+
+ return 0;
+}
+
+int DB::Object::Read::prepare(const DoutPrefixProvider *dpp)
+{
+ DB *store = source->get_store();
+ CephContext *cct = store->ctx();
+
+ bufferlist etag;
+
+ map<string, bufferlist>::iterator iter;
+
+ RGWObjState* astate;
+
+ int r = source->get_state(dpp, &astate, true);
+ if (r < 0)
+ return r;
+
+ if (!astate->exists) {
+ return -ENOENT;
+ }
+
+ state.obj = astate->obj;
+ source->obj_id = astate->shadow_obj;
+
+ if (params.target_obj) {
+ *params.target_obj = state.obj;
+ }
+ if (params.attrs) {
+ *params.attrs = astate->attrset;
+ if (cct->_conf->subsys.should_gather<ceph_subsys_rgw, 20>()) {
+ for (iter = params.attrs->begin(); iter != params.attrs->end(); ++iter) {
+ ldpp_dout(dpp, 20) << "Read xattr rgw_rados: " << iter->first << dendl;
+ }
+ }
+ }
+
+ if (conds.if_match || conds.if_nomatch) {
+ r = get_attr(dpp, RGW_ATTR_ETAG, etag);
+ if (r < 0)
+ return r;
+
+ if (conds.if_match) {
+ string if_match_str = rgw_string_unquote(conds.if_match);
+ ldpp_dout(dpp, 10) << "ETag: " << string(etag.c_str(), etag.length()) << " " << " If-Match: " << if_match_str << dendl;
+ if (if_match_str.compare(0, etag.length(), etag.c_str(), etag.length()) != 0) {
+ return -ERR_PRECONDITION_FAILED;
+ }
+ }
+
+ if (conds.if_nomatch) {
+ string if_nomatch_str = rgw_string_unquote(conds.if_nomatch);
+ ldpp_dout(dpp, 10) << "ETag: " << string(etag.c_str(), etag.length()) << " " << " If-NoMatch: " << if_nomatch_str << dendl;
+ if (if_nomatch_str.compare(0, etag.length(), etag.c_str(), etag.length()) == 0) {
+ return -ERR_NOT_MODIFIED;
+ }
+ }
+ }
+
+ if (params.obj_size)
+ *params.obj_size = astate->size;
+ if (params.lastmod)
+ *params.lastmod = astate->mtime;
+
+ return 0;
+}
+
+int DB::Object::Read::range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end)
+{
+ if (ofs < 0) {
+ ofs += obj_size;
+ if (ofs < 0)
+ ofs = 0;
+ end = obj_size - 1;
+ } else if (end < 0) {
+ end = obj_size - 1;
+ }
+
+ if (obj_size > 0) {
+ if (ofs >= (off_t)obj_size) {
+ return -ERANGE;
+ }
+ if (end >= (off_t)obj_size) {
+ end = obj_size - 1;
+ }
+ }
+ return 0;
+}
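For reference, a few worked values for range_to_ofs(), assuming a 100-byte object; they mirror HTTP Range semantics:

// obj_size = 100
//   ofs = -10  (end ignored)  ->  ofs = 90, end = 99   suffix range: last 10 bytes
//   ofs = 0,   end = -1       ->  ofs = 0,  end = 99   open-ended range
//   ofs = 40,  end = 500      ->  ofs = 40, end = 99   end clamped to obj_size - 1
//   ofs = 120, end = 199      ->  returns -ERANGE      start beyond the object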
+
+int DB::Object::Read::read(int64_t ofs, int64_t end, bufferlist& bl, const DoutPrefixProvider *dpp)
+{
+ DB *store = source->get_store();
+
+ uint64_t read_ofs = ofs;
+ uint64_t len, read_len;
+
+ bufferlist read_bl;
+ uint64_t max_chunk_size = store->get_max_chunk_size();
+
+ RGWObjState* astate;
+ int r = source->get_state(dpp, &astate, true);
+ if (r < 0)
+ return r;
+
+ if (!astate->exists) {
+ return -ENOENT;
+ }
+
+ if (astate->size == 0) {
+ end = 0;
+ } else if (end >= (int64_t)astate->size) {
+ end = astate->size - 1;
+ }
+
+ if (end < 0)
+ len = 0;
+ else
+ len = end - ofs + 1;
+
+
+ if (len > max_chunk_size) {
+ len = max_chunk_size;
+ }
+
+ int head_data_size = astate->data.length();
+ bool reading_from_head = (ofs < head_data_size);
+
+ if (reading_from_head) {
+ if (astate) { // && astate->prefetch_data)?
+ if (!ofs && astate->data.length() >= len) {
+ bl = astate->data;
+ return bl.length();
+ }
+
+ if (ofs < astate->data.length()) {
+ unsigned copy_len = std::min((uint64_t)head_data_size - ofs, len);
+ astate->data.begin(ofs).copy(copy_len, bl);
+ return bl.length();
+ }
+ }
+ }
+
+ /* tail object */
+ int part_num = (ofs / max_chunk_size);
+ /* XXX: Handle multipart_str */
+ raw_obj read_obj(store, source->get_bucket_info().bucket.name, astate->obj.key.name,
+ astate->obj.key.instance, astate->obj.key.ns, source->obj_id, "0.0", part_num);
+
+ read_len = len;
+
+ ldpp_dout(dpp, 20) << "dbstore->read obj-ofs=" << ofs << " read_ofs=" << read_ofs << " read_len=" << read_len << dendl;
+
+ // read from non head object
+ r = read_obj.read(dpp, read_ofs, read_len, bl);
+
+ if (r < 0) {
+ return r;
+ }
+
+ return bl.length();
+}
+
+static int _get_obj_iterate_cb(const DoutPrefixProvider *dpp,
+ const DB::raw_obj& read_obj, off_t obj_ofs,
+ off_t len, bool is_head_obj,
+ RGWObjState* astate, void *arg)
+{
+ struct db_get_obj_data* d = static_cast<struct db_get_obj_data*>(arg);
+ return d->store->get_obj_iterate_cb(dpp, read_obj, obj_ofs, len,
+ is_head_obj, astate, arg);
+}
+
+int DB::get_obj_iterate_cb(const DoutPrefixProvider *dpp,
+ const raw_obj& read_obj, off_t obj_ofs,
+ off_t len, bool is_head_obj,
+ RGWObjState* astate, void *arg)
+{
+ struct db_get_obj_data* d = static_cast<struct db_get_obj_data*>(arg);
+ bufferlist bl;
+ int r = 0;
+
+ if (is_head_obj) {
+ bl = astate->data;
+ } else {
+ // read from non head object
+ raw_obj robj = read_obj;
+ /* read entire data. So pass offset as '0' & len as '-1' */
+ r = robj.read(dpp, 0, -1, bl);
+
+ if (r <= 0) {
+ return r;
+ }
+ }
+
+ unsigned read_ofs = 0, read_len = 0;
+ while (read_ofs < bl.length()) {
+ unsigned chunk_len = std::min((uint64_t)bl.length() - read_ofs, (uint64_t)len);
+ r = d->client_cb->handle_data(bl, read_ofs, chunk_len);
+ if (r < 0)
+ return r;
+ read_ofs += chunk_len;
+ read_len += chunk_len;
+ ldpp_dout(dpp, 20) << "dbstore->get_obj_iterate_cb obj-ofs=" << obj_ofs << " len=" << len << " chunk_len = " << chunk_len << " read_len = " << read_len << dendl;
+ }
+
+
+ d->offset += read_len;
+
+ return read_len;
+}
+
+int DB::Object::Read::iterate(const DoutPrefixProvider *dpp, int64_t ofs, int64_t end, RGWGetDataCB *cb)
+{
+ DB *store = source->get_store();
+ const uint64_t chunk_size = store->get_max_chunk_size();
+
+ db_get_obj_data data(store, cb, ofs);
+
+ int r = source->iterate_obj(dpp, source->get_bucket_info(), state.obj,
+ ofs, end, chunk_size, _get_obj_iterate_cb, &data);
+ if (r < 0) {
+ ldpp_dout(dpp, 0) << "iterate_obj() failed with " << r << dendl;
+ return r;
+ }
+
+ return 0;
+}
+
+int DB::Object::iterate_obj(const DoutPrefixProvider *dpp,
+ const RGWBucketInfo& bucket_info, const rgw_obj& obj,
+ off_t ofs, off_t end, uint64_t max_chunk_size,
+ iterate_obj_cb cb, void *arg)
+{
+ DB *store = get_store();
+ uint64_t len;
+ RGWObjState* astate;
+
+ int r = get_state(dpp, &astate, true);
+ if (r < 0) {
+ return r;
+ }
+
+ if (!astate->exists) {
+ return -ENOENT;
+ }
+
+ if (end < 0)
+ len = 0;
+ else
+ len = end - ofs + 1;
+
+ /* XXX: Will it really help to store all parts info in astate like manifest in Rados? */
+ int part_num = 0;
+ int head_data_size = astate->data.length();
+
+ while (ofs <= end && (uint64_t)ofs < astate->size) {
+ part_num = (ofs / max_chunk_size);
+ uint64_t read_len = std::min(len, max_chunk_size);
+
+ /* XXX: Handle multipart_str */
+ raw_obj read_obj(store, get_bucket_info().bucket.name, astate->obj.key.name,
+ astate->obj.key.instance, astate->obj.key.ns, obj_id, "0.0", part_num);
+ bool reading_from_head = (ofs < head_data_size);
+
+ r = cb(dpp, read_obj, ofs, read_len, reading_from_head, astate, arg);
+ if (r <= 0) {
+ return r;
+ }
+ /* r refers to chunk_len (no. of bytes) handled in cb */
+ len -= r;
+ ofs += r;
+ }
+
+ return 0;
+}
+
+int DB::Object::Write::prepare(const DoutPrefixProvider* dpp)
+{
+ DB *store = target->get_store();
+
+ int ret = -1;
+
+ /* XXX: handle assume_noent */
+
+ obj_state.obj = target->obj;
+
+ if (target->obj_id.empty()) {
+ if (!target->obj.key.instance.empty() && (target->obj.key.instance != "null")) {
+ /* versioned object. Set obj_id same as versionID/instance */
+ target->obj_id = target->obj.key.instance;
+ } else {
+ // generate obj_id
+ char buf[33];
+ gen_rand_alphanumeric(store->ctx(), buf, sizeof(buf) - 1);
+ target->obj_id = buf;
+ }
+ }
+
+ ret = 0;
+ return ret;
+}
+
+/* writes tail objects */
+int DB::Object::Write::write_data(const DoutPrefixProvider* dpp,
+ bufferlist& data, uint64_t ofs) {
+ DB *store = target->get_store();
+ /* tail objects */
+ /* XXX: Split into parts each of max_chunk_size. But later make tail
+ * object chunk size limit to sqlite blob limit */
+ int part_num = 0;
+
+ uint64_t max_chunk_size = store->get_max_chunk_size();
+
+ /* tail_obj ofs should be greater than max_head_size */
+ if (mp_part_str == "0.0") { // ensure not multipart meta object
+ if (ofs < store->get_max_head_size()) {
+ return -1;
+ }
+ }
+
+ uint64_t end = data.length();
+ uint64_t write_ofs = 0;
+ /* as we are writing max_chunk_size at a time in sal_dbstore DBAtomicWriter::process(),
+ * maybe this while loop is not needed
+ */
+ while (write_ofs < end) {
+ part_num = (ofs / max_chunk_size);
+ uint64_t len = std::min(end, max_chunk_size);
+
+ /* XXX: Handle multipart_str */
+ raw_obj write_obj(store, target->get_bucket_info().bucket.name, obj_state.obj.key.name,
+ obj_state.obj.key.instance, obj_state.obj.key.ns, target->obj_id, mp_part_str, part_num);
+
+
+ ldpp_dout(dpp, 20) << "dbstore->write obj-ofs=" << ofs << " write_len=" << len << dendl;
+
+ // write into non head object
+ int r = write_obj.write(dpp, ofs, write_ofs, len, data);
+ if (r < 0) {
+ return r;
+ }
+ /* r refers to chunk_len (no. of bytes) handled in raw_obj::write */
+ len -= r;
+ ofs += r;
+ write_ofs += r;
+ }
+
+ return 0;
+}
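The part addressing used by write_data() (and by the read path earlier) is a plain division by the chunk size. A small illustrative helper follows; the modulo half is an assumption added for exposition, since the code above only derives part_num:

#include <cstdint>
#include <utility>

// Map a logical object offset onto (tail part number, offset within that part).
std::pair<int, uint64_t> tail_part_for(uint64_t ofs, uint64_t max_chunk_size) {
  const int part_num = static_cast<int>(ofs / max_chunk_size);  // which tail part holds this byte
  const uint64_t part_ofs = ofs % max_chunk_size;               // position inside that part
  return {part_num, part_ofs};
}
// e.g. with max_chunk_size = 4 MiB, ofs = 9 MiB lands in part 2 at offset 1 MiB.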
+
+/* Write metadata & head object data */
+int DB::Object::Write::_do_write_meta(const DoutPrefixProvider *dpp,
+ uint64_t size, uint64_t accounted_size,
+ map<string, bufferlist>& attrs,
+ bool assume_noent, bool modify_tail)
+{
+ DB *store = target->get_store();
+
+ RGWObjState* state = &obj_state;
+ map<string, bufferlist> *attrset;
+ DBOpParams params = {};
+ int ret = 0;
+ string etag;
+ string content_type;
+ bufferlist acl_bl;
+ string storage_class;
+
+ map<string, bufferlist>::iterator iter;
+
+ store->InitializeParams(dpp, &params);
+ target->InitializeParamsfromObject(dpp, &params);
+
+ obj_state = params.op.obj.state;
+
+ if (real_clock::is_zero(meta.set_mtime)) {
+ meta.set_mtime = real_clock::now();
+ }
+
+ attrset = &state->attrset;
+ if (target->bucket_info.obj_lock_enabled() && target->bucket_info.obj_lock.has_rule()) {
+ // && meta.flags == PUT_OBJ_CREATE) {
+ auto iter = attrs.find(RGW_ATTR_OBJECT_RETENTION);
+ if (iter == attrs.end()) {
+ real_time lock_until_date = target->bucket_info.obj_lock.get_lock_until_date(meta.set_mtime);
+ string mode = target->bucket_info.obj_lock.get_mode();
+ RGWObjectRetention obj_retention(mode, lock_until_date);
+ bufferlist bl;
+ obj_retention.encode(bl);
+ (*attrset)[RGW_ATTR_OBJECT_RETENTION] = bl;
+ }
+ }
+
+ state->mtime = meta.set_mtime;
+
+ if (meta.data) {
+ /* if we want to overwrite the data, we also want to overwrite the
+ xattrs, so just remove the object */
+ params.op.obj.head_data = *meta.data;
+ }
+
+ if (meta.rmattrs) {
+ for (iter = meta.rmattrs->begin(); iter != meta.rmattrs->end(); ++iter) {
+ const string& name = iter->first;
+ (*attrset).erase(name.c_str());
+ }
+ }
+
+ if (meta.manifest) {
+ storage_class = meta.manifest->get_tail_placement().placement_rule.storage_class;
+
+ /* remove existing manifest attr */
+ iter = attrs.find(RGW_ATTR_MANIFEST);
+ if (iter != attrs.end())
+ attrs.erase(iter);
+
+ bufferlist bl;
+ encode(*meta.manifest, bl);
+ (*attrset)[RGW_ATTR_MANIFEST] = bl;
+ }
+
+ for (iter = attrs.begin(); iter != attrs.end(); ++iter) {
+ const string& name = iter->first;
+ bufferlist& bl = iter->second;
+
+ if (!bl.length())
+ continue;
+
+ (*attrset)[name.c_str()] = bl;
+
+ if (name.compare(RGW_ATTR_ETAG) == 0) {
+ etag = rgw_bl_str(bl);
+ params.op.obj.etag = etag;
+ } else if (name.compare(RGW_ATTR_CONTENT_TYPE) == 0) {
+ content_type = rgw_bl_str(bl);
+ } else if (name.compare(RGW_ATTR_ACL) == 0) {
+ acl_bl = bl;
+ }
+ }
+
+ if (!storage_class.empty()) {
+ bufferlist bl;
+ bl.append(storage_class);
+ (*attrset)[RGW_ATTR_STORAGE_CLASS] = bl;
+ }
+
+ params.op.obj.state = *state;
+ params.op.obj.state.exists = true;
+ params.op.obj.state.size = size;
+ params.op.obj.state.accounted_size = accounted_size;
+ params.op.obj.owner = target->get_bucket_info().owner.id;
+ params.op.obj.category = meta.category;
+
+ if (meta.mtime) {
+ *meta.mtime = meta.set_mtime;
+ }
+
+ params.op.query_str = "meta";
+ params.op.obj.obj_id = target->obj_id;
+
+ /* Check if versioned */
+ bool is_versioned = !target->obj.key.instance.empty() && (target->obj.key.instance != "null");
+ params.op.obj.is_versioned = is_versioned;
+
+ if (is_versioned && (params.op.obj.category == RGWObjCategory::Main)) {
+ /* versioned object */
+ params.op.obj.flags |= rgw_bucket_dir_entry::FLAG_VER;
+ }
+ ret = store->ProcessOp(dpp, "PutObject", &params);
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"In PutObject failed err:(" <<ret<<")" << dendl;
+ goto out;
+ }
+
+
+out:
+ if (ret < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: do_write_meta returned ret=" << ret << dendl;
+ }
+
+ meta.canceled = true;
+
+ return ret;
+}
+
+int DB::Object::Write::write_meta(const DoutPrefixProvider *dpp, uint64_t size, uint64_t accounted_size,
+ map<string, bufferlist>& attrs)
+{
+ bool assume_noent = false;
+ /* handle assume_noent */
+ int r = _do_write_meta(dpp, size, accounted_size, attrs, assume_noent, meta.modify_tail);
+ return r;
+}
+
+int DB::Object::Delete::delete_obj(const DoutPrefixProvider *dpp) {
+ int ret = 0;
+ DBOpParams del_params = {};
+ bool versioning_enabled = ((params.versioning_status & BUCKET_VERSIONED) == BUCKET_VERSIONED);
+ bool versioning_suspended = ((params.versioning_status & BUCKET_VERSIONS_SUSPENDED) == BUCKET_VERSIONS_SUSPENDED);
+ bool regular_obj = true;
+ std::string versionid = target->obj.key.instance;
+
+ ret = target->get_object_impl(dpp, del_params);
+
+ if (ret < 0 && ret != -ENOENT) {
+ ldpp_dout(dpp, 0)<<"GetObject during delete failed err:(" <<ret<<")" << dendl;
+ return ret;
+ }
+
+ regular_obj = (del_params.op.obj.category == RGWObjCategory::Main);
+ if (!ret) {
+ if (!versionid.empty()) {
+ // version-id is provided
+ ret = delete_obj_impl(dpp, del_params);
+ return ret;
+ } else { // version-id is empty..
+ /*
+ * case: bucket_versioned
+ * create_delete_marker;
+ * case: bucket_suspended
+ * delete entry
+ * create delete marker with version-id null;
+ * default:
+ * just delete the entry
+ */
+ if (versioning_suspended && regular_obj) {
+ ret = delete_obj_impl(dpp, del_params);
+ ret = create_dm(dpp, del_params);
+ } else if (versioning_enabled && regular_obj) {
+ ret = create_dm(dpp, del_params);
+ } else {
+ ret = delete_obj_impl(dpp, del_params);
+ }
+ }
+ } else { // ret == -ENOENT
+ /* case: VersionID given
+ * return -ENOENT
+ * else: // may or may not be versioned object
+ * Listversionedobjects
+ * if (list_entries.empty()) {
+ * nothing to do..return ENOENT
+ * } else {
+ * read top entry
+ * if (top.flags | FLAG_DELETE_MARKER) {
+ * // nothing to do
+ * return -ENOENT;
+ * }
+ * if (bucket_versioned) {
+ * // create delete marker with new version-id
+ * } else if (bucket_suspended) {
+ * // create delete marker with version-id null
+ * }
+ * bucket cannot be in unversioned state post having versions
+ * }
+ */
+ if (!versionid.empty()) {
+ return -ENOENT;
+ }
+ ret = target->list_versioned_objects(dpp, del_params.op.obj.list_entries);
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"ListVersionedObjects failed err:(" <<ret<<")" << dendl;
+ return ret;
+ }
+ if (del_params.op.obj.list_entries.empty()) {
+ return -ENOENT;
+ }
+ auto &ent = del_params.op.obj.list_entries.front();
+ if (ent.flags & rgw_bucket_dir_entry::FLAG_DELETE_MARKER) {
+ // for now do not create another delete marker..just exit
+ return 0;
+ }
+ ret = create_dm(dpp, del_params);
+ }
+ return ret;
+}
+
+int DB::Object::Delete::delete_obj_impl(const DoutPrefixProvider *dpp,
+ DBOpParams& del_params) {
+ int ret = 0;
+ DB *store = target->get_store();
+
+ ret = store->ProcessOp(dpp, "DeleteObject", &del_params);
+ if (ret) {
+ ldpp_dout(dpp, 0) << "In DeleteObject failed err:(" <<ret<<")" << dendl;
+ return ret;
+ }
+
+ /* Now that tail objects are associated with objectID, they are not deleted
+ * as part of this DeleteObj operation. Such tail objects (with no head object
+ * in *.object.table are cleaned up later by GC thread.
+ *
+ * To avoid races between writes/reads & GC delete, mtime is maintained for each
+ * tail object. This mtime is updated when tail object is written and also when
+ * its corresponding head object is deleted (like here in this case).
+ */
+ DBOpParams update_params = del_params;
+ update_params.op.obj.state.mtime = real_clock::now();
+ ret = store->ProcessOp(dpp, "UpdateObjectData", &update_params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0) << "Updating tail objects mtime failed err:(" <<ret<<")" << dendl;
+ }
+ return ret;
+}
+
+/*
+ * a) if no versionID specified,
+ * - create a delete marker with
+ * - new version/instanceID (if bucket versioned)
+ * - null versionID (if versioning suspended)
+ */
+int DB::Object::Delete::create_dm(const DoutPrefixProvider *dpp,
+ DBOpParams& del_params) {
+
+ DB *store = target->get_store();
+ bool versioning_suspended = ((params.versioning_status & BUCKET_VERSIONS_SUSPENDED) == BUCKET_VERSIONS_SUSPENDED);
+ int ret = -1;
+ DBOpParams olh_params = {};
+ std::string version_id;
+ DBOpParams next_params = del_params;
+
+ version_id = del_params.op.obj.state.obj.key.instance;
+
+ DBOpParams dm_params = del_params;
+
+ // create delete marker
+
+ store->InitializeParams(dpp, &dm_params);
+ target->InitializeParamsfromObject(dpp, &dm_params);
+ dm_params.op.obj.category = RGWObjCategory::None;
+
+ if (versioning_suspended) {
+ dm_params.op.obj.state.obj.key.instance = "null";
+ } else {
+ store->gen_rand_obj_instance_name(&dm_params.op.obj.state.obj.key);
+ dm_params.op.obj.obj_id = dm_params.op.obj.state.obj.key.instance;
+ }
+
+ dm_params.op.obj.flags |= (rgw_bucket_dir_entry::FLAG_DELETE_MARKER);
+
+ ret = store->ProcessOp(dpp, "PutObject", &dm_params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0) << "delete_olh: failed to create delete marker - err:(" <<ret<<")" << dendl;
+ return ret;
+ }
+ result.delete_marker = true;
+ result.version_id = dm_params.op.obj.state.obj.key.instance;
+ return ret;
+}
+
+int DB::get_entry(const std::string& oid, const std::string& marker,
+ std::unique_ptr<rgw::sal::Lifecycle::LCEntry>* entry)
+{
+ int ret = 0;
+ const DoutPrefixProvider *dpp = get_def_dpp();
+
+ DBOpParams params = {};
+ InitializeParams(dpp, &params);
+
+ params.op.lc_entry.index = oid;
+ params.op.lc_entry.entry.set_bucket(marker);
+
+ params.op.query_str = "get_entry";
+ ret = ProcessOp(dpp, "GetLCEntry", &params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"In GetLCEntry failed err:(" <<ret<<") " << dendl;
+ goto out;
+ }
+
+  if (params.op.lc_entry.entry.get_start_time() != 0) { // ensure the entry was found
+ rgw::sal::Lifecycle::LCEntry* e;
+ e = new rgw::sal::StoreLifecycle::StoreLCEntry(params.op.lc_entry.entry);
+ if (!e) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ entry->reset(e);
+ }
+
+out:
+ return ret;
+}
+
+int DB::get_next_entry(const std::string& oid, const std::string& marker,
+ std::unique_ptr<rgw::sal::Lifecycle::LCEntry>* entry)
+{
+ int ret = 0;
+ const DoutPrefixProvider *dpp = get_def_dpp();
+
+ DBOpParams params = {};
+ InitializeParams(dpp, &params);
+
+ params.op.lc_entry.index = oid;
+ params.op.lc_entry.entry.set_bucket(marker);
+
+ params.op.query_str = "get_next_entry";
+ ret = ProcessOp(dpp, "GetLCEntry", &params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"In GetLCEntry failed err:(" <<ret<<") " << dendl;
+ goto out;
+ }
+
+  if (params.op.lc_entry.entry.get_start_time() != 0) { // ensure the entry was found
+ rgw::sal::Lifecycle::LCEntry* e;
+ e = new rgw::sal::StoreLifecycle::StoreLCEntry(params.op.lc_entry.entry);
+ if (!e) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ entry->reset(e);
+ }
+
+out:
+ return ret;
+}
+
+int DB::set_entry(const std::string& oid, rgw::sal::Lifecycle::LCEntry& entry)
+{
+ int ret = 0;
+ const DoutPrefixProvider *dpp = get_def_dpp();
+
+ DBOpParams params = {};
+ InitializeParams(dpp, &params);
+
+ params.op.lc_entry.index = oid;
+ params.op.lc_entry.entry = entry;
+
+ ret = ProcessOp(dpp, "InsertLCEntry", &params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"In InsertLCEntry failed err:(" <<ret<<") " << dendl;
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int DB::list_entries(const std::string& oid, const std::string& marker,
+ uint32_t max_entries, std::vector<std::unique_ptr<rgw::sal::Lifecycle::LCEntry>>& entries)
+{
+ int ret = 0;
+ const DoutPrefixProvider *dpp = get_def_dpp();
+
+ entries.clear();
+
+ DBOpParams params = {};
+ InitializeParams(dpp, &params);
+
+ params.op.lc_entry.index = oid;
+ params.op.lc_entry.min_marker = marker;
+ params.op.list_max_count = max_entries;
+
+ ret = ProcessOp(dpp, "ListLCEntries", &params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"In ListLCEntries failed err:(" <<ret<<") " << dendl;
+ goto out;
+ }
+
+ for (auto& entry : params.op.lc_entry.list_entries) {
+ entries.push_back(std::make_unique<rgw::sal::StoreLifecycle::StoreLCEntry>(std::move(entry)));
+ }
+
+out:
+ return ret;
+}
+
+int DB::rm_entry(const std::string& oid, rgw::sal::Lifecycle::LCEntry& entry)
+{
+ int ret = 0;
+ const DoutPrefixProvider *dpp = get_def_dpp();
+
+ DBOpParams params = {};
+ InitializeParams(dpp, &params);
+
+ params.op.lc_entry.index = oid;
+ params.op.lc_entry.entry = entry;
+
+ ret = ProcessOp(dpp, "RemoveLCEntry", &params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"In RemoveLCEntry failed err:(" <<ret<<") " << dendl;
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int DB::get_head(const std::string& oid, std::unique_ptr<rgw::sal::Lifecycle::LCHead>* head)
+{
+ int ret = 0;
+ const DoutPrefixProvider *dpp = get_def_dpp();
+
+ DBOpParams params = {};
+ InitializeParams(dpp, &params);
+
+ params.op.lc_head.index = oid;
+
+ ret = ProcessOp(dpp, "GetLCHead", &params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"In GetLCHead failed err:(" <<ret<<") " << dendl;
+ goto out;
+ }
+
+ *head = std::make_unique<rgw::sal::StoreLifecycle::StoreLCHead>(params.op.lc_head.head);
+
+out:
+ return ret;
+}
+
+int DB::put_head(const std::string& oid, rgw::sal::Lifecycle::LCHead& head)
+{
+ int ret = 0;
+ const DoutPrefixProvider *dpp = get_def_dpp();
+
+ DBOpParams params = {};
+ InitializeParams(dpp, &params);
+
+ params.op.lc_head.index = oid;
+ params.op.lc_head.head = head;
+
+ ret = ProcessOp(dpp, "InsertLCHead", &params);
+
+ if (ret) {
+ ldpp_dout(dpp, 0)<<"In InsertLCHead failed err:(" <<ret<<") " << dendl;
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int DB::delete_stale_objs(const DoutPrefixProvider *dpp, const std::string& bucket,
+ uint32_t min_wait) {
+ DBOpParams params = {};
+ int ret = -1;
+
+ params.op.bucket.info.bucket.name = bucket;
+ /* Verify if bucket exists.
+   * XXX: This is needed for now to create the object map of the bucket
+ * in SQLGetBucket
+ */
+ InitializeParams(dpp, &params);
+ ret = ProcessOp(dpp, "GetBucket", &params);
+ if (ret) {
+ ldpp_dout(dpp, 0) << "In GetBucket failed err:(" <<ret<<")" << dendl;
+ return ret;
+ }
+
+ ldpp_dout(dpp, 20) << " Deleting stale_objs of bucket( " << bucket <<")" << dendl;
+ /* XXX: handle reads racing with delete here. Simple approach is maybe
+ * to use locks or sqlite transactions.
+ */
+ InitializeParams(dpp, &params);
+ params.op.obj.state.mtime = (real_clock::now() - make_timespan(min_wait));
+ ret = ProcessOp(dpp, "DeleteStaleObjectData", &params);
+ if (ret) {
+ ldpp_dout(dpp, 0) << "In DeleteStaleObjectData failed err:(" <<ret<<")" << dendl;
+ }
+
+ return ret;
+}
+
+void *DB::GC::entry() {
+ do {
+ std::unique_lock<std::mutex> lk(mtx);
+
+ ldpp_dout(dpp, 2) << " DB GC started " << dendl;
+ int max = 100;
+ RGWUserBuckets buckets;
+ bool is_truncated = false;
+
+ do {
+ std::string& marker = bucket_marker;
+ rgw_user user;
+ user.id = user_marker;
+ buckets.clear();
+ is_truncated = false;
+
+ int r = db->list_buckets(dpp, "all", user, marker, string(),
+ max, false, &buckets, &is_truncated);
+
+ if (r < 0) { //do nothing? retry later ?
+ break;
+ }
+
+ for (const auto& ent : buckets.get_buckets()) {
+ const std::string &bname = ent.first;
+
+ r = db->delete_stale_objs(dpp, bname, gc_obj_min_wait);
+
+ if (r < 0) { //do nothing? skip to next entry?
+ ldpp_dout(dpp, 2) << " delete_stale_objs failed for bucket( " << bname <<")" << dendl;
+ }
+ bucket_marker = bname;
+ user_marker = user.id;
+
+ /* XXX: If using locks, unlock here and reacquire in the next iteration */
+ cv.wait_for(lk, std::chrono::milliseconds(100));
+ if (stop_signalled) {
+ goto done;
+ }
+ }
+ } while(is_truncated);
+
+ bucket_marker.clear();
+ cv.wait_for(lk, std::chrono::milliseconds(gc_interval*10));
+ } while(! stop_signalled);
+
+done:
+ return nullptr;
+}
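+
+/* The loop above exits only when stop_signalled is observed after a condition
+ * variable wakeup. A minimal sketch of the expected shutdown path, assuming a
+ * hypothetical signal_stop() helper on DB::GC (the actual stop/teardown code
+ * lives outside this function):
+ *
+ *   void DB::GC::signal_stop() {
+ *     std::lock_guard<std::mutex> lk(mtx); // same mutex entry() waits on
+ *     stop_signalled = true;               // checked after each wait_for()
+ *     cv.notify_one();                     // wake the sleeping GC thread
+ *   }
+ */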
+
+} } // namespace rgw::store
+
diff --git a/src/rgw/driver/dbstore/common/dbstore.h b/src/rgw/driver/dbstore/common/dbstore.h
new file mode 100644
index 000000000..b26cc116e
--- /dev/null
+++ b/src/rgw/driver/dbstore/common/dbstore.h
@@ -0,0 +1,2016 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string>
+#include <stdio.h>
+#include <iostream>
+#include <mutex>
+#include <condition_variable>
+#include "fmt/format.h"
+#include <map>
+#include "rgw_sal_store.h"
+#include "rgw_common.h"
+#include "driver/rados/rgw_bucket.h"
+#include "global/global_context.h"
+#include "global/global_init.h"
+#include "common/ceph_context.h"
+#include "rgw_obj_manifest.h"
+#include "rgw_multi.h"
+
+namespace rgw { namespace store {
+
+class DB;
+
+struct DBOpUserInfo {
+ RGWUserInfo uinfo = {};
+ obj_version user_version;
+ rgw::sal::Attrs user_attrs;
+};
+
+struct DBOpBucketInfo {
+ RGWBucketEnt ent; // maybe not needed. not used in create/get_bucket
+ RGWBucketInfo info;
+ RGWUser* owner = nullptr;
+ rgw::sal::Attrs bucket_attrs;
+ obj_version bucket_version;
+ ceph::real_time mtime;
+ // used for list query
+ std::string min_marker;
+ std::string max_marker;
+ std::list<RGWBucketEnt> list_entries;
+};
+
+struct DBOpObjectInfo {
+ RGWAccessControlPolicy acls;
+ RGWObjState state = {};
+
+ /* Below are taken from rgw_bucket_dir_entry */
+ RGWObjCategory category;
+ std::string etag;
+ std::string owner;
+ std::string owner_display_name;
+ std::string content_type;
+ std::string storage_class;
+ bool appendable;
+ uint64_t index_ver;
+ std::string tag;
+ uint16_t flags;
+ uint64_t versioned_epoch;
+
+ /* from state.manifest (RGWObjManifest) */
+ std::map<uint64_t, RGWObjManifestPart> objs;
+ uint64_t head_size{0};
+ rgw_placement_rule head_placement_rule;
+ uint64_t max_head_size{0};
+ std::string obj_id;
+ rgw_bucket_placement tail_placement; /* might be different than the original bucket,
+ as object might have been copied across pools */
+ std::map<uint64_t, RGWObjManifestRule> rules;
+ std::string tail_instance; /* tail object's instance */
+
+
+ /* Obj's omap <key,value> store */
+ std::map<std::string, bufferlist> omap;
+
+ /* Extra fields */
+ bool is_multipart;
+ std::list<RGWUploadPartInfo> mp_parts;
+
+ bufferlist head_data;
+ std::string min_marker;
+ std::string max_marker;
+ std::string prefix;
+ std::list<rgw_bucket_dir_entry> list_entries;
+ /* XXX: Maybe use std::vector instead of std::list */
+
+ /* for versioned objects */
+ bool is_versioned;
+ uint64_t version_num = 0;
+};
+
+struct DBOpObjectDataInfo {
+ RGWObjState state;
+ uint64_t part_num;
+ std::string multipart_part_str;
+ uint64_t offset;
+ uint64_t size;
+ bufferlist data{};
+};
+
+struct DBOpLCHeadInfo {
+ std::string index;
+ rgw::sal::StoreLifecycle::StoreLCHead head;
+};
+
+struct DBOpLCEntryInfo {
+ std::string index;
+ rgw::sal::StoreLifecycle::StoreLCEntry entry;
+ // used for list query
+ std::string min_marker;
+ std::list<rgw::sal::StoreLifecycle::StoreLCEntry> list_entries;
+};
+
+struct DBOpInfo {
+ std::string name; // Op name
+ /* Support only single access_key for now. So store
+ * it separately as primary access_key_id & secret to
+ * be able to query easily.
+ *
+ * XXX: Swift keys and subuser not supported for now */
+ DBOpUserInfo user;
+ std::string query_str;
+ DBOpBucketInfo bucket;
+ DBOpObjectInfo obj;
+ DBOpObjectDataInfo obj_data;
+ DBOpLCHeadInfo lc_head;
+ DBOpLCEntryInfo lc_entry;
+ uint64_t list_max_count;
+};
+
+struct DBOpParams {
+ CephContext *cct;
+
+ /* Tables */
+ std::string user_table;
+ std::string bucket_table;
+ std::string object_table;
+
+ /* Ops*/
+ DBOpInfo op;
+
+ std::string objectdata_table;
+ std::string object_trigger;
+ std::string object_view;
+ std::string quota_table;
+ std::string lc_head_table;
+ std::string lc_entry_table;
+ std::string obj;
+};
+
+/* Used for prepared schemas.
+ * The difference from the structure above is that all
+ * the fields are strings here, to accommodate whatever
+ * identifier style the backend db uses. By default they are
+ * initialized with sqlitedb-style identifiers and can be
+ * overridden using InitPrepareParams().
+ *
+ * These identifiers are used in prepare and bind statements
+ * to look up the right index of each param.
+ */
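+
+/* For illustration, with the default sqlite-style identifiers below and an LC
+ * entry table named 'lc_entry' (the table name is only an example),
+ * InsertLCEntryOp::Schema() further down renders a statement such as:
+ *
+ *   INSERT OR REPLACE INTO 'lc_entry'
+ *     (LCIndex, BucketName, StartTime, Status)
+ *     VALUES (:index, :bucket_name, :start_time, :status)
+ *
+ * and the backend then binds concrete values to :index, :bucket_name, etc.
+ */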
+struct DBOpUserPrepareInfo {
+ static constexpr const char* user_id = ":user_id";
+ static constexpr const char* tenant = ":tenant";
+ static constexpr const char* ns = ":ns";
+ static constexpr const char* display_name = ":display_name";
+ static constexpr const char* user_email = ":user_email";
+ /* Support only single access_key for now. So store
+ * it separately as primary access_key_id & secret to
+ * be able to query easily.
+ *
+   * In future, when multiple access keys need to be supported and
+   * queried, it would be better to maintain them in a separate table.
+ */
+ static constexpr const char* access_keys_id = ":access_keys_id";
+ static constexpr const char* access_keys_secret = ":access_keys_secret";
+ static constexpr const char* access_keys = ":access_keys";
+ static constexpr const char* swift_keys = ":swift_keys";
+ static constexpr const char* subusers = ":subusers";
+ static constexpr const char* suspended = ":suspended";
+ static constexpr const char* max_buckets = ":max_buckets";
+ static constexpr const char* op_mask = ":op_mask";
+ static constexpr const char* user_caps = ":user_caps";
+ static constexpr const char* admin = ":admin";
+ static constexpr const char* system = ":system";
+ static constexpr const char* placement_name = ":placement_name";
+ static constexpr const char* placement_storage_class = ":placement_storage_class";
+ static constexpr const char* placement_tags = ":placement_tags";
+ static constexpr const char* bucket_quota = ":bucket_quota";
+ static constexpr const char* temp_url_keys = ":temp_url_keys";
+ static constexpr const char* user_quota = ":user_quota";
+ static constexpr const char* type = ":type";
+ static constexpr const char* mfa_ids = ":mfa_ids";
+ static constexpr const char* user_attrs = ":user_attrs";
+ static constexpr const char* user_ver = ":user_vers";
+ static constexpr const char* user_ver_tag = ":user_ver_tag";
+};
+
+struct DBOpBucketPrepareInfo {
+ static constexpr const char* bucket_name = ":bucket_name";
+ static constexpr const char* tenant = ":tenant";
+ static constexpr const char* marker = ":marker";
+ static constexpr const char* bucket_id = ":bucket_id";
+ static constexpr const char* size = ":size";
+ static constexpr const char* size_rounded = ":size_rounded";
+ static constexpr const char* creation_time = ":creation_time";
+ static constexpr const char* count = ":count";
+ static constexpr const char* placement_name = ":placement_name";
+ static constexpr const char* placement_storage_class = ":placement_storage_class";
+ /* ownerid - maps to DBOpUserPrepareInfo */
+ static constexpr const char* flags = ":flags";
+ static constexpr const char* zonegroup = ":zonegroup";
+ static constexpr const char* has_instance_obj = ":has_instance_obj";
+ static constexpr const char* quota = ":quota";
+ static constexpr const char* requester_pays = ":requester_pays";
+ static constexpr const char* has_website = ":has_website";
+ static constexpr const char* website_conf = ":website_conf";
+ static constexpr const char* swift_versioning = ":swift_versioning";
+ static constexpr const char* swift_ver_location = ":swift_ver_location";
+ static constexpr const char* mdsearch_config = ":mdsearch_config";
+ static constexpr const char* new_bucket_instance_id = ":new_bucket_instance_id";
+ static constexpr const char* obj_lock = ":obj_lock";
+ static constexpr const char* sync_policy_info_groups = ":sync_policy_info_groups";
+ static constexpr const char* bucket_attrs = ":bucket_attrs";
+ static constexpr const char* bucket_ver = ":bucket_vers";
+ static constexpr const char* bucket_ver_tag = ":bucket_ver_tag";
+ static constexpr const char* mtime = ":mtime";
+ static constexpr const char* min_marker = ":min_marker";
+ static constexpr const char* max_marker = ":max_marker";
+};
+
+struct DBOpObjectPrepareInfo {
+ static constexpr const char* obj_name = ":obj_name";
+ static constexpr const char* obj_instance = ":obj_instance";
+ static constexpr const char* obj_ns = ":obj_ns";
+ static constexpr const char* acls = ":acls";
+ static constexpr const char* index_ver = ":index_ver";
+ static constexpr const char* tag = ":tag";
+ static constexpr const char* flags = ":flags";
+ static constexpr const char* versioned_epoch = ":versioned_epoch";
+ static constexpr const char* obj_category = ":obj_category";
+ static constexpr const char* etag = ":etag";
+ static constexpr const char* owner = ":owner";
+ static constexpr const char* owner_display_name = ":owner_display_name";
+ static constexpr const char* storage_class = ":storage_class";
+ static constexpr const char* appendable = ":appendable";
+ static constexpr const char* content_type = ":content_type";
+ static constexpr const char* index_hash_source = ":index_hash_source";
+ static constexpr const char* obj_size = ":obj_size";
+ static constexpr const char* accounted_size = ":accounted_size";
+ static constexpr const char* mtime = ":mtime";
+ static constexpr const char* epoch = ":epoch";
+ static constexpr const char* obj_tag = ":obj_tag";
+ static constexpr const char* tail_tag = ":tail_tag";
+ static constexpr const char* write_tag = ":write_tag";
+ static constexpr const char* fake_tag = ":fake_tag";
+ static constexpr const char* shadow_obj = ":shadow_obj";
+ static constexpr const char* has_data = ":has_data";
+ static constexpr const char* is_versioned = ":is_versioned";
+ static constexpr const char* version_num = ":version_num";
+ static constexpr const char* pg_ver = ":pg_ver";
+ static constexpr const char* zone_short_id = ":zone_short_id";
+ static constexpr const char* obj_version = ":obj_version";
+ static constexpr const char* obj_version_tag = ":obj_version_tag";
+ static constexpr const char* obj_attrs = ":obj_attrs";
+ static constexpr const char* head_size = ":head_size";
+ static constexpr const char* max_head_size = ":max_head_size";
+ static constexpr const char* obj_id = ":obj_id";
+ static constexpr const char* tail_instance = ":tail_instance";
+ static constexpr const char* head_placement_rule_name = ":head_placement_rule_name";
+ static constexpr const char* head_placement_storage_class = ":head_placement_storage_class";
+ static constexpr const char* tail_placement_rule_name = ":tail_placement_rule_name";
+ static constexpr const char* tail_placement_storage_class = ":tail_placement_storage_class";
+ static constexpr const char* manifest_part_objs = ":manifest_part_objs";
+ static constexpr const char* manifest_part_rules = ":manifest_part_rules";
+ static constexpr const char* omap = ":omap";
+ static constexpr const char* is_multipart = ":is_multipart";
+ static constexpr const char* mp_parts = ":mp_parts";
+ static constexpr const char* head_data = ":head_data";
+ static constexpr const char* min_marker = ":min_marker";
+ static constexpr const char* max_marker = ":max_marker";
+ static constexpr const char* prefix = ":prefix";
+  /* The fields below are used to update the mp_parts obj name
+   * from the meta object to the src object on completion */
+ static constexpr const char* new_obj_name = ":new_obj_name";
+ static constexpr const char* new_obj_instance = ":new_obj_instance";
+ static constexpr const char* new_obj_ns = ":new_obj_ns";
+};
+
+struct DBOpObjectDataPrepareInfo {
+ static constexpr const char* part_num = ":part_num";
+ static constexpr const char* offset = ":offset";
+ static constexpr const char* data = ":data";
+ static constexpr const char* size = ":size";
+ static constexpr const char* multipart_part_str = ":multipart_part_str";
+};
+
+struct DBOpLCEntryPrepareInfo {
+ static constexpr const char* index = ":index";
+ static constexpr const char* bucket_name = ":bucket_name";
+ static constexpr const char* start_time = ":start_time";
+ static constexpr const char* status = ":status";
+ static constexpr const char* min_marker = ":min_marker";
+};
+
+struct DBOpLCHeadPrepareInfo {
+ static constexpr const char* index = ":index";
+ static constexpr const char* start_date = ":start_date";
+ static constexpr const char* marker = ":marker";
+};
+
+struct DBOpPrepareInfo {
+ DBOpUserPrepareInfo user;
+ std::string_view query_str; // view into DBOpInfo::query_str
+ DBOpBucketPrepareInfo bucket;
+ DBOpObjectPrepareInfo obj;
+ DBOpObjectDataPrepareInfo obj_data;
+ DBOpLCHeadPrepareInfo lc_head;
+ DBOpLCEntryPrepareInfo lc_entry;
+ static constexpr const char* list_max_count = ":list_max_count";
+};
+
+struct DBOpPrepareParams {
+ /* Tables */
+ std::string user_table;
+ std::string bucket_table;
+ std::string object_table;
+
+ /* Ops */
+ DBOpPrepareInfo op;
+
+
+ std::string objectdata_table;
+ std::string object_trigger;
+ std::string object_view;
+ std::string quota_table;
+ std::string lc_head_table;
+ std::string lc_entry_table;
+};
+
+struct DBOps {
+ std::shared_ptr<class InsertUserOp> InsertUser;
+ std::shared_ptr<class RemoveUserOp> RemoveUser;
+ std::shared_ptr<class GetUserOp> GetUser;
+ std::shared_ptr<class InsertBucketOp> InsertBucket;
+ std::shared_ptr<class UpdateBucketOp> UpdateBucket;
+ std::shared_ptr<class RemoveBucketOp> RemoveBucket;
+ std::shared_ptr<class GetBucketOp> GetBucket;
+ std::shared_ptr<class ListUserBucketsOp> ListUserBuckets;
+ std::shared_ptr<class InsertLCEntryOp> InsertLCEntry;
+ std::shared_ptr<class RemoveLCEntryOp> RemoveLCEntry;
+ std::shared_ptr<class GetLCEntryOp> GetLCEntry;
+ std::shared_ptr<class ListLCEntriesOp> ListLCEntries;
+ std::shared_ptr<class InsertLCHeadOp> InsertLCHead;
+ std::shared_ptr<class RemoveLCHeadOp> RemoveLCHead;
+ std::shared_ptr<class GetLCHeadOp> GetLCHead;
+};
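+
+/* These ops are looked up by name: the ProcessOp(dpp, "GetLCEntry", &params)
+ * style calls in dbstore.cc are expected to resolve the op-name string to the
+ * matching member above and drive its Prepare/Bind/Execute sequence (the
+ * dispatch itself lives in DB::ProcessOp, which is not shown here). A
+ * caller-side sketch, mirroring DB::get_entry() in dbstore.cc:
+ *
+ *   DBOpParams params = {};
+ *   InitializeParams(dpp, &params);
+ *   params.op.lc_entry.index = oid;
+ *   int r = ProcessOp(dpp, "GetLCEntry", &params);
+ */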
+
+class ObjectOp {
+ public:
+ ObjectOp() {};
+
+ virtual ~ObjectOp() {}
+
+ std::shared_ptr<class PutObjectOp> PutObject;
+ std::shared_ptr<class DeleteObjectOp> DeleteObject;
+ std::shared_ptr<class GetObjectOp> GetObject;
+ std::shared_ptr<class UpdateObjectOp> UpdateObject;
+ std::shared_ptr<class ListBucketObjectsOp> ListBucketObjects;
+ std::shared_ptr<class ListVersionedObjectsOp> ListVersionedObjects;
+ std::shared_ptr<class PutObjectDataOp> PutObjectData;
+ std::shared_ptr<class UpdateObjectDataOp> UpdateObjectData;
+ std::shared_ptr<class GetObjectDataOp> GetObjectData;
+ std::shared_ptr<class DeleteObjectDataOp> DeleteObjectData;
+ std::shared_ptr<class DeleteStaleObjectDataOp> DeleteStaleObjectData;
+
+ virtual int InitializeObjectOps(std::string db_name, const DoutPrefixProvider *dpp) { return 0; }
+};
+
+class DBOp {
+ private:
+ static constexpr std::string_view CreateUserTableQ =
+ /* Corresponds to rgw::sal::User
+ *
+ * For now only UserID is made Primary key.
+ * If multiple tenants are stored in single .db handle, should
+ * make both (UserID, Tenant) as Primary Key.
+ *
+ * XXX:
+ * - AccessKeys, SwiftKeys, Subusers (map<>) are stored as blob.
+ * To enable easy query, first accesskey is stored in separate fields
+ * AccessKeysID, AccessKeysSecret.
+ * In future, may be have separate table to store these keys and
+ * query on that table.
+ * - Quota stored as blob .. should be linked to quota table.
+ */
+ "CREATE TABLE IF NOT EXISTS '{}' ( \
+ UserID TEXT NOT NULL UNIQUE, \
+ Tenant TEXT , \
+ NS TEXT , \
+ DisplayName TEXT , \
+ UserEmail TEXT , \
+ AccessKeysID TEXT , \
+ AccessKeysSecret TEXT , \
+ AccessKeys BLOB , \
+ SwiftKeys BLOB , \
+ SubUsers BLOB , \
+ Suspended INTEGER , \
+ MaxBuckets INTEGER , \
+ OpMask INTEGER , \
+ UserCaps BLOB , \
+ Admin INTEGER , \
+ System INTEGER , \
+ PlacementName TEXT , \
+ PlacementStorageClass TEXT , \
+ PlacementTags BLOB , \
+ BucketQuota BLOB , \
+ TempURLKeys BLOB , \
+ UserQuota BLOB , \
+ TYPE INTEGER , \
+ MfaIDs BLOB , \
+ AssumedRoleARN TEXT , \
+ UserAttrs BLOB, \
+ UserVersion INTEGER, \
+ UserVersionTag TEXT, \
+ PRIMARY KEY (UserID) \n);";
+
+ static constexpr std::string_view CreateBucketTableQ =
+ /* Corresponds to rgw::sal::Bucket
+ *
+ * For now only BucketName is made Primary key. Since buckets should
+ * be unique across users in rgw, OwnerID is not made part of primary key.
+ * However it is still referenced as foreign key
+ *
+ * If multiple tenants are stored in single .db handle, should
+ * make both (BucketName, Tenant) as Primary Key. Also should
+ * reference (UserID, Tenant) as Foreign key.
+ *
+ * leaving below RADOS specific fields
+ * - rgw_data_placement_target explicit_placement (struct rgw_bucket)
+ * - rgw::BucketLayout layout (struct RGWBucketInfo)
+ * - const static uint32_t NUM_SHARDS_BLIND_BUCKET (struct RGWBucketInfo),
+ * should be '0' indicating no sharding.
+ * - cls_rgw_reshard_status reshard_status (struct RGWBucketInfo)
+ *
+ * XXX:
+ * - Quota stored as blob .. should be linked to quota table.
+ * - WebsiteConf stored as BLOB..if required, should be split
+ * - Storing bucket_version (struct RGWBucket), objv_tracker
+ * (struct RGWBucketInfo) separately. Are they same?
+ *
+ */
+ "CREATE TABLE IF NOT EXISTS '{}' ( \
+ BucketName TEXT NOT NULL UNIQUE , \
+ Tenant TEXT, \
+ Marker TEXT, \
+ BucketID TEXT, \
+ Size INTEGER, \
+ SizeRounded INTEGER,\
+ CreationTime BLOB, \
+ Count INTEGER, \
+ PlacementName TEXT , \
+ PlacementStorageClass TEXT , \
+ OwnerID TEXT NOT NULL, \
+ Flags INTEGER, \
+ Zonegroup TEXT, \
+ HasInstanceObj BOOLEAN, \
+ Quota BLOB, \
+ RequesterPays BOOLEAN, \
+ HasWebsite BOOLEAN, \
+ WebsiteConf BLOB, \
+ SwiftVersioning BOOLEAN, \
+ SwiftVerLocation TEXT, \
+ MdsearchConfig BLOB, \
+ NewBucketInstanceID TEXT,\
+ ObjectLock BLOB, \
+ SyncPolicyInfoGroups BLOB, \
+ BucketAttrs BLOB, \
+ BucketVersion INTEGER, \
+ BucketVersionTag TEXT, \
+ Mtime BLOB, \
+      PRIMARY KEY (BucketName), \
+ FOREIGN KEY (OwnerID) \
+ REFERENCES '{}' (UserID) ON DELETE CASCADE ON UPDATE CASCADE \n);";
+
+ static constexpr std::string_view CreateObjectTableTriggerQ =
+ "CREATE TRIGGER IF NOT EXISTS '{}' \
+ AFTER INSERT ON '{}' \
+ BEGIN \
+ UPDATE '{}' \
+ SET VersionNum = (SELECT COALESCE(max(VersionNum), 0) from '{}' where ObjName = new.ObjName) + 1 \
+ where ObjName = new.ObjName and ObjInstance = new.ObjInstance; \
+ END;";
+
+ static constexpr std::string_view CreateObjectTableQ =
+ /* Corresponds to rgw::sal::Object
+ *
+ * For now only BucketName, ObjName is made Primary key.
+ * If multiple tenants are stored in single .db handle, should
+ * include Tenant too in the Primary Key. Also should
+ * reference (BucketID, Tenant) as Foreign key.
+ *
+ * referring to
+ * - rgw_bucket_dir_entry - following are added for now
+ * flags,
+ * versioned_epoch
+ * tag
+ * index_ver
+ * meta.category
+ * meta.etag
+ * meta.storageclass
+ * meta.appendable
+ * meta.content_type
+ * meta.owner
+ * meta.owner_display_name
+ *
+     * - RGWObjState. Below are omitted from that struct
+     *   as they seem to be in-memory variables
+     *   * is_atomic, has_atts, exists, prefetch_data, keep_tail,
+     * - RGWObjManifest
+     *
+     * Extra fields added: "IsMultipart" to flag multipart uploads, and
+     * HeadData to store the first chunk of data.
+ */
+ "CREATE TABLE IF NOT EXISTS '{}' ( \
+ ObjName TEXT NOT NULL , \
+ ObjInstance TEXT, \
+ ObjNS TEXT, \
+ BucketName TEXT NOT NULL , \
+ ACLs BLOB, \
+ IndexVer INTEGER, \
+ Tag TEXT, \
+ Flags INTEGER, \
+ VersionedEpoch INTEGER, \
+ ObjCategory INTEGER, \
+ Etag TEXT, \
+ Owner TEXT, \
+ OwnerDisplayName TEXT, \
+ StorageClass TEXT, \
+ Appendable BOOL, \
+ ContentType TEXT, \
+ IndexHashSource TEXT, \
+ ObjSize INTEGER, \
+ AccountedSize INTEGER, \
+ Mtime BLOB, \
+ Epoch INTEGER, \
+ ObjTag BLOB, \
+ TailTag BLOB, \
+ WriteTag TEXT, \
+ FakeTag BOOL, \
+ ShadowObj TEXT, \
+ HasData BOOL, \
+ IsVersioned BOOL, \
+ VersionNum INTEGER, \
+ PGVer INTEGER, \
+ ZoneShortID INTEGER, \
+ ObjVersion INTEGER, \
+ ObjVersionTag TEXT, \
+ ObjAttrs BLOB, \
+ HeadSize INTEGER, \
+ MaxHeadSize INTEGER, \
+ ObjID TEXT NOT NULL, \
+ TailInstance TEXT, \
+ HeadPlacementRuleName TEXT, \
+ HeadPlacementRuleStorageClass TEXT, \
+ TailPlacementRuleName TEXT, \
+ TailPlacementStorageClass TEXT, \
+ ManifestPartObjs BLOB, \
+ ManifestPartRules BLOB, \
+ Omap BLOB, \
+ IsMultipart BOOL, \
+ MPPartsList BLOB, \
+ HeadData BLOB, \
+ PRIMARY KEY (ObjName, ObjInstance, BucketName), \
+ FOREIGN KEY (BucketName) \
+ REFERENCES '{}' (BucketName) ON DELETE CASCADE ON UPDATE CASCADE \n);";
+
+ static constexpr std::string_view CreateObjectDataTableQ =
+ /* Extra field 'MultipartPartStr' added which signifies multipart
+ * <uploadid + partnum>. For regular object, it is '0.0'
+ *
+ * - part: a collection of stripes that make a contiguous part of an
+ object. A regular object will only have one part (although might have
+ many stripes), a multipart object might have many parts. Each part
+ has a fixed stripe size (ObjChunkSize), although the last stripe of a
+ part might be smaller than that.
+ */
+ "CREATE TABLE IF NOT EXISTS '{}' ( \
+ ObjName TEXT NOT NULL , \
+ ObjInstance TEXT, \
+ ObjNS TEXT, \
+ BucketName TEXT NOT NULL , \
+ ObjID TEXT NOT NULL , \
+ MultipartPartStr TEXT, \
+ PartNum INTEGER NOT NULL, \
+ Offset INTEGER, \
+ Size INTEGER, \
+ Mtime BLOB, \
+ Data BLOB, \
+ PRIMARY KEY (ObjName, BucketName, ObjInstance, ObjID, MultipartPartStr, PartNum), \
+ FOREIGN KEY (BucketName) \
+ REFERENCES '{}' (BucketName) ON DELETE CASCADE ON UPDATE CASCADE \n);";
+
+ static constexpr std::string_view CreateObjectViewQ =
+    /* This query creates a temporary view with entries from the ObjectData table which have
+     * a corresponding head object (i.e., with the same ObjName, ObjInstance, ObjNS, ObjID)
+     * in the Object table.
+     *
+     * The GC thread can use this view to delete stale entries from the ObjectData table which
+     * do not exist in this view.
+     *
+     * XXX: This view is throwing a ForeignKey mismatch error, most likely because not all the
+     * keys of the objectdata table are referenced here. So this view is not used atm.
+     */
+ "CREATE TEMP VIEW IF NOT EXISTS '{}' AS \
+ SELECT s.ObjName, s.ObjInstance, s.ObjID from '{}' as s INNER JOIN '{}' USING \
+ (ObjName, BucketName, ObjInstance, ObjID);";
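+
+    /* For illustration, with example names ('objectview', 'objectdata',
+     * 'object'), the format string above expands to:
+     *
+     *   CREATE TEMP VIEW IF NOT EXISTS 'objectview' AS
+     *     SELECT s.ObjName, s.ObjInstance, s.ObjID from 'objectdata' as s
+     *     INNER JOIN 'object' USING (ObjName, BucketName, ObjInstance, ObjID);
+     */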
+
+
+ static constexpr std::string_view CreateQuotaTableQ =
+ "CREATE TABLE IF NOT EXISTS '{}' ( \
+ QuotaID INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE , \
+ MaxSizeSoftThreshold INTEGER , \
+ MaxObjsSoftThreshold INTEGER , \
+ MaxSize INTEGER , \
+ MaxObjects INTEGER , \
+ Enabled Boolean , \
+ CheckOnRaw Boolean \n);";
+
+ static constexpr std::string_view CreateLCEntryTableQ =
+ "CREATE TABLE IF NOT EXISTS '{}' ( \
+ LCIndex TEXT NOT NULL , \
+ BucketName TEXT NOT NULL , \
+ StartTime INTEGER , \
+ Status INTEGER , \
+ PRIMARY KEY (LCIndex, BucketName) \n);";
+
+ static constexpr std::string_view CreateLCHeadTableQ =
+ "CREATE TABLE IF NOT EXISTS '{}' ( \
+ LCIndex TEXT NOT NULL , \
+ Marker TEXT , \
+ StartDate INTEGER , \
+ PRIMARY KEY (LCIndex) \n);";
+
+ static constexpr std::string_view DropQ = "DROP TABLE IF EXISTS '{}'";
+ static constexpr std::string_view ListAllQ = "SELECT * from '{}'";
+
+ public:
+ DBOp() {}
+ virtual ~DBOp() {}
+ std::mutex mtx; // to protect prepared stmt
+
+ static std::string CreateTableSchema(std::string_view type,
+ const DBOpParams *params) {
+ if (!type.compare("User"))
+ return fmt::format(CreateUserTableQ,
+ params->user_table);
+ if (!type.compare("Bucket"))
+ return fmt::format(CreateBucketTableQ,
+ params->bucket_table,
+ params->user_table);
+ if (!type.compare("Object"))
+ return fmt::format(CreateObjectTableQ,
+ params->object_table,
+ params->bucket_table);
+ if (!type.compare("ObjectTrigger"))
+ return fmt::format(CreateObjectTableTriggerQ,
+ params->object_trigger,
+ params->object_table,
+ params->object_table,
+ params->object_table);
+ if (!type.compare("ObjectData"))
+ return fmt::format(CreateObjectDataTableQ,
+ params->objectdata_table,
+ params->bucket_table);
+ if (!type.compare("ObjectView"))
+        return fmt::format(CreateObjectViewQ,
+ params->object_view,
+ params->objectdata_table,
+ params->object_table);
+ if (!type.compare("Quota"))
+ return fmt::format(CreateQuotaTableQ,
+ params->quota_table);
+ if (!type.compare("LCHead"))
+ return fmt::format(CreateLCHeadTableQ,
+ params->lc_head_table);
+ if (!type.compare("LCEntry"))
+ return fmt::format(CreateLCEntryTableQ,
+ params->lc_entry_table,
+ params->bucket_table);
+
+    ceph_abort_msgf("incorrect table type %.*s", static_cast<int>(type.size()), type.data());
+ }
+
+ static std::string DeleteTableSchema(std::string_view table) {
+ return fmt::format(DropQ, table);
+ }
+ static std::string ListTableSchema(std::string_view table) {
+ return fmt::format(ListAllQ, table);
+ }
+
+ virtual int Prepare(const DoutPrefixProvider *dpp, DBOpParams *params) { return 0; }
+ virtual int Bind(const DoutPrefixProvider *dpp, DBOpParams *params) { return 0; }
+ virtual int Execute(const DoutPrefixProvider *dpp, DBOpParams *params) { return 0; }
+};
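+
+/* Usage sketch (table names are illustrative):
+ *
+ *   DBOpParams p = {};
+ *   p.user_table = "user";
+ *   std::string create_q = DBOp::CreateTableSchema("User", &p);
+ *   // "CREATE TABLE IF NOT EXISTS 'user' ( UserID TEXT NOT NULL UNIQUE, ... );"
+ *   std::string drop_q = DBOp::DeleteTableSchema("user");
+ *   // "DROP TABLE IF EXISTS 'user'"
+ */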
+
+class InsertUserOp : virtual public DBOp {
+ private:
+    /* For existing entries -
+     * (1) INSERT OR REPLACE - deletes the previous entry and then
+     * inserts the new one. Since it deletes previous entries, it will
+     * trigger all foreign key cascade deletes or other triggers.
+     * (2) INSERT or UPDATE - this will set NULL values for unassigned
+     * fields.
+     * more info: https://code-examples.net/en/q/377728
+     *
+     * For now using INSERT OR REPLACE. If updating an existing record
+     * is required, another query will be used.
+     */
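+
+    /* If per-field updates of an existing row are ever needed, SQLite's upsert
+     * form (available since SQLite 3.24) avoids the cascade-delete side effect
+     * of INSERT OR REPLACE; an illustrative shape, not used here:
+     *
+     *   INSERT INTO 'user' (UserID, DisplayName) VALUES (:user_id, :display_name)
+     *     ON CONFLICT(UserID) DO UPDATE SET DisplayName = excluded.DisplayName;
+     */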
+ static constexpr std::string_view Query = "INSERT OR REPLACE INTO '{}' \
+ (UserID, Tenant, NS, DisplayName, UserEmail, \
+ AccessKeysID, AccessKeysSecret, AccessKeys, SwiftKeys,\
+ SubUsers, Suspended, MaxBuckets, OpMask, UserCaps, Admin, \
+ System, PlacementName, PlacementStorageClass, PlacementTags, \
+ BucketQuota, TempURLKeys, UserQuota, Type, MfaIDs, \
+ UserAttrs, UserVersion, UserVersionTag) \
+ VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, \
+ {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {});";
+
+ public:
+ virtual ~InsertUserOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ return fmt::format(Query, params.user_table,
+ params.op.user.user_id, params.op.user.tenant, params.op.user.ns,
+ params.op.user.display_name, params.op.user.user_email,
+ params.op.user.access_keys_id, params.op.user.access_keys_secret,
+ params.op.user.access_keys, params.op.user.swift_keys,
+ params.op.user.subusers, params.op.user.suspended,
+ params.op.user.max_buckets, params.op.user.op_mask,
+ params.op.user.user_caps, params.op.user.admin, params.op.user.system,
+ params.op.user.placement_name, params.op.user.placement_storage_class,
+ params.op.user.placement_tags, params.op.user.bucket_quota,
+ params.op.user.temp_url_keys, params.op.user.user_quota,
+ params.op.user.type, params.op.user.mfa_ids,
+ params.op.user.user_attrs, params.op.user.user_ver,
+ params.op.user.user_ver_tag);
+ }
+
+};
+
+class RemoveUserOp: virtual public DBOp {
+ private:
+ static constexpr std::string_view Query =
+ "DELETE from '{}' where UserID = {}";
+
+ public:
+ virtual ~RemoveUserOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ return fmt::format(Query, params.user_table,
+ params.op.user.user_id);
+ }
+};
+
+class GetUserOp: virtual public DBOp {
+ private:
+    /* If the query columns below are updated, make sure to update the indexes
+     * in the list_user() callback in sqliteDB.cc */
+ static constexpr std::string_view Query = "SELECT \
+ UserID, Tenant, NS, DisplayName, UserEmail, \
+ AccessKeysID, AccessKeysSecret, AccessKeys, SwiftKeys,\
+ SubUsers, Suspended, MaxBuckets, OpMask, UserCaps, Admin, \
+ System, PlacementName, PlacementStorageClass, PlacementTags, \
+ BucketQuota, TempURLKeys, UserQuota, Type, MfaIDs, AssumedRoleARN, \
+ UserAttrs, UserVersion, UserVersionTag from '{}' where UserID = {}";
+
+ static constexpr std::string_view QueryByEmail = "SELECT \
+ UserID, Tenant, NS, DisplayName, UserEmail, \
+ AccessKeysID, AccessKeysSecret, AccessKeys, SwiftKeys,\
+ SubUsers, Suspended, MaxBuckets, OpMask, UserCaps, Admin, \
+ System, PlacementName, PlacementStorageClass, PlacementTags, \
+ BucketQuota, TempURLKeys, UserQuota, Type, MfaIDs, AssumedRoleARN, \
+ UserAttrs, UserVersion, UserVersionTag from '{}' where UserEmail = {}";
+
+ static constexpr std::string_view QueryByAccessKeys = "SELECT \
+ UserID, Tenant, NS, DisplayName, UserEmail, \
+ AccessKeysID, AccessKeysSecret, AccessKeys, SwiftKeys,\
+ SubUsers, Suspended, MaxBuckets, OpMask, UserCaps, Admin, \
+ System, PlacementName, PlacementStorageClass, PlacementTags, \
+ BucketQuota, TempURLKeys, UserQuota, Type, MfaIDs, AssumedRoleARN, \
+ UserAttrs, UserVersion, UserVersionTag from '{}' where AccessKeysID = {}";
+
+ static constexpr std::string_view QueryByUserID = "SELECT \
+ UserID, Tenant, NS, DisplayName, UserEmail, \
+ AccessKeysID, AccessKeysSecret, AccessKeys, SwiftKeys,\
+ SubUsers, Suspended, MaxBuckets, OpMask, UserCaps, Admin, \
+ System, PlacementName, PlacementStorageClass, PlacementTags, \
+ BucketQuota, TempURLKeys, UserQuota, Type, MfaIDs, AssumedRoleARN, \
+ UserAttrs, UserVersion, UserVersionTag \
+ from '{}' where UserID = {}";
+
+ public:
+ virtual ~GetUserOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ if (params.op.query_str == "email") {
+ return fmt::format(QueryByEmail, params.user_table,
+ params.op.user.user_email);
+ } else if (params.op.query_str == "access_key") {
+ return fmt::format(QueryByAccessKeys,
+ params.user_table,
+ params.op.user.access_keys_id);
+ } else if (params.op.query_str == "user_id") {
+ return fmt::format(QueryByUserID,
+ params.user_table,
+ params.op.user.user_id);
+ } else {
+ return fmt::format(Query, params.user_table,
+ params.op.user.user_id);
+ }
+ }
+};
+
+class InsertBucketOp: virtual public DBOp {
+ private:
+ static constexpr std::string_view Query =
+ "INSERT OR REPLACE INTO '{}' \
+ (BucketName, Tenant, Marker, BucketID, Size, SizeRounded, CreationTime, \
+ Count, PlacementName, PlacementStorageClass, OwnerID, Flags, Zonegroup, \
+ HasInstanceObj, Quota, RequesterPays, HasWebsite, WebsiteConf, \
+ SwiftVersioning, SwiftVerLocation, \
+ MdsearchConfig, NewBucketInstanceID, ObjectLock, \
+ SyncPolicyInfoGroups, BucketAttrs, BucketVersion, BucketVersionTag, Mtime) \
+ VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, \
+ {}, {}, {}, {}, {}, {}, {}, {}, {}, \
+ {}, {}, {}, {}, {}, {}, {}, {}, {}, {})";
+
+ public:
+ virtual ~InsertBucketOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ return fmt::format(Query, params.bucket_table,
+ params.op.bucket.bucket_name, params.op.bucket.tenant,
+ params.op.bucket.marker, params.op.bucket.bucket_id,
+ params.op.bucket.size, params.op.bucket.size_rounded,
+ params.op.bucket.creation_time, params.op.bucket.count,
+ params.op.bucket.placement_name, params.op.bucket.placement_storage_class,
+ params.op.user.user_id,
+ params.op.bucket.flags, params.op.bucket.zonegroup, params.op.bucket.has_instance_obj,
+ params.op.bucket.quota, params.op.bucket.requester_pays, params.op.bucket.has_website,
+ params.op.bucket.website_conf, params.op.bucket.swift_versioning,
+ params.op.bucket.swift_ver_location, params.op.bucket.mdsearch_config,
+ params.op.bucket.new_bucket_instance_id, params.op.bucket.obj_lock,
+ params.op.bucket.sync_policy_info_groups, params.op.bucket.bucket_attrs,
+ params.op.bucket.bucket_ver, params.op.bucket.bucket_ver_tag,
+ params.op.bucket.mtime);
+ }
+};
+
+class UpdateBucketOp: virtual public DBOp {
+ private:
+ // Updates Info, Mtime, Version
+ static constexpr std::string_view InfoQuery =
+ "UPDATE '{}' SET Tenant = {}, Marker = {}, BucketID = {}, CreationTime = {}, \
+ Count = {}, PlacementName = {}, PlacementStorageClass = {}, OwnerID = {}, Flags = {}, \
+ Zonegroup = {}, HasInstanceObj = {}, Quota = {}, RequesterPays = {}, HasWebsite = {}, \
+ WebsiteConf = {}, SwiftVersioning = {}, SwiftVerLocation = {}, MdsearchConfig = {}, \
+ NewBucketInstanceID = {}, ObjectLock = {}, SyncPolicyInfoGroups = {}, \
+ BucketVersion = {}, Mtime = {} WHERE BucketName = {}";
+ // Updates Attrs, OwnerID, Mtime, Version
+ static constexpr std::string_view AttrsQuery =
+ "UPDATE '{}' SET OwnerID = {}, BucketAttrs = {}, Mtime = {}, BucketVersion = {} \
+ WHERE BucketName = {}";
+ // Updates OwnerID, CreationTime, Mtime, Version
+ static constexpr std::string_view OwnerQuery =
+ "UPDATE '{}' SET OwnerID = {}, CreationTime = {}, Mtime = {}, BucketVersion = {} WHERE BucketName = {}";
+
+ public:
+ virtual ~UpdateBucketOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ if (params.op.query_str == "info") {
+ return fmt::format(InfoQuery, params.bucket_table,
+ params.op.bucket.tenant, params.op.bucket.marker, params.op.bucket.bucket_id,
+ params.op.bucket.creation_time, params.op.bucket.count,
+ params.op.bucket.placement_name, params.op.bucket.placement_storage_class,
+ params.op.user.user_id,
+ params.op.bucket.flags, params.op.bucket.zonegroup, params.op.bucket.has_instance_obj,
+ params.op.bucket.quota, params.op.bucket.requester_pays, params.op.bucket.has_website,
+ params.op.bucket.website_conf, params.op.bucket.swift_versioning,
+ params.op.bucket.swift_ver_location, params.op.bucket.mdsearch_config,
+ params.op.bucket.new_bucket_instance_id, params.op.bucket.obj_lock,
+ params.op.bucket.sync_policy_info_groups,
+ params.op.bucket.bucket_ver, params.op.bucket.mtime,
+ params.op.bucket.bucket_name);
+ }
+ if (params.op.query_str == "attrs") {
+ return fmt::format(AttrsQuery, params.bucket_table,
+ params.op.user.user_id, params.op.bucket.bucket_attrs,
+ params.op.bucket.mtime,
+ params.op.bucket.bucket_ver, params.op.bucket.bucket_name);
+ }
+ if (params.op.query_str == "owner") {
+ return fmt::format(OwnerQuery, params.bucket_table,
+ params.op.user.user_id, params.op.bucket.creation_time,
+ params.op.bucket.mtime,
+ params.op.bucket.bucket_ver, params.op.bucket.bucket_name);
+ }
+ return "";
+ }
+};
+
+class RemoveBucketOp: virtual public DBOp {
+ private:
+ static constexpr std::string_view Query =
+ "DELETE from '{}' where BucketName = {}";
+
+ public:
+ virtual ~RemoveBucketOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ return fmt::format(Query, params.bucket_table,
+ params.op.bucket.bucket_name);
+ }
+};
+
+class GetBucketOp: virtual public DBOp {
+ private:
+ static constexpr std::string_view Query = "SELECT \
+ BucketName, BucketTable.Tenant, Marker, BucketID, Size, SizeRounded, CreationTime, \
+ Count, BucketTable.PlacementName, BucketTable.PlacementStorageClass, OwnerID, Flags, Zonegroup, \
+ HasInstanceObj, Quota, RequesterPays, HasWebsite, WebsiteConf, \
+ SwiftVersioning, SwiftVerLocation, \
+ MdsearchConfig, NewBucketInstanceID, ObjectLock, \
+ SyncPolicyInfoGroups, BucketAttrs, BucketVersion, BucketVersionTag, Mtime, NS \
+ from '{}' as BucketTable INNER JOIN '{}' ON OwnerID = UserID where BucketName = {}";
+
+ public:
+ virtual ~GetBucketOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ //return fmt::format(Query, params.op.bucket.bucket_name,
+ // params.bucket_table, params.user_table);
+ return fmt::format(Query,
+ params.bucket_table, params.user_table,
+ params.op.bucket.bucket_name);
+ }
+};
+
+class ListUserBucketsOp: virtual public DBOp {
+ private:
+    // once stats are also stored, this query may have to be updated to join
+    // the two tables.
+ static constexpr std::string_view Query = "SELECT \
+ BucketName, Tenant, Marker, BucketID, Size, SizeRounded, CreationTime, \
+ Count, PlacementName, PlacementStorageClass, OwnerID, Flags, Zonegroup, \
+ HasInstanceObj, Quota, RequesterPays, HasWebsite, WebsiteConf, \
+ SwiftVersioning, SwiftVerLocation, \
+ MdsearchConfig, NewBucketInstanceID, ObjectLock, \
+ SyncPolicyInfoGroups, BucketAttrs, BucketVersion, BucketVersionTag, Mtime \
+ FROM '{}' WHERE OwnerID = {} AND BucketName > {} ORDER BY BucketName ASC LIMIT {}";
+
+ /* BucketNames are unique across users. Hence userid/OwnerID is not used as
+ * marker or for ordering here in the below query
+ */
+ static constexpr std::string_view AllQuery = "SELECT \
+ BucketName, Tenant, Marker, BucketID, Size, SizeRounded, CreationTime, \
+ Count, PlacementName, PlacementStorageClass, OwnerID, Flags, Zonegroup, \
+ HasInstanceObj, Quota, RequesterPays, HasWebsite, WebsiteConf, \
+ SwiftVersioning, SwiftVerLocation, \
+ MdsearchConfig, NewBucketInstanceID, ObjectLock, \
+ SyncPolicyInfoGroups, BucketAttrs, BucketVersion, BucketVersionTag, Mtime \
+ FROM '{}' WHERE BucketName > {} ORDER BY BucketName ASC LIMIT {}";
+
+ public:
+ virtual ~ListUserBucketsOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ if (params.op.query_str == "all") {
+ return fmt::format(AllQuery, params.bucket_table,
+ params.op.bucket.min_marker,
+ params.op.list_max_count);
+ } else {
+ return fmt::format(Query, params.bucket_table,
+ params.op.user.user_id, params.op.bucket.min_marker,
+ params.op.list_max_count);
+ }
+ }
+};
+
+class PutObjectOp: virtual public DBOp {
+ private:
+ static constexpr std::string_view Query =
+ "INSERT OR REPLACE INTO '{}' \
+ (ObjName, ObjInstance, ObjNS, BucketName, ACLs, IndexVer, Tag, \
+ Flags, VersionedEpoch, ObjCategory, Etag, Owner, OwnerDisplayName, \
+ StorageClass, Appendable, ContentType, IndexHashSource, ObjSize, \
+ AccountedSize, Mtime, Epoch, ObjTag, TailTag, WriteTag, FakeTag, \
+ ShadowObj, HasData, IsVersioned, VersionNum, PGVer, ZoneShortID, \
+ ObjVersion, ObjVersionTag, ObjAttrs, HeadSize, MaxHeadSize, \
+ ObjID, TailInstance, HeadPlacementRuleName, HeadPlacementRuleStorageClass, \
+ TailPlacementRuleName, TailPlacementStorageClass, \
+ ManifestPartObjs, ManifestPartRules, Omap, IsMultipart, MPPartsList, \
+ HeadData) \
+ VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, \
+ {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, \
+ {}, {}, {}, \
+ {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})";
+
+ public:
+ virtual ~PutObjectOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ return fmt::format(Query,
+ params.object_table, params.op.obj.obj_name,
+ params.op.obj.obj_instance, params.op.obj.obj_ns,
+ params.op.bucket.bucket_name, params.op.obj.acls, params.op.obj.index_ver,
+ params.op.obj.tag, params.op.obj.flags, params.op.obj.versioned_epoch,
+ params.op.obj.obj_category, params.op.obj.etag, params.op.obj.owner,
+ params.op.obj.owner_display_name, params.op.obj.storage_class,
+ params.op.obj.appendable, params.op.obj.content_type,
+ params.op.obj.index_hash_source, params.op.obj.obj_size,
+ params.op.obj.accounted_size, params.op.obj.mtime,
+ params.op.obj.epoch, params.op.obj.obj_tag, params.op.obj.tail_tag,
+ params.op.obj.write_tag, params.op.obj.fake_tag, params.op.obj.shadow_obj,
+ params.op.obj.has_data, params.op.obj.is_versioned,
+ params.op.obj.version_num,
+ params.op.obj.pg_ver, params.op.obj.zone_short_id,
+ params.op.obj.obj_version, params.op.obj.obj_version_tag,
+ params.op.obj.obj_attrs, params.op.obj.head_size,
+ params.op.obj.max_head_size, params.op.obj.obj_id,
+ params.op.obj.tail_instance,
+ params.op.obj.head_placement_rule_name,
+ params.op.obj.head_placement_storage_class,
+ params.op.obj.tail_placement_rule_name,
+ params.op.obj.tail_placement_storage_class,
+ params.op.obj.manifest_part_objs,
+ params.op.obj.manifest_part_rules, params.op.obj.omap,
+ params.op.obj.is_multipart, params.op.obj.mp_parts,
+ params.op.obj.head_data);
+ }
+};
+
+class DeleteObjectOp: virtual public DBOp {
+ private:
+ static constexpr std::string_view Query =
+ "DELETE from '{}' where BucketName = {} and ObjName = {} and ObjInstance = {}";
+
+ public:
+ virtual ~DeleteObjectOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ return fmt::format(Query, params.object_table,
+ params.op.bucket.bucket_name,
+ params.op.obj.obj_name,
+ params.op.obj.obj_instance);
+ }
+};
+
+class GetObjectOp: virtual public DBOp {
+ private:
+ static constexpr std::string_view Query =
+ "SELECT \
+ ObjName, ObjInstance, ObjNS, BucketName, ACLs, IndexVer, Tag, \
+ Flags, VersionedEpoch, ObjCategory, Etag, Owner, OwnerDisplayName, \
+ StorageClass, Appendable, ContentType, IndexHashSource, ObjSize, \
+ AccountedSize, Mtime, Epoch, ObjTag, TailTag, WriteTag, FakeTag, \
+ ShadowObj, HasData, IsVersioned, VersionNum, PGVer, ZoneShortID, \
+ ObjVersion, ObjVersionTag, ObjAttrs, HeadSize, MaxHeadSize, \
+ ObjID, TailInstance, HeadPlacementRuleName, HeadPlacementRuleStorageClass, \
+ TailPlacementRuleName, TailPlacementStorageClass, \
+ ManifestPartObjs, ManifestPartRules, Omap, IsMultipart, MPPartsList, \
+ HeadData from '{}' \
+ where BucketName = {} and ObjName = {} and ObjInstance = {}";
+
+ public:
+ virtual ~GetObjectOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ return fmt::format(Query,
+ params.object_table,
+ params.op.bucket.bucket_name,
+ params.op.obj.obj_name,
+ params.op.obj.obj_instance);
+ }
+};
+
+class ListBucketObjectsOp: virtual public DBOp {
+ private:
+    // once stats are also stored, this query may have to be updated to join
+    // the two tables.
+ static constexpr std::string_view Query =
+ "SELECT \
+ ObjName, ObjInstance, ObjNS, BucketName, ACLs, IndexVer, Tag, \
+ Flags, VersionedEpoch, ObjCategory, Etag, Owner, OwnerDisplayName, \
+ StorageClass, Appendable, ContentType, IndexHashSource, ObjSize, \
+ AccountedSize, Mtime, Epoch, ObjTag, TailTag, WriteTag, FakeTag, \
+ ShadowObj, HasData, IsVersioned, VersionNum, PGVer, ZoneShortID, \
+ ObjVersion, ObjVersionTag, ObjAttrs, HeadSize, MaxHeadSize, \
+ ObjID, TailInstance, HeadPlacementRuleName, HeadPlacementRuleStorageClass, \
+ TailPlacementRuleName, TailPlacementStorageClass, \
+ ManifestPartObjs, ManifestPartRules, Omap, IsMultipart, MPPartsList, HeadData from '{}' \
+ where BucketName = {} and ObjName >= {} and ObjName LIKE {} ORDER BY ObjName ASC, VersionNum DESC LIMIT {}";
+ public:
+ virtual ~ListBucketObjectsOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ /* XXX: Include obj_id, delim */
+ return fmt::format(Query,
+ params.object_table,
+ params.op.bucket.bucket_name,
+ params.op.obj.min_marker,
+ params.op.obj.prefix,
+ params.op.list_max_count);
+ }
+};
+
+#define MAX_VERSIONED_OBJECTS 20
+class ListVersionedObjectsOp: virtual public DBOp {
+ private:
+    // once stats are also stored, this query may have to be updated to join
+    // the two tables.
+ static constexpr std::string_view Query =
+ "SELECT \
+ ObjName, ObjInstance, ObjNS, BucketName, ACLs, IndexVer, Tag, \
+ Flags, VersionedEpoch, ObjCategory, Etag, Owner, OwnerDisplayName, \
+ StorageClass, Appendable, ContentType, IndexHashSource, ObjSize, \
+ AccountedSize, Mtime, Epoch, ObjTag, TailTag, WriteTag, FakeTag, \
+ ShadowObj, HasData, IsVersioned, VersionNum, PGVer, ZoneShortID, \
+ ObjVersion, ObjVersionTag, ObjAttrs, HeadSize, MaxHeadSize, \
+ ObjID, TailInstance, HeadPlacementRuleName, HeadPlacementRuleStorageClass, \
+ TailPlacementRuleName, TailPlacementStorageClass, \
+ ManifestPartObjs, ManifestPartRules, Omap, IsMultipart, MPPartsList, \
+ HeadData from '{}' \
+ where BucketName = {} and ObjName = {} ORDER BY VersionNum DESC LIMIT {}";
+ public:
+ virtual ~ListVersionedObjectsOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ /* XXX: Include obj_id, delim */
+ return fmt::format(Query,
+ params.object_table,
+ params.op.bucket.bucket_name,
+ params.op.obj.obj_name,
+ params.op.list_max_count);
+ }
+};
+
+class UpdateObjectOp: virtual public DBOp {
+ private:
+ // Updates Omap
+ static constexpr std::string_view OmapQuery =
+ "UPDATE '{}' SET Omap = {}, Mtime = {} \
+ where BucketName = {} and ObjName = {} and ObjInstance = {}";
+ static constexpr std::string_view AttrsQuery =
+ "UPDATE '{}' SET ObjAttrs = {}, Mtime = {} \
+ where BucketName = {} and ObjName = {} and ObjInstance = {}";
+ static constexpr std::string_view MPQuery =
+ "UPDATE '{}' SET MPPartsList = {}, Mtime = {} \
+ where BucketName = {} and ObjName = {} and ObjInstance = {}";
+ static constexpr std::string_view MetaQuery =
+ "UPDATE '{}' SET \
+ ObjNS = {}, ACLs = {}, IndexVer = {}, Tag = {}, Flags = {}, VersionedEpoch = {}, \
+ ObjCategory = {}, Etag = {}, Owner = {}, OwnerDisplayName = {}, \
+ StorageClass = {}, Appendable = {}, ContentType = {}, \
+ IndexHashSource = {}, ObjSize = {}, AccountedSize = {}, Mtime = {}, \
+ Epoch = {}, ObjTag = {}, TailTag = {}, WriteTag = {}, FakeTag = {}, \
+ ShadowObj = {}, HasData = {}, IsVersioned = {}, VersionNum = {}, PGVer = {}, \
+ ZoneShortID = {}, ObjVersion = {}, ObjVersionTag = {}, ObjAttrs = {}, \
+ HeadSize = {}, MaxHeadSize = {}, ObjID = {}, TailInstance = {}, \
+ HeadPlacementRuleName = {}, HeadPlacementRuleStorageClass = {}, \
+ TailPlacementRuleName = {}, TailPlacementStorageClass = {}, \
+ ManifestPartObjs = {}, ManifestPartRules = {}, Omap = {}, \
+ IsMultipart = {}, MPPartsList = {}, HeadData = {} \
+ WHERE ObjName = {} and ObjInstance = {} and BucketName = {}";
+
+ public:
+ virtual ~UpdateObjectOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ if (params.op.query_str == "omap") {
+ return fmt::format(OmapQuery,
+ params.object_table, params.op.obj.omap,
+ params.op.obj.mtime,
+ params.op.bucket.bucket_name,
+ params.op.obj.obj_name,
+ params.op.obj.obj_instance);
+ }
+ if (params.op.query_str == "attrs") {
+ return fmt::format(AttrsQuery,
+ params.object_table, params.op.obj.obj_attrs,
+ params.op.obj.mtime,
+ params.op.bucket.bucket_name,
+ params.op.obj.obj_name,
+ params.op.obj.obj_instance);
+ }
+ if (params.op.query_str == "mp") {
+ return fmt::format(MPQuery,
+ params.object_table, params.op.obj.mp_parts,
+ params.op.obj.mtime,
+ params.op.bucket.bucket_name,
+ params.op.obj.obj_name,
+ params.op.obj.obj_instance);
+ }
+ if (params.op.query_str == "meta") {
+ return fmt::format(MetaQuery,
+ params.object_table,
+ params.op.obj.obj_ns, params.op.obj.acls, params.op.obj.index_ver,
+ params.op.obj.tag, params.op.obj.flags, params.op.obj.versioned_epoch,
+ params.op.obj.obj_category, params.op.obj.etag, params.op.obj.owner,
+ params.op.obj.owner_display_name, params.op.obj.storage_class,
+ params.op.obj.appendable, params.op.obj.content_type,
+ params.op.obj.index_hash_source, params.op.obj.obj_size,
+ params.op.obj.accounted_size, params.op.obj.mtime,
+ params.op.obj.epoch, params.op.obj.obj_tag, params.op.obj.tail_tag,
+ params.op.obj.write_tag, params.op.obj.fake_tag, params.op.obj.shadow_obj,
+ params.op.obj.has_data, params.op.obj.is_versioned, params.op.obj.version_num,
+ params.op.obj.pg_ver, params.op.obj.zone_short_id,
+ params.op.obj.obj_version, params.op.obj.obj_version_tag,
+ params.op.obj.obj_attrs, params.op.obj.head_size,
+ params.op.obj.max_head_size, params.op.obj.obj_id,
+ params.op.obj.tail_instance,
+ params.op.obj.head_placement_rule_name,
+ params.op.obj.head_placement_storage_class,
+ params.op.obj.tail_placement_rule_name,
+ params.op.obj.tail_placement_storage_class,
+ params.op.obj.manifest_part_objs,
+ params.op.obj.manifest_part_rules, params.op.obj.omap,
+ params.op.obj.is_multipart, params.op.obj.mp_parts,
+ params.op.obj.head_data,
+ params.op.obj.obj_name, params.op.obj.obj_instance,
+ params.op.bucket.bucket_name);
+ }
+ return "";
+ }
+};
+
+class PutObjectDataOp: virtual public DBOp {
+ private:
+ static constexpr std::string_view Query =
+ "INSERT OR REPLACE INTO '{}' \
+ (ObjName, ObjInstance, ObjNS, BucketName, ObjID, MultipartPartStr, PartNum, Offset, Size, Mtime, Data) \
+ VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})";
+
+ public:
+ virtual ~PutObjectDataOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ return fmt::format(Query,
+ params.objectdata_table,
+ params.op.obj.obj_name, params.op.obj.obj_instance,
+ params.op.obj.obj_ns,
+ params.op.bucket.bucket_name,
+ params.op.obj.obj_id,
+ params.op.obj_data.multipart_part_str,
+ params.op.obj_data.part_num,
+ params.op.obj_data.offset,
+ params.op.obj_data.size,
+ params.op.obj.mtime,
+ params.op.obj_data.data);
+ }
+};
+
+/* XXX: Recheck if this is really needed */
+class UpdateObjectDataOp: virtual public DBOp {
+ private:
+ static constexpr std::string_view Query =
+ "UPDATE '{}' \
+ SET Mtime = {} WHERE ObjName = {} and ObjInstance = {} and \
+ BucketName = {} and ObjID = {}";
+
+ public:
+ virtual ~UpdateObjectDataOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ return fmt::format(Query,
+ params.objectdata_table,
+ params.op.obj.mtime,
+ params.op.obj.obj_name, params.op.obj.obj_instance,
+ params.op.bucket.bucket_name,
+ params.op.obj.obj_id);
+ }
+};
+
+class GetObjectDataOp: virtual public DBOp {
+ private:
+ static constexpr std::string_view Query =
+ "SELECT \
+ ObjName, ObjInstance, ObjNS, BucketName, ObjID, MultipartPartStr, PartNum, Offset, Size, Mtime, Data \
+ from '{}' where BucketName = {} and ObjName = {} and ObjInstance = {} and ObjID = {} ORDER BY MultipartPartStr, PartNum";
+
+ public:
+ virtual ~GetObjectDataOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ return fmt::format(Query,
+ params.objectdata_table,
+ params.op.bucket.bucket_name,
+ params.op.obj.obj_name,
+ params.op.obj.obj_instance,
+ params.op.obj.obj_id);
+ }
+};
+
+class DeleteObjectDataOp: virtual public DBOp {
+ private:
+ static constexpr std::string_view Query =
+ "DELETE from '{}' where BucketName = {} and ObjName = {} and ObjInstance = {} and ObjID = {}";
+
+ public:
+ virtual ~DeleteObjectDataOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ return fmt::format(Query,
+ params.objectdata_table,
+ params.op.bucket.bucket_name,
+ params.op.obj.obj_name,
+ params.op.obj.obj_instance,
+ params.op.obj.obj_id);
+ }
+};
+
+class DeleteStaleObjectDataOp: virtual public DBOp {
+ private:
+ static constexpr std::string_view Query =
+ "DELETE from '{}' WHERE (ObjName, ObjInstance, ObjID) NOT IN (SELECT s.ObjName, s.ObjInstance, s.ObjID from '{}' as s INNER JOIN '{}' USING (ObjName, BucketName, ObjInstance, ObjID)) and Mtime < {}";
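+
+    /* For illustration, with example table names ('objectdata', 'object') and
+     * :mtime bound to the cutoff computed in DB::delete_stale_objs()
+     * (now - min_wait), the statement above expands to:
+     *
+     *   DELETE from 'objectdata' WHERE (ObjName, ObjInstance, ObjID) NOT IN
+     *     (SELECT s.ObjName, s.ObjInstance, s.ObjID from 'objectdata' as s
+     *      INNER JOIN 'object' USING (ObjName, BucketName, ObjInstance, ObjID))
+     *     and Mtime < :mtime
+     */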
+
+ public:
+ virtual ~DeleteStaleObjectDataOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ return fmt::format(Query,
+ params.objectdata_table,
+ params.objectdata_table,
+ params.object_table,
+ params.op.obj.mtime);
+ }
+};
+
+class InsertLCEntryOp: virtual public DBOp {
+ private:
+ static constexpr std::string_view Query =
+ "INSERT OR REPLACE INTO '{}' \
+ (LCIndex, BucketName, StartTime, Status) \
+ VALUES ({}, {}, {}, {})";
+
+ public:
+ virtual ~InsertLCEntryOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ return fmt::format(Query, params.lc_entry_table,
+ params.op.lc_entry.index, params.op.lc_entry.bucket_name,
+ params.op.lc_entry.start_time, params.op.lc_entry.status);
+ }
+};
+
+class RemoveLCEntryOp: virtual public DBOp {
+ private:
+ static constexpr std::string_view Query =
+ "DELETE from '{}' where LCIndex = {} and BucketName = {}";
+
+ public:
+ virtual ~RemoveLCEntryOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ return fmt::format(Query, params.lc_entry_table,
+ params.op.lc_entry.index, params.op.lc_entry.bucket_name);
+ }
+};
+
+class GetLCEntryOp: virtual public DBOp {
+ private:
+ static constexpr std::string_view Query = "SELECT \
+ LCIndex, BucketName, StartTime, Status \
+ from '{}' where LCIndex = {} and BucketName = {}";
+ static constexpr std::string_view NextQuery = "SELECT \
+ LCIndex, BucketName, StartTime, Status \
+ from '{}' where LCIndex = {} and BucketName > {} ORDER BY BucketName ASC";
+
+ public:
+ virtual ~GetLCEntryOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ if (params.op.query_str == "get_next_entry") {
+ return fmt::format(NextQuery, params.lc_entry_table,
+ params.op.lc_entry.index, params.op.lc_entry.bucket_name);
+ }
+ // default
+ return fmt::format(Query, params.lc_entry_table,
+ params.op.lc_entry.index, params.op.lc_entry.bucket_name);
+ }
+};
+
+class ListLCEntriesOp: virtual public DBOp {
+ private:
+ static constexpr std::string_view Query = "SELECT \
+ LCIndex, BucketName, StartTime, Status \
+ FROM '{}' WHERE LCIndex = {} AND BucketName > {} ORDER BY BucketName ASC LIMIT {}";
+
+ public:
+ virtual ~ListLCEntriesOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ return fmt::format(Query, params.lc_entry_table,
+ params.op.lc_entry.index, params.op.lc_entry.min_marker,
+ params.op.list_max_count);
+ }
+};
+
+class InsertLCHeadOp: virtual public DBOp {
+ private:
+ static constexpr std::string_view Query =
+ "INSERT OR REPLACE INTO '{}' \
+ (LCIndex, Marker, StartDate) \
+ VALUES ({}, {}, {})";
+
+ public:
+ virtual ~InsertLCHeadOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ return fmt::format(Query, params.lc_head_table,
+ params.op.lc_head.index, params.op.lc_head.marker,
+ params.op.lc_head.start_date);
+ }
+};
+
+class RemoveLCHeadOp: virtual public DBOp {
+ private:
+ static constexpr std::string_view Query =
+ "DELETE from '{}' where LCIndex = {}";
+
+ public:
+ virtual ~RemoveLCHeadOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ return fmt::format(Query, params.lc_head_table,
+ params.op.lc_head.index);
+ }
+};
+
+class GetLCHeadOp: virtual public DBOp {
+ private:
+ static constexpr std::string_view Query = "SELECT \
+ LCIndex, Marker, StartDate \
+ from '{}' where LCIndex = {}";
+
+ public:
+ virtual ~GetLCHeadOp() {}
+
+ static std::string Schema(DBOpPrepareParams &params) {
+ return fmt::format(Query, params.lc_head_table,
+ params.op.lc_head.index);
+ }
+};
+
+/* taken from rgw_rados.h::RGWOLHInfo */
+struct DBOLHInfo {
+ rgw_obj target;
+ bool removed;
+ DBOLHInfo() : removed(false) {}
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(target, bl);
+ encode(removed, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(target, bl);
+ decode(removed, bl);
+ DECODE_FINISH(bl);
+ }
+};
+WRITE_CLASS_ENCODER(DBOLHInfo)
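+// Illustrative sketch (not part of the upstream header): like any type declared
+// with WRITE_CLASS_ENCODER, DBOLHInfo round-trips through a bufferlist, e.g.
+//   bufferlist bl;
+//   DBOLHInfo olh;
+//   encode(olh, bl);          // uses DBOLHInfo::encode()
+//   auto p = bl.cbegin();
+//   DBOLHInfo out;
+//   decode(out, p);           // uses DBOLHInfo::decode()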
+
+class DB {
+ private:
+ const std::string db_name;
+ rgw::sal::Driver* driver;
+ const std::string user_table;
+ const std::string bucket_table;
+ const std::string quota_table;
+ const std::string lc_head_table;
+ const std::string lc_entry_table;
+ static std::map<std::string, class ObjectOp*> objectmap;
+
+ protected:
+ void *db;
+ CephContext *cct;
+ const DoutPrefix dp;
+ uint64_t max_bucket_id = 0;
+    // XXX: default ObjStripeSize or ObjChunkSize is 4M; make them configurable?
+ uint64_t ObjHeadSize = 1024; /* 1K - default head data size */
+ uint64_t ObjChunkSize = (get_blob_limit() - 1000); /* 1000 to accommodate other fields */
+    // The mutex below protects objectmap and any other shared objects.
+ std::mutex mtx;
+
+ public:
+ DB(std::string db_name, CephContext *_cct) : db_name(db_name),
+ user_table(db_name+"_user_table"),
+ bucket_table(db_name+"_bucket_table"),
+ quota_table(db_name+"_quota_table"),
+ lc_head_table(db_name+"_lc_head_table"),
+ lc_entry_table(db_name+"_lc_entry_table"),
+ cct(_cct),
+ dp(_cct, ceph_subsys_rgw, "rgw DBStore backend: ")
+ {}
+ /* DB() {}*/
+
+ DB(CephContext *_cct) : db_name("default_db"),
+ user_table(db_name+"_user_table"),
+ bucket_table(db_name+"_bucket_table"),
+ quota_table(db_name+"_quota_table"),
+ lc_head_table(db_name+"_lc_head_table"),
+ lc_entry_table(db_name+"_lc_entry_table"),
+ cct(_cct),
+ dp(_cct, ceph_subsys_rgw, "rgw DBStore backend: ")
+ {}
+ virtual ~DB() {}
+
+ const std::string getDBname() { return db_name; }
+ const std::string getDBfile() { return db_name + ".db"; }
+ const std::string getUserTable() { return user_table; }
+ const std::string getBucketTable() { return bucket_table; }
+ const std::string getQuotaTable() { return quota_table; }
+ const std::string getLCHeadTable() { return lc_head_table; }
+ const std::string getLCEntryTable() { return lc_entry_table; }
+ const std::string getObjectTable(std::string bucket) {
+ return db_name+"_"+bucket+"_object_table"; }
+ const std::string getObjectDataTable(std::string bucket) {
+ return db_name+"_"+bucket+"_objectdata_table"; }
+ const std::string getObjectView(std::string bucket) {
+ return db_name+"_"+bucket+"_object_view"; }
+ const std::string getObjectTrigger(std::string bucket) {
+ return db_name+"_"+bucket+"_object_trigger"; }
+
+ std::map<std::string, class ObjectOp*> getObjectMap();
+
+ struct DBOps dbops; // DB operations, make it private?
+
+ void set_driver(rgw::sal::Driver* _driver) {
+ driver = _driver;
+ }
+
+ void set_context(CephContext *_cct) {
+ cct = _cct;
+ }
+
+ CephContext *ctx() { return cct; }
+ const DoutPrefixProvider *get_def_dpp() { return &dp; }
+
+ int Initialize(std::string logfile, int loglevel);
+ int Destroy(const DoutPrefixProvider *dpp);
+ int LockInit(const DoutPrefixProvider *dpp);
+ int LockDestroy(const DoutPrefixProvider *dpp);
+ int Lock(const DoutPrefixProvider *dpp);
+ int Unlock(const DoutPrefixProvider *dpp);
+
+ int InitializeParams(const DoutPrefixProvider *dpp, DBOpParams *params);
+ int ProcessOp(const DoutPrefixProvider *dpp, std::string_view Op, DBOpParams *params);
+ std::shared_ptr<class DBOp> getDBOp(const DoutPrefixProvider *dpp, std::string_view Op, const DBOpParams *params);
+ int objectmapInsert(const DoutPrefixProvider *dpp, std::string bucket, class ObjectOp* ptr);
+ int objectmapDelete(const DoutPrefixProvider *dpp, std::string bucket);
+
+ virtual uint64_t get_blob_limit() { return 0; };
+ virtual void *openDB(const DoutPrefixProvider *dpp) { return NULL; }
+ virtual int closeDB(const DoutPrefixProvider *dpp) { return 0; }
+ virtual int createTables(const DoutPrefixProvider *dpp) { return 0; }
+ virtual int InitializeDBOps(const DoutPrefixProvider *dpp) { return 0; }
+ virtual int InitPrepareParams(const DoutPrefixProvider *dpp,
+ DBOpPrepareParams &p_params,
+ DBOpParams* params) = 0;
+ virtual int createLCTables(const DoutPrefixProvider *dpp) = 0;
+
+ virtual int ListAllBuckets(const DoutPrefixProvider *dpp, DBOpParams *params) = 0;
+ virtual int ListAllUsers(const DoutPrefixProvider *dpp, DBOpParams *params) = 0;
+ virtual int ListAllObjects(const DoutPrefixProvider *dpp, DBOpParams *params) = 0;
+
+ int get_user(const DoutPrefixProvider *dpp,
+ const std::string& query_str, const std::string& query_str_val,
+ RGWUserInfo& uinfo, std::map<std::string, bufferlist> *pattrs,
+ RGWObjVersionTracker *pobjv_tracker);
+ int store_user(const DoutPrefixProvider *dpp,
+ RGWUserInfo& uinfo, bool exclusive, std::map<std::string, bufferlist> *pattrs,
+ RGWObjVersionTracker *pobjv_tracker, RGWUserInfo* pold_info);
+ int remove_user(const DoutPrefixProvider *dpp,
+ RGWUserInfo& uinfo, RGWObjVersionTracker *pobjv_tracker);
+ int get_bucket_info(const DoutPrefixProvider *dpp, const std::string& query_str,
+ const std::string& query_str_val,
+ RGWBucketInfo& info, rgw::sal::Attrs* pattrs, ceph::real_time* pmtime,
+ obj_version* pbucket_version);
+ int create_bucket(const DoutPrefixProvider *dpp,
+ const RGWUserInfo& owner, rgw_bucket& bucket,
+ const std::string& zonegroup_id,
+ const rgw_placement_rule& placement_rule,
+ const std::string& swift_ver_location,
+ const RGWQuotaInfo * pquota_info,
+ std::map<std::string, bufferlist>& attrs,
+ RGWBucketInfo& info,
+ obj_version *pobjv,
+ obj_version *pep_objv,
+ real_time creation_time,
+ rgw_bucket *pmaster_bucket,
+ uint32_t *pmaster_num_shards,
+ optional_yield y,
+ bool exclusive);
+
+ int next_bucket_id() { return ++max_bucket_id; };
+
+ int remove_bucket(const DoutPrefixProvider *dpp, const RGWBucketInfo info);
+ int list_buckets(const DoutPrefixProvider *dpp, const std::string& query_str,
+ rgw_user& user,
+ const std::string& marker,
+ const std::string& end_marker,
+ uint64_t max,
+ bool need_stats,
+ RGWUserBuckets *buckets,
+ bool *is_truncated);
+ int update_bucket(const DoutPrefixProvider *dpp, const std::string& query_str,
+ RGWBucketInfo& info, bool exclusive,
+ const rgw_user* powner_id, std::map<std::string, bufferlist>* pattrs,
+ ceph::real_time* pmtime, RGWObjVersionTracker* pobjv);
+
+ uint64_t get_max_head_size() { return ObjHeadSize; }
+ uint64_t get_max_chunk_size() { return ObjChunkSize; }
+ void gen_rand_obj_instance_name(rgw_obj_key *target_key);
+
+  // db raw obj string is of format -
+  // "<bucketname>_<objname>_<objinstance>_<objid>_<multipart-part-str>_<partnum>"
+  static constexpr std::string_view raw_obj_oid = "{0}_{1}_{2}_{3}_{4}_{5}";
+
+ std::string to_oid(std::string_view bucket, std::string_view obj_name,
+ std::string_view obj_instance, std::string_view obj_id,
+ std::string_view mp_str, uint64_t partnum) {
+ return fmt::format(raw_obj_oid, bucket, obj_name, obj_instance, obj_id, mp_str, partnum);
+ }
+  int from_oid(const std::string& oid, std::string& bucket, std::string& obj_name,
+      std::string& obj_instance, std::string& obj_id,
+      std::string& mp_str, uint64_t& partnum) {
+    // TODO: use ceph::split() from common/split.h
+    // XXX: doesn't this break if obj_name has underscores in it?
+    std::vector<std::string> result;
+    boost::split(result, oid, boost::is_any_of("_"));
+    if (result.size() < 6) {
+      return -EINVAL; // not an oid produced by to_oid()
+    }
+    bucket = result[0];
+    obj_name = result[1];
+    obj_instance = result[2];
+    obj_id = result[3];
+    mp_str = result[4];
+    partnum = stoi(result[5]);
+
+    return 0;
+  }
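+  // Example (illustrative, assuming the 6-field format above):
+  //   to_oid("bkt", "obj", "v1", "id1", "mp1", 2) -> "bkt_obj_v1_id1_mp1_2"
+  // and from_oid() splits that string back into the same components.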
+
+ struct raw_obj {
+ DB* db;
+
+ std::string bucket_name;
+ std::string obj_name;
+ std::string obj_instance;
+ std::string obj_ns;
+ std::string obj_id;
+ std::string multipart_part_str;
+ uint64_t part_num;
+
+ std::string obj_table;
+ std::string obj_data_table;
+
+ raw_obj(DB* _db) {
+ db = _db;
+ }
+
+ raw_obj(DB* _db, std::string& _bname, std::string& _obj_name, std::string& _obj_instance,
+ std::string& _obj_ns, std::string& _obj_id, std::string _mp_part_str, int _part_num) {
+ db = _db;
+ bucket_name = _bname;
+ obj_name = _obj_name;
+ obj_instance = _obj_instance;
+ obj_ns = _obj_ns;
+ obj_id = _obj_id;
+ multipart_part_str = _mp_part_str;
+ part_num = _part_num;
+
+      obj_table = db->getObjectTable(bucket_name);
+      obj_data_table = db->getObjectDataTable(bucket_name);
+ }
+
+ raw_obj(DB* _db, std::string& oid) {
+ int r;
+
+ db = _db;
+ r = db->from_oid(oid, bucket_name, obj_name, obj_instance, obj_id, multipart_part_str,
+ part_num);
+ if (r < 0) {
+ multipart_part_str = "0.0";
+ part_num = 0;
+ }
+
+ obj_table = db->getObjectTable(bucket_name);
+ obj_data_table = db->getObjectDataTable(bucket_name);
+ }
+
+ int InitializeParamsfromRawObj (const DoutPrefixProvider *dpp, DBOpParams* params);
+
+ int read(const DoutPrefixProvider *dpp, int64_t ofs, uint64_t end, bufferlist& bl);
+ int write(const DoutPrefixProvider *dpp, int64_t ofs, int64_t write_ofs, uint64_t len, bufferlist& bl);
+ };
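+
+  // Illustrative usage sketch (assumes a valid DB* db, a DoutPrefixProvider* dpp
+  // and an oid produced by to_oid(); not part of the upstream interface):
+  //   DB::raw_obj robj(db, oid);   // parses the oid, derives the table names
+  //   bufferlist bl;
+  //   int r = robj.read(dpp, 0, db->get_max_chunk_size(), bl);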
+
+ class GC : public Thread {
+ const DoutPrefixProvider *dpp;
+ DB *db;
+    /* Default time intervals for GC
+     * XXX: Make the options below configurable
+     *
+     * gc_interval: time between successive GC thread runs
+     * gc_obj_min_wait: minimum time to wait after an object's creation before
+     *                  its data may be deleted
+     */
+ std::mutex mtx;
+ std::condition_variable cv;
+ bool stop_signalled = false;
+    uint32_t gc_interval = 24*60*60; // seconds; default: 24 hours
+    uint32_t gc_obj_min_wait = 60*60; // seconds; default: 1 hour
+ std::string bucket_marker;
+ std::string user_marker;
+
+ public:
+ GC(const DoutPrefixProvider *_dpp, DB* _db) :
+ dpp(_dpp), db(_db) {}
+
+ void *entry() override;
+
+ void signal_stop() {
+ std::lock_guard<std::mutex> lk_guard(mtx);
+ stop_signalled = true;
+ cv.notify_one();
+ }
+
+ friend class DB;
+ };
+ std::unique_ptr<DB::GC> gc_worker;
+
+ class Bucket {
+ friend class DB;
+ DB* store;
+
+ RGWBucketInfo bucket_info;
+
+ public:
+ Bucket(DB *_store, const RGWBucketInfo& _binfo) : store(_store), bucket_info(_binfo) {}
+ DB *get_store() { return store; }
+ rgw_bucket& get_bucket() { return bucket_info.bucket; }
+ RGWBucketInfo& get_bucket_info() { return bucket_info; }
+
+ class List {
+ protected:
+ // absolute maximum number of objects that
+ // list_objects_(un)ordered can return
+ static constexpr int64_t bucket_list_objects_absolute_max = 25000;
+
+ DB::Bucket *target;
+ rgw_obj_key next_marker;
+
+ public:
+
+ struct Params {
+ std::string prefix;
+ std::string delim;
+ rgw_obj_key marker;
+ rgw_obj_key end_marker;
+ std::string ns;
+ bool enforce_ns;
+ RGWAccessListFilter* access_list_filter;
+ RGWBucketListNameFilter force_check_filter;
+ bool list_versions;
+ bool allow_unordered;
+
+ Params() :
+ enforce_ns(true),
+ access_list_filter(nullptr),
+ list_versions(false),
+ allow_unordered(false)
+ {}
+ } params;
+
+ explicit List(DB::Bucket *_target) : target(_target) {}
+
+ /* XXX: Handle ordered and unordered separately.
+ * For now returning only ordered entries */
+ int list_objects(const DoutPrefixProvider *dpp, int64_t max,
+ std::vector<rgw_bucket_dir_entry> *result,
+ std::map<std::string, bool> *common_prefixes, bool *is_truncated);
+ rgw_obj_key& get_next_marker() {
+ return next_marker;
+ }
+ };
+ };
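+
+  // Illustrative sketch (assumes an initialized DB* store, an RGWBucketInfo
+  // binfo and a DoutPrefixProvider* dpp; not part of the upstream interface):
+  //   DB::Bucket target(store, binfo);
+  //   DB::Bucket::List list_op(&target);
+  //   list_op.params.prefix = "photos/";
+  //   std::vector<rgw_bucket_dir_entry> results;
+  //   std::map<std::string, bool> prefixes;
+  //   bool truncated = false;
+  //   int r = list_op.list_objects(dpp, 100, &results, &prefixes, &truncated);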
+
+ class Object {
+ friend class DB;
+ DB* store;
+
+ RGWBucketInfo bucket_info;
+ rgw_obj obj;
+
+ RGWObjState obj_state;
+ std::string obj_id;
+
+ bool versioning_disabled;
+
+ bool bs_initialized;
+
+ public:
+ Object(DB *_store, const RGWBucketInfo& _bucket_info, const rgw_obj& _obj) : store(_store), bucket_info(_bucket_info),
+ obj(_obj),
+ versioning_disabled(false),
+ bs_initialized(false) {}
+
+ Object(DB *_store, const RGWBucketInfo& _bucket_info, const rgw_obj& _obj, const std::string& _obj_id) : store(_store), bucket_info(_bucket_info), obj(_obj), obj_id(_obj_id) {}
+
+ struct Read {
+ DB::Object *source;
+
+ struct GetObjState {
+ rgw_obj obj;
+ } state;
+
+ struct ConditionParams {
+ const ceph::real_time *mod_ptr;
+ const ceph::real_time *unmod_ptr;
+ bool high_precision_time;
+ uint32_t mod_zone_id;
+ uint64_t mod_pg_ver;
+ const char *if_match;
+ const char *if_nomatch;
+
+ ConditionParams() :
+ mod_ptr(NULL), unmod_ptr(NULL), high_precision_time(false), mod_zone_id(0), mod_pg_ver(0),
+ if_match(NULL), if_nomatch(NULL) {}
+ } conds;
+
+ struct Params {
+ ceph::real_time *lastmod;
+ uint64_t *obj_size;
+ std::map<std::string, bufferlist> *attrs;
+ rgw_obj *target_obj;
+
+ Params() : lastmod(nullptr), obj_size(nullptr), attrs(nullptr),
+ target_obj(nullptr) {}
+ } params;
+
+ explicit Read(DB::Object *_source) : source(_source) {}
+
+ int prepare(const DoutPrefixProvider *dpp);
+ static int range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end);
+ int read(int64_t ofs, int64_t end, bufferlist& bl, const DoutPrefixProvider *dpp);
+ int iterate(const DoutPrefixProvider *dpp, int64_t ofs, int64_t end, RGWGetDataCB *cb);
+ int get_attr(const DoutPrefixProvider *dpp, const char *name, bufferlist& dest);
+ };
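+
+    // Illustrative sketch (assumes a DB::Object op for the target object, a
+    // DoutPrefixProvider* dpp, plus caller-declared uint64_t size and attrs
+    // map; not part of the upstream interface):
+    //   DB::Object::Read read_op(&op);
+    //   read_op.params.obj_size = &size;
+    //   read_op.params.attrs = &attrs;
+    //   if (read_op.prepare(dpp) == 0) {
+    //     bufferlist bl;
+    //     read_op.read(0, size - 1, bl, dpp);
+    //   }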
+
+ struct Write {
+ DB::Object *target;
+ RGWObjState obj_state;
+ std::string mp_part_str = "0.0"; // multipart num
+
+ struct MetaParams {
+ ceph::real_time *mtime;
+ std::map<std::string, bufferlist>* rmattrs;
+ const bufferlist *data;
+ RGWObjManifest *manifest;
+ const std::string *ptag;
+ std::list<rgw_obj_index_key> *remove_objs;
+ ceph::real_time set_mtime;
+ rgw_user owner;
+ RGWObjCategory category;
+ int flags;
+ const char *if_match;
+ const char *if_nomatch;
+ std::optional<uint64_t> olh_epoch;
+ ceph::real_time delete_at;
+ bool canceled;
+ const std::string *user_data;
+ rgw_zone_set *zones_trace;
+ bool modify_tail;
+ bool completeMultipart;
+ bool appendable;
+
+ MetaParams() : mtime(NULL), rmattrs(NULL), data(NULL), manifest(NULL), ptag(NULL),
+ remove_objs(NULL), category(RGWObjCategory::Main), flags(0),
+ if_match(NULL), if_nomatch(NULL), canceled(false), user_data(nullptr), zones_trace(nullptr),
+ modify_tail(false), completeMultipart(false), appendable(false) {}
+ } meta;
+
+ explicit Write(DB::Object *_target) : target(_target) {}
+
+ void set_mp_part_str(std::string _mp_part_str) { mp_part_str = _mp_part_str;}
+ int prepare(const DoutPrefixProvider* dpp);
+ int write_data(const DoutPrefixProvider* dpp,
+ bufferlist& data, uint64_t ofs);
+ int _do_write_meta(const DoutPrefixProvider *dpp,
+ uint64_t size, uint64_t accounted_size,
+ std::map<std::string, bufferlist>& attrs,
+ bool assume_noent, bool modify_tail);
+ int write_meta(const DoutPrefixProvider *dpp, uint64_t size,
+ uint64_t accounted_size, std::map<std::string, bufferlist>& attrs);
+ };
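+
+    // Illustrative sketch (assumes a DB::Object op, object bytes in a
+    // bufferlist data, a ceph::real_time mtime and an attrs map declared by
+    // the caller; not part of the upstream interface):
+    //   DB::Object::Write write_op(&op);
+    //   write_op.meta.mtime = &mtime;
+    //   if (write_op.prepare(dpp) == 0) {
+    //     write_op.write_data(dpp, data, 0);
+    //     write_op.write_meta(dpp, data.length(), data.length(), attrs);
+    //   }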
+
+ struct Delete {
+ DB::Object *target;
+
+ struct DeleteParams {
+ rgw_user bucket_owner;
+ int versioning_status;
+ ACLOwner obj_owner; /* needed for creation of deletion marker */
+ uint64_t olh_epoch;
+ std::string marker_version_id;
+ uint32_t bilog_flags;
+ std::list<rgw_obj_index_key> *remove_objs;
+ ceph::real_time expiration_time;
+ ceph::real_time unmod_since;
+ ceph::real_time mtime; /* for setting delete marker mtime */
+ bool high_precision_time;
+ rgw_zone_set *zones_trace;
+ bool abortmp;
+ uint64_t parts_accounted_size;
+
+ DeleteParams() : versioning_status(0), olh_epoch(0), bilog_flags(0), remove_objs(NULL), high_precision_time(false), zones_trace(nullptr), abortmp(false), parts_accounted_size(0) {}
+ } params;
+
+ struct DeleteResult {
+ bool delete_marker;
+ std::string version_id;
+
+ DeleteResult() : delete_marker(false) {}
+ } result;
+
+ explicit Delete(DB::Object *_target) : target(_target) {}
+
+ int delete_obj(const DoutPrefixProvider *dpp);
+ int delete_obj_impl(const DoutPrefixProvider *dpp, DBOpParams& del_params);
+ int create_dm(const DoutPrefixProvider *dpp, DBOpParams& del_params);
+ };
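+
+    // Illustrative sketch (assumes a DB::Object op and a DoutPrefixProvider*
+    // dpp; not part of the upstream interface):
+    //   DB::Object::Delete del_op(&op);
+    //   int r = del_op.delete_obj(dpp);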
+
+    /* XXX: the parameters may be subject to change. All we need are the keys:
+     * bucket name, object name and instance */
+ int get_object_impl(const DoutPrefixProvider *dpp, DBOpParams& params);
+ int get_obj_state(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info,
+ const rgw_obj& obj,
+ bool follow_olh, RGWObjState **state);
+ int get_state(const DoutPrefixProvider *dpp, RGWObjState **pstate, bool follow_olh);
+ int list_versioned_objects(const DoutPrefixProvider *dpp,
+ std::list<rgw_bucket_dir_entry>& list_entries);
+
+ DB *get_store() { return store; }
+ rgw_obj& get_obj() { return obj; }
+ RGWBucketInfo& get_bucket_info() { return bucket_info; }
+
+ int InitializeParamsfromObject(const DoutPrefixProvider *dpp, DBOpParams* params);
+ int set_attrs(const DoutPrefixProvider *dpp, std::map<std::string, bufferlist>& setattrs,
+ std::map<std::string, bufferlist>* rmattrs);
+ int transition(const DoutPrefixProvider *dpp,
+ const rgw_placement_rule& rule, const real_time& mtime,
+ uint64_t olh_epoch);
+ int obj_omap_set_val_by_key(const DoutPrefixProvider *dpp, const std::string& key, bufferlist& val, bool must_exist);
+ int obj_omap_get_vals_by_keys(const DoutPrefixProvider *dpp, const std::string& oid,
+ const std::set<std::string>& keys,
+ std::map<std::string, bufferlist>* vals);
+ int obj_omap_get_all(const DoutPrefixProvider *dpp, std::map<std::string, bufferlist> *m);
+ int obj_omap_get_vals(const DoutPrefixProvider *dpp, const std::string& marker, uint64_t count,
+ std::map<std::string, bufferlist> *m, bool* pmore);
+ using iterate_obj_cb = int (*)(const DoutPrefixProvider*, const raw_obj&, off_t, off_t,
+ bool, RGWObjState*, void*);
+ int add_mp_part(const DoutPrefixProvider *dpp, RGWUploadPartInfo info);
+ int get_mp_parts_list(const DoutPrefixProvider *dpp, std::list<RGWUploadPartInfo>& info);
+
+ int iterate_obj(const DoutPrefixProvider *dpp,
+ const RGWBucketInfo& bucket_info, const rgw_obj& obj,
+ off_t ofs, off_t end, uint64_t max_chunk_size,
+ iterate_obj_cb cb, void *arg);
+ };
+ int get_obj_iterate_cb(const DoutPrefixProvider *dpp,
+ const raw_obj& read_obj, off_t obj_ofs,
+ off_t len, bool is_head_obj,
+ RGWObjState *astate, void *arg);
+
+ int get_entry(const std::string& oid, const std::string& marker,
+ std::unique_ptr<rgw::sal::Lifecycle::LCEntry>* entry);
+ int get_next_entry(const std::string& oid, const std::string& marker,
+ std::unique_ptr<rgw::sal::Lifecycle::LCEntry>* entry);
+ int set_entry(const std::string& oid, rgw::sal::Lifecycle::LCEntry& entry);
+ int list_entries(const std::string& oid, const std::string& marker,
+ uint32_t max_entries, std::vector<std::unique_ptr<rgw::sal::Lifecycle::LCEntry>>& entries);
+ int rm_entry(const std::string& oid, rgw::sal::Lifecycle::LCEntry& entry);
+ int get_head(const std::string& oid, std::unique_ptr<rgw::sal::Lifecycle::LCHead>* head);
+ int put_head(const std::string& oid, rgw::sal::Lifecycle::LCHead& head);
+ int delete_stale_objs(const DoutPrefixProvider *dpp, const std::string& bucket,
+ uint32_t min_wait);
+ int createGC(const DoutPrefixProvider *_dpp);
+ int stopGC();
+};
+
+struct db_get_obj_data {
+ DB* store;
+ RGWGetDataCB* client_cb = nullptr;
+ uint64_t offset; // next offset to write to client
+
+ db_get_obj_data(DB* db, RGWGetDataCB* cb, uint64_t offset) :
+ store(db), client_cb(cb), offset(offset) {}
+ ~db_get_obj_data() {}
+};
+
+} } // namespace rgw::store
diff --git a/src/rgw/driver/dbstore/common/dbstore_log.h b/src/rgw/driver/dbstore/common/dbstore_log.h
new file mode 100644
index 000000000..416508369
--- /dev/null
+++ b/src/rgw/driver/dbstore/common/dbstore_log.h
@@ -0,0 +1,15 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <cerrno>
+#include <cstdlib>
+#include <string>
+#include <cstdio>
+#include <iostream>
+#include <fstream>
+#include "common/dout.h"
+
+#undef dout_prefix
+#define dout_prefix *_dout << "rgw dbstore: "