summaryrefslogtreecommitdiffstats
path: root/src/librbd/cache
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org>, 2024-04-27 18:24:20 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org>, 2024-04-27 18:24:20 +0000
commit: 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch)
tree: e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/librbd/cache
parent: Initial commit. (diff)
downloadceph-upstream.tar.xz
ceph-upstream.zip
Adding upstream version 14.2.21. (tags: upstream/14.2.21, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- src/librbd/cache/ImageCache.h 56
-rw-r--r-- src/librbd/cache/ImageWriteback.cc 127
-rw-r--r-- src/librbd/cache/ImageWriteback.h 54
-rw-r--r-- src/librbd/cache/ObjectCacherObjectDispatch.cc 408
-rw-r--r-- src/librbd/cache/ObjectCacherObjectDispatch.h 112
-rw-r--r-- src/librbd/cache/PassthroughImageCache.cc 135
-rw-r--r-- src/librbd/cache/PassthroughImageCache.h 59
7 files changed, 951 insertions, 0 deletions
diff --git a/src/librbd/cache/ImageCache.h b/src/librbd/cache/ImageCache.h
new file mode 100644
index 00000000..71af11f2
--- /dev/null
+++ b/src/librbd/cache/ImageCache.h
@@ -0,0 +1,56 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_CACHE_IMAGE_CACHE
+#define CEPH_LIBRBD_CACHE_IMAGE_CACHE
+
+#include "include/buffer_fwd.h"
+#include "include/int_types.h"
+#include <vector>
+
+class Context;
+
+namespace librbd {
+namespace cache {
+
+/**
+ * Abstract interface of a client-side cache that works on image extents.
+ *
+ * Every method receives an on_finish Context; implementations in this
+ * directory complete it once the operation has been handled.
+ */
+struct ImageCache {
+  /// list of 64-bit value pairs describing image extents
+  /// (presumably offset/length -- TODO confirm against callers)
+  using Extents = std::vector<std::pair<uint64_t, uint64_t>>;
+
+  virtual ~ImageCache() = default;
+
+  /// client AIO methods
+
+  /// read the given extents into *bl
+  virtual void aio_read(Extents&& image_extents, ceph::bufferlist* bl,
+                        int fadvise_flags, Context *on_finish) = 0;
+  /// write the contents of bl to the given extents
+  virtual void aio_write(Extents&& image_extents, ceph::bufferlist&& bl,
+                         int fadvise_flags, Context *on_finish) = 0;
+  /// discard length bytes starting at offset
+  virtual void aio_discard(uint64_t offset, uint64_t length,
+                           uint32_t discard_granularity_bytes,
+                           Context *on_finish) = 0;
+  /// flush outstanding IO
+  virtual void aio_flush(Context *on_finish) = 0;
+  /// write-same of bl over the range [offset, offset + length)
+  virtual void aio_writesame(uint64_t offset, uint64_t length,
+                             ceph::bufferlist&& bl,
+                             int fadvise_flags, Context *on_finish) = 0;
+  /// compare the extents against cmp_bl and write bl; mismatch_offset is an
+  /// out-parameter (presumably filled in on mismatch -- TODO confirm)
+  virtual void aio_compare_and_write(Extents&& image_extents,
+                                     ceph::bufferlist&& cmp_bl,
+                                     ceph::bufferlist&& bl,
+                                     uint64_t *mismatch_offset,
+                                     int fadvise_flags,
+                                     Context *on_finish) = 0;
+
+  /// internal state methods
+  virtual void init(Context *on_finish) = 0;
+  virtual void shut_down(Context *on_finish) = 0;
+
+  virtual void invalidate(Context *on_finish) = 0;
+  virtual void flush(Context *on_finish) = 0;
+
+};
+
+} // namespace cache
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_CACHE_IMAGE_CACHE
diff --git a/src/librbd/cache/ImageWriteback.cc b/src/librbd/cache/ImageWriteback.cc
new file mode 100644
index 00000000..ad479fbd
--- /dev/null
+++ b/src/librbd/cache/ImageWriteback.cc
@@ -0,0 +1,127 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "ImageWriteback.h"
+#include "include/buffer.h"
+#include "common/dout.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/io/AioCompletion.h"
+#include "librbd/io/ImageRequest.h"
+#include "librbd/io/ReadResult.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::ImageWriteback: " << __func__ << ": "
+
+namespace librbd {
+namespace cache {
+
+/// Remember the image context that all writeback requests are issued against.
+template <typename I>
+ImageWriteback<I>::ImageWriteback(I &image_ctx)
+  : m_image_ctx(image_ctx) {
+}
+
+/**
+ * Issue an image read that bypasses the image cache.
+ *
+ * @param image_extents extents to read (moved into the request)
+ * @param bl            destination buffer wrapped in an io::ReadResult
+ * @param fadvise_flags forwarded to the read request
+ * @param on_finish     handed to the AioCompletion created for the request
+ */
+template <typename I>
+void ImageWriteback<I>::aio_read(Extents &&image_extents, bufferlist *bl,
+                                 int fadvise_flags, Context *on_finish) {
+  auto cct = m_image_ctx.cct;
+  ldout(cct, 20) << "image_extents=" << image_extents << ", "
+                 << "on_finish=" << on_finish << dendl;
+
+  auto completion = io::AioCompletion::create_and_start(
+    on_finish, &m_image_ctx, io::AIO_TYPE_READ);
+
+  io::ImageReadRequest<I> read_req(m_image_ctx, completion,
+                                   std::move(image_extents),
+                                   io::ReadResult{bl}, fadvise_flags, {});
+  // go straight to the layers below the image cache
+  read_req.set_bypass_image_cache();
+  read_req.send();
+}
+
+/**
+ * Issue an image write that bypasses the image cache.
+ *
+ * @param image_extents extents to write (moved into the request)
+ * @param bl            payload (moved into the request)
+ * @param fadvise_flags forwarded to the write request
+ * @param on_finish     handed to the AioCompletion created for the request
+ */
+template <typename I>
+void ImageWriteback<I>::aio_write(Extents &&image_extents,
+                                  ceph::bufferlist&& bl,
+                                  int fadvise_flags, Context *on_finish) {
+  auto cct = m_image_ctx.cct;
+  ldout(cct, 20) << "image_extents=" << image_extents << ", "
+                 << "on_finish=" << on_finish << dendl;
+
+  auto completion = io::AioCompletion::create_and_start(
+    on_finish, &m_image_ctx, io::AIO_TYPE_WRITE);
+
+  io::ImageWriteRequest<I> write_req(m_image_ctx, completion,
+                                     std::move(image_extents),
+                                     std::move(bl), fadvise_flags, {});
+  // go straight to the layers below the image cache
+  write_req.set_bypass_image_cache();
+  write_req.send();
+}
+
+/**
+ * Issue an image discard that bypasses the image cache.
+ *
+ * @param offset                    start of the range to discard
+ * @param length                    number of bytes to discard
+ * @param discard_granularity_bytes forwarded to the discard request
+ * @param on_finish                 handed to the AioCompletion created for
+ *                                  the request
+ */
+template <typename I>
+void ImageWriteback<I>::aio_discard(uint64_t offset, uint64_t length,
+                                    uint32_t discard_granularity_bytes,
+                                    Context *on_finish) {
+  auto cct = m_image_ctx.cct;
+  ldout(cct, 20) << "offset=" << offset << ", "
+                 << "length=" << length << ", "
+                 << "on_finish=" << on_finish << dendl;
+
+  auto completion = io::AioCompletion::create_and_start(
+    on_finish, &m_image_ctx, io::AIO_TYPE_DISCARD);
+
+  // the request covers a single extent: [offset, offset + length)
+  io::ImageDiscardRequest<I> discard_req(m_image_ctx, completion,
+                                         {{offset, length}},
+                                         discard_granularity_bytes, {});
+  discard_req.set_bypass_image_cache();
+  discard_req.send();
+}
+
+/**
+ * Issue an internal-source image flush that bypasses the image cache.
+ *
+ * @param on_finish handed to the AioCompletion created for the request
+ */
+template <typename I>
+void ImageWriteback<I>::aio_flush(Context *on_finish) {
+  auto cct = m_image_ctx.cct;
+  ldout(cct, 20) << "on_finish=" << on_finish << dendl;
+
+  auto completion = io::AioCompletion::create_and_start(
+    on_finish, &m_image_ctx, io::AIO_TYPE_FLUSH);
+
+  io::ImageFlushRequest<I> flush_req(m_image_ctx, completion,
+                                     io::FLUSH_SOURCE_INTERNAL, {});
+  flush_req.set_bypass_image_cache();
+  flush_req.send();
+}
+
+/**
+ * Issue an image write-same that bypasses the image cache.
+ *
+ * @param offset        start of the target range
+ * @param length        length of the target range
+ * @param bl            data pattern (moved into the request)
+ * @param fadvise_flags forwarded to the write-same request
+ * @param on_finish     handed to the AioCompletion created for the request
+ */
+template <typename I>
+void ImageWriteback<I>::aio_writesame(uint64_t offset, uint64_t length,
+                                      ceph::bufferlist&& bl,
+                                      int fadvise_flags, Context *on_finish) {
+  auto cct = m_image_ctx.cct;
+  ldout(cct, 20) << "offset=" << offset << ", "
+                 << "length=" << length << ", "
+                 << "data_len=" << bl.length() << ", "
+                 << "on_finish=" << on_finish << dendl;
+
+  auto completion = io::AioCompletion::create_and_start(
+    on_finish, &m_image_ctx, io::AIO_TYPE_WRITESAME);
+
+  // the request covers a single extent: [offset, offset + length)
+  io::ImageWriteSameRequest<I> ws_req(m_image_ctx, completion,
+                                      {{offset, length}}, std::move(bl),
+                                      fadvise_flags, {});
+  ws_req.set_bypass_image_cache();
+  ws_req.send();
+}
+
+/**
+ * Issue an image compare-and-write that bypasses the image cache.
+ *
+ * @param image_extents   target extents (moved into the request)
+ * @param cmp_bl          data to compare against (moved into the request)
+ * @param bl              data to write (moved into the request)
+ * @param mismatch_offset out-parameter forwarded to the request
+ * @param fadvise_flags   forwarded to the request
+ * @param on_finish       handed to the AioCompletion created for the request
+ */
+template <typename I>
+void ImageWriteback<I>::aio_compare_and_write(Extents &&image_extents,
+                                              ceph::bufferlist&& cmp_bl,
+                                              ceph::bufferlist&& bl,
+                                              uint64_t *mismatch_offset,
+                                              int fadvise_flags,
+                                              Context *on_finish) {
+  auto cct = m_image_ctx.cct;
+  ldout(cct, 20) << "image_extents=" << image_extents << ", "
+                 << "on_finish=" << on_finish << dendl;
+
+  auto completion = io::AioCompletion::create_and_start(
+    on_finish, &m_image_ctx, io::AIO_TYPE_COMPARE_AND_WRITE);
+
+  io::ImageCompareAndWriteRequest<I> cw_req(
+    m_image_ctx, completion, std::move(image_extents), std::move(cmp_bl),
+    std::move(bl), mismatch_offset, fadvise_flags, {});
+  cw_req.set_bypass_image_cache();
+  cw_req.send();
+}
+
+} // namespace cache
+} // namespace librbd
+
+template class librbd::cache::ImageWriteback<librbd::ImageCtx>;
+
diff --git a/src/librbd/cache/ImageWriteback.h b/src/librbd/cache/ImageWriteback.h
new file mode 100644
index 00000000..382c57c1
--- /dev/null
+++ b/src/librbd/cache/ImageWriteback.h
@@ -0,0 +1,54 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_CACHE_IMAGE_WRITEBACK
+#define CEPH_LIBRBD_CACHE_IMAGE_WRITEBACK
+
+#include "include/buffer_fwd.h"
+#include "include/int_types.h"
+#include <vector>
+
+class Context;
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace cache {
+
+/**
+ * client-side, image extent cache writeback handler
+ *
+ * Each aio_* method builds the matching io::Image*Request against the bound
+ * image context, marks it to bypass the image cache and sends it.
+ */
+template <typename ImageCtxT = librbd::ImageCtx>
+class ImageWriteback {
+public:
+  /// list of 64-bit value pairs describing image extents
+  using Extents = std::vector<std::pair<uint64_t, uint64_t>>;
+
+  /// image_ctx is stored by reference
+  explicit ImageWriteback(ImageCtxT &image_ctx);
+
+  void aio_read(Extents &&image_extents, ceph::bufferlist *bl,
+                int fadvise_flags, Context *on_finish);
+
+  void aio_write(Extents &&image_extents, ceph::bufferlist&& bl,
+                 int fadvise_flags, Context *on_finish);
+
+  void aio_discard(uint64_t offset, uint64_t length,
+                   uint32_t discard_granularity_bytes, Context *on_finish);
+
+  void aio_flush(Context *on_finish);
+
+  void aio_writesame(uint64_t offset, uint64_t length,
+                     ceph::bufferlist&& bl,
+                     int fadvise_flags, Context *on_finish);
+
+  void aio_compare_and_write(Extents &&image_extents,
+                             ceph::bufferlist&& cmp_bl,
+                             ceph::bufferlist&& bl,
+                             uint64_t *mismatch_offset,
+                             int fadvise_flags, Context *on_finish);
+
+private:
+  ImageCtxT &m_image_ctx;  ///< image all requests are issued against
+
+};
+
+} // namespace cache
+} // namespace librbd
+
+extern template class librbd::cache::ImageWriteback<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_CACHE_IMAGE_WRITEBACK
diff --git a/src/librbd/cache/ObjectCacherObjectDispatch.cc b/src/librbd/cache/ObjectCacherObjectDispatch.cc
new file mode 100644
index 00000000..5bced71b
--- /dev/null
+++ b/src/librbd/cache/ObjectCacherObjectDispatch.cc
@@ -0,0 +1,408 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/cache/ObjectCacherObjectDispatch.h"
+#include "common/errno.h"
+#include "common/WorkQueue.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Journal.h"
+#include "librbd/Utils.h"
+#include "librbd/LibrbdWriteback.h"
+#include "librbd/io/ObjectDispatchSpec.h"
+#include "librbd/io/ObjectDispatcher.h"
+#include "librbd/io/Utils.h"
+#include "osd/osd_types.h"
+#include "osdc/WritebackHandler.h"
+#include <vector>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::cache::ObjectCacherObjectDispatch: " \
+ << this << " " << __func__ << ": "
+
+namespace librbd {
+namespace cache {
+
+namespace {
+
+/// list of object extents as passed to the ObjectCacher calls in this file
+using ObjectExtents = std::vector<ObjectExtent>;
+
+} // anonymous namespace
+
+/**
+ * Completion for the flush issued by shut_down() and invalidate_cache().
+ *
+ * Optionally purges the object set on failure, releases whatever is left in
+ * the cache and forwards the final result to the wrapped context.
+ */
+template <typename I>
+struct ObjectCacherObjectDispatch<I>::C_InvalidateCache : public Context {
+  ObjectCacherObjectDispatch* dispatcher;
+  bool purge_on_error;   ///< purge the object set if the flush failed
+  Context *on_finish;    ///< completed with the final result
+
+  C_InvalidateCache(ObjectCacherObjectDispatch* dispatcher,
+                    bool purge_on_error, Context *on_finish)
+    : dispatcher(dispatcher), purge_on_error(purge_on_error),
+      on_finish(on_finish) {
+  }
+
+  void finish(int r) override {
+    // must be invoked with the cache lock held
+    ceph_assert(dispatcher->m_cache_lock.is_locked());
+    auto cct = dispatcher->m_image_ctx->cct;
+    auto object_cacher = dispatcher->m_object_cacher;
+    auto object_set = dispatcher->m_object_set;
+
+    if (r == -EBLACKLISTED) {
+      // a blacklisted client always purges, regardless of purge_on_error
+      lderr(cct) << "blacklisted during flush (purging)" << dendl;
+      object_cacher->purge_set(object_set);
+    } else if (r < 0 && purge_on_error) {
+      lderr(cct) << "failed to invalidate cache (purging): "
+                 << cpp_strerror(r) << dendl;
+      object_cacher->purge_set(object_set);
+    } else if (r != 0) {
+      lderr(cct) << "failed to invalidate cache: " << cpp_strerror(r) << dendl;
+    }
+
+    auto unclean_bytes = object_cacher->release_set(object_set);
+    if (unclean_bytes != 0) {
+      lderr(cct) << "could not release all objects from cache: "
+                 << unclean_bytes << " bytes remain" << dendl;
+      // keep an earlier error; otherwise report the leftover data
+      if (r == 0) {
+        r = -EBUSY;
+      }
+    } else {
+      // everything was released: an earlier flush error is not reported
+      r = 0;
+    }
+
+    on_finish->complete(r);
+  }
+};
+
+/**
+ * Stores the image context and creates the uniquely named cache lock.
+ * Asserts that the image's data IO context is valid.
+ */
+template <typename I>
+ObjectCacherObjectDispatch<I>::ObjectCacherObjectDispatch(I* image_ctx)
+  : m_image_ctx(image_ctx),
+    m_cache_lock(util::unique_lock_name(
+      "librbd::cache::ObjectCacherObjectDispatch::cache_lock", this)) {
+  ceph_assert(m_image_ctx->data_ctx.is_valid());
+}
+
+/// Frees the objects allocated by init(); all three pointers default to
+/// nullptr, so destruction without init() is a no-op.
+template <typename I>
+ObjectCacherObjectDispatch<I>::~ObjectCacherObjectDispatch() {
+  // the cacher was constructed with a reference to the writeback handler,
+  // so it is destroyed before the handler
+  delete m_object_cacher;
+  delete m_object_set;
+  delete m_writeback_handler;
+}
+
+/**
+ * Creates the writeback handler, object cacher and object set, starts the
+ * cacher and registers this layer with the image's IO object dispatcher.
+ */
+template <typename I>
+void ObjectCacherObjectDispatch<I>::init() {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 5) << dendl;
+
+  // object count handed to the ObjectCacher constructor; it is replaced
+  // via set_max_objects() further down
+  const uint64_t initial_max_objects = 10;
+
+  {
+    Mutex::Locker cache_locker(m_cache_lock);
+    ldout(cct, 5) << "enabling caching..." << dendl;
+    m_writeback_handler = new LibrbdWriteback(m_image_ctx, m_cache_lock);
+
+    // start without a dirty budget when write-through-until-flush is set;
+    // flush() raises it on the first user flush
+    uint64_t init_max_dirty =
+      m_image_ctx->cache_writethrough_until_flush ?
+        0 : m_image_ctx->cache_max_dirty;
+
+    auto &config = m_image_ctx->config;
+    auto cache_size =
+      config.template get_val<Option::size_t>("rbd_cache_size");
+    auto target_dirty =
+      config.template get_val<Option::size_t>("rbd_cache_target_dirty");
+    auto max_dirty_age =
+      config.template get_val<double>("rbd_cache_max_dirty_age");
+    auto block_writes_upfront =
+      config.template get_val<bool>("rbd_cache_block_writes_upfront");
+    auto max_dirty_object =
+      config.template get_val<uint64_t>("rbd_cache_max_dirty_object");
+
+    ldout(cct, 5) << "Initial cache settings:"
+                  << " size=" << cache_size
+                  << " num_objects=" << initial_max_objects
+                  << " max_dirty=" << init_max_dirty
+                  << " target_dirty=" << target_dirty
+                  << " max_dirty_age=" << max_dirty_age << dendl;
+
+    m_object_cacher = new ObjectCacher(
+      cct, m_image_ctx->perfcounter->get_name(), *m_writeback_handler,
+      m_cache_lock, nullptr, nullptr, cache_size,
+      initial_max_objects, /* reset this in init */
+      init_max_dirty, target_dirty, max_dirty_age, block_writes_upfront);
+
+    // size object cache appropriately
+    if (max_dirty_object == 0) {
+      // derive the count from the cache size, clamped to [10, 2000]
+      const uint64_t sized = std::max<uint64_t>(
+        10, cache_size / 100 / sizeof(ObjectCacher::Object));
+      max_dirty_object = std::min<uint64_t>(2000, sized);
+    }
+    ldout(cct, 5) << " cache bytes " << cache_size
+                  << " -> about " << max_dirty_object << " objects" << dendl;
+    m_object_cacher->set_max_objects(max_dirty_object);
+
+    m_object_set = new ObjectCacher::ObjectSet(
+      nullptr, m_image_ctx->data_ctx.get_id(), 0);
+    m_object_cacher->start();
+  }
+
+  // add ourselves to the IO object dispatcher chain (cache lock released)
+  m_image_ctx->io_object_dispatcher->register_object_dispatch(this);
+}
+
+/**
+ * Flushes pending writeback, invalidates the remaining cache entries and
+ * stops the object cacher before completing on_finish.
+ *
+ * @param on_finish completed with the result forwarded by C_InvalidateCache
+ */
+template <typename I>
+void ObjectCacherObjectDispatch<I>::shut_down(Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 5) << dendl;
+
+  // the callbacks are built in reverse order of execution
+
+  // last step: shut down the cache
+  Context *stop_ctx = new FunctionContext([this, on_finish](int r) {
+      m_object_cacher->stop();
+      on_finish->complete(r);
+    });
+
+  // ensure we aren't holding the cache lock post-flush
+  Context *async_ctx = util::create_async_context_callback(*m_image_ctx,
+                                                           stop_ctx);
+
+  // invalidate any remaining cache entries (purging on error)
+  Context *invalidate_ctx = new C_InvalidateCache(this, true, async_ctx);
+
+  // first step: flush all pending writeback state
+  Mutex::Locker cache_locker(m_cache_lock);
+  m_object_cacher->release_set(m_object_set);
+  m_object_cacher->flush_set(m_object_set, invalidate_ctx);
+}
+
+/**
+ * Serves an object read through the object cacher.
+ *
+ * Sets *dispatch_result to DISPATCH_RESULT_COMPLETE and always returns true.
+ * on_dispatched is wrapped in an async callback; it is handed to readx() and
+ * additionally completed here when readx() returns a non-zero value.
+ */
+template <typename I>
+bool ObjectCacherObjectDispatch<I>::read(
+    const std::string &oid, uint64_t object_no, uint64_t object_off,
+    uint64_t object_len, librados::snap_t snap_id, int op_flags,
+    const ZTracer::Trace &parent_trace, ceph::bufferlist* read_data,
+    io::ExtentMap* extent_map, int* object_dispatch_flags,
+    io::DispatchResult* dispatch_result, Context** on_finish,
+    Context* on_dispatched) {
+  // IO chained in reverse order
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~"
+                 << object_len << dendl;
+
+  // ensure we aren't holding the cache lock post-read
+  on_dispatched = util::create_async_context_callback(*m_image_ctx,
+                                                      on_dispatched);
+
+  // the read descriptor is prepared under the snap read lock
+  m_image_ctx->snap_lock.get_read();
+  auto read_op = m_object_cacher->prepare_read(snap_id, read_data, op_flags);
+  m_image_ctx->snap_lock.put_read();
+
+  // single object extent mapped onto buffer offset 0
+  ObjectExtent object_extent(oid, object_no, object_off, object_len, 0);
+  object_extent.oloc.pool = m_image_ctx->data_ctx.get_id();
+  object_extent.buffer_extents.push_back({0, object_len});
+  read_op->extents.push_back(object_extent);
+
+  ZTracer::Trace trace(parent_trace);
+  *dispatch_result = io::DISPATCH_RESULT_COMPLETE;
+
+  int r;
+  {
+    Mutex::Locker cache_locker(m_cache_lock);
+    r = m_object_cacher->readx(read_op, m_object_set, on_dispatched, &trace);
+  }
+
+  // non-zero result: complete the callback ourselves, outside the lock
+  if (r != 0) {
+    on_dispatched->complete(r);
+  }
+  return true;
+}
+
+/**
+ * Handles an object discard: waits for in-flight writeback of the affected
+ * extent before letting the request continue to the next layer, and drops
+ * the cached state once the request finishes.
+ *
+ * Sets *dispatch_result to DISPATCH_RESULT_CONTINUE, replaces *on_finish
+ * with a wrapper around the previous value and always returns true.
+ */
+template <typename I>
+bool ObjectCacherObjectDispatch<I>::discard(
+    const std::string &oid, uint64_t object_no, uint64_t object_off,
+    uint64_t object_len, const ::SnapContext &snapc, int discard_flags,
+    const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
+    uint64_t* journal_tid, io::DispatchResult* dispatch_result,
+    Context** on_finish, Context* on_dispatched) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~"
+                 << object_len << dendl;
+
+  ObjectExtents object_extents;
+  object_extents.emplace_back(oid, object_no, object_off, object_len, 0);
+
+  // discard the cache state after changes are committed to disk (and to
+  // prevent races w/ readahead)
+  Context *next_on_finish = *on_finish;
+  *on_finish = new FunctionContext(
+    [this, object_extents, next_on_finish](int r) {
+      {
+        Mutex::Locker cache_locker(m_cache_lock);
+        m_object_cacher->discard_set(m_object_set, object_extents);
+      }
+
+      // the original completion runs without the cache lock
+      next_on_finish->complete(r);
+    });
+
+  // ensure we aren't holding the cache lock post-write
+  on_dispatched = util::create_async_context_callback(*m_image_ctx,
+                                                      on_dispatched);
+
+  *dispatch_result = io::DISPATCH_RESULT_CONTINUE;
+
+  // ensure any in-flight writeback is complete before advancing
+  // the discard request
+  {
+    Mutex::Locker cache_locker(m_cache_lock);
+    m_object_cacher->discard_writeback(m_object_set, object_extents,
+                                       on_dispatched);
+  }
+  return true;
+}
+
+/**
+ * Writes object data into the object cacher.
+ *
+ * Sets *dispatch_result to DISPATCH_RESULT_COMPLETE and always returns true.
+ *
+ * @param oid           object name
+ * @param object_no     object number recorded in the cached extent
+ * @param object_off    offset of the write within the object
+ * @param data          payload; its length is the length of the write
+ * @param snapc         snapshot context passed to prepare_write()
+ * @param op_flags      forwarded to prepare_write()
+ * @param parent_trace  copied into the trace handed to writex()
+ * @param journal_tid   dereferenced and passed to prepare_write()
+ * @param on_dispatched wrapped in an async callback and handed to writex()
+ */
+template <typename I>
+bool ObjectCacherObjectDispatch<I>::write(
+    const std::string &oid, uint64_t object_no, uint64_t object_off,
+    ceph::bufferlist&& data, const ::SnapContext &snapc, int op_flags,
+    const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
+    uint64_t* journal_tid, io::DispatchResult* dispatch_result,
+    Context** on_finish, Context* on_dispatched) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~"
+                 << data.length() << dendl;
+
+  // ensure we aren't holding the cache lock post-write
+  on_dispatched = util::create_async_context_callback(*m_image_ctx,
+                                                      on_dispatched);
+
+  // the write descriptor is prepared under the snap read lock
+  m_image_ctx->snap_lock.get_read();
+  ObjectCacher::OSDWrite *wr = m_object_cacher->prepare_write(
+    snapc, data, ceph::real_time::min(), op_flags, *journal_tid);
+  m_image_ctx->snap_lock.put_read();
+
+  // record the real object number (as read() does) instead of a hard-coded
+  // 0, so the cached object carries the same number on every IO path
+  ObjectExtent extent(oid, object_no, object_off, data.length(), 0);
+  extent.oloc.pool = m_image_ctx->data_ctx.get_id();
+  extent.buffer_extents.push_back({0, data.length()});
+  wr->extents.push_back(extent);
+
+  ZTracer::Trace trace(parent_trace);
+  *dispatch_result = io::DISPATCH_RESULT_COMPLETE;
+
+  {
+    Mutex::Locker cache_locker(m_cache_lock);
+    m_object_cacher->writex(wr, m_object_set, on_dispatched, &trace);
+  }
+  return true;
+}
+
+/**
+ * Expands a write-same into a full data buffer and forwards it to write().
+ *
+ * @return whatever write() returns
+ */
+template <typename I>
+bool ObjectCacherObjectDispatch<I>::write_same(
+    const std::string &oid, uint64_t object_no, uint64_t object_off,
+    uint64_t object_len, io::Extents&& buffer_extents, ceph::bufferlist&& data,
+    const ::SnapContext &snapc, int op_flags,
+    const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
+    uint64_t* journal_tid, io::DispatchResult* dispatch_result,
+    Context** on_finish, Context* on_dispatched) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~"
+                 << object_len << dendl;
+
+  // ObjectCacher doesn't support write-same so convert to regular write
+
+  // temporary extent, only handed to assemble_write_same_extent() below
+  ObjectExtent ws_extent(oid, 0, object_off, object_len, 0);
+  ws_extent.buffer_extents = std::move(buffer_extents);
+
+  bufferlist expanded_data;
+  io::util::assemble_write_same_extent(ws_extent, data, &expanded_data, true);
+
+  return write(oid, object_no, object_off, std::move(expanded_data), snapc,
+               op_flags, parent_trace, object_dispatch_flags, journal_tid,
+               dispatch_result, on_finish, on_dispatched);
+}
+
+/**
+ * Compare-and-write is not an ObjectCacher operation: flush the affected
+ * extent from the cache and let the request continue to the next layer.
+ *
+ * Sets *dispatch_result to DISPATCH_RESULT_CONTINUE and always returns true.
+ */
+template <typename I>
+bool ObjectCacherObjectDispatch<I>::compare_and_write(
+    const std::string &oid, uint64_t object_no, uint64_t object_off,
+    ceph::bufferlist&& cmp_data, ceph::bufferlist&& write_data,
+    const ::SnapContext &snapc, int op_flags,
+    const ZTracer::Trace &parent_trace, uint64_t* mismatch_offset,
+    int* object_dispatch_flags, uint64_t* journal_tid,
+    io::DispatchResult* dispatch_result, Context** on_finish,
+    Context* on_dispatched) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "object_no=" << object_no << " " << object_off << "~"
+                 << cmp_data.length() << dendl;
+
+  // pass-through the compare-and-write request since it's not a supported
+  // operation of the ObjectCacher
+
+  // ensure we aren't holding the cache lock post-flush
+  on_dispatched = util::create_async_context_callback(*m_image_ctx,
+                                                      on_dispatched);
+
+  ZTracer::Trace trace(parent_trace);
+  *dispatch_result = io::DISPATCH_RESULT_CONTINUE;
+
+  // the flushed range is sized by the compare buffer
+  ObjectExtents flush_extents;
+  flush_extents.emplace_back(oid, object_no, object_off, cmp_data.length(),
+                             0);
+
+  // flush any pending writes from the cache
+  Mutex::Locker cache_locker(m_cache_lock);
+  m_object_cacher->flush_set(m_object_set, flush_extents, &trace,
+                             on_dispatched);
+  return true;
+}
+
+/**
+ * Flushes the whole object set and lets the request continue to the next
+ * layer. The first user-sourced flush also enables writeback when the image
+ * was configured for write-through-until-flush.
+ *
+ * Sets *dispatch_result to DISPATCH_RESULT_CONTINUE and always returns true.
+ */
+template <typename I>
+bool ObjectCacherObjectDispatch<I>::flush(
+    io::FlushSource flush_source, const ZTracer::Trace &parent_trace,
+    io::DispatchResult* dispatch_result, Context** on_finish,
+    Context* on_dispatched) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << dendl;
+
+  // ensure we aren't holding the cache lock post-flush
+  on_dispatched = util::create_async_context_callback(*m_image_ctx,
+                                                      on_dispatched);
+
+  Mutex::Locker cache_locker(m_cache_lock);
+
+  const bool enable_writeback =
+    (flush_source == io::FLUSH_SOURCE_USER && !m_user_flushed &&
+     m_image_ctx->cache_writethrough_until_flush &&
+     m_image_ctx->cache_max_dirty > 0);
+  if (enable_writeback) {
+    // init() started with max_dirty == 0; restore the configured value
+    m_user_flushed = true;
+    m_object_cacher->set_max_dirty(m_image_ctx->cache_max_dirty);
+    ldout(cct, 5) << "saw first user flush, enabling writeback" << dendl;
+  }
+
+  *dispatch_result = io::DISPATCH_RESULT_CONTINUE;
+  m_object_cacher->flush_set(m_object_set, on_dispatched);
+  return true;
+}
+
+/**
+ * Flushes the object set and then releases the cached entries through
+ * C_InvalidateCache (without purging on error).
+ *
+ * @param on_finish completed asynchronously with the final result
+ * @return always true
+ */
+template <typename I>
+bool ObjectCacherObjectDispatch<I>::invalidate_cache(Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 5) << dendl;
+
+  // ensure we aren't holding the cache lock post-flush
+  Context *async_ctx = util::create_async_context_callback(*m_image_ctx,
+                                                           on_finish);
+
+  // invalidate any remaining cache entries
+  Context *invalidate_ctx = new C_InvalidateCache(this, false, async_ctx);
+
+  Mutex::Locker cache_locker(m_cache_lock);
+  m_object_cacher->release_set(m_object_set);
+  m_object_cacher->flush_set(m_object_set, invalidate_ctx);
+  return true;
+}
+
+/**
+ * Clears the object cacher's non-existence state for the object set.
+ *
+ * @param on_finish not used by this layer
+ * @return always false
+ */
+template <typename I>
+bool ObjectCacherObjectDispatch<I>::reset_existence_cache(
+    Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 5) << dendl;
+
+  {
+    Mutex::Locker cache_locker(m_cache_lock);
+    m_object_cacher->clear_nonexistence(m_object_set);
+  }
+
+  return false;
+}
+
+} // namespace cache
+} // namespace librbd
+
+template class librbd::cache::ObjectCacherObjectDispatch<librbd::ImageCtx>;
diff --git a/src/librbd/cache/ObjectCacherObjectDispatch.h b/src/librbd/cache/ObjectCacherObjectDispatch.h
new file mode 100644
index 00000000..cb145681
--- /dev/null
+++ b/src/librbd/cache/ObjectCacherObjectDispatch.h
@@ -0,0 +1,112 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_CACHE_OBJECT_CACHER_OBJECT_DISPATCH_H
+#define CEPH_LIBRBD_CACHE_OBJECT_CACHER_OBJECT_DISPATCH_H
+
+#include "librbd/io/ObjectDispatchInterface.h"
+#include "common/Mutex.h"
+#include "osdc/ObjectCacher.h"
+
+struct WritebackHandler;
+
+namespace librbd {
+
+class ImageCtx;
+
+namespace cache {
+
+/**
+ * Facade around the OSDC object cacher to make it align with
+ * the object dispatcher interface
+ *
+ * Lifecycle: construct (or create()), call init() to allocate the cacher
+ * and register with the image's object dispatcher, and shut_down() to flush
+ * and stop the cacher. The destructor frees the objects allocated by init().
+ */
+template <typename ImageCtxT = ImageCtx>
+class ObjectCacherObjectDispatch : public io::ObjectDispatchInterface {
+public:
+  /// heap-allocates a dispatch layer for image_ctx; the caller owns it
+  static ObjectCacherObjectDispatch* create(ImageCtxT* image_ctx) {
+    return new ObjectCacherObjectDispatch(image_ctx);
+  }
+
+  /// explicit, like the other single-argument constructors in this
+  /// directory, so an image context pointer never converts implicitly
+  explicit ObjectCacherObjectDispatch(ImageCtxT* image_ctx);
+  ~ObjectCacherObjectDispatch() override;
+
+  io::ObjectDispatchLayer get_object_dispatch_layer() const override {
+    return io::OBJECT_DISPATCH_LAYER_CACHE;
+  }
+
+  void init();
+  void shut_down(Context* on_finish) override;
+
+  /// served from the object cacher (dispatch result: COMPLETE)
+  bool read(
+      const std::string &oid, uint64_t object_no, uint64_t object_off,
+      uint64_t object_len, librados::snap_t snap_id, int op_flags,
+      const ZTracer::Trace &parent_trace, ceph::bufferlist* read_data,
+      io::ExtentMap* extent_map, int* object_dispatch_flags,
+      io::DispatchResult* dispatch_result, Context** on_finish,
+      Context* on_dispatched) override;
+
+  /// waits for in-flight writeback, then continues to the next layer
+  bool discard(
+      const std::string &oid, uint64_t object_no, uint64_t object_off,
+      uint64_t object_len, const ::SnapContext &snapc, int discard_flags,
+      const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
+      uint64_t* journal_tid, io::DispatchResult* dispatch_result,
+      Context** on_finish, Context* on_dispatched) override;
+
+  /// written into the object cacher (dispatch result: COMPLETE)
+  bool write(
+      const std::string &oid, uint64_t object_no, uint64_t object_off,
+      ceph::bufferlist&& data, const ::SnapContext &snapc, int op_flags,
+      const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
+      uint64_t* journal_tid, io::DispatchResult* dispatch_result,
+      Context** on_finish, Context* on_dispatched) override;
+
+  /// converted to a regular write()
+  bool write_same(
+      const std::string &oid, uint64_t object_no, uint64_t object_off,
+      uint64_t object_len, io::Extents&& buffer_extents,
+      ceph::bufferlist&& data, const ::SnapContext &snapc, int op_flags,
+      const ZTracer::Trace &parent_trace, int* object_dispatch_flags,
+      uint64_t* journal_tid, io::DispatchResult* dispatch_result,
+      Context** on_finish, Context* on_dispatched) override;
+
+  /// flushes the affected extent, then continues to the next layer
+  bool compare_and_write(
+      const std::string &oid, uint64_t object_no, uint64_t object_off,
+      ceph::bufferlist&& cmp_data, ceph::bufferlist&& write_data,
+      const ::SnapContext &snapc, int op_flags,
+      const ZTracer::Trace &parent_trace, uint64_t* mismatch_offset,
+      int* object_dispatch_flags, uint64_t* journal_tid,
+      io::DispatchResult* dispatch_result, Context** on_finish,
+      Context* on_dispatched) override;
+
+  /// flushes the whole object set, then continues to the next layer
+  bool flush(
+      io::FlushSource flush_source, const ZTracer::Trace &parent_trace,
+      io::DispatchResult* dispatch_result, Context** on_finish,
+      Context* on_dispatched) override;
+
+  bool invalidate_cache(Context* on_finish) override;
+  bool reset_existence_cache(Context* on_finish) override;
+
+  /// intentionally empty for this layer
+  /// NOTE(review): not marked override -- confirm whether the base
+  /// interface declares this method
+  void extent_overwritten(
+      uint64_t object_no, uint64_t object_off, uint64_t object_len,
+      uint64_t journal_tid, uint64_t new_journal_tid) {
+  }
+
+private:
+  struct C_InvalidateCache;
+
+  ImageCtxT* m_image_ctx;
+
+  /// held around the ObjectCacher calls made by this class
+  Mutex m_cache_lock;
+  ObjectCacher *m_object_cacher = nullptr;             ///< created in init()
+  ObjectCacher::ObjectSet *m_object_set = nullptr;     ///< created in init()
+
+  WritebackHandler *m_writeback_handler = nullptr;     ///< created in init()
+
+  /// set once the first user-sourced flush has enabled writeback
+  bool m_user_flushed = false;
+
+};
+
+} // namespace cache
+} // namespace librbd
+
+extern template class librbd::cache::ObjectCacherObjectDispatch<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_CACHE_OBJECT_CACHER_OBJECT_DISPATCH_H
diff --git a/src/librbd/cache/PassthroughImageCache.cc b/src/librbd/cache/PassthroughImageCache.cc
new file mode 100644
index 00000000..c3672f53
--- /dev/null
+++ b/src/librbd/cache/PassthroughImageCache.cc
@@ -0,0 +1,135 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "PassthroughImageCache.h"
+#include "include/buffer.h"
+#include "common/dout.h"
+#include "librbd/ImageCtx.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::PassthroughImageCache: " << this << " " \
+ << __func__ << ": "
+
+namespace librbd {
+namespace cache {
+
+/// Binds the cache and its writeback helper to the same image context.
+template <typename I>
+PassthroughImageCache<I>::PassthroughImageCache(ImageCtx &image_ctx)
+  : m_image_ctx(image_ctx),
+    m_image_writeback(image_ctx) {
+}
+
+/// Logs the request and forwards it unchanged to the image writeback.
+template <typename I>
+void PassthroughImageCache<I>::aio_read(Extents &&image_extents, bufferlist *bl,
+                                        int fadvise_flags, Context *on_finish) {
+  auto cct = m_image_ctx.cct;
+  ldout(cct, 20) << "image_extents=" << image_extents << ", "
+                 << "on_finish=" << on_finish << dendl;
+
+  // nothing is cached: hand the read straight to the writeback handler
+  m_image_writeback.aio_read(
+    std::move(image_extents), bl, fadvise_flags, on_finish);
+}
+
+/// Logs the request and forwards it unchanged to the image writeback.
+template <typename I>
+void PassthroughImageCache<I>::aio_write(Extents &&image_extents,
+                                         bufferlist&& bl,
+                                         int fadvise_flags,
+                                         Context *on_finish) {
+  auto cct = m_image_ctx.cct;
+  ldout(cct, 20) << "image_extents=" << image_extents << ", "
+                 << "on_finish=" << on_finish << dendl;
+
+  // nothing is cached: hand the write straight to the writeback handler
+  m_image_writeback.aio_write(
+    std::move(image_extents), std::move(bl), fadvise_flags, on_finish);
+}
+
+/// Logs the request and forwards it unchanged to the image writeback.
+template <typename I>
+void PassthroughImageCache<I>::aio_discard(uint64_t offset, uint64_t length,
+                                           uint32_t discard_granularity_bytes,
+                                           Context *on_finish) {
+  auto cct = m_image_ctx.cct;
+  ldout(cct, 20) << "offset=" << offset << ", "
+                 << "length=" << length << ", "
+                 << "on_finish=" << on_finish << dendl;
+
+  // nothing is cached: hand the discard straight to the writeback handler
+  m_image_writeback.aio_discard(
+    offset, length, discard_granularity_bytes, on_finish);
+}
+
+/// Logs the request and forwards it unchanged to the image writeback.
+template <typename I>
+void PassthroughImageCache<I>::aio_flush(Context *on_finish) {
+  auto cct = m_image_ctx.cct;
+  ldout(cct, 20) << "on_finish=" << on_finish << dendl;
+
+  // nothing is cached: hand the flush straight to the writeback handler
+  m_image_writeback.aio_flush(on_finish);
+}
+
+/// Logs the request and forwards it unchanged to the image writeback.
+template <typename I>
+void PassthroughImageCache<I>::aio_writesame(uint64_t offset, uint64_t length,
+                                             bufferlist&& bl, int fadvise_flags,
+                                             Context *on_finish) {
+  auto cct = m_image_ctx.cct;
+  ldout(cct, 20) << "offset=" << offset << ", "
+                 << "length=" << length << ", "
+                 << "data_len=" << bl.length() << ", "
+                 << "on_finish=" << on_finish << dendl;
+
+  // nothing is cached: hand the write-same straight to the writeback handler
+  m_image_writeback.aio_writesame(
+    offset, length, std::move(bl), fadvise_flags, on_finish);
+}
+
+/// Logs the request and forwards it unchanged to the image writeback.
+template <typename I>
+void PassthroughImageCache<I>::aio_compare_and_write(Extents &&image_extents,
+                                                     bufferlist&& cmp_bl,
+                                                     bufferlist&& bl,
+                                                     uint64_t *mismatch_offset,
+                                                     int fadvise_flags,
+                                                     Context *on_finish) {
+  auto cct = m_image_ctx.cct;
+  ldout(cct, 20) << "image_extents=" << image_extents << ", "
+                 << "on_finish=" << on_finish << dendl;
+
+  // nothing is cached: hand the request straight to the writeback handler
+  m_image_writeback.aio_compare_and_write(std::move(image_extents),
+                                          std::move(cmp_bl), std::move(bl),
+                                          mismatch_offset, fadvise_flags,
+                                          on_finish);
+}
+
+/// There is no state to set up: logs and completes on_finish with 0.
+template <typename I>
+void PassthroughImageCache<I>::init(Context *on_finish) {
+  auto cct = m_image_ctx.cct;
+  ldout(cct, 20) << dendl;
+
+  const int result = 0;
+  on_finish->complete(result);
+}
+
+/// There is no state to tear down: logs and completes on_finish with 0.
+template <typename I>
+void PassthroughImageCache<I>::shut_down(Context *on_finish) {
+  auto cct = m_image_ctx.cct;
+  ldout(cct, 20) << dendl;
+
+  const int result = 0;
+  on_finish->complete(result);
+}
+
+/// Nothing is cached, so there is nothing to drop: completes on_finish
+/// with 0.
+template <typename I>
+void PassthroughImageCache<I>::invalidate(Context *on_finish) {
+  auto cct = m_image_ctx.cct;
+  ldout(cct, 20) << dendl;
+
+  // dump cache contents (don't have anything)
+  const int result = 0;
+  on_finish->complete(result);
+}
+
+/// Internal flush entry point; delegates to aio_flush().
+template <typename I>
+void PassthroughImageCache<I>::flush(Context *on_finish) {
+  auto cct = m_image_ctx.cct;
+  ldout(cct, 20) << dendl;
+
+  // internal flush -- nothing to writeback but make sure
+  // in-flight IO is flushed
+  this->aio_flush(on_finish);
+}
+
+} // namespace cache
+} // namespace librbd
+
+template class librbd::cache::PassthroughImageCache<librbd::ImageCtx>;
diff --git a/src/librbd/cache/PassthroughImageCache.h b/src/librbd/cache/PassthroughImageCache.h
new file mode 100644
index 00000000..4be69a50
--- /dev/null
+++ b/src/librbd/cache/PassthroughImageCache.h
@@ -0,0 +1,59 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_CACHE_PASSTHROUGH_IMAGE_CACHE
+#define CEPH_LIBRBD_CACHE_PASSTHROUGH_IMAGE_CACHE
+
+#include "ImageCache.h"
+#include "ImageWriteback.h"
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace cache {
+
+/**
+ * Example passthrough client-side, image extent cache
+ *
+ * Holds no cached data: every aio_* call is forwarded to an ImageWriteback
+ * bound to the same image context.
+ */
+template <typename ImageCtxT = librbd::ImageCtx>
+class PassthroughImageCache : public ImageCache {
+public:
+  /// NOTE(review): takes ImageCtx& rather than ImageCtxT&, although the
+  /// members below are declared with ImageCtxT -- confirm this is intended
+  explicit PassthroughImageCache(ImageCtx &image_ctx);
+
+  /// client AIO methods
+  void aio_read(Extents&& image_extents, ceph::bufferlist *bl,
+                int fadvise_flags, Context *on_finish) override;
+
+  void aio_write(Extents&& image_extents, ceph::bufferlist&& bl,
+                 int fadvise_flags, Context *on_finish) override;
+
+  void aio_discard(uint64_t offset, uint64_t length,
+                   uint32_t discard_granularity_bytes,
+                   Context *on_finish) override;
+
+  void aio_flush(Context *on_finish) override;
+
+  void aio_writesame(uint64_t offset, uint64_t length,
+                     ceph::bufferlist&& bl,
+                     int fadvise_flags, Context *on_finish) override;
+
+  void aio_compare_and_write(Extents&& image_extents,
+                             ceph::bufferlist&& cmp_bl, ceph::bufferlist&& bl,
+                             uint64_t *mismatch_offset, int fadvise_flags,
+                             Context *on_finish) override;
+
+  /// internal state methods
+  void init(Context *on_finish) override;
+  void shut_down(Context *on_finish) override;
+
+  void invalidate(Context *on_finish) override;
+  void flush(Context *on_finish) override;
+
+private:
+  ImageCtxT &m_image_ctx;                        ///< image being fronted
+  ImageWriteback<ImageCtxT> m_image_writeback;   ///< target of all aio_* calls
+
+};
+
+} // namespace cache
+} // namespace librbd
+
+extern template class librbd::cache::PassthroughImageCache<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_CACHE_PASSTHROUGH_IMAGE_CACHE