11 files changed, 1790 insertions, 0 deletions
diff --git a/src/librbd/cache/pwl/rwl/Builder.h b/src/librbd/cache/pwl/rwl/Builder.h
new file mode 100644
index 000000000..c13c7b5ae
--- /dev/null
+++ b/src/librbd/cache/pwl/rwl/Builder.h
@@ -0,0 +1,107 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_CACHE_PWL_RWL_BUILDER_H
+#define CEPH_LIBRBD_CACHE_PWL_RWL_BUILDER_H
+
+#include <iostream>
+#include "LogEntry.h"
+#include "ReadRequest.h"
+#include "Request.h"
+#include "LogOperation.h"
+
+#include "librbd/cache/ImageWriteback.h"
+#include "librbd/cache/pwl/Builder.h"
+
+namespace librbd {
+namespace cache {
+namespace pwl {
+namespace rwl {
+
+template <typename T>
+class Builder : public pwl::Builder<T> {
+public:
+  std::shared_ptr<pwl::WriteLogEntry> create_write_log_entry(
+      uint64_t image_offset_bytes, uint64_t write_bytes) override {
+    return std::make_shared<WriteLogEntry>(image_offset_bytes, write_bytes);
+  }
+  std::shared_ptr<pwl::WriteLogEntry> create_write_log_entry(
+      std::shared_ptr<SyncPointLogEntry> sync_point_entry,
+      uint64_t image_offset_bytes, uint64_t write_bytes) override {
+    return std::make_shared<WriteLogEntry>(
+        sync_point_entry, image_offset_bytes, write_bytes);
+  }
+  std::shared_ptr<pwl::WriteLogEntry> create_writesame_log_entry(
+      uint64_t image_offset_bytes, uint64_t write_bytes,
+      uint32_t data_length) override {
+    return std::make_shared<WriteSameLogEntry>(
+        image_offset_bytes, write_bytes, data_length);
+  }
+  std::shared_ptr<pwl::WriteLogEntry> create_writesame_log_entry(
+      std::shared_ptr<SyncPointLogEntry> sync_point_entry,
+      uint64_t image_offset_bytes, uint64_t write_bytes,
+      uint32_t data_length) override {
+    return std::make_shared<WriteSameLogEntry>(
+        sync_point_entry, image_offset_bytes, write_bytes, data_length);
+  }
+  pwl::C_WriteRequest<T> *create_write_request(
+      T &pwl, utime_t arrived, io::Extents &&image_extents,
+      bufferlist&& bl, const int fadvise_flags, ceph::mutex &lock,
+      PerfCounters *perfcounter, Context *user_req) override {
+    return new C_WriteRequest<T>(
+        pwl, arrived, std::move(image_extents), std::move(bl),
+        fadvise_flags, lock, perfcounter, user_req);
+  }
+  pwl::C_WriteSameRequest<T> *create_writesame_request(
+      T &pwl, utime_t arrived, io::Extents &&image_extents,
+      bufferlist&& bl, const int fadvise_flags, ceph::mutex &lock,
+      PerfCounters *perfcounter, Context *user_req) override {
+    return new C_WriteSameRequest<T>(
+        pwl, arrived, std::move(image_extents), std::move(bl),
+        fadvise_flags, lock, perfcounter, user_req);
+  }
+  pwl::C_WriteRequest<T> *create_comp_and_write_request(
+      T &pwl, utime_t arrived, io::Extents &&image_extents,
+      bufferlist&& cmp_bl, bufferlist&& bl, uint64_t *mismatch_offset,
+      const int fadvise_flags, ceph::mutex &lock,
+      PerfCounters *perfcounter, Context *user_req) override {
+    return new rwl::C_CompAndWriteRequest<T>(
+        pwl, arrived, std::move(image_extents), std::move(cmp_bl),
+        std::move(bl), mismatch_offset, fadvise_flags,
+        lock, perfcounter, user_req);
+  }
+  std::shared_ptr<pwl::WriteLogOperation> create_write_log_operation(
+      WriteLogOperationSet &set, uint64_t image_offset_bytes,
+      uint64_t write_bytes, CephContext *cct,
+      std::shared_ptr<pwl::WriteLogEntry> write_log_entry) {
+    return std::make_shared<WriteLogOperation>(
+        set, image_offset_bytes, write_bytes, cct, write_log_entry);
+  }
+  std::shared_ptr<pwl::WriteLogOperation> create_write_log_operation(
+      WriteLogOperationSet &set, uint64_t image_offset_bytes,
+      uint64_t write_bytes, uint32_t data_len, CephContext *cct,
+      std::shared_ptr<pwl::WriteLogEntry> writesame_log_entry) {
+    return std::make_shared<WriteLogOperation>(
+        set, image_offset_bytes, write_bytes, data_len, cct,
+        writesame_log_entry);
+  }
+  std::shared_ptr<pwl::DiscardLogOperation> create_discard_log_operation(
+      std::shared_ptr<SyncPoint> sync_point, uint64_t image_offset_bytes,
+      uint64_t write_bytes, uint32_t discard_granularity_bytes,
+      utime_t dispatch_time, PerfCounters *perfcounter, CephContext *cct) {
+    return std::make_shared<DiscardLogOperation>(
+        sync_point, image_offset_bytes, write_bytes, discard_granularity_bytes,
+        dispatch_time, perfcounter, cct);
+  }
+  C_ReadRequest *create_read_request(CephContext *cct, utime_t arrived,
+      PerfCounters *perfcounter, ceph::bufferlist *bl, Context *on_finish) {
+    return new C_ReadRequest(cct, arrived, perfcounter, bl, on_finish);
+  }
+};
+
+} // namespace rwl
+} // namespace pwl
+} // namespace cache
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_CACHE_PWL_RWL_BUILDER_H
diff --git a/src/librbd/cache/pwl/rwl/LogEntry.cc b/src/librbd/cache/pwl/rwl/LogEntry.cc
new file mode 100644
index 000000000..38e09c22a
--- /dev/null
+++ b/src/librbd/cache/pwl/rwl/LogEntry.cc
@@ -0,0 +1,106 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/cache/ImageWriteback.h"
+#include "LogEntry.h"
+
+#define dout_subsys ceph_subsys_rbd_pwl
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::cache::pwl::rwl::WriteLogEntry: " \
+                           << this << " " <<  __func__ << ": "
+
+namespace librbd {
+namespace cache {
+namespace pwl {
+namespace rwl {
+
+void WriteLogEntry::writeback(
+    librbd::cache::ImageWritebackInterface &image_writeback, Context *ctx) {
+  /* Pass a copy of the pmem buffer to ImageWriteback (which may hang on to the
+   * bl even after flush()). */
+  bufferlist entry_bl;
+  buffer::list entry_bl_copy;
+  copy_cache_bl(&entry_bl_copy);
+  entry_bl_copy.begin(0).copy(write_bytes(), entry_bl);
+  image_writeback.aio_write({{ram_entry.image_offset_bytes,
+                              ram_entry.write_bytes}},
+                            std::move(entry_bl), 0, ctx);
+}
+
+void WriteLogEntry::init_cache_bp() {
+  ceph_assert(!this->cache_bp.have_raw());
+  cache_bp = buffer::ptr(buffer::create_static(this->write_bytes(),
+                                               (char*)this->cache_buffer));
+}
+
+void WriteLogEntry::init_bl(buffer::ptr &bp, buffer::list &bl) {
+  if(!is_writesame) {
+    bl.append(bp);
+    return;
+  }
+  for (uint64_t i = 0; i < ram_entry.write_bytes / ram_entry.ws_datalen; i++) {
+    bl.append(bp);
+  }
+  int trailing_partial = ram_entry.write_bytes % ram_entry.ws_datalen;
+  if (trailing_partial) {
+    bl.append(bp, 0, trailing_partial);
+  }
+}
+
+void WriteLogEntry::init_cache_buffer(
+    std::vector<WriteBufferAllocation>::iterator allocation) {
+  this->ram_entry.write_data = allocation->buffer_oid;
+  ceph_assert(!TOID_IS_NULL(this->ram_entry.write_data));
+  cache_buffer = D_RW(this->ram_entry.write_data);
+}
+
+buffer::list& WriteLogEntry::get_cache_bl() {
+  if (0 == bl_refs) {
+    std::lock_guard locker(m_entry_bl_lock);
+    if (0 == bl_refs) {
+      //init pmem bufferlist
+      cache_bl.clear();
+      init_cache_bp();
+      ceph_assert(cache_bp.have_raw());
+      int before_bl = cache_bp.raw_nref();
+      this->init_bl(cache_bp, cache_bl);
+      int after_bl = cache_bp.raw_nref();
+      bl_refs = after_bl - before_bl;
+    }
+    ceph_assert(0 != bl_refs);
+  }
+  return cache_bl;
+}
+
+void WriteLogEntry::copy_cache_bl(bufferlist *out_bl) {
+  this->get_cache_bl();
+  // cache_bp is now initialized
+  ceph_assert(cache_bp.length() == cache_bp.raw_length());
+  buffer::ptr cloned_bp = cache_bp.begin_deep().get_ptr(cache_bp.length());
+  out_bl->clear();
+  this->init_bl(cloned_bp, *out_bl);
+}
+
+unsigned int WriteLogEntry::reader_count() const {
+  if (cache_bp.have_raw()) {
+    return (cache_bp.raw_nref() - bl_refs - 1);
+  } else {
+    return 0;
+  }
+}
+
+void WriteSameLogEntry::writeback(
+    librbd::cache::ImageWritebackInterface &image_writeback, Context *ctx) {
+  bufferlist entry_bl;
+  buffer::list entry_bl_copy;
+  copy_cache_bl(&entry_bl_copy);
+  entry_bl_copy.begin(0).copy(write_bytes(), entry_bl);
+  image_writeback.aio_writesame(ram_entry.image_offset_bytes,
+                                ram_entry.write_bytes,
+                                std::move(entry_bl), 0, ctx);
+}
+
+} // namespace rwl
+} // namespace pwl
+} // namespace cache
+} // namespace librbd
diff --git a/src/librbd/cache/pwl/rwl/LogEntry.h b/src/librbd/cache/pwl/rwl/LogEntry.h
new file mode 100644
index 000000000..a4675c5fb
--- /dev/null
+++ b/src/librbd/cache/pwl/rwl/LogEntry.h
@@ -0,0 +1,68 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_CACHE_PWL_RWL_LOG_ENTRY_H
+#define CEPH_LIBRBD_CACHE_PWL_RWL_LOG_ENTRY_H
+
+#include "librbd/cache/pwl/LogEntry.h"
+
+namespace librbd {
+namespace cache {
+class ImageWritebackInterface;
+namespace pwl {
+namespace rwl {
+
+class WriteLogEntry : public pwl::WriteLogEntry {
+public:
+  WriteLogEntry(std::shared_ptr<SyncPointLogEntry> sync_point_entry,
+                uint64_t image_offset_bytes, uint64_t write_bytes)
+    : pwl::WriteLogEntry(sync_point_entry, image_offset_bytes, write_bytes) {}
+  WriteLogEntry(uint64_t image_offset_bytes, uint64_t write_bytes)
+    : pwl::WriteLogEntry(image_offset_bytes, write_bytes) {}
+  WriteLogEntry(std::shared_ptr<SyncPointLogEntry> sync_point_entry,
+                uint64_t image_offset_bytes, uint64_t write_bytes,
+                uint32_t data_length)
+    : pwl::WriteLogEntry(sync_point_entry, image_offset_bytes, write_bytes,
+                         data_length) {}
+  WriteLogEntry(uint64_t image_offset_bytes, uint64_t write_bytes,
+                uint32_t data_length)
+    : pwl::WriteLogEntry(image_offset_bytes, write_bytes, data_length) {}
+ ~WriteLogEntry() {}
+  WriteLogEntry(const WriteLogEntry&) = delete;
+  WriteLogEntry &operator=(const WriteLogEntry&) = delete;
+
+  void writeback(librbd::cache::ImageWritebackInterface &image_writeback,
+                 Context *ctx) override;
+  void init_cache_bp() override;
+  void init_bl(buffer::ptr &bp, buffer::list &bl) override;
+  void init_cache_buffer(
+      std::vector<WriteBufferAllocation>::iterator allocation) override;
+  buffer::list &get_cache_bl() override;
+  void copy_cache_bl(bufferlist *out_bl) override;
+  unsigned int reader_count() const override;
+};
+
+class WriteSameLogEntry : public WriteLogEntry {
+public:
+  WriteSameLogEntry(std::shared_ptr<SyncPointLogEntry> sync_point_entry,
+                    uint64_t image_offset_bytes, uint64_t write_bytes,
+                    uint32_t data_length)
+    : WriteLogEntry(sync_point_entry, image_offset_bytes, write_bytes,
+                    data_length) {}
+  WriteSameLogEntry(uint64_t image_offset_bytes, uint64_t write_bytes,
+                   uint32_t data_length)
+    : WriteLogEntry(image_offset_bytes, write_bytes, data_length) {}
+  ~WriteSameLogEntry() {}
+  WriteSameLogEntry(const WriteSameLogEntry&) = delete;
+  WriteSameLogEntry &operator=(const WriteSameLogEntry&) = delete;
+
+  void writeback(librbd::cache::ImageWritebackInterface &image_writeback,
+                 Context *ctx) override;
+};
+
+} // namespace rwl
+} // namespace pwl
+} // namespace cache
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_CACHE_PWL_RWL_LOG_ENTRY_H
diff --git a/src/librbd/cache/pwl/rwl/LogOperation.cc b/src/librbd/cache/pwl/rwl/LogOperation.cc
new file mode 100644
index 000000000..53fb917b2
--- /dev/null
+++ b/src/librbd/cache/pwl/rwl/LogOperation.cc
@@ -0,0 +1,39 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "LogOperation.h"
+
+#define dout_subsys ceph_subsys_rbd_pwl
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::cache::pwl::rwl::LogOperation: " \
+                           << this << " " <<  __func__ << ": "
+
+namespace librbd {
+namespace cache {
+namespace pwl {
+namespace rwl {
+
+void WriteLogOperation::copy_bl_to_cache_buffer(
+    std::vector<WriteBufferAllocation>::iterator allocation) {
+  /* operation is a shared_ptr, so write_op is only good as long as operation is
+   * in scope */
+  bufferlist::iterator i(&bl);
+  m_perfcounter->inc(l_librbd_pwl_log_op_bytes, log_entry->write_bytes());
+  ldout(m_cct, 20) << bl << dendl;
+  log_entry->init_cache_buffer(allocation);
+  i.copy((unsigned)log_entry->write_bytes(), (char*)log_entry->cache_buffer);
+}
+
+void DiscardLogOperation::init_op(
+    uint64_t current_sync_gen, bool persist_on_flush,
+    uint64_t last_op_sequence_num, Context *write_persist,
+    Context *write_append) {
+  log_entry->init(current_sync_gen, persist_on_flush, last_op_sequence_num);
+  this->on_write_append = write_append;
+  this->on_write_persist = write_persist;
+}
+
+} // namespace rwl
+} // namespace pwl
+} // namespace cache
+} // namespace librbd
diff --git a/src/librbd/cache/pwl/rwl/LogOperation.h b/src/librbd/cache/pwl/rwl/LogOperation.h
new file mode 100644
index 000000000..874ac77fb
--- /dev/null
+++ b/src/librbd/cache/pwl/rwl/LogOperation.h
@@ -0,0 +1,55 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_CACHE_PWL_RWL_LOG_OPERATION_H
+#define CEPH_LIBRBD_CACHE_PWL_RWL_LOG_OPERATION_H
+
+#include "librbd/cache/pwl/LogOperation.h"
+
+
+namespace librbd {
+namespace cache {
+namespace pwl {
+namespace rwl {
+
+class WriteLogOperation : public pwl::WriteLogOperation {
+public:
+  WriteLogOperation(
+      WriteLogOperationSet &set, uint64_t image_offset_bytes,
+      uint64_t write_bytes, CephContext *cct,
+      std::shared_ptr<pwl::WriteLogEntry> write_log_entry)
+    : pwl::WriteLogOperation(set, image_offset_bytes, write_bytes, cct,
+                             write_log_entry) {}
+
+  WriteLogOperation(
+      WriteLogOperationSet &set, uint64_t image_offset_bytes,
+      uint64_t write_bytes, uint32_t data_len, CephContext *cct,
+      std::shared_ptr<pwl::WriteLogEntry> writesame_log_entry)
+    : pwl::WriteLogOperation(set, image_offset_bytes, write_bytes, cct,
+                             writesame_log_entry) {}
+
+  void copy_bl_to_cache_buffer(
+          std::vector<WriteBufferAllocation>::iterator allocation) override;
+};
+
+class DiscardLogOperation : public pwl::DiscardLogOperation {
+public:
+  DiscardLogOperation(
+      std::shared_ptr<SyncPoint> sync_point, uint64_t image_offset_bytes,
+      uint64_t write_bytes, uint32_t discard_granularity_bytes,
+      utime_t dispatch_time, PerfCounters *perfcounter, CephContext *cct)
+    : pwl::DiscardLogOperation(sync_point, image_offset_bytes, write_bytes,
+                               discard_granularity_bytes, dispatch_time,
+                               perfcounter, cct) {}
+  void init_op(
+      uint64_t current_sync_gen, bool persist_on_flush,
+      uint64_t last_op_sequence_num, Context *write_persist,
+      Context *write_append) override;
+};
+
+} // namespace rwl
+} // namespace pwl
+} // namespace cache
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_CACHE_PWL_RWL_LOG_OPERATION_H
diff --git a/src/librbd/cache/pwl/rwl/ReadRequest.cc b/src/librbd/cache/pwl/rwl/ReadRequest.cc
new file mode 100644
index 000000000..f91f8e5a7
--- /dev/null
+++ b/src/librbd/cache/pwl/rwl/ReadRequest.cc
@@ -0,0 +1,70 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "ReadRequest.h"
+
+#define dout_subsys ceph_subsys_rbd_pwl
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::cache::pwl::rwl::ReadRequest: " << this << " " \
+                           <<  __func__ << ": "
+
+namespace librbd {
+namespace cache {
+namespace pwl {
+namespace rwl {
+
+void C_ReadRequest::finish(int r) {
+  ldout(m_cct, 20) << "(" << get_name() << "): r=" << r << dendl;
+  int hits = 0;
+  int misses = 0;
+  int hit_bytes = 0;
+  int miss_bytes = 0;
+  if (r >= 0) {
+    /*
+     * At this point the miss read has completed. We'll iterate through
+     * read_extents and produce *m_out_bl by assembling pieces of miss_bl
+     * and the individual hit extent bufs in the read extents that represent
+     * hits.
+     */
+    uint64_t miss_bl_offset = 0;
+    for (auto extent : read_extents) {
+      if (extent->m_bl.length()) {
+        /* This was a hit */
+        ceph_assert(extent->second == extent->m_bl.length());
+        ++hits;
+        hit_bytes += extent->second;
+        m_out_bl->claim_append(extent->m_bl);
+      } else {
+        /* This was a miss. */
+        ++misses;
+        miss_bytes += extent->second;
+        bufferlist miss_extent_bl;
+        miss_extent_bl.substr_of(miss_bl, miss_bl_offset, extent->second);
+        /* Add this read miss bufferlist to the output bufferlist */
+        m_out_bl->claim_append(miss_extent_bl);
+        /* Consume these bytes in the read miss bufferlist */
+        miss_bl_offset += extent->second;
+      }
+    }
+  }
+  ldout(m_cct, 20) << "(" << get_name() << "): r=" << r << " bl=" << *m_out_bl << dendl;
+  utime_t now = ceph_clock_now();
+  ceph_assert((int)m_out_bl->length() == hit_bytes + miss_bytes);
+  m_on_finish->complete(r);
+  m_perfcounter->inc(l_librbd_pwl_rd_bytes, hit_bytes + miss_bytes);
+  m_perfcounter->inc(l_librbd_pwl_rd_hit_bytes, hit_bytes);
+  m_perfcounter->tinc(l_librbd_pwl_rd_latency, now - m_arrived_time);
+  if (!misses) {
+    m_perfcounter->inc(l_librbd_pwl_rd_hit_req, 1);
+    m_perfcounter->tinc(l_librbd_pwl_rd_hit_latency, now - m_arrived_time);
+  } else {
+    if (hits) {
+      m_perfcounter->inc(l_librbd_pwl_rd_part_hit_req, 1);
+    }
+  }
+}
+
+} // namespace rwl
+} // namespace pwl
+} // namespace cache
+} // namespace librbd
diff --git a/src/librbd/cache/pwl/rwl/ReadRequest.h b/src/librbd/cache/pwl/rwl/ReadRequest.h
new file mode 100644
index 000000000..25168e83b
--- /dev/null
+++ b/src/librbd/cache/pwl/rwl/ReadRequest.h
@@ -0,0 +1,34 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_CACHE_PWL_RWL_READ_REQUEST_H
+#define CEPH_LIBRBD_CACHE_PWL_RWL_READ_REQUEST_H
+
+#include "librbd/cache/pwl/ReadRequest.h"
+
+namespace librbd {
+namespace cache {
+namespace pwl {
+namespace rwl {
+
+typedef std::vector<pwl::ImageExtentBuf> ImageExtentBufs;
+
+class C_ReadRequest : public pwl::C_ReadRequest {
+protected:
+  using pwl::C_ReadRequest::m_cct;
+  using pwl::C_ReadRequest::m_on_finish;
+  using pwl::C_ReadRequest::m_out_bl;
+  using pwl::C_ReadRequest::m_arrived_time;
+  using pwl::C_ReadRequest::m_perfcounter;
+public:
+  C_ReadRequest(CephContext *cct, utime_t arrived, PerfCounters *perfcounter, bufferlist *out_bl, Context *on_finish)
+    : pwl::C_ReadRequest(cct, arrived, perfcounter, out_bl, on_finish) {}
+  void finish(int r) override;
+};
+
+} // namespace rwl
+} // namespace pwl
+} // namespace cache
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_CACHE_PWL_RWL_READ_REQUEST_H
diff --git a/src/librbd/cache/pwl/rwl/Request.cc b/src/librbd/cache/pwl/rwl/Request.cc
new file mode 100644
index 000000000..a6b81d55b
--- /dev/null
+++ b/src/librbd/cache/pwl/rwl/Request.cc
@@ -0,0 +1,86 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "Request.h"
+#include "librbd/cache/pwl/AbstractWriteLog.h"
+
+#define dout_subsys ceph_subsys_rbd_pwl
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::cache::pwl::rwl::Request: " << this \
+                           << " " <<  __func__ << ": "
+
+namespace librbd {
+namespace cache {
+namespace pwl {
+namespace rwl {
+
+template <typename T>
+void C_WriteRequest<T>::setup_buffer_resources(
+    uint64_t *bytes_cached, uint64_t *bytes_dirtied, uint64_t *bytes_allocated,
+    uint64_t *number_lanes, uint64_t *number_log_entries,
+    uint64_t *number_unpublished_reserves) {
+
+  ceph_assert(!this->m_resources.allocated);
+
+  auto image_extents_size = this->image_extents.size();
+  this->m_resources.buffers.reserve(image_extents_size);
+
+  *bytes_cached = 0;
+  *bytes_allocated = 0;
+  *number_lanes = image_extents_size;
+  *number_log_entries = image_extents_size;
+  *number_unpublished_reserves = image_extents_size;
+
+  for (auto &extent : this->image_extents) {
+    this->m_resources.buffers.emplace_back();
+    struct WriteBufferAllocation &buffer = this->m_resources.buffers.back();
+    buffer.allocation_size = MIN_WRITE_ALLOC_SIZE;
+    buffer.allocated = false;
+    *bytes_cached += extent.second;
+    if (extent.second > buffer.allocation_size) {
+      buffer.allocation_size = extent.second;
+    }
+    *bytes_allocated += buffer.allocation_size;
+  }
+  *bytes_dirtied = *bytes_cached;
+}
+
+template <typename T>
+std::ostream &operator<<(std::ostream &os,
+                         const C_CompAndWriteRequest<T> &req) {
+  os << (C_WriteRequest<T>&)req
+     << " cmp_bl=" << req.cmp_bl
+     << ", read_bl=" << req.read_bl
+     << ", compare_succeeded=" << req.compare_succeeded
+     << ", mismatch_offset=" << req.mismatch_offset;
+  return os;
+}
+
+template <typename T>
+void C_WriteSameRequest<T>::setup_buffer_resources(
+    uint64_t *bytes_cached, uint64_t *bytes_dirtied, uint64_t *bytes_allocated,
+    uint64_t *number_lanes, uint64_t *number_log_entries,
+    uint64_t *number_unpublished_reserves) {
+  ceph_assert(this->image_extents.size() == 1);
+  *number_log_entries = 1;
+  *bytes_dirtied += this->image_extents[0].second;
+  auto pattern_length = this->bl.length();
+  this->m_resources.buffers.emplace_back();
+  struct WriteBufferAllocation &buffer = this->m_resources.buffers.back();
+  buffer.allocation_size = MIN_WRITE_ALLOC_SIZE;
+  buffer.allocated = false;
+  *bytes_cached += pattern_length;
+  if (pattern_length > buffer.allocation_size) {
+    buffer.allocation_size = pattern_length;
+  }
+  *bytes_allocated += buffer.allocation_size;
+}
+
+} // namespace rwl
+} // namespace pwl
+} // namespace cache
+} // namespace librbd
+
+template class librbd::cache::pwl::rwl::C_WriteRequest<librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx> >;
+template class librbd::cache::pwl::rwl::C_WriteSameRequest<librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx> >;
+template class librbd::cache::pwl::rwl::C_CompAndWriteRequest<librbd::cache::pwl::AbstractWriteLog<librbd::ImageCtx> >;
diff --git a/src/librbd/cache/pwl/rwl/Request.h b/src/librbd/cache/pwl/rwl/Request.h
new file mode 100644
index 000000000..0a5c610d6
--- /dev/null
+++ b/src/librbd/cache/pwl/rwl/Request.h
@@ -0,0 +1,90 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_CACHE_RWL_REQUEST_H
+#define CEPH_LIBRBD_CACHE_RWL_REQUEST_H
+
+#include "librbd/cache/pwl/Request.h"
+
+namespace librbd {
+class BlockGuardCell;
+
+namespace cache {
+namespace pwl {
+namespace rwl {
+
+template <typename T>
+class C_WriteRequest : public pwl::C_WriteRequest<T> {
+public:
+  C_WriteRequest(
+      T &pwl, const utime_t arrived, io::Extents &&image_extents,
+      bufferlist&& cmp_bl, bufferlist&& bl, uint64_t *mismatch_offset,
+      const int fadvise_flags, ceph::mutex &lock,
+      PerfCounters *perfcounter, Context *user_req)
+    : pwl::C_WriteRequest<T>(
+        pwl, arrived, std::move(image_extents), std::move(cmp_bl),
+        std::move(bl), mismatch_offset, fadvise_flags,
+        lock, perfcounter, user_req) {}
+
+  C_WriteRequest(
+      T &pwl, const utime_t arrived, io::Extents &&image_extents,
+      bufferlist&& bl, const int fadvise_flags, ceph::mutex &lock,
+      PerfCounters *perfcounter, Context *user_req)
+    : pwl::C_WriteRequest<T>(
+        pwl, arrived, std::move(image_extents), std::move(bl),
+        fadvise_flags, lock, perfcounter, user_req) {}
+protected:
+  //Plain writes will allocate one buffer per request extent
+  void setup_buffer_resources(
+      uint64_t *bytes_cached, uint64_t *bytes_dirtied,
+      uint64_t *bytes_allocated, uint64_t *number_lanes,
+      uint64_t *number_log_entries,
+      uint64_t *number_unpublished_reserves) override;
+};
+
+template <typename T>
+class C_CompAndWriteRequest : public C_WriteRequest<T> {
+public:
+  C_CompAndWriteRequest(
+      T &pwl, const utime_t arrived, io::Extents &&image_extents,
+      bufferlist&& cmp_bl, bufferlist&& bl, uint64_t *mismatch_offset,
+      const int fadvise_flags, ceph::mutex &lock,
+      PerfCounters *perfcounter, Context *user_req)
+    : C_WriteRequest<T>(
+        pwl, arrived, std::move(image_extents), std::move(cmp_bl),
+        std::move(bl), mismatch_offset, fadvise_flags,
+        lock, perfcounter, user_req) {}
+
+  const char *get_name() const override {
+    return "C_CompAndWriteRequest";
+  }
+  template <typename U>
+  friend std::ostream &operator<<(std::ostream &os,
+                                  const C_CompAndWriteRequest<U> &req);
+};
+
+template <typename T>
+class C_WriteSameRequest : public pwl::C_WriteSameRequest<T> {
+public:
+  C_WriteSameRequest(
+      T &pwl, const utime_t arrived, io::Extents &&image_extents,
+      bufferlist&& bl, const int fadvise_flags, ceph::mutex &lock,
+      PerfCounters *perfcounter, Context *user_req)
+    : pwl::C_WriteSameRequest<T>(
+        pwl, arrived, std::move(image_extents), std::move(bl), fadvise_flags,
+        lock, perfcounter, user_req) {}
+
+  void setup_buffer_resources(
+      uint64_t *bytes_cached, uint64_t *bytes_dirtied,
+      uint64_t *bytes_allocated, uint64_t *number_lanes,
+      uint64_t *number_log_entries,
+      uint64_t *number_unpublished_reserves) override;
+
+};
+
+} // namespace rwl
+} // namespace pwl
+} // namespace cache
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_CACHE_RWL_REQUEST_H
diff --git a/src/librbd/cache/pwl/rwl/WriteLog.cc b/src/librbd/cache/pwl/rwl/WriteLog.cc
new file mode 100644
index 000000000..e922ba543
--- /dev/null
+++ b/src/librbd/cache/pwl/rwl/WriteLog.cc
@@ -0,0 +1,1011 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "WriteLog.h"
+#include "include/buffer.h"
+#include "include/Context.h"
+#include "include/ceph_assert.h"
+#include "common/deleter.h"
+#include "common/dout.h"
+#include "common/environment.h"
+#include "common/errno.h"
+#include "common/WorkQueue.h"
+#include "common/Timer.h"
+#include "common/perf_counters.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/asio/ContextWQ.h"
+#include "librbd/cache/pwl/ImageCacheState.h"
+#include "librbd/cache/pwl/LogEntry.h"
+#include "librbd/plugin/Api.h"
+#include <map>
+#include <vector>
+
+#undef dout_subsys
+#define dout_subsys ceph_subsys_rbd_pwl
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::cache::pwl::rwl::WriteLog: " << this \
+                           << " " <<  __func__ << ": "
+
+namespace librbd {
+namespace cache {
+namespace pwl {
+using namespace std;
+using namespace librbd::cache::pwl;
+namespace rwl {
+
+const unsigned long int OPS_APPENDED_TOGETHER = MAX_ALLOC_PER_TRANSACTION;
+
+template <typename I>
+Builder<AbstractWriteLog<I>>* WriteLog<I>::create_builder() {
+  m_builderobj = new Builder<This>();
+  return m_builderobj;
+}
+
+template <typename I>
+WriteLog<I>::WriteLog(
+    I &image_ctx, librbd::cache::pwl::ImageCacheState<I>* cache_state,
+    ImageWritebackInterface& image_writeback,
+    plugin::Api<I>& plugin_api)
+: AbstractWriteLog<I>(image_ctx, cache_state, create_builder(), image_writeback,
+                      plugin_api),
+  m_pwl_pool_layout_name(POBJ_LAYOUT_NAME(rbd_pwl))
+{
+}
+
+template <typename I>
+WriteLog<I>::~WriteLog() {
+  m_log_pool = nullptr;
+  delete m_builderobj;
+}
+
+template <typename I>
+void WriteLog<I>::collect_read_extents(
+      uint64_t read_buffer_offset, LogMapEntry<GenericWriteLogEntry> map_entry,
+      std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
+      std::vector<bufferlist*> &bls_to_read, uint64_t entry_hit_length,
+      Extent hit_extent, pwl::C_ReadRequest *read_ctx) {
+  /* Make a bl for this hit extent. This will add references to the
+   * write_entry->pmem_bp */
+  buffer::list hit_bl;
+
+  /* Create buffer object referring to pmem pool for this read hit */
+  auto write_entry = map_entry.log_entry;
+
+  buffer::list entry_bl_copy;
+  write_entry->copy_cache_bl(&entry_bl_copy);
+  entry_bl_copy.begin(read_buffer_offset).copy(entry_hit_length, hit_bl);
+  ceph_assert(hit_bl.length() == entry_hit_length);
+
+  /* Add hit extent to read extents */
+  auto hit_extent_buf = std::make_shared<ImageExtentBuf>(hit_extent, hit_bl);
+  read_ctx->read_extents.push_back(hit_extent_buf);
+}
+
+template <typename I>
+void WriteLog<I>::complete_read(
+    std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
+    std::vector<bufferlist*> &bls_to_read, Context *ctx) {
+  ctx->complete(0);
+}
+
+/*
+ * Allocate the (already reserved) write log entries for a set of operations.
+ *
+ * Locking:
+ * Acquires lock
+ */
+template <typename I>
+void WriteLog<I>::alloc_op_log_entries(GenericLogOperations &ops)
+{
+  TOID(struct WriteLogPoolRoot) pool_root;
+  pool_root = POBJ_ROOT(m_log_pool, struct WriteLogPoolRoot);
+  struct WriteLogCacheEntry *pmem_log_entries = D_RW(D_RW(pool_root)->log_entries);
+
+  ceph_assert(ceph_mutex_is_locked_by_me(this->m_log_append_lock));
+
+  /* Allocate the (already reserved) log entries */
+  std::unique_lock locker(m_lock);
+
+  for (auto &operation : ops) {
+    uint32_t entry_index = this->m_first_free_entry;
+    this->m_first_free_entry = (this->m_first_free_entry + 1) % this->m_total_log_entries;
+    auto &log_entry = operation->get_log_entry();
+    log_entry->log_entry_index = entry_index;
+    log_entry->ram_entry.entry_index = entry_index;
+    log_entry->cache_entry = &pmem_log_entries[entry_index];
+    log_entry->ram_entry.set_entry_valid(true);
+    m_log_entries.push_back(log_entry);
+    ldout(m_image_ctx.cct, 20) << "operation=[" << *operation << "]" << dendl;
+  }
+  if (m_cache_state->empty && !m_log_entries.empty()) {
+    m_cache_state->empty = false;
+    this->update_image_cache_state();
+    this->write_image_cache_state(locker);
+  }
+}
+
+/*
+ * Write and persist the (already allocated) write log entries and
+ * data buffer allocations for a set of ops. The data buffer for each
+ * of these must already have been persisted to its reserved area.
+ */
+template <typename I>
+int WriteLog<I>::append_op_log_entries(GenericLogOperations &ops)
+{
+  CephContext *cct = m_image_ctx.cct;
+  GenericLogOperationsVector entries_to_flush;
+  TOID(struct WriteLogPoolRoot) pool_root;
+  pool_root = POBJ_ROOT(m_log_pool, struct WriteLogPoolRoot);
+  int ret = 0;
+
+  ceph_assert(ceph_mutex_is_locked_by_me(this->m_log_append_lock));
+
+  if (ops.empty()) {
+    return 0;
+  }
+  entries_to_flush.reserve(OPS_APPENDED_TOGETHER);
+
+  /* Write log entries to ring and persist */
+  utime_t now = ceph_clock_now();
+  for (auto &operation : ops) {
+    if (!entries_to_flush.empty()) {
+      /* Flush these and reset the list if the current entry wraps to the
+       * tail of the ring */
+      if (entries_to_flush.back()->get_log_entry()->log_entry_index >
+          operation->get_log_entry()->log_entry_index) {
+        ldout(m_image_ctx.cct, 20) << "entries to flush wrap around the end of the ring at "
+                                   << "operation=[" << *operation << "]" << dendl;
+        flush_op_log_entries(entries_to_flush);
+        entries_to_flush.clear();
+        now = ceph_clock_now();
+      }
+    }
+    ldout(m_image_ctx.cct, 20) << "Copying entry for operation at index="
+                               << operation->get_log_entry()->log_entry_index
+                               << " from " << &operation->get_log_entry()->ram_entry
+                               << " to " << operation->get_log_entry()->cache_entry
+                               << " operation=[" << *operation << "]" << dendl;
+    operation->log_append_start_time = now;
+    *operation->get_log_entry()->cache_entry = operation->get_log_entry()->ram_entry;
+    ldout(m_image_ctx.cct, 20) << "APPENDING: index="
+                               << operation->get_log_entry()->log_entry_index
+                               << " pmem_entry=[" << *operation->get_log_entry()->cache_entry
+                               << "]" << dendl;
+    entries_to_flush.push_back(operation);
+  }
+  flush_op_log_entries(entries_to_flush);
+
+  /* Drain once for all */
+  pmemobj_drain(m_log_pool);
+
+  /*
+   * Atomically advance the log head pointer and publish the
+   * allocations for all the data buffers they refer to.
+   */
+  utime_t tx_start = ceph_clock_now();
+  TX_BEGIN(m_log_pool) {
+    D_RW(pool_root)->first_free_entry = this->m_first_free_entry;
+    for (auto &operation : ops) {
+      if (operation->reserved_allocated()) {
+        auto write_op = (std::shared_ptr<WriteLogOperation>&) operation;
+        pmemobj_tx_publish(&write_op->buffer_alloc->buffer_alloc_action, 1);
+      } else {
+        ldout(m_image_ctx.cct, 20) << "skipping non-write op: " << *operation << dendl;
+      }
+    }
+  } TX_ONCOMMIT {
+  } TX_ONABORT {
+    lderr(cct) << "failed to commit " << ops.size()
+               << " log entries (" << this->m_log_pool_name << ")" << dendl;
+    ceph_assert(false);
+    ret = -EIO;
+  } TX_FINALLY {
+  } TX_END;
+
+  utime_t tx_end = ceph_clock_now();
+  m_perfcounter->tinc(l_librbd_pwl_append_tx_t, tx_end - tx_start);
+  m_perfcounter->hinc(
+    l_librbd_pwl_append_tx_t_hist, utime_t(tx_end - tx_start).to_nsec(), ops.size());
+  for (auto &operation : ops) {
+    operation->log_append_comp_time = tx_end;
+  }
+
+  return ret;
+}
+
+/*
+ * Flush the persistent write log entries set of ops. The entries must
+ * be contiguous in persistent memory.
+ */
+template <typename I>
+void WriteLog<I>::flush_op_log_entries(GenericLogOperationsVector &ops)
+{
+  if (ops.empty()) {
+    return;
+  }
+
+  if (ops.size() > 1) {
+    ceph_assert(ops.front()->get_log_entry()->cache_entry < ops.back()->get_log_entry()->cache_entry);
+  }
+
+  ldout(m_image_ctx.cct, 20) << "entry count=" << ops.size()
+                             << " start address="
+                             << ops.front()->get_log_entry()->cache_entry
+                             << " bytes="
+                             << ops.size() * sizeof(*(ops.front()->get_log_entry()->cache_entry))
+                             << dendl;
+  pmemobj_flush(m_log_pool,
+                ops.front()->get_log_entry()->cache_entry,
+                ops.size() * sizeof(*(ops.front()->get_log_entry()->cache_entry)));
+}
+
+template <typename I>
+void WriteLog<I>::remove_pool_file() {
+  if (m_log_pool) {
+    ldout(m_image_ctx.cct, 6) << "closing pmem pool" << dendl;
+    pmemobj_close(m_log_pool);
+  }
+  if (m_cache_state->clean) {
+      ldout(m_image_ctx.cct, 5) << "Removing empty pool file: " << this->m_log_pool_name << dendl;
+      if (remove(this->m_log_pool_name.c_str()) != 0) {
+        lderr(m_image_ctx.cct) << "failed to remove empty pool \"" << this->m_log_pool_name << "\": "
+          << pmemobj_errormsg() << dendl;
+      } else {
+        m_cache_state->present = false;
+      }
+  } else {
+    ldout(m_image_ctx.cct, 5) << "Not removing pool file: " << this->m_log_pool_name << dendl;
+  }
+}
+
+template <typename I>
+bool WriteLog<I>::initialize_pool(Context *on_finish, pwl::DeferredContexts &later) {
+  CephContext *cct = m_image_ctx.cct;
+  int r = -EINVAL;
+  TOID(struct WriteLogPoolRoot) pool_root;
+  ceph_assert(ceph_mutex_is_locked_by_me(m_lock));
+  if (access(this->m_log_pool_name.c_str(), F_OK) != 0) {
+    if ((m_log_pool =
+         pmemobj_create(this->m_log_pool_name.c_str(),
+                        this->m_pwl_pool_layout_name,
+                        this->m_log_pool_size,
+                        (S_IWUSR | S_IRUSR))) == NULL) {
+      lderr(cct) << "failed to create pool: " << this->m_log_pool_name
+                 << ". error: " << pmemobj_errormsg() << dendl;
+      m_cache_state->present = false;
+      m_cache_state->clean = true;
+      m_cache_state->empty = true;
+      /* TODO: filter/replace errnos that are meaningless to the caller */
+      on_finish->complete(-errno);
+      return false;
+    }
+    m_cache_state->present = true;
+    m_cache_state->clean = true;
+    m_cache_state->empty = true;
+    pool_root = POBJ_ROOT(m_log_pool, struct WriteLogPoolRoot);
+
+    /* new pool, calculate and store metadata */
+    size_t effective_pool_size = (size_t)(this->m_log_pool_size * USABLE_SIZE);
+    size_t small_write_size = MIN_WRITE_ALLOC_SIZE + BLOCK_ALLOC_OVERHEAD_BYTES + sizeof(struct WriteLogCacheEntry);
+    uint64_t num_small_writes = (uint64_t)(effective_pool_size / small_write_size);
+    if (num_small_writes > MAX_LOG_ENTRIES) {
+      num_small_writes = MAX_LOG_ENTRIES;
+    }
+    if (num_small_writes <= 2) {
+      lderr(cct) << "num_small_writes needs to > 2" << dendl;
+      goto err_close_pool;
+    }
+    this->m_bytes_allocated_cap = effective_pool_size;
+    /* Log ring empty */
+    m_first_free_entry = 0;
+    m_first_valid_entry = 0;
+    TX_BEGIN(m_log_pool) {
+      TX_ADD(pool_root);
+      D_RW(pool_root)->header.layout_version = RWL_LAYOUT_VERSION;
+      D_RW(pool_root)->log_entries =
+        TX_ZALLOC(struct WriteLogCacheEntry,
+                  sizeof(struct WriteLogCacheEntry) * num_small_writes);
+      D_RW(pool_root)->pool_size = this->m_log_pool_size;
+      D_RW(pool_root)->flushed_sync_gen = this->m_flushed_sync_gen;
+      D_RW(pool_root)->block_size = MIN_WRITE_ALLOC_SIZE;
+      D_RW(pool_root)->num_log_entries = num_small_writes;
+      D_RW(pool_root)->first_free_entry = m_first_free_entry;
+      D_RW(pool_root)->first_valid_entry = m_first_valid_entry;
+    } TX_ONCOMMIT {
+      this->m_total_log_entries = D_RO(pool_root)->num_log_entries;
+      this->m_free_log_entries = D_RO(pool_root)->num_log_entries - 1; // leave one free
+    } TX_ONABORT {
+      this->m_total_log_entries = 0;
+      this->m_free_log_entries = 0;
+      lderr(cct) << "failed to initialize pool: " << this->m_log_pool_name
+                 << ". pmemobj TX errno: " << pmemobj_tx_errno() << dendl;
+      r = -pmemobj_tx_errno();
+      goto err_close_pool;
+    } TX_FINALLY {
+    } TX_END;
+  } else {
+    ceph_assert(m_cache_state->present);
+    /* Open existing pool */
+    if ((m_log_pool =
+         pmemobj_open(this->m_log_pool_name.c_str(),
+                      this->m_pwl_pool_layout_name)) == NULL) {
+      lderr(cct) << "failed to open pool (" << this->m_log_pool_name << "): "
+                 << pmemobj_errormsg() << dendl;
+      on_finish->complete(-errno);
+      return false;
+    }
+    pool_root = POBJ_ROOT(m_log_pool, struct WriteLogPoolRoot);
+    if (D_RO(pool_root)->header.layout_version != RWL_LAYOUT_VERSION) {
+      // TODO: will handle upgrading version in the future
+      lderr(cct) << "pool layout version is "
+                 << D_RO(pool_root)->header.layout_version
+                 << " expected " << RWL_LAYOUT_VERSION << dendl;
+      goto err_close_pool;
+    }
+    if (D_RO(pool_root)->block_size != MIN_WRITE_ALLOC_SIZE) {
+      lderr(cct) << "pool block size is " << D_RO(pool_root)->block_size
+                 << " expected " << MIN_WRITE_ALLOC_SIZE << dendl;
+      goto err_close_pool;
+    }
+    this->m_log_pool_size = D_RO(pool_root)->pool_size;
+    this->m_flushed_sync_gen = D_RO(pool_root)->flushed_sync_gen;
+    this->m_total_log_entries = D_RO(pool_root)->num_log_entries;
+    m_first_free_entry = D_RO(pool_root)->first_free_entry;
+    m_first_valid_entry = D_RO(pool_root)->first_valid_entry;
+    if (m_first_free_entry < m_first_valid_entry) {
+      /* Valid entries wrap around the end of the ring, so first_free is lower
+       * than first_valid.  If first_valid was == first_free+1, the entry at
+       * first_free would be empty. The last entry is never used, so in
+       * that case there would be zero free log entries. */
+     this->m_free_log_entries = this->m_total_log_entries - (m_first_valid_entry - m_first_free_entry) -1;
+    } else {
+      /* first_valid is <= first_free. If they are == we have zero valid log
+       * entries, and n-1 free log entries */
+      this->m_free_log_entries = this->m_total_log_entries - (m_first_free_entry - m_first_valid_entry) -1;
+    }
+    size_t effective_pool_size = (size_t)(this->m_log_pool_size * USABLE_SIZE);
+    this->m_bytes_allocated_cap = effective_pool_size;
+    load_existing_entries(later);
+    m_cache_state->clean = this->m_dirty_log_entries.empty();
+    m_cache_state->empty = m_log_entries.empty();
+  }
+  return true;
+
+err_close_pool:
+  pmemobj_close(m_log_pool);
+  on_finish->complete(r);
+  return false;
+}
+
+/*
+ * Loads the log entries from an existing log.
+ *
+ * Creates the in-memory structures to represent the state of the
+ * re-opened log.
+ *
+ * Finds the last appended sync point, and any sync points referred to
+ * in log entries, but missing from the log. These missing sync points
+ * are created and scheduled for append. Some rudimentary consistency
+ * checking is done.
+ *
+ * Rebuilds the m_blocks_to_log_entries map, to make log entries
+ * readable.
+ *
+ * Places all writes on the dirty entries list, which causes them all
+ * to be flushed.
+ *
+ */
+
+template <typename I>
+void WriteLog<I>::load_existing_entries(DeferredContexts &later) {
+  TOID(struct WriteLogPoolRoot) pool_root;
+  pool_root = POBJ_ROOT(m_log_pool, struct WriteLogPoolRoot);
+  struct WriteLogCacheEntry *pmem_log_entries = D_RW(D_RW(pool_root)->log_entries);
+  uint64_t entry_index = m_first_valid_entry;
+  /* The map below allows us to find sync point log entries by sync
+   * gen number, which is necessary so write entries can be linked to
+   * their sync points. */
+  std::map<uint64_t, std::shared_ptr<SyncPointLogEntry>> sync_point_entries;
+  /* The map below tracks sync points referred to in writes but not
+   * appearing in the sync_point_entries map.  We'll use this to
+   * determine which sync points are missing and need to be
+   * created. */
+  std::map<uint64_t, bool> missing_sync_points;
+
+  /*
+   * Read the existing log entries. Construct an in-memory log entry
+   * object of the appropriate type for each. Add these to the global
+   * log entries list.
+   *
+   * Write entries will not link to their sync points yet. We'll do
+   * that in the next pass. Here we'll accumulate a map of sync point
+   * gen numbers that are referred to in writes but do not appearing in
+   * the log.
+   */
+  while (entry_index != m_first_free_entry) {
+    WriteLogCacheEntry *pmem_entry = &pmem_log_entries[entry_index];
+    std::shared_ptr<GenericLogEntry> log_entry = nullptr;
+    ceph_assert(pmem_entry->entry_index == entry_index);
+
+    this->update_entries(&log_entry, pmem_entry, missing_sync_points,
+        sync_point_entries, entry_index);
+
+    log_entry->ram_entry = *pmem_entry;
+    log_entry->cache_entry = pmem_entry;
+    log_entry->log_entry_index = entry_index;
+    log_entry->completed = true;
+
+    m_log_entries.push_back(log_entry);
+
+    entry_index = (entry_index + 1) % this->m_total_log_entries;
+  }
+
+  this->update_sync_points(missing_sync_points, sync_point_entries, later);
+}
+
+template <typename I>
+void WriteLog<I>::inc_allocated_cached_bytes(
+    std::shared_ptr<pwl::GenericLogEntry> log_entry) {
+  if (log_entry->is_write_entry()) {
+    this->m_bytes_allocated += std::max(log_entry->write_bytes(), MIN_WRITE_ALLOC_SIZE);
+    this->m_bytes_cached += log_entry->write_bytes();
+  }
+}
+
+template <typename I>
+void WriteLog<I>::write_data_to_buffer(
+    std::shared_ptr<pwl::WriteLogEntry> ws_entry,
+    WriteLogCacheEntry *pmem_entry) {
+  ws_entry->cache_buffer = D_RW(pmem_entry->write_data);
+}
+
+/**
+ * Retire up to MAX_ALLOC_PER_TRANSACTION of the oldest log entries
+ * that are eligible to be retired. Returns true if anything was
+ * retired.
+ */
+template <typename I>
+bool WriteLog<I>::retire_entries(const unsigned long int frees_per_tx) {
+  CephContext *cct = m_image_ctx.cct;
+  GenericLogEntriesVector retiring_entries;
+  uint32_t initial_first_valid_entry;
+  uint32_t first_valid_entry;
+
+  std::lock_guard retire_locker(this->m_log_retire_lock);
+  ldout(cct, 20) << "Look for entries to retire" << dendl;
+  {
+    /* Entry readers can't be added while we hold m_entry_reader_lock */
+    RWLock::WLocker entry_reader_locker(this->m_entry_reader_lock);
+    std::lock_guard locker(m_lock);
+    initial_first_valid_entry = this->m_first_valid_entry;
+    first_valid_entry = this->m_first_valid_entry;
+    while (!m_log_entries.empty() && retiring_entries.size() < frees_per_tx &&
+           this->can_retire_entry(m_log_entries.front())) {
+      auto entry = m_log_entries.front();
+      if (entry->log_entry_index != first_valid_entry) {
+        lderr(cct) << "retiring entry index (" << entry->log_entry_index
+                   << ") and first valid log entry index (" << first_valid_entry
+                   << ") must be ==." << dendl;
+      }
+      ceph_assert(entry->log_entry_index == first_valid_entry);
+      first_valid_entry = (first_valid_entry + 1) % this->m_total_log_entries;
+      m_log_entries.pop_front();
+      retiring_entries.push_back(entry);
+      /* Remove entry from map so there will be no more readers */
+      if ((entry->write_bytes() > 0) || (entry->bytes_dirty() > 0)) {
+        auto gen_write_entry = static_pointer_cast<GenericWriteLogEntry>(entry);
+        if (gen_write_entry) {
+          this->m_blocks_to_log_entries.remove_log_entry(gen_write_entry);
+        }
+      }
+    }
+  }
+
+  if (retiring_entries.size()) {
+    ldout(cct, 20) << "Retiring " << retiring_entries.size() << " entries" << dendl;
+    TOID(struct WriteLogPoolRoot) pool_root;
+    pool_root = POBJ_ROOT(m_log_pool, struct WriteLogPoolRoot);
+
+    utime_t tx_start;
+    utime_t tx_end;
+    /* Advance first valid entry and release buffers */
+    {
+      uint64_t flushed_sync_gen;
+      std::lock_guard append_locker(this->m_log_append_lock);
+      {
+        std::lock_guard locker(m_lock);
+        flushed_sync_gen = this->m_flushed_sync_gen;
+      }
+
+      tx_start = ceph_clock_now();
+      TX_BEGIN(m_log_pool) {
+        if (D_RO(pool_root)->flushed_sync_gen < flushed_sync_gen) {
+          ldout(m_image_ctx.cct, 20) << "flushed_sync_gen in log updated from "
+                                     << D_RO(pool_root)->flushed_sync_gen << " to "
+                                     << flushed_sync_gen << dendl;
+          D_RW(pool_root)->flushed_sync_gen = flushed_sync_gen;
+        }
+        D_RW(pool_root)->first_valid_entry = first_valid_entry;
+        for (auto &entry: retiring_entries) {
+          if (entry->write_bytes()) {
+            ldout(cct, 20) << "Freeing " << entry->ram_entry.write_data.oid.pool_uuid_lo
+                           << "." << entry->ram_entry.write_data.oid.off << dendl;
+            TX_FREE(entry->ram_entry.write_data);
+          } else {
+            ldout(cct, 20) << "Retiring non-write: " << *entry << dendl;
+          }
+        }
+      } TX_ONCOMMIT {
+      } TX_ONABORT {
+        lderr(cct) << "failed to commit free of" << retiring_entries.size()
+                   << " log entries (" << this->m_log_pool_name << ")" << dendl;
+        ceph_assert(false);
+      } TX_FINALLY {
+      } TX_END;
+      tx_end = ceph_clock_now();
+    }
+    m_perfcounter->tinc(l_librbd_pwl_retire_tx_t, tx_end - tx_start);
+    m_perfcounter->hinc(l_librbd_pwl_retire_tx_t_hist, utime_t(tx_end - tx_start).to_nsec(),
+        retiring_entries.size());
+
+    bool need_update_state = false;
+    /* Update runtime copy of first_valid, and free entries counts */
+    {
+      std::lock_guard locker(m_lock);
+
+      ceph_assert(this->m_first_valid_entry == initial_first_valid_entry);
+      this->m_first_valid_entry = first_valid_entry;
+      this->m_free_log_entries += retiring_entries.size();
+      if (!m_cache_state->empty && m_log_entries.empty()) {
+        m_cache_state->empty = true;
+        this->update_image_cache_state();
+        need_update_state = true;
+      }
+      for (auto &entry: retiring_entries) {
+        if (entry->write_bytes()) {
+          ceph_assert(this->m_bytes_cached >= entry->write_bytes());
+          this->m_bytes_cached -= entry->write_bytes();
+          uint64_t entry_allocation_size = entry->write_bytes();
+          if (entry_allocation_size < MIN_WRITE_ALLOC_SIZE) {
+            entry_allocation_size = MIN_WRITE_ALLOC_SIZE;
+          }
+          ceph_assert(this->m_bytes_allocated >= entry_allocation_size);
+          this->m_bytes_allocated -= entry_allocation_size;
+        }
+      }
+      this->m_alloc_failed_since_retire = false;
+      this->wake_up();
+    }
+    if (need_update_state) {
+      std::unique_lock locker(m_lock);
+      this->write_image_cache_state(locker);
+    }
+  } else {
+    ldout(cct, 20) << "Nothing to retire" << dendl;
+    return false;
+  }
+  return true;
+}
+
+template <typename I>
+void WriteLog<I>::construct_flush_entries(pwl::GenericLogEntries entries_to_flush,
+				          DeferredContexts &post_unlock,
+					  bool has_write_entry) {
+  bool invalidating = this->m_invalidating; // snapshot so we behave consistently
+
+  for (auto &log_entry : entries_to_flush) {
+    GuardedRequestFunctionContext *guarded_ctx =
+      new GuardedRequestFunctionContext([this, log_entry, invalidating]
+        (GuardedRequestFunctionContext &guard_ctx) {
+          log_entry->m_cell = guard_ctx.cell;
+          Context *ctx = this->construct_flush_entry(log_entry, invalidating);
+
+	  if (!invalidating) {
+	    ctx = new LambdaContext(
+	      [this, log_entry, ctx](int r) {
+	      m_image_ctx.op_work_queue->queue(new LambdaContext(
+	        [this, log_entry, ctx](int r) {
+		  ldout(m_image_ctx.cct, 15) << "flushing:" << log_entry
+		                             << " " << *log_entry << dendl;
+		  log_entry->writeback(this->m_image_writeback, ctx);
+	      }), 0);
+	  });
+	}
+
+	ctx->complete(0);
+    });
+   this->detain_flush_guard_request(log_entry, guarded_ctx);
+  }
+}
+
+const unsigned long int ops_flushed_together = 4;
+/*
+ * Performs the pmem buffer flush on all scheduled ops, then schedules
+ * the log event append operation for all of them.
+ */
+template <typename I>
+void WriteLog<I>::flush_then_append_scheduled_ops(void)
+{
+  GenericLogOperations ops;
+  bool ops_remain = false;
+  ldout(m_image_ctx.cct, 20) << dendl;
+  do {
+    {
+      ops.clear();
+      std::lock_guard locker(m_lock);
+      if (m_ops_to_flush.size()) {
+        auto last_in_batch = m_ops_to_flush.begin();
+        unsigned int ops_to_flush = m_ops_to_flush.size();
+        if (ops_to_flush > ops_flushed_together) {
+          ops_to_flush = ops_flushed_together;
+        }
+        ldout(m_image_ctx.cct, 20) << "should flush " << ops_to_flush << dendl;
+        std::advance(last_in_batch, ops_to_flush);
+        ops.splice(ops.end(), m_ops_to_flush, m_ops_to_flush.begin(), last_in_batch);
+        ops_remain = !m_ops_to_flush.empty();
+        ldout(m_image_ctx.cct, 20) << "flushing " << ops.size() << ", remain "
+                                   << m_ops_to_flush.size() << dendl;
+      } else {
+        ops_remain = false;
+      }
+    }
+    if (ops_remain) {
+      enlist_op_flusher();
+    }
+
+    /* Ops subsequently scheduled for flush may finish before these,
+     * which is fine. We're unconcerned with completion order until we
+     * get to the log message append step. */
+    if (ops.size()) {
+      flush_pmem_buffer(ops);
+      schedule_append_ops(ops, nullptr);
+    }
+  } while (ops_remain);
+  append_scheduled_ops();
+}
+
+/*
+ * Performs the log event append operation for all of the scheduled
+ * events.
+ */
+template <typename I>
+void WriteLog<I>::append_scheduled_ops(void) {
+  GenericLogOperations ops;
+  int append_result = 0;
+  bool ops_remain = false;
+  bool appending = false; /* true if we set m_appending */
+  ldout(m_image_ctx.cct, 20) << dendl;
+  do {
+    ops.clear();
+    this->append_scheduled(ops, ops_remain, appending, true);
+
+    if (ops.size()) {
+      std::lock_guard locker(this->m_log_append_lock);
+      alloc_op_log_entries(ops);
+      append_result = append_op_log_entries(ops);
+    }
+
+    int num_ops = ops.size();
+    if (num_ops) {
+      /* New entries may be flushable. Completion will wake up flusher. */
+      this->complete_op_log_entries(std::move(ops), append_result);
+    }
+  } while (ops_remain);
+}
+
+template <typename I>
+void WriteLog<I>::enlist_op_flusher()
+{
+  this->m_async_flush_ops++;
+  this->m_async_op_tracker.start_op();
+  Context *flush_ctx = new LambdaContext([this](int r) {
+      flush_then_append_scheduled_ops();
+      this->m_async_flush_ops--;
+      this->m_async_op_tracker.finish_op();
+    });
+  this->m_work_queue.queue(flush_ctx);
+}
+
+template <typename I>
+void WriteLog<I>::setup_schedule_append(
+    pwl::GenericLogOperationsVector &ops, bool do_early_flush,
+    C_BlockIORequestT *req) {
+  if (do_early_flush) {
+    /* This caller is waiting for persist, so we'll use their thread to
+     * expedite it */
+    flush_pmem_buffer(ops);
+    this->schedule_append(ops);
+  } else {
+    /* This is probably not still the caller's thread, so do the payload
+     * flushing/replicating later. */
+    schedule_flush_and_append(ops);
+  }
+}
+
+/*
+ * Takes custody of ops. They'll all get their log entries appended,
+ * and have their on_write_persist contexts completed once they and
+ * all prior log entries are persisted everywhere.
+ */
+template <typename I>
+void WriteLog<I>::schedule_append_ops(GenericLogOperations &ops, C_BlockIORequestT *req)
+{
+  bool need_finisher;
+  GenericLogOperationsVector appending;
+
+  std::copy(std::begin(ops), std::end(ops), std::back_inserter(appending));
+  {
+    std::lock_guard locker(m_lock);
+
+    need_finisher = this->m_ops_to_append.empty() && !this->m_appending;
+    this->m_ops_to_append.splice(this->m_ops_to_append.end(), ops);
+  }
+
+  if (need_finisher) {
+    //enlist op appender
+    this->m_async_append_ops++;
+    this->m_async_op_tracker.start_op();
+    Context *append_ctx = new LambdaContext([this](int r) {
+        append_scheduled_ops();
+        this->m_async_append_ops--;
+        this->m_async_op_tracker.finish_op();
+        });
+    this->m_work_queue.queue(append_ctx);
+  }
+
+  for (auto &op : appending) {
+    op->appending();
+  }
+}
+
+/*
+ * Takes custody of ops. They'll all get their pmem blocks flushed,
+ * then get their log entries appended.
+ */
+template <typename I>
+void WriteLog<I>::schedule_flush_and_append(GenericLogOperationsVector &ops)
+{
+  GenericLogOperations to_flush(ops.begin(), ops.end());
+  bool need_finisher;
+  ldout(m_image_ctx.cct, 20) << dendl;
+  {
+    std::lock_guard locker(m_lock);
+
+    need_finisher = m_ops_to_flush.empty();
+    m_ops_to_flush.splice(m_ops_to_flush.end(), to_flush);
+  }
+
+  if (need_finisher) {
+    enlist_op_flusher();
+  }
+}
+
+template <typename I>
+void WriteLog<I>::process_work() {
+  CephContext *cct = m_image_ctx.cct;
+  int max_iterations = 4;
+  bool wake_up_requested = false;
+  uint64_t aggressive_high_water_bytes = this->m_bytes_allocated_cap * AGGRESSIVE_RETIRE_HIGH_WATER;
+  uint64_t high_water_bytes = this->m_bytes_allocated_cap * RETIRE_HIGH_WATER;
+  uint64_t low_water_bytes = this->m_bytes_allocated_cap * RETIRE_LOW_WATER;
+  uint64_t aggressive_high_water_entries = this->m_total_log_entries * AGGRESSIVE_RETIRE_HIGH_WATER;
+  uint64_t high_water_entries = this->m_total_log_entries * RETIRE_HIGH_WATER;
+  uint64_t low_water_entries = this->m_total_log_entries * RETIRE_LOW_WATER;
+
+  ldout(cct, 20) << dendl;
+
+  do {
+    {
+      std::lock_guard locker(m_lock);
+      this->m_wake_up_requested = false;
+    }
+    if (this->m_alloc_failed_since_retire || this->m_invalidating ||
+        this->m_bytes_allocated > high_water_bytes ||
+        (m_log_entries.size() > high_water_entries)) {
+      int retired = 0;
+      utime_t started = ceph_clock_now();
+      ldout(m_image_ctx.cct, 10) << "alloc_fail=" << this->m_alloc_failed_since_retire
+                                 << ", allocated > high_water="
+                                 << (this->m_bytes_allocated > high_water_bytes)
+                                 << ", allocated_entries > high_water="
+                                 << (m_log_entries.size() > high_water_entries)
+                                 << dendl;
+      while (this->m_alloc_failed_since_retire || this->m_invalidating ||
+            (this->m_bytes_allocated > high_water_bytes) ||
+            (m_log_entries.size() > high_water_entries) ||
+            (((this->m_bytes_allocated > low_water_bytes) ||
+              (m_log_entries.size() > low_water_entries)) &&
+            (utime_t(ceph_clock_now() - started).to_msec() < RETIRE_BATCH_TIME_LIMIT_MS))) {
+        if (!retire_entries((this->m_shutting_down || this->m_invalidating ||
+           (this->m_bytes_allocated > aggressive_high_water_bytes) ||
+           (m_log_entries.size() > aggressive_high_water_entries) ||
+           this->m_alloc_failed_since_retire)
+            ? MAX_ALLOC_PER_TRANSACTION
+            : MAX_FREE_PER_TRANSACTION)) {
+          break;
+        }
+        retired++;
+        this->dispatch_deferred_writes();
+        this->process_writeback_dirty_entries();
+      }
+      ldout(m_image_ctx.cct, 10) << "Retired " << retired << " times" << dendl;
+    }
+    this->dispatch_deferred_writes();
+    this->process_writeback_dirty_entries();
+
+    {
+      std::lock_guard locker(m_lock);
+      wake_up_requested = this->m_wake_up_requested;
+    }
+  } while (wake_up_requested && --max_iterations > 0);
+
+  {
+    std::lock_guard locker(m_lock);
+    this->m_wake_up_scheduled = false;
+    /* Reschedule if it's still requested */
+    if (this->m_wake_up_requested) {
+      this->wake_up();
+    }
+  }
+}
+
+/*
+ * Flush the pmem regions for the data blocks of a set of operations
+ *
+ * V is expected to be GenericLogOperations<I>, or GenericLogOperationsVector<I>
+ */
+template <typename I>
+template <typename V>
+void WriteLog<I>::flush_pmem_buffer(V& ops)
+{
+  utime_t now = ceph_clock_now();
+  for (auto &operation : ops) {
+    if (operation->reserved_allocated()) {
+      operation->buf_persist_start_time = now;
+    } else {
+      ldout(m_image_ctx.cct, 20) << "skipping non-write op: "
+                                 << *operation << dendl;
+    }
+  }
+
+  for (auto &operation : ops) {
+    if(operation->is_writing_op()) {
+      auto log_entry = static_pointer_cast<WriteLogEntry>(operation->get_log_entry());
+      pmemobj_flush(m_log_pool, log_entry->cache_buffer, log_entry->write_bytes());
+    }
+  }
+
+  /* Drain once for all */
+  pmemobj_drain(m_log_pool);
+
+  now = ceph_clock_now();
+  for (auto &operation : ops) {
+    if (operation->reserved_allocated()) {
+      operation->buf_persist_comp_time = now;
+    } else {
+      ldout(m_image_ctx.cct, 20) << "skipping non-write op: "
+                                 << *operation << dendl;
+    }
+  }
+}
+
+/**
+ * Update/persist the last flushed sync point in the log
+ */
+template <typename I>
+void WriteLog<I>::persist_last_flushed_sync_gen()
+{
+  TOID(struct WriteLogPoolRoot) pool_root;
+  pool_root = POBJ_ROOT(m_log_pool, struct WriteLogPoolRoot);
+  uint64_t flushed_sync_gen;
+
+  std::lock_guard append_locker(this->m_log_append_lock);
+  {
+    std::lock_guard locker(m_lock);
+    flushed_sync_gen = this->m_flushed_sync_gen;
+  }
+
+  if (D_RO(pool_root)->flushed_sync_gen < flushed_sync_gen) {
+    ldout(m_image_ctx.cct, 15) << "flushed_sync_gen in log updated from "
+                               << D_RO(pool_root)->flushed_sync_gen << " to "
+                               << flushed_sync_gen << dendl;
+    TX_BEGIN(m_log_pool) {
+      D_RW(pool_root)->flushed_sync_gen = flushed_sync_gen;
+    } TX_ONCOMMIT {
+    } TX_ONABORT {
+      lderr(m_image_ctx.cct) << "failed to commit update of flushed sync point" << dendl;
+      ceph_assert(false);
+    } TX_FINALLY {
+    } TX_END;
+  }
+}
+
+template <typename I>
+void WriteLog<I>::reserve_cache(C_BlockIORequestT *req,
+                                         bool &alloc_succeeds, bool &no_space) {
+  std::vector<WriteBufferAllocation>& buffers = req->get_resources_buffers();
+  for (auto &buffer : buffers) {
+    utime_t before_reserve = ceph_clock_now();
+    buffer.buffer_oid = pmemobj_reserve(m_log_pool,
+                                        &buffer.buffer_alloc_action,
+                                        buffer.allocation_size,
+                                        0 /* Object type */);
+    buffer.allocation_lat = ceph_clock_now() - before_reserve;
+    if (TOID_IS_NULL(buffer.buffer_oid)) {
+      ldout(m_image_ctx.cct, 5) << "can't allocate all data buffers: "
+                                << pmemobj_errormsg() << ". "
+                                << *req << dendl;
+      alloc_succeeds = false;
+      no_space = true; /* Entries need to be retired */
+
+      if (this->m_free_log_entries == this->m_total_log_entries - 1) {
+        /* When the cache is empty, there is still no space to allocate.
+         * Defragment. */
+        pmemobj_defrag(m_log_pool, NULL, 0, NULL);
+      }
+      break;
+    } else {
+      buffer.allocated = true;
+    }
+    ldout(m_image_ctx.cct, 20) << "Allocated " << buffer.buffer_oid.oid.pool_uuid_lo
+                               << "." << buffer.buffer_oid.oid.off
+                               << ", size=" << buffer.allocation_size << dendl;
+  }
+}
+
+template<typename I>
+void WriteLog<I>::copy_bl_to_buffer(
+    WriteRequestResources *resources, std::unique_ptr<WriteLogOperationSet> &op_set) {
+  auto allocation = resources->buffers.begin();
+  for (auto &operation : op_set->operations) {
+    operation->copy_bl_to_cache_buffer(allocation);
+    allocation++;
+  }
+}
+
+template <typename I>
+bool WriteLog<I>::alloc_resources(C_BlockIORequestT *req) {
+  bool alloc_succeeds = true;
+  uint64_t bytes_allocated = 0;
+  uint64_t bytes_cached = 0;
+  uint64_t bytes_dirtied = 0;
+  uint64_t num_lanes = 0;
+  uint64_t num_unpublished_reserves = 0;
+  uint64_t num_log_entries = 0;
+
+  ldout(m_image_ctx.cct, 20) << dendl;
+  // Setup buffer, and get all the number of required resources
+  req->setup_buffer_resources(&bytes_cached, &bytes_dirtied, &bytes_allocated,
+                              &num_lanes, &num_log_entries, &num_unpublished_reserves);
+
+  alloc_succeeds = this->check_allocation(req, bytes_cached, bytes_dirtied,
+                                          bytes_allocated, num_lanes, num_log_entries,
+                                          num_unpublished_reserves);
+
+  std::vector<WriteBufferAllocation>& buffers = req->get_resources_buffers();
+  if (!alloc_succeeds) {
+    /* On alloc failure, free any buffers we did allocate */
+    for (auto &buffer : buffers) {
+      if (buffer.allocated) {
+        pmemobj_cancel(m_log_pool, &buffer.buffer_alloc_action, 1);
+      }
+    }
+  }
+
+  req->set_allocated(alloc_succeeds);
+  return alloc_succeeds;
+}
+
+template <typename I>
+void WriteLog<I>::complete_user_request(Context *&user_req, int r) {
+  user_req->complete(r);
+  // Set user_req as null as it is deleted
+  user_req = nullptr;
+}
+
+} // namespace rwl
+} // namespace pwl
+} // namespace cache
+} // namespace librbd
+
+template class librbd::cache::pwl::rwl::WriteLog<librbd::ImageCtx>;
diff --git a/src/librbd/cache/pwl/rwl/WriteLog.h b/src/librbd/cache/pwl/rwl/WriteLog.h
new file mode 100644
index 000000000..5083a2568
--- /dev/null
+++ b/src/librbd/cache/pwl/rwl/WriteLog.h
@@ -0,0 +1,124 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_CACHE_REPLICATED_WRITE_LOG
+#define CEPH_LIBRBD_CACHE_REPLICATED_WRITE_LOG
+
+#include <functional>
+#include <libpmemobj.h>
+#include <list>
+#include "common/Timer.h"
+#include "common/RWLock.h"
+#include "common/WorkQueue.h"
+#include "common/AsyncOpTracker.h"
+#include "librbd/cache/ImageWriteback.h"
+#include "librbd/Utils.h"
+#include "librbd/BlockGuard.h"
+#include "librbd/cache/Types.h"
+#include "librbd/cache/pwl/AbstractWriteLog.h"
+#include "librbd/cache/pwl/LogMap.h"
+#include "librbd/cache/pwl/LogOperation.h"
+#include "librbd/cache/pwl/Request.h"
+#include "librbd/cache/pwl/rwl/Builder.h"
+
+class Context;
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace cache {
+namespace pwl {
+namespace rwl {
+
+template <typename ImageCtxT>
+class WriteLog : public AbstractWriteLog<ImageCtxT> {
+public:
+  WriteLog(
+      ImageCtxT &image_ctx, librbd::cache::pwl::ImageCacheState<ImageCtxT>* cache_state,
+      ImageWritebackInterface& image_writeback,
+      plugin::Api<ImageCtxT>& plugin_api);
+  ~WriteLog();
+  WriteLog(const WriteLog&) = delete;
+  WriteLog &operator=(const WriteLog&) = delete;
+
+  typedef io::Extent Extent;
+  using This = AbstractWriteLog<ImageCtxT>;
+  using C_WriteRequestT = pwl::C_WriteRequest<This>;
+  using C_WriteSameRequestT = pwl::C_WriteSameRequest<This>;
+
+  void copy_bl_to_buffer(
+      WriteRequestResources *resources, std::unique_ptr<WriteLogOperationSet> &op_set) override;
+  void complete_user_request(Context *&user_req, int r) override;
+private:
+  using C_BlockIORequestT = pwl::C_BlockIORequest<This>;
+  using C_FlushRequestT = pwl::C_FlushRequest<This>;
+  using C_DiscardRequestT = pwl::C_DiscardRequest<This>;
+
+  PMEMobjpool *m_log_pool = nullptr;
+  Builder<This> *m_builderobj;
+  const char* m_pwl_pool_layout_name;
+  const uint64_t MAX_EXTENT_SIZE = 1048576;
+
+  Builder<This>* create_builder();
+  void remove_pool_file();
+  void load_existing_entries(pwl::DeferredContexts &later);
+  void alloc_op_log_entries(pwl::GenericLogOperations &ops);
+  int append_op_log_entries(pwl::GenericLogOperations &ops);
+  void flush_then_append_scheduled_ops(void);
+  void enlist_op_flusher();
+  void flush_op_log_entries(pwl::GenericLogOperationsVector &ops);
+  template <typename V>
+  void flush_pmem_buffer(V& ops);
+  void inc_allocated_cached_bytes(
+      std::shared_ptr<pwl::GenericLogEntry> log_entry) override;
+protected:
+  using AbstractWriteLog<ImageCtxT>::m_lock;
+  using AbstractWriteLog<ImageCtxT>::m_log_entries;
+  using AbstractWriteLog<ImageCtxT>::m_image_ctx;
+  using AbstractWriteLog<ImageCtxT>::m_perfcounter;
+  using AbstractWriteLog<ImageCtxT>::m_ops_to_flush;
+  using AbstractWriteLog<ImageCtxT>::m_cache_state;
+  using AbstractWriteLog<ImageCtxT>::m_first_free_entry;
+  using AbstractWriteLog<ImageCtxT>::m_first_valid_entry;
+
+  void process_work() override;
+  void schedule_append_ops(pwl::GenericLogOperations &ops, C_BlockIORequestT *req) override;
+  void append_scheduled_ops(void) override;
+  void reserve_cache(C_BlockIORequestT *req,
+                     bool &alloc_succeeds, bool &no_space) override;
+  void collect_read_extents(
+      uint64_t read_buffer_offset, LogMapEntry<GenericWriteLogEntry> map_entry,
+      std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
+      std::vector<bufferlist*> &bls_to_read, uint64_t entry_hit_length,
+      Extent hit_extent, pwl::C_ReadRequest *read_ctx) override;
+  void complete_read(
+      std::vector<std::shared_ptr<GenericWriteLogEntry>> &log_entries_to_read,
+      std::vector<bufferlist*> &bls_to_read, Context *ctx) override;
+  bool retire_entries(const unsigned long int frees_per_tx) override;
+  void persist_last_flushed_sync_gen() override;
+  bool alloc_resources(C_BlockIORequestT *req) override;
+  void schedule_flush_and_append(pwl::GenericLogOperationsVector &ops) override;
+  void setup_schedule_append(
+      pwl::GenericLogOperationsVector &ops, bool do_early_flush,
+      C_BlockIORequestT *req) override;
+  void construct_flush_entries(pwl::GenericLogEntries entries_to_flush,
+				DeferredContexts &post_unlock,
+				bool has_write_entry) override;
+  bool initialize_pool(Context *on_finish, pwl::DeferredContexts &later) override;
+  void write_data_to_buffer(
+      std::shared_ptr<pwl::WriteLogEntry> ws_entry,
+      pwl::WriteLogCacheEntry *pmem_entry) override;
+  uint64_t get_max_extent() override {
+    return MAX_EXTENT_SIZE;
+  }
+};
+
+} // namespace rwl
+} // namespace pwl
+} // namespace cache
+} // namespace librbd
+
+extern template class librbd::cache::pwl::rwl::WriteLog<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_CACHE_REPLICATED_WRITE_LOG