1 files changed, 1542 insertions, 0 deletions
diff --git a/src/librbd/migration/QCOWFormat.cc b/src/librbd/migration/QCOWFormat.cc
new file mode 100644
index 000000000..7bd4a5ef7
--- /dev/null
+++ b/src/librbd/migration/QCOWFormat.cc
@@ -0,0 +1,1542 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/migration/QCOWFormat.h"
+#include "common/Clock.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "include/intarith.h"
+#include "librbd/AsioEngine.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Utils.h"
+#include "librbd/io/AioCompletion.h"
+#include "librbd/io/ReadResult.h"
+#include "librbd/migration/SnapshotInterface.h"
+#include "librbd/migration/SourceSpecBuilder.h"
+#include "librbd/migration/StreamInterface.h"
+#include "librbd/migration/Utils.h"
+#include <boost/asio/dispatch.hpp>
+#include <boost/asio/post.hpp>
+#include <deque>
+#include <tuple>
+#include <unordered_map>
+#include <vector>
+
+#define dout_subsys ceph_subsys_rbd
+
+namespace librbd {
+namespace migration {
+
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::migration::QCOWFormat: " \
+                           << __func__ << ": "
+
+namespace qcow_format {
+
+// Plain value type bundling the offsets and lengths that describe one piece
+// of an image extent. All fields are stored exactly as passed to the
+// constructor.
+struct ClusterExtent {
+  uint64_t cluster_offset;
+  uint64_t cluster_length;
+  uint64_t intra_cluster_offset;
+  uint64_t image_offset;
+  uint64_t buffer_offset;
+
+  ClusterExtent(uint64_t cluster_offset, uint64_t cluster_length,
+                uint64_t intra_cluster_offset, uint64_t image_offset,
+                uint64_t buffer_offset)
+    : cluster_offset{cluster_offset},
+      cluster_length{cluster_length},
+      intra_cluster_offset{intra_cluster_offset},
+      image_offset{image_offset},
+      buffer_offset{buffer_offset}
+  {}
+};
+
+// ordered collection of cluster extents
+using ClusterExtents = std::vector<ClusterExtent>;
+
+// Point cluster_offsets at the raw bytes held by bl. Does nothing when the
+// pointer has already been set.
+void LookupTable::init() {
+  if (cluster_offsets != nullptr) {
+    return;
+  }
+
+  cluster_offsets = reinterpret_cast<uint64_t*>(bl.c_str());
+}
+
+// Byte-swap every entry of the lookup table from big-endian to host order.
+// The swap happens at most once; later calls only re-run init().
+void LookupTable::decode() {
+  // make sure cluster_offsets refers to the table buffer
+  init();
+
+  // L2 tables are left un-swapped while callers only request individual
+  // cluster offsets (those are swapped on demand), so the table may or may
+  // not have been translated already
+  if (!decoded) {
+    // translate the lookup table (big-endian -> CPU endianness)
+    for (unsigned long entry = 0; entry < size; ++entry) {
+      cluster_offsets[entry] = be64toh(cluster_offsets[entry]);
+    }
+    decoded = true;
+  }
+}
+
+// Split every image extent at cluster boundaries and append one
+// ClusterExtent per piece to cluster_extents.
+//
+// Each appended entry carries a cluster offset of 0 (to be resolved by the
+// caller), the piece length, the offset of the piece within its cluster, the
+// image offset of the piece, and a running buffer offset that starts at 0
+// and advances by the length of every piece emitted.
+void populate_cluster_extents(CephContext* cct, uint64_t cluster_size,
+                              const io::Extents& image_extents,
+                              ClusterExtents* cluster_extents) {
+  // cluster_size is applied as a bit mask, which presumes a power of two
+  const uint64_t intra_cluster_mask = cluster_size - 1;
+
+  uint64_t buffer_offset = 0;
+  for (const auto& image_extent : image_extents) {
+    // work on a private copy so the extent can be consumed piece by piece
+    auto [offset, remaining] = image_extent;
+    while (remaining > 0) {
+      const auto intra_cluster_offset = offset & intra_cluster_mask;
+      // never let a single piece run past the end of its cluster
+      const auto cluster_length = std::min(
+        remaining, cluster_size - intra_cluster_offset);
+
+      ldout(cct, 20) << "image_offset=" << offset << ", "
+                     << "image_length=" << remaining << ", "
+                     << "cluster_length=" << cluster_length << dendl;
+
+      cluster_extents->emplace_back(0, cluster_length, intra_cluster_offset,
+                                    offset, buffer_offset);
+
+      offset += cluster_length;
+      remaining -= cluster_length;
+      buffer_offset += cluster_length;
+    }
+  }
+}
+
+} // namespace qcow_format
+
+using namespace qcow_format;
+
+// Pairs an (immutable) cluster offset with the buffer that receives the data
+// read for it.
+template <typename I>
+struct QCOWFormat<I>::Cluster {
+  const uint64_t cluster_offset;
+  bufferlist cluster_data_bl;
+
+  Cluster(uint64_t cluster_offset)
+    : cluster_offset{cluster_offset}
+  {}
+};
+
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::migration::QCOWFormat::ClusterCache: " \
+                           << this << " " << __func__ << ": "
+
+// Single-entry cache of the most recently read cluster.
+//
+// Requests for a cluster are dispatched onto m_strand. A request that matches
+// the cached cluster is answered from the cached buffer; otherwise the
+// request is recorded per cluster offset and a stream read is started for the
+// first request of each offset. All caller callbacks are completed via the
+// image context's asio_engine.
+template <typename I>
+class QCOWFormat<I>::ClusterCache {
+public:
+  ClusterCache(QCOWFormat* qcow_format)
+    : qcow_format(qcow_format),
+      m_strand(*qcow_format->m_image_ctx->asio_engine) {
+  }
+
+  // Request cluster_length bytes starting at intra_cluster_offset within the
+  // cluster identified by cluster_offset. On success the bytes are placed in
+  // bl before on_finish is completed with 0; on failure on_finish receives
+  // the negative error code.
+  void get_cluster(uint64_t cluster_offset, uint64_t cluster_length,
+                   uint64_t intra_cluster_offset, bufferlist* bl,
+                   Context* on_finish) {
+    auto cct = qcow_format->m_image_ctx->cct;
+    ldout(cct, 20) << "cluster_offset=" << cluster_offset << dendl;
+
+    // cache state machine runs in a single strand thread
+    boost::asio::dispatch(
+      m_strand,
+      [this, cluster_offset, cluster_length, intra_cluster_offset, bl,
+       on_finish]() {
+        execute_get_cluster(cluster_offset, cluster_length,
+                            intra_cluster_offset, bl, on_finish);
+      });
+  }
+
+private:
+  // pending request: (intra-cluster offset, length, output buffer, callback)
+  typedef std::tuple<uint64_t, uint64_t, bufferlist*, Context*> Completion;
+  typedef std::list<Completion> Completions;
+
+  QCOWFormat* qcow_format;
+  boost::asio::io_context::strand m_strand;
+
+  // most recently read (uncompressed) cluster, if any
+  std::shared_ptr<Cluster> cluster;
+  // requests waiting on an in-flight read, keyed by cluster offset
+  std::unordered_map<uint64_t, Completions> cluster_completions;
+
+  // Strand-side half of get_cluster(): serve from the cached cluster or queue
+  // the request behind a (possibly new) stream read.
+  void execute_get_cluster(uint64_t cluster_offset, uint64_t cluster_length,
+                           uint64_t intra_cluster_offset, bufferlist* bl,
+                           Context* on_finish) {
+    auto cct = qcow_format->m_image_ctx->cct;
+    ldout(cct, 20) << "cluster_offset=" << cluster_offset << dendl;
+
+    if (cluster && cluster->cluster_offset == cluster_offset) {
+      // most-recent cluster matches
+      bl->substr_of(cluster->cluster_data_bl, intra_cluster_offset,
+                    cluster_length);
+      // complete outside of the strand
+      boost::asio::post(*qcow_format->m_image_ctx->asio_engine,
+                        [on_finish]() { on_finish->complete(0); });
+      return;
+    }
+
+    // record callback for cluster
+    bool new_request = (cluster_completions.count(cluster_offset) == 0);
+    cluster_completions[cluster_offset].emplace_back(
+      intra_cluster_offset, cluster_length, bl, on_finish);
+    if (new_request) {
+      // start the new read request
+      read_cluster(std::make_shared<Cluster>(cluster_offset));
+    }
+  }
+
+  // Issue the stream read for the given cluster; the result is handed to
+  // handle_read_cluster() back on the strand.
+  void read_cluster(std::shared_ptr<Cluster> cluster) {
+    auto cct = qcow_format->m_image_ctx->cct;
+
+    // default: read one full cluster located at the cluster offset
+    uint64_t stream_offset = cluster->cluster_offset;
+    uint64_t stream_length = qcow_format->m_cluster_size;
+    if ((cluster->cluster_offset & QCOW_OFLAG_COMPRESSED) != 0) {
+      // compressed clusters encode the compressed length in the lower bits
+      stream_offset = cluster->cluster_offset &
+                      qcow_format->m_cluster_offset_mask;
+      stream_length = (cluster->cluster_offset >>
+                        (63 - qcow_format->m_cluster_bits)) &
+                      (qcow_format->m_cluster_size - 1);
+    }
+
+    ldout(cct, 20) << "cluster_offset=" << cluster->cluster_offset << ", "
+                   << "stream_offset=" << stream_offset << ", "
+                   << "stream_length=" << stream_length << dendl;
+
+    // read the cluster into the cache entry
+    auto ctx = new LambdaContext([this, cluster](int r) {
+      boost::asio::post(m_strand, [this, cluster, r]() {
+        handle_read_cluster(r, cluster); }); });
+    qcow_format->m_stream->read({{stream_offset, stream_length}},
+                                &cluster->cluster_data_bl, ctx);
+  }
+
+  // Strand-side completion of a cluster read: detach every request queued for
+  // the cluster, remember the cluster as the cached entry on success, and
+  // complete the detached requests with the result.
+  void handle_read_cluster(int r, std::shared_ptr<Cluster> cluster) {
+    auto cct = qcow_format->m_image_ctx->cct;
+    ldout(cct, 20) << "r=" << r << ", "
+                   << "cluster_offset=" << cluster->cluster_offset << dendl;
+
+    // take ownership of the waiters; a later request for the same offset
+    // will start a fresh read unless it hits the cached cluster
+    auto completions = std::move(cluster_completions[cluster->cluster_offset]);
+    cluster_completions.erase(cluster->cluster_offset);
+
+    if (r < 0) {
+      lderr(cct) << "failed to read cluster offset " << cluster->cluster_offset
+                 << ": " << cpp_strerror(r) << dendl;
+    } else {
+      if ((cluster->cluster_offset & QCOW_OFLAG_COMPRESSED) != 0) {
+        // the compressed payload is moved aside but never decompressed; all
+        // waiters fail with -EINVAL
+        bufferlist compressed_bl{std::move(cluster->cluster_data_bl)};
+        cluster->cluster_data_bl.clear();
+
+        // TODO
+        lderr(cct) << "support for compressed clusters is not available"
+                   << dendl;
+        r = -EINVAL;
+      } else {
+        // cache the MRU cluster in case of sequential IO
+        this->cluster = cluster;
+      }
+    }
+
+    // complete the IO back to caller
+    boost::asio::post(*qcow_format->m_image_ctx->asio_engine,
+                      [r, cluster, completions=std::move(completions)]() {
+      for (auto completion : completions) {
+        if (r >= 0) {
+          // tuple layout: <0> intra-cluster offset, <1> length, <2> buffer
+          std::get<2>(completion)->substr_of(
+            cluster->cluster_data_bl,
+            std::get<0>(completion),
+            std::get<1>(completion));
+        }
+        std::get<3>(completion)->complete(r);
+      }
+    });
+  }
+};
+
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::migration::QCOWFormat::L2TableCache: " \
+                           << this << " " << __func__ << ": "
+
+// Fixed-size cache of L2 lookup tables (QCOW_L2_CACHE_SIZE slots).
+//
+// Requests are appended to a FIFO queue and processed one at a time on
+// m_strand. When the table needed by the request at the head of the queue is
+// not cached, it is read from the stream into a free or least-used slot and
+// the queue is restarted once that read completes. Caller callbacks are
+// completed via the image context's asio_engine.
+template <typename I>
+class QCOWFormat<I>::L2TableCache {
+public:
+  L2TableCache(QCOWFormat* qcow_format)
+    : qcow_format(qcow_format),
+      m_strand(*qcow_format->m_image_ctx->asio_engine),
+      l2_cache_entries(QCOW_L2_CACHE_SIZE) {
+  }
+
+  // Retrieve the whole L2 table located at l2_table_offset. On success the
+  // table is decoded to host byte-order and stored in *l2_table before
+  // on_finish is completed with 0; otherwise on_finish receives the error.
+  void get_l2_table(const LookupTable* l1_table, uint64_t l2_table_offset,
+                    std::shared_ptr<const LookupTable>* l2_table,
+                    Context* on_finish) {
+    auto cct = qcow_format->m_image_ctx->cct;
+    ldout(cct, 20) << "l2_table_offset=" << l2_table_offset << dendl;
+
+    // cache state machine runs in a single strand thread
+    Request request{l1_table, l2_table_offset, l2_table, on_finish};
+    boost::asio::dispatch(
+      m_strand, [this, request=std::move(request)]() {
+        requests.push_back(std::move(request));
+      });
+    dispatch_request();
+  }
+
+  // Translate image_offset into a cluster offset via the L1 table and the
+  // matching L2 table.
+  //
+  // Completes on_finish with:
+  //   -ERANGE  the L1 index derived from image_offset is out of bounds
+  //   -ENOENT  no L2 table is allocated for the offset (*cluster_offset = 0)
+  //   0        *cluster_offset holds the masked L2 entry
+  //   <0       the L2 table could not be loaded
+  // The first two are completed inline on the calling thread.
+  void get_cluster_offset(const LookupTable* l1_table,
+                          uint64_t image_offset, uint64_t* cluster_offset,
+                          Context* on_finish) {
+    auto cct = qcow_format->m_image_ctx->cct;
+    uint32_t l1_table_index = image_offset >> qcow_format->m_l1_shift;
+
+    // validate the index before touching the L1 table: clamping the index
+    // with "size - 1" wraps around for an empty table and would read out of
+    // bounds
+    if (l1_table_index >= l1_table->size) {
+      lderr(cct) << "L1 index " << l1_table_index << " out-of-bounds" << dendl;
+      on_finish->complete(-ERANGE);
+      return;
+    }
+
+    uint64_t l2_table_offset = l1_table->cluster_offsets[l1_table_index] &
+                               qcow_format->m_cluster_mask;
+    uint32_t l2_table_index = (image_offset >> qcow_format->m_cluster_bits) &
+                              (qcow_format->m_l2_size - 1);
+    ldout(cct, 20) << "image_offset=" << image_offset << ", "
+                   << "l1_table_index=" << l1_table_index << ", "
+                   << "l2_table_offset=" << l2_table_offset << ", "
+                   << "l2_table_index=" << l2_table_index << dendl;
+
+    if (l2_table_offset == 0) {
+      // L2 table has not been allocated for specified offset
+      ldout(cct, 20) << "image_offset=" << image_offset << ", "
+                     << "cluster_offset=DNE" << dendl;
+      *cluster_offset = 0;
+      on_finish->complete(-ENOENT);
+      return;
+    }
+
+    // cache state machine runs in a single strand thread
+    Request request{l1_table, l2_table_offset, l2_table_index, cluster_offset,
+                    on_finish};
+    boost::asio::dispatch(
+      m_strand, [this, request=std::move(request)]() {
+        requests.push_back(std::move(request));
+      });
+    dispatch_request();
+  }
+
+private:
+  QCOWFormat* qcow_format;
+
+  boost::asio::io_context::strand m_strand;
+
+  // A queued lookup. Exactly one of cluster_offset (single-entry lookup) or
+  // l2_table (whole-table lookup) is expected to be non-null; both default to
+  // null so that the unused one is never left indeterminate when the request
+  // is copied or inspected.
+  struct Request {
+    const LookupTable* l1_table;
+
+    uint64_t l2_table_offset;
+
+    // get_cluster_offset request
+    uint32_t l2_table_index = 0;
+    uint64_t* cluster_offset = nullptr;
+
+    // get_l2_table request
+    std::shared_ptr<const LookupTable>* l2_table = nullptr;
+
+    Context* on_finish;
+
+    Request(const LookupTable* l1_table, uint64_t l2_table_offset,
+            uint32_t l2_table_index, uint64_t* cluster_offset,
+            Context* on_finish)
+      : l1_table(l1_table), l2_table_offset(l2_table_offset),
+        l2_table_index(l2_table_index), cluster_offset(cluster_offset),
+        on_finish(on_finish) {
+    }
+    Request(const LookupTable* l1_table, uint64_t l2_table_offset,
+            std::shared_ptr<const LookupTable>* l2_table, Context* on_finish)
+      : l1_table(l1_table), l2_table_offset(l2_table_offset),
+        l2_table(l2_table), on_finish(on_finish) {
+    }
+  };
+
+  typedef std::deque<Request> Requests;
+
+  // One cache slot.
+  struct L2Cache {
+    uint64_t l2_offset = 0;
+    std::shared_ptr<LookupTable> l2_table;
+
+    // time the slot was (re)loaded and its usage counter, both used to pick
+    // an eviction victim
+    utime_t timestamp;
+    uint32_t count = 0;
+    // true while the stream read filling this slot is outstanding
+    bool in_flight = false;
+
+    // result of the last load; a negative value is reported once and then
+    // the slot is reset
+    int ret_val = 0;
+  };
+  std::vector<L2Cache> l2_cache_entries;
+
+  Requests requests;
+
+  // Schedule execute_request() on the strand.
+  void dispatch_request() {
+    boost::asio::dispatch(m_strand, [this]() { execute_request(); });
+  }
+
+  // Process the request at the head of the queue. If its L2 table is not yet
+  // available the request stays queued and nothing else happens until the
+  // state machine is restarted by handle_l2_table_lookup().
+  void execute_request() {
+    auto cct = qcow_format->m_image_ctx->cct;
+    if (requests.empty()) {
+      return;
+    }
+
+    auto request = requests.front();
+    ldout(cct, 20) << "l2_table_offset=" << request.l2_table_offset << dendl;
+
+    std::shared_ptr<LookupTable> l2_table;
+    int r = l2_table_lookup(request.l2_table_offset, &l2_table);
+    if (r < 0) {
+      lderr(cct) << "failed to load L2 table: l2_table_offset="
+                 << request.l2_table_offset << ": "
+                 << cpp_strerror(r) << dendl;
+    } else if (l2_table == nullptr) {
+      // table not in cache -- will restart once it's loaded
+      return;
+    } else if (request.cluster_offset != nullptr) {
+      auto cluster_offset = l2_table->cluster_offsets[request.l2_table_index];
+      if (!l2_table->decoded) {
+        // table hasn't been byte-swapped
+        cluster_offset = be64toh(cluster_offset);
+      }
+
+      *request.cluster_offset = cluster_offset & qcow_format->m_cluster_mask;
+      if (*request.cluster_offset == QCOW_OFLAG_ZERO) {
+        ldout(cct, 20) << "l2_table_offset=" << request.l2_table_offset << ", "
+                       << "l2_table_index=" << request.l2_table_index << ", "
+                       << "cluster_offset=zeroed" << dendl;
+      } else {
+        ldout(cct, 20) << "l2_table_offset=" << request.l2_table_offset << ", "
+                       << "l2_table_index=" << request.l2_table_index << ", "
+                       << "cluster_offset=" << *request.cluster_offset
+                       << dendl;
+      }
+    } else if (request.l2_table != nullptr) {
+      // ensure it's in the correct byte-order
+      l2_table->decode();
+      *request.l2_table = l2_table;
+    } else {
+      // a request must carry one of the two output pointers
+      ceph_assert(false);
+    }
+
+    // complete the L2 cache request
+    boost::asio::post(*qcow_format->m_image_ctx->asio_engine,
+                      [r, ctx=request.on_finish]() { ctx->complete(r); });
+    requests.pop_front();
+
+    // process next request (if any)
+    dispatch_request();
+  }
+
+  // Look up the L2 table stored at l2_offset.
+  //
+  // Returns a negative error code if a previous load of that table failed
+  // (the failed slot is reset so a later lookup retries the load). Otherwise
+  // returns 0 and either sets *l2_table to the cached table, or leaves it
+  // null when the table is still loading, the load was just started, or no
+  // slot is available because every slot is in flight.
+  int l2_table_lookup(uint64_t l2_offset,
+                      std::shared_ptr<LookupTable>* l2_table) {
+    auto cct = qcow_format->m_image_ctx->cct;
+
+    l2_table->reset();
+
+    // find a match in the existing cache
+    for (auto idx = 0U; idx < l2_cache_entries.size(); ++idx) {
+      auto& l2_cache = l2_cache_entries[idx];
+      if (l2_cache.l2_offset == l2_offset) {
+        if (l2_cache.in_flight) {
+          ldout(cct, 20) << "l2_offset=" << l2_offset << ", "
+                         << "index=" << idx << " (in-flight)" << dendl;
+          return 0;
+        }
+
+        if (l2_cache.ret_val < 0) {
+          ldout(cct, 20) << "l2_offset=" << l2_offset << ", "
+                         << "index=" << idx << " (error): "
+                         << cpp_strerror(l2_cache.ret_val) << dendl;
+          int r = l2_cache.ret_val;
+          l2_cache = L2Cache{};
+
+          return r;
+        }
+
+        // bump the usage counter; halve all counters before one saturates
+        ++l2_cache.count;
+        if (l2_cache.count == std::numeric_limits<uint32_t>::max()) {
+          for (auto& entry : l2_cache_entries) {
+            entry.count >>= 1;
+          }
+        }
+
+        ldout(cct, 20) << "l2_offset=" << l2_offset << ", " << "index=" << idx
+                       << dendl;
+        *l2_table = l2_cache.l2_table;
+        return 0;
+      }
+    }
+
+    // find the least used entry
+    int32_t min_idx = -1;
+    uint32_t min_count = std::numeric_limits<uint32_t>::max();
+    utime_t min_timestamp;
+    for (uint32_t idx = 0U; idx < l2_cache_entries.size(); ++idx) {
+      auto& l2_cache = l2_cache_entries[idx];
+      if (l2_cache.in_flight) {
+        continue;
+      }
+
+      // every miss ages the usage counter of each idle slot
+      if (l2_cache.count > 0) {
+        --l2_cache.count;
+      }
+
+      if (l2_cache.count <= min_count) {
+        if (min_idx == -1 || l2_cache.timestamp < min_timestamp) {
+          min_timestamp = l2_cache.timestamp;
+          min_count = l2_cache.count;
+          min_idx = idx;
+        }
+      }
+    }
+
+    if (min_idx == -1) {
+      // no space in the cache due to in-flight requests
+      ldout(cct, 20) << "l2_offset=" << l2_offset << ", "
+                     << "index=DNE (cache busy)" << dendl;
+      return 0;
+    }
+
+    ldout(cct, 20) << "l2_offset=" << l2_offset << ", "
+                   << "index=" << min_idx << " (loading)" << dendl;
+    auto& l2_cache = l2_cache_entries[min_idx];
+    l2_cache.l2_table = std::make_shared<LookupTable>(qcow_format->m_l2_size);
+    l2_cache.l2_offset = l2_offset;
+    l2_cache.timestamp = ceph_clock_now();
+    l2_cache.count = 1;
+    l2_cache.in_flight = true;
+
+    // read the L2 table into the L2 cache entry
+    auto ctx = new LambdaContext([this, index=min_idx, l2_offset](int r) {
+      boost::asio::post(m_strand, [this, index, l2_offset, r]() {
+        handle_l2_table_lookup(r, index, l2_offset); }); });
+    qcow_format->m_stream->read(
+      {{l2_offset, qcow_format->m_l2_size * sizeof(uint64_t)}},
+      &l2_cache.l2_table->bl, ctx);
+    return 0;
+  }
+
+  // Strand-side completion of an L2 table read: record the outcome in the
+  // slot and restart request processing.
+  void handle_l2_table_lookup(int r, uint32_t index, uint64_t l2_offset) {
+    auto cct = qcow_format->m_image_ctx->cct;
+    ldout(cct, 20) << "r=" << r << ", "
+                   << "l2_offset=" << l2_offset << ", "
+                   << "index=" << index << dendl;
+
+    auto& l2_cache = l2_cache_entries[index];
+    ceph_assert(l2_cache.in_flight);
+    l2_cache.in_flight = false;
+
+    if (r < 0) {
+      lderr(cct) << "failed to load L2 table: "
+                 << "l2_offset=" << l2_cache.l2_offset << ": "
+                 << cpp_strerror(r) << dendl;
+      l2_cache.ret_val = r;
+    } else {
+      // keep the L2 table in big-endian byte-order until the full table
+      // is requested
+      l2_cache.l2_table->init();
+    }
+
+    // restart the state machine
+    dispatch_request();
+  }
+};
+
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::migration::QCOWFormat::ReadRequest: " \
+                           << this << " " << __func__ << ": "
+
+// Self-deleting state machine that services one image read.
+//
+// The image extents are split into per-cluster pieces, the cluster offset of
+// every piece is resolved through the L2 table cache, and finally each piece
+// is read through the cluster cache (or synthesized for unallocated / zeroed
+// clusters) into the AioCompletion. The object deletes itself once all
+// cluster reads have been issued, or after failing the completion.
+template <typename I>
+class QCOWFormat<I>::ReadRequest {
+public:
+  ReadRequest(QCOWFormat* qcow_format, io::AioCompletion* aio_comp,
+              const LookupTable* l1_table, io::Extents&& image_extents)
+    : qcow_format(qcow_format), aio_comp(aio_comp), l1_table(l1_table),
+      image_extents(std::move(image_extents)) {
+  }
+
+  // Start the request.
+  void send() {
+    get_cluster_offsets();
+  }
+
+private:
+  QCOWFormat* qcow_format;
+  io::AioCompletion* aio_comp;
+
+  const LookupTable* l1_table;
+  io::Extents image_extents;
+
+  // per-cluster pieces of image_extents; not resized after being populated,
+  // so references to its elements stay valid while lookups are outstanding
+  ClusterExtents cluster_extents;
+
+  // Split the image extents and resolve the cluster offset of every piece.
+  // handle_get_cluster_offsets() runs once all lookups have finished.
+  void get_cluster_offsets() {
+    auto cct = qcow_format->m_image_ctx->cct;
+    populate_cluster_extents(cct, qcow_format->m_cluster_size, image_extents,
+                             &cluster_extents);
+
+    ldout(cct, 20) << dendl;
+    auto ctx = new LambdaContext([this](int r) {
+      handle_get_cluster_offsets(r); });
+    auto gather_ctx = new C_Gather(cct, ctx);
+
+    for (auto& cluster_extent : cluster_extents) {
+      // the lookup writes its result straight into the extent
+      auto sub_ctx = new LambdaContext(
+        [this, &cluster_extent, on_finish=gather_ctx->new_sub()](int r) {
+          handle_get_cluster_offset(r, cluster_extent, on_finish); });
+      qcow_format->m_l2_table_cache->get_cluster_offset(
+        l1_table, cluster_extent.image_offset,
+        &cluster_extent.cluster_offset, sub_ctx);
+    }
+
+    gather_ctx->activate();
+  }
+
+  // Completion of a single cluster offset lookup. A missing mapping
+  // (-ENOENT) is not an error; any other failure is passed on to the gather.
+  void handle_get_cluster_offset(int r, const ClusterExtent& cluster_extent,
+                                 Context* on_finish) {
+    auto cct = qcow_format->m_image_ctx->cct;
+    ldout(cct, 20) << "r=" << r << ", "
+                   << "image_offset=" << cluster_extent.image_offset << ", "
+                   << "cluster_offset=" << cluster_extent.cluster_offset
+                   << dendl;
+
+    if (r == -ENOENT) {
+      ldout(cct, 20) << "image offset DNE in QCOW image" << dendl;
+      r = 0;
+    } else if (r < 0) {
+      lderr(cct) << "failed to map image offset " << cluster_extent.image_offset
+                 << ": " << cpp_strerror(r) << dendl;
+    }
+
+    on_finish->complete(r);
+  }
+
+  // All cluster offset lookups have finished: fail the IO on error,
+  // otherwise move on to reading the data.
+  void handle_get_cluster_offsets(int r) {
+    auto cct = qcow_format->m_image_ctx->cct;
+    ldout(cct, 20) << "r=" << r << dendl;
+
+    if (r < 0) {
+      lderr(cct) << "failed to retrieve cluster extents: " << cpp_strerror(r)
+                 << dendl;
+      aio_comp->fail(r);
+      delete this;
+      return;
+    }
+
+    read_clusters();
+  }
+
+  // Issue one sub-read per cluster extent against the AioCompletion, then
+  // delete this request. The sub-read callbacks may run after the deletion.
+  void read_clusters() {
+    auto cct = qcow_format->m_image_ctx->cct;
+    ldout(cct, 20) << dendl;
+
+    aio_comp->set_request_count(cluster_extents.size());
+    for (auto& cluster_extent : cluster_extents) {
+      auto read_ctx = new io::ReadResult::C_ImageReadRequest(
+        aio_comp, cluster_extent.buffer_offset,
+        {{cluster_extent.image_offset, cluster_extent.cluster_length}});
+      read_ctx->ignore_enoent = true;
+
+      // everything the callback needs is captured by value since this
+      // object is gone by the time it may run
+      auto log_ctx = new LambdaContext(
+        [this, cct=qcow_format->m_image_ctx->cct,
+         image_offset=cluster_extent.image_offset,
+         image_length=cluster_extent.cluster_length, ctx=read_ctx](int r) {
+          handle_read_cluster(cct, r, image_offset, image_length, ctx);
+        });
+
+      if (cluster_extent.cluster_offset == 0) {
+        // QCOW header is at offset 0, implies cluster DNE
+        log_ctx->complete(-ENOENT);
+      } else if (cluster_extent.cluster_offset == QCOW_OFLAG_ZERO) {
+        // explicitly zeroed section
+        read_ctx->bl.append_zero(cluster_extent.cluster_length);
+        log_ctx->complete(0);
+      } else {
+        // request the (sub)cluster from the cluster cache
+        qcow_format->m_cluster_cache->get_cluster(
+          cluster_extent.cluster_offset, cluster_extent.cluster_length,
+          cluster_extent.intra_cluster_offset, &read_ctx->bl, log_ctx);
+      }
+    }
+
+    delete this;
+  }
+
+  // Log the outcome of one cluster read and forward it to on_finish.
+  void handle_read_cluster(CephContext* cct, int r, uint64_t image_offset,
+                           uint64_t image_length, Context* on_finish) const {
+    // NOTE: treat as static function, expect object has been deleted --
+    // do not access any member from here
+
+    ldout(cct, 20) << "r=" << r << ", "
+                   << "image_offset=" << image_offset << ", "
+                   << "image_length=" << image_length << dendl;
+
+    if (r != -ENOENT && r < 0) {
+      lderr(cct) << "failed to read image extent " << image_offset << "~"
+                 << image_length << ": " << cpp_strerror(r) << dendl;
+    }
+
+    on_finish->complete(r);
+  }
+};
+
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::migration::QCOWFormat::" \
+                           << "ListSnapsRequest: " << this << " " \
+                           << __func__ << ": "
+
+// Self-deleting state machine that computes, for one L1 table index, how the
+// given cluster extents differ between consecutive snapshots.
+//
+// The snapshots are visited in ascending snap id order (the order of the
+// std::map). For each one the L2 table at l1_table_index is fetched and
+// compared against the L2 table of the previously visited snapshot; changed
+// extents are recorded in snapshot_delta. on_finish is completed once every
+// snapshot has been processed or an error occurs.
+template <typename I>
+class QCOWFormat<I>::ListSnapsRequest {
+public:
+  ListSnapsRequest(
+      QCOWFormat* qcow_format, uint32_t l1_table_index,
+      ClusterExtents&& cluster_extents,
+      const std::map<uint64_t, const LookupTable*>& snap_id_to_l1_table,
+      io::SnapshotDelta* snapshot_delta, Context* on_finish)
+    : qcow_format(qcow_format), l1_table_index(l1_table_index),
+      cluster_extents(std::move(cluster_extents)),
+      snap_id_to_l1_table(snap_id_to_l1_table), snapshot_delta(snapshot_delta),
+      on_finish(on_finish) {
+  }
+
+  // Start the request.
+  void send() {
+    get_l2_table();
+  }
+
+private:
+  QCOWFormat* qcow_format;
+  uint32_t l1_table_index;
+  ClusterExtents cluster_extents;
+  // private copy of the snapshot map; entries are erased as they are
+  // processed
+  std::map<uint64_t, const LookupTable*> snap_id_to_l1_table;
+  io::SnapshotDelta* snapshot_delta;
+  Context* on_finish;
+
+  // L2 table of the previously processed snapshot (null if it had none)
+  std::shared_ptr<const LookupTable> previous_l2_table;
+  // L2 table of the snapshot currently being processed (null if it has none)
+  std::shared_ptr<const LookupTable> l2_table;
+
+  // Pop the next snapshot and fetch its L2 table for l1_table_index; finishes
+  // the request with 0 when no snapshots remain.
+  void get_l2_table() {
+    auto cct = qcow_format->m_image_ctx->cct;
+    if (snap_id_to_l1_table.empty()) {
+      finish(0);
+      return;
+    }
+
+    auto it = snap_id_to_l1_table.begin();
+    auto [snap_id, l1_table] = *it;
+    snap_id_to_l1_table.erase(it);
+
+    // the current table becomes the baseline for the next comparison
+    previous_l2_table = l2_table;
+    l2_table.reset();
+
+    // the result is always handled on the QCOWFormat strand
+    auto ctx = new LambdaContext([this, snap_id = snap_id](int r) {
+      boost::asio::post(qcow_format->m_strand, [this, snap_id, r]() {
+        handle_get_l2_table(r, snap_id);
+        });
+    });
+
+    if (l1_table_index >= l1_table->size ||
+        l1_table->cluster_offsets[l1_table_index] == 0) {
+      // this snapshot has no L2 table at the requested L1 index
+      ldout(cct, 20) << "l1_table_index=" << l1_table_index << ", "
+                     << "snap_id=" << snap_id << ": DNE" << dendl;
+      ctx->complete(-ENOENT);
+      return;
+    }
+
+    uint64_t l2_table_offset = l1_table->cluster_offsets[l1_table_index] &
+                               qcow_format->m_cluster_mask;
+
+    ldout(cct, 20) << "l1_table_index=" << l1_table_index << ", "
+                   << "snap_id=" << snap_id << ", "
+                   << "l2_table_offset=" << l2_table_offset << dendl;
+    qcow_format->m_l2_table_cache->get_l2_table(l1_table, l2_table_offset,
+                                                &l2_table, ctx);
+  }
+
+  // Compare the just-fetched L2 table against the previous snapshot's table
+  // and record every changed cluster extent for snap_id, then continue with
+  // the next snapshot. -ENOENT means the snapshot has no L2 table and is not
+  // treated as an error.
+  void handle_get_l2_table(int r, uint64_t snap_id) {
+    ceph_assert(qcow_format->m_strand.running_in_this_thread());
+
+    auto cct = qcow_format->m_image_ctx->cct;
+    ldout(cct, 20) << "r=" << r << ", "
+                   << "snap_id=" << snap_id << dendl;
+
+    if (r == -ENOENT) {
+      l2_table.reset();
+    } else if (r < 0) {
+      lderr(cct) << "failed to retrieve L2 table for snapshot " << snap_id
+                 << ": " << cpp_strerror(r) << dendl;
+      finish(r);
+      return;
+    }
+
+    // compare the cluster offsets at each requested L2 offset between
+    // the previous snapshot's L2 table and the current L2 table.
+    auto& sparse_extents = (*snapshot_delta)[{snap_id, snap_id}];
+    for (auto& cluster_extent : cluster_extents) {
+      uint32_t l2_table_index =
+        (cluster_extent.image_offset >> qcow_format->m_cluster_bits) &
+        (qcow_format->m_l2_size - 1);
+
+      // an empty optional means "no L2 entry available" for that snapshot
+      std::optional<uint64_t> cluster_offset;
+      if (l2_table && l2_table_index < l2_table->size) {
+        cluster_offset = l2_table->cluster_offsets[l2_table_index] &
+                         qcow_format->m_cluster_offset_mask;
+      }
+
+      std::optional<uint64_t> prev_cluster_offset;
+      if (previous_l2_table && l2_table_index < previous_l2_table->size) {
+        prev_cluster_offset =
+          previous_l2_table->cluster_offsets[l2_table_index] &
+          qcow_format->m_cluster_offset_mask;
+      }
+
+      ldout(cct, 20) << "l1_table_index=" << l1_table_index << ", "
+                     << "snap_id=" << snap_id << ", "
+                     << "image_offset=" << cluster_extent.image_offset << ", "
+                     << "l2_table_index=" << l2_table_index << ", "
+                     << "cluster_offset=" << cluster_offset << ", "
+                     << "prev_cluster_offset=" << prev_cluster_offset << dendl;
+
+      auto state = io::SPARSE_EXTENT_STATE_DATA;
+      if (cluster_offset == prev_cluster_offset) {
+        // unchanged (this also covers "absent in both")
+        continue;
+      } else if ((prev_cluster_offset && !cluster_offset) ||
+                 *cluster_offset == QCOW_OFLAG_ZERO) {
+        // explicitly zeroed or deallocated
+        // (the dereference is only reached when cluster_offset has a value:
+        // "absent in both" was handled above and "absent now" short-circuits)
+        state = io::SPARSE_EXTENT_STATE_ZEROED;
+      }
+
+      sparse_extents.insert(
+        cluster_extent.image_offset, cluster_extent.cluster_length,
+        {state, cluster_extent.cluster_length});
+    }
+
+    ldout(cct, 20) << "l1_table_index=" << l1_table_index << ", "
+                   << "snap_id=" << snap_id << ", "
+                   << "sparse_extents=" << sparse_extents << dendl;
+
+    // continue processing the L2 table at this index for all snapshots
+    boost::asio::post(*qcow_format->m_image_ctx->asio_engine,
+                      [this]() { get_l2_table(); });
+  }
+
+
+  // Complete the caller's context with r and destroy this request.
+  void finish(int r) {
+    auto cct = qcow_format->m_image_ctx->cct;
+    ldout(cct, 20) << "r=" << r << dendl;
+
+    on_finish->complete(r);
+    delete this;
+  }
+};
+
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::migration::QCOWFormat: " << this \
+                           << " " << __func__ << ": "
+
+// Store the image context, the JSON source description and the source-spec
+// builder, and create the strand on the image context's asio engine.
+template <typename I>
+QCOWFormat<I>::QCOWFormat(
+    I* image_ctx, const json_spirit::mObject& json_object,
+    const SourceSpecBuilder<I>* source_spec_builder)
+  : m_image_ctx(image_ctx),
+    m_json_object(json_object),
+    m_source_spec_builder(source_spec_builder),
+    m_strand(*image_ctx->asio_engine)
+{
+}
+
+// Build the stream handler described by the JSON source spec and open it.
+// on_finish is completed with the error if the stream cannot be built;
+// otherwise handle_open() continues once the stream open has finished.
+template <typename I>
+void QCOWFormat<I>::open(Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << dendl;
+
+  int r = m_source_spec_builder->build_stream(m_json_object, &m_stream);
+  if (r < 0) {
+    // keep the message and the error text separated
+    lderr(cct) << "failed to build migration stream handler: "
+               << cpp_strerror(r) << dendl;
+    on_finish->complete(r);
+    return;
+  }
+
+  auto ctx = new LambdaContext([this, on_finish](int r) {
+    handle_open(r, on_finish); });
+  m_stream->open(ctx);
+}
+
+// Completion of the stream open: probe the QCOW header on success, otherwise
+// report the error to on_finish.
+template <typename I>
+void QCOWFormat<I>::handle_open(int r, Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << "r=" << r << dendl;
+
+  if (r >= 0) {
+    probe(on_finish);
+    return;
+  }
+
+  lderr(cct) << "failed to open QCOW image: " << cpp_strerror(r)
+             << dendl;
+  on_finish->complete(r);
+}
+
+// Read the first 8 bytes of the stream into m_bl so that handle_probe() can
+// inspect the header magic and version.
+template <typename I>
+void QCOWFormat<I>::probe(Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << dendl;
+
+  auto probe_ctx = new LambdaContext(
+    [this, on_finish](int r) {
+      handle_probe(r, on_finish);
+    });
+
+  // start from an empty buffer
+  m_bl.clear();
+  m_stream->read({{0, 8}}, &m_bl, probe_ctx);
+}
+
+// Completion of the header probe: validate the magic and dispatch to the
+// header reader matching the reported version.
+//
+// Completes on_finish with the read error, with -EINVAL for a truncated
+// probe, a bad magic or an unknown version, and with -ENOTSUP for version 1
+// images when v1 support is compiled out. Otherwise on_finish is handed to
+// the version specific header reader.
+template <typename I>
+void QCOWFormat<I>::handle_probe(int r, Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    lderr(cct) << "failed to probe QCOW image: " << cpp_strerror(r)
+               << dendl;
+    on_finish->complete(r);
+    return;
+  }
+
+  // the stream contents are untrusted: never reinterpret the buffer as a
+  // header probe unless enough bytes were actually returned
+  if (m_bl.length() < sizeof(QCowHeaderProbe)) {
+    lderr(cct) << "QCOW header probe is truncated" << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  // work on a copy so the fields can be byte-swapped in place
+  auto header_probe = *reinterpret_cast<QCowHeaderProbe*>(
+    m_bl.c_str());
+  header_probe.magic = be32toh(header_probe.magic);
+  header_probe.version = be32toh(header_probe.version);
+
+  if (header_probe.magic != QCOW_MAGIC) {
+    lderr(cct) << "invalid QCOW header magic" << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  m_bl.clear();
+  if (header_probe.version == 1) {
+#ifdef WITH_RBD_MIGRATION_FORMAT_QCOW_V1
+    read_v1_header(on_finish);
+#else // WITH_RBD_MIGRATION_FORMAT_QCOW_V1
+    lderr(cct) << "QCOW is not supported" << dendl;
+    on_finish->complete(-ENOTSUP);
+#endif // WITH_RBD_MIGRATION_FORMAT_QCOW_V1
+    return;
+  } else if (header_probe.version >= 2 && header_probe.version <= 3) {
+    read_v2_header(on_finish);
+    return;
+  } else {
+    lderr(cct) << "invalid QCOW header version " << header_probe.version
+               << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+}
+
+#ifdef WITH_RBD_MIGRATION_FORMAT_QCOW_V1
+
+// Reads sizeof(QCowHeaderV1) bytes from the start of the stream into m_bl
+// and hands the result to handle_read_v1_header().
+template <typename I>
+void QCOWFormat<I>::read_v1_header(Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << dendl;
+
+  m_bl.clear();
+  auto header_ctx = new LambdaContext([this, on_finish](int r) {
+    handle_read_v1_header(r, on_finish);
+  });
+  m_stream->read({{0, sizeof(QCowHeaderV1)}}, &m_bl, header_ctx);
+}
+
+// Completion of the QCOW (version 1) header read: validates the header,
+// derives the cluster / L1 / L2 geometry, allocates the L2 table and cluster
+// caches and then starts loading the L1 table.  Every validation failure
+// completes on_finish with -EINVAL; a failed read completes it with r.
+template <typename I>
+void QCOWFormat<I>::handle_read_v1_header(int r, Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    lderr(cct) << "failed to read QCOW header: " << cpp_strerror(r) << dendl;
+    on_finish->complete(r);
+    return;
+  }
+
+  // take a private copy of the header so it can be converted in place
+  QCowHeaderV1 header = *reinterpret_cast<QCowHeaderV1*>(m_bl.c_str());
+
+  // convert the big-endian fields that are used below to host order
+  header.magic = be32toh(header.magic);
+  header.version = be32toh(header.version);
+  header.size = be64toh(header.size);
+  header.crypt_method = be32toh(header.crypt_method);
+  header.l1_table_offset = be64toh(header.l1_table_offset);
+  header.backing_file_offset = be64toh(header.backing_file_offset);
+  header.backing_file_size = be32toh(header.backing_file_size);
+
+  const bool is_qcow_v1 = (header.magic == QCOW_MAGIC && header.version == 1);
+  if (!is_qcow_v1) {
+    // honestly shouldn't happen since the probe already validated it
+    lderr(cct) << "header is not QCOW" << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  const bool cluster_bits_valid =
+    (header.cluster_bits >= QCOW_MIN_CLUSTER_BITS &&
+     header.cluster_bits <= QCOW_MAX_CLUSTER_BITS);
+  if (!cluster_bits_valid) {
+    lderr(cct) << "invalid cluster bits: " << header.cluster_bits << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  const bool l2_bits_valid =
+    (header.l2_bits >= (QCOW_MIN_CLUSTER_BITS - 3) &&
+     header.l2_bits <= (QCOW_MAX_CLUSTER_BITS - 3));
+  if (!l2_bits_valid) {
+    lderr(cct) << "invalid L2 bits: " << header.l2_bits << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  if (header.crypt_method != QCOW_CRYPT_NONE) {
+    lderr(cct) << "invalid or unsupported encryption method" << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  // the image size must be a whole number of 512 byte blocks
+  const uint64_t block_size = 512;
+  m_size = header.size;
+  if (p2roundup(m_size, block_size) != m_size) {
+    lderr(cct) << "image size is not a multiple of block size" << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  m_backing_file_offset = header.backing_file_offset;
+  m_backing_file_size = header.backing_file_size;
+
+  // cluster geometry
+  m_cluster_bits = header.cluster_bits;
+  m_cluster_size = 1UL << header.cluster_bits;
+  m_cluster_offset_mask = (1ULL << (63 - header.cluster_bits)) - 1;
+  m_cluster_mask = ~QCOW_OFLAG_COMPRESSED;
+
+  // L2 geometry is taken from the header for version 1 images
+  m_l2_bits = header.l2_bits;
+  m_l2_size = (1UL << m_l2_bits);
+
+  // each L1 entry covers (1 << m_l1_shift) bytes of the image
+  m_l1_shift = m_cluster_bits + m_l2_bits;
+  const uint64_t l1_entry_span = 1ULL << m_l1_shift;
+  m_l1_table.size = (m_size + l1_entry_span - 1) >> m_l1_shift;
+  m_l1_table_offset = header.l1_table_offset;
+
+  const bool size_overflows =
+    (m_size > (std::numeric_limits<uint64_t>::max() - l1_entry_span));
+  const bool l1_table_too_big =
+    (m_l1_table.size >
+       (std::numeric_limits<int32_t>::max() / sizeof(uint64_t)));
+  if (size_overflows || l1_table_too_big) {
+    lderr(cct) << "image size too big: " << m_size << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  ldout(cct, 15) << "size=" << m_size << ", "
+                 << "cluster_bits=" << m_cluster_bits << ", "
+                 << "l2_bits=" << m_l2_bits << dendl;
+
+  // allocate the L2 table and cluster caches before any table is loaded
+  m_l2_table_cache = std::make_unique<L2TableCache>(this);
+  m_cluster_cache = std::make_unique<ClusterCache>(this);
+
+  read_l1_table(on_finish);
+}
+
+#endif // WITH_RBD_MIGRATION_FORMAT_QCOW_V1
+
+// Reads sizeof(QCowHeader) bytes from the start of the stream into m_bl and
+// hands the result to handle_read_v2_header().
+template <typename I>
+void QCOWFormat<I>::read_v2_header(Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << dendl;
+
+  m_bl.clear();
+  auto header_ctx = new LambdaContext([this, on_finish](int r) {
+    handle_read_v2_header(r, on_finish);
+  });
+  m_stream->read({{0, sizeof(QCowHeader)}}, &m_bl, header_ctx);
+}
+
+// Completion of the QCOW2 (version 2/3) header read.
+//
+// Converts the header fields to host order, rejects images that rely on
+// features this reader does not implement (encryption, a non-default
+// compression type, an external data file, extended L2 entries or any
+// unknown incompatible feature bit), derives the cluster / L1 / L2 geometry
+// and then starts loading the snapshot table via read_snapshot().
+//
+// on_finish is completed exactly once on failure: with r if the read
+// failed, -ENOTSUP for the external data file / extended L2 features and
+// -EINVAL for every other validation error.
+template <typename I>
+void QCOWFormat<I>::handle_read_v2_header(int r, Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << "r=" << r << dendl;
+
+  if (r < 0) {
+    lderr(cct) << "failed to read QCOW2 header: " << cpp_strerror(r) << dendl;
+    on_finish->complete(r);
+    return;
+  }
+
+  // work on a private copy so the fields can be converted in place
+  auto header = *reinterpret_cast<QCowHeader*>(m_bl.c_str());
+
+  // byte-swap important fields
+  header.magic = be32toh(header.magic);
+  header.version = be32toh(header.version);
+  header.backing_file_offset = be64toh(header.backing_file_offset);
+  header.backing_file_size = be32toh(header.backing_file_size);
+  header.cluster_bits = be32toh(header.cluster_bits);
+  header.size = be64toh(header.size);
+  header.crypt_method = be32toh(header.crypt_method);
+  header.l1_size = be32toh(header.l1_size);
+  header.l1_table_offset = be64toh(header.l1_table_offset);
+  header.nb_snapshots = be32toh(header.nb_snapshots);
+  header.snapshots_offset = be64toh(header.snapshots_offset);
+
+  if (header.version == 2) {
+    // valid only for version >= 3 -- substitute fixed values so the checks
+    // below behave the same for both versions
+    header.incompatible_features = 0;
+    header.compatible_features = 0;
+    header.autoclear_features = 0;
+    header.header_length = 72;
+    header.compression_type = 0;
+  } else {
+    header.incompatible_features = be64toh(header.incompatible_features);
+    header.compatible_features = be64toh(header.compatible_features);
+    header.autoclear_features = be64toh(header.autoclear_features);
+    header.header_length = be32toh(header.header_length);
+  }
+
+  if (header.magic != QCOW_MAGIC || header.version < 2 || header.version > 3) {
+    // honestly shouldn't happen since we've already validated it
+    lderr(cct) << "header is not QCOW2" << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  if (header.cluster_bits < QCOW_MIN_CLUSTER_BITS ||
+      header.cluster_bits > QCOW_MAX_CLUSTER_BITS) {
+    lderr(cct) << "invalid cluster bits: " << header.cluster_bits << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  if (header.crypt_method != QCOW_CRYPT_NONE) {
+    lderr(cct) << "invalid or unsupported encryption method" << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  // the image size must be a whole number of 512 byte blocks
+  m_size = header.size;
+  if (p2roundup(m_size, static_cast<uint64_t>(512)) != m_size) {
+    lderr(cct) << "image size is not a multiple of block size" << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  // the compression type is only meaningful if the header is long enough to
+  // contain it
+  if (header.header_length <= offsetof(QCowHeader, compression_type)) {
+    header.compression_type = 0;
+  }
+
+  if ((header.compression_type != 0) ||
+      ((header.incompatible_features & QCOW2_INCOMPAT_COMPRESSION) != 0)) {
+    lderr(cct) << "invalid or unsupported compression type" << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  if ((header.incompatible_features & QCOW2_INCOMPAT_DATA_FILE) != 0) {
+    lderr(cct) << "external data file feature not supported" << dendl;
+    on_finish->complete(-ENOTSUP);
+    // stop here: on_finish has been completed and must not be used again
+    return;
+  }
+
+  if ((header.incompatible_features & QCOW2_INCOMPAT_EXTL2) != 0) {
+    lderr(cct) << "extended L2 table feature not supported" << dendl;
+    on_finish->complete(-ENOTSUP);
+    return;
+  }
+
+  // any bit left after masking out the known ones is an unknown feature
+  header.incompatible_features &= ~QCOW2_INCOMPAT_MASK;
+  if (header.incompatible_features != 0) {
+    lderr(cct) << "unknown incompatible feature enabled" << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  m_backing_file_offset = header.backing_file_offset;
+  m_backing_file_size = header.backing_file_size;
+
+  m_cluster_bits = header.cluster_bits;
+  m_cluster_size = 1UL << header.cluster_bits;
+  m_cluster_offset_mask = (1ULL << (63 - header.cluster_bits)) - 1;
+  m_cluster_mask = ~(QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_COPIED);
+
+  // an L2 table is fixed at one (1) cluster holding 8-byte (3 bit) offsets
+  m_l2_bits = m_cluster_bits - 3;
+  m_l2_size = (1UL << m_l2_bits);
+
+  // each L1 entry covers (1 << m_l1_shift) bytes of the image
+  m_l1_shift = m_cluster_bits + m_l2_bits;
+  m_l1_table.size = (m_size + (1LL << m_l1_shift) - 1) >> m_l1_shift;
+  m_l1_table_offset = header.l1_table_offset;
+  if (m_size > (std::numeric_limits<uint64_t>::max() - (1ULL << m_l1_shift)) ||
+      m_l1_table.size >
+        (std::numeric_limits<int32_t>::max() / sizeof(uint64_t))) {
+    lderr(cct) << "image size too big: " << m_size << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  } else if (m_l1_table.size > header.l1_size) {
+    // the header advertises fewer L1 entries than the image size requires
+    lderr(cct) << "invalid L1 table size in header (" << header.l1_size
+               << " < " << m_l1_table.size << ")" << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  m_snapshot_count = header.nb_snapshots;
+  m_snapshots_offset = header.snapshots_offset;
+
+  ldout(cct, 15) << "size=" << m_size << ", "
+                 << "cluster_bits=" << m_cluster_bits << ", "
+                 << "l1_table_offset=" << m_l1_table_offset << ", "
+                 << "snapshot_count=" << m_snapshot_count << ", "
+                 << "snapshots_offset=" << m_snapshots_offset << dendl;
+
+  // allocate memory for L1 table and L2 + cluster caches
+  m_l2_table_cache = std::make_unique<L2TableCache>(this);
+  m_cluster_cache = std::make_unique<ClusterCache>(this);
+
+  read_snapshot(on_finish);
+}
+
+// Reads the fixed-size header of the next snapshot table entry.  When there
+// is no snapshot table or every entry has already been loaded, the state
+// machine moves on to the image's own L1 table instead.
+template <typename I>
+void QCOWFormat<I>::read_snapshot(Context* on_finish) {
+  const bool snapshots_remaining =
+    (m_snapshots_offset != 0 && m_snapshots.size() != m_snapshot_count);
+  if (!snapshots_remaining) {
+    read_l1_table(on_finish);
+    return;
+  }
+
+  // header is always aligned on 8 byte boundary
+  const uint64_t header_alignment = 8;
+  m_snapshots_offset = p2roundup(m_snapshots_offset, header_alignment);
+
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << "snap_id=" << (m_snapshots.size() + 1) << ", "
+                 << "offset=" << m_snapshots_offset << dendl;
+
+  m_bl.clear();
+  auto snapshot_ctx = new LambdaContext([this, on_finish](int r) {
+    handle_read_snapshot(r, on_finish);
+  });
+  m_stream->read({{m_snapshots_offset, sizeof(QCowSnapshotHeader)}}, &m_bl,
+                 snapshot_ctx);
+}
+
+// Completion of a snapshot header read: registers a new entry in m_snapshots
+// (keyed by its 1-based position), records the header fields in host order
+// and continues with the variable-length part of the entry.
+template <typename I>
+void QCOWFormat<I>::handle_read_snapshot(int r, Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << "r=" << r << ", "
+                 << "index=" << m_snapshots.size() << dendl;
+
+  if (r < 0) {
+    lderr(cct) << "failed to read QCOW2 snapshot header: " << cpp_strerror(r)
+               << dendl;
+    on_finish->complete(r);
+    return;
+  }
+
+  auto header = *reinterpret_cast<QCowSnapshotHeader*>(m_bl.c_str());
+
+  // the variable-length data follows directly after the fixed header
+  m_snapshots_offset += m_bl.length();
+
+  const auto next_snap_id = m_snapshots.size() + 1;
+  auto& snapshot = m_snapshots[next_snap_id];
+
+  // the strings are only sized here; their contents are read later
+  snapshot.id.resize(be16toh(header.id_str_size));
+  snapshot.name.resize(be16toh(header.name_size));
+  snapshot.extra_data_size = be32toh(header.extra_data_size);
+
+  snapshot.l1_table_offset = be64toh(header.l1_table_offset);
+  snapshot.l1_table.size = be32toh(header.l1_size);
+
+  snapshot.timestamp.sec_ref() = be32toh(header.date_sec);
+  snapshot.timestamp.nsec_ref() = be32toh(header.date_nsec);
+
+  ldout(cct, 10) << "snap_id=" << m_snapshots.size() << ", "
+                 << "id_str_len=" << snapshot.id.size() << ", "
+                 << "name_str_len=" << snapshot.name.size() << ", "
+                 << "l1_table_offset=" << snapshot.l1_table_offset << ", "
+                 << "l1_size=" << snapshot.l1_table.size << ", "
+                 << "extra_data_size=" << snapshot.extra_data_size << dendl;
+
+  read_snapshot_extra(on_finish);
+}
+
+// Reads the variable-length part of the most recently added snapshot entry
+// (extra data, id string and name string, stored back to back) into m_bl.
+//
+// If the entry has no variable-length data at all there is nothing to read:
+// the snapshot gets a random UUID as its name and the image size as its
+// size, and its L1 table is loaded directly.
+template <typename I>
+void QCOWFormat<I>::read_snapshot_extra(Context* on_finish) {
+  ceph_assert(!m_snapshots.empty());
+  auto& snapshot = m_snapshots.rbegin()->second;
+
+  // sum in 64 bits: the sizes come from the image and must not wrap, or the
+  // completion handler would parse a buffer shorter than it expects
+  const uint64_t length = static_cast<uint64_t>(snapshot.extra_data_size) +
+                          snapshot.id.size() +
+                          snapshot.name.size();
+  if (length == 0) {
+    uuid_d uuid_gen;
+    uuid_gen.generate_random();
+    snapshot.name = uuid_gen.to_string();
+
+    // same defaults handle_read_snapshot_extra() applies when the extra
+    // data carries no disk size; the L1 table still has to be loaded so the
+    // snapshot can be read from
+    snapshot.size = m_size;
+    read_snapshot_l1_table(on_finish);
+    return;
+  }
+
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << "snap_id=" << m_snapshots.size() << ", "
+                 << "offset=" << m_snapshots_offset << ", "
+                 << "length=" << length << dendl;
+
+  // advance past this entry so the next header read starts after it
+  auto offset = m_snapshots_offset;
+  m_snapshots_offset += length;
+
+  auto ctx = new LambdaContext([this, on_finish](int r) {
+    handle_read_snapshot_extra(r, on_finish); });
+  m_bl.clear();
+  m_stream->read({{offset, length}}, &m_bl, ctx);
+}
+
+// Completion of the variable-length snapshot data read.  m_bl holds the
+// extra data followed by the id string and the name string.
+//
+// Extracts the snapshot's disk size (falling back to the image size when
+// the extra data is too short to contain it), its id and its name (a random
+// UUID when the entry has no name) and then loads the snapshot's L1 table.
+// on_finish is completed with r on a read error and with -EINVAL when the
+// buffer is shorter than the sizes recorded in the snapshot header.
+template <typename I>
+void QCOWFormat<I>::handle_read_snapshot_extra(int r, Context* on_finish) {
+  ceph_assert(!m_snapshots.empty());
+  auto& snapshot = m_snapshots.rbegin()->second;
+
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << "r=" << r << ", "
+                 << "snap_id=" << m_snapshots.size() << dendl;
+
+  if (r < 0) {
+    lderr(cct) << "failed to read QCOW2 snapshot header extra: "
+               << cpp_strerror(r) << dendl;
+    on_finish->complete(r);
+    return;
+  }
+
+  // the sizes come from the image: never parse beyond what was read
+  const uint64_t expected_length =
+    static_cast<uint64_t>(snapshot.extra_data_size) +
+    snapshot.id.size() +
+    snapshot.name.size();
+  if (m_bl.length() < expected_length) {
+    lderr(cct) << "truncated QCOW2 snapshot header extra: "
+               << "expected " << expected_length << " bytes, "
+               << "read " << m_bl.length() << dendl;
+    on_finish->complete(-EINVAL);
+    return;
+  }
+
+  const char* data = m_bl.c_str();
+
+  if (snapshot.extra_data_size >=
+        offsetof(QCowSnapshotExtraData, disk_size) + sizeof(uint64_t))  {
+    auto extra = reinterpret_cast<const QCowSnapshotExtraData*>(data);
+    snapshot.size = be64toh(extra->disk_size);
+  } else {
+    snapshot.size = m_size;
+  }
+
+  // the id and name strings follow the extra data
+  data += snapshot.extra_data_size;
+
+  if (!snapshot.id.empty()) {
+    snapshot.id = std::string(data, snapshot.id.size());
+    data += snapshot.id.size();
+  }
+
+  if (!snapshot.name.empty()) {
+    snapshot.name = std::string(data, snapshot.name.size());
+  } else {
+    uuid_d uuid_gen;
+    uuid_gen.generate_random();
+    snapshot.name = uuid_gen.to_string();
+  }
+
+  ldout(cct, 10) << "snap_id=" << m_snapshots.size() << ", "
+                 << "name=" << snapshot.name << ", "
+                 << "size=" << snapshot.size << dendl;
+  read_snapshot_l1_table(on_finish);
+}
+
+// Reads the L1 table of the most recently added snapshot (l1_table.size
+// 8-byte entries starting at l1_table_offset) into the snapshot's own buffer.
+template <typename I>
+void QCOWFormat<I>::read_snapshot_l1_table(Context* on_finish) {
+  ceph_assert(!m_snapshots.empty());
+  auto& snapshot = m_snapshots.rbegin()->second;
+
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << "snap_id=" << m_snapshots.size() << ", "
+                 << "l1_table_offset=" << snapshot.l1_table_offset
+                 << dendl;
+
+  const auto l1_table_length = snapshot.l1_table.size * sizeof(uint64_t);
+  auto l1_table_ctx = new LambdaContext([this, on_finish](int r) {
+    handle_read_snapshot_l1_table(r, on_finish);
+  });
+  m_stream->read({{snapshot.l1_table_offset, l1_table_length}},
+                 &snapshot.l1_table.bl, l1_table_ctx);
+}
+
+// Completion of a snapshot L1 table read: decodes the table and loops back
+// to read_snapshot() for the next snapshot table entry.
+template <typename I>
+void QCOWFormat<I>::handle_read_snapshot_l1_table(int r, Context* on_finish) {
+  ceph_assert(!m_snapshots.empty());
+  auto& snapshot = m_snapshots.rbegin()->second;
+
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << "r=" << r << ", "
+                 << "snap_id=" << m_snapshots.size() << dendl;
+
+  if (r >= 0) {
+    snapshot.l1_table.decode();
+    read_snapshot(on_finish);
+    return;
+  }
+
+  lderr(cct) << "failed to read snapshot L1 table: " << cpp_strerror(r)
+             << dendl;
+  on_finish->complete(r);
+}
+
+// Reads the image's own L1 table (m_l1_table.size 8-byte entries starting
+// at m_l1_table_offset) into m_l1_table.bl.
+template <typename I>
+void QCOWFormat<I>::read_l1_table(Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << dendl;
+
+  const auto l1_table_length = m_l1_table.size * sizeof(uint64_t);
+  auto l1_table_ctx = new LambdaContext([this, on_finish](int r) {
+    handle_read_l1_table(r, on_finish);
+  });
+  m_stream->read({{m_l1_table_offset, l1_table_length}}, &m_l1_table.bl,
+                 l1_table_ctx);
+}
+
+// Completion of the image L1 table read: decodes the table and continues
+// with the backing file check.
+template <typename I>
+void QCOWFormat<I>::handle_read_l1_table(int r, Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << "r=" << r << dendl;
+
+  if (r >= 0) {
+    m_l1_table.decode();
+    read_backing_file(on_finish);
+    return;
+  }
+
+  lderr(cct) << "failed to read L1 table: " << cpp_strerror(r) << dendl;
+  on_finish->complete(r);
+}
+
+// Last step of the visible open sequence.  Images without a backing file
+// complete on_finish successfully; images that reference one fail with
+// -ENOTSUP.
+template <typename I>
+void QCOWFormat<I>::read_backing_file(Context* on_finish) {
+  const bool has_backing_file =
+    (m_backing_file_offset != 0 && m_backing_file_size != 0);
+  if (has_backing_file) {
+    auto cct = m_image_ctx->cct;
+    ldout(cct, 10) << dendl;
+
+    // TODO add support for backing files
+    on_finish->complete(-ENOTSUP);
+    return;
+  }
+
+  // all data is within the specified file
+  on_finish->complete(0);
+}
+
+// Closes the underlying stream; on_finish is passed straight to
+// m_stream->close().
+template <typename I>
+void QCOWFormat<I>::close(Context* on_finish) {
+  ldout(m_image_ctx->cct, 10) << dendl;
+
+  m_stream->close(on_finish);
+}
+
+// Replaces the contents of *snap_infos with one entry per loaded snapshot
+// (name, size and timestamp, in the user snapshot namespace) and completes
+// on_finish with 0.
+template <typename I>
+void QCOWFormat<I>::get_snapshots(SnapInfos* snap_infos, Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << dendl;
+
+  snap_infos->clear();
+  for (auto& [snap_id, snapshot] : m_snapshots) {
+    snap_infos->emplace(
+      snap_id,
+      SnapInfo(snapshot.name, cls::rbd::UserSnapshotNamespace{},
+               snapshot.size, {}, 0, 0, snapshot.timestamp));
+  }
+
+  on_finish->complete(0);
+}
+
+// Stores the size of the image (snap_id == CEPH_NOSNAP) or of the given
+// snapshot in *size and completes on_finish with 0.  An unknown snapshot id
+// completes on_finish with -ENOENT and leaves *size untouched.
+template <typename I>
+void QCOWFormat<I>::get_image_size(uint64_t snap_id, uint64_t* size,
+                                  Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 10) << "snap_id=" << snap_id << dendl;
+
+  if (snap_id != CEPH_NOSNAP) {
+    auto snapshot_it = m_snapshots.find(snap_id);
+    if (snapshot_it == m_snapshots.end()) {
+      on_finish->complete(-ENOENT);
+      return;
+    }
+
+    *size = snapshot_it->second.size;
+  } else {
+    *size = m_size;
+  }
+
+  on_finish->complete(0);
+}
+
+// Starts a read of image_extents against either the image (snap_id ==
+// CEPH_NOSNAP) or one of its snapshots by handing the matching L1 table to
+// a new ReadRequest.  An unknown snapshot id fails aio_comp with -ENOENT.
+// Returns true on every path.
+template <typename I>
+bool QCOWFormat<I>::read(
+    io::AioCompletion* aio_comp, uint64_t snap_id, io::Extents&& image_extents,
+    io::ReadResult&& read_result, int op_flags, int read_flags,
+    const ZTracer::Trace &parent_trace) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "snap_id=" << snap_id << ", "
+                 << "image_extents=" << image_extents << dendl;
+
+  // default to the image's own table; snapshots carry their own
+  const LookupTable* l1_table = &m_l1_table;
+  if (snap_id != CEPH_NOSNAP) {
+    auto snapshot_it = m_snapshots.find(snap_id);
+    if (snapshot_it == m_snapshots.end()) {
+      aio_comp->fail(-ENOENT);
+      return true;
+    }
+
+    l1_table = &snapshot_it->second.l1_table;
+  }
+
+  // the extents are recorded on the result before they are moved away
+  aio_comp->read_result = std::move(read_result);
+  aio_comp->read_result.set_image_extents(image_extents);
+
+  auto request = new ReadRequest(this, aio_comp, l1_table,
+                                 std::move(image_extents));
+  request->send();
+
+  return true;
+}
+
+// Computes the snapshot delta for image_extents.  The extents are split
+// into cluster extents and grouped by L1 table index; one ListSnapsRequest
+// is issued per index and handle_list_snaps() runs once all of them have
+// completed.
+template <typename I>
+void QCOWFormat<I>::list_snaps(io::Extents&& image_extents,
+                              io::SnapIds&& snap_ids, int list_snaps_flags,
+                              io::SnapshotDelta* snapshot_delta,
+                              const ZTracer::Trace &parent_trace,
+                              Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "image_extents=" << image_extents << dendl;
+
+  ClusterExtents all_cluster_extents;
+  populate_cluster_extents(cct, m_cluster_size, image_extents,
+                           &all_cluster_extents);
+
+  // map L1 table indexes to cluster extents
+  std::map<uint64_t, ClusterExtents> extents_by_l1_index;
+  for (auto& cluster_extent : all_cluster_extents) {
+    uint32_t l1_table_index = cluster_extent.image_offset >> m_l1_shift;
+    auto& bucket = extents_by_l1_index[l1_table_index];
+    bucket.reserve(all_cluster_extents.size());
+    bucket.push_back(cluster_extent);
+  }
+
+  // every snapshot plus the image itself contributes an L1 table
+  std::map<uint64_t, const LookupTable*> snap_id_to_l1_table;
+  snap_id_to_l1_table[CEPH_NOSNAP] = &m_l1_table;
+  for (auto& [snap_id, snapshot] : m_snapshots) {
+    snap_id_to_l1_table[snap_id] = &snapshot.l1_table;
+  }
+
+  // runs after all per-index requests have finished
+  Context* finish_ctx = new LambdaContext(
+    [this, image_extents, snap_ids=std::move(snap_ids), snapshot_delta,
+     on_finish](int r) mutable {
+      handle_list_snaps(r, std::move(image_extents), std::move(snap_ids),
+                        snapshot_delta, on_finish);
+    });
+
+  auto gather_ctx = new C_Gather(cct, finish_ctx);
+
+  for (auto& [l1_table_index, l1_extents] : extents_by_l1_index) {
+    auto request = new ListSnapsRequest(
+      this, l1_table_index, std::move(l1_extents), snap_id_to_l1_table,
+      snapshot_delta, gather_ctx->new_sub());
+    request->send();
+  }
+
+  gather_ctx->activate();
+}
+
+// Runs once every ListSnapsRequest has completed: passes each snapshot (in
+// m_snapshots order) and finally the image itself through
+// util::zero_shrunk_snapshot(), merges the delta for the requested snap ids
+// and completes on_finish with r.
+template <typename I>
+void QCOWFormat<I>::handle_list_snaps(int r, io::Extents&& image_extents,
+                                      io::SnapIds&& snap_ids,
+                                      io::SnapshotDelta* snapshot_delta,
+                                      Context* on_finish) {
+  auto cct = m_image_ctx->cct;
+  ldout(cct, 20) << "r=" << r << ", "
+                 << "snapshot_delta=" << *snapshot_delta << dendl;
+
+  // carried from one zero_shrunk_snapshot() call to the next
+  std::optional<uint64_t> previous_size = std::nullopt;
+
+  for (auto& [snap_id, snapshot] : m_snapshots) {
+    auto snap_extents = &(*snapshot_delta)[{snap_id, snap_id}];
+    util::zero_shrunk_snapshot(cct, image_extents, snap_id, snapshot.size,
+                               &previous_size, snap_extents);
+  }
+
+  // the image itself is handled last, after all snapshots
+  auto head_extents = &(*snapshot_delta)[{CEPH_NOSNAP, CEPH_NOSNAP}];
+  util::zero_shrunk_snapshot(cct, image_extents, CEPH_NOSNAP, m_size,
+                             &previous_size, head_extents);
+
+  util::merge_snapshot_delta(snap_ids, snapshot_delta);
+  on_finish->complete(r);
+}
+
+} // namespace migration
+} // namespace librbd
+
+template class librbd::migration::QCOWFormat<librbd::ImageCtx>;