summary | refs | log | tree | commit | diff | stats
path: root/src/librbd
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-23 16:45:17 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-23 16:45:44 +0000
commit: 17d6a993fc17d533460c5f40f3908c708e057c18 (patch)
tree: 1a3bd93e0ecd74fa02f93a528fe2f87e5314c4b5 /src/librbd
parent: Releasing progress-linux version 18.2.2-0progress7.99u1. (diff)
download: ceph-17d6a993fc17d533460c5f40f3908c708e057c18.tar.xz
ceph-17d6a993fc17d533460c5f40f3908c708e057c18.zip
Merging upstream version 18.2.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/librbd')
-rw-r--r-- src/librbd/ImageCtx.h | 2
-rw-r--r-- src/librbd/Journal.cc | 86
-rw-r--r-- src/librbd/Journal.h | 23
-rw-r--r-- src/librbd/ObjectMap.h | 6
-rw-r--r-- src/librbd/api/DiffIterate.cc | 130
-rw-r--r-- src/librbd/api/DiffIterate.h | 7
-rw-r--r-- src/librbd/api/Snapshot.cc | 4
-rw-r--r-- src/librbd/deep_copy/ImageCopyRequest.cc | 7
-rw-r--r-- src/librbd/io/ImageRequest.cc | 54
-rw-r--r-- src/librbd/io/ImageRequest.h | 21
-rw-r--r-- src/librbd/io/ObjectRequest.cc | 18
-rw-r--r-- src/librbd/io/Types.h | 20
-rw-r--r-- src/librbd/object_map/DiffRequest.cc | 382
-rw-r--r-- src/librbd/object_map/DiffRequest.h | 29
-rw-r--r-- src/librbd/object_map/Types.h | 15
15 files changed, 526 insertions, 278 deletions
diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h
index 9a432c764..066651ba4 100644
--- a/src/librbd/ImageCtx.h
+++ b/src/librbd/ImageCtx.h
@@ -148,6 +148,7 @@ namespace librbd {
// encryption_format
ceph::shared_mutex timestamp_lock; // protects (create/access/modify)_timestamp
+ // and internal diff_iterate_lock_timestamp
ceph::mutex async_ops_lock; // protects async_ops and async_requests
ceph::mutex copyup_list_lock; // protects copyup_waiting_list
@@ -173,6 +174,7 @@ namespace librbd {
utime_t create_timestamp;
utime_t access_timestamp;
utime_t modify_timestamp;
+ utime_t diff_iterate_lock_timestamp;
file_layout_t layout;
diff --git a/src/librbd/Journal.cc b/src/librbd/Journal.cc
index 8ddce2e8f..1b37a30c1 100644
--- a/src/librbd/Journal.cc
+++ b/src/librbd/Journal.cc
@@ -39,6 +39,7 @@ using util::create_async_context_callback;
using util::create_context_callback;
using journal::util::C_DecodeTag;
using journal::util::C_DecodeTags;
+using io::Extents;
namespace {
@@ -760,36 +761,87 @@ void Journal<I>::user_flushed() {
}
template <typename I>
-uint64_t Journal<I>::append_write_event(uint64_t offset, size_t length,
- const bufferlist &bl,
- bool flush_entry) {
+void Journal<I>::add_write_event_entries(uint64_t offset, size_t length,
+ const bufferlist &bl,
+ uint64_t buffer_offset,
+ Bufferlists *bufferlists) {
ceph_assert(m_max_append_size > journal::AioWriteEvent::get_fixed_size());
- uint64_t max_write_data_size =
+ const uint64_t max_write_data_size =
m_max_append_size - journal::AioWriteEvent::get_fixed_size();
// ensure that the write event fits within the journal entry
- Bufferlists bufferlists;
uint64_t bytes_remaining = length;
uint64_t event_offset = 0;
do {
uint64_t event_length = std::min(bytes_remaining, max_write_data_size);
bufferlist event_bl;
- event_bl.substr_of(bl, event_offset, event_length);
+ event_bl.substr_of(bl, buffer_offset + event_offset, event_length);
journal::EventEntry event_entry(journal::AioWriteEvent(offset + event_offset,
event_length,
event_bl),
ceph_clock_now());
- bufferlists.emplace_back();
- encode(event_entry, bufferlists.back());
+ bufferlists->emplace_back();
+ encode(event_entry, bufferlists->back());
event_offset += event_length;
bytes_remaining -= event_length;
} while (bytes_remaining > 0);
+}
- return append_io_events(journal::EVENT_TYPE_AIO_WRITE, bufferlists, offset,
- length, flush_entry, 0);
+template <typename I>
+uint64_t Journal<I>::append_write_event(const Extents &image_extents,
+ const bufferlist &bl,
+ bool flush_entry) {
+ Bufferlists bufferlists;
+ uint64_t buffer_offset = 0;
+ for (auto &extent : image_extents) {
+ add_write_event_entries(extent.first, extent.second, bl, buffer_offset,
+ &bufferlists);
+
+ buffer_offset += extent.second;
+ }
+
+ return append_io_events(journal::EVENT_TYPE_AIO_WRITE, bufferlists,
+ image_extents, flush_entry, 0);
+}
+
+template <typename I>
+uint64_t Journal<I>::append_write_same_event(const Extents &image_extents,
+ const bufferlist &bl,
+ bool flush_entry) {
+ Bufferlists bufferlists;
+ for (auto &extent : image_extents) {
+ journal::EventEntry event_entry(
+ journal::AioWriteSameEvent(extent.first, extent.second, bl),
+ ceph_clock_now());
+
+ bufferlists.emplace_back();
+ encode(event_entry, bufferlists.back());
+ }
+
+ return append_io_events(journal::EVENT_TYPE_AIO_WRITESAME, bufferlists,
+ image_extents, flush_entry, 0);
+}
+
+template <typename I>
+uint64_t Journal<I>::append_discard_event(const Extents &image_extents,
+ uint32_t discard_granularity_bytes,
+ bool flush_entry) {
+ Bufferlists bufferlists;
+ for (auto &extent : image_extents) {
+ journal::EventEntry event_entry(
+ journal::AioDiscardEvent(extent.first, extent.second,
+ discard_granularity_bytes),
+ ceph_clock_now());
+
+ bufferlists.emplace_back();
+ encode(event_entry, bufferlists.back());
+ }
+
+ return append_io_events(journal::EVENT_TYPE_AIO_DISCARD, bufferlists,
+ image_extents, flush_entry, 0);
}
template <typename I>
@@ -832,7 +884,8 @@ uint64_t Journal<I>::append_compare_and_write_event(uint64_t offset,
} while (bytes_remaining > 0);
return append_io_events(journal::EVENT_TYPE_AIO_COMPARE_AND_WRITE,
- bufferlists, offset, length, flush_entry, -EILSEQ);
+ bufferlists, {{offset, length}}, flush_entry,
+ -EILSEQ);
}
template <typename I>
@@ -842,14 +895,14 @@ uint64_t Journal<I>::append_io_event(journal::EventEntry &&event_entry,
bufferlist bl;
event_entry.timestamp = ceph_clock_now();
encode(event_entry, bl);
- return append_io_events(event_entry.get_event_type(), {bl}, offset, length,
- flush_entry, filter_ret_val);
+ return append_io_events(event_entry.get_event_type(), {bl},
+ {{offset, length}}, flush_entry, filter_ret_val);
}
template <typename I>
uint64_t Journal<I>::append_io_events(journal::EventType event_type,
const Bufferlists &bufferlists,
- uint64_t offset, size_t length,
+ const Extents &image_extents,
bool flush_entry, int filter_ret_val) {
ceph_assert(!bufferlists.empty());
@@ -870,14 +923,13 @@ uint64_t Journal<I>::append_io_events(journal::EventType event_type,
{
std::lock_guard event_locker{m_event_lock};
- m_events[tid] = Event(futures, offset, length, filter_ret_val);
+ m_events[tid] = Event(futures, image_extents, filter_ret_val);
}
CephContext *cct = m_image_ctx.cct;
ldout(cct, 20) << this << " " << __func__ << ": "
<< "event=" << event_type << ", "
- << "offset=" << offset << ", "
- << "length=" << length << ", "
+ << "image_extents=" << image_extents << ", "
<< "flush=" << flush_entry << ", tid=" << tid << dendl;
Context *on_safe = create_async_context_callback(
diff --git a/src/librbd/Journal.h b/src/librbd/Journal.h
index 1ef9ffa88..5327adac7 100644
--- a/src/librbd/Journal.h
+++ b/src/librbd/Journal.h
@@ -18,6 +18,7 @@
#include "journal/ReplayHandler.h"
#include "librbd/Utils.h"
#include "librbd/asio/ContextWQ.h"
+#include "librbd/io/Types.h"
#include "librbd/journal/Types.h"
#include "librbd/journal/TypeTraits.h"
@@ -133,14 +134,20 @@ public:
void user_flushed();
- uint64_t append_write_event(uint64_t offset, size_t length,
+ uint64_t append_write_event(const io::Extents &image_extents,
const bufferlist &bl,
bool flush_entry);
+ uint64_t append_write_same_event(const io::Extents &image_extents,
+ const bufferlist &bl,
+ bool flush_entry);
uint64_t append_compare_and_write_event(uint64_t offset,
size_t length,
const bufferlist &cmp_bl,
const bufferlist &write_bl,
bool flush_entry);
+ uint64_t append_discard_event(const io::Extents &image_extents,
+ uint32_t discard_granularity_bytes,
+ bool flush_entry);
uint64_t append_io_event(journal::EventEntry &&event_entry,
uint64_t offset, size_t length,
bool flush_entry, int filter_ret_val);
@@ -200,11 +207,13 @@ private:
Event() {
}
- Event(const Futures &_futures, uint64_t offset, size_t length,
+ Event(const Futures &_futures, const io::Extents &image_extents,
int filter_ret_val)
: futures(_futures), filter_ret_val(filter_ret_val) {
- if (length > 0) {
- pending_extents.insert(offset, length);
+ for (auto &extent : image_extents) {
+ if (extent.second > 0) {
+ pending_extents.insert(extent.first, extent.second);
+ }
}
}
};
@@ -322,9 +331,13 @@ private:
bool is_journal_replaying(const ceph::mutex &) const;
bool is_tag_owner(const ceph::mutex &) const;
+ void add_write_event_entries(uint64_t offset, size_t length,
+ const bufferlist &bl,
+ uint64_t buffer_offset,
+ Bufferlists *bufferlists);
uint64_t append_io_events(journal::EventType event_type,
const Bufferlists &bufferlists,
- uint64_t offset, size_t length, bool flush_entry,
+ const io::Extents &extents, bool flush_entry,
int filter_ret_val);
Future wait_event(ceph::mutex &lock, uint64_t tid, Context *on_safe);
diff --git a/src/librbd/ObjectMap.h b/src/librbd/ObjectMap.h
index 8b5b352ef..35ea4cb88 100644
--- a/src/librbd/ObjectMap.h
+++ b/src/librbd/ObjectMap.h
@@ -45,6 +45,12 @@ public:
return m_object_map.size();
}
+ template <typename F, typename... Args>
+ auto with_object_map(F&& f, Args&&... args) const {
+ std::shared_lock locker(m_lock);
+ return std::forward<F>(f)(m_object_map, std::forward<Args>(args)...);
+ }
+
inline void set_state(uint64_t object_no, uint8_t new_state,
const boost::optional<uint8_t> &current_state) {
std::unique_lock locker{m_lock};
diff --git a/src/librbd/api/DiffIterate.cc b/src/librbd/api/DiffIterate.cc
index b400b5d5a..717110bd3 100644
--- a/src/librbd/api/DiffIterate.cc
+++ b/src/librbd/api/DiffIterate.cc
@@ -2,6 +2,7 @@
// vim: ts=8 sw=2 smarttab
#include "librbd/api/DiffIterate.h"
+#include "librbd/ExclusiveLock.h"
#include "librbd/ImageCtx.h"
#include "librbd/ImageState.h"
#include "librbd/ObjectMap.h"
@@ -30,6 +31,8 @@ namespace api {
namespace {
+constexpr uint32_t LOCK_INTERVAL_SECONDS = 5;
+
struct DiffContext {
DiffIterate<>::Callback callback;
void *callback_arg;
@@ -149,12 +152,42 @@ private:
}
};
-int simple_diff_cb(uint64_t off, size_t len, int exists, void *arg) {
- // it's possible for a discard to create a hole in the parent image -- ignore
- if (exists) {
- interval_set<uint64_t> *diff = static_cast<interval_set<uint64_t> *>(arg);
- diff->insert(off, len);
+template <typename I>
+bool should_try_acquire_lock(I* image_ctx) {
+ if (image_ctx->exclusive_lock == nullptr ||
+ image_ctx->exclusive_lock->is_lock_owner()) {
+ return false;
+ }
+ if ((image_ctx->features & RBD_FEATURE_FAST_DIFF) == 0) {
+ return false;
+ }
+
+ utime_t now = ceph_clock_now();
+ utime_t cutoff = now - utime_t(LOCK_INTERVAL_SECONDS, 0);
+
+ {
+ std::shared_lock timestamp_locker{image_ctx->timestamp_lock};
+ if (image_ctx->diff_iterate_lock_timestamp > cutoff) {
+ return false;
+ }
+ }
+
+ std::unique_lock timestamp_locker{image_ctx->timestamp_lock};
+ if (image_ctx->diff_iterate_lock_timestamp > cutoff) {
+ return false;
}
+
+ image_ctx->diff_iterate_lock_timestamp = now;
+ return true;
+}
+
+int simple_diff_cb(uint64_t off, size_t len, int exists, void *arg) {
+ // This reads the existing extents in a parent from the beginning
+ // of time. Since images are thin-provisioned, the extents will
+ // always represent data, not holes.
+ ceph_assert(exists);
+ auto diff = static_cast<interval_set<uint64_t>*>(arg);
+ diff->insert(off, len);
return 0;
}
@@ -167,10 +200,14 @@ int DiffIterate<I>::diff_iterate(I *ictx,
uint64_t off, uint64_t len,
bool include_parent, bool whole_object,
int (*cb)(uint64_t, size_t, int, void *),
- void *arg)
-{
- ldout(ictx->cct, 20) << "diff_iterate " << ictx << " off = " << off
- << " len = " << len << dendl;
+ void *arg) {
+ ldout(ictx->cct, 10) << "from_snap_namespace=" << from_snap_namespace
+ << ", fromsnapname=" << (fromsnapname ?: "")
+ << ", off=" << off
+ << ", len=" << len
+ << ", include_parent=" << include_parent
+ << ", whole_object=" << whole_object
+ << dendl;
if (!ictx->data_ctx.is_valid()) {
return -ENODEV;
@@ -197,11 +234,28 @@ int DiffIterate<I>::diff_iterate(I *ictx,
return r;
}
- ictx->image_lock.lock_shared();
- r = clip_io(ictx, off, &len, io::ImageArea::DATA);
- ictx->image_lock.unlock_shared();
- if (r < 0) {
- return r;
+ {
+ std::shared_lock owner_locker{ictx->owner_lock};
+ std::shared_lock image_locker{ictx->image_lock};
+
+ r = clip_io(ictx, off, &len, io::ImageArea::DATA);
+ if (r < 0) {
+ return r;
+ }
+
+ // optimization: hang onto the only object map needed to run fast
+ // diff against the beginning of time -- it's loaded when exclusive
+ // lock is acquired
+ // acquire exclusive lock only if not busy (i.e. don't request),
+ // throttle acquisition attempts and ignore errors
+ if (fromsnapname == nullptr && whole_object &&
+ should_try_acquire_lock(ictx)) {
+ C_SaferCond lock_ctx;
+ ictx->exclusive_lock->try_acquire_lock(&lock_ctx);
+ image_locker.unlock();
+ owner_locker.unlock();
+ lock_ctx.wait();
+ }
}
DiffIterate command(*ictx, from_snap_namespace, fromsnapname, off, len,
@@ -211,6 +265,29 @@ int DiffIterate<I>::diff_iterate(I *ictx,
}
template <typename I>
+std::pair<uint64_t, uint64_t> DiffIterate<I>::calc_object_diff_range() {
+ uint64_t period = m_image_ctx.get_stripe_period();
+ uint64_t first_period_off = round_down_to(m_offset, period);
+ uint64_t last_period_off = round_down_to(m_offset + m_length - 1, period);
+
+ striper::LightweightObjectExtents object_extents;
+ if (first_period_off != last_period_off) {
+ // map only the tail of the first period and the front of the last
+ // period instead of the entire range for efficiency
+ Striper::file_to_extents(m_image_ctx.cct, &m_image_ctx.layout,
+ m_offset, first_period_off + period - m_offset,
+ 0, 0, &object_extents);
+ Striper::file_to_extents(m_image_ctx.cct, &m_image_ctx.layout,
+ last_period_off, m_offset + m_length - last_period_off,
+ 0, 0, &object_extents);
+ } else {
+ Striper::file_to_extents(m_image_ctx.cct, &m_image_ctx.layout, m_offset,
+ m_length, 0, 0, &object_extents);
+ }
+ return {object_extents.front().object_no, object_extents.back().object_no + 1};
+}
+
+template <typename I>
int DiffIterate<I>::execute() {
CephContext* cct = m_image_ctx.cct;
@@ -244,20 +321,24 @@ int DiffIterate<I>::execute() {
int r;
bool fast_diff_enabled = false;
+ uint64_t start_object_no, end_object_no;
BitVector<2> object_diff_state;
interval_set<uint64_t> parent_diff;
if (m_whole_object) {
+ std::tie(start_object_no, end_object_no) = calc_object_diff_range();
+
C_SaferCond ctx;
auto req = object_map::DiffRequest<I>::create(&m_image_ctx, from_snap_id,
- end_snap_id,
+ end_snap_id, start_object_no,
+ end_object_no,
&object_diff_state, &ctx);
req->send();
-
r = ctx.wait();
if (r < 0) {
ldout(cct, 5) << "fast diff disabled" << dendl;
} else {
ldout(cct, 5) << "fast diff enabled" << dendl;
+ ceph_assert(object_diff_state.size() == end_object_no - start_object_no);
fast_diff_enabled = true;
// check parent overlap only if we are comparing to the beginning of time
@@ -265,12 +346,14 @@ int DiffIterate<I>::execute() {
std::shared_lock image_locker{m_image_ctx.image_lock};
uint64_t raw_overlap = 0;
m_image_ctx.get_parent_overlap(m_image_ctx.snap_id, &raw_overlap);
- auto overlap = m_image_ctx.reduce_parent_overlap(raw_overlap, false);
- if (overlap.first > 0 && overlap.second == io::ImageArea::DATA) {
+ io::Extents parent_extents = {{m_offset, m_length}};
+ if (m_image_ctx.prune_parent_extents(parent_extents, io::ImageArea::DATA,
+ raw_overlap, false) > 0) {
ldout(cct, 10) << " first getting parent diff" << dendl;
- DiffIterate diff_parent(*m_image_ctx.parent, {}, nullptr, 0,
- overlap.first, true, true, &simple_diff_cb,
- &parent_diff);
+ DiffIterate diff_parent(*m_image_ctx.parent, {}, nullptr,
+ parent_extents[0].first,
+ parent_extents[0].second, true, true,
+ &simple_diff_cb, &parent_diff);
r = diff_parent.execute();
if (r < 0) {
return r;
@@ -292,7 +375,7 @@ int DiffIterate<I>::execute() {
uint64_t left = m_length;
while (left > 0) {
- uint64_t period_off = off - (off % period);
+ uint64_t period_off = round_down_to(off, period);
uint64_t read_len = std::min(period_off + period - off, left);
if (fast_diff_enabled) {
@@ -307,7 +390,8 @@ int DiffIterate<I>::execute() {
io::SparseExtents aggregate_sparse_extents;
for (auto& [object, extents] : object_extents) {
const uint64_t object_no = extents.front().objectno;
- uint8_t diff_state = object_diff_state[object_no];
+ ceph_assert(object_no >= start_object_no && object_no < end_object_no);
+ uint8_t diff_state = object_diff_state[object_no - start_object_no];
ldout(cct, 20) << "object " << object << ": diff_state="
<< (int)diff_state << dendl;
diff --git a/src/librbd/api/DiffIterate.h b/src/librbd/api/DiffIterate.h
index e6074d9cb..c53b0e995 100644
--- a/src/librbd/api/DiffIterate.h
+++ b/src/librbd/api/DiffIterate.h
@@ -7,6 +7,7 @@
#include "include/int_types.h"
#include "common/bit_vector.hpp"
#include "cls/rbd/cls_rbd_types.h"
+#include <utility>
namespace librbd {
@@ -51,11 +52,9 @@ private:
{
}
- int execute();
-
- int diff_object_map(uint64_t from_snap_id, uint64_t to_snap_id,
- BitVector<2>* object_diff_state);
+ std::pair<uint64_t, uint64_t> calc_object_diff_range();
+ int execute();
};
} // namespace api
diff --git a/src/librbd/api/Snapshot.cc b/src/librbd/api/Snapshot.cc
index 03cefbd1c..306ddb593 100644
--- a/src/librbd/api/Snapshot.cc
+++ b/src/librbd/api/Snapshot.cc
@@ -378,7 +378,9 @@ int Snapshot<I>::remove(I *ictx, const char *snap_name, uint32_t flags,
template <typename I>
int Snapshot<I>::get_timestamp(I *ictx, uint64_t snap_id, struct timespec *timestamp) {
auto snap_it = ictx->snap_info.find(snap_id);
- ceph_assert(snap_it != ictx->snap_info.end());
+ if (snap_it == ictx->snap_info.end()) {
+ return -ENOENT;
+ }
utime_t time = snap_it->second.timestamp;
time.to_timespec(timestamp);
return 0;
diff --git a/src/librbd/deep_copy/ImageCopyRequest.cc b/src/librbd/deep_copy/ImageCopyRequest.cc
index 08e959dd5..668808340 100644
--- a/src/librbd/deep_copy/ImageCopyRequest.cc
+++ b/src/librbd/deep_copy/ImageCopyRequest.cc
@@ -101,9 +101,10 @@ void ImageCopyRequest<I>::compute_diff() {
auto ctx = create_context_callback<
ImageCopyRequest<I>, &ImageCopyRequest<I>::handle_compute_diff>(this);
- auto req = object_map::DiffRequest<I>::create(m_src_image_ctx, m_src_snap_id_start,
- m_src_snap_id_end, &m_object_diff_state,
- ctx);
+ auto req = object_map::DiffRequest<I>::create(m_src_image_ctx,
+ m_src_snap_id_start,
+ m_src_snap_id_end, 0, UINT64_MAX,
+ &m_object_diff_state, ctx);
req->send();
}
diff --git a/src/librbd/io/ImageRequest.cc b/src/librbd/io/ImageRequest.cc
index e4c41c229..fb9f8944e 100644
--- a/src/librbd/io/ImageRequest.cc
+++ b/src/librbd/io/ImageRequest.cc
@@ -473,7 +473,7 @@ void AbstractImageWriteRequest<I>::send_request() {
if (journaling) {
// in-flight ops are flushed prior to closing the journal
ceph_assert(image_ctx.journal != NULL);
- journal_tid = append_journal_event(m_synchronous);
+ journal_tid = append_journal_event();
}
// it's very important that IOContext is captured here instead of
@@ -518,22 +518,12 @@ void ImageWriteRequest<I>::assemble_extent(
}
template <typename I>
-uint64_t ImageWriteRequest<I>::append_journal_event(bool synchronous) {
+uint64_t ImageWriteRequest<I>::append_journal_event() {
I &image_ctx = this->m_image_ctx;
- uint64_t tid = 0;
- uint64_t buffer_offset = 0;
ceph_assert(!this->m_image_extents.empty());
- for (auto &extent : this->m_image_extents) {
- bufferlist sub_bl;
- sub_bl.substr_of(m_bl, buffer_offset, extent.second);
- buffer_offset += extent.second;
-
- tid = image_ctx.journal->append_write_event(extent.first, extent.second,
- sub_bl, synchronous);
- }
-
- return tid;
+ return image_ctx.journal->append_write_event(
+ this->m_image_extents, m_bl, false);
}
template <typename I>
@@ -566,22 +556,12 @@ void ImageWriteRequest<I>::update_stats(size_t length) {
}
template <typename I>
-uint64_t ImageDiscardRequest<I>::append_journal_event(bool synchronous) {
+uint64_t ImageDiscardRequest<I>::append_journal_event() {
I &image_ctx = this->m_image_ctx;
- uint64_t tid = 0;
ceph_assert(!this->m_image_extents.empty());
- for (auto &extent : this->m_image_extents) {
- journal::EventEntry event_entry(
- journal::AioDiscardEvent(extent.first,
- extent.second,
- this->m_discard_granularity_bytes));
- tid = image_ctx.journal->append_io_event(std::move(event_entry),
- extent.first, extent.second,
- synchronous, 0);
- }
-
- return tid;
+ return image_ctx.journal->append_discard_event(
+ this->m_image_extents, m_discard_granularity_bytes, false);
}
template <typename I>
@@ -717,21 +697,12 @@ void ImageFlushRequest<I>::send_request() {
}
template <typename I>
-uint64_t ImageWriteSameRequest<I>::append_journal_event(bool synchronous) {
+uint64_t ImageWriteSameRequest<I>::append_journal_event() {
I &image_ctx = this->m_image_ctx;
- uint64_t tid = 0;
ceph_assert(!this->m_image_extents.empty());
- for (auto &extent : this->m_image_extents) {
- journal::EventEntry event_entry(journal::AioWriteSameEvent(extent.first,
- extent.second,
- m_data_bl));
- tid = image_ctx.journal->append_io_event(std::move(event_entry),
- extent.first, extent.second,
- synchronous, 0);
- }
-
- return tid;
+ return image_ctx.journal->append_write_same_event(
+ this->m_image_extents, m_data_bl, false);
}
template <typename I>
@@ -768,8 +739,7 @@ void ImageWriteSameRequest<I>::update_stats(size_t length) {
}
template <typename I>
-uint64_t ImageCompareAndWriteRequest<I>::append_journal_event(
- bool synchronous) {
+uint64_t ImageCompareAndWriteRequest<I>::append_journal_event() {
I &image_ctx = this->m_image_ctx;
uint64_t tid = 0;
@@ -779,7 +749,7 @@ uint64_t ImageCompareAndWriteRequest<I>::append_journal_event(
extent.second,
m_cmp_bl,
m_bl,
- synchronous);
+ false);
return tid;
}
diff --git a/src/librbd/io/ImageRequest.h b/src/librbd/io/ImageRequest.h
index 2668c1acb..996c90a11 100644
--- a/src/librbd/io/ImageRequest.h
+++ b/src/librbd/io/ImageRequest.h
@@ -114,11 +114,6 @@ private:
template <typename ImageCtxT = ImageCtx>
class AbstractImageWriteRequest : public ImageRequest<ImageCtxT> {
-public:
- inline void flag_synchronous() {
- m_synchronous = true;
- }
-
protected:
using typename ImageRequest<ImageCtxT>::ObjectRequests;
@@ -127,8 +122,7 @@ protected:
const char *trace_name,
const ZTracer::Trace &parent_trace)
: ImageRequest<ImageCtxT>(image_ctx, aio_comp, std::move(image_extents),
- area, trace_name, parent_trace),
- m_synchronous(false) {
+ area, trace_name, parent_trace) {
}
void send_request() override;
@@ -144,11 +138,8 @@ protected:
const LightweightObjectExtent &object_extent, IOContext io_context,
uint64_t journal_tid, bool single_extent, Context *on_finish) = 0;
- virtual uint64_t append_journal_event(bool synchronous) = 0;
+ virtual uint64_t append_journal_event() = 0;
virtual void update_stats(size_t length) = 0;
-
-private:
- bool m_synchronous;
};
template <typename ImageCtxT = ImageCtx>
@@ -180,7 +171,7 @@ protected:
const LightweightObjectExtent &object_extent, IOContext io_context,
uint64_t journal_tid, bool single_extent, Context *on_finish) override;
- uint64_t append_journal_event(bool synchronous) override;
+ uint64_t append_journal_event() override;
void update_stats(size_t length) override;
private:
@@ -215,7 +206,7 @@ protected:
const LightweightObjectExtent &object_extent, IOContext io_context,
uint64_t journal_tid, bool single_extent, Context *on_finish) override;
- uint64_t append_journal_event(bool synchronous) override;
+ uint64_t append_journal_event() override;
void update_stats(size_t length) override;
int prune_object_extents(
@@ -283,7 +274,7 @@ protected:
const LightweightObjectExtent &object_extent, IOContext io_context,
uint64_t journal_tid, bool single_extent, Context *on_finish) override;
- uint64_t append_journal_event(bool synchronous) override;
+ uint64_t append_journal_event() override;
void update_stats(size_t length) override;
private:
bufferlist m_data_bl;
@@ -315,7 +306,7 @@ protected:
const LightweightObjectExtent &object_extent, IOContext io_context,
uint64_t journal_tid, bool single_extent, Context *on_finish) override;
- uint64_t append_journal_event(bool synchronous) override;
+ uint64_t append_journal_event() override;
void update_stats(size_t length) override;
aio_type_t get_aio_type() const override {
diff --git a/src/librbd/io/ObjectRequest.cc b/src/librbd/io/ObjectRequest.cc
index 6d246cdf3..fc1a96858 100644
--- a/src/librbd/io/ObjectRequest.cc
+++ b/src/librbd/io/ObjectRequest.cc
@@ -834,16 +834,17 @@ void ObjectListSnapsRequest<I>::handle_list_snaps(int r) {
end_snap_id, &diff, &end_size, &exists,
&clone_end_snap_id, &read_whole_object);
- if (read_whole_object ||
- (!diff.empty() &&
- ((m_list_snaps_flags & LIST_SNAPS_FLAG_WHOLE_OBJECT) != 0))) {
+ if (read_whole_object) {
ldout(cct, 1) << "need to read full object" << dendl;
- diff.clear();
diff.insert(0, image_ctx->layout.object_size);
+ exists = true;
end_size = image_ctx->layout.object_size;
clone_end_snap_id = end_snap_id;
- } else if (!exists) {
- end_size = 0;
+ } else if ((m_list_snaps_flags & LIST_SNAPS_FLAG_WHOLE_OBJECT) != 0 &&
+ !diff.empty()) {
+ ldout(cct, 20) << "expanding diff from " << diff << dendl;
+ diff.clear();
+ diff.insert(0, image_ctx->layout.object_size);
}
if (exists) {
@@ -863,7 +864,8 @@ void ObjectListSnapsRequest<I>::handle_list_snaps(int r) {
// clip diff to size of object (in case it was truncated)
interval_set<uint64_t> zero_interval;
- if (end_size < prev_end_size) {
+ if (end_size < prev_end_size &&
+ (m_list_snaps_flags & LIST_SNAPS_FLAG_WHOLE_OBJECT) == 0) {
zero_interval.insert(end_size, prev_end_size - end_size);
zero_interval.intersection_of(object_interval);
@@ -884,7 +886,7 @@ void ObjectListSnapsRequest<I>::handle_list_snaps(int r) {
<< "end_size=" << end_size << ", "
<< "prev_end_size=" << prev_end_size << ", "
<< "exists=" << exists << ", "
- << "whole_object=" << read_whole_object << dendl;
+ << "read_whole_object=" << read_whole_object << dendl;
// check if object exists prior to start of incremental snap delta so that
// we don't DNE the object if no additional deltas exist
diff --git a/src/librbd/io/Types.h b/src/librbd/io/Types.h
index 7c70986c5..03e9ffa3b 100644
--- a/src/librbd/io/Types.h
+++ b/src/librbd/io/Types.h
@@ -180,8 +180,9 @@ struct SparseExtent {
std::ostream& operator<<(std::ostream& os, const SparseExtent& state);
struct SparseExtentSplitMerge {
- SparseExtent split(uint64_t offset, uint64_t length, SparseExtent &se) const {
- return SparseExtent(se.state, se.length);
+ SparseExtent split(uint64_t offset, uint64_t length,
+ const SparseExtent& se) const {
+ return SparseExtent(se.state, length);
}
bool can_merge(const SparseExtent& left, const SparseExtent& right) const {
@@ -232,10 +233,10 @@ struct SparseBufferlistExtent : public SparseExtent {
struct SparseBufferlistExtentSplitMerge {
SparseBufferlistExtent split(uint64_t offset, uint64_t length,
- SparseBufferlistExtent& sbe) const {
+ const SparseBufferlistExtent& sbe) const {
ceph::bufferlist bl;
if (sbe.state == SPARSE_EXTENT_STATE_DATA) {
- bl.substr_of(bl, offset, length);
+ bl.substr_of(sbe.bl, offset, length);
}
return SparseBufferlistExtent(sbe.state, length, std::move(bl));
}
@@ -247,14 +248,13 @@ struct SparseBufferlistExtentSplitMerge {
SparseBufferlistExtent merge(SparseBufferlistExtent&& left,
SparseBufferlistExtent&& right) const {
+ ceph::bufferlist bl;
if (left.state == SPARSE_EXTENT_STATE_DATA) {
- ceph::bufferlist bl{std::move(left.bl)};
- bl.claim_append(std::move(right.bl));
- return SparseBufferlistExtent(SPARSE_EXTENT_STATE_DATA,
- bl.length(), std::move(bl));
- } else {
- return SparseBufferlistExtent(left.state, left.length + right.length, {});
+ bl.claim_append(left.bl);
+ bl.claim_append(right.bl);
}
+ return SparseBufferlistExtent(left.state, left.length + right.length,
+ std::move(bl));
}
uint64_t length(const SparseBufferlistExtent& sbe) const {
diff --git a/src/librbd/object_map/DiffRequest.cc b/src/librbd/object_map/DiffRequest.cc
index 606d48bbf..acaf31a39 100644
--- a/src/librbd/object_map/DiffRequest.cc
+++ b/src/librbd/object_map/DiffRequest.cc
@@ -21,6 +21,193 @@ namespace object_map {
using util::create_rados_callback;
template <typename I>
+DiffRequest<I>::DiffRequest(I* image_ctx,
+ uint64_t snap_id_start, uint64_t snap_id_end,
+ uint64_t start_object_no, uint64_t end_object_no,
+ BitVector<2>* object_diff_state,
+ Context* on_finish)
+ : m_image_ctx(image_ctx), m_snap_id_start(snap_id_start),
+ m_snap_id_end(snap_id_end), m_start_object_no(start_object_no),
+ m_end_object_no(end_object_no), m_object_diff_state(object_diff_state),
+ m_on_finish(on_finish) {
+ auto cct = m_image_ctx->cct;
+ ldout(cct, 10) << "snap_id_start=" << m_snap_id_start
+ << ", snap_id_end=" << m_snap_id_end
+ << ", start_object_no=" << m_start_object_no
+ << ", end_object_no=" << m_end_object_no
+ << dendl;
+}
+
+template <typename I>
+bool DiffRequest<I>::is_diff_iterate() const {
+ return m_start_object_no != 0 || m_end_object_no != UINT64_MAX;
+}
+
+template <typename I>
+int DiffRequest<I>::prepare_for_object_map() {
+ ceph_assert(ceph_mutex_is_locked(m_image_ctx->image_lock));
+
+ auto cct = m_image_ctx->cct;
+ ldout(cct, 20) << "snap_id=" << m_current_snap_id << dendl;
+
+ if ((m_image_ctx->features & RBD_FEATURE_FAST_DIFF) == 0) {
+ ldout(cct, 10) << "fast-diff feature not enabled" << dendl;
+ return -EINVAL;
+ }
+
+ if (m_current_snap_id == CEPH_NOSNAP) {
+ m_current_size = m_image_ctx->size;
+ } else {
+ auto snap_it = m_image_ctx->snap_info.find(m_current_snap_id);
+ if (snap_it == m_image_ctx->snap_info.end()) {
+ ldout(cct, 10) << "snapshot " << m_current_snap_id << " does not exist"
+ << dendl;
+ return -ENOENT;
+ }
+ m_current_size = snap_it->second.size;
+ }
+
+ uint64_t flags;
+ int r = m_image_ctx->get_flags(m_current_snap_id, &flags);
+ ceph_assert(r == 0);
+
+ if ((flags & RBD_FLAG_FAST_DIFF_INVALID) != 0) {
+ ldout(cct, 1) << "cannot perform fast diff on invalid object map"
+ << dendl;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+template <typename I>
+int DiffRequest<I>::process_object_map(const BitVector<2>& object_map) {
+ auto cct = m_image_ctx->cct;
+ ldout(cct, 20) << "snap_id=" << m_current_snap_id << dendl;
+
+ uint64_t num_objs = Striper::get_num_objects(m_image_ctx->layout,
+ m_current_size);
+ if (object_map.size() < num_objs) {
+ ldout(cct, 1) << "object map too small: "
+ << object_map.size() << " < " << num_objs << dendl;
+ return -EINVAL;
+ }
+
+ uint64_t start_object_no, end_object_no;
+ uint64_t prev_object_diff_state_size = m_object_diff_state->size();
+ if (is_diff_iterate()) {
+ start_object_no = std::min(m_start_object_no, num_objs);
+ end_object_no = std::min(m_end_object_no, num_objs);
+ uint64_t num_objs_in_range = end_object_no - start_object_no;
+ if (m_object_diff_state->size() != num_objs_in_range) {
+ m_object_diff_state->resize(num_objs_in_range);
+ }
+ } else {
+ // for deep-copy, the object diff state should be the largest of
+ // all versions in the set, so it's only ever grown
+ // shrink is handled by flagging trimmed objects as non-existent
+ // and comparing against the previous object diff state as usual
+ if (m_object_diff_state->size() < num_objs) {
+ m_object_diff_state->resize(num_objs);
+ }
+ start_object_no = 0;
+ end_object_no = m_object_diff_state->size();
+ }
+
+ uint64_t overlap = std::min(m_object_diff_state->size(),
+ prev_object_diff_state_size);
+ auto it = object_map.begin() + start_object_no;
+ auto diff_it = m_object_diff_state->begin();
+ uint64_t ono = start_object_no;
+ for (; ono < start_object_no + overlap; ++diff_it, ++ono) {
+ uint8_t object_map_state = (ono < num_objs ? *it++ : OBJECT_NONEXISTENT);
+ uint8_t prev_object_diff_state = *diff_it;
+ switch (prev_object_diff_state) {
+ case DIFF_STATE_HOLE:
+ if (object_map_state != OBJECT_NONEXISTENT) {
+ // stay in HOLE on intermediate snapshots for diff-iterate
+ if (!is_diff_iterate() || m_current_snap_id == m_snap_id_end) {
+ *diff_it = DIFF_STATE_DATA_UPDATED;
+ }
+ }
+ break;
+ case DIFF_STATE_DATA:
+ if (object_map_state == OBJECT_NONEXISTENT) {
+ *diff_it = DIFF_STATE_HOLE_UPDATED;
+ } else if (object_map_state != OBJECT_EXISTS_CLEAN) {
+ *diff_it = DIFF_STATE_DATA_UPDATED;
+ }
+ break;
+ case DIFF_STATE_HOLE_UPDATED:
+ if (object_map_state != OBJECT_NONEXISTENT) {
+ *diff_it = DIFF_STATE_DATA_UPDATED;
+ }
+ break;
+ case DIFF_STATE_DATA_UPDATED:
+ if (object_map_state == OBJECT_NONEXISTENT) {
+ *diff_it = DIFF_STATE_HOLE_UPDATED;
+ }
+ break;
+ default:
+ ceph_abort();
+ }
+
+ ldout(cct, 20) << "object state: " << ono << " "
+ << static_cast<uint32_t>(prev_object_diff_state)
+ << "->" << static_cast<uint32_t>(*diff_it) << " ("
+ << static_cast<uint32_t>(object_map_state) << ")"
+ << dendl;
+ }
+ ldout(cct, 20) << "computed overlap diffs" << dendl;
+
+ ceph_assert(diff_it == m_object_diff_state->end() ||
+ end_object_no <= num_objs);
+ for (; ono < end_object_no; ++it, ++diff_it, ++ono) {
+ uint8_t object_map_state = *it;
+ if (object_map_state == OBJECT_NONEXISTENT) {
+ *diff_it = DIFF_STATE_HOLE;
+ } else if (m_current_snap_id != m_snap_id_start) {
+ // diffing against the beginning of time or image was grown
+ // (implicit) starting state is HOLE, this is the first object
+ // map after
+ if (is_diff_iterate()) {
+ // for diff-iterate, if the object is discarded prior to or
+ // in the end version, result should be HOLE
+ // since DATA_UPDATED can transition only to HOLE_UPDATED,
+ // stay in HOLE on intermediate snapshots -- another way to
+ // put this is that when starting with a hole, intermediate
+ // snapshots can be ignored as the result depends only on the
+ // end version
+ if (m_current_snap_id == m_snap_id_end) {
+ *diff_it = DIFF_STATE_DATA_UPDATED;
+ } else {
+ *diff_it = DIFF_STATE_HOLE;
+ }
+ } else {
+ // for deep-copy, if the object is discarded prior to or
+ // in the end version, result should be HOLE_UPDATED
+ *diff_it = DIFF_STATE_DATA_UPDATED;
+ }
+ } else {
+ // diffing against a snapshot, this is its object map
+ if (object_map_state != OBJECT_PENDING) {
+ *diff_it = DIFF_STATE_DATA;
+ } else {
+ *diff_it = DIFF_STATE_DATA_UPDATED;
+ }
+ }
+
+ ldout(cct, 20) << "object state: " << ono << " "
+ << "->" << static_cast<uint32_t>(*diff_it) << " ("
+ << static_cast<uint32_t>(*it) << ")" << dendl;
+ }
+ ldout(cct, 20) << "computed resize diffs" << dendl;
+
+ ceph_assert(diff_it == m_object_diff_state->end());
+ return 0;
+}
+
+template <typename I>
void DiffRequest<I>::send() {
auto cct = m_image_ctx->cct;
@@ -30,24 +217,62 @@ void DiffRequest<I>::send() {
<< "snap_id_end=" << m_snap_id_end << dendl;
finish(-EINVAL);
return;
- } else if (m_snap_id_start == m_snap_id_end) {
- // no delta between the same snapshot
- finish(0);
+ }
+ if (m_start_object_no == UINT64_MAX || m_start_object_no > m_end_object_no ||
+ (m_start_object_no != 0 && m_end_object_no == UINT64_MAX)) {
+ lderr(cct) << "invalid start/end object numbers: "
+ << "start_object_no=" << m_start_object_no << ", "
+ << "end_object_no=" << m_end_object_no << dendl;
+ finish(-EINVAL);
return;
}
m_object_diff_state->clear();
- // collect all the snap ids in the provided range (inclusive)
- if (m_snap_id_start != 0) {
- m_snap_ids.insert(m_snap_id_start);
+ if (m_snap_id_start == m_snap_id_end) {
+ // no delta between the same snapshot
+ finish(0);
+ return;
+ }
+ if (m_start_object_no == m_end_object_no) {
+ // no objects in the provided range (half-open)
+ finish(0);
+ return;
}
std::shared_lock image_locker{m_image_ctx->image_lock};
- auto snap_info_it = m_image_ctx->snap_info.upper_bound(m_snap_id_start);
- auto snap_info_it_end = m_image_ctx->snap_info.lower_bound(m_snap_id_end);
- for (; snap_info_it != snap_info_it_end; ++snap_info_it) {
- m_snap_ids.insert(snap_info_it->first);
+ if (is_diff_iterate() &&
+ m_snap_id_start == 0 &&
+ m_snap_id_end == m_image_ctx->snap_id &&
+ m_image_ctx->object_map != nullptr) {
+ ldout(cct, 10) << "using in-memory object map" << dendl;
+ m_current_snap_id = m_snap_id_end;
+
+ int r = prepare_for_object_map();
+ if (r == 0) {
+ r = m_image_ctx->object_map->with_object_map(
+ [this](const BitVector<2>& object_map) {
+ return process_object_map(object_map);
+ });
+ }
+ image_locker.unlock();
+
+ finish(r);
+ return;
+ }
+
+ // collect all the snap ids in the provided range (inclusive) unless
+ // this is diff-iterate against the beginning of time, in which case
+ // only the end version matters
+ if (!is_diff_iterate() || m_snap_id_start != 0) {
+ if (m_snap_id_start != 0) {
+ m_snap_ids.insert(m_snap_id_start);
+ }
+ auto snap_info_it = m_image_ctx->snap_info.upper_bound(m_snap_id_start);
+ auto snap_info_it_end = m_image_ctx->snap_info.lower_bound(m_snap_id_end);
+ for (; snap_info_it != snap_info_it_end; ++snap_info_it) {
+ m_snap_ids.insert(snap_info_it->first);
+ }
}
m_snap_ids.insert(m_snap_id_end);
@@ -72,59 +297,23 @@ void DiffRequest<I>::load_object_map(
auto cct = m_image_ctx->cct;
ldout(cct, 10) << "snap_id=" << m_current_snap_id << dendl;
- if ((m_image_ctx->features & RBD_FEATURE_FAST_DIFF) == 0) {
- image_locker->unlock();
-
- ldout(cct, 10) << "fast-diff feature not enabled" << dendl;
- finish(-EINVAL);
- return;
- }
-
// ignore ENOENT with intermediate snapshots since deleted
// snaps will get merged with later snapshots
m_ignore_enoent = (m_current_snap_id != m_snap_id_start &&
m_current_snap_id != m_snap_id_end);
- if (m_current_snap_id == CEPH_NOSNAP) {
- m_current_size = m_image_ctx->size;
- } else {
- auto snap_it = m_image_ctx->snap_info.find(m_current_snap_id);
- if (snap_it == m_image_ctx->snap_info.end()) {
- ldout(cct, 10) << "snapshot " << m_current_snap_id << " does not exist"
- << dendl;
- if (!m_ignore_enoent) {
- image_locker->unlock();
-
- finish(-ENOENT);
- return;
- }
-
- load_object_map(image_locker);
- return;
- }
-
- m_current_size = snap_it->second.size;
- }
-
- uint64_t flags = 0;
- int r = m_image_ctx->get_flags(m_current_snap_id, &flags);
- if (r < 0) {
+ int r = prepare_for_object_map();
+ if (r == -ENOENT && m_ignore_enoent) {
+ load_object_map(image_locker);
+ return;
+ } else if (r < 0) {
image_locker->unlock();
- lderr(cct) << "failed to retrieve image flags: " << cpp_strerror(r)
- << dendl;
finish(r);
return;
}
image_locker->unlock();
- if ((flags & RBD_FLAG_FAST_DIFF_INVALID) != 0) {
- ldout(cct, 1) << "cannot perform fast diff on invalid object map"
- << dendl;
- finish(-EINVAL);
- return;
- }
-
std::string oid(ObjectMap<>::object_map_name(m_image_ctx->id,
m_current_snap_id));
@@ -144,100 +333,27 @@ void DiffRequest<I>::handle_load_object_map(int r) {
auto cct = m_image_ctx->cct;
ldout(cct, 10) << "r=" << r << dendl;
+ BitVector<2> object_map;
+ std::string oid(ObjectMap<>::object_map_name(m_image_ctx->id,
+ m_current_snap_id));
+
if (r == 0) {
auto bl_it = m_out_bl.cbegin();
- r = cls_client::object_map_load_finish(&bl_it, &m_object_map);
+ r = cls_client::object_map_load_finish(&bl_it, &object_map);
}
-
- std::string oid(ObjectMap<>::object_map_name(m_image_ctx->id,
- m_current_snap_id));
if (r == -ENOENT && m_ignore_enoent) {
ldout(cct, 10) << "object map " << oid << " does not exist" << dendl;
-
- std::shared_lock image_locker{m_image_ctx->image_lock};
- load_object_map(&image_locker);
- return;
} else if (r < 0) {
lderr(cct) << "failed to load object map: " << oid << dendl;
finish(r);
return;
- }
- ldout(cct, 20) << "loaded object map " << oid << dendl;
-
- uint64_t num_objs = Striper::get_num_objects(m_image_ctx->layout,
- m_current_size);
- if (m_object_map.size() < num_objs) {
- ldout(cct, 1) << "object map too small: "
- << m_object_map.size() << " < " << num_objs << dendl;
- finish(-EINVAL);
- return;
} else {
- m_object_map.resize(num_objs);
- }
-
- uint64_t prev_object_diff_state_size = m_object_diff_state->size();
- if (prev_object_diff_state_size < num_objs) {
- // the diff state should be the largest of all snapshots in the set
- m_object_diff_state->resize(num_objs);
- }
- if (m_object_map.size() < m_object_diff_state->size()) {
- // the image was shrunk so expanding the object map will flag end objects
- // as non-existent and they will be compared against the previous object
- // diff state
- m_object_map.resize(m_object_diff_state->size());
- }
-
- uint64_t overlap = std::min(m_object_map.size(), prev_object_diff_state_size);
- auto it = m_object_map.begin();
- auto overlap_end_it = it + overlap;
- auto diff_it = m_object_diff_state->begin();
- uint64_t i = 0;
- for (; it != overlap_end_it; ++it, ++diff_it, ++i) {
- uint8_t object_map_state = *it;
- uint8_t prev_object_diff_state = *diff_it;
- if (object_map_state == OBJECT_EXISTS ||
- object_map_state == OBJECT_PENDING ||
- (object_map_state == OBJECT_EXISTS_CLEAN &&
- prev_object_diff_state != DIFF_STATE_DATA &&
- prev_object_diff_state != DIFF_STATE_DATA_UPDATED)) {
- *diff_it = DIFF_STATE_DATA_UPDATED;
- } else if (object_map_state == OBJECT_NONEXISTENT &&
- prev_object_diff_state != DIFF_STATE_HOLE &&
- prev_object_diff_state != DIFF_STATE_HOLE_UPDATED) {
- *diff_it = DIFF_STATE_HOLE_UPDATED;
- }
-
- ldout(cct, 20) << "object state: " << i << " "
- << static_cast<uint32_t>(prev_object_diff_state)
- << "->" << static_cast<uint32_t>(*diff_it) << " ("
- << static_cast<uint32_t>(object_map_state) << ")"
- << dendl;
- }
- ldout(cct, 20) << "computed overlap diffs" << dendl;
-
- bool diff_from_start = (m_snap_id_start == 0);
- auto end_it = m_object_map.end();
- if (m_object_map.size() > prev_object_diff_state_size) {
- for (; it != end_it; ++it,++diff_it, ++i) {
- uint8_t object_map_state = *it;
- if (object_map_state == OBJECT_NONEXISTENT) {
- *diff_it = DIFF_STATE_HOLE;
- } else if (diff_from_start ||
- (m_object_diff_state_valid &&
- object_map_state != OBJECT_EXISTS_CLEAN)) {
- *diff_it = DIFF_STATE_DATA_UPDATED;
- } else {
- *diff_it = DIFF_STATE_DATA;
- }
-
- ldout(cct, 20) << "object state: " << i << " "
- << "->" << static_cast<uint32_t>(*diff_it) << " ("
- << static_cast<uint32_t>(*it) << ")" << dendl;
+ r = process_object_map(object_map);
+ if (r < 0) {
+ finish(r);
+ return;
}
}
- ldout(cct, 20) << "computed resize diffs" << dendl;
-
- m_object_diff_state_valid = true;
std::shared_lock image_locker{m_image_ctx->image_lock};
load_object_map(&image_locker);
diff --git a/src/librbd/object_map/DiffRequest.h b/src/librbd/object_map/DiffRequest.h
index e83a1629e..740f4e02a 100644
--- a/src/librbd/object_map/DiffRequest.h
+++ b/src/librbd/object_map/DiffRequest.h
@@ -21,21 +21,20 @@ namespace object_map {
template <typename ImageCtxT>
class DiffRequest {
public:
- static DiffRequest* create(ImageCtxT* image_ctx, uint64_t snap_id_start,
- uint64_t snap_id_end,
+ static DiffRequest* create(ImageCtxT* image_ctx,
+ uint64_t snap_id_start, uint64_t snap_id_end,
+ uint64_t start_object_no, uint64_t end_object_no,
BitVector<2>* object_diff_state,
Context* on_finish) {
return new DiffRequest(image_ctx, snap_id_start, snap_id_end,
- object_diff_state, on_finish);
+ start_object_no, end_object_no, object_diff_state,
+ on_finish);
}
- DiffRequest(ImageCtxT* image_ctx, uint64_t snap_id_start,
- uint64_t snap_id_end, BitVector<2>* object_diff_state,
- Context* on_finish)
- : m_image_ctx(image_ctx), m_snap_id_start(snap_id_start),
- m_snap_id_end(snap_id_end), m_object_diff_state(object_diff_state),
- m_on_finish(on_finish) {
- }
+ DiffRequest(ImageCtxT* image_ctx,
+ uint64_t snap_id_start, uint64_t snap_id_end,
+ uint64_t start_object_no, uint64_t end_object_no,
+ BitVector<2>* object_diff_state, Context* on_finish);
void send();
@@ -58,6 +57,8 @@ private:
ImageCtxT* m_image_ctx;
uint64_t m_snap_id_start;
uint64_t m_snap_id_end;
+ uint64_t m_start_object_no;
+ uint64_t m_end_object_no;
BitVector<2>* m_object_diff_state;
Context* m_on_finish;
@@ -67,11 +68,13 @@ private:
uint64_t m_current_size = 0;
- BitVector<2> m_object_map;
- bool m_object_diff_state_valid = false;
-
bufferlist m_out_bl;
+ bool is_diff_iterate() const;
+
+ int prepare_for_object_map();
+ int process_object_map(const BitVector<2>& object_map);
+
void load_object_map(std::shared_lock<ceph::shared_mutex>* image_locker);
void handle_load_object_map(int r);
diff --git a/src/librbd/object_map/Types.h b/src/librbd/object_map/Types.h
index 0ce91bd96..576ea0e4b 100644
--- a/src/librbd/object_map/Types.h
+++ b/src/librbd/object_map/Types.h
@@ -8,10 +8,17 @@ namespace librbd {
namespace object_map {
enum DiffState {
- DIFF_STATE_HOLE = 0, /* unchanged hole */
- DIFF_STATE_DATA = 1, /* unchanged data */
- DIFF_STATE_HOLE_UPDATED = 2, /* new hole */
- DIFF_STATE_DATA_UPDATED = 3 /* new data */
+ // diff-iterate: hole with or without data captured in intermediate snapshot
+ // deep-copy: hole without data captured in intermediate snapshot
+ DIFF_STATE_HOLE = 0,
+ // diff-iterate, deep-copy: unchanged data
+ DIFF_STATE_DATA = 1,
+ // diff-iterate: new hole (data -> hole)
+ // deep-copy: new hole (data -> hole) or hole with data captured in
+ // intermediate snapshot
+ DIFF_STATE_HOLE_UPDATED = 2,
+ // diff-iterate, deep-copy: new data (hole -> data) or changed data
+ DIFF_STATE_DATA_UPDATED = 3
};
} // namespace object_map