// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab #ifndef CEPH_LIBRBD_CACHE_PWL_REQUEST_H #define CEPH_LIBRBD_CACHE_PWL_REQUEST_H #include "include/Context.h" #include "librbd/cache/pwl/Types.h" #include "librbd/cache/pwl/LogOperation.h" namespace librbd { class BlockGuardCell; namespace cache { namespace pwl { class GuardedRequestFunctionContext; struct WriteRequestResources { bool allocated = false; std::vector buffers; }; /** * A request that can be deferred in a BlockGuard to sequence * overlapping operations. * This is the custodian of the BlockGuard cell for this IO, and the * state information about the progress of this IO. This object lives * until the IO is persisted in all (live) log replicas. User request * may be completed from here before the IO persists. */ template class C_BlockIORequest : public Context { public: T &pwl; io::Extents image_extents; bufferlist bl; int fadvise_flags; Context *user_req; /* User write request */ ExtentsSummary image_extents_summary; bool detained = false; /* Detained in blockguard (overlapped with a prior IO) */ utime_t allocated_time; /* When allocation began */ C_BlockIORequest(T &pwl, const utime_t arrived, io::Extents &&extents, bufferlist&& bl, const int fadvise_flags, Context *user_req); ~C_BlockIORequest() override; C_BlockIORequest(const C_BlockIORequest&) = delete; C_BlockIORequest &operator=(const C_BlockIORequest&) = delete; void set_cell(BlockGuardCell *cell); BlockGuardCell *get_cell(void); void release_cell(); void complete_user_request(int r); void finish(int r); virtual void finish_req(int r) = 0; virtual bool alloc_resources() = 0; void deferred(); virtual void deferred_handler() = 0; virtual void dispatch() = 0; virtual void copy_cache() {}; virtual const char *get_name() const { return "C_BlockIORequest"; } uint64_t get_image_extents_size() { return image_extents.size(); } std::vector& get_resources_buffers() { return m_resources.buffers; } void 
set_allocated(bool allocated) { if (allocated) { m_resources.allocated = true; } else { m_resources.buffers.clear(); } } virtual void setup_buffer_resources( uint64_t *bytes_cached, uint64_t *bytes_dirtied, uint64_t *bytes_allocated, uint64_t *number_lanes, uint64_t *number_log_entries, uint64_t *number_unpublished_reserves) = 0; protected: utime_t m_arrived_time; utime_t m_dispatched_time; /* When dispatch began */ utime_t m_user_req_completed_time; std::atomic m_deferred = {false}; /* Deferred because this or a prior IO had to wait for write resources */ WriteRequestResources m_resources; private: std::atomic m_user_req_completed = {false}; std::atomic m_finish_called = {false}; std::atomic m_cell_released = {false}; BlockGuardCell* m_cell = nullptr; template friend std::ostream &operator<<(std::ostream &os, const C_BlockIORequest &req); }; /** * This is the custodian of the BlockGuard cell for this write. Block * guard is not released until the write persists everywhere (this is * how we guarantee to each log replica that they will never see * overlapping writes). 
*/
template <typename T>
class C_WriteRequest : public C_BlockIORequest<T> {
public:
  using C_BlockIORequest<T>::pwl;
  bool compare_succeeded = false;
  /* Only the compare-and-write constructor takes a value for this, so
   * default it to nullptr rather than leaving it indeterminate. */
  uint64_t *mismatch_offset = nullptr;
  bufferlist cmp_bl;
  bufferlist read_bl;
  bool is_comp_and_write = false;
  /* NOTE(review): pointee type assumed to be WriteLogOperationSet
   * (expected from LogOperation.h) -- confirm. */
  std::unique_ptr<WriteLogOperationSet> op_set = nullptr;

  /* Plain write */
  C_WriteRequest(T &pwl, const utime_t arrived, io::Extents &&image_extents,
                 bufferlist&& bl, const int fadvise_flags, ceph::mutex &lock,
                 PerfCounters *perfcounter, Context *user_req);

  /* Variant taking a compare buffer (cmp_bl) and a mismatch_offset out-pointer */
  C_WriteRequest(T &pwl, const utime_t arrived, io::Extents &&image_extents,
                 bufferlist&& cmp_bl, bufferlist&& bl, uint64_t *mismatch_offset,
                 int fadvise_flags, ceph::mutex &lock, PerfCounters *perfcounter,
                 Context *user_req);

  ~C_WriteRequest() override;

  void blockguard_acquired(GuardedRequestFunctionContext &guard_ctx);

  /* Common finish to plain write and compare-and-write (if it writes) */
  void finish_req(int r) override;

  /* Compare and write will override this */
  virtual void update_req_stats(utime_t &now);

  bool alloc_resources() override;

  /* Nothing to do for writes when deferred */
  void deferred_handler() override { }

  void dispatch() override;

  void copy_cache() override;

  /* NOTE(review): return type assumed to be
   * std::shared_ptr<WriteLogOperation> -- confirm. */
  virtual std::shared_ptr<WriteLogOperation> create_operation(uint64_t offset,
                                                              uint64_t len);

  virtual void setup_log_operations(DeferredContexts &on_exit);

  /* NOTE(review): parameter type assumed to be
   * std::shared_ptr<SyncPoint> -- confirm. */
  bool append_write_request(std::shared_ptr<SyncPoint> sync_point);

  virtual void schedule_append();

  const char *get_name() const override {
    return "C_WriteRequest";
  }

protected:
  using C_BlockIORequest<T>::m_resources;
  PerfCounters *m_perfcounter = nullptr;

private:
  bool m_do_early_flush = false;
  std::atomic<int> m_appended = {0};
  bool m_queued = false;
  ceph::mutex &m_lock;

  template <typename U>
  friend std::ostream &operator<<(std::ostream &os,
                                  const C_WriteRequest<U> &req);
};

/**
 * This is the custodian of the BlockGuard cell for this
 * aio_flush. Block guard is released as soon as the new
 * sync point (if required) is created. Subsequent IOs can
 * proceed while this flush waits for prior IOs to complete
 * and any required sync points to be persisted.
*/
template <typename T>
class C_FlushRequest : public C_BlockIORequest<T> {
public:
  using C_BlockIORequest<T>::pwl;
  bool internal = false;
  /* NOTE(review): pointee type assumed to be SyncPoint -- confirm. */
  std::shared_ptr<SyncPoint> to_append;

  C_FlushRequest(T &pwl, const utime_t arrived,
                 io::Extents &&image_extents,
                 bufferlist&& bl, const int fadvise_flags,
                 ceph::mutex &lock, PerfCounters *perfcounter,
                 Context *user_req);

  ~C_FlushRequest() override {}

  bool alloc_resources() override;

  void dispatch() override;

  const char *get_name() const override {
    return "C_FlushRequest";
  }

  void setup_buffer_resources(
      uint64_t *bytes_cached, uint64_t *bytes_dirtied,
      uint64_t *bytes_allocated, uint64_t *number_lanes,
      uint64_t *number_log_entries,
      uint64_t *number_unpublished_reserves) override;

private:
  /* NOTE(review): pointee type assumed to be SyncPointLogOperation
   * (expected from LogOperation.h) -- confirm. */
  std::shared_ptr<SyncPointLogOperation> op;
  ceph::mutex &m_lock;
  PerfCounters *m_perfcounter = nullptr;

  void finish_req(int r) override;

  /* Counts the deferral; dereferences m_perfcounter unconditionally,
   * so it must have been set to a valid counter set by then. */
  void deferred_handler() override {
    m_perfcounter->inc(l_librbd_pwl_aio_flush_def, 1);
  }

  template <typename U>
  friend std::ostream &operator<<(std::ostream &os,
                                  const C_FlushRequest<U> &req);
};

/**
 * This is the custodian of the BlockGuard cell for this discard. As in the
 * case of write, the block guard is not released until the discard persists
 * everywhere.
*/
template <typename T>
class C_DiscardRequest : public C_BlockIORequest<T> {
public:
  using C_BlockIORequest<T>::pwl;
  /* NOTE(review): pointee type assumed to be DiscardLogOperation
   * (expected from LogOperation.h) -- confirm. */
  std::shared_ptr<DiscardLogOperation> op;

  C_DiscardRequest(T &pwl, const utime_t arrived, io::Extents &&image_extents,
                   uint32_t discard_granularity_bytes, ceph::mutex &lock,
                   PerfCounters *perfcounter, Context *user_req);

  ~C_DiscardRequest() override;

  /* No request-type specific completion work for discards */
  void finish_req(int r) override {}

  bool alloc_resources() override;

  /* Nothing to do for discards when deferred */
  void deferred_handler() override { }

  void setup_log_operations();

  void dispatch() override;

  void blockguard_acquired(GuardedRequestFunctionContext &guard_ctx);

  const char *get_name() const override {
    return "C_DiscardRequest";
  }

  void setup_buffer_resources(
      uint64_t *bytes_cached, uint64_t *bytes_dirtied, uint64_t *bytes_allocated,
      uint64_t *number_lanes, uint64_t *number_log_entries,
      uint64_t *number_unpublished_reserves) override;

private:
  uint32_t m_discard_granularity_bytes;
  ceph::mutex &m_lock;
  PerfCounters *m_perfcounter = nullptr;

  template <typename U>
  friend std::ostream &operator<<(std::ostream &os,
                                  const C_DiscardRequest<U> &req);
};

/**
 * This is the custodian of the BlockGuard cell for this write same.
 *
 * A writesame allocates and persists a data buffer like a write, but the
 * data buffer is usually much shorter than the write same.
*/ template class C_WriteSameRequest : public C_WriteRequest { public: using C_BlockIORequest::pwl; C_WriteSameRequest(T &pwl, const utime_t arrived, io::Extents &&image_extents, bufferlist&& bl, const int fadvise_flags, ceph::mutex &lock, PerfCounters *perfcounter, Context *user_req); ~C_WriteSameRequest() override; void update_req_stats(utime_t &now) override; std::shared_ptr create_operation(uint64_t offset, uint64_t len) override; const char *get_name() const override { return "C_WriteSameRequest"; } template friend std::ostream &operator<<(std::ostream &os, const C_WriteSameRequest &req); }; struct BlockGuardReqState { bool barrier = false; /* This is a barrier request */ bool current_barrier = false; /* This is the currently active barrier */ bool detained = false; bool queued = false; /* Queued for barrier */ friend std::ostream &operator<<(std::ostream &os, const BlockGuardReqState &r) { os << "barrier=" << r.barrier << ", current_barrier=" << r.current_barrier << ", detained=" << r.detained << ", queued=" << r.queued; return os; } }; class GuardedRequestFunctionContext : public Context { public: BlockGuardCell *cell = nullptr; BlockGuardReqState state; GuardedRequestFunctionContext(boost::function &&callback) : m_callback(std::move(callback)){ } ~GuardedRequestFunctionContext(void) override { }; GuardedRequestFunctionContext(const GuardedRequestFunctionContext&) = delete; GuardedRequestFunctionContext &operator=(const GuardedRequestFunctionContext&) = delete; private: boost::function m_callback; void finish(int r) override { ceph_assert(cell); m_callback(*this); } }; class GuardedRequest { public: const BlockExtent block_extent; GuardedRequestFunctionContext *guard_ctx; /* Work to do when guard on range obtained */ GuardedRequest(const BlockExtent block_extent, GuardedRequestFunctionContext *on_guard_acquire, bool barrier = false) : block_extent(block_extent), guard_ctx(on_guard_acquire) { guard_ctx->state.barrier = barrier; } friend std::ostream 
&operator<<(std::ostream &os, const GuardedRequest &r) { os << "guard_ctx->state=[" << r.guard_ctx->state << "], block_extent.block_start=" << r.block_extent.block_start << ", block_extent.block_end=" << r.block_extent.block_end; return os; } }; } // namespace pwl } // namespace cache } // namespace librbd #endif // CEPH_LIBRBD_CACHE_PWL_REQUEST_H