diff options
Diffstat (limited to '')
-rw-r--r-- | src/tools/rbd_wnbd/wnbd_handler.cc | 430 |
1 files changed, 430 insertions, 0 deletions
diff --git a/src/tools/rbd_wnbd/wnbd_handler.cc b/src/tools/rbd_wnbd/wnbd_handler.cc new file mode 100644 index 000000000..ecfa47240 --- /dev/null +++ b/src/tools/rbd_wnbd/wnbd_handler.cc @@ -0,0 +1,430 @@ +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2020 SUSE LINUX GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_rbd + +#include "wnbd_handler.h" + +#define _NTSCSI_USER_MODE_ +#include <rpc.h> +#include <ddk/scsi.h> + +#include <boost/thread/tss.hpp> + +#include "common/debug.h" +#include "common/errno.h" +#include "common/safe_io.h" +#include "common/SubProcess.h" +#include "common/Formatter.h" + +#include "global/global_context.h" + +WnbdHandler::~WnbdHandler() +{ + if (started && wnbd_disk) { + dout(10) << __func__ << ": terminating" << dendl; + + shutdown(); + reply_tpool->join(); + + WnbdClose(wnbd_disk); + + started = false; + + delete reply_tpool; + delete admin_hook; + } +} + +int WnbdHandler::wait() +{ + int err = 0; + if (started && wnbd_disk) { + dout(10) << __func__ << ": waiting" << dendl; + + err = WnbdWaitDispatcher(wnbd_disk); + if (err) { + derr << __func__ << " failed waiting for dispatcher to stop: " + << err << dendl; + } + } + + return err; +} + +int WnbdAdminHook::call (std::string_view command, const cmdmap_t& cmdmap, + Formatter *f, + std::ostream& errss, + bufferlist& out) { + if (command == "wnbd stats") { + return m_handler->dump_stats(f); + } + return -ENOSYS; + } + +int WnbdHandler::dump_stats(Formatter *f) +{ + if (!f) { + return -EINVAL; + } + + WNBD_USR_STATS stats = { 0 }; + DWORD err = WnbdGetUserspaceStats(wnbd_disk, &stats); + if (err) { + derr << "Failed to retrieve WNBD userspace stats. Error: " << err << dendl; + return -EINVAL; + } + + f->open_object_section("stats"); + f->dump_int("TotalReceivedRequests", stats.TotalReceivedRequests); + f->dump_int("TotalSubmittedRequests", stats.TotalSubmittedRequests); + f->dump_int("TotalReceivedReplies", stats.TotalReceivedReplies); + f->dump_int("UnsubmittedRequests", stats.UnsubmittedRequests); + f->dump_int("PendingSubmittedRequests", stats.PendingSubmittedRequests); + f->dump_int("PendingReplies", stats.PendingReplies); + f->dump_int("ReadErrors", stats.ReadErrors); + f->dump_int("WriteErrors", stats.WriteErrors); + f->dump_int("FlushErrors", stats.FlushErrors); + f->dump_int("UnmapErrors", stats.UnmapErrors); + f->dump_int("InvalidRequests", stats.InvalidRequests); + f->dump_int("TotalRWRequests", stats.TotalRWRequests); + f->dump_int("TotalReadBlocks", stats.TotalReadBlocks); + f->dump_int("TotalWrittenBlocks", stats.TotalWrittenBlocks); + + f->close_section(); + return 0; +} + +void WnbdHandler::shutdown() +{ + std::unique_lock l{shutdown_lock}; + if (!terminated && wnbd_disk) { + // We're requesting the disk to be removed but continue serving IO + // requests until the driver sends us the "Disconnect" event. + // TODO: expose PWNBD_REMOVE_OPTIONS, we're using the defaults ATM. + WnbdRemove(wnbd_disk, NULL); + wait(); + terminated = true; + } +} + +void WnbdHandler::aio_callback(librbd::completion_t cb, void *arg) +{ + librbd::RBD::AioCompletion *aio_completion = + reinterpret_cast<librbd::RBD::AioCompletion*>(cb); + + WnbdHandler::IOContext* ctx = static_cast<WnbdHandler::IOContext*>(arg); + int ret = aio_completion->get_return_value(); + + dout(20) << __func__ << ": " << *ctx << dendl; + + if (ret == -EINVAL) { + // if shrinking an image, a pagecache writeback might reference + // extents outside of the range of the new image extents + dout(0) << __func__ << ": masking IO out-of-bounds error" << *ctx << dendl; + ctx->data.clear(); + ret = 0; + } + + if (ret < 0) { + ctx->err_code = -ret; + // TODO: check the actual error. + ctx->set_sense(SCSI_SENSE_MEDIUM_ERROR, + SCSI_ADSENSE_UNRECOVERED_ERROR); + } else if ((ctx->req_type == WnbdReqTypeRead) && + ret < static_cast<int>(ctx->req_size)) { + int pad_byte_count = static_cast<int> (ctx->req_size) - ret; + ctx->data.append_zero(pad_byte_count); + dout(20) << __func__ << ": " << *ctx << ": Pad byte count: " + << pad_byte_count << dendl; + ctx->err_code = 0; + } else { + ctx->err_code = 0; + } + + boost::asio::post( + *ctx->handler->reply_tpool, + [&, ctx]() + { + ctx->handler->send_io_response(ctx); + }); + + aio_completion->release(); +} + +void WnbdHandler::send_io_response(WnbdHandler::IOContext *ctx) { + std::unique_ptr<WnbdHandler::IOContext> pctx{ctx}; + ceph_assert(WNBD_DEFAULT_MAX_TRANSFER_LENGTH >= pctx->data.length()); + + WNBD_IO_RESPONSE wnbd_rsp = {0}; + wnbd_rsp.RequestHandle = pctx->req_handle; + wnbd_rsp.RequestType = pctx->req_type; + wnbd_rsp.Status = pctx->wnbd_status; + int err = 0; + + // Use TLS to store an overlapped structure so that we avoid + // recreating one each time we send a reply. + static boost::thread_specific_ptr<OVERLAPPED> overlapped_tls( + // Cleanup routine + [](LPOVERLAPPED p_overlapped) + { + if (p_overlapped->hEvent) { + CloseHandle(p_overlapped->hEvent); + } + delete p_overlapped; + }); + + LPOVERLAPPED overlapped = overlapped_tls.get(); + if (!overlapped) + { + overlapped = new OVERLAPPED{0}; + HANDLE overlapped_evt = CreateEventA(0, TRUE, TRUE, NULL); + if (!overlapped_evt) { + err = GetLastError(); + derr << "Could not create event. Error: " << err << dendl; + return; + } + + overlapped->hEvent = overlapped_evt; + overlapped_tls.reset(overlapped); + } + + if (!ResetEvent(overlapped->hEvent)) { + err = GetLastError(); + derr << "Could not reset event. Error: " << err << dendl; + return; + } + + err = WnbdSendResponseEx( + pctx->handler->wnbd_disk, + &wnbd_rsp, + pctx->data.c_str(), + pctx->data.length(), + overlapped); + if (err == ERROR_IO_PENDING) { + DWORD returned_bytes = 0; + err = 0; + // We've got ERROR_IO_PENDING, which means that the operation is in + // progress. We'll use GetOverlappedResult to wait for it to complete + // and then retrieve the result. + if (!GetOverlappedResult(pctx->handler->wnbd_disk, overlapped, + &returned_bytes, TRUE)) { + err = GetLastError(); + derr << "Could not send response. Request id: " << wnbd_rsp.RequestHandle + << ". Error: " << err << dendl; + } + } +} + +void WnbdHandler::IOContext::set_sense(uint8_t sense_key, uint8_t asc, uint64_t info) +{ + WnbdSetSenseEx(&wnbd_status, sense_key, asc, info); +} + +void WnbdHandler::IOContext::set_sense(uint8_t sense_key, uint8_t asc) +{ + WnbdSetSense(&wnbd_status, sense_key, asc); +} + +void WnbdHandler::Read( + PWNBD_DISK Disk, + UINT64 RequestHandle, + PVOID Buffer, + UINT64 BlockAddress, + UINT32 BlockCount, + BOOLEAN ForceUnitAccess) +{ + WnbdHandler* handler = nullptr; + ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler)); + + WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext(); + ctx->handler = handler; + ctx->req_handle = RequestHandle; + ctx->req_type = WnbdReqTypeRead; + ctx->req_size = BlockCount * handler->block_size; + ctx->req_from = BlockAddress * handler->block_size; + ceph_assert(ctx->req_size <= WNBD_DEFAULT_MAX_TRANSFER_LENGTH); + + int op_flags = 0; + if (ForceUnitAccess) { + op_flags |= LIBRADOS_OP_FLAG_FADVISE_FUA; + } + + dout(20) << *ctx << ": start" << dendl; + + librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback); + handler->image.aio_read2(ctx->req_from, ctx->req_size, ctx->data, c, op_flags); + + dout(20) << *ctx << ": submitted" << dendl; +} + +void WnbdHandler::Write( + PWNBD_DISK Disk, + UINT64 RequestHandle, + PVOID Buffer, + UINT64 BlockAddress, + UINT32 BlockCount, + BOOLEAN ForceUnitAccess) +{ + WnbdHandler* handler = nullptr; + ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler)); + + WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext(); + ctx->handler = handler; + ctx->req_handle = RequestHandle; + ctx->req_type = WnbdReqTypeWrite; + ctx->req_size = BlockCount * handler->block_size; + ctx->req_from = BlockAddress * handler->block_size; + + bufferptr ptr((char*)Buffer, ctx->req_size); + ctx->data.push_back(ptr); + + int op_flags = 0; + if (ForceUnitAccess) { + op_flags |= LIBRADOS_OP_FLAG_FADVISE_FUA; + } + + dout(20) << *ctx << ": start" << dendl; + + librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback); + handler->image.aio_write2(ctx->req_from, ctx->req_size, ctx->data, c, op_flags); + + dout(20) << *ctx << ": submitted" << dendl; +} + +void WnbdHandler::Flush( + PWNBD_DISK Disk, + UINT64 RequestHandle, + UINT64 BlockAddress, + UINT32 BlockCount) +{ + WnbdHandler* handler = nullptr; + ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler)); + + WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext(); + ctx->handler = handler; + ctx->req_handle = RequestHandle; + ctx->req_type = WnbdReqTypeFlush; + ctx->req_size = BlockCount * handler->block_size; + ctx->req_from = BlockAddress * handler->block_size; + + dout(20) << *ctx << ": start" << dendl; + + librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback); + handler->image.aio_flush(c); + + dout(20) << *ctx << ": submitted" << dendl; +} + +void WnbdHandler::Unmap( + PWNBD_DISK Disk, + UINT64 RequestHandle, + PWNBD_UNMAP_DESCRIPTOR Descriptors, + UINT32 Count) +{ + WnbdHandler* handler = nullptr; + ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler)); + ceph_assert(1 == Count); + + WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext(); + ctx->handler = handler; + ctx->req_handle = RequestHandle; + ctx->req_type = WnbdReqTypeUnmap; + ctx->req_size = Descriptors[0].BlockCount * handler->block_size; + ctx->req_from = Descriptors[0].BlockAddress * handler->block_size; + + dout(20) << *ctx << ": start" << dendl; + + librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback); + handler->image.aio_discard(ctx->req_from, ctx->req_size, c); + + dout(20) << *ctx << ": submitted" << dendl; +} + +void WnbdHandler::LogMessage( + WnbdLogLevel LogLevel, + const char* Message, + const char* FileName, + UINT32 Line, + const char* FunctionName) +{ + // We're already passing the log level to WNBD, so we'll use the highest + // log level here. + dout(0) << "libwnbd.dll!" << FunctionName << " " + << WnbdLogLevelToStr(LogLevel) << " " << Message << dendl; +} + + +int WnbdHandler::start() +{ + int err = 0; + WNBD_PROPERTIES wnbd_props = {0}; + + instance_name.copy(wnbd_props.InstanceName, sizeof(wnbd_props.InstanceName)); + ceph_assert(strlen(RBD_WNBD_OWNER_NAME) < WNBD_MAX_OWNER_LENGTH); + strncpy(wnbd_props.Owner, RBD_WNBD_OWNER_NAME, WNBD_MAX_OWNER_LENGTH); + + wnbd_props.BlockCount = block_count; + wnbd_props.BlockSize = block_size; + wnbd_props.MaxUnmapDescCount = 1; + + wnbd_props.Flags.ReadOnly = readonly; + wnbd_props.Flags.UnmapSupported = 1; + if (rbd_cache_enabled) { + wnbd_props.Flags.FUASupported = 1; + wnbd_props.Flags.FlushSupported = 1; + } + + err = WnbdCreate(&wnbd_props, &RbdWnbdInterface, this, &wnbd_disk); + if (err) + goto exit; + + started = true; + + err = WnbdStartDispatcher(wnbd_disk, io_req_workers); + if (err) { + derr << "Could not start WNBD dispatcher. Error: " << err << dendl; + } + +exit: + return err; +} + +std::ostream &operator<<(std::ostream &os, const WnbdHandler::IOContext &ctx) { + + os << "[" << std::hex << ctx.req_handle; + + switch (ctx.req_type) + { + case WnbdReqTypeRead: + os << " READ "; + break; + case WnbdReqTypeWrite: + os << " WRITE "; + break; + case WnbdReqTypeFlush: + os << " FLUSH "; + break; + case WnbdReqTypeUnmap: + os << " TRIM "; + break; + default: + os << " UNKNOWN(" << ctx.req_type << ") "; + break; + } + + os << ctx.req_from << "~" << ctx.req_size << " " + << std::dec << ntohl(ctx.err_code) << "]"; + + return os; +} |