summaryrefslogtreecommitdiffstats
path: root/src/tools/rbd_wnbd/rbd_wnbd.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/tools/rbd_wnbd/rbd_wnbd.cc')
-rw-r--r--src/tools/rbd_wnbd/rbd_wnbd.cc1871
1 files changed, 1871 insertions, 0 deletions
diff --git a/src/tools/rbd_wnbd/rbd_wnbd.cc b/src/tools/rbd_wnbd/rbd_wnbd.cc
new file mode 100644
index 000000000..a9e160456
--- /dev/null
+++ b/src/tools/rbd_wnbd/rbd_wnbd.cc
@@ -0,0 +1,1871 @@
+/*
+ * rbd-wnbd - RBD in userspace
+ *
+ * Copyright (C) 2020 SUSE LINUX GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+*/
+
+#include <objidl.h>
+// LOCK_WRITE is also defined by objidl.h, we have to avoid
+// a collision.
+#undef LOCK_WRITE
+
+#include "include/int_types.h"
+
+#include <atomic>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include "wnbd_handler.h"
+#include "wnbd_wmi.h"
+#include "rbd_wnbd.h"
+
+#include <fstream>
+#include <memory>
+#include <regex>
+
+#include "common/Formatter.h"
+#include "common/TextTable.h"
+#include "common/ceph_argparse.h"
+#include "common/config.h"
+#include "common/debug.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/version.h"
+#include "common/win32/service.h"
+#include "common/win32/wstring.h"
+#include "common/admin_socket_client.h"
+
+#include "global/global_init.h"
+
+#include "include/uuid.h"
+#include "include/rados/librados.hpp"
+#include "include/rbd/librbd.hpp"
+
+#include <shellapi.h>
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd-wnbd: "
+
+using namespace std;
+
+// Wait 2s before recreating the wmi subscription in case of errors
+#define WMI_SUBSCRIPTION_RETRY_INTERVAL 2
+// SCSI adapter modification events aren't received until the entire polling
+// interval has elapsed (unlike other WMI classes, such as Msvm_ComputerSystem).
+// With longer intervals, it even seems to miss events. For this reason,
+// we're using a relatively short interval but have adapter state monitoring
+// as an optional feature, mainly used for dev / driver certification purposes.
+#define WNBD_ADAPTER_WMI_POLL_INTERVAL 2
+// Wait for wmi events up to two seconds
+#define WMI_EVENT_TIMEOUT 2
+
+bool is_process_running(DWORD pid)
+{
+ HANDLE process = OpenProcess(SYNCHRONIZE, FALSE, pid);
+ DWORD ret = WaitForSingleObject(process, 0);
+ CloseHandle(process);
+ return ret == WAIT_TIMEOUT;
+}
+
+DWORD WNBDActiveDiskIterator::fetch_list(
+ PWNBD_CONNECTION_LIST* conn_list)
+{
+ DWORD curr_buff_sz = 0;
+ DWORD buff_sz = 0;
+ DWORD err = 0;
+ PWNBD_CONNECTION_LIST tmp_list = NULL;
+
+ // We're using a loop because other connections may show up by the time
+ // we retry.
+ do {
+ if (tmp_list)
+ free(tmp_list);
+
+ if (buff_sz) {
+ tmp_list = (PWNBD_CONNECTION_LIST) calloc(1, buff_sz);
+ if (!tmp_list) {
+ derr << "Could not allocate " << buff_sz << " bytes." << dendl;
+ err = ERROR_NOT_ENOUGH_MEMORY;
+ break;
+ }
+ }
+
+ curr_buff_sz = buff_sz;
+ // If the buffer is too small, the return value is 0 and "BufferSize"
+ // will contain the required size. This is counterintuitive, but
+ // Windows drivers can't return a buffer as well as a non-zero status.
+ err = WnbdList(tmp_list, &buff_sz);
+ if (err)
+ break;
+ } while (curr_buff_sz < buff_sz);
+
+ if (err) {
+ if (tmp_list)
+ free(tmp_list);
+ } else {
+ *conn_list = tmp_list;
+ }
+ return err;
+}
+
+WNBDActiveDiskIterator::WNBDActiveDiskIterator()
+{
+ DWORD status = WNBDActiveDiskIterator::fetch_list(&conn_list);
+ switch (status) {
+ case 0:
+ // no error
+ break;
+ case ERROR_OPEN_FAILED:
+ error = -ENOENT;
+ break;
+ default:
+ error = -EINVAL;
+ break;
+ }
+}
+
+WNBDActiveDiskIterator::~WNBDActiveDiskIterator()
+{
+ if (conn_list) {
+ free(conn_list);
+ conn_list = NULL;
+ }
+}
+
+bool WNBDActiveDiskIterator::get(Config *cfg)
+{
+ index += 1;
+ *cfg = Config();
+
+ if (!conn_list || index >= (int)conn_list->Count) {
+ return false;
+ }
+
+ auto conn_info = conn_list->Connections[index];
+ auto conn_props = conn_info.Properties;
+
+ if (strncmp(conn_props.Owner, RBD_WNBD_OWNER_NAME, WNBD_MAX_OWNER_LENGTH)) {
+ dout(10) << "Ignoring disk: " << conn_props.InstanceName
+ << ". Owner: " << conn_props.Owner << dendl;
+ return this->get(cfg);
+ }
+
+ error = load_mapping_config_from_registry(conn_props.InstanceName, cfg);
+ if (error) {
+ derr << "Could not load registry disk info for: "
+ << conn_props.InstanceName << ". Error: " << error << dendl;
+ return false;
+ }
+
+ cfg->disk_number = conn_info.DiskNumber;
+ cfg->serial_number = std::string(conn_props.SerialNumber);
+ cfg->pid = conn_props.Pid;
+ cfg->active = cfg->disk_number > 0 && is_process_running(conn_props.Pid);
+ cfg->wnbd_mapped = true;
+
+ return true;
+}
+
+RegistryDiskIterator::RegistryDiskIterator()
+{
+ reg_key = new RegistryKey(g_ceph_context, HKEY_LOCAL_MACHINE,
+ SERVICE_REG_KEY, false);
+ if (!reg_key->hKey) {
+ if (!reg_key->missingKey)
+ error = -EINVAL;
+ return;
+ }
+
+ if (RegQueryInfoKey(reg_key->hKey, NULL, NULL, NULL, &subkey_count,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL)) {
+ derr << "Could not query registry key: " << SERVICE_REG_KEY << dendl;
+ error = -EINVAL;
+ return;
+ }
+}
+
+bool RegistryDiskIterator::get(Config *cfg)
+{
+ index += 1;
+ *cfg = Config();
+
+ if (!reg_key->hKey || !subkey_count) {
+ return false;
+ }
+
+ char subkey_name[MAX_PATH] = {0};
+ DWORD subkey_name_sz = MAX_PATH;
+ int err = RegEnumKeyEx(
+ reg_key->hKey, index, subkey_name, &subkey_name_sz,
+ NULL, NULL, NULL, NULL);
+ if (err == ERROR_NO_MORE_ITEMS) {
+ return false;
+ } else if (err) {
+ derr << "Could not enumerate registry. Error: " << err << dendl;
+ error = -EINVAL;
+ return false;
+ }
+
+ if (load_mapping_config_from_registry(subkey_name, cfg)) {
+ error = -EINVAL;
+ return false;
+ };
+
+ return true;
+}
+
+// Iterate over all RBD mappings, getting info from the registry and the driver.
+bool WNBDDiskIterator::get(Config *cfg)
+{
+ *cfg = Config();
+
+ bool found_active = active_iterator.get(cfg);
+ if (found_active) {
+ active_devices.insert(cfg->devpath);
+ return true;
+ }
+
+ error = active_iterator.get_error();
+ if (error) {
+ dout(5) << ": WNBD iterator error: " << error << dendl;
+ return false;
+ }
+
+ while(registry_iterator.get(cfg)) {
+ if (active_devices.find(cfg->devpath) != active_devices.end()) {
+ // Skip active devices that were already yielded.
+ continue;
+ }
+ return true;
+ }
+
+ error = registry_iterator.get_error();
+ if (error) {
+ dout(5) << ": Registry iterator error: " << error << dendl;
+ }
+ return false;
+}
+
+int get_exe_path(std::string& path) {
+ char buffer[MAX_PATH];
+ DWORD err = 0;
+
+ int ret = GetModuleFileNameA(NULL, buffer, MAX_PATH);
+ if (!ret || ret == MAX_PATH) {
+ err = GetLastError();
+ derr << "Could not retrieve executable path. "
+ << "Error: " << win32_strerror(err) << dendl;
+ return -EINVAL;
+ }
+
+ path = buffer;
+ return 0;
+}
+
+std::string get_cli_args() {
+ std::ostringstream cmdline;
+ for (int i=1; i<__argc; i++) {
+ if (i > 1)
+ cmdline << " ";
+ cmdline << std::quoted(__argv[i]);
+ }
+ return cmdline.str();
+}
+
+int send_map_request(std::string arguments) {
+ dout(15) << __func__ << ": command arguments: " << arguments << dendl;
+
+ BYTE request_buff[SERVICE_PIPE_BUFFSZ] = { 0 };
+ ServiceRequest* request = (ServiceRequest*) request_buff;
+ request->command = Connect;
+ arguments.copy(
+ (char*)request->arguments,
+ SERVICE_PIPE_BUFFSZ - FIELD_OFFSET(ServiceRequest, arguments));
+ ServiceReply reply = { 0 };
+
+ DWORD bytes_read = 0;
+ BOOL success = CallNamedPipe(
+ SERVICE_PIPE_NAME,
+ request_buff,
+ SERVICE_PIPE_BUFFSZ,
+ &reply,
+ sizeof(reply),
+ &bytes_read,
+ DEFAULT_MAP_TIMEOUT_MS);
+ if (!success) {
+ DWORD err = GetLastError();
+ derr << "Could not send device map request. "
+ << "Make sure that the ceph service is running. "
+ << "Error: " << win32_strerror(err) << dendl;
+ return -EINVAL;
+ }
+ if (reply.status) {
+ derr << "The ceph service failed to map the image. "
+ << "Check the log file or pass '-f' (foreground mode) "
+ << "for additional information. "
+ << "Error: " << cpp_strerror(reply.status)
+ << dendl;
+ }
+
+ return reply.status;
+}
+
+// Spawn a subprocess using the specified "rbd-wnbd" command
+// arguments. A pipe is passed to the child process,
+// which will allow it to communicate the mapping status
+int map_device_using_suprocess(std::string arguments, int timeout_ms)
+{
+ STARTUPINFO si;
+ PROCESS_INFORMATION pi;
+ char ch;
+ DWORD err = 0, status = 0;
+ int exit_code = 0;
+ std::ostringstream command_line;
+ std::string exe_path;
+ // Windows async IO context
+ OVERLAPPED connect_o, read_o;
+ HANDLE connect_event = NULL, read_event = NULL;
+ // Used for waiting on multiple events that are going to be initialized later.
+ HANDLE wait_events[2] = { INVALID_HANDLE_VALUE, INVALID_HANDLE_VALUE};
+ DWORD bytes_read = 0;
+ // We may get a command line containing an old pipe handle when
+ // recreating mappings, so we'll have to replace it.
+ std::regex pipe_pattern("([\'\"]?--pipe-name[\'\"]? +[\'\"]?[^ ]+[\'\"]?)");
+
+ uuid_d uuid;
+ uuid.generate_random();
+ std::ostringstream pipe_name;
+ pipe_name << "\\\\.\\pipe\\rbd-wnbd-" << uuid;
+
+ // Create an unique named pipe to communicate with the child. */
+ HANDLE pipe_handle = CreateNamedPipe(
+ pipe_name.str().c_str(),
+ PIPE_ACCESS_INBOUND | FILE_FLAG_FIRST_PIPE_INSTANCE |
+ FILE_FLAG_OVERLAPPED,
+ PIPE_WAIT,
+ 1, // Only accept one instance
+ SERVICE_PIPE_BUFFSZ,
+ SERVICE_PIPE_BUFFSZ,
+ SERVICE_PIPE_TIMEOUT_MS,
+ NULL);
+ if (pipe_handle == INVALID_HANDLE_VALUE) {
+ err = GetLastError();
+ derr << "CreateNamedPipe failed: " << win32_strerror(err) << dendl;
+ exit_code = -ECHILD;
+ goto finally;
+ }
+ connect_event = CreateEvent(0, TRUE, FALSE, NULL);
+ read_event = CreateEvent(0, TRUE, FALSE, NULL);
+ if (!connect_event || !read_event) {
+ err = GetLastError();
+ derr << "CreateEvent failed: " << win32_strerror(err) << dendl;
+ exit_code = -ECHILD;
+ goto finally;
+ }
+ connect_o.hEvent = connect_event;
+ read_o.hEvent = read_event;
+
+ status = ConnectNamedPipe(pipe_handle, &connect_o);
+ err = GetLastError();
+ if (status || err != ERROR_IO_PENDING) {
+ if (status)
+ err = status;
+ derr << "ConnectNamedPipe failed: " << win32_strerror(err) << dendl;
+ exit_code = -ECHILD;
+ goto finally;
+ }
+ err = 0;
+
+ dout(5) << __func__ << ": command arguments: " << arguments << dendl;
+
+ // We'll avoid running arbitrary commands, instead using the executable
+ // path of this process (expected to be the full rbd-wnbd.exe path).
+ err = get_exe_path(exe_path);
+ if (err) {
+ exit_code = -EINVAL;
+ goto finally;
+ }
+ command_line << std::quoted(exe_path)
+ << " " << std::regex_replace(arguments, pipe_pattern, "")
+ << " --pipe-name " << pipe_name.str();
+
+ dout(5) << __func__ << ": command line: " << command_line.str() << dendl;
+
+ GetStartupInfo(&si);
+ // Create a detached child
+ if (!CreateProcess(NULL, (char*)command_line.str().c_str(),
+ NULL, NULL, FALSE, DETACHED_PROCESS,
+ NULL, NULL, &si, &pi)) {
+ err = GetLastError();
+ derr << "CreateProcess failed: " << win32_strerror(err) << dendl;
+ exit_code = -ECHILD;
+ goto finally;
+ }
+
+ wait_events[0] = connect_event;
+ wait_events[1] = pi.hProcess;
+ status = WaitForMultipleObjects(2, wait_events, FALSE, timeout_ms);
+ switch(status) {
+ case WAIT_OBJECT_0:
+ if (!GetOverlappedResult(pipe_handle, &connect_o, &bytes_read, TRUE)) {
+ err = GetLastError();
+ derr << "Couln't establish a connection with the child process. "
+ << "Error: " << win32_strerror(err) << dendl;
+ exit_code = -ECHILD;
+ goto clean_process;
+ }
+ // We have an incoming connection.
+ break;
+ case WAIT_OBJECT_0 + 1:
+ // The process has exited prematurely.
+ goto clean_process;
+ case WAIT_TIMEOUT:
+ derr << "Timed out waiting for child process connection." << dendl;
+ goto clean_process;
+ default:
+ derr << "Failed waiting for child process. Status: " << status << dendl;
+ goto clean_process;
+ }
+ // Block and wait for child to say it is ready.
+ dout(5) << __func__ << ": waiting for child notification." << dendl;
+ if (!ReadFile(pipe_handle, &ch, 1, NULL, &read_o)) {
+ err = GetLastError();
+ if (err != ERROR_IO_PENDING) {
+ derr << "Receiving child process reply failed with: "
+ << win32_strerror(err) << dendl;
+ exit_code = -ECHILD;
+ goto clean_process;
+ }
+ }
+ wait_events[0] = read_event;
+ wait_events[1] = pi.hProcess;
+ // The RBD daemon is expected to write back right after opening the
+ // pipe. We'll use the same timeout value for now.
+ status = WaitForMultipleObjects(2, wait_events, FALSE, timeout_ms);
+ switch(status) {
+ case WAIT_OBJECT_0:
+ if (!GetOverlappedResult(pipe_handle, &read_o, &bytes_read, TRUE)) {
+ err = GetLastError();
+ derr << "Receiving child process reply failed with: "
+ << win32_strerror(err) << dendl;
+ exit_code = -ECHILD;
+ goto clean_process;
+ }
+ break;
+ case WAIT_OBJECT_0 + 1:
+ // The process has exited prematurely.
+ goto clean_process;
+ case WAIT_TIMEOUT:
+ derr << "Timed out waiting for child process message." << dendl;
+ goto clean_process;
+ default:
+ derr << "Failed waiting for child process. Status: " << status << dendl;
+ goto clean_process;
+ }
+
+ dout(5) << __func__ << ": received child notification." << dendl;
+ goto finally;
+
+ clean_process:
+ if (!is_process_running(pi.dwProcessId)) {
+ GetExitCodeProcess(pi.hProcess, (PDWORD)&exit_code);
+ derr << "Daemon failed with: " << cpp_strerror(exit_code) << dendl;
+ } else {
+ // The process closed the pipe without notifying us or exiting.
+ // This is quite unlikely, but we'll terminate the process.
+ dout(0) << "Terminating unresponsive process." << dendl;
+ TerminateProcess(pi.hProcess, 1);
+ exit_code = -EINVAL;
+ }
+
+ finally:
+ if (exit_code)
+ derr << "Could not start RBD daemon." << dendl;
+ if (pipe_handle)
+ CloseHandle(pipe_handle);
+ if (connect_event)
+ CloseHandle(connect_event);
+ if (read_event)
+ CloseHandle(read_event);
+ return exit_code;
+}
+
+BOOL WINAPI console_handler_routine(DWORD dwCtrlType)
+{
+ dout(0) << "Received control signal: " << dwCtrlType
+ << ". Exiting." << dendl;
+
+ std::unique_lock l{shutdown_lock};
+ if (handler)
+ handler->shutdown();
+
+ return true;
+}
+
+int save_config_to_registry(Config* cfg)
+{
+ std::string strKey{ SERVICE_REG_KEY };
+ strKey.append("\\");
+ strKey.append(cfg->devpath);
+ auto reg_key = RegistryKey(
+ g_ceph_context, HKEY_LOCAL_MACHINE, strKey.c_str(), true);
+ if (!reg_key.hKey) {
+ return -EINVAL;
+ }
+
+ int ret_val = 0;
+ // Registry writes are immediately available to other processes.
+ // Still, we'll do a flush to ensure that the mapping can be
+ // recreated after a system crash.
+ if (reg_key.set("pid", getpid()) ||
+ reg_key.set("devpath", cfg->devpath) ||
+ reg_key.set("poolname", cfg->poolname) ||
+ reg_key.set("nsname", cfg->nsname) ||
+ reg_key.set("imgname", cfg->imgname) ||
+ reg_key.set("snapname", cfg->snapname) ||
+ reg_key.set("command_line", get_cli_args()) ||
+ reg_key.set("persistent", cfg->persistent) ||
+ reg_key.set("admin_sock_path", g_conf()->admin_socket) ||
+ reg_key.flush()) {
+ ret_val = -EINVAL;
+ }
+
+ return ret_val;
+}
+
+int remove_config_from_registry(Config* cfg)
+{
+ std::string strKey{ SERVICE_REG_KEY };
+ strKey.append("\\");
+ strKey.append(cfg->devpath);
+ return RegistryKey::remove(
+ g_ceph_context, HKEY_LOCAL_MACHINE, strKey.c_str());
+}
+
+int load_mapping_config_from_registry(string devpath, Config* cfg)
+{
+ std::string strKey{ SERVICE_REG_KEY };
+ strKey.append("\\");
+ strKey.append(devpath);
+ auto reg_key = RegistryKey(
+ g_ceph_context, HKEY_LOCAL_MACHINE, strKey.c_str(), false);
+ if (!reg_key.hKey) {
+ if (reg_key.missingKey)
+ return -ENOENT;
+ else
+ return -EINVAL;
+ }
+
+ reg_key.get("devpath", cfg->devpath);
+ reg_key.get("poolname", cfg->poolname);
+ reg_key.get("nsname", cfg->nsname);
+ reg_key.get("imgname", cfg->imgname);
+ reg_key.get("snapname", cfg->snapname);
+ reg_key.get("command_line", cfg->command_line);
+ reg_key.get("persistent", cfg->persistent);
+ reg_key.get("admin_sock_path", cfg->admin_sock_path);
+
+ return 0;
+}
+
+int restart_registered_mappings(
+ int worker_count,
+ int total_timeout,
+ int image_map_timeout)
+{
+ Config cfg;
+ WNBDDiskIterator iterator;
+ int r;
+ std::atomic<int> err = 0;
+
+ dout(0) << "remounting persistent disks" << dendl;
+
+ int total_timeout_ms = max(total_timeout, total_timeout * 1000);
+ int image_map_timeout_ms = max(image_map_timeout, image_map_timeout * 1000);
+
+ LARGE_INTEGER start_t, counter_freq;
+ QueryPerformanceFrequency(&counter_freq);
+ QueryPerformanceCounter(&start_t);
+
+ boost::asio::thread_pool pool(worker_count);
+ while (iterator.get(&cfg)) {
+ if (cfg.command_line.empty()) {
+ derr << "Could not recreate mapping, missing command line: "
+ << cfg.devpath << dendl;
+ err = -EINVAL;
+ continue;
+ }
+ if (cfg.wnbd_mapped) {
+ dout(1) << __func__ << ": device already mapped: "
+ << cfg.devpath << dendl;
+ continue;
+ }
+ if (!cfg.persistent) {
+ dout(1) << __func__ << ": cleaning up non-persistent mapping: "
+ << cfg.devpath << dendl;
+ r = remove_config_from_registry(&cfg);
+ if (r) {
+ derr << __func__ << ": could not clean up non-persistent mapping: "
+ << cfg.devpath << dendl;
+ }
+ continue;
+ }
+
+ boost::asio::post(pool,
+ [cfg, start_t, counter_freq, total_timeout_ms,
+ image_map_timeout_ms, &err]()
+ {
+ LARGE_INTEGER curr_t, elapsed_ms;
+ QueryPerformanceCounter(&curr_t);
+ elapsed_ms.QuadPart = curr_t.QuadPart - start_t.QuadPart;
+ elapsed_ms.QuadPart *= 1000;
+ elapsed_ms.QuadPart /= counter_freq.QuadPart;
+
+ int time_left_ms = max(
+ 0,
+ total_timeout_ms - (int)elapsed_ms.QuadPart);
+ time_left_ms = min(image_map_timeout_ms, time_left_ms);
+ if (!time_left_ms) {
+ err = -ETIMEDOUT;
+ return;
+ }
+
+ dout(1) << "Remapping: " << cfg.devpath
+ << ". Timeout: " << time_left_ms << " ms." << dendl;
+
+ // We'll try to map all devices and return a non-zero value
+ // if any of them fails.
+ int r = map_device_using_suprocess(cfg.command_line, time_left_ms);
+ if (r) {
+ err = r;
+ derr << "Could not create mapping: "
+ << cfg.devpath << ". Error: " << r << dendl;
+ } else {
+ dout(1) << "Successfully remapped: " << cfg.devpath << dendl;
+ }
+ });
+ }
+ pool.join();
+
+ r = iterator.get_error();
+ if (r) {
+ derr << "Could not fetch all mappings. Error: " << r << dendl;
+ err = r;
+ }
+
+ return err;
+}
+
+int disconnect_all_mappings(
+ bool unregister,
+ bool hard_disconnect,
+ int soft_disconnect_timeout,
+ int worker_count)
+{
+ // Although not generally recommended, soft_disconnect_timeout can be 0,
+ // which means infinite timeout.
+ ceph_assert(soft_disconnect_timeout >= 0);
+ ceph_assert(worker_count > 0);
+ int64_t timeout_ms = soft_disconnect_timeout * 1000;
+
+ Config cfg;
+ WNBDActiveDiskIterator iterator;
+ int r;
+ std::atomic<int> err = 0;
+
+ boost::asio::thread_pool pool(worker_count);
+ LARGE_INTEGER start_t, counter_freq;
+ QueryPerformanceFrequency(&counter_freq);
+ QueryPerformanceCounter(&start_t);
+ while (iterator.get(&cfg)) {
+ boost::asio::post(pool,
+ [cfg, start_t, counter_freq, timeout_ms,
+ hard_disconnect, unregister, &err]() mutable
+ {
+ LARGE_INTEGER curr_t, elapsed_ms;
+ QueryPerformanceCounter(&curr_t);
+ elapsed_ms.QuadPart = curr_t.QuadPart - start_t.QuadPart;
+ elapsed_ms.QuadPart *= 1000;
+ elapsed_ms.QuadPart /= counter_freq.QuadPart;
+
+ int64_t time_left_ms = max((int64_t)0, timeout_ms - elapsed_ms.QuadPart);
+
+ cfg.hard_disconnect = hard_disconnect || !time_left_ms;
+ cfg.hard_disconnect_fallback = true;
+ cfg.soft_disconnect_timeout = time_left_ms / 1000;
+
+ dout(1) << "Removing mapping: " << cfg.devpath
+ << ". Timeout: " << cfg.soft_disconnect_timeout
+ << "s. Hard disconnect: " << cfg.hard_disconnect
+ << dendl;
+
+ int r = do_unmap(&cfg, unregister);
+ if (r) {
+ err = r;
+ derr << "Could not remove mapping: " << cfg.devpath
+ << ". Error: " << r << dendl;
+ } else {
+ dout(1) << "Successfully removed mapping: " << cfg.devpath << dendl;
+ }
+ });
+ }
+ pool.join();
+
+ r = iterator.get_error();
+ if (r == -ENOENT) {
+ dout(0) << __func__ << ": wnbd adapter unavailable, "
+ << "assuming that no wnbd mappings exist." << dendl;
+ err = 0;
+ } else if (r) {
+ derr << "Could not fetch all mappings. Error: " << r << dendl;
+ err = r;
+ }
+
+ return err;
+}
+
+class RBDService : public ServiceBase {
+ private:
+ bool hard_disconnect;
+ int soft_disconnect_timeout;
+ int thread_count;
+ int service_start_timeout;
+ int image_map_timeout;
+ bool remap_failure_fatal;
+ bool adapter_monitoring_enabled;
+
+ std::thread adapter_monitor_thread;
+
+ ceph::mutex start_lock = ceph::make_mutex("RBDService::StartLocker");
+ ceph::mutex shutdown_lock = ceph::make_mutex("RBDService::ShutdownLocker");
+ bool started = false;
+ std::atomic<bool> stop_requsted = false;
+
+ public:
+ RBDService(bool _hard_disconnect,
+ int _soft_disconnect_timeout,
+ int _thread_count,
+ int _service_start_timeout,
+ int _image_map_timeout,
+ bool _remap_failure_fatal,
+ bool _adapter_monitoring_enabled)
+ : ServiceBase(g_ceph_context)
+ , hard_disconnect(_hard_disconnect)
+ , soft_disconnect_timeout(_soft_disconnect_timeout)
+ , thread_count(_thread_count)
+ , service_start_timeout(_service_start_timeout)
+ , image_map_timeout(_image_map_timeout)
+ , remap_failure_fatal(_remap_failure_fatal)
+ , adapter_monitoring_enabled(_adapter_monitoring_enabled)
+ {
+ }
+
+ static int execute_command(ServiceRequest* request)
+ {
+ switch(request->command) {
+ case Connect:
+ dout(1) << "Received device connect request. Command line: "
+ << (char*)request->arguments << dendl;
+ // TODO: use the configured service map timeout.
+ // TODO: add ceph.conf options.
+ return map_device_using_suprocess(
+ (char*)request->arguments, DEFAULT_MAP_TIMEOUT_MS);
+ default:
+ dout(1) << "Received unsupported command: "
+ << request->command << dendl;
+ return -ENOSYS;
+ }
+ }
+
+ static DWORD handle_connection(HANDLE pipe_handle)
+ {
+ PBYTE message[SERVICE_PIPE_BUFFSZ] = { 0 };
+ DWORD bytes_read = 0, bytes_written = 0;
+ DWORD err = 0;
+ DWORD reply_sz = 0;
+ ServiceReply reply = { 0 };
+
+ dout(20) << __func__ << ": Receiving message." << dendl;
+ BOOL success = ReadFile(
+ pipe_handle, message, SERVICE_PIPE_BUFFSZ,
+ &bytes_read, NULL);
+ if (!success || !bytes_read) {
+ err = GetLastError();
+ derr << "Could not read service command: "
+ << win32_strerror(err) << dendl;
+ goto exit;
+ }
+
+ dout(20) << __func__ << ": Executing command." << dendl;
+ reply.status = execute_command((ServiceRequest*) message);
+ reply_sz = sizeof(reply);
+
+ dout(20) << __func__ << ": Sending reply. Status: "
+ << reply.status << dendl;
+ success = WriteFile(
+ pipe_handle, &reply, reply_sz, &bytes_written, NULL);
+ if (!success || reply_sz != bytes_written) {
+ err = GetLastError();
+ derr << "Could not send service command result: "
+ << win32_strerror(err) << dendl;
+ }
+
+exit:
+ dout(20) << __func__ << ": Cleaning up connection." << dendl;
+ FlushFileBuffers(pipe_handle);
+ DisconnectNamedPipe(pipe_handle);
+ CloseHandle(pipe_handle);
+
+ return err;
+ }
+
+ // We have to support Windows server 2016. Unix sockets only work on
+ // WS 2019, so we can't use the Ceph admin socket abstraction.
+ // Getting the Ceph admin sockets to work with Windows named pipes
+ // would require quite a few changes.
+ static DWORD accept_pipe_connection() {
+ DWORD err = 0;
+ // We're currently using default ACLs, which grant full control to the
+ // LocalSystem account and administrator as well as the owner.
+ dout(20) << __func__ << ": opening new pipe instance" << dendl;
+ HANDLE pipe_handle = CreateNamedPipe(
+ SERVICE_PIPE_NAME,
+ PIPE_ACCESS_DUPLEX,
+ PIPE_TYPE_MESSAGE | PIPE_READMODE_MESSAGE | PIPE_WAIT,
+ PIPE_UNLIMITED_INSTANCES,
+ SERVICE_PIPE_BUFFSZ,
+ SERVICE_PIPE_BUFFSZ,
+ SERVICE_PIPE_TIMEOUT_MS,
+ NULL);
+ if (pipe_handle == INVALID_HANDLE_VALUE) {
+ err = GetLastError();
+ derr << "CreatePipe failed: " << win32_strerror(err) << dendl;
+ return -EINVAL;
+ }
+
+ dout(20) << __func__ << ": waiting for connections." << dendl;
+ BOOL connected = ConnectNamedPipe(pipe_handle, NULL);
+ if (!connected) {
+ err = GetLastError();
+ if (err != ERROR_PIPE_CONNECTED) {
+ derr << "Pipe connection failed: " << win32_strerror(err) << dendl;
+
+ CloseHandle(pipe_handle);
+ return err;
+ }
+ }
+
+ dout(20) << __func__ << ": Connection received." << dendl;
+ // We'll handle the connection in a separate thread and at the same time
+ // accept a new connection.
+ HANDLE handler_thread = CreateThread(
+ NULL, 0, (LPTHREAD_START_ROUTINE) handle_connection, pipe_handle, 0, 0);
+ if (!handler_thread) {
+ err = GetLastError();
+ derr << "Could not start pipe connection handler thread: "
+ << win32_strerror(err) << dendl;
+ CloseHandle(pipe_handle);
+ } else {
+ CloseHandle(handler_thread);
+ }
+
+ return err;
+ }
+
+ static int pipe_server_loop(LPVOID arg)
+ {
+ dout(5) << "Accepting admin pipe connections." << dendl;
+ while (1) {
+ // This call will block until a connection is received, which will
+ // then be handled in a separate thread. The function returns, allowing
+ // us to accept another simultaneous connection.
+ accept_pipe_connection();
+ }
+ return 0;
+ }
+
+ int create_pipe_server() {
+ HANDLE handler_thread = CreateThread(
+ NULL, 0, (LPTHREAD_START_ROUTINE) pipe_server_loop, NULL, 0, 0);
+ DWORD err = 0;
+
+ if (!handler_thread) {
+ err = GetLastError();
+ derr << "Could not start pipe server: " << win32_strerror(err) << dendl;
+ } else {
+ CloseHandle(handler_thread);
+ }
+
+ return err;
+ }
+
+ void monitor_wnbd_adapter()
+ {
+ dout(5) << __func__ << ": initializing COM" << dendl;
+ // Initialize the Windows COM library for this thread.
+ COMBootstrapper com_bootstrapper;
+ HRESULT hres = com_bootstrapper.initialize();
+ if (FAILED(hres)) {
+ return;
+ }
+
+ WmiSubscription subscription = subscribe_wnbd_adapter_events(
+ WNBD_ADAPTER_WMI_POLL_INTERVAL);
+ dout(5) << __func__ << ": initializing wmi subscription" << dendl;
+ hres = subscription.initialize();
+
+ dout(0) << "monitoring wnbd adapter state changes" << dendl;
+ // The event watcher will wait at most WMI_EVENT_TIMEOUT (2s)
+ // and exit the loop if the service is being stopped.
+ while (!stop_requsted) {
+ IWbemClassObject* object;
+ ULONG returned = 0;
+
+ if (FAILED(hres)) {
+ derr << "couldn't retrieve wnbd adapter events, wmi hresult: "
+ << hres << ". Reestablishing wmi listener in "
+ << WMI_SUBSCRIPTION_RETRY_INTERVAL << " seconds." << dendl;
+ subscription.close();
+ Sleep(WMI_SUBSCRIPTION_RETRY_INTERVAL * 1000);
+
+ dout(20) << "recreating wnbd adapter wmi subscription" << dendl;
+ subscription = subscribe_wnbd_adapter_events(
+ WNBD_ADAPTER_WMI_POLL_INTERVAL);
+ hres = subscription.initialize();
+ continue;
+ }
+
+ dout(20) << "fetching wnbd adapter events" << dendl;
+ hres = subscription.next(
+ WMI_EVENT_TIMEOUT * 1000,
+ 1, // we'll process one event at a time
+ &object,
+ &returned);
+
+ if (!FAILED(hres) && returned) {
+ if (WBEM_S_NO_ERROR == object->InheritsFrom(L"__InstanceCreationEvent")) {
+ dout(0) << "wnbd adapter (re)created, remounting disks" << dendl;
+ restart_registered_mappings(
+ thread_count, service_start_timeout, image_map_timeout);
+ } else if (WBEM_S_NO_ERROR == object->InheritsFrom(L"__InstanceDeletionEvent")) {
+ dout(0) << "wnbd adapter removed" << dendl;
+ // nothing to do here
+ } else if (WBEM_S_NO_ERROR == object->InheritsFrom(L"__InstanceModificationEvent")) {
+ dout(0) << "wnbd adapter changed" << dendl;
+ // TODO: look for state changes and log the availability/status
+ }
+
+ object->Release();
+ }
+ }
+
+ dout(10) << "service stop requested, wnbd event monitor exited" << dendl;
+ }
+
+ int run_hook() override {
+ std::unique_lock l{start_lock};
+ if (started) {
+ // The run hook is only supposed to be called once per process,
+ // however we're staying cautious.
+ derr << "Service already running." << dendl;
+ return -EALREADY;
+ }
+
+ started = true;
+ // Restart registered mappings before accepting new ones.
+ int r = restart_registered_mappings(
+ thread_count, service_start_timeout, image_map_timeout);
+ if (r) {
+ if (remap_failure_fatal) {
+ derr << "Couldn't remap all images. Cleaning up." << dendl;
+ return r;
+ } else {
+ dout(0) << "Ignoring image remap failure." << dendl;
+ }
+ }
+
+ if (adapter_monitoring_enabled) {
+ adapter_monitor_thread = std::thread(&monitor_wnbd_adapter, this);
+ } else {
+ dout(0) << "WNBD adapter monitoring disabled." << dendl;
+ }
+
+ return create_pipe_server();
+ }
+
+ // Invoked when the service is requested to stop.
+ int stop_hook() override {
+ std::unique_lock l{shutdown_lock};
+
+ stop_requsted = true;
+
+ int r = disconnect_all_mappings(
+ false, hard_disconnect, soft_disconnect_timeout, thread_count);
+
+ if (adapter_monitor_thread.joinable()) {
+ dout(10) << "waiting for wnbd event monitor thread" << dendl;
+ adapter_monitor_thread.join();
+ dout(10) << "wnbd event monitor stopped" << dendl;
+ }
+
+ return r;
+ }
+
+ // Invoked when the system is shutting down.
+ int shutdown_hook() override {
+ return stop_hook();
+ }
+};
+
+class WNBDWatchCtx : public librbd::UpdateWatchCtx
+{
+private:
+ librados::IoCtx &io_ctx;
+ WnbdHandler* handler;
+ librbd::Image &image;
+ uint64_t size;
+public:
+ WNBDWatchCtx(librados::IoCtx& io_ctx, WnbdHandler* handler,
+ librbd::Image& image, uint64_t size)
+ : io_ctx(io_ctx)
+ , handler(handler)
+ , image(image)
+ , size(size)
+ { }
+
+ ~WNBDWatchCtx() override {}
+
+ void handle_notify() override
+ {
+ uint64_t new_size;
+
+ if (image.size(&new_size) == 0 && new_size != size &&
+ handler->resize(new_size) == 0) {
+ size = new_size;
+ }
+ }
+};
+
+static void usage()
+{
+ const char* usage_str =R"(
+Usage: rbd-wnbd [options] map <image-or-snap-spec> Map an image to wnbd device
+ [options] unmap <device|image-or-snap-spec> Unmap wnbd device
+ [options] list List mapped wnbd devices
+ [options] show <image-or-snap-spec> Show mapped wnbd device
+ stats <image-or-snap-spec> Show IO counters
+ [options] service Windows service entrypoint,
+ handling device lifecycle
+
+Map options:
+ --device <device path> Optional mapping unique identifier
+ --exclusive Forbid writes by other clients
+ --read-only Map read-only
+ --non-persistent Do not recreate the mapping when the Ceph service
+ restarts. By default, mappings are persistent
+ --io-req-workers The number of workers that dispatch IO requests.
+ Default: 4
+ --io-reply-workers The number of workers that dispatch IO replies.
+ Default: 4
+
+Unmap options:
+ --hard-disconnect Skip attempting a soft disconnect
+ --no-hard-disconnect-fallback Immediately return an error if the soft
+ disconnect fails instead of attempting a hard
+ disconnect as fallback
+ --soft-disconnect-timeout Soft disconnect timeout in seconds. The soft
+ disconnect operation uses PnP to notify the
+ Windows storage stack that the device is going to
+ be disconnectd. Storage drivers can block this
+ operation if there are pending operations,
+ unflushed caches or open handles. Default: 15
+
+Service options:
+ --hard-disconnect Skip attempting a soft disconnect
+ --soft-disconnect-timeout Cummulative soft disconnect timeout in seconds,
+ used when disconnecting existing mappings. A hard
+ disconnect will be issued when hitting the timeout
+ --service-thread-count The number of workers used when mapping or
+ unmapping images. Default: 8
+ --start-timeout The service start timeout in seconds. Default: 120
+ --map-timeout Individual image map timeout in seconds. Default: 20
+ --remap-failure-fatal If set, the service will stop when failing to remap
+ an image at start time, unmapping images that have
+ been mapped so far.
+ --adapter-monitoring-enabled If set, the service will monitor WNBD adapter WMI
+ events and remount the images when the adapter gets
+ recreated. Mainly used for development and driver
+ certification purposes.
+
+Show|List options:
+ --format plain|json|xml Output format (default: plain)
+ --pretty-format Pretty formatting (json and xml)
+
+Common options:
+ --wnbd-log-level libwnbd.dll log level
+
+)";
+
+ std::cout << usage_str;
+ generic_server_usage();
+}
+
+
+static Command cmd = None;
+
+int construct_devpath_if_missing(Config* cfg)
+{
+ // Windows doesn't allow us to request specific disk paths when mapping an
+ // image. This will just be used by rbd-wnbd and wnbd as an identifier.
+ if (cfg->devpath.empty()) {
+ if (cfg->imgname.empty()) {
+ derr << "Missing image name." << dendl;
+ return -EINVAL;
+ }
+
+ if (!cfg->poolname.empty()) {
+ cfg->devpath += cfg->poolname;
+ cfg->devpath += '/';
+ }
+ if (!cfg->nsname.empty()) {
+ cfg->devpath += cfg->nsname;
+ cfg->devpath += '/';
+ }
+
+ cfg->devpath += cfg->imgname;
+
+ if (!cfg->snapname.empty()) {
+ cfg->devpath += '@';
+ cfg->devpath += cfg->snapname;
+ }
+ }
+
+ return 0;
+}
+
+boost::intrusive_ptr<CephContext> do_global_init(
+ int argc, const char *argv[], Config *cfg)
+{
+ auto args = argv_to_vec(argc, argv);
+
+ code_environment_t code_env;
+ int flags;
+
+ switch(cmd) {
+ case Connect:
+ code_env = CODE_ENVIRONMENT_DAEMON;
+ flags = CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS;
+ break;
+ case Service:
+ code_env = CODE_ENVIRONMENT_DAEMON;
+ flags = CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS |
+ CINIT_FLAG_NO_MON_CONFIG |
+ CINIT_FLAG_NO_DAEMON_ACTIONS;
+ break;
+ default:
+ code_env = CODE_ENVIRONMENT_UTILITY;
+ flags = CINIT_FLAG_NO_MON_CONFIG;
+ break;
+ }
+
+ global_pre_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, code_env, flags);
+ // Avoid cluttering the console when spawning a mapping that will run
+ // in the background.
+ if (g_conf()->daemonize && cfg->parent_pipe.empty()) {
+ flags |= CINIT_FLAG_NO_DAEMON_ACTIONS;
+ }
+ auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
+ code_env, flags, FALSE);
+
+ // There's no fork on Windows, we should be safe calling this anytime.
+ common_init_finish(g_ceph_context);
+ global_init_chdir(g_ceph_context);
+
+ return cct;
+}
+
+static int do_map(Config *cfg)
+{
+ int r;
+
+ librados::Rados rados;
+ librbd::RBD rbd;
+ librados::IoCtx io_ctx;
+ librbd::Image image;
+ librbd::image_info_t info;
+ HANDLE parent_pipe_handle = INVALID_HANDLE_VALUE;
+ int err = 0;
+
+ if (g_conf()->daemonize && cfg->parent_pipe.empty()) {
+ return send_map_request(get_cli_args());
+ }
+
+ dout(0) << "Mapping RBD image: " << cfg->devpath << dendl;
+
+ r = rados.init_with_context(g_ceph_context);
+ if (r < 0) {
+ derr << "rbd-wnbd: couldn't initialize rados: " << cpp_strerror(r)
+ << dendl;
+ goto close_ret;
+ }
+
+ r = rados.connect();
+ if (r < 0) {
+ derr << "rbd-wnbd: couldn't connect to rados: " << cpp_strerror(r)
+ << dendl;
+ goto close_ret;
+ }
+
+ r = rados.ioctx_create(cfg->poolname.c_str(), io_ctx);
+ if (r < 0) {
+ derr << "rbd-wnbd: couldn't create IO context: " << cpp_strerror(r)
+ << dendl;
+ goto close_ret;
+ }
+
+ io_ctx.set_namespace(cfg->nsname);
+
+ r = rbd.open(io_ctx, image, cfg->imgname.c_str());
+ if (r < 0) {
+ derr << "rbd-wnbd: couldn't open rbd image: " << cpp_strerror(r)
+ << dendl;
+ goto close_ret;
+ }
+
+ if (cfg->exclusive) {
+ r = image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE);
+ if (r < 0) {
+ derr << "rbd-wnbd: failed to acquire exclusive lock: " << cpp_strerror(r)
+ << dendl;
+ goto close_ret;
+ }
+ }
+
+ if (!cfg->snapname.empty()) {
+ r = image.snap_set(cfg->snapname.c_str());
+ if (r < 0) {
+ derr << "rbd-wnbd: couldn't use snapshot: " << cpp_strerror(r)
+ << dendl;
+ goto close_ret;
+ }
+ }
+
+ r = image.stat(info, sizeof(info));
+ if (r < 0)
+ goto close_ret;
+
+ if (info.size > _UI64_MAX) {
+ r = -EFBIG;
+ derr << "rbd-wnbd: image is too large (" << byte_u_t(info.size)
+ << ", max is " << byte_u_t(_UI64_MAX) << ")" << dendl;
+ goto close_ret;
+ }
+
+ // We're storing mapping details in the registry even for non-persistent
+ // mappings. This allows us to easily retrieve mapping details such
+ // as the rbd pool or admin socket path.
+ // We're cleaning up the registry entry when the non-persistent mapping
+ // gets disconnected or when the ceph service restarts.
+ r = save_config_to_registry(cfg);
+ if (r < 0)
+ goto close_ret;
+
+ handler = new WnbdHandler(image, cfg->devpath,
+ info.size / RBD_WNBD_BLKSIZE,
+ RBD_WNBD_BLKSIZE,
+ !cfg->snapname.empty() || cfg->readonly,
+ g_conf().get_val<bool>("rbd_cache"),
+ cfg->io_req_workers,
+ cfg->io_reply_workers);
+ r = handler->start();
+ if (r) {
+ r = r == ERROR_ALREADY_EXISTS ? -EEXIST : -EINVAL;
+ goto close_ret;
+ }
+
+ // We're informing the parent processes that the initialization
+ // was successful.
+ if (!cfg->parent_pipe.empty()) {
+ parent_pipe_handle = CreateFile(
+ cfg->parent_pipe.c_str(), GENERIC_WRITE, 0, NULL,
+ OPEN_EXISTING, 0, NULL);
+ if (parent_pipe_handle == INVALID_HANDLE_VALUE) {
+ derr << "Could not open parent pipe: " << win32_strerror(err) << dendl;
+ } else if (!WriteFile(parent_pipe_handle, "a", 1, NULL, NULL)) {
+ // TODO: consider exiting in this case. The parent didn't wait for us,
+ // maybe it was killed after a timeout.
+ err = GetLastError();
+ derr << "Failed to communicate with the parent: "
+ << win32_strerror(err) << dendl;
+ } else {
+ dout(5) << __func__ << ": submitted parent notification." << dendl;
+ }
+
+ if (parent_pipe_handle != INVALID_HANDLE_VALUE)
+ CloseHandle(parent_pipe_handle);
+
+ global_init_postfork_finish(g_ceph_context);
+ }
+
+ {
+ uint64_t watch_handle;
+ WNBDWatchCtx watch_ctx(io_ctx, handler, image, info.size);
+ r = image.update_watch(&watch_ctx, &watch_handle);
+ if (r < 0) {
+ derr << __func__ << ": update_watch failed with error: "
+ << cpp_strerror(r) << dendl;
+
+ handler->shutdown();
+ goto close_ret;
+ }
+
+ handler->wait();
+
+ r = image.update_unwatch(watch_handle);
+ if (r < 0)
+ derr << __func__ << ": update_unwatch failed with error: "
+ << cpp_strerror(r) << dendl;
+
+ handler->shutdown();
+ }
+
+close_ret:
+ // The registry record shouldn't be removed for (already) running mappings.
+ if (!cfg->persistent) {
+ dout(5) << __func__ << ": cleaning up non-persistent mapping: "
+ << cfg->devpath << dendl;
+ r = remove_config_from_registry(cfg);
+ if (r) {
+ derr << __func__ << ": could not clean up non-persistent mapping: "
+ << cfg->devpath << dendl;
+ }
+ }
+
+ std::unique_lock l{shutdown_lock};
+
+ image.close();
+ io_ctx.close();
+ rados.shutdown();
+ if (handler) {
+ delete handler;
+ handler = nullptr;
+ }
+
+ return r;
+}
+
+static int do_unmap(Config *cfg, bool unregister)
+{
+ WNBD_REMOVE_OPTIONS remove_options = {0};
+ remove_options.Flags.HardRemove = cfg->hard_disconnect;
+ remove_options.Flags.HardRemoveFallback = cfg->hard_disconnect_fallback;
+ remove_options.SoftRemoveTimeoutMs = cfg->soft_disconnect_timeout * 1000;
+ remove_options.SoftRemoveRetryIntervalMs = SOFT_REMOVE_RETRY_INTERVAL * 1000;
+
+ int err = WnbdRemoveEx(cfg->devpath.c_str(), &remove_options);
+ if (err && err != ERROR_FILE_NOT_FOUND) {
+ return -EINVAL;
+ }
+
+ if (unregister) {
+ err = remove_config_from_registry(cfg);
+ if (err) {
+ derr << "rbd-wnbd: failed to unregister device: "
+ << cfg->devpath << ". Error: " << err << dendl;
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static int parse_imgpath(const std::string &imgpath, Config *cfg,
+ std::ostream *err_msg)
+{
+ std::regex pattern("^(?:([^/]+)/(?:([^/@]+)/)?)?([^@]+)(?:@([^/@]+))?$");
+ std::smatch match;
+ if (!std::regex_match(imgpath, match, pattern)) {
+ derr << "rbd-wnbd: invalid spec '" << imgpath << "'" << dendl;
+ return -EINVAL;
+ }
+
+ if (match[1].matched) {
+ cfg->poolname = match[1];
+ }
+
+ if (match[2].matched) {
+ cfg->nsname = match[2];
+ }
+
+ cfg->imgname = match[3];
+
+ if (match[4].matched)
+ cfg->snapname = match[4];
+
+ return 0;
+}
+
+static int do_list_mapped_devices(const std::string &format, bool pretty_format)
+{
+ std::unique_ptr<ceph::Formatter> f;
+ TextTable tbl;
+
+ if (format == "json") {
+ f.reset(new JSONFormatter(pretty_format));
+ } else if (format == "xml") {
+ f.reset(new XMLFormatter(pretty_format));
+ } else if (!format.empty() && format != "plain") {
+ derr << "rbd-wnbd: invalid output format: " << format << dendl;
+ return -EINVAL;
+ }
+
+ if (f) {
+ f->open_array_section("devices");
+ } else {
+ tbl.define_column("id", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("pool", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("namespace", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("image", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("snap", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("device", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("disk_number", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("status", TextTable::LEFT, TextTable::LEFT);
+ }
+
+ Config cfg;
+ WNBDDiskIterator wnbd_disk_iterator;
+
+ while (wnbd_disk_iterator.get(&cfg)) {
+ const char* status = cfg.active ?
+ WNBD_STATUS_ACTIVE : WNBD_STATUS_INACTIVE;
+
+ if (f) {
+ f->open_object_section("device");
+ f->dump_int("id", cfg.pid ? cfg.pid : -1);
+ f->dump_string("device", cfg.devpath);
+ f->dump_string("pool", cfg.poolname);
+ f->dump_string("namespace", cfg.nsname);
+ f->dump_string("image", cfg.imgname);
+ f->dump_string("snap", cfg.snapname);
+ f->dump_int("disk_number", cfg.disk_number ? cfg.disk_number : -1);
+ f->dump_string("status", status);
+ f->close_section();
+ } else {
+ if (cfg.snapname.empty()) {
+ cfg.snapname = "-";
+ }
+ tbl << (cfg.pid ? cfg.pid : -1) << cfg.poolname << cfg.nsname
+ << cfg.imgname << cfg.snapname << cfg.devpath
+ << cfg.disk_number << status << TextTable::endrow;
+ }
+ }
+ int error = wnbd_disk_iterator.get_error();
+ if (error) {
+ derr << "Could not get disk list: " << error << dendl;
+ return error;
+ }
+
+ if (f) {
+ f->close_section();
+ f->flush(std::cout);
+ } else {
+ std::cout << tbl;
+ }
+
+ return 0;
+}
+
+static int do_show_mapped_device(std::string format, bool pretty_format,
+ std::string devpath)
+{
+ std::unique_ptr<ceph::Formatter> f;
+ TextTable tbl;
+
+ if (format.empty() || format == "plain") {
+ format = "json";
+ pretty_format = true;
+ }
+ if (format == "json") {
+ f.reset(new JSONFormatter(pretty_format));
+ } else if (format == "xml") {
+ f.reset(new XMLFormatter(pretty_format));
+ } else {
+ derr << "rbd-wnbd: invalid output format: " << format << dendl;
+ return -EINVAL;
+ }
+
+ Config cfg;
+ int error = load_mapping_config_from_registry(devpath, &cfg);
+ if (error) {
+ derr << "Could not load registry disk info for: "
+ << devpath << ". Error: " << error << dendl;
+ return error;
+ }
+
+ WNBD_CONNECTION_INFO conn_info = { 0 };
+ // If the device is currently disconnected but there is a persistent
+ // mapping record, we'll show that.
+ DWORD ret = WnbdShow(devpath.c_str(), &conn_info);
+ if (ret && ret != ERROR_FILE_NOT_FOUND) {
+ return -EINVAL;
+ }
+
+ auto conn_props = conn_info.Properties;
+ cfg.active = conn_info.DiskNumber > 0 && is_process_running(conn_props.Pid);
+ f->open_object_section("device");
+ f->dump_int("id", conn_props.Pid ? conn_props.Pid : -1);
+ f->dump_string("device", cfg.devpath);
+ f->dump_string("pool", cfg.poolname);
+ f->dump_string("namespace", cfg.nsname);
+ f->dump_string("image", cfg.imgname);
+ f->dump_string("snap", cfg.snapname);
+ f->dump_int("persistent", cfg.persistent);
+ f->dump_int("disk_number", conn_info.DiskNumber ? conn_info.DiskNumber : -1);
+ f->dump_string("status", cfg.active ? WNBD_STATUS_ACTIVE : WNBD_STATUS_INACTIVE);
+ f->dump_string("pnp_device_id", to_string(conn_info.PNPDeviceID));
+ f->dump_int("readonly", conn_props.Flags.ReadOnly);
+ f->dump_int("block_size", conn_props.BlockSize);
+ f->dump_int("block_count", conn_props.BlockCount);
+ f->dump_int("flush_enabled", conn_props.Flags.FlushSupported);
+ f->close_section();
+ f->flush(std::cout);
+
+ return 0;
+}
+
+static int do_stats(std::string search_devpath)
+{
+ Config cfg;
+ WNBDDiskIterator wnbd_disk_iterator;
+
+ while (wnbd_disk_iterator.get(&cfg)) {
+ if (cfg.devpath != search_devpath)
+ continue;
+
+ AdminSocketClient client = AdminSocketClient(cfg.admin_sock_path);
+ std::string output;
+ std::string result = client.do_request("{\"prefix\":\"wnbd stats\"}",
+ &output);
+ if (!result.empty()) {
+ std::cerr << "Admin socket error: " << result << std::endl;
+ return -EINVAL;
+ }
+
+ std::cout << output << std::endl;
+ return 0;
+ }
+ int error = wnbd_disk_iterator.get_error();
+ if (!error) {
+ error = -ENOENT;
+ }
+
+ derr << "Could not find the specified disk." << dendl;
+ return error;
+}
+
+static int parse_args(std::vector<const char*>& args,
+ std::ostream *err_msg,
+ Command *command, Config *cfg)
+{
+ std::string conf_file_list;
+ std::string cluster;
+ CephInitParameters iparams = ceph_argparse_early_args(
+ args, CEPH_ENTITY_TYPE_CLIENT, &cluster, &conf_file_list);
+
+ ConfigProxy config{false};
+ config->name = iparams.name;
+ config->cluster = cluster;
+
+ if (!conf_file_list.empty()) {
+ config.parse_config_files(conf_file_list.c_str(), nullptr, 0);
+ } else {
+ config.parse_config_files(nullptr, nullptr, 0);
+ }
+ config.parse_env(CEPH_ENTITY_TYPE_CLIENT);
+ config.parse_argv(args);
+ cfg->poolname = config.get_val<std::string>("rbd_default_pool");
+
+ std::vector<const char*>::iterator i;
+ std::ostringstream err;
+
+ // TODO: consider using boost::program_options like Device.cc does.
+ // This should simplify argument parsing. Also, some arguments must be tied
+ // to specific commands, for example the disconnect timeout. Luckily,
+ // this is enforced by the "rbd device" wrapper.
+ for (i = args.begin(); i != args.end(); ) {
+ if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) {
+ return HELP_INFO;
+ } else if (ceph_argparse_flag(args, i, "-v", "--version", (char*)NULL)) {
+ return VERSION_INFO;
+ } else if (ceph_argparse_witharg(args, i, &cfg->devpath, "--device", (char *)NULL)) {
+ } else if (ceph_argparse_witharg(args, i, &cfg->format, err, "--format",
+ (char *)NULL)) {
+ } else if (ceph_argparse_flag(args, i, "--read-only", (char *)NULL)) {
+ cfg->readonly = true;
+ } else if (ceph_argparse_flag(args, i, "--exclusive", (char *)NULL)) {
+ cfg->exclusive = true;
+ } else if (ceph_argparse_flag(args, i, "--non-persistent", (char *)NULL)) {
+ cfg->persistent = false;
+ } else if (ceph_argparse_flag(args, i, "--pretty-format", (char *)NULL)) {
+ cfg->pretty_format = true;
+ } else if (ceph_argparse_flag(args, i, "--remap-failure-fatal", (char *)NULL)) {
+ cfg->remap_failure_fatal = true;
+ } else if (ceph_argparse_flag(args, i, "--adapter-monitoring-enabled", (char *)NULL)) {
+ cfg->adapter_monitoring_enabled = true;
+ } else if (ceph_argparse_witharg(args, i, &cfg->parent_pipe, err,
+ "--pipe-name", (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-wnbd: " << err.str();
+ return -EINVAL;
+ }
+ } else if (ceph_argparse_witharg(args, i, (int*)&cfg->wnbd_log_level,
+ err, "--wnbd-log-level", (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-wnbd: " << err.str();
+ return -EINVAL;
+ }
+ if (cfg->wnbd_log_level < 0) {
+ *err_msg << "rbd-wnbd: Invalid argument for wnbd-log-level";
+ return -EINVAL;
+ }
+ } else if (ceph_argparse_witharg(args, i, (int*)&cfg->io_req_workers,
+ err, "--io-req-workers", (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-wnbd: " << err.str();
+ return -EINVAL;
+ }
+ if (cfg->io_req_workers <= 0) {
+ *err_msg << "rbd-wnbd: Invalid argument for io-req-workers";
+ return -EINVAL;
+ }
+ } else if (ceph_argparse_witharg(args, i, (int*)&cfg->io_reply_workers,
+ err, "--io-reply-workers", (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-wnbd: " << err.str();
+ return -EINVAL;
+ }
+ if (cfg->io_reply_workers <= 0) {
+ *err_msg << "rbd-wnbd: Invalid argument for io-reply-workers";
+ return -EINVAL;
+ }
+ } else if (ceph_argparse_witharg(args, i, (int*)&cfg->service_thread_count,
+ err, "--service-thread-count", (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-wnbd: " << err.str();
+ return -EINVAL;
+ }
+ if (cfg->service_thread_count <= 0) {
+ *err_msg << "rbd-wnbd: Invalid argument for service-thread-count";
+ return -EINVAL;
+ }
+ } else if (ceph_argparse_flag(args, i, "--hard-disconnect", (char *)NULL)) {
+ cfg->hard_disconnect = true;
+ } else if (ceph_argparse_flag(args, i,
+ "--no-hard-disconnect-fallback", (char *)NULL)) {
+ cfg->hard_disconnect_fallback = false;
+ } else if (ceph_argparse_witharg(args, i,
+ (int*)&cfg->soft_disconnect_timeout,
+ err, "--soft-disconnect-timeout",
+ (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-wnbd: " << err.str();
+ return -EINVAL;
+ }
+ if (cfg->soft_disconnect_timeout < 0) {
+ *err_msg << "rbd-wnbd: Invalid argument for soft-disconnect-timeout";
+ return -EINVAL;
+ }
+ } else if (ceph_argparse_witharg(args, i,
+ (int*)&cfg->service_start_timeout,
+ err, "--start-timeout",
+ (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-wnbd: " << err.str();
+ return -EINVAL;
+ }
+ if (cfg->service_start_timeout <= 0) {
+ *err_msg << "rbd-wnbd: Invalid argument for start-timeout";
+ return -EINVAL;
+ }
+ } else if (ceph_argparse_witharg(args, i,
+ (int*)&cfg->image_map_timeout,
+ err, "--map-timeout",
+ (char *)NULL)) {
+ if (!err.str().empty()) {
+ *err_msg << "rbd-wnbd: " << err.str();
+ return -EINVAL;
+ }
+ if (cfg->image_map_timeout <= 0) {
+ *err_msg << "rbd-wnbd: Invalid argument for map-timeout";
+ return -EINVAL;
+ }
+ } else {
+ ++i;
+ }
+ }
+
+ Command cmd = None;
+ if (args.begin() != args.end()) {
+ if (strcmp(*args.begin(), "map") == 0) {
+ cmd = Connect;
+ } else if (strcmp(*args.begin(), "unmap") == 0) {
+ cmd = Disconnect;
+ } else if (strcmp(*args.begin(), "list") == 0) {
+ cmd = List;
+ } else if (strcmp(*args.begin(), "show") == 0) {
+ cmd = Show;
+ } else if (strcmp(*args.begin(), "service") == 0) {
+ cmd = Service;
+ } else if (strcmp(*args.begin(), "stats") == 0) {
+ cmd = Stats;
+ } else if (strcmp(*args.begin(), "help") == 0) {
+ return HELP_INFO;
+ } else {
+ *err_msg << "rbd-wnbd: unknown command: " << *args.begin();
+ return -EINVAL;
+ }
+ args.erase(args.begin());
+ }
+
+ if (cmd == None) {
+ *err_msg << "rbd-wnbd: must specify command";
+ return -EINVAL;
+ }
+
+ switch (cmd) {
+ case Connect:
+ case Disconnect:
+ case Show:
+ case Stats:
+ if (args.begin() == args.end()) {
+ *err_msg << "rbd-wnbd: must specify wnbd device or image-or-snap-spec";
+ return -EINVAL;
+ }
+ if (parse_imgpath(*args.begin(), cfg, err_msg) < 0) {
+ return -EINVAL;
+ }
+ args.erase(args.begin());
+ break;
+ default:
+ //shut up gcc;
+ break;
+ }
+
+ if (args.begin() != args.end()) {
+ *err_msg << "rbd-wnbd: unknown args: " << *args.begin();
+ return -EINVAL;
+ }
+
+ *command = cmd;
+ return 0;
+}
+
+static int rbd_wnbd(int argc, const char *argv[])
+{
+ Config cfg;
+ auto args = argv_to_vec(argc, argv);
+
+ // Avoid using dout before calling "do_global_init"
+ if (args.empty()) {
+ std::cout << argv[0] << ": -h or --help for usage" << std::endl;
+ exit(1);
+ }
+
+ std::ostringstream err_msg;
+ int r = parse_args(args, &err_msg, &cmd, &cfg);
+ if (r == HELP_INFO) {
+ usage();
+ return 0;
+ } else if (r == VERSION_INFO) {
+ std::cout << pretty_version_to_str() << std::endl;
+ return 0;
+ } else if (r < 0) {
+ std::cout << err_msg.str() << std::endl;
+ return r;
+ }
+
+ auto cct = do_global_init(argc, argv, &cfg);
+
+ WnbdSetLogger(WnbdHandler::LogMessage);
+ WnbdSetLogLevel(cfg.wnbd_log_level);
+
+ switch (cmd) {
+ case Connect:
+ if (construct_devpath_if_missing(&cfg)) {
+ return -EINVAL;
+ }
+ r = do_map(&cfg);
+ if (r < 0)
+ return r;
+ break;
+ case Disconnect:
+ if (construct_devpath_if_missing(&cfg)) {
+ return -EINVAL;
+ }
+ r = do_unmap(&cfg, true);
+ if (r < 0)
+ return r;
+ break;
+ case List:
+ r = do_list_mapped_devices(cfg.format, cfg.pretty_format);
+ if (r < 0)
+ return r;
+ break;
+ case Show:
+ if (construct_devpath_if_missing(&cfg)) {
+ return r;
+ }
+ r = do_show_mapped_device(cfg.format, cfg.pretty_format, cfg.devpath);
+ if (r < 0)
+ return r;
+ break;
+ case Service:
+ {
+ RBDService service(cfg.hard_disconnect, cfg.soft_disconnect_timeout,
+ cfg.service_thread_count,
+ cfg.service_start_timeout,
+ cfg.image_map_timeout,
+ cfg.remap_failure_fatal,
+ cfg.adapter_monitoring_enabled);
+ // This call will block until the service stops.
+ r = RBDService::initialize(&service);
+ if (r < 0)
+ return r;
+ break;
+ }
+ case Stats:
+ if (construct_devpath_if_missing(&cfg)) {
+ return -EINVAL;
+ }
+ return do_stats(cfg.devpath);
+ default:
+ usage();
+ break;
+ }
+
+ return 0;
+}
+
+int main(int argc, const char *argv[])
+{
+ SetConsoleCtrlHandler(console_handler_routine, true);
+ // Avoid the Windows Error Reporting dialog.
+ SetErrorMode(GetErrorMode() | SEM_NOGPFAULTERRORBOX);
+ int r = rbd_wnbd(argc, argv);
+ if (r < 0) {
+ return r;
+ }
+ return 0;
+}