summaryrefslogtreecommitdiffstats
path: root/src/blk/kernel/KernelDevice.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/blk/kernel/KernelDevice.h')
-rw-r--r--src/blk/kernel/KernelDevice.h156
1 files changed, 156 insertions, 0 deletions
diff --git a/src/blk/kernel/KernelDevice.h b/src/blk/kernel/KernelDevice.h
new file mode 100644
index 000000000..e00e31f10
--- /dev/null
+++ b/src/blk/kernel/KernelDevice.h
@@ -0,0 +1,156 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_BLK_KERNELDEVICE_H
+#define CEPH_BLK_KERNELDEVICE_H
+
+#include <atomic>
+
+#include "include/types.h"
+#include "include/interval_set.h"
+#include "common/Thread.h"
+#include "include/utime.h"
+
+#include "aio/aio.h"
+#include "BlockDevice.h"
+#include "extblkdev/ExtBlkDevPlugin.h"
+
+#define RW_IO_MAX (INT_MAX & CEPH_PAGE_MASK)
+
+class KernelDevice : public BlockDevice {
+protected:
+ std::string path;
+private:
+ std::vector<int> fd_directs, fd_buffereds;
+ bool enable_wrt = true;
+ bool aio, dio;
+
+ ExtBlkDevInterfaceRef ebd_impl; // structure for retrieving compression state from extended block device
+
+ std::string devname; ///< kernel dev name (/sys/block/$devname), if any
+
+ ceph::mutex debug_lock = ceph::make_mutex("KernelDevice::debug_lock");
+ interval_set<uint64_t> debug_inflight;
+
+ std::atomic<bool> io_since_flush = {false};
+ ceph::mutex flush_mutex = ceph::make_mutex("KernelDevice::flush_mutex");
+
+ std::unique_ptr<io_queue_t> io_queue;
+ aio_callback_t discard_callback;
+ void *discard_callback_priv;
+ bool aio_stop;
+ bool discard_started;
+ bool discard_stop;
+
+ ceph::mutex discard_lock = ceph::make_mutex("KernelDevice::discard_lock");
+ ceph::condition_variable discard_cond;
+ bool discard_running = false;
+ interval_set<uint64_t> discard_queued;
+ interval_set<uint64_t> discard_finishing;
+
+ struct AioCompletionThread : public Thread {
+ KernelDevice *bdev;
+ explicit AioCompletionThread(KernelDevice *b) : bdev(b) {}
+ void *entry() override {
+ bdev->_aio_thread();
+ return NULL;
+ }
+ } aio_thread;
+
+ struct DiscardThread : public Thread {
+ KernelDevice *bdev;
+ explicit DiscardThread(KernelDevice *b) : bdev(b) {}
+ void *entry() override {
+ bdev->_discard_thread();
+ return NULL;
+ }
+ } discard_thread;
+
+ std::atomic_int injecting_crash;
+
+ virtual int _post_open() { return 0; } // hook for child implementations
+ virtual void _pre_close() { } // hook for child implementations
+
+ void _aio_thread();
+ void _discard_thread();
+ int _queue_discard(interval_set<uint64_t> &to_release);
+ bool try_discard(interval_set<uint64_t> &to_release, bool async = true) override;
+
+ int _aio_start();
+ void _aio_stop();
+
+ void _discard_start();
+ void _discard_stop();
+
+ void _aio_log_start(IOContext *ioc, uint64_t offset, uint64_t length);
+ void _aio_log_finish(IOContext *ioc, uint64_t offset, uint64_t length);
+
+ int _sync_write(uint64_t off, ceph::buffer::list& bl, bool buffered, int write_hint);
+
+ int _lock();
+
+ int direct_read_unaligned(uint64_t off, uint64_t len, char *buf);
+
+ // stalled aio debugging
+ aio_list_t debug_queue;
+ ceph::mutex debug_queue_lock = ceph::make_mutex("KernelDevice::debug_queue_lock");
+ aio_t *debug_oldest = nullptr;
+ utime_t debug_stall_since;
+ void debug_aio_link(aio_t& aio);
+ void debug_aio_unlink(aio_t& aio);
+
+ int choose_fd(bool buffered, int write_hint) const;
+
+ ceph::unique_leakable_ptr<buffer::raw> create_custom_aligned(size_t len, IOContext* ioc) const;
+
+public:
+ KernelDevice(CephContext* cct, aio_callback_t cb, void *cbpriv, aio_callback_t d_cb, void *d_cbpriv);
+
+ void aio_submit(IOContext *ioc) override;
+ void discard_drain() override;
+
+ int collect_metadata(const std::string& prefix, std::map<std::string,std::string> *pm) const override;
+ int get_devname(std::string *s) const override {
+ if (devname.empty()) {
+ return -ENOENT;
+ }
+ *s = devname;
+ return 0;
+ }
+ int get_devices(std::set<std::string> *ls) const override;
+
+ int get_ebd_state(ExtBlkDevState &state) const override;
+
+ int read(uint64_t off, uint64_t len, ceph::buffer::list *pbl,
+ IOContext *ioc,
+ bool buffered) override;
+ int aio_read(uint64_t off, uint64_t len, ceph::buffer::list *pbl,
+ IOContext *ioc) override;
+ int read_random(uint64_t off, uint64_t len, char *buf, bool buffered) override;
+
+ int write(uint64_t off, ceph::buffer::list& bl, bool buffered, int write_hint = WRITE_LIFE_NOT_SET) override;
+ int aio_write(uint64_t off, ceph::buffer::list& bl,
+ IOContext *ioc,
+ bool buffered,
+ int write_hint = WRITE_LIFE_NOT_SET) override;
+ int flush() override;
+ int _discard(uint64_t offset, uint64_t len);
+
+ // for managing buffered readers/writers
+ int invalidate_cache(uint64_t off, uint64_t len) override;
+ int open(const std::string& path) override;
+ void close() override;
+};
+
+#endif