summaryrefslogtreecommitdiffstats
path: root/src/os/bluestore/bluefs_types.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/os/bluestore/bluefs_types.h320
1 files changed, 320 insertions, 0 deletions
diff --git a/src/os/bluestore/bluefs_types.h b/src/os/bluestore/bluefs_types.h
new file mode 100644
index 000000000..b53000188
--- /dev/null
+++ b/src/os/bluestore/bluefs_types.h
@@ -0,0 +1,320 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_OS_BLUESTORE_BLUEFS_TYPES_H
+#define CEPH_OS_BLUESTORE_BLUEFS_TYPES_H
+
+#include <optional>
+
+#include "bluestore_types.h"
+#include "include/utime.h"
+#include "include/encoding.h"
+#include "include/denc.h"
+
+class bluefs_extent_t {
+public:
+ uint64_t offset = 0;
+ uint32_t length = 0;
+ uint8_t bdev;
+
+ bluefs_extent_t(uint8_t b = 0, uint64_t o = 0, uint32_t l = 0)
+ : offset(o), length(l), bdev(b) {}
+
+ uint64_t end() const { return offset + length; }
+ DENC(bluefs_extent_t, v, p) {
+ DENC_START(1, 1, p);
+ denc_lba(v.offset, p);
+ denc_varint_lowz(v.length, p);
+ denc(v.bdev, p);
+ DENC_FINISH(p);
+ }
+
+ void dump(ceph::Formatter *f) const;
+ static void generate_test_instances(std::list<bluefs_extent_t*>&);
+};
+WRITE_CLASS_DENC(bluefs_extent_t)
+
+std::ostream& operator<<(std::ostream& out, const bluefs_extent_t& e);
+
+struct bluefs_fnode_delta_t {
+ uint64_t ino;
+ uint64_t size;
+ utime_t mtime;
+ uint64_t offset; // Contains offset in file of extents.
+ // Equal to 'allocated' when created.
+ // Used for consistency checking.
+ mempool::bluefs::vector<bluefs_extent_t> extents;
+
+ DENC(bluefs_fnode_delta_t, v, p) {
+ DENC_START(1, 1, p);
+ denc_varint(v.ino, p);
+ denc_varint(v.size, p);
+ denc(v.mtime, p);
+ denc(v.offset, p);
+ denc(v.extents, p);
+ DENC_FINISH(p);
+ }
+};
+WRITE_CLASS_DENC(bluefs_fnode_delta_t)
+
+std::ostream& operator<<(std::ostream& out, const bluefs_fnode_delta_t& delta);
+
+struct bluefs_fnode_t {
+ uint64_t ino;
+ uint64_t size;
+ utime_t mtime;
+ uint8_t __unused__; // was prefer_bdev
+ mempool::bluefs::vector<bluefs_extent_t> extents;
+
+ // precalculated logical offsets for extents vector entries
+ // allows fast lookup for extent index by the offset value via upper_bound()
+ mempool::bluefs::vector<uint64_t> extents_index;
+
+ uint64_t allocated;
+ uint64_t allocated_commited;
+
+ bluefs_fnode_t() : ino(0), size(0), __unused__(0), allocated(0), allocated_commited(0) {}
+
+ uint64_t get_allocated() const {
+ return allocated;
+ }
+
+ void recalc_allocated() {
+ allocated = 0;
+ extents_index.reserve(extents.size());
+ for (auto& p : extents) {
+ extents_index.emplace_back(allocated);
+ allocated += p.length;
+ }
+ allocated_commited = allocated;
+ }
+
+ DENC_HELPERS
+ void bound_encode(size_t& p) const {
+ _denc_friend(*this, p);
+ }
+ void encode(ceph::buffer::list::contiguous_appender& p) const {
+ DENC_DUMP_PRE(bluefs_fnode_t);
+ _denc_friend(*this, p);
+ }
+ void decode(ceph::buffer::ptr::const_iterator& p) {
+ _denc_friend(*this, p);
+ recalc_allocated();
+ }
+ template<typename T, typename P>
+ friend std::enable_if_t<std::is_same_v<bluefs_fnode_t, std::remove_const_t<T>>>
+ _denc_friend(T& v, P& p) {
+ DENC_START(1, 1, p);
+ denc_varint(v.ino, p);
+ denc_varint(v.size, p);
+ denc(v.mtime, p);
+ denc(v.__unused__, p);
+ denc(v.extents, p);
+ DENC_FINISH(p);
+ }
+
+ void reset_delta() {
+ allocated_commited = allocated;
+ }
+ void claim_extents(mempool::bluefs::vector<bluefs_extent_t>& extents) {
+ for (const auto& p : extents) {
+ append_extent(p);
+ }
+ extents.clear();
+ }
+ void append_extent(const bluefs_extent_t& ext) {
+ if (!extents.empty() &&
+ extents.back().end() == ext.offset &&
+ extents.back().bdev == ext.bdev &&
+ (uint64_t)extents.back().length + (uint64_t)ext.length < 0xffffffff) {
+ extents.back().length += ext.length;
+ } else {
+ extents_index.emplace_back(allocated);
+ extents.push_back(ext);
+ }
+ allocated += ext.length;
+ }
+
+ void pop_front_extent() {
+ auto it = extents.begin();
+ allocated -= it->length;
+ extents_index.erase(extents_index.begin());
+ for (auto& i: extents_index) {
+ i -= it->length;
+ }
+ extents.erase(it);
+ }
+
+ void swap_extents(bluefs_fnode_t& other) {
+ other.extents.swap(extents);
+ other.extents_index.swap(extents_index);
+ std::swap(allocated, other.allocated);
+ std::swap(allocated_commited, other.allocated_commited);
+ }
+ void clear_extents() {
+ extents_index.clear();
+ extents.clear();
+ allocated = 0;
+ allocated_commited = 0;
+ }
+
+ mempool::bluefs::vector<bluefs_extent_t>::iterator seek(
+ uint64_t off, uint64_t *x_off);
+ bluefs_fnode_delta_t* make_delta(bluefs_fnode_delta_t* delta);
+
+ void dump(ceph::Formatter *f) const;
+ static void generate_test_instances(std::list<bluefs_fnode_t*>& ls);
+
+};
+WRITE_CLASS_DENC(bluefs_fnode_t)
+
+std::ostream& operator<<(std::ostream& out, const bluefs_fnode_t& file);
+
+struct bluefs_layout_t {
+ unsigned shared_bdev = 0; ///< which bluefs bdev we are sharing
+ bool dedicated_db = false; ///< whether block.db is present
+ bool dedicated_wal = false; ///< whether block.wal is present
+
+ bool single_shared_device() const {
+ return !dedicated_db && !dedicated_wal;
+ }
+
+ bool operator==(const bluefs_layout_t& other) const {
+ return shared_bdev == other.shared_bdev &&
+ dedicated_db == other.dedicated_db &&
+ dedicated_wal == other.dedicated_wal;
+ }
+
+ void encode(ceph::buffer::list& bl) const;
+ void decode(ceph::buffer::list::const_iterator& p);
+ void dump(ceph::Formatter *f) const;
+};
+WRITE_CLASS_ENCODER(bluefs_layout_t)
+
+struct bluefs_super_t {
+ uuid_d uuid; ///< unique to this bluefs instance
+ uuid_d osd_uuid; ///< matches the osd that owns us
+ uint64_t version;
+ uint32_t block_size;
+
+ bluefs_fnode_t log_fnode;
+
+ std::optional<bluefs_layout_t> memorized_layout;
+
+ bluefs_super_t()
+ : version(0),
+ block_size(4096) { }
+
+ uint64_t block_mask() const {
+ return ~((uint64_t)block_size - 1);
+ }
+
+ void encode(ceph::buffer::list& bl) const;
+ void decode(ceph::buffer::list::const_iterator& p);
+ void dump(ceph::Formatter *f) const;
+ static void generate_test_instances(std::list<bluefs_super_t*>& ls);
+};
+WRITE_CLASS_ENCODER(bluefs_super_t)
+
+std::ostream& operator<<(std::ostream&, const bluefs_super_t& s);
+
+
+struct bluefs_transaction_t {
+ typedef enum {
+ OP_NONE = 0,
+ OP_INIT, ///< initial (empty) file system marker
+ OP_ALLOC_ADD, ///< OBSOLETE: add extent to available block storage (extent)
+ OP_ALLOC_RM, ///< OBSOLETE: remove extent from available block storage (extent)
+ OP_DIR_LINK, ///< (re)set a dir entry (dirname, filename, ino)
+ OP_DIR_UNLINK, ///< remove a dir entry (dirname, filename)
+ OP_DIR_CREATE, ///< create a dir (dirname)
+ OP_DIR_REMOVE, ///< remove a dir (dirname)
+ OP_FILE_UPDATE, ///< set/update file metadata (file)
+ OP_FILE_REMOVE, ///< remove file (ino)
+ OP_JUMP, ///< jump the seq # and offset
+ OP_JUMP_SEQ, ///< jump the seq #
+ OP_FILE_UPDATE_INC, ///< incremental update file metadata (file)
+ } op_t;
+
+ uuid_d uuid; ///< fs uuid
+ uint64_t seq; ///< sequence number
+ ceph::buffer::list op_bl; ///< encoded transaction ops
+
+ bluefs_transaction_t() : seq(0) {}
+
+ void clear() {
+ *this = bluefs_transaction_t();
+ }
+ bool empty() const {
+ return op_bl.length() == 0;
+ }
+
+ void op_init() {
+ using ceph::encode;
+ encode((__u8)OP_INIT, op_bl);
+ }
+ void op_dir_create(std::string_view dir) {
+ using ceph::encode;
+ encode((__u8)OP_DIR_CREATE, op_bl);
+ encode(dir, op_bl);
+ }
+ void op_dir_remove(std::string_view dir) {
+ using ceph::encode;
+ encode((__u8)OP_DIR_REMOVE, op_bl);
+ encode(dir, op_bl);
+ }
+ void op_dir_link(std::string_view dir, std::string_view file, uint64_t ino) {
+ using ceph::encode;
+ encode((__u8)OP_DIR_LINK, op_bl);
+ encode(dir, op_bl);
+ encode(file, op_bl);
+ encode(ino, op_bl);
+ }
+ void op_dir_unlink(std::string_view dir, std::string_view file) {
+ using ceph::encode;
+ encode((__u8)OP_DIR_UNLINK, op_bl);
+ encode(dir, op_bl);
+ encode(file, op_bl);
+ }
+ void op_file_update(bluefs_fnode_t& file) {
+ using ceph::encode;
+ encode((__u8)OP_FILE_UPDATE, op_bl);
+ encode(file, op_bl);
+ file.reset_delta();
+ }
+ /* streams update to bufferlist and clears update state */
+ void op_file_update_inc(bluefs_fnode_t& file) {
+ using ceph::encode;
+ bluefs_fnode_delta_t delta;
+ file.make_delta(&delta); //also resets delta to zero
+ encode((__u8)OP_FILE_UPDATE_INC, op_bl);
+ encode(delta, op_bl);
+ }
+ void op_file_remove(uint64_t ino) {
+ using ceph::encode;
+ encode((__u8)OP_FILE_REMOVE, op_bl);
+ encode(ino, op_bl);
+ }
+ void op_jump(uint64_t next_seq, uint64_t offset) {
+ using ceph::encode;
+ encode((__u8)OP_JUMP, op_bl);
+ encode(next_seq, op_bl);
+ encode(offset, op_bl);
+ }
+ void op_jump_seq(uint64_t next_seq) {
+ using ceph::encode;
+ encode((__u8)OP_JUMP_SEQ, op_bl);
+ encode(next_seq, op_bl);
+ }
+ void claim_ops(bluefs_transaction_t& from) {
+ op_bl.claim_append(from.op_bl);
+ }
+
+ void encode(ceph::buffer::list& bl) const;
+ void decode(ceph::buffer::list::const_iterator& p);
+ void dump(ceph::Formatter *f) const;
+ static void generate_test_instances(std::list<bluefs_transaction_t*>& ls);
+};
+WRITE_CLASS_ENCODER(bluefs_transaction_t)
+
+std::ostream& operator<<(std::ostream& out, const bluefs_transaction_t& t);
+#endif