summaryrefslogtreecommitdiffstats
path: root/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h
blob: b6f33a1ae87c51f00fd65765447e735e6382457a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#pragma once

#include <sys/mman.h>
#include <memory>
#include <string.h>

#include "crimson/common/log.h"
#include "crimson/os/seastore/lba_manager/btree/btree_range_pin.h"
#include "crimson/os/seastore/lba_manager.h"

namespace crimson::os::seastore::lba_manager::btree {

/**
 * op_context_t
 *
 * Bundle of per-operation references threaded by value through every
 * btree call so nodes can reach the cache, pin set, and transaction.
 */
struct op_context_t {
  Cache &cache;          ///< extent cache used to fetch/allocate nodes
  btree_pin_set_t &pins; ///< pin set for btree_range_pin_t registration
  Transaction &trans;    ///< transaction all reads/mutations occur within
};

/**
 * lba_map_val_t
 *
 * struct representing a single lba mapping
 */
struct lba_map_val_t {
  extent_len_t len = 0;  ///< length of mapping
  paddr_t paddr;         ///< physical addr of mapping
  uint32_t refcount = 0; ///< refcount
  uint32_t checksum = 0; ///< checksum of original block written at paddr (TODO)

  /// The user-provided constructor below suppresses the implicit default
  /// constructor; restore it so the type stays default-constructible
  /// (members already carry in-class defaults).
  lba_map_val_t() = default;

  lba_map_val_t(
    extent_len_t len,
    paddr_t paddr,
    uint32_t refcount,
    uint32_t checksum)
    : len(len), paddr(paddr), refcount(refcount), checksum(checksum) {}
};

// Forward declaration; BtreeLBAPin is defined alongside the btree LBA
// manager implementation.  Owning handle type for a single pin.
class BtreeLBAPin;
using BtreeLBAPinRef = std::unique_ptr<BtreeLBAPin>;

/**
 * LBANode
 *
 * Base class enabling recursive lookup between internal and leaf nodes.
 */
struct LBANode : CachedExtent {
  using LBANodeRef = TCachedExtentRef<LBANode>;
  using lookup_range_ertr = LBAManager::get_mapping_ertr;
  using lookup_range_ret = LBAManager::get_mapping_ret;

  /// Range pin for this node; registered against op_context_t::pins and
  /// transferred from the prior instance on delta write (see on_delta_write).
  /// NOTE(review): exact pin semantics live in btree_range_pin.h -- confirm.
  btree_range_pin_t pin;

  /// Takes ownership of the backing buffer; pin is bound to this instance.
  LBANode(ceph::bufferptr &&ptr) : CachedExtent(std::move(ptr)), pin(this) {}
  /// Copy constructor rebinds the copied pin to the new instance.
  LBANode(const LBANode &rhs)
    : CachedExtent(rhs), pin(rhs.pin, this) {}

  /// Returns this node's metadata (lba_node_meta_t; declared elsewhere).
  virtual lba_node_meta_t get_node_meta() const = 0;

  /**
   * lookup
   *
   * Returns the node at the specified depth responsible
   * for laddr
   */
  using lookup_ertr = crimson::errorator<
    crimson::ct_error::input_output_error>;
  using lookup_ret = lookup_ertr::future<LBANodeRef>;
  virtual lookup_ret lookup(
    op_context_t c,
    laddr_t addr,
    depth_t depth) = 0;

  /**
   * lookup_range
   *
   * Returns mappings within range [addr, addr+len)
   */
  virtual lookup_range_ret lookup_range(
    op_context_t c,
    laddr_t addr,
    extent_len_t len) = 0;

  /**
   * insert
   *
   * Recursively inserts into subtree rooted at *this.  Caller
   * must already have handled splitting if at_max_capacity().
   *
   * Precondition: !at_max_capacity()
   */
  using insert_ertr = crimson::errorator<
    crimson::ct_error::input_output_error
    >;
  using insert_ret = insert_ertr::future<LBAPinRef>;
  virtual insert_ret insert(
    op_context_t c,
    laddr_t laddr,
    lba_map_val_t val) = 0;

  /**
   * find_hole
   *
   * Finds minimum hole of size len in [min, max)
   *
   * @return addr of hole, L_ADDR_NULL if unfound
   */
  using find_hole_ertr = crimson::errorator<
    crimson::ct_error::input_output_error>;
  using find_hole_ret = find_hole_ertr::future<laddr_t>;
  virtual find_hole_ret find_hole(
    op_context_t c,
    laddr_t min,
    laddr_t max,
    extent_len_t len) = 0;

  /**
   * scan_mappings
   *
   * Call f for all mappings in [begin, end)
   */
  using scan_mappings_ertr = LBAManager::scan_mappings_ertr;
  using scan_mappings_ret = LBAManager::scan_mappings_ret;
  using scan_mappings_func_t = LBAManager::scan_mappings_func_t;
  virtual scan_mappings_ret scan_mappings(
    op_context_t c,
    laddr_t begin,
    laddr_t end,
    scan_mappings_func_t &f) = 0;

  /**
   * scan_mapped_space
   *
   * Calls f on mapped space in the subtree rooted at *this; contract
   * mirrors LBAManager::scan_mapped_space (see lba_manager.h).
   */
  using scan_mapped_space_ertr = LBAManager::scan_mapped_space_ertr;
  using scan_mapped_space_ret = LBAManager::scan_mapped_space_ret;
  using scan_mapped_space_func_t = LBAManager::scan_mapped_space_func_t;
  virtual scan_mapped_space_ret scan_mapped_space(
    op_context_t c,
    scan_mapped_space_func_t &f) = 0;

  /**
   * mutate_mapping
   *
   * Lookups up laddr, calls f on value. If f returns a value, inserts it.
   * If it returns nullopt, removes the value.
   * Caller must already have merged if at_min_capacity().
   *
   * Recursive calls use mutate_mapping_internal.
   *
   * Precondition: !at_min_capacity()
   */
  using mutate_mapping_ertr = crimson::errorator<
    crimson::ct_error::enoent,            ///< mapping does not exist
    crimson::ct_error::input_output_error
    >;
  using mutate_mapping_ret = mutate_mapping_ertr::future<
    lba_map_val_t>;
  using mutate_func_t = std::function<
    lba_map_val_t(const lba_map_val_t &v)
    >;
  virtual mutate_mapping_ret mutate_mapping(
    op_context_t c,
    laddr_t laddr,
    mutate_func_t &&f) = 0;
  /// Recursive form of mutate_mapping; is_root distinguishes the top-level
  /// invocation from recursive descent.
  virtual mutate_mapping_ret mutate_mapping_internal(
    op_context_t c,
    laddr_t laddr,
    bool is_root,
    mutate_func_t &&f) = 0;

  /**
   * mutate_internal_address
   *
   * Looks up internal node mapping at laddr, depth and
   * updates the mapping to paddr.  Returns previous paddr
   * (for debugging purposes).
   */
  using mutate_internal_address_ertr = crimson::errorator<
    crimson::ct_error::enoent,            ///< mapping does not exist
    crimson::ct_error::input_output_error
    >;
  using mutate_internal_address_ret = mutate_internal_address_ertr::future<
    paddr_t>;
  virtual mutate_internal_address_ret mutate_internal_address(
    op_context_t c,
    depth_t depth,
    laddr_t laddr,
    paddr_t paddr) = 0;

  /**
   * make_split_children
   *
   * Generates appropriately typed left and right nodes formed from the
   * contents of *this.
   *
   * Returns <left, right, pivot> where pivot is the first value of right.
   */
  virtual std::tuple<
    LBANodeRef,
    LBANodeRef,
    laddr_t>
  make_split_children(
    op_context_t c) = 0;

  /**
   * make_full_merge
   *
   * Returns a single node formed from merging *this and right.
   * Precondition: at_min_capacity() && right.at_min_capacity()
   */
  virtual LBANodeRef make_full_merge(
    op_context_t c,
    LBANodeRef &right) = 0;

  /**
   * make_balanced
   *
   * Returns nodes formed by balancing the contents of *this and right.
   *
   * Returns <left, right, pivot> where pivot is the first value of right.
   */
  virtual std::tuple<
    LBANodeRef,
    LBANodeRef,
    laddr_t>
  make_balanced(
    op_context_t c,
    LBANodeRef &right,
    bool prefer_left) = 0;

  /// true if no further entries fit -- caller must split before inserting
  /// (see insert precondition above)
  virtual bool at_max_capacity() const = 0;
  /// true if at minimum fill -- caller must merge before removing
  /// (see mutate_mapping precondition above)
  virtual bool at_min_capacity() const = 0;

  virtual ~LBANode() = default;

  /// CachedExtent hook: fix up addrs once the delta's record position is known.
  void on_delta_write(paddr_t record_block_offset) final {
    // All in-memory relative addrs are necessarily record-relative
    assert(get_prior_instance());
    // take over the pin from the instance this delta supersedes
    pin.take_pin(get_prior_instance()->cast<LBANode>()->pin);
    resolve_relative_addrs(record_block_offset);
  }

  /// CachedExtent hook: fix up addrs once the block's own paddr is known.
  void on_initial_write() final {
    // All in-memory relative addrs are necessarily block-relative
    resolve_relative_addrs(get_paddr());
  }

  /// CachedExtent hook: fix up addrs after reading a clean block from disk.
  void on_clean_read() final {
    // From initial write of block, relative addrs are necessarily block-relative
    resolve_relative_addrs(get_paddr());
  }

  /// Rewrites in-memory relative paddrs against base; called by the three
  /// write/read hooks above with either the record offset or this block's
  /// own paddr.
  virtual void resolve_relative_addrs(paddr_t base) = 0;
};
using LBANodeRef = LBANode::LBANodeRef;

/**
 * get_lba_btree_extent
 *
 * Fetches node at depth of the appropriate type.
 *
 * @return future resolving to the fetched node, or input_output_error
 *         (per Cache::get_extent_ertr)
 */
Cache::get_extent_ertr::future<LBANodeRef> get_lba_btree_extent(
  op_context_t c, ///< [in] context structure
  depth_t depth,  ///< [in] depth of node to fetch
  paddr_t offset, ///< [in] physical addr of node
  paddr_t base    ///< [in] depending on user, block addr or record addr
                  ///       in case offset is relative
);

}