// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
* Copyright (C) 2016 Red Hat Inc.
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
*
*/
#pragma once

#include <ostream>
#include <map>
#include <vector>

#include "boost/variant.hpp"

#include "dmclock/src/dmclock_server.h"

#include "osd/scheduler/OpScheduler.h"
#include "common/config.h"
#include "include/cmp.h"
#include "common/ceph_context.h"
#include "common/mClockPriorityQueue.h"
#include "osd/scheduler/OpSchedulerItem.h"
namespace ceph::osd::scheduler {
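
// Default lower and upper bounds for mclock allocation values
// (default_max acts as an effectively unlimited value).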
constexpr uint64_t default_min = 1;
constexpr uint64_t default_max = 999999;
using client_id_t = uint64_t;
using profile_id_t = uint64_t;
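
// Identifies an external client: the owner of the op plus a profile id
// (always 0 at present, see get_scheduler_id() below).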
struct client_profile_id_t {
  client_id_t client_id;
  profile_id_t profile_id;
};
WRITE_EQ_OPERATORS_2(client_profile_id_t, client_id, profile_id)
WRITE_CMP_OPERATORS_2(client_profile_id_t, client_id, profile_id)
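
// Key under which items are tracked in the dmclock queue: the scheduler
// class combined with the client profile id.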
struct scheduler_id_t {
  op_scheduler_class class_id;
  client_profile_id_t client_profile_id;
};
WRITE_EQ_OPERATORS_2(scheduler_id_t, class_id, client_profile_id)
WRITE_CMP_OPERATORS_2(scheduler_id_t, class_id, client_profile_id)
/**
* Scheduler implementation based on mclock.
*
* TODO: explain configs
*/
class mClockScheduler : public OpScheduler, md_config_obs_t {

  CephContext *cct;
  const uint32_t num_shards;
  bool is_rotational;
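
  // Max OSD capacity (iops) and per-io/per-byte cost factors used when
  // scaling the cost of queued items; populated from configuration by the
  // setters below.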
  double max_osd_capacity;
  double osd_mclock_cost_per_io;
  double osd_mclock_cost_per_byte;
  std::string mclock_profile = "high_client_ops";
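
  // Reservation, weight and limit allocation values for a single
  // scheduler class.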
  struct ClientAllocs {
    uint64_t res;
    uint64_t wgt;
    uint64_t lim;

    ClientAllocs(uint64_t _res, uint64_t _wgt, uint64_t _lim) {
      update(_res, _wgt, _lim);
    }

    inline void update(uint64_t _res, uint64_t _wgt, uint64_t _lim) {
      res = _res;
      wgt = _wgt;
      lim = _lim;
    }
  };
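
  // Per-class allocations, indexed by op_scheduler_class; filled in by the
  // profile allocation methods below.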
  std::array<
    ClientAllocs,
    static_cast<size_t>(op_scheduler_class::client) + 1
  > client_allocs = {
    // Placeholder, gets replaced with configured values
    ClientAllocs(1, 1, 1), // background_recovery
    ClientAllocs(1, 1, 1), // background_best_effort
    ClientAllocs(1, 1, 1), // immediate (not used)
    ClientAllocs(1, 1, 1)  // client
  };
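
  // Tracks the dmclock ClientInfo (reservation/weight/limit) for the
  // internal scheduler classes and for external clients.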
  class ClientRegistry {
    std::array<
      crimson::dmclock::ClientInfo,
      static_cast<size_t>(op_scheduler_class::immediate)
    > internal_client_infos = {
      // Placeholder, gets replaced with configured values
      crimson::dmclock::ClientInfo(1, 1, 1),
      crimson::dmclock::ClientInfo(1, 1, 1)
    };

    crimson::dmclock::ClientInfo default_external_client_info = {1, 1, 1};
    std::map<client_profile_id_t,
             crimson::dmclock::ClientInfo> external_client_infos;
    const crimson::dmclock::ClientInfo *get_external_client(
      const client_profile_id_t &client) const;
  public:
    void update_from_config(const ConfigProxy &conf);
    const crimson::dmclock::ClientInfo *get_info(
      const scheduler_id_t &id) const;
  } client_registry;
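
  // dmclock-based pull priority queue keyed by scheduler_id_t; ops of the
  // immediate class are held in the `immediate` list instead and dispatched
  // ahead of the queue.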
  using mclock_queue_t = crimson::dmclock::PullPriorityQueue<
    scheduler_id_t,
    OpSchedulerItem,
    true,
    true,
    2>;
  mclock_queue_t scheduler;
  std::list<OpSchedulerItem> immediate;
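
  // Build the dmclock client key for an item from its scheduler class and
  // owner; the profile id is currently always 0.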
  static scheduler_id_t get_scheduler_id(const OpSchedulerItem &item) {
    return scheduler_id_t{
      item.get_scheduler_class(),
      client_profile_id_t{
        item.get_owner(),
        0
      }
    };
  }

public:
  mClockScheduler(CephContext *cct, uint32_t num_shards, bool is_rotational);
  ~mClockScheduler() override;

  // Set the max osd capacity in iops
  void set_max_osd_capacity();

  // Set the cost per io for the osd
  void set_osd_mclock_cost_per_io();

  // Set the cost per byte for the osd
  void set_osd_mclock_cost_per_byte();

  // Set the mclock profile type to enable
  void set_mclock_profile();

  // Get the active mclock profile
  std::string get_mclock_profile();

  // Set "balanced" profile allocations
  void set_balanced_profile_allocations();

  // Set "high_recovery_ops" profile allocations
  void set_high_recovery_ops_profile_allocations();

  // Set "high_client_ops" profile allocations
  void set_high_client_ops_profile_allocations();

  // Set the mclock-related config params based on the profile
  void enable_mclock_profile_settings();

  // Set the mclock config parameters based on the profile allocations
  void set_profile_config();

  // Calculate the scaled cost per item
  int calc_scaled_cost(int cost);

  // Enqueue the op at the back of the regular queue
  void enqueue(OpSchedulerItem &&item) final;

  // Enqueue the op at the front of the regular queue
  void enqueue_front(OpSchedulerItem &&item) final;

  // Return an op to be dispatched
  WorkItem dequeue() final;

  // Return whether the queue is empty
  bool empty() const final {
    return immediate.empty() && scheduler.empty();
  }

  // Formatted output of the queue
  void dump(ceph::Formatter &f) const final;

  void print(std::ostream &ostream) const final {
    ostream << "mClockScheduler";
  }

  // Update data associated with the modified mclock config key(s)
  void update_configuration() final;

  const char** get_tracked_conf_keys() const final;
  void handle_conf_change(const ConfigProxy& conf,
                          const std::set<std::string> &changed) final;
};
}