summaryrefslogtreecommitdiffstats
path: root/src/crush/CrushTester.h
blob: 1bbc01a70c88906b1ab0aa7c134187abd22d6221 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#ifndef CEPH_CRUSH_TESTER_H
#define CEPH_CRUSH_TESTER_H

#include "crush/CrushWrapper.h"

#include <fstream>

/*
 * Test harness around a CrushWrapper.
 *
 * The object is configured through the public setters (rule range, input
 * range, replica range, output switches, ...) and then driven through
 * test(), test_with_fork() or compare(). The private inline helpers in this
 * header dump collected results into a family of CSV files.
 */
class CrushTester {
  CrushWrapper& crush;  // map handed to the constructor; held by reference
  std::ostream& err;    // stream handed to the constructor; held by reference

  std::map<int, int> device_weight;
  int min_rule, max_rule;  // -1 until set_min_rule/set_max_rule/set_rule
  int ruleset;             // -1 until set_ruleset
  int min_x, max_x;        // -1 until set_min_x/set_max_x/set_x
  int min_rep, max_rep;    // -1 until set_min_rep/set_max_rep/set_num_rep
  int64_t pool_id;         // -1 until set_pool_id

  int num_batches;  // defaults to 1; values > 1 enable the batch CSV files
  bool use_crush;   // defaults to true; cleared by set_random_placement()

  float mark_down_device_ratio;  // defaults to 0.0
  float mark_down_bucket_ratio;  // defaults to 1.0

  // output switches, all false by default
  bool output_utilization;
  bool output_utilization_all;
  bool output_statistics;
  bool output_mappings;
  bool output_bad_mappings;
  bool output_choose_tries;

  bool output_data_file;
  bool output_csv;

  std::string output_data_file_name;  // empty by default

  /*
   * mark a ratio of devices down, can be used to simulate placement
   * distributions under degraded cluster conditions
   */
  void adjust_weights(std::vector<__u32>& weight);

  /*
   * Get the maximum number of devices that could be selected to satisfy ruleno.
   */
  int get_maximum_affected_by_rule(int ruleno);

  /*
   * for maps in which devices have non-sequential id numbers, return a mapping
   * of device id to a sequential id number. For example, if we have devices
   * with id's 0 1 4 5 6 return a map where:
   *     0 = 0
   *     1 = 1
   *     4 = 2
   *     5 = 3
   *     6 = 4
   *
   * which can help make post-processing easier
   */
  std::map<int,int> get_collapsed_mapping();

  /*
   * Essentially a re-implementation of CRUSH. Given a vector of devices
   * check that the vector represents a valid placement for a given ruleno.
   */
  bool check_valid_placement(int ruleno, std::vector<int> in, const std::vector<__u32>& weight);

  /*
   * Generate a random selection of devices which satisfies ruleno. Essentially a
   * monte-carlo simulator for CRUSH placements which can be used to compare the
   * statistical distribution of the CRUSH algorithm to a random number generator
   */
  int random_placement(int ruleno, std::vector<int>& out, int maxout, std::vector<__u32>& weight);

  // scaffolding to store data for off-line processing
  struct tester_data_set {
    std::vector<std::string> device_utilization;
    std::vector<std::string> device_utilization_all;
    std::vector<std::string> placement_information;
    std::vector<std::string> batch_device_utilization_all;
    std::vector<std::string> batch_device_expected_utilization_all;
    std::map<int, float> proportional_weights;
    std::map<int, float> proportional_weights_all;
    std::map<int, float> absolute_weights;
  };

  /*
   * Append every string of payload, verbatim and without any separator, to
   * csv_file. Nothing is written when the stream is not in a good state.
   */
  void write_to_csv(std::ofstream& csv_file, const std::vector<std::string>& payload)
  {
    if (!csv_file.good())
      return;
    for (std::vector<std::string>::const_iterator row = payload.begin();
         row != payload.end(); ++row)
      csv_file << *row;
  }

  /*
   * Write one "key,value" line per entry of payload to csv_file. Nothing is
   * written when the stream is not in a good state.
   */
  void write_to_csv(std::ofstream& csv_file, const std::map<int, float>& payload)
  {
    if (!csv_file.good())
      return;
    // '\n' rather than std::endl: no need to flush after every row, the
    // stream is flushed when it is closed.
    for (std::map<int, float>::const_iterator entry = payload.begin();
         entry != payload.end(); ++entry)
      csv_file << entry->first << ',' << entry->second << '\n';
  }

  /*
   * Dump tester_data into a set of CSV files whose names are user_tag
   * followed by a fixed suffix ("-device_utilization.csv", ...). Each file
   * gets a header row followed by the matching payload. When num_batches is
   * greater than one, two additional per-batch files are produced.
   */
  void write_data_set_to_csv(const std::string& user_tag, tester_data_set& tester_data)
  {
    std::ofstream device_utilization_file((user_tag + "-device_utilization.csv").c_str());
    std::ofstream device_utilization_all_file((user_tag + "-device_utilization_all.csv").c_str());
    std::ofstream placement_information_file((user_tag + "-placement_information.csv").c_str());
    std::ofstream proportional_weights_file((user_tag + "-proportional_weights.csv").c_str());
    std::ofstream proportional_weights_all_file((user_tag + "-proportional_weights_all.csv").c_str());
    std::ofstream absolute_weights_file((user_tag + "-absolute_weights.csv").c_str());

    // write the headers
    device_utilization_file << "Device ID, Number of Objects Stored, Number of Objects Expected" << '\n';
    device_utilization_all_file << "Device ID, Number of Objects Stored, Number of Objects Expected" << '\n';
    proportional_weights_file << "Device ID, Proportional Weight" << '\n';
    proportional_weights_all_file << "Device ID, Proportional Weight" << '\n';
    absolute_weights_file << "Device ID, Absolute Weight" << '\n';

    // the placement header has one OSD column per replica, up to max_rep
    placement_information_file << "Input";
    for (int rep = 0; rep < max_rep; ++rep) {
      placement_information_file << ", OSD" << rep;
    }
    placement_information_file << '\n';

    // write the payloads
    write_to_csv(device_utilization_file, tester_data.device_utilization);
    write_to_csv(device_utilization_all_file, tester_data.device_utilization_all);
    write_to_csv(placement_information_file, tester_data.placement_information);
    write_to_csv(proportional_weights_file, tester_data.proportional_weights);
    write_to_csv(proportional_weights_all_file, tester_data.proportional_weights_all);
    write_to_csv(absolute_weights_file, tester_data.absolute_weights);

    device_utilization_file.close();
    device_utilization_all_file.close();
    placement_information_file.close();
    proportional_weights_file.close();
    proportional_weights_all_file.close();
    absolute_weights_file.close();

    if (num_batches > 1) {
      std::ofstream batch_device_utilization_all_file((user_tag + "-batch_device_utilization_all.csv").c_str());
      std::ofstream batch_device_expected_utilization_all_file((user_tag + "-batch_device_expected_utilization_all.csv").c_str());

      // both batch headers carry one column per entry of device_utilization
      const unsigned num_columns = tester_data.device_utilization.size();

      batch_device_utilization_all_file << "Batch Round";
      for (unsigned col = 0; col < num_columns; ++col) {
        batch_device_utilization_all_file << ", Objects Stored on OSD" << col;
      }
      batch_device_utilization_all_file << '\n';

      batch_device_expected_utilization_all_file << "Batch Round";
      for (unsigned col = 0; col < num_columns; ++col) {
        batch_device_expected_utilization_all_file << ", Objects Expected on OSD" << col;
      }
      batch_device_expected_utilization_all_file << '\n';

      write_to_csv(batch_device_utilization_all_file, tester_data.batch_device_utilization_all);
      write_to_csv(batch_device_expected_utilization_all_file, tester_data.batch_device_expected_utilization_all);
      batch_device_expected_utilization_all_file.close();
      batch_device_utilization_all_file.close();
    }
  }

  // helpers that append formatted rows to dst; defined out of line
  void write_integer_indexed_vector_data_string(std::vector<std::string> &dst, int index, std::vector<int> vector_data);
  void write_integer_indexed_vector_data_string(std::vector<std::string> &dst, int index, std::vector<float> vector_data);
  void write_integer_indexed_scalar_data_string(std::vector<std::string> &dst, int index, int scalar_data);
  void write_integer_indexed_scalar_data_string(std::vector<std::string> &dst, int index, float scalar_data);

public:
  /*
   * Bind the tester to the map c and the stream eo. Both are stored by
   * reference, so they must outlive the tester. Every range starts out as
   * -1, a single batch is configured, CRUSH placement is enabled and all
   * output switches are off.
   */
  CrushTester(CrushWrapper& c, std::ostream& eo)
    : crush(c), err(eo),
      min_rule(-1), max_rule(-1),
      ruleset(-1),
      min_x(-1), max_x(-1),
      min_rep(-1), max_rep(-1),
      pool_id(-1),
      num_batches(1),
      use_crush(true),
      mark_down_device_ratio(0.0),
      mark_down_bucket_ratio(1.0),
      output_utilization(false),
      output_utilization_all(false),
      output_statistics(false),
      output_mappings(false),
      output_bad_mappings(false),
      output_choose_tries(false),
      output_data_file(false),
      output_csv(false),
      output_data_file_name("")
  { }

  // ---- output configuration ----

  void set_output_data_file_name(std::string name) {
    output_data_file_name = name;
  }
  std::string get_output_data_file_name() const {
    return output_data_file_name;
  }

  void set_output_data_file(bool b) {
    output_data_file = b;
  }
  bool get_output_data_file() const {
    return output_data_file;
  }

  void set_output_csv(bool b) {
    output_csv = b;
  }
  bool get_output_csv() const {
    return output_csv;
  }

  void set_output_utilization(bool b) {
    output_utilization = b;
  }
  bool get_output_utilization() const {
    return output_utilization;
  }

  void set_output_utilization_all(bool b) {
    output_utilization_all = b;
  }
  bool get_output_utilization_all() const {
    return output_utilization_all;
  }

  void set_output_statistics(bool b) {
    output_statistics = b;
  }
  bool get_output_statistics() const {
    return output_statistics;
  }

  void set_output_mappings(bool b) {
    output_mappings = b;
  }
  bool get_output_mappings() const {
    return output_mappings;
  }

  void set_output_bad_mappings(bool b) {
    output_bad_mappings = b;
  }
  bool get_output_bad_mappings() const {
    return output_bad_mappings;
  }

  void set_output_choose_tries(bool b) {
    output_choose_tries = b;
  }
  bool get_output_choose_tries() const {
    return output_choose_tries;
  }

  // ---- simulation configuration ----

  void set_batches(int b) {
    num_batches = b;
  }
  int get_batches() const {
    return num_batches;
  }

  // switch from CRUSH placement to random placement (there is no way back)
  void set_random_placement() {
    use_crush = false;
  }
  bool get_random_placement() const {
    return !use_crush;
  }

  void set_bucket_down_ratio(float bucket_ratio) {
    mark_down_bucket_ratio = bucket_ratio;
  }
  float get_bucket_down_ratio() const {
    return mark_down_bucket_ratio;
  }

  void set_device_down_ratio(float device_ratio) {
    mark_down_device_ratio = device_ratio;
  }
  float get_device_down_ratio() const {
    return mark_down_device_ratio;
  }
  /*
   * Misnamed historical spelling of get_device_down_ratio(); retained only
   * so that existing callers keep compiling. Prefer get_device_down_ratio().
   */
  float set_device_down_ratio() const {
    return get_device_down_ratio();
  }

  void set_device_weight(int dev, float f);

  // ---- replica range ----

  void set_min_rep(int r) {
    min_rep = r;
  }
  int get_min_rep() const {
    return min_rep;
  }

  void set_max_rep(int r) {
    max_rep = r;
  }
  int get_max_rep() const {
    return max_rep;
  }

  // pin both ends of the replica range to r
  void set_num_rep(int r) {
    min_rep = max_rep = r;
  }

  // ---- input (x) range ----

  void set_min_x(int x) {
    min_x = x;
  }
  int get_min_x() const {
    return min_x;
  }

  void set_max_x(int x) {
    max_x = x;
  }
  int get_max_x() const {
    return max_x;
  }

  // pin both ends of the input range to x
  void set_x(int x) {
    min_x = max_x = x;
  }

  void set_pool_id(int64_t x) {
    pool_id = x;
  }

  // ---- rule range ----

  void set_min_rule(int rule) {
    min_rule = rule;
  }
  int get_min_rule() const {
    return min_rule;
  }

  void set_max_rule(int rule) {
    max_rule = rule;
  }
  int get_max_rule() const {
    return max_rule;
  }

  // pin both ends of the rule range to rule
  void set_rule(int rule) {
    min_rule = max_rule = rule;
  }

  void set_ruleset(int rs) {
    ruleset = rs;
  }

  /**
   * check if any bucket/nodes is referencing an unknown name or type
   * @param max_id rejects any non-bucket items with id less than this number,
   *               pass 0 to disable this check
   * @return false if a dangling name/type is referenced or an item id is too
   *         large, true otherwise
   *
   * NOTE(review): the @param text says "less than" while the @return text
   * says "too large"; one of the two is presumably wrong -- confirm against
   * the implementation.
   */
  bool check_name_maps(unsigned max_id = 0) const;
  /**
   * print out overlapped crush rules belonging to the same ruleset
   */
  void check_overlapped_rules() const;
  int test();
  int test_with_fork(int timeout);

  int compare(CrushWrapper& other);
};

#endif