path: root/src/rocksdb/db_stress_tool/batched_ops_stress.cc
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#ifdef GFLAGS
#include "db_stress_tool/db_stress_common.h"

namespace ROCKSDB_NAMESPACE {
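// Stress test in which each logical (key, value) pair is expanded into ten
// physical entries ("0"+key, value+"0") .. ("9"+key, value+"9"), written
// atomically in a single WriteBatch and later read back and cross-checked
// for consistency.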
class BatchedOpsStressTest : public StressTest {
 public:
  BatchedOpsStressTest() {}
  ~BatchedOpsStressTest() override {}

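  // Expected-state tracking is not used in batched mode; see VerifyDb below.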
  bool IsStateTracked() const override { return false; }

  // Given a key K and value V, this puts ("0"+K, V+"0"), ("1"+K, V+"1"), ...,
  // ("9"+K, V+"9") in the DB atomically, i.e., in a single write batch.
  // Also see BatchedOpsStressTest::TestGet.
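  //
  // For example (illustrative key/value), putting ("key123", "val") writes
  // ("0key123", "val0"), ("1key123", "val1"), ..., ("9key123", "val9"), so
  // all ten entries share the same value body and can be cross-checked later.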
  Status TestPut(ThreadState* thread, WriteOptions& write_opts,
                 const ReadOptions& /* read_opts */,
                 const std::vector<int>& rand_column_families,
                 const std::vector<int64_t>& rand_keys,
                 char (&value)[100]) override {
    assert(!rand_column_families.empty());
    assert(!rand_keys.empty());

    const std::string key_body = Key(rand_keys[0]);

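    // Note: value_base stays strictly below UNKNOWN_SENTINEL, which is
    // reserved by the shared state to mean "value unknown".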
    const uint32_t value_base =
        thread->rand.Next() % thread->shared->UNKNOWN_SENTINEL;
    const size_t sz = GenerateValue(value_base, value, sizeof(value));
    const std::string value_body = Slice(value, sz).ToString();

    WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */,
                     FLAGS_batch_protection_bytes_per_key,
                     FLAGS_user_timestamp_size);

    ColumnFamilyHandle* const cfh = column_families_[rand_column_families[0]];
    assert(cfh);

    for (int i = 9; i >= 0; --i) {
      const std::string num = std::to_string(i);

      // Note: the digit in num is prepended to the key; however, it is appended
      // to the value because we want the "value base" to be encoded uniformly
      // at the beginning of the value for all types of stress tests (e.g.
      // batched, non-batched, CF consistency).
      const std::string k = num + key_body;
      const std::string v = value_body + num;

      if (FLAGS_use_merge) {
        batch.Merge(cfh, k, v);
      } else if (FLAGS_use_put_entity_one_in > 0 &&
                 (value_base % FLAGS_use_put_entity_one_in) == 0) {
        batch.PutEntity(cfh, k, GenerateWideColumns(value_base, v));
      } else {
        batch.Put(cfh, k, v);
      }
    }

    const Status s = db_->Write(write_opts, &batch);

    if (!s.ok()) {
      fprintf(stderr, "multiput error: %s\n", s.ToString().c_str());
      thread->stats.AddErrors(1);
    } else {
      // we did 10 writes each of size sz + 1
      thread->stats.AddBytesForWrites(10, (sz + 1) * 10);
    }

    return s;
  }

  // Given a key K, this deletes ("0"+K), ("1"+K), ..., ("9"+K) from the DB
  // atomically, i.e., in a single write batch. Also see
  // BatchedOpsStressTest::TestMultiGet.
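  //
  // For example (illustrative key), deleting "key123" issues
  // Delete("0key123"), Delete("1key123"), ..., Delete("9key123") in one
  // WriteBatch.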
  Status TestDelete(ThreadState* thread, WriteOptions& writeoptions,
                    const std::vector<int>& rand_column_families,
                    const std::vector<int64_t>& rand_keys) override {
    std::string keys[10] = {"9", "7", "5", "3", "1", "8", "6", "4", "2", "0"};

    WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */,
                     FLAGS_batch_protection_bytes_per_key,
                     FLAGS_user_timestamp_size);
    Status s;
    auto cfh = column_families_[rand_column_families[0]];
    std::string key_str = Key(rand_keys[0]);
    for (int i = 0; i < 10; i++) {
      keys[i] += key_str;
      batch.Delete(cfh, keys[i]);
    }

    s = db_->Write(writeoptions, &batch);
    if (!s.ok()) {
      fprintf(stderr, "multidelete error: %s\n", s.ToString().c_str());
      thread->stats.AddErrors(1);
    } else {
      thread->stats.AddDeletes(10);
    }

    return s;
  }

  Status TestDeleteRange(ThreadState* /* thread */,
                         WriteOptions& /* write_opts */,
                         const std::vector<int>& /* rand_column_families */,
                         const std::vector<int64_t>& /* rand_keys */) override {
    assert(false);
    return Status::NotSupported(
        "BatchedOpsStressTest does not support "
        "TestDeleteRange");
  }

  void TestIngestExternalFile(
      ThreadState* /* thread */,
      const std::vector<int>& /* rand_column_families */,
      const std::vector<int64_t>& /* rand_keys */) override {
    assert(false);
    fprintf(stderr,
            "BatchedOpsStressTest does not support "
            "TestIngestExternalFile\n");
    std::terminate();
  }

  // Given a key K, this gets values for "0"+K, "1"+K, ..., "9"+K
  // in the same snapshot, and verifies that all the values are of the form
  // V+"0", V+"1", ..., V+"9".
  // ASSUMES that BatchedOpsStressTest::TestPut was used to put (K, V) into
  // the DB.
  Status TestGet(ThreadState* thread, const ReadOptions& readoptions,
                 const std::vector<int>& rand_column_families,
                 const std::vector<int64_t>& rand_keys) override {
    std::string keys[10] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"};
    Slice key_slices[10];
    std::string values[10];
    ReadOptions readoptionscopy = readoptions;
    readoptionscopy.snapshot = db_->GetSnapshot();
    std::string key_str = Key(rand_keys[0]);
    Slice key = key_str;
    auto cfh = column_families_[rand_column_families[0]];
    std::string from_db;
    Status s;
    for (int i = 0; i < 10; i++) {
      keys[i] += key.ToString();
      key_slices[i] = keys[i];
      s = db_->Get(readoptionscopy, cfh, key_slices[i], &from_db);
      if (!s.ok() && !s.IsNotFound()) {
        fprintf(stderr, "get error: %s\n", s.ToString().c_str());
        values[i] = "";
        thread->stats.AddErrors(1);
        // we continue after error rather than exiting so that we can
        // find more errors if any
      } else if (s.IsNotFound()) {
        values[i] = "";
        thread->stats.AddGets(1, 0);
      } else {
        values[i] = from_db;

        assert(!keys[i].empty());
        assert(!values[i].empty());

        const char expected = keys[i].front();
        const char actual = values[i].back();

        if (expected != actual) {
          fprintf(stderr, "get error expected = %c actual = %c\n", expected,
                  actual);
        }

        values[i].pop_back();  // get rid of the differing character

        thread->stats.AddGets(1, 1);
      }
    }
    db_->ReleaseSnapshot(readoptionscopy.snapshot);

    // Now that we retrieved all values, check that they all match
    for (int i = 1; i < 10; i++) {
      if (values[i] != values[0]) {
        fprintf(stderr, "get error: inconsistent values for key %s: %s, %s\n",
                key.ToString(true).c_str(), StringToHex(values[0]).c_str(),
                StringToHex(values[i]).c_str());
        // we continue after error rather than exiting so that we can
        // find more errors if any
      }
    }

    return s;
  }

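  // Batched-key counterpart of TestGet: for each key K in rand_keys, this
  // issues a single MultiGet for "0"+K, "1"+K, ..., "9"+K under one snapshot,
  // then applies the same checks as TestGet: each value must end in the digit
  // that prefixes its key, and all ten values must match once that digit is
  // stripped.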
  std::vector<Status> TestMultiGet(
      ThreadState* thread, const ReadOptions& readoptions,
      const std::vector<int>& rand_column_families,
      const std::vector<int64_t>& rand_keys) override {
    size_t num_keys = rand_keys.size();
    std::vector<Status> ret_status(num_keys);
    std::array<std::string, 10> keys = {
        {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}};
    size_t num_prefixes = keys.size();
    for (size_t rand_key = 0; rand_key < num_keys; ++rand_key) {
      std::vector<Slice> key_slices;
      std::vector<PinnableSlice> values(num_prefixes);
      std::vector<Status> statuses(num_prefixes);
      ReadOptions readoptionscopy = readoptions;
      readoptionscopy.snapshot = db_->GetSnapshot();
      readoptionscopy.rate_limiter_priority =
          FLAGS_rate_limit_user_ops ? Env::IO_USER : Env::IO_TOTAL;
      std::vector<std::string> key_str;
      key_str.reserve(num_prefixes);
      key_slices.reserve(num_prefixes);
      std::string from_db;
      ColumnFamilyHandle* cfh = column_families_[rand_column_families[0]];

      for (size_t key = 0; key < num_prefixes; ++key) {
        key_str.emplace_back(keys[key] + Key(rand_keys[rand_key]));
        key_slices.emplace_back(key_str.back());
      }
      db_->MultiGet(readoptionscopy, cfh, num_prefixes, key_slices.data(),
                    values.data(), statuses.data());
      for (size_t i = 0; i < num_prefixes; i++) {
        Status s = statuses[i];
        if (!s.ok() && !s.IsNotFound()) {
          fprintf(stderr, "multiget error: %s\n", s.ToString().c_str());
          thread->stats.AddErrors(1);
          ret_status[rand_key] = s;
          // we continue after error rather than exiting so that we can
          // find more errors if any
        } else if (s.IsNotFound()) {
          thread->stats.AddGets(1, 0);
          ret_status[rand_key] = s;
        } else {
          assert(!keys[i].empty());
          assert(!values[i].empty());

          const char expected = keys[i][0];
          const char actual = values[i][values[i].size() - 1];

          if (expected != actual) {
            fprintf(stderr, "multiget error expected = %c actual = %c\n",
                    expected, actual);
          }

          values[i].remove_suffix(1);  // get rid of the differing character

          thread->stats.AddGets(1, 1);
        }
      }
      db_->ReleaseSnapshot(readoptionscopy.snapshot);

      // Now that we retrieved all values, check that they all match
      for (size_t i = 1; i < num_prefixes; i++) {
        if (values[i] != values[0]) {
          fprintf(stderr,
                  "multiget error: inconsistent values for key %s: %s, %s\n",
                  StringToHex(key_str[i]).c_str(),
                  StringToHex(values[0].ToString()).c_str(),
                  StringToHex(values[i].ToString()).c_str());
          // we continue after error rather than exiting so that we can
          // find more errors if any
        }
      }
    }

    return ret_status;
  }

  // Given a key, this does prefix scans for "0"+P, "1"+P, ..., "9"+P
  // in the same snapshot where P is the first FLAGS_prefix_size - 1 bytes
  // of the key. Each of these 10 scans returns a series of values;
  // each series should be the same length, and it is verified for each
  // index i that all the i-th values are of the form V+"0", V+"1", ..., V+"9".
  // ASSUMES that BatchedOpsStressTest::TestPut was used to put (K, V) into
  // the DB.
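  //
  // For example (illustrative values), with FLAGS_prefix_size == 4 and key
  // "key123", the scanned prefixes are "0key", "1key", ..., "9key"; the ten
  // iterators are advanced in lock step, and at each step their values must
  // agree after stripping the trailing digit.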
  Status TestPrefixScan(ThreadState* thread, const ReadOptions& readoptions,
                        const std::vector<int>& rand_column_families,
                        const std::vector<int64_t>& rand_keys) override {
    assert(!rand_column_families.empty());
    assert(!rand_keys.empty());

    const std::string key = Key(rand_keys[0]);

    assert(FLAGS_prefix_size > 0);
    const size_t prefix_to_use = static_cast<size_t>(FLAGS_prefix_size);

    constexpr size_t num_prefixes = 10;

    std::array<std::string, num_prefixes> prefixes;
    std::array<Slice, num_prefixes> prefix_slices;
    std::array<ReadOptions, num_prefixes> ro_copies;
    std::array<std::string, num_prefixes> upper_bounds;
    std::array<Slice, num_prefixes> ub_slices;
    std::array<std::unique_ptr<Iterator>, num_prefixes> iters;

    const Snapshot* const snapshot = db_->GetSnapshot();

    ColumnFamilyHandle* const cfh = column_families_[rand_column_families[0]];
    assert(cfh);

    for (size_t i = 0; i < num_prefixes; ++i) {
      prefixes[i] = std::to_string(i) + key;
      prefix_slices[i] = Slice(prefixes[i].data(), prefix_to_use);

      ro_copies[i] = readoptions;
      ro_copies[i].snapshot = snapshot;
      if (thread->rand.OneIn(2) &&
          GetNextPrefix(prefix_slices[i], &(upper_bounds[i]))) {
        // For half of the time, set the upper bound to the next prefix
        ub_slices[i] = upper_bounds[i];
        ro_copies[i].iterate_upper_bound = &(ub_slices[i]);
      }

      iters[i].reset(db_->NewIterator(ro_copies[i], cfh));
      iters[i]->Seek(prefix_slices[i]);
    }

    uint64_t count = 0;

    while (iters[0]->Valid() && iters[0]->key().starts_with(prefix_slices[0])) {
      ++count;

      std::array<std::string, num_prefixes> values;

      // get list of all values for this iteration
      for (size_t i = 0; i < num_prefixes; ++i) {
        // no iterator should finish before the first one
        assert(iters[i]->Valid() &&
               iters[i]->key().starts_with(prefix_slices[i]));
        values[i] = iters[i]->value().ToString();

        // make sure the last character of the value is the expected digit
        assert(!prefixes[i].empty());
        assert(!values[i].empty());

        const char expected = prefixes[i].front();
        const char actual = values[i].back();

        if (expected != actual) {
          fprintf(stderr, "prefix scan error expected = %c actual = %c\n",
                  expected, actual);
        }

        values[i].pop_back();  // get rid of the differing character

        // make sure all values are equivalent
        if (values[i] != values[0]) {
          fprintf(stderr,
                  "prefix scan error : %" ROCKSDB_PRIszt
                  ", inconsistent values for prefix %s: %s, %s\n",
                  i, prefix_slices[i].ToString(/* hex */ true).c_str(),
                  StringToHex(values[0]).c_str(),
                  StringToHex(values[i]).c_str());
          // we continue after error rather than exiting so that we can
          // find more errors if any
        }

        // make sure value() and columns() are consistent
        const WideColumns expected_columns = GenerateExpectedWideColumns(
            GetValueBase(iters[i]->value()), iters[i]->value());
        if (iters[i]->columns() != expected_columns) {
          fprintf(stderr,
                  "prefix scan error : %" ROCKSDB_PRIszt
                  ", value and columns inconsistent for prefix %s: %s\n",
                  i, prefix_slices[i].ToString(/* hex */ true).c_str(),
                  DebugString(iters[i]->value(), iters[i]->columns(),
                              expected_columns)
                      .c_str());
        }

        iters[i]->Next();
      }
    }

    // cleanup iterators and snapshot
    for (size_t i = 0; i < num_prefixes; ++i) {
      // if the first iterator finished, they should have all finished
      assert(!iters[i]->Valid() ||
             !iters[i]->key().starts_with(prefix_slices[i]));
      assert(iters[i]->status().ok());
    }

    db_->ReleaseSnapshot(snapshot);

    thread->stats.AddPrefixes(1, count);

    return Status::OK();
  }

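  // No expected state is tracked in batched mode (see IsStateTracked), so
  // database verification is a no-op.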
  void VerifyDb(ThreadState* /* thread */) const override {}

  void ContinuouslyVerifyDb(ThreadState* /* thread */) const override {}
};

StressTest* CreateBatchedOpsStressTest() { return new BatchedOpsStressTest(); }

}  // namespace ROCKSDB_NAMESPACE
#endif  // GFLAGS