summaryrefslogtreecommitdiffstats
path: root/src/rocksdb/include/rocksdb/rate_limiter.h
blob: 9cad6edf4aaee81dd562826f2b26e701ef611826 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#pragma once

#include "rocksdb/env.h"
#include "rocksdb/statistics.h"
#include "rocksdb/status.h"

namespace ROCKSDB_NAMESPACE {

// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class RateLimiter {
 public:
  enum class OpType {
    kRead,
    kWrite,
  };

  enum class Mode {
    kReadsOnly,
    kWritesOnly,
    kAllIo,
  };

  // For API compatibility, default to rate-limiting writes only.
  explicit RateLimiter(Mode mode = Mode::kWritesOnly) : mode_(mode) {}

  virtual ~RateLimiter() {}

  // This API allows user to dynamically change rate limiter's bytes per second.
  // REQUIRED: bytes_per_second > 0
  virtual void SetBytesPerSecond(int64_t bytes_per_second) = 0;

  // Deprecated. New RateLimiter derived classes should override
  // Request(const int64_t, const Env::IOPriority, Statistics*) or
  // Request(const int64_t, const Env::IOPriority, Statistics*, OpType)
  // instead.
  //
  // Request for token for bytes. If this request can not be satisfied, the call
  // is blocked. Caller is responsible to make sure
  // bytes <= GetSingleBurstBytes()
  // and bytes >= 0.
  virtual void Request(const int64_t /*bytes*/, const Env::IOPriority /*pri*/) {
    assert(false);
  }

  // Request for token for bytes and potentially update statistics. If this
  // request can not be satisfied, the call is blocked. Caller is responsible to
  // make sure bytes <= GetSingleBurstBytes()
  // and bytes >= 0.
  virtual void Request(const int64_t bytes, const Env::IOPriority pri,
                       Statistics* /* stats */) {
    // For API compatibility, default implementation calls the older API in
    // which statistics are unsupported.
    Request(bytes, pri);
  }

  // Requests token to read or write bytes and potentially updates statistics.
  //
  // If this request can not be satisfied, the call is blocked. Caller is
  // responsible to make sure bytes <= GetSingleBurstBytes()
  // and bytes >= 0.
  virtual void Request(const int64_t bytes, const Env::IOPriority pri,
                       Statistics* stats, OpType op_type) {
    if (IsRateLimited(op_type)) {
      Request(bytes, pri, stats);
    }
  }

  // Requests token to read or write bytes and potentially updates statistics.
  // Takes into account GetSingleBurstBytes() and alignment (e.g., in case of
  // direct I/O) to allocate an appropriate number of bytes, which may be less
  // than the number of bytes requested.
  virtual size_t RequestToken(size_t bytes, size_t alignment,
                              Env::IOPriority io_priority, Statistics* stats,
                              RateLimiter::OpType op_type);

  // Max bytes can be granted in a single burst
  virtual int64_t GetSingleBurstBytes() const = 0;

  // Total bytes that go through rate limiter
  virtual int64_t GetTotalBytesThrough(
      const Env::IOPriority pri = Env::IO_TOTAL) const = 0;

  // Total # of requests that go through rate limiter
  virtual int64_t GetTotalRequests(
      const Env::IOPriority pri = Env::IO_TOTAL) const = 0;

  // Total # of requests that are pending for bytes in rate limiter
  // For convenience, this function is supported by the RateLimiter returned
  // by NewGenericRateLimiter but is not required by RocksDB.
  //
  // REQUIRED: total_pending_request != nullptr
  virtual Status GetTotalPendingRequests(
      int64_t* total_pending_requests,
      const Env::IOPriority pri = Env::IO_TOTAL) const {
    assert(total_pending_requests != nullptr);
    (void)total_pending_requests;
    (void)pri;
    return Status::NotSupported();
  }

  virtual int64_t GetBytesPerSecond() const = 0;

  virtual bool IsRateLimited(OpType op_type) {
    if ((mode_ == RateLimiter::Mode::kWritesOnly &&
         op_type == RateLimiter::OpType::kRead) ||
        (mode_ == RateLimiter::Mode::kReadsOnly &&
         op_type == RateLimiter::OpType::kWrite)) {
      return false;
    }
    return true;
  }

 protected:
  Mode GetMode() { return mode_; }

 private:
  const Mode mode_;
};

// Create a RateLimiter object, which can be shared among RocksDB instances to
// control write rate of flush and compaction.
// @rate_bytes_per_sec: this is the only parameter you want to set most of the
// time. It controls the total write rate of compaction and flush in bytes per
// second. Currently, RocksDB does not enforce rate limit for anything other
// than flush and compaction, e.g. write to WAL.
// @refill_period_us: this controls how often tokens are refilled. For example,
// when rate_bytes_per_sec is set to 10MB/s and refill_period_us is set to
// 100ms, then 1MB is refilled every 100ms internally. Larger value can lead to
// burstier writes while smaller value introduces more CPU overhead.
// The default should work for most cases.
// @fairness: RateLimiter accepts high-pri requests and low-pri requests.
// A low-pri request is usually blocked in favor of hi-pri request. Currently,
// RocksDB assigns low-pri to request from compaction and high-pri to request
// from flush. Low-pri requests can get blocked if flush requests come in
// continuously. This fairness parameter grants low-pri requests permission by
// 1/fairness chance even though high-pri requests exist to avoid starvation.
// You should be good by leaving it at default 10.
// @mode: Mode indicates which types of operations count against the limit.
// @auto_tuned: Enables dynamic adjustment of rate limit within the range
//              `[rate_bytes_per_sec / 20, rate_bytes_per_sec]`, according to
//              the recent demand for background I/O.
extern RateLimiter* NewGenericRateLimiter(
    int64_t rate_bytes_per_sec, int64_t refill_period_us = 100 * 1000,
    int32_t fairness = 10,
    RateLimiter::Mode mode = RateLimiter::Mode::kWritesOnly,
    bool auto_tuned = false);

}  // namespace ROCKSDB_NAMESPACE