diff options
Diffstat (limited to 'src/rocksdb/include/rocksdb/rate_limiter.h')
-rw-r--r-- | src/rocksdb/include/rocksdb/rate_limiter.h | 159 |
1 files changed, 159 insertions, 0 deletions
diff --git a/src/rocksdb/include/rocksdb/rate_limiter.h b/src/rocksdb/include/rocksdb/rate_limiter.h new file mode 100644 index 000000000..9cad6edf4 --- /dev/null +++ b/src/rocksdb/include/rocksdb/rate_limiter.h @@ -0,0 +1,159 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#include "rocksdb/env.h" +#include "rocksdb/statistics.h" +#include "rocksdb/status.h" + +namespace ROCKSDB_NAMESPACE { + +// Exceptions MUST NOT propagate out of overridden functions into RocksDB, +// because RocksDB is not exception-safe. This could cause undefined behavior +// including data loss, unreported corruption, deadlocks, and more. +class RateLimiter { + public: + enum class OpType { + kRead, + kWrite, + }; + + enum class Mode { + kReadsOnly, + kWritesOnly, + kAllIo, + }; + + // For API compatibility, default to rate-limiting writes only. + explicit RateLimiter(Mode mode = Mode::kWritesOnly) : mode_(mode) {} + + virtual ~RateLimiter() {} + + // This API allows user to dynamically change rate limiter's bytes per second. + // REQUIRED: bytes_per_second > 0 + virtual void SetBytesPerSecond(int64_t bytes_per_second) = 0; + + // Deprecated. New RateLimiter derived classes should override + // Request(const int64_t, const Env::IOPriority, Statistics*) or + // Request(const int64_t, const Env::IOPriority, Statistics*, OpType) + // instead. + // + // Request for token for bytes. If this request can not be satisfied, the call + // is blocked. Caller is responsible to make sure + // bytes <= GetSingleBurstBytes() + // and bytes >= 0. + virtual void Request(const int64_t /*bytes*/, const Env::IOPriority /*pri*/) { + assert(false); + } + + // Request for token for bytes and potentially update statistics. If this + // request can not be satisfied, the call is blocked. Caller is responsible to + // make sure bytes <= GetSingleBurstBytes() + // and bytes >= 0. + virtual void Request(const int64_t bytes, const Env::IOPriority pri, + Statistics* /* stats */) { + // For API compatibility, default implementation calls the older API in + // which statistics are unsupported. + Request(bytes, pri); + } + + // Requests token to read or write bytes and potentially updates statistics. + // + // If this request can not be satisfied, the call is blocked. Caller is + // responsible to make sure bytes <= GetSingleBurstBytes() + // and bytes >= 0. + virtual void Request(const int64_t bytes, const Env::IOPriority pri, + Statistics* stats, OpType op_type) { + if (IsRateLimited(op_type)) { + Request(bytes, pri, stats); + } + } + + // Requests token to read or write bytes and potentially updates statistics. + // Takes into account GetSingleBurstBytes() and alignment (e.g., in case of + // direct I/O) to allocate an appropriate number of bytes, which may be less + // than the number of bytes requested. + virtual size_t RequestToken(size_t bytes, size_t alignment, + Env::IOPriority io_priority, Statistics* stats, + RateLimiter::OpType op_type); + + // Max bytes can be granted in a single burst + virtual int64_t GetSingleBurstBytes() const = 0; + + // Total bytes that go through rate limiter + virtual int64_t GetTotalBytesThrough( + const Env::IOPriority pri = Env::IO_TOTAL) const = 0; + + // Total # of requests that go through rate limiter + virtual int64_t GetTotalRequests( + const Env::IOPriority pri = Env::IO_TOTAL) const = 0; + + // Total # of requests that are pending for bytes in rate limiter + // For convenience, this function is supported by the RateLimiter returned + // by NewGenericRateLimiter but is not required by RocksDB. + // + // REQUIRED: total_pending_request != nullptr + virtual Status GetTotalPendingRequests( + int64_t* total_pending_requests, + const Env::IOPriority pri = Env::IO_TOTAL) const { + assert(total_pending_requests != nullptr); + (void)total_pending_requests; + (void)pri; + return Status::NotSupported(); + } + + virtual int64_t GetBytesPerSecond() const = 0; + + virtual bool IsRateLimited(OpType op_type) { + if ((mode_ == RateLimiter::Mode::kWritesOnly && + op_type == RateLimiter::OpType::kRead) || + (mode_ == RateLimiter::Mode::kReadsOnly && + op_type == RateLimiter::OpType::kWrite)) { + return false; + } + return true; + } + + protected: + Mode GetMode() { return mode_; } + + private: + const Mode mode_; +}; + +// Create a RateLimiter object, which can be shared among RocksDB instances to +// control write rate of flush and compaction. +// @rate_bytes_per_sec: this is the only parameter you want to set most of the +// time. It controls the total write rate of compaction and flush in bytes per +// second. Currently, RocksDB does not enforce rate limit for anything other +// than flush and compaction, e.g. write to WAL. +// @refill_period_us: this controls how often tokens are refilled. For example, +// when rate_bytes_per_sec is set to 10MB/s and refill_period_us is set to +// 100ms, then 1MB is refilled every 100ms internally. Larger value can lead to +// burstier writes while smaller value introduces more CPU overhead. +// The default should work for most cases. +// @fairness: RateLimiter accepts high-pri requests and low-pri requests. +// A low-pri request is usually blocked in favor of hi-pri request. Currently, +// RocksDB assigns low-pri to request from compaction and high-pri to request +// from flush. Low-pri requests can get blocked if flush requests come in +// continuously. This fairness parameter grants low-pri requests permission by +// 1/fairness chance even though high-pri requests exist to avoid starvation. +// You should be good by leaving it at default 10. +// @mode: Mode indicates which types of operations count against the limit. +// @auto_tuned: Enables dynamic adjustment of rate limit within the range +// `[rate_bytes_per_sec / 20, rate_bytes_per_sec]`, according to +// the recent demand for background I/O. +extern RateLimiter* NewGenericRateLimiter( + int64_t rate_bytes_per_sec, int64_t refill_period_us = 100 * 1000, + int32_t fairness = 10, + RateLimiter::Mode mode = RateLimiter::Mode::kWritesOnly, + bool auto_tuned = false); + +} // namespace ROCKSDB_NAMESPACE |