diff options
Diffstat (limited to 'src/rocksdb/include/rocksdb/compaction_filter.h')
-rw-r--r-- | src/rocksdb/include/rocksdb/compaction_filter.h | 212 |
1 files changed, 212 insertions, 0 deletions
diff --git a/src/rocksdb/include/rocksdb/compaction_filter.h b/src/rocksdb/include/rocksdb/compaction_filter.h new file mode 100644 index 000000000..976507831 --- /dev/null +++ b/src/rocksdb/include/rocksdb/compaction_filter.h @@ -0,0 +1,212 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// Copyright (c) 2013 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#include <cassert> +#include <memory> +#include <string> +#include <vector> + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +class Slice; +class SliceTransform; + +// Context information of a compaction run +struct CompactionFilterContext { + // Does this compaction run include all data files + bool is_full_compaction; + // Is this compaction requested by the client (true), + // or is it occurring as an automatic compaction process + bool is_manual_compaction; +}; + +// CompactionFilter allows an application to modify/delete a key-value at +// the time of compaction. + +class CompactionFilter { + public: + enum ValueType { + kValue, + kMergeOperand, + kBlobIndex, // used internally by BlobDB. + }; + + enum class Decision { + kKeep, + kRemove, + kChangeValue, + kRemoveAndSkipUntil, + }; + + enum class BlobDecision { kKeep, kChangeValue, kCorruption, kIOError }; + + // Context information of a compaction run + struct Context { + // Does this compaction run include all data files + bool is_full_compaction; + // Is this compaction requested by the client (true), + // or is it occurring as an automatic compaction process + bool is_manual_compaction; + // Which column family this compaction is for. + uint32_t column_family_id; + }; + + virtual ~CompactionFilter() {} + + // The compaction process invokes this + // method for kv that is being compacted. A return value + // of false indicates that the kv should be preserved in the + // output of this compaction run and a return value of true + // indicates that this key-value should be removed from the + // output of the compaction. The application can inspect + // the existing value of the key and make decision based on it. + // + // Key-Values that are results of merge operation during compaction are not + // passed into this function. Currently, when you have a mix of Put()s and + // Merge()s on a same key, we only guarantee to process the merge operands + // through the compaction filters. Put()s might be processed, or might not. + // + // When the value is to be preserved, the application has the option + // to modify the existing_value and pass it back through new_value. + // value_changed needs to be set to true in this case. + // + // Note that RocksDB snapshots (i.e. call GetSnapshot() API on a + // DB* object) will not guarantee to preserve the state of the DB with + // CompactionFilter. Data seen from a snapshot might disppear after a + // compaction finishes. If you use snapshots, think twice about whether you + // want to use compaction filter and whether you are using it in a safe way. + // + // If multithreaded compaction is being used *and* a single CompactionFilter + // instance was supplied via Options::compaction_filter, this method may be + // called from different threads concurrently. The application must ensure + // that the call is thread-safe. + // + // If the CompactionFilter was created by a factory, then it will only ever + // be used by a single thread that is doing the compaction run, and this + // call does not need to be thread-safe. However, multiple filters may be + // in existence and operating concurrently. + virtual bool Filter(int /*level*/, const Slice& /*key*/, + const Slice& /*existing_value*/, + std::string* /*new_value*/, + bool* /*value_changed*/) const { + return false; + } + + // The compaction process invokes this method on every merge operand. If this + // method returns true, the merge operand will be ignored and not written out + // in the compaction output + // + // Note: If you are using a TransactionDB, it is not recommended to implement + // FilterMergeOperand(). If a Merge operation is filtered out, TransactionDB + // may not realize there is a write conflict and may allow a Transaction to + // Commit that should have failed. Instead, it is better to implement any + // Merge filtering inside the MergeOperator. + virtual bool FilterMergeOperand(int /*level*/, const Slice& /*key*/, + const Slice& /*operand*/) const { + return false; + } + + // An extended API. Called for both values and merge operands. + // Allows changing value and skipping ranges of keys. + // The default implementation uses Filter() and FilterMergeOperand(). + // If you're overriding this method, no need to override the other two. + // `value_type` indicates whether this key-value corresponds to a normal + // value (e.g. written with Put()) or a merge operand (written with Merge()). + // + // Possible return values: + // * kKeep - keep the key-value pair. + // * kRemove - remove the key-value pair or merge operand. + // * kChangeValue - keep the key and change the value/operand to *new_value. + // * kRemoveAndSkipUntil - remove this key-value pair, and also remove + // all key-value pairs with key in [key, *skip_until). This range + // of keys will be skipped without reading, potentially saving some + // IO operations compared to removing the keys one by one. + // + // *skip_until <= key is treated the same as Decision::kKeep + // (since the range [key, *skip_until) is empty). + // + // Caveats: + // - The keys are skipped even if there are snapshots containing them, + // i.e. values removed by kRemoveAndSkipUntil can disappear from a + // snapshot - beware if you're using TransactionDB or + // DB::GetSnapshot(). + // - If value for a key was overwritten or merged into (multiple Put()s + // or Merge()s), and compaction filter skips this key with + // kRemoveAndSkipUntil, it's possible that it will remove only + // the new value, exposing the old value that was supposed to be + // overwritten. + // - Doesn't work with PlainTableFactory in prefix mode. + // - If you use kRemoveAndSkipUntil, consider also reducing + // compaction_readahead_size option. + // + // Note: If you are using a TransactionDB, it is not recommended to filter + // out or modify merge operands (ValueType::kMergeOperand). + // If a merge operation is filtered out, TransactionDB may not realize there + // is a write conflict and may allow a Transaction to Commit that should have + // failed. Instead, it is better to implement any Merge filtering inside the + // MergeOperator. + virtual Decision FilterV2(int level, const Slice& key, ValueType value_type, + const Slice& existing_value, std::string* new_value, + std::string* /*skip_until*/) const { + switch (value_type) { + case ValueType::kValue: { + bool value_changed = false; + bool rv = Filter(level, key, existing_value, new_value, &value_changed); + if (rv) { + return Decision::kRemove; + } + return value_changed ? Decision::kChangeValue : Decision::kKeep; + } + case ValueType::kMergeOperand: { + bool rv = FilterMergeOperand(level, key, existing_value); + return rv ? Decision::kRemove : Decision::kKeep; + } + case ValueType::kBlobIndex: + return Decision::kKeep; + } + assert(false); + return Decision::kKeep; + } + + // Internal (BlobDB) use only. Do not override in application code. + virtual BlobDecision PrepareBlobOutput(const Slice& /* key */, + const Slice& /* existing_value */, + std::string* /* new_value */) const { + return BlobDecision::kKeep; + } + + // This function is deprecated. Snapshots will always be ignored for + // compaction filters, because we realized that not ignoring snapshots doesn't + // provide the gurantee we initially thought it would provide. Repeatable + // reads will not be guaranteed anyway. If you override the function and + // returns false, we will fail the compaction. + virtual bool IgnoreSnapshots() const { return true; } + + // Returns a name that identifies this compaction filter. + // The name will be printed to LOG file on start up for diagnosis. + virtual const char* Name() const = 0; +}; + +// Each compaction will create a new CompactionFilter allowing the +// application to know about different compactions +class CompactionFilterFactory { + public: + virtual ~CompactionFilterFactory() {} + + virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter( + const CompactionFilter::Context& context) = 0; + + // Returns a name that identifies this compaction filter factory. + virtual const char* Name() const = 0; +}; + +} // namespace ROCKSDB_NAMESPACE |