1 files changed, 212 insertions, 0 deletions
diff --git a/src/rocksdb/include/rocksdb/compaction_filter.h b/src/rocksdb/include/rocksdb/compaction_filter.h
new file mode 100644
index 000000000..976507831
--- /dev/null
+++ b/src/rocksdb/include/rocksdb/compaction_filter.h
@@ -0,0 +1,212 @@
+// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2013 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <cassert>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Slice;
+class SliceTransform;
+
+// Context information of a compaction run
+struct CompactionFilterContext {
+  // Does this compaction run include all data files
+  bool is_full_compaction;
+  // Is this compaction requested by the client (true),
+  // or is it occurring as an automatic compaction process
+  bool is_manual_compaction;
+};
+
+// CompactionFilter allows an application to modify/delete a key-value at
+// the time of compaction.
+
+class CompactionFilter {
+ public:
+  enum ValueType {
+    kValue,
+    kMergeOperand,
+    kBlobIndex,  // used internally by BlobDB.
+  };
+
+  enum class Decision {
+    kKeep,
+    kRemove,
+    kChangeValue,
+    kRemoveAndSkipUntil,
+  };
+
+  enum class BlobDecision { kKeep, kChangeValue, kCorruption, kIOError };
+
+  // Context information of a compaction run
+  struct Context {
+    // Does this compaction run include all data files
+    bool is_full_compaction;
+    // Is this compaction requested by the client (true),
+    // or is it occurring as an automatic compaction process
+    bool is_manual_compaction;
+    // Which column family this compaction is for.
+    uint32_t column_family_id;
+  };
+
+  virtual ~CompactionFilter() {}
+
+  // The compaction process invokes this
+  // method for kv that is being compacted. A return value
+  // of false indicates that the kv should be preserved in the
+  // output of this compaction run and a return value of true
+  // indicates that this key-value should be removed from the
+  // output of the compaction.  The application can inspect
+  // the existing value of the key and make decision based on it.
+  //
+  // Key-Values that are results of merge operation during compaction are not
+  // passed into this function. Currently, when you have a mix of Put()s and
+  // Merge()s on a same key, we only guarantee to process the merge operands
+  // through the compaction filters. Put()s might be processed, or might not.
+  //
+  // When the value is to be preserved, the application has the option
+  // to modify the existing_value and pass it back through new_value.
+  // value_changed needs to be set to true in this case.
+  //
+  // Note that RocksDB snapshots (i.e. call GetSnapshot() API on a
+  // DB* object) will not guarantee to preserve the state of the DB with
+  // CompactionFilter. Data seen from a snapshot might disppear after a
+  // compaction finishes. If you use snapshots, think twice about whether you
+  // want to use compaction filter and whether you are using it in a safe way.
+  //
+  // If multithreaded compaction is being used *and* a single CompactionFilter
+  // instance was supplied via Options::compaction_filter, this method may be
+  // called from different threads concurrently.  The application must ensure
+  // that the call is thread-safe.
+  //
+  // If the CompactionFilter was created by a factory, then it will only ever
+  // be used by a single thread that is doing the compaction run, and this
+  // call does not need to be thread-safe.  However, multiple filters may be
+  // in existence and operating concurrently.
+  virtual bool Filter(int /*level*/, const Slice& /*key*/,
+                      const Slice& /*existing_value*/,
+                      std::string* /*new_value*/,
+                      bool* /*value_changed*/) const {
+    return false;
+  }
+
+  // The compaction process invokes this method on every merge operand. If this
+  // method returns true, the merge operand will be ignored and not written out
+  // in the compaction output
+  //
+  // Note: If you are using a TransactionDB, it is not recommended to implement
+  // FilterMergeOperand().  If a Merge operation is filtered out, TransactionDB
+  // may not realize there is a write conflict and may allow a Transaction to
+  // Commit that should have failed.  Instead, it is better to implement any
+  // Merge filtering inside the MergeOperator.
+  virtual bool FilterMergeOperand(int /*level*/, const Slice& /*key*/,
+                                  const Slice& /*operand*/) const {
+    return false;
+  }
+
+  // An extended API. Called for both values and merge operands.
+  // Allows changing value and skipping ranges of keys.
+  // The default implementation uses Filter() and FilterMergeOperand().
+  // If you're overriding this method, no need to override the other two.
+  // `value_type` indicates whether this key-value corresponds to a normal
+  // value (e.g. written with Put())  or a merge operand (written with Merge()).
+  //
+  // Possible return values:
+  //  * kKeep - keep the key-value pair.
+  //  * kRemove - remove the key-value pair or merge operand.
+  //  * kChangeValue - keep the key and change the value/operand to *new_value.
+  //  * kRemoveAndSkipUntil - remove this key-value pair, and also remove
+  //      all key-value pairs with key in [key, *skip_until). This range
+  //      of keys will be skipped without reading, potentially saving some
+  //      IO operations compared to removing the keys one by one.
+  //
+  //      *skip_until <= key is treated the same as Decision::kKeep
+  //      (since the range [key, *skip_until) is empty).
+  //
+  //      Caveats:
+  //       - The keys are skipped even if there are snapshots containing them,
+  //         i.e. values removed by kRemoveAndSkipUntil can disappear from a
+  //         snapshot - beware if you're using TransactionDB or
+  //         DB::GetSnapshot().
+  //       - If value for a key was overwritten or merged into (multiple Put()s
+  //         or Merge()s), and compaction filter skips this key with
+  //         kRemoveAndSkipUntil, it's possible that it will remove only
+  //         the new value, exposing the old value that was supposed to be
+  //         overwritten.
+  //       - Doesn't work with PlainTableFactory in prefix mode.
+  //       - If you use kRemoveAndSkipUntil, consider also reducing
+  //         compaction_readahead_size option.
+  //
+  // Note: If you are using a TransactionDB, it is not recommended to filter
+  // out or modify merge operands (ValueType::kMergeOperand).
+  // If a merge operation is filtered out, TransactionDB may not realize there
+  // is a write conflict and may allow a Transaction to Commit that should have
+  // failed. Instead, it is better to implement any Merge filtering inside the
+  // MergeOperator.
+  virtual Decision FilterV2(int level, const Slice& key, ValueType value_type,
+                            const Slice& existing_value, std::string* new_value,
+                            std::string* /*skip_until*/) const {
+    switch (value_type) {
+      case ValueType::kValue: {
+        bool value_changed = false;
+        bool rv = Filter(level, key, existing_value, new_value, &value_changed);
+        if (rv) {
+          return Decision::kRemove;
+        }
+        return value_changed ? Decision::kChangeValue : Decision::kKeep;
+      }
+      case ValueType::kMergeOperand: {
+        bool rv = FilterMergeOperand(level, key, existing_value);
+        return rv ? Decision::kRemove : Decision::kKeep;
+      }
+      case ValueType::kBlobIndex:
+        return Decision::kKeep;
+    }
+    assert(false);
+    return Decision::kKeep;
+  }
+
+  // Internal (BlobDB) use only. Do not override in application code.
+  virtual BlobDecision PrepareBlobOutput(const Slice& /* key */,
+                                         const Slice& /* existing_value */,
+                                         std::string* /* new_value */) const {
+    return BlobDecision::kKeep;
+  }
+
+  // This function is deprecated. Snapshots will always be ignored for
+  // compaction filters, because we realized that not ignoring snapshots doesn't
+  // provide the gurantee we initially thought it would provide. Repeatable
+  // reads will not be guaranteed anyway. If you override the function and
+  // returns false, we will fail the compaction.
+  virtual bool IgnoreSnapshots() const { return true; }
+
+  // Returns a name that identifies this compaction filter.
+  // The name will be printed to LOG file on start up for diagnosis.
+  virtual const char* Name() const = 0;
+};
+
+// Each compaction will create a new CompactionFilter allowing the
+// application to know about different compactions
+class CompactionFilterFactory {
+ public:
+  virtual ~CompactionFilterFactory() {}
+
+  virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
+      const CompactionFilter::Context& context) = 0;
+
+  // Returns a name that identifies this compaction filter factory.
+  virtual const char* Name() const = 0;
+};
+
+}  // namespace ROCKSDB_NAMESPACE