summaryrefslogtreecommitdiffstats
path: root/src/common/FastCDC.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/common/FastCDC.h54
1 files changed, 54 insertions, 0 deletions
diff --git a/src/common/FastCDC.h b/src/common/FastCDC.h
new file mode 100644
index 000000000..b9156f551
--- /dev/null
+++ b/src/common/FastCDC.h
@@ -0,0 +1,54 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include "CDC.h"
+
+// Based on this paper:
+// https://www.usenix.org/system/files/conference/atc16/atc16-paper-xia.pdf
+//
+// Changes:
+// - window size fixed at 64 bytes (to match our word size)
+// - use XOR instead of +
+// - match mask instead of 0
+// - use target mask when close to target size (instead of
+// small/large mask). The idea here is to try to use a consistent (target)
+// mask for most cut points if we can, and only resort to small/large mask
+// when we are (very) small or (very) large.
+
+// Note about the target_bits: The goal is an average chunk size of 1
+// << target_bits. However, in reality the average is ~1.25x that
+// because of the hard mininum chunk size.
+
+class FastCDC : public CDC {
+private:
+ int target_bits; ///< target chunk size bits (1 << target_bits)
+ int min_bits; ///< hard minimum chunk size bits (1 << min_bits)
+ int max_bits; ///< hard maximum chunk size bits (1 << max_bits)
+
+ uint64_t target_mask; ///< maskA in the paper (target_bits set)
+ uint64_t small_mask; ///< maskS in the paper (more bits set)
+ uint64_t large_mask; ///< maskL in the paper (fewer bits set)
+
+ /// lookup table with pseudorandom values for each byte
+ uint64_t table[256];
+
+ /// window size in bytes
+ const size_t window = sizeof(uint64_t)*8; // bits in uint64_t
+
+ void _setup(int target, int window_bits);
+
+public:
+ FastCDC(int target = 18, int window_bits = 0) {
+ _setup(target, window_bits);
+ };
+
+ void set_target_bits(int target, int window_bits) override {
+ _setup(target, window_bits);
+ }
+
+ void calc_chunks(
+ const bufferlist& bl,
+ std::vector<std::pair<uint64_t, uint64_t>> *chunks) const override;
+};