summaryrefslogtreecommitdiffstats
path: root/storage/tokudb/PerconaFT/ft/ft-hot-flusher.cc
diff options
context:
space:
mode:
Diffstat (limited to 'storage/tokudb/PerconaFT/ft/ft-hot-flusher.cc')
-rw-r--r--storage/tokudb/PerconaFT/ft/ft-hot-flusher.cc362
1 files changed, 362 insertions, 0 deletions
diff --git a/storage/tokudb/PerconaFT/ft/ft-hot-flusher.cc b/storage/tokudb/PerconaFT/ft/ft-hot-flusher.cc
new file mode 100644
index 00000000..ffab8647
--- /dev/null
+++ b/storage/tokudb/PerconaFT/ft/ft-hot-flusher.cc
@@ -0,0 +1,362 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+ PerconaFT is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License, version 2,
+ as published by the Free Software Foundation.
+
+ PerconaFT is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+ PerconaFT is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License, version 3,
+ as published by the Free Software Foundation.
+
+ PerconaFT is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
+======= */
+
+#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#include <my_global.h>
+#include "ft/ft.h"
+#include "ft/ft-cachetable-wrappers.h"
+#include "ft/ft-flusher.h"
+#include "ft/ft-flusher-internal.h"
+#include "ft/ft-internal.h"
+#include "ft/node.h"
+#include "portability/toku_atomic.h"
+#include "util/context.h"
+#include "util/status.h"
+
+// Member Descirption:
+// 1. highest_pivot_key - this is the key that corresponds to the
+// most recently flushed leaf entry.
+// 2. max_current_key - this is the pivot/key that we inherit as
+// we descend down the tree. We use this to set the highest_pivot_key.
+// 3. sub_tree_size - this is the percentage of the entire tree that our
+// current position (in a sub-tree) encompasses.
+// 4. percentage_done - this is the percentage of leaf nodes that have
+// been flushed into.
+// 5. rightmost_leaf_seen - this is a boolean we use to determine if
+// if we have flushed to every leaf node.
+struct hot_flusher_extra {
+ DBT highest_pivot_key;
+ DBT max_current_key;
+ float sub_tree_size;
+ float percentage_done;
+ bool rightmost_leaf_seen;
+};
+
+void
+toku_ft_hot_get_status(FT_HOT_STATUS s) {
+ hot_status.init();
+ *s = hot_status;
+}
+
+// Copies the max current key to the highest pivot key seen.
+static void
+hot_set_highest_key(struct hot_flusher_extra *flusher)
+{
+ // The max current key will be NULL if we are traversing in the
+ // rightmost subtree of a given parent. As such, we don't want to
+ // allocate memory for this case.
+ toku_destroy_dbt(&flusher->highest_pivot_key);
+ if (flusher->max_current_key.data != NULL) {
+ // Otherwise, let's copy all the contents from one key to the other.
+ toku_clone_dbt(&flusher->highest_pivot_key, flusher->max_current_key);
+ }
+}
+
+static void
+hot_set_start_key(struct hot_flusher_extra *flusher, const DBT* start)
+{
+ toku_destroy_dbt(&flusher->highest_pivot_key);
+ if (start != NULL) {
+ // Otherwise, let's copy all the contents from one key to the other.
+ toku_clone_dbt(&flusher->highest_pivot_key, *start);
+ }
+}
+
+static int
+hot_just_pick_child(FT ft,
+ FTNODE parent,
+ struct hot_flusher_extra *flusher)
+{
+ int childnum = 0;
+
+ // Search through Parents pivots, see which one is greater than
+ // the highest_pivot_key seen so far.
+ if (flusher->highest_pivot_key.data == NULL)
+ {
+ // Special case of the first child of the root node.
+ // Also known as, NEGATIVE INFINITY....
+ childnum = 0;
+ } else {
+ // Find the pivot boundary.
+ childnum = toku_ftnode_hot_next_child(parent, &flusher->highest_pivot_key, ft->cmp);
+ }
+
+ return childnum;
+}
+
+static void
+hot_update_flusher_keys(FTNODE parent,
+ int childnum,
+ struct hot_flusher_extra *flusher)
+{
+ // Update maximum current key if the child is NOT the rightmost
+ // child node.
+ if (childnum < (parent->n_children - 1)) {
+ toku_destroy_dbt(&flusher->max_current_key);
+ toku_clone_dbt(&flusher->max_current_key, parent->pivotkeys.get_pivot(childnum));
+ }
+}
+
+// Picks which child toku_ft_flush_some_child will use for flushing and
+// recursion.
+static int
+hot_pick_child(FT ft,
+ FTNODE parent,
+ void *extra)
+{
+ struct hot_flusher_extra *flusher = (struct hot_flusher_extra *) extra;
+ int childnum = hot_just_pick_child(ft, parent, flusher);
+
+ // Now we determine the percentage of the tree flushed so far.
+
+ // Whichever subtree we choose to recurse into, it is a fraction
+ // of the current parent.
+ flusher->sub_tree_size /= parent->n_children;
+
+ // Update the precentage complete, using our new sub tree size AND
+ // the number of children we have already flushed.
+ flusher->percentage_done += (flusher->sub_tree_size * childnum);
+
+ hot_update_flusher_keys(parent, childnum, flusher);
+
+ return childnum;
+}
+
+// Does nothing for now.
+static void
+hot_update_status(FTNODE UU(child),
+ int UU(dirtied),
+ void *UU(extra))
+{
+ return;
+}
+
+// If we've just split a node, HOT needs another chance to decide which
+// one to flush into. This gives it a chance to do that, and update the
+// keys it maintains.
+static int
+hot_pick_child_after_split(FT ft,
+ FTNODE parent,
+ int childnuma,
+ int childnumb,
+ void *extra)
+{
+ struct hot_flusher_extra *flusher = (struct hot_flusher_extra *) extra;
+ int childnum = hot_just_pick_child(ft, parent, flusher);
+ assert(childnum == childnuma || childnum == childnumb);
+ hot_update_flusher_keys(parent, childnum, flusher);
+ if (parent->height == 1) {
+ // We don't want to recurse into a leaf node, but if we return
+ // anything valid, ft_split_child will try to go there, so we
+ // return -1 to allow ft_split_child to have its default
+ // behavior, which will be to stop recursing.
+ childnum = -1;
+ }
+ return childnum;
+}
+
+// Basic constructor/initializer for the hot flusher struct.
+static void
+hot_flusher_init(struct flusher_advice *advice,
+ struct hot_flusher_extra *flusher)
+{
+ // Initialize the highest pivot key seen to NULL. This represents
+ // NEGATIVE INFINITY and is used to cover the special case of our
+ // first traversal of the tree.
+ toku_init_dbt(&(flusher->highest_pivot_key));
+ toku_init_dbt(&(flusher->max_current_key));
+ flusher->rightmost_leaf_seen = 0;
+ flusher->sub_tree_size = 1.0;
+ flusher->percentage_done = 0.0;
+ flusher_advice_init(advice,
+ hot_pick_child,
+ dont_destroy_basement_nodes,
+ always_recursively_flush,
+ default_merge_child,
+ hot_update_status,
+ hot_pick_child_after_split,
+ flusher
+ );
+}
+
+// Erases any DBT keys we have copied from a traversal.
+static void
+hot_flusher_destroy(struct hot_flusher_extra *flusher)
+{
+ toku_destroy_dbt(&flusher->highest_pivot_key);
+ toku_destroy_dbt(&flusher->max_current_key);
+}
+
+// Entry point for Hot Optimize Table (HOT). Note, this function is
+// not recursive. It iterates over root-to-leaf paths.
+int
+toku_ft_hot_optimize(FT_HANDLE ft_handle, DBT* left, DBT* right,
+ int (*progress_callback)(void *extra, float progress),
+ void *progress_extra, uint64_t* loops_run)
+{
+ toku::context flush_ctx(CTX_FLUSH);
+
+ int r = 0;
+ struct hot_flusher_extra flusher;
+ struct flusher_advice advice;
+
+ hot_flusher_init(&advice, &flusher);
+ hot_set_start_key(&flusher, left);
+
+ uint64_t loop_count = 0;
+ MSN msn_at_start_of_hot = ZERO_MSN; // capture msn from root at
+ // start of HOT operation
+ (void) toku_sync_fetch_and_add(&HOT_STATUS_VAL(FT_HOT_NUM_STARTED), 1);
+
+ toku_ft_note_hot_begin(ft_handle);
+
+ // Higher level logic prevents a dictionary from being deleted or
+ // truncated during a hot optimize operation. Doing so would violate
+ // the hot optimize contract.
+ do {
+ FTNODE root;
+ CACHEKEY root_key;
+ uint32_t fullhash;
+
+ {
+ // Get root node (the first parent of each successive HOT
+ // call.)
+ toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &fullhash);
+ ftnode_fetch_extra bfe;
+ bfe.create_for_full_read(ft_handle->ft);
+ toku_pin_ftnode(ft_handle->ft,
+ (BLOCKNUM) root_key,
+ fullhash,
+ &bfe,
+ PL_WRITE_EXPENSIVE,
+ &root,
+ true);
+ toku_ftnode_assert_fully_in_memory(root);
+ }
+
+ // Prepare HOT diagnostics.
+ if (loop_count == 0) {
+ // The first time through, capture msn from root
+ msn_at_start_of_hot = root->max_msn_applied_to_node_on_disk;
+ }
+
+ loop_count++;
+
+ if (loop_count > HOT_STATUS_VAL(FT_HOT_MAX_ROOT_FLUSH_COUNT)) {
+ HOT_STATUS_VAL(FT_HOT_MAX_ROOT_FLUSH_COUNT) = loop_count;
+ }
+
+ // Initialize the maximum current key. We need to do this for
+ // every traversal.
+ toku_destroy_dbt(&flusher.max_current_key);
+
+ flusher.sub_tree_size = 1.0;
+ flusher.percentage_done = 0.0;
+
+ // This should recurse to the bottom of the tree and then
+ // return.
+ if (root->height > 0) {
+ toku_ft_flush_some_child(ft_handle->ft, root, &advice);
+ } else {
+ // Since there are no children to flush, we should abort
+ // the HOT call.
+ flusher.rightmost_leaf_seen = 1;
+ toku_unpin_ftnode(ft_handle->ft, root);
+ }
+
+ // Set the highest pivot key seen here, since the parent may
+ // be unlocked and NULL'd later in our caller:
+ // toku_ft_flush_some_child().
+ hot_set_highest_key(&flusher);
+
+ // This is where we determine if the traversal is finished or
+ // not.
+ if (flusher.max_current_key.data == NULL) {
+ flusher.rightmost_leaf_seen = 1;
+ }
+ else if (right) {
+ // if we have flushed past the bounds set for us,
+ // set rightmost_leaf_seen so we exit
+ int cmp = ft_handle->ft->cmp(&flusher.max_current_key, right);
+ if (cmp > 0) {
+ flusher.rightmost_leaf_seen = 1;
+ }
+ }
+
+ // Update HOT's progress.
+ if (progress_callback != NULL) {
+ r = progress_callback(progress_extra, flusher.percentage_done);
+
+ // Check if the callback wants us to stop running HOT.
+ if (r != 0) {
+ flusher.rightmost_leaf_seen = 1;
+ }
+ }
+
+ // Loop until the max key has been updated to positive
+ // infinity.
+ } while (!flusher.rightmost_leaf_seen);
+ *loops_run = loop_count;
+
+ // Cleanup.
+ hot_flusher_destroy(&flusher);
+
+ // More diagnostics.
+ {
+ bool success = false;
+ if (r == 0) { success = true; }
+
+ {
+ toku_ft_note_hot_complete(ft_handle, success, msn_at_start_of_hot);
+ }
+
+ if (success) {
+ (void) toku_sync_fetch_and_add(&HOT_STATUS_VAL(FT_HOT_NUM_COMPLETED), 1);
+ } else {
+ (void) toku_sync_fetch_and_add(&HOT_STATUS_VAL(FT_HOT_NUM_ABORTED), 1);
+ }
+ }
+ return r;
+}
+
+#include <toku_race_tools.h>
+void __attribute__((__constructor__)) toku_hot_helgrind_ignore(void);
+void
+toku_hot_helgrind_ignore(void) {
+ // incremented only while lock is held, but read by engine status asynchronously.
+ TOKU_VALGRIND_HG_DISABLE_CHECKING(&hot_status, sizeof hot_status);
+}