diff options
Diffstat (limited to 'storage/tokudb/PerconaFT/ft/ft-hot-flusher.cc')
-rw-r--r-- | storage/tokudb/PerconaFT/ft/ft-hot-flusher.cc | 362 |
1 files changed, 362 insertions, 0 deletions
diff --git a/storage/tokudb/PerconaFT/ft/ft-hot-flusher.cc b/storage/tokudb/PerconaFT/ft/ft-hot-flusher.cc new file mode 100644 index 00000000..ffab8647 --- /dev/null +++ b/storage/tokudb/PerconaFT/ft/ft-hot-flusher.cc @@ -0,0 +1,362 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. +======= */ + +#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#include <my_global.h> +#include "ft/ft.h" +#include "ft/ft-cachetable-wrappers.h" +#include "ft/ft-flusher.h" +#include "ft/ft-flusher-internal.h" +#include "ft/ft-internal.h" +#include "ft/node.h" +#include "portability/toku_atomic.h" +#include "util/context.h" +#include "util/status.h" + +// Member Descirption: +// 1. highest_pivot_key - this is the key that corresponds to the +// most recently flushed leaf entry. +// 2. max_current_key - this is the pivot/key that we inherit as +// we descend down the tree. We use this to set the highest_pivot_key. +// 3. sub_tree_size - this is the percentage of the entire tree that our +// current position (in a sub-tree) encompasses. +// 4. percentage_done - this is the percentage of leaf nodes that have +// been flushed into. +// 5. rightmost_leaf_seen - this is a boolean we use to determine if +// if we have flushed to every leaf node. +struct hot_flusher_extra { + DBT highest_pivot_key; + DBT max_current_key; + float sub_tree_size; + float percentage_done; + bool rightmost_leaf_seen; +}; + +void +toku_ft_hot_get_status(FT_HOT_STATUS s) { + hot_status.init(); + *s = hot_status; +} + +// Copies the max current key to the highest pivot key seen. +static void +hot_set_highest_key(struct hot_flusher_extra *flusher) +{ + // The max current key will be NULL if we are traversing in the + // rightmost subtree of a given parent. As such, we don't want to + // allocate memory for this case. + toku_destroy_dbt(&flusher->highest_pivot_key); + if (flusher->max_current_key.data != NULL) { + // Otherwise, let's copy all the contents from one key to the other. + toku_clone_dbt(&flusher->highest_pivot_key, flusher->max_current_key); + } +} + +static void +hot_set_start_key(struct hot_flusher_extra *flusher, const DBT* start) +{ + toku_destroy_dbt(&flusher->highest_pivot_key); + if (start != NULL) { + // Otherwise, let's copy all the contents from one key to the other. + toku_clone_dbt(&flusher->highest_pivot_key, *start); + } +} + +static int +hot_just_pick_child(FT ft, + FTNODE parent, + struct hot_flusher_extra *flusher) +{ + int childnum = 0; + + // Search through Parents pivots, see which one is greater than + // the highest_pivot_key seen so far. + if (flusher->highest_pivot_key.data == NULL) + { + // Special case of the first child of the root node. + // Also known as, NEGATIVE INFINITY.... + childnum = 0; + } else { + // Find the pivot boundary. + childnum = toku_ftnode_hot_next_child(parent, &flusher->highest_pivot_key, ft->cmp); + } + + return childnum; +} + +static void +hot_update_flusher_keys(FTNODE parent, + int childnum, + struct hot_flusher_extra *flusher) +{ + // Update maximum current key if the child is NOT the rightmost + // child node. + if (childnum < (parent->n_children - 1)) { + toku_destroy_dbt(&flusher->max_current_key); + toku_clone_dbt(&flusher->max_current_key, parent->pivotkeys.get_pivot(childnum)); + } +} + +// Picks which child toku_ft_flush_some_child will use for flushing and +// recursion. +static int +hot_pick_child(FT ft, + FTNODE parent, + void *extra) +{ + struct hot_flusher_extra *flusher = (struct hot_flusher_extra *) extra; + int childnum = hot_just_pick_child(ft, parent, flusher); + + // Now we determine the percentage of the tree flushed so far. + + // Whichever subtree we choose to recurse into, it is a fraction + // of the current parent. + flusher->sub_tree_size /= parent->n_children; + + // Update the precentage complete, using our new sub tree size AND + // the number of children we have already flushed. + flusher->percentage_done += (flusher->sub_tree_size * childnum); + + hot_update_flusher_keys(parent, childnum, flusher); + + return childnum; +} + +// Does nothing for now. +static void +hot_update_status(FTNODE UU(child), + int UU(dirtied), + void *UU(extra)) +{ + return; +} + +// If we've just split a node, HOT needs another chance to decide which +// one to flush into. This gives it a chance to do that, and update the +// keys it maintains. +static int +hot_pick_child_after_split(FT ft, + FTNODE parent, + int childnuma, + int childnumb, + void *extra) +{ + struct hot_flusher_extra *flusher = (struct hot_flusher_extra *) extra; + int childnum = hot_just_pick_child(ft, parent, flusher); + assert(childnum == childnuma || childnum == childnumb); + hot_update_flusher_keys(parent, childnum, flusher); + if (parent->height == 1) { + // We don't want to recurse into a leaf node, but if we return + // anything valid, ft_split_child will try to go there, so we + // return -1 to allow ft_split_child to have its default + // behavior, which will be to stop recursing. + childnum = -1; + } + return childnum; +} + +// Basic constructor/initializer for the hot flusher struct. +static void +hot_flusher_init(struct flusher_advice *advice, + struct hot_flusher_extra *flusher) +{ + // Initialize the highest pivot key seen to NULL. This represents + // NEGATIVE INFINITY and is used to cover the special case of our + // first traversal of the tree. + toku_init_dbt(&(flusher->highest_pivot_key)); + toku_init_dbt(&(flusher->max_current_key)); + flusher->rightmost_leaf_seen = 0; + flusher->sub_tree_size = 1.0; + flusher->percentage_done = 0.0; + flusher_advice_init(advice, + hot_pick_child, + dont_destroy_basement_nodes, + always_recursively_flush, + default_merge_child, + hot_update_status, + hot_pick_child_after_split, + flusher + ); +} + +// Erases any DBT keys we have copied from a traversal. +static void +hot_flusher_destroy(struct hot_flusher_extra *flusher) +{ + toku_destroy_dbt(&flusher->highest_pivot_key); + toku_destroy_dbt(&flusher->max_current_key); +} + +// Entry point for Hot Optimize Table (HOT). Note, this function is +// not recursive. It iterates over root-to-leaf paths. +int +toku_ft_hot_optimize(FT_HANDLE ft_handle, DBT* left, DBT* right, + int (*progress_callback)(void *extra, float progress), + void *progress_extra, uint64_t* loops_run) +{ + toku::context flush_ctx(CTX_FLUSH); + + int r = 0; + struct hot_flusher_extra flusher; + struct flusher_advice advice; + + hot_flusher_init(&advice, &flusher); + hot_set_start_key(&flusher, left); + + uint64_t loop_count = 0; + MSN msn_at_start_of_hot = ZERO_MSN; // capture msn from root at + // start of HOT operation + (void) toku_sync_fetch_and_add(&HOT_STATUS_VAL(FT_HOT_NUM_STARTED), 1); + + toku_ft_note_hot_begin(ft_handle); + + // Higher level logic prevents a dictionary from being deleted or + // truncated during a hot optimize operation. Doing so would violate + // the hot optimize contract. + do { + FTNODE root; + CACHEKEY root_key; + uint32_t fullhash; + + { + // Get root node (the first parent of each successive HOT + // call.) + toku_calculate_root_offset_pointer(ft_handle->ft, &root_key, &fullhash); + ftnode_fetch_extra bfe; + bfe.create_for_full_read(ft_handle->ft); + toku_pin_ftnode(ft_handle->ft, + (BLOCKNUM) root_key, + fullhash, + &bfe, + PL_WRITE_EXPENSIVE, + &root, + true); + toku_ftnode_assert_fully_in_memory(root); + } + + // Prepare HOT diagnostics. + if (loop_count == 0) { + // The first time through, capture msn from root + msn_at_start_of_hot = root->max_msn_applied_to_node_on_disk; + } + + loop_count++; + + if (loop_count > HOT_STATUS_VAL(FT_HOT_MAX_ROOT_FLUSH_COUNT)) { + HOT_STATUS_VAL(FT_HOT_MAX_ROOT_FLUSH_COUNT) = loop_count; + } + + // Initialize the maximum current key. We need to do this for + // every traversal. + toku_destroy_dbt(&flusher.max_current_key); + + flusher.sub_tree_size = 1.0; + flusher.percentage_done = 0.0; + + // This should recurse to the bottom of the tree and then + // return. + if (root->height > 0) { + toku_ft_flush_some_child(ft_handle->ft, root, &advice); + } else { + // Since there are no children to flush, we should abort + // the HOT call. + flusher.rightmost_leaf_seen = 1; + toku_unpin_ftnode(ft_handle->ft, root); + } + + // Set the highest pivot key seen here, since the parent may + // be unlocked and NULL'd later in our caller: + // toku_ft_flush_some_child(). + hot_set_highest_key(&flusher); + + // This is where we determine if the traversal is finished or + // not. + if (flusher.max_current_key.data == NULL) { + flusher.rightmost_leaf_seen = 1; + } + else if (right) { + // if we have flushed past the bounds set for us, + // set rightmost_leaf_seen so we exit + int cmp = ft_handle->ft->cmp(&flusher.max_current_key, right); + if (cmp > 0) { + flusher.rightmost_leaf_seen = 1; + } + } + + // Update HOT's progress. + if (progress_callback != NULL) { + r = progress_callback(progress_extra, flusher.percentage_done); + + // Check if the callback wants us to stop running HOT. + if (r != 0) { + flusher.rightmost_leaf_seen = 1; + } + } + + // Loop until the max key has been updated to positive + // infinity. + } while (!flusher.rightmost_leaf_seen); + *loops_run = loop_count; + + // Cleanup. + hot_flusher_destroy(&flusher); + + // More diagnostics. + { + bool success = false; + if (r == 0) { success = true; } + + { + toku_ft_note_hot_complete(ft_handle, success, msn_at_start_of_hot); + } + + if (success) { + (void) toku_sync_fetch_and_add(&HOT_STATUS_VAL(FT_HOT_NUM_COMPLETED), 1); + } else { + (void) toku_sync_fetch_and_add(&HOT_STATUS_VAL(FT_HOT_NUM_ABORTED), 1); + } + } + return r; +} + +#include <toku_race_tools.h> +void __attribute__((__constructor__)) toku_hot_helgrind_ignore(void); +void +toku_hot_helgrind_ignore(void) { + // incremented only while lock is held, but read by engine status asynchronously. + TOKU_VALGRIND_HG_DISABLE_CHECKING(&hot_status, sizeof hot_status); +} |