summaryrefslogtreecommitdiffstats
path: root/storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc
diff options
context:
space:
mode:
Diffstat (limited to 'storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc')
-rw-r--r--storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc373
1 files changed, 373 insertions, 0 deletions
diff --git a/storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc b/storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc
new file mode 100644
index 00000000..439e0688
--- /dev/null
+++ b/storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc
@@ -0,0 +1,373 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+ PerconaFT is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License, version 2,
+ as published by the Free Software Foundation.
+
+ PerconaFT is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+ PerconaFT is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License, version 3,
+ as published by the Free Software Foundation.
+
+ PerconaFT is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
+======= */
+
+#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#include <my_global.h>
+#include "ft/serialize/block_table.h"
+#include "ft/ft-cachetable-wrappers.h"
+#include "ft/ft-flusher.h"
+#include "ft/ft-internal.h"
+#include "ft/ft.h"
+#include "ft/node.h"
+
+#include <util/context.h>
+
+// Key-generation callback handed to toku_cachetable_put_with_dep_pairs().
+// Allocates a new block number from the FT's block table and reports it,
+// together with its cachetable hash, through the two output pointers.
+//   cachekey - receives the newly allocated block number
+//   fullhash - receives toku_cachetable_hash() of that block number
+//   extra    - the FT whose block table and cachefile are used
+static void
+ftnode_get_key_and_fullhash(
+    BLOCKNUM* cachekey,
+    uint32_t* fullhash,
+    void* extra)
+{
+    FT owner = static_cast<FT>(extra);
+    BLOCKNUM fresh_blocknum;
+    owner->blocktable.allocate_blocknum(&fresh_blocknum, owner);
+    *fullhash = toku_cachetable_hash(owner->cf, fresh_blocknum);
+    *cachekey = fresh_blocknum;
+}
+
+// Allocates a new FTNODE with XCALLOC and puts it into the FT's cachetable,
+// forwarding the ct_pair and dirty() state of every dependent node.
+//   ft                  - tree whose cachefile receives the node
+//   num_dependent_nodes - number of entries in dependent_nodes
+//   dependent_nodes     - nodes whose PAIRs / dirty bits are passed along
+//   blocknum, fullhash  - outputs, forwarded to the cachetable put
+//   result              - output, set to the newly allocated node
+void
+cachetable_put_empty_node_with_dep_nodes(
+    FT ft,
+    uint32_t num_dependent_nodes,
+    FTNODE* dependent_nodes,
+    BLOCKNUM* blocknum, //output
+    uint32_t* fullhash, //output
+    FTNODE* result)
+{
+    FTNODE XCALLOC(new_node);
+
+    // Gather the cachetable-side view of each dependent node.
+    PAIR dep_pairs[num_dependent_nodes];
+    enum cachetable_dirty dep_dirty[num_dependent_nodes];
+    for (uint32_t idx = 0; idx < num_dependent_nodes; ++idx) {
+        FTNODE dep = dependent_nodes[idx];
+        dep_pairs[idx] = dep->ct_pair;
+        dep_dirty[idx] = static_cast<enum cachetable_dirty>(dep->dirty());
+    }
+
+    // NOTE(review): FTNODE is used as a pointer-like handle in this file, so
+    // sizeof(FTNODE) is presumably the handle size rather than the size of
+    // the node itself -- confirm that this initial attr is intentional.
+    toku_cachetable_put_with_dep_pairs(
+        ft->cf,
+        ftnode_get_key_and_fullhash,
+        new_node,
+        make_pair_attr(sizeof(FTNODE)),
+        get_write_callbacks_for_node(ft),
+        ft,
+        num_dependent_nodes,
+        dep_pairs,
+        dep_dirty,
+        blocknum,
+        fullhash,
+        toku_ftnode_save_ct_pair);
+
+    *result = new_node;
+}
+
+// Creates a new node in the cachetable (via
+// cachetable_put_empty_node_with_dep_nodes) and initializes it as an empty
+// ftnode of the given height and child count, using the layout version and
+// flags stored in the FT header. The node is returned through *result with
+// its fullhash field set.
+void
+create_new_ftnode_with_dep_nodes(
+    FT ft,
+    FTNODE *result,
+    int height,
+    int n_children,
+    uint32_t num_dependent_nodes,
+    FTNODE* dependent_nodes)
+{
+    BLOCKNUM blocknum;
+    uint32_t fullhash = 0;
+    cachetable_put_empty_node_with_dep_nodes(
+        ft, num_dependent_nodes, dependent_nodes,
+        &blocknum, &fullhash, result);
+
+    assert(ft->h->basementnodesize > 0);
+    if (height == 0) {
+        // A height-0 node must be created with at least one child.
+        assert(n_children > 0);
+    }
+
+    toku_initialize_empty_ftnode(
+        *result, blocknum, height, n_children,
+        ft->h->layout_version, ft->h->flags);
+
+    // Remember the hash chosen by the cachetable put on the node itself.
+    (*result)->fullhash = fullhash;
+}
+
+// Convenience wrapper around create_new_ftnode_with_dep_nodes() for the
+// handle's FT, with no dependent nodes.
+void
+toku_create_new_ftnode (
+    FT_HANDLE t,
+    FTNODE *result,
+    int height,
+    int n_children)
+{
+    const uint32_t no_dependents = 0;
+    create_new_ftnode_with_dep_nodes(
+        t->ft, result, height, n_children, no_dependents, nullptr);
+}
+
+// Pin the node at blocknum on behalf of a query. The node is first pinned
+// non-blocking with a PL_READ lock. If apply_ancestor_messages is set, the
+// node has height 0, and it needs ancestor messages, the read pin is released
+// and the node is re-pinned with a PL_WRITE_CHEAP lock before applying them.
+// Returns 0 with *node_p set, or TOKUDB_TRY_AGAIN with *node_p left unwritten.
+int
+toku_pin_ftnode_for_query(
+ FT_HANDLE ft_handle,
+ BLOCKNUM blocknum,
+ uint32_t fullhash,
+ UNLOCKERS unlockers,
+ ANCESTORS ancestors,
+ const pivot_bounds &bounds,
+ ftnode_fetch_extra *bfe,
+ bool apply_ancestor_messages, // this bool is probably temporary, for #3972, once we know how range query estimates work, will revisit this
+ FTNODE *node_p,
+ bool* msgs_applied)
+{
+ void *node_v;
+ *msgs_applied = false; // later handed to toku_apply_ancestors_messages_to_node
+ FTNODE node = nullptr;
+ MSN max_msn_in_path = ZERO_MSN; // out-argument of toku_ft_leaf_needs_ancestors_messages below
+ bool needs_ancestors_messages = false;
+ // this function assumes that if you want ancestor messages applied,
+ // you are doing a read for a query. This is so we can make some optimizations
+ // below.
+ if (apply_ancestor_messages) {
+ paranoid_invariant(bfe->type == ftnode_fetch_subset);
+ }
+
+ int r = toku_cachetable_get_and_pin_nonblocking(
+ ft_handle->ft->cf,
+ blocknum,
+ fullhash,
+ &node_v,
+ get_write_callbacks_for_node(ft_handle->ft),
+ toku_ftnode_fetch_callback,
+ toku_ftnode_pf_req_callback,
+ toku_ftnode_pf_callback,
+ PL_READ, // first attempt always takes the read lock
+ bfe, //read_extraargs
+ unlockers);
+ if (r != 0) {
+ assert(r == TOKUDB_TRY_AGAIN); // Any other error and we should bomb out ASAP.
+ goto exit;
+ }
+ node = static_cast<FTNODE>(node_v);
+ if (apply_ancestor_messages && node->height == 0) {
+ needs_ancestors_messages = toku_ft_leaf_needs_ancestors_messages(
+ ft_handle->ft,
+ node,
+ ancestors,
+ bounds,
+ &max_msn_in_path,
+ bfe->child_to_read
+ );
+ if (needs_ancestors_messages) {
+ toku::context apply_messages_ctx(CTX_MESSAGE_APPLICATION);
+
+ toku_unpin_ftnode_read_only(ft_handle->ft, node); // release the read pin before re-pinning with PL_WRITE_CHEAP
+ int rr = toku_cachetable_get_and_pin_nonblocking(
+ ft_handle->ft->cf,
+ blocknum,
+ fullhash,
+ &node_v,
+ get_write_callbacks_for_node(ft_handle->ft),
+ toku_ftnode_fetch_callback,
+ toku_ftnode_pf_req_callback,
+ toku_ftnode_pf_callback,
+ PL_WRITE_CHEAP, // lock type used for the message-application path
+ bfe, //read_extraargs
+ unlockers);
+ if (rr != 0) {
+ assert(rr == TOKUDB_TRY_AGAIN); // Any other error and we should bomb out ASAP.
+ r = TOKUDB_TRY_AGAIN;
+ goto exit;
+ }
+ node = static_cast<FTNODE>(node_v);
+ toku_apply_ancestors_messages_to_node(
+ ft_handle,
+ node,
+ ancestors,
+ bounds,
+ msgs_applied,
+ bfe->child_to_read
+ );
+ } else {
+ // At this point, we aren't going to run
+ // toku_apply_ancestors_messages_to_node but that doesn't
+ // mean max_msn_applied shouldn't be updated if possible
+ // (this saves the CPU work involved in
+ // toku_ft_leaf_needs_ancestors_messages).
+ //
+ // We still have a read lock, so we have not resolved
+ // checkpointing. If the node is pending and dirty, we
+ // can't modify anything, including max_msn, until we
+ // resolve checkpointing. If we do, the node might get
+ // written out that way as part of a checkpoint with a
+ // root that was already written out with a smaller
+ // max_msn. During recovery, we would then inject a
+ // message based on the root's max_msn, and that message
+ // would get filtered by the leaf because it had too high
+ // a max_msn value. (see #5407)
+ //
+ // So for simplicity we only update the max_msn if the
+ // node is clean. That way, in order for the node to get
+ // written out, it would have to be dirtied. That
+ // requires a write lock, and a write lock requires you to
+ // resolve checkpointing.
+ if (!node->dirty()) {
+ toku_ft_bn_update_max_msn(node, max_msn_in_path, bfe->child_to_read);
+ }
+ }
+ }
+ *node_p = node; // skipped on the TOKUDB_TRY_AGAIN paths, which jump straight to exit
+exit:
+ return r;
+}
+
+// Pins the node at blocknum with the requested lock type, forwarding the
+// ct_pair and dirty() state of each dependent node to the cachetable. The
+// pin must succeed (enforced with invariant_zero). When move_messages is
+// set, the lock type is not PL_READ, and the node has height > 0, the node's
+// messages are moved to stale before it is returned through *node_p.
+void
+toku_pin_ftnode_with_dep_nodes(
+    FT ft,
+    BLOCKNUM blocknum,
+    uint32_t fullhash,
+    ftnode_fetch_extra *bfe,
+    pair_lock_type lock_type,
+    uint32_t num_dependent_nodes,
+    FTNODE *dependent_nodes,
+    FTNODE *node_p,
+    bool move_messages)
+{
+    // Gather the cachetable-side view of each dependent node.
+    PAIR dep_pairs[num_dependent_nodes];
+    enum cachetable_dirty dep_dirty[num_dependent_nodes];
+    for (uint32_t idx = 0; idx < num_dependent_nodes; ++idx) {
+        FTNODE dep = dependent_nodes[idx];
+        dep_pairs[idx] = dep->ct_pair;
+        dep_dirty[idx] = static_cast<enum cachetable_dirty>(dep->dirty());
+    }
+
+    void *pinned;
+    int r = toku_cachetable_get_and_pin_with_dep_pairs(
+        ft->cf,
+        blocknum,
+        fullhash,
+        &pinned,
+        get_write_callbacks_for_node(ft),
+        toku_ftnode_fetch_callback,
+        toku_ftnode_pf_req_callback,
+        toku_ftnode_pf_callback,
+        lock_type,
+        bfe,
+        num_dependent_nodes,
+        dep_pairs,
+        dep_dirty);
+    invariant_zero(r);
+
+    FTNODE node = static_cast<FTNODE>(pinned);
+    const bool not_read_locked = (lock_type != PL_READ);
+    if (move_messages && not_read_locked && node->height > 0) {
+        toku_move_ftnode_messages_to_stale(ft, node);
+    }
+    *node_p = node;
+}
+
+// Pins a node without any dependent nodes; all other arguments are passed
+// straight through to toku_pin_ftnode_with_dep_nodes().
+void toku_pin_ftnode(FT ft,
+                     BLOCKNUM blocknum,
+                     uint32_t fullhash,
+                     ftnode_fetch_extra *bfe,
+                     pair_lock_type lock_type,
+                     FTNODE *node_p,
+                     bool move_messages) {
+    const uint32_t no_dependents = 0;
+    toku_pin_ftnode_with_dep_nodes(
+        ft,
+        blocknum,
+        fullhash,
+        bfe,
+        lock_type,
+        no_dependents,
+        nullptr,
+        node_p,
+        move_messages);
+}
+
+// Attempts to pin the node via toku_cachetable_maybe_get_and_pin_clean() and
+// returns that call's result. On a nonzero result *nodep is not written.
+// On success the node is stored in *nodep and, if the lock type is not
+// PL_READ and the node has height > 0, its messages are moved to stale.
+int toku_maybe_pin_ftnode_clean(FT ft, BLOCKNUM blocknum, uint32_t fullhash, pair_lock_type lock_type, FTNODE *nodep) {
+    void *pinned;
+    const int r = toku_cachetable_maybe_get_and_pin_clean(
+        ft->cf, blocknum, fullhash, lock_type, &pinned);
+    if (r != 0) {
+        return r;
+    }
+
+    CAST_FROM_VOIDP(*nodep, pinned);
+    FTNODE node = *nodep;
+    if (lock_type != PL_READ && node->height > 0) {
+        toku_move_ftnode_messages_to_stale(ft, node);
+    }
+    return r;
+}
+
+// Releases the pin on node, handing the cachetable the node's current
+// dirty() state and the attributes computed by make_ftnode_pair_attr().
+// The unpin must succeed (enforced with invariant_zero).
+void toku_unpin_ftnode(FT ft, FTNODE node) {
+    const enum cachetable_dirty dirty_state =
+        static_cast<enum cachetable_dirty>(node->dirty());
+    int r = toku_cachetable_unpin(
+        ft->cf, node->ct_pair, dirty_state, make_ftnode_pair_attr(node));
+    invariant_zero(r);
+}
+
+// Releases the pin on node, handing the cachetable the node's current
+// dirty() state together with make_invalid_pair_attr() instead of freshly
+// computed attributes. The unpin must succeed (asserted).
+void
+toku_unpin_ftnode_read_only(FT ft, FTNODE node)
+{
+    const enum cachetable_dirty dirty_state =
+        static_cast<enum cachetable_dirty>(node->dirty());
+    int r = toku_cachetable_unpin(
+        ft->cf, node->ct_pair, dirty_state, make_invalid_pair_attr());
+    assert(r==0);
+}
+
+void toku_ftnode_swap_pair_values(FTNODE a, FTNODE b)
+// Effect: Exchange the blocknum, fullhash, and PAIR of a and b, then swap
+//  the values held by those two PAIRs in the cachetable.
+// Requires: Both nodes are pinned
+{
+    // Stash b's identity, overwrite b with a's, then give a the stash.
+    const BLOCKNUM b_blocknum = b->blocknum;
+    const uint32_t b_fullhash = b->fullhash;
+    PAIR b_pair = b->ct_pair;
+
+    b->blocknum = a->blocknum;
+    b->fullhash = a->fullhash;
+    b->ct_pair = a->ct_pair;
+
+    a->blocknum = b_blocknum;
+    a->fullhash = b_fullhash;
+    a->ct_pair = b_pair;
+
+    // The nodes now point at each other's PAIR, but each PAIR in the
+    // cachetable still holds its old value (the FTNODE it represents),
+    // so those have to be swapped as well.
+    toku_cachetable_swap_pair_values(a->ct_pair, b->ct_pair);
+}