diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util | |
parent | Initial commit. (diff) | |
download | ceph-b26c4052f3542036551aa9dec9caa4226e456195.tar.xz ceph-b26c4052f3542036551aa9dec9caa4226e456195.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
9 files changed, 3067 insertions, 0 deletions
diff --git a/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/dbt.cc b/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/dbt.cc new file mode 100644 index 000000000..63cc3a267 --- /dev/null +++ b/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/dbt.cc @@ -0,0 +1,153 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ifndef ROCKSDB_LITE +#ifndef OS_WIN +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. +======= */ + +#ident \ + "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#include "dbt.h" + +#include <string.h> + +#include "../db.h" +#include "../portability/memory.h" + +DBT *toku_init_dbt(DBT *dbt) { + memset(dbt, 0, sizeof(*dbt)); + return dbt; +} + +DBT toku_empty_dbt(void) { + static const DBT empty_dbt = {.data = 0, .size = 0, .ulen = 0, .flags = 0}; + return empty_dbt; +} + +DBT *toku_init_dbt_flags(DBT *dbt, uint32_t flags) { + toku_init_dbt(dbt); + dbt->flags = flags; + return dbt; +} + +void toku_destroy_dbt(DBT *dbt) { + switch (dbt->flags) { + case DB_DBT_MALLOC: + case DB_DBT_REALLOC: + toku_free(dbt->data); + toku_init_dbt(dbt); + break; + } +} + +DBT *toku_fill_dbt(DBT *dbt, const void *k, size_t len) { + toku_init_dbt(dbt); + dbt->size = len; + dbt->data = (char *)k; + return dbt; +} + +DBT *toku_memdup_dbt(DBT *dbt, const void *k, size_t len) { + toku_init_dbt_flags(dbt, DB_DBT_MALLOC); + dbt->size = len; + dbt->data = toku_xmemdup(k, len); + return dbt; +} + +DBT *toku_copyref_dbt(DBT *dst, const DBT src) { + dst->flags = 0; + dst->ulen = 0; + dst->size = src.size; + dst->data = src.data; + return dst; +} + +DBT *toku_clone_dbt(DBT *dst, const DBT &src) { + return toku_memdup_dbt(dst, src.data, src.size); +} + +void toku_sdbt_cleanup(struct simple_dbt *sdbt) { + if (sdbt->data) toku_free(sdbt->data); + memset(sdbt, 0, sizeof(*sdbt)); +} + +const DBT *toku_dbt_positive_infinity(void) { + static DBT positive_infinity_dbt = { + .data = 0, .size = 0, .ulen = 0, .flags = 0}; // port + return &positive_infinity_dbt; +} + +const DBT *toku_dbt_negative_infinity(void) { + static DBT negative_infinity_dbt = { + .data = 0, .size = 0, .ulen = 0, .flags = 0}; // port + return &negative_infinity_dbt; +} + +bool toku_dbt_is_infinite(const DBT *dbt) { + return dbt == toku_dbt_positive_infinity() || + dbt == toku_dbt_negative_infinity(); +} + +bool toku_dbt_is_empty(const DBT *dbt) { + // can't have a null data field with a non-zero size + paranoid_invariant(dbt->data != nullptr || dbt->size == 0); + return dbt->data == nullptr; +} + +int toku_dbt_infinite_compare(const DBT *a, const DBT *b) { + if (a == b) { + return 0; + } else if (a == toku_dbt_positive_infinity()) { + return 1; + } else if (b == toku_dbt_positive_infinity()) { + return -1; + } else if (a == toku_dbt_negative_infinity()) { + return -1; + } else { + invariant(b == toku_dbt_negative_infinity()); + return 1; + } +} + +bool toku_dbt_equals(const DBT *a, const DBT *b) { + if (!toku_dbt_is_infinite(a) && !toku_dbt_is_infinite(b)) { + return a->data == b->data && a->size == b->size; + } else { + // a or b is infinite, so they're equal if they are the same infinite + return a == b ? true : false; + } +} +#endif // OS_WIN +#endif // ROCKSDB_LITE diff --git a/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/dbt.h b/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/dbt.h new file mode 100644 index 000000000..d86c440f8 --- /dev/null +++ b/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/dbt.h @@ -0,0 +1,98 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +======= */ + +#ident \ + "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#pragma once + +#include "../db.h" + +// TODO: John +// Document this API a little better so that DBT +// memory management can be morm widely understood. + +DBT *toku_init_dbt(DBT *); + +// returns: an initialized but empty dbt (for which toku_dbt_is_empty() is true) +DBT toku_empty_dbt(void); + +DBT *toku_init_dbt_flags(DBT *, uint32_t flags); + +void toku_destroy_dbt(DBT *); + +DBT *toku_fill_dbt(DBT *dbt, const void *k, size_t len); + +DBT *toku_memdup_dbt(DBT *dbt, const void *k, size_t len); + +DBT *toku_copyref_dbt(DBT *dst, const DBT src); + +DBT *toku_clone_dbt(DBT *dst, const DBT &src); + +void toku_sdbt_cleanup(struct simple_dbt *sdbt); + +// returns: special DBT pointer representing positive infinity +const DBT *toku_dbt_positive_infinity(void); + +// returns: special DBT pointer representing negative infinity +const DBT *toku_dbt_negative_infinity(void); + +// returns: true if the given dbt is either positive or negative infinity +bool toku_dbt_is_infinite(const DBT *dbt); + +// returns: true if the given dbt has no data (ie: dbt->data == nullptr) +bool toku_dbt_is_empty(const DBT *dbt); + +// effect: compares two potentially infinity-valued dbts +// requires: at least one is infinite (assert otherwise) +int toku_dbt_infinite_compare(const DBT *a, const DBT *b); + +// returns: true if the given dbts have the same data pointer and size +bool toku_dbt_equals(const DBT *a, const DBT *b); diff --git a/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h b/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h new file mode 100644 index 000000000..158750fdb --- /dev/null +++ b/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h @@ -0,0 +1,144 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +======= */ + +#ident \ + "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#pragma once + +#include <memory.h> + +//****************************************************************************** +// +// Overview: A growable array is a little bit like std::vector except that +// it doesn't have constructors (hence can be used in static constructs, since +// the google style guide says no constructors), and it's a little simpler. +// Operations: +// init and deinit (we don't have constructors and destructors). +// fetch_unchecked to get values out. +// store_unchecked to put values in. +// push to add an element at the end +// get_size to find out the size +// get_memory_size to find out how much memory the data stucture is using. +// +//****************************************************************************** + +namespace toku { + +template <typename T> +class GrowableArray { + public: + void init(void) + // Effect: Initialize the array to contain no elements. + { + m_array = NULL; + m_size = 0; + m_size_limit = 0; + } + + void deinit(void) + // Effect: Deinitialize the array (freeing any memory it uses, for example). + { + toku_free(m_array); + m_array = NULL; + m_size = 0; + m_size_limit = 0; + } + + T fetch_unchecked(size_t i) const + // Effect: Fetch the ith element. If i is out of range, the system asserts. + { + return m_array[i]; + } + + void store_unchecked(size_t i, T v) + // Effect: Store v in the ith element. If i is out of range, the system + // asserts. + { + paranoid_invariant(i < m_size); + m_array[i] = v; + } + + void push(T v) + // Effect: Add v to the end of the array (increasing the size). The amortized + // cost of this operation is constant. Implementation hint: Double the size + // of the array when it gets too big so that the amortized cost stays + // constant. + { + if (m_size >= m_size_limit) { + if (m_array == NULL) { + m_size_limit = 1; + } else { + m_size_limit *= 2; + } + XREALLOC_N(m_size_limit, m_array); + } + m_array[m_size++] = v; + } + + size_t get_size(void) const + // Effect: Return the number of elements in the array. + { + return m_size; + } + size_t memory_size(void) const + // Effect: Return the size (in bytes) that the array occupies in memory. This + // is really only an estimate. + { + return sizeof(*this) + sizeof(T) * m_size_limit; + } + + private: + T *m_array; + size_t m_size; + size_t m_size_limit; // How much space is allocated in array. +}; + +} // namespace toku diff --git a/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc b/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc new file mode 100644 index 000000000..0e7a9880b --- /dev/null +++ b/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc @@ -0,0 +1,201 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ifndef ROCKSDB_LITE +#ifndef OS_WIN +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +======= */ + +#ident \ + "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#include "memarena.h" + +#include <string.h> + +#include <algorithm> + +#include "../portability/memory.h" + +void memarena::create(size_t initial_size) { + _current_chunk = arena_chunk(); + _other_chunks = nullptr; + _size_of_other_chunks = 0; + _footprint_of_other_chunks = 0; + _n_other_chunks = 0; + + _current_chunk.size = initial_size; + if (_current_chunk.size > 0) { + XMALLOC_N(_current_chunk.size, _current_chunk.buf); + } +} + +void memarena::destroy(void) { + if (_current_chunk.buf) { + toku_free(_current_chunk.buf); + } + for (int i = 0; i < _n_other_chunks; i++) { + toku_free(_other_chunks[i].buf); + } + if (_other_chunks) { + toku_free(_other_chunks); + } + _current_chunk = arena_chunk(); + _other_chunks = nullptr; + _n_other_chunks = 0; +} + +static size_t round_to_page(size_t size) { + const size_t page_size = 4096; + const size_t r = page_size + ((size - 1) & ~(page_size - 1)); + assert((r & (page_size - 1)) == 0); // make sure it's aligned + assert(r >= size); // make sure it's not too small + assert(r < + size + page_size); // make sure we didn't grow by more than a page. + return r; +} + +static const size_t MEMARENA_MAX_CHUNK_SIZE = 64 * 1024 * 1024; + +void *memarena::malloc_from_arena(size_t size) { + if (_current_chunk.buf == nullptr || + _current_chunk.size < _current_chunk.used + size) { + // The existing block isn't big enough. + // Add the block to the vector of blocks. + if (_current_chunk.buf) { + invariant(_current_chunk.size > 0); + int old_n = _n_other_chunks; + XREALLOC_N(old_n + 1, _other_chunks); + _other_chunks[old_n] = _current_chunk; + _n_other_chunks = old_n + 1; + _size_of_other_chunks += _current_chunk.size; + _footprint_of_other_chunks += + toku_memory_footprint(_current_chunk.buf, _current_chunk.used); + } + + // Make a new one. Grow the buffer size exponentially until we hit + // the max chunk size, but make it at least `size' bytes so the + // current allocation always fit. + size_t new_size = + std::min(MEMARENA_MAX_CHUNK_SIZE, 2 * _current_chunk.size); + if (new_size < size) { + new_size = size; + } + new_size = round_to_page( + new_size); // at least size, but round to the next page size + XMALLOC_N(new_size, _current_chunk.buf); + _current_chunk.used = 0; + _current_chunk.size = new_size; + } + invariant(_current_chunk.buf != nullptr); + + // allocate in the existing block. + char *p = _current_chunk.buf + _current_chunk.used; + _current_chunk.used += size; + return p; +} + +void memarena::move_memory(memarena *dest) { + // Move memory to dest + XREALLOC_N(dest->_n_other_chunks + _n_other_chunks + 1, dest->_other_chunks); + dest->_size_of_other_chunks += _size_of_other_chunks + _current_chunk.size; + dest->_footprint_of_other_chunks += + _footprint_of_other_chunks + + toku_memory_footprint(_current_chunk.buf, _current_chunk.used); + for (int i = 0; i < _n_other_chunks; i++) { + dest->_other_chunks[dest->_n_other_chunks++] = _other_chunks[i]; + } + dest->_other_chunks[dest->_n_other_chunks++] = _current_chunk; + + // Clear out this memarena's memory + toku_free(_other_chunks); + _current_chunk = arena_chunk(); + _other_chunks = nullptr; + _size_of_other_chunks = 0; + _footprint_of_other_chunks = 0; + _n_other_chunks = 0; +} + +size_t memarena::total_memory_size(void) const { + return sizeof(*this) + total_size_in_use() + + _n_other_chunks * sizeof(*_other_chunks); +} + +size_t memarena::total_size_in_use(void) const { + return _size_of_other_chunks + _current_chunk.used; +} + +size_t memarena::total_footprint(void) const { + return sizeof(*this) + _footprint_of_other_chunks + + toku_memory_footprint(_current_chunk.buf, _current_chunk.used) + + _n_other_chunks * sizeof(*_other_chunks); +} + +//////////////////////////////////////////////////////////////////////////////// + +const void *memarena::chunk_iterator::current(size_t *used) const { + if (_chunk_idx < 0) { + *used = _ma->_current_chunk.used; + return _ma->_current_chunk.buf; + } else if (_chunk_idx < _ma->_n_other_chunks) { + *used = _ma->_other_chunks[_chunk_idx].used; + return _ma->_other_chunks[_chunk_idx].buf; + } + *used = 0; + return nullptr; +} + +void memarena::chunk_iterator::next() { _chunk_idx++; } + +bool memarena::chunk_iterator::more() const { + if (_chunk_idx < 0) { + return _ma->_current_chunk.buf != nullptr; + } + return _chunk_idx < _ma->_n_other_chunks; +} +#endif // OS_WIN +#endif // ROCKSDB_LITE diff --git a/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/memarena.h b/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/memarena.h new file mode 100644 index 000000000..ddcc1144f --- /dev/null +++ b/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/memarena.h @@ -0,0 +1,141 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +======= */ + +#ident \ + "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#pragma once + +#include <stdlib.h> + +/* + * A memarena is used to efficiently store a collection of objects that never + * move The pattern is allocate more and more stuff and free all of the items at + * once. The underlying memory will store 1 or more objects per chunk. Each + * chunk is contiguously laid out in memory but chunks are not necessarily + * contiguous with each other. + */ +class memarena { + public: + memarena() + : _current_chunk(arena_chunk()), + _other_chunks(nullptr), + _n_other_chunks(0), + _size_of_other_chunks(0), + _footprint_of_other_chunks(0) {} + + // Effect: Create a memarena with the specified initial size + void create(size_t initial_size); + + void destroy(void); + + // Effect: Allocate some memory. The returned value remains valid until the + // memarena is cleared or closed. + // In case of ENOMEM, aborts. + void *malloc_from_arena(size_t size); + + // Effect: Move all the memory from this memarena into DEST. + // When SOURCE is closed the memory won't be freed. + // When DEST is closed, the memory will be freed, unless DEST moves + // its memory to another memarena... + void move_memory(memarena *dest); + + // Effect: Calculate the amount of memory used by a memory arena. + size_t total_memory_size(void) const; + + // Effect: Calculate the used space of the memory arena (ie: excludes unused + // space) + size_t total_size_in_use(void) const; + + // Effect: Calculate the amount of memory used, according to + // toku_memory_footprint(), + // which is a more expensive but more accurate count of memory used. + size_t total_footprint(void) const; + + // iterator over the underlying chunks that store objects in the memarena. + // a chunk is represented by a pointer to const memory and a usable byte + // count. + class chunk_iterator { + public: + chunk_iterator(const memarena *ma) : _ma(ma), _chunk_idx(-1) {} + + // returns: base pointer to the current chunk + // *used set to the number of usable bytes + // if more() is false, returns nullptr and *used = 0 + const void *current(size_t *used) const; + + // requires: more() is true + void next(); + + bool more() const; + + private: + // -1 represents the 'initial' chunk in a memarena, ie: ma->_current_chunk + // >= 0 represents the i'th chunk in the ma->_other_chunks array + const memarena *_ma; + int _chunk_idx; + }; + + private: + struct arena_chunk { + arena_chunk() : buf(nullptr), used(0), size(0) {} + char *buf; + size_t used; + size_t size; + }; + + struct arena_chunk _current_chunk; + struct arena_chunk *_other_chunks; + int _n_other_chunks; + size_t _size_of_other_chunks; // the buf_size of all the other chunks. + size_t _footprint_of_other_chunks; // the footprint of all the other chunks. + + friend class memarena_unit_test; +}; diff --git a/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/omt.h b/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/omt.h new file mode 100644 index 000000000..f208002d3 --- /dev/null +++ b/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/omt.h @@ -0,0 +1,794 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +======= */ + +#ident \ + "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#pragma once + +#include <memory.h> +#include <stdint.h> + +#include "../portability/toku_portability.h" +#include "../portability/toku_race_tools.h" +#include "growable_array.h" + +namespace toku { + +/** + * Order Maintenance Tree (OMT) + * + * Maintains a collection of totally ordered values, where each value has an + * integer weight. The OMT is a mutable datatype. + * + * The Abstraction: + * + * An OMT is a vector of values, $V$, where $|V|$ is the length of the vector. + * The vector is numbered from $0$ to $|V|-1$. + * Each value has a weight. The weight of the $i$th element is denoted + * $w(V_i)$. + * + * We can create a new OMT, which is the empty vector. + * + * We can insert a new element $x$ into slot $i$, changing $V$ into $V'$ where + * $|V'|=1+|V|$ and + * + * V'_j = V_j if $j<i$ + * x if $j=i$ + * V_{j-1} if $j>i$. + * + * We can specify $i$ using a kind of function instead of as an integer. + * Let $b$ be a function mapping from values to nonzero integers, such that + * the signum of $b$ is monotically increasing. + * We can specify $i$ as the minimum integer such that $b(V_i)>0$. + * + * We look up a value using its index, or using a Heaviside function. + * For lookups, we allow $b$ to be zero for some values, and again the signum of + * $b$ must be monotonically increasing. When lookup up values, we can look up + * $V_i$ where $i$ is the minimum integer such that $b(V_i)=0$. (With a + * special return code if no such value exists.) (Rationale: Ordinarily we want + * $i$ to be unique. But for various reasons we want to allow multiple zeros, + * and we want the smallest $i$ in that case.) $V_i$ where $i$ is the minimum + * integer such that $b(V_i)>0$. (Or an indication that no such value exists.) + * $V_i$ where $i$ is the maximum integer such that $b(V_i)<0$. (Or an + * indication that no such value exists.) + * + * When looking up a value using a Heaviside function, we get the value and its + * index. + * + * We can also split an OMT into two OMTs, splitting the weight of the values + * evenly. Find a value $j$ such that the values to the left of $j$ have about + * the same total weight as the values to the right of $j$. The resulting two + * OMTs contain the values to the left of $j$ and the values to the right of $j$ + * respectively. All of the values from the original OMT go into one of the new + * OMTs. If the weights of the values don't split exactly evenly, then the + * implementation has the freedom to choose whether the new left OMT or the new + * right OMT is larger. + * + * Performance: + * Insertion and deletion should run with $O(\log |V|)$ time and $O(\log |V|)$ + * calls to the Heaviside function. The memory required is O(|V|). + * + * Usage: + * The omt is templated by two parameters: + * - omtdata_t is what will be stored within the omt. These could be pointers + * or real data types (ints, structs). + * - omtdataout_t is what will be returned by find and related functions. By + * default, it is the same as omtdata_t, but you can set it to (omtdata_t *). To + * create an omt which will store "TXNID"s, for example, it is a good idea to + * typedef the template: typedef omt<TXNID> txnid_omt_t; If you are storing + * structs, you may want to be able to get a pointer to the data actually stored + * in the omt (see find_zero). To do this, use the second template parameter: + * typedef omt<struct foo, struct foo *> foo_omt_t; + */ + +namespace omt_internal { + +template <bool subtree_supports_marks> +class subtree_templated { + private: + uint32_t m_index; + + public: + static const uint32_t NODE_NULL = UINT32_MAX; + inline void set_to_null(void) { m_index = NODE_NULL; } + + inline bool is_null(void) const { return NODE_NULL == this->get_index(); } + + inline uint32_t get_index(void) const { return m_index; } + + inline void set_index(uint32_t index) { + paranoid_invariant(index != NODE_NULL); + m_index = index; + } +} __attribute__((__packed__, aligned(4))); + +template <> +class subtree_templated<true> { + private: + uint32_t m_bitfield; + static const uint32_t MASK_INDEX = ~(((uint32_t)1) << 31); + static const uint32_t MASK_BIT = ((uint32_t)1) << 31; + + inline void set_index_internal(uint32_t new_index) { + m_bitfield = (m_bitfield & MASK_BIT) | new_index; + } + + public: + static const uint32_t NODE_NULL = INT32_MAX; + inline void set_to_null(void) { this->set_index_internal(NODE_NULL); } + + inline bool is_null(void) const { return NODE_NULL == this->get_index(); } + + inline uint32_t get_index(void) const { + TOKU_DRD_IGNORE_VAR(m_bitfield); + const uint32_t bits = m_bitfield; + TOKU_DRD_STOP_IGNORING_VAR(m_bitfield); + return bits & MASK_INDEX; + } + + inline void set_index(uint32_t index) { + paranoid_invariant(index < NODE_NULL); + this->set_index_internal(index); + } + + inline bool get_bit(void) const { + TOKU_DRD_IGNORE_VAR(m_bitfield); + const uint32_t bits = m_bitfield; + TOKU_DRD_STOP_IGNORING_VAR(m_bitfield); + return (bits & MASK_BIT) != 0; + } + + inline void enable_bit(void) { + // These bits may be set by a thread with a write lock on some + // leaf, and the index can be read by another thread with a (read + // or write) lock on another thread. Also, the has_marks_below + // bit can be set by two threads simultaneously. Neither of these + // are real races, so if we are using DRD we should tell it to + // ignore these bits just while we set this bit. If there were a + // race in setting the index, that would be a real race. + TOKU_DRD_IGNORE_VAR(m_bitfield); + m_bitfield |= MASK_BIT; + TOKU_DRD_STOP_IGNORING_VAR(m_bitfield); + } + + inline void disable_bit(void) { m_bitfield &= MASK_INDEX; } +} __attribute__((__packed__)); + +template <typename omtdata_t, bool subtree_supports_marks> +class omt_node_templated { + public: + omtdata_t value; + uint32_t weight; + subtree_templated<subtree_supports_marks> left; + subtree_templated<subtree_supports_marks> right; + + // this needs to be in both implementations because we don't have + // a "static if" the caller can use + inline void clear_stolen_bits(void) {} +}; // note: originally this class had __attribute__((__packed__, aligned(4))) + +template <typename omtdata_t> +class omt_node_templated<omtdata_t, true> { + public: + omtdata_t value; + uint32_t weight; + subtree_templated<true> left; + subtree_templated<true> right; + inline bool get_marked(void) const { return left.get_bit(); } + inline void set_marked_bit(void) { return left.enable_bit(); } + inline void unset_marked_bit(void) { return left.disable_bit(); } + + inline bool get_marks_below(void) const { return right.get_bit(); } + inline void set_marks_below_bit(void) { + // This function can be called by multiple threads. + // Checking first reduces cache invalidation. + if (!this->get_marks_below()) { + right.enable_bit(); + } + } + inline void unset_marks_below_bit(void) { right.disable_bit(); } + + inline void clear_stolen_bits(void) { + this->unset_marked_bit(); + this->unset_marks_below_bit(); + } +}; // note: originally this class had __attribute__((__packed__, aligned(4))) + +} // namespace omt_internal + +template <typename omtdata_t, typename omtdataout_t = omtdata_t, + bool supports_marks = false> +class omt { + public: + /** + * Effect: Create an empty OMT. + * Performance: constant time. + */ + void create(void); + + /** + * Effect: Create an empty OMT with no internal allocated space. + * Performance: constant time. + * Rationale: In some cases we need a valid omt but don't want to malloc. + */ + void create_no_array(void); + + /** + * Effect: Create a OMT containing values. The number of values is in + * numvalues. Stores the new OMT in *omtp. Requires: this has not been created + * yet Requires: values != NULL Requires: values is sorted Performance: + * time=O(numvalues) Rationale: Normally to insert N values takes O(N lg N) + * amortized time. If the N values are known in advance, are sorted, and the + * structure is empty, we can batch insert them much faster. + */ + __attribute__((nonnull)) void create_from_sorted_array( + const omtdata_t *const values, const uint32_t numvalues); + + /** + * Effect: Create an OMT containing values. The number of values is in + * numvalues. On success the OMT takes ownership of *values array, and sets + * values=NULL. Requires: this has not been created yet Requires: values != + * NULL Requires: *values is sorted Requires: *values was allocated with + * toku_malloc Requires: Capacity of the *values array is <= new_capacity + * Requires: On success, *values may not be accessed again by the caller. + * Performance: time=O(1) + * Rational: create_from_sorted_array takes O(numvalues) time. + * By taking ownership of the array, we save a malloc and + * memcpy, and possibly a free (if the caller is done with the array). + */ + void create_steal_sorted_array(omtdata_t **const values, + const uint32_t numvalues, + const uint32_t new_capacity); + + /** + * Effect: Create a new OMT, storing it in *newomt. + * The values to the right of index (starting at index) are moved to *newomt. + * Requires: newomt != NULL + * Returns + * 0 success, + * EINVAL if index > toku_omt_size(omt) + * On nonzero return, omt and *newomt are unmodified. + * Performance: time=O(n) + * Rationale: We don't need a split-evenly operation. We need to split items + * so that their total sizes are even, and other similar splitting criteria. + * It's easy to split evenly by calling size(), and dividing by two. + */ + __attribute__((nonnull)) int split_at(omt *const newomt, const uint32_t idx); + + /** + * Effect: Appends leftomt and rightomt to produce a new omt. + * Creates this as the new omt. + * leftomt and rightomt are destroyed. + * Performance: time=O(n) is acceptable, but one can imagine implementations + * that are O(\log n) worst-case. + */ + __attribute__((nonnull)) void merge(omt *const leftomt, omt *const rightomt); + + /** + * Effect: Creates a copy of an omt. + * Creates this as the clone. + * Each element is copied directly. If they are pointers, the underlying + * data is not duplicated. Performance: O(n) or the running time of + * fill_array_with_subtree_values() + */ + void clone(const omt &src); + + /** + * Effect: Set the tree to be empty. + * Note: Will not reallocate or resize any memory. + * Performance: time=O(1) + */ + void clear(void); + + /** + * Effect: Destroy an OMT, freeing all its memory. + * If the values being stored are pointers, their underlying data is not + * freed. See free_items() Those values may be freed before or after calling + * toku_omt_destroy. Rationale: Returns no values since free() cannot fail. + * Rationale: Does not free the underlying pointers to reduce complexity. + * Performance: time=O(1) + */ + void destroy(void); + + /** + * Effect: return |this|. + * Performance: time=O(1) + */ + uint32_t size(void) const; + + /** + * Effect: Insert value into the OMT. + * If there is some i such that $h(V_i, v)=0$ then returns DB_KEYEXIST. + * Otherwise, let i be the minimum value such that $h(V_i, v)>0$. + * If no such i exists, then let i be |V| + * Then this has the same effect as + * insert_at(tree, value, i); + * If idx!=NULL then i is stored in *idx + * Requires: The signum of h must be monotonically increasing. + * Returns: + * 0 success + * DB_KEYEXIST the key is present (h was equal to zero for some value) + * On nonzero return, omt is unchanged. + * Performance: time=O(\log N) amortized. + * Rationale: Some future implementation may be O(\log N) worst-case time, but + * O(\log N) amortized is good enough for now. + */ + template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)> + int insert(const omtdata_t &value, const omtcmp_t &v, uint32_t *const idx); + + /** + * Effect: Increases indexes of all items at slot >= idx by 1. + * Insert value into the position at idx. + * Returns: + * 0 success + * EINVAL if idx > this->size() + * On error, omt is unchanged. + * Performance: time=O(\log N) amortized time. + * Rationale: Some future implementation may be O(\log N) worst-case time, but + * O(\log N) amortized is good enough for now. + */ + int insert_at(const omtdata_t &value, const uint32_t idx); + + /** + * Effect: Replaces the item at idx with value. + * Returns: + * 0 success + * EINVAL if idx>=this->size() + * On error, omt is unchanged. + * Performance: time=O(\log N) + * Rationale: The FT needs to be able to replace a value with another copy of + * the same value (allocated in a different location) + * + */ + int set_at(const omtdata_t &value, const uint32_t idx); + + /** + * Effect: Delete the item in slot idx. + * Decreases indexes of all items at slot > idx by 1. + * Returns + * 0 success + * EINVAL if idx>=this->size() + * On error, omt is unchanged. + * Rationale: To delete an item, first find its index using find or find_zero, + * then delete it. Performance: time=O(\log N) amortized. + */ + int delete_at(const uint32_t idx); + + /** + * Effect: Iterate over the values of the omt, from left to right, calling f + * on each value. The first argument passed to f is a ref-to-const of the + * value stored in the omt. The second argument passed to f is the index of + * the value. The third argument passed to f is iterate_extra. The indices run + * from 0 (inclusive) to this->size() (exclusive). Requires: f != NULL + * Returns: + * If f ever returns nonzero, then the iteration stops, and the value + * returned by f is returned by iterate. If f always returns zero, then + * iterate returns 0. Requires: Don't modify the omt while running. (E.g., f + * may not insert or delete values from the omt.) Performance: time=O(i+\log + * N) where i is the number of times f is called, and N is the number of + * elements in the omt. Rationale: Although the functional iterator requires + * defining another function (as opposed to C++ style iterator), it is much + * easier to read. Rationale: We may at some point use functors, but for now + * this is a smaller change from the old OMT. + */ + template <typename iterate_extra_t, + int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)> + int iterate(iterate_extra_t *const iterate_extra) const; + + /** + * Effect: Iterate over the values of the omt, from left to right, calling f + * on each value. The first argument passed to f is a ref-to-const of the + * value stored in the omt. The second argument passed to f is the index of + * the value. The third argument passed to f is iterate_extra. The indices run + * from 0 (inclusive) to this->size() (exclusive). We will iterate only over + * [left,right) + * + * Requires: left <= right + * Requires: f != NULL + * Returns: + * EINVAL if right > this->size() + * If f ever returns nonzero, then the iteration stops, and the value + * returned by f is returned by iterate_on_range. If f always returns zero, + * then iterate_on_range returns 0. Requires: Don't modify the omt while + * running. (E.g., f may not insert or delete values from the omt.) + * Performance: time=O(i+\log N) where i is the number of times f is called, + * and N is the number of elements in the omt. Rational: Although the + * functional iterator requires defining another function (as opposed to C++ + * style iterator), it is much easier to read. + */ + template <typename iterate_extra_t, + int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)> + int iterate_on_range(const uint32_t left, const uint32_t right, + iterate_extra_t *const iterate_extra) const; + + /** + * Effect: Iterate over the values of the omt, and mark the nodes that are + * visited. Other than the marks, this behaves the same as iterate_on_range. + * Requires: supports_marks == true + * Performance: time=O(i+\log N) where i is the number of times f is called, + * and N is the number of elements in the omt. Notes: This function MAY be + * called concurrently by multiple threads, but not concurrently with any + * other non-const function. + */ + template <typename iterate_extra_t, + int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)> + int iterate_and_mark_range(const uint32_t left, const uint32_t right, + iterate_extra_t *const iterate_extra); + + /** + * Effect: Iterate over the values of the omt, from left to right, calling f + * on each value whose node has been marked. Other than the marks, this + * behaves the same as iterate. Requires: supports_marks == true Performance: + * time=O(i+\log N) where i is the number of times f is called, and N is the + * number of elements in the omt. + */ + template <typename iterate_extra_t, + int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)> + int iterate_over_marked(iterate_extra_t *const iterate_extra) const; + + /** + * Effect: Delete all elements from the omt, whose nodes have been marked. + * Requires: supports_marks == true + * Performance: time=O(N + i\log N) where i is the number of marked elements, + * {c,sh}ould be faster + */ + void delete_all_marked(void); + + /** + * Effect: Verify that the internal state of the marks in the tree are + * self-consistent. Crashes the system if the marks are in a bad state. + * Requires: supports_marks == true + * Performance: time=O(N) + * Notes: + * Even though this is a const function, it requires exclusive access. + * Rationale: + * The current implementation of the marks relies on a sort of + * "cache" bit representing the state of bits below it in the tree. + * This allows glass-box testing that these bits are correct. + */ + void verify_marks_consistent(void) const; + + /** + * Effect: None + * Returns whether there are any marks in the tree. + */ + bool has_marks(void) const; + + /** + * Effect: Iterate over the values of the omt, from left to right, calling f + * on each value. The first argument passed to f is a pointer to the value + * stored in the omt. The second argument passed to f is the index of the + * value. The third argument passed to f is iterate_extra. The indices run + * from 0 (inclusive) to this->size() (exclusive). Requires: same as for + * iterate() Returns: same as for iterate() Performance: same as for iterate() + * Rationale: In general, most iterators should use iterate() since they + * should not modify the data stored in the omt. This function is for + * iterators which need to modify values (for example, free_items). Rationale: + * We assume if you are transforming the data in place, you want to do it to + * everything at once, so there is not yet an iterate_on_range_ptr (but there + * could be). + */ + template <typename iterate_extra_t, + int (*f)(omtdata_t *, const uint32_t, iterate_extra_t *const)> + void iterate_ptr(iterate_extra_t *const iterate_extra); + + /** + * Effect: Set *value=V_idx + * Returns + * 0 success + * EINVAL if index>=toku_omt_size(omt) + * On nonzero return, *value is unchanged + * Performance: time=O(\log N) + */ + int fetch(const uint32_t idx, omtdataout_t *const value) const; + + /** + * Effect: Find the smallest i such that h(V_i, extra)>=0 + * If there is such an i and h(V_i,extra)==0 then set *idxp=i, set *value = + * V_i, and return 0. If there is such an i and h(V_i,extra)>0 then set + * *idxp=i and return DB_NOTFOUND. If there is no such i then set + * *idx=this->size() and return DB_NOTFOUND. Note: value is of type + * omtdataout_t, which may be of type (omtdata_t) or (omtdata_t *) but is + * fixed by the instantiation. If it is the value type, then the value is + * copied out (even if the value type is a pointer to something else) If it is + * the pointer type, then *value is set to a pointer to the data within the + * omt. This is determined by the type of the omt as initially declared. If + * the omt is declared as omt<foo_t>, then foo_t's will be stored and foo_t's + * will be returned by find and related functions. If the omt is declared as + * omt<foo_t, foo_t *>, then foo_t's will be stored, and pointers to the + * stored items will be returned by find and related functions. Rationale: + * Structs too small for malloc should be stored directly in the omt. + * These structs may need to be edited as they exist inside the omt, so we + * need a way to get a pointer within the omt. Using separate functions for + * returning pointers and values increases code duplication and reduces + * type-checking. That also reduces the ability of the creator of a data + * structure to give advice to its future users. Slight overloading in this + * case seemed to provide a better API and better type checking. + */ + template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)> + int find_zero(const omtcmp_t &extra, omtdataout_t *const value, + uint32_t *const idxp) const; + + /** + * Effect: + * If direction >0 then find the smallest i such that h(V_i,extra)>0. + * If direction <0 then find the largest i such that h(V_i,extra)<0. + * (Direction may not be equal to zero.) + * If value!=NULL then store V_i in *value + * If idxp!=NULL then store i in *idxp. + * Requires: The signum of h is monotically increasing. + * Returns + * 0 success + * DB_NOTFOUND no such value is found. + * On nonzero return, *value and *idxp are unchanged + * Performance: time=O(\log N) + * Rationale: + * Here's how to use the find function to find various things + * Cases for find: + * find first value: ( h(v)=+1, direction=+1 ) + * find last value ( h(v)=-1, direction=-1 ) + * find first X ( h(v)=(v< x) ? -1 : 1 direction=+1 ) + * find last X ( h(v)=(v<=x) ? -1 : 1 direction=-1 ) + * find X or successor to X ( same as find first X. ) + * + * Rationale: To help understand heaviside functions and behavor of find: + * There are 7 kinds of heaviside functions. + * The signus of the h must be monotonically increasing. + * Given a function of the following form, A is the element + * returned for direction>0, B is the element returned + * for direction<0, C is the element returned for + * direction==0 (see find_zero) (with a return of 0), and D is the element + * returned for direction==0 (see find_zero) with a return of DB_NOTFOUND. + * If any of A, B, or C are not found, then asking for the + * associated direction will return DB_NOTFOUND. + * See find_zero for more information. + * + * Let the following represent the signus of the heaviside function. + * + * -...- + * A + * D + * + * +...+ + * B + * D + * + * 0...0 + * C + * + * -...-0...0 + * AC + * + * 0...0+...+ + * C B + * + * -...-+...+ + * AB + * D + * + * -...-0...0+...+ + * AC B + */ + template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)> + int find(const omtcmp_t &extra, int direction, omtdataout_t *const value, + uint32_t *const idxp) const; + + /** + * Effect: Return the size (in bytes) of the omt, as it resides in main + * memory. If the data stored are pointers, don't include the size of what + * they all point to. + */ + size_t memory_size(void); + + private: + typedef uint32_t node_idx; + typedef omt_internal::subtree_templated<supports_marks> subtree; + typedef omt_internal::omt_node_templated<omtdata_t, supports_marks> omt_node; + ENSURE_POD(subtree); + + struct omt_array { + uint32_t start_idx; + uint32_t num_values; + omtdata_t *values; + }; + + struct omt_tree { + subtree root; + uint32_t free_idx; + omt_node *nodes; + }; + + bool is_array; + uint32_t capacity; + union { + struct omt_array a; + struct omt_tree t; + } d; + + __attribute__((nonnull)) void unmark(const subtree &subtree, + const uint32_t index, + GrowableArray<node_idx> *const indexes); + + void create_internal_no_array(const uint32_t new_capacity); + + void create_internal(const uint32_t new_capacity); + + uint32_t nweight(const subtree &subtree) const; + + node_idx node_malloc(void); + + void node_free(const node_idx idx); + + void maybe_resize_array(const uint32_t n); + + __attribute__((nonnull)) void fill_array_with_subtree_values( + omtdata_t *const array, const subtree &subtree) const; + + void convert_to_array(void); + + __attribute__((nonnull)) void rebuild_from_sorted_array( + subtree *const subtree, const omtdata_t *const values, + const uint32_t numvalues); + + void convert_to_tree(void); + + void maybe_resize_or_convert(const uint32_t n); + + bool will_need_rebalance(const subtree &subtree, const int leftmod, + const int rightmod) const; + + __attribute__((nonnull)) void insert_internal( + subtree *const subtreep, const omtdata_t &value, const uint32_t idx, + subtree **const rebalance_subtree); + + void set_at_internal_array(const omtdata_t &value, const uint32_t idx); + + void set_at_internal(const subtree &subtree, const omtdata_t &value, + const uint32_t idx); + + void delete_internal(subtree *const subtreep, const uint32_t idx, + omt_node *const copyn, + subtree **const rebalance_subtree); + + template <typename iterate_extra_t, + int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)> + int iterate_internal_array(const uint32_t left, const uint32_t right, + iterate_extra_t *const iterate_extra) const; + + template <typename iterate_extra_t, + int (*f)(omtdata_t *, const uint32_t, iterate_extra_t *const)> + void iterate_ptr_internal(const uint32_t left, const uint32_t right, + const subtree &subtree, const uint32_t idx, + iterate_extra_t *const iterate_extra); + + template <typename iterate_extra_t, + int (*f)(omtdata_t *, const uint32_t, iterate_extra_t *const)> + void iterate_ptr_internal_array(const uint32_t left, const uint32_t right, + iterate_extra_t *const iterate_extra); + + template <typename iterate_extra_t, + int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)> + int iterate_internal(const uint32_t left, const uint32_t right, + const subtree &subtree, const uint32_t idx, + iterate_extra_t *const iterate_extra) const; + + template <typename iterate_extra_t, + int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)> + int iterate_and_mark_range_internal(const uint32_t left, const uint32_t right, + const subtree &subtree, + const uint32_t idx, + iterate_extra_t *const iterate_extra); + + template <typename iterate_extra_t, + int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)> + int iterate_over_marked_internal(const subtree &subtree, const uint32_t idx, + iterate_extra_t *const iterate_extra) const; + + uint32_t verify_marks_consistent_internal(const subtree &subtree, + const bool allow_marks) const; + + void fetch_internal_array(const uint32_t i, omtdataout_t *const value) const; + + void fetch_internal(const subtree &subtree, const uint32_t i, + omtdataout_t *const value) const; + + __attribute__((nonnull)) void fill_array_with_subtree_idxs( + node_idx *const array, const subtree &subtree) const; + + __attribute__((nonnull)) void rebuild_subtree_from_idxs( + subtree *const subtree, const node_idx *const idxs, + const uint32_t numvalues); + + __attribute__((nonnull)) void rebalance(subtree *const subtree); + + __attribute__((nonnull)) static void copyout(omtdata_t *const out, + const omt_node *const n); + + __attribute__((nonnull)) static void copyout(omtdata_t **const out, + omt_node *const n); + + __attribute__((nonnull)) static void copyout( + omtdata_t *const out, const omtdata_t *const stored_value_ptr); + + __attribute__((nonnull)) static void copyout( + omtdata_t **const out, omtdata_t *const stored_value_ptr); + + template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)> + int find_internal_zero_array(const omtcmp_t &extra, omtdataout_t *const value, + uint32_t *const idxp) const; + + template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)> + int find_internal_zero(const subtree &subtree, const omtcmp_t &extra, + omtdataout_t *const value, uint32_t *const idxp) const; + + template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)> + int find_internal_plus_array(const omtcmp_t &extra, omtdataout_t *const value, + uint32_t *const idxp) const; + + template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)> + int find_internal_plus(const subtree &subtree, const omtcmp_t &extra, + omtdataout_t *const value, uint32_t *const idxp) const; + + template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)> + int find_internal_minus_array(const omtcmp_t &extra, + omtdataout_t *const value, + uint32_t *const idxp) const; + + template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)> + int find_internal_minus(const subtree &subtree, const omtcmp_t &extra, + omtdataout_t *const value, + uint32_t *const idxp) const; +}; + +} // namespace toku + +// include the implementation here +#include "omt_impl.h" diff --git a/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/omt_impl.h b/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/omt_impl.h new file mode 100644 index 000000000..e77986716 --- /dev/null +++ b/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/omt_impl.h @@ -0,0 +1,1295 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +======= */ + +#ident \ + "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#include <string.h> + +#include "../db.h" +#include "../portability/memory.h" + +namespace toku { + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::create(void) { + this->create_internal(2); + if (supports_marks) { + this->convert_to_tree(); + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::create_no_array(void) { + if (!supports_marks) { + this->create_internal_no_array(0); + } else { + this->is_array = false; + this->capacity = 0; + this->d.t.nodes = nullptr; + this->d.t.root.set_to_null(); + this->d.t.free_idx = 0; + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::create_from_sorted_array( + const omtdata_t *const values, const uint32_t numvalues) { + this->create_internal(numvalues); + memcpy(this->d.a.values, values, numvalues * (sizeof values[0])); + this->d.a.num_values = numvalues; + if (supports_marks) { + this->convert_to_tree(); + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::create_steal_sorted_array( + omtdata_t **const values, const uint32_t numvalues, + const uint32_t new_capacity) { + paranoid_invariant_notnull(values); + this->create_internal_no_array(new_capacity); + this->d.a.num_values = numvalues; + this->d.a.values = *values; + *values = nullptr; + if (supports_marks) { + this->convert_to_tree(); + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +int omt<omtdata_t, omtdataout_t, supports_marks>::split_at(omt *const newomt, + const uint32_t idx) { + barf_if_marked(*this); + paranoid_invariant_notnull(newomt); + if (idx > this->size()) { + return EINVAL; + } + this->convert_to_array(); + const uint32_t newsize = this->size() - idx; + newomt->create_from_sorted_array(&this->d.a.values[this->d.a.start_idx + idx], + newsize); + this->d.a.num_values = idx; + this->maybe_resize_array(idx); + if (supports_marks) { + this->convert_to_tree(); + } + return 0; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::merge(omt *const leftomt, + omt *const rightomt) { + barf_if_marked(*this); + paranoid_invariant_notnull(leftomt); + paranoid_invariant_notnull(rightomt); + const uint32_t leftsize = leftomt->size(); + const uint32_t rightsize = rightomt->size(); + const uint32_t newsize = leftsize + rightsize; + + if (leftomt->is_array) { + if (leftomt->capacity - + (leftomt->d.a.start_idx + leftomt->d.a.num_values) >= + rightsize) { + this->create_steal_sorted_array( + &leftomt->d.a.values, leftomt->d.a.num_values, leftomt->capacity); + this->d.a.start_idx = leftomt->d.a.start_idx; + } else { + this->create_internal(newsize); + memcpy(&this->d.a.values[0], &leftomt->d.a.values[leftomt->d.a.start_idx], + leftomt->d.a.num_values * (sizeof this->d.a.values[0])); + } + } else { + this->create_internal(newsize); + leftomt->fill_array_with_subtree_values(&this->d.a.values[0], + leftomt->d.t.root); + } + leftomt->destroy(); + this->d.a.num_values = leftsize; + + if (rightomt->is_array) { + memcpy(&this->d.a.values[this->d.a.start_idx + this->d.a.num_values], + &rightomt->d.a.values[rightomt->d.a.start_idx], + rightomt->d.a.num_values * (sizeof this->d.a.values[0])); + } else { + rightomt->fill_array_with_subtree_values( + &this->d.a.values[this->d.a.start_idx + this->d.a.num_values], + rightomt->d.t.root); + } + rightomt->destroy(); + this->d.a.num_values += rightsize; + paranoid_invariant(this->size() == newsize); + if (supports_marks) { + this->convert_to_tree(); + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::clone(const omt &src) { + barf_if_marked(*this); + this->create_internal(src.size()); + if (src.is_array) { + memcpy(&this->d.a.values[0], &src.d.a.values[src.d.a.start_idx], + src.d.a.num_values * (sizeof this->d.a.values[0])); + } else { + src.fill_array_with_subtree_values(&this->d.a.values[0], src.d.t.root); + } + this->d.a.num_values = src.size(); + if (supports_marks) { + this->convert_to_tree(); + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::clear(void) { + if (this->is_array) { + this->d.a.start_idx = 0; + this->d.a.num_values = 0; + } else { + this->d.t.root.set_to_null(); + this->d.t.free_idx = 0; + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::destroy(void) { + this->clear(); + this->capacity = 0; + if (this->is_array) { + if (this->d.a.values != nullptr) { + toku_free(this->d.a.values); + } + this->d.a.values = nullptr; + } else { + if (this->d.t.nodes != nullptr) { + toku_free(this->d.t.nodes); + } + this->d.t.nodes = nullptr; + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +uint32_t omt<omtdata_t, omtdataout_t, supports_marks>::size(void) const { + if (this->is_array) { + return this->d.a.num_values; + } else { + return this->nweight(this->d.t.root); + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)> +int omt<omtdata_t, omtdataout_t, supports_marks>::insert(const omtdata_t &value, + const omtcmp_t &v, + uint32_t *const idx) { + int r; + uint32_t insert_idx; + + r = this->find_zero<omtcmp_t, h>(v, nullptr, &insert_idx); + if (r == 0) { + if (idx) *idx = insert_idx; + return DB_KEYEXIST; + } + if (r != DB_NOTFOUND) return r; + + if ((r = this->insert_at(value, insert_idx))) return r; + if (idx) *idx = insert_idx; + + return 0; +} + +// The following 3 functions implement a static if for us. +template <typename omtdata_t, typename omtdataout_t> +static void barf_if_marked(const omt<omtdata_t, omtdataout_t, false> &UU(omt)) { +} + +template <typename omtdata_t, typename omtdataout_t> +static void barf_if_marked(const omt<omtdata_t, omtdataout_t, true> &omt) { + invariant(!omt.has_marks()); +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +bool omt<omtdata_t, omtdataout_t, supports_marks>::has_marks(void) const { + static_assert(supports_marks, "Does not support marks"); + if (this->d.t.root.is_null()) { + return false; + } + const omt_node &node = this->d.t.nodes[this->d.t.root.get_index()]; + return node.get_marks_below() || node.get_marked(); +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +int omt<omtdata_t, omtdataout_t, supports_marks>::insert_at( + const omtdata_t &value, const uint32_t idx) { + barf_if_marked(*this); + if (idx > this->size()) { + return EINVAL; + } + + this->maybe_resize_or_convert(this->size() + 1); + if (this->is_array && idx != this->d.a.num_values && + (idx != 0 || this->d.a.start_idx == 0)) { + this->convert_to_tree(); + } + if (this->is_array) { + if (idx == this->d.a.num_values) { + this->d.a.values[this->d.a.start_idx + this->d.a.num_values] = value; + } else { + this->d.a.values[--this->d.a.start_idx] = value; + } + this->d.a.num_values++; + } else { + subtree *rebalance_subtree = nullptr; + this->insert_internal(&this->d.t.root, value, idx, &rebalance_subtree); + if (rebalance_subtree != nullptr) { + this->rebalance(rebalance_subtree); + } + } + return 0; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +int omt<omtdata_t, omtdataout_t, supports_marks>::set_at(const omtdata_t &value, + const uint32_t idx) { + barf_if_marked(*this); + if (idx >= this->size()) { + return EINVAL; + } + + if (this->is_array) { + this->set_at_internal_array(value, idx); + } else { + this->set_at_internal(this->d.t.root, value, idx); + } + return 0; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +int omt<omtdata_t, omtdataout_t, supports_marks>::delete_at( + const uint32_t idx) { + barf_if_marked(*this); + if (idx >= this->size()) { + return EINVAL; + } + + this->maybe_resize_or_convert(this->size() - 1); + if (this->is_array && idx != 0 && idx != this->d.a.num_values - 1) { + this->convert_to_tree(); + } + if (this->is_array) { + // Testing for 0 does not rule out it being the last entry. + // Test explicitly for num_values-1 + if (idx != this->d.a.num_values - 1) { + this->d.a.start_idx++; + } + this->d.a.num_values--; + } else { + subtree *rebalance_subtree = nullptr; + this->delete_internal(&this->d.t.root, idx, nullptr, &rebalance_subtree); + if (rebalance_subtree != nullptr) { + this->rebalance(rebalance_subtree); + } + } + return 0; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +template <typename iterate_extra_t, + int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)> +int omt<omtdata_t, omtdataout_t, supports_marks>::iterate( + iterate_extra_t *const iterate_extra) const { + return this->iterate_on_range<iterate_extra_t, f>(0, this->size(), + iterate_extra); +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +template <typename iterate_extra_t, + int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)> +int omt<omtdata_t, omtdataout_t, supports_marks>::iterate_on_range( + const uint32_t left, const uint32_t right, + iterate_extra_t *const iterate_extra) const { + if (right > this->size()) { + return EINVAL; + } + if (left == right) { + return 0; + } + if (this->is_array) { + return this->iterate_internal_array<iterate_extra_t, f>(left, right, + iterate_extra); + } + return this->iterate_internal<iterate_extra_t, f>(left, right, this->d.t.root, + 0, iterate_extra); +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +template <typename iterate_extra_t, + int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)> +int omt<omtdata_t, omtdataout_t, supports_marks>::iterate_and_mark_range( + const uint32_t left, const uint32_t right, + iterate_extra_t *const iterate_extra) { + static_assert(supports_marks, "does not support marks"); + if (right > this->size()) { + return EINVAL; + } + if (left == right) { + return 0; + } + paranoid_invariant(!this->is_array); + return this->iterate_and_mark_range_internal<iterate_extra_t, f>( + left, right, this->d.t.root, 0, iterate_extra); +} + +// TODO: We can optimize this if we steal 3 bits. 1 bit: this node is +// marked. 1 bit: left subtree has marks. 1 bit: right subtree has marks. +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +template <typename iterate_extra_t, + int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)> +int omt<omtdata_t, omtdataout_t, supports_marks>::iterate_over_marked( + iterate_extra_t *const iterate_extra) const { + static_assert(supports_marks, "does not support marks"); + paranoid_invariant(!this->is_array); + return this->iterate_over_marked_internal<iterate_extra_t, f>( + this->d.t.root, 0, iterate_extra); +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::unmark( + const subtree &st, const uint32_t index, + GrowableArray<node_idx> *const indexes) { + if (st.is_null()) { + return; + } + omt_node &n = this->d.t.nodes[st.get_index()]; + const uint32_t index_root = index + this->nweight(n.left); + + const bool below = n.get_marks_below(); + if (below) { + this->unmark(n.left, index, indexes); + } + if (n.get_marked()) { + indexes->push(index_root); + } + n.clear_stolen_bits(); + if (below) { + this->unmark(n.right, index_root + 1, indexes); + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::delete_all_marked(void) { + static_assert(supports_marks, "does not support marks"); + if (!this->has_marks()) { + return; + } + paranoid_invariant(!this->is_array); + GrowableArray<node_idx> marked_indexes; + marked_indexes.init(); + + // Remove all marks. + // We need to delete all the stolen bits before calling delete_at to + // prevent barfing. + this->unmark(this->d.t.root, 0, &marked_indexes); + + for (uint32_t i = 0; i < marked_indexes.get_size(); i++) { + // Delete from left to right, shift by number already deleted. + // Alternative is delete from right to left. + int r = this->delete_at(marked_indexes.fetch_unchecked(i) - i); + lazy_assert_zero(r); + } + marked_indexes.deinit(); + barf_if_marked(*this); +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +uint32_t +omt<omtdata_t, omtdataout_t, supports_marks>::verify_marks_consistent_internal( + const subtree &st, const bool UU(allow_marks)) const { + if (st.is_null()) { + return 0; + } + const omt_node &node = this->d.t.nodes[st.get_index()]; + uint32_t num_marks = + verify_marks_consistent_internal(node.left, node.get_marks_below()); + num_marks += + verify_marks_consistent_internal(node.right, node.get_marks_below()); + if (node.get_marks_below()) { + paranoid_invariant(allow_marks); + paranoid_invariant(num_marks > 0); + } else { + // redundant with invariant below, but nice to have explicitly + paranoid_invariant(num_marks == 0); + } + if (node.get_marked()) { + paranoid_invariant(allow_marks); + ++num_marks; + } + return num_marks; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::verify_marks_consistent( + void) const { + static_assert(supports_marks, "does not support marks"); + paranoid_invariant(!this->is_array); + this->verify_marks_consistent_internal(this->d.t.root, true); +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +template <typename iterate_extra_t, + int (*f)(omtdata_t *, const uint32_t, iterate_extra_t *const)> +void omt<omtdata_t, omtdataout_t, supports_marks>::iterate_ptr( + iterate_extra_t *const iterate_extra) { + if (this->is_array) { + this->iterate_ptr_internal_array<iterate_extra_t, f>(0, this->size(), + iterate_extra); + } else { + this->iterate_ptr_internal<iterate_extra_t, f>( + 0, this->size(), this->d.t.root, 0, iterate_extra); + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +int omt<omtdata_t, omtdataout_t, supports_marks>::fetch( + const uint32_t idx, omtdataout_t *const value) const { + if (idx >= this->size()) { + return EINVAL; + } + if (this->is_array) { + this->fetch_internal_array(idx, value); + } else { + this->fetch_internal(this->d.t.root, idx, value); + } + return 0; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)> +int omt<omtdata_t, omtdataout_t, supports_marks>::find_zero( + const omtcmp_t &extra, omtdataout_t *const value, + uint32_t *const idxp) const { + uint32_t tmp_index; + uint32_t *const child_idxp = (idxp != nullptr) ? idxp : &tmp_index; + int r; + if (this->is_array) { + r = this->find_internal_zero_array<omtcmp_t, h>(extra, value, child_idxp); + } else { + r = this->find_internal_zero<omtcmp_t, h>(this->d.t.root, extra, value, + child_idxp); + } + return r; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)> +int omt<omtdata_t, omtdataout_t, supports_marks>::find( + const omtcmp_t &extra, int direction, omtdataout_t *const value, + uint32_t *const idxp) const { + uint32_t tmp_index; + uint32_t *const child_idxp = (idxp != nullptr) ? idxp : &tmp_index; + paranoid_invariant(direction != 0); + if (direction < 0) { + if (this->is_array) { + return this->find_internal_minus_array<omtcmp_t, h>(extra, value, + child_idxp); + } else { + return this->find_internal_minus<omtcmp_t, h>(this->d.t.root, extra, + value, child_idxp); + } + } else { + if (this->is_array) { + return this->find_internal_plus_array<omtcmp_t, h>(extra, value, + child_idxp); + } else { + return this->find_internal_plus<omtcmp_t, h>(this->d.t.root, extra, value, + child_idxp); + } + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +size_t omt<omtdata_t, omtdataout_t, supports_marks>::memory_size(void) { + if (this->is_array) { + return (sizeof *this) + this->capacity * (sizeof this->d.a.values[0]); + } + return (sizeof *this) + this->capacity * (sizeof this->d.t.nodes[0]); +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::create_internal_no_array( + const uint32_t new_capacity) { + this->is_array = true; + this->d.a.start_idx = 0; + this->d.a.num_values = 0; + this->d.a.values = nullptr; + this->capacity = new_capacity; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::create_internal( + const uint32_t new_capacity) { + this->create_internal_no_array(new_capacity); + XMALLOC_N(this->capacity, this->d.a.values); +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +uint32_t omt<omtdata_t, omtdataout_t, supports_marks>::nweight( + const subtree &st) const { + if (st.is_null()) { + return 0; + } else { + return this->d.t.nodes[st.get_index()].weight; + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +typename omt<omtdata_t, omtdataout_t, supports_marks>::node_idx +omt<omtdata_t, omtdataout_t, supports_marks>::node_malloc(void) { + paranoid_invariant(this->d.t.free_idx < this->capacity); + omt_node &n = this->d.t.nodes[this->d.t.free_idx]; + n.clear_stolen_bits(); + return this->d.t.free_idx++; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::node_free( + const node_idx UU(idx)) { + paranoid_invariant(idx < this->capacity); +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::maybe_resize_array( + const uint32_t n) { + const uint32_t new_size = n <= 2 ? 4 : 2 * n; + const uint32_t room = this->capacity - this->d.a.start_idx; + + if (room < n || this->capacity / 2 >= new_size) { + omtdata_t *XMALLOC_N(new_size, tmp_values); + if (this->d.a.num_values) { + memcpy(tmp_values, &this->d.a.values[this->d.a.start_idx], + this->d.a.num_values * (sizeof tmp_values[0])); + } + this->d.a.start_idx = 0; + this->capacity = new_size; + toku_free(this->d.a.values); + this->d.a.values = tmp_values; + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, + supports_marks>::fill_array_with_subtree_values(omtdata_t *const array, + const subtree &st) + const { + if (st.is_null()) return; + const omt_node &tree = this->d.t.nodes[st.get_index()]; + this->fill_array_with_subtree_values(&array[0], tree.left); + array[this->nweight(tree.left)] = tree.value; + this->fill_array_with_subtree_values(&array[this->nweight(tree.left) + 1], + tree.right); +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::convert_to_array(void) { + if (!this->is_array) { + const uint32_t num_values = this->size(); + uint32_t new_size = 2 * num_values; + new_size = new_size < 4 ? 4 : new_size; + + omtdata_t *XMALLOC_N(new_size, tmp_values); + this->fill_array_with_subtree_values(tmp_values, this->d.t.root); + toku_free(this->d.t.nodes); + this->is_array = true; + this->capacity = new_size; + this->d.a.num_values = num_values; + this->d.a.values = tmp_values; + this->d.a.start_idx = 0; + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::rebuild_from_sorted_array( + subtree *const st, const omtdata_t *const values, + const uint32_t numvalues) { + if (numvalues == 0) { + st->set_to_null(); + } else { + const uint32_t halfway = numvalues / 2; + const node_idx newidx = this->node_malloc(); + omt_node *const newnode = &this->d.t.nodes[newidx]; + newnode->weight = numvalues; + newnode->value = values[halfway]; + st->set_index(newidx); + // update everything before the recursive calls so the second call + // can be a tail call. + this->rebuild_from_sorted_array(&newnode->left, &values[0], halfway); + this->rebuild_from_sorted_array(&newnode->right, &values[halfway + 1], + numvalues - (halfway + 1)); + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::convert_to_tree(void) { + if (this->is_array) { + const uint32_t num_nodes = this->size(); + uint32_t new_size = num_nodes * 2; + new_size = new_size < 4 ? 4 : new_size; + + omt_node *XMALLOC_N(new_size, new_nodes); + omtdata_t *const values = this->d.a.values; + omtdata_t *const tmp_values = &values[this->d.a.start_idx]; + this->is_array = false; + this->d.t.nodes = new_nodes; + this->capacity = new_size; + this->d.t.free_idx = 0; + this->d.t.root.set_to_null(); + this->rebuild_from_sorted_array(&this->d.t.root, tmp_values, num_nodes); + toku_free(values); + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::maybe_resize_or_convert( + const uint32_t n) { + if (this->is_array) { + this->maybe_resize_array(n); + } else { + const uint32_t new_size = n <= 2 ? 4 : 2 * n; + const uint32_t num_nodes = this->nweight(this->d.t.root); + if ((this->capacity / 2 >= new_size) || + (this->d.t.free_idx >= this->capacity && num_nodes < n) || + (this->capacity < n)) { + this->convert_to_array(); + // if we had a free list, the "supports_marks" version could + // just resize, as it is now, we have to convert to and back + // from an array. + if (supports_marks) { + this->convert_to_tree(); + } + } + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +bool omt<omtdata_t, omtdataout_t, supports_marks>::will_need_rebalance( + const subtree &st, const int leftmod, const int rightmod) const { + if (st.is_null()) { + return false; + } + const omt_node &n = this->d.t.nodes[st.get_index()]; + // one of the 1's is for the root. + // the other is to take ceil(n/2) + const uint32_t weight_left = this->nweight(n.left) + leftmod; + const uint32_t weight_right = this->nweight(n.right) + rightmod; + return ((1 + weight_left < (1 + 1 + weight_right) / 2) || + (1 + weight_right < (1 + 1 + weight_left) / 2)); +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::insert_internal( + subtree *const subtreep, const omtdata_t &value, const uint32_t idx, + subtree **const rebalance_subtree) { + if (subtreep->is_null()) { + paranoid_invariant_zero(idx); + const node_idx newidx = this->node_malloc(); + omt_node *const newnode = &this->d.t.nodes[newidx]; + newnode->weight = 1; + newnode->left.set_to_null(); + newnode->right.set_to_null(); + newnode->value = value; + subtreep->set_index(newidx); + } else { + omt_node &n = this->d.t.nodes[subtreep->get_index()]; + n.weight++; + if (idx <= this->nweight(n.left)) { + if (*rebalance_subtree == nullptr && + this->will_need_rebalance(*subtreep, 1, 0)) { + *rebalance_subtree = subtreep; + } + this->insert_internal(&n.left, value, idx, rebalance_subtree); + } else { + if (*rebalance_subtree == nullptr && + this->will_need_rebalance(*subtreep, 0, 1)) { + *rebalance_subtree = subtreep; + } + const uint32_t sub_index = idx - this->nweight(n.left) - 1; + this->insert_internal(&n.right, value, sub_index, rebalance_subtree); + } + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::set_at_internal_array( + const omtdata_t &value, const uint32_t idx) { + this->d.a.values[this->d.a.start_idx + idx] = value; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::set_at_internal( + const subtree &st, const omtdata_t &value, const uint32_t idx) { + paranoid_invariant(!st.is_null()); + omt_node &n = this->d.t.nodes[st.get_index()]; + const uint32_t leftweight = this->nweight(n.left); + if (idx < leftweight) { + this->set_at_internal(n.left, value, idx); + } else if (idx == leftweight) { + n.value = value; + } else { + this->set_at_internal(n.right, value, idx - leftweight - 1); + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::delete_internal( + subtree *const subtreep, const uint32_t idx, omt_node *const copyn, + subtree **const rebalance_subtree) { + paranoid_invariant_notnull(subtreep); + paranoid_invariant_notnull(rebalance_subtree); + paranoid_invariant(!subtreep->is_null()); + omt_node &n = this->d.t.nodes[subtreep->get_index()]; + const uint32_t leftweight = this->nweight(n.left); + if (idx < leftweight) { + n.weight--; + if (*rebalance_subtree == nullptr && + this->will_need_rebalance(*subtreep, -1, 0)) { + *rebalance_subtree = subtreep; + } + this->delete_internal(&n.left, idx, copyn, rebalance_subtree); + } else if (idx == leftweight) { + if (n.left.is_null()) { + const uint32_t oldidx = subtreep->get_index(); + *subtreep = n.right; + if (copyn != nullptr) { + copyn->value = n.value; + } + this->node_free(oldidx); + } else if (n.right.is_null()) { + const uint32_t oldidx = subtreep->get_index(); + *subtreep = n.left; + if (copyn != nullptr) { + copyn->value = n.value; + } + this->node_free(oldidx); + } else { + if (*rebalance_subtree == nullptr && + this->will_need_rebalance(*subtreep, 0, -1)) { + *rebalance_subtree = subtreep; + } + // don't need to copy up value, it's only used by this + // next call, and when that gets to the bottom there + // won't be any more recursion + n.weight--; + this->delete_internal(&n.right, 0, &n, rebalance_subtree); + } + } else { + n.weight--; + if (*rebalance_subtree == nullptr && + this->will_need_rebalance(*subtreep, 0, -1)) { + *rebalance_subtree = subtreep; + } + this->delete_internal(&n.right, idx - leftweight - 1, copyn, + rebalance_subtree); + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +template <typename iterate_extra_t, + int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)> +int omt<omtdata_t, omtdataout_t, supports_marks>::iterate_internal_array( + const uint32_t left, const uint32_t right, + iterate_extra_t *const iterate_extra) const { + int r; + for (uint32_t i = left; i < right; ++i) { + r = f(this->d.a.values[this->d.a.start_idx + i], i, iterate_extra); + if (r != 0) { + return r; + } + } + return 0; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +template <typename iterate_extra_t, + int (*f)(omtdata_t *, const uint32_t, iterate_extra_t *const)> +void omt<omtdata_t, omtdataout_t, supports_marks>::iterate_ptr_internal( + const uint32_t left, const uint32_t right, const subtree &st, + const uint32_t idx, iterate_extra_t *const iterate_extra) { + if (!st.is_null()) { + omt_node &n = this->d.t.nodes[st.get_index()]; + const uint32_t idx_root = idx + this->nweight(n.left); + if (left < idx_root) { + this->iterate_ptr_internal<iterate_extra_t, f>(left, right, n.left, idx, + iterate_extra); + } + if (left <= idx_root && idx_root < right) { + int r = f(&n.value, idx_root, iterate_extra); + lazy_assert_zero(r); + } + if (idx_root + 1 < right) { + this->iterate_ptr_internal<iterate_extra_t, f>( + left, right, n.right, idx_root + 1, iterate_extra); + } + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +template <typename iterate_extra_t, + int (*f)(omtdata_t *, const uint32_t, iterate_extra_t *const)> +void omt<omtdata_t, omtdataout_t, supports_marks>::iterate_ptr_internal_array( + const uint32_t left, const uint32_t right, + iterate_extra_t *const iterate_extra) { + for (uint32_t i = left; i < right; ++i) { + int r = f(&this->d.a.values[this->d.a.start_idx + i], i, iterate_extra); + lazy_assert_zero(r); + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +template <typename iterate_extra_t, + int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)> +int omt<omtdata_t, omtdataout_t, supports_marks>::iterate_internal( + const uint32_t left, const uint32_t right, const subtree &st, + const uint32_t idx, iterate_extra_t *const iterate_extra) const { + if (st.is_null()) { + return 0; + } + int r; + const omt_node &n = this->d.t.nodes[st.get_index()]; + const uint32_t idx_root = idx + this->nweight(n.left); + if (left < idx_root) { + r = this->iterate_internal<iterate_extra_t, f>(left, right, n.left, idx, + iterate_extra); + if (r != 0) { + return r; + } + } + if (left <= idx_root && idx_root < right) { + r = f(n.value, idx_root, iterate_extra); + if (r != 0) { + return r; + } + } + if (idx_root + 1 < right) { + return this->iterate_internal<iterate_extra_t, f>( + left, right, n.right, idx_root + 1, iterate_extra); + } + return 0; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +template <typename iterate_extra_t, + int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)> +int omt<omtdata_t, omtdataout_t, supports_marks>:: + iterate_and_mark_range_internal(const uint32_t left, const uint32_t right, + const subtree &st, const uint32_t idx, + iterate_extra_t *const iterate_extra) { + paranoid_invariant(!st.is_null()); + int r; + omt_node &n = this->d.t.nodes[st.get_index()]; + const uint32_t idx_root = idx + this->nweight(n.left); + if (left < idx_root && !n.left.is_null()) { + n.set_marks_below_bit(); + r = this->iterate_and_mark_range_internal<iterate_extra_t, f>( + left, right, n.left, idx, iterate_extra); + if (r != 0) { + return r; + } + } + if (left <= idx_root && idx_root < right) { + n.set_marked_bit(); + r = f(n.value, idx_root, iterate_extra); + if (r != 0) { + return r; + } + } + if (idx_root + 1 < right && !n.right.is_null()) { + n.set_marks_below_bit(); + return this->iterate_and_mark_range_internal<iterate_extra_t, f>( + left, right, n.right, idx_root + 1, iterate_extra); + } + return 0; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +template <typename iterate_extra_t, + int (*f)(const omtdata_t &, const uint32_t, iterate_extra_t *const)> +int omt<omtdata_t, omtdataout_t, supports_marks>::iterate_over_marked_internal( + const subtree &st, const uint32_t idx, + iterate_extra_t *const iterate_extra) const { + if (st.is_null()) { + return 0; + } + int r; + const omt_node &n = this->d.t.nodes[st.get_index()]; + const uint32_t idx_root = idx + this->nweight(n.left); + if (n.get_marks_below()) { + r = this->iterate_over_marked_internal<iterate_extra_t, f>(n.left, idx, + iterate_extra); + if (r != 0) { + return r; + } + } + if (n.get_marked()) { + r = f(n.value, idx_root, iterate_extra); + if (r != 0) { + return r; + } + } + if (n.get_marks_below()) { + return this->iterate_over_marked_internal<iterate_extra_t, f>( + n.right, idx_root + 1, iterate_extra); + } + return 0; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::fetch_internal_array( + const uint32_t i, omtdataout_t *const value) const { + if (value != nullptr) { + copyout(value, &this->d.a.values[this->d.a.start_idx + i]); + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::fetch_internal( + const subtree &st, const uint32_t i, omtdataout_t *const value) const { + omt_node &n = this->d.t.nodes[st.get_index()]; + const uint32_t leftweight = this->nweight(n.left); + if (i < leftweight) { + this->fetch_internal(n.left, i, value); + } else if (i == leftweight) { + if (value != nullptr) { + copyout(value, &n); + } + } else { + this->fetch_internal(n.right, i - leftweight - 1, value); + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::fill_array_with_subtree_idxs( + node_idx *const array, const subtree &st) const { + if (!st.is_null()) { + const omt_node &tree = this->d.t.nodes[st.get_index()]; + this->fill_array_with_subtree_idxs(&array[0], tree.left); + array[this->nweight(tree.left)] = st.get_index(); + this->fill_array_with_subtree_idxs(&array[this->nweight(tree.left) + 1], + tree.right); + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::rebuild_subtree_from_idxs( + subtree *const st, const node_idx *const idxs, const uint32_t numvalues) { + if (numvalues == 0) { + st->set_to_null(); + } else { + uint32_t halfway = numvalues / 2; + st->set_index(idxs[halfway]); + // node_idx newidx = idxs[halfway]; + omt_node &newnode = this->d.t.nodes[st->get_index()]; + newnode.weight = numvalues; + // value is already in there. + this->rebuild_subtree_from_idxs(&newnode.left, &idxs[0], halfway); + this->rebuild_subtree_from_idxs(&newnode.right, &idxs[halfway + 1], + numvalues - (halfway + 1)); + // n_idx = newidx; + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::rebalance( + subtree *const st) { + node_idx idx = st->get_index(); + if (idx == this->d.t.root.get_index()) { + // Try to convert to an array. + // If this fails, (malloc) nothing will have changed. + // In the failure case we continue on to the standard rebalance + // algorithm. + this->convert_to_array(); + if (supports_marks) { + this->convert_to_tree(); + } + } else { + const omt_node &n = this->d.t.nodes[idx]; + node_idx *tmp_array; + size_t mem_needed = n.weight * (sizeof tmp_array[0]); + size_t mem_free = + (this->capacity - this->d.t.free_idx) * (sizeof this->d.t.nodes[0]); + bool malloced; + if (mem_needed <= mem_free) { + // There is sufficient free space at the end of the nodes array + // to hold enough node indexes to rebalance. + malloced = false; + tmp_array = + reinterpret_cast<node_idx *>(&this->d.t.nodes[this->d.t.free_idx]); + } else { + malloced = true; + XMALLOC_N(n.weight, tmp_array); + } + this->fill_array_with_subtree_idxs(tmp_array, *st); + this->rebuild_subtree_from_idxs(st, tmp_array, n.weight); + if (malloced) toku_free(tmp_array); + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::copyout( + omtdata_t *const out, const omt_node *const n) { + *out = n->value; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::copyout( + omtdata_t **const out, omt_node *const n) { + *out = &n->value; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::copyout( + omtdata_t *const out, const omtdata_t *const stored_value_ptr) { + *out = *stored_value_ptr; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +void omt<omtdata_t, omtdataout_t, supports_marks>::copyout( + omtdata_t **const out, omtdata_t *const stored_value_ptr) { + *out = stored_value_ptr; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)> +int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_zero_array( + const omtcmp_t &extra, omtdataout_t *const value, + uint32_t *const idxp) const { + paranoid_invariant_notnull(idxp); + uint32_t min = this->d.a.start_idx; + uint32_t limit = this->d.a.start_idx + this->d.a.num_values; + uint32_t best_pos = subtree::NODE_NULL; + uint32_t best_zero = subtree::NODE_NULL; + + while (min != limit) { + uint32_t mid = (min + limit) / 2; + int hv = h(this->d.a.values[mid], extra); + if (hv < 0) { + min = mid + 1; + } else if (hv > 0) { + best_pos = mid; + limit = mid; + } else { + best_zero = mid; + limit = mid; + } + } + if (best_zero != subtree::NODE_NULL) { + // Found a zero + if (value != nullptr) { + copyout(value, &this->d.a.values[best_zero]); + } + *idxp = best_zero - this->d.a.start_idx; + return 0; + } + if (best_pos != subtree::NODE_NULL) + *idxp = best_pos - this->d.a.start_idx; + else + *idxp = this->d.a.num_values; + return DB_NOTFOUND; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)> +int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_zero( + const subtree &st, const omtcmp_t &extra, omtdataout_t *const value, + uint32_t *const idxp) const { + paranoid_invariant_notnull(idxp); + if (st.is_null()) { + *idxp = 0; + return DB_NOTFOUND; + } + omt_node &n = this->d.t.nodes[st.get_index()]; + int hv = h(n.value, extra); + if (hv < 0) { + int r = this->find_internal_zero<omtcmp_t, h>(n.right, extra, value, idxp); + *idxp += this->nweight(n.left) + 1; + return r; + } else if (hv > 0) { + return this->find_internal_zero<omtcmp_t, h>(n.left, extra, value, idxp); + } else { + int r = this->find_internal_zero<omtcmp_t, h>(n.left, extra, value, idxp); + if (r == DB_NOTFOUND) { + *idxp = this->nweight(n.left); + if (value != nullptr) { + copyout(value, &n); + } + r = 0; + } + return r; + } +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)> +int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_plus_array( + const omtcmp_t &extra, omtdataout_t *const value, + uint32_t *const idxp) const { + paranoid_invariant_notnull(idxp); + uint32_t min = this->d.a.start_idx; + uint32_t limit = this->d.a.start_idx + this->d.a.num_values; + uint32_t best = subtree::NODE_NULL; + + while (min != limit) { + const uint32_t mid = (min + limit) / 2; + const int hv = h(this->d.a.values[mid], extra); + if (hv > 0) { + best = mid; + limit = mid; + } else { + min = mid + 1; + } + } + if (best == subtree::NODE_NULL) { + return DB_NOTFOUND; + } + if (value != nullptr) { + copyout(value, &this->d.a.values[best]); + } + *idxp = best - this->d.a.start_idx; + return 0; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)> +int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_plus( + const subtree &st, const omtcmp_t &extra, omtdataout_t *const value, + uint32_t *const idxp) const { + paranoid_invariant_notnull(idxp); + if (st.is_null()) { + return DB_NOTFOUND; + } + omt_node *const n = &this->d.t.nodes[st.get_index()]; + int hv = h(n->value, extra); + int r; + if (hv > 0) { + r = this->find_internal_plus<omtcmp_t, h>(n->left, extra, value, idxp); + if (r == DB_NOTFOUND) { + *idxp = this->nweight(n->left); + if (value != nullptr) { + copyout(value, n); + } + r = 0; + } + } else { + r = this->find_internal_plus<omtcmp_t, h>(n->right, extra, value, idxp); + if (r == 0) { + *idxp += this->nweight(n->left) + 1; + } + } + return r; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)> +int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_minus_array( + const omtcmp_t &extra, omtdataout_t *const value, + uint32_t *const idxp) const { + paranoid_invariant_notnull(idxp); + uint32_t min = this->d.a.start_idx; + uint32_t limit = this->d.a.start_idx + this->d.a.num_values; + uint32_t best = subtree::NODE_NULL; + + while (min != limit) { + const uint32_t mid = (min + limit) / 2; + const int hv = h(this->d.a.values[mid], extra); + if (hv < 0) { + best = mid; + min = mid + 1; + } else { + limit = mid; + } + } + if (best == subtree::NODE_NULL) { + return DB_NOTFOUND; + } + if (value != nullptr) { + copyout(value, &this->d.a.values[best]); + } + *idxp = best - this->d.a.start_idx; + return 0; +} + +template <typename omtdata_t, typename omtdataout_t, bool supports_marks> +template <typename omtcmp_t, int (*h)(const omtdata_t &, const omtcmp_t &)> +int omt<omtdata_t, omtdataout_t, supports_marks>::find_internal_minus( + const subtree &st, const omtcmp_t &extra, omtdataout_t *const value, + uint32_t *const idxp) const { + paranoid_invariant_notnull(idxp); + if (st.is_null()) { + return DB_NOTFOUND; + } + omt_node *const n = &this->d.t.nodes[st.get_index()]; + int hv = h(n->value, extra); + if (hv < 0) { + int r = + this->find_internal_minus<omtcmp_t, h>(n->right, extra, value, idxp); + if (r == 0) { + *idxp += this->nweight(n->left) + 1; + } else if (r == DB_NOTFOUND) { + *idxp = this->nweight(n->left); + if (value != nullptr) { + copyout(value, n); + } + r = 0; + } + return r; + } else { + return this->find_internal_minus<omtcmp_t, h>(n->left, extra, value, idxp); + } +} +} // namespace toku diff --git a/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/partitioned_counter.h b/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/partitioned_counter.h new file mode 100644 index 000000000..f20eeedf2 --- /dev/null +++ b/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/partitioned_counter.h @@ -0,0 +1,165 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +======= */ + +#ident \ + "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#pragma once + +// Overview: A partitioned_counter provides a counter that can be incremented +// and the running sum can be read at any time. +// We assume that increments are frequent, whereas reading is infrequent. +// Implementation hint: Use thread-local storage so each thread increments its +// own data. The increment does not require a lock or atomic operation. +// Reading the data can be performed by iterating over the thread-local +// versions, summing them up. The data structure also includes a sum for all +// the threads that have died. Use a pthread_key to create the thread-local +// versions. When a thread finishes, the system calls pthread_key destructor +// which can add that thread's copy into the sum_of_dead counter. +// Rationale: For statistics such as are found in engine status, we need a +// counter that requires no cache misses to increment. We've seen significant +// performance speedups by removing certain counters. Rather than removing +// those statistics, we would like to just make the counter fast. We generally +// increment the counters frequently, and want to fetch the values +// infrequently. The counters are monotonic. The counters can be split into +// many counters, which can be summed up at the end. We don't care if we get +// slightly out-of-date counter sums when we read the counter. We don't care +// if there is a race on reading the a counter +// variable and incrementing. +// See tests/test_partitioned_counter.c for some performance measurements. +// Operations: +// create_partitioned_counter Create a counter initialized to zero. +// destroy_partitioned_counter Destroy it. +// increment_partitioned_counter Increment it. This is the frequent +// operation. read_partitioned_counter Get the current value. This is +// infrequent. +// See partitioned_counter.cc for the abstraction function and representation +// invariant. +// +// The google style guide says to avoid using constructors, and it appears that +// constructors may have broken all the tests, because they called +// pthread_key_create before the key was actually created. So the google style +// guide may have some wisdom there... +// +// This version does not use constructors, essentially reverrting to the google +// C++ style guide. +// + +// The old C interface. This required a bunch of explicit +// ___attribute__((__destructor__)) functions to remember to destroy counters at +// the end. +#if defined(__cplusplus) +extern "C" { +#endif + +typedef struct partitioned_counter *PARTITIONED_COUNTER; +PARTITIONED_COUNTER create_partitioned_counter(void); +// Effect: Create a counter, initialized to zero. + +void destroy_partitioned_counter(PARTITIONED_COUNTER); +// Effect: Destroy the counter. No operations on that counter are permitted +// after this. + +void increment_partitioned_counter(PARTITIONED_COUNTER, uint64_t amount); +// Effect: Increment the counter by amount. +// Requires: No overflows. This is a 64-bit unsigned counter. + +uint64_t read_partitioned_counter(PARTITIONED_COUNTER) + __attribute__((__visibility__("default"))); +// Effect: Return the current value of the counter. + +void partitioned_counters_init(void); +// Effect: Initialize any partitioned counters data structures that must be set +// up before any partitioned counters run. + +void partitioned_counters_destroy(void); +// Effect: Destroy any partitioned counters data structures. + +#if defined(__cplusplus) +}; +#endif + +#if 0 +#include <pthread.h> + +#include "fttypes.h" + +// Used inside the PARTITIONED_COUNTER. +struct linked_list_head { + struct linked_list_element *first; +}; + + +class PARTITIONED_COUNTER { +public: + PARTITIONED_COUNTER(void); + // Effect: Construct a counter, initialized to zero. + + ~PARTITIONED_COUNTER(void); + // Effect: Destruct the counter. + + void increment(uint64_t amount); + // Effect: Increment the counter by amount. This is a 64-bit unsigned counter, and if you overflow it, you will get overflowed results (that is mod 2^64). + // Requires: Don't use this from a static constructor or destructor. + + uint64_t read(void); + // Effect: Read the sum. + // Requires: Don't use this from a static constructor or destructor. + +private: + uint64_t _sum_of_dead; // The sum of all thread-local counts from threads that have terminated. + pthread_key_t _key; // The pthread_key which gives us the hook to construct and destruct thread-local storage. + struct linked_list_head _ll_counter_head; // A linked list of all the thread-local information for this counter. + + // This function is used to destroy the thread-local part of the state when a thread terminates. + // But it's not the destructor for the local part of the counter, it's a destructor on a "dummy" key just so that we get a notification when a thread ends. + friend void destroy_thread_local_part_of_partitioned_counters (void *); +}; +#endif diff --git a/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/status.h b/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/status.h new file mode 100644 index 000000000..3fd0095d0 --- /dev/null +++ b/src/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/status.h @@ -0,0 +1,76 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +======= */ + +#ident \ + "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#pragma once + +#include "partitioned_counter.h" +// PORT2: #include <util/constexpr.h> + +#define TOKUFT_STATUS_INIT(array, k, c, t, l, inc) \ + do { \ + array.status[k].keyname = #k; \ + array.status[k].columnname = #c; \ + array.status[k].type = t; \ + array.status[k].legend = l; \ + constexpr_static_assert( \ + strcmp(#c, "NULL") && strcmp(#c, "0"), \ + "Use nullptr for no column name instead of NULL, 0, etc..."); \ + constexpr_static_assert( \ + (inc) == TOKU_ENGINE_STATUS || strcmp(#c, "nullptr"), \ + "Missing column name."); \ + array.status[k].include = \ + static_cast<toku_engine_status_include_type>(inc); \ + if (t == STATUS_PARCOUNT) { \ + array.status[k].value.parcount = create_partitioned_counter(); \ + } \ + } while (0) |