diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 18:07:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 18:07:14 +0000 |
commit | a175314c3e5827eb193872241446f2f8f5c9d33c (patch) | |
tree | cd3d60ca99ae00829c52a6ca79150a5b6e62528b /storage/tokudb/PerconaFT/ft/cachetable/cachetable.h | |
parent | Initial commit. (diff) | |
download | mariadb-10.5-9e4947182e0b875da38088fdd168e775f473b8ad.tar.xz mariadb-10.5-9e4947182e0b875da38088fdd168e775f473b8ad.zip |
Adding upstream version 1:10.5.12.upstream/1%10.5.12upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/tokudb/PerconaFT/ft/cachetable/cachetable.h')
-rw-r--r-- | storage/tokudb/PerconaFT/ft/cachetable/cachetable.h | 588 |
1 files changed, 588 insertions, 0 deletions
diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h new file mode 100644 index 00000000..c5c21b49 --- /dev/null +++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h @@ -0,0 +1,588 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. +======= */ + +#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." 
+ +#pragma once + +#include <fcntl.h> + +#include "ft/logger/logger.h" +#include "ft/serialize/block_table.h" +#include "ft/txn/txn.h" +#include "ft/ft-status.h" +#include "util/minicron.h" + +// Maintain a cache mapping from cachekeys to values (void*) +// Some of the keys can be pinned. Don't pin too many or for too long. +// If the cachetable is too full, it will call the flush_callback() function with the key, the value, and the otherargs +// and then remove the key-value pair from the cache. +// The callback won't be called for any of the currently pinned keys. +// Also when flushing an object, the cachetable drops all references to it, +// so you may need to free() it. +// Note: The cachetable should use a common pool of memory, flushing things across cachetables. +// (The first implementation doesn't) +// If you pin something twice, you must unpin it twice. +// table_size is the initial size of the cache table hash table (in number of entries) +// size limit is the upper bound of the sum of the sizes of the entries in the cache table (total number of bytes) + +// A cache key is a block number. +typedef BLOCKNUM CACHEKEY; + +// Pointer handle types; the pointed-to class/structs are only forward-declared here. +class checkpointer; +typedef class checkpointer *CHECKPOINTER; +typedef struct cachetable *CACHETABLE; +typedef struct cachefile *CACHEFILE; +typedef struct ctpair *PAIR; + +// This struct holds information about values stored in the cachetable. +// As one can tell from the names, we are probably violating an +// abstraction layer by placing names. +// +// The purpose of having this struct is to have a way for the +// cachetable to accumulate some totals we are interested in. +// Breaking this abstraction layer by having these names was the +// easiest way.
+// +typedef struct pair_attr_s { + long size; // size PAIR's value takes in memory + long nonleaf_size; // size if PAIR is a nonleaf node, 0 otherwise, used only for engine status + long leaf_size; // size if PAIR is a leaf node, 0 otherwise, used only for engine status + long rollback_size; // size if PAIR is a rollback node, 0 otherwise, used only for engine status + long cache_pressure_size; // amount PAIR contributes to cache pressure, is sum of buffer sizes and workdone counts + bool is_valid; +} PAIR_ATTR; + +// Build a PAIR_ATTR whose size field is the given size, whose other size fields are 0, +// and whose is_valid flag is true. +static inline PAIR_ATTR make_pair_attr(long size) { + PAIR_ATTR result={ + .size = size, + .nonleaf_size = 0, + .leaf_size = 0, + .rollback_size = 0, + .cache_pressure_size = 0, + .is_valid = true + }; + return result; +} + +void toku_set_cleaner_period (CACHETABLE ct, uint32_t new_period); +uint32_t toku_get_cleaner_period_unlocked (CACHETABLE ct); +void toku_set_cleaner_iterations (CACHETABLE ct, uint32_t new_iterations); +uint32_t toku_get_cleaner_iterations (CACHETABLE ct); +uint32_t toku_get_cleaner_iterations_unlocked (CACHETABLE ct); +void toku_set_enable_partial_eviction (CACHETABLE ct, bool enabled); +bool toku_get_enable_partial_eviction (CACHETABLE ct); + +// cachetable operations + +// create and initialize a cache table +// size_limit is the upper limit on the sum of the sizes of the values in the table +// pass 0 if you want the default +int toku_cachetable_create_ex(CACHETABLE *result, long size_limit, + unsigned long client_pool_threads, + unsigned long cachetable_pool_threads, + unsigned long checkpoint_pool_threads, + LSN initial_lsn, struct tokulogger *logger); + +// Shorthand for toku_cachetable_create_ex() that passes 0 for all three *_pool_threads arguments. +// The expansion deliberately has no trailing semicolon, so the macro can be used inside an +// expression (e.g. to test the return code) and as the body of an unbraced if/else. +#define toku_cachetable_create(r, s, l, o) \ + toku_cachetable_create_ex(r, s, 0, 0, 0, l, o) + +// Create a new cachetable. +// Effects: a new cachetable is created and initialized. +// The cachetable pointer is stored into result. +// The sum of the sizes of the memory objects is set to size_limit, in whatever +// units make sense to the user of the cachetable.
+// Returns: On success, returns 0 and result points to the new cachetable. Otherwise, +// returns an error number. + +// Returns a pointer to the checkpointer within the given cachetable. +CHECKPOINTER toku_cachetable_get_checkpointer(CACHETABLE ct); + +// What is the cachefile that goes with a particular filenum? +// During a transaction, we cannot reuse a filenum. +int toku_cachefile_of_filenum (CACHETABLE t, FILENUM filenum, CACHEFILE *cf); + +// What is the cachefile that goes with a particular iname (relative to env)? +// During a transaction, we cannot reuse an iname. +int toku_cachefile_of_iname_in_env (CACHETABLE ct, const char *iname_in_env, CACHEFILE *cf); + +// Get the iname (within the cwd) associated with the cachefile +// Return the filename +char *toku_cachefile_fname_in_cwd (CACHEFILE cf); + +// Begin a checkpoint using the given checkpointer. +void toku_cachetable_begin_checkpoint (CHECKPOINTER cp, struct tokulogger *logger); + +// End a checkpoint using the given checkpointer. +// NOTE(review): testcallback_f and testextra look like a test hook and its argument - confirm. +void toku_cachetable_end_checkpoint(CHECKPOINTER cp, struct tokulogger *logger, + void (*testcallback_f)(void*), void * testextra); + + +// Shuts down the checkpoint thread +// Requires: the caller holds none of the locks that are taken by the checkpoint function +void toku_cachetable_minicron_shutdown(CACHETABLE ct); + +// Prepare to close the cachetable. This informs the cachetable that it is about to be closed +// so that it can tune its checkpoint resource use. +void toku_cachetable_prepare_close(CACHETABLE ct); + +// Close the cachetable. +// Effects: All of the memory objects are flushed to disk, and the cachetable is destroyed. +void toku_cachetable_close(CACHETABLE *ct); + +// Open a file and bind the file to a new cachefile object. (For use by test programs only.) +int toku_cachetable_openf(CACHEFILE *,CACHETABLE, const char *fname_in_env, int flags, mode_t mode); + +// Bind a file to a new cachefile object.
+int toku_cachetable_openfd(CACHEFILE *,CACHETABLE, int fd, + const char *fname_relative_to_env); +int toku_cachetable_openfd_with_filenum (CACHEFILE *,CACHETABLE, int fd, + const char *fname_in_env, + FILENUM filenum, bool* was_open); + +// Reserve a unique filenum. +FILENUM toku_cachetable_reserve_filenum(CACHETABLE ct); + +// Effect: Reserve a fraction of the cachetable memory. +// Returns the amount reserved. +// To return the memory to the cachetable, call toku_cachetable_release_reserved_memory +// Requires 0<fraction<1. +// NOTE(review): upper_bound presumably caps the amount reserved - confirm. +uint64_t toku_cachetable_reserve_memory(CACHETABLE, double fraction, uint64_t upper_bound); +void toku_cachetable_release_reserved_memory(CACHETABLE, uint64_t); + +// cachefile operations + +// Does an fsync of a cachefile. +void toku_cachefile_fsync(CACHEFILE cf); + +// Cost class that the partial eviction estimate callback reports through its cost argument. +enum partial_eviction_cost { + PE_CHEAP=0, // running partial eviction is cheap, and can be done on the client thread + PE_EXPENSIVE=1, // running partial eviction is expensive, and should not be done on the client thread +}; + +// cachetable pair clean or dirty WRT external memory +enum cachetable_dirty { + CACHETABLE_CLEAN=0, // the cached object is clean WRT the cachefile + CACHETABLE_DIRTY=1, // the cached object is dirty WRT the cachefile +}; + +// The flush callback is called when a key value pair is being written to storage and possibly removed from the cachetable. +// When write_me is true, the value should be written to storage. +// When keep_me is false, the value should be freed. +// When for_checkpoint is true, this was a 'pending' write +// Returns: nothing; the callback type is declared void, so it cannot report an error number.
+// Can access fd (fd is protected by a readlock during call) +typedef void (*CACHETABLE_FLUSH_CALLBACK)(CACHEFILE, int fd, CACHEKEY key, void *value, void **disk_data, void *write_extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, bool write_me, bool keep_me, bool for_checkpoint, bool is_clone); + +// The fetch callback is called when a thread is attempting to get and pin a memory +// object and it is not in the cachetable. +// Returns: 0 if success, otherwise an error number. The address and size of the object +// associated with the key are returned (presumably through value_data and sizep - confirm). +// Can access fd (fd is protected by a readlock during call) +typedef int (*CACHETABLE_FETCH_CALLBACK)(CACHEFILE, PAIR p, int fd, CACHEKEY key, uint32_t fullhash, void **value_data, void **disk_data, PAIR_ATTR *sizep, int *dirtyp, void *read_extraargs); + +// The cachetable calls the partial eviction estimate callback to determine if +// partial eviction is a cheap operation that may be run on the client thread +// or whether partial eviction is expensive and should be done on a background (writer) thread. +// The callback conveys this information by setting cost to either PE_CHEAP or PE_EXPENSIVE. +// If cost is PE_EXPENSIVE, then the callback also sets bytes_freed_estimate +// to return an estimate of the number of bytes it will free +// so that the cachetable can estimate how much data is being evicted on background threads. +// If cost is PE_CHEAP, then the callback does not set bytes_freed_estimate. +typedef void (*CACHETABLE_PARTIAL_EVICTION_EST_CALLBACK)(void *ftnode_pv, void* disk_data, long* bytes_freed_estimate, enum partial_eviction_cost *cost, void *write_extraargs); + +// The cachetable calls the partial eviction callback to try to partially evict pieces +// of the PAIR. The callback determines the strategy for what to evict. The callback may choose to free +// nothing, or may choose to free as much as possible.
When the partial eviction callback is finished, +// it must call finalize with the new PAIR_ATTR and the given finalize_extra. After this point, the +// write lock will be released on the PAIR and it is no longer safe to operate on any of the passed arguments. +// This is useful for doing expensive cleanup work outside of the PAIR's write lock (such as destroying objects) +// +// On entry: requires that a write lock is held on the PAIR in the cachetable. +// On exit: the finalize continuation has been called. +// NOTE(review): the meaning of the int return value is not documented here - confirm against the implementation. +typedef int (*CACHETABLE_PARTIAL_EVICTION_CALLBACK)(void *ftnode_pv, PAIR_ATTR old_attr, void *write_extraargs, + void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra); + +// The cachetable calls this function to determine if a get_and_pin call requires a partial fetch. If this function returns true, +// then the cachetable will subsequently call CACHETABLE_PARTIAL_FETCH_CALLBACK to perform +// a partial fetch. If this function returns false, then the PAIR's value is returned to the caller as is. +// +// An alternative to having this callback is to always call CACHETABLE_PARTIAL_FETCH_CALLBACK, and let +// CACHETABLE_PARTIAL_FETCH_CALLBACK decide whether to do any partial fetching or not. +// There is no particular reason why this alternative was not chosen. +// Requires: a read lock to be held on the PAIR +typedef bool (*CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK)(void *ftnode_pv, void *read_extraargs); + +// The cachetable calls the partial fetch callback when a thread needs to read or decompress a subset of a PAIR into memory. +// An example is needing to read a basement node into memory. Another example is decompressing an internal node's +// message buffer. The cachetable determines if a partial fetch is necessary by first calling CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK.
+// The new PAIR_ATTR of the PAIR is returned in sizep +// Can access fd (fd is protected by a readlock during call) +// Returns: 0 if success, otherwise an error number. +typedef int (*CACHETABLE_PARTIAL_FETCH_CALLBACK)(void *value_data, void* disk_data, void *read_extraargs, int fd, PAIR_ATTR *sizep); + +// The cachetable calls the put callback during a cachetable_put command to provide the opaque PAIR. +// The PAIR can then be used to later unpin the pair. +// Returns: nothing; the callback type is declared void, so it cannot report an error number. +typedef void (*CACHETABLE_PUT_CALLBACK)(CACHEKEY key, void *value_data, PAIR p); + +// TODO(leif) XXX TODO XXX +// NOTE(review): the cleaner callback has no description and its int return value is undocumented - fill in. +typedef int (*CACHETABLE_CLEANER_CALLBACK)(void *ftnode_pv, BLOCKNUM blocknum, uint32_t fullhash, void *write_extraargs); + +// NOTE(review): undocumented; presumably stores a copy of value_data in *cloned_value_data - confirm. +typedef void (*CACHETABLE_CLONE_CALLBACK)(void* value_data, void** cloned_value_data, long* clone_size, PAIR_ATTR* new_attr, bool for_checkpoint, void* write_extraargs); + +typedef void (*CACHETABLE_CHECKPOINT_COMPLETE_CALLBACK)(void *value_data); + +// Bundle of the write-side callbacks for a PAIR together with the extra argument handed to them. +typedef struct { + CACHETABLE_FLUSH_CALLBACK flush_callback; + CACHETABLE_PARTIAL_EVICTION_EST_CALLBACK pe_est_callback; + CACHETABLE_PARTIAL_EVICTION_CALLBACK pe_callback; + CACHETABLE_CLEANER_CALLBACK cleaner_callback; + CACHETABLE_CLONE_CALLBACK clone_callback; + CACHETABLE_CHECKPOINT_COMPLETE_CALLBACK checkpoint_complete_callback; + void* write_extraargs; // parameter for flush_callback, pe_est_callback, pe_callback, and cleaner_callback +} CACHETABLE_WRITE_CALLBACK; + +typedef void (*CACHETABLE_GET_KEY_AND_FULLHASH)(CACHEKEY* cachekey, uint32_t* fullhash, void* extra); + +typedef void (*CACHETABLE_REMOVE_KEY)(CACHEKEY* cachekey, bool for_checkpoint, void* extra); + +void toku_cachefile_set_userdata(CACHEFILE cf, void *userdata, + void (*log_fassociate_during_checkpoint)(CACHEFILE, void*), + void (*close_userdata)(CACHEFILE, int, void*, bool, LSN), + void (*free_userdata)(CACHEFILE, void*), + void (*checkpoint_userdata)(CACHEFILE, int, void*), + void 
(*begin_checkpoint_userdata)(LSN, void*), + void (*end_checkpoint_userdata)(CACHEFILE, int, void*), + void (*note_pin_by_checkpoint)(CACHEFILE, void*), + void (*note_unpin_by_checkpoint)(CACHEFILE, void*)); +// Effect: Store some cachefile-specific user data. When the last reference to a cachefile is closed, we call close_userdata(). +// Before starting a checkpoint, we call checkpoint_prepare_userdata(). +// NOTE(review): no parameter named checkpoint_prepare_userdata exists in this signature; presumably begin_checkpoint_userdata is meant - confirm. +// When the cachefile needs to be checkpointed, we call checkpoint_userdata(). +// If userdata is already non-NULL, then we simply overwrite it. + +void *toku_cachefile_get_userdata(CACHEFILE); +// Effect: Get the user data. + +CACHETABLE toku_cachefile_get_cachetable(CACHEFILE cf); +// Effect: Get the cachetable. + +CACHEFILE toku_pair_get_cachefile(PAIR); +// Effect: Get the cachefile of the pair + +void toku_cachetable_swap_pair_values(PAIR old_pair, PAIR new_pair); +// Effect: Swaps the value_data of old_pair and new_pair. +// Requires: both old_pair and new_pair are pinned with write locks. + +// Kind of lock requested when pinning a PAIR (passed as the lock_type argument of the get_and_pin variants). +typedef enum { + PL_READ = 0, + PL_WRITE_CHEAP, + PL_WRITE_EXPENSIVE +} pair_lock_type; + +// Put something into the cachetable and checkpoint dependent pairs +// if the checkpointing is necessary +void toku_cachetable_put_with_dep_pairs( + CACHEFILE cachefile, + CACHETABLE_GET_KEY_AND_FULLHASH get_key_and_fullhash, + void *value, + PAIR_ATTR attr, + CACHETABLE_WRITE_CALLBACK write_callback, + void *get_key_and_fullhash_extra, + uint32_t num_dependent_pairs, // number of dependent pairs that we may need to checkpoint + PAIR* dependent_pairs, + enum cachetable_dirty* dependent_dirty, // array stating dirty/cleanness of dependent pairs + CACHEKEY* key, + uint32_t* fullhash, + CACHETABLE_PUT_CALLBACK put_callback + ); + +// Put a memory object into the cachetable. +// Effects: Lookup the key in the cachetable. If the key is not in the cachetable, +// then insert the pair and pin it. Otherwise return an error.
Some of the key +// value pairs may be evicted from the cachetable when the cachetable gets too big. +// NOTE(review): this function is declared void, so the error case described above cannot be reported through a return value - confirm what happens when the key is already present. +void toku_cachetable_put(CACHEFILE cf, CACHEKEY key, uint32_t fullhash, + void *value, PAIR_ATTR size, + CACHETABLE_WRITE_CALLBACK write_callback, + CACHETABLE_PUT_CALLBACK put_callback + ); + +// Get and pin the memory object of a PAIR, and write dependent pairs to disk +// if the dependent pairs are pending a checkpoint. +// Effects: If the memory object is in the cachetable, acquire a PAIR lock on it. +// Otherwise, fetch it from storage by calling the fetch callback. If the fetch +// succeeded, add the memory object to the cachetable with a PAIR lock on it. +// Before returning to the user, if the PAIR object being retrieved, or any of the +// dependent pairs passed in as parameters must be written to disk for checkpoint, +// then the required PAIRs are written to disk for checkpoint. +// KEY PROPERTY OF DEPENDENT PAIRS: They are already locked by the client +// Returns: 0 if the memory object is in memory, otherwise an error number. +int toku_cachetable_get_and_pin_with_dep_pairs ( + CACHEFILE cachefile, + CACHEKEY key, + uint32_t fullhash, + void**value, + CACHETABLE_WRITE_CALLBACK write_callback, + CACHETABLE_FETCH_CALLBACK fetch_callback, + CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, + CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, + pair_lock_type lock_type, + void* read_extraargs, // parameter for fetch_callback, pf_req_callback, and pf_callback + uint32_t num_dependent_pairs, // number of dependent pairs that we may need to checkpoint + PAIR* dependent_pairs, + enum cachetable_dirty* dependent_dirty // array stating dirty/cleanness of dependent pairs + ); + +// Get and pin a memory object. +// Effects: If the memory object is in the cachetable, acquire the PAIR lock on it. +// Otherwise, fetch it from storage by calling the fetch callback. If the fetch +// succeeded, add the memory object to the cachetable with a read lock on it. +// NOTE(review): the declaration takes may_modify_value, so "read lock" may be outdated wording - confirm.
+// Returns: 0 if the memory object is in memory, otherwise an error number. +int toku_cachetable_get_and_pin ( + CACHEFILE cachefile, + CACHEKEY key, + uint32_t fullhash, + void**value, + CACHETABLE_WRITE_CALLBACK write_callback, + CACHETABLE_FETCH_CALLBACK fetch_callback, + CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, + CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, + bool may_modify_value, + void* read_extraargs // parameter for fetch_callback, pf_req_callback, and pf_callback + ); + +// Does a partial fetch on a pinned pair. +void toku_cachetable_pf_pinned_pair( + void* value, + CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, + void* read_extraargs, + CACHEFILE cf, + CACHEKEY key, + uint32_t fullhash + ); + +// Node of a list (linked through next) of unlock callbacks; each node carries a callback f and its extra argument. +// NOTE(review): presumably f(extra) is only invoked while locked is true - confirm. +struct unlockers { + bool locked; + void (*f)(void* extra); + void *extra; + struct unlockers *next; +}; +typedef struct unlockers *UNLOCKERS; + +// Effect: If the block is in the cachetable, then return it. +// Otherwise call the functions in unlockers, fetch the data (but don't pin it, since we'll just end up pinning it again later), and return TOKUDB_TRY_AGAIN. +int toku_cachetable_get_and_pin_nonblocking ( + CACHEFILE cf, + CACHEKEY key, + uint32_t fullhash, + void**value, + CACHETABLE_WRITE_CALLBACK write_callback, + CACHETABLE_FETCH_CALLBACK fetch_callback, + CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, + CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, + pair_lock_type lock_type, + void *read_extraargs, // parameter for fetch_callback, pf_req_callback, and pf_callback + UNLOCKERS unlockers + ); + +int toku_cachetable_maybe_get_and_pin (CACHEFILE, CACHEKEY, uint32_t /*fullhash*/, pair_lock_type, void**); +// Effect: Maybe get and pin a memory object. +// This function is similar to the get_and_pin function except that it +// will not attempt to fetch a memory object that is not in the cachetable or requires any kind of blocking to get it.
+// Returns: If the item is already in memory, then return 0 and store it in the +// void**. If the item is not in memory, then return a nonzero error number. + +int toku_cachetable_maybe_get_and_pin_clean (CACHEFILE, CACHEKEY, uint32_t /*fullhash*/, pair_lock_type, void**); +// Effect: Like maybe get and pin, but may pin a clean pair. + +int toku_cachetable_get_attr(CACHEFILE, CACHEKEY, uint32_t /*fullhash*/, PAIR_ATTR *); +// Effect: get the attributes for cachekey +// Returns: 0 if success, non-zero if cachekey is not cached +// Notes: this function exists for tests + +int toku_cachetable_unpin(CACHEFILE, PAIR, enum cachetable_dirty dirty, PAIR_ATTR size); +// Effect: Unpin a memory object +// Modifies: If the memory object is in the cachetable, then OR the dirty flag, +// update the size, and release the read lock on the memory object. +// Returns: 0 if success, otherwise returns an error number. +// Requires: The ct is locked. +// NOTE(review): the variant named ..._ct_prelocked_no_flush below states the opposite precondition; the two "Requires" lines look swapped - confirm against the implementation. + +int toku_cachetable_unpin_ct_prelocked_no_flush(CACHEFILE, PAIR, enum cachetable_dirty dirty, PAIR_ATTR size); +// Effect: The same as toku_cachetable_unpin, except that the ct must not be locked. +// Requires: The ct is NOT locked. + +int toku_cachetable_unpin_and_remove (CACHEFILE, PAIR, CACHETABLE_REMOVE_KEY, void*); /* Removing something already present is OK. */ +// Effect: Remove an object from the cachetable. Don't write it back. +// Requires: The object must be pinned exactly once. + +// test-only wrapper that uses CACHEKEY and fullhash +int toku_test_cachetable_unpin(CACHEFILE, CACHEKEY, uint32_t fullhash, enum cachetable_dirty dirty, PAIR_ATTR size); + +// test-only wrapper that uses CACHEKEY and fullhash +int toku_test_cachetable_unpin_ct_prelocked_no_flush(CACHEFILE, CACHEKEY, uint32_t fullhash, enum cachetable_dirty dirty, PAIR_ATTR size); + +// test-only wrapper that uses CACHEKEY +int toku_test_cachetable_unpin_and_remove (CACHEFILE, CACHEKEY, CACHETABLE_REMOVE_KEY, void*); /* Removing something already present is OK. 
*/ + +int toku_cachefile_prefetch(CACHEFILE cf, CACHEKEY key, uint32_t fullhash, + CACHETABLE_WRITE_CALLBACK write_callback, + CACHETABLE_FETCH_CALLBACK fetch_callback, + CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, + CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, + void *read_extraargs, // parameter for fetch_callback, pf_req_callback, and pf_callback + bool *doing_prefetch); +// Effect: Prefetch a memory object for a given key into the cachetable +// Precondition: The cachetable mutex is NOT held. +// Postcondition: The cachetable mutex is NOT held. +// Returns: 0 if success +// Implementation note: +// 1) The pair's rwlock is acquired (for write) (there is not a deadlock here because the rwlock is a pthread_cond_wait using the cachetable mutex). +// Case A: Single-threaded. +// A1) Call cachetable_fetch_pair, which +// a) Obtains a readlock on the cachefile's fd (to prevent multiple readers at once) +// b) Unlocks the cachetable +// c) Does the fetch off disk. +// d) Locks the cachetable +// e) Unlocks the fd lock. +// f) Unlocks the pair rwlock. +// Case B: Multithreaded +// a) Enqueue a cachetable_reader into the workqueue. +// b) Unlock the cache table. +// c) The enqueue'd job later locks the cachetable, and calls cachetable_fetch_pair (doing the steps in A1 above). + +int toku_cachetable_assert_all_unpinned (CACHETABLE); + +int toku_cachefile_count_pinned (CACHEFILE, int /*printthem*/ ); + +// Close the cachefile. +// Effects: All of the cached objects associated with the cachefile are evicted from +// the cachetable. The flush callback is called for each of these objects. The +// close function does not return until all of the objects are evicted. The cachefile +// object is freed. +// If oplsn_valid is true then use oplsn as the LSN of the close instead of asking the logger. oplsn_valid being true is only allowed during recovery, and requires that you are removing the last reference (otherwise the lsn wouldn't make it in.)
+void toku_cachefile_close (CACHEFILE*, bool oplsn_valid, LSN oplsn); + +// Return on success (different from pread and pwrite) +// NOTE(review): the line above documents the two commented-out declarations below and seems to have lost a word (probably "Return 0 on success") - confirm, or delete it together with them. +//int cachefile_pwrite (CACHEFILE, const void *buf, size_t count, toku_off_t offset); +//int cachefile_pread (CACHEFILE, void *buf, size_t count, toku_off_t offset); + +// Get the file descriptor associated with the cachefile +// Return the file descriptor +// Grabs a read lock protecting the fd +int toku_cachefile_get_fd (CACHEFILE); + +// Get the iname (within the environment) associated with the cachefile +// Return the filename +char * toku_cachefile_fname_in_env (CACHEFILE cf); + +void toku_cachefile_set_fname_in_env(CACHEFILE cf, char *new_fname_in_env); + +// Make it so when the cachefile closes, the underlying file is unlinked +void toku_cachefile_unlink_on_close(CACHEFILE cf); + +// Is this cachefile marked as unlink on close? +bool toku_cachefile_is_unlink_on_close(CACHEFILE cf); + +// NOTE(review): undocumented; judging by the names these set, clear and query a "skip log recover on close" flag - confirm. +void toku_cachefile_skip_log_recover_on_close(CACHEFILE cf); +void toku_cachefile_do_log_recover_on_close(CACHEFILE cf); +bool toku_cachefile_is_skip_log_recover_on_close(CACHEFILE cf); + +// Return the logger associated with the cachefile +struct tokulogger *toku_cachefile_logger(CACHEFILE cf); + +// Return the filenum associated with the cachefile +FILENUM toku_cachefile_filenum(CACHEFILE cf); + +// Effect: Return a 32-bit hash key. The hash key shall be suitable for use with bitmasking for a table whose size is a power of two. +uint32_t toku_cachetable_hash(CACHEFILE cf, CACHEKEY key); + +uint32_t toku_cachefile_fullhash_of_header(CACHEFILE cf); + +// debug functions + +// Print the contents of the cachetable. This is mainly used from gdb +void toku_cachetable_print_state (CACHETABLE ct); + +// Get the state of the cachetable. This is used to verify the cachetable +void toku_cachetable_get_state(CACHETABLE ct, int *num_entries_ptr, int *hash_size_ptr, long *size_current_ptr, long *size_limit_ptr); + +// Get the state of a cachetable entry by key.
This is used to verify the cachetable +int toku_cachetable_get_key_state(CACHETABLE ct, CACHEKEY key, CACHEFILE cf, + void **value_ptr, + int *dirty_ptr, + long long *pin_ptr, + long *size_ptr); + +// Verify the whole cachetable that the cachefile is in. Slow. +void toku_cachefile_verify (CACHEFILE cf); + +// Verify the cachetable. Slow. +void toku_cachetable_verify (CACHETABLE t); + +// Not for use in production, but useful for testing. +void toku_cachetable_print_hash_histogram (void) __attribute__((__visibility__("default"))); + +void toku_cachetable_maybe_flush_some(CACHETABLE ct); + +// for stat64 +uint64_t toku_cachefile_size(CACHEFILE cf); + +void toku_cachetable_get_status(CACHETABLE ct, CACHETABLE_STATUS s); + +void toku_cachetable_set_env_dir(CACHETABLE ct, const char *env_dir); +char * toku_construct_full_name(int count, ...); +char * toku_cachetable_get_fname_in_cwd(CACHETABLE ct, const char * fname_in_env); + +void cachefile_kibbutz_enq (CACHEFILE cf, void (*f)(void*), void *extra); +// Effect: Add a job to the cachetable's collection of work to do. Note that function f must call remove_background_job_from_cf() + +void remove_background_job_from_cf (CACHEFILE cf); +// Effect: When a kibbutz job or cleaner thread finishes in a cachefile, +// the cachetable must be notified. + +// test-only function +int toku_cachetable_get_checkpointing_user_data_status(void); + +// test-only function +int toku_cleaner_thread_for_test(CACHETABLE ct); +int toku_cleaner_thread(void *cleaner_v); + +// test function. Exported in the ydb layer and used by tests that want to run DRD +// The default of 1M is too high for drd tests, so this is a mechanism to set a smaller number. +void toku_pair_list_set_lock_size(uint32_t num_locks); + +// Used by ft-ops.cc to figure out if it has the write lock on a pair. +// Pretty hacky and not accurate enough, should be improved at the frwlock +// layer. +// Declared pure rather than const: the answer reflects the pair's current lock state, +// which is reached through the pointer argument and changes over time, so the compiler +// must not assume that two calls with the same pointer always return the same value. +__attribute__((pure,nonnull)) +bool toku_ctpair_is_write_locked(PAIR pair); |