diff options
Diffstat (limited to '')
-rw-r--r-- | src/util/slmdb.c | 938 |
1 files changed, 938 insertions, 0 deletions
diff --git a/src/util/slmdb.c b/src/util/slmdb.c new file mode 100644 index 0000000..499589d --- /dev/null +++ b/src/util/slmdb.c @@ -0,0 +1,938 @@ +/*++ +/* NAME +/* slmdb 3 +/* SUMMARY +/* Simplified LMDB API +/* SYNOPSIS +/* #include <slmdb.h> +/* +/* int slmdb_init(slmdb, curr_limit, size_incr, hard_limit) +/* SLMDB *slmdb; +/* size_t curr_limit; +/* int size_incr; +/* size_t hard_limit; +/* +/* int slmdb_open(slmdb, path, open_flags, lmdb_flags, slmdb_flags) +/* SLMDB *slmdb; +/* const char *path; +/* int open_flags; +/* int lmdb_flags; +/* int slmdb_flags; +/* +/* int slmdb_close(slmdb) +/* SLMDB *slmdb; +/* +/* int slmdb_get(slmdb, mdb_key, mdb_value) +/* SLMDB *slmdb; +/* MDB_val *mdb_key; +/* MDB_val *mdb_value; +/* +/* int slmdb_put(slmdb, mdb_key, mdb_value, flags) +/* SLMDB *slmdb; +/* MDB_val *mdb_key; +/* MDB_val *mdb_value; +/* int flags; +/* +/* int slmdb_del(slmdb, mdb_key) +/* SLMDB *slmdb; +/* MDB_val *mdb_key; +/* +/* int slmdb_cursor_get(slmdb, mdb_key, mdb_value, op) +/* SLMDB *slmdb; +/* MDB_val *mdb_key; +/* MDB_val *mdb_value; +/* MDB_cursor_op op; +/* AUXILIARY FUNCTIONS +/* int slmdb_fd(slmdb) +/* SLMDB *slmdb; +/* +/* size_t slmdb_curr_limit(slmdb) +/* SLMDB *slmdb; +/* +/* int slmdb_control(slmdb, request, ...) +/* SLMDB *slmdb; +/* int request; +/* DESCRIPTION +/* This module simplifies the LMDB API by hiding recoverable +/* errors from the application. Details are given in the +/* section "ERROR RECOVERY". +/* +/* slmdb_init() performs mandatory initialization before opening +/* an LMDB database. The result value is an LMDB status code +/* (zero in case of success). +/* +/* slmdb_open() opens an LMDB database. The result value is +/* an LMDB status code (zero in case of success). +/* +/* slmdb_close() finalizes an optional bulk-mode transaction +/* and closes a successfully-opened LMDB database. The result +/* value is an LMDB status code (zero in case of success). +/* +/* slmdb_get() is an mdb_get() wrapper with automatic error +/* recovery. The result value is an LMDB status code (zero +/* in case of success). +/* +/* slmdb_put() is an mdb_put() wrapper with automatic error +/* recovery. The result value is an LMDB status code (zero +/* in case of success). +/* +/* slmdb_del() is an mdb_del() wrapper with automatic error +/* recovery. The result value is an LMDB status code (zero +/* in case of success). +/* +/* slmdb_cursor_get() is an mdb_cursor_get() wrapper with +/* automatic error recovery. The result value is an LMDB +/* status code (zero in case of success). This wrapper supports +/* only one cursor per database. +/* +/* slmdb_fd() returns the file descriptor for the specified +/* database. This may be used for file status queries or +/* application-controlled locking. +/* +/* slmdb_curr_limit() returns the current database size limit +/* for the specified database. +/* +/* slmdb_control() specifies optional features. The result is +/* an LMDB status code (zero in case of success). +/* +/* Arguments: +/* .IP slmdb +/* Pointer to caller-provided storage. +/* .IP curr_limit +/* The initial memory mapping size limit. This limit is +/* automatically increased when the database becomes full. +/* .IP size_incr +/* An integer factor by which the memory mapping size limit +/* is increased when the database becomes full. +/* .IP hard_limit +/* The upper bound for the memory mapping size limit. +/* .IP path +/* LMDB database pathname. +/* .IP open_flags +/* Flags that control file open operations. Do not specify +/* locking flags here. +/* .IP lmdb_flags +/* Flags that control the LMDB environment. If MDB_NOLOCK is +/* specified, then each slmdb_get() or slmdb_cursor_get() call +/* must be protected with a shared (or exclusive) external lock, +/* and each slmdb_put() or slmdb_del() call must be protected +/* with an exclusive external lock. A lock may be released +/* after the call returns. A writer may atomically downgrade +/* an exclusive lock to shared, but it must obtain an exclusive +/* lock before making another slmdb(3) write request. +/* .sp +/* Note: when a database is opened with MDB_NOLOCK, external +/* locks such as fcntl() do not protect slmdb(3) requests +/* within the same process against each other. If a program +/* cannot avoid making simultaneous slmdb(3) requests, then +/* it must synchronize these requests with in-process locks, +/* in addition to the per-process fcntl(2) locks. +/* .IP slmdb_flags +/* Bit-wise OR of zero or more of the following: +/* .RS +/* .IP SLMDB_FLAG_BULK +/* Open the database and create a "bulk" transaction that is +/* committed when the database is closed. If MDB_NOLOCK is +/* specified, then the entire transaction must be protected +/* with a persistent external lock. All slmdb_get(), slmdb_put() +/* and slmdb_del() requests will be directed to the "bulk" +/* transaction. +/* .RE +/* .IP mdb_key +/* Pointer to caller-provided lookup key storage. +/* .IP mdb_value +/* Pointer to caller-provided value storage. +/* .IP op +/* LMDB cursor operation. +/* .IP request +/* The start of a list of (name, value) pairs, terminated with +/* CA_SLMDB_CTL_END. The following text enumerates the symbolic +/* request names and the corresponding argument types. +/* .RS +/* .IP "CA_SLMDB_CTL_LONGJMP_FN(void (*)(void *, int))" +/* Call-back function pointer. The function is called to repeat +/* a failed bulk-mode transaction from the start. The arguments +/* are the application context and the setjmp() or sigsetjmp() +/* result value. +/* .IP "CA_SLMDB_CTL_NOTIFY_FN(void (*)(void *, int, ...))" +/* Call-back function pointer. The function is called to report +/* successful error recovery. The arguments are the application +/* context, the MDB error code, and additional arguments that +/* depend on the error code. Details are given in the section +/* "ERROR RECOVERY". +/* .IP "CA_SLMDB_CTL_ASSERT_FN(void (*)(void *, const char *))" +/* Call-back function pointer. The function is called to +/* report an LMDB internal assertion failure. The arguments +/* are the application context, and text that describes the +/* problem. +/* .IP "CA_SLMDB_CTL_CB_CONTEXT(void *)" +/* Application context that is passed in call-back function +/* calls. +/* .IP "CA_SLMDB_CTL_API_RETRY_LIMIT(int)" +/* How many times to recover from LMDB errors within the +/* execution of a single slmdb(3) API call before giving up. +/* .IP "CA_SLMDB_CTL_BULK_RETRY_LIMIT(int)" +/* How many times to recover from a bulk-mode transaction +/* before giving up. +/* .RE +/* ERROR RECOVERY +/* .ad +/* .fi +/* This module automatically repeats failed requests after +/* recoverable errors, up to the limits specified with +/* slmdb_control(). +/* +/* Recoverable errors are reported through an optional +/* notification function specified with slmdb_control(). With +/* recoverable MDB_MAP_FULL and MDB_MAP_RESIZED errors, the +/* additional argument is a size_t value with the updated +/* current database size limit; with recoverable MDB_READERS_FULL +/* errors there is no additional argument. +/* BUGS +/* Recovery from MDB_MAP_FULL involves resizing the database +/* memory mapping. According to LMDB documentation this +/* requires that there is no concurrent activity in the same +/* database by other threads in the same memory address space. +/* SEE ALSO +/* lmdb(3) API manpage (currently, non-existent). +/* AUTHOR(S) +/* Howard Chu +/* Symas Corporation +/* +/* Wietse Venema +/* IBM T.J. Watson Research +/* P.O. Box 704 +/* Yorktown Heights, NY 10598, USA +/* +/* Wietse Venema +/* Google, Inc. +/* 111 8th Avenue +/* New York, NY 10011, USA +/*--*/ + + /* + * DO NOT include other Postfix-specific header files. This LMDB wrapper + * must be usable outside Postfix. + */ + +#ifdef HAS_LMDB + +/* System library. */ + +#include <sys/stat.h> +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> +#include <limits.h> +#include <stdarg.h> +#include <string.h> +#include <stdlib.h> + +/* Application-specific. */ + +#include <slmdb.h> + + /* + * Minimum LMDB patchlevel. + * + * LMDB 0.9.11 allows Postfix daemons to log an LMDB error message instead of + * falling out of the sky without any explanation. Without such logging, + * Postfix with LMDB would be too hard to support. + * + * LMDB 0.9.10 fixes an information leak where LMDB wrote chunks of up to 4096 + * bytes of uninitialized heap memory to a database. This was a security + * violation because it made information persistent that was not meant to be + * persisted, or it was sharing information that was not meant to be shared. + * + * LMDB 0.9.9 allows Postfix to use external (fcntl()-based) locks, instead of + * having to use world-writable LMDB lock files. + * + * LMDB 0.9.8 allows Postfix to update the database size limit on-the-fly, so + * that it can recover from an MDB_MAP_FULL error without having to close + * the database. It also allows an application to "pick up" a new database + * size limit on-the-fly, so that it can recover from an MDB_MAP_RESIZED + * error without having to close the database. + * + * The database size limit that remains is imposed by the hardware memory + * address space (31 or 47 bits, typically) or file system. The LMDB + * implementation is supposed to handle databases larger than physical + * memory. However, this is not necessarily guaranteed for (bulk) + * transactions larger than physical memory. + */ +#if MDB_VERSION_FULL < MDB_VERINT(0, 9, 11) +#error "This Postfix version requires LMDB version 0.9.11 or later" +#endif + + /* + * Error recovery. + * + * The purpose of the slmdb(3) API is to hide LMDB quirks (recoverable + * MAP_FULL, MAP_RESIZED, or MDB_READERS_FULL errors). With these out of the + * way, applications can pretend that those quirks don't exist, and focus on + * their own job. + * + * - To recover from a single-transaction LMDB error, each wrapper function + * uses tail recursion instead of goto. Since LMDB errors are rare, code + * clarity is more important than speed. + * + * - To recover from a bulk-transaction LMDB error, the error-recovery code + * triggers a long jump back into the caller to some pre-arranged point (the + * closest thing that C has to exception handling). The application is then + * expected to repeat the bulk transaction from scratch. + * + * When any code aborts a bulk transaction, it must reset slmdb->txn to null + * to avoid a use-after-free problem in slmdb_close(). + */ + + /* + * Our default retry attempt limits. We allow a few retries per slmdb(3) API + * call for non-bulk transactions. We allow a number of bulk-transaction + * retries that is proportional to the memory address space. + */ +#define SLMDB_DEF_API_RETRY_LIMIT 30 /* Retries per slmdb(3) API call */ +#define SLMDB_DEF_BULK_RETRY_LIMIT \ + (2 * sizeof(size_t) * CHAR_BIT) /* Retries per bulk-mode transaction */ + + /* + * We increment the recursion counter each time we try to recover from + * error, and reset the recursion counter when returning to the application + * from the slmdb(3) API. + */ +#define SLMDB_API_RETURN(slmdb, status) do { \ + (slmdb)->api_retry_count = 0; \ + return (status); \ + } while (0) + + /* + * With MDB_NOLOCK, the application uses an external lock for inter-process + * synchronization. Because the caller may release the external lock after + * an SLMDB API call, each SLMDB API function must use a short-lived + * transaction unless the transaction is a bulk-mode transaction. + */ + +/* slmdb_cursor_close - close cursor and its read transaction */ + +static void slmdb_cursor_close(SLMDB *slmdb) +{ + MDB_txn *txn; + + /* + * Close the cursor and its read transaction. We can restore it later + * from the saved key information. + */ + txn = mdb_cursor_txn(slmdb->cursor); + mdb_cursor_close(slmdb->cursor); + slmdb->cursor = 0; + mdb_txn_abort(txn); +} + +/* slmdb_saved_key_init - initialize saved key info */ + +static void slmdb_saved_key_init(SLMDB *slmdb) +{ + slmdb->saved_key.mv_data = 0; + slmdb->saved_key.mv_size = 0; + slmdb->saved_key_size = 0; +} + +/* slmdb_saved_key_free - destroy saved key info */ + +static void slmdb_saved_key_free(SLMDB *slmdb) +{ + free(slmdb->saved_key.mv_data); + slmdb_saved_key_init(slmdb); +} + +#define HAVE_SLMDB_SAVED_KEY(s) ((s)->saved_key.mv_data != 0) + +/* slmdb_saved_key_assign - copy the saved key */ + +static int slmdb_saved_key_assign(SLMDB *slmdb, MDB_val *key_val) +{ + + /* + * Extend the buffer to fit the key, so that we can avoid malloc() + * overhead most of the time. + */ + if (slmdb->saved_key_size < key_val->mv_size) { + if (slmdb->saved_key.mv_data == 0) + slmdb->saved_key.mv_data = malloc(key_val->mv_size); + else + slmdb->saved_key.mv_data = + realloc(slmdb->saved_key.mv_data, key_val->mv_size); + if (slmdb->saved_key.mv_data == 0) { + slmdb_saved_key_init(slmdb); + return (ENOMEM); + } else { + slmdb->saved_key_size = key_val->mv_size; + } + } + + /* + * Copy the key under the cursor. + */ + memcpy(slmdb->saved_key.mv_data, key_val->mv_data, key_val->mv_size); + slmdb->saved_key.mv_size = key_val->mv_size; + return (0); +} + +/* slmdb_prepare - LMDB-specific (re)initialization before actual access */ + +static int slmdb_prepare(SLMDB *slmdb) +{ + int status = 0; + + /* + * This is called before accessing the database, or after recovery from + * an LMDB error. Note: this code cannot recover from errors itself. + * slmdb->txn is either the database open() transaction or a + * freshly-created bulk-mode transaction. When slmdb_prepare() commits or + * aborts commits a transaction, it must set slmdb->txn to null to avoid + * a use-after-free error in slmdb_close(). + * + * - With O_TRUNC we make a "drop" request before updating the database. + * + * - With a bulk-mode transaction we commit when the database is closed. + */ + if (slmdb->open_flags & O_TRUNC) { + if ((status = mdb_drop(slmdb->txn, slmdb->dbi, 0)) != 0) { + mdb_txn_abort(slmdb->txn); + slmdb->txn = 0; + return (status); + } + if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) == 0) { + status = mdb_txn_commit(slmdb->txn); + slmdb->txn = 0; + if (status != 0) + return (status); + } + } else if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) == 0) { + mdb_txn_abort(slmdb->txn); + slmdb->txn = 0; + } + slmdb->api_retry_count = 0; + return (status); +} + +/* slmdb_recover - recover from LMDB errors */ + +static int slmdb_recover(SLMDB *slmdb, int status) +{ + MDB_envinfo info; + int original_status = status; + + /* + * This may be needed in non-MDB_NOLOCK mode. Recovery is rare enough + * that we don't care about a few wasted cycles. + */ + if (slmdb->cursor != 0) + slmdb_cursor_close(slmdb); + + /* + * Limit the number of recovery attempts per slmdb(3) API request. + */ + if ((slmdb->api_retry_count += 1) >= slmdb->api_retry_limit) + return (status); + + /* + * Limit the number of bulk transaction recovery attempts. + */ + if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) != 0 + && (slmdb->bulk_retry_count += 1) > slmdb->bulk_retry_limit) + return (status); + + /* + * Try to clear the error condition. + */ + switch (status) { + + /* + * As of LMDB 0.9.8 when a non-bulk update runs into a "map full" + * error, we can resize the environment's memory map and clear the + * error condition. The caller should retry immediately. + */ + case MDB_MAP_FULL: + /* Can we increase the memory map? Give up if we can't. */ + if (slmdb->curr_limit < slmdb->hard_limit / slmdb->size_incr) { + slmdb->curr_limit = slmdb->curr_limit * slmdb->size_incr; + } else if (slmdb->curr_limit < slmdb->hard_limit) { + slmdb->curr_limit = slmdb->hard_limit; + } else { + /* Sorry, we are already maxed out. */ + break; + } + if (slmdb->notify_fn) + slmdb->notify_fn(slmdb->cb_context, MDB_MAP_FULL, + slmdb->curr_limit); + status = mdb_env_set_mapsize(slmdb->env, slmdb->curr_limit); + break; + + /* + * When a writer resizes the database, read-only applications must + * increase their LMDB memory map size limit, too. Otherwise, they + * won't be able to read a table after it grows. + * + * As of LMDB 0.9.8 we can import the new memory map size limit into the + * database environment by calling mdb_env_set_mapsize() with a zero + * size argument. Then we extract the map size limit for later use. + * The caller should retry immediately. + */ + case MDB_MAP_RESIZED: + if ((status = mdb_env_set_mapsize(slmdb->env, 0)) == 0) { + /* Do not panic. Maps may shrink after bulk update. */ + mdb_env_info(slmdb->env, &info); + slmdb->curr_limit = info.me_mapsize; + if (slmdb->notify_fn) + slmdb->notify_fn(slmdb->cb_context, MDB_MAP_RESIZED, + slmdb->curr_limit); + } + break; + + /* + * What is it with these built-in hard limits that cause systems to + * stop when demand is at its highest? When the system is under + * stress it should slow down and keep making progress. + */ + case MDB_READERS_FULL: + if (slmdb->notify_fn) + slmdb->notify_fn(slmdb->cb_context, MDB_READERS_FULL); + sleep(1); + status = 0; + break; + + /* + * We can't solve this problem. The application should terminate with + * a fatal run-time error and the program should be re-run later. + */ + default: + break; + } + + /* + * If we cleared the error condition for a non-bulk transaction, return a + * success status. The caller should retry the failed operation + * immediately. + */ + if (status == 0 && (slmdb->slmdb_flags & SLMDB_FLAG_BULK) != 0) { + + /* + * We cleared the error condition for a bulk transaction. If the + * transaction is not restartable, return the original error. The + * caller should terminate with a fatal run-time error, and the + * program should be re-run later. + */ + if (slmdb->longjmp_fn == 0) + return (original_status); + + /* + * Rebuild a bulk transaction from scratch, by making a long jump + * back into the caller at some pre-arranged point. In MDB_NOLOCK + * mode, there is no need to upgrade a lock to "exclusive", because a + * failed write transaction has no side effects. + */ + if ((status = mdb_txn_begin(slmdb->env, (MDB_txn *) 0, + slmdb->lmdb_flags & MDB_RDONLY, + &slmdb->txn)) == 0 + && (status = slmdb_prepare(slmdb)) == 0) + slmdb->longjmp_fn(slmdb->cb_context, 1); + } + return (status); +} + +/* slmdb_txn_begin - mdb_txn_begin() wrapper with LMDB error recovery */ + +static int slmdb_txn_begin(SLMDB *slmdb, int rdonly, MDB_txn **txn) +{ + int status; + + if ((status = mdb_txn_begin(slmdb->env, (MDB_txn *) 0, rdonly, txn)) != 0 + && (status = slmdb_recover(slmdb, status)) == 0) + status = slmdb_txn_begin(slmdb, rdonly, txn); + + return (status); +} + +/* slmdb_get - mdb_get() wrapper with LMDB error recovery */ + +int slmdb_get(SLMDB *slmdb, MDB_val *mdb_key, MDB_val *mdb_value) +{ + MDB_txn *txn; + int status; + + /* + * Start a read transaction if there's no bulk-mode txn. + */ + if (slmdb->txn) + txn = slmdb->txn; + else if ((status = slmdb_txn_begin(slmdb, MDB_RDONLY, &txn)) != 0) + SLMDB_API_RETURN(slmdb, status); + + /* + * Do the lookup. + */ + if ((status = mdb_get(txn, slmdb->dbi, mdb_key, mdb_value)) != 0 + && status != MDB_NOTFOUND) { + mdb_txn_abort(txn); + if (txn == slmdb->txn) + slmdb->txn = 0; + if ((status = slmdb_recover(slmdb, status)) == 0) + status = slmdb_get(slmdb, mdb_key, mdb_value); + SLMDB_API_RETURN(slmdb, status); + } + + /* + * Close the read txn if it's not the bulk-mode txn. + */ + if (slmdb->txn == 0) + mdb_txn_abort(txn); + + SLMDB_API_RETURN(slmdb, status); +} + +/* slmdb_put - mdb_put() wrapper with LMDB error recovery */ + +int slmdb_put(SLMDB *slmdb, MDB_val *mdb_key, + MDB_val *mdb_value, int flags) +{ + MDB_txn *txn; + int status; + + /* + * Start a write transaction if there's no bulk-mode txn. + */ + if (slmdb->txn) + txn = slmdb->txn; + else if ((status = slmdb_txn_begin(slmdb, 0, &txn)) != 0) + SLMDB_API_RETURN(slmdb, status); + + /* + * Do the update. + */ + if ((status = mdb_put(txn, slmdb->dbi, mdb_key, mdb_value, flags)) != 0) { + if (status != MDB_KEYEXIST) { + mdb_txn_abort(txn); + if (txn == slmdb->txn) + slmdb->txn = 0; + if ((status = slmdb_recover(slmdb, status)) == 0) + status = slmdb_put(slmdb, mdb_key, mdb_value, flags); + SLMDB_API_RETURN(slmdb, status); + } else { + /* Abort non-bulk transaction only. */ + if (slmdb->txn == 0) + mdb_txn_abort(txn); + } + } + + /* + * Commit the transaction if it's not the bulk-mode txn. + */ + if (status == 0 && slmdb->txn == 0 && (status = mdb_txn_commit(txn)) != 0 + && (status = slmdb_recover(slmdb, status)) == 0) + status = slmdb_put(slmdb, mdb_key, mdb_value, flags); + + SLMDB_API_RETURN(slmdb, status); +} + +/* slmdb_del - mdb_del() wrapper with LMDB error recovery */ + +int slmdb_del(SLMDB *slmdb, MDB_val *mdb_key) +{ + MDB_txn *txn; + int status; + + /* + * Start a write transaction if there's no bulk-mode txn. + */ + if (slmdb->txn) + txn = slmdb->txn; + else if ((status = slmdb_txn_begin(slmdb, 0, &txn)) != 0) + SLMDB_API_RETURN(slmdb, status); + + /* + * Do the update. + */ + if ((status = mdb_del(txn, slmdb->dbi, mdb_key, (MDB_val *) 0)) != 0) { + if (status != MDB_NOTFOUND) { + mdb_txn_abort(txn); + if (txn == slmdb->txn) + slmdb->txn = 0; + if ((status = slmdb_recover(slmdb, status)) == 0) + status = slmdb_del(slmdb, mdb_key); + SLMDB_API_RETURN(slmdb, status); + } else { + /* Abort non-bulk transaction only. */ + if (slmdb->txn == 0) + mdb_txn_abort(txn); + } + } + + /* + * Commit the transaction if it's not the bulk-mode txn. + */ + if (status == 0 && slmdb->txn == 0 && (status = mdb_txn_commit(txn)) != 0 + && (status = slmdb_recover(slmdb, status)) == 0) + status = slmdb_del(slmdb, mdb_key); + + SLMDB_API_RETURN(slmdb, status); +} + +/* slmdb_cursor_get - mdb_cursor_get() wrapper with LMDB error recovery */ + +int slmdb_cursor_get(SLMDB *slmdb, MDB_val *mdb_key, + MDB_val *mdb_value, MDB_cursor_op op) +{ + MDB_txn *txn; + int status = 0; + + /* + * TODO: figure how we would recover a failing bulk transaction. + */ + if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) != 0) { + if (slmdb->assert_fn) + slmdb->assert_fn(slmdb->cb_context, + "slmdb_cursor_get: bulk transaction is not supported"); + return (MDB_PANIC); + } + + /* + * Open a read transaction and cursor if needed. + */ + if (slmdb->cursor == 0) { + if ((status = slmdb_txn_begin(slmdb, MDB_RDONLY, &txn)) != 0) + SLMDB_API_RETURN(slmdb, status); + if ((status = mdb_cursor_open(txn, slmdb->dbi, &slmdb->cursor)) != 0) { + mdb_txn_abort(txn); + if ((status = slmdb_recover(slmdb, status)) == 0) + status = slmdb_cursor_get(slmdb, mdb_key, mdb_value, op); + SLMDB_API_RETURN(slmdb, status); + } + + /* + * Restore the cursor position from the saved key information. + */ + if (HAVE_SLMDB_SAVED_KEY(slmdb) && op != MDB_FIRST) + status = mdb_cursor_get(slmdb->cursor, &slmdb->saved_key, + (MDB_val *) 0, MDB_SET); + } + + /* + * Database lookup. + */ + if (status == 0) + status = mdb_cursor_get(slmdb->cursor, mdb_key, mdb_value, op); + + /* + * Save the cursor position if successful. This can fail only with + * ENOMEM. + * + * Close the cursor read transaction if in MDB_NOLOCK mode, because the + * caller may release the external lock after we return. + */ + if (status == 0) { + status = slmdb_saved_key_assign(slmdb, mdb_key); + if (slmdb->lmdb_flags & MDB_NOLOCK) + slmdb_cursor_close(slmdb); + } + + /* + * Handle end-of-database or other error. + */ + else { + /* Do not hand-optimize out the slmdb_cursor_close() calls below. */ + if (status == MDB_NOTFOUND) { + slmdb_cursor_close(slmdb); + if (HAVE_SLMDB_SAVED_KEY(slmdb)) + slmdb_saved_key_free(slmdb); + } else { + slmdb_cursor_close(slmdb); + if ((status = slmdb_recover(slmdb, status)) == 0) + status = slmdb_cursor_get(slmdb, mdb_key, mdb_value, op); + SLMDB_API_RETURN(slmdb, status); + /* Do not hand-optimize out the above return statement. */ + } + } + SLMDB_API_RETURN(slmdb, status); +} + +/* slmdb_assert_cb - report LMDB assertion failure */ + +static void slmdb_assert_cb(MDB_env *env, const char *text) +{ + SLMDB *slmdb = (SLMDB *) mdb_env_get_userctx(env); + + if (slmdb->assert_fn) + slmdb->assert_fn(slmdb->cb_context, text); +} + +/* slmdb_control - control optional settings */ + +int slmdb_control(SLMDB *slmdb, int first,...) +{ + va_list ap; + int status = 0; + int reqno; + int rc; + + va_start(ap, first); + for (reqno = first; status == 0 && reqno != SLMDB_CTL_END; reqno = va_arg(ap, int)) { + switch (reqno) { + case SLMDB_CTL_LONGJMP_FN: + slmdb->longjmp_fn = va_arg(ap, SLMDB_LONGJMP_FN); + break; + case SLMDB_CTL_NOTIFY_FN: + slmdb->notify_fn = va_arg(ap, SLMDB_NOTIFY_FN); + break; + case SLMDB_CTL_ASSERT_FN: + slmdb->assert_fn = va_arg(ap, SLMDB_ASSERT_FN); + if ((rc = mdb_env_set_userctx(slmdb->env, (void *) slmdb)) != 0 + || (rc = mdb_env_set_assert(slmdb->env, slmdb_assert_cb)) != 0) + status = rc; + break; + case SLMDB_CTL_CB_CONTEXT: + slmdb->cb_context = va_arg(ap, void *); + break; + case SLMDB_CTL_API_RETRY_LIMIT: + slmdb->api_retry_limit = va_arg(ap, int); + break; + case SLMDB_CTL_BULK_RETRY_LIMIT: + slmdb->bulk_retry_limit = va_arg(ap, int); + break; + default: + status = errno = EINVAL; + break; + } + } + va_end(ap); + return (status); +} + +/* slmdb_close - wrapper with LMDB error recovery */ + +int slmdb_close(SLMDB *slmdb) +{ + int status = 0; + + /* + * Finish an open bulk transaction. If slmdb_recover() returns after a + * bulk-transaction error, then it was unable to clear the error + * condition, or unable to restart the bulk transaction. + */ + if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) != 0 && slmdb->txn != 0 + && (status = mdb_txn_commit(slmdb->txn)) != 0) + status = slmdb_recover(slmdb, status); + + /* + * Clean up after an unfinished sequence() operation. + */ + if (slmdb->cursor != 0) + slmdb_cursor_close(slmdb); + + mdb_env_close(slmdb->env); + + /* + * Clean up the saved key information. + */ + if (HAVE_SLMDB_SAVED_KEY(slmdb)) + slmdb_saved_key_free(slmdb); + + SLMDB_API_RETURN(slmdb, status); +} + +/* slmdb_init - mandatory initialization */ + +int slmdb_init(SLMDB *slmdb, size_t curr_limit, int size_incr, + size_t hard_limit) +{ + + /* + * This is a separate operation to keep the slmdb_open() API simple. + * Don't allocate resources here. Just store control information, + */ + slmdb->curr_limit = curr_limit; + slmdb->size_incr = size_incr; + slmdb->hard_limit = hard_limit; + + return (MDB_SUCCESS); +} + +/* slmdb_open - open wrapped LMDB database */ + +int slmdb_open(SLMDB *slmdb, const char *path, int open_flags, + int lmdb_flags, int slmdb_flags) +{ + struct stat st; + MDB_env *env; + MDB_txn *txn; + MDB_dbi dbi; + int db_fd; + int status; + + /* + * Create LMDB environment. + */ + if ((status = mdb_env_create(&env)) != 0) + return (status); + + /* + * Make sure that the memory map has room to store and commit an initial + * "drop" transaction as well as fixed database metadata. We have no way + * to recover from errors before the first application-level I/O request. + */ +#define SLMDB_FUDGE 10240 + + if (slmdb->curr_limit < SLMDB_FUDGE) + slmdb->curr_limit = SLMDB_FUDGE; + if (stat(path, &st) == 0 + && st.st_size > slmdb->curr_limit - SLMDB_FUDGE) { + if (st.st_size > slmdb->hard_limit) + slmdb->hard_limit = st.st_size; + if (st.st_size < slmdb->hard_limit - SLMDB_FUDGE) + slmdb->curr_limit = st.st_size + SLMDB_FUDGE; + else + slmdb->curr_limit = slmdb->hard_limit; + } + + /* + * mdb_open() requires a txn, but since the default DB always exists in + * an LMDB environment, we usually don't need to do anything else with + * the txn. It is currently used for truncate and for bulk transactions. + */ + if ((status = mdb_env_set_mapsize(env, slmdb->curr_limit)) != 0 + || (status = mdb_env_open(env, path, lmdb_flags, 0644)) != 0 + || (status = mdb_txn_begin(env, (MDB_txn *) 0, + lmdb_flags & MDB_RDONLY, &txn)) != 0 + || (status = mdb_open(txn, (const char *) 0, 0, &dbi)) != 0 + || (status = mdb_env_get_fd(env, &db_fd)) != 0) { + mdb_env_close(env); + return (status); + } + + /* + * Bundle up. + */ + slmdb->open_flags = open_flags; + slmdb->lmdb_flags = lmdb_flags; + slmdb->slmdb_flags = slmdb_flags; + slmdb->env = env; + slmdb->dbi = dbi; + slmdb->db_fd = db_fd; + slmdb->cursor = 0; + slmdb_saved_key_init(slmdb); + slmdb->api_retry_count = 0; + slmdb->bulk_retry_count = 0; + slmdb->api_retry_limit = SLMDB_DEF_API_RETRY_LIMIT; + slmdb->bulk_retry_limit = SLMDB_DEF_BULK_RETRY_LIMIT; + slmdb->longjmp_fn = 0; + slmdb->notify_fn = 0; + slmdb->assert_fn = 0; + slmdb->cb_context = 0; + slmdb->txn = txn; + + if ((status = slmdb_prepare(slmdb)) != 0) + mdb_env_close(env); + + return (status); +} + +#endif |