summaryrefslogtreecommitdiffstats
path: root/ext/lsm1/lsmInt.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--ext/lsm1/lsmInt.h993
1 files changed, 993 insertions, 0 deletions
diff --git a/ext/lsm1/lsmInt.h b/ext/lsm1/lsmInt.h
new file mode 100644
index 0000000..0f822e4
--- /dev/null
+++ b/ext/lsm1/lsmInt.h
@@ -0,0 +1,993 @@
+/*
+** 2011-08-18
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+*************************************************************************
+** Internal structure definitions for the LSM module.
+*/
+#ifndef _LSM_INT_H
+#define _LSM_INT_H
+
+#include "lsm.h"
+#include <assert.h>
+#include <string.h>
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#ifdef _WIN32 /* Windows: <unistd.h> is not included on this branch */
+# ifdef _MSC_VER
+# define snprintf _snprintf /* NOTE(review): _snprintf may leave the buffer unterminated on truncation - confirm callers cope */
+# endif
+#else /* All other platforms are expected to provide <unistd.h> */
+# include <unistd.h>
+#endif
+
+#ifdef NDEBUG /* assert() disabled: force both LSM debug switches off */
+# ifdef LSM_DEBUG_EXPENSIVE
+# undef LSM_DEBUG_EXPENSIVE
+# endif
+# ifdef LSM_DEBUG
+# undef LSM_DEBUG
+# endif
+#else /* assert() enabled: LSM_DEBUG is always defined (LSM_DEBUG_EXPENSIVE is left as the build set it) */
+# ifndef LSM_DEBUG
+# define LSM_DEBUG
+# endif
+#endif
+
+/*
+** Default values for various data structure parameters. These may be
+** overridden by calls to lsm_config(). Each macro expands to one complete expression.
+*/
+#define LSM_DFLT_PAGE_SIZE (4 * 1024)
+#define LSM_DFLT_BLOCK_SIZE (1 * 1024 * 1024)
+#define LSM_DFLT_AUTOFLUSH (1 * 1024 * 1024)
+#define LSM_DFLT_AUTOCHECKPOINT ((i64)(2 * 1024 * 1024)) /* i64-typed; outer parentheses keep the cast intact under sizeof and postfix operators */
+#define LSM_DFLT_AUTOWORK 1
+#define LSM_DFLT_LOG_SIZE (128*1024)
+#define LSM_DFLT_AUTOMERGE 4
+#define LSM_DFLT_SAFETY LSM_SAFETY_NORMAL
+#define LSM_DFLT_MMAP (LSM_IS_64_BIT ? 1 : 32768) /* LSM_IS_64_BIT is defined further down; fine because macros expand at the point of use */
+#define LSM_DFLT_MULTIPLE_PROCESSES 1
+#define LSM_DFLT_USE_LOG 1
+
+/* Initial values for log file checksums. These are only used if the
+** database file does not contain a valid checkpoint. */
+#define LSM_CKSUM0_INIT 42
+#define LSM_CKSUM1_INIT 42
+
+/* "mmap" mode is currently only used in environments with 64-bit address
+** spaces. The following macro is used to test for this. NOTE(review): LSM_DFLT_MMAP is non-zero for both outcomes of this test - confirm the first sentence is still accurate. */
+#define LSM_IS_64_BIT (sizeof(void*)==8) /* sizeof-based: usable in C expressions, not in #if directives */
+
+#define LSM_AUTOWORK_QUANT 32
+
+typedef struct Database Database; /* Forward declaration only; struct body is not in this header */
+typedef struct DbLog DbLog;
+typedef struct FileSystem FileSystem; /* struct body is not in this header */
+typedef struct Freelist Freelist;
+typedef struct FreelistEntry FreelistEntry;
+typedef struct Level Level;
+typedef struct LogMark LogMark;
+typedef struct LogRegion LogRegion;
+typedef struct LogWriter LogWriter; /* struct body is not in this header */
+typedef struct LsmString LsmString;
+typedef struct Mempool Mempool; /* struct body is not in this header */
+typedef struct Merge Merge;
+typedef struct MergeInput MergeInput;
+typedef struct MetaPage MetaPage; /* struct body is not in this header */
+typedef struct MultiCursor MultiCursor; /* struct body is not in this header */
+typedef struct Page Page; /* struct body is not in this header */
+typedef struct Redirect Redirect;
+typedef struct Segment Segment;
+typedef struct SegmentMerger SegmentMerger; /* struct body is not in this header */
+typedef struct ShmChunk ShmChunk;
+typedef struct ShmHeader ShmHeader;
+typedef struct ShmReader ShmReader;
+typedef struct Snapshot Snapshot;
+typedef struct TransMark TransMark;
+typedef struct Tree Tree; /* struct body is not in this header */
+typedef struct TreeCursor TreeCursor; /* struct body is not in this header */
+typedef struct TreeHeader TreeHeader;
+typedef struct TreeMark TreeMark;
+typedef struct TreeRoot TreeRoot;
+
+#ifndef _SQLITEINT_H_ /* Skipped when this guard is already defined - presumably by sqliteInt.h, which would supply its own u8..u64; TODO confirm the guard name is current */
+typedef unsigned char u8;
+typedef unsigned short int u16;
+typedef unsigned int u32; /* relies on unsigned int being 32 bits wide */
+typedef lsm_i64 i64; /* lsm_i64 is presumably declared in lsm.h */
+typedef unsigned long long int u64;
+#endif
+
+/* A page number is a 64-bit integer. */
+typedef i64 LsmPgno;
+
+#ifdef LSM_DEBUG
+int lsmErrorBkpt(int); /* Debug builds: a real function - presumably a convenient breakpoint target that returns its argument; confirm in the definition */
+#else
+# define lsmErrorBkpt(x) (x) /* Non-debug builds: identity, no call */
+#endif
+
+#define LSM_PROTOCOL_BKPT lsmErrorBkpt(LSM_PROTOCOL) /* Each *_BKPT macro routes the matching LSM_* error code through lsmErrorBkpt() */
+#define LSM_IOERR_BKPT lsmErrorBkpt(LSM_IOERR)
+#define LSM_NOMEM_BKPT lsmErrorBkpt(LSM_NOMEM)
+#define LSM_CORRUPT_BKPT lsmErrorBkpt(LSM_CORRUPT)
+#define LSM_MISUSE_BKPT lsmErrorBkpt(LSM_MISUSE)
+
+#define unused_parameter(x) (void)(x) /* Evaluates x and discards the value */
+#define array_size(x) (sizeof(x)/sizeof((x)[0])) /* Element count of a true array (not a pointer); argument is parenthesized before subscripting */
+
+
+/* The size of each shared-memory chunk */
+#define LSM_SHM_CHUNK_SIZE (32*1024)
+
+/* The number of bytes reserved at the start of each shm chunk for MM. */
+#define LSM_SHM_CHUNK_HDR (sizeof(ShmChunk))
+
+/* The number of available read locks. */
+#define LSM_LOCK_NREADER 6
+
+/* The number of available read-write client locks. */
+#define LSM_LOCK_NRWCLIENT 16
+
+/* Lock definitions. Lock numbers start at 1; READER and RWCLIENT slots follow the seven fixed locks.
+*/
+#define LSM_LOCK_DMS1 1 /* Serialize connect/disconnect ops */
+#define LSM_LOCK_DMS2 2 /* Read-write connections */
+#define LSM_LOCK_DMS3 3 /* Read-only connections */
+#define LSM_LOCK_WRITER 4
+#define LSM_LOCK_WORKER 5
+#define LSM_LOCK_CHECKPOINTER 6
+#define LSM_LOCK_ROTRANS 7
+#define LSM_LOCK_READER(i) ((i) + LSM_LOCK_ROTRANS + 1) /* i in [0, LSM_LOCK_NREADER) maps to locks 8..13 */
+#define LSM_LOCK_RWCLIENT(i) ((i) + LSM_LOCK_READER(LSM_LOCK_NREADER)) /* i in [0, LSM_LOCK_NRWCLIENT) maps to locks 14..29 */
+
+#define LSM_N_LOCK LSM_LOCK_RWCLIENT(LSM_LOCK_NRWCLIENT) /* Evaluates to 30, one past the highest lock number (29) */
+
+/*
+** Meta-page size and usable size.
+*/
+#define LSM_META_PAGE_SIZE 4096
+
+#define LSM_META_RW_PAGE_SIZE (LSM_META_PAGE_SIZE - LSM_N_LOCK) /* 4096 - 30 = 4066 with the lock layout defined above */
+
+/*
+** Hard limit on the number of free-list entries that may be stored in
+** a checkpoint (the remainder are stored as a system record in the LSM).
+** See also LSM_CONFIG_MAX_FREELIST.
+*/
+#define LSM_MAX_FREELIST_ENTRIES 24
+
+#define LSM_MAX_BLOCK_REDIRECTS 16 /* presumably the cap on entries in a Redirect array - confirm against users */
+
+#define LSM_ATTEMPTS_BEFORE_PROTOCOL 10000 /* presumably a retry budget before LSM_PROTOCOL is reported - confirm against users */
+
+
+/*
+** Each entry stored in the LSM (or in-memory tree structure) has an
+** associated mask of the following flags. Every value is a distinct single bit.
+*/
+#define LSM_START_DELETE 0x01 /* Start of open-ended delete range */
+#define LSM_END_DELETE 0x02 /* End of open-ended delete range */
+#define LSM_POINT_DELETE 0x04 /* Delete this key */
+#define LSM_INSERT 0x08 /* Insert this key and value */
+#define LSM_SEPARATOR 0x10 /* True if entry is separator key only */
+#define LSM_SYSTEMKEY 0x20 /* True if entry is a system key (FREELIST) */
+
+#define LSM_CONTIGUOUS 0x40 /* Used in lsm_tree.c */
+
+/*
+** A string that can grow by appending. Operated on by the lsmStringXXX() functions declared in this header.
+*/
+struct LsmString {
+ lsm_env *pEnv; /* Run-time environment */
+ int n; /* Size of string. -1 indicates error */
+ int nAlloc; /* Space allocated for z[] */
+ char *z; /* The string content */
+};
+
+typedef struct LsmFile LsmFile; /* Link node wrapping an lsm_file handle */
+struct LsmFile {
+ lsm_file *pFile; /* The wrapped file handle */
+ LsmFile *pNext; /* Next node in the list (presumably NULL-terminated) */
+};
+
+/*
+** An instance of the following type is used to store an ordered list of
+** u32 values.
+**
+** Note: This is a place-holder implementation. It should be replaced by
+** a version that avoids making a single large allocation when the array
+** contains a large number of values. For this reason, the internals of
+** this object should only be manipulated by the intArrayXXX() functions in
+** lsm_tree.c.
+*/
+typedef struct IntArray IntArray;
+struct IntArray {
+ int nAlloc; /* presumably the allocated capacity of aArray[], in elements */
+ int nArray; /* presumably the number of elements currently in use */
+ u32 *aArray; /* The stored u32 values */
+};
+
+struct Redirect { /* A list of block redirects; embedded in Snapshot and pointed to by Segment */
+ int n; /* Number of redirects */
+ struct RedirectEntry { /* In C this tag is visible at file scope despite being declared inline */
+ int iFrom; /* presumably the block being redirected */
+ int iTo; /* presumably the block it is redirected to */
+ } *a; /* presumably an array of n entries */
+};
+
+/*
+** An instance of this structure represents a point in the history of the
+** tree structure to roll back to. Refer to comments in lsm_tree.c for
+** details. Filled in / applied via lsmTreeMark() and lsmTreeRollback(), both of which take a TreeMark pointer.
+*/
+struct TreeMark {
+ u32 iRoot; /* Offset of root node in shm file */
+ u32 nHeight; /* Current height of tree structure */
+ u32 iWrite; /* Write offset in shm file */
+ u32 nChunk; /* Number of chunks in shared-memory file */
+ u32 iFirst; /* First chunk in linked list */
+ u32 iNextShmid; /* Next id to allocate */
+ int iRollback; /* Index in lsm->rollback (the lsm_db.rollback IntArray) to revert to */
+};
+
+/*
+** An instance of this structure represents a point in the database log. Passed to lsmLogTell() and lsmLogSeek().
+*/
+struct LogMark {
+ i64 iOff; /* Offset into log (see lsm_log.c) */
+ int nBuf; /* Size of in-memory buffer here */
+ u8 aBuf[8]; /* Bytes of content in aBuf[] (at most 8 fit; presumably nBuf of them are valid) */
+ u32 cksum0; /* Checksum 0 at offset (iOff-nBuf) */
+ u32 cksum1; /* Checksum 1 at offset (iOff-nBuf) */
+};
+
+struct TransMark { /* One rollback mark; lsm_db.aTrans[] is an array of these */
+ TreeMark tree; /* Rollback point in the in-memory tree */
+ LogMark log; /* Matching point in the database log */
+};
+
+/*
+** A structure that defines the start and end offsets of a region in the
+** log file. The size of the region in bytes is (iEnd - iStart), so if
+** iEnd==iStart the region is zero bytes in size (i.e. iEnd is exclusive).
+*/
+struct LogRegion {
+ i64 iStart; /* Start of region in log file */
+ i64 iEnd; /* End of region in log file */
+};
+
+struct DbLog { /* Log-file layout; embedded in TreeHeader as the "log" member */
+ u32 cksum0; /* Checksum 0 at offset iOff (NOTE(review): DbLog has no iOff member - confirm which offset is meant) */
+ u32 cksum1; /* Checksum 1 at offset iOff */
+ i64 iSnapshotId; /* Log space has been reclaimed to this ss */
+ LogRegion aRegion[3]; /* Log file regions (see docs in lsm_log.c) */
+};
+
+struct TreeRoot { /* Root descriptor; TreeHeader holds one for the current tree and one for the old tree */
+ u32 iRoot; /* presumably the offset of the root node in the shm file (cf. TreeMark.iRoot) */
+ u32 nHeight; /* presumably the height of the tree (cf. TreeMark.nHeight) */
+ u32 nByte; /* Total size of this tree in bytes */
+ u32 iTransId;
+};
+
+/*
+** Tree header structure. ShmHeader stores two copies (hdr1, hdr2); each lsm_db keeps a local copy in treehdr.
+*/
+struct TreeHeader {
+ u32 iUsedShmid; /* Id of first shm chunk used by this tree */
+ u32 iNextShmid; /* Shm-id of next chunk allocated */
+ u32 iFirst; /* Chunk number of smallest shm-id */
+ u32 nChunk; /* Number of chunks in shared-memory file */
+ TreeRoot root; /* Root and height of current tree */
+ u32 iWrite; /* Write offset in shm file */
+ TreeRoot oldroot; /* Root and height of the previous tree */
+ u32 iOldShmid; /* Last shm-id used by previous tree */
+ u32 iUsrVersion; /* get/set_user_version() value */
+ i64 iOldLog; /* Log offset associated with old tree */
+ u32 oldcksum0; /* presumably log checksum 0 at iOldLog - confirm */
+ u32 oldcksum1; /* presumably log checksum 1 at iOldLog - confirm */
+ DbLog log; /* Current layout of log file */
+ u32 aCksum[2]; /* Checksums 1 and 2. */
+};
+
+/*
+** Database handle structure.
+**
+** mLock:
+** A bitmask representing the locks currently held by the connection.
+** An LSM database supports N distinct locks, where N is some number less
+** than or equal to 32. Locks are numbered starting from 1 (see the
+** definitions for LSM_LOCK_WRITER and co.).
+**
+** The least significant 32-bits in mLock represent EXCLUSIVE locks. The
+** most significant are SHARED locks. So, if a connection holds a SHARED
+** lock on lock region iLock, then the following is true:
+**
+** (mLock & ((u64)1 << (iLock+32-1)))
+**
+** Or for an EXCLUSIVE lock:
+**
+** (mLock & ((u64)1 << (iLock-1)))
+**
+** pCsr:
+** Points to the head of a linked list that contains all currently open
+** cursors. Once this list becomes empty, the user has no outstanding
+** cursors and the database handle can be successfully closed.
+**
+** pCsrCache:
+** This list contains cursor objects that have been closed using
+** lsm_csr_close(). Each time a cursor is closed, it is shifted from
+** the pCsr list to this list. When a new cursor is opened, this list
+** is inspected to see if there exists a cursor object that can be
+** reused. This is an optimization only.
+*/
+struct lsm_db {
+
+ /* Database handle configuration */
+ lsm_env *pEnv; /* runtime environment */
+ int (*xCmp)(void *, int, void *, int); /* Compare function */
+
+ /* Values configured by calls to lsm_config */
+ int eSafety; /* LSM_SAFETY_OFF, NORMAL or FULL */
+ int bAutowork; /* Configured by LSM_CONFIG_AUTOWORK */
+ int nTreeLimit; /* Configured by LSM_CONFIG_AUTOFLUSH */
+ int nMerge; /* Configured by LSM_CONFIG_AUTOMERGE */
+ int bUseLog; /* Configured by LSM_CONFIG_USE_LOG */
+ int nDfltPgsz; /* Configured by LSM_CONFIG_PAGE_SIZE */
+ int nDfltBlksz; /* Configured by LSM_CONFIG_BLOCK_SIZE */
+ int nMaxFreelist; /* Configured by LSM_CONFIG_MAX_FREELIST */
+ int iMmap; /* Configured by LSM_CONFIG_MMAP */
+ i64 nAutockpt; /* Configured by LSM_CONFIG_AUTOCHECKPOINT */
+ int bMultiProc; /* Configured by LSM_CONFIG_MULTIPLE_PROCESSES */
+ int bReadonly; /* Configured by LSM_CONFIG_READONLY */
+ lsm_compress compress; /* Compression callbacks */
+ lsm_compress_factory factory; /* Compression callback factory */
+
+ /* Sub-system handles */
+ FileSystem *pFS; /* On-disk portion of database */
+ Database *pDatabase; /* Database shared data */
+
+ int iRwclient; /* Read-write client lock held (-1 == none) */
+
+ /* Client transaction context */
+ Snapshot *pClient; /* Client snapshot */
+ int iReader; /* Read lock held (-1 == unlocked) */
+ int bRoTrans; /* True if a read-only db trans is open */
+ MultiCursor *pCsr; /* List of all open cursors */
+ LogWriter *pLogWriter; /* Context for writing to the log file */
+ int nTransOpen; /* Number of opened write transactions */
+ int nTransAlloc; /* Allocated size of aTrans[] array */
+ TransMark *aTrans; /* Array of marks for transaction rollback */
+ IntArray rollback; /* List of tree-nodes to roll back */
+ int bDiscardOld; /* True if lsmTreeDiscardOld() was called */
+
+ MultiCursor *pCsrCache; /* List of all closed cursors */
+
+ /* Worker context */
+ Snapshot *pWorker; /* Worker snapshot (or NULL) */
+ Freelist *pFreelist; /* See sortedNewToplevel() */
+ int bUseFreelist; /* True to use pFreelist */
+ int bIncrMerge; /* True if currently doing a merge */
+
+ int bInFactory; /* True if within factory.xFactory() */
+
+ /* Debugging message callback */
+ void (*xLog)(void *, int, const char *); /* Callback; first argument is presumably pLogCtx */
+ void *pLogCtx; /* Opaque context stored alongside xLog */
+
+ /* Work done notification callback */
+ void (*xWork)(lsm_db *, void *); /* Callback; second argument is presumably pWorkCtx */
+ void *pWorkCtx; /* Opaque context stored alongside xWork */
+
+ u64 mLock; /* Mask of current locks. See lsmShmLock(). */
+ lsm_db *pNext; /* Next connection to same database */
+
+ int nShm; /* Size of apShm[] array */
+ void **apShm; /* Shared memory chunks */
+ ShmHeader *pShmhdr; /* Live shared-memory header */
+ TreeHeader treehdr; /* Local copy of tree-header */
+ u32 aSnapshot[LSM_META_PAGE_SIZE / sizeof(u32)]; /* One meta-page worth of u32 values (same size as ShmHeader.aSnap1/aSnap2) */
+};
+
+struct Segment { /* Describes one run of pages; Level holds one as lhs plus an array aRhs[] */
+ LsmPgno iFirst; /* First page of this run */
+ LsmPgno iLastPg; /* Last page of this run */
+ LsmPgno iRoot; /* Root page number (if any) */
+ int nSize; /* Size of this run in pages */
+
+ Redirect *pRedirect; /* Block redirects (or NULL) */
+};
+
+/*
+** iSplitTopic/pSplitKey/nSplitKey:
+** If nRight>0, this buffer contains a copy of the largest key that has
+** already been written to the left-hand-side of the level.
+*/
+struct Level {
+ Segment lhs; /* Left-hand (main) segment */
+ int nRight; /* Size of aRhs[] array */
+ Segment *aRhs; /* Old segments being merged into this */
+ int iSplitTopic; /* Split key topic (if nRight>0) */
+ void *pSplitKey; /* Pointer to split-key (if nRight>0) */
+ int nSplitKey; /* Number of bytes in split-key */
+
+ u16 iAge; /* Number of times data has been written */
+ u16 flags; /* Mask of LEVEL_XXX bits */
+ Merge *pMerge; /* Merge operation currently underway */
+ Level *pNext; /* Next level in tree */
+};
+
+/*
+** The Level.flags field (a u16) is set to a combination of the following bits.
+**
+** LEVEL_FREELIST_ONLY:
+** Set if the level consists entirely of free-list entries.
+**
+** LEVEL_INCOMPLETE:
+** This is set while a new toplevel level is being constructed. It is
+** never set for any level other than a new toplevel.
+*/
+#define LEVEL_FREELIST_ONLY 0x0001
+#define LEVEL_INCOMPLETE 0x0002
+
+
+/*
+** A structure describing an ongoing merge (struct Merge, below; MergeInput
+** is the page/cell position type it uses). There is an instance of Merge
+** for every Level currently undergoing a merge in the worker snapshot.
+**
+** It is assumed that code that uses an instance of this structure has
+** access to the associated Level struct.
+**
+** iOutputOff:
+** The byte offset to write to next within the last page of the
+** output segment.
+*/
+struct MergeInput {
+ LsmPgno iPg; /* Page on which next input is stored */
+ int iCell; /* Cell containing next input to merge */
+};
+struct Merge {
+ int nInput; /* Number of input runs being merged */
+ MergeInput *aInput; /* Array nInput entries in size */
+ MergeInput splitkey; /* Location in file of current splitkey */
+ int nSkip; /* Number of separators entries to skip */
+ int iOutputOff; /* Write offset on output page */
+ LsmPgno iCurrentPtr; /* Current pointer value */
+};
+
+/*
+** The first argument to this macro is a pointer to a Segment structure.
+** Returns true if the structure instance indicates that the separators
+** array is valid. NOTE(review): struct Segment as declared in this header has no "sep" member, so any expansion of this macro would fail to compile - it looks stale; confirm before using.
+*/
+#define segmentHasSeparators(pSegment) ((pSegment)->sep.iFirst>0)
+
+/*
+** The values that accompany the lock held by a database reader. ShmHeader.aReader[] holds one of these per reader lock.
+*/
+struct ShmReader {
+ u32 iTreeId; /* presumably identifies the in-memory tree version being read - confirm */
+ i64 iLsmId; /* presumably the id of the on-disk snapshot being read - confirm */
+};
+
+/*
+** An instance of this structure is stored in the first shared-memory
+** page. The shared-memory header.
+**
+** bWriter:
+** Immediately after opening a write transaction taking the WRITER lock,
+** each writer client sets this flag. It is cleared right before the
+** WRITER lock is relinquished. If a subsequent writer finds that this
+** flag is already set when a write transaction is opened, this indicates
+** that a previous writer failed mid-transaction.
+**
+** iMetaPage:
+** If the database file does not contain a valid, synced, checkpoint, this
+** value is set to 0. Otherwise, it is set to the meta-page number that
+** contains the most recently written checkpoint (either 1 or 2).
+**
+** hdr1, hdr2:
+** The two copies of the in-memory tree header. Two copies are required
+** in case a writer fails while updating one of them.
+*/
+struct ShmHeader {
+ u32 aSnap1[LSM_META_PAGE_SIZE / 4]; /* One meta-page worth of u32 values */
+ u32 aSnap2[LSM_META_PAGE_SIZE / 4]; /* Second buffer of the same size */
+ u32 bWriter; /* See above */
+ u32 iMetaPage; /* See above: 0, 1 or 2 */
+ TreeHeader hdr1; /* See above */
+ TreeHeader hdr2; /* See above */
+ ShmReader aReader[LSM_LOCK_NREADER]; /* One slot per reader lock */
+};
+
+/*
+** An instance of this structure is stored at the start of each shared-memory
+** chunk except the first (which is the header chunk - see above). Its size defines LSM_SHM_CHUNK_HDR.
+*/
+struct ShmChunk {
+ u32 iShmid; /* presumably this chunk's shm-id (cf. TreeHeader.iNextShmid) - confirm */
+ u32 iNext; /* presumably the next chunk in the linked list (cf. TreeMark.iFirst) - confirm */
+};
+
+/*
+** Maximum number of shared-memory chunks allowed in the *-shm file. Since
+** each shared-memory chunk is 32KB in size, this is a theoretical limit only.
+*/
+#define LSM_MAX_SHMCHUNKS (1<<30)
+
+/* Return true if shm-sequence "a" is larger than or equal to "b". The difference is computed in u32 arithmetic, so the test holds across wrap-around while the ids are less than LSM_MAX_SHMCHUNKS apart. Both arguments are parenthesized so that compound expressions are cast as a whole. */
+#define shm_sequence_ge(a, b) (((u32)(a)-(u32)(b)) < LSM_MAX_SHMCHUNKS)
+
+#define LSM_APPLIST_SZ 4 /* Number of slots in Snapshot.aiAppend[] */
+
+/*
+** An instance of the following structure stores the in-memory part of
+** the current free block list. This structure is to the free block list
+** as the in-memory tree is to the user's database content. The contents
+** of the free block list is found by merging the in-memory components
+** with those stored in the LSM, just as the contents of the database is
+** found by merging the in-memory tree with the user data entries in the
+** LSM.
+**
+** Each FreelistEntry structure in the array represents either an insert
+** or delete operation on the free-list. For deletes, the FreelistEntry.iId
+** field is set to -1. For inserts, it is set to zero or greater.
+**
+** The array of FreelistEntry structures is always sorted in order of
+** block number (ascending).
+**
+** When the in-memory free block list is written into the LSM, each insert
+** operation is written separately. The entry key is the bitwise inverse
+** of the block number as a 32-bit big-endian integer. This is done so that
+** the entries in the LSM are sorted in descending order of block id.
+** The associated value is the snapshot id, formatted as a varint.
+*/
+struct Freelist {
+ FreelistEntry *aEntry; /* Free list entries */
+ int nEntry; /* Number of valid slots in aEntry[] */
+ int nAlloc; /* Allocated size of aEntry[] */
+};
+struct FreelistEntry {
+ u32 iBlk; /* Block number */
+ i64 iId; /* Largest snapshot id to use this block (-1 marks a delete, see above) */
+};
+
+/*
+** A snapshot of a database. A snapshot contains all the information required
+** to read or write a database file on disk. See the description of struct
+** Database below for further details.
+*/
+struct Snapshot {
+ Database *pDatabase; /* Database this snapshot belongs to */
+ u32 iCmpId; /* Id of compression scheme */
+ Level *pLevel; /* Pointer to level 0 of snapshot (or NULL) */
+ i64 iId; /* Snapshot id */
+ i64 iLogOff; /* Log file offset */
+ Redirect redirect; /* Block redirection array */
+
+ /* Used by worker snapshots only */
+ int nBlock; /* Number of blocks in database file */
+ LsmPgno aiAppend[LSM_APPLIST_SZ]; /* Append point list */
+ Freelist freelist; /* Free block list */
+ u32 nWrite; /* Total number of pages written to disk */
+};
+#define LSM_INITIAL_SNAPSHOT_ID 11 /* presumably the Snapshot.iId given to the first snapshot of a new database - confirm */
+
+/*
+** Functions from file "lsm_ckpt.c".
+*/
+int lsmCheckpointWrite(lsm_db *, u32 *);
+int lsmCheckpointLevels(lsm_db *, int, void **, int *);
+int lsmCheckpointLoadLevels(lsm_db *pDb, void *pVal, int nVal);
+
+int lsmCheckpointRecover(lsm_db *);
+int lsmCheckpointDeserialize(lsm_db *, int, u32 *, Snapshot **);
+
+int lsmCheckpointLoadWorker(lsm_db *pDb);
+int lsmCheckpointStore(lsm_db *pDb, int);
+
+int lsmCheckpointLoad(lsm_db *pDb, int *);
+int lsmCheckpointLoadOk(lsm_db *pDb, int);
+int lsmCheckpointClientCacheOk(lsm_db *);
+
+u32 lsmCheckpointNBlock(u32 *); /* This and the following u32* accessors presumably read fields from a serialized checkpoint array - confirm */
+i64 lsmCheckpointId(u32 *, int);
+u32 lsmCheckpointNWrite(u32 *, int);
+i64 lsmCheckpointLogOffset(u32 *); /* NB: a different function from lsmCheckpointLogoffset() below - the names differ only in the case of the 'O' */
+int lsmCheckpointPgsz(u32 *);
+int lsmCheckpointBlksz(u32 *);
+void lsmCheckpointLogoffset(u32 *aCkpt, DbLog *pLog); /* Two-argument variant taking a DbLog; see the note on lsmCheckpointLogOffset() */
+void lsmCheckpointZeroLogoffset(lsm_db *);
+
+int lsmCheckpointSaveWorker(lsm_db *pDb, int);
+int lsmDatabaseFull(lsm_db *pDb);
+int lsmCheckpointSynced(lsm_db *pDb, i64 *piId, i64 *piLog, u32 *pnWrite);
+
+int lsmCheckpointSize(lsm_db *db, int *pnByte);
+
+int lsmInfoCompressionId(lsm_db *db, u32 *piCmpId);
+
+/*
+** Functions from file "lsm_tree.c". These operate on the in-memory tree.
+*/
+int lsmTreeNew(lsm_env *, int (*)(void *, int, void *, int), Tree **ppTree); /* The function-pointer parameter has the same type as lsm_db.xCmp */
+void lsmTreeRelease(lsm_env *, Tree *);
+int lsmTreeInit(lsm_db *);
+int lsmTreeRepair(lsm_db *);
+
+void lsmTreeMakeOld(lsm_db *pDb);
+void lsmTreeDiscardOld(lsm_db *pDb); /* lsm_db.bDiscardOld records that this was called */
+int lsmTreeHasOld(lsm_db *pDb);
+
+int lsmTreeSize(lsm_db *);
+int lsmTreeEndTransaction(lsm_db *pDb, int bCommit);
+int lsmTreeLoadHeader(lsm_db *pDb, int *);
+int lsmTreeLoadHeaderOk(lsm_db *, int);
+
+int lsmTreeInsert(lsm_db *pDb, void *pKey, int nKey, void *pVal, int nVal);
+int lsmTreeDelete(lsm_db *db, void *pKey1, int nKey1, void *pKey2, int nKey2); /* Takes two keys - presumably the bounds of a range delete; confirm */
+void lsmTreeRollback(lsm_db *pDb, TreeMark *pMark);
+void lsmTreeMark(lsm_db *pDb, TreeMark *pMark);
+
+int lsmTreeCursorNew(lsm_db *pDb, int, TreeCursor **);
+void lsmTreeCursorDestroy(TreeCursor *);
+
+int lsmTreeCursorSeek(TreeCursor *pCsr, void *pKey, int nKey, int *pRes);
+int lsmTreeCursorNext(TreeCursor *pCsr);
+int lsmTreeCursorPrev(TreeCursor *pCsr);
+int lsmTreeCursorEnd(TreeCursor *pCsr, int bLast);
+void lsmTreeCursorReset(TreeCursor *pCsr);
+int lsmTreeCursorKey(TreeCursor *pCsr, int *pFlags, void **ppKey, int *pnKey);
+int lsmTreeCursorFlags(TreeCursor *pCsr);
+int lsmTreeCursorValue(TreeCursor *pCsr, void **ppVal, int *pnVal);
+int lsmTreeCursorValid(TreeCursor *pCsr);
+int lsmTreeCursorSave(TreeCursor *pCsr);
+
+void lsmFlagsToString(int flags, char *zFlags); /* Writes into caller-supplied zFlags; no buffer length is passed, so the required size must be checked in the definition */
+
+/*
+** Functions from file "mem.c". NOTE(review): most other sections name files with an "lsm_" prefix - confirm this file name is current.
+*/
+void *lsmMalloc(lsm_env*, size_t);
+void lsmFree(lsm_env*, void *);
+void *lsmRealloc(lsm_env*, void *, size_t);
+void *lsmReallocOrFree(lsm_env*, void *, size_t);
+void *lsmReallocOrFreeRc(lsm_env *, void *, size_t, int *); /* The *Rc variants take an extra int* - presumably an in/out error code; confirm */
+
+void *lsmMallocZeroRc(lsm_env*, size_t, int *);
+void *lsmMallocRc(lsm_env*, size_t, int *);
+
+void *lsmMallocZero(lsm_env *pEnv, size_t);
+char *lsmMallocStrdup(lsm_env *pEnv, const char *);
+
+/*
+** Functions from file "lsm_mutex.c". Every function takes the lsm_env as its first argument.
+*/
+int lsmMutexStatic(lsm_env*, int, lsm_mutex **);
+int lsmMutexNew(lsm_env*, lsm_mutex **);
+void lsmMutexDel(lsm_env*, lsm_mutex *);
+void lsmMutexEnter(lsm_env*, lsm_mutex *);
+int lsmMutexTry(lsm_env*, lsm_mutex *);
+void lsmMutexLeave(lsm_env*, lsm_mutex *);
+
+#ifndef NDEBUG /* The two predicates below are declared only when assert() is enabled */
+int lsmMutexHeld(lsm_env *, lsm_mutex *);
+int lsmMutexNotHeld(lsm_env *, lsm_mutex *);
+#endif
+
+/**************************************************************************
+** Start of functions from "lsm_file.c".
+*/
+int lsmFsOpen(lsm_db *, const char *, int);
+int lsmFsOpenLog(lsm_db *, int *);
+void lsmFsCloseLog(lsm_db *);
+void lsmFsClose(FileSystem *);
+
+int lsmFsUnmap(FileSystem *);
+
+int lsmFsConfigure(lsm_db *db);
+
+int lsmFsBlockSize(FileSystem *);
+void lsmFsSetBlockSize(FileSystem *, int);
+int lsmFsMoveBlock(FileSystem *pFS, Segment *pSeg, int iTo, int iFrom); /* Note the argument order: destination (iTo) precedes source (iFrom) */
+
+int lsmFsPageSize(FileSystem *);
+void lsmFsSetPageSize(FileSystem *, int);
+
+int lsmFsFileid(lsm_db *pDb, void **ppId, int *pnId);
+
+/* Creating, populating, gobbling and deleting sorted runs. */
+void lsmFsGobble(lsm_db *, Segment *, LsmPgno *, int);
+int lsmFsSortedDelete(FileSystem *, Snapshot *, int, Segment *);
+int lsmFsSortedFinish(FileSystem *, Segment *);
+int lsmFsSortedAppend(FileSystem *, Snapshot *, Level *, int, Page **);
+int lsmFsSortedPadding(FileSystem *, Snapshot *, Segment *);
+
+/* Functions to retrieve the lsm_env pointer from a FileSystem or Page object */
+lsm_env *lsmFsEnv(FileSystem *);
+lsm_env *lsmPageEnv(Page *);
+FileSystem *lsmPageFS(Page *);
+
+int lsmFsSectorSize(FileSystem *);
+
+void lsmSortedSplitkey(lsm_db *, Level *, int *); /* NB: listed in the lsm_file.c section; a separate lsmSortedSplitKey() (capital K) is declared with the lsm_sorted.c functions */
+
+/* Reading sorted run content. */
+int lsmFsDbPageLast(FileSystem *pFS, Segment *pSeg, Page **ppPg);
+int lsmFsDbPageGet(FileSystem *, Segment *, LsmPgno, Page **);
+int lsmFsDbPageNext(Segment *, Page *, int eDir, Page **);
+
+u8 *lsmFsPageData(Page *, int *);
+int lsmFsPageRelease(Page *);
+int lsmFsPagePersist(Page *);
+void lsmFsPageRef(Page *);
+LsmPgno lsmFsPageNumber(Page *);
+
+int lsmFsNRead(FileSystem *);
+int lsmFsNWrite(FileSystem *);
+
+int lsmFsMetaPageGet(FileSystem *, int, int, MetaPage **);
+int lsmFsMetaPageRelease(MetaPage *);
+u8 *lsmFsMetaPageData(MetaPage *, int *);
+
+#ifdef LSM_DEBUG /* Declared in debug builds only */
+int lsmFsDbPageIsLast(Segment *pSeg, Page *pPg);
+int lsmFsIntegrityCheck(lsm_db *);
+#endif
+
+LsmPgno lsmFsRedirectPage(FileSystem *, Redirect *, LsmPgno);
+
+int lsmFsPageWritable(Page *);
+
+/* Functions to read, write and sync the log file. */
+int lsmFsWriteLog(FileSystem *pFS, i64 iOff, LsmString *pStr);
+int lsmFsSyncLog(FileSystem *pFS);
+int lsmFsReadLog(FileSystem *pFS, i64 iOff, int nRead, LsmString *pStr);
+int lsmFsTruncateLog(FileSystem *pFS, i64 nByte);
+int lsmFsTruncateDb(FileSystem *pFS, i64 nByte);
+int lsmFsCloseAndDeleteLog(FileSystem *pFS);
+
+LsmFile *lsmFsDeferClose(FileSystem *pFS);
+
+/* And to sync the db file */
+int lsmFsSyncDb(FileSystem *, int);
+
+void lsmFsFlushWaiting(FileSystem *, int *);
+
+/* Used by lsm_info(ARRAY_STRUCTURE) and lsm_config(MMAP) */
+int lsmInfoArrayStructure(lsm_db *pDb, int bBlock, LsmPgno iFirst, char **pz);
+int lsmInfoArrayPages(lsm_db *pDb, LsmPgno iFirst, char **pzOut);
+int lsmConfigMmap(lsm_db *pDb, int *piParam);
+
+int lsmEnvOpen(lsm_env *, const char *, int, lsm_file **);
+int lsmEnvClose(lsm_env *pEnv, lsm_file *pFile);
+int lsmEnvLock(lsm_env *pEnv, lsm_file *pFile, int iLock, int eLock);
+int lsmEnvTestLock(lsm_env *pEnv, lsm_file *pFile, int iLock, int nLock, int);
+
+int lsmEnvShmMap(lsm_env *, lsm_file *, int, int, void **);
+void lsmEnvShmBarrier(lsm_env *);
+void lsmEnvShmUnmap(lsm_env *, lsm_file *, int);
+
+void lsmEnvSleep(lsm_env *, int);
+
+int lsmFsReadSyncedId(lsm_db *db, int, i64 *piVal);
+
+int lsmFsSegmentContainsPg(FileSystem *pFS, Segment *, LsmPgno, int *);
+
+void lsmFsPurgeCache(FileSystem *);
+
+/*
+** End of functions from "lsm_file.c".
+**************************************************************************/
+
+/*
+** Functions from file "lsm_sorted.c".
+*/
+int lsmInfoPageDump(lsm_db *, LsmPgno, int, char **);
+void lsmSortedCleanup(lsm_db *);
+int lsmSortedAutoWork(lsm_db *, int nUnit);
+
+int lsmSortedWalkFreelist(lsm_db *, int, int (*)(void *, int, i64), void *); /* Callback type matches the one taken by lsmWalkFreelist() */
+
+int lsmSaveWorker(lsm_db *, int);
+
+int lsmFlushTreeToDisk(lsm_db *pDb);
+
+void lsmSortedRemap(lsm_db *pDb);
+
+void lsmSortedFreeLevel(lsm_env *pEnv, Level *);
+
+int lsmSortedAdvanceAll(lsm_db *pDb);
+
+int lsmSortedLoadMerge(lsm_db *, Level *, u32 *, int *);
+int lsmSortedLoadFreelist(lsm_db *pDb, void **, int *);
+
+void *lsmSortedSplitKey(Level *pLevel, int *pnByte); /* NB: not the same function as lsmSortedSplitkey() (lower-case k), which is declared with the lsm_file.c functions */
+
+void lsmSortedSaveTreeCursors(lsm_db *);
+
+int lsmMCursorNew(lsm_db *, MultiCursor **);
+void lsmMCursorClose(MultiCursor *, int);
+int lsmMCursorSeek(MultiCursor *, int, void *, int , int);
+int lsmMCursorFirst(MultiCursor *);
+int lsmMCursorPrev(MultiCursor *);
+int lsmMCursorLast(MultiCursor *);
+int lsmMCursorValid(MultiCursor *);
+int lsmMCursorNext(MultiCursor *);
+int lsmMCursorKey(MultiCursor *, void **, int *);
+int lsmMCursorValue(MultiCursor *, void **, int *);
+int lsmMCursorType(MultiCursor *, int *);
+lsm_db *lsmMCursorDb(MultiCursor *);
+void lsmMCursorFreeCache(lsm_db *); /* presumably releases the lsm_db.pCsrCache list - confirm */
+
+int lsmSaveCursors(lsm_db *pDb);
+int lsmRestoreCursors(lsm_db *pDb);
+
+void lsmSortedDumpStructure(lsm_db *pDb, Snapshot *, int, int, const char *);
+void lsmFsDumpBlocklists(lsm_db *);
+
+void lsmSortedExpandBtreePage(Page *pPg, int nOrig);
+
+void lsmPutU32(u8 *, u32); /* Byte-buffer integer helpers; byte order is not stated here - check the definitions */
+u32 lsmGetU32(u8 *);
+u64 lsmGetU64(u8 *);
+
+/*
+** Functions from "lsm_varint.c". The 32-bit variants use int; the 64-bit variants use i64.
+*/
+int lsmVarintPut32(u8 *, int);
+int lsmVarintGet32(u8 *, int *);
+int lsmVarintPut64(u8 *aData, i64 iVal);
+int lsmVarintGet64(const u8 *aData, i64 *piVal); /* The only reader here that takes a const buffer */
+
+int lsmVarintLen32(int);
+int lsmVarintSize(u8 c);
+
+/*
+** Functions from file "main.c". NOTE(review): most other sections name files with an "lsm_" prefix - confirm this file name is current.
+*/
+void lsmLogMessage(lsm_db *, int, const char *, ...);
+int lsmInfoFreelist(lsm_db *pDb, char **pzOut);
+
+/*
+** Functions from file "lsm_log.c".
+*/
+int lsmLogBegin(lsm_db *pDb);
+int lsmLogWrite(lsm_db *, int, void *, int, void *, int);
+int lsmLogCommit(lsm_db *);
+void lsmLogEnd(lsm_db *pDb, int bCommit);
+void lsmLogTell(lsm_db *, LogMark *);
+void lsmLogSeek(lsm_db *, LogMark *);
+void lsmLogClose(lsm_db *);
+
+int lsmLogRecover(lsm_db *);
+int lsmInfoLogStructure(lsm_db *pDb, char **pzVal);
+
+/* Valid values for the second argument to lsmLogWrite(). These are a separate namespace from the LSM_INSERT/LSM_POINT_DELETE entry flags: LSM_DELETE and LSM_INSERT are both 0x08. */
+#define LSM_WRITE 0x06
+#define LSM_DELETE 0x08
+#define LSM_DRANGE 0x0A
+
+/**************************************************************************
+** Functions from file "lsm_shared.c".
+*/
+
+int lsmDbDatabaseConnect(lsm_db*, const char *);
+void lsmDbDatabaseRelease(lsm_db *);
+
+int lsmBeginReadTrans(lsm_db *);
+int lsmBeginWriteTrans(lsm_db *);
+int lsmBeginFlush(lsm_db *);
+
+int lsmDetectRoTrans(lsm_db *db, int *);
+int lsmBeginRoTrans(lsm_db *db);
+
+int lsmBeginWork(lsm_db *);
+void lsmFinishWork(lsm_db *, int, int *);
+
+int lsmFinishRecovery(lsm_db *);
+void lsmFinishReadTrans(lsm_db *);
+int lsmFinishWriteTrans(lsm_db *, int);
+int lsmFinishFlush(lsm_db *, int);
+
+int lsmSnapshotSetFreelist(lsm_db *, int *, int);
+
+Snapshot *lsmDbSnapshotClient(lsm_db *);
+Snapshot *lsmDbSnapshotWorker(lsm_db *);
+
+void lsmSnapshotSetCkptid(Snapshot *, i64);
+
+Level *lsmDbSnapshotLevel(Snapshot *);
+void lsmDbSnapshotSetLevel(Snapshot *, Level *);
+
+void lsmDbRecoveryComplete(lsm_db *, int);
+
+int lsmBlockAllocate(lsm_db *, int, int *);
+int lsmBlockFree(lsm_db *, int);
+int lsmBlockRefree(lsm_db *, int);
+
+void lsmFreelistDeltaBegin(lsm_db *);
+void lsmFreelistDeltaEnd(lsm_db *);
+int lsmFreelistDelta(lsm_db *pDb);
+
+DbLog *lsmDatabaseLog(lsm_db *pDb);
+
+#ifdef LSM_DEBUG /* Declared in debug builds only */
+ int lsmHoldingClientMutex(lsm_db *pDb);
+ int lsmShmAssertLock(lsm_db *db, int iLock, int eOp);
+ int lsmShmAssertWorker(lsm_db *db);
+#endif
+
+void lsmFreeSnapshot(lsm_env *, Snapshot *);
+
+
+/* Candidate values for the 3rd argument to lsmShmLock() (the eOp parameter) */
+#define LSM_LOCK_UNLOCK 0
+#define LSM_LOCK_SHARED 1
+#define LSM_LOCK_EXCL 2
+
+int lsmShmCacheChunks(lsm_db *db, int nChunk);
+int lsmShmLock(lsm_db *db, int iLock, int eOp, int bBlock);
+int lsmShmTestLock(lsm_db *db, int iLock, int nLock, int eOp);
+void lsmShmBarrier(lsm_db *db);
+
+#ifdef LSM_DEBUG
+void lsmShmHasLock(lsm_db *db, int iLock, int eOp);
+#else
+# define lsmShmHasLock(x,y,z) /* Non-debug builds: expands to nothing, so the arguments are never evaluated */
+#endif
+
+int lsmReadlock(lsm_db *, i64 iLsm, u32 iShmMin, u32 iShmMax);
+
+int lsmLsmInUse(lsm_db *db, i64 iLsmId, int *pbInUse);
+int lsmTreeInUse(lsm_db *db, u32 iLsmId, int *pbInUse); /* NOTE(review): the u32 parameter is named iLsmId like the i64 one above; its type matches ShmReader.iTreeId, so the name may be a copy-paste - confirm against the definition */
+int lsmFreelistAppend(lsm_env *pEnv, Freelist *p, int iBlk, i64 iId);
+
+int lsmDbMultiProc(lsm_db *);
+void lsmDbDeferredClose(lsm_db *, lsm_file *, LsmFile *);
+LsmFile *lsmDbRecycleFd(lsm_db *);
+
+int lsmWalkFreelist(lsm_db *, int, int (*)(void *, int, i64), void *);
+
+int lsmCheckCompressionId(lsm_db *, u32);
+
+
+/**************************************************************************
+** functions in lsm_str.c - operations on the growable LsmString type.
+*/
+void lsmStringInit(LsmString*, lsm_env *pEnv);
+int lsmStringExtend(LsmString*, int);
+int lsmStringAppend(LsmString*, const char *, int);
+void lsmStringVAppendf(LsmString*, const char *zFormat, va_list, va_list); /* Takes two va_list arguments - presumably two copies of the same argument list (one per formatting pass); confirm in lsm_str.c */
+void lsmStringAppendf(LsmString*, const char *zFormat, ...);
+void lsmStringClear(LsmString*);
+char *lsmMallocPrintf(lsm_env*, const char*, ...);
+int lsmStringBinAppend(LsmString *pStr, const u8 *a, int n);
+
+int lsmStrlen(const char *zName);
+
+
+
+/*
+** Round up a number to the next larger multiple of 8. This is used
+** to force 8-byte alignment on 64-bit architectures. A value that is already a multiple of 8 is returned unchanged.
+*/
+#define ROUND8(x) (((x)+7)&~7)
+
+#define LSM_MIN(x,y) ((x)>(y) ? (y) : (x)) /* NB: the selected argument is evaluated twice - avoid arguments with side effects */
+#define LSM_MAX(x,y) ((x)>(y) ? (x) : (y)) /* Same double-evaluation caveat as LSM_MIN */
+
+#endif