diff options
Diffstat (limited to 'src/memjournal.c')
-rw-r--r-- | src/memjournal.c | 440 |
1 files changed, 440 insertions, 0 deletions
diff --git a/src/memjournal.c b/src/memjournal.c new file mode 100644 index 0000000..9343801 --- /dev/null +++ b/src/memjournal.c @@ -0,0 +1,440 @@ +/* +** 2008 October 7 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains code use to implement an in-memory rollback journal. +** The in-memory rollback journal is used to journal transactions for +** ":memory:" databases and when the journal_mode=MEMORY pragma is used. +** +** Update: The in-memory journal is also used to temporarily cache +** smaller journals that are not critical for power-loss recovery. +** For example, statement journals that are not too big will be held +** entirely in memory, thus reducing the number of file I/O calls, and +** more importantly, reducing temporary file creation events. If these +** journals become too large for memory, they are spilled to disk. But +** in the common case, they are usually small and no file I/O needs to +** occur. +*/ +#include "sqliteInt.h" + +/* Forward references to internal structures */ +typedef struct MemJournal MemJournal; +typedef struct FilePoint FilePoint; +typedef struct FileChunk FileChunk; + +/* +** The rollback journal is composed of a linked list of these structures. +** +** The zChunk array is always at least 8 bytes in size - usually much more. +** Its actual size is stored in the MemJournal.nChunkSize variable. +*/ +struct FileChunk { + FileChunk *pNext; /* Next chunk in the journal */ + u8 zChunk[8]; /* Content of this chunk */ +}; + +/* +** By default, allocate this many bytes of memory for each FileChunk object. +*/ +#define MEMJOURNAL_DFLT_FILECHUNKSIZE 1024 + +/* +** For chunk size nChunkSize, return the number of bytes that should +** be allocated for each FileChunk structure. +*/ +#define fileChunkSize(nChunkSize) (sizeof(FileChunk) + ((nChunkSize)-8)) + +/* +** An instance of this object serves as a cursor into the rollback journal. +** The cursor can be either for reading or writing. +*/ +struct FilePoint { + sqlite3_int64 iOffset; /* Offset from the beginning of the file */ + FileChunk *pChunk; /* Specific chunk into which cursor points */ +}; + +/* +** This structure is a subclass of sqlite3_file. Each open memory-journal +** is an instance of this class. +*/ +struct MemJournal { + const sqlite3_io_methods *pMethod; /* Parent class. MUST BE FIRST */ + int nChunkSize; /* In-memory chunk-size */ + + int nSpill; /* Bytes of data before flushing */ + FileChunk *pFirst; /* Head of in-memory chunk-list */ + FilePoint endpoint; /* Pointer to the end of the file */ + FilePoint readpoint; /* Pointer to the end of the last xRead() */ + + int flags; /* xOpen flags */ + sqlite3_vfs *pVfs; /* The "real" underlying VFS */ + const char *zJournal; /* Name of the journal file */ +}; + +/* +** Read data from the in-memory journal file. This is the implementation +** of the sqlite3_vfs.xRead method. +*/ +static int memjrnlRead( + sqlite3_file *pJfd, /* The journal file from which to read */ + void *zBuf, /* Put the results here */ + int iAmt, /* Number of bytes to read */ + sqlite_int64 iOfst /* Begin reading at this offset */ +){ + MemJournal *p = (MemJournal *)pJfd; + u8 *zOut = zBuf; + int nRead = iAmt; + int iChunkOffset; + FileChunk *pChunk; + + if( (iAmt+iOfst)>p->endpoint.iOffset ){ + return SQLITE_IOERR_SHORT_READ; + } + assert( p->readpoint.iOffset==0 || p->readpoint.pChunk!=0 ); + if( p->readpoint.iOffset!=iOfst || iOfst==0 ){ + sqlite3_int64 iOff = 0; + for(pChunk=p->pFirst; + ALWAYS(pChunk) && (iOff+p->nChunkSize)<=iOfst; + pChunk=pChunk->pNext + ){ + iOff += p->nChunkSize; + } + }else{ + pChunk = p->readpoint.pChunk; + assert( pChunk!=0 ); + } + + iChunkOffset = (int)(iOfst%p->nChunkSize); + do { + int iSpace = p->nChunkSize - iChunkOffset; + int nCopy = MIN(nRead, (p->nChunkSize - iChunkOffset)); + memcpy(zOut, (u8*)pChunk->zChunk + iChunkOffset, nCopy); + zOut += nCopy; + nRead -= iSpace; + iChunkOffset = 0; + } while( nRead>=0 && (pChunk=pChunk->pNext)!=0 && nRead>0 ); + p->readpoint.iOffset = pChunk ? iOfst+iAmt : 0; + p->readpoint.pChunk = pChunk; + + return SQLITE_OK; +} + +/* +** Free the list of FileChunk structures headed at MemJournal.pFirst. +*/ +static void memjrnlFreeChunks(FileChunk *pFirst){ + FileChunk *pIter; + FileChunk *pNext; + for(pIter=pFirst; pIter; pIter=pNext){ + pNext = pIter->pNext; + sqlite3_free(pIter); + } +} + +/* +** Flush the contents of memory to a real file on disk. +*/ +static int memjrnlCreateFile(MemJournal *p){ + int rc; + sqlite3_file *pReal = (sqlite3_file*)p; + MemJournal copy = *p; + + memset(p, 0, sizeof(MemJournal)); + rc = sqlite3OsOpen(copy.pVfs, copy.zJournal, pReal, copy.flags, 0); + if( rc==SQLITE_OK ){ + int nChunk = copy.nChunkSize; + i64 iOff = 0; + FileChunk *pIter; + for(pIter=copy.pFirst; pIter; pIter=pIter->pNext){ + if( iOff + nChunk > copy.endpoint.iOffset ){ + nChunk = copy.endpoint.iOffset - iOff; + } + rc = sqlite3OsWrite(pReal, (u8*)pIter->zChunk, nChunk, iOff); + if( rc ) break; + iOff += nChunk; + } + if( rc==SQLITE_OK ){ + /* No error has occurred. Free the in-memory buffers. */ + memjrnlFreeChunks(copy.pFirst); + } + } + if( rc!=SQLITE_OK ){ + /* If an error occurred while creating or writing to the file, restore + ** the original before returning. This way, SQLite uses the in-memory + ** journal data to roll back changes made to the internal page-cache + ** before this function was called. */ + sqlite3OsClose(pReal); + *p = copy; + } + return rc; +} + + +/* Forward reference */ +static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size); + +/* +** Write data to the file. +*/ +static int memjrnlWrite( + sqlite3_file *pJfd, /* The journal file into which to write */ + const void *zBuf, /* Take data to be written from here */ + int iAmt, /* Number of bytes to write */ + sqlite_int64 iOfst /* Begin writing at this offset into the file */ +){ + MemJournal *p = (MemJournal *)pJfd; + int nWrite = iAmt; + u8 *zWrite = (u8 *)zBuf; + + /* If the file should be created now, create it and write the new data + ** into the file on disk. */ + if( p->nSpill>0 && (iAmt+iOfst)>p->nSpill ){ + int rc = memjrnlCreateFile(p); + if( rc==SQLITE_OK ){ + rc = sqlite3OsWrite(pJfd, zBuf, iAmt, iOfst); + } + return rc; + } + + /* If the contents of this write should be stored in memory */ + else{ + /* An in-memory journal file should only ever be appended to. Random + ** access writes are not required. The only exception to this is when + ** the in-memory journal is being used by a connection using the + ** atomic-write optimization. In this case the first 28 bytes of the + ** journal file may be written as part of committing the transaction. */ + assert( iOfst<=p->endpoint.iOffset ); + if( iOfst>0 && iOfst!=p->endpoint.iOffset ){ + memjrnlTruncate(pJfd, iOfst); + } + if( iOfst==0 && p->pFirst ){ + assert( p->nChunkSize>iAmt ); + memcpy((u8*)p->pFirst->zChunk, zBuf, iAmt); + }else{ + while( nWrite>0 ){ + FileChunk *pChunk = p->endpoint.pChunk; + int iChunkOffset = (int)(p->endpoint.iOffset%p->nChunkSize); + int iSpace = MIN(nWrite, p->nChunkSize - iChunkOffset); + + assert( pChunk!=0 || iChunkOffset==0 ); + if( iChunkOffset==0 ){ + /* New chunk is required to extend the file. */ + FileChunk *pNew = sqlite3_malloc(fileChunkSize(p->nChunkSize)); + if( !pNew ){ + return SQLITE_IOERR_NOMEM_BKPT; + } + pNew->pNext = 0; + if( pChunk ){ + assert( p->pFirst ); + pChunk->pNext = pNew; + }else{ + assert( !p->pFirst ); + p->pFirst = pNew; + } + pChunk = p->endpoint.pChunk = pNew; + } + + assert( pChunk!=0 ); + memcpy((u8*)pChunk->zChunk + iChunkOffset, zWrite, iSpace); + zWrite += iSpace; + nWrite -= iSpace; + p->endpoint.iOffset += iSpace; + } + } + } + + return SQLITE_OK; +} + +/* +** Truncate the in-memory file. +*/ +static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size){ + MemJournal *p = (MemJournal *)pJfd; + assert( p->endpoint.pChunk==0 || p->endpoint.pChunk->pNext==0 ); + if( size<p->endpoint.iOffset ){ + FileChunk *pIter = 0; + if( size==0 ){ + memjrnlFreeChunks(p->pFirst); + p->pFirst = 0; + }else{ + i64 iOff = p->nChunkSize; + for(pIter=p->pFirst; ALWAYS(pIter) && iOff<size; pIter=pIter->pNext){ + iOff += p->nChunkSize; + } + if( ALWAYS(pIter) ){ + memjrnlFreeChunks(pIter->pNext); + pIter->pNext = 0; + } + } + + p->endpoint.pChunk = pIter; + p->endpoint.iOffset = size; + p->readpoint.pChunk = 0; + p->readpoint.iOffset = 0; + } + return SQLITE_OK; +} + +/* +** Close the file. +*/ +static int memjrnlClose(sqlite3_file *pJfd){ + MemJournal *p = (MemJournal *)pJfd; + memjrnlFreeChunks(p->pFirst); + return SQLITE_OK; +} + +/* +** Sync the file. +** +** If the real file has been created, call its xSync method. Otherwise, +** syncing an in-memory journal is a no-op. +*/ +static int memjrnlSync(sqlite3_file *pJfd, int flags){ + UNUSED_PARAMETER2(pJfd, flags); + return SQLITE_OK; +} + +/* +** Query the size of the file in bytes. +*/ +static int memjrnlFileSize(sqlite3_file *pJfd, sqlite_int64 *pSize){ + MemJournal *p = (MemJournal *)pJfd; + *pSize = (sqlite_int64) p->endpoint.iOffset; + return SQLITE_OK; +} + +/* +** Table of methods for MemJournal sqlite3_file object. +*/ +static const struct sqlite3_io_methods MemJournalMethods = { + 1, /* iVersion */ + memjrnlClose, /* xClose */ + memjrnlRead, /* xRead */ + memjrnlWrite, /* xWrite */ + memjrnlTruncate, /* xTruncate */ + memjrnlSync, /* xSync */ + memjrnlFileSize, /* xFileSize */ + 0, /* xLock */ + 0, /* xUnlock */ + 0, /* xCheckReservedLock */ + 0, /* xFileControl */ + 0, /* xSectorSize */ + 0, /* xDeviceCharacteristics */ + 0, /* xShmMap */ + 0, /* xShmLock */ + 0, /* xShmBarrier */ + 0, /* xShmUnmap */ + 0, /* xFetch */ + 0 /* xUnfetch */ +}; + +/* +** Open a journal file. +** +** The behaviour of the journal file depends on the value of parameter +** nSpill. If nSpill is 0, then the journal file is always create and +** accessed using the underlying VFS. If nSpill is less than zero, then +** all content is always stored in main-memory. Finally, if nSpill is a +** positive value, then the journal file is initially created in-memory +** but may be flushed to disk later on. In this case the journal file is +** flushed to disk either when it grows larger than nSpill bytes in size, +** or when sqlite3JournalCreate() is called. +*/ +int sqlite3JournalOpen( + sqlite3_vfs *pVfs, /* The VFS to use for actual file I/O */ + const char *zName, /* Name of the journal file */ + sqlite3_file *pJfd, /* Preallocated, blank file handle */ + int flags, /* Opening flags */ + int nSpill /* Bytes buffered before opening the file */ +){ + MemJournal *p = (MemJournal*)pJfd; + + assert( zName || nSpill<0 || (flags & SQLITE_OPEN_EXCLUSIVE) ); + + /* Zero the file-handle object. If nSpill was passed zero, initialize + ** it using the sqlite3OsOpen() function of the underlying VFS. In this + ** case none of the code in this module is executed as a result of calls + ** made on the journal file-handle. */ + memset(p, 0, sizeof(MemJournal)); + if( nSpill==0 ){ + return sqlite3OsOpen(pVfs, zName, pJfd, flags, 0); + } + + if( nSpill>0 ){ + p->nChunkSize = nSpill; + }else{ + p->nChunkSize = 8 + MEMJOURNAL_DFLT_FILECHUNKSIZE - sizeof(FileChunk); + assert( MEMJOURNAL_DFLT_FILECHUNKSIZE==fileChunkSize(p->nChunkSize) ); + } + + pJfd->pMethods = (const sqlite3_io_methods*)&MemJournalMethods; + p->nSpill = nSpill; + p->flags = flags; + p->zJournal = zName; + p->pVfs = pVfs; + return SQLITE_OK; +} + +/* +** Open an in-memory journal file. +*/ +void sqlite3MemJournalOpen(sqlite3_file *pJfd){ + sqlite3JournalOpen(0, 0, pJfd, 0, -1); +} + +#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \ + || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) +/* +** If the argument p points to a MemJournal structure that is not an +** in-memory-only journal file (i.e. is one that was opened with a +ve +** nSpill parameter or as SQLITE_OPEN_MAIN_JOURNAL), and the underlying +** file has not yet been created, create it now. +*/ +int sqlite3JournalCreate(sqlite3_file *pJfd){ + int rc = SQLITE_OK; + MemJournal *p = (MemJournal*)pJfd; + if( pJfd->pMethods==&MemJournalMethods && ( +#ifdef SQLITE_ENABLE_ATOMIC_WRITE + p->nSpill>0 +#else + /* While this appears to not be possible without ATOMIC_WRITE, the + ** paths are complex, so it seems prudent to leave the test in as + ** a NEVER(), in case our analysis is subtly flawed. */ + NEVER(p->nSpill>0) +#endif +#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE + || (p->flags & SQLITE_OPEN_MAIN_JOURNAL) +#endif + )){ + rc = memjrnlCreateFile(p); + } + return rc; +} +#endif + +/* +** The file-handle passed as the only argument is open on a journal file. +** Return true if this "journal file" is currently stored in heap memory, +** or false otherwise. +*/ +int sqlite3JournalIsInMemory(sqlite3_file *p){ + return p->pMethods==&MemJournalMethods; +} + +/* +** Return the number of bytes required to store a JournalFile that uses vfs +** pVfs to create the underlying on-disk files. +*/ +int sqlite3JournalSize(sqlite3_vfs *pVfs){ + return MAX(pVfs->szOsFile, (int)sizeof(MemJournal)); +} |