diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 17:28:19 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 17:28:19 +0000 |
commit | 18657a960e125336f704ea058e25c27bd3900dcb (patch) | |
tree | 17b438b680ed45a996d7b59951e6aa34023783f2 /ext/misc/fossildelta.c | |
parent | Initial commit. (diff) | |
download | sqlite3-18657a960e125336f704ea058e25c27bd3900dcb.tar.xz sqlite3-18657a960e125336f704ea058e25c27bd3900dcb.zip |
Adding upstream version 3.40.1.upstream/3.40.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | ext/misc/fossildelta.c | 1092 |
1 files changed, 1092 insertions, 0 deletions
diff --git a/ext/misc/fossildelta.c b/ext/misc/fossildelta.c new file mode 100644 index 0000000..6a597e0 --- /dev/null +++ b/ext/misc/fossildelta.c @@ -0,0 +1,1092 @@ +/* +** 2019-02-19 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This SQLite extension implements the delta functions used by the RBU +** extension. Three scalar functions and one table-valued function are +** implemented here: +** +** delta_apply(X,D) -- apply delta D to file X and return the result +** delta_create(X,Y) -- compute and return a delta that carries X into Y +** delta_output_size(D) -- blob size in bytes output from applying delta D +** delta_parse(D) -- returns rows describing delta D +** +** The delta format is the Fossil delta format, described in a comment +** on the delete_create() function implementation below, and also at +** +** https://www.fossil-scm.org/fossil/doc/trunk/www/delta_format.wiki +** +** This delta format is used by the RBU extension, which is the main +** reason that these routines are included in the extension library. +** RBU does not use this extension directly. Rather, this extension is +** provided as a convenience to developers who want to analyze RBU files +** that contain deltas. +*/ +#include <string.h> +#include <assert.h> +#include <stdlib.h> +#include "sqlite3ext.h" +SQLITE_EXTENSION_INIT1 + +#ifndef SQLITE_AMALGAMATION +/* +** The "u32" type must be an unsigned 32-bit integer. Adjust this +*/ +typedef unsigned int u32; + +/* +** Must be a 16-bit value +*/ +typedef short int s16; +typedef unsigned short int u16; + +#endif /* SQLITE_AMALGAMATION */ + + +/* +** The width of a hash window in bytes. The algorithm only works if this +** is a power of 2. +*/ +#define NHASH 16 + +/* +** The current state of the rolling hash. +** +** z[] holds the values that have been hashed. z[] is a circular buffer. +** z[i] is the first entry and z[(i+NHASH-1)%NHASH] is the last entry of +** the window. +** +** Hash.a is the sum of all elements of hash.z[]. Hash.b is a weighted +** sum. Hash.b is z[i]*NHASH + z[i+1]*(NHASH-1) + ... + z[i+NHASH-1]*1. +** (Each index for z[] should be module NHASH, of course. The %NHASH operator +** is omitted in the prior expression for brevity.) +*/ +typedef struct hash hash; +struct hash { + u16 a, b; /* Hash values */ + u16 i; /* Start of the hash window */ + char z[NHASH]; /* The values that have been hashed */ +}; + +/* +** Initialize the rolling hash using the first NHASH characters of z[] +*/ +static void hash_init(hash *pHash, const char *z){ + u16 a, b, i; + a = b = z[0]; + for(i=1; i<NHASH; i++){ + a += z[i]; + b += a; + } + memcpy(pHash->z, z, NHASH); + pHash->a = a & 0xffff; + pHash->b = b & 0xffff; + pHash->i = 0; +} + +/* +** Advance the rolling hash by a single character "c" +*/ +static void hash_next(hash *pHash, int c){ + u16 old = pHash->z[pHash->i]; + pHash->z[pHash->i] = c; + pHash->i = (pHash->i+1)&(NHASH-1); + pHash->a = pHash->a - old + c; + pHash->b = pHash->b - NHASH*old + pHash->a; +} + +/* +** Return a 32-bit hash value +*/ +static u32 hash_32bit(hash *pHash){ + return (pHash->a & 0xffff) | (((u32)(pHash->b & 0xffff))<<16); +} + +/* +** Compute a hash on NHASH bytes. +** +** This routine is intended to be equivalent to: +** hash h; +** hash_init(&h, zInput); +** return hash_32bit(&h); +*/ +static u32 hash_once(const char *z){ + u16 a, b, i; + a = b = z[0]; + for(i=1; i<NHASH; i++){ + a += z[i]; + b += a; + } + return a | (((u32)b)<<16); +} + +/* +** Write an base-64 integer into the given buffer. +*/ +static void putInt(unsigned int v, char **pz){ + static const char zDigits[] = + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~"; + /* 123456789 123456789 123456789 123456789 123456789 123456789 123 */ + int i, j; + char zBuf[20]; + if( v==0 ){ + *(*pz)++ = '0'; + return; + } + for(i=0; v>0; i++, v>>=6){ + zBuf[i] = zDigits[v&0x3f]; + } + for(j=i-1; j>=0; j--){ + *(*pz)++ = zBuf[j]; + } +} + +/* +** Read bytes from *pz and convert them into a positive integer. When +** finished, leave *pz pointing to the first character past the end of +** the integer. The *pLen parameter holds the length of the string +** in *pz and is decremented once for each character in the integer. +*/ +static unsigned int deltaGetInt(const char **pz, int *pLen){ + static const signed char zValue[] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, 36, + -1, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, -1, -1, -1, 63, -1, + }; + unsigned int v = 0; + int c; + unsigned char *z = (unsigned char*)*pz; + unsigned char *zStart = z; + while( (c = zValue[0x7f&*(z++)])>=0 ){ + v = (v<<6) + c; + } + z--; + *pLen -= z - zStart; + *pz = (char*)z; + return v; +} + +/* +** Return the number digits in the base-64 representation of a positive integer +*/ +static int digit_count(int v){ + unsigned int i, x; + for(i=1, x=64; v>=x; i++, x <<= 6){} + return i; +} + +#ifdef __GNUC__ +# define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__) +#else +# define GCC_VERSION 0 +#endif + +/* +** Compute a 32-bit big-endian checksum on the N-byte buffer. If the +** buffer is not a multiple of 4 bytes length, compute the sum that would +** have occurred if the buffer was padded with zeros to the next multiple +** of four bytes. +*/ +static unsigned int checksum(const char *zIn, size_t N){ + static const int byteOrderTest = 1; + const unsigned char *z = (const unsigned char *)zIn; + const unsigned char *zEnd = (const unsigned char*)&zIn[N&~3]; + unsigned sum = 0; + assert( (z - (const unsigned char*)0)%4==0 ); /* Four-byte alignment */ + if( 0==*(char*)&byteOrderTest ){ + /* This is a big-endian machine */ + while( z<zEnd ){ + sum += *(unsigned*)z; + z += 4; + } + }else{ + /* A little-endian machine */ +#if GCC_VERSION>=4003000 + while( z<zEnd ){ + sum += __builtin_bswap32(*(unsigned*)z); + z += 4; + } +#elif defined(_MSC_VER) && _MSC_VER>=1300 + while( z<zEnd ){ + sum += _byteswap_ulong(*(unsigned*)z); + z += 4; + } +#else + unsigned sum0 = 0; + unsigned sum1 = 0; + unsigned sum2 = 0; + while(N >= 16){ + sum0 += ((unsigned)z[0] + z[4] + z[8] + z[12]); + sum1 += ((unsigned)z[1] + z[5] + z[9] + z[13]); + sum2 += ((unsigned)z[2] + z[6] + z[10]+ z[14]); + sum += ((unsigned)z[3] + z[7] + z[11]+ z[15]); + z += 16; + N -= 16; + } + while(N >= 4){ + sum0 += z[0]; + sum1 += z[1]; + sum2 += z[2]; + sum += z[3]; + z += 4; + N -= 4; + } + sum += (sum2 << 8) + (sum1 << 16) + (sum0 << 24); +#endif + } + switch(N&3){ + case 3: sum += (z[2] << 8); + case 2: sum += (z[1] << 16); + case 1: sum += (z[0] << 24); + default: ; + } + return sum; +} + +/* +** Create a new delta. +** +** The delta is written into a preallocated buffer, zDelta, which +** should be at least 60 bytes longer than the target file, zOut. +** The delta string will be NUL-terminated, but it might also contain +** embedded NUL characters if either the zSrc or zOut files are +** binary. This function returns the length of the delta string +** in bytes, excluding the final NUL terminator character. +** +** Output Format: +** +** The delta begins with a base64 number followed by a newline. This +** number is the number of bytes in the TARGET file. Thus, given a +** delta file z, a program can compute the size of the output file +** simply by reading the first line and decoding the base-64 number +** found there. The delta_output_size() routine does exactly this. +** +** After the initial size number, the delta consists of a series of +** literal text segments and commands to copy from the SOURCE file. +** A copy command looks like this: +** +** NNN@MMM, +** +** where NNN is the number of bytes to be copied and MMM is the offset +** into the source file of the first byte (both base-64). If NNN is 0 +** it means copy the rest of the input file. Literal text is like this: +** +** NNN:TTTTT +** +** where NNN is the number of bytes of text (base-64) and TTTTT is the text. +** +** The last term is of the form +** +** NNN; +** +** In this case, NNN is a 32-bit bigendian checksum of the output file +** that can be used to verify that the delta applied correctly. All +** numbers are in base-64. +** +** Pure text files generate a pure text delta. Binary files generate a +** delta that may contain some binary data. +** +** Algorithm: +** +** The encoder first builds a hash table to help it find matching +** patterns in the source file. 16-byte chunks of the source file +** sampled at evenly spaced intervals are used to populate the hash +** table. +** +** Next we begin scanning the target file using a sliding 16-byte +** window. The hash of the 16-byte window in the target is used to +** search for a matching section in the source file. When a match +** is found, a copy command is added to the delta. An effort is +** made to extend the matching section to regions that come before +** and after the 16-byte hash window. A copy command is only issued +** if the result would use less space that just quoting the text +** literally. Literal text is added to the delta for sections that +** do not match or which can not be encoded efficiently using copy +** commands. +*/ +static int delta_create( + const char *zSrc, /* The source or pattern file */ + unsigned int lenSrc, /* Length of the source file */ + const char *zOut, /* The target file */ + unsigned int lenOut, /* Length of the target file */ + char *zDelta /* Write the delta into this buffer */ +){ + int i, base; + char *zOrigDelta = zDelta; + hash h; + int nHash; /* Number of hash table entries */ + int *landmark; /* Primary hash table */ + int *collide; /* Collision chain */ + int lastRead = -1; /* Last byte of zSrc read by a COPY command */ + + /* Add the target file size to the beginning of the delta + */ + putInt(lenOut, &zDelta); + *(zDelta++) = '\n'; + + /* If the source file is very small, it means that we have no + ** chance of ever doing a copy command. Just output a single + ** literal segment for the entire target and exit. + */ + if( lenSrc<=NHASH ){ + putInt(lenOut, &zDelta); + *(zDelta++) = ':'; + memcpy(zDelta, zOut, lenOut); + zDelta += lenOut; + putInt(checksum(zOut, lenOut), &zDelta); + *(zDelta++) = ';'; + return zDelta - zOrigDelta; + } + + /* Compute the hash table used to locate matching sections in the + ** source file. + */ + nHash = lenSrc/NHASH; + collide = sqlite3_malloc64( (sqlite3_int64)nHash*2*sizeof(int) ); + memset(collide, -1, nHash*2*sizeof(int)); + landmark = &collide[nHash]; + for(i=0; i<lenSrc-NHASH; i+=NHASH){ + int hv = hash_once(&zSrc[i]) % nHash; + collide[i/NHASH] = landmark[hv]; + landmark[hv] = i/NHASH; + } + + /* Begin scanning the target file and generating copy commands and + ** literal sections of the delta. + */ + base = 0; /* We have already generated everything before zOut[base] */ + while( base+NHASH<lenOut ){ + int iSrc, iBlock; + unsigned int bestCnt, bestOfst=0, bestLitsz=0; + hash_init(&h, &zOut[base]); + i = 0; /* Trying to match a landmark against zOut[base+i] */ + bestCnt = 0; + while( 1 ){ + int hv; + int limit = 250; + + hv = hash_32bit(&h) % nHash; + iBlock = landmark[hv]; + while( iBlock>=0 && (limit--)>0 ){ + /* + ** The hash window has identified a potential match against + ** landmark block iBlock. But we need to investigate further. + ** + ** Look for a region in zOut that matches zSrc. Anchor the search + ** at zSrc[iSrc] and zOut[base+i]. Do not include anything prior to + ** zOut[base] or after zOut[outLen] nor anything after zSrc[srcLen]. + ** + ** Set cnt equal to the length of the match and set ofst so that + ** zSrc[ofst] is the first element of the match. litsz is the number + ** of characters between zOut[base] and the beginning of the match. + ** sz will be the overhead (in bytes) needed to encode the copy + ** command. Only generate copy command if the overhead of the + ** copy command is less than the amount of literal text to be copied. + */ + int cnt, ofst, litsz; + int j, k, x, y; + int sz; + int limitX; + + /* Beginning at iSrc, match forwards as far as we can. j counts + ** the number of characters that match */ + iSrc = iBlock*NHASH; + y = base+i; + limitX = ( lenSrc-iSrc <= lenOut-y ) ? lenSrc : iSrc + lenOut - y; + for(x=iSrc; x<limitX; x++, y++){ + if( zSrc[x]!=zOut[y] ) break; + } + j = x - iSrc - 1; + + /* Beginning at iSrc-1, match backwards as far as we can. k counts + ** the number of characters that match */ + for(k=1; k<iSrc && k<=i; k++){ + if( zSrc[iSrc-k]!=zOut[base+i-k] ) break; + } + k--; + + /* Compute the offset and size of the matching region */ + ofst = iSrc-k; + cnt = j+k+1; + litsz = i-k; /* Number of bytes of literal text before the copy */ + /* sz will hold the number of bytes needed to encode the "insert" + ** command and the copy command, not counting the "insert" text */ + sz = digit_count(i-k)+digit_count(cnt)+digit_count(ofst)+3; + if( cnt>=sz && cnt>bestCnt ){ + /* Remember this match only if it is the best so far and it + ** does not increase the file size */ + bestCnt = cnt; + bestOfst = iSrc-k; + bestLitsz = litsz; + } + + /* Check the next matching block */ + iBlock = collide[iBlock]; + } + + /* We have a copy command that does not cause the delta to be larger + ** than a literal insert. So add the copy command to the delta. + */ + if( bestCnt>0 ){ + if( bestLitsz>0 ){ + /* Add an insert command before the copy */ + putInt(bestLitsz,&zDelta); + *(zDelta++) = ':'; + memcpy(zDelta, &zOut[base], bestLitsz); + zDelta += bestLitsz; + base += bestLitsz; + } + base += bestCnt; + putInt(bestCnt, &zDelta); + *(zDelta++) = '@'; + putInt(bestOfst, &zDelta); + *(zDelta++) = ','; + if( bestOfst + bestCnt -1 > lastRead ){ + lastRead = bestOfst + bestCnt - 1; + } + bestCnt = 0; + break; + } + + /* If we reach this point, it means no match is found so far */ + if( base+i+NHASH>=lenOut ){ + /* We have reached the end of the file and have not found any + ** matches. Do an "insert" for everything that does not match */ + putInt(lenOut-base, &zDelta); + *(zDelta++) = ':'; + memcpy(zDelta, &zOut[base], lenOut-base); + zDelta += lenOut-base; + base = lenOut; + break; + } + + /* Advance the hash by one character. Keep looking for a match */ + hash_next(&h, zOut[base+i+NHASH]); + i++; + } + } + /* Output a final "insert" record to get all the text at the end of + ** the file that does not match anything in the source file. + */ + if( base<lenOut ){ + putInt(lenOut-base, &zDelta); + *(zDelta++) = ':'; + memcpy(zDelta, &zOut[base], lenOut-base); + zDelta += lenOut-base; + } + /* Output the final checksum record. */ + putInt(checksum(zOut, lenOut), &zDelta); + *(zDelta++) = ';'; + sqlite3_free(collide); + return zDelta - zOrigDelta; +} + +/* +** Return the size (in bytes) of the output from applying +** a delta. +** +** This routine is provided so that an procedure that is able +** to call delta_apply() can learn how much space is required +** for the output and hence allocate nor more space that is really +** needed. +*/ +static int delta_output_size(const char *zDelta, int lenDelta){ + int size; + size = deltaGetInt(&zDelta, &lenDelta); + if( *zDelta!='\n' ){ + /* ERROR: size integer not terminated by "\n" */ + return -1; + } + return size; +} + + +/* +** Apply a delta. +** +** The output buffer should be big enough to hold the whole output +** file and a NUL terminator at the end. The delta_output_size() +** routine will determine this size for you. +** +** The delta string should be null-terminated. But the delta string +** may contain embedded NUL characters (if the input and output are +** binary files) so we also have to pass in the length of the delta in +** the lenDelta parameter. +** +** This function returns the size of the output file in bytes (excluding +** the final NUL terminator character). Except, if the delta string is +** malformed or intended for use with a source file other than zSrc, +** then this routine returns -1. +** +** Refer to the delta_create() documentation above for a description +** of the delta file format. +*/ +static int delta_apply( + const char *zSrc, /* The source or pattern file */ + int lenSrc, /* Length of the source file */ + const char *zDelta, /* Delta to apply to the pattern */ + int lenDelta, /* Length of the delta */ + char *zOut /* Write the output into this preallocated buffer */ +){ + unsigned int limit; + unsigned int total = 0; +#ifdef FOSSIL_ENABLE_DELTA_CKSUM_TEST + char *zOrigOut = zOut; +#endif + + limit = deltaGetInt(&zDelta, &lenDelta); + if( *zDelta!='\n' ){ + /* ERROR: size integer not terminated by "\n" */ + return -1; + } + zDelta++; lenDelta--; + while( *zDelta && lenDelta>0 ){ + unsigned int cnt, ofst; + cnt = deltaGetInt(&zDelta, &lenDelta); + switch( zDelta[0] ){ + case '@': { + zDelta++; lenDelta--; + ofst = deltaGetInt(&zDelta, &lenDelta); + if( lenDelta>0 && zDelta[0]!=',' ){ + /* ERROR: copy command not terminated by ',' */ + return -1; + } + zDelta++; lenDelta--; + total += cnt; + if( total>limit ){ + /* ERROR: copy exceeds output file size */ + return -1; + } + if( ofst+cnt > lenSrc ){ + /* ERROR: copy extends past end of input */ + return -1; + } + memcpy(zOut, &zSrc[ofst], cnt); + zOut += cnt; + break; + } + case ':': { + zDelta++; lenDelta--; + total += cnt; + if( total>limit ){ + /* ERROR: insert command gives an output larger than predicted */ + return -1; + } + if( cnt>lenDelta ){ + /* ERROR: insert count exceeds size of delta */ + return -1; + } + memcpy(zOut, zDelta, cnt); + zOut += cnt; + zDelta += cnt; + lenDelta -= cnt; + break; + } + case ';': { + zDelta++; lenDelta--; + zOut[0] = 0; +#ifdef FOSSIL_ENABLE_DELTA_CKSUM_TEST + if( cnt!=checksum(zOrigOut, total) ){ + /* ERROR: bad checksum */ + return -1; + } +#endif + if( total!=limit ){ + /* ERROR: generated size does not match predicted size */ + return -1; + } + return total; + } + default: { + /* ERROR: unknown delta operator */ + return -1; + } + } + } + /* ERROR: unterminated delta */ + return -1; +} + +/* +** SQL functions: delta_create(X,Y) +** +** Return a delta for carrying X into Y. +*/ +static void deltaCreateFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + const char *aOrig; int nOrig; /* old blob */ + const char *aNew; int nNew; /* new blob */ + char *aOut; int nOut; /* output delta */ + + assert( argc==2 ); + if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return; + if( sqlite3_value_type(argv[1])==SQLITE_NULL ) return; + nOrig = sqlite3_value_bytes(argv[0]); + aOrig = (const char*)sqlite3_value_blob(argv[0]); + nNew = sqlite3_value_bytes(argv[1]); + aNew = (const char*)sqlite3_value_blob(argv[1]); + aOut = sqlite3_malloc64(nNew+70); + if( aOut==0 ){ + sqlite3_result_error_nomem(context); + }else{ + nOut = delta_create(aOrig, nOrig, aNew, nNew, aOut); + if( nOut<0 ){ + sqlite3_free(aOut); + sqlite3_result_error(context, "cannot create fossil delta", -1); + }else{ + sqlite3_result_blob(context, aOut, nOut, sqlite3_free); + } + } +} + +/* +** SQL functions: delta_apply(X,D) +** +** Return the result of applying delta D to input X. +*/ +static void deltaApplyFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + const char *aOrig; int nOrig; /* The X input */ + const char *aDelta; int nDelta; /* The input delta (D) */ + char *aOut; int nOut, nOut2; /* The output */ + + assert( argc==2 ); + if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return; + if( sqlite3_value_type(argv[1])==SQLITE_NULL ) return; + nOrig = sqlite3_value_bytes(argv[0]); + aOrig = (const char*)sqlite3_value_blob(argv[0]); + nDelta = sqlite3_value_bytes(argv[1]); + aDelta = (const char*)sqlite3_value_blob(argv[1]); + + /* Figure out the size of the output */ + nOut = delta_output_size(aDelta, nDelta); + if( nOut<0 ){ + sqlite3_result_error(context, "corrupt fossil delta", -1); + return; + } + aOut = sqlite3_malloc64((sqlite3_int64)nOut+1); + if( aOut==0 ){ + sqlite3_result_error_nomem(context); + }else{ + nOut2 = delta_apply(aOrig, nOrig, aDelta, nDelta, aOut); + if( nOut2!=nOut ){ + sqlite3_free(aOut); + sqlite3_result_error(context, "corrupt fossil delta", -1); + }else{ + sqlite3_result_blob(context, aOut, nOut, sqlite3_free); + } + } +} + + +/* +** SQL functions: delta_output_size(D) +** +** Return the size of the output that results from applying delta D. +*/ +static void deltaOutputSizeFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + const char *aDelta; int nDelta; /* The input delta (D) */ + int nOut; /* Size of output */ + assert( argc==1 ); + if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return; + nDelta = sqlite3_value_bytes(argv[0]); + aDelta = (const char*)sqlite3_value_blob(argv[0]); + + /* Figure out the size of the output */ + nOut = delta_output_size(aDelta, nDelta); + if( nOut<0 ){ + sqlite3_result_error(context, "corrupt fossil delta", -1); + return; + }else{ + sqlite3_result_int(context, nOut); + } +} + +/***************************************************************************** +** Table-valued SQL function: delta_parse(DELTA) +** +** Schema: +** +** CREATE TABLE delta_parse( +** op TEXT, +** a1 INT, +** a2 ANY, +** delta HIDDEN BLOB +** ); +** +** Given an input DELTA, this function parses the delta and returns +** rows for each entry in the delta. The op column has one of the +** values SIZE, COPY, INSERT, CHECKSUM, ERROR. +** +** Assuming no errors, the first row has op='SIZE'. a1 is the size of +** the output in bytes and a2 is NULL. +** +** After the initial SIZE row, there are zero or more 'COPY' and/or 'INSERT' +** rows. A COPY row means content is copied from the source into the +** output. Column a1 is the number of bytes to copy and a2 is the offset +** into source from which to begin copying. An INSERT row means to +** insert text into the output stream. Column a1 is the number of bytes +** to insert and column is a BLOB that contains the text to be inserted. +** +** The last row of a well-formed delta will have an op value of 'CHECKSUM'. +** The a1 column will be the value of the checksum and a2 will be NULL. +** +** If the input delta is not well-formed, then a row with an op value +** of 'ERROR' is returned. The a1 value of the ERROR row is the offset +** into the delta where the error was encountered and a2 is NULL. +*/ +typedef struct deltaparsevtab_vtab deltaparsevtab_vtab; +typedef struct deltaparsevtab_cursor deltaparsevtab_cursor; +struct deltaparsevtab_vtab { + sqlite3_vtab base; /* Base class - must be first */ + /* No additional information needed */ +}; +struct deltaparsevtab_cursor { + sqlite3_vtab_cursor base; /* Base class - must be first */ + char *aDelta; /* The delta being parsed */ + int nDelta; /* Number of bytes in the delta */ + int iCursor; /* Current cursor location */ + int eOp; /* Name of current operator */ + unsigned int a1, a2; /* Arguments to current operator */ + int iNext; /* Next cursor value */ +}; + +/* Operator names: +*/ +static const char *azOp[] = { + "SIZE", "COPY", "INSERT", "CHECKSUM", "ERROR", "EOF" +}; +#define DELTAPARSE_OP_SIZE 0 +#define DELTAPARSE_OP_COPY 1 +#define DELTAPARSE_OP_INSERT 2 +#define DELTAPARSE_OP_CHECKSUM 3 +#define DELTAPARSE_OP_ERROR 4 +#define DELTAPARSE_OP_EOF 5 + +/* +** The deltaparsevtabConnect() method is invoked to create a new +** deltaparse virtual table. +** +** Think of this routine as the constructor for deltaparsevtab_vtab objects. +** +** All this routine needs to do is: +** +** (1) Allocate the deltaparsevtab_vtab object and initialize all fields. +** +** (2) Tell SQLite (via the sqlite3_declare_vtab() interface) what the +** result set of queries against the virtual table will look like. +*/ +static int deltaparsevtabConnect( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr +){ + deltaparsevtab_vtab *pNew; + int rc; + + rc = sqlite3_declare_vtab(db, + "CREATE TABLE x(op,a1,a2,delta HIDDEN)" + ); + /* For convenience, define symbolic names for the index to each column. */ +#define DELTAPARSEVTAB_OP 0 +#define DELTAPARSEVTAB_A1 1 +#define DELTAPARSEVTAB_A2 2 +#define DELTAPARSEVTAB_DELTA 3 + if( rc==SQLITE_OK ){ + pNew = sqlite3_malloc64( sizeof(*pNew) ); + *ppVtab = (sqlite3_vtab*)pNew; + if( pNew==0 ) return SQLITE_NOMEM; + memset(pNew, 0, sizeof(*pNew)); + sqlite3_vtab_config(db, SQLITE_VTAB_INNOCUOUS); + } + return rc; +} + +/* +** This method is the destructor for deltaparsevtab_vtab objects. +*/ +static int deltaparsevtabDisconnect(sqlite3_vtab *pVtab){ + deltaparsevtab_vtab *p = (deltaparsevtab_vtab*)pVtab; + sqlite3_free(p); + return SQLITE_OK; +} + +/* +** Constructor for a new deltaparsevtab_cursor object. +*/ +static int deltaparsevtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ + deltaparsevtab_cursor *pCur; + pCur = sqlite3_malloc( sizeof(*pCur) ); + if( pCur==0 ) return SQLITE_NOMEM; + memset(pCur, 0, sizeof(*pCur)); + *ppCursor = &pCur->base; + return SQLITE_OK; +} + +/* +** Destructor for a deltaparsevtab_cursor. +*/ +static int deltaparsevtabClose(sqlite3_vtab_cursor *cur){ + deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur; + sqlite3_free(pCur->aDelta); + sqlite3_free(pCur); + return SQLITE_OK; +} + + +/* +** Advance a deltaparsevtab_cursor to its next row of output. +*/ +static int deltaparsevtabNext(sqlite3_vtab_cursor *cur){ + deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur; + const char *z; + int i = 0; + + pCur->iCursor = pCur->iNext; + z = pCur->aDelta + pCur->iCursor; + pCur->a1 = deltaGetInt(&z, &i); + switch( z[0] ){ + case '@': { + z++; + pCur->a2 = deltaGetInt(&z, &i); + pCur->eOp = DELTAPARSE_OP_COPY; + pCur->iNext = (int)(&z[1] - pCur->aDelta); + break; + } + case ':': { + z++; + pCur->a2 = (unsigned int)(z - pCur->aDelta); + pCur->eOp = DELTAPARSE_OP_INSERT; + pCur->iNext = (int)(&z[pCur->a1] - pCur->aDelta); + break; + } + case ';': { + pCur->eOp = DELTAPARSE_OP_CHECKSUM; + pCur->iNext = pCur->nDelta; + break; + } + default: { + if( pCur->iNext==pCur->nDelta ){ + pCur->eOp = DELTAPARSE_OP_EOF; + }else{ + pCur->eOp = DELTAPARSE_OP_ERROR; + pCur->iNext = pCur->nDelta; + } + break; + } + } + return SQLITE_OK; +} + +/* +** Return values of columns for the row at which the deltaparsevtab_cursor +** is currently pointing. +*/ +static int deltaparsevtabColumn( + sqlite3_vtab_cursor *cur, /* The cursor */ + sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ + int i /* Which column to return */ +){ + deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur; + switch( i ){ + case DELTAPARSEVTAB_OP: { + sqlite3_result_text(ctx, azOp[pCur->eOp], -1, SQLITE_STATIC); + break; + } + case DELTAPARSEVTAB_A1: { + sqlite3_result_int(ctx, pCur->a1); + break; + } + case DELTAPARSEVTAB_A2: { + if( pCur->eOp==DELTAPARSE_OP_COPY ){ + sqlite3_result_int(ctx, pCur->a2); + }else if( pCur->eOp==DELTAPARSE_OP_INSERT ){ + sqlite3_result_blob(ctx, pCur->aDelta+pCur->a2, pCur->a1, + SQLITE_TRANSIENT); + } + break; + } + case DELTAPARSEVTAB_DELTA: { + sqlite3_result_blob(ctx, pCur->aDelta, pCur->nDelta, SQLITE_TRANSIENT); + break; + } + } + return SQLITE_OK; +} + +/* +** Return the rowid for the current row. In this implementation, the +** rowid is the same as the output value. +*/ +static int deltaparsevtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ + deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur; + *pRowid = pCur->iCursor; + return SQLITE_OK; +} + +/* +** Return TRUE if the cursor has been moved off of the last +** row of output. +*/ +static int deltaparsevtabEof(sqlite3_vtab_cursor *cur){ + deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur; + return pCur->eOp==DELTAPARSE_OP_EOF; +} + +/* +** This method is called to "rewind" the deltaparsevtab_cursor object back +** to the first row of output. This method is always called at least +** once prior to any call to deltaparsevtabColumn() or deltaparsevtabRowid() or +** deltaparsevtabEof(). +*/ +static int deltaparsevtabFilter( + sqlite3_vtab_cursor *pVtabCursor, + int idxNum, const char *idxStr, + int argc, sqlite3_value **argv +){ + deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor *)pVtabCursor; + const char *a; + int i = 0; + pCur->eOp = DELTAPARSE_OP_ERROR; + if( idxNum!=1 ){ + return SQLITE_OK; + } + pCur->nDelta = sqlite3_value_bytes(argv[0]); + a = (const char*)sqlite3_value_blob(argv[0]); + if( pCur->nDelta==0 || a==0 ){ + return SQLITE_OK; + } + pCur->aDelta = sqlite3_malloc64( pCur->nDelta+1 ); + if( pCur->aDelta==0 ){ + pCur->nDelta = 0; + return SQLITE_NOMEM; + } + memcpy(pCur->aDelta, a, pCur->nDelta); + pCur->aDelta[pCur->nDelta] = 0; + a = pCur->aDelta; + pCur->eOp = DELTAPARSE_OP_SIZE; + pCur->a1 = deltaGetInt(&a, &i); + if( a[0]!='\n' ){ + pCur->eOp = DELTAPARSE_OP_ERROR; + pCur->a1 = pCur->a2 = 0; + pCur->iNext = pCur->nDelta; + return SQLITE_OK; + } + a++; + pCur->iNext = (unsigned int)(a - pCur->aDelta); + return SQLITE_OK; +} + +/* +** SQLite will invoke this method one or more times while planning a query +** that uses the virtual table. This routine needs to create +** a query plan for each invocation and compute an estimated cost for that +** plan. +*/ +static int deltaparsevtabBestIndex( + sqlite3_vtab *tab, + sqlite3_index_info *pIdxInfo +){ + int i; + for(i=0; i<pIdxInfo->nConstraint; i++){ + if( pIdxInfo->aConstraint[i].iColumn != DELTAPARSEVTAB_DELTA ) continue; + if( pIdxInfo->aConstraint[i].usable==0 ) continue; + if( pIdxInfo->aConstraint[i].op!=SQLITE_INDEX_CONSTRAINT_EQ ) continue; + pIdxInfo->aConstraintUsage[i].argvIndex = 1; + pIdxInfo->aConstraintUsage[i].omit = 1; + pIdxInfo->estimatedCost = (double)1; + pIdxInfo->estimatedRows = 10; + pIdxInfo->idxNum = 1; + return SQLITE_OK; + } + pIdxInfo->idxNum = 0; + pIdxInfo->estimatedCost = (double)0x7fffffff; + pIdxInfo->estimatedRows = 0x7fffffff; + return SQLITE_CONSTRAINT; +} + +/* +** This following structure defines all the methods for the +** virtual table. +*/ +static sqlite3_module deltaparsevtabModule = { + /* iVersion */ 0, + /* xCreate */ 0, + /* xConnect */ deltaparsevtabConnect, + /* xBestIndex */ deltaparsevtabBestIndex, + /* xDisconnect */ deltaparsevtabDisconnect, + /* xDestroy */ 0, + /* xOpen */ deltaparsevtabOpen, + /* xClose */ deltaparsevtabClose, + /* xFilter */ deltaparsevtabFilter, + /* xNext */ deltaparsevtabNext, + /* xEof */ deltaparsevtabEof, + /* xColumn */ deltaparsevtabColumn, + /* xRowid */ deltaparsevtabRowid, + /* xUpdate */ 0, + /* xBegin */ 0, + /* xSync */ 0, + /* xCommit */ 0, + /* xRollback */ 0, + /* xFindMethod */ 0, + /* xRename */ 0, + /* xSavepoint */ 0, + /* xRelease */ 0, + /* xRollbackTo */ 0, + /* xShadowName */ 0 +}; + + + +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_fossildelta_init( + sqlite3 *db, + char **pzErrMsg, + const sqlite3_api_routines *pApi +){ + static const int enc = SQLITE_UTF8|SQLITE_INNOCUOUS; + int rc = SQLITE_OK; + SQLITE_EXTENSION_INIT2(pApi); + (void)pzErrMsg; /* Unused parameter */ + rc = sqlite3_create_function(db, "delta_create", 2, enc, 0, + deltaCreateFunc, 0, 0); + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function(db, "delta_apply", 2, enc, 0, + deltaApplyFunc, 0, 0); + } + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function(db, "delta_output_size", 1, enc, 0, + deltaOutputSizeFunc, 0, 0); + } + if( rc==SQLITE_OK ){ + rc = sqlite3_create_module(db, "delta_parse", &deltaparsevtabModule, 0); + } + return rc; +} |