summaryrefslogtreecommitdiffstats
path: root/ext/misc/fossildelta.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--ext/misc/fossildelta.c1092
1 files changed, 1092 insertions, 0 deletions
diff --git a/ext/misc/fossildelta.c b/ext/misc/fossildelta.c
new file mode 100644
index 0000000..6a597e0
--- /dev/null
+++ b/ext/misc/fossildelta.c
@@ -0,0 +1,1092 @@
+/*
+** 2019-02-19
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This SQLite extension implements the delta functions used by the RBU
+** extension. Three scalar functions and one table-valued function are
+** implemented here:
+**
+** delta_apply(X,D) -- apply delta D to file X and return the result
+** delta_create(X,Y) -- compute and return a delta that carries X into Y
+** delta_output_size(D) -- blob size in bytes output from applying delta D
+** delta_parse(D) -- returns rows describing delta D
+**
+** The delta format is the Fossil delta format, described in a comment
+** on the delete_create() function implementation below, and also at
+**
+** https://www.fossil-scm.org/fossil/doc/trunk/www/delta_format.wiki
+**
+** This delta format is used by the RBU extension, which is the main
+** reason that these routines are included in the extension library.
+** RBU does not use this extension directly. Rather, this extension is
+** provided as a convenience to developers who want to analyze RBU files
+** that contain deltas.
+*/
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+#include "sqlite3ext.h"
+SQLITE_EXTENSION_INIT1
+
+#ifndef SQLITE_AMALGAMATION
+/*
+** The "u32" type must be an unsigned 32-bit integer. Adjust this
+*/
+typedef unsigned int u32;
+
+/*
+** Must be a 16-bit value
+*/
+typedef short int s16;
+typedef unsigned short int u16;
+
+#endif /* SQLITE_AMALGAMATION */
+
+
+/*
+** The width of a hash window in bytes. The algorithm only works if this
+** is a power of 2.
+*/
+#define NHASH 16
+
+/*
+** The current state of the rolling hash.
+**
+** z[] holds the values that have been hashed. z[] is a circular buffer.
+** z[i] is the first entry and z[(i+NHASH-1)%NHASH] is the last entry of
+** the window.
+**
+** Hash.a is the sum of all elements of hash.z[]. Hash.b is a weighted
+** sum. Hash.b is z[i]*NHASH + z[i+1]*(NHASH-1) + ... + z[i+NHASH-1]*1.
+** (Each index for z[] should be module NHASH, of course. The %NHASH operator
+** is omitted in the prior expression for brevity.)
+*/
+typedef struct hash hash;
+struct hash {
+ u16 a, b; /* Hash values */
+ u16 i; /* Start of the hash window */
+ char z[NHASH]; /* The values that have been hashed */
+};
+
+/*
+** Initialize the rolling hash using the first NHASH characters of z[]
+*/
+static void hash_init(hash *pHash, const char *z){
+ u16 a, b, i;
+ a = b = z[0];
+ for(i=1; i<NHASH; i++){
+ a += z[i];
+ b += a;
+ }
+ memcpy(pHash->z, z, NHASH);
+ pHash->a = a & 0xffff;
+ pHash->b = b & 0xffff;
+ pHash->i = 0;
+}
+
+/*
+** Advance the rolling hash by a single character "c"
+*/
+static void hash_next(hash *pHash, int c){
+ u16 old = pHash->z[pHash->i];
+ pHash->z[pHash->i] = c;
+ pHash->i = (pHash->i+1)&(NHASH-1);
+ pHash->a = pHash->a - old + c;
+ pHash->b = pHash->b - NHASH*old + pHash->a;
+}
+
+/*
+** Return a 32-bit hash value
+*/
+static u32 hash_32bit(hash *pHash){
+ return (pHash->a & 0xffff) | (((u32)(pHash->b & 0xffff))<<16);
+}
+
+/*
+** Compute a hash on NHASH bytes.
+**
+** This routine is intended to be equivalent to:
+** hash h;
+** hash_init(&h, zInput);
+** return hash_32bit(&h);
+*/
+static u32 hash_once(const char *z){
+ u16 a, b, i;
+ a = b = z[0];
+ for(i=1; i<NHASH; i++){
+ a += z[i];
+ b += a;
+ }
+ return a | (((u32)b)<<16);
+}
+
+/*
+** Write an base-64 integer into the given buffer.
+*/
+static void putInt(unsigned int v, char **pz){
+ static const char zDigits[] =
+ "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~";
+ /* 123456789 123456789 123456789 123456789 123456789 123456789 123 */
+ int i, j;
+ char zBuf[20];
+ if( v==0 ){
+ *(*pz)++ = '0';
+ return;
+ }
+ for(i=0; v>0; i++, v>>=6){
+ zBuf[i] = zDigits[v&0x3f];
+ }
+ for(j=i-1; j>=0; j--){
+ *(*pz)++ = zBuf[j];
+ }
+}
+
+/*
+** Read bytes from *pz and convert them into a positive integer. When
+** finished, leave *pz pointing to the first character past the end of
+** the integer. The *pLen parameter holds the length of the string
+** in *pz and is decremented once for each character in the integer.
+*/
+static unsigned int deltaGetInt(const char **pz, int *pLen){
+ static const signed char zValue[] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
+ -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, 36,
+ -1, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
+ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, -1, -1, -1, 63, -1,
+ };
+ unsigned int v = 0;
+ int c;
+ unsigned char *z = (unsigned char*)*pz;
+ unsigned char *zStart = z;
+ while( (c = zValue[0x7f&*(z++)])>=0 ){
+ v = (v<<6) + c;
+ }
+ z--;
+ *pLen -= z - zStart;
+ *pz = (char*)z;
+ return v;
+}
+
+/*
+** Return the number digits in the base-64 representation of a positive integer
+*/
+static int digit_count(int v){
+ unsigned int i, x;
+ for(i=1, x=64; v>=x; i++, x <<= 6){}
+ return i;
+}
+
+#ifdef __GNUC__
+# define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__)
+#else
+# define GCC_VERSION 0
+#endif
+
+/*
+** Compute a 32-bit big-endian checksum on the N-byte buffer. If the
+** buffer is not a multiple of 4 bytes length, compute the sum that would
+** have occurred if the buffer was padded with zeros to the next multiple
+** of four bytes.
+*/
+static unsigned int checksum(const char *zIn, size_t N){
+ static const int byteOrderTest = 1;
+ const unsigned char *z = (const unsigned char *)zIn;
+ const unsigned char *zEnd = (const unsigned char*)&zIn[N&~3];
+ unsigned sum = 0;
+ assert( (z - (const unsigned char*)0)%4==0 ); /* Four-byte alignment */
+ if( 0==*(char*)&byteOrderTest ){
+ /* This is a big-endian machine */
+ while( z<zEnd ){
+ sum += *(unsigned*)z;
+ z += 4;
+ }
+ }else{
+ /* A little-endian machine */
+#if GCC_VERSION>=4003000
+ while( z<zEnd ){
+ sum += __builtin_bswap32(*(unsigned*)z);
+ z += 4;
+ }
+#elif defined(_MSC_VER) && _MSC_VER>=1300
+ while( z<zEnd ){
+ sum += _byteswap_ulong(*(unsigned*)z);
+ z += 4;
+ }
+#else
+ unsigned sum0 = 0;
+ unsigned sum1 = 0;
+ unsigned sum2 = 0;
+ while(N >= 16){
+ sum0 += ((unsigned)z[0] + z[4] + z[8] + z[12]);
+ sum1 += ((unsigned)z[1] + z[5] + z[9] + z[13]);
+ sum2 += ((unsigned)z[2] + z[6] + z[10]+ z[14]);
+ sum += ((unsigned)z[3] + z[7] + z[11]+ z[15]);
+ z += 16;
+ N -= 16;
+ }
+ while(N >= 4){
+ sum0 += z[0];
+ sum1 += z[1];
+ sum2 += z[2];
+ sum += z[3];
+ z += 4;
+ N -= 4;
+ }
+ sum += (sum2 << 8) + (sum1 << 16) + (sum0 << 24);
+#endif
+ }
+ switch(N&3){
+ case 3: sum += (z[2] << 8);
+ case 2: sum += (z[1] << 16);
+ case 1: sum += (z[0] << 24);
+ default: ;
+ }
+ return sum;
+}
+
+/*
+** Create a new delta.
+**
+** The delta is written into a preallocated buffer, zDelta, which
+** should be at least 60 bytes longer than the target file, zOut.
+** The delta string will be NUL-terminated, but it might also contain
+** embedded NUL characters if either the zSrc or zOut files are
+** binary. This function returns the length of the delta string
+** in bytes, excluding the final NUL terminator character.
+**
+** Output Format:
+**
+** The delta begins with a base64 number followed by a newline. This
+** number is the number of bytes in the TARGET file. Thus, given a
+** delta file z, a program can compute the size of the output file
+** simply by reading the first line and decoding the base-64 number
+** found there. The delta_output_size() routine does exactly this.
+**
+** After the initial size number, the delta consists of a series of
+** literal text segments and commands to copy from the SOURCE file.
+** A copy command looks like this:
+**
+** NNN@MMM,
+**
+** where NNN is the number of bytes to be copied and MMM is the offset
+** into the source file of the first byte (both base-64). If NNN is 0
+** it means copy the rest of the input file. Literal text is like this:
+**
+** NNN:TTTTT
+**
+** where NNN is the number of bytes of text (base-64) and TTTTT is the text.
+**
+** The last term is of the form
+**
+** NNN;
+**
+** In this case, NNN is a 32-bit bigendian checksum of the output file
+** that can be used to verify that the delta applied correctly. All
+** numbers are in base-64.
+**
+** Pure text files generate a pure text delta. Binary files generate a
+** delta that may contain some binary data.
+**
+** Algorithm:
+**
+** The encoder first builds a hash table to help it find matching
+** patterns in the source file. 16-byte chunks of the source file
+** sampled at evenly spaced intervals are used to populate the hash
+** table.
+**
+** Next we begin scanning the target file using a sliding 16-byte
+** window. The hash of the 16-byte window in the target is used to
+** search for a matching section in the source file. When a match
+** is found, a copy command is added to the delta. An effort is
+** made to extend the matching section to regions that come before
+** and after the 16-byte hash window. A copy command is only issued
+** if the result would use less space that just quoting the text
+** literally. Literal text is added to the delta for sections that
+** do not match or which can not be encoded efficiently using copy
+** commands.
+*/
+static int delta_create(
+ const char *zSrc, /* The source or pattern file */
+ unsigned int lenSrc, /* Length of the source file */
+ const char *zOut, /* The target file */
+ unsigned int lenOut, /* Length of the target file */
+ char *zDelta /* Write the delta into this buffer */
+){
+ int i, base;
+ char *zOrigDelta = zDelta;
+ hash h;
+ int nHash; /* Number of hash table entries */
+ int *landmark; /* Primary hash table */
+ int *collide; /* Collision chain */
+ int lastRead = -1; /* Last byte of zSrc read by a COPY command */
+
+ /* Add the target file size to the beginning of the delta
+ */
+ putInt(lenOut, &zDelta);
+ *(zDelta++) = '\n';
+
+ /* If the source file is very small, it means that we have no
+ ** chance of ever doing a copy command. Just output a single
+ ** literal segment for the entire target and exit.
+ */
+ if( lenSrc<=NHASH ){
+ putInt(lenOut, &zDelta);
+ *(zDelta++) = ':';
+ memcpy(zDelta, zOut, lenOut);
+ zDelta += lenOut;
+ putInt(checksum(zOut, lenOut), &zDelta);
+ *(zDelta++) = ';';
+ return zDelta - zOrigDelta;
+ }
+
+ /* Compute the hash table used to locate matching sections in the
+ ** source file.
+ */
+ nHash = lenSrc/NHASH;
+ collide = sqlite3_malloc64( (sqlite3_int64)nHash*2*sizeof(int) );
+ memset(collide, -1, nHash*2*sizeof(int));
+ landmark = &collide[nHash];
+ for(i=0; i<lenSrc-NHASH; i+=NHASH){
+ int hv = hash_once(&zSrc[i]) % nHash;
+ collide[i/NHASH] = landmark[hv];
+ landmark[hv] = i/NHASH;
+ }
+
+ /* Begin scanning the target file and generating copy commands and
+ ** literal sections of the delta.
+ */
+ base = 0; /* We have already generated everything before zOut[base] */
+ while( base+NHASH<lenOut ){
+ int iSrc, iBlock;
+ unsigned int bestCnt, bestOfst=0, bestLitsz=0;
+ hash_init(&h, &zOut[base]);
+ i = 0; /* Trying to match a landmark against zOut[base+i] */
+ bestCnt = 0;
+ while( 1 ){
+ int hv;
+ int limit = 250;
+
+ hv = hash_32bit(&h) % nHash;
+ iBlock = landmark[hv];
+ while( iBlock>=0 && (limit--)>0 ){
+ /*
+ ** The hash window has identified a potential match against
+ ** landmark block iBlock. But we need to investigate further.
+ **
+ ** Look for a region in zOut that matches zSrc. Anchor the search
+ ** at zSrc[iSrc] and zOut[base+i]. Do not include anything prior to
+ ** zOut[base] or after zOut[outLen] nor anything after zSrc[srcLen].
+ **
+ ** Set cnt equal to the length of the match and set ofst so that
+ ** zSrc[ofst] is the first element of the match. litsz is the number
+ ** of characters between zOut[base] and the beginning of the match.
+ ** sz will be the overhead (in bytes) needed to encode the copy
+ ** command. Only generate copy command if the overhead of the
+ ** copy command is less than the amount of literal text to be copied.
+ */
+ int cnt, ofst, litsz;
+ int j, k, x, y;
+ int sz;
+ int limitX;
+
+ /* Beginning at iSrc, match forwards as far as we can. j counts
+ ** the number of characters that match */
+ iSrc = iBlock*NHASH;
+ y = base+i;
+ limitX = ( lenSrc-iSrc <= lenOut-y ) ? lenSrc : iSrc + lenOut - y;
+ for(x=iSrc; x<limitX; x++, y++){
+ if( zSrc[x]!=zOut[y] ) break;
+ }
+ j = x - iSrc - 1;
+
+ /* Beginning at iSrc-1, match backwards as far as we can. k counts
+ ** the number of characters that match */
+ for(k=1; k<iSrc && k<=i; k++){
+ if( zSrc[iSrc-k]!=zOut[base+i-k] ) break;
+ }
+ k--;
+
+ /* Compute the offset and size of the matching region */
+ ofst = iSrc-k;
+ cnt = j+k+1;
+ litsz = i-k; /* Number of bytes of literal text before the copy */
+ /* sz will hold the number of bytes needed to encode the "insert"
+ ** command and the copy command, not counting the "insert" text */
+ sz = digit_count(i-k)+digit_count(cnt)+digit_count(ofst)+3;
+ if( cnt>=sz && cnt>bestCnt ){
+ /* Remember this match only if it is the best so far and it
+ ** does not increase the file size */
+ bestCnt = cnt;
+ bestOfst = iSrc-k;
+ bestLitsz = litsz;
+ }
+
+ /* Check the next matching block */
+ iBlock = collide[iBlock];
+ }
+
+ /* We have a copy command that does not cause the delta to be larger
+ ** than a literal insert. So add the copy command to the delta.
+ */
+ if( bestCnt>0 ){
+ if( bestLitsz>0 ){
+ /* Add an insert command before the copy */
+ putInt(bestLitsz,&zDelta);
+ *(zDelta++) = ':';
+ memcpy(zDelta, &zOut[base], bestLitsz);
+ zDelta += bestLitsz;
+ base += bestLitsz;
+ }
+ base += bestCnt;
+ putInt(bestCnt, &zDelta);
+ *(zDelta++) = '@';
+ putInt(bestOfst, &zDelta);
+ *(zDelta++) = ',';
+ if( bestOfst + bestCnt -1 > lastRead ){
+ lastRead = bestOfst + bestCnt - 1;
+ }
+ bestCnt = 0;
+ break;
+ }
+
+ /* If we reach this point, it means no match is found so far */
+ if( base+i+NHASH>=lenOut ){
+ /* We have reached the end of the file and have not found any
+ ** matches. Do an "insert" for everything that does not match */
+ putInt(lenOut-base, &zDelta);
+ *(zDelta++) = ':';
+ memcpy(zDelta, &zOut[base], lenOut-base);
+ zDelta += lenOut-base;
+ base = lenOut;
+ break;
+ }
+
+ /* Advance the hash by one character. Keep looking for a match */
+ hash_next(&h, zOut[base+i+NHASH]);
+ i++;
+ }
+ }
+ /* Output a final "insert" record to get all the text at the end of
+ ** the file that does not match anything in the source file.
+ */
+ if( base<lenOut ){
+ putInt(lenOut-base, &zDelta);
+ *(zDelta++) = ':';
+ memcpy(zDelta, &zOut[base], lenOut-base);
+ zDelta += lenOut-base;
+ }
+ /* Output the final checksum record. */
+ putInt(checksum(zOut, lenOut), &zDelta);
+ *(zDelta++) = ';';
+ sqlite3_free(collide);
+ return zDelta - zOrigDelta;
+}
+
+/*
+** Return the size (in bytes) of the output from applying
+** a delta.
+**
+** This routine is provided so that an procedure that is able
+** to call delta_apply() can learn how much space is required
+** for the output and hence allocate nor more space that is really
+** needed.
+*/
+static int delta_output_size(const char *zDelta, int lenDelta){
+ int size;
+ size = deltaGetInt(&zDelta, &lenDelta);
+ if( *zDelta!='\n' ){
+ /* ERROR: size integer not terminated by "\n" */
+ return -1;
+ }
+ return size;
+}
+
+
+/*
+** Apply a delta.
+**
+** The output buffer should be big enough to hold the whole output
+** file and a NUL terminator at the end. The delta_output_size()
+** routine will determine this size for you.
+**
+** The delta string should be null-terminated. But the delta string
+** may contain embedded NUL characters (if the input and output are
+** binary files) so we also have to pass in the length of the delta in
+** the lenDelta parameter.
+**
+** This function returns the size of the output file in bytes (excluding
+** the final NUL terminator character). Except, if the delta string is
+** malformed or intended for use with a source file other than zSrc,
+** then this routine returns -1.
+**
+** Refer to the delta_create() documentation above for a description
+** of the delta file format.
+*/
+static int delta_apply(
+ const char *zSrc, /* The source or pattern file */
+ int lenSrc, /* Length of the source file */
+ const char *zDelta, /* Delta to apply to the pattern */
+ int lenDelta, /* Length of the delta */
+ char *zOut /* Write the output into this preallocated buffer */
+){
+ unsigned int limit;
+ unsigned int total = 0;
+#ifdef FOSSIL_ENABLE_DELTA_CKSUM_TEST
+ char *zOrigOut = zOut;
+#endif
+
+ limit = deltaGetInt(&zDelta, &lenDelta);
+ if( *zDelta!='\n' ){
+ /* ERROR: size integer not terminated by "\n" */
+ return -1;
+ }
+ zDelta++; lenDelta--;
+ while( *zDelta && lenDelta>0 ){
+ unsigned int cnt, ofst;
+ cnt = deltaGetInt(&zDelta, &lenDelta);
+ switch( zDelta[0] ){
+ case '@': {
+ zDelta++; lenDelta--;
+ ofst = deltaGetInt(&zDelta, &lenDelta);
+ if( lenDelta>0 && zDelta[0]!=',' ){
+ /* ERROR: copy command not terminated by ',' */
+ return -1;
+ }
+ zDelta++; lenDelta--;
+ total += cnt;
+ if( total>limit ){
+ /* ERROR: copy exceeds output file size */
+ return -1;
+ }
+ if( ofst+cnt > lenSrc ){
+ /* ERROR: copy extends past end of input */
+ return -1;
+ }
+ memcpy(zOut, &zSrc[ofst], cnt);
+ zOut += cnt;
+ break;
+ }
+ case ':': {
+ zDelta++; lenDelta--;
+ total += cnt;
+ if( total>limit ){
+ /* ERROR: insert command gives an output larger than predicted */
+ return -1;
+ }
+ if( cnt>lenDelta ){
+ /* ERROR: insert count exceeds size of delta */
+ return -1;
+ }
+ memcpy(zOut, zDelta, cnt);
+ zOut += cnt;
+ zDelta += cnt;
+ lenDelta -= cnt;
+ break;
+ }
+ case ';': {
+ zDelta++; lenDelta--;
+ zOut[0] = 0;
+#ifdef FOSSIL_ENABLE_DELTA_CKSUM_TEST
+ if( cnt!=checksum(zOrigOut, total) ){
+ /* ERROR: bad checksum */
+ return -1;
+ }
+#endif
+ if( total!=limit ){
+ /* ERROR: generated size does not match predicted size */
+ return -1;
+ }
+ return total;
+ }
+ default: {
+ /* ERROR: unknown delta operator */
+ return -1;
+ }
+ }
+ }
+ /* ERROR: unterminated delta */
+ return -1;
+}
+
+/*
+** SQL functions: delta_create(X,Y)
+**
+** Return a delta for carrying X into Y.
+*/
+static void deltaCreateFunc(
+ sqlite3_context *context,
+ int argc,
+ sqlite3_value **argv
+){
+ const char *aOrig; int nOrig; /* old blob */
+ const char *aNew; int nNew; /* new blob */
+ char *aOut; int nOut; /* output delta */
+
+ assert( argc==2 );
+ if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
+ if( sqlite3_value_type(argv[1])==SQLITE_NULL ) return;
+ nOrig = sqlite3_value_bytes(argv[0]);
+ aOrig = (const char*)sqlite3_value_blob(argv[0]);
+ nNew = sqlite3_value_bytes(argv[1]);
+ aNew = (const char*)sqlite3_value_blob(argv[1]);
+ aOut = sqlite3_malloc64(nNew+70);
+ if( aOut==0 ){
+ sqlite3_result_error_nomem(context);
+ }else{
+ nOut = delta_create(aOrig, nOrig, aNew, nNew, aOut);
+ if( nOut<0 ){
+ sqlite3_free(aOut);
+ sqlite3_result_error(context, "cannot create fossil delta", -1);
+ }else{
+ sqlite3_result_blob(context, aOut, nOut, sqlite3_free);
+ }
+ }
+}
+
+/*
+** SQL functions: delta_apply(X,D)
+**
+** Return the result of applying delta D to input X.
+*/
+static void deltaApplyFunc(
+ sqlite3_context *context,
+ int argc,
+ sqlite3_value **argv
+){
+ const char *aOrig; int nOrig; /* The X input */
+ const char *aDelta; int nDelta; /* The input delta (D) */
+ char *aOut; int nOut, nOut2; /* The output */
+
+ assert( argc==2 );
+ if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
+ if( sqlite3_value_type(argv[1])==SQLITE_NULL ) return;
+ nOrig = sqlite3_value_bytes(argv[0]);
+ aOrig = (const char*)sqlite3_value_blob(argv[0]);
+ nDelta = sqlite3_value_bytes(argv[1]);
+ aDelta = (const char*)sqlite3_value_blob(argv[1]);
+
+ /* Figure out the size of the output */
+ nOut = delta_output_size(aDelta, nDelta);
+ if( nOut<0 ){
+ sqlite3_result_error(context, "corrupt fossil delta", -1);
+ return;
+ }
+ aOut = sqlite3_malloc64((sqlite3_int64)nOut+1);
+ if( aOut==0 ){
+ sqlite3_result_error_nomem(context);
+ }else{
+ nOut2 = delta_apply(aOrig, nOrig, aDelta, nDelta, aOut);
+ if( nOut2!=nOut ){
+ sqlite3_free(aOut);
+ sqlite3_result_error(context, "corrupt fossil delta", -1);
+ }else{
+ sqlite3_result_blob(context, aOut, nOut, sqlite3_free);
+ }
+ }
+}
+
+
+/*
+** SQL functions: delta_output_size(D)
+**
+** Return the size of the output that results from applying delta D.
+*/
+static void deltaOutputSizeFunc(
+ sqlite3_context *context,
+ int argc,
+ sqlite3_value **argv
+){
+ const char *aDelta; int nDelta; /* The input delta (D) */
+ int nOut; /* Size of output */
+ assert( argc==1 );
+ if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
+ nDelta = sqlite3_value_bytes(argv[0]);
+ aDelta = (const char*)sqlite3_value_blob(argv[0]);
+
+ /* Figure out the size of the output */
+ nOut = delta_output_size(aDelta, nDelta);
+ if( nOut<0 ){
+ sqlite3_result_error(context, "corrupt fossil delta", -1);
+ return;
+ }else{
+ sqlite3_result_int(context, nOut);
+ }
+}
+
+/*****************************************************************************
+** Table-valued SQL function: delta_parse(DELTA)
+**
+** Schema:
+**
+** CREATE TABLE delta_parse(
+** op TEXT,
+** a1 INT,
+** a2 ANY,
+** delta HIDDEN BLOB
+** );
+**
+** Given an input DELTA, this function parses the delta and returns
+** rows for each entry in the delta. The op column has one of the
+** values SIZE, COPY, INSERT, CHECKSUM, ERROR.
+**
+** Assuming no errors, the first row has op='SIZE'. a1 is the size of
+** the output in bytes and a2 is NULL.
+**
+** After the initial SIZE row, there are zero or more 'COPY' and/or 'INSERT'
+** rows. A COPY row means content is copied from the source into the
+** output. Column a1 is the number of bytes to copy and a2 is the offset
+** into source from which to begin copying. An INSERT row means to
+** insert text into the output stream. Column a1 is the number of bytes
+** to insert and column is a BLOB that contains the text to be inserted.
+**
+** The last row of a well-formed delta will have an op value of 'CHECKSUM'.
+** The a1 column will be the value of the checksum and a2 will be NULL.
+**
+** If the input delta is not well-formed, then a row with an op value
+** of 'ERROR' is returned. The a1 value of the ERROR row is the offset
+** into the delta where the error was encountered and a2 is NULL.
+*/
+typedef struct deltaparsevtab_vtab deltaparsevtab_vtab;
+typedef struct deltaparsevtab_cursor deltaparsevtab_cursor;
+struct deltaparsevtab_vtab {
+ sqlite3_vtab base; /* Base class - must be first */
+ /* No additional information needed */
+};
+struct deltaparsevtab_cursor {
+ sqlite3_vtab_cursor base; /* Base class - must be first */
+ char *aDelta; /* The delta being parsed */
+ int nDelta; /* Number of bytes in the delta */
+ int iCursor; /* Current cursor location */
+ int eOp; /* Name of current operator */
+ unsigned int a1, a2; /* Arguments to current operator */
+ int iNext; /* Next cursor value */
+};
+
+/* Operator names:
+*/
+static const char *azOp[] = {
+ "SIZE", "COPY", "INSERT", "CHECKSUM", "ERROR", "EOF"
+};
+#define DELTAPARSE_OP_SIZE 0
+#define DELTAPARSE_OP_COPY 1
+#define DELTAPARSE_OP_INSERT 2
+#define DELTAPARSE_OP_CHECKSUM 3
+#define DELTAPARSE_OP_ERROR 4
+#define DELTAPARSE_OP_EOF 5
+
+/*
+** The deltaparsevtabConnect() method is invoked to create a new
+** deltaparse virtual table.
+**
+** Think of this routine as the constructor for deltaparsevtab_vtab objects.
+**
+** All this routine needs to do is:
+**
+** (1) Allocate the deltaparsevtab_vtab object and initialize all fields.
+**
+** (2) Tell SQLite (via the sqlite3_declare_vtab() interface) what the
+** result set of queries against the virtual table will look like.
+*/
+static int deltaparsevtabConnect(
+ sqlite3 *db,
+ void *pAux,
+ int argc, const char *const*argv,
+ sqlite3_vtab **ppVtab,
+ char **pzErr
+){
+ deltaparsevtab_vtab *pNew;
+ int rc;
+
+ rc = sqlite3_declare_vtab(db,
+ "CREATE TABLE x(op,a1,a2,delta HIDDEN)"
+ );
+ /* For convenience, define symbolic names for the index to each column. */
+#define DELTAPARSEVTAB_OP 0
+#define DELTAPARSEVTAB_A1 1
+#define DELTAPARSEVTAB_A2 2
+#define DELTAPARSEVTAB_DELTA 3
+ if( rc==SQLITE_OK ){
+ pNew = sqlite3_malloc64( sizeof(*pNew) );
+ *ppVtab = (sqlite3_vtab*)pNew;
+ if( pNew==0 ) return SQLITE_NOMEM;
+ memset(pNew, 0, sizeof(*pNew));
+ sqlite3_vtab_config(db, SQLITE_VTAB_INNOCUOUS);
+ }
+ return rc;
+}
+
+/*
+** This method is the destructor for deltaparsevtab_vtab objects.
+*/
+static int deltaparsevtabDisconnect(sqlite3_vtab *pVtab){
+ deltaparsevtab_vtab *p = (deltaparsevtab_vtab*)pVtab;
+ sqlite3_free(p);
+ return SQLITE_OK;
+}
+
+/*
+** Constructor for a new deltaparsevtab_cursor object.
+*/
+static int deltaparsevtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
+ deltaparsevtab_cursor *pCur;
+ pCur = sqlite3_malloc( sizeof(*pCur) );
+ if( pCur==0 ) return SQLITE_NOMEM;
+ memset(pCur, 0, sizeof(*pCur));
+ *ppCursor = &pCur->base;
+ return SQLITE_OK;
+}
+
+/*
+** Destructor for a deltaparsevtab_cursor.
+*/
+static int deltaparsevtabClose(sqlite3_vtab_cursor *cur){
+ deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
+ sqlite3_free(pCur->aDelta);
+ sqlite3_free(pCur);
+ return SQLITE_OK;
+}
+
+
+/*
+** Advance a deltaparsevtab_cursor to its next row of output.
+*/
+static int deltaparsevtabNext(sqlite3_vtab_cursor *cur){
+ deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
+ const char *z;
+ int i = 0;
+
+ pCur->iCursor = pCur->iNext;
+ z = pCur->aDelta + pCur->iCursor;
+ pCur->a1 = deltaGetInt(&z, &i);
+ switch( z[0] ){
+ case '@': {
+ z++;
+ pCur->a2 = deltaGetInt(&z, &i);
+ pCur->eOp = DELTAPARSE_OP_COPY;
+ pCur->iNext = (int)(&z[1] - pCur->aDelta);
+ break;
+ }
+ case ':': {
+ z++;
+ pCur->a2 = (unsigned int)(z - pCur->aDelta);
+ pCur->eOp = DELTAPARSE_OP_INSERT;
+ pCur->iNext = (int)(&z[pCur->a1] - pCur->aDelta);
+ break;
+ }
+ case ';': {
+ pCur->eOp = DELTAPARSE_OP_CHECKSUM;
+ pCur->iNext = pCur->nDelta;
+ break;
+ }
+ default: {
+ if( pCur->iNext==pCur->nDelta ){
+ pCur->eOp = DELTAPARSE_OP_EOF;
+ }else{
+ pCur->eOp = DELTAPARSE_OP_ERROR;
+ pCur->iNext = pCur->nDelta;
+ }
+ break;
+ }
+ }
+ return SQLITE_OK;
+}
+
+/*
+** Return values of columns for the row at which the deltaparsevtab_cursor
+** is currently pointing.
+*/
+static int deltaparsevtabColumn(
+ sqlite3_vtab_cursor *cur, /* The cursor */
+ sqlite3_context *ctx, /* First argument to sqlite3_result_...() */
+ int i /* Which column to return */
+){
+ deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
+ switch( i ){
+ case DELTAPARSEVTAB_OP: {
+ sqlite3_result_text(ctx, azOp[pCur->eOp], -1, SQLITE_STATIC);
+ break;
+ }
+ case DELTAPARSEVTAB_A1: {
+ sqlite3_result_int(ctx, pCur->a1);
+ break;
+ }
+ case DELTAPARSEVTAB_A2: {
+ if( pCur->eOp==DELTAPARSE_OP_COPY ){
+ sqlite3_result_int(ctx, pCur->a2);
+ }else if( pCur->eOp==DELTAPARSE_OP_INSERT ){
+ sqlite3_result_blob(ctx, pCur->aDelta+pCur->a2, pCur->a1,
+ SQLITE_TRANSIENT);
+ }
+ break;
+ }
+ case DELTAPARSEVTAB_DELTA: {
+ sqlite3_result_blob(ctx, pCur->aDelta, pCur->nDelta, SQLITE_TRANSIENT);
+ break;
+ }
+ }
+ return SQLITE_OK;
+}
+
+/*
+** Return the rowid for the current row. In this implementation, the
+** rowid is the same as the output value.
+*/
+static int deltaparsevtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
+ deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
+ *pRowid = pCur->iCursor;
+ return SQLITE_OK;
+}
+
+/*
+** Return TRUE if the cursor has been moved off of the last
+** row of output.
+*/
+static int deltaparsevtabEof(sqlite3_vtab_cursor *cur){
+ deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
+ return pCur->eOp==DELTAPARSE_OP_EOF;
+}
+
+/*
+** This method is called to "rewind" the deltaparsevtab_cursor object back
+** to the first row of output. This method is always called at least
+** once prior to any call to deltaparsevtabColumn() or deltaparsevtabRowid() or
+** deltaparsevtabEof().
+*/
+static int deltaparsevtabFilter(
+ sqlite3_vtab_cursor *pVtabCursor,
+ int idxNum, const char *idxStr,
+ int argc, sqlite3_value **argv
+){
+ deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor *)pVtabCursor;
+ const char *a;
+ int i = 0;
+ pCur->eOp = DELTAPARSE_OP_ERROR;
+ if( idxNum!=1 ){
+ return SQLITE_OK;
+ }
+ pCur->nDelta = sqlite3_value_bytes(argv[0]);
+ a = (const char*)sqlite3_value_blob(argv[0]);
+ if( pCur->nDelta==0 || a==0 ){
+ return SQLITE_OK;
+ }
+ pCur->aDelta = sqlite3_malloc64( pCur->nDelta+1 );
+ if( pCur->aDelta==0 ){
+ pCur->nDelta = 0;
+ return SQLITE_NOMEM;
+ }
+ memcpy(pCur->aDelta, a, pCur->nDelta);
+ pCur->aDelta[pCur->nDelta] = 0;
+ a = pCur->aDelta;
+ pCur->eOp = DELTAPARSE_OP_SIZE;
+ pCur->a1 = deltaGetInt(&a, &i);
+ if( a[0]!='\n' ){
+ pCur->eOp = DELTAPARSE_OP_ERROR;
+ pCur->a1 = pCur->a2 = 0;
+ pCur->iNext = pCur->nDelta;
+ return SQLITE_OK;
+ }
+ a++;
+ pCur->iNext = (unsigned int)(a - pCur->aDelta);
+ return SQLITE_OK;
+}
+
+/*
+** SQLite will invoke this method one or more times while planning a query
+** that uses the virtual table. This routine needs to create
+** a query plan for each invocation and compute an estimated cost for that
+** plan.
+*/
+static int deltaparsevtabBestIndex(
+ sqlite3_vtab *tab,
+ sqlite3_index_info *pIdxInfo
+){
+ int i;
+ for(i=0; i<pIdxInfo->nConstraint; i++){
+ if( pIdxInfo->aConstraint[i].iColumn != DELTAPARSEVTAB_DELTA ) continue;
+ if( pIdxInfo->aConstraint[i].usable==0 ) continue;
+ if( pIdxInfo->aConstraint[i].op!=SQLITE_INDEX_CONSTRAINT_EQ ) continue;
+ pIdxInfo->aConstraintUsage[i].argvIndex = 1;
+ pIdxInfo->aConstraintUsage[i].omit = 1;
+ pIdxInfo->estimatedCost = (double)1;
+ pIdxInfo->estimatedRows = 10;
+ pIdxInfo->idxNum = 1;
+ return SQLITE_OK;
+ }
+ pIdxInfo->idxNum = 0;
+ pIdxInfo->estimatedCost = (double)0x7fffffff;
+ pIdxInfo->estimatedRows = 0x7fffffff;
+ return SQLITE_CONSTRAINT;
+}
+
+/*
+** This following structure defines all the methods for the
+** virtual table.
+*/
+static sqlite3_module deltaparsevtabModule = {
+ /* iVersion */ 0,
+ /* xCreate */ 0,
+ /* xConnect */ deltaparsevtabConnect,
+ /* xBestIndex */ deltaparsevtabBestIndex,
+ /* xDisconnect */ deltaparsevtabDisconnect,
+ /* xDestroy */ 0,
+ /* xOpen */ deltaparsevtabOpen,
+ /* xClose */ deltaparsevtabClose,
+ /* xFilter */ deltaparsevtabFilter,
+ /* xNext */ deltaparsevtabNext,
+ /* xEof */ deltaparsevtabEof,
+ /* xColumn */ deltaparsevtabColumn,
+ /* xRowid */ deltaparsevtabRowid,
+ /* xUpdate */ 0,
+ /* xBegin */ 0,
+ /* xSync */ 0,
+ /* xCommit */ 0,
+ /* xRollback */ 0,
+ /* xFindMethod */ 0,
+ /* xRename */ 0,
+ /* xSavepoint */ 0,
+ /* xRelease */ 0,
+ /* xRollbackTo */ 0,
+ /* xShadowName */ 0
+};
+
+
+
+#ifdef _WIN32
+__declspec(dllexport)
+#endif
+int sqlite3_fossildelta_init(
+ sqlite3 *db,
+ char **pzErrMsg,
+ const sqlite3_api_routines *pApi
+){
+ static const int enc = SQLITE_UTF8|SQLITE_INNOCUOUS;
+ int rc = SQLITE_OK;
+ SQLITE_EXTENSION_INIT2(pApi);
+ (void)pzErrMsg; /* Unused parameter */
+ rc = sqlite3_create_function(db, "delta_create", 2, enc, 0,
+ deltaCreateFunc, 0, 0);
+ if( rc==SQLITE_OK ){
+ rc = sqlite3_create_function(db, "delta_apply", 2, enc, 0,
+ deltaApplyFunc, 0, 0);
+ }
+ if( rc==SQLITE_OK ){
+ rc = sqlite3_create_function(db, "delta_output_size", 1, enc, 0,
+ deltaOutputSizeFunc, 0, 0);
+ }
+ if( rc==SQLITE_OK ){
+ rc = sqlite3_create_module(db, "delta_parse", &deltaparsevtabModule, 0);
+ }
+ return rc;
+}