summaryrefslogtreecommitdiffstats
path: root/ext/fts5/fts5_varint.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/fts5/fts5_varint.c')
-rw-r--r--ext/fts5/fts5_varint.c344
1 files changed, 344 insertions, 0 deletions
diff --git a/ext/fts5/fts5_varint.c b/ext/fts5/fts5_varint.c
new file mode 100644
index 0000000..12fac2d
--- /dev/null
+++ b/ext/fts5/fts5_varint.c
@@ -0,0 +1,344 @@
+/*
+** 2015 May 30
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** Routines for varint serialization and deserialization.
+*/
+
+
+#include "fts5Int.h"
+
+/*
+** This is a copy of the sqlite3GetVarint32() routine from the SQLite core.
+** Except, this version does handle the single byte case that the core
+** version depends on being handled before its function is called.
+*/
+int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){
+ u32 a,b;
+
+ /* The 1-byte case. Overwhelmingly the most common. */
+ a = *p;
+ /* a: p0 (unmasked) */
+ if (!(a&0x80))
+ {
+ /* Values between 0 and 127 */
+ *v = a;
+ return 1;
+ }
+
+ /* The 2-byte case */
+ p++;
+ b = *p;
+ /* b: p1 (unmasked) */
+ if (!(b&0x80))
+ {
+ /* Values between 128 and 16383 */
+ a &= 0x7f;
+ a = a<<7;
+ *v = a | b;
+ return 2;
+ }
+
+ /* The 3-byte case */
+ p++;
+ a = a<<14;
+ a |= *p;
+ /* a: p0<<14 | p2 (unmasked) */
+ if (!(a&0x80))
+ {
+ /* Values between 16384 and 2097151 */
+ a &= (0x7f<<14)|(0x7f);
+ b &= 0x7f;
+ b = b<<7;
+ *v = a | b;
+ return 3;
+ }
+
+ /* A 32-bit varint is used to store size information in btrees.
+ ** Objects are rarely larger than 2MiB limit of a 3-byte varint.
+ ** A 3-byte varint is sufficient, for example, to record the size
+ ** of a 1048569-byte BLOB or string.
+ **
+ ** We only unroll the first 1-, 2-, and 3- byte cases. The very
+ ** rare larger cases can be handled by the slower 64-bit varint
+ ** routine.
+ */
+ {
+ u64 v64;
+ u8 n;
+ p -= 2;
+ n = sqlite3Fts5GetVarint(p, &v64);
+ *v = ((u32)v64) & 0x7FFFFFFF;
+ assert( n>3 && n<=9 );
+ return n;
+ }
+}
+
+
+/*
+** Bitmasks used by sqlite3GetVarint(). These precomputed constants
+** are defined here rather than simply putting the constant expressions
+** inline in order to work around bugs in the RVT compiler.
+**
+** SLOT_2_0 A mask for (0x7f<<14) | 0x7f
+**
+** SLOT_4_2_0 A mask for (0x7f<<28) | SLOT_2_0
+*/
+#define SLOT_2_0 0x001fc07f
+#define SLOT_4_2_0 0xf01fc07f
+
+/*
+** Read a 64-bit variable-length integer from memory starting at p[0].
+** Return the number of bytes read. The value is stored in *v.
+*/
+u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){
+ u32 a,b,s;
+
+ a = *p;
+ /* a: p0 (unmasked) */
+ if (!(a&0x80))
+ {
+ *v = a;
+ return 1;
+ }
+
+ p++;
+ b = *p;
+ /* b: p1 (unmasked) */
+ if (!(b&0x80))
+ {
+ a &= 0x7f;
+ a = a<<7;
+ a |= b;
+ *v = a;
+ return 2;
+ }
+
+ /* Verify that constants are precomputed correctly */
+ assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) );
+ assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) );
+
+ p++;
+ a = a<<14;
+ a |= *p;
+ /* a: p0<<14 | p2 (unmasked) */
+ if (!(a&0x80))
+ {
+ a &= SLOT_2_0;
+ b &= 0x7f;
+ b = b<<7;
+ a |= b;
+ *v = a;
+ return 3;
+ }
+
+ /* CSE1 from below */
+ a &= SLOT_2_0;
+ p++;
+ b = b<<14;
+ b |= *p;
+ /* b: p1<<14 | p3 (unmasked) */
+ if (!(b&0x80))
+ {
+ b &= SLOT_2_0;
+ /* moved CSE1 up */
+ /* a &= (0x7f<<14)|(0x7f); */
+ a = a<<7;
+ a |= b;
+ *v = a;
+ return 4;
+ }
+
+ /* a: p0<<14 | p2 (masked) */
+ /* b: p1<<14 | p3 (unmasked) */
+ /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
+ /* moved CSE1 up */
+ /* a &= (0x7f<<14)|(0x7f); */
+ b &= SLOT_2_0;
+ s = a;
+ /* s: p0<<14 | p2 (masked) */
+
+ p++;
+ a = a<<14;
+ a |= *p;
+ /* a: p0<<28 | p2<<14 | p4 (unmasked) */
+ if (!(a&0x80))
+ {
+ /* we can skip these cause they were (effectively) done above in calc'ing s */
+ /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
+ /* b &= (0x7f<<14)|(0x7f); */
+ b = b<<7;
+ a |= b;
+ s = s>>18;
+ *v = ((u64)s)<<32 | a;
+ return 5;
+ }
+
+ /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
+ s = s<<7;
+ s |= b;
+ /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
+
+ p++;
+ b = b<<14;
+ b |= *p;
+ /* b: p1<<28 | p3<<14 | p5 (unmasked) */
+ if (!(b&0x80))
+ {
+ /* we can skip this cause it was (effectively) done above in calc'ing s */
+ /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
+ a &= SLOT_2_0;
+ a = a<<7;
+ a |= b;
+ s = s>>18;
+ *v = ((u64)s)<<32 | a;
+ return 6;
+ }
+
+ p++;
+ a = a<<14;
+ a |= *p;
+ /* a: p2<<28 | p4<<14 | p6 (unmasked) */
+ if (!(a&0x80))
+ {
+ a &= SLOT_4_2_0;
+ b &= SLOT_2_0;
+ b = b<<7;
+ a |= b;
+ s = s>>11;
+ *v = ((u64)s)<<32 | a;
+ return 7;
+ }
+
+ /* CSE2 from below */
+ a &= SLOT_2_0;
+ p++;
+ b = b<<14;
+ b |= *p;
+ /* b: p3<<28 | p5<<14 | p7 (unmasked) */
+ if (!(b&0x80))
+ {
+ b &= SLOT_4_2_0;
+ /* moved CSE2 up */
+ /* a &= (0x7f<<14)|(0x7f); */
+ a = a<<7;
+ a |= b;
+ s = s>>4;
+ *v = ((u64)s)<<32 | a;
+ return 8;
+ }
+
+ p++;
+ a = a<<15;
+ a |= *p;
+ /* a: p4<<29 | p6<<15 | p8 (unmasked) */
+
+ /* moved CSE2 up */
+ /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */
+ b &= SLOT_2_0;
+ b = b<<8;
+ a |= b;
+
+ s = s<<4;
+ b = p[-4];
+ b &= 0x7f;
+ b = b>>3;
+ s |= b;
+
+ *v = ((u64)s)<<32 | a;
+
+ return 9;
+}
+
+/*
+** The variable-length integer encoding is as follows:
+**
+** KEY:
+** A = 0xxxxxxx 7 bits of data and one flag bit
+** B = 1xxxxxxx 7 bits of data and one flag bit
+** C = xxxxxxxx 8 bits of data
+**
+** 7 bits - A
+** 14 bits - BA
+** 21 bits - BBA
+** 28 bits - BBBA
+** 35 bits - BBBBA
+** 42 bits - BBBBBA
+** 49 bits - BBBBBBA
+** 56 bits - BBBBBBBA
+** 64 bits - BBBBBBBBC
+*/
+
+#ifdef SQLITE_NOINLINE
+# define FTS5_NOINLINE SQLITE_NOINLINE
+#else
+# define FTS5_NOINLINE
+#endif
+
+/*
+** Write a 64-bit variable-length integer to memory starting at p[0].
+** The length of data write will be between 1 and 9 bytes. The number
+** of bytes written is returned.
+**
+** A variable-length integer consists of the lower 7 bits of each byte
+** for all bytes that have the 8th bit set and one byte with the 8th
+** bit clear. Except, if we get to the 9th byte, it stores the full
+** 8 bits and is the last byte.
+*/
+static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){
+ int i, j, n;
+ u8 buf[10];
+ if( v & (((u64)0xff000000)<<32) ){
+ p[8] = (u8)v;
+ v >>= 8;
+ for(i=7; i>=0; i--){
+ p[i] = (u8)((v & 0x7f) | 0x80);
+ v >>= 7;
+ }
+ return 9;
+ }
+ n = 0;
+ do{
+ buf[n++] = (u8)((v & 0x7f) | 0x80);
+ v >>= 7;
+ }while( v!=0 );
+ buf[0] &= 0x7f;
+ assert( n<=9 );
+ for(i=0, j=n-1; j>=0; j--, i++){
+ p[i] = buf[j];
+ }
+ return n;
+}
+
+int sqlite3Fts5PutVarint(unsigned char *p, u64 v){
+ if( v<=0x7f ){
+ p[0] = v&0x7f;
+ return 1;
+ }
+ if( v<=0x3fff ){
+ p[0] = ((v>>7)&0x7f)|0x80;
+ p[1] = v&0x7f;
+ return 2;
+ }
+ return fts5PutVarint64(p,v);
+}
+
+
+int sqlite3Fts5GetVarintLen(u32 iVal){
+#if 0
+ if( iVal<(1 << 7 ) ) return 1;
+#endif
+ assert( iVal>=(1 << 7) );
+ if( iVal<(1 << 14) ) return 2;
+ if( iVal<(1 << 21) ) return 3;
+ if( iVal<(1 << 28) ) return 4;
+ return 5;
+}