summaryrefslogtreecommitdiffstats
path: root/ext/fts3/tool/fts3view.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/fts3/tool/fts3view.c')
-rw-r--r--ext/fts3/tool/fts3view.c875
1 files changed, 875 insertions, 0 deletions
diff --git a/ext/fts3/tool/fts3view.c b/ext/fts3/tool/fts3view.c
new file mode 100644
index 0000000..9558cde
--- /dev/null
+++ b/ext/fts3/tool/fts3view.c
@@ -0,0 +1,875 @@
+/*
+** This program is a debugging and analysis utility that displays
+** information about an FTS3 or FTS4 index.
+**
+** Link this program against the SQLite3 amalgamation with the
+** SQLITE_ENABLE_FTS4 compile-time option. Then run it as:
+**
+** fts3view DATABASE
+**
+** to get a list of all FTS3/4 tables in DATABASE, or do
+**
+** fts3view DATABASE TABLE COMMAND ....
+**
+** to see various aspects of the TABLE table. Type fts3view with no
+** arguments for a list of available COMMANDs.
+*/
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "sqlite3.h"
+
+/*
+** Extra command-line arguments:
+*/
+int nExtra;
+char **azExtra;
+
+/*
+** Look for a command-line argument.
+*/
+const char *findOption(const char *zName, int hasArg, const char *zDefault){
+ int i;
+ const char *zResult = zDefault;
+ for(i=0; i<nExtra; i++){
+ const char *z = azExtra[i];
+ while( z[0]=='-' ) z++;
+ if( strcmp(z, zName)==0 ){
+ int j = 1;
+ if( hasArg==0 || i==nExtra-1 ) j = 0;
+ zResult = azExtra[i+j];
+ while( i+j<nExtra ){
+ azExtra[i] = azExtra[i+j+1];
+ i++;
+ }
+ break;
+ }
+ }
+ return zResult;
+}
+
+
+/*
+** Prepare an SQL query
+*/
+static sqlite3_stmt *prepare(sqlite3 *db, const char *zFormat, ...){
+ va_list ap;
+ char *zSql;
+ sqlite3_stmt *pStmt;
+ int rc;
+
+ va_start(ap, zFormat);
+ zSql = sqlite3_vmprintf(zFormat, ap);
+ va_end(ap);
+ rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
+ if( rc ){
+ fprintf(stderr, "Error: %s\nSQL: %s\n", sqlite3_errmsg(db), zSql);
+ exit(1);
+ }
+ sqlite3_free(zSql);
+ return pStmt;
+}
+
+/*
+** Run an SQL statement
+*/
+static int runSql(sqlite3 *db, const char *zFormat, ...){
+ va_list ap;
+ char *zSql;
+ int rc;
+
+ va_start(ap, zFormat);
+ zSql = sqlite3_vmprintf(zFormat, ap);
+ rc = sqlite3_exec(db, zSql, 0, 0, 0);
+ va_end(ap);
+ return rc;
+}
+
+/*
+** Show the table schema
+*/
+static void showSchema(sqlite3 *db, const char *zTab){
+ sqlite3_stmt *pStmt;
+ pStmt = prepare(db,
+ "SELECT sql FROM sqlite_schema"
+ " WHERE name LIKE '%q%%'"
+ " ORDER BY 1",
+ zTab);
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ printf("%s;\n", sqlite3_column_text(pStmt, 0));
+ }
+ sqlite3_finalize(pStmt);
+ pStmt = prepare(db, "PRAGMA page_size");
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ printf("PRAGMA page_size=%s;\n", sqlite3_column_text(pStmt, 0));
+ }
+ sqlite3_finalize(pStmt);
+ pStmt = prepare(db, "PRAGMA journal_mode");
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ printf("PRAGMA journal_mode=%s;\n", sqlite3_column_text(pStmt, 0));
+ }
+ sqlite3_finalize(pStmt);
+ pStmt = prepare(db, "PRAGMA auto_vacuum");
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ const char *zType = "???";
+ switch( sqlite3_column_int(pStmt, 0) ){
+ case 0: zType = "OFF"; break;
+ case 1: zType = "FULL"; break;
+ case 2: zType = "INCREMENTAL"; break;
+ }
+ printf("PRAGMA auto_vacuum=%s;\n", zType);
+ }
+ sqlite3_finalize(pStmt);
+ pStmt = prepare(db, "PRAGMA encoding");
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ printf("PRAGMA encoding=%s;\n", sqlite3_column_text(pStmt, 0));
+ }
+ sqlite3_finalize(pStmt);
+}
+
+/*
+** Read a 64-bit variable-length integer from memory starting at p[0].
+** Return the number of bytes read, or 0 on error.
+** The value is stored in *v.
+*/
+int getVarint(const unsigned char *p, sqlite_int64 *v){
+ const unsigned char *q = p;
+ sqlite_uint64 x = 0, y = 1;
+ while( (*q&0x80)==0x80 && q-(unsigned char *)p<9 ){
+ x += y * (*q++ & 0x7f);
+ y <<= 7;
+ }
+ x += y * (*q++);
+ *v = (sqlite_int64) x;
+ return (int) (q - (unsigned char *)p);
+}
+
+
+/* Show the content of the %_stat table
+*/
+static void showStat(sqlite3 *db, const char *zTab){
+ sqlite3_stmt *pStmt;
+ pStmt = prepare(db, "SELECT id, value FROM '%q_stat'", zTab);
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ printf("stat[%d] =", sqlite3_column_int(pStmt, 0));
+ switch( sqlite3_column_type(pStmt, 1) ){
+ case SQLITE_INTEGER: {
+ printf(" %d\n", sqlite3_column_int(pStmt, 1));
+ break;
+ }
+ case SQLITE_BLOB: {
+ unsigned char *x = (unsigned char*)sqlite3_column_blob(pStmt, 1);
+ int len = sqlite3_column_bytes(pStmt, 1);
+ int i = 0;
+ sqlite3_int64 v;
+ while( i<len ){
+ i += getVarint(x, &v);
+ printf(" %lld", v);
+ }
+ printf("\n");
+ break;
+ }
+ }
+ }
+ sqlite3_finalize(pStmt);
+}
+
+/*
+** Report on the vocabulary. This creates an fts4aux table with a random
+** name, but deletes it in the end.
+*/
+static void showVocabulary(sqlite3 *db, const char *zTab){
+ char *zAux;
+ sqlite3_uint64 r;
+ sqlite3_stmt *pStmt;
+ int nDoc = 0;
+ int nToken = 0;
+ int nOccurrence = 0;
+ int nTop;
+ int n, i;
+
+ sqlite3_randomness(sizeof(r), &r);
+ zAux = sqlite3_mprintf("viewer_%llx", zTab, r);
+ runSql(db, "BEGIN");
+ pStmt = prepare(db, "SELECT count(*) FROM %Q", zTab);
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ nDoc = sqlite3_column_int(pStmt, 0);
+ }
+ sqlite3_finalize(pStmt);
+ printf("Number of documents...................... %9d\n", nDoc);
+
+ runSql(db, "CREATE VIRTUAL TABLE %s USING fts4aux(%Q)", zAux, zTab);
+ pStmt = prepare(db,
+ "SELECT count(*), sum(occurrences) FROM %s WHERE col='*'",
+ zAux);
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ nToken = sqlite3_column_int(pStmt, 0);
+ nOccurrence = sqlite3_column_int(pStmt, 1);
+ }
+ sqlite3_finalize(pStmt);
+ printf("Total tokens in all documents............ %9d\n", nOccurrence);
+ printf("Total number of distinct tokens.......... %9d\n", nToken);
+ if( nToken==0 ) goto end_vocab;
+
+ n = 0;
+ pStmt = prepare(db, "SELECT count(*) FROM %s"
+ " WHERE col='*' AND occurrences==1", zAux);
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ n = sqlite3_column_int(pStmt, 0);
+ }
+ sqlite3_finalize(pStmt);
+ printf("Tokens used exactly once................. %9d %5.2f%%\n",
+ n, n*100.0/nToken);
+
+ n = 0;
+ pStmt = prepare(db, "SELECT count(*) FROM %s"
+ " WHERE col='*' AND documents==1", zAux);
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ n = sqlite3_column_int(pStmt, 0);
+ }
+ sqlite3_finalize(pStmt);
+ printf("Tokens used in only one document......... %9d %5.2f%%\n",
+ n, n*100.0/nToken);
+
+ if( nDoc>=2000 ){
+ n = 0;
+ pStmt = prepare(db, "SELECT count(*) FROM %s"
+ " WHERE col='*' AND occurrences<=%d", zAux, nDoc/1000);
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ n = sqlite3_column_int(pStmt, 0);
+ }
+ sqlite3_finalize(pStmt);
+ printf("Tokens used in 0.1%% or less of docs...... %9d %5.2f%%\n",
+ n, n*100.0/nToken);
+ }
+
+ if( nDoc>=200 ){
+ n = 0;
+ pStmt = prepare(db, "SELECT count(*) FROM %s"
+ " WHERE col='*' AND occurrences<=%d", zAux, nDoc/100);
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ n = sqlite3_column_int(pStmt, 0);
+ }
+ sqlite3_finalize(pStmt);
+ printf("Tokens used in 1%% or less of docs........ %9d %5.2f%%\n",
+ n, n*100.0/nToken);
+ }
+
+ nTop = atoi(findOption("top", 1, "25"));
+ printf("The %d most common tokens:\n", nTop);
+ pStmt = prepare(db,
+ "SELECT term, documents FROM %s"
+ " WHERE col='*'"
+ " ORDER BY documents DESC, term"
+ " LIMIT %d", zAux, nTop);
+ i = 0;
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ i++;
+ n = sqlite3_column_int(pStmt, 1);
+ printf(" %2d. %-30s %9d docs %5.2f%%\n", i,
+ sqlite3_column_text(pStmt, 0), n, n*100.0/nDoc);
+ }
+ sqlite3_finalize(pStmt);
+
+end_vocab:
+ runSql(db, "ROLLBACK");
+ sqlite3_free(zAux);
+}
+
+/*
+** Report on the number and sizes of segments
+*/
+static void showSegmentStats(sqlite3 *db, const char *zTab){
+ sqlite3_stmt *pStmt;
+ int nSeg = 0;
+ sqlite3_int64 szSeg = 0, mxSeg = 0;
+ int nIdx = 0;
+ sqlite3_int64 szIdx = 0, mxIdx = 0;
+ int nRoot = 0;
+ sqlite3_int64 szRoot = 0, mxRoot = 0;
+ sqlite3_int64 mx;
+ int nLeaf;
+ int n;
+ int pgsz;
+ int mxLevel;
+ int i;
+
+ pStmt = prepare(db,
+ "SELECT count(*), sum(length(block)), max(length(block))"
+ " FROM '%q_segments'",
+ zTab);
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ nSeg = sqlite3_column_int(pStmt, 0);
+ szSeg = sqlite3_column_int64(pStmt, 1);
+ mxSeg = sqlite3_column_int64(pStmt, 2);
+ }
+ sqlite3_finalize(pStmt);
+ pStmt = prepare(db,
+ "SELECT count(*), sum(length(block)), max(length(block))"
+ " FROM '%q_segments' a JOIN '%q_segdir' b"
+ " WHERE a.blockid BETWEEN b.leaves_end_block+1 AND b.end_block",
+ zTab, zTab);
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ nIdx = sqlite3_column_int(pStmt, 0);
+ szIdx = sqlite3_column_int64(pStmt, 1);
+ mxIdx = sqlite3_column_int64(pStmt, 2);
+ }
+ sqlite3_finalize(pStmt);
+ pStmt = prepare(db,
+ "SELECT count(*), sum(length(root)), max(length(root))"
+ " FROM '%q_segdir'",
+ zTab);
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ nRoot = sqlite3_column_int(pStmt, 0);
+ szRoot = sqlite3_column_int64(pStmt, 1);
+ mxRoot = sqlite3_column_int64(pStmt, 2);
+ }
+ sqlite3_finalize(pStmt);
+
+ printf("Number of segments....................... %9d\n", nSeg+nRoot);
+ printf("Number of leaf segments.................. %9d\n", nSeg-nIdx);
+ printf("Number of index segments................. %9d\n", nIdx);
+ printf("Number of root segments.................. %9d\n", nRoot);
+ printf("Total size of all segments............... %9lld\n", szSeg+szRoot);
+ printf("Total size of all leaf segments.......... %9lld\n", szSeg-szIdx);
+ printf("Total size of all index segments......... %9lld\n", szIdx);
+ printf("Total size of all root segments.......... %9lld\n", szRoot);
+ if( nSeg>0 ){
+ printf("Average size of all segments............. %11.1f\n",
+ (double)(szSeg+szRoot)/(double)(nSeg+nRoot));
+ printf("Average size of leaf segments............ %11.1f\n",
+ (double)(szSeg-szIdx)/(double)(nSeg-nIdx));
+ }
+ if( nIdx>0 ){
+ printf("Average size of index segments........... %11.1f\n",
+ (double)szIdx/(double)nIdx);
+ }
+ if( nRoot>0 ){
+ printf("Average size of root segments............ %11.1f\n",
+ (double)szRoot/(double)nRoot);
+ }
+ mx = mxSeg;
+ if( mx<mxRoot ) mx = mxRoot;
+ printf("Maximum segment size..................... %9lld\n", mx);
+ printf("Maximum index segment size............... %9lld\n", mxIdx);
+ printf("Maximum root segment size................ %9lld\n", mxRoot);
+
+ pStmt = prepare(db, "PRAGMA page_size");
+ pgsz = 1024;
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ pgsz = sqlite3_column_int(pStmt, 0);
+ }
+ sqlite3_finalize(pStmt);
+ printf("Database page size....................... %9d\n", pgsz);
+ pStmt = prepare(db,
+ "SELECT count(*)"
+ " FROM '%q_segments' a JOIN '%q_segdir' b"
+ " WHERE a.blockid BETWEEN b.start_block AND b.leaves_end_block"
+ " AND length(a.block)>%d",
+ zTab, zTab, pgsz-45);
+ n = 0;
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ n = sqlite3_column_int(pStmt, 0);
+ }
+ sqlite3_finalize(pStmt);
+ nLeaf = nSeg - nIdx;
+ printf("Leaf segments larger than %5d bytes.... %9d %5.2f%%\n",
+ pgsz-45, n, nLeaf>0 ? n*100.0/nLeaf : 0.0);
+
+ pStmt = prepare(db, "SELECT max(level%%1024) FROM '%q_segdir'", zTab);
+ mxLevel = 0;
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ mxLevel = sqlite3_column_int(pStmt, 0);
+ }
+ sqlite3_finalize(pStmt);
+
+ for(i=0; i<=mxLevel; i++){
+ pStmt = prepare(db,
+ "SELECT count(*), sum(len), avg(len), max(len), sum(len>%d),"
+ " count(distinct idx)"
+ " FROM (SELECT length(a.block) AS len, idx"
+ " FROM '%q_segments' a JOIN '%q_segdir' b"
+ " WHERE (a.blockid BETWEEN b.start_block"
+ " AND b.leaves_end_block)"
+ " AND (b.level%%1024)==%d)",
+ pgsz-45, zTab, zTab, i);
+ if( sqlite3_step(pStmt)==SQLITE_ROW
+ && (nLeaf = sqlite3_column_int(pStmt, 0))>0
+ ){
+ sqlite3_int64 sz;
+ nIdx = sqlite3_column_int(pStmt, 5);
+ printf("For level %d:\n", i);
+ printf(" Number of indexes...................... %9d\n", nIdx);
+ printf(" Number of leaf segments................ %9d\n", nLeaf);
+ if( nIdx>1 ){
+ printf(" Average leaf segments per index........ %11.1f\n",
+ (double)nLeaf/(double)nIdx);
+ }
+ printf(" Total size of all leaf segments........ %9lld\n",
+ (sz = sqlite3_column_int64(pStmt, 1)));
+ printf(" Average size of leaf segments.......... %11.1f\n",
+ sqlite3_column_double(pStmt, 2));
+ if( nIdx>1 ){
+ printf(" Average leaf segment size per index.... %11.1f\n",
+ (double)sz/(double)nIdx);
+ }
+ printf(" Maximum leaf segment size.............. %9lld\n",
+ sqlite3_column_int64(pStmt, 3));
+ n = sqlite3_column_int(pStmt, 4);
+ printf(" Leaf segments larger than %5d bytes.. %9d %5.2f%%\n",
+ pgsz-45, n, n*100.0/nLeaf);
+ }
+ sqlite3_finalize(pStmt);
+ }
+}
+
+/*
+** Print a single "tree" line of the segdir map output.
+*/
+static void printTreeLine(sqlite3_int64 iLower, sqlite3_int64 iUpper){
+ printf(" tree %9lld", iLower);
+ if( iUpper>iLower ){
+ printf(" thru %9lld (%lld blocks)", iUpper, iUpper-iLower+1);
+ }
+ printf("\n");
+}
+
+/*
+** Check to see if the block of a %_segments entry is NULL.
+*/
+static int isNullSegment(sqlite3 *db, const char *zTab, sqlite3_int64 iBlockId){
+ sqlite3_stmt *pStmt;
+ int rc = 1;
+
+ pStmt = prepare(db, "SELECT block IS NULL FROM '%q_segments'"
+ " WHERE blockid=%lld", zTab, iBlockId);
+ if( sqlite3_step(pStmt)==SQLITE_ROW ){
+ rc = sqlite3_column_int(pStmt, 0);
+ }
+ sqlite3_finalize(pStmt);
+ return rc;
+}
+
+/*
+** Show a map of segments derived from the %_segdir table.
+*/
+static void showSegdirMap(sqlite3 *db, const char *zTab){
+ int mxIndex, iIndex;
+ sqlite3_stmt *pStmt = 0;
+ sqlite3_stmt *pStmt2 = 0;
+ int prevLevel;
+
+ pStmt = prepare(db, "SELECT max(level/1024) FROM '%q_segdir'", zTab);
+ if( sqlite3_step(pStmt)==SQLITE_ROW ){
+ mxIndex = sqlite3_column_int(pStmt, 0);
+ }else{
+ mxIndex = 0;
+ }
+ sqlite3_finalize(pStmt);
+
+ printf("Number of inverted indices............... %3d\n", mxIndex+1);
+ pStmt = prepare(db,
+ "SELECT level, idx, start_block, leaves_end_block, end_block, rowid"
+ " FROM '%q_segdir'"
+ " WHERE level/1024==?"
+ " ORDER BY level DESC, idx",
+ zTab);
+ pStmt2 = prepare(db,
+ "SELECT blockid FROM '%q_segments'"
+ " WHERE blockid BETWEEN ? AND ? ORDER BY blockid",
+ zTab);
+ for(iIndex=0; iIndex<=mxIndex; iIndex++){
+ if( mxIndex>0 ){
+ printf("**************************** Index %d "
+ "****************************\n", iIndex);
+ }
+ sqlite3_bind_int(pStmt, 1, iIndex);
+ prevLevel = -1;
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ int iLevel = sqlite3_column_int(pStmt, 0)%1024;
+ int iIdx = sqlite3_column_int(pStmt, 1);
+ sqlite3_int64 iStart = sqlite3_column_int64(pStmt, 2);
+ sqlite3_int64 iLEnd = sqlite3_column_int64(pStmt, 3);
+ sqlite3_int64 iEnd = sqlite3_column_int64(pStmt, 4);
+ char rtag[20];
+ if( iLevel!=prevLevel ){
+ printf("level %2d idx %2d", iLevel, iIdx);
+ prevLevel = iLevel;
+ }else{
+ printf(" idx %2d", iIdx);
+ }
+ sqlite3_snprintf(sizeof(rtag), rtag, "r%lld",
+ sqlite3_column_int64(pStmt,5));
+ printf(" root %9s\n", rtag);
+ if( iLEnd>iStart ){
+ sqlite3_int64 iLower, iPrev = 0, iX;
+ if( iLEnd+1<=iEnd ){
+ sqlite3_bind_int64(pStmt2, 1, iLEnd+1);
+ sqlite3_bind_int64(pStmt2, 2, iEnd);
+ iLower = -1;
+ while( sqlite3_step(pStmt2)==SQLITE_ROW ){
+ iX = sqlite3_column_int64(pStmt2, 0);
+ if( iLower<0 ){
+ iLower = iPrev = iX;
+ }else if( iX==iPrev+1 ){
+ iPrev = iX;
+ }else{
+ printTreeLine(iLower, iPrev);
+ iLower = iPrev = iX;
+ }
+ }
+ sqlite3_reset(pStmt2);
+ if( iLower>=0 ){
+ if( iLower==iPrev && iLower==iEnd
+ && isNullSegment(db,zTab,iLower)
+ ){
+ printf(" null %9lld\n", iLower);
+ }else{
+ printTreeLine(iLower, iPrev);
+ }
+ }
+ }
+ printf(" leaves %9lld thru %9lld (%lld blocks)\n",
+ iStart, iLEnd, iLEnd - iStart + 1);
+ }
+ }
+ sqlite3_reset(pStmt);
+ }
+ sqlite3_finalize(pStmt);
+ sqlite3_finalize(pStmt2);
+}
+
+/*
+** Decode a single segment block and display the results on stdout.
+*/
+static void decodeSegment(
+ const unsigned char *aData, /* Content to print */
+ int nData /* Number of bytes of content */
+){
+ sqlite3_int64 iChild = 0;
+ sqlite3_int64 iPrefix;
+ sqlite3_int64 nTerm;
+ sqlite3_int64 n;
+ sqlite3_int64 iDocsz;
+ int iHeight;
+ sqlite3_int64 i = 0;
+ int cnt = 0;
+ char zTerm[1000];
+
+ i += getVarint(aData, &n);
+ iHeight = (int)n;
+ printf("height: %d\n", iHeight);
+ if( iHeight>0 ){
+ i += getVarint(aData+i, &iChild);
+ printf("left-child: %lld\n", iChild);
+ }
+ while( i<nData ){
+ if( (cnt++)>0 ){
+ i += getVarint(aData+i, &iPrefix);
+ }else{
+ iPrefix = 0;
+ }
+ i += getVarint(aData+i, &nTerm);
+ if( iPrefix+nTerm+1 >= sizeof(zTerm) ){
+ fprintf(stderr, "term to long\n");
+ exit(1);
+ }
+ memcpy(zTerm+iPrefix, aData+i, (size_t)nTerm);
+ zTerm[iPrefix+nTerm] = 0;
+ i += nTerm;
+ if( iHeight==0 ){
+ i += getVarint(aData+i, &iDocsz);
+ printf("term: %-25s doclist %7lld bytes offset %lld\n", zTerm, iDocsz, i);
+ i += iDocsz;
+ }else{
+ printf("term: %-25s child %lld\n", zTerm, ++iChild);
+ }
+ }
+}
+
+
+/*
+** Print a a blob as hex and ascii.
+*/
+static void printBlob(
+ const unsigned char *aData, /* Content to print */
+ int nData /* Number of bytes of content */
+){
+ int i, j;
+ const char *zOfstFmt;
+ const int perLine = 16;
+
+ if( (nData&~0xfff)==0 ){
+ zOfstFmt = " %03x: ";
+ }else if( (nData&~0xffff)==0 ){
+ zOfstFmt = " %04x: ";
+ }else if( (nData&~0xfffff)==0 ){
+ zOfstFmt = " %05x: ";
+ }else if( (nData&~0xffffff)==0 ){
+ zOfstFmt = " %06x: ";
+ }else{
+ zOfstFmt = " %08x: ";
+ }
+
+ for(i=0; i<nData; i += perLine){
+ fprintf(stdout, zOfstFmt, i);
+ for(j=0; j<perLine; j++){
+ if( i+j>nData ){
+ fprintf(stdout, " ");
+ }else{
+ fprintf(stdout,"%02x ", aData[i+j]);
+ }
+ }
+ for(j=0; j<perLine; j++){
+ if( i+j>nData ){
+ fprintf(stdout, " ");
+ }else{
+ fprintf(stdout,"%c", isprint(aData[i+j]) ? aData[i+j] : '.');
+ }
+ }
+ fprintf(stdout,"\n");
+ }
+}
+
+/*
+** Convert text to a 64-bit integer
+*/
+static sqlite3_int64 atoi64(const char *z){
+ sqlite3_int64 v = 0;
+ while( z[0]>='0' && z[0]<='9' ){
+ v = v*10 + z[0] - '0';
+ z++;
+ }
+ return v;
+}
+
+/*
+** Return a prepared statement which, when stepped, will return in its
+** first column the blob associated with segment zId. If zId begins with
+** 'r' then it is a rowid of a %_segdir entry. Otherwise it is a
+** %_segment entry.
+*/
+static sqlite3_stmt *prepareToGetSegment(
+ sqlite3 *db, /* The database */
+ const char *zTab, /* The FTS3/4 table name */
+ const char *zId /* ID of the segment to open */
+){
+ sqlite3_stmt *pStmt;
+ if( zId[0]=='r' ){
+ pStmt = prepare(db, "SELECT root FROM '%q_segdir' WHERE rowid=%lld",
+ zTab, atoi64(zId+1));
+ }else{
+ pStmt = prepare(db, "SELECT block FROM '%q_segments' WHERE blockid=%lld",
+ zTab, atoi64(zId));
+ }
+ return pStmt;
+}
+
+/*
+** Print the content of a segment or of the root of a segdir. The segment
+** or root is identified by azExtra[0]. If the first character of azExtra[0]
+** is 'r' then the remainder is the integer rowid of the %_segdir entry.
+** If the first character of azExtra[0] is not 'r' then, then all of
+** azExtra[0] is an integer which is the block number.
+**
+** If the --raw option is present in azExtra, then a hex dump is provided.
+** Otherwise a decoding is shown.
+*/
+static void showSegment(sqlite3 *db, const char *zTab){
+ const unsigned char *aData;
+ int nData;
+ sqlite3_stmt *pStmt;
+
+ pStmt = prepareToGetSegment(db, zTab, azExtra[0]);
+ if( sqlite3_step(pStmt)!=SQLITE_ROW ){
+ sqlite3_finalize(pStmt);
+ return;
+ }
+ nData = sqlite3_column_bytes(pStmt, 0);
+ aData = sqlite3_column_blob(pStmt, 0);
+ printf("Segment %s of size %d bytes:\n", azExtra[0], nData);
+ if( findOption("raw", 0, 0)!=0 ){
+ printBlob(aData, nData);
+ }else{
+ decodeSegment(aData, nData);
+ }
+ sqlite3_finalize(pStmt);
+}
+
+/*
+** Decode a single doclist and display the results on stdout.
+*/
+static void decodeDoclist(
+ const unsigned char *aData, /* Content to print */
+ int nData /* Number of bytes of content */
+){
+ sqlite3_int64 iPrevDocid = 0;
+ sqlite3_int64 iDocid;
+ sqlite3_int64 iPos;
+ sqlite3_int64 iPrevPos = 0;
+ sqlite3_int64 iCol;
+ int i = 0;
+
+ while( i<nData ){
+ i += getVarint(aData+i, &iDocid);
+ printf("docid %lld col0", iDocid+iPrevDocid);
+ iPrevDocid += iDocid;
+ iPrevPos = 0;
+ while( 1 ){
+ i += getVarint(aData+i, &iPos);
+ if( iPos==1 ){
+ i += getVarint(aData+i, &iCol);
+ printf(" col%lld", iCol);
+ iPrevPos = 0;
+ }else if( iPos==0 ){
+ printf("\n");
+ break;
+ }else{
+ iPrevPos += iPos - 2;
+ printf(" %lld", iPrevPos);
+ }
+ }
+ }
+}
+
+
+/*
+** Print the content of a doclist. The segment or segdir-root is
+** identified by azExtra[0]. If the first character of azExtra[0]
+** is 'r' then the remainder is the integer rowid of the %_segdir entry.
+** If the first character of azExtra[0] is not 'r' then, then all of
+** azExtra[0] is an integer which is the block number. The offset
+** into the segment is identified by azExtra[1]. The size of the doclist
+** is azExtra[2].
+**
+** If the --raw option is present in azExtra, then a hex dump is provided.
+** Otherwise a decoding is shown.
+*/
+static void showDoclist(sqlite3 *db, const char *zTab){
+ const unsigned char *aData;
+ sqlite3_int64 offset;
+ int nData;
+ sqlite3_stmt *pStmt;
+
+ offset = atoi64(azExtra[1]);
+ nData = atoi(azExtra[2]);
+ pStmt = prepareToGetSegment(db, zTab, azExtra[0]);
+ if( sqlite3_step(pStmt)!=SQLITE_ROW ){
+ sqlite3_finalize(pStmt);
+ return;
+ }
+ aData = sqlite3_column_blob(pStmt, 0);
+ printf("Doclist at %s offset %lld of size %d bytes:\n",
+ azExtra[0], offset, nData);
+ if( findOption("raw", 0, 0)!=0 ){
+ printBlob(aData+offset, nData);
+ }else{
+ decodeDoclist(aData+offset, nData);
+ }
+ sqlite3_finalize(pStmt);
+}
+
+/*
+** Show the top N largest segments
+*/
+static void listBigSegments(sqlite3 *db, const char *zTab){
+ int nTop, i;
+ sqlite3_stmt *pStmt;
+ sqlite3_int64 sz;
+ sqlite3_int64 id;
+
+ nTop = atoi(findOption("top", 1, "25"));
+ printf("The %d largest segments:\n", nTop);
+ pStmt = prepare(db,
+ "SELECT blockid, length(block) AS len FROM '%q_segments'"
+ " ORDER BY 2 DESC, 1"
+ " LIMIT %d", zTab, nTop);
+ i = 0;
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ i++;
+ id = sqlite3_column_int64(pStmt, 0);
+ sz = sqlite3_column_int64(pStmt, 1);
+ printf(" %2d. %9lld size %lld\n", i, id, sz);
+ }
+ sqlite3_finalize(pStmt);
+}
+
+
+
+static void usage(const char *argv0){
+ fprintf(stderr, "Usage: %s DATABASE\n"
+ " or: %s DATABASE FTS3TABLE ARGS...\n", argv0, argv0);
+ fprintf(stderr,
+ "ARGS:\n"
+ " big-segments [--top N] show the largest segments\n"
+ " doclist BLOCKID OFFSET SIZE [--raw] Decode a doclist\n"
+ " schema FTS table schema\n"
+ " segdir directory of segments\n"
+ " segment BLOCKID [--raw] content of a segment\n"
+ " segment-stats info on segment sizes\n"
+ " stat the %%_stat table\n"
+ " vocabulary [--top N] document vocabulary\n"
+ );
+ exit(1);
+}
+
+int main(int argc, char **argv){
+ sqlite3 *db;
+ int rc;
+ const char *zTab;
+ const char *zCmd;
+
+ if( argc<2 ) usage(argv[0]);
+ rc = sqlite3_open(argv[1], &db);
+ if( rc ){
+ fprintf(stderr, "Cannot open %s\n", argv[1]);
+ exit(1);
+ }
+ if( argc==2 ){
+ sqlite3_stmt *pStmt;
+ int cnt = 0;
+ pStmt = prepare(db, "SELECT b.sql"
+ " FROM sqlite_schema a, sqlite_schema b"
+ " WHERE a.name GLOB '*_segdir'"
+ " AND b.name=substr(a.name,1,length(a.name)-7)"
+ " ORDER BY 1");
+ while( sqlite3_step(pStmt)==SQLITE_ROW ){
+ cnt++;
+ printf("%s;\n", sqlite3_column_text(pStmt, 0));
+ }
+ sqlite3_finalize(pStmt);
+ if( cnt==0 ){
+ printf("/* No FTS3/4 tables found in database %s */\n", argv[1]);
+ }
+ return 0;
+ }
+ if( argc<4 ) usage(argv[0]);
+ zTab = argv[2];
+ zCmd = argv[3];
+ nExtra = argc-4;
+ azExtra = argv+4;
+ if( strcmp(zCmd,"big-segments")==0 ){
+ listBigSegments(db, zTab);
+ }else if( strcmp(zCmd,"doclist")==0 ){
+ if( argc<7 ) usage(argv[0]);
+ showDoclist(db, zTab);
+ }else if( strcmp(zCmd,"schema")==0 ){
+ showSchema(db, zTab);
+ }else if( strcmp(zCmd,"segdir")==0 ){
+ showSegdirMap(db, zTab);
+ }else if( strcmp(zCmd,"segment")==0 ){
+ if( argc<5 ) usage(argv[0]);
+ showSegment(db, zTab);
+ }else if( strcmp(zCmd,"segment-stats")==0 ){
+ showSegmentStats(db, zTab);
+ }else if( strcmp(zCmd,"stat")==0 ){
+ showStat(db, zTab);
+ }else if( strcmp(zCmd,"vocabulary")==0 ){
+ showVocabulary(db, zTab);
+ }else{
+ usage(argv[0]);
+ }
+ return 0;
+}