/*
** 2013 Apr 22
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** This file contains code for the "fts3tokenize" virtual table module.
** An fts3tokenize virtual table is created as follows:
**
**   CREATE VIRTUAL TABLE <tbl> USING fts3tokenize(
**       <tokenizer-name>, <arg-1>, ...
**   );
**
** The table created has the following schema:
**
**   CREATE TABLE <tbl>(input, token, start, end, position)
**
** When queried, the query must include a WHERE clause of type:
**
**   input = <string>
**
** The virtual table module tokenizes this <string>, using the FTS3
** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE
** statement and returns one row for each token in the result. With
** fields set as follows:
**
**   input:   Always set to a copy of <string>
**   token:   A token from the input.
**   start:   Byte offset of the token within the input <string>.
**   end:     Byte offset of the byte immediately following the end of the
**            token within the input string.
**   pos:     Token offset of token within input.
**
*/
#include "fts3Int.h"
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)

#include <string.h>
#include <assert.h>

typedef struct Fts3tokTable Fts3tokTable;
typedef struct Fts3tokCursor Fts3tokCursor;

/*
** Virtual table structure.
*/
struct Fts3tokTable {
  sqlite3_vtab base;              /* Base class used by SQLite core */
  const sqlite3_tokenizer_module *pMod;   /* Tokenizer module (methods) */
  sqlite3_tokenizer *pTok;        /* Tokenizer instance created by pMod */
};

/*
** Virtual table cursor structure.
*/
struct Fts3tokCursor {
  sqlite3_vtab_cursor base;       /* Base class used by SQLite core */
  char *zInput;                   /* Input string */
  sqlite3_tokenizer_cursor *pCsr; /* Cursor to iterate through zInput */
  int iRowid;                     /* Current 'rowid' value */
  const char *zToken;             /* Current 'token' value */
  int nToken;                     /* Size of zToken in bytes */
  int iStart;                     /* Current 'start' value */
  int iEnd;                       /* Current 'end' value */
  int iPos;                       /* Current 'pos' value */
};

/*
** Query FTS for the tokenizer implementation named zName.
**
** On success *pp is set to the module found in hash table pHash and
** SQLITE_OK is returned. If no tokenizer named zName is registered, an
** error message is written to *pzErr and SQLITE_ERROR is returned. *pp is
** not modified in that case.
*/
static int fts3tokQueryTokenizer(
  Fts3Hash *pHash,
  const char *zName,
  const sqlite3_tokenizer_module **pp,
  char **pzErr
){
  sqlite3_tokenizer_module *p;
  int nName = (int)strlen(zName);

  /* The lookup key length is nName+1, i.e. it includes the nul terminator. */
  p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);
  if( !p ){
    sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", zName);
    return SQLITE_ERROR;
  }

  *pp = p;
  return SQLITE_OK;
}

/*
** The second argument, argv[], is an array of pointers to nul-terminated
** strings. This function makes a copy of the array and strings into a
** single block of memory. It then dequotes any of the strings that appear
** to be quoted.
**
** If successful, output parameter *pazDequote is set to point at the
** array of dequoted strings and SQLITE_OK is returned. The caller is
** responsible for eventually calling sqlite3_free() to free the array
** in this case. Or, if an error occurs, an SQLite error code is returned.
** The final value of *pazDequote is undefined in this case.
*/
static int fts3tokDequoteArray(
  int argc,                       /* Number of elements in argv[] */
  const char * const *argv,       /* Input array */
  char ***pazDequote              /* Output array */
){
  int rc = SQLITE_OK;             /* Return code */
  if( argc==0 ){
    *pazDequote = 0;
  }else{
    int i;
    int nByte = 0;
    char **azDequote;

    /* Total space required for all strings, including nul terminators. */
    for(i=0; i<argc; i++){
      nByte += (int)(strlen(argv[i]) + 1);
    }

    /* One allocation holds the pointer array followed by the string data,
    ** so that a single sqlite3_free() releases everything. */
    *pazDequote = azDequote = sqlite3_malloc64(sizeof(char *)*argc + nByte);
    if( azDequote==0 ){
      rc = SQLITE_NOMEM;
    }else{
      char *pSpace = (char *)&azDequote[argc];
      for(i=0; i<argc; i++){
        int n = (int)strlen(argv[i]);
        azDequote[i] = pSpace;
        memcpy(pSpace, argv[i], n+1);
        sqlite3Fts3Dequote(pSpace);
        pSpace += (n+1);
      }
    }
  }

  return rc;
}

/*
** Schema of the tokenizer table.
*/
#define FTS3_TOK_SCHEMA "CREATE TABLE x(input, token, start, end, position)"

/*
** This function does all the work for both the xConnect and xCreate methods.
** These tables have no persistent representation of their own, so xConnect
** and xCreate are identical operations.
**
**   argv[0]: module name
**   argv[1]: database name
**   argv[2]: table name
**   argv[3]: first argument (tokenizer name)
*/
static int fts3tokConnectMethod(
  sqlite3 *db,                    /* Database connection */
  void *pHash,                    /* Hash table of tokenizers */
  int argc,                       /* Number of elements in argv array */
  const char * const *argv,       /* xCreate/xConnect argument array */
  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
){
  Fts3tokTable *pTab = 0;
  const sqlite3_tokenizer_module *pMod = 0;
  sqlite3_tokenizer *pTok = 0;
  int rc;
  char **azDequote = 0;
  int nDequote;

  rc = sqlite3_declare_vtab(db, FTS3_TOK_SCHEMA);
  if( rc!=SQLITE_OK ) return rc;

  /* Everything after the table name is a tokenizer argument. */
  nDequote = argc-3;
  rc = fts3tokDequoteArray(nDequote, &argv[3], &azDequote);

  if( rc==SQLITE_OK ){
    const char *zModule;
    if( nDequote<1 ){
      /* No tokenizer named in the CREATE VIRTUAL TABLE statement. */
      zModule = "simple";
    }else{
      zModule = azDequote[0];
    }
    rc = fts3tokQueryTokenizer((Fts3Hash*)pHash, zModule, &pMod, pzErr);
  }

  assert( (rc==SQLITE_OK)==(pMod!=0) );
  if( rc==SQLITE_OK ){
    /* Arguments following the tokenizer name are passed to xCreate. */
    const char * const *azArg = 0;
    if( nDequote>1 ) azArg = (const char * const *)&azDequote[1];
    rc = pMod->xCreate((nDequote>1 ? nDequote-1 : 0), azArg, &pTok);
  }

  if( rc==SQLITE_OK ){
    pTab = (Fts3tokTable *)sqlite3_malloc(sizeof(Fts3tokTable));
    if( pTab==0 ){
      rc = SQLITE_NOMEM;
    }
  }

  if( rc==SQLITE_OK ){
    memset(pTab, 0, sizeof(Fts3tokTable));
    pTab->pMod = pMod;
    pTab->pTok = pTok;
    *ppVtab = &pTab->base;
  }else{
    /* The tokenizer may have been created before a later step failed. */
    if( pTok ){
      pMod->xDestroy(pTok);
    }
  }

  sqlite3_free(azDequote);
  return rc;
}

/*
** This function does the work for both the xDisconnect and xDestroy methods.
** These tables have no persistent representation of their own, so xDisconnect
** and xDestroy are identical operations.
*/
static int fts3tokDisconnectMethod(sqlite3_vtab *pVtab){
  Fts3tokTable *pTab = (Fts3tokTable *)pVtab;

  pTab->pMod->xDestroy(pTab->pTok);
  sqlite3_free(pTab);
  return SQLITE_OK;
}

/*
** xBestIndex - Analyze a WHERE and ORDER BY clause.
**
** If a usable equality constraint on column 0 ("input") is present, plan
** number 1 is selected and the constraint value is passed to xFilter as
** its first argument. Otherwise plan number 0 is selected.
*/
static int fts3tokBestIndexMethod(
  sqlite3_vtab *pVTab,
  sqlite3_index_info *pInfo
){
  int i;
  UNUSED_PARAMETER(pVTab);

  for(i=0; i<pInfo->nConstraint; i++){
    if( pInfo->aConstraint[i].usable
     && pInfo->aConstraint[i].iColumn==0
     && pInfo->aConstraint[i].op==SQLITE_INDEX_CONSTRAINT_EQ
    ){
      pInfo->idxNum = 1;
      pInfo->aConstraintUsage[i].argvIndex = 1;
      pInfo->aConstraintUsage[i].omit = 1;
      pInfo->estimatedCost = 1;
      return SQLITE_OK;
    }
  }

  pInfo->idxNum = 0;
  assert( pInfo->estimatedCost>1000000.0 );

  return SQLITE_OK;
}

/*
** xOpen - Open a cursor.
**
** The new cursor is zero-filled. Returns SQLITE_NOMEM if the allocation
** fails, or SQLITE_OK otherwise.
*/
static int fts3tokOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
  Fts3tokCursor *pCsr;
  UNUSED_PARAMETER(pVTab);

  pCsr = (Fts3tokCursor *)sqlite3_malloc(sizeof(Fts3tokCursor));
  if( pCsr==0 ){
    return SQLITE_NOMEM;
  }
  memset(pCsr, 0, sizeof(Fts3tokCursor));

  *ppCsr = (sqlite3_vtab_cursor *)pCsr;
  return SQLITE_OK;
}

/*
** Reset the tokenizer cursor passed as the only argument. As if it had
** just been returned by fts3tokOpenMethod().
**
** Any open tokenizer cursor is closed and the copy of the input string is
** freed. Since zToken is cleared, fts3tokEofMethod() reports EOF afterwards.
*/
static void fts3tokResetCursor(Fts3tokCursor *pCsr){
  if( pCsr->pCsr ){
    Fts3tokTable *pTab = (Fts3tokTable *)(pCsr->base.pVtab);
    pTab->pMod->xClose(pCsr->pCsr);
    pCsr->pCsr = 0;
  }
  sqlite3_free(pCsr->zInput);
  pCsr->zInput = 0;
  pCsr->zToken = 0;
  pCsr->nToken = 0;
  pCsr->iStart = 0;
  pCsr->iEnd = 0;
  pCsr->iPos = 0;
  pCsr->iRowid = 0;
}

/*
** xClose - Close a cursor.
*/
static int fts3tokCloseMethod(sqlite3_vtab_cursor *pCursor){
  Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;

  fts3tokResetCursor(pCsr);
  sqlite3_free(pCsr);
  return SQLITE_OK;
}

/*
** xNext - Advance the cursor to the next row, if any.
**
** If the tokenizer returns anything other than SQLITE_OK the cursor is
** reset, which leaves it at EOF. SQLITE_DONE is mapped to SQLITE_OK; any
** other code is returned to the caller unchanged.
*/
static int fts3tokNextMethod(sqlite3_vtab_cursor *pCursor){
  Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
  Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab);
  int rc;                         /* Return code */

  pCsr->iRowid++;
  rc = pTab->pMod->xNext(pCsr->pCsr,
      &pCsr->zToken, &pCsr->nToken,
      &pCsr->iStart, &pCsr->iEnd, &pCsr->iPos
  );

  if( rc!=SQLITE_OK ){
    fts3tokResetCursor(pCsr);
    if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  }

  return rc;
}

/*
** xFilter - Initialize a cursor to point at the start of its data.
**
** When idxNum==1, apVal[0] holds the value of the "input = ?" constraint.
** A nul-terminated copy of it is made, a tokenizer cursor is opened on the
** copy, and the cursor is advanced to the first token. For any other
** idxNum the function returns SQLITE_ERROR.
*/
static int fts3tokFilterMethod(
  sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
  int idxNum,                     /* Strategy index */
  const char *idxStr,             /* Unused */
  int nVal,                       /* Number of elements in apVal */
  sqlite3_value **apVal           /* Arguments for the indexing scheme */
){
  int rc = SQLITE_ERROR;          /* Stays an error unless idxNum==1 */
  Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
  Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab);
  UNUSED_PARAMETER(idxStr);
  UNUSED_PARAMETER(nVal);

  fts3tokResetCursor(pCsr);
  if( idxNum==1 ){
    const char *zByte = (const char *)sqlite3_value_text(apVal[0]);
    int nByte = sqlite3_value_bytes(apVal[0]);
    pCsr->zInput = sqlite3_malloc64(nByte+1);
    if( pCsr->zInput==0 ){
      rc = SQLITE_NOMEM;
    }else{
      /* zByte is only read when there is at least one byte to copy. */
      if( nByte>0 ) memcpy(pCsr->zInput, zByte, nByte);
      pCsr->zInput[nByte] = 0;
      rc = pTab->pMod->xOpen(pTab->pTok, pCsr->zInput, nByte, &pCsr->pCsr);
      if( rc==SQLITE_OK ){
        /* Record the owning tokenizer on the new tokenizer cursor. */
        pCsr->pCsr->pTokenizer = pTab->pTok;
      }
    }
  }

  if( rc!=SQLITE_OK ) return rc;
  return fts3tokNextMethod(pCursor);
}

/*
** xEof - Return true if the cursor is at EOF, or false otherwise.
**
** A cursor with no current token (zToken==0) is at EOF.
*/
static int fts3tokEofMethod(sqlite3_vtab_cursor *pCursor){
  Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
  return (pCsr->zToken==0);
}

/*
** xColumn - Return a column value.
*/
static int fts3tokColumnMethod(
  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
  sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
  int iCol                        /* Index of column to read value from */
){
  Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;

  /* CREATE TABLE x(input, token, start, end, position) */
  switch( iCol ){
    case 0:
      sqlite3_result_text(pCtx, pCsr->zInput, -1, SQLITE_TRANSIENT);
      break;
    case 1:
      sqlite3_result_text(pCtx, pCsr->zToken, pCsr->nToken, SQLITE_TRANSIENT);
      break;
    case 2:
      sqlite3_result_int(pCtx, pCsr->iStart);
      break;
    case 3:
      sqlite3_result_int(pCtx, pCsr->iEnd);
      break;
    default:
      assert( iCol==4 );
      sqlite3_result_int(pCtx, pCsr->iPos);
      break;
  }
  return SQLITE_OK;
}

/*
** xRowid - Return the current rowid for the cursor.
*/
static int fts3tokRowidMethod(
  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
  sqlite_int64 *pRowid            /* OUT: Rowid value */
){
  Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
  *pRowid = (sqlite3_int64)pCsr->iRowid;
  return SQLITE_OK;
}

/*
** Register the fts3tok module with database connection db. Return SQLITE_OK
** if successful or an error code if sqlite3_create_module() fails.
**
** The tokenizer hash table pHash is registered as the module's client data
** and xDestroy as the destructor for that client data.
*/
int sqlite3Fts3InitTok(sqlite3 *db, Fts3Hash *pHash, void(*xDestroy)(void*)){
  static const sqlite3_module fts3tok_module = {
     0,                           /* iVersion      */
     fts3tokConnectMethod,        /* xCreate       */
     fts3tokConnectMethod,        /* xConnect      */
     fts3tokBestIndexMethod,      /* xBestIndex    */
     fts3tokDisconnectMethod,     /* xDisconnect   */
     fts3tokDisconnectMethod,     /* xDestroy      */
     fts3tokOpenMethod,           /* xOpen         */
     fts3tokCloseMethod,          /* xClose        */
     fts3tokFilterMethod,         /* xFilter       */
     fts3tokNextMethod,           /* xNext         */
     fts3tokEofMethod,            /* xEof          */
     fts3tokColumnMethod,         /* xColumn       */
     fts3tokRowidMethod,          /* xRowid        */
     0,                           /* xUpdate       */
     0,                           /* xBegin        */
     0,                           /* xSync         */
     0,                           /* xCommit       */
     0,                           /* xRollback     */
     0,                           /* xFindFunction */
     0,                           /* xRename       */
     0,                           /* xSavepoint    */
     0,                           /* xRelease      */
     0,                           /* xRollbackTo   */
     0,                           /* xShadowName   */
     0                            /* xIntegrity    */
  };
  int rc;                         /* Return code */

  rc = sqlite3_create_module_v2(
      db, "fts3tokenize", &fts3tok_module, (void*)pHash, xDestroy
  );
  return rc;
}

#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */