summaryrefslogtreecommitdiffstats
path: root/ext/fts5/fts5_test_mi.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--ext/fts5/fts5_test_mi.c421
1 files changed, 421 insertions, 0 deletions
diff --git a/ext/fts5/fts5_test_mi.c b/ext/fts5/fts5_test_mi.c
new file mode 100644
index 0000000..6f2d6e7
--- /dev/null
+++ b/ext/fts5/fts5_test_mi.c
@@ -0,0 +1,421 @@
+/*
+** 2015 Aug 04
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file contains test code only, it is not included in release
+** versions of FTS5. It contains the implementation of an FTS5 auxiliary
+** function very similar to the FTS4 function matchinfo():
+**
+** https://www.sqlite.org/fts3.html#matchinfo
+**
+** Known differences are that:
+**
+** 1) this function uses the FTS5 definition of "matchable phrase", which
+** excludes any phrases that are part of an expression sub-tree that
+** does not match the current row. This comes up for MATCH queries
+** such as:
+**
+** "a OR (b AND c)"
+**
+** In FTS4, if a single row contains instances of tokens "a" and "c",
+** but not "b", all instances of "c" are considered matches. In FTS5,
+** they are not (as the "b AND c" sub-tree does not match the current
+** row.
+**
+** 2) For the values returned by 'x' that apply to all rows of the table,
+** NEAR constraints are not considered. But for the number of hits in
+** the current row, they are.
+**
+** This file exports a single function that may be called to register the
+** matchinfo() implementation with a database handle:
+**
+** int sqlite3Fts5TestRegisterMatchinfo(sqlite3 *db);
+*/
+
+
+#ifdef SQLITE_ENABLE_FTS5
+
+#include "fts5.h"
+#include <assert.h>
+#include <string.h>
+
+typedef struct Fts5MatchinfoCtx Fts5MatchinfoCtx;
+
+#ifndef SQLITE_AMALGAMATION
+typedef unsigned int u32;
+#endif
+
+struct Fts5MatchinfoCtx {
+ int nCol; /* Number of cols in FTS5 table */
+ int nPhrase; /* Number of phrases in FTS5 query */
+ char *zArg; /* nul-term'd copy of 2nd arg */
+ int nRet; /* Number of elements in aRet[] */
+ u32 *aRet; /* Array of 32-bit unsigned ints to return */
+};
+
+
+
+/*
+** Return a pointer to the fts5_api pointer for database connection db.
+** If an error occurs, return NULL and leave an error in the database
+** handle (accessible using sqlite3_errcode()/errmsg()).
+*/
+static int fts5_api_from_db(sqlite3 *db, fts5_api **ppApi){
+ sqlite3_stmt *pStmt = 0;
+ int rc;
+
+ *ppApi = 0;
+ rc = sqlite3_prepare(db, "SELECT fts5(?1)", -1, &pStmt, 0);
+ if( rc==SQLITE_OK ){
+ sqlite3_bind_pointer(pStmt, 1, (void*)ppApi, "fts5_api_ptr", 0);
+ (void)sqlite3_step(pStmt);
+ rc = sqlite3_finalize(pStmt);
+ }
+
+ return rc;
+}
+
+
+/*
+** Argument f should be a flag accepted by matchinfo() (a valid character
+** in the string passed as the second argument). If it is not, -1 is
+** returned. Otherwise, if f is a valid matchinfo flag, the value returned
+** is the number of 32-bit integers added to the output array if the
+** table has nCol columns and the query nPhrase phrases.
+*/
+static int fts5MatchinfoFlagsize(int nCol, int nPhrase, char f){
+ int ret = -1;
+ switch( f ){
+ case 'p': ret = 1; break;
+ case 'c': ret = 1; break;
+ case 'x': ret = 3 * nCol * nPhrase; break;
+ case 'y': ret = nCol * nPhrase; break;
+ case 'b': ret = ((nCol + 31) / 32) * nPhrase; break;
+ case 'n': ret = 1; break;
+ case 'a': ret = nCol; break;
+ case 'l': ret = nCol; break;
+ case 's': ret = nCol; break;
+ }
+ return ret;
+}
+
+static int fts5MatchinfoIter(
+ const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
+ Fts5Context *pFts, /* First arg to pass to pApi functions */
+ Fts5MatchinfoCtx *p,
+ int(*x)(const Fts5ExtensionApi*,Fts5Context*,Fts5MatchinfoCtx*,char,u32*)
+){
+ int i;
+ int n = 0;
+ int rc = SQLITE_OK;
+ char f;
+ for(i=0; (f = p->zArg[i]); i++){
+ rc = x(pApi, pFts, p, f, &p->aRet[n]);
+ if( rc!=SQLITE_OK ) break;
+ n += fts5MatchinfoFlagsize(p->nCol, p->nPhrase, f);
+ }
+ return rc;
+}
+
+static int fts5MatchinfoXCb(
+ const Fts5ExtensionApi *pApi,
+ Fts5Context *pFts,
+ void *pUserData
+){
+ Fts5PhraseIter iter;
+ int iCol, iOff;
+ u32 *aOut = (u32*)pUserData;
+ int iPrev = -1;
+
+ for(pApi->xPhraseFirst(pFts, 0, &iter, &iCol, &iOff);
+ iCol>=0;
+ pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
+ ){
+ aOut[iCol*3+1]++;
+ if( iCol!=iPrev ) aOut[iCol*3 + 2]++;
+ iPrev = iCol;
+ }
+
+ return SQLITE_OK;
+}
+
+static int fts5MatchinfoGlobalCb(
+ const Fts5ExtensionApi *pApi,
+ Fts5Context *pFts,
+ Fts5MatchinfoCtx *p,
+ char f,
+ u32 *aOut
+){
+ int rc = SQLITE_OK;
+ switch( f ){
+ case 'p':
+ aOut[0] = p->nPhrase;
+ break;
+
+ case 'c':
+ aOut[0] = p->nCol;
+ break;
+
+ case 'x': {
+ int i;
+ for(i=0; i<p->nPhrase && rc==SQLITE_OK; i++){
+ void *pPtr = (void*)&aOut[i * p->nCol * 3];
+ rc = pApi->xQueryPhrase(pFts, i, pPtr, fts5MatchinfoXCb);
+ }
+ break;
+ }
+
+ case 'n': {
+ sqlite3_int64 nRow;
+ rc = pApi->xRowCount(pFts, &nRow);
+ aOut[0] = (u32)nRow;
+ break;
+ }
+
+ case 'a': {
+ sqlite3_int64 nRow = 0;
+ rc = pApi->xRowCount(pFts, &nRow);
+ if( nRow==0 ){
+ memset(aOut, 0, sizeof(u32) * p->nCol);
+ }else{
+ int i;
+ for(i=0; rc==SQLITE_OK && i<p->nCol; i++){
+ sqlite3_int64 nToken;
+ rc = pApi->xColumnTotalSize(pFts, i, &nToken);
+ if( rc==SQLITE_OK){
+ aOut[i] = (u32)((2*nToken + nRow) / (2*nRow));
+ }
+ }
+ }
+ break;
+ }
+
+ }
+ return rc;
+}
+
+static int fts5MatchinfoLocalCb(
+ const Fts5ExtensionApi *pApi,
+ Fts5Context *pFts,
+ Fts5MatchinfoCtx *p,
+ char f,
+ u32 *aOut
+){
+ int i;
+ int rc = SQLITE_OK;
+
+ switch( f ){
+ case 'b': {
+ int iPhrase;
+ int nInt = ((p->nCol + 31) / 32) * p->nPhrase;
+ for(i=0; i<nInt; i++) aOut[i] = 0;
+
+ for(iPhrase=0; iPhrase<p->nPhrase; iPhrase++){
+ Fts5PhraseIter iter;
+ int iCol;
+ for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol);
+ iCol>=0;
+ pApi->xPhraseNextColumn(pFts, &iter, &iCol)
+ ){
+ aOut[iPhrase * ((p->nCol+31)/32) + iCol/32] |= ((u32)1 << iCol%32);
+ }
+ }
+
+ break;
+ }
+
+ case 'x':
+ case 'y': {
+ int nMul = (f=='x' ? 3 : 1);
+ int iPhrase;
+
+ for(i=0; i<(p->nCol*p->nPhrase); i++) aOut[i*nMul] = 0;
+
+ for(iPhrase=0; iPhrase<p->nPhrase; iPhrase++){
+ Fts5PhraseIter iter;
+ int iOff, iCol;
+ for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
+ iOff>=0;
+ pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
+ ){
+ aOut[nMul * (iCol + iPhrase * p->nCol)]++;
+ }
+ }
+
+ break;
+ }
+
+ case 'l': {
+ for(i=0; rc==SQLITE_OK && i<p->nCol; i++){
+ int nToken;
+ rc = pApi->xColumnSize(pFts, i, &nToken);
+ aOut[i] = (u32)nToken;
+ }
+ break;
+ }
+
+ case 's': {
+ int nInst;
+
+ memset(aOut, 0, sizeof(u32) * p->nCol);
+
+ rc = pApi->xInstCount(pFts, &nInst);
+ for(i=0; rc==SQLITE_OK && i<nInst; i++){
+ int iPhrase, iOff, iCol = 0;
+ int iNextPhrase;
+ int iNextOff;
+ u32 nSeq = 1;
+ int j;
+
+ rc = pApi->xInst(pFts, i, &iPhrase, &iCol, &iOff);
+ iNextPhrase = iPhrase+1;
+ iNextOff = iOff+pApi->xPhraseSize(pFts, 0);
+ for(j=i+1; rc==SQLITE_OK && j<nInst; j++){
+ int ip, ic, io;
+ rc = pApi->xInst(pFts, j, &ip, &ic, &io);
+ if( ic!=iCol || io>iNextOff ) break;
+ if( ip==iNextPhrase && io==iNextOff ){
+ nSeq++;
+ iNextPhrase = ip+1;
+ iNextOff = io + pApi->xPhraseSize(pFts, ip);
+ }
+ }
+
+ if( nSeq>aOut[iCol] ) aOut[iCol] = nSeq;
+ }
+
+ break;
+ }
+ }
+ return rc;
+}
+
+static Fts5MatchinfoCtx *fts5MatchinfoNew(
+ const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
+ Fts5Context *pFts, /* First arg to pass to pApi functions */
+ sqlite3_context *pCtx, /* Context for returning error message */
+ const char *zArg /* Matchinfo flag string */
+){
+ Fts5MatchinfoCtx *p;
+ int nCol;
+ int nPhrase;
+ int i;
+ int nInt;
+ sqlite3_int64 nByte;
+ int rc;
+
+ nCol = pApi->xColumnCount(pFts);
+ nPhrase = pApi->xPhraseCount(pFts);
+
+ nInt = 0;
+ for(i=0; zArg[i]; i++){
+ int n = fts5MatchinfoFlagsize(nCol, nPhrase, zArg[i]);
+ if( n<0 ){
+ char *zErr = sqlite3_mprintf("unrecognized matchinfo flag: %c", zArg[i]);
+ sqlite3_result_error(pCtx, zErr, -1);
+ sqlite3_free(zErr);
+ return 0;
+ }
+ nInt += n;
+ }
+
+ nByte = sizeof(Fts5MatchinfoCtx) /* The struct itself */
+ + sizeof(u32) * nInt /* The p->aRet[] array */
+ + (i+1); /* The p->zArg string */
+ p = (Fts5MatchinfoCtx*)sqlite3_malloc64(nByte);
+ if( p==0 ){
+ sqlite3_result_error_nomem(pCtx);
+ return 0;
+ }
+ memset(p, 0, nByte);
+
+ p->nCol = nCol;
+ p->nPhrase = nPhrase;
+ p->aRet = (u32*)&p[1];
+ p->nRet = nInt;
+ p->zArg = (char*)&p->aRet[nInt];
+ memcpy(p->zArg, zArg, i);
+
+ rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoGlobalCb);
+ if( rc!=SQLITE_OK ){
+ sqlite3_result_error_code(pCtx, rc);
+ sqlite3_free(p);
+ p = 0;
+ }
+
+ return p;
+}
+
+static void fts5MatchinfoFunc(
+ const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
+ Fts5Context *pFts, /* First arg to pass to pApi functions */
+ sqlite3_context *pCtx, /* Context for returning result/error */
+ int nVal, /* Number of values in apVal[] array */
+ sqlite3_value **apVal /* Array of trailing arguments */
+){
+ const char *zArg;
+ Fts5MatchinfoCtx *p;
+ int rc = SQLITE_OK;
+
+ if( nVal>0 ){
+ zArg = (const char*)sqlite3_value_text(apVal[0]);
+ }else{
+ zArg = "pcx";
+ }
+
+ p = (Fts5MatchinfoCtx*)pApi->xGetAuxdata(pFts, 0);
+ if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){
+ p = fts5MatchinfoNew(pApi, pFts, pCtx, zArg);
+ if( p==0 ){
+ rc = SQLITE_NOMEM;
+ }else{
+ rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);
+ }
+ }
+
+ if( rc==SQLITE_OK ){
+ rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb);
+ }
+ if( rc!=SQLITE_OK ){
+ sqlite3_result_error_code(pCtx, rc);
+ }else{
+ /* No errors has occured, so return a copy of the array of integers. */
+ int nByte = p->nRet * sizeof(u32);
+ sqlite3_result_blob(pCtx, (void*)p->aRet, nByte, SQLITE_TRANSIENT);
+ }
+}
+
+int sqlite3Fts5TestRegisterMatchinfo(sqlite3 *db){
+ int rc; /* Return code */
+ fts5_api *pApi; /* FTS5 API functions */
+
+ /* Extract the FTS5 API pointer from the database handle. The
+ ** fts5_api_from_db() function above is copied verbatim from the
+ ** FTS5 documentation. Refer there for details. */
+ rc = fts5_api_from_db(db, &pApi);
+ if( rc!=SQLITE_OK ) return rc;
+
+ /* If fts5_api_from_db() returns NULL, then either FTS5 is not registered
+ ** with this database handle, or an error (OOM perhaps?) has occurred.
+ **
+ ** Also check that the fts5_api object is version 2 or newer.
+ */
+ if( pApi==0 || pApi->iVersion<2 ){
+ return SQLITE_ERROR;
+ }
+
+ /* Register the implementation of matchinfo() */
+ rc = pApi->xCreateFunction(pApi, "matchinfo", 0, fts5MatchinfoFunc, 0);
+
+ return rc;
+}
+
+#endif /* SQLITE_ENABLE_FTS5 */