summaryrefslogtreecommitdiffstats
path: root/ext/fts5/fts5_test_mi.c
blob: 6f2d6e7ea27cda88b018cd9cc1ac0c7a926ea5d8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
/*
** 2015 Aug 04
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** This file contains test code only, it is not included in release 
** versions of FTS5. It contains the implementation of an FTS5 auxiliary
** function very similar to the FTS4 function matchinfo():
**
**     https://www.sqlite.org/fts3.html#matchinfo
**
** Known differences are that:
**
**  1) this function uses the FTS5 definition of "matchable phrase", which
**     excludes any phrases that are part of an expression sub-tree that
**     does not match the current row. This comes up for MATCH queries 
**     such as:
**
**         "a OR (b AND c)"
**
**     In FTS4, if a single row contains instances of tokens "a" and "c", 
**     but not "b", all instances of "c" are considered matches. In FTS5,
**     they are not (as the "b AND c" sub-tree does not match the current
**     row.
**
**  2) For the values returned by 'x' that apply to all rows of the table, 
**     NEAR constraints are not considered. But for the number of hits in
**     the current row, they are.
**     
** This file exports a single function that may be called to register the
** matchinfo() implementation with a database handle:
**
**   int sqlite3Fts5TestRegisterMatchinfo(sqlite3 *db);
*/


#ifdef SQLITE_ENABLE_FTS5

#include "fts5.h"
#include <assert.h>
#include <string.h>

typedef struct Fts5MatchinfoCtx Fts5MatchinfoCtx;

#ifndef SQLITE_AMALGAMATION
typedef unsigned int u32;
#endif

struct Fts5MatchinfoCtx {
  int nCol;                       /* Number of cols in FTS5 table */
  int nPhrase;                    /* Number of phrases in FTS5 query */
  char *zArg;                     /* nul-term'd copy of 2nd arg */
  int nRet;                       /* Number of elements in aRet[] */
  u32 *aRet;                      /* Array of 32-bit unsigned ints to return */
};



/*
** Return a pointer to the fts5_api pointer for database connection db.
** If an error occurs, return NULL and leave an error in the database 
** handle (accessible using sqlite3_errcode()/errmsg()).
*/
static int fts5_api_from_db(sqlite3 *db, fts5_api **ppApi){
  sqlite3_stmt *pStmt = 0;
  int rc;

  *ppApi = 0;
  rc = sqlite3_prepare(db, "SELECT fts5(?1)", -1, &pStmt, 0);
  if( rc==SQLITE_OK ){
    sqlite3_bind_pointer(pStmt, 1, (void*)ppApi, "fts5_api_ptr", 0);
    (void)sqlite3_step(pStmt);
    rc = sqlite3_finalize(pStmt);
  }

  return rc;
}


/*
** Argument f should be a flag accepted by matchinfo() (a valid character
** in the string passed as the second argument). If it is not, -1 is 
** returned. Otherwise, if f is a valid matchinfo flag, the value returned
** is the number of 32-bit integers added to the output array if the
** table has nCol columns and the query nPhrase phrases.
*/
static int fts5MatchinfoFlagsize(int nCol, int nPhrase, char f){
  int ret = -1;
  switch( f ){
    case 'p': ret = 1; break;
    case 'c': ret = 1; break;
    case 'x': ret = 3 * nCol * nPhrase; break;
    case 'y': ret = nCol * nPhrase; break;
    case 'b': ret = ((nCol + 31) / 32) * nPhrase; break;
    case 'n': ret = 1; break;
    case 'a': ret = nCol; break;
    case 'l': ret = nCol; break;
    case 's': ret = nCol; break;
  }
  return ret;
}

static int fts5MatchinfoIter(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  Fts5MatchinfoCtx *p,
  int(*x)(const Fts5ExtensionApi*,Fts5Context*,Fts5MatchinfoCtx*,char,u32*)
){
  int i;
  int n = 0;
  int rc = SQLITE_OK;
  char f;
  for(i=0; (f = p->zArg[i]); i++){
    rc = x(pApi, pFts, p, f, &p->aRet[n]);
    if( rc!=SQLITE_OK ) break;
    n += fts5MatchinfoFlagsize(p->nCol, p->nPhrase, f);
  }
  return rc;
}

static int fts5MatchinfoXCb(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  void *pUserData
){
  Fts5PhraseIter iter;
  int iCol, iOff;
  u32 *aOut = (u32*)pUserData;
  int iPrev = -1;

  for(pApi->xPhraseFirst(pFts, 0, &iter, &iCol, &iOff); 
      iCol>=0; 
      pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
  ){
    aOut[iCol*3+1]++;
    if( iCol!=iPrev ) aOut[iCol*3 + 2]++;
    iPrev = iCol;
  }

  return SQLITE_OK;
}

static int fts5MatchinfoGlobalCb(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  Fts5MatchinfoCtx *p,
  char f,
  u32 *aOut
){
  int rc = SQLITE_OK;
  switch( f ){
    case 'p':
      aOut[0] = p->nPhrase; 
      break;

    case 'c':
      aOut[0] = p->nCol; 
      break;

    case 'x': {
      int i;
      for(i=0; i<p->nPhrase && rc==SQLITE_OK; i++){
        void *pPtr = (void*)&aOut[i * p->nCol * 3];
        rc = pApi->xQueryPhrase(pFts, i, pPtr, fts5MatchinfoXCb);
      }
      break;
    }

    case 'n': {
      sqlite3_int64 nRow;
      rc = pApi->xRowCount(pFts, &nRow);
      aOut[0] = (u32)nRow;
      break;
    }

    case 'a': {
      sqlite3_int64 nRow = 0;
      rc = pApi->xRowCount(pFts, &nRow);
      if( nRow==0 ){
        memset(aOut, 0, sizeof(u32) * p->nCol);
      }else{
        int i;
        for(i=0; rc==SQLITE_OK && i<p->nCol; i++){
          sqlite3_int64 nToken;
          rc = pApi->xColumnTotalSize(pFts, i, &nToken);
          if( rc==SQLITE_OK){
            aOut[i] = (u32)((2*nToken + nRow) / (2*nRow));
          }
        }
      }
      break;
    }

  }
  return rc;
}

static int fts5MatchinfoLocalCb(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  Fts5MatchinfoCtx *p,
  char f,
  u32 *aOut
){
  int i;
  int rc = SQLITE_OK;

  switch( f ){
    case 'b': {
      int iPhrase;
      int nInt = ((p->nCol + 31) / 32) * p->nPhrase;
      for(i=0; i<nInt; i++) aOut[i] = 0;

      for(iPhrase=0; iPhrase<p->nPhrase; iPhrase++){
        Fts5PhraseIter iter;
        int iCol;
        for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol);
            iCol>=0; 
            pApi->xPhraseNextColumn(pFts, &iter, &iCol)
        ){
          aOut[iPhrase * ((p->nCol+31)/32) + iCol/32] |= ((u32)1 << iCol%32);
        }
      }

      break;
    }

    case 'x':
    case 'y': {
      int nMul = (f=='x' ? 3 : 1);
      int iPhrase;

      for(i=0; i<(p->nCol*p->nPhrase); i++) aOut[i*nMul] = 0;

      for(iPhrase=0; iPhrase<p->nPhrase; iPhrase++){
        Fts5PhraseIter iter;
        int iOff, iCol;
        for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff); 
            iOff>=0; 
            pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
        ){
          aOut[nMul * (iCol + iPhrase * p->nCol)]++;
        }
      }

      break;
    }

    case 'l': {
      for(i=0; rc==SQLITE_OK && i<p->nCol; i++){
        int nToken;
        rc = pApi->xColumnSize(pFts, i, &nToken);
        aOut[i] = (u32)nToken;
      }
      break;
    }

    case 's': {
      int nInst;

      memset(aOut, 0, sizeof(u32) * p->nCol);

      rc = pApi->xInstCount(pFts, &nInst);
      for(i=0; rc==SQLITE_OK && i<nInst; i++){
        int iPhrase, iOff, iCol = 0;
        int iNextPhrase;
        int iNextOff;
        u32 nSeq = 1;
        int j;

        rc = pApi->xInst(pFts, i, &iPhrase, &iCol, &iOff);
        iNextPhrase = iPhrase+1;
        iNextOff = iOff+pApi->xPhraseSize(pFts, 0);
        for(j=i+1; rc==SQLITE_OK && j<nInst; j++){
          int ip, ic, io;
          rc = pApi->xInst(pFts, j, &ip, &ic, &io);
          if( ic!=iCol || io>iNextOff ) break;
          if( ip==iNextPhrase && io==iNextOff ){
            nSeq++;
            iNextPhrase = ip+1;
            iNextOff = io + pApi->xPhraseSize(pFts, ip);
          }
        }

        if( nSeq>aOut[iCol] ) aOut[iCol] = nSeq;
      }

      break;
    }
  }
  return rc;
}
 
static Fts5MatchinfoCtx *fts5MatchinfoNew(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning error message */
  const char *zArg                /* Matchinfo flag string */
){
  Fts5MatchinfoCtx *p;
  int nCol;
  int nPhrase;
  int i;
  int nInt;
  sqlite3_int64 nByte;
  int rc;

  nCol = pApi->xColumnCount(pFts);
  nPhrase = pApi->xPhraseCount(pFts);

  nInt = 0;
  for(i=0; zArg[i]; i++){
    int n = fts5MatchinfoFlagsize(nCol, nPhrase, zArg[i]);
    if( n<0 ){
      char *zErr = sqlite3_mprintf("unrecognized matchinfo flag: %c", zArg[i]);
      sqlite3_result_error(pCtx, zErr, -1);
      sqlite3_free(zErr);
      return 0;
    }
    nInt += n;
  }

  nByte = sizeof(Fts5MatchinfoCtx)          /* The struct itself */
         + sizeof(u32) * nInt               /* The p->aRet[] array */
         + (i+1);                           /* The p->zArg string */
  p = (Fts5MatchinfoCtx*)sqlite3_malloc64(nByte);
  if( p==0 ){
    sqlite3_result_error_nomem(pCtx);
    return 0;
  }
  memset(p, 0, nByte);

  p->nCol = nCol;
  p->nPhrase = nPhrase;
  p->aRet = (u32*)&p[1];
  p->nRet = nInt;
  p->zArg = (char*)&p->aRet[nInt];
  memcpy(p->zArg, zArg, i);

  rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoGlobalCb);
  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
    sqlite3_free(p);
    p = 0;
  }

  return p;
}

static void fts5MatchinfoFunc(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  const char *zArg;
  Fts5MatchinfoCtx *p;
  int rc = SQLITE_OK;

  if( nVal>0 ){
    zArg = (const char*)sqlite3_value_text(apVal[0]);
  }else{
    zArg = "pcx";
  }

  p = (Fts5MatchinfoCtx*)pApi->xGetAuxdata(pFts, 0);
  if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){
    p = fts5MatchinfoNew(pApi, pFts, pCtx, zArg);
    if( p==0 ){
      rc = SQLITE_NOMEM;
    }else{
      rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);
    }
  }

  if( rc==SQLITE_OK ){
    rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb);
  }
  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
  }else{
    /* No errors has occured, so return a copy of the array of integers. */
    int nByte = p->nRet * sizeof(u32);
    sqlite3_result_blob(pCtx, (void*)p->aRet, nByte, SQLITE_TRANSIENT);
  }
}

int sqlite3Fts5TestRegisterMatchinfo(sqlite3 *db){
  int rc;                         /* Return code */
  fts5_api *pApi;                 /* FTS5 API functions */

  /* Extract the FTS5 API pointer from the database handle. The 
  ** fts5_api_from_db() function above is copied verbatim from the 
  ** FTS5 documentation. Refer there for details. */
  rc = fts5_api_from_db(db, &pApi);
  if( rc!=SQLITE_OK ) return rc;

  /* If fts5_api_from_db() returns NULL, then either FTS5 is not registered
  ** with this database handle, or an error (OOM perhaps?) has occurred.
  **
  ** Also check that the fts5_api object is version 2 or newer.  
  */ 
  if( pApi==0 || pApi->iVersion<2 ){
    return SQLITE_ERROR;
  }

  /* Register the implementation of matchinfo() */
  rc = pApi->xCreateFunction(pApi, "matchinfo", 0, fts5MatchinfoFunc, 0);

  return rc;
}

#endif /* SQLITE_ENABLE_FTS5 */