/*
** 2014 May 31
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
*/


#include "fts5Int.h"
#include <math.h>                 /* amalgamator: keep */

/*
** Object used to iterate through all "coalesced phrase instances" in 
** a single column of the current row. If the phrase instances in the
** column being considered do not overlap, this object simply iterates
** through them. Or, if they do overlap (share one or more tokens in
** common), each set of overlapping instances is treated as a single
** match. See documentation for the highlight() auxiliary function for
** details.
**
** Usage is:
**
**   for(rc = fts5CInstIterNext(pApi, pFts, iCol, &iter);
**      (rc==SQLITE_OK && 0==fts5CInstIterEof(&iter);
**      rc = fts5CInstIterNext(&iter)
**   ){
**     printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd);
**   }
**
*/
typedef struct CInstIter CInstIter;
struct CInstIter {
  const Fts5ExtensionApi *pApi;   /* API offered by current FTS version */
  Fts5Context *pFts;              /* First arg to pass to pApi functions */
  int iCol;                       /* Column to search */
  int iInst;                      /* Next phrase instance index */
  int nInst;                      /* Total number of phrase instances */

  /* Output variables */
  int iStart;                     /* First token in coalesced phrase instance */
  int iEnd;                       /* Last token in coalesced phrase instance */
};

/*
** Advance the iterator to the next coalesced phrase instance. Return
** an SQLite error code if an error occurs, or SQLITE_OK otherwise.
*/
static int fts5CInstIterNext(CInstIter *pIter){
  int rc = SQLITE_OK;
  pIter->iStart = -1;
  pIter->iEnd = -1;

  while( rc==SQLITE_OK && pIter->iInst<pIter->nInst ){
    int ip; int ic; int io;
    rc = pIter->pApi->xInst(pIter->pFts, pIter->iInst, &ip, &ic, &io);
    if( rc==SQLITE_OK ){
      if( ic==pIter->iCol ){
        int iEnd = io - 1 + pIter->pApi->xPhraseSize(pIter->pFts, ip);
        if( pIter->iStart<0 ){
          pIter->iStart = io;
          pIter->iEnd = iEnd;
        }else if( io<=pIter->iEnd ){
          if( iEnd>pIter->iEnd ) pIter->iEnd = iEnd;
        }else{
          break;
        }
      }
      pIter->iInst++;
    }
  }

  return rc;
}

/*
** Initialize the iterator object indicated by the final parameter to 
** iterate through coalesced phrase instances in column iCol.
*/
static int fts5CInstIterInit(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  int iCol,
  CInstIter *pIter
){
  int rc;

  memset(pIter, 0, sizeof(CInstIter));
  pIter->pApi = pApi;
  pIter->pFts = pFts;
  pIter->iCol = iCol;
  rc = pApi->xInstCount(pFts, &pIter->nInst);

  if( rc==SQLITE_OK ){
    rc = fts5CInstIterNext(pIter);
  }

  return rc;
}



/*************************************************************************
** Start of highlight() implementation.
*/
typedef struct HighlightContext HighlightContext;
struct HighlightContext {
  /* Constant parameters to fts5HighlightCb() */
  int iRangeStart;                /* First token to include */
  int iRangeEnd;                  /* If non-zero, last token to include */
  const char *zOpen;              /* Opening highlight */
  const char *zClose;             /* Closing highlight */
  const char *zIn;                /* Input text */
  int nIn;                        /* Size of input text in bytes */

  /* Variables modified by fts5HighlightCb() */
  CInstIter iter;                 /* Coalesced Instance Iterator */
  int iPos;                       /* Current token offset in zIn[] */
  int iOff;                       /* Have copied up to this offset in zIn[] */
  int bOpen;                      /* True if highlight is open */
  char *zOut;                     /* Output value */
};

/*
** Append text to the HighlightContext output string - p->zOut. Argument
** z points to a buffer containing n bytes of text to append. If n is 
** negative, everything up until the first '\0' is appended to the output.
**
** If *pRc is set to any value other than SQLITE_OK when this function is 
** called, it is a no-op. If an error (i.e. an OOM condition) is encountered, 
** *pRc is set to an error code before returning. 
*/
static void fts5HighlightAppend(
  int *pRc, 
  HighlightContext *p, 
  const char *z, int n
){
  if( *pRc==SQLITE_OK && z ){
    if( n<0 ) n = (int)strlen(z);
    p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z);
    if( p->zOut==0 ) *pRc = SQLITE_NOMEM;
  }
}

/*
** Tokenizer callback used by implementation of highlight() function.
*/
static int fts5HighlightCb(
  void *pContext,                 /* Pointer to HighlightContext object */
  int tflags,                     /* Mask of FTS5_TOKEN_* flags */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStartOff,                  /* Start byte offset of token */
  int iEndOff                     /* End byte offset of token */
){
  HighlightContext *p = (HighlightContext*)pContext;
  int rc = SQLITE_OK;
  int iPos;

  UNUSED_PARAM2(pToken, nToken);

  if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK;
  iPos = p->iPos++;

  if( p->iRangeEnd>=0 ){
    if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK;
    if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff;
  }

  /* If the parenthesis is open, and this token is not part of the current
  ** phrase, and the starting byte offset of this token is past the point
  ** that has currently been copied into the output buffer, close the
  ** parenthesis. */
  if( p->bOpen 
   && (iPos<=p->iter.iStart || p->iter.iStart<0)
   && iStartOff>p->iOff 
  ){
    fts5HighlightAppend(&rc, p, p->zClose, -1);
    p->bOpen = 0;
  }

  /* If this is the start of a new phrase, and the highlight is not open:
  **
  **   * copy text from the input up to the start of the phrase, and
  **   * open the highlight.
  */
  if( iPos==p->iter.iStart && p->bOpen==0 ){
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff);
    fts5HighlightAppend(&rc, p, p->zOpen, -1);
    p->iOff = iStartOff;
    p->bOpen = 1;
  }

  if( iPos==p->iter.iEnd ){
    if( p->bOpen==0 ){
      assert( p->iRangeEnd>=0 );
      fts5HighlightAppend(&rc, p, p->zOpen, -1);
      p->bOpen = 1;
    }
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
    p->iOff = iEndOff;

    if( rc==SQLITE_OK ){
      rc = fts5CInstIterNext(&p->iter);
    }
  }

  if( iPos==p->iRangeEnd ){
    if( p->bOpen ){
      if( p->iter.iStart>=0 && iPos>=p->iter.iStart ){
        fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
        p->iOff = iEndOff;
      }
      fts5HighlightAppend(&rc, p, p->zClose, -1);
      p->bOpen = 0;
    }
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
    p->iOff = iEndOff;
  }

  return rc;
}

/*
** Implementation of highlight() function.
*/
static void fts5HighlightFunction(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  HighlightContext ctx;
  int rc;
  int iCol;

  if( nVal!=3 ){
    const char *zErr = "wrong number of arguments to function highlight()";
    sqlite3_result_error(pCtx, zErr, -1);
    return;
  }

  iCol = sqlite3_value_int(apVal[0]);
  memset(&ctx, 0, sizeof(HighlightContext));
  ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]);
  ctx.zClose = (const char*)sqlite3_value_text(apVal[2]);
  ctx.iRangeEnd = -1;
  rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn);
  if( rc==SQLITE_RANGE ){
    sqlite3_result_text(pCtx, "", -1, SQLITE_STATIC);
    rc = SQLITE_OK;
  }else if( ctx.zIn ){
    if( rc==SQLITE_OK ){
      rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter);
    }

    if( rc==SQLITE_OK ){
      rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb);
    }
    if( ctx.bOpen ){
      fts5HighlightAppend(&rc, &ctx, ctx.zClose, -1);
    }
    fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff);

    if( rc==SQLITE_OK ){
      sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
    }
    sqlite3_free(ctx.zOut);
  }
  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
  }
}
/*
** End of highlight() implementation.
**************************************************************************/

/*
** Context object passed to the fts5SentenceFinderCb() function.
*/
typedef struct Fts5SFinder Fts5SFinder;
struct Fts5SFinder {
  int iPos;                       /* Current token position */
  int nFirstAlloc;                /* Allocated size of aFirst[] */
  int nFirst;                     /* Number of entries in aFirst[] */
  int *aFirst;                    /* Array of first token in each sentence */
  const char *zDoc;               /* Document being tokenized */
};

/*
** Add an entry to the Fts5SFinder.aFirst[] array. Grow the array if
** necessary. Return SQLITE_OK if successful, or SQLITE_NOMEM if an
** error occurs.
*/
static int fts5SentenceFinderAdd(Fts5SFinder *p, int iAdd){
  if( p->nFirstAlloc==p->nFirst ){
    int nNew = p->nFirstAlloc ? p->nFirstAlloc*2 : 64;
    int *aNew;

    aNew = (int*)sqlite3_realloc64(p->aFirst, nNew*sizeof(int));
    if( aNew==0 ) return SQLITE_NOMEM;
    p->aFirst = aNew;
    p->nFirstAlloc = nNew;
  }
  p->aFirst[p->nFirst++] = iAdd;
  return SQLITE_OK;
}

/*
** This function is an xTokenize() callback used by the auxiliary snippet()
** function. Its job is to identify tokens that are the first in a sentence.
** For each such token, an entry is added to the SFinder.aFirst[] array.
*/
static int fts5SentenceFinderCb(
  void *pContext,                 /* Pointer to HighlightContext object */
  int tflags,                     /* Mask of FTS5_TOKEN_* flags */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStartOff,                  /* Start offset of token */
  int iEndOff                     /* End offset of token */
){
  int rc = SQLITE_OK;

  UNUSED_PARAM2(pToken, nToken);
  UNUSED_PARAM(iEndOff);

  if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){
    Fts5SFinder *p = (Fts5SFinder*)pContext;
    if( p->iPos>0 ){
      int i;
      char c = 0;
      for(i=iStartOff-1; i>=0; i--){
        c = p->zDoc[i];
        if( c!=' ' && c!='\t' && c!='\n' && c!='\r' ) break;
      }
      if( i!=iStartOff-1 && (c=='.' || c==':') ){
        rc = fts5SentenceFinderAdd(p, p->iPos);
      }
    }else{
      rc = fts5SentenceFinderAdd(p, 0);
    }
    p->iPos++;
  }
  return rc;
}

static int fts5SnippetScore(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  int nDocsize,                   /* Size of column in tokens */
  unsigned char *aSeen,           /* Array with one element per query phrase */
  int iCol,                       /* Column to score */
  int iPos,                       /* Starting offset to score */
  int nToken,                     /* Max tokens per snippet */
  int *pnScore,                   /* OUT: Score */
  int *piPos                      /* OUT: Adjusted offset */
){
  int rc;
  int i;
  int ip = 0;
  int ic = 0;
  int iOff = 0;
  int iFirst = -1;
  int nInst;
  int nScore = 0;
  int iLast = 0;
  sqlite3_int64 iEnd = (sqlite3_int64)iPos + nToken;

  rc = pApi->xInstCount(pFts, &nInst);
  for(i=0; i<nInst && rc==SQLITE_OK; i++){
    rc = pApi->xInst(pFts, i, &ip, &ic, &iOff);
    if( rc==SQLITE_OK && ic==iCol && iOff>=iPos && iOff<iEnd ){
      nScore += (aSeen[ip] ? 1 : 1000);
      aSeen[ip] = 1;
      if( iFirst<0 ) iFirst = iOff;
      iLast = iOff + pApi->xPhraseSize(pFts, ip);
    }
  }

  *pnScore = nScore;
  if( piPos ){
    sqlite3_int64 iAdj = iFirst - (nToken - (iLast-iFirst)) / 2;
    if( (iAdj+nToken)>nDocsize ) iAdj = nDocsize - nToken;
    if( iAdj<0 ) iAdj = 0;
    *piPos = (int)iAdj;
  }

  return rc;
}

/*
** Return the value in pVal interpreted as utf-8 text. Except, if pVal 
** contains a NULL value, return a pointer to a static string zero
** bytes in length instead of a NULL pointer.
*/
static const char *fts5ValueToText(sqlite3_value *pVal){
  const char *zRet = (const char*)sqlite3_value_text(pVal);
  return zRet ? zRet : "";
}

/*
** Implementation of snippet() function.
*/
static void fts5SnippetFunction(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  HighlightContext ctx;
  int rc = SQLITE_OK;             /* Return code */
  int iCol;                       /* 1st argument to snippet() */
  const char *zEllips;            /* 4th argument to snippet() */
  int nToken;                     /* 5th argument to snippet() */
  int nInst = 0;                  /* Number of instance matches this row */
  int i;                          /* Used to iterate through instances */
  int nPhrase;                    /* Number of phrases in query */
  unsigned char *aSeen;           /* Array of "seen instance" flags */
  int iBestCol;                   /* Column containing best snippet */
  int iBestStart = 0;             /* First token of best snippet */
  int nBestScore = 0;             /* Score of best snippet */
  int nColSize = 0;               /* Total size of iBestCol in tokens */
  Fts5SFinder sFinder;            /* Used to find the beginnings of sentences */
  int nCol;

  if( nVal!=5 ){
    const char *zErr = "wrong number of arguments to function snippet()";
    sqlite3_result_error(pCtx, zErr, -1);
    return;
  }

  nCol = pApi->xColumnCount(pFts);
  memset(&ctx, 0, sizeof(HighlightContext));
  iCol = sqlite3_value_int(apVal[0]);
  ctx.zOpen = fts5ValueToText(apVal[1]);
  ctx.zClose = fts5ValueToText(apVal[2]);
  ctx.iRangeEnd = -1;
  zEllips = fts5ValueToText(apVal[3]);
  nToken = sqlite3_value_int(apVal[4]);

  iBestCol = (iCol>=0 ? iCol : 0);
  nPhrase = pApi->xPhraseCount(pFts);
  aSeen = sqlite3_malloc(nPhrase);
  if( aSeen==0 ){
    rc = SQLITE_NOMEM;
  }
  if( rc==SQLITE_OK ){
    rc = pApi->xInstCount(pFts, &nInst);
  }

  memset(&sFinder, 0, sizeof(Fts5SFinder));
  for(i=0; i<nCol; i++){
    if( iCol<0 || iCol==i ){
      int nDoc;
      int nDocsize;
      int ii;
      sFinder.iPos = 0;
      sFinder.nFirst = 0;
      rc = pApi->xColumnText(pFts, i, &sFinder.zDoc, &nDoc);
      if( rc!=SQLITE_OK ) break;
      rc = pApi->xTokenize(pFts, 
          sFinder.zDoc, nDoc, (void*)&sFinder,fts5SentenceFinderCb
      );
      if( rc!=SQLITE_OK ) break;
      rc = pApi->xColumnSize(pFts, i, &nDocsize);
      if( rc!=SQLITE_OK ) break;

      for(ii=0; rc==SQLITE_OK && ii<nInst; ii++){
        int ip, ic, io;
        int iAdj;
        int nScore;
        int jj;

        rc = pApi->xInst(pFts, ii, &ip, &ic, &io);
        if( ic!=i ) continue;
        if( io>nDocsize ) rc = FTS5_CORRUPT;
        if( rc!=SQLITE_OK ) continue;
        memset(aSeen, 0, nPhrase);
        rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i,
            io, nToken, &nScore, &iAdj
        );
        if( rc==SQLITE_OK && nScore>nBestScore ){
          nBestScore = nScore;
          iBestCol = i;
          iBestStart = iAdj;
          nColSize = nDocsize;
        }

        if( rc==SQLITE_OK && sFinder.nFirst && nDocsize>nToken ){
          for(jj=0; jj<(sFinder.nFirst-1); jj++){
            if( sFinder.aFirst[jj+1]>io ) break;
          }

          if( sFinder.aFirst[jj]<io ){
            memset(aSeen, 0, nPhrase);
            rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, 
              sFinder.aFirst[jj], nToken, &nScore, 0
            );

            nScore += (sFinder.aFirst[jj]==0 ? 120 : 100);
            if( rc==SQLITE_OK && nScore>nBestScore ){
              nBestScore = nScore;
              iBestCol = i;
              iBestStart = sFinder.aFirst[jj];
              nColSize = nDocsize;
            }
          }
        }
      }
    }
  }

  if( rc==SQLITE_OK ){
    rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn);
  }
  if( rc==SQLITE_OK && nColSize==0 ){
    rc = pApi->xColumnSize(pFts, iBestCol, &nColSize);
  }
  if( ctx.zIn ){
    if( rc==SQLITE_OK ){
      rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter);
    }

    ctx.iRangeStart = iBestStart;
    ctx.iRangeEnd = iBestStart + nToken - 1;

    if( iBestStart>0 ){
      fts5HighlightAppend(&rc, &ctx, zEllips, -1);
    }

    /* Advance iterator ctx.iter so that it points to the first coalesced
    ** phrase instance at or following position iBestStart. */
    while( ctx.iter.iStart>=0 && ctx.iter.iStart<iBestStart && rc==SQLITE_OK ){
      rc = fts5CInstIterNext(&ctx.iter);
    }

    if( rc==SQLITE_OK ){
      rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb);
    }
    if( ctx.bOpen ){
      fts5HighlightAppend(&rc, &ctx, ctx.zClose, -1);
    }
    if( ctx.iRangeEnd>=(nColSize-1) ){
      fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff);
    }else{
      fts5HighlightAppend(&rc, &ctx, zEllips, -1);
    }
  }
  if( rc==SQLITE_OK ){
    sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
  }else{
    sqlite3_result_error_code(pCtx, rc);
  }
  sqlite3_free(ctx.zOut);
  sqlite3_free(aSeen);
  sqlite3_free(sFinder.aFirst);
}

/************************************************************************/

/*
** The first time the bm25() function is called for a query, an instance
** of the following structure is allocated and populated.
*/
typedef struct Fts5Bm25Data Fts5Bm25Data;
struct Fts5Bm25Data {
  int nPhrase;                    /* Number of phrases in query */
  double avgdl;                   /* Average number of tokens in each row */
  double *aIDF;                   /* IDF for each phrase */
  double *aFreq;                  /* Array used to calculate phrase freq. */
};

/*
** Callback used by fts5Bm25GetData() to count the number of rows in the
** table matched by each individual phrase within the query.
*/
static int fts5CountCb(
  const Fts5ExtensionApi *pApi, 
  Fts5Context *pFts,
  void *pUserData                 /* Pointer to sqlite3_int64 variable */
){
  sqlite3_int64 *pn = (sqlite3_int64*)pUserData;
  UNUSED_PARAM2(pApi, pFts);
  (*pn)++;
  return SQLITE_OK;
}

/*
** Set *ppData to point to the Fts5Bm25Data object for the current query. 
** If the object has not already been allocated, allocate and populate it
** now.
*/
static int fts5Bm25GetData(
  const Fts5ExtensionApi *pApi, 
  Fts5Context *pFts,
  Fts5Bm25Data **ppData           /* OUT: bm25-data object for this query */
){
  int rc = SQLITE_OK;             /* Return code */
  Fts5Bm25Data *p;                /* Object to return */

  p = (Fts5Bm25Data*)pApi->xGetAuxdata(pFts, 0);
  if( p==0 ){
    int nPhrase;                  /* Number of phrases in query */
    sqlite3_int64 nRow = 0;       /* Number of rows in table */
    sqlite3_int64 nToken = 0;     /* Number of tokens in table */
    sqlite3_int64 nByte;          /* Bytes of space to allocate */
    int i;

    /* Allocate the Fts5Bm25Data object */
    nPhrase = pApi->xPhraseCount(pFts);
    nByte = sizeof(Fts5Bm25Data) + nPhrase*2*sizeof(double);
    p = (Fts5Bm25Data*)sqlite3_malloc64(nByte);
    if( p==0 ){
      rc = SQLITE_NOMEM;
    }else{
      memset(p, 0, (size_t)nByte);
      p->nPhrase = nPhrase;
      p->aIDF = (double*)&p[1];
      p->aFreq = &p->aIDF[nPhrase];
    }

    /* Calculate the average document length for this FTS5 table */
    if( rc==SQLITE_OK ) rc = pApi->xRowCount(pFts, &nRow);
    assert( rc!=SQLITE_OK || nRow>0 );
    if( rc==SQLITE_OK ) rc = pApi->xColumnTotalSize(pFts, -1, &nToken);
    if( rc==SQLITE_OK ) p->avgdl = (double)nToken  / (double)nRow;

    /* Calculate an IDF for each phrase in the query */
    for(i=0; rc==SQLITE_OK && i<nPhrase; i++){
      sqlite3_int64 nHit = 0;
      rc = pApi->xQueryPhrase(pFts, i, (void*)&nHit, fts5CountCb);
      if( rc==SQLITE_OK ){
        /* Calculate the IDF (Inverse Document Frequency) for phrase i.
        ** This is done using the standard BM25 formula as found on wikipedia:
        **
        **   IDF = log( (N - nHit + 0.5) / (nHit + 0.5) )
        **
        ** where "N" is the total number of documents in the set and nHit
        ** is the number that contain at least one instance of the phrase
        ** under consideration.
        **
        ** The problem with this is that if (N < 2*nHit), the IDF is 
        ** negative. Which is undesirable. So the mimimum allowable IDF is
        ** (1e-6) - roughly the same as a term that appears in just over
        ** half of set of 5,000,000 documents.  */
        double idf = log( (nRow - nHit + 0.5) / (nHit + 0.5) );
        if( idf<=0.0 ) idf = 1e-6;
        p->aIDF[i] = idf;
      }
    }

    if( rc!=SQLITE_OK ){
      sqlite3_free(p);
    }else{
      rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);
    }
    if( rc!=SQLITE_OK ) p = 0;
  }
  *ppData = p;
  return rc;
}

/*
** Implementation of bm25() function.
*/
static void fts5Bm25Function(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  const double k1 = 1.2;          /* Constant "k1" from BM25 formula */
  const double b = 0.75;          /* Constant "b" from BM25 formula */
  int rc;                         /* Error code */
  double score = 0.0;             /* SQL function return value */
  Fts5Bm25Data *pData;            /* Values allocated/calculated once only */
  int i;                          /* Iterator variable */
  int nInst = 0;                  /* Value returned by xInstCount() */
  double D = 0.0;                 /* Total number of tokens in row */
  double *aFreq = 0;              /* Array of phrase freq. for current row */

  /* Calculate the phrase frequency (symbol "f(qi,D)" in the documentation)
  ** for each phrase in the query for the current row. */
  rc = fts5Bm25GetData(pApi, pFts, &pData);
  if( rc==SQLITE_OK ){
    aFreq = pData->aFreq;
    memset(aFreq, 0, sizeof(double) * pData->nPhrase);
    rc = pApi->xInstCount(pFts, &nInst);
  }
  for(i=0; rc==SQLITE_OK && i<nInst; i++){
    int ip; int ic; int io;
    rc = pApi->xInst(pFts, i, &ip, &ic, &io);
    if( rc==SQLITE_OK ){
      double w = (nVal > ic) ? sqlite3_value_double(apVal[ic]) : 1.0;
      aFreq[ip] += w;
    }
  }

  /* Figure out the total size of the current row in tokens. */
  if( rc==SQLITE_OK ){
    int nTok;
    rc = pApi->xColumnSize(pFts, -1, &nTok);
    D = (double)nTok;
  }

  /* Determine and return the BM25 score for the current row. Or, if an
  ** error has occurred, throw an exception. */
  if( rc==SQLITE_OK ){
    for(i=0; i<pData->nPhrase; i++){
      score += pData->aIDF[i] * (
          ( aFreq[i] * (k1 + 1.0) ) / 
          ( aFreq[i] + k1 * (1 - b + b * D / pData->avgdl) )
      );
    }
    sqlite3_result_double(pCtx, -1.0 * score);
  }else{
    sqlite3_result_error_code(pCtx, rc);
  }
}

int sqlite3Fts5AuxInit(fts5_api *pApi){
  struct Builtin {
    const char *zFunc;            /* Function name (nul-terminated) */
    void *pUserData;              /* User-data pointer */
    fts5_extension_function xFunc;/* Callback function */
    void (*xDestroy)(void*);      /* Destructor function */
  } aBuiltin [] = {
    { "snippet",   0, fts5SnippetFunction, 0 },
    { "highlight", 0, fts5HighlightFunction, 0 },
    { "bm25",      0, fts5Bm25Function,    0 },
  };
  int rc = SQLITE_OK;             /* Return code */
  int i;                          /* To iterate through builtin functions */

  for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){
    rc = pApi->xCreateFunction(pApi,
        aBuiltin[i].zFunc,
        aBuiltin[i].pUserData,
        aBuiltin[i].xFunc,
        aBuiltin[i].xDestroy
    );
  }

  return rc;
}