diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 13:44:03 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 13:44:03 +0000 |
commit | 293913568e6a7a86fd1479e1cff8e2ecb58d6568 (patch) | |
tree | fc3b469a3ec5ab71b36ea97cc7aaddb838423a0c /src/backend/utils/adt/tsrank.c | |
parent | Initial commit. (diff) | |
download | postgresql-16-293913568e6a7a86fd1479e1cff8e2ecb58d6568.tar.xz postgresql-16-293913568e6a7a86fd1479e1cff8e2ecb58d6568.zip |
Adding upstream version 16.2.upstream/16.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/utils/adt/tsrank.c')
-rw-r--r-- | src/backend/utils/adt/tsrank.c | 1012 |
1 files changed, 1012 insertions, 0 deletions
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c new file mode 100644 index 0000000..e5b850e --- /dev/null +++ b/src/backend/utils/adt/tsrank.c @@ -0,0 +1,1012 @@ +/*------------------------------------------------------------------------- + * + * tsrank.c + * rank tsvector by tsquery + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/tsrank.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <limits.h> +#include <math.h> + +#include "miscadmin.h" +#include "tsearch/ts_utils.h" +#include "utils/array.h" +#include "utils/builtins.h" + +static const float weights[] = {0.1f, 0.2f, 0.4f, 1.0f}; + +#define wpos(wep) ( w[ WEP_GETWEIGHT(wep) ] ) + +#define RANK_NO_NORM 0x00 +#define RANK_NORM_LOGLENGTH 0x01 +#define RANK_NORM_LENGTH 0x02 +#define RANK_NORM_EXTDIST 0x04 +#define RANK_NORM_UNIQ 0x08 +#define RANK_NORM_LOGUNIQ 0x10 +#define RANK_NORM_RDIVRPLUS1 0x20 +#define DEF_NORM_METHOD RANK_NO_NORM + +static float calc_rank_or(const float *w, TSVector t, TSQuery q); +static float calc_rank_and(const float *w, TSVector t, TSQuery q); + +/* + * Returns a weight of a word collocation + */ +static float4 +word_distance(int32 w) +{ + if (w > 100) + return 1e-30f; + + return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2)); +} + +static int +cnt_length(TSVector t) +{ + WordEntry *ptr = ARRPTR(t), + *end = (WordEntry *) STRPTR(t); + int len = 0; + + while (ptr < end) + { + int clen = POSDATALEN(t, ptr); + + if (clen == 0) + len += 1; + else + len += clen; + + ptr++; + } + + return len; +} + + +#define WordECompareQueryItem(e,q,p,i,m) \ + tsCompareString((q) + (i)->distance, (i)->length, \ + (e) + (p)->pos, (p)->len, (m)) + + +/* + * Returns a pointer to a WordEntry's array corresponding to 'item' from + * tsvector 't'. 'q' is the TSQuery containing 'item'. + * Returns NULL if not found. + */ +static WordEntry * +find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem) +{ + WordEntry *StopLow = ARRPTR(t); + WordEntry *StopHigh = (WordEntry *) STRPTR(t); + WordEntry *StopMiddle = StopHigh; + int difference; + + *nitem = 0; + + /* Loop invariant: StopLow <= item < StopHigh */ + while (StopLow < StopHigh) + { + StopMiddle = StopLow + (StopHigh - StopLow) / 2; + difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false); + if (difference == 0) + { + StopHigh = StopMiddle; + *nitem = 1; + break; + } + else if (difference > 0) + StopLow = StopMiddle + 1; + else + StopHigh = StopMiddle; + } + + if (item->prefix) + { + if (StopLow >= StopHigh) + StopMiddle = StopHigh; + + *nitem = 0; + + while (StopMiddle < (WordEntry *) STRPTR(t) && + WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0) + { + (*nitem)++; + StopMiddle++; + } + } + + return (*nitem > 0) ? StopHigh : NULL; +} + + +/* + * sort QueryOperands by (length, word) + */ +static int +compareQueryOperand(const void *a, const void *b, void *arg) +{ + char *operand = (char *) arg; + QueryOperand *qa = (*(QueryOperand *const *) a); + QueryOperand *qb = (*(QueryOperand *const *) b); + + return tsCompareString(operand + qa->distance, qa->length, + operand + qb->distance, qb->length, + false); +} + +/* + * Returns a sorted, de-duplicated array of QueryOperands in a query. + * The returned QueryOperands are pointers to the original QueryOperands + * in the query. + * + * Length of the returned array is stored in *size + */ +static QueryOperand ** +SortAndUniqItems(TSQuery q, int *size) +{ + char *operand = GETOPERAND(q); + QueryItem *item = GETQUERY(q); + QueryOperand **res, + **ptr, + **prevptr; + + ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size); + + /* Collect all operands from the tree to res */ + while ((*size)--) + { + if (item->type == QI_VAL) + { + *ptr = (QueryOperand *) item; + ptr++; + } + item++; + } + + *size = ptr - res; + if (*size < 2) + return res; + + qsort_arg(res, *size, sizeof(QueryOperand *), compareQueryOperand, operand); + + ptr = res + 1; + prevptr = res; + + /* remove duplicates */ + while (ptr - res < *size) + { + if (compareQueryOperand((void *) ptr, (void *) prevptr, (void *) operand) != 0) + { + prevptr++; + *prevptr = *ptr; + } + ptr++; + } + + *size = prevptr + 1 - res; + return res; +} + +static float +calc_rank_and(const float *w, TSVector t, TSQuery q) +{ + WordEntryPosVector **pos; + WordEntryPosVector1 posnull; + WordEntryPosVector *POSNULL; + int i, + k, + l, + p; + WordEntry *entry, + *firstentry; + WordEntryPos *post, + *ct; + int32 dimt, + lenct, + dist, + nitem; + float res = -1.0; + QueryOperand **item; + int size = q->size; + + item = SortAndUniqItems(q, &size); + if (size < 2) + { + pfree(item); + return calc_rank_or(w, t, q); + } + pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size); + + /* A dummy WordEntryPos array to use when haspos is false */ + posnull.npos = 1; + posnull.pos[0] = 0; + WEP_SETPOS(posnull.pos[0], MAXENTRYPOS - 1); + POSNULL = (WordEntryPosVector *) &posnull; + + for (i = 0; i < size; i++) + { + firstentry = entry = find_wordentry(t, q, item[i], &nitem); + if (!entry) + continue; + + while (entry - firstentry < nitem) + { + if (entry->haspos) + pos[i] = _POSVECPTR(t, entry); + else + pos[i] = POSNULL; + + dimt = pos[i]->npos; + post = pos[i]->pos; + for (k = 0; k < i; k++) + { + if (!pos[k]) + continue; + lenct = pos[k]->npos; + ct = pos[k]->pos; + for (l = 0; l < dimt; l++) + { + for (p = 0; p < lenct; p++) + { + dist = abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p])); + if (dist || (dist == 0 && (pos[i] == POSNULL || pos[k] == POSNULL))) + { + float curw; + + if (!dist) + dist = MAXENTRYPOS; + curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist)); + res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw); + } + } + } + } + + entry++; + } + } + pfree(pos); + pfree(item); + return res; +} + +static float +calc_rank_or(const float *w, TSVector t, TSQuery q) +{ + WordEntry *entry, + *firstentry; + WordEntryPosVector1 posnull; + WordEntryPos *post; + int32 dimt, + j, + i, + nitem; + float res = 0.0; + QueryOperand **item; + int size = q->size; + + /* A dummy WordEntryPos array to use when haspos is false */ + posnull.npos = 1; + posnull.pos[0] = 0; + + item = SortAndUniqItems(q, &size); + + for (i = 0; i < size; i++) + { + float resj, + wjm; + int32 jm; + + firstentry = entry = find_wordentry(t, q, item[i], &nitem); + if (!entry) + continue; + + while (entry - firstentry < nitem) + { + if (entry->haspos) + { + dimt = POSDATALEN(t, entry); + post = POSDATAPTR(t, entry); + } + else + { + dimt = posnull.npos; + post = posnull.pos; + } + + resj = 0.0; + wjm = -1.0; + jm = 0; + for (j = 0; j < dimt; j++) + { + resj = resj + wpos(post[j]) / ((j + 1) * (j + 1)); + if (wpos(post[j]) > wjm) + { + wjm = wpos(post[j]); + jm = j; + } + } +/* + limit (sum(1/i^2),i=1,inf) = pi^2/6 + resj = sum(wi/i^2),i=1,noccurrence, + wi - should be sorted desc, + don't sort for now, just choose maximum weight. This should be corrected + Oleg Bartunov +*/ + res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685; + + entry++; + } + } + if (size > 0) + res = res / size; + pfree(item); + return res; +} + +static float +calc_rank(const float *w, TSVector t, TSQuery q, int32 method) +{ + QueryItem *item = GETQUERY(q); + float res = 0.0; + int len; + + if (!t->size || !q->size) + return 0.0; + + /* XXX: What about NOT? */ + res = (item->type == QI_OPR && (item->qoperator.oper == OP_AND || + item->qoperator.oper == OP_PHRASE)) ? + calc_rank_and(w, t, q) : + calc_rank_or(w, t, q); + + if (res < 0) + res = 1e-20f; + + if ((method & RANK_NORM_LOGLENGTH) && t->size > 0) + res /= log((double) (cnt_length(t) + 1)) / log(2.0); + + if (method & RANK_NORM_LENGTH) + { + len = cnt_length(t); + if (len > 0) + res /= (float) len; + } + + /* RANK_NORM_EXTDIST not applicable */ + + if ((method & RANK_NORM_UNIQ) && t->size > 0) + res /= (float) (t->size); + + if ((method & RANK_NORM_LOGUNIQ) && t->size > 0) + res /= log((double) (t->size + 1)) / log(2.0); + + if (method & RANK_NORM_RDIVRPLUS1) + res /= (res + 1); + + return res; +} + +static const float * +getWeights(ArrayType *win) +{ + static float ws[lengthof(weights)]; + int i; + float4 *arrdata; + + if (win == NULL) + return weights; + + if (ARR_NDIM(win) != 1) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("array of weight must be one-dimensional"))); + + if (ArrayGetNItems(ARR_NDIM(win), ARR_DIMS(win)) < lengthof(weights)) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("array of weight is too short"))); + + if (array_contains_nulls(win)) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("array of weight must not contain nulls"))); + + arrdata = (float4 *) ARR_DATA_PTR(win); + for (i = 0; i < lengthof(weights); i++) + { + ws[i] = (arrdata[i] >= 0) ? arrdata[i] : weights[i]; + if (ws[i] > 1.0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("weight out of range"))); + } + + return ws; +} + +Datum +ts_rank_wttf(PG_FUNCTION_ARGS) +{ + ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + TSVector txt = PG_GETARG_TSVECTOR(1); + TSQuery query = PG_GETARG_TSQUERY(2); + int method = PG_GETARG_INT32(3); + float res; + + res = calc_rank(getWeights(win), txt, query, method); + + PG_FREE_IF_COPY(win, 0); + PG_FREE_IF_COPY(txt, 1); + PG_FREE_IF_COPY(query, 2); + PG_RETURN_FLOAT4(res); +} + +Datum +ts_rank_wtt(PG_FUNCTION_ARGS) +{ + ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + TSVector txt = PG_GETARG_TSVECTOR(1); + TSQuery query = PG_GETARG_TSQUERY(2); + float res; + + res = calc_rank(getWeights(win), txt, query, DEF_NORM_METHOD); + + PG_FREE_IF_COPY(win, 0); + PG_FREE_IF_COPY(txt, 1); + PG_FREE_IF_COPY(query, 2); + PG_RETURN_FLOAT4(res); +} + +Datum +ts_rank_ttf(PG_FUNCTION_ARGS) +{ + TSVector txt = PG_GETARG_TSVECTOR(0); + TSQuery query = PG_GETARG_TSQUERY(1); + int method = PG_GETARG_INT32(2); + float res; + + res = calc_rank(getWeights(NULL), txt, query, method); + + PG_FREE_IF_COPY(txt, 0); + PG_FREE_IF_COPY(query, 1); + PG_RETURN_FLOAT4(res); +} + +Datum +ts_rank_tt(PG_FUNCTION_ARGS) +{ + TSVector txt = PG_GETARG_TSVECTOR(0); + TSQuery query = PG_GETARG_TSQUERY(1); + float res; + + res = calc_rank(getWeights(NULL), txt, query, DEF_NORM_METHOD); + + PG_FREE_IF_COPY(txt, 0); + PG_FREE_IF_COPY(query, 1); + PG_RETURN_FLOAT4(res); +} + +typedef struct +{ + union + { + struct + { /* compiled doc representation */ + QueryItem **items; + int16 nitem; + } query; + struct + { /* struct is used for preparing doc + * representation */ + QueryItem *item; + WordEntry *entry; + } map; + } data; + WordEntryPos pos; +} DocRepresentation; + +static int +compareDocR(const void *va, const void *vb) +{ + const DocRepresentation *a = (const DocRepresentation *) va; + const DocRepresentation *b = (const DocRepresentation *) vb; + + if (WEP_GETPOS(a->pos) == WEP_GETPOS(b->pos)) + { + if (WEP_GETWEIGHT(a->pos) == WEP_GETWEIGHT(b->pos)) + { + if (a->data.map.entry == b->data.map.entry) + return 0; + + return (a->data.map.entry > b->data.map.entry) ? 1 : -1; + } + + return (WEP_GETWEIGHT(a->pos) > WEP_GETWEIGHT(b->pos)) ? 1 : -1; + } + + return (WEP_GETPOS(a->pos) > WEP_GETPOS(b->pos)) ? 1 : -1; +} + +#define MAXQROPOS MAXENTRYPOS +typedef struct +{ + bool operandexists; + bool reverseinsert; /* indicates insert order, true means + * descending order */ + uint32 npos; + WordEntryPos pos[MAXQROPOS]; +} QueryRepresentationOperand; + +typedef struct +{ + TSQuery query; + QueryRepresentationOperand *operandData; +} QueryRepresentation; + +#define QR_GET_OPERAND_DATA(q, v) \ + ( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) ) + +/* + * TS_execute callback for matching a tsquery operand to QueryRepresentation + */ +static TSTernaryValue +checkcondition_QueryOperand(void *checkval, QueryOperand *val, + ExecPhraseData *data) +{ + QueryRepresentation *qr = (QueryRepresentation *) checkval; + QueryRepresentationOperand *opData = QR_GET_OPERAND_DATA(qr, val); + + if (!opData->operandexists) + return TS_NO; + + if (data) + { + data->npos = opData->npos; + data->pos = opData->pos; + if (opData->reverseinsert) + data->pos += MAXQROPOS - opData->npos; + } + + return TS_YES; +} + +typedef struct +{ + int pos; + int p; + int q; + DocRepresentation *begin; + DocRepresentation *end; +} CoverExt; + +static void +resetQueryRepresentation(QueryRepresentation *qr, bool reverseinsert) +{ + int i; + + for (i = 0; i < qr->query->size; i++) + { + qr->operandData[i].operandexists = false; + qr->operandData[i].reverseinsert = reverseinsert; + qr->operandData[i].npos = 0; + } +} + +static void +fillQueryRepresentationData(QueryRepresentation *qr, DocRepresentation *entry) +{ + int i; + int lastPos; + QueryRepresentationOperand *opData; + + for (i = 0; i < entry->data.query.nitem; i++) + { + if (entry->data.query.items[i]->type != QI_VAL) + continue; + + opData = QR_GET_OPERAND_DATA(qr, entry->data.query.items[i]); + + opData->operandexists = true; + + if (opData->npos == 0) + { + lastPos = (opData->reverseinsert) ? (MAXQROPOS - 1) : 0; + opData->pos[lastPos] = entry->pos; + opData->npos++; + continue; + } + + lastPos = opData->reverseinsert ? + (MAXQROPOS - opData->npos) : + (opData->npos - 1); + + if (WEP_GETPOS(opData->pos[lastPos]) != WEP_GETPOS(entry->pos)) + { + lastPos = opData->reverseinsert ? + (MAXQROPOS - 1 - opData->npos) : + (opData->npos); + + opData->pos[lastPos] = entry->pos; + opData->npos++; + } + } +} + +static bool +Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext) +{ + DocRepresentation *ptr; + int lastpos = ext->pos; + bool found = false; + + /* + * since this function recurses, it could be driven to stack overflow. + * (though any decent compiler will optimize away the tail-recursion. + */ + check_stack_depth(); + + resetQueryRepresentation(qr, false); + + ext->p = INT_MAX; + ext->q = 0; + ptr = doc + ext->pos; + + /* find upper bound of cover from current position, move up */ + while (ptr - doc < len) + { + fillQueryRepresentationData(qr, ptr); + + if (TS_execute(GETQUERY(qr->query), (void *) qr, + TS_EXEC_EMPTY, checkcondition_QueryOperand)) + { + if (WEP_GETPOS(ptr->pos) > ext->q) + { + ext->q = WEP_GETPOS(ptr->pos); + ext->end = ptr; + lastpos = ptr - doc; + found = true; + } + break; + } + ptr++; + } + + if (!found) + return false; + + resetQueryRepresentation(qr, true); + + ptr = doc + lastpos; + + /* find lower bound of cover from found upper bound, move down */ + while (ptr >= doc + ext->pos) + { + /* + * we scan doc from right to left, so pos info in reverse order! + */ + fillQueryRepresentationData(qr, ptr); + + if (TS_execute(GETQUERY(qr->query), (void *) qr, + TS_EXEC_EMPTY, checkcondition_QueryOperand)) + { + if (WEP_GETPOS(ptr->pos) < ext->p) + { + ext->begin = ptr; + ext->p = WEP_GETPOS(ptr->pos); + } + break; + } + ptr--; + } + + if (ext->p <= ext->q) + { + /* + * set position for next try to next lexeme after beginning of found + * cover + */ + ext->pos = (ptr - doc) + 1; + return true; + } + + ext->pos++; + return Cover(doc, len, qr, ext); +} + +static DocRepresentation * +get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen) +{ + QueryItem *item = GETQUERY(qr->query); + WordEntry *entry, + *firstentry; + WordEntryPos *post; + int32 dimt, /* number of 'post' items */ + j, + i, + nitem; + int len = qr->query->size * 4, + cur = 0; + DocRepresentation *doc; + + doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len); + + /* + * Iterate through query to make DocRepresentation for words and it's + * entries satisfied by query + */ + for (i = 0; i < qr->query->size; i++) + { + QueryOperand *curoperand; + + if (item[i].type != QI_VAL) + continue; + + curoperand = &item[i].qoperand; + + firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem); + if (!entry) + continue; + + /* iterations over entries in tsvector */ + while (entry - firstentry < nitem) + { + if (entry->haspos) + { + dimt = POSDATALEN(txt, entry); + post = POSDATAPTR(txt, entry); + } + else + { + /* ignore words without positions */ + entry++; + continue; + } + + while (cur + dimt >= len) + { + len *= 2; + doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len); + } + + /* iterations over entry's positions */ + for (j = 0; j < dimt; j++) + { + if (curoperand->weight == 0 || + curoperand->weight & (1 << WEP_GETWEIGHT(post[j]))) + { + doc[cur].pos = post[j]; + doc[cur].data.map.entry = entry; + doc[cur].data.map.item = (QueryItem *) curoperand; + cur++; + } + } + + entry++; + } + } + + if (cur > 0) + { + DocRepresentation *rptr = doc + 1, + *wptr = doc, + storage; + + /* + * Sort representation in ascending order by pos and entry + */ + qsort(doc, cur, sizeof(DocRepresentation), compareDocR); + + /* + * Join QueryItem per WordEntry and it's position + */ + storage.pos = doc->pos; + storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size); + storage.data.query.items[0] = doc->data.map.item; + storage.data.query.nitem = 1; + + while (rptr - doc < cur) + { + if (rptr->pos == (rptr - 1)->pos && + rptr->data.map.entry == (rptr - 1)->data.map.entry) + { + storage.data.query.items[storage.data.query.nitem] = rptr->data.map.item; + storage.data.query.nitem++; + } + else + { + *wptr = storage; + wptr++; + storage.pos = rptr->pos; + storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size); + storage.data.query.items[0] = rptr->data.map.item; + storage.data.query.nitem = 1; + } + + rptr++; + } + + *wptr = storage; + wptr++; + + *doclen = wptr - doc; + return doc; + } + + pfree(doc); + return NULL; +} + +static float4 +calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method) +{ + DocRepresentation *doc; + int len, + i, + doclen = 0; + CoverExt ext; + double Wdoc = 0.0; + double invws[lengthof(weights)]; + double SumDist = 0.0, + PrevExtPos = 0.0; + int NExtent = 0; + QueryRepresentation qr; + + + for (i = 0; i < lengthof(weights); i++) + { + invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i])); + if (invws[i] > 1.0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("weight out of range"))); + invws[i] = 1.0 / invws[i]; + } + + qr.query = query; + qr.operandData = (QueryRepresentationOperand *) + palloc0(sizeof(QueryRepresentationOperand) * query->size); + + doc = get_docrep(txt, &qr, &doclen); + if (!doc) + { + pfree(qr.operandData); + return 0.0; + } + + MemSet(&ext, 0, sizeof(CoverExt)); + while (Cover(doc, doclen, &qr, &ext)) + { + double Cpos = 0.0; + double InvSum = 0.0; + double CurExtPos; + int nNoise; + DocRepresentation *ptr = ext.begin; + + while (ptr <= ext.end) + { + InvSum += invws[WEP_GETWEIGHT(ptr->pos)]; + ptr++; + } + + Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum; + + /* + * if doc are big enough then ext.q may be equal to ext.p due to limit + * of positional information. In this case we approximate number of + * noise word as half cover's length + */ + nNoise = (ext.q - ext.p) - (ext.end - ext.begin); + if (nNoise < 0) + nNoise = (ext.end - ext.begin) / 2; + Wdoc += Cpos / ((double) (1 + nNoise)); + + CurExtPos = ((double) (ext.q + ext.p)) / 2.0; + if (NExtent > 0 && CurExtPos > PrevExtPos /* prevent division by + * zero in a case of + * multiple lexize */ ) + SumDist += 1.0 / (CurExtPos - PrevExtPos); + + PrevExtPos = CurExtPos; + NExtent++; + } + + if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0) + Wdoc /= log((double) (cnt_length(txt) + 1)); + + if (method & RANK_NORM_LENGTH) + { + len = cnt_length(txt); + if (len > 0) + Wdoc /= (double) len; + } + + if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0) + Wdoc /= ((double) NExtent) / SumDist; + + if ((method & RANK_NORM_UNIQ) && txt->size > 0) + Wdoc /= (double) (txt->size); + + if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0) + Wdoc /= log((double) (txt->size + 1)) / log(2.0); + + if (method & RANK_NORM_RDIVRPLUS1) + Wdoc /= (Wdoc + 1); + + pfree(doc); + + pfree(qr.operandData); + + return (float4) Wdoc; +} + +Datum +ts_rankcd_wttf(PG_FUNCTION_ARGS) +{ + ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + TSVector txt = PG_GETARG_TSVECTOR(1); + TSQuery query = PG_GETARG_TSQUERY(2); + int method = PG_GETARG_INT32(3); + float res; + + res = calc_rank_cd(getWeights(win), txt, query, method); + + PG_FREE_IF_COPY(win, 0); + PG_FREE_IF_COPY(txt, 1); + PG_FREE_IF_COPY(query, 2); + PG_RETURN_FLOAT4(res); +} + +Datum +ts_rankcd_wtt(PG_FUNCTION_ARGS) +{ + ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + TSVector txt = PG_GETARG_TSVECTOR(1); + TSQuery query = PG_GETARG_TSQUERY(2); + float res; + + res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD); + + PG_FREE_IF_COPY(win, 0); + PG_FREE_IF_COPY(txt, 1); + PG_FREE_IF_COPY(query, 2); + PG_RETURN_FLOAT4(res); +} + +Datum +ts_rankcd_ttf(PG_FUNCTION_ARGS) +{ + TSVector txt = PG_GETARG_TSVECTOR(0); + TSQuery query = PG_GETARG_TSQUERY(1); + int method = PG_GETARG_INT32(2); + float res; + + res = calc_rank_cd(getWeights(NULL), txt, query, method); + + PG_FREE_IF_COPY(txt, 0); + PG_FREE_IF_COPY(query, 1); + PG_RETURN_FLOAT4(res); +} + +Datum +ts_rankcd_tt(PG_FUNCTION_ARGS) +{ + TSVector txt = PG_GETARG_TSVECTOR(0); + TSQuery query = PG_GETARG_TSQUERY(1); + float res; + + res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD); + + PG_FREE_IF_COPY(txt, 0); + PG_FREE_IF_COPY(query, 1); + PG_RETURN_FLOAT4(res); +} |