diff options
Diffstat (limited to 'contrib/pg_trgm/trgm_gin.c')
-rw-r--r-- | contrib/pg_trgm/trgm_gin.c | 359 |
1 files changed, 359 insertions, 0 deletions
diff --git a/contrib/pg_trgm/trgm_gin.c b/contrib/pg_trgm/trgm_gin.c new file mode 100644 index 0000000..32fafef --- /dev/null +++ b/contrib/pg_trgm/trgm_gin.c @@ -0,0 +1,359 @@ +/* + * contrib/pg_trgm/trgm_gin.c + */ +#include "postgres.h" + +#include "access/gin.h" +#include "access/stratnum.h" +#include "fmgr.h" +#include "trgm.h" + +PG_FUNCTION_INFO_V1(gin_extract_trgm); +PG_FUNCTION_INFO_V1(gin_extract_value_trgm); +PG_FUNCTION_INFO_V1(gin_extract_query_trgm); +PG_FUNCTION_INFO_V1(gin_trgm_consistent); +PG_FUNCTION_INFO_V1(gin_trgm_triconsistent); + +/* + * This function can only be called if a pre-9.1 version of the GIN operator + * class definition is present in the catalogs (probably as a consequence + * of upgrade-in-place). Cope. + */ +Datum +gin_extract_trgm(PG_FUNCTION_ARGS) +{ + if (PG_NARGS() == 3) + return gin_extract_value_trgm(fcinfo); + if (PG_NARGS() == 7) + return gin_extract_query_trgm(fcinfo); + elog(ERROR, "unexpected number of arguments to gin_extract_trgm"); + PG_RETURN_NULL(); +} + +Datum +gin_extract_value_trgm(PG_FUNCTION_ARGS) +{ + text *val = (text *) PG_GETARG_TEXT_PP(0); + int32 *nentries = (int32 *) PG_GETARG_POINTER(1); + Datum *entries = NULL; + TRGM *trg; + int32 trglen; + + *nentries = 0; + + trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val)); + trglen = ARRNELEM(trg); + + if (trglen > 0) + { + trgm *ptr; + int32 i; + + *nentries = trglen; + entries = (Datum *) palloc(sizeof(Datum) * trglen); + + ptr = GETARR(trg); + for (i = 0; i < trglen; i++) + { + int32 item = trgm2int(ptr); + + entries[i] = Int32GetDatum(item); + ptr++; + } + } + + PG_RETURN_POINTER(entries); +} + +Datum +gin_extract_query_trgm(PG_FUNCTION_ARGS) +{ + text *val = (text *) PG_GETARG_TEXT_PP(0); + int32 *nentries = (int32 *) PG_GETARG_POINTER(1); + StrategyNumber strategy = PG_GETARG_UINT16(2); + + /* bool **pmatch = (bool **) PG_GETARG_POINTER(3); */ + Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4); + + /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */ + int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); + Datum *entries = NULL; + TRGM *trg; + int32 trglen; + trgm *ptr; + TrgmPackedGraph *graph; + int32 i; + + switch (strategy) + { + case SimilarityStrategyNumber: + case WordSimilarityStrategyNumber: + case StrictWordSimilarityStrategyNumber: + case EqualStrategyNumber: + trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val)); + break; + case ILikeStrategyNumber: +#ifndef IGNORECASE + elog(ERROR, "cannot handle ~~* with case-sensitive trigrams"); +#endif + /* FALL THRU */ + case LikeStrategyNumber: + + /* + * For wildcard search we extract all the trigrams that every + * potentially-matching string must include. + */ + trg = generate_wildcard_trgm(VARDATA_ANY(val), + VARSIZE_ANY_EXHDR(val)); + break; + case RegExpICaseStrategyNumber: +#ifndef IGNORECASE + elog(ERROR, "cannot handle ~* with case-sensitive trigrams"); +#endif + /* FALL THRU */ + case RegExpStrategyNumber: + trg = createTrgmNFA(val, PG_GET_COLLATION(), + &graph, CurrentMemoryContext); + if (trg && ARRNELEM(trg) > 0) + { + /* + * Successful regex processing: store NFA-like graph as + * extra_data. GIN API requires an array of nentries + * Pointers, but we just put the same value in each element. + */ + trglen = ARRNELEM(trg); + *extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen); + for (i = 0; i < trglen; i++) + (*extra_data)[i] = (Pointer) graph; + } + else + { + /* No result: have to do full index scan. */ + *nentries = 0; + *searchMode = GIN_SEARCH_MODE_ALL; + PG_RETURN_POINTER(entries); + } + break; + default: + elog(ERROR, "unrecognized strategy number: %d", strategy); + trg = NULL; /* keep compiler quiet */ + break; + } + + trglen = ARRNELEM(trg); + *nentries = trglen; + + if (trglen > 0) + { + entries = (Datum *) palloc(sizeof(Datum) * trglen); + ptr = GETARR(trg); + for (i = 0; i < trglen; i++) + { + int32 item = trgm2int(ptr); + + entries[i] = Int32GetDatum(item); + ptr++; + } + } + + /* + * If no trigram was extracted then we have to scan all the index. + */ + if (trglen == 0) + *searchMode = GIN_SEARCH_MODE_ALL; + + PG_RETURN_POINTER(entries); +} + +Datum +gin_trgm_consistent(PG_FUNCTION_ARGS) +{ + bool *check = (bool *) PG_GETARG_POINTER(0); + StrategyNumber strategy = PG_GETARG_UINT16(1); + + /* text *query = PG_GETARG_TEXT_PP(2); */ + int32 nkeys = PG_GETARG_INT32(3); + Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); + bool *recheck = (bool *) PG_GETARG_POINTER(5); + bool res; + int32 i, + ntrue; + double nlimit; + + /* All cases served by this function are inexact */ + *recheck = true; + + switch (strategy) + { + case SimilarityStrategyNumber: + case WordSimilarityStrategyNumber: + case StrictWordSimilarityStrategyNumber: + nlimit = index_strategy_get_limit(strategy); + + /* Count the matches */ + ntrue = 0; + for (i = 0; i < nkeys; i++) + { + if (check[i]) + ntrue++; + } + + /*-------------------- + * If DIVUNION is defined then similarity formula is: + * c / (len1 + len2 - c) + * where c is number of common trigrams and it stands as ntrue in + * this code. Here we don't know value of len2 but we can assume + * that c (ntrue) is a lower bound of len2, so upper bound of + * similarity is: + * c / (len1 + c - c) => c / len1 + * If DIVUNION is not defined then similarity formula is: + * c / max(len1, len2) + * And again, c (ntrue) is a lower bound of len2, but c <= len1 + * just by definition and, consequently, upper bound of + * similarity is just c / len1. + * So, independently on DIVUNION the upper bound formula is the same. + */ + res = (nkeys == 0) ? false : + (((((float4) ntrue) / ((float4) nkeys))) >= nlimit); + break; + case ILikeStrategyNumber: +#ifndef IGNORECASE + elog(ERROR, "cannot handle ~~* with case-sensitive trigrams"); +#endif + /* FALL THRU */ + case LikeStrategyNumber: + case EqualStrategyNumber: + /* Check if all extracted trigrams are presented. */ + res = true; + for (i = 0; i < nkeys; i++) + { + if (!check[i]) + { + res = false; + break; + } + } + break; + case RegExpICaseStrategyNumber: +#ifndef IGNORECASE + elog(ERROR, "cannot handle ~* with case-sensitive trigrams"); +#endif + /* FALL THRU */ + case RegExpStrategyNumber: + if (nkeys < 1) + { + /* Regex processing gave no result: do full index scan */ + res = true; + } + else + res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0], + check); + break; + default: + elog(ERROR, "unrecognized strategy number: %d", strategy); + res = false; /* keep compiler quiet */ + break; + } + + PG_RETURN_BOOL(res); +} + +/* + * In all cases, GIN_TRUE is at least as favorable to inclusion as + * GIN_MAYBE. If no better option is available, simply treat + * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary + * consistent function. + */ +Datum +gin_trgm_triconsistent(PG_FUNCTION_ARGS) +{ + GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); + StrategyNumber strategy = PG_GETARG_UINT16(1); + + /* text *query = PG_GETARG_TEXT_PP(2); */ + int32 nkeys = PG_GETARG_INT32(3); + Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); + GinTernaryValue res = GIN_MAYBE; + int32 i, + ntrue; + bool *boolcheck; + double nlimit; + + switch (strategy) + { + case SimilarityStrategyNumber: + case WordSimilarityStrategyNumber: + case StrictWordSimilarityStrategyNumber: + nlimit = index_strategy_get_limit(strategy); + + /* Count the matches */ + ntrue = 0; + for (i = 0; i < nkeys; i++) + { + if (check[i] != GIN_FALSE) + ntrue++; + } + + /* + * See comment in gin_trgm_consistent() about * upper bound + * formula + */ + res = (nkeys == 0) + ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit) + ? GIN_MAYBE : GIN_FALSE); + break; + case ILikeStrategyNumber: +#ifndef IGNORECASE + elog(ERROR, "cannot handle ~~* with case-sensitive trigrams"); +#endif + /* FALL THRU */ + case LikeStrategyNumber: + case EqualStrategyNumber: + /* Check if all extracted trigrams are presented. */ + res = GIN_MAYBE; + for (i = 0; i < nkeys; i++) + { + if (check[i] == GIN_FALSE) + { + res = GIN_FALSE; + break; + } + } + break; + case RegExpICaseStrategyNumber: +#ifndef IGNORECASE + elog(ERROR, "cannot handle ~* with case-sensitive trigrams"); +#endif + /* FALL THRU */ + case RegExpStrategyNumber: + if (nkeys < 1) + { + /* Regex processing gave no result: do full index scan */ + res = GIN_MAYBE; + } + else + { + /* + * As trigramsMatchGraph implements a monotonic boolean + * function, promoting all GIN_MAYBE keys to GIN_TRUE will + * give a conservative result. + */ + boolcheck = (bool *) palloc(sizeof(bool) * nkeys); + for (i = 0; i < nkeys; i++) + boolcheck[i] = (check[i] != GIN_FALSE); + if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0], + boolcheck)) + res = GIN_FALSE; + pfree(boolcheck); + } + break; + default: + elog(ERROR, "unrecognized strategy number: %d", strategy); + res = GIN_FALSE; /* keep compiler quiet */ + break; + } + + /* All cases served by this function are inexact */ + Assert(res != GIN_TRUE); + PG_RETURN_GIN_TERNARY_VALUE(res); +} |