summaryrefslogtreecommitdiffstats
path: root/src/backend/tsearch/wparser.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/tsearch/wparser.c')
-rw-r--r--src/backend/tsearch/wparser.c549
1 files changed, 549 insertions, 0 deletions
diff --git a/src/backend/tsearch/wparser.c b/src/backend/tsearch/wparser.c
new file mode 100644
index 0000000..71882dc
--- /dev/null
+++ b/src/backend/tsearch/wparser.c
@@ -0,0 +1,549 @@
+/*-------------------------------------------------------------------------
+ *
+ * wparser.c
+ * Standard interface to word parser
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/tsearch/wparser.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/namespace.h"
+#include "catalog/pg_type.h"
+#include "commands/defrem.h"
+#include "common/jsonapi.h"
+#include "funcapi.h"
+#include "tsearch/ts_cache.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+#include "utils/jsonfuncs.h"
+#include "utils/varlena.h"
+
+/******sql-level interface******/
+
+typedef struct
+{
+ int cur;
+ LexDescr *list;
+} TSTokenTypeStorage;
+
+/* state for ts_headline_json_* */
+typedef struct HeadlineJsonState
+{
+ HeadlineParsedText *prs;
+ TSConfigCacheEntry *cfg;
+ TSParserCacheEntry *prsobj;
+ TSQuery query;
+ List *prsoptions;
+ bool transformed;
+} HeadlineJsonState;
+
+static text *headline_json_value(void *_state, char *elem_value, int elem_len);
+
+static void
+tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
+{
+ TupleDesc tupdesc;
+ MemoryContext oldcontext;
+ TSTokenTypeStorage *st;
+ TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
+
+ if (!OidIsValid(prs->lextypeOid))
+ elog(ERROR, "method lextype isn't defined for text search parser %u",
+ prsid);
+
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ st = (TSTokenTypeStorage *) palloc(sizeof(TSTokenTypeStorage));
+ st->cur = 0;
+ /* lextype takes one dummy argument */
+ st->list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid,
+ (Datum) 0));
+ funcctx->user_fctx = (void *) st;
+
+ tupdesc = CreateTemplateTupleDesc(3);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
+ INT4OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "alias",
+ TEXTOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 3, "description",
+ TEXTOID, -1, 0);
+
+ funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+ MemoryContextSwitchTo(oldcontext);
+}
+
+static Datum
+tt_process_call(FuncCallContext *funcctx)
+{
+ TSTokenTypeStorage *st;
+
+ st = (TSTokenTypeStorage *) funcctx->user_fctx;
+ if (st->list && st->list[st->cur].lexid)
+ {
+ Datum result;
+ char *values[3];
+ char txtid[16];
+ HeapTuple tuple;
+
+ sprintf(txtid, "%d", st->list[st->cur].lexid);
+ values[0] = txtid;
+ values[1] = st->list[st->cur].alias;
+ values[2] = st->list[st->cur].descr;
+
+ tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+ result = HeapTupleGetDatum(tuple);
+
+ pfree(values[1]);
+ pfree(values[2]);
+ st->cur++;
+ return result;
+ }
+ return (Datum) 0;
+}
+
+Datum
+ts_token_type_byid(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ Datum result;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ funcctx = SRF_FIRSTCALL_INIT();
+ tt_setup_firstcall(funcctx, PG_GETARG_OID(0));
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+
+ if ((result = tt_process_call(funcctx)) != (Datum) 0)
+ SRF_RETURN_NEXT(funcctx, result);
+ SRF_RETURN_DONE(funcctx);
+}
+
+Datum
+ts_token_type_byname(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ Datum result;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ text *prsname = PG_GETARG_TEXT_PP(0);
+ Oid prsId;
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
+ tt_setup_firstcall(funcctx, prsId);
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+
+ if ((result = tt_process_call(funcctx)) != (Datum) 0)
+ SRF_RETURN_NEXT(funcctx, result);
+ SRF_RETURN_DONE(funcctx);
+}
+
+typedef struct
+{
+ int type;
+ char *lexeme;
+} LexemeEntry;
+
+typedef struct
+{
+ int cur;
+ int len;
+ LexemeEntry *list;
+} PrsStorage;
+
+
+static void
+prs_setup_firstcall(FuncCallContext *funcctx, Oid prsid, text *txt)
+{
+ TupleDesc tupdesc;
+ MemoryContext oldcontext;
+ PrsStorage *st;
+ TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
+ char *lex = NULL;
+ int llen = 0,
+ type = 0;
+ void *prsdata;
+
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ st = (PrsStorage *) palloc(sizeof(PrsStorage));
+ st->cur = 0;
+ st->len = 16;
+ st->list = (LexemeEntry *) palloc(sizeof(LexemeEntry) * st->len);
+
+ prsdata = (void *) DatumGetPointer(FunctionCall2(&prs->prsstart,
+ PointerGetDatum(VARDATA_ANY(txt)),
+ Int32GetDatum(VARSIZE_ANY_EXHDR(txt))));
+
+ while ((type = DatumGetInt32(FunctionCall3(&prs->prstoken,
+ PointerGetDatum(prsdata),
+ PointerGetDatum(&lex),
+ PointerGetDatum(&llen)))) != 0)
+ {
+ if (st->cur >= st->len)
+ {
+ st->len = 2 * st->len;
+ st->list = (LexemeEntry *) repalloc(st->list, sizeof(LexemeEntry) * st->len);
+ }
+ st->list[st->cur].lexeme = palloc(llen + 1);
+ memcpy(st->list[st->cur].lexeme, lex, llen);
+ st->list[st->cur].lexeme[llen] = '\0';
+ st->list[st->cur].type = type;
+ st->cur++;
+ }
+
+ FunctionCall1(&prs->prsend, PointerGetDatum(prsdata));
+
+ st->len = st->cur;
+ st->cur = 0;
+
+ funcctx->user_fctx = (void *) st;
+ tupdesc = CreateTemplateTupleDesc(2);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
+ INT4OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "token",
+ TEXTOID, -1, 0);
+
+ funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+ MemoryContextSwitchTo(oldcontext);
+}
+
+static Datum
+prs_process_call(FuncCallContext *funcctx)
+{
+ PrsStorage *st;
+
+ st = (PrsStorage *) funcctx->user_fctx;
+ if (st->cur < st->len)
+ {
+ Datum result;
+ char *values[2];
+ char tid[16];
+ HeapTuple tuple;
+
+ values[0] = tid;
+ sprintf(tid, "%d", st->list[st->cur].type);
+ values[1] = st->list[st->cur].lexeme;
+ tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+ result = HeapTupleGetDatum(tuple);
+
+ pfree(values[1]);
+ st->cur++;
+ return result;
+ }
+ return (Datum) 0;
+}
+
+Datum
+ts_parse_byid(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ Datum result;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ text *txt = PG_GETARG_TEXT_PP(1);
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt);
+ PG_FREE_IF_COPY(txt, 1);
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+
+ if ((result = prs_process_call(funcctx)) != (Datum) 0)
+ SRF_RETURN_NEXT(funcctx, result);
+ SRF_RETURN_DONE(funcctx);
+}
+
+Datum
+ts_parse_byname(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ Datum result;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ text *prsname = PG_GETARG_TEXT_PP(0);
+ text *txt = PG_GETARG_TEXT_PP(1);
+ Oid prsId;
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
+ prs_setup_firstcall(funcctx, prsId, txt);
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+
+ if ((result = prs_process_call(funcctx)) != (Datum) 0)
+ SRF_RETURN_NEXT(funcctx, result);
+ SRF_RETURN_DONE(funcctx);
+}
+
+Datum
+ts_headline_byid_opt(PG_FUNCTION_ARGS)
+{
+ Oid tsconfig = PG_GETARG_OID(0);
+ text *in = PG_GETARG_TEXT_PP(1);
+ TSQuery query = PG_GETARG_TSQUERY(2);
+ text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_PP(3) : NULL;
+ HeadlineParsedText prs;
+ List *prsoptions;
+ text *out;
+ TSConfigCacheEntry *cfg;
+ TSParserCacheEntry *prsobj;
+
+ cfg = lookup_ts_config_cache(tsconfig);
+ prsobj = lookup_ts_parser_cache(cfg->prsId);
+
+ if (!OidIsValid(prsobj->headlineOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("text search parser does not support headline creation")));
+
+ memset(&prs, 0, sizeof(HeadlineParsedText));
+ prs.lenwords = 32;
+ prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
+
+ hlparsetext(cfg->cfgId, &prs, query,
+ VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in));
+
+ if (opt)
+ prsoptions = deserialize_deflist(PointerGetDatum(opt));
+ else
+ prsoptions = NIL;
+
+ FunctionCall3(&(prsobj->prsheadline),
+ PointerGetDatum(&prs),
+ PointerGetDatum(prsoptions),
+ PointerGetDatum(query));
+
+ out = generateHeadline(&prs);
+
+ PG_FREE_IF_COPY(in, 1);
+ PG_FREE_IF_COPY(query, 2);
+ if (opt)
+ PG_FREE_IF_COPY(opt, 3);
+ pfree(prs.words);
+ pfree(prs.startsel);
+ pfree(prs.stopsel);
+
+ PG_RETURN_POINTER(out);
+}
+
+Datum
+ts_headline_byid(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1),
+ PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
+ ObjectIdGetDatum(getTSCurrentConfig(true)),
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1)));
+}
+
+Datum
+ts_headline_opt(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_byid_opt,
+ ObjectIdGetDatum(getTSCurrentConfig(true)),
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1),
+ PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
+{
+ Oid tsconfig = PG_GETARG_OID(0);
+ Jsonb *jb = PG_GETARG_JSONB_P(1);
+ TSQuery query = PG_GETARG_TSQUERY(2);
+ text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
+ Jsonb *out;
+ JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
+ HeadlineParsedText prs;
+ HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
+
+ memset(&prs, 0, sizeof(HeadlineParsedText));
+ prs.lenwords = 32;
+ prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
+
+ state->prs = &prs;
+ state->cfg = lookup_ts_config_cache(tsconfig);
+ state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
+ state->query = query;
+ if (opt)
+ state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
+ else
+ state->prsoptions = NIL;
+
+ if (!OidIsValid(state->prsobj->headlineOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("text search parser does not support headline creation")));
+
+ out = transform_jsonb_string_values(jb, state, action);
+
+ PG_FREE_IF_COPY(jb, 1);
+ PG_FREE_IF_COPY(query, 2);
+ if (opt)
+ PG_FREE_IF_COPY(opt, 3);
+
+ pfree(prs.words);
+
+ if (state->transformed)
+ {
+ pfree(prs.startsel);
+ pfree(prs.stopsel);
+ }
+
+ PG_RETURN_JSONB_P(out);
+}
+
+Datum
+ts_headline_jsonb(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
+ ObjectIdGetDatum(getTSCurrentConfig(true)),
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1)));
+}
+
+Datum
+ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1),
+ PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt,
+ ObjectIdGetDatum(getTSCurrentConfig(true)),
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1),
+ PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_json_byid_opt(PG_FUNCTION_ARGS)
+{
+ Oid tsconfig = PG_GETARG_OID(0);
+ text *json = PG_GETARG_TEXT_P(1);
+ TSQuery query = PG_GETARG_TSQUERY(2);
+ text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
+ text *out;
+ JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
+
+ HeadlineParsedText prs;
+ HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
+
+ memset(&prs, 0, sizeof(HeadlineParsedText));
+ prs.lenwords = 32;
+ prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
+
+ state->prs = &prs;
+ state->cfg = lookup_ts_config_cache(tsconfig);
+ state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
+ state->query = query;
+ if (opt)
+ state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
+ else
+ state->prsoptions = NIL;
+
+ if (!OidIsValid(state->prsobj->headlineOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("text search parser does not support headline creation")));
+
+ out = transform_json_string_values(json, state, action);
+
+ PG_FREE_IF_COPY(json, 1);
+ PG_FREE_IF_COPY(query, 2);
+ if (opt)
+ PG_FREE_IF_COPY(opt, 3);
+ pfree(prs.words);
+
+ if (state->transformed)
+ {
+ pfree(prs.startsel);
+ pfree(prs.stopsel);
+ }
+
+ PG_RETURN_TEXT_P(out);
+}
+
+Datum
+ts_headline_json(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
+ ObjectIdGetDatum(getTSCurrentConfig(true)),
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1)));
+}
+
+Datum
+ts_headline_json_byid(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1),
+ PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_json_opt(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt,
+ ObjectIdGetDatum(getTSCurrentConfig(true)),
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1),
+ PG_GETARG_DATUM(2)));
+}
+
+
+/*
+ * Return headline in text from, generated from a json(b) element
+ */
+static text *
+headline_json_value(void *_state, char *elem_value, int elem_len)
+{
+ HeadlineJsonState *state = (HeadlineJsonState *) _state;
+
+ HeadlineParsedText *prs = state->prs;
+ TSConfigCacheEntry *cfg = state->cfg;
+ TSParserCacheEntry *prsobj = state->prsobj;
+ TSQuery query = state->query;
+ List *prsoptions = state->prsoptions;
+
+ prs->curwords = 0;
+ hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len);
+ FunctionCall3(&(prsobj->prsheadline),
+ PointerGetDatum(prs),
+ PointerGetDatum(prsoptions),
+ PointerGetDatum(query));
+
+ state->transformed = true;
+ return generateHeadline(prs);
+}