summaryrefslogtreecommitdiffstats
path: root/src/backend/utils/adt/tsquery.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:17:33 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:17:33 +0000
commit5e45211a64149b3c659b90ff2de6fa982a5a93ed (patch)
tree739caf8c461053357daa9f162bef34516c7bf452 /src/backend/utils/adt/tsquery.c
parentInitial commit. (diff)
downloadpostgresql-15-5e45211a64149b3c659b90ff2de6fa982a5a93ed.tar.xz
postgresql-15-5e45211a64149b3c659b90ff2de6fa982a5a93ed.zip
Adding upstream version 15.5.upstream/15.5
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/utils/adt/tsquery.c')
-rw-r--r--src/backend/utils/adt/tsquery.c1349
1 files changed, 1349 insertions, 0 deletions
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
new file mode 100644
index 0000000..f54f298
--- /dev/null
+++ b/src/backend/utils/adt/tsquery.c
@@ -0,0 +1,1349 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsquery.c
+ * I/O functions for tsquery
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/tsquery.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_type.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/pg_crc.h"
+
+/* FTS operator priorities, see ts_type.h */
+const int tsearch_op_priority[OP_COUNT] =
+{
+ 4, /* OP_NOT */
+ 2, /* OP_AND */
+ 1, /* OP_OR */
+ 3 /* OP_PHRASE */
+};
+
+/*
+ * parser's states
+ */
+typedef enum
+{
+ WAITOPERAND = 1,
+ WAITOPERATOR = 2,
+ WAITFIRSTOPERAND = 3
+} ts_parserstate;
+
+/*
+ * token types for parsing
+ */
+typedef enum
+{
+ PT_END = 0,
+ PT_ERR = 1,
+ PT_VAL = 2,
+ PT_OPR = 3,
+ PT_OPEN = 4,
+ PT_CLOSE = 5
+} ts_tokentype;
+
+/*
+ * get token from query string
+ *
+ * *operator is filled in with OP_* when return values is PT_OPR,
+ * but *weight could contain a distance value in case of phrase operator.
+ * *strval, *lenval and *weight are filled in when return value is PT_VAL
+ *
+ */
+typedef ts_tokentype (*ts_tokenizer) (TSQueryParserState state, int8 *operator,
+ int *lenval, char **strval,
+ int16 *weight, bool *prefix);
+
+struct TSQueryParserStateData
+{
+ /* Tokenizer used for parsing tsquery */
+ ts_tokenizer gettoken;
+
+ /* State of tokenizer function */
+ char *buffer; /* entire string we are scanning */
+ char *buf; /* current scan point */
+ int count; /* nesting count, incremented by (,
+ * decremented by ) */
+ ts_parserstate state;
+
+ /* polish (prefix) notation in list, filled in by push* functions */
+ List *polstr;
+
+ /*
+ * Strings from operands are collected in op. curop is a pointer to the
+ * end of used space of op.
+ */
+ char *op;
+ char *curop;
+ int lenop; /* allocated size of op */
+ int sumlen; /* used size of op */
+
+ /* state for value's parser */
+ TSVectorParseState valstate;
+};
+
+/*
+ * subroutine to parse the modifiers (weight and prefix flag currently)
+ * part, like ':AB*' of a query.
+ */
+static char *
+get_modifiers(char *buf, int16 *weight, bool *prefix)
+{
+ *weight = 0;
+ *prefix = false;
+
+ if (!t_iseq(buf, ':'))
+ return buf;
+
+ buf++;
+ while (*buf && pg_mblen(buf) == 1)
+ {
+ switch (*buf)
+ {
+ case 'a':
+ case 'A':
+ *weight |= 1 << 3;
+ break;
+ case 'b':
+ case 'B':
+ *weight |= 1 << 2;
+ break;
+ case 'c':
+ case 'C':
+ *weight |= 1 << 1;
+ break;
+ case 'd':
+ case 'D':
+ *weight |= 1;
+ break;
+ case '*':
+ *prefix = true;
+ break;
+ default:
+ return buf;
+ }
+ buf++;
+ }
+
+ return buf;
+}
+
+/*
+ * Parse phrase operator. The operator
+ * may take the following forms:
+ *
+ * a <N> b (distance is exactly N lexemes)
+ * a <-> b (default distance = 1)
+ *
+ * The buffer should begin with '<' char
+ */
+static bool
+parse_phrase_operator(TSQueryParserState pstate, int16 *distance)
+{
+ enum
+ {
+ PHRASE_OPEN = 0,
+ PHRASE_DIST,
+ PHRASE_CLOSE,
+ PHRASE_FINISH
+ } state = PHRASE_OPEN;
+ char *ptr = pstate->buf;
+ char *endptr;
+ long l = 1; /* default distance */
+
+ while (*ptr)
+ {
+ switch (state)
+ {
+ case PHRASE_OPEN:
+ if (t_iseq(ptr, '<'))
+ {
+ state = PHRASE_DIST;
+ ptr++;
+ }
+ else
+ return false;
+ break;
+
+ case PHRASE_DIST:
+ if (t_iseq(ptr, '-'))
+ {
+ state = PHRASE_CLOSE;
+ ptr++;
+ continue;
+ }
+
+ if (!t_isdigit(ptr))
+ return false;
+
+ errno = 0;
+ l = strtol(ptr, &endptr, 10);
+ if (ptr == endptr)
+ return false;
+ else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("distance in phrase operator must be an integer value between zero and %d inclusive",
+ MAXENTRYPOS)));
+ else
+ {
+ state = PHRASE_CLOSE;
+ ptr = endptr;
+ }
+ break;
+
+ case PHRASE_CLOSE:
+ if (t_iseq(ptr, '>'))
+ {
+ state = PHRASE_FINISH;
+ ptr++;
+ }
+ else
+ return false;
+ break;
+
+ case PHRASE_FINISH:
+ *distance = (int16) l;
+ pstate->buf = ptr;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*
+ * Parse OR operator used in websearch_to_tsquery(), returns true if we
+ * believe that "OR" literal could be an operator OR
+ */
+static bool
+parse_or_operator(TSQueryParserState pstate)
+{
+ char *ptr = pstate->buf;
+
+ /* it should begin with "OR" literal */
+ if (pg_strncasecmp(ptr, "or", 2) != 0)
+ return false;
+
+ ptr += 2;
+
+ /*
+ * it shouldn't be a part of any word but somewhere later it should be
+ * some operand
+ */
+ if (*ptr == '\0') /* no operand */
+ return false;
+
+ /* it shouldn't be a part of any word */
+ if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalpha(ptr) || t_isdigit(ptr))
+ return false;
+
+ for (;;)
+ {
+ ptr += pg_mblen(ptr);
+
+ if (*ptr == '\0') /* got end of string without operand */
+ return false;
+
+ /*
+ * Suppose, we found an operand, but could be a not correct operand.
+ * So we still treat OR literal as operation with possibly incorrect
+ * operand and will not search it as lexeme
+ */
+ if (!t_isspace(ptr))
+ break;
+ }
+
+ pstate->buf += 2;
+ return true;
+}
+
+static ts_tokentype
+gettoken_query_standard(TSQueryParserState state, int8 *operator,
+ int *lenval, char **strval,
+ int16 *weight, bool *prefix)
+{
+ *weight = 0;
+ *prefix = false;
+
+ while (true)
+ {
+ switch (state->state)
+ {
+ case WAITFIRSTOPERAND:
+ case WAITOPERAND:
+ if (t_iseq(state->buf, '!'))
+ {
+ state->buf++;
+ state->state = WAITOPERAND;
+ *operator = OP_NOT;
+ return PT_OPR;
+ }
+ else if (t_iseq(state->buf, '('))
+ {
+ state->buf++;
+ state->state = WAITOPERAND;
+ state->count++;
+ return PT_OPEN;
+ }
+ else if (t_iseq(state->buf, ':'))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsquery: \"%s\"",
+ state->buffer)));
+ }
+ else if (!t_isspace(state->buf))
+ {
+ /*
+ * We rely on the tsvector parser to parse the value for
+ * us
+ */
+ reset_tsvector_parser(state->valstate, state->buf);
+ if (gettoken_tsvector(state->valstate, strval, lenval,
+ NULL, NULL, &state->buf))
+ {
+ state->buf = get_modifiers(state->buf, weight, prefix);
+ state->state = WAITOPERATOR;
+ return PT_VAL;
+ }
+ else if (state->state == WAITFIRSTOPERAND)
+ {
+ return PT_END;
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("no operand in tsquery: \"%s\"",
+ state->buffer)));
+ }
+ break;
+
+ case WAITOPERATOR:
+ if (t_iseq(state->buf, '&'))
+ {
+ state->buf++;
+ state->state = WAITOPERAND;
+ *operator = OP_AND;
+ return PT_OPR;
+ }
+ else if (t_iseq(state->buf, '|'))
+ {
+ state->buf++;
+ state->state = WAITOPERAND;
+ *operator = OP_OR;
+ return PT_OPR;
+ }
+ else if (parse_phrase_operator(state, weight))
+ {
+ /* weight var is used as storage for distance */
+ state->state = WAITOPERAND;
+ *operator = OP_PHRASE;
+ return PT_OPR;
+ }
+ else if (t_iseq(state->buf, ')'))
+ {
+ state->buf++;
+ state->count--;
+ return (state->count < 0) ? PT_ERR : PT_CLOSE;
+ }
+ else if (*state->buf == '\0')
+ {
+ return (state->count) ? PT_ERR : PT_END;
+ }
+ else if (!t_isspace(state->buf))
+ {
+ return PT_ERR;
+ }
+ break;
+ }
+
+ state->buf += pg_mblen(state->buf);
+ }
+}
+
+static ts_tokentype
+gettoken_query_websearch(TSQueryParserState state, int8 *operator,
+ int *lenval, char **strval,
+ int16 *weight, bool *prefix)
+{
+ *weight = 0;
+ *prefix = false;
+
+ while (true)
+ {
+ switch (state->state)
+ {
+ case WAITFIRSTOPERAND:
+ case WAITOPERAND:
+ if (t_iseq(state->buf, '-'))
+ {
+ state->buf++;
+ state->state = WAITOPERAND;
+
+ *operator = OP_NOT;
+ return PT_OPR;
+ }
+ else if (t_iseq(state->buf, '"'))
+ {
+ /* Everything in quotes is processed as a single token */
+
+ /* skip opening quote */
+ state->buf++;
+ *strval = state->buf;
+
+ /* iterate to the closing quote or end of the string */
+ while (*state->buf != '\0' && !t_iseq(state->buf, '"'))
+ state->buf++;
+ *lenval = state->buf - *strval;
+
+ /* skip closing quote if not end of the string */
+ if (*state->buf != '\0')
+ state->buf++;
+
+ state->state = WAITOPERATOR;
+ state->count++;
+ return PT_VAL;
+ }
+ else if (ISOPERATOR(state->buf))
+ {
+ /* or else gettoken_tsvector() will raise an error */
+ state->buf++;
+ state->state = WAITOPERAND;
+ continue;
+ }
+ else if (!t_isspace(state->buf))
+ {
+ /*
+ * We rely on the tsvector parser to parse the value for
+ * us
+ */
+ reset_tsvector_parser(state->valstate, state->buf);
+ if (gettoken_tsvector(state->valstate, strval, lenval,
+ NULL, NULL, &state->buf))
+ {
+ state->state = WAITOPERATOR;
+ return PT_VAL;
+ }
+ else if (state->state == WAITFIRSTOPERAND)
+ {
+ return PT_END;
+ }
+ else
+ {
+ /* finally, we have to provide an operand */
+ pushStop(state);
+ return PT_END;
+ }
+ }
+ break;
+
+ case WAITOPERATOR:
+ if (t_iseq(state->buf, '"'))
+ {
+ /*
+ * put implicit AND after an operand and handle this quote
+ * in WAITOPERAND
+ */
+ state->state = WAITOPERAND;
+ *operator = OP_AND;
+ return PT_OPR;
+ }
+ else if (parse_or_operator(state))
+ {
+ state->state = WAITOPERAND;
+ *operator = OP_OR;
+ return PT_OPR;
+ }
+ else if (*state->buf == '\0')
+ {
+ return PT_END;
+ }
+ else if (!t_isspace(state->buf))
+ {
+ /* put implicit AND after an operand */
+ *operator = OP_AND;
+ state->state = WAITOPERAND;
+ return PT_OPR;
+ }
+ break;
+ }
+
+ state->buf += pg_mblen(state->buf);
+ }
+}
+
+static ts_tokentype
+gettoken_query_plain(TSQueryParserState state, int8 *operator,
+ int *lenval, char **strval,
+ int16 *weight, bool *prefix)
+{
+ *weight = 0;
+ *prefix = false;
+
+ if (*state->buf == '\0')
+ return PT_END;
+
+ *strval = state->buf;
+ *lenval = strlen(state->buf);
+ state->buf += *lenval;
+ state->count++;
+ return PT_VAL;
+}
+
+/*
+ * Push an operator to state->polstr
+ */
+void
+pushOperator(TSQueryParserState state, int8 oper, int16 distance)
+{
+ QueryOperator *tmp;
+
+ Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR || oper == OP_PHRASE);
+
+ tmp = (QueryOperator *) palloc0(sizeof(QueryOperator));
+ tmp->type = QI_OPR;
+ tmp->oper = oper;
+ tmp->distance = (oper == OP_PHRASE) ? distance : 0;
+ /* left is filled in later with findoprnd */
+
+ state->polstr = lcons(tmp, state->polstr);
+}
+
+static void
+pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
+{
+ QueryOperand *tmp;
+
+ if (distance >= MAXSTRPOS)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("value is too big in tsquery: \"%s\"",
+ state->buffer)));
+ if (lenval >= MAXSTRLEN)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("operand is too long in tsquery: \"%s\"",
+ state->buffer)));
+
+ tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
+ tmp->type = QI_VAL;
+ tmp->weight = weight;
+ tmp->prefix = prefix;
+ tmp->valcrc = (int32) valcrc;
+ tmp->length = lenval;
+ tmp->distance = distance;
+
+ state->polstr = lcons(tmp, state->polstr);
+}
+
+/*
+ * Push an operand to state->polstr.
+ *
+ * strval must point to a string equal to state->curop. lenval is the length
+ * of the string.
+ */
+void
+pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
+{
+ pg_crc32 valcrc;
+
+ if (lenval >= MAXSTRLEN)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("word is too long in tsquery: \"%s\"",
+ state->buffer)));
+
+ INIT_LEGACY_CRC32(valcrc);
+ COMP_LEGACY_CRC32(valcrc, strval, lenval);
+ FIN_LEGACY_CRC32(valcrc);
+ pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
+
+ /* append the value string to state.op, enlarging buffer if needed first */
+ while (state->curop - state->op + lenval + 1 >= state->lenop)
+ {
+ int used = state->curop - state->op;
+
+ state->lenop *= 2;
+ state->op = (char *) repalloc((void *) state->op, state->lenop);
+ state->curop = state->op + used;
+ }
+ memcpy((void *) state->curop, (void *) strval, lenval);
+ state->curop += lenval;
+ *(state->curop) = '\0';
+ state->curop++;
+ state->sumlen += lenval + 1 /* \0 */ ;
+}
+
+
+/*
+ * Push a stopword placeholder to state->polstr
+ */
+void
+pushStop(TSQueryParserState state)
+{
+ QueryOperand *tmp;
+
+ tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
+ tmp->type = QI_VALSTOP;
+
+ state->polstr = lcons(tmp, state->polstr);
+}
+
+
+#define STACKDEPTH 32
+
+typedef struct OperatorElement
+{
+ int8 op;
+ int16 distance;
+} OperatorElement;
+
+static void
+pushOpStack(OperatorElement *stack, int *lenstack, int8 op, int16 distance)
+{
+ if (*lenstack == STACKDEPTH) /* internal error */
+ elog(ERROR, "tsquery stack too small");
+
+ stack[*lenstack].op = op;
+ stack[*lenstack].distance = distance;
+
+ (*lenstack)++;
+}
+
+static void
+cleanOpStack(TSQueryParserState state,
+ OperatorElement *stack, int *lenstack, int8 op)
+{
+ int opPriority = OP_PRIORITY(op);
+
+ while (*lenstack)
+ {
+ /* NOT is right associative unlike to others */
+ if ((op != OP_NOT && opPriority > OP_PRIORITY(stack[*lenstack - 1].op)) ||
+ (op == OP_NOT && opPriority >= OP_PRIORITY(stack[*lenstack - 1].op)))
+ break;
+
+ (*lenstack)--;
+ pushOperator(state, stack[*lenstack].op,
+ stack[*lenstack].distance);
+ }
+}
+
+/*
+ * Make polish (prefix) notation of query.
+ *
+ * See parse_tsquery for explanation of pushval.
+ */
+static void
+makepol(TSQueryParserState state,
+ PushFunction pushval,
+ Datum opaque)
+{
+ int8 operator = 0;
+ ts_tokentype type;
+ int lenval = 0;
+ char *strval = NULL;
+ OperatorElement opstack[STACKDEPTH];
+ int lenstack = 0;
+ int16 weight = 0;
+ bool prefix;
+
+ /* since this function recurses, it could be driven to stack overflow */
+ check_stack_depth();
+
+ while ((type = state->gettoken(state, &operator,
+ &lenval, &strval,
+ &weight, &prefix)) != PT_END)
+ {
+ switch (type)
+ {
+ case PT_VAL:
+ pushval(opaque, state, strval, lenval, weight, prefix);
+ break;
+ case PT_OPR:
+ cleanOpStack(state, opstack, &lenstack, operator);
+ pushOpStack(opstack, &lenstack, operator, weight);
+ break;
+ case PT_OPEN:
+ makepol(state, pushval, opaque);
+ break;
+ case PT_CLOSE:
+ cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
+ return;
+ case PT_ERR:
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsquery: \"%s\"",
+ state->buffer)));
+ }
+ }
+
+ cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
+}
+
+static void
+findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup)
+{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ if (*pos >= nnodes)
+ elog(ERROR, "malformed tsquery: operand not found");
+
+ if (ptr[*pos].type == QI_VAL)
+ {
+ (*pos)++;
+ }
+ else if (ptr[*pos].type == QI_VALSTOP)
+ {
+ *needcleanup = true; /* we'll have to remove stop words */
+ (*pos)++;
+ }
+ else
+ {
+ Assert(ptr[*pos].type == QI_OPR);
+
+ if (ptr[*pos].qoperator.oper == OP_NOT)
+ {
+ ptr[*pos].qoperator.left = 1; /* fixed offset */
+ (*pos)++;
+
+ /* process the only argument */
+ findoprnd_recurse(ptr, pos, nnodes, needcleanup);
+ }
+ else
+ {
+ QueryOperator *curitem = &ptr[*pos].qoperator;
+ int tmp = *pos; /* save current position */
+
+ Assert(curitem->oper == OP_AND ||
+ curitem->oper == OP_OR ||
+ curitem->oper == OP_PHRASE);
+
+ (*pos)++;
+
+ /* process RIGHT argument */
+ findoprnd_recurse(ptr, pos, nnodes, needcleanup);
+
+ curitem->left = *pos - tmp; /* set LEFT arg's offset */
+
+ /* process LEFT argument */
+ findoprnd_recurse(ptr, pos, nnodes, needcleanup);
+ }
+ }
+}
+
+
+/*
+ * Fill in the left-fields previously left unfilled.
+ * The input QueryItems must be in polish (prefix) notation.
+ * Also, set *needcleanup to true if there are any QI_VALSTOP nodes.
+ */
+static void
+findoprnd(QueryItem *ptr, int size, bool *needcleanup)
+{
+ uint32 pos;
+
+ *needcleanup = false;
+ pos = 0;
+ findoprnd_recurse(ptr, &pos, size, needcleanup);
+
+ if (pos != size)
+ elog(ERROR, "malformed tsquery: extra nodes");
+}
+
+
+/*
+ * Each value (operand) in the query is passed to pushval. pushval can
+ * transform the simple value to an arbitrarily complex expression using
+ * pushValue and pushOperator. It must push a single value with pushValue,
+ * a complete expression with all operands, or a stopword placeholder
+ * with pushStop, otherwise the prefix notation representation will be broken,
+ * having an operator with no operand.
+ *
+ * opaque is passed on to pushval as is, pushval can use it to store its
+ * private state.
+ */
+TSQuery
+parse_tsquery(char *buf,
+ PushFunction pushval,
+ Datum opaque,
+ int flags)
+{
+ struct TSQueryParserStateData state;
+ int i;
+ TSQuery query;
+ int commonlen;
+ QueryItem *ptr;
+ ListCell *cell;
+ bool needcleanup;
+ int tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY;
+
+ /* plain should not be used with web */
+ Assert((flags & (P_TSQ_PLAIN | P_TSQ_WEB)) != (P_TSQ_PLAIN | P_TSQ_WEB));
+
+ /* select suitable tokenizer */
+ if (flags & P_TSQ_PLAIN)
+ state.gettoken = gettoken_query_plain;
+ else if (flags & P_TSQ_WEB)
+ {
+ state.gettoken = gettoken_query_websearch;
+ tsv_flags |= P_TSV_IS_WEB;
+ }
+ else
+ state.gettoken = gettoken_query_standard;
+
+ /* init state */
+ state.buffer = buf;
+ state.buf = buf;
+ state.count = 0;
+ state.state = WAITFIRSTOPERAND;
+ state.polstr = NIL;
+
+ /* init value parser's state */
+ state.valstate = init_tsvector_parser(state.buffer, tsv_flags);
+
+ /* init list of operand */
+ state.sumlen = 0;
+ state.lenop = 64;
+ state.curop = state.op = (char *) palloc(state.lenop);
+ *(state.curop) = '\0';
+
+ /* parse query & make polish notation (postfix, but in reverse order) */
+ makepol(&state, pushval, opaque);
+
+ close_tsvector_parser(state.valstate);
+
+ if (list_length(state.polstr) == 0)
+ {
+ ereport(NOTICE,
+ (errmsg("text-search query doesn't contain lexemes: \"%s\"",
+ state.buffer)));
+ query = (TSQuery) palloc(HDRSIZETQ);
+ SET_VARSIZE(query, HDRSIZETQ);
+ query->size = 0;
+ return query;
+ }
+
+ if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("tsquery is too large")));
+ commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
+
+ /* Pack the QueryItems in the final TSQuery struct to return to caller */
+ query = (TSQuery) palloc0(commonlen);
+ SET_VARSIZE(query, commonlen);
+ query->size = list_length(state.polstr);
+ ptr = GETQUERY(query);
+
+ /* Copy QueryItems to TSQuery */
+ i = 0;
+ foreach(cell, state.polstr)
+ {
+ QueryItem *item = (QueryItem *) lfirst(cell);
+
+ switch (item->type)
+ {
+ case QI_VAL:
+ memcpy(&ptr[i], item, sizeof(QueryOperand));
+ break;
+ case QI_VALSTOP:
+ ptr[i].type = QI_VALSTOP;
+ break;
+ case QI_OPR:
+ memcpy(&ptr[i], item, sizeof(QueryOperator));
+ break;
+ default:
+ elog(ERROR, "unrecognized QueryItem type: %d", item->type);
+ }
+ i++;
+ }
+
+ /* Copy all the operand strings to TSQuery */
+ memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+ pfree(state.op);
+
+ /*
+ * Set left operand pointers for every operator. While we're at it,
+ * detect whether there are any QI_VALSTOP nodes.
+ */
+ findoprnd(ptr, query->size, &needcleanup);
+
+ /*
+ * If there are QI_VALSTOP nodes, delete them and simplify the tree.
+ */
+ if (needcleanup)
+ query = cleanup_tsquery_stopwords(query);
+
+ return query;
+}
+
+static void
+pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
+ int16 weight, bool prefix)
+{
+ pushValue(state, strval, lenval, weight, prefix);
+}
+
+/*
+ * in without morphology
+ */
+Datum
+tsqueryin(PG_FUNCTION_ARGS)
+{
+ char *in = PG_GETARG_CSTRING(0);
+
+ PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), 0));
+}
+
+/*
+ * out function
+ */
+typedef struct
+{
+ QueryItem *curpol;
+ char *buf;
+ char *cur;
+ char *op;
+ int buflen;
+} INFIX;
+
+/* Makes sure inf->buf is large enough for adding 'addsize' bytes */
+#define RESIZEBUF(inf, addsize) \
+while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
+{ \
+ int len = (inf)->cur - (inf)->buf; \
+ (inf)->buflen *= 2; \
+ (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
+ (inf)->cur = (inf)->buf + len; \
+}
+
+/*
+ * recursively traverse the tree and
+ * print it in infix (human-readable) form
+ */
+static void
+infix(INFIX *in, int parentPriority, bool rightPhraseOp)
+{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ if (in->curpol->type == QI_VAL)
+ {
+ QueryOperand *curpol = &in->curpol->qoperand;
+ char *op = in->op + curpol->distance;
+ int clen;
+
+ RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6);
+ *(in->cur) = '\'';
+ in->cur++;
+ while (*op)
+ {
+ if (t_iseq(op, '\''))
+ {
+ *(in->cur) = '\'';
+ in->cur++;
+ }
+ else if (t_iseq(op, '\\'))
+ {
+ *(in->cur) = '\\';
+ in->cur++;
+ }
+ COPYCHAR(in->cur, op);
+
+ clen = pg_mblen(op);
+ op += clen;
+ in->cur += clen;
+ }
+ *(in->cur) = '\'';
+ in->cur++;
+ if (curpol->weight || curpol->prefix)
+ {
+ *(in->cur) = ':';
+ in->cur++;
+ if (curpol->prefix)
+ {
+ *(in->cur) = '*';
+ in->cur++;
+ }
+ if (curpol->weight & (1 << 3))
+ {
+ *(in->cur) = 'A';
+ in->cur++;
+ }
+ if (curpol->weight & (1 << 2))
+ {
+ *(in->cur) = 'B';
+ in->cur++;
+ }
+ if (curpol->weight & (1 << 1))
+ {
+ *(in->cur) = 'C';
+ in->cur++;
+ }
+ if (curpol->weight & 1)
+ {
+ *(in->cur) = 'D';
+ in->cur++;
+ }
+ }
+ *(in->cur) = '\0';
+ in->curpol++;
+ }
+ else if (in->curpol->qoperator.oper == OP_NOT)
+ {
+ int priority = QO_PRIORITY(in->curpol);
+
+ if (priority < parentPriority)
+ {
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, "( ");
+ in->cur = strchr(in->cur, '\0');
+ }
+ RESIZEBUF(in, 1);
+ *(in->cur) = '!';
+ in->cur++;
+ *(in->cur) = '\0';
+ in->curpol++;
+
+ infix(in, priority, false);
+ if (priority < parentPriority)
+ {
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, " )");
+ in->cur = strchr(in->cur, '\0');
+ }
+ }
+ else
+ {
+ int8 op = in->curpol->qoperator.oper;
+ int priority = QO_PRIORITY(in->curpol);
+ int16 distance = in->curpol->qoperator.distance;
+ INFIX nrm;
+ bool needParenthesis = false;
+
+ in->curpol++;
+ if (priority < parentPriority ||
+ /* phrase operator depends on order */
+ (op == OP_PHRASE && rightPhraseOp))
+ {
+ needParenthesis = true;
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, "( ");
+ in->cur = strchr(in->cur, '\0');
+ }
+
+ nrm.curpol = in->curpol;
+ nrm.op = in->op;
+ nrm.buflen = 16;
+ nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+ /* get right operand */
+ infix(&nrm, priority, (op == OP_PHRASE));
+
+ /* get & print left operand */
+ in->curpol = nrm.curpol;
+ infix(in, priority, false);
+
+ /* print operator & right operand */
+ RESIZEBUF(in, 3 + (2 + 10 /* distance */ ) + (nrm.cur - nrm.buf));
+ switch (op)
+ {
+ case OP_OR:
+ sprintf(in->cur, " | %s", nrm.buf);
+ break;
+ case OP_AND:
+ sprintf(in->cur, " & %s", nrm.buf);
+ break;
+ case OP_PHRASE:
+ if (distance != 1)
+ sprintf(in->cur, " <%d> %s", distance, nrm.buf);
+ else
+ sprintf(in->cur, " <-> %s", nrm.buf);
+ break;
+ default:
+ /* OP_NOT is handled in above if-branch */
+ elog(ERROR, "unrecognized operator type: %d", op);
+ }
+ in->cur = strchr(in->cur, '\0');
+ pfree(nrm.buf);
+
+ if (needParenthesis)
+ {
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, " )");
+ in->cur = strchr(in->cur, '\0');
+ }
+ }
+}
+
+Datum
+tsqueryout(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY(0);
+ INFIX nrm;
+
+ if (query->size == 0)
+ {
+ char *b = palloc(1);
+
+ *b = '\0';
+ PG_RETURN_POINTER(b);
+ }
+ nrm.curpol = GETQUERY(query);
+ nrm.buflen = 32;
+ nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+ *(nrm.cur) = '\0';
+ nrm.op = GETOPERAND(query);
+ infix(&nrm, -1 /* lowest priority */ , false);
+
+ PG_FREE_IF_COPY(query, 0);
+ PG_RETURN_CSTRING(nrm.buf);
+}
+
+/*
+ * Binary Input / Output functions. The binary format is as follows:
+ *
+ * uint32 number of operators/operands in the query
+ *
+ * Followed by the operators and operands, in prefix notation. For each
+ * operand:
+ *
+ * uint8 type, QI_VAL
+ * uint8 weight
+ * operand text in client encoding, null-terminated
+ * uint8 prefix
+ *
+ * For each operator:
+ * uint8 type, QI_OPR
+ * uint8 operator, one of OP_AND, OP_PHRASE OP_OR, OP_NOT.
+ * uint16 distance (only for OP_PHRASE)
+ */
+Datum
+tsquerysend(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY(0);
+ StringInfoData buf;
+ int i;
+ QueryItem *item = GETQUERY(query);
+
+ pq_begintypsend(&buf);
+
+ pq_sendint32(&buf, query->size);
+ for (i = 0; i < query->size; i++)
+ {
+ pq_sendint8(&buf, item->type);
+
+ switch (item->type)
+ {
+ case QI_VAL:
+ pq_sendint8(&buf, item->qoperand.weight);
+ pq_sendint8(&buf, item->qoperand.prefix);
+ pq_sendstring(&buf, GETOPERAND(query) + item->qoperand.distance);
+ break;
+ case QI_OPR:
+ pq_sendint8(&buf, item->qoperator.oper);
+ if (item->qoperator.oper == OP_PHRASE)
+ pq_sendint16(&buf, item->qoperator.distance);
+ break;
+ default:
+ elog(ERROR, "unrecognized tsquery node type: %d", item->type);
+ }
+ item++;
+ }
+
+ PG_FREE_IF_COPY(query, 0);
+
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+Datum
+tsqueryrecv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ TSQuery query;
+ int i,
+ len;
+ QueryItem *item;
+ int datalen;
+ char *ptr;
+ uint32 size;
+ const char **operands;
+ bool needcleanup;
+
+ size = pq_getmsgint(buf, sizeof(uint32));
+ if (size > (MaxAllocSize / sizeof(QueryItem)))
+ elog(ERROR, "invalid size of tsquery");
+
+ /* Allocate space to temporarily hold operand strings */
+ operands = palloc(size * sizeof(char *));
+
+ /* Allocate space for all the QueryItems. */
+ len = HDRSIZETQ + sizeof(QueryItem) * size;
+ query = (TSQuery) palloc0(len);
+ query->size = size;
+ item = GETQUERY(query);
+
+ datalen = 0;
+ for (i = 0; i < size; i++)
+ {
+ item->type = (int8) pq_getmsgint(buf, sizeof(int8));
+
+ if (item->type == QI_VAL)
+ {
+ size_t val_len; /* length after recoding to server
+ * encoding */
+ uint8 weight;
+ uint8 prefix;
+ const char *val;
+ pg_crc32 valcrc;
+
+ weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
+ prefix = (uint8) pq_getmsgint(buf, sizeof(uint8));
+ val = pq_getmsgstring(buf);
+ val_len = strlen(val);
+
+ /* Sanity checks */
+
+ if (weight > 0xF)
+ elog(ERROR, "invalid tsquery: invalid weight bitmap");
+
+ if (val_len > MAXSTRLEN)
+ elog(ERROR, "invalid tsquery: operand too long");
+
+ if (datalen > MAXSTRPOS)
+ elog(ERROR, "invalid tsquery: total operand length exceeded");
+
+ /* Looks valid. */
+
+ INIT_LEGACY_CRC32(valcrc);
+ COMP_LEGACY_CRC32(valcrc, val, val_len);
+ FIN_LEGACY_CRC32(valcrc);
+
+ item->qoperand.weight = weight;
+ item->qoperand.prefix = (prefix) ? true : false;
+ item->qoperand.valcrc = (int32) valcrc;
+ item->qoperand.length = val_len;
+ item->qoperand.distance = datalen;
+
+ /*
+ * Operand strings are copied to the final struct after this loop;
+ * here we just collect them to an array
+ */
+ operands[i] = val;
+
+ datalen += val_len + 1; /* + 1 for the '\0' terminator */
+ }
+ else if (item->type == QI_OPR)
+ {
+ int8 oper;
+
+ oper = (int8) pq_getmsgint(buf, sizeof(int8));
+ if (oper != OP_NOT && oper != OP_OR && oper != OP_AND && oper != OP_PHRASE)
+ elog(ERROR, "invalid tsquery: unrecognized operator type %d",
+ (int) oper);
+ if (i == size - 1)
+ elog(ERROR, "invalid pointer to right operand");
+
+ item->qoperator.oper = oper;
+ if (oper == OP_PHRASE)
+ item->qoperator.distance = (int16) pq_getmsgint(buf, sizeof(int16));
+ }
+ else
+ elog(ERROR, "unrecognized tsquery node type: %d", item->type);
+
+ item++;
+ }
+
+ /* Enlarge buffer to make room for the operand values. */
+ query = (TSQuery) repalloc(query, len + datalen);
+ item = GETQUERY(query);
+ ptr = GETOPERAND(query);
+
+ /*
+ * Fill in the left-pointers. Checks that the tree is well-formed as a
+ * side-effect.
+ */
+ findoprnd(item, size, &needcleanup);
+
+ /* Can't have found any QI_VALSTOP nodes */
+ Assert(!needcleanup);
+
+ /* Copy operands to output struct */
+ for (i = 0; i < size; i++)
+ {
+ if (item->type == QI_VAL)
+ {
+ memcpy(ptr, operands[i], item->qoperand.length + 1);
+ ptr += item->qoperand.length + 1;
+ }
+ item++;
+ }
+
+ pfree(operands);
+
+ Assert(ptr - GETOPERAND(query) == datalen);
+
+ SET_VARSIZE(query, len + datalen);
+
+ PG_RETURN_TSQUERY(query);
+}
+
+/*
+ * debug function, used only for view query
+ * which will be executed in non-leaf pages in index
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY(0);
+ INFIX nrm;
+ text *res;
+ QueryItem *q;
+ int len;
+
+ if (query->size == 0)
+ {
+ res = (text *) palloc(VARHDRSZ);
+ SET_VARSIZE(res, VARHDRSZ);
+ PG_RETURN_POINTER(res);
+ }
+
+ q = clean_NOT(GETQUERY(query), &len);
+
+ if (!q)
+ {
+ res = cstring_to_text("T");
+ }
+ else
+ {
+ nrm.curpol = q;
+ nrm.buflen = 32;
+ nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+ *(nrm.cur) = '\0';
+ nrm.op = GETOPERAND(query);
+ infix(&nrm, -1, false);
+ res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf);
+ pfree(q);
+ }
+
+ PG_FREE_IF_COPY(query, 0);
+
+ PG_RETURN_TEXT_P(res);
+}