From 5e45211a64149b3c659b90ff2de6fa982a5a93ed Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sat, 4 May 2024 14:17:33 +0200
Subject: Adding upstream version 15.5.

Signed-off-by: Daniel Baumann
---
 src/backend/utils/adt/tsquery.c | 1349 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 1349 insertions(+)
 create mode 100644 src/backend/utils/adt/tsquery.c

(limited to 'src/backend/utils/adt/tsquery.c')

diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
new file mode 100644
index 0000000..f54f298
--- /dev/null
+++ b/src/backend/utils/adt/tsquery.c
@@ -0,0 +1,1349 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsquery.c
+ *	  I/O functions for tsquery
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/adt/tsquery.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_type.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/pg_crc.h"
+
+/*
+ * FTS operator priorities, see ts_type.h
+ *
+ * A larger number binds more tightly (NOT > PHRASE > AND > OR): infix()
+ * parenthesizes a subexpression whose priority is lower than its parent's.
+ * Presumably indexed by operator code through the OP_PRIORITY() and
+ * QO_PRIORITY() macros -- confirm in ts_type.h.
+ */
+const int	tsearch_op_priority[OP_COUNT] =
+{
+	4,							/* OP_NOT */
+	2,							/* OP_AND */
+	1,							/* OP_OR */
+	3							/* OP_PHRASE */
+};
+
+/*
+ * parser's states
+ *
+ * WAITFIRSTOPERAND is the state parse_tsquery() starts in; the standard and
+ * websearch tokenizers return PT_END instead of complaining when no operand
+ * can be read while still in that state.
+ */
+typedef enum
+{
+	WAITOPERAND = 1,
+	WAITOPERATOR = 2,
+	WAITFIRSTOPERAND = 3
+} ts_parserstate;
+
+/*
+ * token types for parsing
+ *
+ * PT_END: end of input, PT_ERR: syntax error, PT_VAL: operand,
+ * PT_OPR: operator, PT_OPEN / PT_CLOSE: opening / closing parenthesis.
+ */
+typedef enum
+{
+	PT_END = 0,
+	PT_ERR = 1,
+	PT_VAL = 2,
+	PT_OPR = 3,
+	PT_OPEN = 4,
+	PT_CLOSE = 5
+} ts_tokentype;
+
+/*
+ * get token from query string
+ *
+ * *operator is filled in with OP_* when return value is PT_OPR,
+ * but *weight could contain a distance value in case of phrase operator.
+ * *strval, *lenval and *weight are filled in when return value is PT_VAL
+ * (*prefix too, by the standard tokenizer, from a ':...*' modifier).
+ *
+ */
+typedef ts_tokentype (*ts_tokenizer) (TSQueryParserState state, int8 *operator,
+									  int *lenval, char **strval,
+									  int16 *weight, bool *prefix);
+
+struct TSQueryParserStateData
+{
+	/* Tokenizer used for parsing tsquery */
+	ts_tokenizer gettoken;
+
+	/* State of tokenizer function */
+	char	   *buffer;			/* entire string we are scanning */
+	char	   *buf;			/* current scan point */
+	int			count;			/* nesting count, incremented by (,
+								 * decremented by ); the websearch and plain
+								 * tokenizers also bump it once per quoted /
+								 * plain operand but never read it */
+	ts_parserstate state;
+
+	/* polish (prefix) notation in list, filled in by push* functions */
+	List	   *polstr;
+
+	/*
+	 * Strings from operands are collected in op. curop is a pointer to the
+	 * end of used space of op.
+	 */
+	char	   *op;
+	char	   *curop;
+	int			lenop;			/* allocated size of op */
+	int			sumlen;			/* used size of op */
+
+	/* state for value's parser */
+	TSVectorParseState valstate;
+};
+
+/*
+ * subroutine to parse the modifiers (weight and prefix flag currently)
+ * part, like ':AB*' of a query.
+ *
+ * *weight and *prefix are always reset first.  If buf does not start with
+ * ':' it is returned unchanged.  Otherwise single-byte characters are
+ * consumed while they are one of [aAbBcCdD*]: the letters set bits 3, 2, 1
+ * and 0 of *weight respectively and '*' sets *prefix.  Returns a pointer to
+ * the first character that was not consumed.
+ */
+static char *
+get_modifiers(char *buf, int16 *weight, bool *prefix)
+{
+	*weight = 0;
+	*prefix = false;
+
+	if (!t_iseq(buf, ':'))
+		return buf;
+
+	buf++;
+	while (*buf && pg_mblen(buf) == 1)
+	{
+		switch (*buf)
+		{
+			case 'a':
+			case 'A':
+				*weight |= 1 << 3;
+				break;
+			case 'b':
+			case 'B':
+				*weight |= 1 << 2;
+				break;
+			case 'c':
+			case 'C':
+				*weight |= 1 << 1;
+				break;
+			case 'd':
+			case 'D':
+				*weight |= 1;
+				break;
+			case '*':
+				*prefix = true;
+				break;
+			default:
+				return buf;
+		}
+		buf++;
+	}
+
+	return buf;
+}
+
+/*
+ * Parse phrase operator. 
The operator
+ * may take the following forms:
+ *
+ *		a <N> b (distance is exactly N lexemes)
+ *		a <-> b (default distance = 1)
+ *
+ * The buffer should begin with '<' char
+ *
+ * On success, stores the distance in *distance, advances pstate->buf past
+ * the operator and returns true.  Returns false (leaving pstate->buf alone)
+ * if the text is not a phrase operator; raises an error if N is out of the
+ * range 0 .. MAXENTRYPOS.
+ */
+static bool
+parse_phrase_operator(TSQueryParserState pstate, int16 *distance)
+{
+	enum
+	{
+		PHRASE_OPEN = 0,
+		PHRASE_DIST,
+		PHRASE_CLOSE,
+		PHRASE_FINISH
+	}			state = PHRASE_OPEN;
+	char	   *ptr = pstate->buf;
+	char	   *endptr;
+	long		l = 1;			/* default distance */
+
+	/*
+	 * Note that PHRASE_FINISH is only evaluated while *ptr is not NUL, so an
+	 * operator that is the very last thing in the string is reported as "not
+	 * a phrase operator".
+	 */
+	while (*ptr)
+	{
+		switch (state)
+		{
+			case PHRASE_OPEN:
+				if (t_iseq(ptr, '<'))
+				{
+					state = PHRASE_DIST;
+					ptr++;
+				}
+				else
+					return false;
+				break;
+
+			case PHRASE_DIST:
+				if (t_iseq(ptr, '-'))
+				{
+					state = PHRASE_CLOSE;
+					ptr++;
+					continue;
+				}
+
+				if (!t_isdigit(ptr))
+					return false;
+
+				errno = 0;
+				l = strtol(ptr, &endptr, 10);
+				if (ptr == endptr)
+					return false;
+				else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+							 errmsg("distance in phrase operator must be an integer value between zero and %d inclusive",
+									MAXENTRYPOS)));
+				else
+				{
+					state = PHRASE_CLOSE;
+					ptr = endptr;
+				}
+				break;
+
+			case PHRASE_CLOSE:
+				if (t_iseq(ptr, '>'))
+				{
+					state = PHRASE_FINISH;
+					ptr++;
+				}
+				else
+					return false;
+				break;
+
+			case PHRASE_FINISH:
+				*distance = (int16) l;
+				pstate->buf = ptr;
+				return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Parse OR operator used in websearch_to_tsquery(), returns true if we
+ * believe that the "OR" literal could be the OR operator.
+ *
+ * On success pstate->buf is advanced past the two letters; otherwise it is
+ * left untouched.
+ */
+static bool
+parse_or_operator(TSQueryParserState pstate)
+{
+	char	   *ptr = pstate->buf;
+
+	/* it should begin with "OR" literal */
+	if (pg_strncasecmp(ptr, "or", 2) != 0)
+		return false;
+
+	ptr += 2;
+
+	/*
+	 * it shouldn't be a part of any word but somewhere later it should be
+	 * some operand
+	 */
+	if (*ptr == '\0')			/* no operand */
+		return false;
+
+	/* it shouldn't be a part of any word */
+	if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalpha(ptr) || t_isdigit(ptr))
+		return false;
+
+	for (;;)
+	{
+		ptr += pg_mblen(ptr);
+
+		if (*ptr == '\0')		/* got end of string without operand */
+			return false;
+
+		/*
+		 * We found something that looks like an operand, although it might
+		 * not be a valid one.  Even so, we treat the OR literal as an
+		 * operator (with a possibly incorrect operand) rather than searching
+		 * for it as a lexeme.
+		 */
+		if (!t_isspace(ptr))
+			break;
+	}
+
+	pstate->buf += 2;
+	return true;
+}
+
+/*
+ * Tokenizer for the standard tsquery syntax; parse_tsquery() selects it when
+ * neither P_TSQ_PLAIN nor P_TSQ_WEB is given.
+ *
+ * Recognizes '!', '&', '|', the phrase operator, parentheses, and operands
+ * (read by the tsvector parser) with optional ':' modifiers.  Whitespace
+ * between tokens is skipped.  See ts_tokenizer for the output arguments.
+ */
+static ts_tokentype
+gettoken_query_standard(TSQueryParserState state, int8 *operator,
+						int *lenval, char **strval,
+						int16 *weight, bool *prefix)
+{
+	*weight = 0;
+	*prefix = false;
+
+	while (true)
+	{
+		switch (state->state)
+		{
+			case WAITFIRSTOPERAND:
+			case WAITOPERAND:
+				if (t_iseq(state->buf, '!'))
+				{
+					state->buf++;
+					state->state = WAITOPERAND;
+					*operator = OP_NOT;
+					return PT_OPR;
+				}
+				else if (t_iseq(state->buf, '('))
+				{
+					state->buf++;
+					state->state = WAITOPERAND;
+					state->count++;
+					return PT_OPEN;
+				}
+				else if (t_iseq(state->buf, ':'))
+				{
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("syntax error in tsquery: \"%s\"",
+									state->buffer)));
+				}
+				else if (!t_isspace(state->buf))
+				{
+					/*
+					 * We rely on the tsvector parser to parse the value for
+					 * us
+					 */
+					reset_tsvector_parser(state->valstate, state->buf);
+					if (gettoken_tsvector(state->valstate, strval, lenval,
+										  NULL, NULL, &state->buf))
+					{
+						state->buf = get_modifiers(state->buf, weight, prefix);
+						state->state = WAITOPERATOR;
+						return PT_VAL;
+					}
+					else if (state->state == WAITFIRSTOPERAND)
+					{
+						/* nothing was read at all: empty query, not an error */
+						return PT_END;
+					}
+					else
+						ereport(ERROR,
+								(errcode(ERRCODE_SYNTAX_ERROR),
+								 errmsg("no operand in tsquery: \"%s\"",
+										state->buffer)));
+				}
+				break;
+
+			case WAITOPERATOR:
+				if (t_iseq(state->buf, '&'))
+				{
+					state->buf++;
+					state->state = WAITOPERAND;
+					*operator = OP_AND;
+					return PT_OPR;
+				}
+				else if (t_iseq(state->buf, '|'))
+				{
+					state->buf++;
+					state->state = WAITOPERAND;
+					*operator = OP_OR;
+					return PT_OPR;
+				}
+				else if (parse_phrase_operator(state, weight))
+				{
+					/* weight var is used as storage for distance */
+					state->state = WAITOPERAND;
+					*operator = OP_PHRASE;
+					return PT_OPR;
+				}
+				else if (t_iseq(state->buf, ')'))
+				{
+					state->buf++;
+					state->count--;
+					/* more ')' than '(' so far is a syntax error */
+					return (state->count < 0) ? PT_ERR : PT_CLOSE;
+				}
+				else if (*state->buf == '\0')
+				{
+					/* end of input with unclosed '(' is a syntax error */
+					return (state->count) ? PT_ERR : PT_END;
+				}
+				else if (!t_isspace(state->buf))
+				{
+					return PT_ERR;
+				}
+				break;
+		}
+
+		/* skip one (possibly multibyte) whitespace character */
+		state->buf += pg_mblen(state->buf);
+	}
+}
+
+/*
+ * Tokenizer for the websearch syntax; parse_tsquery() selects it when
+ * P_TSQ_WEB is given.
+ *
+ * '-' is NOT, a double-quoted run is a single operand, the literal "or" may
+ * be OR (see parse_or_operator), and operands that directly follow each
+ * other are joined by an implicit AND.  This tokenizer never returns PT_ERR
+ * and never sets a weight or prefix flag.
+ */
+static ts_tokentype
+gettoken_query_websearch(TSQueryParserState state, int8 *operator,
+						 int *lenval, char **strval,
+						 int16 *weight, bool *prefix)
+{
+	*weight = 0;
+	*prefix = false;
+
+	while (true)
+	{
+		switch (state->state)
+		{
+			case WAITFIRSTOPERAND:
+			case WAITOPERAND:
+				if (t_iseq(state->buf, '-'))
+				{
+					state->buf++;
+					state->state = WAITOPERAND;
+
+					*operator = OP_NOT;
+					return PT_OPR;
+				}
+				else if (t_iseq(state->buf, '"'))
+				{
+					/* Everything in quotes is processed as a single token */
+
+					/* skip opening quote */
+					state->buf++;
+					*strval = state->buf;
+
+					/* iterate to the closing quote or end of the string */
+					while (*state->buf != '\0' && !t_iseq(state->buf, '"'))
+						state->buf++;
+					*lenval = state->buf - *strval;
+
+					/* skip closing quote if not end of the string */
+					if (*state->buf != '\0')
+						state->buf++;
+
+					state->state = WAITOPERATOR;
+					state->count++;
+					return PT_VAL;
+				}
+				else if (ISOPERATOR(state->buf))
+				{
+					/* or else gettoken_tsvector() will raise an error */
+					state->buf++;
+					state->state = WAITOPERAND;
+					continue;
+				}
+				else if (!t_isspace(state->buf))
+				{
+					/*
+					 * We rely on the tsvector parser to parse the value for
+					 * us
+					 */
+					reset_tsvector_parser(state->valstate, state->buf);
+					if (gettoken_tsvector(state->valstate, strval, lenval,
+										  NULL, NULL, &state->buf))
+					{
+						state->state = WAITOPERATOR;
+						return PT_VAL;
+					}
+					else if (state->state == WAITFIRSTOPERAND)
+					{
+						return PT_END;
+					}
+					else
+					{
+						/*
+						 * An operator has already been returned, so it must
+						 * get an operand: push a stopword placeholder.
+						 */
+						pushStop(state);
+						return PT_END;
+					}
+				}
+				break;
+
+			case WAITOPERATOR:
+				if (t_iseq(state->buf, '"'))
+				{
+					/*
+					 * put implicit AND after an operand and handle this quote
+					 * in WAITOPERAND
+					 */
+					state->state = WAITOPERAND;
+					*operator = OP_AND;
+					return PT_OPR;
+				}
+				else if (parse_or_operator(state))
+				{
+					state->state = WAITOPERAND;
+					*operator = OP_OR;
+					return PT_OPR;
+				}
+				else if (*state->buf == '\0')
+				{
+					return PT_END;
+				}
+				else if (!t_isspace(state->buf))
+				{
+					/* put implicit AND after an operand */
+					*operator = OP_AND;
+					state->state = WAITOPERAND;
+					return PT_OPR;
+				}
+				break;
+		}
+
+		/* skip one (possibly multibyte) whitespace character */
+		state->buf += pg_mblen(state->buf);
+	}
+}
+
+/*
+ * Tokenizer for plain text; parse_tsquery() selects it when P_TSQ_PLAIN is
+ * given.
+ *
+ * The whole remaining input is returned as one PT_VAL operand; the next call
+ * (or an empty input) returns PT_END.  No operators are ever produced.
+ */
+static ts_tokentype
+gettoken_query_plain(TSQueryParserState state, int8 *operator,
+					 int *lenval, char **strval,
+					 int16 *weight, bool *prefix)
+{
+	*weight = 0;
+	*prefix = false;
+
+	if (*state->buf == '\0')
+		return PT_END;
+
+	*strval = state->buf;
+	*lenval = strlen(state->buf);
+	state->buf += *lenval;
+	state->count++;
+	return PT_VAL;
+}
+
+/*
+ * Push an operator to state->polstr
+ *
+ * oper must be one of OP_NOT, OP_AND, OP_OR, OP_PHRASE.  distance is stored
+ * only for OP_PHRASE and is forced to 0 for the other operators.
+ */
+void
+pushOperator(TSQueryParserState state, int8 oper, int16 distance)
+{
+	QueryOperator *tmp;
+
+	Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR || oper == OP_PHRASE);
+
+	tmp = (QueryOperator *) palloc0(sizeof(QueryOperator));
+	tmp->type = QI_OPR;
+	tmp->oper = oper;
+	tmp->distance = (oper == OP_PHRASE) ? distance : 0;
+	/* left is filled in later with findoprnd */
+
+	state->polstr = lcons(tmp, state->polstr);
+}
+
+/*
+ * Build a QI_VAL item and put it at the head of state->polstr.
+ *
+ * distance is the offset of the operand's text within state->op and lenval
+ * its length; an error is raised if distance >= MAXSTRPOS or
+ * lenval >= MAXSTRLEN.  The text itself is not touched here.
+ */
+static void
+pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
+{
+	QueryOperand *tmp;
+
+	if (distance >= MAXSTRPOS)
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("value is too big in tsquery: \"%s\"",
+						state->buffer)));
+	if (lenval >= MAXSTRLEN)
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("operand is too long in tsquery: \"%s\"",
+						state->buffer)));
+
+	tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
+	tmp->type = QI_VAL;
+	tmp->weight = weight;
+	tmp->prefix = prefix;
+	tmp->valcrc = (int32) valcrc;
+	tmp->length = lenval;
+	tmp->distance = distance;
+
+	state->polstr = lcons(tmp, state->polstr);
+}
+
+/*
+ * Push an operand to state->polstr.
+ *
+ * strval points to the operand text and lenval is its length in bytes; only
+ * lenval bytes are read, so the text need not be NUL-terminated.  The bytes
+ * are copied, followed by a '\0', to the end of state->op (which is doubled
+ * in size as needed), and the operand's "distance" field records the offset
+ * of that copy within state->op.  Raises an error if lenval >= MAXSTRLEN.
+ */
+void
+pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
+{
+	pg_crc32	valcrc;
+
+	if (lenval >= MAXSTRLEN)
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("word is too long in tsquery: \"%s\"",
+						state->buffer)));
+
+	INIT_LEGACY_CRC32(valcrc);
+	COMP_LEGACY_CRC32(valcrc, strval, lenval);
+	FIN_LEGACY_CRC32(valcrc);
+	pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
+
+	/* append the value string to state.op, enlarging buffer if needed first */
+	while (state->curop - state->op + lenval + 1 >= state->lenop)
+	{
+		int			used = state->curop - state->op;
+
+		state->lenop *= 2;
+		state->op = (char *) repalloc((void *) state->op, state->lenop);
+		state->curop = state->op + used;
+	}
+	memcpy((void *) state->curop, (void *) strval, lenval);
+	state->curop += lenval;
+	*(state->curop) = '\0';
+	state->curop++;
+	state->sumlen += lenval + 1 /* \0 */ ;
+}
+
+
+/*
+ * Push a stopword placeholder to 
state->polstr
+ *
+ * The placeholder is a zeroed QueryOperand whose type is QI_VALSTOP;
+ * parse_tsquery() removes such nodes again once the tree has been built.
+ */
+void
+pushStop(TSQueryParserState state)
+{
+	QueryOperand *tmp;
+
+	tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
+	tmp->type = QI_VALSTOP;
+
+	state->polstr = lcons(tmp, state->polstr);
+}
+
+
+/* capacity of the per-makepol() stack of pending operators */
+#define STACKDEPTH	32
+
+/* a pending operator: its OP_* code and, for OP_PHRASE, its distance */
+typedef struct OperatorElement
+{
+	int8		op;
+	int16		distance;
+} OperatorElement;
+
+/*
+ * Push an operator onto the pending-operator stack; *lenstack is the number
+ * of entries in use.  Errors out if the stack already holds STACKDEPTH
+ * entries.
+ */
+static void
+pushOpStack(OperatorElement *stack, int *lenstack, int8 op, int16 distance)
+{
+	if (*lenstack == STACKDEPTH)	/* internal error */
+		elog(ERROR, "tsquery stack too small");
+
+	stack[*lenstack].op = op;
+	stack[*lenstack].distance = distance;
+
+	(*lenstack)++;
+}
+
+/*
+ * Pop pending operators off the stack and emit them with pushOperator(),
+ * stopping as soon as the top of the stack has a lower priority than op
+ * (lower or equal if op is OP_NOT).  Passing OP_OR, the lowest priority,
+ * therefore empties the stack.
+ */
+static void
+cleanOpStack(TSQueryParserState state,
+			 OperatorElement *stack, int *lenstack, int8 op)
+{
+	int			opPriority = OP_PRIORITY(op);
+
+	while (*lenstack)
+	{
+		/* NOT is right associative, unlike the others */
+		if ((op != OP_NOT && opPriority > OP_PRIORITY(stack[*lenstack - 1].op)) ||
+			(op == OP_NOT && opPriority >= OP_PRIORITY(stack[*lenstack - 1].op)))
+			break;
+
+		(*lenstack)--;
+		pushOperator(state, stack[*lenstack].op,
+					 stack[*lenstack].distance);
+	}
+}
+
+/*
+ * Make polish (prefix) notation of query.
+ *
+ * See parse_tsquery for explanation of pushval.
+ *
+ * Tokens are pulled from state->gettoken until PT_END.  Operands go to
+ * pushval; operators wait on a local stack (at most STACKDEPTH entries) and
+ * are emitted according to their priority by cleanOpStack().  PT_OPEN
+ * recurses for the parenthesized subexpression and PT_CLOSE flushes the
+ * stack and returns to the caller's level.  PT_ERR raises a syntax error.
+ */
+static void
+makepol(TSQueryParserState state,
+		PushFunction pushval,
+		Datum opaque)
+{
+	int8		operator = 0;
+	ts_tokentype type;
+	int			lenval = 0;
+	char	   *strval = NULL;
+	OperatorElement opstack[STACKDEPTH];
+	int			lenstack = 0;
+	int16		weight = 0;
+	bool		prefix;
+
+	/* since this function recurses, it could be driven to stack overflow */
+	check_stack_depth();
+
+	while ((type = state->gettoken(state, &operator,
+								   &lenval, &strval,
+								   &weight, &prefix)) != PT_END)
+	{
+		switch (type)
+		{
+			case PT_VAL:
+				pushval(opaque, state, strval, lenval, weight, prefix);
+				break;
+			case PT_OPR:
+				/* for OP_PHRASE, weight carries the distance */
+				cleanOpStack(state, opstack, &lenstack, operator);
+				pushOpStack(opstack, &lenstack, operator, weight);
+				break;
+			case PT_OPEN:
+				makepol(state, pushval, opaque);
+				break;
+			case PT_CLOSE:
+				cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
+				return;
+			case PT_ERR:
+			default:
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("syntax error in tsquery: \"%s\"",
+								state->buffer)));
+		}
+	}
+
+	cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
+}
+
+/*
+ * Workhorse for findoprnd: process the subtree that starts at ptr[*pos] and
+ * advance *pos past it.
+ *
+ * For OP_NOT, "left" is set to 1 and the single argument follows.  For the
+ * binary operators the right argument's subtree directly follows the
+ * operator, and "left" is set to the offset from the operator to the start
+ * of the left argument's subtree.  *needcleanup is set to true when a
+ * QI_VALSTOP item is met.  Errors out if the array ends before the subtree
+ * is complete.
+ */
+static void
+findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup)
+{
+	/* since this function recurses, it could be driven to stack overflow. */
+	check_stack_depth();
+
+	if (*pos >= nnodes)
+		elog(ERROR, "malformed tsquery: operand not found");
+
+	if (ptr[*pos].type == QI_VAL)
+	{
+		(*pos)++;
+	}
+	else if (ptr[*pos].type == QI_VALSTOP)
+	{
+		*needcleanup = true;	/* we'll have to remove stop words */
+		(*pos)++;
+	}
+	else
+	{
+		Assert(ptr[*pos].type == QI_OPR);
+
+		if (ptr[*pos].qoperator.oper == OP_NOT)
+		{
+			ptr[*pos].qoperator.left = 1;	/* fixed offset */
+			(*pos)++;
+
+			/* process the only argument */
+			findoprnd_recurse(ptr, pos, nnodes, needcleanup);
+		}
+		else
+		{
+			QueryOperator *curitem = &ptr[*pos].qoperator;
+			int			tmp = *pos; /* save current position */
+
+			Assert(curitem->oper == OP_AND ||
+				   curitem->oper == OP_OR ||
+				   curitem->oper == OP_PHRASE);
+
+			(*pos)++;
+
+			/* process RIGHT argument */
+			findoprnd_recurse(ptr, pos, nnodes, needcleanup);
+
+			curitem->left = *pos - tmp; /* set LEFT arg's offset */
+
+			/* process LEFT argument */
+			findoprnd_recurse(ptr, pos, nnodes, needcleanup);
+		}
+	}
+}
+
+
+/*
+ * Fill in the left-fields previously left unfilled.
+ * The input QueryItems must be in polish (prefix) notation.
+ * Also, set *needcleanup to true if there are any QI_VALSTOP nodes.
+ *
+ * Errors out unless the "size" items form exactly one complete tree.
+ */
+static void
+findoprnd(QueryItem *ptr, int size, bool *needcleanup)
+{
+	uint32		pos;
+
+	*needcleanup = false;
+	pos = 0;
+	findoprnd_recurse(ptr, &pos, size, needcleanup);
+
+	if (pos != size)
+		elog(ERROR, "malformed tsquery: extra nodes");
+}
+
+
+/*
+ * Each value (operand) in the query is passed to pushval. pushval can
+ * transform the simple value to an arbitrarily complex expression using
+ * pushValue and pushOperator. It must push a single value with pushValue,
+ * a complete expression with all operands, or a stopword placeholder
+ * with pushStop, otherwise the prefix notation representation will be broken,
+ * having an operator with no operand.
+ *
+ * opaque is passed on to pushval as is, pushval can use it to store its
+ * private state.
+ *
+ * flags selects the tokenizer: P_TSQ_PLAIN treats the whole input as one
+ * operand, P_TSQ_WEB uses the websearch syntax, and otherwise the standard
+ * tsquery syntax is used.  P_TSQ_PLAIN and P_TSQ_WEB must not be combined.
+ *
+ * Returns a palloc'd TSQuery.  If nothing was pushed, a NOTICE is issued and
+ * an empty query (size 0) is returned.
+ */
+TSQuery
+parse_tsquery(char *buf,
+			  PushFunction pushval,
+			  Datum opaque,
+			  int flags)
+{
+	struct TSQueryParserStateData state;
+	int			i;
+	TSQuery		query;
+	int			commonlen;
+	QueryItem  *ptr;
+	ListCell   *cell;
+	bool		needcleanup;
+	int			tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY;
+
+	/* plain should not be used with web */
+	Assert((flags & (P_TSQ_PLAIN | P_TSQ_WEB)) != (P_TSQ_PLAIN | P_TSQ_WEB));
+
+	/* select suitable tokenizer */
+	if (flags & P_TSQ_PLAIN)
+		state.gettoken = gettoken_query_plain;
+	else if (flags & P_TSQ_WEB)
+	{
+		state.gettoken = gettoken_query_websearch;
+		tsv_flags |= P_TSV_IS_WEB;
+	}
+	else
+		state.gettoken = gettoken_query_standard;
+
+	/* init state */
+	state.buffer = buf;
+	state.buf = buf;
+	state.count = 0;
+	state.state = WAITFIRSTOPERAND;
+	state.polstr = NIL;
+
+	/* init value parser's state */
+	state.valstate = init_tsvector_parser(state.buffer, tsv_flags);
+
+	/* init list of operand */
+	state.sumlen = 0;
+	state.lenop = 64;
+	state.curop = state.op = (char *) palloc(state.lenop);
+	*(state.curop) = '\0';
+
+	/* parse query & make polish notation (postfix, but in reverse order) */
+	makepol(&state, pushval, opaque);
+
+	close_tsvector_parser(state.valstate);
+
+	if (list_length(state.polstr) == 0)
+	{
+		ereport(NOTICE,
+				(errmsg("text-search query doesn't contain lexemes: \"%s\"",
+						state.buffer)));
+		query = (TSQuery) palloc(HDRSIZETQ);
+		SET_VARSIZE(query, HDRSIZETQ);
+		query->size = 0;
+		return query;
+	}
+
+	if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen))
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("tsquery is too large")));
+	commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
+
+	/* Pack the QueryItems in the final TSQuery struct to return to caller */
+	query = (TSQuery) palloc0(commonlen);
+	SET_VARSIZE(query, commonlen);
+	query->size = list_length(state.polstr);
+	ptr = GETQUERY(query);
+
+	/*
+	 * Copy QueryItems to TSQuery.  The push* functions prepend to polstr, so
+	 * walking the list from its head visits the items in the reverse of the
+	 * order in which they were pushed.
+	 */
+	i = 0;
+	foreach(cell, state.polstr)
+	{
+		QueryItem  *item = (QueryItem *) lfirst(cell);
+
+		switch (item->type)
+		{
+			case QI_VAL:
+				memcpy(&ptr[i], item, sizeof(QueryOperand));
+				break;
+			case QI_VALSTOP:
+				ptr[i].type = QI_VALSTOP;
+				break;
+			case QI_OPR:
+				memcpy(&ptr[i], item, sizeof(QueryOperator));
+				break;
+			default:
+				elog(ERROR, "unrecognized QueryItem type: %d", item->type);
+		}
+		i++;
+	}
+
+	/* Copy all the operand strings to TSQuery */
+	memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+	pfree(state.op);
+
+	/*
+	 * Set left operand pointers for every operator.  While we're at it,
+	 * detect whether there are any QI_VALSTOP nodes.
+	 */
+	findoprnd(ptr, query->size, &needcleanup);
+
+	/*
+	 * If there are QI_VALSTOP nodes, delete them and simplify the tree.
+	 */
+	if (needcleanup)
+		query = cleanup_tsquery_stopwords(query);
+
+	return query;
+}
+
+/*
+ * PushFunction that stores every operand unchanged; opaque is ignored.
+ */
+static void
+pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
+			 int16 weight, bool prefix)
+{
+	pushValue(state, strval, lenval, weight, prefix);
+}
+
+/*
+ * in without morphology
+ *
+ * Input function: parses the cstring argument with the standard syntax
+ * (flags = 0) and stores the operands as-is.
+ */
+Datum
+tsqueryin(PG_FUNCTION_ARGS)
+{
+	char	   *in = PG_GETARG_CSTRING(0);
+
+	PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), 0));
+}
+
+/*
+ * out function
+ *
+ * INFIX is the working state of infix(): the next item to print and a
+ * growable output buffer.
+ */
+typedef struct
+{
+	QueryItem  *curpol;			/* next query item to print */
+	char	   *buf;			/* start of the output buffer */
+	char	   *cur;			/* current write position within buf */
+	char	   *op;				/* start of the operand strings */
+	int			buflen;			/* allocated size of buf */
+} INFIX;
+
+/* Makes sure inf->buf is large enough for adding 'addsize' bytes */
+#define RESIZEBUF(inf, addsize) \
+while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
+{ \
+	int len = (inf)->cur - (inf)->buf; \
+	(inf)->buflen *= 2; \
+	(inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
+	(inf)->cur = (inf)->buf + len; \
+}
+
+/*
+ * recursively traverse the tree and
+ * print it in infix (human-readable) form
+ *
+ * The subtree starting at in->curpol is appended to in->buf and in->curpol
+ * is advanced past it.  parentPriority is the priority of the enclosing
+ * operator: the subexpression is wrapped in "( ... )" if its own priority is
+ * lower.  rightPhraseOp is true while printing the right operand of a phrase
+ * operator; a phrase operator found there is always parenthesized.
+ */
+static void
+infix(INFIX *in, int parentPriority, bool rightPhraseOp)
+{
+	/* since this function recurses, it could be driven to stack overflow. */
+	check_stack_depth();
+
+	if (in->curpol->type == QI_VAL)
+	{
+		QueryOperand *curpol = &in->curpol->qoperand;
+		char	   *op = in->op + curpol->distance;
+		int			clen;
+
+		RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6);
+		*(in->cur) = '\'';
+		in->cur++;
+		while (*op)
+		{
+			/* a quote is doubled and a backslash is doubled */
+			if (t_iseq(op, '\''))
+			{
+				*(in->cur) = '\'';
+				in->cur++;
+			}
+			else if (t_iseq(op, '\\'))
+			{
+				*(in->cur) = '\\';
+				in->cur++;
+			}
+			COPYCHAR(in->cur, op);
+
+			clen = pg_mblen(op);
+			op += clen;
+			in->cur += clen;
+		}
+		*(in->cur) = '\'';
+		in->cur++;
+		if (curpol->weight || curpol->prefix)
+		{
+			/* modifiers are printed as ':' then '*' then the weight letters */
+			*(in->cur) = ':';
+			in->cur++;
+			if (curpol->prefix)
+			{
+				*(in->cur) = '*';
+				in->cur++;
+			}
+			if (curpol->weight & (1 << 3))
+			{
+				*(in->cur) = 'A';
+				in->cur++;
+			}
+			if (curpol->weight & (1 << 2))
+			{
+				*(in->cur) = 'B';
+				in->cur++;
+			}
+			if (curpol->weight & (1 << 1))
+			{
+				*(in->cur) = 'C';
+				in->cur++;
+			}
+			if (curpol->weight & 1)
+			{
+				*(in->cur) = 'D';
+				in->cur++;
+			}
+		}
+		*(in->cur) = '\0';
+		in->curpol++;
+	}
+	else if (in->curpol->qoperator.oper == OP_NOT)
+	{
+		int			priority = QO_PRIORITY(in->curpol);
+
+		if (priority < parentPriority)
+		{
+			RESIZEBUF(in, 2);
+			sprintf(in->cur, "( ");
+			in->cur = strchr(in->cur, '\0');
+		}
+		RESIZEBUF(in, 1);
+		*(in->cur) = '!';
+		in->cur++;
+		*(in->cur) = '\0';
+		in->curpol++;
+
+		infix(in, priority, false);
+		if (priority < parentPriority)
+		{
+			RESIZEBUF(in, 2);
+			sprintf(in->cur, " )");
+			in->cur = strchr(in->cur, '\0');
+		}
+	}
+	else
+	{
+		int8		op = in->curpol->qoperator.oper;
+		int			priority = QO_PRIORITY(in->curpol);
+		int16		distance = in->curpol->qoperator.distance;
+		INFIX		nrm;
+		bool		needParenthesis = false;
+
+		in->curpol++;
+		if (priority < parentPriority ||
+		/* phrase operator depends on order */
+			(op == OP_PHRASE && rightPhraseOp))
+		{
+			needParenthesis = true;
+			RESIZEBUF(in, 2);
+			sprintf(in->cur, "( ");
+			in->cur = strchr(in->cur, '\0');
+		}
+
+		/*
+		 * The right operand comes first in the item array but is printed
+		 * last, so render it into a scratch buffer.
+		 */
+		nrm.curpol = in->curpol;
+		nrm.op = in->op;
+		nrm.buflen = 16;
+		nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+		/* get right operand */
+		infix(&nrm, priority, (op == OP_PHRASE));
+
+		/* get & print left operand */
+		in->curpol = nrm.curpol;
+		infix(in, priority, false);
+
+		/* print operator & right operand */
+		RESIZEBUF(in, 3 + (2 + 10 /* distance */ ) + (nrm.cur - nrm.buf));
+		switch (op)
+		{
+			case OP_OR:
+				sprintf(in->cur, " | %s", nrm.buf);
+				break;
+			case OP_AND:
+				sprintf(in->cur, " & %s", nrm.buf);
+				break;
+			case OP_PHRASE:
+				if (distance != 1)
+					sprintf(in->cur, " <%d> %s", distance, nrm.buf);
+				else
+					sprintf(in->cur, " <-> %s", nrm.buf);
+				break;
+			default:
+				/* OP_NOT is handled in above if-branch */
+				elog(ERROR, "unrecognized operator type: %d", op);
+		}
+		in->cur = strchr(in->cur, '\0');
+		pfree(nrm.buf);
+
+		if (needParenthesis)
+		{
+			RESIZEBUF(in, 2);
+			sprintf(in->cur, " )");
+			in->cur = strchr(in->cur, '\0');
+		}
+	}
+}
+
+/*
+ * Output function: prints the query in infix form; an empty query (size 0)
+ * is printed as an empty string.
+ */
+Datum
+tsqueryout(PG_FUNCTION_ARGS)
+{
+	TSQuery		query = PG_GETARG_TSQUERY(0);
+	INFIX		nrm;
+
+	if (query->size == 0)
+	{
+		char	   *b = palloc(1);
+
+		*b = '\0';
+		PG_RETURN_POINTER(b);
+	}
+	nrm.curpol = GETQUERY(query);
+	nrm.buflen = 32;
+	nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+	*(nrm.cur) = '\0';
+	nrm.op = GETOPERAND(query);
+	infix(&nrm, -1 /* lowest priority */ , false);
+
+	PG_FREE_IF_COPY(query, 0);
+	PG_RETURN_CSTRING(nrm.buf);
+}
+
+/*
+ * Binary Input / Output functions. The binary format is as follows:
+ *
+ * uint32	number of operators/operands in the query
+ *
+ * Followed by the operators and operands, in prefix notation. For each
+ * operand (fields in the order tsquerysend writes and tsqueryrecv reads
+ * them):
+ *
+ * uint8	type, QI_VAL
+ * uint8	weight
+ * uint8	prefix
+ *			operand text in client encoding, null-terminated
+ *
+ * For each operator:
+ * uint8	type, QI_OPR
+ * uint8	operator, one of OP_AND, OP_PHRASE, OP_OR, OP_NOT.
+ * uint16	distance (only for OP_PHRASE)
+ */
+Datum
+tsquerysend(PG_FUNCTION_ARGS)
+{
+	TSQuery		query = PG_GETARG_TSQUERY(0);
+	StringInfoData buf;
+	int			i;
+	QueryItem  *item = GETQUERY(query);
+
+	pq_begintypsend(&buf);
+
+	pq_sendint32(&buf, query->size);
+	for (i = 0; i < query->size; i++)
+	{
+		pq_sendint8(&buf, item->type);
+
+		switch (item->type)
+		{
+			case QI_VAL:
+				/* weight, then prefix flag, then the operand text */
+				pq_sendint8(&buf, item->qoperand.weight);
+				pq_sendint8(&buf, item->qoperand.prefix);
+				pq_sendstring(&buf, GETOPERAND(query) + item->qoperand.distance);
+				break;
+			case QI_OPR:
+				pq_sendint8(&buf, item->qoperator.oper);
+				if (item->qoperator.oper == OP_PHRASE)
+					pq_sendint16(&buf, item->qoperator.distance);
+				break;
+			default:
+				elog(ERROR, "unrecognized tsquery node type: %d", item->type);
+		}
+		item++;
+	}
+
+	PG_FREE_IF_COPY(query, 0);
+
+	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*
+ * Binary receive function: the inverse of tsquerysend.
+ *
+ * The items are read and validated first, while pointers to the operand
+ * strings are collected in a temporary array; the result is then enlarged
+ * and the strings are copied in behind the items.  findoprnd() fills in the
+ * operators' "left" fields and rejects item sequences that do not form a
+ * single complete tree.
+ */
+Datum
+tsqueryrecv(PG_FUNCTION_ARGS)
+{
+	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
+	TSQuery		query;
+	int			i,
+				len;
+	QueryItem  *item;
+	int			datalen;
+	char	   *ptr;
+	uint32		size;
+	const char **operands;
+	bool		needcleanup;
+
+	size = pq_getmsgint(buf, sizeof(uint32));
+	if (size > (MaxAllocSize / sizeof(QueryItem)))
+		elog(ERROR, "invalid size of tsquery");
+
+	/* Allocate space to temporarily hold operand strings */
+	operands = palloc(size * sizeof(char *));
+
+	/* Allocate space for all the QueryItems. */
+	len = HDRSIZETQ + sizeof(QueryItem) * size;
+	query = (TSQuery) palloc0(len);
+	query->size = size;
+	item = GETQUERY(query);
+
+	datalen = 0;
+	for (i = 0; i < size; i++)
+	{
+		item->type = (int8) pq_getmsgint(buf, sizeof(int8));
+
+		if (item->type == QI_VAL)
+		{
+			size_t		val_len;	/* length after recoding to server
+									 * encoding */
+			uint8		weight;
+			uint8		prefix;
+			const char *val;
+			pg_crc32	valcrc;
+
+			weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
+			prefix = (uint8) pq_getmsgint(buf, sizeof(uint8));
+			val = pq_getmsgstring(buf);
+			val_len = strlen(val);
+
+			/*
+			 * Sanity checks
+			 *
+			 * NOTE(review): these accept val_len == MAXSTRLEN and
+			 * datalen == MAXSTRPOS, whereas pushValue_internal() rejects
+			 * values >= those limits -- confirm the difference is intended.
+			 */
+
+			if (weight > 0xF)
+				elog(ERROR, "invalid tsquery: invalid weight bitmap");
+
+			if (val_len > MAXSTRLEN)
+				elog(ERROR, "invalid tsquery: operand too long");
+
+			if (datalen > MAXSTRPOS)
+				elog(ERROR, "invalid tsquery: total operand length exceeded");
+
+			/* Looks valid. */
+
+			INIT_LEGACY_CRC32(valcrc);
+			COMP_LEGACY_CRC32(valcrc, val, val_len);
+			FIN_LEGACY_CRC32(valcrc);
+
+			item->qoperand.weight = weight;
+			item->qoperand.prefix = (prefix) ? true : false;
+			item->qoperand.valcrc = (int32) valcrc;
+			item->qoperand.length = val_len;
+			item->qoperand.distance = datalen;
+
+			/*
+			 * Operand strings are copied to the final struct after this loop;
+			 * here we just collect them to an array
+			 */
+			operands[i] = val;
+
+			datalen += val_len + 1; /* + 1 for the '\0' terminator */
+		}
+		else if (item->type == QI_OPR)
+		{
+			int8		oper;
+
+			oper = (int8) pq_getmsgint(buf, sizeof(int8));
+			if (oper != OP_NOT && oper != OP_OR && oper != OP_AND && oper != OP_PHRASE)
+				elog(ERROR, "invalid tsquery: unrecognized operator type %d",
+					 (int) oper);
+			/* an operator cannot be the last item: its operands follow it */
+			if (i == size - 1)
+				elog(ERROR, "invalid pointer to right operand");
+
+			item->qoperator.oper = oper;
+			if (oper == OP_PHRASE)
+				item->qoperator.distance = (int16) pq_getmsgint(buf, sizeof(int16));
+		}
+		else
+			elog(ERROR, "unrecognized tsquery node type: %d", item->type);
+
+		item++;
+	}
+
+	/* Enlarge buffer to make room for the operand values. */
+	query = (TSQuery) repalloc(query, len + datalen);
+	item = GETQUERY(query);
+	ptr = GETOPERAND(query);
+
+	/*
+	 * Fill in the left-pointers. Checks that the tree is well-formed as a
+	 * side-effect.
+	 */
+	findoprnd(item, size, &needcleanup);
+
+	/* Can't have found any QI_VALSTOP nodes */
+	Assert(!needcleanup);
+
+	/* Copy operands to output struct */
+	for (i = 0; i < size; i++)
+	{
+		if (item->type == QI_VAL)
+		{
+			memcpy(ptr, operands[i], item->qoperand.length + 1);
+			ptr += item->qoperand.length + 1;
+		}
+		item++;
+	}
+
+	pfree(operands);
+
+	Assert(ptr - GETOPERAND(query) == datalen);
+
+	SET_VARSIZE(query, len + datalen);
+
+	PG_RETURN_TSQUERY(query);
+}
+
+/*
+ * debug function, used only for view query
+ * which will be executed in non-leaf pages in index
+ *
+ * Returns an empty text for an empty query, the text "T" if clean_NOT()
+ * returns NULL, and otherwise the infix form of the tree that clean_NOT()
+ * returned.  (clean_NOT() is defined elsewhere; presumably it strips the
+ * NOT branches -- confirm there.)
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+	TSQuery		query = PG_GETARG_TSQUERY(0);
+	INFIX		nrm;
+	text	   *res;
+	QueryItem  *q;
+	int			len;
+
+	if (query->size == 0)
+	{
+		res = (text *) palloc(VARHDRSZ);
+		SET_VARSIZE(res, VARHDRSZ);
+		PG_RETURN_POINTER(res);
+	}
+
+	q = clean_NOT(GETQUERY(query), &len);
+
+	if (!q)
+	{
+		res = cstring_to_text("T");
+	}
+	else
+	{
+		nrm.curpol = q;
+		nrm.buflen = 32;
+		nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+		*(nrm.cur) = '\0';
+		nrm.op = GETOPERAND(query);
+		infix(&nrm, -1, false);
+		res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf);
+		pfree(q);
+	}
+
+	PG_FREE_IF_COPY(query, 0);
+
+	PG_RETURN_TEXT_P(res);
+}
-- 
cgit v1.2.3