summaryrefslogtreecommitdiffstats
path: root/src/include/tsearch/ts_utils.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/tsearch/ts_utils.h')
-rw-r--r--src/include/tsearch/ts_utils.h266
1 files changed, 266 insertions, 0 deletions
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h
new file mode 100644
index 0000000..4266560
--- /dev/null
+++ b/src/include/tsearch/ts_utils.h
@@ -0,0 +1,266 @@
+/*-------------------------------------------------------------------------
+ *
+ * ts_utils.h
+ * helper utilities for tsearch
+ *
+ * Copyright (c) 1998-2021, PostgreSQL Global Development Group
+ *
+ * src/include/tsearch/ts_utils.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef _PG_TS_UTILS_H_
+#define _PG_TS_UTILS_H_
+
+#include "nodes/pg_list.h"
+#include "tsearch/ts_public.h"
+#include "tsearch/ts_type.h"
+
+/*
+ * Common parse definitions for tsvector and tsquery
+ */
+
+/* tsvector parser support. */
+
+struct TSVectorParseStateData; /* opaque struct in tsvector_parser.c */
+typedef struct TSVectorParseStateData *TSVectorParseState;
+
+#define P_TSV_OPR_IS_DELIM (1 << 0)
+#define P_TSV_IS_TSQUERY (1 << 1)
+#define P_TSV_IS_WEB (1 << 2)
+
+extern TSVectorParseState init_tsvector_parser(char *input, int flags);
+extern void reset_tsvector_parser(TSVectorParseState state, char *input);
+extern bool gettoken_tsvector(TSVectorParseState state,
+ char **token, int *len,
+ WordEntryPos **pos, int *poslen,
+ char **endptr);
+extern void close_tsvector_parser(TSVectorParseState state);
+
+/* phrase operator begins with '<' */
+#define ISOPERATOR(x) \
+ ( pg_mblen(x) == 1 && ( *(x) == '!' || \
+ *(x) == '&' || \
+ *(x) == '|' || \
+ *(x) == '(' || \
+ *(x) == ')' || \
+ *(x) == '<' \
+ ) )
+
+/* parse_tsquery */
+
+struct TSQueryParserStateData; /* private in backend/utils/adt/tsquery.c */
+typedef struct TSQueryParserStateData *TSQueryParserState;
+
+typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
+ char *token, int tokenlen,
+ int16 tokenweights, /* bitmap as described in
+ * QueryOperand struct */
+ bool prefix);
+
+#define P_TSQ_PLAIN (1 << 0)
+#define P_TSQ_WEB (1 << 1)
+
+extern TSQuery parse_tsquery(char *buf,
+ PushFunction pushval,
+ Datum opaque,
+ int flags);
+
+/* Functions for use by PushFunction implementations */
+extern void pushValue(TSQueryParserState state,
+ char *strval, int lenval, int16 weight, bool prefix);
+extern void pushStop(TSQueryParserState state);
+extern void pushOperator(TSQueryParserState state, int8 oper, int16 distance);
+
+/*
+ * parse plain text and lexize words
+ */
+typedef struct
+{
+ uint16 len;
+ uint16 nvariant;
+ union
+ {
+ uint16 pos;
+
+ /*
+ * When apos array is used, apos[0] is the number of elements in the
+ * array (excluding apos[0]), and alen is the allocated size of the
+ * array.
+ */
+ uint16 *apos;
+ } pos;
+ uint16 flags; /* currently, only TSL_PREFIX */
+ char *word;
+ uint32 alen;
+} ParsedWord;
+
+typedef struct
+{
+ ParsedWord *words;
+ int32 lenwords;
+ int32 curwords;
+ int32 pos;
+} ParsedText;
+
+extern void parsetext(Oid cfgId, ParsedText *prs, char *buf, int32 buflen);
+
+/*
+ * headline framework, flow in common to generate:
+ * 1 parse text with hlparsetext
+ * 2 parser-specific function to find part
+ * 3 generateHeadline to generate result text
+ */
+
+extern void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query,
+ char *buf, int32 buflen);
+extern text *generateHeadline(HeadlineParsedText *prs);
+
+/*
+ * TSQuery execution support
+ *
+ * TS_execute() executes a tsquery against data that can be represented in
+ * various forms. The TSExecuteCallback callback function is called to check
+ * whether a given primitive tsquery value is matched in the data.
+ */
+
+/* TS_execute requires ternary logic to handle NOT with phrase matches */
+typedef enum
+{
+ TS_NO, /* definitely no match */
+ TS_YES, /* definitely does match */
+ TS_MAYBE /* can't verify match for lack of pos data */
+} TSTernaryValue;
+
+/*
+ * struct ExecPhraseData is passed to a TSExecuteCallback function if we need
+ * lexeme position data (because of a phrase-match operator in the tsquery).
+ * The callback should fill in position data when it returns TS_YES (success).
+ * If it cannot return position data, it should leave "data" unchanged and
+ * return TS_MAYBE. The caller of TS_execute() must then arrange for a later
+ * recheck with position data available.
+ *
+ * The reported lexeme positions must be sorted and unique. Callers must only
+ * consult the position bits of the pos array, ie, WEP_GETPOS(data->pos[i]).
+ * This allows the returned "pos" to point directly to the WordEntryPos
+ * portion of a tsvector value. If "allocated" is true then the pos array
+ * is palloc'd workspace and caller may free it when done.
+ *
+ * "negate" means that the pos array contains positions where the query does
+ * not match, rather than positions where it does. "width" is positive when
+ * the match is wider than one lexeme. Neither of these fields normally need
+ * to be touched by TSExecuteCallback functions; they are used for
+ * phrase-search processing within TS_execute.
+ *
+ * All fields of the ExecPhraseData struct are initially zeroed by caller.
+ */
+typedef struct ExecPhraseData
+{
+ int npos; /* number of positions reported */
+ bool allocated; /* pos points to palloc'd data? */
+ bool negate; /* positions are where query is NOT matched */
+ WordEntryPos *pos; /* ordered, non-duplicate lexeme positions */
+ int width; /* width of match in lexemes, less 1 */
+} ExecPhraseData;
+
+/*
+ * Signature for TSQuery lexeme check functions
+ *
+ * arg: opaque value passed through from caller of TS_execute
+ * val: lexeme to test for presence of
+ * data: to be filled with lexeme positions; NULL if position data not needed
+ *
+ * Return TS_YES if lexeme is present in data, TS_MAYBE if it might be
+ * present, TS_NO if it definitely is not present. If data is not NULL,
+ * it must be filled with lexeme positions if available. If position data
+ * is not available, leave *data as zeroes and return TS_MAYBE, never TS_YES.
+ */
+typedef TSTernaryValue (*TSExecuteCallback) (void *arg, QueryOperand *val,
+ ExecPhraseData *data);
+
+/*
+ * Flag bits for TS_execute
+ */
+#define TS_EXEC_EMPTY (0x00)
+/*
+ * If TS_EXEC_SKIP_NOT is set, then NOT sub-expressions are automatically
+ * evaluated to be true. This was formerly the default behavior. It's now
+ * deprecated because it tends to give silly answers, but some applications
+ * might still have a use for it.
+ */
+#define TS_EXEC_SKIP_NOT (0x01)
+/*
+ * If TS_EXEC_PHRASE_NO_POS is set, allow OP_PHRASE to be executed lossily
+ * in the absence of position information: a true result indicates that the
+ * phrase might be present. Without this flag, OP_PHRASE always returns
+ * false if lexeme position information is not available.
+ */
+#define TS_EXEC_PHRASE_NO_POS (0x02)
+
+extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags,
+ TSExecuteCallback chkcond);
+extern TSTernaryValue TS_execute_ternary(QueryItem *curitem, void *arg,
+ uint32 flags,
+ TSExecuteCallback chkcond);
+extern bool tsquery_requires_match(QueryItem *curitem);
+
+/*
+ * to_ts* - text transformation to tsvector, tsquery
+ */
+extern TSVector make_tsvector(ParsedText *prs);
+extern int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix);
+
+/*
+ * Possible strategy numbers for indexes
+ * TSearchStrategyNumber - (tsvector|text) @@ tsquery
+ * TSearchWithClassStrategyNumber - tsvector @@@ tsquery
+ */
+#define TSearchStrategyNumber 1
+#define TSearchWithClassStrategyNumber 2
+
+/*
+ * TSQuery Utilities
+ */
+extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len);
+extern TSQuery cleanup_tsquery_stopwords(TSQuery in);
+
+typedef struct QTNode
+{
+ QueryItem *valnode;
+ uint32 flags;
+ int32 nchild;
+ char *word;
+ uint32 sign;
+ struct QTNode **child;
+} QTNode;
+
+/* bits in QTNode.flags */
+#define QTN_NEEDFREE 0x01
+#define QTN_NOCHANGE 0x02
+#define QTN_WORDFREE 0x04
+
+typedef uint64 TSQuerySign;
+
+#define TSQS_SIGLEN (sizeof(TSQuerySign)*BITS_PER_BYTE)
+
+#define TSQuerySignGetDatum(X) Int64GetDatum((int64) (X))
+#define DatumGetTSQuerySign(X) ((TSQuerySign) DatumGetInt64(X))
+#define PG_RETURN_TSQUERYSIGN(X) return TSQuerySignGetDatum(X)
+#define PG_GETARG_TSQUERYSIGN(n) DatumGetTSQuerySign(PG_GETARG_DATUM(n))
+
+
+extern QTNode *QT2QTN(QueryItem *in, char *operand);
+extern TSQuery QTN2QT(QTNode *in);
+extern void QTNFree(QTNode *in);
+extern void QTNSort(QTNode *in);
+extern void QTNTernary(QTNode *in);
+extern void QTNBinary(QTNode *in);
+extern int QTNodeCompare(QTNode *an, QTNode *bn);
+extern QTNode *QTNCopy(QTNode *in);
+extern void QTNClearFlags(QTNode *in, uint32 flags);
+extern bool QTNEq(QTNode *a, QTNode *b);
+extern TSQuerySign makeTSQuerySign(TSQuery a);
+extern QTNode *findsubquery(QTNode *root, QTNode *ex, QTNode *subs,
+ bool *isfind);
+
+#endif /* _PG_TS_UTILS_H_ */