summaryrefslogtreecommitdiffstats
path: root/src/include/tsearch
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-16 19:46:48 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-16 19:46:48 +0000
commit: 311bcfc6b3acdd6fd152798c7f287ddf74fa2a98 (patch)
tree: 0ec307299b1dada3701e42f4ca6eda57d708261e /src/include/tsearch
parent: Initial commit. (diff)
downloadpostgresql-15-upstream.tar.xz
postgresql-15-upstream.zip
Adding upstream version 15.4.upstream/15.4upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/include/tsearch')
-rw-r--r--src/include/tsearch/dicts/regis.h49
-rw-r--r--src/include/tsearch/dicts/spell.h247
-rw-r--r--src/include/tsearch/ts_cache.h98
-rw-r--r--src/include/tsearch/ts_locale.h63
-rw-r--r--src/include/tsearch/ts_public.h159
-rw-r--r--src/include/tsearch/ts_type.h242
-rw-r--r--src/include/tsearch/ts_utils.h266
7 files changed, 1124 insertions, 0 deletions
diff --git a/src/include/tsearch/dicts/regis.h b/src/include/tsearch/dicts/regis.h
new file mode 100644
index 0000000..c7c3d9f
--- /dev/null
+++ b/src/include/tsearch/dicts/regis.h
@@ -0,0 +1,49 @@
+/*-------------------------------------------------------------------------
+ *
+ * regis.h
+ *
+ * Declarations for fast regex subset, used by ISpell
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ * src/include/tsearch/dicts/regis.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef __REGIS_H__
+#define __REGIS_H__
+
+typedef struct RegisNode /* one node of a pattern; nodes chain through ->next */
+{
+ uint32
+ type:2, /* presumably RSF_ONEOF or RSF_NONEOF -- confirm in the implementation */
+ len:16, /* presumably the number of bytes stored in data[] -- confirm */
+ unused:14; /* padding; the three bitfields total 32 bits */
+ struct RegisNode *next; /* following node in the chain */
+ unsigned char data[FLEXIBLE_ARRAY_MEMBER]; /* variable-length payload; RNHDRSZ is its offset */
+} RegisNode;
+
+#define RNHDRSZ (offsetof(RegisNode,data)) /* size of the fixed RegisNode header that precedes data[] */
+
+#define RSF_ONEOF 1 /* NOTE(review): presumably values for RegisNode.type -- confirm */
+#define RSF_NONEOF 2
+
+typedef struct Regis /* handle taken by RS_compile, RS_execute and RS_free */
+{
+ RegisNode *node; /* first RegisNode; later nodes are reached through ->next */
+ uint32
+ issuffix:1, /* presumably records the issuffix argument of RS_compile -- confirm */
+ nchar:16, /* NOTE(review): looks like a character count for the pattern -- confirm */
+ unused:15; /* padding; the three bitfields total 32 bits */
+} Regis;
+
+extern bool RS_isRegis(const char *str);
+
+extern void RS_compile(Regis *r, bool issuffix, const char *str);
+extern void RS_free(Regis *r);
+
+/* Returns true if str matches r */
+extern bool RS_execute(Regis *r, char *str);
+
+#endif
diff --git a/src/include/tsearch/dicts/spell.h b/src/include/tsearch/dicts/spell.h
new file mode 100644
index 0000000..978f43a
--- /dev/null
+++ b/src/include/tsearch/dicts/spell.h
@@ -0,0 +1,247 @@
+/*-------------------------------------------------------------------------
+ *
+ * spell.h
+ *
+ * Declarations for ISpell dictionary
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ * src/include/tsearch/dicts/spell.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef __SPELL_H__
+#define __SPELL_H__
+
+#include "regex/regex.h"
+#include "tsearch/dicts/regis.h"
+#include "tsearch/ts_public.h"
+
+/*
+ * SPNode and SPNodeData are used to represent prefix tree (Trie) to store
+ * a words list.
+ */
+struct SPNode;
+
+typedef struct
+{
+ uint32 val:8, /* presumably the byte this trie entry matches -- confirm */
+ isword:1, /* presumably set when a word ends at this entry -- confirm */
+ /* Stores compound flags listed below */
+ compoundflag:4,
+ /* Reference to an entry of the AffixData field */
+ affix:19;
+ struct SPNode *node; /* next trie level; struct SPNode is defined below */
+} SPNodeData;
+
+/*
+ * Names of FF_ are correlated with Hunspell options in affix file
+ * http://hunspell.sourceforge.net/
+ */
+#define FF_COMPOUNDONLY 0x01
+#define FF_COMPOUNDBEGIN 0x02
+#define FF_COMPOUNDMIDDLE 0x04
+#define FF_COMPOUNDLAST 0x08
+#define FF_COMPOUNDFLAG ( FF_COMPOUNDBEGIN | FF_COMPOUNDMIDDLE | \
+ FF_COMPOUNDLAST )
+#define FF_COMPOUNDFLAGMASK 0x0f
+
+typedef struct SPNode
+{
+ uint32 length;
+ SPNodeData data[FLEXIBLE_ARRAY_MEMBER];
+} SPNode;
+
+#define SPNHDRSZ (offsetof(SPNode,data))
+
+/*
+ * Represents an entry in a words list.
+ */
+typedef struct spell_struct
+{
+ union
+ {
+ /*
+ * flag is filled in by NIImportDictionary(). After
+ * NISortDictionary(), d is used instead of flag.
+ */
+ char *flag;
+ /* d is used in mkSPNode() */
+ struct
+ {
+ /* Reference to an entry of the AffixData field */
+ int affix;
+ /* Length of the word */
+ int len;
+ } d;
+ } p;
+ char word[FLEXIBLE_ARRAY_MEMBER];
+} SPELL;
+
+#define SPELLHDRSZ (offsetof(SPELL, word))
+
+/*
+ * If an affix uses a regex, we have to store that separately in a struct
+ * that won't move around when arrays of affixes are enlarged or sorted.
+ * This is so that it can be found to be cleaned up at context destruction.
+ */
+typedef struct aff_regex_struct
+{
+ regex_t regex;
+ MemoryContextCallback mcallback;
+} aff_regex_struct;
+
+/*
+ * Represents an entry in an affix list.
+ */
+typedef struct aff_struct
+{
+ char *flag; /* affix flag name; presumably encoded according to FlagMode -- confirm */
+ /* FF_SUFFIX or FF_PREFIX */
+ uint32 type:1,
+ flagflags:7, /* presumably the FF_COMPOUND* / FF_CROSSPRODUCT bits defined below -- confirm */
+ issimple:1, /* NOTE(review): meaning not visible in this header -- confirm */
+ isregis:1, /* presumably selects reg.regis rather than reg.pregex -- confirm */
+ replen:14; /* presumably the length of repl -- confirm */
+ char *find; /* NOTE(review): looks like the text to be found -- confirm exact semantics */
+ char *repl; /* NOTE(review): looks like the replacement text -- confirm exact semantics */
+ union
+ {
+ aff_regex_struct *pregex; /* separately allocated regex, see aff_regex_struct above */
+ Regis regis; /* fast regex subset from regis.h, stored inline */
+ } reg;
+} AFFIX;
+
+/*
+ * affixes use dictionary flags too
+ */
+#define FF_COMPOUNDPERMITFLAG 0x10
+#define FF_COMPOUNDFORBIDFLAG 0x20
+#define FF_CROSSPRODUCT 0x40
+
+/*
+ * Don't change the order of these. Initialization sorts by these,
+ * and expects prefixes to come first after sorting.
+ */
+#define FF_SUFFIX 1
+#define FF_PREFIX 0
+
+/*
+ * AffixNode and AffixNodeData are used to represent prefix tree (Trie) to store
+ * an affix list.
+ */
+struct AffixNode;
+
+typedef struct
+{
+ uint32 val:8, /* presumably the byte this trie entry matches -- confirm */
+ naff:24; /* presumably the number of pointers in aff[] -- confirm */
+ AFFIX **aff; /* presumably an array of pointers to AFFIX entries -- confirm */
+ struct AffixNode *node; /* next trie level; struct AffixNode is defined below */
+} AffixNodeData;
+
+typedef struct AffixNode
+{
+ uint32 isvoid:1, /* NOTE(review): meaning not visible in this header -- confirm */
+ length:31; /* presumably the number of entries in data[] -- confirm */
+ AffixNodeData data[FLEXIBLE_ARRAY_MEMBER]; /* variable-length array of entries */
+} AffixNode;
+
+#define ANHRDSZ (offsetof(AffixNode, data)) /* header size before data[]; spelled "HRD", unlike SPNHDRSZ and RNHDRSZ */
+
+typedef struct /* type pointed to by IspellDict.CompoundAffix */
+{
+ char *affix; /* affix text */
+ int len; /* presumably the length of affix -- confirm */
+ bool issuffix; /* presumably true for a suffix, false for a prefix -- confirm */
+} CMPDAffix;
+
+/*
+ * Type of encoding affix flags in Hunspell dictionaries
+ */
+typedef enum
+{
+ FM_CHAR, /* one character (like ispell) */
+ FM_LONG, /* two characters */
+ FM_NUM /* number, >= 0 and < 65536 */
+} FlagMode;
+
+/*
+ * Structure to store Hunspell options. Flag representation depends on flag
+ * type. These flags are about support of compound words.
+ */
+typedef struct CompoundAffixFlag
+{
+ union
+ {
+ /* Flag name if flagMode is FM_CHAR or FM_LONG */
+ char *s;
+ /* Flag name if flagMode is FM_NUM */
+ uint32 i;
+ } flag;
+ /* we don't have a bsearch_arg version, so, copy FlagMode */
+ FlagMode flagMode;
+ uint32 value;
+} CompoundAffixFlag;
+
+#define FLAGNUM_MAXSIZE (1 << 16)
+
+typedef struct /* complete state of one Ispell dictionary; passed to every NI* function */
+{
+ int maffixes; /* presumably allocated length of Affix array (cf. mspell) -- confirm */
+ int naffixes; /* presumably number of valid entries in Affix array (cf. nspell) -- confirm */
+ AFFIX *Affix; /* array of affix entries */
+
+ AffixNode *Suffix; /* root of an affix trie; presumably for FF_SUFFIX entries -- confirm */
+ AffixNode *Prefix; /* root of an affix trie; presumably for FF_PREFIX entries -- confirm */
+
+ SPNode *Dictionary; /* root of the word trie (see SPNode) */
+ /* Array of sets of affixes */
+ char **AffixData;
+ int lenAffixData; /* presumably allocated length of AffixData -- confirm */
+ int nAffixData; /* presumably number of valid entries in AffixData -- confirm */
+ bool useFlagAliases; /* NOTE(review): meaning not visible in this header -- confirm */
+
+ CMPDAffix *CompoundAffix; /* array of CMPDAffix; length is not stored in this struct */
+
+ bool usecompound; /* NOTE(review): presumably enables compound-word handling -- confirm */
+ FlagMode flagMode; /* how affix flags are encoded, see FlagMode */
+
+ /*
+ * All following fields are actually needed only for initialization
+ */
+
+ /* Array of Hunspell options in affix file */
+ CompoundAffixFlag *CompoundAffixFlags;
+ /* number of entries in CompoundAffixFlags array */
+ int nCompoundAffixFlag;
+ /* allocated length of CompoundAffixFlags array */
+ int mCompoundAffixFlag;
+
+ /*
+ * Remaining fields are only used during dictionary construction; they are
+ * set up by NIStartBuild and cleared by NIFinishBuild.
+ */
+ MemoryContext buildCxt; /* temp context for construction */
+
+ /* Temporary array of all words in the dict file */
+ SPELL **Spell;
+ int nspell; /* number of valid entries in Spell array */
+ int mspell; /* allocated length of Spell array */
+
+ /* These are used to allocate "compact" data without palloc overhead */
+ char *firstfree; /* first free address (always maxaligned) */
+ size_t avail; /* free space remaining at firstfree */
+} IspellDict;
+
+extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word);
+
+extern void NIStartBuild(IspellDict *Conf);
+extern void NIImportAffixes(IspellDict *Conf, const char *filename);
+extern void NIImportDictionary(IspellDict *Conf, const char *filename);
+extern void NISortDictionary(IspellDict *Conf);
+extern void NISortAffixes(IspellDict *Conf);
+extern void NIFinishBuild(IspellDict *Conf);
+
+#endif
diff --git a/src/include/tsearch/ts_cache.h b/src/include/tsearch/ts_cache.h
new file mode 100644
index 0000000..5e4a49e
--- /dev/null
+++ b/src/include/tsearch/ts_cache.h
@@ -0,0 +1,98 @@
+/*-------------------------------------------------------------------------
+ *
+ * ts_cache.h
+ * Tsearch related object caches.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/tsearch/ts_cache.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef TS_CACHE_H
+#define TS_CACHE_H
+
+#include "utils/guc.h"
+
+
+/*
+ * All TS*CacheEntry structs must share this common header
+ * (see InvalidateTSCacheCallBack)
+ */
+typedef struct TSAnyCacheEntry
+{
+ Oid objId;
+ bool isvalid;
+} TSAnyCacheEntry;
+
+
+typedef struct TSParserCacheEntry
+{
+ /* prsId is the hash lookup key and MUST BE FIRST */
+ Oid prsId; /* OID of the parser */
+ bool isvalid;
+
+ Oid startOid;
+ Oid tokenOid;
+ Oid endOid;
+ Oid headlineOid;
+ Oid lextypeOid;
+
+ /*
+ * Pre-set-up fmgr call of most needed parser's methods
+ */
+ FmgrInfo prsstart;
+ FmgrInfo prstoken;
+ FmgrInfo prsend;
+ FmgrInfo prsheadline;
+} TSParserCacheEntry;
+
+typedef struct TSDictionaryCacheEntry
+{
+ /* dictId is the hash lookup key and MUST BE FIRST */
+ Oid dictId;
+ bool isvalid;
+
+ /* most frequent fmgr call */
+ Oid lexizeOid;
+ FmgrInfo lexize;
+
+ MemoryContext dictCtx; /* memory context to store private data */
+ void *dictData;
+} TSDictionaryCacheEntry;
+
+typedef struct /* element type of TSConfigCacheEntry.map */
+{
+ int len; /* presumably the number of OIDs in dictIds -- confirm */
+ Oid *dictIds; /* array of dictionary OIDs */
+} ListDictionary;
+
+typedef struct
+{
+ /* cfgId is the hash lookup key and MUST BE FIRST */
+ Oid cfgId;
+ bool isvalid; /* second member of the common header shared with TSAnyCacheEntry */
+
+ Oid prsId; /* presumably the OID of this configuration's parser -- confirm */
+
+ int lenmap; /* presumably the number of entries in map -- confirm */
+ ListDictionary *map; /* NOTE(review): likely indexed by token type -- confirm */
+} TSConfigCacheEntry;
+
+
+/*
+ * GUC variable for current configuration
+ */
+extern PGDLLIMPORT char *TSCurrentConfig;
+
+
+extern TSParserCacheEntry *lookup_ts_parser_cache(Oid prsId);
+extern TSDictionaryCacheEntry *lookup_ts_dictionary_cache(Oid dictId);
+extern TSConfigCacheEntry *lookup_ts_config_cache(Oid cfgId);
+
+extern Oid getTSCurrentConfig(bool emitError);
+extern bool check_TSCurrentConfig(char **newval, void **extra, GucSource source);
+extern void assign_TSCurrentConfig(const char *newval, void *extra);
+
+#endif /* TS_CACHE_H */
diff --git a/src/include/tsearch/ts_locale.h b/src/include/tsearch/ts_locale.h
new file mode 100644
index 0000000..7d7c4e1
--- /dev/null
+++ b/src/include/tsearch/ts_locale.h
@@ -0,0 +1,63 @@
+/*-------------------------------------------------------------------------
+ *
+ * ts_locale.h
+ * locale compatibility layer for tsearch
+ *
+ * Copyright (c) 1998-2022, PostgreSQL Global Development Group
+ *
+ * src/include/tsearch/ts_locale.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef __TSLOCALE_H__
+#define __TSLOCALE_H__
+
+#include <ctype.h>
+#include <limits.h>
+
+#include "lib/stringinfo.h"
+#include "mb/pg_wchar.h"
+#include "utils/pg_locale.h"
+
+/*
+ * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
+ * declare them in <wchar.h>, so include that too.
+ */
+#include <wchar.h>
+#ifdef HAVE_WCTYPE_H
+#include <wctype.h>
+#endif
+
+/* working state for tsearch_readline (should be a local var in caller) */
+typedef struct
+{
+ FILE *fp;
+ const char *filename;
+ int lineno;
+ StringInfoData buf; /* current input line, in UTF-8 */
+ char *curline; /* current input line, in DB's encoding */
+ /* curline may be NULL, or equal to buf.data, or a palloc'd string */
+ ErrorContextCallback cb;
+} tsearch_readline_state;
+
+#define TOUCHAR(x) (*((const unsigned char *) (x))) /* first byte at x, read as unsigned char */
+
+/* The second argument of t_iseq() must be a plain ASCII character */
+#define t_iseq(x,c) (TOUCHAR(x) == (unsigned char) (c))
+
+#define COPYCHAR(d,s) memcpy(d, s, pg_mblen(s)) /* copies pg_mblen(s) bytes from s to d; s is expanded twice */
+
+extern int t_isdigit(const char *ptr);
+extern int t_isspace(const char *ptr);
+extern int t_isalpha(const char *ptr);
+extern int t_isprint(const char *ptr);
+
+extern char *lowerstr(const char *str);
+extern char *lowerstr_with_len(const char *str, int len);
+
+extern bool tsearch_readline_begin(tsearch_readline_state *stp,
+ const char *filename);
+extern char *tsearch_readline(tsearch_readline_state *stp);
+extern void tsearch_readline_end(tsearch_readline_state *stp);
+
+#endif /* __TSLOCALE_H__ */
diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h
new file mode 100644
index 0000000..fe2a167
--- /dev/null
+++ b/src/include/tsearch/ts_public.h
@@ -0,0 +1,159 @@
+/*-------------------------------------------------------------------------
+ *
+ * ts_public.h
+ * Public interface to various tsearch modules, such as
+ * parsers and dictionaries.
+ *
+ * Copyright (c) 1998-2022, PostgreSQL Global Development Group
+ *
+ * src/include/tsearch/ts_public.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef _PG_TS_PUBLIC_H_
+#define _PG_TS_PUBLIC_H_
+
+#include "tsearch/ts_type.h"
+
+/*
+ * Parser's framework
+ */
+
+/*
+ * returning type for prslextype method of parser
+ */
+typedef struct
+{
+ int lexid; /* presumably the numeric id of a token type -- confirm */
+ char *alias; /* presumably a short name for the token type -- confirm */
+ char *descr; /* presumably a description of the token type -- confirm */
+} LexDescr;
+
+/*
+ * Interface to headline generator (tsparser's prsheadline function)
+ *
+ * HeadlineParsedText describes the text that is to be highlighted.
+ * Some fields are passed from the core code to the prsheadline function,
+ * while others are output from the prsheadline function.
+ *
+ * The principal data is words[], an array of HeadlineWordEntry,
+ * one entry per token, of length curwords.
+ * The fields of HeadlineWordEntry are:
+ *
+ * in, selected, replace, skip: these flags are initially zero
+ * and may be set by the prsheadline function. A consecutive group
+ * of tokens marked "in" form a "fragment" to be output.
+ * Such tokens may additionally be marked selected, replace, or skip
+ * to modify how they are shown. (If you set more than one of those
+ * bits, you get an unspecified one of those behaviors.)
+ *
+ * type, len, pos, word: filled by core code to describe the token.
+ *
+ * item: if the token matches any operand of the tsquery of interest,
+ * a pointer to such an operand. (If there are multiple matching
+ * operands, we generate extra copies of the HeadlineWordEntry to hold
+ * all the pointers. The extras are marked with repeated = 1 and should
+ * be ignored except for checking the item pointer.)
+ */
+typedef struct
+{
+ uint32 selected:1, /* token is to be highlighted */
+ in:1, /* token is part of headline */
+ replace:1, /* token is to be replaced with a space */
+ repeated:1, /* duplicate entry to hold item pointer */
+ skip:1, /* token is to be skipped (not output) */
+ unused:3, /* available bits */
+ type:8, /* parser's token category */
+ len:16; /* length of token */
+ WordEntryPos pos; /* position of token */
+ char *word; /* text of token (not null-terminated) */
+ QueryOperand *item; /* a matching query operand, or NULL if none */
+} HeadlineWordEntry;
+
+typedef struct
+{
+ /* Fields filled by core code before calling prsheadline function: */
+ HeadlineWordEntry *words;
+ int32 lenwords; /* allocated length of words[] */
+ int32 curwords; /* current number of valid entries */
+ int32 vectorpos; /* used by ts_parse.c in filling pos fields */
+
+ /* The prsheadline function must fill these fields: */
+ /* Strings for marking selected tokens and separating fragments: */
+ char *startsel; /* palloc'd strings */
+ char *stopsel;
+ char *fragdelim;
+ int16 startsellen; /* lengths of strings */
+ int16 stopsellen;
+ int16 fragdelimlen;
+} HeadlineParsedText;
+
+/*
+ * Common useful things for tsearch subsystem
+ */
+extern char *get_tsearch_config_filename(const char *basename,
+ const char *extension);
+
+/*
+ * Often useful stopword list management
+ */
+typedef struct /* passed to readstoplist() and searchstoplist() */
+{
+ int len; /* presumably the number of entries in stop -- confirm */
+ char **stop; /* array of stopword strings */
+} StopList;
+
+extern void readstoplist(const char *fname, StopList *s,
+ char *(*wordop) (const char *));
+extern bool searchstoplist(StopList *s, char *key);
+
+/*
+ * Interface with dictionaries
+ */
+
+/* return struct for any lexize function */
+typedef struct
+{
+ /*----------
+ * Number of current variant of split word. For example the Norwegian
+ * word 'fotballklubber' has two variants to split: ( fotball, klubb )
+ * and ( fot, ball, klubb ). So, dictionary should return:
+ *
+ * nvariant lexeme
+ * 1 fotball
+ * 1 klubb
+ * 2 fot
+ * 2 ball
+ * 2 klubb
+ *
+ * In general, a TSLexeme will be considered to belong to the same split
+ * variant as the previous one if they have the same nvariant value.
+ * The exact values don't matter, only changes from one lexeme to next.
+ *----------
+ */
+ uint16 nvariant;
+
+ uint16 flags; /* See flag bits below */
+
+ char *lexeme; /* C string */
+} TSLexeme;
+
+/* Flag bits that can appear in TSLexeme.flags */
+#define TSL_ADDPOS 0x01
+#define TSL_PREFIX 0x02
+#define TSL_FILTER 0x04
+
+/*
+ * Struct for supporting complex dictionaries like thesaurus.
+ * 4th argument for dictlexize method is a pointer to this
+ */
+typedef struct
+{
+ bool isend; /* in: set (for lexize_info) when the end of
+ * the text has been reached */
+ bool getnext; /* out: dict wants next lexeme */
+ void *private_state; /* internal dict state between calls with
+ * getnext == true */
+} DictSubState;
+
+#endif /* _PG_TS_PUBLIC_H_ */
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
new file mode 100644
index 0000000..689b2d1
--- /dev/null
+++ b/src/include/tsearch/ts_type.h
@@ -0,0 +1,242 @@
+/*-------------------------------------------------------------------------
+ *
+ * ts_type.h
+ * Definitions for the tsvector and tsquery types
+ *
+ * Copyright (c) 1998-2022, PostgreSQL Global Development Group
+ *
+ * src/include/tsearch/ts_type.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef _PG_TSTYPE_H_
+#define _PG_TSTYPE_H_
+
+#include "fmgr.h"
+#include "utils/memutils.h"
+
+
+/*
+ * TSVector type.
+ *
+ * Structure of tsvector datatype:
+ * 1) standard varlena header
+ * 2) int32 size - number of lexemes (WordEntry array entries)
+ * 3) Array of WordEntry - one per lexeme; must be sorted according to
+ * tsCompareString() (ie, memcmp of lexeme strings).
+ * WordEntry->pos gives the number of bytes from end of WordEntry
+ * array to start of lexeme's string, which is of length len.
+ * 4) Per-lexeme data storage:
+ * lexeme string (not null-terminated)
+ * if haspos is true:
+ * padding byte if necessary to make the position data 2-byte aligned
+ * uint16 number of positions that follow
+ * WordEntryPos[] positions
+ *
+ * The positions for each lexeme must be sorted.
+ *
+ * Note, tsvectorsend/recv believe that sizeof(WordEntry) == 4
+ */
+
+typedef struct
+{
+ uint32
+ haspos:1,
+ len:11, /* MAX 2Kb */
+ pos:20; /* MAX 1Mb */
+} WordEntry;
+
+#define MAXSTRLEN ( (1<<11) - 1)
+#define MAXSTRPOS ( (1<<20) - 1)
+
+extern int compareWordEntryPos(const void *a, const void *b);
+
+/*
+ * Equivalent to
+ * typedef struct {
+ * uint16
+ * weight:2,
+ * pos:14;
+ * }
+ */
+
+typedef uint16 WordEntryPos;
+
+typedef struct
+{
+ uint16 npos;
+ WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER];
+} WordEntryPosVector;
+
+/* WordEntryPosVector with exactly 1 entry */
+typedef struct
+{
+ uint16 npos;
+ WordEntryPos pos[1];
+} WordEntryPosVector1;
+
+
+#define WEP_GETWEIGHT(x) ( (x) >> 14 ) /* weight is stored in the bits above the low 14 */
+#define WEP_GETPOS(x) ( (x) & 0x3fff ) /* position is stored in the low 14 bits */
+
+#define WEP_SETWEIGHT(x,v) ( (x) = ( (v) << 14 ) | ( (x) & 0x3fff ) ) /* replaces weight, keeps position; v is not masked */
+#define WEP_SETPOS(x,v) ( (x) = ( (x) & 0xc000 ) | ( (v) & 0x3fff ) ) /* replaces position, keeps the two weight bits */
+
+#define MAXENTRYPOS (1<<14) /* one more than the largest value that fits in 14 bits */
+#define MAXNUMPOS (256) /* NOTE(review): presumably a cap on positions per lexeme -- confirm */
+#define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) ) /* clamps x to at most MAXENTRYPOS-1 */
+
+/* This struct represents a complete tsvector datum */
+typedef struct
+{
+ int32 vl_len_; /* varlena header (do not touch directly!) */
+ int32 size;
+ WordEntry entries[FLEXIBLE_ARRAY_MEMBER];
+ /* lexemes follow the entries[] array */
+} TSVectorData;
+
+typedef TSVectorData *TSVector;
+
+#define DATAHDRSIZE (offsetof(TSVectorData, entries))
+#define CALCDATASIZE(nentries, lenstr) (DATAHDRSIZE + (nentries) * sizeof(WordEntry) + (lenstr) )
+
+/* pointer to start of a tsvector's WordEntry array */
+#define ARRPTR(x) ( (x)->entries )
+
+/* pointer to start of a tsvector's lexeme storage */
+#define STRPTR(x) ( (char *) &(x)->entries[(x)->size] )
+
+#define _POSVECPTR(x, e) ((WordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)))
+#define POSDATALEN(x,e) ( ( (e)->haspos ) ? (_POSVECPTR(x,e)->npos) : 0 )
+#define POSDATAPTR(x,e) (_POSVECPTR(x,e)->pos)
+
+/*
+ * fmgr interface macros
+ */
+
+#define DatumGetTSVector(X) ((TSVector) PG_DETOAST_DATUM(X))
+#define DatumGetTSVectorCopy(X) ((TSVector) PG_DETOAST_DATUM_COPY(X))
+#define TSVectorGetDatum(X) PointerGetDatum(X)
+#define PG_GETARG_TSVECTOR(n) DatumGetTSVector(PG_GETARG_DATUM(n))
+#define PG_GETARG_TSVECTOR_COPY(n) DatumGetTSVectorCopy(PG_GETARG_DATUM(n))
+#define PG_RETURN_TSVECTOR(x) return TSVectorGetDatum(x)
+
+
+/*
+ * TSQuery
+ *
+ *
+ */
+
+typedef int8 QueryItemType;
+
+/* Valid values for QueryItemType: */
+#define QI_VAL 1
+#define QI_OPR 2
+#define QI_VALSTOP 3 /* This is only used in an intermediate stack
+ * representation in parse_tsquery. It's not a
+ * legal type elsewhere. */
+
+/*
+ * QueryItem is one node in tsquery - operator or operand.
+ */
+typedef struct
+{
+ QueryItemType type; /* operand or kind of operator (ts_tokentype) */
+ uint8 weight; /* weights of operand to search. It's a
+ * bitmask of allowed weights. If it is 0 then
+ * any weight is allowed. Weight-to-bit
+ * map: A: 1<<3 B: 1<<2 C: 1<<1 D: 1<<0 */
+ bool prefix; /* true if it's a prefix search */
+ int32 valcrc; /* XXX: pg_crc32 would be a more appropriate
+ * data type, but we use comparisons to signed
+ * integers in the code. They would need to be
+ * changed as well. */
+
+ /* pointer to text value of operand, must correlate with WordEntry */
+ uint32
+ length:12, /* presumably the operand's length in bytes -- confirm */
+ distance:20; /* presumably the operand's offset within the operand storage -- confirm */
+} QueryOperand;
+
+
+/*
+ * Legal values for QueryOperator.operator.
+ */
+#define OP_NOT 1
+#define OP_AND 2
+#define OP_OR 3
+#define OP_PHRASE 4 /* highest code, tsquery_cleanup.c */
+#define OP_COUNT 4
+
+extern PGDLLIMPORT const int tsearch_op_priority[OP_COUNT];
+
+/* get operation priority by its code */
+#define OP_PRIORITY(x) ( tsearch_op_priority[(x) - 1] )
+/* get QueryOperator priority */
+#define QO_PRIORITY(x) OP_PRIORITY(((QueryOperator *) (x))->oper)
+
+typedef struct
+{
+ QueryItemType type; /* first member, as in QueryOperand and the QueryItem union */
+ int8 oper; /* see above */
+ int16 distance; /* distance between args for OP_PHRASE */
+ uint32 left; /* pointer to left operand. Right operand is
+ * item + 1, left operand is placed
+ * item+item->left */
+} QueryOperator;
+
+/*
+ * Note: TSQuery is 4-byte aligned, so make sure there are no fields
+ * inside QueryItem requiring 8-byte alignment, like int64.
+ */
+typedef union
+{
+ QueryItemType type;
+ QueryOperator qoperator;
+ QueryOperand qoperand;
+} QueryItem;
+
+/*
+ * Storage:
+ * (len)(size)(array of QueryItem)(operands as '\0'-terminated c-strings)
+ */
+
+typedef struct
+{
+ int32 vl_len_; /* varlena header (do not touch directly!) */
+ int32 size; /* number of QueryItems */
+ char data[FLEXIBLE_ARRAY_MEMBER]; /* data starts here */
+} TSQueryData;
+
+typedef TSQueryData *TSQuery;
+
+#define HDRSIZETQ ( VARHDRSZ + sizeof(int32) )
+
+/* Computes the total size of a TSQuery: header, all QueryItems and operands.
+ * size is the number of QueryItems; lenofoperand is the total length of all operands
+ */
+#define COMPUTESIZE(size, lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
+#define TSQUERY_TOO_BIG(size, lenofoperand) \
+ ((size) > (MaxAllocSize - HDRSIZETQ - (lenofoperand)) / sizeof(QueryItem))
+
+/* Returns a pointer to the first QueryItem in a TSQuery */
+#define GETQUERY(x) ((QueryItem*)( (char*)(x)+HDRSIZETQ ))
+
+/* Returns a pointer to the beginning of operands in a TSQuery */
+#define GETOPERAND(x) ( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
+
+/*
+ * fmgr interface macros
+ * Note: the TSQuery type is marked as plain storage, so it can't be toasted,
+ * but PG_DETOAST_DATUM_COPY is used for simplicity
+ */
+
+#define DatumGetTSQuery(X) ((TSQuery) DatumGetPointer(X))
+#define DatumGetTSQueryCopy(X) ((TSQuery) PG_DETOAST_DATUM_COPY(X))
+#define TSQueryGetDatum(X) PointerGetDatum(X)
+#define PG_GETARG_TSQUERY(n) DatumGetTSQuery(PG_GETARG_DATUM(n))
+#define PG_GETARG_TSQUERY_COPY(n) DatumGetTSQueryCopy(PG_GETARG_DATUM(n))
+#define PG_RETURN_TSQUERY(x) return TSQueryGetDatum(x)
+
+#endif /* _PG_TSTYPE_H_ */
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h
new file mode 100644
index 0000000..c36c711
--- /dev/null
+++ b/src/include/tsearch/ts_utils.h
@@ -0,0 +1,266 @@
+/*-------------------------------------------------------------------------
+ *
+ * ts_utils.h
+ * helper utilities for tsearch
+ *
+ * Copyright (c) 1998-2022, PostgreSQL Global Development Group
+ *
+ * src/include/tsearch/ts_utils.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef _PG_TS_UTILS_H_
+#define _PG_TS_UTILS_H_
+
+#include "nodes/pg_list.h"
+#include "tsearch/ts_public.h"
+#include "tsearch/ts_type.h"
+
+/*
+ * Common parse definitions for tsvector and tsquery
+ */
+
+/* tsvector parser support. */
+
+struct TSVectorParseStateData; /* opaque struct in tsvector_parser.c */
+typedef struct TSVectorParseStateData *TSVectorParseState;
+
+#define P_TSV_OPR_IS_DELIM (1 << 0)
+#define P_TSV_IS_TSQUERY (1 << 1)
+#define P_TSV_IS_WEB (1 << 2)
+
+extern TSVectorParseState init_tsvector_parser(char *input, int flags);
+extern void reset_tsvector_parser(TSVectorParseState state, char *input);
+extern bool gettoken_tsvector(TSVectorParseState state,
+ char **token, int *len,
+ WordEntryPos **pos, int *poslen,
+ char **endptr);
+extern void close_tsvector_parser(TSVectorParseState state);
+
+/* phrase operator begins with '<'; x is expanded several times, so it must have no side effects */
+#define ISOPERATOR(x) \
+ ( pg_mblen(x) == 1 && ( *(x) == '!' || \
+ *(x) == '&' || \
+ *(x) == '|' || \
+ *(x) == '(' || \
+ *(x) == ')' || \
+ *(x) == '<' \
+ ) )
+
+/* parse_tsquery */
+
+struct TSQueryParserStateData; /* private in backend/utils/adt/tsquery.c */
+typedef struct TSQueryParserStateData *TSQueryParserState;
+
+typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
+ char *token, int tokenlen,
+ int16 tokenweights, /* bitmap as described in
+ * QueryOperand struct */
+ bool prefix);
+
+#define P_TSQ_PLAIN (1 << 0)
+#define P_TSQ_WEB (1 << 1)
+
+extern TSQuery parse_tsquery(char *buf,
+ PushFunction pushval,
+ Datum opaque,
+ int flags);
+
+/* Functions for use by PushFunction implementations */
+extern void pushValue(TSQueryParserState state,
+ char *strval, int lenval, int16 weight, bool prefix);
+extern void pushStop(TSQueryParserState state);
+extern void pushOperator(TSQueryParserState state, int8 oper, int16 distance);
+
+/*
+ * parse plain text and lexize words
+ */
+typedef struct
+{
+ uint16 len; /* presumably the length of word -- confirm */
+ uint16 nvariant; /* presumably taken from TSLexeme.nvariant -- confirm */
+ union
+ {
+ uint16 pos; /* presumably a single position, when the apos array is not used -- confirm */
+
+ /*
+ * When apos array is used, apos[0] is the number of elements in the
+ * array (excluding apos[0]), and alen is the allocated size of the
+ * array.
+ */
+ uint16 *apos;
+ } pos;
+ uint16 flags; /* currently, only TSL_PREFIX */
+ char *word; /* NOTE(review): may not be null-terminated, given the separate len -- confirm */
+ uint32 alen; /* allocated size of pos.apos, per the comment above */
+} ParsedWord;
+
+typedef struct /* passed to parsetext() and make_tsvector() */
+{
+ ParsedWord *words; /* array of ParsedWord */
+ int32 lenwords; /* presumably allocated length of words[], as in HeadlineParsedText -- confirm */
+ int32 curwords; /* presumably current number of valid entries, as in HeadlineParsedText -- confirm */
+ int32 pos; /* NOTE(review): looks like a running position counter -- confirm */
+} ParsedText;
+
+extern void parsetext(Oid cfgId, ParsedText *prs, char *buf, int32 buflen);
+
+/*
+ * headline framework, flow in common to generate:
+ * 1 parse text with hlparsetext
+ * 2 parser-specific function to find part
+ * 3 generateHeadline to generate result text
+ */
+
+extern void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query,
+ char *buf, int32 buflen);
+extern text *generateHeadline(HeadlineParsedText *prs);
+
+/*
+ * TSQuery execution support
+ *
+ * TS_execute() executes a tsquery against data that can be represented in
+ * various forms. The TSExecuteCallback callback function is called to check
+ * whether a given primitive tsquery value is matched in the data.
+ */
+
+/* TS_execute requires ternary logic to handle NOT with phrase matches */
+typedef enum
+{
+ TS_NO, /* definitely no match */
+ TS_YES, /* definitely does match */
+ TS_MAYBE /* can't verify match for lack of pos data */
+} TSTernaryValue;
+
+/*
+ * struct ExecPhraseData is passed to a TSExecuteCallback function if we need
+ * lexeme position data (because of a phrase-match operator in the tsquery).
+ * The callback should fill in position data when it returns TS_YES (success).
+ * If it cannot return position data, it should leave "data" unchanged and
+ * return TS_MAYBE. The caller of TS_execute() must then arrange for a later
+ * recheck with position data available.
+ *
+ * The reported lexeme positions must be sorted and unique. Callers must only
+ * consult the position bits of the pos array, ie, WEP_GETPOS(data->pos[i]).
+ * This allows the returned "pos" to point directly to the WordEntryPos
+ * portion of a tsvector value. If "allocated" is true then the pos array
+ * is palloc'd workspace and caller may free it when done.
+ *
+ * "negate" means that the pos array contains positions where the query does
+ * not match, rather than positions where it does. "width" is positive when
+ * the match is wider than one lexeme. Neither of these fields normally needs
+ * to be touched by TSExecuteCallback functions; they are used for
+ * phrase-search processing within TS_execute.
+ *
+ * All fields of the ExecPhraseData struct are initially zeroed by caller.
+ */
+typedef struct ExecPhraseData
+{
+ int npos; /* number of positions reported */
+ bool allocated; /* pos points to palloc'd data? */
+ bool negate; /* positions are where query is NOT matched */
+ WordEntryPos *pos; /* ordered, non-duplicate lexeme positions */
+ int width; /* width of match in lexemes, less 1 */
+} ExecPhraseData;
+
+/*
+ * Signature for TSQuery lexeme check functions
+ *
+ * arg: opaque value passed through from caller of TS_execute
+ * val: lexeme to test for presence of
+ * data: to be filled with lexeme positions; NULL if position data not needed
+ *
+ * Return TS_YES if lexeme is present in data, TS_MAYBE if it might be
+ * present, TS_NO if it definitely is not present. If data is not NULL,
+ * it must be filled with lexeme positions if available. If position data
+ * is not available, leave *data as zeroes and return TS_MAYBE, never TS_YES.
+ */
+typedef TSTernaryValue (*TSExecuteCallback) (void *arg, QueryOperand *val,
+ ExecPhraseData *data);
+
+/*
+ * Flag bits for TS_execute
+ */
+#define TS_EXEC_EMPTY (0x00)
+/*
+ * If TS_EXEC_SKIP_NOT is set, then NOT sub-expressions are automatically
+ * evaluated to be true. This was formerly the default behavior. It's now
+ * deprecated because it tends to give silly answers, but some applications
+ * might still have a use for it.
+ */
+#define TS_EXEC_SKIP_NOT (0x01)
+/*
+ * If TS_EXEC_PHRASE_NO_POS is set, allow OP_PHRASE to be executed lossily
+ * in the absence of position information: a true result indicates that the
+ * phrase might be present. Without this flag, OP_PHRASE always returns
+ * false if lexeme position information is not available.
+ */
+#define TS_EXEC_PHRASE_NO_POS (0x02)
+
+extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags,
+ TSExecuteCallback chkcond);
+extern TSTernaryValue TS_execute_ternary(QueryItem *curitem, void *arg,
+ uint32 flags,
+ TSExecuteCallback chkcond);
+extern bool tsquery_requires_match(QueryItem *curitem);
+
+/*
+ * to_ts* - text transformation to tsvector, tsquery
+ */
+extern TSVector make_tsvector(ParsedText *prs);
+extern int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix);
+
+/*
+ * Possible strategy numbers for indexes
+ * TSearchStrategyNumber - (tsvector|text) @@ tsquery
+ * TSearchWithClassStrategyNumber - tsvector @@@ tsquery
+ */
+#define TSearchStrategyNumber 1
+#define TSearchWithClassStrategyNumber 2
+
+/*
+ * TSQuery Utilities
+ */
+extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len);
+extern TSQuery cleanup_tsquery_stopwords(TSQuery in);
+
+typedef struct QTNode /* tree form of a tsquery; built by QT2QTN(), flattened by QTN2QT() */
+{
+ QueryItem *valnode; /* presumably the QueryItem this tree node stands for -- confirm */
+ uint32 flags; /* QTN_* bits defined below */
+ int32 nchild; /* presumably the number of entries in child[] -- confirm */
+ char *word; /* presumably operand text; see QTN_WORDFREE -- confirm ownership */
+ uint32 sign; /* NOTE(review): meaning not visible in this header -- confirm */
+ struct QTNode **child; /* array of pointers to child nodes */
+} QTNode;
+
+/* bits in QTNode.flags */
+#define QTN_NEEDFREE 0x01
+#define QTN_NOCHANGE 0x02
+#define QTN_WORDFREE 0x04
+
+typedef uint64 TSQuerySign;
+
+#define TSQS_SIGLEN (sizeof(TSQuerySign)*BITS_PER_BYTE)
+
+#define TSQuerySignGetDatum(X) Int64GetDatum((int64) (X))
+#define DatumGetTSQuerySign(X) ((TSQuerySign) DatumGetInt64(X))
+#define PG_RETURN_TSQUERYSIGN(X) return TSQuerySignGetDatum(X)
+#define PG_GETARG_TSQUERYSIGN(n) DatumGetTSQuerySign(PG_GETARG_DATUM(n))
+
+
+extern QTNode *QT2QTN(QueryItem *in, char *operand);
+extern TSQuery QTN2QT(QTNode *in);
+extern void QTNFree(QTNode *in);
+extern void QTNSort(QTNode *in);
+extern void QTNTernary(QTNode *in);
+extern void QTNBinary(QTNode *in);
+extern int QTNodeCompare(QTNode *an, QTNode *bn);
+extern QTNode *QTNCopy(QTNode *in);
+extern void QTNClearFlags(QTNode *in, uint32 flags);
+extern bool QTNEq(QTNode *a, QTNode *b);
+extern TSQuerySign makeTSQuerySign(TSQuery a);
+extern QTNode *findsubquery(QTNode *root, QTNode *ex, QTNode *subs,
+ bool *isfind);
+
+#endif /* _PG_TS_UTILS_H_ */