summaryrefslogtreecommitdiffstats
path: root/src/include/tsearch/ts_public.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/tsearch/ts_public.h')
-rw-r--r--src/include/tsearch/ts_public.h159
1 files changed, 159 insertions, 0 deletions
diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h
new file mode 100644
index 0000000..fe2a167
--- /dev/null
+++ b/src/include/tsearch/ts_public.h
@@ -0,0 +1,159 @@
+/*-------------------------------------------------------------------------
+ *
+ * ts_public.h
+ * Public interface to various tsearch modules, such as
+ * parsers and dictionaries.
+ *
+ * Copyright (c) 1998-2022, PostgreSQL Global Development Group
+ *
+ * src/include/tsearch/ts_public.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef _PG_TS_PUBLIC_H_
+#define _PG_TS_PUBLIC_H_
+
+#include "tsearch/ts_type.h"
+
+/*
+ * Parser's framework
+ */
+
+/*
+ * Return type for the prslextype method of a parser
+ */
+typedef struct
+{
+ int lexid; /* presumably the id of a token type -- confirm */
+ char *alias; /* presumably a short name for that type -- confirm */
+ char *descr; /* presumably its description -- confirm */
+} LexDescr;
+
+/*
+ * Interface to headline generator (tsparser's prsheadline function)
+ *
+ * HeadlineParsedText describes the text that is to be highlighted.
+ * Some fields are passed from the core code to the prsheadline function,
+ * while others are output from the prsheadline function.
+ *
+ * The principal data is words[], an array of HeadlineWordEntry,
+ * one entry per token, of length curwords.
+ * The fields of HeadlineWordEntry are:
+ *
+ * in, selected, replace, skip: these flags are initially zero
+ * and may be set by the prsheadline function. A consecutive group
+ * of tokens marked "in" form a "fragment" to be output.
+ * Such tokens may additionally be marked selected, replace, or skip
+ * to modify how they are shown. (If you set more than one of those
+ * bits, you get an unspecified one of those behaviors.)
+ *
+ * type, len, pos, word: filled by core code to describe the token.
+ *
+ * item: if the token matches any operand of the tsquery of interest,
+ * a pointer to such an operand. (If there are multiple matching
+ * operands, we generate extra copies of the HeadlineWordEntry to hold
+ * all the pointers. The extras are marked with repeated = 1 and should
+ * be ignored except for checking the item pointer.)
+ */
+typedef struct
+{
+ uint32 selected:1, /* token is to be highlighted */
+ in:1, /* token is part of headline */
+ replace:1, /* token is to be replaced with a space */
+ repeated:1, /* duplicate entry to hold item pointer */
+ skip:1, /* token is to be skipped (not output) */
+ unused:3, /* available bits (5 flag bits + these 3 = 8 bits) */
+ type:8, /* parser's token category */
+ len:16; /* length of token (bit widths above total 32) */
+ WordEntryPos pos; /* position of token */
+ char *word; /* text of token (not null-terminated) */
+ QueryOperand *item; /* a matching query operand, or NULL if none */
+} HeadlineWordEntry;
+
+typedef struct
+{
+ /* Fields filled by core code before calling prsheadline function: */
+ HeadlineWordEntry *words; /* array of entries, one per token */
+ int32 lenwords; /* allocated length of words[] */
+ int32 curwords; /* current number of valid entries */
+ int32 vectorpos; /* used by ts_parse.c in filling pos fields */
+
+ /* The prsheadline function must fill these fields: */
+ /* Strings for marking selected tokens and separating fragments: */
+ char *startsel; /* palloc'd string: start marker for a selected token */
+ char *stopsel; /* palloc'd string: stop marker for a selected token */
+ char *fragdelim; /* palloc'd string: separator between fragments */
+ int16 startsellen; /* length of startsel */
+ int16 stopsellen; /* length of stopsel */
+ int16 fragdelimlen; /* length of fragdelim */
+} HeadlineParsedText;
+
+/*
+ * Commonly useful things for the tsearch subsystem
+ */
+extern char *get_tsearch_config_filename(const char *basename,
+ const char *extension);
+
+/*
+ * Commonly useful stopword list management
+ */
+typedef struct
+{
+ int len; /* presumably the number of entries in stop[] -- confirm */
+ char **stop; /* the stopword strings */
+} StopList;
+
+extern void readstoplist(const char *fname, StopList *s,
+ char *(*wordop) (const char *));
+extern bool searchstoplist(StopList *s, char *key);
+
+/*
+ * Interface with dictionaries
+ */
+
+/* Return struct for any lexize function */
+typedef struct
+{
+ /*----------
+ * Number of the current variant of a split word. For example, the Norwegian
+ * word 'fotballklubber' has two variants to split: ( fotball, klubb )
+ * and ( fot, ball, klubb ). So, the dictionary should return:
+ *
+ * nvariant lexeme
+ * 1 fotball
+ * 1 klubb
+ * 2 fot
+ * 2 ball
+ * 2 klubb
+ *
+ * In general, a TSLexeme will be considered to belong to the same split
+ * variant as the previous one if they have the same nvariant value.
+ * The exact values don't matter, only changes from one lexeme to the next.
+ *----------
+ */
+ uint16 nvariant;
+
+ uint16 flags; /* See flag bits below */
+
+ char *lexeme; /* C string */
+} TSLexeme;
+
+/* Flag bits that can appear in TSLexeme.flags (each is a distinct bit) */
+#define TSL_ADDPOS 0x01
+#define TSL_PREFIX 0x02
+#define TSL_FILTER 0x04
+
+/*
+ * Struct for supporting complex dictionaries like the thesaurus.
+ * The 4th argument of the dictlexize method is a pointer to this.
+ */
+typedef struct
+{
+ bool isend; /* in: marks (for lexize_info) that the end of
+ * the text has been reached */
+ bool getnext; /* out: dict wants next lexeme */
+ void *private_state; /* internal dict state between calls with
+ * getnext == true */
+} DictSubState;
+
+#endif /* _PG_TS_PUBLIC_H_ */