diff options
Diffstat (limited to 'src/include/tsearch/ts_public.h')
-rw-r--r-- | src/include/tsearch/ts_public.h | 159 |
1 files changed, 159 insertions, 0 deletions
diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h new file mode 100644 index 0000000..fe2a167 --- /dev/null +++ b/src/include/tsearch/ts_public.h @@ -0,0 +1,159 @@ +/*------------------------------------------------------------------------- + * + * ts_public.h + * Public interface to various tsearch modules, such as + * parsers and dictionaries. + * + * Copyright (c) 1998-2022, PostgreSQL Global Development Group + * + * src/include/tsearch/ts_public.h + * + *------------------------------------------------------------------------- + */ +#ifndef _PG_TS_PUBLIC_H_ +#define _PG_TS_PUBLIC_H_ + +#include "tsearch/ts_type.h" + +/* + * Parser's framework + */ + +/* + * returning type for prslextype method of parser + */ +typedef struct +{ + int lexid; + char *alias; + char *descr; +} LexDescr; + +/* + * Interface to headline generator (tsparser's prsheadline function) + * + * HeadlineParsedText describes the text that is to be highlighted. + * Some fields are passed from the core code to the prsheadline function, + * while others are output from the prsheadline function. + * + * The principal data is words[], an array of HeadlineWordEntry, + * one entry per token, of length curwords. + * The fields of HeadlineWordEntry are: + * + * in, selected, replace, skip: these flags are initially zero + * and may be set by the prsheadline function. A consecutive group + * of tokens marked "in" form a "fragment" to be output. + * Such tokens may additionally be marked selected, replace, or skip + * to modify how they are shown. (If you set more than one of those + * bits, you get an unspecified one of those behaviors.) + * + * type, len, pos, word: filled by core code to describe the token. + * + * item: if the token matches any operand of the tsquery of interest, + * a pointer to such an operand. (If there are multiple matching + * operands, we generate extra copies of the HeadlineWordEntry to hold + * all the pointers. The extras are marked with repeated = 1 and should + * be ignored except for checking the item pointer.) + */ +typedef struct +{ + uint32 selected:1, /* token is to be highlighted */ + in:1, /* token is part of headline */ + replace:1, /* token is to be replaced with a space */ + repeated:1, /* duplicate entry to hold item pointer */ + skip:1, /* token is to be skipped (not output) */ + unused:3, /* available bits */ + type:8, /* parser's token category */ + len:16; /* length of token */ + WordEntryPos pos; /* position of token */ + char *word; /* text of token (not null-terminated) */ + QueryOperand *item; /* a matching query operand, or NULL if none */ +} HeadlineWordEntry; + +typedef struct +{ + /* Fields filled by core code before calling prsheadline function: */ + HeadlineWordEntry *words; + int32 lenwords; /* allocated length of words[] */ + int32 curwords; /* current number of valid entries */ + int32 vectorpos; /* used by ts_parse.c in filling pos fields */ + + /* The prsheadline function must fill these fields: */ + /* Strings for marking selected tokens and separating fragments: */ + char *startsel; /* palloc'd strings */ + char *stopsel; + char *fragdelim; + int16 startsellen; /* lengths of strings */ + int16 stopsellen; + int16 fragdelimlen; +} HeadlineParsedText; + +/* + * Common useful things for tsearch subsystem + */ +extern char *get_tsearch_config_filename(const char *basename, + const char *extension); + +/* + * Often useful stopword list management + */ +typedef struct +{ + int len; + char **stop; +} StopList; + +extern void readstoplist(const char *fname, StopList *s, + char *(*wordop) (const char *)); +extern bool searchstoplist(StopList *s, char *key); + +/* + * Interface with dictionaries + */ + +/* return struct for any lexize function */ +typedef struct +{ + /*---------- + * Number of current variant of split word. For example the Norwegian + * word 'fotballklubber' has two variants to split: ( fotball, klubb ) + * and ( fot, ball, klubb ). So, dictionary should return: + * + * nvariant lexeme + * 1 fotball + * 1 klubb + * 2 fot + * 2 ball + * 2 klubb + * + * In general, a TSLexeme will be considered to belong to the same split + * variant as the previous one if they have the same nvariant value. + * The exact values don't matter, only changes from one lexeme to next. + *---------- + */ + uint16 nvariant; + + uint16 flags; /* See flag bits below */ + + char *lexeme; /* C string */ +} TSLexeme; + +/* Flag bits that can appear in TSLexeme.flags */ +#define TSL_ADDPOS 0x01 +#define TSL_PREFIX 0x02 +#define TSL_FILTER 0x04 + +/* + * Struct for supporting complex dictionaries like thesaurus. + * 4th argument for dictlexize method is a pointer to this + */ +typedef struct +{ + bool isend; /* in: marks for lexize_info about text end is + * reached */ + bool getnext; /* out: dict wants next lexeme */ + void *private_state; /* internal dict state between calls with + * getnext == true */ +} DictSubState; + +#endif /* _PG_TS_PUBLIC_H_ */ |