/*------------------------------------------------------------------------- * * spell.h * * Declarations for ISpell dictionary * * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group * * src/include/tsearch/dicts/spell.h * *------------------------------------------------------------------------- */ #ifndef __SPELL_H__ #define __SPELL_H__ #include "regex/regex.h" #include "tsearch/dicts/regis.h" #include "tsearch/ts_public.h" /* * SPNode and SPNodeData are used to represent prefix tree (Trie) to store * a words list. */ struct SPNode; typedef struct { uint32 val:8, isword:1, /* Stores compound flags listed below */ compoundflag:4, /* Reference to an entry of the AffixData field */ affix:19; struct SPNode *node; } SPNodeData; /* * Names of FF_ are correlated with Hunspell options in affix file * http://hunspell.sourceforge.net/ */ #define FF_COMPOUNDONLY 0x01 #define FF_COMPOUNDBEGIN 0x02 #define FF_COMPOUNDMIDDLE 0x04 #define FF_COMPOUNDLAST 0x08 #define FF_COMPOUNDFLAG ( FF_COMPOUNDBEGIN | FF_COMPOUNDMIDDLE | \ FF_COMPOUNDLAST ) #define FF_COMPOUNDFLAGMASK 0x0f typedef struct SPNode { uint32 length; SPNodeData data[FLEXIBLE_ARRAY_MEMBER]; } SPNode; #define SPNHDRSZ (offsetof(SPNode,data)) /* * Represents an entry in a words list. */ typedef struct spell_struct { union { /* * flag is filled in by NIImportDictionary(). After * NISortDictionary(), d is used instead of flag. */ char *flag; /* d is used in mkSPNode() */ struct { /* Reference to an entry of the AffixData field */ int affix; /* Length of the word */ int len; } d; } p; char word[FLEXIBLE_ARRAY_MEMBER]; } SPELL; #define SPELLHDRSZ (offsetof(SPELL, word)) /* * If an affix uses a regex, we have to store that separately in a struct * that won't move around when arrays of affixes are enlarged or sorted. * This is so that it can be found to be cleaned up at context destruction. */ typedef struct aff_regex_struct { regex_t regex; MemoryContextCallback mcallback; } aff_regex_struct; /* * Represents an entry in an affix list. */ typedef struct aff_struct { char *flag; /* FF_SUFFIX or FF_PREFIX */ uint32 type:1, flagflags:7, issimple:1, isregis:1, replen:14; char *find; char *repl; union { aff_regex_struct *pregex; Regis regis; } reg; } AFFIX; /* * affixes use dictionary flags too */ #define FF_COMPOUNDPERMITFLAG 0x10 #define FF_COMPOUNDFORBIDFLAG 0x20 #define FF_CROSSPRODUCT 0x40 /* * Don't change the order of these. Initialization sorts by these, * and expects prefixes to come first after sorting. */ #define FF_SUFFIX 1 #define FF_PREFIX 0 /* * AffixNode and AffixNodeData are used to represent prefix tree (Trie) to store * an affix list. */ struct AffixNode; typedef struct { uint32 val:8, naff:24; AFFIX **aff; struct AffixNode *node; } AffixNodeData; typedef struct AffixNode { uint32 isvoid:1, length:31; AffixNodeData data[FLEXIBLE_ARRAY_MEMBER]; } AffixNode; #define ANHRDSZ (offsetof(AffixNode, data)) typedef struct { char *affix; int len; bool issuffix; } CMPDAffix; /* * Type of encoding affix flags in Hunspell dictionaries */ typedef enum { FM_CHAR, /* one character (like ispell) */ FM_LONG, /* two characters */ FM_NUM /* number, >= 0 and < 65536 */ } FlagMode; /* * Structure to store Hunspell options. Flag representation depends on flag * type. These flags are about support of compound words. */ typedef struct CompoundAffixFlag { union { /* Flag name if flagMode is FM_CHAR or FM_LONG */ char *s; /* Flag name if flagMode is FM_NUM */ uint32 i; } flag; /* we don't have a bsearch_arg version, so, copy FlagMode */ FlagMode flagMode; uint32 value; } CompoundAffixFlag; #define FLAGNUM_MAXSIZE (1 << 16) typedef struct { int maffixes; int naffixes; AFFIX *Affix; AffixNode *Suffix; AffixNode *Prefix; SPNode *Dictionary; /* Array of sets of affixes */ char **AffixData; int lenAffixData; int nAffixData; bool useFlagAliases; CMPDAffix *CompoundAffix; bool usecompound; FlagMode flagMode; /* * All follow fields are actually needed only for initialization */ /* Array of Hunspell options in affix file */ CompoundAffixFlag *CompoundAffixFlags; /* number of entries in CompoundAffixFlags array */ int nCompoundAffixFlag; /* allocated length of CompoundAffixFlags array */ int mCompoundAffixFlag; /* * Remaining fields are only used during dictionary construction; they are * set up by NIStartBuild and cleared by NIFinishBuild. */ MemoryContext buildCxt; /* temp context for construction */ /* Temporary array of all words in the dict file */ SPELL **Spell; int nspell; /* number of valid entries in Spell array */ int mspell; /* allocated length of Spell array */ /* These are used to allocate "compact" data without palloc overhead */ char *firstfree; /* first free address (always maxaligned) */ size_t avail; /* free space remaining at firstfree */ } IspellDict; extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word); extern void NIStartBuild(IspellDict *Conf); extern void NIImportAffixes(IspellDict *Conf, const char *filename); extern void NIImportDictionary(IspellDict *Conf, const char *filename); extern void NISortDictionary(IspellDict *Conf); extern void NISortAffixes(IspellDict *Conf); extern void NIFinishBuild(IspellDict *Conf); #endif