summaryrefslogtreecommitdiffstats
path: root/src/include/tsearch/dicts/spell.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/tsearch/dicts/spell.h')
-rw-r--r--src/include/tsearch/dicts/spell.h247
1 files changed, 247 insertions, 0 deletions
diff --git a/src/include/tsearch/dicts/spell.h b/src/include/tsearch/dicts/spell.h
new file mode 100644
index 0000000..978f43a
--- /dev/null
+++ b/src/include/tsearch/dicts/spell.h
@@ -0,0 +1,247 @@
+/*-------------------------------------------------------------------------
+ *
+ * spell.h
+ *
+ * Declarations for ISpell dictionary
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ * src/include/tsearch/dicts/spell.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef __SPELL_H__
+#define __SPELL_H__
+
+#include "regex/regex.h"
+#include "tsearch/dicts/regis.h"
+#include "tsearch/ts_public.h"
+
+/*
+ * SPNode and SPNodeData represent a prefix tree (trie) that stores
+ * a list of words.
+ */
+struct SPNode;
+
+typedef struct
+{
+ uint32 val:8,
+ isword:1,
+ /* Stores the FF_COMPOUND* flags listed below (see FF_COMPOUNDFLAGMASK) */
+ compoundflag:4,
+ /* Reference to an entry of the AffixData field of IspellDict */
+ affix:19;
+ struct SPNode *node;
+} SPNodeData;
+
+/*
+ * The FF_ names correspond to Hunspell options in the affix file; see
+ * http://hunspell.sourceforge.net/
+ */
+#define FF_COMPOUNDONLY 0x01
+#define FF_COMPOUNDBEGIN 0x02
+#define FF_COMPOUNDMIDDLE 0x04
+#define FF_COMPOUNDLAST 0x08
+#define FF_COMPOUNDFLAG ( FF_COMPOUNDBEGIN | FF_COMPOUNDMIDDLE | \
+ FF_COMPOUNDLAST )
+#define FF_COMPOUNDFLAGMASK 0x0f /* covers all four FF_COMPOUND* bits above */
+
+typedef struct SPNode
+{
+ uint32 length; /* presumably the number of data[] entries -- confirm */
+ SPNodeData data[FLEXIBLE_ARRAY_MEMBER];
+} SPNode;
+
+#define SPNHDRSZ (offsetof(SPNode,data)) /* SPNode size, excluding data[] */
+
+/*
+ * Represents one entry of the word list.
+ */
+typedef struct spell_struct
+{
+ union
+ {
+ /*
+ * flag is filled in by NIImportDictionary(). After
+ * NISortDictionary(), d is used instead of flag.
+ */
+ char *flag;
+ /* d is used in mkSPNode() */
+ struct
+ {
+ /* Reference to an entry of the AffixData field of IspellDict */
+ int affix;
+ /* Length of the word */
+ int len;
+ } d;
+ } p;
+ char word[FLEXIBLE_ARRAY_MEMBER];
+} SPELL;
+
+#define SPELLHDRSZ (offsetof(SPELL, word)) /* SPELL size, excluding word[] */
+
+/*
+ * If an affix uses a regex, we have to store that separately in a struct
+ * that won't move around when arrays of affixes are enlarged or sorted.
+ * That way it can still be found and cleaned up at context destruction.
+ */
+typedef struct aff_regex_struct
+{
+ regex_t regex;
+ MemoryContextCallback mcallback; /* presumably runs that cleanup -- confirm */
+} aff_regex_struct; /* pointed to by AFFIX.reg.pregex */
+
+/*
+ * Represents one entry of the affix list.
+ */
+typedef struct aff_struct
+{
+ char *flag;
+ /* FF_SUFFIX or FF_PREFIX */
+ uint32 type:1,
+ flagflags:7, /* presumably FF_* flag bits -- confirm in spell.c */
+ issimple:1,
+ isregis:1, /* presumably set when reg.regis is in use -- confirm */
+ replen:14; /* presumably the length of repl -- confirm */
+ char *find;
+ char *repl;
+ union
+ {
+ aff_regex_struct *pregex; /* separately stored regex; see aff_regex_struct */
+ Regis regis;
+ } reg;
+} AFFIX;
+
+/*
+ * Affixes use dictionary flags too; these bits lie above FF_COMPOUNDFLAGMASK.
+ */
+#define FF_COMPOUNDPERMITFLAG 0x10
+#define FF_COMPOUNDFORBIDFLAG 0x20
+#define FF_CROSSPRODUCT 0x40
+
+/*
+ * Don't change the order of these. Initialization sorts by these values
+ * and expects prefixes (0) to come first after sorting.
+ */
+#define FF_SUFFIX 1
+#define FF_PREFIX 0
+
+/*
+ * AffixNode and AffixNodeData represent a prefix tree (trie) that stores
+ * a list of affixes.
+ */
+struct AffixNode;
+
+typedef struct
+{
+ uint32 val:8,
+ naff:24; /* presumably the number of entries in aff -- confirm */
+ AFFIX **aff;
+ struct AffixNode *node;
+} AffixNodeData;
+
+typedef struct AffixNode
+{
+ uint32 isvoid:1,
+ length:31; /* presumably the number of data[] entries -- confirm */
+ AffixNodeData data[FLEXIBLE_ARRAY_MEMBER];
+} AffixNode;
+
+#define ANHRDSZ (offsetof(AffixNode, data)) /* AffixNode size, excluding data[] */
+
+typedef struct
+{
+ char *affix;
+ int len; /* presumably the length of affix -- confirm in spell.c */
+ bool issuffix;
+} CMPDAffix; /* element type of IspellDict.CompoundAffix */
+
+/*
+ * How affix flags are encoded in Hunspell dictionaries
+ */
+typedef enum
+{
+ FM_CHAR, /* one character (like ispell) */
+ FM_LONG, /* two characters */
+ FM_NUM /* number, >= 0 and < 65536 (FLAGNUM_MAXSIZE) */
+} FlagMode;
+
+/*
+ * Structure to store Hunspell options. The flag representation depends on
+ * the flag mode. These flags are about support of compound words.
+ */
+typedef struct CompoundAffixFlag
+{
+ union
+ {
+ /* Flag name if flagMode is FM_CHAR or FM_LONG */
+ char *s;
+ /* Flag name if flagMode is FM_NUM */
+ uint32 i;
+ } flag;
+ /* we don't have a bsearch_arg version, so FlagMode is copied here */
+ FlagMode flagMode;
+ uint32 value;
+} CompoundAffixFlag;
+
+#define FLAGNUM_MAXSIZE (1 << 16) /* exclusive upper bound of FM_NUM flags */
+
+typedef struct
+{
+ int maffixes; /* presumably allocated length of Affix -- confirm */
+ int naffixes; /* presumably number of valid Affix entries -- confirm */
+ AFFIX *Affix;
+
+ AffixNode *Suffix;
+ AffixNode *Prefix;
+
+ SPNode *Dictionary;
+ /* Array of sets of affixes */
+ char **AffixData;
+ int lenAffixData;
+ int nAffixData;
+ bool useFlagAliases;
+
+ CMPDAffix *CompoundAffix;
+
+ bool usecompound;
+ FlagMode flagMode;
+
+ /*
+ * All following fields are actually needed only for initialization
+ */
+
+ /* Array of Hunspell options in affix file */
+ CompoundAffixFlag *CompoundAffixFlags;
+ /* number of entries in CompoundAffixFlags array */
+ int nCompoundAffixFlag;
+ /* allocated length of CompoundAffixFlags array */
+ int mCompoundAffixFlag;
+
+ /*
+ * Remaining fields are only used during dictionary construction; they are
+ * set up by NIStartBuild and cleared by NIFinishBuild.
+ */
+ MemoryContext buildCxt; /* temp context for construction */
+
+ /* Temporary array of all words in the dict file */
+ SPELL **Spell;
+ int nspell; /* number of valid entries in Spell array */
+ int mspell; /* allocated length of Spell array */
+
+ /* These are used to allocate "compact" data without palloc overhead */
+ char *firstfree; /* first free address (always maxaligned) */
+ size_t avail; /* free space remaining at firstfree */
+} IspellDict;
+
+extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word);
+
+extern void NIStartBuild(IspellDict *Conf); /* sets up the build-only fields of Conf */
+extern void NIImportAffixes(IspellDict *Conf, const char *filename);
+extern void NIImportDictionary(IspellDict *Conf, const char *filename); /* fills SPELL.p.flag */
+extern void NISortDictionary(IspellDict *Conf); /* afterwards SPELL.p.d replaces p.flag */
+extern void NISortAffixes(IspellDict *Conf);
+extern void NIFinishBuild(IspellDict *Conf); /* clears the build-only fields of Conf */
+
+#endif