Adding upstream version 15.5.upstream/15.5

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:17:33 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:17:33 +0000
commit: 5e45211a64149b3c659b90ff2de6fa982a5a93ed (patch)
tree: 739caf8c461053357daa9f162bef34516c7bf452 /src/backend/tsearch
parent: Initial commit. (diff)
download: postgresql-15-5e45211a64149b3c659b90ff2de6fa982a5a93ed.tar.xz
postgresql-15-5e45211a64149b3c659b90ff2de6fa982a5a93ed.zip
25 files changed, 10632 insertions, 0 deletions
diff --git a/src/backend/tsearch/Makefile b/src/backend/tsearch/Makefile
new file mode 100644
index 0000000..c40acc5
--- /dev/null
+++ b/src/backend/tsearch/Makefile
@@ -0,0 +1,54 @@
+#-------------------------------------------------------------------------
+#
+# Makefile for backend/tsearch
+#
+# Copyright (c) 2006-2022, PostgreSQL Global Development Group
+#
+# src/backend/tsearch/Makefile
+#
+#-------------------------------------------------------------------------
+subdir = src/backend/tsearch
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+DICTDIR=tsearch_data
+
+# List of dictionary files
+DICTFILES=synonym_sample.syn thesaurus_sample.ths \
+	hunspell_sample.affix \
+	ispell_sample.affix ispell_sample.dict \
+	hunspell_sample_long.affix hunspell_sample_long.dict \
+	hunspell_sample_num.affix hunspell_sample_num.dict
+
+# Local paths to dictionary files
+DICTFILES_PATH=$(addprefix dicts/,$(DICTFILES))
+
+OBJS = \
+	dict.o \
+	dict_ispell.o \
+	dict_simple.o \
+	dict_synonym.o \
+	dict_thesaurus.o \
+	regis.o \
+	spell.o \
+	to_tsany.o \
+	ts_locale.o \
+	ts_parse.o \
+	ts_selfuncs.o \
+	ts_typanalyze.o \
+	ts_utils.o \
+	wparser.o \
+	wparser_def.o
+
+include $(top_srcdir)/src/backend/common.mk
+
+# Copy the sample dictionary files from the source tree into the
+# $(DICTDIR) subdirectory of the data directory (creating it first).
+.PHONY: install-data
+install-data: $(DICTFILES_PATH) installdirs
+	$(INSTALL_DATA) $(addprefix $(srcdir)/,$(DICTFILES_PATH)) '$(DESTDIR)$(datadir)/$(DICTDIR)/'
+
+# Create the data directory and its $(DICTDIR) subdirectory.
+installdirs:
+	$(MKDIR_P) '$(DESTDIR)$(datadir)' '$(DESTDIR)$(datadir)/$(DICTDIR)'
+
+# Remove only the files named in DICTFILES; the directory itself is kept.
+.PHONY: uninstall-data
+uninstall-data:
+	rm -rf $(addprefix '$(DESTDIR)$(datadir)/$(DICTDIR)/',$(DICTFILES))
diff --git a/src/backend/tsearch/dict.c b/src/backend/tsearch/dict.c
new file mode 100644
index 0000000..8dae2b8
--- /dev/null
+++ b/src/backend/tsearch/dict.c
@@ -0,0 +1,89 @@
+/*-------------------------------------------------------------------------
+ *
+ * dict.c
+ *		Standard interface to dictionary
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/tsearch/dict.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/pg_type.h"
+#include "tsearch/ts_cache.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+
+
+/*
+ * Run a single word through one dictionary and return the resulting
+ * lexemes as a text array.  Mostly useful for debugging.
+ *
+ * Argument 0 is the dictionary's OID, argument 1 the word.  If the
+ * dictionary asks for more input (getnext), it is called a second time
+ * with isend set, and a non-NULL answer from that call replaces the first
+ * one.  Returns SQL NULL when the dictionary returns NULL.
+ */
+Datum
+ts_lexize(PG_FUNCTION_ARGS)
+{
+	Oid			dictId = PG_GETARG_OID(0);
+	text	   *in = PG_GETARG_TEXT_PP(1);
+	TSDictionaryCacheEntry *dict = lookup_ts_dictionary_cache(dictId);
+	DictSubState dstate = {false, false, NULL};
+	Datum		word = PointerGetDatum(VARDATA_ANY(in));
+	Datum		wordlen = Int32GetDatum(VARSIZE_ANY_EXHDR(in));
+	TSLexeme   *lexemes;
+	Datum	   *elems;
+	ArrayType  *result;
+	int			nlexemes = 0;
+	int			i;
+
+	lexemes = (TSLexeme *)
+		DatumGetPointer(FunctionCall4(&dict->lexize,
+									  PointerGetDatum(dict->dictData),
+									  word,
+									  wordlen,
+									  PointerGetDatum(&dstate)));
+
+	if (dstate.getnext)
+	{
+		TSLexeme   *final;
+
+		/* The dictionary wants more words; tell it there are none. */
+		dstate.isend = true;
+		final = (TSLexeme *)
+			DatumGetPointer(FunctionCall4(&dict->lexize,
+										  PointerGetDatum(dict->dictData),
+										  word,
+										  wordlen,
+										  PointerGetDatum(&dstate)));
+		if (final != NULL)
+			lexemes = final;
+	}
+
+	if (lexemes == NULL)
+		PG_RETURN_NULL();
+
+	/* The lexeme array is terminated by an entry with a NULL lexeme. */
+	while (lexemes[nlexemes].lexeme != NULL)
+		nlexemes++;
+
+	elems = (Datum *) palloc(sizeof(Datum) * nlexemes);
+	for (i = 0; i < nlexemes; i++)
+		elems[i] = CStringGetTextDatum(lexemes[i].lexeme);
+
+	result = construct_array(elems,
+							 nlexemes,
+							 TEXTOID,
+							 -1,
+							 false,
+							 TYPALIGN_INT);
+
+	/* The text datums and the lexeme strings are released here. */
+	for (i = 0; i < nlexemes; i++)
+	{
+		pfree(DatumGetPointer(elems[i]));
+		pfree(lexemes[i].lexeme);
+	}
+	pfree(lexemes);
+	pfree(elems);
+
+	PG_RETURN_POINTER(result);
+}
diff --git a/src/backend/tsearch/dict_ispell.c b/src/backend/tsearch/dict_ispell.c
new file mode 100644
index 0000000..3dc7b22
--- /dev/null
+++ b/src/backend/tsearch/dict_ispell.c
@@ -0,0 +1,148 @@
+/*-------------------------------------------------------------------------
+ *
+ * dict_ispell.c
+ *		Ispell dictionary interface
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/tsearch/dict_ispell.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "commands/defrem.h"
+#include "tsearch/dicts/spell.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+
+
+/*
+ * Private state of one Ispell dictionary, built by dispell_init() and
+ * read by dispell_lexize().
+ */
+typedef struct
+{
+	StopList	stoplist;		/* stop words read for the StopWords option */
+	IspellDict	obj;			/* dictionary/affix data filled by the NI* calls */
+} DictISpell;
+
+/*
+ * Build an Ispell dictionary from its option list (argument 0).
+ *
+ * Recognized options are "dictfile", "afffile" and "stopwords"; each may
+ * appear at most once, and both "dictfile" and "afffile" are required.
+ * Any violation is reported with ERRCODE_INVALID_PARAMETER_VALUE.
+ * Returns a pointer to the new DictISpell.
+ */
+Datum
+dispell_init(PG_FUNCTION_ARGS)
+{
+	List	   *dictoptions = (List *) PG_GETARG_POINTER(0);
+	DictISpell *d = (DictISpell *) palloc0(sizeof(DictISpell));
+	bool		have_aff = false;
+	bool		have_dict = false;
+	bool		have_stop = false;
+	ListCell   *cell;
+
+	NIStartBuild(&d->obj);
+
+	foreach(cell, dictoptions)
+	{
+		DefElem    *opt = (DefElem *) lfirst(cell);
+		const char *name = opt->defname;
+
+		if (strcmp(name, "dictfile") == 0)
+		{
+			if (have_dict)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("multiple DictFile parameters")));
+			NIImportDictionary(&d->obj,
+							   get_tsearch_config_filename(defGetString(opt),
+														   "dict"));
+			have_dict = true;
+		}
+		else if (strcmp(name, "afffile") == 0)
+		{
+			if (have_aff)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("multiple AffFile parameters")));
+			NIImportAffixes(&d->obj,
+							get_tsearch_config_filename(defGetString(opt),
+														"affix"));
+			have_aff = true;
+		}
+		else if (strcmp(name, "stopwords") == 0)
+		{
+			if (have_stop)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("multiple StopWords parameters")));
+			readstoplist(defGetString(opt), &d->stoplist, lowerstr);
+			have_stop = true;
+		}
+		else
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("unrecognized Ispell parameter: \"%s\"",
+							name)));
+		}
+	}
+
+	/* Both files are mandatory; a missing affix file is reported first. */
+	if (!have_aff)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("missing AffFile parameter")));
+	if (!have_dict)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("missing DictFile parameter")));
+
+	NISortDictionary(&d->obj);
+	NISortAffixes(&d->obj);
+
+	NIFinishBuild(&d->obj);
+
+	PG_RETURN_POINTER(d);
+}
+
+/*
+ * Lexize one word with an Ispell dictionary.
+ *
+ * Arguments: 0 = DictISpell, 1 = word bytes, 2 = word length.  The word is
+ * lowercased and normalized; lexemes found in the stop list are dropped
+ * and the remaining ones are compacted to the front of the array.
+ * Returns NULL for an empty word or when normalization yields nothing.
+ */
+Datum
+dispell_lexize(PG_FUNCTION_ARGS)
+{
+	DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0);
+	char	   *in = (char *) PG_GETARG_POINTER(1);
+	int32		len = PG_GETARG_INT32(2);
+	TSLexeme   *res;
+	TSLexeme   *src;
+	TSLexeme   *dst;
+
+	if (len <= 0)
+		PG_RETURN_POINTER(NULL);
+
+	res = NINormalizeWord(&d->obj, lowerstr_with_len(in, len));
+	if (res == NULL)
+		PG_RETURN_POINTER(NULL);
+
+	/* src scans every lexeme; dst is the next slot for a kept one. */
+	src = res;
+	dst = res;
+	while (src->lexeme != NULL)
+	{
+		if (searchstoplist(&d->stoplist, src->lexeme))
+		{
+			pfree(src->lexeme);
+			src->lexeme = NULL;
+		}
+		else
+		{
+			if (dst != src)
+				memcpy(dst, src, sizeof(TSLexeme));
+			dst++;
+		}
+		src++;
+	}
+
+	/* Terminate the compacted array. */
+	dst->lexeme = NULL;
+
+	PG_RETURN_POINTER(res);
+}
diff --git a/src/backend/tsearch/dict_simple.c b/src/backend/tsearch/dict_simple.c
new file mode 100644
index 0000000..214d44e
--- /dev/null
+++ b/src/backend/tsearch/dict_simple.c
@@ -0,0 +1,105 @@
+/*-------------------------------------------------------------------------
+ *
+ * dict_simple.c
+ *		Simple dictionary: just lowercase and check for stopword
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/tsearch/dict_simple.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "commands/defrem.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+
+
+/*
+ * Private state of one "simple" dictionary, built by dsimple_init() and
+ * read by dsimple_lexize().
+ */
+typedef struct
+{
+	StopList	stoplist;		/* stop words read for the StopWords option */
+	bool		accept;			/* true (default): return non-stop words as
+								 * lexemes; false: report them unrecognized */
+} DictSimple;
+
+
+/*
+ * Build a "simple" dictionary from its option list (argument 0).
+ *
+ * Recognized options are "stopwords" and "accept", each allowed at most
+ * once; anything else raises ERRCODE_INVALID_PARAMETER_VALUE.
+ * Returns a pointer to the new DictSimple.
+ */
+Datum
+dsimple_init(PG_FUNCTION_ARGS)
+{
+	List	   *dictoptions = (List *) PG_GETARG_POINTER(0);
+	DictSimple *d = (DictSimple *) palloc0(sizeof(DictSimple));
+	bool		seen_stopwords = false;
+	bool		seen_accept = false;
+	ListCell   *cell;
+
+	/* Unless the option list says otherwise, non-stop words are accepted. */
+	d->accept = true;
+
+	foreach(cell, dictoptions)
+	{
+		DefElem    *opt = (DefElem *) lfirst(cell);
+		const char *name = opt->defname;
+
+		if (strcmp(name, "stopwords") == 0)
+		{
+			if (seen_stopwords)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("multiple StopWords parameters")));
+			readstoplist(defGetString(opt), &d->stoplist, lowerstr);
+			seen_stopwords = true;
+		}
+		else if (strcmp(name, "accept") == 0)
+		{
+			if (seen_accept)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("multiple Accept parameters")));
+			d->accept = defGetBoolean(opt);
+			seen_accept = true;
+		}
+		else
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("unrecognized simple dictionary parameter: \"%s\"",
+							name)));
+		}
+	}
+
+	PG_RETURN_POINTER(d);
+}
+
+/*
+ * Lexize one word with a "simple" dictionary.
+ *
+ * Arguments: 0 = DictSimple, 1 = word bytes, 2 = word length.  The word is
+ * lowercased, then:
+ *	- an empty word or a stop word yields an array with no lexemes;
+ *	- otherwise, if "accept" is set, the lowercased word is the one lexeme;
+ *	- otherwise NULL is returned (word not recognized).
+ */
+Datum
+dsimple_lexize(PG_FUNCTION_ARGS)
+{
+	DictSimple *d = (DictSimple *) PG_GETARG_POINTER(0);
+	char	   *in = (char *) PG_GETARG_POINTER(1);
+	int32		len = PG_GETARG_INT32(2);
+	char	   *txt = lowerstr_with_len(in, len);
+	TSLexeme   *res;
+
+	if (*txt != '\0' && !searchstoplist(&d->stoplist, txt))
+	{
+		if (!d->accept)
+		{
+			/* report as unrecognized */
+			pfree(txt);
+			PG_RETURN_POINTER(NULL);
+		}
+
+		/* accept: the result takes ownership of txt */
+		res = palloc0(sizeof(TSLexeme) * 2);
+		res[0].lexeme = txt;
+		PG_RETURN_POINTER(res);
+	}
+
+	/* reject as stopword: an all-zero array holds no lexemes */
+	pfree(txt);
+	res = palloc0(sizeof(TSLexeme) * 2);
+	PG_RETURN_POINTER(res);
+}
diff --git a/src/backend/tsearch/dict_synonym.c b/src/backend/tsearch/dict_synonym.c
new file mode 100644
index 0000000..65e34e9
--- /dev/null
+++ b/src/backend/tsearch/dict_synonym.c
@@ -0,0 +1,241 @@
+/*-------------------------------------------------------------------------
+ *
+ * dict_synonym.c
+ *		Synonym dictionary: replace word by its synonym
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/tsearch/dict_synonym.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "commands/defrem.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+
+/* One line of the synonym file: a word and its replacement. */
+typedef struct
+{
+	char	   *in;				/* word to match; also the sort/search key */
+	char	   *out;			/* replacement word */
+	int			outlen;			/* byte count used when copying out */
+	uint16		flags;			/* TSL_PREFIX if the replacement ended in
+								 * '*', else 0 */
+} Syn;
+
+/* Private state of one synonym dictionary, built by dsynonym_init(). */
+typedef struct
+{
+	int			len;			/* length of syn array */
+	Syn		   *syn;			/* entries, sorted by "in" with compareSyn */
+	bool		case_sensitive; /* if false, words are lowercased before
+								 * storing and before lookup */
+} DictSyn;
+
+/*
+ * Locate the next whitespace-delimited word in 'in'.
+ *
+ * Returns a pointer to the word's first character and stores in *end a
+ * pointer to the byte just past the word.  If no word is left, returns
+ * NULL and sets *end to NULL.
+ *
+ * When 'flags' is not NULL, a trailing one-byte '*' is not counted as part
+ * of the word: *end then points at the '*' and *flags is set to
+ * TSL_PREFIX; otherwise *flags is set to 0.
+ */
+static char *
+findwrd(char *in, char **end, uint16 *flags)
+{
+	char	   *word;
+	char	   *last;
+
+	/* Step over leading whitespace, one (possibly multibyte) char at a time */
+	for (; *in && t_isspace(in); in += pg_mblen(in))
+		;
+
+	/* Nothing but whitespace left */
+	if (*in == '\0')
+	{
+		*end = NULL;
+		return NULL;
+	}
+
+	word = in;
+	last = in;
+
+	/* Advance to the end of the word, remembering its final character */
+	for (; *in && !t_isspace(in); in += pg_mblen(in))
+		last = in;
+
+	if (flags == NULL)
+	{
+		*end = in;
+		return word;
+	}
+
+	if (in - last == 1 && t_iseq(last, '*'))
+	{
+		*flags = TSL_PREFIX;
+		*end = last;
+	}
+	else
+	{
+		*flags = 0;
+		*end = in;
+	}
+
+	return word;
+}
+
+/* qsort/bsearch comparator: order Syn entries by their "in" word. */
+static int
+compareSyn(const void *a, const void *b)
+{
+	const Syn  *left = (const Syn *) a;
+	const Syn  *right = (const Syn *) b;
+
+	return strcmp(left->in, right->in);
+}
+
+
+/*
+ * Build a synonym dictionary from its option list (argument 0).
+ *
+ * Recognized options are "synonyms" (file name, required) and
+ * "casesensitive" (boolean, default false).  Each line of the file that
+ * contains at least two whitespace-separated words contributes one entry
+ * mapping the first word to the second; other lines are ignored silently.
+ * A '*' ending the second word sets TSL_PREFIX on the entry.  Unless
+ * case-sensitive, both words are stored lowercased.
+ *
+ * Errors: ERRCODE_INVALID_PARAMETER_VALUE for an unknown option or a
+ * missing "synonyms" option, ERRCODE_CONFIG_FILE_ERROR if the file cannot
+ * be opened.  Returns a pointer to the new DictSyn, sorted by input word.
+ */
+Datum
+dsynonym_init(PG_FUNCTION_ARGS)
+{
+	List	   *dictoptions = (List *) PG_GETARG_POINTER(0);
+	DictSyn    *d;
+	ListCell   *l;
+	char	   *filename = NULL;
+	bool		case_sensitive = false;
+	tsearch_readline_state trst;
+	char	   *starti,
+			   *starto,
+			   *next,
+			   *end = NULL;
+	int			cur = 0;
+	char	   *line = NULL;
+	uint16		flags = 0;
+
+	foreach(l, dictoptions)
+	{
+		DefElem    *defel = (DefElem *) lfirst(l);
+
+		if (strcmp(defel->defname, "synonyms") == 0)
+			filename = defGetString(defel);
+		else if (strcmp(defel->defname, "casesensitive") == 0)
+			case_sensitive = defGetBoolean(defel);
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("unrecognized synonym parameter: \"%s\"",
+							defel->defname)));
+	}
+
+	if (!filename)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("missing Synonyms parameter")));
+
+	filename = get_tsearch_config_filename(filename, "syn");
+
+	if (!tsearch_readline_begin(&trst, filename))
+		ereport(ERROR,
+				(errcode(ERRCODE_CONFIG_FILE_ERROR),
+				 errmsg("could not open synonym file \"%s\": %m",
+						filename)));
+
+	d = (DictSyn *) palloc0(sizeof(DictSyn));
+
+	while ((line = tsearch_readline(&trst)) != NULL)
+	{
+		starti = findwrd(line, &end, NULL);
+		if (!starti)
+		{
+			/* Empty line */
+			goto skipline;
+		}
+		if (*end == '\0')
+		{
+			/* A line with only one word. Ignore silently. */
+			goto skipline;
+		}
+
+		/*
+		 * Work out where scanning resumes before overwriting the delimiter:
+		 * the whitespace character that ended the word may be multibyte, in
+		 * which case "end + 1" would point into the middle of it.
+		 */
+		next = end + pg_mblen(end);
+		*end = '\0';
+
+		starto = findwrd(next, &end, &flags);
+		if (!starto)
+		{
+			/* A line with only one word (+whitespace). Ignore silently. */
+			goto skipline;
+		}
+		*end = '\0';
+
+		/*
+		 * starti now points to the first word, and starto to the second word
+		 * on the line, with a \0 terminator at the end of both words.
+		 */
+
+		if (cur >= d->len)
+		{
+			if (d->len == 0)
+			{
+				d->len = 64;
+				d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
+			}
+			else
+			{
+				d->len *= 2;
+				d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
+			}
+		}
+
+		if (case_sensitive)
+		{
+			d->syn[cur].in = pstrdup(starti);
+			d->syn[cur].out = pstrdup(starto);
+		}
+		else
+		{
+			d->syn[cur].in = lowerstr(starti);
+			d->syn[cur].out = lowerstr(starto);
+		}
+
+		/*
+		 * Measure the string actually stored: lowercasing can change the
+		 * byte length of a multibyte string, and dsynonym_lexize() copies
+		 * exactly outlen bytes of "out".
+		 */
+		d->syn[cur].outlen = strlen(d->syn[cur].out);
+		d->syn[cur].flags = flags;
+
+		cur++;
+
+skipline:
+		pfree(line);
+	}
+
+	tsearch_readline_end(&trst);
+
+	/* From here on len is the number of entries, not the allocated size. */
+	d->len = cur;
+
+	/* Nothing to sort (and possibly no array at all) with fewer than two. */
+	if (d->len > 1)
+		qsort(d->syn, d->len, sizeof(Syn), compareSyn);
+
+	d->case_sensitive = case_sensitive;
+
+	PG_RETURN_POINTER(d);
+}
+
+/*
+ * Lexize one word with a synonym dictionary.
+ *
+ * Arguments: 0 = DictSyn, 1 = word bytes, 2 = word length.  The word
+ * (lowercased unless the dictionary is case-sensitive) is looked up by
+ * binary search.  On a hit, returns a one-lexeme array holding a copy of
+ * the replacement and the entry's flags; otherwise returns NULL.
+ */
+Datum
+dsynonym_lexize(PG_FUNCTION_ARGS)
+{
+	DictSyn    *d = (DictSyn *) PG_GETARG_POINTER(0);
+	char	   *in = (char *) PG_GETARG_POINTER(1);
+	int32		len = PG_GETARG_INT32(2);
+	Syn			probe;
+	Syn		   *hit;
+	TSLexeme   *res;
+
+	/* note: d->len test protects against Solaris bsearch-of-no-items bug */
+	if (len <= 0 || d->len <= 0)
+		PG_RETURN_POINTER(NULL);
+
+	probe.in = d->case_sensitive ? pnstrdup(in, len)
+		: lowerstr_with_len(in, len);
+	probe.out = NULL;
+
+	hit = (Syn *) bsearch(&probe, d->syn, d->len, sizeof(Syn), compareSyn);
+
+	/* The probe string is only needed for the search itself. */
+	pfree(probe.in);
+
+	if (hit == NULL)
+		PG_RETURN_POINTER(NULL);
+
+	res = palloc0(sizeof(TSLexeme) * 2);
+	res[0].lexeme = pnstrdup(hit->out, hit->outlen);
+	res[0].flags = hit->flags;
+
+	PG_RETURN_POINTER(res);
+}
diff --git a/src/backend/tsearch/dict_thesaurus.c b/src/backend/tsearch/dict_thesaurus.c
new file mode 100644
index 0000000..b8c08bc
--- /dev/null
+++ b/src/backend/tsearch/dict_thesaurus.c
@@ -0,0 +1,877 @@
+/*-------------------------------------------------------------------------
+ *
+ * dict_thesaurus.c
+ *		Thesaurus dictionary: phrase to phrase substitution
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/tsearch/dict_thesaurus.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/namespace.h"
+#include "commands/defrem.h"
+#include "tsearch/ts_cache.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+#include "utils/regproc.h"
+
+
+/*
+ * Temporarily, we use TSLexeme.flags for internal purposes...
+ */
+#define DT_USEASIS		0x1000
+
+/*
+ * One occurrence of a lexeme in the sample phrase of a thesaurus entry.
+ */
+typedef struct LexemeInfo
+{
+	uint32		idsubst;		/* entry's number in DictThesaurus->subst */
+	uint16		posinsubst;		/* pos info in entry */
+	uint16		tnvariant;		/* total num lexemes in one variant */
+	struct LexemeInfo *nextentry;	/* next occurrence of the same lexeme;
+									 * linked in compileTheLexeme() */
+	struct LexemeInfo *nextvariant; /* next candidate in the chain built by
+									 * findVariant() */
+} LexemeInfo;
+
+/* A sample-phrase lexeme together with the list of places it occurs. */
+typedef struct
+{
+	char	   *lexeme;			/* the word; NULL after compilation stands
+								 * for the "?" stop-word marker */
+	LexemeInfo *entries;		/* occurrences, chained through nextentry */
+} TheLexeme;
+
+/* The substitute (right-hand) side of one thesaurus entry. */
+typedef struct
+{
+	uint16		lastlexeme;		/* number lexemes to substitute, minus one
+								 * (addWrd stores posinsubst - 1) */
+	uint16		reslen;			/* number of lexemes in res, terminator
+								 * not counted */
+	TSLexeme   *res;			/* prepared substituted result */
+} TheSubstitute;
+
+/* Private state of one thesaurus dictionary, built by thesaurus_init(). */
+typedef struct
+{
+	/* subdictionary to normalize lexemes */
+	Oid			subdictOid;
+	TSDictionaryCacheEntry *subdict;
+
+	/* Array to search lexeme by exact match */
+	TheLexeme  *wrds;
+	int			nwrds;			/* current number of words */
+	int			ntwrds;			/* allocated array length */
+
+	/*
+	 * Storage of substituted result, n-th element is for n-th expression
+	 */
+	TheSubstitute *subst;
+	int			nsubst;			/* allocated length while the file is being
+								 * read; number of entries afterwards */
+} DictThesaurus;
+
+
+/*
+ * Append one sample word to d->wrds, growing the array as needed.
+ *
+ * b..e delimit the word (e is exclusive).  The new element gets a single
+ * LexemeInfo recording idsubst and posinsubst; its nextentry is NULL and
+ * its other fields are left unset here.
+ */
+static void
+newLexeme(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 posinsubst)
+{
+	TheLexeme  *slot;
+	LexemeInfo *entry;
+
+	/* Make room: start with 16 slots, then double. */
+	if (d->nwrds >= d->ntwrds)
+	{
+		if (d->ntwrds != 0)
+		{
+			d->ntwrds *= 2;
+			d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->ntwrds);
+		}
+		else
+		{
+			d->ntwrds = 16;
+			d->wrds = (TheLexeme *) palloc(sizeof(TheLexeme) * d->ntwrds);
+		}
+	}
+
+	slot = &d->wrds[d->nwrds];
+	d->nwrds++;
+
+	/* NUL-terminated private copy of the word */
+	slot->lexeme = palloc(e - b + 1);
+	memcpy(slot->lexeme, b, e - b);
+	slot->lexeme[e - b] = '\0';
+
+	entry = (LexemeInfo *) palloc(sizeof(LexemeInfo));
+	entry->nextentry = NULL;
+	entry->idsubst = idsubst;
+	entry->posinsubst = posinsubst;
+
+	slot->entries = entry;
+}
+
+/*
+ * Append one word of a substitute phrase to d->subst[idsubst].res.
+ *
+ * b..e delimit the word (e is exclusive).  nwrd is the word's index within
+ * the phrase and is stored as the lexeme's nvariant.  posinsubst is the
+ * number of sample lexemes read for this entry; lastlexeme is set to
+ * posinsubst - 1.  useasis marks the word with DT_USEASIS.
+ *
+ * The fill level and allocated size of the current res array live in
+ * static variables that are reset whenever nwrd == 0, so the words of one
+ * entry must be added consecutively, beginning with nwrd == 0.
+ */
+static void
+addWrd(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis)
+{
+	static int	nres = 0;
+	static int	ntres = 0;
+	TheSubstitute *ptr;
+
+	if (nwrd == 0)
+	{
+		/* first word of a new entry: start a fresh res array */
+		nres = ntres = 0;
+
+		/* make sure d->subst has a slot for this entry */
+		if (idsubst >= d->nsubst)
+		{
+			if (d->nsubst == 0)
+			{
+				d->nsubst = 16;
+				d->subst = (TheSubstitute *) palloc(sizeof(TheSubstitute) * d->nsubst);
+			}
+			else
+			{
+				d->nsubst *= 2;
+				d->subst = (TheSubstitute *) repalloc(d->subst, sizeof(TheSubstitute) * d->nsubst);
+			}
+		}
+	}
+
+	ptr = d->subst + idsubst;
+
+	ptr->lastlexeme = posinsubst - 1;
+
+	/* need room for this word plus the terminating entry */
+	if (nres + 1 >= ntres)
+	{
+		if (ntres == 0)
+		{
+			ntres = 2;
+			ptr->res = (TSLexeme *) palloc(sizeof(TSLexeme) * ntres);
+		}
+		else
+		{
+			ntres *= 2;
+			ptr->res = (TSLexeme *) repalloc(ptr->res, sizeof(TSLexeme) * ntres);
+		}
+	}
+
+	ptr->res[nres].lexeme = palloc(e - b + 1);
+	memcpy(ptr->res[nres].lexeme, b, e - b);
+	ptr->res[nres].lexeme[e - b] = '\0';
+
+	ptr->res[nres].nvariant = nwrd;
+	if (useasis)
+		ptr->res[nres].flags = DT_USEASIS;
+	else
+		ptr->res[nres].flags = 0;
+
+	/* keep the array terminated by a NULL lexeme */
+	ptr->res[++nres].lexeme = NULL;
+}
+
+#define TR_WAITLEX	1
+#define TR_INLEX	2
+#define TR_WAITSUBS 3
+#define TR_INSUBS	4
+
+/*
+ * Read a thesaurus file and load its raw contents into d.
+ *
+ * Lines that are empty or whose first non-blank character is '#' are
+ * skipped.  Every other line is scanned with a four-state machine: the
+ * sample words before the ':' are stored with newLexeme(), the substitute
+ * words after it with addWrd().  In the substitute part a leading '*'
+ * marks a word "use as is" and a leading '\' is skipped.
+ *
+ * Malformed lines and an unopenable file raise ERRCODE_CONFIG_FILE_ERROR.
+ * On return d->nsubst is the number of entries read.
+ */
+static void
+thesaurusRead(const char *filename, DictThesaurus *d)
+{
+	tsearch_readline_state trst;
+	uint32		idsubst = 0;
+	bool		useasis = false;
+	char	   *line;
+
+	filename = get_tsearch_config_filename(filename, "ths");
+	if (!tsearch_readline_begin(&trst, filename))
+		ereport(ERROR,
+				(errcode(ERRCODE_CONFIG_FILE_ERROR),
+				 errmsg("could not open thesaurus file \"%s\": %m",
+						filename)));
+
+	while ((line = tsearch_readline(&trst)) != NULL)
+	{
+		char	   *ptr;
+		int			state = TR_WAITLEX;
+		char	   *beginwrd = NULL;
+		uint32		posinsubst = 0;	/* sample words seen on this line */
+		uint32		nwrd = 0;		/* substitute words seen on this line */
+
+		ptr = line;
+
+		/* is it a comment? */
+		while (*ptr && t_isspace(ptr))
+			ptr += pg_mblen(ptr);
+
+		if (t_iseq(ptr, '#') || *ptr == '\0' ||
+			t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
+		{
+			pfree(line);
+			continue;
+		}
+
+		while (*ptr)
+		{
+			if (state == TR_WAITLEX)
+			{
+				/* between sample words */
+				if (t_iseq(ptr, ':'))
+				{
+					if (posinsubst == 0)
+						ereport(ERROR,
+								(errcode(ERRCODE_CONFIG_FILE_ERROR),
+								 errmsg("unexpected delimiter")));
+					state = TR_WAITSUBS;
+				}
+				else if (!t_isspace(ptr))
+				{
+					beginwrd = ptr;
+					state = TR_INLEX;
+				}
+			}
+			else if (state == TR_INLEX)
+			{
+				/* inside a sample word; ':' or whitespace ends it */
+				if (t_iseq(ptr, ':'))
+				{
+					newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
+					state = TR_WAITSUBS;
+				}
+				else if (t_isspace(ptr))
+				{
+					newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
+					state = TR_WAITLEX;
+				}
+			}
+			else if (state == TR_WAITSUBS)
+			{
+				/* between substitute words */
+				if (t_iseq(ptr, '*'))
+				{
+					/* "use as is" word: it starts after the '*' */
+					useasis = true;
+					state = TR_INSUBS;
+					beginwrd = ptr + pg_mblen(ptr);
+				}
+				else if (t_iseq(ptr, '\\'))
+				{
+					/* the word starts after the backslash */
+					useasis = false;
+					state = TR_INSUBS;
+					beginwrd = ptr + pg_mblen(ptr);
+				}
+				else if (!t_isspace(ptr))
+				{
+					useasis = false;
+					beginwrd = ptr;
+					state = TR_INSUBS;
+				}
+			}
+			else if (state == TR_INSUBS)
+			{
+				/* inside a substitute word; whitespace ends it */
+				if (t_isspace(ptr))
+				{
+					/* a '*' or '\' directly followed by whitespace */
+					if (ptr == beginwrd)
+						ereport(ERROR,
+								(errcode(ERRCODE_CONFIG_FILE_ERROR),
+								 errmsg("unexpected end of line or lexeme")));
+					addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
+					state = TR_WAITSUBS;
+				}
+			}
+			else
+				elog(ERROR, "unrecognized thesaurus state: %d", state);
+
+			ptr += pg_mblen(ptr);
+		}
+
+		/* the line ended in the middle of a substitute word: store it */
+		if (state == TR_INSUBS)
+		{
+			if (ptr == beginwrd)
+				ereport(ERROR,
+						(errcode(ERRCODE_CONFIG_FILE_ERROR),
+						 errmsg("unexpected end of line or lexeme")));
+			addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
+		}
+
+		idsubst++;
+
+		/* an entry needs at least one sample and one substitute word */
+		if (!(nwrd && posinsubst))
+			ereport(ERROR,
+					(errcode(ERRCODE_CONFIG_FILE_ERROR),
+					 errmsg("unexpected end of line")));
+
+		/* both counters are stored in uint16 fields elsewhere */
+		if (nwrd != (uint16) nwrd || posinsubst != (uint16) posinsubst)
+			ereport(ERROR,
+					(errcode(ERRCODE_CONFIG_FILE_ERROR),
+					 errmsg("too many lexemes in thesaurus entry")));
+
+		pfree(line);
+	}
+
+	d->nsubst = idsubst;
+
+	tsearch_readline_end(&trst);
+}
+
+/*
+ * Append one normalized lexeme to the newwrds array and return the array,
+ * which may have been moved by repalloc.
+ *
+ * *nnw is the number of elements in use (incremented here) and *tnm the
+ * allocated length (doubled when full).  If 'lexeme' is NULL or carries a
+ * NULL string, the element stands for a stop word: its lexeme is NULL and
+ * its tnvariant is 1.  Otherwise the string is copied and tnvariant is
+ * taken from the argument.  idsubst and posinsubst are copied from 'src'.
+ */
+static TheLexeme *
+addCompiledLexeme(TheLexeme *newwrds, int *nnw, int *tnm, TSLexeme *lexeme, LexemeInfo *src, uint16 tnvariant)
+{
+	TheLexeme  *slot;
+	LexemeInfo *entry;
+
+	if (*nnw >= *tnm)
+	{
+		*tnm *= 2;
+		newwrds = (TheLexeme *) repalloc(newwrds, sizeof(TheLexeme) * *tnm);
+	}
+
+	slot = &newwrds[*nnw];
+	entry = (LexemeInfo *) palloc(sizeof(LexemeInfo));
+
+	if (lexeme && lexeme->lexeme)
+	{
+		slot->lexeme = pstrdup(lexeme->lexeme);
+		entry->tnvariant = tnvariant;
+	}
+	else
+	{
+		slot->lexeme = NULL;
+		entry->tnvariant = 1;
+	}
+
+	entry->idsubst = src->idsubst;
+	entry->posinsubst = src->posinsubst;
+
+	entry->nextentry = NULL;
+
+	/*
+	 * findVariant() assigns nextvariant before the chain is walked, but
+	 * start from a defined value rather than leaving palloc garbage in a
+	 * link pointer.
+	 */
+	entry->nextvariant = NULL;
+
+	slot->entries = entry;
+
+	(*nnw)++;
+	return newwrds;
+}
+
+/*
+ * Three-way comparison of two LexemeInfo by idsubst, then posinsubst,
+ * then tnvariant.  Returns 0 if either pointer is NULL.
+ */
+static int
+cmpLexemeInfo(LexemeInfo *a, LexemeInfo *b)
+{
+	if (a == NULL || b == NULL)
+		return 0;
+
+	if (a->idsubst != b->idsubst)
+		return (a->idsubst > b->idsubst) ? 1 : -1;
+
+	if (a->posinsubst != b->posinsubst)
+		return (a->posinsubst > b->posinsubst) ? 1 : -1;
+
+	if (a->tnvariant != b->tnvariant)
+		return (a->tnvariant > b->tnvariant) ? 1 : -1;
+
+	return 0;
+}
+
+/*
+ * Compare two TheLexeme by their strings.  A NULL lexeme sorts after
+ * every non-NULL one; two NULL lexemes compare equal.
+ */
+static int
+cmpLexeme(const TheLexeme *a, const TheLexeme *b)
+{
+	if (a->lexeme != NULL && b->lexeme != NULL)
+		return strcmp(a->lexeme, b->lexeme);
+
+	if (a->lexeme == NULL && b->lexeme == NULL)
+		return 0;
+
+	return (a->lexeme == NULL) ? 1 : -1;
+}
+
+/* bsearch comparator: cmpLexeme() behind the void-pointer signature. */
+static int
+cmpLexemeQ(const void *a, const void *b)
+{
+	const TheLexeme *left = (const TheLexeme *) a;
+	const TheLexeme *right = (const TheLexeme *) b;
+
+	return cmpLexeme(left, right);
+}
+
+/*
+ * qsort comparator: order by lexeme string first; elements with equal
+ * lexemes are ordered by their first entry in reverse cmpLexemeInfo order.
+ */
+static int
+cmpTheLexeme(const void *a, const void *b)
+{
+	const TheLexeme *left = (const TheLexeme *) a;
+	const TheLexeme *right = (const TheLexeme *) b;
+	int			bylexeme = cmpLexeme(left, right);
+
+	if (bylexeme != 0)
+		return bylexeme;
+
+	return -cmpLexemeInfo(left->entries, right->entries);
+}
+
+/*
+ * Normalize the sample words collected by thesaurusRead() through the
+ * subdictionary and rebuild d->wrds as a sorted array with one element
+ * per distinct normalized lexeme.
+ *
+ * The word "?" is the stop-word marker and is stored with a NULL lexeme.
+ * Any other sample word that the subdictionary does not recognize, or
+ * reports as a stop word, raises ERRCODE_CONFIG_FILE_ERROR.  After
+ * sorting, elements with equal lexemes are merged and their entries are
+ * chained through nextentry.
+ */
+static void
+compileTheLexeme(DictThesaurus *d)
+{
+	int			i,
+				nnw = 0,
+				tnm = 16;
+	TheLexeme  *newwrds = (TheLexeme *) palloc(sizeof(TheLexeme) * tnm),
+			   *ptrwrds;
+
+	for (i = 0; i < d->nwrds; i++)
+	{
+		TSLexeme   *ptr;
+
+		if (strcmp(d->wrds[i].lexeme, "?") == 0)	/* Is stop word marker? */
+			newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
+		else
+		{
+			ptr = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
+															 PointerGetDatum(d->subdict->dictData),
+															 PointerGetDatum(d->wrds[i].lexeme),
+															 Int32GetDatum(strlen(d->wrds[i].lexeme)),
+															 PointerGetDatum(NULL)));
+
+			if (!ptr)
+				ereport(ERROR,
+						(errcode(ERRCODE_CONFIG_FILE_ERROR),
+						 errmsg("thesaurus sample word \"%s\" isn't recognized by subdictionary (rule %d)",
+								d->wrds[i].lexeme,
+								d->wrds[i].entries->idsubst + 1)));
+			else if (!(ptr->lexeme))
+				ereport(ERROR,
+						(errcode(ERRCODE_CONFIG_FILE_ERROR),
+						 errmsg("thesaurus sample word \"%s\" is a stop word (rule %d)",
+								d->wrds[i].lexeme,
+								d->wrds[i].entries->idsubst + 1),
+						 errhint("Use \"?\" to represent a stop word within a sample phrase.")));
+			else
+			{
+				/* walk the result one variant (run of equal nvariant) at a time */
+				while (ptr->lexeme)
+				{
+					TSLexeme   *remptr = ptr + 1;
+					int			tnvar = 1;
+					int			curvar = ptr->nvariant;
+
+					/* compute n words in one variant */
+					while (remptr->lexeme)
+					{
+						if (remptr->nvariant != (remptr - 1)->nvariant)
+							break;
+						tnvar++;
+						remptr++;
+					}
+
+					/* store every lexeme of the variant with that count */
+					remptr = ptr;
+					while (remptr->lexeme && remptr->nvariant == curvar)
+					{
+						newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar);
+						remptr++;
+					}
+
+					ptr = remptr;
+				}
+			}
+		}
+
+		/* the raw word has been copied; release it */
+		pfree(d->wrds[i].lexeme);
+		pfree(d->wrds[i].entries);
+	}
+
+	/* swap in the compiled array */
+	if (d->wrds)
+		pfree(d->wrds);
+	d->wrds = newwrds;
+	d->nwrds = nnw;
+	d->ntwrds = tnm;
+
+	if (d->nwrds > 1)
+	{
+		qsort(d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme);
+
+		/* uniq */
+		/* newwrds: last element kept so far; ptrwrds: element being examined */
+		newwrds = d->wrds;
+		ptrwrds = d->wrds + 1;
+		while (ptrwrds - d->wrds < d->nwrds)
+		{
+			if (cmpLexeme(ptrwrds, newwrds) == 0)
+			{
+				/* same lexeme: push a differing entry onto the kept element's list */
+				if (cmpLexemeInfo(ptrwrds->entries, newwrds->entries))
+				{
+					ptrwrds->entries->nextentry = newwrds->entries;
+					newwrds->entries = ptrwrds->entries;
+				}
+				else
+					pfree(ptrwrds->entries);
+
+				if (ptrwrds->lexeme)
+					pfree(ptrwrds->lexeme);
+			}
+			else
+			{
+				newwrds++;
+				*newwrds = *ptrwrds;
+			}
+
+			ptrwrds++;
+		}
+
+		/* shrink the array to the number of distinct lexemes */
+		d->nwrds = newwrds - d->wrds + 1;
+		d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->nwrds);
+	}
+}
+
+/*
+ * Replace each entry's raw substitute words (stored by addWrd()) with the
+ * final lexeme array that copyTSLexeme() later copies.
+ *
+ * Words flagged DT_USEASIS are taken unchanged; all others are run
+ * through the subdictionary, and ERRCODE_CONFIG_FILE_ERROR is raised if
+ * one is a stop word or is not recognized.  For every input word that
+ * does not produce the first output lexeme, its first output lexeme gets
+ * TSL_ADDPOS.  reslen is set to the number of output lexemes.
+ */
+static void
+compileTheSubstitute(DictThesaurus *d)
+{
+	int			i;
+
+	for (i = 0; i < d->nsubst; i++)
+	{
+		TSLexeme   *rem = d->subst[i].res,
+				   *outptr,
+				   *inptr;
+		int			n = 2;		/* allocated length of the new res array */
+
+		outptr = d->subst[i].res = (TSLexeme *) palloc(sizeof(TSLexeme) * n);
+		outptr->lexeme = NULL;
+		inptr = rem;
+
+		while (inptr && inptr->lexeme)
+		{
+			TSLexeme   *lexized,
+						tmplex[2];
+
+			if (inptr->flags & DT_USEASIS)
+			{					/* do not lexize */
+				tmplex[0] = *inptr;
+				tmplex[0].flags = 0;
+				tmplex[1].lexeme = NULL;
+				lexized = tmplex;
+			}
+			else
+			{
+				lexized = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
+																	 PointerGetDatum(d->subdict->dictData),
+																	 PointerGetDatum(inptr->lexeme),
+																	 Int32GetDatum(strlen(inptr->lexeme)),
+																	 PointerGetDatum(NULL)));
+			}
+
+			if (lexized && lexized->lexeme)
+			{
+				/* index of this word's first output lexeme, or -1 if it is output 0 */
+				int			toset = (lexized->lexeme && outptr != d->subst[i].res) ? (outptr - d->subst[i].res) : -1;
+
+				while (lexized->lexeme)
+				{
+					/* grow the array; outptr must be re-based after repalloc */
+					if (outptr - d->subst[i].res + 1 >= n)
+					{
+						int			diff = outptr - d->subst[i].res;
+
+						n *= 2;
+						d->subst[i].res = (TSLexeme *) repalloc(d->subst[i].res, sizeof(TSLexeme) * n);
+						outptr = d->subst[i].res + diff;
+					}
+
+					*outptr = *lexized;
+					outptr->lexeme = pstrdup(lexized->lexeme);
+
+					outptr++;
+					lexized++;
+				}
+
+				if (toset > 0)
+					d->subst[i].res[toset].flags |= TSL_ADDPOS;
+			}
+			else if (lexized)
+			{
+				ereport(ERROR,
+						(errcode(ERRCODE_CONFIG_FILE_ERROR),
+						 errmsg("thesaurus substitute word \"%s\" is a stop word (rule %d)",
+								inptr->lexeme, i + 1)));
+			}
+			else
+			{
+				ereport(ERROR,
+						(errcode(ERRCODE_CONFIG_FILE_ERROR),
+						 errmsg("thesaurus substitute word \"%s\" isn't recognized by subdictionary (rule %d)",
+								inptr->lexeme, i + 1)));
+			}
+
+			if (inptr->lexeme)
+				pfree(inptr->lexeme);
+			inptr++;
+		}
+
+		if (outptr == d->subst[i].res)
+			ereport(ERROR,
+					(errcode(ERRCODE_CONFIG_FILE_ERROR),
+					 errmsg("thesaurus substitute phrase is empty (rule %d)",
+							i + 1)));
+
+		d->subst[i].reslen = outptr - d->subst[i].res;
+
+		/* the raw array from addWrd() is no longer needed */
+		pfree(rem);
+	}
+}
+
+/*
+ * Build a thesaurus dictionary from its option list (argument 0).
+ *
+ * Recognized options are "dictfile" (the thesaurus file) and "dictionary"
+ * (name of the subdictionary); both are required and each may appear only
+ * once.  Violations raise ERRCODE_INVALID_PARAMETER_VALUE.  After the
+ * subdictionary has been looked up, the sample and substitute phrases are
+ * compiled.  Returns a pointer to the new DictThesaurus.
+ */
+Datum
+thesaurus_init(PG_FUNCTION_ARGS)
+{
+	List	   *dictoptions = (List *) PG_GETARG_POINTER(0);
+	DictThesaurus *d = (DictThesaurus *) palloc0(sizeof(DictThesaurus));
+	char	   *subdictname = NULL;
+	bool		have_file = false;
+	ListCell   *cell;
+
+	foreach(cell, dictoptions)
+	{
+		DefElem    *opt = (DefElem *) lfirst(cell);
+		const char *name = opt->defname;
+
+		if (strcmp(name, "dictfile") == 0)
+		{
+			if (have_file)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("multiple DictFile parameters")));
+			thesaurusRead(defGetString(opt), d);
+			have_file = true;
+		}
+		else if (strcmp(name, "dictionary") == 0)
+		{
+			if (subdictname != NULL)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("multiple Dictionary parameters")));
+			subdictname = pstrdup(defGetString(opt));
+		}
+		else
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("unrecognized Thesaurus parameter: \"%s\"",
+							name)));
+		}
+	}
+
+	if (!have_file)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("missing DictFile parameter")));
+	if (subdictname == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("missing Dictionary parameter")));
+
+	/* Resolve the subdictionary, then compile both sides of every entry. */
+	d->subdictOid = get_ts_dict_oid(stringToQualifiedNameList(subdictname), false);
+	d->subdict = lookup_ts_dictionary_cache(d->subdictOid);
+
+	compileTheLexeme(d);
+	compileTheSubstitute(d);
+
+	PG_RETURN_POINTER(d);
+}
+
+/*
+ * Binary-search d->wrds for 'lexeme' (NULL searches for the stop-word
+ * element) and return its entry list, or NULL if it is not present.
+ */
+static LexemeInfo *
+findTheLexeme(DictThesaurus *d, char *lexeme)
+{
+	TheLexeme	probe;
+	TheLexeme  *hit;
+
+	if (d->nwrds == 0)
+		return NULL;
+
+	probe.lexeme = lexeme;
+	probe.entries = NULL;
+
+	hit = bsearch(&probe, d->wrds, d->nwrds, sizeof(TheLexeme), cmpLexemeQ);
+
+	return (hit != NULL) ? hit->entries : NULL;
+}
+
+/*
+ * Does the nextvariant chain starting at 'stored' contain an element with
+ * the given idsubst?  An empty chain (NULL) matches everything.
+ */
+static bool
+matchIdSubst(LexemeInfo *stored, uint32 idsubst)
+{
+	LexemeInfo *cur;
+
+	if (stored == NULL)
+		return true;
+
+	for (cur = stored; cur != NULL; cur = cur->nextvariant)
+	{
+		if (cur->idsubst == idsubst)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Search the entry lists of the newn lexemes of one variant (newin[0..newn-1])
+ * for thesaurus entries in which every one of them has an occurrence at
+ * position curpos with tnvariant == newn.  Each such entry that also
+ * passes the 'stored' filter (see matchIdSubst) and is not already in the
+ * chain 'in' is pushed onto the front of that nextvariant chain.
+ *
+ * The newin[] pointers are advanced in place.  The function returns the
+ * current chain head as soon as any of the lists is exhausted.
+ */
+static LexemeInfo *
+findVariant(LexemeInfo *in, LexemeInfo *stored, uint16 curpos, LexemeInfo **newin, int newn)
+{
+	for (;;)
+	{
+		int			i;
+		LexemeInfo *ptr = newin[0]; /* current candidate entry */
+
+		for (i = 0; i < newn; i++)
+		{
+			/* skip occurrences belonging to entries before the candidate */
+			while (newin[i] && newin[i]->idsubst < ptr->idsubst)
+				newin[i] = newin[i]->nextentry;
+
+			if (newin[i] == NULL)
+				return in;
+
+			/* this list is already past the candidate: restart with its entry */
+			if (newin[i]->idsubst > ptr->idsubst)
+			{
+				ptr = newin[i];
+				i = -1;
+				continue;
+			}
+
+			/* within the candidate entry, look for the wanted position */
+			while (newin[i]->idsubst == ptr->idsubst)
+			{
+				if (newin[i]->posinsubst == curpos && newin[i]->tnvariant == newn)
+				{
+					ptr = newin[i];
+					break;
+				}
+
+				newin[i] = newin[i]->nextentry;
+				if (newin[i] == NULL)
+					return in;
+			}
+
+			/* ran off the candidate entry without a match: restart */
+			if (newin[i]->idsubst != ptr->idsubst)
+			{
+				ptr = newin[i];
+				i = -1;
+				continue;
+			}
+		}
+
+		if (i == newn && matchIdSubst(stored, ptr->idsubst) && (in == NULL || !matchIdSubst(in, ptr->idsubst)))
+		{						/* found */
+
+			ptr->nextvariant = in;
+			in = ptr;
+		}
+
+		/* step forward */
+		for (i = 0; i < newn; i++)
+			newin[i] = newin[i]->nextentry;
+	}
+}
+
+/*
+ * Return a freshly allocated copy of ts->res: reslen lexemes, each with
+ * its own copy of the string, followed by a NULL-lexeme terminator.
+ */
+static TSLexeme *
+copyTSLexeme(TheSubstitute *ts)
+{
+	TSLexeme   *copy = (TSLexeme *) palloc(sizeof(TSLexeme) * (ts->reslen + 1));
+	uint16		idx = 0;
+
+	while (idx < ts->reslen)
+	{
+		copy[idx] = ts->res[idx];
+		copy[idx].lexeme = pstrdup(ts->res[idx].lexeme);
+		idx++;
+	}
+
+	copy[ts->reslen].lexeme = NULL;
+
+	return copy;
+}
+
+/*
+ * Walk the nextvariant chain 'info' and return a copy of the substitute
+ * of the first entry whose last sample position equals curpos, or NULL
+ * if there is none.
+ *
+ * *moreres is set to true if any element visited before returning has a
+ * successor in the chain, otherwise to false.
+ */
+static TSLexeme *
+checkMatch(DictThesaurus *d, LexemeInfo *info, uint16 curpos, bool *moreres)
+{
+	LexemeInfo *cur;
+
+	*moreres = false;
+
+	for (cur = info; cur != NULL; cur = cur->nextvariant)
+	{
+		Assert(cur->idsubst < d->nsubst);
+
+		if (cur->nextvariant != NULL)
+			*moreres = true;
+
+		if (d->subst[cur->idsubst].lastlexeme == curpos)
+			return copyTSLexeme(d->subst + cur->idsubst);
+	}
+
+	return NULL;
+}
+
+/*
+ * Lexize function of the thesaurus dictionary.
+ *
+ * Argument 0 is the DictThesaurus; arguments 1 and 2 are handed unchanged
+ * to the subdictionary's lexize function; argument 3 is a DictSubState
+ * that carries matching state between calls: private_state holds the
+ * LexemeInfo chain left by the previous call, and getnext is set on
+ * return while candidate entries remain.
+ *
+ * Returns a copy of the substitute lexemes when an entry's sample phrase
+ * ends at the current position, otherwise NULL.  Raises an error unless
+ * called with four arguments and a non-NULL DictSubState.
+ */
+Datum
+thesaurus_lexize(PG_FUNCTION_ARGS)
+{
+	DictThesaurus *d = (DictThesaurus *) PG_GETARG_POINTER(0);
+	DictSubState *dstate = (DictSubState *) PG_GETARG_POINTER(3);
+	TSLexeme   *res = NULL;
+	LexemeInfo *stored,
+			   *info = NULL;
+	uint16		curpos = 0;
+	bool		moreres = false;
+
+	if (PG_NARGS() != 4 || dstate == NULL)
+		elog(ERROR, "forbidden call of thesaurus or nested call");
+
+	if (dstate->isend)
+		PG_RETURN_POINTER(NULL);
+	stored = (LexemeInfo *) dstate->private_state;
+
+	/* continuing a phrase: this word is at the next sample position */
+	if (stored)
+		curpos = stored->posinsubst + 1;
+
+	/* refresh the cached subdictionary entry if it has been invalidated */
+	if (!d->subdict->isvalid)
+		d->subdict = lookup_ts_dictionary_cache(d->subdictOid);
+
+	res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
+													 PointerGetDatum(d->subdict->dictData),
+													 PG_GETARG_DATUM(1),
+													 PG_GETARG_DATUM(2),
+													 PointerGetDatum(NULL)));
+
+	if (res && res->lexeme)
+	{
+		TSLexeme   *ptr = res,
+				   *basevar;
+
+		/* process the result one variant (run of equal nvariant) at a time */
+		while (ptr->lexeme)
+		{
+			uint16		nv = ptr->nvariant;
+			uint16		i,
+						nlex = 0;
+			LexemeInfo **infos;
+
+			basevar = ptr;
+			while (ptr->lexeme && nv == ptr->nvariant)
+			{
+				nlex++;
+				ptr++;
+			}
+
+			/* look up every lexeme of the variant; all must be known */
+			infos = (LexemeInfo **) palloc(sizeof(LexemeInfo *) * nlex);
+			for (i = 0; i < nlex; i++)
+				if ((infos[i] = findTheLexeme(d, basevar[i].lexeme)) == NULL)
+					break;
+
+			if (i < nlex)
+			{
+				/* no chance to find */
+				pfree(infos);
+				continue;
+			}
+
+			info = findVariant(info, stored, curpos, infos, nlex);
+		}
+	}
+	else if (res)
+	{							/* stop-word */
+		LexemeInfo *infos = findTheLexeme(d, NULL);
+
+		info = findVariant(NULL, stored, curpos, &infos, 1);
+	}
+	else
+	{
+		info = NULL;			/* word isn't recognized */
+	}
+
+	/* remember the candidates for the next call */
+	dstate->private_state = (void *) info;
+
+	if (!info)
+	{
+		dstate->getnext = false;
+		PG_RETURN_POINTER(NULL);
+	}
+
+	if ((res = checkMatch(d, info, curpos, &moreres)) != NULL)
+	{
+		dstate->getnext = moreres;
+		PG_RETURN_POINTER(res);
+	}
+
+	/* candidates remain but none is complete yet */
+	dstate->getnext = true;
+
+	PG_RETURN_POINTER(NULL);
+}
diff --git a/src/backend/tsearch/dicts/hunspell_sample.affix b/src/backend/tsearch/dicts/hunspell_sample.affix
new file mode 100644
index 0000000..9a64513
--- /dev/null
+++ b/src/backend/tsearch/dicts/hunspell_sample.affix
@@ -0,0 +1,23 @@
+COMPOUNDFLAG Z
+ONLYINCOMPOUND L
+
+PFX B Y 1
+PFX B   0	re	.
+
+PFX U N 1
+PFX U   0	un	.
+
+SFX J Y 1
+SFX J   0	INGS	[^E]
+
+SFX G Y 1
+SFX G   0	ING		[^E]
+
+SFX S Y 1
+SFX S   0	S	[^SXZHY]
+
+SFX A Y 1
+SFX A   Y	IES	[^AEIOU]Y
+
+SFX \ N 1
+SFX \   0	Y/L	[^Y]
diff --git a/src/backend/tsearch/dicts/hunspell_sample_long.affix b/src/backend/tsearch/dicts/hunspell_sample_long.affix
new file mode 100644
index 0000000..d5df7a3
--- /dev/null
+++ b/src/backend/tsearch/dicts/hunspell_sample_long.affix
@@ -0,0 +1,53 @@
+FLAG long
+
+AF 11
+AF cZ		#1
+AF cL		#2
+AF sGsJpUsS	#3
+AF sSpB		#4
+AF cZsS		#5
+AF sScZs\sE	#6
+AF sA		#7
+AF CaCp		#8
+AF CcCp		#9
+AF sD		#10
+AF sB		#11
+
+COMPOUNDFLAG cZ
+COMPOUNDBEGIN Ca
+COMPOUNDMIDDLE Cb
+COMPOUNDEND Cc
+COMPOUNDPERMITFLAG Cp
+ONLYINCOMPOUND cL
+
+PFX pB Y 1
+PFX pB   0	re	.
+
+PFX pU N 1
+PFX pU   0	un	.
+
+SFX sJ Y 1
+SFX sJ   0	INGS	[^E]
+
+SFX sG Y 1
+SFX sG   0	ING		[^E]
+
+SFX sS Y 1
+SFX sS   0	S	[^SXZHY]
+
+SFX sA Y 1
+SFX sA   Y	IES	[^AEIOU]Y{1}
+
+SFX sB Y 1
+SFX sB   0	ED	K{1}
+
+# Affixes with compound flags
+SFX s\ N 1
+SFX s\   0	Y/2	[^Y]
+
+SFX sE N 1
+SFX sE   0	S/2	[^S]
+
+# Check duplicate affixes
+SFX sD N 1
+SFX sD   0	S/2	[^S]
diff --git a/src/backend/tsearch/dicts/hunspell_sample_long.dict b/src/backend/tsearch/dicts/hunspell_sample_long.dict
new file mode 100644
index 0000000..370c27a
--- /dev/null
+++ b/src/backend/tsearch/dicts/hunspell_sample_long.dict
@@ -0,0 +1,11 @@
+book/3
+book/11
+booking/4
+footballklubber
+foot/5
+football/1
+ball/6
+klubber/1
+sky/7
+ex-/8
+machina/9
diff --git a/src/backend/tsearch/dicts/hunspell_sample_num.affix b/src/backend/tsearch/dicts/hunspell_sample_num.affix
new file mode 100644
index 0000000..0c4766a
--- /dev/null
+++ b/src/backend/tsearch/dicts/hunspell_sample_num.affix
@@ -0,0 +1,33 @@
+FLAG num
+
+COMPOUNDFLAG 101
+ONLYINCOMPOUND 102
+
+PFX 201 Y 1
+PFX 201   0	re	.
+
+PFX 202 N 1
+PFX 202   0	un	.
+
+SFX 301 Y 1
+SFX 301   0	INGS	[^E]
+
+SFX 302 Y 1
+SFX 302   0	ING		[^E]
+
+SFX 303 Y 1
+SFX 303   0	S	[^SXZHY]
+
+# Remove ED suffix from lexeme for base words with K ending
+SFX 306 Y 1
+SFX 306   0	ED	K{1}
+
+# Just add Y to lexeme for base words with Y ending
+SFX 307 Y 1
+SFX 307   Y	0	Y*
+
+SFX 304 Y 1
+SFX 304   Y	IES	[^AEIOU]Y
+
+SFX 305 N 1
+SFX 305   0	Y/102	[^Y]
diff --git a/src/backend/tsearch/dicts/hunspell_sample_num.dict b/src/backend/tsearch/dicts/hunspell_sample_num.dict
new file mode 100644
index 0000000..fbc321d
--- /dev/null
+++ b/src/backend/tsearch/dicts/hunspell_sample_num.dict
@@ -0,0 +1,9 @@
+book/302,301,202,303
+book/306
+booking/303,201
+footballklubber
+foot/101,303
+football/101
+ball/303,101,305
+klubber/101
+sky/304,307
diff --git a/src/backend/tsearch/dicts/ispell_sample.affix b/src/backend/tsearch/dicts/ispell_sample.affix
new file mode 100644
index 0000000..f29004f
--- /dev/null
+++ b/src/backend/tsearch/dicts/ispell_sample.affix
@@ -0,0 +1,26 @@
+compoundwords controlled Z
+
+prefixes
+
+flag *B:
+	.       >   RE      # As in enter > reenter
+
+flag U:
+    .       >   UN      # As in natural > unnatural
+
+suffixes
+
+flag *J:
+	[^E]    >   INGS        # As in cross > crossings
+
+flag *G:
+	[^E]    >   ING     # As in cross > crossing
+
+flag *S:
+	[^SXZHY]    >   S       # As in bat > bats
+
+flag *A:
+	[^AEIOU]Y   >   -Y,IES      # As in imply > implies
+
+flag ~\\:
+	[^Y]        >   Y              #~ advarsel > advarsely-
diff --git a/src/backend/tsearch/dicts/ispell_sample.dict b/src/backend/tsearch/dicts/ispell_sample.dict
new file mode 100644
index 0000000..44df196
--- /dev/null
+++ b/src/backend/tsearch/dicts/ispell_sample.dict
@@ -0,0 +1,8 @@
+book/GJUS
+booking/SB
+footballklubber
+foot/ZS
+football/Z
+ball/SZ\
+klubber/Z
+sky/A
diff --git a/src/backend/tsearch/dicts/synonym_sample.syn b/src/backend/tsearch/dicts/synonym_sample.syn
new file mode 100644
index 0000000..3ecbcf9
--- /dev/null
+++ b/src/backend/tsearch/dicts/synonym_sample.syn
@@ -0,0 +1,5 @@
+postgres	pgsql
+postgresql	pgsql
+postgre	pgsql
+gogle	googl
+indices	index*
diff --git a/src/backend/tsearch/dicts/thesaurus_sample.ths b/src/backend/tsearch/dicts/thesaurus_sample.ths
new file mode 100644
index 0000000..718f54a
--- /dev/null
+++ b/src/backend/tsearch/dicts/thesaurus_sample.ths
@@ -0,0 +1,17 @@
+#
+# Thesaurus config file. Character ':' separates a string from its replacement, e.g.
+# sample-words : substitute-words
+#
+# Any substitute-word can be marked by preceding '*' character,
+# which means do not lexize this word
+# Docs: http://www.sai.msu.su/~megera/oddmuse/index.cgi/Thesaurus_dictionary
+
+one two three : *123
+one two : *12
+one : *1
+two : *2
+
+supernovae stars : *sn
+supernovae : *sn
+booking tickets : order invitation cards
+booking ? tickets : order invitation Cards
diff --git a/src/backend/tsearch/regis.c b/src/backend/tsearch/regis.c
new file mode 100644
index 0000000..43cab72
--- /dev/null
+++ b/src/backend/tsearch/regis.c
@@ -0,0 +1,257 @@
+/*-------------------------------------------------------------------------
+ *
+ * regis.c
+ *		Fast regex subset
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/tsearch/regis.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "tsearch/dicts/regis.h"
+#include "tsearch/ts_locale.h"
+
+/*
+ * States of the pattern scanner shared by RS_isRegis() and RS_compile().
+ */
+#define RS_IN_ONEOF 1			/* just saw '[': expect '^' or the first letter */
+#define RS_IN_ONEOF_IN	2		/* inside "[...]" after the first letter */
+#define RS_IN_NONEOF	3		/* inside "[^...]" after the '^' */
+#define RS_IN_WAIT	4			/* outside brackets: expect a letter or '[' */
+
+
+/*
+ * Report whether "str" uses only the regex subset that regis can handle:
+ * a sequence of single letters and bracket sets such as [abc] or [^abc].
+ *
+ * This must accept exactly the patterns that RS_compile() can process, so
+ * keep the two functions in sync.
+ */
+bool
+RS_isRegis(const char *str)
+{
+	const char *cur;
+	int			state = RS_IN_WAIT;
+
+	for (cur = str; *cur; cur += pg_mblen(cur))
+	{
+		switch (state)
+		{
+			case RS_IN_WAIT:
+				/* outside brackets: a letter stands for itself */
+				if (t_isalpha(cur))
+					break;
+				if (!t_iseq(cur, '['))
+					return false;
+				state = RS_IN_ONEOF;
+				break;
+
+			case RS_IN_ONEOF:
+				/* right after '[': need '^' or the first member */
+				if (t_iseq(cur, '^'))
+					state = RS_IN_NONEOF;
+				else if (t_isalpha(cur))
+					state = RS_IN_ONEOF_IN;
+				else
+					return false;
+				break;
+
+			case RS_IN_ONEOF_IN:
+			case RS_IN_NONEOF:
+				/* inside a set: more members, or the closing ']' */
+				if (t_isalpha(cur))
+					break;
+				if (!t_iseq(cur, ']'))
+					return false;
+				state = RS_IN_WAIT;
+				break;
+
+			default:
+				elog(ERROR, "internal error in RS_isRegis: state %d", state);
+		}
+	}
+
+	/* a bracket set left open at end of input is not acceptable */
+	return (state == RS_IN_WAIT);
+}
+
+/*
+ * Allocate a zero-filled node whose data area has room for len + 1 bytes.
+ * When "prev" is given, the new node is linked in as prev->next.
+ */
+static RegisNode *
+newRegisNode(RegisNode *prev, int len)
+{
+	RegisNode  *node = (RegisNode *) palloc0(RNHDRSZ + len + 1);
+
+	if (prev != NULL)
+		prev->next = node;
+
+	return node;
+}
+
+/*
+ * Compile the pattern "str" into the linked list of nodes hanging off "r".
+ *
+ * r: output structure; it is zeroed first, so any previous contents are
+ *	  overwritten (not freed).
+ * issuffix: stored in r->issuffix as 1/0.
+ * str: the pattern; anything outside the subset accepted by RS_isRegis()
+ *	  raises an error.
+ *
+ * One node is created per bare letter or bracket set.  On return r->nchar
+ * holds the number of nodes.
+ */
+void
+RS_compile(Regis *r, bool issuffix, const char *str)
+{
+	/*
+	 * Every node's data buffer is sized from the length of the whole pattern
+	 * (see newRegisNode), so the members of any one bracket set always fit.
+	 */
+	int			len = strlen(str);
+	int			state = RS_IN_WAIT;
+	const char *c = str;
+	RegisNode  *ptr = NULL;		/* node currently being filled */
+
+	memset(r, 0, sizeof(Regis));
+	r->issuffix = (issuffix) ? 1 : 0;
+
+	while (*c)
+	{
+		if (state == RS_IN_WAIT)
+		{
+			if (t_isalpha(c))
+			{
+				/* a bare letter becomes a one-member RSF_ONEOF node */
+				if (ptr)
+					ptr = newRegisNode(ptr, len);
+				else
+					ptr = r->node = newRegisNode(NULL, len);
+				COPYCHAR(ptr->data, c);
+				ptr->type = RSF_ONEOF;
+				ptr->len = pg_mblen(c);
+			}
+			else if (t_iseq(c, '['))
+			{
+				/* start a bracket set; assume "one of" until '^' is seen */
+				if (ptr)
+					ptr = newRegisNode(ptr, len);
+				else
+					ptr = r->node = newRegisNode(NULL, len);
+				ptr->type = RSF_ONEOF;
+				state = RS_IN_ONEOF;
+			}
+			else				/* shouldn't get here */
+				elog(ERROR, "invalid regis pattern: \"%s\"", str);
+		}
+		else if (state == RS_IN_ONEOF)
+		{
+			if (t_iseq(c, '^'))
+			{
+				/* "[^": switch the node just created to "none of" */
+				ptr->type = RSF_NONEOF;
+				state = RS_IN_NONEOF;
+			}
+			else if (t_isalpha(c))
+			{
+				/* first member of the set */
+				COPYCHAR(ptr->data, c);
+				ptr->len = pg_mblen(c);
+				state = RS_IN_ONEOF_IN;
+			}
+			else				/* shouldn't get here */
+				elog(ERROR, "invalid regis pattern: \"%s\"", str);
+		}
+		else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
+		{
+			if (t_isalpha(c))
+			{
+				/* append a further member; ptr->len counts bytes stored */
+				COPYCHAR(ptr->data + ptr->len, c);
+				ptr->len += pg_mblen(c);
+			}
+			else if (t_iseq(c, ']'))
+				state = RS_IN_WAIT;
+			else				/* shouldn't get here */
+				elog(ERROR, "invalid regis pattern: \"%s\"", str);
+		}
+		else
+			elog(ERROR, "internal error in RS_compile: state %d", state);
+		c += pg_mblen(c);
+	}
+
+	/* the pattern ended inside an unterminated bracket set */
+	if (state != RS_IN_WAIT)	/* shouldn't get here */
+		elog(ERROR, "invalid regis pattern: \"%s\"", str);
+
+	/* count the nodes built above */
+	ptr = r->node;
+	while (ptr)
+	{
+		r->nchar++;
+		ptr = ptr->next;
+	}
+}
+
+/*
+ * Release every node of a compiled pattern and leave r->node empty.
+ */
+void
+RS_free(Regis *r)
+{
+	RegisNode  *node = r->node;
+
+	while (node != NULL)
+	{
+		/* fetch the successor before the node itself is released */
+		RegisNode  *following = node->next;
+
+		pfree(node);
+		node = following;
+	}
+
+	r->node = NULL;
+}
+
+/*
+ * Multibyte-aware membership test: does the character that starts at "c"
+ * occur anywhere in the string "str"?
+ *
+ * Characters are compared byte-for-byte and only when their encoded lengths
+ * are equal.
+ */
+static bool
+mb_strchr(char *str, char *c)
+{
+	int			wanted = pg_mblen(c);
+	int			curlen;
+	char	   *cur;
+
+	for (cur = str; *cur; cur += curlen)
+	{
+		curlen = pg_mblen(cur);
+		if (curlen == wanted && memcmp(cur, c, curlen) == 0)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Match the word "str" against the compiled pattern "r".
+ *
+ * Each node of the pattern consumes exactly one character of the word.  For
+ * a suffix pattern the comparison starts r->nchar characters before the end
+ * of the word, otherwise at its beginning.  A word with fewer characters
+ * than the pattern has nodes never matches.
+ */
+bool
+RS_execute(Regis *r, char *str)
+{
+	RegisNode  *node;
+	char	   *cur;
+	int			nchars = 0;
+
+	/* count characters (not bytes) in the word */
+	for (cur = str; *cur; cur += pg_mblen(cur))
+		nchars++;
+
+	if (nchars < r->nchar)
+		return false;
+
+	/* position on the first character to be tested */
+	cur = str;
+	if (r->issuffix)
+	{
+		int			skip = nchars - r->nchar;
+
+		while (skip-- > 0)
+			cur += pg_mblen(cur);
+	}
+
+	for (node = r->node; node != NULL; node = node->next)
+	{
+		switch (node->type)
+		{
+			case RSF_ONEOF:
+				if (!mb_strchr((char *) node->data, cur))
+					return false;
+				break;
+			case RSF_NONEOF:
+				if (mb_strchr((char *) node->data, cur))
+					return false;
+				break;
+			default:
+				elog(ERROR, "unrecognized regis node type: %d", node->type);
+		}
+		cur += pg_mblen(cur);
+	}
+
+	return true;
+}
diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c
new file mode 100644
index 0000000..edd2fbb
--- /dev/null
+++ b/src/backend/tsearch/spell.c
@@ -0,0 +1,2622 @@
+/*-------------------------------------------------------------------------
+ *
+ * spell.c
+ *		Normalizing word with ISpell
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ * Ispell dictionary
+ * -----------------
+ *
+ * Rules of dictionaries are defined in two files with .affix and .dict
+ * extensions. They are used by spell checker programs Ispell and Hunspell.
+ *
+ * An .affix file declares morphological rules to get a basic form of words.
+ * The format of an .affix file has different structure for Ispell and Hunspell
+ * dictionaries. The Hunspell format is more complicated. But when an .affix
+ * file is imported and compiled, it is stored in the same structure AffixNode.
+ *
+ * A .dict file stores a list of basic forms of words with references to
+ * affix rules. The format of a .dict file has the same structure for Ispell
+ * and Hunspell dictionaries.
+ *
+ * Compilation of a dictionary
+ * ---------------------------
+ *
+ * A compiled dictionary is stored in the IspellDict structure. Compilation of
+ * a dictionary is divided into several steps:
+ *	- NIImportDictionary() - stores each word of a .dict file in the
+ *	  temporary Spell field.
+ *	- NIImportAffixes() - stores affix rules of an .affix file in the
+ *	  Affix field (not temporary) if an .affix file has the Ispell format.
+ *	  -> NIImportOOAffixes() - stores affix rules if an .affix file has the
+ *		 Hunspell format. The AffixData field is initialized if AF parameter
+ *		 is defined.
+ *	- NISortDictionary() - builds a prefix tree (Trie) from the words list
+ *	  and stores it in the Dictionary field. The words list is got from the
+ *	  Spell field. The AffixData field is initialized if AF parameter is not
+ *	  defined.
+ *	- NISortAffixes():
+ *	  - builds a list of compound affixes from the affix list and stores it
+ *		in the CompoundAffix.
+ *	  - builds prefix trees (Trie) from the affix list for prefixes and suffixes
+ *		and stores them in Suffix and Prefix fields.
+ *	  The affix list is got from the Affix field.
+ *
+ * Memory management
+ * -----------------
+ *
+ * The IspellDict structure has the Spell field which is used only in compile
+ * time. The Spell field stores a words list. It can take a lot of memory.
+ * Therefore when a dictionary is compiled this field is cleared by
+ * NIFinishBuild().
+ *
+ * All resources which should be cleared by NIFinishBuild() are allocated using
+ * tmpalloc() and tmpalloc0().
+ *
+ * IDENTIFICATION
+ *	  src/backend/tsearch/spell.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "catalog/pg_collation.h"
+#include "miscadmin.h"
+#include "tsearch/dicts/spell.h"
+#include "tsearch/ts_locale.h"
+#include "utils/memutils.h"
+
+
+/*
+ * Initialization requires a lot of memory that's not needed
+ * after the initialization is done.  During initialization,
+ * CurrentMemoryContext is the long-lived memory context associated
+ * with the dictionary cache entry.  We keep the short-lived stuff
+ * in the Conf->buildCxt context.
+ *
+ * Both macros expand to code that references a variable named "Conf", so
+ * they can only be used where an IspellDict pointer of that name is in
+ * scope.  tmpalloc0() is the zero-filling variant of tmpalloc().
+ */
+#define tmpalloc(sz)  MemoryContextAlloc(Conf->buildCxt, (sz))
+#define tmpalloc0(sz)  MemoryContextAllocZero(Conf->buildCxt, (sz))
+
+/*
+ * Set up the temporary build context used while an ISpell dictionary is
+ * being constructed.
+ *
+ * The caller is expected to pass an IspellDict that was zeroed when it was
+ * allocated.
+ */
+void
+NIStartBuild(IspellDict *Conf)
+{
+	MemoryContext buildcxt;
+
+	/*
+	 * Making the context a child of CurTransactionContext means it is
+	 * released automatically if dictionary construction fails with an error.
+	 */
+	buildcxt = AllocSetContextCreate(CurTransactionContext,
+									 "Ispell dictionary init context",
+									 ALLOCSET_DEFAULT_SIZES);
+
+	Conf->buildCxt = buildcxt;
+}
+
+/*
+ * Finish dictionary construction: drop the temporary build context and
+ * reset the fields that are only meaningful while building.
+ */
+void
+NIFinishBuild(IspellDict *Conf)
+{
+	MemoryContext doomed = Conf->buildCxt;
+
+	/* Give back the temporary memory that is no longer needed */
+	MemoryContextDelete(doomed);
+
+	/* Purely for tidiness, clear the build-time pointers */
+	Conf->CompoundAffixFlags = NULL;
+	Conf->firstfree = NULL;
+	Conf->Spell = NULL;
+	Conf->buildCxt = NULL;
+}
+
+
+/*
+ * "Compact" palloc: allocate without extra palloc overhead.
+ *
+ * Since we have no need to free the ispell data items individually, there's
+ * not much value in the per-chunk overhead normally consumed by palloc.
+ * Getting rid of it is helpful since ispell can allocate a lot of small nodes.
+ *
+ * We currently pre-zero all data allocated this way, even though some of it
+ * doesn't need that.  The cpalloc and cpalloc0 macros are just documentation
+ * to indicate which allocations actually require zeroing.
+ */
+#define COMPACT_ALLOC_CHUNK 8192	/* amount to get from palloc at once */
+#define COMPACT_MAX_REQ		1024	/* must be < COMPACT_ALLOC_CHUNK */
+
+/*
+ * Hand out "size" zeroed bytes, carving small requests out of a larger
+ * chunk so that they do not each pay palloc's per-chunk overhead.
+ *
+ * Requests larger than COMPACT_MAX_REQ go straight to palloc0().  Smaller
+ * ones are rounded up with MAXALIGN and taken from the current chunk
+ * (Conf->firstfree / Conf->avail); a fresh chunk is started whenever the
+ * current one cannot satisfy the request, abandoning whatever was left.
+ */
+static void *
+compact_palloc0(IspellDict *Conf, size_t size)
+{
+	void	   *piece;
+
+	/* Only legal while the dictionary is being built */
+	Assert(Conf->buildCxt != NULL);
+
+	/* Big requests gain nothing from compaction */
+	if (size > COMPACT_MAX_REQ)
+		return palloc0(size);
+
+	/* Every piece handed out stays maxaligned */
+	size = MAXALIGN(size);
+
+	/* Start a new chunk if the current one is too small */
+	if (Conf->avail < size)
+	{
+		Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK);
+		Conf->avail = COMPACT_ALLOC_CHUNK;
+	}
+
+	piece = (void *) Conf->firstfree;
+	Conf->avail -= size;
+	Conf->firstfree += size;
+
+	return piece;
+}
+
+/* Both names map to the same allocator; see the note on zeroing above. */
+#define cpalloc(size) compact_palloc0(Conf, size)
+#define cpalloc0(size) compact_palloc0(Conf, size)
+
+/*
+ * Duplicate a NUL-terminated string into storage obtained from cpalloc().
+ */
+static char *
+cpstrdup(IspellDict *Conf, const char *str)
+{
+	size_t		nbytes = strlen(str) + 1;	/* include the terminator */
+	char	   *copy = cpalloc(nbytes);
+
+	memcpy(copy, str, nbytes);
+
+	return copy;
+}
+
+
+/*
+ * Run lowerstr() with Conf->buildCxt as the current memory context, so
+ * that the result it returns is temporary build-time data.  The caller's
+ * memory context is restored before returning.
+ */
+static char *
+lowerstr_ctx(IspellDict *Conf, const char *src)
+{
+	MemoryContext oldcxt = MemoryContextSwitchTo(Conf->buildCxt);
+	char	   *lowered = lowerstr(src);
+
+	MemoryContextSwitchTo(oldcxt);
+
+	return lowered;
+}
+
+/*
+ * NOTE(review): MAX_NORM and MAXNORMLEN are not referenced in this part of
+ * the file; presumably they bound the normalization results -- confirm at
+ * their use sites.
+ */
+#define MAX_NORM 1024
+#define MAXNORMLEN 256
+
+/* Compare only the first strlen(p) bytes of s against p */
+#define STRNCMP(s,p)	strncmp( (s), (p), strlen(p) )
+/*
+ * Fetch byte N of the L-byte string W, counting from the start when T is
+ * FF_PREFIX and from the end otherwise.
+ */
+#define GETWCHAR(W,L,N,T) ( ((const uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )
+/* Same as GETWCHAR, applied to the replacement string of AFFIX A */
+#define GETCHAR(A,N,T)	  GETWCHAR( (A)->repl, (A)->replen, N, T )
+
+/* Shared empty string, stored instead of a copy when a flag/find/repl is empty */
+static char *VoidString = "";
+
+/*
+ * Comparator for an array of SPELL pointers: orders entries by word.
+ */
+static int
+cmpspell(const void *s1, const void *s2)
+{
+	const SPELL *left = *(SPELL *const *) s1;
+	const SPELL *right = *(SPELL *const *) s2;
+
+	return strcmp(left->word, right->word);
+}
+
+/*
+ * Comparator for an array of SPELL pointers: orders entries by their
+ * affix flag string (p.flag).
+ */
+static int
+cmpspellaffix(const void *s1, const void *s2)
+{
+	const SPELL *left = *(SPELL *const *) s1;
+	const SPELL *right = *(SPELL *const *) s2;
+
+	return strcmp(left->p.flag, right->p.flag);
+}
+
+/*
+ * Comparator for CompoundAffixFlag entries.
+ *
+ * Both entries must use the same flagMode.  FM_NUM flags are compared as
+ * numbers (flag.i); flags of every other mode are compared as strings
+ * (flag.s).
+ */
+static int
+cmpcmdflag(const void *f1, const void *f2)
+{
+	const CompoundAffixFlag *left = (const CompoundAffixFlag *) f1;
+	const CompoundAffixFlag *right = (const CompoundAffixFlag *) f2;
+
+	Assert(left->flagMode == right->flagMode);
+
+	if (left->flagMode != FM_NUM)
+		return strcmp(left->flag.s, right->flag.s);
+
+	if (left->flag.i == right->flag.i)
+		return 0;
+
+	return (left->flag.i > right->flag.i) ? 1 : -1;
+}
+
+/*
+ * Multibyte-aware search: return a pointer to the first character of
+ * "str" that equals c, or NULL if there is none.
+ */
+static char *
+findchar(char *str, int c)
+{
+	char	   *cur;
+
+	for (cur = str; *cur; cur += pg_mblen(cur))
+	{
+		if (t_iseq(cur, c))
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Multibyte-aware search: return a pointer to the first character of
+ * "str" that equals either c1 or c2, or NULL if there is none.
+ */
+static char *
+findchar2(char *str, int c1, int c2)
+{
+	char	   *cur;
+
+	for (cur = str; *cur; cur += pg_mblen(cur))
+	{
+		if (t_iseq(cur, c1) || t_iseq(cur, c2))
+			return cur;
+	}
+
+	return NULL;
+}
+
+
+/*
+ * Backward string comparison for suffix tree operations: compares the two
+ * strings byte by byte starting from their ends.  If one string is a
+ * suffix of the other, the shorter one sorts first.
+ *
+ * Returns -1, 0 or 1.
+ */
+static int
+strbcmp(const unsigned char *s1, const unsigned char *s2)
+{
+	int			i1 = strlen((const char *) s1) - 1;
+	int			i2 = strlen((const char *) s2) - 1;
+
+	for (; i1 >= 0 && i2 >= 0; i1--, i2--)
+	{
+		if (s1[i1] != s2[i2])
+			return (s1[i1] < s2[i2]) ? -1 : 1;
+	}
+
+	/* all compared bytes matched: decide by the remaining length */
+	if (i1 != i2)
+		return (i1 < i2) ? -1 : 1;
+
+	return 0;
+}
+
+/*
+ * Like strbcmp(), but compares at most "count" bytes, counted from the
+ * ends of the strings.
+ *
+ * Returns 0 as soon as "count" bytes have matched; otherwise a string that
+ * runs out first sorts before the longer one.  Returns -1, 0 or 1.
+ */
+static int
+strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count)
+{
+	int			i1 = strlen((const char *) s1) - 1;
+	int			i2 = strlen((const char *) s2) - 1;
+	int			left = count;
+
+	for (; i1 >= 0 && i2 >= 0 && left > 0; i1--, i2--, left--)
+	{
+		if (s1[i1] != s2[i2])
+			return (s1[i1] < s2[i2]) ? -1 : 1;
+	}
+
+	/* the requested number of bytes matched, or both strings ran out */
+	if (left == 0 || i1 == i2)
+		return 0;
+
+	return (i1 < i2) ? -1 : 1;
+}
+
+/*
+ * Comparator for AFFIX entries.
+ *
+ * Entries are ordered by affix type first (the intent being that prefixes
+ * go before suffixes).  Within one type they are ordered by the replacement
+ * string: compared forward for prefixes, and backward, from the end of the
+ * string, for everything else.
+ */
+static int
+cmpaffix(const void *s1, const void *s2)
+{
+	const AFFIX *left = (const AFFIX *) s1;
+	const AFFIX *right = (const AFFIX *) s2;
+
+	if (left->type != right->type)
+		return (left->type < right->type) ? -1 : 1;
+
+	if (left->type != FF_PREFIX)
+		return strbcmp((const unsigned char *) left->repl,
+					   (const unsigned char *) right->repl);
+
+	return strcmp(left->repl, right->repl);
+}
+
+/*
+ * Gets an affix flag from the set of affix flags (sflagset).
+ *
+ * Several flags can be stored in a single string. Flags can be represented by:
+ * - 1 character (FM_CHAR). A character may be Unicode.
+ * - 2 characters (FM_LONG). A character may be Unicode.
+ * - numbers from 1 to 65000 (FM_NUM).
+ *
+ * Depending on the flagMode an affix string can have the following format:
+ * - FM_CHAR: ABCD
+ *	 Here we have 4 flags: A, B, C and D
+ * - FM_LONG: ABCDE*
+ *	 Here we have 3 flags: AB, CD and E*
+ * - FM_NUM: 200,205,50
+ *	 Here we have 3 flags: 200, 205 and 50
+ *
+ * Conf: current dictionary.
+ * sflagset: the set of affix flags. Returns a reference to the start of a next
+ *			 affix flag.
+ * sflag: returns an affix flag from sflagset.
+ */
+static void
+getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
+{
+	int32		s;
+	long		val;
+	char	   *next,
+			   *sbuf = *sflagset;	/* start of this flag, for error reports */
+	int			maxstep;
+	bool		stop = false;
+	bool		met_comma = false;
+
+	/* An FM_LONG flag consists of two characters, an FM_CHAR flag of one */
+	maxstep = (Conf->flagMode == FM_LONG) ? 2 : 1;
+
+	while (**sflagset)
+	{
+		switch (Conf->flagMode)
+		{
+			case FM_LONG:
+			case FM_CHAR:
+				COPYCHAR(sflag, *sflagset);
+				sflag += pg_mblen(*sflagset);
+
+				/* Go to start of the next flag */
+				*sflagset += pg_mblen(*sflagset);
+
+				/* Check if we get all characters of flag */
+				maxstep--;
+				stop = (maxstep == 0);
+				break;
+			case FM_NUM:
+
+				/*
+				 * strtol() reports overflow only through errno and never
+				 * clears errno on success, so reset it first; otherwise a
+				 * stale ERANGE left behind by earlier code would make a
+				 * perfectly valid flag look invalid.
+				 */
+				errno = 0;
+				val = strtol(*sflagset, &next, 10);
+				if (*sflagset == next || errno == ERANGE)
+					ereport(ERROR,
+							(errcode(ERRCODE_CONFIG_FILE_ERROR),
+							 errmsg("invalid affix flag \"%s\"", *sflagset)));
+
+				/*
+				 * Range-check the untruncated long value: narrowing it to
+				 * int32 first could wrap an oversized number into the valid
+				 * range.
+				 */
+				if (val < 0 || val > FLAGNUM_MAXSIZE)
+					ereport(ERROR,
+							(errcode(ERRCODE_CONFIG_FILE_ERROR),
+							 errmsg("affix flag \"%s\" is out of range",
+									*sflagset)));
+				s = (int32) val;
+
+				/* Store the number in canonical decimal form */
+				sflag += sprintf(sflag, "%0d", s);
+
+				/*
+				 * Go to start of the next flag: skip whitespace and at most
+				 * one comma.  A digit not preceded by a comma, a second
+				 * comma, or any other character is an error.
+				 */
+				*sflagset = next;
+				while (**sflagset)
+				{
+					if (t_isdigit(*sflagset))
+					{
+						if (!met_comma)
+							ereport(ERROR,
+									(errcode(ERRCODE_CONFIG_FILE_ERROR),
+									 errmsg("invalid affix flag \"%s\"",
+											*sflagset)));
+						break;
+					}
+					else if (t_iseq(*sflagset, ','))
+					{
+						if (met_comma)
+							ereport(ERROR,
+									(errcode(ERRCODE_CONFIG_FILE_ERROR),
+									 errmsg("invalid affix flag \"%s\"",
+											*sflagset)));
+						met_comma = true;
+					}
+					else if (!t_isspace(*sflagset))
+					{
+						ereport(ERROR,
+								(errcode(ERRCODE_CONFIG_FILE_ERROR),
+								 errmsg("invalid character in affix flag \"%s\"",
+										*sflagset)));
+					}
+
+					*sflagset += pg_mblen(*sflagset);
+				}
+				stop = true;
+				break;
+			default:
+				elog(ERROR, "unrecognized type of Conf->flagMode: %d",
+					 Conf->flagMode);
+		}
+
+		if (stop)
+			break;
+	}
+
+	/* The input ended after only the first character of a two-character flag */
+	if (Conf->flagMode == FM_LONG && maxstep > 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_CONFIG_FILE_ERROR),
+				 errmsg("invalid affix flag \"%s\" with \"long\" flag value",
+						sbuf)));
+
+	*sflag = '\0';
+}
+
+/*
+ * Checks if the affix set Conf->AffixData[affix] contains affixflag.
+ * Conf->AffixData[affix] does not contain affixflag if this flag is not used
+ * actually by the .dict file.
+ *
+ * Conf: current dictionary.
+ * affix: index of the Conf->AffixData array.
+ * affixflag: the affix flag.
+ *
+ * Returns true if the string Conf->AffixData[affix] contains affixflag,
+ * otherwise returns false.
+ */
+static bool
+IsAffixFlagInUse(IspellDict *Conf, int affix, const char *affixflag)
+{
+	char		curflag[BUFSIZ];
+	char	   *rest;
+
+	/* An empty flag is considered to be present in every affix set */
+	if (affixflag[0] == '\0')
+		return true;
+
+	Assert(affix < Conf->nAffixData);
+
+	/* Pull the flags out of the set one at a time and look for a match */
+	for (rest = Conf->AffixData[affix]; *rest != '\0';)
+	{
+		getNextFlagFromString(Conf, &rest, curflag);
+		if (strcmp(curflag, affixflag) == 0)
+			return true;
+	}
+
+	/* The set was exhausted without finding affixflag */
+	return false;
+}
+
+/*
+ * Appends a new word to the temporary array Spell, enlarging the array
+ * when it is full.
+ *
+ * Conf: current dictionary.
+ * word: new word; it is copied.
+ * flag: set of affix flags of the word. A single flag can be extracted with
+ *		 getNextFlagFromString(). The set is copied unless it is empty.
+ */
+static void
+NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
+{
+	SPELL	   *entry;
+
+	if (Conf->nspell >= Conf->mspell)
+	{
+		if (Conf->mspell == 0)
+		{
+			/* first word: create the array */
+			Conf->mspell = 1024 * 20;
+			Conf->Spell = (SPELL **) tmpalloc(Conf->mspell * sizeof(SPELL *));
+		}
+		else
+		{
+			/* array is full: double its capacity */
+			Conf->mspell *= 2;
+			Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *));
+		}
+	}
+
+	entry = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1);
+	Conf->Spell[Conf->nspell] = entry;
+
+	strcpy(entry->word, word);
+
+	if (*flag != '\0')
+		entry->p.flag = cpstrdup(Conf, flag);
+	else
+		entry->p.flag = VoidString;
+
+	Conf->nspell++;
+}
+
+/*
+ * Imports dictionary into the temporary array Spell.
+ *
+ * Note caller must already have applied get_tsearch_config_filename.
+ *
+ * Conf: current dictionary.
+ * filename: path to the .dict file.
+ */
+void
+NIImportDictionary(IspellDict *Conf, const char *filename)
+{
+	tsearch_readline_state trst;
+	char	   *line;
+
+	if (!tsearch_readline_begin(&trst, filename))
+		ereport(ERROR,
+				(errcode(ERRCODE_CONFIG_FILE_ERROR),
+				 errmsg("could not open dictionary file \"%s\": %m",
+						filename)));
+
+	/* Each line has the form "word" or "word/flags" */
+	while ((line = tsearch_readline(&trst)) != NULL)
+	{
+		char	   *s,
+				   *pstr;
+
+		/* Set of affix flags */
+		const char *flag;
+
+		/* Extract flag from the line */
+		flag = NULL;
+		if ((s = findchar(line, '/')))
+		{
+			/* Split the line at '/': word before it, flag set after it */
+			*s++ = '\0';
+			flag = s;
+			while (*s)
+			{
+				/*
+				 * Only single-byte, printable, non-space characters are
+				 * accepted; the flag set is cut off at the first character
+				 * that is anything else.
+				 */
+				/* we allow only single encoded flags for faster works */
+				if (pg_mblen(s) == 1 && t_isprint(s) && !t_isspace(s))
+					s++;
+				else
+				{
+					*s = '\0';
+					break;
+				}
+			}
+		}
+		else
+			flag = "";
+
+		/* Cut the word off at its first whitespace character, if any */
+		s = line;
+		while (*s)
+		{
+			if (t_isspace(s))
+			{
+				*s = '\0';
+				break;
+			}
+			s += pg_mblen(s);
+		}
+		/* Words are stored in lower case */
+		pstr = lowerstr_ctx(Conf, line);
+
+		/* NIAddSpell() makes its own copies, so pstr and line can be freed */
+		NIAddSpell(Conf, pstr, flag);
+		pfree(pstr);
+
+		pfree(line);
+	}
+	tsearch_readline_end(&trst);
+}
+
+/*
+ * Searches a basic form of word in the prefix tree. This word was generated
+ * using an affix rule. This rule may not be presented in an affix set of
+ * a basic form of word.
+ *
+ * For example, we have the entry in the .dict file:
+ * meter/GMD
+ *
+ * The affix rule with the flag S:
+ * SFX S   y	 ies		[^aeiou]y
+ * is not presented here.
+ *
+ * The affix rule with the flag M:
+ * SFX M   0	 's         .
+ * is presented here.
+ *
+ * Conf: current dictionary.
+ * word: basic form of word.
+ * affixflag: affix flag, by which a basic form of word was generated.
+ * flag: compound flag used to compare with StopMiddle->compoundflag.
+ *
+ * Returns 1 if the word was found in the prefix tree, else returns 0.
+ */
+static int
+FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag)
+{
+	SPNode	   *node = Conf->Dictionary;
+	SPNodeData *StopLow,
+			   *StopHigh,
+			   *StopMiddle;
+	const uint8 *ptr = (const uint8 *) word;	/* the word is walked bytewise */
+
+	/* Only the compound-related bits of "flag" matter here */
+	flag &= FF_COMPOUNDFLAGMASK;
+
+	/* Descend one tree level per byte of the word */
+	while (node && *ptr)
+	{
+		/* Binary search for the current byte among this node's entries */
+		StopLow = node->data;
+		StopHigh = node->data + node->length;
+		while (StopLow < StopHigh)
+		{
+			StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
+			if (StopMiddle->val == *ptr)
+			{
+				/* Last byte of the word, and a dictionary word ends here? */
+				if (*(ptr + 1) == '\0' && StopMiddle->isword)
+				{
+					if (flag == 0)
+					{
+						/*
+						 * The word can be formed only with another word. And
+						 * in the flag parameter there is not a sign that we
+						 * search compound words.
+						 */
+						if (StopMiddle->compoundflag & FF_COMPOUNDONLY)
+							return 0;
+					}
+					else if ((flag & StopMiddle->compoundflag) == 0)
+						return 0;
+
+					/*
+					 * Check if this affix rule is presented in the affix set
+					 * with index StopMiddle->affix.
+					 */
+					if (IsAffixFlagInUse(Conf, StopMiddle->affix, affixflag))
+						return 1;
+				}
+				/* Continue with the child node and the next byte */
+				node = StopMiddle->node;
+				ptr++;
+				break;
+			}
+			else if (StopMiddle->val < *ptr)
+				StopLow = StopMiddle + 1;
+			else
+				StopHigh = StopMiddle;
+		}
+		/* The search range became empty: the byte is not in this node */
+		if (StopLow >= StopHigh)
+			break;
+	}
+	return 0;
+}
+
+/*
+ * Context reset/delete callback for a regular expression used in an affix
+ */
+static void
+regex_affix_deletion_callback(void *arg)
+{
+	/* "arg" is the aff_regex_struct whose compiled regex must be released */
+	pg_regfree(&((aff_regex_struct *) arg)->regex);
+}
+
+/*
+ * Adds a new affix rule to the Affix field.
+ *
+ * Conf: current dictionary.
+ * flag: affix flag ('\' in the below example).
+ * flagflags: set of flags from the flagval field for this affix rule. This set
+ *			  is listed after '/' character in the added string (repl).
+ *
+ *			  For example L flag in the hunspell_sample.affix:
+ *			  SFX \   0 Y/L [^Y]
+ *
+ * mask: condition for search ('[^Y]' in the above example).
+ * find: stripping characters from beginning (at prefix) or end (at suffix)
+ *		 of the word ('0' in the above example, 0 means that there is not
+ *		 stripping character).
+ * repl: adding string after stripping ('Y' in the above example).
+ * type: FF_SUFFIX or FF_PREFIX.
+ */
+static void
+NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask,
+		   const char *find, const char *repl, int type)
+{
+	AFFIX	   *Affix;
+
+	/* Make room for one more entry: start with 16 slots, then double */
+	if (Conf->naffixes >= Conf->maffixes)
+	{
+		if (Conf->maffixes)
+		{
+			Conf->maffixes *= 2;
+			Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX));
+		}
+		else
+		{
+			Conf->maffixes = 16;
+			Conf->Affix = (AFFIX *) palloc(Conf->maffixes * sizeof(AFFIX));
+		}
+	}
+
+	/* The new rule goes into the first unused slot */
+	Affix = Conf->Affix + Conf->naffixes;
+
+	/* This affix rule can be applied for words with any ending */
+	if (strcmp(mask, ".") == 0 || *mask == '\0')
+	{
+		Affix->issimple = 1;
+		Affix->isregis = 0;
+	}
+	/* This affix rule will use regis to search word ending */
+	else if (RS_isRegis(mask))
+	{
+		Affix->issimple = 0;
+		Affix->isregis = 1;
+		/* mask cannot be empty here; an empty mask was handled above */
+		RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX),
+				   *mask ? mask : VoidString);
+	}
+	/* This affix rule will use regex_t to search word ending */
+	else
+	{
+		int			masklen;
+		int			wmasklen;
+		int			err;
+		pg_wchar   *wmask;
+		char	   *tmask;
+		aff_regex_struct *pregex;
+
+		Affix->issimple = 0;
+		Affix->isregis = 0;
+
+		/*
+		 * Anchor the mask: at the end of the word for a suffix, at the
+		 * start otherwise.  The buffer has room for the mask, the anchor
+		 * character and the terminating NUL.
+		 */
+		tmask = (char *) tmpalloc(strlen(mask) + 3);
+		if (type == FF_SUFFIX)
+			sprintf(tmask, "%s$", mask);
+		else
+			sprintf(tmask, "^%s", mask);
+
+		/* Convert the anchored mask to wide characters for pg_regcomp() */
+		masklen = strlen(tmask);
+		wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar));
+		wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);
+
+		/*
+		 * The regex engine stores its stuff using malloc not palloc, so we
+		 * must arrange to explicitly clean up the regex when the dictionary's
+		 * context is cleared.  That means the regex_t has to stay in a fixed
+		 * location within the context; we can't keep it directly in the AFFIX
+		 * struct, since we may sort and resize the array of AFFIXes.
+		 */
+		Affix->reg.pregex = pregex = palloc(sizeof(aff_regex_struct));
+
+		err = pg_regcomp(&(pregex->regex), wmask, wmasklen,
+						 REG_ADVANCED | REG_NOSUB,
+						 DEFAULT_COLLATION_OID);
+		if (err)
+		{
+			char		errstr[100];
+
+			pg_regerror(err, &(pregex->regex), errstr, sizeof(errstr));
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+					 errmsg("invalid regular expression: %s", errstr)));
+		}
+
+		/* Have the regex freed when the current memory context goes away */
+		pregex->mcallback.func = regex_affix_deletion_callback;
+		pregex->mcallback.arg = (void *) pregex;
+		MemoryContextRegisterResetCallback(CurrentMemoryContext,
+										   &pregex->mcallback);
+	}
+
+	/*
+	 * A rule marked FF_COMPOUNDONLY or FF_COMPOUNDPERMITFLAG always gets
+	 * FF_COMPOUNDFLAG set as well.
+	 */
+	Affix->flagflags = flagflags;
+	if ((Affix->flagflags & FF_COMPOUNDONLY) || (Affix->flagflags & FF_COMPOUNDPERMITFLAG))
+	{
+		if ((Affix->flagflags & FF_COMPOUNDFLAG) == 0)
+			Affix->flagflags |= FF_COMPOUNDFLAG;
+	}
+	Affix->flag = cpstrdup(Conf, flag);
+	Affix->type = type;
+
+	/* Empty find/repl strings share VoidString instead of being copied */
+	Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString;
+	if ((Affix->replen = strlen(repl)) > 0)
+		Affix->repl = cpstrdup(Conf, repl);
+	else
+		Affix->repl = VoidString;
+	Conf->naffixes++;
+}
+
+/*
+ * Parsing states for parse_affentry() and friends.
+ *
+ * get_nextfield() reuses PAE_WAIT_MASK for "skipping whitespace before a
+ * field" and PAE_INMASK for "inside a field"; parse_ooaffentry() uses the
+ * PAE_WAIT_* values to track which field is read next.
+ */
+#define PAE_WAIT_MASK	0
+#define PAE_INMASK		1
+#define PAE_WAIT_FIND	2
+#define PAE_INFIND		3		/* NOTE(review): presumably "inside <find>" */
+#define PAE_WAIT_REPL	4
+#define PAE_INREPL		5		/* NOTE(review): presumably "inside <replace>" */
+#define PAE_WAIT_TYPE	6
+#define PAE_WAIT_FLAG	7
+
+/*
+ * Extract the next whitespace-delimited field of an .affix file line.
+ *
+ * *str: input position; on return it points at the whitespace character
+ *		 that ended the field, or at the end of the line.
+ * next: output buffer, which must be BUFSIZ bytes long.  The field is
+ *		 copied there and null-terminated; characters that do not fit are
+ *		 silently dropped.
+ *
+ * A '#' met before the field starts ends the line.
+ *
+ * Returns true if a (nonempty) field was found, false otherwise.
+ */
+static bool
+get_nextfield(char **str, char *next)
+{
+	int			state = PAE_WAIT_MASK;
+	int			avail = BUFSIZ;
+	int			clen;
+
+	for (; **str; *str += pg_mblen(*str))
+	{
+		if (state == PAE_WAIT_MASK)
+		{
+			/* still in front of the field */
+			if (t_iseq(*str, '#'))
+				return false;
+			if (t_isspace(*str))
+				continue;
+			state = PAE_INMASK;
+		}
+		else if (t_isspace(*str))
+		{
+			/* whitespace after at least one character ends the field */
+			*next = '\0';
+			return true;
+		}
+
+		/* copy this character, provided the buffer still has room for it */
+		clen = pg_mblen(*str);
+		if (clen < avail)
+		{
+			COPYCHAR(next, *str);
+			next += clen;
+			avail -= clen;
+		}
+	}
+
+	*next = '\0';
+
+	/* hitting end of line is fine as long as the field is not empty */
+	return (state == PAE_INMASK);
+}
+
+/*
+ * Splits one line of a MySpell/Hunspell .affix file into its fields.
+ *
+ * A line is either an affix header
+ *	 <type>  <flag>  <cross_flag>  <flag_count>
+ * or an affix rule
+ *	 <type>  <flag>  <find>  <replace>	<mask>
+ *
+ * str is the input line.  The fields are stored, in the order they appear,
+ * into type, flag, find, repl and mask; each must be a buffer of size BUFSIZ.
+ *
+ * Returns the number of fields found (0..5).  Buffers of fields that were
+ * not found hold empty strings.
+ */
+static int
+parse_ooaffentry(char *str, char *type, char *flag, char *find,
+				 char *repl, char *mask)
+{
+	char	   *dest[5];
+	int			nfields = 0;
+
+	/* Destination buffers in the order the fields occur on the line */
+	dest[0] = type;
+	dest[1] = flag;
+	dest[2] = find;
+	dest[3] = repl;
+	dest[4] = mask;
+
+	*type = *flag = *find = *repl = *mask = '\0';
+
+	/* Stop at end of line, at the first missing field, or after five */
+	while (*str && nfields < 5)
+	{
+		if (!get_nextfield(&str, dest[nfields]))
+			break;
+		nfields++;
+	}
+
+	return nfields;
+}
+
+/*
+ * Append the (possibly multibyte) character at src to the output buffer
+ * tracked by *dst and *avail, advancing both.  A character that would not
+ * leave room for the terminating NUL is dropped instead of being written
+ * past the end of the buffer.
+ */
+static void
+append_affchar(char **dst, int *avail, const char *src)
+{
+	int			clen = pg_mblen(src);
+
+	if (clen < *avail)
+	{
+		COPYCHAR(*dst, src);
+		*dst += clen;
+		*avail -= clen;
+	}
+}
+
+/*
+ * Parses entry of an .affix file of Ispell format
+ *
+ * An .affix file entry has the following format:
+ * <mask>  >  [-<find>,]<replace>
+ *
+ * str is the input line.
+ * mask, find and repl receive the parsed fields; each must be a buffer of
+ * size BUFSIZ.  Input that does not fit is truncated rather than overrunning
+ * the buffer.
+ *
+ * Returns true if the line holds a mask and at least one of find/replace.
+ * Returns false for comment lines and for lines without a usable rule.
+ * Raises a "syntax error" for unexpected characters in find/replace text.
+ */
+static bool
+parse_affentry(char *str, char *mask, char *find, char *repl)
+{
+	int			state = PAE_WAIT_MASK;
+	char	   *pmask = mask,
+			   *pfind = find,
+			   *prepl = repl;
+	int			maskavail = BUFSIZ,
+				findavail = BUFSIZ,
+				replavail = BUFSIZ;
+
+	*mask = *find = *repl = '\0';
+
+	while (*str)
+	{
+		if (state == PAE_WAIT_MASK)
+		{
+			if (t_iseq(str, '#'))
+				return false;
+			else if (!t_isspace(str))
+			{
+				append_affchar(&pmask, &maskavail, str);
+				state = PAE_INMASK;
+			}
+		}
+		else if (state == PAE_INMASK)
+		{
+			if (t_iseq(str, '>'))
+			{
+				*pmask = '\0';
+				state = PAE_WAIT_FIND;
+			}
+			else if (!t_isspace(str))
+				append_affchar(&pmask, &maskavail, str);
+		}
+		else if (state == PAE_WAIT_FIND)
+		{
+			if (t_iseq(str, '-'))
+			{
+				state = PAE_INFIND;
+			}
+			else if (t_isalpha(str) || t_iseq(str, '\'') /* english 's */ )
+			{
+				/* No "-<find>," part: this is already the replace text */
+				append_affchar(&prepl, &replavail, str);
+				state = PAE_INREPL;
+			}
+			else if (!t_isspace(str))
+				ereport(ERROR,
+						(errcode(ERRCODE_CONFIG_FILE_ERROR),
+						 errmsg("syntax error")));
+		}
+		else if (state == PAE_INFIND)
+		{
+			if (t_iseq(str, ','))
+			{
+				*pfind = '\0';
+				state = PAE_WAIT_REPL;
+			}
+			else if (t_isalpha(str))
+				append_affchar(&pfind, &findavail, str);
+			else if (!t_isspace(str))
+				ereport(ERROR,
+						(errcode(ERRCODE_CONFIG_FILE_ERROR),
+						 errmsg("syntax error")));
+		}
+		else if (state == PAE_WAIT_REPL)
+		{
+			if (t_iseq(str, '-'))
+			{
+				break;			/* void repl */
+			}
+			else if (t_isalpha(str))
+			{
+				append_affchar(&prepl, &replavail, str);
+				state = PAE_INREPL;
+			}
+			else if (!t_isspace(str))
+				ereport(ERROR,
+						(errcode(ERRCODE_CONFIG_FILE_ERROR),
+						 errmsg("syntax error")));
+		}
+		else if (state == PAE_INREPL)
+		{
+			if (t_iseq(str, '#'))
+			{
+				*prepl = '\0';
+				break;
+			}
+			else if (t_isalpha(str))
+				append_affchar(&prepl, &replavail, str);
+			else if (!t_isspace(str))
+				ereport(ERROR,
+						(errcode(ERRCODE_CONFIG_FILE_ERROR),
+						 errmsg("syntax error")));
+		}
+		else
+			elog(ERROR, "unrecognized state in parse_affentry: %d", state);
+
+		str += pg_mblen(str);
+	}
+
+	/* append_affchar() always leaves room for these terminators */
+	*pmask = *pfind = *prepl = '\0';
+
+	return (*mask && (*find || *repl));
+}
+
+/*
+ * Fills in a CompoundAffixFlag entry from a textual affix flag.
+ *
+ * Conf: current dictionary; its flagMode decides how s is interpreted.
+ * entry: the entry to fill in.
+ * s: the affix flag as a string.
+ * val: the affix parameter (FF_* bits) to associate with the flag.
+ *
+ * In FM_NUM mode the flag is stored as a number and must lie in
+ * 0..FLAGNUM_MAXSIZE; otherwise a copy of the string is stored.
+ */
+static void
+setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry,
+						  char *s, uint32 val)
+{
+	if (Conf->flagMode == FM_NUM)
+	{
+		char	   *next;
+		long		i;
+
+		/*
+		 * strtol() only sets errno on failure, so clear it first; otherwise
+		 * a stale ERANGE from earlier code would be reported here.  Keep the
+		 * result as long so that the range check below sees the real value
+		 * rather than one truncated to int.
+		 */
+		errno = 0;
+		i = strtol(s, &next, 10);
+		if (s == next || errno == ERANGE)
+			ereport(ERROR,
+					(errcode(ERRCODE_CONFIG_FILE_ERROR),
+					 errmsg("invalid affix flag \"%s\"", s)));
+		if (i < 0 || i > FLAGNUM_MAXSIZE)
+			ereport(ERROR,
+					(errcode(ERRCODE_CONFIG_FILE_ERROR),
+					 errmsg("affix flag \"%s\" is out of range", s)));
+
+		entry->flag.i = i;
+	}
+	else
+		entry->flag.s = cpstrdup(Conf, s);
+
+	entry->flagMode = Conf->flagMode;
+	entry->value = val;
+}
+
+/*
+ * Sets up a correspondence for the affix parameter with the affix flag.
+ *
+ * Conf: current dictionary.
+ * s: affix flag in string; leading whitespace is skipped and the flag ends
+ *	  at the next whitespace or end of string.
+ * val: affix parameter.
+ *
+ * The new entry is appended to Conf->CompoundAffixFlags, which is grown as
+ * needed, and Conf->usecompound is set.  Raises a "syntax error" if s holds
+ * nothing but whitespace.
+ */
+static void
+addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
+{
+	CompoundAffixFlag *newValue;
+	char		sbuf[BUFSIZ];
+	char	   *sflag;
+	int			clen;
+	int			avail = sizeof(sbuf);
+
+	while (*s && t_isspace(s))
+		s += pg_mblen(s);
+
+	if (!*s)
+		ereport(ERROR,
+				(errcode(ERRCODE_CONFIG_FILE_ERROR),
+				 errmsg("syntax error")));
+
+	/*
+	 * Get flag without \n.  Characters that would not leave room for the
+	 * terminating NUL are dropped, so an over-long token cannot overrun
+	 * sbuf.
+	 */
+	sflag = sbuf;
+	while (*s && !t_isspace(s) && *s != '\n')
+	{
+		clen = pg_mblen(s);
+		if (clen < avail)
+		{
+			COPYCHAR(sflag, s);
+			sflag += clen;
+			avail -= clen;
+		}
+		s += clen;
+	}
+	*sflag = '\0';
+
+	/* Resize array or allocate memory for array CompoundAffixFlag */
+	if (Conf->nCompoundAffixFlag >= Conf->mCompoundAffixFlag)
+	{
+		if (Conf->mCompoundAffixFlag)
+		{
+			Conf->mCompoundAffixFlag *= 2;
+			Conf->CompoundAffixFlags = (CompoundAffixFlag *)
+				repalloc((void *) Conf->CompoundAffixFlags,
+						 Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
+		}
+		else
+		{
+			Conf->mCompoundAffixFlag = 10;
+			Conf->CompoundAffixFlags = (CompoundAffixFlag *)
+				tmpalloc(Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
+		}
+	}
+
+	newValue = Conf->CompoundAffixFlags + Conf->nCompoundAffixFlag;
+
+	setCompoundAffixFlagValue(Conf, newValue, sbuf, val);
+
+	Conf->usecompound = true;
+	Conf->nCompoundAffixFlag++;
+}
+
+/*
+ * Computes the affix parameters that belong to a set of affix flags.
+ *
+ * Conf: current dictionary.
+ * s: string holding the set of affix flags.
+ *
+ * Each flag of the set is looked up in Conf->CompoundAffixFlags and the
+ * values of the flags that are found are OR'ed together.  Returns 0 if the
+ * dictionary has no compound affix flags at all.
+ */
+static int
+getCompoundAffixFlagValue(IspellDict *Conf, char *s)
+{
+	uint32		result = 0;
+	CompoundAffixFlag probe;
+	char		oneflag[BUFSIZ];
+	char	   *pos;
+
+	if (Conf->nCompoundAffixFlag == 0)
+		return 0;
+
+	for (pos = s; *pos;)
+	{
+		CompoundAffixFlag *hit;
+
+		/* Pull the next flag out of the set and turn it into a search key */
+		getNextFlagFromString(Conf, &pos, oneflag);
+		setCompoundAffixFlagValue(Conf, &probe, oneflag, 0);
+
+		hit = (CompoundAffixFlag *)
+			bsearch(&probe, (void *) Conf->CompoundAffixFlags,
+					Conf->nCompoundAffixFlag, sizeof(CompoundAffixFlag),
+					cmpcmdflag);
+		if (hit != NULL)
+			result |= hit->value;
+	}
+
+	return result;
+}
+
+/*
+ * Returns a flag set using the s parameter.
+ *
+ * If Conf->useFlagAliases is true then the s parameter is index of the
+ * Conf->AffixData array and function returns its entry.
+ * Else function returns the s parameter.
+ *
+ * An empty s is returned unchanged.  Raises an "invalid affix alias" error
+ * if s does not start with a number, if the number does not fit in a long,
+ * or if it is greater than Conf->nAffixData.
+ */
+static char *
+getAffixFlagSet(IspellDict *Conf, char *s)
+{
+	if (Conf->useFlagAliases && *s != '\0')
+	{
+		long		curaffix;
+		char	   *end;
+
+		/*
+		 * strtol() only sets errno on failure, so clear it first; otherwise
+		 * a stale ERANGE from earlier code would be reported here.  The
+		 * result stays a long so that huge values are rejected below instead
+		 * of being truncated into a valid-looking index.
+		 */
+		errno = 0;
+		curaffix = strtol(s, &end, 10);
+		if (s == end || errno == ERANGE)
+			ereport(ERROR,
+					(errcode(ERRCODE_CONFIG_FILE_ERROR),
+					 errmsg("invalid affix alias \"%s\"", s)));
+
+		if (curaffix > 0 && curaffix < Conf->nAffixData)
+
+			/*
+			 * Do not subtract 1 from curaffix because empty string was added
+			 * in NIImportOOAffixes
+			 */
+			return Conf->AffixData[curaffix];
+		else if (curaffix > Conf->nAffixData)
+			ereport(ERROR,
+					(errcode(ERRCODE_CONFIG_FILE_ERROR),
+					 errmsg("invalid affix alias \"%s\"", s)));
+
+		/*
+		 * NOTE(review): an index <= 0 or exactly equal to nAffixData ends up
+		 * here and silently yields the empty flag set -- confirm whether
+		 * that is intended.
+		 */
+		return VoidString;
+	}
+	else
+		return s;
+}
+
+/*
+ * Import an affix file that follows MySpell or Hunspell format.
+ *
+ * Conf: current dictionary.
+ * filename: path to the .affix file.
+ *
+ * The file is read twice.  The first pass only looks for the FLAG command
+ * and the compound-related flag commands, so that the flag mode and the
+ * table of compound flags are known before any rule is parsed.  The second
+ * pass handles AF (flag alias) lines and the PFX/SFX headers and rules;
+ * all other lines are ignored.
+ *
+ * Raises an error if the file cannot be opened or contains invalid FLAG or
+ * AF commands.
+ */
+static void
+NIImportOOAffixes(IspellDict *Conf, const char *filename)
+{
+	char		type[BUFSIZ],
+			   *ptype = NULL;
+	char		sflag[BUFSIZ];
+	char		mask[BUFSIZ],
+			   *pmask;
+	char		find[BUFSIZ],
+			   *pfind;
+	char		repl[BUFSIZ],
+			   *prepl;
+	bool		isSuffix = false;
+	int			naffix = 0,
+				curaffix = 0;
+	int			sflaglen = 0;
+	char		flagflags = 0;
+	tsearch_readline_state trst;
+	char	   *recoded;
+
+	/* read file to find any flag */
+	Conf->usecompound = false;
+	Conf->useFlagAliases = false;
+	Conf->flagMode = FM_CHAR;
+
+	if (!tsearch_readline_begin(&trst, filename))
+		ereport(ERROR,
+				(errcode(ERRCODE_CONFIG_FILE_ERROR),
+				 errmsg("could not open affix file \"%s\": %m",
+						filename)));
+
+	/* First pass: collect flag mode and compound flag definitions */
+	while ((recoded = tsearch_readline(&trst)) != NULL)
+	{
+		/* Skip empty lines, lines starting with whitespace, and comments */
+		if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
+		{
+			pfree(recoded);
+			continue;
+		}
+
+		if (STRNCMP(recoded, "COMPOUNDFLAG") == 0)
+			addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDFLAG"),
+									  FF_COMPOUNDFLAG);
+		else if (STRNCMP(recoded, "COMPOUNDBEGIN") == 0)
+			addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDBEGIN"),
+									  FF_COMPOUNDBEGIN);
+		else if (STRNCMP(recoded, "COMPOUNDLAST") == 0)
+			addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDLAST"),
+									  FF_COMPOUNDLAST);
+		/* COMPOUNDLAST and COMPOUNDEND are synonyms */
+		else if (STRNCMP(recoded, "COMPOUNDEND") == 0)
+			addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDEND"),
+									  FF_COMPOUNDLAST);
+		else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == 0)
+			addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDMIDDLE"),
+									  FF_COMPOUNDMIDDLE);
+		else if (STRNCMP(recoded, "ONLYINCOMPOUND") == 0)
+			addCompoundAffixFlagValue(Conf, recoded + strlen("ONLYINCOMPOUND"),
+									  FF_COMPOUNDONLY);
+		else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == 0)
+			addCompoundAffixFlagValue(Conf,
+									  recoded + strlen("COMPOUNDPERMITFLAG"),
+									  FF_COMPOUNDPERMITFLAG);
+		else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == 0)
+			addCompoundAffixFlagValue(Conf,
+									  recoded + strlen("COMPOUNDFORBIDFLAG"),
+									  FF_COMPOUNDFORBIDFLAG);
+		else if (STRNCMP(recoded, "FLAG") == 0)
+		{
+			char	   *s = recoded + strlen("FLAG");
+
+			while (*s && t_isspace(s))
+				s += pg_mblen(s);
+
+			/* A FLAG command without a value leaves the mode unchanged */
+			if (*s)
+			{
+				if (STRNCMP(s, "long") == 0)
+					Conf->flagMode = FM_LONG;
+				else if (STRNCMP(s, "num") == 0)
+					Conf->flagMode = FM_NUM;
+				else if (STRNCMP(s, "default") != 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_CONFIG_FILE_ERROR),
+							 errmsg("Ispell dictionary supports only "
+									"\"default\", \"long\", "
+									"and \"num\" flag values")));
+			}
+		}
+
+		pfree(recoded);
+	}
+	tsearch_readline_end(&trst);
+
+	/* Sort the compound flags; lookups in this array use bsearch() */
+	if (Conf->nCompoundAffixFlag > 1)
+		qsort((void *) Conf->CompoundAffixFlags, Conf->nCompoundAffixFlag,
+			  sizeof(CompoundAffixFlag), cmpcmdflag);
+
+	/* Second pass: read the file again for aliases and affix rules */
+	if (!tsearch_readline_begin(&trst, filename))
+		ereport(ERROR,
+				(errcode(ERRCODE_CONFIG_FILE_ERROR),
+				 errmsg("could not open affix file \"%s\": %m",
+						filename)));
+
+	while ((recoded = tsearch_readline(&trst)) != NULL)
+	{
+		int			fields_read;
+
+		if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
+			goto nextline;
+
+		fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask);
+
+		/* ptype survives across iterations; drop the previous line's copy */
+		if (ptype)
+			pfree(ptype);
+		ptype = lowerstr_ctx(Conf, type);
+
+		/* First try to parse AF parameter (alias compression) */
+		if (STRNCMP(ptype, "af") == 0)
+		{
+			/* First line is the number of aliases */
+			if (!Conf->useFlagAliases)
+			{
+				Conf->useFlagAliases = true;
+				naffix = atoi(sflag);
+				if (naffix <= 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_CONFIG_FILE_ERROR),
+							 errmsg("invalid number of flag vector aliases")));
+
+				/* Also reserve place for empty flag set */
+				naffix++;
+
+				Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));
+				Conf->lenAffixData = Conf->nAffixData = naffix;
+
+				/* Add empty flag set into AffixData */
+				Conf->AffixData[curaffix] = VoidString;
+				curaffix++;
+			}
+			/* Other lines are aliases */
+			else
+			{
+				if (curaffix < naffix)
+				{
+					Conf->AffixData[curaffix] = cpstrdup(Conf, sflag);
+					curaffix++;
+				}
+				else
+					ereport(ERROR,
+							(errcode(ERRCODE_CONFIG_FILE_ERROR),
+							 errmsg("number of aliases exceeds specified number %d",
+									naffix - 1)));
+			}
+			goto nextline;
+		}
+		/* Else try to parse prefixes and suffixes */
+		if (fields_read < 4 ||
+			(STRNCMP(ptype, "sfx") != 0 && STRNCMP(ptype, "pfx") != 0))
+			goto nextline;
+
+		/* Ignore entries whose flag is too long for the current flag mode */
+		sflaglen = strlen(sflag);
+		if (sflaglen == 0
+			|| (sflaglen > 1 && Conf->flagMode == FM_CHAR)
+			|| (sflaglen > 2 && Conf->flagMode == FM_LONG))
+			goto nextline;
+
+		/*--------
+		 * Affix header. For example:
+		 * SFX \ N 1
+		 *--------
+		 */
+		if (fields_read == 4)
+		{
+			/*
+			 * The header's settings (isSuffix, flagflags) stay in effect for
+			 * the rule lines that follow.  The third header field was parsed
+			 * into "find"; 'y'/'Y' there enables cross product.
+			 */
+			isSuffix = (STRNCMP(ptype, "sfx") == 0);
+			if (t_iseq(find, 'y') || t_iseq(find, 'Y'))
+				flagflags = FF_CROSSPRODUCT;
+			else
+				flagflags = 0;
+		}
+		/*--------
+		 * Affix fields. For example:
+		 * SFX \   0	Y/L [^Y]
+		 *--------
+		 */
+		else
+		{
+			char	   *ptr;
+			int			aflg = 0;
+
+			/* Get flags after '/' (flags are case sensitive) */
+			if ((ptr = strchr(repl, '/')) != NULL)
+				aflg |= getCompoundAffixFlagValue(Conf,
+												  getAffixFlagSet(Conf,
+																  ptr + 1));
+			/* Get lowercased version of string before '/' */
+			prepl = lowerstr_ctx(Conf, repl);
+			if ((ptr = strchr(prepl, '/')) != NULL)
+				*ptr = '\0';
+			pfind = lowerstr_ctx(Conf, find);
+			pmask = lowerstr_ctx(Conf, mask);
+			/* A field starting with '0' stands for the empty string */
+			if (t_iseq(find, '0'))
+				*pfind = '\0';
+			if (t_iseq(repl, '0'))
+				*prepl = '\0';
+
+			NIAddAffix(Conf, sflag, flagflags | aflg, pmask, pfind, prepl,
+					   isSuffix ? FF_SUFFIX : FF_PREFIX);
+			pfree(prepl);
+			pfree(pfind);
+			pfree(pmask);
+		}
+
+nextline:
+		pfree(recoded);
+	}
+
+	tsearch_readline_end(&trst);
+	if (ptype)
+		pfree(ptype);
+}
+
+/*
+ * import affixes
+ *
+ * Note caller must already have applied get_tsearch_config_filename
+ *
+ * This function is responsible for parsing ispell ("old format") affix files.
+ * If we realize that the file contains new-format commands, we pass off the
+ * work to NIImportOOAffixes(), which will re-read the whole file.
+ *
+ * Conf: current dictionary.
+ * filename: path to the .affix file.
+ *
+ * Raises an error if the file cannot be opened, or if it mixes old-style and
+ * new-style commands.
+ */
+void
+NIImportAffixes(IspellDict *Conf, const char *filename)
+{
+	char	   *pstr = NULL;
+	char		flag[BUFSIZ];
+	char		mask[BUFSIZ];
+	char		find[BUFSIZ];
+	char		repl[BUFSIZ];
+	char	   *s;
+	bool		suffixes = false;
+	bool		prefixes = false;
+	char		flagflags = 0;
+	tsearch_readline_state trst;
+	bool		oldformat = false;
+	char	   *recoded = NULL;
+
+	if (!tsearch_readline_begin(&trst, filename))
+		ereport(ERROR,
+				(errcode(ERRCODE_CONFIG_FILE_ERROR),
+				 errmsg("could not open affix file \"%s\": %m",
+						filename)));
+
+	Conf->usecompound = false;
+	Conf->useFlagAliases = false;
+	Conf->flagMode = FM_CHAR;
+
+	while ((recoded = tsearch_readline(&trst)) != NULL)
+	{
+		/* Keep both the original line and a lowercased copy of it */
+		pstr = lowerstr(recoded);
+
+		/* Skip comments and empty lines */
+		if (*pstr == '#' || *pstr == '\n')
+			goto nextline;
+
+		if (STRNCMP(pstr, "compoundwords") == 0)
+		{
+			/* Find case-insensitive L flag in non-lowercased string */
+			s = findchar2(recoded, 'l', 'L');
+			if (s)
+			{
+				/* Skip the rest of that word and the whitespace after it */
+				while (*s && !t_isspace(s))
+					s += pg_mblen(s);
+				while (*s && t_isspace(s))
+					s += pg_mblen(s);
+
+				if (*s && pg_mblen(s) == 1)
+				{
+					addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG);
+					Conf->usecompound = true;
+				}
+				oldformat = true;
+				goto nextline;
+			}
+		}
+		if (STRNCMP(pstr, "suffixes") == 0)
+		{
+			suffixes = true;
+			prefixes = false;
+			oldformat = true;
+			goto nextline;
+		}
+		if (STRNCMP(pstr, "prefixes") == 0)
+		{
+			suffixes = false;
+			prefixes = true;
+			oldformat = true;
+			goto nextline;
+		}
+		if (STRNCMP(pstr, "flag") == 0)
+		{
+			s = recoded + 4;	/* we need non-lowercased string */
+			flagflags = 0;
+
+			while (*s && t_isspace(s))
+				s += pg_mblen(s);
+
+			if (*s == '*')
+			{
+				flagflags |= FF_CROSSPRODUCT;
+				s++;
+			}
+			else if (*s == '~')
+			{
+				flagflags |= FF_COMPOUNDONLY;
+				s++;
+			}
+
+			if (*s == '\\')
+				s++;
+
+			/*
+			 * An old-format flag is a single ASCII character; we expect it to
+			 * be followed by EOL, whitespace, or ':'.  Otherwise this is a
+			 * new-format flag command.
+			 */
+			if (*s && pg_mblen(s) == 1)
+			{
+				COPYCHAR(flag, s);
+				flag[1] = '\0';
+
+				s++;
+				if (*s == '\0' || *s == '#' || *s == '\n' || *s == ':' ||
+					t_isspace(s))
+				{
+					oldformat = true;
+					goto nextline;
+				}
+			}
+			goto isnewformat;
+		}
+		if (STRNCMP(recoded, "COMPOUNDFLAG") == 0 ||
+			STRNCMP(recoded, "COMPOUNDMIN") == 0 ||
+			STRNCMP(recoded, "PFX") == 0 ||
+			STRNCMP(recoded, "SFX") == 0)
+			goto isnewformat;
+
+		/* Rules are only meaningful inside a suffixes/prefixes section */
+		if ((!suffixes) && (!prefixes))
+			goto nextline;
+
+		if (!parse_affentry(pstr, mask, find, repl))
+			goto nextline;
+
+		/*
+		 * NOTE(review): "flag" is only written by a preceding old-format
+		 * "flag" line; a rule that appears before any such line passes an
+		 * uninitialized buffer here.
+		 */
+		NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);
+
+nextline:
+		pfree(recoded);
+		pfree(pstr);
+	}
+	tsearch_readline_end(&trst);
+	return;
+
+isnewformat:
+	if (oldformat)
+		ereport(ERROR,
+				(errcode(ERRCODE_CONFIG_FILE_ERROR),
+				 errmsg("affix file contains both old-style and new-style commands")));
+	tsearch_readline_end(&trst);
+
+	/*
+	 * We jumped out of the loop without passing through "nextline", so the
+	 * current line and its lowercased copy are still allocated.  Release
+	 * them before re-reading the file.
+	 */
+	pfree(recoded);
+	pfree(pstr);
+
+	NIImportOOAffixes(Conf, filename);
+}
+
+/*
+ * Combines the affix flag sets with indexes a1 and a2 into a new flag set
+ * that is appended to Conf->AffixData.
+ *
+ * If either set is empty nothing is stored and the index of the other set is
+ * returned.  Otherwise the two strings are concatenated (separated by a comma
+ * in FM_NUM mode) and the index of the new entry is returned.  The slot
+ * after the new entry is set to NULL.
+ */
+static int
+MergeAffix(IspellDict *Conf, int a1, int a2)
+{
+	char	   *first;
+	char	   *second;
+	char	   *merged;
+	int			slot;
+
+	Assert(a1 < Conf->nAffixData && a2 < Conf->nAffixData);
+
+	first = Conf->AffixData[a1];
+	second = Conf->AffixData[a2];
+
+	/* Nothing to merge when one side is the empty flag set */
+	if (*first == '\0')
+		return a2;
+	if (*second == '\0')
+		return a1;
+
+	/* Grow the array when the new entry plus its NULL marker would not fit */
+	if (Conf->nAffixData + 1 >= Conf->lenAffixData)
+	{
+		Conf->lenAffixData *= 2;
+		Conf->AffixData = (char **) repalloc(Conf->AffixData,
+											 sizeof(char *) * Conf->lenAffixData);
+	}
+
+	slot = Conf->nAffixData;
+	if (Conf->flagMode == FM_NUM)
+	{
+		/* numeric flags are comma-separated: two strings, ',' and '\0' */
+		merged = cpalloc(strlen(first) + strlen(second) + 2);
+		sprintf(merged, "%s,%s", first, second);
+	}
+	else
+	{
+		/* two strings and '\0' */
+		merged = cpalloc(strlen(first) + strlen(second) + 1);
+		sprintf(merged, "%s%s", first, second);
+	}
+
+	Conf->AffixData[slot] = merged;
+	Conf->AffixData[slot + 1] = NULL;
+	Conf->nAffixData = slot + 1;
+
+	return slot;
+}
+
+/*
+ * Looks up the flag set stored at index "affix" of Conf->AffixData and
+ * returns its affix parameters, restricted to the FF_COMPOUNDFLAGMASK bits.
+ */
+static uint32
+makeCompoundFlags(IspellDict *Conf, int affix)
+{
+	char	   *flagset;
+
+	Assert(affix < Conf->nAffixData);
+
+	flagset = Conf->AffixData[affix];
+
+	return getCompoundAffixFlagValue(Conf, flagset) & FF_COMPOUNDFLAGMASK;
+}
+
+/*
+ * Makes a prefix tree for the given level.
+ *
+ * Conf: current dictionary.
+ * low: lower index of the Conf->Spell array.
+ * high: upper index of the Conf->Spell array.
+ * level: current prefix tree level.
+ *
+ * Only the entries low .. high-1 are examined (high is exclusive).  Returns
+ * the node for this level, or NULL if none of those words is longer than
+ * "level" bytes.
+ *
+ * NOTE(review): the run detection below compares each word only with its
+ * predecessor, so it presumably relies on Conf->Spell being sorted by word;
+ * NISortDictionary() sorts with cmpspell before calling -- confirm.
+ */
+static SPNode *
+mkSPNode(IspellDict *Conf, int low, int high, int level)
+{
+	int			i;
+	int			nchar = 0;
+	char		lastchar = '\0';
+	SPNode	   *rs;
+	SPNodeData *data;
+	int			lownew = low;
+
+	/* Count how many times the byte at position "level" changes */
+	for (i = low; i < high; i++)
+		if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level])
+		{
+			nchar++;
+			lastchar = Conf->Spell[i]->word[level];
+		}
+
+	if (!nchar)
+		return NULL;
+
+	/* One SPNodeData slot per counted byte value; zeroed by cpalloc0 */
+	rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
+	rs->length = nchar;
+	data = rs->data;
+
+	lastchar = '\0';
+	for (i = low; i < high; i++)
+		if (Conf->Spell[i]->p.d.len > level)
+		{
+			if (lastchar != Conf->Spell[i]->word[level])
+			{
+				/* Byte changed: close the previous slot, if there was one */
+				if (lastchar)
+				{
+					/* Next level of the prefix tree */
+					data->node = mkSPNode(Conf, lownew, i, level + 1);
+					lownew = i;
+					data++;
+				}
+				lastchar = Conf->Spell[i]->word[level];
+			}
+			data->val = ((uint8 *) (Conf->Spell[i]->word))[level];
+			/* Does this word end exactly at this level? */
+			if (Conf->Spell[i]->p.d.len == level + 1)
+			{
+				bool		clearCompoundOnly = false;
+
+				/* Same word seen before with a different affix set */
+				if (data->isword && data->affix != Conf->Spell[i]->p.d.affix)
+				{
+					/*
+					 * MergeAffix called a few times. If one of word is
+					 * allowed to be in compound word and another isn't, then
+					 * clear FF_COMPOUNDONLY flag.
+					 */
+
+					clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag
+										 & makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix))
+						? false : true;
+					data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix);
+				}
+				else
+					data->affix = Conf->Spell[i]->p.d.affix;
+				data->isword = 1;
+
+				data->compoundflag = makeCompoundFlags(Conf, data->affix);
+
+				/* FF_COMPOUNDONLY implies FF_COMPOUNDFLAG */
+				if ((data->compoundflag & FF_COMPOUNDONLY) &&
+					(data->compoundflag & FF_COMPOUNDFLAG) == 0)
+					data->compoundflag |= FF_COMPOUNDFLAG;
+
+				if (clearCompoundOnly)
+					data->compoundflag &= ~FF_COMPOUNDONLY;
+			}
+		}
+
+	/* Next level of the prefix tree */
+	data->node = mkSPNode(Conf, lownew, high, level + 1);
+
+	return rs;
+}
+
+/*
+ * Builds the Conf->Dictionary tree and AffixData from the imported dictionary
+ * and affixes.
+ *
+ * For every Conf->Spell entry the textual flag field is replaced by an index
+ * into Conf->AffixData plus the word length.  With flag aliases the index is
+ * parsed from the flag text and validated against the alias table; without
+ * them Conf->AffixData is built here from the distinct flag strings.
+ *
+ * Raises an "invalid affix alias" error for a flag that is not a valid
+ * alias index.
+ */
+void
+NISortDictionary(IspellDict *Conf)
+{
+	int			i;
+	int			naffix;
+	int			curaffix;
+
+	/* compress affixes */
+
+	/*
+	 * If we use flag aliases then we need to use Conf->AffixData filled in
+	 * the NIImportOOAffixes().
+	 */
+	if (Conf->useFlagAliases)
+	{
+		for (i = 0; i < Conf->nspell; i++)
+		{
+			char	   *end;
+
+			if (*Conf->Spell[i]->p.flag != '\0')
+			{
+				long		alias;
+
+				/*
+				 * strtol() only sets errno on failure, so clear it first;
+				 * otherwise a stale ERANGE from earlier code would be
+				 * reported here.  Range-check the long result before
+				 * narrowing it, so that huge values cannot be truncated into
+				 * a valid-looking index.
+				 */
+				errno = 0;
+				alias = strtol(Conf->Spell[i]->p.flag, &end, 10);
+				if (Conf->Spell[i]->p.flag == end || errno == ERANGE)
+					ereport(ERROR,
+							(errcode(ERRCODE_CONFIG_FILE_ERROR),
+							 errmsg("invalid affix alias \"%s\"",
+									Conf->Spell[i]->p.flag)));
+				if (alias < 0 || alias >= Conf->nAffixData)
+					ereport(ERROR,
+							(errcode(ERRCODE_CONFIG_FILE_ERROR),
+							 errmsg("invalid affix alias \"%s\"",
+									Conf->Spell[i]->p.flag)));
+				if (*end != '\0' && !t_isdigit(end) && !t_isspace(end))
+					ereport(ERROR,
+							(errcode(ERRCODE_CONFIG_FILE_ERROR),
+							 errmsg("invalid affix alias \"%s\"",
+									Conf->Spell[i]->p.flag)));
+
+				curaffix = (int) alias;
+			}
+			else
+			{
+				/*
+				 * If Conf->Spell[i]->p.flag is empty, then get empty value of
+				 * Conf->AffixData (0 index).
+				 */
+				curaffix = 0;
+			}
+
+			Conf->Spell[i]->p.d.affix = curaffix;
+			Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
+		}
+	}
+	/* Otherwise fill Conf->AffixData here */
+	else
+	{
+		/* Count the number of different flags used in the dictionary */
+		qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *),
+			  cmpspellaffix);
+
+		naffix = 0;
+		for (i = 0; i < Conf->nspell; i++)
+		{
+			if (i == 0 ||
+				strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag) != 0)
+				naffix++;
+		}
+
+		/*
+		 * Fill in Conf->AffixData with the affixes that were used in the
+		 * dictionary. Replace textual flag-field of Conf->Spell entries with
+		 * indexes into Conf->AffixData array.
+		 */
+		Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));
+
+		curaffix = -1;
+		for (i = 0; i < Conf->nspell; i++)
+		{
+			if (i == 0 ||
+				strcmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix]) != 0)
+			{
+				curaffix++;
+				Assert(curaffix < naffix);
+				Conf->AffixData[curaffix] = cpstrdup(Conf,
+													 Conf->Spell[i]->p.flag);
+			}
+
+			Conf->Spell[i]->p.d.affix = curaffix;
+			Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
+		}
+
+		Conf->lenAffixData = Conf->nAffixData = naffix;
+	}
+
+	/* Start build a prefix tree */
+	qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
+	Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
+}
+
+/*
+ * Makes a prefix tree for the given level using the repl string of an affix
+ * rule. Affixes with empty replace string do not include in the prefix tree.
+ * This affixes are included by mkVoidAffix().
+ *
+ * Conf: current dictionary.
+ * low: lower index of the Conf->Affix array.
+ * high: upper index of the Conf->Affix array.
+ * level: current prefix tree level.
+ * type: FF_SUFFIX or FF_PREFIX.
+ *
+ * Only the entries low .. high-1 are examined (high is exclusive).  Returns
+ * the node for this level, or NULL if none of those affixes has a replace
+ * string longer than "level" bytes.
+ */
+static AffixNode *
+mkANode(IspellDict *Conf, int low, int high, int level, int type)
+{
+	int			i;
+	int			nchar = 0;
+	uint8		lastchar = '\0';
+	AffixNode  *rs;
+	AffixNodeData *data;
+	int			lownew = low;
+	int			naff;
+	AFFIX	  **aff;
+
+	/* Count how many times the byte at position "level" changes */
+	for (i = low; i < high; i++)
+		if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type))
+		{
+			nchar++;
+			lastchar = GETCHAR(Conf->Affix + i, level, type);
+		}
+
+	if (!nchar)
+		return NULL;
+
+	/* Scratch list of the affixes that end at the slot being filled */
+	aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1));
+	naff = 0;
+
+	rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
+	rs->length = nchar;
+	data = rs->data;
+
+	lastchar = '\0';
+	for (i = low; i < high; i++)
+		if (Conf->Affix[i].replen > level)
+		{
+			if (lastchar != GETCHAR(Conf->Affix + i, level, type))
+			{
+				/* Byte changed: close the previous slot, if there was one */
+				if (lastchar)
+				{
+					/* Next level of the prefix tree */
+					data->node = mkANode(Conf, lownew, i, level + 1, type);
+					if (naff)
+					{
+						/* Move the collected affixes into the slot */
+						data->naff = naff;
+						data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
+						memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
+						naff = 0;
+					}
+					data++;
+					lownew = i;
+				}
+				lastchar = GETCHAR(Conf->Affix + i, level, type);
+			}
+			data->val = GETCHAR(Conf->Affix + i, level, type);
+			if (Conf->Affix[i].replen == level + 1)
+			{					/* affix stopped */
+				aff[naff++] = Conf->Affix + i;
+			}
+		}
+
+	/* Next level of the prefix tree */
+	data->node = mkANode(Conf, lownew, high, level + 1, type);
+	if (naff)
+	{
+		data->naff = naff;
+		data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
+		memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
+		naff = 0;
+	}
+
+	pfree(aff);
+
+	return rs;
+}
+
+/*
+ * Puts a "void" root node in front of the suffix or prefix tree.
+ *
+ * Conf: current dictionary.
+ * issuffix: true to work on Conf->Suffix, false to work on Conf->Prefix.
+ * startsuffix: index of the first suffix in Conf->Affix; entries before it
+ *				are treated as prefixes.
+ *
+ * The new root points to the previous tree and collects the affixes of the
+ * chosen kind whose replace string ("repl" field) is empty.
+ */
+static void
+mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
+{
+	AffixNode  *root = (AffixNode *) palloc0(ANHRDSZ + sizeof(AffixNodeData));
+	int			first;
+	int			last;
+	int			idx;
+	int			nvoid = 0;
+
+	root->length = 1;
+	root->isvoid = 1;
+
+	/* Pick the index range to scan and hook the new root in front */
+	if (issuffix)
+	{
+		first = startsuffix;
+		last = Conf->naffixes;
+		root->data->node = Conf->Suffix;
+		Conf->Suffix = root;
+	}
+	else
+	{
+		first = 0;
+		last = startsuffix;
+		root->data->node = Conf->Prefix;
+		Conf->Prefix = root;
+	}
+
+	/* How many affixes in the range have an empty replace string? */
+	for (idx = first; idx < last; idx++)
+	{
+		if (Conf->Affix[idx].replen == 0)
+			nvoid++;
+	}
+
+	/* None: the root stays without affixes of its own */
+	if (nvoid == 0)
+		return;
+
+	root->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * nvoid);
+	root->data->naff = (uint32) nvoid;
+
+	/* Second scan stores pointers to those affixes */
+	nvoid = 0;
+	for (idx = first; idx < last; idx++)
+	{
+		if (Conf->Affix[idx].replen == 0)
+			root->data->aff[nvoid++] = Conf->Affix + idx;
+	}
+}
+
+/*
+ * Tells whether any flag set in Conf->AffixData contains affixflag.
+ *
+ * Conf: current dictionary.
+ * affixflag: affix flag.
+ *
+ * Every entry of Conf->AffixData is tested with IsAffixFlagInUse().  Returns
+ * true on the first entry that matches, false if none does.
+ */
+static bool
+isAffixInUse(IspellDict *Conf, char *affixflag)
+{
+	int			idx = 0;
+
+	while (idx < Conf->nAffixData)
+	{
+		if (IsAffixFlagInUse(Conf, idx, affixflag))
+			return true;
+		idx++;
+	}
+
+	return false;
+}
+
+/*
+ * Builds Conf->Prefix and Conf->Suffix trees from the imported affixes.
+ *
+ * Also fills Conf->CompoundAffix with the replace strings of the compound
+ * affixes that are in use; the list ends with an entry whose affix field is
+ * NULL.  Does nothing if no affixes were imported.
+ */
+void
+NISortAffixes(IspellDict *Conf)
+{
+	AFFIX	   *Affix;
+	size_t		i;
+	CMPDAffix  *ptr;
+	int			firstsuffix = Conf->naffixes;
+
+	if (Conf->naffixes == 0)
+		return;
+
+	/* Store compound affixes in the Conf->CompoundAffix array */
+	if (Conf->naffixes > 1)
+		qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
+	Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) * Conf->naffixes);
+	ptr->affix = NULL;
+
+	for (i = 0; i < Conf->naffixes; i++)
+	{
+		Affix = &(((AFFIX *) Conf->Affix)[i]);
+		/* Remember the lowest index that holds a suffix */
+		if (Affix->type == FF_SUFFIX && i < firstsuffix)
+			firstsuffix = i;
+
+		if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
+			isAffixInUse(Conf, Affix->flag))
+		{
+			bool		issuffix = (Affix->type == FF_SUFFIX);
+
+			/*
+			 * Add the affix unless the entry stored just before it has the
+			 * same kind and strbncmp() reports no difference within that
+			 * entry's length.
+			 */
+			if (ptr == Conf->CompoundAffix ||
+				issuffix != (ptr - 1)->issuffix ||
+				strbncmp((const unsigned char *) (ptr - 1)->affix,
+						 (const unsigned char *) Affix->repl,
+						 (ptr - 1)->len))
+			{
+				/* leave only unique and minimal suffixes */
+				ptr->affix = Affix->repl;
+				ptr->len = Affix->replen;
+				ptr->issuffix = issuffix;
+				ptr++;
+			}
+		}
+	}
+	/* Terminate the list and shrink the array to the size actually used */
+	ptr->affix = NULL;
+	Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1));
+
+	/* Start build a prefix tree */
+	Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX);
+	Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX);
+	mkVoidAffix(Conf, true, firstsuffix);
+	mkVoidAffix(Conf, false, firstsuffix);
+}
+
+/*
+ * Walks an affix tree along the bytes of "word" until a tree entry that
+ * carries affixes is reached.
+ *
+ * node: tree node to start from; may be a void root node.
+ * word, wrdlen: the word and its length in bytes.
+ * level: in/out count of word bytes consumed so far.
+ * type: FF_PREFIX or FF_SUFFIX, passed on to GETWCHAR() to select the byte.
+ *
+ * Returns the first entry found that has affixes attached, or NULL if the
+ * walk ends without finding one.
+ */
+static AffixNodeData *
+FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type)
+{
+	if (node->isvoid)
+	{
+		/* A void root either carries affixes itself or just wraps a tree */
+		if (node->data->naff)
+			return node->data;
+		node = node->data->node;
+	}
+
+	while (node != NULL && *level < wrdlen)
+	{
+		uint8		want = GETWCHAR(word, wrdlen, *level, type);
+		AffixNodeData *lo = node->data;
+		AffixNodeData *hi = node->data + node->length;
+		AffixNodeData *hit = NULL;
+
+		/* Binary search for the wanted byte among this node's entries */
+		while (lo < hi)
+		{
+			AffixNodeData *mid = lo + ((hi - lo) >> 1);
+
+			if (mid->val == want)
+			{
+				hit = mid;
+				break;
+			}
+			if (mid->val < want)
+				lo = mid + 1;
+			else
+				hi = mid;
+		}
+
+		/* Byte not present at this level: no affix can match */
+		if (hit == NULL)
+			return NULL;
+
+		(*level)++;
+		if (hit->naff)
+			return hit;
+		node = hit->node;
+	}
+
+	return NULL;
+}
+
+/*
+ * Tries to undo one affix rule on a word.
+ *
+ * word, len: the word and its length in bytes.
+ * Affix: the affix rule to try.
+ * flagflags: 0, or FF_COMPOUND* bits that are checked against the rule's
+ *			  own flags before it is applied.
+ * newword: output buffer that receives the candidate word.
+ * baselen: may be NULL.  For a suffix rule it receives the length of the
+ *			part of the word left unchanged; for a prefix rule it is read to
+ *			reject a candidate that would consist of affixes only.
+ *
+ * Returns newword if the rule is allowed and the candidate satisfies the
+ * rule's condition, otherwise NULL.
+ */
+static char *
+CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *newword, int *baselen)
+{
+	/*
+	 * Check compound allow flags
+	 */
+
+	if (flagflags == 0)
+	{
+		if (Affix->flagflags & FF_COMPOUNDONLY)
+			return NULL;
+	}
+	else if (flagflags & FF_COMPOUNDBEGIN)
+	{
+		if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
+			return NULL;
+		if ((Affix->flagflags & FF_COMPOUNDBEGIN) == 0)
+			if (Affix->type == FF_SUFFIX)
+				return NULL;
+	}
+	else if (flagflags & FF_COMPOUNDMIDDLE)
+	{
+		if ((Affix->flagflags & FF_COMPOUNDMIDDLE) == 0 ||
+			(Affix->flagflags & FF_COMPOUNDFORBIDFLAG))
+			return NULL;
+	}
+	else if (flagflags & FF_COMPOUNDLAST)
+	{
+		if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
+			return NULL;
+		if ((Affix->flagflags & FF_COMPOUNDLAST) == 0)
+			if (Affix->type == FF_PREFIX)
+				return NULL;
+	}
+
+	/*
+	 * make replace pattern of affix
+	 */
+	if (Affix->type == FF_SUFFIX)
+	{
+		/* Overwrite the last replen bytes of the word with "find" */
+		strcpy(newword, word);
+		strcpy(newword + len - Affix->replen, Affix->find);
+		if (baselen)			/* store length of non-changed part of word */
+			*baselen = len - Affix->replen;
+	}
+	else
+	{
+		/*
+		 * if prefix is an all non-changed part's length then all word
+		 * contains only prefix and suffix, so out
+		 */
+		if (baselen && *baselen + strlen(Affix->find) <= Affix->replen)
+			return NULL;
+		/* Replace the first replen bytes of the word with "find" */
+		strcpy(newword, Affix->find);
+		strcat(newword, word + Affix->replen);
+	}
+
+	/*
+	 * check resulting word
+	 */
+	if (Affix->issimple)
+		return newword;
+	else if (Affix->isregis)
+	{
+		if (RS_execute(&(Affix->reg.regis), newword))
+			return newword;
+	}
+	else
+	{
+		pg_wchar   *data;
+		size_t		data_len;
+		int			newword_len;
+
+		/* Convert data string to wide characters */
+		newword_len = strlen(newword);
+		data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar));
+		data_len = pg_mb2wchar_with_len(newword, data, newword_len);
+
+		if (pg_regexec(&(Affix->reg.pregex->regex), data, data_len,
+					   0, NULL, 0, NULL, 0) == REG_OKAY)
+		{
+			pfree(data);
+			return newword;
+		}
+		pfree(data);
+	}
+
+	return NULL;
+}
+
+/*
+ * Append a copy of 'word' at position 'cur' of the NULL-terminated array
+ * 'forms', unless the array is full or 'word' equals the entry stored just
+ * before 'cur'.
+ *
+ * Returns 1 if an entry was added (the caller advances 'cur'), else 0.
+ */
+static int
+addToResult(char **forms, char **cur, char *word)
+{
+	bool		is_first = (forms == cur);
+
+	/* the last slot is reserved for the terminating NULL */
+	if (cur - forms >= MAX_NORM - 1)
+		return 0;
+
+	/* suppress an immediate duplicate */
+	if (!is_first && strcmp(word, *(cur - 1)) == 0)
+		return 0;
+
+	cur[0] = pstrdup(word);
+	cur[1] = NULL;
+
+	return 1;
+}
+
+/*
+ * Collect the normal forms of 'word' that the dictionary knows.
+ *
+ * Candidates are tried in this order: the word itself, the word with a
+ * prefix rule undone, the word with a suffix rule undone, and the word with
+ * a suffix and then a prefix rule undone.  Every candidate is accepted only
+ * if FindWord() confirms it.
+ *
+ * 'flag' is handed unchanged to FindWord() and CheckAffix(); callers in this
+ * file pass 0 or one of the FF_COMPOUND* values.
+ *
+ * Returns a palloc'd, NULL-terminated array of pstrdup'd strings (the array
+ * has room for MAX_NORM pointers), or NULL when the word is longer than
+ * MAXNORMLEN bytes or no form was found.
+ */
+static char **
+NormalizeSubWord(IspellDict *Conf, char *word, int flag)
+{
+	AffixNodeData *suffix = NULL,
+			   *prefix = NULL;
+	int			slevel = 0,
+				plevel = 0;
+	int			wrdlen = strlen(word),
+				swrdlen;
+	char	  **forms;
+	char	  **cur;
+	char		newword[2 * MAXNORMLEN] = "";
+	char		pnewword[2 * MAXNORMLEN] = "";
+	AffixNode  *snode = Conf->Suffix,
+			   *pnode;
+	int			i,
+				j;
+
+	if (wrdlen > MAXNORMLEN)
+		return NULL;
+	cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));
+	*cur = NULL;
+
+
+	/* Check that the word itself is normal form */
+	if (FindWord(Conf, word, VoidString, flag))
+	{
+		*cur = pstrdup(word);
+		cur++;
+		*cur = NULL;
+	}
+
+	/* Find all other NORMAL forms of the 'word' (check only prefix) */
+	pnode = Conf->Prefix;
+	plevel = 0;
+	while (pnode)
+	{
+		/* plevel is carried from one call to the next by FindAffixes() */
+		prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX);
+		if (!prefix)
+			break;
+		for (j = 0; j < prefix->naff; j++)
+		{
+			if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL))
+			{
+				/* prefix success */
+				if (FindWord(Conf, newword, prefix->aff[j]->flag, flag))
+					cur += addToResult(forms, cur, newword);
+			}
+		}
+		pnode = prefix->node;
+	}
+
+	/*
+	 * Find all other NORMAL forms of the 'word' (check suffix and then
+	 * prefix)
+	 */
+	while (snode)
+	{
+		/* set by the suffix CheckAffix(), read by the prefix CheckAffix() */
+		int			baselen = 0;
+
+		/* find possible suffix */
+		suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
+		if (!suffix)
+			break;
+		/* foreach suffix check affix */
+		for (i = 0; i < suffix->naff; i++)
+		{
+			if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen))
+			{
+				/* suffix success */
+				if (FindWord(Conf, newword, suffix->aff[i]->flag, flag))
+					cur += addToResult(forms, cur, newword);
+
+				/* now we will look changed word with prefixes */
+				pnode = Conf->Prefix;
+				plevel = 0;
+				swrdlen = strlen(newword);
+				while (pnode)
+				{
+					prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX);
+					if (!prefix)
+						break;
+					for (j = 0; j < prefix->naff; j++)
+					{
+						if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen))
+						{
+							/* prefix success */
+							/*
+							 * When both rules carry FF_CROSSPRODUCT the lookup
+							 * uses VoidString instead of the prefix's flag.
+							 */
+							char	   *ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ?
+							VoidString : prefix->aff[j]->flag;
+
+							if (FindWord(Conf, pnewword, ff, flag))
+								cur += addToResult(forms, cur, pnewword);
+						}
+					}
+					pnode = prefix->node;
+				}
+			}
+		}
+
+		snode = suffix->node;
+	}
+
+	/* nothing collected: release the array and report failure */
+	if (cur == forms)
+	{
+		pfree(forms);
+		return NULL;
+	}
+	return forms;
+}
+
+/*
+ * One way of splitting a compound word into stems.  Variants are chained
+ * through 'next' into a singly linked list.
+ */
+typedef struct SplitVar
+{
+	int			nstem;			/* number of entries used in stem[] */
+	int			lenstem;		/* allocated length of stem[] */
+	char	  **stem;			/* the stems, in word order */
+	struct SplitVar *next;		/* next variant, or NULL */
+} SplitVar;
+
+/*
+ * Scan the compound-affix list starting at *ptr for an entry that occurs in
+ * 'word' (of length 'len').
+ *
+ * With CheckInPlace the affix has to match at the very start of 'word';
+ * otherwise it may occur anywhere in it.  An entry is considered only if it
+ * is shorter than 'len'.
+ *
+ * On a match *ptr is left pointing just past the matching entry, so that a
+ * repeated call continues the scan, and the result is the offset in 'word'
+ * just after the affix when the entry is a suffix, or 0 when it is not.
+ * Returns -1 when the list is absent or exhausted.
+ */
+static int
+CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace)
+{
+	/* in case CompoundAffix is null: */
+	if (*ptr == NULL)
+		return -1;
+
+	for (; (*ptr)->affix; (*ptr)++)
+	{
+		CMPDAffix  *cand = *ptr;
+		int			matchend;
+
+		if (len <= cand->len)
+			continue;
+
+		if (CheckInPlace)
+		{
+			if (strncmp(cand->affix, word, cand->len) != 0)
+				continue;
+			matchend = cand->len;
+		}
+		else
+		{
+			char	   *hit = strstr(word, cand->affix);
+
+			if (hit == NULL)
+				continue;
+			matchend = cand->len + (hit - word);
+		}
+
+		/* step over the entry we are reporting */
+		(*ptr)++;
+		return (cand->issuffix) ? matchend : 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Allocate a new, unlinked SplitVar.
+ *
+ * With s == NULL the result is empty with room for 16 stems.  Otherwise it
+ * gets the same capacity and stems as 's'; the stem strings are duplicated
+ * when 'makedup' is nonzero and shared with 's' when it is zero.
+ */
+static SplitVar *
+CopyVar(SplitVar *s, int makedup)
+{
+	SplitVar   *copy = (SplitVar *) palloc(sizeof(SplitVar));
+	int			idx;
+
+	copy->next = NULL;
+
+	if (s == NULL)
+	{
+		copy->lenstem = 16;
+		copy->stem = (char **) palloc(sizeof(char *) * copy->lenstem);
+		copy->nstem = 0;
+		return copy;
+	}
+
+	copy->lenstem = s->lenstem;
+	copy->stem = (char **) palloc(sizeof(char *) * copy->lenstem);
+	copy->nstem = s->nstem;
+	for (idx = 0; idx < s->nstem; idx++)
+	{
+		if (makedup)
+			copy->stem[idx] = pstrdup(s->stem[idx]);
+		else
+			copy->stem[idx] = s->stem[idx];
+	}
+
+	return copy;
+}
+
+/*
+ * Append 'word' to the stem array of 'v', doubling the array when it is
+ * full.  The pointer is stored as is; no copy of the string is made.
+ */
+static void
+AddStem(SplitVar *v, char *word)
+{
+	bool		full = (v->nstem >= v->lenstem);
+
+	if (full)
+	{
+		v->lenstem *= 2;
+		v->stem = (char **) repalloc(v->stem, sizeof(char *) * v->lenstem);
+	}
+
+	v->stem[v->nstem++] = word;
+}
+
+/*
+ * Enumerate the ways 'word' can be split into compound parts.
+ *
+ * Conf: dictionary being used.
+ * snode: dictionary node to continue walking from, or NULL to start from
+ *		Conf->Dictionary.
+ * orig: stems collected so far (may be NULL); it is deep-copied, never
+ *		modified.
+ * word, wordlen: the whole word and its length in bytes.
+ * startpos: offset in 'word' where the part currently being built begins.
+ * minpos: a dictionary word ending at or before this offset is not accepted
+ *		as a part; when snode is given this is also the offset at which the
+ *		walk resumes.
+ *
+ * Returns the head of a linked list of SplitVar.  The head holds the split
+ * built by this invocation; variants found by recursive calls are appended
+ * through ->next.
+ */
+static SplitVar *
+SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int wordlen, int startpos, int minpos)
+{
+	SplitVar   *var = NULL;
+	SPNodeData *StopLow,
+			   *StopHigh,
+			   *StopMiddle = NULL;
+	SPNode	   *node = (snode) ? snode : Conf->Dictionary;
+	int			level = (snode) ? minpos : startpos;	/* recursive
+														 * minpos==level */
+	int			lenaff;
+	CMPDAffix  *caff;
+	char	   *notprobed;
+	int			compoundflag = 0;
+
+	/* since this function recurses, it could be driven to stack overflow */
+	check_stack_depth();
+
+	/*
+	 * notprobed[i] stays 1 until a part ending at offset i has been accepted
+	 * through the compound-affix path below; it prevents trying the same end
+	 * position twice within this invocation.
+	 */
+	notprobed = (char *) palloc(wordlen);
+	memset(notprobed, 1, wordlen);
+	var = CopyVar(orig, 1);
+
+	while (level < wordlen)
+	{
+		/* find word with epenthetic or/and compound affix */
+		caff = Conf->CompoundAffix;
+		/* each call advances caff, so 'continue' moves on to the next affix */
+		while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) >= 0)
+		{
+			/*
+			 * there is one of compound affixes, so check word for existings
+			 */
+			char		buf[MAXNORMLEN];
+			char	  **subres;
+
+			/* from here on lenaff is the length of the part from startpos */
+			lenaff = level - startpos + lenaff;
+
+			if (!notprobed[startpos + lenaff - 1])
+				continue;
+
+			if (level + lenaff - 1 <= minpos)
+				continue;
+
+			if (lenaff >= MAXNORMLEN)
+				continue;		/* skip too big value */
+			if (lenaff > 0)
+				memcpy(buf, word + startpos, lenaff);
+			buf[lenaff] = '\0';
+
+			/*
+			 * NOTE(review): this loop only runs while level > startpos, so
+			 * "level == 0" looks unreachable here unless startpos can be
+			 * negative; the dictionary walk below tests "startpos == 0"
+			 * instead.  Confirm which test is intended.
+			 */
+			if (level == 0)
+				compoundflag = FF_COMPOUNDBEGIN;
+			else if (level == wordlen - 1)
+				compoundflag = FF_COMPOUNDLAST;
+			else
+				compoundflag = FF_COMPOUNDMIDDLE;
+			subres = NormalizeSubWord(Conf, buf, compoundflag);
+			if (subres)
+			{
+				/* Yes, it was a word from dictionary */
+				/* shallow copy: 'new' shares the stem strings with 'var' */
+				SplitVar   *new = CopyVar(var, 0);
+				SplitVar   *ptr = var;
+				char	  **sptr = subres;
+
+				notprobed[startpos + lenaff - 1] = 0;
+
+				while (*sptr)
+				{
+					AddStem(new, *sptr);
+					sptr++;
+				}
+				pfree(subres);
+
+				/* append the variants of the remaining text to our list */
+				while (ptr->next)
+					ptr = ptr->next;
+				ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff);
+
+				/* only the container is freed; the stem strings are not */
+				pfree(new->stem);
+				pfree(new);
+			}
+		}
+
+		if (!node)
+			break;
+
+		/* binary search of this node for the byte at offset 'level' */
+		StopLow = node->data;
+		StopHigh = node->data + node->length;
+		while (StopLow < StopHigh)
+		{
+			StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
+			if (StopMiddle->val == ((uint8 *) (word))[level])
+				break;
+			else if (StopMiddle->val < ((uint8 *) (word))[level])
+				StopLow = StopMiddle + 1;
+			else
+				StopHigh = StopMiddle;
+		}
+
+		/* StopLow < StopHigh means the search left the loop through a match */
+		if (StopLow < StopHigh)
+		{
+			if (startpos == 0)
+				compoundflag = FF_COMPOUNDBEGIN;
+			else if (level == wordlen - 1)
+				compoundflag = FF_COMPOUNDLAST;
+			else
+				compoundflag = FF_COMPOUNDMIDDLE;
+
+			/* find infinitive */
+			if (StopMiddle->isword &&
+				(StopMiddle->compoundflag & compoundflag) &&
+				notprobed[level])
+			{
+				/* ok, we found full compoundallowed word */
+				if (level > minpos)
+				{
+					/* and its length more than minimal */
+					if (wordlen == level + 1)
+					{
+						/* well, it was last word */
+						AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
+						pfree(notprobed);
+						return var;
+					}
+					else
+					{
+						/* then we will search more big word at the same point */
+						SplitVar   *ptr = var;
+
+						while (ptr->next)
+							ptr = ptr->next;
+						ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
+						/* we can find next word */
+						level++;
+						AddStem(var, pnstrdup(word + startpos, level - startpos));
+						/* restart the walk at the dictionary root for the next part */
+						node = Conf->Dictionary;
+						startpos = level;
+						continue;
+					}
+				}
+			}
+			node = StopMiddle->node;
+		}
+		else
+			node = NULL;
+		level++;
+	}
+
+	/* whatever remains from startpos onwards becomes the final stem */
+	AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
+	pfree(notprobed);
+	return var;
+}
+
+/*
+ * Append one lexeme to the result array, allocating the array (MAX_NORM
+ * entries) on first use.
+ *
+ * *lres is the start of the array and *lcur the next free slot; the entry at
+ * *lcur always carries lexeme == NULL as terminator.  'word' is stored as
+ * is, without copying.  When the array is full the call does nothing.
+ */
+static void
+addNorm(TSLexeme **lres, TSLexeme **lcur, char *word, int flags, uint16 NVariant)
+{
+	TSLexeme   *slot;
+
+	if (*lres == NULL)
+		*lcur = *lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme));
+
+	/* keep the last slot for the terminator */
+	if (*lcur - *lres >= MAX_NORM - 1)
+		return;
+
+	slot = *lcur;
+	slot->lexeme = word;
+	slot->flags = flags;
+	slot->nvariant = NVariant;
+
+	*lcur = slot + 1;
+	(*lcur)->lexeme = NULL;
+}
+
+/*
+ * Normalize 'word' with the given Ispell dictionary.
+ *
+ * First the normal forms of the whole word are collected, each under its own
+ * variant number.  Then, if Conf->usecompound is set, every way of splitting
+ * the word into more than one stem is considered: the last stem is
+ * normalized with FF_COMPOUNDLAST and, for each form obtained, the leading
+ * stems followed by that form are emitted under one shared variant number.
+ *
+ * Returns an array of TSLexeme terminated by an entry with lexeme == NULL,
+ * or NULL if nothing was found.
+ */
+TSLexeme *
+NINormalizeWord(IspellDict *Conf, char *word)
+{
+	char	  **res;
+	TSLexeme   *lcur = NULL,
+			   *lres = NULL;
+	uint16		NVariant = 1;
+
+	res = NormalizeSubWord(Conf, word, 0);
+
+	if (res)
+	{
+		char	  **ptr = res;
+
+		/* the strings are handed over to the result; only the array is freed */
+		while (*ptr && (lcur - lres) < MAX_NORM)
+		{
+			addNorm(&lres, &lcur, *ptr, 0, NVariant++);
+			ptr++;
+		}
+		pfree(res);
+	}
+
+	if (Conf->usecompound)
+	{
+		int			wordlen = strlen(word);
+		SplitVar   *ptr,
+				   *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1);
+		int			i;
+
+		while (var)
+		{
+			if (var->nstem > 1)
+			{
+				char	  **subres = NormalizeSubWord(Conf, var->stem[var->nstem - 1], FF_COMPOUNDLAST);
+
+				if (subres)
+				{
+					char	  **subptr = subres;
+
+					while (*subptr)
+					{
+						/*
+						 * The first form reuses the stem strings themselves;
+						 * later forms get their own copies.
+						 */
+						for (i = 0; i < var->nstem - 1; i++)
+						{
+							addNorm(&lres, &lcur, (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]), 0, NVariant);
+						}
+
+						addNorm(&lres, &lcur, *subptr, 0, NVariant);
+						subptr++;
+						NVariant++;
+					}
+
+					pfree(subres);
+					/*
+					 * The leading stems were passed to addNorm() above;
+					 * clearing stem[0] makes the cleanup loop below stop
+					 * at once.  The last stem was replaced by its
+					 * normalized forms, so it is freed here.
+					 */
+					var->stem[0] = NULL;
+					pfree(var->stem[var->nstem - 1]);
+				}
+			}
+
+			/* free the stems of a variant that contributed nothing */
+			for (i = 0; i < var->nstem && var->stem[i]; i++)
+				pfree(var->stem[i]);
+			ptr = var->next;
+			pfree(var->stem);
+			pfree(var);
+			var = ptr;
+		}
+	}
+
+	return lres;
+}
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
new file mode 100644
index 0000000..361aa99
--- /dev/null
+++ b/src/backend/tsearch/to_tsany.c
@@ -0,0 +1,724 @@
+/*-------------------------------------------------------------------------
+ *
+ * to_tsany.c
+ *		to_ts* function definitions
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/tsearch/to_tsany.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "common/jsonapi.h"
+#include "tsearch/ts_cache.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+#include "utils/jsonfuncs.h"
+
+
+/*
+ * Opaque data structure, which is passed by parse_tsquery() to pushval_morph().
+ */
+typedef struct MorphOpaque
+{
+	/* text search configuration that pushval_morph() hands to parsetext() */
+	Oid			cfg_id;
+
+	/*
+	 * Single tsquery morph could be parsed into multiple words.  When these
+	 * words reside in adjacent positions, they are connected using this
+	 * operator.  Usually, that is OP_PHRASE, which requires word positions of
+	 * a complex morph to exactly match the tsvector.
+	 */
+	int			qoperator;
+} MorphOpaque;
+
+/*
+ * State handed to add_to_tsvector() for each element visited while
+ * iterating over a json(b) value.
+ */
+typedef struct TSVectorBuildState
+{
+	ParsedText *prs;			/* words accumulated so far */
+	Oid			cfgId;			/* configuration passed to parsetext() */
+} TSVectorBuildState;
+
+static void add_to_tsvector(void *_state, char *elem_value, int elem_len);
+
+
+/*
+ * SQL-callable: return the OID of the current text search configuration.
+ */
+Datum
+get_current_ts_config(PG_FUNCTION_ARGS)
+{
+	Oid			current = getTSCurrentConfig(true);
+
+	PG_RETURN_OID(current);
+}
+
+/*
+ * to_tsvector
+ */
+/*
+ * qsort comparator for ParsedWord: order by word text first and, for equal
+ * words, by ascending position.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+	const ParsedWord *lhs = (const ParsedWord *) a;
+	const ParsedWord *rhs = (const ParsedWord *) b;
+	int			cmp;
+
+	cmp = tsCompareString(lhs->word, lhs->len,
+						  rhs->word, rhs->len,
+						  false);
+	if (cmp != 0)
+		return cmp;
+
+	/* same word: fall back to the position */
+	if (lhs->pos.pos == rhs->pos.pos)
+		return 0;
+
+	return (lhs->pos.pos > rhs->pos.pos) ? 1 : -1;
+}
+
+/*
+ * Give 'w' a fresh position array holding the single position 'firstpos'.
+ *
+ * Layout of the array: apos[0] is the number of positions stored and
+ * apos[1..apos[0]] are the positions; alen is the allocated length.
+ *
+ * pos.pos and pos.apos are members of the same 'pos' field, so the caller
+ * must have computed 'firstpos' from w->pos.pos before calling.
+ */
+static void
+initWordPositions(ParsedWord *w, int firstpos)
+{
+	w->alen = 2;
+	w->pos.apos = (uint16 *) palloc(sizeof(uint16) * w->alen);
+	w->pos.apos[0] = 1;
+	w->pos.apos[1] = firstpos;
+}
+
+/*
+ * Sort the 'l' words in 'a' and merge duplicates in place.
+ *
+ * On return the first N entries of 'a' are the distinct words, each with a
+ * position array (see initWordPositions) listing the limited positions at
+ * which it occurred; N is the return value.  The strings of merged
+ * duplicates are freed.
+ */
+static int
+uniqueWORD(ParsedWord *a, int32 l)
+{
+	ParsedWord *ptr,
+			   *res;
+
+	if (l == 1)
+	{
+		initWordPositions(a, LIMITPOS(a->pos.pos));
+		return l;
+	}
+
+	res = a;
+	ptr = a + 1;
+
+	/*
+	 * Sort words with its positions
+	 */
+	qsort((void *) a, l, sizeof(ParsedWord), compareWORD);
+
+	/*
+	 * Initialize first word and its first position
+	 */
+	initWordPositions(a, LIMITPOS(a->pos.pos));
+
+	/*
+	 * Summarize position information for each word
+	 */
+	while (ptr - a < l)
+	{
+		if (!(ptr->len == res->len &&
+			  strncmp(ptr->word, res->word, res->len) == 0))
+		{
+			/*
+			 * Got a new word, so put it in result.  The next result slot may
+			 * be the very entry ptr points at, so fetch the position before
+			 * the slot's 'pos' field is overwritten.
+			 */
+			int			firstpos = LIMITPOS(ptr->pos.pos);
+
+			res++;
+			res->len = ptr->len;
+			res->word = ptr->word;
+			initWordPositions(res, firstpos);
+		}
+		else
+		{
+			/*
+			 * The word already exists, so adjust position information. But
+			 * before we should check size of position's array, max allowed
+			 * value for position and uniqueness of position
+			 */
+			pfree(ptr->word);
+			if (res->pos.apos[0] < MAXNUMPOS - 1 &&
+				res->pos.apos[res->pos.apos[0]] != MAXENTRYPOS - 1 &&
+				res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos))
+			{
+				/* need room for the count slot plus apos[0] + 1 positions */
+				if (res->pos.apos[0] + 1 >= res->alen)
+				{
+					res->alen *= 2;
+					res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen);
+				}
+
+				/*
+				 * The position is already known to differ from the last one
+				 * stored (checked just above), so append it unconditionally.
+				 */
+				res->pos.apos[res->pos.apos[0] + 1] = LIMITPOS(ptr->pos.pos);
+				res->pos.apos[0]++;
+			}
+		}
+		ptr++;
+	}
+
+	return res + 1 - a;
+}
+
+/*
+ * make value of tsvector, given parsed text
+ *
+ * Duplicate words are merged first (uniqueWORD), then the total size is
+ * computed and the words and their position lists are copied into a newly
+ * allocated TSVector.  Raises an error if the string area would exceed
+ * MAXSTRPOS bytes.
+ *
+ * Note: frees prs->words and subsidiary data.
+ */
+TSVector
+make_tsvector(ParsedText *prs)
+{
+	int			i,
+				j,
+				lenstr = 0,
+				totallen;
+	TSVector	in;
+	WordEntry  *ptr;
+	char	   *str;
+	int			stroff;
+
+	/* Merge duplicate words */
+	if (prs->curwords > 0)
+		prs->curwords = uniqueWORD(prs->words, prs->curwords);
+
+	/* Determine space needed */
+	for (i = 0; i < prs->curwords; i++)
+	{
+		lenstr += prs->words[i].len;
+		if (prs->words[i].alen)
+		{
+			/* apos[0] is the number of positions recorded for this word */
+			lenstr = SHORTALIGN(lenstr);
+			lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+		}
+	}
+
+	if (lenstr > MAXSTRPOS)
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", lenstr, MAXSTRPOS)));
+
+	totallen = CALCDATASIZE(prs->curwords, lenstr);
+	in = (TSVector) palloc0(totallen);
+	SET_VARSIZE(in, totallen);
+	in->size = prs->curwords;
+
+	ptr = ARRPTR(in);
+	str = STRPTR(in);
+	stroff = 0;
+	/* second pass: same walk as the sizing loop above, now copying data */
+	for (i = 0; i < prs->curwords; i++)
+	{
+		ptr->len = prs->words[i].len;
+		ptr->pos = stroff;
+		memcpy(str + stroff, prs->words[i].word, prs->words[i].len);
+		stroff += prs->words[i].len;
+		pfree(prs->words[i].word);
+		if (prs->words[i].alen)
+		{
+			int			k = prs->words[i].pos.apos[0];
+			WordEntryPos *wptr;
+
+			if (k > 0xFFFF)
+				elog(ERROR, "positions array too long");
+
+			ptr->haspos = 1;
+			/* position count goes first, short-aligned, right after the word */
+			stroff = SHORTALIGN(stroff);
+			*(uint16 *) (str + stroff) = (uint16) k;
+			wptr = POSDATAPTR(in, ptr);
+			for (j = 0; j < k; j++)
+			{
+				/* positions live in apos[1..k]; every weight is set to 0 */
+				WEP_SETWEIGHT(wptr[j], 0);
+				WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]);
+			}
+			stroff += sizeof(uint16) + k * sizeof(WordEntryPos);
+			pfree(prs->words[i].pos.apos);
+		}
+		else
+			ptr->haspos = 0;
+		ptr++;
+	}
+
+	/* the json(b) workers leave words NULL when no element was parsed */
+	if (prs->words)
+		pfree(prs->words);
+
+	return in;
+}
+
+/*
+ * SQL-callable: build a tsvector from text (argument 1) using the text
+ * search configuration whose OID is argument 0.
+ */
+Datum
+to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+	Oid			config = PG_GETARG_OID(0);
+	text	   *document = PG_GETARG_TEXT_PP(1);
+	ParsedText	parsed;
+	TSVector	vector;
+
+	parsed.curwords = 0;
+	parsed.pos = 0;
+
+	/*
+	 * Size the words array from a rough guess of one word per six bytes of
+	 * input, kept between 2 entries and the largest allocatable array.
+	 */
+	parsed.lenwords = VARSIZE_ANY_EXHDR(document) / 6;
+	if (parsed.lenwords < 2)
+		parsed.lenwords = 2;
+	else if (parsed.lenwords > MaxAllocSize / sizeof(ParsedWord))
+		parsed.lenwords = MaxAllocSize / sizeof(ParsedWord);
+	parsed.words = (ParsedWord *) palloc(sizeof(ParsedWord) * parsed.lenwords);
+
+	parsetext(config, &parsed, VARDATA_ANY(document), VARSIZE_ANY_EXHDR(document));
+
+	PG_FREE_IF_COPY(document, 1);
+
+	vector = make_tsvector(&parsed);
+
+	PG_RETURN_TSVECTOR(vector);
+}
+
+/*
+ * SQL-callable: to_tsvector_byid() with the current text search
+ * configuration.
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+	text	   *document = PG_GETARG_TEXT_PP(0);
+	Oid			config = getTSCurrentConfig(true);
+	Datum		result;
+
+	result = DirectFunctionCall2(to_tsvector_byid,
+								 ObjectIdGetDatum(config),
+								 PointerGetDatum(document));
+
+	PG_RETURN_DATUM(result);
+}
+
+/*
+ * Shared implementation of the jsonb(_string)_to_tsvector(_byid) functions:
+ * feed every jsonb element selected by 'flags' to add_to_tsvector() and
+ * build a tsvector from the collected words.
+ */
+static TSVector
+jsonb_to_tsvector_worker(Oid cfgId, Jsonb *jb, uint32 flags)
+{
+	ParsedText	parsed;
+	TSVectorBuildState build;
+
+	/* words == NULL tells add_to_tsvector() to allocate on first use */
+	parsed.curwords = 0;
+	parsed.words = NULL;
+
+	build.cfgId = cfgId;
+	build.prs = &parsed;
+
+	iterate_jsonb_values(jb, flags, &build, add_to_tsvector);
+
+	return make_tsvector(&parsed);
+}
+
+/*
+ * SQL-callable: tsvector from the string values of a jsonb document
+ * (argument 1), using the configuration given as argument 0.
+ */
+Datum
+jsonb_string_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+	Oid			config = PG_GETARG_OID(0);
+	Jsonb	   *doc = PG_GETARG_JSONB_P(1);
+	TSVector	vector = jsonb_to_tsvector_worker(config, doc, jtiString);
+
+	PG_FREE_IF_COPY(doc, 1);
+
+	PG_RETURN_TSVECTOR(vector);
+}
+
+/*
+ * SQL-callable: tsvector from the string values of a jsonb document
+ * (argument 0), using the current text search configuration.
+ */
+Datum
+jsonb_string_to_tsvector(PG_FUNCTION_ARGS)
+{
+	Jsonb	   *doc = PG_GETARG_JSONB_P(0);
+	Oid			config = getTSCurrentConfig(true);
+	TSVector	vector = jsonb_to_tsvector_worker(config, doc, jtiString);
+
+	PG_FREE_IF_COPY(doc, 0);
+
+	PG_RETURN_TSVECTOR(vector);
+}
+
+/*
+ * SQL-callable: tsvector from the elements of a jsonb document (argument 1)
+ * selected by the jsonb flags value (argument 2), using the configuration
+ * given as argument 0.
+ */
+Datum
+jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+	Oid			config = PG_GETARG_OID(0);
+	Jsonb	   *doc = PG_GETARG_JSONB_P(1);
+	Jsonb	   *flagsDoc = PG_GETARG_JSONB_P(2);
+	uint32		selected = parse_jsonb_index_flags(flagsDoc);
+	TSVector	vector;
+
+	vector = jsonb_to_tsvector_worker(config, doc, selected);
+
+	PG_FREE_IF_COPY(doc, 1);
+	PG_FREE_IF_COPY(flagsDoc, 2);
+
+	PG_RETURN_TSVECTOR(vector);
+}
+
+/*
+ * SQL-callable: tsvector from the elements of a jsonb document (argument 0)
+ * selected by the jsonb flags value (argument 1), using the current text
+ * search configuration.
+ */
+Datum
+jsonb_to_tsvector(PG_FUNCTION_ARGS)
+{
+	Jsonb	   *doc = PG_GETARG_JSONB_P(0);
+	Jsonb	   *flagsDoc = PG_GETARG_JSONB_P(1);
+	uint32		selected = parse_jsonb_index_flags(flagsDoc);
+	Oid			config = getTSCurrentConfig(true);
+	TSVector	vector;
+
+	vector = jsonb_to_tsvector_worker(config, doc, selected);
+
+	PG_FREE_IF_COPY(doc, 0);
+	PG_FREE_IF_COPY(flagsDoc, 1);
+
+	PG_RETURN_TSVECTOR(vector);
+}
+
+/*
+ * Shared implementation of the json(_string)_to_tsvector(_byid) functions:
+ * feed every json element selected by 'flags' to add_to_tsvector() and build
+ * a tsvector from the collected words.
+ */
+static TSVector
+json_to_tsvector_worker(Oid cfgId, text *json, uint32 flags)
+{
+	ParsedText	parsed;
+	TSVectorBuildState build;
+
+	/* words == NULL tells add_to_tsvector() to allocate on first use */
+	parsed.curwords = 0;
+	parsed.words = NULL;
+
+	build.cfgId = cfgId;
+	build.prs = &parsed;
+
+	iterate_json_values(json, flags, &build, add_to_tsvector);
+
+	return make_tsvector(&parsed);
+}
+
+/*
+ * SQL-callable: tsvector from the string values of a json document
+ * (argument 1), using the configuration given as argument 0.
+ */
+Datum
+json_string_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+	Oid			config = PG_GETARG_OID(0);
+	text	   *doc = PG_GETARG_TEXT_P(1);
+	TSVector	vector = json_to_tsvector_worker(config, doc, jtiString);
+
+	PG_FREE_IF_COPY(doc, 1);
+
+	PG_RETURN_TSVECTOR(vector);
+}
+
+/*
+ * SQL-callable: tsvector from the string values of a json document
+ * (argument 0), using the current text search configuration.
+ */
+Datum
+json_string_to_tsvector(PG_FUNCTION_ARGS)
+{
+	text	   *doc = PG_GETARG_TEXT_P(0);
+	Oid			config = getTSCurrentConfig(true);
+	TSVector	vector = json_to_tsvector_worker(config, doc, jtiString);
+
+	PG_FREE_IF_COPY(doc, 0);
+
+	PG_RETURN_TSVECTOR(vector);
+}
+
+/*
+ * SQL-callable: tsvector from the elements of a json document (argument 1)
+ * selected by the jsonb flags value (argument 2), using the configuration
+ * given as argument 0.
+ */
+Datum
+json_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+	Oid			config = PG_GETARG_OID(0);
+	text	   *doc = PG_GETARG_TEXT_P(1);
+	Jsonb	   *flagsDoc = PG_GETARG_JSONB_P(2);
+	uint32		selected = parse_jsonb_index_flags(flagsDoc);
+	TSVector	vector;
+
+	vector = json_to_tsvector_worker(config, doc, selected);
+
+	PG_FREE_IF_COPY(doc, 1);
+	PG_FREE_IF_COPY(flagsDoc, 2);
+
+	PG_RETURN_TSVECTOR(vector);
+}
+
+/*
+ * SQL-callable: tsvector from the elements of a json document (argument 0)
+ * selected by the jsonb flags value (argument 1), using the current text
+ * search configuration.
+ */
+Datum
+json_to_tsvector(PG_FUNCTION_ARGS)
+{
+	text	   *doc = PG_GETARG_TEXT_P(0);
+	Jsonb	   *flagsDoc = PG_GETARG_JSONB_P(1);
+	uint32		selected = parse_jsonb_index_flags(flagsDoc);
+	Oid			config = getTSCurrentConfig(true);
+	TSVector	vector;
+
+	vector = json_to_tsvector_worker(config, doc, selected);
+
+	PG_FREE_IF_COPY(doc, 0);
+	PG_FREE_IF_COPY(flagsDoc, 1);
+
+	PG_RETURN_TSVECTOR(vector);
+}
+
+/*
+ * Callback for the json(b) value iterators: parse one element's text and
+ * append its lexemes to the ParsedText kept in the TSVectorBuildState.
+ */
+static void
+add_to_tsvector(void *_state, char *elem_value, int elem_len)
+{
+	TSVectorBuildState *build = (TSVectorBuildState *) _state;
+	ParsedText *parsed = build->prs;
+	int32		words_before;
+
+	/*
+	 * The words array is created lazily on the first element, at a modest
+	 * size; parsetext() enlarges it as needed.
+	 */
+	if (parsed->words == NULL)
+	{
+		parsed->pos = 0;
+		parsed->curwords = 0;
+		parsed->lenwords = 16;
+		parsed->words = (ParsedWord *) palloc(sizeof(ParsedWord) * parsed->lenwords);
+	}
+
+	words_before = parsed->curwords;
+
+	parsetext(build->cfgId, parsed, elem_value, elem_len);
+
+	/*
+	 * When this element contributed words, leave a one-position gap after
+	 * it.  Otherwise a phrase search could treat the last word of this
+	 * element and the first word of the next one as adjacent.
+	 */
+	if (words_before < parsed->curwords)
+		parsed->pos++;
+}
+
+
+/*
+ * to_tsquery
+ */
+
+
+/*
+ * This function is used for morph parsing.
+ *
+ * The value is passed to parsetext which will call the right dictionary to
+ * lexize the word. If it turns out to be a stopword, we push a QI_VALSTOP
+ * to the stack.
+ *
+ * All words belonging to the same variant are pushed as an ANDed list,
+ * and different variants are ORed together.
+ *
+ * opaque: pointer to a MorphOpaque giving the configuration and the
+ *		operator used to connect consecutive positions.
+ * state: parser state that the push* functions append to.
+ * strval, lenval: the text of the morph.
+ * weight: passed through to pushValue() for every lexeme.
+ * prefix: if true, every lexeme is pushed as a prefix match; otherwise only
+ *		lexemes flagged TSL_PREFIX are.
+ *
+ * Each operator is pushed after its operands.
+ */
+static void
+pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
+{
+	int32		count = 0;
+	ParsedText	prs;
+	uint32		variant,
+				pos = 0,
+				cntvar = 0,
+				cntpos = 0,
+				cnt = 0;
+	MorphOpaque *data = (MorphOpaque *) DatumGetPointer(opaque);
+
+	prs.lenwords = 4;
+	prs.curwords = 0;
+	prs.pos = 0;
+	prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
+
+	parsetext(data->cfg_id, &prs, strval, lenval);
+
+	if (prs.curwords > 0)
+	{
+		/* one iteration per distinct word position */
+		while (count < prs.curwords)
+		{
+			/*
+			 * Were any stop words removed? If so, fill empty positions with
+			 * placeholders linked by an appropriate operator.
+			 */
+			if (pos > 0 && pos + 1 < prs.words[count].pos.pos)
+			{
+				while (pos + 1 < prs.words[count].pos.pos)
+				{
+					/* put placeholders for each missing stop word */
+					pushStop(state);
+					if (cntpos)
+						pushOperator(state, data->qoperator, 1);
+					cntpos++;
+					pos++;
+				}
+			}
+
+			/* save current word's position */
+			pos = prs.words[count].pos.pos;
+
+			/* Go through all variants obtained from this token */
+			cntvar = 0;
+			while (count < prs.curwords && pos == prs.words[count].pos.pos)
+			{
+				variant = prs.words[count].nvariant;
+
+				/* Push all words belonging to the same variant */
+				cnt = 0;
+				while (count < prs.curwords &&
+					   pos == prs.words[count].pos.pos &&
+					   variant == prs.words[count].nvariant)
+				{
+					pushValue(state,
+							  prs.words[count].word,
+							  prs.words[count].len,
+							  weight,
+							  ((prs.words[count].flags & TSL_PREFIX) || prefix));
+					pfree(prs.words[count].word);
+					/* AND this word with the earlier words of the variant */
+					if (cnt)
+						pushOperator(state, OP_AND, 0);
+					cnt++;
+					count++;
+				}
+
+				/* OR this variant with the earlier variants at this position */
+				if (cntvar)
+					pushOperator(state, OP_OR, 0);
+				cntvar++;
+			}
+
+			/* connect this position to the preceding ones */
+			if (cntpos)
+			{
+				/* distance may be useful */
+				pushOperator(state, data->qoperator, 1);
+			}
+
+			cntpos++;
+		}
+
+		pfree(prs.words);
+	}
+	else
+		/* nothing survived parsing: push a stop-word placeholder */
+		pushStop(state);
+}
+
+/*
+ * SQL-callable: parse text (argument 1) as a tsquery, normalizing each
+ * operand with the configuration given as argument 0.
+ */
+Datum
+to_tsquery_byid(PG_FUNCTION_ARGS)
+{
+	text	   *in = PG_GETARG_TEXT_PP(1);
+	MorphOpaque morph;
+	char	   *querytext;
+	TSQuery		result;
+
+	morph.cfg_id = PG_GETARG_OID(0);
+
+	/*
+	 * With OP_PHRASE as qoperator, the words of a complex morph must appear
+	 * in the tsvector at exactly matching positions.  Likewise, when complex
+	 * morphs are themselves joined by OP_PHRASE, all of their words end up
+	 * in one OP_PHRASE sequence.
+	 */
+	morph.qoperator = OP_PHRASE;
+
+	querytext = text_to_cstring(in);
+	result = parse_tsquery(querytext,
+						   pushval_morph,
+						   PointerGetDatum(&morph),
+						   0);
+
+	PG_RETURN_TSQUERY(result);
+}
+
+/*
+ * SQL-callable: to_tsquery_byid() with the current text search
+ * configuration.
+ */
+Datum
+to_tsquery(PG_FUNCTION_ARGS)
+{
+	text	   *querytext = PG_GETARG_TEXT_PP(0);
+	Oid			config = getTSCurrentConfig(true);
+	Datum		result;
+
+	result = DirectFunctionCall2(to_tsquery_byid,
+								 ObjectIdGetDatum(config),
+								 PointerGetDatum(querytext));
+
+	PG_RETURN_DATUM(result);
+}
+
+/*
+ * SQL-callable: turn plain text (argument 1) into a tsquery whose words are
+ * all required but may occur at any position, using the configuration given
+ * as argument 0.
+ */
+Datum
+plainto_tsquery_byid(PG_FUNCTION_ARGS)
+{
+	text	   *in = PG_GETARG_TEXT_PP(1);
+	TSQuery		query;
+	MorphOpaque data;
+
+	data.cfg_id = PG_GETARG_OID(0);
+
+	/*
+	 * parse_tsquery() with P_TSQ_PLAIN flag takes the whole input text as a
+	 * single morph.  Passing OP_AND as a qoperator makes tsquery require
+	 * matching of all words independently of their positions.
+	 */
+	data.qoperator = OP_AND;
+
+	query = parse_tsquery(text_to_cstring(in),
+						  pushval_morph,
+						  PointerGetDatum(&data),
+						  P_TSQ_PLAIN);
+
+	/* return through the same macro as the other *_tsquery_byid functions */
+	PG_RETURN_TSQUERY(query);
+}
+
+/*
+ * SQL-callable: plainto_tsquery_byid() with the current text search
+ * configuration.
+ */
+Datum
+plainto_tsquery(PG_FUNCTION_ARGS)
+{
+	text	   *querytext = PG_GETARG_TEXT_PP(0);
+	Oid			config = getTSCurrentConfig(true);
+	Datum		result;
+
+	result = DirectFunctionCall2(plainto_tsquery_byid,
+								 ObjectIdGetDatum(config),
+								 PointerGetDatum(querytext));
+
+	PG_RETURN_DATUM(result);
+}
+
+
+/*
+ * SQL-callable: turn plain text (argument 1) into a phrase tsquery, using
+ * the configuration given as argument 0.
+ */
+Datum
+phraseto_tsquery_byid(PG_FUNCTION_ARGS)
+{
+	text	   *in = PG_GETARG_TEXT_PP(1);
+	MorphOpaque morph;
+	char	   *querytext;
+	TSQuery		result;
+
+	morph.cfg_id = PG_GETARG_OID(0);
+
+	/*
+	 * Under P_TSQ_PLAIN, parse_tsquery() treats the whole input as a single
+	 * morph.  Using OP_PHRASE as qoperator makes the resulting tsquery
+	 * require the word positions to match.
+	 */
+	morph.qoperator = OP_PHRASE;
+
+	querytext = text_to_cstring(in);
+	result = parse_tsquery(querytext,
+						   pushval_morph,
+						   PointerGetDatum(&morph),
+						   P_TSQ_PLAIN);
+
+	PG_RETURN_TSQUERY(result);
+}
+
+/*
+ * SQL-callable: phraseto_tsquery_byid() with the current text search
+ * configuration.
+ */
+Datum
+phraseto_tsquery(PG_FUNCTION_ARGS)
+{
+	text	   *querytext = PG_GETARG_TEXT_PP(0);
+	Oid			config = getTSCurrentConfig(true);
+	Datum		result;
+
+	result = DirectFunctionCall2(phraseto_tsquery_byid,
+								 ObjectIdGetDatum(config),
+								 PointerGetDatum(querytext));
+
+	PG_RETURN_DATUM(result);
+}
+
+/*
+ * SQL-callable: parse web-search style text (argument 1) into a tsquery,
+ * using the configuration given as argument 0.
+ */
+Datum
+websearch_to_tsquery_byid(PG_FUNCTION_ARGS)
+{
+	text	   *in = PG_GETARG_TEXT_PP(1);
+	MorphOpaque morph;
+	char	   *querytext;
+	TSQuery		result;
+
+	morph.cfg_id = PG_GETARG_OID(0);
+
+	/*
+	 * With OP_PHRASE as qoperator, the words of a complex morph must appear
+	 * in the tsvector at exactly matching positions.  Likewise, when complex
+	 * morphs are given in quotes, all of their words end up in one OP_PHRASE
+	 * sequence.
+	 */
+	morph.qoperator = OP_PHRASE;
+
+	querytext = text_to_cstring(in);
+	result = parse_tsquery(querytext,
+						   pushval_morph,
+						   PointerGetDatum(&morph),
+						   P_TSQ_WEB);
+
+	PG_RETURN_TSQUERY(result);
+}
+
+/*
+ * SQL-callable: websearch_to_tsquery_byid() with the current text search
+ * configuration.
+ */
+Datum
+websearch_to_tsquery(PG_FUNCTION_ARGS)
+{
+	text	   *querytext = PG_GETARG_TEXT_PP(0);
+	Oid			config = getTSCurrentConfig(true);
+	Datum		result;
+
+	result = DirectFunctionCall2(websearch_to_tsquery_byid,
+								 ObjectIdGetDatum(config),
+								 PointerGetDatum(querytext));
+
+	PG_RETURN_DATUM(result);
+}
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
new file mode 100644
index 0000000..3a475a0
--- /dev/null
+++ b/src/backend/tsearch/ts_locale.c
@@ -0,0 +1,320 @@
+/*-------------------------------------------------------------------------
+ *
+ * ts_locale.c
+ *		locale compatibility layer for tsearch
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/tsearch/ts_locale.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/pg_collation.h"
+#include "common/string.h"
+#include "storage/fd.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_public.h"
+
+static void tsearch_readline_callback(void *arg);
+
+
+/*
+ * The reason these functions use a 3-wchar_t output buffer, not 2 as you
+ * might expect, is that on Windows "wchar_t" is 16 bits and what we'll be
+ * getting from char2wchar() is UTF16 not UTF32.  A single input character
+ * may therefore produce a surrogate pair rather than just one wchar_t;
+ * we also need room for a trailing null.  When we do get a surrogate pair,
+ * we pass just the first code to iswdigit() etc, so that these functions will
+ * always return false for characters outside the Basic Multilingual Plane.
+ */
+#define WC_BUF_LEN  3
+
+/*
+ * t_isdigit --- is the (possibly multibyte) character at ptr a digit?
+ *
+ * A one-byte character, or any character when the database ctype is C, is
+ * classified by isdigit().  Otherwise the character is converted with
+ * char2wchar() and only its first wchar_t is passed to iswdigit().
+ */
+int
+t_isdigit(const char *ptr)
+{
+	pg_locale_t loc = 0;		/* TODO */
+	wchar_t		wbuf[WC_BUF_LEN];
+	int			nbytes = pg_mblen(ptr);
+
+	if (nbytes != 1 && !database_ctype_is_c)
+	{
+		char2wchar(wbuf, WC_BUF_LEN, ptr, nbytes, loc);
+		return iswdigit((wint_t) wbuf[0]);
+	}
+
+	return isdigit(TOUCHAR(ptr));
+}
+
+/*
+ * t_isspace --- is the (possibly multibyte) character at ptr whitespace?
+ *
+ * A one-byte character, or any character when the database ctype is C, is
+ * classified by isspace().  Otherwise the character is converted with
+ * char2wchar() and only its first wchar_t is passed to iswspace().
+ */
+int
+t_isspace(const char *ptr)
+{
+	pg_locale_t loc = 0;		/* TODO */
+	wchar_t		wbuf[WC_BUF_LEN];
+	int			nbytes = pg_mblen(ptr);
+
+	if (nbytes != 1 && !database_ctype_is_c)
+	{
+		char2wchar(wbuf, WC_BUF_LEN, ptr, nbytes, loc);
+		return iswspace((wint_t) wbuf[0]);
+	}
+
+	return isspace(TOUCHAR(ptr));
+}
+
+/*
+ * t_isalpha --- is the (possibly multibyte) character at ptr alphabetic?
+ *
+ * A one-byte character, or any character when the database ctype is C, is
+ * classified by isalpha().  Otherwise the character is converted with
+ * char2wchar() and only its first wchar_t is passed to iswalpha().
+ */
+int
+t_isalpha(const char *ptr)
+{
+	pg_locale_t loc = 0;		/* TODO */
+	wchar_t		wbuf[WC_BUF_LEN];
+	int			nbytes = pg_mblen(ptr);
+
+	if (nbytes != 1 && !database_ctype_is_c)
+	{
+		char2wchar(wbuf, WC_BUF_LEN, ptr, nbytes, loc);
+		return iswalpha((wint_t) wbuf[0]);
+	}
+
+	return isalpha(TOUCHAR(ptr));
+}
+
+/*
+ * t_isprint --- is the (possibly multibyte) character at ptr printable?
+ *
+ * A one-byte character, or any character when the database ctype is C, is
+ * classified by isprint().  Otherwise the character is converted with
+ * char2wchar() and only its first wchar_t is passed to iswprint().
+ */
+int
+t_isprint(const char *ptr)
+{
+	pg_locale_t loc = 0;		/* TODO */
+	wchar_t		wbuf[WC_BUF_LEN];
+	int			nbytes = pg_mblen(ptr);
+
+	if (nbytes != 1 && !database_ctype_is_c)
+	{
+		char2wchar(wbuf, WC_BUF_LEN, ptr, nbytes, loc);
+		return iswprint((wint_t) wbuf[0]);
+	}
+
+	return isprint(TOUCHAR(ptr));
+}
+
+
+/*
+ * Set up to read a file using tsearch_readline().  This facility is
+ * better than just reading the file directly because it provides error
+ * context pointing to the specific line where a problem is detected.
+ *
+ * Expected usage is:
+ *
+ *		tsearch_readline_state trst;
+ *
+ *		if (!tsearch_readline_begin(&trst, filename))
+ *			ereport(ERROR,
+ *					(errcode(ERRCODE_CONFIG_FILE_ERROR),
+ *					 errmsg("could not open stop-word file \"%s\": %m",
+ *							filename)));
+ *		while ((line = tsearch_readline(&trst)) != NULL)
+ *			process line;
+ *		tsearch_readline_end(&trst);
+ *
+ * Note that the caller supplies the ereport() for file open failure;
+ * this is so that a custom message can be provided.  The filename string
+ * passed to tsearch_readline_begin() must remain valid through
+ * tsearch_readline_end().
+ */
+bool
+tsearch_readline_begin(tsearch_readline_state *stp,
+					   const char *filename)
+{
+	/* Open failure is left for the caller to report */
+	stp->fp = AllocateFile(filename, "r");
+	if (stp->fp == NULL)
+		return false;
+
+	/* Nothing has been read yet */
+	stp->curline = NULL;
+	stp->lineno = 0;
+	stp->filename = filename;
+	initStringInfo(&stp->buf);
+
+	/* Push our callback onto the error context stack used by ereport() */
+	stp->cb.previous = error_context_stack;
+	stp->cb.arg = (void *) stp;
+	stp->cb.callback = tsearch_readline_callback;
+	error_context_stack = &stp->cb;
+
+	return true;
+}
+
+/*
+ * Read the next line from a tsearch data file (expected to be in UTF-8), and
+ * convert it to database encoding if needed. The returned string is palloc'd.
+ * NULL return means EOF.
+ */
+char *
+tsearch_readline(tsearch_readline_state *stp)
+{
+	/* Count this line first, so that error reports name the right one */
+	stp->lineno += 1;
+
+	/*
+	 * Forget the previous line.  It needs freeing only when it is a separate
+	 * conversion result rather than the read buffer itself.
+	 */
+	if (stp->curline != NULL && stp->curline != stp->buf.data)
+		pfree(stp->curline);
+	stp->curline = NULL;
+
+	/* Fetch the next line; if there is none, tell the caller we hit EOF */
+	if (!pg_get_line_buf(stp->fp, &stp->buf))
+		return NULL;
+
+	/*
+	 * Validate the input as UTF-8 and convert it to the database encoding if
+	 * needed.  The correctly-encoded string (which might be buf.data itself)
+	 * is remembered in curline for possible error reports.
+	 */
+	stp->curline = pg_any_to_server(stp->buf.data, stp->buf.len, PG_UTF8);
+
+	/*
+	 * Always hand back a fresh pstrdup'd copy.  That is plainly required when
+	 * pg_any_to_server() returned buf.data, but a second copy is needed even
+	 * after a real conversion: the caller may pfree the returned string at
+	 * any time, and curline must not be left pointing at recycled storage in
+	 * case an error is reported later.  Returning the pstrdup result rather
+	 * than the conversion output is also preferable because conversion
+	 * results tend to be over-allocated, and callers might store the string
+	 * directly in a long-lived dictionary structure, where an oversized
+	 * palloc chunk is unwelcome.  The conversion result is reclaimed on the
+	 * next call.
+	 */
+	return pstrdup(stp->curline);
+}
+
+/*
+ * Close down after reading a file with tsearch_readline()
+ */
+void
+tsearch_readline_end(tsearch_readline_state *stp)
+{
+	/*
+	 * Drop curline first, so that it cannot be used in any error reported
+	 * below.  It is freed only when it is not the read buffer itself.
+	 */
+	if (stp->curline != NULL && stp->curline != stp->buf.data)
+		pfree(stp->curline);
+	stp->curline = NULL;
+
+	/* Free the line buffer and close the file */
+	pfree(stp->buf.data);
+	FreeFile(stp->fp);
+
+	/* Unhook our callback from the error context stack */
+	error_context_stack = stp->cb.previous;
+}
+
+/*
+ * Error context callback for errors occurring while reading a tsearch
+ * configuration file.
+ */
+static void
+tsearch_readline_callback(void *arg)
+{
+	tsearch_readline_state *stp = (tsearch_readline_state *) arg;
+
+	/*
+	 * curline is not set while tsearch_readline() itself is at work.  Errors
+	 * raised there are mostly encoding violations, and we daren't put
+	 * badly-encoded data into an error message, so in that case the text of
+	 * the config line is left out of the context.
+	 */
+	if (stp->curline == NULL)
+	{
+		errcontext("line %d of configuration file \"%s\"",
+				   stp->lineno,
+				   stp->filename);
+		return;
+	}
+
+	errcontext("line %d of configuration file \"%s\": \"%s\"",
+			   stp->lineno,
+			   stp->filename,
+			   stp->curline);
+}
+
+
+/*
+ * lowerstr --- fold null-terminated string to lower case
+ *
+ * Returned string is palloc'd
+ */
+char *
+lowerstr(const char *str)
+{
+	int			nbytes = strlen(str);
+
+	/* the length-taking variant does all the work */
+	return lowerstr_with_len(str, nbytes);
+}
+
+/*
+ * lowerstr_with_len --- fold string to lower case
+ *
+ * Input string need not be null-terminated.
+ *
+ * Returned string is palloc'd
+ */
+char *
+lowerstr_with_len(const char *str, int len)
+{
+	char	   *out;
+	pg_locale_t mylocale = 0;	/* TODO */
+
+	/* Empty input: return a freshly allocated empty string */
+	if (len == 0)
+		return pstrdup("");
+
+	/*
+	 * Use wide char code only when max encoding length > 1 and ctype != C.
+	 * Some operating systems fail with multi-byte encodings and a C locale.
+	 * Also, for a C locale there is no need to process as multibyte.
+	 * (Approach taken from backend/utils/adt/oracle_compat.c -- Teodor)
+	 */
+	if (pg_database_encoding_max_length() > 1 && !database_ctype_is_c)
+	{
+		wchar_t    *wstr,
+				   *wptr;
+		int			wlen;
+
+		/*
+		 * Allocate wchar_t's for the worst case: len is a number of bytes,
+		 * which is >= the number of characters.  One more wchar_t is
+		 * allocated for the terminating zero, because wchar2char wants a
+		 * zero-terminated string.
+		 */
+		wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));
+
+		wlen = char2wchar(wstr, len + 1, str, len, mylocale);
+		Assert(wlen <= len);
+
+		/* Fold each wide character in place, up to the terminating zero */
+		while (*wptr)
+		{
+			*wptr = towlower((wint_t) *wptr);
+			wptr++;
+		}
+
+		/*
+		 * Allocate the result string for the worst case + '\0'.  Note that
+		 * len is reused here: from now on it is the output buffer size in
+		 * bytes, not the input length.
+		 */
+		len = pg_database_encoding_max_length() * wlen + 1;
+		out = (char *) palloc(len);
+
+		wlen = wchar2char(out, wstr, len, mylocale);
+
+		pfree(wstr);
+
+		/* A negative result from wchar2char is reported as an error */
+		if (wlen < 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+					 errmsg("conversion from wchar_t to server encoding failed: %m")));
+		Assert(wlen < len);
+	}
+	else
+	{
+		const char *ptr = str;
+		char	   *outptr;
+
+		/*
+		 * Byte-at-a-time path: fold with tolower(), stopping after len bytes
+		 * or at a zero byte, whichever comes first, then terminate the
+		 * result.
+		 */
+		outptr = out = (char *) palloc(sizeof(char) * (len + 1));
+		while ((ptr - str) < len && *ptr)
+		{
+			*outptr++ = tolower(TOUCHAR(ptr));
+			ptr++;
+		}
+		*outptr = '\0';
+	}
+
+	return out;
+}
diff --git a/src/backend/tsearch/ts_parse.c b/src/backend/tsearch/ts_parse.c
new file mode 100644
index 0000000..a87b442
--- /dev/null
+++ b/src/backend/tsearch/ts_parse.c
@@ -0,0 +1,678 @@
+/*-------------------------------------------------------------------------
+ *
+ * ts_parse.c
+ *		main parse functions for tsearch
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/tsearch/ts_parse.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "tsearch/ts_cache.h"
+#include "tsearch/ts_utils.h"
+
+#define IGNORE_LONGLEXEME	1
+
+/*
+ * Lexize subsystem
+ */
+
+/* One parsed token waiting to be (or already) run through the dictionaries */
+typedef struct ParsedLex
+{
+	int			type;			/* token type, as given to LexizeAddLemm() */
+	char	   *lemm;			/* token text; pointer is stored, not copied */
+	int			lenlemm;		/* length of lemm */
+	struct ParsedLex *next;		/* next entry in the list, or NULL */
+} ParsedLex;
+
+/* Singly-linked list of ParsedLex; both pointers are NULL when empty */
+typedef struct ListParsedLex
+{
+	ParsedLex  *head;			/* first entry; removed by LPLRemoveHead() */
+	ParsedLex  *tail;			/* last entry; LPLAddTail() appends after it */
+} ListParsedLex;
+
+/* Working state of LexizeExec() across calls */
+typedef struct
+{
+	TSConfigCacheEntry *cfg;	/* configuration whose token map is used */
+	Oid			curDictId;		/* dictionary that asked for more words, or
+								 * InvalidOid in the usual one-word mode */
+	int			posDict;		/* index in the token type's dictionary list
+								 * at which to resume scanning */
+	DictSubState dictState;		/* state passed to the dictionary's lexize
+								 * function */
+	ParsedLex  *curSub;			/* next lexeme to offer to curDictId */
+	ListParsedLex towork;		/* current list to work */
+	ListParsedLex waste;		/* list of lexemes that already lexized */
+
+	/*
+	 * Fields to store the last variant to lexize (basically for a thesaurus
+	 * or a similar dictionary, which wants several lexemes).
+	 */
+
+	ParsedLex  *lastRes;		/* lexeme for which tmpRes was returned */
+	TSLexeme   *tmpRes;			/* last result saved by setNewTmpRes() */
+} LexizeData;
+
+/*
+ * Put a LexizeData into its initial state for configuration cfg: both
+ * lexeme lists empty, no dictionary selected, no saved result.
+ */
+static void
+LexizeInit(LexizeData *ld, TSConfigCacheEntry *cfg)
+{
+	/* both lexeme lists start out empty */
+	ld->towork.head = NULL;
+	ld->towork.tail = NULL;
+	ld->waste.head = NULL;
+	ld->waste.tail = NULL;
+	ld->curSub = NULL;
+
+	/* nothing saved from an earlier dictionary call */
+	ld->tmpRes = NULL;
+	ld->lastRes = NULL;
+
+	/* no dictionary is selected; scanning starts at the first one */
+	ld->posDict = 0;
+	ld->curDictId = InvalidOid;
+	ld->cfg = cfg;
+}
+
+/*
+ * Append newpl to the end of list; newpl becomes the new tail and its next
+ * pointer is cleared.
+ */
+static void
+LPLAddTail(ListParsedLex *list, ParsedLex *newpl)
+{
+	if (list->tail == NULL)
+		list->head = newpl;		/* list was empty */
+	else
+		list->tail->next = newpl;
+
+	list->tail = newpl;
+	newpl->next = NULL;
+}
+
+/*
+ * Unlink and return the first entry of list, or return NULL if the list is
+ * empty.  The returned entry's next pointer is left as it was.
+ */
+static ParsedLex *
+LPLRemoveHead(ListParsedLex *list)
+{
+	ParsedLex  *first = list->head;
+
+	if (first != NULL)
+		list->head = first->next;
+
+	/* an empty list must not keep a stale tail pointer */
+	if (list->head == NULL)
+		list->tail = NULL;
+
+	return first;
+}
+
+/*
+ * Append a new token (type, text pointer, length) to the tail of the work
+ * list and make it the current sub-lexeme.  The text itself is not copied.
+ */
+static void
+LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm)
+{
+	ParsedLex  *entry;
+
+	entry = (ParsedLex *) palloc(sizeof(ParsedLex));
+	entry->lenlemm = lenlemm;
+	entry->lemm = lemm;
+	entry->type = type;
+
+	/* after this call the new entry is the tail of the work list */
+	LPLAddTail(&ld->towork, entry);
+	ld->curSub = entry;
+}
+
+/*
+ * Move the first entry of the work list onto the waste list and reset the
+ * dictionary position to the first dictionary.
+ */
+static void
+RemoveHead(LexizeData *ld)
+{
+	ParsedLex  *done = LPLRemoveHead(&ld->towork);
+
+	LPLAddTail(&ld->waste, done);
+	ld->posDict = 0;
+}
+
+/*
+ * Dispose of the waste list.  If correspondLexem is not NULL, the head of
+ * the list is stored there and the entries are kept; otherwise every entry
+ * is pfree'd.  Either way the waste list is empty afterwards.
+ */
+static void
+setCorrLex(LexizeData *ld, ParsedLex **correspondLexem)
+{
+	if (correspondLexem == NULL)
+	{
+		ParsedLex  *cur = ld->waste.head;
+
+		while (cur != NULL)
+		{
+			ParsedLex  *following = cur->next;
+
+			pfree(cur);
+			cur = following;
+		}
+	}
+	else
+		*correspondLexem = ld->waste.head;
+
+	ld->waste.tail = NULL;
+	ld->waste.head = NULL;
+}
+
+/*
+ * Move entries from the front of the work list to the waste list, up to and
+ * including stop.  When stop is reached, curSub is set to the entry that
+ * followed it.  If stop is never met, the whole work list is moved.
+ */
+static void
+moveToWaste(LexizeData *ld, ParsedLex *stop)
+{
+	while (ld->towork.head != NULL)
+	{
+		bool		reached = (ld->towork.head == stop);
+
+		/* read stop->next now: RemoveHead() will clear it */
+		if (reached)
+			ld->curSub = stop->next;
+
+		RemoveHead(ld);
+
+		if (reached)
+			break;
+	}
+}
+
+/*
+ * Remember res as the latest saved result and lex as the lexeme it belongs
+ * to.  A previously saved result, if any, is freed together with its
+ * lexeme strings.
+ */
+static void
+setNewTmpRes(LexizeData *ld, ParsedLex *lex, TSLexeme *res)
+{
+	TSLexeme   *old = ld->tmpRes;
+
+	if (old != NULL)
+	{
+		int			n;
+
+		/* the array ends at the first entry whose lexeme is NULL */
+		for (n = 0; old[n].lexeme != NULL; n++)
+			pfree(old[n].lexeme);
+		pfree(old);
+	}
+
+	ld->lastRes = lex;
+	ld->tmpRes = res;
+}
+
+/*
+ * LexizeExec --- run dictionaries over the lexemes queued in ld->towork.
+ *
+ * Returns the next non-NULL result obtained from a dictionary's lexize
+ * function, or NULL when nothing (more) can be produced from the lexemes
+ * queued so far.  Before a result is returned, and before the final NULL
+ * return, the waste list is disposed of through setCorrLex(): it is handed
+ * to the caller via *correspondLexem if that is not NULL, else it is freed.
+ *
+ * Two modes are used.  While ld->curDictId is InvalidOid, each lexeme at the
+ * head of the work list is offered in turn to the dictionaries mapped to its
+ * token type.  Once a dictionary sets dictState.getnext, it becomes
+ * ld->curDictId and is fed the following lexemes (tracked by ld->curSub)
+ * until it stops asking for more.
+ */
+static TSLexeme *
+LexizeExec(LexizeData *ld, ParsedLex **correspondLexem)
+{
+	int			i;
+	ListDictionary *map;
+	TSDictionaryCacheEntry *dict;
+	TSLexeme   *res;
+
+	if (ld->curDictId == InvalidOid)
+	{
+		/*
+		 * usual mode: dictionary wants only one word, but we should keep in
+		 * mind that we should go through all stack
+		 */
+
+		while (ld->towork.head)
+		{
+			ParsedLex  *curVal = ld->towork.head;
+			char	   *curValLemm = curVal->lemm;
+			int			curValLenLemm = curVal->lenlemm;
+
+			/* map is only dereferenced after the range check just below */
+			map = ld->cfg->map + curVal->type;
+
+			if (curVal->type == 0 || curVal->type >= ld->cfg->lenmap || map->len == 0)
+			{
+				/* skip this type of lexeme */
+				RemoveHead(ld);
+				continue;
+			}
+
+			/* resume at posDict; RemoveHead() resets it to 0 */
+			for (i = ld->posDict; i < map->len; i++)
+			{
+				dict = lookup_ts_dictionary_cache(map->dictIds[i]);
+
+				ld->dictState.isend = ld->dictState.getnext = false;
+				ld->dictState.private_state = NULL;
+				res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(dict->lexize),
+																 PointerGetDatum(dict->dictData),
+																 PointerGetDatum(curValLemm),
+																 Int32GetDatum(curValLenLemm),
+																 PointerGetDatum(&ld->dictState)));
+
+				if (ld->dictState.getnext)
+				{
+					/*
+					 * dictionary wants next word, so setup and store current
+					 * position and go to multiword mode
+					 */
+
+					ld->curDictId = DatumGetObjectId(map->dictIds[i]);
+					ld->posDict = i + 1;
+					ld->curSub = curVal->next;
+					if (res)
+						setNewTmpRes(ld, curVal, res);
+					return LexizeExec(ld, correspondLexem);
+				}
+
+				if (!res)		/* dictionary doesn't know this lexeme */
+					continue;
+
+				if (res->flags & TSL_FILTER)
+				{
+					/*
+					 * filtering dictionary: offer its output, instead of the
+					 * original text, to the remaining dictionaries
+					 */
+					curValLemm = res->lexeme;
+					curValLenLemm = strlen(res->lexeme);
+					continue;
+				}
+
+				/* this dictionary produced the result for the lexeme */
+				RemoveHead(ld);
+				setCorrLex(ld, correspondLexem);
+				return res;
+			}
+
+			/* no dictionary produced a result; drop the lexeme */
+			RemoveHead(ld);
+		}
+	}
+	else
+	{							/* curDictId is valid */
+		dict = lookup_ts_dictionary_cache(ld->curDictId);
+
+		/*
+		 * Dictionary ld->curDictId asks us about following words
+		 */
+
+		while (ld->curSub)
+		{
+			ParsedLex  *curVal = ld->curSub;
+
+			map = ld->cfg->map + curVal->type;
+
+			if (curVal->type != 0)
+			{
+				bool		dictExists = false;
+
+				if (curVal->type >= ld->cfg->lenmap || map->len == 0)
+				{
+					/* skip this type of lexeme */
+					ld->curSub = curVal->next;
+					continue;
+				}
+
+				/*
+				 * We must be sure that the current type of lexeme is
+				 * recognized by our dictionary: we just check whether the
+				 * dictionary appears in the list of dictionaries for this
+				 * token type.
+				 */
+				for (i = 0; i < map->len && !dictExists; i++)
+					if (ld->curDictId == DatumGetObjectId(map->dictIds[i]))
+						dictExists = true;
+
+				if (!dictExists)
+				{
+					/*
+					 * Dictionary can't work with current type of lexeme,
+					 * return to basic mode and redo all stored lexemes
+					 */
+					ld->curDictId = InvalidOid;
+					return LexizeExec(ld, correspondLexem);
+				}
+			}
+
+			/* a token of type 0 is presented to the dictionary as the end */
+			ld->dictState.isend = (curVal->type == 0);
+			ld->dictState.getnext = false;
+
+			res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(dict->lexize),
+															 PointerGetDatum(dict->dictData),
+															 PointerGetDatum(curVal->lemm),
+															 Int32GetDatum(curVal->lenlemm),
+															 PointerGetDatum(&ld->dictState)));
+
+			if (ld->dictState.getnext)
+			{
+				/* Dictionary wants one more */
+				ld->curSub = curVal->next;
+				if (res)
+					setNewTmpRes(ld, curVal, res);
+				continue;
+			}
+
+			if (res || ld->tmpRes)
+			{
+				/*
+				 * Dictionary normalizes lexemes, so we remove from stack all
+				 * used lexemes, return to basic mode and redo end of stack
+				 * (if it exists)
+				 */
+				if (res)
+				{
+					moveToWaste(ld, ld->curSub);
+				}
+				else
+				{
+					/* fall back to the result saved by setNewTmpRes() */
+					res = ld->tmpRes;
+					moveToWaste(ld, ld->lastRes);
+				}
+
+				/* reset to initial state */
+				ld->curDictId = InvalidOid;
+				ld->posDict = 0;
+				ld->lastRes = NULL;
+				ld->tmpRes = NULL;
+				setCorrLex(ld, correspondLexem);
+				return res;
+			}
+
+			/*
+			 * The dictionary doesn't want the next lexeme and didn't
+			 * recognize anything; redo from ld->towork.head
+			 */
+			ld->curDictId = InvalidOid;
+			return LexizeExec(ld, correspondLexem);
+		}
+	}
+
+	setCorrLex(ld, correspondLexem);
+	return NULL;
+}
+
+/*
+ * Parse string and lexize words.
+ *
+ * prs will be filled in.
+ */
+void
+parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen)
+{
+	int			type,
+				lenlemm;
+	char	   *lemm = NULL;
+	LexizeData	ldata;
+	TSLexeme   *norms;
+	TSConfigCacheEntry *cfg;
+	TSParserCacheEntry *prsobj;
+	void	   *prsdata;
+
+	/* Look up the configuration and the parser it names */
+	cfg = lookup_ts_config_cache(cfgId);
+	prsobj = lookup_ts_parser_cache(cfg->prsId);
+
+	/* Start the parser on buf; prsdata is the parser's state pointer */
+	prsdata = (void *) DatumGetPointer(FunctionCall2(&prsobj->prsstart,
+													 PointerGetDatum(buf),
+													 Int32GetDatum(buflen)));
+
+	LexizeInit(&ldata, cfg);
+
+	/* One iteration per token; the loop ends once type is not positive */
+	do
+	{
+		type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
+										   PointerGetDatum(prsdata),
+										   PointerGetDatum(&lemm),
+										   PointerGetDatum(&lenlemm)));
+
+		if (type > 0 && lenlemm >= MAXSTRLEN)
+		{
+#ifdef IGNORE_LONGLEXEME
+			ereport(NOTICE,
+					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+					 errmsg("word is too long to be indexed"),
+					 errdetail("Words longer than %d characters are ignored.",
+							   MAXSTRLEN)));
+			/* type > 0 here, so this goes on to fetch the next token */
+			continue;
+#else
+			ereport(ERROR,
+					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+					 errmsg("word is too long to be indexed"),
+					 errdetail("Words longer than %d characters are ignored.",
+							   MAXSTRLEN)));
+#endif
+		}
+
+		/* Queue the token (including the final one with type <= 0) */
+		LexizeAddLemm(&ldata, type, lemm, lenlemm);
+
+		/* Collect every result the dictionaries can produce so far */
+		while ((norms = LexizeExec(&ldata, NULL)) != NULL)
+		{
+			TSLexeme   *ptr = norms;
+
+			prs->pos++;			/* set pos */
+
+			/* norms is terminated by an entry whose lexeme is NULL */
+			while (ptr->lexeme)
+			{
+				/* double the words array when it is full */
+				if (prs->curwords == prs->lenwords)
+				{
+					prs->lenwords *= 2;
+					prs->words = (ParsedWord *) repalloc((void *) prs->words, prs->lenwords * sizeof(ParsedWord));
+				}
+
+				if (ptr->flags & TSL_ADDPOS)
+					prs->pos++;
+				prs->words[prs->curwords].len = strlen(ptr->lexeme);
+				/* the lexeme string is stored by pointer, not copied */
+				prs->words[prs->curwords].word = ptr->lexeme;
+				prs->words[prs->curwords].nvariant = ptr->nvariant;
+				prs->words[prs->curwords].flags = ptr->flags & TSL_PREFIX;
+				prs->words[prs->curwords].alen = 0;
+				prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+				ptr++;
+				prs->curwords++;
+			}
+			/* free only the array; its lexeme strings now belong to prs */
+			pfree(norms);
+		}
+	} while (type > 0);
+
+	/* Shut the parser down */
+	FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
+}
+
+/*
+ * Headline framework
+ */
+
+/* Add a word to prs->words[] */
+static void
+hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type)
+{
+	HeadlineWordEntry *entry;
+
+	/* double the array when it is full */
+	if (prs->curwords >= prs->lenwords)
+	{
+		prs->lenwords *= 2;
+		prs->words = (HeadlineWordEntry *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
+	}
+
+	/* start from an all-zero entry, then fill in what is known */
+	entry = &(prs->words[prs->curwords]);
+	memset(entry, 0, sizeof(HeadlineWordEntry));
+	entry->type = (uint8) type;
+	entry->len = buflen;
+
+	/* keep a private copy of exactly buflen bytes; no terminator is added */
+	entry->word = palloc(buflen);
+	memcpy(entry->word, buf, buflen);
+
+	prs->curwords++;
+}
+
+/*
+ * Add pos and matching-query-item data to the just-added word.
+ * Here, buf/buflen represent a processed lexeme, not raw token text.
+ *
+ * If the query contains more than one matching item, we replicate
+ * the last-added word so that each item can be pointed to.  The
+ * duplicate entries are marked with repeated = 1.
+ */
+static void
+hlfinditem(HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen)
+{
+	QueryItem  *items = GETQUERY(query);
+	HeadlineWordEntry *word;
+	int			i;
+
+	/* make sure there is room for one duplicate per query item */
+	while (prs->curwords + query->size >= prs->lenwords)
+	{
+		prs->lenwords *= 2;
+		prs->words = (HeadlineWordEntry *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWordEntry));
+	}
+
+	/* the just-added word; fetched only after any repalloc above */
+	word = &(prs->words[prs->curwords - 1]);
+	word->pos = LIMITPOS(pos);
+
+	for (i = 0; i < query->size; i++)
+	{
+		QueryItem  *qi = &items[i];
+		HeadlineWordEntry *copy;
+
+		/* only operands that match the lexeme are of interest */
+		if (qi->type != QI_VAL)
+			continue;
+		if (tsCompareString(GETOPERAND(query) + qi->qoperand.distance, qi->qoperand.length,
+							buf, buflen, qi->qoperand.prefix) != 0)
+			continue;
+
+		/* the first match is recorded in the word itself */
+		if (!word->item)
+		{
+			word->item = &qi->qoperand;
+			continue;
+		}
+
+		/* further matches get a duplicate entry marked as repeated */
+		copy = &(prs->words[prs->curwords]);
+		memcpy(copy, word, sizeof(HeadlineWordEntry));
+		copy->item = &qi->qoperand;
+		copy->repeated = 1;
+		prs->curwords++;
+	}
+}
+
+/*
+ * Add the tokens in the list lexs to prs->words[], matching each against
+ * the lexemes in norms (which may be NULL), then release both: the list
+ * entries are pfree'd, as are norms and its lexeme strings.  prs->vectorpos
+ * is advanced once for every lexeme flagged TSL_ADDPOS.
+ */
+static void
+addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme *norms)
+{
+	TSLexeme   *norm;
+
+	while (lexs != NULL)
+	{
+		ParsedLex  *following;
+		int32		wordpos;
+
+		/* only tokens with a positive type become headline words */
+		if (lexs->type > 0)
+			hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type);
+
+		/* positions are counted from a copy, so each token starts equal */
+		wordpos = prs->vectorpos;
+		for (norm = norms; norm != NULL && norm->lexeme != NULL; norm++)
+		{
+			if (norm->flags & TSL_ADDPOS)
+				wordpos++;
+			hlfinditem(prs, query, wordpos, norm->lexeme, strlen(norm->lexeme));
+		}
+
+		following = lexs->next;
+		pfree(lexs);
+		lexs = following;
+	}
+
+	if (norms != NULL)
+	{
+		/* now advance the real position counter and free the lexemes */
+		for (norm = norms; norm->lexeme != NULL; norm++)
+		{
+			if (norm->flags & TSL_ADDPOS)
+				prs->vectorpos++;
+			pfree(norm->lexeme);
+		}
+		pfree(norms);
+	}
+}
+
+/*
+ * Parse buf with the parser of configuration cfgId and fill prs->words[]
+ * for headline generation, recording for each word which items of query
+ * its lexemes match (see hlfinditem()).
+ */
+void
+hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int buflen)
+{
+	int			type,
+				lenlemm;
+	char	   *lemm = NULL;
+	LexizeData	ldata;
+	TSLexeme   *norms;
+	ParsedLex  *lexs;
+	TSConfigCacheEntry *cfg;
+	TSParserCacheEntry *prsobj;
+	void	   *prsdata;
+
+	/* Look up the configuration and the parser it names */
+	cfg = lookup_ts_config_cache(cfgId);
+	prsobj = lookup_ts_parser_cache(cfg->prsId);
+
+	/* Start the parser on buf; prsdata is the parser's state pointer */
+	prsdata = (void *) DatumGetPointer(FunctionCall2(&(prsobj->prsstart),
+													 PointerGetDatum(buf),
+													 Int32GetDatum(buflen)));
+
+	LexizeInit(&ldata, cfg);
+
+	/* One iteration per token; the loop ends once type is not positive */
+	do
+	{
+		type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
+										   PointerGetDatum(prsdata),
+										   PointerGetDatum(&lemm),
+										   PointerGetDatum(&lenlemm)));
+
+		if (type > 0 && lenlemm >= MAXSTRLEN)
+		{
+#ifdef IGNORE_LONGLEXEME
+			ereport(NOTICE,
+					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+					 errmsg("word is too long to be indexed"),
+					 errdetail("Words longer than %d characters are ignored.",
+							   MAXSTRLEN)));
+			/* type > 0 here, so this goes on to fetch the next token */
+			continue;
+#else
+			ereport(ERROR,
+					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+					 errmsg("word is too long to be indexed"),
+					 errdetail("Words longer than %d characters are ignored.",
+							   MAXSTRLEN)));
+#endif
+		}
+
+		/* Queue the token (including the final one with type <= 0) */
+		LexizeAddLemm(&ldata, type, lemm, lenlemm);
+
+		/*
+		 * Drain the dictionaries.  addHLParsedLex() is called even when no
+		 * result came back, so that the tokens handed over in lexs are
+		 * still added to the headline and freed.
+		 */
+		do
+		{
+			if ((norms = LexizeExec(&ldata, &lexs)) != NULL)
+			{
+				prs->vectorpos++;
+				addHLParsedLex(prs, query, lexs, norms);
+			}
+			else
+				addHLParsedLex(prs, query, lexs, NULL);
+		} while (norms);
+	} while (type > 0);
+
+	/* Shut the parser down */
+	FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
+}
+
+/*
+ * Generate the headline, as a text object, from HeadlineParsedText.
+ */
+text *
+generateHeadline(HeadlineParsedText *prs)
+{
+	int			len = 128;
+	text	   *out = (text *) palloc(len);
+	char	   *ptr = ((char *) out) + VARHDRSZ;
+	int			numfragments = 0;
+	bool		infrag = false;
+	int			i;
+
+	for (i = 0; i < prs->curwords; i++)
+	{
+		HeadlineWordEntry *wrd = &prs->words[i];
+
+		/*
+		 * Enlarge the output until it can take this word plus both selection
+		 * markers and a fragment delimiter.
+		 */
+		while (wrd->len + prs->stopsellen + prs->startsellen + prs->fragdelimlen + (ptr - ((char *) out)) >= len)
+		{
+			int			dist = ptr - ((char *) out);
+
+			len *= 2;
+			out = (text *) repalloc(out, len);
+			ptr = ((char *) out) + dist;
+		}
+
+		/* entries marked as repeated produce no output */
+		if (wrd->repeated)
+			continue;
+
+		if (!wrd->in)
+		{
+			/* a word not marked "in" ends any fragment; its text is freed */
+			infrag = false;
+			pfree(wrd->word);
+			continue;
+		}
+
+		if (!infrag)
+		{
+			/* start of a new fragment */
+			infrag = true;
+			numfragments++;
+
+			/* add a fragment delimiter if this is after the first one */
+			if (numfragments > 1)
+			{
+				memcpy(ptr, prs->fragdelim, prs->fragdelimlen);
+				ptr += prs->fragdelimlen;
+			}
+		}
+
+		if (wrd->replace)
+		{
+			/* a word marked "replace" is output as a single space */
+			*ptr++ = ' ';
+		}
+		else if (!wrd->skip)
+		{
+			/* selected words are wrapped in the start/stop markers */
+			if (wrd->selected)
+			{
+				memcpy(ptr, prs->startsel, prs->startsellen);
+				ptr += prs->startsellen;
+			}
+
+			memcpy(ptr, wrd->word, wrd->len);
+			ptr += wrd->len;
+
+			if (wrd->selected)
+			{
+				memcpy(ptr, prs->stopsel, prs->stopsellen);
+				ptr += prs->stopsellen;
+			}
+		}
+	}
+
+	SET_VARSIZE(out, ptr - ((char *) out));
+	return out;
+}
diff --git a/src/backend/tsearch/ts_selfuncs.c b/src/backend/tsearch/ts_selfuncs.c
new file mode 100644
index 0000000..8f2679f
--- /dev/null
+++ b/src/backend/tsearch/ts_selfuncs.c
@@ -0,0 +1,453 @@
+/*-------------------------------------------------------------------------
+ *
+ * ts_selfuncs.c
+ *	  Selectivity estimation functions for text search operators.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/tsearch/ts_selfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "catalog/pg_statistic.h"
+#include "catalog/pg_type.h"
+#include "miscadmin.h"
+#include "nodes/nodes.h"
+#include "tsearch/ts_type.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
+#include "utils/syscache.h"
+
+
+/*
+ * The default text search selectivity is chosen to be small enough to
+ * encourage indexscans for typical table densities.  See selfuncs.h and
+ * DEFAULT_EQ_SEL for details.
+ */
+#define DEFAULT_TS_MATCH_SEL 0.005
+
+/* lookup table type for binary searching through MCELEMs */
+typedef struct
+{
+	text	   *element;		/* an MCELEM entry (pointer to a text datum) */
+	float4		frequency;		/* the matching entry of the numbers array */
+} TextFreq;
+
+/* type of keys for bsearch'ing through an array of TextFreqs */
+typedef struct
+{
+	char	   *lexeme;			/* start of the lexeme in the tsquery's
+								 * operand area */
+	int			length;			/* length of the lexeme */
+} LexemeKey;
+
+static Selectivity tsquerysel(VariableStatData *vardata, Datum constval);
+static Selectivity mcelem_tsquery_selec(TSQuery query,
+										Datum *mcelem, int nmcelem,
+										float4 *numbers, int nnumbers);
+static Selectivity tsquery_opr_selec(QueryItem *item, char *operand,
+									 TextFreq *lookup, int length, float4 minfreq);
+static int	compare_lexeme_textfreq(const void *e1, const void *e2);
+
+#define tsquery_opr_selec_no_stats(query) \
+	tsquery_opr_selec(GETQUERY(query), GETOPERAND(query), NULL, 0, 0)
+
+
+/*
+ *	tsmatchsel -- Selectivity of "@@"
+ *
+ * restriction selectivity function for tsvector @@ tsquery and
+ * tsquery @@ tsvector
+ */
+Datum
+tsmatchsel(PG_FUNCTION_ARGS)
+{
+	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+
+#ifdef NOT_USED
+	Oid			operator = PG_GETARG_OID(1);
+#endif
+	List	   *args = (List *) PG_GETARG_POINTER(2);
+	int			varRelid = PG_GETARG_INT32(3);
+	VariableStatData vardata;
+	Node	   *other;
+	bool		varonleft;
+	Const	   *cnst;
+	Selectivity selec;
+
+	/*
+	 * Unless the expression has the form "variable op something" or
+	 * "something op variable", give up and return the default estimate.
+	 */
+	if (!get_restriction_variable(root, args, varRelid,
+								  &vardata, &other, &varonleft))
+		PG_RETURN_FLOAT8(DEFAULT_TS_MATCH_SEL);
+
+	/*
+	 * Nothing useful can be done either if the something is not a constant.
+	 */
+	if (!IsA(other, Const))
+	{
+		ReleaseVariableStats(vardata);
+		PG_RETURN_FLOAT8(DEFAULT_TS_MATCH_SEL);
+	}
+
+	cnst = (Const *) other;
+
+	/*
+	 * The "@@" operator is strict, so a NULL constant can be dealt with
+	 * right away.
+	 */
+	if (cnst->constisnull)
+	{
+		ReleaseVariableStats(vardata);
+		PG_RETURN_FLOAT8(0.0);
+	}
+
+	/*
+	 * We now have a Var and a Const.  The Const must be a TSQuery for us to
+	 * do anything useful; this has to be checked because the Var might be
+	 * the TSQuery rather than the TSVector.
+	 */
+	if (cnst->consttype != TSQUERYOID)
+	{
+		/* If we can't see the query structure, must punt */
+		selec = DEFAULT_TS_MATCH_SEL;
+	}
+	else
+	{
+		/* tsvector @@ tsquery or the other way around */
+		Assert(vardata.vartype == TSVECTOROID);
+
+		selec = tsquerysel(&vardata, cnst->constvalue);
+	}
+
+	ReleaseVariableStats(vardata);
+
+	CLAMP_PROBABILITY(selec);
+
+	PG_RETURN_FLOAT8((float8) selec);
+}
+
+
+/*
+ *	tsmatchjoinsel -- join selectivity of "@@"
+ *
+ * join selectivity function for tsvector @@ tsquery and tsquery @@ tsvector
+ */
+Datum
+tsmatchjoinsel(PG_FUNCTION_ARGS)
+{
+	/* no real estimation is attempted yet; always use the default */
+	float8		selec = DEFAULT_TS_MATCH_SEL;
+
+	PG_RETURN_FLOAT8(selec);
+}
+
+
+/*
+ * @@ selectivity for tsvector var vs tsquery constant
+ */
+static Selectivity
+tsquerysel(VariableStatData *vardata, Datum constval)
+{
+	/* The caller made sure the const is a TSQuery, so it can be fetched */
+	TSQuery		query = DatumGetTSQuery(constval);
+	Form_pg_statistic stats;
+	AttStatsSlot sslot;
+	Selectivity selec;
+
+	/* Empty query matches nothing */
+	if (query->size == 0)
+		return (Selectivity) 0.0;
+
+	/*
+	 * Without any statistics, estimate from the query alone.  No nulls are
+	 * assumed in this case, so there is no stanullfrac correction.
+	 */
+	if (!HeapTupleIsValid(vardata->statsTuple))
+		return tsquery_opr_selec_no_stats(query);
+
+	stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
+
+	/* MCELEM will be an array of TEXT elements for a tsvector column */
+	if (!get_attstatsslot(&sslot, vardata->statsTuple,
+						  STATISTIC_KIND_MCELEM, InvalidOid,
+						  ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
+	{
+		/* No most-common-elements info, so do without */
+		selec = tsquery_opr_selec_no_stats(query);
+	}
+	else
+	{
+		/* The tsvector Var has a most-common-elements slot, so use it */
+		selec = mcelem_tsquery_selec(query, sslot.values, sslot.nvalues,
+									 sslot.numbers, sslot.nnumbers);
+		free_attstatsslot(&sslot);
+	}
+
+	/*
+	 * MCE stats count only non-null rows, so adjust for null rows.
+	 */
+	return selec * (1.0 - stats->stanullfrac);
+}
+
+/*
+ * Extract data from the pg_statistic arrays into useful format.
+ */
+static Selectivity
+mcelem_tsquery_selec(TSQuery query, Datum *mcelem, int nmcelem,
+					 float4 *numbers, int nnumbers)
+{
+	TextFreq   *table;
+	float4		lowest;
+	Selectivity result;
+	int			idx;
+
+	/*
+	 * The Numbers array should be two entries longer than the Values array,
+	 * since its last two cells hold the minimal and maximal frequency.  If
+	 * that is not so, punt and estimate without statistics.
+	 *
+	 * (Note: the MCELEM statistics slot definition allows for a third extra
+	 * number containing the frequency of nulls, but we're not expecting that
+	 * to appear for a tsvector column.)
+	 */
+	if (nnumbers != nmcelem + 2)
+		return tsquery_opr_selec_no_stats(query);
+
+	/*
+	 * Pair each element with its frequency in one array, so that bsearch()
+	 * can be used on it.
+	 */
+	table = (TextFreq *) palloc(sizeof(TextFreq) * nmcelem);
+	for (idx = 0; idx < nmcelem; idx++)
+	{
+		TextFreq   *entry = &table[idx];
+
+		/*
+		 * The text Datums came from an array, so they cannot be compressed
+		 * or stored out-of-line -- it's safe to use VARSIZE_ANY*.
+		 */
+		Assert(!VARATT_IS_COMPRESSED(mcelem[idx]) && !VARATT_IS_EXTERNAL(mcelem[idx]));
+		entry->frequency = numbers[idx];
+		entry->element = (text *) DatumGetPointer(mcelem[idx]);
+	}
+
+	/*
+	 * The lowest frequency was stored by compute_tsvector_stats() in the
+	 * next-to-last cell of the Numbers array.  See ts_typanalyze.c
+	 */
+	lowest = numbers[nnumbers - 2];
+
+	result = tsquery_opr_selec(GETQUERY(query), GETOPERAND(query), table,
+							   nmcelem, lowest);
+
+	pfree(table);
+
+	return result;
+}
+
+/*
+ * Traverse the tsquery in preorder, calculating selectivity as:
+ *
+ *	 selec(left_oper) * selec(right_oper) in AND & PHRASE nodes,
+ *
+ *	 selec(left_oper) + selec(right_oper) -
+ *		selec(left_oper) * selec(right_oper) in OR nodes,
+ *
+ *	 1 - selec(oper) in NOT nodes
+ *
+ *	 histogram-based estimation in prefix VAL nodes
+ *
+ *	 freq[val] in exact VAL nodes, if the value is in MCELEM
+ *	 min(freq[MCELEM]) / 2 in VAL nodes, if it is not
+ *
+ * The MCELEM array is already sorted (see ts_typanalyze.c), so we can use
+ * binary search for determining freq[MCELEM].
+ *
+ * If we don't have stats for the tsvector, we still use this logic,
+ * except we use default estimates for VAL nodes.  This case is signaled
+ * by lookup == NULL.
+ */
+static Selectivity
+tsquery_opr_selec(QueryItem *item, char *operand,
+				  TextFreq *lookup, int length, float4 minfreq)
+{
+	Selectivity result;
+
+	/* The query tree is walked recursively, so guard against stack overrun */
+	check_stack_depth();
+
+	if (item->type != QI_VAL)
+	{
+		/*
+		 * Operator node.  One operand immediately follows the operator in
+		 * the item array; for binary operators the other one is found at
+		 * offset qoperator.left from the operator.
+		 */
+		Selectivity near_sel,
+					far_sel;
+
+		switch (item->qoperator.oper)
+		{
+			case OP_NOT:
+				near_sel = tsquery_opr_selec(item + 1, operand,
+											 lookup, length, minfreq);
+				result = 1.0 - near_sel;
+				break;
+
+			case OP_AND:
+			case OP_PHRASE:
+				/* Both operands must match: multiply the probabilities */
+				near_sel = tsquery_opr_selec(item + 1, operand,
+											 lookup, length, minfreq);
+				far_sel = tsquery_opr_selec(item + item->qoperator.left,
+											operand,
+											lookup, length, minfreq);
+				result = near_sel * far_sel;
+				break;
+
+			case OP_OR:
+				/* Either operand may match: inclusion-exclusion */
+				near_sel = tsquery_opr_selec(item + 1, operand,
+											 lookup, length, minfreq);
+				far_sel = tsquery_opr_selec(item + item->qoperator.left,
+											operand,
+											lookup, length, minfreq);
+				result = near_sel + far_sel - near_sel * far_sel;
+				break;
+
+			default:
+				elog(ERROR, "unrecognized operator: %d", item->qoperator.oper);
+				result = 0;		/* keep compiler quiet */
+				break;
+		}
+	}
+	else
+	{
+		/* Leaf node: a lexeme, possibly with a prefix-match marker */
+		QueryOperand *val = (QueryOperand *) item;
+		LexemeKey	probe;
+
+		/* The lexeme bytes live in the operand area, at val->distance */
+		probe.lexeme = operand + val->distance;
+		probe.length = val->length;
+
+		if (!val->prefix)
+		{
+			/* Exact lexeme match */
+			TextFreq   *hit;
+
+			/* Without statistics all we can do is use the default */
+			if (lookup == NULL)
+				return (Selectivity) DEFAULT_TS_MATCH_SEL;
+
+			hit = (TextFreq *) bsearch(&probe, lookup, length,
+									   sizeof(TextFreq),
+									   compare_lexeme_textfreq);
+
+			if (hit != NULL)
+			{
+				/* Found among the MCELEMs: use the frequency ANALYZE saw */
+				result = hit->frequency;
+			}
+			else
+			{
+				/*
+				 * Not an MCELEM, so it should be rarer than the rarest
+				 * stored element; cap the estimate at minfreq / 2.
+				 */
+				result = Min(DEFAULT_TS_MATCH_SEL, minfreq / 2);
+			}
+		}
+		else
+		{
+			/* Prefix match, i.e. the query item is lexeme:* */
+			Selectivity matched = 0,
+						allmces = 0;
+			int			n_matched = 0;
+			TextFreq   *mce;
+			TextFreq   *mce_end;
+
+			/*
+			 * The extrapolation below is only trustworthy with a fair number
+			 * of MCELEMs (threshold: 100).  With no stats or too few of
+			 * them, fall back to an arbitrary DEFAULT_TS_MATCH_SEL * 4.
+			 */
+			if (lookup == NULL || length < 100)
+				return (Selectivity) (DEFAULT_TS_MATCH_SEL * 4);
+
+			/*
+			 * Accumulate the combined frequency of the MCELEMs that start
+			 * with the prefix, and of all MCELEMs.  Elements are not mutually
+			 * exclusive (several can occur in one row), so frequencies are
+			 * combined as independent events: p = p + f - p * f.
+			 */
+			mce_end = lookup + length;
+			for (mce = lookup; mce < mce_end; mce++)
+			{
+				int			mce_len = VARSIZE_ANY_EXHDR(mce->element);
+
+				if (mce_len >= probe.length &&
+					strncmp(probe.lexeme, VARDATA_ANY(mce->element),
+							probe.length) == 0)
+				{
+					matched += mce->frequency - matched * mce->frequency;
+					n_matched++;
+				}
+				allmces += mce->frequency - allmces * mce->frequency;
+			}
+
+			/* Roundoff could push these slightly outside [0, 1] */
+			CLAMP_PROBABILITY(matched);
+			CLAMP_PROBABILITY(allmces);
+
+			/*
+			 * Rows not covered by any MCELEM are assumed to match in the
+			 * same proportion as the MCELEMs themselves do.
+			 */
+			result = matched + (1.0 - allmces) * ((double) n_matched / length);
+
+			/*
+			 * "word:*" must never be estimated below what plain "word" would
+			 * get as a non-MCELEM lexeme.
+			 */
+			result = Max(Min(DEFAULT_TS_MATCH_SEL, minfreq / 2), result);
+		}
+	}
+
+	/* Keep intermediate results sane despite roundoff error */
+	CLAMP_PROBABILITY(result);
+
+	return result;
+}
+
+/*
+ * bsearch() comparator for a lexeme (non-NULL terminated string with length)
+ * and a TextFreq. Use length, then byte-for-byte comparison, because that's
+ * how ANALYZE code sorted data before storing it in a statistic tuple.
+ * See ts_typanalyze.c for details.
+ */
+static int
+compare_lexeme_textfreq(const void *e1, const void *e2)
+{
+	const LexemeKey *probe = (const LexemeKey *) e1;
+	const TextFreq *entry = (const TextFreq *) e2;
+	int			probe_len = probe->length;
+	int			entry_len = VARSIZE_ANY_EXHDR(entry->element);
+
+	/* Shorter sorts first; differing lengths need no byte comparison */
+	if (probe_len != entry_len)
+		return (probe_len > entry_len) ? 1 : -1;
+
+	/* Same length: order by the bytes themselves */
+	return strncmp(probe->lexeme, VARDATA_ANY(entry->element), probe_len);
+}
diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c
new file mode 100644
index 0000000..e771a7c
--- /dev/null
+++ b/src/backend/tsearch/ts_typanalyze.c
@@ -0,0 +1,536 @@
+/*-------------------------------------------------------------------------
+ *
+ * ts_typanalyze.c
+ *	  functions for gathering statistics from tsvector columns
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/tsearch/ts_typanalyze.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/pg_collation.h"
+#include "catalog/pg_operator.h"
+#include "commands/vacuum.h"
+#include "common/hashfn.h"
+#include "tsearch/ts_type.h"
+#include "utils/builtins.h"
+
+
+/*
+ * A hash key for lexemes.
+ *
+ * Lookup keys built in compute_tsvector_stats() point directly into the
+ * tsvector being scanned; when a new hashtable entry is created, its
+ * key.lexeme is replaced with a palloc'd copy of those bytes.
+ */
+typedef struct
+{
+	char	   *lexeme;			/* lexeme (not NULL terminated!) */
+	int			length;			/* its length in bytes */
+} LexemeHashKey;
+
+/*
+ * A hash table entry for the Lossy Counting algorithm.
+ *
+ * frequency starts at 1 when the entry is inserted and is incremented each
+ * time the lexeme is seen again; delta is set to b_current - 1 at insertion.
+ * prune_lexemes_hashtable() removes entries with
+ * frequency + delta <= b_current.
+ */
+typedef struct
+{
+	LexemeHashKey key;			/* This is 'e' from the LC algorithm. */
+	int			frequency;		/* This is 'f'. */
+	int			delta;			/* And this is 'delta'. */
+} TrackItem;
+
+/* Forward declarations of the file-local functions defined below */
+static void compute_tsvector_stats(VacAttrStats *stats,
+								   AnalyzeAttrFetchFunc fetchfunc,
+								   int samplerows,
+								   double totalrows);
+static void prune_lexemes_hashtable(HTAB *lexemes_tab, int b_current);
+/* hash and match callbacks installed in the lexemes hashtable */
+static uint32 lexeme_hash(const void *key, Size keysize);
+static int	lexeme_match(const void *key1, const void *key2, Size keysize);
+static int	lexeme_compare(const void *key1, const void *key2);
+/* comparators passed to qsort_interruptible() */
+static int	trackitem_compare_frequencies_desc(const void *e1, const void *e2,
+											   void *arg);
+static int	trackitem_compare_lexemes(const void *e1, const void *e2,
+									  void *arg);
+
+
+/*
+ *	ts_typanalyze -- a custom typanalyze function for tsvector columns
+ */
+Datum
+ts_typanalyze(PG_FUNCTION_ARGS)
+{
+	VacAttrStats *vacstats = (VacAttrStats *) PG_GETARG_POINTER(0);
+	int			target = vacstats->attr->attstattarget;
+
+	/*
+	 * A negative attstattarget means "use the default".  Writing the
+	 * resolved value back is fine because vacstats->attr is a private copy.
+	 */
+	if (target < 0)
+	{
+		target = default_statistics_target;
+		vacstats->attr->attstattarget = target;
+	}
+
+	/* see comment about the choice of minrows in commands/analyze.c */
+	vacstats->minrows = 300 * target;
+	vacstats->compute_stats = compute_tsvector_stats;
+
+	PG_RETURN_BOOL(true);
+}
+
+/*
+ *	compute_tsvector_stats() -- compute statistics for a tsvector column
+ *
+ *	This function computes statistics that are useful for determining @@
+ *	operations' selectivity, along with the fraction of non-null rows and
+ *	average width.
+ *
+ *	Instead of finding the most common values, as we do for most datatypes,
+ *	we're looking for the most common lexemes. This is more useful, because
+ *	there most probably won't be any two rows with the same tsvector and thus
+ *	the notion of a MCV is a bit bogus with this datatype. With a list of the
+ *	most common lexemes we can do a better job at figuring out @@ selectivity.
+ *
+ *	For the same reasons we assume that tsvector columns are unique when
+ *	determining the number of distinct values.
+ *
+ *	The algorithm used is Lossy Counting, as proposed in the paper "Approximate
+ *	frequency counts over data streams" by G. S. Manku and R. Motwani, in
+ *	Proceedings of the 28th International Conference on Very Large Data Bases,
+ *	Hong Kong, China, August 2002, section 4.2. The paper is available at
+ *	http://www.vldb.org/conf/2002/S10P03.pdf
+ *
+ *	The Lossy Counting (aka LC) algorithm goes like this:
+ *	Let s be the threshold frequency for an item (the minimum frequency we
+ *	are interested in) and epsilon the error margin for the frequency. Let D
+ *	be a set of triples (e, f, delta), where e is an element value, f is that
+ *	element's frequency (actually, its current occurrence count) and delta is
+ *	the maximum error in f. We start with D empty and process the elements in
+ *	batches of size w. (The batch size is also known as "bucket size" and is
+ *	equal to 1/epsilon.) Let the current batch number be b_current, starting
+ *	with 1. For each element e we either increment its f count, if it's
+ *	already in D, or insert a new triple into D with values (e, 1, b_current
+ *	- 1). After processing each batch we prune D, by removing from it all
+ *	elements with f + delta <= b_current.  After the algorithm finishes we
+ *	suppress all elements from D that do not satisfy f >= (s - epsilon) * N,
+ *	where N is the total number of elements in the input.  We emit the
+ *	remaining elements with estimated frequency f/N.  The LC paper proves
+ *	that this algorithm finds all elements with true frequency at least s,
+ *	and that no frequency is overestimated or is underestimated by more than
+ *	epsilon.  Furthermore, given reasonable assumptions about the input
+ *	distribution, the required table size is no more than about 7 times w.
+ *
+ *	We set s to be the estimated frequency of the K'th word in a natural
+ *	language's frequency table, where K is the target number of entries in
+ *	the MCELEM array plus an arbitrary constant, meant to reflect the fact
+ *	that the most common words in any language would usually be stopwords
+ *	so we will not actually see them in the input.  We assume that the
+ *	distribution of word frequencies (including the stopwords) follows Zipf's
+ *	law with an exponent of 1.
+ *
+ *	Assuming Zipfian distribution, the frequency of the K'th word is equal
+ *	to 1/(K * H(W)) where H(n) is 1/2 + 1/3 + ... + 1/n and W is the number of
+ *	words in the language.  Putting W as one million, we get roughly 0.07/K.
+ *	Assuming top 10 words are stopwords gives s = 0.07/(K + 10).  We set
+ *	epsilon = s/10, which gives bucket width w = (K + 10)/0.007 and
+ *	maximum expected hashtable size of about 1000 * (K + 10).
+ *
+ *	Note: in the above discussion, s, epsilon, and f/N are in terms of a
+ *	lexeme's frequency as a fraction of all lexemes seen in the input.
+ *	However, what we actually want to store in the finished pg_statistic
+ *	entry is each lexeme's frequency as a fraction of all rows that it occurs
+ *	in.  Assuming that the input tsvectors are correctly constructed, no
+ *	lexeme occurs more than once per tsvector, so the final count f is a
+ *	correct estimate of the number of input tsvectors it occurs in, and we
+ *	need only change the divisor from N to nonnull_cnt to get the number we
+ *	want.
+ */
+static void
+compute_tsvector_stats(VacAttrStats *stats,
+					   AnalyzeAttrFetchFunc fetchfunc,
+					   int samplerows,
+					   double totalrows)
+{
+	int			num_mcelem;
+	int			null_cnt = 0;
+	double		total_width = 0;
+
+	/* This is D from the LC algorithm. */
+	HTAB	   *lexemes_tab;
+	HASHCTL		hash_ctl;
+	HASH_SEQ_STATUS scan_status;
+
+	/* This is the current bucket number from the LC algorithm */
+	int			b_current;
+
+	/* This is 'w' from the LC algorithm */
+	int			bucket_width;
+	int			vector_no,
+				lexeme_no;
+	LexemeHashKey hash_key;
+	TrackItem  *item;
+
+	/*
+	 * We want statistics_target * 10 lexemes in the MCELEM array.  This
+	 * multiplier is pretty arbitrary, but is meant to reflect the fact that
+	 * the number of individual lexeme values tracked in pg_statistic ought to
+	 * be more than the number of values for a simple scalar column.
+	 */
+	num_mcelem = stats->attr->attstattarget * 10;
+
+	/*
+	 * We set bucket width equal to (num_mcelem + 10) / 0.007 as per the
+	 * comment above.
+	 */
+	bucket_width = (num_mcelem + 10) * 1000 / 7;
+
+	/*
+	 * Create the hashtable. It will be in local memory, so we don't need to
+	 * worry about overflowing the initial size. Also we don't need to pay any
+	 * attention to locking and memory management.
+	 */
+	hash_ctl.keysize = sizeof(LexemeHashKey);
+	hash_ctl.entrysize = sizeof(TrackItem);
+	hash_ctl.hash = lexeme_hash;
+	hash_ctl.match = lexeme_match;
+	hash_ctl.hcxt = CurrentMemoryContext;
+	lexemes_tab = hash_create("Analyzed lexemes table",
+							  num_mcelem,
+							  &hash_ctl,
+							  HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
+
+	/* Initialize counters. */
+	b_current = 1;
+	lexeme_no = 0;
+
+	/* Loop over the tsvectors. */
+	for (vector_no = 0; vector_no < samplerows; vector_no++)
+	{
+		Datum		value;
+		bool		isnull;
+		TSVector	vector;
+		WordEntry  *curentryptr;
+		char	   *lexemesptr;
+		int			j;
+
+		vacuum_delay_point();
+
+		value = fetchfunc(stats, vector_no, &isnull);
+
+		/*
+		 * Check for null/nonnull.
+		 */
+		if (isnull)
+		{
+			null_cnt++;
+			continue;
+		}
+
+		/*
+		 * Add up widths for average-width calculation.  Since it's a
+		 * tsvector, we know it's varlena.  As in the regular
+		 * compute_minimal_stats function, we use the toasted width for this
+		 * calculation.
+		 */
+		total_width += VARSIZE_ANY(DatumGetPointer(value));
+
+		/*
+		 * Now detoast the tsvector if needed.
+		 */
+		vector = DatumGetTSVector(value);
+
+		/*
+		 * We loop through the lexemes in the tsvector and add them to our
+		 * tracking hashtable.
+		 */
+		lexemesptr = STRPTR(vector);
+		curentryptr = ARRPTR(vector);
+		for (j = 0; j < vector->size; j++)
+		{
+			bool		found;
+
+			/*
+			 * Construct a hash key.  The key points into the (detoasted)
+			 * tsvector value at this point, but if a new entry is created, we
+			 * make a copy of it.  This way we can free the tsvector value
+			 * once we've processed all its lexemes.
+			 */
+			hash_key.lexeme = lexemesptr + curentryptr->pos;
+			hash_key.length = curentryptr->len;
+
+			/* Lookup current lexeme in hashtable, adding it if new */
+			item = (TrackItem *) hash_search(lexemes_tab,
+											 (const void *) &hash_key,
+											 HASH_ENTER, &found);
+
+			if (found)
+			{
+				/* The lexeme is already on the tracking list */
+				item->frequency++;
+			}
+			else
+			{
+				/* Initialize new tracking list element */
+				item->frequency = 1;
+				item->delta = b_current - 1;
+
+				/* Replace the borrowed pointer with a private copy */
+				item->key.lexeme = palloc(hash_key.length);
+				memcpy(item->key.lexeme, hash_key.lexeme, hash_key.length);
+			}
+
+			/* lexeme_no is the number of elements processed (ie N) */
+			lexeme_no++;
+
+			/* We prune the D structure after processing each bucket */
+			if (lexeme_no % bucket_width == 0)
+			{
+				prune_lexemes_hashtable(lexemes_tab, b_current);
+				b_current++;
+			}
+
+			/* Advance to the next WordEntry in the tsvector */
+			curentryptr++;
+		}
+
+		/* If the vector was toasted, free the detoasted copy. */
+		if (TSVectorGetDatum(vector) != value)
+			pfree(vector);
+	}
+
+	/* We can only compute real stats if we found some non-null values. */
+	if (null_cnt < samplerows)
+	{
+		int			nonnull_cnt = samplerows - null_cnt;
+		int			i;
+		TrackItem **sort_table;
+		int			track_len;
+		int			cutoff_freq;
+		int			minfreq,
+					maxfreq;
+
+		stats->stats_valid = true;
+		/* Do the simple null-frac and average width stats */
+		stats->stanullfrac = (double) null_cnt / (double) samplerows;
+		stats->stawidth = total_width / (double) nonnull_cnt;
+
+		/* Assume it's a unique column (see notes above) */
+		stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
+
+		/*
+		 * Construct an array of the interesting hashtable items, that is,
+		 * those meeting the cutoff frequency (s - epsilon)*N.  Also identify
+		 * the minimum and maximum frequencies among these items.
+		 *
+		 * Since epsilon = s/10 and bucket_width = 1/epsilon, the cutoff
+		 * frequency is 9*N / bucket_width.
+		 *
+		 * Do the multiplication in 64-bit arithmetic: with a large sample of
+		 * lexeme-rich tsvectors, 9 * lexeme_no can exceed the range of int
+		 * even though lexeme_no itself still fits.  bucket_width is always
+		 * well above 9, so the quotient is smaller than lexeme_no and is
+		 * safe to store back into an int.
+		 */
+		cutoff_freq = (int) (((int64) 9 * lexeme_no) / bucket_width);
+
+		i = hash_get_num_entries(lexemes_tab);	/* surely enough space */
+		sort_table = (TrackItem **) palloc(sizeof(TrackItem *) * i);
+
+		hash_seq_init(&scan_status, lexemes_tab);
+		track_len = 0;
+		minfreq = lexeme_no;
+		maxfreq = 0;
+		while ((item = (TrackItem *) hash_seq_search(&scan_status)) != NULL)
+		{
+			if (item->frequency > cutoff_freq)
+			{
+				sort_table[track_len++] = item;
+				minfreq = Min(minfreq, item->frequency);
+				maxfreq = Max(maxfreq, item->frequency);
+			}
+		}
+		Assert(track_len <= i);
+
+		/* emit some statistics for debug purposes */
+		elog(DEBUG3, "tsvector_stats: target # mces = %d, bucket width = %d, "
+			 "# lexemes = %d, hashtable size = %d, usable entries = %d",
+			 num_mcelem, bucket_width, lexeme_no, i, track_len);
+
+		/*
+		 * If we obtained more lexemes than we really want, get rid of those
+		 * with least frequencies.  The easiest way is to qsort the array into
+		 * descending frequency order and truncate the array.
+		 */
+		if (num_mcelem < track_len)
+		{
+			qsort_interruptible(sort_table, track_len, sizeof(TrackItem *),
+								trackitem_compare_frequencies_desc, NULL);
+			/* reset minfreq to the smallest frequency we're keeping */
+			minfreq = sort_table[num_mcelem - 1]->frequency;
+		}
+		else
+			num_mcelem = track_len;
+
+		/* Generate MCELEM slot entry */
+		if (num_mcelem > 0)
+		{
+			MemoryContext old_context;
+			Datum	   *mcelem_values;
+			float4	   *mcelem_freqs;
+
+			/*
+			 * We want to store statistics sorted on the lexeme value using
+			 * first length, then byte-for-byte comparison. The reason for
+			 * doing length comparison first is that we don't care about the
+			 * ordering so long as it's consistent, and comparing lengths
+			 * first gives us a chance to avoid a strncmp() call.
+			 *
+			 * This is different from what we do with scalar statistics --
+			 * they get sorted on frequencies. The rationale is that we
+			 * usually search through most common elements looking for a
+			 * specific value, so we can grab its frequency.  When values are
+			 * presorted we can employ binary search for that.  See
+			 * ts_selfuncs.c for a real usage scenario.
+			 */
+			qsort_interruptible(sort_table, num_mcelem, sizeof(TrackItem *),
+								trackitem_compare_lexemes, NULL);
+
+			/* Must copy the target values into anl_context */
+			old_context = MemoryContextSwitchTo(stats->anl_context);
+
+			/*
+			 * We sorted statistics on the lexeme value, but we want to be
+			 * able to find out the minimal and maximal frequency without
+			 * going through all the values.  We keep those two extra
+			 * frequencies in two extra cells in mcelem_freqs.
+			 *
+			 * (Note: the MCELEM statistics slot definition allows for a third
+			 * extra number containing the frequency of nulls, but we don't
+			 * create that for a tsvector column, since null elements aren't
+			 * possible.)
+			 */
+			mcelem_values = (Datum *) palloc(num_mcelem * sizeof(Datum));
+			mcelem_freqs = (float4 *) palloc((num_mcelem + 2) * sizeof(float4));
+
+			/*
+			 * See comments above about use of nonnull_cnt as the divisor for
+			 * the final frequency estimates.
+			 */
+			for (i = 0; i < num_mcelem; i++)
+			{
+				/* distinct name: avoid shadowing the function-level "item" */
+				TrackItem  *titem = sort_table[i];
+
+				mcelem_values[i] =
+					PointerGetDatum(cstring_to_text_with_len(titem->key.lexeme,
+															 titem->key.length));
+				mcelem_freqs[i] = (double) titem->frequency / (double) nonnull_cnt;
+			}
+			/* i == num_mcelem here: fill the two trailing min/max cells */
+			mcelem_freqs[i++] = (double) minfreq / (double) nonnull_cnt;
+			mcelem_freqs[i] = (double) maxfreq / (double) nonnull_cnt;
+			MemoryContextSwitchTo(old_context);
+
+			stats->stakind[0] = STATISTIC_KIND_MCELEM;
+			stats->staop[0] = TextEqualOperator;
+			stats->stacoll[0] = DEFAULT_COLLATION_OID;
+			stats->stanumbers[0] = mcelem_freqs;
+			/* See above comment about two extra frequency fields */
+			stats->numnumbers[0] = num_mcelem + 2;
+			stats->stavalues[0] = mcelem_values;
+			stats->numvalues[0] = num_mcelem;
+			/* We are storing text values */
+			stats->statypid[0] = TEXTOID;
+			stats->statyplen[0] = -1;	/* typlen, -1 for varlena */
+			stats->statypbyval[0] = false;
+			stats->statypalign[0] = 'i';
+		}
+	}
+	else
+	{
+		/* We found only nulls; assume the column is entirely null */
+		stats->stats_valid = true;
+		stats->stanullfrac = 1.0;
+		stats->stawidth = 0;	/* "unknown" */
+		stats->stadistinct = 0.0;	/* "unknown" */
+	}
+
+	/*
+	 * We don't need to bother cleaning up any of our temporary palloc's. The
+	 * hashtable should also go away, as it used a child memory context.
+	 */
+}
+
+/*
+ *	A function to prune the D structure from the Lossy Counting algorithm.
+ *	Consult compute_tsvector_stats() for wider explanation.
+ */
+static void
+prune_lexemes_hashtable(HTAB *lexemes_tab, int b_current)
+{
+	HASH_SEQ_STATUS seq;
+	TrackItem  *entry;
+
+	hash_seq_init(&seq, lexemes_tab);
+	while ((entry = (TrackItem *) hash_seq_search(&seq)) != NULL)
+	{
+		char	   *doomed;
+
+		/* Entries whose f + delta exceeds the bucket number survive */
+		if (entry->frequency + entry->delta > b_current)
+			continue;
+
+		/* Remember the string first; the entry is gone after removal */
+		doomed = entry->key.lexeme;
+
+		if (hash_search(lexemes_tab, (const void *) &entry->key,
+						HASH_REMOVE, NULL) == NULL)
+			elog(ERROR, "hash table corrupted");
+
+		pfree(doomed);
+	}
+}
+
+/*
+ * Hash functions for lexemes. They are strings, but not NULL terminated,
+ * so we need a special hash function.
+ */
+static uint32
+lexeme_hash(const void *key, Size keysize)
+{
+	const LexemeHashKey *lex = (const LexemeHashKey *) key;
+	Datum		hashed;
+
+	/* Hash exactly lex->length bytes; keysize is not consulted */
+	hashed = hash_any((const unsigned char *) lex->lexeme, lex->length);
+
+	return DatumGetUInt32(hashed);
+}
+
+/*
+ *	Matching function for lexemes, to be used in hashtable lookups.
+ */
+static int
+lexeme_match(const void *key1, const void *key2, Size keysize)
+{
+	int			cmp;
+
+	/* Each key carries its own length, so keysize is not needed here */
+	cmp = lexeme_compare(key1, key2);
+
+	return cmp;
+}
+
+/*
+ *	Comparison function for lexemes.
+ */
+static int
+lexeme_compare(const void *key1, const void *key2)
+{
+	const LexemeHashKey *a = (const LexemeHashKey *) key1;
+	const LexemeHashKey *b = (const LexemeHashKey *) key2;
+
+	/* Length decides first: the shorter lexeme sorts earlier */
+	if (a->length != b->length)
+		return (a->length > b->length) ? 1 : -1;
+
+	/* Equal lengths: fall back on comparing the bytes */
+	return strncmp(a->lexeme, b->lexeme, a->length);
+}
+
+/*
+ *	Comparator for sorting TrackItems on frequencies (descending sort)
+ */
+static int
+trackitem_compare_frequencies_desc(const void *e1, const void *e2, void *arg)
+{
+	/* The array being sorted holds pointers to TrackItems */
+	const TrackItem *a = *(const TrackItem *const *) e1;
+	const TrackItem *b = *(const TrackItem *const *) e2;
+
+	/* Positive when the second item is more frequent => descending order */
+	return b->frequency - a->frequency;
+}
+
+/*
+ *	Comparator for sorting TrackItems on lexemes
+ */
+static int
+trackitem_compare_lexemes(const void *e1, const void *e2, void *arg)
+{
+	/* The array being sorted holds pointers to TrackItems */
+	const TrackItem *a = *(const TrackItem *const *) e1;
+	const TrackItem *b = *(const TrackItem *const *) e2;
+
+	/* Order by key: length first, then bytes (see lexeme_compare) */
+	return lexeme_compare(&a->key, &b->key);
+}
diff --git a/src/backend/tsearch/ts_utils.c b/src/backend/tsearch/ts_utils.c
new file mode 100644
index 0000000..7743bdf
--- /dev/null
+++ b/src/backend/tsearch/ts_utils.c
@@ -0,0 +1,146 @@
+/*-------------------------------------------------------------------------
+ *
+ * ts_utils.c
+ *		various support functions
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/tsearch/ts_utils.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <ctype.h>
+
+#include "miscadmin.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_utils.h"
+
+
+/*
+ * Given the base name and extension of a tsearch config file, return
+ * its full path name.  The base name is assumed to be user-supplied,
+ * and is checked to prevent pathname attacks.  The extension is assumed
+ * to be safe.
+ *
+ * The result is a palloc'd string.
+ */
+char *
+get_tsearch_config_filename(const char *basename,
+							const char *extension)
+{
+	char		sharedir[MAXPGPATH];
+	char	   *path;
+	size_t		oklen;
+
+	/*
+	 * Only a-z, 0-9 and underscore are accepted in the basename.  That is
+	 * deliberately strict: nothing outside tsearch_data may be reachable, so
+	 * '/' must be refused, and '\' and ':' are dangerous on some platforms.
+	 * Uppercase is excluded to avoid behavioral differences between
+	 * case-sensitive and case-insensitive filesystems, and non-ASCII
+	 * characters are excluded because of the further risks they bring.
+	 */
+	oklen = strspn(basename, "abcdefghijklmnopqrstuvwxyz0123456789_");
+
+	/* The name is valid only if the accepted prefix reaches the terminator */
+	if (basename[oklen] != '\0')
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid text search configuration file name \"%s\"",
+						basename)));
+
+	get_share_path(my_exec_path, sharedir);
+
+	/* Result is palloc'd with room for a maximum-length path */
+	path = palloc(MAXPGPATH);
+	snprintf(path, MAXPGPATH, "%s/tsearch_data/%s.%s",
+			 sharedir, basename, extension);
+
+	return path;
+}
+
+/*
+ * Reads a stop-word file. Each word is run through 'wordop'
+ * function, if given.  wordop may either modify the input in-place,
+ * or palloc a new version.
+ */
+void
+readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
+{
+	char	  **words = NULL;
+
+	s->len = 0;
+
+	/* A NULL or empty file name yields an empty stop list */
+	if (fname && *fname)
+	{
+		char	   *path = get_tsearch_config_filename(fname, "stop");
+		tsearch_readline_state rl;
+		char	   *raw;
+		int			capacity = 0;
+
+		if (!tsearch_readline_begin(&rl, path))
+			ereport(ERROR,
+					(errcode(ERRCODE_CONFIG_FILE_ERROR),
+					 errmsg("could not open stop-word file \"%s\": %m",
+							path)));
+
+		for (raw = tsearch_readline(&rl);
+			 raw != NULL;
+			 raw = tsearch_readline(&rl))
+		{
+			char	   *cut = raw;
+			char	   *entry;
+
+			/* Cut the line at its first whitespace character, if any */
+			while (*cut && !t_isspace(cut))
+				cut += pg_mblen(cut);
+			*cut = '\0';
+
+			/* Nothing left?  Then there is no word on this line */
+			if (*raw == '\0')
+			{
+				pfree(raw);
+				continue;
+			}
+
+			/* Make room: start with 64 slots, double whenever full */
+			if (s->len >= capacity)
+			{
+				if (words == NULL)
+				{
+					capacity = 64;
+					words = (char **) palloc(sizeof(char *) * capacity);
+				}
+				else
+				{
+					capacity *= 2;
+					words = (char **) repalloc((void *) words,
+											   sizeof(char *) * capacity);
+				}
+			}
+
+			/*
+			 * wordop may hand back its input (modified in place) or a newly
+			 * palloc'd string; in the latter case the line buffer is ours to
+			 * release.
+			 */
+			entry = wordop ? wordop(raw) : raw;
+			if (entry != raw)
+				pfree(raw);
+
+			words[s->len] = entry;
+			(s->len)++;
+		}
+
+		tsearch_readline_end(&rl);
+		pfree(path);
+	}
+
+	s->stop = words;
+
+	/* Sorted order is what lets searchstoplist() use bsearch() */
+	if (s->stop && s->len > 0)
+		qsort(s->stop, s->len, sizeof(char *), pg_qsort_strcmp);
+}
+
+bool
+searchstoplist(StopList *s, char *key)
+{
+	/* An absent or empty list contains nothing */
+	if (s->stop == NULL || s->len <= 0)
+		return false;
+
+	/* The list is an array of char *, so the probe is a pointer to key */
+	return bsearch(&key, s->stop, s->len,
+				   sizeof(char *), pg_qsort_strcmp) != NULL;
+}
diff --git a/src/backend/tsearch/wparser.c b/src/backend/tsearch/wparser.c
new file mode 100644
index 0000000..14bb605
--- /dev/null
+++ b/src/backend/tsearch/wparser.c
@@ -0,0 +1,549 @@
+/*-------------------------------------------------------------------------
+ *
+ * wparser.c
+ *		Standard interface to word parser
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/tsearch/wparser.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/namespace.h"
+#include "catalog/pg_type.h"
+#include "commands/defrem.h"
+#include "common/jsonapi.h"
+#include "funcapi.h"
+#include "tsearch/ts_cache.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+#include "utils/jsonfuncs.h"
+#include "utils/varlena.h"
+
+/******sql-level interface******/
+
+typedef struct
+{
+	int			cur;			/* index of the next list entry to return */
+	LexDescr   *list;			/* result of the parser's lextype method */
+} TSTokenTypeStorage;
+
+/* state for ts_headline_json_*, handed to headline_json_value for each string */
+typedef struct HeadlineJsonState
+{
+	HeadlineParsedText *prs;	/* caller's parse state, reused per value */
+	TSConfigCacheEntry *cfg;	/* text search configuration in use */
+	TSParserCacheEntry *prsobj; /* parser belonging to cfg (cfg->prsId) */
+	TSQuery		query;
+	List	   *prsoptions;		/* deserialized options, or NIL */
+	bool		transformed;	/* set once headline_json_value has run */
+} HeadlineJsonState;
+
+static text *headline_json_value(void *_state, char *elem_value, int elem_len);
+/* First-call SRF setup: fetch the parser's token-type list and build the result tupdesc */
+static void
+tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
+{
+	TupleDesc	tupdesc;
+	MemoryContext oldcontext;
+	TSTokenTypeStorage *st;
+	TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
+
+	if (!OidIsValid(prs->lextypeOid))
+		elog(ERROR, "method lextype isn't defined for text search parser %u",
+			 prsid);
+
+	oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+	/* allocations up to the switch back below are made in multi_call_memory_ctx */
+	st = (TSTokenTypeStorage *) palloc(sizeof(TSTokenTypeStorage));
+	st->cur = 0;
+	/* lextype takes one dummy argument */
+	st->list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid,
+															 (Datum) 0));
+	funcctx->user_fctx = (void *) st;
+
+	tupdesc = CreateTemplateTupleDesc(3);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
+					   INT4OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "alias",
+					   TEXTOID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "description",
+					   TEXTOID, -1, 0);
+
+	funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+	MemoryContextSwitchTo(oldcontext);
+}
+
+static Datum
+tt_process_call(FuncCallContext *funcctx)
+{
+	TSTokenTypeStorage *st = (TSTokenTypeStorage *) funcctx->user_fctx;
+	Datum		result;
+	char	   *values[3];
+	char		txtid[16];
+	HeapTuple	tuple;
+	LexDescr   *entry;
+
+	/* Emit one (tokid, alias, description) row; (Datum) 0 means "done". */
+	if (st->list == NULL || st->list[st->cur].lexid == 0)
+		return (Datum) 0;
+
+	entry = &st->list[st->cur];
+	snprintf(txtid, sizeof(txtid), "%d", entry->lexid);
+	values[0] = txtid;
+	values[1] = entry->alias;
+	values[2] = entry->descr;
+	tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+	result = HeapTupleGetDatum(tuple);
+
+	/* alias and descr are released as soon as the tuple has been built */
+	pfree(values[1]);
+	pfree(values[2]);
+	st->cur++;
+	return result;
+}
+
+Datum
+ts_token_type_byid(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	Datum		row;
+
+	/* SRF listing the token types of the parser whose OID is argument 0 */
+	if (SRF_IS_FIRSTCALL())
+		tt_setup_firstcall(SRF_FIRSTCALL_INIT(), PG_GETARG_OID(0));
+
+	funcctx = SRF_PERCALL_SETUP();
+	row = tt_process_call(funcctx);
+
+	/* (Datum) 0 from the helper signals that the list is exhausted */
+	if (row == (Datum) 0)
+		SRF_RETURN_DONE(funcctx);
+	SRF_RETURN_NEXT(funcctx, row);
+}
+/* SRF: same rows as ts_token_type_byid, with the parser given by name (argument 0) */
+Datum
+ts_token_type_byname(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	Datum		result;
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		text	   *prsname = PG_GETARG_TEXT_PP(0);
+		Oid			prsId;
+
+		funcctx = SRF_FIRSTCALL_INIT();
+		prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
+		tt_setup_firstcall(funcctx, prsId);
+	}
+
+	funcctx = SRF_PERCALL_SETUP();
+
+	if ((result = tt_process_call(funcctx)) != (Datum) 0)
+		SRF_RETURN_NEXT(funcctx, result);
+	SRF_RETURN_DONE(funcctx);
+}
+
+typedef struct
+{
+	int			type;			/* token type returned by prstoken */
+	char	   *lexeme;			/* palloc'd, NUL-terminated copy of the token */
+} LexemeEntry;
+
+typedef struct
+{
+	int			cur;			/* fill count during setup, then read position */
+	int			len;			/* allocated size during setup, then token count */
+	LexemeEntry *list;
+} PrsStorage;
+
+/* First-call SRF setup: run the parser over txt and save every token for later calls */
+static void
+prs_setup_firstcall(FuncCallContext *funcctx, Oid prsid, text *txt)
+{
+	TupleDesc	tupdesc;
+	MemoryContext oldcontext;
+	PrsStorage *st;
+	TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
+	char	   *lex = NULL;
+	int			llen = 0,
+				type = 0;
+	void	   *prsdata;
+
+	oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+	st = (PrsStorage *) palloc(sizeof(PrsStorage));
+	st->cur = 0;
+	st->len = 16;
+	st->list = (LexemeEntry *) palloc(sizeof(LexemeEntry) * st->len);
+
+	prsdata = (void *) DatumGetPointer(FunctionCall2(&prs->prsstart,
+													 PointerGetDatum(VARDATA_ANY(txt)),
+													 Int32GetDatum(VARSIZE_ANY_EXHDR(txt))));
+	/* fetch tokens until prstoken returns 0, doubling the array whenever it is full */
+	while ((type = DatumGetInt32(FunctionCall3(&prs->prstoken,
+											   PointerGetDatum(prsdata),
+											   PointerGetDatum(&lex),
+											   PointerGetDatum(&llen)))) != 0)
+	{
+		if (st->cur >= st->len)
+		{
+			st->len = 2 * st->len;
+			st->list = (LexemeEntry *) repalloc(st->list, sizeof(LexemeEntry) * st->len);
+		}
+		st->list[st->cur].lexeme = palloc(llen + 1);
+		memcpy(st->list[st->cur].lexeme, lex, llen);
+		st->list[st->cur].lexeme[llen] = '\0';
+		st->list[st->cur].type = type;
+		st->cur++;
+	}
+
+	FunctionCall1(&prs->prsend, PointerGetDatum(prsdata));
+	/* from now on len is the number of tokens and cur is the read position */
+	st->len = st->cur;
+	st->cur = 0;
+
+	funcctx->user_fctx = (void *) st;
+	tupdesc = CreateTemplateTupleDesc(2);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
+					   INT4OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "token",
+					   TEXTOID, -1, 0);
+
+	funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+	MemoryContextSwitchTo(oldcontext);
+}
+
+static Datum
+prs_process_call(FuncCallContext *funcctx)
+{
+	PrsStorage *st = (PrsStorage *) funcctx->user_fctx;
+	Datum		result;
+	char	   *values[2];
+	char		tid[16];
+	HeapTuple	tuple;
+	LexemeEntry *entry;
+
+	/* Emit one (tokid, token) row; (Datum) 0 means no tokens are left. */
+	if (st->cur >= st->len)
+		return (Datum) 0;
+
+	entry = &st->list[st->cur];
+	snprintf(tid, sizeof(tid), "%d", entry->type);
+	values[0] = tid;
+	values[1] = entry->lexeme;
+	tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+	result = HeapTupleGetDatum(tuple);
+
+	pfree(values[1]);			/* lexeme copy is released once the tuple is built */
+	st->cur++;
+	return result;
+}
+/* SRF: tokens of the text in argument 1, produced by the parser whose OID is argument 0 */
+Datum
+ts_parse_byid(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	Datum		result;
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		text	   *txt = PG_GETARG_TEXT_PP(1);
+
+		funcctx = SRF_FIRSTCALL_INIT();
+		prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt);
+		PG_FREE_IF_COPY(txt, 1);	/* setup saved its own copies of the tokens */
+	}
+
+	funcctx = SRF_PERCALL_SETUP();
+
+	if ((result = prs_process_call(funcctx)) != (Datum) 0)
+		SRF_RETURN_NEXT(funcctx, result);
+	SRF_RETURN_DONE(funcctx);
+}
+/* SRF: same rows as ts_parse_byid, with the parser given by name (argument 0) */
+Datum
+ts_parse_byname(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	Datum		result;
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		text	   *prsname = PG_GETARG_TEXT_PP(0);
+		text	   *txt = PG_GETARG_TEXT_PP(1);
+		Oid			prsId;
+
+		funcctx = SRF_FIRSTCALL_INIT();
+		prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
+		prs_setup_firstcall(funcctx, prsId, txt);	/* NOTE: txt is not freed here */
+	}
+
+	funcctx = SRF_PERCALL_SETUP();
+
+	if ((result = prs_process_call(funcctx)) != (Datum) 0)
+		SRF_RETURN_NEXT(funcctx, result);
+	SRF_RETURN_DONE(funcctx);
+}
+/* Headline of text 'in' for 'query' under configuration 'tsconfig'; argument 3 (options) is optional */
+Datum
+ts_headline_byid_opt(PG_FUNCTION_ARGS)
+{
+	Oid			tsconfig = PG_GETARG_OID(0);
+	text	   *in = PG_GETARG_TEXT_PP(1);
+	TSQuery		query = PG_GETARG_TSQUERY(2);
+	text	   *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_PP(3) : NULL;
+	HeadlineParsedText prs;
+	List	   *prsoptions;
+	text	   *out;
+	TSConfigCacheEntry *cfg;
+	TSParserCacheEntry *prsobj;
+
+	cfg = lookup_ts_config_cache(tsconfig);
+	prsobj = lookup_ts_parser_cache(cfg->prsId);
+
+	if (!OidIsValid(prsobj->headlineOid))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("text search parser does not support headline creation")));
+	/* prs starts zeroed with room for 32 words before hlparsetext() is run on the text */
+	memset(&prs, 0, sizeof(HeadlineParsedText));
+	prs.lenwords = 32;
+	prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
+
+	hlparsetext(cfg->cfgId, &prs, query,
+				VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in));
+
+	if (opt)
+		prsoptions = deserialize_deflist(PointerGetDatum(opt));
+	else
+		prsoptions = NIL;
+	/* call the parser's prsheadline method on prs, then render the result as text */
+	FunctionCall3(&(prsobj->prsheadline),
+				  PointerGetDatum(&prs),
+				  PointerGetDatum(prsoptions),
+				  PointerGetDatum(query));
+
+	out = generateHeadline(&prs);
+
+	PG_FREE_IF_COPY(in, 1);
+	PG_FREE_IF_COPY(query, 2);
+	if (opt)
+		PG_FREE_IF_COPY(opt, 3);
+	pfree(prs.words);
+	pfree(prs.startsel);
+	pfree(prs.stopsel);
+
+	PG_RETURN_POINTER(out);
+}
+
+Datum
+ts_headline_byid(PG_FUNCTION_ARGS)
+{
+	/* Same as ts_headline_byid_opt, but without the options argument */
+	return DirectFunctionCall3(ts_headline_byid_opt,
+							   PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
+							   PG_GETARG_DATUM(2));
+}
+
+Datum
+ts_headline(PG_FUNCTION_ARGS)
+{
+	/* Uses the configuration from getTSCurrentConfig(true); no options */
+	return DirectFunctionCall3(ts_headline_byid_opt,
+							   ObjectIdGetDatum(getTSCurrentConfig(true)),
+							   PG_GETARG_DATUM(0), PG_GETARG_DATUM(1));
+}
+
+Datum
+ts_headline_opt(PG_FUNCTION_ARGS)
+{
+	/* Uses getTSCurrentConfig(true) and forwards the options (argument 2) */
+	return DirectFunctionCall4(ts_headline_byid_opt,
+							   ObjectIdGetDatum(getTSCurrentConfig(true)),
+							   PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
+							   PG_GETARG_DATUM(2));
+}
+/* jsonb headline: every string value of jb is passed through headline_json_value */
+Datum
+ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
+{
+	Oid			tsconfig = PG_GETARG_OID(0);
+	Jsonb	   *jb = PG_GETARG_JSONB_P(1);
+	TSQuery		query = PG_GETARG_TSQUERY(2);
+	text	   *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
+	Jsonb	   *out;
+	JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
+	HeadlineParsedText prs;
+	HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
+
+	memset(&prs, 0, sizeof(HeadlineParsedText));
+	prs.lenwords = 32;
+	prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
+	/* state bundles everything headline_json_value needs; prs is shared by all values */
+	state->prs = &prs;
+	state->cfg = lookup_ts_config_cache(tsconfig);
+	state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
+	state->query = query;
+	if (opt)
+		state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
+	else
+		state->prsoptions = NIL;
+
+	if (!OidIsValid(state->prsobj->headlineOid))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("text search parser does not support headline creation")));
+
+	out = transform_jsonb_string_values(jb, state, action);
+
+	PG_FREE_IF_COPY(jb, 1);
+	PG_FREE_IF_COPY(query, 2);
+	if (opt)
+		PG_FREE_IF_COPY(opt, 3);
+
+	pfree(prs.words);
+	/* startsel/stopsel are freed only if headline_json_value ran at least once */
+	if (state->transformed)
+	{
+		pfree(prs.startsel);
+		pfree(prs.stopsel);
+	}
+
+	PG_RETURN_JSONB_P(out);
+}
+
+Datum
+ts_headline_jsonb(PG_FUNCTION_ARGS)
+{
+	/* Uses the configuration from getTSCurrentConfig(true); no options */
+	return DirectFunctionCall3(ts_headline_jsonb_byid_opt,
+							   ObjectIdGetDatum(getTSCurrentConfig(true)),
+							   PG_GETARG_DATUM(0), PG_GETARG_DATUM(1));
+}
+
+Datum
+ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
+{
+	/* Same as ts_headline_jsonb_byid_opt, but without the options argument */
+	return DirectFunctionCall3(ts_headline_jsonb_byid_opt,
+							   PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
+							   PG_GETARG_DATUM(2));
+}
+
+Datum
+ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
+{
+	/* Uses getTSCurrentConfig(true) and forwards the options (argument 2) */
+	return DirectFunctionCall4(ts_headline_jsonb_byid_opt,
+							   ObjectIdGetDatum(getTSCurrentConfig(true)),
+							   PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
+							   PG_GETARG_DATUM(2));
+}
+/* json (text) headline: every string value is passed through headline_json_value */
+Datum
+ts_headline_json_byid_opt(PG_FUNCTION_ARGS)
+{
+	Oid			tsconfig = PG_GETARG_OID(0);
+	text	   *json = PG_GETARG_TEXT_P(1);
+	TSQuery		query = PG_GETARG_TSQUERY(2);
+	text	   *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
+	text	   *out;
+	JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
+
+	HeadlineParsedText prs;
+	HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
+
+	memset(&prs, 0, sizeof(HeadlineParsedText));
+	prs.lenwords = 32;
+	prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
+	/* state bundles everything headline_json_value needs; prs is shared by all values */
+	state->prs = &prs;
+	state->cfg = lookup_ts_config_cache(tsconfig);
+	state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
+	state->query = query;
+	if (opt)
+		state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
+	else
+		state->prsoptions = NIL;
+
+	if (!OidIsValid(state->prsobj->headlineOid))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("text search parser does not support headline creation")));
+
+	out = transform_json_string_values(json, state, action);
+
+	PG_FREE_IF_COPY(json, 1);
+	PG_FREE_IF_COPY(query, 2);
+	if (opt)
+		PG_FREE_IF_COPY(opt, 3);
+	pfree(prs.words);
+	/* startsel/stopsel are freed only if headline_json_value ran at least once */
+	if (state->transformed)
+	{
+		pfree(prs.startsel);
+		pfree(prs.stopsel);
+	}
+
+	PG_RETURN_TEXT_P(out);
+}
+
+Datum
+ts_headline_json(PG_FUNCTION_ARGS)
+{
+	/* Uses the configuration from getTSCurrentConfig(true); no options */
+	return DirectFunctionCall3(ts_headline_json_byid_opt,
+							   ObjectIdGetDatum(getTSCurrentConfig(true)),
+							   PG_GETARG_DATUM(0), PG_GETARG_DATUM(1));
+}
+
+Datum
+ts_headline_json_byid(PG_FUNCTION_ARGS)
+{
+	/* Same as ts_headline_json_byid_opt, but without the options argument */
+	return DirectFunctionCall3(ts_headline_json_byid_opt,
+							   PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
+							   PG_GETARG_DATUM(2));
+}
+
+Datum
+ts_headline_json_opt(PG_FUNCTION_ARGS)
+{
+	/* Uses getTSCurrentConfig(true) and forwards the options (argument 2) */
+	return DirectFunctionCall4(ts_headline_json_byid_opt,
+							   ObjectIdGetDatum(getTSCurrentConfig(true)),
+							   PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
+							   PG_GETARG_DATUM(2));
+}
+
+
+/*
+ * Return headline in text form, generated from a json(b) element
+ */
+static text *
+headline_json_value(void *_state, char *elem_value, int elem_len)
+{
+	HeadlineJsonState *state = (HeadlineJsonState *) _state;
+
+	HeadlineParsedText *prs = state->prs;
+	TSConfigCacheEntry *cfg = state->cfg;
+	TSParserCacheEntry *prsobj = state->prsobj;
+	TSQuery		query = state->query;
+	List	   *prsoptions = state->prsoptions;
+
+	prs->curwords = 0;			/* reuse the shared word array from the start */
+	hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len);
+	FunctionCall3(&(prsobj->prsheadline),
+				  PointerGetDatum(prs),
+				  PointerGetDatum(prsoptions),
+				  PointerGetDatum(query));
+
+	state->transformed = true;	/* tells the caller to free startsel/stopsel */
+	return generateHeadline(prs);
+}
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
new file mode 100644
index 0000000..916db5a
--- /dev/null
+++ b/src/backend/tsearch/wparser_def.c
@@ -0,0 +1,2648 @@
+/*-------------------------------------------------------------------------
+ *
+ * wparser_def.c
+ *		Default text search parser
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/tsearch/wparser_def.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <limits.h>
+
+#include "catalog/pg_collation.h"
+#include "commands/defrem.h"
+#include "miscadmin.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_public.h"
+#include "tsearch/ts_type.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+
+
+/* Define me to enable tracing of parser behavior */
+/* #define WPARSER_TRACE */
+
+
+/* Output token categories */
+
+#define ASCIIWORD		1
+#define WORD_T			2
+#define NUMWORD			3
+#define EMAIL			4
+#define URL_T			5
+#define HOST			6
+#define SCIENTIFIC		7
+#define VERSIONNUMBER	8
+#define NUMPARTHWORD	9
+#define PARTHWORD		10
+#define ASCIIPARTHWORD	11
+#define SPACE			12
+#define TAG_T			13
+#define PROTOCOL		14
+#define NUMHWORD		15
+#define ASCIIHWORD		16
+#define HWORD			17
+#define URLPATH			18
+#define FILEPATH		19
+#define DECIMAL_T		20
+#define SIGNEDINT		21
+#define UNSIGNEDINT		22
+#define XMLENTITY		23
+
+#define LASTNUM			23
+
+static const char *const tok_alias[] = {
+	"",
+	"asciiword",
+	"word",
+	"numword",
+	"email",
+	"url",
+	"host",
+	"sfloat",
+	"version",
+	"hword_numpart",
+	"hword_part",
+	"hword_asciipart",
+	"blank",
+	"tag",
+	"protocol",
+	"numhword",
+	"asciihword",
+	"hword",
+	"url_path",
+	"file",
+	"float",
+	"int",
+	"uint",
+	"entity"
+};
+
+static const char *const lex_descr[] = {
+	"",
+	"Word, all ASCII",
+	"Word, all letters",
+	"Word, letters and digits",
+	"Email address",
+	"URL",
+	"Host",
+	"Scientific notation",
+	"Version number",
+	"Hyphenated word part, letters and digits",
+	"Hyphenated word part, all letters",
+	"Hyphenated word part, all ASCII",
+	"Space symbols",
+	"XML tag",
+	"Protocol head",
+	"Hyphenated word, letters and digits",
+	"Hyphenated word, all ASCII",
+	"Hyphenated word, all letters",
+	"URL path",
+	"File or path name",
+	"Decimal notation",
+	"Signed integer",
+	"Unsigned integer",
+	"XML entity"
+};
+
+
+/* Parser states */
+
+typedef enum
+{
+	TPS_Base = 0,
+	TPS_InNumWord,
+	TPS_InAsciiWord,
+	TPS_InWord,
+	TPS_InUnsignedInt,
+	TPS_InSignedIntFirst,
+	TPS_InSignedInt,
+	TPS_InSpace,
+	TPS_InUDecimalFirst,
+	TPS_InUDecimal,
+	TPS_InDecimalFirst,
+	TPS_InDecimal,
+	TPS_InVerVersion,
+	TPS_InSVerVersion,
+	TPS_InVersionFirst,
+	TPS_InVersion,
+	TPS_InMantissaFirst,
+	TPS_InMantissaSign,
+	TPS_InMantissa,
+	TPS_InXMLEntityFirst,
+	TPS_InXMLEntity,
+	TPS_InXMLEntityNumFirst,
+	TPS_InXMLEntityNum,
+	TPS_InXMLEntityHexNumFirst,
+	TPS_InXMLEntityHexNum,
+	TPS_InXMLEntityEnd,
+	TPS_InTagFirst,
+	TPS_InXMLBegin,
+	TPS_InTagCloseFirst,
+	TPS_InTagName,
+	TPS_InTagBeginEnd,
+	TPS_InTag,
+	TPS_InTagEscapeK,
+	TPS_InTagEscapeKK,
+	TPS_InTagBackSleshed,
+	TPS_InTagEnd,
+	TPS_InCommentFirst,
+	TPS_InCommentLast,
+	TPS_InComment,
+	TPS_InCloseCommentFirst,
+	TPS_InCloseCommentLast,
+	TPS_InCommentEnd,
+	TPS_InHostFirstDomain,
+	TPS_InHostDomainSecond,
+	TPS_InHostDomain,
+	TPS_InPortFirst,
+	TPS_InPort,
+	TPS_InHostFirstAN,
+	TPS_InHost,
+	TPS_InEmail,
+	TPS_InFileFirst,
+	TPS_InFileTwiddle,
+	TPS_InPathFirst,
+	TPS_InPathFirstFirst,
+	TPS_InPathSecond,
+	TPS_InFile,
+	TPS_InFileNext,
+	TPS_InURLPathFirst,
+	TPS_InURLPathStart,
+	TPS_InURLPath,
+	TPS_InFURL,
+	TPS_InProtocolFirst,
+	TPS_InProtocolSecond,
+	TPS_InProtocolEnd,
+	TPS_InHyphenAsciiWordFirst,
+	TPS_InHyphenAsciiWord,
+	TPS_InHyphenWordFirst,
+	TPS_InHyphenWord,
+	TPS_InHyphenNumWordFirst,
+	TPS_InHyphenNumWord,
+	TPS_InHyphenDigitLookahead,
+	TPS_InParseHyphen,
+	TPS_InParseHyphenHyphen,
+	TPS_InHyphenWordPart,
+	TPS_InHyphenAsciiWordPart,
+	TPS_InHyphenNumWordPart,
+	TPS_InHyphenUnsignedInt,
+	TPS_Null					/* last state (fake value) */
+} TParserState;
+
+/* forward declaration */
+struct TParser;
+
+typedef int (*TParserCharTest) (struct TParser *);	/* any p_is* functions
+													 * except p_iseq */
+typedef void (*TParserSpecial) (struct TParser *);	/* special handler for
+													 * special cases... */
+
+typedef struct
+{
+	TParserCharTest isclass;
+	char		c;
+	uint16		flags;
+	TParserState tostate;
+	int			type;
+	TParserSpecial special;
+} TParserStateActionItem;
+
+/* Flag bits in TParserStateActionItem.flags */
+#define A_NEXT		0x0000
+#define A_BINGO		0x0001
+#define A_POP		0x0002
+#define A_PUSH		0x0004
+#define A_RERUN		0x0008
+#define A_CLEAR		0x0010
+#define A_MERGE		0x0020
+#define A_CLRALL	0x0040
+
+typedef struct TParserPosition
+{
+	int			posbyte;		/* position of parser in bytes */
+	int			poschar;		/* position of parser in characters */
+	int			charlen;		/* length of current char */
+	int			lenbytetoken;	/* length of token-so-far in bytes */
+	int			lenchartoken;	/* and in chars */
+	TParserState state;
+	struct TParserPosition *prev;
+	const TParserStateActionItem *pushedAtAction;
+} TParserPosition;
+
+typedef struct TParser
+{
+	/* string and position information */
+	char	   *str;			/* multibyte string */
+	int			lenstr;			/* length of mbstring */
+	wchar_t    *wstr;			/* wide character string */
+	pg_wchar   *pgwstr;			/* wide character string for C-locale */
+	bool		usewide;
+
+	/* State of parse */
+	int			charmaxlen;
+	TParserPosition *state;
+	bool		ignore;
+	bool		wanthost;
+
+	/* silly char */
+	char		c;
+
+	/* out */
+	char	   *token;
+	int			lenbytetoken;
+	int			lenchartoken;
+	int			type;
+} TParser;
+
+
+/* forward decls here */
+static bool TParserGet(TParser *prs);
+
+
+static TParserPosition *
+newTParserPosition(TParserPosition *prev)
+{
+	TParserPosition *pos = (TParserPosition *) palloc(sizeof(TParserPosition));
+
+	/* Start from a copy of prev if there is one, otherwise from all zeroes */
+	if (prev == NULL)
+		memset(pos, 0, sizeof(TParserPosition));
+	else
+		memcpy(pos, prev, sizeof(TParserPosition));
+
+	/* Chain to the predecessor; a new entry has no pushing action recorded */
+	pos->prev = prev;
+	pos->pushedAtAction = NULL;
+	return pos;
+}
+/* Create a parser over the len bytes at str; the string is referenced, not copied */
+static TParser *
+TParserInit(char *str, int len)
+{
+	TParser    *prs = (TParser *) palloc0(sizeof(TParser));
+
+	prs->charmaxlen = pg_database_encoding_max_length();
+	prs->str = str;
+	prs->lenstr = len;
+
+	/*
+	 * Use wide char code only when max encoding length > 1.
+	 */
+	if (prs->charmaxlen > 1)
+	{
+		pg_locale_t mylocale = 0;	/* TODO */
+
+		prs->usewide = true;
+		if (database_ctype_is_c)
+		{
+			/*
+			 * char2wchar doesn't work for C-locale and sizeof(pg_wchar) could
+			 * be different from sizeof(wchar_t)
+			 */
+			prs->pgwstr = (pg_wchar *) palloc(sizeof(pg_wchar) * (prs->lenstr + 1));
+			pg_mb2wchar_with_len(prs->str, prs->pgwstr, prs->lenstr);
+		}
+		else
+		{
+			prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr + 1));
+			char2wchar(prs->wstr, prs->lenstr + 1, prs->str, prs->lenstr,
+					   mylocale);
+		}
+	}
+	else
+		prs->usewide = false;
+	/* the position stack starts with one zeroed entry in the Base state */
+	prs->state = newTParserPosition(NULL);
+	prs->state->state = TPS_Base;
+
+#ifdef WPARSER_TRACE
+	fprintf(stderr, "parsing \"%.*s\"\n", len, str);
+#endif
+
+	return prs;
+}
+
+/*
+ * As an alternative to a full TParserInit one can create a
+ * TParserCopy which basically is a regular TParser without a private
+ * copy of the string - instead it uses the one from another TParser.
+ * This is useful because at some places TParsers are created
+ * recursively and the repeated copying around of the strings can
+ * cause major inefficiency if the source string is long.
+ * The new parser starts parsing at the original's current position.
+ *
+ * Obviously one must not close the original TParser before the copy.
+ */
+static TParser *
+TParserCopyInit(const TParser *orig)
+{
+	TParser    *prs = (TParser *) palloc0(sizeof(TParser));
+
+	prs->charmaxlen = orig->charmaxlen;
+	prs->str = orig->str + orig->state->posbyte;
+	prs->lenstr = orig->lenstr - orig->state->posbyte;
+	prs->usewide = orig->usewide;
+	/* share the original's wide-char buffers, offset to its current character */
+	if (orig->pgwstr)
+		prs->pgwstr = orig->pgwstr + orig->state->poschar;
+	if (orig->wstr)
+		prs->wstr = orig->wstr + orig->state->poschar;
+
+	prs->state = newTParserPosition(NULL);
+	prs->state->state = TPS_Base;
+
+#ifdef WPARSER_TRACE
+	fprintf(stderr, "parsing copy of \"%.*s\"\n", prs->lenstr, prs->str);
+#endif
+
+	return prs;
+}
+
+/* Free a parser: its position stack, its wide-char buffers and the struct itself */
+static void
+TParserClose(TParser *prs)
+{
+	while (prs->state)			/* pop and free every stacked position */
+	{
+		TParserPosition *ptr = prs->state->prev;
+
+		pfree(prs->state);
+		prs->state = ptr;
+	}
+
+	if (prs->wstr)
+		pfree(prs->wstr);
+	if (prs->pgwstr)
+		pfree(prs->pgwstr);
+
+#ifdef WPARSER_TRACE
+	fprintf(stderr, "closing parser\n");
+#endif
+	pfree(prs);
+}
+
+/*
+ * Close a parser created with TParserCopyInit (wstr/pgwstr are not freed here)
+ */
+static void
+TParserCopyClose(TParser *prs)
+{
+	while (prs->state)			/* pop and free every stacked position */
+	{
+		TParserPosition *ptr = prs->state->prev;
+
+		pfree(prs->state);
+		prs->state = ptr;
+	}
+
+#ifdef WPARSER_TRACE
+	fprintf(stderr, "closing parser copy\n");
+#endif
+	pfree(prs);
+}
+
+
+/*
+ * Character-type support functions, equivalent to the is* macros, but
+ * working with any possible encoding and locale.  p_iswhat(type, nonascii)
+ * defines p_is<type>() and its negation p_isnot<type>().  Notes:
+ *	- with a multibyte encoding and C locale, the isw* functions may fail
+ *	  or give wrong results (a combination often used for Asian
+ *	  languages), so pgwstr is used instead of wstr and any character
+ *	  above 0x7f simply yields 'nonascii'.
+ */
+
+#define p_iswhat(type, nonascii)											\
+																			\
+static int																	\
+p_is##type(TParser *prs)													\
+{																			\
+	Assert(prs->state);														\
+	if (prs->usewide)														\
+	{																		\
+		if (prs->pgwstr)													\
+		{																	\
+			unsigned int c = *(prs->pgwstr + prs->state->poschar);			\
+			if (c > 0x7f)													\
+				return nonascii;											\
+			return is##type(c);												\
+		}																	\
+		return isw##type(*(prs->wstr + prs->state->poschar));				\
+	}																		\
+	return is##type(*(unsigned char *) (prs->str + prs->state->posbyte));	\
+}																			\
+																			\
+static int																	\
+p_isnot##type(TParser *prs)													\
+{																			\
+	return !p_is##type(prs);												\
+}
+
+/*
+ * In C locale with a multibyte encoding, any non-ASCII symbol is considered
+ * an alpha character, but not a member of other char classes.
+ */
+p_iswhat(alnum, 1)
+p_iswhat(alpha, 1)
+p_iswhat(digit, 0)
+p_iswhat(lower, 0)
+p_iswhat(print, 0)
+p_iswhat(punct, 0)
+p_iswhat(space, 0)
+p_iswhat(upper, 0)
+p_iswhat(xdigit, 0)
+
+/* p_iseq should be used only for ascii symbols */
+
+static int
+p_iseq(TParser *prs, char c)
+{
+	Assert(prs->state);			/* a current position is required */
+	return (prs->state->charlen == 1 && prs->str[prs->state->posbyte] == c);
+}
+
+static int
+p_isEOF(TParser *prs)
+{
+	Assert(prs->state);			/* a current position is required */
+	return (prs->state->charlen == 0 || prs->state->posbyte == prs->lenstr);
+}
+/* Is the current character equal to the parser's saved character prs->c? */
+static int
+p_iseqC(TParser *prs)
+{
+	return p_iseq(prs, prs->c);
+}
+/* Negation of p_iseqC: current character differs from prs->c */
+static int
+p_isneC(TParser *prs)
+{
+	return !p_iseq(prs, prs->c);
+}
+
+static int
+p_isascii(TParser *prs)
+{
+	return (prs->state->charlen == 1 && isascii((unsigned char) prs->str[prs->state->posbyte]));	/* one-byte ASCII? */
+}
+
+static int
+p_isasclet(TParser *prs)
+{
+	return (p_isascii(prs) && p_isalpha(prs));	/* ASCII and alphabetic */
+}
+
+static int
+p_isurlchar(TParser *prs)
+{
+	char		ch;
+
+	/*
+	 * Decide whether the current character may be part of a URL.  Anything
+	 * other than a single-byte character never qualifies.
+	 */
+	if (prs->state->charlen != 1)
+		return 0;
+
+	/*
+	 * Spaces, control characters, DEL and bytes with the high bit set are
+	 * out as well (the latter are negative if char is signed).
+	 */
+	ch = prs->str[prs->state->posbyte];
+	if (ch <= 0x20 || ch >= 0x7F)
+		return 0;
+
+	/*
+	 * Finally, reject the characters disallowed by RFC 3986:
+	 * " < > \ ^ ` { | }
+	 */
+	if (strchr("\"<>\\^`{|}", ch) != NULL)
+		return 0;
+	return 1;
+}
+
+
+/* deliberately suppress unused-function complaints for the above; not meant to be run (every call passes NULL) */
+void		_make_compiler_happy(void);
+void
+_make_compiler_happy(void)
+{
+	p_isalnum(NULL);
+	p_isnotalnum(NULL);
+	p_isalpha(NULL);
+	p_isnotalpha(NULL);
+	p_isdigit(NULL);
+	p_isnotdigit(NULL);
+	p_islower(NULL);
+	p_isnotlower(NULL);
+	p_isprint(NULL);
+	p_isnotprint(NULL);
+	p_ispunct(NULL);
+	p_isnotpunct(NULL);
+	p_isspace(NULL);
+	p_isnotspace(NULL);
+	p_isupper(NULL);
+	p_isnotupper(NULL);
+	p_isxdigit(NULL);
+	p_isnotxdigit(NULL);
+	p_isEOF(NULL);
+	p_iseqC(NULL);
+	p_isneC(NULL);
+}
+
+/* Set prs->ignore when the token is "<script" or "<style"; clear it on "</script" or "</style" */
+static void
+SpecialTags(TParser *prs)
+{
+	switch (prs->state->lenchartoken)	/* token length picks the candidates */
+	{
+		case 8:					/* </script */
+			if (pg_strncasecmp(prs->token, "</script", 8) == 0)
+				prs->ignore = false;
+			break;
+		case 7:					/* <script || </style */
+			if (pg_strncasecmp(prs->token, "</style", 7) == 0)
+				prs->ignore = false;
+			else if (pg_strncasecmp(prs->token, "<script", 7) == 0)
+				prs->ignore = true;
+			break;
+		case 6:					/* <style */
+			if (pg_strncasecmp(prs->token, "<style", 6) == 0)
+				prs->ignore = true;
+			break;
+		default:
+			break;
+	}
+}
+/* Set wanthost and move the position back to the start of the token-so-far */
+static void
+SpecialFURL(TParser *prs)
+{
+	prs->wanthost = true;
+	prs->state->posbyte -= prs->state->lenbytetoken;
+	prs->state->poschar -= prs->state->lenchartoken;
+}
+/* Move the position back to the start of the token-so-far (token lengths are kept) */
+static void
+SpecialHyphen(TParser *prs)
+{
+	prs->state->posbyte -= prs->state->lenbytetoken;
+	prs->state->poschar -= prs->state->lenchartoken;
+}
+/* Move the position back to the start of the token-so-far and reset the token lengths */
+static void
+SpecialVerVersion(TParser *prs)
+{
+	prs->state->posbyte -= prs->state->lenbytetoken;
+	prs->state->poschar -= prs->state->lenchartoken;
+	prs->state->lenbytetoken = 0;
+	prs->state->lenchartoken = 0;
+}
+
+static int
+p_isstophost(TParser *prs)
+{
+	int			wanted = prs->wanthost ? 1 : 0;
+
+	/* wanthost is a one-shot request: report it once, then clear it */
+	prs->wanthost = false;
+
+	return wanted;
+}
+/* Report the parser's ignore flag (toggled by SpecialTags) as 1 or 0 */
+static int
+p_isignore(TParser *prs)
+{
+	return (prs->ignore) ? 1 : 0;
+}
+/* Probe for a HOST token at the current position with a copy parser; on success advance prs past it */
+static int
+p_ishost(TParser *prs)
+{
+	TParser    *tmpprs = TParserCopyInit(prs);
+	int			res = 0;
+
+	tmpprs->wanthost = true;
+
+	/*
+	 * Check stack depth before recursing.  (Since TParserGet() doesn't
+	 * normally recurse, we put the cost of checking here not there.)
+	 */
+	check_stack_depth();
+
+	if (TParserGet(tmpprs) && tmpprs->type == HOST)
+	{
+		prs->state->posbyte += tmpprs->lenbytetoken;
+		prs->state->poschar += tmpprs->lenchartoken;
+		prs->state->lenbytetoken += tmpprs->lenbytetoken;
+		prs->state->lenchartoken += tmpprs->lenchartoken;
+		prs->state->charlen = tmpprs->state->charlen;
+		res = 1;
+	}
+	TParserCopyClose(tmpprs);
+
+	return res;
+}
+/* Probe for a URLPATH token at the current position with a copy parser; on success advance prs past it */
+static int
+p_isURLPath(TParser *prs)
+{
+	TParser    *tmpprs = TParserCopyInit(prs);
+	int			res = 0;
+	/* push a second position so the copy starts in TPS_InURLPathFirst, not Base */
+	tmpprs->state = newTParserPosition(tmpprs->state);
+	tmpprs->state->state = TPS_InURLPathFirst;
+
+	/*
+	 * Check stack depth before recursing.  (Since TParserGet() doesn't
+	 * normally recurse, we put the cost of checking here not there.)
+	 */
+	check_stack_depth();
+
+	if (TParserGet(tmpprs) && tmpprs->type == URLPATH)
+	{
+		prs->state->posbyte += tmpprs->lenbytetoken;
+		prs->state->poschar += tmpprs->lenchartoken;
+		prs->state->lenbytetoken += tmpprs->lenbytetoken;
+		prs->state->lenchartoken += tmpprs->lenchartoken;
+		prs->state->charlen = tmpprs->state->charlen;
+		res = 1;
+	}
+	TParserCopyClose(tmpprs);
+
+	return res;
+}
+
+/*
+ * returns true if current character has zero display length or
+ * it's a special sign in several languages. Such characters
+ * aren't a word-breaker although they aren't an isalpha.
+ * In beginning of word they aren't a part of it.
+ */
+static int
+p_isspecial(TParser *prs)
+{
+	/*
+	 * pg_dsplen could return -1 which means error or control character
+	 */
+	if (pg_dsplen(prs->str + prs->state->posbyte) == 0)
+		return 1;
+
+	/*
+	 * Unicode Characters in the 'Mark, Spacing Combining' Category That
+	 * characters are not alpha although they are not breakers of word too.
+	 * Check that only in utf encoding, because other encodings aren't
+	 * supported by postgres or even exists.
+	 */
+	if (GetDatabaseEncoding() == PG_UTF8 && prs->usewide)
+	{
+		static const pg_wchar strange_letter[] = {
+			/*
+			 * use binary search, so elements should be ordered
+			 */
+			0x0903,				/* DEVANAGARI SIGN VISARGA */
+			0x093E,				/* DEVANAGARI VOWEL SIGN AA */
+			0x093F,				/* DEVANAGARI VOWEL SIGN I */
+			0x0940,				/* DEVANAGARI VOWEL SIGN II */
+			0x0949,				/* DEVANAGARI VOWEL SIGN CANDRA O */
+			0x094A,				/* DEVANAGARI VOWEL SIGN SHORT O */
+			0x094B,				/* DEVANAGARI VOWEL SIGN O */
+			0x094C,				/* DEVANAGARI VOWEL SIGN AU */
+			0x0982,				/* BENGALI SIGN ANUSVARA */
+			0x0983,				/* BENGALI SIGN VISARGA */
+			0x09BE,				/* BENGALI VOWEL SIGN AA */
+			0x09BF,				/* BENGALI VOWEL SIGN I */
+			0x09C0,				/* BENGALI VOWEL SIGN II */
+			0x09C7,				/* BENGALI VOWEL SIGN E */
+			0x09C8,				/* BENGALI VOWEL SIGN AI */
+			0x09CB,				/* BENGALI VOWEL SIGN O */
+			0x09CC,				/* BENGALI VOWEL SIGN AU */
+			0x09D7,				/* BENGALI AU LENGTH MARK */
+			0x0A03,				/* GURMUKHI SIGN VISARGA */
+			0x0A3E,				/* GURMUKHI VOWEL SIGN AA */
+			0x0A3F,				/* GURMUKHI VOWEL SIGN I */
+			0x0A40,				/* GURMUKHI VOWEL SIGN II */
+			0x0A83,				/* GUJARATI SIGN VISARGA */
+			0x0ABE,				/* GUJARATI VOWEL SIGN AA */
+			0x0ABF,				/* GUJARATI VOWEL SIGN I */
+			0x0AC0,				/* GUJARATI VOWEL SIGN II */
+			0x0AC9,				/* GUJARATI VOWEL SIGN CANDRA O */
+			0x0ACB,				/* GUJARATI VOWEL SIGN O */
+			0x0ACC,				/* GUJARATI VOWEL SIGN AU */
+			0x0B02,				/* ORIYA SIGN ANUSVARA */
+			0x0B03,				/* ORIYA SIGN VISARGA */
+			0x0B3E,				/* ORIYA VOWEL SIGN AA */
+			0x0B40,				/* ORIYA VOWEL SIGN II */
+			0x0B47,				/* ORIYA VOWEL SIGN E */
+			0x0B48,				/* ORIYA VOWEL SIGN AI */
+			0x0B4B,				/* ORIYA VOWEL SIGN O */
+			0x0B4C,				/* ORIYA VOWEL SIGN AU */
+			0x0B57,				/* ORIYA AU LENGTH MARK */
+			0x0BBE,				/* TAMIL VOWEL SIGN AA */
+			0x0BBF,				/* TAMIL VOWEL SIGN I */
+			0x0BC1,				/* TAMIL VOWEL SIGN U */
+			0x0BC2,				/* TAMIL VOWEL SIGN UU */
+			0x0BC6,				/* TAMIL VOWEL SIGN E */
+			0x0BC7,				/* TAMIL VOWEL SIGN EE */
+			0x0BC8,				/* TAMIL VOWEL SIGN AI */
+			0x0BCA,				/* TAMIL VOWEL SIGN O */
+			0x0BCB,				/* TAMIL VOWEL SIGN OO */
+			0x0BCC,				/* TAMIL VOWEL SIGN AU */
+			0x0BD7,				/* TAMIL AU LENGTH MARK */
+			0x0C01,				/* TELUGU SIGN CANDRABINDU */
+			0x0C02,				/* TELUGU SIGN ANUSVARA */
+			0x0C03,				/* TELUGU SIGN VISARGA */
+			0x0C41,				/* TELUGU VOWEL SIGN U */
+			0x0C42,				/* TELUGU VOWEL SIGN UU */
+			0x0C43,				/* TELUGU VOWEL SIGN VOCALIC R */
+			0x0C44,				/* TELUGU VOWEL SIGN VOCALIC RR */
+			0x0C82,				/* KANNADA SIGN ANUSVARA */
+			0x0C83,				/* KANNADA SIGN VISARGA */
+			0x0CBE,				/* KANNADA VOWEL SIGN AA */
+			0x0CC0,				/* KANNADA VOWEL SIGN II */
+			0x0CC1,				/* KANNADA VOWEL SIGN U */
+			0x0CC2,				/* KANNADA VOWEL SIGN UU */
+			0x0CC3,				/* KANNADA VOWEL SIGN VOCALIC R */
+			0x0CC4,				/* KANNADA VOWEL SIGN VOCALIC RR */
+			0x0CC7,				/* KANNADA VOWEL SIGN EE */
+			0x0CC8,				/* KANNADA VOWEL SIGN AI */
+			0x0CCA,				/* KANNADA VOWEL SIGN O */
+			0x0CCB,				/* KANNADA VOWEL SIGN OO */
+			0x0CD5,				/* KANNADA LENGTH MARK */
+			0x0CD6,				/* KANNADA AI LENGTH MARK */
+			0x0D02,				/* MALAYALAM SIGN ANUSVARA */
+			0x0D03,				/* MALAYALAM SIGN VISARGA */
+			0x0D3E,				/* MALAYALAM VOWEL SIGN AA */
+			0x0D3F,				/* MALAYALAM VOWEL SIGN I */
+			0x0D40,				/* MALAYALAM VOWEL SIGN II */
+			0x0D46,				/* MALAYALAM VOWEL SIGN E */
+			0x0D47,				/* MALAYALAM VOWEL SIGN EE */
+			0x0D48,				/* MALAYALAM VOWEL SIGN AI */
+			0x0D4A,				/* MALAYALAM VOWEL SIGN O */
+			0x0D4B,				/* MALAYALAM VOWEL SIGN OO */
+			0x0D4C,				/* MALAYALAM VOWEL SIGN AU */
+			0x0D57,				/* MALAYALAM AU LENGTH MARK */
+			0x0D82,				/* SINHALA SIGN ANUSVARAYA */
+			0x0D83,				/* SINHALA SIGN VISARGAYA */
+			0x0DCF,				/* SINHALA VOWEL SIGN AELA-PILLA */
+			0x0DD0,				/* SINHALA VOWEL SIGN KETTI AEDA-PILLA */
+			0x0DD1,				/* SINHALA VOWEL SIGN DIGA AEDA-PILLA */
+			0x0DD8,				/* SINHALA VOWEL SIGN GAETTA-PILLA */
+			0x0DD9,				/* SINHALA VOWEL SIGN KOMBUVA */
+			0x0DDA,				/* SINHALA VOWEL SIGN DIGA KOMBUVA */
+			0x0DDB,				/* SINHALA VOWEL SIGN KOMBU DEKA */
+			0x0DDC,				/* SINHALA VOWEL SIGN KOMBUVA HAA AELA-PILLA */
+			0x0DDD,				/* SINHALA VOWEL SIGN KOMBUVA HAA DIGA
+								 * AELA-PILLA */
+			0x0DDE,				/* SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA */
+			0x0DDF,				/* SINHALA VOWEL SIGN GAYANUKITTA */
+			0x0DF2,				/* SINHALA VOWEL SIGN DIGA GAETTA-PILLA */
+			0x0DF3,				/* SINHALA VOWEL SIGN DIGA GAYANUKITTA */
+			0x0F3E,				/* TIBETAN SIGN YAR TSHES */
+			0x0F3F,				/* TIBETAN SIGN MAR TSHES */
+			0x0F7F,				/* TIBETAN SIGN RNAM BCAD */
+			0x102B,				/* MYANMAR VOWEL SIGN TALL AA */
+			0x102C,				/* MYANMAR VOWEL SIGN AA */
+			0x1031,				/* MYANMAR VOWEL SIGN E */
+			0x1038,				/* MYANMAR SIGN VISARGA */
+			0x103B,				/* MYANMAR CONSONANT SIGN MEDIAL YA */
+			0x103C,				/* MYANMAR CONSONANT SIGN MEDIAL RA */
+			0x1056,				/* MYANMAR VOWEL SIGN VOCALIC R */
+			0x1057,				/* MYANMAR VOWEL SIGN VOCALIC RR */
+			0x1062,				/* MYANMAR VOWEL SIGN SGAW KAREN EU */
+			0x1063,				/* MYANMAR TONE MARK SGAW KAREN HATHI */
+			0x1064,				/* MYANMAR TONE MARK SGAW KAREN KE PHO */
+			0x1067,				/* MYANMAR VOWEL SIGN WESTERN PWO KAREN EU */
+			0x1068,				/* MYANMAR VOWEL SIGN WESTERN PWO KAREN UE */
+			0x1069,				/* MYANMAR SIGN WESTERN PWO KAREN TONE-1 */
+			0x106A,				/* MYANMAR SIGN WESTERN PWO KAREN TONE-2 */
+			0x106B,				/* MYANMAR SIGN WESTERN PWO KAREN TONE-3 */
+			0x106C,				/* MYANMAR SIGN WESTERN PWO KAREN TONE-4 */
+			0x106D,				/* MYANMAR SIGN WESTERN PWO KAREN TONE-5 */
+			0x1083,				/* MYANMAR VOWEL SIGN SHAN AA */
+			0x1084,				/* MYANMAR VOWEL SIGN SHAN E */
+			0x1087,				/* MYANMAR SIGN SHAN TONE-2 */
+			0x1088,				/* MYANMAR SIGN SHAN TONE-3 */
+			0x1089,				/* MYANMAR SIGN SHAN TONE-5 */
+			0x108A,				/* MYANMAR SIGN SHAN TONE-6 */
+			0x108B,				/* MYANMAR SIGN SHAN COUNCIL TONE-2 */
+			0x108C,				/* MYANMAR SIGN SHAN COUNCIL TONE-3 */
+			0x108F,				/* MYANMAR SIGN RUMAI PALAUNG TONE-5 */
+			0x17B6,				/* KHMER VOWEL SIGN AA */
+			0x17BE,				/* KHMER VOWEL SIGN OE */
+			0x17BF,				/* KHMER VOWEL SIGN YA */
+			0x17C0,				/* KHMER VOWEL SIGN IE */
+			0x17C1,				/* KHMER VOWEL SIGN E */
+			0x17C2,				/* KHMER VOWEL SIGN AE */
+			0x17C3,				/* KHMER VOWEL SIGN AI */
+			0x17C4,				/* KHMER VOWEL SIGN OO */
+			0x17C5,				/* KHMER VOWEL SIGN AU */
+			0x17C7,				/* KHMER SIGN REAHMUK */
+			0x17C8,				/* KHMER SIGN YUUKALEAPINTU */
+			0x1923,				/* LIMBU VOWEL SIGN EE */
+			0x1924,				/* LIMBU VOWEL SIGN AI */
+			0x1925,				/* LIMBU VOWEL SIGN OO */
+			0x1926,				/* LIMBU VOWEL SIGN AU */
+			0x1929,				/* LIMBU SUBJOINED LETTER YA */
+			0x192A,				/* LIMBU SUBJOINED LETTER RA */
+			0x192B,				/* LIMBU SUBJOINED LETTER WA */
+			0x1930,				/* LIMBU SMALL LETTER KA */
+			0x1931,				/* LIMBU SMALL LETTER NGA */
+			0x1933,				/* LIMBU SMALL LETTER TA */
+			0x1934,				/* LIMBU SMALL LETTER NA */
+			0x1935,				/* LIMBU SMALL LETTER PA */
+			0x1936,				/* LIMBU SMALL LETTER MA */
+			0x1937,				/* LIMBU SMALL LETTER RA */
+			0x1938,				/* LIMBU SMALL LETTER LA */
+			0x19B0,				/* NEW TAI LUE VOWEL SIGN VOWEL SHORTENER */
+			0x19B1,				/* NEW TAI LUE VOWEL SIGN AA */
+			0x19B2,				/* NEW TAI LUE VOWEL SIGN II */
+			0x19B3,				/* NEW TAI LUE VOWEL SIGN U */
+			0x19B4,				/* NEW TAI LUE VOWEL SIGN UU */
+			0x19B5,				/* NEW TAI LUE VOWEL SIGN E */
+			0x19B6,				/* NEW TAI LUE VOWEL SIGN AE */
+			0x19B7,				/* NEW TAI LUE VOWEL SIGN O */
+			0x19B8,				/* NEW TAI LUE VOWEL SIGN OA */
+			0x19B9,				/* NEW TAI LUE VOWEL SIGN UE */
+			0x19BA,				/* NEW TAI LUE VOWEL SIGN AY */
+			0x19BB,				/* NEW TAI LUE VOWEL SIGN AAY */
+			0x19BC,				/* NEW TAI LUE VOWEL SIGN UY */
+			0x19BD,				/* NEW TAI LUE VOWEL SIGN OY */
+			0x19BE,				/* NEW TAI LUE VOWEL SIGN OAY */
+			0x19BF,				/* NEW TAI LUE VOWEL SIGN UEY */
+			0x19C0,				/* NEW TAI LUE VOWEL SIGN IY */
+			0x19C8,				/* NEW TAI LUE TONE MARK-1 */
+			0x19C9,				/* NEW TAI LUE TONE MARK-2 */
+			0x1A19,				/* BUGINESE VOWEL SIGN E */
+			0x1A1A,				/* BUGINESE VOWEL SIGN O */
+			0x1A1B,				/* BUGINESE VOWEL SIGN AE */
+			0x1B04,				/* BALINESE SIGN BISAH */
+			0x1B35,				/* BALINESE VOWEL SIGN TEDUNG */
+			0x1B3B,				/* BALINESE VOWEL SIGN RA REPA TEDUNG */
+			0x1B3D,				/* BALINESE VOWEL SIGN LA LENGA TEDUNG */
+			0x1B3E,				/* BALINESE VOWEL SIGN TALING */
+			0x1B3F,				/* BALINESE VOWEL SIGN TALING REPA */
+			0x1B40,				/* BALINESE VOWEL SIGN TALING TEDUNG */
+			0x1B41,				/* BALINESE VOWEL SIGN TALING REPA TEDUNG */
+			0x1B43,				/* BALINESE VOWEL SIGN PEPET TEDUNG */
+			0x1B44,				/* BALINESE ADEG ADEG */
+			0x1B82,				/* SUNDANESE SIGN PANGWISAD */
+			0x1BA1,				/* SUNDANESE CONSONANT SIGN PAMINGKAL */
+			0x1BA6,				/* SUNDANESE VOWEL SIGN PANAELAENG */
+			0x1BA7,				/* SUNDANESE VOWEL SIGN PANOLONG */
+			0x1BAA,				/* SUNDANESE SIGN PAMAAEH */
+			0x1C24,				/* LEPCHA SUBJOINED LETTER YA */
+			0x1C25,				/* LEPCHA SUBJOINED LETTER RA */
+			0x1C26,				/* LEPCHA VOWEL SIGN AA */
+			0x1C27,				/* LEPCHA VOWEL SIGN I */
+			0x1C28,				/* LEPCHA VOWEL SIGN O */
+			0x1C29,				/* LEPCHA VOWEL SIGN OO */
+			0x1C2A,				/* LEPCHA VOWEL SIGN U */
+			0x1C2B,				/* LEPCHA VOWEL SIGN UU */
+			0x1C34,				/* LEPCHA CONSONANT SIGN NYIN-DO */
+			0x1C35,				/* LEPCHA CONSONANT SIGN KANG */
+			0xA823,				/* SYLOTI NAGRI VOWEL SIGN A */
+			0xA824,				/* SYLOTI NAGRI VOWEL SIGN I */
+			0xA827,				/* SYLOTI NAGRI VOWEL SIGN OO */
+			0xA880,				/* SAURASHTRA SIGN ANUSVARA */
+			0xA881,				/* SAURASHTRA SIGN VISARGA */
+			0xA8B4,				/* SAURASHTRA CONSONANT SIGN HAARU */
+			0xA8B5,				/* SAURASHTRA VOWEL SIGN AA */
+			0xA8B6,				/* SAURASHTRA VOWEL SIGN I */
+			0xA8B7,				/* SAURASHTRA VOWEL SIGN II */
+			0xA8B8,				/* SAURASHTRA VOWEL SIGN U */
+			0xA8B9,				/* SAURASHTRA VOWEL SIGN UU */
+			0xA8BA,				/* SAURASHTRA VOWEL SIGN VOCALIC R */
+			0xA8BB,				/* SAURASHTRA VOWEL SIGN VOCALIC RR */
+			0xA8BC,				/* SAURASHTRA VOWEL SIGN VOCALIC L */
+			0xA8BD,				/* SAURASHTRA VOWEL SIGN VOCALIC LL */
+			0xA8BE,				/* SAURASHTRA VOWEL SIGN E */
+			0xA8BF,				/* SAURASHTRA VOWEL SIGN EE */
+			0xA8C0,				/* SAURASHTRA VOWEL SIGN AI */
+			0xA8C1,				/* SAURASHTRA VOWEL SIGN O */
+			0xA8C2,				/* SAURASHTRA VOWEL SIGN OO */
+			0xA8C3,				/* SAURASHTRA VOWEL SIGN AU */
+			0xA952,				/* REJANG CONSONANT SIGN H */
+			0xA953,				/* REJANG VIRAMA */
+			0xAA2F,				/* CHAM VOWEL SIGN O */
+			0xAA30,				/* CHAM VOWEL SIGN AI */
+			0xAA33,				/* CHAM CONSONANT SIGN YA */
+			0xAA34,				/* CHAM CONSONANT SIGN RA */
+			0xAA4D				/* CHAM CONSONANT SIGN FINAL H */
+		};
+		const pg_wchar *StopLow = strange_letter,
+				   *StopHigh = strange_letter + lengthof(strange_letter),
+				   *StopMiddle;
+		pg_wchar	c;
+
+		if (prs->pgwstr)
+			c = *(prs->pgwstr + prs->state->poschar);
+		else
+			c = (pg_wchar) *(prs->wstr + prs->state->poschar);
+
+		while (StopLow < StopHigh)
+		{
+			StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
+			if (*StopMiddle == c)
+				return 1;
+			else if (*StopMiddle < c)
+				StopLow = StopMiddle + 1;
+			else
+				StopHigh = StopMiddle;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * State/action tables of the parser
+ */
+
+static const TParserStateActionItem actionTPS_Base[] = {	/* TPS_Base rows: pick the next state from the current character */
+	{p_isEOF, 0, A_NEXT, TPS_Null, 0, NULL},
+	{p_iseqC, '<', A_PUSH, TPS_InTagFirst, 0, NULL},
+	{p_isignore, 0, A_NEXT, TPS_InSpace, 0, NULL},
+	{p_isasclet, 0, A_NEXT, TPS_InAsciiWord, 0, NULL},	/* listed ahead of the more general-looking p_isalpha row */
+	{p_isalpha, 0, A_NEXT, TPS_InWord, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InUnsignedInt, 0, NULL},
+	{p_iseqC, '-', A_PUSH, TPS_InSignedIntFirst, 0, NULL},
+	{p_iseqC, '+', A_PUSH, TPS_InSignedIntFirst, 0, NULL},
+	{p_iseqC, '&', A_PUSH, TPS_InXMLEntityFirst, 0, NULL},
+	{p_iseqC, '~', A_PUSH, TPS_InFileTwiddle, 0, NULL},
+	{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
+	{p_iseqC, '.', A_PUSH, TPS_InPathFirstFirst, 0, NULL},
+	{NULL, 0, A_NEXT, TPS_InSpace, 0, NULL}	/* NULL test: presumably the catch-all row (confirm in TParserGet); goes to TPS_InSpace */
+};
+
+
+static const TParserStateActionItem actionTPS_InNumWord[] = {	/* TPS_InNumWord rows: loop on p_isalnum/p_isspecial, otherwise A_BINGO as NUMWORD */
+	{p_isEOF, 0, A_BINGO, TPS_Base, NUMWORD, NULL},
+	{p_isalnum, 0, A_NEXT, TPS_InNumWord, 0, NULL},
+	{p_isspecial, 0, A_NEXT, TPS_InNumWord, 0, NULL},
+	{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
+	{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
+	{p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL},
+	{p_iseqC, '-', A_PUSH, TPS_InHyphenNumWordFirst, 0, NULL},
+	{NULL, 0, A_BINGO, TPS_Base, NUMWORD, NULL}	/* same result as the p_isEOF row */
+};
+
+static const TParserStateActionItem actionTPS_InAsciiWord[] = {	/* TPS_InAsciiWord rows: continue on p_isasclet, otherwise A_BINGO as ASCIIWORD */
+	{p_isEOF, 0, A_BINGO, TPS_Base, ASCIIWORD, NULL},
+	{p_isasclet, 0, A_NEXT, TPS_Null, 0, NULL},	/* TPS_Null target: presumably "stay in this state" -- confirm in TParserGet */
+	{p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL},
+	{p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL},	/* NOTE(review): second '.' row looks like the alternative tried once the first pushed attempt pops -- confirm */
+	{p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
+	{p_iseqC, '-', A_PUSH, TPS_InHyphenAsciiWordFirst, 0, NULL},	/* second '-' row, same pattern as the '.' pair */
+	{p_iseqC, '_', A_PUSH, TPS_InHostFirstAN, 0, NULL},
+	{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
+	{p_iseqC, ':', A_PUSH, TPS_InProtocolFirst, 0, NULL},
+	{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
+	{p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InNumWord, 0, NULL},	/* second p_isdigit row: A_NEXT instead of A_PUSH */
+	{p_isalpha, 0, A_NEXT, TPS_InWord, 0, NULL},
+	{p_isspecial, 0, A_NEXT, TPS_InWord, 0, NULL},
+	{NULL, 0, A_BINGO, TPS_Base, ASCIIWORD, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InWord[] = {	/* TPS_InWord rows: continue on p_isalpha/p_isspecial, otherwise A_BINGO as WORD_T */
+	{p_isEOF, 0, A_BINGO, TPS_Base, WORD_T, NULL},
+	{p_isalpha, 0, A_NEXT, TPS_Null, 0, NULL},	/* TPS_Null target: presumably "stay in this state" -- confirm in TParserGet */
+	{p_isspecial, 0, A_NEXT, TPS_Null, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InNumWord, 0, NULL},	/* a digit switches to TPS_InNumWord */
+	{p_iseqC, '-', A_PUSH, TPS_InHyphenWordFirst, 0, NULL},
+	{NULL, 0, A_BINGO, TPS_Base, WORD_T, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InUnsignedInt[] = {	/* TPS_InUnsignedInt rows: continue on p_isdigit, otherwise A_BINGO as UNSIGNEDINT */
+	{p_isEOF, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
+	{p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL},
+	{p_iseqC, '.', A_PUSH, TPS_InUDecimalFirst, 0, NULL},	/* NOTE(review): second '.' row looks like the alternative tried once the first pushed attempt pops -- confirm */
+	{p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+	{p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+	{p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
+	{p_iseqC, '_', A_PUSH, TPS_InHostFirstAN, 0, NULL},
+	{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
+	{p_isasclet, 0, A_PUSH, TPS_InHost, 0, NULL},
+	{p_isalpha, 0, A_NEXT, TPS_InNumWord, 0, NULL},
+	{p_isspecial, 0, A_NEXT, TPS_InNumWord, 0, NULL},
+	{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
+	{NULL, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InSignedIntFirst[] = {	/* TPS_InSignedIntFirst rows: pushed from TPS_Base on '-' or '+' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isdigit, 0, A_NEXT | A_CLEAR, TPS_InSignedInt, 0, NULL},	/* only a digit moves on to TPS_InSignedInt */
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InSignedInt[] = {	/* TPS_InSignedInt rows: continue on p_isdigit, otherwise A_BINGO as SIGNEDINT */
+	{p_isEOF, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
+	{p_iseqC, '.', A_PUSH, TPS_InDecimalFirst, 0, NULL},
+	{p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+	{p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+	{NULL, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InSpace[] = {	/* TPS_InSpace rows: continue on p_isignore/p_isnotalnum, otherwise A_BINGO as SPACE */
+	{p_isEOF, 0, A_BINGO, TPS_Base, SPACE, NULL},
+	{p_iseqC, '<', A_BINGO, TPS_Base, SPACE, NULL},	/* '<', '-', '+', '&', '/' are also A_PUSH triggers in the TPS_Base table */
+	{p_isignore, 0, A_NEXT, TPS_Null, 0, NULL},
+	{p_iseqC, '-', A_BINGO, TPS_Base, SPACE, NULL},
+	{p_iseqC, '+', A_BINGO, TPS_Base, SPACE, NULL},
+	{p_iseqC, '&', A_BINGO, TPS_Base, SPACE, NULL},
+	{p_iseqC, '/', A_BINGO, TPS_Base, SPACE, NULL},
+	{p_isnotalnum, 0, A_NEXT, TPS_InSpace, 0, NULL},
+	{NULL, 0, A_BINGO, TPS_Base, SPACE, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InUDecimalFirst[] = {	/* TPS_InUDecimalFirst rows: pushed from TPS_InUnsignedInt on '.' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isdigit, 0, A_CLEAR, TPS_InUDecimal, 0, NULL},	/* only a digit moves on to TPS_InUDecimal */
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InUDecimal[] = {	/* TPS_InUDecimal rows: loop on p_isdigit, otherwise A_BINGO as DECIMAL_T */
+	{p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL_T, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InUDecimal, 0, NULL},
+	{p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL},	/* a further '.' pushes TPS_InVersionFirst */
+	{p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+	{p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+	{NULL, 0, A_BINGO, TPS_Base, DECIMAL_T, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InDecimalFirst[] = {	/* TPS_InDecimalFirst rows: pushed from TPS_InSignedInt on '.' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isdigit, 0, A_CLEAR, TPS_InDecimal, 0, NULL},	/* only a digit moves on to TPS_InDecimal */
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InDecimal[] = {	/* TPS_InDecimal rows: loop on p_isdigit, otherwise A_BINGO as DECIMAL_T */
+	{p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL_T, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InDecimal, 0, NULL},
+	{p_iseqC, '.', A_PUSH, TPS_InVerVersion, 0, NULL},	/* differs from TPS_InUDecimal, whose '.' row pushes TPS_InVersionFirst */
+	{p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+	{p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+	{NULL, 0, A_BINGO, TPS_Base, DECIMAL_T, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InVerVersion[] = {	/* TPS_InVerVersion rows: pushed from TPS_InDecimal on '.' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isdigit, 0, A_RERUN, TPS_InSVerVersion, 0, SpecialVerVersion},	/* the only row in view that names SpecialVerVersion (defined outside this chunk) */
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InSVerVersion[] = {	/* TPS_InSVerVersion rows: reached from TPS_InVerVersion via A_RERUN */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isdigit, 0, A_BINGO | A_CLRALL, TPS_InUnsignedInt, SPACE, NULL},	/* A_BINGO as SPACE, then on to TPS_InUnsignedInt rather than TPS_Base */
+	{NULL, 0, A_NEXT, TPS_Null, 0, NULL}
+};
+
+
+static const TParserStateActionItem actionTPS_InVersionFirst[] = {	/* TPS_InVersionFirst rows: pushed on '.' from TPS_InUDecimal and TPS_InVersion */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isdigit, 0, A_CLEAR, TPS_InVersion, 0, NULL},	/* only a digit moves on to TPS_InVersion */
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InVersion[] = {	/* TPS_InVersion rows: loop on p_isdigit, otherwise A_BINGO as VERSIONNUMBER */
+	{p_isEOF, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InVersion, 0, NULL},
+	{p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL},	/* each further '.' pushes TPS_InVersionFirst again */
+	{NULL, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InMantissaFirst[] = {	/* TPS_InMantissaFirst rows: pushed on 'e'/'E' from the integer and decimal tables */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL},
+	{p_iseqC, '+', A_NEXT, TPS_InMantissaSign, 0, NULL},	/* a sign goes through TPS_InMantissaSign first */
+	{p_iseqC, '-', A_NEXT, TPS_InMantissaSign, 0, NULL},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InMantissaSign[] = {	/* TPS_InMantissaSign rows: reached from TPS_InMantissaFirst on '+' or '-' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL},	/* only a digit moves on to TPS_InMantissa */
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InMantissa[] = {	/* TPS_InMantissa rows: loop on p_isdigit, otherwise A_BINGO as SCIENTIFIC */
+	{p_isEOF, 0, A_BINGO, TPS_Base, SCIENTIFIC, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InMantissa, 0, NULL},
+	{NULL, 0, A_BINGO, TPS_Base, SCIENTIFIC, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InXMLEntityFirst[] = {	/* TPS_InXMLEntityFirst rows: pushed from TPS_Base on '&' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_iseqC, '#', A_NEXT, TPS_InXMLEntityNumFirst, 0, NULL},	/* '#' selects the numeric-entity states */
+	{p_isasclet, 0, A_NEXT, TPS_InXMLEntity, 0, NULL},
+	{p_iseqC, ':', A_NEXT, TPS_InXMLEntity, 0, NULL},
+	{p_iseqC, '_', A_NEXT, TPS_InXMLEntity, 0, NULL},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InXMLEntity[] = {	/* TPS_InXMLEntity rows: loop on p_isalnum and ':', '_', '.', '-'; ';' goes to TPS_InXMLEntityEnd */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isalnum, 0, A_NEXT, TPS_InXMLEntity, 0, NULL},
+	{p_iseqC, ':', A_NEXT, TPS_InXMLEntity, 0, NULL},
+	{p_iseqC, '_', A_NEXT, TPS_InXMLEntity, 0, NULL},
+	{p_iseqC, '.', A_NEXT, TPS_InXMLEntity, 0, NULL},
+	{p_iseqC, '-', A_NEXT, TPS_InXMLEntity, 0, NULL},
+	{p_iseqC, ';', A_NEXT, TPS_InXMLEntityEnd, 0, NULL},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}	/* anything else: A_POP */
+};
+
+static const TParserStateActionItem actionTPS_InXMLEntityNumFirst[] = {	/* TPS_InXMLEntityNumFirst rows: reached from TPS_InXMLEntityFirst on '#' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_iseqC, 'x', A_NEXT, TPS_InXMLEntityHexNumFirst, 0, NULL},	/* 'x' or 'X' selects the hex states */
+	{p_iseqC, 'X', A_NEXT, TPS_InXMLEntityHexNumFirst, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InXMLEntityNum, 0, NULL},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InXMLEntityHexNumFirst[] = {	/* TPS_InXMLEntityHexNumFirst rows: reached from TPS_InXMLEntityNumFirst on 'x' or 'X' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isxdigit, 0, A_NEXT, TPS_InXMLEntityHexNum, 0, NULL},	/* only p_isxdigit moves on to TPS_InXMLEntityHexNum */
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InXMLEntityNum[] = {	/* TPS_InXMLEntityNum rows: loop on p_isdigit; ';' goes to TPS_InXMLEntityEnd */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InXMLEntityNum, 0, NULL},
+	{p_iseqC, ';', A_NEXT, TPS_InXMLEntityEnd, 0, NULL},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InXMLEntityHexNum[] = {	/* TPS_InXMLEntityHexNum rows: loop on p_isxdigit; ';' goes to TPS_InXMLEntityEnd */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isxdigit, 0, A_NEXT, TPS_InXMLEntityHexNum, 0, NULL},
+	{p_iseqC, ';', A_NEXT, TPS_InXMLEntityEnd, 0, NULL},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InXMLEntityEnd[] = {	/* TPS_InXMLEntityEnd rows: reached on ';' from the three XML entity body tables */
+	{NULL, 0, A_BINGO | A_CLEAR, TPS_Base, XMLENTITY, NULL}	/* single row with no test: A_BINGO as XMLENTITY */
+};
+
+static const TParserStateActionItem actionTPS_InTagFirst[] = {	/* TPS_InTagFirst rows: pushed from TPS_Base on '<' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_iseqC, '/', A_PUSH, TPS_InTagCloseFirst, 0, NULL},
+	{p_iseqC, '!', A_PUSH, TPS_InCommentFirst, 0, NULL},
+	{p_iseqC, '?', A_PUSH, TPS_InXMLBegin, 0, NULL},
+	{p_isasclet, 0, A_PUSH, TPS_InTagName, 0, NULL},
+	{p_iseqC, ':', A_PUSH, TPS_InTagName, 0, NULL},
+	{p_iseqC, '_', A_PUSH, TPS_InTagName, 0, NULL},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}	/* anything else after '<': A_POP */
+};
+
+static const TParserStateActionItem actionTPS_InXMLBegin[] = {	/* TPS_InXMLBegin rows: pushed from TPS_InTagFirst on '?' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	/* <?xml ... */
+	/* XXX do we want states for the m and l ?  Right now this accepts <?xZ */
+	{p_iseqC, 'x', A_NEXT, TPS_InTag, 0, NULL},	/* only the 'x' is checked here; the rest is handled by the TPS_InTag rows */
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InTagCloseFirst[] = {	/* TPS_InTagCloseFirst rows: pushed from TPS_InTagFirst on '/' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isasclet, 0, A_NEXT, TPS_InTagName, 0, NULL},	/* only p_isasclet is accepted here; TPS_InTagFirst also accepts ':' and '_' */
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InTagName[] = {	/* TPS_InTagName rows: continue on p_isalnum and ':', '_', '.', '-' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	/* <br/> case */
+	{p_iseqC, '/', A_NEXT, TPS_InTagBeginEnd, 0, NULL},
+	{p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, SpecialTags},	/* SpecialTags is defined outside this chunk */
+	{p_isspace, 0, A_NEXT, TPS_InTag, 0, SpecialTags},
+	{p_isalnum, 0, A_NEXT, TPS_Null, 0, NULL},	/* TPS_Null target: presumably "stay in this state" -- confirm in TParserGet */
+	{p_iseqC, ':', A_NEXT, TPS_Null, 0, NULL},
+	{p_iseqC, '_', A_NEXT, TPS_Null, 0, NULL},
+	{p_iseqC, '.', A_NEXT, TPS_Null, 0, NULL},
+	{p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InTagBeginEnd[] = {	/* TPS_InTagBeginEnd rows: reached from TPS_InTagName on '/' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, NULL},	/* only '>' is accepted after the '/' */
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InTag[] = {	/* TPS_InTag rows: listed characters continue with A_NEXT, anything unlisted is A_POP */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, SpecialTags},
+	{p_iseqC, '\'', A_NEXT, TPS_InTagEscapeK, 0, NULL},	/* single quote: switch to TPS_InTagEscapeK */
+	{p_iseqC, '"', A_NEXT, TPS_InTagEscapeKK, 0, NULL},	/* double quote: switch to TPS_InTagEscapeKK */
+	{p_isasclet, 0, A_NEXT, TPS_Null, 0, NULL},	/* TPS_Null target: presumably "stay in this state" -- confirm in TParserGet */
+	{p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
+	{p_iseqC, '=', A_NEXT, TPS_Null, 0, NULL},
+	{p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL},
+	{p_iseqC, '_', A_NEXT, TPS_Null, 0, NULL},
+	{p_iseqC, '#', A_NEXT, TPS_Null, 0, NULL},
+	{p_iseqC, '/', A_NEXT, TPS_Null, 0, NULL},
+	{p_iseqC, ':', A_NEXT, TPS_Null, 0, NULL},
+	{p_iseqC, '.', A_NEXT, TPS_Null, 0, NULL},
+	{p_iseqC, '&', A_NEXT, TPS_Null, 0, NULL},
+	{p_iseqC, '?', A_NEXT, TPS_Null, 0, NULL},
+	{p_iseqC, '%', A_NEXT, TPS_Null, 0, NULL},
+	{p_iseqC, '~', A_NEXT, TPS_Null, 0, NULL},
+	{p_isspace, 0, A_NEXT, TPS_Null, 0, SpecialTags},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InTagEscapeK[] = {	/* TPS_InTagEscapeK rows: reached from TPS_InTag on a single quote */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL},
+	{p_iseqC, '\'', A_NEXT, TPS_InTag, 0, NULL},	/* the next single quote returns to TPS_InTag */
+	{NULL, 0, A_NEXT, TPS_InTagEscapeK, 0, NULL}	/* any other character stays in TPS_InTagEscapeK */
+};
+
+static const TParserStateActionItem actionTPS_InTagEscapeKK[] = {	/* TPS_InTagEscapeKK rows: reached from TPS_InTag on a double quote */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL},
+	{p_iseqC, '"', A_NEXT, TPS_InTag, 0, NULL},	/* the next double quote returns to TPS_InTag */
+	{NULL, 0, A_NEXT, TPS_InTagEscapeKK, 0, NULL}	/* any other character stays in TPS_InTagEscapeKK */
+};
+
+static const TParserStateActionItem actionTPS_InTagBackSleshed[] = {	/* TPS_InTagBackSleshed rows: pushed on a backslash from TPS_InTagEscapeK / TPS_InTagEscapeKK */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{NULL, 0, A_MERGE, TPS_Null, 0, NULL}	/* the only A_MERGE row in the visible tables; A_MERGE is defined outside this chunk */
+};
+
+static const TParserStateActionItem actionTPS_InTagEnd[] = {	/* TPS_InTagEnd rows: reached on '>' from TPS_InTagName, TPS_InTagBeginEnd and TPS_InTag */
+	{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, TAG_T, NULL}	/* single row with no test: A_BINGO as TAG_T */
+};
+
+static const TParserStateActionItem actionTPS_InCommentFirst[] = {	/* TPS_InCommentFirst rows: pushed from TPS_InTagFirst on '!' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_iseqC, '-', A_NEXT, TPS_InCommentLast, 0, NULL},
+	/* <!DOCTYPE ...> */
+	{p_iseqC, 'D', A_NEXT, TPS_InTag, 0, NULL},	/* only the first letter is checked here; the rest is handled by the TPS_InTag rows */
+	{p_iseqC, 'd', A_NEXT, TPS_InTag, 0, NULL},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InCommentLast[] = {	/* TPS_InCommentLast rows: reached from TPS_InCommentFirst on '-' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_iseqC, '-', A_NEXT, TPS_InComment, 0, NULL},	/* a second '-' enters TPS_InComment */
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InComment[] = {	/* TPS_InComment rows: every character continues with A_NEXT; '-' moves to TPS_InCloseCommentFirst */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_iseqC, '-', A_NEXT, TPS_InCloseCommentFirst, 0, NULL},
+	{NULL, 0, A_NEXT, TPS_Null, 0, NULL}	/* TPS_Null target: presumably "stay in this state" -- confirm in TParserGet */
+};
+
+static const TParserStateActionItem actionTPS_InCloseCommentFirst[] = {	/* TPS_InCloseCommentFirst rows: reached from TPS_InComment on '-' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_iseqC, '-', A_NEXT, TPS_InCloseCommentLast, 0, NULL},
+	{NULL, 0, A_NEXT, TPS_InComment, 0, NULL}	/* anything but a second '-' goes back to TPS_InComment */
+};
+
+static const TParserStateActionItem actionTPS_InCloseCommentLast[] = {	/* TPS_InCloseCommentLast rows: reached from TPS_InCloseCommentFirst on a second '-' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL},	/* further '-': TPS_Null target, presumably "stay in this state" -- confirm in TParserGet */
+	{p_iseqC, '>', A_NEXT, TPS_InCommentEnd, 0, NULL},
+	{NULL, 0, A_NEXT, TPS_InComment, 0, NULL}	/* anything else goes back to TPS_InComment */
+};
+
+static const TParserStateActionItem actionTPS_InCommentEnd[] = {	/* TPS_InCommentEnd rows: reached from TPS_InCloseCommentLast on '>' */
+	{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, TAG_T, NULL}	/* single row with no test: A_BINGO as TAG_T, same type as TPS_InTagEnd */
+};
+
+static const TParserStateActionItem actionTPS_InHostFirstDomain[] = {	/* TPS_InHostFirstDomain rows: pushed on '.' from e.g. TPS_InAsciiWord, TPS_InUnsignedInt and the host tables */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isasclet, 0, A_NEXT, TPS_InHostDomainSecond, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InHostDomainSecond[] = {	/* TPS_InHostDomainSecond rows: reached from TPS_InHostFirstDomain on p_isasclet */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isasclet, 0, A_NEXT, TPS_InHostDomain, 0, NULL},	/* a second p_isasclet character moves on to TPS_InHostDomain */
+	{p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
+	{p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
+	{p_iseqC, '_', A_PUSH, TPS_InHostFirstAN, 0, NULL},
+	{p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL},
+	{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}	/* no A_BINGO row in this table */
+};
+
+static const TParserStateActionItem actionTPS_InHostDomain[] = {	/* TPS_InHostDomain rows: loop on p_isasclet; EOF and fallback rows A_BINGO as HOST */
+	{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL},
+	{p_isasclet, 0, A_NEXT, TPS_InHostDomain, 0, NULL},
+	{p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
+	{p_iseqC, ':', A_PUSH, TPS_InPortFirst, 0, NULL},
+	{p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
+	{p_iseqC, '_', A_PUSH, TPS_InHostFirstAN, 0, NULL},
+	{p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL},
+	{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
+	{p_isdigit, 0, A_POP, TPS_Null, 0, NULL},	/* NOTE(review): second p_isdigit row; presumably reached only after the pushed TPS_InHost attempt pops -- confirm */
+	{p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURLPathStart, HOST, NULL},	/* A_BINGO as HOST, then on to TPS_InURLPathStart instead of TPS_Base */
+	{p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},
+	{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InPortFirst[] = {	/* TPS_InPortFirst rows: pushed from TPS_InHostDomain on ':' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL},	/* only a digit moves on to TPS_InPort */
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InPort[] = {	/* TPS_InPort rows: loop on p_isdigit; EOF and fallback rows A_BINGO as HOST */
+	{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL},
+	{p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURLPathStart, HOST, NULL},	/* same row as in the TPS_InHostDomain table */
+	{p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},
+	{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InHostFirstAN[] = {	/* TPS_InHostFirstAN rows: pushed on '-' or '_' from the word, integer and host tables */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},	/* a digit or p_isasclet character must follow to reach TPS_InHost */
+	{p_isasclet, 0, A_NEXT, TPS_InHost, 0, NULL},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InHost[] = {	/* TPS_InHost rows: loop on p_isdigit/p_isasclet; no row in this table uses A_BINGO */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},
+	{p_isasclet, 0, A_NEXT, TPS_InHost, 0, NULL},
+	{p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
+	{p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL},
+	{p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
+	{p_iseqC, '_', A_PUSH, TPS_InHostFirstAN, 0, NULL},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InEmail[] = {	/* TPS_InEmail rows: pushed on '@' from the word, integer and host tables; has no p_isEOF row */
+	{p_isstophost, 0, A_POP, TPS_Null, 0, NULL},
+	{p_ishost, 0, A_BINGO | A_CLRALL, TPS_Base, EMAIL, NULL},	/* p_ishost is defined outside this chunk; on success A_BINGO as EMAIL */
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InFileFirst[] = {	/* TPS_InFileFirst rows: entered on '/' from e.g. TPS_Base, the word/integer tables and the path tables */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isasclet, 0, A_NEXT, TPS_InFile, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
+	{p_iseqC, '.', A_NEXT, TPS_InPathFirst, 0, NULL},
+	{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
+	{p_iseqC, '~', A_PUSH, TPS_InFileTwiddle, 0, NULL},	/* the only A_PUSH row in this table */
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InFileTwiddle[] = {	/* TPS_InFileTwiddle rows: pushed on '~' from TPS_Base and TPS_InFileFirst */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isasclet, 0, A_NEXT, TPS_InFile, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
+	{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
+	{p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL},	/* '/' right after '~' goes to TPS_InFileFirst */
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InPathFirst[] = {	/* TPS_InPathFirst rows: reached from TPS_InFileFirst on '.' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isasclet, 0, A_NEXT, TPS_InFile, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
+	{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
+	{p_iseqC, '.', A_NEXT, TPS_InPathSecond, 0, NULL},	/* a second '.' goes to TPS_InPathSecond */
+	{p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InPathFirstFirst[] = {	/* TPS_InPathFirstFirst rows: pushed from TPS_Base on '.' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_iseqC, '.', A_NEXT, TPS_InPathSecond, 0, NULL},	/* unlike TPS_InPathFirst, only '.' and '/' are accepted here */
+	{p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InPathSecond[] = {	/* TPS_InPathSecond rows: reached on a second '.' from TPS_InPathFirst / TPS_InPathFirstFirst */
+	{p_isEOF, 0, A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL},
+	{p_iseqC, '/', A_NEXT | A_PUSH, TPS_InFileFirst, 0, NULL},
+	{p_iseqC, '/', A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL},	/* NOTE(review): second '/' row looks like the alternative used once the pushed TPS_InFileFirst attempt pops -- confirm */
+	{p_isspace, 0, A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InFile[] = {	/* TPS_InFile rows: loop on p_isasclet, p_isdigit, '_' and '-', otherwise A_BINGO as FILEPATH */
+	{p_isEOF, 0, A_BINGO, TPS_Base, FILEPATH, NULL},
+	{p_isasclet, 0, A_NEXT, TPS_InFile, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
+	{p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL},	/* '.' and '/' are pushed rather than consumed directly */
+	{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
+	{p_iseqC, '-', A_NEXT, TPS_InFile, 0, NULL},
+	{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
+	{NULL, 0, A_BINGO, TPS_Base, FILEPATH, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InFileNext[] = {	/* TPS_InFileNext rows: pushed on '.' from TPS_InFile, TPS_InNumWord and TPS_InAsciiWord */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isasclet, 0, A_CLEAR, TPS_InFile, 0, NULL},	/* the three accepting rows use A_CLEAR and continue in TPS_InFile */
+	{p_isdigit, 0, A_CLEAR, TPS_InFile, 0, NULL},
+	{p_iseqC, '_', A_CLEAR, TPS_InFile, 0, NULL},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InURLPathFirst[] = {	/* TPS_InURLPathFirst rows. NOTE(review): no row in the visible tables targets this state -- check for other users */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isurlchar, 0, A_NEXT, TPS_InURLPath, 0, NULL},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL},	/* trailing comma after the last row is legal C; the other tables omit it */
+};
+
+static const TParserStateActionItem actionTPS_InURLPathStart[] = {	/* TPS_InURLPathStart rows: target of the p_isstophost rows in TPS_InHostDomain and TPS_InPort */
+	{NULL, 0, A_NEXT, TPS_InURLPath, 0, NULL}	/* single row with no test: always continues to TPS_InURLPath */
+};
+
+static const TParserStateActionItem actionTPS_InURLPath[] = {	/* TPS_InURLPath rows: loop on p_isurlchar, otherwise A_BINGO as URLPATH */
+	{p_isEOF, 0, A_BINGO, TPS_Base, URLPATH, NULL},
+	{p_isurlchar, 0, A_NEXT, TPS_InURLPath, 0, NULL},
+	{NULL, 0, A_BINGO, TPS_Base, URLPATH, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InFURL[] = {	/* TPS_InFURL rows: pushed on '/' from TPS_InHostDomain and TPS_InPort */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isURLPath, 0, A_BINGO | A_CLRALL, TPS_Base, URL_T, SpecialFURL},	/* p_isURLPath and SpecialFURL are defined outside this chunk; on success A_BINGO as URL_T */
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InProtocolFirst[] = {	/* TPS_InProtocolFirst rows: pushed from TPS_InAsciiWord on ':' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_iseqC, '/', A_NEXT, TPS_InProtocolSecond, 0, NULL},	/* first '/' after the ':' */
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InProtocolSecond[] = {	/* TPS_InProtocolSecond rows: reached from TPS_InProtocolFirst on '/' */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_iseqC, '/', A_NEXT, TPS_InProtocolEnd, 0, NULL},	/* second '/' completes the "://" sequence */
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InProtocolEnd[] = {	/* TPS_InProtocolEnd rows: reached from TPS_InProtocolSecond on '/' */
+	{NULL, 0, A_BINGO | A_CLRALL, TPS_Base, PROTOCOL, NULL}	/* single row with no test: A_BINGO as PROTOCOL */
+};
+
+static const TParserStateActionItem actionTPS_InHyphenAsciiWordFirst[] = {	/* TPS_InHyphenAsciiWordFirst rows: pushed on '-' from TPS_InAsciiWord and TPS_InHyphenAsciiWord */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isasclet, 0, A_NEXT, TPS_InHyphenAsciiWord, 0, NULL},
+	{p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InHyphenDigitLookahead, 0, NULL},	/* a digit after the '-' goes through TPS_InHyphenDigitLookahead */
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InHyphenAsciiWord[] = {	/* TPS_InHyphenAsciiWord rows: loop on p_isasclet; EOF and fallback rows A_BINGO as ASCIIHWORD */
+	{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, ASCIIHWORD, SpecialHyphen},	/* continues in TPS_InParseHyphen, not TPS_Base; SpecialHyphen is defined outside this chunk */
+	{p_isasclet, 0, A_NEXT, TPS_InHyphenAsciiWord, 0, NULL},
+	{p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
+	{p_isspecial, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
+	{p_iseqC, '-', A_PUSH, TPS_InHyphenAsciiWordFirst, 0, NULL},
+	{NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, ASCIIHWORD, SpecialHyphen}
+};
+
+static const TParserStateActionItem actionTPS_InHyphenWordFirst[] = {	/* TPS_InHyphenWordFirst rows: pushed on '-' from TPS_InWord and TPS_InHyphenWord */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InHyphenDigitLookahead, 0, NULL},	/* a digit after the '-' goes through TPS_InHyphenDigitLookahead */
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InHyphenWord[] = {	/* TPS_InHyphenWord rows: loop on p_isalpha/p_isspecial; EOF and fallback rows A_BINGO as HWORD */
+	{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, HWORD, SpecialHyphen},	/* continues in TPS_InParseHyphen, not TPS_Base; SpecialHyphen is defined outside this chunk */
+	{p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
+	{p_isspecial, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
+	{p_iseqC, '-', A_PUSH, TPS_InHyphenWordFirst, 0, NULL},
+	{NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, HWORD, SpecialHyphen}
+};
+
+static const TParserStateActionItem actionTPS_InHyphenNumWordFirst[] = {	/* TPS_InHyphenNumWordFirst rows: pushed on '-' from TPS_InNumWord and TPS_InHyphenNumWord */
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isalpha, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InHyphenDigitLookahead, 0, NULL},	/* a digit after the '-' goes through TPS_InHyphenDigitLookahead */
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+static const TParserStateActionItem actionTPS_InHyphenNumWord[] = {	/* TPS_InHyphenNumWord rows: loop on p_isalnum/p_isspecial; EOF and fallback rows A_BINGO as NUMHWORD */
+	{p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, NUMHWORD, SpecialHyphen},	/* continues in TPS_InParseHyphen, not TPS_Base; SpecialHyphen is defined outside this chunk */
+	{p_isalnum, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
+	{p_isspecial, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
+	{p_iseqC, '-', A_PUSH, TPS_InHyphenNumWordFirst, 0, NULL},
+	{NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, NUMHWORD, SpecialHyphen}
+};
+
+static const TParserStateActionItem actionTPS_InHyphenDigitLookahead[] = {
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InHyphenDigitLookahead, 0, NULL},
+	{p_isalpha, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
+	{p_isspecial, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/*
+ * TPS_InParseHyphen is the state named as "next state" by the rules that
+ * report ASCIIHWORD / HWORD / NUMHWORD tokens.  It dispatches on the first
+ * character of each component; at EOF or on any other character the
+ * current character is re-examined in TPS_Base (A_RERUN).
+ * (Presumably SpecialHyphen repositions the parser so the components are
+ * scanned again here -- confirm against its definition.)
+ */
+static const TParserStateActionItem actionTPS_InParseHyphen[] = {
+	{p_isEOF, 0, A_RERUN, TPS_Base, 0, NULL},
+	{p_isasclet, 0, A_NEXT, TPS_InHyphenAsciiWordPart, 0, NULL},
+	{p_isalpha, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
+	{p_isdigit, 0, A_PUSH, TPS_InHyphenUnsignedInt, 0, NULL},
+	{p_iseqC, '-', A_PUSH, TPS_InParseHyphenHyphen, 0, NULL},
+	{NULL, 0, A_RERUN, TPS_Base, 0, NULL}
+};
+
+/*
+ * Pushed on '-' from TPS_InParseHyphen.  If the next character is accepted
+ * by p_isalnum or p_isspecial, what has been consumed so far is reported as
+ * a SPACE token and the pushed position is dropped (A_CLEAR); otherwise pop.
+ */
+static const TParserStateActionItem actionTPS_InParseHyphenHyphen[] = {
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isalnum, 0, A_BINGO | A_CLEAR, TPS_InParseHyphen, SPACE, NULL},
+	{p_isspecial, 0, A_BINGO | A_CLEAR, TPS_InParseHyphen, SPACE, NULL},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/*
+ * Component of a hyphenated word: reported as PARTHWORD.  At EOF the parser
+ * returns to TPS_Base, otherwise to TPS_InParseHyphen for the next part.
+ */
+static const TParserStateActionItem actionTPS_InHyphenWordPart[] = {
+	{p_isEOF, 0, A_BINGO, TPS_Base, PARTHWORD, NULL},
+	{p_isalpha, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
+	{p_isspecial, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InHyphenNumWordPart, 0, NULL},
+	{NULL, 0, A_BINGO, TPS_InParseHyphen, PARTHWORD, NULL}
+};
+
+/* As above, for a component seen so far as ASCII letters: ASCIIPARTHWORD */
+static const TParserStateActionItem actionTPS_InHyphenAsciiWordPart[] = {
+	{p_isEOF, 0, A_BINGO, TPS_Base, ASCIIPARTHWORD, NULL},
+	{p_isasclet, 0, A_NEXT, TPS_InHyphenAsciiWordPart, 0, NULL},
+	{p_isalpha, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
+	{p_isspecial, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_InHyphenNumWordPart, 0, NULL},
+	{NULL, 0, A_BINGO, TPS_InParseHyphen, ASCIIPARTHWORD, NULL}
+};
+
+/* As above, for a component containing digits: NUMPARTHWORD */
+static const TParserStateActionItem actionTPS_InHyphenNumWordPart[] = {
+	{p_isEOF, 0, A_BINGO, TPS_Base, NUMPARTHWORD, NULL},
+	{p_isalnum, 0, A_NEXT, TPS_InHyphenNumWordPart, 0, NULL},
+	{p_isspecial, 0, A_NEXT, TPS_InHyphenNumWordPart, 0, NULL},
+	{NULL, 0, A_BINGO, TPS_InParseHyphen, NUMPARTHWORD, NULL}
+};
+
+/*
+ * Pushed from TPS_InParseHyphen when a component starts with a digit.
+ * Digits keep us in this state (next state TPS_Null = unchanged).  A
+ * character accepted by p_isalpha or p_isspecial drops the pushed position
+ * (A_CLEAR) and continues in TPS_InHyphenNumWordPart; otherwise pop, which
+ * lets TPS_InParseHyphen try the rule after the one that pushed.
+ */
+static const TParserStateActionItem actionTPS_InHyphenUnsignedInt[] = {
+	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+	{p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
+	{p_isalpha, 0, A_CLEAR, TPS_InHyphenNumWordPart, 0, NULL},
+	{p_isspecial, 0, A_CLEAR, TPS_InHyphenNumWordPart, 0, NULL},
+	{NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+
+/*
+ * main table of per-state parser actions
+ */
+typedef struct
+{
+	const TParserStateActionItem *action;	/* the actual state info */
+	TParserState state;			/* only for Assert crosscheck */
+#ifdef WPARSER_TRACE
+	const char *state_name;		/* only for debug printout */
+#endif
+} TParserStateAction;
+
+/*
+ * Build one Actions[] entry for the given state: the rule array named
+ * "action" + state name, the state value itself, and (only when
+ * WPARSER_TRACE is defined) the state name as a string for trace output.
+ */
+#ifdef WPARSER_TRACE
+#define TPARSERSTATEACTION(state) \
+	{ CppConcat(action,state), state, CppAsString(state) }
+#else
+#define TPARSERSTATEACTION(state) \
+	{ CppConcat(action,state), state }
+#endif
+
+/*
+ * Order must be the same as in typedef enum {} TParserState!!
+ *
+ * TParserGet() indexes this array directly with the current state value and
+ * Asserts that Actions[state].state == state, so a misplaced entry is caught
+ * in assert-enabled builds.
+ */
+
+static const TParserStateAction Actions[] = {
+	TPARSERSTATEACTION(TPS_Base),
+	TPARSERSTATEACTION(TPS_InNumWord),
+	TPARSERSTATEACTION(TPS_InAsciiWord),
+	TPARSERSTATEACTION(TPS_InWord),
+	TPARSERSTATEACTION(TPS_InUnsignedInt),
+	TPARSERSTATEACTION(TPS_InSignedIntFirst),
+	TPARSERSTATEACTION(TPS_InSignedInt),
+	TPARSERSTATEACTION(TPS_InSpace),
+	TPARSERSTATEACTION(TPS_InUDecimalFirst),
+	TPARSERSTATEACTION(TPS_InUDecimal),
+	TPARSERSTATEACTION(TPS_InDecimalFirst),
+	TPARSERSTATEACTION(TPS_InDecimal),
+	TPARSERSTATEACTION(TPS_InVerVersion),
+	TPARSERSTATEACTION(TPS_InSVerVersion),
+	TPARSERSTATEACTION(TPS_InVersionFirst),
+	TPARSERSTATEACTION(TPS_InVersion),
+	TPARSERSTATEACTION(TPS_InMantissaFirst),
+	TPARSERSTATEACTION(TPS_InMantissaSign),
+	TPARSERSTATEACTION(TPS_InMantissa),
+	TPARSERSTATEACTION(TPS_InXMLEntityFirst),
+	TPARSERSTATEACTION(TPS_InXMLEntity),
+	TPARSERSTATEACTION(TPS_InXMLEntityNumFirst),
+	TPARSERSTATEACTION(TPS_InXMLEntityNum),
+	TPARSERSTATEACTION(TPS_InXMLEntityHexNumFirst),
+	TPARSERSTATEACTION(TPS_InXMLEntityHexNum),
+	TPARSERSTATEACTION(TPS_InXMLEntityEnd),
+	TPARSERSTATEACTION(TPS_InTagFirst),
+	TPARSERSTATEACTION(TPS_InXMLBegin),
+	TPARSERSTATEACTION(TPS_InTagCloseFirst),
+	TPARSERSTATEACTION(TPS_InTagName),
+	TPARSERSTATEACTION(TPS_InTagBeginEnd),
+	TPARSERSTATEACTION(TPS_InTag),
+	TPARSERSTATEACTION(TPS_InTagEscapeK),
+	TPARSERSTATEACTION(TPS_InTagEscapeKK),
+	TPARSERSTATEACTION(TPS_InTagBackSleshed),
+	TPARSERSTATEACTION(TPS_InTagEnd),
+	TPARSERSTATEACTION(TPS_InCommentFirst),
+	TPARSERSTATEACTION(TPS_InCommentLast),
+	TPARSERSTATEACTION(TPS_InComment),
+	TPARSERSTATEACTION(TPS_InCloseCommentFirst),
+	TPARSERSTATEACTION(TPS_InCloseCommentLast),
+	TPARSERSTATEACTION(TPS_InCommentEnd),
+	TPARSERSTATEACTION(TPS_InHostFirstDomain),
+	TPARSERSTATEACTION(TPS_InHostDomainSecond),
+	TPARSERSTATEACTION(TPS_InHostDomain),
+	TPARSERSTATEACTION(TPS_InPortFirst),
+	TPARSERSTATEACTION(TPS_InPort),
+	TPARSERSTATEACTION(TPS_InHostFirstAN),
+	TPARSERSTATEACTION(TPS_InHost),
+	TPARSERSTATEACTION(TPS_InEmail),
+	TPARSERSTATEACTION(TPS_InFileFirst),
+	TPARSERSTATEACTION(TPS_InFileTwiddle),
+	TPARSERSTATEACTION(TPS_InPathFirst),
+	TPARSERSTATEACTION(TPS_InPathFirstFirst),
+	TPARSERSTATEACTION(TPS_InPathSecond),
+	TPARSERSTATEACTION(TPS_InFile),
+	TPARSERSTATEACTION(TPS_InFileNext),
+	TPARSERSTATEACTION(TPS_InURLPathFirst),
+	TPARSERSTATEACTION(TPS_InURLPathStart),
+	TPARSERSTATEACTION(TPS_InURLPath),
+	TPARSERSTATEACTION(TPS_InFURL),
+	TPARSERSTATEACTION(TPS_InProtocolFirst),
+	TPARSERSTATEACTION(TPS_InProtocolSecond),
+	TPARSERSTATEACTION(TPS_InProtocolEnd),
+	TPARSERSTATEACTION(TPS_InHyphenAsciiWordFirst),
+	TPARSERSTATEACTION(TPS_InHyphenAsciiWord),
+	TPARSERSTATEACTION(TPS_InHyphenWordFirst),
+	TPARSERSTATEACTION(TPS_InHyphenWord),
+	TPARSERSTATEACTION(TPS_InHyphenNumWordFirst),
+	TPARSERSTATEACTION(TPS_InHyphenNumWord),
+	TPARSERSTATEACTION(TPS_InHyphenDigitLookahead),
+	TPARSERSTATEACTION(TPS_InParseHyphen),
+	TPARSERSTATEACTION(TPS_InParseHyphenHyphen),
+	TPARSERSTATEACTION(TPS_InHyphenWordPart),
+	TPARSERSTATEACTION(TPS_InHyphenAsciiWordPart),
+	TPARSERSTATEACTION(TPS_InHyphenNumWordPart),
+	TPARSERSTATEACTION(TPS_InHyphenUnsignedInt)
+};
+
+
+/*
+ * TParserGet: scan the next token from the parser's input string.
+ *
+ * Runs the state machine described by Actions[], starting at the current
+ * parser position.  When a rule flagged A_BINGO fires, prs->token points at
+ * the token start, prs->lenbytetoken / prs->lenchartoken hold its length in
+ * bytes / characters, prs->type holds its type, and true is returned.
+ *
+ * Returns false if the input is already exhausted on entry, or if scanning
+ * stops without an A_BINGO rule having fired.
+ */
+static bool
+TParserGet(TParser *prs)
+{
+	const TParserStateActionItem *item = NULL;
+
+	CHECK_FOR_INTERRUPTS();
+
+	Assert(prs->state);
+
+	/* nothing left to scan */
+	if (prs->state->posbyte >= prs->lenstr)
+		return false;
+
+	/* the token, if any, starts at the current position */
+	prs->token = prs->str + prs->state->posbyte;
+	prs->state->pushedAtAction = NULL;
+
+	/* look at string */
+	/* note "<=": the end-of-input position is processed too */
+	while (prs->state->posbyte <= prs->lenstr)
+	{
+		/* charlen == 0 marks the end-of-input position */
+		if (prs->state->posbyte == prs->lenstr)
+			prs->state->charlen = 0;
+		else
+			prs->state->charlen = (prs->charmaxlen == 1) ? prs->charmaxlen :
+				pg_mblen(prs->str + prs->state->posbyte);
+
+		Assert(prs->state->posbyte + prs->state->charlen <= prs->lenstr);
+		Assert(prs->state->state >= TPS_Base && prs->state->state < TPS_Null);
+		Assert(Actions[prs->state->state].state == prs->state->state);
+
+		if (prs->state->pushedAtAction)
+		{
+			/* After a POP, pick up at the next test */
+			item = prs->state->pushedAtAction + 1;
+			prs->state->pushedAtAction = NULL;
+		}
+		else
+		{
+			/* otherwise start with the first rule of the current state */
+			item = Actions[prs->state->state].action;
+			Assert(item != NULL);
+		}
+
+		/* find action by character class */
+		/* a rule with a NULL test ends the search and acts as the default */
+		while (item->isclass)
+		{
+			prs->c = item->c;
+			if (item->isclass(prs) != 0)
+				break;
+			item++;
+		}
+
+#ifdef WPARSER_TRACE
+		{
+			TParserPosition *ptr;
+
+			fprintf(stderr, "state ");
+			/* indent according to stack depth */
+			for (ptr = prs->state->prev; ptr; ptr = ptr->prev)
+				fprintf(stderr, "  ");
+			fprintf(stderr, "%s ", Actions[prs->state->state].state_name);
+			if (prs->state->posbyte < prs->lenstr)
+				fprintf(stderr, "at %c", *(prs->str + prs->state->posbyte));
+			else
+				fprintf(stderr, "at EOF");
+			fprintf(stderr, " matched rule %d flags%s%s%s%s%s%s%s%s%s%s%s\n",
+					(int) (item - Actions[prs->state->state].action),
+					(item->flags & A_BINGO) ? " BINGO" : "",
+					(item->flags & A_POP) ? " POP" : "",
+					(item->flags & A_PUSH) ? " PUSH" : "",
+					(item->flags & A_RERUN) ? " RERUN" : "",
+					(item->flags & A_CLEAR) ? " CLEAR" : "",
+					(item->flags & A_MERGE) ? " MERGE" : "",
+					(item->flags & A_CLRALL) ? " CLRALL" : "",
+					(item->tostate != TPS_Null) ? " tostate " : "",
+					(item->tostate != TPS_Null) ? Actions[item->tostate].state_name : "",
+					(item->type > 0) ? " type " : "",
+					tok_alias[item->type]);
+		}
+#endif
+
+		/* call special handler if exists */
+		if (item->special)
+			item->special(prs);
+
+		/* BINGO, token is found */
+		if (item->flags & A_BINGO)
+		{
+			Assert(item->type > 0);
+			/* publish the token length and reset the per-position counters */
+			prs->lenbytetoken = prs->state->lenbytetoken;
+			prs->lenchartoken = prs->state->lenchartoken;
+			prs->state->lenbytetoken = prs->state->lenchartoken = 0;
+			prs->type = item->type;
+		}
+
+		/* do various actions by flags */
+		/* (at most one of the stack actions below is applied per rule) */
+		if (item->flags & A_POP)
+		{						/* pop stored state in stack */
+			TParserPosition *ptr = prs->state->prev;
+
+			pfree(prs->state);
+			prs->state = ptr;
+			Assert(prs->state);
+		}
+		else if (item->flags & A_PUSH)
+		{						/* push (store) state in stack */
+			prs->state->pushedAtAction = item;	/* remember where we push */
+			prs->state = newTParserPosition(prs->state);
+		}
+		else if (item->flags & A_CLEAR)
+		{						/* clear previous pushed state */
+			TParserPosition *ptr;
+
+			Assert(prs->state->prev);
+			ptr = prs->state->prev->prev;
+			pfree(prs->state->prev);
+			prs->state->prev = ptr;
+		}
+		else if (item->flags & A_CLRALL)
+		{						/* clear all previous pushed state */
+			TParserPosition *ptr;
+
+			while (prs->state->prev)
+			{
+				ptr = prs->state->prev->prev;
+				pfree(prs->state->prev);
+				prs->state->prev = ptr;
+			}
+		}
+		else if (item->flags & A_MERGE)
+		{						/* merge posinfo with current and pushed state */
+			TParserPosition *ptr = prs->state;
+
+			Assert(prs->state->prev);
+			prs->state = prs->state->prev;
+
+			/* the pushed state adopts the position reached by the current one */
+			prs->state->posbyte = ptr->posbyte;
+			prs->state->poschar = ptr->poschar;
+			prs->state->charlen = ptr->charlen;
+			prs->state->lenbytetoken = ptr->lenbytetoken;
+			prs->state->lenchartoken = ptr->lenchartoken;
+			pfree(ptr);
+		}
+
+		/* set new state if pointed */
+		/* (TPS_Null as target means "keep the current state") */
+		if (item->tostate != TPS_Null)
+			prs->state->state = item->tostate;
+
+		/* check for go away */
+		/* stop on a token, or at end of input unless the rule asks to rerun */
+		if ((item->flags & A_BINGO) ||
+			(prs->state->posbyte >= prs->lenstr &&
+			 (item->flags & A_RERUN) == 0))
+			break;
+
+		/* go to beginning of loop if we should rerun or we just restore state */
+		if (item->flags & (A_RERUN | A_POP))
+			continue;
+
+		/* move forward */
+		/* (nothing to consume when charlen is zero, i.e. at end of input) */
+		if (prs->state->charlen)
+		{
+			prs->state->posbyte += prs->state->charlen;
+			prs->state->lenbytetoken += prs->state->charlen;
+			prs->state->poschar++;
+			prs->state->lenchartoken++;
+		}
+	}
+
+	/* true only if the last rule applied reported a token */
+	return (item && (item->flags & A_BINGO));
+}
+
+/*
+ * Default parser's lextype function: returns a palloc'd array with one
+ * LexDescr per token type 1..LASTNUM (id, alias, description), followed by
+ * a terminating entry whose lexid is 0.
+ */
+Datum
+prsd_lextype(PG_FUNCTION_ARGS)
+{
+	LexDescr   *result;
+	LexDescr   *entry;
+	int			lexid;
+
+	result = (LexDescr *) palloc(sizeof(LexDescr) * (LASTNUM + 1));
+
+	entry = result;
+	for (lexid = 1; lexid <= LASTNUM; lexid++, entry++)
+	{
+		entry->lexid = lexid;
+		entry->alias = pstrdup(tok_alias[lexid]);
+		entry->descr = pstrdup(lex_descr[lexid]);
+	}
+
+	/* entry now addresses the extra slot; mark it as the end of the list */
+	entry->lexid = 0;
+
+	PG_RETURN_POINTER(result);
+}
+
+/*
+ * Default parser's start function: creates a parser over the given string
+ * (argument 0) of the given length (argument 1) and returns it.
+ */
+Datum
+prsd_start(PG_FUNCTION_ARGS)
+{
+	char	   *str = (char *) PG_GETARG_POINTER(0);
+	int			len = PG_GETARG_INT32(1);
+
+	PG_RETURN_POINTER(TParserInit(str, len));
+}
+
+/*
+ * Default parser's gettoken function: fetches the next token.
+ *
+ * On success stores the token start in *arg1 and its byte length in *arg2
+ * and returns the token type; returns 0 (leaving the outputs untouched)
+ * when no further token is available.
+ */
+Datum
+prsd_nexttoken(PG_FUNCTION_ARGS)
+{
+	TParser    *prs = (TParser *) PG_GETARG_POINTER(0);
+	char	  **tokstart = (char **) PG_GETARG_POINTER(1);
+	int		   *toklen = (int *) PG_GETARG_POINTER(2);
+
+	if (TParserGet(prs))
+	{
+		*tokstart = prs->token;
+		*toklen = prs->lenbytetoken;
+
+		PG_RETURN_INT32(prs->type);
+	}
+
+	PG_RETURN_INT32(0);
+}
+
+/*
+ * Default parser's end function: closes the parser passed as argument 0.
+ */
+Datum
+prsd_end(PG_FUNCTION_ARGS)
+{
+	TParserClose((TParser *) PG_GETARG_POINTER(0));
+
+	PG_RETURN_VOID();
+}
+
+
+/*
+ * ts_headline support begins here
+ */
+
+/*
+ * token type classification macros
+ *
+ * HLIDREPLACE: types for which mark_fragment() sets the word's "replace"
+ *		flag (only when not in highlightall mode).
+ * HLIDSKIP / XMLHLIDSKIP: types for which mark_fragment() sets the word's
+ *		"skip" flag; the former is consulted in normal mode, the latter in
+ *		highlightall mode.  The two definitions are currently identical.
+ * NONWORDTOKEN: types not counted as words when measuring headline length.
+ * NOENDTOKEN: types at which a headline should not end (see BADENDPOINT).
+ * TS_IDIGNORE: within this section, used only as part of NOENDTOKEN.
+ */
+#define TS_IDIGNORE(x)	( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )
+#define HLIDREPLACE(x)	( (x)==TAG_T )
+#define HLIDSKIP(x)		( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
+#define XMLHLIDSKIP(x)	( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
+#define NONWORDTOKEN(x) ( (x)==SPACE || HLIDREPLACE(x) || HLIDSKIP(x) )
+#define NOENDTOKEN(x)	( NONWORDTOKEN(x) || (x)==SCIENTIFIC || (x)==VERSIONNUMBER || (x)==DECIMAL_T || (x)==SIGNEDINT || (x)==UNSIGNEDINT || TS_IDIGNORE(x) )
+
+/*
+ * Macros useful in headline selection.  These rely on availability of
+ * "HeadlineParsedText *prs" describing some text, and "int shortword"
+ * describing the "short word" length parameter.
+ */
+
+/* Interesting words are non-repeated search terms */
+#define INTERESTINGWORD(j) \
+	(prs->words[j].item && !prs->words[j].repeated)
+
+/*
+ * Don't want to end at a non-word or a short word, unless interesting.
+ * "Short" means a length of at most shortword.
+ */
+#define BADENDPOINT(j) \
+	((NOENDTOKEN(prs->words[j].type) || prs->words[j].len <= shortword) && \
+	 !INTERESTINGWORD(j))
+
+typedef struct
+{
+	/* one cover (well, really one fragment) for mark_hl_fragments */
+	int32		startpos;		/* fragment's starting word index */
+	int32		endpos;			/* ending word index (inclusive) */
+	int32		poslen;			/* number of interesting words */
+	int32		curlen;			/* total number of words */
+	bool		chosen;			/* chosen? */
+	bool		excluded;		/* excluded? */
+} CoverPos;
+
+typedef struct
+{
+	/* callback data for checkcondition_HL */
+	HeadlineWordEntry *words;	/* first word of the substring to examine */
+	int			len;			/* number of words in that substring */
+} hlCheck;
+
+
+/*
+ * TS_execute callback: does the query operand "val" occur among the
+ * headline words described by the hlCheck struct passed as "opaque"?
+ *
+ * If "data" is NULL the caller only wants a yes/no answer.  Otherwise the
+ * positions of the matching words are collected into data->pos (allocated
+ * here on first use), keeping only strictly increasing positions.
+ */
+static TSTernaryValue
+checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
+{
+	hlCheck    *ctx = (hlCheck *) opaque;
+	int			idx;
+
+	/* scan words array for matching items */
+	for (idx = 0; idx < ctx->len; idx++)
+	{
+		HeadlineWordEntry *word = &ctx->words[idx];
+
+		if (word->item != val)
+			continue;
+
+		/* no position reporting requested: first hit settles it */
+		if (data == NULL)
+			return TS_YES;
+
+		if (data->pos == NULL)
+		{
+			/* first hit: make room for as many positions as there are words */
+			data->pos = palloc(sizeof(WordEntryPos) * ctx->len);
+			data->allocated = true;
+			data->npos = 1;
+			data->pos[0] = word->pos;
+		}
+		else if (data->pos[data->npos - 1] < word->pos)
+			data->pos[data->npos++] = word->pos;
+	}
+
+	return (data != NULL && data->npos > 0) ? TS_YES : TS_NO;
+}
+
+/*
+ * hlFirstIndex: locate the first word at index >= pos that is used in the
+ * query (i.e. has a non-NULL item).
+ *
+ * Returns that index, or -1 if there is no such word.
+ */
+static int
+hlFirstIndex(HeadlineParsedText *prs, int pos)
+{
+	int			idx = pos;
+
+	while (idx < prs->curwords)
+	{
+		if (prs->words[idx].item != NULL)
+			return idx;
+		idx++;
+	}
+
+	return -1;
+}
+
+/*
+ * hlCover: look for a substring of prs' word list that satisfies the query
+ *
+ * On entry, *p is the first word index to consider (zero initially, or the
+ * next index after a previous successful search).  All substrings starting
+ * at or after that word are candidates, as long as they hold no more than
+ * max_cover words.  (The length limit keeps this from taking O(N^2) time
+ * for a long document with many query words but few complete matches; and
+ * since checkcondition_HL is roughly O(N) in the length of the substring
+ * being checked, it would actually be worse than that.)
+ *
+ * On success, *p and *q receive the first and last word index of the cover
+ * substring and true is returned; otherwise false is returned.
+ *
+ * The result is a minimal cover, in the sense that both *p and *q are words
+ * used in the query.
+ */
+static bool
+hlCover(HeadlineParsedText *prs, TSQuery query, int max_cover,
+		int *p, int *q)
+{
+	hlCheck		ch;
+	int			first;
+
+	/* empty query matches nothing */
+	if (query->size <= 0)
+		return false;
+
+	/*
+	 * Find the earliest, shortest substring of prs->words satisfying the
+	 * query.  Only words appearing in the query are worth trying as either
+	 * endpoint, so both indices hop from one such word to the next.
+	 */
+	first = hlFirstIndex(prs, *p);
+	while (first >= 0)
+	{
+		int			last = first;
+		int			nextfirst = -1;
+
+		/* all substrings tried in the inner loop start at "first" */
+		ch.words = &(prs->words[first]);
+
+		/* begin with the one-word substring, then grow it */
+		for (;;)
+		{
+			int			following;
+
+			/* test the substring first .. last against the query */
+			ch.len = last - first + 1;
+			if (TS_execute(GETQUERY(query), &ch,
+						   TS_EXEC_EMPTY, checkcondition_HL))
+			{
+				*p = first;
+				*q = last;
+				return true;
+			}
+
+			/* no match; find the next feasible endpoint */
+			following = hlFirstIndex(prs, last + 1);
+
+			/*
+			 * The first step past "first" also yields the next feasible
+			 * startpoint; keep it so the outer loop need not search again.
+			 */
+			if (last == first)
+				nextfirst = following;
+			last = following;
+
+			/* give up on this startpoint at end of text or at the limit */
+			if (last < 0 || last - first >= max_cover)
+				break;
+		}
+
+		/* nothing found from here, move on to the next startpoint */
+		first = nextfirst;
+	}
+
+	return false;
+}
+
+/*
+ * Flag the words chosen by a headline selector for output
+ *
+ * Every word from startpos to endpos (inclusive) is processed: query words
+ * become "selected", the replace/skip flags are set according to token type
+ * and highlightall, and "in" is set for every word that is not a repeat.
+ * Nothing happens when endpos < startpos.
+ */
+static void
+mark_fragment(HeadlineParsedText *prs, bool highlightall,
+			  int startpos, int endpos)
+{
+	int			idx = startpos;
+
+	while (idx <= endpos)
+	{
+		HeadlineWordEntry *word = &prs->words[idx];
+
+		if (word->item)
+			word->selected = 1;
+
+		if (highlightall)
+		{
+			if (XMLHLIDSKIP(word->type))
+				word->skip = 1;
+		}
+		else if (HLIDREPLACE(word->type))
+			word->replace = 1;
+		else if (HLIDSKIP(word->type))
+			word->skip = 1;
+
+		word->in = word->repeated ? 0 : 1;
+
+		idx++;
+	}
+}
+
+/*
+ * split a cover substring into fragments not longer than max_words
+ *
+ * At entry, *startpos and *endpos are the (remaining) bounds of the cover
+ * substring.  They are updated to hold the bounds of the next fragment.
+ *
+ * *curlen and *poslen are set to the fragment's length, in words and
+ * interesting words respectively.
+ *
+ * Relies on the INTERESTINGWORD and NONWORDTOKEN macros; the caller
+ * (mark_hl_fragments) is expected to call this repeatedly, restarting just
+ * after the returned *endpos, until the cover is used up.
+ */
+static void
+get_next_fragment(HeadlineParsedText *prs, int *startpos, int *endpos,
+				  int *curlen, int *poslen, int max_words)
+{
+	int			i;
+
+	/*
+	 * Objective: select a fragment of words between startpos and endpos such
+	 * that it has at most max_words and both ends have query words. If the
+	 * startpos and endpos are the endpoints of the cover and the cover has
+	 * fewer words than max_words, then this function should just return the
+	 * cover
+	 */
+	/* first move startpos to an item */
+	/* (if there is none, *startpos ends up equal to *endpos) */
+	for (i = *startpos; i <= *endpos; i++)
+	{
+		*startpos = i;
+		if (INTERESTINGWORD(i))
+			break;
+	}
+	/* cut endpos to have only max_words */
+	*curlen = 0;
+	*poslen = 0;
+	for (i = *startpos; i <= *endpos && *curlen < max_words; i++)
+	{
+		if (!NONWORDTOKEN(prs->words[i].type))
+			*curlen += 1;
+		if (INTERESTINGWORD(i))
+			*poslen += 1;
+	}
+	/* if the cover was cut then move back endpos to a query item */
+
+	/*
+	 * NOTE(review): after the loop above, i is the index of the first word
+	 * that was NOT counted into *curlen / *poslen.  The test and the
+	 * backward scan below nevertheless start from that index -- confirm that
+	 * the resulting counts are as intended when the loop stopped because of
+	 * max_words.
+	 */
+	if (*endpos > i)
+	{
+		*endpos = i;
+		for (i = *endpos; i >= *startpos; i--)
+		{
+			*endpos = i;
+			if (INTERESTINGWORD(i))
+				break;
+			if (!NONWORDTOKEN(prs->words[i].type))
+				*curlen -= 1;
+		}
+	}
+}
+
+/*
+ * Headline selector used when MaxFragments > 0
+ *
+ * Collects every cover reported by hlCover(), splits each into fragments of
+ * at most max_words words (get_next_fragment), then repeatedly picks the
+ * best fragment not yet chosen or excluded (most interesting words, ties
+ * broken by fewer words), stretches it toward max_words and marks it with
+ * mark_fragment().  At most max_fragments fragments are marked.  If none
+ * could be marked, the first min_words words of the text are marked.
+ *
+ * shortword is used implicitly through the BADENDPOINT macro.
+ *
+ * Note: in this mode, highlightall is disregarded for phrase selection;
+ * it only controls presentation details.
+ */
+static void
+mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, bool highlightall,
+				  int shortword, int min_words,
+				  int max_words, int max_fragments, int max_cover)
+{
+	int32		poslen,
+				curlen,
+				i,
+				f,
+				num_f = 0;
+	int32		stretch,
+				maxstretch,
+				posmarker;
+
+	int32		startpos = 0,
+				endpos = 0,
+				p = 0,
+				q = 0;
+
+	int32		numcovers = 0,
+				maxcovers = 32;
+
+	int32		minI,
+				minwords,
+				maxitems;
+	CoverPos   *covers;
+
+	/* growable array of candidate fragments; doubled whenever it fills up */
+	covers = palloc(maxcovers * sizeof(CoverPos));
+
+	/* get all covers */
+	while (hlCover(prs, query, max_cover, &p, &q))
+	{
+		startpos = p;
+		endpos = q;
+
+		/*
+		 * Break the cover into smaller fragments such that each fragment has
+		 * at most max_words. Also ensure that each end of each fragment is a
+		 * query word. This will allow us to stretch the fragment in either
+		 * direction
+		 */
+
+		while (startpos <= endpos)
+		{
+			get_next_fragment(prs, &startpos, &endpos, &curlen, &poslen, max_words);
+			if (numcovers >= maxcovers)
+			{
+				maxcovers *= 2;
+				covers = repalloc(covers, sizeof(CoverPos) * maxcovers);
+			}
+			covers[numcovers].startpos = startpos;
+			covers[numcovers].endpos = endpos;
+			covers[numcovers].curlen = curlen;
+			covers[numcovers].poslen = poslen;
+			covers[numcovers].chosen = false;
+			covers[numcovers].excluded = false;
+			numcovers++;
+			/* continue with whatever remains of this cover */
+			startpos = endpos + 1;
+			endpos = q;
+		}
+
+		/* move p to generate the next cover */
+		p++;
+	}
+
+	/* choose best covers */
+	for (f = 0; f < max_fragments; f++)
+	{
+		maxitems = 0;
+		minwords = PG_INT32_MAX;
+		minI = -1;
+
+		/*
+		 * Choose the cover that contains max items. In case of tie choose the
+		 * one with smaller number of words.
+		 */
+		for (i = 0; i < numcovers; i++)
+		{
+			if (!covers[i].chosen && !covers[i].excluded &&
+				(maxitems < covers[i].poslen ||
+				 (maxitems == covers[i].poslen &&
+				  minwords > covers[i].curlen)))
+			{
+				maxitems = covers[i].poslen;
+				minwords = covers[i].curlen;
+				minI = i;
+			}
+		}
+		/* if a cover was found mark it */
+		if (minI >= 0)
+		{
+			covers[minI].chosen = true;
+			/* adjust the size of cover */
+			startpos = covers[minI].startpos;
+			endpos = covers[minI].endpos;
+			curlen = covers[minI].curlen;
+			/* stretch the cover if cover size is lower than max_words */
+			if (curlen < max_words)
+			{
+				/* divide the stretch on both sides of cover */
+				maxstretch = (max_words - curlen) / 2;
+
+				/*
+				 * first stretch the startpos stop stretching if 1. we hit the
+				 * beginning of document 2. exceed maxstretch 3. we hit an
+				 * already marked fragment
+				 */
+				stretch = 0;
+				posmarker = startpos;
+				for (i = startpos - 1; i >= 0 && stretch < maxstretch && !prs->words[i].in; i--)
+				{
+					if (!NONWORDTOKEN(prs->words[i].type))
+					{
+						curlen++;
+						stretch++;
+					}
+					posmarker = i;
+				}
+				/* cut back startpos till we find a good endpoint */
+				for (i = posmarker; i < startpos && BADENDPOINT(i); i++)
+				{
+					if (!NONWORDTOKEN(prs->words[i].type))
+						curlen--;
+				}
+				startpos = i;
+				/* now stretch the endpos as much as possible */
+				posmarker = endpos;
+				for (i = endpos + 1; i < prs->curwords && curlen < max_words && !prs->words[i].in; i++)
+				{
+					if (!NONWORDTOKEN(prs->words[i].type))
+						curlen++;
+					posmarker = i;
+				}
+				/* cut back endpos till we find a good endpoint */
+				for (i = posmarker; i > endpos && BADENDPOINT(i); i--)
+				{
+					if (!NONWORDTOKEN(prs->words[i].type))
+						curlen--;
+				}
+				endpos = i;
+			}
+			/* remember the final bounds; the overlap test below uses them */
+			covers[minI].startpos = startpos;
+			covers[minI].endpos = endpos;
+			covers[minI].curlen = curlen;
+			/* Mark the chosen fragments (covers) */
+			mark_fragment(prs, highlightall, startpos, endpos);
+			num_f++;
+			/* Exclude covers overlapping this one from future consideration */
+			for (i = 0; i < numcovers; i++)
+			{
+				if (i != minI &&
+					((covers[i].startpos >= startpos &&
+					  covers[i].startpos <= endpos) ||
+					 (covers[i].endpos >= startpos &&
+					  covers[i].endpos <= endpos) ||
+					 (covers[i].startpos < startpos &&
+					  covers[i].endpos > endpos)))
+					covers[i].excluded = true;
+			}
+		}
+		else
+			break;				/* no selectable covers remain */
+	}
+
+	/* show the first min_words words if we have not marked anything */
+	if (num_f <= 0)
+	{
+		startpos = curlen = 0;
+		/* endpos stays -1 (nothing gets marked) if the text has no words */
+		endpos = -1;
+		for (i = 0; i < prs->curwords && curlen < min_words; i++)
+		{
+			if (!NONWORDTOKEN(prs->words[i].type))
+				curlen++;
+			endpos = i;
+		}
+		mark_fragment(prs, highlightall, startpos, endpos);
+	}
+
+	pfree(covers);
+}
+
+/*
+ * Headline selector used when MaxFragments == 0
+ *
+ * Unless highlightall is set, every cover found by hlCover() is turned into
+ * a candidate headline [posb, pose] by trimming or extending it with regard
+ * to min_words / max_words and good endpoints (BADENDPOINT, which uses
+ * shortword), and the best candidate is kept.  If no cover exists, the first
+ * min_words words are selected.  With highlightall, the whole text is
+ * selected.  The chosen range is then marked with mark_fragment().
+ */
+static void
+mark_hl_words(HeadlineParsedText *prs, TSQuery query, bool highlightall,
+			  int shortword, int min_words, int max_words, int max_cover)
+{
+	int			p = 0,
+				q = 0;
+	int			bestb = -1,
+				beste = -1;
+	int			bestlen = -1;
+	bool		bestcover = false;
+	int			pose,
+				posb,
+				poslen,
+				curlen;
+	bool		poscover;
+	int			i;
+
+	if (!highlightall)
+	{
+		/* examine all covers, select a headline using the best one */
+		while (hlCover(prs, query, max_cover, &p, &q))
+		{
+			/*
+			 * Count words (curlen) and interesting words (poslen) within
+			 * cover, but stop once we reach max_words.  This step doesn't
+			 * consider whether that's a good stopping point.  posb and pose
+			 * are set to the start and end indexes of the possible headline.
+			 */
+			curlen = 0;
+			poslen = 0;
+			posb = pose = p;
+			for (i = p; i <= q && curlen < max_words; i++)
+			{
+				if (!NONWORDTOKEN(prs->words[i].type))
+					curlen++;
+				if (INTERESTINGWORD(i))
+					poslen++;
+				pose = i;
+			}
+
+			if (curlen < max_words)
+			{
+				/*
+				 * We have room to lengthen the headline, so search forward
+				 * until it's full or we find a good stopping point.  We'll
+				 * reconsider the word at "q", then move forward.
+				 */
+				for (i = i - 1; i < prs->curwords && curlen < max_words; i++)
+				{
+					/* words up to q were already counted above */
+					if (i > q)
+					{
+						if (!NONWORDTOKEN(prs->words[i].type))
+							curlen++;
+						if (INTERESTINGWORD(i))
+							poslen++;
+					}
+					pose = i;
+					if (BADENDPOINT(i))
+						continue;
+					if (curlen >= min_words)
+						break;
+				}
+				if (curlen < min_words)
+				{
+					/*
+					 * Reached end of text and our headline is still shorter
+					 * than min_words, so try to extend it to the left.
+					 */
+					for (i = p - 1; i >= 0; i--)
+					{
+						if (!NONWORDTOKEN(prs->words[i].type))
+							curlen++;
+						if (INTERESTINGWORD(i))
+							poslen++;
+						if (curlen >= max_words)
+							break;
+						if (BADENDPOINT(i))
+							continue;
+						if (curlen >= min_words)
+							break;
+					}
+					/* i is -1 if we ran off the start of the text */
+					posb = (i >= 0) ? i : 0;
+				}
+			}
+			else
+			{
+				/*
+				 * Can't make headline longer, so consider making it shorter
+				 * if needed to avoid a bad endpoint.
+				 */
+				if (i > q)
+					i = q;
+				for (; curlen > min_words; i--)
+				{
+					if (!BADENDPOINT(i))
+						break;
+					if (!NONWORDTOKEN(prs->words[i].type))
+						curlen--;
+					if (INTERESTINGWORD(i))
+						poslen--;
+					pose = i - 1;
+				}
+			}
+
+			/*
+			 * Check whether the proposed headline includes the original
+			 * cover; it might not if we trimmed it due to max_words.
+			 */
+			poscover = (posb <= p && pose >= q);
+
+			/*
+			 * Adopt this headline if it's better than the last one, giving
+			 * highest priority to headlines including the cover, then to
+			 * headlines with more interesting words, then to headlines with
+			 * good stopping points.  (Since bestlen is initially -1, we will
+			 * certainly adopt the first headline.)
+			 */
+			if (poscover > bestcover ||
+				(poscover == bestcover && poslen > bestlen) ||
+				(poscover == bestcover && poslen == bestlen &&
+				 !BADENDPOINT(pose) && BADENDPOINT(beste)))
+			{
+				bestb = posb;
+				beste = pose;
+				bestlen = poslen;
+				bestcover = poscover;
+			}
+
+			/* move p to generate the next cover */
+			p++;
+		}
+
+		/*
+		 * If we found nothing acceptable, select min_words words starting at
+		 * the beginning.
+		 */
+		if (bestlen < 0)
+		{
+			curlen = 0;
+			/* pose stays -1 (nothing gets marked) if the text has no words */
+			pose = -1;
+			for (i = 0; i < prs->curwords && curlen < min_words; i++)
+			{
+				if (!NONWORDTOKEN(prs->words[i].type))
+					curlen++;
+				pose = i;
+			}
+			bestb = 0;
+			beste = pose;
+		}
+	}
+	else
+	{
+		/* highlightall mode: headline is whole document */
+		bestb = 0;
+		beste = prs->curwords - 1;
+	}
+
+	mark_fragment(prs, highlightall, bestb, beste);
+}
+
+/*
+ * Default parser's prsheadline function
+ *
+ * Arguments: the HeadlineParsedText to mark up, a List of DefElem options,
+ * and the TSQuery.  Recognized options (matched case-insensitively) are
+ * MaxWords, MinWords, ShortWord, MaxFragments, StartSel, StopSel,
+ * FragmentDelimiter and HighlightAll; anything else raises an error.
+ * Returns the same HeadlineParsedText with words marked and the selector /
+ * delimiter strings and their lengths filled in.
+ */
+Datum
+prsd_headline(PG_FUNCTION_ARGS)
+{
+	HeadlineParsedText *prs = (HeadlineParsedText *) PG_GETARG_POINTER(0);
+	List	   *prsoptions = (List *) PG_GETARG_POINTER(1);
+	TSQuery		query = PG_GETARG_TSQUERY(2);
+	ListCell   *cell;
+
+	/* option values, preset to their defaults */
+	bool		highlightall = false;
+	int			max_fragments = 0;
+	int			shortword = 3;
+	int			max_words = 35;
+	int			min_words = 15;
+	int			max_cover;
+
+	/* string options start out unset; defaults are filled in at the end */
+	prs->startsel = NULL;
+	prs->stopsel = NULL;
+	prs->fragdelim = NULL;
+
+	/* Walk the option list, overriding defaults as we go */
+	foreach(cell, prsoptions)
+	{
+		DefElem    *opt = (DefElem *) lfirst(cell);
+		char	   *val = defGetString(opt);
+		char	   *name = opt->defname;
+
+		if (pg_strcasecmp(name, "MaxWords") == 0)
+			max_words = pg_strtoint32(val);
+		else if (pg_strcasecmp(name, "MinWords") == 0)
+			min_words = pg_strtoint32(val);
+		else if (pg_strcasecmp(name, "ShortWord") == 0)
+			shortword = pg_strtoint32(val);
+		else if (pg_strcasecmp(name, "MaxFragments") == 0)
+			max_fragments = pg_strtoint32(val);
+		else if (pg_strcasecmp(name, "StartSel") == 0)
+			prs->startsel = pstrdup(val);
+		else if (pg_strcasecmp(name, "StopSel") == 0)
+			prs->stopsel = pstrdup(val);
+		else if (pg_strcasecmp(name, "FragmentDelimiter") == 0)
+			prs->fragdelim = pstrdup(val);
+		else if (pg_strcasecmp(name, "HighlightAll") == 0)
+		{
+			/* any of these spellings turns the option on, all else is off */
+			highlightall = (pg_strcasecmp(val, "1") == 0 ||
+							pg_strcasecmp(val, "on") == 0 ||
+							pg_strcasecmp(val, "true") == 0 ||
+							pg_strcasecmp(val, "t") == 0 ||
+							pg_strcasecmp(val, "y") == 0 ||
+							pg_strcasecmp(val, "yes") == 0);
+		}
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("unrecognized headline parameter: \"%s\"",
+							name)));
+	}
+
+	/*
+	 * max_cover is not (yet) user-settable; derive it from max_words and
+	 * max_fragments: ten times max_words but no less than 100, scaled by
+	 * the number of fragments when fragments are requested.
+	 */
+	max_cover = max_words * 10;
+	if (max_cover < 100)
+		max_cover = 100;
+	if (max_fragments > 0)
+		max_cover *= max_fragments;
+
+	/* the numeric parameters are only validated outside HighlightAll mode */
+	if (!highlightall)
+	{
+		if (min_words >= max_words)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("MinWords should be less than MaxWords")));
+		if (min_words <= 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("MinWords should be positive")));
+		if (shortword < 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("ShortWord should be >= 0")));
+		if (max_fragments < 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("MaxFragments should be >= 0")));
+	}
+
+	/* Run the selector matching the requested mode */
+	if (max_fragments != 0)
+		mark_hl_fragments(prs, query, highlightall, shortword,
+						  min_words, max_words, max_fragments, max_cover);
+	else
+		mark_hl_words(prs, query, highlightall, shortword,
+					  min_words, max_words, max_cover);
+
+	/* Supply defaults for string options the caller did not set */
+	if (prs->startsel == NULL)
+		prs->startsel = pstrdup("<b>");
+	if (prs->stopsel == NULL)
+		prs->stopsel = pstrdup("</b>");
+	if (prs->fragdelim == NULL)
+		prs->fragdelim = pstrdup(" ... ");
+
+	/* Caller will need these lengths, too */
+	prs->startsellen = strlen(prs->startsel);
+	prs->stopsellen = strlen(prs->stopsel);
+	prs->fragdelimlen = strlen(prs->fragdelim);
+
+	PG_RETURN_POINTER(prs);
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-04 12:17:33 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-04 12:17:33 +0000
commit	5e45211a64149b3c659b90ff2de6fa982a5a93ed (patch)
tree	739caf8c461053357daa9f162bef34516c7bf452 /src/backend/tsearch
parent	Initial commit. (diff)
download	postgresql-15-5e45211a64149b3c659b90ff2de6fa982a5a93ed.tar.xz postgresql-15-5e45211a64149b3c659b90ff2de6fa982a5a93ed.zip