From 46651ce6fe013220ed397add242004d764fc0153 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Sat, 4 May 2024 14:15:05 +0200
Subject: Adding upstream version 14.5.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 src/backend/tsearch/dict_synonym.c | 241 +++++++++++++++++++++++++++++++++++++
 1 file changed, 241 insertions(+)
 create mode 100644 src/backend/tsearch/dict_synonym.c

(limited to 'src/backend/tsearch/dict_synonym.c')

diff --git a/src/backend/tsearch/dict_synonym.c b/src/backend/tsearch/dict_synonym.c
new file mode 100644
index 0000000..ed885ca
--- /dev/null
+++ b/src/backend/tsearch/dict_synonym.c
@@ -0,0 +1,241 @@
+/*-------------------------------------------------------------------------
+ *
+ * dict_synonym.c
+ *		Synonym dictionary: replace word by its synonym
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/tsearch/dict_synonym.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "commands/defrem.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+
+typedef struct
+{
+	char	   *in;				/* word to match; the sort and lookup key */
+	char	   *out;			/* replacement word */
+	int			outlen;			/* byte count used when copying 'out' */
+	uint16		flags;			/* copied to the result lexeme: TSL_PREFIX or 0 */
+} Syn;
+
+typedef struct
+{
+	int			len;			/* length of syn array */
+	Syn		   *syn;			/* entries; sorted by 'in' at the end of init */
+	bool		case_sensitive; /* if false, words are lowercased before use */
+} DictSyn;
+
+/*
+ * findwrd
+ *		Locate the next whitespace-delimited word in 'in'.
+ *
+ * Leading whitespace is skipped one multibyte character at a time.  The
+ * return value is the address of the word's first byte, and *end is set to
+ * the byte just past the word.  If nothing but whitespace remains, the
+ * function returns NULL, sets *end to NULL, and leaves *flags untouched.
+ *
+ * When 'flags' is not NULL, a single-byte '*' as the final character of the
+ * word is reported as TSL_PREFIX in *flags and excluded from the word (*end
+ * points at the '*', so a word consisting solely of '*' comes back empty);
+ * otherwise *flags is set to 0.  When 'flags' is NULL a trailing '*' is an
+ * ordinary word character.
+ */
+static char *
+findwrd(char *in, char **end, uint16 *flags)
+{
+	char	   *word;
+	char	   *tail;			/* start of the last character scanned */
+	bool		prefix;
+
+	/* Step over any whitespace in front of the word */
+	for (; *in != '\0' && t_isspace(in); in += pg_mblen(in))
+		;
+
+	/* Nothing left on the line */
+	if (*in == '\0')
+	{
+		*end = NULL;
+		return NULL;
+	}
+
+	/* Advance to the word's end, remembering where its last character began */
+	word = in;
+	tail = in;
+	for (; *in != '\0' && !t_isspace(in); in += pg_mblen(in))
+		tail = in;
+
+	/* A trailing '*' is a prefix marker only if the caller asked for flags */
+	prefix = (flags != NULL && in - tail == 1 && t_iseq(tail, '*'));
+	if (flags != NULL)
+		*flags = prefix ? TSL_PREFIX : 0;
+	*end = prefix ? tail : in;
+
+	return word;
+}
+
+static int
+compareSyn(const void *a, const void *b)
+{
+	const Syn  *lhs = a, *rhs = b;	/* qsort()/bsearch() order: strcmp on 'in' */
+	return strcmp(lhs->in, rhs->in);
+}
+
+/*
+ * dsynonym_init
+ *		Build a synonym dictionary from its option list.
+ *
+ * Options: "synonyms" (required; turned into a file name by
+ * get_tsearch_config_filename() with extension "syn") and "casesensitive"
+ * (boolean, default false).  Any other option, a missing "synonyms", or a
+ * file that cannot be opened raises an error.  Each file line supplies a
+ * word and its replacement; lines with fewer than two words are skipped and
+ * text after the second word is ignored.  A '*' ending the replacement sets
+ * TSL_PREFIX on the entry.  Unless case-sensitive, both words are stored
+ * lowercased.  Returns a DictSyn whose entries are sorted by input word.
+ */
+Datum
+dsynonym_init(PG_FUNCTION_ARGS)
+{
+	List	   *dictoptions = (List *) PG_GETARG_POINTER(0);
+	DictSyn    *d;
+	ListCell   *l;
+	char	   *filename = NULL;
+	bool		case_sensitive = false;
+	tsearch_readline_state trst;
+	char	   *starti,
+			   *starto,
+			   *end = NULL;
+	int			cur = 0;
+	char	   *line = NULL;
+	uint16		flags = 0;
+
+	foreach(l, dictoptions)
+	{
+		DefElem    *defel = (DefElem *) lfirst(l);
+
+		if (strcmp(defel->defname, "synonyms") == 0)
+			filename = defGetString(defel);
+		else if (strcmp(defel->defname, "casesensitive") == 0)
+			case_sensitive = defGetBoolean(defel);
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("unrecognized synonym parameter: \"%s\"",
+							defel->defname)));
+	}
+
+	if (!filename)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("missing Synonyms parameter")));
+
+	filename = get_tsearch_config_filename(filename, "syn");
+
+	if (!tsearch_readline_begin(&trst, filename))
+		ereport(ERROR,
+				(errcode(ERRCODE_CONFIG_FILE_ERROR),
+				 errmsg("could not open synonym file \"%s\": %m",
+						filename)));
+
+	d = (DictSyn *) palloc0(sizeof(DictSyn));
+
+	while ((line = tsearch_readline(&trst)) != NULL)
+	{
+		/* Blank lines and lines holding a single word are ignored silently */
+		starti = findwrd(line, &end, NULL);
+		if (!starti || *end == '\0')
+			goto skipline;
+
+		/* Step over the delimiter by its full (possibly multibyte) length */
+		starto = end + pg_mblen(end);
+		*end = '\0';
+
+		starto = findwrd(starto, &end, &flags);
+		if (!starto)
+			goto skipline;		/* one word followed only by whitespace */
+		*end = '\0';
+
+		/* Grow the entry array as needed: 64 slots first, then doubling */
+		if (cur >= d->len)
+		{
+			d->len = (d->len == 0) ? 64 : d->len * 2;
+			if (d->syn == NULL)
+				d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
+			else
+				d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
+		}
+
+		if (case_sensitive)
+		{
+			d->syn[cur].in = pstrdup(starti);
+			d->syn[cur].out = pstrdup(starto);
+		}
+		else
+		{
+			d->syn[cur].in = lowerstr(starti);
+			d->syn[cur].out = lowerstr(starto);
+		}
+
+		/* Measure the stored copy: lowercasing can change the byte length */
+		d->syn[cur].outlen = strlen(d->syn[cur].out);
+		d->syn[cur].flags = flags;
+
+		cur++;
+
+skipline:
+		pfree(line);
+	}
+
+	tsearch_readline_end(&trst);
+
+	d->len = cur;
+	/* With no entries d->syn is NULL, which must not be handed to qsort() */
+	if (cur > 1)
+		qsort(d->syn, d->len, sizeof(Syn), compareSyn);
+
+	d->case_sensitive = case_sensitive;
+
+	PG_RETURN_POINTER(d);
+}
+
+/*
+ * dsynonym_lexize
+ *		Return the replacement for a token, or NULL when none is stored.
+ */
+Datum
+dsynonym_lexize(PG_FUNCTION_ARGS)
+{
+	DictSyn    *dict = (DictSyn *) PG_GETARG_POINTER(0);
+	char	   *token = (char *) PG_GETARG_POINTER(1);
+	int32		tokenlen = PG_GETARG_INT32(2);
+	Syn			probe;
+	Syn		   *hit;
+	TSLexeme   *lexemes = NULL;
+
+	/* note: dict->len test protects against Solaris bsearch-of-no-items bug */
+	if (tokenlen <= 0 || dict->len <= 0)
+		PG_RETURN_POINTER(NULL);
+
+	/* Search with a private copy of the token, lowercased if insensitive */
+	probe.out = NULL;
+	probe.in = dict->case_sensitive ?
+		pnstrdup(token, tokenlen) : lowerstr_with_len(token, tokenlen);
+	hit = (Syn *) bsearch(&probe, dict->syn, dict->len, sizeof(Syn), compareSyn);
+	pfree(probe.in);
+	if (hit != NULL)
+	{
+		lexemes = palloc0(sizeof(TSLexeme) * 2);
+		lexemes[0].lexeme = pnstrdup(hit->out, hit->outlen);
+		lexemes[0].flags = hit->flags;
+	}
+
+	PG_RETURN_POINTER(lexemes);
+}
-- 
cgit v1.2.3