summaryrefslogtreecommitdiffstats
path: root/contrib/dict_xsyn
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:15:05 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:15:05 +0000
commit46651ce6fe013220ed397add242004d764fc0153 (patch)
tree6e5299f990f88e60174a1d3ae6e48eedd2688b2b /contrib/dict_xsyn
parentInitial commit. (diff)
downloadpostgresql-14-upstream.tar.xz
postgresql-14-upstream.zip
Adding upstream version 14.5.upstream/14.5upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--contrib/dict_xsyn/.gitignore4
-rw-r--r--contrib/dict_xsyn/Makefile24
-rw-r--r--contrib/dict_xsyn/dict_xsyn--1.0.sql25
-rw-r--r--contrib/dict_xsyn/dict_xsyn.c259
-rw-r--r--contrib/dict_xsyn/dict_xsyn.control5
-rw-r--r--contrib/dict_xsyn/expected/dict_xsyn.out142
-rw-r--r--contrib/dict_xsyn/sql/dict_xsyn.sql45
-rw-r--r--contrib/dict_xsyn/xsyn_sample.rules6
8 files changed, 510 insertions, 0 deletions
diff --git a/contrib/dict_xsyn/.gitignore b/contrib/dict_xsyn/.gitignore
new file mode 100644
index 0000000..5dcb3ff
--- /dev/null
+++ b/contrib/dict_xsyn/.gitignore
@@ -0,0 +1,4 @@
+# Generated subdirectories
+/log/
+/results/
+/tmp_check/
diff --git a/contrib/dict_xsyn/Makefile b/contrib/dict_xsyn/Makefile
new file mode 100644
index 0000000..b6bcfe6
--- /dev/null
+++ b/contrib/dict_xsyn/Makefile
@@ -0,0 +1,24 @@
+# contrib/dict_xsyn/Makefile
+
+MODULE_big = dict_xsyn
+OBJS = \
+ $(WIN32RES) \
+ dict_xsyn.o
+
+EXTENSION = dict_xsyn
+DATA = dict_xsyn--1.0.sql
+DATA_TSEARCH = xsyn_sample.rules
+PGFILEDESC = "dict_xsyn - add-on dictionary template for full-text search"
+
+REGRESS = dict_xsyn
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/dict_xsyn
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/dict_xsyn/dict_xsyn--1.0.sql b/contrib/dict_xsyn/dict_xsyn--1.0.sql
new file mode 100644
index 0000000..3d6bb51
--- /dev/null
+++ b/contrib/dict_xsyn/dict_xsyn--1.0.sql
@@ -0,0 +1,25 @@
+/* contrib/dict_xsyn/dict_xsyn--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION dict_xsyn" to load this file. \quit
+
+CREATE FUNCTION dxsyn_init(internal) -- used as the INIT method of xsyn_template below
+ RETURNS internal
+ AS 'MODULE_PATHNAME' -- implemented in dict_xsyn.c
+ LANGUAGE C STRICT;
+
+CREATE FUNCTION dxsyn_lexize(internal, internal, internal, internal) -- used as the LEXIZE method of xsyn_template below
+ RETURNS internal
+ AS 'MODULE_PATHNAME' -- implemented in dict_xsyn.c
+ LANGUAGE C STRICT;
+
+CREATE TEXT SEARCH TEMPLATE xsyn_template ( -- ties the two C functions together as a dictionary template
+ LEXIZE = dxsyn_lexize,
+ INIT = dxsyn_init
+);
+
+CREATE TEXT SEARCH DICTIONARY xsyn ( -- created without options; RULES etc. are set later via ALTER TEXT SEARCH DICTIONARY
+ TEMPLATE = xsyn_template
+);
+
+COMMENT ON TEXT SEARCH DICTIONARY xsyn IS 'eXtended synonym dictionary';
diff --git a/contrib/dict_xsyn/dict_xsyn.c b/contrib/dict_xsyn/dict_xsyn.c
new file mode 100644
index 0000000..79c4f18
--- /dev/null
+++ b/contrib/dict_xsyn/dict_xsyn.c
@@ -0,0 +1,259 @@
+/*-------------------------------------------------------------------------
+ *
+ * dict_xsyn.c
+ * Extended synonym dictionary
+ *
+ * Copyright (c) 2007-2021, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/dict_xsyn/dict_xsyn.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+
+#include "commands/defrem.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_utils.h"
+
+PG_MODULE_MAGIC;
+
+typedef struct
+{
+ char *key; /* Word to match; the key compared by compare_syn() */
+ char *value; /* Unparsed, lower-cased rule line: the list of
+ * synonyms, including the word itself */
+} Syn;
+
+typedef struct
+{
+ int len; /* allocated size of syn[] while loading; number of entries once loaded */
+ Syn *syn; /* entries, sorted by key at the end of read_dictionary() */
+
+ bool matchorig; /* store the first word of each rule as a matchable key */
+ bool keeporig; /* output the first word of the rule on a match */
+ bool matchsynonyms; /* also store the remaining words of each rule as keys */
+ bool keepsynonyms; /* output the remaining words of the rule on a match */
+} DictSyn;
+
+
+PG_FUNCTION_INFO_V1(dxsyn_init);
+PG_FUNCTION_INFO_V1(dxsyn_lexize);
+
+static char * /* Returns the start of the next whitespace-delimited word in "in", or NULL if none is left */
+find_word(char *in, char **end) /* *end is set to one past the word, or to NULL when NULL is returned */
+{
+ char *start;
+
+ *end = NULL;
+ while (*in && t_isspace(in)) /* skip leading whitespace */
+ in += pg_mblen(in); /* step by pg_mblen() bytes (presumably one, possibly multibyte, character) */
+
+ if (!*in || *in == '#') /* end of string, or a word starting with '#': nothing more to return */
+ return NULL;
+ start = in;
+
+ while (*in && !t_isspace(in)) /* advance to the first whitespace after the word, or to the terminator */
+ in += pg_mblen(in);
+
+ *end = in;
+
+ return start; /* the word is NOT NUL-terminated; its length is *end - start */
+}
+
+static int /* Comparator passed to qsort() and bsearch(): orders Syn entries by strcmp() of their keys */
+compare_syn(const void *a, const void *b) /* a and b point to Syn structs; only ->key is read */
+{
+ return strcmp(((const Syn *) a)->key, ((const Syn *) b)->key);
+}
+
+static void /* Loads the rules file named by "filename" into d->syn and sorts it by key; raises ERROR if it cannot be opened */
+read_dictionary(DictSyn *d, const char *filename) /* reads d->matchorig and d->matchsynonyms, so options must be set first */
+{
+ char *real_filename = get_tsearch_config_filename(filename, "rules");
+ tsearch_readline_state trst;
+ char *line;
+ int cur = 0; /* number of entries stored in d->syn so far */
+
+ if (!tsearch_readline_begin(&trst, real_filename))
+ ereport(ERROR,
+ (errcode(ERRCODE_CONFIG_FILE_ERROR),
+ errmsg("could not open synonym file \"%s\": %m",
+ real_filename)));
+
+ while ((line = tsearch_readline(&trst)) != NULL)
+ {
+ char *value; /* lower-cased copy of the whole line */
+ char *key; /* start of the current word inside value */
+ char *pos; /* where the next find_word() scan resumes */
+ char *end; /* one past the current word */
+
+ if (*line == '\0')
+ continue; /* NOTE(review): line is not pfree'd on this path, unlike below - confirm whether that is intended */
+
+ value = lowerstr(line);
+ pfree(line);
+
+ pos = value;
+ while ((key = find_word(pos, &end)) != NULL)
+ {
+ /* Enlarge syn structure if full (d->len is the allocated size here) */
+ if (cur == d->len)
+ {
+ d->len = (d->len > 0) ? 2 * d->len : 16; /* double the size, starting at 16 */
+ if (d->syn)
+ d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
+ else
+ d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
+ }
+
+ /* Save first word only if we will match it (pos == value only on the first pass) */
+ if (pos != value || d->matchorig)
+ {
+ d->syn[cur].key = pnstrdup(key, end - key);
+ d->syn[cur].value = pstrdup(value); /* every entry gets its own copy of the full line */
+
+ cur++;
+ }
+
+ pos = end;
+
+ /* Don't bother scanning synonyms if we will not match them */
+ if (!d->matchsynonyms)
+ break;
+ }
+
+ pfree(value);
+ }
+
+ tsearch_readline_end(&trst);
+
+ d->len = cur; /* from here on len is the entry count, not the allocated size */
+ if (cur > 1)
+ qsort(d->syn, d->len, sizeof(Syn), compare_syn); /* sorted so dxsyn_lexize() can bsearch() */
+
+ pfree(real_filename);
+}
+
+Datum /* INIT method of xsyn_template: builds a DictSyn from the option list and returns a pointer to it */
+dxsyn_init(PG_FUNCTION_ARGS)
+{
+ List *dictoptions = (List *) PG_GETARG_POINTER(0); /* iterated below as a list of DefElem */
+ DictSyn *d;
+ ListCell *l;
+ char *filename = NULL; /* value of the "rules" option, if given */
+
+ d = (DictSyn *) palloc0(sizeof(DictSyn));
+ d->len = 0;
+ d->syn = NULL;
+ d->matchorig = true; /* defaults: match only the first word of a rule ... */
+ d->keeporig = true;
+ d->matchsynonyms = false;
+ d->keepsynonyms = true; /* ... and return it together with all of its synonyms */
+
+ foreach(l, dictoptions)
+ {
+ DefElem *defel = (DefElem *) lfirst(l);
+
+ if (strcmp(defel->defname, "matchorig") == 0) /* option names are compared case-sensitively here */
+ {
+ d->matchorig = defGetBoolean(defel);
+ }
+ else if (strcmp(defel->defname, "keeporig") == 0)
+ {
+ d->keeporig = defGetBoolean(defel);
+ }
+ else if (strcmp(defel->defname, "matchsynonyms") == 0)
+ {
+ d->matchsynonyms = defGetBoolean(defel);
+ }
+ else if (strcmp(defel->defname, "keepsynonyms") == 0)
+ {
+ d->keepsynonyms = defGetBoolean(defel);
+ }
+ else if (strcmp(defel->defname, "rules") == 0)
+ {
+ /* we can't read the rules before parsing all options: the match* flags decide which words get stored */
+ filename = defGetString(defel);
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unrecognized xsyn parameter: \"%s\"",
+ defel->defname)));
+ }
+ }
+
+ if (filename) /* without a "rules" option the dictionary stays empty (len == 0) */
+ read_dictionary(d, filename);
+
+ PG_RETURN_POINTER(d);
+}
+
+Datum /* LEXIZE method of xsyn_template: looks up one token; returns a TSLexeme array, or NULL if nothing matches */
+dxsyn_lexize(PG_FUNCTION_ARGS)
+{
+ DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0); /* dictionary state; presumably the pointer returned by dxsyn_init */
+ char *in = (char *) PG_GETARG_POINTER(1); /* input token; copied using "length" rather than a NUL terminator */
+ int length = PG_GETARG_INT32(2); /* number of bytes of "in" to use */
+ Syn word; /* temporary search key for bsearch() */
+ Syn *found;
+ TSLexeme *res = NULL;
+
+ if (!length || d->len == 0) /* empty token or empty dictionary: no match */
+ PG_RETURN_POINTER(NULL);
+
+ /* Create search pattern: a lower-cased, NUL-terminated copy of the token */
+ {
+ char *temp = pnstrdup(in, length);
+
+ word.key = lowerstr(temp);
+ pfree(temp);
+ word.value = NULL; /* compare_syn() only looks at key */
+ }
+
+ /* Look for matching syn (d->syn was sorted with the same comparator in read_dictionary) */
+ found = (Syn *) bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn);
+ pfree(word.key);
+
+ if (!found)
+ PG_RETURN_POINTER(NULL);
+
+ /* Parse string of synonyms and return array of words */
+ {
+ char *value = found->value; /* the full rule line stored with the matched key */
+ char *syn; /* start of the current word inside value */
+ char *pos; /* where the next find_word() scan resumes */
+ char *end; /* one past the current word */
+ int nsyns = 0; /* number of lexemes emitted so far */
+
+ res = palloc(sizeof(TSLexeme)); /* room for the terminating entry alone */
+
+ pos = value;
+ while ((syn = find_word(pos, &end)) != NULL)
+ {
+ res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2)); /* room for this word plus the terminating entry */
+
+ /* The first word is output only if keeporig=true (pos == value only on the first pass) */
+ if (pos != value || d->keeporig)
+ {
+ res[nsyns].lexeme = pnstrdup(syn, end - syn);
+ res[nsyns].nvariant = 0;
+ res[nsyns].flags = 0;
+ nsyns++;
+ }
+
+ pos = end;
+
+ /* Stop if we are not to output the synonyms (only the first word has been considered) */
+ if (!d->keepsynonyms)
+ break;
+ }
+ res[nsyns].lexeme = NULL; /* last entry has a NULL lexeme; with nsyns == 0 the result is an empty array, not NULL */
+ }
+
+ PG_RETURN_POINTER(res);
+}
diff --git a/contrib/dict_xsyn/dict_xsyn.control b/contrib/dict_xsyn/dict_xsyn.control
new file mode 100644
index 0000000..3fd465a
--- /dev/null
+++ b/contrib/dict_xsyn/dict_xsyn.control
@@ -0,0 +1,5 @@
+# dict_xsyn extension
+comment = 'text search dictionary template for extended synonym processing'
+default_version = '1.0'
+module_pathname = '$libdir/dict_xsyn'
+relocatable = true
diff --git a/contrib/dict_xsyn/expected/dict_xsyn.out b/contrib/dict_xsyn/expected/dict_xsyn.out
new file mode 100644
index 0000000..9b95e13
--- /dev/null
+++ b/contrib/dict_xsyn/expected/dict_xsyn.out
@@ -0,0 +1,142 @@
+CREATE EXTENSION dict_xsyn;
+-- default configuration - match first word and return it among with all synonyms
+ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false);
+--lexize
+SELECT ts_lexize('xsyn', 'supernova');
+ ts_lexize
+--------------------------
+ {supernova,sn,sne,1987a}
+(1 row)
+
+SELECT ts_lexize('xsyn', 'sn');
+ ts_lexize
+-----------
+
+(1 row)
+
+SELECT ts_lexize('xsyn', 'grb');
+ ts_lexize
+-----------
+
+(1 row)
+
+-- the same, but return only synonyms
+ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false);
+SELECT ts_lexize('xsyn', 'supernova');
+ ts_lexize
+----------------
+ {sn,sne,1987a}
+(1 row)
+
+SELECT ts_lexize('xsyn', 'sn');
+ ts_lexize
+-----------
+
+(1 row)
+
+SELECT ts_lexize('xsyn', 'grb');
+ ts_lexize
+-----------
+
+(1 row)
+
+-- match any word and return all words
+ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
+SELECT ts_lexize('xsyn', 'supernova');
+ ts_lexize
+--------------------------
+ {supernova,sn,sne,1987a}
+(1 row)
+
+SELECT ts_lexize('xsyn', 'sn');
+ ts_lexize
+--------------------------
+ {supernova,sn,sne,1987a}
+(1 row)
+
+SELECT ts_lexize('xsyn', 'grb');
+ ts_lexize
+-----------
+
+(1 row)
+
+-- match any word and return all words except first one
+ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
+SELECT ts_lexize('xsyn', 'supernova');
+ ts_lexize
+----------------
+ {sn,sne,1987a}
+(1 row)
+
+SELECT ts_lexize('xsyn', 'sn');
+ ts_lexize
+----------------
+ {sn,sne,1987a}
+(1 row)
+
+SELECT ts_lexize('xsyn', 'grb');
+ ts_lexize
+-----------
+
+(1 row)
+
+-- match any synonym but not first word, and return first word instead
+ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
+SELECT ts_lexize('xsyn', 'supernova');
+ ts_lexize
+-----------
+
+(1 row)
+
+SELECT ts_lexize('xsyn', 'sn');
+ ts_lexize
+-------------
+ {supernova}
+(1 row)
+
+SELECT ts_lexize('xsyn', 'grb');
+ ts_lexize
+-----------
+
+(1 row)
+
+-- do not match or return anything
+ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=false);
+SELECT ts_lexize('xsyn', 'supernova');
+ ts_lexize
+-----------
+
+(1 row)
+
+SELECT ts_lexize('xsyn', 'sn');
+ ts_lexize
+-----------
+
+(1 row)
+
+SELECT ts_lexize('xsyn', 'grb');
+ ts_lexize
+-----------
+
+(1 row)
+
+-- match any word but return nothing
+ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
+SELECT ts_lexize('xsyn', 'supernova');
+ ts_lexize
+-----------
+ {}
+(1 row)
+
+SELECT ts_lexize('xsyn', 'sn');
+ ts_lexize
+-----------
+ {}
+(1 row)
+
+SELECT ts_lexize('xsyn', 'grb');
+ ts_lexize
+-----------
+
+(1 row)
+
diff --git a/contrib/dict_xsyn/sql/dict_xsyn.sql b/contrib/dict_xsyn/sql/dict_xsyn.sql
new file mode 100644
index 0000000..4951106
--- /dev/null
+++ b/contrib/dict_xsyn/sql/dict_xsyn.sql
@@ -0,0 +1,45 @@
+CREATE EXTENSION dict_xsyn;
+
+-- default configuration - match first word and return it among with all synonyms
+ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false);
+
+--lexize
+SELECT ts_lexize('xsyn', 'supernova');
+SELECT ts_lexize('xsyn', 'sn');
+SELECT ts_lexize('xsyn', 'grb');
+
+-- the same, but return only synonyms
+ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false);
+SELECT ts_lexize('xsyn', 'supernova');
+SELECT ts_lexize('xsyn', 'sn');
+SELECT ts_lexize('xsyn', 'grb');
+
+-- match any word and return all words
+ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
+SELECT ts_lexize('xsyn', 'supernova');
+SELECT ts_lexize('xsyn', 'sn');
+SELECT ts_lexize('xsyn', 'grb');
+
+-- match any word and return all words except first one
+ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
+SELECT ts_lexize('xsyn', 'supernova');
+SELECT ts_lexize('xsyn', 'sn');
+SELECT ts_lexize('xsyn', 'grb');
+
+-- match any synonym but not first word, and return first word instead
+ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
+SELECT ts_lexize('xsyn', 'supernova');
+SELECT ts_lexize('xsyn', 'sn');
+SELECT ts_lexize('xsyn', 'grb');
+
+-- do not match or return anything
+ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=false);
+SELECT ts_lexize('xsyn', 'supernova');
+SELECT ts_lexize('xsyn', 'sn');
+SELECT ts_lexize('xsyn', 'grb');
+
+-- match any word but return nothing
+ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
+SELECT ts_lexize('xsyn', 'supernova');
+SELECT ts_lexize('xsyn', 'sn');
+SELECT ts_lexize('xsyn', 'grb');
diff --git a/contrib/dict_xsyn/xsyn_sample.rules b/contrib/dict_xsyn/xsyn_sample.rules
new file mode 100644
index 0000000..203bec7
--- /dev/null
+++ b/contrib/dict_xsyn/xsyn_sample.rules
@@ -0,0 +1,6 @@
+# Sample rules file for eXtended Synonym (xsyn) dictionary
+# format is as follows:
+#
+# word synonym1 synonym2 ...
+#
+supernova sn sne 1987a