summary refs log tree commit diff stats
path: root/src/backend/tsearch/dict_synonym.c
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:15:05 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:15:05 +0000
commit 46651ce6fe013220ed397add242004d764fc0153 (patch)
tree 6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/backend/tsearch/dict_synonym.c
parent Initial commit. (diff)
download postgresql-14-46651ce6fe013220ed397add242004d764fc0153.tar.xz
postgresql-14-46651ce6fe013220ed397add242004d764fc0153.zip
Adding upstream version 14.5. upstream/14.5 upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/tsearch/dict_synonym.c')
-rw-r--r-- src/backend/tsearch/dict_synonym.c 241
1 files changed, 241 insertions, 0 deletions
diff --git a/src/backend/tsearch/dict_synonym.c b/src/backend/tsearch/dict_synonym.c
new file mode 100644
index 0000000..ed885ca
--- /dev/null
+++ b/src/backend/tsearch/dict_synonym.c
@@ -0,0 +1,241 @@
+/*-------------------------------------------------------------------------
+ *
+ * dict_synonym.c
+ * Synonym dictionary: replace word by its synonym
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/tsearch/dict_synonym.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "commands/defrem.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+
+/*
+ * One synonym mapping loaded from the synonym file by dsynonym_init.
+ */
+typedef struct
+{
+ char *in; /* word to match; the key compared by compareSyn */
+ char *out; /* replacement word handed back by dsynonym_lexize */
+ int outlen; /* length passed with 'out' to pnstrdup() when building the result */
+ uint16 flags; /* flags reported by findwrd for 'out': TSL_PREFIX or 0 */
+} Syn;
+
+/*
+ * Dictionary state: built by dsynonym_init, consulted by dsynonym_lexize.
+ */
+typedef struct
+{
+ int len; /* length of syn array */
+ Syn *syn; /* entries, sorted with compareSyn so they can be bsearch'ed */
+ bool case_sensitive; /* if false, words are lower-cased on load and on lookup */
+} DictSyn;
+
+/*
+ * Locate the next whitespace-delimited word in 'in'.
+ *
+ * Returns a pointer to the first byte of the word and stores in *end a
+ * pointer to the byte just past the word.  If 'in' holds nothing but
+ * whitespace (or is empty), returns NULL, sets *end to NULL and leaves
+ * *flags untouched.
+ *
+ * When 'flags' is not NULL, a word whose final character is a single-byte
+ * '*' is reported as a prefix word: *flags becomes TSL_PREFIX and *end
+ * points at the '*' rather than past it, so the '*' is excluded from the
+ * word.  In every other case *flags (if supplied) is set to 0.
+ */
+static char *
+findwrd(char *in, char **end, uint16 *flags)
+{
+    char *word;
+    char *prev;
+    bool is_prefix;
+
+    /* Step over leading whitespace, one (possibly multibyte) character at a time */
+    for (; *in != '\0' && t_isspace(in); in += pg_mblen(in))
+        ;
+
+    /* Nothing left on the line */
+    if (*in == '\0')
+    {
+        *end = NULL;
+        return NULL;
+    }
+
+    word = in;
+    prev = in;
+
+    /* Walk to the end of the word, remembering where its last character starts */
+    for (; *in != '\0' && !t_isspace(in); in += pg_mblen(in))
+        prev = in;
+
+    /* A trailing one-byte '*' marks a prefix word, but only if the caller asked */
+    is_prefix = (flags != NULL && in - prev == 1 && t_iseq(prev, '*'));
+
+    if (flags != NULL)
+        *flags = is_prefix ? TSL_PREFIX : 0;
+    *end = is_prefix ? prev : in;
+
+    return word;
+}
+
+/*
+ * Comparator for Syn entries, used with qsort() and bsearch(): orders
+ * entries by strcmp() of their 'in' words.
+ */
+static int
+compareSyn(const void *a, const void *b)
+{
+    const Syn *lhs = (const Syn *) a;
+    const Syn *rhs = (const Syn *) b;
+
+    return strcmp(lhs->in, rhs->in);
+}
+
+
+/*
+ * dsynonym_init - build a synonym dictionary from its option list.
+ *
+ * Argument 0 is a List of DefElem options:
+ *   "synonyms"      - name of the synonym file (required); resolved with
+ *                     get_tsearch_config_filename() and the "syn" extension
+ *   "casesensitive" - boolean, defaults to false
+ *
+ * Each line of the file supplies a word followed by its synonym.  Lines
+ * with fewer than two words are skipped silently; anything after the
+ * second word is ignored.  A synonym ending in '*' is stored without the
+ * '*' and flagged TSL_PREFIX (see findwrd).  Unless casesensitive is set,
+ * both words are lower-cased before being stored.
+ *
+ * Returns a pointer to a DictSyn whose entries are sorted with compareSyn.
+ * Raises an error for an unrecognized option, a missing "synonyms" option,
+ * or a synonym file that cannot be opened.
+ */
+Datum
+dsynonym_init(PG_FUNCTION_ARGS)
+{
+    List *dictoptions = (List *) PG_GETARG_POINTER(0);
+    DictSyn *d;
+    ListCell *l;
+    char *filename = NULL;
+    bool case_sensitive = false;
+    tsearch_readline_state trst;
+    char *starti,
+         *starto,
+         *end = NULL;
+    int cur = 0;
+    char *line = NULL;
+    uint16 flags = 0;
+
+    foreach(l, dictoptions)
+    {
+        DefElem *defel = (DefElem *) lfirst(l);
+
+        if (strcmp(defel->defname, "synonyms") == 0)
+            filename = defGetString(defel);
+        else if (strcmp(defel->defname, "casesensitive") == 0)
+            case_sensitive = defGetBoolean(defel);
+        else
+            ereport(ERROR,
+                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                     errmsg("unrecognized synonym parameter: \"%s\"",
+                            defel->defname)));
+    }
+
+    if (!filename)
+        ereport(ERROR,
+                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                 errmsg("missing Synonyms parameter")));
+
+    filename = get_tsearch_config_filename(filename, "syn");
+
+    if (!tsearch_readline_begin(&trst, filename))
+        ereport(ERROR,
+                (errcode(ERRCODE_CONFIG_FILE_ERROR),
+                 errmsg("could not open synonym file \"%s\": %m",
+                        filename)));
+
+    /* Zeroed, so d->len starts at 0 and the array is allocated on first use */
+    d = (DictSyn *) palloc0(sizeof(DictSyn));
+
+    while ((line = tsearch_readline(&trst)) != NULL)
+    {
+        char *rest;
+
+        starti = findwrd(line, &end, NULL);
+        if (!starti)
+        {
+            /* Empty line */
+            goto skipline;
+        }
+        if (*end == '\0')
+        {
+            /* A line with only one word. Ignore silently. */
+            goto skipline;
+        }
+
+        /*
+         * 'end' points at the whitespace character that follows the first
+         * word.  findwrd steps over whitespace with pg_mblen(), so that
+         * character may occupy several bytes; work out where the rest of
+         * the line begins before overwriting its first byte, so that the
+         * search for the second word never starts in mid-character.
+         */
+        rest = end + pg_mblen(end);
+        *end = '\0';
+
+        starto = findwrd(rest, &end, &flags);
+        if (!starto)
+        {
+            /* A line with only one word (+whitespace). Ignore silently. */
+            goto skipline;
+        }
+        *end = '\0';
+
+        /*
+         * starti now points to the first word, and starto to the second word
+         * on the line, with a \0 terminator at the end of both words.
+         */
+
+        /* Grow the array geometrically: 64 entries at first, then doubling */
+        if (cur >= d->len)
+        {
+            if (d->len == 0)
+            {
+                d->len = 64;
+                d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
+            }
+            else
+            {
+                d->len *= 2;
+                d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
+            }
+        }
+
+        if (case_sensitive)
+        {
+            d->syn[cur].in = pstrdup(starti);
+            d->syn[cur].out = pstrdup(starto);
+        }
+        else
+        {
+            d->syn[cur].in = lowerstr(starti);
+            d->syn[cur].out = lowerstr(starto);
+        }
+
+        /*
+         * Measure the string actually stored, not the raw input: the
+         * lower-cased copy need not have the same byte length as starto,
+         * and dsynonym_lexize copies exactly outlen bytes of 'out'.
+         */
+        d->syn[cur].outlen = strlen(d->syn[cur].out);
+        d->syn[cur].flags = flags;
+
+        cur++;
+
+skipline:
+        pfree(line);
+    }
+
+    tsearch_readline_end(&trst);
+
+    /* From here on, len is the number of entries in use, not the allocation */
+    d->len = cur;
+
+    /*
+     * Zero or one entries are already sorted.  Skipping the call also avoids
+     * handing qsort a NULL base when the file had no usable lines.
+     */
+    if (d->len > 1)
+        qsort(d->syn, d->len, sizeof(Syn), compareSyn);
+
+    d->case_sensitive = case_sensitive;
+
+    PG_RETURN_POINTER(d);
+}
+
+/*
+ * dsynonym_lexize - look a word up in the synonym dictionary.
+ *
+ * Arguments: 0 = the DictSyn built by dsynonym_init, 1 = pointer to the
+ * input word, 2 = its length (int32).
+ *
+ * Returns NULL if the word is empty, the dictionary has no entries, or the
+ * word has no synonym.  Otherwise returns a palloc'd, zero-filled array of
+ * two TSLexeme: the first holds a copy of the synonym and its flags, the
+ * second is left zeroed (presumably the array terminator -- confirm
+ * against the TSLexeme contract).
+ */
+Datum
+dsynonym_lexize(PG_FUNCTION_ARGS)
+{
+    DictSyn *dict = (DictSyn *) PG_GETARG_POINTER(0);
+    char *word = (char *) PG_GETARG_POINTER(1);
+    int32 wordlen = PG_GETARG_INT32(2);
+    Syn probe;
+    Syn *match;
+    TSLexeme *lexemes;
+
+    /* note: d->len test protects against Solaris bsearch-of-no-items bug */
+    if (wordlen <= 0 || dict->len <= 0)
+        PG_RETURN_POINTER(NULL);
+
+    /* Build a search key in the same form (raw or lower-cased) as the stored words */
+    probe.in = dict->case_sensitive
+        ? pnstrdup(word, wordlen)
+        : lowerstr_with_len(word, wordlen);
+    probe.out = NULL;
+
+    match = (Syn *) bsearch(&probe, dict->syn, dict->len, sizeof(Syn), compareSyn);
+
+    /* The key copy is only needed for the search itself */
+    pfree(probe.in);
+
+    if (match == NULL)
+        PG_RETURN_POINTER(NULL);
+
+    lexemes = palloc0(2 * sizeof(TSLexeme));
+    lexemes[0].lexeme = pnstrdup(match->out, match->outlen);
+    lexemes[0].flags = match->flags;
+
+    PG_RETURN_POINTER(lexemes);
+}