From 46651ce6fe013220ed397add242004d764fc0153 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 14:15:05 +0200 Subject: Adding upstream version 14.5. Signed-off-by: Daniel Baumann --- src/backend/tsearch/dict_synonym.c | 241 +++++++++++++++++++++++++++++++++++++ 1 file changed, 241 insertions(+) create mode 100644 src/backend/tsearch/dict_synonym.c (limited to 'src/backend/tsearch/dict_synonym.c') diff --git a/src/backend/tsearch/dict_synonym.c b/src/backend/tsearch/dict_synonym.c new file mode 100644 index 0000000..ed885ca --- /dev/null +++ b/src/backend/tsearch/dict_synonym.c @@ -0,0 +1,241 @@ +/*------------------------------------------------------------------------- + * + * dict_synonym.c + * Synonym dictionary: replace word by its synonym + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/tsearch/dict_synonym.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "commands/defrem.h" +#include "tsearch/ts_locale.h" +#include "tsearch/ts_utils.h" +#include "utils/builtins.h" + +typedef struct +{ + char *in; + char *out; + int outlen; + uint16 flags; +} Syn; + +typedef struct +{ + int len; /* length of syn array */ + Syn *syn; + bool case_sensitive; +} DictSyn; + +/* + * Finds the next whitespace-delimited word within the 'in' string. + * Returns a pointer to the first character of the word, and a pointer + * to the next byte after the last character in the word (in *end). + * Character '*' at the end of word will not be treated as word + * character if flags is not null. + */ +static char * +findwrd(char *in, char **end, uint16 *flags) +{ + char *start; + char *lastchar; + + /* Skip leading spaces */ + while (*in && t_isspace(in)) + in += pg_mblen(in); + + /* Return NULL on empty lines */ + if (*in == '\0') + { + *end = NULL; + return NULL; + } + + lastchar = start = in; + + /* Find end of word */ + while (*in && !t_isspace(in)) + { + lastchar = in; + in += pg_mblen(in); + } + + if (in - lastchar == 1 && t_iseq(lastchar, '*') && flags) + { + *flags = TSL_PREFIX; + *end = lastchar; + } + else + { + if (flags) + *flags = 0; + *end = in; + } + + return start; +} + +static int +compareSyn(const void *a, const void *b) +{ + return strcmp(((const Syn *) a)->in, ((const Syn *) b)->in); +} + + +Datum +dsynonym_init(PG_FUNCTION_ARGS) +{ + List *dictoptions = (List *) PG_GETARG_POINTER(0); + DictSyn *d; + ListCell *l; + char *filename = NULL; + bool case_sensitive = false; + tsearch_readline_state trst; + char *starti, + *starto, + *end = NULL; + int cur = 0; + char *line = NULL; + uint16 flags = 0; + + foreach(l, dictoptions) + { + DefElem *defel = (DefElem *) lfirst(l); + + if (strcmp(defel->defname, "synonyms") == 0) + filename = defGetString(defel); + else if (strcmp(defel->defname, "casesensitive") == 0) + case_sensitive = defGetBoolean(defel); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized synonym parameter: \"%s\"", + defel->defname))); + } + + if (!filename) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("missing Synonyms parameter"))); + + filename = get_tsearch_config_filename(filename, "syn"); + + if (!tsearch_readline_begin(&trst, filename)) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("could not open synonym file \"%s\": %m", + filename))); + + d = (DictSyn *) palloc0(sizeof(DictSyn)); + + while ((line = tsearch_readline(&trst)) != NULL) + { + starti = findwrd(line, &end, NULL); + if (!starti) + { + /* Empty line */ + goto skipline; + } + if (*end == '\0') + { + /* A line with only one word. Ignore silently. */ + goto skipline; + } + *end = '\0'; + + starto = findwrd(end + 1, &end, &flags); + if (!starto) + { + /* A line with only one word (+whitespace). Ignore silently. */ + goto skipline; + } + *end = '\0'; + + /* + * starti now points to the first word, and starto to the second word + * on the line, with a \0 terminator at the end of both words. + */ + + if (cur >= d->len) + { + if (d->len == 0) + { + d->len = 64; + d->syn = (Syn *) palloc(sizeof(Syn) * d->len); + } + else + { + d->len *= 2; + d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len); + } + } + + if (case_sensitive) + { + d->syn[cur].in = pstrdup(starti); + d->syn[cur].out = pstrdup(starto); + } + else + { + d->syn[cur].in = lowerstr(starti); + d->syn[cur].out = lowerstr(starto); + } + + d->syn[cur].outlen = strlen(starto); + d->syn[cur].flags = flags; + + cur++; + +skipline: + pfree(line); + } + + tsearch_readline_end(&trst); + + d->len = cur; + qsort(d->syn, d->len, sizeof(Syn), compareSyn); + + d->case_sensitive = case_sensitive; + + PG_RETURN_POINTER(d); +} + +Datum +dsynonym_lexize(PG_FUNCTION_ARGS) +{ + DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0); + char *in = (char *) PG_GETARG_POINTER(1); + int32 len = PG_GETARG_INT32(2); + Syn key, + *found; + TSLexeme *res; + + /* note: d->len test protects against Solaris bsearch-of-no-items bug */ + if (len <= 0 || d->len <= 0) + PG_RETURN_POINTER(NULL); + + if (d->case_sensitive) + key.in = pnstrdup(in, len); + else + key.in = lowerstr_with_len(in, len); + + key.out = NULL; + + found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn); + pfree(key.in); + + if (!found) + PG_RETURN_POINTER(NULL); + + res = palloc0(sizeof(TSLexeme) * 2); + res[0].lexeme = pnstrdup(found->out, found->outlen); + res[0].flags = found->flags; + + PG_RETURN_POINTER(res); +} -- cgit v1.2.3