/*-------------------------------------------------------------------------
 *
 * dict_synonym.c
 *		Synonym dictionary: replace word by its synonym
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/tsearch/dict_synonym.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "commands/defrem.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"

/*
 * One synonym entry: the word to match and the word to substitute.
 */
typedef struct
{
	char	   *in;				/* word to look up (sort/search key) */
	char	   *out;			/* replacement word, NUL-terminated */
	int			outlen;			/* length of 'out' in bytes */
	uint16		flags;			/* lexeme flags: TSL_PREFIX or 0 */
} Syn;

/*
 * Dictionary state built by dsynonym_init and consulted by dsynonym_lexize.
 */
typedef struct
{
	int			len;			/* length of syn array */
	Syn		   *syn;			/* entries, sorted by 'in' after init */
	bool		case_sensitive; /* if false, words are folded by lowerstr */
} DictSyn;

/*
 * Finds the next whitespace-delimited word within the 'in' string.
 * Returns a pointer to the first character of the word, and a pointer
 * to the next byte after the last character in the word (in *end).
 * Character '*' at the end of word will not be treated as word
 * character if flags is not null.
 *
 * If only whitespace remains, returns NULL and sets *end to NULL.
 * When flags is not null, *flags is set to TSL_PREFIX if a trailing '*'
 * was stripped, and to 0 otherwise.
 */
static char *
findwrd(char *in, char **end, uint16 *flags)
{
	char	   *start;
	char	   *lastchar;

	/* Skip leading spaces */
	while (*in && t_isspace(in))
		in += pg_mblen(in);

	/* Return NULL on empty lines */
	if (*in == '\0')
	{
		*end = NULL;
		return NULL;
	}

	lastchar = start = in;

	/* Find end of word, remembering where its last character starts */
	while (*in && !t_isspace(in))
	{
		lastchar = in;
		in += pg_mblen(in);
	}

	/* A single-byte '*' as the final character is a marker, not word text */
	if (in - lastchar == 1 && t_iseq(lastchar, '*') && flags)
	{
		*flags = TSL_PREFIX;
		*end = lastchar;
	}
	else
	{
		if (flags)
			*flags = 0;
		*end = in;
	}

	return start;
}

/*
 * qsort/bsearch comparator: orders Syn entries by their 'in' string
 * using strcmp.
 */
static int
compareSyn(const void *a, const void *b)
{
	return strcmp(((const Syn *) a)->in, ((const Syn *) b)->in);
}

/*
 * Dictionary init method.
 *
 * Argument 0 is a List of DefElem options:
 *	  "synonyms"	  - name of the synonym file (required)
 *	  "casesensitive" - boolean, defaults to false
 * Any other option name raises an error.
 *
 * Each line of the file is expected to hold two whitespace-separated
 * words; lines with fewer than two words are silently skipped, and
 * anything after the second word is ignored.  Returns a DictSyn whose
 * entries are sorted by input word.
 */
Datum
dsynonym_init(PG_FUNCTION_ARGS)
{
	List	   *dictoptions = (List *) PG_GETARG_POINTER(0);
	DictSyn    *d;
	ListCell   *l;
	char	   *filename = NULL;
	bool		case_sensitive = false;
	tsearch_readline_state trst;
	char	   *starti,
			   *starto,
			   *end = NULL;
	int			cur = 0;
	char	   *line = NULL;
	uint16		flags = 0;

	foreach(l, dictoptions)
	{
		DefElem    *defel = (DefElem *) lfirst(l);

		if (strcmp(defel->defname, "synonyms") == 0)
			filename = defGetString(defel);
		else if (strcmp(defel->defname, "casesensitive") == 0)
			case_sensitive = defGetBoolean(defel);
		else
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("unrecognized synonym parameter: \"%s\"",
							defel->defname)));
	}

	if (!filename)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("missing Synonyms parameter")));

	filename = get_tsearch_config_filename(filename, "syn");

	if (!tsearch_readline_begin(&trst, filename))
		ereport(ERROR,
				(errcode(ERRCODE_CONFIG_FILE_ERROR),
				 errmsg("could not open synonym file \"%s\": %m",
						filename)));

	d = (DictSyn *) palloc0(sizeof(DictSyn));

	while ((line = tsearch_readline(&trst)) != NULL)
	{
		starti = findwrd(line, &end, NULL);
		if (!starti)
		{
			/* Empty line */
			goto skipline;
		}
		if (*end == '\0')
		{
			/* A line with only one word. Ignore silently. */
			goto skipline;
		}
		*end = '\0';

		starto = findwrd(end + 1, &end, &flags);
		if (!starto)
		{
			/* A line with only one word (+whitespace). Ignore silently. */
			goto skipline;
		}
		*end = '\0';

		/*
		 * starti now points to the first word, and starto to the second word
		 * on the line, with a \0 terminator at the end of both words.
		 */

		/* Grow the array geometrically: 64 entries at first, then doubling */
		if (cur >= d->len)
		{
			if (d->len == 0)
			{
				d->len = 64;
				d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
			}
			else
			{
				d->len *= 2;
				d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
			}
		}

		if (case_sensitive)
		{
			d->syn[cur].in = pstrdup(starti);
			d->syn[cur].out = pstrdup(starto);
		}
		else
		{
			d->syn[cur].in = lowerstr(starti);
			d->syn[cur].out = lowerstr(starto);
		}

		/*
		 * Measure the string we actually stored, not the raw text from the
		 * file: in a multibyte encoding the case-folded form may differ in
		 * byte length from its source, and dsynonym_lexize copies exactly
		 * outlen bytes of 'out'.
		 */
		d->syn[cur].outlen = strlen(d->syn[cur].out);
		d->syn[cur].flags = flags;

		cur++;

skipline:
		pfree(line);
	}

	tsearch_readline_end(&trst);

	/* Record the number of entries actually used, not the allocated size */
	d->len = cur;

	/*
	 * Nothing to sort with fewer than two entries; this also avoids handing
	 * qsort a NULL array when the file contained no usable lines.
	 */
	if (d->len > 1)
		qsort(d->syn, d->len, sizeof(Syn), compareSyn);

	d->case_sensitive = case_sensitive;

	PG_RETURN_POINTER(d);
}

/*
 * Dictionary lexize method.
 *
 * Arguments: 0 = DictSyn built by dsynonym_init, 1 = input word (not
 * necessarily NUL-terminated), 2 = its length in bytes.
 *
 * Returns NULL if the input is empty, the dictionary is empty, or the
 * word has no entry.  Otherwise returns a palloc0'd array of two
 * TSLexeme: the first carries a copy of the synonym and its flags, the
 * second is left zeroed (presumably the array terminator expected by
 * the caller).
 */
Datum
dsynonym_lexize(PG_FUNCTION_ARGS)
{
	DictSyn    *d = (DictSyn *) PG_GETARG_POINTER(0);
	char	   *in = (char *) PG_GETARG_POINTER(1);
	int32		len = PG_GETARG_INT32(2);
	Syn			key,
			   *found;
	TSLexeme   *res;

	/* note: d->len test protects against Solaris bsearch-of-no-items bug */
	if (len <= 0 || d->len <= 0)
		PG_RETURN_POINTER(NULL);

	/* Build a search key normalized the same way as the stored 'in' words */
	if (d->case_sensitive)
		key.in = pnstrdup(in, len);
	else
		key.in = lowerstr_with_len(in, len);

	key.out = NULL;

	found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
	pfree(key.in);

	if (!found)
		PG_RETURN_POINTER(NULL);

	res = palloc0(sizeof(TSLexeme) * 2);
	res[0].lexeme = pnstrdup(found->out, found->outlen);
	res[0].flags = found->flags;

	PG_RETURN_POINTER(res);
}