1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
|
/*-------------------------------------------------------------------------
*
* dict_simple.c
* Simple dictionary: just lowercase and check for stopword
*
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* src/backend/tsearch/dict_simple.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "commands/defrem.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
typedef struct
{
StopList stoplist;
bool accept;
} DictSimple;
Datum
dsimple_init(PG_FUNCTION_ARGS)
{
List *dictoptions = (List *) PG_GETARG_POINTER(0);
DictSimple *d = (DictSimple *) palloc0(sizeof(DictSimple));
bool stoploaded = false,
acceptloaded = false;
ListCell *l;
d->accept = true; /* default */
foreach(l, dictoptions)
{
DefElem *defel = (DefElem *) lfirst(l);
if (strcmp(defel->defname, "stopwords") == 0)
{
if (stoploaded)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple StopWords parameters")));
readstoplist(defGetString(defel), &d->stoplist, lowerstr);
stoploaded = true;
}
else if (strcmp(defel->defname, "accept") == 0)
{
if (acceptloaded)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple Accept parameters")));
d->accept = defGetBoolean(defel);
acceptloaded = true;
}
else
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized simple dictionary parameter: \"%s\"",
defel->defname)));
}
}
PG_RETURN_POINTER(d);
}
Datum
dsimple_lexize(PG_FUNCTION_ARGS)
{
DictSimple *d = (DictSimple *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
int32 len = PG_GETARG_INT32(2);
char *txt;
TSLexeme *res;
txt = lowerstr_with_len(in, len);
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
{
/* reject as stopword */
pfree(txt);
res = palloc0(sizeof(TSLexeme) * 2);
PG_RETURN_POINTER(res);
}
else if (d->accept)
{
/* accept */
res = palloc0(sizeof(TSLexeme) * 2);
res[0].lexeme = txt;
PG_RETURN_POINTER(res);
}
else
{
/* report as unrecognized */
pfree(txt);
PG_RETURN_POINTER(NULL);
}
}
|