summaryrefslogtreecommitdiffstats
path: root/src/backend/utils/adt/tsginidx.c
blob: e272fca0756ff47aa7be82d9d72bdca519d67177 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
/*-------------------------------------------------------------------------
 *
 * tsginidx.c
 *	 GIN support functions for tsvector_ops
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/utils/adt/tsginidx.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/gin.h"
#include "access/stratnum.h"
#include "miscadmin.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"


/*
 * gin_cmp_tslexeme
 *		Compare two lexemes stored as text datums.
 *
 * Argument 0 and argument 1 are the two text values.  Their payload bytes
 * and payload lengths are handed to tsCompareString() with the trailing
 * flag set to false, and whatever that routine reports is returned as an
 * int32.
 */
Datum
gin_cmp_tslexeme(PG_FUNCTION_ARGS)
{
	text	   *lhs = PG_GETARG_TEXT_PP(0);
	text	   *rhs = PG_GETARG_TEXT_PP(1);
	char	   *lhs_data = VARDATA_ANY(lhs);
	int			lhs_len = VARSIZE_ANY_EXHDR(lhs);
	char	   *rhs_data = VARDATA_ANY(rhs);
	int			rhs_len = VARSIZE_ANY_EXHDR(rhs);
	int			result;

	result = tsCompareString(lhs_data, lhs_len, rhs_data, rhs_len, false);

	/* Release any copies the argument fetch had to make. */
	PG_FREE_IF_COPY(lhs, 0);
	PG_FREE_IF_COPY(rhs, 1);

	PG_RETURN_INT32(result);
}

/*
 * gin_cmp_prefix
 *		Compare two lexemes stored as text datums, with the trailing flag
 *		of tsCompareString() set to true.
 *
 * A negative comparison result is never passed back: it is replaced by 1,
 * which (per the note in the code) keeps the scan from continuing.  Zero
 * and positive results are returned unchanged.
 */
Datum
gin_cmp_prefix(PG_FUNCTION_ARGS)
{
	text	   *lhs = PG_GETARG_TEXT_PP(0);
	text	   *rhs = PG_GETARG_TEXT_PP(1);

#ifdef NOT_USED
	StrategyNumber strategy = PG_GETARG_UINT16(2);
	Pointer		extra_data = PG_GETARG_POINTER(3);
#endif
	char	   *lhs_data = VARDATA_ANY(lhs);
	int			lhs_len = VARSIZE_ANY_EXHDR(lhs);
	char	   *rhs_data = VARDATA_ANY(rhs);
	int			rhs_len = VARSIZE_ANY_EXHDR(rhs);
	int			result;

	result = tsCompareString(lhs_data, lhs_len, rhs_data, rhs_len, true);

	/* Turn "less than" into "greater than" so the scan does not continue. */
	result = (result < 0) ? 1 : result;

	/* Release any copies the argument fetch had to make. */
	PG_FREE_IF_COPY(lhs, 0);
	PG_FREE_IF_COPY(rhs, 1);

	PG_RETURN_INT32(result);
}

Datum
gin_extract_tsvector(PG_FUNCTION_ARGS)
{
	TSVector	vector = PG_GETARG_TSVECTOR(0);
	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
	Datum	   *entries = NULL;

	*nentries = vector->size;
	if (vector->size > 0)
	{
		int			i;
		WordEntry  *we = ARRPTR(vector);

		entries = (Datum *) palloc(sizeof(Datum) * vector->size);

		for (i = 0; i < vector->size; i++)
		{
			text	   *txt;

			txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len);
			entries[i] = PointerGetDatum(txt);

			we++;
		}
	}

	PG_FREE_IF_COPY(vector, 0);
	PG_RETURN_POINTER(entries);
}

Datum
gin_extract_tsquery(PG_FUNCTION_ARGS)
{
	TSQuery		query = PG_GETARG_TSQUERY(0);
	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);

	/* StrategyNumber strategy = PG_GETARG_UINT16(2); */
	bool	  **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3);
	Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);

	/* bool   **nullFlags = (bool **) PG_GETARG_POINTER(5); */
	int32	   *searchMode = (int32 *) PG_GETARG_POINTER(6);
	Datum	   *entries = NULL;

	*nentries = 0;

	if (query->size > 0)
	{
		QueryItem  *item = GETQUERY(query);
		int32		i,
					j;
		bool	   *partialmatch;
		int		   *map_item_operand;

		/*
		 * If the query doesn't have any required positive matches (for
		 * instance, it's something like '! foo'), we have to do a full index
		 * scan.
		 */
		if (tsquery_requires_match(item))
			*searchMode = GIN_SEARCH_MODE_DEFAULT;
		else
			*searchMode = GIN_SEARCH_MODE_ALL;

		/* count number of VAL items */
		j = 0;
		for (i = 0; i < query->size; i++)
		{
			if (item[i].type == QI_VAL)
				j++;
		}
		*nentries = j;

		entries = (Datum *) palloc(sizeof(Datum) * j);
		partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j);

		/*
		 * Make map to convert item's number to corresponding operand's (the
		 * same, entry's) number. Entry's number is used in check array in
		 * consistent method. We use the same map for each entry.
		 */
		*extra_data = (Pointer *) palloc(sizeof(Pointer) * j);
		map_item_operand = (int *) palloc0(sizeof(int) * query->size);

		/* Now rescan the VAL items and fill in the arrays */
		j = 0;
		for (i = 0; i < query->size; i++)
		{
			if (item[i].type == QI_VAL)
			{
				QueryOperand *val = &item[i].qoperand;
				text	   *txt;

				txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
											   val->length);
				entries[j] = PointerGetDatum(txt);
				partialmatch[j] = val->prefix;
				(*extra_data)[j] = (Pointer) map_item_operand;
				map_item_operand[i] = j;
				j++;
			}
		}
	}

	PG_FREE_IF_COPY(query, 0);

	PG_RETURN_POINTER(entries);
}

/*
 * State handed to checkcondition_gin() through TS_execute_ternary()'s opaque
 * callback argument.
 */
typedef struct
{
	/* first QueryItem of the query; base for computing an item's number */
	QueryItem  *first_item;
	/* per-entry check values supplied to the consistent function */
	GinTernaryValue *check;
	/* maps an item's number to the number of its entry in "check" */
	int		   *map_item_operand;
} GinChkVal;

/*
 * checkcondition_gin
 *		TS_execute callback: report whether a tsquery operand is present
 *		according to the GIN check array.
 *
 * checkval is a GinChkVal.  val is the operand being tested; it must be a
 * member of the item array that starts at first_item, because its position
 * in that array is what selects the map slot.  data is non-NULL when the
 * caller wants position information.
 */
static TSTernaryValue
checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
{
	GinChkVal  *state = (GinChkVal *) checkval;
	int			entryno;
	GinTernaryValue presence;

	/* Translate the item's position in the query into its entry number. */
	entryno = state->map_item_operand[((QueryItem *) val) - state->first_item];

	/* Look up what the index knows about that entry. */
	presence = state->check[entryno];

	/*
	 * A definite hit cannot be trusted as-is when the operand carries a
	 * weight restriction or when position information was asked for: either
	 * way a recheck is needed, so downgrade TRUE to MAYBE.
	 */
	if (presence == GIN_TRUE && (val->weight != 0 || data != NULL))
		presence = GIN_MAYBE;

	/*
	 * GinTernaryValue and TSTernaryValue are relied upon to use equivalent
	 * value assignments, so a cast suffices.  Should that ever change, an
	 * explicit mapping would be needed here.
	 */
	return (TSTernaryValue) presence;
}

Datum
gin_tsquery_consistent(PG_FUNCTION_ARGS)
{
	bool	   *check = (bool *) PG_GETARG_POINTER(0);

	/* StrategyNumber strategy = PG_GETARG_UINT16(1); */
	TSQuery		query = PG_GETARG_TSQUERY(2);

	/* int32	nkeys = PG_GETARG_INT32(3); */
	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
	bool	   *recheck = (bool *) PG_GETARG_POINTER(5);
	bool		res = false;

	/* Initially assume query doesn't require recheck */
	*recheck = false;

	if (query->size > 0)
	{
		GinChkVal	gcv;

		/*
		 * check-parameter array has one entry for each value (operand) in the
		 * query.
		 */
		gcv.first_item = GETQUERY(query);
		StaticAssertStmt(sizeof(GinTernaryValue) == sizeof(bool),
						 "sizes of GinTernaryValue and bool are not equal");
		gcv.check = (GinTernaryValue *) check;
		gcv.map_item_operand = (int *) (extra_data[0]);

		switch (TS_execute_ternary(GETQUERY(query),
								   &gcv,
								   TS_EXEC_PHRASE_NO_POS,
								   checkcondition_gin))
		{
			case TS_NO:
				res = false;
				break;
			case TS_YES:
				res = true;
				break;
			case TS_MAYBE:
				res = true;
				*recheck = true;
				break;
		}
	}

	PG_RETURN_BOOL(res);
}

Datum
gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
{
	GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);

	/* StrategyNumber strategy = PG_GETARG_UINT16(1); */
	TSQuery		query = PG_GETARG_TSQUERY(2);

	/* int32	nkeys = PG_GETARG_INT32(3); */
	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
	GinTernaryValue res = GIN_FALSE;

	if (query->size > 0)
	{
		GinChkVal	gcv;

		/*
		 * check-parameter array has one entry for each value (operand) in the
		 * query.
		 */
		gcv.first_item = GETQUERY(query);
		gcv.check = check;
		gcv.map_item_operand = (int *) (extra_data[0]);

		res = TS_execute_ternary(GETQUERY(query),
								 &gcv,
								 TS_EXEC_PHRASE_NO_POS,
								 checkcondition_gin);
	}

	PG_RETURN_GIN_TERNARY_VALUE(res);
}

/*
 * gin_extract_tsvector_2args
 *		Compatibility entry point for gin_extract_tsvector.
 *
 * gin_extract_tsvector used to take two arguments and now takes three.  A
 * pg_proc entry with the two-argument signature is still required so that
 * pre-9.1 contrib/tsearch2 opclass declarations can be reloaded; this
 * function backs that entry and should be removed eventually.
 *
 * (The real function only *uses* two arguments, so it is tempting to just
 * declare it with two.  Doing so makes the opr_sanity regression test
 * complain.)
 */
Datum
gin_extract_tsvector_2args(PG_FUNCTION_ARGS)
{
	Datum		result;

	/* Not expected to happen. */
	if (PG_NARGS() < 3)
		elog(ERROR, "gin_extract_tsvector requires three arguments");

	result = gin_extract_tsvector(fcinfo);
	return result;
}

/*
 * gin_extract_tsquery_5args
 *		Compatibility entry point for gin_extract_tsquery.
 *
 * Stub declared with only five arguments, kept for the same backward
 * compatibility reason as gin_extract_tsvector_2args.  It insists on
 * actually being called with seven arguments and then defers to the real
 * function.
 */
Datum
gin_extract_tsquery_5args(PG_FUNCTION_ARGS)
{
	Datum		result;

	/* Not expected to happen. */
	if (PG_NARGS() < 7)
		elog(ERROR, "gin_extract_tsquery requires seven arguments");

	result = gin_extract_tsquery(fcinfo);
	return result;
}

/*
 * gin_tsquery_consistent_6args
 *		Compatibility entry point for gin_tsquery_consistent.
 *
 * Stub declared with only six arguments, kept for the same backward
 * compatibility reason as gin_extract_tsvector_2args.  It insists on
 * actually being called with eight arguments and then defers to the real
 * function.
 */
Datum
gin_tsquery_consistent_6args(PG_FUNCTION_ARGS)
{
	Datum		result;

	/* Not expected to happen. */
	if (PG_NARGS() < 8)
		elog(ERROR, "gin_tsquery_consistent requires eight arguments");

	result = gin_tsquery_consistent(fcinfo);
	return result;
}

/*
 * gin_extract_tsquery_oldsig
 *		Compatibility entry point for gin_extract_tsquery.
 *
 * Stub declared with argument types that are no longer considered
 * appropriate.  It performs no checks of its own and simply defers to the
 * real function.
 */
Datum
gin_extract_tsquery_oldsig(PG_FUNCTION_ARGS)
{
	Datum		result;

	result = gin_extract_tsquery(fcinfo);
	return result;
}

/*
 * gin_tsquery_consistent_oldsig
 *		Compatibility entry point for gin_tsquery_consistent.
 *
 * Stub declared with argument types that are no longer considered
 * appropriate.  It performs no checks of its own and simply defers to the
 * real function.
 */
Datum
gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS)
{
	Datum		result;

	result = gin_tsquery_consistent(fcinfo);
	return result;
}