diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:15:05 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:15:05 +0000 |
commit | 46651ce6fe013220ed397add242004d764fc0153 (patch) | |
tree | 6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/test/modules/test_parser | |
parent | Initial commit. (diff) | |
download | postgresql-14-46651ce6fe013220ed397add242004d764fc0153.tar.xz postgresql-14-46651ce6fe013220ed397add242004d764fc0153.zip |
Adding upstream version 14.5. (refs: upstream/14.5, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/test/modules/test_parser')
-rw-r--r-- | src/test/modules/test_parser/.gitignore | 4 | ||||
-rw-r--r-- | src/test/modules/test_parser/Makefile | 23 | ||||
-rw-r--r-- | src/test/modules/test_parser/README | 61 | ||||
-rw-r--r-- | src/test/modules/test_parser/expected/test_parser.out | 44 | ||||
-rw-r--r-- | src/test/modules/test_parser/sql/test_parser.sql | 18 | ||||
-rw-r--r-- | src/test/modules/test_parser/test_parser--1.0.sql | 32 | ||||
-rw-r--r-- | src/test/modules/test_parser/test_parser.c | 127 | ||||
-rw-r--r-- | src/test/modules/test_parser/test_parser.control | 5 |
8 files changed, 314 insertions, 0 deletions
diff --git a/src/test/modules/test_parser/.gitignore b/src/test/modules/test_parser/.gitignore new file mode 100644 index 0000000..5dcb3ff --- /dev/null +++ b/src/test/modules/test_parser/.gitignore @@ -0,0 +1,4 @@ +# Generated subdirectories +/log/ +/results/ +/tmp_check/ diff --git a/src/test/modules/test_parser/Makefile b/src/test/modules/test_parser/Makefile new file mode 100644 index 0000000..5327080 --- /dev/null +++ b/src/test/modules/test_parser/Makefile @@ -0,0 +1,23 @@ +# src/test/modules/test_parser/Makefile + +MODULE_big = test_parser +OBJS = \ + $(WIN32RES) \ + test_parser.o +PGFILEDESC = "test_parser - example of a custom parser for full-text search" + +EXTENSION = test_parser +DATA = test_parser--1.0.sql + +REGRESS = test_parser + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/test_parser +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/src/test/modules/test_parser/README b/src/test/modules/test_parser/README new file mode 100644 index 0000000..0a11ec8 --- /dev/null +++ b/src/test/modules/test_parser/README @@ -0,0 +1,61 @@ +test_parser is an example of a custom parser for full-text +search. It doesn't do anything especially useful, but can serve as +a starting point for developing your own parser. + +test_parser recognizes words separated by white space, +and returns just two token types: + +mydb=# SELECT * FROM ts_token_type('testparser'); + tokid | alias | description +-------+-------+--------------- + 3 | word | Word + 12 | blank | Space symbols +(2 rows) + +These token numbers have been chosen to be compatible with the default +parser's numbering. This allows us to use its headline() +function, thus keeping the example simple. + +Usage +===== + +Installing the test_parser extension creates a text search +parser testparser. It has no user-configurable parameters. 
+ +You can test the parser with, for example, + +mydb=# SELECT * FROM ts_parse('testparser', 'That''s my first own parser'); + tokid | token +-------+-------- + 3 | That's + 12 | + 3 | my + 12 | + 3 | first + 12 | + 3 | own + 12 | + 3 | parser + +Real-world use requires setting up a text search configuration +that uses the parser. For example, + +mydb=# CREATE TEXT SEARCH CONFIGURATION testcfg ( PARSER = testparser ); +CREATE TEXT SEARCH CONFIGURATION + +mydb=# ALTER TEXT SEARCH CONFIGURATION testcfg +mydb-# ADD MAPPING FOR word WITH english_stem; +ALTER TEXT SEARCH CONFIGURATION + +mydb=# SELECT to_tsvector('testcfg', 'That''s my first own parser'); + to_tsvector +------------------------------- + 'that':1 'first':3 'parser':5 +(1 row) + +mydb=# SELECT ts_headline('testcfg', 'Supernovae stars are the brightest phenomena in galaxies', +mydb(# to_tsquery('testcfg', 'star')); + ts_headline +----------------------------------------------------------------- + Supernovae <b>stars</b> are the brightest phenomena in galaxies +(1 row) diff --git a/src/test/modules/test_parser/expected/test_parser.out b/src/test/modules/test_parser/expected/test_parser.out new file mode 100644 index 0000000..8a49bc0 --- /dev/null +++ b/src/test/modules/test_parser/expected/test_parser.out @@ -0,0 +1,44 @@ +CREATE EXTENSION test_parser; +-- make test configuration using parser +CREATE TEXT SEARCH CONFIGURATION testcfg (PARSER = testparser); +ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple; +-- ts_parse +SELECT * FROM ts_parse('testparser', 'That''s simple parser can''t parse urls like http://some.url/here/'); + tokid | token +-------+----------------------- + 3 | That's + 12 | + 3 | simple + 12 | + 3 | parser + 12 | + 3 | can't + 12 | + 3 | parse + 12 | + 3 | urls + 12 | + 3 | like + 12 | + 3 | http://some.url/here/ +(15 rows) + +SELECT to_tsvector('testcfg','That''s my first own parser'); + to_tsvector +------------------------------------------------- + 'first':3 
'my':2 'own':4 'parser':5 'that''s':1 +(1 row) + +SELECT to_tsquery('testcfg', 'star'); + to_tsquery +------------ + 'star' +(1 row) + +SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies', + to_tsquery('testcfg', 'stars')); + ts_headline +----------------------------------------------------------------- + Supernovae <b>stars</b> are the brightest phenomena in galaxies +(1 row) + diff --git a/src/test/modules/test_parser/sql/test_parser.sql b/src/test/modules/test_parser/sql/test_parser.sql new file mode 100644 index 0000000..1f21504 --- /dev/null +++ b/src/test/modules/test_parser/sql/test_parser.sql @@ -0,0 +1,18 @@ +CREATE EXTENSION test_parser; + +-- make test configuration using parser + +CREATE TEXT SEARCH CONFIGURATION testcfg (PARSER = testparser); + +ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple; + +-- ts_parse + +SELECT * FROM ts_parse('testparser', 'That''s simple parser can''t parse urls like http://some.url/here/'); + +SELECT to_tsvector('testcfg','That''s my first own parser'); + +SELECT to_tsquery('testcfg', 'star'); + +SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies', + to_tsquery('testcfg', 'stars')); diff --git a/src/test/modules/test_parser/test_parser--1.0.sql b/src/test/modules/test_parser/test_parser--1.0.sql new file mode 100644 index 0000000..56bb244 --- /dev/null +++ b/src/test/modules/test_parser/test_parser--1.0.sql @@ -0,0 +1,32 @@ +/* src/test/modules/test_parser/test_parser--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION test_parser" to load this file. 
\quit + +CREATE FUNCTION testprs_start(internal, int4) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE FUNCTION testprs_getlexeme(internal, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE FUNCTION testprs_end(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE FUNCTION testprs_lextype(internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE TEXT SEARCH PARSER testparser ( + START = testprs_start, + GETTOKEN = testprs_getlexeme, + END = testprs_end, + HEADLINE = pg_catalog.prsd_headline, + LEXTYPES = testprs_lextype +); diff --git a/src/test/modules/test_parser/test_parser.c b/src/test/modules/test_parser/test_parser.c new file mode 100644 index 0000000..f133676 --- /dev/null +++ b/src/test/modules/test_parser/test_parser.c @@ -0,0 +1,127 @@ +/*------------------------------------------------------------------------- + * + * test_parser.c + * Simple example of a text search parser + * + * Copyright (c) 2007-2021, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/test/modules/test_parser/test_parser.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "fmgr.h" + +PG_MODULE_MAGIC; + +/* + * types + */ + +/* self-defined type */ +typedef struct +{ + char *buffer; /* text to parse */ + int len; /* length of the text in buffer */ + int pos; /* position of the parser */ +} ParserState; +/* One token-type descriptor: numeric id (lexid), short alias and description string; testprs_lextype fills an array of these. */ +typedef struct +{ + int lexid; + char *alias; + char *descr; +} LexDescr; + +/* + * functions + */ +PG_FUNCTION_INFO_V1(testprs_start); +PG_FUNCTION_INFO_V1(testprs_getlexeme); +PG_FUNCTION_INFO_V1(testprs_end); +PG_FUNCTION_INFO_V1(testprs_lextype); +/* testprs_start: allocate a zero-filled ParserState, store the text pointer (argument 0) and its length (argument 1), set the scan position to 0, and return the state pointer. The buffer pointer is stored as-is; the text is not copied here. */ +Datum +testprs_start(PG_FUNCTION_ARGS) +{ + ParserState *pst = (ParserState *) palloc0(sizeof(ParserState)); + + pst->buffer = (char *) PG_GETARG_POINTER(0); + pst->len = PG_GETARG_INT32(1); + pst->pos = 0; + + PG_RETURN_POINTER(pst); +} + 
/* testprs_getlexeme: scan the next token from the ParserState in argument 0. Stores the token start in *t (argument 1) and its length in *tlen (argument 2), advances pst->pos past the token, and returns the token type: 12 for a run of ' ' characters, 3 for a run of any other characters, 0 when the token length is 0 (position already at pst->len). Only the space character ' ' is treated as a separator. */ +Datum +testprs_getlexeme(PG_FUNCTION_ARGS) +{ + ParserState *pst = (ParserState *) PG_GETARG_POINTER(0); + char **t = (char **) PG_GETARG_POINTER(1); + int *tlen = (int *) PG_GETARG_POINTER(2); + int startpos = pst->pos; + int type; + + *t = pst->buffer + pst->pos; + + if (pst->pos < pst->len && + (pst->buffer)[pst->pos] == ' ') + { + /* blank type */ + type = 12; + /* go to the next non-space character */ + while (pst->pos < pst->len && + (pst->buffer)[pst->pos] == ' ') + (pst->pos)++; + } + else + { + /* word type */ + type = 3; + /* go to the next space character */ + while (pst->pos < pst->len && + (pst->buffer)[pst->pos] != ' ') + (pst->pos)++; + } + + *tlen = pst->pos - startpos; + + /* we are finished if (*tlen == 0) */ + if (*tlen == 0) + type = 0; + + PG_RETURN_INT32(type); +} +/* testprs_end: release the ParserState passed as argument 0 with pfree; the text buffer it points to is not freed here. */ +Datum +testprs_end(PG_FUNCTION_ARGS) +{ + ParserState *pst = (ParserState *) PG_GETARG_POINTER(0); + + pfree(pst); + PG_RETURN_VOID(); +} +/* testprs_lextype: return a newly palloc'd array of three LexDescr entries describing the token types this parser emits (3 = "word", 12 = "blank"), followed by an entry whose lexid is 0. The alias and description strings are pstrdup'd copies. */ +Datum +testprs_lextype(PG_FUNCTION_ARGS) +{ + /* + * Remarks: - we have to return the blanks for headline reason - we use + * the same lexids like Teodor in the default word parser; in this way we + * can reuse the headline function of the default word parser. + */ + LexDescr *descr = (LexDescr *) palloc(sizeof(LexDescr) * (2 + 1)); + + /* there are only two types in this parser */ + descr[0].lexid = 3; + descr[0].alias = pstrdup("word"); + descr[0].descr = pstrdup("Word"); + descr[1].lexid = 12; + descr[1].alias = pstrdup("blank"); + descr[1].descr = pstrdup("Space symbols"); + descr[2].lexid = 0; /* presumably the end-of-list marker for the caller (confirm); alias and descr of this entry are left unset because palloc does not zero memory */ + + PG_RETURN_POINTER(descr); +} diff --git a/src/test/modules/test_parser/test_parser.control b/src/test/modules/test_parser/test_parser.control new file mode 100644 index 0000000..36b26b2 --- /dev/null +++ b/src/test/modules/test_parser/test_parser.control @@ -0,0 +1,5 @@ +# test_parser extension +comment = 'example of a custom parser for full-text search' +default_version = '1.0' +module_pathname = '$libdir/test_parser' +relocatable = true |