summaryrefslogtreecommitdiffstats
path: root/src/test/modules/test_parser
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:15:05 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:15:05 +0000
commit46651ce6fe013220ed397add242004d764fc0153 (patch)
tree6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/test/modules/test_parser
parentInitial commit. (diff)
downloadpostgresql-14-46651ce6fe013220ed397add242004d764fc0153.tar.xz
postgresql-14-46651ce6fe013220ed397add242004d764fc0153.zip
Adding upstream version 14.5.upstream/14.5upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/test/modules/test_parser')
-rw-r--r--src/test/modules/test_parser/.gitignore4
-rw-r--r--src/test/modules/test_parser/Makefile23
-rw-r--r--src/test/modules/test_parser/README61
-rw-r--r--src/test/modules/test_parser/expected/test_parser.out44
-rw-r--r--src/test/modules/test_parser/sql/test_parser.sql18
-rw-r--r--src/test/modules/test_parser/test_parser--1.0.sql32
-rw-r--r--src/test/modules/test_parser/test_parser.c127
-rw-r--r--src/test/modules/test_parser/test_parser.control5
8 files changed, 314 insertions, 0 deletions
diff --git a/src/test/modules/test_parser/.gitignore b/src/test/modules/test_parser/.gitignore
new file mode 100644
index 0000000..5dcb3ff
--- /dev/null
+++ b/src/test/modules/test_parser/.gitignore
@@ -0,0 +1,4 @@
+# Generated subdirectories
+/log/
+/results/
+/tmp_check/
diff --git a/src/test/modules/test_parser/Makefile b/src/test/modules/test_parser/Makefile
new file mode 100644
index 0000000..5327080
--- /dev/null
+++ b/src/test/modules/test_parser/Makefile
@@ -0,0 +1,23 @@
+# src/test/modules/test_parser/Makefile
+
+MODULE_big = test_parser
+OBJS = \
+ $(WIN32RES) \
+ test_parser.o
+PGFILEDESC = "test_parser - example of a custom parser for full-text search"
+
+EXTENSION = test_parser
+DATA = test_parser--1.0.sql
+
+REGRESS = test_parser
+
+ifdef USE_PGXS # build through the PGXS makefile reported by "pg_config --pgxs"
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else # build in place, using the makefiles found relative to top_builddir
+subdir = src/test/modules/test_parser
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif # USE_PGXS
diff --git a/src/test/modules/test_parser/README b/src/test/modules/test_parser/README
new file mode 100644
index 0000000..0a11ec8
--- /dev/null
+++ b/src/test/modules/test_parser/README
@@ -0,0 +1,61 @@
+test_parser is an example of a custom parser for full-text
+search. It doesn't do anything especially useful, but can serve as
+a starting point for developing your own parser.
+
+test_parser recognizes words separated by space characters (only ' ' is
+treated as a separator), and returns just two token types:
+
+mydb=# SELECT * FROM ts_token_type('testparser');
+ tokid | alias | description
+-------+-------+---------------
+ 3 | word | Word
+ 12 | blank | Space symbols
+(2 rows)
+
+These token numbers have been chosen to be compatible with the default
+parser's numbering. This allows us to reuse the default parser's headline
+function (prsd_headline), thus keeping the example simple.
+
+Usage
+=====
+
+Installing the test_parser extension creates a text search
+parser testparser. It has no user-configurable parameters.
+
+You can test the parser with, for example,
+
+mydb=# SELECT * FROM ts_parse('testparser', 'That''s my first own parser');
+ tokid | token
+-------+--------
+ 3 | That's
+ 12 |
+ 3 | my
+ 12 |
+ 3 | first
+ 12 |
+ 3 | own
+ 12 |
+ 3 | parser
+
+Real-world use requires setting up a text search configuration
+that uses the parser. For example,
+
+mydb=# CREATE TEXT SEARCH CONFIGURATION testcfg ( PARSER = testparser );
+CREATE TEXT SEARCH CONFIGURATION
+
+mydb=# ALTER TEXT SEARCH CONFIGURATION testcfg
+mydb-# ADD MAPPING FOR word WITH english_stem;
+ALTER TEXT SEARCH CONFIGURATION
+
+mydb=# SELECT to_tsvector('testcfg', 'That''s my first own parser');
+ to_tsvector
+-------------------------------
+ 'that':1 'first':3 'parser':5
+(1 row)
+
+mydb=# SELECT ts_headline('testcfg', 'Supernovae stars are the brightest phenomena in galaxies',
+mydb(# to_tsquery('testcfg', 'star'));
+ ts_headline
+-----------------------------------------------------------------
+ Supernovae <b>stars</b> are the brightest phenomena in galaxies
+(1 row)
diff --git a/src/test/modules/test_parser/expected/test_parser.out b/src/test/modules/test_parser/expected/test_parser.out
new file mode 100644
index 0000000..8a49bc0
--- /dev/null
+++ b/src/test/modules/test_parser/expected/test_parser.out
@@ -0,0 +1,44 @@
+CREATE EXTENSION test_parser;
+-- make test configuration using parser
+CREATE TEXT SEARCH CONFIGURATION testcfg (PARSER = testparser);
+ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple;
+-- ts_parse
+SELECT * FROM ts_parse('testparser', 'That''s simple parser can''t parse urls like http://some.url/here/');
+ tokid | token
+-------+-----------------------
+ 3 | That's
+ 12 |
+ 3 | simple
+ 12 |
+ 3 | parser
+ 12 |
+ 3 | can't
+ 12 |
+ 3 | parse
+ 12 |
+ 3 | urls
+ 12 |
+ 3 | like
+ 12 |
+ 3 | http://some.url/here/
+(15 rows)
+
+SELECT to_tsvector('testcfg','That''s my first own parser');
+ to_tsvector
+-------------------------------------------------
+ 'first':3 'my':2 'own':4 'parser':5 'that''s':1
+(1 row)
+
+SELECT to_tsquery('testcfg', 'star');
+ to_tsquery
+------------
+ 'star'
+(1 row)
+
+SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies',
+ to_tsquery('testcfg', 'stars'));
+ ts_headline
+-----------------------------------------------------------------
+ Supernovae <b>stars</b> are the brightest phenomena in galaxies
+(1 row)
+
diff --git a/src/test/modules/test_parser/sql/test_parser.sql b/src/test/modules/test_parser/sql/test_parser.sql
new file mode 100644
index 0000000..1f21504
--- /dev/null
+++ b/src/test/modules/test_parser/sql/test_parser.sql
@@ -0,0 +1,18 @@
+CREATE EXTENSION test_parser;
+
+-- make test configuration using parser
+
+CREATE TEXT SEARCH CONFIGURATION testcfg (PARSER = testparser);
+
+ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple;
+
+-- ts_parse
+
+SELECT * FROM ts_parse('testparser', 'That''s simple parser can''t parse urls like http://some.url/here/');
+
+SELECT to_tsvector('testcfg','That''s my first own parser');
+
+SELECT to_tsquery('testcfg', 'star');
+
+SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies',
+ to_tsquery('testcfg', 'stars'));
diff --git a/src/test/modules/test_parser/test_parser--1.0.sql b/src/test/modules/test_parser/test_parser--1.0.sql
new file mode 100644
index 0000000..56bb244
--- /dev/null
+++ b/src/test/modules/test_parser/test_parser--1.0.sql
@@ -0,0 +1,32 @@
+/* src/test/modules/test_parser/test_parser--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION test_parser" to load this file. \quit
+
+CREATE FUNCTION testprs_start(internal, int4) -- (text to parse, its length) -> newly allocated parser state
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION testprs_getlexeme(internal, internal, internal) -- (parser state, out: token pointer, out: token length)
+RETURNS internal -- the C function returns the token type as an int32 (0 when no text is left)
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION testprs_end(internal) -- frees the parser state created by testprs_start
+RETURNS void
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION testprs_lextype(internal) -- the C function does not read its argument
+RETURNS internal -- pointer to an array describing the token types (word = 3, blank = 12)
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+CREATE TEXT SEARCH PARSER testparser (
+ START = testprs_start,
+ GETTOKEN = testprs_getlexeme,
+ END = testprs_end,
+ HEADLINE = pg_catalog.prsd_headline, -- not implemented by this module; usable because the token ids match
+ LEXTYPES = testprs_lextype
+);
diff --git a/src/test/modules/test_parser/test_parser.c b/src/test/modules/test_parser/test_parser.c
new file mode 100644
index 0000000..f133676
--- /dev/null
+++ b/src/test/modules/test_parser/test_parser.c
@@ -0,0 +1,127 @@
+/*-------------------------------------------------------------------------
+ *
+ * test_parser.c
+ * Simple example of a text search parser
+ *
+ * Copyright (c) 2007-2021, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/test/modules/test_parser/test_parser.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "fmgr.h"
+
+PG_MODULE_MAGIC;
+
+/*
+ * types
+ */
+
+/* Parser state: allocated by testprs_start, advanced by testprs_getlexeme, freed by testprs_end */
+typedef struct
+{
+ char *buffer; /* text to parse; the pointer is stored as given and read only below len */
+ int len; /* length of the text in buffer */
+ int pos; /* offset in buffer at which the next token starts */
+} ParserState;
+
+typedef struct /* one token-type entry; NOTE(review): presumably must match the layout the server expects - confirm */
+{
+ int lexid; /* token type number; testprs_lextype ends its array with an entry whose lexid is 0 */
+ char *alias; /* short name of the token type, e.g. "word" */
+ char *descr; /* description of the token type, e.g. "Space symbols" */
+} LexDescr;
+
+/*
+ * functions
+ */
+PG_FUNCTION_INFO_V1(testprs_start);
+PG_FUNCTION_INFO_V1(testprs_getlexeme);
+PG_FUNCTION_INFO_V1(testprs_end);
+PG_FUNCTION_INFO_V1(testprs_lextype);
+
+Datum
+testprs_start(PG_FUNCTION_ARGS)
+{
+	/* Fresh zeroed state; the text pointer (arg 0) is stored, not copied. */
+	ParserState *state = palloc0(sizeof(*state));
+
+	state->buffer = (char *) PG_GETARG_POINTER(0);
+	state->len = PG_GETARG_INT32(1);	/* arg 1: length of the text */
+	state->pos = 0;				/* scanning begins at the first byte */
+	PG_RETURN_POINTER(state);
+}
+
+Datum
+testprs_getlexeme(PG_FUNCTION_ARGS)
+{
+	ParserState *state = (ParserState *) PG_GETARG_POINTER(0);
+	char	  **token = (char **) PG_GETARG_POINTER(1);
+	int		   *token_len = (int *) PG_GETARG_POINTER(2);
+	const char *text = state->buffer;
+	const int	limit = state->len;
+	const int	begin = state->pos;
+	int			cur = begin;
+	int			lextype;
+
+	/* The token is handed back as a pointer into the buffer, not a copy. */
+	*token = state->buffer + begin;
+
+	if (cur < limit && text[cur] == ' ')
+	{
+		/* Token type 12 (blank): consume the whole run of spaces. */
+		lextype = 12;
+		do {
+			cur++;
+		} while (cur < limit && text[cur] == ' ');
+	}
+	else
+	{
+		/* Token type 3 (word): consume up to the next space or the end. */
+		lextype = 3;
+		while (cur < limit && text[cur] != ' ')
+			cur++;
+	}
+
+	state->pos = cur;
+	*token_len = cur - begin;
+
+	/* Nothing consumed means the text is exhausted: report type 0. */
+	if (*token_len == 0)
+		lextype = 0;
+	PG_RETURN_INT32(lextype);
+}
+
+Datum
+testprs_end(PG_FUNCTION_ARGS)
+{
+	/* Arg 0 is the ParserState allocated by testprs_start; release it. */
+	pfree(PG_GETARG_POINTER(0));
+
+	PG_RETURN_VOID();
+}
+
+Datum
+testprs_lextype(PG_FUNCTION_ARGS)
+{
+	/*
+	 * Two real entries plus a trailing one with lexid 0 (presumably the list
+	 * terminator).  Blanks must be reported for headline generation, and the
+	 * ids 3 and 12 match the default parser's so its headline code is reusable.
+	 */
+	LexDescr   *types = (LexDescr *) palloc(3 * sizeof(LexDescr));
+	LexDescr   *word = &types[0];
+	LexDescr   *blank = &types[1];
+
+	word->lexid = 3;
+	word->alias = pstrdup("word");
+	word->descr = pstrdup("Word");
+	blank->lexid = 12;
+	blank->alias = pstrdup("blank");
+	blank->descr = pstrdup("Space symbols");
+	types[2].lexid = 0;			/* only lexid is set on the last entry */
+	PG_RETURN_POINTER(types);
+}
diff --git a/src/test/modules/test_parser/test_parser.control b/src/test/modules/test_parser/test_parser.control
new file mode 100644
index 0000000..36b26b2
--- /dev/null
+++ b/src/test/modules/test_parser/test_parser.control
@@ -0,0 +1,5 @@
+# test_parser extension
+comment = 'example of a custom parser for full-text search'
+default_version = '1.0'
+module_pathname = '$libdir/test_parser'
+relocatable = true