From 5e45211a64149b3c659b90ff2de6fa982a5a93ed Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 14:17:33 +0200 Subject: Adding upstream version 15.5. Signed-off-by: Daniel Baumann --- doc/src/sgml/html/textsearch-tables.html | 139 +++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 doc/src/sgml/html/textsearch-tables.html (limited to 'doc/src/sgml/html/textsearch-tables.html') diff --git a/doc/src/sgml/html/textsearch-tables.html b/doc/src/sgml/html/textsearch-tables.html new file mode 100644 index 0000000..3cde56a --- /dev/null +++ b/doc/src/sgml/html/textsearch-tables.html @@ -0,0 +1,139 @@ + +12.2. Tables and Indexes

12.2. Tables and Indexes

+ The examples in the previous section illustrated full text matching using + simple constant strings. This section shows how to search table data, + optionally using indexes. +

12.2.2. Creating Indexes

+ We can create a GIN index (Section 12.9) to speed up text searches: + +

+CREATE INDEX pgweb_idx ON pgweb USING GIN (to_tsvector('english', body));
+

+ + Notice that the 2-argument version of to_tsvector is + used. Only text search functions that specify a configuration name can + be used in expression indexes (Section 11.7). + This is because the index contents must be unaffected by default_text_search_config. If they were affected, the + index contents might be inconsistent because different entries could + contain tsvectors that were created with different text search + configurations, and there would be no way to guess which was which. It + would be impossible to dump and restore such an index correctly. +

+ Because the two-argument version of to_tsvector was + used in the index above, only a query reference that uses the 2-argument + version of to_tsvector with the same configuration + name will use that index. That is, WHERE + to_tsvector('english', body) @@ 'a & b' can use the index, + but WHERE to_tsvector(body) @@ 'a & b' cannot. + This ensures that an index will be used only with the same configuration + used to create the index entries. +

+ It is possible to set up more complex expression indexes wherein the + configuration name is specified by another column, e.g.: + +

+CREATE INDEX pgweb_idx ON pgweb USING GIN (to_tsvector(config_name, body));
+

+ + where config_name is a column in the pgweb + table. This allows mixed configurations in the same index while + recording which configuration was used for each index entry. This + would be useful, for example, if the document collection contained + documents in different languages. Again, + queries that are meant to use the index must be phrased to match, e.g., + WHERE to_tsvector(config_name, body) @@ 'a & b'. +

+ Indexes can even concatenate columns: + +

+CREATE INDEX pgweb_idx ON pgweb USING GIN (to_tsvector('english', title || ' ' || body));
+

+

+ Another approach is to create a separate tsvector column + to hold the output of to_tsvector. To keep this + column automatically up to date with its source data, use a stored + generated column. This example is a + concatenation of title and body, + using coalesce to ensure that one field will still be + indexed when the other is NULL: + +

+ALTER TABLE pgweb
+    ADD COLUMN textsearchable_index_col tsvector
+               GENERATED ALWAYS AS (to_tsvector('english', coalesce(title, '') || ' ' || coalesce(body, ''))) STORED;
+

+ + Then we create a GIN index to speed up the search: + +

+CREATE INDEX textsearch_idx ON pgweb USING GIN (textsearchable_index_col);
+

+ + Now we are ready to perform a fast full text search: + +

+SELECT title
+FROM pgweb
+WHERE textsearchable_index_col @@ to_tsquery('create & table')
+ORDER BY last_mod_date DESC
+LIMIT 10;
+

+

+ One advantage of the separate-column approach over an expression index + is that it is not necessary to explicitly specify the text search + configuration in queries in order to make use of the index. As shown + in the example above, the query can depend on + default_text_search_config. Another advantage is that + searches will be faster, since it will not be necessary to redo the + to_tsvector calls to verify index matches. (This is more + important when using a GiST index than a GIN index; see Section 12.9.) The expression-index approach is + simpler to set up, however, and it requires less disk space since the + tsvector representation is not stored explicitly. +

\ No newline at end of file -- cgit v1.2.3