summaryrefslogtreecommitdiffstats
path: root/src/test/regress/sql/regex.linux.utf8.sql
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:15:05 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:15:05 +0000
commit 46651ce6fe013220ed397add242004d764fc0153 (patch)
tree 6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/test/regress/sql/regex.linux.utf8.sql
parent Initial commit. (diff)
downloadpostgresql-14-46651ce6fe013220ed397add242004d764fc0153.tar.xz
postgresql-14-46651ce6fe013220ed397add242004d764fc0153.zip
Adding upstream version 14.5.upstream/14.5upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/test/regress/sql/regex.linux.utf8.sql')
-rw-r--r-- src/test/regress/sql/regex.linux.utf8.sql 46
1 files changed, 46 insertions, 0 deletions
diff --git a/src/test/regress/sql/regex.linux.utf8.sql b/src/test/regress/sql/regex.linux.utf8.sql
new file mode 100644
index 0000000..4577811
--- /dev/null
+++ b/src/test/regress/sql/regex.linux.utf8.sql
@@ -0,0 +1,46 @@
+/*
+ * This test is for Linux/glibc systems and others that implement proper
+ * locale classification of Unicode characters with high code values.
+ * It must be run in a database with UTF8 encoding and a Unicode-aware locale.
+ */
+-- The string literals in this script contain raw non-ASCII characters; declare them as UTF8.
+SET client_encoding TO UTF8;
+
+--
+-- Test the "high colormap" logic with single characters and ranges that
+-- exceed the MAX_SIMPLE_CHR cutoff, here assumed to be less than U+2000.
+-- Probe characters: Ⓐ-Ⓩ = U+24B6..U+24CF, ⓐ-ⓩ = U+24D0..U+24E9, ⓪ = U+24EA.
+-- Each column alias names the expected result: t = match, f = no match.
+-- trivial cases: a single high character, given as a Unicode escape or matched by a range
+SELECT 'aⓐ' ~ U&'a\24D0' AS t;  -- \24D0 is ⓐ
+SELECT 'aⓐ' ~ U&'a\24D1' AS f;  -- \24D1 is ⓑ
+SELECT 'aⓕ' ~ 'a[ⓐ-ⓩ]' AS t;
+SELECT 'aⒻ' ~ 'a[ⓐ-ⓩ]' AS f;  -- Ⓕ (U+24BB) has a lower code point than ⓐ
+-- cases requiring splitting of ranges: a range overlaps a literal or another range in the same regex
+SELECT 'aⓕⓕ' ~ 'aⓕ[ⓐ-ⓩ]' AS t;  -- ⓕ is both a literal and a member of the range
+SELECT 'aⓕⓐ' ~ 'aⓕ[ⓐ-ⓩ]' AS t;
+SELECT 'aⓐⓕ' ~ 'aⓕ[ⓐ-ⓩ]' AS f;
+SELECT 'aⓕⓕ' ~ 'a[ⓐ-ⓩ]ⓕ' AS t;  -- same pieces with the range before the literal
+SELECT 'aⓕⓐ' ~ 'a[ⓐ-ⓩ]ⓕ' AS f;
+SELECT 'aⓐⓕ' ~ 'a[ⓐ-ⓩ]ⓕ' AS t;
+SELECT 'aⒶⓜ' ~ 'a[Ⓐ-ⓜ][ⓜ-ⓩ]' AS t;  -- the two ranges share only ⓜ
+SELECT 'aⓜⓜ' ~ 'a[Ⓐ-ⓜ][ⓜ-ⓩ]' AS t;
+SELECT 'aⓜⓩ' ~ 'a[Ⓐ-ⓜ][ⓜ-ⓩ]' AS t;
+SELECT 'aⓩⓩ' ~ 'a[Ⓐ-ⓜ][ⓜ-ⓩ]' AS f;  -- ⓩ is above the end of the first range
+SELECT 'aⓜ⓪' ~ 'a[Ⓐ-ⓜ][ⓜ-ⓩ]' AS f;  -- ⓪ is one code point past ⓩ
+SELECT 'a0' ~ 'a[a-ⓩ]' AS f;  -- range runs from ASCII 'a' to a high code point; '0' is below 'a'
+SELECT 'aq' ~ 'a[a-ⓩ]' AS t;
+SELECT 'aⓜ' ~ 'a[a-ⓩ]' AS t;
+SELECT 'a⓪' ~ 'a[a-ⓩ]' AS f;  -- ⓪ is one code point past ⓩ
+
+-- Locale-dependent character classes
+-- (expects the circled letters to be [[:alpha:]] and ⓪ to be [[:graph:]] but not [[:alpha:]])
+SELECT 'aⒶⓜ⓪' ~ '[[:alpha:]][[:alpha:]][[:alpha:]][[:graph:]]' AS t;
+SELECT 'aⒶⓜ⓪' ~ '[[:alpha:]][[:alpha:]][[:alpha:]][[:alpha:]]' AS f;
+
+-- Locale-dependent character classes with high ranges
+-- (Ⓐ is expected to be alphabetic but lies below ⓐ-ⓩ, so swapping Ⓐ and ⓜ flips the result)
+SELECT 'aⒶⓜ⓪' ~ '[a-z][[:alpha:]][ⓐ-ⓩ][[:graph:]]' AS t;
+SELECT 'aⓜⒶ⓪' ~ '[a-z][[:alpha:]][ⓐ-ⓩ][[:graph:]]' AS f;
+SELECT 'aⓜⒶ⓪' ~ '[a-z][ⓐ-ⓩ][[:alpha:]][[:graph:]]' AS t;
+SELECT 'aⒶⓜ⓪' ~ '[a-z][ⓐ-ⓩ][[:alpha:]][[:graph:]]' AS f;