From 3f619478f796eddbba6e39502fe941b285dd97b1 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 20:00:34 +0200 Subject: Adding upstream version 1:10.11.6. Signed-off-by: Daniel Baumann --- mysql-test/main/func_regexp_pcre.test | 472 ++++++++++++++++++++++++++++++++++ 1 file changed, 472 insertions(+) create mode 100644 mysql-test/main/func_regexp_pcre.test (limited to 'mysql-test/main/func_regexp_pcre.test') diff --git a/mysql-test/main/func_regexp_pcre.test b/mysql-test/main/func_regexp_pcre.test new file mode 100644 index 00000000..e6e356f4 --- /dev/null +++ b/mysql-test/main/func_regexp_pcre.test @@ -0,0 +1,472 @@ +--disable_service_connection +SET NAMES utf8; + +--echo # +--echo # MDEV-4425 REGEXP enhancements +--echo # + +--echo # +--echo # Checking RLIKE +--echo # + +# Checking that à is a single character +SELECT 'à' RLIKE '^.$'; + +# Checking \x{FFFF} syntax and case sensitivity +SELECT 'à' RLIKE '\\x{00E0}'; +SELECT 'À' RLIKE '\\x{00E0}'; +SELECT 'à' RLIKE '\\x{00C0}'; +SELECT 'À' RLIKE '\\x{00C0}'; +SELECT 'à' RLIKE '\\x{00E0}' COLLATE utf8_bin; +SELECT 'À' RLIKE '\\x{00E0}' COLLATE utf8_bin; +SELECT 'à' RLIKE '\\x{00C0}' COLLATE utf8_bin; +SELECT 'À' RLIKE '\\x{00C0}' COLLATE utf8_bin; + +# Checking how (?i) and (?-i) affect case sensitivity +CREATE TABLE t1 (s VARCHAR(10) CHARACTER SET utf8); +INSERT INTO t1 VALUES ('a'),('A'); +CREATE TABLE t2 (p VARCHAR(10) CHARACTER SET utf8); +INSERT INTO t2 VALUES ('a'),('(?i)a'),('(?-i)a'),('A'),('(?i)A'),('(?-i)A'); +SELECT s,p,s RLIKE p, s COLLATE utf8_bin RLIKE p FROM t1,t2 ORDER BY BINARY s, BINARY p; +DROP TABLE t1,t2; + + +# Checking Unicode character classes +CREATE TABLE t1 (ch VARCHAR(22)) CHARACTER SET utf8; +CREATE TABLE t2 (class VARCHAR(32)) CHARACTER SET utf8; +INSERT INTO t1 VALUES ('Я'),('Σ'),('A'),('À'); +INSERT INTO t1 VALUES ('я'),('σ'),('a'),('à'); +INSERT INTO t1 VALUES ('㐗'),('갷'),('ප'); +INSERT INTO t1 VALUES ('1'),('௨'); +INSERT INTO t2 VALUES ('\\p{Cyrillic}'),('\\p{Greek}'),('\\p{Latin}'); +INSERT INTO t2 VALUES ('\\p{Han}'),('\\p{Hangul}'); +INSERT INTO t2 VALUES ('\\p{Sinhala}'), ('\\p{Tamil}'); +INSERT INTO t2 VALUES ('\\p{L}'),('\\p{Ll}'),('\\p{Lu}'),('\\p{L&}'); +INSERT INTO t2 VALUES ('[[:alpha:]]'),('[[:digit:]]'); +SELECT class, ch, ch RLIKE class FROM t1, t2 ORDER BY class, BINARY ch; +DROP TABLE t1, t2; + +# Checking that UCP is disabled by default for binary data +SELECT 0xFF RLIKE '\\w'; +SELECT 0xFF RLIKE '(*UCP)\\w'; + +# newline character +SELECT '\n' RLIKE '(*CR)'; +SELECT '\n' RLIKE '(*LF)'; +SELECT '\n' RLIKE '(*CRLF)'; +SELECT '\n' RLIKE '(*ANYCRLF)'; +SELECT '\n' RLIKE '(*ANY)'; + +SELECT 'a\nb' RLIKE '(*LF)(?m)^a$'; +SELECT 'a\nb' RLIKE '(*CR)(?m)^a$'; +SELECT 'a\nb' RLIKE '(*CRLF)(?m)^a$'; +SELECT 'a\nb' RLIKE '(*ANYCRLF)(?m)^a$'; + +SELECT 'a\rb' RLIKE '(*LF)(?m)^a$'; +SELECT 'a\rb' RLIKE '(*CR)(?m)^a$'; +SELECT 'a\rb' RLIKE '(*CRLF)(?m)^a$'; +SELECT 'a\rb' RLIKE '(*ANYCRLF)(?m)^a$'; + +SELECT 'a\r\nb' RLIKE '(*LF)(?m)^a$'; +SELECT 'a\r\nb' RLIKE '(*CR)(?m)^a$'; +SELECT 'a\r\nb' RLIKE '(*CRLF)(?m)^a$'; +SELECT 'a\r\nb' RLIKE '(*ANYCRLF)(?m)^a$'; + +#backreference +SELECT 'aa' RLIKE '(a)\\g1'; +SELECT 'aa bb' RLIKE '(a)\\g1 (b)\\g2'; + +#repitition +SELECT 'aaaaa' RLIKE 'a{0,5}'; +SELECT 'aaaaa' RLIKE 'a{1,3}'; +SELECT 'aaaaa' RLIKE 'a{0,}'; +SELECT 'aaaaa' RLIKE 'a{10,20}'; + +#Recursion +SELECT 'aabb' RLIKE 'a(?R)?b'; +SELECT 'aabb' RLIKE 'aa(?R)?bb'; + +#subroutine +#SELECT 'abbbc' RLIKE '(a(b|(?1))*c)'; +#SELECT 'abca' RLIKE '([abc])(?1){3}'; + +#Atomic grouping +SELECT 'abcc' RLIKE 'a(?>bc|b)c'; +SELECT 'abc' RLIKE 'a(?>bc|b)c'; + +#lookahead - negative +SELECT 'ab' RLIKE 'a(?!b)'; +SELECT 'ac' RLIKE 'a(?!b)'; + +#lookahead - positive +SELECT 'ab' RLIKE 'a(?=b)'; +SELECT 'ac' RLIKE 'a(?=b)'; + +#lookbehind - negative +SELECT 'ab' RLIKE '(?a)(?P=pattern)'; +SELECT 'aba' RLIKE '(?Pa)b(?P=pattern)'; + +#comments +SELECT 'a' RLIKE 'a(?#comment)'; +SELECT 'aa' RLIKE 'a(?#comment)a'; +SELECT 'aba' RLIKE 'a(?#b)a'; + +#ungreedy maching +#SELECT 'ddd cc eee' RLIKE '<.+?>'; + +#Extended character classes +SELECT 'aaa' RLIKE '\\W\\W\\W'; +SELECT '%' RLIKE '\\W'; +SELECT '%a$' RLIKE '\\W.\\W'; + +SELECT '123' RLIKE '\\d\\d\\d'; +SELECT 'aaa' RLIKE '\\d\\d\\d'; +SELECT '1a3' RLIKE '\\d.\\d'; +SELECT 'a1b' RLIKE '\\d.\\d'; + +SELECT '8' RLIKE '\\D'; +SELECT 'a' RLIKE '\\D'; +SELECT '%' RLIKE '\\D'; +SELECT 'a1' RLIKE '\\D\\d'; +SELECT 'a1' RLIKE '\\d\\D'; + +SELECT '\t' RLIKE '\\s'; +SELECT '\r' RLIKE '\\s'; +SELECT '\n' RLIKE '\\s'; +SELECT '\v' RLIKE '\\s'; + +SELECT 'a' RLIKE '\\S'; +SELECT '1' RLIKE '\\S'; +SELECT '!' RLIKE '\\S'; +SELECT '.' RLIKE '\\S'; + +# checking 0x00 bytes +# Bug#70470 REGEXP fails to find matches after NUL character +SELECT 'abc\0def' REGEXP 'def'; +SELECT 'abc\0def' REGEXP 'abc\\x{00}def'; +SELECT HEX(REGEXP_SUBSTR('abc\0def','abc\\x{00}def')); + + +--echo # +--echo # Checking REGEXP_REPLACE +--echo # + +# Check data type +CREATE TABLE t1 AS SELECT REGEXP_REPLACE('abc','b','x'); +SHOW CREATE TABLE t1; +DROP TABLE t1; + +# Check print() +EXPLAIN EXTENDED SELECT REGEXP_REPLACE('abc','b','x'); + +# Check decimals +SET STATEMENT sql_mode = 'NO_ENGINE_SUBSTITUTION' FOR +CREATE TABLE t1 AS SELECT REGEXP_REPLACE('abc','b','x')+0; +SHOW CREATE TABLE t1; +DROP TABLE t1; + +# Return NULL if any of the arguments are NULL +SELECT REGEXP_REPLACE(NULL,'b','c'); +SELECT REGEXP_REPLACE('a',NULL,'c'); +SELECT REGEXP_REPLACE('a','b',NULL); + +# Return the original string if no match +SELECT REGEXP_REPLACE('a','x','b'); + +# Return the original string for an empty pattern +SELECT REGEXP_REPLACE('a','','b'); + +# Check that replace stops on the first empty match +# 'a5b' matches the pattern and '5' is replaced to 'x' +# then 'ab' matches the pattern, but the match '5*' is empty, +# so replacing stops here. +SELECT REGEXP_REPLACE('a5b ab a5b','(?<=a)5*(?=b)','x'); + +# A modified version of the previous example, +# to check that all matches are replaced if no empty match is met. +SELECT REGEXP_REPLACE('a5b a5b a5b','(?<=a)5*(?=b)','x'); + + +# Check that case sensitiviry respects the collation +SELECT REGEXP_REPLACE('A','a','b'); +SELECT REGEXP_REPLACE('a','A','b'); +SELECT REGEXP_REPLACE('A' COLLATE utf8_bin,'a','b'); +SELECT REGEXP_REPLACE('a' COLLATE utf8_bin,'A','b'); + +# Pattern references in the "replace" string +SELECT REGEXP_REPLACE('James Bond', '(.*) (.*)', '\\2, \\1 \\2'); + +# Checking with UTF8 +SELECT REGEXP_REPLACE('абвгд','в','ц'); + +# Check that it does not treat binary strings as UTF8 +SELECT REGEXP_REPLACE('г',0xB3,0xB4); + +# Check that it replaces all matches by default +SELECT REGEXP_REPLACE('aaaa','a','b'); + +# Replace all matches except the first letter +SELECT REGEXP_REPLACE('aaaa','(?<=.)a','b'); + +# Replace all matches except the last letter +SELECT REGEXP_REPLACE('aaaa','a(?=.)','b'); + +# Replace all matches except the first and the last letter +SELECT REGEXP_REPLACE('aaaa','(?<=.)a(?=.)','b'); + +# newline character +SELECT REGEXP_REPLACE('a\nb','(*LF)(?m)^a$','c'); +SELECT REGEXP_REPLACE('a\nb','(*CR)(?m)^a$','c'); +SELECT REGEXP_REPLACE('a\nb','(*CRLF)(?m)^a$','c'); +SELECT REGEXP_REPLACE('a\nb','(*ANYCRLF)(?m)^a$','c'); + +SELECT REGEXP_REPLACE('a\rb','(*LF)(?m)^a$','c'); +SELECT REGEXP_REPLACE('a\rb','(*CR)(?m)^a$','c'); +SELECT REGEXP_REPLACE('a\rb','(*CRLF)(?m)^a$','c'); +SELECT REGEXP_REPLACE('a\rb','(*ANYCRLF)(?m)^a$','c'); + +SELECT REGEXP_REPLACE('a\r\nb','(*LF)(?m)^a$','c'); +SELECT REGEXP_REPLACE('a\r\nb','(*CR)(?m)^a$','c'); +SELECT REGEXP_REPLACE('a\r\nb','(*CRLF)(?m)^a$','c'); +SELECT REGEXP_REPLACE('a\r\nb','(*ANYCRLF)(?m)^a$','c'); + +#backreference +SELECT REGEXP_REPLACE('aa','(a)\\g1','b'); +SELECT REGEXP_REPLACE('aa bb','(a)\\g1 (b)\\g2','c'); + +#repitition +SELECT REGEXP_REPLACE('aaaaa','a{1,3}','b'); +SELECT REGEXP_REPLACE('aaaaa','a{10,20}','b'); + +#Recursion +SELECT REGEXP_REPLACE('daabbd','a(?R)?b','c'); +SELECT REGEXP_REPLACE('daabbd','aa(?R)?bb','c'); + +#Atomic grouping +SELECT REGEXP_REPLACE('dabccd','a(?>bc|b)c','e'); +SELECT REGEXP_REPLACE('dabcd','a(?>bc|b)c','e'); + +#lookahead - negative +SELECT REGEXP_REPLACE('ab','a(?!b)','e'); +SELECT REGEXP_REPLACE('ac','a(?!b)','e'); + +#lookahead - positive +SELECT REGEXP_REPLACE('ab','a(?=b)','e'); +SELECT REGEXP_REPLACE('ac','a(?=b)','e'); + +#lookbehind - negative +SELECT REGEXP_REPLACE('ab','(?a)(?P=pattern)','b'); +SELECT REGEXP_REPLACE('aba','(?Pa)b(?P=pattern)','c'); + +#comments +SELECT REGEXP_REPLACE('a','a(?#comment)','e'); +SELECT REGEXP_REPLACE('aa','a(?#comment)a','e'); +SELECT REGEXP_REPLACE('aba','a(?#b)a','e'); + +#ungreedy maching +SELECT REGEXP_REPLACE('dddcceee','<.+?>','*'); + +#Extended character classes +SELECT REGEXP_REPLACE('aaa','\\W\\W\\W','e'); +SELECT REGEXP_REPLACE('aaa','\\w\\w\\w','e'); +SELECT REGEXP_REPLACE('%','\\W','e'); +SELECT REGEXP_REPLACE('%a$','\\W.\\W','e'); +SELECT REGEXP_REPLACE('%a$','\\W\\w\\W','e'); + +SELECT REGEXP_REPLACE('123','\\d\\d\\d\\d\\d\\d','e'); +SELECT REGEXP_REPLACE('123','\\d\\d\\d','e'); +SELECT REGEXP_REPLACE('aaa','\\d\\d\\d','e'); +SELECT REGEXP_REPLACE('1a3','\\d.\\d\\d.\\d','e'); +SELECT REGEXP_REPLACE('1a3','\\d.\\d','e'); +SELECT REGEXP_REPLACE('a1b','\\d.\\d','e'); + +SELECT REGEXP_REPLACE('8','\\D','e'); +SELECT REGEXP_REPLACE('a','\\D','e'); +SELECT REGEXP_REPLACE('%','\\D','e'); +SELECT REGEXP_REPLACE('a1','\\D\\d','e'); +SELECT REGEXP_REPLACE('a1','\\d\\D','e'); + +SELECT REGEXP_REPLACE('\t','\\s','e'); +SELECT REGEXP_REPLACE('\r','\\s','e'); +SELECT REGEXP_REPLACE('\n','\\s','e'); + +SELECT REGEXP_REPLACE('a','\\S','e'); +SELECT REGEXP_REPLACE('1','\\S','e'); +SELECT REGEXP_REPLACE('!','\\S','e'); +SELECT REGEXP_REPLACE('.','\\S','e'); + +--echo # +--echo # Checking REGEXP_INSTR +--echo # +SELECT REGEXP_INSTR('abcd','X'); +SELECT REGEXP_INSTR('abcd','a'); +SELECT REGEXP_INSTR('abcd','b'); +SELECT REGEXP_INSTR('abcd','c'); +SELECT REGEXP_INSTR('abcd','d'); +SELECT REGEXP_INSTR('aaaa','(?<=a)a'); + +SELECT REGEXP_INSTR('вася','в'); +SELECT REGEXP_INSTR('вася','а'); +SELECT REGEXP_INSTR('вася','с'); +SELECT REGEXP_INSTR('вася','я'); +#enable after fix MDEV-27871 +--disable_view_protocol +SELECT REGEXP_INSTR(CONVERT('вася' USING koi8r), CONVERT('в' USING koi8r)); +SELECT REGEXP_INSTR(CONVERT('вася' USING koi8r), CONVERT('а' USING koi8r)); +SELECT REGEXP_INSTR(CONVERT('вася' USING koi8r), CONVERT('с' USING koi8r)); +SELECT REGEXP_INSTR(CONVERT('вася' USING koi8r), CONVERT('я' USING koi8r)); +--enable_view_protocol + +--echo # +--echo # Checking REGEXP_SUBSTR +--echo # + +# Check data type +CREATE TABLE t1 AS SELECT REGEXP_SUBSTR('abc','b'); +SHOW CREATE TABLE t1; +DROP TABLE t1; + +# Check print() +EXPLAIN EXTENDED SELECT REGEXP_SUBSTR('abc','b'); + +# Check decimals +SET STATEMENT sql_mode = 'NO_ENGINE_SUBSTITUTION' FOR +CREATE TABLE t1 AS SELECT REGEXP_SUBSTR('abc','b')+0; +SHOW CREATE TABLE t1; +DROP TABLE t1; + +#enable after fix MDEV-27871 +--disable_view_protocol +SELECT REGEXP_SUBSTR('See https://mariadb.org/en/foundation/ for details', 'https?://[^/]*'); +--enable_view_protocol + +--echo # +--echo # MDEV-6027 RLIKE: "." no longer matching new line +--echo # +SELECT 'cat and\ndog' RLIKE 'cat.*dog'; +SELECT 'cat and\r\ndog' RLIKE 'cat.*dog'; +SELECT 'a\nb' RLIKE 'a.b'; +SELECT 'a\nb' RLIKE '(?-s)a.b'; +SET default_regex_flags='DOTALL'; +SELECT @@default_regex_flags; +SELECT 'cat and\ndog' RLIKE 'cat.*dog'; +SELECT 'cat and\r\ndog' RLIKE 'cat.*dog'; +SELECT 'a\nb' RLIKE 'a.b'; +SELECT 'a\nb' RLIKE '(?-s)a.b'; +SET default_regex_flags=DEFAULT; + +#enable after fix MDEV-27871 +--disable_view_protocol +# note that old pcre2 reports a different offset +--replace_result 29 30 +--error ER_REGEXP_ERROR +SELECT REGEXP_SUBSTR('Monday Mon','^((?Mon|Fri|Sun)day|(?Tue)sday).*(?P=DN)$'); +SET default_regex_flags='DUPNAMES'; +SELECT REGEXP_SUBSTR('Monday Mon','^((?Mon|Fri|Sun)day|(?Tue)sday).*(?P=DN)$'); +SELECT REGEXP_SUBSTR('Tuesday Tue','^((?Mon|Fri|Sun)day|(?Tue)sday).*(?P=DN)$'); +--enable_view_protocol +SET default_regex_flags=DEFAULT; + +SELECT 'AB' RLIKE 'A B'; +SELECT 'AB' RLIKE 'A# this is a comment\nB'; +SET default_regex_flags='EXTENDED'; +SELECT 'AB' RLIKE 'A B'; +SELECT 'AB' RLIKE 'A# this is a comment\nB'; +SET default_regex_flags=DEFAULT; + +--error ER_REGEXP_ERROR +SELECT 'Aq' RLIKE 'A\\q'; + +#double warning for view protocol +--disable_view_protocol +SET default_regex_flags='EXTRA'; +SELECT 'A' RLIKE 'B'; +--enable_view_protocol + +SET default_regex_flags=DEFAULT; + +SELECT 'a\nb\nc' RLIKE '^b$'; +SET default_regex_flags='MULTILINE'; +SELECT 'a\nb\nc' RLIKE '^b$'; +SET default_regex_flags=DEFAULT; + +SELECT REGEXP_SUBSTR('abc','.+'); +SELECT REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2'); +SET default_regex_flags='UNGREEDY'; +SELECT REGEXP_SUBSTR('abc','.+'); +SELECT REGEXP_REPLACE('abc','^(.*)(.*)$','\\1/\\2'); +SET default_regex_flags=DEFAULT; + +--echo # +--echo # MDEV-6965 non-captured group \2 in regexp_replace +--echo # +#enable after fix MDEV-27871 +--disable_view_protocol +SELECT REGEXP_REPLACE('1 foo and bar', '(\\d+) foo and (\\d+ )?bar', '\\1 this and \\2that'); +--enable_view_protocol + +--echo # +--echo # MDEV-8102 REGEXP function fails to match hex values when expression is stored as a variable +--echo # + +--echo # Testing a warning +SET NAMES latin1; +SET @regCheck= '\\xE0\\x01'; +SELECT 0xE001 REGEXP @regCheck; + +--echo # Testing workaround N1: This makes the pattern to be a binary string: +SET NAMES latin1; +SET @regCheck= X'E001'; +SELECT 0xE001 REGEXP @regCheck; + +--echo # Testing workaround N2: This also makes the pattern to be a binary string, using a different syntax: +SET NAMES latin1; +SET @regCheck= _binary '\\xE0\\x01'; +SELECT 0xE001 REGEXP @regCheck; + +--echo # Testing workarond N3: This makes derivation of the subject string stronger (IMLICIT instead of COERCIBLE) +SET NAMES latin1; +SET @regCheck= '\\xE0\\x01'; +SELECT CAST(0xE001 AS BINARY) REGEXP @regCheck; + +--echo # MDEV-12420: Testing recursion overflow +# pcre2 < 10.30 relies on stack, we limit recursion depth, REGEXP fails +# newer pcre2 uses heap, no need (and no way) to limit recursion, REGEXP succeeds +# we just test that it doesn't crash with a stack overflow +--disable_warnings +--disable_result_log +SELECT 1 FROM dual WHERE ('Alpha,Bravo,Charlie,Delta,Echo,Foxtrot,StrataCentral,Golf,Hotel,India,Juliet,Kilo,Lima,Mike,StrataL3,November,Oscar,StrataL2,Sand,P3,P4SwitchTest,Arsys,Poppa,ExtensionMgr,Arp,Quebec,Romeo,StrataApiV2,PtReyes,Sierra,SandAcl,Arrow,Artools,BridgeTest,Tango,SandT,PAlaska,Namespace,Agent,Qos,PatchPanel,ProjectReport,Ark,Gimp,Agent,SliceAgent,Arnet,Bgp,Ale,Tommy,Central,AsicPktTestLib,Hsc,SandL3,Abuild,Pca9555,Standby,ControllerDut,CalSys,SandLib,Sb820,PointV2,BfnLib,Evpn,BfnSdk,Sflow,ManagementActive,AutoTest,GatedTest,Bgp,Sand,xinetd,BfnAgentLib,bf-utils,Hello,BfnState,Eos,Artest,Qos,Scd,ThermoMgr,Uniform,EosUtils,Eb,FanController,Central,BfnL3,BfnL2,tcp_wrappers,Victor,Environment,Route,Failover,Whiskey,Xray,Gimp,BfnFixed,Strata,SoCal,XApi,Msrp,XpProfile,tcpdump,PatchPanel,ArosTest,FhTest,Arbus,XpAcl,MacConc,XpApi,telnet,QosTest,Alpha2,BfnVlan,Stp,VxlanControllerTest,MplsAgent,Bravo2,Lanz,BfnMbb,Intf,XCtrl,Unicast,SandTunnel,L3Unicast,Ipsec,MplsTest,Rsvp,EthIntf,StageMgr,Sol,MplsUtils,Nat,Ira,P4NamespaceDut,Counters,Charlie2,Aqlc,Mlag,Power,OpenFlow,Lag,RestApi,BfdTest,strongs,Sfa,CEosUtils,Adt746,MaintenanceMode,MlagDut,EosImage,IpEth,MultiProtocol,Launcher,Max3179,Snmp,Acl,IpEthTest,PhyEee,bf-syslibs,tacc,XpL2,p4-ar-switch,p4-bf-switch,LdpTest,BfnPhy,Mirroring,Phy6,Ptp' REGEXP '^((?!\b(Strata|StrataApi|StrataApiV2)\b).)*$'); +--enable_result_log +--enable_warnings +#enable after fix MDEV-27871 +--disable_view_protocol +--enable_view_protocol + +# +# MDEV-12942 REGEXP_INSTR returns 1 when using brackets +# +SELECT REGEXP_INSTR('a_kollision', 'oll'); +SELECT REGEXP_INSTR('a_kollision', '(oll)'); +SELECT REGEXP_INSTR('a_kollision', 'o([lm])\\1'); + +# +# MDEV-12939 A query crashes MariaDB in Item_func_regex::cleanup +# +SELECT a FROM (SELECT "aa" a) t WHERE a REGEXP '[0-9]'; +--enable_service_connection -- cgit v1.2.3