summaryrefslogtreecommitdiffstats
path: root/src/grep/tests/pcre
diff options
context:
space:
mode:
Diffstat (limited to '')
-rwxr-xr-xsrc/grep/tests/pcre22
-rwxr-xr-xsrc/grep/tests/pcre-abort20
-rwxr-xr-xsrc/grep/tests/pcre-context36
-rwxr-xr-xsrc/grep/tests/pcre-count28
-rwxr-xr-xsrc/grep/tests/pcre-infloop33
-rwxr-xr-xsrc/grep/tests/pcre-invalid-utf8-infloop26
-rwxr-xr-xsrc/grep/tests/pcre-invalid-utf8-input31
-rwxr-xr-xsrc/grep/tests/pcre-jitstack63
-rwxr-xr-xsrc/grep/tests/pcre-o17
-rwxr-xr-xsrc/grep/tests/pcre-utf840
-rwxr-xr-xsrc/grep/tests/pcre-w31
-rwxr-xr-xsrc/grep/tests/pcre-wx-backref28
-rwxr-xr-xsrc/grep/tests/pcre-z28
13 files changed, 403 insertions, 0 deletions
diff --git a/src/grep/tests/pcre b/src/grep/tests/pcre
new file mode 100755
index 0000000..449156d
--- /dev/null
+++ b/src/grep/tests/pcre
@@ -0,0 +1,22 @@
+#! /bin/sh
+# Simple PCRE tests.
+#
+# Copyright (C) 2001, 2006, 2009-2021 Free Software Foundation, Inc.
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+require_pcre_
+
+fail=0
+
+echo | grep -P '\s*$' || fail=1
+echo | grep -zP '\s$' || fail=1  # with -z the trailing newline is part of the record
+echo '.ab' | returns_ 1 grep -Pwx ab || fail=1
+echo x | grep -Pz '[^a]' || fail=1
+printf 'x\n\0' | returns_ 1 grep -zP 'x$' || fail=1
+printf 'a\nb\0' | returns_ 1 grep -zxP a || fail=1  # require status 1 (no match); an error (2) must not pass
+
+Exit $fail
diff --git a/src/grep/tests/pcre-abort b/src/grep/tests/pcre-abort
new file mode 100755
index 0000000..51cee25
--- /dev/null
+++ b/src/grep/tests/pcre-abort
@@ -0,0 +1,20 @@
+#! /bin/sh
+# Show that grep handles PCRE's PCRE_ERROR_MATCHLIMIT.
+# In grep-2.8, it would abort.
+#
+# Copyright (C) 2011-2021 Free Software Foundation, Inc.
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+require_pcre_
+
+fail=0
+
+echo aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab > in || framework_failure_
+returns_ 2 grep -P '((a+)*)+$' in > out || fail=1  # expect a clean exit status 2, not an abort
+compare /dev/null out || fail=1  # and nothing written to stdout
+
+Exit $fail
diff --git a/src/grep/tests/pcre-context b/src/grep/tests/pcre-context
new file mode 100755
index 0000000..b910a20
--- /dev/null
+++ b/src/grep/tests/pcre-context
@@ -0,0 +1,36 @@
+#!/bin/sh
+# Test Perl regex with context
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+require_pcre_
+
+cat >in <<'EOF'
+Preceded by 0 empty lines.
+
+Preceded by 1 empty line.
+
+
+Preceded by 2 empty lines.
+
+
+
+Preceded by 3 empty lines.
+
+
+
+
+Preceded by 4 empty lines.
+
+EOF
+test $? -eq 0 || framework_failure_  # checks the status of the cat that wrote "in"
+
+printf '%s\0' \
+ 'Preceded by 2 empty lines.' \
+ 'Preceded by 3 empty lines.' \
+ 'Preceded by 4 empty lines.' >exp || framework_failure_  # expected matches, each NUL-terminated
+
+fail=0
+
+grep -Pzo '(?<=\n\n\n).*' in >out || fail_ 'grep -Pzo failed'  # text following three consecutive newlines
+compare exp out || fail=1
+
+Exit $fail
diff --git a/src/grep/tests/pcre-count b/src/grep/tests/pcre-count
new file mode 100755
index 0000000..9eda54b
--- /dev/null
+++ b/src/grep/tests/pcre-count
@@ -0,0 +1,28 @@
+#! /bin/sh
+# grep -P and grep -Pc produced inconsistent results.
+# This bug affected grep versions 2.21 through 2.22.
+#
+# Copyright (C) 2015-2021 Free Software Foundation, Inc.
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+require_pcre_
+
+fail=0
+
+printf 'a\n%032768d\nb\0\n%032768d\na\n' 0 0 > in || framework_failure_  # an "a" line on each side of a NUL byte, separated by 32768-digit lines
+
+# grep will discover that the input is a binary file sooner if the
+# page size is larger, so allow for either possible output.
+printf 'a\n' >exp1a || framework_failure_
+LC_ALL=C grep -P 'a' in >out || fail=1
+compare exp1a out || compare /dev/null out || fail=1
+
+printf '2\n' >exp2 || framework_failure_
+LC_ALL=C grep -Pc 'a' in >out || fail=1  # the count must include both "a" lines
+compare exp2 out || fail=1
+
+Exit $fail
diff --git a/src/grep/tests/pcre-infloop b/src/grep/tests/pcre-infloop
new file mode 100755
index 0000000..a4c7cac
--- /dev/null
+++ b/src/grep/tests/pcre-infloop
@@ -0,0 +1,33 @@
+#!/bin/sh
+# With some versions of libpcre, apparently including 8.35,
+# the following would trigger an infinite loop in its match function.
+
+# Copyright 2014-2021 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+require_timeout_
+require_en_utf8_locale_
+require_compiled_in_MB_support
+LC_ALL=en_US.UTF-8 require_pcre_
+
+printf 'a\201b\r' > in || framework_failure_  # \201 is a lone UTF-8 continuation byte
+
+fail=0
+
+returns_ 1 env LC_ALL=en_US.UTF-8 timeout 10 grep -P 'a.?..b' in \
+ || fail_ "libpcre's match function appears to infloop"
+
+Exit $fail
diff --git a/src/grep/tests/pcre-invalid-utf8-infloop b/src/grep/tests/pcre-invalid-utf8-infloop
new file mode 100755
index 0000000..45b5ee1
--- /dev/null
+++ b/src/grep/tests/pcre-invalid-utf8-infloop
@@ -0,0 +1,26 @@
+#! /bin/sh
+# Ensure that grep -oaP doesn't infloop for invalid multi-byte input
+#
+# Copyright (C) 2015-2021 Free Software Foundation, Inc.
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+require_timeout_
+require_en_utf8_locale_
+require_compiled_in_MB_support
+LC_ALL=en_US.UTF-8 require_pcre_
+
+fail=0
+
+printf '\201_\0' > in || framework_failure_  # a lone continuation byte, then the "_" to be found
+printf '_\n' > exp || framework_failure_
+
+LC_ALL=en_US.UTF-8 timeout 10 grep -aoP _ in > out 2> err || fail=1  # timeout bounds a possible infloop
+
+compare exp out || fail=1
+compare /dev/null err || fail=1  # no diagnostics expected
+
+Exit $fail
diff --git a/src/grep/tests/pcre-invalid-utf8-input b/src/grep/tests/pcre-invalid-utf8-input
new file mode 100755
index 0000000..d1a2920
--- /dev/null
+++ b/src/grep/tests/pcre-invalid-utf8-input
@@ -0,0 +1,31 @@
+#! /bin/sh
+# Ensure that grep -P doesn't abort or infloop for invalid multi-byte input
+#
+# Copyright (C) 2013-2021 Free Software Foundation, Inc.
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+require_timeout_
+require_en_utf8_locale_
+require_compiled_in_MB_support
+LC_ALL=en_US.UTF-8 require_pcre_
+
+fail=0
+
+printf 'j\202j\nj\nk\202\n' > in || framework_failure_  # \202 is a stray UTF-8 continuation byte
+
+LC_ALL=en_US.UTF-8 timeout 10 grep -P j in
+test $? -eq 0 || fail=1
+
+LC_ALL=en_US.UTF-8 timeout 10 grep -P 'k$' in
+test $? -eq 1 || fail=1
+
+echo k >exp || framework_failure_  # a failed fixture write is a setup error, not a grep failure
+
+LC_ALL=en_US.UTF-8 timeout 10 grep -aoP 'k*' in >out || fail=1
+compare exp out || fail=1
+
+Exit $fail
diff --git a/src/grep/tests/pcre-jitstack b/src/grep/tests/pcre-jitstack
new file mode 100755
index 0000000..2a2b1b9
--- /dev/null
+++ b/src/grep/tests/pcre-jitstack
@@ -0,0 +1,63 @@
+#! /bin/sh
+# Grep 2.21 would report "grep: internal PCRE error: -27"
+#
+# Copyright 2015-2021 Free Software Foundation, Inc.
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+require_pcre_
+
+for p in 'base64 -d' 'base64 -D' 'openssl base64 -d' \
+ "perl -MMIME::Base64 -0777ne 'print decode_base64(\$_)'" FAIL; do
+ test "$p" = FAIL && skip_ "your system lacks a base64 decoder"  # sentinel reached: no candidate worked
+ x=$(echo eA==| ( eval "$p" ) 2>/dev/null) && test "X$x" = Xx &&  # eA== is the base64 encoding of "x"
+ {
+ eval "b64_decode() { $p; }"  # define b64_decode as the first decoder that works
+ break
+ }
+done
+
+foo=$( (echo foo | gzip | gzip -d) 2>/dev/null) && test "X$foo" = Xfoo \
+ || skip_ "your system lacks the gzip program"
+
+fail=0
+
+b64_decode >pcrejit.txt.gz <<'EOF' || framework_failure_
+H4sIAAAAAAACA+2bUU4DMQxE/7mMz5T7XwKE+IBKVLue58yk0B9EtX6xJxN7t4VaH69a6+tHrW+/
+r4e3n75KARWShSOFTtiumE3FPVyo79ATIJ0Ry0No/yXe99UIUqTGKKUzYHFJHJoaCONQDCnDSCDS
+IPAvGCVeXNsZ7lpbWFfdaZtgPos5LeK2C1TBKzD09V3HFlCOsbFT/hNbz4HzJaRjnjdam9FXw/o6
+VyPozhMmiaRYAMeNSJR1iMjBEFLMtsH7lptartfxkzPQgFVofwRlxKsMYn2KNDnU9fsOQCkRIYVT
+G80ZRqBpSQjRYPX7s9gvtqknyNE2f8V09sxHM7YPmMMJgrmVna2AT717n5fUAIDkiBCqFgWUUgKD
+8jOc0Rgj5JS6vZnQI14wkaTDAkD266p/iVHs8gjCrMFARVM0iEVgFAa9YRAQT4tkgsmloTJLmyCm
+uSHRnTkzIdZMmZ5kYX/iJFtTwu9cFvr3aDWcUx4pUW/cVQwPoQSlwguNd4M0vTpAauKodmLFXv1P
+dkcKkYUglER2Q4L4gnmOiNGzSBATwGQgwihs5/QffIhyfg4hJvM2r4Rp6L+1ibCCd4jYZ6jCiBlc
+2+y4fl4yTGIwcWXNAUEeXmu8iCMV96DNTnmRNICDk2N5qaXGbsF91OX/0hlcYTjrMfy02p9Xv70D
+mv3RZCFOAAA=
+EOF
+
+gzip -d pcrejit.txt || framework_failure_  # NOTE(review): only pcrejit.txt.gz exists; presumably relies on gzip trying the .gz suffix
+
+LC_ALL=C grep -P -n '^([/](?!/)|[^/])*~/.*' pcrejit.txt  # status 1 (no match) is the expected outcome
+if test $? != 1; then
+ # The above often makes grep attempt to use an inordinate amount
+ # of stack space. If grep fails with $? != 1, try again, but this
+ # time with no soft limit:
+
+ # Use ulimit to remove that limit, if possible.
+ # If ulimit is not usable, just skip this test.
+ (ulimit -s unlimited) || skip_ this shell lacks ulimit support
+
+ # Rerun that same test, but now with no limit on stack size:
+ (ulimit -s unlimited;
+ returns_ 1 env LC_ALL=C grep -P -n '^([/](?!/)|[^/])*~/.*' pcrejit.txt 2> err) \
+ || fail=1
+
+ # If that failed due to stack overflow, don't cry foul.
+ overflow_pat="stack overflow|exceeded PCRE's recursion limit"
+ test $fail = 1 && { grep -Eq "$overflow_pat" err && fail=0 || cat err; }  # otherwise show the unexpected diagnostics
+fi
+
+Exit $fail
diff --git a/src/grep/tests/pcre-o b/src/grep/tests/pcre-o
new file mode 100755
index 0000000..1d155a7
--- /dev/null
+++ b/src/grep/tests/pcre-o
@@ -0,0 +1,17 @@
+#! /bin/sh
+# Ensure that grep -oP doesn't cause internal error at match.
+#
+# Copyright (C) 2014-2021 Free Software Foundation, Inc.
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+require_pcre_
+
+fail=0
+
+echo ab | grep -oP 'a' || fail=1  # must exit 0 rather than fail with an internal error
+
+Exit $fail
diff --git a/src/grep/tests/pcre-utf8 b/src/grep/tests/pcre-utf8
new file mode 100755
index 0000000..c5d0b80
--- /dev/null
+++ b/src/grep/tests/pcre-utf8
@@ -0,0 +1,40 @@
+#! /bin/sh
+# Ensure that, with -P, Unicode \p{} symbols are correctly matched.
+#
+# Copyright (C) 2012-2021 Free Software Foundation, Inc.
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+require_en_utf8_locale_
+LC_ALL=en_US.UTF-8 require_pcre_
+
+fail=0
+
+echo '$' | LC_ALL=en_US.UTF-8 grep -qP '\p{S}' \
+ || skip_ 'PCRE support is compiled out, or it does not support properties'
+
+euro='\342\202\254 euro'  # octal escapes for the UTF-8 bytes of U+20AC; expanded by printf below
+printf "$euro\\n" > in || framework_failure_
+
+# The euro sign has the unicode "Symbol" property, so this must match:
+LC_ALL=en_US.UTF-8 grep -P '^\p{S}' in > out || fail=1
+compare in out || fail=1
+
+# This RE must *not* match in the C locale, because the first
+# byte is not a "Symbol".
+LC_ALL=C grep -P '^\p{S}' in > out && fail=1
+compare /dev/null out || fail=1
+
+LC_ALL=en_US.UTF-8 grep -P '^. euro$' in > out2 || fail=1
+compare in out2 || fail=1
+
+LC_ALL=en_US.UTF-8 grep -oP '. euro' in > out3 || fail=1
+compare in out3 || fail=1
+
+returns_ 1 env LC_ALL=en_US.UTF-8 grep -P '^\P{S}' in > out4 || fail=1  # negated property: require status 1, so an error (2) cannot pass
+compare /dev/null out4 || fail=1
+
+Exit $fail
diff --git a/src/grep/tests/pcre-w b/src/grep/tests/pcre-w
new file mode 100755
index 0000000..7173b58
--- /dev/null
+++ b/src/grep/tests/pcre-w
@@ -0,0 +1,31 @@
+#! /bin/sh
+# Before grep-2.19, grep -Pw %% would match %% enclosed in word boundaries
+#
+# Copyright (C) 2014-2021 Free Software Foundation, Inc.
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+require_pcre_
+
+fail=0
+
+echo %aa% > in || framework_failure_
+grep -Pw aa in > out || fail=1  # "aa" delimited by non-word characters: expected to match
+compare out in || fail=1
+
+echo a%%a > in || framework_failure_
+grep -Pw %% in > out && fail=1  # "%%" adjacent to word characters: expected not to match
+compare /dev/null out || fail=1
+
+echo %%%% > in || framework_failure_
+grep -Pw %% in > out || fail=1  # "%%" among non-word characters: expected to match
+compare out in || fail=1
+
+echo %% > in || framework_failure_
+grep -Pw %% in > out || fail=1  # "%%" as the whole line: expected to match
+compare out in || fail=1
+
+Exit $fail
diff --git a/src/grep/tests/pcre-wx-backref b/src/grep/tests/pcre-wx-backref
new file mode 100755
index 0000000..350091a
--- /dev/null
+++ b/src/grep/tests/pcre-wx-backref
@@ -0,0 +1,28 @@
+#! /bin/sh
+# Before grep-2.19, grep -P and -w/-x would not work with a back-reference.
+#
+# Copyright (C) 2014-2021 Free Software Foundation, Inc.
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+require_pcre_
+
+echo aa > in || framework_failure_
+echo 'grep: reference to non-existent subpattern' > exp-err \
+ || framework_failure_
+
+fail=0
+
+for xw in x w; do  # run the same checks with -x and then with -w
+ grep -P$xw '(.)\1' in > out 2>&1 || fail=1  # valid back-reference: "aa" must match
+ compare out in || fail=1
+
+ grep -P$xw '(.)\2' in > out 2> err && fail=1  # group 2 does not exist: grep must not succeed
+ compare /dev/null out || fail=1
+ compare exp-err err || fail=1  # and must print exactly the expected diagnostic
+done
+
+Exit $fail
diff --git a/src/grep/tests/pcre-z b/src/grep/tests/pcre-z
new file mode 100755
index 0000000..4ce9a93
--- /dev/null
+++ b/src/grep/tests/pcre-z
@@ -0,0 +1,28 @@
+#!/bin/sh
+# Test Perl regex with NUL-separated input
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+require_pcre_
+require_en_utf8_locale_
+
+REGEX=a
+
+printf '%s\n\0' abc def ghi aaa gah > in || framework_failure_  # five NUL-terminated records, three containing "a"
+
+grep -z "$REGEX" in > exp 2>err || fail_ 'Cannot do BRE (grep -z) match.'
+compare /dev/null err || fail_ 'stderr not empty on grep -z.'
+
+# Sanity check the output
+test "$(grep -cz "$REGEX" in 2>err)" = 3 \
+ || fail_ 'Incorrect BRE (grep -cz) match.'
+compare /dev/null err || fail_ 'stderr not empty on grep -cz.'
+
+fail=0
+grep -Pz "$REGEX" in > out 2>err || fail=1  # -P must select the same records as the BRE run above
+compare exp out || fail=1
+compare /dev/null err || fail=1
+
+printf '\303\200\0' >in0 || framework_failure_ # "À" followed by a NUL.
+LC_ALL=en_US.UTF-8 grep -z . in0 >out || fail=1
+cmp in0 out || fail=1
+
+Exit $fail