diff options
Diffstat (limited to 'src/grep/tests/many-regex-performance')
-rwxr-xr-x | src/grep/tests/many-regex-performance | 79 |
1 files changed, 79 insertions, 0 deletions
diff --git a/src/grep/tests/many-regex-performance b/src/grep/tests/many-regex-performance new file mode 100755 index 0000000..4b4dfe8 --- /dev/null +++ b/src/grep/tests/many-regex-performance @@ -0,0 +1,79 @@ +#!/bin/sh +# Test for this performance regression: +# grep-3.4 would require O(N^2) RSS for N regexps +# grep-3.5 requires O(N) in the most common cases. + +# Copyright 2020-2021 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. "${srcdir=.}/init.sh"; path_prepend_ ../src + +fail=0 + +# This test is susceptible to failure due to differences in +# system load during the two test runs, so we'll mark it as +# "expensive", making it less likely to be run by regular users. +expensive_ + +# Make the quick/small input large enough so that even on high-end +# systems this first invocation takes at least 10ms of user time. +word_list=/usr/share/dict/linux.words + +# If $word_list does not exist, generate an input that exibhits +# similar performance characteristics. +if ! test -f $word_list; then + # Generate data comprable to that word list. + # Note how all "words" start with "a", and that there is + # a small percentage of lines with at least one "." metachar. + # This requires /dev/urandom, so if it's not present, skip + # this test. If desperate, we could fall back to using + # tar+compressed lib/*.c as the data source. + test -r /dev/urandom \ + || skip_ 'this system has neither word list nor working /dev/urandom' + word_list=word_list + ( echo a; cat /dev/urandom \ + | LC_ALL=C tr -dc 'a-zA-Z0-9_' \ + | head -c500000 \ + | sed 's/\(........\)/\1\n/g' \ + | sed s/rs/./ \ + | sed s/./a/ \ + | sort \ + ) > $word_list +fi + +n_lines=2000 +while :; do + sed ${n_lines}q < $word_list > in || framework_failure_ + small_ms=$(LC_ALL=C user_time_ 1 grep --file=in -v in) || fail=1 + test $small_ms -ge 10 && break + n_lines=$(expr $n_lines + 2000) +done + +# Now, run it again, but with 20 times as many lines. +n_lines=$(expr $n_lines \* 20) +sed ${n_lines}q < $word_list > in || framework_failure_ +large_ms=$(LC_ALL=C user_time_ 1 grep --file=in -v in) || fail=1 + +# Deliberately recording in an unused variable so it +# shows up in set -x output, in case this test fails. +ratio=$(expr "$large_ms" / "$small_ms") + +# The duration of the larger run must be no more than 60 times +# that of the small one. Using recent versions prior to this fix, +# this test would fail due to ratios larger than 300. Using the +# fixed version, it's common to see a ratio of 20-30. +returns_ 1 expr $small_ms '<' $large_ms / 60 || fail=1 + +Exit $fail |