diff options
Diffstat (limited to 'src/grep/tests/euc-mb')
-rwxr-xr-x | src/grep/tests/euc-mb | 47 |
1 files changed, 47 insertions, 0 deletions
diff --git a/src/grep/tests/euc-mb b/src/grep/tests/euc-mb
new file mode 100755
index 0000000..c639374
--- /dev/null
+++ b/src/grep/tests/euc-mb
@@ -0,0 +1,47 @@
+#!/bin/sh
+# test that matches starting in the middle of a multibyte char aren't rejected
+# too greedily.
+# Derived from https://savannah.gnu.org/bugs/?23814
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+
+# Add "." to PATH for the use of get-mb-cur-max.
+path_prepend_ .
+
+require_compiled_in_MB_support
+
+locale=ja_JP.EUC-JP
+
+make_input () {
+  echo "$1" | tr AB '\244\263'
+}
+
+euc_grep () {
+  pat=$(make_input "$1")
+  LC_ALL=$locale grep "$pat"
+}
+
+case $(get-mb-cur-max $locale) in
+  2|3) ;;
+  *) skip_ 'EUC-JP locale not found' ;;
+esac
+
+fail=0
+
+# Does EUC-JP work at all?
+make_input BABA |euc_grep AB && fail=1
+
+# Here are two cases in which a KWSet search matches in the middle
+# of a multibyte character. The first ensures that the DFA matcher
+# finds the real match at the end of line. The second ensures that
+# while the KWSet match found a false positive, the DFA matcher
+# determines there is no match after all.
+make_input BABAAB |euc_grep AB > out || fail=1
+make_input BABAAB > exp || framework_failure_
+compare exp out || fail=1
+make_input BABABA |returns_ 1 euc_grep AB || fail=1
+make_input BABABA |returns_ 1 euc_grep '^x\|AB' || fail=1
+
+# -P supports only unibyte and UTF-8 locales.
+returns_ 2 env LC_ALL=$locale grep -P x /dev/null || fail=1
+
+Exit $fail