diff options
Diffstat (limited to '')
-rwxr-xr-x | src/grep/tests/turkish-I | 33 | ||||
-rwxr-xr-x | src/grep/tests/turkish-I-without-dot | 55 |
2 files changed, 88 insertions, 0 deletions
diff --git a/src/grep/tests/turkish-I b/src/grep/tests/turkish-I new file mode 100755 index 0000000..cfe90c2 --- /dev/null +++ b/src/grep/tests/turkish-I @@ -0,0 +1,33 @@ +#!/bin/sh +# grep -i in UTF-8: missing NL in output on line containing I WITH DOT (U+0130) + +# Copyright (C) 2011-2021 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. "${srcdir=.}/init.sh"; path_prepend_ ../src + +require_en_utf8_locale_ +require_compiled_in_MB_support + +fail=0 + +i='\304\260' +printf "$i$i$i$i$i$i$i\n" > in || framework_failure_ + +LC_ALL=en_US.UTF-8 grep -i .... in > out || fail=1 + +compare out in || fail=1 + +Exit $fail diff --git a/src/grep/tests/turkish-I-without-dot b/src/grep/tests/turkish-I-without-dot new file mode 100755 index 0000000..127ec2f --- /dev/null +++ b/src/grep/tests/turkish-I-without-dot @@ -0,0 +1,55 @@ +#!/bin/sh +# grep -i would misbehave for any matched line containing a character +# (like "I" in the tr_TR.utf8 locale) whose lower-case representation +# occupies more bytes (two in this case, for 0xc4b1, aka U+0131). + +# Copyright (C) 2011-2021 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. "${srcdir=.}/init.sh"; path_prepend_ ../src + +require_tr_utf8_locale_ +require_compiled_in_MB_support + +# Before this change, grep could print a lot of uninitialized memory: +# $ printf "IIIIIII\n" > in +# $ for i in $(seq 10); do LC_ALL=tr_TR.utf8 src/grep -i . in|wc -c; done +# 760 +# 754 +# 585 +# 298 +# 273 +# 458 +# 660 +# 552 +# 936 +# 678 + +fail=0 + +printf "IIIIIII\n" > in || framework_failure_ +LC_ALL=tr_TR.utf8 grep -i .... in > out || fail=1 +compare out in || fail=1 + +# Also exercise the case in which the original string and the lower-case +# buffer have precisely the same length (22 bytes here), yet internal +# offsets do differ. Lengths are the same because while some bytes shrink +# when converted to lower case, others grow, and here they balance out. +i='I\304\260' +printf "$i$i$i$i$i$i$i\n" > in || framework_failure_ +LC_ALL=tr_TR.utf8 grep -i .... in > out || fail=1 +compare out in || fail=1 + +Exit $fail |