#!/bin/sh
# Before coreutils-8.32, uniq would not distinguish
# items which compared equal with strcoll().
# So ensure we avoid strcoll() for the following cases.

# Copyright (C) 2020-2025 Free Software Foundation, Inc.

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

# Source the shared test framework (looked up under $srcdir, which defaults
# to the current directory), then call its path_prepend_ helper with ./src --
# presumably so the freshly built tools are found first on PATH.
. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
# Framework helper defined outside this file; presumably reports the
# versions of the uniq and printf programs being exercised.
print_ver_ uniq printf

# Write test input to the file 'in' in the current directory.
# Arguments: a printf format string followed by any format arguments;
#   they are handed unchanged to the printf program located by env
#   (not the shell builtin), run with LC_ALL set to $LOCALE_FR_UTF8.
# If the command fails, framework_failure_ (defined outside this file)
# is invoked.
gen_input()
{
  # Quote the locale so it always reaches env as a single LC_ALL=... word.
  env LC_ALL="$LOCALE_FR_UTF8" printf "$@" > in || framework_failure_
}

# strcoll() used to return 0 when comparing the following strings,
# which was fixed somewhere between glibc-2.22 and glibc-2.30.
# uniq must output both lines regardless.
gen_input '%s\n' 'ⁿᵘˡˡ' 'ܥܝܪܐܩ'
# The command substitution is left unquoted, so word splitting drops
# any whitespace around the count printed by wc.
test $(LC_ALL=$LOCALE_FR_UTF8 uniq < in | wc -l) = 2 || fail=1

# Normalization in strcoll() is inconsistent across platforms.
# glibc-based systems at least do _not_ normalize in strcoll(),
# while Cygwin systems, for example, may do so.
# á composed (U+00E1) and decomposed (a followed by U+0301)
# are generally not compared equal, so two lines are expected.
gen_input '\u00E1\na\u0301\n'
test $(LC_ALL=$LOCALE_FR_UTF8 uniq < in | wc -l) = 2 || fail=1
# Similarly with the following equivalent hangul characters:
# the single code point U+AC01 versus the sequence U+1100 U+1161 U+11A8.
gen_input '\uAC01\n\u1100\u1161\u11A8\n'
test $(LC_ALL=ko_KR.utf8 uniq < in | wc -l) = 2 || fail=1

# Note that when running in the wrong locale, strcoll() may indicate
# that strings match when they don't.  Since uniq avoids that, hangul
# and CJK lines are now distinguished even if uniq is running in a
# locale other than the one matching the text.
# hangul (ko_KR.utf8)
gen_input '\uAC00\n\uAC01\n'
test $(LC_ALL=en_US.utf8 uniq < in | wc -l) = 2 || fail=1
# CJK (zh_CN.utf8)
gen_input '\u3400\n\u3401\n'
test $(LC_ALL=en_US.utf8 uniq < in | wc -l) = 2 || fail=1

# Note strcoll() ignores certain characters,
# but not if the strings are otherwise equal.
# I.e., the following, on glibc-2.30 at least, does not
# collapse to a single item (as expected);
# it is tested here for illustration.
gen_input ',a\n.a\n'
test $(LC_ALL=$LOCALE_FR_UTF8 uniq < in | wc -l) = 2 || fail=1

# Finish through the framework's Exit helper (defined outside this file),
# passing $fail, which the checks in this script set to 1 on a mismatch.
Exit $fail