diff options
Diffstat (limited to 'src/preproc/preconv/tests/smoke-test.sh')
-rwxr-xr-x | src/preproc/preconv/tests/smoke-test.sh | 88 |
1 files changed, 88 insertions, 0 deletions
diff --git a/src/preproc/preconv/tests/smoke-test.sh b/src/preproc/preconv/tests/smoke-test.sh new file mode 100755 index 0000000..4131416 --- /dev/null +++ b/src/preproc/preconv/tests/smoke-test.sh @@ -0,0 +1,88 @@ +#!/bin/sh +# +# Copyright (C) 2020 Free Software Foundation, Inc. +# +# This file is part of groff. +# +# groff is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# groff is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# Ensure a predictable character encoding. +export LC_ALL=C + +set -e + +preconv="${abs_top_builddir:-.}/preconv" + +echo "testing -e flag override of BOM detection" >&2 +printf '\376\377\0\100\0\n' \ + | "$preconv" -d -e euc-kr 2>&1 > /dev/null \ + | grep -q "no search for coding tag" + +echo "testing detection of UTF-32BE BOM" >&2 +printf '\0\0\376\377\0\0\0\100\0\0\0\n' \ + | "$preconv" -d 2>&1 > /dev/null \ + | grep -q "found BOM" + +echo "testing detection of UTF-32LE BOM" >&2 +printf '\377\376\0\0\100\0\0\0\n\0\0\0' \ + | "$preconv" -d 2>&1 > /dev/null \ + | grep -q "found BOM" + +echo "testing detection of UTF-16BE BOM" >&2 +printf '\376\377\0\100\0\n' \ + | "$preconv" -d 2>&1 > /dev/null \ + | grep -q "found BOM" + +echo "testing detection of UTF-16LE BOM" >&2 +printf '\377\376\100\0\n\0' \ + | "$preconv" -d 2>&1 > /dev/null \ + | grep -q "found BOM" + +echo "testing detection of UTF-8 BOM" >&2 +printf '\357\273\277@\n' \ + | "$preconv" -d 2>&1 > /dev/null \ + | grep -q "found BOM" + +# We do not find a coding tag on piped input because it isn't seekable. +echo "testing detection of Emacs coding tag in piped input" >&2 +printf '.\\" -*- coding: euc-kr; -*-\\n' \ + | "$preconv" -d 2>&1 >/dev/null \ + | grep -q "no coding tag" + +# We need uchardet to work to get past this point. +echo "testing uchardet detection of encoding" >&2 +"$preconv" -v | grep -q 'with uchardet support' || exit 77 + +# Instead of using temporary files, which in all fastidiousness means +# cleaning them up even if we're interrupted, which in turn means +# setting up signal handlers, we use files in the build tree. + +doc=contrib/mm/groff_mmse.7 +echo "testing uchardet detection on Latin-1 document $doc" >&2 +"$preconv" -d -D us-ascii 2>&1 >/dev/null $doc \ + | grep -q 'charset: ISO-8859-1' + +# uchardet can't seek on a pipe either. +echo "testing uchardet detection on pipe (expect fallback to -D)" >&2 +printf 'Eat at the caf\351.\n' \ + | "$preconv" -d -D euc-kr 2>&1 > /dev/null \ + | grep -q "encoding used: 'EUC-KR'" + +# Fall back to the locale. preconv assumes Latin-1 for C instead of +# US-ASCII. +echo "testing fallback to locale setting in environment" >&2 +printf 'Eat at the caf\351.\n' \ + | "$preconv" -d 2>&1 > /dev/null \ + | grep -q "encoding used: 'ISO-8859-1'" |