summaryrefslogtreecommitdiffstats
path: root/src/preproc/preconv/tests/smoke-test.sh
blob: 41314169dc8f6bf52a84831ad9398b351f401699 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/bin/sh
#
# Copyright (C) 2020 Free Software Foundation, Inc.
#
# This file is part of groff.
#
# groff is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# groff is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

# Stop at the first command (or pipeline) that fails.
set -e

# Pin the locale so that character encoding handling is predictable.
LC_ALL=C
export LC_ALL

# Path of the preconv executable under test: taken from
# $abs_top_builddir when that is set and non-empty, otherwise from the
# current directory.
preconv=${abs_top_builddir:-.}/preconv

# In every check below, "2>&1 >/dev/null" sends preconv's standard
# error down the pipe to grep and discards its standard output; the
# pipeline succeeds only if grep finds the expected diagnostic text.

# With "-e euc-kr" given, the diagnostics must say that the search for
# a coding tag was skipped, even though the input begins with the bytes
# FE FF.
echo "testing -e flag override of BOM detection" >&2
printf '\376\377\0\100\0\n' | "$preconv" -d -e euc-kr 2>&1 >/dev/null \
    | grep -q "no search for coding tag"

# expect_bom NAME FORMAT
#
# Announce the test for the BOM called NAME on standard error, feed the
# bytes produced by the printf format FORMAT to "preconv -d", and
# require "found BOM" among the diagnostics.  The exit status is that of
# grep, so a missing diagnostic aborts the script under "set -e".
expect_bom () {
    echo "testing detection of $1 BOM" >&2
    # shellcheck disable=SC2059 # $2 is always a literal format below.
    printf "$2" | "$preconv" -d 2>&1 >/dev/null | grep -q "found BOM"
}

# Each sample is a byte order mark followed by "@" (octal 100) and a
# newline, laid out in the code unit size and byte order named.
expect_bom UTF-32BE '\0\0\376\377\0\0\0\100\0\0\0\n'
expect_bom UTF-32LE '\377\376\0\0\100\0\0\0\n\0\0\0'
expect_bom UTF-16BE '\376\377\0\100\0\n'
expect_bom UTF-16LE '\377\376\100\0\n\0'
expect_bom UTF-8 '\357\273\277@\n'

# We do not find a coding tag on piped input because it isn't seekable.
#
# The check passes when "no coding tag" appears in preconv's standard
# error; standard output is discarded.
#
# NOTE(review): inside the single quotes printf receives `\\n`, which it
# prints as a backslash followed by the letter n, so the input line has
# no newline terminator.  Confirm whether that is intended before
# "fixing" it: the expected diagnostic may depend on these exact bytes
# rather than (or as well as) on the input being a pipe.
echo "testing detection of Emacs coding tag in piped input" >&2
printf '.\\" -*- coding: euc-kr; -*-\\n' \
    | "$preconv" -d 2>&1 >/dev/null \
    | grep -q "no coding tag"

# The remaining checks are gated on uchardet: unless the output of
# "preconv -v" contains 'with uchardet support', stop here with exit
# status 77.
# NOTE(review): 77 is presumably the test harness's "skipped" status;
# confirm against the build system.
echo "testing uchardet detection of encoding" >&2
if ! "$preconv" -v | grep -q 'with uchardet support'
then
    exit 77
fi

# Instead of using temporary files, which in all fastidiousness means
# cleaning them up even if we're interrupted, which in turn means
# setting up signal handlers, we use files in the build tree.
#
# The document is looked up under the same build root as preconv itself
# ($abs_top_builddir, or the current directory when that is unset or
# empty) so the check does not depend on the working directory, and the
# path is quoted so it survives spaces or glob characters in that root.
# The check passes when 'charset: ISO-8859-1' appears in preconv's
# standard error; standard output is discarded.
doc=contrib/mm/groff_mmse.7
echo "testing uchardet detection on Latin-1 document $doc" >&2
"$preconv" -d -D us-ascii "${abs_top_builddir:-.}/$doc" 2>&1 >/dev/null \
    | grep -q 'charset: ISO-8859-1'

# Emit the one-line sample used by both checks below.  Its only
# non-ASCII byte is \351 (0xE9), which is e-acute in ISO-8859-1.
latin1_sample () {
    printf 'Eat at the caf\351.\n'
}

# uchardet can't seek on a pipe either, so the encoding named with -D
# is expected to be reported as the one used.
echo "testing uchardet detection on pipe (expect fallback to -D)" >&2
latin1_sample | "$preconv" -d -D euc-kr 2>&1 >/dev/null \
    | grep -q "encoding used: 'EUC-KR'"

# With no -D either, fall back to the locale.  preconv assumes Latin-1
# for the C locale instead of US-ASCII.
echo "testing fallback to locale setting in environment" >&2
latin1_sample | "$preconv" -d 2>&1 >/dev/null \
    | grep -q "encoding used: 'ISO-8859-1'"