summaryrefslogtreecommitdiffstats
path: root/src/tests/manconv-odd-combinations
diff options
context:
space:
mode:
Diffstat (limited to 'src/tests/manconv-odd-combinations')
-rwxr-xr-xsrc/tests/manconv-odd-combinations81
1 files changed, 81 insertions, 0 deletions
diff --git a/src/tests/manconv-odd-combinations b/src/tests/manconv-odd-combinations
new file mode 100755
index 0000000..087d6fc
--- /dev/null
+++ b/src/tests/manconv-odd-combinations
@@ -0,0 +1,81 @@
+#! /bin/sh
+
+# Test manconv's handling of various odd encoding combinations.
+
+: "${srcdir=.}"
+# shellcheck source-path=SCRIPTDIR
+. "$srcdir/testlib.sh"
+
+: "${MANCONV=manconv}"
+
+init
+
+(for x in $(seq 160 255); do
+ printf %b "\\$(printf %03o "$x")"
+done
+echo) >"$tmpdir/1.inp"
+
+iconv -f ISO-8859-1 -t UTF-8 <"$tmpdir/1.inp" >"$tmpdir/1.exp"
+run $MANCONV -f UTF-8:ISO-8859-1 -t UTF-8 <"$tmpdir/1.inp" >"$tmpdir/1.out"
+expect_files_equal '-f UTF-8:ISO-8859-1 -t UTF-8 on ISO-8859-1 input' \
+ "$tmpdir/1.exp" "$tmpdir/1.out"
+
+iconv -f ISO-8859-2 -t UTF-8 <"$tmpdir/1.inp" >"$tmpdir/1-latin2.exp"
+run $MANCONV -f UTF-8:ISO-8859-2 -t UTF-8 \
+ <"$tmpdir/1.inp" >"$tmpdir/1-latin2.out"
+expect_files_equal '-f UTF-8:ISO-8859-2 -t UTF-8 on ISO-8859-2 input' \
+ "$tmpdir/1-latin2.exp" "$tmpdir/1-latin2.out"
+
+(for x in $(seq 1 1000); do
+ printf '‐'
+done
+echo 'Б' | iconv -f UTF-8 -t KOI8-R
+echo '‐') >"$tmpdir/2.inp"
+iconv -f KOI8-R -t UTF-8 <"$tmpdir/2.inp" >"$tmpdir/2.exp"
+run $MANCONV -f UTF-8:KOI8-R -t UTF-8 <"$tmpdir/2.inp" >"$tmpdir/2.out"
+expect_files_equal \
+ '-f UTF-8:KOI8-R -t UTF-8 on KOI8-R input with UTF-8 prefix' \
+ "$tmpdir/2.exp" "$tmpdir/2.out"
+
+(for x in $(seq 160 255); do
+ printf %b "\\$(printf %03o "$x")"
+done
+echo) | iconv -f ISO-8859-1 -t UTF-8 >"$tmpdir/3.inp"
+run $MANCONV -f UTF-8:ISO-8859-1 -t UTF-8 <"$tmpdir/3.inp" >"$tmpdir/3.out"
+expect_files_equal '-f UTF-8:ISO-8859-1 -t UTF-8 preserves UTF-8 input' \
+ "$tmpdir/3.inp" "$tmpdir/3.out"
+
+# U+00B7 MIDDLE DOT is not representable in ISO-8859-2, and so should be
+# omitted. However, manconv should still recognise that the input was UTF-8
+# rather than falling back to ISO-8859-2.
+cat >"$tmpdir/4.inp" <<'EOF'
+š·ł
+EOF
+iconv -f UTF-8 -t ISO-8859-2 >"$tmpdir/4.exp" <<EOF
+šł
+EOF
+run $MANCONV -f UTF-8:ISO-8859-2 -t ISO-8859-2//IGNORE \
+ <"$tmpdir/4.inp" >"$tmpdir/4.out"
+expect_files_equal \
+ 'recognises input encoding and omits invalid output character' \
+ "$tmpdir/4.exp" "$tmpdir/4.out"
+
+# 0xAE does not exist in ISO-8859-7, so manconv won't be able to recode this
+# to UTF-8 without conversion errors. (In the original case where this was
+# seen in the wild, the coding: tag should actually have read ISO-8859-13.)
+iconv -f UTF-8 -t ISO-8859-13 >"$tmpdir/5.inp" <<'EOF'
+'\" -*- coding: ISO-8859-7
+REGISTERED SIGN: ®
+trailing data
+EOF
+cat >"$tmpdir/5.exp" <<'EOF'
+'\" -*- coding: UTF-8
+EOF
+<"$tmpdir/5.inp" tail -n +2 | iconv -f ISO-8859-7 -t UTF-8//IGNORE \
+ >>"$tmpdir/5.exp" 2>/dev/null
+run $MANCONV -f UTF-8:ISO-8859-1 -t UTF-8//IGNORE \
+ <"$tmpdir/5.inp" >"$tmpdir/5.out"
+expect_files_equal 'copes with invalid input characters' \
+ "$tmpdir/5.exp" "$tmpdir/5.out"
+
+finish