#! /bin/sh

# Test manconv's handling of various odd encoding combinations.

: "${srcdir=.}"
# shellcheck source-path=SCRIPTDIR
. "$srcdir/testlib.sh"

: "${MANCONV=manconv}"

# Write the bytes 0xA0 through 0xFF, followed by a newline, to stdout.
# The function body is a subshell so the loop variable does not leak into
# the rest of the script.
high_bytes () (
	for code in $(seq 160 255); do
		printf %b "\\$(printf %03o "$code")"
	done
	echo
)

# Compare expected file $2 against actual file $3 under test name $1 when
# $HAVE_ICONV is "yes"; otherwise report test $1 as skipped.
expect_equal_if_iconv () {
	if [ "$HAVE_ICONV" = yes ]; then
		expect_files_equal "$1" "$2" "$3"
	else
		report_skip "$1"
	fi
}

init

# Test 1: the input is raw high bytes; the expected output is what iconv
# produces when treating those bytes as ISO-8859-1.
high_bytes >"$tmpdir/1.inp"
iconv -f ISO-8859-1 -t UTF-8 <"$tmpdir/1.inp" >"$tmpdir/1.exp"
run $MANCONV -f UTF-8:ISO-8859-1 -t UTF-8 <"$tmpdir/1.inp" >"$tmpdir/1.out"
expect_equal_if_iconv '-f UTF-8:ISO-8859-1 -t UTF-8 on ISO-8859-1 input' \
	"$tmpdir/1.exp" "$tmpdir/1.out"

# Test 1 (latin2 variant): same input bytes, but ISO-8859-2 is the second
# encoding named in the -f list and the one iconv uses for the expectation.
iconv -f ISO-8859-2 -t UTF-8 <"$tmpdir/1.inp" >"$tmpdir/1-latin2.exp"
run $MANCONV -f UTF-8:ISO-8859-2 -t UTF-8 \
	<"$tmpdir/1.inp" >"$tmpdir/1-latin2.out"
expect_equal_if_iconv '-f UTF-8:ISO-8859-2 -t UTF-8 on ISO-8859-2 input' \
	"$tmpdir/1-latin2.exp" "$tmpdir/1-latin2.out"

# Test 2: 1000 copies of a UTF-8 encoded character with no newline between
# them, then a KOI8-R encoded line, then one more UTF-8 line.  The expected
# output is the whole file decoded as KOI8-R.
(
	for x in $(seq 1 1000); do
		printf '‐'
	done
	echo 'Б' | iconv -f UTF-8 -t KOI8-R
	echo '‐'
) >"$tmpdir/2.inp"
iconv -f KOI8-R -t UTF-8 <"$tmpdir/2.inp" >"$tmpdir/2.exp"
run $MANCONV -f UTF-8:KOI8-R -t UTF-8 <"$tmpdir/2.inp" >"$tmpdir/2.out"
expect_equal_if_iconv \
	'-f UTF-8:KOI8-R -t UTF-8 on KOI8-R input with UTF-8 prefix' \
	"$tmpdir/2.exp" "$tmpdir/2.out"

# Test 3: the input is already UTF-8, so the output is compared directly
# against the input.  This comparison is not conditional on $HAVE_ICONV.
high_bytes | iconv -f ISO-8859-1 -t UTF-8 >"$tmpdir/3.inp"
run $MANCONV -f UTF-8:ISO-8859-1 -t UTF-8 <"$tmpdir/3.inp" >"$tmpdir/3.out"
expect_files_equal '-f UTF-8:ISO-8859-1 -t UTF-8 preserves UTF-8 input' \
	"$tmpdir/3.inp" "$tmpdir/3.out"

# U+00B7 MIDDLE DOT is not representable in ISO-8859-2, and so should be
# omitted. However, manconv should still recognise that the input was UTF-8
# rather than falling back to ISO-8859-2.
cat >"$tmpdir/4.inp" <<'EOF'
š·ł
EOF
# The expected output is the input text with the middle dot removed,
# re-encoded as ISO-8859-2.  It is built from a separate UTF-8 fixture so
# that the expectation never depends on manconv's own output.
cat >"$tmpdir/4.exp.utf8" <<'EOF'
šł
EOF
iconv -f UTF-8 -t ISO-8859-2 >"$tmpdir/4.exp" <"$tmpdir/4.exp.utf8"
# Produce the actual output: //IGNORE asks for unconvertible characters to
# be dropped instead of aborting the conversion.
run $MANCONV -f UTF-8:ISO-8859-2 -t ISO-8859-2//IGNORE \
	<"$tmpdir/4.inp" >"$tmpdir/4.out"
if [ "$HAVE_ICONV" = yes ]; then
	expect_files_equal \
		'recognises input encoding and omits invalid output character' \
		"$tmpdir/4.exp" "$tmpdir/4.out"
else
	report_skip 'recognises input encoding and omits invalid output character'
fi

# 0xAE does not exist in ISO-8859-7, so manconv won't be able to recode this
# to UTF-8 without conversion errors. (In the original case where this was
# seen in the wild, the coding: tag should actually have read ISO-8859-13.)
iconv -f UTF-8 -t ISO-8859-13 >"$tmpdir/5.inp" <<'EOF'
'\" -*- coding: ISO-8859-7
REGISTERED SIGN: ®
trailing data
EOF
# The expected output starts with a rewritten coding: line, followed by the
# remaining input lines decoded as ISO-8859-7 with //IGNORE.
cat >"$tmpdir/5.exp" <<'EOF'
'\" -*- coding: UTF-8
EOF
# iconv's stderr is discarded here on purpose: the input deliberately
# contains a byte that is invalid in ISO-8859-7.
<"$tmpdir/5.inp" tail -n +2 | iconv -f ISO-8859-7 -t UTF-8//IGNORE \
	>>"$tmpdir/5.exp" 2>/dev/null
run $MANCONV -f UTF-8:ISO-8859-1 -t UTF-8//IGNORE \
	<"$tmpdir/5.inp" >"$tmpdir/5.out"
if [ "$HAVE_ICONV" = yes ]; then
	expect_files_equal 'copes with invalid input characters' \
		"$tmpdir/5.exp" "$tmpdir/5.out"
else
	report_skip 'copes with invalid input characters'
fi

finish