summaryrefslogtreecommitdiffstats
path: root/scripts/convert_to_utf_8.sh
blob: 28f5a72cf8c7be1332a9ad95d47d066dd4443d1d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/bin/sh
#
# convert_to_utf_8.sh
#
# Find man pages with encoding other than us-ascii, and convert them
# to the utf-8 encoding.
#
# Example usage:
#
#    cd man-pages-x.yy
#    sh convert_to_utf_8.sh <output_dir> man?/*
#
######################################################################
#
# (C) Copyright 2013, Peter Schiffer <pschiffe@redhat.com>
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details
# (http://www.gnu.org/licenses/gpl-2.0.html).
#

if [[ $# -lt 2 ]]; then
    echo "Usage: ${0} <output_dir> man?/*" 1>&2
    exit 1
fi

out_dir="$1"
shift

enc_line=""

for f in "$@"; do
    enc=$(file -bi "$f" | cut -d = -f 2)
    if [[ $enc != "us-ascii" ]]; then
        dirn=$(dirname "$f")
        basen=$(basename "$f")
        new_dir="${out_dir}/${dirn}"
        if [[ ! -e "$new_dir" ]]; then
            mkdir -p "$new_dir"
        fi
        case "$basen" in
            armscii-8.7 | cp1251.7 | iso_8859-*.7 | koi8-?.7)

	        # iconv does not understand some encoding names that
	    	# start "iso_", but does understand the corresponding
	        # forms that start with "iso-"

                from_enc="$(echo $basen | sed 's/\.7$//;s/iso_/iso-/')"
                ;;
            *)
	        echo "NULL TRANSFORM: $f"
                from_enc=$enc
                ;;
        esac
        printf "Converting %-23s from %s\n" "$f" "$from_enc"
        echo "$enc_line" > "${new_dir}/${basen}"
        iconv -f "$from_enc" -t utf-8 "$f" \
            | sed "/.*-\*- coding:.*/d;/.\\\" t$/d" >> "${new_dir}/${basen}"
    fi
done

exit 0