summaryrefslogtreecommitdiffstats
path: root/scripts/convert_to_utf_8.sh
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/convert_to_utf_8.sh')
-rwxr-xr-xscripts/convert_to_utf_8.sh68
1 files changed, 68 insertions, 0 deletions
diff --git a/scripts/convert_to_utf_8.sh b/scripts/convert_to_utf_8.sh
new file mode 100755
index 0000000..28f5a72
--- /dev/null
+++ b/scripts/convert_to_utf_8.sh
@@ -0,0 +1,68 @@
+#!/bin/sh
+#
+# convert_to_utf_8.sh
+#
+# Find man pages with encoding other than us-ascii, and convert them
+# to the utf-8 encoding.
+#
+# Example usage:
+#
+# cd man-pages-x.yy
+# sh convert_to_utf_8.sh <output_dir> man?/*
+#
+######################################################################
+#
+# (C) Copyright 2013, Peter Schiffer <pschiffe@redhat.com>
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details
+# (http://www.gnu.org/licenses/gpl-2.0.html).
+#
+
+if [[ $# -lt 2 ]]; then
+ echo "Usage: ${0} <output_dir> man?/*" 1>&2
+ exit 1
+fi
+
+out_dir="$1"
+shift
+
+enc_line=""
+
+for f in "$@"; do
+ enc=$(file -bi "$f" | cut -d = -f 2)
+ if [[ $enc != "us-ascii" ]]; then
+ dirn=$(dirname "$f")
+ basen=$(basename "$f")
+ new_dir="${out_dir}/${dirn}"
+ if [[ ! -e "$new_dir" ]]; then
+ mkdir -p "$new_dir"
+ fi
+ case "$basen" in
+ armscii-8.7 | cp1251.7 | iso_8859-*.7 | koi8-?.7)
+
+ # iconv does not understand some encoding names that
+ # start "iso_", but does understand the corresponding
+ # forms that start with "iso-"
+
+ from_enc="$(echo $basen | sed 's/\.7$//;s/iso_/iso-/')"
+ ;;
+ *)
+ echo "NULL TRANSFORM: $f"
+ from_enc=$enc
+ ;;
+ esac
+ printf "Converting %-23s from %s\n" "$f" "$from_enc"
+ echo "$enc_line" > "${new_dir}/${basen}"
+ iconv -f "$from_enc" -t utf-8 "$f" \
+ | sed "/.*-\*- coding:.*/d;/.\\\" t$/d" >> "${new_dir}/${basen}"
+ fi
+done
+
+exit 0