diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 19:40:15 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 19:40:15 +0000 |
commit | 399644e47874bff147afb19c89228901ac39340e (patch) | |
tree | 1c4c0b733f4c16b5783b41bebb19194a9ef62ad1 /scripts/convert_to_utf_8.sh | |
parent | Initial commit. (diff) | |
download | manpages-upstream/6.05.01.tar.xz manpages-upstream/6.05.01.zip |
Adding upstream version 6.05.01. (ref: upstream/6.05.01)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'scripts/convert_to_utf_8.sh')
-rwxr-xr-x | scripts/convert_to_utf_8.sh | 68 |
1 files changed, 68 insertions, 0 deletions
#!/bin/sh
#
# convert_to_utf_8.sh
#
# Find man pages with encoding other than us-ascii, and convert them
# to the utf-8 encoding.
#
# Example usage:
#
#    cd man-pages-x.yy
#    sh convert_to_utf_8.sh <output_dir> man?/*
#
# Arguments:
#    $1     - output directory; each converted page is written to
#             <output_dir>/<dirname of input>/<basename of input>
#    $2...  - man page files to examine
#
# Exit status:
#    1 if fewer than two arguments are given, 0 otherwise.
#
# This script is written for POSIX sh: only `[ ... ]` tests are used,
# so it also works when /bin/sh is not bash.
#
######################################################################
#
# (C) Copyright 2013, Peter Schiffer <pschiffe@redhat.com>
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details
# (http://www.gnu.org/licenses/gpl-2.0.html).
#

if [ "$#" -lt 2 ]; then
    echo "Usage: ${0} <output_dir> man?/*" 1>&2
    exit 1
fi

out_dir="$1"
shift

# First line written to every converted page.  It is empty here, so each
# output file starts with an empty line.
# NOTE(review): presumably a placeholder for an encoding header line;
# confirm whether the leading empty line is still wanted.
enc_line=""

for f in "$@"; do
    # `file -bi` prints e.g. "text/troff; charset=iso-8859-1";
    # keep only what follows the "=".
    enc=$(file -bi "$f" | cut -d = -f 2)

    if [ "$enc" != "us-ascii" ]; then
        dirn=$(dirname "$f")
        basen=$(basename "$f")
        new_dir="${out_dir}/${dirn}"

        if [ ! -e "$new_dir" ]; then
            mkdir -p "$new_dir"
        fi

        case "$basen" in
        armscii-8.7 | cp1251.7 | iso_8859-*.7 | koi8-?.7)

            # These pages are named after the encoding they are written
            # in, so derive the source encoding from the file name.
            #
            # iconv does not understand some encoding names that
            # start "iso_", but does understand the corresponding
            # forms that start with "iso-"

            from_enc=$(printf '%s\n' "$basen" | sed 's/\.7$//;s/iso_/iso-/')
            ;;
        *)
            echo "NULL TRANSFORM: $f"
            from_enc="$enc"
            ;;
        esac

        printf "Converting %-23s from %s\n" "$f" "$from_enc"

        echo "$enc_line" > "${new_dir}/${basen}"

        # After conversion, drop any "-*- coding: ... -*-" line and any
        # line ending in <any char>" t (for example a leading '\" t line).
        iconv -f "$from_enc" -t utf-8 "$f" \
            | sed '/.*-\*- coding:.*/d;/.\" t$/d' >> "${new_dir}/${basen}"
    fi
done

exit 0