diff options
Diffstat (limited to 'src/utils/afmtodit/make-afmtodit-tables')
-rwxr-xr-x | src/utils/afmtodit/make-afmtodit-tables | 139 |
1 files changed, 139 insertions, 0 deletions
#! /bin/sh
#
# make-afmtodit-tables -- script for creating the 'unicode_decomposed'
#                         and 'AGL_to_unicode' tables
#
# Copyright (C) 2005-2020 Free Software Foundation, Inc.
# Written by Werner Lemberg <wl@gnu.org>
#
# This file is part of groff.
#
# groff is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# groff is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

#
# usage:
#
#   make-afmtodit-tables \
#     UnicodeData.txt version-string glyphlist.txt > afmtodit.tables
#
# 'UnicodeData.txt' is the central database file from the Unicode
# standard.  Unfortunately, it doesn't contain a version number, which
# must be thus provided manually as an additional parameter.
#
# 'glyphlist.txt' holds the Adobe Glyph List (AGL).
#
# This program needs a C preprocessor; the command named by the
# environment variable CPP is used if set, 'cpp' otherwise.
#
# Exit status: 0 on success, 2 on a usage error, 1 on any other error.
#

# Abort on unhandled command failures and on use of unset variables.
set -eu

if [ $# -ne 3 ]; then
  # Standard output is normally redirected into the generated table
  # file, so the usage message has to go to standard error.
  echo "usage: $0 UnicodeData.txt UNICODE-VERSION-STRING" \
       "glyphlist.txt > afmtodit.tables" >&2
  exit 2
fi

unicode_data="$1"
unicode_version="$2"
glyph_list="$3"

for f in "$unicode_data" "$glyph_list"; do
  if ! [ -r "$f" ]; then
    echo "$0: '$f' does not exist or is not readable" >&2
    exit 1
  fi
done

# Keep all intermediate files in a private directory that is removed on
# every exit path, including termination by a signal.
tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/make-afmtodit-tables.XXXXXX") || {
  echo "$0: cannot create temporary directory" >&2
  exit 1
}
trap 'rm -rf "$tmpdir"' EXIT
trap 'exit 1' HUP INT TERM

decomp="$tmpdir/decomp"    # code point;canonical decomposition
cpp_in="$tmpdir/cpp_in"    # macro definitions followed by macro uses
cpp_out="$tmpdir/cpp_out"  # preprocessor output
full="$tmpdir/full"        # code point;fully expanded decomposition

# Use one date stamp so that both table comments agree.
today=$(date '+%Y-%m-%d')

# Handle UnicodeData.txt.
#
# Remove ranges and control characters,
# then extract the decomposition field,
# then remove lines without decomposition,
# then remove all compatibility decompositions.
sed -e '/^[^;]*;</d' \
    -e 's/;[^;]*;[^;]*;[^;]*;[^;]*;\([^;]*\);.*$/;\1/' \
    -e '/^[^;]*;$/d' \
    -e '/^[^;]*;</d' \
    < "$unicode_data" > "$decomp"

# Prepare input for running cpp: every decomposition becomes a macro
# definition (all code points get a 'u' prefix to form identifiers),
# and every decomposable code point is followed by one use of its
# macro.
sed -e 's/^\([^;]*\);/#define \1 /' \
    -e 's/ / u/g' \
    < "$decomp" > "$cpp_in"
sed -e 's/^\([^;]*\);.*$/\1 u\1/' \
    < "$decomp" >> "$cpp_in"

# Run C preprocessor to recursively decompose.
if ! "${CPP:-cpp}" "$cpp_in" "$cpp_out"; then
  echo "$0: C preprocessor '${CPP:-cpp}' failed" >&2
  exit 1
fi

# Convert it back to original format: drop preprocessor lines and empty
# lines, squeeze runs of spaces, strip trailing spaces and the 'u'
# prefixes, and turn the first space back into a semicolon.
# ('  *' is the portable spelling of the GNU-only ' \+'.)
sed -e '/#/d' \
    -e '/^$/d' \
    -e 's/  */ /g' \
    -e 's/ *$//' \
    -e 's/u//g' \
    -e 's/^\([^ ]*\) /\1;/' \
    < "$cpp_out" > "$full"

# Write comment.
cat <<END
# This table was algorithmically derived from the file 'UnicodeData.txt'
# for Unicode $unicode_version, available from unicode.org,
# on $today.
END

# Emit first table.
echo 'my %unicode_decomposed = ('
sed -e 's/ /_/g' \
    -e 's/\(.*\);\(.*\)/ "\1", "\2",/' \
    < "$full"
echo ');'
echo ''

# Write comment.
cat <<END
# This table was algorithmically derived from the Adobe Glyph List (AGL)
# file 'glyphlist.txt' from the GitHub Adobe Type Tools agl-aglfn
# project, on $today.
#
# See "groff:" comments for altered mappings.
END

# Emit second table.
#
# Convert AGL syntax to a chunk of Perl: drop lines containing '#',
# join multiple code points with underscores, and drop entries whose
# code point starts with 'E' or with 'F0' to 'F8' (written as two
# delete commands because '\|' is a GNU-only extension).  Then perform
# the groff replacements.
echo 'my %AGL_to_unicode = ('
sed -e '/#/d' \
    -e 's/ /_/g' \
    -e '/;E/d' \
    -e '/;F[0-8]/d' \
    -e 's/\(.*\);\(.*\)/ "\1", "\2",/' \
    -e 's/\("Delta"\), "2206",$/\1, "0394", # groff: not U+2206/' \
    -e 's/\("Omega"\), "2126",$/\1, "03A9", # groff: not U+2126/' \
    -e 's/\("mu"\), "00B5",$/\1, "03BC", # groff: not U+00B5/' \
    < "$glyph_list"
echo ');'

# Temporary files are removed by the EXIT trap.

# Local Variables:
# fill-column: 72
# End:
# vim: set textwidth=72: