diff options
Diffstat (limited to 'src/utils/hpftodit')
-rw-r--r-- | src/utils/hpftodit/hpftodit.1.man | 476 | ||||
-rw-r--r-- | src/utils/hpftodit/hpftodit.am | 34 | ||||
-rw-r--r-- | src/utils/hpftodit/hpftodit.cpp | 1465 | ||||
-rw-r--r-- | src/utils/hpftodit/hpuni.cpp | 697 |
4 files changed, 2672 insertions, 0 deletions
diff --git a/src/utils/hpftodit/hpftodit.1.man b/src/utils/hpftodit/hpftodit.1.man new file mode 100644 index 0000000..12e3af7 --- /dev/null +++ b/src/utils/hpftodit/hpftodit.1.man @@ -0,0 +1,476 @@ +.TH hpftodit @MAN1EXT@ "@MDATE@" "groff @VERSION@" +.SH Name +hpftodit \- create font description files for use with +.I groff +and +.I grolj4 +. +. +.\" ==================================================================== +.\" Legal Terms +.\" ==================================================================== +.\" +.\" Copyright (C) 1994-2020 Free Software Foundation, Inc. +.\" +.\" Permission is granted to make and distribute verbatim copies of this +.\" manual provided the copyright notice and this permission notice are +.\" preserved on all copies. +.\" +.\" Permission is granted to copy and distribute modified versions of +.\" this manual under the conditions for verbatim copying, provided that +.\" the entire resulting derived work is distributed under the terms of +.\" a permission notice identical to this one. +.\" +.\" Permission is granted to copy and distribute translations of this +.\" manual into another language, under the above conditions for +.\" modified versions, except that this permission notice may be +.\" included in translations approved by the Free Software Foundation +.\" instead of in the original English. +. +. +.\" Save and disable compatibility mode (for, e.g., Solaris 10/11). +.do nr *groff_hpftodit_1_man_C \n[.cp] +.cp 0 +. +.\" Define fallback for groff 1.23's MR macro if the system lacks it. +.nr do-fallback 0 +.if !\n(.f .nr do-fallback 1 \" mandoc +.if \n(.g .if !d MR .nr do-fallback 1 \" older groff +.if !\n(.g .nr do-fallback 1 \" non-groff *roff +.if \n[do-fallback] \{\ +. de MR +. ie \\n(.$=1 \ +. I \%\\$1 +. el \ +. IR \%\\$1 (\\$2)\\$3 +. . +.\} +.rr do-fallback +. +. 
+.\" ==================================================================== +.SH Synopsis +.\" ==================================================================== +. +.SY hpftodit +.RB [ \-aqs ] +.RB [ \-i\~\c +.IR n ] +.I tfm-file +.I map-file +.I font-description +.YS +. +. +.SY hpftodit +.B \-d +.I tfm-file +.RI [ map-file ] +.YS +. +. +.SY hpftodit +.B \-\-help +.YS +. +. +.SY hpftodit +.B \-v +. +.SY hpftodit +.B \-\-version +.YS +. +. +.\" ==================================================================== +.SH Description +.\" ==================================================================== +. +.I hpftodit +creates a font description file for use with a Hewlett-Packard +LaserJet\~4-\%series +(or newer) +printer with the +.MR grolj4 @MAN1EXT@ +output driver of +.MR groff @MAN1EXT@ , +using data from an HP tagged font metric (TFM) file. +. +.I tfm-file +is the name of the font's TFM file; +Intellifont and TrueType TFM files are supported, +but symbol set TFM files are not. +. +.I map-file +is a file giving the +.I groff +special character identifiers for glyphs in the font; +this file should consist of a sequence of lines of the form +.RS +.EX +.IR "m u c1 c2 " "\&.\|.\|.\& [#" " comment" "]" +.EE +.RE +where +.I m +is a decimal integer giving the glyph's MSL +(Master Symbol List) +number, +.I u +is a hexadecimal integer giving its Unicode character code, +and +.IR c1 , +.IR c2 ", .\|.\|." +are its +.I groff +glyph names +(see +.MR groff_char @MAN7EXT@ +for a list). +. +The values can be separated by any number of spaces and/or tabs. +. +The Unicode value must use uppercase hexadecimal digits A\^\[en]\^F, +and must lack a leading +.RB \[lq] 0x \[rq], +.RB \[lq] u \[rq], +or +.RB \[lq] U+ \[rq]. +. +Unicode values corresponding to composite glyphs are decomposed; +that is +.RB \[lq] u00C0 \[rq] +becomes +.RB \[lq] u0041_0300 \[rq]. +. 
+A glyph without a +.I groff +special character identifier may be named +.BI u XXXX +if the glyph corresponds to a Unicode value, +or as an unnamed glyph +.RB \[lq] \-\-\- \[rq]. +. +If the given Unicode value is in the Private Use Area (PUA) +(0xE000\^\[en]\^0xF8FF), +the glyph is included as an unnamed glyph. +. +Refer to +.MR groff_diff @MAN1EXT@ +for additional information about unnamed glyphs and how to access them. +. +. +.P +Blank lines and lines beginning with +.RB \[lq] # \[rq] +are ignored. +. +A +.RB \[lq] # \[rq] +following one or more +.I groff +names begins a comment. +. +Because +.RB \[lq] # \[rq] +is a valid +.I groff +name, +it must appear first in a list of +.I groff +names if a comment is included, +as in +. +.RS +.EX +3 0023 # # number sign +.EE +.RE +. +or +. +.RS +.EX +3 0023 # sh # number sign +.EE +.RE +. +whereas in +. +.RS +.EX +3 0023 sh # # number sign +.EE +.RE +. +the first +.RB \[lq] # \[rq] +is interpreted as the beginning of the comment. +. +. +.P +Output is written in +.MR groff_font @MAN5EXT@ +format to +.I font-description, +a file named for the intended +.I groff +font name; +if this operand is +.RB \[lq] \- \[rq], +the font description is written to the standard output stream. +. +. +.LP +If the +.B \-i +option is used, +.I hpftodit +automatically will generate an italic correction, +a left italic correction, +and a subscript correction for each glyph +(the significance of these parameters is explained in +.MR groff_font @MAN5EXT@ ). +. +. +.\" ==================================================================== +.SH Options +.\" ==================================================================== +. +.B \-\-help +displays a usage message, +while +.B \-v +and +.B \-\-version +show version information; +all exit afterward. +. +. +.TP +.B \-a +Include glyphs in the TFM file that are not included in +.IR map-file . +. 
+A glyph with corresponding Unicode value is given the name +.RI u XXXX ; +a glyph without a Unicode value is included as an unnamed glyph +\[lq]\-\^\-\^\-\[rq]. +. +A glyph with a Unicode value in the Private Use Area +(0xE000\^\[en]\^0xF8FF) +is also included as an unnamed glyph. +. +. +.IP +This option provides a simple means of adding Unicode-named and +unnamed glyphs to a font without including them in the map file, +but it affords little control over which glyphs are placed in a regular +font and which are placed in a special font. +. +The presence or absence of the +.B \-s +option has some effect on which glyphs are included: +without it, +only the \[lq]text\[rq] symbol sets are searched for matching glyphs; +with it, +only the \[lq]mathematical\[rq] symbol sets are searched. +. +Nonetheless, +restricting the symbol sets searched isn't very selective\[em]many +glyphs are placed in both regular and special fonts. +. +Normally, +.B \-a +should be used only as a last resort. +. +. +.TP +.B \-d +Dump information about the TFM file to the standard output stream; +use this to ensure that a TFM file is a proper match for a font, +and that its contents are suitable. +. +The information includes the values of important TFM tags and a listing +(by MSL number for Intellifont TFM files or by Unicode value for +TrueType TFM files) +of the glyphs included in the TFM file. +. +The unit of measure \[lq]DU\[rq] for some tags indicates design units; +there are 8782\~design units per em for Intellifont fonts, +and 2048\~design units per em for TrueType fonts. +. +Note that the accessibility of a glyph depends on its inclusion in a +symbol set; +some TFM files list many glyphs but only a few symbol sets. +. +. +.IP +The glyph listing includes the glyph index within the TFM file, +the MSL or Unicode value, +and the symbol set and character code that will be used to print the +glyph. +. +If +.I map-file +is given, +.I groff +names are given for matching glyphs. +. 
+If only the glyph index and MSL or Unicode value are given, +the glyph does not appear in any supported symbol set and cannot be +printed. +. +. +.IP +With the +.B \-d +option, +.I map-file +is optional, +and +.I font-description +is ignored if given. +. +. +.TP +.BI \-i\~ n +Generate an italic correction for each glyph so that its width plus its +italic correction is equal to +.I n +thousandths of an em plus the amount by which the right edge of the +glyph's bounding box is to the right of its origin. +. +If a negative italic correction would result, +use a zero italic correction instead. +. +. +.IP +Also generate a subscript correction equal to the product of the tangent +of the slant of the font and four fifths of the x-height of the font. +. +If a subscript correction greater than the italic correction would +result, +use a subscript correction equal to the italic correction instead. +. +. +.IP +Also generate a left italic correction for each glyph equal to +.I n +thousandths of an em plus the amount by which the left edge of the +glyph's bounding box is to the left of its origin. +. +The left italic correction may be negative. +. +. +.IP +This option normally is needed only with italic or oblique fonts; +a value of 50 +(0.05\~em) +usually is a reasonable choice. +. +. +.TP +.B \-q +Suppress warnings about glyphs in the map file that were not found in +the TFM file. +. +Warnings never are given for unnamed glyphs or for glyphs named by their +Unicode values. +. +This option is useful when sending the output of +.I hpftodit +to the standard output stream. +. +. +.TP +.B \-s +Add the +.B special +directive to the font description file, +affecting the order in which HP symbol sets are searched for each glyph. +. +Without this option, +the \[lq]text\[rq] sets are searched before the \[lq]mathematical\[rq] +symbol sets. +. +With it, +the search order is reversed. +. +. 
+.\" ==================================================================== +.SH Files +.\" ==================================================================== +. +.TP +.I @FONTDIR@/\:\%devlj4/\:DESC +describes the +.B lj4 +output device. +. +. +.TP +.IR @FONTDIR@/\:\%devlj4/ F +describes the font known +.RI as\~ F +on device +.BR lj4 . +. +. +.TP +.I @FONTDIR@/\:\%devlj4/\:\%generate/\:\%Makefile +is a +.MR make 1 +script that uses +.MR hpftodit @MAN1EXT@ +to prepare the +.I groff +font description files above from HP TFM data; +it can be used to regenerate them in the event the TFM files are +updated. +. +. +.TP +.I @FONTDIR@/\:\%devlj4/\:\%generate/\:\%special\:.awk +is an +.MR awk 1 +script that corrects the Intellifont-based height metrics for several +glyphs in the +.B S +(special) font for TrueType CG Times used in the HP LaserJet\~4000 and +later. +. +. +.TP +.I @FONTDIR@/\:\%devlj4/\:\%generate/\:\%special\:.map +.TQ +.I @FONTDIR@/\:\%devlj4/\:\%generate/\:\%symbol\:.map +.TQ +.I @FONTDIR@/\:\%devlj4/\:\%generate/\:text\:.map +.TQ +.I @FONTDIR@/\:\%devlj4/\:\%generate/\:\%wingdings.map +map MSL indices and HP Unicode PUA assignments to +.I groff +special character identifiers. +. +. +.\" ==================================================================== +.SH "See also" +.\" ==================================================================== +. +.MR groff @MAN1EXT@ , +.MR groff_diff @MAN1EXT@ , +.MR grolj4 @MAN1EXT@ , +.MR groff_font @MAN5EXT@ +. +. +.\" Restore compatibility mode (for, e.g., Solaris 10/11). +.cp \n[*groff_hpftodit_1_man_C] +.do rr *groff_hpftodit_1_man_C +. +. 
+.\" Local Variables: +.\" fill-column: 72 +.\" mode: nroff +.\" End: +.\" vim: set filetype=groff textwidth=72: diff --git a/src/utils/hpftodit/hpftodit.am b/src/utils/hpftodit/hpftodit.am new file mode 100644 index 0000000..e31e8f5 --- /dev/null +++ b/src/utils/hpftodit/hpftodit.am @@ -0,0 +1,34 @@ +# Automake rules for 'src utils hpftodit' +# +# Copyright (C) 2014-2020 Free Software Foundation, Inc. +# +# 'groff' is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. +# +# 'groff' is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# <http://www.gnu.org/licenses/gpl-2.0.html>. +# +######################################################################## + +bin_PROGRAMS += hpftodit +man1_MANS += src/utils/hpftodit/hpftodit.1 +EXTRA_DIST += src/utils/hpftodit/hpftodit.1.man +hpftodit_LDADD = libgroff.a $(LIBM) lib/libgnu.a +hpftodit_SOURCES = \ + src/utils/hpftodit/hpftodit.cpp \ + src/utils/hpftodit/hpuni.cpp + + +# Local Variables: +# mode: makefile-automake +# fill-column: 72 +# End: +# vim: set autoindent filetype=automake textwidth=72: diff --git a/src/utils/hpftodit/hpftodit.cpp b/src/utils/hpftodit/hpftodit.cpp new file mode 100644 index 0000000..4982e19 --- /dev/null +++ b/src/utils/hpftodit/hpftodit.cpp @@ -0,0 +1,1465 @@ +/* Copyright (C) 1994-2020 Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.com) + +This file is part of groff. 
+
+groff is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+groff is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/*
+TODO
+devise new names for useful characters
+option to specify symbol sets to look in
+put filename in error messages (or fix lib)
+*/
+
+#include "lib.h"
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "posix.h"
+#include "errarg.h"
+#include "error.h"
+#include "cset.h"
+#include "nonposix.h"
+#include "unicode.h"
+
+extern "C" const char *Version_string;
+extern const char *hp_msl_to_unicode_code(const char *);
+
+// number of elements in a true array (not valid for pointers)
+#define SIZEOF(v) (sizeof(v)/sizeof(v[0]))
+// true if the two C strings compare equal
+#define equal(a, b) (strcmp(a, b) == 0)
+// only valid if is_uname(c) has returned true
+#define is_decomposed(c) strchr(c, '_')
+
+#define NO 0
+#define YES 1
+
+// values of the TFM type tag, as tested by check_type()
+#define MSL 0
+#define SYMSET 1
+#define UNICODE 2
+
+// name used for a glyph that has no groff name
+#define UNNAMED "---"
+
+// factor applied by scale(); main() recomputes it for TrueType
+// (UNICODE) TFM files and leaves it alone for Intellifont ones
+static double multiplier = 3.0; // make Agfa-based unitwidth an integer
+
+// Multiply n by `multiplier` and convert to int after adding 0.5.
+inline
+int scale(int n)
+{
+  return int(n * multiplier + 0.5);
+}
+
+// tags in TFM file
+
+// min_tag and max_tag bound the range; the `tags` array below is
+// indexed by (tag - min_tag).
+enum tag_type {
+  min_tag = 400,
+  type_tag = 400,
+  copyright_tag = 401,
+  comment_tag = 402,
+  charcode_tag = 403, // MSL for Intellifont, Unicode for TrueType
+  symbol_set_tag = 404,
+  unique_identifier_tag = 405,
+  inches_per_point_tag = 406,
+  nominal_point_size_tag = 407,
+  design_units_per_em_tag = 408,
+  posture_tag = 409,
+  type_structure_tag = 410,
+  stroke_weight_tag = 411,
+  spacing_tag = 412,
+  slant_tag = 413,
+  appearance_width_tag = 414,
+  serif_style_tag = 415,
+  font_name_tag = 417,
+  typeface_source_tag = 418,
+  average_width_tag = 419,
+  max_width_tag = 420,
+  word_spacing_tag = 421,
+  recommended_line_spacing_tag = 422,
+  cap_height_tag = 423,
+  x_height_tag = 424,
+  max_ascent_tag = 425,
+  max_descent_tag = 426,
+  lower_ascent_tag = 427,
+  lower_descent_tag = 428,
+  underscore_depth_tag = 429,
+  underscore_thickness_tag = 430,
+  uppercase_accent_height_tag = 431,
+  lowercase_accent_height_tag = 432,
+  width_tag = 433,
+  vertical_escapement_tag = 434,
+  left_extent_tag = 435,
+  right_extent_tag = 436,
+  ascent_tag = 437,
+  descent_tag = 438,
+  pair_kern_tag = 439,
+  sector_kern_tag = 440,
+  track_kern_tag = 441,
+  typeface_tag = 442,
+  panose_tag = 443,
+  max_tag = 443
+};
+
+// NOTE(review): only two names are listed here, far fewer than the
+// tags above; how this table is indexed is not visible in this part
+// of the file -- confirm against its users.
+const char *tag_name[] = {
+  "Symbol Set",
+  "Font Type" // MSL for Intellifont, Unicode for TrueType
+};
+
+// types in TFM file
+// NOTE(review): check_units() reads each half of a rational with
+// get_uint32(), i.e. as 4 bytes, which disagrees with the "8-byte"
+// wording on RATIONAL_TYPE below -- confirm which is intended.
+enum {
+  BYTE_TYPE = 1,
+  ASCII_TYPE = 2, // NUL-terminated string
+  USHORT_TYPE = 3,
+  LONG_TYPE = 4, // unused
+  RATIONAL_TYPE = 5, // 8-byte numerator + 8-byte denominator
+  SIGNED_BYTE_TYPE = 16, // unused
+  SIGNED_SHORT_TYPE = 17,
+  SIGNED_LONG_TYPE = 18 // unused
+};
+
+typedef unsigned char byte;
+typedef unsigned short uint16;
+typedef short int16;
+typedef unsigned int uint32;
+
+// In-memory copy of a TFM file with a read cursor.  Multi-byte
+// values are read least significant byte first; every reader calls
+// fatal() rather than read past the end of the buffer.
+class File {
+public:
+  File(const char *);
+  void skip(int n);
+  byte get_byte();
+  uint16 get_uint16();
+  uint32 get_uint32();
+  uint32 get_uint32(char *orig); // also stores the 4 raw bytes in orig
+  void seek(uint32 n); // absolute offset from the start of the file
+private:
+  unsigned char *buf_; // start of the file contents
+  const unsigned char *ptr_; // current read position
+  const unsigned char *end_; // one past the last byte
+};
+
+// One tag directory entry as stored by read_tags().
+struct entry {
+  char present; // nonzero once the tag has been seen in the file
+  uint16 type;
+  uint32 count;
+  uint32 value; // used as a file offset or as an immediate value
+  char orig_value[4]; // the raw bytes of `value`, in file order
+  entry() : present(0) { }
+};
+
+// Per-glyph data: the metrics are filled in by read_char_table(),
+// symbol_set and code by read_symbol_sets().
+struct char_info {
+  uint16 charcode;
+  uint16 width;
+  int16 ascent;
+  int16 descent;
+  int16 left_extent;
+  uint16 right_extent;
+  uint16 symbol_set; // NO_SYMBOL_SET if not bound to a symbol set
+  unsigned char code; // character code within symbol_set
+};
+
+// marks an unused slot in a symbol set's index array
+const uint16 NO_GLYPH = 0xffff;
+const uint16 NO_SYMBOL_SET = 0;
+
+// Singly linked list of glyph names; each node owns a copy of its
+// name made with strsave().
+struct name_list {
+  char *name;
+  name_list *next;
+  name_list(const char *s, name_list *p) : name(strsave(s)), next(p) { }
+  ~name_list() { delete[] name; }
+};
+
+struct symbol_set {
+  uint16 select; // selector value, comparable with SYMBOL_SET(n, c)
+  uint16 index[256]; // glyph index for each character code
+};
+
+// Encode a symbol set identifier such as "19U" as a single number.
+#define SYMBOL_SET(n, c) ((n) * 32 + ((c) - 64))
+
+// Both tables below are terminated by a 0 entry.
+uint16 text_symbol_sets[] = {
+  SYMBOL_SET(19, 'U'), // Windows Latin 1 ("ANSI", code page 1252)
+  SYMBOL_SET(9, 'E'), // Windows Latin 2, Code Page 1250
+  SYMBOL_SET(5, 'T'), // Code Page 1254
+  SYMBOL_SET(7, 'J'), // Desktop
+  SYMBOL_SET(6, 'J'), // Microsoft Publishing
+  SYMBOL_SET(0, 'N'), // Latin 1 (subset of 19U,
+                      // so we should never get here)
+  SYMBOL_SET(2, 'N'), // Latin 2 (subset of 9E,
+                      // so we should never get here)
+  SYMBOL_SET(8, 'U'), // HP Roman 8
+  SYMBOL_SET(10, 'J'), // PS Standard
+  SYMBOL_SET(9, 'U'), // Windows 3.0 "ANSI"
+  SYMBOL_SET(1, 'U'), // U.S. Legal
+
+  SYMBOL_SET(12, 'J'), // MC Text
+  SYMBOL_SET(10, 'U'), // PC Code Page 437
+  SYMBOL_SET(11, 'U'), // PC Code Page 437N
+  SYMBOL_SET(17, 'U'), // PC Code Page 852
+  SYMBOL_SET(12, 'U'), // PC Code Page 850
+  SYMBOL_SET(9, 'T'), // PC Code Page 437T
+  0
+};
+
+uint16 special_symbol_sets[] = {
+  SYMBOL_SET(8, 'M'), // Math 8
+  SYMBOL_SET(5, 'M'), // PS Math
+  SYMBOL_SET(15, 'U'), // Pi font
+  SYMBOL_SET(13, 'J'), // Ventura International
+  SYMBOL_SET(19, 'M'), // Symbol font
+  SYMBOL_SET(579, 'L'), // Wingdings
+  0
+};
+
+// one slot per tag in [min_tag, max_tag]; see tag_info()
+entry tags[max_tag + 1 - min_tag];
+
+char_info *char_table;
+uint32 nchars = 0; // number of entries in char_table
+
+unsigned int charcode_name_table_size = 0;
+name_list **charcode_name_table = NULL;
+
+symbol_set *symbol_set_table;
+unsigned int n_symbol_sets;
+
+// command-line option state, set in main()
+static int debug_flag = NO;
+static int special_flag = NO; // not a special font
+static int italic_flag = NO; // don't add italic correction
+static int italic_sep;
+static int all_flag = NO; // don't include glyphs not in mapfile
+static int quiet_flag = NO; // don't suppress warnings about symbols not found
+
+static char 
*hp_msl_to_ucode_name(int);
+static char *unicode_to_ucode_name(int);
+static int is_uname(char *);
+static char *show_symset(unsigned int);
+static void usage(FILE *);
+static void usage();
+static const char *xbasename(const char *);
+static void read_tags(File &);
+static int check_type();
+static void check_units(File &, const int, double *, double *);
+static int read_map(const char *, const int);
+static void require_tag(tag_type);
+static void dump_ascii(File &, tag_type);
+static void dump_tags(File &);
+static void dump_symbol_sets(File &);
+static void dump_symbols(int);
+static void output_font_name(File &);
+static void output_spacewidth();
+static void output_pclweight();
+static void output_pclproportional();
+static void read_and_output_pcltypeface(File &);
+static void output_pclstyle();
+static void output_slant();
+static void output_ligatures();
+static void read_symbol_sets(File &);
+static void read_and_output_kernpairs(File &);
+static void output_charset(const int);
+static void read_char_table(File &);
+
+// Return the directory entry for tag t.  There is no range check:
+// t must lie in [min_tag, max_tag].
+inline
+entry &tag_info(tag_type t)
+{
+  return tags[t - min_tag];
+}
+
+// Parse the options, load the TFM file and the map file, then either
+// dump the TFM contents (-d) or write a groff font description to
+// the file named by the third operand ("-" means standard output).
+int
+main(int argc, char **argv)
+{
+  program_name = argv[0];
+
+  int opt;
+  int res = 1200; // PCL unit of measure for cursor moves
+  int scalesize = 4; // LaserJet 4 only allows 1/4 point increments
+  int unitwidth = 6350;
+  double ppi; // points per inch
+  double upem; // design units per em
+
+  static const struct option long_options[] = {
+    { "help", no_argument, 0, CHAR_MAX + 1 },
+    { "version", no_argument, 0, 'v' },
+    { NULL, 0, 0, 0 }
+  };
+  while ((opt = getopt_long(argc, argv, "adsqvi:", long_options, NULL)) != EOF) {
+    switch (opt) {
+    case 'a':
+      all_flag = YES;
+      break;
+    case 'd':
+      debug_flag = YES;
+      break;
+    case 's':
+      special_flag = YES;
+      break;
+    case 'i':
+      italic_flag = YES;
+      // The argument is in thousandths of an em at this point; it is
+      // converted to design units once upem is known (see below).
+      italic_sep = atoi(optarg); // design units
+      break;
+    case 'q':
+      quiet_flag = YES; // suppress warnings about symbols not found
+      break;
+    case 'v':
+      printf("GNU hpftodit (groff) version %s\n", Version_string);
+      exit(0);
+      break;
+    case CHAR_MAX + 1: // --help
+      usage(stdout);
+      exit(0);
+      break;
+    case '?':
+      usage();
+      break;
+    default:
+      assert(0);
+    }
+  }
+
+  // -d needs at least the TFM file; otherwise exactly three operands
+  // (TFM file, map file, output font) are required.
+  if (debug_flag && argc - optind < 1)
+    usage();
+  else if (!debug_flag && argc - optind != 3)
+    usage();
+  File f(argv[optind]);
+  read_tags(f);
+  int tfm_type = check_type();
+  if (debug_flag)
+    dump_tags(f);
+  // The map file is mandatory without -d and optional with it.
+  if (!debug_flag && !read_map(argv[optind + 1], tfm_type))
+    exit(1);
+  else if (debug_flag && argc - optind > 1)
+    read_map(argv[optind + 1], tfm_type);
+  current_filename = NULL;
+  current_lineno = -1; // no line numbers
+  // From here on everything written to stdout goes to the output file.
+  if (!debug_flag && !equal(argv[optind + 2], "-"))
+    if (freopen(argv[optind + 2], "w", stdout) == NULL)
+      fatal("cannot open '%1': %2", argv[optind + 2], strerror(errno));
+  current_filename = argv[optind];
+
+  check_units(f, tfm_type, &ppi, &upem);
+  if (tfm_type == UNICODE) // don't calculate for Intellifont TFMs
+    multiplier = double(res) / upem / ppi * unitwidth / scalesize;
+  if (italic_flag)
+    // convert from thousandths of an em to design units
+    italic_sep = int(italic_sep * upem / 1000 + 0.5);
+
+  read_char_table(f);
+  if (nchars == 0)
+    fatal("no characters");
+
+  if (!debug_flag) {
+    output_font_name(f);
+    printf("name %s\n", xbasename(argv[optind + 2]));
+    if (special_flag)
+      printf("special\n");
+    output_spacewidth();
+    output_slant();
+    read_and_output_pcltypeface(f);
+    output_pclproportional();
+    output_pclweight();
+    output_pclstyle();
+  }
+  read_symbol_sets(f);
+  if (debug_flag)
+    dump_symbols(tfm_type);
+  else {
+    output_ligatures();
+    read_and_output_kernpairs(f);
+    output_charset(tfm_type);
+  }
+  return 0;
+}
+
+// Write the usage summary to `stream`; does not exit.
+static void
+usage(FILE *stream)
+{
+  fprintf(stream,
+"usage: %s [-aqs] [-i n] tfm-file map-file output-font\n"
+"usage: %s -d tfm-file [map-file]\n"
+"usage: %s {-v | --version}\n"
+"usage: %s --help\n",
+          program_name, program_name, program_name, program_name);
+}
+
+// Write the usage summary to stderr and exit with status 1.
+static void
+usage()
+{
+
usage(stderr);
+  exit(1);
+}
+
+// Read the whole of file `s` into memory and position the cursor at
+// its first byte.  Any failure to open, stat or read is fatal.
+File::File(const char *s)
+{
+  // We need to read the file in binary mode because hpftodit relies
+  // on byte counts.
+  int fd = open(s, O_RDONLY | O_BINARY);
+  if (fd < 0)
+    fatal("cannot open '%1': %2", s, strerror(errno));
+  current_filename = s;
+  struct stat sb;
+  if (fstat(fd, &sb) < 0)
+    fatal("cannot stat: %1", strerror(errno));
+  if (!S_ISREG(sb.st_mode))
+    fatal("not a regular file");
+  buf_ = new unsigned char[sb.st_size];
+  long nread = read(fd, buf_, sb.st_size);
+  if (nread < 0)
+    fatal("read error: %1", strerror(errno));
+  if (nread != sb.st_size)
+    fatal("read unexpected number of bytes");
+  // The contents are in memory now; don't leak the descriptor.
+  close(fd);
+  ptr_ = buf_;
+  end_ = buf_ + sb.st_size;
+}
+
+// Advance the cursor by n bytes.  A negative n is rejected so the
+// cursor can never move in front of the buffer.
+void
+File::skip(int n)
+{
+  if (n < 0 || end_ - ptr_ < n)
+    fatal("unexpected end of file");
+  ptr_ += n;
+}
+
+// Move the cursor to absolute offset n (n == file size is allowed
+// and leaves the cursor at end of file).
+void
+File::seek(uint32 n)
+{
+  if (uint32(end_ - buf_) < n)
+    fatal("unexpected end of file");
+  ptr_ = buf_ + n;
+}
+
+// Read one byte.
+byte
+File::get_byte()
+{
+  if (ptr_ >= end_)
+    fatal("unexpected end of file");
+  return *ptr_++;
+}
+
+// Read a 16-bit value stored least significant byte first.
+uint16
+File::get_uint16()
+{
+  if (end_ - ptr_ < 2)
+    fatal("unexpected end of file");
+  uint16 n = *ptr_++;
+  return n + (*ptr_++ << 8);
+}
+
+// Read a 32-bit value stored least significant byte first.
+uint32
+File::get_uint32()
+{
+  if (end_ - ptr_ < 4)
+    fatal("unexpected end of file");
+  uint32 n = *ptr_++;
+  // Widen each byte before shifting: shifting the promoted int left
+  // by 24 overflows when the byte is 0x80 or larger.
+  for (int i = 1; i < 4; i++)
+    n |= uint32(*ptr_++) << (i * 8);
+  return n;
+}
+
+// As get_uint32(), but also copy the four raw bytes, in file order,
+// into orig (which must have room for 4 chars).
+uint32
+File::get_uint32(char *orig)
+{
+  if (end_ - ptr_ < 4)
+    fatal("unexpected end of file");
+  uint32 n = 0;
+  for (int i = 0; i < 4; i++) {
+    unsigned char v = *ptr_++;
+    orig[i] = v;
+    n |= uint32(v) << (i * 8);
+  }
+  return n;
+}
+
+// Read the tag directory into `tags`.  Tags outside the range
+// [min_tag, max_tag] are parsed into a scratch entry and discarded.
+static void
+read_tags(File &f)
+{
+  if (f.get_byte() != 'I' || f.get_byte() != 'I')
+    fatal("not an Intel format TFM file");
+  f.skip(6);
+  uint16 ntags = f.get_uint16();
+  entry dummy;
+  for (uint16 i = 0; i < ntags; i++) {
+    uint16 tag = f.get_uint16();
+    entry *p;
+    if (min_tag <= tag && tag <= max_tag)
+      p = tags + (tag - min_tag);
+    else
+      p = &dummy;
+    p->present = 1;
+
p->type = f.get_uint16(); + p->count = f.get_uint32(); + p->value = f.get_uint32(p->orig_value); + } +} + +static int +check_type() +{ + require_tag(type_tag); + int tfm_type = tag_info(type_tag).value; + switch (tfm_type) { + case MSL: + case UNICODE: + break; + case SYMSET: + fatal("cannot handle Symbol Set TFM files"); + break; + default: + fatal("unknown type tag %1", tfm_type); + } + return tfm_type; +} + +static void +check_units(File &f, const int tfm_type, double *ppi, double *upem) +{ + require_tag(design_units_per_em_tag); + f.seek(tag_info(design_units_per_em_tag).value); + uint32 num = f.get_uint32(); + uint32 den = f.get_uint32(); + if (tfm_type == MSL && (num != 8782 || den != 1)) + fatal("design units per em != 8782/1"); + *upem = double(num) / den; + require_tag(inches_per_point_tag); + f.seek(tag_info(inches_per_point_tag).value); + num = f.get_uint32(); + den = f.get_uint32(); + if (tfm_type == MSL && (num != 100 || den != 7231)) + fatal("inches per point not 100/7231"); + *ppi = double(den) / num; +} + +static void +require_tag(tag_type t) +{ + if (!tag_info(t).present) + fatal("tag %1 missing", int(t)); +} + +// put a human-readable font name in the file +static void +output_font_name(File &f) +{ + char *p; + + if (!tag_info(font_name_tag).present) + return; + int count = tag_info(font_name_tag).count; + char *font_name = new char[count]; + + if (count > 4) { // value is a file offset to the string + f.seek(tag_info(font_name_tag).value); + int n = count; + p = font_name; + while (--n) + *p++ = f.get_byte(); + } + else // orig_value contains the string + sprintf(font_name, "%.*s", + count, tag_info(font_name_tag).orig_value); + + // remove any trailing space + p = font_name + count - 1; + while (csspace(*--p)) + ; + *(p + 1) = '\0'; + printf("# %s\n", font_name); + delete[] font_name; +} + +static void +output_spacewidth() +{ + require_tag(word_spacing_tag); + printf("spacewidth %d\n", scale(tag_info(word_spacing_tag).value)); +} + +static void 
+read_symbol_sets(File &f)
+{
+  // Load every symbol set listed in the TFM file, then bind each
+  // glyph to the first symbol set, in search order, that contains it.
+  uint32 symbol_set_dir_length = tag_info(symbol_set_tag).count;
+  n_symbol_sets = symbol_set_dir_length/14;
+  symbol_set_table = new symbol_set[n_symbol_sets];
+  unsigned int i;
+
+  for (i = 0; i < nchars; i++)
+    char_table[i].symbol_set = NO_SYMBOL_SET;
+
+  for (i = 0; i < n_symbol_sets; i++) {
+    f.seek(tag_info(symbol_set_tag).value + i*14);
+    (void)f.get_uint32(); // offset to symbol set name
+    uint32 off1 = f.get_uint32(); // offset to selection string
+    uint32 off2 = f.get_uint32(); // offset to symbol set index array
+
+    f.seek(off1);
+    uint16 kind = 0; // HP-GL "Kind 1" symbol set value
+    unsigned int j;
+    for (j = 0; j < off2 - off1; j++) {
+      unsigned char c = f.get_byte();
+      if ('0' <= c && c <= '9') // value
+        kind = kind*10 + (c - '0');
+      else if ('A' <= c && c <= 'Z') // terminator
+        kind = kind*32 + (c - 64);
+    }
+    symbol_set_table[i].select = kind;
+    // the cursor is now at off2, the start of the index array
+    for (j = 0; j < 256; j++)
+      symbol_set_table[i].index[j] = f.get_uint16();
+  }
+
+  // Search the preferred family of symbol sets first.  The other
+  // family is searched afterwards unless -a was given.
+  const uint16 *search_order[2];
+  search_order[0] = (special_flag ? special_symbol_sets
+                                  : text_symbol_sets);
+  search_order[1] = (all_flag ? NULL
+                     : (special_flag ? text_symbol_sets
+                                     : special_symbol_sets));
+
+  for (int pass = 0; pass < 2 && search_order[pass] != NULL; pass++) {
+    const uint16 *symbol_set_selectors = search_order[pass];
+    for (i = 0; symbol_set_selectors[i] != 0; i++) {
+      unsigned int j;
+      for (j = 0; j < n_symbol_sets; j++)
+        if (symbol_set_table[j].select == symbol_set_selectors[i])
+          break;
+      if (j == n_symbol_sets) // the font lacks this symbol set
+        continue;
+      for (int k = 0; k < 256; k++) {
+        uint16 idx = symbol_set_table[j].index[k];
+        // idx comes from the file: ignore anything that is not a
+        // valid char_table index instead of writing out of bounds
+        if (idx != NO_GLYPH && idx < nchars
+            && char_table[idx].symbol_set == NO_SYMBOL_SET) {
+          char_table[idx].symbol_set = symbol_set_table[j].select;
+          char_table[idx].code = k;
+        }
+      }
+    }
+  }
+}
+
+// Allocate char_table and fill in the per-glyph character codes and
+// metrics.  Each metric is stored in the file as an array of nchars
+// 16-bit values located by its tag; all of the tags are required.
+static void
+read_char_table(File &f)
+{
+  require_tag(charcode_tag);
+  nchars = tag_info(charcode_tag).count;
+  char_table = new char_info[nchars];
+
+  f.seek(tag_info(charcode_tag).value);
+  uint32 i;
+  for (i = 0; i < nchars; i++)
+    char_table[i].charcode = f.get_uint16();
+
+  require_tag(width_tag);
+  f.seek(tag_info(width_tag).value);
+  for (i = 0; i < nchars; i++)
+    char_table[i].width = f.get_uint16();
+
+  require_tag(ascent_tag);
+  f.seek(tag_info(ascent_tag).value);
+  for (i = 0; i < nchars; i++) {
+    char_table[i].ascent = f.get_uint16();
+    if (char_table[i].ascent < 0) // clamp: ascent is never negative
+      char_table[i].ascent = 0;
+  }
+
+  require_tag(descent_tag);
+  f.seek(tag_info(descent_tag).value);
+  for (i = 0; i < nchars; i++) {
+    char_table[i].descent = f.get_uint16();
+    if (char_table[i].descent > 0) // clamp: descent is never positive
+      char_table[i].descent = 0;
+  }
+
+  require_tag(left_extent_tag);
+  f.seek(tag_info(left_extent_tag).value);
+  for (i = 0; i < nchars; i++)
+    char_table[i].left_extent = int16(f.get_uint16());
+
+  require_tag(right_extent_tag);
+  f.seek(tag_info(right_extent_tag).value);
+  for (i = 0; i < nchars; i++)
+    char_table[i].right_extent = f.get_uint16();
+}
+
+// Map the TFM stroke weight onto a PCL stroke weight and write the
+// "pclweight" line.
+static void
+output_pclweight()
+{
+  require_tag(stroke_weight_tag);
+  int stroke_weight = tag_info(stroke_weight_tag).value;
+  int pcl_stroke_weight;
+  if (stroke_weight < 128)
+    pcl_stroke_weight = -3;
+  else if (stroke_weight == 128)
+    pcl_stroke_weight = 0;
+  else if (stroke_weight <= 145)
+    pcl_stroke_weight = 1;
+  else
if (stroke_weight <= 179)
+    pcl_stroke_weight = 3;
+  else
+    pcl_stroke_weight = 4;
+  printf("pclweight %d\n", pcl_stroke_weight);
+}
+
+// Write "pclproportional 1" if the spacing tag is 0, else
+// "pclproportional 0".
+static void
+output_pclproportional()
+{
+  require_tag(spacing_tag);
+  printf("pclproportional %d\n", tag_info(spacing_tag).value == 0);
+}
+
+// Write the "pcltypeface" line.  A value longer than 4 bytes is read
+// from the file at the tag's offset, up to a NUL or `count` bytes;
+// a shorter one is taken from the raw bytes stored in the tag entry.
+static void
+read_and_output_pcltypeface(File &f)
+{
+  printf("pcltypeface ");
+  require_tag(typeface_tag);
+  if (tag_info(typeface_tag).count > 4) {
+    f.seek(tag_info(typeface_tag).value);
+    for (uint32 i = 0; i < tag_info(typeface_tag).count; i++) {
+      unsigned char c = f.get_byte();
+      if (c == '\0')
+        break;
+      putchar(c);
+    }
+  }
+  else
+    printf("%.4s", tag_info(typeface_tag).orig_value);
+  printf("\n");
+}
+
+// Write the "pclstyle" line: bit value 1 is set for a nonzero
+// posture (or, lacking that tag, a nonzero slant) and bit value 4
+// for an appearance width below 100.
+static void
+output_pclstyle()
+{
+  unsigned pcl_style = 0;
+  // older tfms don't have the posture tag
+  if (tag_info(posture_tag).present) {
+    if (tag_info(posture_tag).value)
+      pcl_style |= 1;
+  }
+  else {
+    require_tag(slant_tag);
+    if (tag_info(slant_tag).value != 0)
+      pcl_style |= 1;
+  }
+  require_tag(appearance_width_tag);
+  if (tag_info(appearance_width_tag).value < 100) // guess
+    pcl_style |= 4;
+  printf("pclstyle %d\n", pcl_style);
+}
+
+// Write the "slant" line, only when the slant is nonzero.  The tag
+// value is reinterpreted as a signed 16-bit number and divided by
+// 100 for output.
+static void
+output_slant()
+{
+  require_tag(slant_tag);
+  int slant = int16(tag_info(slant_tag).value);
+  if (slant != 0)
+    printf("slant %f\n", slant/100.0);
+}
+
+// Write the "ligatures" line, listing those ligatures for which a
+// glyph that is bound to a symbol set carries the matching name.
+static void
+output_ligatures()
+{
+  // don't use ligatures for fixed space font
+  require_tag(spacing_tag);
+  if (tag_info(spacing_tag).value != 0)
+    return;
+  // names as written on the "ligatures" line
+  static const char *ligature_names[] = {
+    "fi", "fl", "ff", "ffi", "ffl"
+  };
+
+  // glyph names looked for, in the same order as ligature_names
+  static const char *ligature_chars[] = {
+    "fi", "fl", "ff", "Fi", "Fl"
+  };
+
+  unsigned ligature_mask = 0; // bit j set: ligature_chars[j] was found
+  unsigned int i;
+  for (i = 0; i < nchars; i++) {
+    uint16 charcode = char_table[i].charcode;
+    if (charcode < charcode_name_table_size
+        && char_table[i].symbol_set != NO_SYMBOL_SET) {
+      for (name_list *p = charcode_name_table[charcode]; p; p = p->next)
+        for (unsigned int j = 0; j < SIZEOF(ligature_chars); j++)
+          if 
(strcmp(p->name, ligature_chars[j]) == 0) { + ligature_mask |= 1 << j; + break; + } + } + } + if (ligature_mask) { + printf("ligatures"); + for (i = 0; i < SIZEOF(ligature_names); i++) + if (ligature_mask & (1 << i)) + printf(" %s", ligature_names[i]); + printf(" 0\n"); + } +} + +static void +read_and_output_kernpairs(File &f) +{ + if (tag_info(pair_kern_tag).present) { + printf("kernpairs\n"); + f.seek(tag_info(pair_kern_tag).value); + uint16 n_pairs = f.get_uint16(); + for (int i = 0; i < n_pairs; i++) { + uint16 i1 = f.get_uint16(); + uint16 i2 = f.get_uint16(); + int16 val = int16(f.get_uint16()); + if (char_table[i1].symbol_set != NO_SYMBOL_SET + && char_table[i2].symbol_set != NO_SYMBOL_SET + && char_table[i1].charcode < charcode_name_table_size + && char_table[i2].charcode < charcode_name_table_size) { + for (name_list *p = charcode_name_table[char_table[i1].charcode]; + p; + p = p->next) + for (name_list *q = charcode_name_table[char_table[i2].charcode]; + q; + q = q->next) + if (!equal(p->name, UNNAMED) && !equal(q->name, UNNAMED)) + printf("%s %s %d\n", p->name, q->name, scale(val)); + } + } + } +} + +static void +output_charset(const int tfm_type) +{ + require_tag(slant_tag); + double slant_angle = int16(tag_info(slant_tag).value)*PI/18000.0; + double slant = sin(slant_angle)/cos(slant_angle); + + if (italic_flag) + require_tag(x_height_tag); + require_tag(lower_ascent_tag); + require_tag(lower_descent_tag); + + printf("charset\n"); + unsigned int i; + for (i = 0; i < nchars; i++) { + uint16 charcode = char_table[i].charcode; + + // the glyph is bound to one of the searched symbol sets + if (char_table[i].symbol_set != NO_SYMBOL_SET) { + // the character was in the map file + if (charcode < charcode_name_table_size && charcode_name_table[charcode]) + printf("%s", charcode_name_table[charcode]->name); + else if (!all_flag) + continue; + else if (tfm_type == MSL) + printf("%s", hp_msl_to_ucode_name(charcode)); + else + printf("%s", 
unicode_to_ucode_name(charcode)); + + printf("\t%d,%d", + scale(char_table[i].width), scale(char_table[i].ascent)); + + int depth = scale(-char_table[i].descent); + if (depth < 0) + depth = 0; + int italic_correction = 0; + int left_italic_correction = 0; + int subscript_correction = 0; + + if (italic_flag) { + italic_correction = scale(char_table[i].right_extent + - char_table[i].width + + italic_sep); + if (italic_correction < 0) + italic_correction = 0; + subscript_correction = int((tag_info(x_height_tag).value + * slant * .8) + .5); + if (subscript_correction > italic_correction) + subscript_correction = italic_correction; + left_italic_correction = scale(italic_sep + - char_table[i].left_extent); + } + + if (subscript_correction != 0) + printf(",%d,%d,%d,%d", + depth, italic_correction, left_italic_correction, + subscript_correction); + else if (left_italic_correction != 0) + printf(",%d,%d,%d", depth, italic_correction, left_italic_correction); + else if (italic_correction != 0) + printf(",%d,%d", depth, italic_correction); + else if (depth != 0) + printf(",%d", depth); + // This is fairly arbitrary. Fortunately it doesn't much matter. 
+ unsigned type = 0; + if (char_table[i].ascent > int16(tag_info(lower_ascent_tag).value)*9/10) + type |= 2; + if (char_table[i].descent < int16(tag_info(lower_descent_tag).value)*9/10) + type |= 1; + printf("\t%d\t%d", type, + char_table[i].symbol_set*256 + char_table[i].code); + + if (tfm_type == UNICODE) { + if (charcode >= 0xE000 && charcode <= 0xF8FF) + printf("\t-- HP PUA U+%04X", charcode); + else + printf("\t-- U+%04X", charcode); + } + else + printf("\t-- MSL %4d", charcode); + printf(" (%3s %3d)\n", + show_symset(char_table[i].symbol_set), char_table[i].code); + + if (charcode < charcode_name_table_size + && charcode_name_table[charcode]) + for (name_list *p = charcode_name_table[charcode]->next; + p; p = p->next) + printf("%s\t\"\n", p->name); + } + // warnings about characters in mapfile not found in TFM + else if (charcode < charcode_name_table_size + && charcode_name_table[charcode]) { + char *name = charcode_name_table[charcode]->name; + // don't warn about Unicode or unnamed glyphs + // that aren't in the TFM file + if (tfm_type == UNICODE && !quiet_flag && !equal(name, UNNAMED) + && !is_uname(name)) { + fprintf(stderr, "%s: warning: symbol U+%04X (%s", + program_name, charcode, name); + for (name_list *p = charcode_name_table[charcode]->next; + p; p = p->next) + fprintf(stderr, ", %s", p->name); + fprintf(stderr, ") not in any searched symbol set\n"); + } + else if (!quiet_flag && !equal(name, UNNAMED) && !is_uname(name)) { + fprintf(stderr, "%s: warning: symbol MSL %d (%s", + program_name, charcode, name); + for (name_list *p = charcode_name_table[charcode]->next; + p; p = p->next) + fprintf(stderr, ", %s", p->name); + fprintf(stderr, ") not in any searched symbol set\n"); + } + } + } +} + +#define em_fract(a) (upem >= 0 ? 
double(a)/upem : 0) + +static void +dump_tags(File &f) +{ + double upem = -1.0; + + printf("TFM tags\n" + "\n" + "tag# type count value\n" + "---------------------\n"); + + for (int i = min_tag; i <= max_tag; i++) { + enum tag_type t = tag_type(i); + if (tag_info(t).present) { + printf("%4d %4d %5d", i, tag_info(t).type, tag_info(t).count); + switch (tag_info(t).type) { + case BYTE_TYPE: + case USHORT_TYPE: + printf(" %5u", tag_info(t).value); + switch (i) { + case type_tag: + printf(" Font Type "); + switch (tag_info(t).value) { + case MSL: + case SYMSET: + printf("(Intellifont)"); + break; + case UNICODE: + printf("(TrueType)"); + } + break; + case charcode_tag: + printf(" Number of Symbols (%u)", tag_info(t).count); + break; + case symbol_set_tag: + printf(" Symbol Sets (%u): ", + tag_info(symbol_set_tag).count / 14); + dump_symbol_sets(f); + break; + case type_structure_tag: + printf(" Type Structure (%u)", tag_info(t).value); + break; + case stroke_weight_tag: + printf(" Stroke Weight (%u)", tag_info(t).value); + break; + case spacing_tag: + printf(" Spacing "); + switch (tag_info(t).value) { + case 0: + printf("(Proportional)"); + break; + case 1: + printf("(Fixed Pitch: %u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + } + break; + case appearance_width_tag: + printf(" Appearance Width (%u)", tag_info(t).value); + break; + case serif_style_tag: + printf(" Serif Style (%u)", tag_info(t).value); + break; + case posture_tag: + printf(" Posture (%s)", tag_info(t).value == 0 + ? "Upright" + : tag_info(t).value == 1 + ? 
"Italic" + : "Alternate Italic"); + break; + case max_width_tag: + printf(" Maximum Width (%u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + case word_spacing_tag: + printf(" Interword Spacing (%u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + case recommended_line_spacing_tag: + printf(" Recommended Line Spacing (%u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + case x_height_tag: + printf(" x-Height (%u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + case cap_height_tag: + printf(" Cap Height (%u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + case max_ascent_tag: + printf(" Maximum Ascent (%u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + case lower_ascent_tag: + printf(" Lowercase Ascent (%u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + case underscore_thickness_tag: + printf(" Underscore Thickness (%u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + case uppercase_accent_height_tag: + printf(" Uppercase Accent Height (%u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + case lowercase_accent_height_tag: + printf(" Lowercase Accent Height (%u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + case width_tag: + printf(" Horizontal Escapement array"); + break; + case vertical_escapement_tag: + printf(" Vertical Escapement array"); + break; + case right_extent_tag: + printf(" Right Extent array"); + break; + case ascent_tag: + printf(" Character Ascent array"); + break; + case pair_kern_tag: + f.seek(tag_info(t).value); + printf(" Kern Pairs (%u)", f.get_uint16()); + break; + case panose_tag: + printf(" PANOSE Classification array"); + break; + } + break; + case SIGNED_SHORT_TYPE: + printf(" %5d", int16(tag_info(t).value)); + switch (i) { + case slant_tag: + printf(" Slant (%.2f 
degrees)", double(tag_info(t).value) / 100); + break; + case max_descent_tag: + printf(" Maximum Descent (%d DU: %.2f em)", int16(tag_info(t).value), + em_fract(int16(tag_info(t).value))); + break; + case lower_descent_tag: + printf(" Lowercase Descent (%d DU: %.2f em)", int16(tag_info(t).value), + em_fract(int16(tag_info(t).value))); + break; + case underscore_depth_tag: + printf(" Underscore Depth (%d DU: %.2f em)", int16(tag_info(t).value), + em_fract(int16(tag_info(t).value))); + break; + case left_extent_tag: + printf(" Left Extent array"); + break; + // The type of this tag has changed from SHORT to SIGNED SHORT + // in TFM version 1.3.0. + case ascent_tag: + printf(" Character Ascent array"); + break; + case descent_tag: + printf(" Character Descent array"); + break; + } + break; + case RATIONAL_TYPE: + printf(" %5u", tag_info(t).value); + switch (i) { + case inches_per_point_tag: + printf(" Inches per Point"); + break; + case nominal_point_size_tag: + printf(" Nominal Point Size"); + break; + case design_units_per_em_tag: + printf(" Design Units per Em"); + break; + case average_width_tag: + printf(" Average Width"); + break; + } + if (tag_info(t).count == 1) { + f.seek(tag_info(t).value); + uint32 num = f.get_uint32(); + uint32 den = f.get_uint32(); + if (i == design_units_per_em_tag) + upem = double(num) / den; + printf(" (%u/%u = %g)", num, den, double(num)/den); + } + break; + case ASCII_TYPE: + printf(" %5u ", tag_info(t).value); + switch (i) { + case comment_tag: + printf("Comment "); + break; + case copyright_tag: + printf("Copyright "); + break; + case unique_identifier_tag: + printf("Unique ID "); + break; + case font_name_tag: + printf("Typeface Name "); + break; + case typeface_source_tag: + printf("Typeface Source "); + break; + case typeface_tag: + printf("PCL Typeface "); + break; + } + dump_ascii(f, t); + } + putchar('\n'); + } + } + putchar('\n'); +} +#undef em_fract + +static void +dump_ascii(File &f, tag_type t) +{ + putchar('"'); + if 
(tag_info(t).count > 4) { + int count = tag_info(t).count; + f.seek(tag_info(t).value); + while (--count) + printf("%c", f.get_byte()); + } + else + printf("%.4s", tag_info(t).orig_value); + putchar('"'); +} + +static void +dump_symbol_sets(File &f) +{ + uint32 symbol_set_dir_length = tag_info(symbol_set_tag).count; + uint32 num_symbol_sets = symbol_set_dir_length / 14; + + for (uint32 i = 0; i < num_symbol_sets; i++) { + f.seek(tag_info(symbol_set_tag).value + i * 14); + (void)f.get_uint32(); // offset to symbol set name + uint32 off1 = f.get_uint32(); // offset to selection string + uint32 off2 = f.get_uint32(); // offset to symbol set index array + f.seek(off1); + for (uint32 j = 0; j < off2 - off1; j++) { + unsigned char c = f.get_byte(); + if ('0' <= c && c <= '9') + putchar(c); + else if ('A' <= c && c <= 'Z') + printf(i < num_symbol_sets - 1 ? "%c," : "%c", c); + } + } +} + +static void +dump_symbols(int tfm_type) +{ + printf("Symbols:\n" + "\n" + " glyph id# symbol set name(s)\n" + "----------------------------------\n"); + for (uint32 i = 0; i < nchars; i++) { + uint16 charcode = char_table[i].charcode; + if (charcode < charcode_name_table_size + && charcode_name_table[charcode]) { + if (char_table[i].symbol_set != NO_SYMBOL_SET) { + printf(tfm_type == UNICODE ? "%4d (U+%04X) (%3s %3d) %s" + : "%4d (MSL %4d) (%3s %3d) %s", + i, charcode, + show_symset(char_table[i].symbol_set), + char_table[i].code, + charcode_name_table[charcode]->name); + for (name_list *p = charcode_name_table[charcode]->next; + p; p = p->next) + printf(", %s", p->name); + putchar('\n'); + } + } + else { + printf(tfm_type == UNICODE ? "%4d (U+%04X) " + : "%4d (MSL %4d) ", + i, charcode); + if (char_table[i].symbol_set != NO_SYMBOL_SET) + printf("(%3s %3d)", + show_symset(char_table[i].symbol_set), char_table[i].code); + putchar('\n'); + } + } + putchar('\n'); +} + +static char * +show_symset(unsigned int symset) +{ + // A 64-bit unsigned int produces up to 20 decimal digits. 
+ assert(sizeof(unsigned int) <= 8); + static char symset_str[22]; // 20 digits + symset char + \0 + sprintf(symset_str, "%u%c", symset / 32, (symset & 31) + 64); + return symset_str; +} + +static char * +hp_msl_to_ucode_name(int msl) +{ + // A 64-bit signed int produces up to 19 decimal digits plus a sign. + assert(sizeof(int) <= 8); + char codestr[21]; // 19 digits + possible sign + \0 + sprintf(codestr, "%d", msl); + const char *ustr = hp_msl_to_unicode_code(codestr); + if (ustr == NULL) + ustr = UNNAMED; + else { + char *nonum; + int ucode = int(strtol(ustr, &nonum, 16)); + // don't allow PUA code points as Unicode names + if (ucode >= 0xE000 && ucode <= 0xF8FF) + ustr = UNNAMED; + } + if (!equal(ustr, UNNAMED)) { + const char *uname_decomposed = decompose_unicode(ustr); + if (uname_decomposed) + // 1st char is the number of components + ustr = uname_decomposed + 1; + } + char *value = new char[strlen(ustr) + 1]; + sprintf(value, equal(ustr, UNNAMED) ? UNNAMED : "u%s", ustr); + return value; +} + +static char * +unicode_to_ucode_name(int ucode) +{ + // A 64-bit signed int produces up to 16 hexadecimal digits. + assert(sizeof(int) <= 8); + const char *ustr; + char codestr[17]; // 16 hex digits + \0 + + // don't allow PUA code points as Unicode names + if (ucode >= 0xE000 && ucode <= 0xF8FF) + ustr = UNNAMED; + else { + sprintf(codestr, "%04X", ucode); + ustr = codestr; + } + if (!equal(ustr, UNNAMED)) { + const char *uname_decomposed = decompose_unicode(ustr); + if (uname_decomposed) + // 1st char is the number of components + ustr = uname_decomposed + 1; + } + char *value = new char[strlen(ustr) + 1]; + sprintf(value, equal(ustr, UNNAMED) ? UNNAMED : "u%s", ustr); + return value; +} + +static int +is_uname(char *name) +{ + size_t i; + size_t len = strlen(name); + if (len % 5) + return 0; + + if (name[0] != 'u') + return 0; + for (i = 1; i < 4; i++) + if (!csxdigit(name[i])) + return 0; + for (i = 5; i < len; i++) + if (i % 5 ? 
!csxdigit(name[i]) : name[i] != '_') + return 0; + + return 1; +} + +static int +read_map(const char *file, const int tfm_type) +{ + errno = 0; + FILE *fp = fopen(file, "r"); + if (!fp) { + error("can't open '%1': %2", file, strerror(errno)); + return 0; + } + current_filename = file; + char buf[512]; + current_lineno = 0; + char *nonum; + while (fgets(buf, int(sizeof(buf)), fp)) { + current_lineno++; + char *ptr = buf; + while (csspace(*ptr)) + ptr++; + if (*ptr == '\0' || *ptr == '#') + continue; + ptr = strtok(ptr, " \n\t"); + if (!ptr) + continue; + + int msl_code = int(strtol(ptr, &nonum, 10)); + if (*nonum != '\0') { + if (csxdigit(*nonum)) + error("bad MSL map: got hex code (%1)", ptr); + else if (ptr == nonum) + error("bad MSL map: bad MSL code (%1)", ptr); + else + error("bad MSL map"); + fclose(fp); + return 0; + } + + ptr = strtok(NULL, " \n\t"); + if (!ptr) + continue; + int unicode = int(strtol(ptr, &nonum, 16)); + if (*nonum != '\0') { + if (ptr == nonum) + error("bad Unicode value (%1)", ptr); + else + error("bad Unicode map"); + fclose(fp); + return 0; + } + if (strlen(ptr) != 4) { + error("bad Unicode value (%1)", ptr); + return 0; + } + + int n = tfm_type == MSL ? 
msl_code : unicode; + if (tfm_type == UNICODE && n > 0xFFFF) { + // greatest value supported by TFM files + error("bad Unicode value (%1): greatest value is 0xFFFF", ptr); + fclose(fp); + return 0; + } + else if (n < 0) { + error("negative code value (%1)", ptr); + fclose(fp); + return 0; + } + + ptr = strtok(NULL, " \n\t"); + if (!ptr) { // groff name + error("missing name(s)"); + fclose(fp); + return 0; + } + // leave decomposed Unicode values alone + else if (is_uname(ptr) && !is_decomposed(ptr)) + ptr = unicode_to_ucode_name(strtol(ptr + 1, &nonum, 16)); + + if (size_t(n) >= charcode_name_table_size) { + size_t old_size = charcode_name_table_size; + name_list **old_table = charcode_name_table; + charcode_name_table_size = n + 256; + charcode_name_table = new name_list *[charcode_name_table_size]; + if (old_table) { + memcpy(charcode_name_table, old_table, old_size*sizeof(name_list *)); + delete[] old_table; + } + for (size_t i = old_size; i < charcode_name_table_size; i++) + charcode_name_table[i] = NULL; + } + + // a '#' that isn't the first groff name begins a comment + for (int names = 1; ptr; ptr = strtok(NULL, " \n\t")) { + if (names++ > 1 && *ptr == '#') + break; + charcode_name_table[n] = new name_list(ptr, charcode_name_table[n]); + } + } + fclose(fp); + return 1; +} + +static const char * +xbasename(const char *s) +{ + // DIR_SEPS[] are possible directory separator characters, see + // nonposix.h. We want the rightmost separator of all possible + // ones. Example: d:/foo\\bar. + const char *b = strrchr(s, DIR_SEPS[0]), *b1; + const char *sep = &DIR_SEPS[1]; + + while (*sep) + { + b1 = strrchr(s, *sep); + if (b1 && (!b || b1 > b)) + b = b1; + sep++; + } + return b ? 
b + 1 : s; +} + +// Local Variables: +// fill-column: 72 +// mode: C++ +// End: +// vim: set cindent noexpandtab shiftwidth=2 textwidth=72: diff --git a/src/utils/hpftodit/hpuni.cpp b/src/utils/hpftodit/hpuni.cpp new file mode 100644 index 0000000..b3f933f --- /dev/null +++ b/src/utils/hpftodit/hpuni.cpp @@ -0,0 +1,697 @@ +// -*- C++ -*- +/* Copyright (C) 2003-2020 Free Software Foundation, Inc. + Written by Jeff Conrad (jeff_conrad@msn.com) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or +(at your option) any later version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ + +#include "lib.h" +#include "stringclass.h" +#include "ptable.h" + +#include "unicode.h" + +struct hp_msl_to_unicode { + char *value; +}; + +declare_ptable(hp_msl_to_unicode) +implement_ptable(hp_msl_to_unicode) + +PTABLE(hp_msl_to_unicode) hp_msl_to_unicode_table; + +struct S { + const char *key; + const char *value; +} hp_msl_to_unicode_list[] = { + { "1", "0021", }, // Exclamation Mark + { "2", "0022", }, // Neutral Double Quote + { "3", "0023", }, // Number Sign + { "4", "0024", }, // Dollar Sign + { "5", "0025", }, // Per Cent Sign + { "6", "0026", }, // Ampersand + { "8", "2019", }, // Single Close Quote (9) + { "9", "0028", }, // Left Parenthesis + { "10", "0029", }, // Right Parenthesis + { "11", "002A", }, // Asterisk + { "12", "002B", }, // Plus Sign + { "13", "002C", }, // Comma, or Decimal Separator + { "14", "002D", }, // Hyphen + { "15", "002E", }, // Period, or Full Stop + { "16", "002F", }, // Solidus, or Slash + { "17", "0030", }, // Numeral Zero + { "18", "0031", }, // Numeral One + { "19", "0032", }, // Numeral Two + { "20", "0033", }, // Numeral Three + { "21", "0034", }, // Numeral Four + { "22", "0035", }, // Numeral Five + { "23", "0036", }, // Numeral Six + { "24", "0037", }, // Numeral Seven + { "25", "0038", }, // Numeral Eight + { "26", "0039", }, // Numeral Nine + { "27", "003A", }, // Colon + { "28", "003B", }, // Semicolon + { "29", "003C", }, // Less Than Sign + { "30", "003D", }, // Equals Sign + { "31", "003E", }, // Greater Than Sign + { "32", "003F", }, // Question Mark + { "33", "0040", }, // Commercial At + { "34", "0041", }, // Uppercase A + { "35", "0042", }, // Uppercase B + { "36", "0043", }, // Uppercase C + { "37", "0044", }, // Uppercase D + { "38", "0045", }, // Uppercase E + { "39", "0046", }, // Uppercase F + { "40", "0047", }, // Uppercase G + { "41", "0048", }, // Uppercase H + { "42", "0049", }, // Uppercase I + { "43", "004A", }, // Uppercase J + { "44", "004B", }, // Uppercase K + { "45", "004C", }, // 
Uppercase L + { "46", "004D", }, // Uppercase M + { "47", "004E", }, // Uppercase N + { "48", "004F", }, // Uppercase O + { "49", "0050", }, // Uppercase P + { "50", "0051", }, // Uppercase Q + { "51", "0052", }, // Uppercase R + { "52", "0053", }, // Uppercase S + { "53", "0054", }, // Uppercase T + { "54", "0055", }, // Uppercase U + { "55", "0056", }, // Uppercase V + { "56", "0057", }, // Uppercase W + { "57", "0058", }, // Uppercase X + { "58", "0059", }, // Uppercase Y + { "59", "005A", }, // Uppercase Z + { "60", "005B", }, // Left Bracket + { "61", "005C", }, // Reverse Solidus, or Backslash + { "62", "005D", }, // Right Bracket + { "63", "005E", }, // Circumflex, Exponent, or Pointer + { "64", "005F", }, // Underline or Underscore Character + { "66", "2018", }, // Single Open Quote (6) + { "67", "0061", }, // Lowercase A + { "68", "0062", }, // Lowercase B + { "69", "0063", }, // Lowercase C + { "70", "0064", }, // Lowercase D + { "71", "0065", }, // Lowercase E + { "72", "0066", }, // Lowercase F + { "73", "0067", }, // Lowercase G + { "74", "0068", }, // Lowercase H + { "75", "0069", }, // Lowercase I + { "76", "006A", }, // Lowercase J + { "77", "006B", }, // Lowercase K + { "78", "006C", }, // Lowercase L + { "79", "006D", }, // Lowercase M + { "80", "006E", }, // Lowercase N + { "81", "006F", }, // Lowercase O + { "82", "0070", }, // Lowercase P + { "83", "0071", }, // Lowercase Q + { "84", "0072", }, // Lowercase R + { "85", "0073", }, // Lowercase S + { "86", "0074", }, // Lowercase T + { "87", "0075", }, // Lowercase U + { "88", "0076", }, // Lowercase V + { "89", "0077", }, // Lowercase W + { "90", "0078", }, // Lowercase X + { "91", "0079", }, // Lowercase Y + { "92", "007A", }, // Lowercase Z + { "93", "007B", }, // Left Brace + { "94", "007C", }, // Long Vertical Mark + { "95", "007D", }, // Right Brace + { "96", "007E", }, // One Wavy Line Approximate + { "97", "2592", }, // Medium Shading Character + { "99", "00C0", }, // Uppercase A Grave + 
{ "100", "00C2", }, // Uppercase A Circumflex + { "101", "00C8", }, // Uppercase E Grave + { "102", "00CA", }, // Uppercase E Circumflex + { "103", "00CB", }, // Uppercase E Dieresis + { "104", "00CE", }, // Uppercase I Circumflex + { "105", "00CF", }, // Uppercase I Dieresis + { "106", "00B4", }, // Lowercase Acute Accent (Spacing) + { "107", "0060", }, // Lowercase Grave Accent (Spacing) + { "108", "02C6", }, // Lowercase Circumflex Accent (Spacing) + { "109", "00A8", }, // Lowercase Dieresis Accent (Spacing) + { "110", "02DC", }, // Lowercase Tilde Accent (Spacing) + { "111", "00D9", }, // Uppercase U Grave + { "112", "00DB", }, // Uppercase U Circumflex + { "113", "00AF", }, // Overline, or Overscore Character + { "114", "00DD", }, // Uppercase Y Acute + { "115", "00FD", }, // Lowercase Y Acute + { "116", "00B0", }, // Degree Sign + { "117", "00C7", }, // Uppercase C Cedilla + { "118", "00E7", }, // Lowercase C Cedilla + { "119", "00D1", }, // Uppercase N Tilde + { "120", "00F1", }, // Lowercase N Tilde + { "121", "00A1", }, // Inverted Exclamation + { "122", "00BF", }, // Inverted Question Mark + { "123", "00A4", }, // Currency Symbol + { "124", "00A3", }, // Pound Sterling Sign + { "125", "00A5", }, // Yen Sign + { "126", "00A7", }, // Section Mark + { "127", "0192", }, // Florin Sign + { "128", "00A2", }, // Cent Sign + { "129", "00E2", }, // Lowercase A Circumflex + { "130", "00EA", }, // Lowercase E Circumflex + { "131", "00F4", }, // Lowercase O Circumflex + { "132", "00FB", }, // Lowercase U Circumflex + { "133", "00E1", }, // Lowercase A Acute + { "134", "00E9", }, // Lowercase E Acute + { "135", "00F3", }, // Lowercase O Acute + { "136", "00FA", }, // Lowercase U Acute + { "137", "00E0", }, // Lowercase A Grave + { "138", "00E8", }, // Lowercase E Grave + { "139", "00F2", }, // Lowercase O Grave + { "140", "00F9", }, // Lowercase U Grave + { "141", "00E4", }, // Lowercase A Dieresis + { "142", "00EB", }, // Lowercase E Dieresis + { "143", "00F6", }, // 
Lowercase O Dieresis + { "144", "00FC", }, // Lowercase U Dieresis + { "145", "00C5", }, // Uppercase A Ring + { "146", "00EE", }, // Lowercase I Circumflex + { "147", "00D8", }, // Uppercase O Oblique + { "148", "00C6", }, // Uppercase AE Diphthong + { "149", "00E5", }, // Lowercase A Ring + { "150", "00ED", }, // Lowercase I Acute + { "151", "00F8", }, // Lowercase O Oblique + { "152", "00E6", }, // Lowercase AE Diphthong + { "153", "00C4", }, // Uppercase A Dieresis + { "154", "00EC", }, // Lowercase I Grave + { "155", "00D6", }, // Uppercase O Dieresis + { "156", "00DC", }, // Uppercase U Dieresis + { "157", "00C9", }, // Uppercase E Acute + { "158", "00EF", }, // Lowercase I Dieresis + { "159", "00DF", }, // Lowercase Es-zet Ligature + { "160", "00D4", }, // Uppercase O Circumflex + { "161", "00C1", }, // Uppercase A Acute + { "162", "00C3", }, // Uppercase A Tilde + { "163", "00E3", }, // Lowercase A Tilde + { "164", "00D0", }, // Uppercase Eth +//{ "164", "0110", }, // Uppercase D-Stroke + { "165", "00F0", }, // Lowercase Eth + { "166", "00CD", }, // Uppercase I Acute + { "167", "00CC", }, // Uppercase I Grave + { "168", "00D3", }, // Uppercase O Acute + { "169", "00D2", }, // Uppercase O Grave + { "170", "00D5", }, // Uppercase O Tilde + { "171", "00F5", }, // Lowercase O Tilde + { "172", "0160", }, // Uppercase S Hacek + { "173", "0161", }, // Lowercase S Hacek + { "174", "00DA", }, // Uppercase U Acute + { "175", "0178", }, // Uppercase Y Dieresis + { "176", "00FF", }, // Lowercase Y Dieresis + { "177", "00DE", }, // Uppercase Thorn + { "178", "00FE", }, // Lowercase Thorn + { "180", "00B5", }, // Lowercase Greek Mu, or Micro + { "181", "00B6", }, // Pilcrow, or Paragraph Sign + { "182", "00BE", }, // Vulgar Fraction 3/4 + { "183", "2212", }, // Minus Sign + { "184", "00BC", }, // Vulgar Fraction 1/4 + { "185", "00BD", }, // Vulgar Fraction 1/2 + { "186", "00AA", }, // Female Ordinal + { "187", "00BA", }, // Male Ordinal + { "188", "00AB", }, // Left 
Pointing Double Angle Quote + { "189", "25A0", }, // Medium Solid Square Box + { "190", "00BB", }, // Right Pointing Double Angle Quote + { "191", "00B1", }, // Plus Over Minus Sign + { "192", "00A6", }, // Broken Vertical Mark + { "193", "00A9", }, // Copyright Sign + { "194", "00AC", }, // Not Sign + { "195", "00AD", }, // Soft Hyphen + { "196", "00AE", }, // Registered Sign + { "197", "00B2", }, // Superior Numeral 2 + { "198", "00B3", }, // Superior Numeral 3 + { "199", "00B8", }, // Lowercase Cedilla (Spacing) + { "200", "00B9", }, // Superior Numeral 1 + { "201", "00D7", }, // Multiply Sign + { "202", "00F7", }, // Divide Sign + { "203", "263A", }, // Open Smiling Face + { "204", "263B", }, // Solid Smiling Face + { "205", "2665", }, // Solid Heart, Card Suit + { "206", "2666", }, // Solid Diamond, Card Suit + { "207", "2663", }, // Solid Club, Card Suit + { "208", "2660", }, // Solid Spade, Card Suit + { "209", "25CF", }, // Medium Solid Round Bullet + { "210", "25D8", }, // Large Solid square with White Dot + { "211", "EFFD", }, // Large Open Round Bullet + { "212", "25D9", }, // Large Solid square with White Circle + { "213", "2642", }, // Male Symbol + { "214", "2640", }, // Female Symbol + { "215", "266A", }, // Musical Note + { "216", "266B", }, // Pair Of Musical Notes + { "217", "263C", }, // Compass, or Eight Pointed Sun + { "218", "25BA", }, // Right Solid Arrowhead + { "219", "25C4", }, // Left Solid Arrowhead + { "220", "2195", }, // Up/Down Arrow + { "221", "203C", }, // Double Exclamation Mark + { "222", "25AC", }, // Thick Horizontal Mark + { "223", "21A8", }, // Up/Down Arrow Baseline + { "224", "2191", }, // Up Arrow + { "225", "2193", }, // Down Arrow + { "226", "2192", }, // Right Arrow + { "227", "2190", }, // Left Arrow + { "229", "2194", }, // Left/Right Arrow + { "230", "25B2", }, // Up Solid Arrowhead + { "231", "25BC", }, // Down Solid Arrowhead + { "232", "20A7", }, // Pesetas Sign + { "233", "2310", }, // Reversed Not Sign + { 
"234", "2591", }, // Light Shading Character + { "235", "2593", }, // Dark Shading Character + { "236", "2502", }, // Box Draw Line, Vert. 1 + { "237", "2524", }, // Box Draw Right Tee, Vert. 1 Horiz. 1 + { "238", "2561", }, // Box Draw Right Tee, Vert. 1 Horiz. 2 + { "239", "2562", }, // Box Draw Right Tee, Vert. 2 Horiz. 1 + { "240", "2556", }, // Box Draw Upper Right Corner, Vert. 2 Horiz. 1 + { "241", "2555", }, // Box Draw Upper Right Corner, Vert. 1 Horiz. 2 + { "242", "2563", }, // Box Draw Right Tee, Vert. 2 Horiz. 2 + { "243", "2551", }, // Box Draw Lines, Vert. 2 + { "244", "2557", }, // Box Draw Upper Right Corner, Vert. 2 Horiz. 2 + { "245", "255D", }, // Box Draw Lower Right Corner, Vert. 2 Horiz. 2 + { "246", "255C", }, // Box Draw Lower Right Corner, Vert. 2 Horiz. 1 + { "247", "255B", }, // Box Draw Lower Right Corner, Vert. 1 Horiz. 2 + { "248", "2510", }, // Box Draw Upper Right Corner, Vert. 1, Horiz. 1 + { "249", "2514", }, // Box Draw Lower Left Corner, Vert. 1, Horiz. 1 + { "250", "2534", }, // Box Draw Bottom Tee, Vert. 1 Horiz. 1 + { "251", "252C", }, // Box Draw Top Tee, Vert. 1 Horiz. 1 + { "252", "251C", }, // Box Draw Left Tee, Vert. 1 Horiz. 1 + { "253", "2500", }, // Box Draw Line, Horiz. 1 + { "254", "253C", }, // Box Draw Cross, Vert. 1 Horiz. 1 + { "255", "255E", }, // Box Draw Left Tee, Vert. 1 Horiz. 2 + { "256", "255F", }, // Box Draw Left Tee, Vert. 2 Horz. 1 + { "257", "255A", }, // Box Draw Lower Left Corner, Vert. 2 Horiz. 2 + { "258", "2554", }, // Box Draw Upper Left Corner, Vert. 2 Horiz. 2 + { "259", "2569", }, // Box Draw Bottom Tee, Vert. 2 Horiz. 2 + { "260", "2566", }, // Box Draw Top Tee, Vert. 2 Horiz. 2 + { "261", "2560", }, // Box Draw Left Tee, Vert. 2 Horiz. 2 + { "262", "2550", }, // Box Draw Lines, Horiz. 2 + { "263", "256C", }, // Box Draw Cross Open Center, Vert. 2 Horiz. 2 + { "264", "2567", }, // Box Draw Bottom Tee, Vert. 1 Horiz. 2 + { "265", "2568", }, // Box Draw Bottom Tee, Vert. 2 Horiz. 
1 + { "266", "2564", }, // Box Draw Top Tee, Vert. 1 Horiz. 2 + { "267", "2565", }, // Box Draw Top Tee, Vert. 2 Horiz. 1 + { "268", "2559", }, // Box Draw Lower Left Corner, Vert. 2 Horiz. 1 + { "269", "2558", }, // Box Draw Lower Left Corner, Vert. 1 Horiz. 2 + { "270", "2552", }, // Box Draw Upper Left Corner, Vert. 1 Horiz. 2 + { "271", "2553", }, // Box Draw Upper Left Corner, Vert. 2 Horiz. 1 + { "272", "256B", }, // Box Draw Cross, Vert. 2 Horiz. 1 + { "273", "256A", }, // Box Draw Cross, Vert. 1 Horiz. 2 + { "274", "2518", }, // Box Draw Lower Right Corner, Vert. 1 Horiz. 1 + { "275", "250C", }, // Box Draw Upper Left Corner, Vert. 1, Horiz. 1 + { "276", "2588", }, // Solid Full High/Wide + { "277", "2584", }, // Bottom Half Solid Rectangle + { "278", "258C", }, // Left Half Solid Rectangle + { "279", "2590", }, // Right Half Solid Rectangle + { "280", "2580", }, // Top Half Solid Rectangle + { "290", "2126", }, // Uppercase Greek Omega, or Ohms + { "292", "221E", }, // Infinity Symbol + { "295", "2229", }, // Set Intersection Symbol + { "296", "2261", }, // Exactly Equals Sign + { "297", "2265", }, // Greater Than or Equal Sign + { "298", "2264", }, // Less Than or Equal Sign + { "299", "2320", }, // Top Integral + { "300", "2321", }, // Bottom Integral + { "301", "2248", }, // Two Wavy Line Approximate Sign +//{ "302", "00B7", }, // Middle Dot, or Centered Period (see 2219) +//{ "302", "2219", }, // Centered Period, Middle Dot + { "302", "2219", }, // Math Dot, Centered Period + { "303", "221A", }, // Radical Symbol, Standalone Diagonal + { "305", "25AA", }, // Small Solid Square Box + { "306", "013F", }, // Uppercase L-Dot + { "307", "0140", }, // Lowercase L-Dot + { "308", "2113", }, // Litre Symbol + { "309", "0149", }, // Lowercase Apostrophe-N + { "310", "2032", }, // Prime, Minutes, or Feet Symbol + { "311", "2033", }, // Double Prime, Seconds, or Inches Symbol + { "312", "2020", }, // Dagger Symbol + { "313", "2122", }, // Trademark Sign + { "314", 
"2017", }, // Double Underline Character + { "315", "02C7", }, // Lowercase Hacek Accent (Spacing) + { "316", "02DA", }, // Lowercase Ring Accent (Spacing) + { "317", "EFF9", }, // Uppercase Acute Accent (Spacing) + { "318", "EFF8", }, // Uppercase Grave Accent (Spacing) + { "319", "EFF7", }, // Uppercase Circumflex Accent (Spacing) + { "320", "EFF6", }, // Uppercase Dieresis Accent (Spacing) + { "321", "EFF5", }, // Uppercase Tilde Accent (Spacing) + { "322", "EFF4", }, // Uppercase Hacek Accent (Spacing) + { "323", "EFF3", }, // Uppercase Ring Accent (Spacing) + { "324", "2215", }, // Vulgar Fraction Bar + { "325", "2014", }, // Em Dash + { "326", "2013", }, // En Dash + { "327", "2021", }, // Double Dagger Symbol + { "328", "0131", }, // Lowercase Undotted I + { "329", "0027", }, // Neutral Single Quote + { "330", "EFF2", }, // Uppercase Cedilla (Spacing) + { "331", "2022", }, // Small Solid Round Bullet + { "332", "207F", }, // Superior Lowercase N + { "333", "2302", }, // Home Plate + { "335", "0138", }, // Lowercase Kra + { "338", "0166", }, // Uppercase T-Stroke + { "339", "0167", }, // Lowercase T-Stroke + { "340", "014A", }, // Uppercase Eng + { "341", "014B", }, // Lowercase Eng + { "342", "0111", }, // Lowercase D-Stroke + { "400", "0102", }, // Uppercase A Breve + { "401", "0103", }, // Lowercase A Breve + { "402", "0100", }, // Uppercase A Macron + { "403", "0101", }, // Lowercase A Macron + { "404", "0104", }, // Uppercase A Ogonek + { "405", "0105", }, // Lowercase A Ogonek + { "406", "0106", }, // Uppercase C Acute + { "407", "0107", }, // Lowercase C Acute + { "410", "010C", }, // Uppercase C Hacek + { "411", "010D", }, // Lowercase C Hacek + { "414", "010E", }, // Uppercase D Hacek + { "415", "010F", }, // Lowercase D Hacek + { "416", "011A", }, // Uppercase E Hacek + { "417", "011B", }, // Lowercase E Hacek + { "418", "0116", }, // Uppercase E Overdot + { "419", "0117", }, // Lowercase E Overdot + { "420", "0112", }, // Uppercase E Macron + { 
"421", "0113", }, // Lowercase E Macron + { "422", "0118", }, // Uppercase E Ogonek + { "423", "0119", }, // Lowercase E Ogonek + { "428", "0122", }, // Uppercase G Cedilla + { "429", "0123", }, // Lowercase G Cedilla + { "432", "012E", }, // Uppercase I Ogonek + { "433", "012F", }, // Lowercase I Ogonek + { "434", "012A", }, // Uppercase I Macron + { "435", "012B", }, // Lowercase I Macron + { "438", "0136", }, // Uppercase K Cedilla + { "439", "0137", }, // Lowercase K Cedilla + { "440", "0139", }, // Uppercase L Acute + { "441", "013A", }, // Lowercase L Acute + { "442", "013D", }, // Uppercase L Hacek + { "443", "013E", }, // Lowercase L Hacek + { "444", "013B", }, // Uppercase L Cedilla + { "445", "013C", }, // Lowercase L Cedilla + { "446", "0143", }, // Uppercase N Acute + { "447", "0144", }, // Lowercase N Acute + { "448", "0147", }, // Uppercase N Hacek + { "449", "0148", }, // Lowercase N Hacek + { "450", "0145", }, // Uppercase N Cedilla + { "451", "0146", }, // Lowercase N Cedilla + { "452", "0150", }, // Uppercase O Double Acute + { "453", "0151", }, // Lowercase O Double Acute + { "454", "014C", }, // Uppercase O Macron + { "455", "014D", }, // Lowercase O Macron + { "456", "0154", }, // Uppercase R Acute + { "457", "0155", }, // Lowercase R Acute + { "458", "0158", }, // Uppercase R Hacek + { "459", "0159", }, // Lowercase R Hacek + { "460", "0156", }, // Uppercase R Cedilla + { "461", "0157", }, // Lowercase R Cedilla + { "462", "015A", }, // Uppercase S Acute + { "463", "015B", }, // Lowercase S Acute + { "466", "0164", }, // Uppercase T Hacek + { "467", "0165", }, // Lowercase T Hacek + { "468", "0162", }, // Uppercase T Cedilla + { "469", "0163", }, // Lowercase T Cedilla + { "470", "0168", }, // Uppercase U Tilde + { "471", "0169", }, // Lowercase U Tilde + { "474", "0170", }, // Uppercase U Double Acute + { "475", "0171", }, // Lowercase U Double Acute + { "476", "016E", }, // Uppercase U Ring + { "477", "016F", }, // Lowercase U Ring + { 
"478", "016A", }, // Uppercase U Macron + { "479", "016B", }, // Lowercase U Macron + { "480", "0172", }, // Uppercase U Ogonek + { "481", "0173", }, // Lowercase U Ogonek + { "482", "0179", }, // Uppercase Z Acute + { "483", "017A", }, // Lowercase Z Acute + { "484", "017B", }, // Uppercase Z Overdot + { "485", "017C", }, // Lowercase Z Overdot + { "486", "0128", }, // Uppercase I Tilde + { "487", "0129", }, // Lowercase I Tilde + { "500", "EFBF", }, // Radical, Diagonal, Composite + { "501", "221D", }, // Proportional To Symbol + { "502", "212F", }, // Napierian (italic e) + { "503", "03F5", }, // Alternate Lowercase Greek Epsilon +//{ "503", "EFEC", }, // Alternate Lowercase Greek Epsilon + { "504", "2234", }, // Therefore Symbol + { "505", "0393", }, // Uppercase Greek Gamma + { "506", "2206", }, // Increment Symbol (Delta) + { "507", "0398", }, // Uppercase Greek Theta + { "508", "039B", }, // Uppercase Greek Lambda + { "509", "039E", }, // Uppercase Greek Xi + { "510", "03A0", }, // Uppercase Greek Pi + { "511", "03A3", }, // Uppercase Greek Sigma + { "512", "03A5", }, // Uppercase Greek Upsilon + { "513", "03A6", }, // Uppercase Greek Phi + { "514", "03A8", }, // Uppercase Greek Psi + { "515", "03A9", }, // Uppercase Greek Omega + { "516", "2207", }, // Nabla Symbol (inverted Delta) + { "517", "2202", }, // Partial Differential Delta Symbol + { "518", "03C2", }, // Lowercase Sigma, Terminal + { "519", "2260", }, // Not Equal To Symbol + { "520", "EFEB", }, // Underline, Composite + { "521", "2235", }, // Because Symbol + { "522", "03B1", }, // Lowercase Greek Alpha + { "523", "03B2", }, // Lowercase Greek Beta + { "524", "03B3", }, // Lowercase Greek Gamma + { "525", "03B4", }, // Lowercase Greek Delta + { "526", "03B5", }, // Lowercase Greek Epsilon + { "527", "03B6", }, // Lowercase Greek Zeta + { "528", "03B7", }, // Lowercase Greek Eta + { "529", "03B8", }, // Lowercase Greek Theta + { "530", "03B9", }, // Lowercase Greek Iota + { "531", "03BA", }, // 
Lowercase Greek Kappa + { "532", "03BB", }, // Lowercase Greek Lambda + { "533", "03BC", }, // Lowercase Greek Mu + { "534", "03BD", }, // Lowercase Greek Nu + { "535", "03BE", }, // Lowercase Greek Xi + { "536", "03BF", }, // Lowercase Greek Omicron + { "537", "03C0", }, // Lowercase Greek Pi + { "538", "03C1", }, // Lowercase Greek Rho + { "539", "03C3", }, // Lowercase Greek Sigma + { "540", "03C4", }, // Lowercase Greek Tau + { "541", "03C5", }, // Lowercase Greek Upsilon + { "542", "03C6", }, // Lowercase Greek Phi + { "543", "03C7", }, // Lowercase Greek Chi + { "544", "03C8", }, // Lowercase Greek Psi + { "545", "03C9", }, // Lowercase Greek Omega + { "546", "03D1", }, // Lowercase Greek Theta, Open + { "547", "03D5", }, // Lowercase Greek Phi, Open + { "548", "03D6", }, // Lowercase Pi, Alternate + { "549", "2243", }, // Wavy Over Straight Approximate Symbol + { "550", "2262", }, // Not Exactly Equal To Symbol + { "551", "21D1", }, // Up Arrow Double Stroke + { "552", "21D2", }, // Right Arrow Double Stroke + { "553", "21D3", }, // Down Arrow Double Stroke + { "554", "21D0", }, // Left Arrow Double Stroke + { "555", "21D5", }, // Up/Down Arrow Double Stroke + { "556", "21D4", }, // Left/Right Arrow Double Stroke + { "557", "21C4", }, // Right Over Left Arrow + { "558", "21C6", }, // Left Over Right Arrow + { "559", "EFE9", }, // Vector Symbol + { "560", "0305", }, // Overline, Composite + { "561", "2200", }, // For All Symbol, or Universal (inverted A) + { "562", "2203", }, // There Exists Symbol, or Existential (inverted E) + { "563", "22A4", }, // Top Symbol + { "564", "22A5", }, // Bottom Symbol + { "565", "222A", }, // Set Union Symbol + { "566", "2208", }, // Element-Of Symbol + { "567", "220B", }, // Contains Symbol + { "568", "2209", }, // Not-Element-Of Symbol + { "569", "2282", }, // Proper Subset Symbol + { "570", "2283", }, // Proper Superset Symbol + { "571", "2284", }, // Not Proper Subset Symbol + { "572", "2285", }, // Not Proper Superset 
Symbol + { "573", "2286", }, // Subset Symbol + { "574", "2287", }, // Superset Symbol + { "575", "2295", }, // Plus In Circle Symbol + { "576", "2299", }, // Dot In Circle Symbol + { "577", "2297", }, // Times In Circle Symbol + { "578", "2296", }, // Minus In Circle Symbol + { "579", "2298", }, // Slash In Circle Symbol + { "580", "2227", }, // Logical And Symbol + { "581", "2228", }, // Logical Or Symbol + { "582", "22BB", }, // Exclusive Or Symbol + { "583", "2218", }, // Functional Composition Symbol + { "584", "20DD", }, // Large Open Circle + { "585", "22A3", }, // Assertion Symbol + { "586", "22A2", }, // Backwards Assertion Symbol + { "587", "222B", }, // Integral Symbol + { "588", "222E", }, // Curvilinear Integral Symbol + { "589", "2220", }, // Angle Symbol + { "590", "2205", }, // Empty Set Symbol + { "591", "2135", }, // Hebrew Aleph + { "592", "2136", }, // Hebrew Beth + { "593", "2137", }, // Hebrew Gimmel + { "594", "212D", }, // Fraktur Uppercase C + { "595", "2111", }, // Fraktur Uppercase I + { "596", "211C", }, // Fraktur Uppercase R + { "597", "2128", }, // Fraktur Uppercase Z + { "598", "23A1", }, // Top Segment Left Bracket (Left Square Bracket Upper Corner) + { "599", "23A3", }, // Bottom Segment Left Bracket (Left Square Bracket Lower Corner) + { "600", "239B", }, // Top Segment Left Brace (Left Parenthesis Upper Hook) +//{ "600", "23A7", }, // Top Segment Left Brace (Right Curly Bracket Upper Hook) + { "601", "23A8", }, // Middle Segment Left Brace (Right Curly Bracket Middle Piece) + { "602", "239D", }, // Bottom Segment LeftBrace (Left Parenthesis Lower Hook) +//{ "602", "23A9", }, // Bottom Segment Left Brace (Right Curly Bracket Lower Hook) + { "603", "EFD4", }, // Middle Segment Curvilinear Integral + { "604", "EFD3", }, // Top Left Segment Summation + { "605", "2225", }, // Double Vertical Line, Composite + { "606", "EFD2", }, // Bottom Left Segment Summation + { "607", "EFD1", }, // Bottom Diagonal Summation + { "608", "23A4", }, 
// Top Segment Right Bracket (Right Square Bracket Upper Corner) + { "609", "23A6", }, // Bottom Segment Right Bracket (Right Square Bracket Lower Corner) + { "610", "239E", }, // Top Segment Right Brace (Right Parenthesis Upper Hook) +//{ "610", "23AB", }, // Top Segment Right Brace (Right Curly Bracket Upper Hook) + { "611", "23AC", }, // Middle Segment Right Brace (Right Curly Bracket Middle Piece) + { "612", "23A0", }, // Bottom Segment Right ( Right Parenthesis Lower Hook) +//{ "612", "23AD", }, // Bottom Segment Right Brace (Right Curly Bracket Lower Hook) + { "613", "239C", }, // Thick Vertical Line, Composite (Left Parenthesis Extension) +//{ "613", "239F", }, // Thick Vertical Line, Composite (Right Parenthesis Extension) +//{ "613", "23AA", }, // Thick Vertical Line, Composite (Curly Bracket Extension) +//{ "613", "23AE", }, // Thick Vertical Line, Composite (Integral Extension) + { "614", "2223", }, // Thin Vertical Line, Composite + { "615", "EFDC", }, // Bottom Segment of Vertical Radical + { "616", "EFD0", }, // Top Right Segment Summation + { "617", "EFCF", }, // Middle Segment Summation + { "618", "EFCE", }, // Bottom Right Segment Summation + { "619", "EFCD", }, // Top Diagonal Summation + { "620", "2213", }, // Minus Over Plus Sign + { "621", "2329", }, // Left Angle Bracket + { "622", "232A", }, // Right Angle Bracket + { "623", "EFFF", }, // Mask Symbol + { "624", "2245", }, // Wavy Over Two Straight Approximate Symbol + { "625", "2197", }, // 45 Degree Arrow + { "626", "2198", }, // -45 Degree Arrow + { "627", "2199", }, // -135 Degree Arrow + { "628", "2196", }, // 135 Degree Arrow + { "629", "25B5", }, // Up Open Triangle + { "630", "25B9", }, // Right Open Triangle + { "631", "25BF", }, // Down Open Triangle + { "632", "25C3", }, // Left Open Triangle + { "633", "226A", }, // Much Less Than Sign + { "634", "226B", }, // Much Greater Than Sign + { "635", "2237", }, // Proportional To Symbol (4 dots) + { "636", "225C", }, // Defined As Symbol 
+ { "637", "03DD", }, // Lowercase Greek Digamma + { "638", "210F", }, // Planck's Constant divided by 2 pi + { "639", "2112", }, // Laplace Transform Symbol + { "640", "EFFE", }, // Power Set + { "641", "2118", }, // Weierstrassian Symbol + { "642", "2211", }, // Summation Symbol (large Sigma) + { "643", "301A", }, // Left Double Bracket + { "644", "EFC9", }, // Middle Segment Double Bracket + { "645", "301B", }, // Right Double Bracket + { "646", "256D", }, // Box Draw Left Top Round Corner + { "647", "2570", }, // Box Draw Left Bottom Round Corner + { "648", "EFC8", }, // Extender Large Union/Product + { "649", "EFC7", }, // Bottom Segment Large Union + { "650", "EFC6", }, // Top Segment Large Intersection + { "651", "EFC5", }, // Top Segment Left Double Bracket + { "652", "EFC4", }, // Bottom Segment Left Double Bracket + { "653", "EFFC", }, // Large Open Square Box + { "654", "25C7", }, // Open Diamond + { "655", "256E", }, // Box Draw Right Top Round Corner + { "656", "256F", }, // Box Draw Right Bottom Round Corner + { "657", "EFC3", }, // Bottom Segment Large Bottom Product + { "658", "EFC2", }, // Top Segment Large Top Product + { "659", "EFC1", }, // Top Segment Right Double Bracket + { "660", "EFC0", }, // Bottom Segment Right Double Bracket + { "661", "EFFB", }, // Large Solid Square Box + { "662", "25C6", }, // Solid Diamond + { "663", "220D", }, // Such That Symbol (rotated lc epsilon) + { "664", "2217", }, // Math Asterisk + { "665", "23AF", }, // Horizontal Arrow Extender (Horizontal Line Extension) + { "666", "EFCB", }, // Double Horizontal Arrow Extender + { "667", "EFCC", }, // Inverted Complement of 0xEFCF or MSL 617 + { "668", "221F", }, // Right Angle Symbol + { "669", "220F", }, // Product Symbol (large Pi) + { "684", "25CA", }, // Lozenge, Diamond + { "1000", "2070", }, // Superior Numeral 0 + { "1001", "2074", }, // Superior Numeral 4 + { "1002", "2075", }, // Superior Numeral 5 + { "1003", "2076", }, // Superior Numeral 6 + { "1004", 
"2077", }, // Superior Numeral 7 + { "1005", "2078", }, // Superior Numeral 8 + { "1006", "2079", }, // Superior Numeral 9 + { "1017", "201C", }, // Double Open Quote (6) + { "1018", "201D", }, // Double Close Quote (9) + { "1019", "201E", }, // Double Baseline Quote (9) + { "1020", "2003", }, // Em Space + { "1021", "2002", }, // En Space + { "1023", "2009", }, // Thin Space + { "1028", "2026", }, // Ellipsis + { "1030", "EFF1", }, // Uppercase Ogonek (Spacing) + { "1031", "017E", }, // Lowercase Z Hacek + { "1034", "2120", }, // Service Mark + { "1036", "211E", }, // Prescription Sign +//{ "1040", "F001", }, // Lowercase FI Ligature + { "1040", "FB01", }, // Lowercase FI Ligature +//{ "1041", "F002", }, // Lowercase FL Ligature + { "1041", "FB02", }, // Lowercase FL Ligature + { "1042", "FB00", }, // Lowercase FF Ligature + { "1043", "FB03", }, // Lowercase FFI Ligature + { "1044", "FB04", }, // Lowercase FFL Ligature + { "1045", "EFF0", }, // Uppercase Double Acute Accent (Spacing) + { "1047", "0133", }, // Lowercase IJ Ligature + { "1060", "2105", }, // Care Of Symbol + { "1061", "011E", }, // Uppercase G Breve + { "1062", "011F", }, // Lowercase G Breve + { "1063", "015E", }, // Uppercase S Cedilla + { "1064", "015F", }, // Lowercase S Cedilla + { "1065", "0130", }, // Uppercase I Overdot + { "1067", "201A", }, // Single Baseline Quote (9) + { "1068", "2030", }, // Per Mill Sign + { "1069", "20AC", }, // Euro + { "1084", "02C9", }, // Lowercase Macron Accent (Spacing) + { "1086", "02D8", }, // Lowercase Breve Accent (Spacing) + { "1088", "02D9", }, // Lowercase Overdot Accent (Spacing) + { "1090", "0153", }, // Lowercase OE Ligature + { "1091", "0152", }, // Uppercase OE Ligature + { "1092", "2039", }, // Left Pointing Single Angle Quote + { "1093", "203A", }, // Right Pointing Single Angle Quote + { "1094", "25A1", }, // Medium Open Square Box + { "1095", "0141", }, // Uppercase L-Stroke + { "1096", "0142", }, // Lowercase L-Stroke + { "1097", "02DD", }, // 
Lowercase Double Acute Accent (Spacing) + { "1098", "02DB", }, // Lowercase Ogonek (Spacing) + { "1099", "21B5", }, // Carriage Return Symbol + { "1100", "EFDB", }, // Full Size Serif Registered + { "1101", "EFDA", }, // Full Size Serif Copyright + { "1102", "EFD9", }, // Full Size Serif Trademark + { "1103", "EFD8", }, // Full Size Sans Registered + { "1104", "EFD7", }, // Full Size Sans Copyright + { "1105", "EFD6", }, // Full Size Sans Trademark + { "1106", "017D", }, // Uppercase Z Hacek + { "1107", "0132", }, // Uppercase IJ Ligature + { "1108", "25AB", }, // Small Open Square Box + { "1109", "25E6", }, // Small Open Round Bullet + { "1110", "25CB", }, // Medium Open Round Bullet + { "1111", "EFFA", }, // Large Solid Round Bullet + { "3812", "F000", }, // Ornament, Apple +}; + +// global constructor +static struct hp_msl_to_unicode_init { + hp_msl_to_unicode_init(); +} _hp_msl_to_unicode_init; + +hp_msl_to_unicode_init::hp_msl_to_unicode_init() { + for (unsigned int i = 0; + i < sizeof(hp_msl_to_unicode_list)/sizeof(hp_msl_to_unicode_list[0]); + i++) { + hp_msl_to_unicode *ptu = new hp_msl_to_unicode[1]; + ptu->value = (char *)hp_msl_to_unicode_list[i].value; + hp_msl_to_unicode_table.define(hp_msl_to_unicode_list[i].key, ptu); + } +} + +const char *hp_msl_to_unicode_code(const char *s) +{ + hp_msl_to_unicode *result = hp_msl_to_unicode_table.lookup(s); + return result ? result->value : 0; +} |