diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 19:44:05 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 19:44:05 +0000 |
commit | d318611dd6f23fcfedd50e9b9e24620b102ba96a (patch) | |
tree | 8b9eef82ca40fdd5a8deeabf07572074c236095d /src/utils | |
parent | Initial commit. (diff) | |
download | groff-d318611dd6f23fcfedd50e9b9e24620b102ba96a.tar.xz groff-d318611dd6f23fcfedd50e9b9e24620b102ba96a.zip |
Adding upstream version 1.23.0.upstream/1.23.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
43 files changed, 17703 insertions, 0 deletions
diff --git a/src/utils/addftinfo/addftinfo.1.man b/src/utils/addftinfo/addftinfo.1.man new file mode 100644 index 0000000..a719136 --- /dev/null +++ b/src/utils/addftinfo/addftinfo.1.man @@ -0,0 +1,236 @@ +.TH addftinfo @MAN1EXT@ "@MDATE@" "groff @VERSION@" +.SH Name +addftinfo \- add font metrics to +.I troff +fonts for use with +.I groff +. +. +.\" ==================================================================== +.\" Legal Terms +.\" ==================================================================== +.\" +.\" Copyright (C) 1989-2020 Free Software Foundation, Inc. +.\" +.\" Permission is granted to make and distribute verbatim copies of this +.\" manual provided the copyright notice and this permission notice are +.\" preserved on all copies. +.\" +.\" Permission is granted to copy and distribute modified versions of +.\" this manual under the conditions for verbatim copying, provided that +.\" the entire resulting derived work is distributed under the terms of +.\" a permission notice identical to this one. +.\" +.\" Permission is granted to copy and distribute translations of this +.\" manual into another language, under the above conditions for +.\" modified versions, except that this permission notice may be +.\" included in translations approved by the Free Software Foundation +.\" instead of in the original English. +. +. +.\" Save and disable compatibility mode (for, e.g., Solaris 10/11). +.do nr *groff_addftinfo_1_man_C \n[.cp] +.cp 0 +. +.\" Define fallback for groff 1.23's MR macro if the system lacks it. +.nr do-fallback 0 +.if !\n(.f .nr do-fallback 1 \" mandoc +.if \n(.g .if !d MR .nr do-fallback 1 \" older groff +.if !\n(.g .nr do-fallback 1 \" non-groff *roff +.if \n[do-fallback] \{\ +. de MR +. ie \\n(.$=1 \ +. I \%\\$1 +. el \ +. IR \%\\$1 (\\$2)\\$3 +. . +.\} +.rr do-fallback +. +. 
+.\" ==================================================================== +.SH Synopsis +.\" ==================================================================== +. +.SY addftinfo +.RB [ \-asc\-height\~\c +.IR n ] +.RB [ \-body\-depth\~\c +.IR n ] +.RB [ \-body\-height\~\c +.IR n ] +.RB [ \-cap\-height\~\c +.IR n ] +.RB [ \-comma\-depth\~\c +.IR n ] +.RB [ \-desc\-depth\~\c +.IR n ] +.RB [ \-fig\-height\~\c +.IR n ] +.RB [ \-x\-height\~\c +.IR n ] +.I resolution +.I unit-width +.I font +.YS +. +. +.SY addftinfo +.B \-\-help +.YS +. +. +.SY addftinfo +.B \-v +. +.SY addftinfo +.B \-\-version +.YS +. +. +.\" ==================================================================== +.SH Description +.\" ==================================================================== +. +.I addftinfo +reads an +.RI AT&T \~troff +font description file +.IR font , +adds additional font metric information required by +.\" We need the "GNU" below because the @g@ prefix might be empty. +.RI GNU \~@g@troff (@MAN1EXT@), +and writes the combined result to the standard output. +. +The information added is derived from the font's existing parameters and +assumptions about traditional +.I troff +names for characters. +. +Among the font metrics added are the heights and depths of characters +(how far each extends vertically above and below the baseline). +. +The +.I resolution +and +.I unit-width +arguments should be the same as the corresponding parameters in the +.I DESC +file. +. +.I font +is the name of the file describing the font; +if +.I font +ends with +.RB \[lq] I \[rq], +the font is assumed to be oblique +(or italic). +. +. +.\" ==================================================================== +.SH Options +.\" ==================================================================== +. +.B \-\-help +displays a usage message, +while +.B \-v +and +.B \-\-version +show version information; +all exit afterward. +. +. 
+.P +All other options change parameters that are used to derive the heights +and depths. +. +Like the existing quantities in the font description file, +each +.RI value\~ n +is in +.I "scaled points," +.RI inches/ resolution +for a font whose type size is +.IR unit-width ; +see +.MR groff_font @MAN5EXT@ . +. +. +.TP +.BI \-asc\-height \~n +height of characters with ascenders, +such as \[lq]b\[rq], +\[lq]d\[rq], +or \[lq]l\[rq] +. +. +.TP +.BI \-body\-depth \~n +depth of characters such as parentheses +. +. +.TP +.BI \-body\-height \~n +height of characters such as parentheses +. +. +.TP +.BI \-cap\-height \~n +height of uppercase letters such as \[lq]A\[rq] +. +. +.TP +.BI \-comma\-depth \~n +depth of a comma +. +. +.TP +.BI \-desc\-depth \~n +depth of characters with descenders, +such as \[lq]p\[rq], +\[lq]q\[rq], +or \[lq]y\[rq] +. +. +.TP +.BI \-fig\-height \~n +height of figures (numerals) +. +. +.TP +.BI \-x\-height \~n +height of lowercase letters without ascenders such as \[lq]x\[rq] +. +. +.P +.I addftinfo +makes no attempt to use the specified parameters to infer unspecified +parameters. +. +If a parameter is not specified, +the default will be used. +. +The defaults are chosen to produce reasonable values for a Times font. +. +. +.\" ==================================================================== +.SH "See also" +.\" ==================================================================== +. +.MR groff_font @MAN5EXT@ , +.MR groff @MAN1EXT@ , +.MR groff_char @MAN7EXT@ +. +. +.\" Restore compatibility mode (for, e.g., Solaris 10/11). +.cp \n[*groff_addftinfo_1_man_C] +.do rr *groff_addftinfo_1_man_C +. +. 
+.\" Local Variables: +.\" fill-column: 72 +.\" mode: nroff +.\" End: +.\" vim: set filetype=groff textwidth=72: diff --git a/src/utils/addftinfo/addftinfo.am b/src/utils/addftinfo/addftinfo.am new file mode 100644 index 0000000..dd51372 --- /dev/null +++ b/src/utils/addftinfo/addftinfo.am @@ -0,0 +1,35 @@ +# Automake rules for 'src utils addftinfo' +# +# Copyright (C) 2014-2020 Free Software Foundation, Inc. +# +# 'groff' is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. +# +# 'groff' is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# <http://www.gnu.org/licenses/gpl-2.0.html>. +# +######################################################################## + +bin_PROGRAMS += addftinfo +man1_MANS += src/utils/addftinfo/addftinfo.1 +EXTRA_DIST += src/utils/addftinfo/addftinfo.1.man +addftinfo_LDADD = libgroff.a lib/libgnu.a +addftinfo_SOURCES = \ + src/utils/addftinfo/addftinfo.cpp \ + src/utils/addftinfo/guess.cpp \ + src/utils/addftinfo/guess.h + + +# Local Variables: +# mode: makefile-automake +# fill-column: 72 +# End: +# vim: set autoindent filetype=automake textwidth=72: diff --git a/src/utils/addftinfo/addftinfo.cpp b/src/utils/addftinfo/addftinfo.cpp new file mode 100644 index 0000000..6f4facf --- /dev/null +++ b/src/utils/addftinfo/addftinfo.cpp @@ -0,0 +1,237 @@ +/* Copyright (C) 1989-2020 Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.com) + +This file is part of groff. 
+ +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or +(at your option) any later version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include "lib.h" + +#include <ctype.h> +#include <stdlib.h> +#include <errno.h> +#include "errarg.h" +#include "error.h" +#include "stringclass.h" +#include "cset.h" +#include "guess.h" + +extern "C" const char *Version_string; + +static void usage(FILE *stream); +static void usage(); +static void usage(const char *problem); +static void version(); +static void convert_font(const font_params &, FILE *, FILE *); + +typedef int font_params::*param_t; + +static struct { + const char *name; + param_t par; +} param_table[] = { + { "asc-height", &font_params::asc_height }, + { "body-depth", &font_params::body_depth }, + { "body-height", &font_params::body_height }, + { "cap-height", &font_params::cap_height }, + { "comma-depth", &font_params::comma_depth }, + { "desc-depth", &font_params::desc_depth }, + { "fig-height", &font_params::fig_height }, + { "x-height", &font_params::x_height }, +}; + +// These are all in thousandths of an em. +// These values are correct for PostScript Times Roman. 
+ +#define DEFAULT_X_HEIGHT 448 +#define DEFAULT_FIG_HEIGHT 676 +#define DEFAULT_ASC_HEIGHT 682 +#define DEFAULT_BODY_HEIGHT 676 +#define DEFAULT_CAP_HEIGHT 662 +#define DEFAULT_COMMA_DEPTH 143 +#define DEFAULT_DESC_DEPTH 217 +#define DEFAULT_BODY_DEPTH 177 + +int main(int argc, char **argv) +{ + program_name = argv[0]; + int i; + for (i = 1; i < argc; i++) { + if (!strcmp(argv[i], "-v") || !strcmp(argv[i],"--version")) + version(); + if (!strcmp(argv[i],"--help")) { + usage(stdout); + exit(0); + } + } + if (argc < 4) + usage("insufficient arguments"); + /* The next couple of usage() calls cannot provide a meaningful + diagnostic because we don't know whether sscanf() failed on a + required parameter or an option. A refactor could fix this. */ + int resolution; + if (sscanf(argv[argc-3], "%d", &resolution) != 1) + usage(); + if (resolution <= 0) + fatal("resolution must be positive"); + int unitwidth; + if (sscanf(argv[argc-2], "%d", &unitwidth) != 1) + usage(); + if (unitwidth <= 0) + fatal("unit width must be positive"); + font_params param; + const char *font = argv[argc-1]; + param.italic = (font[0] != '\0' && strchr(font, '\0')[-1] == 'I'); + param.em = (resolution*unitwidth)/72; + param.x_height = DEFAULT_X_HEIGHT; + param.fig_height = DEFAULT_FIG_HEIGHT; + param.asc_height = DEFAULT_ASC_HEIGHT; + param.body_height = DEFAULT_BODY_HEIGHT; + param.cap_height = DEFAULT_CAP_HEIGHT; + param.comma_depth = DEFAULT_COMMA_DEPTH; + param.desc_depth = DEFAULT_DESC_DEPTH; + param.body_depth = DEFAULT_BODY_DEPTH; + for (i = 1; i < argc && argv[i][0] == '-'; i++) { + if (argv[i][1] == '-' && argv[i][2] == '\0') { + i++; + break; + } + if (i + 1 >= argc) + usage("option requires argument"); + size_t j; + for (j = 0;; j++) { + if (j >= sizeof(param_table)/sizeof(param_table[0])) + fatal("parameter '%1' not recognized", argv[i] + 1); + if (strcmp(param_table[j].name, argv[i] + 1) == 0) + break; + } + if (sscanf(argv[i+1], "%d", &(param.*(param_table[j].par))) != 1) + 
fatal("invalid option argument '%1'", argv[i+1]); + i++; + } + if (argc - i != 3) + usage("insufficient arguments"); + errno = 0; + FILE *infp = fopen(font, "r"); + if (infp == 0) + fatal("can't open '%1': %2", font, strerror(errno)); + convert_font(param, infp, stdout); + return 0; +} + +static void usage(FILE *stream) +{ + fprintf(stream, "usage: %s", program_name); + size_t len = sizeof(param_table)/sizeof(param_table[0]); + for (size_t i = 0; i < len; i++) + fprintf(stream, " [-%s n]", param_table[i].name); + fputs(" resolution unit-width font\n", stream); + fprintf(stream, "usage: %s {-v | --version}\n" + "usage: %s --help\n", program_name, program_name); +} + +static void usage() +{ + usage(stderr); + exit(1); +} + +static void usage(const char *problem) +{ + error("%1", problem); + usage(); +} + +static void version() +{ + printf("GNU addftinfo (groff) version %s\n", Version_string); + exit(0); +} + +static int get_line(FILE *fp, string *p) +{ + int c; + p->clear(); + while ((c = getc(fp)) != EOF) { + *p += char(c); + if (c == '\n') + break; + } + return p->length() > 0; +} + +static void convert_font(const font_params &param, FILE *infp, + FILE *outfp) +{ + string s; + while (get_line(infp, &s)) { + put_string(s, outfp); + if (s.length() >= 8 + && strncmp(&s[0], "charset", 7)) + break; + } + while (get_line(infp, &s)) { + s += '\0'; + string name; + const char *p = s.contents(); + while (csspace(*p)) + p++; + while (*p != '\0' && !csspace(*p)) + name += *p++; + while (csspace(*p)) + p++; + for (const char *q = s.contents(); q < p; q++) + putc(*q, outfp); + char *next; + char_metric metric; + metric.width = (int)strtol(p, &next, 10); + if (next != p) { + printf("%d", metric.width); + p = next; + metric.type = (int)strtol(p, &next, 10); + if (next != p) { + name += '\0'; + guess(name.contents(), param, &metric); + if (metric.sk == 0) { + if (metric.left_ic == 0) { + if (metric.ic == 0) { + if (metric.depth == 0) { + if (metric.height != 0) + printf(",%d", 
metric.height); + } + else + printf(",%d,%d", metric.height, metric.depth); + } + else + printf(",%d,%d,%d", metric.height, metric.depth, + metric.ic); + } + else + printf(",%d,%d,%d,%d", metric.height, metric.depth, + metric.ic, metric.left_ic); + } + else + printf(",%d,%d,%d,%d,%d", metric.height, metric.depth, + metric.ic, metric.left_ic, metric.sk); + } + } + fputs(p, outfp); + } +} + +// Local Variables: +// fill-column: 72 +// mode: C++ +// End: +// vim: set cindent noexpandtab shiftwidth=2 textwidth=72: diff --git a/src/utils/addftinfo/guess.cpp b/src/utils/addftinfo/guess.cpp new file mode 100644 index 0000000..08bbe05 --- /dev/null +++ b/src/utils/addftinfo/guess.cpp @@ -0,0 +1,489 @@ +// -*- C++ -*- +/* Copyright (C) 1989-2020 Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.com) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or +(at your option) any later version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ + +#include "guess.h" + +void guess(const char *s, const font_params &param, char_metric *metric) +{ + int &height = metric->height; + int &depth = metric->depth; + + metric->ic = 0; + metric->left_ic = 0; + metric->sk = 0; + height = 0; + depth = 0; + if (s[0] == '\0' || (s[1] != '\0' && s[2] != '\0')) + goto do_default; +#define HASH(c1, c2) (((unsigned char)(c1) << 8) | (unsigned char)(c2)) + switch (HASH(s[0], s[1])) { + default: + do_default: + if (metric->type & 01) + depth = param.desc_depth; + if (metric->type & 02) + height = param.asc_height; + else + height = param.x_height; + break; + case HASH('\\', '|'): + case HASH('\\', '^'): + case HASH('\\', '&'): + // these have zero height and depth + break; + case HASH('f', 0): + height = param.asc_height; + if (param.italic) + depth = param.desc_depth; + break; + case HASH('a', 0): + case HASH('c', 0): + case HASH('e', 0): + case HASH('m', 0): + case HASH('n', 0): + case HASH('o', 0): + case HASH('r', 0): + case HASH('s', 0): + case HASH('u', 0): + case HASH('v', 0): + case HASH('w', 0): + case HASH('x', 0): + case HASH('z', 0): + height = param.x_height; + break; + case HASH('i', 0): + height = param.x_height; + break; + case HASH('b', 0): + case HASH('d', 0): + case HASH('h', 0): + case HASH('k', 0): + case HASH('l', 0): + case HASH('F', 'i'): + case HASH('F', 'l'): + case HASH('f', 'f'): + case HASH('f', 'i'): + case HASH('f', 'l'): + height = param.asc_height; + break; + case HASH('t', 0): + height = param.asc_height; + break; + case HASH('g', 0): + case HASH('p', 0): + case HASH('q', 0): + case HASH('y', 0): + height = param.x_height; + depth = param.desc_depth; + break; + case HASH('j', 0): + height = param.x_height; + depth = param.desc_depth; + break; + case HASH('A', 0): + case HASH('B', 0): + case HASH('C', 0): + case HASH('D', 0): + case HASH('E', 0): + case HASH('F', 0): + case HASH('G', 0): + case HASH('H', 0): + case HASH('I', 0): + case HASH('J', 0): + case HASH('K', 0): + case HASH('L', 0): + 
case HASH('M', 0): + case HASH('N', 0): + case HASH('O', 0): + case HASH('P', 0): + case HASH('Q', 0): + case HASH('R', 0): + case HASH('S', 0): + case HASH('T', 0): + case HASH('U', 0): + case HASH('V', 0): + case HASH('W', 0): + case HASH('X', 0): + case HASH('Y', 0): + case HASH('Z', 0): + height = param.cap_height; + break; + case HASH('*', 'A'): + case HASH('*', 'B'): + case HASH('*', 'C'): + case HASH('*', 'D'): + case HASH('*', 'E'): + case HASH('*', 'F'): + case HASH('*', 'G'): + case HASH('*', 'H'): + case HASH('*', 'I'): + case HASH('*', 'K'): + case HASH('*', 'L'): + case HASH('*', 'M'): + case HASH('*', 'N'): + case HASH('*', 'O'): + case HASH('*', 'P'): + case HASH('*', 'Q'): + case HASH('*', 'R'): + case HASH('*', 'S'): + case HASH('*', 'T'): + case HASH('*', 'U'): + case HASH('*', 'W'): + case HASH('*', 'X'): + case HASH('*', 'Y'): + case HASH('*', 'Z'): + height = param.cap_height; + break; + case HASH('0', 0): + case HASH('1', 0): + case HASH('2', 0): + case HASH('3', 0): + case HASH('4', 0): + case HASH('5', 0): + case HASH('6', 0): + case HASH('7', 0): + case HASH('8', 0): + case HASH('9', 0): + case HASH('1', '2'): + case HASH('1', '4'): + case HASH('3', '4'): + height = param.fig_height; + break; + case HASH('(', 0): + case HASH(')', 0): + case HASH('[', 0): + case HASH(']', 0): + case HASH('{', 0): + case HASH('}', 0): + height = param.body_height; + depth = param.body_depth; + break; + case HASH('i', 's'): + height = (param.em*3)/4; + depth = param.em/4; + break; + case HASH('*', 'a'): + case HASH('*', 'e'): + case HASH('*', 'i'): + case HASH('*', 'k'): + case HASH('*', 'n'): + case HASH('*', 'o'): + case HASH('*', 'p'): + case HASH('*', 's'): + case HASH('*', 't'): + case HASH('*', 'u'): + case HASH('*', 'w'): + height = param.x_height; + break; + case HASH('*', 'd'): + case HASH('*', 'l'): + height = param.asc_height; + break; + case HASH('*', 'g'): + case HASH('*', 'h'): + case HASH('*', 'm'): + case HASH('*', 'r'): + case HASH('*', 'x'): 
+ case HASH('*', 'y'): + height = param.x_height; + depth = param.desc_depth; + break; + case HASH('*', 'b'): + case HASH('*', 'c'): + case HASH('*', 'f'): + case HASH('*', 'q'): + case HASH('*', 'z'): + height = param.asc_height; + depth = param.desc_depth; + break; + case HASH('t', 's'): + height = param.x_height; + depth = param.desc_depth; + break; + case HASH('!', 0): + case HASH('?', 0): + case HASH('"', 0): + case HASH('#', 0): + case HASH('$', 0): + case HASH('%', 0): + case HASH('&', 0): + case HASH('*', 0): + case HASH('+', 0): + height = param.asc_height; + break; + case HASH('`', 0): + case HASH('\'', 0): + height = param.asc_height; + break; + case HASH('~', 0): + case HASH('^', 0): + case HASH('a', 'a'): + case HASH('g', 'a'): + height = param.asc_height; + break; + case HASH('r', 'u'): + case HASH('.', 0): + break; + case HASH(',', 0): + depth = param.comma_depth; + break; + case HASH('m', 'i'): + case HASH('-', 0): + case HASH('h', 'y'): + case HASH('e', 'm'): + height = param.x_height; + break; + case HASH(':', 0): + height = param.x_height; + break; + case HASH(';', 0): + height = param.x_height; + depth = param.comma_depth; + break; + case HASH('=', 0): + case HASH('e', 'q'): + height = param.x_height; + break; + case HASH('<', 0): + case HASH('>', 0): + case HASH('>', '='): + case HASH('<', '='): + case HASH('@', 0): + case HASH('/', 0): + case HASH('|', 0): + case HASH('\\', 0): + height = param.asc_height; + break; + case HASH('_', 0): + case HASH('u', 'l'): + case HASH('\\', '_'): + depth = param.em/4; + break; + case HASH('r', 'n'): + height = (param.em*3)/4; + break; + case HASH('s', 'r'): + height = (param.em*3)/4; + depth = param.em/4; + break; + case HASH('b', 'u'): + case HASH('s', 'q'): + case HASH('d', 'e'): + case HASH('d', 'g'): + case HASH('f', 'm'): + case HASH('c', 't'): + case HASH('r', 'g'): + case HASH('c', 'o'): + case HASH('p', 'l'): + case HASH('*', '*'): + case HASH('s', 'c'): + case HASH('s', 'l'): + case HASH('=', '='): 
+ case HASH('~', '='): + case HASH('a', 'p'): + case HASH('!', '='): + case HASH('-', '>'): + case HASH('<', '-'): + case HASH('u', 'a'): + case HASH('d', 'a'): + case HASH('m', 'u'): + case HASH('d', 'i'): + case HASH('+', '-'): + case HASH('c', 'u'): + case HASH('c', 'a'): + case HASH('s', 'b'): + case HASH('s', 'p'): + case HASH('i', 'b'): + case HASH('i', 'p'): + case HASH('i', 'f'): + case HASH('p', 'd'): + case HASH('g', 'r'): + case HASH('n', 'o'): + case HASH('p', 't'): + case HASH('e', 's'): + case HASH('m', 'o'): + case HASH('b', 'r'): + case HASH('d', 'd'): + case HASH('r', 'h'): + case HASH('l', 'h'): + case HASH('o', 'r'): + case HASH('c', 'i'): + height = param.asc_height; + break; + case HASH('l', 't'): + case HASH('l', 'b'): + case HASH('r', 't'): + case HASH('r', 'b'): + case HASH('l', 'k'): + case HASH('r', 'k'): + case HASH('b', 'v'): + case HASH('l', 'f'): + case HASH('r', 'f'): + case HASH('l', 'c'): + case HASH('r', 'c'): + height = (param.em*3)/4; + depth = param.em/4; + break; +#if 0 + case HASH('%', '0'): + case HASH('-', '+'): + case HASH('-', 'D'): + case HASH('-', 'd'): + case HASH('-', 'd'): + case HASH('-', 'h'): + case HASH('.', 'i'): + case HASH('.', 'j'): + case HASH('/', 'L'): + case HASH('/', 'O'): + case HASH('/', 'l'): + case HASH('/', 'o'): + case HASH('=', '~'): + case HASH('A', 'E'): + case HASH('A', 'h'): + case HASH('A', 'N'): + case HASH('C', 's'): + case HASH('D', 'o'): + case HASH('F', 'c'): + case HASH('F', 'o'): + case HASH('I', 'J'): + case HASH('I', 'm'): + case HASH('O', 'E'): + case HASH('O', 'f'): + case HASH('O', 'K'): + case HASH('O', 'm'): + case HASH('O', 'R'): + case HASH('P', 'o'): + case HASH('R', 'e'): + case HASH('S', '1'): + case HASH('S', '2'): + case HASH('S', '3'): + case HASH('T', 'P'): + case HASH('T', 'p'): + case HASH('Y', 'e'): + case HASH('\\', '-'): + case HASH('a', '"'): + case HASH('a', '-'): + case HASH('a', '.'): + case HASH('a', '^'): + case HASH('a', 'b'): + case HASH('a', 'c'): + case 
HASH('a', 'd'): + case HASH('a', 'e'): + case HASH('a', 'h'): + case HASH('a', 'o'): + case HASH('a', 't'): + case HASH('a', '~'): + case HASH('b', 'a'): + case HASH('b', 'b'): + case HASH('b', 's'): + case HASH('c', '*'): + case HASH('c', '+'): + case HASH('f', '/'): + case HASH('f', 'a'): + case HASH('f', 'c'): + case HASH('f', 'o'): + case HASH('h', 'a'): + case HASH('h', 'o'): + case HASH('i', 'j'): + case HASH('l', 'A'): + case HASH('l', 'B'): + case HASH('l', 'C'): + case HASH('m', 'd'): + case HASH('n', 'c'): + case HASH('n', 'e'): + case HASH('n', 'm'): + case HASH('o', 'A'): + case HASH('o', 'a'): + case HASH('o', 'e'): + case HASH('o', 'q'): + case HASH('p', 'l'): + case HASH('p', 'p'): + case HASH('p', 's'): + case HASH('r', '!'): + case HASH('r', '?'): + case HASH('r', 'A'): + case HASH('r', 'B'): + case HASH('r', 'C'): + case HASH('r', 's'): + case HASH('s', 'h'): + case HASH('s', 's'): + case HASH('t', 'e'): + case HASH('t', 'f'): + case HASH('t', 'i'): + case HASH('t', 'm'): + case HASH('~', '~'): + case HASH('v', 'S'): + case HASH('v', 'Z'): + case HASH('v', 's'): + case HASH('v', 'z'): + case HASH('^', 'A'): + case HASH('^', 'E'): + case HASH('^', 'I'): + case HASH('^', 'O'): + case HASH('^', 'U'): + case HASH('^', 'a'): + case HASH('^', 'e'): + case HASH('^', 'i'): + case HASH('^', 'o'): + case HASH('^', 'u'): + case HASH('`', 'A'): + case HASH('`', 'E'): + case HASH('`', 'I'): + case HASH('`', 'O'): + case HASH('`', 'U'): + case HASH('`', 'a'): + case HASH('`', 'e'): + case HASH('`', 'i'): + case HASH('`', 'o'): + case HASH('`', 'u'): + case HASH('~', 'A'): + case HASH('~', 'N'): + case HASH('~', 'O'): + case HASH('~', 'a'): + case HASH('~', 'n'): + case HASH('~', 'o'): + case HASH('\'', 'A'): + case HASH('\'', 'C'): + case HASH('\'', 'E'): + case HASH('\'', 'I'): + case HASH('\'', 'O'): + case HASH('\'', 'U'): + case HASH('\'', 'a'): + case HASH('\'', 'c'): + case HASH('\'', 'e'): + case HASH('\'', 'i'): + case HASH('\'', 'o'): + case HASH('\'', 
'u') + case HASH(':', 'A'): + case HASH(':', 'E'): + case HASH(':', 'I'): + case HASH(':', 'O'): + case HASH(':', 'U'): + case HASH(':', 'Y'): + case HASH(':', 'a'): + case HASH(':', 'e'): + case HASH(':', 'i'): + case HASH(':', 'o'): + case HASH(':', 'u'): + case HASH(':', 'y'): + case HASH(',', 'C'): + case HASH(',', 'c'): +#endif + } +} diff --git a/src/utils/addftinfo/guess.h b/src/utils/addftinfo/guess.h new file mode 100644 index 0000000..d763fe0 --- /dev/null +++ b/src/utils/addftinfo/guess.h @@ -0,0 +1,43 @@ +// -*- C++ -*- +/* Copyright (C) 1989-2020 Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.com) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or +(at your option) any later version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ + +struct font_params { + int italic; + int em; + int x_height; + int fig_height; + int cap_height; + int asc_height; + int body_height; + int comma_depth; + int desc_depth; + int body_depth; +}; + +struct char_metric { + int width; + int type; + int height; + int depth; + int ic; + int left_ic; + int sk; +}; + +void guess(const char *s, const font_params &param, char_metric *metric); diff --git a/src/utils/afmtodit/afmtodit.1.man b/src/utils/afmtodit/afmtodit.1.man new file mode 100644 index 0000000..7b0a39f --- /dev/null +++ b/src/utils/afmtodit/afmtodit.1.man @@ -0,0 +1,635 @@ +.TH afmtodit @MAN1EXT@ "@MDATE@" "groff @VERSION@" +.SH Name +afmtodit \- adapt Adobe Font Metrics files for +.I groff +PostScript and PDF output +. +. +.\" ==================================================================== +.\" Legal Terms +.\" ==================================================================== +.\" +.\" Copyright (C) 1989-2020 Free Software Foundation, Inc. +.\" +.\" Permission is granted to make and distribute verbatim copies of this +.\" manual provided the copyright notice and this permission notice are +.\" preserved on all copies. +.\" +.\" Permission is granted to copy and distribute modified versions of +.\" this manual under the conditions for verbatim copying, provided that +.\" the entire resulting derived work is distributed under the terms of +.\" a permission notice identical to this one. +.\" +.\" Permission is granted to copy and distribute translations of this +.\" manual into another language, under the above conditions for +.\" modified versions, except that this permission notice may be +.\" included in translations approved by the Free Software Foundation +.\" instead of in the original English. +. +. +.\" Save and disable compatibility mode (for, e.g., Solaris 10/11). +.do nr *groff_afmtodit_1_man_C \n[.cp] +.cp 0 +. +.\" Define fallback for groff 1.23's MR macro if the system lacks it. 
+.nr do-fallback 0 +.if !\n(.f .nr do-fallback 1 \" mandoc +.if \n(.g .if !d MR .nr do-fallback 1 \" older groff +.if !\n(.g .nr do-fallback 1 \" non-groff *roff +.if \n[do-fallback] \{\ +. de MR +. ie \\n(.$=1 \ +. I \%\\$1 +. el \ +. IR \%\\$1 (\\$2)\\$3 +. . +.\} +.rr do-fallback +. +. +.\" ==================================================================== +.SH Synopsis +.\" ==================================================================== +. +.SY afmtodit +.RB [ \-ckmnsx ] +.RB [ \-a\~\c +.IR slant ] +.RB [ \-d\~\c +.IR device-description-file ] +.RB [ \-e\~\c +.IR encoding-file ] +.RB [ \-f\~\c +.IR internal-name ] +.RB [ \-i\~\c +.IR italic-correction-factor ] +.RB [ \-o\~\c +.IR output-file ] +.RB [ \-w\~\c +.IR space-width ] +.I afm-file +.I map-file +.I font-description-file +.YS +. +. +.SY afmtodit +.B \-\-help +.YS +. +. +.SY afmtodit +.B \-v +. +.SY afmtodit +.B \-\-version +.YS +. +. +.\" ==================================================================== +.SH Description +.\" ==================================================================== +. +.I \%afmtodit +adapts an +Adobe Font Metrics +file, +.IR afm-file , +for use with the +.B ps +and +.B pdf +output devices of +.MR @g@troff @MAN1EXT@ . +. +.I map-file +associates a +.I groff +ordinary or special character name with a PostScript glyph name. +. +Output is written in +.MR groff_font @MAN5EXT@ +format to +.IR font-description-file , +a file named for the intended +.I groff +font name +(but see the +.B \-o +option). +. +. +.LP +.I map-file +should contain a sequence of lines of the form +. +.RS +.EX +.I ps-glyph groff-char +.EE +.RE +. +where +.I ps-glyph +is the PostScript glyph name and +.I groff-char +is a +.I groff +ordinary +(if of unit length) +or special +(if longer) +character identifier. +. +The same +.I ps-glyph +can occur multiple times in the file; +each +.I groff-char +must occur at most once. +. +Lines starting with \[lq]#\[rq] and blank lines are ignored. +. 
+If the file isn't found in the current directory, +it is sought in the +.I devps/generate +subdirectory of the default font directory. +. +. +.LP +If a PostScript glyph is not mentioned in +.IR map-file , +and a +.I groff +character name can't be deduced using the Adobe Glyph List +(AGL, +built into +.IR afmtodit ), +then +.I \%afmtodit +puts the PostScript glyph into the +.I groff +font description file as an unnamed glyph which can only be accessed +by the \[lq]\eN\[rq] escape sequence in a +.I roff +document. +. +In particular, +this is true for glyph variants named in the form +.RI \[lq] foo . bar \[rq]; +all glyph names containing one or more periods are mapped to unnamed +entities. +. +Unless +.B \-e +is specified, +the encoding defined in the AFM file +(i.e., +entries +with non-negative codes) +is used. +. +Refer to section \[lq]Using Symbols\[rq] in +.IR "Groff: The GNU Implementation of troff" , +the +.I groff +Texinfo manual, +or +.MR groff_char @MAN7EXT@ , +which describe how +.I groff +character identifiers are constructed. +. +. +.LP +Glyphs not encoded in the AFM file +(i.e., +entries indexed as \[lq]\-1\[rq]) +are still available in +.IR groff ; +they get glyph index values greater than 255 +(or greater than the biggest code used in the AFM file in the unlikely +case that it is greater than 255) +in the +.I groff +font description file. +. +Unencoded glyph indices don't have a specific order; +it is best to access them only via special character identifiers. +. +. +.P +If the font file proper +(not just its metrics) +is available, +listing it in the files +.I @FONTDIR@/\:\%devps/\:\%download +and +.I @FONTDIR@/\:\%devpdf/\:\%download +enables it to be embedded in the output produced by +.MR grops @MAN1EXT@ +and +.MR gropdf @MAN1EXT@ , +respectively. +. +. 
+.P +If the +.B \-i +option is used, +.I \%afmtodit +automatically generates an italic correction, +a left italic correction, +and a subscript correction for each glyph +(the significance of these is explained in +.MR groff_font @MAN5EXT@ ); +they can be specified for individual glyphs by +adding to the +.I afm-file +lines of the form: +. +.RS +.EX +.RI italicCorrection \~ps-glyph\~n +.RI leftItalicCorrection \~ps-glyph\~n +.RI subscriptCorrection \~ps-glyph\~n +.EE +.RE +. +where +.I ps-glyph +is the PostScript glyph name, +and +.I n +is the desired value of the corresponding parameter in thousandths of an +em. +. +Such parameters are normally needed only for italic +(or oblique) +fonts. +. +. +.P +The +.B \-s +option should be given if the font is \[lq]special\[rq], +meaning that +.I groff +should search it whenever a glyph is not found in the current font. +. +In that case, +.I font-description-file +should be listed as an argument to the +.B fonts +directive in the output device's +.I DESC +file; +if it is not special, +there is no need to do so, +since +.MR @g@troff @MAN1EXT@ +will automatically mount it when it is first used. +. +. +.br +.ne 7v +.\" ==================================================================== +.SH Options +.\" ==================================================================== +. +.B \-\-help +displays a usage message, +while +.B \-v +and +.B \%\-\-version +show version information; +all exit afterward. +. +. +.TP +.BI \-a\~ slant +Use +.I slant +as the slant (\[lq]angle\[rq]) parameter in the font description file; +this is used by +.I groff +in the positioning of accents. +. +By default +.I \%afmtodit +uses the negative of the +.B \%ItalicAngle +specified in the AFM file; +with true italic fonts it is sometimes desirable to use a slant that is +less than this. +. +If you find that an italic font places accents over base glyphs +too far to the right, +use +.B \-a +to give it a smaller slant. +. +. 
+.TP +.B \-c +Include comments in the font description file identifying the PostScript +font. +. +. +.TP +.BI \-d\~ device-description-file +The device description file is +.I device-description-file +rather than the default +.IR DESC . +. +If not found in the current directory, +the +.I devps +subdirectory of the default font directory is searched +(this is true for both the default device description file and a file +given with option +.BR \-d ). +. +. +.TP +.BI \-e\~ encoding-file +The PostScript font should be reencoded to use the encoding described +in +.IR encoding-file . +. +The format of +.I encoding-file +is described in +.MR grops @MAN1EXT@ . +. +If not found in the current directory, +the +.I devps +subdirectory of the default font directory is searched. +. +. +.TP +.BI \-f\~ internal-name +The internal name of the +.I groff +font is set to +.IR internal-name . +. +. +.TP +.BI \-i\~ italic-correction-factor +Generate an italic correction for each glyph so that its width plus its +italic correction is equal to +.I italic-correction-factor +thousandths of an em +plus the amount by which the right edge of the glyph's bounding box is +to the right of its origin. +. +If this would result in a negative italic correction, +use a zero italic correction instead. +. +. +.IP +Also generate a subscript correction equal to the +product of the tangent of the slant of the font and +four fifths of the x-height of the font. +. +If this would result in a subscript correction greater than the italic +correction, +use a subscript correction equal to the italic correction instead. +. +. +.IP +Also generate a left italic correction for each glyph equal to +.I italic-correction-factor +thousandths of an em +plus the amount by which the left edge of the glyph's bounding box is to +the left of its origin. +. +The left italic correction may be negative unless option +.B \-m +is given. +. +. +.IP +This option is normally needed only with italic +(or oblique) +fonts. +. 
+The font description files distributed with +.I groff +were created using an option of +.B \-i50 +for italic fonts. +. +. +.TP +.BI \-o\~ output-file +Write to +.I output-file +instead of +.IR font-description-file . +. +. +.TP +.B \-k +Omit any kerning data from the +.I groff +font; +use only for monospaced (constant-width) fonts. +. +. +.TP +.B \-m +Prevent negative left italic correction values. +. +Font description files for roman styles distributed with +.I groff +were created with +.RB \[lq] \-i0\~\-m \[rq] +to improve spacing with +.MR @g@eqn @MAN1EXT@ . +. +. +.TP +.B \-n +Don't output a +.B ligatures +command for this font; +use with monospaced (constant-width) fonts. +. +. +.TP +.B \-s +Add the +.B special +directive to the font description file. +. +. +.TP +.BI \-w\~ space-width +Use +.I space-width +as the width of inter-word spaces. +. +. +.TP +.B \-x +Don't use the built-in Adobe Glyph List. +. +. +.\" ==================================================================== +.SH Files +.\" ==================================================================== +. +.TP +.I @FONTDIR@/\:\%devps/\:DESC +describes the +.B ps +output device. +. +. +.TP +.IR @FONTDIR@/\:\%devps/ F +describes the font known +.RI as\~ F +on device +.BR ps . +. +. +.TP +.I @FONTDIR@/\:\%devps/\:\%download +lists fonts available for embedding within the PostScript document +(or download to the device). +. +. +.TP +.I @FONTDIR@/\:\%devps/\:\%generate/\:\%dingbats.map +.TQ +.I @FONTDIR@/\:\%devps/\:\%generate/\:\%dingbats\-reversed.map +.TQ +.I @FONTDIR@/\:\%devps/\:\%generate/\:\%slanted\-symbol.map +.TQ +.I @FONTDIR@/\:\%devps/\:\%generate/\:\%symbol.map +.TQ +.I @FONTDIR@/\:\%devps/\:\%generate/\:\%text.map +map names in the Adobe Glyph List to +.I groff +special character identifiers for Zapf Dingbats +.RB ( ZD ), +reversed Zapf Dingbats +.RB ( ZDR ), +slanted symbol +.RB ( SS ), +symbol +.RB ( S ), +and text fonts, +respectively. +. 
+These +.IR map-file s +are used to produce the font description files provided with +.I groff +for the +.I \%grops +output driver. +. +. +.\" ==================================================================== +.SH Diagnostics +.\" ==================================================================== +. +.TP +.RI "AGL name \[aq]" x "\[aq] already mapped to groff name \[aq]" y\c +.RI "\[aq]; ignoring AGL name \[aq]uni" XXXX \[aq] +You can disregard these if they're in the form shown, +where the ignored AGL name contains four hexadecimal digits +.IR XXXX . +. +The Adobe Glyph List (AGL) has its own names for glyphs; +they are often +different from +.IR groff 's +special character names. +. +.I \%afmtodit +is constructing a mapping from +.I groff +special character names to AGL names; +this can be a one-to-one or many-to-one mapping, +but one-to-many will not work, +so +.I \%afmtodit +discards the excess mappings. +. +For example, +if +.I x +is +.BR \%Delta , +.I y +is +.BR *D , +and +.I XXXX +is +.BR 0394 , +.I \%afmtodit +is telling you that the +.I groff +font description that it is writing cannot map the +.I groff +special character +.B \[rs][*D] +to AGL glyphs +.B \%Delta +and +.B uni0394 +at the same time. +. +. +.IP +If you get a message like this but are unhappy with which mapping is +ignored, +a remedy is to craft an alternative +.I map-file +and re-run +.I \%afmtodit +using it. +. +. +.\" ==================================================================== +.SH "See also" +.\" ==================================================================== +. +.IR "Groff: The GNU Implementation of troff" , +by Trent A.\& Fisher and Werner Lemberg, +is the primary +.I groff +manual. +. +Section \[lq]Using Symbols\[rq] may be of particular note. +. +You can browse it interactively with \[lq]info \[aq](groff)Using +\%Symbols\[aq]\[rq]. +. +. +.LP +.MR groff @MAN1EXT@ , +.MR gropdf @MAN1EXT@ , +.MR grops @MAN1EXT@ , +.MR groff_font @MAN5EXT@ +. +. 
+.\" Restore compatibility mode (for, e.g., Solaris 10/11). +.cp \n[*groff_afmtodit_1_man_C] +.do rr *groff_afmtodit_1_man_C +. +. +.\" Local Variables: +.\" fill-column: 72 +.\" mode: nroff +.\" End: +.\" vim: set filetype=groff textwidth=72: diff --git a/src/utils/afmtodit/afmtodit.am b/src/utils/afmtodit/afmtodit.am new file mode 100644 index 0000000..fda095d --- /dev/null +++ b/src/utils/afmtodit/afmtodit.am @@ -0,0 +1,56 @@ +# Automake rules for 'src utils afmtodit' +# +# Copyright (C) 2013-2020 Free Software Foundation, Inc. +# +# This file is part of groff. +# +# groff is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# groff is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# +######################################################################## + +afmtodit_srcdir = $(top_srcdir)/src/utils/afmtodit + +bin_SCRIPTS += afmtodit +man1_MANS += src/utils/afmtodit/afmtodit.1 +EXTRA_DIST += \ + src/utils/afmtodit/afmtodit.1.man \ + src/utils/afmtodit/afmtodit.pl \ + src/utils/afmtodit/afmtodit.tables \ + src/utils/afmtodit/make-afmtodit-tables + +afmtodit: $(afmtodit_srcdir)/afmtodit.pl $(afmtodit_srcdir)/afmtodit.tables + $(AM_V_GEN)if test -n "$(PERL)"; then \ + sed -e "s|[@]PERL[@]|$(PERL)|" \ + -e "s|[@]VERSION[@]|$(VERSION)|" \ + -e "s|[@]FONTDIR[@]|$(fontdir)|" \ + -e "/[@]afmtodit.tables[@]/ r $(afmtodit_srcdir)/afmtodit.tables" \ + -e "/[@]afmtodit.tables[@]/ d" \ + $(afmtodit_srcdir)/afmtodit.pl \ + >afmtodit; \ + else \ + sed -e "s|[@]VERSION[@]|$(VERSION)|" \ + -e "s|[@]FONTDIR[@]|$(fontdir)|" \ + -e "/[@]afmtodit.tables[@]/ r $(afmtodit_srcdir)/afmtodit.tables" \ + -e "/[@]afmtodit.tables[@]/ d" \ + $(afmtodit_srcdir)/afmtodit.pl \ + >afmtodit; \ + fi \ + && chmod +x afmtodit + + +# Local Variables: +# mode: makefile-automake +# fill-column: 72 +# End: +# vim: set autoindent filetype=automake textwidth=72: diff --git a/src/utils/afmtodit/afmtodit.pl b/src/utils/afmtodit/afmtodit.pl new file mode 100644 index 0000000..c6b67cc --- /dev/null +++ b/src/utils/afmtodit/afmtodit.pl @@ -0,0 +1,645 @@ +#!@PERL@ +# Copyright (C) 1989-2020 Free Software Foundation, Inc. +# Written by James Clark (jjc@jclark.com) +# +# This file is part of groff. +# +# groff is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# groff is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +use warnings; +use strict; + +@afmtodit.tables@ + +my $prog = $0; +my $groff_sys_fontdir = "@FONTDIR@"; +my $want_help; +my $space_width = 0; + +our ($opt_a, $opt_c, $opt_d, $opt_e, $opt_f, $opt_i, $opt_k, + $opt_m, $opt_n, $opt_o, $opt_s, $opt_v, $opt_x); + +use Getopt::Long qw(:config gnu_getopt); +GetOptions( "a=s", "c", "d=s", "e=s", "f=s", "i=s", "k", "m", "n", + "o=s", "s", "v", "w=i" => \$space_width, "x", "version" => \$opt_v, + "help" => \$want_help +); + +my $afmtodit_version = "GNU afmtodit (groff) version @VERSION@"; + +if ($opt_v) { + print "$afmtodit_version\n"; + exit 0; +} + +sub croak { + my $msg = shift; + print STDERR "$prog: error: $msg"; + exit(1); +} + +sub usage { + my $stream = *STDOUT; + my $had_error = shift; + $stream = *STDERR if $had_error; + print $stream "usage: $prog [-ckmnsx] [-a slant]" . + " [-d device-description-file] [-e encoding-file]" . + " [-f internal-name] [-i italic-correction-factor]" . + " [-o output-file] [-w space-width] afm-file map-file" . + " font-description-file\n" . + "usage: $prog {-v | --version}\n" . + "usage: $prog --help\n"; + unless ($had_error) { + print $stream "\n" . +"Adapt an Adobe Font Metric file, afm-file, for use with the 'ps'\n" . +"and 'pdf' output devices of groff(1). See the afmtodit(1) manual " . +"page.\n"; + } + my $status = 0; + $status = 2 if ($had_error); + exit($status); +} + +&usage(0) if ($want_help); + +if ($#ARGV != 2) { + print STDERR "$prog: usage error: insufficient arguments\n"; + &usage(1); +} + +my $afm = $ARGV[0]; +my $map = $ARGV[1]; +my $fontfile = $ARGV[2]; +my $outfile = $opt_o || $fontfile; +my $desc = $opt_d || "DESC"; +my $sys_map = $groff_sys_fontdir . "/devps/generate/" . $map; +my $sys_desc = $groff_sys_fontdir . "/devps/" . 
$desc; + +# read the afm file + +my $psname; +my ($notice, $version, $fullname, $familyname, @comments); +my $italic_angle = 0; +my (@kern1, @kern2, @kernx); +my (%italic_correction, %left_italic_correction); +my %subscript_correction; +# my %ligs +my %ligatures; +my (@encoding, %in_encoding); +my (%width, %height, %depth); +my (%left_side_bearing, %right_side_bearing); + +open(AFM, $afm) || croak("unable to open '$ARGV[0]': $!\n"); + +while (<AFM>) { + chomp; + s/\x0D$//; + my @field = split(' '); + next if $#field < 0; + if ($field[0] eq "FontName") { + $psname = $field[1]; + if($opt_f) { + $psname = $opt_f; + } + } + elsif($field[0] eq "Notice") { + $notice = $_; + } + elsif($field[0] eq "Version") { + $version = $_; + } + elsif($field[0] eq "FullName") { + $fullname = $_; + } + elsif($field[0] eq "FamilyName") { + $familyname = $_; + } + elsif($field[0] eq "Comment") { + push(@comments, $_); + } + elsif($field[0] eq "ItalicAngle") { + $italic_angle = -$field[1]; + } + elsif ($field[0] eq "KPX") { + if ($#field == 3) { + push(@kern1, $field[1]); + push(@kern2, $field[2]); + push(@kernx, $field[3]); + } + } + elsif ($field[0] eq "italicCorrection") { + $italic_correction{$field[1]} = $field[2]; + } + elsif ($field[0] eq "leftItalicCorrection") { + $left_italic_correction{$field[1]} = $field[2]; + } + elsif ($field[0] eq "subscriptCorrection") { + $subscript_correction{$field[1]} = $field[2]; + } + elsif ($field[0] eq "StartCharMetrics") { + while (<AFM>) { + @field = split(' '); + next if $#field < 0; + last if ($field[0] eq "EndCharMetrics"); + if ($field[0] eq "C") { + my $w; + my $wx = 0; + my $n = ""; +# %ligs = (); + my $lly = 0; + my $ury = 0; + my $llx = 0; + my $urx = 0; + my $c = $field[1]; + my $i = 2; + while ($i <= $#field) { + if ($field[$i] eq "WX") { + $w = $field[$i + 1]; + $i += 2; + } + elsif ($field[$i] eq "N") { + $n = $field[$i + 1]; + $i += 2; + } + elsif ($field[$i] eq "B") { + $llx = $field[$i + 1]; + $lly = $field[$i + 2]; + $urx = 
$field[$i + 3]; + $ury = $field[$i + 4]; + $i += 5; + } +# elsif ($field[$i] eq "L") { +# $ligs{$field[$i + 2]} = $field[$i + 1]; +# $i += 3; +# } + else { + while ($i <= $#field && $field[$i] ne ";") { + $i++; + } + $i++; + } + } + if (!$opt_e && $c != -1) { + $encoding[$c] = $n; + $in_encoding{$n} = 1; + } + $width{$n} = $w; + $height{$n} = $ury; + $depth{$n} = -$lly; + $left_side_bearing{$n} = -$llx; + $right_side_bearing{$n} = $urx - $w; +# foreach my $lig (sort keys %ligs) { +# $ligatures{$lig} = $n . " " . $ligs{$lig}; +# } + } + } + } +} +close(AFM); + +# read the DESC file + +my ($sizescale, $resolution, $unitwidth); +$sizescale = 1; + +open(DESC, $desc) || open(DESC, $sys_desc) || + croak("unable to open '$desc' or '$sys_desc': $!\n"); +while (<DESC>) { + next if /^#/; + chop; + my @field = split(' '); + next if $#field < 0; + last if $field[0] eq "charset"; + if ($field[0] eq "res") { + $resolution = $field[1]; + } + elsif ($field[0] eq "unitwidth") { + $unitwidth = $field[1]; + } + elsif ($field[0] eq "sizescale") { + $sizescale = $field[1]; + } +} +close(DESC); + +if ($opt_e) { + # read the encoding file + + my $sys_opt_e = $groff_sys_fontdir . "/devps/" . $opt_e; + open(ENCODING, $opt_e) || open(ENCODING, $sys_opt_e) || + croak("unable to open '$opt_e' or '$sys_opt_e': $!\n"); + while (<ENCODING>) { + next if /^#/; + chop; + my @field = split(' '); + next if $#field < 0; + if ($#field == 1) { + if ($field[1] >= 0 && defined $width{$field[0]}) { + $encoding[$field[1]] = $field[0]; + $in_encoding{$field[0]} = 1; + } + } + } + close(ENCODING); +} + +# read the map file + +my (%nmap, %map); + +open(MAP, $map) || open(MAP, $sys_map) || + croak("unable to open '$map' or '$sys_map': $!\n"); +while (<MAP>) { + next if /^#/; + chop; + my @field = split(' '); + next if $#field < 0; + if ($#field == 1) { + if ($field[1] eq "space") { + # The PostScript character "space" is automatically mapped + # to the groff character "space"; this is for grops. 
+ warn "$prog: you are not allowed to map to " . + "the groff character 'space'"; + } + elsif ($field[0] eq "space") { + warn "$prog: you are not allowed to map " . + "the PostScript character 'space'"; + } + else { + $nmap{$field[0]} += 0; + $map{$field[0], $nmap{$field[0]}} = $field[1]; + $nmap{$field[0]} += 1; + + # There is more than one way to make a PS glyph name; + # let us try Unicode names with both 'uni' and 'u' prefixes. + my $utmp = $AGL_to_unicode{$field[0]}; + if (defined $utmp && $utmp =~ /^[0-9A-F]{4}$/) { + foreach my $unicodepsname ("uni" . $utmp, "u" . $utmp) { + $nmap{$unicodepsname} += 0; + $map{$unicodepsname, $nmap{$unicodepsname}} = $field[1]; + $nmap{$unicodepsname} += 1; + } + } + } + } +} +close(MAP); + +$italic_angle = $opt_a if $opt_a; + + +if (!$opt_x) { + my %mapped; + my $i = ($#encoding > 256) ? ($#encoding + 1) : 256; + foreach my $ch (sort keys %width) { + # add unencoded characters + if (!$in_encoding{$ch}) { + $encoding[$i] = $ch; + $i++; + } + if ($nmap{$ch}) { + for (my $j = 0; $j < $nmap{$ch}; $j++) { + if (defined $mapped{$map{$ch, $j}}) { + print STDERR "$prog: AGL name" + . " '$mapped{$map{$ch, $j}}' already mapped to" + . " groff name '$map{$ch, $j}'; ignoring AGL" + . " name '$ch'\n"; + } + else { + $mapped{$map{$ch, $j}} = $ch; + } + } + } + else { + my $u = ""; # the resulting groff glyph name + my $ucomp = ""; # Unicode string before decomposition + my $utmp = ""; # temporary value + my $component = ""; + my $nv = 0; + + # Step 1: + # Drop all characters from the glyph name starting with the + # first occurrence of a period (U+002E FULL STOP), if any. + # ?? We avoid mapping of glyphs with periods, since they are + # likely to be variant glyphs, leading to a 'many ps glyphs -- + # one groff glyph' conflict. 
+ # + # If multiple glyphs in the font represent the same character + # in the Unicode standard, as do 'A' and 'A.swash', for example, + # they can be differentiated by using the same base name with + # different suffixes. This suffix (the part of glyph name that + # follows the first period) does not participate in the + # computation of a character sequence. It can be used by font + # designers to indicate some characteristics of the glyph. The + # suffix may contain periods or any other permitted characters. + # Small cap A, for example, could be named 'uni0041.sc' or + # 'A.sc'. + + next if $ch =~ /\./; + + # Step 2: + # Split the remaining string into a sequence of components, + # using the underscore character (U+005F LOW LINE) as the + # delimiter. + + while ($ch =~ /([^_]+)/g) { + $component = $1; + + # Step 3: + # Map each component to a character string according to the + # procedure below: + # + # * If the component is in the Adobe Glyph List, then map + # it to the corresponding character in that list. + + $utmp = $AGL_to_unicode{$component}; + if ($utmp) { + $utmp = "U+" . $utmp; + } + + # * Otherwise, if the component is of the form 'uni' + # (U+0075 U+006E U+0069) followed by a sequence of + # uppercase hexadecimal digits (0 .. 9, A .. F, i.e., + # U+0030 .. U+0039, U+0041 .. U+0046), the length of + # that sequence is a multiple of four, and each group of + # four digits represents a number in the set {0x0000 .. + # 0xD7FF, 0xE000 .. 0xFFFF}, then interpret each such + # number as a Unicode scalar value and map the component + # to the string made of those scalar values. + + elsif ($component =~ /^uni([0-9A-F]{4})+$/) { + while ($component =~ /([0-9A-F]{4})/g) { + $nv = hex("0x" . $1); + if ($nv <= 0xD7FF || $nv >= 0xE000) { + $utmp .= "U+" . $1; + } + else { + $utmp = ""; + last; + } + } + } + + # * Otherwise, if the component is of the form 'u' (U+0075) + # followed by a sequence of four to six uppercase + # hexadecimal digits {0 .. 9, A .. 
F} (U+0030 .. U+0039, + # U+0041 .. U+0046), and those digits represent a number + # in {0x0000 .. 0xD7FF, 0xE000 .. 0x10FFFF}, then + # interpret this number as a Unicode scalar value and map + # the component to the string made of this scalar value. + + elsif ($component =~ /^u([0-9A-F]{4,6})$/) { + $nv = hex("0x" . $1); + if ($nv <= 0xD7FF || ($nv >= 0xE000 && $nv <= 0x10FFFF)) { + $utmp = "U+" . $1; + } + } + + # Finally, concatenate those strings; the result is the + # character string to which the glyph name is mapped. + + $ucomp .= $utmp if $utmp; + } + + # Unicode decomposition + while ($ucomp =~ /([0-9A-F]{4,6})/g) { + $component = $1; + $utmp = $unicode_decomposed{$component}; + $u .= "_" . ($utmp ? $utmp : $component); + } + $u =~ s/^_/u/; + if ($u) { + if (defined $mapped{$u}) { + warn "$prog: both $mapped{$u} and $ch map to $u"; + } + else { + $mapped{$u} = $ch; + } + $nmap{$ch} += 1; + $map{$ch, "0"} = $u; + } + } + } +} + +# Check explicitly for groff's standard ligatures -- many afm files don't +# have proper 'L' entries. 
+ +my %default_ligatures = ( + "fi", "f i", + "fl", "f l", + "ff", "f f", + "ffi", "ff i", + "ffl", "ff l", +); + +foreach my $lig (sort keys %default_ligatures) { + if (defined $width{$lig} && !defined $ligatures{$lig}) { + $ligatures{$lig} = $default_ligatures{$lig}; + } +} + +# print it all out + +open(FONT, ">$outfile") || + croak("unable to open '$outfile' for writing: $!\n"); +select(FONT); + +print("# This file was generated with $afmtodit_version.\n"); +print("#\n"); +print("# $fullname\n") if defined $fullname; +print("# $version\n") if defined $version; +print("# $familyname\n") if defined $familyname; + +if ($opt_c) { + print("#\n"); + if (defined $notice || @comments) { + print("# The original AFM file contains the following comments:\n"); + print("#\n"); + print("# $notice\n") if defined $notice; + foreach my $comment (@comments) { + print("# $comment\n"); + } + } + else { + print("# The original AFM file contains no comments.\n"); + } +} + +print("\n"); + +my $name = $fontfile; +$name =~ s@.*/@@; + +my $sw = 0; +$sw = conv($width{"space"}) if defined $width{"space"}; +$sw = $space_width if ($space_width); + +print("name $name\n"); +print("internalname $psname\n") if $psname; +print("special\n") if $opt_s; +printf("slant %g\n", $italic_angle) if $italic_angle != 0; +printf("spacewidth %d\n", $sw) if $sw; + +if ($opt_e) { + my $e = $opt_e; + $e =~ s@.*/@@; + print("encoding $e\n"); +} + +if (!$opt_n && %ligatures) { + print("ligatures"); + foreach my $lig (sort keys %ligatures) { + print(" $lig"); + } + print(" 0\n"); +} + +if (!$opt_k && $#kern1 >= 0) { + print("\n"); + print("kernpairs\n"); + + for (my $i = 0; $i <= $#kern1; $i++) { + my $c1 = $kern1[$i]; + my $c2 = $kern2[$i]; + if (defined $nmap{$c1} && $nmap{$c1} != 0 + && defined $nmap{$c2} && $nmap{$c2} != 0) { + for (my $j = 0; $j < $nmap{$c1}; $j++) { + for (my $k = 0; $k < $nmap{$c2}; $k++) { + if ($kernx[$i] != 0) { + printf("%s %s %d\n", + $map{$c1, $j}, + $map{$c2, $k}, + conv($kernx[$i])); 
+ } + } + } + } + } +} + +my ($asc_boundary, $desc_boundary, $xheight, $slant); + +# characters not shorter than asc_boundary are considered to have ascenders + +$asc_boundary = 0; +$asc_boundary = $height{"t"} if defined $height{"t"}; +$asc_boundary -= 1; + +# likewise for descenders: start from the depth of "g" and lower the +# boundary to the depth of any of "j", "p", "q", "y" that the font +# defines and that is shallower; each glyph is tested for its own +# presence so a font lacking one of them never compares against undef + +$desc_boundary = 0; +$desc_boundary = $depth{"g"} if defined $depth{"g"}; +$desc_boundary = $depth{"j"} if defined $depth{"j"} && $depth{"j"} < $desc_boundary; +$desc_boundary = $depth{"p"} if defined $depth{"p"} && $depth{"p"} < $desc_boundary; +$desc_boundary = $depth{"q"} if defined $depth{"q"} && $depth{"q"} < $desc_boundary; +$desc_boundary = $depth{"y"} if defined $depth{"y"} && $depth{"y"} < $desc_boundary; +$desc_boundary -= 1; + +if (defined $height{"x"}) { + $xheight = $height{"x"}; +} +elsif (defined $height{"alpha"}) { + $xheight = $height{"alpha"}; +} +else { + $xheight = 450; +} + +$italic_angle = $italic_angle*3.14159265358979323846/180.0; +$slant = sin($italic_angle)/cos($italic_angle); +$slant = 0 if $slant < 0; + +print("\n"); +print("charset\n"); +for (my $i = 0; $i <= $#encoding; $i++) { + my $ch = $encoding[$i]; + if (defined $ch && $ch ne "" && $ch ne "space") { + $map{$ch, "0"} = "---" if !defined $nmap{$ch} || $nmap{$ch} == 0; + my $type = 0; + my $h = $height{$ch}; + $h = 0 if $h < 0; + my $d = $depth{$ch}; + $d = 0 if $d < 0; + $type = 1 if $d >= $desc_boundary; + $type += 2 if $h >= $asc_boundary; + printf("%s\t%d", $map{$ch, "0"}, conv($width{$ch})); + my $italic_correction = 0; + my $left_math_fit = 0; + my $subscript_correction = 0; + if (defined $opt_i) { + $italic_correction = $right_side_bearing{$ch} + $opt_i; + $italic_correction = 0 if $italic_correction < 0; + $subscript_correction = $slant * $xheight * .8; + $subscript_correction = $italic_correction if + $subscript_correction > $italic_correction; + $left_math_fit = $left_side_bearing{$ch} + $opt_i; + if (defined $opt_m) { + $left_math_fit = 0 if $left_math_fit < 0; + } + } + if (defined 
$italic_correction{$ch}) { + $italic_correction = $italic_correction{$ch}; + } + if (defined $left_italic_correction{$ch}) { + $left_math_fit = $left_italic_correction{$ch}; + } + if (defined $subscript_correction{$ch}) { + $subscript_correction = $subscript_correction{$ch}; + } + if ($subscript_correction != 0) { + printf(",%d,%d", conv($h), conv($d)); + printf(",%d,%d,%d", conv($italic_correction), + conv($left_math_fit), + conv($subscript_correction)); + } + elsif ($left_math_fit != 0) { + printf(",%d,%d", conv($h), conv($d)); + printf(",%d,%d", conv($italic_correction), + conv($left_math_fit)); + } + elsif ($italic_correction != 0) { + printf(",%d,%d", conv($h), conv($d)); + printf(",%d", conv($italic_correction)); + } + elsif ($d != 0) { + printf(",%d,%d", conv($h), conv($d)); + } + else { + # always put the height in to stop groff guessing + printf(",%d", conv($h)); + } + printf("\t%d", $type); + printf("\t%d\t%s\n", $i, $ch); + if (defined $nmap{$ch}) { + for (my $j = 1; $j < $nmap{$ch}; $j++) { + printf("%s\t\"\n", $map{$ch, $j}); + } + } + } + if (defined $ch && $ch eq "space" && defined $width{"space"}) { + printf("space\t%d\t0\t%d\tspace\n", conv($width{"space"}), $i); + } +} + +sub conv { + $_[0]*$unitwidth*$resolution/(72*1000*$sizescale) + + ($_[0] < 0 ? -.5 : .5); +} + +# Local Variables: +# fill-column: 72 +# mode: CPerl +# End: +# vim: set cindent noexpandtab shiftwidth=2 softtabstop=2 textwidth=72: diff --git a/src/utils/afmtodit/afmtodit.tables b/src/utils/afmtodit/afmtodit.tables new file mode 100644 index 0000000..16e3647 --- /dev/null +++ b/src/utils/afmtodit/afmtodit.tables @@ -0,0 +1,6163 @@ +# This table was algorithmically derived from the file 'UnicodeData.txt' +# for Unicode 15.0.0, available from unicode.org, +# on 2022-10-09. 
+my %unicode_decomposed = ( + "00C0", "0041_0300", + "00C1", "0041_0301", + "00C2", "0041_0302", + "00C3", "0041_0303", + "00C4", "0041_0308", + "00C5", "0041_030A", + "00C7", "0043_0327", + "00C8", "0045_0300", + "00C9", "0045_0301", + "00CA", "0045_0302", + "00CB", "0045_0308", + "00CC", "0049_0300", + "00CD", "0049_0301", + "00CE", "0049_0302", + "00CF", "0049_0308", + "00D1", "004E_0303", + "00D2", "004F_0300", + "00D3", "004F_0301", + "00D4", "004F_0302", + "00D5", "004F_0303", + "00D6", "004F_0308", + "00D9", "0055_0300", + "00DA", "0055_0301", + "00DB", "0055_0302", + "00DC", "0055_0308", + "00DD", "0059_0301", + "00E0", "0061_0300", + "00E1", "0061_0301", + "00E2", "0061_0302", + "00E3", "0061_0303", + "00E4", "0061_0308", + "00E5", "0061_030A", + "00E7", "0063_0327", + "00E8", "0065_0300", + "00E9", "0065_0301", + "00EA", "0065_0302", + "00EB", "0065_0308", + "00EC", "0069_0300", + "00ED", "0069_0301", + "00EE", "0069_0302", + "00EF", "0069_0308", + "00F1", "006E_0303", + "00F2", "006F_0300", + "00F3", "006F_0301", + "00F4", "006F_0302", + "00F5", "006F_0303", + "00F6", "006F_0308", + "00F9", "0075_0300", + "00FA", "0075_0301", + "00FB", "0075_0302", + "00FC", "0075_0308", + "00FD", "0079_0301", + "00FF", "0079_0308", + "0100", "0041_0304", + "0101", "0061_0304", + "0102", "0041_0306", + "0103", "0061_0306", + "0104", "0041_0328", + "0105", "0061_0328", + "0106", "0043_0301", + "0107", "0063_0301", + "0108", "0043_0302", + "0109", "0063_0302", + "010A", "0043_0307", + "010B", "0063_0307", + "010C", "0043_030C", + "010D", "0063_030C", + "010E", "0044_030C", + "010F", "0064_030C", + "0112", "0045_0304", + "0113", "0065_0304", + "0114", "0045_0306", + "0115", "0065_0306", + "0116", "0045_0307", + "0117", "0065_0307", + "0118", "0045_0328", + "0119", "0065_0328", + "011A", "0045_030C", + "011B", "0065_030C", + "011C", "0047_0302", + "011D", "0067_0302", + "011E", "0047_0306", + "011F", "0067_0306", + "0120", "0047_0307", + "0121", "0067_0307", + "0122", 
"0047_0327", + "0123", "0067_0327", + "0124", "0048_0302", + "0125", "0068_0302", + "0128", "0049_0303", + "0129", "0069_0303", + "012A", "0049_0304", + "012B", "0069_0304", + "012C", "0049_0306", + "012D", "0069_0306", + "012E", "0049_0328", + "012F", "0069_0328", + "0130", "0049_0307", + "0134", "004A_0302", + "0135", "006A_0302", + "0136", "004B_0327", + "0137", "006B_0327", + "0139", "004C_0301", + "013A", "006C_0301", + "013B", "004C_0327", + "013C", "006C_0327", + "013D", "004C_030C", + "013E", "006C_030C", + "0143", "004E_0301", + "0144", "006E_0301", + "0145", "004E_0327", + "0146", "006E_0327", + "0147", "004E_030C", + "0148", "006E_030C", + "014C", "004F_0304", + "014D", "006F_0304", + "014E", "004F_0306", + "014F", "006F_0306", + "0150", "004F_030B", + "0151", "006F_030B", + "0154", "0052_0301", + "0155", "0072_0301", + "0156", "0052_0327", + "0157", "0072_0327", + "0158", "0052_030C", + "0159", "0072_030C", + "015A", "0053_0301", + "015B", "0073_0301", + "015C", "0053_0302", + "015D", "0073_0302", + "015E", "0053_0327", + "015F", "0073_0327", + "0160", "0053_030C", + "0161", "0073_030C", + "0162", "0054_0327", + "0163", "0074_0327", + "0164", "0054_030C", + "0165", "0074_030C", + "0168", "0055_0303", + "0169", "0075_0303", + "016A", "0055_0304", + "016B", "0075_0304", + "016C", "0055_0306", + "016D", "0075_0306", + "016E", "0055_030A", + "016F", "0075_030A", + "0170", "0055_030B", + "0171", "0075_030B", + "0172", "0055_0328", + "0173", "0075_0328", + "0174", "0057_0302", + "0175", "0077_0302", + "0176", "0059_0302", + "0177", "0079_0302", + "0178", "0059_0308", + "0179", "005A_0301", + "017A", "007A_0301", + "017B", "005A_0307", + "017C", "007A_0307", + "017D", "005A_030C", + "017E", "007A_030C", + "01A0", "004F_031B", + "01A1", "006F_031B", + "01AF", "0055_031B", + "01B0", "0075_031B", + "01CD", "0041_030C", + "01CE", "0061_030C", + "01CF", "0049_030C", + "01D0", "0069_030C", + "01D1", "004F_030C", + "01D2", "006F_030C", + "01D3", "0055_030C", + 
"01D4", "0075_030C", + "01D5", "0055_0308_0304", + "01D6", "0075_0308_0304", + "01D7", "0055_0308_0301", + "01D8", "0075_0308_0301", + "01D9", "0055_0308_030C", + "01DA", "0075_0308_030C", + "01DB", "0055_0308_0300", + "01DC", "0075_0308_0300", + "01DE", "0041_0308_0304", + "01DF", "0061_0308_0304", + "01E0", "0041_0307_0304", + "01E1", "0061_0307_0304", + "01E2", "00C6_0304", + "01E3", "00E6_0304", + "01E6", "0047_030C", + "01E7", "0067_030C", + "01E8", "004B_030C", + "01E9", "006B_030C", + "01EA", "004F_0328", + "01EB", "006F_0328", + "01EC", "004F_0328_0304", + "01ED", "006F_0328_0304", + "01EE", "01B7_030C", + "01EF", "0292_030C", + "01F0", "006A_030C", + "01F4", "0047_0301", + "01F5", "0067_0301", + "01F8", "004E_0300", + "01F9", "006E_0300", + "01FA", "0041_030A_0301", + "01FB", "0061_030A_0301", + "01FC", "00C6_0301", + "01FD", "00E6_0301", + "01FE", "00D8_0301", + "01FF", "00F8_0301", + "0200", "0041_030F", + "0201", "0061_030F", + "0202", "0041_0311", + "0203", "0061_0311", + "0204", "0045_030F", + "0205", "0065_030F", + "0206", "0045_0311", + "0207", "0065_0311", + "0208", "0049_030F", + "0209", "0069_030F", + "020A", "0049_0311", + "020B", "0069_0311", + "020C", "004F_030F", + "020D", "006F_030F", + "020E", "004F_0311", + "020F", "006F_0311", + "0210", "0052_030F", + "0211", "0072_030F", + "0212", "0052_0311", + "0213", "0072_0311", + "0214", "0055_030F", + "0215", "0075_030F", + "0216", "0055_0311", + "0217", "0075_0311", + "0218", "0053_0326", + "0219", "0073_0326", + "021A", "0054_0326", + "021B", "0074_0326", + "021E", "0048_030C", + "021F", "0068_030C", + "0226", "0041_0307", + "0227", "0061_0307", + "0228", "0045_0327", + "0229", "0065_0327", + "022A", "004F_0308_0304", + "022B", "006F_0308_0304", + "022C", "004F_0303_0304", + "022D", "006F_0303_0304", + "022E", "004F_0307", + "022F", "006F_0307", + "0230", "004F_0307_0304", + "0231", "006F_0307_0304", + "0232", "0059_0304", + "0233", "0079_0304", + "0340", "0300", + "0341", "0301", + "0343", 
"0313", + "0344", "0308_0301", + "0374", "02B9", + "037E", "003B", + "0385", "00A8_0301", + "0386", "0391_0301", + "0387", "00B7", + "0388", "0395_0301", + "0389", "0397_0301", + "038A", "0399_0301", + "038C", "039F_0301", + "038E", "03A5_0301", + "038F", "03A9_0301", + "0390", "03B9_0308_0301", + "03AA", "0399_0308", + "03AB", "03A5_0308", + "03AC", "03B1_0301", + "03AD", "03B5_0301", + "03AE", "03B7_0301", + "03AF", "03B9_0301", + "03B0", "03C5_0308_0301", + "03CA", "03B9_0308", + "03CB", "03C5_0308", + "03CC", "03BF_0301", + "03CD", "03C5_0301", + "03CE", "03C9_0301", + "03D3", "03D2_0301", + "03D4", "03D2_0308", + "0400", "0415_0300", + "0401", "0415_0308", + "0403", "0413_0301", + "0407", "0406_0308", + "040C", "041A_0301", + "040D", "0418_0300", + "040E", "0423_0306", + "0419", "0418_0306", + "0439", "0438_0306", + "0450", "0435_0300", + "0451", "0435_0308", + "0453", "0433_0301", + "0457", "0456_0308", + "045C", "043A_0301", + "045D", "0438_0300", + "045E", "0443_0306", + "0476", "0474_030F", + "0477", "0475_030F", + "04C1", "0416_0306", + "04C2", "0436_0306", + "04D0", "0410_0306", + "04D1", "0430_0306", + "04D2", "0410_0308", + "04D3", "0430_0308", + "04D6", "0415_0306", + "04D7", "0435_0306", + "04DA", "04D8_0308", + "04DB", "04D9_0308", + "04DC", "0416_0308", + "04DD", "0436_0308", + "04DE", "0417_0308", + "04DF", "0437_0308", + "04E2", "0418_0304", + "04E3", "0438_0304", + "04E4", "0418_0308", + "04E5", "0438_0308", + "04E6", "041E_0308", + "04E7", "043E_0308", + "04EA", "04E8_0308", + "04EB", "04E9_0308", + "04EC", "042D_0308", + "04ED", "044D_0308", + "04EE", "0423_0304", + "04EF", "0443_0304", + "04F0", "0423_0308", + "04F1", "0443_0308", + "04F2", "0423_030B", + "04F3", "0443_030B", + "04F4", "0427_0308", + "04F5", "0447_0308", + "04F8", "042B_0308", + "04F9", "044B_0308", + "0622", "0627_0653", + "0623", "0627_0654", + "0624", "0648_0654", + "0625", "0627_0655", + "0626", "064A_0654", + "06C0", "06D5_0654", + "06C2", "06C1_0654", + "06D3", 
"06D2_0654", + "0929", "0928_093C", + "0931", "0930_093C", + "0934", "0933_093C", + "0958", "0915_093C", + "0959", "0916_093C", + "095A", "0917_093C", + "095B", "091C_093C", + "095C", "0921_093C", + "095D", "0922_093C", + "095E", "092B_093C", + "095F", "092F_093C", + "09CB", "09C7_09BE", + "09CC", "09C7_09D7", + "09DC", "09A1_09BC", + "09DD", "09A2_09BC", + "09DF", "09AF_09BC", + "0A33", "0A32_0A3C", + "0A36", "0A38_0A3C", + "0A59", "0A16_0A3C", + "0A5A", "0A17_0A3C", + "0A5B", "0A1C_0A3C", + "0A5E", "0A2B_0A3C", + "0B48", "0B47_0B56", + "0B4B", "0B47_0B3E", + "0B4C", "0B47_0B57", + "0B5C", "0B21_0B3C", + "0B5D", "0B22_0B3C", + "0B94", "0B92_0BD7", + "0BCA", "0BC6_0BBE", + "0BCB", "0BC7_0BBE", + "0BCC", "0BC6_0BD7", + "0C48", "0C46_0C56", + "0CC0", "0CBF_0CD5", + "0CC7", "0CC6_0CD5", + "0CC8", "0CC6_0CD6", + "0CCA", "0CC6_0CC2", + "0CCB", "0CC6_0CC2_0CD5", + "0D4A", "0D46_0D3E", + "0D4B", "0D47_0D3E", + "0D4C", "0D46_0D57", + "0DDA", "0DD9_0DCA", + "0DDC", "0DD9_0DCF", + "0DDD", "0DD9_0DCF_0DCA", + "0DDE", "0DD9_0DDF", + "0F43", "0F42_0FB7", + "0F4D", "0F4C_0FB7", + "0F52", "0F51_0FB7", + "0F57", "0F56_0FB7", + "0F5C", "0F5B_0FB7", + "0F69", "0F40_0FB5", + "0F73", "0F71_0F72", + "0F75", "0F71_0F74", + "0F76", "0FB2_0F80", + "0F78", "0FB3_0F80", + "0F81", "0F71_0F80", + "0F93", "0F92_0FB7", + "0F9D", "0F9C_0FB7", + "0FA2", "0FA1_0FB7", + "0FA7", "0FA6_0FB7", + "0FAC", "0FAB_0FB7", + "0FB9", "0F90_0FB5", + "1026", "1025_102E", + "1B06", "1B05_1B35", + "1B08", "1B07_1B35", + "1B0A", "1B09_1B35", + "1B0C", "1B0B_1B35", + "1B0E", "1B0D_1B35", + "1B12", "1B11_1B35", + "1B3B", "1B3A_1B35", + "1B3D", "1B3C_1B35", + "1B40", "1B3E_1B35", + "1B41", "1B3F_1B35", + "1B43", "1B42_1B35", + "1E00", "0041_0325", + "1E01", "0061_0325", + "1E02", "0042_0307", + "1E03", "0062_0307", + "1E04", "0042_0323", + "1E05", "0062_0323", + "1E06", "0042_0331", + "1E07", "0062_0331", + "1E08", "0043_0327_0301", + "1E09", "0063_0327_0301", + "1E0A", "0044_0307", + "1E0B", "0064_0307", + "1E0C", 
"0044_0323", + "1E0D", "0064_0323", + "1E0E", "0044_0331", + "1E0F", "0064_0331", + "1E10", "0044_0327", + "1E11", "0064_0327", + "1E12", "0044_032D", + "1E13", "0064_032D", + "1E14", "0045_0304_0300", + "1E15", "0065_0304_0300", + "1E16", "0045_0304_0301", + "1E17", "0065_0304_0301", + "1E18", "0045_032D", + "1E19", "0065_032D", + "1E1A", "0045_0330", + "1E1B", "0065_0330", + "1E1C", "0045_0327_0306", + "1E1D", "0065_0327_0306", + "1E1E", "0046_0307", + "1E1F", "0066_0307", + "1E20", "0047_0304", + "1E21", "0067_0304", + "1E22", "0048_0307", + "1E23", "0068_0307", + "1E24", "0048_0323", + "1E25", "0068_0323", + "1E26", "0048_0308", + "1E27", "0068_0308", + "1E28", "0048_0327", + "1E29", "0068_0327", + "1E2A", "0048_032E", + "1E2B", "0068_032E", + "1E2C", "0049_0330", + "1E2D", "0069_0330", + "1E2E", "0049_0308_0301", + "1E2F", "0069_0308_0301", + "1E30", "004B_0301", + "1E31", "006B_0301", + "1E32", "004B_0323", + "1E33", "006B_0323", + "1E34", "004B_0331", + "1E35", "006B_0331", + "1E36", "004C_0323", + "1E37", "006C_0323", + "1E38", "004C_0323_0304", + "1E39", "006C_0323_0304", + "1E3A", "004C_0331", + "1E3B", "006C_0331", + "1E3C", "004C_032D", + "1E3D", "006C_032D", + "1E3E", "004D_0301", + "1E3F", "006D_0301", + "1E40", "004D_0307", + "1E41", "006D_0307", + "1E42", "004D_0323", + "1E43", "006D_0323", + "1E44", "004E_0307", + "1E45", "006E_0307", + "1E46", "004E_0323", + "1E47", "006E_0323", + "1E48", "004E_0331", + "1E49", "006E_0331", + "1E4A", "004E_032D", + "1E4B", "006E_032D", + "1E4C", "004F_0303_0301", + "1E4D", "006F_0303_0301", + "1E4E", "004F_0303_0308", + "1E4F", "006F_0303_0308", + "1E50", "004F_0304_0300", + "1E51", "006F_0304_0300", + "1E52", "004F_0304_0301", + "1E53", "006F_0304_0301", + "1E54", "0050_0301", + "1E55", "0070_0301", + "1E56", "0050_0307", + "1E57", "0070_0307", + "1E58", "0052_0307", + "1E59", "0072_0307", + "1E5A", "0052_0323", + "1E5B", "0072_0323", + "1E5C", "0052_0323_0304", + "1E5D", "0072_0323_0304", + "1E5E", "0052_0331", 
+ "1E5F", "0072_0331", + "1E60", "0053_0307", + "1E61", "0073_0307", + "1E62", "0053_0323", + "1E63", "0073_0323", + "1E64", "0053_0301_0307", + "1E65", "0073_0301_0307", + "1E66", "0053_030C_0307", + "1E67", "0073_030C_0307", + "1E68", "0053_0323_0307", + "1E69", "0073_0323_0307", + "1E6A", "0054_0307", + "1E6B", "0074_0307", + "1E6C", "0054_0323", + "1E6D", "0074_0323", + "1E6E", "0054_0331", + "1E6F", "0074_0331", + "1E70", "0054_032D", + "1E71", "0074_032D", + "1E72", "0055_0324", + "1E73", "0075_0324", + "1E74", "0055_0330", + "1E75", "0075_0330", + "1E76", "0055_032D", + "1E77", "0075_032D", + "1E78", "0055_0303_0301", + "1E79", "0075_0303_0301", + "1E7A", "0055_0304_0308", + "1E7B", "0075_0304_0308", + "1E7C", "0056_0303", + "1E7D", "0076_0303", + "1E7E", "0056_0323", + "1E7F", "0076_0323", + "1E80", "0057_0300", + "1E81", "0077_0300", + "1E82", "0057_0301", + "1E83", "0077_0301", + "1E84", "0057_0308", + "1E85", "0077_0308", + "1E86", "0057_0307", + "1E87", "0077_0307", + "1E88", "0057_0323", + "1E89", "0077_0323", + "1E8A", "0058_0307", + "1E8B", "0078_0307", + "1E8C", "0058_0308", + "1E8D", "0078_0308", + "1E8E", "0059_0307", + "1E8F", "0079_0307", + "1E90", "005A_0302", + "1E91", "007A_0302", + "1E92", "005A_0323", + "1E93", "007A_0323", + "1E94", "005A_0331", + "1E95", "007A_0331", + "1E96", "0068_0331", + "1E97", "0074_0308", + "1E98", "0077_030A", + "1E99", "0079_030A", + "1E9B", "017F_0307", + "1EA0", "0041_0323", + "1EA1", "0061_0323", + "1EA2", "0041_0309", + "1EA3", "0061_0309", + "1EA4", "0041_0302_0301", + "1EA5", "0061_0302_0301", + "1EA6", "0041_0302_0300", + "1EA7", "0061_0302_0300", + "1EA8", "0041_0302_0309", + "1EA9", "0061_0302_0309", + "1EAA", "0041_0302_0303", + "1EAB", "0061_0302_0303", + "1EAC", "0041_0323_0302", + "1EAD", "0061_0323_0302", + "1EAE", "0041_0306_0301", + "1EAF", "0061_0306_0301", + "1EB0", "0041_0306_0300", + "1EB1", "0061_0306_0300", + "1EB2", "0041_0306_0309", + "1EB3", "0061_0306_0309", + "1EB4", "0041_0306_0303", + 
"1EB5", "0061_0306_0303", + "1EB6", "0041_0323_0306", + "1EB7", "0061_0323_0306", + "1EB8", "0045_0323", + "1EB9", "0065_0323", + "1EBA", "0045_0309", + "1EBB", "0065_0309", + "1EBC", "0045_0303", + "1EBD", "0065_0303", + "1EBE", "0045_0302_0301", + "1EBF", "0065_0302_0301", + "1EC0", "0045_0302_0300", + "1EC1", "0065_0302_0300", + "1EC2", "0045_0302_0309", + "1EC3", "0065_0302_0309", + "1EC4", "0045_0302_0303", + "1EC5", "0065_0302_0303", + "1EC6", "0045_0323_0302", + "1EC7", "0065_0323_0302", + "1EC8", "0049_0309", + "1EC9", "0069_0309", + "1ECA", "0049_0323", + "1ECB", "0069_0323", + "1ECC", "004F_0323", + "1ECD", "006F_0323", + "1ECE", "004F_0309", + "1ECF", "006F_0309", + "1ED0", "004F_0302_0301", + "1ED1", "006F_0302_0301", + "1ED2", "004F_0302_0300", + "1ED3", "006F_0302_0300", + "1ED4", "004F_0302_0309", + "1ED5", "006F_0302_0309", + "1ED6", "004F_0302_0303", + "1ED7", "006F_0302_0303", + "1ED8", "004F_0323_0302", + "1ED9", "006F_0323_0302", + "1EDA", "004F_031B_0301", + "1EDB", "006F_031B_0301", + "1EDC", "004F_031B_0300", + "1EDD", "006F_031B_0300", + "1EDE", "004F_031B_0309", + "1EDF", "006F_031B_0309", + "1EE0", "004F_031B_0303", + "1EE1", "006F_031B_0303", + "1EE2", "004F_031B_0323", + "1EE3", "006F_031B_0323", + "1EE4", "0055_0323", + "1EE5", "0075_0323", + "1EE6", "0055_0309", + "1EE7", "0075_0309", + "1EE8", "0055_031B_0301", + "1EE9", "0075_031B_0301", + "1EEA", "0055_031B_0300", + "1EEB", "0075_031B_0300", + "1EEC", "0055_031B_0309", + "1EED", "0075_031B_0309", + "1EEE", "0055_031B_0303", + "1EEF", "0075_031B_0303", + "1EF0", "0055_031B_0323", + "1EF1", "0075_031B_0323", + "1EF2", "0059_0300", + "1EF3", "0079_0300", + "1EF4", "0059_0323", + "1EF5", "0079_0323", + "1EF6", "0059_0309", + "1EF7", "0079_0309", + "1EF8", "0059_0303", + "1EF9", "0079_0303", + "1F00", "03B1_0313", + "1F01", "03B1_0314", + "1F02", "03B1_0313_0300", + "1F03", "03B1_0314_0300", + "1F04", "03B1_0313_0301", + "1F05", "03B1_0314_0301", + "1F06", "03B1_0313_0342", + "1F07", 
"03B1_0314_0342", + "1F08", "0391_0313", + "1F09", "0391_0314", + "1F0A", "0391_0313_0300", + "1F0B", "0391_0314_0300", + "1F0C", "0391_0313_0301", + "1F0D", "0391_0314_0301", + "1F0E", "0391_0313_0342", + "1F0F", "0391_0314_0342", + "1F10", "03B5_0313", + "1F11", "03B5_0314", + "1F12", "03B5_0313_0300", + "1F13", "03B5_0314_0300", + "1F14", "03B5_0313_0301", + "1F15", "03B5_0314_0301", + "1F18", "0395_0313", + "1F19", "0395_0314", + "1F1A", "0395_0313_0300", + "1F1B", "0395_0314_0300", + "1F1C", "0395_0313_0301", + "1F1D", "0395_0314_0301", + "1F20", "03B7_0313", + "1F21", "03B7_0314", + "1F22", "03B7_0313_0300", + "1F23", "03B7_0314_0300", + "1F24", "03B7_0313_0301", + "1F25", "03B7_0314_0301", + "1F26", "03B7_0313_0342", + "1F27", "03B7_0314_0342", + "1F28", "0397_0313", + "1F29", "0397_0314", + "1F2A", "0397_0313_0300", + "1F2B", "0397_0314_0300", + "1F2C", "0397_0313_0301", + "1F2D", "0397_0314_0301", + "1F2E", "0397_0313_0342", + "1F2F", "0397_0314_0342", + "1F30", "03B9_0313", + "1F31", "03B9_0314", + "1F32", "03B9_0313_0300", + "1F33", "03B9_0314_0300", + "1F34", "03B9_0313_0301", + "1F35", "03B9_0314_0301", + "1F36", "03B9_0313_0342", + "1F37", "03B9_0314_0342", + "1F38", "0399_0313", + "1F39", "0399_0314", + "1F3A", "0399_0313_0300", + "1F3B", "0399_0314_0300", + "1F3C", "0399_0313_0301", + "1F3D", "0399_0314_0301", + "1F3E", "0399_0313_0342", + "1F3F", "0399_0314_0342", + "1F40", "03BF_0313", + "1F41", "03BF_0314", + "1F42", "03BF_0313_0300", + "1F43", "03BF_0314_0300", + "1F44", "03BF_0313_0301", + "1F45", "03BF_0314_0301", + "1F48", "039F_0313", + "1F49", "039F_0314", + "1F4A", "039F_0313_0300", + "1F4B", "039F_0314_0300", + "1F4C", "039F_0313_0301", + "1F4D", "039F_0314_0301", + "1F50", "03C5_0313", + "1F51", "03C5_0314", + "1F52", "03C5_0313_0300", + "1F53", "03C5_0314_0300", + "1F54", "03C5_0313_0301", + "1F55", "03C5_0314_0301", + "1F56", "03C5_0313_0342", + "1F57", "03C5_0314_0342", + "1F59", "03A5_0314", + "1F5B", "03A5_0314_0300", + "1F5D", 
"03A5_0314_0301", + "1F5F", "03A5_0314_0342", + "1F60", "03C9_0313", + "1F61", "03C9_0314", + "1F62", "03C9_0313_0300", + "1F63", "03C9_0314_0300", + "1F64", "03C9_0313_0301", + "1F65", "03C9_0314_0301", + "1F66", "03C9_0313_0342", + "1F67", "03C9_0314_0342", + "1F68", "03A9_0313", + "1F69", "03A9_0314", + "1F6A", "03A9_0313_0300", + "1F6B", "03A9_0314_0300", + "1F6C", "03A9_0313_0301", + "1F6D", "03A9_0314_0301", + "1F6E", "03A9_0313_0342", + "1F6F", "03A9_0314_0342", + "1F70", "03B1_0300", + "1F71", "03B1_0301", + "1F72", "03B5_0300", + "1F73", "03B5_0301", + "1F74", "03B7_0300", + "1F75", "03B7_0301", + "1F76", "03B9_0300", + "1F77", "03B9_0301", + "1F78", "03BF_0300", + "1F79", "03BF_0301", + "1F7A", "03C5_0300", + "1F7B", "03C5_0301", + "1F7C", "03C9_0300", + "1F7D", "03C9_0301", + "1F80", "03B1_0313_0345", + "1F81", "03B1_0314_0345", + "1F82", "03B1_0313_0300_0345", + "1F83", "03B1_0314_0300_0345", + "1F84", "03B1_0313_0301_0345", + "1F85", "03B1_0314_0301_0345", + "1F86", "03B1_0313_0342_0345", + "1F87", "03B1_0314_0342_0345", + "1F88", "0391_0313_0345", + "1F89", "0391_0314_0345", + "1F8A", "0391_0313_0300_0345", + "1F8B", "0391_0314_0300_0345", + "1F8C", "0391_0313_0301_0345", + "1F8D", "0391_0314_0301_0345", + "1F8E", "0391_0313_0342_0345", + "1F8F", "0391_0314_0342_0345", + "1F90", "03B7_0313_0345", + "1F91", "03B7_0314_0345", + "1F92", "03B7_0313_0300_0345", + "1F93", "03B7_0314_0300_0345", + "1F94", "03B7_0313_0301_0345", + "1F95", "03B7_0314_0301_0345", + "1F96", "03B7_0313_0342_0345", + "1F97", "03B7_0314_0342_0345", + "1F98", "0397_0313_0345", + "1F99", "0397_0314_0345", + "1F9A", "0397_0313_0300_0345", + "1F9B", "0397_0314_0300_0345", + "1F9C", "0397_0313_0301_0345", + "1F9D", "0397_0314_0301_0345", + "1F9E", "0397_0313_0342_0345", + "1F9F", "0397_0314_0342_0345", + "1FA0", "03C9_0313_0345", + "1FA1", "03C9_0314_0345", + "1FA2", "03C9_0313_0300_0345", + "1FA3", "03C9_0314_0300_0345", + "1FA4", "03C9_0313_0301_0345", + "1FA5", "03C9_0314_0301_0345", 
+ "1FA6", "03C9_0313_0342_0345", + "1FA7", "03C9_0314_0342_0345", + "1FA8", "03A9_0313_0345", + "1FA9", "03A9_0314_0345", + "1FAA", "03A9_0313_0300_0345", + "1FAB", "03A9_0314_0300_0345", + "1FAC", "03A9_0313_0301_0345", + "1FAD", "03A9_0314_0301_0345", + "1FAE", "03A9_0313_0342_0345", + "1FAF", "03A9_0314_0342_0345", + "1FB0", "03B1_0306", + "1FB1", "03B1_0304", + "1FB2", "03B1_0300_0345", + "1FB3", "03B1_0345", + "1FB4", "03B1_0301_0345", + "1FB6", "03B1_0342", + "1FB7", "03B1_0342_0345", + "1FB8", "0391_0306", + "1FB9", "0391_0304", + "1FBA", "0391_0300", + "1FBB", "0391_0301", + "1FBC", "0391_0345", + "1FBE", "03B9", + "1FC1", "00A8_0342", + "1FC2", "03B7_0300_0345", + "1FC3", "03B7_0345", + "1FC4", "03B7_0301_0345", + "1FC6", "03B7_0342", + "1FC7", "03B7_0342_0345", + "1FC8", "0395_0300", + "1FC9", "0395_0301", + "1FCA", "0397_0300", + "1FCB", "0397_0301", + "1FCC", "0397_0345", + "1FCD", "1FBF_0300", + "1FCE", "1FBF_0301", + "1FCF", "1FBF_0342", + "1FD0", "03B9_0306", + "1FD1", "03B9_0304", + "1FD2", "03B9_0308_0300", + "1FD3", "03B9_0308_0301", + "1FD6", "03B9_0342", + "1FD7", "03B9_0308_0342", + "1FD8", "0399_0306", + "1FD9", "0399_0304", + "1FDA", "0399_0300", + "1FDB", "0399_0301", + "1FDD", "1FFE_0300", + "1FDE", "1FFE_0301", + "1FDF", "1FFE_0342", + "1FE0", "03C5_0306", + "1FE1", "03C5_0304", + "1FE2", "03C5_0308_0300", + "1FE3", "03C5_0308_0301", + "1FE4", "03C1_0313", + "1FE5", "03C1_0314", + "1FE6", "03C5_0342", + "1FE7", "03C5_0308_0342", + "1FE8", "03A5_0306", + "1FE9", "03A5_0304", + "1FEA", "03A5_0300", + "1FEB", "03A5_0301", + "1FEC", "03A1_0314", + "1FED", "00A8_0300", + "1FEE", "00A8_0301", + "1FEF", "0060", + "1FF2", "03C9_0300_0345", + "1FF3", "03C9_0345", + "1FF4", "03C9_0301_0345", + "1FF6", "03C9_0342", + "1FF7", "03C9_0342_0345", + "1FF8", "039F_0300", + "1FF9", "039F_0301", + "1FFA", "03A9_0300", + "1FFB", "03A9_0301", + "1FFC", "03A9_0345", + "1FFD", "00B4", + "2000", "2002", + "2001", "2003", + "2126", "03A9", + "212A", "004B", + 
"212B", "0041_030A", + "219A", "2190_0338", + "219B", "2192_0338", + "21AE", "2194_0338", + "21CD", "21D0_0338", + "21CE", "21D4_0338", + "21CF", "21D2_0338", + "2204", "2203_0338", + "2209", "2208_0338", + "220C", "220B_0338", + "2224", "2223_0338", + "2226", "2225_0338", + "2241", "223C_0338", + "2244", "2243_0338", + "2247", "2245_0338", + "2249", "2248_0338", + "2260", "003D_0338", + "2262", "2261_0338", + "226D", "224D_0338", + "226E", "003C_0338", + "226F", "003E_0338", + "2270", "2264_0338", + "2271", "2265_0338", + "2274", "2272_0338", + "2275", "2273_0338", + "2278", "2276_0338", + "2279", "2277_0338", + "2280", "227A_0338", + "2281", "227B_0338", + "2284", "2282_0338", + "2285", "2283_0338", + "2288", "2286_0338", + "2289", "2287_0338", + "22AC", "22A2_0338", + "22AD", "22A8_0338", + "22AE", "22A9_0338", + "22AF", "22AB_0338", + "22E0", "227C_0338", + "22E1", "227D_0338", + "22E2", "2291_0338", + "22E3", "2292_0338", + "22EA", "22B2_0338", + "22EB", "22B3_0338", + "22EC", "22B4_0338", + "22ED", "22B5_0338", + "2329", "3008", + "232A", "3009", + "2ADC", "2ADD_0338", + "304C", "304B_3099", + "304E", "304D_3099", + "3050", "304F_3099", + "3052", "3051_3099", + "3054", "3053_3099", + "3056", "3055_3099", + "3058", "3057_3099", + "305A", "3059_3099", + "305C", "305B_3099", + "305E", "305D_3099", + "3060", "305F_3099", + "3062", "3061_3099", + "3065", "3064_3099", + "3067", "3066_3099", + "3069", "3068_3099", + "3070", "306F_3099", + "3071", "306F_309A", + "3073", "3072_3099", + "3074", "3072_309A", + "3076", "3075_3099", + "3077", "3075_309A", + "3079", "3078_3099", + "307A", "3078_309A", + "307C", "307B_3099", + "307D", "307B_309A", + "3094", "3046_3099", + "309E", "309D_3099", + "30AC", "30AB_3099", + "30AE", "30AD_3099", + "30B0", "30AF_3099", + "30B2", "30B1_3099", + "30B4", "30B3_3099", + "30B6", "30B5_3099", + "30B8", "30B7_3099", + "30BA", "30B9_3099", + "30BC", "30BB_3099", + "30BE", "30BD_3099", + "30C0", "30BF_3099", + "30C2", "30C1_3099", + "30C5", 
"30C4_3099", + "30C7", "30C6_3099", + "30C9", "30C8_3099", + "30D0", "30CF_3099", + "30D1", "30CF_309A", + "30D3", "30D2_3099", + "30D4", "30D2_309A", + "30D6", "30D5_3099", + "30D7", "30D5_309A", + "30D9", "30D8_3099", + "30DA", "30D8_309A", + "30DC", "30DB_3099", + "30DD", "30DB_309A", + "30F4", "30A6_3099", + "30F7", "30EF_3099", + "30F8", "30F0_3099", + "30F9", "30F1_3099", + "30FA", "30F2_3099", + "30FE", "30FD_3099", + "F900", "8C48", + "F901", "66F4", + "F902", "8ECA", + "F903", "8CC8", + "F904", "6ED1", + "F905", "4E32", + "F906", "53E5", + "F907", "9F9C", + "F908", "9F9C", + "F909", "5951", + "F90A", "91D1", + "F90B", "5587", + "F90C", "5948", + "F90D", "61F6", + "F90E", "7669", + "F90F", "7F85", + "F910", "863F", + "F911", "87BA", + "F912", "88F8", + "F913", "908F", + "F914", "6A02", + "F915", "6D1B", + "F916", "70D9", + "F917", "73DE", + "F918", "843D", + "F919", "916A", + "F91A", "99F1", + "F91B", "4E82", + "F91C", "5375", + "F91D", "6B04", + "F91E", "721B", + "F91F", "862D", + "F920", "9E1E", + "F921", "5D50", + "F922", "6FEB", + "F923", "85CD", + "F924", "8964", + "F925", "62C9", + "F926", "81D8", + "F927", "881F", + "F928", "5ECA", + "F929", "6717", + "F92A", "6D6A", + "F92B", "72FC", + "F92C", "90CE", + "F92D", "4F86", + "F92E", "51B7", + "F92F", "52DE", + "F930", "64C4", + "F931", "6AD3", + "F932", "7210", + "F933", "76E7", + "F934", "8001", + "F935", "8606", + "F936", "865C", + "F937", "8DEF", + "F938", "9732", + "F939", "9B6F", + "F93A", "9DFA", + "F93B", "788C", + "F93C", "797F", + "F93D", "7DA0", + "F93E", "83C9", + "F93F", "9304", + "F940", "9E7F", + "F941", "8AD6", + "F942", "58DF", + "F943", "5F04", + "F944", "7C60", + "F945", "807E", + "F946", "7262", + "F947", "78CA", + "F948", "8CC2", + "F949", "96F7", + "F94A", "58D8", + "F94B", "5C62", + "F94C", "6A13", + "F94D", "6DDA", + "F94E", "6F0F", + "F94F", "7D2F", + "F950", "7E37", + "F951", "964B", + "F952", "52D2", + "F953", "808B", + "F954", "51DC", + "F955", "51CC", + "F956", "7A1C", + 
"F957", "7DBE", + "F958", "83F1", + "F959", "9675", + "F95A", "8B80", + "F95B", "62CF", + "F95C", "6A02", + "F95D", "8AFE", + "F95E", "4E39", + "F95F", "5BE7", + "F960", "6012", + "F961", "7387", + "F962", "7570", + "F963", "5317", + "F964", "78FB", + "F965", "4FBF", + "F966", "5FA9", + "F967", "4E0D", + "F968", "6CCC", + "F969", "6578", + "F96A", "7D22", + "F96B", "53C3", + "F96C", "585E", + "F96D", "7701", + "F96E", "8449", + "F96F", "8AAA", + "F970", "6BBA", + "F971", "8FB0", + "F972", "6C88", + "F973", "62FE", + "F974", "82E5", + "F975", "63A0", + "F976", "7565", + "F977", "4EAE", + "F978", "5169", + "F979", "51C9", + "F97A", "6881", + "F97B", "7CE7", + "F97C", "826F", + "F97D", "8AD2", + "F97E", "91CF", + "F97F", "52F5", + "F980", "5442", + "F981", "5973", + "F982", "5EEC", + "F983", "65C5", + "F984", "6FFE", + "F985", "792A", + "F986", "95AD", + "F987", "9A6A", + "F988", "9E97", + "F989", "9ECE", + "F98A", "529B", + "F98B", "66C6", + "F98C", "6B77", + "F98D", "8F62", + "F98E", "5E74", + "F98F", "6190", + "F990", "6200", + "F991", "649A", + "F992", "6F23", + "F993", "7149", + "F994", "7489", + "F995", "79CA", + "F996", "7DF4", + "F997", "806F", + "F998", "8F26", + "F999", "84EE", + "F99A", "9023", + "F99B", "934A", + "F99C", "5217", + "F99D", "52A3", + "F99E", "54BD", + "F99F", "70C8", + "F9A0", "88C2", + "F9A1", "8AAA", + "F9A2", "5EC9", + "F9A3", "5FF5", + "F9A4", "637B", + "F9A5", "6BAE", + "F9A6", "7C3E", + "F9A7", "7375", + "F9A8", "4EE4", + "F9A9", "56F9", + "F9AA", "5BE7", + "F9AB", "5DBA", + "F9AC", "601C", + "F9AD", "73B2", + "F9AE", "7469", + "F9AF", "7F9A", + "F9B0", "8046", + "F9B1", "9234", + "F9B2", "96F6", + "F9B3", "9748", + "F9B4", "9818", + "F9B5", "4F8B", + "F9B6", "79AE", + "F9B7", "91B4", + "F9B8", "96B8", + "F9B9", "60E1", + "F9BA", "4E86", + "F9BB", "50DA", + "F9BC", "5BEE", + "F9BD", "5C3F", + "F9BE", "6599", + "F9BF", "6A02", + "F9C0", "71CE", + "F9C1", "7642", + "F9C2", "84FC", + "F9C3", "907C", + "F9C4", "9F8D", + "F9C5", "6688", + 
"F9C6", "962E", + "F9C7", "5289", + "F9C8", "677B", + "F9C9", "67F3", + "F9CA", "6D41", + "F9CB", "6E9C", + "F9CC", "7409", + "F9CD", "7559", + "F9CE", "786B", + "F9CF", "7D10", + "F9D0", "985E", + "F9D1", "516D", + "F9D2", "622E", + "F9D3", "9678", + "F9D4", "502B", + "F9D5", "5D19", + "F9D6", "6DEA", + "F9D7", "8F2A", + "F9D8", "5F8B", + "F9D9", "6144", + "F9DA", "6817", + "F9DB", "7387", + "F9DC", "9686", + "F9DD", "5229", + "F9DE", "540F", + "F9DF", "5C65", + "F9E0", "6613", + "F9E1", "674E", + "F9E2", "68A8", + "F9E3", "6CE5", + "F9E4", "7406", + "F9E5", "75E2", + "F9E6", "7F79", + "F9E7", "88CF", + "F9E8", "88E1", + "F9E9", "91CC", + "F9EA", "96E2", + "F9EB", "533F", + "F9EC", "6EBA", + "F9ED", "541D", + "F9EE", "71D0", + "F9EF", "7498", + "F9F0", "85FA", + "F9F1", "96A3", + "F9F2", "9C57", + "F9F3", "9E9F", + "F9F4", "6797", + "F9F5", "6DCB", + "F9F6", "81E8", + "F9F7", "7ACB", + "F9F8", "7B20", + "F9F9", "7C92", + "F9FA", "72C0", + "F9FB", "7099", + "F9FC", "8B58", + "F9FD", "4EC0", + "F9FE", "8336", + "F9FF", "523A", + "FA00", "5207", + "FA01", "5EA6", + "FA02", "62D3", + "FA03", "7CD6", + "FA04", "5B85", + "FA05", "6D1E", + "FA06", "66B4", + "FA07", "8F3B", + "FA08", "884C", + "FA09", "964D", + "FA0A", "898B", + "FA0B", "5ED3", + "FA0C", "5140", + "FA0D", "55C0", + "FA10", "585A", + "FA12", "6674", + "FA15", "51DE", + "FA16", "732A", + "FA17", "76CA", + "FA18", "793C", + "FA19", "795E", + "FA1A", "7965", + "FA1B", "798F", + "FA1C", "9756", + "FA1D", "7CBE", + "FA1E", "7FBD", + "FA20", "8612", + "FA22", "8AF8", + "FA25", "9038", + "FA26", "90FD", + "FA2A", "98EF", + "FA2B", "98FC", + "FA2C", "9928", + "FA2D", "9DB4", + "FA2E", "90DE", + "FA2F", "96B7", + "FA30", "4FAE", + "FA31", "50E7", + "FA32", "514D", + "FA33", "52C9", + "FA34", "52E4", + "FA35", "5351", + "FA36", "559D", + "FA37", "5606", + "FA38", "5668", + "FA39", "5840", + "FA3A", "58A8", + "FA3B", "5C64", + "FA3C", "5C6E", + "FA3D", "6094", + "FA3E", "6168", + "FA3F", "618E", + "FA40", "61F2", + 
"FA41", "654F", + "FA42", "65E2", + "FA43", "6691", + "FA44", "6885", + "FA45", "6D77", + "FA46", "6E1A", + "FA47", "6F22", + "FA48", "716E", + "FA49", "722B", + "FA4A", "7422", + "FA4B", "7891", + "FA4C", "793E", + "FA4D", "7949", + "FA4E", "7948", + "FA4F", "7950", + "FA50", "7956", + "FA51", "795D", + "FA52", "798D", + "FA53", "798E", + "FA54", "7A40", + "FA55", "7A81", + "FA56", "7BC0", + "FA57", "7DF4", + "FA58", "7E09", + "FA59", "7E41", + "FA5A", "7F72", + "FA5B", "8005", + "FA5C", "81ED", + "FA5D", "8279", + "FA5E", "8279", + "FA5F", "8457", + "FA60", "8910", + "FA61", "8996", + "FA62", "8B01", + "FA63", "8B39", + "FA64", "8CD3", + "FA65", "8D08", + "FA66", "8FB6", + "FA67", "9038", + "FA68", "96E3", + "FA69", "97FF", + "FA6A", "983B", + "FA6B", "6075", + "FA6C", "242EE", + "FA6D", "8218", + "FA70", "4E26", + "FA71", "51B5", + "FA72", "5168", + "FA73", "4F80", + "FA74", "5145", + "FA75", "5180", + "FA76", "52C7", + "FA77", "52FA", + "FA78", "559D", + "FA79", "5555", + "FA7A", "5599", + "FA7B", "55E2", + "FA7C", "585A", + "FA7D", "58B3", + "FA7E", "5944", + "FA7F", "5954", + "FA80", "5A62", + "FA81", "5B28", + "FA82", "5ED2", + "FA83", "5ED9", + "FA84", "5F69", + "FA85", "5FAD", + "FA86", "60D8", + "FA87", "614E", + "FA88", "6108", + "FA89", "618E", + "FA8A", "6160", + "FA8B", "61F2", + "FA8C", "6234", + "FA8D", "63C4", + "FA8E", "641C", + "FA8F", "6452", + "FA90", "6556", + "FA91", "6674", + "FA92", "6717", + "FA93", "671B", + "FA94", "6756", + "FA95", "6B79", + "FA96", "6BBA", + "FA97", "6D41", + "FA98", "6EDB", + "FA99", "6ECB", + "FA9A", "6F22", + "FA9B", "701E", + "FA9C", "716E", + "FA9D", "77A7", + "FA9E", "7235", + "FA9F", "72AF", + "FAA0", "732A", + "FAA1", "7471", + "FAA2", "7506", + "FAA3", "753B", + "FAA4", "761D", + "FAA5", "761F", + "FAA6", "76CA", + "FAA7", "76DB", + "FAA8", "76F4", + "FAA9", "774A", + "FAAA", "7740", + "FAAB", "78CC", + "FAAC", "7AB1", + "FAAD", "7BC0", + "FAAE", "7C7B", + "FAAF", "7D5B", + "FAB0", "7DF4", + "FAB1", "7F3E", + 
"FAB2", "8005", + "FAB3", "8352", + "FAB4", "83EF", + "FAB5", "8779", + "FAB6", "8941", + "FAB7", "8986", + "FAB8", "8996", + "FAB9", "8ABF", + "FABA", "8AF8", + "FABB", "8ACB", + "FABC", "8B01", + "FABD", "8AFE", + "FABE", "8AED", + "FABF", "8B39", + "FAC0", "8B8A", + "FAC1", "8D08", + "FAC2", "8F38", + "FAC3", "9072", + "FAC4", "9199", + "FAC5", "9276", + "FAC6", "967C", + "FAC7", "96E3", + "FAC8", "9756", + "FAC9", "97DB", + "FACA", "97FF", + "FACB", "980B", + "FACC", "983B", + "FACD", "9B12", + "FACE", "9F9C", + "FACF", "2284A", + "FAD0", "22844", + "FAD1", "233D5", + "FAD2", "3B9D", + "FAD3", "4018", + "FAD4", "4039", + "FAD5", "25249", + "FAD6", "25CD0", + "FAD7", "27ED3", + "FAD8", "9F43", + "FAD9", "9F8E", + "FB1D", "05D9_05B4", + "FB1F", "05F2_05B7", + "FB2A", "05E9_05C1", + "FB2B", "05E9_05C2", + "FB2C", "05E9_05BC_05C1", + "FB2D", "05E9_05BC_05C2", + "FB2E", "05D0_05B7", + "FB2F", "05D0_05B8", + "FB30", "05D0_05BC", + "FB31", "05D1_05BC", + "FB32", "05D2_05BC", + "FB33", "05D3_05BC", + "FB34", "05D4_05BC", + "FB35", "05D5_05BC", + "FB36", "05D6_05BC", + "FB38", "05D8_05BC", + "FB39", "05D9_05BC", + "FB3A", "05DA_05BC", + "FB3B", "05DB_05BC", + "FB3C", "05DC_05BC", + "FB3E", "05DE_05BC", + "FB40", "05E0_05BC", + "FB41", "05E1_05BC", + "FB43", "05E3_05BC", + "FB44", "05E4_05BC", + "FB46", "05E6_05BC", + "FB47", "05E7_05BC", + "FB48", "05E8_05BC", + "FB49", "05E9_05BC", + "FB4A", "05EA_05BC", + "FB4B", "05D5_05B9", + "FB4C", "05D1_05BF", + "FB4D", "05DB_05BF", + "FB4E", "05E4_05BF", + "1109A", "11099_110BA", + "1109C", "1109B_110BA", + "110AB", "110A5_110BA", + "1112E", "11131_11127", + "1112F", "11132_11127", + "1134B", "11347_1133E", + "1134C", "11347_11357", + "114BB", "114B9_114BA", + "114BC", "114B9_114B0", + "114BE", "114B9_114BD", + "115BA", "115B8_115AF", + "115BB", "115B9_115AF", + "11938", "11935_11930", + "1D15E", "1D157_1D165", + "1D15F", "1D158_1D165", + "1D160", "1D158_1D165_1D16E", + "1D161", "1D158_1D165_1D16F", + "1D162", 
"1D158_1D165_1D170", + "1D163", "1D158_1D165_1D171", + "1D164", "1D158_1D165_1D172", + "1D1BB", "1D1B9_1D165", + "1D1BC", "1D1BA_1D165", + "1D1BD", "1D1B9_1D165_1D16E", + "1D1BE", "1D1BA_1D165_1D16E", + "1D1BF", "1D1B9_1D165_1D16F", + "1D1C0", "1D1BA_1D165_1D16F", + "2F800", "4E3D", + "2F801", "4E38", + "2F802", "4E41", + "2F803", "20122", + "2F804", "4F60", + "2F805", "4FAE", + "2F806", "4FBB", + "2F807", "5002", + "2F808", "507A", + "2F809", "5099", + "2F80A", "50E7", + "2F80B", "50CF", + "2F80C", "349E", + "2F80D", "2063A", + "2F80E", "514D", + "2F80F", "5154", + "2F810", "5164", + "2F811", "5177", + "2F812", "2051C", + "2F813", "34B9", + "2F814", "5167", + "2F815", "518D", + "2F816", "2054B", + "2F817", "5197", + "2F818", "51A4", + "2F819", "4ECC", + "2F81A", "51AC", + "2F81B", "51B5", + "2F81C", "291DF", + "2F81D", "51F5", + "2F81E", "5203", + "2F81F", "34DF", + "2F820", "523B", + "2F821", "5246", + "2F822", "5272", + "2F823", "5277", + "2F824", "3515", + "2F825", "52C7", + "2F826", "52C9", + "2F827", "52E4", + "2F828", "52FA", + "2F829", "5305", + "2F82A", "5306", + "2F82B", "5317", + "2F82C", "5349", + "2F82D", "5351", + "2F82E", "535A", + "2F82F", "5373", + "2F830", "537D", + "2F831", "537F", + "2F832", "537F", + "2F833", "537F", + "2F834", "20A2C", + "2F835", "7070", + "2F836", "53CA", + "2F837", "53DF", + "2F838", "20B63", + "2F839", "53EB", + "2F83A", "53F1", + "2F83B", "5406", + "2F83C", "549E", + "2F83D", "5438", + "2F83E", "5448", + "2F83F", "5468", + "2F840", "54A2", + "2F841", "54F6", + "2F842", "5510", + "2F843", "5553", + "2F844", "5563", + "2F845", "5584", + "2F846", "5584", + "2F847", "5599", + "2F848", "55AB", + "2F849", "55B3", + "2F84A", "55C2", + "2F84B", "5716", + "2F84C", "5606", + "2F84D", "5717", + "2F84E", "5651", + "2F84F", "5674", + "2F850", "5207", + "2F851", "58EE", + "2F852", "57CE", + "2F853", "57F4", + "2F854", "580D", + "2F855", "578B", + "2F856", "5832", + "2F857", "5831", + "2F858", "58AC", + "2F859", "214E4", + "2F85A", 
"58F2", + "2F85B", "58F7", + "2F85C", "5906", + "2F85D", "591A", + "2F85E", "5922", + "2F85F", "5962", + "2F860", "216A8", + "2F861", "216EA", + "2F862", "59EC", + "2F863", "5A1B", + "2F864", "5A27", + "2F865", "59D8", + "2F866", "5A66", + "2F867", "36EE", + "2F868", "36FC", + "2F869", "5B08", + "2F86A", "5B3E", + "2F86B", "5B3E", + "2F86C", "219C8", + "2F86D", "5BC3", + "2F86E", "5BD8", + "2F86F", "5BE7", + "2F870", "5BF3", + "2F871", "21B18", + "2F872", "5BFF", + "2F873", "5C06", + "2F874", "5F53", + "2F875", "5C22", + "2F876", "3781", + "2F877", "5C60", + "2F878", "5C6E", + "2F879", "5CC0", + "2F87A", "5C8D", + "2F87B", "21DE4", + "2F87C", "5D43", + "2F87D", "21DE6", + "2F87E", "5D6E", + "2F87F", "5D6B", + "2F880", "5D7C", + "2F881", "5DE1", + "2F882", "5DE2", + "2F883", "382F", + "2F884", "5DFD", + "2F885", "5E28", + "2F886", "5E3D", + "2F887", "5E69", + "2F888", "3862", + "2F889", "22183", + "2F88A", "387C", + "2F88B", "5EB0", + "2F88C", "5EB3", + "2F88D", "5EB6", + "2F88E", "5ECA", + "2F88F", "2A392", + "2F890", "5EFE", + "2F891", "22331", + "2F892", "22331", + "2F893", "8201", + "2F894", "5F22", + "2F895", "5F22", + "2F896", "38C7", + "2F897", "232B8", + "2F898", "261DA", + "2F899", "5F62", + "2F89A", "5F6B", + "2F89B", "38E3", + "2F89C", "5F9A", + "2F89D", "5FCD", + "2F89E", "5FD7", + "2F89F", "5FF9", + "2F8A0", "6081", + "2F8A1", "393A", + "2F8A2", "391C", + "2F8A3", "6094", + "2F8A4", "226D4", + "2F8A5", "60C7", + "2F8A6", "6148", + "2F8A7", "614C", + "2F8A8", "614E", + "2F8A9", "614C", + "2F8AA", "617A", + "2F8AB", "618E", + "2F8AC", "61B2", + "2F8AD", "61A4", + "2F8AE", "61AF", + "2F8AF", "61DE", + "2F8B0", "61F2", + "2F8B1", "61F6", + "2F8B2", "6210", + "2F8B3", "621B", + "2F8B4", "625D", + "2F8B5", "62B1", + "2F8B6", "62D4", + "2F8B7", "6350", + "2F8B8", "22B0C", + "2F8B9", "633D", + "2F8BA", "62FC", + "2F8BB", "6368", + "2F8BC", "6383", + "2F8BD", "63E4", + "2F8BE", "22BF1", + "2F8BF", "6422", + "2F8C0", "63C5", + "2F8C1", "63A9", + "2F8C2", "3A2E", 
+ "2F8C3", "6469", + "2F8C4", "647E", + "2F8C5", "649D", + "2F8C6", "6477", + "2F8C7", "3A6C", + "2F8C8", "654F", + "2F8C9", "656C", + "2F8CA", "2300A", + "2F8CB", "65E3", + "2F8CC", "66F8", + "2F8CD", "6649", + "2F8CE", "3B19", + "2F8CF", "6691", + "2F8D0", "3B08", + "2F8D1", "3AE4", + "2F8D2", "5192", + "2F8D3", "5195", + "2F8D4", "6700", + "2F8D5", "669C", + "2F8D6", "80AD", + "2F8D7", "43D9", + "2F8D8", "6717", + "2F8D9", "671B", + "2F8DA", "6721", + "2F8DB", "675E", + "2F8DC", "6753", + "2F8DD", "233C3", + "2F8DE", "3B49", + "2F8DF", "67FA", + "2F8E0", "6785", + "2F8E1", "6852", + "2F8E2", "6885", + "2F8E3", "2346D", + "2F8E4", "688E", + "2F8E5", "681F", + "2F8E6", "6914", + "2F8E7", "3B9D", + "2F8E8", "6942", + "2F8E9", "69A3", + "2F8EA", "69EA", + "2F8EB", "6AA8", + "2F8EC", "236A3", + "2F8ED", "6ADB", + "2F8EE", "3C18", + "2F8EF", "6B21", + "2F8F0", "238A7", + "2F8F1", "6B54", + "2F8F2", "3C4E", + "2F8F3", "6B72", + "2F8F4", "6B9F", + "2F8F5", "6BBA", + "2F8F6", "6BBB", + "2F8F7", "23A8D", + "2F8F8", "21D0B", + "2F8F9", "23AFA", + "2F8FA", "6C4E", + "2F8FB", "23CBC", + "2F8FC", "6CBF", + "2F8FD", "6CCD", + "2F8FE", "6C67", + "2F8FF", "6D16", + "2F900", "6D3E", + "2F901", "6D77", + "2F902", "6D41", + "2F903", "6D69", + "2F904", "6D78", + "2F905", "6D85", + "2F906", "23D1E", + "2F907", "6D34", + "2F908", "6E2F", + "2F909", "6E6E", + "2F90A", "3D33", + "2F90B", "6ECB", + "2F90C", "6EC7", + "2F90D", "23ED1", + "2F90E", "6DF9", + "2F90F", "6F6E", + "2F910", "23F5E", + "2F911", "23F8E", + "2F912", "6FC6", + "2F913", "7039", + "2F914", "701E", + "2F915", "701B", + "2F916", "3D96", + "2F917", "704A", + "2F918", "707D", + "2F919", "7077", + "2F91A", "70AD", + "2F91B", "20525", + "2F91C", "7145", + "2F91D", "24263", + "2F91E", "719C", + "2F91F", "243AB", + "2F920", "7228", + "2F921", "7235", + "2F922", "7250", + "2F923", "24608", + "2F924", "7280", + "2F925", "7295", + "2F926", "24735", + "2F927", "24814", + "2F928", "737A", + "2F929", "738B", + "2F92A", "3EAC", + 
"2F92B", "73A5", + "2F92C", "3EB8", + "2F92D", "3EB8", + "2F92E", "7447", + "2F92F", "745C", + "2F930", "7471", + "2F931", "7485", + "2F932", "74CA", + "2F933", "3F1B", + "2F934", "7524", + "2F935", "24C36", + "2F936", "753E", + "2F937", "24C92", + "2F938", "7570", + "2F939", "2219F", + "2F93A", "7610", + "2F93B", "24FA1", + "2F93C", "24FB8", + "2F93D", "25044", + "2F93E", "3FFC", + "2F93F", "4008", + "2F940", "76F4", + "2F941", "250F3", + "2F942", "250F2", + "2F943", "25119", + "2F944", "25133", + "2F945", "771E", + "2F946", "771F", + "2F947", "771F", + "2F948", "774A", + "2F949", "4039", + "2F94A", "778B", + "2F94B", "4046", + "2F94C", "4096", + "2F94D", "2541D", + "2F94E", "784E", + "2F94F", "788C", + "2F950", "78CC", + "2F951", "40E3", + "2F952", "25626", + "2F953", "7956", + "2F954", "2569A", + "2F955", "256C5", + "2F956", "798F", + "2F957", "79EB", + "2F958", "412F", + "2F959", "7A40", + "2F95A", "7A4A", + "2F95B", "7A4F", + "2F95C", "2597C", + "2F95D", "25AA7", + "2F95E", "25AA7", + "2F95F", "7AEE", + "2F960", "4202", + "2F961", "25BAB", + "2F962", "7BC6", + "2F963", "7BC9", + "2F964", "4227", + "2F965", "25C80", + "2F966", "7CD2", + "2F967", "42A0", + "2F968", "7CE8", + "2F969", "7CE3", + "2F96A", "7D00", + "2F96B", "25F86", + "2F96C", "7D63", + "2F96D", "4301", + "2F96E", "7DC7", + "2F96F", "7E02", + "2F970", "7E45", + "2F971", "4334", + "2F972", "26228", + "2F973", "26247", + "2F974", "4359", + "2F975", "262D9", + "2F976", "7F7A", + "2F977", "2633E", + "2F978", "7F95", + "2F979", "7FFA", + "2F97A", "8005", + "2F97B", "264DA", + "2F97C", "26523", + "2F97D", "8060", + "2F97E", "265A8", + "2F97F", "8070", + "2F980", "2335F", + "2F981", "43D5", + "2F982", "80B2", + "2F983", "8103", + "2F984", "440B", + "2F985", "813E", + "2F986", "5AB5", + "2F987", "267A7", + "2F988", "267B5", + "2F989", "23393", + "2F98A", "2339C", + "2F98B", "8201", + "2F98C", "8204", + "2F98D", "8F9E", + "2F98E", "446B", + "2F98F", "8291", + "2F990", "828B", + "2F991", "829D", + "2F992", 
"52B3", + "2F993", "82B1", + "2F994", "82B3", + "2F995", "82BD", + "2F996", "82E6", + "2F997", "26B3C", + "2F998", "82E5", + "2F999", "831D", + "2F99A", "8363", + "2F99B", "83AD", + "2F99C", "8323", + "2F99D", "83BD", + "2F99E", "83E7", + "2F99F", "8457", + "2F9A0", "8353", + "2F9A1", "83CA", + "2F9A2", "83CC", + "2F9A3", "83DC", + "2F9A4", "26C36", + "2F9A5", "26D6B", + "2F9A6", "26CD5", + "2F9A7", "452B", + "2F9A8", "84F1", + "2F9A9", "84F3", + "2F9AA", "8516", + "2F9AB", "273CA", + "2F9AC", "8564", + "2F9AD", "26F2C", + "2F9AE", "455D", + "2F9AF", "4561", + "2F9B0", "26FB1", + "2F9B1", "270D2", + "2F9B2", "456B", + "2F9B3", "8650", + "2F9B4", "865C", + "2F9B5", "8667", + "2F9B6", "8669", + "2F9B7", "86A9", + "2F9B8", "8688", + "2F9B9", "870E", + "2F9BA", "86E2", + "2F9BB", "8779", + "2F9BC", "8728", + "2F9BD", "876B", + "2F9BE", "8786", + "2F9BF", "45D7", + "2F9C0", "87E1", + "2F9C1", "8801", + "2F9C2", "45F9", + "2F9C3", "8860", + "2F9C4", "8863", + "2F9C5", "27667", + "2F9C6", "88D7", + "2F9C7", "88DE", + "2F9C8", "4635", + "2F9C9", "88FA", + "2F9CA", "34BB", + "2F9CB", "278AE", + "2F9CC", "27966", + "2F9CD", "46BE", + "2F9CE", "46C7", + "2F9CF", "8AA0", + "2F9D0", "8AED", + "2F9D1", "8B8A", + "2F9D2", "8C55", + "2F9D3", "27CA8", + "2F9D4", "8CAB", + "2F9D5", "8CC1", + "2F9D6", "8D1B", + "2F9D7", "8D77", + "2F9D8", "27F2F", + "2F9D9", "20804", + "2F9DA", "8DCB", + "2F9DB", "8DBC", + "2F9DC", "8DF0", + "2F9DD", "208DE", + "2F9DE", "8ED4", + "2F9DF", "8F38", + "2F9E0", "285D2", + "2F9E1", "285ED", + "2F9E2", "9094", + "2F9E3", "90F1", + "2F9E4", "9111", + "2F9E5", "2872E", + "2F9E6", "911B", + "2F9E7", "9238", + "2F9E8", "92D7", + "2F9E9", "92D8", + "2F9EA", "927C", + "2F9EB", "93F9", + "2F9EC", "9415", + "2F9ED", "28BFA", + "2F9EE", "958B", + "2F9EF", "4995", + "2F9F0", "95B7", + "2F9F1", "28D77", + "2F9F2", "49E6", + "2F9F3", "96C3", + "2F9F4", "5DB2", + "2F9F5", "9723", + "2F9F6", "29145", + "2F9F7", "2921A", + "2F9F8", "4A6E", + "2F9F9", "4A76", + "2F9FA", 
"97E0", + "2F9FB", "2940A", + "2F9FC", "4AB2", + "2F9FD", "29496", + "2F9FE", "980B", + "2F9FF", "980B", + "2FA00", "9829", + "2FA01", "295B6", + "2FA02", "98E2", + "2FA03", "4B33", + "2FA04", "9929", + "2FA05", "99A7", + "2FA06", "99C2", + "2FA07", "99FE", + "2FA08", "4BCE", + "2FA09", "29B30", + "2FA0A", "9B12", + "2FA0B", "9C40", + "2FA0C", "9CFD", + "2FA0D", "4CCE", + "2FA0E", "4CED", + "2FA0F", "9D67", + "2FA10", "2A0CE", + "2FA11", "4CF8", + "2FA12", "2A105", + "2FA13", "2A20E", + "2FA14", "2A291", + "2FA15", "9EBB", + "2FA16", "4D56", + "2FA17", "9EF9", + "2FA18", "9EFE", + "2FA19", "9F05", + "2FA1A", "9F0F", + "2FA1B", "9F16", + "2FA1C", "9F3B", + "2FA1D", "2A600", +); + +# This table was algorithmically derived from the Adobe Glyph List (AGL) +# file 'glyphlist.txt' from the GitHub Adobe Type Tools agl-aglfn +# project, on 2022-10-09. +# +# See "groff:" comments for altered mappings. +my %AGL_to_unicode = ( + "A", "0041", + "AE", "00C6", + "AEacute", "01FC", + "AEmacron", "01E2", + "Aacute", "00C1", + "Abreve", "0102", + "Abreveacute", "1EAE", + "Abrevecyrillic", "04D0", + "Abrevedotbelow", "1EB6", + "Abrevegrave", "1EB0", + "Abrevehookabove", "1EB2", + "Abrevetilde", "1EB4", + "Acaron", "01CD", + "Acircle", "24B6", + "Acircumflex", "00C2", + "Acircumflexacute", "1EA4", + "Acircumflexdotbelow", "1EAC", + "Acircumflexgrave", "1EA6", + "Acircumflexhookabove", "1EA8", + "Acircumflextilde", "1EAA", + "Acyrillic", "0410", + "Adblgrave", "0200", + "Adieresis", "00C4", + "Adieresiscyrillic", "04D2", + "Adieresismacron", "01DE", + "Adotbelow", "1EA0", + "Adotmacron", "01E0", + "Agrave", "00C0", + "Ahookabove", "1EA2", + "Aiecyrillic", "04D4", + "Ainvertedbreve", "0202", + "Alpha", "0391", + "Alphatonos", "0386", + "Amacron", "0100", + "Amonospace", "FF21", + "Aogonek", "0104", + "Aring", "00C5", + "Aringacute", "01FA", + "Aringbelow", "1E00", + "Atilde", "00C3", + "Aybarmenian", "0531", + "B", "0042", + "Bcircle", "24B7", + "Bdotaccent", "1E02", + "Bdotbelow", 
"1E04", + "Becyrillic", "0411", + "Benarmenian", "0532", + "Beta", "0392", + "Bhook", "0181", + "Blinebelow", "1E06", + "Bmonospace", "FF22", + "Btopbar", "0182", + "C", "0043", + "Caarmenian", "053E", + "Cacute", "0106", + "Ccaron", "010C", + "Ccedilla", "00C7", + "Ccedillaacute", "1E08", + "Ccircle", "24B8", + "Ccircumflex", "0108", + "Cdot", "010A", + "Cdotaccent", "010A", + "Chaarmenian", "0549", + "Cheabkhasiancyrillic", "04BC", + "Checyrillic", "0427", + "Chedescenderabkhasiancyrillic", "04BE", + "Chedescendercyrillic", "04B6", + "Chedieresiscyrillic", "04F4", + "Cheharmenian", "0543", + "Chekhakassiancyrillic", "04CB", + "Cheverticalstrokecyrillic", "04B8", + "Chi", "03A7", + "Chook", "0187", + "Cmonospace", "FF23", + "Coarmenian", "0551", + "D", "0044", + "DZ", "01F1", + "DZcaron", "01C4", + "Daarmenian", "0534", + "Dafrican", "0189", + "Dcaron", "010E", + "Dcedilla", "1E10", + "Dcircle", "24B9", + "Dcircumflexbelow", "1E12", + "Dcroat", "0110", + "Ddotaccent", "1E0A", + "Ddotbelow", "1E0C", + "Decyrillic", "0414", + "Deicoptic", "03EE", + "Delta", "0394", # groff: not U+2206 + "Deltagreek", "0394", + "Dhook", "018A", + "Digammagreek", "03DC", + "Djecyrillic", "0402", + "Dlinebelow", "1E0E", + "Dmonospace", "FF24", + "Dslash", "0110", + "Dtopbar", "018B", + "Dz", "01F2", + "Dzcaron", "01C5", + "Dzeabkhasiancyrillic", "04E0", + "Dzecyrillic", "0405", + "Dzhecyrillic", "040F", + "E", "0045", + "Eacute", "00C9", + "Ebreve", "0114", + "Ecaron", "011A", + "Ecedillabreve", "1E1C", + "Echarmenian", "0535", + "Ecircle", "24BA", + "Ecircumflex", "00CA", + "Ecircumflexacute", "1EBE", + "Ecircumflexbelow", "1E18", + "Ecircumflexdotbelow", "1EC6", + "Ecircumflexgrave", "1EC0", + "Ecircumflexhookabove", "1EC2", + "Ecircumflextilde", "1EC4", + "Ecyrillic", "0404", + "Edblgrave", "0204", + "Edieresis", "00CB", + "Edot", "0116", + "Edotaccent", "0116", + "Edotbelow", "1EB8", + "Efcyrillic", "0424", + "Egrave", "00C8", + "Eharmenian", "0537", + "Ehookabove", "1EBA", + 
"Eightroman", "2167", + "Einvertedbreve", "0206", + "Eiotifiedcyrillic", "0464", + "Elcyrillic", "041B", + "Elevenroman", "216A", + "Emacron", "0112", + "Emacronacute", "1E16", + "Emacrongrave", "1E14", + "Emcyrillic", "041C", + "Emonospace", "FF25", + "Encyrillic", "041D", + "Endescendercyrillic", "04A2", + "Eng", "014A", + "Enghecyrillic", "04A4", + "Enhookcyrillic", "04C7", + "Eogonek", "0118", + "Eopen", "0190", + "Epsilon", "0395", + "Epsilontonos", "0388", + "Ercyrillic", "0420", + "Ereversed", "018E", + "Ereversedcyrillic", "042D", + "Escyrillic", "0421", + "Esdescendercyrillic", "04AA", + "Esh", "01A9", + "Eta", "0397", + "Etarmenian", "0538", + "Etatonos", "0389", + "Eth", "00D0", + "Etilde", "1EBC", + "Etildebelow", "1E1A", + "Euro", "20AC", + "Ezh", "01B7", + "Ezhcaron", "01EE", + "Ezhreversed", "01B8", + "F", "0046", + "Fcircle", "24BB", + "Fdotaccent", "1E1E", + "Feharmenian", "0556", + "Feicoptic", "03E4", + "Fhook", "0191", + "Fitacyrillic", "0472", + "Fiveroman", "2164", + "Fmonospace", "FF26", + "Fourroman", "2163", + "G", "0047", + "GBsquare", "3387", + "Gacute", "01F4", + "Gamma", "0393", + "Gammaafrican", "0194", + "Gangiacoptic", "03EA", + "Gbreve", "011E", + "Gcaron", "01E6", + "Gcedilla", "0122", + "Gcircle", "24BC", + "Gcircumflex", "011C", + "Gcommaaccent", "0122", + "Gdot", "0120", + "Gdotaccent", "0120", + "Gecyrillic", "0413", + "Ghadarmenian", "0542", + "Ghemiddlehookcyrillic", "0494", + "Ghestrokecyrillic", "0492", + "Gheupturncyrillic", "0490", + "Ghook", "0193", + "Gimarmenian", "0533", + "Gjecyrillic", "0403", + "Gmacron", "1E20", + "Gmonospace", "FF27", + "Gsmallhook", "029B", + "Gstroke", "01E4", + "H", "0048", + "H18533", "25CF", + "H18543", "25AA", + "H18551", "25AB", + "H22073", "25A1", + "HPsquare", "33CB", + "Haabkhasiancyrillic", "04A8", + "Hadescendercyrillic", "04B2", + "Hardsigncyrillic", "042A", + "Hbar", "0126", + "Hbrevebelow", "1E2A", + "Hcedilla", "1E28", + "Hcircle", "24BD", + "Hcircumflex", "0124", + "Hdieresis", 
"1E26", + "Hdotaccent", "1E22", + "Hdotbelow", "1E24", + "Hmonospace", "FF28", + "Hoarmenian", "0540", + "Horicoptic", "03E8", + "Hzsquare", "3390", + "I", "0049", + "IAcyrillic", "042F", + "IJ", "0132", + "IUcyrillic", "042E", + "Iacute", "00CD", + "Ibreve", "012C", + "Icaron", "01CF", + "Icircle", "24BE", + "Icircumflex", "00CE", + "Icyrillic", "0406", + "Idblgrave", "0208", + "Idieresis", "00CF", + "Idieresisacute", "1E2E", + "Idieresiscyrillic", "04E4", + "Idot", "0130", + "Idotaccent", "0130", + "Idotbelow", "1ECA", + "Iebrevecyrillic", "04D6", + "Iecyrillic", "0415", + "Ifraktur", "2111", + "Igrave", "00CC", + "Ihookabove", "1EC8", + "Iicyrillic", "0418", + "Iinvertedbreve", "020A", + "Iishortcyrillic", "0419", + "Imacron", "012A", + "Imacroncyrillic", "04E2", + "Imonospace", "FF29", + "Iniarmenian", "053B", + "Iocyrillic", "0401", + "Iogonek", "012E", + "Iota", "0399", + "Iotaafrican", "0196", + "Iotadieresis", "03AA", + "Iotatonos", "038A", + "Istroke", "0197", + "Itilde", "0128", + "Itildebelow", "1E2C", + "Izhitsacyrillic", "0474", + "Izhitsadblgravecyrillic", "0476", + "J", "004A", + "Jaarmenian", "0541", + "Jcircle", "24BF", + "Jcircumflex", "0134", + "Jecyrillic", "0408", + "Jheharmenian", "054B", + "Jmonospace", "FF2A", + "K", "004B", + "KBsquare", "3385", + "KKsquare", "33CD", + "Kabashkircyrillic", "04A0", + "Kacute", "1E30", + "Kacyrillic", "041A", + "Kadescendercyrillic", "049A", + "Kahookcyrillic", "04C3", + "Kappa", "039A", + "Kastrokecyrillic", "049E", + "Kaverticalstrokecyrillic", "049C", + "Kcaron", "01E8", + "Kcedilla", "0136", + "Kcircle", "24C0", + "Kcommaaccent", "0136", + "Kdotbelow", "1E32", + "Keharmenian", "0554", + "Kenarmenian", "053F", + "Khacyrillic", "0425", + "Kheicoptic", "03E6", + "Khook", "0198", + "Kjecyrillic", "040C", + "Klinebelow", "1E34", + "Kmonospace", "FF2B", + "Koppacyrillic", "0480", + "Koppagreek", "03DE", + "Ksicyrillic", "046E", + "L", "004C", + "LJ", "01C7", + "Lacute", "0139", + "Lambda", "039B", + "Lcaron", 
"013D", + "Lcedilla", "013B", + "Lcircle", "24C1", + "Lcircumflexbelow", "1E3C", + "Lcommaaccent", "013B", + "Ldot", "013F", + "Ldotaccent", "013F", + "Ldotbelow", "1E36", + "Ldotbelowmacron", "1E38", + "Liwnarmenian", "053C", + "Lj", "01C8", + "Ljecyrillic", "0409", + "Llinebelow", "1E3A", + "Lmonospace", "FF2C", + "Lslash", "0141", + "M", "004D", + "MBsquare", "3386", + "Macute", "1E3E", + "Mcircle", "24C2", + "Mdotaccent", "1E40", + "Mdotbelow", "1E42", + "Menarmenian", "0544", + "Mmonospace", "FF2D", + "Mturned", "019C", + "Mu", "039C", + "N", "004E", + "NJ", "01CA", + "Nacute", "0143", + "Ncaron", "0147", + "Ncedilla", "0145", + "Ncircle", "24C3", + "Ncircumflexbelow", "1E4A", + "Ncommaaccent", "0145", + "Ndotaccent", "1E44", + "Ndotbelow", "1E46", + "Nhookleft", "019D", + "Nineroman", "2168", + "Nj", "01CB", + "Njecyrillic", "040A", + "Nlinebelow", "1E48", + "Nmonospace", "FF2E", + "Nowarmenian", "0546", + "Ntilde", "00D1", + "Nu", "039D", + "O", "004F", + "OE", "0152", + "Oacute", "00D3", + "Obarredcyrillic", "04E8", + "Obarreddieresiscyrillic", "04EA", + "Obreve", "014E", + "Ocaron", "01D1", + "Ocenteredtilde", "019F", + "Ocircle", "24C4", + "Ocircumflex", "00D4", + "Ocircumflexacute", "1ED0", + "Ocircumflexdotbelow", "1ED8", + "Ocircumflexgrave", "1ED2", + "Ocircumflexhookabove", "1ED4", + "Ocircumflextilde", "1ED6", + "Ocyrillic", "041E", + "Odblacute", "0150", + "Odblgrave", "020C", + "Odieresis", "00D6", + "Odieresiscyrillic", "04E6", + "Odotbelow", "1ECC", + "Ograve", "00D2", + "Oharmenian", "0555", + "Ohm", "2126", + "Ohookabove", "1ECE", + "Ohorn", "01A0", + "Ohornacute", "1EDA", + "Ohorndotbelow", "1EE2", + "Ohorngrave", "1EDC", + "Ohornhookabove", "1EDE", + "Ohorntilde", "1EE0", + "Ohungarumlaut", "0150", + "Oi", "01A2", + "Oinvertedbreve", "020E", + "Omacron", "014C", + "Omacronacute", "1E52", + "Omacrongrave", "1E50", + "Omega", "03A9", # groff: not U+2126 + "Omegacyrillic", "0460", + "Omegagreek", "03A9", + "Omegaroundcyrillic", "047A", + 
"Omegatitlocyrillic", "047C", + "Omegatonos", "038F", + "Omicron", "039F", + "Omicrontonos", "038C", + "Omonospace", "FF2F", + "Oneroman", "2160", + "Oogonek", "01EA", + "Oogonekmacron", "01EC", + "Oopen", "0186", + "Oslash", "00D8", + "Oslashacute", "01FE", + "Ostrokeacute", "01FE", + "Otcyrillic", "047E", + "Otilde", "00D5", + "Otildeacute", "1E4C", + "Otildedieresis", "1E4E", + "P", "0050", + "Pacute", "1E54", + "Pcircle", "24C5", + "Pdotaccent", "1E56", + "Pecyrillic", "041F", + "Peharmenian", "054A", + "Pemiddlehookcyrillic", "04A6", + "Phi", "03A6", + "Phook", "01A4", + "Pi", "03A0", + "Piwrarmenian", "0553", + "Pmonospace", "FF30", + "Psi", "03A8", + "Psicyrillic", "0470", + "Q", "0051", + "Qcircle", "24C6", + "Qmonospace", "FF31", + "R", "0052", + "Raarmenian", "054C", + "Racute", "0154", + "Rcaron", "0158", + "Rcedilla", "0156", + "Rcircle", "24C7", + "Rcommaaccent", "0156", + "Rdblgrave", "0210", + "Rdotaccent", "1E58", + "Rdotbelow", "1E5A", + "Rdotbelowmacron", "1E5C", + "Reharmenian", "0550", + "Rfraktur", "211C", + "Rho", "03A1", + "Rinvertedbreve", "0212", + "Rlinebelow", "1E5E", + "Rmonospace", "FF32", + "Rsmallinverted", "0281", + "Rsmallinvertedsuperior", "02B6", + "S", "0053", + "SF010000", "250C", + "SF020000", "2514", + "SF030000", "2510", + "SF040000", "2518", + "SF050000", "253C", + "SF060000", "252C", + "SF070000", "2534", + "SF080000", "251C", + "SF090000", "2524", + "SF100000", "2500", + "SF110000", "2502", + "SF190000", "2561", + "SF200000", "2562", + "SF210000", "2556", + "SF220000", "2555", + "SF230000", "2563", + "SF240000", "2551", + "SF250000", "2557", + "SF260000", "255D", + "SF270000", "255C", + "SF280000", "255B", + "SF360000", "255E", + "SF370000", "255F", + "SF380000", "255A", + "SF390000", "2554", + "SF400000", "2569", + "SF410000", "2566", + "SF420000", "2560", + "SF430000", "2550", + "SF440000", "256C", + "SF450000", "2567", + "SF460000", "2568", + "SF470000", "2564", + "SF480000", "2565", + "SF490000", "2559", + "SF500000", 
"2558", + "SF510000", "2552", + "SF520000", "2553", + "SF530000", "256B", + "SF540000", "256A", + "Sacute", "015A", + "Sacutedotaccent", "1E64", + "Sampigreek", "03E0", + "Scaron", "0160", + "Scarondotaccent", "1E66", + "Scedilla", "015E", + "Schwa", "018F", + "Schwacyrillic", "04D8", + "Schwadieresiscyrillic", "04DA", + "Scircle", "24C8", + "Scircumflex", "015C", + "Scommaaccent", "0218", + "Sdotaccent", "1E60", + "Sdotbelow", "1E62", + "Sdotbelowdotaccent", "1E68", + "Seharmenian", "054D", + "Sevenroman", "2166", + "Shaarmenian", "0547", + "Shacyrillic", "0428", + "Shchacyrillic", "0429", + "Sheicoptic", "03E2", + "Shhacyrillic", "04BA", + "Shimacoptic", "03EC", + "Sigma", "03A3", + "Sixroman", "2165", + "Smonospace", "FF33", + "Softsigncyrillic", "042C", + "Stigmagreek", "03DA", + "T", "0054", + "Tau", "03A4", + "Tbar", "0166", + "Tcaron", "0164", + "Tcedilla", "0162", + "Tcircle", "24C9", + "Tcircumflexbelow", "1E70", + "Tcommaaccent", "0162", + "Tdotaccent", "1E6A", + "Tdotbelow", "1E6C", + "Tecyrillic", "0422", + "Tedescendercyrillic", "04AC", + "Tenroman", "2169", + "Tetsecyrillic", "04B4", + "Theta", "0398", + "Thook", "01AC", + "Thorn", "00DE", + "Threeroman", "2162", + "Tiwnarmenian", "054F", + "Tlinebelow", "1E6E", + "Tmonospace", "FF34", + "Toarmenian", "0539", + "Tonefive", "01BC", + "Tonesix", "0184", + "Tonetwo", "01A7", + "Tretroflexhook", "01AE", + "Tsecyrillic", "0426", + "Tshecyrillic", "040B", + "Twelveroman", "216B", + "Tworoman", "2161", + "U", "0055", + "Uacute", "00DA", + "Ubreve", "016C", + "Ucaron", "01D3", + "Ucircle", "24CA", + "Ucircumflex", "00DB", + "Ucircumflexbelow", "1E76", + "Ucyrillic", "0423", + "Udblacute", "0170", + "Udblgrave", "0214", + "Udieresis", "00DC", + "Udieresisacute", "01D7", + "Udieresisbelow", "1E72", + "Udieresiscaron", "01D9", + "Udieresiscyrillic", "04F0", + "Udieresisgrave", "01DB", + "Udieresismacron", "01D5", + "Udotbelow", "1EE4", + "Ugrave", "00D9", + "Uhookabove", "1EE6", + "Uhorn", "01AF", + 
"Uhornacute", "1EE8", + "Uhorndotbelow", "1EF0", + "Uhorngrave", "1EEA", + "Uhornhookabove", "1EEC", + "Uhorntilde", "1EEE", + "Uhungarumlaut", "0170", + "Uhungarumlautcyrillic", "04F2", + "Uinvertedbreve", "0216", + "Ukcyrillic", "0478", + "Umacron", "016A", + "Umacroncyrillic", "04EE", + "Umacrondieresis", "1E7A", + "Umonospace", "FF35", + "Uogonek", "0172", + "Upsilon", "03A5", + "Upsilon1", "03D2", + "Upsilonacutehooksymbolgreek", "03D3", + "Upsilonafrican", "01B1", + "Upsilondieresis", "03AB", + "Upsilondieresishooksymbolgreek", "03D4", + "Upsilonhooksymbol", "03D2", + "Upsilontonos", "038E", + "Uring", "016E", + "Ushortcyrillic", "040E", + "Ustraightcyrillic", "04AE", + "Ustraightstrokecyrillic", "04B0", + "Utilde", "0168", + "Utildeacute", "1E78", + "Utildebelow", "1E74", + "V", "0056", + "Vcircle", "24CB", + "Vdotbelow", "1E7E", + "Vecyrillic", "0412", + "Vewarmenian", "054E", + "Vhook", "01B2", + "Vmonospace", "FF36", + "Voarmenian", "0548", + "Vtilde", "1E7C", + "W", "0057", + "Wacute", "1E82", + "Wcircle", "24CC", + "Wcircumflex", "0174", + "Wdieresis", "1E84", + "Wdotaccent", "1E86", + "Wdotbelow", "1E88", + "Wgrave", "1E80", + "Wmonospace", "FF37", + "X", "0058", + "Xcircle", "24CD", + "Xdieresis", "1E8C", + "Xdotaccent", "1E8A", + "Xeharmenian", "053D", + "Xi", "039E", + "Xmonospace", "FF38", + "Y", "0059", + "Yacute", "00DD", + "Yatcyrillic", "0462", + "Ycircle", "24CE", + "Ycircumflex", "0176", + "Ydieresis", "0178", + "Ydotaccent", "1E8E", + "Ydotbelow", "1EF4", + "Yericyrillic", "042B", + "Yerudieresiscyrillic", "04F8", + "Ygrave", "1EF2", + "Yhook", "01B3", + "Yhookabove", "1EF6", + "Yiarmenian", "0545", + "Yicyrillic", "0407", + "Yiwnarmenian", "0552", + "Ymonospace", "FF39", + "Ytilde", "1EF8", + "Yusbigcyrillic", "046A", + "Yusbigiotifiedcyrillic", "046C", + "Yuslittlecyrillic", "0466", + "Yuslittleiotifiedcyrillic", "0468", + "Z", "005A", + "Zaarmenian", "0536", + "Zacute", "0179", + "Zcaron", "017D", + "Zcircle", "24CF", + "Zcircumflex", 
"1E90", + "Zdot", "017B", + "Zdotaccent", "017B", + "Zdotbelow", "1E92", + "Zecyrillic", "0417", + "Zedescendercyrillic", "0498", + "Zedieresiscyrillic", "04DE", + "Zeta", "0396", + "Zhearmenian", "053A", + "Zhebrevecyrillic", "04C1", + "Zhecyrillic", "0416", + "Zhedescendercyrillic", "0496", + "Zhedieresiscyrillic", "04DC", + "Zlinebelow", "1E94", + "Zmonospace", "FF3A", + "Zstroke", "01B5", + "a", "0061", + "aabengali", "0986", + "aacute", "00E1", + "aadeva", "0906", + "aagujarati", "0A86", + "aagurmukhi", "0A06", + "aamatragurmukhi", "0A3E", + "aarusquare", "3303", + "aavowelsignbengali", "09BE", + "aavowelsigndeva", "093E", + "aavowelsigngujarati", "0ABE", + "abbreviationmarkarmenian", "055F", + "abbreviationsigndeva", "0970", + "abengali", "0985", + "abopomofo", "311A", + "abreve", "0103", + "abreveacute", "1EAF", + "abrevecyrillic", "04D1", + "abrevedotbelow", "1EB7", + "abrevegrave", "1EB1", + "abrevehookabove", "1EB3", + "abrevetilde", "1EB5", + "acaron", "01CE", + "acircle", "24D0", + "acircumflex", "00E2", + "acircumflexacute", "1EA5", + "acircumflexdotbelow", "1EAD", + "acircumflexgrave", "1EA7", + "acircumflexhookabove", "1EA9", + "acircumflextilde", "1EAB", + "acute", "00B4", + "acutebelowcmb", "0317", + "acutecmb", "0301", + "acutecomb", "0301", + "acutedeva", "0954", + "acutelowmod", "02CF", + "acutetonecmb", "0341", + "acyrillic", "0430", + "adblgrave", "0201", + "addakgurmukhi", "0A71", + "adeva", "0905", + "adieresis", "00E4", + "adieresiscyrillic", "04D3", + "adieresismacron", "01DF", + "adotbelow", "1EA1", + "adotmacron", "01E1", + "ae", "00E6", + "aeacute", "01FD", + "aekorean", "3150", + "aemacron", "01E3", + "afii00208", "2015", + "afii08941", "20A4", + "afii10017", "0410", + "afii10018", "0411", + "afii10019", "0412", + "afii10020", "0413", + "afii10021", "0414", + "afii10022", "0415", + "afii10023", "0401", + "afii10024", "0416", + "afii10025", "0417", + "afii10026", "0418", + "afii10027", "0419", + "afii10028", "041A", + "afii10029", 
"041B", + "afii10030", "041C", + "afii10031", "041D", + "afii10032", "041E", + "afii10033", "041F", + "afii10034", "0420", + "afii10035", "0421", + "afii10036", "0422", + "afii10037", "0423", + "afii10038", "0424", + "afii10039", "0425", + "afii10040", "0426", + "afii10041", "0427", + "afii10042", "0428", + "afii10043", "0429", + "afii10044", "042A", + "afii10045", "042B", + "afii10046", "042C", + "afii10047", "042D", + "afii10048", "042E", + "afii10049", "042F", + "afii10050", "0490", + "afii10051", "0402", + "afii10052", "0403", + "afii10053", "0404", + "afii10054", "0405", + "afii10055", "0406", + "afii10056", "0407", + "afii10057", "0408", + "afii10058", "0409", + "afii10059", "040A", + "afii10060", "040B", + "afii10061", "040C", + "afii10062", "040E", + "afii10065", "0430", + "afii10066", "0431", + "afii10067", "0432", + "afii10068", "0433", + "afii10069", "0434", + "afii10070", "0435", + "afii10071", "0451", + "afii10072", "0436", + "afii10073", "0437", + "afii10074", "0438", + "afii10075", "0439", + "afii10076", "043A", + "afii10077", "043B", + "afii10078", "043C", + "afii10079", "043D", + "afii10080", "043E", + "afii10081", "043F", + "afii10082", "0440", + "afii10083", "0441", + "afii10084", "0442", + "afii10085", "0443", + "afii10086", "0444", + "afii10087", "0445", + "afii10088", "0446", + "afii10089", "0447", + "afii10090", "0448", + "afii10091", "0449", + "afii10092", "044A", + "afii10093", "044B", + "afii10094", "044C", + "afii10095", "044D", + "afii10096", "044E", + "afii10097", "044F", + "afii10098", "0491", + "afii10099", "0452", + "afii10100", "0453", + "afii10101", "0454", + "afii10102", "0455", + "afii10103", "0456", + "afii10104", "0457", + "afii10105", "0458", + "afii10106", "0459", + "afii10107", "045A", + "afii10108", "045B", + "afii10109", "045C", + "afii10110", "045E", + "afii10145", "040F", + "afii10146", "0462", + "afii10147", "0472", + "afii10148", "0474", + "afii10193", "045F", + "afii10194", "0463", + "afii10195", "0473", + 
"afii10196", "0475", + "afii10846", "04D9", + "afii299", "200E", + "afii300", "200F", + "afii301", "200D", + "afii57381", "066A", + "afii57388", "060C", + "afii57392", "0660", + "afii57393", "0661", + "afii57394", "0662", + "afii57395", "0663", + "afii57396", "0664", + "afii57397", "0665", + "afii57398", "0666", + "afii57399", "0667", + "afii57400", "0668", + "afii57401", "0669", + "afii57403", "061B", + "afii57407", "061F", + "afii57409", "0621", + "afii57410", "0622", + "afii57411", "0623", + "afii57412", "0624", + "afii57413", "0625", + "afii57414", "0626", + "afii57415", "0627", + "afii57416", "0628", + "afii57417", "0629", + "afii57418", "062A", + "afii57419", "062B", + "afii57420", "062C", + "afii57421", "062D", + "afii57422", "062E", + "afii57423", "062F", + "afii57424", "0630", + "afii57425", "0631", + "afii57426", "0632", + "afii57427", "0633", + "afii57428", "0634", + "afii57429", "0635", + "afii57430", "0636", + "afii57431", "0637", + "afii57432", "0638", + "afii57433", "0639", + "afii57434", "063A", + "afii57440", "0640", + "afii57441", "0641", + "afii57442", "0642", + "afii57443", "0643", + "afii57444", "0644", + "afii57445", "0645", + "afii57446", "0646", + "afii57448", "0648", + "afii57449", "0649", + "afii57450", "064A", + "afii57451", "064B", + "afii57452", "064C", + "afii57453", "064D", + "afii57454", "064E", + "afii57455", "064F", + "afii57456", "0650", + "afii57457", "0651", + "afii57458", "0652", + "afii57470", "0647", + "afii57505", "06A4", + "afii57506", "067E", + "afii57507", "0686", + "afii57508", "0698", + "afii57509", "06AF", + "afii57511", "0679", + "afii57512", "0688", + "afii57513", "0691", + "afii57514", "06BA", + "afii57519", "06D2", + "afii57534", "06D5", + "afii57636", "20AA", + "afii57645", "05BE", + "afii57658", "05C3", + "afii57664", "05D0", + "afii57665", "05D1", + "afii57666", "05D2", + "afii57667", "05D3", + "afii57668", "05D4", + "afii57669", "05D5", + "afii57670", "05D6", + "afii57671", "05D7", + "afii57672", "05D8", + 
"afii57673", "05D9", + "afii57674", "05DA", + "afii57675", "05DB", + "afii57676", "05DC", + "afii57677", "05DD", + "afii57678", "05DE", + "afii57679", "05DF", + "afii57680", "05E0", + "afii57681", "05E1", + "afii57682", "05E2", + "afii57683", "05E3", + "afii57684", "05E4", + "afii57685", "05E5", + "afii57686", "05E6", + "afii57687", "05E7", + "afii57688", "05E8", + "afii57689", "05E9", + "afii57690", "05EA", + "afii57694", "FB2A", + "afii57695", "FB2B", + "afii57700", "FB4B", + "afii57705", "FB1F", + "afii57716", "05F0", + "afii57717", "05F1", + "afii57718", "05F2", + "afii57723", "FB35", + "afii57793", "05B4", + "afii57794", "05B5", + "afii57795", "05B6", + "afii57796", "05BB", + "afii57797", "05B8", + "afii57798", "05B7", + "afii57799", "05B0", + "afii57800", "05B2", + "afii57801", "05B1", + "afii57802", "05B3", + "afii57803", "05C2", + "afii57804", "05C1", + "afii57806", "05B9", + "afii57807", "05BC", + "afii57839", "05BD", + "afii57841", "05BF", + "afii57842", "05C0", + "afii57929", "02BC", + "afii61248", "2105", + "afii61289", "2113", + "afii61352", "2116", + "afii61573", "202C", + "afii61574", "202D", + "afii61575", "202E", + "afii61664", "200C", + "afii63167", "066D", + "afii64937", "02BD", + "agrave", "00E0", + "agujarati", "0A85", + "agurmukhi", "0A05", + "ahiragana", "3042", + "ahookabove", "1EA3", + "aibengali", "0990", + "aibopomofo", "311E", + "aideva", "0910", + "aiecyrillic", "04D5", + "aigujarati", "0A90", + "aigurmukhi", "0A10", + "aimatragurmukhi", "0A48", + "ainarabic", "0639", + "ainfinalarabic", "FECA", + "aininitialarabic", "FECB", + "ainmedialarabic", "FECC", + "ainvertedbreve", "0203", + "aivowelsignbengali", "09C8", + "aivowelsigndeva", "0948", + "aivowelsigngujarati", "0AC8", + "akatakana", "30A2", + "akatakanahalfwidth", "FF71", + "akorean", "314F", + "alef", "05D0", + "alefarabic", "0627", + "alefdageshhebrew", "FB30", + "aleffinalarabic", "FE8E", + "alefhamzaabovearabic", "0623", + "alefhamzaabovefinalarabic", "FE84", + 
"alefhamzabelowarabic", "0625", + "alefhamzabelowfinalarabic", "FE88", + "alefhebrew", "05D0", + "aleflamedhebrew", "FB4F", + "alefmaddaabovearabic", "0622", + "alefmaddaabovefinalarabic", "FE82", + "alefmaksuraarabic", "0649", + "alefmaksurafinalarabic", "FEF0", + "alefmaksurainitialarabic", "FEF3", + "alefmaksuramedialarabic", "FEF4", + "alefpatahhebrew", "FB2E", + "alefqamatshebrew", "FB2F", + "aleph", "2135", + "allequal", "224C", + "alpha", "03B1", + "alphatonos", "03AC", + "amacron", "0101", + "amonospace", "FF41", + "ampersand", "0026", + "ampersandmonospace", "FF06", + "amsquare", "33C2", + "anbopomofo", "3122", + "angbopomofo", "3124", + "angkhankhuthai", "0E5A", + "angle", "2220", + "anglebracketleft", "3008", + "anglebracketleftvertical", "FE3F", + "anglebracketright", "3009", + "anglebracketrightvertical", "FE40", + "angleleft", "2329", + "angleright", "232A", + "angstrom", "212B", + "anoteleia", "0387", + "anudattadeva", "0952", + "anusvarabengali", "0982", + "anusvaradeva", "0902", + "anusvaragujarati", "0A82", + "aogonek", "0105", + "apaatosquare", "3300", + "aparen", "249C", + "apostrophearmenian", "055A", + "apostrophemod", "02BC", + "approaches", "2250", + "approxequal", "2248", + "approxequalorimage", "2252", + "approximatelyequal", "2245", + "araeaekorean", "318E", + "araeakorean", "318D", + "arc", "2312", + "arighthalfring", "1E9A", + "aring", "00E5", + "aringacute", "01FB", + "aringbelow", "1E01", + "arrowboth", "2194", + "arrowdashdown", "21E3", + "arrowdashleft", "21E0", + "arrowdashright", "21E2", + "arrowdashup", "21E1", + "arrowdblboth", "21D4", + "arrowdbldown", "21D3", + "arrowdblleft", "21D0", + "arrowdblright", "21D2", + "arrowdblup", "21D1", + "arrowdown", "2193", + "arrowdownleft", "2199", + "arrowdownright", "2198", + "arrowdownwhite", "21E9", + "arrowheaddownmod", "02C5", + "arrowheadleftmod", "02C2", + "arrowheadrightmod", "02C3", + "arrowheadupmod", "02C4", + "arrowleft", "2190", + "arrowleftdbl", "21D0", + "arrowleftdblstroke", 
"21CD", + "arrowleftoverright", "21C6", + "arrowleftwhite", "21E6", + "arrowright", "2192", + "arrowrightdblstroke", "21CF", + "arrowrightheavy", "279E", + "arrowrightoverleft", "21C4", + "arrowrightwhite", "21E8", + "arrowtableft", "21E4", + "arrowtabright", "21E5", + "arrowup", "2191", + "arrowupdn", "2195", + "arrowupdnbse", "21A8", + "arrowupdownbase", "21A8", + "arrowupleft", "2196", + "arrowupleftofdown", "21C5", + "arrowupright", "2197", + "arrowupwhite", "21E7", + "asciicircum", "005E", + "asciicircummonospace", "FF3E", + "asciitilde", "007E", + "asciitildemonospace", "FF5E", + "ascript", "0251", + "ascriptturned", "0252", + "asmallhiragana", "3041", + "asmallkatakana", "30A1", + "asmallkatakanahalfwidth", "FF67", + "asterisk", "002A", + "asteriskaltonearabic", "066D", + "asteriskarabic", "066D", + "asteriskmath", "2217", + "asteriskmonospace", "FF0A", + "asterisksmall", "FE61", + "asterism", "2042", + "asymptoticallyequal", "2243", + "at", "0040", + "atilde", "00E3", + "atmonospace", "FF20", + "atsmall", "FE6B", + "aturned", "0250", + "aubengali", "0994", + "aubopomofo", "3120", + "audeva", "0914", + "augujarati", "0A94", + "augurmukhi", "0A14", + "aulengthmarkbengali", "09D7", + "aumatragurmukhi", "0A4C", + "auvowelsignbengali", "09CC", + "auvowelsigndeva", "094C", + "auvowelsigngujarati", "0ACC", + "avagrahadeva", "093D", + "aybarmenian", "0561", + "ayin", "05E2", + "ayinaltonehebrew", "FB20", + "ayinhebrew", "05E2", + "b", "0062", + "babengali", "09AC", + "backslash", "005C", + "backslashmonospace", "FF3C", + "badeva", "092C", + "bagujarati", "0AAC", + "bagurmukhi", "0A2C", + "bahiragana", "3070", + "bahtthai", "0E3F", + "bakatakana", "30D0", + "bar", "007C", + "barmonospace", "FF5C", + "bbopomofo", "3105", + "bcircle", "24D1", + "bdotaccent", "1E03", + "bdotbelow", "1E05", + "beamedsixteenthnotes", "266C", + "because", "2235", + "becyrillic", "0431", + "beharabic", "0628", + "behfinalarabic", "FE90", + "behinitialarabic", "FE91", + "behiragana", 
"3079", + "behmedialarabic", "FE92", + "behmeeminitialarabic", "FC9F", + "behmeemisolatedarabic", "FC08", + "behnoonfinalarabic", "FC6D", + "bekatakana", "30D9", + "benarmenian", "0562", + "bet", "05D1", + "beta", "03B2", + "betasymbolgreek", "03D0", + "betdagesh", "FB31", + "betdageshhebrew", "FB31", + "bethebrew", "05D1", + "betrafehebrew", "FB4C", + "bhabengali", "09AD", + "bhadeva", "092D", + "bhagujarati", "0AAD", + "bhagurmukhi", "0A2D", + "bhook", "0253", + "bihiragana", "3073", + "bikatakana", "30D3", + "bilabialclick", "0298", + "bindigurmukhi", "0A02", + "birusquare", "3331", + "blackcircle", "25CF", + "blackdiamond", "25C6", + "blackdownpointingtriangle", "25BC", + "blackleftpointingpointer", "25C4", + "blackleftpointingtriangle", "25C0", + "blacklenticularbracketleft", "3010", + "blacklenticularbracketleftvertical", "FE3B", + "blacklenticularbracketright", "3011", + "blacklenticularbracketrightvertical", "FE3C", + "blacklowerlefttriangle", "25E3", + "blacklowerrighttriangle", "25E2", + "blackrectangle", "25AC", + "blackrightpointingpointer", "25BA", + "blackrightpointingtriangle", "25B6", + "blacksmallsquare", "25AA", + "blacksmilingface", "263B", + "blacksquare", "25A0", + "blackstar", "2605", + "blackupperlefttriangle", "25E4", + "blackupperrighttriangle", "25E5", + "blackuppointingsmalltriangle", "25B4", + "blackuppointingtriangle", "25B2", + "blank", "2423", + "blinebelow", "1E07", + "block", "2588", + "bmonospace", "FF42", + "bobaimaithai", "0E1A", + "bohiragana", "307C", + "bokatakana", "30DC", + "bparen", "249D", + "bqsquare", "33C3", + "braceleft", "007B", + "braceleftmonospace", "FF5B", + "braceleftsmall", "FE5B", + "braceleftvertical", "FE37", + "braceright", "007D", + "bracerightmonospace", "FF5D", + "bracerightsmall", "FE5C", + "bracerightvertical", "FE38", + "bracketleft", "005B", + "bracketleftmonospace", "FF3B", + "bracketright", "005D", + "bracketrightmonospace", "FF3D", + "breve", "02D8", + "brevebelowcmb", "032E", + "brevecmb", "0306", 
+ "breveinvertedbelowcmb", "032F", + "breveinvertedcmb", "0311", + "breveinverteddoublecmb", "0361", + "bridgebelowcmb", "032A", + "bridgeinvertedbelowcmb", "033A", + "brokenbar", "00A6", + "bstroke", "0180", + "btopbar", "0183", + "buhiragana", "3076", + "bukatakana", "30D6", + "bullet", "2022", + "bulletinverse", "25D8", + "bulletoperator", "2219", + "bullseye", "25CE", + "c", "0063", + "caarmenian", "056E", + "cabengali", "099A", + "cacute", "0107", + "cadeva", "091A", + "cagujarati", "0A9A", + "cagurmukhi", "0A1A", + "calsquare", "3388", + "candrabindubengali", "0981", + "candrabinducmb", "0310", + "candrabindudeva", "0901", + "candrabindugujarati", "0A81", + "capslock", "21EA", + "careof", "2105", + "caron", "02C7", + "caronbelowcmb", "032C", + "caroncmb", "030C", + "carriagereturn", "21B5", + "cbopomofo", "3118", + "ccaron", "010D", + "ccedilla", "00E7", + "ccedillaacute", "1E09", + "ccircle", "24D2", + "ccircumflex", "0109", + "ccurl", "0255", + "cdot", "010B", + "cdotaccent", "010B", + "cdsquare", "33C5", + "cedilla", "00B8", + "cedillacmb", "0327", + "cent", "00A2", + "centigrade", "2103", + "centmonospace", "FFE0", + "chaarmenian", "0579", + "chabengali", "099B", + "chadeva", "091B", + "chagujarati", "0A9B", + "chagurmukhi", "0A1B", + "chbopomofo", "3114", + "cheabkhasiancyrillic", "04BD", + "checkmark", "2713", + "checyrillic", "0447", + "chedescenderabkhasiancyrillic", "04BF", + "chedescendercyrillic", "04B7", + "chedieresiscyrillic", "04F5", + "cheharmenian", "0573", + "chekhakassiancyrillic", "04CC", + "cheverticalstrokecyrillic", "04B9", + "chi", "03C7", + "chieuchacirclekorean", "3277", + "chieuchaparenkorean", "3217", + "chieuchcirclekorean", "3269", + "chieuchkorean", "314A", + "chieuchparenkorean", "3209", + "chochangthai", "0E0A", + "chochanthai", "0E08", + "chochingthai", "0E09", + "chochoethai", "0E0C", + "chook", "0188", + "cieucacirclekorean", "3276", + "cieucaparenkorean", "3216", + "cieuccirclekorean", "3268", + "cieuckorean", "3148", + 
"cieucparenkorean", "3208", + "cieucuparenkorean", "321C", + "circle", "25CB", + "circlemultiply", "2297", + "circleot", "2299", + "circleplus", "2295", + "circlepostalmark", "3036", + "circlewithlefthalfblack", "25D0", + "circlewithrighthalfblack", "25D1", + "circumflex", "02C6", + "circumflexbelowcmb", "032D", + "circumflexcmb", "0302", + "clear", "2327", + "clickalveolar", "01C2", + "clickdental", "01C0", + "clicklateral", "01C1", + "clickretroflex", "01C3", + "club", "2663", + "clubsuitblack", "2663", + "clubsuitwhite", "2667", + "cmcubedsquare", "33A4", + "cmonospace", "FF43", + "cmsquaredsquare", "33A0", + "coarmenian", "0581", + "colon", "003A", + "colonmonetary", "20A1", + "colonmonospace", "FF1A", + "colonsign", "20A1", + "colonsmall", "FE55", + "colontriangularhalfmod", "02D1", + "colontriangularmod", "02D0", + "comma", "002C", + "commaabovecmb", "0313", + "commaaboverightcmb", "0315", + "commaarabic", "060C", + "commaarmenian", "055D", + "commamonospace", "FF0C", + "commareversedabovecmb", "0314", + "commareversedmod", "02BD", + "commasmall", "FE50", + "commaturnedabovecmb", "0312", + "commaturnedmod", "02BB", + "compass", "263C", + "congruent", "2245", + "contourintegral", "222E", + "control", "2303", + "controlACK", "0006", + "controlBEL", "0007", + "controlBS", "0008", + "controlCAN", "0018", + "controlCR", "000D", + "controlDC1", "0011", + "controlDC2", "0012", + "controlDC3", "0013", + "controlDC4", "0014", + "controlDEL", "007F", + "controlDLE", "0010", + "controlEM", "0019", + "controlENQ", "0005", + "controlEOT", "0004", + "controlESC", "001B", + "controlETB", "0017", + "controlETX", "0003", + "controlFF", "000C", + "controlFS", "001C", + "controlGS", "001D", + "controlHT", "0009", + "controlLF", "000A", + "controlNAK", "0015", + "controlRS", "001E", + "controlSI", "000F", + "controlSO", "000E", + "controlSOT", "0002", + "controlSTX", "0001", + "controlSUB", "001A", + "controlSYN", "0016", + "controlUS", "001F", + "controlVT", "000B", + 
"copyright", "00A9", + "cornerbracketleft", "300C", + "cornerbracketlefthalfwidth", "FF62", + "cornerbracketleftvertical", "FE41", + "cornerbracketright", "300D", + "cornerbracketrighthalfwidth", "FF63", + "cornerbracketrightvertical", "FE42", + "corporationsquare", "337F", + "cosquare", "33C7", + "coverkgsquare", "33C6", + "cparen", "249E", + "cruzeiro", "20A2", + "cstretched", "0297", + "curlyand", "22CF", + "curlyor", "22CE", + "currency", "00A4", + "d", "0064", + "daarmenian", "0564", + "dabengali", "09A6", + "dadarabic", "0636", + "dadeva", "0926", + "dadfinalarabic", "FEBE", + "dadinitialarabic", "FEBF", + "dadmedialarabic", "FEC0", + "dagesh", "05BC", + "dageshhebrew", "05BC", + "dagger", "2020", + "daggerdbl", "2021", + "dagujarati", "0AA6", + "dagurmukhi", "0A26", + "dahiragana", "3060", + "dakatakana", "30C0", + "dalarabic", "062F", + "dalet", "05D3", + "daletdagesh", "FB33", + "daletdageshhebrew", "FB33", + "dalethatafpatah", "05D3_05B2", + "dalethatafpatahhebrew", "05D3_05B2", + "dalethatafsegol", "05D3_05B1", + "dalethatafsegolhebrew", "05D3_05B1", + "dalethebrew", "05D3", + "dalethiriq", "05D3_05B4", + "dalethiriqhebrew", "05D3_05B4", + "daletholam", "05D3_05B9", + "daletholamhebrew", "05D3_05B9", + "daletpatah", "05D3_05B7", + "daletpatahhebrew", "05D3_05B7", + "daletqamats", "05D3_05B8", + "daletqamatshebrew", "05D3_05B8", + "daletqubuts", "05D3_05BB", + "daletqubutshebrew", "05D3_05BB", + "daletsegol", "05D3_05B6", + "daletsegolhebrew", "05D3_05B6", + "daletsheva", "05D3_05B0", + "daletshevahebrew", "05D3_05B0", + "dalettsere", "05D3_05B5", + "dalettserehebrew", "05D3_05B5", + "dalfinalarabic", "FEAA", + "dammaarabic", "064F", + "dammalowarabic", "064F", + "dammatanaltonearabic", "064C", + "dammatanarabic", "064C", + "danda", "0964", + "dargahebrew", "05A7", + "dargalefthebrew", "05A7", + "dasiapneumatacyrilliccmb", "0485", + "dblanglebracketleft", "300A", + "dblanglebracketleftvertical", "FE3D", + "dblanglebracketright", "300B", + 
"dblanglebracketrightvertical", "FE3E", + "dblarchinvertedbelowcmb", "032B", + "dblarrowleft", "21D4", + "dblarrowright", "21D2", + "dbldanda", "0965", + "dblgravecmb", "030F", + "dblintegral", "222C", + "dbllowline", "2017", + "dbllowlinecmb", "0333", + "dbloverlinecmb", "033F", + "dblprimemod", "02BA", + "dblverticalbar", "2016", + "dblverticallineabovecmb", "030E", + "dbopomofo", "3109", + "dbsquare", "33C8", + "dcaron", "010F", + "dcedilla", "1E11", + "dcircle", "24D3", + "dcircumflexbelow", "1E13", + "dcroat", "0111", + "ddabengali", "09A1", + "ddadeva", "0921", + "ddagujarati", "0AA1", + "ddagurmukhi", "0A21", + "ddalarabic", "0688", + "ddalfinalarabic", "FB89", + "dddhadeva", "095C", + "ddhabengali", "09A2", + "ddhadeva", "0922", + "ddhagujarati", "0AA2", + "ddhagurmukhi", "0A22", + "ddotaccent", "1E0B", + "ddotbelow", "1E0D", + "decimalseparatorarabic", "066B", + "decimalseparatorpersian", "066B", + "decyrillic", "0434", + "degree", "00B0", + "dehihebrew", "05AD", + "dehiragana", "3067", + "deicoptic", "03EF", + "dekatakana", "30C7", + "deleteleft", "232B", + "deleteright", "2326", + "delta", "03B4", + "deltaturned", "018D", + "denominatorminusonenumeratorbengali", "09F8", + "dezh", "02A4", + "dhabengali", "09A7", + "dhadeva", "0927", + "dhagujarati", "0AA7", + "dhagurmukhi", "0A27", + "dhook", "0257", + "dialytikatonos", "0385", + "dialytikatonoscmb", "0344", + "diamond", "2666", + "diamondsuitwhite", "2662", + "dieresis", "00A8", + "dieresisbelowcmb", "0324", + "dieresiscmb", "0308", + "dieresistonos", "0385", + "dihiragana", "3062", + "dikatakana", "30C2", + "dittomark", "3003", + "divide", "00F7", + "divides", "2223", + "divisionslash", "2215", + "djecyrillic", "0452", + "dkshade", "2593", + "dlinebelow", "1E0F", + "dlsquare", "3397", + "dmacron", "0111", + "dmonospace", "FF44", + "dnblock", "2584", + "dochadathai", "0E0E", + "dodekthai", "0E14", + "dohiragana", "3069", + "dokatakana", "30C9", + "dollar", "0024", + "dollarmonospace", "FF04", + 
"dollarsmall", "FE69", + "dong", "20AB", + "dorusquare", "3326", + "dotaccent", "02D9", + "dotaccentcmb", "0307", + "dotbelowcmb", "0323", + "dotbelowcomb", "0323", + "dotkatakana", "30FB", + "dotlessi", "0131", + "dotlessjstrokehook", "0284", + "dotmath", "22C5", + "dottedcircle", "25CC", + "doubleyodpatah", "FB1F", + "doubleyodpatahhebrew", "FB1F", + "downtackbelowcmb", "031E", + "downtackmod", "02D5", + "dparen", "249F", + "dtail", "0256", + "dtopbar", "018C", + "duhiragana", "3065", + "dukatakana", "30C5", + "dz", "01F3", + "dzaltone", "02A3", + "dzcaron", "01C6", + "dzcurl", "02A5", + "dzeabkhasiancyrillic", "04E1", + "dzecyrillic", "0455", + "dzhecyrillic", "045F", + "e", "0065", + "eacute", "00E9", + "earth", "2641", + "ebengali", "098F", + "ebopomofo", "311C", + "ebreve", "0115", + "ecandradeva", "090D", + "ecandragujarati", "0A8D", + "ecandravowelsigndeva", "0945", + "ecandravowelsigngujarati", "0AC5", + "ecaron", "011B", + "ecedillabreve", "1E1D", + "echarmenian", "0565", + "echyiwnarmenian", "0587", + "ecircle", "24D4", + "ecircumflex", "00EA", + "ecircumflexacute", "1EBF", + "ecircumflexbelow", "1E19", + "ecircumflexdotbelow", "1EC7", + "ecircumflexgrave", "1EC1", + "ecircumflexhookabove", "1EC3", + "ecircumflextilde", "1EC5", + "ecyrillic", "0454", + "edblgrave", "0205", + "edeva", "090F", + "edieresis", "00EB", + "edot", "0117", + "edotaccent", "0117", + "edotbelow", "1EB9", + "eegurmukhi", "0A0F", + "eematragurmukhi", "0A47", + "efcyrillic", "0444", + "egrave", "00E8", + "egujarati", "0A8F", + "eharmenian", "0567", + "ehbopomofo", "311D", + "ehiragana", "3048", + "ehookabove", "1EBB", + "eibopomofo", "311F", + "eight", "0038", + "eightarabic", "0668", + "eightbengali", "09EE", + "eightcircle", "2467", + "eightcircleinversesansserif", "2791", + "eightdeva", "096E", + "eighteencircle", "2471", + "eighteenparen", "2485", + "eighteenperiod", "2499", + "eightgujarati", "0AEE", + "eightgurmukhi", "0A6E", + "eighthackarabic", "0668", + "eighthangzhou", 
"3028", + "eighthnotebeamed", "266B", + "eightideographicparen", "3227", + "eightinferior", "2088", + "eightmonospace", "FF18", + "eightparen", "247B", + "eightperiod", "248F", + "eightpersian", "06F8", + "eightroman", "2177", + "eightsuperior", "2078", + "eightthai", "0E58", + "einvertedbreve", "0207", + "eiotifiedcyrillic", "0465", + "ekatakana", "30A8", + "ekatakanahalfwidth", "FF74", + "ekonkargurmukhi", "0A74", + "ekorean", "3154", + "elcyrillic", "043B", + "element", "2208", + "elevencircle", "246A", + "elevenparen", "247E", + "elevenperiod", "2492", + "elevenroman", "217A", + "ellipsis", "2026", + "ellipsisvertical", "22EE", + "emacron", "0113", + "emacronacute", "1E17", + "emacrongrave", "1E15", + "emcyrillic", "043C", + "emdash", "2014", + "emdashvertical", "FE31", + "emonospace", "FF45", + "emphasismarkarmenian", "055B", + "emptyset", "2205", + "enbopomofo", "3123", + "encyrillic", "043D", + "endash", "2013", + "endashvertical", "FE32", + "endescendercyrillic", "04A3", + "eng", "014B", + "engbopomofo", "3125", + "enghecyrillic", "04A5", + "enhookcyrillic", "04C8", + "enspace", "2002", + "eogonek", "0119", + "eokorean", "3153", + "eopen", "025B", + "eopenclosed", "029A", + "eopenreversed", "025C", + "eopenreversedclosed", "025E", + "eopenreversedhook", "025D", + "eparen", "24A0", + "epsilon", "03B5", + "epsilontonos", "03AD", + "equal", "003D", + "equalmonospace", "FF1D", + "equalsmall", "FE66", + "equalsuperior", "207C", + "equivalence", "2261", + "erbopomofo", "3126", + "ercyrillic", "0440", + "ereversed", "0258", + "ereversedcyrillic", "044D", + "escyrillic", "0441", + "esdescendercyrillic", "04AB", + "esh", "0283", + "eshcurl", "0286", + "eshortdeva", "090E", + "eshortvowelsigndeva", "0946", + "eshreversedloop", "01AA", + "eshsquatreversed", "0285", + "esmallhiragana", "3047", + "esmallkatakana", "30A7", + "esmallkatakanahalfwidth", "FF6A", + "estimated", "212E", + "eta", "03B7", + "etarmenian", "0568", + "etatonos", "03AE", + "eth", "00F0", + 
"etilde", "1EBD", + "etildebelow", "1E1B", + "etnahtafoukhhebrew", "0591", + "etnahtafoukhlefthebrew", "0591", + "etnahtahebrew", "0591", + "etnahtalefthebrew", "0591", + "eturned", "01DD", + "eukorean", "3161", + "euro", "20AC", + "evowelsignbengali", "09C7", + "evowelsigndeva", "0947", + "evowelsigngujarati", "0AC7", + "exclam", "0021", + "exclamarmenian", "055C", + "exclamdbl", "203C", + "exclamdown", "00A1", + "exclammonospace", "FF01", + "existential", "2203", + "ezh", "0292", + "ezhcaron", "01EF", + "ezhcurl", "0293", + "ezhreversed", "01B9", + "ezhtail", "01BA", + "f", "0066", + "fadeva", "095E", + "fagurmukhi", "0A5E", + "fahrenheit", "2109", + "fathaarabic", "064E", + "fathalowarabic", "064E", + "fathatanarabic", "064B", + "fbopomofo", "3108", + "fcircle", "24D5", + "fdotaccent", "1E1F", + "feharabic", "0641", + "feharmenian", "0586", + "fehfinalarabic", "FED2", + "fehinitialarabic", "FED3", + "fehmedialarabic", "FED4", + "feicoptic", "03E5", + "female", "2640", + "ff", "FB00", + "ffi", "FB03", + "ffl", "FB04", + "fi", "FB01", + "fifteencircle", "246E", + "fifteenparen", "2482", + "fifteenperiod", "2496", + "figuredash", "2012", + "filledbox", "25A0", + "filledrect", "25AC", + "finalkaf", "05DA", + "finalkafdagesh", "FB3A", + "finalkafdageshhebrew", "FB3A", + "finalkafhebrew", "05DA", + "finalkafqamats", "05DA_05B8", + "finalkafqamatshebrew", "05DA_05B8", + "finalkafsheva", "05DA_05B0", + "finalkafshevahebrew", "05DA_05B0", + "finalmem", "05DD", + "finalmemhebrew", "05DD", + "finalnun", "05DF", + "finalnunhebrew", "05DF", + "finalpe", "05E3", + "finalpehebrew", "05E3", + "finaltsadi", "05E5", + "finaltsadihebrew", "05E5", + "firsttonechinese", "02C9", + "fisheye", "25C9", + "fitacyrillic", "0473", + "five", "0035", + "fivearabic", "0665", + "fivebengali", "09EB", + "fivecircle", "2464", + "fivecircleinversesansserif", "278E", + "fivedeva", "096B", + "fiveeighths", "215D", + "fivegujarati", "0AEB", + "fivegurmukhi", "0A6B", + "fivehackarabic", "0665", + 
"fivehangzhou", "3025", + "fiveideographicparen", "3224", + "fiveinferior", "2085", + "fivemonospace", "FF15", + "fiveparen", "2478", + "fiveperiod", "248C", + "fivepersian", "06F5", + "fiveroman", "2174", + "fivesuperior", "2075", + "fivethai", "0E55", + "fl", "FB02", + "florin", "0192", + "fmonospace", "FF46", + "fmsquare", "3399", + "fofanthai", "0E1F", + "fofathai", "0E1D", + "fongmanthai", "0E4F", + "forall", "2200", + "four", "0034", + "fourarabic", "0664", + "fourbengali", "09EA", + "fourcircle", "2463", + "fourcircleinversesansserif", "278D", + "fourdeva", "096A", + "fourgujarati", "0AEA", + "fourgurmukhi", "0A6A", + "fourhackarabic", "0664", + "fourhangzhou", "3024", + "fourideographicparen", "3223", + "fourinferior", "2084", + "fourmonospace", "FF14", + "fournumeratorbengali", "09F7", + "fourparen", "2477", + "fourperiod", "248B", + "fourpersian", "06F4", + "fourroman", "2173", + "foursuperior", "2074", + "fourteencircle", "246D", + "fourteenparen", "2481", + "fourteenperiod", "2495", + "fourthai", "0E54", + "fourthtonechinese", "02CB", + "fparen", "24A1", + "fraction", "2044", + "franc", "20A3", + "g", "0067", + "gabengali", "0997", + "gacute", "01F5", + "gadeva", "0917", + "gafarabic", "06AF", + "gaffinalarabic", "FB93", + "gafinitialarabic", "FB94", + "gafmedialarabic", "FB95", + "gagujarati", "0A97", + "gagurmukhi", "0A17", + "gahiragana", "304C", + "gakatakana", "30AC", + "gamma", "03B3", + "gammalatinsmall", "0263", + "gammasuperior", "02E0", + "gangiacoptic", "03EB", + "gbopomofo", "310D", + "gbreve", "011F", + "gcaron", "01E7", + "gcedilla", "0123", + "gcircle", "24D6", + "gcircumflex", "011D", + "gcommaaccent", "0123", + "gdot", "0121", + "gdotaccent", "0121", + "gecyrillic", "0433", + "gehiragana", "3052", + "gekatakana", "30B2", + "geometricallyequal", "2251", + "gereshaccenthebrew", "059C", + "gereshhebrew", "05F3", + "gereshmuqdamhebrew", "059D", + "germandbls", "00DF", + "gershayimaccenthebrew", "059E", + "gershayimhebrew", "05F4", + 
"getamark", "3013", + "ghabengali", "0998", + "ghadarmenian", "0572", + "ghadeva", "0918", + "ghagujarati", "0A98", + "ghagurmukhi", "0A18", + "ghainarabic", "063A", + "ghainfinalarabic", "FECE", + "ghaininitialarabic", "FECF", + "ghainmedialarabic", "FED0", + "ghemiddlehookcyrillic", "0495", + "ghestrokecyrillic", "0493", + "gheupturncyrillic", "0491", + "ghhadeva", "095A", + "ghhagurmukhi", "0A5A", + "ghook", "0260", + "ghzsquare", "3393", + "gihiragana", "304E", + "gikatakana", "30AE", + "gimarmenian", "0563", + "gimel", "05D2", + "gimeldagesh", "FB32", + "gimeldageshhebrew", "FB32", + "gimelhebrew", "05D2", + "gjecyrillic", "0453", + "glottalinvertedstroke", "01BE", + "glottalstop", "0294", + "glottalstopinverted", "0296", + "glottalstopmod", "02C0", + "glottalstopreversed", "0295", + "glottalstopreversedmod", "02C1", + "glottalstopreversedsuperior", "02E4", + "glottalstopstroke", "02A1", + "glottalstopstrokereversed", "02A2", + "gmacron", "1E21", + "gmonospace", "FF47", + "gohiragana", "3054", + "gokatakana", "30B4", + "gparen", "24A2", + "gpasquare", "33AC", + "gradient", "2207", + "grave", "0060", + "gravebelowcmb", "0316", + "gravecmb", "0300", + "gravecomb", "0300", + "gravedeva", "0953", + "gravelowmod", "02CE", + "gravemonospace", "FF40", + "gravetonecmb", "0340", + "greater", "003E", + "greaterequal", "2265", + "greaterequalorless", "22DB", + "greatermonospace", "FF1E", + "greaterorequivalent", "2273", + "greaterorless", "2277", + "greateroverequal", "2267", + "greatersmall", "FE65", + "gscript", "0261", + "gstroke", "01E5", + "guhiragana", "3050", + "guillemotleft", "00AB", + "guillemotright", "00BB", + "guilsinglleft", "2039", + "guilsinglright", "203A", + "gukatakana", "30B0", + "guramusquare", "3318", + "gysquare", "33C9", + "h", "0068", + "haabkhasiancyrillic", "04A9", + "haaltonearabic", "06C1", + "habengali", "09B9", + "hadescendercyrillic", "04B3", + "hadeva", "0939", + "hagujarati", "0AB9", + "hagurmukhi", "0A39", + "haharabic", "062D", + 
"hahfinalarabic", "FEA2", + "hahinitialarabic", "FEA3", + "hahiragana", "306F", + "hahmedialarabic", "FEA4", + "haitusquare", "332A", + "hakatakana", "30CF", + "hakatakanahalfwidth", "FF8A", + "halantgurmukhi", "0A4D", + "hamzaarabic", "0621", + "hamzadammaarabic", "0621_064F", + "hamzadammatanarabic", "0621_064C", + "hamzafathaarabic", "0621_064E", + "hamzafathatanarabic", "0621_064B", + "hamzalowarabic", "0621", + "hamzalowkasraarabic", "0621_0650", + "hamzalowkasratanarabic", "0621_064D", + "hamzasukunarabic", "0621_0652", + "hangulfiller", "3164", + "hardsigncyrillic", "044A", + "harpoonleftbarbup", "21BC", + "harpoonrightbarbup", "21C0", + "hasquare", "33CA", + "hatafpatah", "05B2", + "hatafpatah16", "05B2", + "hatafpatah23", "05B2", + "hatafpatah2f", "05B2", + "hatafpatahhebrew", "05B2", + "hatafpatahnarrowhebrew", "05B2", + "hatafpatahquarterhebrew", "05B2", + "hatafpatahwidehebrew", "05B2", + "hatafqamats", "05B3", + "hatafqamats1b", "05B3", + "hatafqamats28", "05B3", + "hatafqamats34", "05B3", + "hatafqamatshebrew", "05B3", + "hatafqamatsnarrowhebrew", "05B3", + "hatafqamatsquarterhebrew", "05B3", + "hatafqamatswidehebrew", "05B3", + "hatafsegol", "05B1", + "hatafsegol17", "05B1", + "hatafsegol24", "05B1", + "hatafsegol30", "05B1", + "hatafsegolhebrew", "05B1", + "hatafsegolnarrowhebrew", "05B1", + "hatafsegolquarterhebrew", "05B1", + "hatafsegolwidehebrew", "05B1", + "hbar", "0127", + "hbopomofo", "310F", + "hbrevebelow", "1E2B", + "hcedilla", "1E29", + "hcircle", "24D7", + "hcircumflex", "0125", + "hdieresis", "1E27", + "hdotaccent", "1E23", + "hdotbelow", "1E25", + "he", "05D4", + "heart", "2665", + "heartsuitblack", "2665", + "heartsuitwhite", "2661", + "hedagesh", "FB34", + "hedageshhebrew", "FB34", + "hehaltonearabic", "06C1", + "heharabic", "0647", + "hehebrew", "05D4", + "hehfinalaltonearabic", "FBA7", + "hehfinalalttwoarabic", "FEEA", + "hehfinalarabic", "FEEA", + "hehhamzaabovefinalarabic", "FBA5", + "hehhamzaaboveisolatedarabic", "FBA4", + 
"hehinitialaltonearabic", "FBA8", + "hehinitialarabic", "FEEB", + "hehiragana", "3078", + "hehmedialaltonearabic", "FBA9", + "hehmedialarabic", "FEEC", + "heiseierasquare", "337B", + "hekatakana", "30D8", + "hekatakanahalfwidth", "FF8D", + "hekutaarusquare", "3336", + "henghook", "0267", + "herutusquare", "3339", + "het", "05D7", + "hethebrew", "05D7", + "hhook", "0266", + "hhooksuperior", "02B1", + "hieuhacirclekorean", "327B", + "hieuhaparenkorean", "321B", + "hieuhcirclekorean", "326D", + "hieuhkorean", "314E", + "hieuhparenkorean", "320D", + "hihiragana", "3072", + "hikatakana", "30D2", + "hikatakanahalfwidth", "FF8B", + "hiriq", "05B4", + "hiriq14", "05B4", + "hiriq21", "05B4", + "hiriq2d", "05B4", + "hiriqhebrew", "05B4", + "hiriqnarrowhebrew", "05B4", + "hiriqquarterhebrew", "05B4", + "hiriqwidehebrew", "05B4", + "hlinebelow", "1E96", + "hmonospace", "FF48", + "hoarmenian", "0570", + "hohipthai", "0E2B", + "hohiragana", "307B", + "hokatakana", "30DB", + "hokatakanahalfwidth", "FF8E", + "holam", "05B9", + "holam19", "05B9", + "holam26", "05B9", + "holam32", "05B9", + "holamhebrew", "05B9", + "holamnarrowhebrew", "05B9", + "holamquarterhebrew", "05B9", + "holamwidehebrew", "05B9", + "honokhukthai", "0E2E", + "hookabovecomb", "0309", + "hookcmb", "0309", + "hookpalatalizedbelowcmb", "0321", + "hookretroflexbelowcmb", "0322", + "hoonsquare", "3342", + "horicoptic", "03E9", + "horizontalbar", "2015", + "horncmb", "031B", + "hotsprings", "2668", + "house", "2302", + "hparen", "24A3", + "hsuperior", "02B0", + "hturned", "0265", + "huhiragana", "3075", + "huiitosquare", "3333", + "hukatakana", "30D5", + "hukatakanahalfwidth", "FF8C", + "hungarumlaut", "02DD", + "hungarumlautcmb", "030B", + "hv", "0195", + "hyphen", "002D", + "hyphenmonospace", "FF0D", + "hyphensmall", "FE63", + "hyphentwo", "2010", + "i", "0069", + "iacute", "00ED", + "iacyrillic", "044F", + "ibengali", "0987", + "ibopomofo", "3127", + "ibreve", "012D", + "icaron", "01D0", + "icircle", "24D8", + 
"icircumflex", "00EE", + "icyrillic", "0456", + "idblgrave", "0209", + "ideographearthcircle", "328F", + "ideographfirecircle", "328B", + "ideographicallianceparen", "323F", + "ideographiccallparen", "323A", + "ideographiccentrecircle", "32A5", + "ideographicclose", "3006", + "ideographiccomma", "3001", + "ideographiccommaleft", "FF64", + "ideographiccongratulationparen", "3237", + "ideographiccorrectcircle", "32A3", + "ideographicearthparen", "322F", + "ideographicenterpriseparen", "323D", + "ideographicexcellentcircle", "329D", + "ideographicfestivalparen", "3240", + "ideographicfinancialcircle", "3296", + "ideographicfinancialparen", "3236", + "ideographicfireparen", "322B", + "ideographichaveparen", "3232", + "ideographichighcircle", "32A4", + "ideographiciterationmark", "3005", + "ideographiclaborcircle", "3298", + "ideographiclaborparen", "3238", + "ideographicleftcircle", "32A7", + "ideographiclowcircle", "32A6", + "ideographicmedicinecircle", "32A9", + "ideographicmetalparen", "322E", + "ideographicmoonparen", "322A", + "ideographicnameparen", "3234", + "ideographicperiod", "3002", + "ideographicprintcircle", "329E", + "ideographicreachparen", "3243", + "ideographicrepresentparen", "3239", + "ideographicresourceparen", "323E", + "ideographicrightcircle", "32A8", + "ideographicsecretcircle", "3299", + "ideographicselfparen", "3242", + "ideographicsocietyparen", "3233", + "ideographicspace", "3000", + "ideographicspecialparen", "3235", + "ideographicstockparen", "3231", + "ideographicstudyparen", "323B", + "ideographicsunparen", "3230", + "ideographicsuperviseparen", "323C", + "ideographicwaterparen", "322C", + "ideographicwoodparen", "322D", + "ideographiczero", "3007", + "ideographmetalcircle", "328E", + "ideographmooncircle", "328A", + "ideographnamecircle", "3294", + "ideographsuncircle", "3290", + "ideographwatercircle", "328C", + "ideographwoodcircle", "328D", + "ideva", "0907", + "idieresis", "00EF", + "idieresisacute", "1E2F", + "idieresiscyrillic", 
"04E5", + "idotbelow", "1ECB", + "iebrevecyrillic", "04D7", + "iecyrillic", "0435", + "ieungacirclekorean", "3275", + "ieungaparenkorean", "3215", + "ieungcirclekorean", "3267", + "ieungkorean", "3147", + "ieungparenkorean", "3207", + "igrave", "00EC", + "igujarati", "0A87", + "igurmukhi", "0A07", + "ihiragana", "3044", + "ihookabove", "1EC9", + "iibengali", "0988", + "iicyrillic", "0438", + "iideva", "0908", + "iigujarati", "0A88", + "iigurmukhi", "0A08", + "iimatragurmukhi", "0A40", + "iinvertedbreve", "020B", + "iishortcyrillic", "0439", + "iivowelsignbengali", "09C0", + "iivowelsigndeva", "0940", + "iivowelsigngujarati", "0AC0", + "ij", "0133", + "ikatakana", "30A4", + "ikatakanahalfwidth", "FF72", + "ikorean", "3163", + "ilde", "02DC", + "iluyhebrew", "05AC", + "imacron", "012B", + "imacroncyrillic", "04E3", + "imageorapproximatelyequal", "2253", + "imatragurmukhi", "0A3F", + "imonospace", "FF49", + "increment", "2206", + "infinity", "221E", + "iniarmenian", "056B", + "integral", "222B", + "integralbottom", "2321", + "integralbt", "2321", + "integraltop", "2320", + "integraltp", "2320", + "intersection", "2229", + "intisquare", "3305", + "invbullet", "25D8", + "invcircle", "25D9", + "invsmileface", "263B", + "iocyrillic", "0451", + "iogonek", "012F", + "iota", "03B9", + "iotadieresis", "03CA", + "iotadieresistonos", "0390", + "iotalatin", "0269", + "iotatonos", "03AF", + "iparen", "24A4", + "irigurmukhi", "0A72", + "ismallhiragana", "3043", + "ismallkatakana", "30A3", + "ismallkatakanahalfwidth", "FF68", + "issharbengali", "09FA", + "istroke", "0268", + "iterationhiragana", "309D", + "iterationkatakana", "30FD", + "itilde", "0129", + "itildebelow", "1E2D", + "iubopomofo", "3129", + "iucyrillic", "044E", + "ivowelsignbengali", "09BF", + "ivowelsigndeva", "093F", + "ivowelsigngujarati", "0ABF", + "izhitsacyrillic", "0475", + "izhitsadblgravecyrillic", "0477", + "j", "006A", + "jaarmenian", "0571", + "jabengali", "099C", + "jadeva", "091C", + "jagujarati", 
"0A9C", + "jagurmukhi", "0A1C", + "jbopomofo", "3110", + "jcaron", "01F0", + "jcircle", "24D9", + "jcircumflex", "0135", + "jcrossedtail", "029D", + "jdotlessstroke", "025F", + "jecyrillic", "0458", + "jeemarabic", "062C", + "jeemfinalarabic", "FE9E", + "jeeminitialarabic", "FE9F", + "jeemmedialarabic", "FEA0", + "jeharabic", "0698", + "jehfinalarabic", "FB8B", + "jhabengali", "099D", + "jhadeva", "091D", + "jhagujarati", "0A9D", + "jhagurmukhi", "0A1D", + "jheharmenian", "057B", + "jis", "3004", + "jmonospace", "FF4A", + "jparen", "24A5", + "jsuperior", "02B2", + "k", "006B", + "kabashkircyrillic", "04A1", + "kabengali", "0995", + "kacute", "1E31", + "kacyrillic", "043A", + "kadescendercyrillic", "049B", + "kadeva", "0915", + "kaf", "05DB", + "kafarabic", "0643", + "kafdagesh", "FB3B", + "kafdageshhebrew", "FB3B", + "kaffinalarabic", "FEDA", + "kafhebrew", "05DB", + "kafinitialarabic", "FEDB", + "kafmedialarabic", "FEDC", + "kafrafehebrew", "FB4D", + "kagujarati", "0A95", + "kagurmukhi", "0A15", + "kahiragana", "304B", + "kahookcyrillic", "04C4", + "kakatakana", "30AB", + "kakatakanahalfwidth", "FF76", + "kappa", "03BA", + "kappasymbolgreek", "03F0", + "kapyeounmieumkorean", "3171", + "kapyeounphieuphkorean", "3184", + "kapyeounpieupkorean", "3178", + "kapyeounssangpieupkorean", "3179", + "karoriisquare", "330D", + "kashidaautoarabic", "0640", + "kashidaautonosidebearingarabic", "0640", + "kasmallkatakana", "30F5", + "kasquare", "3384", + "kasraarabic", "0650", + "kasratanarabic", "064D", + "kastrokecyrillic", "049F", + "katahiraprolongmarkhalfwidth", "FF70", + "kaverticalstrokecyrillic", "049D", + "kbopomofo", "310E", + "kcalsquare", "3389", + "kcaron", "01E9", + "kcedilla", "0137", + "kcircle", "24DA", + "kcommaaccent", "0137", + "kdotbelow", "1E33", + "keharmenian", "0584", + "kehiragana", "3051", + "kekatakana", "30B1", + "kekatakanahalfwidth", "FF79", + "kenarmenian", "056F", + "kesmallkatakana", "30F6", + "kgreenlandic", "0138", + "khabengali", "0996", + 
"khacyrillic", "0445", + "khadeva", "0916", + "khagujarati", "0A96", + "khagurmukhi", "0A16", + "khaharabic", "062E", + "khahfinalarabic", "FEA6", + "khahinitialarabic", "FEA7", + "khahmedialarabic", "FEA8", + "kheicoptic", "03E7", + "khhadeva", "0959", + "khhagurmukhi", "0A59", + "khieukhacirclekorean", "3278", + "khieukhaparenkorean", "3218", + "khieukhcirclekorean", "326A", + "khieukhkorean", "314B", + "khieukhparenkorean", "320A", + "khokhaithai", "0E02", + "khokhonthai", "0E05", + "khokhuatthai", "0E03", + "khokhwaithai", "0E04", + "khomutthai", "0E5B", + "khook", "0199", + "khorakhangthai", "0E06", + "khzsquare", "3391", + "kihiragana", "304D", + "kikatakana", "30AD", + "kikatakanahalfwidth", "FF77", + "kiroguramusquare", "3315", + "kiromeetorusquare", "3316", + "kirosquare", "3314", + "kiyeokacirclekorean", "326E", + "kiyeokaparenkorean", "320E", + "kiyeokcirclekorean", "3260", + "kiyeokkorean", "3131", + "kiyeokparenkorean", "3200", + "kiyeoksioskorean", "3133", + "kjecyrillic", "045C", + "klinebelow", "1E35", + "klsquare", "3398", + "kmcubedsquare", "33A6", + "kmonospace", "FF4B", + "kmsquaredsquare", "33A2", + "kohiragana", "3053", + "kohmsquare", "33C0", + "kokaithai", "0E01", + "kokatakana", "30B3", + "kokatakanahalfwidth", "FF7A", + "kooposquare", "331E", + "koppacyrillic", "0481", + "koreanstandardsymbol", "327F", + "koroniscmb", "0343", + "kparen", "24A6", + "kpasquare", "33AA", + "ksicyrillic", "046F", + "ktsquare", "33CF", + "kturned", "029E", + "kuhiragana", "304F", + "kukatakana", "30AF", + "kukatakanahalfwidth", "FF78", + "kvsquare", "33B8", + "kwsquare", "33BE", + "l", "006C", + "labengali", "09B2", + "lacute", "013A", + "ladeva", "0932", + "lagujarati", "0AB2", + "lagurmukhi", "0A32", + "lakkhangyaothai", "0E45", + "lamaleffinalarabic", "FEFC", + "lamalefhamzaabovefinalarabic", "FEF8", + "lamalefhamzaaboveisolatedarabic", "FEF7", + "lamalefhamzabelowfinalarabic", "FEFA", + "lamalefhamzabelowisolatedarabic", "FEF9", + "lamalefisolatedarabic", 
"FEFB", + "lamalefmaddaabovefinalarabic", "FEF6", + "lamalefmaddaaboveisolatedarabic", "FEF5", + "lamarabic", "0644", + "lambda", "03BB", + "lambdastroke", "019B", + "lamed", "05DC", + "lameddagesh", "FB3C", + "lameddageshhebrew", "FB3C", + "lamedhebrew", "05DC", + "lamedholam", "05DC_05B9", + "lamedholamdagesh", "05DC_05B9_05BC", + "lamedholamdageshhebrew", "05DC_05B9_05BC", + "lamedholamhebrew", "05DC_05B9", + "lamfinalarabic", "FEDE", + "lamhahinitialarabic", "FCCA", + "laminitialarabic", "FEDF", + "lamjeeminitialarabic", "FCC9", + "lamkhahinitialarabic", "FCCB", + "lamlamhehisolatedarabic", "FDF2", + "lammedialarabic", "FEE0", + "lammeemhahinitialarabic", "FD88", + "lammeeminitialarabic", "FCCC", + "lammeemjeeminitialarabic", "FEDF_FEE4_FEA0", + "lammeemkhahinitialarabic", "FEDF_FEE4_FEA8", + "largecircle", "25EF", + "lbar", "019A", + "lbelt", "026C", + "lbopomofo", "310C", + "lcaron", "013E", + "lcedilla", "013C", + "lcircle", "24DB", + "lcircumflexbelow", "1E3D", + "lcommaaccent", "013C", + "ldot", "0140", + "ldotaccent", "0140", + "ldotbelow", "1E37", + "ldotbelowmacron", "1E39", + "leftangleabovecmb", "031A", + "lefttackbelowcmb", "0318", + "less", "003C", + "lessequal", "2264", + "lessequalorgreater", "22DA", + "lessmonospace", "FF1C", + "lessorequivalent", "2272", + "lessorgreater", "2276", + "lessoverequal", "2266", + "lesssmall", "FE64", + "lezh", "026E", + "lfblock", "258C", + "lhookretroflex", "026D", + "lira", "20A4", + "liwnarmenian", "056C", + "lj", "01C9", + "ljecyrillic", "0459", + "lladeva", "0933", + "llagujarati", "0AB3", + "llinebelow", "1E3B", + "llladeva", "0934", + "llvocalicbengali", "09E1", + "llvocalicdeva", "0961", + "llvocalicvowelsignbengali", "09E3", + "llvocalicvowelsigndeva", "0963", + "lmiddletilde", "026B", + "lmonospace", "FF4C", + "lmsquare", "33D0", + "lochulathai", "0E2C", + "logicaland", "2227", + "logicalnot", "00AC", + "logicalnotreversed", "2310", + "logicalor", "2228", + "lolingthai", "0E25", + "longs", "017F", + 
"lowlinecenterline", "FE4E", + "lowlinecmb", "0332", + "lowlinedashed", "FE4D", + "lozenge", "25CA", + "lparen", "24A7", + "lslash", "0142", + "lsquare", "2113", + "ltshade", "2591", + "luthai", "0E26", + "lvocalicbengali", "098C", + "lvocalicdeva", "090C", + "lvocalicvowelsignbengali", "09E2", + "lvocalicvowelsigndeva", "0962", + "lxsquare", "33D3", + "m", "006D", + "mabengali", "09AE", + "macron", "00AF", + "macronbelowcmb", "0331", + "macroncmb", "0304", + "macronlowmod", "02CD", + "macronmonospace", "FFE3", + "macute", "1E3F", + "madeva", "092E", + "magujarati", "0AAE", + "magurmukhi", "0A2E", + "mahapakhhebrew", "05A4", + "mahapakhlefthebrew", "05A4", + "mahiragana", "307E", + "maichattawathai", "0E4B", + "maiekthai", "0E48", + "maihanakatthai", "0E31", + "maitaikhuthai", "0E47", + "maithothai", "0E49", + "maitrithai", "0E4A", + "maiyamokthai", "0E46", + "makatakana", "30DE", + "makatakanahalfwidth", "FF8F", + "male", "2642", + "mansyonsquare", "3347", + "maqafhebrew", "05BE", + "mars", "2642", + "masoracirclehebrew", "05AF", + "masquare", "3383", + "mbopomofo", "3107", + "mbsquare", "33D4", + "mcircle", "24DC", + "mcubedsquare", "33A5", + "mdotaccent", "1E41", + "mdotbelow", "1E43", + "meemarabic", "0645", + "meemfinalarabic", "FEE2", + "meeminitialarabic", "FEE3", + "meemmedialarabic", "FEE4", + "meemmeeminitialarabic", "FCD1", + "meemmeemisolatedarabic", "FC48", + "meetorusquare", "334D", + "mehiragana", "3081", + "meizierasquare", "337E", + "mekatakana", "30E1", + "mekatakanahalfwidth", "FF92", + "mem", "05DE", + "memdagesh", "FB3E", + "memdageshhebrew", "FB3E", + "memhebrew", "05DE", + "menarmenian", "0574", + "merkhahebrew", "05A5", + "merkhakefulahebrew", "05A6", + "merkhakefulalefthebrew", "05A6", + "merkhalefthebrew", "05A5", + "mhook", "0271", + "mhzsquare", "3392", + "middledotkatakanahalfwidth", "FF65", + "middot", "00B7", + "mieumacirclekorean", "3272", + "mieumaparenkorean", "3212", + "mieumcirclekorean", "3264", + "mieumkorean", "3141", + 
"mieumpansioskorean", "3170", + "mieumparenkorean", "3204", + "mieumpieupkorean", "316E", + "mieumsioskorean", "316F", + "mihiragana", "307F", + "mikatakana", "30DF", + "mikatakanahalfwidth", "FF90", + "minus", "2212", + "minusbelowcmb", "0320", + "minuscircle", "2296", + "minusmod", "02D7", + "minusplus", "2213", + "minute", "2032", + "miribaarusquare", "334A", + "mirisquare", "3349", + "mlonglegturned", "0270", + "mlsquare", "3396", + "mmcubedsquare", "33A3", + "mmonospace", "FF4D", + "mmsquaredsquare", "339F", + "mohiragana", "3082", + "mohmsquare", "33C1", + "mokatakana", "30E2", + "mokatakanahalfwidth", "FF93", + "molsquare", "33D6", + "momathai", "0E21", + "moverssquare", "33A7", + "moverssquaredsquare", "33A8", + "mparen", "24A8", + "mpasquare", "33AB", + "mssquare", "33B3", + "mturned", "026F", + "mu", "03BC", # groff: not U+00B5 + "mu1", "00B5", + "muasquare", "3382", + "muchgreater", "226B", + "muchless", "226A", + "mufsquare", "338C", + "mugreek", "03BC", + "mugsquare", "338D", + "muhiragana", "3080", + "mukatakana", "30E0", + "mukatakanahalfwidth", "FF91", + "mulsquare", "3395", + "multiply", "00D7", + "mumsquare", "339B", + "munahhebrew", "05A3", + "munahlefthebrew", "05A3", + "musicalnote", "266A", + "musicalnotedbl", "266B", + "musicflatsign", "266D", + "musicsharpsign", "266F", + "mussquare", "33B2", + "muvsquare", "33B6", + "muwsquare", "33BC", + "mvmegasquare", "33B9", + "mvsquare", "33B7", + "mwmegasquare", "33BF", + "mwsquare", "33BD", + "n", "006E", + "nabengali", "09A8", + "nabla", "2207", + "nacute", "0144", + "nadeva", "0928", + "nagujarati", "0AA8", + "nagurmukhi", "0A28", + "nahiragana", "306A", + "nakatakana", "30CA", + "nakatakanahalfwidth", "FF85", + "napostrophe", "0149", + "nasquare", "3381", + "nbopomofo", "310B", + "nbspace", "00A0", + "ncaron", "0148", + "ncedilla", "0146", + "ncircle", "24DD", + "ncircumflexbelow", "1E4B", + "ncommaaccent", "0146", + "ndotaccent", "1E45", + "ndotbelow", "1E47", + "nehiragana", "306D", + 
"nekatakana", "30CD", + "nekatakanahalfwidth", "FF88", + "newsheqelsign", "20AA", + "nfsquare", "338B", + "ngabengali", "0999", + "ngadeva", "0919", + "ngagujarati", "0A99", + "ngagurmukhi", "0A19", + "ngonguthai", "0E07", + "nhiragana", "3093", + "nhookleft", "0272", + "nhookretroflex", "0273", + "nieunacirclekorean", "326F", + "nieunaparenkorean", "320F", + "nieuncieuckorean", "3135", + "nieuncirclekorean", "3261", + "nieunhieuhkorean", "3136", + "nieunkorean", "3134", + "nieunpansioskorean", "3168", + "nieunparenkorean", "3201", + "nieunsioskorean", "3167", + "nieuntikeutkorean", "3166", + "nihiragana", "306B", + "nikatakana", "30CB", + "nikatakanahalfwidth", "FF86", + "nikhahitthai", "0E4D", + "nine", "0039", + "ninearabic", "0669", + "ninebengali", "09EF", + "ninecircle", "2468", + "ninecircleinversesansserif", "2792", + "ninedeva", "096F", + "ninegujarati", "0AEF", + "ninegurmukhi", "0A6F", + "ninehackarabic", "0669", + "ninehangzhou", "3029", + "nineideographicparen", "3228", + "nineinferior", "2089", + "ninemonospace", "FF19", + "nineparen", "247C", + "nineperiod", "2490", + "ninepersian", "06F9", + "nineroman", "2178", + "ninesuperior", "2079", + "nineteencircle", "2472", + "nineteenparen", "2486", + "nineteenperiod", "249A", + "ninethai", "0E59", + "nj", "01CC", + "njecyrillic", "045A", + "nkatakana", "30F3", + "nkatakanahalfwidth", "FF9D", + "nlegrightlong", "019E", + "nlinebelow", "1E49", + "nmonospace", "FF4E", + "nmsquare", "339A", + "nnabengali", "09A3", + "nnadeva", "0923", + "nnagujarati", "0AA3", + "nnagurmukhi", "0A23", + "nnnadeva", "0929", + "nohiragana", "306E", + "nokatakana", "30CE", + "nokatakanahalfwidth", "FF89", + "nonbreakingspace", "00A0", + "nonenthai", "0E13", + "nonuthai", "0E19", + "noonarabic", "0646", + "noonfinalarabic", "FEE6", + "noonghunnaarabic", "06BA", + "noonghunnafinalarabic", "FB9F", + "noonhehinitialarabic", "FEE7_FEEC", + "nooninitialarabic", "FEE7", + "noonjeeminitialarabic", "FCD2", + "noonjeemisolatedarabic", 
"FC4B", + "noonmedialarabic", "FEE8", + "noonmeeminitialarabic", "FCD5", + "noonmeemisolatedarabic", "FC4E", + "noonnoonfinalarabic", "FC8D", + "notcontains", "220C", + "notelement", "2209", + "notelementof", "2209", + "notequal", "2260", + "notgreater", "226F", + "notgreaternorequal", "2271", + "notgreaternorless", "2279", + "notidentical", "2262", + "notless", "226E", + "notlessnorequal", "2270", + "notparallel", "2226", + "notprecedes", "2280", + "notsubset", "2284", + "notsucceeds", "2281", + "notsuperset", "2285", + "nowarmenian", "0576", + "nparen", "24A9", + "nssquare", "33B1", + "nsuperior", "207F", + "ntilde", "00F1", + "nu", "03BD", + "nuhiragana", "306C", + "nukatakana", "30CC", + "nukatakanahalfwidth", "FF87", + "nuktabengali", "09BC", + "nuktadeva", "093C", + "nuktagujarati", "0ABC", + "nuktagurmukhi", "0A3C", + "numbersign", "0023", + "numbersignmonospace", "FF03", + "numbersignsmall", "FE5F", + "numeralsigngreek", "0374", + "numeralsignlowergreek", "0375", + "numero", "2116", + "nun", "05E0", + "nundagesh", "FB40", + "nundageshhebrew", "FB40", + "nunhebrew", "05E0", + "nvsquare", "33B5", + "nwsquare", "33BB", + "nyabengali", "099E", + "nyadeva", "091E", + "nyagujarati", "0A9E", + "nyagurmukhi", "0A1E", + "o", "006F", + "oacute", "00F3", + "oangthai", "0E2D", + "obarred", "0275", + "obarredcyrillic", "04E9", + "obarreddieresiscyrillic", "04EB", + "obengali", "0993", + "obopomofo", "311B", + "obreve", "014F", + "ocandradeva", "0911", + "ocandragujarati", "0A91", + "ocandravowelsigndeva", "0949", + "ocandravowelsigngujarati", "0AC9", + "ocaron", "01D2", + "ocircle", "24DE", + "ocircumflex", "00F4", + "ocircumflexacute", "1ED1", + "ocircumflexdotbelow", "1ED9", + "ocircumflexgrave", "1ED3", + "ocircumflexhookabove", "1ED5", + "ocircumflextilde", "1ED7", + "ocyrillic", "043E", + "odblacute", "0151", + "odblgrave", "020D", + "odeva", "0913", + "odieresis", "00F6", + "odieresiscyrillic", "04E7", + "odotbelow", "1ECD", + "oe", "0153", + "oekorean", "315A", + 
"ogonek", "02DB", + "ogonekcmb", "0328", + "ograve", "00F2", + "ogujarati", "0A93", + "oharmenian", "0585", + "ohiragana", "304A", + "ohookabove", "1ECF", + "ohorn", "01A1", + "ohornacute", "1EDB", + "ohorndotbelow", "1EE3", + "ohorngrave", "1EDD", + "ohornhookabove", "1EDF", + "ohorntilde", "1EE1", + "ohungarumlaut", "0151", + "oi", "01A3", + "oinvertedbreve", "020F", + "okatakana", "30AA", + "okatakanahalfwidth", "FF75", + "okorean", "3157", + "olehebrew", "05AB", + "omacron", "014D", + "omacronacute", "1E53", + "omacrongrave", "1E51", + "omdeva", "0950", + "omega", "03C9", + "omega1", "03D6", + "omegacyrillic", "0461", + "omegalatinclosed", "0277", + "omegaroundcyrillic", "047B", + "omegatitlocyrillic", "047D", + "omegatonos", "03CE", + "omgujarati", "0AD0", + "omicron", "03BF", + "omicrontonos", "03CC", + "omonospace", "FF4F", + "one", "0031", + "onearabic", "0661", + "onebengali", "09E7", + "onecircle", "2460", + "onecircleinversesansserif", "278A", + "onedeva", "0967", + "onedotenleader", "2024", + "oneeighth", "215B", + "onegujarati", "0AE7", + "onegurmukhi", "0A67", + "onehackarabic", "0661", + "onehalf", "00BD", + "onehangzhou", "3021", + "oneideographicparen", "3220", + "oneinferior", "2081", + "onemonospace", "FF11", + "onenumeratorbengali", "09F4", + "oneparen", "2474", + "oneperiod", "2488", + "onepersian", "06F1", + "onequarter", "00BC", + "oneroman", "2170", + "onesuperior", "00B9", + "onethai", "0E51", + "onethird", "2153", + "oogonek", "01EB", + "oogonekmacron", "01ED", + "oogurmukhi", "0A13", + "oomatragurmukhi", "0A4B", + "oopen", "0254", + "oparen", "24AA", + "openbullet", "25E6", + "option", "2325", + "ordfeminine", "00AA", + "ordmasculine", "00BA", + "orthogonal", "221F", + "oshortdeva", "0912", + "oshortvowelsigndeva", "094A", + "oslash", "00F8", + "oslashacute", "01FF", + "osmallhiragana", "3049", + "osmallkatakana", "30A9", + "osmallkatakanahalfwidth", "FF6B", + "ostrokeacute", "01FF", + "otcyrillic", "047F", + "otilde", "00F5", + 
"otildeacute", "1E4D", + "otildedieresis", "1E4F", + "oubopomofo", "3121", + "overline", "203E", + "overlinecenterline", "FE4A", + "overlinecmb", "0305", + "overlinedashed", "FE49", + "overlinedblwavy", "FE4C", + "overlinewavy", "FE4B", + "overscore", "00AF", + "ovowelsignbengali", "09CB", + "ovowelsigndeva", "094B", + "ovowelsigngujarati", "0ACB", + "p", "0070", + "paampssquare", "3380", + "paasentosquare", "332B", + "pabengali", "09AA", + "pacute", "1E55", + "padeva", "092A", + "pagedown", "21DF", + "pageup", "21DE", + "pagujarati", "0AAA", + "pagurmukhi", "0A2A", + "pahiragana", "3071", + "paiyannoithai", "0E2F", + "pakatakana", "30D1", + "palatalizationcyrilliccmb", "0484", + "palochkacyrillic", "04C0", + "pansioskorean", "317F", + "paragraph", "00B6", + "parallel", "2225", + "parenleft", "0028", + "parenleftaltonearabic", "FD3E", + "parenleftinferior", "208D", + "parenleftmonospace", "FF08", + "parenleftsmall", "FE59", + "parenleftsuperior", "207D", + "parenleftvertical", "FE35", + "parenright", "0029", + "parenrightaltonearabic", "FD3F", + "parenrightinferior", "208E", + "parenrightmonospace", "FF09", + "parenrightsmall", "FE5A", + "parenrightsuperior", "207E", + "parenrightvertical", "FE36", + "partialdiff", "2202", + "paseqhebrew", "05C0", + "pashtahebrew", "0599", + "pasquare", "33A9", + "patah", "05B7", + "patah11", "05B7", + "patah1d", "05B7", + "patah2a", "05B7", + "patahhebrew", "05B7", + "patahnarrowhebrew", "05B7", + "patahquarterhebrew", "05B7", + "patahwidehebrew", "05B7", + "pazerhebrew", "05A1", + "pbopomofo", "3106", + "pcircle", "24DF", + "pdotaccent", "1E57", + "pe", "05E4", + "pecyrillic", "043F", + "pedagesh", "FB44", + "pedageshhebrew", "FB44", + "peezisquare", "333B", + "pefinaldageshhebrew", "FB43", + "peharabic", "067E", + "peharmenian", "057A", + "pehebrew", "05E4", + "pehfinalarabic", "FB57", + "pehinitialarabic", "FB58", + "pehiragana", "307A", + "pehmedialarabic", "FB59", + "pekatakana", "30DA", + "pemiddlehookcyrillic", "04A7", + 
"perafehebrew", "FB4E", + "percent", "0025", + "percentarabic", "066A", + "percentmonospace", "FF05", + "percentsmall", "FE6A", + "period", "002E", + "periodarmenian", "0589", + "periodcentered", "00B7", + "periodhalfwidth", "FF61", + "periodmonospace", "FF0E", + "periodsmall", "FE52", + "perispomenigreekcmb", "0342", + "perpendicular", "22A5", + "perthousand", "2030", + "peseta", "20A7", + "pfsquare", "338A", + "phabengali", "09AB", + "phadeva", "092B", + "phagujarati", "0AAB", + "phagurmukhi", "0A2B", + "phi", "03C6", + "phi1", "03D5", + "phieuphacirclekorean", "327A", + "phieuphaparenkorean", "321A", + "phieuphcirclekorean", "326C", + "phieuphkorean", "314D", + "phieuphparenkorean", "320C", + "philatin", "0278", + "phinthuthai", "0E3A", + "phisymbolgreek", "03D5", + "phook", "01A5", + "phophanthai", "0E1E", + "phophungthai", "0E1C", + "phosamphaothai", "0E20", + "pi", "03C0", + "pieupacirclekorean", "3273", + "pieupaparenkorean", "3213", + "pieupcieuckorean", "3176", + "pieupcirclekorean", "3265", + "pieupkiyeokkorean", "3172", + "pieupkorean", "3142", + "pieupparenkorean", "3205", + "pieupsioskiyeokkorean", "3174", + "pieupsioskorean", "3144", + "pieupsiostikeutkorean", "3175", + "pieupthieuthkorean", "3177", + "pieuptikeutkorean", "3173", + "pihiragana", "3074", + "pikatakana", "30D4", + "pisymbolgreek", "03D6", + "piwrarmenian", "0583", + "plus", "002B", + "plusbelowcmb", "031F", + "pluscircle", "2295", + "plusminus", "00B1", + "plusmod", "02D6", + "plusmonospace", "FF0B", + "plussmall", "FE62", + "plussuperior", "207A", + "pmonospace", "FF50", + "pmsquare", "33D8", + "pohiragana", "307D", + "pointingindexdownwhite", "261F", + "pointingindexleftwhite", "261C", + "pointingindexrightwhite", "261E", + "pointingindexupwhite", "261D", + "pokatakana", "30DD", + "poplathai", "0E1B", + "postalmark", "3012", + "postalmarkface", "3020", + "pparen", "24AB", + "precedes", "227A", + "prescription", "211E", + "primemod", "02B9", + "primereversed", "2035", + "product", 
"220F", + "projective", "2305", + "prolongedkana", "30FC", + "propellor", "2318", + "propersubset", "2282", + "propersuperset", "2283", + "proportion", "2237", + "proportional", "221D", + "psi", "03C8", + "psicyrillic", "0471", + "psilipneumatacyrilliccmb", "0486", + "pssquare", "33B0", + "puhiragana", "3077", + "pukatakana", "30D7", + "pvsquare", "33B4", + "pwsquare", "33BA", + "q", "0071", + "qadeva", "0958", + "qadmahebrew", "05A8", + "qafarabic", "0642", + "qaffinalarabic", "FED6", + "qafinitialarabic", "FED7", + "qafmedialarabic", "FED8", + "qamats", "05B8", + "qamats10", "05B8", + "qamats1a", "05B8", + "qamats1c", "05B8", + "qamats27", "05B8", + "qamats29", "05B8", + "qamats33", "05B8", + "qamatsde", "05B8", + "qamatshebrew", "05B8", + "qamatsnarrowhebrew", "05B8", + "qamatsqatanhebrew", "05B8", + "qamatsqatannarrowhebrew", "05B8", + "qamatsqatanquarterhebrew", "05B8", + "qamatsqatanwidehebrew", "05B8", + "qamatsquarterhebrew", "05B8", + "qamatswidehebrew", "05B8", + "qarneyparahebrew", "059F", + "qbopomofo", "3111", + "qcircle", "24E0", + "qhook", "02A0", + "qmonospace", "FF51", + "qof", "05E7", + "qofdagesh", "FB47", + "qofdageshhebrew", "FB47", + "qofhatafpatah", "05E7_05B2", + "qofhatafpatahhebrew", "05E7_05B2", + "qofhatafsegol", "05E7_05B1", + "qofhatafsegolhebrew", "05E7_05B1", + "qofhebrew", "05E7", + "qofhiriq", "05E7_05B4", + "qofhiriqhebrew", "05E7_05B4", + "qofholam", "05E7_05B9", + "qofholamhebrew", "05E7_05B9", + "qofpatah", "05E7_05B7", + "qofpatahhebrew", "05E7_05B7", + "qofqamats", "05E7_05B8", + "qofqamatshebrew", "05E7_05B8", + "qofqubuts", "05E7_05BB", + "qofqubutshebrew", "05E7_05BB", + "qofsegol", "05E7_05B6", + "qofsegolhebrew", "05E7_05B6", + "qofsheva", "05E7_05B0", + "qofshevahebrew", "05E7_05B0", + "qoftsere", "05E7_05B5", + "qoftserehebrew", "05E7_05B5", + "qparen", "24AC", + "quarternote", "2669", + "qubuts", "05BB", + "qubuts18", "05BB", + "qubuts25", "05BB", + "qubuts31", "05BB", + "qubutshebrew", "05BB", + "qubutsnarrowhebrew", 
"05BB", + "qubutsquarterhebrew", "05BB", + "qubutswidehebrew", "05BB", + "question", "003F", + "questionarabic", "061F", + "questionarmenian", "055E", + "questiondown", "00BF", + "questiongreek", "037E", + "questionmonospace", "FF1F", + "quotedbl", "0022", + "quotedblbase", "201E", + "quotedblleft", "201C", + "quotedblmonospace", "FF02", + "quotedblprime", "301E", + "quotedblprimereversed", "301D", + "quotedblright", "201D", + "quoteleft", "2018", + "quoteleftreversed", "201B", + "quotereversed", "201B", + "quoteright", "2019", + "quoterightn", "0149", + "quotesinglbase", "201A", + "quotesingle", "0027", + "quotesinglemonospace", "FF07", + "r", "0072", + "raarmenian", "057C", + "rabengali", "09B0", + "racute", "0155", + "radeva", "0930", + "radical", "221A", + "radoverssquare", "33AE", + "radoverssquaredsquare", "33AF", + "radsquare", "33AD", + "rafe", "05BF", + "rafehebrew", "05BF", + "ragujarati", "0AB0", + "ragurmukhi", "0A30", + "rahiragana", "3089", + "rakatakana", "30E9", + "rakatakanahalfwidth", "FF97", + "ralowerdiagonalbengali", "09F1", + "ramiddlediagonalbengali", "09F0", + "ramshorn", "0264", + "ratio", "2236", + "rbopomofo", "3116", + "rcaron", "0159", + "rcedilla", "0157", + "rcircle", "24E1", + "rcommaaccent", "0157", + "rdblgrave", "0211", + "rdotaccent", "1E59", + "rdotbelow", "1E5B", + "rdotbelowmacron", "1E5D", + "referencemark", "203B", + "reflexsubset", "2286", + "reflexsuperset", "2287", + "registered", "00AE", + "reharabic", "0631", + "reharmenian", "0580", + "rehfinalarabic", "FEAE", + "rehiragana", "308C", + "rehyehaleflamarabic", "0631_FEF3_FE8E_0644", + "rekatakana", "30EC", + "rekatakanahalfwidth", "FF9A", + "resh", "05E8", + "reshdageshhebrew", "FB48", + "reshhatafpatah", "05E8_05B2", + "reshhatafpatahhebrew", "05E8_05B2", + "reshhatafsegol", "05E8_05B1", + "reshhatafsegolhebrew", "05E8_05B1", + "reshhebrew", "05E8", + "reshhiriq", "05E8_05B4", + "reshhiriqhebrew", "05E8_05B4", + "reshholam", "05E8_05B9", + "reshholamhebrew", 
"05E8_05B9", + "reshpatah", "05E8_05B7", + "reshpatahhebrew", "05E8_05B7", + "reshqamats", "05E8_05B8", + "reshqamatshebrew", "05E8_05B8", + "reshqubuts", "05E8_05BB", + "reshqubutshebrew", "05E8_05BB", + "reshsegol", "05E8_05B6", + "reshsegolhebrew", "05E8_05B6", + "reshsheva", "05E8_05B0", + "reshshevahebrew", "05E8_05B0", + "reshtsere", "05E8_05B5", + "reshtserehebrew", "05E8_05B5", + "reversedtilde", "223D", + "reviahebrew", "0597", + "reviamugrashhebrew", "0597", + "revlogicalnot", "2310", + "rfishhook", "027E", + "rfishhookreversed", "027F", + "rhabengali", "09DD", + "rhadeva", "095D", + "rho", "03C1", + "rhook", "027D", + "rhookturned", "027B", + "rhookturnedsuperior", "02B5", + "rhosymbolgreek", "03F1", + "rhotichookmod", "02DE", + "rieulacirclekorean", "3271", + "rieulaparenkorean", "3211", + "rieulcirclekorean", "3263", + "rieulhieuhkorean", "3140", + "rieulkiyeokkorean", "313A", + "rieulkiyeoksioskorean", "3169", + "rieulkorean", "3139", + "rieulmieumkorean", "313B", + "rieulpansioskorean", "316C", + "rieulparenkorean", "3203", + "rieulphieuphkorean", "313F", + "rieulpieupkorean", "313C", + "rieulpieupsioskorean", "316B", + "rieulsioskorean", "313D", + "rieulthieuthkorean", "313E", + "rieultikeutkorean", "316A", + "rieulyeorinhieuhkorean", "316D", + "rightangle", "221F", + "righttackbelowcmb", "0319", + "righttriangle", "22BF", + "rihiragana", "308A", + "rikatakana", "30EA", + "rikatakanahalfwidth", "FF98", + "ring", "02DA", + "ringbelowcmb", "0325", + "ringcmb", "030A", + "ringhalfleft", "02BF", + "ringhalfleftarmenian", "0559", + "ringhalfleftbelowcmb", "031C", + "ringhalfleftcentered", "02D3", + "ringhalfright", "02BE", + "ringhalfrightbelowcmb", "0339", + "ringhalfrightcentered", "02D2", + "rinvertedbreve", "0213", + "rittorusquare", "3351", + "rlinebelow", "1E5F", + "rlongleg", "027C", + "rlonglegturned", "027A", + "rmonospace", "FF52", + "rohiragana", "308D", + "rokatakana", "30ED", + "rokatakanahalfwidth", "FF9B", + "roruathai", "0E23", + 
"rparen", "24AD", + "rrabengali", "09DC", + "rradeva", "0931", + "rragurmukhi", "0A5C", + "rreharabic", "0691", + "rrehfinalarabic", "FB8D", + "rrvocalicbengali", "09E0", + "rrvocalicdeva", "0960", + "rrvocalicgujarati", "0AE0", + "rrvocalicvowelsignbengali", "09C4", + "rrvocalicvowelsigndeva", "0944", + "rrvocalicvowelsigngujarati", "0AC4", + "rtblock", "2590", + "rturned", "0279", + "rturnedsuperior", "02B4", + "ruhiragana", "308B", + "rukatakana", "30EB", + "rukatakanahalfwidth", "FF99", + "rupeemarkbengali", "09F2", + "rupeesignbengali", "09F3", + "ruthai", "0E24", + "rvocalicbengali", "098B", + "rvocalicdeva", "090B", + "rvocalicgujarati", "0A8B", + "rvocalicvowelsignbengali", "09C3", + "rvocalicvowelsigndeva", "0943", + "rvocalicvowelsigngujarati", "0AC3", + "s", "0073", + "sabengali", "09B8", + "sacute", "015B", + "sacutedotaccent", "1E65", + "sadarabic", "0635", + "sadeva", "0938", + "sadfinalarabic", "FEBA", + "sadinitialarabic", "FEBB", + "sadmedialarabic", "FEBC", + "sagujarati", "0AB8", + "sagurmukhi", "0A38", + "sahiragana", "3055", + "sakatakana", "30B5", + "sakatakanahalfwidth", "FF7B", + "sallallahoualayhewasallamarabic", "FDFA", + "samekh", "05E1", + "samekhdagesh", "FB41", + "samekhdageshhebrew", "FB41", + "samekhhebrew", "05E1", + "saraaathai", "0E32", + "saraaethai", "0E41", + "saraaimaimalaithai", "0E44", + "saraaimaimuanthai", "0E43", + "saraamthai", "0E33", + "saraathai", "0E30", + "saraethai", "0E40", + "saraiithai", "0E35", + "saraithai", "0E34", + "saraothai", "0E42", + "saraueethai", "0E37", + "sarauethai", "0E36", + "sarauthai", "0E38", + "sarauuthai", "0E39", + "sbopomofo", "3119", + "scaron", "0161", + "scarondotaccent", "1E67", + "scedilla", "015F", + "schwa", "0259", + "schwacyrillic", "04D9", + "schwadieresiscyrillic", "04DB", + "schwahook", "025A", + "scircle", "24E2", + "scircumflex", "015D", + "scommaaccent", "0219", + "sdotaccent", "1E61", + "sdotbelow", "1E63", + "sdotbelowdotaccent", "1E69", + "seagullbelowcmb", "033C", + 
"second", "2033", + "secondtonechinese", "02CA", + "section", "00A7", + "seenarabic", "0633", + "seenfinalarabic", "FEB2", + "seeninitialarabic", "FEB3", + "seenmedialarabic", "FEB4", + "segol", "05B6", + "segol13", "05B6", + "segol1f", "05B6", + "segol2c", "05B6", + "segolhebrew", "05B6", + "segolnarrowhebrew", "05B6", + "segolquarterhebrew", "05B6", + "segoltahebrew", "0592", + "segolwidehebrew", "05B6", + "seharmenian", "057D", + "sehiragana", "305B", + "sekatakana", "30BB", + "sekatakanahalfwidth", "FF7E", + "semicolon", "003B", + "semicolonarabic", "061B", + "semicolonmonospace", "FF1B", + "semicolonsmall", "FE54", + "semivoicedmarkkana", "309C", + "semivoicedmarkkanahalfwidth", "FF9F", + "sentisquare", "3322", + "sentosquare", "3323", + "seven", "0037", + "sevenarabic", "0667", + "sevenbengali", "09ED", + "sevencircle", "2466", + "sevencircleinversesansserif", "2790", + "sevendeva", "096D", + "seveneighths", "215E", + "sevengujarati", "0AED", + "sevengurmukhi", "0A6D", + "sevenhackarabic", "0667", + "sevenhangzhou", "3027", + "sevenideographicparen", "3226", + "seveninferior", "2087", + "sevenmonospace", "FF17", + "sevenparen", "247A", + "sevenperiod", "248E", + "sevenpersian", "06F7", + "sevenroman", "2176", + "sevensuperior", "2077", + "seventeencircle", "2470", + "seventeenparen", "2484", + "seventeenperiod", "2498", + "seventhai", "0E57", + "sfthyphen", "00AD", + "shaarmenian", "0577", + "shabengali", "09B6", + "shacyrillic", "0448", + "shaddaarabic", "0651", + "shaddadammaarabic", "FC61", + "shaddadammatanarabic", "FC5E", + "shaddafathaarabic", "FC60", + "shaddafathatanarabic", "0651_064B", + "shaddakasraarabic", "FC62", + "shaddakasratanarabic", "FC5F", + "shade", "2592", + "shadedark", "2593", + "shadelight", "2591", + "shademedium", "2592", + "shadeva", "0936", + "shagujarati", "0AB6", + "shagurmukhi", "0A36", + "shalshelethebrew", "0593", + "shbopomofo", "3115", + "shchacyrillic", "0449", + "sheenarabic", "0634", + "sheenfinalarabic", "FEB6", + 
"sheeninitialarabic", "FEB7", + "sheenmedialarabic", "FEB8", + "sheicoptic", "03E3", + "sheqel", "20AA", + "sheqelhebrew", "20AA", + "sheva", "05B0", + "sheva115", "05B0", + "sheva15", "05B0", + "sheva22", "05B0", + "sheva2e", "05B0", + "shevahebrew", "05B0", + "shevanarrowhebrew", "05B0", + "shevaquarterhebrew", "05B0", + "shevawidehebrew", "05B0", + "shhacyrillic", "04BB", + "shimacoptic", "03ED", + "shin", "05E9", + "shindagesh", "FB49", + "shindageshhebrew", "FB49", + "shindageshshindot", "FB2C", + "shindageshshindothebrew", "FB2C", + "shindageshsindot", "FB2D", + "shindageshsindothebrew", "FB2D", + "shindothebrew", "05C1", + "shinhebrew", "05E9", + "shinshindot", "FB2A", + "shinshindothebrew", "FB2A", + "shinsindot", "FB2B", + "shinsindothebrew", "FB2B", + "shook", "0282", + "sigma", "03C3", + "sigma1", "03C2", + "sigmafinal", "03C2", + "sigmalunatesymbolgreek", "03F2", + "sihiragana", "3057", + "sikatakana", "30B7", + "sikatakanahalfwidth", "FF7C", + "siluqhebrew", "05BD", + "siluqlefthebrew", "05BD", + "similar", "223C", + "sindothebrew", "05C2", + "siosacirclekorean", "3274", + "siosaparenkorean", "3214", + "sioscieuckorean", "317E", + "sioscirclekorean", "3266", + "sioskiyeokkorean", "317A", + "sioskorean", "3145", + "siosnieunkorean", "317B", + "siosparenkorean", "3206", + "siospieupkorean", "317D", + "siostikeutkorean", "317C", + "six", "0036", + "sixarabic", "0666", + "sixbengali", "09EC", + "sixcircle", "2465", + "sixcircleinversesansserif", "278F", + "sixdeva", "096C", + "sixgujarati", "0AEC", + "sixgurmukhi", "0A6C", + "sixhackarabic", "0666", + "sixhangzhou", "3026", + "sixideographicparen", "3225", + "sixinferior", "2086", + "sixmonospace", "FF16", + "sixparen", "2479", + "sixperiod", "248D", + "sixpersian", "06F6", + "sixroman", "2175", + "sixsuperior", "2076", + "sixteencircle", "246F", + "sixteencurrencydenominatorbengali", "09F9", + "sixteenparen", "2483", + "sixteenperiod", "2497", + "sixthai", "0E56", + "slash", "002F", + "slashmonospace", 
"FF0F", + "slong", "017F", + "slongdotaccent", "1E9B", + "smileface", "263A", + "smonospace", "FF53", + "sofpasuqhebrew", "05C3", + "softhyphen", "00AD", + "softsigncyrillic", "044C", + "sohiragana", "305D", + "sokatakana", "30BD", + "sokatakanahalfwidth", "FF7F", + "soliduslongoverlaycmb", "0338", + "solidusshortoverlaycmb", "0337", + "sorusithai", "0E29", + "sosalathai", "0E28", + "sosothai", "0E0B", + "sosuathai", "0E2A", + "space", "0020", + "spacehackarabic", "0020", + "spade", "2660", + "spadesuitblack", "2660", + "spadesuitwhite", "2664", + "sparen", "24AE", + "squarebelowcmb", "033B", + "squarecc", "33C4", + "squarecm", "339D", + "squarediagonalcrosshatchfill", "25A9", + "squarehorizontalfill", "25A4", + "squarekg", "338F", + "squarekm", "339E", + "squarekmcapital", "33CE", + "squareln", "33D1", + "squarelog", "33D2", + "squaremg", "338E", + "squaremil", "33D5", + "squaremm", "339C", + "squaremsquared", "33A1", + "squareorthogonalcrosshatchfill", "25A6", + "squareupperlefttolowerrightfill", "25A7", + "squareupperrighttolowerleftfill", "25A8", + "squareverticalfill", "25A5", + "squarewhitewithsmallblack", "25A3", + "srsquare", "33DB", + "ssabengali", "09B7", + "ssadeva", "0937", + "ssagujarati", "0AB7", + "ssangcieuckorean", "3149", + "ssanghieuhkorean", "3185", + "ssangieungkorean", "3180", + "ssangkiyeokkorean", "3132", + "ssangnieunkorean", "3165", + "ssangpieupkorean", "3143", + "ssangsioskorean", "3146", + "ssangtikeutkorean", "3138", + "sterling", "00A3", + "sterlingmonospace", "FFE1", + "strokelongoverlaycmb", "0336", + "strokeshortoverlaycmb", "0335", + "subset", "2282", + "subsetnotequal", "228A", + "subsetorequal", "2286", + "succeeds", "227B", + "suchthat", "220B", + "suhiragana", "3059", + "sukatakana", "30B9", + "sukatakanahalfwidth", "FF7D", + "sukunarabic", "0652", + "summation", "2211", + "sun", "263C", + "superset", "2283", + "supersetnotequal", "228B", + "supersetorequal", "2287", + "svsquare", "33DC", + "syouwaerasquare", "337C", + "t", 
"0074", + "tabengali", "09A4", + "tackdown", "22A4", + "tackleft", "22A3", + "tadeva", "0924", + "tagujarati", "0AA4", + "tagurmukhi", "0A24", + "taharabic", "0637", + "tahfinalarabic", "FEC2", + "tahinitialarabic", "FEC3", + "tahiragana", "305F", + "tahmedialarabic", "FEC4", + "taisyouerasquare", "337D", + "takatakana", "30BF", + "takatakanahalfwidth", "FF80", + "tatweelarabic", "0640", + "tau", "03C4", + "tav", "05EA", + "tavdages", "FB4A", + "tavdagesh", "FB4A", + "tavdageshhebrew", "FB4A", + "tavhebrew", "05EA", + "tbar", "0167", + "tbopomofo", "310A", + "tcaron", "0165", + "tccurl", "02A8", + "tcedilla", "0163", + "tcheharabic", "0686", + "tchehfinalarabic", "FB7B", + "tchehinitialarabic", "FB7C", + "tchehmedialarabic", "FB7D", + "tchehmeeminitialarabic", "FB7C_FEE4", + "tcircle", "24E3", + "tcircumflexbelow", "1E71", + "tcommaaccent", "0163", + "tdieresis", "1E97", + "tdotaccent", "1E6B", + "tdotbelow", "1E6D", + "tecyrillic", "0442", + "tedescendercyrillic", "04AD", + "teharabic", "062A", + "tehfinalarabic", "FE96", + "tehhahinitialarabic", "FCA2", + "tehhahisolatedarabic", "FC0C", + "tehinitialarabic", "FE97", + "tehiragana", "3066", + "tehjeeminitialarabic", "FCA1", + "tehjeemisolatedarabic", "FC0B", + "tehmarbutaarabic", "0629", + "tehmarbutafinalarabic", "FE94", + "tehmedialarabic", "FE98", + "tehmeeminitialarabic", "FCA4", + "tehmeemisolatedarabic", "FC0E", + "tehnoonfinalarabic", "FC73", + "tekatakana", "30C6", + "tekatakanahalfwidth", "FF83", + "telephone", "2121", + "telephoneblack", "260E", + "telishagedolahebrew", "05A0", + "telishaqetanahebrew", "05A9", + "tencircle", "2469", + "tenideographicparen", "3229", + "tenparen", "247D", + "tenperiod", "2491", + "tenroman", "2179", + "tesh", "02A7", + "tet", "05D8", + "tetdagesh", "FB38", + "tetdageshhebrew", "FB38", + "tethebrew", "05D8", + "tetsecyrillic", "04B5", + "tevirhebrew", "059B", + "tevirlefthebrew", "059B", + "thabengali", "09A5", + "thadeva", "0925", + "thagujarati", "0AA5", + "thagurmukhi", 
"0A25", + "thalarabic", "0630", + "thalfinalarabic", "FEAC", + "thanthakhatthai", "0E4C", + "theharabic", "062B", + "thehfinalarabic", "FE9A", + "thehinitialarabic", "FE9B", + "thehmedialarabic", "FE9C", + "thereexists", "2203", + "therefore", "2234", + "theta", "03B8", + "theta1", "03D1", + "thetasymbolgreek", "03D1", + "thieuthacirclekorean", "3279", + "thieuthaparenkorean", "3219", + "thieuthcirclekorean", "326B", + "thieuthkorean", "314C", + "thieuthparenkorean", "320B", + "thirteencircle", "246C", + "thirteenparen", "2480", + "thirteenperiod", "2494", + "thonangmonthothai", "0E11", + "thook", "01AD", + "thophuthaothai", "0E12", + "thorn", "00FE", + "thothahanthai", "0E17", + "thothanthai", "0E10", + "thothongthai", "0E18", + "thothungthai", "0E16", + "thousandcyrillic", "0482", + "thousandsseparatorarabic", "066C", + "thousandsseparatorpersian", "066C", + "three", "0033", + "threearabic", "0663", + "threebengali", "09E9", + "threecircle", "2462", + "threecircleinversesansserif", "278C", + "threedeva", "0969", + "threeeighths", "215C", + "threegujarati", "0AE9", + "threegurmukhi", "0A69", + "threehackarabic", "0663", + "threehangzhou", "3023", + "threeideographicparen", "3222", + "threeinferior", "2083", + "threemonospace", "FF13", + "threenumeratorbengali", "09F6", + "threeparen", "2476", + "threeperiod", "248A", + "threepersian", "06F3", + "threequarters", "00BE", + "threeroman", "2172", + "threesuperior", "00B3", + "threethai", "0E53", + "thzsquare", "3394", + "tihiragana", "3061", + "tikatakana", "30C1", + "tikatakanahalfwidth", "FF81", + "tikeutacirclekorean", "3270", + "tikeutaparenkorean", "3210", + "tikeutcirclekorean", "3262", + "tikeutkorean", "3137", + "tikeutparenkorean", "3202", + "tilde", "02DC", + "tildebelowcmb", "0330", + "tildecmb", "0303", + "tildecomb", "0303", + "tildedoublecmb", "0360", + "tildeoperator", "223C", + "tildeoverlaycmb", "0334", + "tildeverticalcmb", "033E", + "timescircle", "2297", + "tipehahebrew", "0596", + 
"tipehalefthebrew", "0596", + "tippigurmukhi", "0A70", + "titlocyrilliccmb", "0483", + "tiwnarmenian", "057F", + "tlinebelow", "1E6F", + "tmonospace", "FF54", + "toarmenian", "0569", + "tohiragana", "3068", + "tokatakana", "30C8", + "tokatakanahalfwidth", "FF84", + "tonebarextrahighmod", "02E5", + "tonebarextralowmod", "02E9", + "tonebarhighmod", "02E6", + "tonebarlowmod", "02E8", + "tonebarmidmod", "02E7", + "tonefive", "01BD", + "tonesix", "0185", + "tonetwo", "01A8", + "tonos", "0384", + "tonsquare", "3327", + "topatakthai", "0E0F", + "tortoiseshellbracketleft", "3014", + "tortoiseshellbracketleftsmall", "FE5D", + "tortoiseshellbracketleftvertical", "FE39", + "tortoiseshellbracketright", "3015", + "tortoiseshellbracketrightsmall", "FE5E", + "tortoiseshellbracketrightvertical", "FE3A", + "totaothai", "0E15", + "tpalatalhook", "01AB", + "tparen", "24AF", + "trademark", "2122", + "tretroflexhook", "0288", + "triagdn", "25BC", + "triaglf", "25C4", + "triagrt", "25BA", + "triagup", "25B2", + "ts", "02A6", + "tsadi", "05E6", + "tsadidagesh", "FB46", + "tsadidageshhebrew", "FB46", + "tsadihebrew", "05E6", + "tsecyrillic", "0446", + "tsere", "05B5", + "tsere12", "05B5", + "tsere1e", "05B5", + "tsere2b", "05B5", + "tserehebrew", "05B5", + "tserenarrowhebrew", "05B5", + "tserequarterhebrew", "05B5", + "tserewidehebrew", "05B5", + "tshecyrillic", "045B", + "ttabengali", "099F", + "ttadeva", "091F", + "ttagujarati", "0A9F", + "ttagurmukhi", "0A1F", + "tteharabic", "0679", + "ttehfinalarabic", "FB67", + "ttehinitialarabic", "FB68", + "ttehmedialarabic", "FB69", + "tthabengali", "09A0", + "tthadeva", "0920", + "tthagujarati", "0AA0", + "tthagurmukhi", "0A20", + "tturned", "0287", + "tuhiragana", "3064", + "tukatakana", "30C4", + "tukatakanahalfwidth", "FF82", + "tusmallhiragana", "3063", + "tusmallkatakana", "30C3", + "tusmallkatakanahalfwidth", "FF6F", + "twelvecircle", "246B", + "twelveparen", "247F", + "twelveperiod", "2493", + "twelveroman", "217B", + "twentycircle", 
"2473", + "twentyhangzhou", "5344", + "twentyparen", "2487", + "twentyperiod", "249B", + "two", "0032", + "twoarabic", "0662", + "twobengali", "09E8", + "twocircle", "2461", + "twocircleinversesansserif", "278B", + "twodeva", "0968", + "twodotenleader", "2025", + "twodotleader", "2025", + "twodotleadervertical", "FE30", + "twogujarati", "0AE8", + "twogurmukhi", "0A68", + "twohackarabic", "0662", + "twohangzhou", "3022", + "twoideographicparen", "3221", + "twoinferior", "2082", + "twomonospace", "FF12", + "twonumeratorbengali", "09F5", + "twoparen", "2475", + "twoperiod", "2489", + "twopersian", "06F2", + "tworoman", "2171", + "twostroke", "01BB", + "twosuperior", "00B2", + "twothai", "0E52", + "twothirds", "2154", + "u", "0075", + "uacute", "00FA", + "ubar", "0289", + "ubengali", "0989", + "ubopomofo", "3128", + "ubreve", "016D", + "ucaron", "01D4", + "ucircle", "24E4", + "ucircumflex", "00FB", + "ucircumflexbelow", "1E77", + "ucyrillic", "0443", + "udattadeva", "0951", + "udblacute", "0171", + "udblgrave", "0215", + "udeva", "0909", + "udieresis", "00FC", + "udieresisacute", "01D8", + "udieresisbelow", "1E73", + "udieresiscaron", "01DA", + "udieresiscyrillic", "04F1", + "udieresisgrave", "01DC", + "udieresismacron", "01D6", + "udotbelow", "1EE5", + "ugrave", "00F9", + "ugujarati", "0A89", + "ugurmukhi", "0A09", + "uhiragana", "3046", + "uhookabove", "1EE7", + "uhorn", "01B0", + "uhornacute", "1EE9", + "uhorndotbelow", "1EF1", + "uhorngrave", "1EEB", + "uhornhookabove", "1EED", + "uhorntilde", "1EEF", + "uhungarumlaut", "0171", + "uhungarumlautcyrillic", "04F3", + "uinvertedbreve", "0217", + "ukatakana", "30A6", + "ukatakanahalfwidth", "FF73", + "ukcyrillic", "0479", + "ukorean", "315C", + "umacron", "016B", + "umacroncyrillic", "04EF", + "umacrondieresis", "1E7B", + "umatragurmukhi", "0A41", + "umonospace", "FF55", + "underscore", "005F", + "underscoredbl", "2017", + "underscoremonospace", "FF3F", + "underscorevertical", "FE33", + "underscorewavy", "FE4F", + 
"union", "222A", + "universal", "2200", + "uogonek", "0173", + "uparen", "24B0", + "upblock", "2580", + "upperdothebrew", "05C4", + "upsilon", "03C5", + "upsilondieresis", "03CB", + "upsilondieresistonos", "03B0", + "upsilonlatin", "028A", + "upsilontonos", "03CD", + "uptackbelowcmb", "031D", + "uptackmod", "02D4", + "uragurmukhi", "0A73", + "uring", "016F", + "ushortcyrillic", "045E", + "usmallhiragana", "3045", + "usmallkatakana", "30A5", + "usmallkatakanahalfwidth", "FF69", + "ustraightcyrillic", "04AF", + "ustraightstrokecyrillic", "04B1", + "utilde", "0169", + "utildeacute", "1E79", + "utildebelow", "1E75", + "uubengali", "098A", + "uudeva", "090A", + "uugujarati", "0A8A", + "uugurmukhi", "0A0A", + "uumatragurmukhi", "0A42", + "uuvowelsignbengali", "09C2", + "uuvowelsigndeva", "0942", + "uuvowelsigngujarati", "0AC2", + "uvowelsignbengali", "09C1", + "uvowelsigndeva", "0941", + "uvowelsigngujarati", "0AC1", + "v", "0076", + "vadeva", "0935", + "vagujarati", "0AB5", + "vagurmukhi", "0A35", + "vakatakana", "30F7", + "vav", "05D5", + "vavdagesh", "FB35", + "vavdagesh65", "FB35", + "vavdageshhebrew", "FB35", + "vavhebrew", "05D5", + "vavholam", "FB4B", + "vavholamhebrew", "FB4B", + "vavvavhebrew", "05F0", + "vavyodhebrew", "05F1", + "vcircle", "24E5", + "vdotbelow", "1E7F", + "vecyrillic", "0432", + "veharabic", "06A4", + "vehfinalarabic", "FB6B", + "vehinitialarabic", "FB6C", + "vehmedialarabic", "FB6D", + "vekatakana", "30F9", + "venus", "2640", + "verticalbar", "007C", + "verticallineabovecmb", "030D", + "verticallinebelowcmb", "0329", + "verticallinelowmod", "02CC", + "verticallinemod", "02C8", + "vewarmenian", "057E", + "vhook", "028B", + "vikatakana", "30F8", + "viramabengali", "09CD", + "viramadeva", "094D", + "viramagujarati", "0ACD", + "visargabengali", "0983", + "visargadeva", "0903", + "visargagujarati", "0A83", + "vmonospace", "FF56", + "voarmenian", "0578", + "voicediterationhiragana", "309E", + "voicediterationkatakana", "30FE", + "voicedmarkkana", 
"309B", + "voicedmarkkanahalfwidth", "FF9E", + "vokatakana", "30FA", + "vparen", "24B1", + "vtilde", "1E7D", + "vturned", "028C", + "vuhiragana", "3094", + "vukatakana", "30F4", + "w", "0077", + "wacute", "1E83", + "waekorean", "3159", + "wahiragana", "308F", + "wakatakana", "30EF", + "wakatakanahalfwidth", "FF9C", + "wakorean", "3158", + "wasmallhiragana", "308E", + "wasmallkatakana", "30EE", + "wattosquare", "3357", + "wavedash", "301C", + "wavyunderscorevertical", "FE34", + "wawarabic", "0648", + "wawfinalarabic", "FEEE", + "wawhamzaabovearabic", "0624", + "wawhamzaabovefinalarabic", "FE86", + "wbsquare", "33DD", + "wcircle", "24E6", + "wcircumflex", "0175", + "wdieresis", "1E85", + "wdotaccent", "1E87", + "wdotbelow", "1E89", + "wehiragana", "3091", + "weierstrass", "2118", + "wekatakana", "30F1", + "wekorean", "315E", + "weokorean", "315D", + "wgrave", "1E81", + "whitebullet", "25E6", + "whitecircle", "25CB", + "whitecircleinverse", "25D9", + "whitecornerbracketleft", "300E", + "whitecornerbracketleftvertical", "FE43", + "whitecornerbracketright", "300F", + "whitecornerbracketrightvertical", "FE44", + "whitediamond", "25C7", + "whitediamondcontainingblacksmalldiamond", "25C8", + "whitedownpointingsmalltriangle", "25BF", + "whitedownpointingtriangle", "25BD", + "whiteleftpointingsmalltriangle", "25C3", + "whiteleftpointingtriangle", "25C1", + "whitelenticularbracketleft", "3016", + "whitelenticularbracketright", "3017", + "whiterightpointingsmalltriangle", "25B9", + "whiterightpointingtriangle", "25B7", + "whitesmallsquare", "25AB", + "whitesmilingface", "263A", + "whitesquare", "25A1", + "whitestar", "2606", + "whitetelephone", "260F", + "whitetortoiseshellbracketleft", "3018", + "whitetortoiseshellbracketright", "3019", + "whiteuppointingsmalltriangle", "25B5", + "whiteuppointingtriangle", "25B3", + "wihiragana", "3090", + "wikatakana", "30F0", + "wikorean", "315F", + "wmonospace", "FF57", + "wohiragana", "3092", + "wokatakana", "30F2", + 
"wokatakanahalfwidth", "FF66", + "won", "20A9", + "wonmonospace", "FFE6", + "wowaenthai", "0E27", + "wparen", "24B2", + "wring", "1E98", + "wsuperior", "02B7", + "wturned", "028D", + "wynn", "01BF", + "x", "0078", + "xabovecmb", "033D", + "xbopomofo", "3112", + "xcircle", "24E7", + "xdieresis", "1E8D", + "xdotaccent", "1E8B", + "xeharmenian", "056D", + "xi", "03BE", + "xmonospace", "FF58", + "xparen", "24B3", + "xsuperior", "02E3", + "y", "0079", + "yaadosquare", "334E", + "yabengali", "09AF", + "yacute", "00FD", + "yadeva", "092F", + "yaekorean", "3152", + "yagujarati", "0AAF", + "yagurmukhi", "0A2F", + "yahiragana", "3084", + "yakatakana", "30E4", + "yakatakanahalfwidth", "FF94", + "yakorean", "3151", + "yamakkanthai", "0E4E", + "yasmallhiragana", "3083", + "yasmallkatakana", "30E3", + "yasmallkatakanahalfwidth", "FF6C", + "yatcyrillic", "0463", + "ycircle", "24E8", + "ycircumflex", "0177", + "ydieresis", "00FF", + "ydotaccent", "1E8F", + "ydotbelow", "1EF5", + "yeharabic", "064A", + "yehbarreearabic", "06D2", + "yehbarreefinalarabic", "FBAF", + "yehfinalarabic", "FEF2", + "yehhamzaabovearabic", "0626", + "yehhamzaabovefinalarabic", "FE8A", + "yehhamzaaboveinitialarabic", "FE8B", + "yehhamzaabovemedialarabic", "FE8C", + "yehinitialarabic", "FEF3", + "yehmedialarabic", "FEF4", + "yehmeeminitialarabic", "FCDD", + "yehmeemisolatedarabic", "FC58", + "yehnoonfinalarabic", "FC94", + "yehthreedotsbelowarabic", "06D1", + "yekorean", "3156", + "yen", "00A5", + "yenmonospace", "FFE5", + "yeokorean", "3155", + "yeorinhieuhkorean", "3186", + "yerahbenyomohebrew", "05AA", + "yerahbenyomolefthebrew", "05AA", + "yericyrillic", "044B", + "yerudieresiscyrillic", "04F9", + "yesieungkorean", "3181", + "yesieungpansioskorean", "3183", + "yesieungsioskorean", "3182", + "yetivhebrew", "059A", + "ygrave", "1EF3", + "yhook", "01B4", + "yhookabove", "1EF7", + "yiarmenian", "0575", + "yicyrillic", "0457", + "yikorean", "3162", + "yinyang", "262F", + "yiwnarmenian", "0582", + "ymonospace", 
"FF59", + "yod", "05D9", + "yoddagesh", "FB39", + "yoddageshhebrew", "FB39", + "yodhebrew", "05D9", + "yodyodhebrew", "05F2", + "yodyodpatahhebrew", "FB1F", + "yohiragana", "3088", + "yoikorean", "3189", + "yokatakana", "30E8", + "yokatakanahalfwidth", "FF96", + "yokorean", "315B", + "yosmallhiragana", "3087", + "yosmallkatakana", "30E7", + "yosmallkatakanahalfwidth", "FF6E", + "yotgreek", "03F3", + "yoyaekorean", "3188", + "yoyakorean", "3187", + "yoyakthai", "0E22", + "yoyingthai", "0E0D", + "yparen", "24B4", + "ypogegrammeni", "037A", + "ypogegrammenigreekcmb", "0345", + "yr", "01A6", + "yring", "1E99", + "ysuperior", "02B8", + "ytilde", "1EF9", + "yturned", "028E", + "yuhiragana", "3086", + "yuikorean", "318C", + "yukatakana", "30E6", + "yukatakanahalfwidth", "FF95", + "yukorean", "3160", + "yusbigcyrillic", "046B", + "yusbigiotifiedcyrillic", "046D", + "yuslittlecyrillic", "0467", + "yuslittleiotifiedcyrillic", "0469", + "yusmallhiragana", "3085", + "yusmallkatakana", "30E5", + "yusmallkatakanahalfwidth", "FF6D", + "yuyekorean", "318B", + "yuyeokorean", "318A", + "yyabengali", "09DF", + "yyadeva", "095F", + "z", "007A", + "zaarmenian", "0566", + "zacute", "017A", + "zadeva", "095B", + "zagurmukhi", "0A5B", + "zaharabic", "0638", + "zahfinalarabic", "FEC6", + "zahinitialarabic", "FEC7", + "zahiragana", "3056", + "zahmedialarabic", "FEC8", + "zainarabic", "0632", + "zainfinalarabic", "FEB0", + "zakatakana", "30B6", + "zaqefgadolhebrew", "0595", + "zaqefqatanhebrew", "0594", + "zarqahebrew", "0598", + "zayin", "05D6", + "zayindagesh", "FB36", + "zayindageshhebrew", "FB36", + "zayinhebrew", "05D6", + "zbopomofo", "3117", + "zcaron", "017E", + "zcircle", "24E9", + "zcircumflex", "1E91", + "zcurl", "0291", + "zdot", "017C", + "zdotaccent", "017C", + "zdotbelow", "1E93", + "zecyrillic", "0437", + "zedescendercyrillic", "0499", + "zedieresiscyrillic", "04DF", + "zehiragana", "305C", + "zekatakana", "30BC", + "zero", "0030", + "zeroarabic", "0660", + "zerobengali", 
"09E6", + "zerodeva", "0966", + "zerogujarati", "0AE6", + "zerogurmukhi", "0A66", + "zerohackarabic", "0660", + "zeroinferior", "2080", + "zeromonospace", "FF10", + "zeropersian", "06F0", + "zerosuperior", "2070", + "zerothai", "0E50", + "zerowidthjoiner", "FEFF", + "zerowidthnonjoiner", "200C", + "zerowidthspace", "200B", + "zeta", "03B6", + "zhbopomofo", "3113", + "zhearmenian", "056A", + "zhebrevecyrillic", "04C2", + "zhecyrillic", "0436", + "zhedescendercyrillic", "0497", + "zhedieresiscyrillic", "04DD", + "zihiragana", "3058", + "zikatakana", "30B8", + "zinorhebrew", "05AE", + "zlinebelow", "1E95", + "zmonospace", "FF5A", + "zohiragana", "305E", + "zokatakana", "30BE", + "zparen", "24B5", + "zretroflexhook", "0290", + "zstroke", "01B6", + "zuhiragana", "305A", + "zukatakana", "30BA", +); diff --git a/src/utils/afmtodit/make-afmtodit-tables b/src/utils/afmtodit/make-afmtodit-tables new file mode 100755 index 0000000..937bb72 --- /dev/null +++ b/src/utils/afmtodit/make-afmtodit-tables @@ -0,0 +1,139 @@ +#! /bin/sh +# +# make-afmtodit-tables -- script for creating the 'unicode_decomposed' +# and 'AGL_to_unicode' tables +# +# Copyright (C) 2005-2020 Free Software Foundation, Inc. +# Written by Werner Lemberg <wl@gnu.org> +# +# This file is part of groff. +# +# groff is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# groff is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +# +# usage: +# +# make-afmtodit-tables \ +# UnicodeData.txt version-string glyphlist.txt > afmtodit.in +# +# 'UnicodeData.txt' is the central database file from the Unicode +# standard. Unfortunately, it doesn't contain a version number, which +# must be thus provided manually as an additional parameter. +# +# 'glyphlist.txt' holds the Adobe Glyph List (AGL). +# +# This program needs a C preprocessor. +# + +if [ $# -ne 3 ] +then + echo "usage: $0 UnicodeData.txt UNICODE-VERSION-STRING" \ + "glyphlist.txt > afmtodit.tables" + exit 2 +fi + +unicode_data="$1" +unicode_version="$2" +glyph_list="$3" + +for f in "$1" "$3" +do + if ! [ -r "$f" ] + then + echo "$0: '$f' does not exist or is not readable" >&2 + exit 1 + fi +done + +# Handle UnicodeData.txt. +# +# Remove ranges and control characters, +# then extract the decomposition field, +# then remove lines without decomposition, +# then remove all compatibility decompositions. +cat "$1" \ +| sed -e '/^[^;]*;</d' \ +| sed -e 's/;[^;]*;[^;]*;[^;]*;[^;]*;\([^;]*\);.*$/;\1/' \ +| sed -e '/^[^;]*;$/d' \ +| sed -e '/^[^;]*;</d' > $$1 + +# Prepare input for running cpp. +cat $$1 \ +| sed -e 's/^\([^;]*\);/#define \1 /' \ + -e 's/ / u/g' > $$2 +cat $$1 \ +| sed -e 's/^\([^;]*\);.*$/\1 u\1/' >> $$2 + +# Run C preprocessor to recursively decompose. +"${CPP:-cpp}" $$2 $$3 + +# Convert it back to original format. +cat $$3 \ +| sed -e '/#/d' \ + -e '/^$/d' \ + -e 's/ \+/ /g' \ + -e 's/ *$//' \ + -e 's/u//g' \ + -e 's/^\([^ ]*\) /\1;/' > $$4 + +# Write comment. +cat <<END +# This table was algorithmically derived from the file 'UnicodeData.txt' +# for Unicode $unicode_version, available from unicode.org, +# on `date '+%Y-%m-%d'`. +END + +# Emit first table. +echo 'my %unicode_decomposed = (' +cat $$4 \ +| sed -e 's/ /_/g' \ + -e 's/\(.*\);\(.*\)/ "\1", "\2",/' +echo ');' +echo '' + +# Write comment. 
+cat <<END +# This table was algorithmically derived from the Adobe Glyph List (AGL) +# file 'glyphlist.txt' from the GitHub Adobe Type Tools agl-aglfn +# project, on `date '+%Y-%m-%d'`. +# +# See "groff:" comments for altered mappings. +END + +# Convert AGL syntax to a chunk of Perl. +cat "$3" \ +| sed -e '/#/d' \ + -e 's/ /_/g' \ + -e '/;\(E\|F[0-8]\)/d' \ + -e 's/\(.*\);\(.*\)/ "\1", "\2",/' > $$5 + +# Perform groff replacements. +sed \ + -e 's/\("Delta"\), "2206",$/\1, "0394", # groff: not U+2206/' \ + -e 's/\("Omega"\), "2126",$/\1, "03A9", # groff: not U+2126/' \ + -e 's/\("mu"\), "00B5",$/\1, "03BC", # groff: not U+00B5/' \ + < $$5 > $$6 + +# Emit second table. +echo 'my %AGL_to_unicode = (' +cat $$6 +echo ');' + +# Remove temporary files. +rm $$1 $$2 $$3 $$4 $$5 $$6 + +# Local Variables: +# fill-column: 72 +# End: +# vim: set textwidth=72: diff --git a/src/utils/grog/grog.1.man b/src/utils/grog/grog.1.man new file mode 100644 index 0000000..efcd728 --- /dev/null +++ b/src/utils/grog/grog.1.man @@ -0,0 +1,628 @@ +.TH grog @MAN1EXT@ "@MDATE@" "groff @VERSION@" +.SH Name +grog \- \(lqgroff guess\(rq\(eminfer the +.I groff +command a document requires +. +. +.\" ==================================================================== +.\" Legal Terms +.\" ==================================================================== +.\" +.\" Copyright (C) 1989-2021 Free Software Foundation, Inc. +.\" +.\" This file is part of grog, which is part of groff, a free software +.\" project. You can redistribute it and/or modify it under the terms +.\" of the GNU General Public License version 2 (GPL2) as published by +.\" the Free Software Foundation. +.\" +.\" groff is distributed in the hope that it will be useful, but WITHOUT +.\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +.\" or FITNESS FOR A PARTICULAR PURPOSE. +.\" +.\" The text for GPL2 is available in the internet at +.\" <http://www.gnu.org/licenses/gpl2.0.txt>. +. +. 
+.\" Save and disable compatibility mode (for, e.g., Solaris 10/11). +.do nr *groff_grog_1_man_C \n[.cp] +.cp 0 +. +.\" Define fallback for groff 1.23's MR macro if the system lacks it. +.nr do-fallback 0 +.if !\n(.f .nr do-fallback 1 \" mandoc +.if \n(.g .if !d MR .nr do-fallback 1 \" older groff +.if !\n(.g .nr do-fallback 1 \" non-groff *roff +.if \n[do-fallback] \{\ +. de MR +. ie \\n(.$=1 \ +. I \%\\$1 +. el \ +. IR \%\\$1 (\\$2)\\$3 +. . +.\} +.rr do-fallback +. +. +.\" ==================================================================== +.SH Synopsis +.\" ==================================================================== +. +.SY grog +.RB [ \-\-run ] +.RB [ \-\-ligatures ] +.RI [ groff-option\~ .\|.\|.\&] +.RB [ \-\- ] +.RI [ file\~ .\|.\|.] +.YS +. +. +.SY grog +.B \-h +. +.SY grog +.B \-\-help +.YS +. +. +.SY grog +.B \-v +. +.SY grog +.B \-\-version +.YS +. +. +.\" ==================================================================== +.SH Description +.\" ==================================================================== +. +.I grog +reads its input +and guesses which +.MR groff @MAN1EXT@ +options are needed to render it. +. +If no operands are given, +or if +.I file +is +.RB \[lq] \- \[rq], +.I grog +reads the standard input stream. +. +The corresponding +.I groff +command is normally written to the standard output stream. +. +With the option +.BR \-\-run , +the inferred command is written to the standard error stream and then +executed. +. +. +.\" ==================================================================== +.SH Options +.\" ==================================================================== +. +.B \-h +and +.B \-\-help +display a usage message, +whereas +.B \-v +and +.B \-\-version +display version information; +all exit afterward. +. +. +.TP +.B \-\-ligatures +includes the arguments +.B \-P\-y \-PU +in the inferred +.I groff +command. +. +These are supported only by the +.B pdf +output device. +. +. 
+.TP +.B \-\-run +writes the inferred command to the standard error stream and then +executes it. +. +. +.P +All other specified short options +(that is, +arguments beginning with a minus sign +.RB \[lq] \- \[rq] +followed by a letter) +are interpreted as +.I groff +options or option clusters with or without an option argument. +. +Such options are included in the constructed +.I groff +command line. +. +. +.\" ==================================================================== +.SH Details +.\" ==================================================================== +. +.I grog +reads each +.I file +operand, +pattern-matching strings that are statistically likely to be +characteristic of +.MR roff @MAN7EXT@ +documents. +. +It tries to guess which of the following +.I groff +options are required to correctly render the input: +.BR \-e , +.BR \-g , +.BR \-G , +.BR \-j , +.\" gideal is not implemented yet. +.\" .BR \-J , +.BR \-p , +.BR \-R , +.\".BR \-s , +.B \-t +(preprocessors); +and +.BR \-man , +.BR \-mdoc , +.BR \-mdoc\-old , +.BR \-me , +.BR \-mm , +.BR \-mom , +and +.B \-ms +(macro packages). +. +The inferred +.I groff +command including these options and any +.I file +parameters is written to the standard output stream. +. +. +.P +It is possible to specify arbitrary +.I groff +options on the command line. +. +These are included in the inferred command without change. +. +Choices of +.I groff +options include +.B \-C +to enable AT&T +.I troff +compatibility mode and +.B \-T +to select a non-default output device. +. +If the input is not encoded in US-ASCII, +ISO 8859-1, +or IBM code page 1047, +specification of a +.I groff +option to run the +.MR preconv @MAN1EXT@ +preprocessor is advised; +see the +.BR \-D , +.BR \-k , +and +.B \-K +options of +.MR groff @MAN1EXT@ . +. +For UTF-8 input, +.B \-k +is a good choice. +. +. +.P +.I groff +may issue diagnostic messages when an inappropriate +.B \-m +option, +or multiple conflicting ones, +are specified. +. 
+Consequently, +it is best to specify no +.B \-m +options to +.I grog +unless it cannot correctly infer all of the +.B \-m +arguments a document requires. +. +A +.I roff +document can also be written without recourse to any macro package. +. +In such cases, +.I grog +will infer a +.I groff +command without an +.B \-m +option. +. +. +.\" ==================================================================== +.SS Limitations +.\" ==================================================================== +. +.I grog +presumes that the input does not change the escape, +control, +or no-break control characters. +. +.I grog +does not parse +.I roff +input line continuation or control structures +(brace escape sequences and the +.RB \[lq] if \[rq], +.RB \[lq] ie \[rq], +and +.RB \[lq] el \[rq] +requests) +nor +.IR groff 's +.RB \[lq] while \[rq]. +. +Thus the input +. +.RS +.EX +\&.if \[rs] +t .NH 1 +\&.if n .SH +Introduction +.EE +.RE +. +will conceal the use of the +.I ms +macros +.B NH +and +.B SH +from +.IR grog . +. +Such constructions are regarded by +.IR grog 's +implementors as insufficiently common to cause many inference problems. +. +Preprocessors can be even stricter when matching macro calls that +bracket the regions of an input file they replace. +. +.IR pic , +for example, +requires +.BR PS , +.BR PE , +and +.B PF +calls to immediately follow the default control character at the +beginning of a line. +. +. +.P +Detection of the +.B \-s +option +(the +.MR @g@soelim @MAN1EXT@ +preprocessor) +is tricky; +to correctly infer its necessity would require +.I grog +to recursively open all files given as arguments to the +.B .so +request under the same conditions that +.I @g@soelim +itself does so; +see its man page. +. +Recall that +.I @g@soelim +is necessary only if sourced files need to be preprocessed. +. 
+Therefore, +as a workaround, +you may want to run the input through +.I @g@soelim +manually, +piping it to +.IR grog , +and compare the output to running +.I grog +on the input directly. +. +If the +.RI \[lq] @g@soelim \[rq]ed +input causes +.I grog +to infer additional preprocessor options, +then +.B \-s +is likely necessary. +. +. +.RS +.P +.EX +$ \c +.B printf \[dq].TS\[rs]nl.\[rs]nI\[aq]m a table.\[rs]n.TE\[rs]n\[dq] > \ +3.roff +$ \c +.B printf \[dq].so 3.roff\[rs]n\[dq] > 2.roff +$ \c +.B printf \[dq].XP\[rs]n.so 2.roff\[rs]n\[dq] > 1.roff +$ \c +.B grog 1.roff +groff \-ms 1.roff +$ \c +.B @g@soelim 1.roff | grog +groff \-t \-ms \- +.EE +.RE +. +. +.P +In the foregoing example, +we see that this procedure enabled +.I grog +to detect +.MR @g@tbl @MAN1EXT@ +macros, +so we would add +.B \-s +as well as the detected +.B \-t +option to a revised +.I grog +or +.I groff +command. +. +. +.RS +.P +.EX +$ \c +.B grog \-st 1.roff +groff \-st \-ms 1.roff +.EE +.RE +. +. +.\" ==================================================================== +.SH "Exit status" +.\" ==================================================================== +. +.I grog +exits with error status +.B 1 +if a macro package appears to be in use by the input document, +but +.I grog +was unable to infer which one, +or +.B 2 +if there were problems handling an option or operand. +. +It otherwise exits with status +.BR 0 . +. +(If the +.B \-\-run +option is specified, +.IR groff 's +exit status is discarded.) +. +Inferring no preprocessors or macro packages is not an error condition; +a valid +.I roff +document need not use either. +. +Even plain text is valid input, +if one is mindful of the syntax of the control and escape characters. +. +. +.\" ==================================================================== +.SH Examples +.\" ==================================================================== +. +Running +. 
+.RS +.EX +.B grog @DOCDIR@/meintro.me +.EE +.RE +at the command line results in +.RS +.EX +groff \-me @DOCDIR@/meintro.me +.EE +.RE +. +because +.I grog +recognizes that the file +.I meintro.me +is written using macros from the +.I me +package. +. +The command +. +.RS +.EX +.B grog @DOCDIR@/pic.ms +.EE +.RE +. +outputs +. +.RS +.EX +groff \-e \-p \-t \-ms @DOCDIR@/pic.ms +.EE +.RE +. +on the other hand. +. +Besides discerning the +.I ms +macro package, +.I grog +recognizes that the file +.I pic.ms +additionally needs the combination of +.B \-t +for +.IR tbl , +.B \-e +for +.IR eqn , +and +.B \-p +for +.IR pic . +. +. +.\" XXX: grog no longer (June 2021) attempts to detect this scenario. +.\" It's also not a practical one; full-service macro packages don't +.\" generally support being "unloaded" for subsequent processing of +.\" another document using a different one. We do achieve it, with +.\" care, in groff with man(7) and mdoc(7) (see andoc.tmac). +.\" .P +.\" If both of the former example files are combined in the command +.\" . +.\" .RS +.\" .EX +.\" .B grog meintro.me pic.ms +.\" .EE +.\" .RE +.\" . +.\" a diagnostic message is sent to the standard error stream because +.\" some macro packages cannot be combined. +.\" . +.\" Nevertheless the corresponding output with the wrong options is +.\" written to standard output: +.\" . +.\" .RS +.\" .EX +.\" groff \-t \-e \-p \-ms meintro.me pic.ms +.\" .EE +.\" .RE +.\" . +.\" and +.\" .I grog +.\" terminates with an error exit status. +. +. +.P +Consider a file +.IR \%doc/\:\%grnexmpl.me , +which uses the +.I @g@grn +preprocessor to include a +.MR gremlin 1 +picture file in an +.I me \" generic +document. +. +Let's say we want to suppress color output, +produce a DVI file, +and get backtraces for any errors that +.I @g@troff +encounters. +. +The command +. +.RS +.EX +.B grog \-bc \-Idoc \-Tdvi doc/grnexmpl.me +.EE +.RE +. +is processed by +.I grog +into +. 
+.RS +.EX +groff \-bc \-Idoc \-Tdvi \-e \-g \-me doc/grnexmpl.me +.EE +.RE +. +where we can see that +.I grog +has inferred the +.I me \" generic +macro package along with the +.I eqn \" generic +and +.I grn \" generic +preprocessors. +. +(The input file is located in +.I @DOCDIR@ +if you'd like to try this example yourself.) +. +. +.\" ==================================================================== +.SH Authors +.\" ==================================================================== +. +.I grog +was originally written in Bourne shell by James Clark. +. +The current implementation in Perl was written by +.MT groff\-bernd\:.warken\-72@\:web\:.de +Bernd Warken +.ME +and heavily revised by +.MT g.branden\:.robinson@\:gmail\:.com +G.\& Branden Robinson +.ME . +. +. +.\" ==================================================================== +.SH "See also" +.\" ==================================================================== +. +.MR groff @MAN1EXT@ +. +. +.\" Restore compatibility mode (for, e.g., Solaris 10/11). +.cp \n[*groff_grog_1_man_C] +.do rr *groff_grog_1_man_C +. +. +.\" Local Variables: +.\" fill-column: 72 +.\" mode: nroff +.\" End: +.\" vim: set filetype=groff textwidth=72: diff --git a/src/utils/grog/grog.am b/src/utils/grog/grog.am new file mode 100644 index 0000000..f7ca5eb --- /dev/null +++ b/src/utils/grog/grog.am @@ -0,0 +1,50 @@ +# Copyright (C) 1993-2021 Free Software Foundation, Inc. +# +# This file is part of groff. +# +# groff is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# groff is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +grog_srcdir = $(top_srcdir)/src/utils/grog +bin_SCRIPTS += grog +man1_MANS += src/utils/grog/grog.1 +EXTRA_DIST += \ + src/utils/grog/grog.1.man \ + src/utils/grog/grog.pl \ + src/utils/grog/tests/foo.man + +grog: $(grog_srcdir)/grog.pl $(SH_DEPS_SED_SCRIPT) + $(AM_V_GEN)$(RM) $@ \ + && sed -f "$(SH_DEPS_SED_SCRIPT)" \ + -e "s|[@]PERL[@]|$(PERL)|" \ + -e "s|[@]VERSION[@]|$(VERSION)|" \ + -e "$(SH_SCRIPT_SED_CMD)" \ + $(grog_srcdir)/grog.pl \ + >$@ \ + && chmod +x $@ + +grog_TESTS = \ + src/utils/grog/tests/PF-does-not-start-pic-region.sh \ + src/utils/grog/tests/avoid-refer-fakeout.sh \ + src/utils/grog/tests/preserve-groff-options.sh \ + src/utils/grog/tests/recognize-perl-pod.sh \ + src/utils/grog/tests/smoke-test.sh +TESTS += $(grog_TESTS) +EXTRA_DIST += $(grog_TESTS) + + +# Local Variables: +# mode: makefile-automake +# fill-column: 72 +# End: +# vim: set autoindent filetype=automake textwidth=72: diff --git a/src/utils/grog/grog.pl b/src/utils/grog/grog.pl new file mode 100644 index 0000000..28973c5 --- /dev/null +++ b/src/utils/grog/grog.pl @@ -0,0 +1,721 @@ +#!@PERL@ +# grog - guess options for groff command +# Inspired by doctype script in Kernighan & Pike, Unix Programming +# Environment, pp 306-8. + +# Copyright (C) 1993-2021 Free Software Foundation, Inc. +# Written by James Clark. +# Rewritten in Perl by Bernd Warken <groff-bernd.warken-72@web.de>. +# Hacked up by G. Branden Robinson, 2021. + +# This file is part of 'grog', which is part of 'groff'. + +# 'groff' is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. 
+ +# 'groff' is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# <http://www.gnu.org/licenses/gpl-2.0.html>. + +use warnings; +use strict; + +use File::Spec; + +my $groff_version = 'DEVELOPMENT'; + +my @command = (); # the constructed groff command +my @requested_package = (); # arguments to '-m' grog options +my @inferred_preprocessor = (); # preprocessors the document uses +my @inferred_main_package = (); # full-service package(s) detected +my $main_package; # full-service package we go with +my $do_run = 0; # run generated 'groff' command +my $use_compatibility_mode = 0; # is -C being passed to groff? + +my %preprocessor_for_macro = ( + 'EQ', 'eqn', + 'G1', 'grap', + 'GS', 'grn', + 'PS', 'pic', + '[', 'refer', + #'so', 'soelim', # Can't be inferred this way; see grog man page. + 'TS', 'tbl', + 'cstart', 'chem', + 'lilypond', 'glilypond', + 'Perl', 'gperl', + 'pinyin', 'gpinyin', +); + +my $program_name = $0; +{ + my ($v, $d, $f) = File::Spec->splitpath($program_name); + $program_name = $f; +} + +my %user_macro; +my %score = (); + +my @input_file; + +# .TH is both a man(7) macro and often used with tbl(1). We expect to +# find .TH in ms(7) documents only between .TS and .TE calls, and in +# man(7) documents only as the first macro call. +my $have_seen_first_macro_call = 0; +# man(7) and ms(7) use many of the same macro names; do extra checking. 
+my $man_score = 0; +my $ms_score = 0; + +my $had_inference_problem = 0; +my $had_processing_problem = 0; +my $have_any_valid_arguments = 0; + + +sub fail { + my $text = shift; + print STDERR "$program_name: error: $text\n"; + $had_processing_problem = 1; +} + + +sub warn { + my $text = shift; + print STDERR "$program_name: warning: $text\n"; +} + + +# Sort @ARGV into grog's own long options, groff options (collected in +# @command), '-m' package requests (@requested_package), and file +# operands (@input_file). With no operands, read standard input. +sub process_arguments { + my $no_more_options = 0; + my $delayed_option = ''; + my $was_minus = 0; + my $optarg = 0; + my $pdf_with_ligatures = 0; + + foreach my $arg (@ARGV) { + if ( $optarg ) { + push @command, $arg; + $optarg = 0; + next; + } + + if ($no_more_options) { + push @input_file, $arg; + next; + } + + if ($delayed_option) { + if ($delayed_option eq '-m') { + push @requested_package, $arg; + $arg = ''; + } else { + push @command, $delayed_option; + } + + # Test the length, not the truth value: an option argument of '0' + # (as in '-n 0') is false in Perl but must still be passed on. + push @command, $arg if length $arg; + $delayed_option = ''; + next; + } + + unless ( $arg =~ /^-/ ) { # file name, no opt, no optarg + push @input_file, $arg; + next; + } + + # now $arg starts with '-' + + if ($arg eq '-') { + unless ($was_minus) { + push @input_file, $arg; + $was_minus = 1; + } + next; + } + + if ($arg eq '--') { + $no_more_options = 1; + next; + } + + # Handle options that cause an early exit. + &version() if ($arg eq '-v' || $arg eq '--version'); + &usage(0) if ($arg eq '-h' || $arg eq '--help'); + + if ($arg =~ '^--.') { + # The man page documents '--ligatures'; '--with-ligatures' is still + # accepted so that existing command lines keep working. + if ($arg =~ '^--(run|(with-)?ligatures)$') { + $do_run = 1 if ($arg eq '--run'); + $pdf_with_ligatures = 1 if ($arg =~ /ligatures$/); + } else { + &fail("unrecognized grog option '$arg'; ignored"); + &usage(1); + } + next; + } + + # Handle groff options that take an argument. + + # Handle the option argument being separated by whitespace. + if ($arg =~ /^-[dfFIKLmMnoPrTwW]$/) { + $delayed_option = $arg; + next; + } + + # Handle '-m' option without subsequent whitespace. 
+ if ($arg =~ /^-m/) { + my $package = $arg; + $package =~ s/-m//; + push @requested_package, $package; + next; + } + + # Treat anything else as (possibly clustered) groff options that + # take no arguments. + + # Our do_line() needs to know if it should do compatibility parsing. + $use_compatibility_mode = 1 if ($arg =~ /C/); + + push @command, $arg; + } + + if ($pdf_with_ligatures) { + push @command, '-P-y'; + push @command, '-PU'; + } + + @input_file = ('-') unless (@input_file); +} # process_arguments() + + +sub process_input { + foreach my $file (@input_file) { + unless ( open(FILE, $file eq "-" ? $file : "< $file") ) { + &fail("cannot open '$file': $!"); + next; + } + + $have_any_valid_arguments = 1; + + while (my $line = <FILE>) { + chomp $line; + &do_line($line); + } + + close(FILE); + } # end foreach +} # process_input() + + +# Push item onto inferred full-service list only if not already present. +sub push_main_package { + my $pkg = shift; + if (!grep(/^$pkg/, @inferred_main_package)) { + push @inferred_main_package, $pkg; + } +} # push_main_package() + + +sub do_line { + my $command; # request or macro name + my $args; # request or macro arguments + + my $line = shift; + + # Check for a Perl Pod::Man comment. + # + # An alternative to this kludge is noted below: if a "standard" macro + # is redefined, we could delete it from the relevant lists and + # hashes. + if ($line =~ /\\\" Automatically generated by Pod::Man/) { + $man_score += 100; + } + + # Strip comments. + $line =~ s/\\".*//; + $line =~ s/\\#.*// unless $use_compatibility_mode; + + return unless ($line =~ /^[.']/); # Ignore text lines. + + # Perform preprocessor checks; they scan their inputs using a rump + # interpretation of roff(7) syntax that requires the default control + # character and no space between it and the macro name. In AT&T + # compatibility mode, no space (or newline!) is required after the + # macro name, either. 
We mimic the preprocessors themselves; eqn(1), + # for instance, does not recognize '.EN' if '.EQ' has not been seen. + my $boundary = '\\b'; + $boundary = '' if ($use_compatibility_mode); + + if ($line =~ /^\.(\S\S)$boundary/ || $line =~ /^\.(\[)/) { + my $macro = $1; + # groff identifiers can have extremely weird characters in them. + # The ones we care about are conventionally named, but me(7) + # documents can call macros like '+c', so quote carefully. + if (grep(/^\Q$macro\E$/, keys %preprocessor_for_macro)) { + my $preproc = $preprocessor_for_macro{$macro}; + if (!grep(/$preproc/, @inferred_preprocessor)) { + push @inferred_preprocessor, $preproc; + } + } + } + + # Normalize control lines; convert no-break control character to the + # regular one and remove unnecessary whitespace. + $line =~ s/^['.]\s*/./; + $line =~ s/\s+$//; + + return if ($line =~ /^\.$/); # Ignore empty request. + return if ($line =~ /^\.\\?\.$/); # Ignore macro definition ends. + + # Split control line into a request or macro call and its arguments. + + # Handle single-letter macro names. + if ($line =~ /^\.(\S)(\s+(.*))?$/) { + $command = $1; + $args = $2; + # Handle two-letter macro/request names in compatibility mode. + } elsif ($use_compatibility_mode) { + $line =~ /^\.(\S\S)\s*(.*)$/; + $command = $1; + $args = $2; + # Handle multi-letter macro/request names in groff mode. + } else { + $line =~ /^\.(\S+)(\s+(.*))?$/; + $command = $1; + $args = $3; + } + + $command = '' unless ($command); + $args = '' unless ($args); + + ###################################################################### + # user-defined macros + + # If the line calls a user-defined macro, skip it. + return if (exists $user_macro{$command}); + + # These are all requests supported by groff 1.23.0. 
+ my @request = ('ab', 'ad', 'af', 'aln', 'als', 'am', 'am1', 'ami', + 'ami1', 'as', 'as1', 'asciify', 'backtrace', 'bd', + 'blm', 'box', 'boxa', 'bp', 'br', 'brp', 'break', 'c2', + 'cc', 'ce', 'cf', 'cflags', 'ch', 'char', 'chop', + 'class', 'close', 'color', 'composite', 'continue', + 'cp', 'cs', 'cu', 'da', 'de', 'de1', 'defcolor', 'dei', + 'dei1', 'device', 'devicem', 'di', 'do', 'ds', 'ds1', + 'dt', 'ec', 'ecr', 'ecs', 'el', 'em', 'eo', 'ev', + 'evc', 'ex', 'fam', 'fc', 'fchar', 'fcolor', 'fi', + 'fp', 'fschar', 'fspecial', 'ft', 'ftr', 'fzoom', + 'gcolor', 'hc', 'hcode', 'hla', 'hlm', 'hpf', 'hpfa', + 'hpfcode', 'hw', 'hy', 'hym', 'hys', 'ie', 'if', 'ig', + 'in', 'it', 'itc', 'kern', 'lc', 'length', 'linetabs', + 'lf', 'lg', 'll', 'lsm', 'ls', 'lt', 'mc', 'mk', 'mso', + 'msoquiet', 'na', 'ne', 'nf', 'nh', 'nm', 'nn', 'nop', + 'nr', 'nroff', 'ns', 'nx', 'open', 'opena', 'os', + 'output', 'pc', 'pev', 'pi', 'pl', 'pm', 'pn', 'pnr', + 'po', 'ps', 'psbb', 'pso', 'ptr', 'pvs', 'rchar', 'rd', + 'return', 'rfschar', 'rj', 'rm', 'rn', 'rnn', 'rr', + 'rs', 'rt', 'schar', 'shc', 'shift', 'sizes', 'so', + 'soquiet', 'sp', 'special', 'spreadwarn', 'ss', + 'stringdown', 'stringup', 'sty', 'substring', 'sv', + 'sy', 'ta', 'tc', 'ti', 'tkf', 'tl', 'tm', 'tm1', + 'tmc', 'tr', 'trf', 'trin', 'trnt', 'troff', 'uf', + 'ul', 'unformat', 'vpt', 'vs', 'warn', 'warnscale', + 'wh', 'while', 'write', 'writec', 'writem'); + + # Add user-defined macro names to %user_macro. + # + # Macros can also be defined with .dei{,1}, ami{,1}, but supporting + # that would be a heavy lift for the benefit of users that probably + # don't require grog's help. --GBR + if ($command =~ /^(de|am)1?$/) { + my $name = $args; + # Strip off any end macro. + $name =~ s/\s+.*$//; + # Handle special cases of macros starting with '[' or ']'. 
+ if ($name =~ /^[][]/) { + delete $preprocessor_for_macro{'['}; + } + # XXX: If the macro name shadows a standard macro name, maybe we + # should delete the latter from our lists and hashes. This might + # depend on whether the document is trying to remain compatible + # with an existing interface, or simply colliding with names they + # don't care about (consider a raw roff document that defines 'PP'). + # --GBR + $user_macro{$name} = 0 unless (exists $user_macro{$name}); + return; + } + + # XXX: Handle .rm as well? + + # Ignore all other requests. Again, macro names can contain Perl + # regex metacharacters, so be careful. + return if (grep(/^\Q$command\E$/, @request)); + # What remains must be a macro name. + my $macro = $command; + + $have_seen_first_macro_call = 1; + $score{$macro}++; + + + ###################################################################### + # macro package (tmac) + ###################################################################### + + # man and ms share too many macro names for the following approach to + # be fruitful for many documents; see &infer_man_or_ms_package. + # + # We can put one thumb on the scale, however. + if ((!$have_seen_first_macro_call) && ($macro eq 'TH')) { + # TH as the first call in a document screams man(7). 
+ $man_score += 100; + } + + ########## + # mdoc + if ($macro =~ /^Dd$/) { + &push_main_package('doc'); + return; + } + + ########## + # old mdoc + if ($macro =~ /^(Tp|Dp|De|Cx|Cl)$/) { + &push_main_package('doc-old'); + return; + } + + ########## + # me + + if ($macro =~ /^( + [ilnp]p| + n[12]| + sh + )$/x) { + &push_main_package('e'); + return; + } + + + ############# + # mm and mmse + + if ($macro =~ /^( + H| + MULB| + LO| + LT| + NCOL| + PH| + SA + )$/x) { + if ($macro =~ /^LO$/) { + if ( $args =~ /^(DNAMN|MDAT|BIL|KOMP|DBET|BET|SIDOR)/ ) { + &push_main_package('mse'); + return; + } + } elsif ($macro =~ /^LT$/) { + if ( $args =~ /^(SVV|SVH)/ ) { + &push_main_package('mse'); + return; + } + } + &push_main_package('m'); + return; + } + + ########## + # mom + + if ($macro =~ /^( + ALD| + AUTHOR| + CHAPTER_TITLE| + CHAPTER| + COLLATE| + DOCHEADER| + DOCTITLE| + DOCTYPE| + DOC_COVER| + FAMILY| + FAM| + FT| + LEFT| + LL| + LS| + NEWPAGE| + NO_TOC_ENTRY| + PAGENUMBER| + PAGE| + PAGINATION| + PAPER| + PRINTSTYLE| + PT_SIZE| + START| + TITLE| + TOC_AFTER_HERE + TOC| + T_MARGIN| + )$/x) { + &push_main_package('om'); + return; + } +} # do_line() + +my @preprocessor = (); + + +sub infer_preprocessors { + my %option_for_preprocessor = ( + 'eqn', '-e', + 'grap', '-G', + 'grn', '-g', + 'pic', '-p', + 'refer', '-R', + #'soelim', '-s', # Can't be inferred this way; see grog man page. + 'tbl', '-t', + 'chem', '-j' + ); + + # Use a temporary list we can sort later. We want the options to show + # up in a stable order for testing purposes instead of the order their + # macros turn up in the input. groff doesn't care about the order. 
+ my @opt = (); + + foreach my $preproc (@inferred_preprocessor) { + my $preproc_option = $option_for_preprocessor{$preproc}; + + if ($preproc_option) { + push @opt, $preproc_option; + } else { + push @preprocessor, $preproc; + } + } + push @command, sort @opt; +} # infer_preprocessors() + + +# Return true (1) if either the man or ms package is inferred. +sub infer_man_or_ms_package { + my @macro_ms = ('RP', 'TL', 'AU', 'AI', 'DA', 'ND', 'AB', 'AE', + 'QP', 'QS', 'QE', 'XP', + 'NH', + 'R', + 'CW', + 'BX', 'UL', 'LG', 'NL', + 'KS', 'KF', 'KE', 'B1', 'B2', + 'DS', 'DE', 'LD', 'ID', 'BD', 'CD', 'RD', + 'FS', 'FE', + 'OH', 'OF', 'EH', 'EF', 'P1', + 'TA', '1C', '2C', 'MC', + 'XS', 'XE', 'XA', 'TC', 'PX', + 'IX', 'SG'); + + my @macro_man = ('BR', 'IB', 'IR', 'RB', 'RI', 'P', 'TH', 'TP', 'SS', + 'HP', 'PD', + 'AT', 'UC', + 'SB', + 'EE', 'EX', + 'OP', + 'MT', 'ME', 'SY', 'YS', 'TQ', 'UR', 'UE'); + + my @macro_man_or_ms = ('B', 'I', 'BI', + 'DT', + 'RS', 'RE', + 'SH', + 'SM', + 'IP', 'LP', 'PP'); + + for my $key (@macro_man_or_ms, @macro_man, @macro_ms) { + $score{$key} = 0 unless exists $score{$key}; + } + + # Compute a score for each package by counting occurrences of their + # characteristic macros. + foreach my $key (@macro_man_or_ms) { + $man_score += $score{$key}; + $ms_score += $score{$key}; + } + + foreach my $key (@macro_man) { + $man_score += $score{$key}; + } + + foreach my $key (@macro_ms) { + $ms_score += $score{$key}; + } + + if (!$ms_score && !$man_score) { + # The input may be a "raw" roff document; this is not a problem, + # but it does mean no package was inferred. + return 0; + } elsif ($ms_score == $man_score) { + # If there was no TH call, it's not a (valid) man(7) document. 
+ if (!$score{'TH'}) { + &push_main_package('s'); + } else { + &warn("document ambiguous; disambiguate with -man or -ms option"); + $had_inference_problem = 1; + } + return 0; + } elsif ($ms_score > $man_score) { + &push_main_package('s'); + } else { + &push_main_package('an'); + } + + return 1; +} # infer_man_or_ms_package() + + +sub construct_command { + my @main_package = ('an', 'doc', 'doc-old', 'e', 'm', 'om', 's'); + my $file_args_included; # file args now only at 1st preproc + unshift @command, 'groff'; + if (@preprocessor) { + my @progs; + $progs[0] = shift @preprocessor; + push(@progs, @input_file); + for (@preprocessor) { + push @progs, '|'; + push @progs, $_; + } + push @progs, '|'; + unshift @command, @progs; + $file_args_included = 1; + } else { + $file_args_included = 0; + } + + foreach (@command) { + next unless /\s/; + # when one argument has several words, use accents + $_ = "'" . $_ . "'"; + } + + my $have_ambiguous_main_package = 0; + my $inferred_main_package_count = scalar @inferred_main_package; + + # Did we infer multiple full-service packages? + if ($inferred_main_package_count > 1) { + $have_ambiguous_main_package = 1; + # For each one the user explicitly requested... + for my $pkg (@requested_package) { + # ...did it resolve the ambiguity for us? + if (grep(/$pkg/, @inferred_main_package)) { + @inferred_main_package = ($pkg); + $have_ambiguous_main_package = 0; + last; + } + } + } elsif ($inferred_main_package_count == 1) { + $main_package = shift @inferred_main_package; + } + + if ($have_ambiguous_main_package) { + # TODO: Alphabetical is probably not the best ordering here. We + # should tally up scores on a per-package basis generally, not just + # for an and s. + for my $pkg (@main_package) { + if (grep(/$pkg/, @inferred_main_package)) { + $main_package = $pkg; + &warn("document ambiguous (choosing '$main_package'" + . " from '@inferred_main_package'); disambiguate with -m" + . 
" option"); + $had_inference_problem = 1; + last; + } + } + } + + # If a full-service package was explicitly requested, warn if the + # inference differs from the request. This also ensures that all -m + # arguments are placed in the same order that the user gave them; + # caveat dictator. + my @auxiliary_package_argument = (); + for my $pkg (@requested_package) { + my $is_auxiliary_package = 1; + if (grep(/$pkg/, @main_package)) { + $is_auxiliary_package = 0; + if ($pkg ne $main_package) { + &warn("overriding inferred package '$main_package'" + . " with requested package '$pkg'"); + $main_package = $pkg; + } + } + if ($is_auxiliary_package) { + push @auxiliary_package_argument, "-m" . $pkg; + } + } + + push @command, '-m' . $main_package if ($main_package); + push @command, @auxiliary_package_argument; + push @command, @input_file unless ($file_args_included); + + ######### + # execute the 'groff' command here with option '--run' + if ( $do_run ) { # with --run + print STDERR "@command\n"; + my $cmd = join ' ', @command; + system($cmd); + } else { + print "@command\n"; + } +} # construct_command() + + +sub usage { + my $stream = *STDOUT; + my $had_error = shift; + $stream = *STDERR if $had_error; + my $grog = $program_name; + print $stream "usage: $grog [--ligatures] [--run]" . + " [groff-option ...] [--] [file ...]\n" . + "usage: $grog {-v | --version}\n" . + "usage: $grog {-h | --help}\n"; + unless ($had_error) { + print $stream "\n" . +"Read each roff(7) input FILE and attempt to infer an appropriate\n" . +"groff(1) command to format it. 
See the grog(1) manual page.\n"; + } + exit $had_error; +} + + +sub version { + print "GNU $program_name (groff) $groff_version\n"; + exit 0; +} # version() + + +# initialize + +my $in_unbuilt_source_tree = 0; +{ + my $at = '@'; + $in_unbuilt_source_tree = 1 if ('@VERSION@' eq "${at}VERSION${at}"); +} + +$groff_version = '@VERSION@' unless ($in_unbuilt_source_tree); + +&process_arguments(); +&process_input(); + +if ($have_any_valid_arguments) { + &infer_preprocessors(); + &infer_man_or_ms_package() if (scalar @inferred_main_package != 1); + &construct_command(); +} + +exit 2 if ($had_processing_problem); +exit 1 if ($had_inference_problem); +exit 0; + +# Local Variables: +# fill-column: 72 +# mode: CPerl +# End: +# vim: set cindent noexpandtab shiftwidth=2 softtabstop=2 textwidth=72: diff --git a/src/utils/grog/tests/PF-does-not-start-pic-region.sh b/src/utils/grog/tests/PF-does-not-start-pic-region.sh new file mode 100755 index 0000000..d3b871f --- /dev/null +++ b/src/utils/grog/tests/PF-does-not-start-pic-region.sh @@ -0,0 +1,33 @@ +#!/bin/sh +# +# Copyright (C) 2021 Free Software Foundation, Inc. +# +# This file is part of groff. +# +# groff is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# groff is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +grog="${abs_top_builddir:-.}/grog" + +# Regression test Savannah #60772. +# +# .PF does not _start_ a pic(1) region; it ends one. 
+ +DOC='.PF +.PE' + +echo "$DOC" | "$grog" \ + | grep -Fqx 'groff -' + +# vim:set ai et sw=4 ts=4 tw=72: diff --git a/src/utils/grog/tests/avoid-refer-fakeout.sh b/src/utils/grog/tests/avoid-refer-fakeout.sh new file mode 100755 index 0000000..f163bed --- /dev/null +++ b/src/utils/grog/tests/avoid-refer-fakeout.sh @@ -0,0 +1,34 @@ +#!/bin/sh +# +# Copyright (C) 2021 Free Software Foundation, Inc. +# +# This file is part of groff. +# +# groff is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# groff is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +grog="${abs_top_builddir:-.}/grog" + +# Regression-test Savannah #61520. +# +# Don't be fooled by documents (like xterm's ctlseqs.ms) that define +# macros with names that start with '[' or ']'. + +input=".de [] +.. +.[] foo" + +echo "$input" | "$grog" | grep -Fqx 'groff -' + +# vim:set ai et sw=4 ts=4 tw=72: diff --git a/src/utils/grog/tests/foo.man b/src/utils/grog/tests/foo.man new file mode 100644 index 0000000..28e9fe6 --- /dev/null +++ b/src/utils/grog/tests/foo.man @@ -0,0 +1,146 @@ +.\" Automatically generated by Pod::Man 4.10 (Pod::Simple 3.35) +.\" +.\" Standard preamble: +.\" ======================================================================== +.de Sp \" Vertical space (when we can't use .PP) +.if t .sp .5v +.if n .sp +.. +.de Vb \" Begin verbatim text +.ft CW +.nf +.ne \\$1 +.. +.de Ve \" End verbatim text +.ft R +.fi +.. +.\" Set up some character translations and predefined strings. 
\*(-- will +.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left +.\" double quote, and \*(R" will give a right double quote. \*(C+ will +.\" give a nicer C++. Capital omega is used to do unbreakable dashes and +.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, +.\" nothing in troff, for use with C<>. +.tr \(*W- +.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' +.ie n \{\ +. ds -- \(*W- +. ds PI pi +. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch +. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch +. ds L" "" +. ds R" "" +. ds C` "" +. ds C' "" +'br\} +.el\{\ +. ds -- \|\(em\| +. ds PI \(*p +. ds L" `` +. ds R" '' +. ds C` +. ds C' +'br\} +.\" +.\" Escape single quotes in literal strings from groff's Unicode transform. +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.\" +.\" If the F register is >0, we'll generate index entries on stderr for +.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index +.\" entries marked with X<> in POD. Of course, you'll have to process the +.\" output yourself in some meaningful fashion. +.\" +.\" Avoid warning from groff about undefined register 'F'. +.de IX +.. +.nr rF 0 +.if \n(.g .if rF .nr rF 1 +.if (\n(rF:(\n(.g==0)) \{\ +. if \nF \{\ +. de IX +. tm Index:\\$1\t\\n%\t"\\$2" +.. +. if !\nF==2 \{\ +. nr % 0 +. nr F 2 +. \} +. \} +.\} +.rr rF +.\" +.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). +.\" Fear. Run. Save yourself. No user-serviceable parts. +. \" fudge factors for nroff and troff +.if n \{\ +. ds #H 0 +. ds #V .8m +. ds #F .3m +. ds #[ \f1 +. ds #] \fP +.\} +.if t \{\ +. ds #H ((1u-(\\\\n(.fu%2u))*.13m) +. ds #V .6m +. ds #F 0 +. ds #[ \& +. ds #] \& +.\} +. \" simple accents for nroff and troff +.if n \{\ +. ds ' \& +. ds ` \& +. ds ^ \& +. ds , \& +. ds ~ ~ +. ds / +.\} +.if t \{\ +. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" +. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' +. 
ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' +. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' +. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' +. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' +.\} +. \" troff and (daisy-wheel) nroff accents +.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' +.ds 8 \h'\*(#H'\(*b\h'-\*(#H' +.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] +.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' +.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' +.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] +.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] +.ds ae a\h'-(\w'a'u*4/10)'e +.ds Ae A\h'-(\w'A'u*4/10)'E +. \" corrections for vroff +.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' +.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' +. \" for low resolution devices (crt and lpr) +.if \n(.H>23 .if \n(.V>19 \ +\{\ +. ds : e +. ds 8 ss +. ds o a +. ds d- d\h'-1'\(ga +. ds D- D\h'-1'\(hy +. ds th \o'bp' +. ds Th \o'LP' +. ds ae ae +. ds Ae AE +.\} +.rm #[ #] #H #V #F C +.\" ======================================================================== +.\" +.IX Title "FOO 1" +.TH FOO 1 "2021-06-30" "perl v5.28.1" "User Contributed Perl Documentation" +.\" For nroff, turn off justification. Always turn off hyphenation; it makes +.\" way too many mistakes in technical documents. +.if n .ad l +.nh +.SH "Name" +.IX Header "Name" +foo \- a frobnicator +.SH "Description" +.IX Header "Description" +This is my program. diff --git a/src/utils/grog/tests/preserve-groff-options.sh b/src/utils/grog/tests/preserve-groff-options.sh new file mode 100755 index 0000000..3290798 --- /dev/null +++ b/src/utils/grog/tests/preserve-groff-options.sh @@ -0,0 +1,30 @@ +#!/bin/sh +# +# Copyright (C) 2021 Free Software Foundation, Inc. +# +# This file is part of groff. 
+# +# groff is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# groff is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +grog="${abs_top_builddir:-.}/grog" + +# Regression test Savannah #57873. +# +# Don't mangle groff options. + +echo | "$grog" -ww -fN -P-pa5 -ra5=0 \ + | grep -Fqx 'groff -ww -fN -P-pa5 -ra5=0 -' + +# vim:set ai et sw=4 ts=4 tw=72: diff --git a/src/utils/grog/tests/recognize-perl-pod.sh b/src/utils/grog/tests/recognize-perl-pod.sh new file mode 100755 index 0000000..bc13ece --- /dev/null +++ b/src/utils/grog/tests/recognize-perl-pod.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# +# Copyright (C) 2021 Free Software Foundation, Inc. +# +# This file is part of groff. +# +# groff is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# groff is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +grog="${abs_top_builddir:-.}/grog" +doc="${abs_top_srcdir:-..}/src/utils/grog/tests/foo.man" + +# Regression test Savannah #59622. 
+# +# Recognize the strongly-accented dialect of man(7) produced by +# pod2man(1). + +"$grog" "$doc" | grep '^groff -man .*/src/utils/grog/tests/foo\.man' + +# vim:set ai et sw=4 ts=4 tw=72: diff --git a/src/utils/grog/tests/smoke-test.sh b/src/utils/grog/tests/smoke-test.sh new file mode 100755 index 0000000..2da1fc4 --- /dev/null +++ b/src/utils/grog/tests/smoke-test.sh @@ -0,0 +1,153 @@ +#!/bin/sh +# +# Copyright (C) 2021 Free Software Foundation, Inc. +# +# This file is part of groff. +# +# groff is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# groff is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +set -e + +grog="${abs_top_builddir:-.}/grog" +src="${abs_top_srcdir:-..}" + +doc=src/preproc/eqn/neqn.1 +echo "testing simple man(7) page $doc" >&2 +"$grog" "$doc" | \ + grep -Fqx 'groff -man '"$doc" + +doc=src/preproc/tbl/tbl.1 +echo "testing tbl(1)-using man(7) page $doc" >&2 +"$grog" "$doc" | \ + grep -Fqx 'groff -t -man '"$doc" + +doc=man/groff_diff.7 +echo "testing eqn(1)-using man(7) page $doc" >&2 +"$grog" "$doc" | \ + grep -Fqx 'groff -e -man '"$doc" + +# BUG: grog doesn't yet handle .if, .ie, .while. 
+#doc=src/preproc/soelim/soelim.1 +#echo "testing pic(1)-using man(7) page $doc" >&2 +#"$grog" "$doc" | \ +# grep -Fqx 'groff -p -man '"$doc" + +doc=tmac/groff_mdoc.7 +echo "testing tbl(1)-using mdoc(7) page $doc" >&2 +"$grog" "$doc" | \ + grep -Fqx 'groff -t -mdoc '"$doc" + +doc=$src/doc/meintro.me.in +echo "testing me(7) document $doc" >&2 +"$grog" "$doc" | \ + grep -Fqx 'groff -me '"$doc" + +doc=$src/doc/meintro_fr.me.in +echo "testing tbl(1)-using me(7) document $doc" >&2 +"$grog" "$doc" | \ + grep -Fqx 'groff -t -me '"$doc" + +doc=$src/doc/meref.me.in +echo "testing me(7) document $doc" >&2 +"$grog" "$doc" | \ + grep -Fqx 'groff -me '"$doc" + +doc=$src/doc/grnexmpl.me +echo "testing grn(1)- and eqn(1)-using me(7) document $doc" >&2 +"$grog" "$doc" | \ + grep -Fqx 'groff -e -g -me '"$doc" + +doc=$src/contrib/mm/examples/letter.mm +echo "testing mm(7) document $doc" >&2 +"$grog" "$doc" | \ + grep -Fqx 'groff -mm '"$doc" + +doc=$src/contrib/mom/examples/copyright-chapter.mom +echo "testing mom(7) document $doc" >&2 +"$grog" "$doc" | \ + grep -Fqx 'groff -mom '"$doc" + +doc=$src/contrib/mom/examples/copyright-default.mom +echo "testing mom(7) document $doc" >&2 +"$grog" "$doc" | \ + grep -Fqx 'groff -mom '"$doc" + +doc=$src/contrib/mom/examples/letter.mom +echo "testing mom(7) document $doc" >&2 +"$grog" "$doc" | \ + grep -Fqx 'groff -mom '"$doc" + +doc=$src/contrib/mom/examples/mom-pdf.mom +echo "testing mom(7) document $doc" >&2 +"$grog" "$doc" | \ + grep -Fqx 'groff -mom '"$doc" + +doc=$src/contrib/mom/examples/mon_premier_doc.mom +echo "testing mom(7) document $doc" >&2 +"$grog" "$doc" | \ + grep -Fqx 'groff -mom '"$doc" + +doc=$src/contrib/mom/examples/sample_docs.mom +echo "testing mom(7) document $doc" >&2 +"$grog" "$doc" | \ + grep -Fqx 'groff -mom '"$doc" + +doc=$src/contrib/mom/examples/slide-demo.mom +echo "testing mom(7) document $doc" >&2 +"$grog" "$doc" | \ + grep -Fqx 'groff -e -p -t -mom '"$doc" + +doc=$src/contrib/mom/examples/typesetting.mom 
+echo "testing mom(7) document $doc" >&2 +"$grog" "$doc" | \ + grep -Fqx 'groff -mom '"$doc" + +doc=$src/contrib/pdfmark/cover.ms +echo "testing ms(7) document $doc" >&2 +"$grog" "$doc" | \ + grep -Fqx 'groff -ms '"$doc" + +doc=$src/contrib/pdfmark/pdfmark.ms +echo "testing ms(7) document $doc" >&2 +"$grog" "$doc" | \ + grep -Fqx 'groff -ms '"$doc" + +doc=$src/doc/ms.ms +echo "testing eqn(1)- and tbl(1)-using ms(7) document $doc" >&2 +"$grog" "$doc" | \ + grep -Fqx 'groff -e -t -ms '"$doc" + +doc=$src/doc/pic.ms +echo "testing tbl(1)-, eqn(1)-, and pic(1)-using ms(7) document $doc" \ + >&2 +"$grog" "$doc" | \ + grep -Fqx 'groff -e -p -t -ms '"$doc" + +doc=$src/doc/webpage.ms +echo "testing ms(7) document $doc" >&2 +# BUG: Should detect -mwww (and -mpspic?) too. +"$grog" "$doc" | \ + grep -Fqx 'groff -ms '"$doc" + +# Test manual specification of auxiliary macro packages. +echo "testing ms(7) document $doc with '-m www' option" >&2 +"$grog" "$doc" -m www | \ + grep -Fqx 'groff -ms -mwww '"$doc" + +echo "testing ms(7) document $doc with '-mwww' option" >&2 +"$grog" "$doc" -mwww | \ + grep -Fqx 'groff -ms -mwww '"$doc" + +# vim:set ai et sw=4 ts=4 tw=72: diff --git a/src/utils/hpftodit/hpftodit.1.man b/src/utils/hpftodit/hpftodit.1.man new file mode 100644 index 0000000..12e3af7 --- /dev/null +++ b/src/utils/hpftodit/hpftodit.1.man @@ -0,0 +1,476 @@ +.TH hpftodit @MAN1EXT@ "@MDATE@" "groff @VERSION@" +.SH Name +hpftodit \- create font description files for use with +.I groff +and +.I grolj4 +. +. +.\" ==================================================================== +.\" Legal Terms +.\" ==================================================================== +.\" +.\" Copyright (C) 1994-2020 Free Software Foundation, Inc. +.\" +.\" Permission is granted to make and distribute verbatim copies of this +.\" manual provided the copyright notice and this permission notice are +.\" preserved on all copies. 
+.\" +.\" Permission is granted to copy and distribute modified versions of +.\" this manual under the conditions for verbatim copying, provided that +.\" the entire resulting derived work is distributed under the terms of +.\" a permission notice identical to this one. +.\" +.\" Permission is granted to copy and distribute translations of this +.\" manual into another language, under the above conditions for +.\" modified versions, except that this permission notice may be +.\" included in translations approved by the Free Software Foundation +.\" instead of in the original English. +. +. +.\" Save and disable compatibility mode (for, e.g., Solaris 10/11). +.do nr *groff_hpftodit_1_man_C \n[.cp] +.cp 0 +. +.\" Define fallback for groff 1.23's MR macro if the system lacks it. +.nr do-fallback 0 +.if !\n(.f .nr do-fallback 1 \" mandoc +.if \n(.g .if !d MR .nr do-fallback 1 \" older groff +.if !\n(.g .nr do-fallback 1 \" non-groff *roff +.if \n[do-fallback] \{\ +. de MR +. ie \\n(.$=1 \ +. I \%\\$1 +. el \ +. IR \%\\$1 (\\$2)\\$3 +. . +.\} +.rr do-fallback +. +. +.\" ==================================================================== +.SH Synopsis +.\" ==================================================================== +. +.SY hpftodit +.RB [ \-aqs ] +.RB [ \-i\~\c +.IR n ] +.I tfm-file +.I map-file +.I font-description +.YS +. +. +.SY hpftodit +.B \-d +.I tfm-file +.RI [ map-file ] +.YS +. +. +.SY hpftodit +.B \-\-help +.YS +. +. +.SY hpftodit +.B \-v +. +.SY hpftodit +.B \-\-version +.YS +. +. +.\" ==================================================================== +.SH Description +.\" ==================================================================== +. +.I hpftodit +creates a font description file for use with a Hewlett-Packard +LaserJet\~4-\%series +(or newer) +printer with the +.MR grolj4 @MAN1EXT@ +output driver of +.MR groff @MAN1EXT@ , +using data from an HP tagged font metric (TFM) file. +. 
+.I tfm-file +is the name of the font's TFM file; +Intellifont and TrueType TFM files are supported, +but symbol set TFM files are not. +. +.I map-file +is a file giving the +.I groff +special character identifiers for glyphs in the font; +this file should consist of a sequence of lines of the form +.RS +.EX +.IR "m u c1 c2 " "\&.\|.\|.\& [#" " comment" "]" +.EE +.RE +where +.I m +is a decimal integer giving the glyph's MSL +(Master Symbol List) +number, +.I u +is a hexadecimal integer giving its Unicode character code, +and +.IR c1 , +.IR c2 ", .\|.\|." +are its +.I groff +glyph names +(see +.MR groff_char @MAN7EXT@ +for a list). +. +The values can be separated by any number of spaces and/or tabs. +. +The Unicode value must use uppercase hexadecimal digits A\^\[en]\^F, +and must lack a leading +.RB \[lq] 0x \[rq], +.RB \[lq] u \[rq], +or +.RB \[lq] U+ \[rq]. +. +Unicode values corresponding to composite glyphs are decomposed; +that is +.RB \[lq] u00C0 \[rq] +becomes +.RB \[lq] u0041_0300 \[rq]. +. +A glyph without a +.I groff +special character identifier may be named +.BI u XXXX +if the glyph corresponds to a Unicode value, +or as an unnamed glyph +.RB \[lq] \-\-\- \[rq]. +. +If the given Unicode value is in the Private Use Area (PUA) +(0xE000\^\[en]\^0xF8FF), +the glyph is included as an unnamed glyph. +. +Refer to +.MR groff_diff @MAN1EXT@ +for additional information about unnamed glyphs and how to access them. +. +. +.P +Blank lines and lines beginning with +.RB \[lq] # \[rq] +are ignored. +. +A +.RB \[lq] # \[rq] +following one or more +.I groff +names begins a comment. +. +Because +.RB \[lq] # \[rq] +is a valid +.I groff +name, +it must appear first in a list of +.I groff +names if a comment is included, +as in +. +.RS +.EX +3 0023 # # number sign +.EE +.RE +. +or +. +.RS +.EX +3 0023 # sh # number sign +.EE +.RE +. +whereas in +. +.RS +.EX +3 0023 sh # # number sign +.EE +.RE +. +the first +.RB \[lq] # \[rq] +is interpreted as the beginning of the comment. 
+. +. +.P +Output is written in +.MR groff_font @MAN5EXT@ +format to +.I font-description, +a file named for the intended +.I groff +font name; +if this operand is +.RB \[lq] \- \[rq], +the font description is written to the standard output stream. +. +. +.LP +If the +.B \-i +option is used, +.I hpftodit +automatically will generate an italic correction, +a left italic correction, +and a subscript correction for each glyph +(the significance of these parameters is explained in +.MR groff_font @MAN5EXT@ ). +. +. +.\" ==================================================================== +.SH Options +.\" ==================================================================== +. +.B \-\-help +displays a usage message, +while +.B \-v +and +.B \-\-version +show version information; +all exit afterward. +. +. +.TP +.B \-a +Include glyphs in the TFM file that are not included in +.IR map-file . +. +A glyph with corresponding Unicode value is given the name +.RI u XXXX ; +a glyph without a Unicode value is included as an unnamed glyph +\[lq]\-\^\-\^\-\[rq]. +. +A glyph with a Unicode value in the Private Use Area +(0xE000\^\[en]\^0xF8FF) +is also included as an unnamed glyph. +. +. +.IP +This option provides a simple means of adding Unicode-named and +unnamed glyphs to a font without including them in the map file, +but it affords little control over which glyphs are placed in a regular +font and which are placed in a special font. +. +The presence or absence of the +.B \-s +option has some effect on which glyphs are included: +without it, +only the \[lq]text\[rq] symbol sets are searched for matching glyphs; +with it, +only the \[lq]mathematical\[rq] symbol sets are searched. +. +Nonetheless, +restricting the symbol sets searched isn't very selective\[em]many +glyphs are placed in both regular and special fonts. +. +Normally, +.B \-a +should be used only as a last resort. +. +. 
+.TP +.B \-d +Dump information about the TFM file to the standard output stream; +use this to ensure that a TFM file is a proper match for a font, +and that its contents are suitable. +. +The information includes the values of important TFM tags and a listing +(by MSL number for Intellifont TFM files or by Unicode value for +TrueType TFM files) +of the glyphs included in the TFM file. +. +The unit of measure \[lq]DU\[rq] for some tags indicates design units; +there are 8782\~design units per em for Intellifont fonts, +and 2048\~design units per em for TrueType fonts. +. +Note that the accessibility of a glyph depends on its inclusion in a +symbol set; +some TFM files list many glyphs but only a few symbol sets. +. +. +.IP +The glyph listing includes the glyph index within the TFM file, +the MSL or Unicode value, +and the symbol set and character code that will be used to print the +glyph. +. +If +.I map-file +is given, +.I groff +names are given for matching glyphs. +. +If only the glyph index and MSL or Unicode value are given, +the glyph does not appear in any supported symbol set and cannot be +printed. +. +. +.IP +With the +.B \-d +option, +.I map-file +is optional, +and +.I font-description +is ignored if given. +. +. +.TP +.BI \-i\~ n +Generate an italic correction for each glyph so that its width plus its +italic correction is equal to +.I n +thousandths of an em plus the amount by which the right edge of the +glyph's bounding box is to the right of its origin. +. +If a negative italic correction would result, +use a zero italic correction instead. +. +. +.IP +Also generate a subscript correction equal to the product of the tangent +of the slant of the font and four fifths of the x-height of the font. +. +If a subscript correction greater than the italic correction would +result, +use a subscript correction equal to the italic correction instead. +. +. 
+.IP +Also generate a left italic correction for each glyph equal to +.I n +thousandths of an em plus the amount by which the left edge of the +glyph's bounding box is to the left of its origin. +. +The left italic correction may be negative. +. +. +.IP +This option normally is needed only with italic or oblique fonts; +a value of 50 +(0.05\~em) +usually is a reasonable choice. +. +. +.TP +.B \-q +Suppress warnings about glyphs in the map file that were not found in +the TFM file. +. +Warnings never are given for unnamed glyphs or for glyphs named by their +Unicode values. +. +This option is useful when sending the output of +.I hpftodit +to the standard output stream. +. +. +.TP +.B \-s +Add the +.B special +directive to the font description file, +affecting the order in which HP symbol sets are searched for each glyph. +. +Without this option, +the \[lq]text\[rq] sets are searched before the \[lq]mathematical\[rq] +symbol sets. +. +With it, +the search order is reversed. +. +. +.\" ==================================================================== +.SH Files +.\" ==================================================================== +. +.TP +.I @FONTDIR@/\:\%devlj4/\:DESC +describes the +.B lj4 +output device. +. +. +.TP +.IR @FONTDIR@/\:\%devlj4/ F +describes the font known +.RI as\~ F +on device +.BR lj4 . +. +. +.TP +.I @FONTDIR@/\:\%devlj4/\:\%generate/\:\%Makefile +is a +.MR make 1 +script that uses +.MR hpftodit @MAN1EXT@ +to prepare the +.I groff +font description files above from HP TFM data; +it can be used to regenerate them in the event the TFM files are +updated. +. +. +.TP +.I @FONTDIR@/\:\%devlj4/\:\%generate/\:\%special\:.awk +is an +.MR awk 1 +script that corrects the Intellifont-based height metrics for several +glyphs in the +.B S +(special) font for TrueType CG Times used in the HP LaserJet\~4000 and +later. +. +. 
+.TP +.I @FONTDIR@/\:\%devlj4/\:\%generate/\:\%special\:.map +.TQ +.I @FONTDIR@/\:\%devlj4/\:\%generate/\:\%symbol\:.map +.TQ +.I @FONTDIR@/\:\%devlj4/\:\%generate/\:text\:.map +.TQ +.I @FONTDIR@/\:\%devlj4/\:\%generate/\:\%wingdings.map +map MSL indices and HP Unicode PUA assignments to +.I groff +special character identifiers. +. +. +.\" ==================================================================== +.SH "See also" +.\" ==================================================================== +. +.MR groff @MAN1EXT@ , +.MR groff_diff @MAN1EXT@ , +.MR grolj4 @MAN1EXT@ , +.MR groff_font @MAN5EXT@ +. +. +.\" Restore compatibility mode (for, e.g., Solaris 10/11). +.cp \n[*groff_hpftodit_1_man_C] +.do rr *groff_hpftodit_1_man_C +. +. +.\" Local Variables: +.\" fill-column: 72 +.\" mode: nroff +.\" End: +.\" vim: set filetype=groff textwidth=72: diff --git a/src/utils/hpftodit/hpftodit.am b/src/utils/hpftodit/hpftodit.am new file mode 100644 index 0000000..e31e8f5 --- /dev/null +++ b/src/utils/hpftodit/hpftodit.am @@ -0,0 +1,34 @@ +# Automake rules for 'src utils hpftodit' +# +# Copyright (C) 2014-2020 Free Software Foundation, Inc. +# +# 'groff' is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. +# +# 'groff' is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# <http://www.gnu.org/licenses/gpl-2.0.html>. 
+# +######################################################################## + +bin_PROGRAMS += hpftodit +man1_MANS += src/utils/hpftodit/hpftodit.1 +EXTRA_DIST += src/utils/hpftodit/hpftodit.1.man +hpftodit_LDADD = libgroff.a $(LIBM) lib/libgnu.a +hpftodit_SOURCES = \ + src/utils/hpftodit/hpftodit.cpp \ + src/utils/hpftodit/hpuni.cpp + + +# Local Variables: +# mode: makefile-automake +# fill-column: 72 +# End: +# vim: set autoindent filetype=automake textwidth=72: diff --git a/src/utils/hpftodit/hpftodit.cpp b/src/utils/hpftodit/hpftodit.cpp new file mode 100644 index 0000000..4982e19 --- /dev/null +++ b/src/utils/hpftodit/hpftodit.cpp @@ -0,0 +1,1465 @@ +/* Copyright (C) 1994-2020 Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.com) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or +(at your option) any later version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ + +/* +TODO +devise new names for useful characters +option to specify symbol sets to look in +put filename in error messages (or fix lib) +*/ + +#include "lib.h" + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "posix.h" +#include "errarg.h" +#include "error.h" +#include "cset.h" +#include "nonposix.h" +#include "unicode.h" + +extern "C" const char *Version_string; +extern const char *hp_msl_to_unicode_code(const char *); + +#define SIZEOF(v) (sizeof(v)/sizeof(v[0])) +#define equal(a, b) (strcmp(a, b) == 0) +// only valid if is_uname(c) has returned true +#define is_decomposed(c) strchr(c, '_') + +#define NO 0 +#define YES 1 + +#define MSL 0 +#define SYMSET 1 +#define UNICODE 2 + +#define UNNAMED "---" + +static double multiplier = 3.0; // make Agfa-based unitwidth an integer + +inline +int scale(int n) +{ + return int(n * multiplier + 0.5); +} + +// tags in TFM file + +enum tag_type { + min_tag = 400, + type_tag = 400, + copyright_tag = 401, + comment_tag = 402, + charcode_tag = 403, // MSL for Intellifont, Unicode for TrueType + symbol_set_tag = 404, + unique_identifier_tag = 405, + inches_per_point_tag = 406, + nominal_point_size_tag = 407, + design_units_per_em_tag = 408, + posture_tag = 409, + type_structure_tag = 410, + stroke_weight_tag = 411, + spacing_tag = 412, + slant_tag = 413, + appearance_width_tag = 414, + serif_style_tag = 415, + font_name_tag = 417, + typeface_source_tag = 418, + average_width_tag = 419, + max_width_tag = 420, + word_spacing_tag = 421, + recommended_line_spacing_tag = 422, + cap_height_tag = 423, + x_height_tag = 424, + max_ascent_tag = 425, + max_descent_tag = 426, + lower_ascent_tag = 427, + lower_descent_tag = 428, + underscore_depth_tag = 429, + underscore_thickness_tag = 430, + uppercase_accent_height_tag = 431, + lowercase_accent_height_tag = 432, + width_tag = 433, + vertical_escapement_tag = 434, + left_extent_tag 
= 435, + right_extent_tag = 436, + ascent_tag = 437, + descent_tag = 438, + pair_kern_tag = 439, + sector_kern_tag = 440, + track_kern_tag = 441, + typeface_tag = 442, + panose_tag = 443, + max_tag = 443 +}; + +const char *tag_name[] = { + "Symbol Set", + "Font Type" // MSL for Intellifont, Unicode for TrueType +}; + +// types in TFM file +enum { + BYTE_TYPE = 1, + ASCII_TYPE = 2, // NUL-terminated string + USHORT_TYPE = 3, + LONG_TYPE = 4, // unused + RATIONAL_TYPE = 5, // 8-byte numerator + 8-byte denominator + SIGNED_BYTE_TYPE = 16, // unused + SIGNED_SHORT_TYPE = 17, + SIGNED_LONG_TYPE = 18 // unused +}; + +typedef unsigned char byte; +typedef unsigned short uint16; +typedef short int16; +typedef unsigned int uint32; + +class File { +public: + File(const char *); + void skip(int n); + byte get_byte(); + uint16 get_uint16(); + uint32 get_uint32(); + uint32 get_uint32(char *orig); + void seek(uint32 n); +private: + unsigned char *buf_; + const unsigned char *ptr_; + const unsigned char *end_; +}; + +struct entry { + char present; + uint16 type; + uint32 count; + uint32 value; + char orig_value[4]; + entry() : present(0) { } +}; + +struct char_info { + uint16 charcode; + uint16 width; + int16 ascent; + int16 descent; + int16 left_extent; + uint16 right_extent; + uint16 symbol_set; + unsigned char code; +}; + +const uint16 NO_GLYPH = 0xffff; +const uint16 NO_SYMBOL_SET = 0; + +struct name_list { + char *name; + name_list *next; + name_list(const char *s, name_list *p) : name(strsave(s)), next(p) { } + ~name_list() { delete[] name; } +}; + +struct symbol_set { + uint16 select; + uint16 index[256]; +}; + +#define SYMBOL_SET(n, c) ((n) * 32 + ((c) - 64)) + +uint16 text_symbol_sets[] = { + SYMBOL_SET(19, 'U'), // Windows Latin 1 ("ANSI", code page 1252) + SYMBOL_SET(9, 'E'), // Windows Latin 2, Code Page 1250 + SYMBOL_SET(5, 'T'), // Code Page 1254 + SYMBOL_SET(7, 'J'), // Desktop + SYMBOL_SET(6, 'J'), // Microsoft Publishing + SYMBOL_SET(0, 'N'), // Latin 1 (subset of 
19U, + // so we should never get here) + SYMBOL_SET(2, 'N'), // Latin 2 (subset of 9E, + // so we should never get here) + SYMBOL_SET(8, 'U'), // HP Roman 8 + SYMBOL_SET(10, 'J'), // PS Standard + SYMBOL_SET(9, 'U'), // Windows 3.0 "ANSI" + SYMBOL_SET(1, 'U'), // U.S. Legal + + SYMBOL_SET(12, 'J'), // MC Text + SYMBOL_SET(10, 'U'), // PC Code Page 437 + SYMBOL_SET(11, 'U'), // PC Code Page 437N + SYMBOL_SET(17, 'U'), // PC Code Page 852 + SYMBOL_SET(12, 'U'), // PC Code Page 850 + SYMBOL_SET(9, 'T'), // PC Code Page 437T + 0 +}; + +uint16 special_symbol_sets[] = { + SYMBOL_SET(8, 'M'), // Math 8 + SYMBOL_SET(5, 'M'), // PS Math + SYMBOL_SET(15, 'U'), // Pi font + SYMBOL_SET(13, 'J'), // Ventura International + SYMBOL_SET(19, 'M'), // Symbol font + SYMBOL_SET(579, 'L'), // Wingdings + 0 +}; + +entry tags[max_tag + 1 - min_tag]; + +char_info *char_table; +uint32 nchars = 0; + +unsigned int charcode_name_table_size = 0; +name_list **charcode_name_table = NULL; + +symbol_set *symbol_set_table; +unsigned int n_symbol_sets; + +static int debug_flag = NO; +static int special_flag = NO; // not a special font +static int italic_flag = NO; // don't add italic correction +static int italic_sep; +static int all_flag = NO; // don't include glyphs not in mapfile +static int quiet_flag = NO; // don't suppress warnings about symbols not found + +static char *hp_msl_to_ucode_name(int); +static char *unicode_to_ucode_name(int); +static int is_uname(char *); +static char *show_symset(unsigned int); +static void usage(FILE *); +static void usage(); +static const char *xbasename(const char *); +static void read_tags(File &); +static int check_type(); +static void check_units(File &, const int, double *, double *); +static int read_map(const char *, const int); +static void require_tag(tag_type); +static void dump_ascii(File &, tag_type); +static void dump_tags(File &); +static void dump_symbol_sets(File &); +static void dump_symbols(int); +static void output_font_name(File &); +static 
void output_spacewidth(); +static void output_pclweight(); +static void output_pclproportional(); +static void read_and_output_pcltypeface(File &); +static void output_pclstyle(); +static void output_slant(); +static void output_ligatures(); +static void read_symbol_sets(File &); +static void read_and_output_kernpairs(File &); +static void output_charset(const int); +static void read_char_table(File &); + +inline +entry &tag_info(tag_type t) +{ + return tags[t - min_tag]; +} + +int +main(int argc, char **argv) +{ + program_name = argv[0]; + + int opt; + int res = 1200; // PCL unit of measure for cursor moves + int scalesize = 4; // LaserJet 4 only allows 1/4 point increments + int unitwidth = 6350; + double ppi; // points per inch + double upem; // design units per em + + static const struct option long_options[] = { + { "help", no_argument, 0, CHAR_MAX + 1 }, + { "version", no_argument, 0, 'v' }, + { NULL, 0, 0, 0 } + }; + while ((opt = getopt_long(argc, argv, "adsqvi:", long_options, NULL)) != EOF) { + switch (opt) { + case 'a': + all_flag = YES; + break; + case 'd': + debug_flag = YES; + break; + case 's': + special_flag = YES; + break; + case 'i': + italic_flag = YES; + italic_sep = atoi(optarg); // design units + break; + case 'q': + quiet_flag = YES; // suppress warnings about symbols not found + break; + case 'v': + printf("GNU hpftodit (groff) version %s\n", Version_string); + exit(0); + break; + case CHAR_MAX + 1: // --help + usage(stdout); + exit(0); + break; + case '?': + usage(); + break; + default: + assert(0); + } + } + + if (debug_flag && argc - optind < 1) + usage(); + else if (!debug_flag && argc - optind != 3) + usage(); + File f(argv[optind]); + read_tags(f); + int tfm_type = check_type(); + if (debug_flag) + dump_tags(f); + if (!debug_flag && !read_map(argv[optind + 1], tfm_type)) + exit(1); + else if (debug_flag && argc - optind > 1) + read_map(argv[optind + 1], tfm_type); + current_filename = NULL; + current_lineno = -1; // no line numbers + 
if (!debug_flag && !equal(argv[optind + 2], "-")) + if (freopen(argv[optind + 2], "w", stdout) == NULL) + fatal("cannot open '%1': %2", argv[optind + 2], strerror(errno)); + current_filename = argv[optind]; + + check_units(f, tfm_type, &ppi, &upem); + if (tfm_type == UNICODE) // don't calculate for Intellifont TFMs + multiplier = double(res) / upem / ppi * unitwidth / scalesize; + if (italic_flag) + // convert from thousandths of an em to design units + italic_sep = int(italic_sep * upem / 1000 + 0.5); + + read_char_table(f); + if (nchars == 0) + fatal("no characters"); + + if (!debug_flag) { + output_font_name(f); + printf("name %s\n", xbasename(argv[optind + 2])); + if (special_flag) + printf("special\n"); + output_spacewidth(); + output_slant(); + read_and_output_pcltypeface(f); + output_pclproportional(); + output_pclweight(); + output_pclstyle(); + } + read_symbol_sets(f); + if (debug_flag) + dump_symbols(tfm_type); + else { + output_ligatures(); + read_and_output_kernpairs(f); + output_charset(tfm_type); + } + return 0; +} + +static void +usage(FILE *stream) +{ + fprintf(stream, +"usage: %s [-aqs] [-i n] tfm-file map-file output-font\n" +"usage: %s -d tfm-file [map-file]\n" +"usage: %s {-v | --version}\n" +"usage: %s --help\n", + program_name, program_name, program_name, program_name); +} + +static void +usage() +{ + usage(stderr); + exit(1); +} + +File::File(const char *s) +{ + // We need to read the file in binary mode because hpftodit relies + // on byte counts. 
+ int fd = open(s, O_RDONLY | O_BINARY); + if (fd < 0) + fatal("cannot open '%1': %2", s, strerror(errno)); + current_filename = s; + struct stat sb; + if (fstat(fd, &sb) < 0) + fatal("cannot stat: %1", strerror(errno)); + if (!S_ISREG(sb.st_mode)) + fatal("not a regular file"); + buf_ = new unsigned char[sb.st_size]; + long nread = read(fd, buf_, sb.st_size); + if (nread < 0) + fatal("read error: %1", strerror(errno)); + if (nread != sb.st_size) + fatal("read unexpected number of bytes"); + ptr_ = buf_; + end_ = buf_ + sb.st_size; +} + +void +File::skip(int n) +{ + if (end_ - ptr_ < n) + fatal("unexpected end of file"); + ptr_ += n; +} + +void +File::seek(uint32 n) +{ + if (uint32(end_ - buf_) < n) + fatal("unexpected end of file"); + ptr_ = buf_ + n; +} + +byte +File::get_byte() +{ + if (ptr_ >= end_) + fatal("unexpected end of file"); + return *ptr_++; +} + +uint16 +File::get_uint16() +{ + if (end_ - ptr_ < 2) + fatal("unexpected end of file"); + uint16 n = *ptr_++; + return n + (*ptr_++ << 8); +} + +uint32 +File::get_uint32() +{ + if (end_ - ptr_ < 4) + fatal("unexpected end of file"); + uint32 n = *ptr_++; + for (int i = 0; i < 3; i++) + n += *ptr_++ << (i + 1)*8; + return n; +} + +uint32 +File::get_uint32(char *orig) +{ + if (end_ - ptr_ < 4) + fatal("unexpected end of file"); + unsigned char v = *ptr_++; + uint32 n = v; + orig[0] = v; + for (int i = 1; i < 4; i++) { + v = *ptr_++; + orig[i] = v; + n += v << i*8; + } + return n; +} + +static void +read_tags(File &f) +{ + if (f.get_byte() != 'I' || f.get_byte() != 'I') + fatal("not an Intel format TFM file"); + f.skip(6); + uint16 ntags = f.get_uint16(); + entry dummy; + for (uint16 i = 0; i < ntags; i++) { + uint16 tag = f.get_uint16(); + entry *p; + if (min_tag <= tag && tag <= max_tag) + p = tags + (tag - min_tag); + else + p = &dummy; + p->present = 1; + p->type = f.get_uint16(); + p->count = f.get_uint32(); + p->value = f.get_uint32(p->orig_value); + } +} + +static int +check_type() +{ + 
require_tag(type_tag); + int tfm_type = tag_info(type_tag).value; + switch (tfm_type) { + case MSL: + case UNICODE: + break; + case SYMSET: + fatal("cannot handle Symbol Set TFM files"); + break; + default: + fatal("unknown type tag %1", tfm_type); + } + return tfm_type; +} + +static void +check_units(File &f, const int tfm_type, double *ppi, double *upem) +{ + require_tag(design_units_per_em_tag); + f.seek(tag_info(design_units_per_em_tag).value); + uint32 num = f.get_uint32(); + uint32 den = f.get_uint32(); + if (tfm_type == MSL && (num != 8782 || den != 1)) + fatal("design units per em != 8782/1"); + *upem = double(num) / den; + require_tag(inches_per_point_tag); + f.seek(tag_info(inches_per_point_tag).value); + num = f.get_uint32(); + den = f.get_uint32(); + if (tfm_type == MSL && (num != 100 || den != 7231)) + fatal("inches per point not 100/7231"); + *ppi = double(den) / num; +} + +static void +require_tag(tag_type t) +{ + if (!tag_info(t).present) + fatal("tag %1 missing", int(t)); +} + +// put a human-readable font name in the file +static void +output_font_name(File &f) +{ + char *p; + + if (!tag_info(font_name_tag).present) + return; + int count = tag_info(font_name_tag).count; + char *font_name = new char[count]; + + if (count > 4) { // value is a file offset to the string + f.seek(tag_info(font_name_tag).value); + int n = count; + p = font_name; + while (--n) + *p++ = f.get_byte(); + } + else // orig_value contains the string + sprintf(font_name, "%.*s", + count, tag_info(font_name_tag).orig_value); + + // remove any trailing space + p = font_name + count - 1; + while (csspace(*--p)) + ; + *(p + 1) = '\0'; + printf("# %s\n", font_name); + delete[] font_name; +} + +static void +output_spacewidth() +{ + require_tag(word_spacing_tag); + printf("spacewidth %d\n", scale(tag_info(word_spacing_tag).value)); +} + +static void +read_symbol_sets(File &f) +{ + uint32 symbol_set_dir_length = tag_info(symbol_set_tag).count; + uint16 *symbol_set_selectors; + 
n_symbol_sets = symbol_set_dir_length/14; + symbol_set_table = new symbol_set[n_symbol_sets]; + unsigned int i; + + for (i = 0; i < nchars; i++) + char_table[i].symbol_set = NO_SYMBOL_SET; + + for (i = 0; i < n_symbol_sets; i++) { + f.seek(tag_info(symbol_set_tag).value + i*14); + (void)f.get_uint32(); // offset to symbol set name + uint32 off1 = f.get_uint32(); // offset to selection string + uint32 off2 = f.get_uint32(); // offset to symbol set index array + + f.seek(off1); + uint16 kind = 0; // HP-GL "Kind 1" symbol set value + unsigned int j; + for (j = 0; j < off2 - off1; j++) { + unsigned char c = f.get_byte(); + if ('0' <= c && c <= '9') // value + kind = kind*10 + (c - '0'); + else if ('A' <= c && c <= 'Z') // terminator + kind = kind*32 + (c - 64); + } + symbol_set_table[i].select = kind; + for (j = 0; j < 256; j++) + symbol_set_table[i].index[j] = f.get_uint16(); + } + + symbol_set_selectors = (special_flag ? special_symbol_sets + : text_symbol_sets); + for (i = 0; symbol_set_selectors[i] != 0; i++) { + unsigned int j; + for (j = 0; j < n_symbol_sets; j++) + if (symbol_set_table[j].select == symbol_set_selectors[i]) + break; + if (j < n_symbol_sets) { + for (int k = 0; k < 256; k++) { + uint16 idx = symbol_set_table[j].index[k]; + if (idx != NO_GLYPH + && char_table[idx].symbol_set == NO_SYMBOL_SET) { + char_table[idx].symbol_set = symbol_set_table[j].select; + char_table[idx].code = k; + } + } + } + } + + if (all_flag) + return; + + symbol_set_selectors = (special_flag ? 
text_symbol_sets + : special_symbol_sets); + for (i = 0; symbol_set_selectors[i] != 0; i++) { + unsigned int j; + for (j = 0; j < n_symbol_sets; j++) + if (symbol_set_table[j].select == symbol_set_selectors[i]) + break; + if (j < n_symbol_sets) { + for (int k = 0; k < 256; k++) { + uint16 idx = symbol_set_table[j].index[k]; + if (idx != NO_GLYPH + && char_table[idx].symbol_set == NO_SYMBOL_SET) { + char_table[idx].symbol_set = symbol_set_table[j].select; + char_table[idx].code = k; + } + } + } + } + return; +} + +static void +read_char_table(File &f) +{ + require_tag(charcode_tag); + nchars = tag_info(charcode_tag).count; + char_table = new char_info[nchars]; + + f.seek(tag_info(charcode_tag).value); + uint32 i; + for (i = 0; i < nchars; i++) + char_table[i].charcode = f.get_uint16(); + + require_tag(width_tag); + f.seek(tag_info(width_tag).value); + for (i = 0; i < nchars; i++) + char_table[i].width = f.get_uint16(); + + require_tag(ascent_tag); + f.seek(tag_info(ascent_tag).value); + for (i = 0; i < nchars; i++) { + char_table[i].ascent = f.get_uint16(); + if (char_table[i].ascent < 0) + char_table[i].ascent = 0; + } + + require_tag(descent_tag); + f.seek(tag_info(descent_tag).value); + for (i = 0; i < nchars; i++) { + char_table[i].descent = f.get_uint16(); + if (char_table[i].descent > 0) + char_table[i].descent = 0; + } + + require_tag(left_extent_tag); + f.seek(tag_info(left_extent_tag).value); + for (i = 0; i < nchars; i++) + char_table[i].left_extent = int16(f.get_uint16()); + + require_tag(right_extent_tag); + f.seek(tag_info(right_extent_tag).value); + for (i = 0; i < nchars; i++) + char_table[i].right_extent = f.get_uint16(); +} + +static void +output_pclweight() +{ + require_tag(stroke_weight_tag); + int stroke_weight = tag_info(stroke_weight_tag).value; + int pcl_stroke_weight; + if (stroke_weight < 128) + pcl_stroke_weight = -3; + else if (stroke_weight == 128) + pcl_stroke_weight = 0; + else if (stroke_weight <= 145) + pcl_stroke_weight = 1; + else 
if (stroke_weight <= 179) + pcl_stroke_weight = 3; + else + pcl_stroke_weight = 4; + printf("pclweight %d\n", pcl_stroke_weight); +} + +static void +output_pclproportional() +{ + require_tag(spacing_tag); + printf("pclproportional %d\n", tag_info(spacing_tag).value == 0); +} + +static void +read_and_output_pcltypeface(File &f) +{ + printf("pcltypeface "); + require_tag(typeface_tag); + if (tag_info(typeface_tag).count > 4) { + f.seek(tag_info(typeface_tag).value); + for (uint32 i = 0; i < tag_info(typeface_tag).count; i++) { + unsigned char c = f.get_byte(); + if (c == '\0') + break; + putchar(c); + } + } + else + printf("%.4s", tag_info(typeface_tag).orig_value); + printf("\n"); +} + +static void +output_pclstyle() +{ + unsigned pcl_style = 0; + // older tfms don't have the posture tag + if (tag_info(posture_tag).present) { + if (tag_info(posture_tag).value) + pcl_style |= 1; + } + else { + require_tag(slant_tag); + if (tag_info(slant_tag).value != 0) + pcl_style |= 1; + } + require_tag(appearance_width_tag); + if (tag_info(appearance_width_tag).value < 100) // guess + pcl_style |= 4; + printf("pclstyle %d\n", pcl_style); +} + +static void +output_slant() +{ + require_tag(slant_tag); + int slant = int16(tag_info(slant_tag).value); + if (slant != 0) + printf("slant %f\n", slant/100.0); +} + +static void +output_ligatures() +{ + // don't use ligatures for fixed space font + require_tag(spacing_tag); + if (tag_info(spacing_tag).value != 0) + return; + static const char *ligature_names[] = { + "fi", "fl", "ff", "ffi", "ffl" + }; + + static const char *ligature_chars[] = { + "fi", "fl", "ff", "Fi", "Fl" + }; + + unsigned ligature_mask = 0; + unsigned int i; + for (i = 0; i < nchars; i++) { + uint16 charcode = char_table[i].charcode; + if (charcode < charcode_name_table_size + && char_table[i].symbol_set != NO_SYMBOL_SET) { + for (name_list *p = charcode_name_table[charcode]; p; p = p->next) + for (unsigned int j = 0; j < SIZEOF(ligature_chars); j++) + if 
(strcmp(p->name, ligature_chars[j]) == 0) { + ligature_mask |= 1 << j; + break; + } + } + } + if (ligature_mask) { + printf("ligatures"); + for (i = 0; i < SIZEOF(ligature_names); i++) + if (ligature_mask & (1 << i)) + printf(" %s", ligature_names[i]); + printf(" 0\n"); + } +} + +static void +read_and_output_kernpairs(File &f) +{ + if (tag_info(pair_kern_tag).present) { + printf("kernpairs\n"); + f.seek(tag_info(pair_kern_tag).value); + uint16 n_pairs = f.get_uint16(); + for (int i = 0; i < n_pairs; i++) { + uint16 i1 = f.get_uint16(); + uint16 i2 = f.get_uint16(); + int16 val = int16(f.get_uint16()); + if (char_table[i1].symbol_set != NO_SYMBOL_SET + && char_table[i2].symbol_set != NO_SYMBOL_SET + && char_table[i1].charcode < charcode_name_table_size + && char_table[i2].charcode < charcode_name_table_size) { + for (name_list *p = charcode_name_table[char_table[i1].charcode]; + p; + p = p->next) + for (name_list *q = charcode_name_table[char_table[i2].charcode]; + q; + q = q->next) + if (!equal(p->name, UNNAMED) && !equal(q->name, UNNAMED)) + printf("%s %s %d\n", p->name, q->name, scale(val)); + } + } + } +} + +static void +output_charset(const int tfm_type) +{ + require_tag(slant_tag); + double slant_angle = int16(tag_info(slant_tag).value)*PI/18000.0; + double slant = sin(slant_angle)/cos(slant_angle); + + if (italic_flag) + require_tag(x_height_tag); + require_tag(lower_ascent_tag); + require_tag(lower_descent_tag); + + printf("charset\n"); + unsigned int i; + for (i = 0; i < nchars; i++) { + uint16 charcode = char_table[i].charcode; + + // the glyph is bound to one of the searched symbol sets + if (char_table[i].symbol_set != NO_SYMBOL_SET) { + // the character was in the map file + if (charcode < charcode_name_table_size && charcode_name_table[charcode]) + printf("%s", charcode_name_table[charcode]->name); + else if (!all_flag) + continue; + else if (tfm_type == MSL) + printf("%s", hp_msl_to_ucode_name(charcode)); + else + printf("%s", 
unicode_to_ucode_name(charcode)); + + printf("\t%d,%d", + scale(char_table[i].width), scale(char_table[i].ascent)); + + int depth = scale(-char_table[i].descent); + if (depth < 0) + depth = 0; + int italic_correction = 0; + int left_italic_correction = 0; + int subscript_correction = 0; + + if (italic_flag) { + italic_correction = scale(char_table[i].right_extent + - char_table[i].width + + italic_sep); + if (italic_correction < 0) + italic_correction = 0; + subscript_correction = int((tag_info(x_height_tag).value + * slant * .8) + .5); + if (subscript_correction > italic_correction) + subscript_correction = italic_correction; + left_italic_correction = scale(italic_sep + - char_table[i].left_extent); + } + + if (subscript_correction != 0) + printf(",%d,%d,%d,%d", + depth, italic_correction, left_italic_correction, + subscript_correction); + else if (left_italic_correction != 0) + printf(",%d,%d,%d", depth, italic_correction, left_italic_correction); + else if (italic_correction != 0) + printf(",%d,%d", depth, italic_correction); + else if (depth != 0) + printf(",%d", depth); + // This is fairly arbitrary. Fortunately it doesn't much matter. 
+ unsigned type = 0; + if (char_table[i].ascent > int16(tag_info(lower_ascent_tag).value)*9/10) + type |= 2; + if (char_table[i].descent < int16(tag_info(lower_descent_tag).value)*9/10) + type |= 1; + printf("\t%d\t%d", type, + char_table[i].symbol_set*256 + char_table[i].code); + + if (tfm_type == UNICODE) { + if (charcode >= 0xE000 && charcode <= 0xF8FF) + printf("\t-- HP PUA U+%04X", charcode); + else + printf("\t-- U+%04X", charcode); + } + else + printf("\t-- MSL %4d", charcode); + printf(" (%3s %3d)\n", + show_symset(char_table[i].symbol_set), char_table[i].code); + + if (charcode < charcode_name_table_size + && charcode_name_table[charcode]) + for (name_list *p = charcode_name_table[charcode]->next; + p; p = p->next) + printf("%s\t\"\n", p->name); + } + // warnings about characters in mapfile not found in TFM + else if (charcode < charcode_name_table_size + && charcode_name_table[charcode]) { + char *name = charcode_name_table[charcode]->name; + // don't warn about Unicode or unnamed glyphs + // that aren't in the TFM file + if (tfm_type == UNICODE && !quiet_flag && !equal(name, UNNAMED) + && !is_uname(name)) { + fprintf(stderr, "%s: warning: symbol U+%04X (%s", + program_name, charcode, name); + for (name_list *p = charcode_name_table[charcode]->next; + p; p = p->next) + fprintf(stderr, ", %s", p->name); + fprintf(stderr, ") not in any searched symbol set\n"); + } + else if (!quiet_flag && !equal(name, UNNAMED) && !is_uname(name)) { + fprintf(stderr, "%s: warning: symbol MSL %d (%s", + program_name, charcode, name); + for (name_list *p = charcode_name_table[charcode]->next; + p; p = p->next) + fprintf(stderr, ", %s", p->name); + fprintf(stderr, ") not in any searched symbol set\n"); + } + } + } +} + +#define em_fract(a) (upem >= 0 ? 
double(a)/upem : 0) + +static void +dump_tags(File &f) +{ + double upem = -1.0; + + printf("TFM tags\n" + "\n" + "tag# type count value\n" + "---------------------\n"); + + for (int i = min_tag; i <= max_tag; i++) { + enum tag_type t = tag_type(i); + if (tag_info(t).present) { + printf("%4d %4d %5d", i, tag_info(t).type, tag_info(t).count); + switch (tag_info(t).type) { + case BYTE_TYPE: + case USHORT_TYPE: + printf(" %5u", tag_info(t).value); + switch (i) { + case type_tag: + printf(" Font Type "); + switch (tag_info(t).value) { + case MSL: + case SYMSET: + printf("(Intellifont)"); + break; + case UNICODE: + printf("(TrueType)"); + } + break; + case charcode_tag: + printf(" Number of Symbols (%u)", tag_info(t).count); + break; + case symbol_set_tag: + printf(" Symbol Sets (%u): ", + tag_info(symbol_set_tag).count / 14); + dump_symbol_sets(f); + break; + case type_structure_tag: + printf(" Type Structure (%u)", tag_info(t).value); + break; + case stroke_weight_tag: + printf(" Stroke Weight (%u)", tag_info(t).value); + break; + case spacing_tag: + printf(" Spacing "); + switch (tag_info(t).value) { + case 0: + printf("(Proportional)"); + break; + case 1: + printf("(Fixed Pitch: %u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + } + break; + case appearance_width_tag: + printf(" Appearance Width (%u)", tag_info(t).value); + break; + case serif_style_tag: + printf(" Serif Style (%u)", tag_info(t).value); + break; + case posture_tag: + printf(" Posture (%s)", tag_info(t).value == 0 + ? "Upright" + : tag_info(t).value == 1 + ? 
"Italic" + : "Alternate Italic"); + break; + case max_width_tag: + printf(" Maximum Width (%u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + case word_spacing_tag: + printf(" Interword Spacing (%u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + case recommended_line_spacing_tag: + printf(" Recommended Line Spacing (%u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + case x_height_tag: + printf(" x-Height (%u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + case cap_height_tag: + printf(" Cap Height (%u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + case max_ascent_tag: + printf(" Maximum Ascent (%u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + case lower_ascent_tag: + printf(" Lowercase Ascent (%u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + case underscore_thickness_tag: + printf(" Underscore Thickness (%u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + case uppercase_accent_height_tag: + printf(" Uppercase Accent Height (%u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + case lowercase_accent_height_tag: + printf(" Lowercase Accent Height (%u DU: %.2f em)", tag_info(t).value, + em_fract(tag_info(t).value)); + break; + case width_tag: + printf(" Horizontal Escapement array"); + break; + case vertical_escapement_tag: + printf(" Vertical Escapement array"); + break; + case right_extent_tag: + printf(" Right Extent array"); + break; + case ascent_tag: + printf(" Character Ascent array"); + break; + case pair_kern_tag: + f.seek(tag_info(t).value); + printf(" Kern Pairs (%u)", f.get_uint16()); + break; + case panose_tag: + printf(" PANOSE Classification array"); + break; + } + break; + case SIGNED_SHORT_TYPE: + printf(" %5d", int16(tag_info(t).value)); + switch (i) { + case slant_tag: + printf(" Slant (%.2f 
degrees)", double(tag_info(t).value) / 100); + break; + case max_descent_tag: + printf(" Maximum Descent (%d DU: %.2f em)", int16(tag_info(t).value), + em_fract(int16(tag_info(t).value))); + break; + case lower_descent_tag: + printf(" Lowercase Descent (%d DU: %.2f em)", int16(tag_info(t).value), + em_fract(int16(tag_info(t).value))); + break; + case underscore_depth_tag: + printf(" Underscore Depth (%d DU: %.2f em)", int16(tag_info(t).value), + em_fract(int16(tag_info(t).value))); + break; + case left_extent_tag: + printf(" Left Extent array"); + break; + // The type of this tag has changed from SHORT to SIGNED SHORT + // in TFM version 1.3.0. + case ascent_tag: + printf(" Character Ascent array"); + break; + case descent_tag: + printf(" Character Descent array"); + break; + } + break; + case RATIONAL_TYPE: + printf(" %5u", tag_info(t).value); + switch (i) { + case inches_per_point_tag: + printf(" Inches per Point"); + break; + case nominal_point_size_tag: + printf(" Nominal Point Size"); + break; + case design_units_per_em_tag: + printf(" Design Units per Em"); + break; + case average_width_tag: + printf(" Average Width"); + break; + } + if (tag_info(t).count == 1) { + f.seek(tag_info(t).value); + uint32 num = f.get_uint32(); + uint32 den = f.get_uint32(); + if (i == design_units_per_em_tag) + upem = double(num) / den; + printf(" (%u/%u = %g)", num, den, double(num)/den); + } + break; + case ASCII_TYPE: + printf(" %5u ", tag_info(t).value); + switch (i) { + case comment_tag: + printf("Comment "); + break; + case copyright_tag: + printf("Copyright "); + break; + case unique_identifier_tag: + printf("Unique ID "); + break; + case font_name_tag: + printf("Typeface Name "); + break; + case typeface_source_tag: + printf("Typeface Source "); + break; + case typeface_tag: + printf("PCL Typeface "); + break; + } + dump_ascii(f, t); + } + putchar('\n'); + } + } + putchar('\n'); +} +#undef em_fract + +static void +dump_ascii(File &f, tag_type t) +{ + putchar('"'); + if 
(tag_info(t).count > 4) { + int count = tag_info(t).count; + f.seek(tag_info(t).value); + while (--count) + printf("%c", f.get_byte()); + } + else + printf("%.4s", tag_info(t).orig_value); + putchar('"'); +} + +static void +dump_symbol_sets(File &f) +{ + uint32 symbol_set_dir_length = tag_info(symbol_set_tag).count; + uint32 num_symbol_sets = symbol_set_dir_length / 14; + + for (uint32 i = 0; i < num_symbol_sets; i++) { + f.seek(tag_info(symbol_set_tag).value + i * 14); + (void)f.get_uint32(); // offset to symbol set name + uint32 off1 = f.get_uint32(); // offset to selection string + uint32 off2 = f.get_uint32(); // offset to symbol set index array + f.seek(off1); + for (uint32 j = 0; j < off2 - off1; j++) { + unsigned char c = f.get_byte(); + if ('0' <= c && c <= '9') + putchar(c); + else if ('A' <= c && c <= 'Z') + printf(i < num_symbol_sets - 1 ? "%c," : "%c", c); + } + } +} + +static void +dump_symbols(int tfm_type) +{ + printf("Symbols:\n" + "\n" + " glyph id# symbol set name(s)\n" + "----------------------------------\n"); + for (uint32 i = 0; i < nchars; i++) { + uint16 charcode = char_table[i].charcode; + if (charcode < charcode_name_table_size + && charcode_name_table[charcode]) { + if (char_table[i].symbol_set != NO_SYMBOL_SET) { + printf(tfm_type == UNICODE ? "%4d (U+%04X) (%3s %3d) %s" + : "%4d (MSL %4d) (%3s %3d) %s", + i, charcode, + show_symset(char_table[i].symbol_set), + char_table[i].code, + charcode_name_table[charcode]->name); + for (name_list *p = charcode_name_table[charcode]->next; + p; p = p->next) + printf(", %s", p->name); + putchar('\n'); + } + } + else { + printf(tfm_type == UNICODE ? "%4d (U+%04X) " + : "%4d (MSL %4d) ", + i, charcode); + if (char_table[i].symbol_set != NO_SYMBOL_SET) + printf("(%3s %3d)", + show_symset(char_table[i].symbol_set), char_table[i].code); + putchar('\n'); + } + } + putchar('\n'); +} + +static char * +show_symset(unsigned int symset) +{ + // A 64-bit unsigned int produces up to 20 decimal digits. 
+ assert(sizeof(unsigned int) <= 8); + static char symset_str[22]; // 20 digits + symset char + \0 + sprintf(symset_str, "%u%c", symset / 32, (symset & 31) + 64); + return symset_str; +} + +static char * +hp_msl_to_ucode_name(int msl) +{ + // A 64-bit signed int produces up to 19 decimal digits plus a sign. + assert(sizeof(int) <= 8); + char codestr[21]; // 19 digits + possible sign + \0 + sprintf(codestr, "%d", msl); + const char *ustr = hp_msl_to_unicode_code(codestr); + if (ustr == NULL) + ustr = UNNAMED; + else { + char *nonum; + int ucode = int(strtol(ustr, &nonum, 16)); + // don't allow PUA code points as Unicode names + if (ucode >= 0xE000 && ucode <= 0xF8FF) + ustr = UNNAMED; + } + if (!equal(ustr, UNNAMED)) { + const char *uname_decomposed = decompose_unicode(ustr); + if (uname_decomposed) + // 1st char is the number of components + ustr = uname_decomposed + 1; + } + char *value = new char[strlen(ustr) + 1]; + sprintf(value, equal(ustr, UNNAMED) ? UNNAMED : "u%s", ustr); + return value; +} + +static char * +unicode_to_ucode_name(int ucode) +{ + // A 64-bit signed int produces up to 16 hexadecimal digits. + assert(sizeof(int) <= 8); + const char *ustr; + char codestr[17]; // 16 hex digits + \0 + + // don't allow PUA code points as Unicode names + if (ucode >= 0xE000 && ucode <= 0xF8FF) + ustr = UNNAMED; + else { + sprintf(codestr, "%04X", ucode); + ustr = codestr; + } + if (!equal(ustr, UNNAMED)) { + const char *uname_decomposed = decompose_unicode(ustr); + if (uname_decomposed) + // 1st char is the number of components + ustr = uname_decomposed + 1; + } + char *value = new char[strlen(ustr) + 1]; + sprintf(value, equal(ustr, UNNAMED) ? UNNAMED : "u%s", ustr); + return value; +} + +static int +is_uname(char *name) +{ + size_t i; + size_t len = strlen(name); + if (len % 5) + return 0; + + if (name[0] != 'u') + return 0; + for (i = 1; i < 4; i++) + if (!csxdigit(name[i])) + return 0; + for (i = 5; i < len; i++) + if (i % 5 ? 
!csxdigit(name[i]) : name[i] != '_') + return 0; + + return 1; +} + +static int +read_map(const char *file, const int tfm_type) +{ + errno = 0; + FILE *fp = fopen(file, "r"); + if (!fp) { + error("can't open '%1': %2", file, strerror(errno)); + return 0; + } + current_filename = file; + char buf[512]; + current_lineno = 0; + char *nonum; + while (fgets(buf, int(sizeof(buf)), fp)) { + current_lineno++; + char *ptr = buf; + while (csspace(*ptr)) + ptr++; + if (*ptr == '\0' || *ptr == '#') + continue; + ptr = strtok(ptr, " \n\t"); + if (!ptr) + continue; + + int msl_code = int(strtol(ptr, &nonum, 10)); + if (*nonum != '\0') { + if (csxdigit(*nonum)) + error("bad MSL map: got hex code (%1)", ptr); + else if (ptr == nonum) + error("bad MSL map: bad MSL code (%1)", ptr); + else + error("bad MSL map"); + fclose(fp); + return 0; + } + + ptr = strtok(NULL, " \n\t"); + if (!ptr) + continue; + int unicode = int(strtol(ptr, &nonum, 16)); + if (*nonum != '\0') { + if (ptr == nonum) + error("bad Unicode value (%1)", ptr); + else + error("bad Unicode map"); + fclose(fp); + return 0; + } + if (strlen(ptr) != 4) { + error("bad Unicode value (%1)", ptr); + return 0; + } + + int n = tfm_type == MSL ? 
msl_code : unicode; + if (tfm_type == UNICODE && n > 0xFFFF) { + // greatest value supported by TFM files + error("bad Unicode value (%1): greatest value is 0xFFFF", ptr); + fclose(fp); + return 0; + } + else if (n < 0) { + error("negative code value (%1)", ptr); + fclose(fp); + return 0; + } + + ptr = strtok(NULL, " \n\t"); + if (!ptr) { // groff name + error("missing name(s)"); + fclose(fp); + return 0; + } + // leave decomposed Unicode values alone + else if (is_uname(ptr) && !is_decomposed(ptr)) + ptr = unicode_to_ucode_name(strtol(ptr + 1, &nonum, 16)); + + if (size_t(n) >= charcode_name_table_size) { + size_t old_size = charcode_name_table_size; + name_list **old_table = charcode_name_table; + charcode_name_table_size = n + 256; + charcode_name_table = new name_list *[charcode_name_table_size]; + if (old_table) { + memcpy(charcode_name_table, old_table, old_size*sizeof(name_list *)); + delete[] old_table; + } + for (size_t i = old_size; i < charcode_name_table_size; i++) + charcode_name_table[i] = NULL; + } + + // a '#' that isn't the first groff name begins a comment + for (int names = 1; ptr; ptr = strtok(NULL, " \n\t")) { + if (names++ > 1 && *ptr == '#') + break; + charcode_name_table[n] = new name_list(ptr, charcode_name_table[n]); + } + } + fclose(fp); + return 1; +} + +static const char * +xbasename(const char *s) +{ + // DIR_SEPS[] are possible directory separator characters, see + // nonposix.h. We want the rightmost separator of all possible + // ones. Example: d:/foo\\bar. + const char *b = strrchr(s, DIR_SEPS[0]), *b1; + const char *sep = &DIR_SEPS[1]; + + while (*sep) + { + b1 = strrchr(s, *sep); + if (b1 && (!b || b1 > b)) + b = b1; + sep++; + } + return b ? 
b + 1 : s; +} + +// Local Variables: +// fill-column: 72 +// mode: C++ +// End: +// vim: set cindent noexpandtab shiftwidth=2 textwidth=72: diff --git a/src/utils/hpftodit/hpuni.cpp b/src/utils/hpftodit/hpuni.cpp new file mode 100644 index 0000000..b3f933f --- /dev/null +++ b/src/utils/hpftodit/hpuni.cpp @@ -0,0 +1,697 @@ +// -*- C++ -*- +/* Copyright (C) 2003-2020 Free Software Foundation, Inc. + Written by Jeff Conrad (jeff_conrad@msn.com) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or +(at your option) any later version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ + +#include "lib.h" +#include "stringclass.h" +#include "ptable.h" + +#include "unicode.h" + +struct hp_msl_to_unicode { + char *value; +}; + +declare_ptable(hp_msl_to_unicode) +implement_ptable(hp_msl_to_unicode) + +PTABLE(hp_msl_to_unicode) hp_msl_to_unicode_table; + +struct S { + const char *key; + const char *value; +} hp_msl_to_unicode_list[] = { + { "1", "0021", }, // Exclamation Mark + { "2", "0022", }, // Neutral Double Quote + { "3", "0023", }, // Number Sign + { "4", "0024", }, // Dollar Sign + { "5", "0025", }, // Per Cent Sign + { "6", "0026", }, // Ampersand + { "8", "2019", }, // Single Close Quote (9) + { "9", "0028", }, // Left Parenthesis + { "10", "0029", }, // Right Parenthesis + { "11", "002A", }, // Asterisk + { "12", "002B", }, // Plus Sign + { "13", "002C", }, // Comma, or Decimal Separator + { "14", "002D", }, // Hyphen + { "15", "002E", }, // Period, or Full Stop + { "16", "002F", }, // Solidus, or Slash + { "17", "0030", }, // Numeral Zero + { "18", "0031", }, // Numeral One + { "19", "0032", }, // Numeral Two + { "20", "0033", }, // Numeral Three + { "21", "0034", }, // Numeral Four + { "22", "0035", }, // Numeral Five + { "23", "0036", }, // Numeral Six + { "24", "0037", }, // Numeral Seven + { "25", "0038", }, // Numeral Eight + { "26", "0039", }, // Numeral Nine + { "27", "003A", }, // Colon + { "28", "003B", }, // Semicolon + { "29", "003C", }, // Less Than Sign + { "30", "003D", }, // Equals Sign + { "31", "003E", }, // Greater Than Sign + { "32", "003F", }, // Question Mark + { "33", "0040", }, // Commercial At + { "34", "0041", }, // Uppercase A + { "35", "0042", }, // Uppercase B + { "36", "0043", }, // Uppercase C + { "37", "0044", }, // Uppercase D + { "38", "0045", }, // Uppercase E + { "39", "0046", }, // Uppercase F + { "40", "0047", }, // Uppercase G + { "41", "0048", }, // Uppercase H + { "42", "0049", }, // Uppercase I + { "43", "004A", }, // Uppercase J + { "44", "004B", }, // Uppercase K + { "45", "004C", }, // 
Uppercase L + { "46", "004D", }, // Uppercase M + { "47", "004E", }, // Uppercase N + { "48", "004F", }, // Uppercase O + { "49", "0050", }, // Uppercase P + { "50", "0051", }, // Uppercase Q + { "51", "0052", }, // Uppercase R + { "52", "0053", }, // Uppercase S + { "53", "0054", }, // Uppercase T + { "54", "0055", }, // Uppercase U + { "55", "0056", }, // Uppercase V + { "56", "0057", }, // Uppercase W + { "57", "0058", }, // Uppercase X + { "58", "0059", }, // Uppercase Y + { "59", "005A", }, // Uppercase Z + { "60", "005B", }, // Left Bracket + { "61", "005C", }, // Reverse Solidus, or Backslash + { "62", "005D", }, // Right Bracket + { "63", "005E", }, // Circumflex, Exponent, or Pointer + { "64", "005F", }, // Underline or Underscore Character + { "66", "2018", }, // Single Open Quote (6) + { "67", "0061", }, // Lowercase A + { "68", "0062", }, // Lowercase B + { "69", "0063", }, // Lowercase C + { "70", "0064", }, // Lowercase D + { "71", "0065", }, // Lowercase E + { "72", "0066", }, // Lowercase F + { "73", "0067", }, // Lowercase G + { "74", "0068", }, // Lowercase H + { "75", "0069", }, // Lowercase I + { "76", "006A", }, // Lowercase J + { "77", "006B", }, // Lowercase K + { "78", "006C", }, // Lowercase L + { "79", "006D", }, // Lowercase M + { "80", "006E", }, // Lowercase N + { "81", "006F", }, // Lowercase O + { "82", "0070", }, // Lowercase P + { "83", "0071", }, // Lowercase Q + { "84", "0072", }, // Lowercase R + { "85", "0073", }, // Lowercase S + { "86", "0074", }, // Lowercase T + { "87", "0075", }, // Lowercase U + { "88", "0076", }, // Lowercase V + { "89", "0077", }, // Lowercase W + { "90", "0078", }, // Lowercase X + { "91", "0079", }, // Lowercase Y + { "92", "007A", }, // Lowercase Z + { "93", "007B", }, // Left Brace + { "94", "007C", }, // Long Vertical Mark + { "95", "007D", }, // Right Brace + { "96", "007E", }, // One Wavy Line Approximate + { "97", "2592", }, // Medium Shading Character + { "99", "00C0", }, // Uppercase A Grave + 
{ "100", "00C2", }, // Uppercase A Circumflex + { "101", "00C8", }, // Uppercase E Grave + { "102", "00CA", }, // Uppercase E Circumflex + { "103", "00CB", }, // Uppercase E Dieresis + { "104", "00CE", }, // Uppercase I Circumflex + { "105", "00CF", }, // Uppercase I Dieresis + { "106", "00B4", }, // Lowercase Acute Accent (Spacing) + { "107", "0060", }, // Lowercase Grave Accent (Spacing) + { "108", "02C6", }, // Lowercase Circumflex Accent (Spacing) + { "109", "00A8", }, // Lowercase Dieresis Accent (Spacing) + { "110", "02DC", }, // Lowercase Tilde Accent (Spacing) + { "111", "00D9", }, // Uppercase U Grave + { "112", "00DB", }, // Uppercase U Circumflex + { "113", "00AF", }, // Overline, or Overscore Character + { "114", "00DD", }, // Uppercase Y Acute + { "115", "00FD", }, // Lowercase Y Acute + { "116", "00B0", }, // Degree Sign + { "117", "00C7", }, // Uppercase C Cedilla + { "118", "00E7", }, // Lowercase C Cedilla + { "119", "00D1", }, // Uppercase N Tilde + { "120", "00F1", }, // Lowercase N Tilde + { "121", "00A1", }, // Inverted Exclamation + { "122", "00BF", }, // Inverted Question Mark + { "123", "00A4", }, // Currency Symbol + { "124", "00A3", }, // Pound Sterling Sign + { "125", "00A5", }, // Yen Sign + { "126", "00A7", }, // Section Mark + { "127", "0192", }, // Florin Sign + { "128", "00A2", }, // Cent Sign + { "129", "00E2", }, // Lowercase A Circumflex + { "130", "00EA", }, // Lowercase E Circumflex + { "131", "00F4", }, // Lowercase O Circumflex + { "132", "00FB", }, // Lowercase U Circumflex + { "133", "00E1", }, // Lowercase A Acute + { "134", "00E9", }, // Lowercase E Acute + { "135", "00F3", }, // Lowercase O Acute + { "136", "00FA", }, // Lowercase U Acute + { "137", "00E0", }, // Lowercase A Grave + { "138", "00E8", }, // Lowercase E Grave + { "139", "00F2", }, // Lowercase O Grave + { "140", "00F9", }, // Lowercase U Grave + { "141", "00E4", }, // Lowercase A Dieresis + { "142", "00EB", }, // Lowercase E Dieresis + { "143", "00F6", }, // 
Lowercase O Dieresis + { "144", "00FC", }, // Lowercase U Dieresis + { "145", "00C5", }, // Uppercase A Ring + { "146", "00EE", }, // Lowercase I Circumflex + { "147", "00D8", }, // Uppercase O Oblique + { "148", "00C6", }, // Uppercase AE Diphthong + { "149", "00E5", }, // Lowercase A Ring + { "150", "00ED", }, // Lowercase I Acute + { "151", "00F8", }, // Lowercase O Oblique + { "152", "00E6", }, // Lowercase AE Diphthong + { "153", "00C4", }, // Uppercase A Dieresis + { "154", "00EC", }, // Lowercase I Grave + { "155", "00D6", }, // Uppercase O Dieresis + { "156", "00DC", }, // Uppercase U Dieresis + { "157", "00C9", }, // Uppercase E Acute + { "158", "00EF", }, // Lowercase I Dieresis + { "159", "00DF", }, // Lowercase Es-zet Ligature + { "160", "00D4", }, // Uppercase O Circumflex + { "161", "00C1", }, // Uppercase A Acute + { "162", "00C3", }, // Uppercase A Tilde + { "163", "00E3", }, // Lowercase A Tilde + { "164", "00D0", }, // Uppercase Eth +//{ "164", "0110", }, // Uppercase D-Stroke + { "165", "00F0", }, // Lowercase Eth + { "166", "00CD", }, // Uppercase I Acute + { "167", "00CC", }, // Uppercase I Grave + { "168", "00D3", }, // Uppercase O Acute + { "169", "00D2", }, // Uppercase O Grave + { "170", "00D5", }, // Uppercase O Tilde + { "171", "00F5", }, // Lowercase O Tilde + { "172", "0160", }, // Uppercase S Hacek + { "173", "0161", }, // Lowercase S Hacek + { "174", "00DA", }, // Uppercase U Acute + { "175", "0178", }, // Uppercase Y Dieresis + { "176", "00FF", }, // Lowercase Y Dieresis + { "177", "00DE", }, // Uppercase Thorn + { "178", "00FE", }, // Lowercase Thorn + { "180", "00B5", }, // Lowercase Greek Mu, or Micro + { "181", "00B6", }, // Pilcrow, or Paragraph Sign + { "182", "00BE", }, // Vulgar Fraction 3/4 + { "183", "2212", }, // Minus Sign + { "184", "00BC", }, // Vulgar Fraction 1/4 + { "185", "00BD", }, // Vulgar Fraction 1/2 + { "186", "00AA", }, // Female Ordinal + { "187", "00BA", }, // Male Ordinal + { "188", "00AB", }, // Left 
Pointing Double Angle Quote + { "189", "25A0", }, // Medium Solid Square Box + { "190", "00BB", }, // Right Pointing Double Angle Quote + { "191", "00B1", }, // Plus Over Minus Sign + { "192", "00A6", }, // Broken Vertical Mark + { "193", "00A9", }, // Copyright Sign + { "194", "00AC", }, // Not Sign + { "195", "00AD", }, // Soft Hyphen + { "196", "00AE", }, // Registered Sign + { "197", "00B2", }, // Superior Numeral 2 + { "198", "00B3", }, // Superior Numeral 3 + { "199", "00B8", }, // Lowercase Cedilla (Spacing) + { "200", "00B9", }, // Superior Numeral 1 + { "201", "00D7", }, // Multiply Sign + { "202", "00F7", }, // Divide Sign + { "203", "263A", }, // Open Smiling Face + { "204", "263B", }, // Solid Smiling Face + { "205", "2665", }, // Solid Heart, Card Suit + { "206", "2666", }, // Solid Diamond, Card Suit + { "207", "2663", }, // Solid Club, Card Suit + { "208", "2660", }, // Solid Spade, Card Suit + { "209", "25CF", }, // Medium Solid Round Bullet + { "210", "25D8", }, // Large Solid square with White Dot + { "211", "EFFD", }, // Large Open Round Bullet + { "212", "25D9", }, // Large Solid square with White Circle + { "213", "2642", }, // Male Symbol + { "214", "2640", }, // Female Symbol + { "215", "266A", }, // Musical Note + { "216", "266B", }, // Pair Of Musical Notes + { "217", "263C", }, // Compass, or Eight Pointed Sun + { "218", "25BA", }, // Right Solid Arrowhead + { "219", "25C4", }, // Left Solid Arrowhead + { "220", "2195", }, // Up/Down Arrow + { "221", "203C", }, // Double Exclamation Mark + { "222", "25AC", }, // Thick Horizontal Mark + { "223", "21A8", }, // Up/Down Arrow Baseline + { "224", "2191", }, // Up Arrow + { "225", "2193", }, // Down Arrow + { "226", "2192", }, // Right Arrow + { "227", "2190", }, // Left Arrow + { "229", "2194", }, // Left/Right Arrow + { "230", "25B2", }, // Up Solid Arrowhead + { "231", "25BC", }, // Down Solid Arrowhead + { "232", "20A7", }, // Pesetas Sign + { "233", "2310", }, // Reversed Not Sign + { 
"234", "2591", }, // Light Shading Character + { "235", "2593", }, // Dark Shading Character + { "236", "2502", }, // Box Draw Line, Vert. 1 + { "237", "2524", }, // Box Draw Right Tee, Vert. 1 Horiz. 1 + { "238", "2561", }, // Box Draw Right Tee, Vert. 1 Horiz. 2 + { "239", "2562", }, // Box Draw Right Tee, Vert. 2 Horiz. 1 + { "240", "2556", }, // Box Draw Upper Right Corner, Vert. 2 Horiz. 1 + { "241", "2555", }, // Box Draw Upper Right Corner, Vert. 1 Horiz. 2 + { "242", "2563", }, // Box Draw Right Tee, Vert. 2 Horiz. 2 + { "243", "2551", }, // Box Draw Lines, Vert. 2 + { "244", "2557", }, // Box Draw Upper Right Corner, Vert. 2 Horiz. 2 + { "245", "255D", }, // Box Draw Lower Right Corner, Vert. 2 Horiz. 2 + { "246", "255C", }, // Box Draw Lower Right Corner, Vert. 2 Horiz. 1 + { "247", "255B", }, // Box Draw Lower Right Corner, Vert. 1 Horiz. 2 + { "248", "2510", }, // Box Draw Upper Right Corner, Vert. 1, Horiz. 1 + { "249", "2514", }, // Box Draw Lower Left Corner, Vert. 1, Horiz. 1 + { "250", "2534", }, // Box Draw Bottom Tee, Vert. 1 Horiz. 1 + { "251", "252C", }, // Box Draw Top Tee, Vert. 1 Horiz. 1 + { "252", "251C", }, // Box Draw Left Tee, Vert. 1 Horiz. 1 + { "253", "2500", }, // Box Draw Line, Horiz. 1 + { "254", "253C", }, // Box Draw Cross, Vert. 1 Horiz. 1 + { "255", "255E", }, // Box Draw Left Tee, Vert. 1 Horiz. 2 + { "256", "255F", }, // Box Draw Left Tee, Vert. 2 Horz. 1 + { "257", "255A", }, // Box Draw Lower Left Corner, Vert. 2 Horiz. 2 + { "258", "2554", }, // Box Draw Upper Left Corner, Vert. 2 Horiz. 2 + { "259", "2569", }, // Box Draw Bottom Tee, Vert. 2 Horiz. 2 + { "260", "2566", }, // Box Draw Top Tee, Vert. 2 Horiz. 2 + { "261", "2560", }, // Box Draw Left Tee, Vert. 2 Horiz. 2 + { "262", "2550", }, // Box Draw Lines, Horiz. 2 + { "263", "256C", }, // Box Draw Cross Open Center, Vert. 2 Horiz. 2 + { "264", "2567", }, // Box Draw Bottom Tee, Vert. 1 Horiz. 2 + { "265", "2568", }, // Box Draw Bottom Tee, Vert. 2 Horiz. 
1 + { "266", "2564", }, // Box Draw Top Tee, Vert. 1 Horiz. 2 + { "267", "2565", }, // Box Draw Top Tee, Vert. 2 Horiz. 1 + { "268", "2559", }, // Box Draw Lower Left Corner, Vert. 2 Horiz. 1 + { "269", "2558", }, // Box Draw Lower Left Corner, Vert. 1 Horiz. 2 + { "270", "2552", }, // Box Draw Upper Left Corner, Vert. 1 Horiz. 2 + { "271", "2553", }, // Box Draw Upper Left Corner, Vert. 2 Horiz. 1 + { "272", "256B", }, // Box Draw Cross, Vert. 2 Horiz. 1 + { "273", "256A", }, // Box Draw Cross, Vert. 1 Horiz. 2 + { "274", "2518", }, // Box Draw Lower Right Corner, Vert. 1 Horiz. 1 + { "275", "250C", }, // Box Draw Upper Left Corner, Vert. 1, Horiz. 1 + { "276", "2588", }, // Solid Full High/Wide + { "277", "2584", }, // Bottom Half Solid Rectangle + { "278", "258C", }, // Left Half Solid Rectangle + { "279", "2590", }, // Right Half Solid Rectangle + { "280", "2580", }, // Top Half Solid Rectangle + { "290", "2126", }, // Uppercase Greek Omega, or Ohms + { "292", "221E", }, // Infinity Symbol + { "295", "2229", }, // Set Intersection Symbol + { "296", "2261", }, // Exactly Equals Sign + { "297", "2265", }, // Greater Than or Equal Sign + { "298", "2264", }, // Less Than or Equal Sign + { "299", "2320", }, // Top Integral + { "300", "2321", }, // Bottom Integral + { "301", "2248", }, // Two Wavy Line Approximate Sign +//{ "302", "00B7", }, // Middle Dot, or Centered Period (see 2219) +//{ "302", "2219", }, // Centered Period, Middle Dot + { "302", "2219", }, // Math Dot, Centered Period + { "303", "221A", }, // Radical Symbol, Standalone Diagonal + { "305", "25AA", }, // Small Solid Square Box + { "306", "013F", }, // Uppercase L-Dot + { "307", "0140", }, // Lowercase L-Dot + { "308", "2113", }, // Litre Symbol + { "309", "0149", }, // Lowercase Apostrophe-N + { "310", "2032", }, // Prime, Minutes, or Feet Symbol + { "311", "2033", }, // Double Prime, Seconds, or Inches Symbol + { "312", "2020", }, // Dagger Symbol + { "313", "2122", }, // Trademark Sign + { "314", 
"2017", }, // Double Underline Character + { "315", "02C7", }, // Lowercase Hacek Accent (Spacing) + { "316", "02DA", }, // Lowercase Ring Accent (Spacing) + { "317", "EFF9", }, // Uppercase Acute Accent (Spacing) + { "318", "EFF8", }, // Uppercase Grave Accent (Spacing) + { "319", "EFF7", }, // Uppercase Circumflex Accent (Spacing) + { "320", "EFF6", }, // Uppercase Dieresis Accent (Spacing) + { "321", "EFF5", }, // Uppercase Tilde Accent (Spacing) + { "322", "EFF4", }, // Uppercase Hacek Accent (Spacing) + { "323", "EFF3", }, // Uppercase Ring Accent (Spacing) + { "324", "2215", }, // Vulgar Fraction Bar + { "325", "2014", }, // Em Dash + { "326", "2013", }, // En Dash + { "327", "2021", }, // Double Dagger Symbol + { "328", "0131", }, // Lowercase Undotted I + { "329", "0027", }, // Neutral Single Quote + { "330", "EFF2", }, // Uppercase Cedilla (Spacing) + { "331", "2022", }, // Small Solid Round Bullet + { "332", "207F", }, // Superior Lowercase N + { "333", "2302", }, // Home Plate + { "335", "0138", }, // Lowercase Kra + { "338", "0166", }, // Uppercase T-Stroke + { "339", "0167", }, // Lowercase T-Stroke + { "340", "014A", }, // Uppercase Eng + { "341", "014B", }, // Lowercase Eng + { "342", "0111", }, // Lowercase D-Stroke + { "400", "0102", }, // Uppercase A Breve + { "401", "0103", }, // Lowercase A Breve + { "402", "0100", }, // Uppercase A Macron + { "403", "0101", }, // Lowercase A Macron + { "404", "0104", }, // Uppercase A Ogonek + { "405", "0105", }, // Lowercase A Ogonek + { "406", "0106", }, // Uppercase C Acute + { "407", "0107", }, // Lowercase C Acute + { "410", "010C", }, // Uppercase C Hacek + { "411", "010D", }, // Lowercase C Hacek + { "414", "010E", }, // Uppercase D Hacek + { "415", "010F", }, // Lowercase D Hacek + { "416", "011A", }, // Uppercase E Hacek + { "417", "011B", }, // Lowercase E Hacek + { "418", "0116", }, // Uppercase E Overdot + { "419", "0117", }, // Lowercase E Overdot + { "420", "0112", }, // Uppercase E Macron + { 
"421", "0113", }, // Lowercase E Macron + { "422", "0118", }, // Uppercase E Ogonek + { "423", "0119", }, // Lowercase E Ogonek + { "428", "0122", }, // Uppercase G Cedilla + { "429", "0123", }, // Lowercase G Cedilla + { "432", "012E", }, // Uppercase I Ogonek + { "433", "012F", }, // Lowercase I Ogonek + { "434", "012A", }, // Uppercase I Macron + { "435", "012B", }, // Lowercase I Macron + { "438", "0136", }, // Uppercase K Cedilla + { "439", "0137", }, // Lowercase K Cedilla + { "440", "0139", }, // Uppercase L Acute + { "441", "013A", }, // Lowercase L Acute + { "442", "013D", }, // Uppercase L Hacek + { "443", "013E", }, // Lowercase L Hacek + { "444", "013B", }, // Uppercase L Cedilla + { "445", "013C", }, // Lowercase L Cedilla + { "446", "0143", }, // Uppercase N Acute + { "447", "0144", }, // Lowercase N Acute + { "448", "0147", }, // Uppercase N Hacek + { "449", "0148", }, // Lowercase N Hacek + { "450", "0145", }, // Uppercase N Cedilla + { "451", "0146", }, // Lowercase N Cedilla + { "452", "0150", }, // Uppercase O Double Acute + { "453", "0151", }, // Lowercase O Double Acute + { "454", "014C", }, // Uppercase O Macron + { "455", "014D", }, // Lowercase O Macron + { "456", "0154", }, // Uppercase R Acute + { "457", "0155", }, // Lowercase R Acute + { "458", "0158", }, // Uppercase R Hacek + { "459", "0159", }, // Lowercase R Hacek + { "460", "0156", }, // Uppercase R Cedilla + { "461", "0157", }, // Lowercase R Cedilla + { "462", "015A", }, // Uppercase S Acute + { "463", "015B", }, // Lowercase S Acute + { "466", "0164", }, // Uppercase T Hacek + { "467", "0165", }, // Lowercase T Hacek + { "468", "0162", }, // Uppercase T Cedilla + { "469", "0163", }, // Lowercase T Cedilla + { "470", "0168", }, // Uppercase U Tilde + { "471", "0169", }, // Lowercase U Tilde + { "474", "0170", }, // Uppercase U Double Acute + { "475", "0171", }, // Lowercase U Double Acute + { "476", "016E", }, // Uppercase U Ring + { "477", "016F", }, // Lowercase U Ring + { 
"478", "016A", }, // Uppercase U Macron + { "479", "016B", }, // Lowercase U Macron + { "480", "0172", }, // Uppercase U Ogonek + { "481", "0173", }, // Lowercase U Ogonek + { "482", "0179", }, // Uppercase Z Acute + { "483", "017A", }, // Lowercase Z Acute + { "484", "017B", }, // Uppercase Z Overdot + { "485", "017C", }, // Lowercase Z Overdot + { "486", "0128", }, // Uppercase I Tilde + { "487", "0129", }, // Lowercase I Tilde + { "500", "EFBF", }, // Radical, Diagonal, Composite + { "501", "221D", }, // Proportional To Symbol + { "502", "212F", }, // Napierian (italic e) + { "503", "03F5", }, // Alternate Lowercase Greek Epsilon +//{ "503", "EFEC", }, // Alternate Lowercase Greek Epsilon + { "504", "2234", }, // Therefore Symbol + { "505", "0393", }, // Uppercase Greek Gamma + { "506", "2206", }, // Increment Symbol (Delta) + { "507", "0398", }, // Uppercase Greek Theta + { "508", "039B", }, // Uppercase Greek Lambda + { "509", "039E", }, // Uppercase Greek Xi + { "510", "03A0", }, // Uppercase Greek Pi + { "511", "03A3", }, // Uppercase Greek Sigma + { "512", "03A5", }, // Uppercase Greek Upsilon + { "513", "03A6", }, // Uppercase Greek Phi + { "514", "03A8", }, // Uppercase Greek Psi + { "515", "03A9", }, // Uppercase Greek Omega + { "516", "2207", }, // Nabla Symbol (inverted Delta) + { "517", "2202", }, // Partial Differential Delta Symbol + { "518", "03C2", }, // Lowercase Sigma, Terminal + { "519", "2260", }, // Not Equal To Symbol + { "520", "EFEB", }, // Underline, Composite + { "521", "2235", }, // Because Symbol + { "522", "03B1", }, // Lowercase Greek Alpha + { "523", "03B2", }, // Lowercase Greek Beta + { "524", "03B3", }, // Lowercase Greek Gamma + { "525", "03B4", }, // Lowercase Greek Delta + { "526", "03B5", }, // Lowercase Greek Epsilon + { "527", "03B6", }, // Lowercase Greek Zeta + { "528", "03B7", }, // Lowercase Greek Eta + { "529", "03B8", }, // Lowercase Greek Theta + { "530", "03B9", }, // Lowercase Greek Iota + { "531", "03BA", }, // 
Lowercase Greek Kappa + { "532", "03BB", }, // Lowercase Greek Lambda + { "533", "03BC", }, // Lowercase Greek Mu + { "534", "03BD", }, // Lowercase Greek Nu + { "535", "03BE", }, // Lowercase Greek Xi + { "536", "03BF", }, // Lowercase Greek Omicron + { "537", "03C0", }, // Lowercase Greek Pi + { "538", "03C1", }, // Lowercase Greek Rho + { "539", "03C3", }, // Lowercase Greek Sigma + { "540", "03C4", }, // Lowercase Greek Tau + { "541", "03C5", }, // Lowercase Greek Upsilon + { "542", "03C6", }, // Lowercase Greek Phi + { "543", "03C7", }, // Lowercase Greek Chi + { "544", "03C8", }, // Lowercase Greek Psi + { "545", "03C9", }, // Lowercase Greek Omega + { "546", "03D1", }, // Lowercase Greek Theta, Open + { "547", "03D5", }, // Lowercase Greek Phi, Open + { "548", "03D6", }, // Lowercase Pi, Alternate + { "549", "2243", }, // Wavy Over Straight Approximate Symbol + { "550", "2262", }, // Not Exactly Equal To Symbol + { "551", "21D1", }, // Up Arrow Double Stroke + { "552", "21D2", }, // Right Arrow Double Stroke + { "553", "21D3", }, // Down Arrow Double Stroke + { "554", "21D0", }, // Left Arrow Double Stroke + { "555", "21D5", }, // Up/Down Arrow Double Stroke + { "556", "21D4", }, // Left/Right Arrow Double Stroke + { "557", "21C4", }, // Right Over Left Arrow + { "558", "21C6", }, // Left Over Right Arrow + { "559", "EFE9", }, // Vector Symbol + { "560", "0305", }, // Overline, Composite + { "561", "2200", }, // For All Symbol, or Universal (inverted A) + { "562", "2203", }, // There Exists Symbol, or Existential (inverted E) + { "563", "22A4", }, // Top Symbol + { "564", "22A5", }, // Bottom Symbol + { "565", "222A", }, // Set Union Symbol + { "566", "2208", }, // Element-Of Symbol + { "567", "220B", }, // Contains Symbol + { "568", "2209", }, // Not-Element-Of Symbol + { "569", "2282", }, // Proper Subset Symbol + { "570", "2283", }, // Proper Superset Symbol + { "571", "2284", }, // Not Proper Subset Symbol + { "572", "2285", }, // Not Proper Superset 
Symbol + { "573", "2286", }, // Subset Symbol + { "574", "2287", }, // Superset Symbol + { "575", "2295", }, // Plus In Circle Symbol + { "576", "2299", }, // Dot In Circle Symbol + { "577", "2297", }, // Times In Circle Symbol + { "578", "2296", }, // Minus In Circle Symbol + { "579", "2298", }, // Slash In Circle Symbol + { "580", "2227", }, // Logical And Symbol + { "581", "2228", }, // Logical Or Symbol + { "582", "22BB", }, // Exclusive Or Symbol + { "583", "2218", }, // Functional Composition Symbol + { "584", "20DD", }, // Large Open Circle + { "585", "22A3", }, // Assertion Symbol + { "586", "22A2", }, // Backwards Assertion Symbol + { "587", "222B", }, // Integral Symbol + { "588", "222E", }, // Curvilinear Integral Symbol + { "589", "2220", }, // Angle Symbol + { "590", "2205", }, // Empty Set Symbol + { "591", "2135", }, // Hebrew Aleph + { "592", "2136", }, // Hebrew Beth + { "593", "2137", }, // Hebrew Gimmel + { "594", "212D", }, // Fraktur Uppercase C + { "595", "2111", }, // Fraktur Uppercase I + { "596", "211C", }, // Fraktur Uppercase R + { "597", "2128", }, // Fraktur Uppercase Z + { "598", "23A1", }, // Top Segment Left Bracket (Left Square Bracket Upper Corner) + { "599", "23A3", }, // Bottom Segment Left Bracket (Left Square Bracket Lower Corner) + { "600", "239B", }, // Top Segment Left Brace (Left Parenthesis Upper Hook) +//{ "600", "23A7", }, // Top Segment Left Brace (Right Curly Bracket Upper Hook) + { "601", "23A8", }, // Middle Segment Left Brace (Right Curly Bracket Middle Piece) + { "602", "239D", }, // Bottom Segment LeftBrace (Left Parenthesis Lower Hook) +//{ "602", "23A9", }, // Bottom Segment Left Brace (Right Curly Bracket Lower Hook) + { "603", "EFD4", }, // Middle Segment Curvilinear Integral + { "604", "EFD3", }, // Top Left Segment Summation + { "605", "2225", }, // Double Vertical Line, Composite + { "606", "EFD2", }, // Bottom Left Segment Summation + { "607", "EFD1", }, // Bottom Diagonal Summation + { "608", "23A4", }, 
// Top Segment Right Bracket (Right Square Bracket Upper Corner) + { "609", "23A6", }, // Bottom Segment Right Bracket (Right Square Bracket Lower Corner) + { "610", "239E", }, // Top Segment Right Brace (Right Parenthesis Upper Hook) +//{ "610", "23AB", }, // Top Segment Right Brace (Right Curly Bracket Upper Hook) + { "611", "23AC", }, // Middle Segment Right Brace (Right Curly Bracket Middle Piece) + { "612", "23A0", }, // Bottom Segment Right ( Right Parenthesis Lower Hook) +//{ "612", "23AD", }, // Bottom Segment Right Brace (Right Curly Bracket Lower Hook) + { "613", "239C", }, // Thick Vertical Line, Composite (Left Parenthesis Extension) +//{ "613", "239F", }, // Thick Vertical Line, Composite (Right Parenthesis Extension) +//{ "613", "23AA", }, // Thick Vertical Line, Composite (Curly Bracket Extension) +//{ "613", "23AE", }, // Thick Vertical Line, Composite (Integral Extension) + { "614", "2223", }, // Thin Vertical Line, Composite + { "615", "EFDC", }, // Bottom Segment of Vertical Radical + { "616", "EFD0", }, // Top Right Segment Summation + { "617", "EFCF", }, // Middle Segment Summation + { "618", "EFCE", }, // Bottom Right Segment Summation + { "619", "EFCD", }, // Top Diagonal Summation + { "620", "2213", }, // Minus Over Plus Sign + { "621", "2329", }, // Left Angle Bracket + { "622", "232A", }, // Right Angle Bracket + { "623", "EFFF", }, // Mask Symbol + { "624", "2245", }, // Wavy Over Two Straight Approximate Symbol + { "625", "2197", }, // 45 Degree Arrow + { "626", "2198", }, // -45 Degree Arrow + { "627", "2199", }, // -135 Degree Arrow + { "628", "2196", }, // 135 Degree Arrow + { "629", "25B5", }, // Up Open Triangle + { "630", "25B9", }, // Right Open Triangle + { "631", "25BF", }, // Down Open Triangle + { "632", "25C3", }, // Left Open Triangle + { "633", "226A", }, // Much Less Than Sign + { "634", "226B", }, // Much Greater Than Sign + { "635", "2237", }, // Proportional To Symbol (4 dots) + { "636", "225C", }, // Defined As Symbol 
+ { "637", "03DD", }, // Lowercase Greek Digamma + { "638", "210F", }, // Planck's Constant divided by 2 pi + { "639", "2112", }, // Laplace Transform Symbol + { "640", "EFFE", }, // Power Set + { "641", "2118", }, // Weierstrassian Symbol + { "642", "2211", }, // Summation Symbol (large Sigma) + { "643", "301A", }, // Left Double Bracket + { "644", "EFC9", }, // Middle Segment Double Bracket + { "645", "301B", }, // Right Double Bracket + { "646", "256D", }, // Box Draw Left Top Round Corner + { "647", "2570", }, // Box Draw Left Bottom Round Corner + { "648", "EFC8", }, // Extender Large Union/Product + { "649", "EFC7", }, // Bottom Segment Large Union + { "650", "EFC6", }, // Top Segment Large Intersection + { "651", "EFC5", }, // Top Segment Left Double Bracket + { "652", "EFC4", }, // Bottom Segment Left Double Bracket + { "653", "EFFC", }, // Large Open Square Box + { "654", "25C7", }, // Open Diamond + { "655", "256E", }, // Box Draw Right Top Round Corner + { "656", "256F", }, // Box Draw Right Bottom Round Corner + { "657", "EFC3", }, // Bottom Segment Large Bottom Product + { "658", "EFC2", }, // Top Segment Large Top Product + { "659", "EFC1", }, // Top Segment Right Double Bracket + { "660", "EFC0", }, // Bottom Segment Right Double Bracket + { "661", "EFFB", }, // Large Solid Square Box + { "662", "25C6", }, // Solid Diamond + { "663", "220D", }, // Such That Symbol (rotated lc epsilon) + { "664", "2217", }, // Math Asterisk + { "665", "23AF", }, // Horizontal Arrow Extender (Horizontal Line Extension) + { "666", "EFCB", }, // Double Horizontal Arrow Extender + { "667", "EFCC", }, // Inverted Complement of 0xEFCF or MSL 617 + { "668", "221F", }, // Right Angle Symbol + { "669", "220F", }, // Product Symbol (large Pi) + { "684", "25CA", }, // Lozenge, Diamond + { "1000", "2070", }, // Superior Numeral 0 + { "1001", "2074", }, // Superior Numeral 4 + { "1002", "2075", }, // Superior Numeral 5 + { "1003", "2076", }, // Superior Numeral 6 + { "1004", 
"2077", }, // Superior Numeral 7 + { "1005", "2078", }, // Superior Numeral 8 + { "1006", "2079", }, // Superior Numeral 9 + { "1017", "201C", }, // Double Open Quote (6) + { "1018", "201D", }, // Double Close Quote (9) + { "1019", "201E", }, // Double Baseline Quote (9) + { "1020", "2003", }, // Em Space + { "1021", "2002", }, // En Space + { "1023", "2009", }, // Thin Space + { "1028", "2026", }, // Ellipsis + { "1030", "EFF1", }, // Uppercase Ogonek (Spacing) + { "1031", "017E", }, // Lowercase Z Hacek + { "1034", "2120", }, // Service Mark + { "1036", "211E", }, // Prescription Sign +//{ "1040", "F001", }, // Lowercase FI Ligature + { "1040", "FB01", }, // Lowercase FI Ligature +//{ "1041", "F002", }, // Lowercase FL Ligature + { "1041", "FB02", }, // Lowercase FL Ligature + { "1042", "FB00", }, // Lowercase FF Ligature + { "1043", "FB03", }, // Lowercase FFI Ligature + { "1044", "FB04", }, // Lowercase FFL Ligature + { "1045", "EFF0", }, // Uppercase Double Acute Accent (Spacing) + { "1047", "0133", }, // Lowercase IJ Ligature + { "1060", "2105", }, // Care Of Symbol + { "1061", "011E", }, // Uppercase G Breve + { "1062", "011F", }, // Lowercase G Breve + { "1063", "015E", }, // Uppercase S Cedilla + { "1064", "015F", }, // Lowercase S Cedilla + { "1065", "0130", }, // Uppercase I Overdot + { "1067", "201A", }, // Single Baseline Quote (9) + { "1068", "2030", }, // Per Mill Sign + { "1069", "20AC", }, // Euro + { "1084", "02C9", }, // Lowercase Macron Accent (Spacing) + { "1086", "02D8", }, // Lowercase Breve Accent (Spacing) + { "1088", "02D9", }, // Lowercase Overdot Accent (Spacing) + { "1090", "0153", }, // Lowercase OE Ligature + { "1091", "0152", }, // Uppercase OE Ligature + { "1092", "2039", }, // Left Pointing Single Angle Quote + { "1093", "203A", }, // Right Pointing Single Angle Quote + { "1094", "25A1", }, // Medium Open Square Box + { "1095", "0141", }, // Uppercase L-Stroke + { "1096", "0142", }, // Lowercase L-Stroke + { "1097", "02DD", }, // 
Lowercase Double Acute Accent (Spacing) + { "1098", "02DB", }, // Lowercase Ogonek (Spacing) + { "1099", "21B5", }, // Carriage Return Symbol + { "1100", "EFDB", }, // Full Size Serif Registered + { "1101", "EFDA", }, // Full Size Serif Copyright + { "1102", "EFD9", }, // Full Size Serif Trademark + { "1103", "EFD8", }, // Full Size Sans Registered + { "1104", "EFD7", }, // Full Size Sans Copyright + { "1105", "EFD6", }, // Full Size Sans Trademark + { "1106", "017D", }, // Uppercase Z Hacek + { "1107", "0132", }, // Uppercase IJ Ligature + { "1108", "25AB", }, // Small Open Square Box + { "1109", "25E6", }, // Small Open Round Bullet + { "1110", "25CB", }, // Medium Open Round Bullet + { "1111", "EFFA", }, // Large Solid Round Bullet + { "3812", "F000", }, // Ornament, Apple +}; + +// global constructor +static struct hp_msl_to_unicode_init { + hp_msl_to_unicode_init(); +} _hp_msl_to_unicode_init; + +hp_msl_to_unicode_init::hp_msl_to_unicode_init() { + for (unsigned int i = 0; + i < sizeof(hp_msl_to_unicode_list)/sizeof(hp_msl_to_unicode_list[0]); + i++) { + hp_msl_to_unicode *ptu = new hp_msl_to_unicode[1]; + ptu->value = (char *)hp_msl_to_unicode_list[i].value; + hp_msl_to_unicode_table.define(hp_msl_to_unicode_list[i].key, ptu); + } +} + +const char *hp_msl_to_unicode_code(const char *s) +{ + hp_msl_to_unicode *result = hp_msl_to_unicode_table.lookup(s); + return result ? 
result->value : 0; +} diff --git a/src/utils/indxbib/eign b/src/utils/indxbib/eign new file mode 100644 index 0000000..7718c8b --- /dev/null +++ b/src/utils/indxbib/eign @@ -0,0 +1,133 @@ +a +i +the +to +of +and +in +is +it +for +that +if +you +this +be +on +with +not +have +are +or +as +from +can +but +by +at +an +will +no +all +was +do +there +my +one +so +we +they +what +would +any +which +about +get +your +use +some +me +then +name +like +out +when +up +time +other +more +only +just +end +also +know +how +new +should +been +than +them +he +who +make +may +people +these +now +their +here +into +first +could +way +had +see +work +well +were +two +very +where +while +us +because +good +same +even +much +most +many +such +long +his +over +last +since +right +before +our +without +too +those +why +must +part +being +current +back +still +go +point +value +each +did +both +true +off +say +another +state +might +under +start +try diff --git a/src/utils/indxbib/indxbib.1.man b/src/utils/indxbib/indxbib.1.man new file mode 100644 index 0000000..df02fcc --- /dev/null +++ b/src/utils/indxbib/indxbib.1.man @@ -0,0 +1,347 @@ +.TH @g@indxbib @MAN1EXT@ "@MDATE@" "groff @VERSION@" +.SH Name +@g@indxbib \- make inverted index for bibliographic databases +. +. +.\" ==================================================================== +.\" Legal Terms +.\" ==================================================================== +.\" +.\" Copyright (C) 1989-2020 Free Software Foundation, Inc. +.\" +.\" Permission is granted to make and distribute verbatim copies of this +.\" manual provided the copyright notice and this permission notice are +.\" preserved on all copies. +.\" +.\" Permission is granted to copy and distribute modified versions of +.\" this manual under the conditions for verbatim copying, provided that +.\" the entire resulting derived work is distributed under the terms of +.\" a permission notice identical to this one. 
+.\" +.\" Permission is granted to copy and distribute translations of this +.\" manual into another language, under the above conditions for +.\" modified versions, except that this permission notice may be +.\" included in translations approved by the Free Software Foundation +.\" instead of in the original English. +. +. +.\" Save and disable compatibility mode (for, e.g., Solaris 10/11). +.do nr *groff_indxbib_1_man_C \n[.cp] +.cp 0 +. +.\" Define fallback for groff 1.23's MR macro if the system lacks it. +.nr do-fallback 0 +.if !\n(.f .nr do-fallback 1 \" mandoc +.if \n(.g .if !d MR .nr do-fallback 1 \" older groff +.if !\n(.g .nr do-fallback 1 \" non-groff *roff +.if \n[do-fallback] \{\ +. de MR +. ie \\n(.$=1 \ +. I \%\\$1 +. el \ +. IR \%\\$1 (\\$2)\\$3 +. . +.\} +.rr do-fallback +. +. +.\" ==================================================================== +.SH Synopsis +.\" ==================================================================== +. +.SY @g@indxbib +.RB [ \-w ] +.RB [ \-c\~\c +.IR \%common-words-file ] +.RB [ \-d\~\c +.IR dir ] +.RB [ \-f\~\c +.IR \%list-file ] +.RB [ \-h\~\c +.IR \%min-hash-table-size ] +.RB [ \-i\~\c +.IR \%excluded-fields ] +.RB [ \-k\~\c +.IR \%max-keys-per-record ] +.RB [ \-l\~\c +.IR \%min-key-length ] +.RB [ \-n\~\c +.IR \%threshold ] +.RB [ \-o\~\c +.IR file ] +.RB [ \-t\~\c +.IR \%max-key-length ] +.RI [ file\~ .\|.\|.] +.YS +. +. +.SY @g@indxbib +.B \-\-help +.YS +. +. +.SY @g@indxbib +.B \-v +. +.SY @g@indxbib +.B \-\-version +.YS +. +. +.\" ==================================================================== +.SH Description +.\" ==================================================================== +. +.I @g@indxbib +makes an inverted index for the bibliographic databases in each +.I file +for use with +.MR @g@refer @MAN1EXT@ , +.MR @g@lookbib @MAN1EXT@ , +and +.MR lkbib @MAN1EXT@ . +. +Each created index is named +.RI file @INDEX_SUFFIX@ ; +writing is done to a temporary file which is then renamed to this. +. 
+If no +.I file +operands are given on the command line because the +.B \-f +option has been used, +and no +.B \-o +option is given, +the index will be named +.IR \%@DEFAULT_INDEX_NAME@@INDEX_SUFFIX@ . +. +. +.LP +Bibliographic databases are divided into records by blank lines. +. +Within a record, +each field starts with a +.B % +character at the beginning of a line. +. +Fields have a one letter name that follows the +.B % +character. +. +. +.LP +The values set by the +.BR \-c , +.BR \-l , +.BR \-n , +and +.B \-t +options are stored in the index: +when the index is searched, +keys will be discarded and truncated in a +manner appropriate to these options; +the original keys will be used for verifying that any record +found using the index actually contains the keys. +. +This means that a user of an index need not know whether these +options were used in the creation of the index, +provided that not all the keys to be searched for +would have been discarded during indexing +and that the user supplies at least the part of each key +that would have remained after being truncated during indexing. +. +The value set by the +.B \-i +option is also stored in the index +and will be used in verifying records found using the index. +. +. +.\" ==================================================================== +.SH Options +.\" ==================================================================== +. +.B \-\-help +displays a usage message, +while +.B \-v +and +.B \-\-version +show version information; +all exit afterward. +. +. +.TP +.BI \-c\~ common-words-file +Read the list of common words from +.I common-words-file +instead of +.IR \%@COMMON_WORDS_FILE@ . +. +. +.TP +.BI \-d\~ dir +Use +.I dir +as the name of the directory to store in the index, +instead of that returned by +.MR getcwd 2 . +. +Typically, +.I dir +will be a symbolic link whose target is the current working directory. +. +. +.TP +.BI \-f\~ list-file +Read the files to be indexed from +.IR list-file . +. 
+If +.I list-file +is +.BR \- , +files will be read from the standard input stream. +. +The +.B \-f +option can be given at most once. +. +. +.TP +.BI \-h\~ min-hash-table-size +Use the first prime number greater than or equal to +the argument for the size of the hash table. +. +Larger values +will usually make searching faster, +but will make the index file larger +and cause +.I @g@indxbib +to use more memory. +. +The default hash table size is 997. +. +. +.TP +.BI \-i\~ excluded-fields +Don't index the contents of fields whose names are in +.IR excluded-fields . +. +Field names are one character each. +. +If this option is not present, +.I @g@indxbib +excludes fields +.BR X , +.BR Y , +and +.BR Z . +. +. +.TP +.BI \-k\~ max-keys-per-record +Use no more keys per input record than specified in the argument. +. +If this option is not present, +the maximum is 100. +. +. +.TP +.BI \-l\~ min-key-length +Discard any key whose length in characters is shorter than the value of +the argument. +. +If this option is not present, +the minimum key length +is 3. +. +. +.TP +.BI \-n\~ threshold +Discard the +.I threshold +most common words from the common words file. +. +If this option is not present, +the 100 most common words are discarded. +. +. +.TP +.BI \-o\~ basename +Name the index +.RI basename @INDEX_SUFFIX@ . +. +. +.TP +.BI \-t\~ max-key-length +Truncate keys to +.I max-key-length +in characters. +. +If this option is not present, +keys are truncated to 6 characters. +. +. +.TP +.B \-w +Index whole files. +. +Each file is a separate record. +. +. +.\" ==================================================================== +.SH Files +.\" ==================================================================== +. +.TP +.RI \%file @INDEX_SUFFIX@ +index for +.I file +. +. +.TP +.I \%@DEFAULT_INDEX_NAME@@INDEX_SUFFIX@ +default index name +. +. +.TP +.I \%@COMMON_WORDS_FILE@ +contains the list of common words. +. 
+The traditional name, +.RI \[lq] eign \[rq], +is an abbreviation of \[lq]English ignored [word list]\[rq]. +. +. +.TP +.IR \%indxbib XXXXXX +temporary file +. +. +.\" ==================================================================== +.SH "See also" +.\" ==================================================================== +. +\[lq]Some Applications of Inverted Indexes on the Unix System\[rq], +by M.\& E.\& Lesk, +1978, +AT&T Bell Laboratories Computing Science Technical Report No.\& 69. +. +. +.LP +.MR @g@refer @MAN1EXT@ , +.MR lkbib @MAN1EXT@ , +.MR @g@lookbib @MAN1EXT@ +. +. +.\" Restore compatibility mode (for, e.g., Solaris 10/11). +.cp \n[*groff_indxbib_1_man_C] +.do rr *groff_indxbib_1_man_C +. +. +.\" Local Variables: +.\" fill-column: 72 +.\" mode: nroff +.\" End: +.\" vim: set filetype=groff textwidth=72: diff --git a/src/utils/indxbib/indxbib.am b/src/utils/indxbib/indxbib.am new file mode 100644 index 0000000..d2a7d5a --- /dev/null +++ b/src/utils/indxbib/indxbib.am @@ -0,0 +1,57 @@ +# Copyright (C) 2014-2020 Free Software Foundation, Inc. +# +# This file is part of groff. +# +# groff is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# groff is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +indxbib_srcdir = $(top_srcdir)/src/utils/indxbib +prefixexecbin_PROGRAMS += indxbib +indxbib_SOURCES = \ + src/utils/indxbib/indxbib.cpp \ + src/utils/indxbib/signal.c +src/utils/indxbib/indxbib.$(OBJEXT): defs.h +indxbib_LDADD = libbib.a libgroff.a $(LIBM) lib/libgnu.a +PREFIXMAN1 += src/utils/indxbib/indxbib.1 +EXTRA_DIST += \ + src/utils/indxbib/indxbib.1.man \ + src/utils/indxbib/eign + +install-data-local: install_indxbib +install_indxbib: $(indxbib_srcdir)/eign + -test -d $(DESTDIR)$(datadir) \ + || $(mkinstalldirs) $(DESTDIR)$(datadir) + -test -d $(DESTDIR)$(dataprogramdir) \ + || $(mkinstalldirs) $(DESTDIR)$(dataprogramdir) + -test -d $(DESTDIR)$(datasubdir) \ + || $(mkinstalldirs) $(DESTDIR)$(datasubdir) + if test -f /usr/lib/eign; then \ + rm -f $(DESTDIR)$(common_words_file); \ + ln -s /usr/lib/eign $(DESTDIR)$(common_words_file) 2>/dev/null \ + || ln /usr/lib/eign $(DESTDIR)$(common_words_file) 2>/dev/null \ + || cp /usr/lib/eign $(DESTDIR)$(common_words_file); \ + else \ + rm -f $(DESTDIR)$(common_words_file); \ + $(INSTALL_DATA) $(indxbib_srcdir)/eign $(DESTDIR)$(common_words_file); \ + fi + +uninstall-local: uninstall_indxbib +uninstall_indxbib: + rm -f $(DESTDIR)$(common_words_file) + + +# Local Variables: +# fill-column: 72 +# mode: makefile-automake +# End: +# vim: set autoindent filetype=automake textwidth=72: diff --git a/src/utils/indxbib/indxbib.cpp b/src/utils/indxbib/indxbib.cpp new file mode 100644 index 0000000..ad8bb0e --- /dev/null +++ b/src/utils/indxbib/indxbib.cpp @@ -0,0 +1,803 @@ +/* Copyright (C) 1989-2020 Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.com) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or +(at your option) any later version. 
+ +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include "lib.h" + +#include <assert.h> +#include <errno.h> +#include <stdlib.h> + +#include "posix.h" +#include "errarg.h" +#include "error.h" +#include "stringclass.h" +#include "cset.h" +#include "cmap.h" + +#include "defs.h" +#include "index.h" + +#include "nonposix.h" + +extern "C" const char *Version_string; + +#define DEFAULT_HASH_TABLE_SIZE 997 +#define TEMP_INDEX_TEMPLATE "indxbibXXXXXX" + +// (2^n - MALLOC_OVERHEAD) should be a good argument for malloc(). + +#define MALLOC_OVERHEAD 16 + +#ifdef BLOCK_SIZE +#undef BLOCK_SIZE +#endif + +const int BLOCK_SIZE = ((1024 - MALLOC_OVERHEAD - sizeof(struct block *) + - sizeof(int)) / sizeof(int)); +struct block { + block *next; + int used; + int v[BLOCK_SIZE]; + + block(block *p = 0) : next(p), used(0) { } +}; + +struct block; + +union table_entry { + block *ptr; + int count; +}; + +struct word_list { + word_list *next; + char *str; + int len; + word_list(const char *, int, word_list *); +}; + +table_entry *hash_table; +int hash_table_size = DEFAULT_HASH_TABLE_SIZE; +// We make this the same size as hash_table so we only have to do one +// mod per key. 
+static word_list **common_words_table = 0; +char *key_buffer; + +FILE *indxfp; +int ntags = 0; +string filenames; +char *temp_index_file = 0; + +const char *ignore_fields = "XYZ"; +const char *common_words_file = COMMON_WORDS_FILE; +int n_ignore_words = 100; +int truncate_len = 6; +int shortest_len = 3; +int max_keys_per_item = 100; + +static void usage(FILE *stream); +static void write_hash_table(); +static void init_hash_table(); +static void read_common_words_file(); +static int store_key(char *s, int len); +static void possibly_store_key(char *s, int len); +static int do_whole_file(const char *filename); +static int do_file(const char *filename); +static void store_reference(int filename_index, int pos, int len); +static void check_integer_arg(char opt, const char *arg, int min, int *res); +static void store_filename(const char *); +static void fwrite_or_die(const void *ptr, int size, int nitems, FILE *fp); +static char *get_cwd(); + +extern "C" { + void cleanup(); + void catch_fatal_signals(); + void ignore_fatal_signals(); +} + +int main(int argc, char **argv) +{ + program_name = argv[0]; + static char stderr_buf[BUFSIZ]; + setbuf(stderr, stderr_buf); + + const char *base_name = 0; + typedef int (*parser_t)(const char *); + parser_t parser = do_file; + const char *directory = 0; + const char *foption = 0; + int opt; + static const struct option long_options[] = { + { "help", no_argument, 0, CHAR_MAX + 1 }, + { "version", no_argument, 0, 'v' }, + { NULL, 0, 0, 0 } + }; + while ((opt = getopt_long(argc, argv, "c:o:h:i:k:l:t:n:c:d:f:vw", + long_options, NULL)) + != EOF) + switch (opt) { + case 'c': + common_words_file = optarg; + break; + case 'd': + directory = optarg; + break; + case 'f': + foption = optarg; + break; + case 'h': + { + int requested_hash_table_size; + check_integer_arg('h', optarg, 1, &requested_hash_table_size); + hash_table_size = requested_hash_table_size; + if ((hash_table_size > 2) && (hash_table_size % 2) == 0) + hash_table_size++; + 
while (!is_prime(hash_table_size)) + hash_table_size += 2; + if (hash_table_size != requested_hash_table_size) + warning("requested hash table size %1 is not prime: using %2" + " instead", optarg, hash_table_size); + } + break; + case 'i': + ignore_fields = optarg; + break; + case 'k': + check_integer_arg('k', optarg, 1, &max_keys_per_item); + break; + case 'l': + check_integer_arg('l', optarg, 0, &shortest_len); + break; + case 'n': + check_integer_arg('n', optarg, 0, &n_ignore_words); + break; + case 'o': + base_name = optarg; + break; + case 't': + check_integer_arg('t', optarg, 1, &truncate_len); + break; + case 'w': + parser = do_whole_file; + break; + case 'v': + printf("GNU indxbib (groff) version %s\n", Version_string); + exit(0); + break; + case CHAR_MAX + 1: // --help + usage(stdout); + exit(0); + break; + case '?': + usage(stderr); + exit(1); + break; + default: + assert(0); + break; + } + if (optind >= argc && foption == 0) + fatal("no files and no -f option"); + if (!directory) { + char *path = get_cwd(); + store_filename(path); + delete[] path; + } + else + store_filename(directory); + init_hash_table(); + store_filename(common_words_file); + store_filename(ignore_fields); + key_buffer = new char[truncate_len]; + read_common_words_file(); + if (!base_name) + base_name = optind < argc ? argv[optind] : DEFAULT_INDEX_NAME; + const char *p = strrchr(base_name, DIR_SEPS[0]), *p1; + const char *sep = &DIR_SEPS[1]; + while (*sep) { + p1 = strrchr(base_name, *sep); + if (p1 && (!p || p1 > p)) + p = p1; + sep++; + } + size_t name_max; + if (p) { + char *dir = strsave(base_name); + dir[p - base_name] = '\0'; + name_max = file_name_max(dir); + delete[] dir; + } + else + name_max = file_name_max("."); + const char *filename = p ? 
p + 1 : base_name; + if (strlen(filename) + sizeof(INDEX_SUFFIX) - 1 > name_max) + fatal("'%1.%2' is too long for a filename", filename, INDEX_SUFFIX); + if (p) { + p++; + temp_index_file = new char[p - base_name + sizeof(TEMP_INDEX_TEMPLATE)]; + memcpy(temp_index_file, base_name, p - base_name); + strcpy(temp_index_file + (p - base_name), TEMP_INDEX_TEMPLATE); + } + else { + temp_index_file = strsave(TEMP_INDEX_TEMPLATE); + } + catch_fatal_signals(); + int fd = mkstemp(temp_index_file); + if (fd < 0) + fatal("can't create temporary index file: %1", strerror(errno)); + indxfp = fdopen(fd, FOPEN_WB); + if (indxfp == 0) + fatal("fdopen failed"); + if (fseek(indxfp, sizeof(index_header), 0) < 0) + fatal("can't seek past index header: %1", strerror(errno)); + int failed = 0; + if (foption) { + FILE *fp = stdin; + if (strcmp(foption, "-") != 0) { + errno = 0; + fp = fopen(foption, "r"); + if (!fp) + fatal("can't open '%1': %2", foption, strerror(errno)); + } + string path; + int lineno = 1; + for (;;) { + int c; + for (c = getc(fp); c != '\n' && c != EOF; c = getc(fp)) { + if (c == '\0') + error_with_file_and_line(foption, lineno, + "nul character in pathname ignored"); + else + path += c; + } + if (path.length() > 0) { + path += '\0'; + if (!(*parser)(path.contents())) + failed = 1; + path.clear(); + } + if (c == EOF) + break; + lineno++; + } + if (fp != stdin) + fclose(fp); + } + for (int i = optind; i < argc; i++) + if (!(*parser)(argv[i])) + failed = 1; + write_hash_table(); + if (fclose(indxfp) < 0) + fatal("error closing temporary index file: %1", strerror(errno)); + char *index_file = new char[strlen(base_name) + sizeof(INDEX_SUFFIX)]; + strcpy(index_file, base_name); + strcat(index_file, INDEX_SUFFIX); +#ifdef HAVE_RENAME +#ifdef __EMX__ + if (access(index_file, R_OK) == 0) + unlink(index_file); +#endif /* __EMX__ */ + if (rename(temp_index_file, index_file) < 0) { +#ifdef __MSDOS__ + // RENAME could fail on plain MS-DOS filesystems because + // INDEX_FILE is an 
invalid filename, e.g. it has multiple dots. + char *fname = p ? index_file + (p - base_name) : 0; + char *dot = 0; + + // Replace the dot with an underscore and try again. + if (fname + && (dot = strchr(fname, '.')) != 0 + && strcmp(dot, INDEX_SUFFIX) != 0) + *dot = '_'; + if (rename(temp_index_file, index_file) < 0) +#endif + fatal("can't rename temporary index file: %1", strerror(errno)); + } +#else /* not HAVE_RENAME */ + ignore_fatal_signals(); + if (unlink(index_file) < 0) { + if (errno != ENOENT) + fatal("can't unlink '%1': %2", index_file, strerror(errno)); + } + if (link(temp_index_file, index_file) < 0) + fatal("can't link temporary index file: %1", strerror(errno)); + if (unlink(temp_index_file) < 0) + fatal("can't unlink temporary index file: %1", strerror(errno)); +#endif /* not HAVE_RENAME */ + temp_index_file = 0; + return failed; +} + +static void usage(FILE *stream) +{ + fprintf(stream, +"usage: %s [-w] [-c common-words-file] [-d dir] [-f list-file]" +" [-h min-hash-table-size] [-i excluded-fields]" +" [-k max-keys-per-record] [-l min-key-length]" +" [-n threshold] [-o file] [-t max-key-length] [file ...]\n" +"usage: %s {-v | --version}\n" +"usage: %s --help\n", + program_name, program_name, program_name); +} + +static void check_integer_arg(char opt, const char *arg, int min, int *res) +{ + char *ptr; + long n = strtol(arg, &ptr, 10); + if (n == 0 && ptr == arg) + error("argument to -%1 not an integer", opt); + else if (n < min) + error("argument to -%1 must not be less than %2", opt, min); + else { + if (n > INT_MAX) + error("argument to -%1 greater than maximum integer", opt); + else if (*ptr != '\0') + error("junk after integer argument to -%1", opt); + *res = int(n); + } +} + +static char *get_cwd() +{ + char *buf; + int size = 12; + + for (;;) { + buf = new char[size]; + if (getcwd(buf, size)) + break; + if (errno != ERANGE) + fatal("cannot get current working directory: %1", strerror(errno)); + delete[] buf; + if (size == INT_MAX) + 
fatal("current working directory longer than INT_MAX"); + if (size > INT_MAX/2) + size = INT_MAX; + else + size *= 2; + } + return buf; +} + +word_list::word_list(const char *s, int n, word_list *p) +: next(p), len(n) +{ + str = new char[n]; + memcpy(str, s, n); +} + +static void read_common_words_file() +{ + if (n_ignore_words <= 0) + return; + errno = 0; + FILE *fp = fopen(common_words_file, "r"); + if (!fp) + fatal("can't open '%1': %2", common_words_file, strerror(errno)); + common_words_table = new word_list * [hash_table_size]; + for (int i = 0; i < hash_table_size; i++) + common_words_table[i] = 0; + int count = 0; + int key_len = 0; + for (;;) { + int c = getc(fp); + while (c != EOF && !csalnum(c)) + c = getc(fp); + if (c == EOF) + break; + do { + if (key_len < truncate_len) + key_buffer[key_len++] = cmlower(c); + c = getc(fp); + } while (c != EOF && csalnum(c)); + if (key_len >= shortest_len) { + int h = hash(key_buffer, key_len) % hash_table_size; + common_words_table[h] = new word_list(key_buffer, key_len, + common_words_table[h]); + } + if (++count >= n_ignore_words) + break; + key_len = 0; + if (c == EOF) + break; + } + n_ignore_words = count; + fclose(fp); +} + +static int do_whole_file(const char *filename) +{ + errno = 0; + FILE *fp = fopen(filename, "r"); + if (!fp) { + error("can't open '%1': %2", filename, strerror(errno)); + return 0; + } + int count = 0; + int key_len = 0; + int c; + while ((c = getc(fp)) != EOF) { + if (csalnum(c)) { + key_len = 1; + key_buffer[0] = c; + while ((c = getc(fp)) != EOF) { + if (!csalnum(c)) + break; + if (key_len < truncate_len) + key_buffer[key_len++] = c; + } + if (store_key(key_buffer, key_len)) { + if (++count >= max_keys_per_item) + break; + } + if (c == EOF) + break; + } + } + store_reference(filenames.length(), 0, 0); + store_filename(filename); + fclose(fp); + return 1; +} + +static int do_file(const char *filename) +{ + errno = 0; + // Need binary I/O for MS-DOS/MS-Windows, because indxbib relies on + // 
byte counts to be consistent with fseek. + FILE *fp = fopen(filename, FOPEN_RB); + if (fp == 0) { + error("can't open '%1': %2", filename, strerror(errno)); + return 0; + } + int filename_index = filenames.length(); + store_filename(filename); + + enum { + START, // at the start of the file; also in between references + BOL, // in the middle of a reference, at the beginning of the line + PERCENT, // seen a percent at the beginning of the line + IGNORE, // ignoring a field + IGNORE_BOL, // at the beginning of a line ignoring a field + KEY, // in the middle of a key + DISCARD, // after truncate_len bytes of a key + MIDDLE // in between keys + } state = START; + + // In states START, BOL, IGNORE_BOL, space_count how many spaces at + // the beginning have been seen. In states PERCENT, IGNORE, KEY, + // MIDDLE space_count must be 0. + int space_count = 0; + int byte_count = 0; // bytes read + int key_len = 0; + int ref_start = -1; // position of start of current reference + for (;;) { + int c = getc(fp); + if (c == EOF) + break; + // We opened the file in binary mode, so we need to skip + // every CR character before a Newline. 
+ if (c == '\r') { + int peek = getc(fp); + if (peek == '\n') { + byte_count++; + c = peek; + } + else + ungetc(peek, fp); + } +#if defined(__MSDOS__) || defined(_MSC_VER) || defined(__EMX__) + else if (c == 0x1a) // ^Z means EOF in text files + break; +#endif + byte_count++; + switch (state) { + case START: + if (c == ' ' || c == '\t') { + space_count++; + break; + } + if (c == '\n') { + space_count = 0; + break; + } + ref_start = byte_count - space_count - 1; + space_count = 0; + if (c == '%') + state = PERCENT; + else if (csalnum(c)) { + state = KEY; + key_buffer[0] = c; + key_len = 1; + } + else + state = MIDDLE; + break; + case BOL: + switch (c) { + case '%': + if (space_count > 0) { + space_count = 0; + state = MIDDLE; + } + else + state = PERCENT; + break; + case ' ': + case '\t': + space_count++; + break; + case '\n': + store_reference(filename_index, ref_start, + byte_count - 1 - space_count - ref_start); + state = START; + space_count = 0; + break; + default: + space_count = 0; + if (csalnum(c)) { + state = KEY; + key_buffer[0] = c; + key_len = 1; + } + else + state = MIDDLE; + } + break; + case PERCENT: + if (strchr(ignore_fields, c) != 0) + state = IGNORE; + else if (c == '\n') + state = BOL; + else + state = MIDDLE; + break; + case IGNORE: + if (c == '\n') + state = IGNORE_BOL; + break; + case IGNORE_BOL: + switch (c) { + case '%': + if (space_count > 0) { + state = IGNORE; + space_count = 0; + } + else + state = PERCENT; + break; + case ' ': + case '\t': + space_count++; + break; + case '\n': + store_reference(filename_index, ref_start, + byte_count - 1 - space_count - ref_start); + state = START; + space_count = 0; + break; + default: + space_count = 0; + state = IGNORE; + } + break; + case KEY: + if (csalnum(c)) { + if (key_len < truncate_len) + key_buffer[key_len++] = c; + else + state = DISCARD; + } + else { + possibly_store_key(key_buffer, key_len); + key_len = 0; + if (c == '\n') + state = BOL; + else + state = MIDDLE; + } + break; + case 
DISCARD: + if (!csalnum(c)) { + possibly_store_key(key_buffer, key_len); + key_len = 0; + if (c == '\n') + state = BOL; + else + state = MIDDLE; + } + break; + case MIDDLE: + if (csalnum(c)) { + state = KEY; + key_buffer[0] = c; + key_len = 1; + } + else if (c == '\n') + state = BOL; + break; + default: + assert(0); + } + } + switch (state) { + case START: + break; + case DISCARD: + case KEY: + possibly_store_key(key_buffer, key_len); + // fall through + case BOL: + case PERCENT: + case IGNORE_BOL: + case IGNORE: + case MIDDLE: + store_reference(filename_index, ref_start, + byte_count - ref_start - space_count); + break; + default: + assert(0); + } + fclose(fp); + return 1; +} + +static void store_reference(int filename_index, int pos, int len) +{ + tag t; + t.filename_index = filename_index; + t.start = pos; + t.length = len; + fwrite_or_die(&t, sizeof(t), 1, indxfp); + ntags++; +} + +static void store_filename(const char *fn) +{ + filenames += fn; + filenames += '\0'; +} + +static void init_hash_table() +{ + hash_table = new table_entry[hash_table_size]; + for (int i = 0; i < hash_table_size; i++) + hash_table[i].ptr = 0; +} + +static void possibly_store_key(char *s, int len) +{ + static int last_tagno = -1; + static int key_count; + if (last_tagno != ntags) { + last_tagno = ntags; + key_count = 0; + } + if (key_count < max_keys_per_item) { + if (store_key(s, len)) + key_count++; + } +} + +static int store_key(char *s, int len) +{ + if (len < shortest_len) + return 0; + int is_number = 1; + for (int i = 0; i < len; i++) + if (!csdigit(s[i])) { + is_number = 0; + s[i] = cmlower(s[i]); + } + if (is_number && !(len == 4 && s[0] == '1' && s[1] == '9')) + return 0; + int h = hash(s, len) % hash_table_size; + if (common_words_table) { + for (word_list *ptr = common_words_table[h]; ptr; ptr = ptr->next) + if (len == ptr->len && memcmp(s, ptr->str, len) == 0) + return 0; + } + table_entry *pp = hash_table + h; + if (!pp->ptr) + pp->ptr = new block; + else if 
(pp->ptr->v[pp->ptr->used - 1] == ntags) + return 1; + else if (pp->ptr->used >= BLOCK_SIZE) + pp->ptr = new block(pp->ptr); + pp->ptr->v[(pp->ptr->used)++] = ntags; + return 1; +} + +static void write_hash_table() +{ + const int minus_one = -1; + int li = 0; + for (int i = 0; i < hash_table_size; i++) { + block *ptr = hash_table[i].ptr; + if (!ptr) + hash_table[i].count = -1; + else { + hash_table[i].count = li; + block *rev = 0; + while (ptr) { + block *tem = ptr; + ptr = ptr->next; + tem->next = rev; + rev = tem; + } + while (rev) { + fwrite_or_die(rev->v, sizeof(int), rev->used, indxfp); + li += rev->used; + block *tem = rev; + rev = rev->next; + delete tem; + } + fwrite_or_die(&minus_one, sizeof(int), 1, indxfp); + li += 1; + } + } + if (sizeof(table_entry) == sizeof(int)) + fwrite_or_die(hash_table, sizeof(int), hash_table_size, indxfp); + else { + // write it out word by word + for (int i = 0; i < hash_table_size; i++) + fwrite_or_die(&hash_table[i].count, sizeof(int), 1, indxfp); + } + fwrite_or_die(filenames.contents(), 1, filenames.length(), indxfp); + if (fseek(indxfp, 0, 0) < 0) + fatal("error seeking on index file: %1", strerror(errno)); + index_header h; + h.magic = INDEX_MAGIC; + h.version = INDEX_VERSION; + h.tags_size = ntags; + h.lists_size = li; + h.table_size = hash_table_size; + h.strings_size = filenames.length(); + h.truncate = truncate_len; + h.shortest = shortest_len; + h.common = n_ignore_words; + fwrite_or_die(&h, sizeof(h), 1, indxfp); +} + +static void fwrite_or_die(const void *ptr, int size, int nitems, FILE *fp) +{ + if (fwrite(ptr, size, nitems, fp) != (size_t)nitems) + fatal("fwrite failed: %1", strerror(errno)); +} + +void fatal_error_exit() +{ + cleanup(); + exit(3); +} + +extern "C" { + +void cleanup() +{ + if (temp_index_file) + unlink(temp_index_file); +} + +} + +// Local Variables: +// fill-column: 72 +// mode: C++ +// End: +// vim: set cindent noexpandtab shiftwidth=2 textwidth=72: diff --git a/src/utils/indxbib/signal.c 
b/src/utils/indxbib/signal.c new file mode 100644 index 0000000..2231b64 --- /dev/null +++ b/src/utils/indxbib/signal.c @@ -0,0 +1,77 @@ +/* Copyright (C) 1992-2020 Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.com) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or +(at your option) any later version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Unfortunately vendors seem to have problems writing a <signal.h> +that is correct for C++, so we implement all signal handling in C. */ + +#include <config.h> + +#include <stdlib.h> +#include <sys/types.h> +#include <signal.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* Prototype */ +void catch_fatal_signals(void); + +extern void cleanup(void); + +static RETSIGTYPE handle_fatal_signal(int signum) +{ + signal(signum, SIG_DFL); + cleanup(); +#ifdef HAVE_KILL + kill(getpid(), signum); +#else + /* MS-DOS and Win32 don't have kill(); the best compromise is + probably to use exit() instead. 
*/ + exit(signum); +#endif +} + +void catch_fatal_signals(void) +{ +#ifdef SIGHUP + signal(SIGHUP, handle_fatal_signal); +#endif + signal(SIGINT, handle_fatal_signal); + signal(SIGTERM, handle_fatal_signal); +} + +#ifdef __cplusplus +} +#endif + +#ifndef HAVE_RENAME + +void ignore_fatal_signals() +{ +#ifdef SIGHUP + signal(SIGHUP, SIG_IGN); +#endif + signal(SIGINT, SIG_IGN); + signal(SIGTERM, SIG_IGN); +} + +#endif /* not HAVE_RENAME */ diff --git a/src/utils/lkbib/lkbib.1.man b/src/utils/lkbib/lkbib.1.man new file mode 100644 index 0000000..59ef19f --- /dev/null +++ b/src/utils/lkbib/lkbib.1.man @@ -0,0 +1,212 @@ +.TH lkbib @MAN1EXT@ "@MDATE@" "groff @VERSION@" +.SH Name +lkbib \- search bibliographic databases +. +. +.\" ==================================================================== +.\" Legal Terms +.\" ==================================================================== +.\" +.\" Copyright (C) 1989-2020 Free Software Foundation, Inc. +.\" +.\" Permission is granted to make and distribute verbatim copies of this +.\" manual provided the copyright notice and this permission notice are +.\" preserved on all copies. +.\" +.\" Permission is granted to copy and distribute modified versions of +.\" this manual under the conditions for verbatim copying, provided that +.\" the entire resulting derived work is distributed under the terms of +.\" a permission notice identical to this one. +.\" +.\" Permission is granted to copy and distribute translations of this +.\" manual into another language, under the above conditions for +.\" modified versions, except that this permission notice may be +.\" included in translations approved by the Free Software Foundation +.\" instead of in the original English. +. +. +.\" Save and disable compatibility mode (for, e.g., Solaris 10/11). +.do nr *groff_lkbib_1_man_C \n[.cp] +.cp 0 +. +.\" Define fallback for groff 1.23's MR macro if the system lacks it. 
+.nr do-fallback 0 +.if !\n(.f .nr do-fallback 1 \" mandoc +.if \n(.g .if !d MR .nr do-fallback 1 \" older groff +.if !\n(.g .nr do-fallback 1 \" non-groff *roff +.if \n[do-fallback] \{\ +. de MR +. ie \\n(.$=1 \ +. I \%\\$1 +. el \ +. IR \%\\$1 (\\$2)\\$3 +. . +.\} +.rr do-fallback +. +. +.\" ==================================================================== +.SH Synopsis +.\" ==================================================================== +. +.SY lkbib +.RB [ \-n ] +.RB [ \-i\~\c +.IR fields ] +.RB [ \-p\~\c +.IR file ] +\&.\|.\|.\& +.RB [ \-t\~\c +.IR n ] +.I key +\&.\|.\|. +.YS +. +. +.SY lkbib +.B \-\-help +.YS +. +. +.SY lkbib +.B \-v +. +.SY lkbib +.B \-\-version +.YS +. +. +.\" ==================================================================== +.SH Description +.\" ==================================================================== +. +.I \%lkbib +searches bibliographic databases for references containing keywords +.I key +and writes any references found to the standard output +stream. +. +It reads databases given by +.B \-p +options +and then +(unless +.B \-n +is given) +a default database. +. +The default database is taken from the +.I \%REFER +environment variable if it is set, +otherwise it is +.IR @DEFAULT_INDEX@ . +. +For each database +.I file +to be searched, +if an index +.RI file @INDEX_SUFFIX@ +created by +.MR @g@indxbib @MAN1EXT@ +exists, +then it will be searched instead; +each index can cover multiple databases. +. +. +.\" ==================================================================== +.SH Options +.\" ==================================================================== +. +.B \-\-help +displays a usage message, +while +.B \-v +and +.B \-\-version +show version information; +all exit afterward. +. +. +.TP +.BI \-i\~ string +When searching files for which no index exists, +ignore the contents of fields whose names are in +.IR string . +. +. +.TP +.B \-n +Suppress search of default database. +. +. 
+.TP +.BI \-p\~ file +Search +.IR file . +. +Multiple +.B \-p +options can be used. +. +. +.TP +.BI \-t\~ n +Require only the first +.I n +characters of keys to be given. +. +The default +is\~6. +. +. +.\" ==================================================================== +.SH Environment +.\" ==================================================================== +. +.TP +.I REFER +Default database. +. +. +.\" ==================================================================== +.SH Files +.\" ==================================================================== +. +.TP +.I \%@DEFAULT_INDEX@ +Default database to be used if the +.I \%REFER +environment variable is not set. +. +. +.TP +.RI file @INDEX_SUFFIX@ +Index files. +. +. +.\" ==================================================================== +.SH "See also" +.\" ==================================================================== +. +\[lq]Some Applications of Inverted Indexes on the Unix System\[rq], +by M.\& E.\& Lesk, +1978, +AT&T Bell Laboratories Computing Science Technical Report No.\& 69. +. +. +.LP +.MR @g@refer @MAN1EXT@ , +.MR @g@lookbib @MAN1EXT@ , +.MR @g@indxbib @MAN1EXT@ +. +. +.\" Restore compatibility mode (for, e.g., Solaris 10/11). +.cp \n[*groff_lkbib_1_man_C] +.do rr *groff_lkbib_1_man_C +. +. +.\" Local Variables: +.\" fill-column: 72 +.\" mode: nroff +.\" End: +.\" vim: set filetype=groff textwidth=72: diff --git a/src/utils/lkbib/lkbib.am b/src/utils/lkbib/lkbib.am new file mode 100644 index 0000000..5f75596 --- /dev/null +++ b/src/utils/lkbib/lkbib.am @@ -0,0 +1,30 @@ +# Copyright (C) 2014-2020 Free Software Foundation, Inc. +# +# This file is part of groff. +# +# groff is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# groff is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +bin_PROGRAMS += lkbib +man1_MANS += src/utils/lkbib/lkbib.1 +EXTRA_DIST += src/utils/lkbib/lkbib.1.man +lkbib_LDADD = libbib.a libgroff.a $(LIBM) lib/libgnu.a +lkbib_SOURCES = src/utils/lkbib/lkbib.cpp +src/utils/lkbib/lkbib.$(OBJEXT): defs.h + + +# Local Variables: +# fill-column: 72 +# mode: makefile-automake +# End: +# vim: set autoindent filetype=automake textwidth=72: diff --git a/src/utils/lkbib/lkbib.cpp b/src/utils/lkbib/lkbib.cpp new file mode 100644 index 0000000..946bd7d --- /dev/null +++ b/src/utils/lkbib/lkbib.cpp @@ -0,0 +1,144 @@ +/* Copyright (C) 1989-2020 Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.com) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or +(at your option) any later version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ + +#include "lib.h" + +#include <assert.h> +#include <errno.h> +#include <stdlib.h> + +#include "errarg.h" +#include "error.h" + +#include "defs.h" +#include "refid.h" +#include "search.h" + +extern "C" const char *Version_string; + +static void usage(FILE *stream) +{ + fprintf(stream, + "usage: %s [-n] [-p database] [-i XYZ] [-t N] key ...\n" + "usage: %s {-v | --version}\n" + "usage: %s --help\n", + program_name, program_name, program_name); +} + +int main(int argc, char **argv) +{ + program_name = argv[0]; + static char stderr_buf[BUFSIZ]; + setbuf(stderr, stderr_buf); + int search_default = 1; + search_list list; + int opt; + static const struct option long_options[] = { + { "help", no_argument, 0, CHAR_MAX + 1 }, + { "version", no_argument, 0, 'v' }, + { NULL, 0, 0, 0 } + }; + while ((opt = getopt_long(argc, argv, "nvVi:t:p:", long_options, NULL)) + != EOF) + switch (opt) { + case 'V': + do_verify = true; + break; + case 'n': + search_default = 0; + break; + case 'i': + linear_ignore_fields = optarg; + break; + case 't': + { + char *ptr; + long n = strtol(optarg, &ptr, 10); + if (n == 0 && ptr == optarg) { + error("bad integer '%1' in 't' option", optarg); + break; + } + if (n < 1) + n = 1; + linear_truncate_len = int(n); + break; + } + case 'v': + { + printf("GNU lkbib (groff) version %s\n", Version_string); + exit(0); + break; + } + case 'p': + list.add_file(optarg); + break; + case CHAR_MAX + 1: // --help + usage(stdout); + exit(0); + break; + case '?': + usage(stderr); + exit(1); + break; + default: + assert(0); + } + if (optind >= argc) { + usage(stderr); + exit(1); + } + char *filename = getenv("REFER"); + if (filename) + list.add_file(filename); + else if (search_default) + list.add_file(DEFAULT_INDEX, 1); + if (list.nfiles() == 0) + fatal("no databases"); + int total_len = 0; + int i; + for (i = optind; i < argc; i++) + total_len += strlen(argv[i]); + total_len += argc - optind - 1 + 1; // for spaces and '\0' + char *buffer = new char[total_len]; + 
char *ptr = buffer; + for (i = optind; i < argc; i++) { + if (i > optind) + *ptr++ = ' '; + strcpy(ptr, argv[i]); + ptr = strchr(ptr, '\0'); + } + search_list_iterator iter(&list, buffer); + const char *start; + int len; + int count; + for (count = 0; iter.next(&start, &len); count++) { + if (fwrite(start, 1, len, stdout) != (size_t)len) + fatal("write error on stdout: %1", strerror(errno)); + // Can happen for last reference in file. + if (start[len - 1] != '\n') + putchar('\n'); + putchar('\n'); + } + return !count; +} + +// Local Variables: +// fill-column: 72 +// mode: C++ +// End: +// vim: set cindent noexpandtab shiftwidth=2 textwidth=72: diff --git a/src/utils/lookbib/lookbib.1.man b/src/utils/lookbib/lookbib.1.man new file mode 100644 index 0000000..5d43bbb --- /dev/null +++ b/src/utils/lookbib/lookbib.1.man @@ -0,0 +1,166 @@ +.TH @g@lookbib @MAN1EXT@ "@MDATE@" "groff @VERSION@" +.SH Name +@g@lookbib \- search bibliographic databases +. +. +.\" ==================================================================== +.\" Legal Terms +.\" ==================================================================== +.\" +.\" Copyright (C) 1989-2020 Free Software Foundation, Inc. +.\" +.\" Permission is granted to make and distribute verbatim copies of this +.\" manual provided the copyright notice and this permission notice are +.\" preserved on all copies. +.\" +.\" Permission is granted to copy and distribute modified versions of +.\" this manual under the conditions for verbatim copying, provided that +.\" the entire resulting derived work is distributed under the terms of +.\" a permission notice identical to this one. +.\" +.\" Permission is granted to copy and distribute translations of this +.\" manual into another language, under the above conditions for +.\" modified versions, except that this permission notice may be +.\" included in translations approved by the Free Software Foundation +.\" instead of in the original English. +. +. 
+.\" Save and disable compatibility mode (for, e.g., Solaris 10/11). +.do nr *groff_lookbib_1_man_C \n[.cp] +.cp 0 +. +.\" Define fallback for groff 1.23's MR macro if the system lacks it. +.nr do-fallback 0 +.if !\n(.f .nr do-fallback 1 \" mandoc +.if \n(.g .if !d MR .nr do-fallback 1 \" older groff +.if !\n(.g .nr do-fallback 1 \" non-groff *roff +.if \n[do-fallback] \{\ +. de MR +. ie \\n(.$=1 \ +. I \%\\$1 +. el \ +. IR \%\\$1 (\\$2)\\$3 +. . +.\} +.rr do-fallback +. +. +.\" ==================================================================== +.SH Synopsis +.\" ==================================================================== +. +.SY @g@lookbib +.RB [ \-i\~\c +.IR string ] +.RB [ \-t\~\c +.IR n ] +.I file +\&.\|.\|.\& +.YS +. +. +.SY @g@lookbib +.B \-\-help +.YS +. +. +.SY @g@lookbib +.B \-v +. +.SY @g@lookbib +.B \-\-version +.YS +. +. +.\" ==================================================================== +.SH Description +.\" ==================================================================== +. +.I @g@lookbib +writes a prompt to the standard error stream +(unless the standard input stream is not +a terminal), +reads from the standard input a line containing a set of keywords, +searches each bibliographic database +.I file +for references containing those keywords, +writes any references found to the standard output stream, +and repeats this process until the end of input. +. +For each database +.I file +to be searched, +if an index +.RI file @INDEX_SUFFIX@ +created by +.MR @g@indxbib @MAN1EXT@ +exists, +then it will be searched instead; +each index can cover multiple databases. +. +. +.\" ==================================================================== +.SH Options +.\" ==================================================================== +. +.B \-\-help +displays a usage message, +while +.B \-v +and +.B \-\-version +show version information; +all exit afterward. +. +. 
+.TP +.BI \-i\~ string +When searching files for which no index exists, +ignore the contents of fields whose names are in +.IR string . +. +. +.TP +.BI \-t\~ n +Require only the first +.I n +characters of keys to be given. +. +The default +is\~6. +. +. +.\" ==================================================================== +.SH Files +.\" ==================================================================== +. +.TP +.RI file @INDEX_SUFFIX@ +Index files. +. +. +.\" ==================================================================== +.SH "See also" +.\" ==================================================================== +. +\[lq]Some Applications of Inverted Indexes on the Unix System\[rq], +by M.\& E.\& Lesk, +1978, +AT&T Bell Laboratories Computing Science Technical Report No.\& 69. +. +. +.LP +.MR @g@refer @MAN1EXT@ , +.MR lkbib @MAN1EXT@ , +.MR @g@indxbib @MAN1EXT@ +. +. +.\" Restore compatibility mode (for, e.g., Solaris 10/11). +.cp \n[*groff_lookbib_1_man_C] +.do rr *groff_lookbib_1_man_C +. +.\" Local Variables: +.\" fill-column: 72 +.\" mode: nroff +.\" End: +.\" vim: set filetype=groff textwidth=72: diff --git a/src/utils/lookbib/lookbib.am b/src/utils/lookbib/lookbib.am new file mode 100644 index 0000000..75103c1 --- /dev/null +++ b/src/utils/lookbib/lookbib.am @@ -0,0 +1,29 @@ +# Copyright (C) 2014-2020 Free Software Foundation, Inc. +# +# This file is part of groff. +# +# groff is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# groff is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
If not, see <http://www.gnu.org/licenses/>. + +prefixexecbin_PROGRAMS += lookbib +PREFIXMAN1 += src/utils/lookbib/lookbib.1 +EXTRA_DIST += src/utils/lookbib/lookbib.1.man +lookbib_LDADD = libbib.a libgroff.a $(LIBM) lib/libgnu.a +lookbib_SOURCES = src/utils/lookbib/lookbib.cpp + + +# Local Variables: +# fill-column: 72 +# mode: makefile-automake +# End: +# vim: set autoindent filetype=automake textwidth=72: diff --git a/src/utils/lookbib/lookbib.cpp b/src/utils/lookbib/lookbib.cpp new file mode 100644 index 0000000..d8556c6 --- /dev/null +++ b/src/utils/lookbib/lookbib.cpp @@ -0,0 +1,146 @@ +/* Copyright (C) 1989-2020 Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.com) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or +(at your option) any later version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ + +#include "lib.h" + +#include <assert.h> +#include <errno.h> +#include <stdlib.h> + +#include "errarg.h" +#include "error.h" +#include "cset.h" + +#include "refid.h" +#include "search.h" + +/* for isatty() */ +#include "posix.h" +#include "nonposix.h" + +extern "C" const char *Version_string; + +static void usage(FILE *stream) +{ + fprintf(stream, + "usage: %s [-i XYZ] [-t N] database ...\n" + "usage: %s {-v | --version}\n" + "usage: %s --help\n", + program_name, program_name, program_name); +} + +int main(int argc, char **argv) +{ + program_name = argv[0]; + static char stderr_buf[BUFSIZ]; + setbuf(stderr, stderr_buf); + int opt; + static const struct option long_options[] = { + { "help", no_argument, 0, CHAR_MAX + 1 }, + { "version", no_argument, 0, 'v' }, + { NULL, 0, 0, 0 } + }; + while ((opt = getopt_long(argc, argv, "vVi:t:", long_options, NULL)) != EOF) + switch (opt) { + case 'V': + do_verify = true; + break; + case 'i': + linear_ignore_fields = optarg; + break; + case 't': + { + char *ptr; + long n = strtol(optarg, &ptr, 10); + if (n == 0 && ptr == optarg) { + error("bad integer '%1' in 't' option", optarg); + break; + } + if (n < 1) + n = 1; + linear_truncate_len = int(n); + break; + } + case 'v': + { + printf("GNU lookbib (groff) version %s\n", Version_string); + exit(0); + break; + } + case CHAR_MAX + 1: // --help + usage(stdout); + exit(0); + break; + case '?': + usage(stderr); + exit(1); + break; + default: + assert(0); + } + if (optind >= argc) { + usage(stderr); + exit(1); + } + search_list list; + for (int i = optind; i < argc; i++) + list.add_file(argv[i]); + if (list.nfiles() == 0) + fatal("no databases"); + char line[1024]; + int interactive = isatty(fileno(stdin)); + for (;;) { + if (interactive) { + fputs("> ", stderr); + fflush(stderr); + } + if (!fgets(line, sizeof(line), stdin)) + break; + char *ptr = line; + while (csspace(*ptr)) + ptr++; + if (*ptr == '\0') + continue; + search_list_iterator iter(&list, line); + const char *start; + 
int len; + int count; + for (count = 0; iter.next(&start, &len); count++) { + if (fwrite(start, 1, len, stdout) != (size_t)len) + fatal("write error on stdout: %1", strerror(errno)); + // Can happen for last reference in file. + if (start[len - 1] != '\n') + putchar('\n'); + putchar('\n'); + } + fflush(stdout); + if (interactive) { + fprintf(stderr, "%d found\n", count); + fflush(stderr); + } + } + if (interactive) + putc('\n', stderr); + return 0; +} + +// Local Variables: +// fill-column: 72 +// mode: C++ +// End: +// vim: set cindent noexpandtab shiftwidth=2 textwidth=72: diff --git a/src/utils/pfbtops/pfbtops.1.man b/src/utils/pfbtops/pfbtops.1.man new file mode 100644 index 0000000..71140c6 --- /dev/null +++ b/src/utils/pfbtops/pfbtops.1.man @@ -0,0 +1,129 @@ +.TH pfbtops @MAN1EXT@ "@MDATE@" "groff @VERSION@" +.SH Name +pfbtops \- translate PostScript Printer Font Binary files to Printer +Font ASCII +. +. +.\" ==================================================================== +.\" Legal Terms +.\" ==================================================================== +.\" +.\" Copyright (C) 1989-2020 Free Software Foundation, Inc. +.\" +.\" Permission is granted to make and distribute verbatim copies of this +.\" manual provided the copyright notice and this permission notice are +.\" preserved on all copies. +.\" +.\" Permission is granted to copy and distribute modified versions of +.\" this manual under the conditions for verbatim copying, provided that +.\" the entire resulting derived work is distributed under the terms of +.\" a permission notice identical to this one. +.\" +.\" Permission is granted to copy and distribute translations of this +.\" manual into another language, under the above conditions for +.\" modified versions, except that this permission notice may be +.\" included in translations approved by the Free Software Foundation +.\" instead of in the original English. +. +. 
+.\" Save and disable compatibility mode (for, e.g., Solaris 10/11). +.do nr *groff_pfbtops_1_man_C \n[.cp] +.cp 0 +. +.\" Define fallback for groff 1.23's MR macro if the system lacks it. +.nr do-fallback 0 +.if !\n(.f .nr do-fallback 1 \" mandoc +.if \n(.g .if !d MR .nr do-fallback 1 \" older groff +.if !\n(.g .nr do-fallback 1 \" non-groff *roff +.if \n[do-fallback] \{\ +. de MR +. ie \\n(.$=1 \ +. I \%\\$1 +. el \ +. IR \%\\$1 (\\$2)\\$3 +. . +.\} +.rr do-fallback +. +. +.\" ==================================================================== +.SH Synopsis +.\" ==================================================================== +. +.SY pfbtops +.RI [ pfb-file ] +.YS +. +. +.SY pfbtops +.B \-\-help +.YS +. +. +.SY pfbtops +.B \-v +. +.SY pfbtops +.B \-\-version +.YS +. +. +.\" ==================================================================== +.SH Description +.\" ==================================================================== +. +.I pfbtops +translates a PostScript Type\~1 font in Printer Font Binary (PFB) format +to Printer Font ASCII (PFA) format, +splitting overlong lines in text packets into smaller chunks. +. +If +.I pfb-file +is omitted, +the PFB file will be read from the standard input stream. +. +The PFA font will be written on the standard output stream. +. +PostScript fonts for MS-DOS were historically supplied in PFB format. +. +Use of a PostScript Type\~1 font with +.I groff +requires conversion of its metrics +(AFM file) +to a +.I groff +font description file; +see +.MR afmtodit @MAN1EXT@ . +. +. +.P +The +.B \-\-help +option displays a usage message, +while +.B \-v +and +.B \-\-version +show version information; +all exit afterward. +. +. +.\" ==================================================================== +.SH "See also" +.\" ==================================================================== +. +.MR grops @MAN1EXT@ , +.MR gropdf @MAN1EXT@ +. +. +.\" Restore compatibility mode (for, e.g., Solaris 10/11). 
+.cp \n[*groff_pfbtops_1_man_C] +.do rr *groff_pfbtops_1_man_C +. +. +.\" Local Variables: +.\" fill-column: 72 +.\" mode: nroff +.\" End: +.\" vim: set filetype=groff textwidth=72: diff --git a/src/utils/pfbtops/pfbtops.am b/src/utils/pfbtops/pfbtops.am new file mode 100644 index 0000000..8b7fd71 --- /dev/null +++ b/src/utils/pfbtops/pfbtops.am @@ -0,0 +1,32 @@ +# Copyright (C) 2014-2020 Free Software Foundation, Inc. +# +# This file is part of groff. +# +# groff is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# groff is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +bin_PROGRAMS += pfbtops +man1_MANS += src/utils/pfbtops/pfbtops.1 +EXTRA_DIST += src/utils/pfbtops/pfbtops.1.man +pfbtops_SOURCES = src/utils/pfbtops/pfbtops.c +pfbtops_LDADD = libgroff.a $(LIBM) lib/libgnu.a +# We use the following trick to force the use of C++ compiler +# See the Automake manual, "Libtool Convenience Libraries" +nodist_EXTRA_pfbtops_SOURCES = src/utils/pfbtops/dummy.cpp + + +# Local Variables: +# fill-column: 72 +# mode: makefile-automake +# End: +# vim: set autoindent filetype=automake textwidth=72: diff --git a/src/utils/pfbtops/pfbtops.c b/src/utils/pfbtops/pfbtops.c new file mode 100644 index 0000000..8fbe44a --- /dev/null +++ b/src/utils/pfbtops/pfbtops.c @@ -0,0 +1,243 @@ +/* Copyright (C) 1992-2020 Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.com) + +This file is part of groff. 
+ +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or +(at your option) any later version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* This translates ps fonts in .pfb format to ASCII ps files. */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#define __GETOPT_PREFIX groff_ + +#include <errno.h> // errno +#include <stdio.h> +#include <stdlib.h> // exit(), EXIT_FAILURE, EXIT_SUCCESS +#include <string.h> // strerror() +#include <limits.h> + +#include <getopt.h> + +#include "nonposix.h" + +/* Binary bytes per output line. 
*/ +#define BYTES_PER_LINE (64/2) +#define MAX_LINE_LENGTH 78 +#define HEX_DIGITS "0123456789abcdef" + +extern const char *Version_string; + +static char *program_name; + +static void error(const char *s) +{ + fprintf(stderr, "%s: error: %s\n", program_name, s); + exit(EXIT_FAILURE); +} + +static void usage(FILE *stream) +{ + fprintf(stream, "usage: %s [pfb-file]\n" + "usage: %s {-v | --version}\n" + "usage: %s --help\n", + program_name, program_name, program_name); +} + +static void get_text(int n) +{ + int c = 0, c1; + int in_string = 0; + int is_comment = 0; + int count = 0; + + while (--n >= 0) { + c = getchar(); + if (c == '(' && !is_comment) + in_string++; + else if (c == ')' && !is_comment) + in_string--; + else if (c == '%' && !in_string) + is_comment = 1; + else if (c == '\\' && in_string) { + count++; + putchar(c); + if (n-- == 0) + break; + c = getchar(); + /* don't split octal character representations */ + if (c >= '0' && c <= '7') { + count++; + putchar(c); + if (n-- == 0) + break; + c = getchar(); + if (c >= '0' && c <= '7') { + count++; + putchar(c); + if (n-- == 0) + break; + c = getchar(); + if (c >= '0' && c <= '7') { + count++; + putchar(c); + if (n-- == 0) + break; + c = getchar(); + } + } + } + } + if (c == EOF) + error("end of file in text packet"); + else if (c == '\r') { + if (n-- == 0) + break; + c1 = getchar(); + if (c1 != '\n') { + ungetc(c1, stdin); + n++; + } + c = '\n'; + } + if (c == '\n') { + count = 0; + is_comment = 0; + } + else if (count >= MAX_LINE_LENGTH) { + if (in_string > 0) { + count = 1; + putchar('\\'); + putchar('\n'); + } + else if (is_comment) { + count = 2; + putchar('\n'); + putchar('%'); + } + else { + /* split at the next whitespace character */ + while (c != ' ' && c != '\t' && c != '\f') { + putchar(c); + if (n-- == 0) + break; + c = getchar(); + } + count = 0; + putchar('\n'); + continue; + } + } + count++; + putchar(c); + } + if (c != '\n') + putchar('\n'); +} + +static void get_binary(int n) +{ + int c; + int 
count = 0; + + while (--n >= 0) { + c = getchar(); + if (c == EOF) + error("end of file in binary packet"); + if (count >= BYTES_PER_LINE) { + putchar('\n'); + count = 0; + } + count++; + putchar(HEX_DIGITS[(c >> 4) & 0xf]); + putchar(HEX_DIGITS[c & 0xf]); + } + putchar('\n'); +} + +int main(int argc, char **argv) +{ + int opt; + static const struct option long_options[] = { + { "help", no_argument, 0, CHAR_MAX + 1 }, + { "version", no_argument, 0, 'v' }, + { NULL, 0, 0, 0 } + }; + + program_name = argv[0]; + + while ((opt = getopt_long(argc, argv, "v", long_options, NULL)) != EOF) { + switch (opt) { + case 'v': + printf("GNU pfbtops (groff) version %s\n", Version_string); + exit(EXIT_SUCCESS); + break; + case CHAR_MAX + 1: /* --help */ + usage(stdout); + exit(EXIT_SUCCESS); + break; + case '?': + usage(stderr); + exit(2); + break; + } + } + + if (argc - optind > 1) { + usage(stderr); + exit(2); + } + const char *file = argv[optind]; + if (argc > optind && !freopen(file, "r", stdin)) { + fprintf(stderr, "%s: error: unable to open file '%s': %s\n", + program_name, file, strerror(errno)); + exit(EXIT_FAILURE); + } + SET_BINARY(fileno(stdin)); + for (;;) { + int type, c, i; + long n; + + c = getchar(); + if (c != 0x80) + error("first byte of packet not 0x80"); + type = getchar(); + if (type == 3) + break; + if (type != 1 && type != 2) + error("bad packet type"); + n = 0; + for (i = 0; i < 4; i++) { + c = getchar(); + if (c == EOF) + error("end of file in packet header"); + n |= (long)c << (i << 3); + } + if (n < 0) + error("negative packet length"); + if (type == 1) + get_text(n); + else + get_binary(n); + } + exit(EXIT_SUCCESS); +} + +// Local Variables: +// fill-column: 72 +// mode: C +// End: +// vim: set cindent noexpandtab shiftwidth=2 textwidth=72: diff --git a/src/utils/tfmtodit/tfmtodit.1.man b/src/utils/tfmtodit/tfmtodit.1.man new file mode 100644 index 0000000..0f21753 --- /dev/null +++ b/src/utils/tfmtodit/tfmtodit.1.man @@ -0,0 +1,415 @@ +.TH tfmtodit 
@MAN1EXT@ "@MDATE@" "groff @VERSION@" +.SH Name +tfmtodit \- adapt TeX Font Metrics files for use with +.I groff +and +.I grodvi +. +. +.\" ==================================================================== +.\" Legal Terms +.\" ==================================================================== +.\" +.\" Copyright (C) 1989-2020 Free Software Foundation, Inc. +.\" +.\" Permission is granted to make and distribute verbatim copies of this +.\" manual provided the copyright notice and this permission notice are +.\" preserved on all copies. +.\" +.\" Permission is granted to copy and distribute modified versions of +.\" this manual under the conditions for verbatim copying, provided that +.\" the entire resulting derived work is distributed under the terms of +.\" a permission notice identical to this one. +.\" +.\" Permission is granted to copy and distribute translations of this +.\" manual into another language, under the above conditions for +.\" modified versions, except that this permission notice may be +.\" included in translations approved by the Free Software Foundation +.\" instead of in the original English. +. +. +.\" Save and disable compatibility mode (for, e.g., Solaris 10/11). +.do nr *groff_tfmtodit_1_man_C \n[.cp] +.cp 0 +. +.\" Define fallback for groff 1.23's MR macro if the system lacks it. +.nr do-fallback 0 +.if !\n(.f .nr do-fallback 1 \" mandoc +.if \n(.g .if !d MR .nr do-fallback 1 \" older groff +.if !\n(.g .nr do-fallback 1 \" non-groff *roff +.if \n[do-fallback] \{\ +. de MR +. ie \\n(.$=1 \ +. I \%\\$1 +. el \ +. IR \%\\$1 (\\$2)\\$3 +. . +.\} +.rr do-fallback +. +. +.\" ==================================================================== +.\" Definitions +.\" ==================================================================== +. +.ie t .ds tx T\h'-.1667m'\v'.224m'E\v'-.224m'\h'-.125m'X +.el .ds tx TeX +. +. 
+.\" ==================================================================== +.SH Synopsis +.\" ==================================================================== +. +.SY tfmtodit +.RB [ \-s ] +.RB [ \-g\~\c +.IR gf-file ] +.RB [ \-k\~\c +.IR skew-char ] +.I tfm-file +.I map-file +.I font-description +.YS +. +. +.SY tfmtodit +.B \-\-help +.YS +. +. +.SY tfmtodit +.B \-v +. +.SY tfmtodit +.B \-\-version +.YS +. +. +.\" ==================================================================== +.SH Description +.\" ==================================================================== +. +.I tfmtodit +creates a font description file for use with +.MR groff @MAN1EXT@ 's +.B dvi +output device. +. +.I tfm-file +is the name of the \*(tx font metric file for the font. +. +.I map-file +assigns +.I groff +ordinary or special character identifiers to glyph indices in the font; +it should consist of a sequence of lines of the form +. +.RS +.IR "i c1" \~\&.\|.\|.\&\~ cn +.RE +. +where +.I i +is a position of the glyph in the font in decimal, +and +.I c1 +through +.I cn +are glyph identifiers in the form used by +.I groff +font descriptions. +. +If a glyph has no +.I groff +names but exists in +.I tfm-file, +it is put in the +.I groff +font description file as an unnamed glyph. +. +Output is written in +.MR groff_font @MAN5EXT@ +format to +.I font-description, +a file named for the intended +.I groff +font name. +. +. +.P +If the font is \[lq]special\[rq], +meaning that +.I groff +should search it whenever a glyph is not found in the current font, +use the +.B \-s +option and name +.I font-description +in the +.B fonts +directive in the output device's +.I DESC +file. +. +. +.P +To do a good job of math typesetting, +.I groff +requires font metric information not present in +.I tfm-file. +. +This is because \*(tx has separate math italic fonts, +whereas +.I groff +uses normal italic fonts for math. +. 
+The additional information required by +.I groff +is given by the two arguments to the +.B math_fit +macro in the Metafont programs for the Computer Modern fonts. +. +In a text font (a font for which +.B math_fit +is false), +Metafont normally ignores these two arguments. +. +Metafont can be made to put this information into the GF +(\[lq]generic font\[rq]) +files it produces by loading the following definition after +.B cmbase +when creating +.IR cm.base . +. +.RS +.EX +def ignore_math_fit(expr left_adjustment,right_adjustment) = + special "adjustment"; + numspecial left_adjustment*16/designsize; + numspecial right_adjustment*16/designsize; + enddef; +.EE +.RE +. +For the EC font family, +load the following definition after +.BR exbase ; +consider patching +.I exbase.mf +locally. +. +.RS +.EX +def ignore_math_fit(expr left_adjustment,right_adjustment) = + ori_special "adjustment"; + ori_numspecial left_adjustment*16/designsize; + ori_numspecial right_adjustment*16/designsize; + enddef; +.EE +.RE +. +The only difference from the previous example is the \[lq]ori_\[rq] +prefix to \[lq]special\[rq] and \[lq]numspecial\[rq]. +. +The GF file created using this modified +.I cm.base +or +.I exbase.mf +should be specified with the +.B \-g +option, +which should +.I not +be given for a font for which +.B math_fit +is true. +. +. +.\" ==================================================================== +.SH Options +.\" ==================================================================== +. +.B \-\-help +displays a usage message, +while +.B \-v +and +.B \-\-version +show version information; +all exit afterward. +. +. +.TP +.BI \-g \~gf-file +Use the +.I gf-file +produced by Metafont containing +.RB \[lq] special \[rq] +and +.RB \[lq] numspecial \[rq] +commands to obtain additional font metric information. +. +. +.TP +.BI \-k \~skew-char +The skew character of this font is at position +.I skew-char. +. 
+.I skew-char +should be an integer; +it may be given in decimal, +with a leading 0 in octal, +or with a leading 0x in hexadecimal. +. +Any kerns whose second component is +.I skew-char +are ignored. +. +. +.TP +.B \-s +Add the +.B special +directive to the font description file. +. +. +.\" ==================================================================== +.SH Files +.\" ==================================================================== +. +.TP +.I @FONTDIR@/\:\%devdvi/\:DESC +describes the +.B dvi +output device. +. +. +.TP +.IR @FONTDIR@/\:\%devdvi/ F +describes the font known +.RI as\~ F +on device +.BR dvi . +. +. +.TP +.I @FONTDIR@/\:\%devdvi/\:\%generate/\:\%ec.map +.TQ +.I @FONTDIR@/\:\%devdvi/\:\%generate/\:\%msam.map +.TQ +.I @FONTDIR@/\:\%devdvi/\:\%generate/\:\%msbm.map +.TQ +.I @FONTDIR@/\:\%devdvi/\:\%generate/\:\%tc.map +.TQ +.I @FONTDIR@/\:\%devdvi/\:\%generate/\:\%texb.map +.TQ +.I @FONTDIR@/\:\%devdvi/\:\%generate/\:\%texex.map +.TQ +.I @FONTDIR@/\:\%devdvi/\:\%generate/\:\%texi.map +.TQ +.I @FONTDIR@/\:\%devdvi/\:\%generate/\:\%texitt.map +.TQ +.I @FONTDIR@/\:\%devdvi/\:\%generate/\:\%texmi.map +.TQ +.I @FONTDIR@/\:\%devdvi/\:\%generate/\:\%texr.map +.TQ +.I @FONTDIR@/\:\%devdvi/\:\%generate/\:\%texsy.map +.TQ +.I @FONTDIR@/\:\%devdvi/\:\%generate/\:\%textex.map +.TQ +.I @FONTDIR@/\:\%devdvi/\:\%generate/\:\%textt.map +map glyph indices in \*[tx] fonts to +.I groff +ordinary and special character identifiers. +. 
+.I \%ec.map +is used for +.BR TREC , +.BR TIEC , +.BR TBEC , +.BR TBIEC , +.BR HREC , +.BR HIEC , +.BR HBEC , +.BR HBIEC , +.BR CWEC , +and +.BR CWIEC ; +.I \%msam.map +for +.BR SA ; +.I \%msbm.map +for +.BR SB ; +.I \%tc.map +for +.BR TRTC , +.BR TITC , +.BR TBTC , +.BR TBITC , +.BR HRTC , +.BR HITC , +.BR HBTC , +.BR HBITC , +.BR CWTC , +and +.BR CWITC ; +.I \%texb.map +for +.BR TB , +.BR HR , +.BR HI , +.BR HB , +and +.BR HBI ; +.I \%texex.map +for +.BR EX ; +.I \%texi.map +for +.B TI +and +.BR TBI ; +.I \%texitt.map +for +.BR CWI ; +.I \%texmi.map +for +.BR MI ; +.I \%texr.map +for +.BR TR ; +.I \%texsy.map +for +.BR S ; +.I \%textex.map +for +.BR SC ; +and +.I \%textt.map +for +.BR CW . +. +. +.\" ==================================================================== +.SH "See also" +.\" ==================================================================== +. +.MR groff @MAN1EXT@ , +.MR grodvi @MAN1EXT@ , +.MR groff_font @MAN5EXT@ +. +. +.\" Clean up. +.rm tx +. +.\" Restore compatibility mode (for, e.g., Solaris 10/11). +.cp \n[*groff_tfmtodit_1_man_C] +.do rr *groff_tfmtodit_1_man_C +. +. +.\" Local Variables: +.\" fill-column: 72 +.\" mode: nroff +.\" End: +.\" vim: set filetype=groff textwidth=72: diff --git a/src/utils/tfmtodit/tfmtodit.am b/src/utils/tfmtodit/tfmtodit.am new file mode 100644 index 0000000..758fad5 --- /dev/null +++ b/src/utils/tfmtodit/tfmtodit.am @@ -0,0 +1,29 @@ +# Copyright (C) 2014-2020 Free Software Foundation, Inc. +# +# This file is part of groff. +# +# groff is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# groff is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +bin_PROGRAMS += tfmtodit +man1_MANS += src/utils/tfmtodit/tfmtodit.1 +EXTRA_DIST += src/utils/tfmtodit/tfmtodit.1.man +tfmtodit_SOURCES = src/utils/tfmtodit/tfmtodit.cpp +tfmtodit_LDADD = libgroff.a $(LIBM) lib/libgnu.a + + +# Local Variables: +# fill-column: 72 +# mode: makefile-automake +# End: +# vim: set autoindent filetype=automake textwidth=72: diff --git a/src/utils/tfmtodit/tfmtodit.cpp b/src/utils/tfmtodit/tfmtodit.cpp new file mode 100644 index 0000000..3003733 --- /dev/null +++ b/src/utils/tfmtodit/tfmtodit.cpp @@ -0,0 +1,889 @@ +/* Copyright (C) 1989-2020 Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.com) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or +(at your option) any later version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* I have tried to incorporate the changes needed for TeX 3.0 tfm files, +but I haven't tested them. */ + +/* Groff requires more font metric information than TeX. The reason +for this is that TeX has separate Math Italic fonts, whereas groff +uses normal italic fonts for math. The two additional pieces of +information required by groff correspond to the two arguments to the +math_fit() macro in the Metafont programs for the CM fonts. In the +case of a font for which math_fitting is false, these two arguments +are normally ignored by Metafont. 
We need to get hold of these two +parameters and put them in the groff font file. + +We do this by loading this definition after cmbase when creating +cm.base. + +def ignore_math_fit(expr left_adjustment,right_adjustment) = + special "adjustment"; + numspecial left_adjustment*16/designsize; + numspecial right_adjustment*16/designsize; + enddef; + +This puts the two arguments to the math_fit macro into the gf file. +(They will appear in the gf file immediately before the character to +which they apply.) We then create a gf file using this cm.base. Then +we run tfmtodit and specify this gf file with the -g option. + +This need only be done for a font for which math_fitting is false; +when it's true, the left_correction and subscript_correction should +both be zero. */ + +#include "lib.h" + +#include <assert.h> +#include <errno.h> +#include <math.h> +#include <stdlib.h> + +#include "errarg.h" +#include "error.h" +#include "cset.h" +#include "nonposix.h" + +extern "C" const char *Version_string; + +/* Values in the tfm file should be multiplied by this. 
*/ + +#define MULTIPLIER 1 + +struct char_info_word { + unsigned char width_index; + unsigned char height_index; + unsigned char depth_index; + unsigned char italic_index; + unsigned char tag; + unsigned char remainder; +}; + +struct lig_kern_command { + unsigned char skip_byte; + unsigned char next_char; + unsigned char op_byte; + unsigned char remainder; +}; + +class tfm { + int bc; + int ec; + int nw; + int nh; + int nd; + int ni; + int nl; + int nk; + int np; + int cs; + int ds; + char_info_word *char_info; + int *width; + int *height; + int *depth; + int *italic; + lig_kern_command *lig_kern; + int *kern; + int *param; +public: + tfm(); + ~tfm(); + int load(const char *); + int contains(int); + int get_width(int); + int get_height(int); + int get_depth(int); + int get_italic(int); + int get_param(int, int *); + int get_checksum(); + int get_design_size(); + int get_lig(unsigned char, unsigned char, unsigned char *); + friend class kern_iterator; +}; + +class kern_iterator { + tfm *t; + int c; + int i; +public: + kern_iterator(tfm *); + int next(unsigned char *c1, unsigned char *c2, int *k); +}; + + +kern_iterator::kern_iterator(tfm *p) +: t(p), c(t->bc), i(-1) +{ +} + +int kern_iterator::next(unsigned char *c1, unsigned char *c2, int *k) +{ + for (; c <= t->ec; c++) + if (t->char_info[c - t->bc].tag == 1) { + if (i < 0) { + i = t->char_info[c - t->bc].remainder; + if (t->lig_kern[i].skip_byte > 128) + i = (256*t->lig_kern[i].op_byte + + t->lig_kern[i].remainder); + } + for (;;) { + int skip = t->lig_kern[i].skip_byte; + if (skip <= 128 && t->lig_kern[i].op_byte >= 128) { + *c1 = c; + *c2 = t->lig_kern[i].next_char; + *k = t->kern[256*(t->lig_kern[i].op_byte - 128) + + t->lig_kern[i].remainder]; + if (skip == 128) { + c++; + i = -1; + } + else + i += skip + 1; + return 1; + } + if (skip >= 128) + break; + i += skip + 1; + } + i = -1; + } + return 0; +} + +tfm::tfm() +: char_info(0), width(0), height(0), depth(0), italic(0), lig_kern(0), + kern(0), param(0) +{ 
+} + +int tfm::get_lig(unsigned char c1, unsigned char c2, unsigned char *cp) +{ + if (contains(c1) && char_info[c1 - bc].tag == 1) { + int i = char_info[c1 - bc].remainder; + if (lig_kern[i].skip_byte > 128) + i = 256*lig_kern[i].op_byte + lig_kern[i].remainder; + for (;;) { + int skip = lig_kern[i].skip_byte; + if (skip > 128) + break; + // We are only interested in normal ligatures, for which + // op_byte == 0. + if (lig_kern[i].op_byte == 0 + && lig_kern[i].next_char == c2) { + *cp = lig_kern[i].remainder; + return 1; + } + if (skip == 128) + break; + i += skip + 1; + } + } + return 0; +} + +int tfm::contains(int i) +{ + return i >= bc && i <= ec && char_info[i - bc].width_index != 0; +} + +int tfm::get_width(int i) +{ + return width[char_info[i - bc].width_index]; +} + +int tfm::get_height(int i) +{ + return height[char_info[i - bc].height_index]; +} + +int tfm::get_depth(int i) +{ + return depth[char_info[i - bc].depth_index]; +} + +int tfm::get_italic(int i) +{ + return italic[char_info[i - bc].italic_index]; +} + +int tfm::get_param(int i, int *p) +{ + if (i <= 0 || i > np) + return 0; + else { + *p = param[i - 1]; + return 1; + } +} + +int tfm::get_checksum() +{ + return cs; +} + +int tfm::get_design_size() +{ + return ds; +} + +tfm::~tfm() +{ + delete[] char_info; + delete[] width; + delete[] height; + delete[] depth; + delete[] italic; + delete[] lig_kern; + delete[] kern; + delete[] param; +} + +int read2(unsigned char *&s) +{ + int n; + n = *s++ << 8; + n |= *s++; + return n; +} + +int read4(unsigned char *&s) +{ + int n; + n = *s++ << 24; + n |= *s++ << 16; + n |= *s++ << 8; + n |= *s++; + return n; +} + +int tfm::load(const char *file) +{ + errno = 0; + FILE *fp = fopen(file, FOPEN_RB); + if (!fp) { + error("can't open '%1': %2", file, strerror(errno)); + return 0; + } + int c1 = getc(fp); + int c2 = getc(fp); + if (c1 == EOF || c2 == EOF) { + fclose(fp); + error("unexpected end of file on '%1'", file); + return 0; + } + int lf = (c1 << 8) + c2; + 
// The length fields lf through np occupy 6 words by themselves. + // Check before sizing the buffer so that lf == 0 cannot produce a + // negative allocation size. + if (lf < 6) { + error("bad TFM file '%1': impossibly short", file); + fclose(fp); + return 0; + } + // lf counts 4-byte words; its own 2 bytes have already been read. + int toread = lf*4 - 2; + unsigned char *buf = new unsigned char[toread]; + if (fread(buf, 1, toread, fp) != (size_t)toread) { + if (feof(fp)) + error("unexpected end of file on '%1'", file); + else + error("error on file '%1'", file); + delete[] buf; + fclose(fp); + return 0; + } + fclose(fp); + unsigned char *ptr = buf; + int lh = read2(ptr); + bc = read2(ptr); + ec = read2(ptr); + nw = read2(ptr); + nh = read2(ptr); + nd = read2(ptr); + ni = read2(ptr); + nl = read2(ptr); + nk = read2(ptr); + int ne = read2(ptr); + np = read2(ptr); + // char_info[] holds ec - bc + 1 entries; reject a range that would + // make that count negative (bc == ec + 1 is an empty font). + if (bc > ec + 1) { + error("bad TFM file '%1': invalid character code range", file); + delete[] buf; + return 0; + } + if ((6 + lh + (ec - bc + 1) + nw + nh + nd + ni + nl + nk + ne + np) + != lf) { + error("bad TFM file '%1': lengths do not sum", file); + delete[] buf; + return 0; + } + if (lh < 2) { + error("bad TFM file '%1': header too short", file); + delete[] buf; + return 0; + } + char_info = new char_info_word[ec - bc + 1]; + width = new int[nw]; + height = new int[nh]; + depth = new int[nd]; + italic = new int[ni]; + lig_kern = new lig_kern_command[nl]; + kern = new int[nk]; + param = new int[np]; + int i; + cs = read4(ptr); + ds = read4(ptr); + // Skip the rest of the header beyond checksum and design size. + ptr += (lh-2)*4; + for (i = 0; i < ec - bc + 1; i++) { + char_info[i].width_index = *ptr++; + unsigned char tem = *ptr++; + char_info[i].depth_index = tem & 0xf; + char_info[i].height_index = tem >> 4; + tem = *ptr++; + char_info[i].italic_index = tem >> 2; + char_info[i].tag = tem & 3; + char_info[i].remainder = *ptr++; + } + for (i = 0; i < nw; i++) + width[i] = read4(ptr); + for (i = 0; i < nh; i++) + height[i] = read4(ptr); + for (i = 0; i < nd; i++) + depth[i] = read4(ptr); + for (i = 0; i < ni; i++) + italic[i] = read4(ptr); + for (i = 0; i < nl; i++) { + lig_kern[i].skip_byte = *ptr++; + lig_kern[i].next_char = *ptr++; + lig_kern[i].op_byte = *ptr++; + lig_kern[i].remainder = *ptr++; + } + for (i = 0; i < nk; i++) + kern[i] = read4(ptr); + // The ne extensible-recipe words are not used; step over them. + ptr += ne*4; + for (i = 0; i < np; i++) + param[i] = 
read4(ptr); + assert(ptr == buf + lf*4 - 2); + delete[] buf; + return 1; +} + +class gf { + int left[256]; + int right[256]; + static int sread4(int *p, FILE *fp); + static int uread3(int *p, FILE *fp); + static int uread2(int *p, FILE *fp); + static int skip(int n, FILE *fp); +public: + gf(); + int load(const char *file); + int get_left_adjustment(int i) { return left[i]; } + int get_right_adjustment(int i) { return right[i]; } +}; + +gf::gf() +{ + for (int i = 0; i < 256; i++) + left[i] = right[i] = 0; +} + +int gf::load(const char *file) +{ + enum { + paint_0 = 0, + paint1 = 64, + boc = 67, + boc1 = 68, + eoc = 69, + skip0 = 70, + skip1 = 71, + new_row_0 = 74, + xxx1 = 239, + yyy = 243, + no_op = 244, + pre = 247, + post = 248 + }; + int got_an_adjustment = 0; + int pending_adjustment = 0; + int left_adj = 0, right_adj = 0; // pacify compiler + const int gf_id_byte = 131; + errno = 0; + FILE *fp = fopen(file, FOPEN_RB); + if (!fp) { + error("can't open '%1': %2", file, strerror(errno)); + return 0; + } + if (getc(fp) != pre || getc(fp) != gf_id_byte) { + error("bad gf file"); + return 0; + } + int n = getc(fp); + if (n == EOF) + goto eof; + if (!skip(n, fp)) + goto eof; + for (;;) { + int op = getc(fp); + if (op == EOF) + goto eof; + if (op == post) + break; + if ((op >= paint_0 && op <= paint_0 + 63) + || (op >= new_row_0 && op <= new_row_0 + 164)) + continue; + switch (op) { + case no_op: + case eoc: + case skip0: + break; + case paint1: + case skip1: + if (!skip(1, fp)) + goto eof; + break; + case paint1 + 1: + case skip1 + 1: + if (!skip(2, fp)) + goto eof; + break; + case paint1 + 2: + case skip1 + 2: + if (!skip(3, fp)) + goto eof; + break; + case boc: + { + int code; + if (!sread4(&code, fp)) + goto eof; + if (pending_adjustment) { + pending_adjustment = 0; + left[code & 0377] = left_adj; + right[code & 0377] = right_adj; + } + if (!skip(20, fp)) + goto eof; + break; + } + case boc1: + { + int code = getc(fp); + if (code == EOF) + goto eof; + if 
(pending_adjustment) { + pending_adjustment = 0; + left[code] = left_adj; + right[code] = right_adj; + } + if (!skip(4, fp)) + goto eof; + break; + } + case xxx1: + { + int len = getc(fp); + if (len == EOF) + goto eof; + char buf[256]; + if (fread(buf, 1, len, fp) != (size_t)len) + goto eof; + if (len == 10 /* strlen("adjustment") */ + && memcmp(buf, "adjustment", len) == 0) { + int c = getc(fp); + if (c != yyy) { + if (c != EOF) + ungetc(c, fp); + break; + } + if (!sread4(&left_adj, fp)) + goto eof; + c = getc(fp); + if (c != yyy) { + if (c != EOF) + ungetc(c, fp); + break; + } + if (!sread4(&right_adj, fp)) + goto eof; + got_an_adjustment = 1; + pending_adjustment = 1; + } + break; + } + case xxx1 + 1: + if (!uread2(&n, fp) || !skip(n, fp)) + goto eof; + break; + case xxx1 + 2: + if (!uread3(&n, fp) || !skip(n, fp)) + goto eof; + break; + case xxx1 + 3: + if (!sread4(&n, fp) || !skip(n, fp)) + goto eof; + break; + case yyy: + if (!skip(4, fp)) + goto eof; + break; + default: + fatal("unrecognized opcode '%1'", op); + break; + } + } + if (!got_an_adjustment) + warning("no adjustment specials found in gf file"); + return 1; + eof: + error("unexpected end of file"); + return 0; +} + +int gf::sread4(int *p, FILE *fp) +{ + *p = getc(fp); + if (*p >= 128) + *p -= 256; + *p <<= 8; + *p |= getc(fp); + *p <<= 8; + *p |= getc(fp); + *p <<= 8; + *p |= getc(fp); + return !ferror(fp) && !feof(fp); +} + +int gf::uread3(int *p, FILE *fp) +{ + *p = getc(fp); + *p <<= 8; + *p |= getc(fp); + *p <<= 8; + *p |= getc(fp); + return !ferror(fp) && !feof(fp); +} + +int gf::uread2(int *p, FILE *fp) +{ + *p = getc(fp); + *p <<= 8; + *p |= getc(fp); + return !ferror(fp) && !feof(fp); +} + +int gf::skip(int n, FILE *fp) +{ + while (--n >= 0) + if (getc(fp) == EOF) + return 0; + return 1; +} + + +struct char_list { + char *ch; + char_list *next; + char_list(const char *, char_list * = 0); +}; + +char_list::char_list(const char *s, char_list *p) : ch(strsave(s)), + next(p) +{ +} + + +int 
read_map(const char *file, char_list **table) +{ + // Fill table[0..255] from the map file. Each line that is not blank + // and does not start with '#' holds a decimal code (0-255) followed + // by one or more names; each name is prepended to table[code]. + // Returns 1 on success, 0 after reporting an error. + errno = 0; + FILE *fp = fopen(file, "r"); + if (!fp) { + error("can't open '%1': %2", file, strerror(errno)); + return 0; + } + for (int i = 0; i < 256; i++) + table[i] = 0; + char buf[512]; + int lineno = 0; + while (fgets(buf, int(sizeof(buf)), fp)) { + lineno++; + char *ptr = buf; + while (csspace(*ptr)) + ptr++; + if (*ptr == '\0' || *ptr == '#') + continue; + ptr = strtok(ptr, " \n\t"); + if (!ptr) + continue; + int n; + if (sscanf(ptr, "%d", &n) != 1) { + error("%1:%2: bad map file", file, lineno); + fclose(fp); + return 0; + } + if (n < 0 || n > 255) { + error("%1:%2: code %3 out of range", file, lineno, n); + fclose(fp); + return 0; + } + ptr = strtok(0, " \n\t"); + if (!ptr) { + error("%1:%2: missing names", file, lineno); + fclose(fp); + return 0; + } + for (; ptr; ptr = strtok(0, " \n\t")) + table[n] = new char_list(ptr, table[n]); + } + fclose(fp); + return 1; +} + + +/* Every character that can participate in a ligature appears in the +lig_chars table. 'ch' gives the name of the character as it appears +in the map file, and 'i' gives its index in the encoding, which is +filled in later (-1 if it does not appear). */ + +struct S { + const char *ch; + int i; +} lig_chars[] = { + { "f", -1 }, + { "i", -1 }, + { "l", -1 }, + { "ff", -1 }, + { "fi", -1 }, + { "fl", -1 }, + { "Fi", -1 }, + { "Fl", -1 }, +}; + +// Indices into lig_chars[]. + +enum { CH_f, CH_i, CH_l, CH_ff, CH_fi, CH_fl, CH_ffi, CH_ffl }; + +// Each possible ligature appears in this table. 
+ +struct S2 { + unsigned char c1, c2, res; + const char *ch; +} lig_table[] = { + { CH_f, CH_f, CH_ff, "ff" }, + { CH_f, CH_i, CH_fi, "fi" }, + { CH_f, CH_l, CH_fl, "fl" }, + { CH_ff, CH_i, CH_ffi, "ffi" }, + { CH_ff, CH_l, CH_ffl, "ffl" }, + }; + +static void usage(FILE *stream); + +int main(int argc, char **argv) +{ + program_name = argv[0]; + int special_flag = 0; + int skewchar = -1; + int opt; + const char *gf_file = 0; + static const struct option long_options[] = { + { "help", no_argument, 0, CHAR_MAX + 1 }, + { "version", no_argument, 0, 'v' }, + { NULL, 0, 0, 0 } + }; + while ((opt = getopt_long(argc, argv, "svg:k:", long_options, NULL)) + != EOF) + switch (opt) { + case 'g': + gf_file = optarg; + break; + case 's': + special_flag = 1; + break; + case 'k': + { + char *ptr; + long n = strtol(optarg, &ptr, 0); + if ((n == 0 && ptr == optarg) + || *ptr != '\0' + || n < 0 + || n > UCHAR_MAX) + error("invalid skew character position '%1'", optarg); + else + skewchar = (int)n; + break; + } + case 'v': + { + printf("GNU tfmtodit (groff) version %s\n", Version_string); + exit(0); + break; + } + case CHAR_MAX + 1: // --help + usage(stdout); + exit(0); + break; + case '?': + usage(stderr); + exit(1); + break; + case EOF: + assert(0 == "EOF encountered in option processing"); + } + if (argc - optind != 3) { + error("insufficient arguments"); + usage(stderr); + exit(1); + } + gf g; + if (gf_file) { + if (!g.load(gf_file)) + return 1; + } + const char *tfm_file = argv[optind]; + const char *map_file = argv[optind + 1]; + const char *font_file = argv[optind + 2]; + tfm t; + if (!t.load(tfm_file)) + return 1; + char_list *table[256]; + if (!read_map(map_file, table)) + return 1; + errno = 0; + if (!freopen(font_file, "w", stdout)) { + error("can't open '%1' for writing: %2", font_file, + strerror(errno)); + return 1; + } + printf("name %s\n", font_file); + if (special_flag) + fputs("special\n", stdout); + char *internal_name = strsave(argv[optind]); + int len = 
strlen(internal_name); + if (len > 4 && strcmp(internal_name + len - 4, ".tfm") == 0) + internal_name[len - 4] = '\0'; + // DIR_SEPS[] are possible directory separator characters, see + // nonposix.h. We want the rightmost separator of all possible ones. + // Example: d:/foo\\bar. + const char *s = strrchr(internal_name, DIR_SEPS[0]), *s1; + const char *sep = &DIR_SEPS[1]; + while (*sep) + { + s1 = strrchr(internal_name, *sep); + if (s1 && (!s || s1 > s)) + s = s1; + sep++; + } + printf("internalname %s\n", s ? s + 1 : internal_name); + int n; + if (t.get_param(2, &n)) { + if (n > 0) + printf("spacewidth %d\n", n*MULTIPLIER); + } + if (t.get_param(1, &n) && n != 0) + printf("slant %f\n", atan2(n/double(1<<20), 1.0)*180.0/PI); + int xheight; + if (!t.get_param(5, &xheight)) + xheight = 0; + unsigned int i; + // Print the list of ligatures. + // First find the indices of each character that can participate in + // a ligature. + size_t lig_char_entries = sizeof(lig_chars)/sizeof(lig_chars[0]); + size_t lig_table_entries = sizeof(lig_table)/sizeof(lig_table[0]); + for (i = 0; i < 256; i++) + for (unsigned int j = 0; j < lig_char_entries; j++) + for (char_list *p = table[i]; p; p = p->next) + if (strcmp(lig_chars[j].ch, p->ch) == 0) + lig_chars[j].i = i; + // For each possible ligature, if its participants all exist, + // and it appears as a ligature in the tfm file, include in + // the list of ligatures. + int started = 0; + for (i = 0; i < lig_table_entries; i++) { + int i1 = lig_chars[lig_table[i].c1].i; + int i2 = lig_chars[lig_table[i].c2].i; + int r = lig_chars[lig_table[i].res].i; + if (i1 >= 0 && i2 >= 0 && r >= 0) { + unsigned char c; + if (t.get_lig(i1, i2, &c) && c == r) { + if (!started) { + started = 1; + fputs("ligatures", stdout); + } + printf(" %s", lig_table[i].ch); + } + } + } + if (started) + fputs(" 0\n", stdout); + printf("checksum %d\n", t.get_checksum()); + printf("designsize %d\n", t.get_design_size()); + // Now print out the kerning information. 
+ int had_kern = 0; + kern_iterator iter(&t); + unsigned char c1, c2; + int k; + while (iter.next(&c1, &c2, &k)) + if (c2 != skewchar) { + k *= MULTIPLIER; + char_list *q = table[c2]; + for (char_list *p1 = table[c1]; p1; p1 = p1->next) + for (char_list *p2 = q; p2; p2 = p2->next) { + if (!had_kern) { + printf("kernpairs\n"); + had_kern = 1; + } + printf("%s %s %d\n", p1->ch, p2->ch, k); + } + } + printf("charset\n"); + char_list unnamed("---"); + for (i = 0; i < 256; i++) + if (t.contains(i)) { + char_list *p = table[i] ? table[i] : &unnamed; + int m[6]; + m[0] = t.get_width(i); + m[1] = t.get_height(i); + m[2] = t.get_depth(i); + m[3] = t.get_italic(i); + m[4] = g.get_left_adjustment(i); + m[5] = g.get_right_adjustment(i); + printf("%s\t%d", p->ch, m[0]*MULTIPLIER); + int j; + for (j = int(sizeof(m)/sizeof(m[0])) - 1; j > 0; j--) + if (m[j] != 0) + break; + for (k = 1; k <= j; k++) + printf(",%d", m[k]*MULTIPLIER); + int type = 0; + if (m[2] > 0) + type = 1; + if (m[1] > xheight) + type += 2; + printf("\t%d\t%04o\n", type, i); + for (p = p->next; p; p = p->next) + printf("%s\t\"\n", p->ch); + } + return 0; +} + +static void usage(FILE *stream) +{ + fprintf(stream, +"usage: %s [-s] [-g gf-file] [-k skew-char] tfm-file map-file font\n" +"usage: %s {-v | --version}\n" +"usage: %s --help\n", + program_name, program_name, program_name); +} + +// Local Variables: +// fill-column: 72 +// mode: C++ +// End: +// vim: set cindent noexpandtab shiftwidth=2 textwidth=72: diff --git a/src/utils/xtotroff/xtotroff.1.man b/src/utils/xtotroff/xtotroff.1.man new file mode 100644 index 0000000..17fb0db --- /dev/null +++ b/src/utils/xtotroff/xtotroff.1.man @@ -0,0 +1,237 @@ +.TH xtotroff @MAN1EXT@ "@MDATE@" "groff @VERSION@" +.SH Name +xtotroff \- convert X font metrics into +.I groff +font metrics +. +. 
+.\" ==================================================================== +.\" Legal Terms +.\" ==================================================================== +.\" +.\" Copyright (C) 2004-2022 Free Software Foundation, Inc. +.\" +.\" Permission is granted to make and distribute verbatim copies of this +.\" manual provided the copyright notice and this permission notice are +.\" preserved on all copies. +.\" +.\" Permission is granted to copy and distribute modified versions of +.\" this manual under the conditions for verbatim copying, provided that +.\" the entire resulting derived work is distributed under the terms of +.\" a permission notice identical to this one. +.\" +.\" Permission is granted to copy and distribute translations of this +.\" manual into another language, under the above conditions for +.\" modified versions, except that this permission notice may be +.\" included in translations approved by the Free Software Foundation +.\" instead of in the original English. +. +. +.\" Save and disable compatibility mode (for, e.g., Solaris 10/11). +.do nr *groff_xtotroff_1_man_C \n[.cp] +.cp 0 +. +.\" Define fallback for groff 1.23's MR macro if the system lacks it. +.nr do-fallback 0 +.if !\n(.f .nr do-fallback 1 \" mandoc +.if \n(.g .if !d MR .nr do-fallback 1 \" older groff +.if !\n(.g .nr do-fallback 1 \" non-groff *roff +.if \n[do-fallback] \{\ +. de MR +. ie \\n(.$=1 \ +. I \%\\$1 +. el \ +. IR \%\\$1 (\\$2)\\$3 +. . +.\} +.rr do-fallback +. +. +.\" ==================================================================== +.SH Synopsis +.\" ==================================================================== +. +.SY xtotroff +.RB [ \-d\~\c +.IR destination-directory ] +.RB [ \-r\~\c +.IR resolution ] +.RB [ \-s\~\c +.IR type-size ] +.I font-map +.YS +. +. +.SY xtotroff +.B \-\-help +.YS +. +. +.SY xtotroff +.B \-v +. +.SY xtotroff +.B \-\-version +.YS +. +. 
+.\" ==================================================================== +.SH Description +.\" ==================================================================== +. +.I xtotroff +uses +.I font-map +to create +.MR groff @MAN1EXT@ +font description files from X11 fonts. +. +The +.I font-map +file consists of a series of lines of paired +.I groff +font names and X font names as X Logical Font Description (XLFD) +patterns, +with the pair members separated by spaces and/or tabs. +. +For example, +an input +.I font-map +file consisting of the line +. +.RS +.EX +TB \-adobe\-times\-bold\-r\-normal\-\-*\-*\-*\-*\-p\-*\-iso8859\-1 +.EE +.RE +. +maps the XLFD on the right to the +.I groff +font name +.BR TB , +conventionally \[lq]Times bold\[rq]. +. +. +.PP +.I xtotroff +opens a connection to the running X server to query its font catalog, +and aborts if it cannot. +. +If necessary, +the wildcards in the XLFD patterns are populated with the arguments to +the +.B \-r +and +.B \-s +options. +. +If a font name is still ambiguous, +.I xtotroff +aborts. +. +For each successful mapping, +.I xtotroff +creates a +.I groff +font description file in the current working directory +(or that specified by the +.B \-d +option) +named for each +.I groff +font, +and reports the mapping to the standard output stream. +. +. +.\" ==================================================================== +.SH Options +.\" ==================================================================== +. +.B \-\-help +displays a usage message, +while +.B \-v +and +.B \-\-version +show version information; +all exit afterward. +. +. +.TP +.BI \-d\~ destination-directory +Write font descriptions to +.I destination-directory +rather than the current working directory. +. +. +.TP +.BI \-r\~ resolution +Set the resolution for all font patterns in +.IR font-map . +. +The value is used for both the horizontal and vertical motion quanta. +. +If not specified, +a resolution of 75dpi is assumed. +. +. 
+.TP +.BI \-s\~ type-size +Set the type size in points for all font patterns in +.IR font-map . +. +If not specified, +a size of 10 points is assumed. +. +. +.\" ==================================================================== +.SH Files +.\" ==================================================================== +. +.TP +.I @FONTDIR@/\:\%FontMap\-X11 +is the font mapping file used to produce the pre-generated font +description files, +supplied with +.IR groff , +of X11 core fonts corresponding to the 13 base Type\~1 fonts for +PostScript level 1. +. +. +.\" ==================================================================== +.SH Bugs +.\" ==================================================================== +. +The only supported font encodings are \[lq]iso8859\-1\[rq] and +\%\[lq]adobe\-\:fontspecific\[rq]. +. +. +.\" ==================================================================== +.SH "See also" +.\" ==================================================================== +. +.UR https://\:www\:.x\:.org/\:releases/\:X11R7.6/\:doc/\:xorg\-docs/\ +\:specs/\:XLFD/xlfd\:.html +\[lq]X Logical Font Description Conventions\[rq] +.UE , +by Jim Flowers and Stephen Gildea. +. +. +.PP +.MR X 7 , +.MR groff @MAN1EXT@ , +.MR gxditview @MAN1EXT@ , +.MR troff @MAN1EXT@ , +.MR groff_font @MAN5EXT@ +. +. +.\" Restore compatibility mode (for, e.g., Solaris 10/11). +.cp \n[*groff_xtotroff_1_man_C] +.do rr *groff_xtotroff_1_man_C +. +. +.\" Local Variables: +.\" fill-column: 72 +.\" mode: nroff +.\" End: +.\" vim: set filetype=groff textwidth=72: diff --git a/src/utils/xtotroff/xtotroff.am b/src/utils/xtotroff/xtotroff.am new file mode 100644 index 0000000..734d143 --- /dev/null +++ b/src/utils/xtotroff/xtotroff.am @@ -0,0 +1,41 @@ +# Copyright (C) 2014-2020 Free Software Foundation, Inc. +# +# This file is part of groff. 
+# +# groff is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# groff is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +if WITHOUT_X11 +XTOTROFF_MAN1 = +else +XTOTROFF_MAN1 = src/utils/xtotroff/xtotroff.1 +bin_PROGRAMS += xtotroff +man1_MANS += $(XTOTROFF_MAN1) +xtotroff_SOURCES = src/utils/xtotroff/xtotroff.c +XLIBS=$(LIBXUTIL) $(LIBGROFF) +xtotroff_LDADD = libxutil.a libgroff.a $(X_LIBS) $(X_PRE_LIBS) \ + -lXaw -lXt -lX11 $(X_EXTRA_LIBS) $(LIBM) lib/libgnu.a +xtotroff_CPPFLAGS = $(AM_CPPFLAGS) $(X_CFLAGS) +endif +EXTRA_DIST += src/utils/xtotroff/xtotroff.1.man + +# Define variable needed only for the targets that regenerate +# descriptions of X11 core fonts (used in "maintainer mode"). +xtotroff=$(top_builddir)/xtotroff + + +# Local Variables: +# fill-column: 72 +# mode: makefile-automake +# End: +# vim: set autoindent filetype=automake textwidth=72: diff --git a/src/utils/xtotroff/xtotroff.c b/src/utils/xtotroff/xtotroff.c new file mode 100644 index 0000000..368761f --- /dev/null +++ b/src/utils/xtotroff/xtotroff.c @@ -0,0 +1,368 @@ +/* Copyright (C) 1992-2022 Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.com) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or +(at your option) any later version. 
+ +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* + * xtotroff + * + * convert X font metrics into troff font metrics + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#define __GETOPT_PREFIX groff_ + +#include <X11/Xlib.h> +#include <stdbool.h> +#include <stdio.h> +#include <ctype.h> +#include <errno.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <limits.h> + +#include <getopt.h> + +#include "XFontName.h" +#include "DviChar.h" + +#define charWidth(fi,c) \ + ((fi)->per_char[(c) - (fi)->min_char_or_byte2].width) +#define charHeight(fi,c) \ + ((fi)->per_char[(c) - (fi)->min_char_or_byte2].ascent) +#define charDepth(fi,c) \ + ((fi)->per_char[(c) - (fi)->min_char_or_byte2].descent) +#define charLBearing(fi,c) \ + ((fi)->per_char[(c) - (fi)->min_char_or_byte2].lbearing) +#define charRBearing(fi,c) \ + ((fi)->per_char[(c) - (fi)->min_char_or_byte2].rbearing) + +extern const char *Version_string; +static char *program_name; + +Display *dpy; +unsigned resolution = 75; +unsigned point_size = 10; +char *destdir = NULL; + +static bool charExists(XFontStruct * fi, int c) +{ + XCharStruct *p; + + /* 'c' is always >= 0 */ + if ((unsigned int) c < fi->min_char_or_byte2 + || (unsigned int) c > fi->max_char_or_byte2) + return false; + p = fi->per_char + (c - fi->min_char_or_byte2); + return p->lbearing != 0 || p->rbearing != 0 || p->width != 0 + || p->ascent != 0 || p->descent != 0 || p->attributes != 0; +} + +/* Canonicalize the font name by replacing scalable parts by *s. 
*/ + +static bool CanonicalizeFontName(char *font_name, char *canon_font_name) +{ + unsigned int attributes; + XFontName parsed; + + if (!XParseFontName(font_name, &parsed, &attributes)) { + fprintf(stderr, "%s: not a standard font name: \"%s\"\n", + program_name, font_name); + return false; + } + + attributes &= ~(FontNamePixelSize | FontNameAverageWidth + | FontNamePointSize + | FontNameResolutionX | FontNameResolutionY); + XFormatFontName(&parsed, attributes, canon_font_name); + return true; +} + +static bool +FontNamesAmbiguous(const char *font_name, char **names, int count) +{ + char name1[2048], name2[2048]; + int i; + + if (1 == count) + return false; + + for (i = 0; i < count; i++) { + if (!CanonicalizeFontName(names[i], 0 == i ? name1 : name2)) { + fprintf(stderr, "%s: invalid font name: \"%s\"\n", program_name, + names[i]); + return true; + } + if (i > 0 && strcmp(name1, name2) != 0) { + fprintf(stderr, "%s: ambiguous font name: \"%s\"", program_name, + font_name); + fprintf(stderr, " matches \"%s\"", names[0]); + fprintf(stderr, " and \"%s\"", names[i]); + return true; + } + } + return false; +} + +static void xtotroff_exit(int status) +{ + free(destdir); + exit(status); +} + +static bool MapFont(char *font_name, const char *troff_name) +{ + XFontStruct *fi; + int count; + char **names; + FILE *out; + unsigned int c; + unsigned int attributes; + XFontName parsed; + int j, k; + DviCharNameMap *char_map; + /* 'encoding' needs to hold a CharSetRegistry (256), a CharSetEncoding + (256) [both from XFontName.h], a dash, and a null terminator. 
*/ + char encoding[256 * 2 + 1 + 1]; + char *s; + int wid; + char name_string[2048]; + + if (!XParseFontName(font_name, &parsed, &attributes)) { + fprintf(stderr, "%s: not a standard font name: \"%s\"\n", + program_name, font_name); + return false; + } + + attributes &= ~(FontNamePixelSize | FontNameAverageWidth); + attributes |= FontNameResolutionX; + attributes |= FontNameResolutionY; + attributes |= FontNamePointSize; + parsed.ResolutionX = resolution; + parsed.ResolutionY = resolution; + parsed.PointSize = point_size * 10; + XFormatFontName(&parsed, attributes, name_string); + + names = XListFonts(dpy, name_string, 100000, &count); + if (count < 1) { + fprintf(stderr, "%s: invalid font name: \"%s\"\n", program_name, + font_name); + return false; + } + + if (FontNamesAmbiguous(font_name, names, count)) + return false; + + XParseFontName(names[0], &parsed, &attributes); + size_t sz = sizeof encoding; + snprintf(encoding, sz, "%s-%s", parsed.CharSetRegistry, + parsed.CharSetEncoding); + for (s = encoding; *s; s++) + if (isupper(*s)) + *s = tolower(*s); + char_map = DviFindMap(encoding); + if (!char_map) { + fprintf(stderr, "%s: not a standard encoding: \"%s\"\n", + program_name, encoding); + return false; + } + + fi = XLoadQueryFont(dpy, names[0]); + if (!fi) { + fprintf(stderr, "%s: font does not exist: \"%s\"\n", program_name, + names[0]); + return false; + } + + printf("%s -> %s\n", names[0], troff_name); + char *file_name = (char *)troff_name; + size_t dirlen = strlen(destdir); + + if (dirlen > 0) { + size_t baselen = strlen(troff_name); + file_name = malloc(dirlen + baselen + 2 /* '/' and '\0' */); + if (NULL == file_name) { + fprintf(stderr, "%s: fatal error: unable to allocate memory\n", + program_name); + xtotroff_exit(EXIT_FAILURE); + } + (void) strcpy(file_name, destdir); + file_name[dirlen] = '/'; + (void) strcpy((file_name + dirlen + 1), troff_name); + } + + { /* Avoid race while opening file */ + int fd; + (void) unlink(file_name); + fd = 
open(file_name, O_WRONLY | O_CREAT | O_EXCL, 0600); + out = fdopen(fd, "w"); + } + + if (NULL == out) { + fprintf(stderr, "%s: unable to create '%s': %s\n", program_name, + file_name, strerror(errno)); + free(file_name); + return false; + } + fprintf(out, "name %s\n", troff_name); + if (!strcmp(char_map->encoding, "adobe-fontspecific")) + fprintf(out, "special\n"); + if (charExists(fi, ' ')) { + int w = charWidth(fi, ' '); + if (w > 0) + fprintf(out, "spacewidth %d\n", w); + } + fprintf(out, "charset\n"); + for (c = fi->min_char_or_byte2; c <= fi->max_char_or_byte2; c++) { + const char *name = DviCharName(char_map, c, 0); + if (charExists(fi, c)) { + int param[5]; + + wid = charWidth(fi, c); + + fprintf(out, "%s\t%d", name ? name : "---", wid); + param[0] = charHeight(fi, c); + param[1] = charDepth(fi, c); + param[2] = 0; /* charRBearing (fi, c) - wid */ + param[3] = 0; /* charLBearing (fi, c) */ + param[4] = 0; /* XXX */ + for (j = 0; j < 5; j++) + if (param[j] < 0) + param[j] = 0; + for (j = 4; j >= 0; j--) + if (param[j] != 0) + break; + for (k = 0; k <= j; k++) + fprintf(out, ",%d", param[k]); + fprintf(out, "\t0\t0%o\n", c); + + if (name) { + for (k = 1; DviCharName(char_map, c, k); k++) { + fprintf(out, "%s\t\"\n", DviCharName(char_map, c, k)); + } + } + } + } + XUnloadFont(dpy, fi->fid); + fclose(out); + free(file_name); + return true; +} + +static void usage(FILE *stream) +{ + fprintf(stream, + "usage: %s [-d destination-directory] [-r resolution]" + " [-s type-size] font-map\n" + "usage: %s {-v | --version}\n" + "usage: %s --help\n", + program_name, program_name, program_name); +} + +int main(int argc, char **argv) +{ + char troff_name[1024]; + char font_name[1024]; + char line[1024]; + char *a, *b, c; + FILE *map; + int opt; + static const struct option long_options[] = { + { "help", no_argument, 0, CHAR_MAX + 1 }, + { "version", no_argument, 0, 'v' }, + { NULL, 0, 0, 0 } + }; + + program_name = argv[0]; + + while ((opt = getopt_long(argc, argv, 
"d:gr:s:v", long_options, + NULL)) != EOF) { + switch (opt) { + case 'd': + destdir = strdup(optarg); + break; + case 'g': + /* unused; just for compatibility */ + break; + case 'r': + sscanf(optarg, "%u", &resolution); + break; + case 's': + sscanf(optarg, "%u", &point_size); + break; + case 'v': + printf("GNU xtotroff (groff) version %s\n", Version_string); + xtotroff_exit(EXIT_SUCCESS); + break; + case CHAR_MAX + 1: /* --help */ + usage(stdout); + xtotroff_exit(EXIT_SUCCESS); + break; + case '?': + usage(stderr); + xtotroff_exit(EXIT_FAILURE); + break; + } + } + if (argc - optind != 1) { + usage(stderr); + xtotroff_exit(EXIT_FAILURE); + } + + dpy = XOpenDisplay(0); + if (!dpy) { + fprintf(stderr, "%s: fatal error: can't connect to the X server;" + " make sure the DISPLAY environment variable is set" + " correctly\n", program_name); + xtotroff_exit(EXIT_FAILURE); + } + + map = fopen(argv[optind], "r"); + if (NULL == map) { + fprintf(stderr, "%s: fatal error: unable to open map file '%s':" + " %s\n", program_name, argv[optind], strerror(errno)); + xtotroff_exit(EXIT_FAILURE); + } + + while (fgets(line, sizeof(line), map)) { + for (a = line, b = troff_name; *a; a++, b++) { + c = (*b = *a); + if (' ' == c || '\t' == c) + break; + } + *b = '\0'; + while (*a && (' ' == *a || '\t' == *a)) + ++a; + for (b = font_name; *a; a++, b++) + if ((*b = *a) == '\n') + break; + *b = '\0'; + if (!MapFont(font_name, troff_name)) + xtotroff_exit(EXIT_FAILURE); + } + xtotroff_exit(EXIT_SUCCESS); +} + +// Local Variables: +// fill-column: 72 +// mode: C +// End: +// vim: set cindent noexpandtab shiftwidth=2 textwidth=72: |