summaryrefslogtreecommitdiffstats
path: root/WWW/Library/Implementation/dtd_util.c
diff options
context:
space:
mode:
Diffstat (limited to 'WWW/Library/Implementation/dtd_util.c')
-rw-r--r--WWW/Library/Implementation/dtd_util.c1407
1 files changed, 1407 insertions, 0 deletions
diff --git a/WWW/Library/Implementation/dtd_util.c b/WWW/Library/Implementation/dtd_util.c
new file mode 100644
index 0000000..3301d81
--- /dev/null
+++ b/WWW/Library/Implementation/dtd_util.c
@@ -0,0 +1,1407 @@
+/*
+ * $LynxId: dtd_util.c,v 1.79 2018/12/27 10:28:12 tom Exp $
+ *
+ * Given a SGML_dtd structure, write a corresponding flat file, or "C" source.
+ * Given the flat-file, write the "C" source.
+ *
+ * TODO: use symbols for HTMLA_NORMAL, etc.
+ */
+
+#include <HTUtils.h>
+#include <HTMLDTD.h>
+#include <string.h>
+
+/*
+ * Tweaks to build standalone.
+ */
+#undef exit
+
+BOOLEAN WWW_TraceFlag = FALSE;
+FILE *TraceFP(void)
+{
+ return stderr;
+}
+
+/*
+ * Begin the actual utility.
+ */
+#define GETOPT "chl:o:ts"
+
+#define NOTE(message) fprintf(output, message "\n");
+/* *INDENT-OFF* */
+#ifdef USE_PRETTYSRC
+# define N HTMLA_NORMAL
+# define i HTMLA_ANAME
+# define h HTMLA_HREF
+# define c HTMLA_CLASS
+# define x HTMLA_AUXCLASS
+# define T(t) , t
+#else
+# define T(t) /*nothing */
+#endif
+
+#define ATTR_TYPE(name) { #name, name##_attr_list }
+
+static const attr core_attr_list[] = {
+ { "CLASS" T(c) },
+ { "ID" T(i) },
+ { "STYLE" T(N) },
+ { "TITLE" T(N) },
+ { 0 T(N) } /* Terminate list */
+};
+
+static const attr i18n_attr_list[] = {
+ { "DIR" T(N) },
+ { "LANG" T(N) },
+ { 0 T(N) } /* Terminate list */
+};
+
+static const attr events_attr_list[] = {
+ { "ONCLICK" T(N) },
+ { "ONDBLCLICK" T(N) },
+ { "ONKEYDOWN" T(N) },
+ { "ONKEYPRESS" T(N) },
+ { "ONKEYUP" T(N) },
+ { "ONMOUSEDOWN" T(N) },
+ { "ONMOUSEMOVE" T(N) },
+ { "ONMOUSEOUT" T(N) },
+ { "ONMOUSEOVER" T(N) },
+ { "ONMOUSEUP" T(N) },
+ { 0 T(N) } /* Terminate list */
+};
+
+static const attr align_attr_list[] = {
+ { "ALIGN" T(N) },
+ { 0 T(N) } /* Terminate list */
+};
+
+static const attr cellalign_attr_list[] = {
+ { "ALIGN" T(N) },
+ { "CHAR" T(N) },
+ { "CHAROFF" T(N) },
+ { "VALIGN" T(N) },
+ { 0 T(N) } /* Terminate list */
+};
+
+static const attr bgcolor_attr_list[] = {
+ { "BGCOLOR" T(N) },
+ { 0 T(N) } /* Terminate list */
+};
+
+#undef T
+/* *INDENT-ON* */
+
+static void failed(const char *s)
+{
+ perror(s);
+ exit(EXIT_FAILURE);
+}
+
+static void usage(void)
+{
+ static const char *tbl[] =
+ {
+ "Usage: dtd_util [options]",
+ "",
+ "Options:",
+ " -c generate C-source"
+ " -h generate C-header"
+ " -l load",
+ " -o filename specify output (default: stdout)",
+ " -s strict (HTML DTD 0)",
+ " -t tagsoup (HTML DTD 1)",
+ };
+ unsigned n;
+
+ for (n = 0; n < TABLESIZE(tbl); ++n) {
+ fprintf(stderr, "%s\n", tbl[n]);
+ }
+ exit(EXIT_FAILURE);
+}
+
+static const char *SGMLContent2s(SGMLContent contents)
+{
+ char *value = "?";
+
+ switch (contents) {
+ case SGML_EMPTY:
+ value = "SGML_EMPTY";
+ break;
+ case SGML_LITTERAL:
+ value = "SGML_LITTERAL";
+ break;
+ case SGML_CDATA:
+ value = "SGML_CDATA";
+ break;
+ case SGML_SCRIPT:
+ value = "SGML_SCRIPT";
+ break;
+ case SGML_RCDATA:
+ value = "SGML_RCDATA";
+ break;
+ case SGML_MIXED:
+ value = "SGML_MIXED";
+ break;
+ case SGML_ELEMENT:
+ value = "SGML_ELEMENT";
+ break;
+ case SGML_PCDATA:
+ value = "SGML_PCDATA";
+ break;
+ }
+ return value;
+}
+
+static SGMLContent s2SGMLContent(const char *value)
+{
+ static SGMLContent table[] =
+ {
+ SGML_EMPTY,
+ SGML_LITTERAL,
+ SGML_CDATA,
+ SGML_SCRIPT,
+ SGML_RCDATA,
+ SGML_MIXED,
+ SGML_ELEMENT,
+ SGML_PCDATA
+ };
+ unsigned n;
+ SGMLContent result = SGML_EMPTY;
+
+ for (n = 0; n < TABLESIZE(table); ++n) {
+ if (!strcmp(SGMLContent2s(table[n]), value)) {
+ result = table[n];
+ break;
+ }
+ }
+ return result;
+}
+
+static void PrintF(FILE *, int, const char *, ...) GCC_PRINTFLIKE(3, 4);
+
+static void PrintF(FILE *output, int width, const char *fmt, ...)
+{
+ char buffer[BUFSIZ];
+ va_list ap;
+
+ va_start(ap, fmt);
+ vsprintf(buffer, fmt, ap);
+ va_end(ap);
+
+ fprintf(output, "%-*s", width, buffer);
+}
+
+static int same_AttrList(AttrList a, AttrList b)
+{
+ int result = 1;
+
+ if (a && b) {
+ while (a->name && b->name) {
+ if (strcmp(a->name, b->name)) {
+ result = 0;
+ break;
+ }
+ ++a, ++b;
+ }
+ if (a->name || b->name)
+ result = 0;
+ } else {
+ result = 0;
+ }
+ return result;
+}
+
+static int first_attrs(const SGML_dtd * dtd, int which)
+{
+ int check;
+ int result = TRUE;
+
+ for (check = 0; check < which; ++check) {
+ if (dtd->tags[check].attributes == dtd->tags[which].attributes) {
+ result = FALSE;
+ break;
+ } else if (same_AttrList(dtd->tags[check].attributes,
+ dtd->tags[which].attributes)) {
+ result = FALSE;
+ dtd->tags[which].attributes = dtd->tags[check].attributes;
+ break;
+ }
+ }
+ return result;
+}
+
+static char *no_dashes(char *target, const char *source)
+{
+ int j;
+
+ for (j = 0; (target[j] = source[j]) != '\0'; ++j) {
+ if (!isalnum(target[j]))
+ target[j] = '_';
+ }
+ return target;
+}
+
+/* the second "OBJECT" is treated specially */
+static int first_object(const SGML_dtd * dtd, int which)
+{
+ int check;
+
+ for (check = 0; check <= which; ++check) {
+ if (!strcmp(dtd->tags[check].name, "OBJECT"))
+ break;
+ }
+ return (check == which);
+}
+
+static const char *NameOfAttrs(const SGML_dtd * dtd, int which)
+{
+ int check;
+ const char *result = dtd->tags[which].name;
+
+ for (check = 0; check < which; ++check) {
+ if (dtd->tags[check].attributes == dtd->tags[which].attributes) {
+ result = dtd->tags[check].name;
+ break;
+ }
+ }
+ /* special cases to match existing headers */
+ if (!strcmp(result, "ABBR"))
+ result = "GEN";
+ else if (!strcmp(result, "ARTICLE"))
+ result = "GEN5";
+ else if (!strcmp(result, "BLOCKQUOTE"))
+ result = "BQ";
+ else if (!strcmp(result, "BASEFONT"))
+ result = "FONT";
+ else if (!strcmp(result, "CENTER"))
+ result = "DIV";
+ else if (!strcmp(result, "DIR"))
+ result = "UL";
+ else if (!strcmp(result, "H1"))
+ result = "H";
+ else if (!strcmp(result, "TBODY"))
+ result = "TR";
+ return result;
+}
+
+static const char *DEF_name(const SGML_dtd * dtd, int which)
+{
+ const char *result = dtd->tags[which].name;
+
+ if (!strcmp(result, "OBJECT") && !first_object(dtd, which))
+ result = "OBJECT_PCDATA";
+ return result;
+}
+
+typedef struct {
+ const char *name;
+ const attr *attrs;
+ int count;
+ int which;
+} AttrInfo;
+
+static int compare_attr_types(const void *a, const void *b)
+{
+ const AttrType *p = (const AttrType *) a;
+ const AttrType *q = (const AttrType *) b;
+ int result = 0;
+
+ /* keep lowercase AttrType lists before uppercase, since latter are derived */
+ if (isupper(p->name[0]) ^ isupper(q->name[0])) {
+ if (isupper(p->name[0])) {
+ result = 1;
+ } else {
+ result = -1;
+ }
+ } else {
+ result = strcmp(p->name, q->name);
+ }
+ return result;
+}
+
+static int len_AttrTypes(const AttrType * data)
+{
+ int result = 0;
+
+ for (result = 0; data[result].name != 0; ++result) {
+ ;
+ }
+ return result;
+}
+
+static AttrType *sorted_AttrTypes(const AttrType * source)
+{
+ AttrType *result = 0;
+ unsigned number = len_AttrTypes(source);
+
+ if (number != 0) {
+ result = typecallocn(AttrType, number + 1);
+ if (result != 0) {
+ MemCpy(result, source, number * sizeof(*result));
+ qsort(result, number, sizeof(*result), compare_attr_types);
+ }
+ }
+
+ return result;
+}
+
+static int compare_attr(const void *a, const void *b)
+{
+ const AttrInfo *p = (const AttrInfo *) a;
+ const AttrInfo *q = (const AttrInfo *) b;
+
+ return strcmp(p->name, q->name);
+}
+
+static int len_AttrList(AttrList data)
+{
+ int result = 0;
+
+ for (result = 0; data[result].name != 0; ++result) {
+ ;
+ }
+ return result;
+}
+
+static void sort_uniq_AttrList(attr * data)
+{
+ unsigned have = len_AttrList(data);
+ unsigned j, k;
+
+ qsort(data, have, sizeof(*data), compare_attr);
+ /*
+ * Eliminate duplicates
+ */
+ for (j = 0; j < have; ++j) {
+ for (k = j; data[k].name; ++k) {
+ if (data[k + 1].name == 0)
+ break;
+ if (strcmp(data[j].name, data[k + 1].name)) {
+ break;
+ }
+ }
+ data[j] = data[k];
+ }
+ memset(data + j, 0, sizeof(data[0]));
+}
+
+static attr *copy_AttrList(AttrList data)
+{
+ unsigned need = len_AttrList(data);
+ unsigned n;
+
+ attr *result = (attr *) calloc(need + 1, sizeof(attr));
+
+ for (n = 0; n < need; ++n)
+ result[n] = data[n];
+ sort_uniq_AttrList(result);
+ return result;
+}
+
+static attr *merge_AttrLists(const AttrType * data)
+{
+ const AttrType *at;
+ attr *result = 0;
+ unsigned need = 1;
+ unsigned have = 0;
+ unsigned j;
+
+ for (at = data; at->name; ++at) {
+ need += len_AttrList(at->list);
+ }
+ result = (attr *) calloc(need + 1, sizeof(attr));
+ for (at = data; at->name; ++at) {
+ if (!strcmp(at->name, "events")) {
+ ; /* lynx does not use events */
+ } else {
+ for (j = 0; at->list[j].name; ++j) {
+ result[have++] = at->list[j];
+ }
+ }
+ }
+ sort_uniq_AttrList(result);
+ return result;
+}
+
+static int clean_AttrList(attr * target, AttrList source)
+{
+ int result = 0;
+ int j, k;
+
+ for (j = 0; target[j].name != 0; ++j) {
+ for (k = 0; source[k].name != 0; ++k) {
+ if (!strcmp(target[j].name, source[k].name)) {
+ k = j--;
+ for (;;) {
+ target[k] = target[k + 1];
+ if (target[k++].name == 0)
+ break;
+ }
+ ++result;
+ break;
+ }
+ }
+ }
+ return result;
+}
+
+/*
+ * Actually COUNT the number of attributes, to make it possible to edit a
+ * attribute-table in src0_HTMLDTD.h and have all of the files updated by
+ * just doing a "make sources".
+ */
+static int AttrCount(HTTag * tag)
+{
+ return len_AttrList(tag->attributes);
+}
+
+static AttrInfo *sorted_attrs(const SGML_dtd * dtd, unsigned *countp)
+{
+ int j;
+
+ AttrInfo *data = (AttrInfo *) calloc(dtd->number_of_tags, sizeof(AttrInfo));
+ unsigned count = 0;
+
+ /* get the attribute-data */
+ for (j = 0; j < dtd->number_of_tags; ++j) {
+ if (first_attrs(dtd, j)) {
+ data[count].name = NameOfAttrs(dtd, j);
+ data[count].attrs = dtd->tags[j].attributes;
+ data[count].count = AttrCount(&(dtd->tags[j]));
+ data[count].which = j;
+ ++count;
+ }
+ }
+ /* sort the data by the name of their associated tag */
+ qsort(data, count, sizeof(*data), compare_attr);
+ *countp = count;
+ return data;
+}
+
+static void dump_src_HTTag_Defines(FILE *output, const SGML_dtd * dtd, int which)
+{
+ HTTag *tag = &(dtd->tags[which]);
+
+#define myFMT "0x%05X"
+ fprintf(output,
+ "#define T_%-13s "
+ myFMT "," myFMT "," myFMT "," myFMT "," myFMT "," myFMT
+ "," myFMT "\n",
+ DEF_name(dtd, which),
+ tag->tagclass,
+ tag->contains,
+ tag->icontains,
+ tag->contained,
+ tag->icontained,
+ tag->canclose,
+ tag->flags);
+}
+
+static void dump_AttrItem(FILE *output, const attr * data)
+{
+ char buffer[BUFSIZ];
+ char pretty = 'N';
+
+ sprintf(buffer, "\"%s\"", data->name);
+#ifdef USE_PRETTYSRC
+ switch (data->type) {
+ case HTMLA_NORMAL:
+ pretty = 'N';
+ break;
+ case HTMLA_ANAME:
+ pretty = 'i';
+ break;
+ case HTMLA_HREF:
+ pretty = 'h';
+ break;
+ case HTMLA_CLASS:
+ pretty = 'c';
+ break;
+ case HTMLA_AUXCLASS:
+ pretty = 'x';
+ break;
+ }
+#endif
+ fprintf(output, "\t{ %-15s T(%c) },\n", buffer, pretty);
+}
+
+static void dump_AttrItem0(FILE *output)
+{
+ fprintf(output, "\t{ 0 T(N) }\t/* Terminate list */\n");
+}
+
+static void dump_src_AttrType(FILE *output, const char *name, AttrList data, const char **from)
+{
+ int n;
+
+ fprintf(output, "static const attr %s_attr_list[] = {\n", name);
+ if (data != 0) {
+ for (n = 0; data[n].name != 0; ++n) {
+ dump_AttrItem(output, data + n);
+ }
+ }
+ fprintf(output, "\t{ 0 T(N) } /* Terminate list */\n");
+ fprintf(output, "};\n");
+ NOTE("");
+ fprintf(output, "static const AttrType %s_attr_type[] = {\n", name);
+ if (from != 0) {
+ while (*from != 0) {
+ fprintf(output, "\t{ ATTR_TYPE(%s) },\n", *from);
+ ++from;
+ }
+ } else {
+ fprintf(output, "\t{ ATTR_TYPE(%s) },\n", name);
+ }
+ fprintf(output, "\t{ 0, 0 },\n");
+ fprintf(output, "};\n");
+ NOTE("");
+}
+
+static void dump_src_HTTag_Attrs(FILE *output, const SGML_dtd * dtd, int which)
+{
+ HTTag *tag = &(dtd->tags[which]);
+ attr *list = merge_AttrLists(tag->attr_types);
+ char buffer[BUFSIZ];
+ int n;
+ int limit = len_AttrList(list);
+
+ sprintf(buffer, "static const attr %s_attr[] = {", NameOfAttrs(dtd, which));
+ fprintf(output,
+ "%-40s/* %s attributes */\n", buffer, tag->name);
+ for (n = 0; n < limit; ++n) {
+ dump_AttrItem(output, list + n);
+ }
+ dump_AttrItem0(output);
+ fprintf(output, "};\n");
+ NOTE("");
+ free(list);
+}
+
+static void dump_src_HTTag(FILE *output, const SGML_dtd * dtd, int which)
+{
+ HTTag *tag = &(dtd->tags[which]);
+ char *P_macro = "P";
+
+#ifdef USE_JUSTIFY_ELTS
+ if (!tag->can_justify)
+ P_macro = "P0";
+#endif
+ PrintF(output, 19, " { %s(%s),", P_macro, tag->name);
+ PrintF(output, 24, "ATTR_DATA(%s), ", NameOfAttrs(dtd, which));
+ PrintF(output, 14, "%s,", SGMLContent2s(tag->contents));
+ fprintf(output, "T_%s", DEF_name(dtd, which));
+ fprintf(output, "},\n");
+}
+
+static void dump_source(FILE *output, const SGML_dtd * dtd, int dtd_version)
+{
+ static AttrType generic_types[] =
+ {
+ ATTR_TYPE(core),
+ ATTR_TYPE(i18n),
+ ATTR_TYPE(events),
+ ATTR_TYPE(align),
+ ATTR_TYPE(cellalign),
+ ATTR_TYPE(bgcolor),
+ {0, 0}
+ };
+ AttrType *gt;
+
+ const char *marker = "src_HTMLDTD_H";
+ int j;
+
+ unsigned count = 0;
+ AttrInfo *data = sorted_attrs(dtd, &count);
+
+ fprintf(output, "/* %cLynxId%c */\n", '$', '$');
+ fprintf(output, "#ifndef %s%d\n", marker, dtd_version);
+ fprintf(output, "#define %s%d 1\n\n", marker, dtd_version);
+
+ /*
+ * If we ifdef this for once, and make the table names distinct, we can
+ * #include the strict- and tagsoup-output directly in HTMLDTD.c
+ */
+ NOTE("#ifndef once_HTMLDTD");
+ NOTE("#define once_HTMLDTD 1");
+ NOTE("");
+
+ /* construct TagClass-define's */
+ for (j = 0; j <= dtd->number_of_tags; ++j) {
+ dump_src_HTTag_Defines(output, dtd, j);
+ }
+ NOTE("#define T__UNREC_ 0x00000,0x00000,0x00000,0x00000,0x00000,0x00000,0x00000");
+
+ /* construct attribute-tables */
+ NOTE("#ifdef USE_PRETTYSRC");
+ NOTE("# define N HTMLA_NORMAL");
+ NOTE("# define i HTMLA_ANAME");
+ NOTE("# define h HTMLA_HREF");
+ NOTE("# define c HTMLA_CLASS");
+ NOTE("# define x HTMLA_AUXCLASS");
+ NOTE("# define T(t) , t");
+ NOTE("#else");
+ NOTE("# define T(t) /*nothing */");
+ NOTE("#endif");
+ NOTE("/* *INDENT-OFF* */");
+ NOTE("");
+ NOTE("#define ATTR_TYPE(name) #name, name##_attr_list");
+ NOTE("");
+ NOTE("/* generic attributes, used in different tags */");
+ for (gt = generic_types; gt->name != 0; ++gt) {
+ dump_src_AttrType(output, gt->name, gt->list, 0);
+ }
+ NOTE("");
+ NOTE("/* tables defining attributes per-tag in terms of generic attributes (editable) */");
+ for (j = 0; j < (int) count; ++j) {
+ int which = data[j].which;
+
+ if (first_attrs(dtd, which)) {
+ HTTag *tag = &(dtd->tags[which]);
+ const AttrType *types = tag->attr_types;
+ const char *name = NameOfAttrs(dtd, which);
+ attr *list = 0;
+ const char *from_attr[10];
+ int from_size = 0;
+
+ while (types->name != 0) {
+ from_attr[from_size++] = types->name;
+ if (!strcmp(types->name, name)) {
+ list = copy_AttrList(types->list);
+ for (gt = generic_types; gt->name != 0; ++gt) {
+ if (clean_AttrList(list, gt->list)) {
+ int k;
+ int found = 0;
+
+ for (k = 0; k < from_size; ++k) {
+ if (!strcmp(from_attr[k], gt->name)) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ from_attr[from_size++] = gt->name;
+ break;
+ }
+ }
+ }
+ ++types;
+ }
+ from_attr[from_size] = 0;
+
+ if (list != 0) {
+ dump_src_AttrType(output, name, list, from_attr);
+ free(list);
+ }
+ }
+ }
+ NOTE("");
+ NOTE("/* attribute lists for the runtime (generated by dtd_util) */");
+ for (j = 0; j < (int) count; ++j) {
+ dump_src_HTTag_Attrs(output, dtd, data[j].which);
+ }
+ NOTE("/* *INDENT-ON* */");
+ NOTE("");
+ NOTE("/* justification-flags */");
+ NOTE("#undef N");
+ NOTE("#undef i");
+ NOTE("#undef h");
+ NOTE("#undef c");
+ NOTE("#undef x");
+ NOTE("");
+ NOTE("#undef T");
+ NOTE("");
+ NOTE("/* tag-names */");
+ for (j = 0; j <= dtd->number_of_tags; ++j) {
+ fprintf(output, "#undef %s\n", DEF_name(dtd, j));
+ }
+ NOTE("");
+ NOTE("/* these definitions are used in the tags-tables */");
+ NOTE("#undef P");
+ NOTE("#undef P_");
+ NOTE("#ifdef USE_COLOR_STYLE");
+ NOTE("#define P_(x) #x, (sizeof #x) -1");
+ NOTE("#define NULL_HTTag_ NULL, 0");
+ NOTE("#else");
+ NOTE("#define P_(x) #x");
+ NOTE("#define NULL_HTTag_ NULL");
+ NOTE("#endif");
+ NOTE("");
+ NOTE("#ifdef USE_JUSTIFY_ELTS");
+ NOTE("#define P(x) P_(x), 1");
+ NOTE("#define P0(x) P_(x), 0");
+ NOTE("#define NULL_HTTag NULL_HTTag_,0");
+ NOTE("#else");
+ NOTE("#define P(x) P_(x)");
+ NOTE("#define P0(x) P_(x)");
+ NOTE("#define NULL_HTTag NULL_HTTag_");
+ NOTE("#endif");
+ NOTE("");
+ NOTE("#define ATTR_DATA(name) name##_attr, HTML_##name##_ATTRIBUTES, name##_attr_type");
+ NOTE("");
+ NOTE("#endif /* once_HTMLDTD */");
+ NOTE("/* *INDENT-OFF* */");
+
+ /* construct the tags table */
+ fprintf(output,
+ "static const HTTag tags_table%d[HTML_ALL_ELEMENTS] = {\n",
+ dtd_version);
+ for (j = 0; j <= dtd->number_of_tags; ++j) {
+ if (j == dtd->number_of_tags) {
+ NOTE("/* additional (alternative variants), not counted in HTML_ELEMENTS: */");
+ NOTE("/* This one will be used as a temporary substitute within the parser when");
+ NOTE(" it has been signalled to parse OBJECT content as MIXED. - kw */");
+ }
+ dump_src_HTTag(output, dtd, j);
+ }
+ fprintf(output, "};\n");
+
+ NOTE("/* *INDENT-ON* */");
+ NOTE("");
+ fprintf(output, "#endif /* %s%d */\n", marker, dtd_version);
+
+ free(data);
+}
+
+static void dump_hdr_attr(FILE *output, AttrInfo * data)
+{
+ int j;
+ char buffer[BUFSIZ];
+
+ for (j = 0; j < data->count; ++j) {
+ PrintF(output, 33, "#define HTML_%s_%s",
+ data->name,
+ no_dashes(buffer, data->attrs[j].name));
+ fprintf(output, "%2d\n", j);
+ }
+ PrintF(output, 33, "#define HTML_%s_ATTRIBUTES", data->name);
+ fprintf(output, "%2d\n", data->count);
+ fprintf(output, "\n");
+}
+
+static void dump_header(FILE *output, const SGML_dtd * dtd)
+{
+ const char *marker = "hdr_HTMLDTD_H";
+ int j;
+
+ unsigned count = 0;
+ AttrInfo *data = sorted_attrs(dtd, &count);
+
+ fprintf(output, "/* %cLynxId%c */\n", '$', '$');
+ fprintf(output, "#ifndef %s\n", marker);
+ fprintf(output, "#define %s 1\n\n", marker);
+
+ NOTE("#ifdef __cplusplus");
+ NOTE("extern \"C\" {");
+ NOTE("#endif");
+
+ NOTE("/*");
+ NOTE("");
+ NOTE(" Element Numbers");
+ NOTE("");
+ NOTE(" Must Match all tables by element!");
+ NOTE(" These include tables in HTMLDTD.c");
+ NOTE(" and code in HTML.c.");
+ NOTE("");
+ NOTE(" */");
+
+ fprintf(output, " typedef enum {\n");
+ for (j = 0; j < dtd->number_of_tags; ++j) {
+ fprintf(output, "\tHTML_%s,\n", dtd->tags[j].name);
+ }
+ NOTE("\tHTML_ALT_OBJECT");
+ NOTE(" } HTMLElement;\n");
+ NOTE("/* Notes: HTML.c uses a different extension of the");
+ NOTE(" HTML_ELEMENTS space privately, see");
+ NOTE(" HTNestedList.h.");
+ NOTE("");
+ NOTE(" Do NOT replace HTML_ELEMENTS with");
+ NOTE(" TABLESIZE(mumble_dtd.tags).");
+ NOTE("");
+ NOTE(" Keep the following defines in synch with");
+ NOTE(" the above enum!");
+ NOTE(" */");
+ NOTE("");
+ NOTE("/* # of elements generally visible to Lynx code */");
+ fprintf(output, "#define HTML_ELEMENTS %d\n", dtd->number_of_tags);
+ NOTE("");
+ NOTE("/* # of elements visible to SGML parser */");
+ fprintf(output, "#define HTML_ALL_ELEMENTS %d\n", dtd->number_of_tags + 1);
+ NOTE("");
+ NOTE("/*");
+ NOTE("");
+ NOTE(" Attribute numbers");
+ NOTE("");
+ NOTE(" Identifier is HTML_<element>_<attribute>.");
+ NOTE(" These must match the tables in HTML.c!");
+ NOTE("");
+ NOTE(" */");
+
+ /* output the sorted list */
+ for (j = 0; j < (int) count; ++j) {
+ dump_hdr_attr(output, data + j);
+ }
+ free(data);
+
+ NOTE("#ifdef __cplusplus");
+ NOTE("}");
+ NOTE("#endif");
+
+ fprintf(output, "#endif\t\t\t\t/* %s */\n", marker);
+}
+
+#define FMT_NUM_ATTRS "%d attributes:\n"
+#define FMT_ONE_ATTR "%d:%d:%s\n"
+#define NUM_ONE_ATTR 3
+
+static void dump_flat_attrs(FILE *output,
+ const attr * attributes,
+ int number_of_attributes)
+{
+ int n;
+
+ fprintf(output, "\t\t" FMT_NUM_ATTRS, number_of_attributes);
+ for (n = 0; n < number_of_attributes; ++n) {
+ fprintf(output, "\t\t\t" FMT_ONE_ATTR, n,
+#ifdef USE_PRETTYSRC
+ attributes[n].type,
+#else
+ 0, /* need placeholder for source-compat */
+#endif
+ attributes[n].name
+ );
+ }
+}
+
+static void dump_flat_attr_types(FILE *output, const AttrType * attr_types)
+{
+ const AttrType *p = sorted_AttrTypes(attr_types);
+ int number = len_AttrTypes(attr_types);
+
+ fprintf(output, "\t\t%d attr_types\n", number);
+
+ if (p != 0) {
+ while (p->name != 0) {
+ fprintf(output, "\t\t\t%s\n", p->name);
+ ++p;
+ }
+ }
+}
+
+static void dump_flat_SGMLContent(FILE *output, const char *name, SGMLContent contents)
+{
+ fprintf(output, "\t\t%s: %s\n", name, SGMLContent2s(contents));
+}
+
+#define DUMP(name) \
+ if (theClass & Tgc_##name) {\
+ fprintf(output, " " #name); \
+ theClass &= ~(Tgc_##name); \
+ }
+
+static void dump_flat_TagClass(FILE *output, const char *name, TagClass theClass)
+{
+ fprintf(output, "\t\t%s:", name);
+ DUMP(FONTlike);
+ DUMP(EMlike);
+ DUMP(MATHlike);
+ DUMP(Alike);
+ DUMP(formula);
+ DUMP(TRlike);
+ DUMP(SELECTlike);
+ DUMP(FORMlike);
+ DUMP(Plike);
+ DUMP(DIVlike);
+ DUMP(LIlike);
+ DUMP(ULlike);
+ DUMP(BRlike);
+ DUMP(APPLETlike);
+ DUMP(HRlike);
+ DUMP(MAPlike);
+ DUMP(outer);
+ DUMP(BODYlike);
+ DUMP(HEADstuff);
+ DUMP(same);
+ if (theClass)
+ fprintf(output, " OOPS:%#x", theClass);
+ fprintf(output, "\n");
+}
+
+#undef DUMP
+
+#define DUMP(name) \
+ if (theFlags & Tgf_##name) {\
+ fprintf(output, " " #name); \
+ theFlags &= ~(Tgf_##name); \
+ }
+
+static void dump_flat_TagFlags(FILE *output, const char *name, TagFlags theFlags)
+{
+ fprintf(output, "\t\t%s:", name);
+ DUMP(endO);
+ DUMP(startO);
+ DUMP(mafse);
+ DUMP(strict);
+ DUMP(nreie);
+ DUMP(frecyc);
+ DUMP(nolyspcl);
+ if (theFlags)
+ fprintf(output, " OOPS:%#x", theFlags);
+ fprintf(output, "\n");
+}
+
+#undef DUMP
+
+static void dump_flat_HTTag(FILE *output, unsigned n, HTTag * tag)
+{
+ fprintf(output, "\t%u:%s\n", n, tag->name);
+#ifdef USE_JUSTIFY_ELTS
+ fprintf(output, "\t\t%s\n", tag->can_justify ? "justify" : "nojustify");
+#endif
+ dump_flat_attrs(output, tag->attributes, AttrCount(tag));
+ dump_flat_attr_types(output, tag->attr_types);
+ dump_flat_SGMLContent(output, "contents", tag->contents);
+ dump_flat_TagClass(output, "tagclass", tag->tagclass);
+ dump_flat_TagClass(output, "contains", tag->contains);
+ dump_flat_TagClass(output, "icontains", tag->icontains);
+ dump_flat_TagClass(output, "contained", tag->contained);
+ dump_flat_TagClass(output, "icontained", tag->icontained);
+ dump_flat_TagClass(output, "canclose", tag->canclose);
+ dump_flat_TagFlags(output, "flags", tag->flags);
+}
+
+static int count_attr_types(AttrType * attr_types, HTTag * tag)
+{
+ int count = 0;
+ const AttrType *p;
+ AttrType *q;
+
+ if ((p = tag->attr_types) != 0) {
+ while (p->name != 0) {
+ if ((q = attr_types) != 0) {
+ while (q->name != 0) {
+ if (!strcmp(q->name, p->name)) {
+ --count;
+ break;
+ }
+ ++q;
+ }
+ *q = *p;
+ }
+ ++count;
+ ++p;
+ }
+ }
+ return count;
+}
+
+static void dump_flatfile(FILE *output, const SGML_dtd * dtd)
+{
+ AttrType *attr_types = 0;
+ int pass;
+ unsigned count = 0;
+ unsigned n;
+
+ /* merge all of the attr_types data */
+ for (pass = 0; pass < 2; ++pass) {
+ for (n = 0; (int) n < dtd->number_of_tags; ++n) {
+ count += count_attr_types(attr_types, &(dtd->tags[n]));
+ }
+ if (pass == 0) {
+ attr_types = typecallocn(AttrType, count + 1);
+ count = 0;
+ } else {
+ count = len_AttrTypes(attr_types);
+ qsort(attr_types, count, sizeof(*attr_types), compare_attr_types);
+ fprintf(output, "%d attr_types\n", count);
+ for (n = 0; n < count; ++n) {
+ fprintf(output, "\t%d:%s\n", n, attr_types[n].name);
+ dump_flat_attrs(output, attr_types[n].list,
+ len_AttrList(attr_types[n].list));
+ }
+ }
+ }
+
+ fprintf(output, "%d tags\n", dtd->number_of_tags);
+ for (n = 0; (int) n < dtd->number_of_tags; ++n) {
+ dump_flat_HTTag(output, n, &(dtd->tags[n]));
+ }
+#if 0
+ fprintf(output, "%d entities\n", dtd->number_of_entities);
+ for (n = 0; n < dtd->number_of_entities; ++n) {
+ }
+#endif
+}
+
+static char *get_line(FILE *input)
+{
+ char temp[1024];
+ char *result = 0;
+
+ if (fgets(temp, (int) sizeof(temp), input) != 0) {
+ result = strdup(temp);
+ }
+ return result;
+}
+
+#define LOAD(name) \
+ if (!strcmp(data, #name)) {\
+ *theClass |= Tgc_##name; \
+ continue; \
+ }
+
+static int load_flat_TagClass(FILE *input, const char *name, TagClass * theClass)
+{
+ char prefix[80];
+ char *next = get_line(input);
+ char *data;
+ int result = 0;
+
+ *theClass = 0;
+ if (next != 0) {
+ sprintf(prefix, "\t\t%s:", name);
+ data = strtok(next, "\n ");
+
+ if (data != 0 && !strcmp(data, prefix)) {
+ result = 1;
+
+ while ((data = strtok(NULL, "\n ")) != 0) {
+
+ LOAD(FONTlike);
+ LOAD(EMlike);
+ LOAD(MATHlike);
+ LOAD(Alike);
+ LOAD(formula);
+ LOAD(TRlike);
+ LOAD(SELECTlike);
+ LOAD(FORMlike);
+ LOAD(Plike);
+ LOAD(DIVlike);
+ LOAD(LIlike);
+ LOAD(ULlike);
+ LOAD(BRlike);
+ LOAD(APPLETlike);
+ LOAD(HRlike);
+ LOAD(MAPlike);
+ LOAD(outer);
+ LOAD(BODYlike);
+ LOAD(HEADstuff);
+ LOAD(same);
+
+ fprintf(stderr, "Unexpected TagClass '%s'\n", data);
+ result = 0;
+ break;
+ }
+ } else if (data) {
+ fprintf(stderr, "load_flat_TagClass: '%s' vs '%s'\n", data, prefix);
+ }
+ free(next);
+ } else {
+ fprintf(stderr, "Did not find contents\n");
+ }
+ return result;
+}
+
+#undef LOAD
+
+#define LOAD(name) \
+ if (!strcmp(data, #name)) {\
+ *flags |= Tgf_##name; \
+ continue; \
+ }
+
+static int load_flat_TagFlags(FILE *input, const char *name, TagFlags * flags)
+{
+ char prefix[80];
+ char *next = get_line(input);
+ char *data;
+ int result = 0;
+
+ *flags = 0;
+ if (next != 0) {
+ sprintf(prefix, "\t\t%s:", name);
+ data = strtok(next, "\n ");
+
+ if (data != 0 && !strcmp(data, prefix)) {
+ result = 1;
+
+ while ((data = strtok(NULL, "\n ")) != 0) {
+
+ LOAD(endO);
+ LOAD(startO);
+ LOAD(mafse);
+ LOAD(strict);
+ LOAD(nreie);
+ LOAD(frecyc);
+ LOAD(nolyspcl);
+
+ fprintf(stderr, "Unexpected TagFlag '%s'\n", data);
+ result = 0;
+ break;
+ }
+ } else if (data) {
+ fprintf(stderr, "load_flat_TagFlags: '%s' vs '%s'\n", data, prefix);
+ }
+ free(next);
+ }
+ return result;
+}
+
+#undef LOAD
+
+static int load_flat_AttrList(FILE *input, AttrList * attrs, int *length)
+{
+ attr *attributes;
+ int j, jcmp, code;
+ int result = 1;
+ char name[1024];
+
+#ifdef USE_PRETTYSRC
+ int atype;
+#endif
+
+ if (fscanf(input, FMT_NUM_ATTRS, length) == 1
+ && *length > 0
+ && (attributes = typecallocn(attr, (size_t) (*length + 1))) != 0) {
+ *attrs = attributes;
+ for (j = 0; j < *length; ++j) {
+ code = fscanf(input, FMT_ONE_ATTR,
+ &jcmp,
+ &atype,
+ name
+ );
+ if (code == NUM_ONE_ATTR && (j == jcmp)) {
+ attributes[j].name = strdup(name);
+#ifdef USE_PRETTYSRC
+ attributes[j].type = atype;
+#endif
+ } else {
+ fprintf(stderr, "Did not find attributes\n");
+ result = 0;
+ break;
+ }
+ }
+ if (*length > 1)
+ qsort(attributes, *length, sizeof(attributes[0]), compare_attr);
+ }
+ return result;
+}
+
+static int load_flat_HTTag(FILE *input, unsigned nref, HTTag * tag, AttrType * allTypes)
+{
+ int result = 0;
+ unsigned ncmp = 0;
+ char name[1024];
+ int code;
+ int j;
+
+ code = fscanf(input, "%d:%s\n", &ncmp, name);
+ if (code == 2 && (nref == ncmp)) {
+ result = 1;
+ tag->name = strdup(name);
+#ifdef USE_COLOR_STYLE
+ tag->name_len = strlen(tag->name);
+#endif
+#ifdef USE_JUSTIFY_ELTS
+ if (fscanf(input, "%s\n", name) == 1) {
+ tag->can_justify = !strcmp(name, "justify");
+ } else {
+ fprintf(stderr, "Did not find can_justify\n");
+ result = 0;
+ }
+#endif
+ if (result) {
+ result = load_flat_AttrList(input, &(tag->attributes), &(tag->number_of_attributes));
+ }
+ if (result) {
+ AttrType *myTypes;
+ int k, count;
+ char *next = get_line(input);
+
+ if (next != 0
+ && sscanf(next, "%d attr_types\n", &count)
+ && (myTypes = typecallocn(AttrType, (size_t) (count + 1)))
+ != 0) {
+ tag->attr_types = myTypes;
+ for (k = 0; k < count; ++k) {
+ next = get_line(input);
+ if (next != 0
+ && sscanf(next, "%s\n", name)) {
+ for (j = 0; allTypes[j].name != 0; ++j) {
+ if (!strcmp(allTypes[j].name, name)) {
+ myTypes[k].name = strdup(name);
+ myTypes[k].list = allTypes[j].list;
+ break;
+ }
+ }
+ } else {
+ result = 0;
+ break;
+ }
+ }
+ if (result && count > 1)
+ qsort(myTypes, count, sizeof(myTypes[0]), compare_attr_types);
+ }
+ }
+ if (result) {
+ char *next = get_line(input);
+
+ if (next != 0
+ && sscanf(next, "\t\tcontents: %s\n", name)) {
+ tag->contents = s2SGMLContent(name);
+ free(next);
+ } else {
+ fprintf(stderr, "Did not find contents\n");
+ result = 0;
+ }
+ }
+ if (result) {
+ result = load_flat_TagClass(input, "tagclass", &(tag->tagclass));
+ }
+ if (result) {
+ result = load_flat_TagClass(input, "contains", &(tag->contains));
+ }
+ if (result) {
+ result = load_flat_TagClass(input, "icontains", &(tag->icontains));
+ }
+ if (result) {
+ result = load_flat_TagClass(input, "contained", &(tag->contained));
+ }
+ if (result) {
+ result = load_flat_TagClass(input, "icontained", &(tag->icontained));
+ }
+ if (result) {
+ result = load_flat_TagClass(input, "canclose", &(tag->canclose));
+ }
+ if (result) {
+ result = load_flat_TagFlags(input, "flags", &(tag->flags));
+ }
+ } else {
+ fprintf(stderr, "load_flat_HTTag error\n");
+ }
+ return result;
+}
+
+static int load_flat_AttrType(FILE *input, AttrType * types, size_t ncmp)
+{
+ int result = 0;
+ int ntst;
+ char name[1024];
+
+ if (fscanf(input, "%d:%s\n", &ntst, name) == 2
+ && (ntst == (int) ncmp)) {
+ result = 1;
+ types->name = strdup(name);
+ if (!load_flat_AttrList(input, &(types->list), &ntst))
+ result = 0;
+ }
+ return result;
+}
+
+static SGML_dtd *load_flatfile(FILE *input)
+{
+ AttrType *attr_types = 0;
+ SGML_dtd *result = 0;
+ size_t n;
+ int number_of_attrs = 0;
+ int number_of_tags = 0;
+ HTTag *tag;
+ int code;
+
+ code = fscanf(input, "%d attr_types\n", &number_of_attrs);
+ if (code
+ && number_of_attrs
+ && (attr_types = typecallocn(AttrType, number_of_attrs + 1)) != 0) {
+ for (n = 0; n < (size_t) number_of_attrs; ++n) {
+ if (!load_flat_AttrType(input, attr_types + n, n)) {
+ break;
+ }
+ }
+ }
+
+ code = fscanf(input, "%d tags\n", &number_of_tags);
+ if (code == 1) {
+ if ((result = typecalloc(SGML_dtd)) != 0
+ && (result->tags = typecallocn(HTTag, (number_of_tags + 2))) != 0) {
+ for (n = 0; n < (size_t) number_of_tags; ++n) {
+ if (load_flat_HTTag(input, n, &(result->tags[n]), attr_types)) {
+ result->number_of_tags = (n + 1);
+ } else {
+ break;
+ }
+ }
+ tag = 0;
+ for (n = 0; n < (size_t) number_of_tags; ++n) {
+ if (result->tags[n].name != 0
+ && !strcmp(result->tags[n].name, "OBJECT")) {
+ tag = result->tags + number_of_tags;
+ *tag = result->tags[n];
+ tag->contents = SGML_MIXED;
+ tag->flags = Tgf_strict;
+ break;
+ }
+ }
+ if (tag == 0) {
+ fprintf(stderr, "Did not find OBJECT tag\n");
+ result = 0;
+ }
+ }
+ }
+ return result;
+}
+
+int main(int argc, char *argv[])
+{
+ const SGML_dtd *the_dtd = &HTML_dtd;
+ int ch;
+ int dtd_version = 0;
+ int c_option = FALSE;
+ int h_option = FALSE;
+ int l_option = FALSE;
+ FILE *input = stdin;
+ FILE *output = stdout;
+
+ while ((ch = getopt(argc, argv, GETOPT)) != -1) {
+ switch (ch) {
+ case 'c':
+ c_option = TRUE;
+ break;
+ case 'h':
+ h_option = TRUE;
+ break;
+ case 'l':
+ l_option = TRUE;
+ input = fopen(optarg, "r");
+ if (input == 0)
+ failed(optarg);
+ break;
+ case 'o':
+ output = fopen(optarg, "w");
+ if (output == 0)
+ failed(optarg);
+ break;
+ case 't':
+ dtd_version = 1;
+ break;
+ case 's':
+ dtd_version = 0;
+ break;
+ default:
+ usage();
+ }
+ }
+
+ HTSwitchDTD(dtd_version);
+ if (l_option)
+ the_dtd = load_flatfile(input);
+
+ if (the_dtd != 0) {
+ if (c_option)
+ dump_source(output, the_dtd, dtd_version);
+ if (h_option)
+ dump_header(output, the_dtd);
+ if (!c_option && !h_option)
+ dump_flatfile(output, the_dtd);
+ }
+
+ return EXIT_SUCCESS;
+}