diff options
Diffstat (limited to 'WWW/Library/Implementation/HTMLDTD.c')
-rw-r--r-- | WWW/Library/Implementation/HTMLDTD.c | 334 |
1 files changed, 334 insertions, 0 deletions
diff --git a/WWW/Library/Implementation/HTMLDTD.c b/WWW/Library/Implementation/HTMLDTD.c new file mode 100644 index 0000000..382c141 --- /dev/null +++ b/WWW/Library/Implementation/HTMLDTD.c @@ -0,0 +1,334 @@ +/* + * $LynxId: HTMLDTD.c,v 1.58 2021/07/23 00:00:03 tom Exp $ + * + * Our Static DTD for HTML + * ----------------------- + */ + +/* Implements: +*/ + +#include <HTUtils.h> +#include <HTMLDTD.h> +#include <LYLeaks.h> +#include <LYJustify.h> + +/* + * Character entities like   now excluded from our DTD tables, they are + * mapped to Unicode and handled by chartrans code directly the similar way the + * numeric entities like { does. See src/chrtrans/entities.h for real + * mapping. + */ + +/* Entity Names + * ------------ + * + * This table must be matched exactly with ALL the translation tables + * (this is an obsolete translation mechanism, probably unused, + * currently replaced with Unicode chartrans in most cases...) + */ +static const char *entities[] = +{ + "AElig", /* capital AE diphthong (ligature) */ + "Aacute", /* capital A, acute accent */ + "Acirc", /* capital A, circumflex accent */ + "Agrave", /* capital A, grave accent */ + "Aring", /* capital A, ring */ + "Atilde", /* capital A, tilde */ + "Auml", /* capital A, dieresis or umlaut mark */ + "Ccedil", /* capital C, cedilla */ + "Dstrok", /* capital Eth, Icelandic */ + "ETH", /* capital Eth, Icelandic */ + "Eacute", /* capital E, acute accent */ + "Ecirc", /* capital E, circumflex accent */ + "Egrave", /* capital E, grave accent */ + "Euml", /* capital E, dieresis or umlaut mark */ + "Iacute", /* capital I, acute accent */ + "Icirc", /* capital I, circumflex accent */ + "Igrave", /* capital I, grave accent */ + "Iuml", /* capital I, dieresis or umlaut mark */ + "Ntilde", /* capital N, tilde */ + "Oacute", /* capital O, acute accent */ + "Ocirc", /* capital O, circumflex accent */ + "Ograve", /* capital O, grave accent */ + "Oslash", /* capital O, slash */ + "Otilde", /* capital O, tilde */ + "Ouml", /* capital O, dieresis or umlaut mark */ + "THORN", /* capital THORN, Icelandic */ + "Uacute", /* capital U, acute accent */ + "Ucirc", /* capital U, circumflex accent */ + "Ugrave", /* capital U, grave accent */ + "Uuml", /* capital U, dieresis or umlaut mark */ + "Yacute", /* capital Y, acute accent */ + "aacute", /* small a, acute accent */ + "acirc", /* small a, circumflex accent */ + "acute", /* spacing acute */ + "aelig", /* small ae diphthong (ligature) */ + "agrave", /* small a, grave accent */ + "amp", /* ampersand */ + "aring", /* small a, ring */ + "atilde", /* small a, tilde */ + "auml", /* small a, dieresis or umlaut mark */ + "brkbar", /* broken vertical bar */ + "brvbar", /* broken vertical bar */ + "ccedil", /* small c, cedilla */ + "cedil", /* spacing cedilla */ + "cent", /* cent sign */ + "copy", /* copyright sign */ + "curren", /* currency sign */ + "deg", /* degree sign */ + "die", /* spacing dieresis */ + "divide", /* division sign */ + "eacute", /* small e, acute accent */ + "ecirc", /* small e, circumflex accent */ + "egrave", /* small e, grave accent */ + "emdash", /* dash the width of emsp */ + "emsp", /* em space - not collapsed */ + "endash", /* dash the width of ensp */ + "ensp", /* en space - not collapsed */ + "eth", /* small eth, Icelandic */ + "euml", /* small e, dieresis or umlaut mark */ + "frac12", /* fraction 1/2 */ + "frac14", /* fraction 1/4 */ + "frac34", /* fraction 3/4 */ + "gt", /* greater than */ + "hibar", /* spacing macron */ + "iacute", /* small i, acute accent */ + "icirc", /* small i, circumflex accent */ + "iexcl", /* inverted exclamation mark */ + "igrave", /* small i, grave accent */ + "iquest", /* inverted question mark */ + "iuml", /* small i, dieresis or umlaut mark */ + "laquo", /* angle quotation mark, left */ + "lt", /* less than */ + "macr", /* spacing macron */ + "mdash", /* dash the width of emsp */ + "micro", /* micro sign */ + "middot", /* middle dot */ + "nbsp", /* non breaking space */ + "ndash", /* dash the width of ensp */ + "not", /* negation sign */ + "ntilde", /* small n, tilde */ + "oacute", /* small o, acute accent */ + "ocirc", /* small o, circumflex accent */ + "ograve", /* small o, grave accent */ + "ordf", /* feminine ordinal indicator */ + "ordm", /* masculine ordinal indicator */ + "oslash", /* small o, slash */ + "otilde", /* small o, tilde */ + "ouml", /* small o, dieresis or umlaut mark */ + "para", /* paragraph sign */ + "plusmn", /* plus-or-minus sign */ + "pound", /* pound sign */ + "quot", /* quote '"' */ + "raquo", /* angle quotation mark, right */ + "reg", /* circled R registered sign */ + "sect", /* section sign */ + "shy", /* soft hyphen */ + "sup1", /* superscript 1 */ + "sup2", /* superscript 2 */ + "sup3", /* superscript 3 */ + "szlig", /* small sharp s, German (sz ligature) */ + "thinsp", /* thin space (not collapsed) */ + "thorn", /* small thorn, Icelandic */ + "times", /* multiplication sign */ + "trade", /* trade mark sign (U+2122) */ + "uacute", /* small u, acute accent */ + "ucirc", /* small u, circumflex accent */ + "ugrave", /* small u, grave accent */ + "uml", /* spacing dieresis */ + "uuml", /* small u, dieresis or umlaut mark */ + "yacute", /* small y, acute accent */ + "yen", /* yen sign */ + "yuml", /* small y, dieresis or umlaut mark */ +}; + +/* Attribute Lists + * --------------- + * + * Lists must be in alphabetical order by attribute name + * The tag elements contain the number of attributes + */ + +/* From Peter Flynn's intro to the HTML Pro DTD: + + %structure; + + DIV, CENTER, H1 to H6, P, UL, OL, DL, DIR, MENU, PRE, XMP, LISTING, BLOCKQUOTE, BQ, + 2 1 2 2 1 8 8 8 8 8 8 8 8 4 4 + MULTICOL,?NOBR, FORM, TABLE, ADDRESS, FIG, BDO, NOTE, and FN; plus?WBR, LI, and LH + 8 n ?1 n 8 8 2 2 2 2 2 ?1 nE 4 4 + + %insertions; + + Elements which usually contain special-purpose material, or no text material at all. + + BASEFONT, APPLET, OBJECT, EMBED, SCRIPT, MAP, MARQUEE, HR, ISINDEX, BGSOUND, TAB,?IMG, + 1 e? 2 2 l 1 e 2 l 8 4 4 E 1? E 1 E ! E ?1 E + IMAGE, BR, plus NOEMBED, SERVER, SPACER, AUDIOSCOPE, and SIDEBAR; ?area + 1 n 1 E n n n n n 8 E + + %text; + + Elements within the %structure; which directly contain running text. + + Descriptive or analytic markup: EM, STRONG, DFN, CODE, SAMP, KBD, VAR, CITE, Q, LANG, AU, + 2 2 2 2 2 2 2 2 2 2 n 2 + AUTHOR, PERSON, ACRONYM, ABBR, INS, DEL, and SPAN + 2 2 n 2 2 2 2 2 + Visual markup:S, STRIKE, I, B, TT, U,?NOBR,?WBR, BR, BIG, SMALL, FONT, STYLE, BLINK, TAB, + 1 1 1 1 1 1 ?1 n ?1nE? 1 E 1 1 1 1 l 1 1 E? + BLACKFACE, LIMITTEXT, NOSMARTQUOTES, and SHADOW + 1 n 1 n 1 n 1 n + Hypertext and graphics: A and?IMG + 8 ?8 E + Mathematical: SUB, SUP, and MATH + 4 4 4 l + Documentary: COMMENT, ENTITY, ELEMENT, and ATTRIB + 4 4 n 4 n 4 n + %formula; + */ + +/* Elements + * -------- + * + * Must match definitions in HTMLDTD.html! + * Must be in alphabetical order. + * + * The T_* extra info is listed here, even though most fields are not used + * in SGML.c if Old_DTD is set (with the exception of some Tgf_* flags). + * This simplifies comparison of the tags_table0[] table (otherwise unchanged + * from original Lynx treatment) with the tags_table1[] table below. - kw + * + * Name*, Attributes, No. of attributes, content, extra info... + */ + +#include <src0_HTMLDTD.h> +#include <src1_HTMLDTD.h> + +/* Dummy space, will be filled with the contents of either tags_table1 + or tags_table0 on calling HTSwitchDTD - kw */ + +static HTTag tags[HTML_ALL_ELEMENTS]; + +const SGML_dtd HTML_dtd = +{ + tags, + HTML_ELEMENTS, + entities, /* probably unused */ + TABLESIZE(entities), +}; + +/* This function fills the "tags" part of the HTML_dtd structure with + what we want to use, either tags_table0 or tags_table1. Note that it + has to be called at least once before HTML_dtd is used, otherwise + the HTML_dtd contents will be invalid! This could be coded in a way + that would make an initialisation call unnecessary, but my C knowledge + is limited and I didn't want to list the whole tags_table1 table + twice... - kw */ +void HTSwitchDTD(int new_flag) +{ + if (TRACE) + CTRACE((tfp, + "HTMLDTD: Copying %s DTD element info of size %d, %d * %d\n", + new_flag ? "strict" : "tagsoup", + (int) (new_flag ? sizeof(tags_table1) : sizeof(tags_table0)), + HTML_ALL_ELEMENTS, + (int) sizeof(HTTag))); + if (new_flag) + MemCpy(tags, tags_table1, HTML_ALL_ELEMENTS * sizeof(HTTag)); + else + MemCpy(tags, tags_table0, HTML_ALL_ELEMENTS * sizeof(HTTag)); +} + +HTTag HTTag_unrecognized = + +{NULL_HTTag, NULL, 0, 0, SGML_EMPTY, T__UNREC_, 0, 0}; + +/* + * Utility Routine: Useful for people building HTML objects. + */ + +/* Start anchor element + * -------------------- + * + * It is kinda convenient to have a particulr routine for + * starting an anchor element, as everything else for HTML is + * simple anyway. + */ +struct _HTStructured { + HTStructuredClass *isa; + /* ... */ +}; + +void HTStartAnchor(HTStructured * obj, const char *name, + const char *href) +{ + BOOL present[HTML_A_ATTRIBUTES]; + const char *value[HTML_A_ATTRIBUTES]; + int i; + + for (i = 0; i < HTML_A_ATTRIBUTES; i++) + present[i] = NO; + + if (name && *name) { + present[HTML_A_NAME] = YES; + value[HTML_A_NAME] = (const char *) name; + } + if (href) { + present[HTML_A_HREF] = YES; + value[HTML_A_HREF] = (const char *) href; + } + + (*obj->isa->start_element) (obj, HTML_A, present, value, -1, 0); +} + +void HTStartAnchor5(HTStructured * obj, const char *name, + const char *href, + const char *linktype, + int tag_charset) +{ + BOOL present[HTML_A_ATTRIBUTES]; + const char *value[HTML_A_ATTRIBUTES]; + int i; + + for (i = 0; i < HTML_A_ATTRIBUTES; i++) + present[i] = NO; + + if (name && *name) { + present[HTML_A_NAME] = YES; + value[HTML_A_NAME] = name; + } + if (href && *href) { + present[HTML_A_HREF] = YES; + value[HTML_A_HREF] = href; + } + if (linktype && *linktype) { + present[HTML_A_TYPE] = YES; + value[HTML_A_TYPE] = linktype; + } + + (*obj->isa->start_element) (obj, HTML_A, present, value, tag_charset, 0); +} + +void HTStartIsIndex(HTStructured * obj, const char *prompt, + const char *href) +{ + BOOL present[HTML_ISINDEX_ATTRIBUTES]; + const char *value[HTML_ISINDEX_ATTRIBUTES]; + int i; + + for (i = 0; i < HTML_ISINDEX_ATTRIBUTES; i++) + present[i] = NO; + + if (prompt && *prompt) { + present[HTML_ISINDEX_PROMPT] = YES; + value[HTML_ISINDEX_PROMPT] = (const char *) prompt; + } + if (href) { + present[HTML_ISINDEX_HREF] = YES; + value[HTML_ISINDEX_HREF] = (const char *) href; + } + + (*obj->isa->start_element) (obj, HTML_ISINDEX, present, value, -1, 0); +} |