334 lines
11 KiB
C
334 lines
11 KiB
C
/*
|
|
* $LynxId: HTMLDTD.c,v 1.58 2021/07/23 00:00:03 tom Exp $
|
|
*
|
|
* Our Static DTD for HTML
|
|
* -----------------------
|
|
*/
|
|
|
|
/* Implements:
|
|
*/
|
|
|
|
#include <HTUtils.h>
|
|
#include <HTMLDTD.h>
|
|
#include <LYLeaks.h>
|
|
#include <LYJustify.h>
|
|
|
|
/*
|
|
* Character entities like &nbsp; are now excluded from our DTD tables; they are
|
|
* mapped to Unicode and handled by chartrans code directly the similar way the
|
|
* numeric entities like &#123; are. See src/chrtrans/entities.h for the real
|
|
* mapping.
|
|
*/
|
|
|
|
/* Entity Names
|
|
* ------------
|
|
*
|
|
* This table must be matched exactly with ALL the translation tables
|
|
* (this is an obsolete translation mechanism, probably unused,
|
|
* currently replaced with Unicode chartrans in most cases...)
|
|
*/
|
|
/*
 * Names of the character entities known to this (legacy) table.
 * The entries are listed in strcmp() order, upper-case names first;
 * keep that order when adding or removing a name.
 */
static const char *entities[] =
{
    /* ---- names starting with an upper-case letter ---- */
    "AElig",      /* capital AE diphthong (ligature) */
    "Aacute",     /* capital A, acute accent */
    "Acirc",      /* capital A, circumflex accent */
    "Agrave",     /* capital A, grave accent */
    "Aring",      /* capital A, ring */
    "Atilde",     /* capital A, tilde */
    "Auml",       /* capital A, dieresis or umlaut mark */
    "Ccedil",     /* capital C, cedilla */
    "Dstrok",     /* capital Eth, Icelandic */
    "ETH",        /* capital Eth, Icelandic */
    "Eacute",     /* capital E, acute accent */
    "Ecirc",      /* capital E, circumflex accent */
    "Egrave",     /* capital E, grave accent */
    "Euml",       /* capital E, dieresis or umlaut mark */
    "Iacute",     /* capital I, acute accent */
    "Icirc",      /* capital I, circumflex accent */
    "Igrave",     /* capital I, grave accent */
    "Iuml",       /* capital I, dieresis or umlaut mark */
    "Ntilde",     /* capital N, tilde */
    "Oacute",     /* capital O, acute accent */
    "Ocirc",      /* capital O, circumflex accent */
    "Ograve",     /* capital O, grave accent */
    "Oslash",     /* capital O, slash */
    "Otilde",     /* capital O, tilde */
    "Ouml",       /* capital O, dieresis or umlaut mark */
    "THORN",      /* capital THORN, Icelandic */
    "Uacute",     /* capital U, acute accent */
    "Ucirc",      /* capital U, circumflex accent */
    "Ugrave",     /* capital U, grave accent */
    "Uuml",       /* capital U, dieresis or umlaut mark */
    "Yacute",     /* capital Y, acute accent */

    /* ---- a ---- */
    "aacute",     /* small a, acute accent */
    "acirc",      /* small a, circumflex accent */
    "acute",      /* spacing acute */
    "aelig",      /* small ae diphthong (ligature) */
    "agrave",     /* small a, grave accent */
    "amp",        /* ampersand */
    "aring",      /* small a, ring */
    "atilde",     /* small a, tilde */
    "auml",       /* small a, dieresis or umlaut mark */

    /* ---- b, c, d ---- */
    "brkbar",     /* broken vertical bar */
    "brvbar",     /* broken vertical bar */
    "ccedil",     /* small c, cedilla */
    "cedil",      /* spacing cedilla */
    "cent",       /* cent sign */
    "copy",       /* copyright sign */
    "curren",     /* currency sign */
    "deg",        /* degree sign */
    "die",        /* spacing dieresis */
    "divide",     /* division sign */

    /* ---- e ---- */
    "eacute",     /* small e, acute accent */
    "ecirc",      /* small e, circumflex accent */
    "egrave",     /* small e, grave accent */
    "emdash",     /* dash the width of emsp */
    "emsp",       /* em space - not collapsed */
    "endash",     /* dash the width of ensp */
    "ensp",       /* en space - not collapsed */
    "eth",        /* small eth, Icelandic */
    "euml",       /* small e, dieresis or umlaut mark */

    /* ---- f, g, h ---- */
    "frac12",     /* fraction 1/2 */
    "frac14",     /* fraction 1/4 */
    "frac34",     /* fraction 3/4 */
    "gt",         /* greater than */
    "hibar",      /* spacing macron */

    /* ---- i ---- */
    "iacute",     /* small i, acute accent */
    "icirc",      /* small i, circumflex accent */
    "iexcl",      /* inverted exclamation mark */
    "igrave",     /* small i, grave accent */
    "iquest",     /* inverted question mark */
    "iuml",       /* small i, dieresis or umlaut mark */

    /* ---- l, m, n ---- */
    "laquo",      /* angle quotation mark, left */
    "lt",         /* less than */
    "macr",       /* spacing macron */
    "mdash",      /* dash the width of emsp */
    "micro",      /* micro sign */
    "middot",     /* middle dot */
    "nbsp",       /* non breaking space */
    "ndash",      /* dash the width of ensp */
    "not",        /* negation sign */
    "ntilde",     /* small n, tilde */

    /* ---- o ---- */
    "oacute",     /* small o, acute accent */
    "ocirc",      /* small o, circumflex accent */
    "ograve",     /* small o, grave accent */
    "ordf",       /* feminine ordinal indicator */
    "ordm",       /* masculine ordinal indicator */
    "oslash",     /* small o, slash */
    "otilde",     /* small o, tilde */
    "ouml",       /* small o, dieresis or umlaut mark */

    /* ---- p, q, r, s ---- */
    "para",       /* paragraph sign */
    "plusmn",     /* plus-or-minus sign */
    "pound",      /* pound sign */
    "quot",       /* quote '"' */
    "raquo",      /* angle quotation mark, right */
    "reg",        /* circled R registered sign */
    "sect",       /* section sign */
    "shy",        /* soft hyphen */
    "sup1",       /* superscript 1 */
    "sup2",       /* superscript 2 */
    "sup3",       /* superscript 3 */
    "szlig",      /* small sharp s, German (sz ligature) */

    /* ---- t, u, y ---- */
    "thinsp",     /* thin space (not collapsed) */
    "thorn",      /* small thorn, Icelandic */
    "times",      /* multiplication sign */
    "trade",      /* trade mark sign (U+2122) */
    "uacute",     /* small u, acute accent */
    "ucirc",      /* small u, circumflex accent */
    "ugrave",     /* small u, grave accent */
    "uml",        /* spacing dieresis */
    "uuml",       /* small u, dieresis or umlaut mark */
    "yacute",     /* small y, acute accent */
    "yen",        /* yen sign */
    "yuml",       /* small y, dieresis or umlaut mark */
};
|
|
|
|
/* Attribute Lists
|
|
* ---------------
|
|
*
|
|
* Lists must be in alphabetical order by attribute name
|
|
* The tag elements contain the number of attributes
|
|
*/
|
|
|
|
/* From Peter Flynn's intro to the HTML Pro DTD:
|
|
|
|
%structure;
|
|
|
|
DIV, CENTER, H1 to H6, P, UL, OL, DL, DIR, MENU, PRE, XMP, LISTING, BLOCKQUOTE, BQ,
|
|
2 1 2 2 1 8 8 8 8 8 8 8 8 4 4
|
|
MULTICOL,?NOBR, FORM, TABLE, ADDRESS, FIG, BDO, NOTE, and FN; plus?WBR, LI, and LH
|
|
8 n ?1 n 8 8 2 2 2 2 2 ?1 nE 4 4
|
|
|
|
%insertions;
|
|
|
|
Elements which usually contain special-purpose material, or no text material at all.
|
|
|
|
BASEFONT, APPLET, OBJECT, EMBED, SCRIPT, MAP, MARQUEE, HR, ISINDEX, BGSOUND, TAB,?IMG,
|
|
1 e? 2 2 l 1 e 2 l 8 4 4 E 1? E 1 E ! E ?1 E
|
|
IMAGE, BR, plus NOEMBED, SERVER, SPACER, AUDIOSCOPE, and SIDEBAR; ?area
|
|
1 n 1 E n n n n n 8 E
|
|
|
|
%text;
|
|
|
|
Elements within the %structure; which directly contain running text.
|
|
|
|
Descriptive or analytic markup: EM, STRONG, DFN, CODE, SAMP, KBD, VAR, CITE, Q, LANG, AU,
|
|
2 2 2 2 2 2 2 2 2 2 n 2
|
|
AUTHOR, PERSON, ACRONYM, ABBR, INS, DEL, and SPAN
|
|
2 2 n 2 2 2 2 2
|
|
Visual markup:S, STRIKE, I, B, TT, U,?NOBR,?WBR, BR, BIG, SMALL, FONT, STYLE, BLINK, TAB,
|
|
1 1 1 1 1 1 ?1 n ?1nE? 1 E 1 1 1 1 l 1 1 E?
|
|
BLACKFACE, LIMITTEXT, NOSMARTQUOTES, and SHADOW
|
|
1 n 1 n 1 n 1 n
|
|
Hypertext and graphics: A and?IMG
|
|
8 ?8 E
|
|
Mathematical: SUB, SUP, and MATH
|
|
4 4 4 l
|
|
Documentary: COMMENT, ENTITY, ELEMENT, and ATTRIB
|
|
4 4 n 4 n 4 n
|
|
%formula;
|
|
*/
|
|
|
|
/* Elements
|
|
* --------
|
|
*
|
|
* Must match definitions in HTMLDTD.html!
|
|
* Must be in alphabetical order.
|
|
*
|
|
* The T_* extra info is listed here, even though most fields are not used
|
|
* in SGML.c if Old_DTD is set (with the exception of some Tgf_* flags).
|
|
* This simplifies comparison of the tags_table0[] table (otherwise unchanged
|
|
* from original Lynx treatment) with the tags_table1[] table below. - kw
|
|
*
|
|
* Name*, Attributes, No. of attributes, content, extra info...
|
|
*/
|
|
|
|
#include <src0_HTMLDTD.h>
|
|
#include <src1_HTMLDTD.h>
|
|
|
|
/* Dummy space, will be filled with the contents of either tags_table1
|
|
or tags_table0 on calling HTSwitchDTD - kw */
|
|
|
|
static HTTag tags[HTML_ALL_ELEMENTS];
|
|
|
|
const SGML_dtd HTML_dtd =
|
|
{
|
|
tags,
|
|
HTML_ELEMENTS,
|
|
entities, /* probably unused */
|
|
TABLESIZE(entities),
|
|
};
|
|
|
|
/* This function fills the "tags" part of the HTML_dtd structure with
|
|
what we want to use, either tags_table0 or tags_table1. Note that it
|
|
has to be called at least once before HTML_dtd is used, otherwise
|
|
the HTML_dtd contents will be invalid! This could be coded in a way
|
|
that would make an initialisation call unnecessary, but my C knowledge
|
|
is limited and I didn't want to list the whole tags_table1 table
|
|
twice... - kw */
|
|
void HTSwitchDTD(int new_flag)
|
|
{
|
|
if (TRACE)
|
|
CTRACE((tfp,
|
|
"HTMLDTD: Copying %s DTD element info of size %d, %d * %d\n",
|
|
new_flag ? "strict" : "tagsoup",
|
|
(int) (new_flag ? sizeof(tags_table1) : sizeof(tags_table0)),
|
|
HTML_ALL_ELEMENTS,
|
|
(int) sizeof(HTTag)));
|
|
if (new_flag)
|
|
MemCpy(tags, tags_table1, HTML_ALL_ELEMENTS * sizeof(HTTag));
|
|
else
|
|
MemCpy(tags, tags_table0, HTML_ALL_ELEMENTS * sizeof(HTTag));
|
|
}
|
|
|
|
HTTag HTTag_unrecognized =
|
|
|
|
{NULL_HTTag, NULL, 0, 0, SGML_EMPTY, T__UNREC_, 0, 0};
|
|
|
|
/*
|
|
* Utility Routine: Useful for people building HTML objects.
|
|
*/
|
|
|
|
/* Start anchor element
|
|
* --------------------
|
|
*
|
|
* It is kinda convenient to have a particular routine for
|
|
* starting an anchor element, as everything else for HTML is
|
|
* simple anyway.
|
|
*/
|
|
struct _HTStructured {
|
|
HTStructuredClass *isa;
|
|
/* ... */
|
|
};
|
|
|
|
/*
 * Emit the start of an A (anchor) element on a structured stream.
 *
 * obj   stream object; its class's start_element method is called.
 * name  passed as the NAME attribute when non-NULL and non-empty.
 * href  passed as the HREF attribute when non-NULL (an empty string is
 *       still passed on).
 *
 * The element is started with a tag charset argument of -1 and a final
 * argument of 0.
 */
void HTStartAnchor(HTStructured * obj, const char *name,
                   const char *href)
{
    BOOL present[HTML_A_ATTRIBUTES];
    const char *value[HTML_A_ATTRIBUTES];
    int i;

    /*
     * Clear both arrays: an attribute that is not supplied must never
     * expose an indeterminate pointer to the callee.
     */
    for (i = 0; i < HTML_A_ATTRIBUTES; i++) {
        present[i] = NO;
        value[i] = NULL;
    }

    if (name && *name) {
        present[HTML_A_NAME] = YES;
        value[HTML_A_NAME] = name;
    }
    if (href) {
        present[HTML_A_HREF] = YES;
        value[HTML_A_HREF] = href;
    }

    (*obj->isa->start_element) (obj, HTML_A, present, value, -1, 0);
}
|
|
|
|
/*
 * Emit the start of an A (anchor) element, with optional TYPE attribute
 * and an explicit tag charset.
 *
 * obj          stream object; its class's start_element method is called.
 * name         passed as the NAME attribute when non-NULL and non-empty.
 * href         passed as the HREF attribute when non-NULL and non-empty.
 * linktype     passed as the TYPE attribute when non-NULL and non-empty.
 * tag_charset  forwarded unchanged to start_element.
 */
void HTStartAnchor5(HTStructured * obj, const char *name,
                    const char *href,
                    const char *linktype,
                    int tag_charset)
{
    BOOL present[HTML_A_ATTRIBUTES];
    const char *value[HTML_A_ATTRIBUTES];
    int i;

    /*
     * Clear both arrays: an attribute that is not supplied must never
     * expose an indeterminate pointer to the callee.
     */
    for (i = 0; i < HTML_A_ATTRIBUTES; i++) {
        present[i] = NO;
        value[i] = NULL;
    }

    if (name && *name) {
        present[HTML_A_NAME] = YES;
        value[HTML_A_NAME] = name;
    }
    if (href && *href) {
        present[HTML_A_HREF] = YES;
        value[HTML_A_HREF] = href;
    }
    if (linktype && *linktype) {
        present[HTML_A_TYPE] = YES;
        value[HTML_A_TYPE] = linktype;
    }

    (*obj->isa->start_element) (obj, HTML_A, present, value, tag_charset, 0);
}
|
|
|
|
/*
 * Emit the start of an ISINDEX element on a structured stream.
 *
 * obj     stream object; its class's start_element method is called.
 * prompt  passed as the PROMPT attribute when non-NULL and non-empty.
 * href    passed as the HREF attribute when non-NULL (an empty string is
 *         still passed on).
 *
 * The element is started with a tag charset argument of -1 and a final
 * argument of 0.
 */
void HTStartIsIndex(HTStructured * obj, const char *prompt,
                    const char *href)
{
    BOOL present[HTML_ISINDEX_ATTRIBUTES];
    const char *value[HTML_ISINDEX_ATTRIBUTES];
    int i;

    /*
     * Clear both arrays: an attribute that is not supplied must never
     * expose an indeterminate pointer to the callee.
     */
    for (i = 0; i < HTML_ISINDEX_ATTRIBUTES; i++) {
        present[i] = NO;
        value[i] = NULL;
    }

    if (prompt && *prompt) {
        present[HTML_ISINDEX_PROMPT] = YES;
        value[HTML_ISINDEX_PROMPT] = prompt;
    }
    if (href) {
        present[HTML_ISINDEX_HREF] = YES;
        value[HTML_ISINDEX_HREF] = href;
    }

    (*obj->isa->start_element) (obj, HTML_ISINDEX, present, value, -1, 0);
}
|