diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 11:33:32 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 11:33:32 +0000 |
commit | 1f403ad2197fc7442409f434ee574f3e6b46fb73 (patch) | |
tree | 0299c6dd11d5edfa918a29b6456bc1875f1d288c /tests/dtds | |
parent | Initial commit. (diff) | |
download | pygments-1f403ad2197fc7442409f434ee574f3e6b46fb73.tar.xz pygments-1f403ad2197fc7442409f434ee574f3e6b46fb73.zip |
Adding upstream version 2.14.0+dfsg. (refs: upstream/2.14.0+dfsg, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tests/dtds')
-rw-r--r-- | tests/dtds/HTML4-f.dtd | 37 | ||||
-rw-r--r-- | tests/dtds/HTML4-s.dtd | 869 | ||||
-rw-r--r-- | tests/dtds/HTML4.dcl | 88 | ||||
-rw-r--r-- | tests/dtds/HTML4.dtd | 1092 | ||||
-rw-r--r-- | tests/dtds/HTML4.soc | 9 | ||||
-rw-r--r-- | tests/dtds/HTMLlat1.ent | 195 | ||||
-rw-r--r-- | tests/dtds/HTMLspec.ent | 77 | ||||
-rw-r--r-- | tests/dtds/HTMLsym.ent | 241 |
8 files changed, 2608 insertions, 0 deletions
diff --git a/tests/dtds/HTML4-f.dtd b/tests/dtds/HTML4-f.dtd new file mode 100644 index 0000000..9552012 --- /dev/null +++ b/tests/dtds/HTML4-f.dtd @@ -0,0 +1,37 @@ +<!-- + This is the HTML 4.0 Frameset DTD, which should be + used for documents with frames. This DTD is identical + to the HTML 4.0 Transitional DTD except for the + content model of the "HTML" element: in frameset + documents, the "FRAMESET" element replaces the "BODY" + element. + + Draft: $Date: 1999/05/02 15:37:15 $ + + Authors: + Dave Raggett <dsr@w3.org> + Arnaud Le Hors <lehors@w3.org> + Ian Jacobs <ij@w3.org> + + Further information about HTML 4.0 is available at: + + http://www.w3.org/TR/REC-html40. +--> +<!ENTITY % HTML.Version "-//W3C//DTD HTML 4.0 Frameset//EN" + -- Typical usage: + + <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Frameset//EN" + "http://www.w3.org/TR/REC-html40/frameset.dtd"> + <html> + <head> + ... + </head> + <frameset> + ... + </frameset> + </html> +--> + +<!ENTITY % HTML.Frameset "INCLUDE"> +<!ENTITY % HTML4.dtd PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> +%HTML4.dtd;
\ No newline at end of file diff --git a/tests/dtds/HTML4-s.dtd b/tests/dtds/HTML4-s.dtd new file mode 100644 index 0000000..8ce7917 --- /dev/null +++ b/tests/dtds/HTML4-s.dtd @@ -0,0 +1,869 @@ +<!-- + This is HTML 4.0 Strict DTD, which excludes the presentation + attributes and elements that W3C expects to phase out as + support for style sheets matures. Authors should use the Strict + DTD when possible, but may use the Transitional DTD when support + for presentation attribute and elements is required. + + HTML 4.0 includes mechanisms for style sheets, scripting, + embedding objects, improved support for right to left and mixed + direction text, and enhancements to forms for improved + accessibility for people with disabilities. + + Draft: $Date: 1999/05/02 15:37:15 $ + + Authors: + Dave Raggett <dsr@w3.org> + Arnaud Le Hors <lehors@w3.org> + Ian Jacobs <ij@w3.org> + + Further information about HTML 4.0 is available at: + + http://www.w3.org/TR/REC-html40 +--> +<!-- + Typical usage: + + <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN" + "http://www.w3.org/TR/REC-html40/strict.dtd"> + <html> + <head> + ... + </head> + <body> + ... + </body> + </html> + + The URI used as a system identifier with the public identifier allows + the user agent to download the DTD and entity sets as needed. 
+ + The FPI for the Transitional HTML 4.0 DTD is: + + "-//W3C//DTD HTML 4.0 Transitional//EN + + and its URI is: + + http://www.w3.org/TR/REC-html40/loose.dtd + + If you are writing a document that includes frames, use + the following FPI: + + "-//W3C//DTD HTML 4.0 Frameset//EN" + + with the URI: + + http://www.w3.org/TR/REC-html40/frameset.dtd + + The following URIs are supported in relation to HTML 4.0 + + "http://www.w3.org/TR/REC-html40/strict.dtd" (Strict DTD) + "http://www.w3.org/TR/REC-html40/loose.dtd" (Loose DTD) + "http://www.w3.org/TR/REC-html40/frameset.dtd" (Frameset DTD) + "http://www.w3.org/TR/REC-html40/HTMLlat1.ent" (Latin-1 entities) + "http://www.w3.org/TR/REC-html40/HTMLsymbol.ent" (Symbol entities) + "http://www.w3.org/TR/REC-html40/HTMLspecial.ent" (Special entities) + + These URIs point to the latest version of each file. To reference + this specific revision use the following URIs: + + "http://www.w3.org/TR/REC-html40-971218/strict.dtd" + "http://www.w3.org/TR/REC-html40-971218/loose.dtd" + "http://www.w3.org/TR/REC-html40-971218/frameset.dtd" + "http://www.w3.org/TR/REC-html40-971218/HTMLlat1.ent" + "http://www.w3.org/TR/REC-html40-971218/HTMLsymbol.ent" + "http://www.w3.org/TR/REC-html40-971218/HTMLspecial.ent" + +--> + +<!--================== Imported Names ====================================--> + +<!ENTITY % ContentType "CDATA" + -- media type, as per [RFC2045] + --> + +<!ENTITY % ContentTypes "CDATA" + -- comma-separated list of media types, as per [RFC2045] + --> + +<!ENTITY % Charset "CDATA" + -- a character encoding, as per [RFC2045] + --> + +<!ENTITY % Charsets "CDATA" + -- a space separated list of character encodings, as per [RFC2045] + --> + +<!ENTITY % LanguageCode "NAME" + -- a language code, as per [RFC1766] + --> + +<!ENTITY % Character "CDATA" + -- a single character from [ISO10646] + --> + +<!ENTITY % LinkTypes "CDATA" + -- space-separated list of link types + --> + +<!ENTITY % MediaDesc "CDATA" + -- single or 
comma-separated list of media descriptors + --> + +<!ENTITY % URI "CDATA" + -- a Uniform Resource Identifier, + see [URI] + --> + +<!ENTITY % Datetime "CDATA" -- date and time information. ISO date format --> + + +<!ENTITY % Script "CDATA" -- script expression --> + +<!ENTITY % StyleSheet "CDATA" -- style sheet data --> + + + +<!ENTITY % Text "CDATA"> + + +<!-- Parameter Entities --> + +<!ENTITY % head.misc "SCRIPT|STYLE|META|LINK|OBJECT" -- repeatable head elements --> + +<!ENTITY % heading "H1|H2|H3|H4|H5|H6"> + +<!ENTITY % list "UL | OL"> + +<!ENTITY % preformatted "PRE"> + + +<!--================ Character mnemonic entities =========================--> + +<!ENTITY % HTMLlat1 PUBLIC + "-//W3C//ENTITIES Latin1//EN//HTML" + "http://www.w3.org/TR/REC-html40-971218/HTMLlat1.ent"> +%HTMLlat1; + +<!ENTITY % HTMLsymbol PUBLIC + "-//W3C//ENTITIES Symbols//EN//HTML" + "http://www.w3.org/TR/REC-html40-971218/HTMLsymbol.ent"> +%HTMLsymbol; + +<!ENTITY % HTMLspecial PUBLIC + "-//W3C//ENTITIES Special//EN//HTML" + "http://www.w3.org/TR/REC-html40-971218/HTMLspecial.ent"> +%HTMLspecial; +<!--=================== Generic Attributes ===============================--> + +<!ENTITY % coreattrs + "id ID #IMPLIED -- document-wide unique id -- + class CDATA #IMPLIED -- space separated list of classes -- + style %StyleSheet; #IMPLIED -- associated style info -- + title %Text; #IMPLIED -- advisory title/amplification --" + > + +<!ENTITY % i18n + "lang %LanguageCode; #IMPLIED -- language code -- + dir (ltr|rtl) #IMPLIED -- direction for weak/neutral text --" + > + +<!ENTITY % events + "onclick %Script; #IMPLIED -- a pointer button was clicked -- + ondblclick %Script; #IMPLIED -- a pointer button was double clicked-- + onmousedown %Script; #IMPLIED -- a pointer button was pressed down -- + onmouseup %Script; #IMPLIED -- a pointer button was released -- + onmouseover %Script; #IMPLIED -- a pointer was moved onto -- + onmousemove %Script; #IMPLIED -- a pointer was moved within -- + 
onmouseout %Script; #IMPLIED -- a pointer was moved away -- + onkeypress %Script; #IMPLIED -- a key was pressed and released -- + onkeydown %Script; #IMPLIED -- a key was pressed down -- + onkeyup %Script; #IMPLIED -- a key was released --" + > + +<!-- Reserved Feature Switch --> +<!ENTITY % HTML.Reserved "IGNORE"> + +<!-- The following attributes are reserved for possible future use --> +<![ %HTML.Reserved; [ +<!ENTITY % reserved + "datasrc %URI; #IMPLIED -- a single or tabular Data Source -- + datafld CDATA #IMPLIED -- the property or column name -- + dataformatas (plaintext|html) plaintext -- text or html --" + > +]]> + +<!ENTITY % reserved ""> + +<!ENTITY % attrs "%coreattrs; %i18n; %events;"> + + +<!--=================== Text Markup ======================================--> + +<!ENTITY % fontstyle + "TT | I | B | BIG | SMALL"> + +<!ENTITY % phrase "EM | STRONG | DFN | CODE | + SAMP | KBD | VAR | CITE | ABBR | ACRONYM" > + +<!ENTITY % special + "A | IMG | OBJECT | BR | SCRIPT | MAP | Q | SUB | SUP | SPAN | BDO"> + +<!ENTITY % formctrl "INPUT | SELECT | TEXTAREA | LABEL | BUTTON"> + +<!-- %inline; covers inline or "text-level" elements --> +<!ENTITY % inline "#PCDATA | %fontstyle; | %phrase; | %special; | %formctrl;"> + +<!ELEMENT (%fontstyle;|%phrase;) - - (%inline;)*> +<!ATTLIST (%fontstyle;|%phrase;) + %attrs; -- %coreattrs, %i18n, %events -- + > + +<!ELEMENT (SUB|SUP) - - (%inline;)* -- subscript, superscript --> +<!ATTLIST (SUB|SUP) + %attrs; -- %coreattrs, %i18n, %events -- + > + +<!ELEMENT SPAN - - (%inline;)* -- generic language/style container --> +<!ATTLIST SPAN + %attrs; -- %coreattrs, %i18n, %events -- + %reserved; -- reserved for possible future use -- + > + +<!ELEMENT BDO - - (%inline;)* -- I18N BiDi over-ride --> +<!ATTLIST BDO + %coreattrs; -- id, class, style, title -- + lang %LanguageCode; #IMPLIED -- language code -- + dir (ltr|rtl) #REQUIRED -- directionality -- + > + + +<!ELEMENT BR - O EMPTY -- forced line break --> +<!ATTLIST BR + 
%coreattrs; -- id, class, style, title -- + > + +<!--================== HTML content models ===============================--> + +<!-- + HTML has two basic content models: + + %inline; character level elements and text strings + %block; block-like elements e.g. paragraphs and lists +--> + +<!ENTITY % block + "P | %heading; | %list; | %preformatted; | DL | DIV | NOSCRIPT | + BLOCKQUOTE | FORM | HR | TABLE | FIELDSET | ADDRESS"> + +<!ENTITY % flow "%block; | %inline;"> + +<!--=================== Document Body ====================================--> + +<!ELEMENT BODY O O (%block;|SCRIPT)+ +(INS|DEL) -- document body --> +<!ATTLIST BODY + %attrs; -- %coreattrs, %i18n, %events -- + onload %Script; #IMPLIED -- the document has been loaded -- + onunload %Script; #IMPLIED -- the document has been removed -- + > + +<!ELEMENT ADDRESS - - (%inline;)* -- information on author --> +<!ATTLIST ADDRESS + %attrs; -- %coreattrs, %i18n, %events -- + > + +<!ELEMENT DIV - - (%flow;)* -- generic language/style container --> +<!ATTLIST DIV + %attrs; -- %coreattrs, %i18n, %events -- + %reserved; -- reserved for possible future use -- + > + + +<!--================== The Anchor Element ================================--> + +<!ENTITY % Shape "(rect|circle|poly|default)"> +<!ENTITY % Coords "CDATA" -- comma separated list of lengths --> + +<!ELEMENT A - - (%inline;)* -(A) -- anchor --> +<!ATTLIST A + %attrs; -- %coreattrs, %i18n, %events -- + charset %Charset; #IMPLIED -- char encoding of linked resource -- + type %ContentType; #IMPLIED -- advisory content type -- + name CDATA #IMPLIED -- named link end -- + href %URI; #IMPLIED -- URI for linked resource -- + hreflang %LanguageCode; #IMPLIED -- language code -- + rel %LinkTypes; #IMPLIED -- forward link types -- + rev %LinkTypes; #IMPLIED -- reverse link types -- + accesskey %Character; #IMPLIED -- accessibility key character -- + shape %Shape; rect -- for use with client-side image maps -- + coords %Coords; #IMPLIED -- for use with 
client-side image maps -- + tabindex NUMBER #IMPLIED -- position in tabbing order -- + onfocus %Script; #IMPLIED -- the element got the focus -- + onblur %Script; #IMPLIED -- the element lost the focus -- + > + +<!--================== Client-side image maps ============================--> + +<!-- These can be placed in the same document or grouped in a + separate document although this isn't yet widely supported --> + +<!ELEMENT MAP - - ((%block;)+ | AREA+) -- client-side image map --> +<!ATTLIST MAP + %attrs; -- %coreattrs, %i18n, %events -- + name CDATA #REQUIRED -- for reference by usemap -- + > + +<!ELEMENT AREA - O EMPTY -- client-side image map area --> +<!ATTLIST AREA + %attrs; -- %coreattrs, %i18n, %events -- + shape %Shape; rect -- controls interpretation of coords -- + coords %Coords; #IMPLIED -- comma separated list of lengths -- + href %URI; #IMPLIED -- URI for linked resource -- + nohref (nohref) #IMPLIED -- this region has no action -- + alt %Text; #REQUIRED -- short description -- + tabindex NUMBER #IMPLIED -- position in tabbing order -- + accesskey %Character; #IMPLIED -- accessibility key character -- + onfocus %Script; #IMPLIED -- the element got the focus -- + onblur %Script; #IMPLIED -- the element lost the focus -- + > + +<!--================== The LINK Element ==================================--> + +<!-- + Relationship values can be used in principle: + + a) for document specific toolbars/menus when used + with the LINK element in document head e.g. + start, contents, previous, next, index, end, help + b) to link to a separate style sheet (rel=stylesheet) + c) to make a link to a script (rel=script) + d) by stylesheets to control how collections of + html nodes are rendered into printed documents + e) to make a link to a printable version of this document + e.g. 
a postscript or pdf version (rel=alternate media=print) +--> + +<!ELEMENT LINK - O EMPTY -- a media-independent link --> +<!ATTLIST LINK + %attrs; -- %coreattrs, %i18n, %events -- + charset %Charset; #IMPLIED -- char encoding of linked resource -- + href %URI; #IMPLIED -- URI for linked resource -- + hreflang %LanguageCode; #IMPLIED -- language code -- + type %ContentType; #IMPLIED -- advisory content type -- + rel %LinkTypes; #IMPLIED -- forward link types -- + rev %LinkTypes; #IMPLIED -- reverse link types -- + media %MediaDesc; #IMPLIED -- for rendering on these media -- + > + +<!--=================== Images ===========================================--> + +<!-- Length defined in strict DTD for cellpadding/cellspacing --> +<!ENTITY % Length "CDATA" -- nn for pixels or nn% for percentage length --> +<!ENTITY % MultiLength "CDATA" -- pixel, percentage, or relative --> + +<!ENTITY % MultiLengths "CDATA" -- comma-separated list of MultiLength --> + +<!ENTITY % Pixels "CDATA" -- integer representing length in pixels --> + + +<!-- To avoid problems with text-only UAs as well as + to make image content understandable and navigable + to users of non-visual UAs, you need to provide + a description with ALT, and avoid server-side image maps --> +<!ELEMENT IMG - O EMPTY -- Embedded image --> +<!ATTLIST IMG + %attrs; -- %coreattrs, %i18n, %events -- + src %URI; #REQUIRED -- URI of image to embed -- + alt %Text; #REQUIRED -- short description -- + longdesc %URI; #IMPLIED -- link to long description + (complements alt) -- + height %Length; #IMPLIED -- override height -- + width %Length; #IMPLIED -- override width -- + usemap %URI; #IMPLIED -- use client-side image map -- + ismap (ismap) #IMPLIED -- use server-side image map -- + > + +<!-- USEMAP points to a MAP element which may be in this document + or an external document, although the latter is not widely supported --> + +<!--==================== OBJECT ======================================--> +<!-- + OBJECT is used to 
embed objects as part of HTML pages + PARAM elements should precede other content. SGML mixed content + model technicality precludes specifying this formally ... +--> + +<!ELEMENT OBJECT - - (PARAM | %flow;)* + -- generic embedded object --> +<!ATTLIST OBJECT + %attrs; -- %coreattrs, %i18n, %events -- + declare (declare) #IMPLIED -- declare but don't instantiate flag -- + classid %URI; #IMPLIED -- identifies an implementation -- + codebase %URI; #IMPLIED -- base URI for classid, data, archive-- + data %URI; #IMPLIED -- reference to object's data -- + type %ContentType; #IMPLIED -- content type for data -- + codetype %ContentType; #IMPLIED -- content type for code -- + archive %URI; #IMPLIED -- space separated archive list -- + standby %Text; #IMPLIED -- message to show while loading -- + height %Length; #IMPLIED -- override height -- + width %Length; #IMPLIED -- override width -- + usemap %URI; #IMPLIED -- use client-side image map -- + name CDATA #IMPLIED -- submit as part of form -- + tabindex NUMBER #IMPLIED -- position in tabbing order -- + %reserved; -- reserved for possible future use -- + > + +<!ELEMENT PARAM - O EMPTY -- named property value --> +<!ATTLIST PARAM + id ID #IMPLIED -- document-wide unique id -- + name CDATA #REQUIRED -- property name -- + value CDATA #IMPLIED -- property value -- + valuetype (DATA|REF|OBJECT) DATA -- How to interpret value -- + type %ContentType; #IMPLIED -- content type for value + when valuetype=ref -- + > + + +<!--=================== Horizontal Rule ==================================--> + +<!ELEMENT HR - O EMPTY -- horizontal rule --> +<!ATTLIST HR + %coreattrs; -- id, class, style, title -- + %events; + > + +<!--=================== Paragraphs =======================================--> + +<!ELEMENT P - O (%inline;)* -- paragraph --> +<!ATTLIST P + %attrs; -- %coreattrs, %i18n, %events -- + > + +<!--=================== Headings =========================================--> + +<!-- + There are six levels of headings from H1 
(the most important) + to H6 (the least important). +--> + +<!ELEMENT (%heading;) - - (%inline;)* -- heading --> +<!ATTLIST (%heading;) + %attrs; -- %coreattrs, %i18n, %events -- + > + +<!--=================== Preformatted Text ================================--> + +<!-- excludes markup for images and changes in font size --> +<!ENTITY % pre.exclusion "IMG|OBJECT|BIG|SMALL|SUB|SUP"> + +<!ELEMENT PRE - - (%inline;)* -(%pre.exclusion;) -- preformatted text --> +<!ATTLIST PRE + %attrs; -- %coreattrs, %i18n, %events -- + > + +<!--===================== Inline Quotes ==================================--> + +<!ELEMENT Q - - (%inline;)* -- short inline quotation --> +<!ATTLIST Q + %attrs; -- %coreattrs, %i18n, %events -- + cite %URI; #IMPLIED -- URI for source document or msg -- + > + +<!--=================== Block-like Quotes ================================--> + +<!ELEMENT BLOCKQUOTE - - (%block;|SCRIPT)+ -- long quotation --> +<!ATTLIST BLOCKQUOTE + %attrs; -- %coreattrs, %i18n, %events -- + cite %URI; #IMPLIED -- URI for source document or msg -- + > + +<!--=================== Inserted/Deleted Text ============================--> + + +<!-- INS/DEL are handled by inclusion on BODY --> +<!ELEMENT (INS|DEL) - - (%flow;)* -- inserted text, deleted text --> +<!ATTLIST (INS|DEL) + %attrs; -- %coreattrs, %i18n, %events -- + cite %URI; #IMPLIED -- info on reason for change -- + datetime %Datetime; #IMPLIED -- date and time of change -- + > + +<!--=================== Lists ============================================--> + +<!-- definition lists - DT for term, DD for its definition --> + +<!ELEMENT DL - - (DT|DD)+ -- definition list --> +<!ATTLIST DL + %attrs; -- %coreattrs, %i18n, %events -- + > + +<!ELEMENT DT - O (%inline;)* -- definition term --> +<!ELEMENT DD - O (%flow;)* -- definition description --> +<!ATTLIST (DT|DD) + %attrs; -- %coreattrs, %i18n, %events -- + > + + +<!ELEMENT OL - - (LI)+ -- ordered list --> +<!ATTLIST OL + %attrs; -- %coreattrs, %i18n, %events -- + > 
+ +<!-- Unordered Lists (UL) bullet styles --> +<!ELEMENT UL - - (LI)+ -- unordered list --> +<!ATTLIST UL + %attrs; -- %coreattrs, %i18n, %events -- + > + + + +<!ELEMENT LI - O (%flow;)* -- list item --> +<!ATTLIST LI + %attrs; -- %coreattrs, %i18n, %events -- + > + +<!--================ Forms ===============================================--> +<!ELEMENT FORM - - (%block;|SCRIPT)+ -(FORM) -- interactive form --> +<!ATTLIST FORM + %attrs; -- %coreattrs, %i18n, %events -- + action %URI; #REQUIRED -- server-side form handler -- + method (GET|POST) GET -- HTTP method used to submit the form-- + enctype %ContentType; "application/x-www-form-urlencoded" + onsubmit %Script; #IMPLIED -- the form was submitted -- + onreset %Script; #IMPLIED -- the form was reset -- + accept-charset %Charsets; #IMPLIED -- list of supported charsets -- + > + +<!-- Each label must not contain more than ONE field --> +<!ELEMENT LABEL - - (%inline;)* -(LABEL) -- form field label text --> +<!ATTLIST LABEL + %attrs; -- %coreattrs, %i18n, %events -- + for IDREF #IMPLIED -- matches field ID value -- + accesskey %Character; #IMPLIED -- accessibility key character -- + onfocus %Script; #IMPLIED -- the element got the focus -- + onblur %Script; #IMPLIED -- the element lost the focus -- + > + +<!ENTITY % InputType + "(TEXT | PASSWORD | CHECKBOX | + RADIO | SUBMIT | RESET | + FILE | HIDDEN | IMAGE | BUTTON)" + > + +<!-- attribute name required for all but submit & reset --> +<!ELEMENT INPUT - O EMPTY -- form control --> +<!ATTLIST INPUT + %attrs; -- %coreattrs, %i18n, %events -- + type %InputType; TEXT -- what kind of widget is needed -- + name CDATA #IMPLIED -- submit as part of form -- + value CDATA #IMPLIED -- required for radio and checkboxes -- + checked (checked) #IMPLIED -- for radio buttons and check boxes -- + disabled (disabled) #IMPLIED -- unavailable in this context -- + readonly (readonly) #IMPLIED -- for text and passwd -- + size CDATA #IMPLIED -- specific to each type of field -- + 
maxlength NUMBER #IMPLIED -- max chars for text fields -- + src %URI; #IMPLIED -- for fields with images -- + alt CDATA #IMPLIED -- short description -- + usemap %URI; #IMPLIED -- use client-side image map -- + tabindex NUMBER #IMPLIED -- position in tabbing order -- + accesskey %Character; #IMPLIED -- accessibility key character -- + onfocus %Script; #IMPLIED -- the element got the focus -- + onblur %Script; #IMPLIED -- the element lost the focus -- + onselect %Script; #IMPLIED -- some text was selected -- + onchange %Script; #IMPLIED -- the element value was changed -- + accept %ContentTypes; #IMPLIED -- list of MIME types for file upload -- + %reserved; -- reserved for possible future use -- + > + +<!ELEMENT SELECT - - (OPTGROUP|OPTION)+ -- option selector --> +<!ATTLIST SELECT + %attrs; -- %coreattrs, %i18n, %events -- + name CDATA #IMPLIED -- field name -- + size NUMBER #IMPLIED -- rows visible -- + multiple (multiple) #IMPLIED -- default is single selection -- + disabled (disabled) #IMPLIED -- unavailable in this context -- + tabindex NUMBER #IMPLIED -- position in tabbing order -- + onfocus %Script; #IMPLIED -- the element got the focus -- + onblur %Script; #IMPLIED -- the element lost the focus -- + onchange %Script; #IMPLIED -- the element value was changed -- + %reserved; -- reserved for possible future use -- + > + +<!ELEMENT OPTGROUP - - (OPTION)+ -- option group --> +<!ATTLIST OPTGROUP + %attrs; -- %coreattrs, %i18n, %events -- + disabled (disabled) #IMPLIED -- unavailable in this context -- + label %Text; #REQUIRED -- for use in hierarchical menus -- + > + +<!ELEMENT OPTION - O (#PCDATA) -- selectable choice --> +<!ATTLIST OPTION + %attrs; -- %coreattrs, %i18n, %events -- + selected (selected) #IMPLIED + disabled (disabled) #IMPLIED -- unavailable in this context -- + label %Text; #IMPLIED -- for use in hierarchical menus -- + value CDATA #IMPLIED -- defaults to element content -- + > + +<!ELEMENT TEXTAREA - - (#PCDATA) -- multi-line text field --> 
+<!ATTLIST TEXTAREA + %attrs; -- %coreattrs, %i18n, %events -- + name CDATA #IMPLIED + rows NUMBER #REQUIRED + cols NUMBER #REQUIRED + disabled (disabled) #IMPLIED -- unavailable in this context -- + readonly (readonly) #IMPLIED + tabindex NUMBER #IMPLIED -- position in tabbing order -- + accesskey %Character; #IMPLIED -- accessibility key character -- + onfocus %Script; #IMPLIED -- the element got the focus -- + onblur %Script; #IMPLIED -- the element lost the focus -- + onselect %Script; #IMPLIED -- some text was selected -- + onchange %Script; #IMPLIED -- the element value was changed -- + %reserved; -- reserved for possible future use -- + > + +<!-- + #PCDATA is to solve the mixed content problem, + per specification only whitespace is allowed there! + --> +<!ELEMENT FIELDSET - - (#PCDATA,LEGEND,(%flow;)*) -- form control group --> +<!ATTLIST FIELDSET + %attrs; -- %coreattrs, %i18n, %events -- + > + +<!ELEMENT LEGEND - - (%inline;)* -- fieldset legend --> +<!ENTITY % LAlign "(top|bottom|left|right)"> + +<!ATTLIST LEGEND + %attrs; -- %coreattrs, %i18n, %events -- + accesskey %Character; #IMPLIED -- accessibility key character -- + > + +<!ELEMENT BUTTON - - + (%flow;)* -(A|%formctrl;|FORM|FIELDSET) + -- push button --> +<!ATTLIST BUTTON + %attrs; -- %coreattrs, %i18n, %events -- + name CDATA #IMPLIED + value CDATA #IMPLIED -- sent to server when submitted -- + type (button|submit|reset) submit -- for use as form button -- + disabled (disabled) #IMPLIED -- unavailable in this context -- + tabindex NUMBER #IMPLIED -- position in tabbing order -- + accesskey %Character; #IMPLIED -- accessibility key character -- + onfocus %Script; #IMPLIED -- the element got the focus -- + onblur %Script; #IMPLIED -- the element lost the focus -- + %reserved; -- reserved for possible future use -- + > + +<!--======================= Tables =======================================--> + +<!-- IETF HTML table standard, see [RFC1942] --> + +<!-- + The BORDER attribute sets the thickness 
of the frame around the + table. The default units are screen pixels. + + The FRAME attribute specifies which parts of the frame around + the table should be rendered. The values are not the same as + CALS to avoid a name clash with the VALIGN attribute. + + The value "border" is included for backwards compatibility with + <TABLE BORDER> which yields frame=border and border=implied + For <TABLE BORDER=1> you get border=1 and frame=implied. In this + case, it is appropriate to treat this as frame=border for backwards + compatibility with deployed browsers. +--> +<!ENTITY % TFrame "(void|above|below|hsides|lhs|rhs|vsides|box|border)"> + +<!-- + The RULES attribute defines which rules to draw between cells: + + If RULES is absent then assume: + "none" if BORDER is absent or BORDER=0 otherwise "all" +--> + +<!ENTITY % TRules "(none | groups | rows | cols | all)"> + +<!-- horizontal placement of table relative to document --> +<!ENTITY % TAlign "(left|center|right)"> + +<!-- horizontal alignment attributes for cell contents --> +<!ENTITY % cellhalign + "align (left|center|right|justify|char) #IMPLIED + char %Character; #IMPLIED -- alignment char, e.g. 
char=':' -- + charoff %Length; #IMPLIED -- offset for alignment char --" + > + +<!-- vertical alignment attributes for cell contents --> +<!ENTITY % cellvalign + "valign (top|middle|bottom|baseline) #IMPLIED" + > + +<!ELEMENT TABLE - - + (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)> +<!ELEMENT CAPTION - - (%inline;)* -- table caption --> +<!ELEMENT THEAD - O (TR)+ -- table header --> +<!ELEMENT TFOOT - O (TR)+ -- table footer --> +<!ELEMENT TBODY O O (TR)+ -- table body --> +<!ELEMENT COLGROUP - O (col)* -- table column group --> +<!ELEMENT COL - O EMPTY -- table column --> +<!ELEMENT TR - O (TH|TD)+ -- table row --> +<!ELEMENT (TH|TD) - O (%flow;)* -- table header cell, table data cell--> + +<!ATTLIST TABLE -- table element -- + %attrs; -- %coreattrs, %i18n, %events -- + summary %Text; #IMPLIED -- purpose/structure for speech output-- + width %Length; #IMPLIED -- table width -- + border %Pixels; #IMPLIED -- controls frame width around table -- + frame %TFrame; #IMPLIED -- which parts of frame to render -- + rules %TRules; #IMPLIED -- rulings between rows and cols -- + cellspacing %Length; #IMPLIED -- spacing between cells -- + cellpadding %Length; #IMPLIED -- spacing within cells -- + %reserved; -- reserved for possible future use -- + datapagesize CDATA #IMPLIED -- reserved for possible future use -- + > + +<!ENTITY % CAlign "(top|bottom|left|right)"> + +<!ATTLIST CAPTION + %attrs; -- %coreattrs, %i18n, %events -- + > + +<!-- +COLGROUP groups a set of COL elements. It allows you to group +several semantically related columns together. +--> +<!ATTLIST COLGROUP + %attrs; -- %coreattrs, %i18n, %events -- + span NUMBER 1 -- default number of columns in group -- + width %MultiLength; #IMPLIED -- default width for enclosed COLs -- + %cellhalign; -- horizontal alignment in cells -- + %cellvalign; -- vertical alignment in cells -- + > + +<!-- + COL elements define the alignment properties for cells in + one or more columns. 
+ + The WIDTH attribute specifies the width of the columns, e.g. + + width=64 width in screen pixels + width=0.5* relative width of 0.5 + + The SPAN attribute causes the attributes of one + COL element to apply to more than one column. +--> +<!ATTLIST COL -- column groups and properties -- + %attrs; -- %coreattrs, %i18n, %events -- + span NUMBER 1 -- COL attributes affect N columns -- + width %MultiLength; #IMPLIED -- column width specification -- + %cellhalign; -- horizontal alignment in cells -- + %cellvalign; -- vertical alignment in cells -- + > + +<!-- + Use THEAD to duplicate headers when breaking table + across page boundaries, or for static headers when + TBODY sections are rendered in scrolling panel. + + Use TFOOT to duplicate footers when breaking table + across page boundaries, or for static footers when + TBODY sections are rendered in scrolling panel. + + Use multiple TBODY sections when rules are needed + between groups of table rows. +--> +<!ATTLIST (THEAD|TBODY|TFOOT) -- table section -- + %attrs; -- %coreattrs, %i18n, %events -- + %cellhalign; -- horizontal alignment in cells -- + %cellvalign; -- vertical alignment in cells -- + > + +<!ATTLIST TR -- table row -- + %attrs; -- %coreattrs, %i18n, %events -- + %cellhalign; -- horizontal alignment in cells -- + %cellvalign; -- vertical alignment in cells -- + > + + +<!-- Scope is simpler than axes attribute for common tables --> +<!ENTITY % Scope "(row|col|rowgroup|colgroup)"> + +<!-- TH is for headers, TD for data, but for cells acting as both use TD --> +<!ATTLIST (TH|TD) -- header or data cell -- + %attrs; -- %coreattrs, %i18n, %events -- + abbr %Text; #IMPLIED -- abbreviation for header cell -- + axis CDATA #IMPLIED -- names groups of related headers-- + headers IDREFS #IMPLIED -- list of id's for header cells -- + scope %Scope; #IMPLIED -- scope covered by header cells -- + rowspan NUMBER 1 -- number of rows spanned by cell -- + colspan NUMBER 1 -- number of cols spanned by cell -- + %cellhalign; 
-- horizontal alignment in cells -- + %cellvalign; -- vertical alignment in cells -- + > + + +<!--================ Document Head =======================================--> +<!-- %head.misc; defined earlier on as "SCRIPT|STYLE|META|LINK|OBJECT" --> +<!ENTITY % head.content "TITLE & BASE?"> + +<!ELEMENT HEAD O O (%head.content;) +(%head.misc;) -- document head --> +<!ATTLIST HEAD + %i18n; -- lang, dir -- + profile %URI; #IMPLIED -- named dictionary of meta info -- + > + +<!-- The TITLE element is not considered part of the flow of text. + It should be displayed, for example as the page header or + window title. Exactly one title is required per document. + --> +<!ELEMENT TITLE - - (#PCDATA) -(%head.misc;) -- document title --> +<!ATTLIST TITLE %i18n> + + +<!ELEMENT BASE - O EMPTY -- document base URI --> +<!ATTLIST BASE + href %URI; #REQUIRED -- URI that acts as base URI -- + > + +<!ELEMENT META - O EMPTY -- generic metainformation --> +<!ATTLIST META + %i18n; -- lang, dir, for use with content -- + http-equiv NAME #IMPLIED -- HTTP response header name -- + name NAME #IMPLIED -- metainformation name -- + content CDATA #REQUIRED -- associated information -- + scheme CDATA #IMPLIED -- select form of content -- + > + +<!ELEMENT STYLE - - %StyleSheet -- style info --> +<!ATTLIST STYLE + %i18n; -- lang, dir, for use with title -- + type %ContentType; #REQUIRED -- content type of style language -- + media %MediaDesc; #IMPLIED -- designed for use with these media -- + title %Text; #IMPLIED -- advisory title -- + > + +<!ELEMENT SCRIPT - - %Script; -- script statements --> +<!ATTLIST SCRIPT + charset %Charset; #IMPLIED -- char encoding of linked resource -- + type %ContentType; #REQUIRED -- content type of script language -- + language CDATA #IMPLIED -- predefined script language name -- + src %URI; #IMPLIED -- URI for an external script -- + defer (defer) #IMPLIED -- UA may defer execution of script -- + event CDATA #IMPLIED -- reserved for possible future use -- + for %URI; 
#IMPLIED -- reserved for possible future use -- + > + +<!ELEMENT NOSCRIPT - - (%block;)+ + -- alternate content container for non script-based rendering --> +<!ATTLIST NOSCRIPT + %attrs; -- %coreattrs, %i18n, %events -- + > + +<!--================ Document Structure ==================================--> +<!ENTITY % html.content "HEAD, BODY"> + +<!ELEMENT HTML O O (%html.content;) -- document root element --> +<!ATTLIST HTML + %i18n; -- lang, dir -- + > diff --git a/tests/dtds/HTML4.dcl b/tests/dtds/HTML4.dcl new file mode 100644 index 0000000..db46db0 --- /dev/null +++ b/tests/dtds/HTML4.dcl @@ -0,0 +1,88 @@ +<!SGML "ISO 8879:1986 (WWW)" + -- + SGML Declaration for HyperText Markup Language version 4.0 + + With support for the first 17 planes of ISO 10646 and + increased limits for tag and literal lengths etc. + + Modified by jjc to work around SP's 16-bit character limit. + Modified by jjc to support hex character references. + -- + + CHARSET + BASESET "ISO Registration Number 177//CHARSET + ISO/IEC 10646-1:1993 UCS-4 with + implementation level 3//ESC 2/5 2/15 4/6" + DESCSET 0 9 UNUSED + 9 2 9 + 11 2 UNUSED + 13 1 13 + 14 18 UNUSED + 32 95 32 + 127 1 UNUSED + 128 32 UNUSED + -- jjc: changed the rest of the DESCSET. + Note that surrogates are not declared UNUSED; + this allows non-BMP characters to be parsed. 
-- + 160 65376 160 + -- 160 55136 160 + 55296 2048 UNUSED + 57344 1056768 57344 -- + +CAPACITY SGMLREF + TOTALCAP 150000 + GRPCAP 150000 + ENTCAP 150000 + +SCOPE DOCUMENT +SYNTAX + SHUNCHAR CONTROLS 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 + 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 127 + BASESET "ISO 646IRV:1991//CHARSET + International Reference Version + (IRV)//ESC 2/8 4/2" + DESCSET 0 128 0 + + FUNCTION + RE 13 + RS 10 + SPACE 32 + TAB SEPCHAR 9 + + NAMING LCNMSTRT "" + UCNMSTRT "" + LCNMCHAR ".-_:" + UCNMCHAR ".-_:" + NAMECASE GENERAL YES + ENTITY NO + DELIM GENERAL SGMLREF + HCRO "&#X" -- added by jjc -- + SHORTREF SGMLREF + NAMES SGMLREF + QUANTITY SGMLREF + ATTCNT 60 -- increased -- + ATTSPLEN 65536 -- These are the largest values -- + LITLEN 65536 -- permitted in the declaration -- + NAMELEN 65536 -- Avoid fixed limits in actual -- + PILEN 65536 -- implementations of HTML UA's -- + TAGLVL 100 + TAGLEN 65536 + GRPGTCNT 150 + GRPCNT 64 + +FEATURES + MINIMIZE + DATATAG NO + OMITTAG YES + RANK NO + SHORTTAG YES + LINK + SIMPLE NO + IMPLICIT NO + EXPLICIT NO + OTHER + CONCUR NO + SUBDOC NO + FORMAL YES + APPINFO NONE +>
\ No newline at end of file diff --git a/tests/dtds/HTML4.dtd b/tests/dtds/HTML4.dtd new file mode 100644 index 0000000..9e781db --- /dev/null +++ b/tests/dtds/HTML4.dtd @@ -0,0 +1,1092 @@ +<!-- + This is the HTML 4.0 Transitional DTD, which includes + presentation attributes and elements that W3C expects to phase out + as support for style sheets matures. Authors should use the Strict + DTD when possible, but may use the Transitional DTD when support + for presentation attribute and elements is required. + + HTML 4.0 includes mechanisms for style sheets, scripting, + embedding objects, improved support for right to left and mixed + direction text, and enhancements to forms for improved + accessibility for people with disabilities. + + Draft: $Date: 1999/05/02 15:37:15 $ + + Authors: + Dave Raggett <dsr@w3.org> + Arnaud Le Hors <lehors@w3.org> + Ian Jacobs <ij@w3.org> + + Further information about HTML 4.0 is available at: + + http://www.w3.org/TR/REC-html40 +--> +<!ENTITY % HTML.Version "-//W3C//DTD HTML 4.0 Transitional//EN" + -- Typical usage: + + <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" + "http://www.w3.org/TR/REC-html40/loose.dtd"> + <html> + <head> + ... + </head> + <body> + ... + </body> + </html> + + The URI used as a system identifier with the public identifier allows + the user agent to download the DTD and entity sets as needed. + + The FPI for the Strict HTML 4.0 DTD is: + + "-//W3C//DTD HTML 4.0//EN" + + and its URI is: + + http://www.w3.org/TR/REC-html40/strict.dtd + + Authors should use the Strict DTD unless they need the + presentation control for user agents that don't (adequately) + support style sheets. 
+ + If you are writing a document that includes frames, use + the following FPI: + + "-//W3C//DTD HTML 4.0 Frameset//EN" + + with the URI: + + http://www.w3.org/TR/REC-html40/frameset.dtd + + The following URIs are supported in relation to HTML 4.0 + + "http://www.w3.org/TR/REC-html40/strict.dtd" (Strict DTD) + "http://www.w3.org/TR/REC-html40/loose.dtd" (Loose DTD) + "http://www.w3.org/TR/REC-html40/frameset.dtd" (Frameset DTD) + "http://www.w3.org/TR/REC-html40/HTMLlat1.ent" (Latin-1 entities) + "http://www.w3.org/TR/REC-html40/HTMLsymbol.ent" (Symbol entities) + "http://www.w3.org/TR/REC-html40/HTMLspecial.ent" (Special entities) + + These URIs point to the latest version of each file. To reference + this specific revision use the following URIs: + + "http://www.w3.org/TR/REC-html40-971218/strict.dtd" + "http://www.w3.org/TR/REC-html40-971218/loose.dtd" + "http://www.w3.org/TR/REC-html40-971218/frameset.dtd" + "http://www.w3.org/TR/REC-html40-971218/HTMLlat1.ent" + "http://www.w3.org/TR/REC-html40-971218/HTMLsymbol.ent" + "http://www.w3.org/TR/REC-html40-971218/HTMLspecial.ent" + +--> + +<!--================== Imported Names ====================================--> + +<!ENTITY % ContentType "CDATA" + -- media type, as per [RFC2045] + --> + +<!ENTITY % ContentTypes "CDATA" + -- comma-separated list of media types, as per [RFC2045] + --> + +<!ENTITY % Charset "CDATA" + -- a character encoding, as per [RFC2045] + --> + +<!ENTITY % Charsets "CDATA" + -- a space separated list of character encodings, as per [RFC2045] + --> + +<!ENTITY % LanguageCode "NAME" + -- a language code, as per [RFC1766] + --> + +<!ENTITY % Character "CDATA" + -- a single character from [ISO10646] + --> + +<!ENTITY % LinkTypes "CDATA" + -- space-separated list of link types + --> + +<!ENTITY % MediaDesc "CDATA" + -- single or comma-separated list of media descriptors + --> + +<!ENTITY % URI "CDATA" + -- a Uniform Resource Identifier, + see [URI] + --> + +<!ENTITY % Datetime "CDATA" -- date and 
time information. ISO date format --> + + +<!ENTITY % Script "CDATA" -- script expression --> + +<!ENTITY % StyleSheet "CDATA" -- style sheet data --> + +<!ENTITY % FrameTarget "CDATA" -- render in this frame --> + + +<!ENTITY % Text "CDATA"> + + +<!-- Parameter Entities --> + +<!ENTITY % head.misc "SCRIPT|STYLE|META|LINK|OBJECT" -- repeatable head elements --> + +<!ENTITY % heading "H1|H2|H3|H4|H5|H6"> + +<!ENTITY % list "UL | OL | DIR | MENU"> + +<!ENTITY % preformatted "PRE"> + +<!ENTITY % Color "CDATA" -- a color using sRGB: #RRGGBB as Hex values --> + +<!-- There are also 16 widely known color names with their sRGB values: + + Black = #000000 Green = #008000 + Silver = #C0C0C0 Lime = #00FF00 + Gray = #808080 Olive = #808000 + White = #FFFFFF Yellow = #FFFF00 + Maroon = #800000 Navy = #000080 + Red = #FF0000 Blue = #0000FF + Purple = #800080 Teal = #008080 + Fuchsia= #FF00FF Aqua = #00FFFF + --> + +<!ENTITY % bodycolors " + bgcolor %Color; #IMPLIED -- document background color -- + text %Color; #IMPLIED -- document text color -- + link %Color; #IMPLIED -- color of links -- + vlink %Color; #IMPLIED -- color of visited links -- + alink %Color; #IMPLIED -- color of selected links -- + "> + +<!--================ Character mnemonic entities =========================--> + +<!ENTITY % HTMLlat1 PUBLIC + "-//W3C//ENTITIES Latin1//EN//HTML" + "http://www.w3.org/TR/REC-html40-971218/HTMLlat1.ent"> +%HTMLlat1; + +<!ENTITY % HTMLsymbol PUBLIC + "-//W3C//ENTITIES Symbols//EN//HTML" + "http://www.w3.org/TR/REC-html40-971218/HTMLsymbol.ent"> +%HTMLsymbol; + +<!ENTITY % HTMLspecial PUBLIC + "-//W3C//ENTITIES Special//EN//HTML" + "http://www.w3.org/TR/REC-html40-971218/HTMLspecial.ent"> +%HTMLspecial; +<!--=================== Generic Attributes ===============================--> + +<!ENTITY % coreattrs + "id ID #IMPLIED -- document-wide unique id -- + class CDATA #IMPLIED -- space separated list of classes -- + style %StyleSheet; #IMPLIED -- associated style info -- + title 
%Text; #IMPLIED -- advisory title/amplification --" + > + +<!ENTITY % i18n + "lang %LanguageCode; #IMPLIED -- language code -- + dir (ltr|rtl) #IMPLIED -- direction for weak/neutral text --" + > + +<!ENTITY % events + "onclick %Script; #IMPLIED -- a pointer button was clicked -- + ondblclick %Script; #IMPLIED -- a pointer button was double clicked-- + onmousedown %Script; #IMPLIED -- a pointer button was pressed down -- + onmouseup %Script; #IMPLIED -- a pointer button was released -- + onmouseover %Script; #IMPLIED -- a pointer was moved onto -- + onmousemove %Script; #IMPLIED -- a pointer was moved within -- + onmouseout %Script; #IMPLIED -- a pointer was moved away -- + onkeypress %Script; #IMPLIED -- a key was pressed and released -- + onkeydown %Script; #IMPLIED -- a key was pressed down -- + onkeyup %Script; #IMPLIED -- a key was released --" + > + +<!-- Reserved Feature Switch --> +<!ENTITY % HTML.Reserved "IGNORE"> + +<!-- The following attributes are reserved for possible future use --> +<![ %HTML.Reserved; [ +<!ENTITY % reserved + "datasrc %URI; #IMPLIED -- a single or tabular Data Source -- + datafld CDATA #IMPLIED -- the property or column name -- + dataformatas (plaintext|html) plaintext -- text or html --" + > +]]> + +<!ENTITY % reserved ""> + +<!ENTITY % attrs "%coreattrs; %i18n; %events;"> + +<!ENTITY % align "align (left|center|right|justify) #IMPLIED" + -- default is left for ltr paragraphs, right for rtl -- + > + +<!--=================== Text Markup ======================================--> + +<!ENTITY % fontstyle + "TT | I | B | U | S | STRIKE | BIG | SMALL"> + +<!ENTITY % phrase "EM | STRONG | DFN | CODE | + SAMP | KBD | VAR | CITE | ABBR | ACRONYM" > + +<!ENTITY % special + "A | IMG | APPLET | OBJECT | FONT | BASEFONT | BR | SCRIPT | + MAP | Q | SUB | SUP | SPAN | BDO | IFRAME"> + +<!ENTITY % formctrl "INPUT | SELECT | TEXTAREA | LABEL | BUTTON"> + +<!-- %inline; covers inline or "text-level" elements --> +<!ENTITY % inline "#PCDATA | 
%fontstyle; | %phrase; | %special; | %formctrl;"> + +<!ELEMENT (%fontstyle;|%phrase;) - - (%inline;)*> +<!ATTLIST (%fontstyle;|%phrase;) + %attrs; -- %coreattrs, %i18n, %events -- + > + +<!ELEMENT (SUB|SUP) - - (%inline;)* -- subscript, superscript --> +<!ATTLIST (SUB|SUP) + %attrs; -- %coreattrs, %i18n, %events -- + > + +<!ELEMENT SPAN - - (%inline;)* -- generic language/style container --> +<!ATTLIST SPAN + %attrs; -- %coreattrs, %i18n, %events -- + %reserved; -- reserved for possible future use -- + > + +<!ELEMENT BDO - - (%inline;)* -- I18N BiDi over-ride --> +<!ATTLIST BDO + %coreattrs; -- id, class, style, title -- + lang %LanguageCode; #IMPLIED -- language code -- + dir (ltr|rtl) #REQUIRED -- directionality -- + > + +<!ELEMENT BASEFONT - O EMPTY -- base font size --> +<!ATTLIST BASEFONT + id ID #IMPLIED -- document-wide unique id -- + size CDATA #REQUIRED -- base font size for FONT elements -- + color %Color; #IMPLIED -- text color -- + face CDATA #IMPLIED -- comma separated list of font names -- + > + +<!ELEMENT FONT - - (%inline;)* -- local change to font --> +<!ATTLIST FONT + %coreattrs; -- id, class, style, title -- + %i18n; -- lang, dir -- + size CDATA #IMPLIED -- [+|-]nn e.g. size="+1", size="4" -- + color %Color; #IMPLIED -- text color -- + face CDATA #IMPLIED -- comma separated list of font names -- + > + +<!ELEMENT BR - O EMPTY -- forced line break --> +<!ATTLIST BR + %coreattrs; -- id, class, style, title -- + clear (left|all|right|none) none -- control of text flow -- + > + +<!--================== HTML content models ===============================--> + +<!-- + HTML has two basic content models: + + %inline; character level elements and text strings + %block; block-like elements e.g. 
paragraphs and lists +--> + +<!ENTITY % block + "P | %heading; | %list; | %preformatted; | DL | DIV | CENTER | + NOSCRIPT | NOFRAMES | BLOCKQUOTE | FORM | ISINDEX | HR | + TABLE | FIELDSET | ADDRESS"> + +<!ENTITY % flow "%block; | %inline;"> + +<!--=================== Document Body ====================================--> + +<!ELEMENT BODY O O (%flow;)* +(INS|DEL) -- document body --> +<!ATTLIST BODY + %attrs; -- %coreattrs, %i18n, %events -- + onload %Script; #IMPLIED -- the document has been loaded -- + onunload %Script; #IMPLIED -- the document has been removed -- + background %URI; #IMPLIED -- texture tile for document + background -- + %bodycolors; -- bgcolor, text, link, vlink, alink -- + > + +<!ELEMENT ADDRESS - - ((%inline;)|P)* -- information on author --> +<!ATTLIST ADDRESS + %attrs; -- %coreattrs, %i18n, %events -- + > + +<!ELEMENT DIV - - (%flow;)* -- generic language/style container --> +<!ATTLIST DIV + %attrs; -- %coreattrs, %i18n, %events -- + %align; -- align, text alignment -- + %reserved; -- reserved for possible future use -- + > + +<!ELEMENT CENTER - - (%flow;)* -- shorthand for DIV align=center --> +<!ATTLIST CENTER + %attrs; -- %coreattrs, %i18n, %events -- + > + +<!--================== The Anchor Element ================================--> + +<!ENTITY % Shape "(rect|circle|poly|default)"> +<!ENTITY % Coords "CDATA" -- comma separated list of lengths --> + +<!ELEMENT A - - (%inline;)* -(A) -- anchor --> +<!ATTLIST A + %attrs; -- %coreattrs, %i18n, %events -- + charset %Charset; #IMPLIED -- char encoding of linked resource -- + type %ContentType; #IMPLIED -- advisory content type -- + name CDATA #IMPLIED -- named link end -- + href %URI; #IMPLIED -- URI for linked resource -- + hreflang %LanguageCode; #IMPLIED -- language code -- + target %FrameTarget; #IMPLIED -- render in this frame -- + rel %LinkTypes; #IMPLIED -- forward link types -- + rev %LinkTypes; #IMPLIED -- reverse link types -- + accesskey %Character; #IMPLIED -- accessibility key 
character -- + shape %Shape; rect -- for use with client-side image maps -- + coords %Coords; #IMPLIED -- for use with client-side image maps -- + tabindex NUMBER #IMPLIED -- position in tabbing order -- + onfocus %Script; #IMPLIED -- the element got the focus -- + onblur %Script; #IMPLIED -- the element lost the focus -- + > + +<!--================== Client-side image maps ============================--> + +<!-- These can be placed in the same document or grouped in a + separate document although this isn't yet widely supported --> + +<!ELEMENT MAP - - ((%block;)+ | AREA+) -- client-side image map --> +<!ATTLIST MAP + %attrs; -- %coreattrs, %i18n, %events -- + name CDATA #REQUIRED -- for reference by usemap -- + > + +<!ELEMENT AREA - O EMPTY -- client-side image map area --> +<!ATTLIST AREA + %attrs; -- %coreattrs, %i18n, %events -- + shape %Shape; rect -- controls interpretation of coords -- + coords %Coords; #IMPLIED -- comma separated list of lengths -- + href %URI; #IMPLIED -- URI for linked resource -- + target %FrameTarget; #IMPLIED -- render in this frame -- + nohref (nohref) #IMPLIED -- this region has no action -- + alt %Text; #REQUIRED -- short description -- + tabindex NUMBER #IMPLIED -- position in tabbing order -- + accesskey %Character; #IMPLIED -- accessibility key character -- + onfocus %Script; #IMPLIED -- the element got the focus -- + onblur %Script; #IMPLIED -- the element lost the focus -- + > + +<!--================== The LINK Element ==================================--> + +<!-- + Relationship values can be used in principle: + + a) for document specific toolbars/menus when used + with the LINK element in document head e.g. + start, contents, previous, next, index, end, help + b) to link to a separate style sheet (rel=stylesheet) + c) to make a link to a script (rel=script) + d) by stylesheets to control how collections of + html nodes are rendered into printed documents + e) to make a link to a printable version of this document + e.g. 
a postscript or pdf version (rel=alternate media=print) +--> + +<!ELEMENT LINK - O EMPTY -- a media-independent link --> +<!ATTLIST LINK + %attrs; -- %coreattrs, %i18n, %events -- + charset %Charset; #IMPLIED -- char encoding of linked resource -- + href %URI; #IMPLIED -- URI for linked resource -- + hreflang %LanguageCode; #IMPLIED -- language code -- + type %ContentType; #IMPLIED -- advisory content type -- + rel %LinkTypes; #IMPLIED -- forward link types -- + rev %LinkTypes; #IMPLIED -- reverse link types -- + media %MediaDesc; #IMPLIED -- for rendering on these media -- + target %FrameTarget; #IMPLIED -- render in this frame -- + > + +<!--=================== Images ===========================================--> + +<!-- Length defined in strict DTD for cellpadding/cellspacing --> +<!ENTITY % Length "CDATA" -- nn for pixels or nn% for percentage length --> +<!ENTITY % MultiLength "CDATA" -- pixel, percentage, or relative --> + +<!ENTITY % MultiLengths "CDATA" -- comma-separated list of MultiLength --> + +<!ENTITY % Pixels "CDATA" -- integer representing length in pixels --> + +<!ENTITY % IAlign "(top|middle|bottom|left|right)" -- center? 
--> + +<!-- To avoid problems with text-only UAs as well as + to make image content understandable and navigable + to users of non-visual UAs, you need to provide + a description with ALT, and avoid server-side image maps --> +<!ELEMENT IMG - O EMPTY -- Embedded image --> +<!ATTLIST IMG + %attrs; -- %coreattrs, %i18n, %events -- + src %URI; #REQUIRED -- URI of image to embed -- + alt %Text; #REQUIRED -- short description -- + longdesc %URI; #IMPLIED -- link to long description + (complements alt) -- + height %Length; #IMPLIED -- override height -- + width %Length; #IMPLIED -- override width -- + usemap %URI; #IMPLIED -- use client-side image map -- + ismap (ismap) #IMPLIED -- use server-side image map -- + align %IAlign; #IMPLIED -- vertical or horizontal alignment -- + border %Length; #IMPLIED -- link border width -- + hspace %Pixels; #IMPLIED -- horizontal gutter -- + vspace %Pixels; #IMPLIED -- vertical gutter -- + > + +<!-- USEMAP points to a MAP element which may be in this document + or an external document, although the latter is not widely supported --> + +<!--==================== OBJECT ======================================--> +<!-- + OBJECT is used to embed objects as part of HTML pages + PARAM elements should precede other content. SGML mixed content + model technicality precludes specifying this formally ... 
+--> + +<!ELEMENT OBJECT - - (PARAM | %flow;)* + -- generic embedded object --> +<!ATTLIST OBJECT + %attrs; -- %coreattrs, %i18n, %events -- + declare (declare) #IMPLIED -- declare but don't instantiate flag -- + classid %URI; #IMPLIED -- identifies an implementation -- + codebase %URI; #IMPLIED -- base URI for classid, data, archive-- + data %URI; #IMPLIED -- reference to object's data -- + type %ContentType; #IMPLIED -- content type for data -- + codetype %ContentType; #IMPLIED -- content type for code -- + archive %URI; #IMPLIED -- space separated archive list -- + standby %Text; #IMPLIED -- message to show while loading -- + height %Length; #IMPLIED -- override height -- + width %Length; #IMPLIED -- override width -- + usemap %URI; #IMPLIED -- use client-side image map -- + name CDATA #IMPLIED -- submit as part of form -- + tabindex NUMBER #IMPLIED -- position in tabbing order -- + align %IAlign; #IMPLIED -- vertical or horizontal alignment -- + border %Length; #IMPLIED -- link border width -- + hspace %Pixels; #IMPLIED -- horizontal gutter -- + vspace %Pixels; #IMPLIED -- vertical gutter -- + %reserved; -- reserved for possible future use -- + > + +<!ELEMENT PARAM - O EMPTY -- named property value --> +<!ATTLIST PARAM + id ID #IMPLIED -- document-wide unique id -- + name CDATA #REQUIRED -- property name -- + value CDATA #IMPLIED -- property value -- + valuetype (DATA|REF|OBJECT) DATA -- How to interpret value -- + type %ContentType; #IMPLIED -- content type for value + when valuetype=ref -- + > + +<!--=================== Java APPLET ==================================--> +<!-- + One of code or object attributes must be present. + Place PARAM elements before other content. 
+--> +<!ELEMENT APPLET - - (PARAM | %flow;)* -- Java applet --> +<!ATTLIST APPLET + %coreattrs; -- id, class, style, title -- + codebase %URI; #IMPLIED -- optional base URI for applet -- + archive CDATA #IMPLIED -- comma separated archive list -- + code CDATA #IMPLIED -- applet class file -- + object CDATA #IMPLIED -- serialized applet file -- + alt %Text; #IMPLIED -- short description -- + name CDATA #IMPLIED -- allows applets to find each other -- + width %Length; #REQUIRED -- initial width -- + height %Length; #REQUIRED -- initial height -- + align %IAlign; #IMPLIED -- vertical or horizontal alignment -- + hspace %Pixels; #IMPLIED -- horizontal gutter -- + vspace %Pixels; #IMPLIED -- vertical gutter -- + > + +<!--=================== Horizontal Rule ==================================--> + +<!ELEMENT HR - O EMPTY -- horizontal rule --> +<!ATTLIST HR + %coreattrs; -- id, class, style, title -- + %events; + align (left|center|right) #IMPLIED + noshade (noshade) #IMPLIED + size %Pixels; #IMPLIED + width %Length; #IMPLIED + > + +<!--=================== Paragraphs =======================================--> + +<!ELEMENT P - O (%inline;)* -- paragraph --> +<!ATTLIST P + %attrs; -- %coreattrs, %i18n, %events -- + %align; -- align, text alignment -- + > + +<!--=================== Headings =========================================--> + +<!-- + There are six levels of headings from H1 (the most important) + to H6 (the least important). 
+--> + +<!ELEMENT (%heading;) - - (%inline;)* -- heading --> +<!ATTLIST (%heading;) + %attrs; -- %coreattrs, %i18n, %events -- + %align; -- align, text alignment -- + > + +<!--=================== Preformatted Text ================================--> + +<!-- excludes markup for images and changes in font size --> +<!ENTITY % pre.exclusion "IMG|OBJECT|APPLET|BIG|SMALL|SUB|SUP|FONT|BASEFONT"> + +<!ELEMENT PRE - - (%inline;)* -(%pre.exclusion;) -- preformatted text --> +<!ATTLIST PRE + %attrs; -- %coreattrs, %i18n, %events -- + width NUMBER #IMPLIED + > + +<!--===================== Inline Quotes ==================================--> + +<!ELEMENT Q - - (%inline;)* -- short inline quotation --> +<!ATTLIST Q + %attrs; -- %coreattrs, %i18n, %events -- + cite %URI; #IMPLIED -- URI for source document or msg -- + > + +<!--=================== Block-like Quotes ================================--> + +<!ELEMENT BLOCKQUOTE - - (%flow;)* -- long quotation --> +<!ATTLIST BLOCKQUOTE + %attrs; -- %coreattrs, %i18n, %events -- + cite %URI; #IMPLIED -- URI for source document or msg -- + > + +<!--=================== Inserted/Deleted Text ============================--> + + +<!-- INS/DEL are handled by inclusion on BODY --> +<!ELEMENT (INS|DEL) - - (%flow;)* -- inserted text, deleted text --> +<!ATTLIST (INS|DEL) + %attrs; -- %coreattrs, %i18n, %events -- + cite %URI; #IMPLIED -- info on reason for change -- + datetime %Datetime; #IMPLIED -- date and time of change -- + > + +<!--=================== Lists ============================================--> + +<!-- definition lists - DT for term, DD for its definition --> + +<!ELEMENT DL - - (DT|DD)+ -- definition list --> +<!ATTLIST DL + %attrs; -- %coreattrs, %i18n, %events -- + compact (compact) #IMPLIED -- reduced interitem spacing -- + > + +<!ELEMENT DT - O (%inline;)* -- definition term --> +<!ELEMENT DD - O (%flow;)* -- definition description --> +<!ATTLIST (DT|DD) + %attrs; -- %coreattrs, %i18n, %events -- + > + +<!-- Ordered lists 
(OL) Numbering style + + 1 arablic numbers 1, 2, 3, ... + a lower alpha a, b, c, ... + A upper alpha A, B, C, ... + i lower roman i, ii, iii, ... + I upper roman I, II, III, ... + + The style is applied to the sequence number which by default + is reset to 1 for the first list item in an ordered list. + + This can't be expressed directly in SGML due to case folding. +--> + +<!ENTITY % OLStyle "CDATA" -- constrained to: "(1|a|A|i|I)" --> + +<!ELEMENT OL - - (LI)+ -- ordered list --> +<!ATTLIST OL + %attrs; -- %coreattrs, %i18n, %events -- + type %OLStyle; #IMPLIED -- numbering style -- + compact (compact) #IMPLIED -- reduced interitem spacing -- + start NUMBER #IMPLIED -- starting sequence number -- + > + +<!-- Unordered Lists (UL) bullet styles --> +<!ENTITY % ULStyle "(disc|square|circle)"> + +<!ELEMENT UL - - (LI)+ -- unordered list --> +<!ATTLIST UL + %attrs; -- %coreattrs, %i18n, %events -- + type %ULStyle; #IMPLIED -- bullet style -- + compact (compact) #IMPLIED -- reduced interitem spacing -- + > + +<!ELEMENT (DIR|MENU) - - (LI)+ -(%block;) -- directory list, menu list --> +<!ATTLIST DIR + %attrs; -- %coreattrs, %i18n, %events -- + compact (compact) #IMPLIED + > +<!ATTLIST MENU + %attrs; -- %coreattrs, %i18n, %events -- + compact (compact) #IMPLIED + > + +<!ENTITY % LIStyle "CDATA" -- constrained to: "(%ULStyle;|%OLStyle;)" --> + +<!ELEMENT LI - O (%flow;)* -- list item --> +<!ATTLIST LI + %attrs; -- %coreattrs, %i18n, %events -- + type %LIStyle; #IMPLIED -- list item style -- + value NUMBER #IMPLIED -- reset sequence number -- + > + +<!--================ Forms ===============================================--> +<!ELEMENT FORM - - (%flow;)* -(FORM) -- interactive form --> +<!ATTLIST FORM + %attrs; -- %coreattrs, %i18n, %events -- + action %URI; #REQUIRED -- server-side form handler -- + method (GET|POST) GET -- HTTP method used to submit the form-- + enctype %ContentType; "application/x-www-form-urlencoded" + onsubmit %Script; #IMPLIED -- the form was 
submitted -- + onreset %Script; #IMPLIED -- the form was reset -- + target %FrameTarget; #IMPLIED -- render in this frame -- + accept-charset %Charsets; #IMPLIED -- list of supported charsets -- + > + +<!-- Each label must not contain more than ONE field --> +<!ELEMENT LABEL - - (%inline;)* -(LABEL) -- form field label text --> +<!ATTLIST LABEL + %attrs; -- %coreattrs, %i18n, %events -- + for IDREF #IMPLIED -- matches field ID value -- + accesskey %Character; #IMPLIED -- accessibility key character -- + onfocus %Script; #IMPLIED -- the element got the focus -- + onblur %Script; #IMPLIED -- the element lost the focus -- + > + +<!ENTITY % InputType + "(TEXT | PASSWORD | CHECKBOX | + RADIO | SUBMIT | RESET | + FILE | HIDDEN | IMAGE | BUTTON)" + > + +<!-- attribute name required for all but submit & reset --> +<!ELEMENT INPUT - O EMPTY -- form control --> +<!ATTLIST INPUT + %attrs; -- %coreattrs, %i18n, %events -- + type %InputType; TEXT -- what kind of widget is needed -- + name CDATA #IMPLIED -- submit as part of form -- + value CDATA #IMPLIED -- required for radio and checkboxes -- + checked (checked) #IMPLIED -- for radio buttons and check boxes -- + disabled (disabled) #IMPLIED -- unavailable in this context -- + readonly (readonly) #IMPLIED -- for text and passwd -- + size CDATA #IMPLIED -- specific to each type of field -- + maxlength NUMBER #IMPLIED -- max chars for text fields -- + src %URI; #IMPLIED -- for fields with images -- + alt CDATA #IMPLIED -- short description -- + usemap %URI; #IMPLIED -- use client-side image map -- + tabindex NUMBER #IMPLIED -- position in tabbing order -- + accesskey %Character; #IMPLIED -- accessibility key character -- + onfocus %Script; #IMPLIED -- the element got the focus -- + onblur %Script; #IMPLIED -- the element lost the focus -- + onselect %Script; #IMPLIED -- some text was selected -- + onchange %Script; #IMPLIED -- the element value was changed -- + accept %ContentTypes; #IMPLIED -- list of MIME types for file upload 
-- + align %IAlign; #IMPLIED -- vertical or horizontal alignment -- + %reserved; -- reserved for possible future use -- + > + +<!ELEMENT SELECT - - (OPTGROUP|OPTION)+ -- option selector --> +<!ATTLIST SELECT + %attrs; -- %coreattrs, %i18n, %events -- + name CDATA #IMPLIED -- field name -- + size NUMBER #IMPLIED -- rows visible -- + multiple (multiple) #IMPLIED -- default is single selection -- + disabled (disabled) #IMPLIED -- unavailable in this context -- + tabindex NUMBER #IMPLIED -- position in tabbing order -- + onfocus %Script; #IMPLIED -- the element got the focus -- + onblur %Script; #IMPLIED -- the element lost the focus -- + onchange %Script; #IMPLIED -- the element value was changed -- + %reserved; -- reserved for possible future use -- + > + +<!ELEMENT OPTGROUP - - (OPTION)+ -- option group --> +<!ATTLIST OPTGROUP + %attrs; -- %coreattrs, %i18n, %events -- + disabled (disabled) #IMPLIED -- unavailable in this context -- + label %Text; #REQUIRED -- for use in hierarchical menus -- + > + +<!ELEMENT OPTION - O (#PCDATA) -- selectable choice --> +<!ATTLIST OPTION + %attrs; -- %coreattrs, %i18n, %events -- + selected (selected) #IMPLIED + disabled (disabled) #IMPLIED -- unavailable in this context -- + label %Text; #IMPLIED -- for use in hierarchical menus -- + value CDATA #IMPLIED -- defaults to element content -- + > + +<!ELEMENT TEXTAREA - - (#PCDATA) -- multi-line text field --> +<!ATTLIST TEXTAREA + %attrs; -- %coreattrs, %i18n, %events -- + name CDATA #IMPLIED + rows NUMBER #REQUIRED + cols NUMBER #REQUIRED + disabled (disabled) #IMPLIED -- unavailable in this context -- + readonly (readonly) #IMPLIED + tabindex NUMBER #IMPLIED -- position in tabbing order -- + accesskey %Character; #IMPLIED -- accessibility key character -- + onfocus %Script; #IMPLIED -- the element got the focus -- + onblur %Script; #IMPLIED -- the element lost the focus -- + onselect %Script; #IMPLIED -- some text was selected -- + onchange %Script; #IMPLIED -- the element value was 
changed -- + %reserved; -- reserved for possible future use -- + > + +<!-- + #PCDATA is to solve the mixed content problem, + per specification only whitespace is allowed there! + --> +<!ELEMENT FIELDSET - - (#PCDATA,LEGEND,(%flow;)*) -- form control group --> +<!ATTLIST FIELDSET + %attrs; -- %coreattrs, %i18n, %events -- + > + +<!ELEMENT LEGEND - - (%inline;)* -- fieldset legend --> +<!ENTITY % LAlign "(top|bottom|left|right)"> + +<!ATTLIST LEGEND + %attrs; -- %coreattrs, %i18n, %events -- + accesskey %Character; #IMPLIED -- accessibility key character -- + align %LAlign; #IMPLIED -- relative to fieldset -- + > + +<!ELEMENT BUTTON - - + (%flow;)* -(A|%formctrl;|FORM|ISINDEX|FIELDSET|IFRAME) + -- push button --> +<!ATTLIST BUTTON + %attrs; -- %coreattrs, %i18n, %events -- + name CDATA #IMPLIED + value CDATA #IMPLIED -- sent to server when submitted -- + type (button|submit|reset) submit -- for use as form button -- + disabled (disabled) #IMPLIED -- unavailable in this context -- + tabindex NUMBER #IMPLIED -- position in tabbing order -- + accesskey %Character; #IMPLIED -- accessibility key character -- + onfocus %Script; #IMPLIED -- the element got the focus -- + onblur %Script; #IMPLIED -- the element lost the focus -- + %reserved; -- reserved for possible future use -- + > + +<!--======================= Tables =======================================--> + +<!-- IETF HTML table standard, see [RFC1942] --> + +<!-- + The BORDER attribute sets the thickness of the frame around the + table. The default units are screen pixels. + + The FRAME attribute specifies which parts of the frame around + the table should be rendered. The values are not the same as + CALS to avoid a name clash with the VALIGN attribute. + + The value "border" is included for backwards compatibility with + <TABLE BORDER> which yields frame=border and border=implied + For <TABLE BORDER=1> you get border=1 and frame=implied. 
In this + case, it is appropriate to treat this as frame=border for backwards + compatibility with deployed browsers. +--> +<!ENTITY % TFrame "(void|above|below|hsides|lhs|rhs|vsides|box|border)"> + +<!-- + The RULES attribute defines which rules to draw between cells: + + If RULES is absent then assume: + "none" if BORDER is absent or BORDER=0 otherwise "all" +--> + +<!ENTITY % TRules "(none | groups | rows | cols | all)"> + +<!-- horizontal placement of table relative to document --> +<!ENTITY % TAlign "(left|center|right)"> + +<!-- horizontal alignment attributes for cell contents --> +<!ENTITY % cellhalign + "align (left|center|right|justify|char) #IMPLIED + char %Character; #IMPLIED -- alignment char, e.g. char=':' -- + charoff %Length; #IMPLIED -- offset for alignment char --" + > + +<!-- vertical alignment attributes for cell contents --> +<!ENTITY % cellvalign + "valign (top|middle|bottom|baseline) #IMPLIED" + > + +<!ELEMENT TABLE - - + (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)> +<!ELEMENT CAPTION - - (%inline;)* -- table caption --> +<!ELEMENT THEAD - O (TR)+ -- table header --> +<!ELEMENT TFOOT - O (TR)+ -- table footer --> +<!ELEMENT TBODY O O (TR)+ -- table body --> +<!ELEMENT COLGROUP - O (col)* -- table column group --> +<!ELEMENT COL - O EMPTY -- table column --> +<!ELEMENT TR - O (TH|TD)+ -- table row --> +<!ELEMENT (TH|TD) - O (%flow;)* -- table header cell, table data cell--> + +<!ATTLIST TABLE -- table element -- + %attrs; -- %coreattrs, %i18n, %events -- + summary %Text; #IMPLIED -- purpose/structure for speech output-- + width %Length; #IMPLIED -- table width -- + border %Pixels; #IMPLIED -- controls frame width around table -- + frame %TFrame; #IMPLIED -- which parts of frame to render -- + rules %TRules; #IMPLIED -- rulings between rows and cols -- + cellspacing %Length; #IMPLIED -- spacing between cells -- + cellpadding %Length; #IMPLIED -- spacing within cells -- + align %TAlign; #IMPLIED -- table position relative to window -- + 
bgcolor %Color; #IMPLIED -- background color for cells -- + %reserved; -- reserved for possible future use -- + datapagesize CDATA #IMPLIED -- reserved for possible future use -- + > + +<!ENTITY % CAlign "(top|bottom|left|right)"> + +<!ATTLIST CAPTION + %attrs; -- %coreattrs, %i18n, %events -- + align %CAlign; #IMPLIED -- relative to table -- + > + +<!-- +COLGROUP groups a set of COL elements. It allows you to group +several semantically related columns together. +--> +<!ATTLIST COLGROUP + %attrs; -- %coreattrs, %i18n, %events -- + span NUMBER 1 -- default number of columns in group -- + width %MultiLength; #IMPLIED -- default width for enclosed COLs -- + %cellhalign; -- horizontal alignment in cells -- + %cellvalign; -- vertical alignment in cells -- + > + +<!-- + COL elements define the alignment properties for cells in + one or more columns. + + The WIDTH attribute specifies the width of the columns, e.g. + + width=64 width in screen pixels + width=0.5* relative width of 0.5 + + The SPAN attribute causes the attributes of one + COL element to apply to more than one column. +--> +<!ATTLIST COL -- column groups and properties -- + %attrs; -- %coreattrs, %i18n, %events -- + span NUMBER 1 -- COL attributes affect N columns -- + width %MultiLength; #IMPLIED -- column width specification -- + %cellhalign; -- horizontal alignment in cells -- + %cellvalign; -- vertical alignment in cells -- + > + +<!-- + Use THEAD to duplicate headers when breaking table + across page boundaries, or for static headers when + TBODY sections are rendered in scrolling panel. + + Use TFOOT to duplicate footers when breaking table + across page boundaries, or for static footers when + TBODY sections are rendered in scrolling panel. + + Use multiple TBODY sections when rules are needed + between groups of table rows. 
+--> +<!ATTLIST (THEAD|TBODY|TFOOT) -- table section -- + %attrs; -- %coreattrs, %i18n, %events -- + %cellhalign; -- horizontal alignment in cells -- + %cellvalign; -- vertical alignment in cells -- + > + +<!ATTLIST TR -- table row -- + %attrs; -- %coreattrs, %i18n, %events -- + %cellhalign; -- horizontal alignment in cells -- + %cellvalign; -- vertical alignment in cells -- + bgcolor %Color; #IMPLIED -- background color for row -- + > + + +<!-- Scope is simpler than axes attribute for common tables --> +<!ENTITY % Scope "(row|col|rowgroup|colgroup)"> + +<!-- TH is for headers, TD for data, but for cells acting as both use TD --> +<!ATTLIST (TH|TD) -- header or data cell -- + %attrs; -- %coreattrs, %i18n, %events -- + abbr %Text; #IMPLIED -- abbreviation for header cell -- + axis CDATA #IMPLIED -- names groups of related headers-- + headers IDREFS #IMPLIED -- list of id's for header cells -- + scope %Scope; #IMPLIED -- scope covered by header cells -- + rowspan NUMBER 1 -- number of rows spanned by cell -- + colspan NUMBER 1 -- number of cols spanned by cell -- + %cellhalign; -- horizontal alignment in cells -- + %cellvalign; -- vertical alignment in cells -- + nowrap (nowrap) #IMPLIED -- suppress word wrap -- + bgcolor %Color; #IMPLIED -- cell background color -- + width %Pixels; #IMPLIED -- width for cell -- + height %Pixels; #IMPLIED -- height for cell -- + > + +<!--================== Document Frames ===================================--> + +<!-- + The content model for HTML documents depends on whether the HEAD is + followed by a FRAMESET or BODY element. The widespread omission of + the BODY start tag makes it impractical to define the content model + without the use of a marked section. +--> + +<!-- Feature Switch for frameset documents --> +<!ENTITY % HTML.Frameset "IGNORE"> + +<![ %HTML.Frameset; [ +<!ELEMENT FRAMESET - - ((FRAMESET|FRAME)+ & NOFRAMES?) 
-- window subdivision--> +<!ATTLIST FRAMESET + %coreattrs; -- id, class, style, title -- + rows %MultiLengths; #IMPLIED -- list of lengths, + default: 100% (1 row) -- + cols %MultiLengths; #IMPLIED -- list of lengths, + default: 100% (1 col) -- + onload %Script; #IMPLIED -- all the frames have been loaded -- + onunload %Script; #IMPLIED -- all the frames have been removed -- + > +]]> + +<![ %HTML.Frameset; [ +<!-- reserved frame names start with "_" otherwise starts with letter --> +<!ELEMENT FRAME - O EMPTY -- subwindow --> +<!ATTLIST FRAME + %coreattrs; -- id, class, style, title -- + longdesc %URI; #IMPLIED -- link to long description + (complements title) -- + name CDATA #IMPLIED -- name of frame for targetting -- + src %URI; #IMPLIED -- source of frame content -- + frameborder (1|0) 1 -- request frame borders? -- + marginwidth %Pixels; #IMPLIED -- margin widths in pixels -- + marginheight %Pixels; #IMPLIED -- margin height in pixels -- + noresize (noresize) #IMPLIED -- allow users to resize frames? -- + scrolling (yes|no|auto) auto -- scrollbar or none -- + > +]]> + +<!ELEMENT IFRAME - - (%flow;)* -- inline subwindow --> +<!ATTLIST IFRAME + %coreattrs; -- id, class, style, title -- + longdesc %URI; #IMPLIED -- link to long description + (complements title) -- + name CDATA #IMPLIED -- name of frame for targetting -- + src %URI; #IMPLIED -- source of frame content -- + frameborder (1|0) 1 -- request frame borders? 
-- + marginwidth %Pixels; #IMPLIED -- margin widths in pixels -- + marginheight %Pixels; #IMPLIED -- margin height in pixels -- + scrolling (yes|no|auto) auto -- scrollbar or none -- + align %IAlign; #IMPLIED -- vertical or horizontal alignment -- + height %Length; #IMPLIED -- frame height -- + width %Length; #IMPLIED -- frame width -- + > + +<![ %HTML.Frameset; [ +<!ENTITY % noframes.content "(BODY) -(NOFRAMES)"> +]]> + +<!ENTITY % noframes.content "(%flow;)*"> + +<!ELEMENT NOFRAMES - - %noframes.content; + -- alternate content container for non frame-based rendering --> +<!ATTLIST NOFRAMES + %attrs; -- %coreattrs, %i18n, %events -- + > + +<!--================ Document Head =======================================--> +<!-- %head.misc; defined earlier on as "SCRIPT|STYLE|META|LINK|OBJECT" --> +<!ENTITY % head.content "TITLE & ISINDEX? & BASE?"> + +<!ELEMENT HEAD O O (%head.content;) +(%head.misc;) -- document head --> +<!ATTLIST HEAD + %i18n; -- lang, dir -- + profile %URI; #IMPLIED -- named dictionary of meta info -- + > + +<!-- The TITLE element is not considered part of the flow of text. + It should be displayed, for example as the page header or + window title. Exactly one title is required per document. 
+ --> +<!ELEMENT TITLE - - (#PCDATA) -(%head.misc;) -- document title --> +<!ATTLIST TITLE %i18n> + +<!ELEMENT ISINDEX - O EMPTY -- single line prompt --> +<!ATTLIST ISINDEX + %coreattrs; -- id, class, style, title -- + %i18n; -- lang, dir -- + prompt %Text; #IMPLIED -- prompt message --> + +<!ELEMENT BASE - O EMPTY -- document base URI --> +<!ATTLIST BASE + href %URI; #IMPLIED -- URI that acts as base URI -- + target %FrameTarget; #IMPLIED -- render in this frame -- + > + +<!ELEMENT META - O EMPTY -- generic metainformation --> +<!ATTLIST META + %i18n; -- lang, dir, for use with content -- + http-equiv NAME #IMPLIED -- HTTP response header name -- + name NAME #IMPLIED -- metainformation name -- + content CDATA #REQUIRED -- associated information -- + scheme CDATA #IMPLIED -- select form of content -- + > + +<!ELEMENT STYLE - - %StyleSheet -- style info --> +<!ATTLIST STYLE + %i18n; -- lang, dir, for use with title -- + type %ContentType; #REQUIRED -- content type of style language -- + media %MediaDesc; #IMPLIED -- designed for use with these media -- + title %Text; #IMPLIED -- advisory title -- + > + +<!ELEMENT SCRIPT - - %Script; -- script statements --> +<!ATTLIST SCRIPT + charset %Charset; #IMPLIED -- char encoding of linked resource -- + type %ContentType; #REQUIRED -- content type of script language -- + language CDATA #IMPLIED -- predefined script language name -- + src %URI; #IMPLIED -- URI for an external script -- + defer (defer) #IMPLIED -- UA may defer execution of script -- + event CDATA #IMPLIED -- reserved for possible future use -- + for %URI; #IMPLIED -- reserved for possible future use -- + > + +<!ELEMENT NOSCRIPT - - (%flow;)* + -- alternate content container for non script-based rendering --> +<!ATTLIST NOSCRIPT + %attrs; -- %coreattrs, %i18n, %events -- + > + +<!--================ Document Structure ==================================--> +<!ENTITY % version "version CDATA #FIXED '%HTML.Version;'"> + +<![ %HTML.Frameset; [ +<!ENTITY % 
html.content "HEAD, FRAMESET"> +]]> + +<!ENTITY % html.content "HEAD, BODY"> + +<!ELEMENT HTML O O (%html.content;) -- document root element --> +<!ATTLIST HTML + %i18n; -- lang, dir -- + %version; + > diff --git a/tests/dtds/HTML4.soc b/tests/dtds/HTML4.soc new file mode 100644 index 0000000..ec4825f --- /dev/null +++ b/tests/dtds/HTML4.soc @@ -0,0 +1,9 @@ +OVERRIDE YES +SGMLDECL HTML4.dcl +DOCTYPE HTML HTML4.dtd +PUBLIC "-//W3C//DTD HTML 4.0//EN" HTML4-s.dtd +PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" HTML4.dtd +PUBLIC "-//W3C//DTD HTML 4.0 Frameset//EN" HTML4-f.dtd +PUBLIC "-//W3C//ENTITIES Latin1//EN//HTML" HTMLlat1.ent +PUBLIC "-//W3C//ENTITIES Special//EN//HTML" HTMLspec.ent +PUBLIC "-//W3C//ENTITIES Symbols//EN//HTML" HTMLsym.ent diff --git a/tests/dtds/HTMLlat1.ent b/tests/dtds/HTMLlat1.ent new file mode 100644 index 0000000..7632023 --- /dev/null +++ b/tests/dtds/HTMLlat1.ent @@ -0,0 +1,195 @@ +<!-- Portions (C) International Organization for Standardization 1986 + Permission to copy in any form is granted for use with + conforming SGML systems and applications as defined in + ISO 8879, provided this notice is included in all copies. +--> +<!-- Character entity set. 
Typical invocation: + <!ENTITY % HTMLlat1 PUBLIC + "-//W3C//ENTITIES Full Latin 1//EN//HTML"> + %HTMLlat1; +--> + +<!ENTITY nbsp CDATA " " -- no-break space = non-breaking space, + U+00A0 ISOnum --> +<!ENTITY iexcl CDATA "¡" -- inverted exclamation mark, U+00A1 ISOnum --> +<!ENTITY cent CDATA "¢" -- cent sign, U+00A2 ISOnum --> +<!ENTITY pound CDATA "£" -- pound sign, U+00A3 ISOnum --> +<!ENTITY curren CDATA "¤" -- currency sign, U+00A4 ISOnum --> +<!ENTITY yen CDATA "¥" -- yen sign = yuan sign, U+00A5 ISOnum --> +<!ENTITY brvbar CDATA "¦" -- broken bar = broken vertical bar, + U+00A6 ISOnum --> +<!ENTITY sect CDATA "§" -- section sign, U+00A7 ISOnum --> +<!ENTITY uml CDATA "¨" -- diaeresis = spacing diaeresis, + U+00A8 ISOdia --> +<!ENTITY copy CDATA "©" -- copyright sign, U+00A9 ISOnum --> +<!ENTITY ordf CDATA "ª" -- feminine ordinal indicator, U+00AA ISOnum --> +<!ENTITY laquo CDATA "«" -- left-pointing double angle quotation mark + = left pointing guillemet, U+00AB ISOnum --> +<!ENTITY not CDATA "¬" -- not sign = discretionary hyphen, + U+00AC ISOnum --> +<!ENTITY shy CDATA "­" -- soft hyphen = discretionary hyphen, + U+00AD ISOnum --> +<!ENTITY reg CDATA "®" -- registered sign = registered trade mark sign, + U+00AE ISOnum --> +<!ENTITY macr CDATA "¯" -- macron = spacing macron = overline + = APL overbar, U+00AF ISOdia --> +<!ENTITY deg CDATA "°" -- degree sign, U+00B0 ISOnum --> +<!ENTITY plusmn CDATA "±" -- plus-minus sign = plus-or-minus sign, + U+00B1 ISOnum --> +<!ENTITY sup2 CDATA "²" -- superscript two = superscript digit two + = squared, U+00B2 ISOnum --> +<!ENTITY sup3 CDATA "³" -- superscript three = superscript digit three + = cubed, U+00B3 ISOnum --> +<!ENTITY acute CDATA "´" -- acute accent = spacing acute, + U+00B4 ISOdia --> +<!ENTITY micro CDATA "µ" -- micro sign, U+00B5 ISOnum --> +<!ENTITY para CDATA "¶" -- pilcrow sign = paragraph sign, + U+00B6 ISOnum --> +<!ENTITY middot CDATA "·" -- middle dot = Georgian comma + = Greek middle dot, U+00B7 
ISOnum --> +<!ENTITY cedil CDATA "¸" -- cedilla = spacing cedilla, U+00B8 ISOdia --> +<!ENTITY sup1 CDATA "¹" -- superscript one = superscript digit one, + U+00B9 ISOnum --> +<!ENTITY ordm CDATA "º" -- masculine ordinal indicator, + U+00BA ISOnum --> +<!ENTITY raquo CDATA "»" -- right-pointing double angle quotation mark + = right pointing guillemet, U+00BB ISOnum --> +<!ENTITY frac14 CDATA "¼" -- vulgar fraction one quarter + = fraction one quarter, U+00BC ISOnum --> +<!ENTITY frac12 CDATA "½" -- vulgar fraction one half + = fraction one half, U+00BD ISOnum --> +<!ENTITY frac34 CDATA "¾" -- vulgar fraction three quarters + = fraction three quarters, U+00BE ISOnum --> +<!ENTITY iquest CDATA "¿" -- inverted question mark + = turned question mark, U+00BF ISOnum --> +<!ENTITY Agrave CDATA "À" -- latin capital letter A with grave + = latin capital letter A grave, + U+00C0 ISOlat1 --> +<!ENTITY Aacute CDATA "Á" -- latin capital letter A with acute, + U+00C1 ISOlat1 --> +<!ENTITY Acirc CDATA "Â" -- latin capital letter A with circumflex, + U+00C2 ISOlat1 --> +<!ENTITY Atilde CDATA "Ã" -- latin capital letter A with tilde, + U+00C3 ISOlat1 --> +<!ENTITY Auml CDATA "Ä" -- latin capital letter A with diaeresis, + U+00C4 ISOlat1 --> +<!ENTITY Aring CDATA "Å" -- latin capital letter A with ring above + = latin capital letter A ring, + U+00C5 ISOlat1 --> +<!ENTITY AElig CDATA "Æ" -- latin capital letter AE + = latin capital ligature AE, + U+00C6 ISOlat1 --> +<!ENTITY Ccedil CDATA "Ç" -- latin capital letter C with cedilla, + U+00C7 ISOlat1 --> +<!ENTITY Egrave CDATA "È" -- latin capital letter E with grave, + U+00C8 ISOlat1 --> +<!ENTITY Eacute CDATA "É" -- latin capital letter E with acute, + U+00C9 ISOlat1 --> +<!ENTITY Ecirc CDATA "Ê" -- latin capital letter E with circumflex, + U+00CA ISOlat1 --> +<!ENTITY Euml CDATA "Ë" -- latin capital letter E with diaeresis, + U+00CB ISOlat1 --> +<!ENTITY Igrave CDATA "Ì" -- latin capital letter I with grave, + U+00CC ISOlat1 --> 
+<!ENTITY Iacute CDATA "Í" -- latin capital letter I with acute, + U+00CD ISOlat1 --> +<!ENTITY Icirc CDATA "Î" -- latin capital letter I with circumflex, + U+00CE ISOlat1 --> +<!ENTITY Iuml CDATA "Ï" -- latin capital letter I with diaeresis, + U+00CF ISOlat1 --> +<!ENTITY ETH CDATA "Ð" -- latin capital letter ETH, U+00D0 ISOlat1 --> +<!ENTITY Ntilde CDATA "Ñ" -- latin capital letter N with tilde, + U+00D1 ISOlat1 --> +<!ENTITY Ograve CDATA "Ò" -- latin capital letter O with grave, + U+00D2 ISOlat1 --> +<!ENTITY Oacute CDATA "Ó" -- latin capital letter O with acute, + U+00D3 ISOlat1 --> +<!ENTITY Ocirc CDATA "Ô" -- latin capital letter O with circumflex, + U+00D4 ISOlat1 --> +<!ENTITY Otilde CDATA "Õ" -- latin capital letter O with tilde, + U+00D5 ISOlat1 --> +<!ENTITY Ouml CDATA "Ö" -- latin capital letter O with diaeresis, + U+00D6 ISOlat1 --> +<!ENTITY times CDATA "×" -- multiplication sign, U+00D7 ISOnum --> +<!ENTITY Oslash CDATA "Ø" -- latin capital letter O with stroke + = latin capital letter O slash, + U+00D8 ISOlat1 --> +<!ENTITY Ugrave CDATA "Ù" -- latin capital letter U with grave, + U+00D9 ISOlat1 --> +<!ENTITY Uacute CDATA "Ú" -- latin capital letter U with acute, + U+00DA ISOlat1 --> +<!ENTITY Ucirc CDATA "Û" -- latin capital letter U with circumflex, + U+00DB ISOlat1 --> +<!ENTITY Uuml CDATA "Ü" -- latin capital letter U with diaeresis, + U+00DC ISOlat1 --> +<!ENTITY Yacute CDATA "Ý" -- latin capital letter Y with acute, + U+00DD ISOlat1 --> +<!ENTITY THORN CDATA "Þ" -- latin capital letter THORN, + U+00DE ISOlat1 --> +<!ENTITY szlig CDATA "ß" -- latin small letter sharp s = ess-zed, + U+00DF ISOlat1 --> +<!ENTITY agrave CDATA "à" -- latin small letter a with grave + = latin small letter a grave, + U+00E0 ISOlat1 --> +<!ENTITY aacute CDATA "á" -- latin small letter a with acute, + U+00E1 ISOlat1 --> +<!ENTITY acirc CDATA "â" -- latin small letter a with circumflex, + U+00E2 ISOlat1 --> +<!ENTITY atilde CDATA "ã" -- latin small letter a with tilde, + 
U+00E3 ISOlat1 --> +<!ENTITY auml CDATA "ä" -- latin small letter a with diaeresis, + U+00E4 ISOlat1 --> +<!ENTITY aring CDATA "å" -- latin small letter a with ring above + = latin small letter a ring, + U+00E5 ISOlat1 --> +<!ENTITY aelig CDATA "æ" -- latin small letter ae + = latin small ligature ae, U+00E6 ISOlat1 --> +<!ENTITY ccedil CDATA "ç" -- latin small letter c with cedilla, + U+00E7 ISOlat1 --> +<!ENTITY egrave CDATA "è" -- latin small letter e with grave, + U+00E8 ISOlat1 --> +<!ENTITY eacute CDATA "é" -- latin small letter e with acute, + U+00E9 ISOlat1 --> +<!ENTITY ecirc CDATA "ê" -- latin small letter e with circumflex, + U+00EA ISOlat1 --> +<!ENTITY euml CDATA "ë" -- latin small letter e with diaeresis, + U+00EB ISOlat1 --> +<!ENTITY igrave CDATA "ì" -- latin small letter i with grave, + U+00EC ISOlat1 --> +<!ENTITY iacute CDATA "í" -- latin small letter i with acute, + U+00ED ISOlat1 --> +<!ENTITY icirc CDATA "î" -- latin small letter i with circumflex, + U+00EE ISOlat1 --> +<!ENTITY iuml CDATA "ï" -- latin small letter i with diaeresis, + U+00EF ISOlat1 --> +<!ENTITY eth CDATA "ð" -- latin small letter eth, U+00F0 ISOlat1 --> +<!ENTITY ntilde CDATA "ñ" -- latin small letter n with tilde, + U+00F1 ISOlat1 --> +<!ENTITY ograve CDATA "ò" -- latin small letter o with grave, + U+00F2 ISOlat1 --> +<!ENTITY oacute CDATA "ó" -- latin small letter o with acute, + U+00F3 ISOlat1 --> +<!ENTITY ocirc CDATA "ô" -- latin small letter o with circumflex, + U+00F4 ISOlat1 --> +<!ENTITY otilde CDATA "õ" -- latin small letter o with tilde, + U+00F5 ISOlat1 --> +<!ENTITY ouml CDATA "ö" -- latin small letter o with diaeresis, + U+00F6 ISOlat1 --> +<!ENTITY divide CDATA "÷" -- division sign, U+00F7 ISOnum --> +<!ENTITY oslash CDATA "ø" -- latin small letter o with stroke, + = latin small letter o slash, + U+00F8 ISOlat1 --> +<!ENTITY ugrave CDATA "ù" -- latin small letter u with grave, + U+00F9 ISOlat1 --> +<!ENTITY uacute CDATA "ú" -- latin small letter u with acute, 
+ U+00FA ISOlat1 --> +<!ENTITY ucirc CDATA "û" -- latin small letter u with circumflex, + U+00FB ISOlat1 --> +<!ENTITY uuml CDATA "ü" -- latin small letter u with diaeresis, + U+00FC ISOlat1 --> +<!ENTITY yacute CDATA "ý" -- latin small letter y with acute, + U+00FD ISOlat1 --> +<!ENTITY thorn CDATA "þ" -- latin small letter thorn, + U+00FE ISOlat1 --> +<!ENTITY yuml CDATA "ÿ" -- latin small letter y with diaeresis, + U+00FF ISOlat1 -->
\ No newline at end of file diff --git a/tests/dtds/HTMLspec.ent b/tests/dtds/HTMLspec.ent new file mode 100644 index 0000000..29011cc --- /dev/null +++ b/tests/dtds/HTMLspec.ent @@ -0,0 +1,77 @@ +<!-- Special characters for HTML --> + +<!-- Character entity set. Typical invocation: + <!ENTITY % HTMLspecial PUBLIC + "-//W3C//ENTITIES Special//EN//HTML"> + %HTMLspecial; --> + +<!-- Portions (C) International Organization for Standardization 1986: + Permission to copy in any form is granted for use with + conforming SGML systems and applications as defined in + ISO 8879, provided this notice is included in all copies. +--> + +<!-- Relevant ISO entity set is given unless names are newly introduced. + New names (i.e., not in ISO 8879 list) do not clash with any + existing ISO 8879 entity names. ISO 10646 character numbers + are given for each character, in hex. CDATA values are decimal + conversions of the ISO 10646 values and refer to the document + character set. Names are Unicode 2.0 names. 
+ +--> + +<!-- C0 Controls and Basic Latin --> +<!ENTITY quot CDATA """ -- quotation mark = APL quote, + U+0022 ISOnum --> +<!ENTITY amp CDATA "&" -- ampersand, U+0026 ISOnum --> +<!ENTITY lt CDATA "<" -- less-than sign, U+003C ISOnum --> +<!ENTITY gt CDATA ">" -- greater-than sign, U+003E ISOnum --> + +<!-- Latin Extended-A --> +<!ENTITY OElig CDATA "Œ" -- latin capital ligature OE, + U+0152 ISOlat2 --> +<!ENTITY oelig CDATA "œ" -- latin small ligature oe, U+0153 ISOlat2 --> +<!-- ligature is a misnomer, this is a separate character in some languages --> +<!ENTITY Scaron CDATA "Š" -- latin capital letter S with caron, + U+0160 ISOlat2 --> +<!ENTITY scaron CDATA "š" -- latin small letter s with caron, + U+0161 ISOlat2 --> +<!ENTITY Yuml CDATA "Ÿ" -- latin capital letter Y with diaeresis, + U+0178 ISOlat2 --> + +<!-- Spacing Modifier Letters --> +<!ENTITY circ CDATA "ˆ" -- modifier letter circumflex accent, + U+02C6 ISOpub --> +<!ENTITY tilde CDATA "˜" -- small tilde, U+02DC ISOdia --> + +<!-- General Punctuation --> +<!ENTITY ensp CDATA " " -- en space, U+2002 ISOpub --> +<!ENTITY emsp CDATA " " -- em space, U+2003 ISOpub --> +<!ENTITY thinsp CDATA " " -- thin space, U+2009 ISOpub --> +<!ENTITY zwnj CDATA "‌" -- zero width non-joiner, + U+200C NEW RFC 2070 --> +<!ENTITY zwj CDATA "‍" -- zero width joiner, U+200D NEW RFC 2070 --> +<!ENTITY lrm CDATA "‎" -- left-to-right mark, U+200E NEW RFC 2070 --> +<!ENTITY rlm CDATA "‏" -- right-to-left mark, U+200F NEW RFC 2070 --> +<!ENTITY ndash CDATA "–" -- en dash, U+2013 ISOpub --> +<!ENTITY mdash CDATA "—" -- em dash, U+2014 ISOpub --> +<!ENTITY lsquo CDATA "‘" -- left single quotation mark, + U+2018 ISOnum --> +<!ENTITY rsquo CDATA "’" -- right single quotation mark, + U+2019 ISOnum --> +<!ENTITY sbquo CDATA "‚" -- single low-9 quotation mark, U+201A NEW --> +<!ENTITY ldquo CDATA "“" -- left double quotation mark, + U+201C ISOnum --> +<!ENTITY rdquo CDATA "”" -- right double quotation mark, + U+201D ISOnum --> +<!ENTITY 
bdquo CDATA "„" -- double low-9 quotation mark, U+201E NEW --> +<!ENTITY dagger CDATA "†" -- dagger, U+2020 ISOpub --> +<!ENTITY Dagger CDATA "‡" -- double dagger, U+2021 ISOpub --> +<!ENTITY permil CDATA "‰" -- per mille sign, U+2030 ISOtech --> +<!ENTITY lsaquo CDATA "‹" -- single left-pointing angle quotation mark, + U+2039 ISO proposed --> +<!-- lsaquo is proposed but not yet ISO standardized --> +<!ENTITY rsaquo CDATA "›" -- single right-pointing angle quotation mark, + U+203A ISO proposed --> +<!-- rsaquo is proposed but not yet ISO standardized --> +<!ENTITY euro CDATA "€" -- euro sign, U+20AC NEW -->
\ No newline at end of file diff --git a/tests/dtds/HTMLsym.ent b/tests/dtds/HTMLsym.ent new file mode 100644 index 0000000..2a6250b --- /dev/null +++ b/tests/dtds/HTMLsym.ent @@ -0,0 +1,241 @@ +<!-- Mathematical, Greek and Symbolic characters for HTML --> + +<!-- Character entity set. Typical invocation: + <!ENTITY % HTMLsymbol PUBLIC + "-//W3C//ENTITIES Symbols//EN//HTML"> + %HTMLsymbol; --> + +<!-- Portions (C) International Organization for Standardization 1986: + Permission to copy in any form is granted for use with + conforming SGML systems and applications as defined in + ISO 8879, provided this notice is included in all copies. +--> + +<!-- Relevant ISO entity set is given unless names are newly introduced. + New names (i.e., not in ISO 8879 list) do not clash with any + existing ISO 8879 entity names. ISO 10646 character numbers + are given for each character, in hex. CDATA values are decimal + conversions of the ISO 10646 values and refer to the document + character set. Names are Unicode 2.0 names. 
+ +--> + +<!-- Latin Extended-B --> +<!ENTITY fnof CDATA "ƒ" -- latin small f with hook = function + = florin, U+0192 ISOtech --> + +<!-- Greek --> +<!ENTITY Alpha CDATA "Α" -- greek capital letter alpha, U+0391 --> +<!ENTITY Beta CDATA "Β" -- greek capital letter beta, U+0392 --> +<!ENTITY Gamma CDATA "Γ" -- greek capital letter gamma, + U+0393 ISOgrk3 --> +<!ENTITY Delta CDATA "Δ" -- greek capital letter delta, + U+0394 ISOgrk3 --> +<!ENTITY Epsilon CDATA "Ε" -- greek capital letter epsilon, U+0395 --> +<!ENTITY Zeta CDATA "Ζ" -- greek capital letter zeta, U+0396 --> +<!ENTITY Eta CDATA "Η" -- greek capital letter eta, U+0397 --> +<!ENTITY Theta CDATA "Θ" -- greek capital letter theta, + U+0398 ISOgrk3 --> +<!ENTITY Iota CDATA "Ι" -- greek capital letter iota, U+0399 --> +<!ENTITY Kappa CDATA "Κ" -- greek capital letter kappa, U+039A --> +<!ENTITY Lambda CDATA "Λ" -- greek capital letter lambda, + U+039B ISOgrk3 --> +<!ENTITY Mu CDATA "Μ" -- greek capital letter mu, U+039C --> +<!ENTITY Nu CDATA "Ν" -- greek capital letter nu, U+039D --> +<!ENTITY Xi CDATA "Ξ" -- greek capital letter xi, U+039E ISOgrk3 --> +<!ENTITY Omicron CDATA "Ο" -- greek capital letter omicron, U+039F --> +<!ENTITY Pi CDATA "Π" -- greek capital letter pi, U+03A0 ISOgrk3 --> +<!ENTITY Rho CDATA "Ρ" -- greek capital letter rho, U+03A1 --> +<!-- there is no Sigmaf, and no U+03A2 character either --> +<!ENTITY Sigma CDATA "Σ" -- greek capital letter sigma, + U+03A3 ISOgrk3 --> +<!ENTITY Tau CDATA "Τ" -- greek capital letter tau, U+03A4 --> +<!ENTITY Upsilon CDATA "Υ" -- greek capital letter upsilon, + U+03A5 ISOgrk3 --> +<!ENTITY Phi CDATA "Φ" -- greek capital letter phi, + U+03A6 ISOgrk3 --> +<!ENTITY Chi CDATA "Χ" -- greek capital letter chi, U+03A7 --> +<!ENTITY Psi CDATA "Ψ" -- greek capital letter psi, + U+03A8 ISOgrk3 --> +<!ENTITY Omega CDATA "Ω" -- greek capital letter omega, + U+03A9 ISOgrk3 --> + +<!ENTITY alpha CDATA "α" -- greek small letter alpha, + U+03B1 ISOgrk3 --> +<!ENTITY beta 
CDATA "β" -- greek small letter beta, U+03B2 ISOgrk3 --> +<!ENTITY gamma CDATA "γ" -- greek small letter gamma, + U+03B3 ISOgrk3 --> +<!ENTITY delta CDATA "δ" -- greek small letter delta, + U+03B4 ISOgrk3 --> +<!ENTITY epsilon CDATA "ε" -- greek small letter epsilon, + U+03B5 ISOgrk3 --> +<!ENTITY zeta CDATA "ζ" -- greek small letter zeta, U+03B6 ISOgrk3 --> +<!ENTITY eta CDATA "η" -- greek small letter eta, U+03B7 ISOgrk3 --> +<!ENTITY theta CDATA "θ" -- greek small letter theta, + U+03B8 ISOgrk3 --> +<!ENTITY iota CDATA "ι" -- greek small letter iota, U+03B9 ISOgrk3 --> +<!ENTITY kappa CDATA "κ" -- greek small letter kappa, + U+03BA ISOgrk3 --> +<!ENTITY lambda CDATA "λ" -- greek small letter lambda, + U+03BB ISOgrk3 --> +<!ENTITY mu CDATA "μ" -- greek small letter mu, U+03BC ISOgrk3 --> +<!ENTITY nu CDATA "ν" -- greek small letter nu, U+03BD ISOgrk3 --> +<!ENTITY xi CDATA "ξ" -- greek small letter xi, U+03BE ISOgrk3 --> +<!ENTITY omicron CDATA "ο" -- greek small letter omicron, U+03BF NEW --> +<!ENTITY pi CDATA "π" -- greek small letter pi, U+03C0 ISOgrk3 --> +<!ENTITY rho CDATA "ρ" -- greek small letter rho, U+03C1 ISOgrk3 --> +<!ENTITY sigmaf CDATA "ς" -- greek small letter final sigma, + U+03C2 ISOgrk3 --> +<!ENTITY sigma CDATA "σ" -- greek small letter sigma, + U+03C3 ISOgrk3 --> +<!ENTITY tau CDATA "τ" -- greek small letter tau, U+03C4 ISOgrk3 --> +<!ENTITY upsilon CDATA "υ" -- greek small letter upsilon, + U+03C5 ISOgrk3 --> +<!ENTITY phi CDATA "φ" -- greek small letter phi, U+03C6 ISOgrk3 --> +<!ENTITY chi CDATA "χ" -- greek small letter chi, U+03C7 ISOgrk3 --> +<!ENTITY psi CDATA "ψ" -- greek small letter psi, U+03C8 ISOgrk3 --> +<!ENTITY omega CDATA "ω" -- greek small letter omega, + U+03C9 ISOgrk3 --> +<!ENTITY thetasym CDATA "ϑ" -- greek small letter theta symbol, + U+03D1 NEW --> +<!ENTITY upsih CDATA "ϒ" -- greek upsilon with hook symbol, + U+03D2 NEW --> +<!ENTITY piv CDATA "ϖ" -- greek pi symbol, U+03D6 ISOgrk3 --> + +<!-- General Punctuation --> 
+<!ENTITY bull CDATA "•" -- bullet = black small circle, + U+2022 ISOpub --> +<!-- bullet is NOT the same as bullet operator, U+2219 --> +<!ENTITY hellip CDATA "…" -- horizontal ellipsis = three dot leader, + U+2026 ISOpub --> +<!ENTITY prime CDATA "′" -- prime = minutes = feet, U+2032 ISOtech --> +<!ENTITY Prime CDATA "″" -- double prime = seconds = inches, + U+2033 ISOtech --> +<!ENTITY oline CDATA "‾" -- overline = spacing overscore, + U+203E NEW --> +<!ENTITY frasl CDATA "⁄" -- fraction slash, U+2044 NEW --> + +<!-- Letterlike Symbols --> +<!ENTITY weierp CDATA "℘" -- script capital P = power set + = Weierstrass p, U+2118 ISOamso --> +<!ENTITY image CDATA "ℑ" -- blackletter capital I = imaginary part, + U+2111 ISOamso --> +<!ENTITY real CDATA "ℜ" -- blackletter capital R = real part symbol, + U+211C ISOamso --> +<!ENTITY trade CDATA "™" -- trade mark sign, U+2122 ISOnum --> +<!ENTITY alefsym CDATA "ℵ" -- alef symbol = first transfinite cardinal, + U+2135 NEW --> +<!-- alef symbol is NOT the same as hebrew letter alef, + U+05D0 although the same glyph could be used to depict both characters --> + +<!-- Arrows --> +<!ENTITY larr CDATA "←" -- leftwards arrow, U+2190 ISOnum --> +<!ENTITY uarr CDATA "↑" -- upwards arrow, U+2191 ISOnum--> +<!ENTITY rarr CDATA "→" -- rightwards arrow, U+2192 ISOnum --> +<!ENTITY darr CDATA "↓" -- downwards arrow, U+2193 ISOnum --> +<!ENTITY harr CDATA "↔" -- left right arrow, U+2194 ISOamsa --> +<!ENTITY crarr CDATA "↵" -- downwards arrow with corner leftwards + = carriage return, U+21B5 NEW --> +<!ENTITY lArr CDATA "⇐" -- leftwards double arrow, U+21D0 ISOtech --> +<!-- Unicode does not say that lArr is the same as the 'is implied by' arrow + but also does not have any other character for that function. So ? 
lArr can + be used for 'is implied by' as ISOtech suggests --> +<!ENTITY uArr CDATA "⇑" -- upwards double arrow, U+21D1 ISOamsa --> +<!ENTITY rArr CDATA "⇒" -- rightwards double arrow, + U+21D2 ISOtech --> +<!-- Unicode does not say this is the 'implies' character but does not have + another character with this function so ? + rArr can be used for 'implies' as ISOtech suggests --> +<!ENTITY dArr CDATA "⇓" -- downwards double arrow, U+21D3 ISOamsa --> +<!ENTITY hArr CDATA "⇔" -- left right double arrow, + U+21D4 ISOamsa --> + +<!-- Mathematical Operators --> +<!ENTITY forall CDATA "∀" -- for all, U+2200 ISOtech --> +<!ENTITY part CDATA "∂" -- partial differential, U+2202 ISOtech --> +<!ENTITY exist CDATA "∃" -- there exists, U+2203 ISOtech --> +<!ENTITY empty CDATA "∅" -- empty set = null set = diameter, + U+2205 ISOamso --> +<!ENTITY nabla CDATA "∇" -- nabla = backward difference, + U+2207 ISOtech --> +<!ENTITY isin CDATA "∈" -- element of, U+2208 ISOtech --> +<!ENTITY notin CDATA "∉" -- not an element of, U+2209 ISOtech --> +<!ENTITY ni CDATA "∋" -- contains as member, U+220B ISOtech --> +<!-- should there be a more memorable name than 'ni'? 
--> +<!ENTITY prod CDATA "∏" -- n-ary product = product sign, + U+220F ISOamsb --> +<!-- prod is NOT the same character as U+03A0 'greek capital letter pi' though + the same glyph might be used for both --> +<!ENTITY sum CDATA "∑" -- n-ary summation, U+2211 ISOamsb --> +<!-- sum is NOT the same character as U+03A3 'greek capital letter sigma' + though the same glyph might be used for both --> +<!ENTITY minus CDATA "−" -- minus sign, U+2212 ISOtech --> +<!ENTITY lowast CDATA "∗" -- asterisk operator, U+2217 ISOtech --> +<!ENTITY radic CDATA "√" -- square root = radical sign, + U+221A ISOtech --> +<!ENTITY prop CDATA "∝" -- proportional to, U+221D ISOtech --> +<!ENTITY infin CDATA "∞" -- infinity, U+221E ISOtech --> +<!ENTITY ang CDATA "∠" -- angle, U+2220 ISOamso --> +<!ENTITY and CDATA "∧" -- logical and = wedge, U+2227 ISOtech --> +<!ENTITY or CDATA "∨" -- logical or = vee, U+2228 ISOtech --> +<!ENTITY cap CDATA "∩" -- intersection = cap, U+2229 ISOtech --> +<!ENTITY cup CDATA "∪" -- union = cup, U+222A ISOtech --> +<!ENTITY int CDATA "∫" -- integral, U+222B ISOtech --> +<!ENTITY there4 CDATA "∴" -- therefore, U+2234 ISOtech --> +<!ENTITY sim CDATA "∼" -- tilde operator = varies with = similar to, + U+223C ISOtech --> +<!-- tilde operator is NOT the same character as the tilde, U+007E, + although the same glyph might be used to represent both --> +<!ENTITY cong CDATA "≅" -- approximately equal to, U+2245 ISOtech --> +<!ENTITY asymp CDATA "≈" -- almost equal to = asymptotic to, + U+2248 ISOamsr --> +<!ENTITY ne CDATA "≠" -- not equal to, U+2260 ISOtech --> +<!ENTITY equiv CDATA "≡" -- identical to, U+2261 ISOtech --> +<!ENTITY le CDATA "≤" -- less-than or equal to, U+2264 ISOtech --> +<!ENTITY ge CDATA "≥" -- greater-than or equal to, + U+2265 ISOtech --> +<!ENTITY sub CDATA "⊂" -- subset of, U+2282 ISOtech --> +<!ENTITY sup CDATA "⊃" -- superset of, U+2283 ISOtech --> +<!-- note that nsup, 'not a superset of, U+2285' is not covered by the Symbol + font encoding and 
is not included. Should it be, for symmetry? + It is in ISOamsn --> +<!ENTITY nsub CDATA "⊄" -- not a subset of, U+2284 ISOamsn --> +<!ENTITY sube CDATA "⊆" -- subset of or equal to, U+2286 ISOtech --> +<!ENTITY supe CDATA "⊇" -- superset of or equal to, + U+2287 ISOtech --> +<!ENTITY oplus CDATA "⊕" -- circled plus = direct sum, + U+2295 ISOamsb --> +<!ENTITY otimes CDATA "⊗" -- circled times = vector product, + U+2297 ISOamsb --> +<!ENTITY perp CDATA "⊥" -- up tack = orthogonal to = perpendicular, + U+22A5 ISOtech --> +<!ENTITY sdot CDATA "⋅" -- dot operator, U+22C5 ISOamsb --> +<!-- dot operator is NOT the same character as U+00B7 middle dot --> + +<!-- Miscellaneous Technical --> +<!ENTITY lceil CDATA "⌈" -- left ceiling = apl upstile, + U+2308 ISOamsc --> +<!ENTITY rceil CDATA "⌉" -- right ceiling, U+2309 ISOamsc --> +<!ENTITY lfloor CDATA "⌊" -- left floor = apl downstile, + U+230A ISOamsc --> +<!ENTITY rfloor CDATA "⌋" -- right floor, U+230B ISOamsc --> +<!ENTITY lang CDATA "〈" -- left-pointing angle bracket = bra, + U+2329 ISOtech --> +<!-- lang is NOT the same character as U+003C 'less than' + or U+2039 'single left-pointing angle quotation mark' --> +<!ENTITY rang CDATA "〉" -- right-pointing angle bracket = ket, + U+232A ISOtech --> +<!-- rang is NOT the same character as U+003E 'greater than' + or U+203A 'single right-pointing angle quotation mark' --> + +<!-- Geometric Shapes --> +<!ENTITY loz CDATA "◊" -- lozenge, U+25CA ISOpub --> + +<!-- Miscellaneous Symbols --> +<!ENTITY spades CDATA "♠" -- black spade suit, U+2660 ISOpub --> +<!-- black here seems to mean filled as opposed to hollow --> +<!ENTITY clubs CDATA "♣" -- black club suit = shamrock, + U+2663 ISOpub --> +<!ENTITY hearts CDATA "♥" -- black heart suit = valentine, + U+2665 ISOpub --> +<!ENTITY diams CDATA "♦" -- black diamond suit, U+2666 ISOpub -->
\ No newline at end of file |