diff options
Diffstat (limited to '')
-rw-r--r-- | test/c1.html | 63 |
1 files changed, 63 insertions, 0 deletions
diff --git a/test/c1.html b/test/c1.html new file mode 100644 index 0000000..6ec70aa --- /dev/null +++ b/test/c1.html @@ -0,0 +1,63 @@ +<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN"> +<HTML> +<HEAD> +<TITLE>Test of invalid NCRs 128-159</TITLE> +</HEAD> +<BODY><H2>Test of invalid NCRs 128-159</H2> +<P> +Authoring tools on MS Windows, in particular MS FrontPage ("WYSIWYG" HTML editor), +generate invalid <DFN>Numeric Character References</DFN> for characters +commonly found in positions 128...159 (0x80...0x9f) in Windows fonts. Although +these are valid codepoints for <em>windows-1252</em> (and other +windows-xxxx) charsets, valid NCRs always refer to the document character set +in the SGML sense, not to the character encoding scheme (or charset). For HTML, +the SGML document character set is fixed: it is always a subset of Unicode +(or ISO 10646). In Unicode and its iso-8859-1 subset, values 128...159 are +C1 control characters; they must not appear in HTML. Valid NCRs for the +intended characters use Unicode values greater than 256. +<p> +Lynx tries to interpret some of the invalid codes, by assuming that they are +windows-1252 codepoints. +<PRE> + +You may want to press '\' to view the source of this test. 
+ +<em>Code invalid NCR <!-- --> <tab id=c>valid NCR, description</em> +<em> normal in ALT <a id=table></a> </em> + +0x80 € <IMG SRC=X ALT="€"> <tab to=c>€ #EURO SIGN +0x81  <IMG SRC=X ALT=""> <!----> #NOT USED +0x82 ‚ <IMG SRC=X ALT="‚"> <tab to=c>‚ #SINGLE LOW-9 QUOTATION MARK +0x83 ƒ <IMG SRC=X ALT="ƒ"> <tab to=c>ƒ #LATIN SMALL LETTER F WITH HOOK +0x84 „ <IMG SRC=X ALT="„"> <tab to=c>„ #DOUBLE LOW-9 QUOTATION MARK +0x85 … <IMG SRC=X ALT="…"> <tab to=c>… #HORIZONTAL ELLIPSIS +0x86 † <IMG SRC=X ALT="†"> <tab to=c>† #DAGGER +0x87 ‡ <IMG SRC=X ALT="‡"> <tab to=c>‡ #DOUBLE DAGGER +0x88 ˆ <IMG SRC=X ALT="ˆ"> <tab to=c>ˆ #MODIFIER LETTER CIRCUMFLEX ACCENT +0x89 ‰ <IMG SRC=X ALT="‰"> <tab to=c>‰ #PER MILLE SIGN +0x8a Š <IMG SRC=X ALT="Š"> <tab to=c>Š #LATIN CAPITAL LETTER S WITH CARON +0x8b ‹ <IMG SRC=X ALT="‹"> <tab to=c>‹ #SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0x8c Œ <IMG SRC=X ALT="Œ"> <tab to=c>Œ #LATIN CAPITAL LIGATURE OE +0x8d  <IMG SRC=X ALT=""> <!----> #NOT USED +0x8e Ž <IMG SRC=X ALT="Ž"> <!--Ž--> #NOT USED +0x8f  <IMG SRC=X ALT=""> <!----> #NOT USED +0x90  <IMG SRC=X ALT=""> <!----> #NOT USED +0x91 ‘ <IMG SRC=X ALT="‘"> <tab to=c>‘ #LEFT SINGLE QUOTATION MARK +0x92 ’ <IMG SRC=X ALT="’"> <tab to=c>’ #RIGHT SINGLE QUOTATION MARK +0x93 “ <IMG SRC=X ALT="“"> <tab to=c>“ #LEFT DOUBLE QUOTATION MARK +0x94 ” <IMG SRC=X ALT="”"> <tab to=c>” #RIGHT DOUBLE QUOTATION MARK +0x95 • <IMG SRC=X ALT="•"> <tab to=c>• #BULLET +0x96 – <IMG SRC=X ALT="–"> <tab to=c>– #EN DASH +0x97 — <IMG SRC=X ALT="—"> <tab to=c>— #EM DASH +0x98 ˜ <IMG SRC=X ALT="˜"> <tab to=c>˜ #SMALL TILDE +0x99 ™ <IMG SRC=X ALT="™"> <tab to=c>™ #TRADE MARK SIGN +0x9a š <IMG SRC=X ALT="š"> <tab to=c>š #LATIN SMALL LETTER S WITH CARON +0x9b › <IMG SRC=X ALT="›"> <tab to=c>› #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0x9c œ <IMG SRC=X ALT="œ"> <tab to=c>œ #LATIN SMALL LIGATURE OE +0x9d  <IMG SRC=X ALT=""> <!----> #NOT USED +0x9e ž <IMG SRC=X ALT="ž"> <!--ž--> #NOT USED +0x9f Ÿ <IMG SRC=X 
ALT="Ÿ"> <tab to=c>Ÿ #LATIN CAPITAL LETTER Y WITH DIAERESIS + +</PRE> +</BODY> +</HTML> |