From 510ed32cfbffa6148018869f5ade416505a450b3 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 15 Apr 2024 22:21:21 +0200 Subject: Adding upstream version 2.9.0rel.0. Signed-off-by: Daniel Baumann --- test/c1.html | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 test/c1.html (limited to 'test/c1.html') diff --git a/test/c1.html b/test/c1.html new file mode 100644 index 0000000..c8d1edd --- /dev/null +++ b/test/c1.html @@ -0,0 +1,64 @@ + + + +Test of invalid NCRs 128-159 + + +

Test of invalid NCRs 128-159

+

+Authoring tools on MS Windows, in particular MS FrontPage ("WYSIWYG" HTML editor), +generate invalid Numeric Character References (NCRs) for characters +commonly found in positions 128...159 (0x80...0x9f) in Windows fonts. Although +these are valid codepoints for windows-1252 (and other +windows-xxxx) charsets, valid NCRs always refer to the document character set +in the SGML sense, not to the character encoding scheme (or charset). For HTML, +the SGML document character set is fixed; it is always a subset of Unicode +(or ISO 10646). In Unicode and its iso-8859-1 subset, values 128...159 are +C1 control characters; they must not appear in HTML. Valid NCRs for the +intended characters use Unicode values greater than 256. +

+Lynx tries to interpret some of the invalid codes by assuming that they are +windows-1252 codepoints. +

 
+ 
+You may want to press '\' to view the source of this test. 
+ 
+Code      invalid NCR     valid NCR, description 
+        normal   in ALT  				 
+                             
+0x80    €	€ €	#EURO SIGN 
+0x81    	 	#NOT USED 
+0x82    ‚	‚ ‚	#SINGLE LOW-9 QUOTATION MARK 
+0x83    ƒ	ƒ ƒ	#LATIN SMALL LETTER F WITH HOOK 
+0x84    „	„ „	#DOUBLE LOW-9 QUOTATION MARK 
+0x85    …	… …	#HORIZONTAL ELLIPSIS 
+0x86    †	† †	#DAGGER 
+0x87    ‡	‡ ‡	#DOUBLE DAGGER 
+0x88    ˆ	ˆ ˆ	#MODIFIER LETTER CIRCUMFLEX ACCENT 
+0x89    ‰	‰ ‰	#PER MILLE SIGN 
+0x8a    Š	Š Š	#LATIN CAPITAL LETTER S WITH CARON 
+0x8b    ‹	‹ ‹	#SINGLE LEFT-POINTING ANGLE QUOTATION MARK 
+0x8c    Œ	Œ Œ	#LATIN CAPITAL LIGATURE OE 
+0x8d    	 	#NOT USED 
+0x8e    Ž	Ž Ž	#LATIN CAPITAL LETTER Z WITH CARON 
+0x8f    	 	#NOT USED 
+0x90    	 	#NOT USED 
+0x91    ‘	‘ ‘	#LEFT SINGLE QUOTATION MARK 
+0x92    ’	’ ’	#RIGHT SINGLE QUOTATION MARK 
+0x93    “	“ “	#LEFT DOUBLE QUOTATION MARK 
+0x94    ”	” ”	#RIGHT DOUBLE QUOTATION MARK 
+0x95    •	• •	#BULLET 
+0x96    –	– –	#EN DASH 
+0x97    —	— —	#EM DASH 
+0x98    ˜	˜ ˜	#SMALL TILDE 
+0x99    ™	™ ™	#TRADE MARK SIGN 
+0x9a    š	š š	#LATIN SMALL LETTER S WITH CARON 
+0x9b    ›	› ›	#SINGLE RIGHT-POINTING ANGLE QUOTATION MARK 
+0x9c    œ	œ œ	#LATIN SMALL LIGATURE OE 
+0x9d    	 	#NOT USED 
+0x9e    ž	ž ž	#LATIN SMALL LETTER Z WITH CARON 
+0x9f    Ÿ	Ÿ Ÿ	#LATIN CAPITAL LETTER Y WITH DIAERESIS 
+ 
+
+ + -- cgit v1.2.3