summaryrefslogtreecommitdiffstats
path: root/test
diff options
context:
space:
mode:
Diffstat (limited to 'test')
-rw-r--r--test/ALT88592.html172
-rw-r--r--test/ISO_LATIN1_test.html84
-rw-r--r--test/README.txt8
-rw-r--r--test/TestComment.html51
-rw-r--r--test/X1
-rw-r--r--test/bad-html.html47
-rw-r--r--test/c1.html64
-rw-r--r--test/circle.html15
-rw-r--r--test/cp-1252.html179
-rw-r--r--test/cp-1252a.html184
-rw-r--r--test/idna-tr46.html55
-rw-r--r--test/image.jpgbin0 -> 1287 bytes
-rw-r--r--test/iso-8859-1.html242
-rw-r--r--test/iso-8859-1a.html276
-rw-r--r--test/iso-8859-2.html175
-rw-r--r--test/iso-8859-2a.html209
-rw-r--r--test/koi8-r.html322
-rw-r--r--test/nobody1
-rw-r--r--test/quickbrown.html104
-rw-r--r--test/raw8bit.html39
-rw-r--r--test/sgml.html1082
-rw-r--r--test/spaces.html38
-rw-r--r--test/special_urls.html23
-rw-r--r--test/square.html15
-rw-r--r--test/tabtest.html40
-rw-r--r--test/tags.html220
-rw-r--r--test/test-styles.html107
-rw-r--r--test/triangle.html15
-rw-r--r--test/unicode.html916
-rw-r--r--test/utf-8-demo.html217
30 files changed, 4901 insertions, 0 deletions
diff --git a/test/ALT88592.html b/test/ALT88592.html
new file mode 100644
index 0000000..59d86f9
--- /dev/null
+++ b/test/ALT88592.html
@@ -0,0 +1,172 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<HTML>
+<HEAD>
+<TITLE>Character table modified and enhanced for iso8859-2 - ALT test</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-2">
+<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org">
+<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/">
+<!-- A BASE tag for the SRC attributes of dummy images.
+ They should be inaccessible so that the ALT text will be shown in graphical browsers.
+ Use file: to save network resources. -->
+<BASE HREF="file://localhost/this.path.intentionally.invalid/">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</HEAD>
+
+<BODY>
+
+<H1 ALIGN=center>iso8859-2 plus table - ALT test</H1>
+
+<PRE>
+Description Code Entity name
+=================================== ============ ==============
+quotation mark <IMG SRC=X ALT=" &amp;#34; --> &#34; &amp;quot; --> &quot;">
+ampersand <IMG SRC=X ALT=" &amp;#38; --> &#38; &amp;amp; --> &amp;">
+less-than sign <IMG SRC=X ALT=" &amp;#60; --> &#60; &amp;lt; --> &lt;">
+greater-than sign <IMG SRC=X ALT=" &amp;#62; --> &#62; &amp;gt; --> &gt;">
+
+Description Char Code Entity name
+=================================== ==== ============ ==============
+non-breaking space <IMG SRC=X ALT="   &amp;#160; --> &#160; &amp;nbsp; --> &nbsp;">
+capital A, ogonek <IMG SRC=X ALT=" ¡ &amp;#260; --> &#260; &amp;Aogon; --> &Aogon;">
+breve <IMG SRC=X ALT=" {¢} {&amp;#728;}-->{&#728;} {&amp;breve;} -->{&breve;}">
+capital L, stroke <IMG SRC=X ALT=" £ &amp;#321; --> &#321; &amp;Lstrok; --> &Lstrok;">
+general currency sign <IMG SRC=X ALT=" ¤ &amp;#164; --> &#164; &amp;curren; --> &curren;">
+capital L, caron <IMG SRC=X ALT=" ¥ &amp;#317; --> &#317; &amp;Lcaron; --> &Lcaron;">
+capital S, acute accent <IMG SRC=X ALT=" ¦ &amp;#346; --> &#346; &amp;Sacute; --> &Sacute;">
+section sign <IMG SRC=X ALT=" § &amp;#167; --> &#167; &amp;sect; --> &sect;">
+umlaut (dieresis) <IMG SRC=X ALT=" ¨ &amp;#168; --> &#168; &amp;uml; --> &uml;">
+ <IMG SRC=X ALT=" &amp;die; --> &die;">
+capital S, caron <IMG SRC=X ALT=" © &amp;#352; --> &#352; &amp;Scaron; --> &Scaron;">
+capital S, cedilla <IMG SRC=X ALT=" ª &amp;#350; --> &#350; &amp;Scedil; --> &Scedil;">
+capital T, caron <IMG SRC=X ALT=" « &amp;#356; --> &#356; &amp;Tcaron; --> &Tcaron;">
+capital Z, acute accent <IMG SRC=X ALT=" ¬ &amp;#377; --> &#377; &amp;Zacute; --> &Zacute;">
+soft hyphen <IMG SRC=X ALT=" [­] [&amp;#173;]-->[&#173;] [&amp;shy;] -->[&shy;]">
+capital Z, caron <IMG SRC=X ALT=" ® &amp;#381; --> &#381; &amp;Zcaron; --> &Zcaron;">
+capital Z, dot above <IMG SRC=X ALT=" ¯ &amp;#379; --> &#379; &amp;Zdot; --> &Zdot;">
+degree sign <IMG SRC=X ALT=" ° &amp;#176; --> &#176; &amp;deg; --> &deg;">
+small a, ogonek <IMG SRC=X ALT=" ± &amp;#261; --> &#261; &amp;aogon; --> &aogon;">
+ogonek <IMG SRC=X ALT=" {²} {&amp;#731;}-->{&#731;} {&amp;ogon;} -->{&ogon;}">
+small l, stroke <IMG SRC=X ALT=" ³ &amp;#322; --> &#322; &amp;lstrok; --> &lstrok;">
+acute accent <IMG SRC=X ALT=" ´ &amp;#180; --> &#180; &amp;acute; --> &acute;">
+small l, caron <IMG SRC=X ALT=" µ &amp;#318; --> &#318; &amp;lcaron; --> &lcaron;">
+small s, acute accent <IMG SRC=X ALT=" ¶ &amp;#347; --> &#347; &amp;sacute; --> &sacute;">
+caron <IMG SRC=X ALT=" {·} {&amp;#711;}-->{&#711;} {&amp;caron;} -->{&caron;}">
+cedilla <IMG SRC=X ALT=" ¸ &amp;#184; --> &#184; &amp;cedil; --> &cedil;">
+small s, caron <IMG SRC=X ALT=" ¹ &amp;#353; --> &#353; &amp;scaron; --> &scaron;">
+small s, cedilla <IMG SRC=X ALT=" º &amp;#351; --> &#351; &amp;scedil; --> &scedil;">
+small t, caron <IMG SRC=X ALT=" » &amp;#357; --> &#357; &amp;tcaron; --> &tcaron;">
+small z, acute accent <IMG SRC=X ALT=" ¼ &amp;#378; --> &#378; &amp;zacute; --> &zacute;">
+double acute accent <IMG SRC=X ALT=" {½} {&amp;#733;}-->{&#733;} {&amp;dblac;} -->{&dblac;}">
+small z, caron <IMG SRC=X ALT=" ¾ &amp;#382; --> &#382; &amp;zcaron; --> &zcaron;">
+small z, dot above <IMG SRC=X ALT=" ¿ &amp;#380; --> &#380; &amp;zdot; --> &zdot; ">
+capital R, acute accent <IMG SRC=X ALT=" À &amp;#340; --> &#340; &amp;Racute; --> &Racute;">
+capital A, acute accent <IMG SRC=X ALT=" Á &amp;#193; --> &#193; &amp;Aacute; --> &Aacute;">
+capital A, circumflex accent <IMG SRC=X ALT=" Â &amp;#194; --> &#194; &amp;Acirc; --> &Acirc;">
+capital A, breve <IMG SRC=X ALT=" Ã &amp;#258; --> &#258; &amp;Abreve; --> &Abreve;">
+capital A, dieresis or umlaut mark <IMG SRC=X ALT=" Ä &amp;#196; --> &#196; &amp;Auml; --> &Auml;">
+capital L, acute accent <IMG SRC=X ALT=" Å &amp;#313; --> &#313; &amp;Lacute; --> &Lacute;">
+capital C, acute accent <IMG SRC=X ALT=" Æ &amp;#262; --> &#262; &amp;Cacute; --> &Cacute;">
+capital C, cedilla <IMG SRC=X ALT=" Ç &amp;#199; --> &#199; &amp;Ccedil; --> &Ccedil;">
+capital C, caron <IMG SRC=X ALT=" È &amp;#268; --> &#268; &amp;Ccaron; --> &Ccaron;">
+capital E, acute accent <IMG SRC=X ALT=" É &amp;#201; --> &#201; &amp;Eacute; --> &Eacute;">
+capital E, ogonek <IMG SRC=X ALT=" Ê &amp;#280; --> &#280; &amp;Eogon; --> &Eogon;">
+capital E, dieresis or umlaut mark <IMG SRC=X ALT=" Ë &amp;#203; --> &#203; &amp;Euml; --> &Euml;">
+capital E, caron <IMG SRC=X ALT=" Ì &amp;#282; --> &#282; &amp;Ecaron; --> &Ecaron;">
+capital I, acute accent <IMG SRC=X ALT=" Í &amp;#205; --> &#205; &amp;Iacute; --> &Iacute;">
+capital I, circumflex accent <IMG SRC=X ALT=" Î &amp;#206; --> &#206; &amp;Icirc; --> &Icirc;">
+capital D, caron <IMG SRC=X ALT=" Ï &amp;#270; --> &#270; &amp;Dcaron; --> &Dcaron;">
+capital D, stroke <IMG SRC=X ALT=" Ð &amp;#272; --> &#272; &amp;Dstrok; --> &Dstrok;">
+capital Eth, Icelandic <IMG SRC=X ALT=" N/A &amp;#208; --> &#208; &amp;ETH; --> &ETH;">
+capital N, acute accent <IMG SRC=X ALT=" Ñ &amp;#323; --> &#323; &amp;Nacute; --> &Nacute;">
+capital N, caron <IMG SRC=X ALT=" Ò &amp;#327; --> &#327; &amp;Ncaron; --> &Ncaron;">
+capital O, acute accent <IMG SRC=X ALT=" Ó &amp;#211; --> &#211; &amp;Oacute; --> &Oacute;">
+capital O, circumflex accent <IMG SRC=X ALT=" Ô &amp;#212; --> &#212; &amp;Ocirc; --> &Ocirc;">
+capital O, double acute accent <IMG SRC=X ALT=" Õ &amp;#336; --> &#336; &amp;Odblac; --> &Odblac;">
+capital O, dieresis or umlaut mark <IMG SRC=X ALT=" Ö &amp;#214; --> &#214; &amp;Ouml; --> &Ouml;">
+multiply sign <IMG SRC=X ALT=" × &amp;#215; --> &#215; &amp;times; --> &times;">
+capital R, caron <IMG SRC=X ALT=" Ø &amp;#344; --> &#344; &amp;Rcaron; --> &Rcaron;">
+capital U, ring <IMG SRC=X ALT=" Ù &amp;#366; --> &#366; &amp;Uring; --> &Uring;">
+capital U, acute accent <IMG SRC=X ALT=" Ú &amp;#218; --> &#218; &amp;Uacute; --> &Uacute;">
+capital U, double acute accent <IMG SRC=X ALT=" Û &amp;#368; --> &#368; &amp;Udblac; --> &Udblac;">
+capital U, dieresis or umlaut mark <IMG SRC=X ALT=" Ü &amp;#220; --> &#220; &amp;Uuml; --> &Uuml;">
+capital Y, acute accent <IMG SRC=X ALT=" Ý &amp;#221; --> &#221; &amp;Yacute; --> &Yacute;">
+capital T, cedilla <IMG SRC=X ALT=" Þ &amp;#354; --> &#354; &amp;Tcedil; --> &Tcedil;">
+small sharp s, German (sz ligature) <IMG SRC=X ALT=" ß &amp;#223; --> &#223; &amp;szlig; --> &szlig;">
+small r, acute accent <IMG SRC=X ALT=" à &amp;#341; --> &#341; &amp;racute; --> &racute;">
+small a, acute accent <IMG SRC=X ALT=" á &amp;#225; --> &#225; &amp;aacute; --> &aacute;">
+small a, circumflex accent <IMG SRC=X ALT=" â &amp;#226; --> &#226; &amp;acirc; --> &acirc;">
+small a, breve <IMG SRC=X ALT=" ã &amp;#259; --> &#259; &amp;abreve; --> &abreve;">
+small a, dieresis or umlaut mark <IMG SRC=X ALT=" ä &amp;#228; --> &#228; &amp;auml; --> &auml;">
+small l, acute accent <IMG SRC=X ALT=" å &amp;#314; --> &#314; &amp;lacute; --> &lacute;">
+small c, acute accent <IMG SRC=X ALT=" æ &amp;#263; --> &#263; &amp;cacute; --> &cacute;">
+small c, cedilla <IMG SRC=X ALT=" ç &amp;#231; --> &#231; &amp;ccedil; --> &ccedil;">
+small c, caron <IMG SRC=X ALT=" è &amp;#269; --> &#269; &amp;ccaron; --> &ccaron;">
+small e, acute accent <IMG SRC=X ALT=" é &amp;#233; --> &#233; &amp;eacute; --> &eacute;">
+small e, ogonek <IMG SRC=X ALT=" ê &amp;#281; --> &#281; &amp;eogon; --> &eogon;">
+small e, dieresis or umlaut mark <IMG SRC=X ALT=" ë &amp;#235; --> &#235; &amp;euml; --> &euml;">
+small e, caron <IMG SRC=X ALT=" ì &amp;#283; --> &#283; &amp;ecaron; --> &ecaron;">
+small i, acute accent <IMG SRC=X ALT=" í &amp;#237; --> &#237; &amp;iacute; --> &iacute;">
+small i, circumflex accent <IMG SRC=X ALT=" î &amp;#238; --> &#238; &amp;icirc; --> &icirc;">
+small d, caron <IMG SRC=X ALT=" ï &amp;#271; --> &#271; &amp;dcaron; --> &dcaron;">
+small d, stroke <IMG SRC=X ALT=" ð &amp;#273; --> &#273; &amp;dstrok; --> &dstrok;">
+small eth, Icelandic <IMG SRC=X ALT=" N/A &amp;#240; --> &#240; &amp;eth; --> &eth;">
+small n, acute accent <IMG SRC=X ALT=" ñ &amp;#324; --> &#324; &amp;nacute; --> &nacute;">
+small n, caron <IMG SRC=X ALT=" ò &amp;#328; --> &#328; &amp;ncaron; --> &ncaron;">
+small o, acute accent <IMG SRC=X ALT=" ó &amp;#243; --> &#243; &amp;oacute; --> &oacute;">
+small o, circumflex accent <IMG SRC=X ALT=" ô &amp;#244; --> &#244; &amp;ocirc; --> &ocirc;">
+small o, double acute accent <IMG SRC=X ALT=" õ &amp;#337; --> &#337; &amp;odblac; --> &odblac;">
+small o, dieresis or umlaut mark <IMG SRC=X ALT=" ö &amp;#246; --> &#246; &amp;ouml; --> &ouml;">
+division sign <IMG SRC=X ALT=" ÷ &amp;#247; --> &#247; &amp;divide; --> &divide;">
+small r, caron <IMG SRC=X ALT=" ø &amp;#345; --> &#345; &amp;rcaron; --> &rcaron;">
+small u, ring <IMG SRC=X ALT=" ù &amp;#367; --> &#367; &amp;uring; --> &uring;">
+small u, acute accent <IMG SRC=X ALT=" ú &amp;#250; --> &#250; &amp;uacute; --> &uacute;">
+small u, double acute accent <IMG SRC=X ALT=" û &amp;#369; --> &#369; &amp;udblac; --> &udblac;">
+small u, dieresis or umlaut mark <IMG SRC=X ALT=" ü &amp;#252; --> &#252; &amp;uuml; --> &uuml;">
+small y, acute accent <IMG SRC=X ALT=" ý &amp;#253; --> &#253; &amp;yacute; --> &yacute;">
+small t, cedilla <IMG SRC=X ALT=" þ &amp;#355; --> &#355; &amp;tcedil; --> &tcedil;">
+dot above <IMG SRC=X ALT=" {ÿ} {&amp;#729;}-->{&#729;} {&amp;dot;} -->{&dot;}">
+
+Some other characters of interest Char Code Entity name
+=================================== ==== ============ ==============
+capital AE diphthong (ligature) <IMG SRC=X ALT=" N/A &amp;#198; --> &#198; &amp;AElig; --> &AElig;">
+small ae diphthong (ligature) <IMG SRC=X ALT=" N/A &amp;#230; --> &#230; &amp;aelig; --> &aelig;">
+capital OE ligature <IMG SRC=X ALT=" N/A {&amp;#338;}-->{&#338;} {&amp;OElig;} -->{&OElig;}">
+small oe ligature <IMG SRC=X ALT=" N/A {&amp;#339;}-->{&#339;} {&amp;oelig;} -->{&oelig;}">
+copyright <IMG SRC=X ALT=" N/A &amp;#169; --> &#169; &amp;copy; --> &copy;">
+registered trademark <IMG SRC=X ALT=" N/A &amp;#174; --> &#174; &amp;reg; --> &reg;">
+trademark sign <IMG SRC=X ALT=" N/A &amp;#8482;--> &#8482; &amp;trade; --> &trade;">
+em space <IMG SRC=X ALT=" N/A [&amp;#8195;]->[&#8195;] [&amp;emsp;] -->[&emsp;]">
+en space <IMG SRC=X ALT=" N/A [&amp;#8194;]->[&#8194;] [&amp;ensp;] -->[&ensp;]">
+1/3-em space <IMG SRC=X ALT=" N/A [&amp;#8196;]->[&#8196;] [&amp;emsp13;] -->[&emsp13;]">
+1/4-em space <IMG SRC=X ALT=" N/A [&amp;#8197;]->[&#8197;] [&amp;emsp14;] -->[&emsp14;]">
+thin space <IMG SRC=X ALT=" N/A [&amp;#8201;]->[&#8201;] [&amp;thinsp;]-->[&thinsp;]">
+hair space <IMG SRC=X ALT=" N/A [&amp;#8202;]->[&#8202;] [&amp;hairsp;]-->[&hairsp;]">
+em dash <IMG SRC=X ALT=" N/A [&amp;#8212;]->[&#8212;] [&amp;mdash;] -->[&mdash;]">
+en dash <IMG SRC=X ALT=" N/A [&amp;#8211;]->[&#8211;] [&amp;ndash;] -->[&ndash;]">
+
+</PRE><!-- </PRE> no HotJava preBeta hackx - kw -->
+<!-- second /PRE is a hack for HotJava 1.0 preBeta 1 -->
+<HR>
+<P>
+Characters not found in ISO-8859-2 have "N/A" in the <TT>Char</TT> column.
+Some characters for which I could not find entity names in either
+<A HREF="http://www.internic.net/rfc/rfc2070.txt">RFC 2070</A>
+or the
+<A HREF="ftp://www.ucc.ie/pub/sgml/">ISOlat1, ISOlat2, ISOnum, ISOpub and ISOtech</A>
+sets (the ones included by Peter Flynn's
+<A HREF="http://www.ucc.ie/doc/www/html/dtds/htmlpro.html">HTML Pro DTD</A>)
+are shown enclosed in <TT>{</TT>braces<TT>}</TT>.
+</P>
+<P>
+See Martin Ramsch's original
+<A CHARSET="iso-8859-1" HREF="https://web.archive.org/web/19970119160651/http://www.uni-passau.de:80/~ramsch/iso8859-1.html">ISO-8859-1 Table</A>
+for related info and links, and for some notes on entity names.
+This file is mostly just an adaptation of his table
+to the ISO-8859-2 character set.
+
+<HR>
+
+<ADDRESS>kweide@tezcat.com 1997-03-09</ADDRESS>
+
+</BODY>
+</HTML>
diff --git a/test/ISO_LATIN1_test.html b/test/ISO_LATIN1_test.html
new file mode 100644
index 0000000..a6369b8
--- /dev/null
+++ b/test/ISO_LATIN1_test.html
@@ -0,0 +1,84 @@
+<!DOCTYPE html public "-//IETF//DTD HTML 3.0//EN">
+<html>
+<head>
+<title>Test of minimal ISO LATIN1 character set</title>
+<link rev="made" href="mailto:lynx-dev@nongnu.org">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</head>
+
+<body>
+<h1>minimal ISO LATIN1 text entities</h1>
+<ul>
+ <li>"&AElig;", /* capital AE diphthong (ligature) */
+ <li>"&Aacute;", /* capital A, acute accent */
+ <li>"&Acirc;", /* capital A, circumflex accent */
+ <li>"&Agrave;", /* capital A, grave accent */
+ <li>"&Aring;", /* capital A, ring */
+ <li>"&Atilde;", /* capital A, tilde */
+ <li>"&Auml;", /* capital A, dieresis or umlaut mark */
+ <li>"&Ccedil;", /* capital C, cedilla */
+ <li>"&ETH;", /* capital Eth, Icelandic */
+ <li>"&Eacute;", /* capital E, acute accent */
+ <li>"&Ecirc;", /* capital E, circumflex accent */
+ <li>"&Egrave;", /* capital E, grave accent */
+ <li>"&Euml;", /* capital E, dieresis or umlaut mark */
+ <li>"&Iacute;", /* capital I, acute accent */
+ <li>"&Icirc;", /* capital I, circumflex accent */
+ <li>"&Igrave;", /* capital I, grave accent */
+ <li>"&Iuml;", /* capital I, dieresis or umlaut mark */
+ <li>"&Ntilde;", /* capital N, tilde */
+ <li>"&Oacute;", /* capital O, acute accent */
+ <li>"&Ocirc;", /* capital O, circumflex accent */
+ <li>"&Ograve;", /* capital O, grave accent */
+ <li>"&Oslash;", /* capital O, slash */
+ <li>"&Otilde;", /* capital O, tilde */
+ <li>"&Ouml;", /* capital O, dieresis or umlaut mark */
+ <li>"&THORN;", /* capital THORN, Icelandic */
+ <li>"&Uacute;", /* capital U, acute accent */
+ <li>"&Ucirc;", /* capital U, circumflex accent */
+ <li>"&Ugrave;", /* capital U, grave accent */
+ <li>"&Uuml;", /* capital U, dieresis or umlaut mark */
+ <li>"&Yacute;", /* capital Y, acute accent */
+ <li>"&aacute;", /* small a, acute accent */
+ <li>"&acirc;", /* small a, circumflex accent */
+ <li>"&aelig;", /* small ae diphthong (ligature) */
+ <li>"&agrave;", /* small a, grave accent */
+ <li>"&amp;", /* ampersand */
+ <li>"&aring;", /* small a, ring */
+ <li>"&atilde;", /* small a, tilde */
+ <li>"&auml;", /* small a, dieresis or umlaut mark */
+ <li>"&ccedil;", /* small c, cedilla */
+ <li>"&eacute;", /* small e, acute accent */
+ <li>"&ecirc;", /* small e, circumflex accent */
+ <li>"&egrave;", /* small e, grave accent */
+ <li>"&emsp;", /* emsp, em space - not collapsed */
+ <li>"&ensp;", /* ensp, en space - not collapsed */
+ <li>"&eth;", /* small eth, Icelandic */
+ <li>"&euml;", /* small e, dieresis or umlaut mark */
+ <li>"&gt;", /* greater than */
+ <li>"&iacute;", /* small i, acute accent */
+ <li>"&icirc;", /* small i, circumflex accent */
+ <li>"&igrave;", /* small i, grave accent */
+ <li>"&iuml;", /* small i, dieresis or umlaut mark */
+ <li>"&lt;", /* less than */
+ <li>"&nbsp;", /* nbsp, non breaking space */
+ <li>"&ntilde;", /* small n, tilde */
+ <li>"&oacute;", /* small o, acute accent */
+ <li>"&ocirc;", /* small o, circumflex accent */
+ <li>"&ograve;", /* small o, grave accent */
+ <li>"&oslash;", /* small o, slash */
+ <li>"&otilde;", /* small o, tilde */
+ <li>"&ouml;", /* small o, dieresis or umlaut mark */
+ <li>"&quot;", /* quote, '"' */
+ <li>"&szlig;", /* small sharp s, German (sz ligature) */
+ <li>"&thorn;", /* small thorn, Icelandic */
+ <li>"&uacute;", /* small u, acute accent */
+ <li>"&ucirc;", /* small u, circumflex accent */
+ <li>"&ugrave;", /* small u, grave accent */
+ <li>"&uuml;", /* small u, dieresis or umlaut mark */
+ <li>"&yacute;", /* small y, acute accent */
+ <li>"&yuml;", /* small y, dieresis or umlaut mark */
+</ul>
+
+</body>
+</html>
diff --git a/test/README.txt b/test/README.txt
new file mode 100644
index 0000000..21419f2
--- /dev/null
+++ b/test/README.txt
@@ -0,0 +1,8 @@
+ISO_LATIN1_test.html and iso-8859-1.html are for testing the translation of
+HTML entities with the character sets that are selectable via the 'o'ptions
+menu.
+
+TestComment.html and tabtest.html are for testing comment and TAB handling.
+
+Any other files in this directory do not represent a test suite. They
+are used during program testing to track down odd and mysterious bugs.
diff --git a/test/TestComment.html b/test/TestComment.html
new file mode 100644
index 0000000..8bf39a9
--- /dev/null
+++ b/test/TestComment.html
@@ -0,0 +1,51 @@
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0 Level 2//EN">
+<html>
+<head>
+<title>HTML Comment Parser Test</title>
+<link rev="made" href="mailto:pg@sweng.stortek.com">
+<base href="http://nyx10.cs.du.edu:8001/~pgilmart/TestComment.html">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</head>
+
+<body>
+<P> Test of the HTML/SGML comment syntax, as given in the W3 HTML Spec:
+<a
+href="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_3.html#SEC15">
+Comments</a>
+
+<P>See especially, the footnote:
+<a
+href="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_foot.html#FOOT10"
+>(10)</a>
+
+<P>
+Co-vary the LYK_MINIMAL and LYK_HISTORICAL command key toggles (use the
+'k'eymap command to see their key bindings) to establish Valid, Minimal
+or Historical comment parsing, and toggle trace mode on (Ctrl-T), to see
+how comment parsing is affected.
+
+<P>Case 01 through Case 14 should appear
+as short separate paragraphs with the case numbers aligned vertically.
+Some noise characters may appear to the right as a byproduct of code present
+for error recovery, but there should be no noise before each case number.
+
+<P> Case <!-- trivial --> | 01 | Trivial
+<P> Case <!-- extra hyphens and spaces -- -- -- > | 02 | Hyphens and Spaces
+<P> Case <!-- extra < < < --> | 03 | Extra LT --> --> -->
+<P> Case <!-- balanced < < < > > > --> | 04 | Balanced
+<P> Case <!-- extra > -- --> > still in comment --> | 05 | Extra GT
+<P> Case <!-- stuff between -- and > -- still in comment --> | 06 | Stuff Inside
+<P> Case <!-- Extra <!-- -- Second Comment --> | 07 | Extra Open --> -->
+<P> Case <!-- New Line between --
+ -- Second Comment --
+ > | 08 | New line
+<P> Case <!---> degenerate --> | 09 | Degenerate <P> Case <!----> | 10 | Empty
+<P> Case <!-- perverse <!--> | 11 | Perverse --> --> -->
+<P> Case <!-- Comment -- -- and a half > this is still in comment -- > | 12 | Multiple Comments --> --> -->
+<P> Case <!> | 13 | Zero Comments
+<P> Case <!-- < >
+< > Still in comment --> | 14 | Last
+
+<P>&lt;<STRONG>Tests completed!</STRONG>&gt;
+</body>
+</html>
diff --git a/test/X b/test/X
new file mode 100644
index 0000000..a1e2647
--- /dev/null
+++ b/test/X
@@ -0,0 +1 @@
+?
diff --git a/test/bad-html.html b/test/bad-html.html
new file mode 100644
index 0000000..ab00a96
--- /dev/null
+++ b/test/bad-html.html
@@ -0,0 +1,47 @@
+<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
+<HTML>
+<HEAD>
+<TITLE>Examples of "Bad HTML" per Lynx</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
+<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org">
+<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</HEAD>
+
+<BODY>
+<h2>Unterminated TEXTAREA</h2>
+<form action="http://localhost/cgi-bin/bogus-parms" method="get">
+<textarea name="50cols" cols="50" rows=3>
+This is not empty.
+</textarea>
+<br>
+<textarea name="50percent" cols="50%" rows=3>
+This seems to have a button.
+<button>Button 1</button>
+</textarea>
+<hr>
+<input type="submit" value="Submit this form">
+<br>
+<input type="reset" value="Reset this form">
+</form>
+
+<h2>Unterminated SELECT</h2>
+<select>
+<option>first option</option>
+<option>second option</option>
+<option>third option</option>
+</notselect>
+<br>
+<select>
+<option>first option</option>
+<option>second option</option>
+<option>third option</option>
+</select>
+
+<h2>OPTION not within SELECT</h2>
+<option>third option</option>
+
+<h2>TEXTAREA ending without starting</h2>
+</textarea>
+
+</BODY>
diff --git a/test/c1.html b/test/c1.html
new file mode 100644
index 0000000..c8d1edd
--- /dev/null
+++ b/test/c1.html
@@ -0,0 +1,64 @@
+<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
+<HTML>
+<HEAD>
+<TITLE>Test of invalid NCRs 128-159</TITLE>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</HEAD>
+<BODY><H2>Test of invalid NCRs 128-159</H2>
+<P>
+Authoring tools on MS Windows, in particular MS FrontPage ("WYSIWYG" HTML editor),
+generate invalid <DFN>Numerical Character References</DFN> for characters
+commonly found in positions 128...159 (0x80...0x9f) in Windows fonts. Although
+these are valid codepoints for <em>windows-1252</em> (and other
+windows-xxxx) charsets, valid NCRs always refer to the document character set
+in the SGML sense, not to the character encoding scheme (or charset). For HTML,
+the SGML document character set is fixed, it is always a subset of Unicode
+(or ISO 10646). In Unicode and its iso-8859-1 subset, values 128...159 are
+C1 control characters, they must not appear in HTML. Valid NCRs for the
+intended characters use Unicode values greater than 256.
+<p>
+Lynx tries to interpret some of the invalid codes, by assuming that they are
+windows-1252 codepoints.
+<PRE>
+
+You may want to press '\' to view the source of this test.
+
+<em>Code invalid NCR <!-- --> <tab id=c>valid NCR, description</em>
+<em> normal in ALT <a id=table></a> </em>
+
+0x80 &#x80; <IMG SRC=X ALT="&#x80;"> <tab to=c>&#x20AC; #EURO SIGN
+0x81 &#x81; <IMG SRC=X ALT="&#x81;"> <!--&#x0081;--> #NOT USED
+0x82 &#x82; <IMG SRC=X ALT="&#x82;"> <tab to=c>&#x201a; #SINGLE LOW-9 QUOTATION MARK
+0x83 &#x83; <IMG SRC=X ALT="&#x83;"> <tab to=c>&#x0192; #LATIN SMALL LETTER F WITH HOOK
+0x84 &#x84; <IMG SRC=X ALT="&#x84;"> <tab to=c>&#x201e; #DOUBLE LOW-9 QUOTATION MARK
+0x85 &#x85; <IMG SRC=X ALT="&#x85;"> <tab to=c>&#x2026; #HORIZONTAL ELLIPSIS
+0x86 &#x86; <IMG SRC=X ALT="&#x86;"> <tab to=c>&#x2020; #DAGGER
+0x87 &#x87; <IMG SRC=X ALT="&#x87;"> <tab to=c>&#x2021; #DOUBLE DAGGER
+0x88 &#x88; <IMG SRC=X ALT="&#x88;"> <tab to=c>&#x02c6; #MODIFIER LETTER CIRCUMFLEX ACCENT
+0x89 &#x89; <IMG SRC=X ALT="&#x89;"> <tab to=c>&#x2030; #PER MILLE SIGN
+0x8a &#x8a; <IMG SRC=X ALT="&#x8a;"> <tab to=c>&#x0160; #LATIN CAPITAL LETTER S WITH CARON
+0x8b &#x8b; <IMG SRC=X ALT="&#x8b;"> <tab to=c>&#x2039; #SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+0x8c &#x8c; <IMG SRC=X ALT="&#x8c;"> <tab to=c>&#x0152; #LATIN CAPITAL LIGATURE OE
+0x8d &#x8d; <IMG SRC=X ALT="&#x8d;"> <!--&#x008d;--> #NOT USED
+0x8e &#x8e; <IMG SRC=X ALT="&#x8e;"> <tab to=c>&#x017d; #LATIN CAPITAL LETTER Z WITH CARON
+0x8f &#x8f; <IMG SRC=X ALT="&#x8f;"> <!--&#x008f;--> #NOT USED
+0x90 &#x90; <IMG SRC=X ALT="&#x90;"> <!--&#x0090;--> #NOT USED
+0x91 &#x91; <IMG SRC=X ALT="&#x91;"> <tab to=c>&#x2018; #LEFT SINGLE QUOTATION MARK
+0x92 &#x92; <IMG SRC=X ALT="&#x92;"> <tab to=c>&#x2019; #RIGHT SINGLE QUOTATION MARK
+0x93 &#x93; <IMG SRC=X ALT="&#x93;"> <tab to=c>&#x201c; #LEFT DOUBLE QUOTATION MARK
+0x94 &#x94; <IMG SRC=X ALT="&#x94;"> <tab to=c>&#x201d; #RIGHT DOUBLE QUOTATION MARK
+0x95 &#x95; <IMG SRC=X ALT="&#x95;"> <tab to=c>&#x2022; #BULLET
+0x96 &#x96; <IMG SRC=X ALT="&#x96;"> <tab to=c>&#x2013; #EN DASH
+0x97 &#x97; <IMG SRC=X ALT="&#x97;"> <tab to=c>&#x2014; #EM DASH
+0x98 &#x98; <IMG SRC=X ALT="&#x98;"> <tab to=c>&#x02dc; #SMALL TILDE
+0x99 &#x99; <IMG SRC=X ALT="&#x99;"> <tab to=c>&#x2122; #TRADE MARK SIGN
+0x9a &#x9a; <IMG SRC=X ALT="&#x9a;"> <tab to=c>&#x0161; #LATIN SMALL LETTER S WITH CARON
+0x9b &#x9b; <IMG SRC=X ALT="&#x9b;"> <tab to=c>&#x203a; #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+0x9c &#x9c; <IMG SRC=X ALT="&#x9c;"> <tab to=c>&#x0153; #LATIN SMALL LIGATURE OE
+0x9d &#x9d; <IMG SRC=X ALT="&#x9d;"> <!--&#x009d;--> #NOT USED
+0x9e &#x9e; <IMG SRC=X ALT="&#x9e;"> <tab to=c>&#x017e; #LATIN SMALL LETTER Z WITH CARON
+0x9f &#x9f; <IMG SRC=X ALT="&#x9f;"> <tab to=c>&#x0178; #LATIN CAPITAL LETTER Y WITH DIAERESIS
+
+</PRE>
+</BODY>
+</HTML>
diff --git a/test/circle.html b/test/circle.html
new file mode 100644
index 0000000..acf8f45
--- /dev/null
+++ b/test/circle.html
@@ -0,0 +1,15 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
+
+<html>
+<head>
+ <meta name="generator" content=
+ "HTML Tidy for Linux (vers 25 March 2009), see www.w3.org">
+
+ <title>Test ImageMap - circle</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</head>
+
+<body>
+ <p>CIRCLE</p>
+</body>
+</html>
diff --git a/test/cp-1252.html b/test/cp-1252.html
new file mode 100644
index 0000000..f895ac7
--- /dev/null
+++ b/test/cp-1252.html
@@ -0,0 +1,179 @@
+<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
+<HTML>
+<HEAD>
+<TITLE>Character table for cp-1252</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=cp-1252">
+<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org">
+<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/">
+<LINK REL="sibling" HREF="iso-8859-1.html" TITLE="iso-8859-1 test">
+<LINK REL="sibling" HREF="ALT88592.html" TITLE="iso-8859-2 ALT test">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</HEAD>
+
+<BODY>
+
+<H1 ALIGN=center>cp-1252 table</H1>
+
+<PRE>
+Description Code Entity name
+=================================== ============ ==============
+quotation mark &amp;#34; --> &#34; &amp;quot; --> &quot;
+ampersand &amp;#38; --> &#38; &amp;amp; --> &amp;
+less-than sign &amp;#60; --> &#60; &amp;lt; --> &lt;
+greater-than sign &amp;#62; --> &#62; &amp;gt; --> &gt;
+
+Description Char Code Entity name
+=================================== ==== ============ ==============
+euro sign € &amp;#128; --> &#128;
+single low-9 quotation mark ‚ &amp;#130; --> &#130;
+latin small letter f with hook ƒ &amp;#131; --> &#131;
+double low-9 quotation mark „ &amp;#132; --> &#132;
+horizontal ellipsis … &amp;#133; --> &#133;
+dagger † &amp;#134; --> &#134;
+double dagger ‡ &amp;#135; --> &#135;
+modifier letter circumflex accent ˆ &amp;#136; --> &#136;
+per mille sign ‰ &amp;#137; --> &#137;
+latin capital letter s with caron Š &amp;#138; --> &#138;
+single left-pointing angle quote mark ‹ &amp;#139; --> &#139;
+latin capital ligature oe Œ &amp;#140; --> &#140;
+latin capital letter z with caron Ž &amp;#142; --> &#142;
+
+left single quotation mark ‘ &amp;#145; --> &#145;
+right single quotation mark ’ &amp;#146; --> &#146;
+left double quotation mark “ &amp;#147; --> &#147;
+right double quotation mark ” &amp;#148; --> &#148;
+bullet • &amp;#149; --> &#149;
+en dash – &amp;#150; --> &#150;
+em dash — &amp;#151; --> &#151;
+small tilde ˜ &amp;#152; --> &#152;
+trade mark sign ™ &amp;#153; --> &#153;
+latin small letter s with caron š &amp;#154; --> &#154;
+single right-pointing angle quote mark › &amp;#155; --> &#155;
+latin small ligature oe œ &amp;#156; --> &#156;
+latin small letter z with caron ž &amp;#158; --> &#158;
+latin capital letter y with diaeresis Ÿ &amp;#159; --> &#159;
+
+non-breaking space   &amp;#160; --> &#160; &amp;nbsp; --> &nbsp;
+inverted exclamation ¡ &amp;#161; --> &#161; &amp;iexcl; --> &iexcl;
+cent sign ¢ &amp;#162; --> &#162; &amp;cent; --> &cent;
+pound sterling £ &amp;#163; --> &#163; &amp;pound; --> &pound;
+general currency sign ¤ &amp;#164; --> &#164; &amp;curren; --> &curren;
+yen sign ¥ &amp;#165; --> &#165; &amp;yen; --> &yen;
+broken vertical bar ¦ &amp;#166; --> &#166; &amp;brvbar; --> &brvbar;
+section sign § &amp;#167; --> &#167; &amp;sect; --> &sect;
+umlaut (dieresis) ¨ &amp;#168; --> &#168; &amp;uml; --> &uml;
+copyright © &amp;#169; --> &#169; &amp;copy; --> &copy;
+feminine ordinal ª &amp;#170; --> &#170; &amp;ordf; --> &ordf;
+left angle quote, guillemotleft « &amp;#171; --> &#171; &amp;laquo; --> &laquo;
+not sign ¬ &amp;#172; --> &#172; &amp;not; --> &not;
+soft hyphen ­ &amp;#173; --> &#173; &amp;shy; --> &shy;
+registered trademark ® &amp;#174; --> &#174; &amp;reg; --> &reg;
+macron accent ¯ &amp;#175; --> &#175; &amp;macr; --> &macr;
+
+degree sign ° &amp;#176; --> &#176; &amp;deg; --> &deg;
+plus or minus ± &amp;#177; --> &#177; &amp;plusmn; --> &plusmn;
+superscript two ² &amp;#178; --> &#178; &amp;sup2; --> &sup2;
+superscript three ³ &amp;#179; --> &#179; &amp;sup3; --> &sup3;
+acute accent ´ &amp;#180; --> &#180; &amp;acute; --> &acute;
+micro sign µ &amp;#181; --> &#181; &amp;micro; --> &micro;
+paragraph sign ¶ &amp;#182; --> &#182; &amp;para; --> &para;
+middle dot · &amp;#183; --> &#183; &amp;middot; --> &middot;
+cedilla ¸ &amp;#184; --> &#184; &amp;cedil; --> &cedil;
+superscript one ¹ &amp;#185; --> &#185; &amp;sup1; --> &sup1;
+masculine ordinal º &amp;#186; --> &#186; &amp;ordm; --> &ordm;
+right angle quote, guillemotright » &amp;#187; --> &#187; &amp;raquo; --> &raquo;
+vulgar fraction one-quarter ¼ &amp;#188; --> &#188; &amp;frac14; --> &frac14;
+vulgar fraction one-half ½ &amp;#189; --> &#189; &amp;frac12; --> &frac12;
+vulgar fraction three-fourths ¾ &amp;#190; --> &#190; &amp;frac34; --> &frac34;
+inverted question mark ¿ &amp;#191; --> &#191; &amp;iquest; --> &iquest;
+
+latin capital letter a with grave À &amp;#192; --> &#192; &amp;Agrave; --> &Agrave;
+latin capital letter a with acute Á &amp;#193; --> &#193; &amp;Aacute; --> &Aacute;
+latin capital letter a with circumflex Â &amp;#194; --> &#194; &amp;Acirc; --> &Acirc;
+latin capital letter a with tilde à &amp;#195; --> &#195; &amp;Atilde; --> &Atilde;
+latin capital letter a with diaeresis Ä &amp;#196; --> &#196; &amp;Auml; --> &Auml;
+latin capital letter a with ring above Å &amp;#197; --> &#197; &amp;Aring; --> &Aring;
+latin capital letter ae Æ &amp;#198; --> &#198; &amp;AElig; --> &AElig;
+latin capital letter c with cedilla Ç &amp;#199; --> &#199; &amp;Ccedil; --> &Ccedil;
+latin capital letter e with grave È &amp;#200; --> &#200; &amp;Egrave; --> &Egrave;
+latin capital letter e with acute É &amp;#201; --> &#201; &amp;Eacute; --> &Eacute;
+latin capital letter e with circumflex Ê &amp;#202; --> &#202; &amp;Ecirc; --> &Ecirc;
+latin capital letter e with diaeresis Ë &amp;#203; --> &#203; &amp;Euml; --> &Euml;
+latin capital letter i with grave Ì &amp;#204; --> &#204; &amp;Igrave; --> &Igrave;
+latin capital letter i with acute Í &amp;#205; --> &#205; &amp;Iacute; --> &Iacute;
+latin capital letter i with circumflex Î &amp;#206; --> &#206; &amp;Icirc; --> &Icirc;
+latin capital letter i with diaeresis Ï &amp;#207; --> &#207; &amp;Iuml; --> &Iuml;
+
+latin capital letter eth Ð &amp;#208; --> &#208; &amp;ETH; --> &ETH;
+latin capital letter n with tilde Ñ &amp;#209; --> &#209; &amp;Ntilde; --> &Ntilde;
+latin capital letter o with grave Ò &amp;#210; --> &#210; &amp;Ograve; --> &Ograve;
+latin capital letter o with acute Ó &amp;#211; --> &#211; &amp;Oacute; --> &Oacute;
+latin capital letter o with circumflex Ô &amp;#212; --> &#212; &amp;Ocirc; --> &Ocirc;
+latin capital letter o with tilde Õ &amp;#213; --> &#213; &amp;Otilde; --> &Otilde;
+latin capital letter o with diaeresis Ö &amp;#214; --> &#214; &amp;Ouml; --> &Ouml;
+multiplication sign × &amp;#215; --> &#215; &amp;times; --> &times;
+latin capital letter o with stroke Ø &amp;#216; --> &#216; &amp;Oslash; --> &Oslash;
+latin capital letter u with grave Ù &amp;#217; --> &#217; &amp;Ugrave; --> &Ugrave;
+latin capital letter u with acute Ú &amp;#218; --> &#218; &amp;Uacute; --> &Uacute;
+latin capital letter u with circumflex Û &amp;#219; --> &#219; &amp;Ucirc; --> &Ucirc;
+latin capital letter u with diaeresis Ü &amp;#220; --> &#220; &amp;Uuml; --> &Uuml;
+latin capital letter y with acute Ý &amp;#221; --> &#221; &amp;Yacute; --> &Yacute;
+latin capital letter thorn Þ &amp;#222; --> &#222; &amp;THORN; --> &THORN;
+latin small letter sharp s ß &amp;#223; --> &#223; &amp;szlig; --> &szlig;
+
+latin small letter a with grave à &amp;#224; --> &#224; &amp;agrave; --> &agrave;
+latin small letter a with acute á &amp;#225; --> &#225; &amp;aacute; --> &aacute;
+latin small letter a with circumflex â &amp;#226; --> &#226; &amp;acirc; --> &acirc;
+latin small letter a with tilde ã &amp;#227; --> &#227; &amp;atilde; --> &atilde;
+latin small letter a with diaeresis ä &amp;#228; --> &#228; &amp;auml; --> &auml;
+latin small letter a with ring above å &amp;#229; --> &#229; &amp;aring; --> &aring;
+latin small letter ae æ &amp;#230; --> &#230; &amp;aelig; --> &aelig;
+latin small letter c with cedilla ç &amp;#231; --> &#231; &amp;ccedil; --> &ccedil;
+latin small letter e with grave è &amp;#232; --> &#232; &amp;egrave; --> &egrave;
+latin small letter e with acute é &amp;#233; --> &#233; &amp;eacute; --> &eacute;
+latin small letter e with circumflex ê &amp;#234; --> &#234; &amp;ecirc; --> &ecirc;
+latin small letter e with diaeresis ë &amp;#235; --> &#235; &amp;euml; --> &euml;
+latin small letter i with grave ì &amp;#236; --> &#236; &amp;igrave; --> &igrave;
+latin small letter i with acute í &amp;#237; --> &#237; &amp;iacute; --> &iacute;
+latin small letter i with circumflex î &amp;#238; --> &#238; &amp;icirc; --> &icirc;
+latin small letter i with diaeresis ï &amp;#239; --> &#239; &amp;iuml; --> &iuml;
+
+latin small letter eth ð &amp;#240; --> &#240; &amp;eth; --> &eth;
+latin small letter n with tilde ñ &amp;#241; --> &#241; &amp;ntilde; --> &ntilde;
+latin small letter o with grave ò &amp;#242; --> &#242; &amp;ograve; --> &ograve;
+latin small letter o with acute ó &amp;#243; --> &#243; &amp;oacute; --> &oacute;
+latin small letter o with circumflex ô &amp;#244; --> &#244; &amp;ocirc; --> &ocirc;
+latin small letter o with tilde õ &amp;#245; --> &#245; &amp;otilde; --> &otilde;
+latin small letter o with diaeresis ö &amp;#246; --> &#246; &amp;ouml; --> &ouml;
+division sign ÷ &amp;#247; --> &#247; &amp;divide; --> &divide;
+latin small letter o with stroke ø &amp;#248; --> &#248; &amp;oslash; --> &oslash;
+latin small letter u with grave ù &amp;#249; --> &#249; &amp;ugrave; --> &ugrave;
+latin small letter u with acute ú &amp;#250; --> &#250; &amp;uacute; --> &uacute;
+latin small letter u with circumflex û &amp;#251; --> &#251; &amp;ucirc; --> &ucirc;
+latin small letter u with diaeresis ü &amp;#252; --> &#252; &amp;uuml; --> &uuml;
+latin small letter y with acute ý &amp;#253; --> &#253; &amp;yacute; --> &yacute;
+latin small letter thorn þ &amp;#254; --> &#254; &amp;thorn; --> &thorn;
+latin small letter y with diaeresis {ÿ} {&amp;#255;}-->{&#255;} {&amp;yuml;} -->{&yuml;}
+
+Some other characters of interest Char Code Entity name
+=================================== ==== ============ ==============
+capital AE diphthong (ligature) N/A &amp;#198; --> &#198; &amp;AElig; --> &AElig;
+small ae diphthong (ligature) N/A &amp;#230; --> &#230; &amp;aelig; --> &aelig;
+capital OE ligature N/A {&amp;#338;}-->{&#338;} {&amp;OElig;} -->{&OElig;}
+small oe ligature N/A {&amp;#339;}-->{&#339;} {&amp;oelig;} -->{&oelig;}
+copyright N/A &amp;#169; --> &#169; &amp;copy; --> &copy;
+registered trademark N/A &amp;#174; --> &#174; &amp;reg; --> &reg;
+trademark sign N/A &amp;#8482;--> &#8482; &amp;trade; --> &trade;
+em space N/A [&amp;#8195;]->[&#8195;] [&amp;emsp;] -->[&emsp;]
+en space N/A [&amp;#8194;]->[&#8194;] [&amp;ensp;] -->[&ensp;]
+1/3-em space N/A [&amp;#8196;]->[&#8196;] [&amp;emsp13;] -->[&emsp13;]
+1/4-em space N/A [&amp;#8197;]->[&#8197;] [&amp;emsp14;] -->[&emsp14;]
+thin space N/A [&amp;#8201;]->[&#8201;] [&amp;thinsp;]-->[&thinsp;]
+hair space N/A [&amp;#8202;]->[&#8202;] [&amp;hairsp;]-->[&hairsp;]
+em dash N/A [&amp;#8212;]->[&#8212;] [&amp;mdash;] -->[&mdash;]
+en dash N/A [&amp;#8211;]->[&#8211;] [&amp;ndash;] -->[&ndash;]
+
+</PRE>
+
+</BODY>
+</HTML>
diff --git a/test/cp-1252a.html b/test/cp-1252a.html
new file mode 100644
index 0000000..1bb5d86
--- /dev/null
+++ b/test/cp-1252a.html
@@ -0,0 +1,184 @@
+<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
+<HTML>
+<HEAD>
+<TITLE>Character table for cp-1252</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=cp-1252">
+<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org">
+<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/">
+<LINK REL="sibling" HREF="iso-8859-1.html" TITLE="iso-8859-1 test">
+<LINK REL="sibling" HREF="ALT88592.html" TITLE="iso-8859-2 ALT test">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</HEAD>
+
+<BODY>
+
+<H1 ALIGN=center>cp-1252 table</H1>
+
+<PRE>
+Description Code Entity name
+=================================== ============ ==============
+quotation mark &amp;#34; --> &#34; &amp;quot; --> &quot;
+ampersand &amp;#38; --> &#38; &amp;amp; --> &amp;
+less-than sign &amp;#60; --> &#60; &amp;lt; --> &lt;
+greater-than sign &amp;#62; --> &#62; &amp;gt; --> &gt;
+
+Description Char Code Entity name
+=================================== ==== ============ ==============
+euro sign € &amp;#128; --> &#128;
+undefined &amp;#129; --> &#129;
+single low-9 quotation mark ‚ &amp;#130; --> &#130;
+latin small letter f with hook ƒ &amp;#131; --> &#131;
+double low-9 quotation mark „ &amp;#132; --> &#132;
+horizontal ellipsis … &amp;#133; --> &#133;
+dagger † &amp;#134; --> &#134;
+double dagger ‡ &amp;#135; --> &#135;
+modifier letter circumflex accent ˆ &amp;#136; --> &#136;
+per mille sign ‰ &amp;#137; --> &#137;
+latin capital letter s with caron Š &amp;#138; --> &#138;
+single left-pointing angle quote mark ‹ &amp;#139; --> &#139;
+latin capital ligature oe Œ &amp;#140; --> &#140;
+undefined &amp;#141; --> &#141;
+latin capital letter z with caron Ž &amp;#142; --> &#142;
+undefined &amp;#143; --> &#143;
+
+undefined &amp;#144; --> &#144;
+left single quotation mark ‘ &amp;#145; --> &#145;
+right single quotation mark ’ &amp;#146; --> &#146;
+left double quotation mark “ &amp;#147; --> &#147;
+right double quotation mark ” &amp;#148; --> &#148;
+bullet • &amp;#149; --> &#149;
+en dash – &amp;#150; --> &#150;
+em dash — &amp;#151; --> &#151;
+small tilde ˜ &amp;#152; --> &#152;
+trade mark sign ™ &amp;#153; --> &#153;
+latin small letter s with caron š &amp;#154; --> &#154;
+single right-pointing angle quote mark › &amp;#155; --> &#155;
+latin small ligature oe œ &amp;#156; --> &#156;
+undefined &amp;#157; --> &#157;
+latin small letter z with caron ž &amp;#158; --> &#158;
+latin capital letter y with diaeresis Ÿ &amp;#159; --> &#159;
+
+non-breaking space   &amp;#160; --> &#160; &amp;nbsp; --> &nbsp;
+inverted exclamation ¡ &amp;#161; --> &#161; &amp;iexcl; --> &iexcl;
+cent sign ¢ &amp;#162; --> &#162; &amp;cent; --> &cent;
+pound sterling £ &amp;#163; --> &#163; &amp;pound; --> &pound;
+general currency sign ¤ &amp;#164; --> &#164; &amp;curren; --> &curren;
+yen sign ¥ &amp;#165; --> &#165; &amp;yen; --> &yen;
+broken vertical bar ¦ &amp;#166; --> &#166; &amp;brvbar; --> &brvbar;
+section sign § &amp;#167; --> &#167; &amp;sect; --> &sect;
+umlaut (dieresis) ¨ &amp;#168; --> &#168; &amp;uml; --> &uml;
+copyright © &amp;#169; --> &#169; &amp;copy; --> &copy;
+feminine ordinal ª &amp;#170; --> &#170; &amp;ordf; --> &ordf;
+left angle quote, guillemotleft « &amp;#171; --> &#171; &amp;laquo; --> &laquo;
+not sign ¬ &amp;#172; --> &#172; &amp;not; --> &not;
+soft hyphen ­ &amp;#173; --> &#173; &amp;shy; --> &shy;
+registered trademark ® &amp;#174; --> &#174; &amp;reg; --> &reg;
+macron accent ¯ &amp;#175; --> &#175; &amp;macr; --> &macr;
+
+degree sign ° &amp;#176; --> &#176; &amp;deg; --> &deg;
+plus or minus ± &amp;#177; --> &#177; &amp;plusmn; --> &plusmn;
+superscript two ² &amp;#178; --> &#178; &amp;sup2; --> &sup2;
+superscript three ³ &amp;#179; --> &#179; &amp;sup3; --> &sup3;
+acute accent ´ &amp;#180; --> &#180; &amp;acute; --> &acute;
+micro sign µ &amp;#181; --> &#181; &amp;micro; --> &micro;
+paragraph sign ¶ &amp;#182; --> &#182; &amp;para; --> &para;
+middle dot · &amp;#183; --> &#183; &amp;middot; --> &middot;
+cedilla ¸ &amp;#184; --> &#184; &amp;cedil; --> &cedil;
+superscript one ¹ &amp;#185; --> &#185; &amp;sup1; --> &sup1;
+masculine ordinal º &amp;#186; --> &#186; &amp;ordm; --> &ordm;
+right angle quote, guillemotright » &amp;#187; --> &#187; &amp;raquo; --> &raquo;
+vulgar fraction one-quarter ¼ &amp;#188; --> &#188; &amp;frac14; --> &frac14;
+vulgar fraction one-half ½ &amp;#189; --> &#189; &amp;frac12; --> &frac12;
+vulgar fraction three-fourths ¾ &amp;#190; --> &#190; &amp;frac34; --> &frac34;
+inverted question mark ¿ &amp;#191; --> &#191; &amp;iquest; --> &iquest;
+
+latin capital letter a with grave À &amp;#192; --> &#192; &amp;Agrave; --> &Agrave;
+latin capital letter a with acute Á &amp;#193; --> &#193; &amp;Aacute; --> &Aacute;
+latin capital letter a with circumflex  &amp;#194; --> &#194; &amp;Acirc; --> &Acirc;
+latin capital letter a with tilde à &amp;#195; --> &#195; &amp;Atilde; --> &Atilde;
+latin capital letter a with diaeresis Ä &amp;#196; --> &#196; &amp;Auml; --> &Auml;
+latin capital letter a with ring above Å &amp;#197; --> &#197; &amp;Aring; --> &Aring;
+latin capital letter ae Æ &amp;#198; --> &#198; &amp;AElig; --> &AElig;
+latin capital letter c with cedilla Ç &amp;#199; --> &#199; &amp;Ccedil; --> &Ccedil;
+latin capital letter e with grave È &amp;#200; --> &#200; &amp;Egrave; --> &Egrave;
+latin capital letter e with acute É &amp;#201; --> &#201; &amp;Eacute; --> &Eacute;
+latin capital letter e with circumflex Ê &amp;#202; --> &#202; &amp;Ecirc; --> &Ecirc;
+latin capital letter e with diaeresis Ë &amp;#203; --> &#203; &amp;Euml; --> &Euml;
+latin capital letter i with grave Ì &amp;#204; --> &#204; &amp;Igrave; --> &Igrave;
+latin capital letter i with acute Í &amp;#205; --> &#205; &amp;Iacute; --> &Iacute;
+latin capital letter i with circumflex Î &amp;#206; --> &#206; &amp;Icirc; --> &Icirc;
+latin capital letter i with diaeresis Ï &amp;#207; --> &#207; &amp;Iuml; --> &Iuml;
+
+latin capital letter eth Ð &amp;#208; --> &#208; &amp;ETH; --> &ETH;
+latin capital letter n with tilde Ñ &amp;#209; --> &#209; &amp;Ntilde; --> &Ntilde;
+latin capital letter o with grave Ò &amp;#210; --> &#210; &amp;Ograve; --> &Ograve;
+latin capital letter o with acute Ó &amp;#211; --> &#211; &amp;Oacute; --> &Oacute;
+latin capital letter o with circumflex Ô &amp;#212; --> &#212; &amp;Ocirc; --> &Ocirc;
+latin capital letter o with tilde Õ &amp;#213; --> &#213; &amp;Otilde; --> &Otilde;
+latin capital letter o with diaeresis Ö &amp;#214; --> &#214; &amp;Ouml; --> &Ouml;
+multiplication sign × &amp;#215; --> &#215; &amp;times; --> &times;
+latin capital letter o with stroke Ø &amp;#216; --> &#216; &amp;Oslash; --> &Oslash;
+latin capital letter u with grave Ù &amp;#217; --> &#217; &amp;Ugrave; --> &Ugrave;
+latin capital letter u with acute Ú &amp;#218; --> &#218; &amp;Uacute; --> &Uacute;
+latin capital letter u with circumflex Û &amp;#219; --> &#219; &amp;Ucirc; --> &Ucirc;
+latin capital letter u with diaeresis Ü &amp;#220; --> &#220; &amp;Uuml; --> &Uuml;
+latin capital letter y with acute Ý &amp;#221; --> &#221; &amp;Yacute; --> &Yacute;
+latin capital letter thorn Þ &amp;#222; --> &#222; &amp;THORN; --> &THORN;
+latin small letter sharp s ß &amp;#223; --> &#223; &amp;szlig; --> &szlig;
+
+latin small letter a with grave à &amp;#224; --> &#224; &amp;agrave; --> &agrave;
+latin small letter a with acute á &amp;#225; --> &#225; &amp;aacute; --> &aacute;
+latin small letter a with circumflex â &amp;#226; --> &#226; &amp;acirc; --> &acirc;
+latin small letter a with tilde ã &amp;#227; --> &#227; &amp;atilde; --> &atilde;
+latin small letter a with diaeresis ä &amp;#228; --> &#228; &amp;auml; --> &auml;
+latin small letter a with ring above å &amp;#229; --> &#229; &amp;aring; --> &aring;
+latin small letter ae æ &amp;#230; --> &#230; &amp;aelig; --> &aelig;
+latin small letter c with cedilla ç &amp;#231; --> &#231; &amp;ccedil; --> &ccedil;
+latin small letter e with grave è &amp;#232; --> &#232; &amp;egrave; --> &egrave;
+latin small letter e with acute é &amp;#233; --> &#233; &amp;eacute; --> &eacute;
+latin small letter e with circumflex ê &amp;#234; --> &#234; &amp;ecirc; --> &ecirc;
+latin small letter e with diaeresis ë &amp;#235; --> &#235; &amp;euml; --> &euml;
+latin small letter i with grave ì &amp;#236; --> &#236; &amp;igrave; --> &igrave;
+latin small letter i with acute í &amp;#237; --> &#237; &amp;iacute; --> &iacute;
+latin small letter i with circumflex î &amp;#238; --> &#238; &amp;icirc; --> &icirc;
+latin small letter i with diaeresis ï &amp;#239; --> &#239; &amp;iuml; --> &iuml;
+
+latin small letter eth ð &amp;#240; --> &#240; &amp;eth; --> &eth;
+latin small letter n with tilde ñ &amp;#241; --> &#241; &amp;ntilde; --> &ntilde;
+latin small letter o with grave ò &amp;#242; --> &#242; &amp;ograve; --> &ograve;
+latin small letter o with acute ó &amp;#243; --> &#243; &amp;oacute; --> &oacute;
+latin small letter o with circumflex ô &amp;#244; --> &#244; &amp;ocirc; --> &ocirc;
+latin small letter o with tilde õ &amp;#245; --> &#245; &amp;otilde; --> &otilde;
+latin small letter o with diaeresis ö &amp;#246; --> &#246; &amp;ouml; --> &ouml;
+division sign ÷ &amp;#247; --> &#247; &amp;divide; --> &divide;
+latin small letter o with stroke ø &amp;#248; --> &#248; &amp;oslash; --> &oslash;
+latin small letter u with grave ù &amp;#249; --> &#249; &amp;ugrave; --> &ugrave;
+latin small letter u with acute ú &amp;#250; --> &#250; &amp;uacute; --> &uacute;
+latin small letter u with circumflex û &amp;#251; --> &#251; &amp;ucirc; --> &ucirc;
+latin small letter u with diaeresis ü &amp;#252; --> &#252; &amp;uuml; --> &uuml;
+latin small letter y with acute ý &amp;#253; --> &#253; &amp;yacute; --> &yacute;
+latin small letter thorn þ &amp;#254; --> &#254; &amp;thorn; --> &thorn;
+latin small letter y with diaeresis {ÿ} {&amp;#255;}-->{&#255;} {&amp;yuml;} -->{&yuml;}
+
+Some other characters of interest Char Code Entity name
+=================================== ==== ============ ==============
+capital AE diphthong (ligature) N/A &amp;#198; --> &#198; &amp;AElig; --> &AElig;
+small ae diphthong (ligature) N/A &amp;#230; --> &#230; &amp;aelig; --> &aelig;
+capital OE ligature N/A {&amp;#338;}-->{&#338;} {&amp;OElig;} -->{&OElig;}
+small oe ligature N/A {&amp;#339;}-->{&#339;} {&amp;oelig;} -->{&oelig;}
+copyright N/A &amp;#169; --> &#169; &amp;copy; --> &copy;
+registered trademark N/A &amp;#174; --> &#174; &amp;reg; --> &reg;
+trademark sign N/A &amp;#8482;--> &#8482; &amp;trade; --> &trade;
+em space N/A [&amp;#8195;]->[&#8195;] [&amp;emsp;] -->[&emsp;]
+en space N/A [&amp;#8194;]->[&#8194;] [&amp;ensp;] -->[&ensp;]
+1/3-em space N/A [&amp;#8196;]->[&#8196;] [&amp;emsp13;] -->[&emsp13;]
+1/4-em space N/A [&amp;#8197;]->[&#8197;] [&amp;emsp14;] -->[&emsp14;]
+thin space N/A [&amp;#8201;]->[&#8201;] [&amp;thinsp;]-->[&thinsp;]
+hair space N/A [&amp;#8202;]->[&#8202;] [&amp;hairsp;]-->[&hairsp;]
+em dash N/A [&amp;#8212;]->[&#8212;] [&amp;mdash;] -->[&mdash;]
+en dash N/A [&amp;#8211;]->[&#8211;] [&amp;ndash;] -->[&ndash;]
+
+</PRE>
+
+</BODY>
+</HTML>
diff --git a/test/idna-tr46.html b/test/idna-tr46.html
new file mode 100644
index 0000000..f1e0b0c
--- /dev/null
+++ b/test/idna-tr46.html
@@ -0,0 +1,55 @@
+<!DOCTYPE HTML>
+<!--
+ https://unicode.org/reports/tr46/
+-->
+<html lang="en">
+<head>
+<title>Sample URLs from TR-46</title>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</head>
+
+<body>
+<p>Samples from <em>Table 1. Deviation Characters</em></p>
+<ul>
+<li>
+<p>
+<a href="http://fa&#223;.de">test</a>
+fa&#223;.de
+</p>
+<ul>
+<li>2003: http://fass.de</li>
+<li>2008: http://xn--fa-hia.de</li>
+</ul>
+</li>
+<li>
+<p><a href="http://&#946;&#972;&#955;&#959;&#962;.com">test</a>
+&#946;&#972;&#955;&#959;&#962;.com
+</p>
+<ul>
+<li>2003: http://xn--nxasmq6b.com</li>
+<li>2008: http://xn--nxasmm1c.com</li>
+</ul>
+</li>
+<li>
+<p>
+<a href="http://&#3521;&#3530;&#8205;&#3515;&#3539;.com">test</a>
+&#3521;&#3530;&#8205;&#3515;&#3539;.com
+</p>
+<ul>
+<li>2003: http://xn--10cl1a0b.com</li>
+<li>2008: http://xn--10cl1a0b660p.com/</li>
+</ul>
+</li>
+<li>
+<p><a href="http://&#1606;&#1575;&#1605;&#1607;&#8204;&#1575;&#1740;.com">test</a>
+&#1606;&#1575;&#1605;&#1607;&#8204;&#1575;&#1740;.com
+</p>
+<ul>
+<li>2003: http://xn--mgba3gch31f.com/</li>
+<li>2008: http://xn--mgba3gch31f060k.com/</li>
+</ul>
+</li>
+</ul>
+</body>
+</html>
diff --git a/test/image.jpg b/test/image.jpg
new file mode 100644
index 0000000..5c102b8
--- /dev/null
+++ b/test/image.jpg
Binary files differ
diff --git a/test/iso-8859-1.html b/test/iso-8859-1.html
new file mode 100644
index 0000000..61a13e4
--- /dev/null
+++ b/test/iso-8859-1.html
@@ -0,0 +1,242 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<!-- X-URL: http://www.ramsch.org/martin/uni/fmi-hp/iso8859-1.html -->
+<!-- Date: Tue, 28 Dec 2004 20:24:09 GMT -->
+<!-- Last-Modified: Mon, 15 May 2000 09:37:37 GMT -->
+<HTML>
+<HEAD>
+<TITLE>Martin Ramsch - iso8859-1 table</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
+<BASE HREF="http://www.ramsch.org/martin/uni/fmi-hp/iso8859-1.html">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</HEAD>
+
+<BODY>
+
+<H1 ALIGN=center>iso8859-1 table</H1>
+
+<PRE>
+Description Code Entity name
+=================================== ============ ==============
+quotation mark &amp;#34; --> &#34; &amp;quot; --> &quot;
+ampersand &amp;#38; --> &#38; &amp;amp; --> &amp;
+less-than sign &amp;#60; --> &#60; &amp;lt; --> &lt;
+greater-than sign &amp;#62; --> &#62; &amp;gt; --> &gt;
+
+Description Char Code Entity name
+=================================== ==== ============ ==============
+non-breaking space   &amp;#160; --> &#160; &amp;nbsp; --> &nbsp;
+inverted exclamation ¡ &amp;#161; --> &#161; &amp;iexcl; --> &iexcl;
+cent sign ¢ &amp;#162; --> &#162; &amp;cent; --> &cent;
+pound sterling £ &amp;#163; --> &#163; &amp;pound; --> &pound;
+general currency sign ¤ &amp;#164; --> &#164; &amp;curren; --> &curren;
+yen sign ¥ &amp;#165; --> &#165; &amp;yen; --> &yen;
+broken vertical bar ¦ &amp;#166; --> &#166; &amp;brvbar; --> &brvbar;
+ Non-standard &amp;brkbar; --> &brkbar;
+section sign § &amp;#167; --> &#167; &amp;sect; --> &sect;
+umlaut (dieresis) ¨ &amp;#168; --> &#168; &amp;uml; --> &uml;
+ Non-standard &amp;die; --> &die;
+copyright © &amp;#169; --> &#169; &amp;copy; --> &copy;
+feminine ordinal ª &amp;#170; --> &#170; &amp;ordf; --> &ordf;
+left angle quote, guillemotleft « &amp;#171; --> &#171; &amp;laquo; --> &laquo;
+not sign ¬ &amp;#172; --> &#172; &amp;not; --> &not;
+soft hyphen ­ &amp;#173; --> &#173; &amp;shy; --> &shy;
+registered trademark ® &amp;#174; --> &#174; &amp;reg; --> &reg;
+macron accent ¯ &amp;#175; --> &#175; &amp;macr; --> &macr;
+ Non-standard &amp;hibar; --> &hibar;
+degree sign ° &amp;#176; --> &#176; &amp;deg; --> &deg;
+plus or minus ± &amp;#177; --> &#177; &amp;plusmn; --> &plusmn;
+superscript two ² &amp;#178; --> &#178; &amp;sup2; --> &sup2;
+superscript three ³ &amp;#179; --> &#179; &amp;sup3; --> &sup3;
+acute accent ´ &amp;#180; --> &#180; &amp;acute; --> &acute;
+micro sign µ &amp;#181; --> &#181; &amp;micro; --> &micro;
+paragraph sign ¶ &amp;#182; --> &#182; &amp;para; --> &para;
+middle dot · &amp;#183; --> &#183; &amp;middot; --> &middot;
+cedilla ¸ &amp;#184; --> &#184; &amp;cedil; --> &cedil;
+superscript one ¹ &amp;#185; --> &#185; &amp;sup1; --> &sup1;
+masculine ordinal º &amp;#186; --> &#186; &amp;ordm; --> &ordm;
+right angle quote, guillemotright » &amp;#187; --> &#187; &amp;raquo; --> &raquo;
+fraction one-fourth ¼ &amp;#188; --> &#188; &amp;frac14; --> &frac14;
+fraction one-half ½ &amp;#189; --> &#189; &amp;frac12; --> &frac12;
+fraction three-fourths ¾ &amp;#190; --> &#190; &amp;frac34; --> &frac34;
+inverted question mark ¿ &amp;#191; --> &#191; &amp;iquest; --> &iquest;
+capital A, grave accent À &amp;#192; --> &#192; &amp;Agrave; --> &Agrave;
+capital A, acute accent Á &amp;#193; --> &#193; &amp;Aacute; --> &Aacute;
+capital A, circumflex accent  &amp;#194; --> &#194; &amp;Acirc; --> &Acirc;
+capital A, tilde à &amp;#195; --> &#195; &amp;Atilde; --> &Atilde;
+capital A, dieresis or umlaut mark Ä &amp;#196; --> &#196; &amp;Auml; --> &Auml;
+capital A, ring Å &amp;#197; --> &#197; &amp;Aring; --> &Aring;
+capital AE diphthong (ligature) Æ &amp;#198; --> &#198; &amp;AElig; --> &AElig;
+capital C, cedilla Ç &amp;#199; --> &#199; &amp;Ccedil; --> &Ccedil;
+capital E, grave accent È &amp;#200; --> &#200; &amp;Egrave; --> &Egrave;
+capital E, acute accent É &amp;#201; --> &#201; &amp;Eacute; --> &Eacute;
+capital E, circumflex accent Ê &amp;#202; --> &#202; &amp;Ecirc; --> &Ecirc;
+capital E, dieresis or umlaut mark Ë &amp;#203; --> &#203; &amp;Euml; --> &Euml;
+capital I, grave accent Ì &amp;#204; --> &#204; &amp;Igrave; --> &Igrave;
+capital I, acute accent Í &amp;#205; --> &#205; &amp;Iacute; --> &Iacute;
+capital I, circumflex accent Î &amp;#206; --> &#206; &amp;Icirc; --> &Icirc;
+capital I, dieresis or umlaut mark Ï &amp;#207; --> &#207; &amp;Iuml; --> &Iuml;
+capital Eth, Icelandic Ð &amp;#208; --> &#208; &amp;ETH; --> &ETH;
+ Non-standard &amp;Dstrok; --> &Dstrok;
+capital N, tilde Ñ &amp;#209; --> &#209; &amp;Ntilde; --> &Ntilde;
+capital O, grave accent Ò &amp;#210; --> &#210; &amp;Ograve; --> &Ograve;
+capital O, acute accent Ó &amp;#211; --> &#211; &amp;Oacute; --> &Oacute;
+capital O, circumflex accent Ô &amp;#212; --> &#212; &amp;Ocirc; --> &Ocirc;
+capital O, tilde Õ &amp;#213; --> &#213; &amp;Otilde; --> &Otilde;
+capital O, dieresis or umlaut mark Ö &amp;#214; --> &#214; &amp;Ouml; --> &Ouml;
+multiply sign × &amp;#215; --> &#215; &amp;times; --> &times;
+capital O, slash Ø &amp;#216; --> &#216; &amp;Oslash; --> &Oslash;
+capital U, grave accent Ù &amp;#217; --> &#217; &amp;Ugrave; --> &Ugrave;
+capital U, acute accent Ú &amp;#218; --> &#218; &amp;Uacute; --> &Uacute;
+capital U, circumflex accent Û &amp;#219; --> &#219; &amp;Ucirc; --> &Ucirc;
+capital U, dieresis or umlaut mark Ü &amp;#220; --> &#220; &amp;Uuml; --> &Uuml;
+capital Y, acute accent Ý &amp;#221; --> &#221; &amp;Yacute; --> &Yacute;
+capital THORN, Icelandic Þ &amp;#222; --> &#222; &amp;THORN; --> &THORN;
+small sharp s, German (sz ligature) ß &amp;#223; --> &#223; &amp;szlig; --> &szlig;
+small a, grave accent à &amp;#224; --> &#224; &amp;agrave; --> &agrave;
+small a, acute accent á &amp;#225; --> &#225; &amp;aacute; --> &aacute;
+small a, circumflex accent â &amp;#226; --> &#226; &amp;acirc; --> &acirc;
+small a, tilde ã &amp;#227; --> &#227; &amp;atilde; --> &atilde;
+small a, dieresis or umlaut mark ä &amp;#228; --> &#228; &amp;auml; --> &auml;
+small a, ring å &amp;#229; --> &#229; &amp;aring; --> &aring;
+small ae diphthong (ligature) æ &amp;#230; --> &#230; &amp;aelig; --> &aelig;
+small c, cedilla ç &amp;#231; --> &#231; &amp;ccedil; --> &ccedil;
+small e, grave accent è &amp;#232; --> &#232; &amp;egrave; --> &egrave;
+small e, acute accent é &amp;#233; --> &#233; &amp;eacute; --> &eacute;
+small e, circumflex accent ê &amp;#234; --> &#234; &amp;ecirc; --> &ecirc;
+small e, dieresis or umlaut mark ë &amp;#235; --> &#235; &amp;euml; --> &euml;
+small i, grave accent ì &amp;#236; --> &#236; &amp;igrave; --> &igrave;
+small i, acute accent í &amp;#237; --> &#237; &amp;iacute; --> &iacute;
+small i, circumflex accent î &amp;#238; --> &#238; &amp;icirc; --> &icirc;
+small i, dieresis or umlaut mark ï &amp;#239; --> &#239; &amp;iuml; --> &iuml;
+small eth, Icelandic ð &amp;#240; --> &#240; &amp;eth; --> &eth;
+small n, tilde ñ &amp;#241; --> &#241; &amp;ntilde; --> &ntilde;
+small o, grave accent ò &amp;#242; --> &#242; &amp;ograve; --> &ograve;
+small o, acute accent ó &amp;#243; --> &#243; &amp;oacute; --> &oacute;
+small o, circumflex accent ô &amp;#244; --> &#244; &amp;ocirc; --> &ocirc;
+small o, tilde õ &amp;#245; --> &#245; &amp;otilde; --> &otilde;
+small o, dieresis or umlaut mark ö &amp;#246; --> &#246; &amp;ouml; --> &ouml;
+division sign ÷ &amp;#247; --> &#247; &amp;divide; --> &divide;
+small o, slash ø &amp;#248; --> &#248; &amp;oslash; --> &oslash;
+small u, grave accent ù &amp;#249; --> &#249; &amp;ugrave; --> &ugrave;
+small u, acute accent ú &amp;#250; --> &#250; &amp;uacute; --> &uacute;
+small u, circumflex accent û &amp;#251; --> &#251; &amp;ucirc; --> &ucirc;
+small u, dieresis or umlaut mark ü &amp;#252; --> &#252; &amp;uuml; --> &uuml;
+small y, acute accent ý &amp;#253; --> &#253; &amp;yacute; --> &yacute;
+small thorn, Icelandic þ &amp;#254; --> &#254; &amp;thorn; --> &thorn;
+small y, dieresis or umlaut mark ÿ &amp;#255; --> &#255; &amp;yuml; --> &yuml;
+</PRE>
+<!-- removed: second /PRE, a hack for HotJava 1.0 preBeta 1 -->
+<HR>
+
+<STRONG>How to read</STRONG> this table. The columns are
+<DL COMPACT>
+<DT>1st:<DD>textual <EM>description</EM> of the character
+<DT>2nd:<DD>character inserted directly into the HTML page as <EM>one
+ byte</EM>
+<DT>3rd:<DD>character written as <EM>numeric HTML entity</EM>, in the
+ format:<BR>"how it looks literally" <CODE>--&gt;</CODE>
+ "what your browser does with it"
+<DT>4th:<DD>character written as <EM>symbolic HTML entity</EM>, in the
+ format:<BR>"how it looks literally" <CODE>--&gt;</CODE>
+ "what your browser does with it"
+</DL>
+
+So for example, if you see something like "<CODE>&amp;divide; -->
+&amp;divide;</CODE>" in the 4th column, this means your browser
+doesn't know about the entity name "divide" and just puts it
+literally.
+
+<P>
+<STRONG>This table</STRONG> grew out of an overview of the "ISO
+Latin-1 Character Set" related to the Hyper-G Text Format
+(<A HREF="http://www.hyperwave.de/HTFdoc">HTF</A>).
+
+The entity names <CODE>&amp;brkbar;</CODE> and <CODE>&amp;Dstrok;</CODE>
+seem to be unique to HTF.
+
+The entity name <CODE>&amp;hibar;</CODE> has been supported by X Mosaic
+but seems to be replaced with <CODE>&amp;macr;</CODE>.
+
+The entity names <CODE>&amp;uml;</CODE> and <CODE>&amp;die;</CODE> should
+be equivalent.
+
+<P><STRONG>The standards stuff:</STRONG>
+The
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/">HTML 2.0 Standard</A>
+includes a section on
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_9.html#SEC99">Character Entity Sets</A>
+and an overview on the
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_13.html#SEC106">HTML Coded Character Set</A>
+(The entity names are derived from <A HREF="http://www.ucc.ie/info/net/isolat1.html">ISO 8879</A>).
+<BR>
+
+Or have a look at the
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html3/latin1.html">Latin-1 Character Entities</A>
+as listed in a draft for the
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html3/CoverPage.html">HTML 3.0 specification</A>.
+<BR>
+
+The
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_59.html">Appendix II</A>
+of CERN's
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_1.html">HTML+ Discussion Document</A>
+contains a
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_table.ps">table</A>
+(in PostScript format) of the proposed character entities for HTML+ and their
+corresponding character codes for Unicode and the Adobe Latin-1 &amp; Symbol
+character sets.
+<P>
+
+<STRONG>Please note</STRONG> that there is nothing wrong with using
+characters of ISO Latin-1 above 127: the normal transmission protocol
+for the WWW,
+<A HREF="http://www.w3.org/pub/WWW/Protocols/rfc1945/rfc1945">HTTP/1.0</A>,
+uses 8-bit ISO Latin-1 as the default encoding.
+(Thanks to Roman
+Czyborra for pointing this out!)
+<P>
+
+<STRONG>Other information:</STRONG>
+<UL>
+
+<LI><STRONG>Kevin J. Brewer</STRONG> has done two very good pages on the subject:
+ <UL>
+ <LI><A HREF="http://www.bbsinc.com/iso8859.html">ASCII - ISO 8859-1 (Latin-1) with HTML 3.0 Entities Table</A> and
+ <LI><A HREF="http://www.bbsinc.com/iso8879.html">ISO 8879 Entities Gopher Menu</A>
+ </UL>
+
+<LI>The excellent overview on the series of
+ <A HREF="http://czyborra.com/charsets/iso8859.html">ISO 8859
+ character sets</A> compiled by Roman Czyborra.
+
+<LI>Also have a look at Alan Flavell's page of
+ <A HREF="http://ppewww.ph.gla.ac.uk/%7Eflavell/iso8859/iso8859-pointers.html">pointers
+ to information about ISO8859</A>. It's written very well!
+
+<LI>Maybe also of interest to you is the
+ <A HREF="ftp://ftp.vlsivie.tuwien.ac.at/pub/8bit/FAQ-ISO-8859-1">ISO
+ 8859-1 FAQ</A> by Michael Gschwind
+ (<A HREF="mailto:mike@vlsivie.tuwien.ac.at">mike@vlsivie.tuwien.ac.at</A>),
+ part of his page on
+ <A HREF="http://www.vlsivie.tuwien.ac.at/mike/i18n.html">Internationalization</A>.
+
+<LI>For users of X11R5 on SunOS systems: the
+ <A HREF="Compose.txt">table of the compose combinations</A>
+ (also coded <A HREF="Compose.html">with entities</A> where possible).
+ It's taken from the MIT X sources in
+ <CODE>server/ddx/sun/Compose.list</CODE>.
+
+<LI>Finally you could have a look at
+ <A HREF="ftp://ds.internic.net/rfc/rfc1345.txt">RFC 1345:
+ Character Mnemonics &amp; Character Sets</A>
+ by K. Simonsen (06/11/92, 103 pages, approx. 240 kbyte).
+
+</UL>
+
+
+<HR>
+
+<ADDRESS><A HREF="http://ramsch.home.pages.de/">Martin Ramsch</A>, 16.02.1994, 07.01.1996, 01.07.1996, 1998-10-09, 2000-05-15</ADDRESS>
+
+</BODY>
+</HTML>
diff --git a/test/iso-8859-1a.html b/test/iso-8859-1a.html
new file mode 100644
index 0000000..715061a
--- /dev/null
+++ b/test/iso-8859-1a.html
@@ -0,0 +1,276 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<!-- X-URL: http://www.ramsch.org/martin/uni/fmi-hp/iso8859-1.html -->
+<!-- Date: Tue, 28 Dec 2004 20:24:09 GMT -->
+<!-- Last-Modified: Mon, 15 May 2000 09:37:37 GMT -->
+<HTML>
+<HEAD>
+<TITLE>Martin Ramsch - iso8859-1 table</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
+<BASE HREF="http://www.ramsch.org/martin/uni/fmi-hp/iso8859-1.html">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</HEAD>
+
+<BODY>
+
+<H1 ALIGN=center>iso8859-1 table, with cp-1252</H1>
+
+<PRE>
+Description Code Entity name
+=================================== ============ ==============
+quotation mark &amp;#34; --> &#34; &amp;quot; --> &quot;
+ampersand &amp;#38; --> &#38; &amp;amp; --> &amp;
+less-than sign &amp;#60; --> &#60; &amp;lt; --> &lt;
+greater-than sign &amp;#62; --> &#62; &amp;gt; --> &gt;
+
+Description Char Code Entity name
+=================================== ==== ============ ==============
+euro sign € &amp;#128; --> &#128;
+undefined &amp;#129; --> &#129;
+single low-9 quotation mark ‚ &amp;#130; --> &#130;
+latin small letter f with hook ƒ &amp;#131; --> &#131;
+double low-9 quotation mark „ &amp;#132; --> &#132;
+horizontal ellipsis … &amp;#133; --> &#133;
+dagger † &amp;#134; --> &#134;
+double dagger ‡ &amp;#135; --> &#135;
+modifier letter circumflex accent ˆ &amp;#136; --> &#136;
+per mille sign ‰ &amp;#137; --> &#137;
+latin capital letter s with caron Š &amp;#138; --> &#138;
+single left-pointing angle quote mark ‹ &amp;#139; --> &#139;
+latin capital ligature oe Œ &amp;#140; --> &#140;
+undefined &amp;#141; --> &#141;
+latin capital letter z with caron Ž &amp;#142; --> &#142;
+undefined &amp;#143; --> &#143;
+
+undefined &amp;#144; --> &#144;
+left single quotation mark ‘ &amp;#145; --> &#145;
+right single quotation mark ’ &amp;#146; --> &#146;
+left double quotation mark “ &amp;#147; --> &#147;
+right double quotation mark ” &amp;#148; --> &#148;
+bullet • &amp;#149; --> &#149;
+en dash – &amp;#150; --> &#150;
+em dash — &amp;#151; --> &#151;
+small tilde ˜ &amp;#152; --> &#152;
+trade mark sign ™ &amp;#153; --> &#153;
+latin small letter s with caron š &amp;#154; --> &#154;
+single right-pointing angle quote mark › &amp;#155; --> &#155;
+latin small ligature oe œ &amp;#156; --> &#156;
+undefined &amp;#157; --> &#157;
+latin small letter z with caron ž &amp;#158; --> &#158;
+latin capital letter y with diaeresis Ÿ &amp;#159; --> &#159;
+
+non-breaking space   &amp;#160; --> &#160; &amp;nbsp; --> &nbsp;
+inverted exclamation ¡ &amp;#161; --> &#161; &amp;iexcl; --> &iexcl;
+cent sign ¢ &amp;#162; --> &#162; &amp;cent; --> &cent;
+pound sterling £ &amp;#163; --> &#163; &amp;pound; --> &pound;
+general currency sign ¤ &amp;#164; --> &#164; &amp;curren; --> &curren;
+yen sign ¥ &amp;#165; --> &#165; &amp;yen; --> &yen;
+broken vertical bar ¦ &amp;#166; --> &#166; &amp;brvbar; --> &brvbar;
+ Non-standard &amp;brkbar; --> &brkbar;
+section sign § &amp;#167; --> &#167; &amp;sect; --> &sect;
+umlaut (dieresis) ¨ &amp;#168; --> &#168; &amp;uml; --> &uml;
+ Non-standard &amp;die; --> &die;
+copyright © &amp;#169; --> &#169; &amp;copy; --> &copy;
+feminine ordinal ª &amp;#170; --> &#170; &amp;ordf; --> &ordf;
+left angle quote, guillemotleft « &amp;#171; --> &#171; &amp;laquo; --> &laquo;
+not sign ¬ &amp;#172; --> &#172; &amp;not; --> &not;
+soft hyphen ­ &amp;#173; --> &#173; &amp;shy; --> &shy;
+registered trademark ® &amp;#174; --> &#174; &amp;reg; --> &reg;
+macron accent ¯ &amp;#175; --> &#175; &amp;macr; --> &macr;
+ Non-standard &amp;hibar; --> &hibar;
+degree sign ° &amp;#176; --> &#176; &amp;deg; --> &deg;
+plus or minus ± &amp;#177; --> &#177; &amp;plusmn; --> &plusmn;
+superscript two ² &amp;#178; --> &#178; &amp;sup2; --> &sup2;
+superscript three ³ &amp;#179; --> &#179; &amp;sup3; --> &sup3;
+acute accent ´ &amp;#180; --> &#180; &amp;acute; --> &acute;
+micro sign µ &amp;#181; --> &#181; &amp;micro; --> &micro;
+paragraph sign ¶ &amp;#182; --> &#182; &amp;para; --> &para;
+middle dot · &amp;#183; --> &#183; &amp;middot; --> &middot;
+cedilla ¸ &amp;#184; --> &#184; &amp;cedil; --> &cedil;
+superscript one ¹ &amp;#185; --> &#185; &amp;sup1; --> &sup1;
+masculine ordinal º &amp;#186; --> &#186; &amp;ordm; --> &ordm;
+right angle quote, guillemotright » &amp;#187; --> &#187; &amp;raquo; --> &raquo;
+fraction one-fourth ¼ &amp;#188; --> &#188; &amp;frac14; --> &frac14;
+fraction one-half ½ &amp;#189; --> &#189; &amp;frac12; --> &frac12;
+fraction three-fourths ¾ &amp;#190; --> &#190; &amp;frac34; --> &frac34;
+inverted question mark ¿ &amp;#191; --> &#191; &amp;iquest; --> &iquest;
+capital A, grave accent À &amp;#192; --> &#192; &amp;Agrave; --> &Agrave;
+capital A, acute accent Á &amp;#193; --> &#193; &amp;Aacute; --> &Aacute;
+capital A, circumflex accent  &amp;#194; --> &#194; &amp;Acirc; --> &Acirc;
+capital A, tilde à &amp;#195; --> &#195; &amp;Atilde; --> &Atilde;
+capital A, dieresis or umlaut mark Ä &amp;#196; --> &#196; &amp;Auml; --> &Auml;
+capital A, ring Å &amp;#197; --> &#197; &amp;Aring; --> &Aring;
+capital AE diphthong (ligature) Æ &amp;#198; --> &#198; &amp;AElig; --> &AElig;
+capital C, cedilla Ç &amp;#199; --> &#199; &amp;Ccedil; --> &Ccedil;
+capital E, grave accent È &amp;#200; --> &#200; &amp;Egrave; --> &Egrave;
+capital E, acute accent É &amp;#201; --> &#201; &amp;Eacute; --> &Eacute;
+capital E, circumflex accent Ê &amp;#202; --> &#202; &amp;Ecirc; --> &Ecirc;
+capital E, dieresis or umlaut mark Ë &amp;#203; --> &#203; &amp;Euml; --> &Euml;
+capital I, grave accent Ì &amp;#204; --> &#204; &amp;Igrave; --> &Igrave;
+capital I, acute accent Í &amp;#205; --> &#205; &amp;Iacute; --> &Iacute;
+capital I, circumflex accent Î &amp;#206; --> &#206; &amp;Icirc; --> &Icirc;
+capital I, dieresis or umlaut mark Ï &amp;#207; --> &#207; &amp;Iuml; --> &Iuml;
+capital Eth, Icelandic Ð &amp;#208; --> &#208; &amp;ETH; --> &ETH;
+ Non-standard &amp;Dstrok; --> &Dstrok;
+capital N, tilde Ñ &amp;#209; --> &#209; &amp;Ntilde; --> &Ntilde;
+capital O, grave accent Ò &amp;#210; --> &#210; &amp;Ograve; --> &Ograve;
+capital O, acute accent Ó &amp;#211; --> &#211; &amp;Oacute; --> &Oacute;
+capital O, circumflex accent Ô &amp;#212; --> &#212; &amp;Ocirc; --> &Ocirc;
+capital O, tilde Õ &amp;#213; --> &#213; &amp;Otilde; --> &Otilde;
+capital O, dieresis or umlaut mark Ö &amp;#214; --> &#214; &amp;Ouml; --> &Ouml;
+multiply sign × &amp;#215; --> &#215; &amp;times; --> &times;
+capital O, slash Ø &amp;#216; --> &#216; &amp;Oslash; --> &Oslash;
+capital U, grave accent Ù &amp;#217; --> &#217; &amp;Ugrave; --> &Ugrave;
+capital U, acute accent Ú &amp;#218; --> &#218; &amp;Uacute; --> &Uacute;
+capital U, circumflex accent Û &amp;#219; --> &#219; &amp;Ucirc; --> &Ucirc;
+capital U, dieresis or umlaut mark Ü &amp;#220; --> &#220; &amp;Uuml; --> &Uuml;
+capital Y, acute accent Ý &amp;#221; --> &#221; &amp;Yacute; --> &Yacute;
+capital THORN, Icelandic Þ &amp;#222; --> &#222; &amp;THORN; --> &THORN;
+small sharp s, German (sz ligature) ß &amp;#223; --> &#223; &amp;szlig; --> &szlig;
+small a, grave accent à &amp;#224; --> &#224; &amp;agrave; --> &agrave;
+small a, acute accent á &amp;#225; --> &#225; &amp;aacute; --> &aacute;
+small a, circumflex accent â &amp;#226; --> &#226; &amp;acirc; --> &acirc;
+small a, tilde ã &amp;#227; --> &#227; &amp;atilde; --> &atilde;
+small a, dieresis or umlaut mark ä &amp;#228; --> &#228; &amp;auml; --> &auml;
+small a, ring å &amp;#229; --> &#229; &amp;aring; --> &aring;
+small ae diphthong (ligature) æ &amp;#230; --> &#230; &amp;aelig; --> &aelig;
+small c, cedilla ç &amp;#231; --> &#231; &amp;ccedil; --> &ccedil;
+small e, grave accent è &amp;#232; --> &#232; &amp;egrave; --> &egrave;
+small e, acute accent é &amp;#233; --> &#233; &amp;eacute; --> &eacute;
+small e, circumflex accent ê &amp;#234; --> &#234; &amp;ecirc; --> &ecirc;
+small e, dieresis or umlaut mark ë &amp;#235; --> &#235; &amp;euml; --> &euml;
+small i, grave accent ì &amp;#236; --> &#236; &amp;igrave; --> &igrave;
+small i, acute accent í &amp;#237; --> &#237; &amp;iacute; --> &iacute;
+small i, circumflex accent î &amp;#238; --> &#238; &amp;icirc; --> &icirc;
+small i, dieresis or umlaut mark ï &amp;#239; --> &#239; &amp;iuml; --> &iuml;
+small eth, Icelandic ð &amp;#240; --> &#240; &amp;eth; --> &eth;
+small n, tilde ñ &amp;#241; --> &#241; &amp;ntilde; --> &ntilde;
+small o, grave accent ò &amp;#242; --> &#242; &amp;ograve; --> &ograve;
+small o, acute accent ó &amp;#243; --> &#243; &amp;oacute; --> &oacute;
+small o, circumflex accent ô &amp;#244; --> &#244; &amp;ocirc; --> &ocirc;
+small o, tilde õ &amp;#245; --> &#245; &amp;otilde; --> &otilde;
+small o, dieresis or umlaut mark ö &amp;#246; --> &#246; &amp;ouml; --> &ouml;
+division sign ÷ &amp;#247; --> &#247; &amp;divide; --> &divide;
+small o, slash ø &amp;#248; --> &#248; &amp;oslash; --> &oslash;
+small u, grave accent ù &amp;#249; --> &#249; &amp;ugrave; --> &ugrave;
+small u, acute accent ú &amp;#250; --> &#250; &amp;uacute; --> &uacute;
+small u, circumflex accent û &amp;#251; --> &#251; &amp;ucirc; --> &ucirc;
+small u, dieresis or umlaut mark ü &amp;#252; --> &#252; &amp;uuml; --> &uuml;
+small y, acute accent ý &amp;#253; --> &#253; &amp;yacute; --> &yacute;
+small thorn, Icelandic þ &amp;#254; --> &#254; &amp;thorn; --> &thorn;
+small y, dieresis or umlaut mark ÿ &amp;#255; --> &#255; &amp;yuml; --> &yuml;
+</PRE>
+<!-- removed: second /PRE, a hack for HotJava 1.0 preBeta 1 -->
+<HR>
+
+<STRONG>How to read</STRONG> this table. The columns are
+<DL COMPACT>
+<DT>1st:<DD>textual <EM>description</EM> of the character
+<DT>2nd:<DD>character inserted directly into the HTML page as <EM>one
+ byte</EM>
+<DT>3rd:<DD>character written as <EM>numeric HTML entity</EM>, in the
+ format:<BR>"how it looks literally" <CODE>--&gt;</CODE>
+ "what your browser does with it"
+<DT>4th:<DD>character written as <EM>symbolic HTML entity</EM>, in the
+ format:<BR>"how it looks literally" <CODE>--&gt;</CODE>
+ "what your browser does with it"
+</DL>
+
+So for example, if you see something like "<CODE>&amp;divide; -->
+&amp;divide;</CODE>" in the 4th column, this means your browser
+doesn't know about the entity name "divide" and just puts it
+literally.
+
+<P>
+<STRONG>This table</STRONG> grew out of an overview of the "ISO
+Latin-1 Character Set" related to the Hyper-G Text Format
+(<A HREF="http://www.hyperwave.de/HTFdoc">HTF</A>).
+
+The entity names <CODE>&amp;brkbar;</CODE> and <CODE>&amp;Dstrok;</CODE>
+seem to be unique to HTF.
+
+The entity name <CODE>&amp;hibar;</CODE> has been supported by X Mosaic
+but seems to be replaced with <CODE>&amp;macr;</CODE>.
+
+The entity names <CODE>&amp;uml;</CODE> and <CODE>&amp;die;</CODE> should
+be equivalent.
+
+<P><STRONG>The standards stuff:</STRONG>
+The
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/">HTML 2.0 Standard</A>
+includes a section on
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_9.html#SEC99">Character Entity Sets</A>
+and an overview on the
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_13.html#SEC106">HTML Coded Character Set</A>
+(The entity names are derived from <A HREF="http://www.ucc.ie/info/net/isolat1.html">ISO 8879</A>).
+<BR>
+
+Or have a look at the
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html3/latin1.html">Latin-1 Character Entities</A>
+as listed in a draft for the
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html3/CoverPage.html">HTML 3.0 specification</A>.
+<BR>
+
+The
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_59.html">Appendix II</A>
+of CERN's
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_1.html">HTML+ Discussion Document</A>
+contains a
+<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_table.ps">table</A>
+(in PostScript format) of the proposed character entities for HTML+ and their
+corresponding character codes for Unicode and the Adobe Latin-1 &amp; Symbol
+character sets.
+<P>
+
+<STRONG>Please note</STRONG> that there is nothing wrong with using
+characters of ISO Latin-1 above 127: the normal transmission protocol
+for the WWW,
+<A HREF="http://www.w3.org/pub/WWW/Protocols/rfc1945/rfc1945">HTTP/1.0</A>,
+uses the 8bit ISO latin-1 as default encoding.
+(Thanks to Roman
+Czyborra for pointing this out!)
+<P>
+
+<STRONG>Other information:</STRONG>
+<UL>
+
+<LI><STRONG>Kevin J. Brewer</STRONG> has done two very good pages on the subject:
+ <UL>
+ <LI><A HREF="http://www.bbsinc.com/iso8859.html">ASCII - ISO 8859-1 (Latin-1) with HTML 3.0 Entities Table</A> and
+ <LI><A HREF="http://www.bbsinc.com/iso8879.html">ISO 8879 Entities Gopher Menu</A>
+ </UL>
+
+<LI>The excellent overview on the series of
+ <A HREF="http://czyborra.com/charsets/iso8859.html">ISO 8859
+ character sets</A> compiled by Roman Czyborra.
+
+<LI>Also have a look at Alan Flavell's page of
+ <A HREF="http://ppewww.ph.gla.ac.uk/%7Eflavell/iso8859/iso8859-pointers.html">pointers
+ to information about ISO8859</A>. It's written very well!
+
+<LI>Maybe also of interest to you is the
+ <A HREF="ftp://ftp.vlsivie.tuwien.ac.at/pub/8bit/FAQ-ISO-8859-1">ISO
+ 8859-1 FAQ</A> by Michael Gschwind
+ (<A HREF="mailto:mike@vlsivie.tuwien.ac.at">mike@vlsivie.tuwien.ac.at</A>),
+ part of his page on
+ <A HREF="http://www.vlsivie.tuwien.ac.at/mike/i18n.html">Internationalization</A>.
+
+<LI>For users of X11R5 on SunOS systems: the
+ <A HREF="Compose.txt">table over the compose combinations</A>
+ (also coded <A HREF="Compose.html">with entities</A> where possible).
+ It's taken from the MIT X sources in
+ <CODE>server/ddx/sun/Compose.list</CODE>.
+
+<LI>Finally you could have a look at
+ <A HREF="ftp://ds.internic.net/rfc/rfc1345.txt">RFC 1345:
+ Character Mnemonics &amp; Character Sets</A>
+ by K. Simonsen (06/11/92, 103 pages, approx. 240 kbyte).
+
+</UL>
+
+
+<HR>
+
+<ADDRESS><A HREF="http://ramsch.home.pages.de/">Martin Ramsch</A>, 16.02.1994, 07.01.1996, 01.07.1996, 1998-10-09, 2000-05-15</ADDRESS>
+
+</BODY>
+</HTML>
diff --git a/test/iso-8859-2.html b/test/iso-8859-2.html
new file mode 100644
index 0000000..37c705e
--- /dev/null
+++ b/test/iso-8859-2.html
@@ -0,0 +1,175 @@
+<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
+<!-- X-URL: http://www.uni-passau.de/~ramsch/iso8859-1.html -->
+<HTML>
+<HEAD>
+<TITLE>Martin Ramsch's character table modified and enhanced for iso8859-2</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-2">
+<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org">
+<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/">
+<LINK REL="sibling" HREF="iso-8859-1.html" TITLE="iso-8859-1 test">
+<LINK REL="sibling" HREF="ALT88592.html" TITLE="iso-8859-2 ALT test">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</HEAD>
+
+<BODY>
+
+<H1 ALIGN=center>iso8859-2 plus table</H1>
+
+<PRE>
+Description Code Entity name
+=================================== ============ ==============
+quotation mark &amp;#34; --> &#34; &amp;quot; --> &quot;
+ampersand &amp;#38; --> &#38; &amp;amp; --> &amp;
+less-than sign &amp;#60; --> &#60; &amp;lt; --> &lt;
+greater-than sign &amp;#62; --> &#62; &amp;gt; --> &gt;
+
+Description Char Code Entity name
+=================================== ==== ============ ==============
+non-breaking space   &amp;#160; --> &#160; &amp;nbsp; --> &nbsp;
+capital A, ogonek ¡ &amp;#260; --> &#260; &amp;Aogon; --> &Aogon;
+breve {¢} {&amp;#728;}-->{&#728;} {&amp;breve;} -->{&breve;}
+capital L, stroke £ &amp;#321; --> &#321; &amp;Lstrok; --> &Lstrok;
+general currency sign ¤ &amp;#164; --> &#164; &amp;curren; --> &curren;
+capital L, caron ¥ &amp;#317; --> &#317; &amp;Lcaron; --> &Lcaron;
+capital S, acute accent ¦ &amp;#346; --> &#346; &amp;Sacute; --> &Sacute;
+section sign § &amp;#167; --> &#167; &amp;sect; --> &sect;
+umlaut (dieresis) ¨ &amp;#168; --> &#168; &amp;uml; --> &uml;
+ &amp;die; --> &die;
+capital S, caron © &amp;#352; --> &#352; &amp;Scaron; --> &Scaron;
+capital S, cedilla ª &amp;#350; --> &#350; &amp;Scedil; --> &Scedil;
+capital T, caron « &amp;#356; --> &#356; &amp;Tcaron; --> &Tcaron;
+capital Z, acute accent ¬ &amp;#377; --> &#377; &amp;Zacute; --> &Zacute;
+soft hyphen [­] [&amp;#173;]-->[&#173;] [&amp;shy;] -->[&shy;]
+capital Z, caron ® &amp;#381; --> &#381; &amp;Zcaron; --> &Zcaron;
+capital Z, dot above ¯ &amp;#379; --> &#379; &amp;Zdot; --> &Zdot;
+degree sign ° &amp;#176; --> &#176; &amp;deg; --> &deg;
+small a, ogonek ± &amp;#261; --> &#261; &amp;aogon; --> &aogon;
+ogonek {²} {&amp;#731;}-->{&#731;} {&amp;ogon;} -->{&ogon;}
+small l, stroke ³ &amp;#322; --> &#322; &amp;lstrok; --> &lstrok;
+acute accent ´ &amp;#180; --> &#180; &amp;acute; --> &acute;
+small l, caron µ &amp;#318; --> &#318; &amp;lcaron; --> &lcaron;
+small s, acute accent ¶ &amp;#347; --> &#347; &amp;sacute; --> &sacute;
+caron {·} {&amp;#711;}-->{&#711;} {&amp;caron;} -->{&caron;}
+cedilla ¸ &amp;#184; --> &#184; &amp;cedil; --> &cedil;
+small s, caron ¹ &amp;#353; --> &#353; &amp;scaron; --> &scaron;
+small s, cedilla º &amp;#351; --> &#351; &amp;scedil; --> &scedil;
+small t, caron » &amp;#357; --> &#357; &amp;tcaron; --> &tcaron;
+small z, acute accent ¼ &amp;#378; --> &#378; &amp;zacute; --> &zacute;
+double acute accent {½} {&amp;#733;}-->{&#733;} {&amp;dblac;} -->{&dblac;}
+small z, caron ¾ &amp;#382; --> &#382; &amp;zcaron; --> &zcaron;
+small z, dot above ¿ &amp;#380; --> &#380; &amp;zdot; --> &zdot;
+capital R, acute accent À &amp;#340; --> &#340; &amp;Racute; --> &Racute;
+capital A, acute accent Á &amp;#193; --> &#193; &amp;Aacute; --> &Aacute;
+capital A, circumflex accent  &amp;#194; --> &#194; &amp;Acirc; --> &Acirc;
+capital A, breve à &amp;#258; --> &#258; &amp;Abreve; --> &Abreve;
+capital A, dieresis or umlaut mark Ä &amp;#196; --> &#196; &amp;Auml; --> &Auml;
+capital L, acute accent Å &amp;#313; --> &#313; &amp;Lacute; --> &Lacute;
+capital C, acute accent Æ &amp;#262; --> &#262; &amp;Cacute; --> &Cacute;
+capital C, cedilla Ç &amp;#199; --> &#199; &amp;Ccedil; --> &Ccedil;
+capital C, caron È &amp;#268; --> &#268; &amp;Ccaron; --> &Ccaron;
+capital E, acute accent É &amp;#201; --> &#201; &amp;Eacute; --> &Eacute;
+capital E, ogonek Ê &amp;#280; --> &#280; &amp;Eogon; --> &Eogon;
+capital E, dieresis or umlaut mark Ë &amp;#203; --> &#203; &amp;Euml; --> &Euml;
+capital E, caron Ì &amp;#282; --> &#282; &amp;Ecaron; --> &Ecaron;
+capital I, acute accent Í &amp;#205; --> &#205; &amp;Iacute; --> &Iacute;
+capital I, circumflex accent Î &amp;#206; --> &#206; &amp;Icirc; --> &Icirc;
+capital D, caron Ï &amp;#270; --> &#270; &amp;Dcaron; --> &Dcaron;
+capital D, stroke Ð &amp;#272; --> &#272; &amp;Dstrok; --> &Dstrok;
+capital Eth, Icelandic N/A &amp;#208; --> &#208; &amp;ETH; --> &ETH;
+capital N, acute accent Ñ &amp;#323; --> &#323; &amp;Nacute; --> &Nacute;
+capital N, caron Ò &amp;#327; --> &#327; &amp;Ncaron; --> &Ncaron;
+capital O, acute accent Ó &amp;#211; --> &#211; &amp;Oacute; --> &Oacute;
+capital O, circumflex accent Ô &amp;#212; --> &#212; &amp;Ocirc; --> &Ocirc;
+capital O, double acute accent Õ &amp;#336; --> &#336; &amp;Odblac; --> &Odblac;
+capital O, dieresis or umlaut mark Ö &amp;#214; --> &#214; &amp;Ouml; --> &Ouml;
+multiply sign × &amp;#215; --> &#215; &amp;times; --> &times;
+capital R, caron Ø &amp;#344; --> &#344; &amp;Rcaron; --> &Rcaron;
+capital U, ring Ù &amp;#366; --> &#366; &amp;Uring; --> &Uring;
+capital U, acute accent Ú &amp;#218; --> &#218; &amp;Uacute; --> &Uacute;
+capital U, double acute accent Û &amp;#368; --> &#368; &amp;Udblac; --> &Udblac;
+capital U, dieresis or umlaut mark Ü &amp;#220; --> &#220; &amp;Uuml; --> &Uuml;
+capital Y, acute accent Ý &amp;#221; --> &#221; &amp;Yacute; --> &Yacute;
+capital T, cedilla Þ &amp;#354; --> &#354; &amp;Tcedil; --> &Tcedil;
+small sharp s, German (sz ligature) ß &amp;#223; --> &#223; &amp;szlig; --> &szlig;
+small r, acute accent à &amp;#341; --> &#341; &amp;racute; --> &racute;
+small a, acute accent á &amp;#225; --> &#225; &amp;aacute; --> &aacute;
+small a, circumflex accent â &amp;#226; --> &#226; &amp;acirc; --> &acirc;
+small a, breve ã &amp;#259; --> &#259; &amp;abreve; --> &abreve;
+small a, dieresis or umlaut mark ä &amp;#228; --> &#228; &amp;auml; --> &auml;
+small l, acute accent å &amp;#314; --> &#314; &amp;lacute; --> &lacute;
+small c, acute accent æ &amp;#263; --> &#263; &amp;cacute; --> &cacute;
+small c, cedilla ç &amp;#231; --> &#231; &amp;ccedil; --> &ccedil;
+small c, caron è &amp;#269; --> &#269; &amp;ccaron; --> &ccaron;
+small e, acute accent é &amp;#233; --> &#233; &amp;eacute; --> &eacute;
+small e, ogonek ê &amp;#281; --> &#281; &amp;eogon; --> &eogon;
+small e, dieresis or umlaut mark ë &amp;#235; --> &#235; &amp;euml; --> &euml;
+small e, caron ì &amp;#283; --> &#283; &amp;ecaron; --> &ecaron;
+small i, acute accent í &amp;#237; --> &#237; &amp;iacute; --> &iacute;
+small i, circumflex accent î &amp;#238; --> &#238; &amp;icirc; --> &icirc;
+small d, caron ï &amp;#271; --> &#271; &amp;dcaron; --> &dcaron;
+small d, stroke ð &amp;#273; --> &#273; &amp;dstrok; --> &dstrok;
+small eth, Icelandic N/A &amp;#240; --> &#240; &amp;eth; --> &eth;
+small n, acute accent ñ &amp;#324; --> &#324; &amp;nacute; --> &nacute;
+small n, caron ò &amp;#328; --> &#328; &amp;ncaron; --> &ncaron;
+small o, acute accent ó &amp;#243; --> &#243; &amp;oacute; --> &oacute;
+small o, circumflex accent ô &amp;#244; --> &#244; &amp;ocirc; --> &ocirc;
+small o, double acute accent õ &amp;#337; --> &#337; &amp;odblac; --> &odblac;
+small o, dieresis or umlaut mark ö &amp;#246; --> &#246; &amp;ouml; --> &ouml;
+division sign ÷ &amp;#247; --> &#247; &amp;divide; --> &divide;
+small r, caron ø &amp;#345; --> &#345; &amp;rcaron; --> &rcaron;
+small u, ring ù &amp;#367; --> &#367; &amp;uring; --> &uring;
+small u, acute accent ú &amp;#250; --> &#250; &amp;uacute; --> &uacute;
+small u, double acute accent û &amp;#369; --> &#369; &amp;udblac; --> &udblac;
+small u, dieresis or umlaut mark ü &amp;#252; --> &#252; &amp;uuml; --> &uuml;
+small y, acute accent ý &amp;#253; --> &#253; &amp;yacute; --> &yacute;
+small t, cedilla þ &amp;#355; --> &#355; &amp;tcedil; --> &tcedil;
+dot above {ÿ} {&amp;#729;}-->{&#729;} {&amp;dot;} -->{&dot;}
+
+Some other characters of interest Char Code Entity name
+=================================== ==== ============ ==============
+capital AE diphthong (ligature) N/A &amp;#198; --> &#198; &amp;AElig; --> &AElig;
+small ae diphthong (ligature) N/A &amp;#230; --> &#230; &amp;aelig; --> &aelig;
+capital OE ligature N/A {&amp;#338;}-->{&#338;} {&amp;OElig;} -->{&OElig;}
+small oe ligature N/A {&amp;#339;}-->{&#339;} {&amp;oelig;} -->{&oelig;}
+copyright N/A &amp;#169; --> &#169; &amp;copy; --> &copy;
+registered trademark N/A &amp;#174; --> &#174; &amp;reg; --> &reg;
+trademark sign N/A &amp;#8482;--> &#8482; &amp;trade; --> &trade;
+em space N/A [&amp;#8195;]->[&#8195;] [&amp;emsp;] -->[&emsp;]
+en space N/A [&amp;#8194;]->[&#8194;] [&amp;ensp;] -->[&ensp;]
+1/3-em space N/A [&amp;#8196;]->[&#8196;] [&amp;emsp13;] -->[&emsp13;]
+1/4-em space N/A [&amp;#8197;]->[&#8197;] [&amp;emsp14;] -->[&emsp14;]
+thin space N/A [&amp;#8201;]->[&#8201;] [&amp;thinsp;]-->[&thinsp;]
+hair space N/A [&amp;#8202;]->[&#8202;] [&amp;hairsp;]-->[&hairsp;]
+em dash N/A [&amp;#8212;]->[&#8212;] [&amp;mdash;] -->[&mdash;]
+en dash N/A [&amp;#8211;]->[&#8211;] [&amp;ndash;] -->[&ndash;]
+
+</PRE><!-- </PRE> no HotJava preBeta hack - kw -->
+<!-- the second /PRE (commented out above) was a hack for HotJava 1.0 preBeta 1 -->
+<HR>
+<P>
+Characters not found in ISO-8859-2 have "N/A" in the <TT>Char</TT> column.
+Some characters for which I could not find entity names in either
+<A HREF="http://www.internic.net/rfc/rfc2070.txt">RFC 2070</A>
+or the
+<A HREF="ftp://www.ucc.ie/pub/sgml/">ISOlat1, ISOlat2, ISOnum, ISOpub and ISOtech</A>
+sets (the ones included by Peter Flynn's
+<A HREF="http://www.ucc.ie/doc/www/html/dtds/htmlpro.html">HTML Pro DTD</A>)
+are shown enclosed in <TT>{</TT>braces<TT>}</TT>.
+</P>
+<P>
+There also is a variation of this table which tests
+<A HREF="ALT88592.html">ISO-8859-2 characters and entities in ALT attributes</A>.
+</P>
+<P>
+See Martin Ramsch's original
+<A CHARSET="iso-8859-1" HREF="https://web.archive.org/web/19970119160651/http://www.uni-passau.de:80/~ramsch/iso8859-1.html">ISO-8859-1 Table</A>
+for related info and links, and for some notes on entity names.
+This file is mostly just an adaptation of his table
+to the ISO-8859-2 character set.
+</P>
+<HR>
+
+<ADDRESS>kweide@tezcat.com 1997-03-09</ADDRESS>
+
+</BODY>
+</HTML>
diff --git a/test/iso-8859-2a.html b/test/iso-8859-2a.html
new file mode 100644
index 0000000..9d6ce0d
--- /dev/null
+++ b/test/iso-8859-2a.html
@@ -0,0 +1,209 @@
+<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
+<!-- X-URL: http://www.uni-passau.de/~ramsch/iso8859-1.html -->
+<HTML>
+<HEAD>
+<TITLE>Martin Ramsch's character table modified and enhanced for iso8859-2</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-2">
+<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org">
+<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/">
+<LINK REL="sibling" HREF="iso-8859-1.html" TITLE="iso-8859-1 test">
+<LINK REL="sibling" HREF="ALT88592.html" TITLE="iso-8859-2 ALT test">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</HEAD>
+
+<BODY>
+
+<H1 ALIGN=center>iso8859-2 plus table, and cp-1252</H1>
+
+<PRE>
+Description Code Entity name
+=================================== ============ ==============
+quotation mark &amp;#34; --> &#34; &amp;quot; --> &quot;
+ampersand &amp;#38; --> &#38; &amp;amp; --> &amp;
+less-than sign &amp;#60; --> &#60; &amp;lt; --> &lt;
+greater-than sign &amp;#62; --> &#62; &amp;gt; --> &gt;
+
+Description Char Code Entity name
+=================================== ==== ============ ==============
+euro sign € &amp;#128; --> &#128;
+undefined &amp;#129; --> &#129;
+single low-9 quotation mark ‚ &amp;#130; --> &#130;
+latin small letter f with hook ƒ &amp;#131; --> &#131;
+double low-9 quotation mark „ &amp;#132; --> &#132;
+horizontal ellipsis … &amp;#133; --> &#133;
+dagger † &amp;#134; --> &#134;
+double dagger ‡ &amp;#135; --> &#135;
+modifier letter circumflex accent ˆ &amp;#136; --> &#136;
+per mille sign ‰ &amp;#137; --> &#137;
+latin capital letter s with caron Š &amp;#138; --> &#138;
+single left-pointing angle quote mark ‹ &amp;#139; --> &#139;
+latin capital ligature oe Œ &amp;#140; --> &#140;
+undefined &amp;#141; --> &#141;
+latin capital letter z with caron Ž &amp;#142; --> &#142;
+undefined &amp;#143; --> &#143;
+
+undefined &amp;#144; --> &#144;
+left single quotation mark ‘ &amp;#145; --> &#145;
+right single quotation mark ’ &amp;#146; --> &#146;
+left double quotation mark “ &amp;#147; --> &#147;
+right double quotation mark ” &amp;#148; --> &#148;
+bullet • &amp;#149; --> &#149;
+en dash – &amp;#150; --> &#150;
+em dash — &amp;#151; --> &#151;
+small tilde ˜ &amp;#152; --> &#152;
+trade mark sign ™ &amp;#153; --> &#153;
+latin small letter s with caron š &amp;#154; --> &#154;
+single right-pointing angle quote mark › &amp;#155; --> &#155;
+latin small ligature oe œ &amp;#156; --> &#156;
+undefined &amp;#157; --> &#157;
+latin small letter z with caron ž &amp;#158; --> &#158;
+latin capital letter y with diaeresis Ÿ &amp;#159; --> &#159;
+
+non-breaking space   &amp;#160; --> &#160; &amp;nbsp; --> &nbsp;
+capital A, ogonek ¡ &amp;#260; --> &#260; &amp;Aogon; --> &Aogon;
+breve {¢} {&amp;#728;}-->{&#728;} {&amp;breve;} -->{&breve;}
+capital L, stroke £ &amp;#321; --> &#321; &amp;Lstrok; --> &Lstrok;
+general currency sign ¤ &amp;#164; --> &#164; &amp;curren; --> &curren;
+capital L, caron ¥ &amp;#317; --> &#317; &amp;Lcaron; --> &Lcaron;
+capital S, acute accent ¦ &amp;#346; --> &#346; &amp;Sacute; --> &Sacute;
+section sign § &amp;#167; --> &#167; &amp;sect; --> &sect;
+umlaut (dieresis) ¨ &amp;#168; --> &#168; &amp;uml; --> &uml;
+ &amp;die; --> &die;
+capital S, caron © &amp;#352; --> &#352; &amp;Scaron; --> &Scaron;
+capital S, cedilla ª &amp;#350; --> &#350; &amp;Scedil; --> &Scedil;
+capital T, caron « &amp;#356; --> &#356; &amp;Tcaron; --> &Tcaron;
+capital Z, acute accent ¬ &amp;#377; --> &#377; &amp;Zacute; --> &Zacute;
+soft hyphen [­] [&amp;#173;]-->[&#173;] [&amp;shy;] -->[&shy;]
+capital Z, caron ® &amp;#381; --> &#381; &amp;Zcaron; --> &Zcaron;
+capital Z, dot above ¯ &amp;#379; --> &#379; &amp;Zdot; --> &Zdot;
+degree sign ° &amp;#176; --> &#176; &amp;deg; --> &deg;
+small a, ogonek ± &amp;#261; --> &#261; &amp;aogon; --> &aogon;
+ogonek {²} {&amp;#731;}-->{&#731;} {&amp;ogon;} -->{&ogon;}
+small l, stroke ³ &amp;#322; --> &#322; &amp;lstrok; --> &lstrok;
+acute accent ´ &amp;#180; --> &#180; &amp;acute; --> &acute;
+small l, caron µ &amp;#318; --> &#318; &amp;lcaron; --> &lcaron;
+small s, acute accent ¶ &amp;#347; --> &#347; &amp;sacute; --> &sacute;
+caron {·} {&amp;#711;}-->{&#711;} {&amp;caron;} -->{&caron;}
+cedilla ¸ &amp;#184; --> &#184; &amp;cedil; --> &cedil;
+small s, caron ¹ &amp;#353; --> &#353; &amp;scaron; --> &scaron;
+small s, cedilla º &amp;#351; --> &#351; &amp;scedil; --> &scedil;
+small t, caron » &amp;#357; --> &#357; &amp;tcaron; --> &tcaron;
+small z, acute accent ¼ &amp;#378; --> &#378; &amp;zacute; --> &zacute;
+double acute accent {½} {&amp;#733;}-->{&#733;} {&amp;dblac;} -->{&dblac;}
+small z, caron ¾ &amp;#382; --> &#382; &amp;zcaron; --> &zcaron;
+small z, dot above ¿ &amp;#380; --> &#380; &amp;zdot; --> &zdot;
+capital R, acute accent À &amp;#340; --> &#340; &amp;Racute; --> &Racute;
+capital A, acute accent Á &amp;#193; --> &#193; &amp;Aacute; --> &Aacute;
+capital A, circumflex accent  &amp;#194; --> &#194; &amp;Acirc; --> &Acirc;
+capital A, breve à &amp;#258; --> &#258; &amp;Abreve; --> &Abreve;
+capital A, dieresis or umlaut mark Ä &amp;#196; --> &#196; &amp;Auml; --> &Auml;
+capital L, acute accent Å &amp;#313; --> &#313; &amp;Lacute; --> &Lacute;
+capital C, acute accent Æ &amp;#262; --> &#262; &amp;Cacute; --> &Cacute;
+capital C, cedilla Ç &amp;#199; --> &#199; &amp;Ccedil; --> &Ccedil;
+capital C, caron È &amp;#268; --> &#268; &amp;Ccaron; --> &Ccaron;
+capital E, acute accent É &amp;#201; --> &#201; &amp;Eacute; --> &Eacute;
+capital E, ogonek Ê &amp;#280; --> &#280; &amp;Eogon; --> &Eogon;
+capital E, dieresis or umlaut mark Ë &amp;#203; --> &#203; &amp;Euml; --> &Euml;
+capital E, caron Ì &amp;#282; --> &#282; &amp;Ecaron; --> &Ecaron;
+capital I, acute accent Í &amp;#205; --> &#205; &amp;Iacute; --> &Iacute;
+capital I, circumflex accent Î &amp;#206; --> &#206; &amp;Icirc; --> &Icirc;
+capital D, caron Ï &amp;#270; --> &#270; &amp;Dcaron; --> &Dcaron;
+capital D, stroke Ð &amp;#272; --> &#272; &amp;Dstrok; --> &Dstrok;
+capital Eth, Icelandic N/A &amp;#208; --> &#208; &amp;ETH; --> &ETH;
+capital N, acute accent Ñ &amp;#323; --> &#323; &amp;Nacute; --> &Nacute;
+capital N, caron Ò &amp;#327; --> &#327; &amp;Ncaron; --> &Ncaron;
+capital O, acute accent Ó &amp;#211; --> &#211; &amp;Oacute; --> &Oacute;
+capital O, circumflex accent Ô &amp;#212; --> &#212; &amp;Ocirc; --> &Ocirc;
+capital O, double acute accent Õ &amp;#336; --> &#336; &amp;Odblac; --> &Odblac;
+capital O, dieresis or umlaut mark Ö &amp;#214; --> &#214; &amp;Ouml; --> &Ouml;
+multiply sign × &amp;#215; --> &#215; &amp;times; --> &times;
+capital R, caron Ø &amp;#344; --> &#344; &amp;Rcaron; --> &Rcaron;
+capital U, ring Ù &amp;#366; --> &#366; &amp;Uring; --> &Uring;
+capital U, acute accent Ú &amp;#218; --> &#218; &amp;Uacute; --> &Uacute;
+capital U, double acute accent Û &amp;#368; --> &#368; &amp;Udblac; --> &Udblac;
+capital U, dieresis or umlaut mark Ü &amp;#220; --> &#220; &amp;Uuml; --> &Uuml;
+capital Y, acute accent Ý &amp;#221; --> &#221; &amp;Yacute; --> &Yacute;
+capital T, cedilla Þ &amp;#354; --> &#354; &amp;Tcedil; --> &Tcedil;
+small sharp s, German (sz ligature) ß &amp;#223; --> &#223; &amp;szlig; --> &szlig;
+small r, acute accent à &amp;#341; --> &#341; &amp;racute; --> &racute;
+small a, acute accent á &amp;#225; --> &#225; &amp;aacute; --> &aacute;
+small a, circumflex accent â &amp;#226; --> &#226; &amp;acirc; --> &acirc;
+small a, breve ã &amp;#259; --> &#259; &amp;abreve; --> &abreve;
+small a, dieresis or umlaut mark ä &amp;#228; --> &#228; &amp;auml; --> &auml;
+small l, acute accent å &amp;#314; --> &#314; &amp;lacute; --> &lacute;
+small c, acute accent æ &amp;#263; --> &#263; &amp;cacute; --> &cacute;
+small c, cedilla ç &amp;#231; --> &#231; &amp;ccedil; --> &ccedil;
+small c, caron è &amp;#269; --> &#269; &amp;ccaron; --> &ccaron;
+small e, acute accent é &amp;#233; --> &#233; &amp;eacute; --> &eacute;
+small e, ogonek ê &amp;#281; --> &#281; &amp;eogon; --> &eogon;
+small e, dieresis or umlaut mark ë &amp;#235; --> &#235; &amp;euml; --> &euml;
+small e, caron ì &amp;#283; --> &#283; &amp;ecaron; --> &ecaron;
+small i, acute accent í &amp;#237; --> &#237; &amp;iacute; --> &iacute;
+small i, circumflex accent î &amp;#238; --> &#238; &amp;icirc; --> &icirc;
+small d, caron ï &amp;#271; --> &#271; &amp;dcaron; --> &dcaron;
+small d, stroke ð &amp;#273; --> &#273; &amp;dstrok; --> &dstrok;
+small eth, Icelandic N/A &amp;#240; --> &#240; &amp;eth; --> &eth;
+small n, acute accent ñ &amp;#324; --> &#324; &amp;nacute; --> &nacute;
+small n, caron ò &amp;#328; --> &#328; &amp;ncaron; --> &ncaron;
+small o, acute accent ó &amp;#243; --> &#243; &amp;oacute; --> &oacute;
+small o, circumflex accent ô &amp;#244; --> &#244; &amp;ocirc; --> &ocirc;
+small o, double acute accent õ &amp;#337; --> &#337; &amp;odblac; --> &odblac;
+small o, dieresis or umlaut mark ö &amp;#246; --> &#246; &amp;ouml; --> &ouml;
+division sign ÷ &amp;#247; --> &#247; &amp;divide; --> &divide;
+small r, caron ø &amp;#345; --> &#345; &amp;rcaron; --> &rcaron;
+small u, ring ù &amp;#367; --> &#367; &amp;uring; --> &uring;
+small u, acute accent ú &amp;#250; --> &#250; &amp;uacute; --> &uacute;
+small u, double acute accent û &amp;#369; --> &#369; &amp;udblac; --> &udblac;
+small u, dieresis or umlaut mark ü &amp;#252; --> &#252; &amp;uuml; --> &uuml;
+small y, acute accent ý &amp;#253; --> &#253; &amp;yacute; --> &yacute;
+small t, cedilla þ &amp;#355; --> &#355; &amp;tcedil; --> &tcedil;
+dot above {ÿ} {&amp;#729;}-->{&#729;} {&amp;dot;} -->{&dot;}
+
+Some other characters of interest Char Code Entity name
+=================================== ==== ============ ==============
+capital AE diphthong (ligature) N/A &amp;#198; --> &#198; &amp;AElig; --> &AElig;
+small ae diphthong (ligature) N/A &amp;#230; --> &#230; &amp;aelig; --> &aelig;
+capital OE ligature N/A {&amp;#338;}-->{&#338;} {&amp;OElig;} -->{&OElig;}
+small oe ligature N/A {&amp;#339;}-->{&#339;} {&amp;oelig;} -->{&oelig;}
+copyright N/A &amp;#169; --> &#169; &amp;copy; --> &copy;
+registered trademark N/A &amp;#174; --> &#174; &amp;reg; --> &reg;
+trademark sign N/A &amp;#8482;--> &#8482; &amp;trade; --> &trade;
+em space N/A [&amp;#8195;]->[&#8195;] [&amp;emsp;] -->[&emsp;]
+en space N/A [&amp;#8194;]->[&#8194;] [&amp;ensp;] -->[&ensp;]
+1/3-em space N/A [&amp;#8196;]->[&#8196;] [&amp;emsp13;] -->[&emsp13;]
+1/4-em space N/A [&amp;#8197;]->[&#8197;] [&amp;emsp14;] -->[&emsp14;]
+thin space N/A [&amp;#8201;]->[&#8201;] [&amp;thinsp;]-->[&thinsp;]
+hair space N/A [&amp;#8202;]->[&#8202;] [&amp;hairsp;]-->[&hairsp;]
+em dash N/A [&amp;#8212;]->[&#8212;] [&amp;mdash;] -->[&mdash;]
+en dash N/A [&amp;#8211;]->[&#8211;] [&amp;ndash;] -->[&ndash;]
+
+</PRE><!-- </PRE> no HotJava preBeta hackx - kw -->
+<!-- second /PRE is a hack for HotJava 1.0 preBeta 1 -->
+<HR>
+<P>
+Characters not found in ISO-8859-2 have "N/A" in the <TT>Char</TT> column.
+Some characters for which I could not find entity names in either
+<A HREF="http://www.internic.net/rfc/rfc2070.txt">RFC 2070</A>
+or the
+<A HREF="ftp://www.ucc.ie/pub/sgml/">ISOlat1, ISOlat2, ISOnum, ISOpub and ISOtech</A>
+sets (the ones included by Peter Flynn's
+<A HREF="http://www.ucc.ie/doc/www/html/dtds/htmlpro.html">HTML Pro DTD</A>)
+are shown enclosed in <TT>{</TT>braces<TT>}</TT>.
+</P>
+<P>
+There also is a variation of this table which tests
+<A HREF="ALT88592.html">ISO-8859-2 characters and entities in ALT attributes</A>.
+</P>
+<P>
+See Martin Ramsch's original
+<A CHARSET="iso-8859-1" HREF="https://web.archive.org/web/19970119160651/http://www.uni-passau.de:80/~ramsch/iso8859-1.html">ISO-8859-1 Table</A>
+for related info and links, and for some notes on entity names.
+This file is mostly just an adaptation of his table
+to the ISO-8859-2 character set.
+</P>
+<HR>
+
+<ADDRESS>kweide@tezcat.com 1997-03-09</ADDRESS>
+
+</BODY>
+</HTML>
diff --git a/test/koi8-r.html b/test/koi8-r.html
new file mode 100644
index 0000000..e887c1f
--- /dev/null
+++ b/test/koi8-r.html
@@ -0,0 +1,322 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<HTML>
+<HEAD>
+<TITLE>Test of the KOI8-R symbols</TITLE>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</HEAD>
+<BODY>
+<PRE>
+
+ This table prepared from KOI8-R.TXT available at ftp.unicode.org
+
+ ftp://ftp.unicode.org/MAPPINGS/VENDORS/MISC/KOI8-R.TXT
+ (if doing ftp, try cd Public/MAPPINGS/VENDORS/MISC)
+
+
+original comment:
+
+#
+# Name: KOI8-R (RFC1489) to Unicode
+# Unicode version: 3.0
+# Table version: 1.0
+# Table format: Format A
+# Date: 18 August 1999
+# Authors: Helmut Richter &lt;richter@lrz.de&gt;
+#
+# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved.
+#
+# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
+# No claims are made as to fitness for any particular purpose. No
+# warranties of any kind are expressed or implied. The recipient
+# agrees to determine applicability of information provided. If this
+# file has been provided on optical media by Unicode, Inc., the sole
+# remedy for any claim will be exchange of defective media within 90
+# days of receipt.
+#
+# Unicode, Inc. hereby grants the right to freely use the information
+# supplied in this file in the creation of products supporting the
+# Unicode Standard, and to make copies of this file in any form for
+# internal or external distribution as long as this notice remains
+# attached.
+#
+# General notes:
+#
+# This table contains the data the Unicode Consortium has on how
+# KOI8-R characters map into Unicode. The underlying document is the
+# mapping described in RFC 1489. No statements are made as to whether
+# this mapping is the same as the mapping defined as "Code Page 878"
+# with some vendors.
+#
+# Format: Three tab-separated columns
+# Column #1 is the KOI8-R code (in hex as 0xXX)
+# Column #2 is the Unicode (in hex as 0xXXXX)
+# Column #3 the Unicode name (follows a comment sign, '#')
+#
+# The entries are in KOI8-R order.
+#
+# Version history
+# 1.0 version: created.
+#
+# Any comments or problems, contact &lt;errata@unicode.org&gt;
+# Please note that &lt;errata@unicode.org&gt; is an archival address;
+# notices will be checked, but do not expect an immediate response.
+#
+0x00 0x0000 "&#x0000" # NULL
+0x01 0x0001 "&#x0001" # START OF HEADING
+0x02 0x0002 "&#x0002" # START OF TEXT
+0x03 0x0003 "&#x0003" # END OF TEXT
+0x04 0x0004 "&#x0004" # END OF TRANSMISSION
+0x05 0x0005 "&#x0005" # ENQUIRY
+0x06 0x0006 "&#x0006" # ACKNOWLEDGE
+0x07 0x0007 "&#x0007" # BELL
+0x08 0x0008 "&#x0008" # BACKSPACE
+0x09 0x0009 "&#x0009" # HORIZONTAL TABULATION
+0x0A 0x000A "&#x000A" # LINE FEED
+0x0B 0x000B "&#x000B" # VERTICAL TABULATION
+0x0C 0x000C "&#x000C" # FORM FEED
+0x0D 0x000D "&#x000D" # CARRIAGE RETURN
+0x0E 0x000E "&#x000E" # SHIFT OUT
+0x0F 0x000F "&#x000F" # SHIFT IN
+0x10 0x0010 "&#x0010" # DATA LINK ESCAPE
+0x11 0x0011 "&#x0011" # DEVICE CONTROL ONE
+0x12 0x0012 "&#x0012" # DEVICE CONTROL TWO
+0x13 0x0013 "&#x0013" # DEVICE CONTROL THREE
+0x14 0x0014 "&#x0014" # DEVICE CONTROL FOUR
+0x15 0x0015 "&#x0015" # NEGATIVE ACKNOWLEDGE
+0x16 0x0016 "&#x0016" # SYNCHRONOUS IDLE
+0x17 0x0017 "&#x0017" # END OF TRANSMISSION BLOCK
+0x18 0x0018 "&#x0018" # CANCEL
+0x19 0x0019 "&#x0019" # END OF MEDIUM
+0x1A 0x001A "&#x001A" # SUBSTITUTE
+0x1B 0x001B "&#x001B" # ESCAPE
+0x1C 0x001C "&#x001C" # FILE SEPARATOR
+0x1D 0x001D "&#x001D" # GROUP SEPARATOR
+0x1E 0x001E "&#x001E" # RECORD SEPARATOR
+0x1F 0x001F "&#x001F" # UNIT SEPARATOR
+0x20 0x0020 "&#x0020" # SPACE
+0x21 0x0021 "&#x0021" # EXCLAMATION MARK
+0x22 0x0022 "&#x0022" # QUOTATION MARK
+0x23 0x0023 "&#x0023" # NUMBER SIGN
+0x24 0x0024 "&#x0024" # DOLLAR SIGN
+0x25 0x0025 "&#x0025" # PERCENT SIGN
+0x26 0x0026 "&#x0026" # AMPERSAND
+0x27 0x0027 "&#x0027" # APOSTROPHE
+0x28 0x0028 "&#x0028" # LEFT PARENTHESIS
+0x29 0x0029 "&#x0029" # RIGHT PARENTHESIS
+0x2A 0x002A "&#x002A" # ASTERISK
+0x2B 0x002B "&#x002B" # PLUS SIGN
+0x2C 0x002C "&#x002C" # COMMA
+0x2D 0x002D "&#x002D" # HYPHEN-MINUS
+0x2E 0x002E "&#x002E" # FULL STOP
+0x2F 0x002F "&#x002F" # SOLIDUS
+0x30 0x0030 "&#x0030" # DIGIT ZERO
+0x31 0x0031 "&#x0031" # DIGIT ONE
+0x32 0x0032 "&#x0032" # DIGIT TWO
+0x33 0x0033 "&#x0033" # DIGIT THREE
+0x34 0x0034 "&#x0034" # DIGIT FOUR
+0x35 0x0035 "&#x0035" # DIGIT FIVE
+0x36 0x0036 "&#x0036" # DIGIT SIX
+0x37 0x0037 "&#x0037" # DIGIT SEVEN
+0x38 0x0038 "&#x0038" # DIGIT EIGHT
+0x39 0x0039 "&#x0039" # DIGIT NINE
+0x3A 0x003A "&#x003A" # COLON
+0x3B 0x003B "&#x003B" # SEMICOLON
+0x3C 0x003C "&#x003C" # LESS-THAN SIGN
+0x3D 0x003D "&#x003D" # EQUALS SIGN
+0x3E 0x003E "&#x003E" # GREATER-THAN SIGN
+0x3F 0x003F "&#x003F" # QUESTION MARK
+0x40 0x0040 "&#x0040" # COMMERCIAL AT
+0x41 0x0041 "&#x0041" # LATIN CAPITAL LETTER A
+0x42 0x0042 "&#x0042" # LATIN CAPITAL LETTER B
+0x43 0x0043 "&#x0043" # LATIN CAPITAL LETTER C
+0x44 0x0044 "&#x0044" # LATIN CAPITAL LETTER D
+0x45 0x0045 "&#x0045" # LATIN CAPITAL LETTER E
+0x46 0x0046 "&#x0046" # LATIN CAPITAL LETTER F
+0x47 0x0047 "&#x0047" # LATIN CAPITAL LETTER G
+0x48 0x0048 "&#x0048" # LATIN CAPITAL LETTER H
+0x49 0x0049 "&#x0049" # LATIN CAPITAL LETTER I
+0x4A 0x004A "&#x004A" # LATIN CAPITAL LETTER J
+0x4B 0x004B "&#x004B" # LATIN CAPITAL LETTER K
+0x4C 0x004C "&#x004C" # LATIN CAPITAL LETTER L
+0x4D 0x004D "&#x004D" # LATIN CAPITAL LETTER M
+0x4E 0x004E "&#x004E" # LATIN CAPITAL LETTER N
+0x4F 0x004F "&#x004F" # LATIN CAPITAL LETTER O
+0x50 0x0050 "&#x0050" # LATIN CAPITAL LETTER P
+0x51 0x0051 "&#x0051" # LATIN CAPITAL LETTER Q
+0x52 0x0052 "&#x0052" # LATIN CAPITAL LETTER R
+0x53 0x0053 "&#x0053" # LATIN CAPITAL LETTER S
+0x54 0x0054 "&#x0054" # LATIN CAPITAL LETTER T
+0x55 0x0055 "&#x0055" # LATIN CAPITAL LETTER U
+0x56 0x0056 "&#x0056" # LATIN CAPITAL LETTER V
+0x57 0x0057 "&#x0057" # LATIN CAPITAL LETTER W
+0x58 0x0058 "&#x0058" # LATIN CAPITAL LETTER X
+0x59 0x0059 "&#x0059" # LATIN CAPITAL LETTER Y
+0x5A 0x005A "&#x005A" # LATIN CAPITAL LETTER Z
+0x5B 0x005B "&#x005B" # LEFT SQUARE BRACKET
+0x5C 0x005C "&#x005C" # REVERSE SOLIDUS
+0x5D 0x005D "&#x005D" # RIGHT SQUARE BRACKET
+0x5E 0x005E "&#x005E" # CIRCUMFLEX ACCENT
+0x5F 0x005F "&#x005F" # LOW LINE
+0x60 0x0060 "&#x0060" # GRAVE ACCENT
+0x61 0x0061 "&#x0061" # LATIN SMALL LETTER A
+0x62 0x0062 "&#x0062" # LATIN SMALL LETTER B
+0x63 0x0063 "&#x0063" # LATIN SMALL LETTER C
+0x64 0x0064 "&#x0064" # LATIN SMALL LETTER D
+0x65 0x0065 "&#x0065" # LATIN SMALL LETTER E
+0x66 0x0066 "&#x0066" # LATIN SMALL LETTER F
+0x67 0x0067 "&#x0067" # LATIN SMALL LETTER G
+0x68 0x0068 "&#x0068" # LATIN SMALL LETTER H
+0x69 0x0069 "&#x0069" # LATIN SMALL LETTER I
+0x6A 0x006A "&#x006A" # LATIN SMALL LETTER J
+0x6B 0x006B "&#x006B" # LATIN SMALL LETTER K
+0x6C 0x006C "&#x006C" # LATIN SMALL LETTER L
+0x6D 0x006D "&#x006D" # LATIN SMALL LETTER M
+0x6E 0x006E "&#x006E" # LATIN SMALL LETTER N
+0x6F 0x006F "&#x006F" # LATIN SMALL LETTER O
+0x70 0x0070 "&#x0070" # LATIN SMALL LETTER P
+0x71 0x0071 "&#x0071" # LATIN SMALL LETTER Q
+0x72 0x0072 "&#x0072" # LATIN SMALL LETTER R
+0x73 0x0073 "&#x0073" # LATIN SMALL LETTER S
+0x74 0x0074 "&#x0074" # LATIN SMALL LETTER T
+0x75 0x0075 "&#x0075" # LATIN SMALL LETTER U
+0x76 0x0076 "&#x0076" # LATIN SMALL LETTER V
+0x77 0x0077 "&#x0077" # LATIN SMALL LETTER W
+0x78 0x0078 "&#x0078" # LATIN SMALL LETTER X
+0x79 0x0079 "&#x0079" # LATIN SMALL LETTER Y
+0x7A 0x007A "&#x007A" # LATIN SMALL LETTER Z
+0x7B 0x007B "&#x007B" # LEFT CURLY BRACKET
+0x7C 0x007C "&#x007C" # VERTICAL LINE
+0x7D 0x007D "&#x007D" # RIGHT CURLY BRACKET
+0x7E 0x007E "&#x007E" # TILDE
+0x7F 0x007F "&#x007F" # DELETE
+0x80 0x2500 "&#x2500" # BOX DRAWINGS LIGHT HORIZONTAL
+0x81 0x2502 "&#x2502" # BOX DRAWINGS LIGHT VERTICAL
+0x82 0x250C "&#x250C" # BOX DRAWINGS LIGHT DOWN AND RIGHT
+0x83 0x2510 "&#x2510" # BOX DRAWINGS LIGHT DOWN AND LEFT
+0x84 0x2514 "&#x2514" # BOX DRAWINGS LIGHT UP AND RIGHT
+0x85 0x2518 "&#x2518" # BOX DRAWINGS LIGHT UP AND LEFT
+0x86 0x251C "&#x251C" # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+0x87 0x2524 "&#x2524" # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+0x88 0x252C "&#x252C" # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+0x89 0x2534 "&#x2534" # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+0x8A 0x253C "&#x253C" # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+0x8B 0x2580 "&#x2580" # UPPER HALF BLOCK
+0x8C 0x2584 "&#x2584" # LOWER HALF BLOCK
+0x8D 0x2588 "&#x2588" # FULL BLOCK
+0x8E 0x258C "&#x258C" # LEFT HALF BLOCK
+0x8F 0x2590 "&#x2590" # RIGHT HALF BLOCK
+0x90 0x2591 "&#x2591" # LIGHT SHADE
+0x91 0x2592 "&#x2592" # MEDIUM SHADE
+0x92 0x2593 "&#x2593" # DARK SHADE
+0x93 0x2320 "&#x2320" # TOP HALF INTEGRAL
+0x94 0x25A0 "&#x25A0" # BLACK SQUARE
+0x95 0x2219 "&#x2219" # BULLET OPERATOR
+0x96 0x221A "&#x221A" # SQUARE ROOT
+0x97 0x2248 "&#x2248" # ALMOST EQUAL TO
+0x98 0x2264 "&#x2264" # LESS-THAN OR EQUAL TO
+0x99 0x2265 "&#x2265" # GREATER-THAN OR EQUAL TO
+0x9A 0x00A0 "&#x00A0" # NO-BREAK SPACE
+0x9B 0x2321 "&#x2321" # BOTTOM HALF INTEGRAL
+0x9C 0x00B0 "&#x00B0" # DEGREE SIGN
+0x9D 0x00B2 "&#x00B2" # SUPERSCRIPT TWO
+0x9E 0x00B7 "&#x00B7" # MIDDLE DOT
+0x9F 0x00F7 "&#x00F7" # DIVISION SIGN
+0xA0 0x2550 "&#x2550" # BOX DRAWINGS DOUBLE HORIZONTAL
+0xA1 0x2551 "&#x2551" # BOX DRAWINGS DOUBLE VERTICAL
+0xA2 0x2552 "&#x2552" # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+0xA3 0x0451 "&#x0451" # CYRILLIC SMALL LETTER IO
+0xA4 0x2553 "&#x2553" # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+0xA5 0x2554 "&#x2554" # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+0xA6 0x2555 "&#x2555" # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+0xA7 0x2556 "&#x2556" # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+0xA8 0x2557 "&#x2557" # BOX DRAWINGS DOUBLE DOWN AND LEFT
+0xA9 0x2558 "&#x2558" # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+0xAA 0x2559 "&#x2559" # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+0xAB 0x255A "&#x255A" # BOX DRAWINGS DOUBLE UP AND RIGHT
+0xAC 0x255B "&#x255B" # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+0xAD 0x255C "&#x255C" # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+0xAE 0x255D "&#x255D" # BOX DRAWINGS DOUBLE UP AND LEFT
+0xAF 0x255E "&#x255E" # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+0xB0 0x255F "&#x255F" # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+0xB1 0x2560 "&#x2560" # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+0xB2 0x2561 "&#x2561" # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+0xB3 0x0401 "&#x0401" # CYRILLIC CAPITAL LETTER IO
+0xB4 0x2562 "&#x2562" # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+0xB5 0x2563 "&#x2563" # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+0xB6 0x2564 "&#x2564" # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+0xB7 0x2565 "&#x2565" # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+0xB8 0x2566 "&#x2566" # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+0xB9 0x2567 "&#x2567" # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+0xBA 0x2568 "&#x2568" # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+0xBB 0x2569 "&#x2569" # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+0xBC 0x256A "&#x256A" # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+0xBD 0x256B "&#x256B" # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+0xBE 0x256C "&#x256C" # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+0xBF 0x00A9 "&#x00A9" # COPYRIGHT SIGN
+0xC0 0x044E "&#x044E" # CYRILLIC SMALL LETTER YU
+0xC1 0x0430 "&#x0430" # CYRILLIC SMALL LETTER A
+0xC2 0x0431 "&#x0431" # CYRILLIC SMALL LETTER BE
+0xC3 0x0446 "&#x0446" # CYRILLIC SMALL LETTER TSE
+0xC4 0x0434 "&#x0434" # CYRILLIC SMALL LETTER DE
+0xC5 0x0435 "&#x0435" # CYRILLIC SMALL LETTER IE
+0xC6 0x0444 "&#x0444" # CYRILLIC SMALL LETTER EF
+0xC7 0x0433 "&#x0433" # CYRILLIC SMALL LETTER GHE
+0xC8 0x0445 "&#x0445" # CYRILLIC SMALL LETTER HA
+0xC9 0x0438 "&#x0438" # CYRILLIC SMALL LETTER I
+0xCA 0x0439 "&#x0439" # CYRILLIC SMALL LETTER SHORT I
+0xCB 0x043A "&#x043A" # CYRILLIC SMALL LETTER KA
+0xCC 0x043B "&#x043B" # CYRILLIC SMALL LETTER EL
+0xCD 0x043C "&#x043C" # CYRILLIC SMALL LETTER EM
+0xCE 0x043D "&#x043D" # CYRILLIC SMALL LETTER EN
+0xCF 0x043E "&#x043E" # CYRILLIC SMALL LETTER O
+0xD0 0x043F "&#x043F" # CYRILLIC SMALL LETTER PE
+0xD1 0x044F "&#x044F" # CYRILLIC SMALL LETTER YA
+0xD2 0x0440 "&#x0440" # CYRILLIC SMALL LETTER ER
+0xD3 0x0441 "&#x0441" # CYRILLIC SMALL LETTER ES
+0xD4 0x0442 "&#x0442" # CYRILLIC SMALL LETTER TE
+0xD5 0x0443 "&#x0443" # CYRILLIC SMALL LETTER U
+0xD6 0x0436 "&#x0436" # CYRILLIC SMALL LETTER ZHE
+0xD7 0x0432 "&#x0432" # CYRILLIC SMALL LETTER VE
+0xD8 0x044C "&#x044C" # CYRILLIC SMALL LETTER SOFT SIGN
+0xD9 0x044B "&#x044B" # CYRILLIC SMALL LETTER YERU
+0xDA 0x0437 "&#x0437" # CYRILLIC SMALL LETTER ZE
+0xDB 0x0448 "&#x0448" # CYRILLIC SMALL LETTER SHA
+0xDC 0x044D "&#x044D" # CYRILLIC SMALL LETTER E
+0xDD 0x0449 "&#x0449" # CYRILLIC SMALL LETTER SHCHA
+0xDE 0x0447 "&#x0447" # CYRILLIC SMALL LETTER CHE
+0xDF 0x044A "&#x044A" # CYRILLIC SMALL LETTER HARD SIGN
+0xE0 0x042E "&#x042E" # CYRILLIC CAPITAL LETTER YU
+0xE1 0x0410 "&#x0410" # CYRILLIC CAPITAL LETTER A
+0xE2 0x0411 "&#x0411" # CYRILLIC CAPITAL LETTER BE
+0xE3 0x0426 "&#x0426" # CYRILLIC CAPITAL LETTER TSE
+0xE4 0x0414 "&#x0414" # CYRILLIC CAPITAL LETTER DE
+0xE5 0x0415 "&#x0415" # CYRILLIC CAPITAL LETTER IE
+0xE6 0x0424 "&#x0424" # CYRILLIC CAPITAL LETTER EF
+0xE7 0x0413 "&#x0413" # CYRILLIC CAPITAL LETTER GHE
+0xE8 0x0425 "&#x0425" # CYRILLIC CAPITAL LETTER HA
+0xE9 0x0418 "&#x0418" # CYRILLIC CAPITAL LETTER I
+0xEA 0x0419 "&#x0419" # CYRILLIC CAPITAL LETTER SHORT I
+0xEB 0x041A "&#x041A" # CYRILLIC CAPITAL LETTER KA
+0xEC 0x041B "&#x041B" # CYRILLIC CAPITAL LETTER EL
+0xED 0x041C "&#x041C" # CYRILLIC CAPITAL LETTER EM
+0xEE 0x041D "&#x041D" # CYRILLIC CAPITAL LETTER EN
+0xEF 0x041E "&#x041E" # CYRILLIC CAPITAL LETTER O
+0xF0 0x041F "&#x041F" # CYRILLIC CAPITAL LETTER PE
+0xF1 0x042F "&#x042F" # CYRILLIC CAPITAL LETTER YA
+0xF2 0x0420 "&#x0420" # CYRILLIC CAPITAL LETTER ER
+0xF3 0x0421 "&#x0421" # CYRILLIC CAPITAL LETTER ES
+0xF4 0x0422 "&#x0422" # CYRILLIC CAPITAL LETTER TE
+0xF5 0x0423 "&#x0423" # CYRILLIC CAPITAL LETTER U
+0xF6 0x0416 "&#x0416" # CYRILLIC CAPITAL LETTER ZHE
+0xF7 0x0412 "&#x0412" # CYRILLIC CAPITAL LETTER VE
+0xF8 0x042C "&#x042C" # CYRILLIC CAPITAL LETTER SOFT SIGN
+0xF9 0x042B "&#x042B" # CYRILLIC CAPITAL LETTER YERU
+0xFA 0x0417 "&#x0417" # CYRILLIC CAPITAL LETTER ZE
+0xFB 0x0428 "&#x0428" # CYRILLIC CAPITAL LETTER SHA
+0xFC 0x042D "&#x042D" # CYRILLIC CAPITAL LETTER E
+0xFD 0x0429 "&#x0429" # CYRILLIC CAPITAL LETTER SHCHA
+0xFE 0x0427 "&#x0427" # CYRILLIC CAPITAL LETTER CHE
+0xFF 0x042A "&#x042A" # CYRILLIC CAPITAL LETTER HARD SIGN
+</PRE>
+</BODY>
+</HTML>
diff --git a/test/nobody b/test/nobody
new file mode 100644
index 0000000..a1e2647
--- /dev/null
+++ b/test/nobody
@@ -0,0 +1 @@
+?
diff --git a/test/quickbrown.html b/test/quickbrown.html
new file mode 100644
index 0000000..d326e9b
--- /dev/null
+++ b/test/quickbrown.html
@@ -0,0 +1,104 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<HTML>
+<HEAD>
+<TITLE>Markus Kuhn's quick-brown-fox UTF-8 demo</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
+<LINK REV="made" HREF="mailto:dickey@invisible-island.net">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</HEAD>
+
+<BODY>
+<pre>
+Sentences that contain all letters commonly used in a language
+--------------------------------------------------------------
+
+Markus Kuhn &lt;mkuhn@acm.org&gt; -- 1998-11-30
+
+This file was UTF-8 encoded.
+
+
+German (de)
+-----------
+
+ Falsches &#xdc;ben von Xylophonmusik qu&#xe4;lt jeden gr&#xf6;&#xdf;eren Zwerg
+ (= Wrongful practicing of xylophone music tortures every larger dwarf)
+
+ Zw&#xf6;lf Boxk&#xe4;mpfer jagten Eva quer &#xfc;ber den Sylter Deich
+ (= Twelve boxing fighters hunted Eva across the dike of Sylt)
+
+ Heiz&#xf6;lr&#xfc;cksto&#xdf;abd&#xe4;mpfung
+ (= fuel oil recoil absorber) (jqvwxy missing, but all non-ASCII letters in one word)
+
+English (en)
+------------
+
+ The quick brown fox jumps over the lazy dog
+
+French (fr)
+-----------
+
+ Portez ce vieux whisky au juge blond qui fume sur son &#xee;le int&#xe9;rieure, &#xe0;
+ c&#xf4;t&#xe9; de l'alc&#xf4;ve ovo&#xef;de, o&#xf9; les b&#xfb;ches se consument dans l'&#xe2;tre, ce qui lui
+ permet de penser &#xe0; la c&#xe6;nogen&#xe8;se de l'&#xea;tre dont il est question dans la
+ cause ambigu&#xeb; entendue &#xe0; Mo&#xff;, dans un capharna&#xfc;m qui, pense-t-il, diminue
+ &#xe7;&#xe0; et l&#xe0; la qualit&#xe9; de son &#x153;uvre.
+
+ l'&#xee;le exigu&#xeb;
+ O&#xf9; l'ob&#xe8;se jury m&#xfb;r
+ F&#xea;te l'ha&#xef; volap&#xfc;k,
+ &#xc2;ne ex a&#xe9;quo au whist,
+ &#xd4;tez ce v&#x153;u d&#xe9;&#xe7;u.
+
+ Le c&#x153;ur d&#xe9;&#xe7;u mais l'&#xe2;me plut&#xf4;t na&#xef;ve, Lou&#xff;s r&#xea;va de crapa&#xfc;ter en
+ cano&#xeb; au del&#xe0; des &#xee;les, pr&#xe8;s du m&#xe4;lstr&#xf6;m o&#xf9; br&#xfb;lent les nov&#xe6;.
+
+Irish Gaelic (ga)
+-----------------
+
+ D'fhuascail &#xcd;osa, &#xda;rmhac na h&#xd3;ighe Beannaithe, p&#xf3;r &#xc9;ava agus &#xc1;dhaimh
+
+Icelandic (is)
+--------------
+
+ K&#xe6;mi n&#xfd; &#xf6;xi h&#xe9;r ykist &#xfe;j&#xf3;fum n&#xfa; b&#xe6;&#xf0;i v&#xed;l og &#xe1;drepa
+
+ S&#xe6;v&#xf6;r gr&#xe9;t &#xe1;&#xf0;an &#xfe;v&#xed; &#xfa;lpan var &#xf3;n&#xfd;t
+ (some ASCII letters missing)
+
+Hebrew (iw)
+-----------
+
+ &#x5d3;&#x5d2; &#x5e1;&#x5e7;&#x5e8;&#x5df; &#x5e9;&#x5d8; &#x5d1;&#x5d9;&#x5dd; &#x5de;&#x5d0;&#x5d5;&#x5db;&#x5d6;&#x5d1; &#x5d5;&#x5dc;&#x5e4;&#x5ea;&#x5e2; &#x5de;&#x5e6;&#x5d0; &#x5dc;&#x5d5; &#x5d7;&#x5d1;&#x5e8;&#x5d4; &#x5d0;&#x5d9;&#x5da; &#x5d4;&#x5e7;&#x5dc;&#x5d9;&#x5d8;&#x5d4;?
+
+Polish (pl)
+-----------
+
+ Pchn&#x105;&#x107; w t&#x119; &#x142;&#xf3;d&#x17a; je&#x17c;a lub o&#x15b;m skrzy&#x144; fig
+
+Russian (ru)
+------------
+
+ &#x412; &#x447;&#x430;&#x449;&#x430;&#x445; &#x44e;&#x433;&#x430; &#x436;&#x438;&#x43b; &#x431;&#x44b; &#x446;&#x438;&#x442;&#x440;&#x443;&#x441;? &#x414;&#x430;, &#x43d;&#x43e; &#x444;&#x430;&#x43b;&#x44c;&#x448;&#x438;&#x432;&#x44b;&#x439; &#x44d;&#x43a;&#x437;&#x435;&#x43c;&#x43f;&#x43b;&#x44f;&#x440;!
+ (= Would a citrus live in the bushes of south? Yes, but only a fake one!)
+
+
+Please let me know if you find others! Special thanks to the people
+from all over the world who contributed these sentences.
+
+</pre>
+See also:
+<ul>
+<li><a href="http://www.columbia.edu/kermit/utf8.html"
+ >http://www.columbia.edu/kermit/utf8.html</a>
+<li><a href="http://www.kernel.org/"
+ >http://www.kernel.org/</a>
+<li><a href="http://www.unicode.org/"
+ >http://www.unicode.org/</a>
+<br>and
+<li><a href="http://www.cl.cam.ac.uk/~mgk25/ucs/examples/TeX.txt"
+ >http://www.cl.cam.ac.uk/~mgk25/ucs/examples/TeX.txt</a>
+<li><a href="http://www.cl.cam.ac.uk/~mgk25/ucs/wgl4.txt"
+ >http://www.cl.cam.ac.uk/~mgk25/ucs/wgl4.txt</a>
+</ul>
+</BODY>
+</HTML>
diff --git a/test/raw8bit.html b/test/raw8bit.html
new file mode 100644
index 0000000..9b32eaa
--- /dev/null
+++ b/test/raw8bit.html
@@ -0,0 +1,39 @@
+<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
+<HTML>
+<HEAD>
+<TITLE> Test of raw 8-bit symbols </TITLE>
+<!-- you may uncomment the next line
+and set the document's charset directly via META tag -->
+<!--META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1"-->
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</HEAD>
+<BODY>
+<PRE>
+This is a test of the translation of 8-bit letters for different pairs of
+document charset (assumed charset) and display charset,
+both of which can be set from the 'O'ptions menu.
+
+This page (obviously) corresponds to text/html mode
+but you may test text/plain just by pressing '\'
+Try also: '@' for ``raw mode'' and '=' for Information Page.
+
+
+ 0 1 2 3 4 5 6 7 8 9 A B C D E F
+20 ! " # $ % & ' ( ) * + , - . /
+30 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
+40 @ A B C D E F G H I J K L M N O
+50 P Q R S T U V W X Y Z [ \ ] ^ _
+60 ` a b c d e f g h i j k l m n o
+70 p q r s t u v w x y z { | } ~ 
+80 € ‚ ƒ „ … † ‡ ˆ ‰ Š ‹ Œ Ž
+90 ‘ ’ “ ” • – — ˜ ™ š › œ ž Ÿ
+A0   ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ­ ® ¯
+B0 ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿
+C0 À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï
+D0 Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß
+E0 à á â ã ä å æ ç è é ê ë ì í î ï
+F0 ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ
+
+</PRE>
+</BODY>
+</HTML>
diff --git a/test/sgml.html b/test/sgml.html
new file mode 100644
index 0000000..db9d2b7
--- /dev/null
+++ b/test/sgml.html
@@ -0,0 +1,1082 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<HTML>
+<HEAD>
+<TITLE>Test of some Unicode symbols enclosed as SGML entity names</TITLE>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</HEAD>
+<BODY>
+<PRE>
+
+ This table prepared from SGML.TXT available at ftp.unicode.org
+
+ ftp://ftp.unicode.org/MAPPINGS/VENDORS/MISC/SGML.TXT
+ (if doing ftp, try cd Public/MAPPINGS/VENDORS/MISC)
+
+
+original comment:
+
+# Author: John Cowan &lt;cowan@ccil.org&gt;
+# Date: 25 July 1997
+#
+# The following table maps SGML character entities from various
+# public sets (namely, ISOamsa, ISOamsb, ISOamsc, ISOamsn, ISOamso,
+# ISOamsr, ISObox, ISOcyr1, ISOcyr2, ISOdia, ISOgrk1, ISOgrk2,
+# ISOgrk3, ISOgrk4, ISOlat1, ISOlat2, ISOnum, ISOpub, ISOtech,
+# HTMLspecial, HTMLsymbol) to corresponding Unicode characters.
+#
+# The table has four tab-separated columns:
+# Column 1: SGML character entity name
+# Column 2: SGML public entity set
+# Column 3: Unicode 2.0 character code
+# Column 4: Unicode 2.0 character name (UPPER CASE)
+# Entries which don't have Unicode equivalents have "0x????"
+# in Column 3 and a lower case description (from the public entity
+# set DTD) in Column 4. The mapping is not reversible, because many
+# distinctions are unified away in Unicode, particularly between
+# mathematical symbols.
+#
+# The table is sorted case-blind by SGML character entity name.
+#
+# The contents of this table are drawn from various sources, and
+# are in the public domain.
+#
+<!-- Changes:
++ {"euro", 0x20AC}, /* EURO SIGN */
+ {"loz", 0x25CA}, /* LOZENGE */
+! /* {"loz", 0x2727}, WHITE FOUR POINTED STAR */
+! /* Warning: Duplicated &loz; entry. HTML 4.0 defines it as U+25CA. */
+- {"b.delta", 0x03B3}, /* GREEK SMALL LETTER GAMMA */
++ {"b.delta", 0x03B4}, /* GREEK SMALL LETTER DELTA */
+
+-->
+
+This test illuminates SGML character entities implementation in your browser.
+We sort the entities according to Unicode numbers.
+You should see a visible character if your display character set supports it
+or some substitution string picked up from src/chrtrans/def7_uni.tbl.
+If you see &amp;somename; - this name is not implemented yet,
+you may search for &amp;. (Sorry, ISOgrk4 which holds a dot in its name
+seems to be nonvisible for most browsers. Keep in mind that
+this table is much wider than in the HTML 4.0 draft).
+ Leonid Pauzner.
+
+
+0x0021 &excl; ISOnum # EXCLAMATION MARK
+0x0022 &quot; ISOnum # QUOTATION MARK
+0x0023 &num; ISOnum # NUMBER SIGN
+0x0024 &dollar; ISOnum # DOLLAR SIGN
+0x0025 &percnt; ISOnum # PERCENT SIGN
+0x0026 &amp; ISOnum # AMPERSAND
+0x0028 &lpar; ISOnum # LEFT PARENTHESIS
+0x0029 &rpar; ISOnum # RIGHT PARENTHESIS
+0x002A &ast; ISOnum # ASTERISK
+0x002B &plus; ISOnum # PLUS SIGN
+0x002C &comma; ISOnum # COMMA
+0x002D &hyphen; ISOnum # HYPHEN-MINUS
+0x002E &period; ISOnum # FULL STOP
+0x002F &sol; ISOnum # SOLIDUS
+0x003A &colon; ISOnum # COLON
+0x003B &semi; ISOnum # SEMICOLON
+0x003C &lt; ISOnum # LESS-THAN SIGN
+0x003D &equals; ISOnum # EQUALS SIGN
+0x003E &gt; ISOnum # GREATER-THAN SIGN
+0x003F &quest; ISOnum # QUESTION MARK
+0x0040 &commat; ISOnum # COMMERCIAL AT
+0x005B &lsqb; ISOnum # LEFT SQUARE BRACKET
+0x005C &bsol; ISOnum # REVERSE SOLIDUS
+0x005C &sbsol; ISOamso # REVERSE SOLIDUS
+0x005D &rsqb; ISOnum # RIGHT SQUARE BRACKET
+0x005F &lowbar; ISOnum # LOW LINE
+0x0060 &grave; ISOdia # GRAVE ACCENT
+0x007B &lcub; ISOnum # LEFT CURLY BRACKET
+0x007C &verbar; ISOnum # VERTICAL LINE
+0x007D &rcub; ISOnum # RIGHT CURLY BRACKET
+0x00A0 &nbsp; ISOnum # NO-BREAK SPACE
+0x00A1 &iexcl; ISOnum # INVERTED EXCLAMATION MARK
+0x00A2 &cent; ISOnum # CENT SIGN
+0x00A3 &pound; ISOnum # POUND SIGN
+0x00A4 &curren; ISOnum # CURRENCY SIGN
+0x00A5 &yen; ISOnum # YEN SIGN
+0x00A6 &brvbar; ISOnum # BROKEN BAR
+0x00A7 &sect; ISOnum # SECTION SIGN
+0x00A8 &Dot; ISOtech # DIAERESIS
+0x00A8 &die; ISOdia # DIAERESIS
+0x00A8 &uml; ISOdia # DIAERESIS
+0x00A9 &copy; ISOnum # COPYRIGHT SIGN
+0x00AA &ordf; ISOnum # FEMININE ORDINAL INDICATOR
+0x00AB &laquo; ISOnum # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0x00AC &not; ISOnum # NOT SIGN
+0x00AD &shy; ISOnum # SOFT HYPHEN
+0x00AE &reg; ISOnum # REGISTERED SIGN
+0x00AF &macr; ISOdia # MACRON
+0x00B0 &deg; ISOnum # DEGREE SIGN
+0x00B1 &plusmn; ISOnum # PLUS-MINUS SIGN
+0x00B2 &sup2; ISOnum # SUPERSCRIPT TWO
+0x00B3 &sup3; ISOnum # SUPERSCRIPT THREE
+0x00B4 &acute; ISOdia # ACUTE ACCENT
+0x00B5 &micro; ISOnum # MICRO SIGN
+0x00B6 &para; ISOnum # PILCROW SIGN
+0x00B7 &middot; ISOnum # MIDDLE DOT
+0x00B8 &cedil; ISOdia # CEDILLA
+0x00B9 &sup1; ISOnum # SUPERSCRIPT ONE
+0x00BA &ordm; ISOnum # MASCULINE ORDINAL INDICATOR
+0x00BB &raquo; ISOnum # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0x00BC &frac14; ISOnum # VULGAR FRACTION ONE QUARTER
+0x00BD &frac12; ISOnum # VULGAR FRACTION ONE HALF
+0x00BD &half; ISOnum # VULGAR FRACTION ONE HALF
+0x00BE &frac34; ISOnum # VULGAR FRACTION THREE QUARTERS
+0x00BF &iquest; ISOnum # INVERTED QUESTION MARK
+0x00C0 &Agrave; ISOlat1 # LATIN CAPITAL LETTER A WITH GRAVE
+0x00C1 &Aacute; ISOlat1 # LATIN CAPITAL LETTER A WITH ACUTE
+0x00C2 &Acirc; ISOlat1 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0x00C3 &Atilde; ISOlat1 # LATIN CAPITAL LETTER A WITH TILDE
+0x00C4 &Auml; ISOlat1 # LATIN CAPITAL LETTER A WITH DIAERESIS
+0x00C5 &Aring; ISOlat1 # LATIN CAPITAL LETTER A WITH RING ABOVE
+0x00C6 &AElig; ISOlat1 # LATIN CAPITAL LETTER AE
+0x00C7 &Ccedil; ISOlat1 # LATIN CAPITAL LETTER C WITH CEDILLA
+0x00C8 &Egrave; ISOlat1 # LATIN CAPITAL LETTER E WITH GRAVE
+0x00C9 &Eacute; ISOlat1 # LATIN CAPITAL LETTER E WITH ACUTE
+0x00CA &Ecirc; ISOlat1 # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+0x00CB &Euml; ISOlat1 # LATIN CAPITAL LETTER E WITH DIAERESIS
+0x00CC &Igrave; ISOlat1 # LATIN CAPITAL LETTER I WITH GRAVE
+0x00CD &Iacute; ISOlat1 # LATIN CAPITAL LETTER I WITH ACUTE
+0x00CE &Icirc; ISOlat1 # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0x00CF &Iuml; ISOlat1 # LATIN CAPITAL LETTER I WITH DIAERESIS
+0x00D0 &ETH; ISOlat1 # LATIN CAPITAL LETTER ETH
+0x00D1 &Ntilde; ISOlat1 # LATIN CAPITAL LETTER N WITH TILDE
+0x00D2 &Ograve; ISOlat1 # LATIN CAPITAL LETTER O WITH GRAVE
+0x00D3 &Oacute; ISOlat1 # LATIN CAPITAL LETTER O WITH ACUTE
+0x00D4 &Ocirc; ISOlat1 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0x00D5 &Otilde; ISOlat1 # LATIN CAPITAL LETTER O WITH TILDE
+0x00D6 &Ouml; ISOlat1 # LATIN CAPITAL LETTER O WITH DIAERESIS
+0x00D7 &times; ISOnum # MULTIPLICATION SIGN
+0x00D8 &Oslash; ISOlat1 # LATIN CAPITAL LETTER O WITH STROKE
+0x00D9 &Ugrave; ISOlat1 # LATIN CAPITAL LETTER U WITH GRAVE
+0x00DA &Uacute; ISOlat1 # LATIN CAPITAL LETTER U WITH ACUTE
+0x00DB &Ucirc; ISOlat1 # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+0x00DC &Uuml; ISOlat1 # LATIN CAPITAL LETTER U WITH DIAERESIS
+0x00DD &Yacute; ISOlat1 # LATIN CAPITAL LETTER Y WITH ACUTE
+0x00DE &THORN; ISOlat1 # LATIN CAPITAL LETTER THORN
+0x00DF &szlig; ISOlat1 # LATIN SMALL LETTER SHARP S
+0x00E0 &agrave; ISOlat1 # LATIN SMALL LETTER A WITH GRAVE
+0x00E1 &aacute; ISOlat1 # LATIN SMALL LETTER A WITH ACUTE
+0x00E2 &acirc; ISOlat1 # LATIN SMALL LETTER A WITH CIRCUMFLEX
+0x00E3 &atilde; ISOlat1 # LATIN SMALL LETTER A WITH TILDE
+0x00E4 &auml; ISOlat1 # LATIN SMALL LETTER A WITH DIAERESIS
+0x00E5 &aring; ISOlat1 # LATIN SMALL LETTER A WITH RING ABOVE
+0x00E6 &aelig; ISOlat1 # LATIN SMALL LETTER AE
+0x00E7 &ccedil; ISOlat1 # LATIN SMALL LETTER C WITH CEDILLA
+0x00E8 &egrave; ISOlat1 # LATIN SMALL LETTER E WITH GRAVE
+0x00E9 &eacute; ISOlat1 # LATIN SMALL LETTER E WITH ACUTE
+0x00EA &ecirc; ISOlat1 # LATIN SMALL LETTER E WITH CIRCUMFLEX
+0x00EB &euml; ISOlat1 # LATIN SMALL LETTER E WITH DIAERESIS
+0x00EC &igrave; ISOlat1 # LATIN SMALL LETTER I WITH GRAVE
+0x00ED &iacute; ISOlat1 # LATIN SMALL LETTER I WITH ACUTE
+0x00EE &icirc; ISOlat1 # LATIN SMALL LETTER I WITH CIRCUMFLEX
+0x00EF &iuml; ISOlat1 # LATIN SMALL LETTER I WITH DIAERESIS
+0x00F0 &eth; ISOlat1 # LATIN SMALL LETTER ETH
+0x00F1 &ntilde; ISOlat1 # LATIN SMALL LETTER N WITH TILDE
+0x00F2 &ograve; ISOlat1 # LATIN SMALL LETTER O WITH GRAVE
+0x00F3 &oacute; ISOlat1 # LATIN SMALL LETTER O WITH ACUTE
+0x00F4 &ocirc; ISOlat1 # LATIN SMALL LETTER O WITH CIRCUMFLEX
+0x00F5 &otilde; ISOlat1 # LATIN SMALL LETTER O WITH TILDE
+0x00F6 &ouml; ISOlat1 # LATIN SMALL LETTER O WITH DIAERESIS
+0x00F7 &divide; ISOnum # DIVISION SIGN
+0x00F8 &oslash; ISOlat1 # LATIN SMALL LETTER O WITH STROKE
+0x00F9 &ugrave; ISOlat1 # LATIN SMALL LETTER U WITH GRAVE
+0x00FA &uacute; ISOlat1 # LATIN SMALL LETTER U WITH ACUTE
+0x00FB &ucirc; ISOlat1 # LATIN SMALL LETTER U WITH CIRCUMFLEX
+0x00FC &uuml; ISOlat1 # LATIN SMALL LETTER U WITH DIAERESIS
+0x00FD &yacute; ISOlat1 # LATIN SMALL LETTER Y WITH ACUTE
+0x00FE &thorn; ISOlat1 # LATIN SMALL LETTER THORN
+0x00FF &yuml; ISOlat1 # LATIN SMALL LETTER Y WITH DIAERESIS
+0x0100 &Amacr; ISOlat2 # LATIN CAPITAL LETTER A WITH MACRON
+0x0101 &amacr; ISOlat2 # LATIN SMALL LETTER A WITH MACRON
+0x0102 &Abreve; ISOlat2 # LATIN CAPITAL LETTER A WITH BREVE
+0x0103 &abreve; ISOlat2 # LATIN SMALL LETTER A WITH BREVE
+0x0104 &Aogon; ISOlat2 # LATIN CAPITAL LETTER A WITH OGONEK
+0x0105 &aogon; ISOlat2 # LATIN SMALL LETTER A WITH OGONEK
+0x0106 &Cacute; ISOlat2 # LATIN CAPITAL LETTER C WITH ACUTE
+0x0107 &cacute; ISOlat2 # LATIN SMALL LETTER C WITH ACUTE
+0x0108 &Ccirc; ISOlat2 # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+0x0109 &ccirc; ISOlat2 # LATIN SMALL LETTER C WITH CIRCUMFLEX
+0x010A &Cdot; ISOlat2 # LATIN CAPITAL LETTER C WITH DOT ABOVE
+0x010B &cdot; ISOlat2 # LATIN SMALL LETTER C WITH DOT ABOVE
+0x010C &Ccaron; ISOlat2 # LATIN CAPITAL LETTER C WITH CARON
+0x010D &ccaron; ISOlat2 # LATIN SMALL LETTER C WITH CARON
+0x010E &Dcaron; ISOlat2 # LATIN CAPITAL LETTER D WITH CARON
+0x010F &dcaron; ISOlat2 # LATIN SMALL LETTER D WITH CARON
+0x0110 &Dstrok; ISOlat2 # LATIN CAPITAL LETTER D WITH STROKE
+0x0111 &dstrok; ISOlat2 # LATIN SMALL LETTER D WITH STROKE
+0x0112 &Emacr; ISOlat2 # LATIN CAPITAL LETTER E WITH MACRON
+0x0113 &emacr; ISOlat2 # LATIN SMALL LETTER E WITH MACRON
+0x0116 &Edot; ISOlat2 # LATIN CAPITAL LETTER E WITH DOT ABOVE
+0x0117 &edot; ISOlat2 # LATIN SMALL LETTER E WITH DOT ABOVE
+0x0118 &Eogon; ISOlat2 # LATIN CAPITAL LETTER E WITH OGONEK
+0x0119 &eogon; ISOlat2 # LATIN SMALL LETTER E WITH OGONEK
+0x011A &Ecaron; ISOlat2 # LATIN CAPITAL LETTER E WITH CARON
+0x011B &ecaron; ISOlat2 # LATIN SMALL LETTER E WITH CARON
+0x011C &Gcirc; ISOlat2 # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+0x011D &gcirc; ISOlat2 # LATIN SMALL LETTER G WITH CIRCUMFLEX
+0x011E &Gbreve; ISOlat2 # LATIN CAPITAL LETTER G WITH BREVE
+0x011F &gbreve; ISOlat2 # LATIN SMALL LETTER G WITH BREVE
+0x0120 &Gdot; ISOlat2 # LATIN CAPITAL LETTER G WITH DOT ABOVE
+0x0121 &gdot; ISOlat2 # LATIN SMALL LETTER G WITH DOT ABOVE
+0x0122 &Gcedil; ISOlat2 # LATIN CAPITAL LETTER G WITH CEDILLA
+0x0123 &gcedil; ISOlat2 # LATIN SMALL LETTER G WITH CEDILLA
+0x0124 &Hcirc; ISOlat2 # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+0x0125 &hcirc; ISOlat2 # LATIN SMALL LETTER H WITH CIRCUMFLEX
+0x0126 &Hstrok; ISOlat2 # LATIN CAPITAL LETTER H WITH STROKE
+0x0127 &hstrok; ISOlat2 # LATIN SMALL LETTER H WITH STROKE
+0x0128 &Itilde; ISOlat2 # LATIN CAPITAL LETTER I WITH TILDE
+0x0129 &itilde; ISOlat2 # LATIN SMALL LETTER I WITH TILDE
+0x012A &Imacr; ISOlat2 # LATIN CAPITAL LETTER I WITH MACRON
+0x012B &imacr; ISOlat2 # LATIN SMALL LETTER I WITH MACRON
+0x012E &Iogon; ISOlat2 # LATIN CAPITAL LETTER I WITH OGONEK
+0x012F &iogon; ISOlat2 # LATIN SMALL LETTER I WITH OGONEK
+0x0130 &Idot; ISOlat2 # LATIN CAPITAL LETTER I WITH DOT ABOVE
+0x0131 &inodot; ISOamso # LATIN SMALL LETTER DOTLESS I
+0x0131 &inodot; ISOlat2 # LATIN SMALL LETTER DOTLESS I
+0x0132 &IJlig; ISOlat2 # LATIN CAPITAL LIGATURE IJ
+0x0133 &ijlig; ISOlat2 # LATIN SMALL LIGATURE IJ
+0x0134 &Jcirc; ISOlat2 # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+0x0135 &jcirc; ISOlat2 # LATIN SMALL LETTER J WITH CIRCUMFLEX
+0x0136 &Kcedil; ISOlat2 # LATIN CAPITAL LETTER K WITH CEDILLA
+0x0137 &kcedil; ISOlat2 # LATIN SMALL LETTER K WITH CEDILLA
+0x0138 &kgreen; ISOlat2 # LATIN SMALL LETTER KRA
+0x0139 &Lacute; ISOlat2 # LATIN CAPITAL LETTER L WITH ACUTE
+0x013A &lacute; ISOlat2 # LATIN SMALL LETTER L WITH ACUTE
+0x013B &Lcedil; ISOlat2 # LATIN CAPITAL LETTER L WITH CEDILLA
+0x013C &lcedil; ISOlat2 # LATIN SMALL LETTER L WITH CEDILLA
+0x013D &Lcaron; ISOlat2 # LATIN CAPITAL LETTER L WITH CARON
+0x013E &lcaron; ISOlat2 # LATIN SMALL LETTER L WITH CARON
+0x013F &Lmidot; ISOlat2 # LATIN CAPITAL LETTER L WITH MIDDLE DOT
+0x0140 &lmidot; ISOlat2 # LATIN SMALL LETTER L WITH MIDDLE DOT
+0x0141 &Lstrok; ISOlat2 # LATIN CAPITAL LETTER L WITH STROKE
+0x0142 &lstrok; ISOlat2 # LATIN SMALL LETTER L WITH STROKE
+0x0143 &Nacute; ISOlat2 # LATIN CAPITAL LETTER N WITH ACUTE
+0x0144 &nacute; ISOlat2 # LATIN SMALL LETTER N WITH ACUTE
+0x0145 &Ncedil; ISOlat2 # LATIN CAPITAL LETTER N WITH CEDILLA
+0x0146 &ncedil; ISOlat2 # LATIN SMALL LETTER N WITH CEDILLA
+0x0147 &Ncaron; ISOlat2 # LATIN CAPITAL LETTER N WITH CARON
+0x0148 &ncaron; ISOlat2 # LATIN SMALL LETTER N WITH CARON
+0x0149 &napos; ISOlat2 # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+0x014A &ENG; ISOlat2 # LATIN CAPITAL LETTER ENG
+0x014B &eng; ISOlat2 # LATIN SMALL LETTER ENG
+0x014C &Omacr; ISOlat2 # LATIN CAPITAL LETTER O WITH MACRON
+0x014D &omacr; ISOlat2 # LATIN SMALL LETTER O WITH MACRON
+0x0150 &Odblac; ISOlat2 # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+0x0151 &odblac; ISOlat2 # LATIN SMALL LETTER O WITH DOUBLE ACUTE
+0x0152 &OElig; ISOlat2 # LATIN CAPITAL LIGATURE OE
+0x0153 &oelig; ISOlat2 # LATIN SMALL LIGATURE OE
+0x0154 &Racute; ISOlat2 # LATIN CAPITAL LETTER R WITH ACUTE
+0x0155 &racute; ISOlat2 # LATIN SMALL LETTER R WITH ACUTE
+0x0156 &Rcedil; ISOlat2 # LATIN CAPITAL LETTER R WITH CEDILLA
+0x0157 &rcedil; ISOlat2 # LATIN SMALL LETTER R WITH CEDILLA
+0x0158 &Rcaron; ISOlat2 # LATIN CAPITAL LETTER R WITH CARON
+0x0159 &rcaron; ISOlat2 # LATIN SMALL LETTER R WITH CARON
+0x015A &Sacute; ISOlat2 # LATIN CAPITAL LETTER S WITH ACUTE
+0x015B &sacute; ISOlat2 # LATIN SMALL LETTER S WITH ACUTE
+0x015C &Scirc; ISOlat2 # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+0x015D &scirc; ISOlat2 # LATIN SMALL LETTER S WITH CIRCUMFLEX
+0x015E &Scedil; ISOlat2 # LATIN CAPITAL LETTER S WITH CEDILLA
+0x015F &scedil; ISOlat2 # LATIN SMALL LETTER S WITH CEDILLA
+0x0160 &Scaron; ISOlat2 # LATIN CAPITAL LETTER S WITH CARON
+0x0161 &scaron; ISOlat2 # LATIN SMALL LETTER S WITH CARON
+0x0162 &Tcedil; ISOlat2 # LATIN CAPITAL LETTER T WITH CEDILLA
+0x0163 &tcedil; ISOlat2 # LATIN SMALL LETTER T WITH CEDILLA
+0x0164 &Tcaron; ISOlat2 # LATIN CAPITAL LETTER T WITH CARON
+0x0165 &tcaron; ISOlat2 # LATIN SMALL LETTER T WITH CARON
+0x0166 &Tstrok; ISOlat2 # LATIN CAPITAL LETTER T WITH STROKE
+0x0167 &tstrok; ISOlat2 # LATIN SMALL LETTER T WITH STROKE
+0x0168 &Utilde; ISOlat2 # LATIN CAPITAL LETTER U WITH TILDE
+0x0169 &utilde; ISOlat2 # LATIN SMALL LETTER U WITH TILDE
+0x016A &Umacr; ISOlat2 # LATIN CAPITAL LETTER U WITH MACRON
+0x016B &umacr; ISOlat2 # LATIN SMALL LETTER U WITH MACRON
+0x016C &Ubreve; ISOlat2 # LATIN CAPITAL LETTER U WITH BREVE
+0x016D &ubreve; ISOlat2 # LATIN SMALL LETTER U WITH BREVE
+0x016E &Uring; ISOlat2 # LATIN CAPITAL LETTER U WITH RING ABOVE
+0x016F &uring; ISOlat2 # LATIN SMALL LETTER U WITH RING ABOVE
+0x0170 &Udblac; ISOlat2 # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+0x0171 &udblac; ISOlat2 # LATIN SMALL LETTER U WITH DOUBLE ACUTE
+0x0172 &Uogon; ISOlat2 # LATIN CAPITAL LETTER U WITH OGONEK
+0x0173 &uogon; ISOlat2 # LATIN SMALL LETTER U WITH OGONEK
+0x0174 &Wcirc; ISOlat2 # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+0x0175 &wcirc; ISOlat2 # LATIN SMALL LETTER W WITH CIRCUMFLEX
+0x0176 &Ycirc; ISOlat2 # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+0x0177 &ycirc; ISOlat2 # LATIN SMALL LETTER Y WITH CIRCUMFLEX
+0x0178 &Yuml; ISOlat2 # LATIN CAPITAL LETTER Y WITH DIAERESIS
+0x0179 &Zacute; ISOlat2 # LATIN CAPITAL LETTER Z WITH ACUTE
+0x017A &zacute; ISOlat2 # LATIN SMALL LETTER Z WITH ACUTE
+0x017B &Zdot; ISOlat2 # LATIN CAPITAL LETTER Z WITH DOT ABOVE
+0x017C &zdot; ISOlat2 # LATIN SMALL LETTER Z WITH DOT ABOVE
+0x017D &Zcaron; ISOlat2 # LATIN CAPITAL LETTER Z WITH CARON
+0x017E &zcaron; ISOlat2 # LATIN SMALL LETTER Z WITH CARON
+0x0192 &fnof; ISOtech # LATIN SMALL LETTER F WITH HOOK
+0x01F5 &gacute; ISOlat2 # LATIN SMALL LETTER G WITH ACUTE
+0x02BC &apos; ISOnum # MODIFIER LETTER APOSTROPHE
+0x02C6 &circ; ISOdia # MODIFIER LETTER CIRCUMFLEX ACCENT
+0x02C7 &caron; ISOdia # CARON
+0x02D8 &breve; ISOdia # BREVE
+0x02D9 &dot; ISOdia # DOT ABOVE
+0x02DA &ring; ISOdia # RING ABOVE
+0x02DB &ogon; ISOdia # OGONEK
+0x02DC &tilde; ISOdia # SMALL TILDE
+0x02DD &dblac; ISOdia # DOUBLE ACUTE ACCENT
+0x0386 &Aacgr; ISOgrk2 # GREEK CAPITAL LETTER ALPHA WITH TONOS
+0x0388 &Eacgr; ISOgrk2 # GREEK CAPITAL LETTER EPSILON WITH TONOS
+0x0389 &EEacgr; ISOgrk2 # GREEK CAPITAL LETTER ETA WITH TONOS
+0x038A &Iacgr; ISOgrk2 # GREEK CAPITAL LETTER IOTA WITH TONOS
+0x038C &Oacgr; ISOgrk2 # GREEK CAPITAL LETTER OMICRON WITH TONOS
+0x038E &Uacgr; ISOgrk2 # GREEK CAPITAL LETTER UPSILON WITH TONOS
+0x038F &OHacgr; ISOgrk2 # GREEK CAPITAL LETTER OMEGA WITH TONOS
+0x0390 &idiagr; ISOgrk2 # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+0x0391 &Agr; ISOgrk1 # GREEK CAPITAL LETTER ALPHA
+0x0391 &Alpha; HTMLsymbol # GREEK CAPITAL LETTER ALPHA
+0x0392 &Beta; HTMLsymbol # GREEK CAPITAL LETTER BETA
+0x0392 &Bgr; ISOgrk1 # GREEK CAPITAL LETTER BETA
+0x0393 &Gamma; ISOgrk3 # GREEK CAPITAL LETTER GAMMA
+0x0393 &Ggr; ISOgrk1 # GREEK CAPITAL LETTER GAMMA
+0x0393 &b.Gamma; ISOgrk4 # GREEK CAPITAL LETTER GAMMA
+0x0394 &Delta; ISOgrk3 # GREEK CAPITAL LETTER DELTA
+0x0394 &Dgr; ISOgrk1 # GREEK CAPITAL LETTER DELTA
+0x0394 &b.Delta; ISOgrk4 # GREEK CAPITAL LETTER DELTA
+0x0395 &Egr; ISOgrk1 # GREEK CAPITAL LETTER EPSILON
+0x0395 &Epsilon; HTMLsymbol # GREEK CAPITAL LETTER EPSILON
+0x0396 &Zeta; HTMLsymbol # GREEK CAPITAL LETTER ZETA
+0x0396 &Zgr; ISOgrk1 # GREEK CAPITAL LETTER ZETA
+0x0397 &EEgr; ISOgrk1 # GREEK CAPITAL LETTER ETA
+0x0397 &Eta; HTMLsymbol # GREEK CAPITAL LETTER ETA
+0x0398 &THgr; ISOgrk1 # GREEK CAPITAL LETTER THETA
+0x0398 &Theta; ISOgrk3 # GREEK CAPITAL LETTER THETA
+0x0398 &b.Theta; ISOgrk4 # GREEK CAPITAL LETTER THETA
+0x0399 &Igr; ISOgrk1 # GREEK CAPITAL LETTER IOTA
+0x0399 &Iota; HTMLsymbol # GREEK CAPITAL LETTER IOTA
+0x039A &Kappa; HTMLsymbol # GREEK CAPITAL LETTER KAPPA
+0x039A &Kgr; ISOgrk1 # GREEK CAPITAL LETTER KAPPA
+0x039B &Lambda; ISOgrk3 # GREEK CAPITAL LETTER LAMDA
+0x039B &Lgr; ISOgrk1 # GREEK CAPITAL LETTER LAMDA
+0x039B &b.Lambda; ISOgrk4 # GREEK CAPITAL LETTER LAMDA
+0x039C &Mgr; ISOgrk1 # GREEK CAPITAL LETTER MU
+0x039C &Mu; HTMLsymbol # GREEK CAPITAL LETTER MU
+0x039D &Ngr; ISOgrk1 # GREEK CAPITAL LETTER NU
+0x039D &Nu; HTMLsymbol # GREEK CAPITAL LETTER NU
+0x039E &Xgr; ISOgrk1 # GREEK CAPITAL LETTER XI
+0x039E &Xi; ISOgrk3 # GREEK CAPITAL LETTER XI
+0x039E &b.Xi; ISOgrk4 # GREEK CAPITAL LETTER XI
+0x039F &Ogr; ISOgrk1 # GREEK CAPITAL LETTER OMICRON
+0x039F &Omicron; HTMLsymbol # GREEK CAPITAL LETTER OMICRON
+0x03A0 &Pgr; ISOgrk1 # GREEK CAPITAL LETTER PI
+0x03A0 &Pi; ISOgrk3 # GREEK CAPITAL LETTER PI
+0x03A0 &b.Pi; ISOgrk4 # GREEK CAPITAL LETTER PI
+0x03A1 &Rgr; ISOgrk1 # GREEK CAPITAL LETTER RHO
+0x03A1 &Rho; HTMLsymbol # GREEK CAPITAL LETTER RHO
+0x03A3 &Sgr; ISOgrk1 # GREEK CAPITAL LETTER SIGMA
+0x03A3 &Sigma; ISOgrk3 # GREEK CAPITAL LETTER SIGMA
+0x03A3 &b.Sigma; ISOgrk4 # GREEK CAPITAL LETTER SIGMA
+0x03A4 &Tau; HTMLsymbol # GREEK CAPITAL LETTER TAU
+0x03A4 &Tgr; ISOgrk1 # GREEK CAPITAL LETTER TAU
+0x03A5 &Ugr; ISOgrk1 # GREEK CAPITAL LETTER UPSILON
+0x03A5 &Upsi; ISOgrk3 # GREEK CAPITAL LETTER UPSILON
+0x03A5 &Upsilon; HTMLsymbol # GREEK CAPITAL LETTER UPSILON
+0x03A5 &b.Upsi; ISOgrk4 # GREEK CAPITAL LETTER UPSILON
+0x03A6 &PHgr; ISOgrk1 # GREEK CAPITAL LETTER PHI
+0x03A6 &Phi; ISOgrk3 # GREEK CAPITAL LETTER PHI
+0x03A6 &b.Phi; ISOgrk4 # GREEK CAPITAL LETTER PHI
+0x03A7 &Chi; HTMLsymbol # GREEK CAPITAL LETTER CHI
+0x03A7 &KHgr; ISOgrk1 # GREEK CAPITAL LETTER CHI
+0x03A8 &PSgr; ISOgrk1 # GREEK CAPITAL LETTER PSI
+0x03A8 &Psi; ISOgrk3 # GREEK CAPITAL LETTER PSI
+0x03A8 &b.Psi; ISOgrk4 # GREEK CAPITAL LETTER PSI
+0x03A9 &OHgr; ISOgrk1 # GREEK CAPITAL LETTER OMEGA
+0x03A9 &Omega; ISOgrk3 # GREEK CAPITAL LETTER OMEGA
+0x03A9 &b.Omega; ISOgrk4 # GREEK CAPITAL LETTER OMEGA
+0x03AA &Idigr; ISOgrk2 # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+0x03AB &Udigr; ISOgrk2 # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+0x03AC &aacgr; ISOgrk2 # GREEK SMALL LETTER ALPHA WITH TONOS
+0x03AD &eacgr; ISOgrk2 # GREEK SMALL LETTER EPSILON WITH TONOS
+0x03AE &eeacgr; ISOgrk2 # GREEK SMALL LETTER ETA WITH TONOS
+0x03AF &iacgr; ISOgrk2 # GREEK SMALL LETTER IOTA WITH TONOS
+0x03B0 &udiagr; ISOgrk2 # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+0x03B1 &agr; ISOgrk1 # GREEK SMALL LETTER ALPHA
+0x03B1 &alpha; ISOgrk3 # GREEK SMALL LETTER ALPHA
+0x03B1 &b.alpha; ISOgrk4 # GREEK SMALL LETTER ALPHA
+0x03B2 &b.beta; ISOgrk4 # GREEK SMALL LETTER BETA
+0x03B2 &beta; ISOgrk3 # GREEK SMALL LETTER BETA
+0x03B2 &bgr; ISOgrk1 # GREEK SMALL LETTER BETA
+0x03B3 &b.gamma; ISOgrk4 # GREEK SMALL LETTER GAMMA
+0x03B3 &gamma; ISOgrk3 # GREEK SMALL LETTER GAMMA
+0x03B3 &ggr; ISOgrk1 # GREEK SMALL LETTER GAMMA
+0x03B4 &b.delta; ISOgrk4 # GREEK SMALL LETTER DELTA
+0x03B4 &delta; ISOgrk3 # GREEK SMALL LETTER DELTA
+0x03B4 &dgr; ISOgrk1 # GREEK SMALL LETTER DELTA
+0x03B5 &b.epsi; ISOgrk4 # GREEK SMALL LETTER EPSILON
+0x03B5 &b.epsis; ISOgrk4 # GREEK SMALL LETTER EPSILON
+0x03B5 &b.epsiv; ISOgrk4 # GREEK SMALL LETTER EPSILON
+0x03B5 &egr; ISOgrk1 # GREEK SMALL LETTER EPSILON
+0x03B5 &epsi; ISOgrk3 # GREEK SMALL LETTER EPSILON
+0x03B5 &epsilon; HTMLsymbol # GREEK SMALL LETTER EPSILON
+0x03B6 &b.zeta; ISOgrk4 # GREEK SMALL LETTER ZETA
+0x03B6 &zeta; ISOgrk3 # GREEK SMALL LETTER ZETA
+0x03B6 &zgr; ISOgrk1 # GREEK SMALL LETTER ZETA
+0x03B7 &b.eta; ISOgrk4 # GREEK SMALL LETTER ETA
+0x03B7 &eegr; ISOgrk1 # GREEK SMALL LETTER ETA
+0x03B7 &eta; ISOgrk3 # GREEK SMALL LETTER ETA
+0x03B8 &b.thetas; ISOgrk4 # GREEK SMALL LETTER THETA
+0x03B8 &theta; HTMLsymbol # GREEK SMALL LETTER THETA
+0x03B8 &thetas; ISOgrk3 # GREEK SMALL LETTER THETA
+0x03B8 &thgr; ISOgrk1 # GREEK SMALL LETTER THETA
+0x03B9 &b.iota; ISOgrk4 # GREEK SMALL LETTER IOTA
+0x03B9 &igr; ISOgrk1 # GREEK SMALL LETTER IOTA
+0x03B9 &iota; ISOgrk3 # GREEK SMALL LETTER IOTA
+0x03BA &b.kappa; ISOgrk4 # GREEK SMALL LETTER KAPPA
+0x03BA &kappa; ISOgrk3 # GREEK SMALL LETTER KAPPA
+0x03BA &kgr; ISOgrk1 # GREEK SMALL LETTER KAPPA
+0x03BB &b.lambda; ISOgrk4 # GREEK SMALL LETTER LAMDA
+0x03BB &lambda; ISOgrk3 # GREEK SMALL LETTER LAMDA
+0x03BB &lgr; ISOgrk1 # GREEK SMALL LETTER LAMDA
+0x03BC &b.mu; ISOgrk4 # GREEK SMALL LETTER MU
+0x03BC &mgr; ISOgrk1 # GREEK SMALL LETTER MU
+0x03BC &mu; ISOgrk3 # GREEK SMALL LETTER MU
+0x03BD &b.nu; ISOgrk4 # GREEK SMALL LETTER NU
+0x03BD &ngr; ISOgrk1 # GREEK SMALL LETTER NU
+0x03BD &nu; ISOgrk3 # GREEK SMALL LETTER NU
+0x03BE &b.xi; ISOgrk4 # GREEK SMALL LETTER XI
+0x03BE &xgr; ISOgrk1 # GREEK SMALL LETTER XI
+0x03BE &xi; ISOgrk3 # GREEK SMALL LETTER XI
+0x03BF &ogr; ISOgrk1 # GREEK SMALL LETTER OMICRON
+0x03BF &omicron; HTMLsymbol # GREEK SMALL LETTER OMICRON
+0x03C0 &b.pi; ISOgrk4 # GREEK SMALL LETTER PI
+0x03C0 &pgr; ISOgrk1 # GREEK SMALL LETTER PI
+0x03C0 &pi; ISOgrk3 # GREEK SMALL LETTER PI
+0x03C1 &b.rho; ISOgrk4 # GREEK SMALL LETTER RHO
+0x03C1 &rgr; ISOgrk1 # GREEK SMALL LETTER RHO
+0x03C1 &rho; ISOgrk3 # GREEK SMALL LETTER RHO
+0x03C2 &b.sigmav; ISOgrk4 # GREEK SMALL LETTER FINAL SIGMA
+0x03C2 &sfgr; ISOgrk1 # GREEK SMALL LETTER FINAL SIGMA
+0x03C2 &sigmaf; HTMLsymbol # GREEK SMALL LETTER FINAL SIGMA
+0x03C2 &sigmav; ISOgrk3 # GREEK SMALL LETTER FINAL SIGMA
+0x03C3 &b.sigma; ISOgrk4 # GREEK SMALL LETTER SIGMA
+0x03C3 &sgr; ISOgrk1 # GREEK SMALL LETTER SIGMA
+0x03C3 &sigma; ISOgrk3 # GREEK SMALL LETTER SIGMA
+0x03C4 &b.tau; ISOgrk4 # GREEK SMALL LETTER TAU
+0x03C4 &tau; ISOgrk3 # GREEK SMALL LETTER TAU
+0x03C4 &tgr; ISOgrk1 # GREEK SMALL LETTER TAU
+0x03C5 &b.upsi; ISOgrk4 # GREEK SMALL LETTER UPSILON
+0x03C5 &ugr; ISOgrk1 # GREEK SMALL LETTER UPSILON
+0x03C5 &upsi; ISOgrk3 # GREEK SMALL LETTER UPSILON
+0x03C5 &upsilon; HTMLsymbol # GREEK SMALL LETTER UPSILON
+0x03C6 &b.phis; ISOgrk4 # GREEK SMALL LETTER PHI
+0x03C6 &phgr; ISOgrk1 # GREEK SMALL LETTER PHI
+0x03C6 &phi; HTMLsymbol # GREEK SMALL LETTER PHI
+0x03C6 &phis; ISOgrk3 # GREEK SMALL LETTER PHI
+0x03C7 &b.chi; ISOgrk4 # GREEK SMALL LETTER CHI
+0x03C7 &chi; ISOgrk3 # GREEK SMALL LETTER CHI
+0x03C7 &khgr; ISOgrk1 # GREEK SMALL LETTER CHI
+0x03C8 &b.psi; ISOgrk4 # GREEK SMALL LETTER PSI
+0x03C8 &psgr; ISOgrk1 # GREEK SMALL LETTER PSI
+0x03C8 &psi; ISOgrk3 # GREEK SMALL LETTER PSI
+0x03C9 &ohgr; ISOgrk1 # GREEK SMALL LETTER OMEGA
+0x03C9 &omega; ISOgrk3 # GREEK SMALL LETTER OMEGA
+0x03CA &idigr; ISOgrk2 # GREEK SMALL LETTER IOTA WITH DIALYTIKA
+0x03CB &udigr; ISOgrk2 # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+0x03CC &oacgr; ISOgrk2 # GREEK SMALL LETTER OMICRON WITH TONOS
+0x03CD &uacgr; ISOgrk2 # GREEK SMALL LETTER UPSILON WITH TONOS
+0x03CE &b.omega; ISOgrk4 # GREEK SMALL LETTER OMEGA WITH TONOS
+0x03CE &ohacgr; ISOgrk2 # GREEK SMALL LETTER OMEGA WITH TONOS
+0x03D1 &b.thetav; ISOgrk4 # GREEK THETA SYMBOL
+0x03D1 &thetasym; HTMLsymbol # GREEK THETA SYMBOL
+0x03D1 &thetav; ISOgrk3 # GREEK THETA SYMBOL
+0x03D2 &upsih; HTMLsymbol # GREEK UPSILON WITH HOOK SYMBOL
+0x03D5 &b.phiv; ISOgrk4 # GREEK PHI SYMBOL
+0x03D5 &phiv; ISOgrk3 # GREEK PHI SYMBOL
+0x03D6 &b.piv; ISOgrk4 # GREEK PI SYMBOL
+0x03D6 &piv; ISOgrk3 # GREEK PI SYMBOL
+0x03DC &b.gammad; ISOgrk4 # GREEK LETTER DIGAMMA
+0x03DC &gammad; ISOgrk3 # GREEK LETTER DIGAMMA
+0x03F0 &b.kappav; ISOgrk4 # GREEK KAPPA SYMBOL
+0x03F0 &kappav; ISOgrk3 # GREEK KAPPA SYMBOL
+0x03F1 &b.rhov; ISOgrk4 # GREEK RHO SYMBOL
+0x03F1 &rhov; ISOgrk3 # GREEK RHO SYMBOL
+0x0401 &IOcy; ISOcyr1 # CYRILLIC CAPITAL LETTER IO
+0x0402 &DJcy; ISOcyr2 # CYRILLIC CAPITAL LETTER DJE
+0x0403 &GJcy; ISOcyr2 # CYRILLIC CAPITAL LETTER GJE
+0x0404 &Jukcy; ISOcyr2 # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+0x0405 &DScy; ISOcyr2 # CYRILLIC CAPITAL LETTER DZE
+0x0406 &Iukcy; ISOcyr2 # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+0x0407 &YIcy; ISOcyr2 # CYRILLIC CAPITAL LETTER YI
+0x0408 &Jsercy; ISOcyr2 # CYRILLIC CAPITAL LETTER JE
+0x0409 &LJcy; ISOcyr2 # CYRILLIC CAPITAL LETTER LJE
+0x040A &NJcy; ISOcyr2 # CYRILLIC CAPITAL LETTER NJE
+0x040B &TSHcy; ISOcyr2 # CYRILLIC CAPITAL LETTER TSHE
+0x040C &KJcy; ISOcyr2 # CYRILLIC CAPITAL LETTER KJE
+0x040E &Ubrcy; ISOcyr2 # CYRILLIC CAPITAL LETTER SHORT U
+0x040F &DZcy; ISOcyr2 # CYRILLIC CAPITAL LETTER DZHE
+0x0410 &Acy; ISOcyr1 # CYRILLIC CAPITAL LETTER A
+0x0411 &Bcy; ISOcyr1 # CYRILLIC CAPITAL LETTER BE
+0x0412 &Vcy; ISOcyr1 # CYRILLIC CAPITAL LETTER VE
+0x0413 &Gcy; ISOcyr1 # CYRILLIC CAPITAL LETTER GHE
+0x0414 &Dcy; ISOcyr1 # CYRILLIC CAPITAL LETTER DE
+0x0415 &IEcy; ISOcyr1 # CYRILLIC CAPITAL LETTER IE
+0x0416 &ZHcy; ISOcyr1 # CYRILLIC CAPITAL LETTER ZHE
+0x0417 &Zcy; ISOcyr1 # CYRILLIC CAPITAL LETTER ZE
+0x0418 &Icy; ISOcyr1 # CYRILLIC CAPITAL LETTER I
+0x0419 &Jcy; ISOcyr1 # CYRILLIC CAPITAL LETTER SHORT I
+0x041A &Kcy; ISOcyr1 # CYRILLIC CAPITAL LETTER KA
+0x041B &Lcy; ISOcyr1 # CYRILLIC CAPITAL LETTER EL
+0x041C &Mcy; ISOcyr1 # CYRILLIC CAPITAL LETTER EM
+0x041D &Ncy; ISOcyr1 # CYRILLIC CAPITAL LETTER EN
+0x041E &Ocy; ISOcyr1 # CYRILLIC CAPITAL LETTER O
+0x041F &Pcy; ISOcyr1 # CYRILLIC CAPITAL LETTER PE
+0x0420 &Rcy; ISOcyr1 # CYRILLIC CAPITAL LETTER ER
+0x0421 &Scy; ISOcyr1 # CYRILLIC CAPITAL LETTER ES
+0x0422 &Tcy; ISOcyr1 # CYRILLIC CAPITAL LETTER TE
+0x0423 &Ucy; ISOcyr1 # CYRILLIC CAPITAL LETTER U
+0x0424 &Fcy; ISOcyr1 # CYRILLIC CAPITAL LETTER EF
+0x0425 &KHcy; ISOcyr1 # CYRILLIC CAPITAL LETTER HA
+0x0426 &TScy; ISOcyr1 # CYRILLIC CAPITAL LETTER TSE
+0x0427 &CHcy; ISOcyr1 # CYRILLIC CAPITAL LETTER CHE
+0x0428 &SHcy; ISOcyr1 # CYRILLIC CAPITAL LETTER SHA
+0x0429 &SHCHcy; ISOcyr1 # CYRILLIC CAPITAL LETTER SHCHA
+0x042A &HARDcy; ISOcyr1 # CYRILLIC CAPITAL LETTER HARD SIGN
+0x042B &Ycy; ISOcyr1 # CYRILLIC CAPITAL LETTER YERU
+0x042C &SOFTcy; ISOcyr1 # CYRILLIC CAPITAL LETTER SOFT SIGN
+0x042D &Ecy; ISOcyr1 # CYRILLIC CAPITAL LETTER E
+0x042E &YUcy; ISOcyr1 # CYRILLIC CAPITAL LETTER YU
+0x042F &YAcy; ISOcyr1 # CYRILLIC CAPITAL LETTER YA
+0x0430 &acy; ISOcyr1 # CYRILLIC SMALL LETTER A
+0x0431 &bcy; ISOcyr1 # CYRILLIC SMALL LETTER BE
+0x0432 &vcy; ISOcyr1 # CYRILLIC SMALL LETTER VE
+0x0433 &gcy; ISOcyr1 # CYRILLIC SMALL LETTER GHE
+0x0434 &dcy; ISOcyr1 # CYRILLIC SMALL LETTER DE
+0x0435 &iecy; ISOcyr1 # CYRILLIC SMALL LETTER IE
+0x0436 &zhcy; ISOcyr1 # CYRILLIC SMALL LETTER ZHE
+0x0437 &zcy; ISOcyr1 # CYRILLIC SMALL LETTER ZE
+0x0438 &icy; ISOcyr1 # CYRILLIC SMALL LETTER I
+0x0439 &jcy; ISOcyr1 # CYRILLIC SMALL LETTER SHORT I
+0x043A &kcy; ISOcyr1 # CYRILLIC SMALL LETTER KA
+0x043B &lcy; ISOcyr1 # CYRILLIC SMALL LETTER EL
+0x043C &mcy; ISOcyr1 # CYRILLIC SMALL LETTER EM
+0x043D &ncy; ISOcyr1 # CYRILLIC SMALL LETTER EN
+0x043E &ocy; ISOcyr1 # CYRILLIC SMALL LETTER O
+0x043F &pcy; ISOcyr1 # CYRILLIC SMALL LETTER PE
+0x0440 &rcy; ISOcyr1 # CYRILLIC SMALL LETTER ER
+0x0441 &scy; ISOcyr1 # CYRILLIC SMALL LETTER ES
+0x0442 &tcy; ISOcyr1 # CYRILLIC SMALL LETTER TE
+0x0443 &ucy; ISOcyr1 # CYRILLIC SMALL LETTER U
+0x0444 &fcy; ISOcyr1 # CYRILLIC SMALL LETTER EF
+0x0445 &khcy; ISOcyr1 # CYRILLIC SMALL LETTER HA
+0x0446 &tscy; ISOcyr1 # CYRILLIC SMALL LETTER TSE
+0x0447 &chcy; ISOcyr1 # CYRILLIC SMALL LETTER CHE
+0x0448 &shcy; ISOcyr1 # CYRILLIC SMALL LETTER SHA
+0x0449 &shchcy; ISOcyr1 # CYRILLIC SMALL LETTER SHCHA
+0x044A &hardcy; ISOcyr1 # CYRILLIC SMALL LETTER HARD SIGN
+0x044B &ycy; ISOcyr1 # CYRILLIC SMALL LETTER YERU
+0x044C &softcy; ISOcyr1 # CYRILLIC SMALL LETTER SOFT SIGN
+0x044D &ecy; ISOcyr1 # CYRILLIC SMALL LETTER E
+0x044E &yucy; ISOcyr1 # CYRILLIC SMALL LETTER YU
+0x044F &yacy; ISOcyr1 # CYRILLIC SMALL LETTER YA
+0x0451 &iocy; ISOcyr1 # CYRILLIC SMALL LETTER IO
+0x0452 &djcy; ISOcyr2 # CYRILLIC SMALL LETTER DJE
+0x0453 &gjcy; ISOcyr2 # CYRILLIC SMALL LETTER GJE
+0x0454 &jukcy; ISOcyr2 # CYRILLIC SMALL LETTER UKRAINIAN IE
+0x0455 &dscy; ISOcyr2 # CYRILLIC SMALL LETTER DZE
+0x0456 &iukcy; ISOcyr2 # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+0x0457 &yicy; ISOcyr2 # CYRILLIC SMALL LETTER YI
+0x0458 &jsercy; ISOcyr2 # CYRILLIC SMALL LETTER JE
+0x0459 &ljcy; ISOcyr2 # CYRILLIC SMALL LETTER LJE
+0x045A &njcy; ISOcyr2 # CYRILLIC SMALL LETTER NJE
+0x045B &tshcy; ISOcyr2 # CYRILLIC SMALL LETTER TSHE
+0x045C &kjcy; ISOcyr2 # CYRILLIC SMALL LETTER KJE
+0x045E &ubrcy; ISOcyr2 # CYRILLIC SMALL LETTER SHORT U
+0x045F &dzcy; ISOcyr2 # CYRILLIC SMALL LETTER DZHE
+0x2002 &ensp; ISOpub # EN SPACE
+0x2003 &emsp; ISOpub # EM SPACE
+0x2004 &emsp13; ISOpub # THREE-PER-EM SPACE
+0x2005 &emsp14; ISOpub # FOUR-PER-EM SPACE
+0x2007 &numsp; ISOpub # FIGURE SPACE
+0x2008 &puncsp; ISOpub # PUNCTUATION SPACE
+0x2009 &thinsp; ISOpub # THIN SPACE
+0x200A &hairsp; ISOpub # HAIR SPACE
+0x200C &zwnj; HTMLspecial # ZERO WIDTH NON-JOINER
+0x200D &zwj; HTMLspecial # ZERO WIDTH JOINER
+0x200E &lrm; HTMLspecial # LEFT-TO-RIGHT MARK
+0x200F &rlm; HTMLspecial # RIGHT-TO-LEFT MARK
+0x2010 &dash; ISOpub # HYPHEN
+0x2013 &ndash; ISOpub # EN DASH
+0x2014 &mdash; ISOpub # EM DASH
+0x2015 &horbar; ISOnum # HORIZONTAL BAR
+0x2016 &Verbar; ISOtech # DOUBLE VERTICAL LINE
+0x2018 &lsquo; ISOnum # LEFT SINGLE QUOTATION MARK
+0x2018 &rsquor; ISOpub # LEFT SINGLE QUOTATION MARK
+0x2019 &rsquo; ISOnum # RIGHT SINGLE QUOTATION MARK
+0x201A &lsquor; ISOpub # SINGLE LOW-9 QUOTATION MARK
+0x201A &sbquo; HTMLspecial # SINGLE LOW-9 QUOTATION MARK
+0x201C &ldquo; ISOnum # LEFT DOUBLE QUOTATION MARK
+0x201C &rdquor; ISOpub # LEFT DOUBLE QUOTATION MARK
+0x201D &rdquo; ISOnum # RIGHT DOUBLE QUOTATION MARK
+0x201E &bdquo; HTMLspecial # DOUBLE LOW-9 QUOTATION MARK
+0x201E &ldquor; ISOpub # DOUBLE LOW-9 QUOTATION MARK
+0x2020 &dagger; ISOpub # DAGGER
+0x2021 &Dagger; ISOpub # DOUBLE DAGGER
+0x2022 &bull; ISOpub # BULLET
+0x2025 &nldr; ISOpub # TWO DOT LEADER
+0x2026 &hellip; ISOpub # HORIZONTAL ELLIPSIS
+0x2026 &mldr; ISOpub # HORIZONTAL ELLIPSIS
+0x2030 &permil; ISOtech # PER MILLE SIGN
+0x2032 &prime; ISOtech # PRIME
+0x2032 &vprime; ISOamso # PRIME
+0x2033 &Prime; ISOtech # DOUBLE PRIME
+0x2034 &tprime; ISOtech # TRIPLE PRIME
+0x2035 &bprime; ISOamso # REVERSED PRIME
+0x2039 &lsaquo; HTMLspecial # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+0x203A &rsaquo; HTMLspecial # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+0x203E &oline; HTMLsymbol # OVERLINE
+0x2041 &caret; ISOpub # CARET INSERTION POINT
+0x2043 &hybull; ISOpub # HYPHEN BULLET
+0x2044 &frasl; HTMLsymbol # FRACTION SLASH
+0x20AC &euro; new # EURO SIGN
+0x20DB &tdot; ISOtech # COMBINING THREE DOTS ABOVE
+0x20DC &DotDot; ISOtech # COMBINING FOUR DOTS ABOVE
+0x2105 &incare; ISOpub # CARE OF
+0x210B &hamilt; ISOtech # SCRIPT CAPITAL H
+0x210F &planck; ISOamso # PLANCK CONSTANT OVER TWO PI
+0x2111 &image; ISOamso # BLACK-LETTER CAPITAL I
+0x2112 &lagran; ISOtech # SCRIPT CAPITAL L
+0x2113 &ell; ISOamso # SCRIPT SMALL L
+0x2116 &numero; ISOcyr1 # NUMERO SIGN
+0x2117 &copysr; ISOpub # SOUND RECORDING COPYRIGHT
+0x2118 &weierp; ISOamso # SCRIPT CAPITAL P
+0x211C &real; ISOamso # BLACK-LETTER CAPITAL R
+0x211E &rx; ISOpub # PRESCRIPTION TAKE
+0x2122 &trade; ISOnum # TRADE MARK SIGN
+0x2126 &ohm; ISOnum # OHM SIGN
+0x212B &angst; ISOtech # ANGSTROM SIGN
+0x212C &bernou; ISOtech # SCRIPT CAPITAL B
+0x2133 &phmmat; ISOtech # SCRIPT CAPITAL M
+0x2134 &order; ISOtech # SCRIPT SMALL O
+0x2135 &alefsym; HTMLsymbol # ALEF SYMBOL
+0x2135 &aleph; ISOtech # ALEF SYMBOL
+0x2136 &beth; ISOamso # BET SYMBOL
+0x2137 &gimel; ISOamso # GIMEL SYMBOL
+0x2138 &daleth; ISOamso # DALET SYMBOL
+0x2153 &frac13; ISOpub # VULGAR FRACTION ONE THIRD
+0x2154 &frac23; ISOpub # VULGAR FRACTION TWO THIRDS
+0x2155 &frac15; ISOpub # VULGAR FRACTION ONE FIFTH
+0x2156 &frac25; ISOpub # VULGAR FRACTION TWO FIFTHS
+0x2157 &frac35; ISOpub # VULGAR FRACTION THREE FIFTHS
+0x2158 &frac45; ISOpub # VULGAR FRACTION FOUR FIFTHS
+0x2159 &frac16; ISOpub # VULGAR FRACTION ONE SIXTH
+0x215A &frac56; ISOpub # VULGAR FRACTION FIVE SIXTHS
+0x215B &frac18; ISOnum # VULGAR FRACTION ONE EIGHTH
+0x215C &frac38; ISOnum # VULGAR FRACTION THREE EIGHTHS
+0x215D &frac58; ISOnum # VULGAR FRACTION FIVE EIGHTHS
+0x215E &frac78; ISOnum # VULGAR FRACTION SEVEN EIGHTHS
+0x2190 &larr; ISOnum # LEFTWARDS ARROW
+0x2191 &uarr; ISOnum # UPWARDS ARROW
+0x2192 &rarr; ISOnum # RIGHTWARDS ARROW
+0x2193 &darr; ISOnum # DOWNWARDS ARROW
+0x2194 &harr; ISOamsa # LEFT RIGHT ARROW
+0x2194 &xhArr; ISOamsa # LEFT RIGHT ARROW
+0x2194 &xharr; ISOamsa # LEFT RIGHT ARROW
+0x2195 &varr; ISOamsa # UP DOWN ARROW
+0x2196 &nwarr; ISOamsa # NORTH WEST ARROW
+0x2197 &nearr; ISOamsa # NORTH EAST ARROW
+0x2198 &drarr; ISOamsa # SOUTH EAST ARROW
+0x2199 &dlarr; ISOamsa # SOUTH WEST ARROW
+0x219A &nlarr; ISOamsa # LEFTWARDS ARROW WITH STROKE
+0x219B &nrarr; ISOamsa # RIGHTWARDS ARROW WITH STROKE
+0x219D &rarrw; ISOamsa # RIGHTWARDS WAVE ARROW
+0x219E &Larr; ISOamsa # LEFTWARDS TWO HEADED ARROW
+0x21A0 &Rarr; ISOamsa # RIGHTWARDS TWO HEADED ARROW
+0x21A2 &larrtl; ISOamsa # LEFTWARDS ARROW WITH TAIL
+0x21A3 &rarrtl; ISOamsa # RIGHTWARDS ARROW WITH TAIL
+0x21A6 &map; ISOamsa # RIGHTWARDS ARROW FROM BAR
+0x21A9 &larrhk; ISOamsa # LEFTWARDS ARROW WITH HOOK
+0x21AA &rarrhk; ISOamsa # RIGHTWARDS ARROW WITH HOOK
+0x21AB &larrlp; ISOamsa # LEFTWARDS ARROW WITH LOOP
+0x21AC &rarrlp; ISOamsa # RIGHTWARDS ARROW WITH LOOP
+0x21AD &harrw; ISOamsa # LEFT RIGHT WAVE ARROW
+0x21AE &nharr; ISOamsa # LEFT RIGHT ARROW WITH STROKE
+0x21B0 &lsh; ISOamsa # UPWARDS ARROW WITH TIP LEFTWARDS
+0x21B1 &rsh; ISOamsa # UPWARDS ARROW WITH TIP RIGHTWARDS
+0x21B5 &crarr; HTMLsymbol # DOWNWARDS ARROW WITH CORNER LEFTWARDS
+0x21B6 &cularr; ISOamsa # ANTICLOCKWISE TOP SEMICIRCLE ARROW
+0x21B7 &curarr; ISOamsa # CLOCKWISE TOP SEMICIRCLE ARROW
+0x21BA &olarr; ISOamsa # ANTICLOCKWISE OPEN CIRCLE ARROW
+0x21BB &orarr; ISOamsa # CLOCKWISE OPEN CIRCLE ARROW
+0x21BC &lharu; ISOamsa # LEFTWARDS HARPOON WITH BARB UPWARDS
+0x21BD &lhard; ISOamsa # LEFTWARDS HARPOON WITH BARB DOWNWARDS
+0x21BE &uharr; ISOamsa # UPWARDS HARPOON WITH BARB RIGHTWARDS
+0x21BF &uharl; ISOamsa # UPWARDS HARPOON WITH BARB LEFTWARDS
+0x21C0 &rharu; ISOamsa # RIGHTWARDS HARPOON WITH BARB UPWARDS
+0x21C1 &rhard; ISOamsa # RIGHTWARDS HARPOON WITH BARB DOWNWARDS
+0x21C2 &dharr; ISOamsa # DOWNWARDS HARPOON WITH BARB RIGHTWARDS
+0x21C3 &dharl; ISOamsa # DOWNWARDS HARPOON WITH BARB LEFTWARDS
+0x21C4 &rlarr2; ISOamsa # RIGHTWARDS ARROW OVER LEFTWARDS ARROW
+0x21C6 &lrarr2; ISOamsa # LEFTWARDS ARROW OVER RIGHTWARDS ARROW
+0x21C7 &larr2; ISOamsa # LEFTWARDS PAIRED ARROWS
+0x21C8 &uarr2; ISOamsa # UPWARDS PAIRED ARROWS
+0x21C9 &rarr2; ISOamsa # RIGHTWARDS PAIRED ARROWS
+0x21CA &darr2; ISOamsa # DOWNWARDS PAIRED ARROWS
+0x21CB &lrhar2; ISOamsa # LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON
+0x21CC &rlhar2; ISOamsa # RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON
+0x21CD &nlArr; ISOamsa # LEFTWARDS DOUBLE ARROW WITH STROKE
+0x21CE &nhArr; ISOamsa # LEFT RIGHT DOUBLE ARROW WITH STROKE
+0x21CF &nrArr; ISOamsa # RIGHTWARDS DOUBLE ARROW WITH STROKE
+0x21D0 &lArr; ISOtech # LEFTWARDS DOUBLE ARROW
+0x21D0 &xlArr; ISOamsa # LEFTWARDS DOUBLE ARROW
+0x21D1 &uArr; ISOamsa # UPWARDS DOUBLE ARROW
+0x21D2 &rArr; ISOtech # RIGHTWARDS DOUBLE ARROW
+0x21D2 &xrArr; ISOamsa # RIGHTWARDS DOUBLE ARROW
+0x21D3 &dArr; ISOamsa # DOWNWARDS DOUBLE ARROW
+0x21D4 &hArr; ISOamsa # LEFT RIGHT DOUBLE ARROW
+0x21D4 &iff; ISOtech # LEFT RIGHT DOUBLE ARROW
+0x21D5 &vArr; ISOamsa # UP DOWN DOUBLE ARROW
+0x21DA &lAarr; ISOamsa # LEFTWARDS TRIPLE ARROW
+0x21DB &rAarr; ISOamsa # RIGHTWARDS TRIPLE ARROW
+0x2200 &forall; ISOtech # FOR ALL
+0x2201 &comp; ISOamso # COMPLEMENT
+0x2202 &part; ISOtech # PARTIAL DIFFERENTIAL
+0x2203 &exist; ISOtech # THERE EXISTS
+0x2204 &nexist; ISOamso # THERE DOES NOT EXIST
+0x2205 &empty; ISOamso # EMPTY SET
+0x2207 &nabla; ISOtech # NABLA
+0x2208 &isin; ISOtech # ELEMENT OF
+0x2209 &notin; ISOtech # NOT AN ELEMENT OF
+0x220A &epsis; ISOgrk3 # SMALL ELEMENT OF
+0x220B &ni; ISOtech # CONTAINS AS MEMBER
+0x220D &bepsi; ISOamsr # SMALL CONTAINS AS MEMBER
+0x220F &prod; ISOamsb # N-ARY PRODUCT
+0x2210 &amalg; ISOamsb # N-ARY COPRODUCT
+0x2210 &coprod; ISOamsb # N-ARY COPRODUCT
+0x2210 &samalg; ISOamsr # N-ARY COPRODUCT
+0x2211 &sum; ISOamsb # N-ARY SUMMATION
+0x2212 &minus; ISOtech # MINUS SIGN
+0x2213 &mnplus; ISOtech # MINUS-OR-PLUS SIGN
+0x2214 &plusdo; ISOamsb # DOT PLUS
+0x2216 &setmn; ISOamsb # SET MINUS
+0x2216 &ssetmn; ISOamsb # SET MINUS
+0x2217 &lowast; ISOtech # ASTERISK OPERATOR
+0x2218 &compfn; ISOtech # RING OPERATOR
+0x221A &radic; ISOtech # SQUARE ROOT
+0x221D &prop; ISOtech # PROPORTIONAL TO
+0x221D &vprop; ISOamsr # PROPORTIONAL TO
+0x221E &infin; ISOtech # INFINITY
+0x221F &ang90; ISOtech # RIGHT ANGLE
+0x2220 &ang; ISOamso # ANGLE
+0x2221 &angmsd; ISOamso # MEASURED ANGLE
+0x2222 &angsph; ISOtech # SPHERICAL ANGLE
+0x2223 &mid; ISOamsr # DIVIDES
+0x2224 &nmid; ISOamsn # DOES NOT DIVIDE
+0x2225 &par; ISOtech # PARALLEL TO
+0x2225 &spar; ISOamsr # PARALLEL TO
+0x2226 &npar; ISOamsn # NOT PARALLEL TO
+0x2226 &nspar; ISOamsn # NOT PARALLEL TO
+0x2227 &and; ISOtech # LOGICAL AND
+0x2228 &or; ISOtech # LOGICAL OR
+0x2229 &cap; ISOtech # INTERSECTION
+0x222A &cup; ISOtech # UNION
+0x222B &int; ISOtech # INTEGRAL
+0x222E &conint; ISOtech # CONTOUR INTEGRAL
+0x2234 &there4; ISOtech # THEREFORE
+0x2235 &becaus; ISOtech # BECAUSE
+0x223C &sim; ISOtech # TILDE OPERATOR
+0x223C &thksim; ISOamsr # TILDE OPERATOR
+0x223D &bsim; ISOamsr # REVERSED TILDE
+0x2240 &wreath; ISOamsb # WREATH PRODUCT
+0x2241 &nsim; ISOamsn # NOT TILDE
+0x2243 &sime; ISOtech # ASYMPTOTICALLY EQUAL TO
+0x2244 &nsime; ISOamsn # NOT ASYMPTOTICALLY EQUAL TO
+0x2245 &cong; ISOtech # APPROXIMATELY EQUAL TO
+0x2247 &ncong; ISOamsn # NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO
+0x2248 &ap; ISOtech # ALMOST EQUAL TO
+0x2248 &asymp; ISOamsr # ALMOST EQUAL TO
+0x2248 &thkap; ISOamsr # ALMOST EQUAL TO
+0x2249 &nap; ISOamsn # NOT ALMOST EQUAL TO
+0x224A &ape; ISOamsr # ALMOST EQUAL OR EQUAL TO
+0x224C &bcong; ISOamsr # ALL EQUAL TO
+0x224E &bump; ISOamsr # GEOMETRICALLY EQUIVALENT TO
+0x224F &bumpe; ISOamsr # DIFFERENCE BETWEEN
+0x2250 &esdot; ISOamsr # APPROACHES THE LIMIT
+0x2251 &eDot; ISOamsr # GEOMETRICALLY EQUAL TO
+0x2252 &efDot; ISOamsr # APPROXIMATELY EQUAL TO OR THE IMAGE OF
+0x2253 &erDot; ISOamsr # IMAGE OF OR APPROXIMATELY EQUAL TO
+0x2254 &colone; ISOamsr # COLON EQUALS
+0x2255 &ecolon; ISOamsr # EQUALS COLON
+0x2256 &ecir; ISOamsr # RING IN EQUAL TO
+0x2257 &cire; ISOamsr # RING EQUAL TO
+0x2259 &wedgeq; ISOtech # ESTIMATES
+0x225C &trie; ISOamsr # DELTA EQUAL TO
+0x2260 &ne; ISOtech # NOT EQUAL TO
+0x2261 &equiv; ISOtech # IDENTICAL TO
+0x2262 &nequiv; ISOamsn # NOT IDENTICAL TO
+0x2264 &le; ISOtech # LESS-THAN OR EQUAL TO
+0x2264 &les; ISOamsr # LESS-THAN OR EQUAL TO
+0x2265 &ge; ISOtech # GREATER-THAN OR EQUAL TO
+0x2265 &ges; ISOamsr # GREATER-THAN OR EQUAL TO
+0x2266 &lE; ISOamsr # LESS-THAN OVER EQUAL TO
+0x2267 &gE; ISOamsr # GREATER-THAN OVER EQUAL TO
+0x2268 &lnE; ISOamsn # LESS-THAN BUT NOT EQUAL TO
+0x2268 &lne; ISOamsn # LESS-THAN BUT NOT EQUAL TO
+0x2268 &lvnE; ISOamsn # LESS-THAN BUT NOT EQUAL TO
+0x2269 &gnE; ISOamsn # GREATER-THAN BUT NOT EQUAL TO
+0x2269 &gne; ISOamsn # GREATER-THAN BUT NOT EQUAL TO
+0x2269 &gvnE; ISOamsn # GREATER-THAN BUT NOT EQUAL TO
+0x226A &Lt; ISOamsr # MUCH LESS-THAN
+0x226B &Gt; ISOamsr # MUCH GREATER-THAN
+0x226C &twixt; ISOamsr # BETWEEN
+0x226E &nlt; ISOamsn # NOT LESS-THAN
+0x226F &ngt; ISOamsn # NOT GREATER-THAN
+0x2270 &nle; ISOamsn # NEITHER LESS-THAN NOR EQUAL TO
+0x2270 &nles; ISOamsn # NEITHER LESS-THAN NOR EQUAL TO
+0x2271 &nge; ISOamsn # NEITHER GREATER-THAN NOR EQUAL TO
+0x2271 &nges; ISOamsn # NEITHER GREATER-THAN NOR EQUAL TO
+0x2272 &lsim; ISOamsr # LESS-THAN OR EQUIVALENT TO
+0x2273 &gsim; ISOamsr # GREATER-THAN OR EQUIVALENT TO
+0x2276 &lg; ISOamsr # LESS-THAN OR GREATER-THAN
+0x2277 &gl; ISOamsr # GREATER-THAN OR LESS-THAN
+0x227A &pr; ISOamsr # PRECEDES
+0x227B &sc; ISOamsr # SUCCEEDS
+0x227C &cupre; ISOamsr # PRECEDES OR EQUAL TO
+0x227C &pre; ISOamsr # PRECEDES OR EQUAL TO
+0x227D &sccue; ISOamsr # SUCCEEDS OR EQUAL TO
+0x227D &sce; ISOamsr # SUCCEEDS OR EQUAL TO
+0x227E &prsim; ISOamsr # PRECEDES OR EQUIVALENT TO
+0x227F &scsim; ISOamsr # SUCCEEDS OR EQUIVALENT TO
+0x2280 &npr; ISOamsn # DOES NOT PRECEDE
+0x2281 &nsc; ISOamsn # DOES NOT SUCCEED
+0x2282 &sub; ISOtech # SUBSET OF
+0x2283 &sup; ISOtech # SUPERSET OF
+0x2284 &nsub; ISOamsn # NOT A SUBSET OF
+0x2285 &nsup; ISOamsn # NOT A SUPERSET OF
+0x2286 &subE; ISOamsr # SUBSET OF OR EQUAL TO
+0x2286 &sube; ISOtech # SUBSET OF OR EQUAL TO
+0x2287 &supE; ISOamsr # SUPERSET OF OR EQUAL TO
+0x2287 &supe; ISOtech # SUPERSET OF OR EQUAL TO
+0x2288 &nsubE; ISOamsn # NEITHER A SUBSET OF NOR EQUAL TO
+0x2288 &nsube; ISOamsn # NEITHER A SUBSET OF NOR EQUAL TO
+0x2289 &nsupE; ISOamsn # NEITHER A SUPERSET OF NOR EQUAL TO
+0x2289 &nsupe; ISOamsn # NEITHER A SUPERSET OF NOR EQUAL TO
+0x228A &subnE; ISOamsn # SUBSET OF WITH NOT EQUAL TO
+0x228A &subne; ISOamsn # SUBSET OF WITH NOT EQUAL TO
+0x228A &vsubnE; ISOamsn # SUBSET OF WITH NOT EQUAL TO
+0x228A &vsubne; ISOamsn # SUBSET OF WITH NOT EQUAL TO
+0x228B &supnE; ISOamsn # SUPERSET OF WITH NOT EQUAL TO
+0x228B &supne; ISOamsn # SUPERSET OF WITH NOT EQUAL TO
+0x228B &vsupnE; ISOamsn # SUPERSET OF WITH NOT EQUAL TO
+0x228B &vsupne; ISOamsn # SUPERSET OF WITH NOT EQUAL TO
+0x228E &uplus; ISOamsb # MULTISET UNION
+0x228F &sqsub; ISOamsr # SQUARE IMAGE OF
+0x2290 &sqsup; ISOamsr # SQUARE ORIGINAL OF
+0x2291 &sqsube; ISOamsr # SQUARE IMAGE OF OR EQUAL TO
+0x2292 &sqsupe; ISOamsr # SQUARE ORIGINAL OF OR EQUAL TO
+0x2293 &sqcap; ISOamsb # SQUARE CAP
+0x2294 &sqcup; ISOamsb # SQUARE CUP
+0x2295 &oplus; ISOamsb # CIRCLED PLUS
+0x2296 &ominus; ISOamsb # CIRCLED MINUS
+0x2297 &otimes; ISOamsb # CIRCLED TIMES
+0x2298 &osol; ISOamsb # CIRCLED DIVISION SLASH
+0x2299 &odot; ISOamsb # CIRCLED DOT OPERATOR
+0x229A &ocir; ISOamsb # CIRCLED RING OPERATOR
+0x229B &oast; ISOamsb # CIRCLED ASTERISK OPERATOR
+0x229D &odash; ISOamsb # CIRCLED DASH
+0x229E &plusb; ISOamsb # SQUARED PLUS
+0x229F &minusb; ISOamsb # SQUARED MINUS
+0x22A0 &timesb; ISOamsb # SQUARED TIMES
+0x22A1 &sdotb; ISOamsb # SQUARED DOT OPERATOR
+0x22A2 &vdash; ISOamsr # RIGHT TACK
+0x22A3 &dashv; ISOamsr # LEFT TACK
+0x22A4 &top; ISOamsb # DOWN TACK
+0x22A5 &bottom; ISOtech # UP TACK
+0x22A5 &perp; ISOtech # UP TACK
+0x22A7 &models; ISOamsr # MODELS
+0x22A8 &vDash; ISOamsr # TRUE
+0x22A9 &Vdash; ISOamsr # FORCES
+0x22AA &Vvdash; ISOamsr # TRIPLE VERTICAL BAR RIGHT TURNSTILE
+0x22AC &nvdash; ISOamsn # DOES NOT PROVE
+0x22AD &nvDash; ISOamsn # NOT TRUE
+0x22AE &nVdash; ISOamsn # DOES NOT FORCE
+0x22AF &nVDash; ISOamsn # NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
+0x22B2 &vltri; ISOamsr # NORMAL SUBGROUP OF
+0x22B3 &vrtri; ISOamsr # CONTAINS AS NORMAL SUBGROUP
+0x22B4 &ltrie; ISOamsr # NORMAL SUBGROUP OF OR EQUAL TO
+0x22B5 &rtrie; ISOamsr # CONTAINS AS NORMAL SUBGROUP OR EQUAL TO
+0x22B8 &mumap; ISOamsa # MULTIMAP
+0x22BA &intcal; ISOamsb # INTERCALATE
+0x22BB &veebar; ISOamsr # XOR
+0x22BC &barwed; ISOamsb # NAND
+0x22C4 &diam; ISOamsb # DIAMOND OPERATOR
+0x22C5 &sdot; ISOamsb # DOT OPERATOR
+0x22C6 &sstarf; ISOamsb # STAR OPERATOR
+0x22C7 &divonx; ISOamsb # DIVISION TIMES
+0x22C8 &bowtie; ISOamsr # BOWTIE
+0x22C9 &ltimes; ISOamsb # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT
+0x22CA &rtimes; ISOamsb # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT
+0x22CB &lthree; ISOamsb # LEFT SEMIDIRECT PRODUCT
+0x22CC &rthree; ISOamsb # RIGHT SEMIDIRECT PRODUCT
+0x22CD &bsime; ISOamsr # REVERSED TILDE EQUALS
+0x22CE &cuvee; ISOamsb # CURLY LOGICAL OR
+0x22CF &cuwed; ISOamsb # CURLY LOGICAL AND
+0x22D0 &Sub; ISOamsr # DOUBLE SUBSET
+0x22D1 &Sup; ISOamsr # DOUBLE SUPERSET
+0x22D2 &Cap; ISOamsb # DOUBLE INTERSECTION
+0x22D3 &Cup; ISOamsb # DOUBLE UNION
+0x22D4 &fork; ISOamsr # PITCHFORK
+0x22D6 &ldot; ISOamsr # LESS-THAN WITH DOT
+0x22D7 &gsdot; ISOamsr # GREATER-THAN WITH DOT
+0x22D8 &Ll; ISOamsr # VERY MUCH LESS-THAN
+0x22D9 &Gg; ISOamsr # VERY MUCH GREATER-THAN
+0x22DA &leg; ISOamsr # LESS-THAN EQUAL TO OR GREATER-THAN
+0x22DB &gel; ISOamsr # GREATER-THAN EQUAL TO OR LESS-THAN
+0x22DC &els; ISOamsr # EQUAL TO OR LESS-THAN
+0x22DD &egs; ISOamsr # EQUAL TO OR GREATER-THAN
+0x22DE &cuepr; ISOamsr # EQUAL TO OR PRECEDES
+0x22DF &cuesc; ISOamsr # EQUAL TO OR SUCCEEDS
+0x22E0 &npre; ISOamsn # DOES NOT PRECEDE OR EQUAL
+0x22E1 &nsce; ISOamsn # DOES NOT SUCCEED OR EQUAL
+0x22E6 &lnsim; ISOamsn # LESS-THAN BUT NOT EQUIVALENT TO
+0x22E7 &gnsim; ISOamsn # GREATER-THAN BUT NOT EQUIVALENT TO
+0x22E8 &prnsim; ISOamsn # PRECEDES BUT NOT EQUIVALENT TO
+0x22E9 &scnsim; ISOamsn # SUCCEEDS BUT NOT EQUIVALENT TO
+0x22EA &nltri; ISOamsn # NOT NORMAL SUBGROUP OF
+0x22EB &nrtri; ISOamsn # DOES NOT CONTAIN AS NORMAL SUBGROUP
+0x22EC &nltrie; ISOamsn # NOT NORMAL SUBGROUP OF OR EQUAL TO
+0x22ED &nrtrie; ISOamsn # DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL
+0x22EE &vellip; ISOpub # VERTICAL ELLIPSIS
+0x2306 &Barwed; ISOamsb # PERSPECTIVE
+0x2308 &lceil; ISOamsc # LEFT CEILING
+0x2309 &rceil; ISOamsc # RIGHT CEILING
+0x230A &lfloor; ISOamsc # LEFT FLOOR
+0x230B &rfloor; ISOamsc # RIGHT FLOOR
+0x230C &drcrop; ISOpub # BOTTOM RIGHT CROP
+0x230D &dlcrop; ISOpub # BOTTOM LEFT CROP
+0x230E &urcrop; ISOpub # TOP RIGHT CROP
+0x230F &ulcrop; ISOpub # TOP LEFT CROP
+0x2315 &telrec; ISOpub # TELEPHONE RECORDER
+0x2316 &target; ISOpub # POSITION INDICATOR
+0x231C &ulcorn; ISOamsc # TOP LEFT CORNER
+0x231D &urcorn; ISOamsc # TOP RIGHT CORNER
+0x231E &dlcorn; ISOamsc # BOTTOM LEFT CORNER
+0x231F &drcorn; ISOamsc # BOTTOM RIGHT CORNER
+0x2322 &frown; ISOamsr # FROWN
+0x2322 &sfrown; ISOamsr # FROWN
+0x2323 &smile; ISOamsr # SMILE
+0x2323 &ssmile; ISOamsr # SMILE
+0x2329 &lang; ISOtech # LEFT-POINTING ANGLE BRACKET
+0x232A &rang; ISOtech # RIGHT-POINTING ANGLE BRACKET
+0x2423 &blank; ISOpub # OPEN BOX
+0x24C8 &oS; ISOamso # CIRCLED LATIN CAPITAL LETTER S
+0x2500 &boxh; ISObox # BOX DRAWINGS LIGHT HORIZONTAL
+0x2502 &boxv; ISObox # BOX DRAWINGS LIGHT VERTICAL
+0x250C &boxdr; ISObox # BOX DRAWINGS LIGHT DOWN AND RIGHT
+0x2510 &boxdl; ISObox # BOX DRAWINGS LIGHT DOWN AND LEFT
+0x2514 &boxur; ISObox # BOX DRAWINGS LIGHT UP AND RIGHT
+0x2518 &boxul; ISObox # BOX DRAWINGS LIGHT UP AND LEFT
+0x251C &boxvr; ISObox # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+0x2524 &boxvl; ISObox # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+0x252C &boxhd; ISObox # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+0x2534 &boxhu; ISObox # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+0x253C &boxvh; ISObox # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+0x2550 &boxH; ISObox # BOX DRAWINGS DOUBLE HORIZONTAL
+0x2551 &boxV; ISObox # BOX DRAWINGS DOUBLE VERTICAL
+0x2552 &boxdR; ISObox # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+0x2553 &boxDr; ISObox # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+0x2554 &boxDR; ISObox # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+0x2555 &boxdL; ISObox # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+0x2556 &boxDl; ISObox # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+0x2557 &boxDL; ISObox # BOX DRAWINGS DOUBLE DOWN AND LEFT
+0x2558 &boxuR; ISObox # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+0x2559 &boxUr; ISObox # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+0x255A &boxUR; ISObox # BOX DRAWINGS DOUBLE UP AND RIGHT
+0x255B &boxuL; ISObox # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+0x255C &boxUl; ISObox # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+0x255D &boxUL; ISObox # BOX DRAWINGS DOUBLE UP AND LEFT
+0x255E &boxvR; ISObox # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+0x255F &boxVr; ISObox # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+0x2560 &boxVR; ISObox # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+0x2561 &boxvL; ISObox # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+0x2562 &boxVl; ISObox # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+0x2563 &boxVL; ISObox # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+0x2564 &boxHd; ISObox # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+0x2565 &boxhD; ISObox # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+0x2566 &boxHD; ISObox # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+0x2567 &boxHu; ISObox # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+0x2568 &boxhU; ISObox # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+0x2569 &boxHU; ISObox # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+0x256A &boxvH; ISObox # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+0x256B &boxVh; ISObox # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+0x256C &boxVH; ISObox # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+0x2580 &uhblk; ISOpub # UPPER HALF BLOCK
+0x2584 &lhblk; ISOpub # LOWER HALF BLOCK
+0x2588 &block; ISOpub # FULL BLOCK
+0x2591 &blk14; ISOpub # LIGHT SHADE
+0x2592 &blk12; ISOpub # MEDIUM SHADE
+0x2593 &blk34; ISOpub # DARK SHADE
+0x25A1 &squ; ISOpub # WHITE SQUARE
+0x25A1 &square; ISOtech # WHITE SQUARE
+0x25AA &squf; ISOpub # BLACK SMALL SQUARE
+0x25AD &rect; ISOpub # WHITE RECTANGLE
+0x25AE &marker; ISOpub # BLACK VERTICAL RECTANGLE
+0x25B3 &xutri; ISOamsb # WHITE UP-POINTING TRIANGLE
+0x25B4 &utrif; ISOpub # BLACK UP-POINTING SMALL TRIANGLE
+0x25B5 &utri; ISOpub # WHITE UP-POINTING SMALL TRIANGLE
+0x25B8 &rtrif; ISOpub # BLACK RIGHT-POINTING SMALL TRIANGLE
+0x25B9 &rtri; ISOpub # WHITE RIGHT-POINTING SMALL TRIANGLE
+0x25BD &xdtri; ISOamsb # WHITE DOWN-POINTING TRIANGLE
+0x25BE &dtrif; ISOpub # BLACK DOWN-POINTING SMALL TRIANGLE
+0x25BF &dtri; ISOpub # WHITE DOWN-POINTING SMALL TRIANGLE
+0x25C2 &ltrif; ISOpub # BLACK LEFT-POINTING SMALL TRIANGLE
+0x25C3 &ltri; ISOpub # WHITE LEFT-POINTING SMALL TRIANGLE
+0x25CA &loz; ISOpub # LOZENGE
+0x25CB &cir; ISOpub # WHITE CIRCLE
+0x25CB &xcirc; ISOamsb # WHITE CIRCLE
+0x2605 &starf; ISOpub # BLACK STAR
+0x2606 &star; ISOpub # WHITE STAR
+0x260E &phone; ISOpub # BLACK TELEPHONE
+0x2640 &female; ISOpub # FEMALE SIGN
+0x2642 &male; ISOpub # MALE SIGN
+0x2660 &spades; ISOpub # BLACK SPADE SUIT
+0x2663 &clubs; ISOpub # BLACK CLUB SUIT
+0x2665 &hearts; ISOpub # BLACK HEART SUIT
+0x2666 &diams; ISOpub # BLACK DIAMOND SUIT
+0x266A &sung; ISOnum # EIGHTH NOTE
+0x266D &flat; ISOpub # MUSIC FLAT SIGN
+0x266E &natur; ISOpub # MUSIC NATURAL SIGN
+0x266F &sharp; ISOpub # MUSIC SHARP SIGN
+0x2713 &check; ISOpub # CHECK MARK
+0x2717 &cross; ISOpub # BALLOT X
+0x2720 &malt; ISOpub # MALTESE CROSS
+0x2726 &lozf; ISOpub # BLACK FOUR POINTED STAR
+<!-- 0x2727 &loz; ISOpub # WHITE FOUR POINTED STAR -->
+0x2736 &sext; ISOpub # SIX POINTED BLACK STAR
+0x???? &epsiv; ISOgrk3 # variant epsilon
+0x???? &fjlig; ISOpub # fj ligature
+0x???? &gEl; ISOamsr # greater-than, double equals, less-than
+0x???? &gap; ISOamsr # greater-than, approximately equal to
+0x???? &gnap; ISOamsn # greater-than, not approximately equal to
+0x???? &jnodot; ISOamso # latin small letter dotless j
+0x???? &lEg; ISOamsr # less-than, double equals, greater-than
+0x???? &lap; ISOamsr # less-than, approximately equal to
+0x???? &lnap; ISOamsn # less-than, not approximately equal to
+0x???? &lpargt; ISOamsc # left parenthesis, greater-than
+0x???? &ngE; ISOamsn # not greater-than, double equals
+0x???? &nlE; ISOamsn # not less-than, double equals
+0x???? &nsmid; ISOamsn # nshortmid
+0x???? &prap; ISOamsr # precedes, approximately equal to
+0x???? &prnE; ISOamsn # precedes, not double equal
+0x???? &prnap; ISOamsn # precedes, not approximately equal to
+0x???? &rpargt; ISOamsc # right parenthesis, greater-than
+0x???? &scap; ISOamsr # succeeds, approximately equal to
+0x???? &scnE; ISOamsn # succeeds, not double equals
+0x???? &scnap; ISOamsn # succeeds, not approximately equal to
+0x???? &smid; ISOamsr # shortmid
+0xFB00 &fflig; ISOpub # LATIN SMALL LIGATURE FF
+0xFB01 &filig; ISOpub # LATIN SMALL LIGATURE FI
+0xFB02 &fllig; ISOpub # LATIN SMALL LIGATURE FL
+0xFB03 &ffilig; ISOpub # LATIN SMALL LIGATURE FFI
+0xFB04 &ffllig; ISOpub # LATIN SMALL LIGATURE FFL
+
+</PRE>
+</BODY>
+</HTML>
diff --git a/test/spaces.html b/test/spaces.html
new file mode 100644
index 0000000..c3076d9
--- /dev/null
+++ b/test/spaces.html
@@ -0,0 +1,38 @@
+<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN">
+<HTML>
+<HEAD>
+<TITLE> Test of some symbols </TITLE>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</HEAD>
+<BODY>
+<!-- Multiple spaces are normally collapsed unless we are in a <PRE> mode
+or use "special" spaces like &nbsp; or &emsp; - try playing around with this page
+by adding more spaces inside brackets or using <PRE>.
+-->
+<!-- PRE -->
+
+You may press '\' to view the source of this test<br>
+<em>UNICODE NCR alt-NCR named alt-named</em><br>
+<p>
+0x2000 [&#x2000;] <IMG SRC=X ALT="[&#x2000;]"> # EN QUAD<br>
+0x2001 [&#x2001;] <IMG SRC=X ALT="[&#x2001;]"> # EM QUAD<br>
+0x2002 [&#x2002;] <IMG SRC=X ALT="[&#x2002;]"> [&ensp;] <IMG SRC=X ALT="[&ensp;]"> # EN SPACE<br>
+0x2003 [&#x2003;] <IMG SRC=X ALT="[&#x2003;]"> [&emsp;] <IMG SRC=X ALT="[&emsp;]"> # EM SPACE<br>
+0x2004 [&#x2004;] <IMG SRC=X ALT="[&#x2004;]"> [&emsp13;] <IMG SRC=X ALT="[&emsp13;]"> # THREE-PER-EM SPACE<br>
+0x2005 [&#x2005;] <IMG SRC=X ALT="[&#x2005;]"> [&emsp14;] <IMG SRC=X ALT="[&emsp14;]"> # FOUR-PER-EM SPACE<br>
+0x2007 [&#x2007;] <IMG SRC=X ALT="[&#x2007;]"> [&numsp;] <IMG SRC=X ALT="[&numsp;]"> # FIGURE SPACE<br>
+0x2008 [&#x2008;] <IMG SRC=X ALT="[&#x2008;]"> [&puncsp;] <IMG SRC=X ALT="[&puncsp;]"> # PUNCTUATION SPACE<br>
+0x2009 [&#x2009;] <IMG SRC=X ALT="[&#x2009;]"> [&thinsp;] <IMG SRC=X ALT="[&thinsp;]"> # THIN SPACE<br>
+0x200A [&#x200A;] <IMG SRC=X ALT="[&#x200A;]"> [&hairsp;] <IMG SRC=X ALT="[&hairsp;]"> # HAIR SPACE<br>
+0x200C [&#x200C;] <IMG SRC=X ALT="[&#x200C;]"> [&zwnj;] <IMG SRC=X ALT="[&zwnj;]"> # ZERO WIDTH NON-JOINER<br>
+0x200D [&#x200D;] <IMG SRC=X ALT="[&#x200D;]"> [&zwj;] <IMG SRC=X ALT="[&zwj;]"> # ZERO WIDTH JOINER<br>
+0x200E [&#x200E;] <IMG SRC=X ALT="[&#x200E;]"> [&lrm;] <IMG SRC=X ALT="[&lrm;]"> # LEFT-TO-RIGHT MARK<br>
+0x200F [&#x200F;] <IMG SRC=X ALT="[&#x200F;]"> [&rlm;] <IMG SRC=X ALT="[&rlm;]"> # RIGHT-TO-LEFT MARK<br>
+0x2010 [&#x2010;] <IMG SRC=X ALT="[&#x2010;]"> [&dash;] <IMG SRC=X ALT="[&dash;]"> # HYPHEN<br>
+0x2013 [&#x2013;] <IMG SRC=X ALT="[&#x2013;]"> [&ndash;] <IMG SRC=X ALT="[&ndash;]"> # EN DASH<br>
+0x2014 [&#x2014;] <IMG SRC=X ALT="[&#x2014;]"> [&mdash;] <IMG SRC=X ALT="[&mdash;]"> # EM DASH<br>
+
+
+</PRE>
+</BODY>
+</HTML>
diff --git a/test/special_urls.html b/test/special_urls.html
new file mode 100644
index 0000000..96e789e
--- /dev/null
+++ b/test/special_urls.html
@@ -0,0 +1,23 @@
+<html>
+<head>
+<title>Lynx Special URLs</title>
+<link rev="made" href="mailto:WebMaster@foo.blah.dom">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</head>
+<body>
+<h1>Lynx Special URLs</h1>
+<dl compact>
+<dd>LYNXCFG:<a href="LYNXCFG:">LYNXCFG (ok)</a>
+<dd>LYNXCOMPILEOPTS:<a href="LYNXCOMPILEOPTS:">LYNXCOMPILEOPTS (ok)</a>
+<dd>LYNXCOOKIE:<a href="LYNXCOOKIE:">LYNXCOOKIE is not allowed</a>
+<dd>LYNXDIRED:<a href="LYNXDIRED:">LYNXDIRED is not allowed</a>
+<dd>LYNXDOWNLOAD:<a href="LYNXDOWNLOAD:">LYNXDOWNLOAD is not allowed</a>
+<dd>LYNXHIST:<a href="LYNXHIST:">LYNXHIST is not allowed</a>
+<dd>LYNXIMGMAP:<a href="LYNXIMGMAP:">LYNXIMGMAP is not allowed</a>
+<dd>LYNXKEYMAP:<a href="LYNXKEYMAP:">LYNXKEYMAP (ok)</a>
+<dd>LYNXMESSAGES:<a href="LYNXMESSAGES:">LYNXMESSAGES (ok)</a>
+<dd>LYNXOPTIONS:<a href="LYNXOPTIONS:">LYNXOPTIONS (ok)</a>
+<dd>LYNXPRINT:<a href="LYNXPRINT:">LYNXPRINT is not allowed</a>
+</dl>
+</body>
+</html>
diff --git a/test/square.html b/test/square.html
new file mode 100644
index 0000000..5354dce
--- /dev/null
+++ b/test/square.html
@@ -0,0 +1,15 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
+
+<html>
+<head>
+ <meta name="generator" content=
+ "HTML Tidy for Linux (vers 25 March 2009), see www.w3.org">
+
+ <title>Test ImageMap - square</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</head>
+
+<body>
+ <p>SQUARE</p>
+</body>
+</html>
diff --git a/test/tabtest.html b/test/tabtest.html
new file mode 100644
index 0000000..73d62eb
--- /dev/null
+++ b/test/tabtest.html
@@ -0,0 +1,40 @@
+<!DOCTYPE HTML PUBLIC "-//W3O//DTD W3 HTML 3.0//EN">
+<html>
+<head>
+<title>Tests of TAB element.</title>
+<link rev="made" href="mailto:lynx-dev@nongnu.org">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</head>
+
+<body>
+<h1>Tests of TAB element.</h1>
+
+<TAB INDENT="16" ID="t0"><em>Normal Style:</em><br>
+One<TAB INDENT="26" ID="t1">Two<TAB INDENT="44" ID="t2">Three
+<TAB INDENT="62" ID="t3">Four<TAB INDENT="80" ID="t4">Five
+<TAB INDENT="98" ID="t5">Six<TAB INDENT="116" ID="t6">Seven
+<TAB INDENT="132" ID="t7">Eight<br>
+1.<TAB TO="t1">2.<TAB TO="t2">3.<TAB TO="t3">4.<TAB TO="t4">5.
+<TAB TO="t5">6.<TAB TO="t6">7.<TAB TO="t7">8.<br>
+i.<TAB TO="t1">ii.<TAB TO="t2">iii.<TAB TO="t3">iv.<TAB TO="t4">v.
+<TAB TO="t5">vi.<TAB TO="t6">vii.<TAB TO="t7">viii.
+
+<p><pre><TAB TO="t0"><em>In PRE block:</em>
+One<TAB TO="t1">Two<TAB TO="t3">Three<TAB TO="t5">Four<TAB TO="t7">Five
+1.<TAB TO="t1">2.<TAB TO="t3">3.<TAB TO="t5">4.<TAB TO="t7">5.
+i.<TAB TO="t1">ii.<TAB TO="t3">iii.<TAB TO="t5">iv.<TAB TO="t7">v.
+</pre>
+
+<bq>
+<TAB TO="t0"><em>In BQ block:</em><br>
+One<TAB TO="t2">Two<TAB TO="t4">Three<TAB TO="t6">Four<br>
+1.<TAB TO="t2">2.<TAB TO="t4">3.<TAB TO="t6">4.<br>
+i.<TAB TO="t2">ii.<TAB TO="t4">iii.<TAB TO="t6">iv.
+</bq>
+
+<p><b>noct<TAB ID="tn">ambulant</b> - walking at night<br>
+<TAB TO="tn">(from Latin: <i>nox noctis</i> night + <i>ambulare</i> walk)
+<pre>|<TAB INDENT="78">|<TAB INDENT="156">|
+0<TAB INDENT="76">80<TAB INDENT="152">158</pre>
+</body>
+</html>
diff --git a/test/tags.html b/test/tags.html
new file mode 100644
index 0000000..697e161
--- /dev/null
+++ b/test/tags.html
@@ -0,0 +1,220 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<HTML>
+<HEAD>
+<TITLE>Tags to Test Color-Style</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
+<link href="nobody" rev="made">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</HEAD>
+
+<BODY alink="green" bgcolor="yellow">
+<!-- ====================================================================== -->
+<br>
+<h1>Content of an H1 Tag</h1>
+Text after an H1 Tag.
+<p>Paragraph after an H1 Tag.
+<br>
+<h2>Content of an H2 Tag</h2>
+Text after an H2 Tag.
+<p>Paragraph after an H2 Tag.
+<br>
+<h3>Content of an H3 Tag</h3>
+Text after an H3 Tag.
+<p>Paragraph after an H3 Tag.
+<br>
+<h4>Content of an H4 Tag</h4>
+Text after an H4 Tag.
+<p>Paragraph after an H4 Tag.
+<br>
+<h5>Content of an H5 Tag</h5>
+Text after an H5 Tag.
+<p>Paragraph after an H5 Tag.
+<br>
+<h6>Content of an H6 Tag</h6>
+Text after an H6 Tag.
+<p>Paragraph after an H6 Tag.
+<!-- ====================================================================== -->
+This is an <a href="#imagemap">"a"</a> tag.
+<br>
+This is an <address>"address"</address> tag.
+<br>
+This is a <b>"b"</b> tag.
+<br>
+This is a <big>"big"</big> tag.
+<br>
+Before quote, <blockquote>this is a "blockquote"</blockquote>, after quote.
+<br>
+This is a <center>"center"</center> tag.
+<br>
+This is a <cite>"cite"</cite> tag.
+<br>
+This is a <code>"code"</code> tag.
+<br>
+This is a <div>div</div> tag.
+<br>
+This is an <em>"em"</em> tag.
+<br>
+This is a <font>"font"</font> tag.
+<!-- ====================================================================== -->
+<br>
+This is an <hr>"hr"<hr> tag.
+<br>
+This is an <i>"i"</i> tag.
+<br>
+This is an <iframe>"iframe"</iframe> tag.
+<br>
+This is an <img alt="img" src="image.jpg"> tag.
+<br>
+This is a <label>"label"</label> tag.
+<br>
+map: normal: lightgray: blue
+<br>
+<pre>
+This is
+pre-formatted
+text (three lines, with pre's on preceding/following lines).
+</pre>
+<br>
+This is a <q>"q"</q> tag.
+<br>
+This is a <samp>"samp"</samp> tag.
+<br>
+This is a <small>"small"</small> tag.
+<br>
+This is a <strong>"strong"</strong> tag.
+<br>
+This is a <sub>"sub"</sub> tag.
+<br>
+This is a <sup>"sup"</sup> tag.
+<br>
+This is a <tt>"tt"</tt> tag.
+<br>
+This is a <var>"var"</var> tag.
+<!-- ====================================================================== -->
+<h1>Forms</h1>
+<hr>
+<form action="http://localhost/cgi-bin/bogus-parms" method="get">
+First: <input type="text" name="First" size=20>
+Last: <input type="text" name="Last" size=20>
+Description: <textarea rows=3 cols=40>
+contents of textarea
+</textarea>
+<hr>
+<input type="submit" value="Submit this form">
+<br>
+<input type="reset" value="Reset this form">
+</form>
+
+<h1 align="left">Another form</h1>
+<hr>
+<form action="http://localhost/cgi-bin/bogus-parms" method="get">
+<hr>
+<input type="checkbox" value="first">first
+<br><input type="checkbox" value="second">second
+<br><input type="checkbox" value="third">third
+<br><input type="checkbox" value="">empty
+<hr>
+<input type="submit" value="done">done
+</form>
+
+<h1 align="right">Another form</h1>
+<hr>
+<form action="http://localhost/cgi-bin/bogus-parms" method="get">
+<select>
+<option>first option</option>
+<option>second option</option>
+<option>third option</option>
+</select>
+<hr>
+<input type="submit" value="Submit this form">
+<br>
+<input type="reset" value="Reset this form">
+</form>
+<!-- ====================================================================== -->
+<table border=2 summary="unquoted table">
+<caption>Unquoted Table</caption>
+<tr>
+<td>First:</td>
+<td>the first row</td>
+<td>short</td>
+<td>last</td></tr>
+<tr>
+<td>Second:</td>
+<td>the second row</td>
+<td>very long string</td>
+<td>lower-right</td></tr>
+</table>
+<!-- ====================================================================== -->
+<blockquote><table border=2 summary="quoted table">
+<caption>Quoted Table</caption>
+<tr>
+<td>First:</td>
+<td>the first row</td>
+<td>very long string</td>
+<td>last</td></tr>
+<tr>
+<td>Second:</td>
+<td>the second row</td>
+<td>short</td>
+<td>lower-right</td></tr>
+</table></blockquote>
+<!-- ====================================================================== -->
+<br>
+<h1>An image map</h1>
+<map name="IMAGEMAP">
+<area alt="Square" shape="rect" coords="18,18,82,80" href="square.html">
+<area alt="Circle" shape="circle" coords="127,48,31" href="circle.html">
+<area alt="Triangle" shape="poly" coords="232,78,303,78,263,14,232,76"
+ href="triangle.html">
+ </map>
+<!-- ====================================================================== -->
+<br>
+<h1>Definition List</h1>
+This is a definition list:
+<dl>
+<dt>the first dt
+<dd>the first dd
+<dt>the second dt
+<dd>the second dd
+<dl>
+<dt>the first dt
+<dd>the first dd
+<dt>the second dt
+<dd>the second dd
+<dt>the third dt
+<dd>the third dd
+</dl>
+<dt>the third dt
+<dd>the third dd
+</dl>
+<!-- ====================================================================== -->
+<br>
+<h1>Unordered List</h1>
+This is an unordered list:
+<ul>
+<li>first item
+<li>second item
+<ul>
+<li>first item
+<li>second item
+<li>third item
+</ul>
+<li>third item
+</ul>
+<!-- ====================================================================== -->
+<br>
+<h1>Ordered List</h1>
+This is an ordered list:
+<ol>
+<li>first item
+<li>second item
+<ol>
+<li>first item
+<li>second item
+<li>third item
+</ol>
+<li>third item
+</ol>
+
+</BODY>
+</HTML>
diff --git a/test/test-styles.html b/test/test-styles.html
new file mode 100644
index 0000000..2cae284
--- /dev/null
+++ b/test/test-styles.html
@@ -0,0 +1,107 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<HTML>
+<HEAD>
+<TITLE>Test Color-Styles</TITLE>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</HEAD>
+<BODY>
+<h1>Heading 1</h1>
+<h2>Heading 2</h2>
+<h3>Heading 3</h3>
+<h4>Heading 4</h4>
+<h5>Heading 5</h5>
+<h6>Heading 6</h6>
+
+<h1>Heading 1 - Ordinary Text</h1>
+<h2><a name="fontlike_text">Heading 2 - <b>Fontlike</b> Text</a></h2>
+<!-- STYLE,BR,TAB -->
+This is <b>b (bold)</b>.
+<br>
+This is <big>big</big>.
+<br>
+This is <blink>blink</blink>.
+<br>
+This is <i>i (italicized)</i>.
+<br>
+This is <small>small</small>.
+<br>
+This is <strike>strike</strike>.
+<br>
+This is <tt>tt (typewriter)</tt>.
+<br>
+This is <u>u (underlined)</u>.
+
+<h2><a name="emphasized_text">Heading 2 - <em>Emphasized</em> Text</a></h2>
+This is <cite>cite (citation)</cite>.
+<br>
+This is <code>code</code>.
+<br>
+This is <del>del</del>.
+<br>
+This is <dfn>dfn (definition)</dfn>.
+<br>
+This is <em>emphasized</em>.
+<br>
+This is <ins>ins</ins>.
+<br>
+This is <kbd>kbd (keyboard)</kbd>.
+<br>
+This is <q>q (quoted)</q>.
+<br>
+This is <samp>samp (sample)</samp>.
+<br>
+This is <span>span</span>.
+<br>
+This is <strong>strong</strong>.
+<br>
+This is <var>var</var>.
+
+<h1>Heading 1 - Ordinary Links</h1>
+<a href="#fontlike_text">This is a link</a> to fontlike text.
+<br>
+<a href="#emphasized_text">This is a link</a> to emphasized text.
+
+<h1>Heading 1 - Emphasized Links</h1>
+<h2><a name="fontlike_links">Heading 2 - <b>Fontlike</b> Links</a></h2>
+<br>
+This is <a href="#fontlike_text"><b>b (bold)</b> link</a>.
+<br>
+This is <a href="#fontlike_text"><big>big</big> link</a>.
+<br>
+This is <a href="#fontlike_text"><blink>blink</blink> link</a>.
+<br>
+This is <a href="#fontlike_text"><i>i (italicized)</i> link</a>.
+<br>
+This is <a href="#fontlike_text"><small>small</small> link</a>.
+<br>
+This is <a href="#fontlike_text"><strike>strike</strike> link</a>.
+<br>
+This is <a href="#fontlike_text"><tt>tt (typewriter)</tt> link</a>.
+<br>
+This is <a href="#fontlike_text"><u>u (underlined)</u> link</a>.
+
+<h2><a name="emphasized_links">Heading 2 - <b>Emphasized</b> Links</a></h2>
+This is <a href="#emphasized_text"><cite>cite (citation)</cite> link</a>.
+<br>
+This is <a href="#emphasized_text"><code>code</code> link</a>.
+<br>
+This is <a href="#emphasized_text"><del>del</del> link</a>.
+<br>
+This is <a href="#emphasized_text"><dfn>dfn (definition)</dfn> link</a>.
+<br>
+This is <a href="#emphasized_text"><em>emphasized</em> link</a>.
+<br>
+This is <a href="#emphasized_text"><ins>ins</ins> link</a>.
+<br>
+This is <a href="#emphasized_text"><kbd>kbd (keyboard)</kbd> link</a>.
+<br>
+This is <a href="#emphasized_text"><q>q (quoted)</q> link</a>.
+<br>
+This is <a href="#emphasized_text"><samp>samp (sample)</samp> link</a>.
+<br>
+This is <a href="#emphasized_text"><span>span</span> link</a>.
+<br>
+This is <a href="#emphasized_text"><strong>strong</strong> link</a>.
+<br>
+This is <a href="#emphasized_text"><var>var</var> link</a>.
+</BODY>
diff --git a/test/triangle.html b/test/triangle.html
new file mode 100644
index 0000000..5dcf628
--- /dev/null
+++ b/test/triangle.html
@@ -0,0 +1,15 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
+
+<html>
+<head>
+ <meta name="generator" content=
+ "HTML Tidy for Linux (vers 25 March 2009), see www.w3.org">
+
+ <title>Test ImageMap - triangle</title>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</head>
+
+<body>
+ <p>TRIANGLE</p>
+</body>
+</html>
diff --git a/test/unicode.html b/test/unicode.html
new file mode 100644
index 0000000..903ac61
--- /dev/null
+++ b/test/unicode.html
@@ -0,0 +1,916 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<HTML>
+<HEAD>
+<TITLE>Test of some Unicode symbols in numeric character reference form</TITLE>
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</HEAD>
+<BODY>
+<PRE>
+
+ This table prepared from SGML.TXT available at ftp.unicode.org
+
+ ftp://ftp.unicode.org/MAPPINGS/VENDORS/MISC/SGML.TXT
+ (if doing ftp, try cd Public/MAPPINGS/VENDORS/MISC)
+
+
+original comment:
+
+# Author: John Cowan &lt;cowan@ccil.org&gt;
+# Date: 25 July 1997
+#
+# The following table maps SGML character entities from various
+# public sets (namely, ISOamsa, ISOamsb, ISOamsc, ISOamsn, ISOamso,
+# ISOamsr, ISObox, ISOcyr1, ISOcyr2, ISOdia, ISOgrk1, ISOgrk2,
+# ISOgrk3, ISOgrk4, ISOlat1, ISOlat2, ISOnum, ISOpub, ISOtech,
+# HTMLspecial, HTMLsymbol) to corresponding Unicode characters.
+#
+# The table has four tab-separated columns:
+# Column 1: SGML character entity name
+# Column 2: SGML public entity set
+# Column 3: Unicode 2.0 character code
+# Column 4: Unicode 2.0 character name (UPPER CASE)
+# Entries which don't have Unicode equivalents have "0x????"
+# in Column 3 and a lower case description (from the public entity
+# set DTD) in Column 4. The mapping is not reversible, because many
+# distinctions are unified away in Unicode, particularly between
+# mathematical symbols.
+#
+# The table is sorted case-blind by SGML character entity name.
+#
+# The contents of this table are drawn from various sources, and
+# are in the public domain.
+#
+<!-- Changes:
++ {"euro", 0x20AC}, /* EURO SIGN */
+
+-->
+
+This test illustrates Unicode numeric entities like &amp;#x22AB;.
+We sort the entities according to their Unicode numbers.
+You should see visible characters if your display character set supports them,
+or some substitution string picked up from src/chrtrans/def7_uni.tbl.
+
+If you see something like &amp;#x34D2;, either this number is unknown to def7_uni.tbl
+or the browser's internal implementation is broken.
+ Leonid Pauzner.
+
+
+
+
+0x0021 &#x0021; # EXCLAMATION MARK
+0x0022 &#x0022; # QUOTATION MARK
+0x0023 &#x0023; # NUMBER SIGN
+0x0024 &#x0024; # DOLLAR SIGN
+0x0025 &#x0025; # PERCENT SIGN
+0x0026 &#x0026; # AMPERSAND
+0x0028 &#x0028; # LEFT PARENTHESIS
+0x0029 &#x0029; # RIGHT PARENTHESIS
+0x002A &#x002A; # ASTERISK
+0x002B &#x002B; # PLUS SIGN
+0x002C &#x002C; # COMMA
+0x002D &#x002D; # HYPHEN-MINUS
+0x002E &#x002E; # FULL STOP
+0x002F &#x002F; # SOLIDUS
+0x003A &#x003A; # COLON
+0x003B &#x003B; # SEMICOLON
+0x003C &#x003C; # LESS-THAN SIGN
+0x003D &#x003D; # EQUALS SIGN
+0x003E &#x003E; # GREATER-THAN SIGN
+0x003F &#x003F; # QUESTION MARK
+0x0040 &#x0040; # COMMERCIAL AT
+0x005B &#x005B; # LEFT SQUARE BRACKET
+0x005C &#x005C; # REVERSE SOLIDUS
+0x005C &#x005C; # REVERSE SOLIDUS
+0x005D &#x005D; # RIGHT SQUARE BRACKET
+0x005F &#x005F; # LOW LINE
+0x0060 &#x0060; # GRAVE ACCENT
+0x007B &#x007B; # LEFT CURLY BRACKET
+0x007C &#x007C; # VERTICAL LINE
+0x007D &#x007D; # RIGHT CURLY BRACKET
+0x00A0 &#x00A0; # NO-BREAK SPACE
+0x00A1 &#x00A1; # INVERTED EXCLAMATION MARK
+0x00A2 &#x00A2; # CENT SIGN
+0x00A3 &#x00A3; # POUND SIGN
+0x00A4 &#x00A4; # CURRENCY SIGN
+0x00A5 &#x00A5; # YEN SIGN
+0x00A6 &#x00A6; # BROKEN BAR
+0x00A7 &#x00A7; # SECTION SIGN
+0x00A8 &#x00A8; # DIAERESIS
+0x00A9 &#x00A9; # COPYRIGHT SIGN
+0x00AA &#x00AA; # FEMININE ORDINAL INDICATOR
+0x00AB &#x00AB; # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0x00AC &#x00AC; # NOT SIGN
+0x00AD &#x00AD; # SOFT HYPHEN
+0x00AE &#x00AE; # REGISTERED SIGN
+0x00AF &#x00AF; # MACRON
+0x00B0 &#x00B0; # DEGREE SIGN
+0x00B1 &#x00B1; # PLUS-MINUS SIGN
+0x00B2 &#x00B2; # SUPERSCRIPT TWO
+0x00B3 &#x00B3; # SUPERSCRIPT THREE
+0x00B4 &#x00B4; # ACUTE ACCENT
+0x00B5 &#x00B5; # MICRO SIGN
+0x00B6 &#x00B6; # PILCROW SIGN
+0x00B7 &#x00B7; # MIDDLE DOT
+0x00B8 &#x00B8; # CEDILLA
+0x00B9 &#x00B9; # SUPERSCRIPT ONE
+0x00BA &#x00BA; # MASCULINE ORDINAL INDICATOR
+0x00BB &#x00BB; # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0x00BC &#x00BC; # VULGAR FRACTION ONE QUARTER
+0x00BD &#x00BD; # VULGAR FRACTION ONE HALF
+0x00BE &#x00BE; # VULGAR FRACTION THREE QUARTERS
+0x00BF &#x00BF; # INVERTED QUESTION MARK
+0x00C0 &#x00C0; # LATIN CAPITAL LETTER A WITH GRAVE
+0x00C1 &#x00C1; # LATIN CAPITAL LETTER A WITH ACUTE
+0x00C2 &#x00C2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0x00C3 &#x00C3; # LATIN CAPITAL LETTER A WITH TILDE
+0x00C4 &#x00C4; # LATIN CAPITAL LETTER A WITH DIAERESIS
+0x00C5 &#x00C5; # LATIN CAPITAL LETTER A WITH RING ABOVE
+0x00C6 &#x00C6; # LATIN CAPITAL LETTER AE
+0x00C7 &#x00C7; # LATIN CAPITAL LETTER C WITH CEDILLA
+0x00C8 &#x00C8; # LATIN CAPITAL LETTER E WITH GRAVE
+0x00C9 &#x00C9; # LATIN CAPITAL LETTER E WITH ACUTE
+0x00CA &#x00CA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+0x00CB &#x00CB; # LATIN CAPITAL LETTER E WITH DIAERESIS
+0x00CC &#x00CC; # LATIN CAPITAL LETTER I WITH GRAVE
+0x00CD &#x00CD; # LATIN CAPITAL LETTER I WITH ACUTE
+0x00CE &#x00CE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0x00CF &#x00CF; # LATIN CAPITAL LETTER I WITH DIAERESIS
+0x00D0 &#x00D0; # LATIN CAPITAL LETTER ETH
+0x00D1 &#x00D1; # LATIN CAPITAL LETTER N WITH TILDE
+0x00D2 &#x00D2; # LATIN CAPITAL LETTER O WITH GRAVE
+0x00D3 &#x00D3; # LATIN CAPITAL LETTER O WITH ACUTE
+0x00D4 &#x00D4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0x00D5 &#x00D5; # LATIN CAPITAL LETTER O WITH TILDE
+0x00D6 &#x00D6; # LATIN CAPITAL LETTER O WITH DIAERESIS
+0x00D7 &#x00D7; # MULTIPLICATION SIGN
+0x00D8 &#x00D8; # LATIN CAPITAL LETTER O WITH STROKE
+0x00D9 &#x00D9; # LATIN CAPITAL LETTER U WITH GRAVE
+0x00DA &#x00DA; # LATIN CAPITAL LETTER U WITH ACUTE
+0x00DB &#x00DB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+0x00DC &#x00DC; # LATIN CAPITAL LETTER U WITH DIAERESIS
+0x00DD &#x00DD; # LATIN CAPITAL LETTER Y WITH ACUTE
+0x00DE &#x00DE; # LATIN CAPITAL LETTER THORN
+0x00DF &#x00DF; # LATIN SMALL LETTER SHARP S
+0x00E0 &#x00E0; # LATIN SMALL LETTER A WITH GRAVE
+0x00E1 &#x00E1; # LATIN SMALL LETTER A WITH ACUTE
+0x00E2 &#x00E2; # LATIN SMALL LETTER A WITH CIRCUMFLEX
+0x00E3 &#x00E3; # LATIN SMALL LETTER A WITH TILDE
+0x00E4 &#x00E4; # LATIN SMALL LETTER A WITH DIAERESIS
+0x00E5 &#x00E5; # LATIN SMALL LETTER A WITH RING ABOVE
+0x00E6 &#x00E6; # LATIN SMALL LETTER AE
+0x00E7 &#x00E7; # LATIN SMALL LETTER C WITH CEDILLA
+0x00E8 &#x00E8; # LATIN SMALL LETTER E WITH GRAVE
+0x00E9 &#x00E9; # LATIN SMALL LETTER E WITH ACUTE
+0x00EA &#x00EA; # LATIN SMALL LETTER E WITH CIRCUMFLEX
+0x00EB &#x00EB; # LATIN SMALL LETTER E WITH DIAERESIS
+0x00EC &#x00EC; # LATIN SMALL LETTER I WITH GRAVE
+0x00ED &#x00ED; # LATIN SMALL LETTER I WITH ACUTE
+0x00EE &#x00EE; # LATIN SMALL LETTER I WITH CIRCUMFLEX
+0x00EF &#x00EF; # LATIN SMALL LETTER I WITH DIAERESIS
+0x00F0 &#x00F0; # LATIN SMALL LETTER ETH
+0x00F1 &#x00F1; # LATIN SMALL LETTER N WITH TILDE
+0x00F2 &#x00F2; # LATIN SMALL LETTER O WITH GRAVE
+0x00F3 &#x00F3; # LATIN SMALL LETTER O WITH ACUTE
+0x00F4 &#x00F4; # LATIN SMALL LETTER O WITH CIRCUMFLEX
+0x00F5 &#x00F5; # LATIN SMALL LETTER O WITH TILDE
+0x00F6 &#x00F6; # LATIN SMALL LETTER O WITH DIAERESIS
+0x00F7 &#x00F7; # DIVISION SIGN
+0x00F8 &#x00F8; # LATIN SMALL LETTER O WITH STROKE
+0x00F9 &#x00F9; # LATIN SMALL LETTER U WITH GRAVE
+0x00FA &#x00FA; # LATIN SMALL LETTER U WITH ACUTE
+0x00FB &#x00FB; # LATIN SMALL LETTER U WITH CIRCUMFLEX
+0x00FC &#x00FC; # LATIN SMALL LETTER U WITH DIAERESIS
+0x00FD &#x00FD; # LATIN SMALL LETTER Y WITH ACUTE
+0x00FE &#x00FE; # LATIN SMALL LETTER THORN
+0x00FF &#x00FF; # LATIN SMALL LETTER Y WITH DIAERESIS
+0x0100 &#x0100; # LATIN CAPITAL LETTER A WITH MACRON
+0x0101 &#x0101; # LATIN SMALL LETTER A WITH MACRON
+0x0102 &#x0102; # LATIN CAPITAL LETTER A WITH BREVE
+0x0103 &#x0103; # LATIN SMALL LETTER A WITH BREVE
+0x0104 &#x0104; # LATIN CAPITAL LETTER A WITH OGONEK
+0x0105 &#x0105; # LATIN SMALL LETTER A WITH OGONEK
+0x0106 &#x0106; # LATIN CAPITAL LETTER C WITH ACUTE
+0x0107 &#x0107; # LATIN SMALL LETTER C WITH ACUTE
+0x0108 &#x0108; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+0x0109 &#x0109; # LATIN SMALL LETTER C WITH CIRCUMFLEX
+0x010A &#x010A; # LATIN CAPITAL LETTER C WITH DOT ABOVE
+0x010B &#x010B; # LATIN SMALL LETTER C WITH DOT ABOVE
+0x010C &#x010C; # LATIN CAPITAL LETTER C WITH CARON
+0x010D &#x010D; # LATIN SMALL LETTER C WITH CARON
+0x010E &#x010E; # LATIN CAPITAL LETTER D WITH CARON
+0x010F &#x010F; # LATIN SMALL LETTER D WITH CARON
+0x0110 &#x0110; # LATIN CAPITAL LETTER D WITH STROKE
+0x0111 &#x0111; # LATIN SMALL LETTER D WITH STROKE
+0x0112 &#x0112; # LATIN CAPITAL LETTER E WITH MACRON
+0x0113 &#x0113; # LATIN SMALL LETTER E WITH MACRON
+0x0116 &#x0116; # LATIN CAPITAL LETTER E WITH DOT ABOVE
+0x0117 &#x0117; # LATIN SMALL LETTER E WITH DOT ABOVE
+0x0118 &#x0118; # LATIN CAPITAL LETTER E WITH OGONEK
+0x0119 &#x0119; # LATIN SMALL LETTER E WITH OGONEK
+0x011A &#x011A; # LATIN CAPITAL LETTER E WITH CARON
+0x011B &#x011B; # LATIN SMALL LETTER E WITH CARON
+0x011C &#x011C; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+0x011D &#x011D; # LATIN SMALL LETTER G WITH CIRCUMFLEX
+0x011E &#x011E; # LATIN CAPITAL LETTER G WITH BREVE
+0x011F &#x011F; # LATIN SMALL LETTER G WITH BREVE
+0x0120 &#x0120; # LATIN CAPITAL LETTER G WITH DOT ABOVE
+0x0121 &#x0121; # LATIN SMALL LETTER G WITH DOT ABOVE
+0x0122 &#x0122; # LATIN CAPITAL LETTER G WITH CEDILLA
+0x0123 &#x0123; # LATIN SMALL LETTER G WITH CEDILLA
+0x0124 &#x0124; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+0x0125 &#x0125; # LATIN SMALL LETTER H WITH CIRCUMFLEX
+0x0126 &#x0126; # LATIN CAPITAL LETTER H WITH STROKE
+0x0127 &#x0127; # LATIN SMALL LETTER H WITH STROKE
+0x0128 &#x0128; # LATIN CAPITAL LETTER I WITH TILDE
+0x0129 &#x0129; # LATIN SMALL LETTER I WITH TILDE
+0x012A &#x012A; # LATIN CAPITAL LETTER I WITH MACRON
+0x012B &#x012B; # LATIN SMALL LETTER I WITH MACRON
+0x012E &#x012E; # LATIN CAPITAL LETTER I WITH OGONEK
+0x012F &#x012F; # LATIN SMALL LETTER I WITH OGONEK
+0x0130 &#x0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+0x0131 &#x0131; # LATIN SMALL LETTER DOTLESS I
+0x0131 &#x0131; # LATIN SMALL LETTER DOTLESS I
+0x0132 &#x0132; # LATIN CAPITAL LIGATURE IJ
+0x0133 &#x0133; # LATIN SMALL LIGATURE IJ
+0x0134 &#x0134; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+0x0135 &#x0135; # LATIN SMALL LETTER J WITH CIRCUMFLEX
+0x0136 &#x0136; # LATIN CAPITAL LETTER K WITH CEDILLA
+0x0137 &#x0137; # LATIN SMALL LETTER K WITH CEDILLA
+0x0138 &#x0138; # LATIN SMALL LETTER KRA
+0x0139 &#x0139; # LATIN CAPITAL LETTER L WITH ACUTE
+0x013A &#x013A; # LATIN SMALL LETTER L WITH ACUTE
+0x013B &#x013B; # LATIN CAPITAL LETTER L WITH CEDILLA
+0x013C &#x013C; # LATIN SMALL LETTER L WITH CEDILLA
+0x013D &#x013D; # LATIN CAPITAL LETTER L WITH CARON
+0x013E &#x013E; # LATIN SMALL LETTER L WITH CARON
+0x013F &#x013F; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
+0x0140 &#x0140; # LATIN SMALL LETTER L WITH MIDDLE DOT
+0x0141 &#x0141; # LATIN CAPITAL LETTER L WITH STROKE
+0x0142 &#x0142; # LATIN SMALL LETTER L WITH STROKE
+0x0143 &#x0143; # LATIN CAPITAL LETTER N WITH ACUTE
+0x0144 &#x0144; # LATIN SMALL LETTER N WITH ACUTE
+0x0145 &#x0145; # LATIN CAPITAL LETTER N WITH CEDILLA
+0x0146 &#x0146; # LATIN SMALL LETTER N WITH CEDILLA
+0x0147 &#x0147; # LATIN CAPITAL LETTER N WITH CARON
+0x0148 &#x0148; # LATIN SMALL LETTER N WITH CARON
+0x0149 &#x0149; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+0x014A &#x014A; # LATIN CAPITAL LETTER ENG
+0x014B &#x014B; # LATIN SMALL LETTER ENG
+0x014C &#x014C; # LATIN CAPITAL LETTER O WITH MACRON
+0x014D &#x014D; # LATIN SMALL LETTER O WITH MACRON
+0x0150 &#x0150; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+0x0151 &#x0151; # LATIN SMALL LETTER O WITH DOUBLE ACUTE
+0x0152 &#x0152; # LATIN CAPITAL LIGATURE OE
+0x0153 &#x0153; # LATIN SMALL LIGATURE OE
+0x0154 &#x0154; # LATIN CAPITAL LETTER R WITH ACUTE
+0x0155 &#x0155; # LATIN SMALL LETTER R WITH ACUTE
+0x0156 &#x0156; # LATIN CAPITAL LETTER R WITH CEDILLA
+0x0157 &#x0157; # LATIN SMALL LETTER R WITH CEDILLA
+0x0158 &#x0158; # LATIN CAPITAL LETTER R WITH CARON
+0x0159 &#x0159; # LATIN SMALL LETTER R WITH CARON
+0x015A &#x015A; # LATIN CAPITAL LETTER S WITH ACUTE
+0x015B &#x015B; # LATIN SMALL LETTER S WITH ACUTE
+0x015C &#x015C; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+0x015D &#x015D; # LATIN SMALL LETTER S WITH CIRCUMFLEX
+0x015E &#x015E; # LATIN CAPITAL LETTER S WITH CEDILLA
+0x015F &#x015F; # LATIN SMALL LETTER S WITH CEDILLA
+0x0160 &#x0160; # LATIN CAPITAL LETTER S WITH CARON
+0x0161 &#x0161; # LATIN SMALL LETTER S WITH CARON
+0x0162 &#x0162; # LATIN CAPITAL LETTER T WITH CEDILLA
+0x0163 &#x0163; # LATIN SMALL LETTER T WITH CEDILLA
+0x0164 &#x0164; # LATIN CAPITAL LETTER T WITH CARON
+0x0165 &#x0165; # LATIN SMALL LETTER T WITH CARON
+0x0166 &#x0166; # LATIN CAPITAL LETTER T WITH STROKE
+0x0167 &#x0167; # LATIN SMALL LETTER T WITH STROKE
+0x0168 &#x0168; # LATIN CAPITAL LETTER U WITH TILDE
+0x0169 &#x0169; # LATIN SMALL LETTER U WITH TILDE
+0x016A &#x016A; # LATIN CAPITAL LETTER U WITH MACRON
+0x016B &#x016B; # LATIN SMALL LETTER U WITH MACRON
+0x016C &#x016C; # LATIN CAPITAL LETTER U WITH BREVE
+0x016D &#x016D; # LATIN SMALL LETTER U WITH BREVE
+0x016E &#x016E; # LATIN CAPITAL LETTER U WITH RING ABOVE
+0x016F &#x016F; # LATIN SMALL LETTER U WITH RING ABOVE
+0x0170 &#x0170; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+0x0171 &#x0171; # LATIN SMALL LETTER U WITH DOUBLE ACUTE
+0x0172 &#x0172; # LATIN CAPITAL LETTER U WITH OGONEK
+0x0173 &#x0173; # LATIN SMALL LETTER U WITH OGONEK
+0x0174 &#x0174; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+0x0175 &#x0175; # LATIN SMALL LETTER W WITH CIRCUMFLEX
+0x0176 &#x0176; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+0x0177 &#x0177; # LATIN SMALL LETTER Y WITH CIRCUMFLEX
+0x0178 &#x0178; # LATIN CAPITAL LETTER Y WITH DIAERESIS
+0x0179 &#x0179; # LATIN CAPITAL LETTER Z WITH ACUTE
+0x017A &#x017A; # LATIN SMALL LETTER Z WITH ACUTE
+0x017B &#x017B; # LATIN CAPITAL LETTER Z WITH DOT ABOVE
+0x017C &#x017C; # LATIN SMALL LETTER Z WITH DOT ABOVE
+0x017D &#x017D; # LATIN CAPITAL LETTER Z WITH CARON
+0x017E &#x017E; # LATIN SMALL LETTER Z WITH CARON
+0x0192 &#x0192; # LATIN SMALL LETTER F WITH HOOK
+0x01F5 &#x01F5; # LATIN SMALL LETTER G WITH ACUTE
+0x02BC &#x02BC; # MODIFIER LETTER APOSTROPHE
+0x02C6 &#x02C6; # MODIFIER LETTER CIRCUMFLEX ACCENT
+0x02C7 &#x02C7; # CARON
+0x02D8 &#x02D8; # BREVE
+0x02D9 &#x02D9; # DOT ABOVE
+0x02DA &#x02DA; # RING ABOVE
+0x02DB &#x02DB; # OGONEK
+0x02DC &#x02DC; # SMALL TILDE
+0x02DD &#x02DD; # DOUBLE ACUTE ACCENT
+0x0386 &#x0386; # GREEK CAPITAL LETTER ALPHA WITH TONOS
+0x0388 &#x0388; # GREEK CAPITAL LETTER EPSILON WITH TONOS
+0x0389 &#x0389; # GREEK CAPITAL LETTER ETA WITH TONOS
+0x038A &#x038A; # GREEK CAPITAL LETTER IOTA WITH TONOS
+0x038C &#x038C; # GREEK CAPITAL LETTER OMICRON WITH TONOS
+0x038E &#x038E; # GREEK CAPITAL LETTER UPSILON WITH TONOS
+0x038F &#x038F; # GREEK CAPITAL LETTER OMEGA WITH TONOS
+0x0390 &#x0390; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+0x0391 &#x0391; # GREEK CAPITAL LETTER ALPHA
+0x0392 &#x0392; # GREEK CAPITAL LETTER BETA
+0x0393 &#x0393; # GREEK CAPITAL LETTER GAMMA
+0x0394 &#x0394; # GREEK CAPITAL LETTER DELTA
+0x0395 &#x0395; # GREEK CAPITAL LETTER EPSILON
+0x0396 &#x0396; # GREEK CAPITAL LETTER ZETA
+0x0397 &#x0397; # GREEK CAPITAL LETTER ETA
+0x0398 &#x0398; # GREEK CAPITAL LETTER THETA
+0x0399 &#x0399; # GREEK CAPITAL LETTER IOTA
+0x039A &#x039A; # GREEK CAPITAL LETTER KAPPA
+0x039B &#x039B; # GREEK CAPITAL LETTER LAMDA
+0x039C &#x039C; # GREEK CAPITAL LETTER MU
+0x039D &#x039D; # GREEK CAPITAL LETTER NU
+0x039E &#x039E; # GREEK CAPITAL LETTER XI
+0x039F &#x039F; # GREEK CAPITAL LETTER OMICRON
+0x03A0 &#x03A0; # GREEK CAPITAL LETTER PI
+0x03A1 &#x03A1; # GREEK CAPITAL LETTER RHO
+0x03A3 &#x03A3; # GREEK CAPITAL LETTER SIGMA
+0x03A4 &#x03A4; # GREEK CAPITAL LETTER TAU
+0x03A5 &#x03A5; # GREEK CAPITAL LETTER UPSILON
+0x03A6 &#x03A6; # GREEK CAPITAL LETTER PHI
+0x03A7 &#x03A7; # GREEK CAPITAL LETTER CHI
+0x03A8 &#x03A8; # GREEK CAPITAL LETTER PSI
+0x03A9 &#x03A9; # GREEK CAPITAL LETTER OMEGA
+0x03AA &#x03AA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+0x03AB &#x03AB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+0x03AC &#x03AC; # GREEK SMALL LETTER ALPHA WITH TONOS
+0x03AD &#x03AD; # GREEK SMALL LETTER EPSILON WITH TONOS
+0x03AE &#x03AE; # GREEK SMALL LETTER ETA WITH TONOS
+0x03AF &#x03AF; # GREEK SMALL LETTER IOTA WITH TONOS
+0x03B0 &#x03B0; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+0x03B1 &#x03B1; # GREEK SMALL LETTER ALPHA
+0x03B2 &#x03B2; # GREEK SMALL LETTER BETA
+0x03B3 &#x03B3; # GREEK SMALL LETTER GAMMA
+0x03B4 &#x03B4; # GREEK SMALL LETTER DELTA
+0x03B5 &#x03B5; # GREEK SMALL LETTER EPSILON
+0x03B6 &#x03B6; # GREEK SMALL LETTER ZETA
+0x03B7 &#x03B7; # GREEK SMALL LETTER ETA
+0x03B8 &#x03B8; # GREEK SMALL LETTER THETA
+0x03B9 &#x03B9; # GREEK SMALL LETTER IOTA
+0x03BA &#x03BA; # GREEK SMALL LETTER KAPPA
+0x03BB &#x03BB; # GREEK SMALL LETTER LAMDA
+0x03BC &#x03BC; # GREEK SMALL LETTER MU
+0x03BD &#x03BD; # GREEK SMALL LETTER NU
+0x03BE &#x03BE; # GREEK SMALL LETTER XI
+0x03BF &#x03BF; # GREEK SMALL LETTER OMICRON
+0x03C0 &#x03C0; # GREEK SMALL LETTER PI
+0x03C1 &#x03C1; # GREEK SMALL LETTER RHO
+0x03C2 &#x03C2; # GREEK SMALL LETTER FINAL SIGMA
+0x03C3 &#x03C3; # GREEK SMALL LETTER SIGMA
+0x03C4 &#x03C4; # GREEK SMALL LETTER TAU
+0x03C5 &#x03C5; # GREEK SMALL LETTER UPSILON
+0x03C6 &#x03C6; # GREEK SMALL LETTER PHI
+0x03C7 &#x03C7; # GREEK SMALL LETTER CHI
+0x03C8 &#x03C8; # GREEK SMALL LETTER PSI
+0x03C9 &#x03C9; # GREEK SMALL LETTER OMEGA
+0x03CA &#x03CA; # GREEK SMALL LETTER IOTA WITH DIALYTIKA
+0x03CB &#x03CB; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+0x03CC &#x03CC; # GREEK SMALL LETTER OMICRON WITH TONOS
+0x03CE &#x03CE; # GREEK SMALL LETTER OMEGA WITH TONOS
+0x03D1 &#x03D1; # GREEK THETA SYMBOL
+0x03D2 &#x03D2; # GREEK UPSILON WITH HOOK SYMBOL
+0x03D5 &#x03D5; # GREEK PHI SYMBOL
+0x03D6 &#x03D6; # GREEK PI SYMBOL
+0x03DC &#x03DC; # GREEK LETTER DIGAMMA
+0x03F0 &#x03F0; # GREEK KAPPA SYMBOL
+0x03F1 &#x03F1; # GREEK RHO SYMBOL
+0x0401 &#x0401; # CYRILLIC CAPITAL LETTER IO
+0x0402 &#x0402; # CYRILLIC CAPITAL LETTER DJE
+0x0403 &#x0403; # CYRILLIC CAPITAL LETTER GJE
+0x0404 &#x0404; # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+0x0405 &#x0405; # CYRILLIC CAPITAL LETTER DZE
+0x0406 &#x0406; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+0x0407 &#x0407; # CYRILLIC CAPITAL LETTER YI
+0x0408 &#x0408; # CYRILLIC CAPITAL LETTER JE
+0x0409 &#x0409; # CYRILLIC CAPITAL LETTER LJE
+0x040A &#x040A; # CYRILLIC CAPITAL LETTER NJE
+0x040B &#x040B; # CYRILLIC CAPITAL LETTER TSHE
+0x040C &#x040C; # CYRILLIC CAPITAL LETTER KJE
+0x040E &#x040E; # CYRILLIC CAPITAL LETTER SHORT U
+0x040F &#x040F; # CYRILLIC CAPITAL LETTER DZHE
+0x0410 &#x0410; # CYRILLIC CAPITAL LETTER A
+0x0411 &#x0411; # CYRILLIC CAPITAL LETTER BE
+0x0412 &#x0412; # CYRILLIC CAPITAL LETTER VE
+0x0413 &#x0413; # CYRILLIC CAPITAL LETTER GHE
+0x0414 &#x0414; # CYRILLIC CAPITAL LETTER DE
+0x0415 &#x0415; # CYRILLIC CAPITAL LETTER IE
+0x0416 &#x0416; # CYRILLIC CAPITAL LETTER ZHE
+0x0417 &#x0417; # CYRILLIC CAPITAL LETTER ZE
+0x0418 &#x0418; # CYRILLIC CAPITAL LETTER I
+0x0419 &#x0419; # CYRILLIC CAPITAL LETTER SHORT I
+0x041A &#x041A; # CYRILLIC CAPITAL LETTER KA
+0x041B &#x041B; # CYRILLIC CAPITAL LETTER EL
+0x041C &#x041C; # CYRILLIC CAPITAL LETTER EM
+0x041D &#x041D; # CYRILLIC CAPITAL LETTER EN
+0x041E &#x041E; # CYRILLIC CAPITAL LETTER O
+0x041F &#x041F; # CYRILLIC CAPITAL LETTER PE
+0x0420 &#x0420; # CYRILLIC CAPITAL LETTER ER
+0x0421 &#x0421; # CYRILLIC CAPITAL LETTER ES
+0x0422 &#x0422; # CYRILLIC CAPITAL LETTER TE
+0x0423 &#x0423; # CYRILLIC CAPITAL LETTER U
+0x0424 &#x0424; # CYRILLIC CAPITAL LETTER EF
+0x0425 &#x0425; # CYRILLIC CAPITAL LETTER HA
+0x0426 &#x0426; # CYRILLIC CAPITAL LETTER TSE
+0x0427 &#x0427; # CYRILLIC CAPITAL LETTER CHE
+0x0428 &#x0428; # CYRILLIC CAPITAL LETTER SHA
+0x0429 &#x0429; # CYRILLIC CAPITAL LETTER SHCHA
+0x042A &#x042A; # CYRILLIC CAPITAL LETTER HARD SIGN
+0x042B &#x042B; # CYRILLIC CAPITAL LETTER YERU
+0x042C &#x042C; # CYRILLIC CAPITAL LETTER SOFT SIGN
+0x042D &#x042D; # CYRILLIC CAPITAL LETTER E
+0x042E &#x042E; # CYRILLIC CAPITAL LETTER YU
+0x042F &#x042F; # CYRILLIC CAPITAL LETTER YA
+0x0430 &#x0430; # CYRILLIC SMALL LETTER A
+0x0431 &#x0431; # CYRILLIC SMALL LETTER BE
+0x0432 &#x0432; # CYRILLIC SMALL LETTER VE
+0x0433 &#x0433; # CYRILLIC SMALL LETTER GHE
+0x0434 &#x0434; # CYRILLIC SMALL LETTER DE
+0x0435 &#x0435; # CYRILLIC SMALL LETTER IE
+0x0436 &#x0436; # CYRILLIC SMALL LETTER ZHE
+0x0437 &#x0437; # CYRILLIC SMALL LETTER ZE
+0x0438 &#x0438; # CYRILLIC SMALL LETTER I
+0x0439 &#x0439; # CYRILLIC SMALL LETTER SHORT I
+0x043A &#x043A; # CYRILLIC SMALL LETTER KA
+0x043B &#x043B; # CYRILLIC SMALL LETTER EL
+0x043C &#x043C; # CYRILLIC SMALL LETTER EM
+0x043D &#x043D; # CYRILLIC SMALL LETTER EN
+0x043E &#x043E; # CYRILLIC SMALL LETTER O
+0x043F &#x043F; # CYRILLIC SMALL LETTER PE
+0x0440 &#x0440; # CYRILLIC SMALL LETTER ER
+0x0441 &#x0441; # CYRILLIC SMALL LETTER ES
+0x0442 &#x0442; # CYRILLIC SMALL LETTER TE
+0x0443 &#x0443; # CYRILLIC SMALL LETTER U
+0x0444 &#x0444; # CYRILLIC SMALL LETTER EF
+0x0445 &#x0445; # CYRILLIC SMALL LETTER HA
+0x0446 &#x0446; # CYRILLIC SMALL LETTER TSE
+0x0447 &#x0447; # CYRILLIC SMALL LETTER CHE
+0x0448 &#x0448; # CYRILLIC SMALL LETTER SHA
+0x0449 &#x0449; # CYRILLIC SMALL LETTER SHCHA
+0x044A &#x044A; # CYRILLIC SMALL LETTER HARD SIGN
+0x044B &#x044B; # CYRILLIC SMALL LETTER YERU
+0x044C &#x044C; # CYRILLIC SMALL LETTER SOFT SIGN
+0x044D &#x044D; # CYRILLIC SMALL LETTER E
+0x044E &#x044E; # CYRILLIC SMALL LETTER YU
+0x044F &#x044F; # CYRILLIC SMALL LETTER YA
+0x0451 &#x0451; # CYRILLIC SMALL LETTER IO
+0x0452 &#x0452; # CYRILLIC SMALL LETTER DJE
+0x0453 &#x0453; # CYRILLIC SMALL LETTER GJE
+0x0454 &#x0454; # CYRILLIC SMALL LETTER UKRAINIAN IE
+0x0455 &#x0455; # CYRILLIC SMALL LETTER DZE
+0x0456 &#x0456; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+0x0457 &#x0457; # CYRILLIC SMALL LETTER YI
+0x0458 &#x0458; # CYRILLIC SMALL LETTER JE
+0x0459 &#x0459; # CYRILLIC SMALL LETTER LJE
+0x045A &#x045A; # CYRILLIC SMALL LETTER NJE
+0x045B &#x045B; # CYRILLIC SMALL LETTER TSHE
+0x045C &#x045C; # CYRILLIC SMALL LETTER KJE
+0x045E &#x045E; # CYRILLIC SMALL LETTER SHORT U
+0x045F &#x045F; # CYRILLIC SMALL LETTER DZHE
+0x2002 &#x2002; # EN SPACE
+0x2003 &#x2003; # EM SPACE
+0x2004 &#x2004; # THREE-PER-EM SPACE
+0x2005 &#x2005; # FOUR-PER-EM SPACE
+0x2007 &#x2007; # FIGURE SPACE
+0x2008 &#x2008; # PUNCTUATION SPACE
+0x2009 &#x2009; # THIN SPACE
+0x200A &#x200A; # HAIR SPACE
+0x200C &#x200C; # ZERO WIDTH NON-JOINER
+0x200D &#x200D; # ZERO WIDTH JOINER
+0x200E &#x200E; # LEFT-TO-RIGHT MARK
+0x200F &#x200F; # RIGHT-TO-LEFT MARK
+0x2010 &#x2010; # HYPHEN
+0x2013 &#x2013; # EN DASH
+0x2014 &#x2014; # EM DASH
+0x2015 &#x2015; # HORIZONTAL BAR
+0x2016 &#x2016; # DOUBLE VERTICAL LINE
+0x2018 &#x2018; # LEFT SINGLE QUOTATION MARK
+0x2018 &#x2018; # LEFT SINGLE QUOTATION MARK
+0x2019 &#x2019; # RIGHT SINGLE QUOTATION MARK
+0x201A &#x201A; # SINGLE LOW-9 QUOTATION MARK
+0x201A &#x201A; # SINGLE LOW-9 QUOTATION MARK
+0x201C &#x201C; # LEFT DOUBLE QUOTATION MARK
+0x201C &#x201C; # LEFT DOUBLE QUOTATION MARK
+0x201D &#x201D; # RIGHT DOUBLE QUOTATION MARK
+0x201E &#x201E; # DOUBLE LOW-9 QUOTATION MARK
+0x201E &#x201E; # DOUBLE LOW-9 QUOTATION MARK
+0x2020 &#x2020; # DAGGER
+0x2021 &#x2021; # DOUBLE DAGGER
+0x2022 &#x2022; # BULLET
+0x2025 &#x2025; # TWO DOT LEADER
+0x2026 &#x2026; # HORIZONTAL ELLIPSIS
+0x2026 &#x2026; # HORIZONTAL ELLIPSIS
+0x2030 &#x2030; # PER MILLE SIGN
+0x2032 &#x2032; # PRIME
+0x2032 &#x2032; # PRIME
+0x2033 &#x2033; # DOUBLE PRIME
+0x2034 &#x2034; # TRIPLE PRIME
+0x2035 &#x2035; # REVERSED PRIME
+0x2039 &#x2039; # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+0x203A &#x203A; # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+0x203E &#x203E; # OVERLINE
+0x2041 &#x2041; # CARET INSERTION POINT
+0x2043 &#x2043; # HYPHEN BULLET
+0x2044 &#x2044; # FRACTION SLASH
+0x20AC &#x20AC; # EURO SIGN
+0x20DB &#x20DB; # COMBINING THREE DOTS ABOVE
+0x20DC &#x20DC; # COMBINING FOUR DOTS ABOVE
+0x2105 &#x2105; # CARE OF
+0x210B &#x210B; # SCRIPT CAPITAL H
+0x210F &#x210F; # PLANCK CONSTANT OVER TWO PI
+0x2111 &#x2111; # BLACK-LETTER CAPITAL I
+0x2112 &#x2112; # SCRIPT CAPITAL L
+0x2113 &#x2113; # SCRIPT SMALL L
+0x2116 &#x2116; # NUMERO SIGN
+0x2117 &#x2117; # SOUND RECORDING COPYRIGHT
+0x2118 &#x2118; # SCRIPT CAPITAL P
+0x211C &#x211C; # BLACK-LETTER CAPITAL R
+0x211E &#x211E; # PRESCRIPTION TAKE
+0x2122 &#x2122; # TRADE MARK SIGN
+0x2126 &#x2126; # OHM SIGN
+0x212B &#x212B; # ANGSTROM SIGN
+0x212C &#x212C; # SCRIPT CAPITAL B
+0x2133 &#x2133; # SCRIPT CAPITAL M
+0x2134 &#x2134; # SCRIPT SMALL O
+0x2135 &#x2135; # ALEF SYMBOL
+0x2135 &#x2135; # ALEF SYMBOL
+0x2136 &#x2136; # BET SYMBOL
+0x2137 &#x2137; # GIMEL SYMBOL
+0x2138 &#x2138; # DALET SYMBOL
+0x2153 &#x2153; # VULGAR FRACTION ONE THIRD
+0x2154 &#x2154; # VULGAR FRACTION TWO THIRDS
+0x2155 &#x2155; # VULGAR FRACTION ONE FIFTH
+0x2156 &#x2156; # VULGAR FRACTION TWO FIFTHS
+0x2157 &#x2157; # VULGAR FRACTION THREE FIFTHS
+0x2158 &#x2158; # VULGAR FRACTION FOUR FIFTHS
+0x2159 &#x2159; # VULGAR FRACTION ONE SIXTH
+0x215A &#x215A; # VULGAR FRACTION FIVE SIXTHS
+0x215B &#x215B; # VULGAR FRACTION ONE EIGHTH
+0x215C &#x215C; # VULGAR FRACTION THREE EIGHTHS
+0x215D &#x215D; # VULGAR FRACTION FIVE EIGHTHS
+0x215E &#x215E; # VULGAR FRACTION SEVEN EIGHTHS
+0x2190 &#x2190; # LEFTWARDS ARROW
+0x2191 &#x2191; # UPWARDS ARROW
+0x2192 &#x2192; # RIGHTWARDS ARROW
+0x2193 &#x2193; # DOWNWARDS ARROW
+0x2194 &#x2194; # LEFT RIGHT ARROW
+0x2195 &#x2195; # UP DOWN ARROW
+0x2196 &#x2196; # NORTH WEST ARROW
+0x2197 &#x2197; # NORTH EAST ARROW
+0x2198 &#x2198; # SOUTH EAST ARROW
+0x2199 &#x2199; # SOUTH WEST ARROW
+0x219A &#x219A; # LEFTWARDS ARROW WITH STROKE
+0x219B &#x219B; # RIGHTWARDS ARROW WITH STROKE
+0x219D &#x219D; # RIGHTWARDS WAVE ARROW
+0x219E &#x219E; # LEFTWARDS TWO HEADED ARROW
+0x21A0 &#x21A0; # RIGHTWARDS TWO HEADED ARROW
+0x21A2 &#x21A2; # LEFTWARDS ARROW WITH TAIL
+0x21A3 &#x21A3; # RIGHTWARDS ARROW WITH TAIL
+0x21A6 &#x21A6; # RIGHTWARDS ARROW FROM BAR
+0x21A9 &#x21A9; # LEFTWARDS ARROW WITH HOOK
+0x21AA &#x21AA; # RIGHTWARDS ARROW WITH HOOK
+0x21AB &#x21AB; # LEFTWARDS ARROW WITH LOOP
+0x21AC &#x21AC; # RIGHTWARDS ARROW WITH LOOP
+0x21AD &#x21AD; # LEFT RIGHT WAVE ARROW
+0x21AE &#x21AE; # LEFT RIGHT ARROW WITH STROKE
+0x21B0 &#x21B0; # UPWARDS ARROW WITH TIP LEFTWARDS
+0x21B1 &#x21B1; # UPWARDS ARROW WITH TIP RIGHTWARDS
+0x21B5 &#x21B5; # DOWNWARDS ARROW WITH CORNER LEFTWARDS
+0x21B6 &#x21B6; # ANTICLOCKWISE TOP SEMICIRCLE ARROW
+0x21B7 &#x21B7; # CLOCKWISE TOP SEMICIRCLE ARROW
+0x21BA &#x21BA; # ANTICLOCKWISE OPEN CIRCLE ARROW
+0x21BB &#x21BB; # CLOCKWISE OPEN CIRCLE ARROW
+0x21BC &#x21BC; # LEFTWARDS HARPOON WITH BARB UPWARDS
+0x21BD &#x21BD; # LEFTWARDS HARPOON WITH BARB DOWNWARDS
+0x21BE &#x21BE; # UPWARDS HARPOON WITH BARB RIGHTWARDS
+0x21BF &#x21BF; # UPWARDS HARPOON WITH BARB LEFTWARDS
+0x21C0 &#x21C0; # RIGHTWARDS HARPOON WITH BARB UPWARDS
+0x21C1 &#x21C1; # RIGHTWARDS HARPOON WITH BARB DOWNWARDS
+0x21C2 &#x21C2; # DOWNWARDS HARPOON WITH BARB RIGHTWARDS
+0x21C3 &#x21C3; # DOWNWARDS HARPOON WITH BARB LEFTWARDS
+0x21C4 &#x21C4; # RIGHTWARDS ARROW OVER LEFTWARDS ARROW
+0x21C6 &#x21C6; # LEFTWARDS ARROW OVER RIGHTWARDS ARROW
+0x21C7 &#x21C7; # LEFTWARDS PAIRED ARROWS
+0x21C8 &#x21C8; # UPWARDS PAIRED ARROWS
+0x21C9 &#x21C9; # RIGHTWARDS PAIRED ARROWS
+0x21CA &#x21CA; # DOWNWARDS PAIRED ARROWS
+0x21CB &#x21CB; # LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON
+0x21CC &#x21CC; # RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON
+0x21CD &#x21CD; # LEFTWARDS DOUBLE ARROW WITH STROKE
+0x21CE &#x21CE; # LEFT RIGHT DOUBLE ARROW WITH STROKE
+0x21CF &#x21CF; # RIGHTWARDS DOUBLE ARROW WITH STROKE
+0x21D0 &#x21D0; # LEFTWARDS DOUBLE ARROW
+0x21D1 &#x21D1; # UPWARDS DOUBLE ARROW
+0x21D2 &#x21D2; # RIGHTWARDS DOUBLE ARROW
+0x21D3 &#x21D3; # DOWNWARDS DOUBLE ARROW
+0x21D4 &#x21D4; # LEFT RIGHT DOUBLE ARROW
+0x21D5 &#x21D5; # UP DOWN DOUBLE ARROW
+0x21DA &#x21DA; # LEFTWARDS TRIPLE ARROW
+0x21DB &#x21DB; # RIGHTWARDS TRIPLE ARROW
+0x2200 &#x2200; # FOR ALL
+0x2201 &#x2201; # COMPLEMENT
+0x2202 &#x2202; # PARTIAL DIFFERENTIAL
+0x2203 &#x2203; # THERE EXISTS
+0x2204 &#x2204; # THERE DOES NOT EXIST
+0x2205 &#x2205; # EMPTY SET
+0x2207 &#x2207; # NABLA
+0x2208 &#x2208; # ELEMENT OF
+0x2209 &#x2209; # NOT AN ELEMENT OF
+0x220A &#x220A; # SMALL ELEMENT OF
+0x220B &#x220B; # CONTAINS AS MEMBER
+0x220D &#x220D; # SMALL CONTAINS AS MEMBER
+0x220F &#x220F; # N-ARY PRODUCT
+0x2210 &#x2210; # N-ARY COPRODUCT
+0x2211 &#x2211; # N-ARY SUMMATION
+0x2212 &#x2212; # MINUS SIGN
+0x2213 &#x2213; # MINUS-OR-PLUS SIGN
+0x2214 &#x2214; # DOT PLUS
+0x2216 &#x2216; # SET MINUS
+0x2217 &#x2217; # ASTERISK OPERATOR
+0x2218 &#x2218; # RING OPERATOR
+0x221A &#x221A; # SQUARE ROOT
+0x221D &#x221D; # PROPORTIONAL TO
+0x221E &#x221E; # INFINITY
+0x221F &#x221F; # RIGHT ANGLE
+0x2220 &#x2220; # ANGLE
+0x2221 &#x2221; # MEASURED ANGLE
+0x2222 &#x2222; # SPHERICAL ANGLE
+0x2223 &#x2223; # DIVIDES
+0x2224 &#x2224; # DOES NOT DIVIDE
+0x2225 &#x2225; # PARALLEL TO
+0x2226 &#x2226; # NOT PARALLEL TO
+0x2227 &#x2227; # LOGICAL AND
+0x2228 &#x2228; # LOGICAL OR
+0x2229 &#x2229; # INTERSECTION
+0x222A &#x222A; # UNION
+0x222B &#x222B; # INTEGRAL
+0x222E &#x222E; # CONTOUR INTEGRAL
+0x2234 &#x2234; # THEREFORE
+0x2235 &#x2235; # BECAUSE
+0x223C &#x223C; # TILDE OPERATOR
+0x223D &#x223D; # REVERSED TILDE
+0x2240 &#x2240; # WREATH PRODUCT
+0x2241 &#x2241; # NOT TILDE
+0x2243 &#x2243; # ASYMPTOTICALLY EQUAL TO
+0x2244 &#x2244; # NOT ASYMPTOTICALLY EQUAL TO
+0x2245 &#x2245; # APPROXIMATELY EQUAL TO
+0x2247 &#x2247; # NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO
+0x2248 &#x2248; # ALMOST EQUAL TO
+0x2249 &#x2249; # NOT ALMOST EQUAL TO
+0x224A &#x224A; # ALMOST EQUAL OR EQUAL TO
+0x224C &#x224C; # ALL EQUAL TO
+0x224E &#x224E; # GEOMETRICALLY EQUIVALENT TO
+0x224F &#x224F; # DIFFERENCE BETWEEN
+0x2250 &#x2250; # APPROACHES THE LIMIT
+0x2251 &#x2251; # GEOMETRICALLY EQUAL TO
+0x2252 &#x2252; # APPROXIMATELY EQUAL TO OR THE IMAGE OF
+0x2253 &#x2253; # IMAGE OF OR APPROXIMATELY EQUAL TO
+0x2254 &#x2254; # COLON EQUALS
+0x2255 &#x2255; # EQUALS COLON
+0x2256 &#x2256; # RING IN EQUAL TO
+0x2257 &#x2257; # RING EQUAL TO
+0x2259 &#x2259; # ESTIMATES
+0x225C &#x225C; # DELTA EQUAL TO
+0x2260 &#x2260; # NOT EQUAL TO
+0x2261 &#x2261; # IDENTICAL TO
+0x2262 &#x2262; # NOT IDENTICAL TO
+0x2264 &#x2264; # LESS-THAN OR EQUAL TO
+0x2265 &#x2265; # GREATER-THAN OR EQUAL TO
+0x2266 &#x2266; # LESS-THAN OVER EQUAL TO
+0x2267 &#x2267; # GREATER-THAN OVER EQUAL TO
+0x2268 &#x2268; # LESS-THAN BUT NOT EQUAL TO
+0x2269 &#x2269; # GREATER-THAN BUT NOT EQUAL TO
+0x226A &#x226A; # MUCH LESS-THAN
+0x226B &#x226B; # MUCH GREATER-THAN
+0x226C &#x226C; # BETWEEN
+0x226E &#x226E; # NOT LESS-THAN
+0x226F &#x226F; # NOT GREATER-THAN
+0x2270 &#x2270; # NEITHER LESS-THAN NOR EQUAL TO
+0x2271 &#x2271; # NEITHER GREATER-THAN NOR EQUAL TO
+0x2272 &#x2272; # LESS-THAN OR EQUIVALENT TO
+0x2273 &#x2273; # GREATER-THAN OR EQUIVALENT TO
+0x2276 &#x2276; # LESS-THAN OR GREATER-THAN
+0x2277 &#x2277; # GREATER-THAN OR LESS-THAN
+0x227A &#x227A; # PRECEDES
+0x227B &#x227B; # SUCCEEDS
+0x227C &#x227C; # PRECEDES OR EQUAL TO
+0x227D &#x227D; # SUCCEEDS OR EQUAL TO
+0x227E &#x227E; # PRECEDES OR EQUIVALENT TO
+0x227F &#x227F; # SUCCEEDS OR EQUIVALENT TO
+0x2280 &#x2280; # DOES NOT PRECEDE
+0x2281 &#x2281; # DOES NOT SUCCEED
+0x2282 &#x2282; # SUBSET OF
+0x2283 &#x2283; # SUPERSET OF
+0x2284 &#x2284; # NOT A SUBSET OF
+0x2285 &#x2285; # NOT A SUPERSET OF
+0x2286 &#x2286; # SUBSET OF OR EQUAL TO
+0x2287 &#x2287; # SUPERSET OF OR EQUAL TO
+0x2288 &#x2288; # NEITHER A SUBSET OF NOR EQUAL TO
+0x2289 &#x2289; # NEITHER A SUPERSET OF NOR EQUAL TO
+0x228A &#x228A; # SUBSET OF WITH NOT EQUAL TO
+0x228B &#x228B; # SUPERSET OF WITH NOT EQUAL TO
+0x228E &#x228E; # MULTISET UNION
+0x228F &#x228F; # SQUARE IMAGE OF
+0x2290 &#x2290; # SQUARE ORIGINAL OF
+0x2291 &#x2291; # SQUARE IMAGE OF OR EQUAL TO
+0x2292 &#x2292; # SQUARE ORIGINAL OF OR EQUAL TO
+0x2293 &#x2293; # SQUARE CAP
+0x2294 &#x2294; # SQUARE CUP
+0x2295 &#x2295; # CIRCLED PLUS
+0x2296 &#x2296; # CIRCLED MINUS
+0x2297 &#x2297; # CIRCLED TIMES
+0x2298 &#x2298; # CIRCLED DIVISION SLASH
+0x2299 &#x2299; # CIRCLED DOT OPERATOR
+0x229A &#x229A; # CIRCLED RING OPERATOR
+0x229B &#x229B; # CIRCLED ASTERISK OPERATOR
+0x229D &#x229D; # CIRCLED DASH
+0x229E &#x229E; # SQUARED PLUS
+0x229F &#x229F; # SQUARED MINUS
+0x22A0 &#x22A0; # SQUARED TIMES
+0x22A1 &#x22A1; # SQUARED DOT OPERATOR
+0x22A2 &#x22A2; # RIGHT TACK
+0x22A3 &#x22A3; # LEFT TACK
+0x22A4 &#x22A4; # DOWN TACK
+0x22A5 &#x22A5; # UP TACK
+0x22A7 &#x22A7; # MODELS
+0x22A8 &#x22A8; # TRUE
+0x22A9 &#x22A9; # FORCES
+0x22AA &#x22AA; # TRIPLE VERTICAL BAR RIGHT TURNSTILE
+0x22AC &#x22AC; # DOES NOT PROVE
+0x22AD &#x22AD; # NOT TRUE
+0x22AE &#x22AE; # DOES NOT FORCE
+0x22AF &#x22AF; # NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
+0x22B2 &#x22B2; # NORMAL SUBGROUP OF
+0x22B3 &#x22B3; # CONTAINS AS NORMAL SUBGROUP
+0x22B4 &#x22B4; # NORMAL SUBGROUP OF OR EQUAL TO
+0x22B5 &#x22B5; # CONTAINS AS NORMAL SUBGROUP OR EQUAL TO
+0x22B8 &#x22B8; # MULTIMAP
+0x22BA &#x22BA; # INTERCALATE
+0x22BB &#x22BB; # XOR
+0x22BC &#x22BC; # NAND
+0x22C4 &#x22C4; # DIAMOND OPERATOR
+0x22C5 &#x22C5; # DOT OPERATOR
+0x22C6 &#x22C6; # STAR OPERATOR
+0x22C7 &#x22C7; # DIVISION TIMES
+0x22C8 &#x22C8; # BOWTIE
+0x22C9 &#x22C9; # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT
+0x22CA &#x22CA; # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT
+0x22CB &#x22CB; # LEFT SEMIDIRECT PRODUCT
+0x22CC &#x22CC; # RIGHT SEMIDIRECT PRODUCT
+0x22CD &#x22CD; # REVERSED TILDE EQUALS
+0x22CE &#x22CE; # CURLY LOGICAL OR
+0x22CF &#x22CF; # CURLY LOGICAL AND
+0x22D0 &#x22D0; # DOUBLE SUBSET
+0x22D1 &#x22D1; # DOUBLE SUPERSET
+0x22D2 &#x22D2; # DOUBLE INTERSECTION
+0x22D3 &#x22D3; # DOUBLE UNION
+0x22D4 &#x22D4; # PITCHFORK
+0x22D6 &#x22D6; # LESS-THAN WITH DOT
+0x22D7 &#x22D7; # GREATER-THAN WITH DOT
+0x22D8 &#x22D8; # VERY MUCH LESS-THAN
+0x22D9 &#x22D9; # VERY MUCH GREATER-THAN
+0x22DA &#x22DA; # LESS-THAN EQUAL TO OR GREATER-THAN
+0x22DB &#x22DB; # GREATER-THAN EQUAL TO OR LESS-THAN
+0x22DC &#x22DC; # EQUAL TO OR LESS-THAN
+0x22DD &#x22DD; # EQUAL TO OR GREATER-THAN
+0x22DE &#x22DE; # EQUAL TO OR PRECEDES
+0x22DF &#x22DF; # EQUAL TO OR SUCCEEDS
+0x22E0 &#x22E0; # DOES NOT PRECEDE OR EQUAL
+0x22E1 &#x22E1; # DOES NOT SUCCEED OR EQUAL
+0x22E6 &#x22E6; # LESS-THAN BUT NOT EQUIVALENT TO
+0x22E7 &#x22E7; # GREATER-THAN BUT NOT EQUIVALENT TO
+0x22E8 &#x22E8; # PRECEDES BUT NOT EQUIVALENT TO
+0x22E9 &#x22E9; # SUCCEEDS BUT NOT EQUIVALENT TO
+0x22EA &#x22EA; # NOT NORMAL SUBGROUP OF
+0x22EB &#x22EB; # DOES NOT CONTAIN AS NORMAL SUBGROUP
+0x22EC &#x22EC; # NOT NORMAL SUBGROUP OF OR EQUAL TO
+0x22ED &#x22ED; # DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL
+0x22EE &#x22EE; # VERTICAL ELLIPSIS
+0x2306 &#x2306; # PERSPECTIVE
+0x2308 &#x2308; # LEFT CEILING
+0x2309 &#x2309; # RIGHT CEILING
+0x230A &#x230A; # LEFT FLOOR
+0x230B &#x230B; # RIGHT FLOOR
+0x230C &#x230C; # BOTTOM RIGHT CROP
+0x230D &#x230D; # BOTTOM LEFT CROP
+0x230E &#x230E; # TOP RIGHT CROP
+0x230F &#x230F; # TOP LEFT CROP
+0x2315 &#x2315; # TELEPHONE RECORDER
+0x2316 &#x2316; # POSITION INDICATOR
+0x231C &#x231C; # TOP LEFT CORNER
+0x231D &#x231D; # TOP RIGHT CORNER
+0x231E &#x231E; # BOTTOM LEFT CORNER
+0x231F &#x231F; # BOTTOM RIGHT CORNER
+0x2322 &#x2322; # FROWN
+0x2323 &#x2323; # SMILE
+0x2329 &#x2329; # LEFT-POINTING ANGLE BRACKET
+0x232A &#x232A; # RIGHT-POINTING ANGLE BRACKET
+0x2423 &#x2423; # OPEN BOX
+0x24C8 &#x24C8; # CIRCLED LATIN CAPITAL LETTER S
+0x2500 &#x2500; # BOX DRAWINGS LIGHT HORIZONTAL
+0x2502 &#x2502; # BOX DRAWINGS LIGHT VERTICAL
+0x250C &#x250C; # BOX DRAWINGS LIGHT DOWN AND RIGHT
+0x2510 &#x2510; # BOX DRAWINGS LIGHT DOWN AND LEFT
+0x2514 &#x2514; # BOX DRAWINGS LIGHT UP AND RIGHT
+0x2518 &#x2518; # BOX DRAWINGS LIGHT UP AND LEFT
+0x251C &#x251C; # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+0x2524 &#x2524; # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+0x252C &#x252C; # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+0x2534 &#x2534; # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+0x253C &#x253C; # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+0x2550 &#x2550; # BOX DRAWINGS DOUBLE HORIZONTAL
+0x2551 &#x2551; # BOX DRAWINGS DOUBLE VERTICAL
+0x2552 &#x2552; # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+0x2553 &#x2553; # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+0x2554 &#x2554; # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+0x2555 &#x2555; # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+0x2556 &#x2556; # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+0x2557 &#x2557; # BOX DRAWINGS DOUBLE DOWN AND LEFT
+0x2558 &#x2558; # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+0x2559 &#x2559; # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+0x255A &#x255A; # BOX DRAWINGS DOUBLE UP AND RIGHT
+0x255B &#x255B; # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+0x255C &#x255C; # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+0x255D &#x255D; # BOX DRAWINGS DOUBLE UP AND LEFT
+0x255E &#x255E; # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+0x255F &#x255F; # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+0x2560 &#x2560; # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+0x2561 &#x2561; # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+0x2562 &#x2562; # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+0x2563 &#x2563; # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+0x2564 &#x2564; # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+0x2565 &#x2565; # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+0x2566 &#x2566; # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+0x2567 &#x2567; # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+0x2568 &#x2568; # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+0x2569 &#x2569; # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+0x256A &#x256A; # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+0x256B &#x256B; # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+0x256C &#x256C; # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+0x2580 &#x2580; # UPPER HALF BLOCK
+0x2584 &#x2584; # LOWER HALF BLOCK
+0x2588 &#x2588; # FULL BLOCK
+0x2591 &#x2591; # LIGHT SHADE
+0x2592 &#x2592; # MEDIUM SHADE
+0x2593 &#x2593; # DARK SHADE
+0x25A1 &#x25A1; # WHITE SQUARE
+0x25AA &#x25AA; # BLACK SMALL SQUARE
+0x25AD &#x25AD; # WHITE RECTANGLE
+0x25AE &#x25AE; # BLACK VERTICAL RECTANGLE
+0x25B3 &#x25B3; # WHITE UP-POINTING TRIANGLE
+0x25B4 &#x25B4; # BLACK UP-POINTING SMALL TRIANGLE
+0x25B5 &#x25B5; # WHITE UP-POINTING SMALL TRIANGLE
+0x25B8 &#x25B8; # BLACK RIGHT-POINTING SMALL TRIANGLE
+0x25B9 &#x25B9; # WHITE RIGHT-POINTING SMALL TRIANGLE
+0x25BD &#x25BD; # WHITE DOWN-POINTING TRIANGLE
+0x25BE &#x25BE; # BLACK DOWN-POINTING SMALL TRIANGLE
+0x25BF &#x25BF; # WHITE DOWN-POINTING SMALL TRIANGLE
+0x25C2 &#x25C2; # BLACK LEFT-POINTING SMALL TRIANGLE
+0x25C3 &#x25C3; # WHITE LEFT-POINTING SMALL TRIANGLE
+0x25CA &#x25CA; # LOZENGE
+0x25CB &#x25CB; # WHITE CIRCLE
+0x2605 &#x2605; # BLACK STAR
+0x2606 &#x2606; # WHITE STAR
+0x260E &#x260E; # BLACK TELEPHONE
+0x2640 &#x2640; # FEMALE SIGN
+0x2642 &#x2642; # MALE SIGN
+0x2660 &#x2660; # BLACK SPADE SUIT
+0x2663 &#x2663; # BLACK CLUB SUIT
+0x2665 &#x2665; # BLACK HEART SUIT
+0x2666 &#x2666; # BLACK DIAMOND SUIT
+0x266A &#x266A; # EIGHTH NOTE
+0x266D &#x266D; # MUSIC FLAT SIGN
+0x266E &#x266E; # MUSIC NATURAL SIGN
+0x266F &#x266F; # MUSIC SHARP SIGN
+0x2713 &#x2713; # CHECK MARK
+0x2717 &#x2717; # BALLOT X
+0x2720 &#x2720; # MALTESE CROSS
+0x2726 &#x2726; # BLACK FOUR POINTED STAR
+0x2727 &#x2727; # WHITE FOUR POINTED STAR
+0x2736 &#x2736; # SIX POINTED BLACK STAR
+0xFB00 &#xFB00; # LATIN SMALL LIGATURE FF
+0xFB01 &#xFB01; # LATIN SMALL LIGATURE FI
+0xFB02 &#xFB02; # LATIN SMALL LIGATURE FL
+0xFB03 &#xFB03; # LATIN SMALL LIGATURE FFI
+0xFB04 &#xFB04; # LATIN SMALL LIGATURE FFL
+
+
+</PRE>
+</BODY>
+</HTML>
diff --git a/test/utf-8-demo.html b/test/utf-8-demo.html
new file mode 100644
index 0000000..fe47451
--- /dev/null
+++ b/test/utf-8-demo.html
@@ -0,0 +1,217 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<HTML>
+<HEAD>
+<TITLE>Markus Kuhn's UTF-8 demo</TITLE>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
+<LINK REV="made" HREF="mailto:dickey@invisible-island.net">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+</HEAD>
+
+<BODY>
+<pre>
+UTF-8 encoded sample plain-text file
+&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;&#x203e;
+
+Markus Kuhn [&#x2c8;ma&#x2b3;k&#x28a;s ku&#x2d0;n] &lt;mkuhn@acm.org&gt; &#x2014; 1999-08-20
+
+
+The ASCII compatible UTF-8 encoding of ISO 10646 and Unicode
+plain-text files is defined in RFC 2279 and in ISO 10646-1 Annex R.
+
+
+Using Unicode/UTF-8, you can write in emails and source code things such as
+
+Mathematics and Sciences:
+
+ &#x222e; E&#x22c5;da = Q, n &#x2192; &#x221e;, &#x2211; f(i) = &#x220f; g(i), &#x2200;x&#x2208;&#x211d;: &#x2308;x&#x2309; = &#x2212;&#x230a;&#x2212;x&#x230b;, &#x3b1; &#x2227; &#xac;&#x3b2; = &#xac;(&#xac;&#x3b1; &#x2228; &#x3b2;),
+
+ &#x2115; &#x2286; &#x2115;&#x2080; &#x2282; &#x2124; &#x2282; &#x211a; &#x2282; &#x211d; &#x2282; &#x2102;, &#x22a5; &lt; a &#x2260; b &#x2261; c &#x2264; d &#x226a; &#x22a4; &#x21d2; (A &#x21d4; B),
+
+ 2H&#x2082; + O&#x2082; &#x21cc; 2H&#x2082;O, R = 4.7 k&#x3a9;, &#x2300; 200 mm
+
+Linguistics and dictionaries:
+
+ &#xf0;i &#x131;nt&#x259;&#x2c8;n&#xe6;&#x283;&#x259;n&#x259;l f&#x259;&#x2c8;n&#x25b;t&#x131;k &#x259;so&#x28a;si&#x2c8;e&#x131;&#x283;n
+ Y [&#x2c8;&#x28f;psil&#x254;n], Yen [j&#x25b;n], Yoga [&#x2c8;jo&#x2d0;g&#x251;]
+
+APL:
+
+ ((V&#x2373;V)=&#x2373;&#x2374;V)/V&#x2190;,V &#x2337;&#x2190;&#x2373;&#x2192;&#x2374;&#x2206;&#x2207;&#x2283;&#x203e;&#x234e;&#x2355;&#x2308;
+
+Nicer typography in plain text files:
+
+ &#x2554;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2557;
+ &#x2551; &#x2551;
+ &#x2551; &#x2022; &#x2018;single&#x2019; and &#x201c;double&#x201d; quotes &#x2551;
+ &#x2551; &#x2551;
+ &#x2551; &#x2022; Curly apostrophes: &#x201c;We&#x2019;ve been here&#x201d; &#x2551;
+ &#x2551; &#x2551;
+ &#x2551; &#x2022; Latin-1 apostrophe and accents: '&#xb4;` &#x2551;
+ &#x2551; &#x2551;
+ &#x2551; &#x2022; &#x201a;deutsche&#x2018; &#x201e;Anf&#xfc;hrungszeichen&#x201c; &#x2551;
+ &#x2551; &#x2551;
+ &#x2551; &#x2022; &#x2020;, &#x2021;, &#x2030;, &#x2022;, 3&#x2013;4, &#x2014;, &#x2212;5/+5, &#x2122;, &#x2026; &#x2551;
+ &#x2551; &#x2551;
+ &#x2551; &#x2022; ASCII safety test: 1lI|, 0OD, 8B &#x2551;
+ &#x2551; &#x256d;&#x2500;&#x2500;&#x2500;&#x2500;&#x2500;&#x2500;&#x2500;&#x2500;&#x2500;&#x256e; &#x2551;
+ &#x2551; &#x2022; the euro symbol: &#x2502; &#x20ac; 14.95 &#x2502; &#x2551;
+ &#x2551; &#x2570;&#x2500;&#x2500;&#x2500;&#x2500;&#x2500;&#x2500;&#x2500;&#x2500;&#x2500;&#x256f; &#x2551;
+ &#x255a;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x2550;&#x255d;
+
+Greek (in Polytonic):
+
+ The Greek anthem:
+
+ &#x3a3;&#x1f72; &#x3b3;&#x3bd;&#x3c9;&#x3c1;&#x1f77;&#x3b6;&#x3c9; &#x1f00;&#x3c0;&#x1f78; &#x3c4;&#x1f74;&#x3bd; &#x3ba;&#x1f79;&#x3c8;&#x3b7;
+ &#x3c4;&#x3bf;&#x1fe6; &#x3c3;&#x3c0;&#x3b1;&#x3b8;&#x3b9;&#x3bf;&#x1fe6; &#x3c4;&#x1f74;&#x3bd; &#x3c4;&#x3c1;&#x3bf;&#x3bc;&#x3b5;&#x3c1;&#x1f75;,
+ &#x3c3;&#x1f72; &#x3b3;&#x3bd;&#x3c9;&#x3c1;&#x1f77;&#x3b6;&#x3c9; &#x1f00;&#x3c0;&#x1f78; &#x3c4;&#x1f74;&#x3bd; &#x1f44;&#x3c8;&#x3b7;
+ &#x3c0;&#x3bf;&#x1f7a; &#x3bc;&#x1f72; &#x3b2;&#x1f77;&#x3b1; &#x3bc;&#x3b5;&#x3c4;&#x3c1;&#x1f71;&#x3b5;&#x3b9; &#x3c4;&#x1f74; &#x3b3;&#x1fc6;.
+
+ &#x1fbf;&#x391;&#x3c0;&#x1fbf; &#x3c4;&#x1f70; &#x3ba;&#x1f79;&#x3ba;&#x3ba;&#x3b1;&#x3bb;&#x3b1; &#x3b2;&#x3b3;&#x3b1;&#x3bb;&#x3bc;&#x1f73;&#x3bd;&#x3b7;
+ &#x3c4;&#x1ff6;&#x3bd; &#x1ffe;&#x395;&#x3bb;&#x3bb;&#x1f75;&#x3bd;&#x3c9;&#x3bd; &#x3c4;&#x1f70; &#x1f31;&#x3b5;&#x3c1;&#x1f71;
+ &#x3ba;&#x3b1;&#x1f76; &#x3c3;&#x1f70;&#x3bd; &#x3c0;&#x3c1;&#x1ff6;&#x3c4;&#x3b1; &#x1f00;&#x3bd;&#x3b4;&#x3c1;&#x3b5;&#x3b9;&#x3c9;&#x3bc;&#x1f73;&#x3bd;&#x3b7;
+ &#x3c7;&#x3b1;&#x1fd6;&#x3c1;&#x3b5;, &#x1f66; &#x3c7;&#x3b1;&#x1fd6;&#x3c1;&#x3b5;, &#x1fbf;&#x395;&#x3bb;&#x3b5;&#x3c5;&#x3b8;&#x3b5;&#x3c1;&#x3b9;&#x1f71;!
+
+ From a speech of Demosthenes in the 4th century BC:
+
+ &#x39f;&#x1f50;&#x3c7;&#x1f76; &#x3c4;&#x3b1;&#x1f50;&#x3c4;&#x1f70; &#x3c0;&#x3b1;&#x3c1;&#x1f77;&#x3c3;&#x3c4;&#x3b1;&#x3c4;&#x3b1;&#x1f77; &#x3bc;&#x3bf;&#x3b9; &#x3b3;&#x3b9;&#x3b3;&#x3bd;&#x1f7d;&#x3c3;&#x3ba;&#x3b5;&#x3b9;&#x3bd;, &#x1f66; &#x1f04;&#x3bd;&#x3b4;&#x3c1;&#x3b5;&#x3c2; &#x1fbf;&#x391;&#x3b8;&#x3b7;&#x3bd;&#x3b1;&#x1fd6;&#x3bf;&#x3b9;,
+ &#x1f45;&#x3c4;&#x3b1;&#x3bd; &#x3c4;&#x1fbf; &#x3b5;&#x1f30;&#x3c2; &#x3c4;&#x1f70; &#x3c0;&#x3c1;&#x1f71;&#x3b3;&#x3bc;&#x3b1;&#x3c4;&#x3b1; &#x1f00;&#x3c0;&#x3bf;&#x3b2;&#x3bb;&#x1f73;&#x3c8;&#x3c9; &#x3ba;&#x3b1;&#x1f76; &#x1f45;&#x3c4;&#x3b1;&#x3bd; &#x3c0;&#x3c1;&#x1f78;&#x3c2; &#x3c4;&#x3bf;&#x1f7a;&#x3c2;
+ &#x3bb;&#x1f79;&#x3b3;&#x3bf;&#x3c5;&#x3c2; &#x3bf;&#x1f53;&#x3c2; &#x1f00;&#x3ba;&#x3bf;&#x1f7b;&#x3c9;&#x387; &#x3c4;&#x3bf;&#x1f7a;&#x3c2; &#x3bc;&#x1f72;&#x3bd; &#x3b3;&#x1f70;&#x3c1; &#x3bb;&#x1f79;&#x3b3;&#x3bf;&#x3c5;&#x3c2; &#x3c0;&#x3b5;&#x3c1;&#x1f76; &#x3c4;&#x3bf;&#x1fe6;
+ &#x3c4;&#x3b9;&#x3bc;&#x3c9;&#x3c1;&#x1f75;&#x3c3;&#x3b1;&#x3c3;&#x3b8;&#x3b1;&#x3b9; &#x3a6;&#x1f77;&#x3bb;&#x3b9;&#x3c0;&#x3c0;&#x3bf;&#x3bd; &#x1f41;&#x3c1;&#x1ff6; &#x3b3;&#x3b9;&#x3b3;&#x3bd;&#x3bf;&#x3bc;&#x1f73;&#x3bd;&#x3bf;&#x3c5;&#x3c2;, &#x3c4;&#x1f70; &#x3b4;&#x1f72; &#x3c0;&#x3c1;&#x1f71;&#x3b3;&#x3bc;&#x3b1;&#x3c4;&#x1fbf;
+ &#x3b5;&#x1f30;&#x3c2; &#x3c4;&#x3bf;&#x1fe6;&#x3c4;&#x3bf; &#x3c0;&#x3c1;&#x3bf;&#x1f75;&#x3ba;&#x3bf;&#x3bd;&#x3c4;&#x3b1;, &#x1f65;&#x3c3;&#x3b8;&#x1fbf; &#x1f45;&#x3c0;&#x3c9;&#x3c2; &#x3bc;&#x1f74; &#x3c0;&#x3b5;&#x3b9;&#x3c3;&#x1f79;&#x3bc;&#x3b5;&#x3b8;&#x1fbf; &#x3b1;&#x1f50;&#x3c4;&#x3bf;&#x1f76;
+ &#x3c0;&#x3c1;&#x1f79;&#x3c4;&#x3b5;&#x3c1;&#x3bf;&#x3bd; &#x3ba;&#x3b1;&#x3ba;&#x1ff6;&#x3c2; &#x3c3;&#x3ba;&#x1f73;&#x3c8;&#x3b1;&#x3c3;&#x3b8;&#x3b1;&#x3b9; &#x3b4;&#x1f73;&#x3bf;&#x3bd;. &#x3bf;&#x1f50;&#x3b4;&#x1f73;&#x3bd; &#x3bf;&#x1f56;&#x3bd; &#x1f04;&#x3bb;&#x3bb;&#x3bf; &#x3bc;&#x3bf;&#x3b9; &#x3b4;&#x3bf;&#x3ba;&#x3bf;&#x1fe6;&#x3c3;&#x3b9;&#x3bd;
+ &#x3bf;&#x1f31; &#x3c4;&#x1f70; &#x3c4;&#x3bf;&#x3b9;&#x3b1;&#x1fe6;&#x3c4;&#x3b1; &#x3bb;&#x1f73;&#x3b3;&#x3bf;&#x3bd;&#x3c4;&#x3b5;&#x3c2; &#x1f22; &#x3c4;&#x1f74;&#x3bd; &#x1f51;&#x3c0;&#x1f79;&#x3b8;&#x3b5;&#x3c3;&#x3b9;&#x3bd;, &#x3c0;&#x3b5;&#x3c1;&#x1f76; &#x1f27;&#x3c2; &#x3b2;&#x3bf;&#x3c5;&#x3bb;&#x3b5;&#x1f7b;&#x3b5;&#x3c3;&#x3b8;&#x3b1;&#x3b9;,
+ &#x3bf;&#x1f50;&#x3c7;&#x1f76; &#x3c4;&#x1f74;&#x3bd; &#x3bf;&#x1f56;&#x3c3;&#x3b1;&#x3bd; &#x3c0;&#x3b1;&#x3c1;&#x3b9;&#x3c3;&#x3c4;&#x1f71;&#x3bd;&#x3c4;&#x3b5;&#x3c2; &#x1f51;&#x3bc;&#x1fd6;&#x3bd; &#x1f01;&#x3bc;&#x3b1;&#x3c1;&#x3c4;&#x1f71;&#x3bd;&#x3b5;&#x3b9;&#x3bd;. &#x1f10;&#x3b3;&#x1f7c; &#x3b4;&#x1f73;, &#x1f45;&#x3c4;&#x3b9; &#x3bc;&#x1f73;&#x3bd;
+ &#x3c0;&#x3bf;&#x3c4;&#x1fbf; &#x1f10;&#x3be;&#x1fc6;&#x3bd; &#x3c4;&#x1fc7; &#x3c0;&#x1f79;&#x3bb;&#x3b5;&#x3b9; &#x3ba;&#x3b1;&#x1f76; &#x3c4;&#x1f70; &#x3b1;&#x1f51;&#x3c4;&#x1fc6;&#x3c2; &#x1f14;&#x3c7;&#x3b5;&#x3b9;&#x3bd; &#x1f00;&#x3c3;&#x3c6;&#x3b1;&#x3bb;&#x1ff6;&#x3c2; &#x3ba;&#x3b1;&#x1f76; &#x3a6;&#x1f77;&#x3bb;&#x3b9;&#x3c0;&#x3c0;&#x3bf;&#x3bd;
+ &#x3c4;&#x3b9;&#x3bc;&#x3c9;&#x3c1;&#x1f75;&#x3c3;&#x3b1;&#x3c3;&#x3b8;&#x3b1;&#x3b9;, &#x3ba;&#x3b1;&#x1f76; &#x3bc;&#x1f71;&#x3bb;&#x1fbf; &#x1f00;&#x3ba;&#x3c1;&#x3b9;&#x3b2;&#x1ff6;&#x3c2; &#x3bf;&#x1f36;&#x3b4;&#x3b1;&#x387; &#x1f10;&#x3c0;&#x1fbf; &#x1f10;&#x3bc;&#x3bf;&#x1fe6; &#x3b3;&#x1f71;&#x3c1;, &#x3bf;&#x1f50; &#x3c0;&#x1f71;&#x3bb;&#x3b1;&#x3b9;
+ &#x3b3;&#x1f73;&#x3b3;&#x3bf;&#x3bd;&#x3b5;&#x3bd; &#x3c4;&#x3b1;&#x1fe6;&#x3c4;&#x1fbf; &#x1f00;&#x3bc;&#x3c6;&#x1f79;&#x3c4;&#x3b5;&#x3c1;&#x3b1;&#x387; &#x3bd;&#x1fe6;&#x3bd; &#x3bc;&#x1f73;&#x3bd;&#x3c4;&#x3bf;&#x3b9; &#x3c0;&#x1f73;&#x3c0;&#x3b5;&#x3b9;&#x3c3;&#x3bc;&#x3b1;&#x3b9; &#x3c4;&#x3bf;&#x1fe6;&#x3b8;&#x1fbf; &#x1f31;&#x3ba;&#x3b1;&#x3bd;&#x1f78;&#x3bd;
+ &#x3c0;&#x3c1;&#x3bf;&#x3bb;&#x3b1;&#x3b2;&#x3b5;&#x1fd6;&#x3bd; &#x1f21;&#x3bc;&#x1fd6;&#x3bd; &#x3b5;&#x1f36;&#x3bd;&#x3b1;&#x3b9; &#x3c4;&#x1f74;&#x3bd; &#x3c0;&#x3c1;&#x1f7d;&#x3c4;&#x3b7;&#x3bd;, &#x1f45;&#x3c0;&#x3c9;&#x3c2; &#x3c4;&#x3bf;&#x1f7a;&#x3c2; &#x3c3;&#x3c5;&#x3bc;&#x3bc;&#x1f71;&#x3c7;&#x3bf;&#x3c5;&#x3c2;
+ &#x3c3;&#x1f7d;&#x3c3;&#x3bf;&#x3bc;&#x3b5;&#x3bd;. &#x1f10;&#x1f70;&#x3bd; &#x3b3;&#x1f70;&#x3c1; &#x3c4;&#x3bf;&#x1fe6;&#x3c4;&#x3bf; &#x3b2;&#x3b5;&#x3b2;&#x3b1;&#x1f77;&#x3c9;&#x3c2; &#x1f51;&#x3c0;&#x1f71;&#x3c1;&#x3be;&#x1fc3;, &#x3c4;&#x1f79;&#x3c4;&#x3b5; &#x3ba;&#x3b1;&#x1f76; &#x3c0;&#x3b5;&#x3c1;&#x1f76; &#x3c4;&#x3bf;&#x1fe6;
+ &#x3c4;&#x1f77;&#x3bd;&#x3b1; &#x3c4;&#x3b9;&#x3bc;&#x3c9;&#x3c1;&#x1f75;&#x3c3;&#x3b5;&#x3c4;&#x3b1;&#x1f77; &#x3c4;&#x3b9;&#x3c2; &#x3ba;&#x3b1;&#x1f76; &#x1f43;&#x3bd; &#x3c4;&#x3c1;&#x1f79;&#x3c0;&#x3bf;&#x3bd; &#x1f10;&#x3be;&#x1f73;&#x3c3;&#x3c4;&#x3b1;&#x3b9; &#x3c3;&#x3ba;&#x3bf;&#x3c0;&#x3b5;&#x1fd6;&#x3bd;&#x387; &#x3c0;&#x3c1;&#x1f76;&#x3bd; &#x3b4;&#x1f72;
+ &#x3c4;&#x1f74;&#x3bd; &#x1f00;&#x3c1;&#x3c7;&#x1f74;&#x3bd; &#x1f40;&#x3c1;&#x3b8;&#x1ff6;&#x3c2; &#x1f51;&#x3c0;&#x3bf;&#x3b8;&#x1f73;&#x3c3;&#x3b8;&#x3b1;&#x3b9;, &#x3bc;&#x1f71;&#x3c4;&#x3b1;&#x3b9;&#x3bf;&#x3bd; &#x1f21;&#x3b3;&#x3bf;&#x1fe6;&#x3bc;&#x3b1;&#x3b9; &#x3c0;&#x3b5;&#x3c1;&#x1f76; &#x3c4;&#x1fc6;&#x3c2;
+ &#x3c4;&#x3b5;&#x3bb;&#x3b5;&#x3c5;&#x3c4;&#x1fc6;&#x3c2; &#x1f41;&#x3bd;&#x3c4;&#x3b9;&#x3bd;&#x3bf;&#x1fe6;&#x3bd; &#x3c0;&#x3bf;&#x3b9;&#x3b5;&#x1fd6;&#x3c3;&#x3b8;&#x3b1;&#x3b9; &#x3bb;&#x1f79;&#x3b3;&#x3bf;&#x3bd;.
+
+ &#x394;&#x3b7;&#x3bc;&#x3bf;&#x3c3;&#x3b8;&#x1f73;&#x3bd;&#x3bf;&#x3c5;&#x3c2;, &#x393;&#x1ffd; &#x1fbf;&#x39f;&#x3bb;&#x3c5;&#x3bd;&#x3b8;&#x3b9;&#x3b1;&#x3ba;&#x1f78;&#x3c2;
+
+Georgian:
+
+ From a Unicode conference invitation:
+
+ &#x10d2;&#x10d7;&#x10ee;&#x10dd;&#x10d5;&#x10d7; &#x10d0;&#x10ee;&#x10da;&#x10d0;&#x10d5;&#x10d4; &#x10d2;&#x10d0;&#x10d8;&#x10d0;&#x10e0;&#x10dd;&#x10d7; &#x10e0;&#x10d4;&#x10d2;&#x10d8;&#x10e1;&#x10e2;&#x10e0;&#x10d0;&#x10ea;&#x10d8;&#x10d0; Unicode-&#x10d8;&#x10e1; &#x10db;&#x10d4;&#x10d0;&#x10d7;&#x10d4; &#x10e1;&#x10d0;&#x10d4;&#x10e0;&#x10d7;&#x10d0;&#x10e8;&#x10dd;&#x10e0;&#x10d8;&#x10e1;&#x10dd;
+ &#x10d9;&#x10dd;&#x10dc;&#x10e4;&#x10d4;&#x10e0;&#x10d4;&#x10dc;&#x10ea;&#x10d8;&#x10d0;&#x10d6;&#x10d4; &#x10d3;&#x10d0;&#x10e1;&#x10d0;&#x10e1;&#x10ec;&#x10e0;&#x10d4;&#x10d1;&#x10d0;&#x10d3;, &#x10e0;&#x10dd;&#x10db;&#x10d4;&#x10da;&#x10d8;&#x10ea; &#x10d2;&#x10d0;&#x10d8;&#x10db;&#x10d0;&#x10e0;&#x10d7;&#x10d4;&#x10d1;&#x10d0; 10-12 &#x10db;&#x10d0;&#x10e0;&#x10e2;&#x10e1;,
+ &#x10e5;. &#x10db;&#x10d0;&#x10d8;&#x10dc;&#x10ea;&#x10e8;&#x10d8;, &#x10d2;&#x10d4;&#x10e0;&#x10db;&#x10d0;&#x10dc;&#x10d8;&#x10d0;&#x10e8;&#x10d8;. &#x10d9;&#x10dd;&#x10dc;&#x10e4;&#x10d4;&#x10e0;&#x10d4;&#x10dc;&#x10ea;&#x10d8;&#x10d0; &#x10e8;&#x10d4;&#x10f0;&#x10d9;&#x10e0;&#x10d4;&#x10d1;&#x10e1; &#x10d4;&#x10e0;&#x10d7;&#x10d0;&#x10d3; &#x10db;&#x10e1;&#x10dd;&#x10e4;&#x10da;&#x10d8;&#x10dd;&#x10e1;
+ &#x10d4;&#x10e5;&#x10e1;&#x10de;&#x10d4;&#x10e0;&#x10e2;&#x10d4;&#x10d1;&#x10e1; &#x10d8;&#x10e1;&#x10d4;&#x10d7; &#x10d3;&#x10d0;&#x10e0;&#x10d2;&#x10d4;&#x10d1;&#x10e8;&#x10d8; &#x10e0;&#x10dd;&#x10d2;&#x10dd;&#x10e0;&#x10d8;&#x10ea;&#x10d0;&#x10d0; &#x10d8;&#x10dc;&#x10e2;&#x10d4;&#x10e0;&#x10dc;&#x10d4;&#x10e2;&#x10d8; &#x10d3;&#x10d0; Unicode-&#x10d8;,
+ &#x10d8;&#x10dc;&#x10e2;&#x10d4;&#x10e0;&#x10dc;&#x10d0;&#x10ea;&#x10d8;&#x10dd;&#x10dc;&#x10d0;&#x10da;&#x10d8;&#x10d6;&#x10d0;&#x10ea;&#x10d8;&#x10d0; &#x10d3;&#x10d0; &#x10da;&#x10dd;&#x10d9;&#x10d0;&#x10da;&#x10d8;&#x10d6;&#x10d0;&#x10ea;&#x10d8;&#x10d0;, Unicode-&#x10d8;&#x10e1; &#x10d2;&#x10d0;&#x10db;&#x10dd;&#x10e7;&#x10d4;&#x10dc;&#x10d4;&#x10d1;&#x10d0;
+ &#x10dd;&#x10de;&#x10d4;&#x10e0;&#x10d0;&#x10ea;&#x10d8;&#x10e3;&#x10da; &#x10e1;&#x10d8;&#x10e1;&#x10e2;&#x10d4;&#x10db;&#x10d4;&#x10d1;&#x10e1;&#x10d0;, &#x10d3;&#x10d0; &#x10d2;&#x10d0;&#x10db;&#x10dd;&#x10e7;&#x10d4;&#x10dc;&#x10d4;&#x10d1;&#x10d8;&#x10d7; &#x10de;&#x10e0;&#x10dd;&#x10d2;&#x10e0;&#x10d0;&#x10db;&#x10d4;&#x10d1;&#x10e8;&#x10d8;, &#x10e8;&#x10e0;&#x10d8;&#x10e4;&#x10e2;&#x10d4;&#x10d1;&#x10e8;&#x10d8;,
+ &#x10e2;&#x10d4;&#x10e5;&#x10e1;&#x10e2;&#x10d4;&#x10d1;&#x10d8;&#x10e1; &#x10d3;&#x10d0;&#x10db;&#x10e3;&#x10e8;&#x10d0;&#x10d5;&#x10d4;&#x10d1;&#x10d0;&#x10e1;&#x10d0; &#x10d3;&#x10d0; &#x10db;&#x10e0;&#x10d0;&#x10d5;&#x10d0;&#x10da;&#x10d4;&#x10dc;&#x10dd;&#x10d5;&#x10d0;&#x10dc; &#x10d9;&#x10dd;&#x10db;&#x10de;&#x10d8;&#x10e3;&#x10e2;&#x10d4;&#x10e0;&#x10e3;&#x10da; &#x10e1;&#x10d8;&#x10e1;&#x10e2;&#x10d4;&#x10db;&#x10d4;&#x10d1;&#x10e8;&#x10d8;.
+
+Russian:
+
+ From a Unicode conference invitation:
+
+ &#x417;&#x430;&#x440;&#x435;&#x433;&#x438;&#x441;&#x442;&#x440;&#x438;&#x440;&#x443;&#x439;&#x442;&#x435;&#x441;&#x44c; &#x441;&#x435;&#x439;&#x447;&#x430;&#x441; &#x43d;&#x430; &#x414;&#x435;&#x441;&#x44f;&#x442;&#x443;&#x44e; &#x41c;&#x435;&#x436;&#x434;&#x443;&#x43d;&#x430;&#x440;&#x43e;&#x434;&#x43d;&#x443;&#x44e; &#x41a;&#x43e;&#x43d;&#x444;&#x435;&#x440;&#x435;&#x43d;&#x446;&#x438;&#x44e; &#x43f;&#x43e;
+ Unicode, &#x43a;&#x43e;&#x442;&#x43e;&#x440;&#x430;&#x44f; &#x441;&#x43e;&#x441;&#x442;&#x43e;&#x438;&#x442;&#x441;&#x44f; 10-12 &#x43c;&#x430;&#x440;&#x442;&#x430; 1997 &#x433;&#x43e;&#x434;&#x430; &#x432; &#x41c;&#x430;&#x439;&#x43d;&#x446;&#x435; &#x432; &#x413;&#x435;&#x440;&#x43c;&#x430;&#x43d;&#x438;&#x438;.
+ &#x41a;&#x43e;&#x43d;&#x444;&#x435;&#x440;&#x435;&#x43d;&#x446;&#x438;&#x44f; &#x441;&#x43e;&#x431;&#x435;&#x440;&#x435;&#x442; &#x448;&#x438;&#x440;&#x43e;&#x43a;&#x438;&#x439; &#x43a;&#x440;&#x443;&#x433; &#x44d;&#x43a;&#x441;&#x43f;&#x435;&#x440;&#x442;&#x43e;&#x432; &#x43f;&#x43e; &#x432;&#x43e;&#x43f;&#x440;&#x43e;&#x441;&#x430;&#x43c; &#x433;&#x43b;&#x43e;&#x431;&#x430;&#x43b;&#x44c;&#x43d;&#x43e;&#x433;&#x43e;
+ &#x418;&#x43d;&#x442;&#x435;&#x440;&#x43d;&#x435;&#x442;&#x430; &#x438; Unicode, &#x43b;&#x43e;&#x43a;&#x430;&#x43b;&#x438;&#x437;&#x430;&#x446;&#x438;&#x438; &#x438; &#x438;&#x43d;&#x442;&#x435;&#x440;&#x43d;&#x430;&#x446;&#x438;&#x43e;&#x43d;&#x430;&#x43b;&#x438;&#x437;&#x430;&#x446;&#x438;&#x438;, &#x432;&#x43e;&#x43f;&#x43b;&#x43e;&#x449;&#x435;&#x43d;&#x438;&#x44e; &#x438;
+ &#x43f;&#x440;&#x438;&#x43c;&#x435;&#x43d;&#x435;&#x43d;&#x438;&#x44e; Unicode &#x432; &#x440;&#x430;&#x437;&#x43b;&#x438;&#x447;&#x43d;&#x44b;&#x445; &#x43e;&#x43f;&#x435;&#x440;&#x430;&#x446;&#x438;&#x43e;&#x43d;&#x43d;&#x44b;&#x445; &#x441;&#x438;&#x441;&#x442;&#x435;&#x43c;&#x430;&#x445; &#x438; &#x43f;&#x440;&#x43e;&#x433;&#x440;&#x430;&#x43c;&#x43c;&#x43d;&#x44b;&#x445;
+ &#x43f;&#x440;&#x438;&#x43b;&#x43e;&#x436;&#x435;&#x43d;&#x438;&#x44f;&#x445;, &#x448;&#x440;&#x438;&#x444;&#x442;&#x430;&#x445;, &#x432;&#x435;&#x440;&#x441;&#x442;&#x43a;&#x435; &#x438; &#x43c;&#x43d;&#x43e;&#x433;&#x43e;&#x44f;&#x437;&#x44b;&#x447;&#x43d;&#x44b;&#x445; &#x43a;&#x43e;&#x43c;&#x43f;&#x44c;&#x44e;&#x442;&#x435;&#x440;&#x43d;&#x44b;&#x445; &#x441;&#x438;&#x441;&#x442;&#x435;&#x43c;&#x430;&#x445;.
+
+Thai (UCS Level 2):
+
+ Excerpt from a poem on The Romance of The Three Kingdoms (a Chinese
+ classic 'San Guo'):
+
+ [----------------------------|------------------------]
+ &#xe4f; &#xe41;&#xe1c;&#xe48;&#xe19;&#xe14;&#xe34;&#xe19;&#xe2e;&#xe31;&#xe48;&#xe19;&#xe40;&#xe2a;&#xe37;&#xe48;&#xe2d;&#xe21;&#xe42;&#xe17;&#xe23;&#xe21;&#xe41;&#xe2a;&#xe19;&#xe2a;&#xe31;&#xe07;&#xe40;&#xe27;&#xe0a; &#xe1e;&#xe23;&#xe30;&#xe1b;&#xe01;&#xe40;&#xe01;&#xe28;&#xe01;&#xe2d;&#xe07;&#xe1a;&#xe39;&#xe4a;&#xe01;&#xe39;&#xe49;&#xe02;&#xe36;&#xe49;&#xe19;&#xe43;&#xe2b;&#xe21;&#xe48;
+ &#xe2a;&#xe34;&#xe1a;&#xe2a;&#xe2d;&#xe07;&#xe01;&#xe29;&#xe31;&#xe15;&#xe23;&#xe34;&#xe22;&#xe4c;&#xe01;&#xe48;&#xe2d;&#xe19;&#xe2b;&#xe19;&#xe49;&#xe32;&#xe41;&#xe25;&#xe16;&#xe31;&#xe14;&#xe44;&#xe1b; &#xe2a;&#xe2d;&#xe07;&#xe2d;&#xe07;&#xe04;&#xe4c;&#xe44;&#xe0b;&#xe23;&#xe49;&#xe42;&#xe07;&#xe48;&#xe40;&#xe02;&#xe25;&#xe32;&#xe40;&#xe1a;&#xe32;&#xe1b;&#xe31;&#xe0d;&#xe0d;&#xe32;
+ &#xe17;&#xe23;&#xe07;&#xe19;&#xe31;&#xe1a;&#xe16;&#xe37;&#xe2d;&#xe02;&#xe31;&#xe19;&#xe17;&#xe35;&#xe40;&#xe1b;&#xe47;&#xe19;&#xe17;&#xe35;&#xe48;&#xe1e;&#xe36;&#xe48;&#xe07; &#xe1a;&#xe49;&#xe32;&#xe19;&#xe40;&#xe21;&#xe37;&#xe2d;&#xe07;&#xe08;&#xe36;&#xe07;&#xe27;&#xe34;&#xe1b;&#xe23;&#xe34;&#xe15;&#xe40;&#xe1b;&#xe47;&#xe19;&#xe19;&#xe31;&#xe01;&#xe2b;&#xe19;&#xe32;
+ &#xe42;&#xe2e;&#xe08;&#xe34;&#xe4b;&#xe19;&#xe40;&#xe23;&#xe35;&#xe22;&#xe01;&#xe17;&#xe31;&#xe1e;&#xe17;&#xe31;&#xe48;&#xe27;&#xe2b;&#xe31;&#xe27;&#xe40;&#xe21;&#xe37;&#xe2d;&#xe07;&#xe21;&#xe32; &#xe2b;&#xe21;&#xe32;&#xe22;&#xe08;&#xe30;&#xe06;&#xe48;&#xe32;&#xe21;&#xe14;&#xe0a;&#xe31;&#xe48;&#xe27;&#xe15;&#xe31;&#xe27;&#xe2a;&#xe33;&#xe04;&#xe31;&#xe0d;
+ &#xe40;&#xe2b;&#xe21;&#xe37;&#xe2d;&#xe19;&#xe02;&#xe31;&#xe1a;&#xe44;&#xe2a;&#xe44;&#xe25;&#xe48;&#xe40;&#xe2a;&#xe37;&#xe2d;&#xe08;&#xe32;&#xe01;&#xe40;&#xe04;&#xe2b;&#xe32; &#xe23;&#xe31;&#xe1a;&#xe2b;&#xe21;&#xe32;&#xe1b;&#xe48;&#xe32;&#xe40;&#xe02;&#xe49;&#xe32;&#xe21;&#xe32;&#xe40;&#xe25;&#xe22;&#xe2d;&#xe32;&#xe2a;&#xe31;&#xe0d;
+ &#xe1d;&#xe48;&#xe32;&#xe22;&#xe2d;&#xe49;&#xe2d;&#xe07;&#xe2d;&#xe38;&#xe49;&#xe19;&#xe22;&#xe38;&#xe41;&#xe22;&#xe01;&#xe43;&#xe2b;&#xe49;&#xe41;&#xe15;&#xe01;&#xe01;&#xe31;&#xe19; &#xe43;&#xe0a;&#xe49;&#xe2a;&#xe32;&#xe27;&#xe19;&#xe31;&#xe49;&#xe19;&#xe40;&#xe1b;&#xe47;&#xe19;&#xe0a;&#xe19;&#xe27;&#xe19;&#xe0a;&#xe37;&#xe48;&#xe19;&#xe0a;&#xe27;&#xe19;&#xe43;&#xe08;
+ &#xe1e;&#xe25;&#xe31;&#xe19;&#xe25;&#xe34;&#xe09;&#xe38;&#xe22;&#xe01;&#xe38;&#xe22;&#xe01;&#xe35;&#xe01;&#xe25;&#xe31;&#xe1a;&#xe01;&#xe48;&#xe2d;&#xe40;&#xe2b;&#xe15;&#xe38; &#xe0a;&#xe48;&#xe32;&#xe07;&#xe2d;&#xe32;&#xe40;&#xe1e;&#xe28;&#xe08;&#xe23;&#xe34;&#xe07;&#xe2b;&#xe19;&#xe32;&#xe1f;&#xe49;&#xe32;&#xe23;&#xe49;&#xe2d;&#xe07;&#xe44;&#xe2b;&#xe49;
+ &#xe15;&#xe49;&#xe2d;&#xe07;&#xe23;&#xe1a;&#xe23;&#xe32;&#xe06;&#xe48;&#xe32;&#xe1f;&#xe31;&#xe19;&#xe08;&#xe19;&#xe1a;&#xe23;&#xe23;&#xe25;&#xe31;&#xe22; &#xe24;&#xe45;&#xe2b;&#xe32;&#xe43;&#xe04;&#xe23;&#xe04;&#xe49;&#xe33;&#xe0a;&#xe39;&#xe01;&#xe39;&#xe49;&#xe1a;&#xe23;&#xe23;&#xe25;&#xe31;&#xe07;&#xe01;&#xe4c; &#xe2f;
+
+ (The above is a two-column text. If combining characters are handled
+ correctly, the lines of the second column should be aligned with the
+ | character above.)
+
+Ethiopian:
+
+ Proverbs in the Amharic language:
+
+ &#x1230;&#x121b;&#x12ed; &#x12a0;&#x12ed;&#x1273;&#x1228;&#x1235; &#x1295;&#x1309;&#x1225; &#x12a0;&#x12ed;&#x12a8;&#x1230;&#x1235;&#x1362;
+ &#x1265;&#x120b; &#x12ab;&#x1208;&#x129d; &#x12a5;&#x1295;&#x12f0;&#x12a0;&#x1263;&#x1274; &#x1260;&#x1246;&#x1218;&#x1320;&#x129d;&#x1362;
+ &#x130c;&#x1325; &#x12eb;&#x1208;&#x1264;&#x1271; &#x1241;&#x121d;&#x1325;&#x1293; &#x1290;&#x12cd;&#x1362;
+ &#x12f0;&#x1200; &#x1260;&#x1215;&#x120d;&#x1219; &#x1245;&#x1264; &#x1263;&#x12ed;&#x1320;&#x1323; &#x1295;&#x1323;&#x1275; &#x1260;&#x1308;&#x12f0;&#x1208;&#x12cd;&#x1362;
+ &#x12e8;&#x12a0;&#x134d; &#x12c8;&#x1208;&#x121d;&#x1273; &#x1260;&#x1245;&#x1264; &#x12a0;&#x12ed;&#x1273;&#x123d;&#x121d;&#x1362;
+ &#x12a0;&#x12ed;&#x1325; &#x1260;&#x1260;&#x120b; &#x12f3;&#x12cb; &#x1270;&#x1218;&#x1273;&#x1362;
+ &#x1232;&#x1270;&#x1228;&#x1309;&#x1219; &#x12ed;&#x12f0;&#x1228;&#x130d;&#x1219;&#x1362;
+ &#x1240;&#x1235; &#x1260;&#x1240;&#x1235;&#x1365; &#x12d5;&#x1295;&#x1241;&#x120b;&#x120d; &#x1260;&#x12a5;&#x130d;&#x1229; &#x12ed;&#x1204;&#x12f3;&#x120d;&#x1362;
+ &#x12f5;&#x122d; &#x1262;&#x12eb;&#x1265;&#x122d; &#x12a0;&#x1295;&#x1260;&#x1233; &#x12eb;&#x1235;&#x122d;&#x1362;
+ &#x1230;&#x12cd; &#x12a5;&#x1295;&#x12f0;&#x1264;&#x1271; &#x12a5;&#x1295;&#x1305; &#x12a5;&#x1295;&#x12f0; &#x1309;&#x1228;&#x1264;&#x1271; &#x12a0;&#x12ed;&#x1270;&#x12f3;&#x12f0;&#x122d;&#x121d;&#x1362;
+ &#x12a5;&#x130d;&#x12dc;&#x122d; &#x12e8;&#x12a8;&#x1348;&#x1270;&#x12cd;&#x1295; &#x1309;&#x122e;&#x122e; &#x1233;&#x12ed;&#x12d8;&#x130b;&#x12cd; &#x12a0;&#x12ed;&#x12f5;&#x122d;&#x121d;&#x1362;
+ &#x12e8;&#x130e;&#x1228;&#x1264;&#x1275; &#x120c;&#x1263;&#x1365; &#x1262;&#x12eb;&#x12e9;&#x1275; &#x12ed;&#x1235;&#x1245; &#x1263;&#x12eb;&#x12e9;&#x1275; &#x12eb;&#x1320;&#x120d;&#x1245;&#x1362;
+ &#x1225;&#x122b; &#x12a8;&#x1218;&#x134d;&#x1273;&#x1275; &#x120d;&#x1304;&#x1295; &#x120b;&#x134b;&#x1273;&#x1275;&#x1362;
+ &#x12d3;&#x1263;&#x12ed; &#x121b;&#x12f0;&#x122a;&#x12eb; &#x12e8;&#x1208;&#x12cd;&#x1365; &#x130d;&#x1295;&#x12f5; &#x12ed;&#x12de; &#x12ed;&#x12de;&#x122b;&#x120d;&#x1362;
+ &#x12e8;&#x12a5;&#x1235;&#x120b;&#x121d; &#x12a0;&#x1308;&#x1229; &#x1218;&#x12ab; &#x12e8;&#x12a0;&#x121e;&#x122b; &#x12a0;&#x1308;&#x1229; &#x12cb;&#x122d;&#x12ab;&#x1362;
+ &#x1270;&#x1295;&#x130b;&#x120e; &#x1262;&#x1270;&#x1349; &#x1270;&#x1218;&#x120d;&#x1236; &#x1263;&#x1349;&#x1362;
+ &#x12c8;&#x12f3;&#x1305;&#x1205; &#x121b;&#x122d; &#x1262;&#x1206;&#x1295; &#x1328;&#x122d;&#x1235;&#x1205; &#x12a0;&#x1275;&#x120b;&#x1230;&#x12cd;&#x1362;
+ &#x12a5;&#x130d;&#x122d;&#x1205;&#x1295; &#x1260;&#x134d;&#x122b;&#x123d;&#x1205; &#x120d;&#x12ad; &#x12d8;&#x122d;&#x130b;&#x1362;
+
+Runes:
+
+ &#x16bb;&#x16d6; &#x16b3;&#x16b9;&#x16ab;&#x16a6; &#x16a6;&#x16ab;&#x16cf; &#x16bb;&#x16d6; &#x16d2;&#x16a2;&#x16de;&#x16d6; &#x16a9;&#x16be; &#x16a6;&#x16ab;&#x16d7; &#x16da;&#x16aa;&#x16be;&#x16de;&#x16d6; &#x16be;&#x16a9;&#x16b1;&#x16a6;&#x16b9;&#x16d6;&#x16aa;&#x16b1;&#x16de;&#x16a2;&#x16d7; &#x16b9;&#x16c1;&#x16a6; &#x16a6;&#x16aa; &#x16b9;&#x16d6;&#x16e5;&#x16ab;
+
+ (Old English, which transcribed into Latin reads 'He cwaeth that he
+ bude thaem lande northweardum with tha Westsae.' and means 'He said
+ that he lived in the northern land near the Western Sea.')
+
+Braille:
+
+ &#x284c;&#x2801;&#x2827;&#x2811; &#x283c;&#x2801;&#x2812; &#x284d;&#x281c;&#x2807;&#x2811;&#x2839;&#x2830;&#x280e; &#x2863;&#x2815;&#x280c;
+
+ &#x284d;&#x281c;&#x2807;&#x2811;&#x2839; &#x283a;&#x2801;&#x280e; &#x2819;&#x2811;&#x2801;&#x2819;&#x2812; &#x281e;&#x2815; &#x2803;&#x2811;&#x281b;&#x2814; &#x283a;&#x280a;&#x2839;&#x2832; &#x2879;&#x283b;&#x2811; &#x280a;&#x280e; &#x281d;&#x2815; &#x2819;&#x2833;&#x2803;&#x281e;
+ &#x2831;&#x2801;&#x281e;&#x2811;&#x2827;&#x283b; &#x2801;&#x2803;&#x2833;&#x281e; &#x2839;&#x2801;&#x281e;&#x2832; &#x2879;&#x2811; &#x2817;&#x2811;&#x281b;&#x280a;&#x280c;&#x283b; &#x2815;&#x280b; &#x2819;&#x280a;&#x280e; &#x2803;&#x2825;&#x2817;&#x280a;&#x2801;&#x2807; &#x283a;&#x2801;&#x280e;
+ &#x280e;&#x280a;&#x281b;&#x281d;&#x282b; &#x2803;&#x2839; &#x2839;&#x2811; &#x280a;&#x2807;&#x283b;&#x281b;&#x2839;&#x280d;&#x2801;&#x281d;&#x2802; &#x2839;&#x2811; &#x280a;&#x2807;&#x283b;&#x2805;&#x2802; &#x2839;&#x2811; &#x2825;&#x281d;&#x2819;&#x283b;&#x281e;&#x2801;&#x2805;&#x283b;&#x2802;
+ &#x2801;&#x281d;&#x2819; &#x2839;&#x2811; &#x2821;&#x280a;&#x2811;&#x280b; &#x280d;&#x2833;&#x2817;&#x281d;&#x283b;&#x2832; &#x284e;&#x280a;&#x2817;&#x2815;&#x2815;&#x281b;&#x2811; &#x280e;&#x280a;&#x281b;&#x281d;&#x282b; &#x280a;&#x281e;&#x2832; &#x2841;&#x281d;&#x2819;
+ &#x284e;&#x280a;&#x2817;&#x2815;&#x2815;&#x281b;&#x2811;&#x2830;&#x280e; &#x281d;&#x2801;&#x280d;&#x2811; &#x283a;&#x2801;&#x280e; &#x281b;&#x2815;&#x2815;&#x2819; &#x2825;&#x280f;&#x2815;&#x281d; &#x2830;&#x2861;&#x2801;&#x281d;&#x281b;&#x2811;&#x2802; &#x280b;&#x2815;&#x2817; &#x2801;&#x281d;&#x2839;&#x2839;&#x2814;&#x281b; &#x2819;&#x2811;
+ &#x2821;&#x2815;&#x280e;&#x2811; &#x281e;&#x2815; &#x280f;&#x2825;&#x281e; &#x2819;&#x280a;&#x280e; &#x2819;&#x2801;&#x281d;&#x2819; &#x281e;&#x2815;&#x2832;
+
+ &#x2855;&#x2807;&#x2819; &#x284d;&#x281c;&#x2807;&#x2811;&#x2839; &#x283a;&#x2801;&#x280e; &#x2801;&#x280e; &#x2819;&#x2811;&#x2801;&#x2819; &#x2801;&#x280e; &#x2801; &#x2819;&#x2815;&#x2815;&#x2817;&#x2824;&#x281d;&#x2801;&#x280a;&#x2807;&#x2832;
+
+ &#x284d;&#x2814;&#x2819;&#x2816; &#x284a; &#x2819;&#x2815;&#x281d;&#x2830;&#x281e; &#x280d;&#x2811;&#x2801;&#x281d; &#x281e;&#x2815; &#x280e;&#x2801;&#x2839; &#x2839;&#x2801;&#x281e; &#x284a; &#x2805;&#x281d;&#x282a;&#x2802; &#x2815;&#x280b; &#x280d;&#x2839;
+ &#x282a;&#x281d; &#x2805;&#x281d;&#x282a;&#x2807;&#x282b;&#x281b;&#x2811;&#x2802; &#x2831;&#x2801;&#x281e; &#x2839;&#x283b;&#x2811; &#x280a;&#x280e; &#x280f;&#x281c;&#x281e;&#x280a;&#x280a;&#x2825;&#x2807;&#x281c;&#x2807;&#x2839; &#x2819;&#x2811;&#x2801;&#x2819; &#x2801;&#x2803;&#x2833;&#x281e;
+ &#x2801; &#x2819;&#x2815;&#x2815;&#x2817;&#x2824;&#x281d;&#x2801;&#x280a;&#x2807;&#x2832; &#x284a; &#x280d;&#x280a;&#x2823;&#x281e; &#x2819;&#x2801;&#x2827;&#x2811; &#x2803;&#x2811;&#x2832; &#x2814;&#x280a;&#x2807;&#x2814;&#x282b;&#x2802; &#x280d;&#x2839;&#x280e;&#x2811;&#x2807;&#x280b;&#x2802; &#x281e;&#x2815;
+ &#x2817;&#x2811;&#x281b;&#x281c;&#x2819; &#x2801; &#x280a;&#x2815;&#x280b;&#x280b;&#x2814;&#x2824;&#x281d;&#x2801;&#x280a;&#x2807; &#x2801;&#x280e; &#x2839;&#x2811; &#x2819;&#x2811;&#x2801;&#x2819;&#x2811;&#x280c; &#x280f;&#x280a;&#x2811;&#x280a;&#x2811; &#x2815;&#x280b; &#x280a;&#x2817;&#x2815;&#x281d;&#x280d;&#x2815;&#x281d;&#x281b;&#x283b;&#x2839;
+ &#x2814; &#x2839;&#x2811; &#x281e;&#x2817;&#x2801;&#x2819;&#x2811;&#x2832; &#x2843;&#x2825;&#x281e; &#x2839;&#x2811; &#x283a;&#x280a;&#x280e;&#x2819;&#x2815;&#x280d; &#x2815;&#x280b; &#x2833;&#x2817; &#x2801;&#x281d;&#x280a;&#x2811;&#x280c;&#x2815;&#x2817;&#x280e;
+ &#x280a;&#x280e; &#x2814; &#x2839;&#x2811; &#x280e;&#x280a;&#x280d;&#x280a;&#x2807;&#x2811;&#x2806; &#x2801;&#x281d;&#x2819; &#x280d;&#x2839; &#x2825;&#x281d;&#x2819;&#x2801;&#x2807;&#x2807;&#x282a;&#x282b; &#x2819;&#x2801;&#x281d;&#x2819;&#x280e;
+ &#x2829;&#x2801;&#x2807;&#x2807; &#x281d;&#x2815;&#x281e; &#x2819;&#x280a;&#x280c;&#x2825;&#x2817;&#x2803; &#x280a;&#x281e;&#x2802; &#x2815;&#x2817; &#x2839;&#x2811; &#x284a;&#x2833;&#x281d;&#x281e;&#x2817;&#x2839;&#x2830;&#x280e; &#x2819;&#x2815;&#x281d;&#x2811; &#x280b;&#x2815;&#x2817;&#x2832; &#x2879;&#x2833;
+ &#x283a;&#x280a;&#x2807;&#x2807; &#x2839;&#x283b;&#x2811;&#x280b;&#x2815;&#x2817;&#x2811; &#x280f;&#x283b;&#x280d;&#x280a;&#x281e; &#x280d;&#x2811; &#x281e;&#x2815; &#x2817;&#x2811;&#x280f;&#x2811;&#x2801;&#x281e;&#x2802; &#x2811;&#x280d;&#x280f;&#x2819;&#x2801;&#x281e;&#x280a;&#x280a;&#x2801;&#x2807;&#x2807;&#x2839;&#x2802; &#x2839;&#x2801;&#x281e;
+ &#x284d;&#x281c;&#x2807;&#x2811;&#x2839; &#x283a;&#x2801;&#x280e; &#x2801;&#x280e; &#x2819;&#x2811;&#x2801;&#x2819; &#x2801;&#x280e; &#x2801; &#x2819;&#x2815;&#x2815;&#x2817;&#x2824;&#x281d;&#x2801;&#x280a;&#x2807;&#x2832;
+
+ (The first couple of paragraphs of "A Christmas Carol" by Dickens)
+
+Compact font selection example text:
+
+ ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789
+ abcdefghijklmnopqrstuvwxyz &#xa3;&#xa9;&#xb5;&#xc0;&#xc6;&#xd6;&#xde;&#xdf;&#xe9;&#xf6;&#xff;
+ &#x2013;&#x2014;&#x2018;&#x201c;&#x201d;&#x201e;&#x2020;&#x2022;&#x2026;&#x2030;&#x2122;&#x153;&#x160;&#x178;&#x17e;&#x20ac; &#x391;&#x392;&#x393;&#x394;&#x3a9;&#x3b1;&#x3b2;&#x3b3;&#x3b4;&#x3c9; &#x410;&#x411;&#x412;&#x413;&#x414;&#x430;&#x431;&#x432;&#x433;&#x434;
+ &#x2200;&#x2202;&#x2208;&#x211d;&#x2227;&#x222a;&#x2261;&#x221e; &#x2191;&#x2197;&#x21a8;&#x21bb;&#x21e3; &#x2510;&#x253c;&#x2554;&#x2558;&#x2591;&#x25ba;&#x263a;&#x2640; &#xfb01;&#xfffd;&#x2440;&#x2082;&#x1f20;&#x1e02;&#x4e5;&#x1e84;&#x250;&#x2d0;&#x234e;&#x5d0;&#x531;&#x10d0;
+
+Greetings in various languages:
+
+ Hello world, &#x39a;&#x3b1;&#x3bb;&#x3b7;&#x3bc;&#x1f73;&#x3c1;&#x3b1; &#x3ba;&#x1f79;&#x3c3;&#x3bc;&#x3b5;, &#x30b3;&#x30f3;&#x30cb;&#x30c1;&#x30cf;
+
+Box drawing alignment tests:                                          &#x2588;
+                                                                      &#x2589;
+  &#x2554;&#x2550;&#x2550;&#x2566;&#x2550;&#x2550;&#x2557;  &#x250c;&#x2500;&#x2500;&#x252c;&#x2500;&#x2500;&#x2510;  &#x256d;&#x2500;&#x2500;&#x252c;&#x2500;&#x2500;&#x256e;  &#x256d;&#x2500;&#x2500;&#x252c;&#x2500;&#x2500;&#x256e;  &#x250f;&#x2501;&#x2501;&#x2533;&#x2501;&#x2501;&#x2513;  &#x250e;&#x2512;&#x250f;&#x2511;   &#x2577;  &#x257b; &#x250f;&#x252f;&#x2513; &#x250c;&#x2530;&#x2510;    &#x258a; &#x2571;&#x2572;&#x2571;&#x2572;&#x2573;&#x2573;&#x2573;
+  &#x2551;&#x250c;&#x2500;&#x2568;&#x2500;&#x2510;&#x2551;  &#x2502;&#x2554;&#x2550;&#x2567;&#x2550;&#x2557;&#x2502;  &#x2502;&#x2552;&#x2550;&#x256a;&#x2550;&#x2555;&#x2502;  &#x2502;&#x2553;&#x2500;&#x2541;&#x2500;&#x2556;&#x2502;  &#x2503;&#x250c;&#x2500;&#x2542;&#x2500;&#x2510;&#x2503;  &#x2517;&#x2543;&#x2544;&#x2519;  &#x2576;&#x253c;&#x2574;&#x257a;&#x254b;&#x2578;&#x2520;&#x253c;&#x2528; &#x251d;&#x254b;&#x2525;    &#x258b; &#x2572;&#x2571;&#x2572;&#x2571;&#x2573;&#x2573;&#x2573;
+  &#x2551;&#x2502;&#x2572; &#x2571;&#x2502;&#x2551;  &#x2502;&#x2551;   &#x2551;&#x2502;  &#x2502;&#x2502; &#x2502; &#x2502;&#x2502;  &#x2502;&#x2551; &#x2503; &#x2551;&#x2502;  &#x2503;&#x2502; &#x257f; &#x2502;&#x2503;  &#x250d;&#x2545;&#x2546;&#x2513;   &#x2575;  &#x2579; &#x2517;&#x2537;&#x251b; &#x2514;&#x2538;&#x2518;    &#x258c; &#x2571;&#x2572;&#x2571;&#x2572;&#x2573;&#x2573;&#x2573;
+  &#x2560;&#x2561; &#x2573; &#x255e;&#x2563;  &#x251c;&#x2562;   &#x255f;&#x2524;  &#x251c;&#x253c;&#x2500;&#x253c;&#x2500;&#x253c;&#x2524;  &#x251c;&#x256b;&#x2500;&#x2542;&#x2500;&#x256b;&#x2524;  &#x2523;&#x253f;&#x257e;&#x253c;&#x257c;&#x253f;&#x252b;  &#x2515;&#x251b;&#x2516;&#x251a;     &#x250c;&#x2504;&#x2504;&#x2510; &#x254e; &#x250f;&#x2505;&#x2505;&#x2513; &#x250b; &#x258d; &#x2572;&#x2571;&#x2572;&#x2571;&#x2573;&#x2573;&#x2573;
+  &#x2551;&#x2502;&#x2571; &#x2572;&#x2502;&#x2551;  &#x2502;&#x2551;   &#x2551;&#x2502;  &#x2502;&#x2502; &#x2502; &#x2502;&#x2502;  &#x2502;&#x2551; &#x2503; &#x2551;&#x2502;  &#x2503;&#x2502; &#x257d; &#x2502;&#x2503;  &#x2591;&#x2591;&#x2592;&#x2592;&#x2593;&#x2593;&#x2588;&#x2588; &#x250a;  &#x2506; &#x254e; &#x254f;  &#x2507; &#x250b; &#x258e;
+  &#x2551;&#x2514;&#x2500;&#x2565;&#x2500;&#x2518;&#x2551;  &#x2502;&#x255a;&#x2550;&#x2564;&#x2550;&#x255d;&#x2502;  &#x2502;&#x2558;&#x2550;&#x256a;&#x2550;&#x255b;&#x2502;  &#x2502;&#x2559;&#x2500;&#x2540;&#x2500;&#x255c;&#x2502;  &#x2503;&#x2514;&#x2500;&#x2542;&#x2500;&#x2518;&#x2503;  &#x2591;&#x2591;&#x2592;&#x2592;&#x2593;&#x2593;&#x2588;&#x2588; &#x250a;  &#x2506; &#x254e; &#x254f;  &#x2507; &#x250b; &#x258f;
+  &#x255a;&#x2550;&#x2550;&#x2569;&#x2550;&#x2550;&#x255d;  &#x2514;&#x2500;&#x2500;&#x2534;&#x2500;&#x2500;&#x2518;  &#x2570;&#x2500;&#x2500;&#x2534;&#x2500;&#x2500;&#x256f;  &#x2570;&#x2500;&#x2500;&#x2534;&#x2500;&#x2500;&#x256f;  &#x2517;&#x2501;&#x2501;&#x253b;&#x2501;&#x2501;&#x251b;           &#x2514;&#x254c;&#x254c;&#x2518; &#x254e; &#x2517;&#x254d;&#x254d;&#x251b; &#x250b;  &#x2581;&#x2582;&#x2583;&#x2584;&#x2585;&#x2586;&#x2587;&#x2588;
+
+</pre>
+</BODY>
+</HTML>