diff options
Diffstat (limited to 'test')
-rw-r--r-- | test/ALT88592.html | 171 | ||||
-rw-r--r-- | test/ISO_LATIN1_test.html | 83 | ||||
-rw-r--r-- | test/README.txt | 8 | ||||
-rw-r--r-- | test/TestComment.html | 50 | ||||
-rw-r--r-- | test/X | 0 | ||||
-rw-r--r-- | test/bad-html.html | 46 | ||||
-rw-r--r-- | test/c1.html | 63 | ||||
-rw-r--r-- | test/circle.html | 14 | ||||
-rw-r--r-- | test/cp-1252.html | 178 | ||||
-rw-r--r-- | test/cp-1252a.html | 183 | ||||
-rw-r--r-- | test/image.jpg | bin | 0 -> 1287 bytes | |||
-rw-r--r-- | test/iso-8859-1.html | 241 | ||||
-rw-r--r-- | test/iso-8859-1a.html | 275 | ||||
-rw-r--r-- | test/iso-8859-2.html | 174 | ||||
-rw-r--r-- | test/iso-8859-2a.html | 208 | ||||
-rw-r--r-- | test/koi8-r.html | 321 | ||||
-rw-r--r-- | test/nobody | 0 | ||||
-rw-r--r-- | test/quickbrown.html | 103 | ||||
-rw-r--r-- | test/raw8bit.html | 38 | ||||
-rw-r--r-- | test/sgml.html | 1081 | ||||
-rw-r--r-- | test/spaces.html | 37 | ||||
-rw-r--r-- | test/special_urls.html | 22 | ||||
-rw-r--r-- | test/square.html | 14 | ||||
-rw-r--r-- | test/tabtest.html | 39 | ||||
-rw-r--r-- | test/tags.html | 219 | ||||
-rw-r--r-- | test/test-styles.html | 106 | ||||
-rw-r--r-- | test/triangle.html | 14 | ||||
-rw-r--r-- | test/unicode.html | 915 | ||||
-rw-r--r-- | test/utf-8-demo.html | 216 |
29 files changed, 4819 insertions, 0 deletions
diff --git a/test/ALT88592.html b/test/ALT88592.html new file mode 100644 index 0000000..419e699 --- /dev/null +++ b/test/ALT88592.html @@ -0,0 +1,171 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<HTML> +<HEAD> +<TITLE>Character table modified and enhanced for iso8859-2 - ALT test</TITLE> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-2"> +<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org"> +<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/"> +<!-- A BASE tag for the SRC attributes of dummy images. + They should be inaccessible so that the ALT text will be shown in graphical browsers. + Use file: to save network resources. --> +<BASE HREF="file://localhost/this.path.intentionally.invalid/"> +</HEAD> + +<BODY> + +<H1 ALIGN=center>iso8859-2 plus table - ALT test</H1> + +<PRE> +Description Code Entity name +=================================== ============ ============== +quotation mark <IMG SRC=X ALT=" &#34; --> " &quot; --> ""> +ampersand <IMG SRC=X ALT=" &#38; --> & &amp; --> &"> +less-than sign <IMG SRC=X ALT=" &#60; --> < &lt; --> <"> +greater-than sign <IMG SRC=X ALT=" &#62; --> > &gt; --> >"> + +Description Char Code Entity name +=================================== ==== ============ ============== +non-breaking space <IMG SRC=X ALT=" &#160; -->   &nbsp; --> "> +capital A, ogonek <IMG SRC=X ALT=" ¡ &#260; --> Ą &Aogon; --> Ą"> +breve <IMG SRC=X ALT=" {¢} {&#728;}-->{˘} {&breve;} -->{˘}"> +capital L, stroke <IMG SRC=X ALT=" £ &#321; --> Ł &Lstrok; --> Ł"> +general currency sign <IMG SRC=X ALT=" ¤ &#164; --> ¤ &curren; --> ¤"> +capital L, caron <IMG SRC=X ALT=" ¥ &#317; --> Ľ &Lcaron; --> Ľ"> +capital S, acute accent <IMG SRC=X ALT=" ¦ &#346; --> Ś &Sacute; --> Ś"> +section sign <IMG SRC=X ALT=" § &#167; --> § &sect; --> §"> +umlaut (dieresis) <IMG SRC=X ALT=" ¨ &#168; --> ¨ &uml; --> ¨"> + <IMG SRC=X ALT=" &die; --> ¨"> +capital S, caron <IMG SRC=X ALT=" © &#352; --> Š &Scaron; --> Š"> +capital S, 
cedilla <IMG SRC=X ALT=" ª &#350; --> Ş &Scedil; --> Ş"> +capital T, caron <IMG SRC=X ALT=" « &#356; --> Ť &Tcaron; --> Ť"> +capital Z, acute accent <IMG SRC=X ALT=" ¬ &#377; --> Ź &Zacute; --> Ź"> +soft hyphen <IMG SRC=X ALT=" [] [&#173;]-->[­] [&shy;] -->[­]"> +capital Z, caron <IMG SRC=X ALT=" ® &#381; --> Ž &Zcaron; --> Ž"> +capital Z, dot above <IMG SRC=X ALT=" ¯ &#379; --> Ż &Zdot; --> Ż"> +degree sign <IMG SRC=X ALT=" ° &#176; --> ° &deg; --> °"> +small a, ogonek <IMG SRC=X ALT=" ± &#261; --> ą &aogon; --> ą"> +ogonek <IMG SRC=X ALT=" {²} {&#731;}-->{˛} {&ogon;} -->{˛}"> +small l, stroke <IMG SRC=X ALT=" ³ &#322; --> ł &lstrok; --> ł"> +acute accent <IMG SRC=X ALT=" ´ &#180; --> ´ &acute; --> ´"> +small l, caron <IMG SRC=X ALT=" µ &#318; --> ľ &lcaron; --> ľ"> +small s, acute accent <IMG SRC=X ALT=" ¶ &#347; --> ś &sacute; --> ś"> +caron <IMG SRC=X ALT=" {·} {&#711;}-->{ˇ} {&caron;} -->{ˇ}"> +cedilla <IMG SRC=X ALT=" ¸ &#184; --> ¸ &cedil; --> ¸"> +small s, caron <IMG SRC=X ALT=" ¹ &#353; --> š &scaron; --> š"> +small s, cedilla <IMG SRC=X ALT=" º &#351; --> ş &scedil; --> ş"> +small t, caron <IMG SRC=X ALT=" » &#357; --> ť &tcaron; --> ť"> +small z, acute accent <IMG SRC=X ALT=" ¼ &#378; --> ź &zacute; --> ź"> +double acute accent <IMG SRC=X ALT=" {½} {&#733;}-->{˝} {&dblac;} -->{˝}"> +small z, caron <IMG SRC=X ALT=" ¾ &#382; --> ž &zcaron; --> ž"> +small z, dot above <IMG SRC=X ALT=" ¿ &#380; --> ż &zdot; --> ż "> +capital R, acute accent <IMG SRC=X ALT=" À &#340; --> Ŕ &Racute; --> Ŕ"> +capital A, acute accent <IMG SRC=X ALT=" Á &#193; --> Á &Aacute; --> Á"> +capital A, circumflex accent <IMG SRC=X ALT="  &#194; -->  &Acirc; --> Â"> +capital A, breve <IMG SRC=X ALT=" à &#258; --> Ă &Abreve; --> Ă"> +capital A, dieresis or umlaut mark <IMG SRC=X ALT=" Ä &#196; --> Ä &Auml; --> Ä"> +capital L, acute accent <IMG SRC=X ALT=" Å &#313; --> Ĺ &Lacute; --> Ĺ"> +capital C, acute accent <IMG SRC=X ALT=" Æ &#262; --> Ć &Cacute; --> Ć"> +capital C, cedilla <IMG SRC=X 
ALT=" Ç &#199; --> Ç &Ccedil; --> Ç"> +capital C, caron <IMG SRC=X ALT=" È &#268; --> Č &Ccaron; --> Č"> +capital E, acute accent <IMG SRC=X ALT=" É &#201; --> É &Eacute; --> É"> +capital E, ogonek <IMG SRC=X ALT=" Ê &#280; --> Ę &Eogon; --> Ę"> +capital E, dieresis or umlaut mark <IMG SRC=X ALT=" Ë &#203; --> Ë &Euml; --> Ë"> +capital E, caron <IMG SRC=X ALT=" Ì &#282; --> Ě &Ecaron; --> Ě"> +capital I, acute accent <IMG SRC=X ALT=" Í &#205; --> Í &Iacute; --> Í"> +capital I, circumflex accent <IMG SRC=X ALT=" Î &#206; --> Î &Icirc; --> Î"> +capital D, caron <IMG SRC=X ALT=" Ï &#270; --> Ď &Dcaron; --> Ď"> +capital D, stroke <IMG SRC=X ALT=" Ð &#272; --> Đ &Dstrok; --> Đ"> +capital Eth, Icelandic <IMG SRC=X ALT=" N/A &#208; --> Ð &ETH; --> Ð"> +capital N, acute accent <IMG SRC=X ALT=" Ñ &#323; --> Ń &Nacute; --> Ń"> +capital N, caron <IMG SRC=X ALT=" Ò &#327; --> Ň &Ncaron; --> Ň"> +capital O, acute accent <IMG SRC=X ALT=" Ó &#211; --> Ó &Oacute; --> Ó"> +capital O, circumflex accent <IMG SRC=X ALT=" Ô &#212; --> Ô &Ocirc; --> Ô"> +capital O, double acute accent <IMG SRC=X ALT=" Õ &#336; --> Ő &Odblac; --> Ő"> +capital O, dieresis or umlaut mark <IMG SRC=X ALT=" Ö &#214; --> Ö &Ouml; --> Ö"> +multiply sign <IMG SRC=X ALT=" × &#215; --> × &times; --> ×"> +capital R, caron <IMG SRC=X ALT=" Ø &#344; --> Ř &Rcaron; --> Ř"> +capital U, ring <IMG SRC=X ALT=" Ù &#366; --> Ů &Uring; --> Ů"> +capital U, acute accent <IMG SRC=X ALT=" Ú &#218; --> Ú &Uacute; --> Ú"> +capital U, double acute accent <IMG SRC=X ALT=" Û &#368; --> Ű &Udblac; --> Ű"> +capital U, dieresis or umlaut mark <IMG SRC=X ALT=" Ü &#220; --> Ü &Uuml; --> Ü"> +capital Y, acute accent <IMG SRC=X ALT=" Ý &#221; --> Ý &Yacute; --> Ý"> +capital T, cedilla <IMG SRC=X ALT=" Þ &#354; --> Ţ &Tcedil; --> Ţ"> +small sharp s, German (sz ligature) <IMG SRC=X ALT=" ß &#223; --> ß &szlig; --> ß"> +small r, acute accent <IMG SRC=X ALT=" à &#341; --> ŕ &racute; --> ŕ"> +small a, acute accent <IMG SRC=X ALT=" á &#225; --> á 
&aacute; --> á"> +small a, circumflex accent <IMG SRC=X ALT=" â &#226; --> â &acirc; --> â"> +small a, breve <IMG SRC=X ALT=" ã &#259; --> ă &abreve; --> ă"> +small a, dieresis or umlaut mark <IMG SRC=X ALT=" ä &#228; --> ä &auml; --> ä"> +small l, acute accent <IMG SRC=X ALT=" å &#314; --> ĺ &lacute; --> ĺ"> +small c, acute accent <IMG SRC=X ALT=" æ &#263; --> ć &cacute; --> ć"> +small c, cedilla <IMG SRC=X ALT=" ç &#231; --> ç &ccedil; --> ç"> +small c, caron <IMG SRC=X ALT=" è &#269; --> č &ccaron; --> č"> +small e, acute accent <IMG SRC=X ALT=" é &#233; --> é &eacute; --> é"> +small e, ogonek <IMG SRC=X ALT=" ê &#281; --> ę &eogon; --> ę"> +small e, dieresis or umlaut mark <IMG SRC=X ALT=" ë &#235; --> ë &euml; --> ë"> +small e, caron <IMG SRC=X ALT=" ì &#283; --> ě &ecaron; --> ě"> +small i, acute accent <IMG SRC=X ALT=" í &#237; --> í &iacute; --> í"> +small i, circumflex accent <IMG SRC=X ALT=" î &#238; --> î &icirc; --> î"> +small d, caron <IMG SRC=X ALT=" ï &#271; --> ď &dcaron; --> ď"> +small d, stroke <IMG SRC=X ALT=" ð &#273; --> đ &dstrok; --> đ"> +small eth, Icelandic <IMG SRC=X ALT=" N/A &#240; --> ð &eth; --> ð"> +small n, acute accent <IMG SRC=X ALT=" ñ &#324; --> ń &nacute; --> ń"> +small n, caron <IMG SRC=X ALT=" ò &#328; --> ň &ncaron; --> ň"> +small o, acute accent <IMG SRC=X ALT=" ó &#243; --> ó &oacute; --> ó"> +small o, circumflex accent <IMG SRC=X ALT=" ô &#244; --> ô &ocirc; --> ô"> +small o, double acute accent <IMG SRC=X ALT=" õ &#337; --> ő &odblac; --> ő"> +small o, dieresis or umlaut mark <IMG SRC=X ALT=" ö &#246; --> ö &ouml; --> ö"> +division sign <IMG SRC=X ALT=" ÷ &#247; --> ÷ &divide; --> ÷"> +small r, caron <IMG SRC=X ALT=" ø &#345; --> ř &rcaron; --> ř"> +small u, ring <IMG SRC=X ALT=" ù &#367; --> ů &uring; --> ů"> +small u, acute accent <IMG SRC=X ALT=" ú &#250; --> ú &uacute; --> ú"> +small u, double acute accent <IMG SRC=X ALT=" û &#369; --> ű &udblac; --> ű"> +small u, dieresis or umlaut mark <IMG SRC=X ALT=" ü &#252; --> 
ü &uuml; --> ü"> +small y, acute accent <IMG SRC=X ALT=" ý &#253; --> ý &yacute; --> ý"> +small t, cedilla <IMG SRC=X ALT=" þ &#355; --> ţ &tcedil; --> ţ"> +dot above <IMG SRC=X ALT=" {ÿ} {&#729;}-->{˙} {&dot;} -->{˙}"> + +Some other characters of interest Char Code Entity name +=================================== ==== ============ ============== +capital AE diphthong (ligature) <IMG SRC=X ALT=" N/A &#198; --> Æ &AElig; --> Æ"> +small ae diphthong (ligature) <IMG SRC=X ALT=" N/A &#230; --> æ &aelig; --> æ"> +capital OE ligature <IMG SRC=X ALT=" N/A {&#338;}-->{Œ} {&OElig;} -->{Œ}"> +small oe ligature <IMG SRC=X ALT=" N/A {&#339;}-->{œ} {&oelig;} -->{œ}"> +copyright <IMG SRC=X ALT=" N/A &#169; --> © &copy; --> ©"> +registered trademark <IMG SRC=X ALT=" N/A &#174; --> ® &reg; --> ®"> +trademark sign <IMG SRC=X ALT=" N/A &#8482;--> ™ &trade; --> ™"> +em space <IMG SRC=X ALT=" N/A [&#8195;]->[ ] [&emsp;] -->[ ]"> +en space <IMG SRC=X ALT=" N/A [&#8194;]->[ ] [&ensp;] -->[ ]"> +1/3-em space <IMG SRC=X ALT=" N/A [&#8196;]->[ ] [&emsp13;] -->[ ]"> +1/4-em space <IMG SRC=X ALT=" N/A [&#8197;]->[ ] [&emsp14;] -->[ ]"> +thin space <IMG SRC=X ALT=" N/A [&#8201;]->[ ] [&thinsp;]-->[ ]"> +hair space <IMG SRC=X ALT=" N/A [&#8202;]->[ ] [&hairsp;]-->[ ]"> +em dash <IMG SRC=X ALT=" N/A [&#8212;]->[—] [&mdash;] -->[—]"> +en dash <IMG SRC=X ALT=" N/A [&#8211;]->[–] [&ndash;] -->[–]"> + +</PRE><!-- </PRE> no HotJava preBeta hackx - kw --> +<!-- second /PRE is a hack for HotJava 1.0 preBeta 1 --> +<HR> +<P> +Characters not found in ISO-8859-2 have "N/A" in the <TT>Char</TT> column. +Some characters for which I could not find entity names in either +<A HREF="http://www.internic.net/rfc/rfc2070.txt">RFC 2070</A> +or the +<A HREF="ftp://www.ucc.ie/pub/sgml/">ISOlat1, ISOlat2, ISOnum, ISOpub and ISOtech</A> +sets (the ones included by Peter Flynn's +<A HREF="http://www.ucc.ie/doc/www/html/dtds/htmlpro.html">HTML Pro DTD</A>) +are shown enclosed in <TT>{</TT>braces<TT>}</TT>. 
+</P> +<P> +See Martin Ramsch's original +<A CHARSET="iso-8859-1" HREF="https://web.archive.org/web/19970119160651/http://www.uni-passau.de:80/~ramsch/iso8859-1.html">ISO-8859-1 Table</A> +for related info and links, and for some notes on entity names. +This file is mostly just an adaptation of his table +to the ISO-8859-2 character set. + +<HR> + +<ADDRESS>kweide@tezcat.com 1997-03-09</ADDRESS> + +</BODY> +</HTML> diff --git a/test/ISO_LATIN1_test.html b/test/ISO_LATIN1_test.html new file mode 100644 index 0000000..d767978 --- /dev/null +++ b/test/ISO_LATIN1_test.html @@ -0,0 +1,83 @@ +<!DOCTYPE html public "-//IETF//DTD HTML 3.0//EN"> +<html> +<head> +<title>Test of minimal ISO LATIN1 character set</title> +<link rev="made" href="mailto:lynx-dev@nongnu.org"> +</head> + +<body> +<h1>minimal ISO LATIN1 text entities</h1> +<ul> + <li>"Æ", /* capital AE diphthong (ligature) */ + <li>"Á", /* capital A, acute accent */ + <li>"Â", /* capital A, circumflex accent */ + <li>"À", /* capital A, grave accent */ + <li>"Å", /* capital A, ring */ + <li>"Ã", /* capital A, tilde */ + <li>"Ä", /* capital A, dieresis or umlaut mark */ + <li>"Ç", /* capital C, cedilla */ + <li>"Ð", /* capital Eth, Icelandic */ + <li>"É", /* capital E, acute accent */ + <li>"Ê", /* capital E, circumflex accent */ + <li>"È", /* capital E, grave accent */ + <li>"Ë", /* capital E, dieresis or umlaut mark */ + <li>"Í", /* capital I, acute accent */ + <li>"Î", /* capital I, circumflex accent */ + <li>"Ì", /* capital I, grave accent */ + <li>"Ï", /* capital I, dieresis or umlaut mark */ + <li>"Ñ", /* capital N, tilde */ + <li>"Ó", /* capital O, acute accent */ + <li>"Ô", /* capital O, circumflex accent */ + <li>"Ò", /* capital O, grave accent */ + <li>"Ø", /* capital O, slash */ + <li>"Õ", /* capital O, tilde */ + <li>"Ö", /* capital O, dieresis or umlaut mark */ + <li>"Þ", /* capital THORN, Icelandic */ + <li>"Ú", /* capital U, acute accent */ + <li>"Û", /* capital U, circumflex accent */ + <li>"Ù", /* 
capital U, grave accent */ + <li>"Ü", /* capital U, dieresis or umlaut mark */ + <li>"Ý", /* capital Y, acute accent */ + <li>"á", /* small a, acute accent */ + <li>"â", /* small a, circumflex accent */ + <li>"æ", /* small ae diphthong (ligature) */ + <li>"à", /* small a, grave accent */ + <li>"&", /* ampersand */ + <li>"å", /* small a, ring */ + <li>"ã", /* small a, tilde */ + <li>"ä", /* small a, dieresis or umlaut mark */ + <li>"ç", /* small c, cedilla */ + <li>"é", /* small e, acute accent */ + <li>"ê", /* small e, circumflex accent */ + <li>"è", /* small e, grave accent */ + <li>" ", /* emsp, em space - not collapsed */ + <li>" ", /* ensp, en space - not collapsed */ + <li>"ð", /* small eth, Icelandic */ + <li>"ë", /* small e, dieresis or umlaut mark */ + <li>">", /* greater than */ + <li>"í", /* small i, acute accent */ + <li>"î", /* small i, circumflex accent */ + <li>"ì", /* small i, grave accent */ + <li>"ï", /* small i, dieresis or umlaut mark */ + <li>"<", /* less than */ + <li>" ", /* nbsp, non breaking space */ + <li>"ñ", /* small n, tilde */ + <li>"ó", /* small o, acute accent */ + <li>"ô", /* small o, circumflex accent */ + <li>"ò", /* small o, grave accent */ + <li>"ø", /* small o, slash */ + <li>"õ", /* small o, tilde */ + <li>"ö", /* small o, dieresis or umlaut mark */ + <li>""", /* quote, '"' */ + <li>"ß", /* small sharp s, German (sz ligature) */ + <li>"þ", /* small thorn, Icelandic */ + <li>"ú", /* small u, acute accent */ + <li>"û", /* small u, circumflex accent */ + <li>"ù", /* small u, grave accent */ + <li>"ü", /* small u, dieresis or umlaut mark */ + <li>"ý", /* small y, acute accent */ + <li>"ÿ", /* small y, dieresis or umlaut mark */ +</ul> + +</body> +</html> diff --git a/test/README.txt b/test/README.txt new file mode 100644 index 0000000..21419f2 --- /dev/null +++ b/test/README.txt @@ -0,0 +1,8 @@ +ISO_LATIN1_test.html and iso-8859-1.html are for testing the translation of +HTML entities with the character sets that are selectable via 
the 'o'ptions +menu. + +TestComment.html and tabtest.html are for testing comment and TAB handling. + +Any other files in this directory do not represent a test suite. They +are used during program testing to track down odd and mysterious bugs. diff --git a/test/TestComment.html b/test/TestComment.html new file mode 100644 index 0000000..873169f --- /dev/null +++ b/test/TestComment.html @@ -0,0 +1,50 @@ +<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0 Level 2//EN"> +<html> +<head> +<title>HTML Comment Parser Test</title> +<link rev="made" href="mailto:pg@sweng.stortek.com"> +<base href="http://nyx10.cs.du.edu:8001/~pgilmart/TestComment.html"> +</head> + +<body> +<P> Test of the HTML/SGML comment syntax, as given in the W3 HTML Spec: +<a +href="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_3.html#SEC15"> +Comments</a> + +<P>See especially, the footnote: +<a +href="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_foot.html#FOOT10" +>(10)</a> + +<P> +Co-vary the LYK_MINIMAL and LYK_HISTORICAL command key toggles (use the +'k'eymap command to see their key bindings) to establish Valid, Minimal +or Historical comment parsing, and toggle trace mode on (Ctrl-T), to see +how comment parsing is affected. + +<P>Case 01 through Case 14 should appear +as short separate paragraphs with the case numbers aligned vertically. +Some noise characters may appear to the right as a byproduct of code present +for error recovery, but there should be no noise before each case number. 
+ +<P> Case <!-- trivial --> | 01 | Trivial +<P> Case <!-- extra hyphens and spaces -- -- -- > | 02 | Hyphens and Spaces +<P> Case <!-- extra < < < --> | 03 | Extra LT --> --> --> +<P> Case <!-- balanced < < < > > > --> | 04 | Balanced +<P> Case <!-- extra > -- --> > still in comment --> | 05 | Extra GT +<P> Case <!-- stuff between -- and > -- still in comment --> | 06 | Stuff Inside +<P> Case <!-- Extra <!-- -- Second Comment --> | 07 | Extra Open --> --> +<P> Case <!-- New Line between -- + -- Second Comment -- + > | 08 | New line +<P> Case <!---> degenerate --> | 09 | Degenerate <P> Case <!----> | 10 | Empty +<P> Case <!-- perverse <!--> | 11 | Perverse --> --> --> +<P> Case <!-- Comment -- -- and a half > this is still in comment -- > | 12 | Multiple Comments --> --> --> +<P> Case <!> | 13 | Zero Comments +<P> Case <!-- < > +< > Still in comment --> | 14 | Last + +<P><<STRONG>Tests completed!</STRONG>> +</body> +</html> diff --git a/test/bad-html.html b/test/bad-html.html new file mode 100644 index 0000000..8c0b9dc --- /dev/null +++ b/test/bad-html.html @@ -0,0 +1,46 @@ +<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN"> +<HTML> +<HEAD> +<TITLE>Examples of "Bad HTML" per Lynx</TITLE> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1"> +<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org"> +<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/"> +</HEAD> + +<BODY> +<h2>Unterminated TEXTAREA</h2> +<form action="http://localhost/cgi-bin/bogus-parms" method="get"> +<textarea name="50cols" cols="50" rows=3> +This is not empty. +</textarea> +<br> +<textarea name="50percent" cols="50%" rows=3> +This seems to have a button. 
+<button>Button 1</button> +</textarea> +<hr> +<input type="submit" value="Submit this form"> +<br> +<input type="reset" value="Reset this form"> +</form> + +<h2>Unterminated SELECT</h2> +<select> +<option>first option</option> +<option>second option</option> +<option>third option</option> +</notselect> +<br> +<select> +<option>first option</option> +<option>second option</option> +<option>third option</option> +</select> + +<h2>OPTION not within SELECT</h2> +<option>third option</option> + +<h2>TEXTAREA ending without starting</h2> +</textarea> + +</BODY> diff --git a/test/c1.html b/test/c1.html new file mode 100644 index 0000000..6ec70aa --- /dev/null +++ b/test/c1.html @@ -0,0 +1,63 @@ +<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN"> +<HTML> +<HEAD> +<TITLE>Test of invalid NCRs 128-159</TITLE> +</HEAD> +<BODY><H2>Test of invalid NCRs 128-159</H2> +<P> +Authoring tools on MS Windows, in particular MS FrontPage ("WYSIWYG" HTML editor), +generate invalid <DFN>Numerical Character References</DFN> for characters +commonly found in positions 128...159 (0x80...0x9f) in Windows fonts. Although +these are valid codepoints for <em>windows-1252</em> (and other +windows-xxxx) charsets, valid NCRs always refer to the document character set +in the SGML sense, not to the character encoding scheme (or charset). For HTML, +the SGML document character set is fixed, it is always a subset of Unicode +(or ISO 10646). In Unicode and its iso-8859-1 subset, values 128...159 are +C1 control characters, they must not appear in HTML. Valid NCRs for the +intended characters use Unicode values greater than 256. +<p> +Lynx tries to interpret some of the invalid codes, by assuming that they are +windows-1252 codepoints. +<PRE> + +You may want to press '\' to view the source of this test. 
+ +<em>Code invalid NCR <!-- --> <tab id=c>valid NCR, description</em> +<em> normal in ALT <a id=table></a> </em> + +0x80 € <IMG SRC=X ALT="€"> <tab to=c>€ #EURO SIGN +0x81  <IMG SRC=X ALT=""> <!----> #NOT USED +0x82 ‚ <IMG SRC=X ALT="‚"> <tab to=c>‚ #SINGLE LOW-9 QUOTATION MARK +0x83 ƒ <IMG SRC=X ALT="ƒ"> <tab to=c>ƒ #LATIN SMALL LETTER F WITH HOOK +0x84 „ <IMG SRC=X ALT="„"> <tab to=c>„ #DOUBLE LOW-9 QUOTATION MARK +0x85 … <IMG SRC=X ALT="…"> <tab to=c>… #HORIZONTAL ELLIPSIS +0x86 † <IMG SRC=X ALT="†"> <tab to=c>† #DAGGER +0x87 ‡ <IMG SRC=X ALT="‡"> <tab to=c>‡ #DOUBLE DAGGER +0x88 ˆ <IMG SRC=X ALT="ˆ"> <tab to=c>ˆ #MODIFIER LETTER CIRCUMFLEX ACCENT +0x89 ‰ <IMG SRC=X ALT="‰"> <tab to=c>‰ #PER MILLE SIGN +0x8a Š <IMG SRC=X ALT="Š"> <tab to=c>Š #LATIN CAPITAL LETTER S WITH CARON +0x8b ‹ <IMG SRC=X ALT="‹"> <tab to=c>‹ #SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0x8c Œ <IMG SRC=X ALT="Œ"> <tab to=c>Œ #LATIN CAPITAL LIGATURE OE +0x8d  <IMG SRC=X ALT=""> <!----> #NOT USED +0x8e Ž <IMG SRC=X ALT="Ž"> <!--Ž--> #NOT USED +0x8f  <IMG SRC=X ALT=""> <!----> #NOT USED +0x90  <IMG SRC=X ALT=""> <!----> #NOT USED +0x91 ‘ <IMG SRC=X ALT="‘"> <tab to=c>‘ #LEFT SINGLE QUOTATION MARK +0x92 ’ <IMG SRC=X ALT="’"> <tab to=c>’ #RIGHT SINGLE QUOTATION MARK +0x93 “ <IMG SRC=X ALT="“"> <tab to=c>“ #LEFT DOUBLE QUOTATION MARK +0x94 ” <IMG SRC=X ALT="”"> <tab to=c>” #RIGHT DOUBLE QUOTATION MARK +0x95 • <IMG SRC=X ALT="•"> <tab to=c>• #BULLET +0x96 – <IMG SRC=X ALT="–"> <tab to=c>– #EN DASH +0x97 — <IMG SRC=X ALT="—"> <tab to=c>— #EM DASH +0x98 ˜ <IMG SRC=X ALT="˜"> <tab to=c>˜ #SMALL TILDE +0x99 ™ <IMG SRC=X ALT="™"> <tab to=c>™ #TRADE MARK SIGN +0x9a š <IMG SRC=X ALT="š"> <tab to=c>š #LATIN SMALL LETTER S WITH CARON +0x9b › <IMG SRC=X ALT="›"> <tab to=c>› #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0x9c œ <IMG SRC=X ALT="œ"> <tab to=c>œ #LATIN SMALL LIGATURE OE +0x9d  <IMG SRC=X ALT=""> <!----> #NOT USED +0x9e ž <IMG SRC=X ALT="ž"> <!--ž--> #NOT USED +0x9f Ÿ <IMG SRC=X 
ALT="Ÿ"> <tab to=c>Ÿ #LATIN CAPITAL LETTER Y WITH DIAERESIS + +</PRE> +</BODY> +</HTML> diff --git a/test/circle.html b/test/circle.html new file mode 100644 index 0000000..dedf305 --- /dev/null +++ b/test/circle.html @@ -0,0 +1,14 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"> + +<html> +<head> + <meta name="generator" content= + "HTML Tidy for Linux (vers 25 March 2009), see www.w3.org"> + + <title>Test ImageMap - circle</title> +</head> + +<body> + <p>CIRCLE</p> +</body> +</html> diff --git a/test/cp-1252.html b/test/cp-1252.html new file mode 100644 index 0000000..387c92a --- /dev/null +++ b/test/cp-1252.html @@ -0,0 +1,178 @@ +<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN"> +<HTML> +<HEAD> +<TITLE>Character table for cp-1252</TITLE> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=cp-1252"> +<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org"> +<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/"> +<LINK REL="sibling" HREF="iso-8859-1.html" TITLE="iso-8859-1 test"> +<LINK REL="sibling" HREF="ALT88592.html" TITLE="iso-8859-2 ALT test"> +</HEAD> + +<BODY> + +<H1 ALIGN=center>cp-1252 table</H1> + +<PRE> +Description Code Entity name +=================================== ============ ============== +quotation mark &#34; --> " &quot; --> " +ampersand &#38; --> & &amp; --> & +less-than sign &#60; --> < &lt; --> < +greater-than sign &#62; --> > &gt; --> > + +Description Char Code Entity name +=================================== ==== ============ ============== +euro sign € &128; --> € +single low-9 quotation mark ‚ &130; --> ‚ +latin small letter f with hook ƒ &131; --> ƒ +double low-9 quotation mark „ &132; --> „ +horizontal ellipsis … &133; --> … +dagger † &134; --> † +double dagger ‡ &135; --> ‡ +modifier letter circumflex accent ˆ &136; --> ˆ +per mille sign ‰ &137; --> ‰ +latin capital letter s with caron Š &138; --> Š +single left-pointing angle quote mark ‹ &139; --> ‹ +latin capital ligature oe Œ &140; --> Œ +latin 
capital letter z with caron Ž &142; --> Ž + +left single quotation mark ‘ &145; --> ‘ +right single quotation mark ’ &146; --> ’ +left double quotation mark “ &147; --> “ +right double quotation mark ” &148; --> ” +bullet • &149; --> • +en dash – &150; --> – +em dash — &151; --> — +small tilde ˜ &152; --> ˜ +trade mark sign ™ &153; --> ™ +latin small letter s with caron š &154; --> š +single right-pointing angle quote mark › &155; --> › +latin small ligature oe œ &156; --> œ +latin small letter z with caron ž &158; --> ž +latin capital letter y with diaeresis Ÿ &159; --> Ÿ + +non-breaking space &#160; -->   &nbsp; --> +inverted exclamation ¡ &#161; --> ¡ &iexcl; --> ¡ +cent sign ¢ &#162; --> ¢ &cent; --> ¢ +pound sterling £ &#163; --> £ &pound; --> £ +general currency sign ¤ &#164; --> ¤ &curren; --> ¤ +yen sign ¥ &#165; --> ¥ &yen; --> ¥ +broken vertical bar ¦ &#166; --> ¦ &brvbar; --> ¦ +section sign § &#167; --> § &sect; --> § +umlaut (dieresis) ¨ &#168; --> ¨ &uml; --> ¨ +copyright © &#169; --> © &copy; --> © +feminine ordinal ª &#170; --> ª &ordf; --> ª +left angle quote, guillemotleft « &#171; --> « &laquo; --> « +not sign ¬ &#172; --> ¬ &not; --> ¬ +soft hyphen &#173; --> ­ &shy; --> ­ +registered trademark ® &#174; --> ® &reg; --> ® +macron accent ¯ &#175; --> ¯ &macr; --> ¯ + +degree sign ° &#176; --> ° &deg; --> ° +plus or minus ± &#177; --> ± &plusmn; --> ± +superscript two ² &#178; --> ² &sup2; --> ² +superscript three ³ &#179; --> ³ &sup3; --> ³ +acute accent ´ &#180; --> ´ &acute; --> ´ +micro sign µ &#181; --> µ &micro; --> µ +paragraph sign ¶ &#182; --> ¶ &para; --> ¶ +middle dot · &#183; --> · &middot; --> · +cedilla ¸ &#184; --> ¸ &cedil; --> ¸ +superscript one ¹ &#185; --> ¹ &sup1; --> ¹ +masculine ordinal º &#186; --> º &ordm; --> º +right angle quote, guillemotright » &#187; --> » &raquo; --> » +vulgar fraction one-quarter ¼ &#188; --> ¼ &frac14; --> ¼ +vulgar fraction one-half ½ &#189; --> ½ &frac12; --> ½ +vulgar fraction three-fourths ¾ 
&#190; --> ¾ &frac34; --> ¾ +inverted question mark ¿ &#191; --> ¿ &iquest; --> ¿ + +latin capital letter a with grave À &#192; --> À &Agrave; --> À +latin capital letter a with acute Á &#193; --> Á &Aacute; --> Á +latin capital letter a with circumflex  &#194; -->  &Acirc; -->  +latin capital letter a with tilde à &#195; --> à &Atilde; --> à +latin capital letter a with diaeresis Ä &#196; --> Ä &Auml; --> Ä +latin capital letter a with ring above Å &#197; --> Å &Aring; --> Å +latin capital letter ae Æ &#198; --> Æ &AElig; --> Æ +latin capital letter c with cedilla Ç &#199; --> Ç &Ccedil; --> Ç +latin capital letter e with grave È &#200; --> È &Egrave; --> È +latin capital letter e with acute É &#201; --> É &Eacute; --> É +latin capital letter e with circumflex Ê &#202; --> Ê &Ecirc; --> Ê +latin capital letter e with diaeresis Ë &#203; --> Ë &Euml; --> Ë +latin capital letter i with grave Ì &#204; --> Ì &Igrave; --> Ì +latin capital letter i with acute Í &#205; --> Í &Iacute; --> Í +latin capital letter i with circumflex Î &#206; --> Î &Icirc; --> Î +latin capital letter i with diaeresis Ï &#207; --> Ï &Iuml; --> Ï + +latin capital letter eth Ð &#208; --> Ð &ETH; --> Ð +latin capital letter n with tilde Ñ &#209; --> Ñ &Ntilde; --> Ñ +latin capital letter o with grave Ò &#210; --> Ò &Ograve; --> Ò +latin capital letter o with acute Ó &#211; --> Ó &Oacute; --> Ó +latin capital letter o with circumflex Ô &#212; --> Ô &Ocirc; --> Ô +latin capital letter o with tilde Õ &#213; --> Õ &Otilde; --> Õ +latin capital letter o with diaeresis Ö &#214; --> Ö &Ouml; --> Ö +multiplication sign × &#215; --> × &times; --> × +latin capital letter o with stroke Ø &#216; --> Ø &Oslash; --> Ø +latin capital letter u with grave Ù &#217; --> Ù &Ugrave; --> Ù +latin capital letter u with acute Ú &#218; --> Ú &Uacute; --> Ú +latin capital letter u with circumflex Û &#219; --> Û &Ucirc; --> Û +latin capital letter u with diaeresis Ü &#220; --> Ü &Uuml; --> Ü +latin capital letter y with 
acute Ý &#221; --> Ý &Yacute; --> Ý +latin capital letter thorn Þ &#222; --> Þ &THORN; --> Þ +latin small letter sharp s ß &#223; --> ß &szlig; --> ß + +latin small letter a with grave à &#224; --> à &agrave; --> à +latin small letter a with acute á &#225; --> á &aacute; --> á +latin small letter a with circumflex â &#226; --> â &acirc; --> â +latin small letter a with tilde ã &#227; --> ã &atilde; --> ã +latin small letter a with diaeresis ä &#228; --> ä &auml; --> ä +latin small letter a with ring above å &#229; --> å &aring; --> å +latin small letter ae æ &#230; --> æ &aelig; --> æ +latin small letter c with cedilla ç &#231; --> ç &ccedil; --> ç +latin small letter e with grave è &#232; --> è &egrave; --> è +latin small letter e with acute é &#233; --> é &eacute; --> é +latin small letter e with circumflex ê &#234; --> ê &ecirc; --> ê +latin small letter e with diaeresis ë &#235; --> ë &euml; --> ë +latin small letter i with grave ì &#236; --> ì &igrave; --> ì +latin small letter i with acute í &#237; --> í &iacute; --> í +latin small letter i with circumflex î &#238; --> î &icirc; --> î +latin small letter i with diaeresis ï &#239; --> ï &iuml; --> ï + +latin small letter eth ð &#240; --> ð &eth; --> ð +latin small letter n with tilde ñ &#241; --> ñ &ntilde; --> ñ +latin small letter o with grave ò &#242; --> ò &ograve; --> ò +latin small letter o with acute ó &#243; --> ó &oacute; --> ó +latin small letter o with circumflex ô &#244; --> ô &ocirc; --> ô +latin small letter o with tilde õ &#245; --> õ &otilde; --> õ +latin small letter o with diaeresis ö &#246; --> ö &ouml; --> ö +division sign ÷ &#247; --> ÷ &divide; --> ÷ +latin small letter o with stroke ø &#248; --> ø &oslash; --> ø +latin small letter u with grave ù &#249; --> ù &ugrave; --> ù +latin small letter u with acute ú &#250; --> ú &uacute; --> ú +latin small letter u with circumflex û &#251; --> û &ucirc; --> û +latin small letter u with diaeresis ü &#252; --> ü &uuml; --> ü +latin small letter y 
with acute ý &#253; --> ý &yacute; --> ý +latin small letter thorn þ &#254; --> þ &thorn; --> þ +latin small letter y with diaeresis {ÿ} {&#255;}-->{ÿ} {&yuml;} -->{ÿ} + +Some other characters of interest Char Code Entity name +=================================== ==== ============ ============== +capital AE diphthong (ligature) N/A &#198; --> Æ &AElig; --> Æ +small ae diphthong (ligature) N/A &#230; --> æ &aelig; --> æ +capital OE ligature N/A {&#338;}-->{Œ} {&OElig;} -->{Œ} +small oe ligature N/A {&#339;}-->{œ} {&oelig;} -->{œ} +copyright N/A &#169; --> © &copy; --> © +registered trademark N/A &#174; --> ® &reg; --> ® +trademark sign N/A &#8482;--> ™ &trade; --> ™ +em space N/A [&#8195;]->[ ] [&emsp;] -->[ ] +en space N/A [&#8194;]->[ ] [&ensp;] -->[ ] +1/3-em space N/A [&#8196;]->[ ] [&emsp13;] -->[ ] +1/4-em space N/A [&#8197;]->[ ] [&emsp14;] -->[ ] +thin space N/A [&#8201;]->[ ] [&thinsp;]-->[ ] +hair space N/A [&#8202;]->[ ] [&hairsp;]-->[ ] +em dash N/A [&#8212;]->[—] [&mdash;] -->[—] +en dash N/A [&#8211;]->[–] [&ndash;] -->[–] + +</PRE> + +</BODY> +</HTML> diff --git a/test/cp-1252a.html b/test/cp-1252a.html new file mode 100644 index 0000000..99800a3 --- /dev/null +++ b/test/cp-1252a.html @@ -0,0 +1,183 @@ +<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN"> +<HTML> +<HEAD> +<TITLE>Character table for cp-1252</TITLE> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=cp-1252"> +<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org"> +<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/"> +<LINK REL="sibling" HREF="iso-8859-1.html" TITLE="iso-8859-1 test"> +<LINK REL="sibling" HREF="ALT88592.html" TITLE="iso-8859-2 ALT test"> +</HEAD> + +<BODY> + +<H1 ALIGN=center>cp-1252 table</H1> + +<PRE> +Description Code Entity name +=================================== ============ ============== +quotation mark &#34; --> " &quot; --> " +ampersand &#38; --> & &amp; --> & +less-than sign &#60; --> < &lt; --> < +greater-than sign &#62; --> > &gt; 
--> > + +Description Char Code Entity name +=================================== ==== ============ ============== +euro sign € &128; --> € +undefined &129; -->  +single low-9 quotation mark ‚ &130; --> ‚ +latin small letter f with hook ƒ &131; --> ƒ +double low-9 quotation mark „ &132; --> „ +horizontal ellipsis … &133; --> … +dagger † &134; --> † +double dagger ‡ &135; --> ‡ +modifier letter circumflex accent ˆ &136; --> ˆ +per mille sign ‰ &137; --> ‰ +latin capital letter s with caron Š &138; --> Š +single left-pointing angle quote mark ‹ &139; --> ‹ +latin capital ligature oe Œ &140; --> Œ +undefined &141; -->  +latin capital letter z with caron Ž &142; --> Ž +undefined &143; -->  + +undefined &144; -->  +left single quotation mark ‘ &145; --> ‘ +right single quotation mark ’ &146; --> ’ +left double quotation mark “ &147; --> “ +right double quotation mark ” &148; --> ” +bullet • &149; --> • +en dash – &150; --> – +em dash — &151; --> — +small tilde ˜ &152; --> ˜ +trade mark sign ™ &153; --> ™ +latin small letter s with caron š &154; --> š +single right-pointing angle quote mark › &155; --> › +latin small ligature oe œ &156; --> œ +undefined &157; -->  +latin small letter z with caron ž &158; --> ž +latin capital letter y with diaeresis Ÿ &159; --> Ÿ + +non-breaking space &#160; -->   &nbsp; --> +inverted exclamation ¡ &#161; --> ¡ &iexcl; --> ¡ +cent sign ¢ &#162; --> ¢ &cent; --> ¢ +pound sterling £ &#163; --> £ &pound; --> £ +general currency sign ¤ &#164; --> ¤ &curren; --> ¤ +yen sign ¥ &#165; --> ¥ &yen; --> ¥ +broken vertical bar ¦ &#166; --> ¦ &brvbar; --> ¦ +section sign § &#167; --> § &sect; --> § +umlaut (dieresis) ¨ &#168; --> ¨ &uml; --> ¨ +copyright © &#169; --> © &copy; --> © +feminine ordinal ª &#170; --> ª &ordf; --> ª +left angle quote, guillemotleft « &#171; --> « &laquo; --> « +not sign ¬ &#172; --> ¬ &not; --> ¬ +soft hyphen &#173; --> ­ &shy; --> ­ +registered trademark ® &#174; --> ® &reg; --> ® +macron accent ¯ &#175; --> ¯ &macr; 
--> ¯ + +degree sign ° &#176; --> ° &deg; --> ° +plus or minus ± &#177; --> ± &plusmn; --> ± +superscript two ² &#178; --> ² &sup2; --> ² +superscript three ³ &#179; --> ³ &sup3; --> ³ +acute accent ´ &#180; --> ´ &acute; --> ´ +micro sign µ &#181; --> µ &micro; --> µ +paragraph sign ¶ &#182; --> ¶ &para; --> ¶ +middle dot · &#183; --> · &middot; --> · +cedilla ¸ &#184; --> ¸ &cedil; --> ¸ +superscript one ¹ &#185; --> ¹ &sup1; --> ¹ +masculine ordinal º &#186; --> º &ordm; --> º +right angle quote, guillemotright » &#187; --> » &raquo; --> » +vulgar fraction one-quarter ¼ &#188; --> ¼ &frac14; --> ¼ +vulgar fraction one-half ½ &#189; --> ½ &frac12; --> ½ +vulgar fraction three-fourths ¾ &#190; --> ¾ &frac34; --> ¾ +inverted question mark ¿ &#191; --> ¿ &iquest; --> ¿ + +latin capital letter a with grave À &#192; --> À &Agrave; --> À +latin capital letter a with acute Á &#193; --> Á &Aacute; --> Á +latin capital letter a with circumflex  &#194; -->  &Acirc; -->  +latin capital letter a with tilde à &#195; --> à &Atilde; --> à +latin capital letter a with diaeresis Ä &#196; --> Ä &Auml; --> Ä +latin capital letter a with ring above Å &#197; --> Å &Aring; --> Å +latin capital letter ae Æ &#198; --> Æ &AElig; --> Æ +latin capital letter c with cedilla Ç &#199; --> Ç &Ccedil; --> Ç +latin capital letter e with grave È &#200; --> È &Egrave; --> È +latin capital letter e with acute É &#201; --> É &Eacute; --> É +latin capital letter e with circumflex Ê &#202; --> Ê &Ecirc; --> Ê +latin capital letter e with diaeresis Ë &#203; --> Ë &Euml; --> Ë +latin capital letter i with grave Ì &#204; --> Ì &Igrave; --> Ì +latin capital letter i with acute Í &#205; --> Í &Iacute; --> Í +latin capital letter i with circumflex Î &#206; --> Î &Icirc; --> Î +latin capital letter i with diaeresis Ï &#207; --> Ï &Iuml; --> Ï + +latin capital letter eth Ð &#208; --> Ð &ETH; --> Ð +latin capital letter n with tilde Ñ &#209; --> Ñ &Ntilde; --> Ñ +latin capital letter o with grave Ò &#210; --> 
Ò &Ograve; --> Ò +latin capital letter o with acute Ó &#211; --> Ó &Oacute; --> Ó +latin capital letter o with circumflex Ô &#212; --> Ô &Ocirc; --> Ô +latin capital letter o with tilde Õ &#213; --> Õ &Otilde; --> Õ +latin capital letter o with diaeresis Ö &#214; --> Ö &Ouml; --> Ö +multiplication sign × &#215; --> × &times; --> × +latin capital letter o with stroke Ø &#216; --> Ø &Oslash; --> Ø +latin capital letter u with grave Ù &#217; --> Ù &Ugrave; --> Ù +latin capital letter u with acute Ú &#218; --> Ú &Uacute; --> Ú +latin capital letter u with circumflex Û &#219; --> Û &Ucirc; --> Û +latin capital letter u with diaeresis Ü &#220; --> Ü &Uuml; --> Ü +latin capital letter y with acute Ý &#221; --> Ý &Yacute; --> Ý +latin capital letter thorn Þ &#222; --> Þ &THORN; --> Þ +latin small letter sharp s ß &#223; --> ß &szlig; --> ß + +latin small letter a with grave à &#224; --> à &agrave; --> à +latin small letter a with acute á &#225; --> á &aacute; --> á +latin small letter a with circumflex â &#226; --> â &acirc; --> â +latin small letter a with tilde ã &#227; --> ã &atilde; --> ã +latin small letter a with diaeresis ä &#228; --> ä &auml; --> ä +latin small letter a with ring above å &#229; --> å &aring; --> å +latin small letter ae æ &#230; --> æ &aelig; --> æ +latin small letter c with cedilla ç &#231; --> ç &ccedil; --> ç +latin small letter e with grave è &#232; --> è &egrave; --> è +latin small letter e with acute é &#233; --> é &eacute; --> é +latin small letter e with circumflex ê &#234; --> ê &ecirc; --> ê +latin small letter e with diaeresis ë &#235; --> ë &euml; --> ë +latin small letter i with grave ì &#236; --> ì &igrave; --> ì +latin small letter i with acute í &#237; --> í &iacute; --> í +latin small letter i with circumflex î &#238; --> î &icirc; --> î +latin small letter i with diaeresis ï &#239; --> ï &iuml; --> ï + +latin small letter eth ð &#240; --> ð &eth; --> ð +latin small letter n with tilde ñ &#241; --> ñ &ntilde; --> ñ +latin small 
letter o with grave ò &#242; --> ò &ograve; --> ò +latin small letter o with acute ó &#243; --> ó &oacute; --> ó +latin small letter o with circumflex ô &#244; --> ô &ocirc; --> ô +latin small letter o with tilde õ &#245; --> õ &otilde; --> õ +latin small letter o with diaeresis ö &#246; --> ö &ouml; --> ö +division sign ÷ &#247; --> ÷ &divide; --> ÷ +latin small letter o with stroke ø &#248; --> ø &oslash; --> ø +latin small letter u with grave ù &#249; --> ù &ugrave; --> ù +latin small letter u with acute ú &#250; --> ú &uacute; --> ú +latin small letter u with circumflex û &#251; --> û &ucirc; --> û +latin small letter u with diaeresis ü &#252; --> ü &uuml; --> ü +latin small letter y with acute ý &#253; --> ý &yacute; --> ý +latin small letter thorn þ &#254; --> þ &thorn; --> þ +latin small letter y with diaeresis {ÿ} {&#255;}-->{ÿ} {&yuml;} -->{ÿ} + +Some other characters of interest Char Code Entity name +=================================== ==== ============ ============== +capital AE diphthong (ligature) N/A &#198; --> Æ &AElig; --> Æ +small ae diphthong (ligature) N/A &#230; --> æ &aelig; --> æ +capital OE ligature N/A {&#338;}-->{Œ} {&OElig;} -->{Œ} +small oe ligature N/A {&#339;}-->{œ} {&oelig;} -->{œ} +copyright N/A &#169; --> © &copy; --> © +registered trademark N/A &#174; --> ® &reg; --> ® +trademark sign N/A &#8482;--> ™ &trade; --> ™ +em space N/A [&#8195;]->[ ] [&emsp;] -->[ ] +en space N/A [&#8194;]->[ ] [&ensp;] -->[ ] +1/3-em space N/A [&#8196;]->[ ] [&emsp13;] -->[ ] +1/4-em space N/A [&#8197;]->[ ] [&emsp14;] -->[ ] +thin space N/A [&#8201;]->[ ] [&thinsp;]-->[ ] +hair space N/A [&#8202;]->[ ] [&hairsp;]-->[ ] +em dash N/A [&#8212;]->[—] [&mdash;] -->[—] +en dash N/A [&#8211;]->[–] [&ndash;] -->[–] + +</PRE> + +</BODY> +</HTML> diff --git a/test/image.jpg b/test/image.jpg Binary files differnew file mode 100644 index 0000000..5c102b8 --- /dev/null +++ b/test/image.jpg diff --git a/test/iso-8859-1.html b/test/iso-8859-1.html new file mode 100644 
index 0000000..b9349fa --- /dev/null +++ b/test/iso-8859-1.html @@ -0,0 +1,241 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<!-- X-URL: http://www.ramsch.org/martin/uni/fmi-hp/iso8859-1.html --> +<!-- Date: Tue, 28 Dec 2004 20:24:09 GMT --> +<!-- Last-Modified: Mon, 15 May 2000 09:37:37 GMT --> +<HTML> +<HEAD> +<TITLE>Martin Ramsch - iso8859-1 table</TITLE> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1"> +<BASE HREF="http://www.ramsch.org/martin/uni/fmi-hp/iso8859-1.html"> +</HEAD> + +<BODY> + +<H1 ALIGN=center>iso8859-1 table</H1> + +<PRE> +Description Code Entity name +=================================== ============ ============== +quotation mark &#34; --> " &quot; --> " +ampersand &#38; --> & &amp; --> & +less-than sign &#60; --> < &lt; --> < +greater-than sign &#62; --> > &gt; --> > + +Description Char Code Entity name +=================================== ==== ============ ============== +non-breaking space &#160; -->   &nbsp; --> +inverted exclamation ¡ &#161; --> ¡ &iexcl; --> ¡ +cent sign ¢ &#162; --> ¢ &cent; --> ¢ +pound sterling £ &#163; --> £ &pound; --> £ +general currency sign ¤ &#164; --> ¤ &curren; --> ¤ +yen sign ¥ &#165; --> ¥ &yen; --> ¥ +broken vertical bar ¦ &#166; --> ¦ &brvbar; --> ¦ + Non-standard &brkbar; --> &brkbar; +section sign § &#167; --> § &sect; --> § +umlaut (dieresis) ¨ &#168; --> ¨ &uml; --> ¨ + Non-standard &die; --> ¨ +copyright © &#169; --> © &copy; --> © +feminine ordinal ª &#170; --> ª &ordf; --> ª +left angle quote, guillemotleft « &#171; --> « &laquo; --> « +not sign ¬ &#172; --> ¬ &not; --> ¬ +soft hyphen &#173; --> ­ &shy; --> ­ +registered trademark ® &#174; --> ® &reg; --> ® +macron accent ¯ &#175; --> ¯ &macr; --> ¯ + Non-standard &hibar; --> &hibar; +degree sign ° &#176; --> ° &deg; --> ° +plus or minus ± &#177; --> ± &plusmn; --> ± +superscript two ² &#178; --> ² &sup2; --> ² +superscript three ³ &#179; --> ³ &sup3; --> ³ +acute accent ´ &#180; --> ´ &acute; --> ´ 
+micro sign µ &#181; --> µ &micro; --> µ +paragraph sign ¶ &#182; --> ¶ &para; --> ¶ +middle dot · &#183; --> · &middot; --> · +cedilla ¸ &#184; --> ¸ &cedil; --> ¸ +superscript one ¹ &#185; --> ¹ &sup1; --> ¹ +masculine ordinal º &#186; --> º &ordm; --> º +right angle quote, guillemotright » &#187; --> » &raquo; --> » +fraction one-fourth ¼ &#188; --> ¼ &frac14; --> ¼ +fraction one-half ½ &#189; --> ½ &frac12; --> ½ +fraction three-fourths ¾ &#190; --> ¾ &frac34; --> ¾ +inverted question mark ¿ &#191; --> ¿ &iquest; --> ¿ +capital A, grave accent À &#192; --> À &Agrave; --> À +capital A, acute accent Á &#193; --> Á &Aacute; --> Á +capital A, circumflex accent  &#194; -->  &Acirc; -->  +capital A, tilde à &#195; --> à &Atilde; --> à +capital A, dieresis or umlaut mark Ä &#196; --> Ä &Auml; --> Ä +capital A, ring Å &#197; --> Å &Aring; --> Å +capital AE diphthong (ligature) Æ &#198; --> Æ &AElig; --> Æ +capital C, cedilla Ç &#199; --> Ç &Ccedil; --> Ç +capital E, grave accent È &#200; --> È &Egrave; --> È +capital E, acute accent É &#201; --> É &Eacute; --> É +capital E, circumflex accent Ê &#202; --> Ê &Ecirc; --> Ê +capital E, dieresis or umlaut mark Ë &#203; --> Ë &Euml; --> Ë +capital I, grave accent Ì &#204; --> Ì &Igrave; --> Ì +capital I, acute accent Í &#205; --> Í &Iacute; --> Í +capital I, circumflex accent Î &#206; --> Î &Icirc; --> Î +capital I, dieresis or umlaut mark Ï &#207; --> Ï &Iuml; --> Ï +capital Eth, Icelandic Ð &#208; --> Ð &ETH; --> Ð + Non-standard &Dstrok; --> Đ +capital N, tilde Ñ &#209; --> Ñ &Ntilde; --> Ñ +capital O, grave accent Ò &#210; --> Ò &Ograve; --> Ò +capital O, acute accent Ó &#211; --> Ó &Oacute; --> Ó +capital O, circumflex accent Ô &#212; --> Ô &Ocirc; --> Ô +capital O, tilde Õ &#213; --> Õ &Otilde; --> Õ +capital O, dieresis or umlaut mark Ö &#214; --> Ö &Ouml; --> Ö +multiply sign × &#215; --> × &times; --> × +capital O, slash Ø &#216; --> Ø &Oslash; --> Ø +capital U, grave accent Ù &#217; --> Ù &Ugrave; --> Ù +capital U, 
acute accent Ú &#218; --> Ú &Uacute; --> Ú +capital U, circumflex accent Û &#219; --> Û &Ucirc; --> Û +capital U, dieresis or umlaut mark Ü &#220; --> Ü &Uuml; --> Ü +capital Y, acute accent Ý &#221; --> Ý &Yacute; --> Ý +capital THORN, Icelandic Þ &#222; --> Þ &THORN; --> Þ +small sharp s, German (sz ligature) ß &#223; --> ß &szlig; --> ß +small a, grave accent à &#224; --> à &agrave; --> à +small a, acute accent á &#225; --> á &aacute; --> á +small a, circumflex accent â &#226; --> â &acirc; --> â +small a, tilde ã &#227; --> ã &atilde; --> ã +small a, dieresis or umlaut mark ä &#228; --> ä &auml; --> ä +small a, ring å &#229; --> å &aring; --> å +small ae diphthong (ligature) æ &#230; --> æ &aelig; --> æ +small c, cedilla ç &#231; --> ç &ccedil; --> ç +small e, grave accent è &#232; --> è &egrave; --> è +small e, acute accent é &#233; --> é &eacute; --> é +small e, circumflex accent ê &#234; --> ê &ecirc; --> ê +small e, dieresis or umlaut mark ë &#235; --> ë &euml; --> ë +small i, grave accent ì &#236; --> ì &igrave; --> ì +small i, acute accent í &#237; --> í &iacute; --> í +small i, circumflex accent î &#238; --> î &icirc; --> î +small i, dieresis or umlaut mark ï &#239; --> ï &iuml; --> ï +small eth, Icelandic ð &#240; --> ð &eth; --> ð +small n, tilde ñ &#241; --> ñ &ntilde; --> ñ +small o, grave accent ò &#242; --> ò &ograve; --> ò +small o, acute accent ó &#243; --> ó &oacute; --> ó +small o, circumflex accent ô &#244; --> ô &ocirc; --> ô +small o, tilde õ &#245; --> õ &otilde; --> õ +small o, dieresis or umlaut mark ö &#246; --> ö &ouml; --> ö +division sign ÷ &#247; --> ÷ &divide; --> ÷ +small o, slash ø &#248; --> ø &oslash; --> ø +small u, grave accent ù &#249; --> ù &ugrave; --> ù +small u, acute accent ú &#250; --> ú &uacute; --> ú +small u, circumflex accent û &#251; --> û &ucirc; --> û +small u, dieresis or umlaut mark ü &#252; --> ü &uuml; --> ü +small y, acute accent ý &#253; --> ý &yacute; --> ý +small thorn, Icelandic þ &#254; --> þ &thorn; 
--> þ +small y, dieresis or umlaut mark ÿ &#255; --> ÿ &yuml; --> ÿ +</PRE> +<!-- removed: second /PRE, a hack for HotJava 1.0 preBeta 1 --> +<HR> + +<STRONG>How to read</STRONG> this table. The columns are +<DL COMPACT> +<DT>1st:<DD>textual <EM>description</EM> of the character +<DT>2nd:<DD>character inserted directly into the HTML page as <EM>one + byte</EM> +<DT>3rd:<DD>character written as <EM>numeric HTML entity</EM>, in the + format:<BR>"how it looks literally" <CODE>--></CODE> + "what your browser does with it" +<DT>4th:<DD>character written as <EM>symbolic HTML entity</EM>, in the + format:<BR>"how it looks literally" <CODE>--></CODE> + "what your browser does with it" +</DL> + +So for example, if you see something like "<CODE>&divide; --> +&divide;</CODE>" in the 4th column, this means your browser +doesn't know about the entity name "divide" and just puts it +literally. + +<P> +<STRONG>This table</STRONG> grew out of an overview of the "ISO +Latin-1 Character Set" related to the Hyper-G Text Format +(<A HREF="http://www.hyperwave.de/HTFdoc">HTF</A>). + +The entity names <CODE>&brkbar;</CODE> and <CODE>&Dstrok;</CODE> +seem to be unique to HTF. + +The entity name <CODE>&hibar;</CODE> has been supported by X Mosaic +but seems to have been replaced by <CODE>&macr;</CODE>. + +The entity names <CODE>&uml;</CODE> and <CODE>&die;</CODE> should +be equivalent. + +<P><STRONG>The standards stuff:</STRONG> +The +<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/">HTML 2.0 Standard</A> +includes a section on +<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_9.html#SEC99">Character Entity Sets</A> +and an overview on the +<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_13.html#SEC106">HTML Coded Character Set</A> +(The entity names are derived from <A HREF="http://www.ucc.ie/info/net/isolat1.html">ISO 8879</A>).
+<BR> + +Or have a look at the +<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html3/latin1.html">Latin-1 Character Entities</A> +as listed in a draft for the +<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html3/CoverPage.html">HTML 3.0 specification</A>. +<BR> + +The +<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_59.html">Appendix II</A> +of CERN's +<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_1.html">HTML+ Discussion Document</A> +contains a +<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_table.ps">table</A> +(in PostScript format) of the proposed character entities for HTML+ and their +corresponding character codes for Unicode and the Adobe Latin-1 & Symbol +character sets. +<P> + +<STRONG>Please note</STRONG> that there is nothing wrong with using +characters of ISO Latin-1 above 127: the normal transmission protocol +for the WWW, +<A HREF="http://www.w3.org/pub/WWW/Protocols/rfc1945/rfc1945">HTTP/1.0</A>, +uses the 8bit ISO latin-1 as default encoding. +(Thanks to Roman +Czyborra for pointing this out!) +<P> + +<STRONG>Other information:</STRONG> +<UL> + +<LI><STRONG>Kevin J. Brewer</STRONG> has done two very good pages on the subject: + <UL> + <LI><A HREF="http://www.bbsinc.com/iso8859.html">ASCII - ISO 8859-1 (Latin-1) with HTML 3.0 Entities Table</A> and + <LI><A HREF="http://www.bbsinc.com/iso8879.html">ISO 8879 Entities Gopher Menu</A> + </UL> + +<LI>The excellent overview on the series of + <A HREF="http://czyborra.com/charsets/iso8859.html">ISO 8859 + character sets</A> compiled by Roman Czyborra. + +<LI>Also have a look at Alan Flavell's page of + <A HREF="http://ppewww.ph.gla.ac.uk/%7Eflavell/iso8859/iso8859-pointers.html">pointers + to information about ISO8859</A>. It's written very well!
+ +<LI>Maybe also of interest to you is the + <A HREF="ftp://ftp.vlsivie.tuwien.ac.at/pub/8bit/FAQ-ISO-8859-1">ISO + 8859-1 FAQ</A> by Michael Gschwind + (<A HREF="mailto:mike@vlsivie.tuwien.ac.at">mike@vlsivie.tuwien.ac.at</A>), + part of his page on + <A HREF="http://www.vlsivie.tuwien.ac.at/mike/i18n.html">Internationalization</A>. + +<LI>For users of X11R5 on SunOS systems: the + <A HREF="Compose.txt">table over the compose combinations</A> + (also coded <A HREF="Compose.html">with entities</A> where possible). + It's taken from the MIT X sources in + <CODE>server/ddx/sun/Compose.list</CODE>. + +<LI>Finally you could have a look at + <A HREF="ftp://ds.internic.net/rfc/rfc1345.txt">RFC 1345: + Character Mnemonics & Character Sets</A> + by K. Simonsen (06/11/92, 103 pages, approx. 240 kbyte). + +</UL> + + +<HR> + +<ADDRESS><A HREF="http://ramsch.home.pages.de/">Martin Ramsch</A>, 16.02.1994, 07.01.1996, 01.07.1996, 1998-10-09, 2000-05-15</ADDRESS> + +</BODY> +</HTML> diff --git a/test/iso-8859-1a.html b/test/iso-8859-1a.html new file mode 100644 index 0000000..972329d --- /dev/null +++ b/test/iso-8859-1a.html @@ -0,0 +1,275 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<!-- X-URL: http://www.ramsch.org/martin/uni/fmi-hp/iso8859-1.html --> +<!-- Date: Tue, 28 Dec 2004 20:24:09 GMT --> +<!-- Last-Modified: Mon, 15 May 2000 09:37:37 GMT --> +<HTML> +<HEAD> +<TITLE>Martin Ramsch - iso8859-1 table</TITLE> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1"> +<BASE HREF="http://www.ramsch.org/martin/uni/fmi-hp/iso8859-1.html"> +</HEAD> + +<BODY> + +<H1 ALIGN=center>iso8859-1 table, with cp-1252</H1> + +<PRE> +Description Code Entity name +=================================== ============ ============== +quotation mark &#34; --> " &quot; --> " +ampersand &#38; --> & &amp; --> & +less-than sign &#60; --> < &lt; --> < +greater-than sign &#62; --> > &gt; --> > + +Description Char Code Entity name 
+=================================== ==== ============ ============== +euro sign € &128; --> € +undefined &129; -->  +single low-9 quotation mark ‚ &130; --> ‚ +latin small letter f with hook ƒ &131; --> ƒ +double low-9 quotation mark „ &132; --> „ +horizontal ellipsis … &133; --> … +dagger † &134; --> † +double dagger ‡ &135; --> ‡ +modifier letter circumflex accent ˆ &136; --> ˆ +per mille sign ‰ &137; --> ‰ +latin capital letter s with caron Š &138; --> Š +single left-pointing angle quote mark ‹ &139; --> ‹ +latin capital ligature oe Œ &140; --> Œ +undefined &141; -->  +latin capital letter z with caron Ž &142; --> Ž +undefined &143; -->  + +undefined &144; -->  +left single quotation mark ‘ &145; --> ‘ +right single quotation mark ’ &146; --> ’ +left double quotation mark “ &147; --> “ +right double quotation mark ” &148; --> ” +bullet • &149; --> • +en dash – &150; --> – +em dash — &151; --> — +small tilde ˜ &152; --> ˜ +trade mark sign ™ &153; --> ™ +latin small letter s with caron š &154; --> š +single right-pointing angle quote mark › &155; --> › +latin small ligature oe œ &156; --> œ +undefined &157; -->  +latin small letter z with caron ž &158; --> ž +latin capital letter y with diaeresis Ÿ &159; --> Ÿ + +non-breaking space &#160; -->   &nbsp; --> +inverted exclamation ¡ &#161; --> ¡ &iexcl; --> ¡ +cent sign ¢ &#162; --> ¢ &cent; --> ¢ +pound sterling £ &#163; --> £ &pound; --> £ +general currency sign ¤ &#164; --> ¤ &curren; --> ¤ +yen sign ¥ &#165; --> ¥ &yen; --> ¥ +broken vertical bar ¦ &#166; --> ¦ &brvbar; --> ¦ + Non-standard &brkbar; --> &brkbar; +section sign § &#167; --> § &sect; --> § +umlaut (dieresis) ¨ &#168; --> ¨ &uml; --> ¨ + Non-standard &die; --> ¨ +copyright © &#169; --> © &copy; --> © +feminine ordinal ª &#170; --> ª &ordf; --> ª +left angle quote, guillemotleft « &#171; --> « &laquo; --> « +not sign ¬ &#172; --> ¬ &not; --> ¬ +soft hyphen &#173; --> ­ &shy; --> ­ +registered trademark ® &#174; --> ® &reg; --> ® +macron accent ¯ 
&#175; --> ¯ &macr; --> ¯ + Non-standard &hibar; --> &hibar; +degree sign ° &#176; --> ° &deg; --> ° +plus or minus ± &#177; --> ± &plusmn; --> ± +superscript two ² &#178; --> ² &sup2; --> ² +superscript three ³ &#179; --> ³ &sup3; --> ³ +acute accent ´ &#180; --> ´ &acute; --> ´ +micro sign µ &#181; --> µ &micro; --> µ +paragraph sign ¶ &#182; --> ¶ &para; --> ¶ +middle dot · &#183; --> · &middot; --> · +cedilla ¸ &#184; --> ¸ &cedil; --> ¸ +superscript one ¹ &#185; --> ¹ &sup1; --> ¹ +masculine ordinal º &#186; --> º &ordm; --> º +right angle quote, guillemotright » &#187; --> » &raquo; --> » +fraction one-fourth ¼ &#188; --> ¼ &frac14; --> ¼ +fraction one-half ½ &#189; --> ½ &frac12; --> ½ +fraction three-fourths ¾ &#190; --> ¾ &frac34; --> ¾ +inverted question mark ¿ &#191; --> ¿ &iquest; --> ¿ +capital A, grave accent À &#192; --> À &Agrave; --> À +capital A, acute accent Á &#193; --> Á &Aacute; --> Á +capital A, circumflex accent  &#194; -->  &Acirc; -->  +capital A, tilde à &#195; --> à &Atilde; --> à +capital A, dieresis or umlaut mark Ä &#196; --> Ä &Auml; --> Ä +capital A, ring Å &#197; --> Å &Aring; --> Å +capital AE diphthong (ligature) Æ &#198; --> Æ &AElig; --> Æ +capital C, cedilla Ç &#199; --> Ç &Ccedil; --> Ç +capital E, grave accent È &#200; --> È &Egrave; --> È +capital E, acute accent É &#201; --> É &Eacute; --> É +capital E, circumflex accent Ê &#202; --> Ê &Ecirc; --> Ê +capital E, dieresis or umlaut mark Ë &#203; --> Ë &Euml; --> Ë +capital I, grave accent Ì &#204; --> Ì &Igrave; --> Ì +capital I, acute accent Í &#205; --> Í &Iacute; --> Í +capital I, circumflex accent Î &#206; --> Î &Icirc; --> Î +capital I, dieresis or umlaut mark Ï &#207; --> Ï &Iuml; --> Ï +capital Eth, Icelandic Ð &#208; --> Ð &ETH; --> Ð + Non-standard &Dstrok; --> Đ +capital N, tilde Ñ &#209; --> Ñ &Ntilde; --> Ñ +capital O, grave accent Ò &#210; --> Ò &Ograve; --> Ò +capital O, acute accent Ó &#211; --> Ó &Oacute; --> Ó +capital O, circumflex accent Ô &#212; --> Ô 
&Ocirc; --> Ô +capital O, tilde Õ &#213; --> Õ &Otilde; --> Õ +capital O, dieresis or umlaut mark Ö &#214; --> Ö &Ouml; --> Ö +multiply sign × &#215; --> × &times; --> × +capital O, slash Ø &#216; --> Ø &Oslash; --> Ø +capital U, grave accent Ù &#217; --> Ù &Ugrave; --> Ù +capital U, acute accent Ú &#218; --> Ú &Uacute; --> Ú +capital U, circumflex accent Û &#219; --> Û &Ucirc; --> Û +capital U, dieresis or umlaut mark Ü &#220; --> Ü &Uuml; --> Ü +capital Y, acute accent Ý &#221; --> Ý &Yacute; --> Ý +capital THORN, Icelandic Þ &#222; --> Þ &THORN; --> Þ +small sharp s, German (sz ligature) ß &#223; --> ß &szlig; --> ß +small a, grave accent à &#224; --> à &agrave; --> à +small a, acute accent á &#225; --> á &aacute; --> á +small a, circumflex accent â &#226; --> â &acirc; --> â +small a, tilde ã &#227; --> ã &atilde; --> ã +small a, dieresis or umlaut mark ä &#228; --> ä &auml; --> ä +small a, ring å &#229; --> å &aring; --> å +small ae diphthong (ligature) æ &#230; --> æ &aelig; --> æ +small c, cedilla ç &#231; --> ç &ccedil; --> ç +small e, grave accent è &#232; --> è &egrave; --> è +small e, acute accent é &#233; --> é &eacute; --> é +small e, circumflex accent ê &#234; --> ê &ecirc; --> ê +small e, dieresis or umlaut mark ë &#235; --> ë &euml; --> ë +small i, grave accent ì &#236; --> ì &igrave; --> ì +small i, acute accent í &#237; --> í &iacute; --> í +small i, circumflex accent î &#238; --> î &icirc; --> î +small i, dieresis or umlaut mark ï &#239; --> ï &iuml; --> ï +small eth, Icelandic ð &#240; --> ð &eth; --> ð +small n, tilde ñ &#241; --> ñ &ntilde; --> ñ +small o, grave accent ò &#242; --> ò &ograve; --> ò +small o, acute accent ó &#243; --> ó &oacute; --> ó +small o, circumflex accent ô &#244; --> ô &ocirc; --> ô +small o, tilde õ &#245; --> õ &otilde; --> õ +small o, dieresis or umlaut mark ö &#246; --> ö &ouml; --> ö +division sign ÷ &#247; --> ÷ &divide; --> ÷ +small o, slash ø &#248; --> ø &oslash; --> ø +small u, grave accent ù &#249; --> ù 
&ugrave; --> ù +small u, acute accent ú &#250; --> ú &uacute; --> ú +small u, circumflex accent û &#251; --> û &ucirc; --> û +small u, dieresis or umlaut mark ü &#252; --> ü &uuml; --> ü +small y, acute accent ý &#253; --> ý &yacute; --> ý +small thorn, Icelandic þ &#254; --> þ &thorn; --> þ +small y, dieresis or umlaut mark ÿ &#255; --> ÿ &yuml; --> ÿ +</PRE> +<!-- removed: second /PRE, a hack for HotJava 1.0 preBeta 1 --> +<HR> + +<STRONG>How to read</STRONG> this table. The columns are +<DL COMPACT> +<DT>1st:<DD>textual <EM>description</EM> of the character +<DT>2nd:<DD>character inserted directly into the HTML page as <EM>one + byte</EM> +<DT>3rd:<DD>character written as <EM>numeric HTML entity</EM>, in the + format:<BR>"how it looks literally" <CODE>--></CODE> + "what your browser does with it" +<DT>4th:<DD>character written as <EM>symbolic HTML entity</EM>, in the + format:<BR>"how it looks literally" <CODE>--></CODE> + "what your browser does with it" +</DL> + +So for example, if you see something like "<CODE>&divide; --> +&divide;</CODE>" in the 4th column, this means your browser +doesn't know about the entity name "divide" and just puts it +literally. + +<P> +<STRONG>This table</STRONG> grew out of an overview of the "ISO +Latin-1 Character Set" overview related to the Hyper-G Text Format +(<A HREF="http://www.hyperwave.de/HTFdoc">HTF</A>). + +The entity names <CODE>&brkbar;</CODE> and <CODE>&Dstrok;</CODE> +seem to be unique to HTF. + +The entity name <CODE>&hibar;</CODE> has been supported by X Mosaic +but seems to be replaced with <CODE>&macr;</CODE>. + +The entity names <CODE>&uml;</CODE> and <CODE>&die;</CODE> should +be equivalent. 
+ +<P><STRONG>The standards stuff:</STRONG> +The +<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/">HTML 2.0 Standard</A> +includes a section on +<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_9.html#SEC99">Character Entity Sets</A> +and an overview on the +<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_13.html#SEC106">HTML Coded Character Set</A> +(The entity names are derived from <A HREF="http://www.ucc.ie/info/net/isolat1.html">ISO 8879</A>). +<BR> + +Or have a look at the +<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html3/latin1.html">Latin-1 Character Entities</A> +as listed in a draft for the +<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/html3/CoverPage.html">HTML 3.0 specification</A>. +<BR> + +The +<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_59.html">Appendix II</A> +of CERN's +<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_1.html">HTML+ Discussion Document</A> +contains a +<A HREF="http://www.w3.org/hypertext/WWW/MarkUp/HTMLPlus/htmlplus_table.ps">table</A> +(in PostScript format) of the proposed character entities for HTML+ and their +corresponding character codes for Unicode and the Adobe Latin-1 & Symbol +character sets. +<P> + +<STRONG>Please note</STRONG> that there is nothing wrong with using +characters of ISO Latin-1 above 127: the normal transmission protocol +for the WWW, +<A HREF="http://www.w3.org/pub/WWW/Protocols/rfc1945/rfc1945">HTTP/1.0</A>, +uses the 8bit ISO latin-1 as default encoding. +(Thanks to Roman +Czyborra for pointing this out!) +<P> + +<STRONG>Other information:</STRONG> +<UL> + +<LI><STRONG>Kevin J.
Brewer</STRONG> has done two very good pages on the subject: + <UL> + <LI><A HREF="http://www.bbsinc.com/iso8859.html">ASCII - ISO 8859-1 (Latin-1) with HTML 3.0 Entities Table</A> and + <LI><A HREF="http://www.bbsinc.com/iso8879.html">ISO 8879 Entities Gopher Menu</A> + </UL> + +<LI>The excellent overview on the series of + <A HREF="http://czyborra.com/charsets/iso8859.html">ISO 8859 + character sets</A> compiled by Roman Czyborra. + +<LI>Also have a look at Alan Flavell's page of + <A HREF="http://ppewww.ph.gla.ac.uk/%7Eflavell/iso8859/iso8859-pointers.html">pointers + to information about ISO8859</A>. It's written very well! + +<LI>Maybe also of interest to you is the + <A HREF="ftp://ftp.vlsivie.tuwien.ac.at/pub/8bit/FAQ-ISO-8859-1">ISO + 8859-1 FAQ</A> by Michael Gschwind + (<A HREF="mailto:mike@vlsivie.tuwien.ac.at">mike@vlsivie.tuwien.ac.at</A>), + part of his page on + <A HREF="http://www.vlsivie.tuwien.ac.at/mike/i18n.html">Internationalization</A>. + +<LI>For users of X11R5 on SunOS systems: the + <A HREF="Compose.txt">table of the compose combinations</A> + (also coded <A HREF="Compose.html">with entities</A> where possible). + It's taken from the MIT X sources in + <CODE>server/ddx/sun/Compose.list</CODE>. + +<LI>Finally you could have a look at + <A HREF="ftp://ds.internic.net/rfc/rfc1345.txt">RFC 1345: + Character Mnemonics & Character Sets</A> + by K. Simonsen (06/11/92, 103 pages, approx. 240 kbyte).
+ +</UL> + + +<HR> + +<ADDRESS><A HREF="http://ramsch.home.pages.de/">Martin Ramsch</A>, 16.02.1994, 07.01.1996, 01.07.1996, 1998-10-09, 2000-05-15</ADDRESS> + +</BODY> +</HTML> diff --git a/test/iso-8859-2.html b/test/iso-8859-2.html new file mode 100644 index 0000000..39380e3 --- /dev/null +++ b/test/iso-8859-2.html @@ -0,0 +1,174 @@ +<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN"> +<!-- X-URL: http://www.uni-passau.de/~ramsch/iso8859-1.html --> +<HTML> +<HEAD> +<TITLE>Martin Ramsch's character table modified and enhanced for iso8859-2</TITLE> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-2"> +<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org"> +<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/"> +<LINK REL="sibling" HREF="iso-8859-1.html" TITLE="iso-8859-1 test"> +<LINK REL="sibling" HREF="ALT88592.html" TITLE="iso-8859-2 ALT test"> +</HEAD> + +<BODY> + +<H1 ALIGN=center>iso8859-2 plus table</H1> + +<PRE> +Description Code Entity name +=================================== ============ ============== +quotation mark &#34; --> " &quot; --> " +ampersand &#38; --> & &amp; --> & +less-than sign &#60; --> < &lt; --> < +greater-than sign &#62; --> > &gt; --> > + +Description Char Code Entity name +=================================== ==== ============ ============== +non-breaking space &#160; -->   &nbsp; --> +capital A, ogonek ¡ &#260; --> Ą &Aogon; --> Ą +breve {¢} {&#728;}-->{˘} {&breve;} -->{˘} +capital L, stroke £ &#321; --> Ł &Lstrok; --> Ł +general currency sign ¤ &#164; --> ¤ &curren; --> ¤ +capital L, caron ¥ &#317; --> Ľ &Lcaron; --> Ľ +capital S, acute accent ¦ &#346; --> Ś &Sacute; --> Ś +section sign § &#167; --> § &sect; --> § +umlaut (dieresis) ¨ &#168; --> ¨ &uml; --> ¨ + &die; --> ¨ +capital S, caron © &#352; --> Š &Scaron; --> Š +capital S, cedilla ª &#350; --> Ş &Scedil; --> Ş +capital T, caron « &#356; --> Ť &Tcaron; --> Ť +capital Z, acute accent ¬ &#377; --> Ź &Zacute; --> Ź +soft hyphen [] 
[&#173;]-->[­] [&shy;] -->[­] +capital Z, caron ® &#381; --> Ž &Zcaron; --> Ž +capital Z, dot above ¯ &#379; --> Ż &Zdot; --> Ż +degree sign ° &#176; --> ° &deg; --> ° +small a, ogonek ± &#261; --> ą &aogon; --> ą +ogonek {²} {&#731;}-->{˛} {&ogon;} -->{˛} +small l, stroke ³ &#322; --> ł &lstrok; --> ł +acute accent ´ &#180; --> ´ &acute; --> ´ +small l, caron µ &#318; --> ľ &lcaron; --> ľ +small s, acute accent ¶ &#347; --> ś &sacute; --> ś +caron {·} {&#711;}-->{ˇ} {&caron;} -->{ˇ} +cedilla ¸ &#184; --> ¸ &cedil; --> ¸ +small s, caron ¹ &#353; --> š &scaron; --> š +small s, cedilla º &#351; --> ş &scedil; --> ş +small t, caron » &#357; --> ť &tcaron; --> ť +small z, acute accent ¼ &#378; --> ź &zacute; --> ź +double acute accent {½} {&#733;}-->{˝} {&dblac;} -->{˝} +small z, caron ¾ &#382; --> ž &zcaron; --> ž +small z, dot above ¿ &#380; --> ż &zdot; --> ż +capital R, acute accent À &#340; --> Ŕ &Racute; --> Ŕ +capital A, acute accent Á &#193; --> Á &Aacute; --> Á +capital A, circumflex accent  &#194; -->  &Acirc; -->  +capital A, breve à &#258; --> Ă &Abreve; --> Ă +capital A, dieresis or umlaut mark Ä &#196; --> Ä &Auml; --> Ä +capital L, acute accent Å &#313; --> Ĺ &Lacute; --> Ĺ +capital C, acute accent Æ &#262; --> Ć &Cacute; --> Ć +capital C, cedilla Ç &#199; --> Ç &Ccedil; --> Ç +capital C, caron È &#268; --> Č &Ccaron; --> Č +capital E, acute accent É &#201; --> É &Eacute; --> É +capital E, ogonek Ê &#280; --> Ę &Eogon; --> Ę +capital E, dieresis or umlaut mark Ë &#203; --> Ë &Euml; --> Ë +capital E, caron Ì &#282; --> Ě &Ecaron; --> Ě +capital I, acute accent Í &#205; --> Í &Iacute; --> Í +capital I, circumflex accent Î &#206; --> Î &Icirc; --> Î +capital D, caron Ï &#270; --> Ď &Dcaron; --> Ď +capital D, stroke Ð &#272; --> Đ &Dstrok; --> Đ +capital Eth, Icelandic N/A &#208; --> Ð &ETH; --> Ð +capital N, acute accent Ñ &#323; --> Ń &Nacute; --> Ń +capital N, caron Ò &#327; --> Ň &Ncaron; --> Ň +capital O, acute accent Ó &#211; --> Ó &Oacute; --> Ó 
+capital O, circumflex accent Ô &#212; --> Ô &Ocirc; --> Ô +capital O, double acute accent Õ &#336; --> Ő &Odblac; --> Ő +capital O, dieresis or umlaut mark Ö &#214; --> Ö &Ouml; --> Ö +multiply sign × &#215; --> × &times; --> × +capital R, caron Ø &#344; --> Ř &Rcaron; --> Ř +capital U, ring Ù &#366; --> Ů &Uring; --> Ů +capital U, acute accent Ú &#218; --> Ú &Uacute; --> Ú +capital U, double acute accent Û &#368; --> Ű &Udblac; --> Ű +capital U, dieresis or umlaut mark Ü &#220; --> Ü &Uuml; --> Ü +capital Y, acute accent Ý &#221; --> Ý &Yacute; --> Ý +capital T, cedilla Þ &#354; --> Ţ &Tcedil; --> Ţ +small sharp s, German (sz ligature) ß &#223; --> ß &szlig; --> ß +small r, acute accent à &#341; --> ŕ &racute; --> ŕ +small a, acute accent á &#225; --> á &aacute; --> á +small a, circumflex accent â &#226; --> â &acirc; --> â +small a, breve ã &#259; --> ă &abreve; --> ă +small a, dieresis or umlaut mark ä &#228; --> ä &auml; --> ä +small l, acute accent å &#314; --> ĺ &lacute; --> ĺ +small c, acute accent æ &#263; --> ć &cacute; --> ć +small c, cedilla ç &#231; --> ç &ccedil; --> ç +small c, caron è &#269; --> č &ccaron; --> č +small e, acute accent é &#233; --> é &eacute; --> é +small e, ogonek ê &#281; --> ę &eogon; --> ę +small e, dieresis or umlaut mark ë &#235; --> ë &euml; --> ë +small e, caron ì &#283; --> ě &ecaron; --> ě +small i, acute accent í &#237; --> í &iacute; --> í +small i, circumflex accent î &#238; --> î &icirc; --> î +small d, caron ï &#271; --> ď &dcaron; --> ď +small d, stroke ð &#273; --> đ &dstrok; --> đ +small eth, Icelandic N/A &#240; --> ð &eth; --> ð +small n, acute accent ñ &#324; --> ń &nacute; --> ń +small n, caron ò &#328; --> ň &ncaron; --> ň +small o, acute accent ó &#243; --> ó &oacute; --> ó +small o, circumflex accent ô &#244; --> ô &ocirc; --> ô +small o, double acute accent õ &#337; --> ő &odblac; --> ő +small o, dieresis or umlaut mark ö &#246; --> ö &ouml; --> ö +division sign ÷ &#247; --> ÷ &divide; --> ÷ +small r, caron 
ø &#345; --> ř &rcaron; --> ř +small u, ring ù &#367; --> ů &uring; --> ů +small u, acute accent ú &#250; --> ú &uacute; --> ú +small u, double acute accent û &#369; --> ű &udblac; --> ű +small u, dieresis or umlaut mark ü &#252; --> ü &uuml; --> ü +small y, acute accent ý &#253; --> ý &yacute; --> ý +small t, cedilla þ &#355; --> ţ &tcedil; --> ţ +dot above {ÿ} {&#729;}-->{˙} {&dot;} -->{˙} + +Some other characters of interest Char Code Entity name +=================================== ==== ============ ============== +capital AE diphthong (ligature) N/A &#198; --> Æ &AElig; --> Æ +small ae diphthong (ligature) N/A &#230; --> æ &aelig; --> æ +capital OE ligature N/A {&#338;}-->{Œ} {&OElig;} -->{Œ} +small oe ligature N/A {&#339;}-->{œ} {&oelig;} -->{œ} +copyright N/A &#169; --> © &copy; --> © +registered trademark N/A &#174; --> ® &reg; --> ® +trademark sign N/A &#8482;--> ™ &trade; --> ™ +em space N/A [&#8195;]->[ ] [&emsp;] -->[ ] +en space N/A [&#8194;]->[ ] [&ensp;] -->[ ] +1/3-em space N/A [&#8196;]->[ ] [&emsp13;] -->[ ] +1/4-em space N/A [&#8197;]->[ ] [&emsp14;] -->[ ] +thin space N/A [&#8201;]->[ ] [&thinsp;]-->[ ] +hair space N/A [&#8202;]->[ ] [&hairsp;]-->[ ] +em dash N/A [&#8212;]->[—] [&mdash;] -->[—] +en dash N/A [&#8211;]->[–] [&ndash;] -->[–] + +</PRE><!-- </PRE> no HotJava preBeta hackx - kw --> +<!-- second /PRE is a hack for HotJava 1.0 preBeta 1 --> +<HR> +<P> +Characters not found in ISO-8859-2 have "N/A" in the <TT>Char</TT> column. +Some characters for which I could not find entity names in either +<A HREF="http://www.internic.net/rfc/rfc2070.txt">RFC 2070</A> +or the +<A HREF="ftp://www.ucc.ie/pub/sgml/">ISOlat1, ISOlat2, ISOnum, ISOpub and ISOtech</A> +sets (the ones included by Peter Flynn's +<A HREF="http://www.ucc.ie/doc/www/html/dtds/htmlpro.html">HTML Pro DTD</A>) +are shown enclosed in <TT>{</TT>braces<TT>}</TT>. 
+</P> +<P> +There also is a variation of this table which tests +<A HREF="ALT88592.html">ISO-8859-2 characters and entities in ALT attributes</A>. +</P> +<P> +See Martin Ramsch's original +<A CHARSET="iso-8859-1" HREF="https://web.archive.org/web/19970119160651/http://www.uni-passau.de:80/~ramsch/iso8859-1.html">ISO-8859-1 Table</A> +for related info and links, and for some notes on entity names. +This file is mostly just an adaptation of his table +to the ISO-8859-2 character set. +</P> +<HR> + +<ADDRESS>kweide@tezcat.com 1997-03-09</ADDRESS> + +</BODY> +</HTML> diff --git a/test/iso-8859-2a.html b/test/iso-8859-2a.html new file mode 100644 index 0000000..062b1a5 --- /dev/null +++ b/test/iso-8859-2a.html @@ -0,0 +1,208 @@ +<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN"> +<!-- X-URL: http://www.uni-passau.de/~ramsch/iso8859-1.html --> +<HTML> +<HEAD> +<TITLE>Martin Ramsch's character table modified and enhanced for iso8859-2</TITLE> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-2"> +<LINK REV="made" HREF="mailto:lynx-dev@nongnu.org"> +<LINK REV="owner" HREF="http://mail.gnu.org/mailman/listinfo/lynx-dev/"> +<LINK REL="sibling" HREF="iso-8859-1.html" TITLE="iso-8859-1 test"> +<LINK REL="sibling" HREF="ALT88592.html" TITLE="iso-8859-2 ALT test"> +</HEAD> + +<BODY> + +<H1 ALIGN=center>iso8859-2 plus table, and cp-1252</H1> + +<PRE> +Description Code Entity name +=================================== ============ ============== +quotation mark &#34; --> " &quot; --> " +ampersand &#38; --> & &amp; --> & +less-than sign &#60; --> < &lt; --> < +greater-than sign &#62; --> > &gt; --> > + +Description Char Code Entity name +=================================== ==== ============ ============== +euro sign € &128; --> € +undefined &129; -->  +single low-9 quotation mark ‚ &130; --> ‚ +latin small letter f with hook ƒ &131; --> ƒ +double low-9 quotation mark „ &132; --> „ +horizontal ellipsis … &133; --> … +dagger † &134; --> † +double dagger ‡ &135; 
--> ‡ +modifier letter circumflex accent ˆ &136; --> ˆ +per mille sign ‰ &137; --> ‰ +latin capital letter s with caron Š &138; --> Š +single left-pointing angle quote mark ‹ &139; --> ‹ +latin capital ligature oe Œ &140; --> Œ +undefined &141; -->  +latin capital letter z with caron Ž &142; --> Ž +undefined &143; -->  + +undefined &144; -->  +left single quotation mark ‘ &145; --> ‘ +right single quotation mark ’ &146; --> ’ +left double quotation mark “ &147; --> “ +right double quotation mark ” &148; --> ” +bullet • &149; --> • +en dash – &150; --> – +em dash — &151; --> — +small tilde ˜ &152; --> ˜ +trade mark sign ™ &153; --> ™ +latin small letter s with caron š &154; --> š +single right-pointing angle quote mark › &155; --> › +latin small ligature oe œ &156; --> œ +undefined &157; -->  +latin small letter z with caron ž &158; --> ž +latin capital letter y with diaeresis Ÿ &159; --> Ÿ + +non-breaking space &#160; -->   &nbsp; --> +capital A, ogonek ¡ &#260; --> Ą &Aogon; --> Ą +breve {¢} {&#728;}-->{˘} {&breve;} -->{˘} +capital L, stroke £ &#321; --> Ł &Lstrok; --> Ł +general currency sign ¤ &#164; --> ¤ &curren; --> ¤ +capital L, caron ¥ &#317; --> Ľ &Lcaron; --> Ľ +capital S, acute accent ¦ &#346; --> Ś &Sacute; --> Ś +section sign § &#167; --> § &sect; --> § +umlaut (dieresis) ¨ &#168; --> ¨ &uml; --> ¨ + &die; --> ¨ +capital S, caron © &#352; --> Š &Scaron; --> Š +capital S, cedilla ª &#350; --> Ş &Scedil; --> Ş +capital T, caron « &#356; --> Ť &Tcaron; --> Ť +capital Z, acute accent ¬ &#377; --> Ź &Zacute; --> Ź +soft hyphen [] [&#173;]-->[­] [&shy;] -->[­] +capital Z, caron ® &#381; --> Ž &Zcaron; --> Ž +capital Z, dot above ¯ &#379; --> Ż &Zdot; --> Ż +degree sign ° &#176; --> ° &deg; --> ° +small a, ogonek ± &#261; --> ą &aogon; --> ą +ogonek {²} {&#731;}-->{˛} {&ogon;} -->{˛} +small l, stroke ³ &#322; --> ł &lstrok; --> ł +acute accent ´ &#180; --> ´ &acute; --> ´ +small l, caron µ &#318; --> ľ &lcaron; --> ľ +small s, acute accent ¶ &#347; --> ś 
&sacute; --> ś +caron {·} {&#711;}-->{ˇ} {&caron;} -->{ˇ} +cedilla ¸ &#184; --> ¸ &cedil; --> ¸ +small s, caron ¹ &#353; --> š &scaron; --> š +small s, cedilla º &#351; --> ş &scedil; --> ş +small t, caron » &#357; --> ť &tcaron; --> ť +small z, acute accent ¼ &#378; --> ź &zacute; --> ź +double acute accent {½} {&#733;}-->{˝} {&dblac;} -->{˝} +small z, caron ¾ &#382; --> ž &zcaron; --> ž +small z, dot above ¿ &#380; --> ż &zdot; --> ż +capital R, acute accent À &#340; --> Ŕ &Racute; --> Ŕ +capital A, acute accent Á &#193; --> Á &Aacute; --> Á +capital A, circumflex accent  &#194; -->  &Acirc; -->  +capital A, breve à &#258; --> Ă &Abreve; --> Ă +capital A, dieresis or umlaut mark Ä &#196; --> Ä &Auml; --> Ä +capital L, acute accent Å &#313; --> Ĺ &Lacute; --> Ĺ +capital C, acute accent Æ &#262; --> Ć &Cacute; --> Ć +capital C, cedilla Ç &#199; --> Ç &Ccedil; --> Ç +capital C, caron È &#268; --> Č &Ccaron; --> Č +capital E, acute accent É &#201; --> É &Eacute; --> É +capital E, ogonek Ê &#280; --> Ę &Eogon; --> Ę +capital E, dieresis or umlaut mark Ë &#203; --> Ë &Euml; --> Ë +capital E, caron Ì &#282; --> Ě &Ecaron; --> Ě +capital I, acute accent Í &#205; --> Í &Iacute; --> Í +capital I, circumflex accent Î &#206; --> Î &Icirc; --> Î +capital D, caron Ï &#270; --> Ď &Dcaron; --> Ď +capital D, stroke Ð &#272; --> Đ &Dstrok; --> Đ +capital Eth, Icelandic N/A &#208; --> Ð &ETH; --> Ð +capital N, acute accent Ñ &#323; --> Ń &Nacute; --> Ń +capital N, caron Ò &#327; --> Ň &Ncaron; --> Ň +capital O, acute accent Ó &#211; --> Ó &Oacute; --> Ó +capital O, circumflex accent Ô &#212; --> Ô &Ocirc; --> Ô +capital O, double acute accent Õ &#336; --> Ő &Odblac; --> Ő +capital O, dieresis or umlaut mark Ö &#214; --> Ö &Ouml; --> Ö +multiply sign × &#215; --> × &times; --> × +capital R, caron Ø &#344; --> Ř &Rcaron; --> Ř +capital U, ring Ù &#366; --> Ů &Uring; --> Ů +capital U, acute accent Ú &#218; --> Ú &Uacute; --> Ú +capital U, double acute accent Û &#368; --> Ű &Udblac; 
--> Ű +capital U, dieresis or umlaut mark Ü &#220; --> Ü &Uuml; --> Ü +capital Y, acute accent Ý &#221; --> Ý &Yacute; --> Ý +capital T, cedilla Þ &#354; --> Ţ &Tcedil; --> Ţ +small sharp s, German (sz ligature) ß &#223; --> ß &szlig; --> ß +small r, acute accent à &#341; --> ŕ &racute; --> ŕ +small a, acute accent á &#225; --> á &aacute; --> á +small a, circumflex accent â &#226; --> â &acirc; --> â +small a, breve ã &#259; --> ă &abreve; --> ă +small a, dieresis or umlaut mark ä &#228; --> ä &auml; --> ä +small l, acute accent å &#314; --> ĺ &lacute; --> ĺ +small c, acute accent æ &#263; --> ć &cacute; --> ć +small c, cedilla ç &#231; --> ç &ccedil; --> ç +small c, caron è &#269; --> č &ccaron; --> č +small e, acute accent é &#233; --> é &eacute; --> é +small e, ogonek ê &#281; --> ę &eogon; --> ę +small e, dieresis or umlaut mark ë &#235; --> ë &euml; --> ë +small e, caron ì &#283; --> ě &ecaron; --> ě +small i, acute accent í &#237; --> í &iacute; --> í +small i, circumflex accent î &#238; --> î &icirc; --> î +small d, caron ï &#271; --> ď &dcaron; --> ď +small d, stroke ð &#273; --> đ &dstrok; --> đ +small eth, Icelandic N/A &#240; --> ð &eth; --> ð +small n, acute accent ñ &#324; --> ń &nacute; --> ń +small n, caron ò &#328; --> ň &ncaron; --> ň +small o, acute accent ó &#243; --> ó &oacute; --> ó +small o, circumflex accent ô &#244; --> ô &ocirc; --> ô +small o, double acute accent õ &#337; --> ő &odblac; --> ő +small o, dieresis or umlaut mark ö &#246; --> ö &ouml; --> ö +division sign ÷ &#247; --> ÷ &divide; --> ÷ +small r, caron ø &#345; --> ř &rcaron; --> ř +small u, ring ù &#367; --> ů &uring; --> ů +small u, acute accent ú &#250; --> ú &uacute; --> ú +small u, double acute accent û &#369; --> ű &udblac; --> ű +small u, dieresis or umlaut mark ü &#252; --> ü &uuml; --> ü +small y, acute accent ý &#253; --> ý &yacute; --> ý +small t, cedilla þ &#355; --> ţ &tcedil; --> ţ +dot above {ÿ} {&#729;}-->{˙} {&dot;} -->{˙} + +Some other characters of interest 
Char Code Entity name +=================================== ==== ============ ============== +capital AE diphthong (ligature) N/A &#198; --> Æ &AElig; --> Æ +small ae diphthong (ligature) N/A &#230; --> æ &aelig; --> æ +capital OE ligature N/A {&#338;}-->{Œ} {&OElig;} -->{Œ} +small oe ligature N/A {&#339;}-->{œ} {&oelig;} -->{œ} +copyright N/A &#169; --> © &copy; --> © +registered trademark N/A &#174; --> ® &reg; --> ® +trademark sign N/A &#8482;--> ™ &trade; --> ™ +em space N/A [&#8195;]->[ ] [&emsp;] -->[ ] +en space N/A [&#8194;]->[ ] [&ensp;] -->[ ] +1/3-em space N/A [&#8196;]->[ ] [&emsp13;] -->[ ] +1/4-em space N/A [&#8197;]->[ ] [&emsp14;] -->[ ] +thin space N/A [&#8201;]->[ ] [&thinsp;]-->[ ] +hair space N/A [&#8202;]->[ ] [&hairsp;]-->[ ] +em dash N/A [&#8212;]->[—] [&mdash;] -->[—] +en dash N/A [&#8211;]->[–] [&ndash;] -->[–] + +</PRE><!-- </PRE> no HotJava preBeta hackx - kw --> +<!-- second /PRE is a hack for HotJava 1.0 preBeta 1 --> +<HR> +<P> +Characters not found in ISO-8859-2 have "N/A" in the <TT>Char</TT> column. +Some characters for which I could not find entity names in either +<A HREF="http://www.internic.net/rfc/rfc2070.txt">RFC 2070</A> +or the +<A HREF="ftp://www.ucc.ie/pub/sgml/">ISOlat1, ISOlat2, ISOnum, ISOpub and ISOtech</A> +sets (the ones included by Peter Flynn's +<A HREF="http://www.ucc.ie/doc/www/html/dtds/htmlpro.html">HTML Pro DTD</A>) +are shown enclosed in <TT>{</TT>braces<TT>}</TT>. +</P> +<P> +There also is a variation of this table which tests +<A HREF="ALT88592.html">ISO-8859-2 characters and entities in ALT attributes</A>. +</P> +<P> +See Martin Ramsch's original +<A CHARSET="iso-8859-1" HREF="https://web.archive.org/web/19970119160651/http://www.uni-passau.de:80/~ramsch/iso8859-1.html">ISO-8859-1 Table</A> +for related info and links, and for some notes on entity names. +This file is mostly just an adaptation of his table +to the ISO-8859-2 character set. 
+</P> +<HR> + +<ADDRESS>kweide@tezcat.com 1997-03-09</ADDRESS> + +</BODY> +</HTML> diff --git a/test/koi8-r.html b/test/koi8-r.html new file mode 100644 index 0000000..d679219 --- /dev/null +++ b/test/koi8-r.html @@ -0,0 +1,321 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<HTML> +<HEAD> +<TITLE>Test of the KOI8-R symbols</TITLE> +</HEAD> +<BODY> +<PRE> + + This table prepared from KOI8-R.TXT available at ftp.unicode.org + + ftp://ftp.unicode.org/MAPPINGS/VENDORS/MISC/KOI8-R.TXT + (if doing ftp, try cd Public/MAPPINGS/VENDORS/MISC) + + +original comment: + +# +# Name: KOI8-R (RFC1489) to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 18 August 1999 +# Authors: Helmut Richter <richter@lrz.de> +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# KOI8-R characters map into Unicode. The underlying document is the +# mapping described in RFC 1489. No statements are made as to whether +# this mapping is the same as the mapping defined as "Code Page 878" +# with some vendors. 
+# +# Format: Three tab-separated columns +# Column #1 is the KOI8-R code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in KOI8-R order. +# +# Version history +# 1.0 version: created. +# +# Any comments or problems, contact <errata@unicode.org> +# Please note that <errata@unicode.org> is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 "�" # NULL +0x01 0x0001 "" # START OF HEADING +0x02 0x0002 "" # START OF TEXT +0x03 0x0003 "" # END OF TEXT +0x04 0x0004 "" # END OF TRANSMISSION +0x05 0x0005 "" # ENQUIRY +0x06 0x0006 "" # ACKNOWLEDGE +0x07 0x0007 "" # BELL +0x08 0x0008 "" # BACKSPACE +0x09 0x0009 "	" # HORIZONTAL TABULATION +0x0A 0x000A "
" # LINE FEED +0x0B 0x000B "" # VERTICAL TABULATION +0x0C 0x000C "" # FORM FEED +0x0D 0x000D "
" # CARRIAGE RETURN +0x0E 0x000E "" # SHIFT OUT +0x0F 0x000F "" # SHIFT IN +0x10 0x0010 "" # DATA LINK ESCAPE +0x11 0x0011 "" # DEVICE CONTROL ONE +0x12 0x0012 "" # DEVICE CONTROL TWO +0x13 0x0013 "" # DEVICE CONTROL THREE +0x14 0x0014 "" # DEVICE CONTROL FOUR +0x15 0x0015 "" # NEGATIVE ACKNOWLEDGE +0x16 0x0016 "" # SYNCHRONOUS IDLE +0x17 0x0017 "" # END OF TRANSMISSION BLOCK +0x18 0x0018 "" # CANCEL +0x19 0x0019 "" # END OF MEDIUM +0x1A 0x001A "" # SUBSTITUTE +0x1B 0x001B "" # ESCAPE +0x1C 0x001C "" # FILE SEPARATOR +0x1D 0x001D "" # GROUP SEPARATOR +0x1E 0x001E "" # RECORD SEPARATOR +0x1F 0x001F "" # UNIT SEPARATOR +0x20 0x0020 " " # SPACE +0x21 0x0021 "!" # EXCLAMATION MARK +0x22 0x0022 """ # QUOTATION MARK +0x23 0x0023 "#" # NUMBER SIGN +0x24 0x0024 "$" # DOLLAR SIGN +0x25 0x0025 "%" # PERCENT SIGN +0x26 0x0026 "&" # AMPERSAND +0x27 0x0027 "'" # APOSTROPHE +0x28 0x0028 "(" # LEFT PARENTHESIS +0x29 0x0029 ")" # RIGHT PARENTHESIS +0x2A 0x002A "*" # ASTERISK +0x2B 0x002B "+" # PLUS SIGN +0x2C 0x002C "," # COMMA +0x2D 0x002D "-" # HYPHEN-MINUS +0x2E 0x002E "." # FULL STOP +0x2F 0x002F "/" # SOLIDUS +0x30 0x0030 "0" # DIGIT ZERO +0x31 0x0031 "1" # DIGIT ONE +0x32 0x0032 "2" # DIGIT TWO +0x33 0x0033 "3" # DIGIT THREE +0x34 0x0034 "4" # DIGIT FOUR +0x35 0x0035 "5" # DIGIT FIVE +0x36 0x0036 "6" # DIGIT SIX +0x37 0x0037 "7" # DIGIT SEVEN +0x38 0x0038 "8" # DIGIT EIGHT +0x39 0x0039 "9" # DIGIT NINE +0x3A 0x003A ":" # COLON +0x3B 0x003B ";" # SEMICOLON +0x3C 0x003C "<" # LESS-THAN SIGN +0x3D 0x003D "=" # EQUALS SIGN +0x3E 0x003E ">" # GREATER-THAN SIGN +0x3F 0x003F "?" 
# QUESTION MARK +0x40 0x0040 "@" # COMMERCIAL AT +0x41 0x0041 "A" # LATIN CAPITAL LETTER A +0x42 0x0042 "B" # LATIN CAPITAL LETTER B +0x43 0x0043 "C" # LATIN CAPITAL LETTER C +0x44 0x0044 "D" # LATIN CAPITAL LETTER D +0x45 0x0045 "E" # LATIN CAPITAL LETTER E +0x46 0x0046 "F" # LATIN CAPITAL LETTER F +0x47 0x0047 "G" # LATIN CAPITAL LETTER G +0x48 0x0048 "H" # LATIN CAPITAL LETTER H +0x49 0x0049 "I" # LATIN CAPITAL LETTER I +0x4A 0x004A "J" # LATIN CAPITAL LETTER J +0x4B 0x004B "K" # LATIN CAPITAL LETTER K +0x4C 0x004C "L" # LATIN CAPITAL LETTER L +0x4D 0x004D "M" # LATIN CAPITAL LETTER M +0x4E 0x004E "N" # LATIN CAPITAL LETTER N +0x4F 0x004F "O" # LATIN CAPITAL LETTER O +0x50 0x0050 "P" # LATIN CAPITAL LETTER P +0x51 0x0051 "Q" # LATIN CAPITAL LETTER Q +0x52 0x0052 "R" # LATIN CAPITAL LETTER R +0x53 0x0053 "S" # LATIN CAPITAL LETTER S +0x54 0x0054 "T" # LATIN CAPITAL LETTER T +0x55 0x0055 "U" # LATIN CAPITAL LETTER U +0x56 0x0056 "V" # LATIN CAPITAL LETTER V +0x57 0x0057 "W" # LATIN CAPITAL LETTER W +0x58 0x0058 "X" # LATIN CAPITAL LETTER X +0x59 0x0059 "Y" # LATIN CAPITAL LETTER Y +0x5A 0x005A "Z" # LATIN CAPITAL LETTER Z +0x5B 0x005B "[" # LEFT SQUARE BRACKET +0x5C 0x005C "\" # REVERSE SOLIDUS +0x5D 0x005D "]" # RIGHT SQUARE BRACKET +0x5E 0x005E "^" # CIRCUMFLEX ACCENT +0x5F 0x005F "_" # LOW LINE +0x60 0x0060 "`" # GRAVE ACCENT +0x61 0x0061 "a" # LATIN SMALL LETTER A +0x62 0x0062 "b" # LATIN SMALL LETTER B +0x63 0x0063 "c" # LATIN SMALL LETTER C +0x64 0x0064 "d" # LATIN SMALL LETTER D +0x65 0x0065 "e" # LATIN SMALL LETTER E +0x66 0x0066 "f" # LATIN SMALL LETTER F +0x67 0x0067 "g" # LATIN SMALL LETTER G +0x68 0x0068 "h" # LATIN SMALL LETTER H +0x69 0x0069 "i" # LATIN SMALL LETTER I +0x6A 0x006A "j" # LATIN SMALL LETTER J +0x6B 0x006B "k" # LATIN SMALL LETTER K +0x6C 0x006C "l" # LATIN SMALL LETTER L +0x6D 0x006D "m" # LATIN SMALL LETTER M +0x6E 0x006E "n" # LATIN SMALL LETTER N +0x6F 0x006F "o" # LATIN SMALL LETTER O +0x70 0x0070 "p" # LATIN SMALL LETTER P +0x71 
0x0071 "q" # LATIN SMALL LETTER Q +0x72 0x0072 "r" # LATIN SMALL LETTER R +0x73 0x0073 "s" # LATIN SMALL LETTER S +0x74 0x0074 "t" # LATIN SMALL LETTER T +0x75 0x0075 "u" # LATIN SMALL LETTER U +0x76 0x0076 "v" # LATIN SMALL LETTER V +0x77 0x0077 "w" # LATIN SMALL LETTER W +0x78 0x0078 "x" # LATIN SMALL LETTER X +0x79 0x0079 "y" # LATIN SMALL LETTER Y +0x7A 0x007A "z" # LATIN SMALL LETTER Z +0x7B 0x007B "{" # LEFT CURLY BRACKET +0x7C 0x007C "|" # VERTICAL LINE +0x7D 0x007D "}" # RIGHT CURLY BRACKET +0x7E 0x007E "~" # TILDE +0x7F 0x007F "" # DELETE +0x80 0x2500 "─" # BOX DRAWINGS LIGHT HORIZONTAL +0x81 0x2502 "│" # BOX DRAWINGS LIGHT VERTICAL +0x82 0x250C "┌" # BOX DRAWINGS LIGHT DOWN AND RIGHT +0x83 0x2510 "┐" # BOX DRAWINGS LIGHT DOWN AND LEFT +0x84 0x2514 "└" # BOX DRAWINGS LIGHT UP AND RIGHT +0x85 0x2518 "┘" # BOX DRAWINGS LIGHT UP AND LEFT +0x86 0x251C "├" # BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0x87 0x2524 "┤" # BOX DRAWINGS LIGHT VERTICAL AND LEFT +0x88 0x252C "┬" # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0x89 0x2534 "┴" # BOX DRAWINGS LIGHT UP AND HORIZONTAL +0x8A 0x253C "┼" # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0x8B 0x2580 "▀" # UPPER HALF BLOCK +0x8C 0x2584 "▄" # LOWER HALF BLOCK +0x8D 0x2588 "█" # FULL BLOCK +0x8E 0x258C "▌" # LEFT HALF BLOCK +0x8F 0x2590 "▐" # RIGHT HALF BLOCK +0x90 0x2591 "░" # LIGHT SHADE +0x91 0x2592 "▒" # MEDIUM SHADE +0x92 0x2593 "▓" # DARK SHADE +0x93 0x2320 "⌠" # TOP HALF INTEGRAL +0x94 0x25A0 "■" # BLACK SQUARE +0x95 0x2219 "∙" # BULLET OPERATOR +0x96 0x221A "√" # SQUARE ROOT +0x97 0x2248 "≈" # ALMOST EQUAL TO +0x98 0x2264 "≤" # LESS-THAN OR EQUAL TO +0x99 0x2265 "≥" # GREATER-THAN OR EQUAL TO +0x9A 0x00A0 " " # NO-BREAK SPACE +0x9B 0x2321 "⌡" # BOTTOM HALF INTEGRAL +0x9C 0x00B0 "°" # DEGREE SIGN +0x9D 0x00B2 "²" # SUPERSCRIPT TWO +0x9E 0x00B7 "·" # MIDDLE DOT +0x9F 0x00F7 "÷" # DIVISION SIGN +0xA0 0x2550 "═" # BOX DRAWINGS DOUBLE HORIZONTAL +0xA1 0x2551 "║" # BOX DRAWINGS DOUBLE VERTICAL +0xA2 0x2552 "╒" # BOX DRAWINGS 
DOWN SINGLE AND RIGHT DOUBLE +0xA3 0x0451 "ё" # CYRILLIC SMALL LETTER IO +0xA4 0x2553 "╓" # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE +0xA5 0x2554 "╔" # BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xA6 0x2555 "╕" # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +0xA7 0x2556 "╖" # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +0xA8 0x2557 "╗" # BOX DRAWINGS DOUBLE DOWN AND LEFT +0xA9 0x2558 "╘" # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +0xAA 0x2559 "╙" # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +0xAB 0x255A "╚" # BOX DRAWINGS DOUBLE UP AND RIGHT +0xAC 0x255B "╛" # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +0xAD 0x255C "╜" # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +0xAE 0x255D "╝" # BOX DRAWINGS DOUBLE UP AND LEFT +0xAF 0x255E "╞" # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +0xB0 0x255F "╟" # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +0xB1 0x2560 "╠" # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xB2 0x2561 "╡" # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +0xB3 0x0401 "Ё" # CYRILLIC CAPITAL LETTER IO +0xB4 0x2562 "╢" # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +0xB5 0x2563 "╣" # BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xB6 0x2564 "╤" # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +0xB7 0x2565 "╥" # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE +0xB8 0x2566 "╦" # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xB9 0x2567 "╧" # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +0xBA 0x2568 "╨" # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +0xBB 0x2569 "╩" # BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xBC 0x256A "╪" # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +0xBD 0x256B "╫" # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +0xBE 0x256C "╬" # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xBF 0x00A9 "©" # COPYRIGHT SIGN +0xC0 0x044E "ю" # CYRILLIC SMALL LETTER YU +0xC1 0x0430 "а" # CYRILLIC SMALL LETTER A +0xC2 0x0431 "б" # CYRILLIC SMALL LETTER BE +0xC3 0x0446 "ц" # CYRILLIC SMALL LETTER TSE +0xC4 0x0434 "д" # CYRILLIC SMALL LETTER DE +0xC5 0x0435 "е" # CYRILLIC SMALL LETTER IE +0xC6 
0x0444 "ф" # CYRILLIC SMALL LETTER EF +0xC7 0x0433 "г" # CYRILLIC SMALL LETTER GHE +0xC8 0x0445 "х" # CYRILLIC SMALL LETTER HA +0xC9 0x0438 "и" # CYRILLIC SMALL LETTER I +0xCA 0x0439 "й" # CYRILLIC SMALL LETTER SHORT I +0xCB 0x043A "к" # CYRILLIC SMALL LETTER KA +0xCC 0x043B "л" # CYRILLIC SMALL LETTER EL +0xCD 0x043C "м" # CYRILLIC SMALL LETTER EM +0xCE 0x043D "н" # CYRILLIC SMALL LETTER EN +0xCF 0x043E "о" # CYRILLIC SMALL LETTER O +0xD0 0x043F "п" # CYRILLIC SMALL LETTER PE +0xD1 0x044F "я" # CYRILLIC SMALL LETTER YA +0xD2 0x0440 "р" # CYRILLIC SMALL LETTER ER +0xD3 0x0441 "с" # CYRILLIC SMALL LETTER ES +0xD4 0x0442 "т" # CYRILLIC SMALL LETTER TE +0xD5 0x0443 "у" # CYRILLIC SMALL LETTER U +0xD6 0x0436 "ж" # CYRILLIC SMALL LETTER ZHE +0xD7 0x0432 "в" # CYRILLIC SMALL LETTER VE +0xD8 0x044C "ь" # CYRILLIC SMALL LETTER SOFT SIGN +0xD9 0x044B "ы" # CYRILLIC SMALL LETTER YERU +0xDA 0x0437 "з" # CYRILLIC SMALL LETTER ZE +0xDB 0x0448 "ш" # CYRILLIC SMALL LETTER SHA +0xDC 0x044D "э" # CYRILLIC SMALL LETTER E +0xDD 0x0449 "щ" # CYRILLIC SMALL LETTER SHCHA +0xDE 0x0447 "ч" # CYRILLIC SMALL LETTER CHE +0xDF 0x044A "ъ" # CYRILLIC SMALL LETTER HARD SIGN +0xE0 0x042E "Ю" # CYRILLIC CAPITAL LETTER YU +0xE1 0x0410 "А" # CYRILLIC CAPITAL LETTER A +0xE2 0x0411 "Б" # CYRILLIC CAPITAL LETTER BE +0xE3 0x0426 "Ц" # CYRILLIC CAPITAL LETTER TSE +0xE4 0x0414 "Д" # CYRILLIC CAPITAL LETTER DE +0xE5 0x0415 "Е" # CYRILLIC CAPITAL LETTER IE +0xE6 0x0424 "Ф" # CYRILLIC CAPITAL LETTER EF +0xE7 0x0413 "Г" # CYRILLIC CAPITAL LETTER GHE +0xE8 0x0425 "Х" # CYRILLIC CAPITAL LETTER HA +0xE9 0x0418 "И" # CYRILLIC CAPITAL LETTER I +0xEA 0x0419 "Й" # CYRILLIC CAPITAL LETTER SHORT I +0xEB 0x041A "К" # CYRILLIC CAPITAL LETTER KA +0xEC 0x041B "Л" # CYRILLIC CAPITAL LETTER EL +0xED 0x041C "М" # CYRILLIC CAPITAL LETTER EM +0xEE 0x041D "Н" # CYRILLIC CAPITAL LETTER EN +0xEF 0x041E "О" # CYRILLIC CAPITAL LETTER O +0xF0 0x041F "П" # CYRILLIC CAPITAL LETTER PE +0xF1 0x042F "Я" # CYRILLIC CAPITAL LETTER YA +0xF2 
0x0420 "Р" # CYRILLIC CAPITAL LETTER ER +0xF3 0x0421 "С" # CYRILLIC CAPITAL LETTER ES +0xF4 0x0422 "Т" # CYRILLIC CAPITAL LETTER TE +0xF5 0x0423 "У" # CYRILLIC CAPITAL LETTER U +0xF6 0x0416 "Ж" # CYRILLIC CAPITAL LETTER ZHE +0xF7 0x0412 "В" # CYRILLIC CAPITAL LETTER VE +0xF8 0x042C "Ь" # CYRILLIC CAPITAL LETTER SOFT SIGN +0xF9 0x042B "Ы" # CYRILLIC CAPITAL LETTER YERU +0xFA 0x0417 "З" # CYRILLIC CAPITAL LETTER ZE +0xFB 0x0428 "Ш" # CYRILLIC CAPITAL LETTER SHA +0xFC 0x042D "Э" # CYRILLIC CAPITAL LETTER E +0xFD 0x0429 "Щ" # CYRILLIC CAPITAL LETTER SHCHA +0xFE 0x0427 "Ч" # CYRILLIC CAPITAL LETTER CHE +0xFF 0x042A "Ъ" # CYRILLIC CAPITAL LETTER HARD SIGN +</PRE> +</BODY> +</HTML> diff --git a/test/nobody b/test/nobody new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test/nobody diff --git a/test/quickbrown.html b/test/quickbrown.html new file mode 100644 index 0000000..e320722 --- /dev/null +++ b/test/quickbrown.html @@ -0,0 +1,103 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<HTML> +<HEAD> +<TITLE>Markus Kuhn's quick-brown-fox UTF-8 demo</TITLE> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1"> +<LINK REV="made" HREF="mailto:dickey@invisible-island.net"> +</HEAD> + +<BODY> +<pre> +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn <mkuhn@acm.org> -- 1998-11-30 + +This file was UTF-8 encoded. 
+ + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) (jqvwxy missing, but all non-ASCII letters in one word) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce qui lui + permet de penser à la cænogenèse de l'être dont il est question dans la + cause ambiguë entendue à Moÿ, dans un capharnaüm qui, pense-t-il, diminue + çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Hebrew (iw) +----------- + + דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה? + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of the south? Yes, but only a fake!) + + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. 
+ +</pre> +See also: +<ul> +<li><a href="http://www.columbia.edu/kermit/utf8.html" + >http://www.columbia.edu/kermit/utf8.html</a> +<li><a href="http://www.kernel.org/" + >http://www.kernel.org/</a> +<li><a href="http://www.unicode.org/" + >http://www.unicode.org/</a> +<br>and +<li><a href="http://www.cl.cam.ac.uk/~mgk25/ucs/examples/TeX.txt" + >http://www.cl.cam.ac.uk/~mgk25/ucs/examples/TeX.txt</a> +<li><a href="http://www.cl.cam.ac.uk/~mgk25/ucs/wgl4.txt" + >http://www.cl.cam.ac.uk/~mgk25/ucs/wgl4.txt</a> +</ul> +</BODY> +</HTML> diff --git a/test/raw8bit.html b/test/raw8bit.html new file mode 100644 index 0000000..f0d0eeb --- /dev/null +++ b/test/raw8bit.html @@ -0,0 +1,38 @@ +<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN"> +<HTML> +<HEAD> +<TITLE> Test of raw 8-bit symbols </TITLE> +<!-- you may uncomment the next line +and set the document's charset directly via META tag --> +<!--META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1"--> +</HEAD> +<BODY> +<PRE> +This is a test of translation 8-bit letters for different pairs of +document's charset (assumed charset) and display charset, +both can be reached from 'O'ptions menu. + +This page (obviously) corresponds to text/html mode +but you may test text/plain just by pressing '\' +Try also: '@' for ``raw mode'' and '=' for Information Page. + + + 0 1 2 3 4 5 6 7 8 9 A B C D E F +20 ! " # $ % & ' ( ) * + , - . / +30 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
+40 @ A B C D E F G H I J K L M N O +50 P Q R S T U V W X Y Z [ \ ] ^ _ +60 ` a b c d e f g h i j k l m n o +70 p q r s t u v w x y z { | } ~ +80 € ‚ ƒ „ … † ‡ ˆ ‰ Š ‹ Œ Ž +90 ‘ ’ “ ” • – — ˜ ™ š › œ ž Ÿ +A0 ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ® ¯ +B0 ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿ +C0 À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï +D0 Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß +E0 à á â ã ä å æ ç è é ê ë ì í î ï +F0 ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ + +</PRE> +</BODY> +</HTML> diff --git a/test/sgml.html b/test/sgml.html new file mode 100644 index 0000000..9442534 --- /dev/null +++ b/test/sgml.html @@ -0,0 +1,1081 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<HTML> +<HEAD> +<TITLE>Test of some Unicode symbols enclosed as SGML entity names</TITLE> +</HEAD> +<BODY> +<PRE> + + This table prepared from SGML.TXT available at ftp.unicode.org + + ftp://ftp.unicode.org/MAPPINGS/VENDORS/MISC/SGML.TXT + (if doing ftp, try cd Public/MAPPINGS/VENDORS/MISC) + + +original comment: + +# Author: John Cowan <cowan@ccil.org> +# Date: 25 July 1997 +# +# The following table maps SGML character entities from various +# public sets (namely, ISOamsa, ISOamsb, ISOamsc, ISOamsn, ISOamso, +# ISOamsr, ISObox, ISOcyr1, ISOcyr2, ISOdia, ISOgrk1, ISOgrk2, +# ISOgrk3, ISOgrk4, ISOlat1, ISOlat2, ISOnum, ISOpub, ISOtech, +# HTMLspecial, HTMLsymbol) to corresponding Unicode characters. +# +# The table has four tab-separated columns: +# Column 1: SGML character entity name +# Column 2: SGML public entity set +# Column 3: Unicode 2.0 character code +# Column 4: Unicode 2.0 character name (UPPER CASE) +# Entries which don't have Unicode equivalents have "0x????" +# in Column 3 and a lower case description (from the public entity +# set DTD) in Column 4. The mapping is not reversible, because many +# distinctions are unified away in Unicode, particularly between +# mathematical symbols. +# +# The table is sorted case-blind by SGML character entity name. 
+# +# The contents of this table are drawn from various sources, and +# are in the public domain. +# +<!-- Changes: ++ {"euro", 0x20AC}, /* EURO SIGN */ + {"loz", 0x25CA}, /* LOZENGE */ +! /* {"loz", 0x2727}, WHITE FOUR POINTED STAR */ +! /* Warning: Duplicated ◊ entry. HTML 4,0 defines it as U+25CA. */ +- {"b.delta", 0x03B3}, /* GREEK SMALL LETTER GAMMA */ ++ {"b.delta", 0x03B4}, /* GREEK SMALL LETTER DELTA */ + +--> + +This test illuminates SGML character entities implementation in your browser. +We sort the entities according to unicode numbers. +You should see visible character if your display character set supports it +or some substitution string picked up from src/chrtrans/def7_uni.tbl. +If you see &somename; - this name is not implemented yet, +you may search for &. (Sorry, ISOgrk4 which holds a dot in its name +seems to be nonvisible for most browsers. Keep in mind that +this table is much wider than in the HTML 4.0 draft). + Leonid Pauzner. + + +0x0021 ! ISOnum # EXCLAMATION MARK +0x0022 " ISOnum # QUOTATION MARK +0x0023 # ISOnum # NUMBER SIGN +0x0024 $ ISOnum # DOLLAR SIGN +0x0025 % ISOnum # PERCENT SIGN +0x0026 & ISOnum # AMPERSAND +0x0028 ( ISOnum # LEFT PARENTHESIS +0x0029 ) ISOnum # RIGHT PARENTHESIS +0x002A * ISOnum # ASTERISK +0x002B + ISOnum # PLUS SIGN +0x002C , ISOnum # COMMA +0x002D ‐ ISOnum # HYPHEN-MINUS +0x002E . ISOnum # FULL STOP +0x002F / ISOnum # SOLIDUS +0x003A : ISOnum # COLON +0x003B ; ISOnum # SEMICOLON +0x003C < ISOnum # LESS-THAN SIGN +0x003D = ISOnum # EQUALS SIGN +0x003E > ISOnum # GREATER-THAN SIGN +0x003F ? 
ISOnum # QUESTION MARK +0x0040 @ ISOnum # COMMERCIAL AT +0x005B [ ISOnum # LEFT SQUARE BRACKET +0x005C \ ISOnum # REVERSE SOLIDUS +0x005C &sbsol; ISOamso # REVERSE SOLIDUS +0x005D ] ISOnum # RIGHT SQUARE BRACKET +0x005F _ ISOnum # LOW LINE +0x0060 ` ISOdia # GRAVE ACCENT +0x007B { ISOnum # LEFT CURLY BRACKET +0x007C | ISOnum # VERTICAL LINE +0x007D } ISOnum # RIGHT CURLY BRACKET +0x00A0 ISOnum # NO-BREAK SPACE +0x00A1 ¡ ISOnum # INVERTED EXCLAMATION MARK +0x00A2 ¢ ISOnum # CENT SIGN +0x00A3 £ ISOnum # POUND SIGN +0x00A4 ¤ ISOnum # CURRENCY SIGN +0x00A5 ¥ ISOnum # YEN SIGN +0x00A6 ¦ ISOnum # BROKEN BAR +0x00A7 § ISOnum # SECTION SIGN +0x00A8 ¨ ISOtech # DIAERESIS +0x00A8 ¨ ISOdia # DIAERESIS +0x00A8 ¨ ISOdia # DIAERESIS +0x00A9 © ISOnum # COPYRIGHT SIGN +0x00AA ª ISOnum # FEMININE ORDINAL INDICATOR +0x00AB « ISOnum # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0x00AC ¬ ISOnum # NOT SIGN +0x00AD ­ ISOnum # SOFT HYPHEN +0x00AE ® ISOnum # REGISTERED SIGN +0x00AF ¯ ISOdia # MACRON +0x00B0 ° ISOnum # DEGREE SIGN +0x00B1 ± ISOnum # PLUS-MINUS SIGN +0x00B2 ² ISOnum # SUPERSCRIPT TWO +0x00B3 ³ ISOnum # SUPERSCRIPT THREE +0x00B4 ´ ISOdia # ACUTE ACCENT +0x00B5 µ ISOnum # MICRO SIGN +0x00B6 ¶ ISOnum # PILCROW SIGN +0x00B7 · ISOnum # MIDDLE DOT +0x00B8 ¸ ISOdia # CEDILLA +0x00B9 ¹ ISOnum # SUPERSCRIPT ONE +0x00BA º ISOnum # MASCULINE ORDINAL INDICATOR +0x00BB » ISOnum # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0x00BC ¼ ISOnum # VULGAR FRACTION ONE QUARTER +0x00BD ½ ISOnum # VULGAR FRACTION ONE HALF +0x00BD ½ ISOnum # VULGAR FRACTION ONE HALF +0x00BE ¾ ISOnum # VULGAR FRACTION THREE QUARTERS +0x00BF ¿ ISOnum # INVERTED QUESTION MARK +0x00C0 À ISOlat1 # LATIN CAPITAL LETTER A WITH GRAVE +0x00C1 Á ISOlat1 # LATIN CAPITAL LETTER A WITH ACUTE +0x00C2  ISOlat1 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0x00C3 à ISOlat1 # LATIN CAPITAL LETTER A WITH TILDE +0x00C4 Ä ISOlat1 # LATIN CAPITAL LETTER A WITH DIAERESIS +0x00C5 Å ISOlat1 # LATIN CAPITAL LETTER A WITH RING ABOVE +0x00C6 Æ 
ISOlat1 # LATIN CAPITAL LETTER AE +0x00C7 Ç ISOlat1 # LATIN CAPITAL LETTER C WITH CEDILLA +0x00C8 È ISOlat1 # LATIN CAPITAL LETTER E WITH GRAVE +0x00C9 É ISOlat1 # LATIN CAPITAL LETTER E WITH ACUTE +0x00CA Ê ISOlat1 # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0x00CB Ë ISOlat1 # LATIN CAPITAL LETTER E WITH DIAERESIS +0x00CC Ì ISOlat1 # LATIN CAPITAL LETTER I WITH GRAVE +0x00CD Í ISOlat1 # LATIN CAPITAL LETTER I WITH ACUTE +0x00CE Î ISOlat1 # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0x00CF Ï ISOlat1 # LATIN CAPITAL LETTER I WITH DIAERESIS +0x00D0 Ð ISOlat1 # LATIN CAPITAL LETTER ETH +0x00D1 Ñ ISOlat1 # LATIN CAPITAL LETTER N WITH TILDE +0x00D2 Ò ISOlat1 # LATIN CAPITAL LETTER O WITH GRAVE +0x00D3 Ó ISOlat1 # LATIN CAPITAL LETTER O WITH ACUTE +0x00D4 Ô ISOlat1 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0x00D5 Õ ISOlat1 # LATIN CAPITAL LETTER O WITH TILDE +0x00D6 Ö ISOlat1 # LATIN CAPITAL LETTER O WITH DIAERESIS +0x00D7 × ISOnum # MULTIPLICATION SIGN +0x00D8 Ø ISOlat1 # LATIN CAPITAL LETTER O WITH STROKE +0x00D9 Ù ISOlat1 # LATIN CAPITAL LETTER U WITH GRAVE +0x00DA Ú ISOlat1 # LATIN CAPITAL LETTER U WITH ACUTE +0x00DB Û ISOlat1 # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0x00DC Ü ISOlat1 # LATIN CAPITAL LETTER U WITH DIAERESIS +0x00DD Ý ISOlat1 # LATIN CAPITAL LETTER Y WITH ACUTE +0x00DE Þ ISOlat1 # LATIN CAPITAL LETTER THORN +0x00DF ß ISOlat1 # LATIN SMALL LETTER SHARP S +0x00E0 à ISOlat1 # LATIN SMALL LETTER A WITH GRAVE +0x00E1 á ISOlat1 # LATIN SMALL LETTER A WITH ACUTE +0x00E2 â ISOlat1 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0x00E3 ã ISOlat1 # LATIN SMALL LETTER A WITH TILDE +0x00E4 ä ISOlat1 # LATIN SMALL LETTER A WITH DIAERESIS +0x00E5 å ISOlat1 # LATIN SMALL LETTER A WITH RING ABOVE +0x00E6 æ ISOlat1 # LATIN SMALL LETTER AE +0x00E7 ç ISOlat1 # LATIN SMALL LETTER C WITH CEDILLA +0x00E8 è ISOlat1 # LATIN SMALL LETTER E WITH GRAVE +0x00E9 é ISOlat1 # LATIN SMALL LETTER E WITH ACUTE +0x00EA ê ISOlat1 # LATIN SMALL LETTER E WITH CIRCUMFLEX +0x00EB ë ISOlat1 # LATIN SMALL 
LETTER E WITH DIAERESIS +0x00EC ì ISOlat1 # LATIN SMALL LETTER I WITH GRAVE +0x00ED í ISOlat1 # LATIN SMALL LETTER I WITH ACUTE +0x00EE î ISOlat1 # LATIN SMALL LETTER I WITH CIRCUMFLEX +0x00EF ï ISOlat1 # LATIN SMALL LETTER I WITH DIAERESIS +0x00F0 ð ISOlat1 # LATIN SMALL LETTER ETH +0x00F1 ñ ISOlat1 # LATIN SMALL LETTER N WITH TILDE +0x00F2 ò ISOlat1 # LATIN SMALL LETTER O WITH GRAVE +0x00F3 ó ISOlat1 # LATIN SMALL LETTER O WITH ACUTE +0x00F4 ô ISOlat1 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0x00F5 õ ISOlat1 # LATIN SMALL LETTER O WITH TILDE +0x00F6 ö ISOlat1 # LATIN SMALL LETTER O WITH DIAERESIS +0x00F7 ÷ ISOnum # DIVISION SIGN +0x00F8 ø ISOlat1 # LATIN SMALL LETTER O WITH STROKE +0x00F9 ù ISOlat1 # LATIN SMALL LETTER U WITH GRAVE +0x00FA ú ISOlat1 # LATIN SMALL LETTER U WITH ACUTE +0x00FB û ISOlat1 # LATIN SMALL LETTER U WITH CIRCUMFLEX +0x00FC ü ISOlat1 # LATIN SMALL LETTER U WITH DIAERESIS +0x00FD ý ISOlat1 # LATIN SMALL LETTER Y WITH ACUTE +0x00FE þ ISOlat1 # LATIN SMALL LETTER THORN +0x00FF ÿ ISOlat1 # LATIN SMALL LETTER Y WITH DIAERESIS +0x0100 Ā ISOlat2 # LATIN CAPITAL LETTER A WITH MACRON +0x0101 ā ISOlat2 # LATIN SMALL LETTER A WITH MACRON +0x0102 Ă ISOlat2 # LATIN CAPITAL LETTER A WITH BREVE +0x0103 ă ISOlat2 # LATIN SMALL LETTER A WITH BREVE +0x0104 Ą ISOlat2 # LATIN CAPITAL LETTER A WITH OGONEK +0x0105 ą ISOlat2 # LATIN SMALL LETTER A WITH OGONEK +0x0106 Ć ISOlat2 # LATIN CAPITAL LETTER C WITH ACUTE +0x0107 ć ISOlat2 # LATIN SMALL LETTER C WITH ACUTE +0x0108 Ĉ ISOlat2 # LATIN CAPITAL LETTER C WITH CIRCUMFLEX +0x0109 ĉ ISOlat2 # LATIN SMALL LETTER C WITH CIRCUMFLEX +0x010A Ċ ISOlat2 # LATIN CAPITAL LETTER C WITH DOT ABOVE +0x010B ċ ISOlat2 # LATIN SMALL LETTER C WITH DOT ABOVE +0x010C Č ISOlat2 # LATIN CAPITAL LETTER C WITH CARON +0x010D č ISOlat2 # LATIN SMALL LETTER C WITH CARON +0x010E Ď ISOlat2 # LATIN CAPITAL LETTER D WITH CARON +0x010F ď ISOlat2 # LATIN SMALL LETTER D WITH CARON +0x0110 Đ ISOlat2 # LATIN CAPITAL LETTER D WITH STROKE +0x0111 đ 
ISOlat2 # LATIN SMALL LETTER D WITH STROKE +0x0112 Ē ISOlat2 # LATIN CAPITAL LETTER E WITH MACRON +0x0113 ē ISOlat2 # LATIN SMALL LETTER E WITH MACRON +0x0116 Ė ISOlat2 # LATIN CAPITAL LETTER E WITH DOT ABOVE +0x0117 ė ISOlat2 # LATIN SMALL LETTER E WITH DOT ABOVE +0x0118 Ę ISOlat2 # LATIN CAPITAL LETTER E WITH OGONEK +0x0119 ę ISOlat2 # LATIN SMALL LETTER E WITH OGONEK +0x011A Ě ISOlat2 # LATIN CAPITAL LETTER E WITH CARON +0x011B ě ISOlat2 # LATIN SMALL LETTER E WITH CARON +0x011C Ĝ ISOlat2 # LATIN CAPITAL LETTER G WITH CIRCUMFLEX +0x011D ĝ ISOlat2 # LATIN SMALL LETTER G WITH CIRCUMFLEX +0x011E Ğ ISOlat2 # LATIN CAPITAL LETTER G WITH BREVE +0x011F ğ ISOlat2 # LATIN SMALL LETTER G WITH BREVE +0x0120 Ġ ISOlat2 # LATIN CAPITAL LETTER G WITH DOT ABOVE +0x0121 ġ ISOlat2 # LATIN SMALL LETTER G WITH DOT ABOVE +0x0122 Ģ ISOlat2 # LATIN CAPITAL LETTER G WITH CEDILLA +0x0123 &gcedil; ISOlat2 # LATIN SMALL LETTER G WITH CEDILLA +0x0124 Ĥ ISOlat2 # LATIN CAPITAL LETTER H WITH CIRCUMFLEX +0x0125 ĥ ISOlat2 # LATIN SMALL LETTER H WITH CIRCUMFLEX +0x0126 Ħ ISOlat2 # LATIN CAPITAL LETTER H WITH STROKE +0x0127 ħ ISOlat2 # LATIN SMALL LETTER H WITH STROKE +0x0128 Ĩ ISOlat2 # LATIN CAPITAL LETTER I WITH TILDE +0x0129 ĩ ISOlat2 # LATIN SMALL LETTER I WITH TILDE +0x012A Ī ISOlat2 # LATIN CAPITAL LETTER I WITH MACRON +0x012B ī ISOlat2 # LATIN SMALL LETTER I WITH MACRON +0x012E Į ISOlat2 # LATIN CAPITAL LETTER I WITH OGONEK +0x012F į ISOlat2 # LATIN SMALL LETTER I WITH OGONEK +0x0130 İ ISOlat2 # LATIN CAPITAL LETTER I WITH DOT ABOVE +0x0131 ı ISOamso # LATIN SMALL LETTER DOTLESS I +0x0131 ı ISOlat2 # LATIN SMALL LETTER DOTLESS I +0x0132 IJ ISOlat2 # LATIN CAPITAL LIGATURE IJ +0x0133 ij ISOlat2 # LATIN SMALL LIGATURE IJ +0x0134 Ĵ ISOlat2 # LATIN CAPITAL LETTER J WITH CIRCUMFLEX +0x0135 ĵ ISOlat2 # LATIN SMALL LETTER J WITH CIRCUMFLEX +0x0136 Ķ ISOlat2 # LATIN CAPITAL LETTER K WITH CEDILLA +0x0137 ķ ISOlat2 # LATIN SMALL LETTER K WITH CEDILLA +0x0138 ĸ ISOlat2 # LATIN SMALL LETTER KRA 
+0x0139 Ĺ ISOlat2 # LATIN CAPITAL LETTER L WITH ACUTE +0x013A ĺ ISOlat2 # LATIN SMALL LETTER L WITH ACUTE +0x013B Ļ ISOlat2 # LATIN CAPITAL LETTER L WITH CEDILLA +0x013C ļ ISOlat2 # LATIN SMALL LETTER L WITH CEDILLA +0x013D Ľ ISOlat2 # LATIN CAPITAL LETTER L WITH CARON +0x013E ľ ISOlat2 # LATIN SMALL LETTER L WITH CARON +0x013F Ŀ ISOlat2 # LATIN CAPITAL LETTER L WITH MIDDLE DOT +0x0140 ŀ ISOlat2 # LATIN SMALL LETTER L WITH MIDDLE DOT +0x0141 Ł ISOlat2 # LATIN CAPITAL LETTER L WITH STROKE +0x0142 ł ISOlat2 # LATIN SMALL LETTER L WITH STROKE +0x0143 Ń ISOlat2 # LATIN CAPITAL LETTER N WITH ACUTE +0x0144 ń ISOlat2 # LATIN SMALL LETTER N WITH ACUTE +0x0145 Ņ ISOlat2 # LATIN CAPITAL LETTER N WITH CEDILLA +0x0146 ņ ISOlat2 # LATIN SMALL LETTER N WITH CEDILLA +0x0147 Ň ISOlat2 # LATIN CAPITAL LETTER N WITH CARON +0x0148 ň ISOlat2 # LATIN SMALL LETTER N WITH CARON +0x0149 ʼn ISOlat2 # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +0x014A Ŋ ISOlat2 # LATIN CAPITAL LETTER ENG +0x014B ŋ ISOlat2 # LATIN SMALL LETTER ENG +0x014C Ō ISOlat2 # LATIN CAPITAL LETTER O WITH MACRON +0x014D ō ISOlat2 # LATIN SMALL LETTER O WITH MACRON +0x0150 Ő ISOlat2 # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0x0151 ő ISOlat2 # LATIN SMALL LETTER O WITH DOUBLE ACUTE +0x0152 Œ ISOlat2 # LATIN CAPITAL LIGATURE OE +0x0153 œ ISOlat2 # LATIN SMALL LIGATURE OE +0x0154 Ŕ ISOlat2 # LATIN CAPITAL LETTER R WITH ACUTE +0x0155 ŕ ISOlat2 # LATIN SMALL LETTER R WITH ACUTE +0x0156 Ŗ ISOlat2 # LATIN CAPITAL LETTER R WITH CEDILLA +0x0157 ŗ ISOlat2 # LATIN SMALL LETTER R WITH CEDILLA +0x0158 Ř ISOlat2 # LATIN CAPITAL LETTER R WITH CARON +0x0159 ř ISOlat2 # LATIN SMALL LETTER R WITH CARON +0x015A Ś ISOlat2 # LATIN CAPITAL LETTER S WITH ACUTE +0x015B ś ISOlat2 # LATIN SMALL LETTER S WITH ACUTE +0x015C Ŝ ISOlat2 # LATIN CAPITAL LETTER S WITH CIRCUMFLEX +0x015D ŝ ISOlat2 # LATIN SMALL LETTER S WITH CIRCUMFLEX +0x015E Ş ISOlat2 # LATIN CAPITAL LETTER S WITH CEDILLA +0x015F ş ISOlat2 # LATIN SMALL LETTER S WITH CEDILLA 
+0x0160 Š ISOlat2 # LATIN CAPITAL LETTER S WITH CARON +0x0161 š ISOlat2 # LATIN SMALL LETTER S WITH CARON +0x0162 Ţ ISOlat2 # LATIN CAPITAL LETTER T WITH CEDILLA +0x0163 ţ ISOlat2 # LATIN SMALL LETTER T WITH CEDILLA +0x0164 Ť ISOlat2 # LATIN CAPITAL LETTER T WITH CARON +0x0165 ť ISOlat2 # LATIN SMALL LETTER T WITH CARON +0x0166 Ŧ ISOlat2 # LATIN CAPITAL LETTER T WITH STROKE +0x0167 ŧ ISOlat2 # LATIN SMALL LETTER T WITH STROKE +0x0168 Ũ ISOlat2 # LATIN CAPITAL LETTER U WITH TILDE +0x0169 ũ ISOlat2 # LATIN SMALL LETTER U WITH TILDE +0x016A Ū ISOlat2 # LATIN CAPITAL LETTER U WITH MACRON +0x016B ū ISOlat2 # LATIN SMALL LETTER U WITH MACRON +0x016C Ŭ ISOlat2 # LATIN CAPITAL LETTER U WITH BREVE +0x016D ŭ ISOlat2 # LATIN SMALL LETTER U WITH BREVE +0x016E Ů ISOlat2 # LATIN CAPITAL LETTER U WITH RING ABOVE +0x016F ů ISOlat2 # LATIN SMALL LETTER U WITH RING ABOVE +0x0170 Ű ISOlat2 # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0x0171 ű ISOlat2 # LATIN SMALL LETTER U WITH DOUBLE ACUTE +0x0172 Ų ISOlat2 # LATIN CAPITAL LETTER U WITH OGONEK +0x0173 ų ISOlat2 # LATIN SMALL LETTER U WITH OGONEK +0x0174 Ŵ ISOlat2 # LATIN CAPITAL LETTER W WITH CIRCUMFLEX +0x0175 ŵ ISOlat2 # LATIN SMALL LETTER W WITH CIRCUMFLEX +0x0176 Ŷ ISOlat2 # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +0x0177 ŷ ISOlat2 # LATIN SMALL LETTER Y WITH CIRCUMFLEX +0x0178 Ÿ ISOlat2 # LATIN CAPITAL LETTER Y WITH DIAERESIS +0x0179 Ź ISOlat2 # LATIN CAPITAL LETTER Z WITH ACUTE +0x017A ź ISOlat2 # LATIN SMALL LETTER Z WITH ACUTE +0x017B Ż ISOlat2 # LATIN CAPITAL LETTER Z WITH DOT ABOVE +0x017C ż ISOlat2 # LATIN SMALL LETTER Z WITH DOT ABOVE +0x017D Ž ISOlat2 # LATIN CAPITAL LETTER Z WITH CARON +0x017E ž ISOlat2 # LATIN SMALL LETTER Z WITH CARON +0x0192 ƒ ISOtech # LATIN SMALL LETTER F WITH HOOK +0x01F5 ǵ ISOlat2 # LATIN SMALL LETTER G WITH ACUTE +0x02BC ' ISOnum # MODIFIER LETTER APOSTROPHE +0x02C6 ˆ ISOdia # MODIFIER LETTER CIRCUMFLEX ACCENT +0x02C7 ˇ ISOdia # CARON +0x02D8 ˘ ISOdia # BREVE +0x02D9 ˙ ISOdia # DOT ABOVE +0x02DA 
˚ ISOdia # RING ABOVE +0x02DB ˛ ISOdia # OGONEK +0x02DC ˜ ISOdia # SMALL TILDE +0x02DD ˝ ISOdia # DOUBLE ACUTE ACCENT +0x0386 &Aacgr; ISOgrk2 # GREEK CAPITAL LETTER ALPHA WITH TONOS +0x0388 &Eacgr; ISOgrk2 # GREEK CAPITAL LETTER EPSILON WITH TONOS +0x0389 &EEacgr; ISOgrk2 # GREEK CAPITAL LETTER ETA WITH TONOS +0x038A &Iacgr; ISOgrk2 # GREEK CAPITAL LETTER IOTA WITH TONOS +0x038C &Oacgr; ISOgrk2 # GREEK CAPITAL LETTER OMICRON WITH TONOS +0x038E &Uacgr; ISOgrk2 # GREEK CAPITAL LETTER UPSILON WITH TONOS +0x038F &OHacgr; ISOgrk2 # GREEK CAPITAL LETTER OMEGA WITH TONOS +0x0390 &idiagr; ISOgrk2 # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +0x0391 &Agr; ISOgrk1 # GREEK CAPITAL LETTER ALPHA +0x0391 Α HTMLsymbol # GREEK CAPITAL LETTER ALPHA +0x0392 Β HTMLsymbol # GREEK CAPITAL LETTER BETA +0x0392 &Bgr; ISOgrk1 # GREEK CAPITAL LETTER BETA +0x0393 Γ ISOgrk3 # GREEK CAPITAL LETTER GAMMA +0x0393 &Ggr; ISOgrk1 # GREEK CAPITAL LETTER GAMMA +0x0393 &b.Gamma; ISOgrk4 # GREEK CAPITAL LETTER GAMMA +0x0394 Δ ISOgrk3 # GREEK CAPITAL LETTER DELTA +0x0394 &Dgr; ISOgrk1 # GREEK CAPITAL LETTER DELTA +0x0394 &b.Delta; ISOgrk4 # GREEK CAPITAL LETTER DELTA +0x0395 &Egr; ISOgrk1 # GREEK CAPITAL LETTER EPSILON +0x0395 Ε HTMLsymbol # GREEK CAPITAL LETTER EPSILON +0x0396 Ζ HTMLsymbol # GREEK CAPITAL LETTER ZETA +0x0396 &Zgr; ISOgrk1 # GREEK CAPITAL LETTER ZETA +0x0397 &EEgr; ISOgrk1 # GREEK CAPITAL LETTER ETA +0x0397 Η HTMLsymbol # GREEK CAPITAL LETTER ETA +0x0398 &THgr; ISOgrk1 # GREEK CAPITAL LETTER THETA +0x0398 Θ ISOgrk3 # GREEK CAPITAL LETTER THETA +0x0398 &b.Theta; ISOgrk4 # GREEK CAPITAL LETTER THETA +0x0399 &Igr; ISOgrk1 # GREEK CAPITAL LETTER IOTA +0x0399 Ι HTMLsymbol # GREEK CAPITAL LETTER IOTA +0x039A Κ HTMLsymbol # GREEK CAPITAL LETTER KAPPA +0x039A &Kgr; ISOgrk1 # GREEK CAPITAL LETTER KAPPA +0x039B Λ ISOgrk3 # GREEK CAPITAL LETTER LAMDA +0x039B &Lgr; ISOgrk1 # GREEK CAPITAL LETTER LAMDA +0x039B &b.Lambda; ISOgrk4 # GREEK CAPITAL LETTER LAMDA +0x039C &Mgr; ISOgrk1 # GREEK 
CAPITAL LETTER MU +0x039C Μ HTMLsymbol # GREEK CAPITAL LETTER MU +0x039D &Ngr; ISOgrk1 # GREEK CAPITAL LETTER NU +0x039D Ν HTMLsymbol # GREEK CAPITAL LETTER NU +0x039E &Xgr; ISOgrk1 # GREEK CAPITAL LETTER XI +0x039E Ξ ISOgrk3 # GREEK CAPITAL LETTER XI +0x039E &b.Xi; ISOgrk4 # GREEK CAPITAL LETTER XI +0x039F &Ogr; ISOgrk1 # GREEK CAPITAL LETTER OMICRON +0x039F Ο HTMLsymbol # GREEK CAPITAL LETTER OMICRON +0x03A0 &Pgr; ISOgrk1 # GREEK CAPITAL LETTER PI +0x03A0 Π ISOgrk3 # GREEK CAPITAL LETTER PI +0x03A0 &b.Pi; ISOgrk4 # GREEK CAPITAL LETTER PI +0x03A1 &Rgr; ISOgrk1 # GREEK CAPITAL LETTER RHO +0x03A1 Ρ HTMLsymbol # GREEK CAPITAL LETTER RHO +0x03A3 &Sgr; ISOgrk1 # GREEK CAPITAL LETTER SIGMA +0x03A3 Σ ISOgrk3 # GREEK CAPITAL LETTER SIGMA +0x03A3 &b.Sigma; ISOgrk4 # GREEK CAPITAL LETTER SIGMA +0x03A4 Τ HTMLsymbol # GREEK CAPITAL LETTER TAU +0x03A4 &Tgr; ISOgrk1 # GREEK CAPITAL LETTER TAU +0x03A5 &Ugr; ISOgrk1 # GREEK CAPITAL LETTER UPSILON +0x03A5 ϒ ISOgrk3 # GREEK CAPITAL LETTER UPSILON +0x03A5 Υ HTMLsymbol # GREEK CAPITAL LETTER UPSILON +0x03A5 &b.Upsi; ISOgrk4 # GREEK CAPITAL LETTER UPSILON +0x03A6 &PHgr; ISOgrk1 # GREEK CAPITAL LETTER PHI +0x03A6 Φ ISOgrk3 # GREEK CAPITAL LETTER PHI +0x03A6 &b.Phi; ISOgrk4 # GREEK CAPITAL LETTER PHI +0x03A7 Χ HTMLsymbol # GREEK CAPITAL LETTER CHI +0x03A7 &KHgr; ISOgrk1 # GREEK CAPITAL LETTER CHI +0x03A8 &PSgr; ISOgrk1 # GREEK CAPITAL LETTER PSI +0x03A8 Ψ ISOgrk3 # GREEK CAPITAL LETTER PSI +0x03A8 &b.Psi; ISOgrk4 # GREEK CAPITAL LETTER PSI +0x03A9 &OHgr; ISOgrk1 # GREEK CAPITAL LETTER OMEGA +0x03A9 Ω ISOgrk3 # GREEK CAPITAL LETTER OMEGA +0x03A9 &b.Omega; ISOgrk4 # GREEK CAPITAL LETTER OMEGA +0x03AA &Idigr; ISOgrk2 # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA +0x03AB &Udigr; ISOgrk2 # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +0x03AC &aacgr; ISOgrk2 # GREEK SMALL LETTER ALPHA WITH TONOS +0x03AD &eacgr; ISOgrk2 # GREEK SMALL LETTER EPSILON WITH TONOS +0x03AE &eeacgr; ISOgrk2 # GREEK SMALL LETTER ETA WITH TONOS +0x03AF &iacgr; ISOgrk2 # 
GREEK SMALL LETTER IOTA WITH TONOS +0x03B0 &udiagr; ISOgrk2 # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND +0x03B1 &agr; ISOgrk1 # GREEK SMALL LETTER ALPHA +0x03B1 α ISOgrk3 # GREEK SMALL LETTER ALPHA +0x03B1 &b.alpha; ISOgrk4 # GREEK SMALL LETTER ALPHA +0x03B2 &b.beta; ISOgrk4 # GREEK SMALL LETTER BETA +0x03B2 β ISOgrk3 # GREEK SMALL LETTER BETA +0x03B2 &bgr; ISOgrk1 # GREEK SMALL LETTER BETA +0x03B3 &b.gamma; ISOgrk4 # GREEK SMALL LETTER GAMMA +0x03B3 γ ISOgrk3 # GREEK SMALL LETTER GAMMA +0x03B3 &ggr; ISOgrk1 # GREEK SMALL LETTER GAMMA +0x03B4 &b.delta; ISOgrk4 # GREEK SMALL LETTER DELTA +0x03B4 δ ISOgrk3 # GREEK SMALL LETTER DELTA +0x03B4 &dgr; ISOgrk1 # GREEK SMALL LETTER DELTA +0x03B5 &b.epsi; ISOgrk4 # GREEK SMALL LETTER EPSILON +0x03B5 &b.epsis; ISOgrk4 # GREEK SMALL LETTER EPSILON +0x03B5 &b.epsiv; ISOgrk4 # GREEK SMALL LETTER EPSILON +0x03B5 &egr; ISOgrk1 # GREEK SMALL LETTER EPSILON +0x03B5 ε ISOgrk3 # GREEK SMALL LETTER EPSILON +0x03B5 ε HTMLsymbol # GREEK SMALL LETTER EPSILON +0x03B6 &b.zeta; ISOgrk4 # GREEK SMALL LETTER ZETA +0x03B6 ζ ISOgrk3 # GREEK SMALL LETTER ZETA +0x03B6 &zgr; ISOgrk1 # GREEK SMALL LETTER ZETA +0x03B7 &b.eta; ISOgrk4 # GREEK SMALL LETTER ETA +0x03B7 &eegr; ISOgrk1 # GREEK SMALL LETTER ETA +0x03B7 η ISOgrk3 # GREEK SMALL LETTER ETA +0x03B8 &b.thetas; ISOgrk4 # GREEK SMALL LETTER THETA +0x03B8 θ HTMLsymbol # GREEK SMALL LETTER THETA +0x03B8 &thetas; ISOgrk3 # GREEK SMALL LETTER THETA +0x03B8 &thgr; ISOgrk1 # GREEK SMALL LETTER THETA +0x03B9 &b.iota; ISOgrk4 # GREEK SMALL LETTER IOTA +0x03B9 &igr; ISOgrk1 # GREEK SMALL LETTER IOTA +0x03B9 ι ISOgrk3 # GREEK SMALL LETTER IOTA +0x03BA &b.kappa; ISOgrk4 # GREEK SMALL LETTER KAPPA +0x03BA κ ISOgrk3 # GREEK SMALL LETTER KAPPA +0x03BA &kgr; ISOgrk1 # GREEK SMALL LETTER KAPPA +0x03BB &b.lambda; ISOgrk4 # GREEK SMALL LETTER LAMDA +0x03BB λ ISOgrk3 # GREEK SMALL LETTER LAMDA +0x03BB &lgr; ISOgrk1 # GREEK SMALL LETTER LAMDA +0x03BC &b.mu; ISOgrk4 # GREEK SMALL LETTER MU +0x03BC &mgr; ISOgrk1 # 
GREEK SMALL LETTER MU +0x03BC μ ISOgrk3 # GREEK SMALL LETTER MU +0x03BD &b.nu; ISOgrk4 # GREEK SMALL LETTER NU +0x03BD &ngr; ISOgrk1 # GREEK SMALL LETTER NU +0x03BD ν ISOgrk3 # GREEK SMALL LETTER NU +0x03BE &b.xi; ISOgrk4 # GREEK SMALL LETTER XI +0x03BE &xgr; ISOgrk1 # GREEK SMALL LETTER XI +0x03BE ξ ISOgrk3 # GREEK SMALL LETTER XI +0x03BF &ogr; ISOgrk1 # GREEK SMALL LETTER OMICRON +0x03BF ο HTMLsymbol # GREEK SMALL LETTER OMICRON +0x03C0 &b.pi; ISOgrk4 # GREEK SMALL LETTER PI +0x03C0 &pgr; ISOgrk1 # GREEK SMALL LETTER PI +0x03C0 π ISOgrk3 # GREEK SMALL LETTER PI +0x03C1 &b.rho; ISOgrk4 # GREEK SMALL LETTER RHO +0x03C1 &rgr; ISOgrk1 # GREEK SMALL LETTER RHO +0x03C1 ρ ISOgrk3 # GREEK SMALL LETTER RHO +0x03C2 &b.sigmav; ISOgrk4 # GREEK SMALL LETTER FINAL SIGMA +0x03C2 &sfgr; ISOgrk1 # GREEK SMALL LETTER FINAL SIGMA +0x03C2 ς HTMLsymbol # GREEK SMALL LETTER FINAL SIGMA +0x03C2 ς ISOgrk3 # GREEK SMALL LETTER FINAL SIGMA +0x03C3 &b.sigma; ISOgrk4 # GREEK SMALL LETTER SIGMA +0x03C3 &sgr; ISOgrk1 # GREEK SMALL LETTER SIGMA +0x03C3 σ ISOgrk3 # GREEK SMALL LETTER SIGMA +0x03C4 &b.tau; ISOgrk4 # GREEK SMALL LETTER TAU +0x03C4 τ ISOgrk3 # GREEK SMALL LETTER TAU +0x03C4 &tgr; ISOgrk1 # GREEK SMALL LETTER TAU +0x03C5 &b.upsi; ISOgrk4 # GREEK SMALL LETTER UPSILON +0x03C5 &ugr; ISOgrk1 # GREEK SMALL LETTER UPSILON +0x03C5 υ ISOgrk3 # GREEK SMALL LETTER UPSILON +0x03C5 υ HTMLsymbol # GREEK SMALL LETTER UPSILON +0x03C6 &b.phis; ISOgrk4 # GREEK SMALL LETTER PHI +0x03C6 &phgr; ISOgrk1 # GREEK SMALL LETTER PHI +0x03C6 φ HTMLsymbol # GREEK SMALL LETTER PHI +0x03C6 &phis; ISOgrk3 # GREEK SMALL LETTER PHI +0x03C7 &b.chi; ISOgrk4 # GREEK SMALL LETTER CHI +0x03C7 χ ISOgrk3 # GREEK SMALL LETTER CHI +0x03C7 &khgr; ISOgrk1 # GREEK SMALL LETTER CHI +0x03C8 &b.psi; ISOgrk4 # GREEK SMALL LETTER PSI +0x03C8 &psgr; ISOgrk1 # GREEK SMALL LETTER PSI +0x03C8 ψ ISOgrk3 # GREEK SMALL LETTER PSI +0x03C9 &ohgr; ISOgrk1 # GREEK SMALL LETTER OMEGA +0x03C9 ω ISOgrk3 # GREEK SMALL LETTER OMEGA +0x03CA 
&idigr; ISOgrk2 # GREEK SMALL LETTER IOTA WITH DIALYTIKA +0x03CB &udigr; ISOgrk2 # GREEK SMALL LETTER UPSILON WITH DIALYTIKA +0x03CC &oacgr; ISOgrk2 # GREEK SMALL LETTER OMICRON WITH TONOS +0x03CD &uacgr; ISOgrk2 # GREEK SMALL LETTER UPSILON WITH TONOS +0x03CE &b.omega; ISOgrk4 # GREEK SMALL LETTER OMEGA WITH TONOS +0x03CE &ohacgr; ISOgrk2 # GREEK SMALL LETTER OMEGA WITH TONOS +0x03D1 &b.thetav; ISOgrk4 # GREEK THETA SYMBOL +0x03D1 ϑ HTMLsymbol # GREEK THETA SYMBOL +0x03D1 ϑ ISOgrk3 # GREEK THETA SYMBOL +0x03D2 ϒ HTMLsymbol # GREEK UPSILON WITH HOOK SYMBOL +0x03D5 &b.phiv; ISOgrk4 # GREEK PHI SYMBOL +0x03D5 ϕ ISOgrk3 # GREEK PHI SYMBOL +0x03D6 &b.piv; ISOgrk4 # GREEK PI SYMBOL +0x03D6 ϖ ISOgrk3 # GREEK PI SYMBOL +0x03DC &b.gammad; ISOgrk4 # GREEK LETTER DIGAMMA +0x03DC ϝ ISOgrk3 # GREEK LETTER DIGAMMA +0x03F0 &b.kappav; ISOgrk4 # GREEK KAPPA SYMBOL +0x03F0 ϰ ISOgrk3 # GREEK KAPPA SYMBOL +0x03F1 &b.rhov; ISOgrk4 # GREEK RHO SYMBOL +0x03F1 ϱ ISOgrk3 # GREEK RHO SYMBOL +0x0401 Ё ISOcyr1 # CYRILLIC CAPITAL LETTER IO +0x0402 Ђ ISOcyr2 # CYRILLIC CAPITAL LETTER DJE +0x0403 Ѓ ISOcyr2 # CYRILLIC CAPITAL LETTER GJE +0x0404 Є ISOcyr2 # CYRILLIC CAPITAL LETTER UKRAINIAN IE +0x0405 Ѕ ISOcyr2 # CYRILLIC CAPITAL LETTER DZE +0x0406 І ISOcyr2 # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +0x0407 Ї ISOcyr2 # CYRILLIC CAPITAL LETTER YI +0x0408 Ј ISOcyr2 # CYRILLIC CAPITAL LETTER JE +0x0409 Љ ISOcyr2 # CYRILLIC CAPITAL LETTER LJE +0x040A Њ ISOcyr2 # CYRILLIC CAPITAL LETTER NJE +0x040B Ћ ISOcyr2 # CYRILLIC CAPITAL LETTER TSHE +0x040C Ќ ISOcyr2 # CYRILLIC CAPITAL LETTER KJE +0x040E Ў ISOcyr2 # CYRILLIC CAPITAL LETTER SHORT U +0x040F Џ ISOcyr2 # CYRILLIC CAPITAL LETTER DZHE +0x0410 А ISOcyr1 # CYRILLIC CAPITAL LETTER A +0x0411 Б ISOcyr1 # CYRILLIC CAPITAL LETTER BE +0x0412 В ISOcyr1 # CYRILLIC CAPITAL LETTER VE +0x0413 Г ISOcyr1 # CYRILLIC CAPITAL LETTER GHE +0x0414 Д ISOcyr1 # CYRILLIC CAPITAL LETTER DE +0x0415 Е ISOcyr1 # CYRILLIC CAPITAL LETTER IE +0x0416 Ж ISOcyr1 # CYRILLIC 
CAPITAL LETTER ZHE +0x0417 З ISOcyr1 # CYRILLIC CAPITAL LETTER ZE +0x0418 И ISOcyr1 # CYRILLIC CAPITAL LETTER I +0x0419 Й ISOcyr1 # CYRILLIC CAPITAL LETTER SHORT I +0x041A К ISOcyr1 # CYRILLIC CAPITAL LETTER KA +0x041B Л ISOcyr1 # CYRILLIC CAPITAL LETTER EL +0x041C М ISOcyr1 # CYRILLIC CAPITAL LETTER EM +0x041D Н ISOcyr1 # CYRILLIC CAPITAL LETTER EN +0x041E О ISOcyr1 # CYRILLIC CAPITAL LETTER O +0x041F П ISOcyr1 # CYRILLIC CAPITAL LETTER PE +0x0420 Р ISOcyr1 # CYRILLIC CAPITAL LETTER ER +0x0421 С ISOcyr1 # CYRILLIC CAPITAL LETTER ES +0x0422 Т ISOcyr1 # CYRILLIC CAPITAL LETTER TE +0x0423 У ISOcyr1 # CYRILLIC CAPITAL LETTER U +0x0424 Ф ISOcyr1 # CYRILLIC CAPITAL LETTER EF +0x0425 Х ISOcyr1 # CYRILLIC CAPITAL LETTER HA +0x0426 Ц ISOcyr1 # CYRILLIC CAPITAL LETTER TSE +0x0427 Ч ISOcyr1 # CYRILLIC CAPITAL LETTER CHE +0x0428 Ш ISOcyr1 # CYRILLIC CAPITAL LETTER SHA +0x0429 Щ ISOcyr1 # CYRILLIC CAPITAL LETTER SHCHA +0x042A Ъ ISOcyr1 # CYRILLIC CAPITAL LETTER HARD SIGN +0x042B Ы ISOcyr1 # CYRILLIC CAPITAL LETTER YERU +0x042C Ь ISOcyr1 # CYRILLIC CAPITAL LETTER SOFT SIGN +0x042D Э ISOcyr1 # CYRILLIC CAPITAL LETTER E +0x042E Ю ISOcyr1 # CYRILLIC CAPITAL LETTER YU +0x042F Я ISOcyr1 # CYRILLIC CAPITAL LETTER YA +0x0430 а ISOcyr1 # CYRILLIC SMALL LETTER A +0x0431 б ISOcyr1 # CYRILLIC SMALL LETTER BE +0x0432 в ISOcyr1 # CYRILLIC SMALL LETTER VE +0x0433 г ISOcyr1 # CYRILLIC SMALL LETTER GHE +0x0434 д ISOcyr1 # CYRILLIC SMALL LETTER DE +0x0435 е ISOcyr1 # CYRILLIC SMALL LETTER IE +0x0436 ж ISOcyr1 # CYRILLIC SMALL LETTER ZHE +0x0437 з ISOcyr1 # CYRILLIC SMALL LETTER ZE +0x0438 и ISOcyr1 # CYRILLIC SMALL LETTER I +0x0439 й ISOcyr1 # CYRILLIC SMALL LETTER SHORT I +0x043A к ISOcyr1 # CYRILLIC SMALL LETTER KA +0x043B л ISOcyr1 # CYRILLIC SMALL LETTER EL +0x043C м ISOcyr1 # CYRILLIC SMALL LETTER EM +0x043D н ISOcyr1 # CYRILLIC SMALL LETTER EN +0x043E о ISOcyr1 # CYRILLIC SMALL LETTER O +0x043F п ISOcyr1 # CYRILLIC SMALL LETTER PE +0x0440 р ISOcyr1 # CYRILLIC SMALL LETTER ER +0x0441 с 
ISOcyr1 # CYRILLIC SMALL LETTER ES +0x0442 т ISOcyr1 # CYRILLIC SMALL LETTER TE +0x0443 у ISOcyr1 # CYRILLIC SMALL LETTER U +0x0444 ф ISOcyr1 # CYRILLIC SMALL LETTER EF +0x0445 х ISOcyr1 # CYRILLIC SMALL LETTER HA +0x0446 ц ISOcyr1 # CYRILLIC SMALL LETTER TSE +0x0447 ч ISOcyr1 # CYRILLIC SMALL LETTER CHE +0x0448 ш ISOcyr1 # CYRILLIC SMALL LETTER SHA +0x0449 щ ISOcyr1 # CYRILLIC SMALL LETTER SHCHA +0x044A ъ ISOcyr1 # CYRILLIC SMALL LETTER HARD SIGN +0x044B ы ISOcyr1 # CYRILLIC SMALL LETTER YERU +0x044C ь ISOcyr1 # CYRILLIC SMALL LETTER SOFT SIGN +0x044D э ISOcyr1 # CYRILLIC SMALL LETTER E +0x044E ю ISOcyr1 # CYRILLIC SMALL LETTER YU +0x044F я ISOcyr1 # CYRILLIC SMALL LETTER YA +0x0451 ё ISOcyr1 # CYRILLIC SMALL LETTER IO +0x0452 ђ ISOcyr2 # CYRILLIC SMALL LETTER DJE +0x0453 ѓ ISOcyr2 # CYRILLIC SMALL LETTER GJE +0x0454 є ISOcyr2 # CYRILLIC SMALL LETTER UKRAINIAN IE +0x0455 ѕ ISOcyr2 # CYRILLIC SMALL LETTER DZE +0x0456 і ISOcyr2 # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +0x0457 ї ISOcyr2 # CYRILLIC SMALL LETTER YI +0x0458 ј ISOcyr2 # CYRILLIC SMALL LETTER JE +0x0459 љ ISOcyr2 # CYRILLIC SMALL LETTER LJE +0x045A њ ISOcyr2 # CYRILLIC SMALL LETTER NJE +0x045B ћ ISOcyr2 # CYRILLIC SMALL LETTER TSHE +0x045C ќ ISOcyr2 # CYRILLIC SMALL LETTER KJE +0x045E ў ISOcyr2 # CYRILLIC SMALL LETTER SHORT U +0x045F џ ISOcyr2 # CYRILLIC SMALL LETTER DZHE +0x2002   ISOpub # EN SPACE +0x2003   ISOpub # EM SPACE +0x2004   ISOpub # THREE-PER-EM SPACE +0x2005   ISOpub # FOUR-PER-EM SPACE +0x2007   ISOpub # FIGURE SPACE +0x2008   ISOpub # PUNCTUATION SPACE +0x2009   ISOpub # THIN SPACE +0x200A   ISOpub # HAIR SPACE +0x200C ‌ HTMLspecial # ZERO WIDTH NON-JOINER +0x200D ‍ HTMLspecial # ZERO WIDTH JOINER +0x200E ‎ HTMLspecial # LEFT-TO-RIGHT MARK +0x200F ‏ HTMLspecial # RIGHT-TO-LEFT MARK +0x2010 ‐ ISOpub # HYPHEN +0x2013 – ISOpub # EN DASH +0x2014 — ISOpub # EM DASH +0x2015 ― ISOnum # HORIZONTAL BAR +0x2016 ‖ ISOtech # DOUBLE VERTICAL LINE +0x2018 ‘ ISOnum # LEFT SINGLE QUOTATION MARK 
+0x2018 ’ ISOpub # LEFT SINGLE QUOTATION MARK +0x2019 ’ ISOnum # RIGHT SINGLE QUOTATION MARK +0x201A ‚ ISOpub # SINGLE LOW-9 QUOTATION MARK +0x201A ‚ HTMLspecial # SINGLE LOW-9 QUOTATION MARK +0x201C “ ISOnum # LEFT DOUBLE QUOTATION MARK +0x201C ” ISOpub # LEFT DOUBLE QUOTATION MARK +0x201D ” ISOnum # RIGHT DOUBLE QUOTATION MARK +0x201E „ HTMLspecial # DOUBLE LOW-9 QUOTATION MARK +0x201E „ ISOpub # DOUBLE LOW-9 QUOTATION MARK +0x2020 † ISOpub # DAGGER +0x2021 ‡ ISOpub # DOUBLE DAGGER +0x2022 • ISOpub # BULLET +0x2025 ‥ ISOpub # TWO DOT LEADER +0x2026 … ISOpub # HORIZONTAL ELLIPSIS +0x2026 … ISOpub # HORIZONTAL ELLIPSIS +0x2030 ‰ ISOtech # PER MILLE SIGN +0x2032 ′ ISOtech # PRIME +0x2032 &vprime; ISOamso # PRIME +0x2033 ″ ISOtech # DOUBLE PRIME +0x2034 ‴ ISOtech # TRIPLE PRIME +0x2035 ‵ ISOamso # REVERSED PRIME +0x2039 ‹ HTMLspecial # SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0x203A › HTMLspecial # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0x203E ‾ HTMLsymbol # OVERLINE +0x2041 ⁁ ISOpub # CARET INSERTION POINT +0x2043 ⁃ ISOpub # HYPHEN BULLET +0x2044 ⁄ HTMLsymbol # FRACTION SLASH +0x20AC € new # EURO SIGN +0x20DB ⃛ ISOtech # COMBINING THREE DOTS ABOVE +0x20DC ⃜ ISOtech # COMBINING FOUR DOTS ABOVE +0x2105 ℅ ISOpub # CARE OF +0x210B ℋ ISOtech # SCRIPT CAPITAL H +0x210F ℏ ISOamso # PLANCK CONSTANT OVER TWO PI +0x2111 ℑ ISOamso # BLACK-LETTER CAPITAL I +0x2112 ℒ ISOtech # SCRIPT CAPITAL L +0x2113 ℓ ISOamso # SCRIPT SMALL L +0x2116 № ISOcyr1 # NUMERO SIGN +0x2117 ℗ ISOpub # SOUND RECORDING COPYRIGHT +0x2118 ℘ ISOamso # SCRIPT CAPITAL P +0x211C ℜ ISOamso # BLACK-LETTER CAPITAL R +0x211E ℞ ISOpub # PRESCRIPTION TAKE +0x2122 ™ ISOnum # TRADE MARK SIGN +0x2126 Ω ISOnum # OHM SIGN +0x212B Å ISOtech # ANGSTROM SIGN +0x212C ℬ ISOtech # SCRIPT CAPITAL B +0x2133 ℳ ISOtech # SCRIPT CAPITAL M +0x2134 ℴ ISOtech # SCRIPT SMALL O +0x2135 ℵ HTMLsymbol # ALEF SYMBOL +0x2135 ℵ ISOtech # ALEF SYMBOL +0x2136 ℶ ISOamso # BET SYMBOL +0x2137 ℷ ISOamso # GIMEL SYMBOL +0x2138 ℸ ISOamso # 
DALET SYMBOL +0x2153 ⅓ ISOpub # VULGAR FRACTION ONE THIRD +0x2154 ⅔ ISOpub # VULGAR FRACTION TWO THIRDS +0x2155 ⅕ ISOpub # VULGAR FRACTION ONE FIFTH +0x2156 ⅖ ISOpub # VULGAR FRACTION TWO FIFTHS +0x2157 ⅗ ISOpub # VULGAR FRACTION THREE FIFTHS +0x2158 ⅘ ISOpub # VULGAR FRACTION FOUR FIFTHS +0x2159 ⅙ ISOpub # VULGAR FRACTION ONE SIXTH +0x215A ⅚ ISOpub # VULGAR FRACTION FIVE SIXTHS +0x215B ⅛ ISOnum # VULGAR FRACTION ONE EIGHTH +0x215C ⅜ ISOnum # VULGAR FRACTION THREE EIGHTHS +0x215D ⅝ ISOnum # VULGAR FRACTION FIVE EIGHTHS +0x215E ⅞ ISOnum # VULGAR FRACTION SEVEN EIGHTHS +0x2190 ← ISOnum # LEFTWARDS ARROW +0x2191 ↑ ISOnum # UPWARDS ARROW +0x2192 → ISOnum # RIGHTWARDS ARROW +0x2193 ↓ ISOnum # DOWNWARDS ARROW +0x2194 ↔ ISOamsa # LEFT RIGHT ARROW +0x2194 ⟺ ISOamsa # LEFT RIGHT ARROW +0x2194 ⟷ ISOamsa # LEFT RIGHT ARROW +0x2195 ↕ ISOamsa # UP DOWN ARROW +0x2196 ↖ ISOamsa # NORTH WEST ARROW +0x2197 ↗ ISOamsa # NORTH EAST ARROW +0x2198 &drarr; ISOamsa # SOUTH EAST ARROW +0x2199 &dlarr; ISOamsa # SOUTH WEST ARROW +0x219A ↚ ISOamsa # LEFTWARDS ARROW WITH STROKE +0x219B ↛ ISOamsa # RIGHTWARDS ARROW WITH STROKE +0x219D ↝ ISOamsa # RIGHTWARDS WAVE ARROW +0x219E ↞ ISOamsa # LEFTWARDS TWO HEADED ARROW +0x21A0 ↠ ISOamsa # RIGHTWARDS TWO HEADED ARROW +0x21A2 ↢ ISOamsa # LEFTWARDS ARROW WITH TAIL +0x21A3 ↣ ISOamsa # RIGHTWARDS ARROW WITH TAIL +0x21A6 ↦ ISOamsa # RIGHTWARDS ARROW FROM BAR +0x21A9 ↩ ISOamsa # LEFTWARDS ARROW WITH HOOK +0x21AA ↪ ISOamsa # RIGHTWARDS ARROW WITH HOOK +0x21AB ↫ ISOamsa # LEFTWARDS ARROW WITH LOOP +0x21AC ↬ ISOamsa # RIGHTWARDS ARROW WITH LOOP +0x21AD ↭ ISOamsa # LEFT RIGHT WAVE ARROW +0x21AE ↮ ISOamsa # LEFT RIGHT ARROW WITH STROKE +0x21B0 ↰ ISOamsa # UPWARDS ARROW WITH TIP LEFTWARDS +0x21B1 ↱ ISOamsa # UPWARDS ARROW WITH TIP RIGHTWARDS +0x21B5 ↵ HTMLsymbol # DOWNWARDS ARROW WITH CORNER LEFTWARDS +0x21B6 ↶ ISOamsa # ANTICLOCKWISE TOP SEMICIRCLE ARROW +0x21B7 ↷ ISOamsa # CLOCKWISE TOP SEMICIRCLE ARROW +0x21BA ↺ ISOamsa # ANTICLOCKWISE OPEN CIRCLE ARROW 
+0x21BB ↻ ISOamsa # CLOCKWISE OPEN CIRCLE ARROW +0x21BC ↼ ISOamsa # LEFTWARDS HARPOON WITH BARB UPWARDS +0x21BD ↽ ISOamsa # LEFTWARDS HARPOON WITH BARB DOWNWARDS +0x21BE ↾ ISOamsa # UPWARDS HARPOON WITH BARB RIGHTWARDS +0x21BF ↿ ISOamsa # UPWARDS HARPOON WITH BARB LEFTWARDS +0x21C0 ⇀ ISOamsa # RIGHTWARDS HARPOON WITH BARB UPWARDS +0x21C1 ⇁ ISOamsa # RIGHTWARDS HARPOON WITH BARB DOWNWARDS +0x21C2 ⇂ ISOamsa # DOWNWARDS HARPOON WITH BARB RIGHTWARDS +0x21C3 ⇃ ISOamsa # DOWNWARDS HARPOON WITH BARB LEFTWARDS +0x21C4 &rlarr2; ISOamsa # RIGHTWARDS ARROW OVER LEFTWARDS ARROW +0x21C6 &lrarr2; ISOamsa # LEFTWARDS ARROW OVER RIGHTWARDS ARROW +0x21C7 &larr2; ISOamsa # LEFTWARDS PAIRED ARROWS +0x21C8 &uarr2; ISOamsa # UPWARDS PAIRED ARROWS +0x21C9 &rarr2; ISOamsa # RIGHTWARDS PAIRED ARROWS +0x21CA &darr2; ISOamsa # DOWNWARDS PAIRED ARROWS +0x21CB &lrhar2; ISOamsa # LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON +0x21CC &rlhar2; ISOamsa # RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON +0x21CD ⇍ ISOamsa # LEFTWARDS DOUBLE ARROW WITH STROKE +0x21CE ⇎ ISOamsa # LEFT RIGHT DOUBLE ARROW WITH STROKE +0x21CF ⇏ ISOamsa # RIGHTWARDS DOUBLE ARROW WITH STROKE +0x21D0 ⇐ ISOtech # LEFTWARDS DOUBLE ARROW +0x21D0 ⟸ ISOamsa # LEFTWARDS DOUBLE ARROW +0x21D1 ⇑ ISOamsa # UPWARDS DOUBLE ARROW +0x21D2 ⇒ ISOtech # RIGHTWARDS DOUBLE ARROW +0x21D2 ⟹ ISOamsa # RIGHTWARDS DOUBLE ARROW +0x21D3 ⇓ ISOamsa # DOWNWARDS DOUBLE ARROW +0x21D4 ⇔ ISOamsa # LEFT RIGHT DOUBLE ARROW +0x21D4 ⇔ ISOtech # LEFT RIGHT DOUBLE ARROW +0x21D5 ⇕ ISOamsa # UP DOWN DOUBLE ARROW +0x21DA ⇚ ISOamsa # LEFTWARDS TRIPLE ARROW +0x21DB ⇛ ISOamsa # RIGHTWARDS TRIPLE ARROW +0x2200 ∀ ISOtech # FOR ALL +0x2201 ∁ ISOamso # COMPLEMENT +0x2202 ∂ ISOtech # PARTIAL DIFFERENTIAL +0x2203 ∃ ISOtech # THERE EXISTS +0x2204 ∄ ISOamso # THERE DOES NOT EXIST +0x2205 ∅ ISOamso # EMPTY SET +0x2207 ∇ ISOtech # NABLA +0x2208 ∈ ISOtech # ELEMENT OF +0x2209 ∉ ISOtech # NOT AN ELEMENT OF +0x220A &epsis; ISOgrk3 # SMALL ELEMENT OF +0x220B ∋ ISOtech # CONTAINS AS MEMBER 
+0x220D ϶ ISOamsr # SMALL CONTAINS AS MEMBER +0x220F ∏ ISOamsb # N-ARY PRODUCT +0x2210 ⨿ ISOamsb # N-ARY COPRODUCT +0x2210 ∐ ISOamsb # N-ARY COPRODUCT +0x2210 &samalg; ISOamsr # N-ARY COPRODUCT +0x2211 ∑ ISOamsb # N-ARY SUMMATION +0x2212 − ISOtech # MINUS SIGN +0x2213 ∓ ISOtech # MINUS-OR-PLUS SIGN +0x2214 ∔ ISOamsb # DOT PLUS +0x2216 ∖ ISOamsb # SET MINUS +0x2216 ∖ ISOamsb # SET MINUS +0x2217 ∗ ISOtech # ASTERISK OPERATOR +0x2218 ∘ ISOtech # RING OPERATOR +0x221A √ ISOtech # SQUARE ROOT +0x221D ∝ ISOtech # PROPORTIONAL TO +0x221D ∝ ISOamsr # PROPORTIONAL TO +0x221E ∞ ISOtech # INFINITY +0x221F &ang90; ISOtech # RIGHT ANGLE +0x2220 ∠ ISOamso # ANGLE +0x2221 ∡ ISOamso # MEASURED ANGLE +0x2222 ∢ ISOtech # SPHERICAL ANGLE +0x2223 ∣ ISOamsr # DIVIDES +0x2224 ∤ ISOamsn # DOES NOT DIVIDE +0x2225 ∥ ISOtech # PARALLEL TO +0x2225 ∥ ISOamsr # PARALLEL TO +0x2226 ∦ ISOamsn # NOT PARALLEL TO +0x2226 ∦ ISOamsn # NOT PARALLEL TO +0x2227 ∧ ISOtech # LOGICAL AND +0x2228 ∨ ISOtech # LOGICAL OR +0x2229 ∩ ISOtech # INTERSECTION +0x222A ∪ ISOtech # UNION +0x222B ∫ ISOtech # INTEGRAL +0x222E ∮ ISOtech # CONTOUR INTEGRAL +0x2234 ∴ ISOtech # THEREFORE +0x2235 ∵ ISOtech # BECAUSE +0x223C ∼ ISOtech # TILDE OPERATOR +0x223C ∼ ISOamsr # TILDE OPERATOR +0x223D ∽ ISOamsr # REVERSED TILDE +0x2240 ≀ ISOamsb # WREATH PRODUCT +0x2241 ≁ ISOamsn # NOT TILDE +0x2243 ≃ ISOtech # ASYMPTOTICALLY EQUAL TO +0x2244 ≄ ISOamsn # NOT ASYMPTOTICALLY EQUAL TO +0x2245 ≅ ISOtech # APPROXIMATELY EQUAL TO +0x2247 ≇ ISOamsn # NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO +0x2248 ≈ ISOtech # ALMOST EQUAL TO +0x2248 ≈ ISOamsr # ALMOST EQUAL TO +0x2248 ≈ ISOamsr # ALMOST EQUAL TO +0x2249 ≉ ISOamsn # NOT ALMOST EQUAL TO +0x224A ≊ ISOamsr # ALMOST EQUAL OR EQUAL TO +0x224C ≌ ISOamsr # ALL EQUAL TO +0x224E ≎ ISOamsr # GEOMETRICALLY EQUIVALENT TO +0x224F ≏ ISOamsr # DIFFERENCE BETWEEN +0x2250 ≐ ISOamsr # APPROACHES THE LIMIT +0x2251 ≑ ISOamsr # GEOMETRICALLY EQUAL TO +0x2252 ≒ ISOamsr # APPROXIMATELY EQUAL TO OR THE IMAGE OF 
+0x2253 ≓ ISOamsr # IMAGE OF OR APPROXIMATELY EQUAL TO +0x2254 ≔ ISOamsr # COLON EQUALS +0x2255 ≕ ISOamsr # EQUALS COLON +0x2256 ≖ ISOamsr # RING IN EQUAL TO +0x2257 ≗ ISOamsr # RING EQUAL TO +0x2259 ≙ ISOtech # ESTIMATES +0x225C ≜ ISOamsr # DELTA EQUAL TO +0x2260 ≠ ISOtech # NOT EQUAL TO +0x2261 ≡ ISOtech # IDENTICAL TO +0x2262 ≢ ISOamsn # NOT IDENTICAL TO +0x2264 ≤ ISOtech # LESS-THAN OR EQUAL TO +0x2264 ⩽ ISOamsr # LESS-THAN OR EQUAL TO +0x2265 ≥ ISOtech # GREATER-THAN OR EQUAL TO +0x2265 ⩾ ISOamsr # GREATER-THAN OR EQUAL TO +0x2266 ≦ ISOamsr # LESS-THAN OVER EQUAL TO +0x2267 ≧ ISOamsr # GREATER-THAN OVER EQUAL TO +0x2268 ≨ ISOamsn # LESS-THAN BUT NOT EQUAL TO +0x2268 ⪇ ISOamsn # LESS-THAN BUT NOT EQUAL TO +0x2268 ≨︀ ISOamsn # LESS-THAN BUT NOT EQUAL TO +0x2269 ≩ ISOamsn # GREATER-THAN BUT NOT EQUAL TO +0x2269 ⪈ ISOamsn # GREATER-THAN BUT NOT EQUAL TO +0x2269 ≩︀ ISOamsn # GREATER-THAN BUT NOT EQUAL TO +0x226A ≪ ISOamsr # MUCH LESS-THAN +0x226B ≫ ISOamsr # MUCH GREATER-THAN +0x226C ≬ ISOamsr # BETWEEN +0x226E ≮ ISOamsn # NOT LESS-THAN +0x226F ≯ ISOamsn # NOT GREATER-THAN +0x2270 ≰ ISOamsn # NEITHER LESS-THAN NOR EQUAL TO +0x2270 ⩽̸ ISOamsn # NEITHER LESS-THAN NOR EQUAL TO +0x2271 ≱ ISOamsn # NEITHER GREATER-THAN NOR EQUAL TO +0x2271 ⩾̸ ISOamsn # NEITHER GREATER-THAN NOR EQUAL TO +0x2272 ≲ ISOamsr # LESS-THAN OR EQUIVALENT TO +0x2273 ≳ ISOamsr # GREATER-THAN OR EQUIVALENT TO +0x2276 ≶ ISOamsr # LESS-THAN OR GREATER-THAN +0x2277 ≷ ISOamsr # GREATER-THAN OR LESS-THAN +0x227A ≺ ISOamsr # PRECEDES +0x227B ≻ ISOamsr # SUCCEEDS +0x227C &cupre; ISOamsr # PRECEDES OR EQUAL TO +0x227C ⪯ ISOamsr # PRECEDES OR EQUAL TO +0x227D ≽ ISOamsr # SUCCEEDS OR EQUAL TO +0x227D ⪰ ISOamsr # SUCCEEDS OR EQUAL TO +0x227E ≾ ISOamsr # PRECEDES OR EQUIVALENT TO +0x227F ≿ ISOamsr # SUCCEEDS OR EQUIVALENT TO +0x2280 ⊀ ISOamsn # DOES NOT PRECEDE +0x2281 ⊁ ISOamsn # DOES NOT SUCCEED +0x2282 ⊂ ISOtech # SUBSET OF +0x2283 ⊃ ISOtech # SUPERSET OF +0x2284 ⊄ ISOamsn # NOT A SUBSET OF +0x2285 ⊅ 
ISOamsn # NOT A SUPERSET OF +0x2286 ⫅ ISOamsr # SUBSET OF OR EQUAL TO +0x2286 ⊆ ISOtech # SUBSET OF OR EQUAL TO +0x2287 ⫆ ISOamsr # SUPERSET OF OR EQUAL TO +0x2287 ⊇ ISOtech # SUPERSET OF OR EQUAL TO +0x2288 ⫅̸ ISOamsn # NEITHER A SUBSET OF NOR EQUAL TO +0x2288 ⊈ ISOamsn # NEITHER A SUBSET OF NOR EQUAL TO +0x2289 ⫆̸ ISOamsn # NEITHER A SUPERSET OF NOR EQUAL TO +0x2289 ⊉ ISOamsn # NEITHER A SUPERSET OF NOR EQUAL TO +0x228A ⫋ ISOamsn # SUBSET OF WITH NOT EQUAL TO +0x228A ⊊ ISOamsn # SUBSET OF WITH NOT EQUAL TO +0x228A ⫋︀ ISOamsn # SUBSET OF WITH NOT EQUAL TO +0x228A ⊊︀ ISOamsn # SUBSET OF WITH NOT EQUAL TO +0x228B ⫌ ISOamsn # SUPERSET OF WITH NOT EQUAL TO +0x228B ⊋ ISOamsn # SUPERSET OF WITH NOT EQUAL TO +0x228B ⫌︀ ISOamsn # SUPERSET OF WITH NOT EQUAL TO +0x228B ⊋︀ ISOamsn # SUPERSET OF WITH NOT EQUAL TO +0x228E ⊎ ISOamsb # MULTISET UNION +0x228F ⊏ ISOamsr # SQUARE IMAGE OF +0x2290 ⊐ ISOamsr # SQUARE ORIGINAL OF +0x2291 ⊑ ISOamsr # SQUARE IMAGE OF OR EQUAL TO +0x2292 ⊒ ISOamsr # SQUARE ORIGINAL OF OR EQUAL TO +0x2293 ⊓ ISOamsb # SQUARE CAP +0x2294 ⊔ ISOamsb # SQUARE CUP +0x2295 ⊕ ISOamsb # CIRCLED PLUS +0x2296 ⊖ ISOamsb # CIRCLED MINUS +0x2297 ⊗ ISOamsb # CIRCLED TIMES +0x2298 ⊘ ISOamsb # CIRCLED DIVISION SLASH +0x2299 ⊙ ISOamsb # CIRCLED DOT OPERATOR +0x229A ⊚ ISOamsb # CIRCLED RING OPERATOR +0x229B ⊛ ISOamsb # CIRCLED ASTERISK OPERATOR +0x229D ⊝ ISOamsb # CIRCLED DASH +0x229E ⊞ ISOamsb # SQUARED PLUS +0x229F ⊟ ISOamsb # SQUARED MINUS +0x22A0 ⊠ ISOamsb # SQUARED TIMES +0x22A1 ⊡ ISOamsb # SQUARED DOT OPERATOR +0x22A2 ⊢ ISOamsr # RIGHT TACK +0x22A3 ⊣ ISOamsr # LEFT TACK +0x22A4 ⊤ ISOamsb # DOWN TACK +0x22A5 ⊥ ISOtech # UP TACK +0x22A5 ⊥ ISOtech # UP TACK +0x22A7 ⊧ ISOamsr # MODELS +0x22A8 ⊨ ISOamsr # TRUE +0x22A9 ⊩ ISOamsr # FORCES +0x22AA ⊪ ISOamsr # TRIPLE VERTICAL BAR RIGHT TURNSTILE +0x22AC ⊬ ISOamsn # DOES NOT PROVE +0x22AD ⊭ ISOamsn # NOT TRUE +0x22AE ⊮ ISOamsn # DOES NOT FORCE +0x22AF ⊯ ISOamsn # NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT +0x22B2 ⊲ ISOamsr # 
NORMAL SUBGROUP OF +0x22B3 ⊳ ISOamsr # CONTAINS AS NORMAL SUBGROUP +0x22B4 ⊴ ISOamsr # NORMAL SUBGROUP OF OR EQUAL TO +0x22B5 ⊵ ISOamsr # CONTAINS AS NORMAL SUBGROUP OR EQUAL TO +0x22B8 ⊸ ISOamsa # MULTIMAP +0x22BA ⊺ ISOamsb # INTERCALATE +0x22BB ⊻ ISOamsr # XOR +0x22BC ⌅ ISOamsb # NAND +0x22C4 ⋄ ISOamsb # DIAMOND OPERATOR +0x22C5 ⋅ ISOamsb # DOT OPERATOR +0x22C6 ⋆ ISOamsb # STAR OPERATOR +0x22C7 ⋇ ISOamsb # DIVISION TIMES +0x22C8 ⋈ ISOamsr # BOWTIE +0x22C9 ⋉ ISOamsb # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT +0x22CA ⋊ ISOamsb # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT +0x22CB ⋋ ISOamsb # LEFT SEMIDIRECT PRODUCT +0x22CC ⋌ ISOamsb # RIGHT SEMIDIRECT PRODUCT +0x22CD ⋍ ISOamsr # REVERSED TILDE EQUALS +0x22CE ⋎ ISOamsb # CURLY LOGICAL OR +0x22CF ⋏ ISOamsb # CURLY LOGICAL AND +0x22D0 ⋐ ISOamsr # DOUBLE SUBSET +0x22D1 ⋑ ISOamsr # DOUBLE SUPERSET +0x22D2 ⋒ ISOamsb # DOUBLE INTERSECTION +0x22D3 ⋓ ISOamsb # DOUBLE UNION +0x22D4 ⋔ ISOamsr # PITCHFORK +0x22D6 &ldot; ISOamsr # LESS-THAN WITH DOT +0x22D7 &gsdot; ISOamsr # GREATER-THAN WITH DOT +0x22D8 ⋘ ISOamsr # VERY MUCH LESS-THAN +0x22D9 ⋙ ISOamsr # VERY MUCH GREATER-THAN +0x22DA ⋚ ISOamsr # LESS-THAN EQUAL TO OR GREATER-THAN +0x22DB ⋛ ISOamsr # GREATER-THAN EQUAL TO OR LESS-THAN +0x22DC ⪕ ISOamsr # EQUAL TO OR LESS-THAN +0x22DD ⪖ ISOamsr # EQUAL TO OR GREATER-THAN +0x22DE ⋞ ISOamsr # EQUAL TO OR PRECEDES +0x22DF ⋟ ISOamsr # EQUAL TO OR SUCCEEDS +0x22E0 ⪯̸ ISOamsn # DOES NOT PRECEDE OR EQUAL +0x22E1 ⪰̸ ISOamsn # DOES NOT SUCCEED OR EQUAL +0x22E6 ⋦ ISOamsn # LESS-THAN BUT NOT EQUIVALENT TO +0x22E7 ⋧ ISOamsn # GREATER-THAN BUT NOT EQUIVALENT TO +0x22E8 ⋨ ISOamsn # PRECEDES BUT NOT EQUIVALENT TO +0x22E9 ⋩ ISOamsn # SUCCEEDS BUT NOT EQUIVALENT TO +0x22EA ⋪ ISOamsn # NOT NORMAL SUBGROUP OF +0x22EB ⋫ ISOamsn # DOES NOT CONTAIN AS NORMAL SUBGROUP +0x22EC ⋬ ISOamsn # NOT NORMAL SUBGROUP OF OR EQUAL TO +0x22ED ⋭ ISOamsn # DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL +0x22EE ⋮ ISOpub # VERTICAL ELLIPSIS +0x2306 ⌆ ISOamsb # PERSPECTIVE 
+0x2308 ⌈ ISOamsc # LEFT CEILING +0x2309 ⌉ ISOamsc # RIGHT CEILING +0x230A ⌊ ISOamsc # LEFT FLOOR +0x230B ⌋ ISOamsc # RIGHT FLOOR +0x230C ⌌ ISOpub # BOTTOM RIGHT CROP +0x230D ⌍ ISOpub # BOTTOM LEFT CROP +0x230E ⌎ ISOpub # TOP RIGHT CROP +0x230F ⌏ ISOpub # TOP LEFT CROP +0x2315 ⌕ ISOpub # TELEPHONE RECORDER +0x2316 ⌖ ISOpub # POSITION INDICATOR +0x231C ⌜ ISOamsc # TOP LEFT CORNER +0x231D ⌝ ISOamsc # TOP RIGHT CORNER +0x231E ⌞ ISOamsc # BOTTOM LEFT CORNER +0x231F ⌟ ISOamsc # BOTTOM RIGHT CORNER +0x2322 ⌢ ISOamsr # FROWN +0x2322 ⌢ ISOamsr # FROWN +0x2323 ⌣ ISOamsr # SMILE +0x2323 ⌣ ISOamsr # SMILE +0x2329 ⟨ ISOtech # LEFT-POINTING ANGLE BRACKET +0x232A ⟩ ISOtech # RIGHT-POINTING ANGLE BRACKET +0x2423 ␣ ISOpub # OPEN BOX +0x24C8 Ⓢ ISOamso # CIRCLED LATIN CAPITAL LETTER S +0x2500 ─ ISObox # BOX DRAWINGS LIGHT HORIZONTAL +0x2502 │ ISObox # BOX DRAWINGS LIGHT VERTICAL +0x250C ┌ ISObox # BOX DRAWINGS LIGHT DOWN AND RIGHT +0x2510 ┐ ISObox # BOX DRAWINGS LIGHT DOWN AND LEFT +0x2514 └ ISObox # BOX DRAWINGS LIGHT UP AND RIGHT +0x2518 ┘ ISObox # BOX DRAWINGS LIGHT UP AND LEFT +0x251C ├ ISObox # BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0x2524 ┤ ISObox # BOX DRAWINGS LIGHT VERTICAL AND LEFT +0x252C ┬ ISObox # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0x2534 ┴ ISObox # BOX DRAWINGS LIGHT UP AND HORIZONTAL +0x253C ┼ ISObox # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0x2550 ═ ISObox # BOX DRAWINGS DOUBLE HORIZONTAL +0x2551 ║ ISObox # BOX DRAWINGS DOUBLE VERTICAL +0x2552 ╒ ISObox # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +0x2553 ╓ ISObox # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE +0x2554 ╔ ISObox # BOX DRAWINGS DOUBLE DOWN AND RIGHT +0x2555 ╕ ISObox # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +0x2556 ╖ ISObox # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +0x2557 ╗ ISObox # BOX DRAWINGS DOUBLE DOWN AND LEFT +0x2558 ╘ ISObox # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +0x2559 ╙ ISObox # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +0x255A ╚ ISObox # BOX DRAWINGS DOUBLE UP AND RIGHT +0x255B ╛ 
ISObox # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +0x255C ╜ ISObox # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +0x255D ╝ ISObox # BOX DRAWINGS DOUBLE UP AND LEFT +0x255E ╞ ISObox # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +0x255F ╟ ISObox # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +0x2560 ╠ ISObox # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0x2561 ╡ ISObox # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +0x2562 ╢ ISObox # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +0x2563 ╣ ISObox # BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0x2564 ╤ ISObox # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +0x2565 ╥ ISObox # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE +0x2566 ╦ ISObox # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0x2567 ╧ ISObox # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +0x2568 ╨ ISObox # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +0x2569 ╩ ISObox # BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0x256A ╪ ISObox # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +0x256B ╫ ISObox # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +0x256C ╬ ISObox # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0x2580 ▀ ISOpub # UPPER HALF BLOCK +0x2584 ▄ ISOpub # LOWER HALF BLOCK +0x2588 █ ISOpub # FULL BLOCK +0x2591 ░ ISOpub # LIGHT SHADE +0x2592 ▒ ISOpub # MEDIUM SHADE +0x2593 ▓ ISOpub # DARK SHADE +0x25A1 □ ISOpub # WHITE SQUARE +0x25A1 □ ISOtech # WHITE SQUARE +0x25AA ▪ ISOpub # BLACK SMALL SQUARE +0x25AD ▭ ISOpub # WHITE RECTANGLE +0x25AE ▮ ISOpub # BLACK VERTICAL RECTANGLE +0x25B3 △ ISOamsb # WHITE UP-POINTING TRIANGLE +0x25B4 ▴ ISOpub # BLACK UP-POINTING SMALL TRIANGLE +0x25B5 ▵ ISOpub # WHITE UP-POINTING SMALL TRIANGLE +0x25B8 ▸ ISOpub # BLACK RIGHT-POINTING SMALL TRIANGLE +0x25B9 ▹ ISOpub # WHITE RIGHT-POINTING SMALL TRIANGLE +0x25BD ▽ ISOamsb # WHITE DOWN-POINTING TRIANGLE +0x25BE ▾ ISOpub # BLACK DOWN-POINTING SMALL TRIANGLE +0x25BF ▿ ISOpub # WHITE DOWN-POINTING SMALL TRIANGLE +0x25C2 ◂ ISOpub # BLACK LEFT-POINTING SMALL TRIANGLE +0x25C3 ◃ ISOpub # WHITE LEFT-POINTING 
SMALL TRIANGLE +0x25CA ◊ ISOpub # LOZENGE +0x25CB ○ ISOpub # WHITE CIRCLE +0x25CB ◯ ISOamsb # WHITE CIRCLE +0x2605 ★ ISOpub # BLACK STAR +0x2606 ☆ ISOpub # WHITE STAR +0x260E ☎ ISOpub # BLACK TELEPHONE +0x2640 ♀ ISOpub # FEMALE SIGN +0x2642 ♂ ISOpub # MALE SIGN +0x2660 ♠ ISOpub # BLACK SPADE SUIT +0x2663 ♣ ISOpub # BLACK CLUB SUIT +0x2665 ♥ ISOpub # BLACK HEART SUIT +0x2666 ♦ ISOpub # BLACK DIAMOND SUIT +0x266A ♪ ISOnum # EIGHTH NOTE +0x266D ♭ ISOpub # MUSIC FLAT SIGN +0x266E ♮ ISOpub # MUSIC NATURAL SIGN +0x266F ♯ ISOpub # MUSIC SHARP SIGN +0x2713 ✓ ISOpub # CHECK MARK +0x2717 ✗ ISOpub # BALLOT X +0x2720 ✠ ISOpub # MALTESE CROSS +0x2726 ⧫ ISOpub # BLACK FOUR POINTED STAR +<!-- 0x2727 ◊ ISOpub # WHITE FOUR POINTED STAR --> +0x2736 ✶ ISOpub # SIX POINTED BLACK STAR +0x???? ϵ ISOgrk3 # variant epsilon +0x???? fj ISOpub # fj ligature +0x???? ⪌ ISOamsr # greater-than, double equals, less-than +0x???? ⪆ ISOamsr # greater-than, approximately equal to +0x???? ⪊ ISOamsn # greater-than, not approximately equal to +0x???? &jnodot; ISOamso # latin small letter dotless j +0x???? ⪋ ISOamsr # less-than, double equals, greater-than +0x???? ⪅ ISOamsr # less-than, approximately equal to +0x???? ⪉ ISOamsn # less-than, not approximately equal to +0x???? &lpargt; ISOamsc # left parenthesis, greater-than +0x???? ≧̸ ISOamsn # not greater-than, double equals +0x???? ≦̸ ISOamsn # not less-than, double equals +0x???? ∤ ISOamsn # nshortmid +0x???? ⪷ ISOamsr # precedes, approximately equal to +0x???? ⪵ ISOamsn # precedes, not double equal +0x???? ⪹ ISOamsn # precedes, not approximately equal to +0x???? ⦔ ISOamsc # right parenthesis, greater-than +0x???? ⪸ ISOamsr # succeeds, approximately equal to +0x???? ⪶ ISOamsn # succeeds, not double equals +0x???? ⪺ ISOamsn # succeeds, not approximately equal to +0x???? 
∣ ISOamsr # shortmid +0xFB00 ff ISOpub # LATIN SMALL LIGATURE FF +0xFB01 fi ISOpub # LATIN SMALL LIGATURE FI +0xFB02 fl ISOpub # LATIN SMALL LIGATURE FL +0xFB03 ffi ISOpub # LATIN SMALL LIGATURE FFI +0xFB04 ffl ISOpub # LATIN SMALL LIGATURE FFL + +</PRE> +</BODY> +</HTML> diff --git a/test/spaces.html b/test/spaces.html new file mode 100644 index 0000000..d527a19 --- /dev/null +++ b/test/spaces.html @@ -0,0 +1,37 @@ +<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 3.0//EN"> +<HTML> +<HEAD> +<TITLE> Test of some symbols </TITLE> +</HEAD> +<BODY> +<!-- Multiple spaces are normally collapsed unless we are in a <PRE> mode +or use "special" spaces like or   - try playing around this page +by adding more spaces inside brackets or using <PRE>. +--> +<!-- PRE --> + +You may press '\' to view the source of this test<br> +<em>UNICODE NCR alt-NCR named alt-named</em><br> +<p> +0x2000 [ ] <IMG SRC=X ALT="[ ]"> # EN QUAD<br> +0x2001 [ ] <IMG SRC=X ALT="[ ]"> # EM QUAD<br> +0x2002 [ ] <IMG SRC=X ALT="[ ]"> [ ] <IMG SRC=X ALT="[ ]"> # EN SPACE<br> +0x2003 [ ] <IMG SRC=X ALT="[ ]"> [ ] <IMG SRC=X ALT="[ ]"> # EM SPACE<br> +0x2004 [ ] <IMG SRC=X ALT="[ ]"> [ ] <IMG SRC=X ALT="[ ]"> # THREE-PER-EM SPACE<br> +0x2005 [ ] <IMG SRC=X ALT="[ ]"> [ ] <IMG SRC=X ALT="[ ]"> # FOUR-PER-EM SPACE<br> +0x2007 [ ] <IMG SRC=X ALT="[ ]"> [ ] <IMG SRC=X ALT="[ ]"> # FIGURE SPACE<br> +0x2008 [ ] <IMG SRC=X ALT="[ ]"> [ ] <IMG SRC=X ALT="[ ]"> # PUNCTUATION SPACE<br> +0x2009 [ ] <IMG SRC=X ALT="[ ]"> [ ] <IMG SRC=X ALT="[ ]"> # THIN SPACE<br> +0x200A [ ] <IMG SRC=X ALT="[ ]"> [ ] <IMG SRC=X ALT="[ ]"> # HAIR SPACE<br> +0x200C [‌] <IMG SRC=X ALT="[‌]"> [‌] <IMG SRC=X ALT="[‌]"> # ZERO WIDTH NON-JOINER<br> +0x200D [‍] <IMG SRC=X ALT="[‍]"> [‍] <IMG SRC=X ALT="[‍]"> # ZERO WIDTH JOINER<br> +0x200E [‎] <IMG SRC=X ALT="[‎]"> [‎] <IMG SRC=X ALT="[‎]"> # LEFT-TO-RIGHT MARK<br> +0x200F [‏] <IMG SRC=X ALT="[‏]"> [‏] <IMG SRC=X ALT="[‏]"> # RIGHT-TO-LEFT MARK<br> +0x2010 [‐] <IMG SRC=X ALT="[‐]"> [‐] <IMG SRC=X 
ALT="[‐]"> # HYPHEN<br> +0x2013 [–] <IMG SRC=X ALT="[–]"> [–] <IMG SRC=X ALT="[–]"> # EN DASH<br> +0x2014 [—] <IMG SRC=X ALT="[—]"> [—] <IMG SRC=X ALT="[—]"> # EM DASH<br> + + +</PRE> +</BODY> +</HTML> diff --git a/test/special_urls.html b/test/special_urls.html new file mode 100644 index 0000000..c9d3506 --- /dev/null +++ b/test/special_urls.html @@ -0,0 +1,22 @@ +<html> +<head> +<title>Lynx Special URLs</title> +<link rev="made" href="mailto:WebMaster@foo.blah.dom"> +</head> +<body> +<h1>Lynx Special URLs</h1> +<dl compact> +<dd>LYNXCFG:<a href="LYNXCFG:">LYNXCFG (ok)</a> +<dd>LYNXCOMPILEOPTS:<a href="LYNXCOMPILEOPTS:">LYNXCOMPILEOPTS (ok)</a> +<dd>LYNXCOOKIE:<a href="LYNXCOOKIE:">LYNXCOOKIE is not allowed</a> +<dd>LYNXDIRED:<a href="LYNXDIRED:">LYNXDIRED is not allowed</a> +<dd>LYNXDOWNLOAD:<a href="LYNXDOWNLOAD:">LYNXDOWNLOAD is not allowed</a> +<dd>LYNXHIST:<a href="LYNXHIST:">LYNXHIST is not allowed</a> +<dd>LYNXIMGMAP:<a href="LYNXIMGMAP:">LYNXIMGMAP is not allowed</a> +<dd>LYNXKEYMAP:<a href="LYNXKEYMAP:">LYNXKEYMAP (ok)</a> +<dd>LYNXMESSAGES:<a href="LYNXMESSAGES:">LYNXMESSAGES (ok)</a> +<dd>LYNXOPTIONS:<a href="LYNXOPTIONS:">LYNXOPTIONS (ok)</a> +<dd>LYNXPRINT:<a href="LYNXPRINT:">LYNXPRINT is not allowed</a> +</dl> +</body> +</html> diff --git a/test/square.html b/test/square.html new file mode 100644 index 0000000..c840328 --- /dev/null +++ b/test/square.html @@ -0,0 +1,14 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"> + +<html> +<head> + <meta name="generator" content= + "HTML Tidy for Linux (vers 25 March 2009), see www.w3.org"> + + <title>Test ImageMap - square</title> +</head> + +<body> + <p>SQUARE</p> +</body> +</html> diff --git a/test/tabtest.html b/test/tabtest.html new file mode 100644 index 0000000..45184a8 --- /dev/null +++ b/test/tabtest.html @@ -0,0 +1,39 @@ +<!DOCTYPE HTML PUBLIC "-//W3O//DTD W3 HTML 3.0//EN"> +<html> +<head> +<title>Tests of TAB element.</title> +<link rev="made" href="mailto:lynx-dev@nongnu.org"> +</head> + 
+<body> +<h1>Tests of TAB element.</h1> + +<TAB INDENT="16" ID="t0"><em>Normal Style:</em><br> +One<TAB INDENT="26" ID="t1">Two<TAB INDENT="44" ID="t2">Three +<TAB INDENT="62" ID="t3">Four<TAB INDENT="80" ID="t4">Five +<TAB INDENT="98" ID="t5">Six<TAB INDENT="116" ID="t6">Seven +<TAB INDENT="132" ID="t7">Eight<br> +1.<TAB TO="t1">2.<TAB TO="t2">3.<TAB TO="t3">4.<TAB TO="t4">5. +<TAB TO="t5">6.<TAB TO="t6">7.<TAB TO="t7">8.<br> +i.<TAB TO="t1">ii.<TAB TO="t2">iii.<TAB TO="t3">iv.<TAB TO="t4">v. +<TAB TO="t5">vi.<TAB TO="t6">vii.<TAB TO="t7">viii. + +<p><pre><TAB TO="t0"><em>In PRE block:</em> +One<TAB TO="t1">Two<TAB TO="t3">Three<TAB TO="t5">Four<TAB TO="t7">Five +1.<TAB TO="t1">2.<TAB TO="t3">3.<TAB TO="t5">4.<TAB TO="t7">5. +i.<TAB TO="t1">ii.<TAB TO="t3">iii.<TAB TO="t5">iv.<TAB TO="t7">v. +</pre> + +<bq> +<TAB TO="t0"><em>In BQ block:</em><br> +One<TAB TO="t2">Two<TAB TO="t4">Three<TAB TO="t6">Four<br> +1.<TAB TO="t2">2.<TAB TO="t4">3.<TAB TO="t6">4.<br> +i.<TAB TO="t2">ii.<TAB TO="t4">iii.<TAB TO="t6">iv. +</bq> + +<p><b>noct<TAB ID="tn">ambulant</b> - walking at night<br> +<TAB TO="tn">(from Latin: <i>nox noctis</i> night + <i>ambulare</i> walk) +<pre>|<TAB INDENT="78">|<TAB INDENT="156">| +0<TAB INDENT="76">80<TAB INDENT="152">158</pre> +</body> +</html> diff --git a/test/tags.html b/test/tags.html new file mode 100644 index 0000000..3f405ec --- /dev/null +++ b/test/tags.html @@ -0,0 +1,219 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<HTML> +<HEAD> +<TITLE>Tags to Test Color-Style</TITLE> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1"> +<link href="nobody" rev="made"> +</HEAD> + +<BODY alink="green" bgcolor="yellow"> +<!-- ====================================================================== --> +<br> +<h1>Content of an H1 Tag</h1> +Text after an H1 Tag. +<p>Paragraph after an H1 Tag. +<br> +<h2>Content of an H2 Tag</h2> +Text after an H2 Tag. +<p>Paragraph after an H2 Tag. 
+<br> +<h3>Content of an H3 Tag</h3> +Text after an H3 Tag. +<p>Paragraph after an H3 Tag. +<br> +<h4>Content of an H4 Tag</h4> +Text after an H4 Tag. +<p>Paragraph after an H4 Tag. +<br> +<h5>Content of an H5 Tag</h5> +Text after an H5 Tag. +<p>Paragraph after an H5 Tag. +<br> +<h6>Content of an H6 Tag</h6> +Text after an H6 Tag. +<p>Paragraph after an H6 Tag. +<!-- ====================================================================== --> +This is an <a href="#imagemap">"a"</a> tag. +<br> +This is an <address>"address"</address> tag. +<br> +This is a <b>"b"</b> tag. +<br> +This is a <big>"big"</big> tag. +<br> +Before quote, <blockquote>this is a "blockquote"</blockquote>, after quote. +<br> +This is a <center>"center"</center> tag. +<br> +This is a <cite>"cite"</cite> tag. +<br> +This is a <code>"code"</code> tag. +<br> +This is a <div>div</div> tag. +<br> +This is an <em>"em"</em> tag. +<br> +This is a <font>"font"</font> tag. +<!-- ====================================================================== --> +<br> +This is an <hr>"hr"<hr> tag. +<br> +This is an <i>"i"</i> tag. +<br> +This is an <iframe>"iframe"</iframe> tag. +<br> +This is an <img alt="img" src="image.jpg"> tag. +<br> +This is an <label>"label"</label> tag. +<br> +map: normal: lightgray: blue +<br> +<pre> +This is +pre-formatted +text (three lines, with pre's on preceding/following lines). +</pre> +<br> +This is a <q>"q"</q>tag. +<br> +This is a <samp>"samp"</samp> tag. +<br> +This is a <small>"small"</small> tag. +<br> +This is a <strong>"strong"</strong> tag. +<br> +This is a <sub>"sub"</sub> tag. +<br> +This is a <sup>"sup"</sup> tag. +<br> +This is a <tt>"tt"</tt> tag. +<br> +This is a <var>"var"</var> tag. 
+<!-- ====================================================================== --> +<h1>Forms</h1> +<hr> +<form action="http://localhost/cgi-bin/bogus-parms" method="get"> +First: <input type="text" name="First" size=20> +Last: <input type="text" name="Last" size=20> +Description: <textarea rows=3 cols=40> +contents of textarea +</textarea> +<hr> +<input type="submit" value="Submit this form"> +<br> +<input type="reset" value="Reset this form"> +</form> + +<h1 align="left">Another form</h1> +<hr> +<form action="http://localhost/cgi-bin/bogus-parms" method="get"> +<hr> +<input type="checkbox" value="first">first +<br><input type="checkbox" value="second">second +<br><input type="checkbox" value="third">third +<br><input type="checkbox" value="">empty +<hr> +<input type="submit" value="done">done +</form> + +<h1 align="right">Another form</h1> +<hr> +<form action="http://localhost/cgi-bin/bogus-parms" method="get"> +<select> +<option>first option</option> +<option>second option</option> +<option>third option</option> +</select> +<hr> +<input type="submit" value="Submit this form"> +<br> +<input type="reset" value="Reset this form"> +</form> +<!-- ====================================================================== --> +<table border=2 summary="unquoted table"> +<caption>Unquoted Table</caption> +<tr> +<td>First:</td> +<td>the first row</td> +<td>short</td> +<td>last</td></tr> +<tr> +<td>Second:</td> +<td>the second row</td> +<td>very long string</td> +<td>lower-right</td></tr> +</table> +<!-- ====================================================================== --> +<blockquote><table border=2 summary="quoted table"> +<caption>Quoted Table</caption> +<tr> +<td>First:</td> +<td>the first row</td> +<td>very long string</td> +<td>last</td></tr> +<tr> +<td>Second:</td> +<td>the second row</td> +<td>short</td> +<td>lower-right</td></tr> +</table></blockquote> +<!-- ====================================================================== --> +<br> +<h1>An image map</h1> 
+<map name="IMAGEMAP"> +<area alt="Square" shape="rect" coords="18,18,82,80" href="square.html"> +<area alt="Circle" shape="circle" coords="127,48,31" href="circle.html"> +<area alt="Triangle" shape="poly" coords="232,78,303,78,263,14,232,76" + href="triangle.html"> + </map> +<!-- ====================================================================== --> +<br> +<h1>Definition List</h1> +This is a definition list: +<dl> +<dt>the first dt +<dd>the first dd +<dt>the second dt +<dd>the second dd +<dl> +<dt>the first dt +<dd>the first dd +<dt>the second dt +<dd>the second dd +<dt>the third dt +<dd>the third dd +</dl> +<dt>the third dt +<dd>the third dd +</dl> +<!-- ====================================================================== --> +<br> +<h1>Unordered List</h1> +This is an unordered list: +<ul> +<li>first item +<li>second item +<ul> +<li>first item +<li>second item +<li>third item +</ul> +<li>third item +</ul> +<!-- ====================================================================== --> +<br> +<h1>Ordered List</h1> +This is an ordered list: +<ol> +<li>first item +<li>second item +<ol> +<li>first item +<li>second item +<li>third item +</ol> +<li>third item +</ol> + +</BODY> +</HTML> diff --git a/test/test-styles.html b/test/test-styles.html new file mode 100644 index 0000000..728c288 --- /dev/null +++ b/test/test-styles.html @@ -0,0 +1,106 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<HTML> +<HEAD> +<TITLE>Test Color-Styles</TITLE> +</HEAD> +<BODY> +<h1>Heading 1</h1> +<h2>Heading 2</h2> +<h3>Heading 3</h3> +<h4>Heading 4</h4> +<h5>Heading 5</h5> +<h6>Heading 6</h6> + +<h1>Heading 1 - Ordinary Text</h1> +<h2><a name="fontlike_text">Heading 2 - <b>Fontlike</b> Text</a></h2> +<!-- STYLE,BR,TAB --> +This is <b>b (bold)</b>. +<br> +This is <big>big</big>. +<br> +This is <blink>blink</blink>. +<br> +This is <i>i (italicized)</i>. +<br> +This is <small>small</small>. +<br> +This is <strike>strike</strike>. 
+<br> +This is <tt>tt (typewriter)</tt>. +<br> +This is <u>u (underlined)</u>. + +<h2><a name="emphasized_text">Heading 2 - <em>Emphasized</em> Text</a></h2> +This is <cite>cite (citation)</cite>. +<br> +This is <code>code</code>. +<br> +This is <del>del</del>. +<br> +This is <dfn>dfn (definition)</dfn>. +<br> +This is <em>emphasized</em>. +<br> +This is <ins>ins</ins>. +<br> +This is <kbd>kbd (keyboard)</kbd>. +<br> +This is <q>q (quoted)</q>. +<br> +This is <samp>samp (sample)</samp>. +<br> +This is <span>span</span>. +<br> +This is <strong>strong</strong>. +<br> +This is <var>var</var>. + +<h1>Heading 1 - Ordinary Links</h1> +<a href="#fontlike_text">This is a link</a> to fontlike text. +<br> +<a href="#emphasized_text">This is a link</a> to emphasized text. + +<h1>Heading 1 - Emphasized Links</h1> +<h2><a name="fontlike_links">Heading 2 - <b>Fontlike</b> Links</a></h2> +<br> +This is <a href="#fontlike_text"><b>b (bold)</b> link</a>. +<br> +This is <a href="#fontlike_text"><big>big</big> link</a>. +<br> +This is <a href="#fontlike_text"><blink>blink</blink> link</a>. +<br> +This is <a href="#fontlike_text"><i>i (italicized)</i> link</a>. +<br> +This is <a href="#fontlike_text"><small>small</small> link</a>. +<br> +This is <a href="#fontlike_text"><strike>strike</strike> link</a>. +<br> +This is <a href="#fontlike_text"><tt>tt (typewriter)</tt> link</a>. +<br> +This is <a href="#fontlike_text"><u>u (underlined)</u> link</a>. + +<h2><a name="emphasized_links">Heading 2 - <b>Emphasized</b> Links</a></h2> +This is <a href="#emphasized_text"><cite>cite (citation)</cite> link</a>. +<br> +This is <a href="#emphasized_text"><code>code</code> link</a>. +<br> +This is <a href="#emphasized_text"><del>del</del> link</a>. +<br> +This is <a href="#emphasized_text"><dfn>dfn (definition)</dfn> link</a>. +<br> +This is <a href="#emphasized_text"><em>emphasized</em> link</a>. +<br> +This is <a href="#emphasized_text"><ins>ins</ins> link</a>. 
+<br> +This is <a href="#emphasized_text"><kbd>kbd (keyboard)</kbd> link</a>. +<br> +This is <a href="#emphasized_text"><q>q (quoted)</q> link</a>. +<br> +This is <a href="#emphasized_text"><samp>samp (sample)</samp> link</a>. +<br> +This is <a href="#emphasized_text"><span>span</span> link</a>. +<br> +This is <a href="#emphasized_text"><strong>strong</strong> link</a>. +<br> +This is <a href="#emphasized_text"><var>var</var> link</a>. +</BODY> diff --git a/test/triangle.html b/test/triangle.html new file mode 100644 index 0000000..abea6c7 --- /dev/null +++ b/test/triangle.html @@ -0,0 +1,14 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"> + +<html> +<head> + <meta name="generator" content= + "HTML Tidy for Linux (vers 25 March 2009), see www.w3.org"> + + <title>Test ImageMap - triangle</title> +</head> + +<body> + <p>TRIANGLE</p> +</body> +</html> diff --git a/test/unicode.html b/test/unicode.html new file mode 100644 index 0000000..7abcd1a --- /dev/null +++ b/test/unicode.html @@ -0,0 +1,915 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<HTML> +<HEAD> +<TITLE>Test of some Unicode symbols in numeric character reference form</TITLE> +</HEAD> +<BODY> +<PRE> + + This table prepared from SGML.TXT available at ftp.unicode.org + + ftp://ftp.unicode.org/MAPPINGS/VENDORS/MISC/SGML.TXT + (if doing ftp, try cd Public/MAPPINGS/VENDORS/MISC) + + +original comment: + +# Author: John Cowan <cowan@ccil.org> +# Date: 25 July 1997 +# +# The following table maps SGML character entities from various +# public sets (namely, ISOamsa, ISOamsb, ISOamsc, ISOamsn, ISOamso, +# ISOamsr, ISObox, ISOcyr1, ISOcyr2, ISOdia, ISOgrk1, ISOgrk2, +# ISOgrk3, ISOgrk4, ISOlat1, ISOlat2, ISOnum, ISOpub, ISOtech, +# HTMLspecial, HTMLsymbol) to corresponding Unicode characters. 
+# +# The table has four tab-separated columns: +# Column 1: SGML character entity name +# Column 2: SGML public entity set +# Column 3: Unicode 2.0 character code +# Column 4: Unicode 2.0 character name (UPPER CASE) +# Entries which don't have Unicode equivalents have "0x????" +# in Column 3 and a lower case description (from the public entity +# set DTD) in Column 4. The mapping is not reversible, because many +# distinctions are unified away in Unicode, particularly between +# mathematical symbols. +# +# The table is sorted case-blind by SGML character entity name. +# +# The contents of this table are drawn from various sources, and +# are in the public domain. +# +<!-- Changes: ++ {"euro", 0x20AC}, /* EURO SIGN */ + +--> + +This test illustrates Unicode numeric entities like &#x22AB; +We sort the entities according to unicode numbers. +You should see visible characters if your display character set supports them +or some substitution string picked up from src/chrtrans/def7_uni.tbl + +If you see something like &#x34D2; - this number is unknown to def7_uni.tbl +or the internal browser's implementation is broken. + Leonid Pauzner. + + + + +0x0021 ! # EXCLAMATION MARK +0x0022 " # QUOTATION MARK +0x0023 # # NUMBER SIGN +0x0024 $ # DOLLAR SIGN +0x0025 % # PERCENT SIGN +0x0026 & # AMPERSAND +0x0028 ( # LEFT PARENTHESIS +0x0029 ) # RIGHT PARENTHESIS +0x002A * # ASTERISK +0x002B + # PLUS SIGN +0x002C , # COMMA +0x002D - # HYPHEN-MINUS +0x002E . # FULL STOP +0x002F / # SOLIDUS +0x003A : # COLON +0x003B ; # SEMICOLON +0x003C < # LESS-THAN SIGN +0x003D = # EQUALS SIGN +0x003E > # GREATER-THAN SIGN +0x003F ? 
# QUESTION MARK +0x0040 @ # COMMERCIAL AT +0x005B [ # LEFT SQUARE BRACKET +0x005C \ # REVERSE SOLIDUS +0x005C \ # REVERSE SOLIDUS +0x005D ] # RIGHT SQUARE BRACKET +0x005F _ # LOW LINE +0x0060 ` # GRAVE ACCENT +0x007B { # LEFT CURLY BRACKET +0x007C | # VERTICAL LINE +0x007D } # RIGHT CURLY BRACKET +0x00A0   # NO-BREAK SPACE +0x00A1 ¡ # INVERTED EXCLAMATION MARK +0x00A2 ¢ # CENT SIGN +0x00A3 £ # POUND SIGN +0x00A4 ¤ # CURRENCY SIGN +0x00A5 ¥ # YEN SIGN +0x00A6 ¦ # BROKEN BAR +0x00A7 § # SECTION SIGN +0x00A8 ¨ # DIAERESIS +0x00A9 © # COPYRIGHT SIGN +0x00AA ª # FEMININE ORDINAL INDICATOR +0x00AB « # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0x00AC ¬ # NOT SIGN +0x00AD ­ # SOFT HYPHEN +0x00AE ® # REGISTERED SIGN +0x00AF ¯ # MACRON +0x00B0 ° # DEGREE SIGN +0x00B1 ± # PLUS-MINUS SIGN +0x00B2 ² # SUPERSCRIPT TWO +0x00B3 ³ # SUPERSCRIPT THREE +0x00B4 ´ # ACUTE ACCENT +0x00B5 µ # MICRO SIGN +0x00B6 ¶ # PILCROW SIGN +0x00B7 · # MIDDLE DOT +0x00B8 ¸ # CEDILLA +0x00B9 ¹ # SUPERSCRIPT ONE +0x00BA º # MASCULINE ORDINAL INDICATOR +0x00BB » # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0x00BC ¼ # VULGAR FRACTION ONE QUARTER +0x00BD ½ # VULGAR FRACTION ONE HALF +0x00BE ¾ # VULGAR FRACTION THREE QUARTERS +0x00BF ¿ # INVERTED QUESTION MARK +0x00C0 À # LATIN CAPITAL LETTER A WITH GRAVE +0x00C1 Á # LATIN CAPITAL LETTER A WITH ACUTE +0x00C2  # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0x00C3 à # LATIN CAPITAL LETTER A WITH TILDE +0x00C4 Ä # LATIN CAPITAL LETTER A WITH DIAERESIS +0x00C5 Å # LATIN CAPITAL LETTER A WITH RING ABOVE +0x00C6 Æ # LATIN CAPITAL LETTER AE +0x00C7 Ç # LATIN CAPITAL LETTER C WITH CEDILLA +0x00C8 È # LATIN CAPITAL LETTER E WITH GRAVE +0x00C9 É # LATIN CAPITAL LETTER E WITH ACUTE +0x00CA Ê # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0x00CB Ë # LATIN CAPITAL LETTER E WITH DIAERESIS +0x00CC Ì # LATIN CAPITAL LETTER I WITH GRAVE +0x00CD Í # LATIN CAPITAL LETTER I WITH ACUTE +0x00CE Î # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0x00CF Ï # LATIN CAPITAL LETTER I WITH DIAERESIS 
+0x00D0 Ð # LATIN CAPITAL LETTER ETH +0x00D1 Ñ # LATIN CAPITAL LETTER N WITH TILDE +0x00D2 Ò # LATIN CAPITAL LETTER O WITH GRAVE +0x00D3 Ó # LATIN CAPITAL LETTER O WITH ACUTE +0x00D4 Ô # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0x00D5 Õ # LATIN CAPITAL LETTER O WITH TILDE +0x00D6 Ö # LATIN CAPITAL LETTER O WITH DIAERESIS +0x00D7 × # MULTIPLICATION SIGN +0x00D8 Ø # LATIN CAPITAL LETTER O WITH STROKE +0x00D9 Ù # LATIN CAPITAL LETTER U WITH GRAVE +0x00DA Ú # LATIN CAPITAL LETTER U WITH ACUTE +0x00DB Û # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0x00DC Ü # LATIN CAPITAL LETTER U WITH DIAERESIS +0x00DD Ý # LATIN CAPITAL LETTER Y WITH ACUTE +0x00DE Þ # LATIN CAPITAL LETTER THORN +0x00DF ß # LATIN SMALL LETTER SHARP S +0x00E0 à # LATIN SMALL LETTER A WITH GRAVE +0x00E1 á # LATIN SMALL LETTER A WITH ACUTE +0x00E2 â # LATIN SMALL LETTER A WITH CIRCUMFLEX +0x00E3 ã # LATIN SMALL LETTER A WITH TILDE +0x00E4 ä # LATIN SMALL LETTER A WITH DIAERESIS +0x00E5 å # LATIN SMALL LETTER A WITH RING ABOVE +0x00E6 æ # LATIN SMALL LETTER AE +0x00E7 ç # LATIN SMALL LETTER C WITH CEDILLA +0x00E8 è # LATIN SMALL LETTER E WITH GRAVE +0x00E9 é # LATIN SMALL LETTER E WITH ACUTE +0x00EA ê # LATIN SMALL LETTER E WITH CIRCUMFLEX +0x00EB ë # LATIN SMALL LETTER E WITH DIAERESIS +0x00EC ì # LATIN SMALL LETTER I WITH GRAVE +0x00ED í # LATIN SMALL LETTER I WITH ACUTE +0x00EE î # LATIN SMALL LETTER I WITH CIRCUMFLEX +0x00EF ï # LATIN SMALL LETTER I WITH DIAERESIS +0x00F0 ð # LATIN SMALL LETTER ETH +0x00F1 ñ # LATIN SMALL LETTER N WITH TILDE +0x00F2 ò # LATIN SMALL LETTER O WITH GRAVE +0x00F3 ó # LATIN SMALL LETTER O WITH ACUTE +0x00F4 ô # LATIN SMALL LETTER O WITH CIRCUMFLEX +0x00F5 õ # LATIN SMALL LETTER O WITH TILDE +0x00F6 ö # LATIN SMALL LETTER O WITH DIAERESIS +0x00F7 ÷ # DIVISION SIGN +0x00F8 ø # LATIN SMALL LETTER O WITH STROKE +0x00F9 ù # LATIN SMALL LETTER U WITH GRAVE +0x00FA ú # LATIN SMALL LETTER U WITH ACUTE +0x00FB û # LATIN SMALL LETTER U WITH CIRCUMFLEX +0x00FC ü # LATIN SMALL LETTER U WITH 
DIAERESIS +0x00FD ý # LATIN SMALL LETTER Y WITH ACUTE +0x00FE þ # LATIN SMALL LETTER THORN +0x00FF ÿ # LATIN SMALL LETTER Y WITH DIAERESIS +0x0100 Ā # LATIN CAPITAL LETTER A WITH MACRON +0x0101 ā # LATIN SMALL LETTER A WITH MACRON +0x0102 Ă # LATIN CAPITAL LETTER A WITH BREVE +0x0103 ă # LATIN SMALL LETTER A WITH BREVE +0x0104 Ą # LATIN CAPITAL LETTER A WITH OGONEK +0x0105 ą # LATIN SMALL LETTER A WITH OGONEK +0x0106 Ć # LATIN CAPITAL LETTER C WITH ACUTE +0x0107 ć # LATIN SMALL LETTER C WITH ACUTE +0x0108 Ĉ # LATIN CAPITAL LETTER C WITH CIRCUMFLEX +0x0109 ĉ # LATIN SMALL LETTER C WITH CIRCUMFLEX +0x010A Ċ # LATIN CAPITAL LETTER C WITH DOT ABOVE +0x010B ċ # LATIN SMALL LETTER C WITH DOT ABOVE +0x010C Č # LATIN CAPITAL LETTER C WITH CARON +0x010D č # LATIN SMALL LETTER C WITH CARON +0x010E Ď # LATIN CAPITAL LETTER D WITH CARON +0x010F ď # LATIN SMALL LETTER D WITH CARON +0x0110 Đ # LATIN CAPITAL LETTER D WITH STROKE +0x0111 đ # LATIN SMALL LETTER D WITH STROKE +0x0112 Ē # LATIN CAPITAL LETTER E WITH MACRON +0x0113 ē # LATIN SMALL LETTER E WITH MACRON +0x0116 Ė # LATIN CAPITAL LETTER E WITH DOT ABOVE +0x0117 ė # LATIN SMALL LETTER E WITH DOT ABOVE +0x0118 Ę # LATIN CAPITAL LETTER E WITH OGONEK +0x0119 ę # LATIN SMALL LETTER E WITH OGONEK +0x011A Ě # LATIN CAPITAL LETTER E WITH CARON +0x011B ě # LATIN SMALL LETTER E WITH CARON +0x011C Ĝ # LATIN CAPITAL LETTER G WITH CIRCUMFLEX +0x011D ĝ # LATIN SMALL LETTER G WITH CIRCUMFLEX +0x011E Ğ # LATIN CAPITAL LETTER G WITH BREVE +0x011F ğ # LATIN SMALL LETTER G WITH BREVE +0x0120 Ġ # LATIN CAPITAL LETTER G WITH DOT ABOVE +0x0121 ġ # LATIN SMALL LETTER G WITH DOT ABOVE +0x0122 Ģ # LATIN CAPITAL LETTER G WITH CEDILLA +0x0123 ģ # LATIN SMALL LETTER G WITH CEDILLA +0x0124 Ĥ # LATIN CAPITAL LETTER H WITH CIRCUMFLEX +0x0125 ĥ # LATIN SMALL LETTER H WITH CIRCUMFLEX +0x0126 Ħ # LATIN CAPITAL LETTER H WITH STROKE +0x0127 ħ # LATIN SMALL LETTER H WITH STROKE +0x0128 Ĩ # LATIN CAPITAL LETTER I WITH TILDE +0x0129 ĩ # LATIN SMALL LETTER I 
WITH TILDE +0x012A Ī # LATIN CAPITAL LETTER I WITH MACRON +0x012B ī # LATIN SMALL LETTER I WITH MACRON +0x012E Į # LATIN CAPITAL LETTER I WITH OGONEK +0x012F į # LATIN SMALL LETTER I WITH OGONEK +0x0130 İ # LATIN CAPITAL LETTER I WITH DOT ABOVE +0x0131 ı # LATIN SMALL LETTER DOTLESS I +0x0131 ı # LATIN SMALL LETTER DOTLESS I +0x0132 IJ # LATIN CAPITAL LIGATURE IJ +0x0133 ij # LATIN SMALL LIGATURE IJ +0x0134 Ĵ # LATIN CAPITAL LETTER J WITH CIRCUMFLEX +0x0135 ĵ # LATIN SMALL LETTER J WITH CIRCUMFLEX +0x0136 Ķ # LATIN CAPITAL LETTER K WITH CEDILLA +0x0137 ķ # LATIN SMALL LETTER K WITH CEDILLA +0x0138 ĸ # LATIN SMALL LETTER KRA +0x0139 Ĺ # LATIN CAPITAL LETTER L WITH ACUTE +0x013A ĺ # LATIN SMALL LETTER L WITH ACUTE +0x013B Ļ # LATIN CAPITAL LETTER L WITH CEDILLA +0x013C ļ # LATIN SMALL LETTER L WITH CEDILLA +0x013D Ľ # LATIN CAPITAL LETTER L WITH CARON +0x013E ľ # LATIN SMALL LETTER L WITH CARON +0x013F Ŀ # LATIN CAPITAL LETTER L WITH MIDDLE DOT +0x0140 ŀ # LATIN SMALL LETTER L WITH MIDDLE DOT +0x0141 Ł # LATIN CAPITAL LETTER L WITH STROKE +0x0142 ł # LATIN SMALL LETTER L WITH STROKE +0x0143 Ń # LATIN CAPITAL LETTER N WITH ACUTE +0x0144 ń # LATIN SMALL LETTER N WITH ACUTE +0x0145 Ņ # LATIN CAPITAL LETTER N WITH CEDILLA +0x0146 ņ # LATIN SMALL LETTER N WITH CEDILLA +0x0147 Ň # LATIN CAPITAL LETTER N WITH CARON +0x0148 ň # LATIN SMALL LETTER N WITH CARON +0x0149 ʼn # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +0x014A Ŋ # LATIN CAPITAL LETTER ENG +0x014B ŋ # LATIN SMALL LETTER ENG +0x014C Ō # LATIN CAPITAL LETTER O WITH MACRON +0x014D ō # LATIN SMALL LETTER O WITH MACRON +0x0150 Ő # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0x0151 ő # LATIN SMALL LETTER O WITH DOUBLE ACUTE +0x0152 Œ # LATIN CAPITAL LIGATURE OE +0x0153 œ # LATIN SMALL LIGATURE OE +0x0154 Ŕ # LATIN CAPITAL LETTER R WITH ACUTE +0x0155 ŕ # LATIN SMALL LETTER R WITH ACUTE +0x0156 Ŗ # LATIN CAPITAL LETTER R WITH CEDILLA +0x0157 ŗ # LATIN SMALL LETTER R WITH CEDILLA +0x0158 Ř # LATIN CAPITAL LETTER R WITH 
CARON +0x0159 ř # LATIN SMALL LETTER R WITH CARON +0x015A Ś # LATIN CAPITAL LETTER S WITH ACUTE +0x015B ś # LATIN SMALL LETTER S WITH ACUTE +0x015C Ŝ # LATIN CAPITAL LETTER S WITH CIRCUMFLEX +0x015D ŝ # LATIN SMALL LETTER S WITH CIRCUMFLEX +0x015E Ş # LATIN CAPITAL LETTER S WITH CEDILLA +0x015F ş # LATIN SMALL LETTER S WITH CEDILLA +0x0160 Š # LATIN CAPITAL LETTER S WITH CARON +0x0161 š # LATIN SMALL LETTER S WITH CARON +0x0162 Ţ # LATIN CAPITAL LETTER T WITH CEDILLA +0x0163 ţ # LATIN SMALL LETTER T WITH CEDILLA +0x0164 Ť # LATIN CAPITAL LETTER T WITH CARON +0x0165 ť # LATIN SMALL LETTER T WITH CARON +0x0166 Ŧ # LATIN CAPITAL LETTER T WITH STROKE +0x0167 ŧ # LATIN SMALL LETTER T WITH STROKE +0x0168 Ũ # LATIN CAPITAL LETTER U WITH TILDE +0x0169 ũ # LATIN SMALL LETTER U WITH TILDE +0x016A Ū # LATIN CAPITAL LETTER U WITH MACRON +0x016B ū # LATIN SMALL LETTER U WITH MACRON +0x016C Ŭ # LATIN CAPITAL LETTER U WITH BREVE +0x016D ŭ # LATIN SMALL LETTER U WITH BREVE +0x016E Ů # LATIN CAPITAL LETTER U WITH RING ABOVE +0x016F ů # LATIN SMALL LETTER U WITH RING ABOVE +0x0170 Ű # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0x0171 ű # LATIN SMALL LETTER U WITH DOUBLE ACUTE +0x0172 Ų # LATIN CAPITAL LETTER U WITH OGONEK +0x0173 ų # LATIN SMALL LETTER U WITH OGONEK +0x0174 Ŵ # LATIN CAPITAL LETTER W WITH CIRCUMFLEX +0x0175 ŵ # LATIN SMALL LETTER W WITH CIRCUMFLEX +0x0176 Ŷ # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +0x0177 ŷ # LATIN SMALL LETTER Y WITH CIRCUMFLEX +0x0178 Ÿ # LATIN CAPITAL LETTER Y WITH DIAERESIS +0x0179 Ź # LATIN CAPITAL LETTER Z WITH ACUTE +0x017A ź # LATIN SMALL LETTER Z WITH ACUTE +0x017B Ż # LATIN CAPITAL LETTER Z WITH DOT ABOVE +0x017C ż # LATIN SMALL LETTER Z WITH DOT ABOVE +0x017D Ž # LATIN CAPITAL LETTER Z WITH CARON +0x017E ž # LATIN SMALL LETTER Z WITH CARON +0x0192 ƒ # LATIN SMALL LETTER F WITH HOOK +0x01F5 ǵ # LATIN SMALL LETTER G WITH ACUTE +0x02BC ʼ # MODIFIER LETTER APOSTROPHE +0x02C6 ˆ # MODIFIER LETTER CIRCUMFLEX ACCENT +0x02C7 ˇ # CARON +0x02D8 ˘ # 
BREVE +0x02D9 ˙ # DOT ABOVE +0x02DA ˚ # RING ABOVE +0x02DB ˛ # OGONEK +0x02DC ˜ # SMALL TILDE +0x02DD ˝ # DOUBLE ACUTE ACCENT +0x0386 Ά # GREEK CAPITAL LETTER ALPHA WITH TONOS +0x0388 Έ # GREEK CAPITAL LETTER EPSILON WITH TONOS +0x0389 Ή # GREEK CAPITAL LETTER ETA WITH TONOS +0x038A Ί # GREEK CAPITAL LETTER IOTA WITH TONOS +0x038C Ό # GREEK CAPITAL LETTER OMICRON WITH TONOS +0x038E Ύ # GREEK CAPITAL LETTER UPSILON WITH TONOS +0x038F Ώ # GREEK CAPITAL LETTER OMEGA WITH TONOS +0x0390 ΐ # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +0x0391 Α # GREEK CAPITAL LETTER ALPHA +0x0392 Β # GREEK CAPITAL LETTER BETA +0x0393 Γ # GREEK CAPITAL LETTER GAMMA +0x0394 Δ # GREEK CAPITAL LETTER DELTA +0x0395 Ε # GREEK CAPITAL LETTER EPSILON +0x0396 Ζ # GREEK CAPITAL LETTER ZETA +0x0397 Η # GREEK CAPITAL LETTER ETA +0x0398 Θ # GREEK CAPITAL LETTER THETA +0x0399 Ι # GREEK CAPITAL LETTER IOTA +0x039A Κ # GREEK CAPITAL LETTER KAPPA +0x039B Λ # GREEK CAPITAL LETTER LAMDA +0x039C Μ # GREEK CAPITAL LETTER MU +0x039D Ν # GREEK CAPITAL LETTER NU +0x039E Ξ # GREEK CAPITAL LETTER XI +0x039F Ο # GREEK CAPITAL LETTER OMICRON +0x03A0 Π # GREEK CAPITAL LETTER PI +0x03A1 Ρ # GREEK CAPITAL LETTER RHO +0x03A3 Σ # GREEK CAPITAL LETTER SIGMA +0x03A4 Τ # GREEK CAPITAL LETTER TAU +0x03A5 Υ # GREEK CAPITAL LETTER UPSILON +0x03A6 Φ # GREEK CAPITAL LETTER PHI +0x03A7 Χ # GREEK CAPITAL LETTER CHI +0x03A8 Ψ # GREEK CAPITAL LETTER PSI +0x03A9 Ω # GREEK CAPITAL LETTER OMEGA +0x03AA Ϊ # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA +0x03AB Ϋ # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +0x03AC ά # GREEK SMALL LETTER ALPHA WITH TONOS +0x03AD έ # GREEK SMALL LETTER EPSILON WITH TONOS +0x03AE ή # GREEK SMALL LETTER ETA WITH TONOS +0x03AF ί # GREEK SMALL LETTER IOTA WITH TONOS +0x03B0 ΰ # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS +0x03B1 α # GREEK SMALL LETTER ALPHA +0x03B2 β # GREEK SMALL LETTER BETA +0x03B3 γ # GREEK SMALL LETTER GAMMA +0x03B4 δ # GREEK SMALL LETTER DELTA +0x03B5 ε # GREEK SMALL LETTER 
EPSILON +0x03B6 ζ # GREEK SMALL LETTER ZETA +0x03B7 η # GREEK SMALL LETTER ETA +0x03B8 θ # GREEK SMALL LETTER THETA +0x03B9 ι # GREEK SMALL LETTER IOTA +0x03BA κ # GREEK SMALL LETTER KAPPA +0x03BB λ # GREEK SMALL LETTER LAMDA +0x03BC μ # GREEK SMALL LETTER MU +0x03BD ν # GREEK SMALL LETTER NU +0x03BE ξ # GREEK SMALL LETTER XI +0x03BF ο # GREEK SMALL LETTER OMICRON +0x03C0 π # GREEK SMALL LETTER PI +0x03C1 ρ # GREEK SMALL LETTER RHO +0x03C2 ς # GREEK SMALL LETTER FINAL SIGMA +0x03C3 σ # GREEK SMALL LETTER SIGMA +0x03C4 τ # GREEK SMALL LETTER TAU +0x03C5 υ # GREEK SMALL LETTER UPSILON +0x03C6 φ # GREEK SMALL LETTER PHI +0x03C7 χ # GREEK SMALL LETTER CHI +0x03C8 ψ # GREEK SMALL LETTER PSI +0x03C9 ω # GREEK SMALL LETTER OMEGA +0x03CA ϊ # GREEK SMALL LETTER IOTA WITH DIALYTIKA +0x03CB ϋ # GREEK SMALL LETTER UPSILON WITH DIALYTIKA +0x03CC ό # GREEK SMALL LETTER OMICRON WITH TONOS +0x03CE ώ # GREEK SMALL LETTER OMEGA WITH TONOS +0x03D1 ϑ # GREEK THETA SYMBOL +0x03D2 ϒ # GREEK UPSILON WITH HOOK SYMBOL +0x03D5 ϕ # GREEK PHI SYMBOL +0x03D6 ϖ # GREEK PI SYMBOL +0x03DC Ϝ # GREEK LETTER DIGAMMA +0x03F0 ϰ # GREEK KAPPA SYMBOL +0x03F1 ϱ # GREEK RHO SYMBOL +0x0401 Ё # CYRILLIC CAPITAL LETTER IO +0x0402 Ђ # CYRILLIC CAPITAL LETTER DJE +0x0403 Ѓ # CYRILLIC CAPITAL LETTER GJE +0x0404 Є # CYRILLIC CAPITAL LETTER UKRAINIAN IE +0x0405 Ѕ # CYRILLIC CAPITAL LETTER DZE +0x0406 І # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +0x0407 Ї # CYRILLIC CAPITAL LETTER YI +0x0408 Ј # CYRILLIC CAPITAL LETTER JE +0x0409 Љ # CYRILLIC CAPITAL LETTER LJE +0x040A Њ # CYRILLIC CAPITAL LETTER NJE +0x040B Ћ # CYRILLIC CAPITAL LETTER TSHE +0x040C Ќ # CYRILLIC CAPITAL LETTER KJE +0x040E Ў # CYRILLIC CAPITAL LETTER SHORT U +0x040F Џ # CYRILLIC CAPITAL LETTER DZHE +0x0410 А # CYRILLIC CAPITAL LETTER A +0x0411 Б # CYRILLIC CAPITAL LETTER BE +0x0412 В # CYRILLIC CAPITAL LETTER VE +0x0413 Г # CYRILLIC CAPITAL LETTER GHE +0x0414 Д # CYRILLIC CAPITAL LETTER DE +0x0415 Е # CYRILLIC CAPITAL LETTER IE +0x0416 Ж # 
CYRILLIC CAPITAL LETTER ZHE +0x0417 З # CYRILLIC CAPITAL LETTER ZE +0x0418 И # CYRILLIC CAPITAL LETTER I +0x0419 Й # CYRILLIC CAPITAL LETTER SHORT I +0x041A К # CYRILLIC CAPITAL LETTER KA +0x041B Л # CYRILLIC CAPITAL LETTER EL +0x041C М # CYRILLIC CAPITAL LETTER EM +0x041D Н # CYRILLIC CAPITAL LETTER EN +0x041E О # CYRILLIC CAPITAL LETTER O +0x041F П # CYRILLIC CAPITAL LETTER PE +0x0420 Р # CYRILLIC CAPITAL LETTER ER +0x0421 С # CYRILLIC CAPITAL LETTER ES +0x0422 Т # CYRILLIC CAPITAL LETTER TE +0x0423 У # CYRILLIC CAPITAL LETTER U +0x0424 Ф # CYRILLIC CAPITAL LETTER EF +0x0425 Х # CYRILLIC CAPITAL LETTER HA +0x0426 Ц # CYRILLIC CAPITAL LETTER TSE +0x0427 Ч # CYRILLIC CAPITAL LETTER CHE +0x0428 Ш # CYRILLIC CAPITAL LETTER SHA +0x0429 Щ # CYRILLIC CAPITAL LETTER SHCHA +0x042A Ъ # CYRILLIC CAPITAL LETTER HARD SIGN +0x042B Ы # CYRILLIC CAPITAL LETTER YERU +0x042C Ь # CYRILLIC CAPITAL LETTER SOFT SIGN +0x042D Э # CYRILLIC CAPITAL LETTER E +0x042E Ю # CYRILLIC CAPITAL LETTER YU +0x042F Я # CYRILLIC CAPITAL LETTER YA +0x0430 а # CYRILLIC SMALL LETTER A +0x0431 б # CYRILLIC SMALL LETTER BE +0x0432 в # CYRILLIC SMALL LETTER VE +0x0433 г # CYRILLIC SMALL LETTER GHE +0x0434 д # CYRILLIC SMALL LETTER DE +0x0435 е # CYRILLIC SMALL LETTER IE +0x0436 ж # CYRILLIC SMALL LETTER ZHE +0x0437 з # CYRILLIC SMALL LETTER ZE +0x0438 и # CYRILLIC SMALL LETTER I +0x0439 й # CYRILLIC SMALL LETTER SHORT I +0x043A к # CYRILLIC SMALL LETTER KA +0x043B л # CYRILLIC SMALL LETTER EL +0x043C м # CYRILLIC SMALL LETTER EM +0x043D н # CYRILLIC SMALL LETTER EN +0x043E о # CYRILLIC SMALL LETTER O +0x043F п # CYRILLIC SMALL LETTER PE +0x0440 р # CYRILLIC SMALL LETTER ER +0x0441 с # CYRILLIC SMALL LETTER ES +0x0442 т # CYRILLIC SMALL LETTER TE +0x0443 у # CYRILLIC SMALL LETTER U +0x0444 ф # CYRILLIC SMALL LETTER EF +0x0445 х # CYRILLIC SMALL LETTER HA +0x0446 ц # CYRILLIC SMALL LETTER TSE +0x0447 ч # CYRILLIC SMALL LETTER CHE +0x0448 ш # CYRILLIC SMALL LETTER SHA +0x0449 щ # CYRILLIC SMALL LETTER SHCHA 
+0x044A ъ # CYRILLIC SMALL LETTER HARD SIGN +0x044B ы # CYRILLIC SMALL LETTER YERU +0x044C ь # CYRILLIC SMALL LETTER SOFT SIGN +0x044D э # CYRILLIC SMALL LETTER E +0x044E ю # CYRILLIC SMALL LETTER YU +0x044F я # CYRILLIC SMALL LETTER YA +0x0451 ё # CYRILLIC SMALL LETTER IO +0x0452 ђ # CYRILLIC SMALL LETTER DJE +0x0453 ѓ # CYRILLIC SMALL LETTER GJE +0x0454 є # CYRILLIC SMALL LETTER UKRAINIAN IE +0x0455 ѕ # CYRILLIC SMALL LETTER DZE +0x0456 і # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +0x0457 ї # CYRILLIC SMALL LETTER YI +0x0458 ј # CYRILLIC SMALL LETTER JE +0x0459 љ # CYRILLIC SMALL LETTER LJE +0x045A њ # CYRILLIC SMALL LETTER NJE +0x045B ћ # CYRILLIC SMALL LETTER TSHE +0x045C ќ # CYRILLIC SMALL LETTER KJE +0x045E ў # CYRILLIC SMALL LETTER SHORT U +0x045F џ # CYRILLIC SMALL LETTER DZHE +0x2002   # EN SPACE +0x2003   # EM SPACE +0x2004   # THREE-PER-EM SPACE +0x2005   # FOUR-PER-EM SPACE +0x2007   # FIGURE SPACE +0x2008   # PUNCTUATION SPACE +0x2009   # THIN SPACE +0x200A   # HAIR SPACE +0x200C ‌ # ZERO WIDTH NON-JOINER +0x200D ‍ # ZERO WIDTH JOINER +0x200E ‎ # LEFT-TO-RIGHT MARK +0x200F ‏ # RIGHT-TO-LEFT MARK +0x2010 ‐ # HYPHEN +0x2013 – # EN DASH +0x2014 — # EM DASH +0x2015 ― # HORIZONTAL BAR +0x2016 ‖ # DOUBLE VERTICAL LINE +0x2018 ‘ # LEFT SINGLE QUOTATION MARK +0x2018 ‘ # LEFT SINGLE QUOTATION MARK +0x2019 ’ # RIGHT SINGLE QUOTATION MARK +0x201A ‚ # SINGLE LOW-9 QUOTATION MARK +0x201A ‚ # SINGLE LOW-9 QUOTATION MARK +0x201C “ # LEFT DOUBLE QUOTATION MARK +0x201C “ # LEFT DOUBLE QUOTATION MARK +0x201D ” # RIGHT DOUBLE QUOTATION MARK +0x201E „ # DOUBLE LOW-9 QUOTATION MARK +0x201E „ # DOUBLE LOW-9 QUOTATION MARK +0x2020 † # DAGGER +0x2021 ‡ # DOUBLE DAGGER +0x2022 • # BULLET +0x2025 ‥ # TWO DOT LEADER +0x2026 … # HORIZONTAL ELLIPSIS +0x2026 … # HORIZONTAL ELLIPSIS +0x2030 ‰ # PER MILLE SIGN +0x2032 ′ # PRIME +0x2032 ′ # PRIME +0x2033 ″ # DOUBLE PRIME +0x2034 ‴ # TRIPLE PRIME +0x2035 ‵ # REVERSED PRIME +0x2039 ‹ # SINGLE LEFT-POINTING ANGLE QUOTATION MARK 
+0x203A › # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0x203E ‾ # OVERLINE +0x2041 ⁁ # CARET INSERTION POINT +0x2043 ⁃ # HYPHEN BULLET +0x2044 ⁄ # FRACTION SLASH +0x20AC € # EURO SIGN +0x20DB ⃛ # COMBINING THREE DOTS ABOVE +0x20DC ⃜ # COMBINING FOUR DOTS ABOVE +0x2105 ℅ # CARE OF +0x210B ℋ # SCRIPT CAPITAL H +0x210F ℏ # PLANCK CONSTANT OVER TWO PI +0x2111 ℑ # BLACK-LETTER CAPITAL I +0x2112 ℒ # SCRIPT CAPITAL L +0x2113 ℓ # SCRIPT SMALL L +0x2116 № # NUMERO SIGN +0x2117 ℗ # SOUND RECORDING COPYRIGHT +0x2118 ℘ # SCRIPT CAPITAL P +0x211C ℜ # BLACK-LETTER CAPITAL R +0x211E ℞ # PRESCRIPTION TAKE +0x2122 ™ # TRADE MARK SIGN +0x2126 Ω # OHM SIGN +0x212B Å # ANGSTROM SIGN +0x212C ℬ # SCRIPT CAPITAL B +0x2133 ℳ # SCRIPT CAPITAL M +0x2134 ℴ # SCRIPT SMALL O +0x2135 ℵ # ALEF SYMBOL +0x2135 ℵ # ALEF SYMBOL +0x2136 ℶ # BET SYMBOL +0x2137 ℷ # GIMEL SYMBOL +0x2138 ℸ # DALET SYMBOL +0x2153 ⅓ # VULGAR FRACTION ONE THIRD +0x2154 ⅔ # VULGAR FRACTION TWO THIRDS +0x2155 ⅕ # VULGAR FRACTION ONE FIFTH +0x2156 ⅖ # VULGAR FRACTION TWO FIFTHS +0x2157 ⅗ # VULGAR FRACTION THREE FIFTHS +0x2158 ⅘ # VULGAR FRACTION FOUR FIFTHS +0x2159 ⅙ # VULGAR FRACTION ONE SIXTH +0x215A ⅚ # VULGAR FRACTION FIVE SIXTHS +0x215B ⅛ # VULGAR FRACTION ONE EIGHTH +0x215C ⅜ # VULGAR FRACTION THREE EIGHTHS +0x215D ⅝ # VULGAR FRACTION FIVE EIGHTHS +0x215E ⅞ # VULGAR FRACTION SEVEN EIGHTHS +0x2190 ← # LEFTWARDS ARROW +0x2191 ↑ # UPWARDS ARROW +0x2192 → # RIGHTWARDS ARROW +0x2193 ↓ # DOWNWARDS ARROW +0x2194 ↔ # LEFT RIGHT ARROW +0x2195 ↕ # UP DOWN ARROW +0x2196 ↖ # NORTH WEST ARROW +0x2197 ↗ # NORTH EAST ARROW +0x2198 ↘ # SOUTH EAST ARROW +0x2199 ↙ # SOUTH WEST ARROW +0x219A ↚ # LEFTWARDS ARROW WITH STROKE +0x219B ↛ # RIGHTWARDS ARROW WITH STROKE +0x219D ↝ # RIGHTWARDS WAVE ARROW +0x219E ↞ # LEFTWARDS TWO HEADED ARROW +0x21A0 ↠ # RIGHTWARDS TWO HEADED ARROW +0x21A2 ↢ # LEFTWARDS ARROW WITH TAIL +0x21A3 ↣ # RIGHTWARDS ARROW WITH TAIL +0x21A6 ↦ # RIGHTWARDS ARROW FROM BAR +0x21A9 ↩ # LEFTWARDS ARROW WITH HOOK +0x21AA ↪ # 
RIGHTWARDS ARROW WITH HOOK +0x21AB ↫ # LEFTWARDS ARROW WITH LOOP +0x21AC ↬ # RIGHTWARDS ARROW WITH LOOP +0x21AD ↭ # LEFT RIGHT WAVE ARROW +0x21AE ↮ # LEFT RIGHT ARROW WITH STROKE +0x21B0 ↰ # UPWARDS ARROW WITH TIP LEFTWARDS +0x21B1 ↱ # UPWARDS ARROW WITH TIP RIGHTWARDS +0x21B5 ↵ # DOWNWARDS ARROW WITH CORNER LEFTWARDS +0x21B6 ↶ # ANTICLOCKWISE TOP SEMICIRCLE ARROW +0x21B7 ↷ # CLOCKWISE TOP SEMICIRCLE ARROW +0x21BA ↺ # ANTICLOCKWISE OPEN CIRCLE ARROW +0x21BB ↻ # CLOCKWISE OPEN CIRCLE ARROW +0x21BC ↼ # LEFTWARDS HARPOON WITH BARB UPWARDS +0x21BD ↽ # LEFTWARDS HARPOON WITH BARB DOWNWARDS +0x21BE ↾ # UPWARDS HARPOON WITH BARB RIGHTWARDS +0x21BF ↿ # UPWARDS HARPOON WITH BARB LEFTWARDS +0x21C0 ⇀ # RIGHTWARDS HARPOON WITH BARB UPWARDS +0x21C1 ⇁ # RIGHTWARDS HARPOON WITH BARB DOWNWARDS +0x21C2 ⇂ # DOWNWARDS HARPOON WITH BARB RIGHTWARDS +0x21C3 ⇃ # DOWNWARDS HARPOON WITH BARB LEFTWARDS +0x21C4 ⇄ # RIGHTWARDS ARROW OVER LEFTWARDS ARROW +0x21C6 ⇆ # LEFTWARDS ARROW OVER RIGHTWARDS ARROW +0x21C7 ⇇ # LEFTWARDS PAIRED ARROWS +0x21C8 ⇈ # UPWARDS PAIRED ARROWS +0x21C9 ⇉ # RIGHTWARDS PAIRED ARROWS +0x21CA ⇊ # DOWNWARDS PAIRED ARROWS +0x21CB ⇋ # LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON +0x21CC ⇌ # RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON +0x21CD ⇍ # LEFTWARDS DOUBLE ARROW WITH STROKE +0x21CE ⇎ # LEFT RIGHT DOUBLE ARROW WITH STROKE +0x21CF ⇏ # RIGHTWARDS DOUBLE ARROW WITH STROKE +0x21D0 ⇐ # LEFTWARDS DOUBLE ARROW +0x21D1 ⇑ # UPWARDS DOUBLE ARROW +0x21D2 ⇒ # RIGHTWARDS DOUBLE ARROW +0x21D3 ⇓ # DOWNWARDS DOUBLE ARROW +0x21D4 ⇔ # LEFT RIGHT DOUBLE ARROW +0x21D5 ⇕ # UP DOWN DOUBLE ARROW +0x21DA ⇚ # LEFTWARDS TRIPLE ARROW +0x21DB ⇛ # RIGHTWARDS TRIPLE ARROW +0x2200 ∀ # FOR ALL +0x2201 ∁ # COMPLEMENT +0x2202 ∂ # PARTIAL DIFFERENTIAL +0x2203 ∃ # THERE EXISTS +0x2204 ∄ # THERE DOES NOT EXIST +0x2205 ∅ # EMPTY SET +0x2207 ∇ # NABLA +0x2208 ∈ # ELEMENT OF +0x2209 ∉ # NOT AN ELEMENT OF +0x220A ∊ # SMALL ELEMENT OF +0x220B ∋ # CONTAINS AS MEMBER +0x220D ∍ # SMALL CONTAINS AS MEMBER +0x220F ∏ # 
N-ARY PRODUCT +0x2210 ∐ # N-ARY COPRODUCT +0x2211 ∑ # N-ARY SUMMATION +0x2212 − # MINUS SIGN +0x2213 ∓ # MINUS-OR-PLUS SIGN +0x2214 ∔ # DOT PLUS +0x2216 ∖ # SET MINUS +0x2217 ∗ # ASTERISK OPERATOR +0x2218 ∘ # RING OPERATOR +0x221A √ # SQUARE ROOT +0x221D ∝ # PROPORTIONAL TO +0x221E ∞ # INFINITY +0x221F ∟ # RIGHT ANGLE +0x2220 ∠ # ANGLE +0x2221 ∡ # MEASURED ANGLE +0x2222 ∢ # SPHERICAL ANGLE +0x2223 ∣ # DIVIDES +0x2224 ∤ # DOES NOT DIVIDE +0x2225 ∥ # PARALLEL TO +0x2226 ∦ # NOT PARALLEL TO +0x2227 ∧ # LOGICAL AND +0x2228 ∨ # LOGICAL OR +0x2229 ∩ # INTERSECTION +0x222A ∪ # UNION +0x222B ∫ # INTEGRAL +0x222E ∮ # CONTOUR INTEGRAL +0x2234 ∴ # THEREFORE +0x2235 ∵ # BECAUSE +0x223C ∼ # TILDE OPERATOR +0x223D ∽ # REVERSED TILDE +0x2240 ≀ # WREATH PRODUCT +0x2241 ≁ # NOT TILDE +0x2243 ≃ # ASYMPTOTICALLY EQUAL TO +0x2244 ≄ # NOT ASYMPTOTICALLY EQUAL TO +0x2245 ≅ # APPROXIMATELY EQUAL TO +0x2247 ≇ # NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO +0x2248 ≈ # ALMOST EQUAL TO +0x2249 ≉ # NOT ALMOST EQUAL TO +0x224A ≊ # ALMOST EQUAL OR EQUAL TO +0x224C ≌ # ALL EQUAL TO +0x224E ≎ # GEOMETRICALLY EQUIVALENT TO +0x224F ≏ # DIFFERENCE BETWEEN +0x2250 ≐ # APPROACHES THE LIMIT +0x2251 ≑ # GEOMETRICALLY EQUAL TO +0x2252 ≒ # APPROXIMATELY EQUAL TO OR THE IMAGE OF +0x2253 ≓ # IMAGE OF OR APPROXIMATELY EQUAL TO +0x2254 ≔ # COLON EQUALS +0x2255 ≕ # EQUALS COLON +0x2256 ≖ # RING IN EQUAL TO +0x2257 ≗ # RING EQUAL TO +0x2259 ≙ # ESTIMATES +0x225C ≜ # DELTA EQUAL TO +0x2260 ≠ # NOT EQUAL TO +0x2261 ≡ # IDENTICAL TO +0x2262 ≢ # NOT IDENTICAL TO +0x2264 ≤ # LESS-THAN OR EQUAL TO +0x2265 ≥ # GREATER-THAN OR EQUAL TO +0x2266 ≦ # LESS-THAN OVER EQUAL TO +0x2267 ≧ # GREATER-THAN OVER EQUAL TO +0x2268 ≨ # LESS-THAN BUT NOT EQUAL TO +0x2269 ≩ # GREATER-THAN BUT NOT EQUAL TO +0x226A ≪ # MUCH LESS-THAN +0x226B ≫ # MUCH GREATER-THAN +0x226C ≬ # BETWEEN +0x226E ≮ # NOT LESS-THAN +0x226F ≯ # NOT GREATER-THAN +0x2270 ≰ # NEITHER LESS-THAN NOR EQUAL TO +0x2271 ≱ # NEITHER GREATER-THAN NOR EQUAL TO +0x2272 ≲ # 
LESS-THAN OR EQUIVALENT TO +0x2273 ≳ # GREATER-THAN OR EQUIVALENT TO +0x2276 ≶ # LESS-THAN OR GREATER-THAN +0x2277 ≷ # GREATER-THAN OR LESS-THAN +0x227A ≺ # PRECEDES +0x227B ≻ # SUCCEEDS +0x227C ≼ # PRECEDES OR EQUAL TO +0x227D ≽ # SUCCEEDS OR EQUAL TO +0x227E ≾ # PRECEDES OR EQUIVALENT TO +0x227F ≿ # SUCCEEDS OR EQUIVALENT TO +0x2280 ⊀ # DOES NOT PRECEDE +0x2281 ⊁ # DOES NOT SUCCEED +0x2282 ⊂ # SUBSET OF +0x2283 ⊃ # SUPERSET OF +0x2284 ⊄ # NOT A SUBSET OF +0x2285 ⊅ # NOT A SUPERSET OF +0x2286 ⊆ # SUBSET OF OR EQUAL TO +0x2287 ⊇ # SUPERSET OF OR EQUAL TO +0x2288 ⊈ # NEITHER A SUBSET OF NOR EQUAL TO +0x2289 ⊉ # NEITHER A SUPERSET OF NOR EQUAL TO +0x228A ⊊ # SUBSET OF WITH NOT EQUAL TO +0x228B ⊋ # SUPERSET OF WITH NOT EQUAL TO +0x228E ⊎ # MULTISET UNION +0x228F ⊏ # SQUARE IMAGE OF +0x2290 ⊐ # SQUARE ORIGINAL OF +0x2291 ⊑ # SQUARE IMAGE OF OR EQUAL TO +0x2292 ⊒ # SQUARE ORIGINAL OF OR EQUAL TO +0x2293 ⊓ # SQUARE CAP +0x2294 ⊔ # SQUARE CUP +0x2295 ⊕ # CIRCLED PLUS +0x2296 ⊖ # CIRCLED MINUS +0x2297 ⊗ # CIRCLED TIMES +0x2298 ⊘ # CIRCLED DIVISION SLASH +0x2299 ⊙ # CIRCLED DOT OPERATOR +0x229A ⊚ # CIRCLED RING OPERATOR +0x229B ⊛ # CIRCLED ASTERISK OPERATOR +0x229D ⊝ # CIRCLED DASH +0x229E ⊞ # SQUARED PLUS +0x229F ⊟ # SQUARED MINUS +0x22A0 ⊠ # SQUARED TIMES +0x22A1 ⊡ # SQUARED DOT OPERATOR +0x22A2 ⊢ # RIGHT TACK +0x22A3 ⊣ # LEFT TACK +0x22A4 ⊤ # DOWN TACK +0x22A5 ⊥ # UP TACK +0x22A7 ⊧ # MODELS +0x22A8 ⊨ # TRUE +0x22A9 ⊩ # FORCES +0x22AA ⊪ # TRIPLE VERTICAL BAR RIGHT TURNSTILE +0x22AC ⊬ # DOES NOT PROVE +0x22AD ⊭ # NOT TRUE +0x22AE ⊮ # DOES NOT FORCE +0x22AF ⊯ # NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE +0x22B2 ⊲ # NORMAL SUBGROUP OF +0x22B3 ⊳ # CONTAINS AS NORMAL SUBGROUP +0x22B4 ⊴ # NORMAL SUBGROUP OF OR EQUAL TO +0x22B5 ⊵ # CONTAINS AS NORMAL SUBGROUP OR EQUAL TO +0x22B8 ⊸ # MULTIMAP +0x22BA ⊺ # INTERCALATE +0x22BB ⊻ # XOR +0x22BC ⊼ # NAND +0x22C4 ⋄ # DIAMOND OPERATOR +0x22C5 ⋅ # DOT OPERATOR +0x22C6 ⋆ # STAR OPERATOR +0x22C7 ⋇ # DIVISION TIMES +0x22C8 ⋈ # BOWTIE 
+0x22C9 ⋉ # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT +0x22CA ⋊ # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT +0x22CB ⋋ # LEFT SEMIDIRECT PRODUCT +0x22CC ⋌ # RIGHT SEMIDIRECT PRODUCT +0x22CD ⋍ # REVERSED TILDE EQUALS +0x22CE ⋎ # CURLY LOGICAL OR +0x22CF ⋏ # CURLY LOGICAL AND +0x22D0 ⋐ # DOUBLE SUBSET +0x22D1 ⋑ # DOUBLE SUPERSET +0x22D2 ⋒ # DOUBLE INTERSECTION +0x22D3 ⋓ # DOUBLE UNION +0x22D4 ⋔ # PITCHFORK +0x22D6 ⋖ # LESS-THAN WITH DOT +0x22D7 ⋗ # GREATER-THAN WITH DOT +0x22D8 ⋘ # VERY MUCH LESS-THAN +0x22D9 ⋙ # VERY MUCH GREATER-THAN +0x22DA ⋚ # LESS-THAN EQUAL TO OR GREATER-THAN +0x22DB ⋛ # GREATER-THAN EQUAL TO OR LESS-THAN +0x22DC ⋜ # EQUAL TO OR LESS-THAN +0x22DD ⋝ # EQUAL TO OR GREATER-THAN +0x22DE ⋞ # EQUAL TO OR PRECEDES +0x22DF ⋟ # EQUAL TO OR SUCCEEDS +0x22E0 ⋠ # DOES NOT PRECEDE OR EQUAL +0x22E1 ⋡ # DOES NOT SUCCEED OR EQUAL +0x22E6 ⋦ # LESS-THAN BUT NOT EQUIVALENT TO +0x22E7 ⋧ # GREATER-THAN BUT NOT EQUIVALENT TO +0x22E8 ⋨ # PRECEDES BUT NOT EQUIVALENT TO +0x22E9 ⋩ # SUCCEEDS BUT NOT EQUIVALENT TO +0x22EA ⋪ # NOT NORMAL SUBGROUP OF +0x22EB ⋫ # DOES NOT CONTAIN AS NORMAL SUBGROUP +0x22EC ⋬ # NOT NORMAL SUBGROUP OF OR EQUAL TO +0x22ED ⋭ # DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL +0x22EE ⋮ # VERTICAL ELLIPSIS +0x2306 ⌆ # PERSPECTIVE +0x2308 ⌈ # LEFT CEILING +0x2309 ⌉ # RIGHT CEILING +0x230A ⌊ # LEFT FLOOR +0x230B ⌋ # RIGHT FLOOR +0x230C ⌌ # BOTTOM RIGHT CROP +0x230D ⌍ # BOTTOM LEFT CROP +0x230E ⌎ # TOP RIGHT CROP +0x230F ⌏ # TOP LEFT CROP +0x2315 ⌕ # TELEPHONE RECORDER +0x2316 ⌖ # POSITION INDICATOR +0x231C ⌜ # TOP LEFT CORNER +0x231D ⌝ # TOP RIGHT CORNER +0x231E ⌞ # BOTTOM LEFT CORNER +0x231F ⌟ # BOTTOM RIGHT CORNER +0x2322 ⌢ # FROWN +0x2323 ⌣ # SMILE +0x2329 〈 # LEFT-POINTING ANGLE BRACKET +0x232A 〉 # RIGHT-POINTING ANGLE BRACKET +0x2423 ␣ # OPEN BOX +0x24C8 Ⓢ # CIRCLED LATIN CAPITAL LETTER S +0x2500 ─ # BOX DRAWINGS LIGHT HORIZONTAL +0x2502 │ # BOX DRAWINGS LIGHT VERTICAL +0x250C ┌ # BOX DRAWINGS LIGHT DOWN AND RIGHT +0x2510 ┐ # BOX DRAWINGS LIGHT DOWN AND 
LEFT +0x2514 └ # BOX DRAWINGS LIGHT UP AND RIGHT +0x2518 ┘ # BOX DRAWINGS LIGHT UP AND LEFT +0x251C ├ # BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0x2524 ┤ # BOX DRAWINGS LIGHT VERTICAL AND LEFT +0x252C ┬ # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0x2534 ┴ # BOX DRAWINGS LIGHT UP AND HORIZONTAL +0x253C ┼ # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0x2550 ═ # BOX DRAWINGS DOUBLE HORIZONTAL +0x2551 ║ # BOX DRAWINGS DOUBLE VERTICAL +0x2552 ╒ # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +0x2553 ╓ # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE +0x2554 ╔ # BOX DRAWINGS DOUBLE DOWN AND RIGHT +0x2555 ╕ # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +0x2556 ╖ # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +0x2557 ╗ # BOX DRAWINGS DOUBLE DOWN AND LEFT +0x2558 ╘ # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +0x2559 ╙ # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +0x255A ╚ # BOX DRAWINGS DOUBLE UP AND RIGHT +0x255B ╛ # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +0x255C ╜ # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +0x255D ╝ # BOX DRAWINGS DOUBLE UP AND LEFT +0x255E ╞ # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +0x255F ╟ # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +0x2560 ╠ # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0x2561 ╡ # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +0x2562 ╢ # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +0x2563 ╣ # BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0x2564 ╤ # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +0x2565 ╥ # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE +0x2566 ╦ # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0x2567 ╧ # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +0x2568 ╨ # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +0x2569 ╩ # BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0x256A ╪ # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +0x256B ╫ # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +0x256C ╬ # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0x2580 ▀ # UPPER HALF BLOCK +0x2584 ▄ # LOWER HALF BLOCK +0x2588 █ # FULL BLOCK +0x2591 ░ # LIGHT SHADE +0x2592 ▒ # 
MEDIUM SHADE +0x2593 ▓ # DARK SHADE +0x25A1 □ # WHITE SQUARE +0x25AA ▪ # BLACK SMALL SQUARE +0x25AD ▭ # WHITE RECTANGLE +0x25AE ▮ # BLACK VERTICAL RECTANGLE +0x25B3 △ # WHITE UP-POINTING TRIANGLE +0x25B4 ▴ # BLACK UP-POINTING SMALL TRIANGLE +0x25B5 ▵ # WHITE UP-POINTING SMALL TRIANGLE +0x25B8 ▸ # BLACK RIGHT-POINTING SMALL TRIANGLE +0x25B9 ▹ # WHITE RIGHT-POINTING SMALL TRIANGLE +0x25BD ▽ # WHITE DOWN-POINTING TRIANGLE +0x25BE ▾ # BLACK DOWN-POINTING SMALL TRIANGLE +0x25BF ▿ # WHITE DOWN-POINTING SMALL TRIANGLE +0x25C2 ◂ # BLACK LEFT-POINTING SMALL TRIANGLE +0x25C3 ◃ # WHITE LEFT-POINTING SMALL TRIANGLE +0x25CA ◊ # LOZENGE +0x25CB ○ # WHITE CIRCLE +0x2605 ★ # BLACK STAR +0x2606 ☆ # WHITE STAR +0x260E ☎ # BLACK TELEPHONE +0x2640 ♀ # FEMALE SIGN +0x2642 ♂ # MALE SIGN +0x2660 ♠ # BLACK SPADE SUIT +0x2663 ♣ # BLACK CLUB SUIT +0x2665 ♥ # BLACK HEART SUIT +0x2666 ♦ # BLACK DIAMOND SUIT +0x266A ♪ # EIGHTH NOTE +0x266D ♭ # MUSIC FLAT SIGN +0x266E ♮ # MUSIC NATURAL SIGN +0x266F ♯ # MUSIC SHARP SIGN +0x2713 ✓ # CHECK MARK +0x2717 ✗ # BALLOT X +0x2720 ✠ # MALTESE CROSS +0x2726 ✦ # BLACK FOUR POINTED STAR +0x2727 ✧ # WHITE FOUR POINTED STAR +0x2736 ✶ # SIX POINTED BLACK STAR +0xFB00 ff # LATIN SMALL LIGATURE FF +0xFB01 fi # LATIN SMALL LIGATURE FI +0xFB02 fl # LATIN SMALL LIGATURE FL +0xFB03 ffi # LATIN SMALL LIGATURE FFI +0xFB04 ffl # LATIN SMALL LIGATURE FFL + + +</PRE> +</BODY> +</HTML> diff --git a/test/utf-8-demo.html b/test/utf-8-demo.html new file mode 100644 index 0000000..d792903 --- /dev/null +++ b/test/utf-8-demo.html @@ -0,0 +1,216 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<HTML> +<HEAD> +<TITLE>Markus Kuhn's UTF-8 demo</TITLE> +<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1"> +<LINK REV="made" HREF="mailto:dickey@invisible-island.net"> +</HEAD> + +<BODY> +<pre> +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ + +Markus Kuhn [ˈmaʳkʊs kuːn] <mkuhn@acm.org> — 1999-08-20 + + +The ASCII 
compatible UTF-8 encoding of ISO 10646 and Unicode +plain-text files is defined in RFC 2279 and in ISO 10646-1 Annex R. + + +Using Unicode/UTF-8, you can write in emails and source code things such as + +Mathematics and Sciences: + + ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), + + ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (A ⇔ B), + + 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm + +Linguistics and dictionaries: + + ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn + Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] + +APL: + + ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ + +Nicer typography in plain text files: + + ╔══════════════════════════════════════════╗ + ║ ║ + ║ • ‘single’ and “double” quotes ║ + ║ ║ + ║ • Curly apostrophes: “We’ve been here” ║ + ║ ║ + ║ • Latin-1 apostrophe and accents: '´` ║ + ║ ║ + ║ • ‚deutsche‘ „Anführungszeichen“ ║ + ║ ║ + ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ + ║ ║ + ║ • ASCII safety test: 1lI|, 0OD, 8B ║ + ║ ╭─────────╮ ║ + ║ • the euro symbol: │ € 14.95 │ ║ + ║ ╰─────────╯ ║ + ╚══════════════════════════════════════════╝ + +Greek (in Polytonic): + + The Greek anthem: + + Σὲ γνωρίζω ἀπὸ τὴν κόψη + τοῦ σπαθιοῦ τὴν τρομερή, + σὲ γνωρίζω ἀπὸ τὴν ὄψη + ποὺ μὲ βία μετράει τὴ γῆ. + + ᾿Απ᾿ τὰ κόκκαλα βγαλμένη + τῶν ῾Ελλήνων τὰ ἱερά + καὶ σὰν πρῶτα ἀνδρειωμένη + χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! + + From a speech of Demosthenes in the 4th century BC: + + Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, + ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς + λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ + τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ + εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ + πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν + οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, + οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. 
ἐγὼ δέ, ὅτι μέν + ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον + τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι + γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν + προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους + σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ + τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ + τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς + τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. + + Δημοσθένους, Γ´ ᾿Ολυνθιακὸς + +Georgian: + + From a Unicode conference invitation: + + გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო + კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, + ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს + ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, + ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება + ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, + ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. + +Russian: + + From a Unicode conference invitation: + + Зарегистрируйтесь сейчас на Десятую Международную Конференцию по + Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. + Конференция соберет широкий круг экспертов по вопросам глобального + Интернета и Unicode, локализации и интернационализации, воплощению и + применению Unicode в различных операционных системах и программных + приложениях, шрифтах, верстке и многоязычных компьютерных системах. 
+ +Thai (UCS Level 2): + + Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese + classic 'San Gua'): + + [----------------------------|------------------------] + ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ + สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา + ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา + โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ + เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ + ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ + พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ + ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ + + (The above is a two-column text. If combining characters are handled + correctly, the lines of the second column should be aligned with the + | character above.) + +Ethiopian: + + Proverbs in the Amharic language: + + ሰማይ አይታረስ ንጉሥ አይከሰስ። + ብላ ካለኝ እንደአባቴ በቆመጠኝ። + ጌጥ ያለቤቱ ቁምጥና ነው። + ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። + የአፍ ወለምታ በቅቤ አይታሽም። + አይጥ በበላ ዳዋ ተመታ። + ሲተረጉሙ ይደረግሙ። + ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። + ድር ቢያብር አንበሳ ያስር። + ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። + እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። + የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። + ሥራ ከመፍታት ልጄን ላፋታት። + ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። + የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። + ተንጋሎ ቢተፉ ተመልሶ ባፉ። + ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። + እግርህን በፍራሽህ ልክ ዘርጋ። + +Runes: + + ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ + + (Old English, which transcribed into Latin reads 'He cwaeth that he + bude thaem lande northweardum with tha Westsae.' 
and means 'He said + that he lived in the northern land near the Western Sea.') + +Braille: + + ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ + + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ + ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ + ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ + ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ + ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ + ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ + + ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ + ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ + ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ + ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ + ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ + ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ + ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ + ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ + ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ + + (The first couple of paragraphs of "A Christmas Carol" by Dickens) + +Compact font selection example text: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 + abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ + –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд + ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა + +Greetings in various languages: + + Hello world, Καλημέρα κόσμε, コンニチハ + +Box drawing alignment tests: █ + ▉ + ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ + ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ + ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ + ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ + ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ + ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ + ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ + +</pre> +</BODY> +</HTML> |