1 files changed, 2524 insertions, 0 deletions
diff --git a/src/UCdomap.c b/src/UCdomap.c
new file mode 100644
index 0000000..0ec239a
--- /dev/null
+++ b/src/UCdomap.c
@@ -0,0 +1,2524 @@
+/*
+ * $LynxId: UCdomap.c,v 1.111 2023/01/05 09:17:16 tom Exp $
+ *
+ *  UCdomap.c
+ *  =========
+ *
+ * This is a Lynx chartrans engine, its external calls are in UCMap.h
+ *
+ * Derived from code in the Linux kernel console driver.
+ *
+ * The GNU General Public License therefore applies, see the file
+ * COPYING in the top-level directory which should come with every Lynx
+ * distribution.
+ *
+ *  [ original comment: - KW ]
+ *
+ * Mapping from internal code (such as Latin-1 or Unicode or IBM PC code)
+ * to font positions.
+ *
+ * aeb, 950210
+ */
+#include <HTUtils.h>
+#include <HTMLDTD.h>
+
+#include <LYGlobalDefs.h>
+#include <UCdomap.h>
+#include <UCMap.h>
+#include <UCAux.h>
+#include <UCDefs.h>
+#include <LYCharSets.h>
+#include <LYStrings.h>
+#include <LYUtils.h>
+
+#if defined(USE_LOCALE_CHARSET) && defined(HAVE_LANGINFO_CODESET)
+#include <langinfo.h>
+#endif
+
+#if defined(USE_JAPANESEUTF8_SUPPORT) || defined(EXP_CHINESEUTF8_SUPPORT)
+#include <iconv.h>
+#endif
+
+#include <LYLeaks.h>
+
+/*
+ * Include chartrans tables:
+ */
+#include <cp1250_uni.h>		/* WinLatin2 (cp1250)   */
+#include <cp1251_uni.h>		/* WinCyrillic (cp1251) */
+#include <cp1252_uni.h>		/* WinLatin1 (cp1252)   */
+#include <cp1253_uni.h>		/* WinGreek (cp1253)    */
+#include <cp1255_uni.h>		/* WinHebrew (cp1255)   */
+#include <cp1256_uni.h>		/* WinArabic (cp1256)   */
+#include <cp1257_uni.h>		/* WinBaltRim (cp1257)  */
+#include <cp437_uni.h>		/* DosLatinUS (cp437)   */
+#include <cp737_uni.h>		/* DosGreek (cp737)     */
+#include <cp775_uni.h>		/* DosBaltRim (cp775)   */
+#include <cp850_uni.h>		/* DosLatin1 (cp850)    */
+#include <cp852_uni.h>		/* DosLatin2 (cp852)    */
+#include <cp857_uni.h>		/* DosTurkish (cp857)   */
+#include <cp862_uni.h>		/* DosHebrew (cp862)    */
+#include <cp864_uni.h>		/* DosArabic (cp864)    */
+#include <cp866_uni.h>		/* DosCyrillic (cp866)  */
+#include <cp869_uni.h>		/* DosGreek2 (cp869)    */
+#include <def7_uni.h>		/* 7 bit approximations */
+#include <dmcs_uni.h>		/* DEC Multinational    */
+#include <hp_uni.h>		/* HP Roman8            */
+#include <iso01_uni.h>		/* ISO Latin 1          */
+#include <iso02_uni.h>		/* ISO Latin 2          */
+#include <iso03_uni.h>		/* ISO Latin 3          */
+#include <iso04_uni.h>		/* ISO Latin 4          */
+#include <iso05_uni.h>		/* ISO 8859-5 Cyrillic  */
+#include <iso06_uni.h>		/* ISO 8859-6 Arabic    */
+#include <iso07_uni.h>		/* ISO 8859-7 Greek     */
+#include <iso08_uni.h>		/* ISO 8859-8 Hebrew    */
+#include <iso09_uni.h>		/* ISO 8859-9 (Latin 5) */
+#include <iso10_uni.h>		/* ISO 8859-10          */
+#include <iso13_uni.h>		/* ISO 8859-13 (Latin 7) */
+#include <iso14_uni.h>		/* ISO 8859-14 (Latin 8) */
+#include <iso15_uni.h>		/* ISO 8859-15 (Latin 9) */
+#include <iso16_uni.h>		/* ISO 8859-16 (Latin 10) */
+#include <koi8r_uni.h>		/* KOI8-R Cyrillic      */
+#include <mac_uni.h>		/* Macintosh (8 bit)    */
+#include <mnem2_suni.h>		/* RFC 1345 Mnemonic    */
+#include <next_uni.h>		/* NeXT character set   */
+#include <rfc_suni.h>		/* RFC 1345 w/o Intro   */
+/* #include <utf8_uni.h> */ /* UNICODE UTF 8        */
+#include <viscii_uni.h>		/* Vietnamese (VISCII)  */
+#include <cp866u_uni.h>		/* Ukrainian Cyrillic (866) */
+#include <koi8u_uni.h>		/* Ukrainian Cyrillic (koi8-u */
+#include <pt154_uni.h>		/* Cyrillic-Asian (PT154) */
+
+#ifdef CAN_AUTODETECT_DISPLAY_CHARSET
+int auto_display_charset = -1;
+#endif
+
+static const char *UC_GNsetMIMEnames[4] =
+{
+    "iso-8859-1", "x-dec-graphics", "cp437", "x-transparent"
+};
+
+static int UC_GNhandles[4] =
+{
+    -1, -1, -1, -1
+};
+
+/*
+ * Some of the code below, and some of the comments, are left in for
+ * historical reasons.  Not all those tables below are currently
+ * really needed (and what with all those hardwired codepoints),
+ * but let's keep them around for now.  They may come in handy if we
+ * decide to make more extended use of the mechanisms (including e.g.
+ * for chars < 127...).  - KW
+ */
+
+static u16 translations[][256] =
+{
+    /*
+     * 8-bit Latin-1 mapped to Unicode -- trivial mapping.
+     */
+    {
+	0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
+	0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
+	0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
+	0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
+	0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+	0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
+	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+	0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
+	0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+	0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
+	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+	0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
+	0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+	0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
+	0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+	0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
+	0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+	0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
+	0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+	0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
+	0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
+	0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
+	0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
+	0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
+	0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
+	0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
+	0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
+	0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
+	0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
+	0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
+	0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
+	0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff
+    },
+    /*
+     * VT100 graphics mapped to Unicode.
+     */
+    {
+	0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
+	0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
+	0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
+	0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
+	0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+	0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
+	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+	0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
+	0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+	0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
+	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+	0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x00a0,
+	0x25c6, 0x2592, 0x2409, 0x240c, 0x240d, 0x240a, 0x00b0, 0x00b1,
+	0x2424, 0x240b, 0x2518, 0x2510, 0x250c, 0x2514, 0x253c, 0xf800,
+	0xf801, 0x2500, 0xf803, 0xf804, 0x251c, 0x2524, 0x2534, 0x252c,
+	0x2502, 0x2264, 0x2265, 0x03c0, 0x2260, 0x00a3, 0x00b7, 0x007f,
+	0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+	0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
+	0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+	0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
+	0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
+	0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
+	0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
+	0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
+	0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
+	0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
+	0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
+	0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
+	0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
+	0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
+	0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
+	0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff
+    },
+    /*
+     * IBM Codepage 437 mapped to Unicode.
+     */
+    {
+	0x0000, 0x263a, 0x263b, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022,
+	0x25d8, 0x25cb, 0x25d9, 0x2642, 0x2640, 0x266a, 0x266b, 0x263c,
+	0x25ba, 0x25c4, 0x2195, 0x203c, 0x00b6, 0x00a7, 0x25ac, 0x21a8,
+	0x2191, 0x2193, 0x2192, 0x2190, 0x221f, 0x2194, 0x25b2, 0x25bc,
+	0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+	0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
+	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+	0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
+	0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+	0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
+	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+	0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
+	0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+	0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
+	0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+	0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x2302,
+	0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7,
+	0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5,
+	0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
+	0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192,
+	0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba,
+	0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
+	0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556,
+	0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510,
+	0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f,
+	0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567,
+	0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b,
+	0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
+	0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4,
+	0x03a6, 0x0398, 0x03a9, 0x03b4, 0x221e, 0x03c6, 0x03b5, 0x2229,
+	0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248,
+	0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0x00a0
+    },
+    /*
+     * User mapping -- default to codes for direct font mapping.
+     */
+    {
+	0xf000, 0xf001, 0xf002, 0xf003, 0xf004, 0xf005, 0xf006, 0xf007,
+	0xf008, 0xf009, 0xf00a, 0xf00b, 0xf00c, 0xf00d, 0xf00e, 0xf00f,
+	0xf010, 0xf011, 0xf012, 0xf013, 0xf014, 0xf015, 0xf016, 0xf017,
+	0xf018, 0xf019, 0xf01a, 0xf01b, 0xf01c, 0xf01d, 0xf01e, 0xf01f,
+	0xf020, 0xf021, 0xf022, 0xf023, 0xf024, 0xf025, 0xf026, 0xf027,
+	0xf028, 0xf029, 0xf02a, 0xf02b, 0xf02c, 0xf02d, 0xf02e, 0xf02f,
+	0xf030, 0xf031, 0xf032, 0xf033, 0xf034, 0xf035, 0xf036, 0xf037,
+	0xf038, 0xf039, 0xf03a, 0xf03b, 0xf03c, 0xf03d, 0xf03e, 0xf03f,
+	0xf040, 0xf041, 0xf042, 0xf043, 0xf044, 0xf045, 0xf046, 0xf047,
+	0xf048, 0xf049, 0xf04a, 0xf04b, 0xf04c, 0xf04d, 0xf04e, 0xf04f,
+	0xf050, 0xf051, 0xf052, 0xf053, 0xf054, 0xf055, 0xf056, 0xf057,
+	0xf058, 0xf059, 0xf05a, 0xf05b, 0xf05c, 0xf05d, 0xf05e, 0xf05f,
+	0xf060, 0xf061, 0xf062, 0xf063, 0xf064, 0xf065, 0xf066, 0xf067,
+	0xf068, 0xf069, 0xf06a, 0xf06b, 0xf06c, 0xf06d, 0xf06e, 0xf06f,
+	0xf070, 0xf071, 0xf072, 0xf073, 0xf074, 0xf075, 0xf076, 0xf077,
+	0xf078, 0xf079, 0xf07a, 0xf07b, 0xf07c, 0xf07d, 0xf07e, 0xf07f,
+	0xf080, 0xf081, 0xf082, 0xf083, 0xf084, 0xf085, 0xf086, 0xf087,
+	0xf088, 0xf089, 0xf08a, 0xf08b, 0xf08c, 0xf08d, 0xf08e, 0xf08f,
+	0xf090, 0xf091, 0xf092, 0xf093, 0xf094, 0xf095, 0xf096, 0xf097,
+	0xf098, 0xf099, 0xf09a, 0xf09b, 0xf09c, 0xf09d, 0xf09e, 0xf09f,
+	0xf0a0, 0xf0a1, 0xf0a2, 0xf0a3, 0xf0a4, 0xf0a5, 0xf0a6, 0xf0a7,
+	0xf0a8, 0xf0a9, 0xf0aa, 0xf0ab, 0xf0ac, 0xf0ad, 0xf0ae, 0xf0af,
+	0xf0b0, 0xf0b1, 0xf0b2, 0xf0b3, 0xf0b4, 0xf0b5, 0xf0b6, 0xf0b7,
+	0xf0b8, 0xf0b9, 0xf0ba, 0xf0bb, 0xf0bc, 0xf0bd, 0xf0be, 0xf0bf,
+	0xf0c0, 0xf0c1, 0xf0c2, 0xf0c3, 0xf0c4, 0xf0c5, 0xf0c6, 0xf0c7,
+	0xf0c8, 0xf0c9, 0xf0ca, 0xf0cb, 0xf0cc, 0xf0cd, 0xf0ce, 0xf0cf,
+	0xf0d0, 0xf0d1, 0xf0d2, 0xf0d3, 0xf0d4, 0xf0d5, 0xf0d6, 0xf0d7,
+	0xf0d8, 0xf0d9, 0xf0da, 0xf0db, 0xf0dc, 0xf0dd, 0xf0de, 0xf0df,
+	0xf0e0, 0xf0e1, 0xf0e2, 0xf0e3, 0xf0e4, 0xf0e5, 0xf0e6, 0xf0e7,
+	0xf0e8, 0xf0e9, 0xf0ea, 0xf0eb, 0xf0ec, 0xf0ed, 0xf0ee, 0xf0ef,
+	0xf0f0, 0xf0f1, 0xf0f2, 0xf0f3, 0xf0f4, 0xf0f5, 0xf0f6, 0xf0f7,
+	0xf0f8, 0xf0f9, 0xf0fa, 0xf0fb, 0xf0fc, 0xf0fd, 0xf0fe, 0xf0ff
+    }
+};
+static u16 *UC_translate = NULL;
+
+static struct UC_charset UCInfo[MAXCHARSETS];
+
+/*
+ * The standard kernel character-to-font mappings are not invertible
+ * -- this is just a best effort.
+ */
+#define MAX_GLYPH 512		/* Max possible glyph value */
+
+static unsigned char *inv_translate = NULL;
+static unsigned char inv_norm_transl[MAX_GLYPH];
+static unsigned char *inverse_translations[4] =
+{NULL, NULL, NULL, NULL};
+
+static void set_inverse_transl(int i);
+static u16 *set_translate(int m);
+static int UC_valid_UC_charset(int UC_charset_hndl);
+static void UC_con_set_trans(int UC_charset_in_hndl, int Gn, int update_flag);
+static int con_insert_unipair(unsigned unicode, unsigned fontpos, int fordefault);
+static int con_insert_unipair_str(unsigned unicode, const char *replace_str, int fordefault);
+static void con_clear_unimap(int fordefault);
+static void con_clear_unimap_str(int fordefault);
+static void con_set_default_unimap(void);
+static int UC_con_set_unimap(int UC_charset_out_hndl, int update_flag);
+static int UC_con_set_unimap_str(unsigned ct, struct unipair_str *list, int fordefault);
+static int conv_uni_to_pc(long ucs, int usedefault);
+static int conv_uni_to_str(char *outbuf, int buflen, UCode_t ucs, int usedefault);
+static void UCconsole_map_init(void);
+static int UC_MapGN(int UChndl, int update_flag);
+static int UC_FindGN_byMIME(const char *UC_MIMEcharset);
+static void UCreset_allocated_LYCharSets(void);
+static STRING2PTR UC_setup_LYCharSets_repl(int UC_charset_in_hndl, unsigned lowest8);
+static int UC_Register_with_LYCharSets(int s,
+				       const char *UC_MIMEcharset,
+				       const char *UC_LYNXcharset,
+				       int lowest_eightbit);
+
+#ifdef LY_FIND_LEAKS
+static void UCfree_allocated_LYCharSets(void);
+static void UCcleanup_mem(void);
+#endif
+
+static int default_UChndl = -1;
+
+static void set_inverse_transl(int i)
+{
+    int j, glyph;
+    u16 *p = translations[i];
+    unsigned char *q = inverse_translations[i];
+
+    if (!q) {
+	/*
+	 * Slightly messy to avoid calling kmalloc too early.
+	 */
+	q = inverse_translations[i] = ((i == LAT1_MAP) ?
+				       inv_norm_transl :
+				       typeMallocn(unsigned char, MAX_GLYPH));
+
+	if (!q)
+	    return;
+    }
+    for (j = 0; j < MAX_GLYPH; j++)
+	q[j] = 0;
+
+    for (j = 0; j < E_TABSZ; j++) {
+	glyph = conv_uni_to_pc((long) p[j], 0);
+	if (glyph >= 0 && glyph < MAX_GLYPH && q[glyph] < 32) {
+	    /*
+	     * Prefer '-' above SHY etc.
+	     */
+	    q[glyph] = UCH(j);
+	}
+    }
+}
+
+static u16 *set_translate(int m)
+{
+    if (!inverse_translations[m])
+	set_inverse_transl(m);
+    inv_translate = inverse_translations[m];
+    return translations[m];
+}
+
+static int UC_valid_UC_charset(int UC_charset_hndl)
+{
+    return (UC_charset_hndl >= 0 && UC_charset_hndl < UCNumCharsets);
+}
+
+static void UC_con_set_trans(int UC_charset_in_hndl,
+			     int Gn,
+			     int update_flag)
+{
+    int i, j;
+    const u16 *p;
+    u16 *ptrans;
+
+    if (!UC_valid_UC_charset(UC_charset_in_hndl)) {
+	CTRACE((tfp, "UC_con_set_trans: Invalid charset handle %d.\n",
+		UC_charset_in_hndl));
+	return;
+    }
+    ptrans = translations[Gn];
+    p = UCInfo[UC_charset_in_hndl].unitable;
+#if(0)
+    if (p == UC_current_unitable) {	/* test whether pointers are equal */
+	return;			/* nothing to be done */
+    }
+    /*
+     * The font is always 256 characters - so far.
+     */
+    con_clear_unimap();
+#endif
+    for (i = 0; i < 256; i++) {
+	if ((j = UCInfo[UC_charset_in_hndl].unicount[i])) {
+	    ptrans[i] = *p;
+	    for (; j; j--) {
+		p++;
+	    }
+	} else {
+	    ptrans[i] = UCS_REPL;
+	}
+    }
+    if (update_flag) {
+	set_inverse_transl(Gn);	/* Update inverse translation for this one */
+    }
+}
+
+/*
+ * Unicode -> current font conversion
+ *
+ * A font has at most 512 chars, usually 256.
+ * But one font position may represent several Unicode chars.
+ * A hashtable is somewhat of a pain to deal with, so use a
+ * "paged table" instead.  Simulation has shown the memory cost of
+ * this 3-level paged table scheme to be comparable to a hash table.
+ */
+static int hashtable_contents_valid = 0;	/* Use ASCII-only mode for bootup */
+static int hashtable_str_contents_valid = 0;
+
+static u16 **uni_pagedir[32] =
+{
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
+};
+
+static char ***uni_pagedir_str[32] =
+{
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
+};
+
+static const u16 *UC_current_unitable = NULL;
+static struct unimapdesc_str *UC_current_unitable_str = NULL;
+
+/*
+ * Keep a second set of structures for the translation designated
+ * as "default" - kw
+ */
+static int unidefault_contents_valid = 0;	/* Use ASCII-only mode for bootup */
+static int unidefault_str_contents_valid = 0;
+
+static u16 **unidefault_pagedir[32] =
+{
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
+};
+static char ***unidefault_pagedir_str[32] =
+{
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
+};
+
+static const u16 *UC_default_unitable = 0;
+static const struct unimapdesc_str *UC_default_unitable_str = 0;
+
+static int con_insert_unipair(unsigned unicode, unsigned fontpos, int fordefault)
+{
+    int i;
+    unsigned n;
+    u16 **p1, *p2;
+
+    if (fordefault)
+	p1 = unidefault_pagedir[n = unicode >> 11];
+    else
+	p1 = uni_pagedir[n = unicode >> 11];
+    if (!p1) {
+	if ((p1 = typecallocn(u16 *, 32)) == NULL)
+	    return ucError;
+	if (fordefault)
+	    unidefault_pagedir[n] = p1;
+	else
+	    uni_pagedir[n] = p1;
+    }
+
+    if (!(p2 = p1[n = (unicode >> 6) & 0x1f])) {
+	p2 = p1[n] = (u16 *) malloc(64 * sizeof(u16));
+	if (!p2)
+	    return ucError;
+
+	for (i = 0; i < 64; i++) {
+	    p2[i] = UCS_HIDE;	/* No glyph for this character (yet) */
+	}
+    }
+
+    p2[unicode & 0x3f] = (u16) fontpos;
+
+    return 0;
+}
+
+static int con_insert_unipair_str(unsigned unicode, const char *replace_str,
+				  int fordefault)
+{
+    unsigned n;
+    char ***p1;
+    const char **p2;
+
+    if (fordefault)
+	p1 = unidefault_pagedir_str[n = unicode >> 11];
+    else
+	p1 = uni_pagedir_str[n = unicode >> 11];
+    if (!p1) {
+	if ((p1 = typecallocn(char **, 32)) == NULL)
+	      return ucError;
+
+	if (fordefault)
+	    unidefault_pagedir_str[n] = p1;
+	else
+	    uni_pagedir_str[n] = p1;
+    }
+
+    n = ((unicode >> 6) & 0x1f);
+    if (!p1[n]) {
+	if ((p1[n] = typecallocn(char *, 64)) == NULL)
+	      return ucError;
+    }
+    p2 = (const char **) p1[n];
+
+    p2[unicode & 0x3f] = replace_str;
+
+    return 0;
+}
+
+/*
+ * ui arg was a leftover, deleted.  - KW
+ */
+static void con_clear_unimap(int fordefault)
+{
+    int i, j;
+    u16 **p1;
+
+    if (fordefault) {
+	for (i = 0; i < 32; i++) {
+	    if ((p1 = unidefault_pagedir[i]) != NULL) {
+		for (j = 0; j < 32; j++) {
+		    FREE(p1[j]);
+		}
+		FREE(p1);
+	    }
+	    unidefault_pagedir[i] = NULL;
+	}
+
+	unidefault_contents_valid = 1;
+    } else {
+	for (i = 0; i < 32; i++) {
+	    if ((p1 = uni_pagedir[i]) != NULL) {
+		for (j = 0; j < 32; j++) {
+		    FREE(p1[j]);
+		}
+		FREE(p1);
+	    }
+	    uni_pagedir[i] = NULL;
+	}
+
+	hashtable_contents_valid = 1;
+    }
+}
+
+static void con_clear_unimap_str(int fordefault)
+{
+    int i, j;
+    char ***p1;
+
+    if (fordefault) {
+	for (i = 0; i < 32; i++) {
+	    if ((p1 = unidefault_pagedir_str[i]) != NULL) {
+		for (j = 0; j < 32; j++) {
+		    FREE(p1[j]);
+		}
+		FREE(p1);
+	    }
+	    unidefault_pagedir_str[i] = NULL;
+	}
+
+	unidefault_str_contents_valid = 1;	/* ??? probably no use... */
+    } else {
+	for (i = 0; i < 32; i++) {
+	    if ((p1 = uni_pagedir_str[i]) != NULL) {
+		for (j = 0; j < 32; j++) {
+		    FREE(p1[j]);
+		}
+		FREE(p1);
+	    }
+	    uni_pagedir_str[i] = NULL;
+	}
+
+	hashtable_str_contents_valid = 1;	/* ??? probably no use... */
+    }
+}
+
+/*
+ * Loads the unimap for the hardware font, as defined in uni_hash.tbl.
+ * The representation used was the most compact I could come up
+ * with.  This routine is executed at sys_setup time, and when the
+ * PIO_FONTRESET ioctl is called.
+ */
+static void con_set_default_unimap(void)
+{
+    int i, j;
+    const u16 *p;
+
+    /*
+     * The default font is always 256 characters.
+     */
+    con_clear_unimap(1);
+
+    p = dfont_unitable;
+    for (i = 0; i < 256; i++) {
+	for (j = dfont_unicount[i]; j; j--) {
+	    con_insert_unipair(*(p++), (u16) i, 1);
+	}
+    }
+
+    UC_default_unitable = dfont_unitable;
+
+    con_clear_unimap_str(1);
+    UC_con_set_unimap_str(dfont_replacedesc.entry_ct, repl_map, 1);
+    UC_default_unitable_str = &dfont_replacedesc;
+}
+
+int UCNumCharsets = 0;
+
+int UCLYhndl_HTFile_for_unspec = -1;
+int UCLYhndl_HTFile_for_unrec = -1;
+int UCLYhndl_for_unspec = -1;
+int UCLYhndl_for_unrec = -1;
+
+/* easy to type, will initialize later */
+int LATIN1 = -1;		/* UCGetLYhndl_byMIME("iso-8859-1") */
+int US_ASCII = -1;		/* UCGetLYhndl_byMIME("us-ascii")   */
+int UTF8_handle = -1;		/* UCGetLYhndl_byMIME("utf-8")      */
+int TRANSPARENT = -1;		/* UCGetLYhndl_byMIME("x-transparent")  */
+
+static int UC_con_set_unimap(int UC_charset_out_hndl,
+			     int update_flag)
+{
+    int i, j;
+    const u16 *p;
+
+    if (!UC_valid_UC_charset(UC_charset_out_hndl)) {
+	CTRACE((tfp, "UC_con_set_unimap: Invalid charset handle %d.\n",
+		UC_charset_out_hndl));
+	return ucError;
+    }
+
+    p = UCInfo[UC_charset_out_hndl].unitable;
+    if (p == UC_current_unitable) {	/* test whether pointers are equal */
+	return update_flag;	/* nothing to be done */
+    }
+    UC_current_unitable = p;
+
+    /*
+     * The font is always 256 characters - so far.
+     */
+    con_clear_unimap(0);
+
+    for (i = 0; i < 256; i++) {
+	for (j = UCInfo[UC_charset_out_hndl].unicount[i]; j; j--) {
+	    con_insert_unipair(*(p++), (u16) i, 0);
+	}
+    }
+
+    if (update_flag) {
+	for (i = 0; i <= 3; i++) {
+	    set_inverse_transl(i);	/* Update all inverse translations */
+	}
+    }
+
+    return 0;
+}
+
+static int UC_con_set_unimap_str(unsigned ct, struct unipair_str *list,
+				 int fordefault)
+{
+    int err = 0, err1;
+
+    while (ct--) {
+	if ((err1 = con_insert_unipair_str(list->unicode,
+					   list->replace_str,
+					   fordefault)) != 0) {
+	    err = err1;
+	}
+	list++;
+    }
+
+    /*
+     * No inverse translations for replacement strings!
+     */
+    if (!err) {
+	if (fordefault)
+	    unidefault_str_contents_valid = 1;
+	else
+	    hashtable_str_contents_valid = 1;
+    }
+
+    return err;
+}
+
+static int conv_uni_to_pc(long ucs,
+			  int usedefault)
+{
+    int h;
+    u16 **p1, *p2;
+
+    /*
+     * Only 16-bit codes supported at this time.
+     */
+    if (ucs > 0xffff) {
+	/*
+	 * U+FFFD:  REPLACEMENT CHARACTER.
+	 */
+	ucs = UCS_REPL;
+    } else if (ucs < 0x20 || ucs >= 0xfffe) {
+	/*
+	 * Not a printable character.
+	 */
+	return ucError;
+    } else if (ucs == 0xfeff || (ucs >= 0x200b && ucs <= 0x200f)) {
+	/*
+	 * Zero-width space.
+	 */
+	return ucZeroWidth;
+    } else if ((ucs & ~UNI_DIRECT_MASK) == UNI_DIRECT_BASE) {
+	/*
+	 * UNI_DIRECT_BASE indicates the start of the region in the
+	 * User Zone which always has a 1:1 mapping to the currently
+	 * loaded font.  The UNI_DIRECT_MASK indicates the bit span
+	 * of the region.
+	 */
+	return (ucs & UNI_DIRECT_MASK);
+    }
+
+    if (usedefault) {
+	if (!unidefault_contents_valid)
+	    return ucInvalidHash;
+	p1 = unidefault_pagedir[ucs >> 11];
+    } else {
+	if (!hashtable_contents_valid)
+	    return ucInvalidHash;
+	p1 = uni_pagedir[ucs >> 11];
+    }
+
+    if (p1 &&
+	(p2 = p1[(ucs >> 6) & 0x1f]) &&
+	(h = p2[ucs & 0x3f]) < MAX_GLYPH) {
+	return h;
+    }
+
+    /*
+     * Not found.
+     */
+    return ucNotFound;
+}
+
+/*
+ * Note:  contents of outbuf is not changes for negative return value!
+ */
+static int conv_uni_to_str(char *outbuf,
+			   int buflen,
+			   UCode_t ucs,
+			   int usedefault)
+{
+    char *h;
+    char ***p1, **p2;
+
+    /*
+     * Only 16-bit codes supported at this time.
+     */
+    if (ucs > 0xffff) {
+	/*
+	 * U+FFFD:  REPLACEMENT CHARACTER.
+	 */
+	ucs = UCS_REPL;
+	/*
+	 * Maybe the following two cases should be allowed here??  - KW
+	 */
+    } else if (ucs < 0x20 || ucs >= 0xfffe) {
+	/*
+	 * Not a printable character.
+	 */
+	return ucError;
+    } else if (ucs == 0xfeff || (ucs >= 0x200b && ucs <= 0x200f)) {
+	/*
+	 * Zero-width space.
+	 */
+	return ucZeroWidth;
+    }
+
+    if (usedefault) {
+	if (!unidefault_str_contents_valid)
+	    return ucInvalidHash;
+	p1 = unidefault_pagedir_str[ucs >> 11];
+    } else {
+	if (!hashtable_str_contents_valid)
+	    return ucInvalidHash;
+	p1 = uni_pagedir_str[ucs >> 11];
+    }
+
+    if (p1 &&
+	(p2 = p1[(ucs >> 6) & 0x1f]) &&
+	(h = p2[ucs & 0x3f])) {
+	StrNCpy(outbuf, h, (buflen - 1));
+	return 1;		/* ok ! */
+    }
+
+    /*
+     * Not found.
+     */
+    return ucNotFound;
+}
+
+int UCInitialized = 0;
+
+/*
+ * [ original comment:  - KW ]
+ * This is called at sys_setup time, after memory and the console are
+ * initialized.  It must be possible to call kmalloc(..., GFP_KERNEL)
+ * from this function, hence the call from sys_setup.
+ */
+static void UCconsole_map_init(void)
+{
+    con_set_default_unimap();
+    UCInitialized = 1;
+}
+
+/*
+ * OK now, finally, some stuff that is more specifically for Lynx:  - KW
+ */
+int UCTransUniChar(UCode_t unicode,
+		   int charset_out)
+{
+    int rc = 0;
+    int UChndl_out;
+    int isdefault, trydefault = 0;
+    const u16 *ut;
+
+    if ((UChndl_out = LYCharSet_UC[charset_out].UChndl) < 0) {
+	if (LYCharSet_UC[charset_out].codepage < 0) {
+	    if (unicode < 128) {
+		rc = (int) unicode;
+	    } else {
+		rc = LYCharSet_UC[charset_out].codepage;
+	    }
+	    return rc;
+	}
+	if ((UChndl_out = default_UChndl) < 0) {
+	    return ucCannotOutput;
+	}
+	isdefault = 1;
+    } else {
+	isdefault = UCInfo[UChndl_out].replacedesc.isdefault;
+	trydefault = UCInfo[UChndl_out].replacedesc.trydefault;
+    }
+
+    if (!isdefault) {
+	ut = UCInfo[UChndl_out].unitable;
+	if (ut != UC_current_unitable) {
+	    rc = UC_con_set_unimap(UChndl_out, 1);
+	    if (rc < 0) {
+		return rc;
+	    }
+	}
+	rc = conv_uni_to_pc(unicode, 0);
+	if (rc >= 0) {
+	    return rc;
+	}
+    }
+    if (isdefault || trydefault) {
+	rc = conv_uni_to_pc(unicode, 1);
+	if (rc >= 0) {
+	    return rc;
+	}
+    }
+    if (!isdefault && (rc == ucNotFound)) {
+	rc = conv_uni_to_pc(UCS_REPL, 0);
+    }
+    if ((isdefault || trydefault) && (rc == ucNotFound)) {
+	rc = conv_uni_to_pc(UCS_REPL, 1);
+    }
+    return rc;
+}
+
+/*
+ * Returns string length, or negative value for error.
+ */
+int UCTransUniCharStr(char *outbuf,
+		      int buflen,
+		      UCode_t unicode,
+		      int charset_out,
+		      int chk_single_flag)
+{
+    int rc = ucUnknown, src = 0;
+    int UChndl_out;
+    int isdefault, trydefault = 0;
+    struct unimapdesc_str *repl;
+    const u16 *ut;
+
+    if (buflen < 2)
+	return ucBufferTooSmall;
+
+    if ((UChndl_out = LYCharSet_UC[charset_out].UChndl) < 0) {
+	if (LYCharSet_UC[charset_out].codepage < 0)
+	    return LYCharSet_UC[charset_out].codepage;
+	if ((UChndl_out = default_UChndl) < 0)
+	    return ucCannotOutput;
+	isdefault = 1;
+    } else {
+	isdefault = UCInfo[UChndl_out].replacedesc.isdefault;
+	trydefault = UCInfo[UChndl_out].replacedesc.trydefault;
+    }
+
+    if (chk_single_flag) {
+	if (!isdefault) {
+	    ut = UCInfo[UChndl_out].unitable;
+	    if (ut != UC_current_unitable) {
+		src = UC_con_set_unimap(UChndl_out, 1);
+		if (src < 0) {
+		    return src;
+		}
+	    }
+	}
+	src = conv_uni_to_pc(unicode, isdefault);
+	if (src >= 32) {
+	    outbuf[0] = (char) src;
+	    outbuf[1] = '\0';
+	    return 1;
+	}
+    }
+
+    repl = &(UCInfo[UChndl_out].replacedesc);
+    if (!isdefault) {
+	if (repl != UC_current_unitable_str) {
+	    con_clear_unimap_str(0);
+	    (void) UC_con_set_unimap_str(repl->entry_ct, repl->entries, 0);
+	    UC_current_unitable_str = repl;
+	}
+	rc = conv_uni_to_str(outbuf, buflen, unicode, 0);
+	if (rc >= 0)
+	    return (int) strlen(outbuf);
+    }
+    if (trydefault && chk_single_flag) {
+	src = conv_uni_to_pc(unicode, 1);
+	if (src >= 32) {
+	    outbuf[0] = (char) src;
+	    outbuf[1] = '\0';
+	    return 1;
+	}
+    }
+    if (isdefault || trydefault) {
+#ifdef USE_JAPANESEUTF8_SUPPORT
+	if (LYCharSet_UC[charset_out].codepage == 0 &&
+	    LYCharSet_UC[charset_out].codepoints == 0) {
+	    iconv_t cd;
+	    char str[3], *pin, *pout;
+	    size_t inleft, outleft;
+	    char *tocode = NULL;
+
+	    str[0] = (char) (unicode >> 8);
+	    str[1] = (char) (unicode & 0xFF);
+	    str[2] = 0;
+	    pin = str;
+	    inleft = 2;
+	    pout = outbuf;
+	    outleft = (size_t) buflen;
+	    /*
+	     * Try TRANSLIT first, since it is an extension which can provide
+	     * translations when there is no available exact translation to
+	     * the target character set.
+	     */
+	    HTSprintf0(&tocode, "%s//TRANSLIT", LYCharSet_UC[charset_out].MIMEname);
+	    cd = iconv_open(tocode, "UTF-16BE");
+	    if (cd == (iconv_t) -1) {
+		/*
+		 * Try again, without TRANSLIT
+		 */
+		HTSprintf0(&tocode, "%s", LYCharSet_UC[charset_out].MIMEname);
+		cd = iconv_open(tocode, "UTF-16BE");
+
+		if (cd == (iconv_t) -1) {
+		    CTRACE((tfp,
+			    "Warning: Cannot transcode form charset %s to %s!\n",
+			    "UTF-16BE", tocode));
+		}
+	    }
+	    FREE(tocode);
+
+	    if (cd != (iconv_t) -1) {
+		rc = (int) iconv(cd, (ICONV_CONST char **) &pin, &inleft,
+				 &pout, &outleft);
+		iconv_close(cd);
+		if ((pout - outbuf) == 3) {
+		    CTRACE((tfp,
+			    "It seems to be a JIS X 0201 code(%" PRI_UCode_t
+			    "). Not supported.\n", CAST_UCode_t (unicode)));
+		    pin = str;
+		    inleft = 2;
+		    pout = outbuf;
+		    outleft = (size_t) buflen;
+		} else if (rc >= 0) {
+		    *pout = '\0';
+		    return (int) strlen(outbuf);
+		}
+	    }
+	}
+#endif
+	rc = conv_uni_to_str(outbuf, buflen, unicode, 1);
+	if (rc >= 0)
+	    return (int) strlen(outbuf);
+    }
+    if (rc == ucNotFound) {
+	if (!isdefault)
+	    rc = conv_uni_to_str(outbuf, buflen, UCS_REPL, 0);
+	if ((rc == ucNotFound) && (isdefault || trydefault))
+	    rc = conv_uni_to_str(outbuf, buflen, UCS_REPL, 1);
+	if (rc >= 0)
+	    return (int) strlen(outbuf);
+    }
+    if (chk_single_flag && src == ucNotFound) {
+	if (!isdefault)
+	    rc = conv_uni_to_pc(UCS_REPL, 0);
+	if ((rc == ucNotFound) && (isdefault || trydefault))
+	    rc = conv_uni_to_pc(UCS_REPL, 1);
+	if (rc >= 32) {
+	    outbuf[0] = (char) rc;
+	    outbuf[1] = '\0';
+	    return 1;
+	}
+	return rc;
+    }
+    return ucNotFound;
+}
+
+static int UC_lastautoGN = 0;
+
+static int UC_MapGN(int UChndl,
+		    int update_flag)
+{
+    int i, Gn, found, lasthndl;
+
+    found = 0;
+    Gn = -1;
+    for (i = 0; i < 4 && Gn < 0; i++) {
+	if (UC_GNhandles[i] < 0) {
+	    Gn = i;
+	} else if (UC_GNhandles[i] == UChndl) {
+	    Gn = i;
+	    found = 1;
+	}
+    }
+    if (found)
+	return Gn;
+    if (Gn >= 0) {
+	UCInfo[UChndl].GN = Gn;
+	UC_GNhandles[Gn] = UChndl;
+    } else {
+	if (UC_lastautoGN == GRAF_MAP) {
+	    Gn = IBMPC_MAP;
+	} else {
+	    Gn = GRAF_MAP;
+	}
+	UC_lastautoGN = Gn;
+	lasthndl = UC_GNhandles[Gn];
+	UCInfo[lasthndl].GN = -1;
+	UCInfo[UChndl].GN = Gn;
+	UC_GNhandles[Gn] = UChndl;
+    }
+    CTRACE((tfp, "UC_MapGN: Using %d <- %d (%s)\n",
+	    Gn, UChndl, UCInfo[UChndl].MIMEname));
+    UC_con_set_trans(UChndl, Gn, update_flag);
+    return Gn;
+}
+
+int UCTransChar(int ch_in,
+		int charset_in,
+		int charset_out)
+{
+    UCode_t unicode;
+    int Gn;
+    int rc = ucNotFound;
+    int UChndl_in, UChndl_out;
+    int isdefault, trydefault = 0;
+    const u16 *ut;
+    int upd = 0;
+
+    if (charset_in == charset_out)
+	return UCH(ch_in);
+    if (charset_in < 0)
+	return ucCannotConvert;
+    if ((UChndl_in = LYCharSet_UC[charset_in].UChndl) < 0)
+	return ucCannotConvert;
+    if ((UChndl_out = LYCharSet_UC[charset_out].UChndl) < 0) {
+	if (LYCharSet_UC[charset_out].codepage < 0)
+	    return LYCharSet_UC[charset_out].codepage;
+	if ((UChndl_out = default_UChndl) < 0)
+	    return ucCannotOutput;
+	isdefault = 1;
+    } else {
+	isdefault = UCInfo[UChndl_out].replacedesc.isdefault;
+	trydefault = UCInfo[UChndl_out].replacedesc.trydefault;
+    }
+    if (!UCInfo[UChndl_in].num_uni)
+	return ucCannotConvert;
+    if ((Gn = UCInfo[UChndl_in].GN) < 0) {
+	Gn = UC_MapGN(UChndl_in, 0);
+	upd = 1;
+    }
+
+    ut = UCInfo[UChndl_out].unitable;
+    if (!isdefault) {
+	if (ut == UC_current_unitable) {
+	    if (upd) {
+		set_inverse_transl(Gn);
+	    }
+	} else {
+	    rc = UC_con_set_unimap(UChndl_out, 1);
+	    if (rc > 0) {
+		set_inverse_transl(Gn);
+	    } else if (rc < 0) {
+		return rc;
+	    }
+	}
+    }
+    UC_translate = set_translate(Gn);
+    unicode = UC_translate[UCH(ch_in)];
+    if (!isdefault) {
+	rc = conv_uni_to_pc(unicode, 0);
+	if (rc >= 0)
+	    return rc;
+    }
+    if ((rc == ucNotFound) && (isdefault || trydefault)) {
+	rc = conv_uni_to_pc(unicode, 1);
+    }
+    if ((rc == ucNotFound) && !isdefault) {
+	rc = conv_uni_to_pc(UCS_REPL, 0);
+    }
+    if ((rc == ucNotFound) && (isdefault || trydefault)) {
+	rc = conv_uni_to_pc(UCS_REPL, 1);
+    }
+    return rc;
+}
+
+#if defined(USE_JAPANESEUTF8_SUPPORT) || defined(EXP_CHINESEUTF8_SUPPORT)
+UCode_t UCTransJPToUni(char *inbuf,
+		       int buflen,
+		       int charset_in)
+{
+    char outbuf[3], *pin, *pout;
+    size_t ilen, olen;
+    iconv_t cd;
+
+    pin = inbuf;
+    pout = outbuf;
+    ilen = 2;
+    olen = (size_t) buflen;
+
+    cd = iconv_open("UTF-16BE", LYCharSet_UC[charset_in].MIMEname);
+    (void) iconv(cd, (ICONV_CONST char **) &pin, &ilen, &pout, &olen);
+    iconv_close(cd);
+    if ((ilen == 0) && (olen == 0)) {
+	return (((unsigned char) outbuf[0]) << 8) + (unsigned char) outbuf[1];
+    }
+    return ucCannotConvert;
+}
+#endif
+
+/*
+ * Translate a character to Unicode.  If additional bytes are needed, this
+ * returns ucNeedMore, based on its internal state.  To reset the state,
+ * call this with charset_in < 0.
+ */
+UCode_t UCTransToUni(int ch_in,
+		     int charset_in)
+{
+    static char buffer[10];
+    static unsigned inx = 0;
+
+    UCode_t unicode;
+    int Gn;
+    unsigned char ch_iu = UCH(ch_in);
+    int UChndl_in;
+
+    /*
+     * Reset saved-state.
+     */
+    if (charset_in < 0) {
+	inx = 0;
+	return ucCannotConvert;
+    } else if (charset_in == LATIN1) {
+	return ch_iu;
+    } else if (charset_in == UTF8_handle) {
+	if (is8bits(ch_iu)) {
+	    unsigned need;
+	    const char *ptr;
+
+	    buffer[inx++] = (char) ch_iu;
+	    buffer[inx] = '\0';
+	    need = (unsigned) utf8_length(TRUE, buffer);
+	    if (need && (need + 1) == inx) {
+		inx = 0;
+		ptr = buffer;
+		return UCGetUniFromUtf8String(&ptr);
+	    } else if (inx < sizeof(buffer) - 1) {
+		return ucNeedMore;
+	    } else {
+		inx = 0;
+	    }
+	} else {
+	    inx = 0;
+	}
+    }
+#ifdef USE_JAPANESEUTF8_SUPPORT
+    if ((strcmp(LYCharSet_UC[charset_in].MIMEname, "shift_jis") == 0) ||
+	(strcmp(LYCharSet_UC[charset_in].MIMEname, "euc-jp") == 0)) {
+	char obuffer[3], *pin, *pout;
+	size_t ilen, olen;
+	iconv_t cd;
+
+	pin = buffer;
+	pout = obuffer;
+	ilen = olen = 2;
+	if (strcmp(LYCharSet_UC[charset_in].MIMEname, "shift_jis") == 0) {
+	    if (inx == 0) {
+		if (IS_SJIS_HI1(ch_iu) ||
+		    IS_SJIS_HI2(ch_iu)) {
+		    buffer[0] = (char) ch_in;
+		    inx = 1;
+		    return ucNeedMore;
+		} else if (IS_SJIS_X0201KANA(ch_iu)) {
+		    buffer[0] = (char) ch_in;
+		    buffer[1] = 0;
+		    cd = iconv_open("UTF-16BE", "Shift_JIS");
+		    ilen = 1;
+		    (void) iconv(cd, (ICONV_CONST char **) &pin, &ilen, &pout, &olen);
+		    iconv_close(cd);
+		    if ((ilen == 0) && (olen == 0)) {
+			return (UCH(obuffer[0]) << 8) + UCH(obuffer[1]);
+		    }
+		}
+	    } else {
+		if (IS_SJIS_LO(ch_iu)) {
+		    buffer[1] = (char) ch_in;
+		    buffer[2] = 0;
+
+		    cd = iconv_open("UTF-16BE", "Shift_JIS");
+		    (void) iconv(cd, (ICONV_CONST char **) &pin, &ilen, &pout, &olen);
+		    iconv_close(cd);
+		    inx = 0;
+		    if ((ilen == 0) && (olen == 0)) {
+			return (UCH(obuffer[0]) << 8) + UCH(obuffer[1]);
+		    }
+		}
+	    }
+	}
+	if (strcmp(LYCharSet_UC[charset_in].MIMEname, "euc-jp") == 0) {
+	    if (inx == 0) {
+		if (IS_EUC_HI(ch_iu) || ch_iu == 0x8E) {
+		    buffer[0] = (char) ch_in;
+		    inx = 1;
+		    return ucNeedMore;
+		}
+	    } else {
+		if (IS_EUC_LOX(ch_iu)) {
+		    buffer[1] = (char) ch_in;
+		    buffer[2] = 0;
+
+		    cd = iconv_open("UTF-16BE", "EUC-JP");
+		    (void) iconv(cd, (ICONV_CONST char **) &pin, &ilen, &pout, &olen);
+		    iconv_close(cd);
+		    inx = 0;
+		    if ((ilen == 0) && (olen == 0)) {
+			return (UCH(obuffer[0]) << 8) + UCH(obuffer[1]);
+		    }
+		}
+	    }
+	}
+	inx = 0;
+    }
+#endif
+    if (ch_iu < 128 && ch_iu >= 32)
+	return ch_iu;
+
+    if (ch_iu < 32 &&
+	LYCharSet_UC[charset_in].enc != UCT_ENC_8BIT_C0) {
+	/*
+	 * Don't translate C0 chars except for specific charsets.
+	 */
+	return ch_iu;
+    } else if ((UChndl_in = LYCharSet_UC[charset_in].UChndl) < 0) {
+	return ucCannotConvert;
+    } else if (!UCInfo[UChndl_in].num_uni) {
+	return ucCannotConvert;
+    }
+
+    if ((Gn = UCInfo[UChndl_in].GN) < 0) {
+	Gn = UC_MapGN(UChndl_in, 1);
+    }
+
+    UC_translate = set_translate(Gn);
+    unicode = UC_translate[ch_iu];
+
+    return unicode;
+}
+
+int UCReverseTransChar(int ch_out,
+		       int charset_in,
+		       int charset_out)
+{
+    int Gn;
+    int rc = ucError;
+    int UChndl_in, UChndl_out;
+    int isdefault;
+    int i_ch = UCH(ch_out);
+    const u16 *ut;
+
+    if (charset_in == charset_out)
+	return UCH(ch_out);
+    if (charset_in < 0)
+	return ucCannotConvert;
+    if ((UChndl_in = LYCharSet_UC[charset_in].UChndl) < 0)
+	return ucCannotConvert;
+    if (!UCInfo[UChndl_in].num_uni)
+	return ucCannotConvert;
+    if (charset_out < 0)
+	return ucCannotOutput;
+    if ((UChndl_out = LYCharSet_UC[charset_out].UChndl) < 0) {
+	if (LYCharSet_UC[charset_out].codepage < 0)
+	    return LYCharSet_UC[charset_out].codepage;
+	if ((UChndl_out = default_UChndl) < 0)
+	    return ucCannotOutput;
+	isdefault = 1;
+    } else {
+	isdefault = UCInfo[UChndl_out].replacedesc.isdefault;
+    }
+
+    if (!isdefault) {
+	/*
+	 * Try to use the inverse table if charset_out is not equivalent
+	 * to using just the default table.  If it is, it should have
+	 * just ASCII chars and trying to back-translate those should
+	 * not give anything but themselves.  - kw
+	 */
+	ut = UCInfo[UChndl_out].unitable;
+	if (ut == UC_current_unitable) {
+	    if ((Gn = UCInfo[UChndl_in].GN) < 0) {
+		Gn = UC_MapGN(UChndl_in, 1);
+	    }
+	    UC_translate = set_translate(Gn);
+	    if (inv_translate)
+		rc = inv_translate[i_ch];
+	    if (rc >= 32) {
+		return rc;
+	    }
+	}
+    }
+    return UCTransChar(ch_out, charset_out, charset_in);
+}
+
+/*
+ * Returns string length, or negative value for error.
+ */
+int UCTransCharStr(char *outbuf,
+		   int buflen,
+		   int ch_in,
+		   int charset_in,
+		   int charset_out,
+		   int chk_single_flag)
+{
+    UCode_t unicode;
+    int Gn;
+    int rc = ucUnknown, src = 0;
+    int UChndl_in, UChndl_out;
+    int isdefault, trydefault = 0;
+    struct unimapdesc_str *repl;
+    const u16 *ut;
+    int upd = 0;
+
+    if (buflen < 2)
+	return ucBufferTooSmall;
+    if (chk_single_flag && charset_in == charset_out) {
+	outbuf[0] = (char) ch_in;
+	outbuf[1] = '\0';
+	return 1;
+    }
+    if (charset_in < 0)
+	return ucCannotConvert;
+    if ((UChndl_in = LYCharSet_UC[charset_in].UChndl) < 0)
+	return ucCannotConvert;
+    if (!UCInfo[UChndl_in].num_uni)
+	return ucCannotConvert;
+    if ((UChndl_out = LYCharSet_UC[charset_out].UChndl) < 0) {
+	if (LYCharSet_UC[charset_out].codepage < 0)
+	    return LYCharSet_UC[charset_out].codepage;
+	if ((UChndl_out = default_UChndl) < 0)
+	    return ucCannotOutput;
+	isdefault = 1;
+    } else {
+	isdefault = UCInfo[UChndl_out].replacedesc.isdefault;
+	trydefault = UCInfo[UChndl_out].replacedesc.trydefault;
+    }
+    if ((Gn = UCInfo[UChndl_in].GN) < 0) {
+	Gn = UC_MapGN(UChndl_in, !chk_single_flag);
+	upd = chk_single_flag;
+    }
+
+    UC_translate = set_translate(Gn);
+    unicode = UC_translate[UCH(ch_in)];
+
+    if (chk_single_flag) {
+	if (!isdefault) {
+	    ut = UCInfo[UChndl_out].unitable;
+	    if (ut == UC_current_unitable) {
+		if (upd)
+		    set_inverse_transl(Gn);
+	    } else {
+		src = UC_con_set_unimap(UChndl_out, 1);
+		if (src > 0) {
+		    set_inverse_transl(Gn);
+		} else if (src < 0) {
+		    return src;
+		}
+	    }
+	}
+	src = conv_uni_to_pc(unicode, isdefault);
+	if (src >= 32) {
+	    outbuf[0] = (char) src;
+	    outbuf[1] = '\0';
+	    return 1;
+	}
+    }
+
+    repl = &(UCInfo[UChndl_out].replacedesc);
+    if (!isdefault) {
+	if (repl != UC_current_unitable_str) {
+	    con_clear_unimap_str(0);
+	    (void) UC_con_set_unimap_str(repl->entry_ct, repl->entries, 0);
+	    UC_current_unitable_str = repl;
+	}
+	rc = conv_uni_to_str(outbuf, buflen, unicode, 0);
+	if (rc >= 0)
+	    return (int) strlen(outbuf);
+    }
+    if (trydefault && chk_single_flag) {
+	src = conv_uni_to_pc(unicode, 1);
+	if (src >= 32) {
+	    outbuf[0] = (char) src;
+	    outbuf[1] = '\0';
+	    return 1;
+	}
+    }
+    if (isdefault || trydefault) {
+	rc = conv_uni_to_str(outbuf, buflen, unicode, 1);
+	if (rc >= 0)
+	    return (int) strlen(outbuf);
+    }
+    if (rc == ucNotFound) {
+	if (!isdefault)
+	    rc = conv_uni_to_str(outbuf, buflen, UCS_REPL, 0);
+	if ((rc == ucNotFound) && (isdefault || trydefault))
+	    rc = conv_uni_to_str(outbuf, buflen, UCS_REPL, 1);
+	if (rc >= 0)
+	    return (int) strlen(outbuf);
+    }
+    if (chk_single_flag && src == ucNotFound) {
+	if (!isdefault)
+	    rc = conv_uni_to_pc(UCS_REPL, 0);
+	if ((rc == ucNotFound) && (isdefault || trydefault))
+	    rc = conv_uni_to_pc(UCS_REPL, 1);
+	if (rc >= 32) {
+	    outbuf[0] = (char) rc;
+	    outbuf[1] = '\0';
+	    return 1;
+	} else if (rc <= 0) {
+	    outbuf[0] = '\0';
+	    return rc;
+	}
+	return rc;
+    }
+    return ucNotFound;
+}
+
+static int UC_FindGN_byMIME(const char *UC_MIMEcharset)
+{
+    int i;
+
+    for (i = 0; i < 4; i++) {
+	if (!strcmp(UC_MIMEcharset, UC_GNsetMIMEnames[i])) {
+	    return i;
+	}
+    }
+    return ucError;
+}
+
+int UCGetRawUniMode_byLYhndl(int i)
+{
+    if (i < 0)
+	return 0;
+    return LYCharSet_UC[i].enc;
+}
+
+/*
+ * Construct a new charset name, given prefix and codepage.  This introduces
+ * potentially unchecked recursion into UCGetLYhntl_byMIME if neither the "cp"
+ * nor "windows-" prefixes are configured, so we check it here.
+ */
+static int getLYhndl_byCP(const char *prefix,
+			  const char *codepage)
+{
+    static int nested;
+    int result = ucError;
+
+    if (!nested++) {
+	char *cptmp = NULL;
+
+	StrAllocCopy(cptmp, prefix);
+	StrAllocCat(cptmp, codepage);
+	result = UCGetLYhndl_byMIME(cptmp);
+	FREE(cptmp);
+    }
+    nested--;
+    return result;
+}
+
+/*
+ * Get Lynx internal charset handler from MIME name,
+ * return -1 if we got NULL or did not recognize value.
+ * According to RFC, MIME headers should match case-insensitively.
+ */
+int UCGetLYhndl_byMIME(const char *value)
+{
+    int i;
+    int LYhndl = -1;
+
+    if (isEmpty(value)) {
+	CTRACE((tfp,
+		"UCGetLYhndl_byMIME: NULL argument instead of MIME name.\n"));
+	return ucError;
+    }
+
+    for (i = 0;
+	 (i < MAXCHARSETS && i < LYNumCharsets &&
+	  LYchar_set_names[i]); i++) {
+	if (LYCharSet_UC[i].MIMEname &&
+	    !strcasecomp(value, LYCharSet_UC[i].MIMEname)) {
+	    return i;
+	}
+    }
+
+    /*
+     * Not yet found, try synonyms.  - FM
+     */
+#if !NO_CHARSET_utf_8
+    if (!strcasecomp(value, "unicode-1-1-utf-8") ||
+	!strcasecomp(value, "utf8")) {
+	/*
+	 * Treat these as synonyms for the IANA registered name.  - FM
+	 */
+	return UCGetLYhndl_byMIME("utf-8");
+    }
+#endif
+    if (!strncasecomp(value, "iso", 3) && !StrNCmp(value + 3, "8859", 4)) {
+	return getLYhndl_byCP("iso-", value + 3);
+    }
+    if (!strcasecomp(value, "iso-8859-8-i") ||
+	!strcasecomp(value, "iso-8859-8-e")) {
+	return UCGetLYhndl_byMIME("iso-8859-8");
+    }
+#if !NO_CHARSET_euc_jp
+    if (!strcasecomp(value, "x-euc-jp") ||
+	!strcasecomp(value, "eucjp")) {
+	return UCGetLYhndl_byMIME("euc-jp");
+    }
+#endif
+#if !NO_CHARSET_shift_jis
+    if ((!strcasecomp(value, "x-shift-jis")) ||
+	(!strcasecomp(value, "x-sjis")) ||
+	(!strcasecomp(value, "pck"))) {
+	return UCGetLYhndl_byMIME("shift_jis");
+    }
+#endif
+#if !NO_CHARSET_euc_kr
+    if ((!strcasecomp(value, "iso-2022-kr")) ||
+	(!strcasecomp(value, "ks_c_5601-1987"))) {
+	return UCGetLYhndl_byMIME("euc-kr");
+    }
+#endif
+#if !NO_CHARSET_euc_cn
+    if (!strcasecomp(value, "gb2312") ||
+	!strncasecomp(value, "cn-gb", 5) ||
+	!strcasecomp(value, "iso-2022-cn")) {
+	return UCGetLYhndl_byMIME("euc-cn");
+    }
+#endif
+#if !NO_CHARSET_big5
+    if (!strcasecomp(value, "cn-big5")) {
+	return UCGetLYhndl_byMIME("big5");
+    }
+#endif
+#if !NO_CHARSET_macintosh
+    if (!strcasecomp(value, "x-mac-roman") ||
+	!strcasecomp(value, "mac-roman")) {
+	return UCGetLYhndl_byMIME("macintosh");
+    }
+#endif
+#if !NO_CHARSET_next
+    if (!strcasecomp(value, "x-next") ||
+	!strcasecomp(value, "nextstep") ||
+	!strcasecomp(value, "x-nextstep")) {
+	return UCGetLYhndl_byMIME("next");
+    }
+#endif
+#if !NO_CHARSET_windows_1252
+    if (!strcasecomp(value, "iso-8859-1-windows-3.1-latin-1") ||
+	!strcasecomp(value, "cp1252") ||
+	!strcasecomp(value, "cp-1252") ||
+	!strcasecomp(value, "ibm1252") ||
+	!strcasecomp(value, "iso-8859-1-windows-3.0-latin-1")) {
+	/*
+	 * Treat these as synonyms for windows-1252, which is more
+	 * commonly used than the IANA registered name.  - FM
+	 */
+	return UCGetLYhndl_byMIME("windows-1252");
+    }
+#endif
+#if !NO_CHARSET_windows_1251
+    if (!strcasecomp(value, "ansi-1251")) {
+	return UCGetLYhndl_byMIME("windows-1251");
+    }
+#endif
+#if !NO_CHARSET_windows_1250
+    if (!strcasecomp(value, "iso-8859-2-windows-latin-2") ||
+	!strcasecomp(value, "cp1250") ||
+	!strcasecomp(value, "cp-1250") ||
+	!strcasecomp(value, "ibm1250")) {
+	/*
+	 * Treat these as synonyms for windows-1250.  - FM
+	 */
+	return UCGetLYhndl_byMIME("windows-1250");
+    }
+#endif
+    if ((!strncasecomp(value, "ibm", 3) ||
+	 !strncasecomp(value, "cp-", 3)) &&
+	isdigit(UCH(value[3])) &&
+	isdigit(UCH(value[4])) &&
+	isdigit(UCH(value[5]))) {
+	/*
+	 * For "ibmNNN<...>" or "cp-NNN", try "cpNNN<...>"
+	 * if not yet found.  - KW & FM
+	 */
+	if ((LYhndl = getLYhndl_byCP("cp", value + 3)) >= 0)
+	    return LYhndl;
+	/*
+	 * Try windows-NNN<...> if not yet found.  - FM
+	 */
+	return getLYhndl_byCP("windows-", value + 3);
+    }
+    if (!strncasecomp(value, "windows-", 8) &&
+	isdigit(UCH(value[8])) &&
+	isdigit(UCH(value[9])) &&
+	isdigit(UCH(value[10]))) {
+	/*
+	 * For "windows-NNN<...>", try "cpNNN<...>" - FM
+	 */
+	return getLYhndl_byCP("cp", value + 8);
+    }
+#if !NO_CHARSET_koi8_r
+    if (!strcasecomp(value, "koi-8")) {		/* accentsoft bugosity */
+	return UCGetLYhndl_byMIME("koi8-r");
+    }
+#endif
+    if (!strcasecomp(value, "ANSI_X3.4-1968")) {
+	return US_ASCII;
+    }
+    /* no more synonyms if come here... */
+
+    CTRACE((tfp, "UCGetLYhndl_byMIME: unrecognized MIME name \"%s\"\n", value));
+    return ucError;		/* returns -1 if no charset found by that MIME name */
+}
+
+/*
+ * Function UC_setup_LYCharSets_repl() tries to set up a subtable in
+ * LYCharSets[] appropriate for this new charset, for compatibility with the
+ * "old method".  Maybe not nice (maybe not even necessary any more), but it
+ * works (as far as it goes..).
+ *
+ * We try to be conservative and only allocate new memory for this if needed.
+ * If not needed, just point to SevenBitApproximations[i].  [Could do the same
+ * for ISO_Latin1[] if it's identical to that, but would make it even *more*
+ * messy than it already is...] This the only function in this file that knows,
+ * or cares, about the HTMLDTD or details of LYCharSets[] subtables (and
+ * therefore somewhat violates the idea that this file should be independent of
+ * those).  As in other places, we rely on ISO_Latin1 being the *first* table
+ * in LYCharSets.  - KW
+ */
+
+/*
+ * We need to remember which ones were allocated and which are static.
+ */
+static STRING2PTR remember_allocated_LYCharSets[MAXCHARSETS];
+
+static void UCreset_allocated_LYCharSets(void)
+{
+    int i = 0;
+
+    for (; i < MAXCHARSETS; i++) {
+	remember_allocated_LYCharSets[i] = NULL;
+    }
+}
+
+#ifdef LY_FIND_LEAKS
+static void UCfree_allocated_LYCharSets(void)
+{
+    int i = 0;
+
+    for (; i < MAXCHARSETS; i++) {
+	if (remember_allocated_LYCharSets[i] != NULL) {
+	    FREE(remember_allocated_LYCharSets[i]);
+	}
+    }
+}
+#endif
+
+static STRING2PTR UC_setup_LYCharSets_repl(int UC_charset_in_hndl,
+					   unsigned lowest8)
+{
+    STRING2PTR ISO_Latin1 = LYCharSets[0];
+    const char **p;
+    char **prepl;
+    const u16 *pp;
+    const char **tp;
+    const char *s7;
+    const char *s8;
+    size_t i;
+    int j, changed;
+    u16 k;
+    u8 *ti;
+
+    /*
+     * Create a temporary table for reverse lookup of latin1 codes:
+     */
+    if ((tp = typecallocn(const char *, 96)) == NULL)
+	  return NULL;
+
+    if ((ti = typecallocn(u8, 96)) == NULL) {
+	FREE(tp);
+	return NULL;
+    }
+
+    pp = UCInfo[UC_charset_in_hndl].unitable;
+
+    /*
+     * Determine if we have any mapping of a Unicode in the range 160-255
+     * to an allowed code point > 0x80 in our new charset...
+     * Store any mappings found in ti[].
+     */
+    if (UCInfo[UC_charset_in_hndl].num_uni > 0) {
+	for (i = 0; i < 256; i++) {
+	    if ((j = UCInfo[UC_charset_in_hndl].unicount[i])) {
+		if ((k = *pp) >= 160 && k < 256 && i >= lowest8) {
+		    ti[k - 160] = UCH(i);
+		}
+		for (; j; j--) {
+		    pp++;
+		}
+	    }
+	}
+    } {
+	u16 ct;
+	struct unipair_str *list;
+
+	/*
+	 * Determine if we have any mapping of a Unicode in the range
+	 * 160-255 to a replacement string for our new charset...
+	 * Store any mappings found in tp[].
+	 */
+	ct = UCInfo[UC_charset_in_hndl].replacedesc.entry_ct;
+	list = UCInfo[UC_charset_in_hndl].replacedesc.entries;
+	while (ct--) {
+	    if ((k = list->unicode) >= 160 && k < 256) {
+		tp[k - 160] = list->replace_str;
+	    }
+	    list++;
+	}
+    }
+    /*
+     * Now allocate a new table compatible with LYCharSets[]
+     * and with the HTMLDTD for entities.
+     * We don't know yet whether we'll keep it around.
+     */
+    prepl = (char **) malloc(HTML_dtd.number_of_entities * sizeof(char *));
+
+    if (!prepl) {
+	FREE(tp);
+	FREE(ti);
+	return 0;
+    }
+
+    p = (const char **) prepl;
+    changed = 0;
+    for (i = 0; i < HTML_dtd.number_of_entities; i++, p++) {
+	/*
+	 * For each of those entities, we check what the "old method"
+	 * ISO_Latin1[] mapping does with them.  If it is nothing we
+	 * want to use, just point to the SevenBitApproximations[] string.
+	 */
+	s7 = SevenBitApproximations[i];
+	s8 = ISO_Latin1[i];
+	*p = s7;
+	if (s8 && UCH(*s8) >= 160 && s8[1] == '\0') {
+	    /*
+	     * We have an entity that is mapped to
+	     * one valid eightbit latin1 char.
+	     */
+	    if (ti[UCH(*s8) - 160] >= UCH(lowest8) &&
+		!(UCH(s7[0]) == ti[UCH(*s8) - 160] &&
+		  s7[1] == '\0')) {
+		/*
+		 * ...which in turn is mapped, by our "new method",
+		 * to another valid eightbit char for this new
+		 * charset:  either to itself...
+		 */
+		if (ti[UCH(*s8) - 160] == UCH(*s8)) {
+		    *p = s8;
+		} else {
+		    /*
+		     * make those 1-char strings
+		     * into HTAtoms, so they will be cleaned up
+		     * at exit...  all for the sake of preventing
+		     * memory leaks, sigh.
+		     */
+		    static char dummy[2];	/* one char dummy string */
+
+		    dummy[0] = (char) ti[UCH(*s8) - 160];
+		    *p = HTAtom_name(HTAtom_for(dummy));
+		}
+		changed = 1;
+	    } else if (tp[UCH(*s8) - 160] &&
+		       strcmp(s7, tp[UCH(*s8) - 160])) {
+		/*
+		 * ...or which is mapped, by our "new method",
+		 * to a replacement string for this new charset.
+		 */
+		*p = tp[UCH(*s8) - 160];
+		changed = 1;
+	    }
+	}
+    }
+    FREE(tp);
+    FREE(ti);
+    if (!changed) {
+	FREE(prepl);
+	return NULL;
+    }
+    return (STRING2PTR) prepl;
+}
+
+/*
+ * "New method" meets "Old method" ...
+ */
+static int UC_Register_with_LYCharSets(int s,
+				       const char *UC_MIMEcharset,
+				       const char *UC_LYNXcharset,
+				       int lowest_eightbit)
+{
+    int i, LYhndl, found;
+    STRING2PTR repl;
+
+    LYhndl = -1;
+    if (LYNumCharsets == 0) {
+	/*
+	 * Initialize here; so whoever changes
+	 * LYCharSets.c doesn't have to count...
+	 */
+	for (i = 0; (i < MAXCHARSETS) && LYchar_set_names[i]; i++) {
+	    LYNumCharsets = i + 1;
+	}
+    }
+
+    /*
+     * Search by MIME name, (LYchar_set_names may differ...)
+     */
+    for (i = 0; i < MAXCHARSETS && LYchar_set_names[i] && LYhndl < 0; i++) {
+	if (LYCharSet_UC[i].MIMEname &&
+	    !strcmp(UC_MIMEcharset, LYCharSet_UC[i].MIMEname)) {
+	    LYhndl = i;
+	}
+    }
+
+    if (LYhndl < 0) {		/* not found */
+	found = 0;
+	if (LYNumCharsets >= MAXCHARSETS) {
+	    CTRACE((tfp,
+		    "UC_Register_with_LYCharSets: Too many.  Ignoring %s/%s.",
+		    UC_MIMEcharset, UC_LYNXcharset));
+	    return ucError;
+	}
+	/*
+	 * Add to LYCharSets.c lists.
+	 */
+	LYhndl = LYNumCharsets;
+	LYNumCharsets++;
+	LYlowest_eightbit[LYhndl] = 999;
+	LYCharSets[LYhndl] = SevenBitApproximations;
+	/*
+	 * Hmm, try to be conservative here.
+	 */
+	LYchar_set_names[LYhndl] = UC_LYNXcharset;
+	LYchar_set_names[LYhndl + 1] = NULL;
+	/*
+	 * Terminating NULL may be looked for by Lynx code.
+	 */
+    } else {
+	found = 1;
+    }
+    LYCharSet_UC[LYhndl].UChndl = s;
+    /*
+     * Can we just copy the pointer?  Hope so...
+     */
+    LYCharSet_UC[LYhndl].MIMEname = UC_MIMEcharset;
+    LYCharSet_UC[LYhndl].enc = UCInfo[s].enc;
+    LYCharSet_UC[LYhndl].codepage = UCInfo[s].codepage;
+
+    /*
+     * @@@ We really SHOULD get more info from the table files,
+     * and set relevant flags in the LYCharSet_UC[] entry with
+     * that info...  For now, let's try it without.  - KW
+     */
+    if (lowest_eightbit < LYlowest_eightbit[LYhndl]) {
+	LYlowest_eightbit[LYhndl] = lowest_eightbit;
+    } else if (lowest_eightbit > LYlowest_eightbit[LYhndl]) {
+	UCInfo[s].lowest_eight = LYlowest_eightbit[LYhndl];
+    }
+
+    if (!found && LYhndl > 0) {
+	repl = UC_setup_LYCharSets_repl(s, (unsigned) UCInfo[s].lowest_eight);
+	if (repl) {
+	    LYCharSets[LYhndl] = repl;
+	    /*
+	     * Remember to FREE at exit.
+	     */
+	    remember_allocated_LYCharSets[LYhndl] = repl;
+	}
+    }
+    return LYhndl;
+}
+
+/*
+ * This only sets up the structure - no initialization of the tables
+ * is done here yet.
+ */
+void UC_Charset_Setup(const char *UC_MIMEcharset,
+		      const char *UC_LYNXcharset,
+		      const u8 * unicount,
+		      const u16 * unitable,
+		      int nnuni,
+		      struct unimapdesc_str replacedesc,
+		      int lowest_eight,
+		      int UC_rawuni,
+		      int codepage)
+{
+    int s, Gn;
+    int i, status = 0, found;
+
+    /*
+     * Get (new?) slot.
+     */
+    found = -1;
+    for (i = 0; i < UCNumCharsets && found < 0; i++) {
+	if (!strcmp(UCInfo[i].MIMEname, UC_MIMEcharset)) {
+	    found = i;
+	}
+    }
+    if (found >= 0) {
+	s = found;
+    } else {
+	if (UCNumCharsets >= MAXCHARSETS) {
+	    CTRACE((tfp, "UC_Charset_Setup: Too many.  Ignoring %s/%s.",
+		    UC_MIMEcharset, UC_LYNXcharset));
+	    return;
+	}
+	s = UCNumCharsets;
+	UCInfo[s].MIMEname = UC_MIMEcharset;
+    }
+    UCInfo[s].LYNXname = UC_LYNXcharset;
+    UCInfo[s].unicount = unicount;
+    UCInfo[s].unitable = unitable;
+    UCInfo[s].num_uni = nnuni;
+    UCInfo[s].replacedesc = replacedesc;
+    if (replacedesc.isdefault) {
+	default_UChndl = s;
+    }
+    Gn = UC_FindGN_byMIME(UC_MIMEcharset);
+    if (Gn >= 0)
+	UC_GNhandles[Gn] = s;
+    UCInfo[s].GN = Gn;
+    if (UC_rawuni == UCT_ENC_UTF8)
+	lowest_eight = 128;	/* cheat here */
+    UCInfo[s].lowest_eight = lowest_eight;
+    UCInfo[s].enc = UC_rawuni;
+    UCInfo[s].codepage = codepage;
+    UCInfo[s].LYhndl = UC_Register_with_LYCharSets(s,
+						   UC_MIMEcharset,
+						   UC_LYNXcharset,
+						   lowest_eight);
+    CTRACE2(TRACE_CFG, (tfp, "registered charset %d mime \"%s\" lynx \"%s\"\n",
+			s, UC_MIMEcharset, UC_LYNXcharset));
+    UCInfo[s].uc_status = status;
+    if (found < 0)
+	UCNumCharsets++;
+    return;
+}
+
+/*
+ * UC_NoUctb_Register_with_LYCharSets, UC_Charset_NoUctb_Setup -
+ * Alternative functions for adding character set info to the lists
+ * kept in LYCharSets.c.
+ *
+ * These are for character sets without any real tables of their own.
+ * We don't keep an entry in UCinfo[] for them.
+ */
+static int UC_NoUctb_Register_with_LYCharSets(const char *UC_MIMEcharset,
+					      const char *UC_LYNXcharset,
+					      int lowest_eightbit,
+					      int UC_rawuni,
+					      int codepage)
+{
+    int i, LYhndl = -1;
+
+    if (LYNumCharsets == 0) {
+	/*
+	 * Initialize here; so whoever changes
+	 * LYCharSets.c doesn't have to count...
+	 */
+	for (i = 0; (i < MAXCHARSETS) && LYchar_set_names[i]; i++) {
+	    LYNumCharsets = i + 1;
+	}
+    }
+
+    /*
+     * Search by MIME name, (LYchar_set_names may differ...)
+     * ignore if already present!
+     */
+    for (i = 0; i < MAXCHARSETS && LYchar_set_names[i] && LYhndl < 0; i++) {
+	if (LYCharSet_UC[i].MIMEname &&
+	    !strcmp(UC_MIMEcharset, LYCharSet_UC[i].MIMEname)) {
+	    return ucError;
+	}
+    }
+
+    /* not found */
+    if (LYNumCharsets >= MAXCHARSETS) {
+	CTRACE((tfp,
+		"UC_NoUctb_Register_with_LYCharSets: Too many.  Ignoring %s/%s.",
+		UC_MIMEcharset, UC_LYNXcharset));
+	return ucError;
+    }
+    /*
+     * Add to LYCharSets.c lists.
+     */
+    LYhndl = LYNumCharsets;
+    LYNumCharsets++;
+    LYlowest_eightbit[LYhndl] = lowest_eightbit;
+    LYCharSets[LYhndl] = SevenBitApproximations;
+    LYchar_set_names[LYhndl] = UC_LYNXcharset;
+    LYchar_set_names[LYhndl + 1] = NULL;
+    /*
+     * Terminating NULL may be looked for by Lynx code.
+     */
+
+    LYCharSet_UC[LYhndl].UChndl = -1;	/* no corresponding UChndl ! */
+    LYCharSet_UC[LYhndl].MIMEname = UC_MIMEcharset;
+    LYCharSet_UC[LYhndl].enc = UC_rawuni;
+    LYCharSet_UC[LYhndl].codepage = codepage;
+
+    /*
+     * @@@ We really SHOULD get more info from the table files,
+     * and set relevant flags in the LYCharSet_UC[] entry with
+     * that info...  For now, let's try it without.  - KW
+     */
+
+    return LYhndl;
+}
+
+/*
+ * A wrapper for the previous function.
+ */
+static void UC_Charset_NoUctb_Setup(const char *UC_MIMEcharset,
+				    const char *UC_LYNXcharset,
+				    int trydefault,
+				    int lowest_eight,
+				    int UC_rawuni,
+				    int codepage)
+{
+    int i;
+
+    /*
+     * Ignore completely if already in slot.
+     */
+    for (i = 0; i < UCNumCharsets; i++) {
+	if (!strcmp(UCInfo[i].MIMEname, UC_MIMEcharset)) {
+	    return;
+	}
+    }
+    if (UC_rawuni == UCT_ENC_UTF8)
+	lowest_eight = 128;	/* cheat here */
+    /* 'codepage' doubles as a flag for 'do not try any table
+     * lookup, not even default' when negative.  The value will
+     * be returned immediately by UCTrans* functions.
+     */
+    if (!trydefault && codepage == 0)
+	codepage = ucCannotOutput;	/* if not already set; any negative should do. */
+    UC_NoUctb_Register_with_LYCharSets(UC_MIMEcharset,
+				       UC_LYNXcharset,
+				       lowest_eight,
+				       UC_rawuni,
+				       codepage);
+    return;
+}
+
+#ifdef LY_FIND_LEAKS
+static void UCcleanup_mem(void)
+{
+    int i;
+
+    UCfree_allocated_LYCharSets();
+    con_clear_unimap_str(0);
+    con_clear_unimap_str(1);
+    con_clear_unimap(0);
+    con_clear_unimap(1);
+    for (i = 1; i < 4; i++) {	/* first one is static! */
+	FREE(inverse_translations[i]);
+    }
+}
+#endif /* LY_FIND_LEAKS */
+
+#ifdef EXP_CHARTRANS_AUTOSWITCH
+#ifdef CAN_AUTODETECT_DISPLAY_CHARSET
+#  ifdef __EMX__
+static int CpOrdinal(const unsigned UCode_t cp, const int other)
+{
+    char lyName[80];
+    char myMimeName[80];
+    char *mimeName, *mName = NULL, *lName = NULL;
+    int s, i, exists = 0, ret;
+
+    CTRACE((tfp, "CpOrdinal(cp=%lu, other=%d).\n", cp, other));
+    sprintf(myMimeName, "auto%s-cp%lu", (other ? "2" : ""), cp);
+    mimeName = myMimeName + 5 + (other != 0);
+    sprintf(lyName, "AutoDetect%s (cp%lu)",
+	    (other ? "-2" : ""), cp);
+    /* Find slot. */
+    s = -1;
+    for (i = 0; i < UCNumCharsets; i++) {
+	if (!strcmp(UCInfo[i].LYNXname, lyName))
+	    return UCGetLYhndl_byMIME(myMimeName);
+	else if (!strcasecomp(UCInfo[i].MIMEname, mimeName))
+	    s = i;
+    }
+    if (s < 0)
+	return ucError;
+    /* Store the "real" charset info */
+    real_charsets[other != 0] = UCGetLYhndl_byMIME(mimeName);
+    /* Duplicate the record. */
+    StrAllocCopy(mName, myMimeName);
+    StrAllocCopy(lName, lyName);
+    UC_Charset_Setup(mName, lName,
+		     UCInfo[s].unicount, UCInfo[s].unitable,
+		     UCInfo[s].num_uni, UCInfo[s].replacedesc,
+		     UCInfo[s].lowest_eight, UCInfo[s].enc,
+		     UCInfo[s].codepage);
+    ret = UCGetLYhndl_byMIME(myMimeName);
+    CTRACE((tfp, "Found %i.\n", ret));
+    return ret;
+}
+#  endif /* __EMX__ */
+#endif /* CAN_AUTODETECT_DISPLAY_CHARSET */
+#endif /* EXP_CHARTRANS_AUTOSWITCH */
+
+void UCInit(void)
+{
+
+    UCreset_allocated_LYCharSets();
+#ifdef LY_FIND_LEAKS
+    atexit(UCcleanup_mem);
+#endif
+    UCconsole_map_init();
+
+    /*
+     * The order of charset names visible in Lynx Options menu correspond to
+     * the order of lines below, except the first two described in LYCharSet.c
+     *
+     * Entries whose comment is marked with *** are declared in UCdomap.h,
+     * others are based on the included tables - UCdomap.c, near the top.
+     */
+
+    UC_CHARSET_SETUP_iso_8859_1;	/* ISO Latin 1          */
+    UC_CHARSET_SETUP_iso_8859_15;	/* ISO 8859-15 (Latin 9) */
+    UC_CHARSET_SETUP_cp850;	/* DosLatin1 (cp850)    */
+    UC_CHARSET_SETUP_windows_1252;	/* WinLatin1 (cp1252)   */
+    UC_CHARSET_SETUP_cp437;	/* DosLatinUS (cp437)   */
+
+    UC_CHARSET_SETUP_dec_mcs;	/* DEC Multinational    */
+    UC_CHARSET_SETUP_macintosh;	/* Macintosh (8 bit)    */
+    UC_CHARSET_SETUP_next;	/* NeXT character set   */
+    UC_CHARSET_SETUP_hp_roman8;	/* HP Roman8            */
+
+    UC_CHARSET_SETUP_euc_cn;		  /*** Chinese		    */
+    UC_CHARSET_SETUP_euc_jp;		  /*** Japanese (EUC_JP)    */
+    UC_CHARSET_SETUP_shift_jis;		  /*** Japanese (Shift_JIS) */
+    UC_CHARSET_SETUP_euc_kr;		  /*** Korean		    */
+    UC_CHARSET_SETUP_big5;		  /*** Taipei (Big5)	    */
+
+    UC_CHARSET_SETUP_viscii;	/* Vietnamese (VISCII)  */
+    UC_CHARSET_SETUP;		/* us-ascii */ /* 7 bit approximations */
+
+    UC_CHARSET_SETUP_x_transparent;	  /*** Transparent	  */
+
+    UC_CHARSET_SETUP_iso_8859_2;	/* ISO Latin 2          */
+    UC_CHARSET_SETUP_cp852;	/* DosLatin2 (cp852)    */
+    UC_CHARSET_SETUP_windows_1250;	/* WinLatin2 (cp1250)   */
+
+    UC_CHARSET_SETUP_iso_8859_3;	/* ISO Latin 3          */
+    UC_CHARSET_SETUP_iso_8859_4;	/* ISO Latin 4          */
+    UC_CHARSET_SETUP_iso_8859_13;	/* ISO 8859-13 Baltic Rim */
+    UC_CHARSET_SETUP_cp775;	/* DosBaltRim (cp775)   */
+    UC_CHARSET_SETUP_windows_1257;	/* WinBaltRim (cp1257)  */
+    UC_CHARSET_SETUP_iso_8859_5;	/* ISO 8859-5 Cyrillic  */
+    UC_CHARSET_SETUP_cp866;	/* DosCyrillic (cp866)  */
+    UC_CHARSET_SETUP_windows_1251;	/* WinCyrillic (cp1251) */
+    UC_CHARSET_SETUP_koi8_r;	/* KOI8-R Cyrillic      */
+    UC_CHARSET_SETUP_iso_8859_6;	/* ISO 8869-6 Arabic    */
+    UC_CHARSET_SETUP_cp864;	/* DosArabic (cp864)    */
+    UC_CHARSET_SETUP_windows_1256;	/* WinArabic (cp1256)   */
+    UC_CHARSET_SETUP_iso_8859_14;	/* ISO 8859-14 Celtic   */
+    UC_CHARSET_SETUP_iso_8859_7;	/* ISO 8859-7 Greek     */
+    UC_CHARSET_SETUP_cp737;	/* DosGreek (cp737)     */
+    UC_CHARSET_SETUP_cp869;	/* DosGreek2 (cp869)    */
+    UC_CHARSET_SETUP_windows_1253;	/* WinGreek (cp1253)    */
+    UC_CHARSET_SETUP_iso_8859_8;	/* ISO 8859-8 Hebrew    */
+    UC_CHARSET_SETUP_cp862;	/* DosHebrew (cp862)    */
+    UC_CHARSET_SETUP_windows_1255;	/* WinHebrew (cp1255)   */
+    UC_CHARSET_SETUP_iso_8859_9;	/* ISO 8859-9 (Latin 5) */
+    UC_CHARSET_SETUP_cp857;	/* DosTurkish (cp857) */
+    UC_CHARSET_SETUP_iso_8859_10;	/* ISO 8859-10 North European */
+    UC_CHARSET_SETUP_iso_8859_16;	/* ISO 8859-16 (Latin 10) */
+
+    UC_CHARSET_SETUP_utf_8;		  /*** UNICODE UTF-8	  */
+    UC_CHARSET_SETUP_mnemonic_ascii_0;	/* RFC 1345 w/o Intro   */
+    UC_CHARSET_SETUP_mnemonic;	/* RFC 1345 Mnemonic    */
+    UC_CHARSET_SETUP_cp866u;	/* Ukrainian Cyrillic (866) */
+    UC_CHARSET_SETUP_koi8_u;	/* Ukrainian Cyrillic (koi8-u) */
+    UC_CHARSET_SETUP_ptcp154;	/* Cyrillic-Asian (PT154) */
+
+#ifdef EXP_CHARTRANS_AUTOSWITCH
+#ifdef CAN_AUTODETECT_DISPLAY_CHARSET
+#  ifdef __EMX__
+    {
+	unsigned UCode_t lst[3];
+	unsigned UCode_t len, rc;
+
+	rc = DosQueryCp(sizeof(lst), lst, &len);
+	if (rc == 0) {
+	    if (len >= 1)
+		auto_display_charset = CpOrdinal(lst[0], 0);
+#    ifdef CAN_SWITCH_DISPLAY_CHARSET
+	    if (len >= 3) {
+		codepages[0] = lst[0];
+		codepages[1] = (lst[0] == lst[1] ? lst[2] : lst[1]);
+		auto_other_display_charset = CpOrdinal(codepages[1], 1);
+	    }
+#    endif
+	} else {
+	    CTRACE((tfp, "DosQueryCp() returned %#lx=%lu.\n", rc, rc));
+	}
+    }
+#  endif
+#endif
+#endif
+
+/*
+ * To add synonyms for any charset name check function UCGetLYhndl_byMIME in
+ * this file.
+ */
+
+/* for coding/performance - easy to type: */
+    LATIN1 = UCGetLYhndl_byMIME("iso-8859-1");
+    US_ASCII = UCGetLYhndl_byMIME("us-ascii");
+    UTF8_handle = UCGetLYhndl_byMIME("utf-8");
+    TRANSPARENT = UCGetLYhndl_byMIME("x-transparent");
+}
+
+/*
+ * Safe variant of UCGetLYhndl_byMIME, with blind recovery from typo in user
+ * input:  lynx.cfg, userdefs.h, command line switches.
+ */
+int safeUCGetLYhndl_byMIME(const char *value)
+{
+    int i = UCGetLYhndl_byMIME(value);
+
+    if (i == -1) {		/* was user's typo or not yet recognized value */
+	i = LATIN1;		/* error recovery? */
+	CTRACE((tfp, "safeUCGetLYhndl_byMIME: ISO-8859-1 assumed.\n"));
+    }
+
+    return (i);
+}
+
+#ifdef USE_LOCALE_CHARSET
+
+#if defined(USE_LOCALE_CHARSET) && !defined(HAVE_LANGINFO_CODESET)
+/*
+ * This is a quick-and-dirty emulator of the nl_langinfo(CODESET)
+ * function defined in the Single Unix Specification for those systems
+ * (FreeBSD, etc.) that don't have one yet. It behaves as if it had
+ * been called after setlocale(LC_CTYPE, ""), that is it looks at
+ * the locale environment variables.
+ *
+ * http://www.opengroup.org/onlinepubs/7908799/xsh/langinfo.h.html
+ *
+ * Please extend it as needed and suggest improvements to the author.
+ * This emulator will hopefully become redundant soon as
+ * nl_langinfo(CODESET) becomes more widely implemented.
+ *
+ * Since the proposed Li18nux encoding name registry is still not mature,
+ * the output follows the MIME registry where possible:
+ *
+ *   http://www.iana.org/assignments/character-sets
+ *
+ * A possible autoconf test for the availability of nl_langinfo(CODESET)
+ * can be found in
+ *
+ *   http://www.cl.cam.ac.uk/~mgk25/unicode.html#activate
+ *
+ * Markus.Kuhn@cl.cam.ac.uk -- 2002-03-11
+ * Permission to use, copy, modify, and distribute this software
+ * for any purpose and without fee is hereby granted. The author
+ * disclaims all warranties with regard to this software.
+ *
+ * Latest version:
+ *
+ *   http://www.cl.cam.ac.uk/~mgk25/ucs/langinfo.c
+ */
+
+/*
+#include "langinfo.h"
+*/
+typedef int nl_item;
+
+#define CODESET 1
+
+#define C_CODESET "US-ASCII"	/* Return this as the encoding of the
+				 * C/POSIX locale. Could as well one day
+				 * become "UTF-8". */
+
+#define digit(x) ((x) >= '0' && (x) <= '9')
+
+static char buf[16];
+
+static char *nl_langinfo(nl_item item)
+{
+    char *l, *p;
+
+    if (item != CODESET)
+	return NULL;
+
+    if (((l = LYGetEnv("LC_ALL")) != 0) ||
+	((l = LYGetEnv("LC_CTYPE")) != 0) ||
+	((l = LYGetEnv("LANG")) != 0)) {
+	/* check standardized locales */
+	if (!strcmp(l, "C") || !strcmp(l, "POSIX"))
+	    return C_CODESET;
+	/* check for encoding name fragment */
+	if (strstr(l, "UTF") || strstr(l, "utf"))
+	    return "UTF-8";
+	if ((p = strstr(l, "8859-"))) {
+	    memcpy(buf, "ISO-8859-\0\0", 12);
+	    p += 5;
+	    if (digit(*p)) {
+		buf[9] = *p++;
+		if (digit(*p))
+		    buf[10] = *p++;
+		return buf;
+	    }
+	}
+	if (strstr(l, "KOI8-R"))
+	    return "KOI8-R";
+	if (strstr(l, "KOI8-U"))
+	    return "KOI8-U";
+	if (strstr(l, "620"))
+	    return "TIS-620";
+	if (strstr(l, "2312"))
+	    return "GB2312";
+	if (strstr(l, "HKSCS"))
+	    return "Big5HKSCS";	/* no MIME charset */
+	if (strstr(l, "Big5") || strstr(l, "BIG5"))
+	    return "Big5";
+	if (strstr(l, "GBK"))
+	    return "GBK";	/* no MIME charset */
+	if (strstr(l, "18030"))
+	    return "GB18030";	/* no MIME charset */
+	if (strstr(l, "Shift_JIS") || strstr(l, "SJIS"))
+	    return "Shift_JIS";
+	/* check for conclusive modifier */
+	if (strstr(l, "euro"))
+	    return "ISO-8859-15";
+	/* check for language (and perhaps country) codes */
+	if (strstr(l, "zh_TW"))
+	    return "Big5";
+	if (strstr(l, "zh_HK"))
+	    return "Big5HKSCS";	/* no MIME charset */
+	if (strstr(l, "zh"))
+	    return "GB2312";
+	if (strstr(l, "ja"))
+	    return "EUC-JP";
+	if (strstr(l, "ko"))
+	    return "EUC-KR";
+	if (strstr(l, "ru"))
+	    return "KOI8-R";
+	if (strstr(l, "uk"))
+	    return "KOI8-U";
+	if (strstr(l, "pl") || strstr(l, "hr") ||
+	    strstr(l, "hu") || strstr(l, "cs") ||
+	    strstr(l, "sk") || strstr(l, "sl"))
+	    return "ISO-8859-2";
+	if (strstr(l, "eo") || strstr(l, "mt"))
+	    return "ISO-8859-3";
+	if (strstr(l, "el"))
+	    return "ISO-8859-7";
+	if (strstr(l, "he"))
+	    return "ISO-8859-8";
+	if (strstr(l, "tr"))
+	    return "ISO-8859-9";
+	if (strstr(l, "th"))
+	    return "TIS-620";	/* or ISO-8859-11 */
+	if (strstr(l, "lt"))
+	    return "ISO-8859-13";
+	if (strstr(l, "cy"))
+	    return "ISO-8859-14";
+	if (strstr(l, "ro"))
+	    return "ISO-8859-2";	/* or ISO-8859-16 */
+	if (strstr(l, "am") || strstr(l, "vi"))
+	    return "UTF-8";
+	/* Send me further rules if you like, but don't forget that we are
+	 * *only* interested in locale naming conventions on platforms
+	 * that do not already provide an nl_langinfo(CODESET) implementation. */
+	return "ISO-8859-1";	/* should perhaps be "UTF-8" instead */
+    }
+    return C_CODESET;
+}
+#endif /* defined(USE_LOCALE_CHARSET) && !defined(HAVE_LANGINFO_CODESET) */
+
+/*
+ * If LYLocaleCharset is true, use the current locale to lookup a MIME name
+ * that corresponds, and use that as the display charset.  This feature is
+ * experimental because while nl_langinfo(CODESET) itself is standardized,
+ * the return values and their relationship to the locale value is not.
+ * GNU libiconv happens to give useful values, but other implementations are
+ * not guaranteed to do this.
+ *
+ * Not all Linux versions provide useful information.  GNU libc 2.2 returns
+ *	"ANSI_X3.4-1968"
+ * whether locale is POSIX or en_US.UTF-8.
+ *
+ * Another possible thing to investigate is the locale_charset() function
+ * provided in libiconv 1.5.1.
+ */
+void LYFindLocaleCharset(void)
+{
+    char *name;
+
+    CTRACE((tfp, "LYFindLocaleCharset(%d)\n", LYLocaleCharset));
+    name = nl_langinfo(CODESET);
+
+    if (name != 0) {
+	int value = UCGetLYhndl_byMIME(name);
+
+	if (value >= 0) {
+	    linedrawing_char_set = value;
+	    CTRACE((tfp, "Found name \"%s\" -> %d\n", name, value));
+	    /*
+	     * If no locale was set, we will get the POSIX character set, which
+	     * in Lynx is treated as US-ASCII.  However, Lynx's longstanding
+	     * behavior has been to default to ISO-8859-1.  So we treat that
+	     * encoding specially.  Otherwise, if LOCALE_CHARSET is set, then
+	     * we will use the locale encoding -- unless overridden by the
+	     * ASSUME_CHARSET value and/or command-line option.
+	     */
+	    if (LYLocaleCharset) {
+		CTRACE((tfp, "...prior LocaleCharset '%s'\n", NonNull(UCAssume_MIMEcharset)));
+		if (value == US_ASCII) {
+		    CTRACE((tfp, "...prefer existing charset to ASCII\n"));
+		} else if (assumed_charset) {
+		    CTRACE((tfp, "...already assumed-charset\n"));
+		} else {
+		    current_char_set = linedrawing_char_set;
+		    UCLYhndl_for_unspec = current_char_set;
+		    StrAllocCopy(UCAssume_MIMEcharset, name);
+		    CTRACE((tfp, "...using LocaleCharset '%s'\n", NonNull(UCAssume_MIMEcharset)));
+		}
+	    }
+	} else {
+	    CTRACE((tfp, "Cannot find a handle for MIME name \"%s\"\n", name));
+	}
+    } else {
+	CTRACE((tfp, "Cannot find a MIME name for locale\n"));
+    }
+}
+#endif /* USE_LOCALE_CHARSET */
+
+BOOL UCScanCode(UCode_t *target, const char *source, BOOL isHex)
+{
+    BOOL status = FALSE;
+    long lcode;
+    char *endptr;
+
+    errno = 0;
+    *target = 0;
+    lcode = strtol(source, &endptr, isHex ? 16 : 10);
+    if (lcode >= 0
+	&& (endptr > source)
+#if defined(ERANGE) && defined(LONG_MAX) && defined(LONG_MIN)
+	&& (errno != ERANGE || (lcode != LONG_MAX && lcode != LONG_MIN))
+#else
+	&& (endptr - source) < (isHex ? 8 : 10)
+#endif
+	&& (endptr != 0)
+	&& (*endptr == '\0')) {
+	*target = (UCode_t) lcode;
+	status = TRUE;
+    }
+    return status;
+}