summaryrefslogtreecommitdiffstats
path: root/src/LYCharSets.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 16:37:15 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 16:37:15 +0000
commitae5d181b854d3ccb373b6bc01b4869e44ff4d87a (patch)
tree91f59efb48c56a84cc798e012fccb667b63d3fee /src/LYCharSets.c
parentInitial commit. (diff)
downloadlynx-ae5d181b854d3ccb373b6bc01b4869e44ff4d87a.tar.xz
lynx-ae5d181b854d3ccb373b6bc01b4869e44ff4d87a.zip
Adding upstream version 2.9.0dev.12.upstream/2.9.0dev.12upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/LYCharSets.c')
-rw-r--r--src/LYCharSets.c1157
1 files changed, 1157 insertions, 0 deletions
diff --git a/src/LYCharSets.c b/src/LYCharSets.c
new file mode 100644
index 0000000..94b7a04
--- /dev/null
+++ b/src/LYCharSets.c
@@ -0,0 +1,1157 @@
+/*
+ * $LynxId: LYCharSets.c,v 1.71 2021/06/29 22:01:12 tom Exp $
+ */
+#include <HTUtils.h>
+#include <HTCJK.h>
+#include <HTMLDTD.h>
+
+#include <LYGlobalDefs.h>
+#include <UCMap.h>
+#include <UCdomap.h>
+#include <UCDefs.h>
+#include <LYCharSets.h>
+#include <GridText.h>
+#include <LYCurses.h>
+#include <LYStrings.h>
+
+#include <LYLeaks.h>
+
+HTkcode kanji_code = NOKANJI; /* assigned in HTMLSetCharacterHandling() */
+BOOLEAN LYHaveCJKCharacterSet = FALSE; /* assigned in HTMLSetHaveCJKCharacterSet() */
+BOOLEAN DisplayCharsetMatchLocale = TRUE; /* assigned in HTMLSetDisplayCharsetMatchLocale() */
+BOOL force_old_UCLYhndl_on_reload = FALSE; /* NOTE(review): users are outside the visible part of this file */
+int forced_UCLYhdnl; /* NOTE(review): users are outside the visible part of this file */
+int LYNumCharsets = 0; /* Will be initialized later by UC_Register. */
+int current_char_set = -1; /* will be initialized later in LYMain.c */
+int linedrawing_char_set = -1; /* -1 until assigned elsewhere */
+STRING2PTR p_entity_values = NULL; /* Pointer, for HTML_put_entity() */
+
+ /* obsolete and probably not used(???) */
+ /* will be initialized in HTMLUseCharacterSet */
+#ifdef USE_CHARSET_CHOICE
+charset_subset_t charset_subsets[MAXCHARSETS]; /* one slot per possible charset handle */
+BOOL custom_display_charset = FALSE;
+BOOL custom_assumed_doc_charset = FALSE;
+
+#ifndef ALL_CHARSETS_IN_O_MENU_SCREEN
+int display_charset_map[MAXCHARSETS];
+int assumed_doc_charset_map[MAXCHARSETS];
+
+const char *display_charset_choices[MAXCHARSETS + 1]; /* one extra slot, presumably for a NULL terminator - confirm */
+const char *assumed_charset_choices[MAXCHARSETS + 1]; /* one extra slot, presumably for a NULL terminator - confirm */
+int displayed_display_charset_idx;
+#endif
+#endif /* USE_CHARSET_CHOICE */
+
+/*
+ * New character sets now declared with UCInit() in UCdomap.c
+ *
+ * INSTRUCTIONS for adding new character sets which do not have
+ * Unicode tables now in UCdomap.h
+ *
+ *
+ * [You should no longer need to correct or add old-style mappings such as
+ * ISO_Latin1[] or SevenBitApproximations[] below - translation now works via
+ * the new chartrans mechanism, and these tables are kept for compatibility
+ * only.  We should clean this up, but that is not so easy...]
+ *
+ * Currently we only declare some charset's properties here (such as MIME
+ * names, etc.), it does not include real mapping.
+ *
+ * There is a place marked "Add your new character sets HERE" in this file.
+ * Make up a character set and add it in the same style as the ISO_LATIN1 set
+ * below, giving it a unique name.
+ *
+ * Add the name of the set to LYCharSets. Similarly add the appropriate
+ * information to the tables below: LYchar_set_names, LYCharSet_UC,
+ * LYlowest_eightbit. These 4 tables all MUST have the same order. (And this
+ * is the order you will see in Lynx Options Menu, which is why few
+ * unicode-based charsets are listed here).
+ *
+ */
+
+/* Entity values -- for ISO Latin 1 local representation
+ *
+ * This MUST match exactly the table referred to in the DTD!
+ *
+ * One string per named entity; the entity name is given at the end of each
+ * line's comment.  The entries follow the same entity order as
+ * SevenBitApproximations[] below, so the two tables can be indexed
+ * interchangeably.
+ *
+ * The entries "\001", "\002" and "\007" are placeholder bytes rather than
+ * Latin 1 characters (presumably interpreted specially by the rendering
+ * code - confirm); they are marked NEVER CHANGE THIS.
+ */
+static const char *ISO_Latin1[] =
+{
+ "\306", /* capital AE diphthong (ligature) (&#198;) - AElig */
+ "\301", /* capital A, acute accent (&#193;) - Aacute */
+ "\302", /* capital A, circumflex accent (&#194;) - Acirc */
+ "\300", /* capital A, grave accent (&#192;) - Agrave */
+ "\305", /* capital A, ring - Aring (&#197;) */
+ "\303", /* capital A, tilde - Atilde (&#195;) */
+ "\304", /* capital A, dieresis or umlaut mark (&#196;) - Auml */
+ "\307", /* capital C, cedilla - Ccedil (&#199;) */
+ "\320", /* capital Eth or D with stroke (&#208;) - Dstrok */
+ "\320", /* capital Eth, Icelandic (&#208;) - ETH */
+ "\311", /* capital E, acute accent (&#201;) - Eacute */
+ "\312", /* capital E, circumflex accent (&#202;) - Ecirc */
+ "\310", /* capital E, grave accent (&#200;) - Egrave */
+ "\313", /* capital E, dieresis or umlaut mark (&#203;) - Euml */
+ "\315", /* capital I, acute accent (&#205;) - Iacute */
+ "\316", /* capital I, circumflex accent (&#206;) - Icirc */
+ "\314", /* capital I, grave accent (&#204;) - Igrave */
+ "\317", /* capital I, dieresis or umlaut mark (&#207;) - Iuml */
+ "\321", /* capital N, tilde (&#209;) - Ntilde */
+ "\323", /* capital O, acute accent (&#211;) - Oacute */
+ "\324", /* capital O, circumflex accent (&#212;) - Ocirc */
+ "\322", /* capital O, grave accent (&#210;) - Ograve */
+ "\330", /* capital O, slash (&#216;) - Oslash */
+ "\325", /* capital O, tilde (&#213;) - Otilde */
+ "\326", /* capital O, dieresis or umlaut mark (&#214;) - Ouml */
+ "\336", /* capital THORN, Icelandic (&#222;) - THORN */
+ "\332", /* capital U, acute accent (&#218;) - Uacute */
+ "\333", /* capital U, circumflex accent (&#219;) - Ucirc */
+ "\331", /* capital U, grave accent (&#217;) - Ugrave */
+ "\334", /* capital U, dieresis or umlaut mark (&#220;) - Uuml */
+ "\335", /* capital Y, acute accent (&#221;) - Yacute */
+ "\341", /* small a, acute accent (&#225;) - aacute */
+ "\342", /* small a, circumflex accent (&#226;) - acirc */
+ "\264", /* spacing acute (&#180;) - acute */
+ "\346", /* small ae diphthong (ligature) (&#230;) - aelig */
+ "\340", /* small a, grave accent (&#224;) - agrave */
+ "\046", /* ampersand (&#38;) - amp */
+ "\345", /* small a, ring (&#229;) - aring */
+ "\343", /* small a, tilde (&#227;) - atilde */
+ "\344", /* small a, dieresis or umlaut mark (&#228;) - auml */
+ "\246", /* broken vertical bar (&#166;) - brkbar */
+ "\246", /* broken vertical bar (&#166;) - brvbar */
+ "\347", /* small c, cedilla (&#231;) - ccedil */
+ "\270", /* spacing cedilla (&#184;) - cedil */
+ "\242", /* cent sign (&#162;) - cent */
+ "\251", /* copyright sign (&#169;) - copy */
+ "\244", /* currency sign (&#164;) - curren */
+ "\260", /* degree sign (&#176;) - deg */
+ "\250", /* spacing dieresis (&#168;) - die */
+ "\367", /* division sign (&#247;) - divide */
+ "\351", /* small e, acute accent (&#233;) - eacute */
+ "\352", /* small e, circumflex accent (&#234;) - ecirc */
+ "\350", /* small e, grave accent (&#232;) - egrave */
+ "-", /* dash the width of emsp - emdash */
+ "\002", /* emsp, em space - not collapsed NEVER CHANGE THIS - emsp */
+ "-", /* dash the width of ensp - endash */
+ "\002", /* ensp, en space - not collapsed NEVER CHANGE THIS - ensp */
+ "\360", /* small eth, Icelandic (&#240;) - eth */
+ "\353", /* small e, dieresis or umlaut mark (&#235;) - euml */
+ "\275", /* fraction 1/2 (&#189;) - frac12 */
+ "\274", /* fraction 1/4 (&#188;) - frac14 */
+ "\276", /* fraction 3/4 (&#190;) - frac34 */
+ "\076", /* greater than (&#62;) - gt */
+ "\257", /* spacing macron (&#175;) - hibar */
+ "\355", /* small i, acute accent (&#237;) - iacute */
+ "\356", /* small i, circumflex accent (&#238;) - icirc */
+ "\241", /* inverted exclamation mark (&#161;) - iexcl */
+ "\354", /* small i, grave accent (&#236;) - igrave */
+ "\277", /* inverted question mark (&#191;) - iquest */
+ "\357", /* small i, dieresis or umlaut mark (&#239;) - iuml */
+ "\253", /* angle quotation mark, left (&#171;) - laquo */
+ "\074", /* less than (&#60;) - lt */
+ "\257", /* spacing macron (&#175;) - macr */
+ "-", /* dash the width of emsp - mdash */
+ "\265", /* micro sign (&#181;) - micro */
+ "\267", /* middle dot (&#183;) - middot */
+ "\001", /* nbsp non-breaking space NEVER CHANGE THIS - nbsp */
+ "-", /* dash the width of ensp - ndash */
+ "\254", /* negation sign (&#172;) - not */
+ "\361", /* small n, tilde (&#241;) - ntilde */
+ "\363", /* small o, acute accent (&#243;) - oacute */
+ "\364", /* small o, circumflex accent (&#244;) - ocirc */
+ "\362", /* small o, grave accent (&#242;) - ograve */
+ "\252", /* feminine ordinal indicator (&#170;) - ordf */
+ "\272", /* masculine ordinal indicator (&#186;) - ordm */
+ "\370", /* small o, slash (&#248;) - oslash */
+ "\365", /* small o, tilde (&#245;) - otilde */
+ "\366", /* small o, dieresis or umlaut mark (&#246;) - ouml */
+ "\266", /* paragraph sign (&#182;) - para */
+ "\261", /* plus-or-minus sign (&#177;) - plusmn */
+ "\243", /* pound sign (&#163;) - pound */
+ "\042", /* quote '"' (&#34;) - quot */
+ "\273", /* angle quotation mark, right (&#187;) - raquo */
+ "\256", /* circled R registered sign (&#174;) - reg */
+ "\247", /* section sign (&#167;) - sect */
+ "\007", /* soft hyphen (&#173;) NEVER CHANGE THIS - shy */
+ "\271", /* superscript 1 (&#185;) - sup1 */
+ "\262", /* superscript 2 (&#178;) - sup2 */
+ "\263", /* superscript 3 (&#179;) - sup3 */
+ "\337", /* small sharp s, German (sz ligature) (&#223;) - szlig */
+ "\002", /* thin space - not collapsed NEVER CHANGE THIS - thinsp */
+ "\376", /* small thorn, Icelandic (&#254;) - thorn */
+ "\327", /* multiplication sign (&#215;) - times */
+ "(TM)", /* circled TM trade mark sign (&#8482;) - trade */
+ "\372", /* small u, acute accent (&#250;) - uacute */
+ "\373", /* small u, circumflex accent (&#251;) - ucirc */
+ "\371", /* small u, grave accent (&#249;) - ugrave */
+ "\250", /* spacing dieresis (&#168;) - uml */
+ "\374", /* small u, dieresis or umlaut mark (&#252;) - uuml */
+ "\375", /* small y, acute accent (&#253;) - yacute */
+ "\245", /* yen sign (&#165;) - yen */
+ "\377", /* small y, dieresis or umlaut mark (&#255;) - yuml */
+};
+
+/* Entity values -- 7 bit character approximations
+ *
+ * This MUST match exactly the table referred to in the DTD!
+ *
+ * One plain-ASCII replacement string per named entity, in the same entity
+ * order as ISO_Latin1[] above.  When LY_UMLAUT is defined, the dieresis
+ * letters (Auml, Ouml, Uuml, auml, ouml, uuml) are spelled with a trailing
+ * "e" instead of the bare base letter.
+ *
+ * The entries "\001", "\002" and "\007" are the same placeholder bytes used
+ * in ISO_Latin1[] and are marked NEVER CHANGE THIS.
+ */
+const char *SevenBitApproximations[] =
+{
+ "AE", /* capital AE diphthong (ligature) (&#198;) - AElig */
+ "A", /* capital A, acute accent (&#193;) - Aacute */
+ "A", /* capital A, circumflex accent (&#194;) - Acirc */
+ "A", /* capital A, grave accent (&#192;) - Agrave */
+ "A", /* capital A, ring - Aring (&#197;) */
+ "A", /* capital A, tilde - Atilde (&#195;) */
+#ifdef LY_UMLAUT
+ "Ae", /* capital A, dieresis or umlaut mark (&#196;) - Auml */
+#else
+ "A", /* capital A, dieresis or umlaut mark (&#196;) - Auml */
+#endif /* LY_UMLAUT */
+ "C", /* capital C, cedilla (&#199;) - Ccedil */
+ "Dj", /* capital D with stroke (&#208;) - Dstrok */
+ "DH", /* capital Eth, Icelandic (&#208;) - ETH */
+ "E", /* capital E, acute accent (&#201;) - Eacute */
+ "E", /* capital E, circumflex accent (&#202;) - Ecirc */
+ "E", /* capital E, grave accent (&#200;) - Egrave */
+ "E", /* capital E, dieresis or umlaut mark (&#203;) - Euml */
+ "I", /* capital I, acute accent (&#205;) - Iacute */
+ "I", /* capital I, circumflex accent (&#206;) - Icirc */
+ "I", /* capital I, grave accent (&#204;) - Igrave */
+ "I", /* capital I, dieresis or umlaut mark (&#207;) - Iuml */
+ "N", /* capital N, tilde - Ntilde (&#209;) */
+ "O", /* capital O, acute accent (&#211;) - Oacute */
+ "O", /* capital O, circumflex accent (&#212;) - Ocirc */
+ "O", /* capital O, grave accent (&#210;) - Ograve */
+ "O", /* capital O, slash (&#216;) - Oslash */
+ "O", /* capital O, tilde (&#213;) - Otilde */
+#ifdef LY_UMLAUT
+ "Oe", /* capital O, dieresis or umlaut mark (&#214;) - Ouml */
+#else
+ "O", /* capital O, dieresis or umlaut mark (&#214;) - Ouml */
+#endif /* LY_UMLAUT */
+ "P", /* capital THORN, Icelandic (&#222;) - THORN */
+ "U", /* capital U, acute accent (&#218;) - Uacute */
+ "U", /* capital U, circumflex accent (&#219;) - Ucirc */
+ "U", /* capital U, grave accent (&#217;) - Ugrave */
+#ifdef LY_UMLAUT
+ "Ue", /* capital U, dieresis or umlaut mark (&#220;) - Uuml */
+#else
+ "U", /* capital U, dieresis or umlaut mark (&#220;) - Uuml */
+#endif /* LY_UMLAUT */
+ "Y", /* capital Y, acute accent (&#221;) - Yacute */
+ "a", /* small a, acute accent (&#225;) - aacute */
+ "a", /* small a, circumflex accent (&#226;) - acirc */
+ "'", /* spacing acute (&#180;) - acute */
+ "ae", /* small ae diphthong (ligature) (&#230;) - aelig */
+ "`a", /* small a, grave accent (&#224;) - agrave */
+ "&", /* ampersand (&#38;) - amp */
+ "a", /* small a, ring (&#229;) - aring */
+ "a", /* small a, tilde (&#227;) - atilde */
+#ifdef LY_UMLAUT
+ "ae", /* small a, dieresis or umlaut mark (&#228;) - auml */
+#else
+ "a", /* small a, dieresis or umlaut mark (&#228;) - auml */
+#endif /* LY_UMLAUT */
+ "|", /* broken vertical bar (&#166;) - brkbar */
+ "|", /* broken vertical bar (&#166;) - brvbar */
+ "c", /* small c, cedilla (&#231;) - ccedil */
+ ",", /* spacing cedilla (&#184;) - cedil */
+ "-c-", /* cent sign (&#162;) - cent */
+ "(c)", /* copyright sign (&#169;) - copy */
+ "CUR", /* currency sign (&#164;) - curren */
+ "DEG", /* degree sign (&#176;) - deg */
+ "\042", /* spacing dieresis (&#168;) - die */
+ "/", /* division sign (&#247;) - divide */
+ "e", /* small e, acute accent (&#233;) - eacute */
+ "e", /* small e, circumflex accent (&#234;) - ecirc */
+ "e", /* small e, grave accent (&#232;) - egrave */
+ "-", /* dash the width of emsp - emdash */
+ "\002", /* emsp NEVER CHANGE THIS - emsp */
+ "-", /* dash the width of ensp - endash */
+ "\002", /* ensp NEVER CHANGE THIS - ensp */
+ "dh", /* small eth, Icelandic eth (&#240;) */
+ "e", /* small e, dieresis or umlaut mark (&#235;) - euml */
+ " 1/2", /* fraction 1/2 (&#189;) - frac12 */
+ " 1/4", /* fraction 1/4 (&#188;) - frac14 */
+ " 3/4", /* fraction 3/4 (&#190;) - frac34 */
+ ">", /* greater than (&#62;) - gt */
+ "-", /* spacing macron (&#175;) - hibar */
+ "i", /* small i, acute accent (&#237;) - iacute */
+ "i", /* small i, circumflex accent (&#238;) - icirc */
+ "!", /* inverted exclamation mark (&#161;) - iexcl */
+ "`i", /* small i, grave accent (&#236;) - igrave */
+ "?", /* inverted question mark (&#191;) - iquest */
+ "i", /* small i, dieresis or umlaut mark (&#239;) - iuml */
+ "<<", /* angle quotation mark, left (&#171;) - laquo */
+ "<", /* less than - lt (&#60;) */
+ "-", /* spacing macron (&#175;) - macr */
+ "-", /* dash the width of emsp - mdash */
+ "u", /* micro sign (&#181;) - micro */
+ ".", /* middle dot (&#183;) - middot */
+ "\001", /* nbsp non-breaking space NEVER CHANGE THIS - nbsp */
+ "-", /* dash the width of ensp - ndash */
+ "NOT", /* negation sign (&#172;) - not */
+ "n", /* small n, tilde (&#241;) - ntilde */
+ "o", /* small o, acute accent (&#243;) - oacute */
+ "o", /* small o, circumflex accent (&#244;) - ocirc */
+ "o", /* small o, grave accent (&#242;) - ograve */
+ "-a", /* feminine ordinal indicator (&#170;) - ordf */
+ "-o", /* masculine ordinal indicator (&#186;) - ordm */
+ "o", /* small o, slash (&#248;) - oslash */
+ "o", /* small o, tilde (&#245;) - otilde */
+#ifdef LY_UMLAUT
+ "oe", /* small o, dieresis or umlaut mark (&#246;) - ouml */
+#else
+ "o", /* small o, dieresis or umlaut mark (&#246;) - ouml */
+#endif /* LY_UMLAUT */
+ "P:", /* paragraph sign (&#182;) - para */
+ "+-", /* plus-or-minus sign (&#177;) - plusmn */
+ "-L-", /* pound sign (&#163;) - pound */
+ "\"", /* quote '"' (&#34;) - quot */
+ ">>", /* angle quotation mark, right (&#187;) - raquo */
+ "(R)", /* circled R registered sign (&#174;) - reg */
+ "S:", /* section sign (&#167;) - sect */
+ "\007", /* soft hyphen (&#173;) NEVER CHANGE THIS - shy */
+ "^1", /* superscript 1 (&#185;) - sup1 */
+ "^2", /* superscript 2 (&#178;) - sup2 */
+ "^3", /* superscript 3 (&#179;) - sup3 */
+ "ss", /* small sharp s, German (sz ligature) (&#223;) - szlig */
+ "\002", /* thin space - not collapsed NEVER CHANGE THIS - thinsp */
+ "p", /* small thorn, Icelandic (&#254;) - thorn */
+ "*", /* multiplication sign (&#215;) - times */
+ "(TM)", /* circled TM trade mark sign (&#8482;) - trade */
+ "u", /* small u, acute accent (&#250;) - uacute */
+ "u", /* small u, circumflex accent (&#251;) - ucirc */
+ "u", /* small u, grave accent (&#249;) - ugrave */
+ "\042", /* spacing dieresis (&#168;) - uml */
+#ifdef LY_UMLAUT
+ "ue", /* small u, dieresis or umlaut mark (&#252;) - uuml */
+#else
+ "u", /* small u, dieresis or umlaut mark (&#252;) - uuml */
+#endif /* LY_UMLAUT */
+ "y", /* small y, acute accent (&#253;) - yacute */
+ "YEN", /* yen sign (&#165;) - yen */
+ "y", /* small y, dieresis or umlaut mark (&#255;) - yuml */
+};
+
+/*
+ * Add your new character sets HERE (but only if you can't construct Unicode
+ * tables for them). - FM
+ */
+
+/*
+ * Add the array name to LYCharSets
+ *
+ * Table of entity-value tables, indexed by charset handle.  Only the first
+ * two of the MAXCHARSETS slots are given initializers here; as with any
+ * partially initialized array, the remaining slots start out as null
+ * pointers.
+ */
+STRING2PTR LYCharSets[MAXCHARSETS] =
+{
+ ISO_Latin1, /* ISO Latin 1 */
+ SevenBitApproximations, /* 7 Bit Approximations */
+};
+
+/*
+ * User-visible names of the character sets.  Entries MUST appear in the same
+ * order as the tables in LYCharSets.  The list ends with a null pointer, which
+ * is why the array has MAXCHARSETS + 1 slots.
+ */
+const char *LYchar_set_names[MAXCHARSETS + 1] =
+{
+    "Western (ISO-8859-1)",
+    "7 bit approximations (US-ASCII)",
+    NULL
+};
+
+/*
+ * Associate additional pieces of info with each of the charsets listed above.
+ * Will be automatically modified (and extended) by charset translations which
+ * are loaded using the chartrans mechanism. Most important piece of info to
+ * put here is a MIME charset name. Used for chartrans (see UCDefs.h). The
+ * order of LYCharSets and LYCharSet_UC MUST be the same.
+ *
+ * Note that most of the charsets added by the new mechanism in src/chrtrans
+ * don't show up here at all. They don't have to.
+ *
+ * Only the first two of the MAXCHARSETS entries are initialized explicitly;
+ * the rest start out zero-initialized.  The initializers are positional, so
+ * they depend on the member order of LYUCcharset (declared elsewhere -
+ * presumably in UCDefs.h, confirm before reordering anything).
+ */
+LYUCcharset LYCharSet_UC[MAXCHARSETS] =
+{
+ /*
+ * Zero position placeholder and HTMLGetEntityUCValue() reference. - FM
+ */
+ {-1, "iso-8859-1", UCT_ENC_8BIT, 0,
+ UCT_REP_IS_LAT1,
+ UCT_CP_IS_LAT1, UCT_R_LAT1, UCT_R_LAT1},
+
+ /*
+ * Placeholders for Unicode tables. - FM
+ */
+ {-1, "us-ascii", UCT_ENC_7BIT, 0,
+ UCT_REP_SUBSETOF_LAT1,
+ UCT_CP_SUBSETOF_LAT1, UCT_R_ASCII, UCT_R_ASCII},
+
+};
+
+/*
+ * Add the code of the lowest character with the high bit set that can be
+ * directly displayed. The order of LYCharSets and LYlowest_eightbit MUST be
+ * the same.
+ *
+ * (If charset have chartrans unicode table, LYlowest_eightbit will be
+ * verified/modified anyway.)
+ *
+ * HTMLSetCharacterHandling() compares these values against the thresholds
+ * 160, 130, 191 and 155.  The value 999 is larger than any 8-bit code, so for
+ * the 7 bit approximations none of the "<=" tests there can succeed.
+ */
+int LYlowest_eightbit[MAXCHARSETS] =
+{
+ 160, /* ISO Latin 1 */
+ 999, /* 7 bit approximations */
+};
+
+/*
+ * Function to set the handling of selected character sets based on the current
+ * LYUseDefaultRawMode value. - FM
+ *
+ * i is an index into LYCharSet_UC[] and LYlowest_eightbit[] selecting the
+ * character set to configure.
+ *
+ * Updates the globals HTCJK, kanji_code, LYRawMode, HTPassEightBitRaw,
+ * HTPassEightBitNum, HTPassHighCtrlRaw, HTPassHighCtrlNum and
+ * UCLYhndl_for_unspec (plus SLsmg_Display_Eight_Bit when USE_SLANG is
+ * defined), calls ena_csi(), and, when TRACE is on, logs any change of
+ * LYRawMode or UCLYhndl_for_unspec.
+ *
+ * In the CJK branch the MIME-name chain only assigns HTCJK and kanji_code for
+ * the five names listed; for any other name they keep their previous values
+ * at that point.
+ *
+ * NOTE(review): the ena_csi() argument is computed from current_char_set, not
+ * from i - confirm that callers always pass the current character set.
+ */
+void HTMLSetCharacterHandling(int i)
+{
+ int chndl = safeUCGetLYhndl_byMIME(UCAssume_MIMEcharset); /* handle looked up from UCAssume_MIMEcharset */
+ BOOLEAN LYRawMode_flag = LYRawMode; /* old value, compared in the trace below */
+ int UCLYhndl_for_unspec_flag = UCLYhndl_for_unspec; /* old value, compared in the trace below */
+
+ if (LYCharSet_UC[i].enc != UCT_ENC_CJK) {
+ HTCJK = NOCJK;
+ kanji_code = NOKANJI;
+ if (i == chndl)
+ LYRawMode = LYUseDefaultRawMode;
+ else
+ LYRawMode = (BOOL) (!LYUseDefaultRawMode); /* inverted when i is not the assumed charset */
+
+ HTPassEightBitNum = (BOOL) ((LYCharSet_UC[i].codepoints & UCT_CP_SUPERSETOF_LAT1)
+ || (LYCharSet_UC[i].like8859 & UCT_R_HIGH8BIT));
+
+ if (LYRawMode) {
+ HTPassEightBitRaw = (BOOL) (LYlowest_eightbit[i] <= 160);
+ } else {
+ HTPassEightBitRaw = FALSE;
+ }
+ if (LYRawMode || i == chndl) {
+ HTPassHighCtrlRaw = (BOOL) (LYlowest_eightbit[i] <= 130);
+ } else {
+ HTPassHighCtrlRaw = FALSE;
+ }
+
+ HTPassHighCtrlNum = FALSE;
+
+ } else { /* CJK encoding: */
+ const char *mime = LYCharSet_UC[i].MIMEname;
+
+ if (!strcmp(mime, "euc-cn")) {
+ HTCJK = CHINESE;
+ kanji_code = EUC;
+ } else if (!strcmp(mime, "euc-jp")) {
+ HTCJK = JAPANESE;
+ kanji_code = EUC;
+ } else if (!strcmp(mime, "shift_jis")) {
+ HTCJK = JAPANESE;
+ kanji_code = SJIS;
+ } else if (!strcmp(mime, "euc-kr")) {
+ HTCJK = KOREAN;
+ kanji_code = EUC;
+ } else if (!strcmp(mime, "big5")) {
+ HTCJK = TAIPEI;
+ kanji_code = EUC; /* NOTE(review): EUC is also used for big5 - confirm intended */
+ }
+
+ /* for any CJK: */
+ if (!LYUseDefaultRawMode)
+ HTCJK = NOCJK;
+ LYRawMode = (BOOL) (IS_CJK_TTY ? TRUE : FALSE); /* IS_CJK_TTY is defined elsewhere; presumably reflects the HTCJK value set just above - confirm */
+ HTPassEightBitRaw = FALSE;
+ HTPassEightBitNum = FALSE;
+ HTPassHighCtrlRaw = (BOOL) (IS_CJK_TTY ? TRUE : FALSE);
+ HTPassHighCtrlNum = FALSE;
+ }
+
+ /*
+ * Comment for coding below:
+ * UCLYhndl_for_unspec is "current" state with LYRawMode, but
+ * UCAssume_MIMEcharset is independent from LYRawMode: holds the history
+ * and may be changed from 'O'ptions menu only. - LP
+ */
+ if (LYRawMode) {
+ UCLYhndl_for_unspec = i; /* UCAssume_MIMEcharset not changed! */
+ } else {
+ if (chndl != i &&
+ (LYCharSet_UC[i].enc != UCT_ENC_CJK ||
+ LYCharSet_UC[chndl].enc != UCT_ENC_CJK)) {
+ UCLYhndl_for_unspec = chndl; /* fall to UCAssume_MIMEcharset */
+ } else {
+ UCLYhndl_for_unspec = LATIN1; /* UCAssume_MIMEcharset not changed! */
+ }
+ }
+
+#ifdef USE_SLANG
+ if (LYlowest_eightbit[i] > 191) {
+ /*
+ * Higher than this may output cntrl chars to screen. - KW
+ */
+ SLsmg_Display_Eight_Bit = 191;
+ } else {
+ SLsmg_Display_Eight_Bit = LYlowest_eightbit[i];
+ }
+#endif /* USE_SLANG */
+
+ ena_csi(LYlowest_eightbit[current_char_set] > 155); /* indexed by current_char_set, not i */
+
+ /* some diagnostics */
+ if (TRACE) {
+ if (LYRawMode_flag != LYRawMode)
+ CTRACE((tfp,
+ "HTMLSetCharacterHandling: LYRawMode changed %s -> %s\n",
+ (LYRawMode_flag ? "ON" : "OFF"),
+ (LYRawMode ? "ON" : "OFF")));
+ if (UCLYhndl_for_unspec_flag != UCLYhndl_for_unspec)
+ CTRACE((tfp,
+ "HTMLSetCharacterHandling: UCLYhndl_for_unspec changed %d -> %d\n",
+ UCLYhndl_for_unspec_flag,
+ UCLYhndl_for_unspec));
+ }
+
+ return;
+}
+
+/*
+ * Choose the HTCJK language from the input ("in") and output ("out") MIME
+ * charset names.
+ *
+ * When LYRawMode is off, HTCJK is always set to NOCJK.  Otherwise:
+ *   - JAPANESE if the input is euc-jp or shift_jis (or utf-8 when
+ *     USE_JAPANESEUTF8_SUPPORT is defined) and the output is euc-jp or
+ *     shift_jis;
+ *   - CHINESE, TAIPEI or KOREAN if input and output are both euc-cn, big5 or
+ *     euc-kr respectively;
+ *   - NOCJK for every other combination.
+ *
+ * The names are compared exactly; no synonym handling is needed because the
+ * MIME names come from LYCharSet_UC.
+ */
+void Set_HTCJK(const char *inMIMEname,
+	       const char *outMIMEname)
+{
+    int in_japanese;
+    int out_japanese;
+
+    if (!LYRawMode) {
+	HTCJK = NOCJK;
+	return;
+    }
+
+    in_japanese = (strcmp(inMIMEname, "euc-jp") == 0
+#ifdef USE_JAPANESEUTF8_SUPPORT
+		   || strcmp(inMIMEname, "utf-8") == 0
+#endif
+		   || strcmp(inMIMEname, "shift_jis") == 0);
+    out_japanese = (strcmp(outMIMEname, "euc-jp") == 0
+		    || strcmp(outMIMEname, "shift_jis") == 0);
+
+    if (in_japanese && out_japanese) {
+	HTCJK = JAPANESE;
+	return;
+    }
+    if (strcmp(inMIMEname, "euc-cn") == 0 &&
+	strcmp(outMIMEname, "euc-cn") == 0) {
+	HTCJK = CHINESE;
+	return;
+    }
+    if (strcmp(inMIMEname, "big5") == 0 &&
+	strcmp(outMIMEname, "big5") == 0) {
+	HTCJK = TAIPEI;
+	return;
+    }
+    if (strcmp(inMIMEname, "euc-kr") == 0 &&
+	strcmp(outMIMEname, "euc-kr") == 0) {
+	HTCJK = KOREAN;
+	return;
+    }
+
+    HTCJK = NOCJK;
+}
+
+/*
+ * Set LYDefaultRawMode for character set i: TRUE when the set's encoding is
+ * UCT_ENC_CJK, FALSE otherwise. - FM
+ *
+ * Original note: currently unused - the default value is so obvious that
+ * LYUseDefaultRawMode ended up being used directly by mistake. - LP
+ */
+static void HTMLSetRawModeDefault(int i)
+{
+    const int is_cjk = (LYCharSet_UC[i].enc == UCT_ENC_CJK);
+
+    LYDefaultRawMode = (BOOL) is_cjk;
+}
+
+/*
+ * Derive LYUseDefaultRawMode from the selected character set i and the
+ * requested raw-mode flag. - FM
+ *
+ * For a CJK set, or for the set whose handle matches UCAssume_MIMEcharset,
+ * the flag is stored as given.  For any other set its logical negation is
+ * stored.  The handle lookup is only performed for non-CJK sets.
+ */
+void HTMLSetUseDefaultRawMode(int i,
+			      int modeflag)
+{
+    BOOLEAN use_default = (BOOLEAN) modeflag;
+
+    if (LYCharSet_UC[i].enc != UCT_ENC_CJK
+	&& i != safeUCGetLYhndl_byMIME(UCAssume_MIMEcharset)) {
+	use_default = (BOOL) (!modeflag);
+    }
+
+    LYUseDefaultRawMode = use_default;
+}
+
+/*
+ * Record in LYHaveCJKCharacterSet whether character set i uses the
+ * UCT_ENC_CJK encoding. - FM
+ */
+static void HTMLSetHaveCJKCharacterSet(int i)
+{
+    const int is_cjk = (LYCharSet_UC[i].enc == UCT_ENC_CJK);
+
+    LYHaveCJKCharacterSet = (BOOL) is_cjk;
+}
+
+/*
+ * Function to set the DisplayCharsetMatchLocale value based on the selected
+ * character set.  It is used in UPPER8 for 8bit case-insensitive search by
+ * matching def7_uni.tbl images. - LP
+ *
+ * i indexes LYCharSet_UC[].  The result is:
+ *   - TRUE when a CJK character set is active (old-style behaviour);
+ *   - FALSE when the MIME name starts with "cp" or "windows" (compared
+ *     case-insensitively), since such displays rarely match the locale;
+ *   - otherwise TRUE, unless LOCALE is not defined and the set is not UTF-8,
+ *     or LOCALE is defined and UCForce8bitTOUPPER is set.
+ *
+ * The prefix test used to be written without negation.  strncasecomp()
+ * returns 0 on a match, and no name can start with both "cp" and "windows",
+ * so the old condition was always true: every non-CJK set got FALSE and the
+ * final branch was unreachable.
+ */
+static void HTMLSetDisplayCharsetMatchLocale(int i)
+{
+    const char *mime;
+    BOOLEAN match;
+
+    if (LYHaveCJKCharacterSet) {
+	/*
+	 * We have no intention to pass CJK via UCTransChar if that happened.
+	 * Let someone from CJK correct this if necessary.
+	 */
+	DisplayCharsetMatchLocale = TRUE;	/* old-style */
+	return;
+    }
+
+    mime = LYCharSet_UC[i].MIMEname;
+
+    if (!strncasecomp(mime, "cp", 2) ||
+	!strncasecomp(mime, "windows", 7)) {
+	/*
+	 * Assume dos/windows displays usually on remote terminal, hence it
+	 * rarely matches locale.  (In fact, MS Windows codepoints locale are
+	 * never seen on UNIX).
+	 */
+	match = FALSE;
+    } else {
+	match = TRUE;		/* guess, but see below */
+
+#if !defined(LOCALE)
+	if (LYCharSet_UC[i].enc != UCT_ENC_UTF8)
+	    /*
+	     * Leave true for utf-8 display - the code doesn't deal very well
+	     * with this case. - kw
+	     */
+	    match = FALSE;
+#else
+	if (UCForce8bitTOUPPER) {
+	    /*
+	     * Force disable locale (from lynx.cfg)
+	     */
+	    match = FALSE;
+	}
+#endif
+    }
+
+    DisplayCharsetMatchLocale = match;
+}
+
+/*
+ * Compatibility code for lynx 2.8/2.7.2 (and earlier): the "human-readable"
+ * charset names have changed over time, so the historical names are mapped to
+ * MIME names here.  This keeps old lynx.cfg and (especially) .lynxrc files
+ * recognized.  Please update the table whenever you change the "fullname" of
+ * an existing charset.
+ */
+typedef struct _names_pairs {
+ const char *fullname; /* historical display name */
+ const char *MIMEname; /* MIME charset name it maps to */
+} names_pairs;
+/* *INDENT-OFF* */
+static const names_pairs OLD_charset_names[] = /* scanned linearly by UCGetLYhndl_byAnyName() */
+{
+ {"ISO Latin 1", "iso-8859-1"},
+ {"ISO Latin 2", "iso-8859-2"},
+ {"WinLatin1 (cp1252)", "windows-1252"},
+ {"DEC Multinational", "dec-mcs"},
+ {"Macintosh (8 bit)", "macintosh"},
+ {"NeXT character set", "next"},
+ {"KOI8-R Cyrillic", "koi8-r"},
+ {"Chinese", "euc-cn"},
+ {"Japanese (EUC)", "euc-jp"},
+ {"Japanese (SJIS)", "shift_jis"},
+ {"Korean", "euc-kr"},
+ {"Taipei (Big5)", "big5"},
+ {"Vietnamese (VISCII)", "viscii"},
+ {"7 bit approximations", "us-ascii"},
+ {"Transparent", "x-transparent"},
+ {"DosLatinUS (cp437)", "cp437"},
+ {"IBM PC character set", "cp437"},
+ {"DosLatin1 (cp850)", "cp850"},
+ {"IBM PC codepage 850", "cp850"},
+ {"DosLatin2 (cp852)", "cp852"},
+ {"PC Latin2 CP 852", "cp852"},
+ {"DosCyrillic (cp866)", "cp866"},
+ {"DosArabic (cp864)", "cp864"},
+ {"DosGreek (cp737)", "cp737"},
+ {"DosBaltRim (cp775)", "cp775"},
+ {"DosGreek2 (cp869)", "cp869"},
+ {"DosHebrew (cp862)", "cp862"},
+ {"WinLatin2 (cp1250)", "windows-1250"},
+ {"WinCyrillic (cp1251)", "windows-1251"},
+ {"WinGreek (cp1253)", "windows-1253"},
+ {"WinHebrew (cp1255)", "windows-1255"},
+ {"WinArabic (cp1256)", "windows-1256"},
+ {"WinBaltRim (cp1257)", "windows-1257"},
+ {"ISO Latin 3", "iso-8859-3"},
+ {"ISO Latin 4", "iso-8859-4"},
+ {"ISO 8859-5 Cyrillic", "iso-8859-5"},
+ {"ISO 8859-6 Arabic", "iso-8859-6"},
+ {"ISO 8859-7 Greek", "iso-8859-7"},
+ {"ISO 8859-8 Hebrew", "iso-8859-8"},
+ {"ISO-8859-8-I", "iso-8859-8"},
+ {"ISO-8859-8-E", "iso-8859-8"},
+ {"ISO 8859-9 (Latin 5)", "iso-8859-9"},
+ {"ISO 8859-10", "iso-8859-10"},
+ {"UNICODE UTF 8", "utf-8"},
+ {"RFC 1345 w/o Intro", "mnemonic+ascii+0"},
+ {"RFC 1345 Mnemonic", "mnemonic"},
+ {NULL, NULL}, /* sentinel: the lookup loop stops at the first NULL fullname */
+};
+/* *INDENT-ON* */
+
+/*
+ * lynx 2.8/2.7.2 compatibility code: resolve a "character_set" value from
+ * lynx.cfg or .lynxrc, given either as a MIME name or as a "human-readable"
+ * name (old or new style).
+ *
+ * value is trimmed in place with LYTrimTrailing() before the lookup.  The
+ * lookup order is:
+ *   1. exact match against the current display names (LYchar_set_names),
+ *      returning that index;
+ *   2. exact match against the historical names (OLD_charset_names),
+ *      returning UCGetLYhndl_byMIME() of the mapped MIME name;
+ *   3. UCGetLYhndl_byMIME() of the value itself.
+ *
+ * Returns -1 when value is NULL; otherwise whatever the steps above yield.
+ */
+int UCGetLYhndl_byAnyName(char *value)
+{
+    int idx;
+    const names_pairs *old;
+
+    if (value == NULL)
+	return -1;
+
+    LYTrimTrailing(value);
+    CTRACE((tfp, "UCGetLYhndl_byAnyName(%s)\n", value));
+
+    /* current display names */
+    for (idx = 0; idx < MAXCHARSETS; idx++) {
+	if (LYchar_set_names[idx] == NULL)
+	    break;
+	if (strcmp(value, LYchar_set_names[idx]) == 0)
+	    return idx;
+    }
+
+    /* display names used by the 2.8/2.7.2 versions */
+    for (old = OLD_charset_names; old->fullname != NULL; old++) {
+	if (strcmp(value, old->fullname) == 0)
+	    return UCGetLYhndl_byMIME(old->MIMEname);
+    }
+
+    /* last resort: treat the value as a MIME name */
+    return UCGetLYhndl_byMIME(value);
+}
+
+/*
+ * Entity names -- Ordered by ISO Latin 1 value.
+ * ---------------------------------------------
+ * For conversions of DECIMAL escaped entities.
+ * Must be in order of ascending value.
+ *
+ * Entry k holds the entity name for decimal value 160 + k, so the table
+ * covers the values 160 through 255.  HTMLGetEntityName() indexes this table
+ * directly with its argument.
+ */
+static const char *LYEntityNames[] =
+{
+/* NAME DECIMAL VALUE */
+ "nbsp", /* 160, non breaking space */
+ "iexcl", /* 161, inverted exclamation mark */
+ "cent", /* 162, cent sign */
+ "pound", /* 163, pound sign */
+ "curren", /* 164, currency sign */
+ "yen", /* 165, yen sign */
+ "brvbar", /* 166, broken vertical bar, (brkbar) */
+ "sect", /* 167, section sign */
+ "uml", /* 168, spacing dieresis */
+ "copy", /* 169, copyright sign */
+ "ordf", /* 170, feminine ordinal indicator */
+ "laquo", /* 171, angle quotation mark, left */
+ "not", /* 172, negation sign */
+ "shy", /* 173, soft hyphen */
+ "reg", /* 174, circled R registered sign */
+ "hibar", /* 175, spacing macron */
+ "deg", /* 176, degree sign */
+ "plusmn", /* 177, plus-or-minus sign */
+ "sup2", /* 178, superscript 2 */
+ "sup3", /* 179, superscript 3 */
+ "acute", /* 180, spacing acute (96) */
+ "micro", /* 181, micro sign */
+ "para", /* 182, paragraph sign */
+ "middot", /* 183, middle dot */
+ "cedil", /* 184, spacing cedilla */
+ "sup1", /* 185, superscript 1 */
+ "ordm", /* 186, masculine ordinal indicator */
+ "raquo", /* 187, angle quotation mark, right */
+ "frac14", /* 188, fraction 1/4 */
+ "frac12", /* 189, fraction 1/2 */
+ "frac34", /* 190, fraction 3/4 */
+ "iquest", /* 191, inverted question mark */
+ "Agrave", /* 192, capital A, grave accent */
+ "Aacute", /* 193, capital A, acute accent */
+ "Acirc", /* 194, capital A, circumflex accent */
+ "Atilde", /* 195, capital A, tilde */
+ "Auml", /* 196, capital A, dieresis or umlaut mark */
+ "Aring", /* 197, capital A, ring */
+ "AElig", /* 198, capital AE diphthong (ligature) */
+ "Ccedil", /* 199, capital C, cedilla */
+ "Egrave", /* 200, capital E, grave accent */
+ "Eacute", /* 201, capital E, acute accent */
+ "Ecirc", /* 202, capital E, circumflex accent */
+ "Euml", /* 203, capital E, dieresis or umlaut mark */
+ "Igrave", /* 204, capital I, grave accent */
+ "Iacute", /* 205, capital I, acute accent */
+ "Icirc", /* 206, capital I, circumflex accent */
+ "Iuml", /* 207, capital I, dieresis or umlaut mark */
+ "ETH", /* 208, capital Eth, Icelandic (or Latin2 Dstrok) */
+ "Ntilde", /* 209, capital N, tilde */
+ "Ograve", /* 210, capital O, grave accent */
+ "Oacute", /* 211, capital O, acute accent */
+ "Ocirc", /* 212, capital O, circumflex accent */
+ "Otilde", /* 213, capital O, tilde */
+ "Ouml", /* 214, capital O, dieresis or umlaut mark */
+ "times", /* 215, multiplication sign */
+ "Oslash", /* 216, capital O, slash */
+ "Ugrave", /* 217, capital U, grave accent */
+ "Uacute", /* 218, capital U, acute accent */
+ "Ucirc", /* 219, capital U, circumflex accent */
+ "Uuml", /* 220, capital U, dieresis or umlaut mark */
+ "Yacute", /* 221, capital Y, acute accent */
+ "THORN", /* 222, capital THORN, Icelandic */
+ "szlig", /* 223, small sharp s, German (sz ligature) */
+ "agrave", /* 224, small a, grave accent */
+ "aacute", /* 225, small a, acute accent */
+ "acirc", /* 226, small a, circumflex accent */
+ "atilde", /* 227, small a, tilde */
+ "auml", /* 228, small a, dieresis or umlaut mark */
+ "aring", /* 229, small a, ring */
+ "aelig", /* 230, small ae diphthong (ligature) */
+ "ccedil", /* 231, small c, cedilla */
+ "egrave", /* 232, small e, grave accent */
+ "eacute", /* 233, small e, acute accent */
+ "ecirc", /* 234, small e, circumflex accent */
+ "euml", /* 235, small e, dieresis or umlaut mark */
+ "igrave", /* 236, small i, grave accent */
+ "iacute", /* 237, small i, acute accent */
+ "icirc", /* 238, small i, circumflex accent */
+ "iuml", /* 239, small i, dieresis or umlaut mark */
+ "eth", /* 240, small eth, Icelandic */
+ "ntilde", /* 241, small n, tilde */
+ "ograve", /* 242, small o, grave accent */
+ "oacute", /* 243, small o, acute accent */
+ "ocirc", /* 244, small o, circumflex accent */
+ "otilde", /* 245, small o, tilde */
+ "ouml", /* 246, small o, dieresis or umlaut mark */
+ "divide", /* 247, division sign */
+ "oslash", /* 248, small o, slash */
+ "ugrave", /* 249, small u, grave accent */
+ "uacute", /* 250, small u, acute accent */
+ "ucirc", /* 251, small u, circumflex accent */
+ "uuml", /* 252, small u, dieresis or umlaut mark */
+ "yacute", /* 253, small y, acute accent */
+ "thorn", /* 254, small thorn, Icelandic */
+ "yuml", /* 255, small y, dieresis or umlaut mark */
+};
+
+/*
+ * Return the entity name stored in LYEntityNames[] for an ISO-8859-1 8-bit
+ * character. - FM
+ *
+ * code is used directly as a zero-based index into LYEntityNames[], whose
+ * first entry is the name for decimal value 160.  Callers are therefore
+ * presumably expected to pass (value - 160) - confirm against the call sites.
+ *
+ * Returns "" when code falls outside the table.
+ */
+const char *HTMLGetEntityName(UCode_t code)
+{
+#define IntValue code
+    const int last_index = (TABLESIZE(LYEntityNames) - 1);
+
+    if (IntValue >= 0 && IntValue <= last_index) {
+	return LYEntityNames[IntValue];
+    }
+
+    return "";
+}
+
+/*
+ * Function to return the UCode_t (long int) value for entity names. It
+ * returns 0 if not found.
+ *
+ * unicode_entities[] handles all the names from old style entities[] too.
+ * Lynx now calls unicode_entities[] only through this function:
+ * HTMLGetEntityUCValue(). Note, we need not check for special characters here
+ * in function or even before it, we should check them *after* invoking this
+ * function, see put_special_unicodes() in SGML.c.
+ *
+ * In the future we will try to isolate all calls to entities[] in favor of new
+ * unicode-based chartrans scheme. - LP
+ */
+UCode_t HTMLGetEntityUCValue(const char *name)
+{
+#include <entities.h>
+
+ UCode_t value = 0;
+ size_t i, high, low;
+ int diff = 0;
+ size_t number_of_unicode_entities = TABLESIZE(unicode_entities);
+
+ /*
+ * Make sure we have a non-zero length name. - FM
+ */
+ if (isEmpty(name))
+ return (value);
+
+ /*
+ * Try UC_entity_info unicode_entities[].
+ */
+ for (low = 0, high = number_of_unicode_entities;
+ high > low;
+ diff < 0 ? (low = i + 1) : (high = i)) {
+ /*
+ * Binary search.
+ */
+ i = (low + (high - low) / 2);
+ diff = AS_cmp(unicode_entities[i].name, name); /* Case sensitive! */
+ if (diff == 0) {
+ value = unicode_entities[i].code;
+ break;
+ }
+ }
+ return (value);
+}
+
+/*
+ * Original comment -
+ * Assume these are Microsoft code points, inflicted on us by FrontPage. - FM
+ *
+ * MS FrontPage uses syntax like &#153; in 128-159 range and doesn't follow
+ * Unicode standards for this area. Windows-1252 codepoints are assumed here.
+ *
+ * However see -
+ * http://www.whatwg.org/specs/web-apps/current-work/multipage/infrastructure.html#character-encodings-0
+ */
+UCode_t LYcp1252ToUnicode(UCode_t code)
+{
+ if ((code == 1) ||
+ (code > 127 && code < 160)) {
+ switch (code) {
+ case 1:
+ /*
+ * WHITE SMILING FACE
+ */
+ code = 0x263a;
+ break;
+ case 128:
+ /*
+ * EURO currency sign
+ */
+ code = 0x20ac;
+ break;
+ case 130:
+ /*
+ * SINGLE LOW-9 QUOTATION MARK (sbquo)
+ */
+ code = 0x201a;
+ break;
+ case 131:
+ /*
+ * LATIN SMALL LETTER F WITH HOOK
+ */
+ code = 0x192;
+ break;
+ case 132:
+ /*
+ * DOUBLE LOW-9 QUOTATION MARK (bdquo)
+ */
+ code = 0x201e;
+ break;
+ case 133:
+ /*
+ * HORIZONTAL ELLIPSIS (hellip)
+ */
+ code = 0x2026;
+ break;
+ case 134:
+ /*
+ * DAGGER (dagger)
+ */
+ code = 0x2020;
+ break;
+ case 135:
+ /*
+ * DOUBLE DAGGER (Dagger)
+ */
+ code = 0x2021;
+ break;
+ case 136:
+ /*
+ * MODIFIER LETTER CIRCUMFLEX ACCENT
+ */
+ code = 0x2c6;
+ break;
+ case 137:
+ /*
+ * PER MILLE SIGN (permil)
+ */
+ code = 0x2030;
+ break;
+ case 138:
+ /*
+ * LATIN CAPITAL LETTER S WITH CARON
+ */
+ code = 0x160;
+ break;
+ case 139:
+ /*
+ * SINGLE LEFT-POINTING ANGLE QUOTATION MARK (lsaquo)
+ */
+ code = 0x2039;
+ break;
+ case 140:
+ /*
+ * LATIN CAPITAL LIGATURE OE
+ */
+ code = 0x152;
+ break;
+ case 142:
+ /*
+ * LATIN CAPITAL LETTER Z WITH CARON
+ */
+ code = 0x17d;
+ break;
+ case 145:
+ /*
+ * LEFT SINGLE QUOTATION MARK (lsquo)
+ */
+ code = 0x2018;
+ break;
+ case 146:
+ /*
+ * RIGHT SINGLE QUOTATION MARK (rsquo)
+ */
+ code = 0x2019;
+ break;
+ case 147:
+ /*
+ * LEFT DOUBLE QUOTATION MARK (ldquo)
+ */
+ code = 0x201c;
+ break;
+ case 148:
+ /*
+ * RIGHT DOUBLE QUOTATION MARK (rdquo)
+ */
+ code = 0x201d;
+ break;
+ case 149:
+ /*
+ * BULLET (bull)
+ */
+ code = 0x2022;
+ break;
+ case 150:
+ /*
+ * EN DASH (ndash)
+ */
+ code = 0x2013;
+ break;
+ case 151:
+ /*
+ * EM DASH (mdash)
+ */
+ code = 0x2014;
+ break;
+ case 152:
+ /*
+ * SMALL TILDE (tilde)
+ */
+ code = 0x02dc;
+ break;
+ case 153:
+ /*
+ * TRADE MARK SIGN (trade)
+ */
+ code = 0x2122;
+ break;
+ case 154:
+ /*
+ * LATIN SMALL LETTER S WITH CARON
+ */
+ code = 0x161;
+ break;
+ case 155:
+ /*
+ * SINGLE RIGHT-POINTING ANGLE QUOTATION MARK (rsaquo)
+ */
+ code = 0x203a;
+ break;
+ case 156:
+ /*
+ * LATIN SMALL LIGATURE OE
+ */
+ code = 0x153;
+ break;
+ case 158:
+ /*
+ * LATIN SMALL LETTER Z WITH CARON
+ */
+ code = 0x17e;
+ break;
+ case 159:
+ /*
+ * LATIN CAPITAL LETTER Y WITH DIAERESIS
+ */
+ code = 0x178;
+ break;
+ default:
+ /*
+ * Undefined (by convention, use the replacement character).
+ */
+ code = UCS_REPL;
+ break;
+ }
+ }
+ return code;
+}
+
+/*
+ * Function to select a character set and then set the character handling and
+ * LYHaveCJKCharacterSet flag. - FM
+ */
+void HTMLUseCharacterSet(int i)
+{
+ HTMLSetRawModeDefault(i);
+ p_entity_values = LYCharSets[i];
+ HTMLSetCharacterHandling(i); /* set LYRawMode and CJK attributes */
+ HTMLSetHaveCJKCharacterSet(i);
+ HTMLSetDisplayCharsetMatchLocale(i);
+ return;
+}
+
+/*
+ * Initializer, calls initialization function for the CHARTRANS handling. - KW
+ */
+int LYCharSetsDeclared(void)
+{
+ UCInit();
+
+ return UCInitialized;
+}
+
+#ifdef USE_CHARSET_CHOICE
+ /*
+  * Finish setting up charset_subsets[] and, for the old options menu screen,
+  * build the display / assumed-document charset choice lists.
+  *
+  * The entries for UCLYhndl_for_unspec and current_char_set are always made
+  * visible.  Both globals are used as array indices without checking.
+  * NOTE(review): assumes both hold valid charset handles by the time this
+  * is called - confirm against the caller.
+  *
+  * Unless ALL_CHARSETS_IN_O_MENU_SCREEN is defined, every charset that is
+  * not hidden is appended to the corresponding map/choices arrays, and each
+  * choices array is terminated with NULL (also when LYNumCharsets is 0).
+  */
+ void init_charset_subsets(void)
+ {
+ #ifndef ALL_CHARSETS_IN_O_MENU_SCREEN
+     /* Only the old menu screen code below needs these locals. */
+     int i, n;
+     int cur_display = 0;
+     int cur_assumed = 0;
+ #endif
+
+     /* add them to displayed values */
+     charset_subsets[UCLYhndl_for_unspec].hide_assumed = FALSE;
+     charset_subsets[current_char_set].hide_display = FALSE;
+
+ #ifndef ALL_CHARSETS_IN_O_MENU_SCREEN
+     /* all this stuff is for supporting old menu screen... */
+     for (i = 0; i < LYNumCharsets; ++i) {
+         if (charset_subsets[i].hide_display == FALSE) {
+             n = cur_display++;
+             if (i == current_char_set)
+                 displayed_display_charset_idx = n;
+             display_charset_map[n] = i;
+             display_charset_choices[n] = LYchar_set_names[i];
+         }
+         if (charset_subsets[i].hide_assumed == FALSE) {
+             n = cur_assumed++;
+             assumed_doc_charset_map[n] = i;
+             assumed_charset_choices[n] = LYCharSet_UC[i].MIMEname;
+             charset_subsets[i].assumed_idx = n;
+         }
+     }
+
+     /* Terminate both lists once, after the final counts are known. */
+     display_charset_choices[cur_display] = NULL;
+     assumed_charset_choices[cur_assumed] = NULL;
+ #endif
+ }
+#endif /* USE_CHARSET_CHOICE */