summaryrefslogtreecommitdiffstats
path: root/encoding.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--encoding.c2141
1 files changed, 2141 insertions, 0 deletions
diff --git a/encoding.c b/encoding.c
new file mode 100644
index 0000000..976978a
--- /dev/null
+++ b/encoding.c
@@ -0,0 +1,2141 @@
+/* Copyright (c) 1993-2003
+ * Juergen Weigert (jnweiger@immd4.informatik.uni-erlangen.de)
+ * Michael Schroeder (mlschroe@immd4.informatik.uni-erlangen.de)
+ * Copyright (c) 1987 Oliver Laumann
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (see the file COPYING); if not, see
+ * https://www.gnu.org/licenses/, or contact Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA
+ *
+ ****************************************************************
+ */
+
+#include <sys/types.h>
+
+#include "config.h"
+#include "screen.h"
+#include "extern.h"
+
+#ifdef ENCODINGS
+
+extern unsigned char *null;
+extern struct display *display, *displays;
+extern struct layer *flayer;
+
+extern char *screenencodings;
+
+#ifdef DW_CHARS
+extern int cjkwidth;
+#endif
+
+static int encmatch __P((char *, char *));
+# ifdef UTF8
+static int recode_char __P((int, int, int));
+static int recode_char_to_encoding __P((int, int));
+static void comb_tofront __P((int));
+# ifdef DW_CHARS
+static int recode_char_dw __P((int, int *, int, int));
+static int recode_char_dw_to_encoding __P((int, int *, int));
+# endif
+# endif
+
+/*
+ * Description of one supported encoding (one row of encodings[]).
+ */
+struct encoding {
+ char *name; /* name of the encoding, e.g. "eucJP" */
+ char *charsets; /* NOTE(review): not used in this part of the file; looks like a charset designation string - confirm */
+ int deffont; /* font put on characters that carry no font of their own when recoding; 0 = none */
+ int usegr; /* NOTE(review): not used in this part of the file - meaning to be confirmed */
+ int noc1; /* NOTE(review): not used in this part of the file - meaning to be confirmed */
+ char *fontlist; /* NUL terminated list of font ids tried in order when recoding into this encoding; may be 0 */
+};
+
+/* big5 font: ^X */
+/* KOI8-R font: 96 ! */
+/* CP1251 font: 96 ? */
+
+/*
+ * Table of the known encodings.  It is indexed by encoding number
+ * (see the encodings[encoding] lookups in the recode functions).
+ * NOTE(review): the position of the "UTF-8" row presumably has to match
+ * the value of the UTF8 constant - confirm before reordering rows.
+ */
+struct encoding encodings[] = {
+ { "C", 0, 0, 0, 0, 0 },
+ { "eucJP", "B\002I\00401", 0, 1, 0, "\002\004I" },
+ { "SJIS", "BIBB01", 0, 1, 1, "\002I" },
+ { "eucKR", "B\003BB01", 0, 1, 0, "\003" },
+ { "eucCN", "B\001BB01", 0, 1, 0, "\001" },
+ { "Big5", "B\030BB01", 0, 1, 0, "\030" },
+ { "KOI8-R", 0, 0x80|'!', 0, 1, 0 },
+ { "CP1251", 0, 0x80|'?', 0, 1, 0 },
+ { "UTF-8", 0, -1, 0, 0, 0 },
+ { "ISO8859-2", 0, 0x80|'B', 0, 0, 0 },
+ { "ISO8859-3", 0, 0x80|'C', 0, 0, 0 },
+ { "ISO8859-4", 0, 0x80|'D', 0, 0, 0 },
+ { "ISO8859-5", 0, 0x80|'L', 0, 0, 0 },
+ { "ISO8859-6", 0, 0x80|'G', 0, 0, 0 },
+ { "ISO8859-7", 0, 0x80|'F', 0, 0, 0 },
+ { "ISO8859-8", 0, 0x80|'H', 0, 0, 0 },
+ { "ISO8859-9", 0, 0x80|'M', 0, 0, 0 },
+ { "ISO8859-10", 0, 0x80|'V', 0, 0, 0 },
+ { "ISO8859-15", 0, 0x80|'b', 0, 0, 0 },
+ { "jis", 0, 0, 0, 0, "\002\004I" },
+ { "GBK", "B\031BB01", 0x80|'b', 1, 1, "\031" }
+};
+
+#ifdef UTF8
+
+/*
+ * Built-in font translation tables, stored back to back.
+ *
+ * Every section starts with a header pair { font, 0 } and is followed by
+ * { code in that font, unicode value } pairs up to a { 0, 0 } terminator.
+ * An additional { 0, 0 } pair ends the whole array (InitBuiltinTabs()
+ * stops at it).
+ *
+ * A pair whose first element has bit 0x8000 set closes a range that was
+ * opened by the pair directly before it; recode_char() maps codes inside
+ * such a range by their offset from the opening pair.
+ */
+static unsigned short builtin_tabs[][2] = {
+ { 0x30, 0 }, /* 0: special graphics (line drawing) */
+ { 0x005f, 0x25AE },
+ { 0x0060, 0x25C6 },
+ { 0x0061, 0x2592 },
+ { 0x0062, 0x2409 },
+ { 0x0063, 0x240C },
+ { 0x0064, 0x240D },
+ { 0x0065, 0x240A },
+ { 0x0066, 0x00B0 },
+ { 0x0067, 0x00B1 },
+ { 0x0068, 0x2424 },
+ { 0x0069, 0x240B },
+ { 0x006a, 0x2518 },
+ { 0x006b, 0x2510 },
+ { 0x006c, 0x250C },
+ { 0x006d, 0x2514 },
+ { 0x006e, 0x253C },
+ { 0x006f, 0x23BA },
+ { 0x0070, 0x23BB },
+ { 0x0071, 0x2500 },
+ { 0x0072, 0x23BC },
+ { 0x0073, 0x23BD },
+ { 0x0074, 0x251C },
+ { 0x0075, 0x2524 },
+ { 0x0076, 0x2534 },
+ { 0x0077, 0x252C },
+ { 0x0078, 0x2502 },
+ { 0x0079, 0x2264 },
+ { 0x007a, 0x2265 },
+ { 0x007b, 0x03C0 },
+ { 0x007c, 0x2260 },
+ { 0x007d, 0x00A3 },
+ { 0x007e, 0x00B7 },
+ { 0, 0},
+
+ { 0x34, 0 }, /* 4: Dutch */
+ { 0x0023, 0x00a3 },
+ { 0x0040, 0x00be },
+ { 0x005b, 0x00ff },
+ { 0x005c, 0x00bd },
+ { 0x005d, 0x007c },
+ { 0x007b, 0x00a8 },
+ { 0x007c, 0x0066 },
+ { 0x007d, 0x00bc },
+ { 0x007e, 0x00b4 },
+ { 0, 0},
+
+ { 0x35, 0 }, /* 5: Finnish */
+ { 0x005b, 0x00c4 },
+ { 0x005c, 0x00d6 },
+ { 0x005d, 0x00c5 },
+ { 0x005e, 0x00dc },
+ { 0x0060, 0x00e9 },
+ { 0x007b, 0x00e4 },
+ { 0x007c, 0x00f6 },
+ { 0x007d, 0x00e5 },
+ { 0x007e, 0x00fc },
+ { 0, 0},
+
+ { 0x36, 0 }, /* 6: Norwegian/Danish */
+ { 0x0040, 0x00c4 },
+ { 0x005b, 0x00c6 },
+ { 0x005c, 0x00d8 },
+ { 0x005d, 0x00c5 },
+ { 0x005e, 0x00dc },
+ { 0x0060, 0x00e4 },
+ { 0x007b, 0x00e6 },
+ { 0x007c, 0x00f8 },
+ { 0x007d, 0x00e5 },
+ { 0x007e, 0x00fc },
+ { 0, 0},
+
+ { 0x37, 0 }, /* 7: Swedish */
+ { 0x0040, 0x00c9 },
+ { 0x005b, 0x00c4 },
+ { 0x005c, 0x00d6 },
+ { 0x005d, 0x00c5 },
+ { 0x005e, 0x00dc },
+ { 0x0060, 0x00e9 },
+ { 0x007b, 0x00e4 },
+ { 0x007c, 0x00f6 },
+ { 0x007d, 0x00e5 },
+ { 0x007e, 0x00fc },
+ { 0, 0},
+
+ { 0x3d, 0}, /* =: Swiss */
+ { 0x0023, 0x00f9 },
+ { 0x0040, 0x00e0 },
+ { 0x005b, 0x00e9 },
+ { 0x005c, 0x00e7 },
+ { 0x005d, 0x00ea },
+ { 0x005e, 0x00ee },
+ { 0x005f, 0x00e8 },
+ { 0x0060, 0x00f4 },
+ { 0x007b, 0x00e4 },
+ { 0x007c, 0x00f6 },
+ { 0x007d, 0x00fc },
+ { 0x007e, 0x00fb },
+ { 0, 0},
+
+ { 0x41, 0}, /* A: UK */
+ { 0x0023, 0x00a3 },
+ { 0, 0},
+
+ { 0x4b, 0}, /* K: German */
+ { 0x0040, 0x00a7 },
+ { 0x005b, 0x00c4 },
+ { 0x005c, 0x00d6 },
+ { 0x005d, 0x00dc },
+ { 0x007b, 0x00e4 },
+ { 0x007c, 0x00f6 },
+ { 0x007d, 0x00fc },
+ { 0x007e, 0x00df },
+ { 0, 0},
+
+ { 0x51, 0}, /* Q: French Canadian */
+ { 0x0040, 0x00e0 },
+ { 0x005b, 0x00e2 },
+ { 0x005c, 0x00e7 },
+ { 0x005d, 0x00ea },
+ { 0x005e, 0x00ee },
+ { 0x0060, 0x00f4 },
+ { 0x007b, 0x00e9 },
+ { 0x007c, 0x00f9 },
+ { 0x007d, 0x00e8 },
+ { 0x007e, 0x00fb },
+ { 0, 0},
+
+ { 0x52, 0}, /* R: French */
+ { 0x0023, 0x00a3 },
+ { 0x0040, 0x00e0 },
+ { 0x005b, 0x00b0 },
+ { 0x005c, 0x00e7 },
+ { 0x005d, 0x00a7 },
+ { 0x007b, 0x00e9 },
+ { 0x007c, 0x00f9 },
+ { 0x007d, 0x00e8 },
+ { 0x007e, 0x00a8 },
+ { 0, 0},
+
+ { 0x59, 0}, /* Y: Italian */
+ { 0x0023, 0x00a3 },
+ { 0x0040, 0x00a7 },
+ { 0x005b, 0x00b0 },
+ { 0x005c, 0x00e7 },
+ { 0x005d, 0x00e9 },
+ { 0x0060, 0x00f9 },
+ { 0x007b, 0x00e0 },
+ { 0x007c, 0x00f2 },
+ { 0x007d, 0x00e8 },
+ { 0x007e, 0x00ec },
+ { 0, 0},
+
+ { 0x5a, 0}, /* Z: Spanish */
+ { 0x0023, 0x00a3 },
+ { 0x0040, 0x00a7 },
+ { 0x005b, 0x00a1 },
+ { 0x005c, 0x00d1 },
+ { 0x005d, 0x00bf },
+ { 0x007b, 0x00b0 },
+ { 0x007c, 0x00f1 },
+ { 0x007d, 0x00e7 },
+ { 0, 0},
+
+ { 0xe2, 0}, /* 96-b: ISO-8859-15 */
+ { 0x00a4, 0x20ac },
+ { 0x00a6, 0x0160 },
+ { 0x00a8, 0x0161 },
+ { 0x00b4, 0x017D },
+ { 0x00b8, 0x017E },
+ { 0x00bc, 0x0152 },
+ { 0x00bd, 0x0153 },
+ { 0x00be, 0x0178 },
+ { 0, 0},
+
+ { 0x4a, 0}, /* J: JIS 0201 Roman */
+ { 0x005c, 0x00a5 },
+ { 0x007e, 0x203e },
+ { 0, 0},
+
+ { 0x49, 0}, /* I: halfwidth katakana */
+ { 0x0021, 0xff61 },
+ { 0x005f|0x8000, 0xff9f },
+ { 0, 0},
+
+ { 0, 0}
+};
+
+/*
+ * Translation table slot for one font.
+ */
+struct recodetab
+{
+ unsigned short (*tab)[2]; /* { code in font, unicode } pairs, ended by a zero pair; 0 if no table */
+ int flags; /* RECODETAB_* bits; 0 means loading was not attempted yet */
+};
+
+#define RECODETAB_ALLOCED 1 /* NOTE(review): not set in this part of the file; presumably "tab was malloc'ed" - confirm */
+#define RECODETAB_BUILTIN 2 /* tab points into builtin_tabs (set by InitBuiltinTabs) */
+#define RECODETAB_TRIED 4 /* NOTE(review): not set in this part of the file; presumably "load was attempted" - confirm */
+
+/* one slot per font id (0..255) */
+static struct recodetab recodetabs[256];
+
+/*
+ * Hook every section of builtin_tabs into recodetabs.
+ *
+ * The first element of a section header is the font id; the mapping
+ * pairs of the section start right behind the header.
+ */
+void
+InitBuiltinTabs()
+{
+  unsigned short (*entry)[2] = builtin_tabs;
+
+  while (entry[0][0] != 0)
+    {
+      struct recodetab *slot = &recodetabs[entry[0][0]];
+
+      slot->flags = RECODETAB_BUILTIN;
+      slot->tab = entry + 1;
+      /* leave the header, then walk to the terminator of this section */
+      do
+        entry++;
+      while (entry[0][0] != 0);
+      /* step over the terminator; we now look at the next header (or the end) */
+      entry++;
+    }
+}
+
+/*
+ * Recode a single width character with the help of the font tables.
+ *
+ * to_utf != 0: c is (font << 8) | code; the unicode value is returned.
+ * Codes below 256 (no font) are returned unchanged, codes without a table
+ * entry are mapped to their low byte. The font argument is not used.
+ *
+ * to_utf == 0: c is a unicode value.
+ * font == -1: search the tables of fonts 32..127 that already have a
+ * table; returns code | (font << 8), c itself if c < 256, or '?' if
+ * nothing matches.
+ * otherwise: look c up in the table of this font only (loading the
+ * table if that was not tried yet); returns -1 if there is no mapping.
+ */
+static int
+recode_char(c, to_utf, font)
+int c, to_utf, font;
+{
+ int f;
+ unsigned short (*p)[2];
+
+ if (to_utf)
+ {
+ if (c < 256)
+ return c;
+ f = (c >> 8) & 0xff;
+ c &= 0xff;
+ /* map aliases to keep the table small */
+ switch (f)
+ {
+ case 'C':
+ f ^= ('C' ^ '5');
+ break;
+ case 'E':
+ f ^= ('E' ^ '6');
+ break;
+ case 'H':
+ f ^= ('H' ^ '7');
+ break;
+ default:
+ break;
+ }
+ p = recodetabs[f].tab;
+ /* flags == 0: nobody tried to load a table for this font yet */
+ if (p == 0 && recodetabs[f].flags == 0)
+ {
+ LoadFontTranslation(f, 0);
+ p = recodetabs[f].tab;
+ }
+ if (p)
+ for (; (*p)[0]; p++)
+ {
+ /* bit 0x8000 marks the upper end of a range opened by the previous pair */
+ if ((p[0][0] & 0x8000) && (c <= (p[0][0] & 0x7fff)) && c >= p[-1][0])
+ return c - p[-1][0] + p[-1][1];
+ if ((*p)[0] == c)
+ return (*p)[1];
+ }
+ return c & 0xff; /* map to latin1 */
+ }
+ if (font == -1)
+ {
+ if (c < 256)
+ return c; /* latin1 */
+ /* no font given: try every font that already has a table (nothing is loaded here) */
+ for (font = 32; font < 128; font++)
+ {
+ p = recodetabs[font].tab;
+ if (p)
+ for (; (*p)[1]; p++)
+ {
+ if ((p[0][0] & 0x8000) && c <= p[0][1] && c >= p[-1][1])
+ return (c - p[-1][1] + p[-1][0]) | (font << 8);
+ if ((*p)[1] == c)
+ return (*p)[0] | (font << 8);
+ }
+ }
+ return '?';
+ }
+ /* fonts with bit 128 set keep the codes below 128 unchanged */
+ if (c < 128 && (font & 128) != 0)
+ return c;
+ if (font >= 32)
+ {
+ p = recodetabs[font].tab;
+ if (p == 0 && recodetabs[font].flags == 0)
+ {
+ LoadFontTranslation(font, 0);
+ p = recodetabs[font].tab;
+ }
+ if (p)
+ for (; (*p)[1]; p++)
+ {
+ /* for fonts with bit 128 set the font is not merged into the result */
+ if ((p[0][0] & 0x8000) && c <= p[0][1] && c >= p[-1][1])
+ return (c - p[-1][1] + p[-1][0]) | (font & 128 ? 0 : font << 8);
+ if ((*p)[1] == c)
+ return (*p)[0] | (font & 128 ? 0 : font << 8);
+ }
+ }
+ return -1;
+}
+
+
+#ifdef DW_CHARS
+/*
+ * Recode a character that is stored in two cells.
+ *
+ * to_utf != 0: c is (font << 8) | first byte, *c2p holds the second byte.
+ * Returns the unicode value, or UCS_REPL_DW if the table of the font has
+ * no entry. *c2p becomes 0xffff, or ' ' if the resulting character is
+ * not double width. The font argument is not used.
+ *
+ * to_utf == 0: c is a unicode value.
+ * font == -1: search the tables of fonts 0..027 (octal) that already
+ * have a table; if nothing matches both the result and *c2p are '?'.
+ * font < 32: look c up in the table of this font only (loading the
+ * table if that was not tried yet); returns -1 if there is no mapping.
+ * On a match the first byte | (font << 8) is returned and *c2p is set to
+ * the second byte | (font << 8) | 0x8000.
+ */
+static int
+recode_char_dw(c, c2p, to_utf, font)
+int c, *c2p, to_utf, font;
+{
+ int f;
+ unsigned short (*p)[2];
+
+ if (to_utf)
+ {
+ f = (c >> 8) & 0xff;
+ /* build the 16 bit table key from the two bytes */
+ c = (c & 255) << 8 | (*c2p & 255);
+ *c2p = 0xffff;
+ p = recodetabs[f].tab;
+ /* flags == 0: nobody tried to load a table for this font yet */
+ if (p == 0 && recodetabs[f].flags == 0)
+ {
+ LoadFontTranslation(f, 0);
+ p = recodetabs[f].tab;
+ }
+ if (p)
+ for (; (*p)[0]; p++)
+ if ((*p)[0] == c)
+ {
+#ifdef DW_CHARS
+ if (!utf8_isdouble((*p)[1]))
+ *c2p = ' ';
+#endif
+ return (*p)[1];
+ }
+ return UCS_REPL_DW;
+ }
+ if (font == -1)
+ {
+ /* NOTE(review): 030 is exclusive here, so fonts \030 and \031 (used by Big5/GBK in encodings[]) are not searched - confirm this is intended */
+ for (font = 0; font < 030; font++)
+ {
+ p = recodetabs[font].tab;
+ if (p)
+ for (; (*p)[1]; p++)
+ if ((*p)[1] == c)
+ {
+ *c2p = ((*p)[0] & 255) | font << 8 | 0x8000;
+ return ((*p)[0] >> 8) | font << 8;
+ }
+ }
+ *c2p = '?';
+ return '?';
+ }
+ if (font < 32)
+ {
+ p = recodetabs[font].tab;
+ if (p == 0 && recodetabs[font].flags == 0)
+ {
+ LoadFontTranslation(font, 0);
+ p = recodetabs[font].tab;
+ }
+ if (p)
+ for (; (*p)[1]; p++)
+ if ((*p)[1] == c)
+ {
+ *c2p = ((*p)[0] & 255) | font << 8 | 0x8000;
+ return ((*p)[0] >> 8) | font << 8;
+ }
+ }
+ return -1;
+}
+#endif
+
+/*
+ * Recode the single width character c into the given encoding.
+ *
+ * For UTF8 c is a font/code combination that gets mapped to unicode.
+ * For every other encoding c is a unicode value: the fonts of the
+ * encoding's fontlist are tried first, then its default font, and as
+ * a last resort every font that has a table.
+ */
+static int
+recode_char_to_encoding(c, encoding)
+int c, encoding;
+{
+  struct encoding *enc;
+  char *fonts;
+  int res;
+
+  if (encoding == UTF8)
+    return recode_char(c, 1, -1);
+
+  enc = &encodings[encoding];
+  for (fonts = enc->fontlist; fonts != 0 && *fonts != 0; fonts++)
+    {
+      res = recode_char(c, 0, (unsigned char)*fonts);
+      if (res != -1)
+        return res;
+    }
+  if (enc->deffont != 0)
+    {
+      res = recode_char(c, 0, enc->deffont);
+      if (res != -1)
+        return res;
+    }
+  return recode_char(c, 0, -1);
+}
+
+#ifdef DW_CHARS
+/*
+ * Recode the two cell character c / *c2p into the given encoding.
+ *
+ * Works like recode_char_to_encoding(), but uses recode_char_dw() and
+ * hands the second cell back through c2p.
+ */
+static int
+recode_char_dw_to_encoding(c, c2p, encoding)
+int c, *c2p, encoding;
+{
+  struct encoding *enc;
+  char *fonts;
+  int res;
+
+  if (encoding == UTF8)
+    return recode_char_dw(c, c2p, 1, -1);
+
+  enc = &encodings[encoding];
+  for (fonts = enc->fontlist; fonts != 0 && *fonts != 0; fonts++)
+    {
+      res = recode_char_dw(c, c2p, 0, (unsigned char)*fonts);
+      if (res != -1)
+        return res;
+    }
+  if (enc->deffont != 0)
+    {
+      res = recode_char_dw(c, c2p, 0, enc->deffont);
+      if (res != -1)
+        return res;
+    }
+  return recode_char_dw(c, c2p, 0, -1);
+}
+#endif
+
+
+/*
+ * Recode one character cell from encoding 'from' to encoding 'to'.
+ *
+ * Returns mc itself if nothing has to be done, otherwise a pointer to
+ * a static struct mchar that is overwritten by the next call.
+ */
+struct mchar *
+recode_mchar(mc, from, to)
+struct mchar *mc;
+int from, to;
+{
+  static struct mchar rmc;
+  int code;
+
+  debug3("recode_mchar %02x from %d to %d\n", mc->image, from, to);
+  /* work is only needed if exactly one side is UTF-8 */
+  if (from == to)
+    return mc;
+  if (from != UTF8 && to != UTF8)
+    return mc;
+
+  rmc = *mc;
+  if (from != UTF8 && rmc.font == 0)
+    rmc.font = encodings[from].deffont;
+  if (rmc.font == 0)
+    return mc;		/* latin1 is the same in unicode */
+
+  code = rmc.image | (rmc.font << 8);
+  if (from == UTF8)
+    code |= rmc.fontx << 16;
+#ifdef DW_CHARS
+  if (rmc.mbcs)
+    {
+      int second = rmc.mbcs;
+
+      code = recode_char_dw_to_encoding(code, &second, to);
+      rmc.mbcs = second;
+    }
+  else
+#endif
+    code = recode_char_to_encoding(code, to);
+
+  /* split the result into the image/font(/fontx) bytes again */
+  rmc.image = code & 255;
+  rmc.font = (code >> 8) & 255;
+  if (to == UTF8)
+    rmc.fontx = (code >> 16) & 255;
+  return &rmc;
+}
+
+/*
+ * Recode the first w cells of line ml from encoding 'from' to 'to'.
+ *
+ * Returns ml itself if nothing has to be done (same encoding, neither
+ * side is UTF8, w == 0, or the line carries no fonts and 'from' has no
+ * default font) or if the work buffers cannot be allocated.  Otherwise
+ * a pointer to one of two static work lines is returned; the two lines
+ * are used alternately, so a result stays intact across exactly one
+ * further call.  attr (and color/colorx, if compiled in) of the result
+ * point to the arrays of ml, they are not copied.
+ */
+struct mline *
+recode_mline(ml, w, from, to)
+struct mline *ml;
+int w;
+int from, to;
+{
+  static int maxlen;
+  static int last;
+  static struct mline rml[2], *rl;
+  int i, c;
+
+  if (from == to || (from != UTF8 && to != UTF8) || w == 0)
+    return ml;
+  if (ml->font == null && ml->fontx == null && encodings[from].deffont == 0)
+    return ml;
+  if (w > maxlen)
+    {
+      for (i = 0; i < 2; i++)
+        {
+          int failed = 0;
+          void *nbuf;
+
+          /*
+           * Grow via a temporary: assigning the result of realloc()
+           * straight to the buffer pointer would lose (leak) the old
+           * buffer when realloc() fails.
+           */
+          nbuf = rml[i].image ? realloc(rml[i].image, w) : malloc(w);
+          if (nbuf)
+            rml[i].image = nbuf;
+          else
+            failed = 1;
+          nbuf = rml[i].font ? realloc(rml[i].font, w) : malloc(w);
+          if (nbuf)
+            rml[i].font = nbuf;
+          else
+            failed = 1;
+          nbuf = rml[i].fontx ? realloc(rml[i].fontx, w) : malloc(w);
+          if (nbuf)
+            rml[i].fontx = nbuf;
+          else
+            failed = 1;
+          if (failed)
+            {
+              /* buffers may have mixed sizes now, force a retry next time */
+              maxlen = 0;
+              return ml;	/* sorry */
+            }
+        }
+      maxlen = w;
+    }
+
+  debug("recode_mline: from\n");
+  for (i = 0; i < w; i++)
+    debug1("%c", "0123456789abcdef"[(ml->image[i] >> 4) & 15]);
+  debug("\n");
+  for (i = 0; i < w; i++)
+    debug1("%c", "0123456789abcdef"[(ml->image[i] ) & 15]);
+  debug("\n");
+  for (i = 0; i < w; i++)
+    debug1("%c", "0123456789abcdef"[(ml->font[i] >> 4) & 15]);
+  debug("\n");
+  for (i = 0; i < w; i++)
+    debug1("%c", "0123456789abcdef"[(ml->font[i] ) & 15]);
+  debug("\n");
+  for (i = 0; i < w; i++)
+    debug1("%c", "0123456789abcdef"[(ml->fontx[i] >> 4) & 15]);
+  debug("\n");
+  for (i = 0; i < w; i++)
+    debug1("%c", "0123456789abcdef"[(ml->fontx[i] ) & 15]);
+  debug("\n");
+
+  rl = rml + last;
+  rl->attr = ml->attr;
+#ifdef COLOR
+  rl->color = ml->color;
+# ifdef COLORS256
+  rl->colorx = ml->colorx;
+# endif
+#endif
+  for (i = 0; i < w; i++)
+    {
+      c = ml->image[i] | (ml->font[i] << 8);
+      if (from == UTF8)
+        c |= ml->fontx[i] << 16;
+      /* cells without a font get the default font of the source encoding */
+      if (from != UTF8 && c < 256)
+        c |= encodings[from].deffont << 8;
+#ifdef DW_CHARS
+      if ((from != UTF8 && (c & 0x1f00) != 0 && (c & 0xe000) == 0) || (from == UTF8 && utf8_isdouble(c)))
+        {
+          if (i + 1 == w)
+            c = '?';	/* second half would be outside of the line */
+          else
+            {
+              int c2;
+              i++;
+              c2 = ml->image[i] | (ml->font[i] << 8);
+              c = recode_char_dw_to_encoding(c, &c2, to);
+              /* store the first half here, the second half below */
+              if (to == UTF8)
+                rl->fontx[i - 1] = c >> 16 & 255;
+              rl->font[i - 1] = c >> 8 & 255;
+              rl->image[i - 1] = c & 255;
+              c = c2;
+            }
+        }
+      else
+#endif
+        c = recode_char_to_encoding(c, to);
+      rl->image[i] = c & 255;
+      rl->font[i] = c >> 8 & 255;
+      if (to == UTF8)
+        rl->fontx[i] = c >> 16 & 255;
+    }
+  last ^= 1;
+  debug("recode_mline: to\n");
+  for (i = 0; i < w; i++)
+    debug1("%c", "0123456789abcdef"[(rl->image[i] >> 4) & 15]);
+  debug("\n");
+  for (i = 0; i < w; i++)
+    debug1("%c", "0123456789abcdef"[(rl->image[i] ) & 15]);
+  debug("\n");
+  for (i = 0; i < w; i++)
+    debug1("%c", "0123456789abcdef"[(rl->font[i] >> 4) & 15]);
+  debug("\n");
+  for (i = 0; i < w; i++)
+    debug1("%c", "0123456789abcdef"[(rl->font[i] ) & 15]);
+  debug("\n");
+  for (i = 0; i < w; i++)
+    debug1("%c", "0123456789abcdef"[(rl->fontx[i] >> 4) & 15]);
+  debug("\n");
+  for (i = 0; i < w; i++)
+    debug1("%c", "0123456789abcdef"[(rl->fontx[i] ) & 15]);
+  debug("\n");
+  return rl;
+}
+
+/*
+ * A stored combination of a character with a combining character.
+ * The combination in slot i is referenced by the code 0xd800 + i.
+ */
+struct combchar {
+ unsigned int c1; /* base character; may itself be a 0xd800.. combination code */
+ unsigned int c2; /* the combining character that was added to c1 */
+ unsigned int next; /* index of the next entry in the usage list (see comb_tofront) */
+ unsigned int prev; /* index of the previous entry in the usage list */
+};
+/* slot table, allocated by utf8_handle_comb() on first use; 0 until then */
+struct combchar **combchars;
+
+/*
+ * Send the character c UTF-8 encoded to the current display.
+ *
+ * A combination code (0xd800..0xdfff with an entry in combchars) is
+ * expanded: first its base character, then the combining character.
+ */
+void
+AddUtf8(c)
+int c;
+{
+  ASSERT(D_encoding == UTF8);
+  if (c >= 0xd800 && c < 0xe000 && combchars && combchars[c - 0xd800])
+    {
+      int slot = c - 0xd800;
+
+      AddUtf8(combchars[slot]->c1);
+      c = combchars[slot]->c2;
+    }
+
+  /* replace out of range values with U+FFFD "replacement character" */
+  if (c < 0 || c > 0x10ffff)
+    c = 0xfffd;
+
+  if (c < 0x80)
+    {
+      /* plain ASCII, one byte */
+      AddChar(c);
+      return;
+    }
+  if (c < 0x800)
+    {
+      /* two bytes: lead byte only, the last byte is added below */
+      AddChar(0xc0 | (c >> 6));
+    }
+  else
+    {
+      if (c < 0x10000)
+        {
+          AddChar(0xe0 | (c >> 12));
+        }
+      else
+        {
+          AddChar(0xf0 | (c >> 18));
+          AddChar(0x80 | ((c >> 12) & 0x3f));
+        }
+      AddChar(0x80 | ((c >> 6) & 0x3f));
+    }
+  AddChar(0x80 | (c & 0x3f));
+}
+
+/*
+ * Like ToUtf8(), but a combination code (0xd800..0xdfff with an entry
+ * in combchars) is expanded into its base character followed by the
+ * combining character.
+ *
+ * p may be 0, then only the number of bytes is computed.
+ * Returns the number of bytes.
+ */
+int
+ToUtf8_comb(p, c)
+char *p;
+int c;
+{
+  int used;
+
+  if (c < 0xd800 || c >= 0xe000 || !combchars || !combchars[c - 0xd800])
+    return ToUtf8(p, c);
+
+  /* the base character may be a combination as well */
+  used = ToUtf8_comb(p, combchars[c - 0xd800]->c1);
+  if (p)
+    p += used;
+  return used + ToUtf8(p, combchars[c - 0xd800]->c2);
+}
+
+/*
+ * Encode the character c as UTF-8.
+ *
+ * p: destination buffer (up to 4 bytes are written, no terminating
+ *    NUL), or 0 to only compute the length.
+ * Values outside of 0..0x10ffff are encoded as U+FFFD.
+ * Returns the number of bytes of the encoding (1..4).
+ */
+int
+ToUtf8(p, c)
+char *p;
+int c;
+{
+  int len;
+
+  /* replace out of range values with U+FFFD "replacement character" */
+  if (c < 0 || c > 0x10ffff)
+    c = 0xfffd;
+
+  if (c < 0x80)
+    len = 1;
+  else if (c < 0x800)
+    len = 2;
+  else if (c < 0x10000)
+    len = 3;
+  else
+    len = 4;
+
+  if (p == 0)
+    return len;
+
+  switch (len)
+    {
+    case 1:
+      p[0] = c;
+      break;
+    case 2:
+      p[0] = 0xc0 | (c >> 6);
+      p[1] = 0x80 | (c & 0x3f);
+      break;
+    case 3:
+      p[0] = 0xe0 | (c >> 12);
+      p[1] = 0x80 | ((c >> 6) & 0x3f);
+      p[2] = 0x80 | (c & 0x3f);
+      break;
+    default:
+      p[0] = 0xf0 | (c >> 18);
+      p[1] = 0x80 | ((c >> 12) & 0x3f);
+      p[2] = 0x80 | ((c >> 6) & 0x3f);
+      p[3] = 0x80 | (c & 0x3f);
+      break;
+    }
+  return len;
+}
+
+/*
+ * Incremental UTF-8 decoder, fed with one byte per call.
+ *
+ * c: the next input byte
+ * utf8charp: decoder state kept between the calls; 0 means that no
+ * sequence is in progress. The state is updated on every call.
+ *
+ * returns:
+ * -1: need more bytes, sequence not finished
+ * -2: corrupt sequence found, redo last char
+ * >= 0: decoded character
+ */
+int
+FromUtf8(c, utf8charp)
+int c, *utf8charp;
+{
+ int utf8char = *utf8charp;
+ if (utf8char)
+ {
+ /* inside of a sequence: only continuation bytes (10xxxxxx) are valid */
+ if ((c & 0xc0) != 0x80)
+ {
+ *utf8charp = 0;
+ return -2; /* corrupt sequence! */
+ }
+ else
+ c = (c & 0x3f) | (utf8char << 6);
+ if (!(utf8char & 0x40000000))
+ {
+ /* check for overlong sequences */
+ if ((c & 0x820823e0) == 0x80000000)
+ c = 0xfdffffff;
+ else if ((c & 0x020821f0) == 0x02000000)
+ c = 0xfff7ffff;
+ else if ((c & 0x000820f8) == 0x00080000)
+ c = 0xffffd000;
+ else if ((c & 0x0000207c) == 0x00002000)
+ c = 0xffffff70;
+ }
+ }
+ else
+ {
+ /* new sequence */
+ /*
+ * The constants or'ed to the payload bits of a lead byte are marker
+ * bits: every continuation byte shifts the state left by 6, and the
+ * sequence counts as finished once bit 31 is clear (see below).
+ */
+ if (c >= 0xfe)
+ c = UCS_REPL;
+ else if (c >= 0xfc)
+ c = (c & 0x01) | 0xbffffffc; /* 5 bytes to follow */
+ else if (c >= 0xf8)
+ c = (c & 0x03) | 0xbfffff00; /* 4 */
+ else if (c >= 0xf0)
+ c = (c & 0x07) | 0xbfffc000; /* 3 */
+ else if (c >= 0xe0)
+ c = (c & 0x0f) | 0xbff00000; /* 2 */
+ else if (c >= 0xc2)
+ c = (c & 0x1f) | 0xfc000000; /* 1 */
+ else if (c >= 0xc0)
+ c = 0xfdffffff; /* overlong */
+ else if (c >= 0x80)
+ c = UCS_REPL;
+ }
+ /* bit 31 still set: sequence not complete, keep the state for the next byte */
+ *utf8charp = utf8char = (c & 0x80000000) ? c : 0;
+ if (utf8char)
+ return -1;
+#if 0
+ if (c & 0xffff0000)
+ c = UCS_REPL; /* sorry, only know 16bit Unicode */
+#else
+ if (c & 0xff800000)
+ c = UCS_REPL; /* sorry, only know 23bit Unicode */
+#endif
+ /* surrogates 0xd800..0xdfff as well as 0xfffe and 0xffff are rejected */
+ if (c >= 0xd800 && (c <= 0xdfff || c == 0xfffe || c == 0xffff))
+ c = UCS_REPL; /* illegal code */
+ return c;
+}
+
+
+/*
+ * Switch window p to a new encoding.
+ *
+ * If the old and the new encoding are both UTF8 or both not UTF8 only
+ * p->w_encoding is changed. Otherwise the overlay layers of all canvases
+ * that show the window are left via ExitOverlayPage() and the stored
+ * lines of the window (with COPY_PASTE also the history lines) are
+ * recoded in place before p->w_encoding is set.
+ */
+void
+WinSwitchEncoding(p, encoding)
+struct win *p;
+int encoding;
+{
+ int i, j, c;
+ struct mline *ml;
+ struct display *d;
+ struct canvas *cv;
+ struct layer *oldflayer;
+
+ /* no switch between UTF8 and non-UTF8: the stored cells stay as they are */
+ if ((p->w_encoding == UTF8) == (encoding == UTF8))
+ {
+ p->w_encoding = encoding;
+ return;
+ }
+ oldflayer = flayer;
+ for (d = displays; d; d = d->d_next)
+ for (cv = d->d_cvlist; cv; cv = cv->c_next)
+ if (p == Layer2Window(cv->c_layer))
+ {
+ flayer = cv->c_layer;
+ while(flayer->l_next)
+ {
+ /* the saved layer is about to be left, remember the one below it instead */
+ if (oldflayer == flayer)
+ oldflayer = flayer->l_next;
+ ExitOverlayPage();
+ }
+ }
+ flayer = oldflayer;
+ for (j = 0; j < p->w_height + p->w_histheight; j++)
+ {
+#ifdef COPY_PASTE
+ ml = j < p->w_height ? &p->w_mlines[j] : &p->w_hlines[j - p->w_height];
+#else
+ ml = &p->w_mlines[j];
+#endif
+ /* NOTE(review): fontx is compared with 0 here, but with 'null' in recode_mline and further down - confirm which one is meant */
+ if (ml->font == null && ml->fontx == 0 && encodings[p->w_encoding].deffont == 0)
+ continue;
+ for (i = 0; i < p->w_width; i++)
+ {
+ c = ml->image[i] | (ml->font[i] << 8);
+ if (p->w_encoding == UTF8)
+ c |= ml->fontx[i] << 16;
+ if (p->w_encoding != UTF8 && c < 256)
+ c |= encodings[p->w_encoding].deffont << 8;
+ if (c < 256)
+ continue;
+ /* the line still uses the shared 'null' font array: give it an own one before writing */
+ if (ml->font == null)
+ {
+ if ((ml->font = (unsigned char *)calloc(p->w_width + 1, 1)) == 0)
+ {
+ ml->font = null;
+ break;
+ }
+ }
+#ifdef DW_CHARS
+ if ((p->w_encoding != UTF8 && (c & 0x1f00) != 0 && (c & 0xe000) == 0) || (p->w_encoding == UTF8 && utf8_isdouble(c)))
+ {
+ if (i + 1 == p->w_width)
+ c = '?';
+ else
+ {
+ int c2;
+ i++;
+ c2 = ml->image[i] | (ml->font[i] << 8) | (ml->fontx[i] << 16);
+ c = recode_char_dw_to_encoding(c, &c2, encoding);
+ if (encoding == UTF8)
+ {
+ /* NOTE(review): c == 0x10000 does not get an own fontx array, but the store below would write a 1 for it - confirm whether '>=' is meant */
+ if (c > 0x10000 && ml->fontx == null)
+ {
+ if ((ml->fontx = (unsigned char *)calloc(p->w_width + 1, 1)) == 0)
+ {
+ ml->fontx = null;
+ break;
+ }
+ }
+ ml->fontx[i - 1] = c >> 16 & 255;
+ }
+ else
+ ml->fontx = null; /* NOTE(review): a fontx array allocated earlier is not freed here - possible leak, confirm */
+ ml->font[i - 1] = c >> 8 & 255;
+ ml->image[i - 1] = c & 255;
+ c = c2;
+ }
+ }
+ else
+#endif
+ c = recode_char_to_encoding(c, encoding);
+ ml->image[i] = c & 255;
+ ml->font[i] = c >> 8 & 255;
+ if (encoding == UTF8)
+ {
+ if (c > 0x10000 && ml->fontx == null)
+ {
+ if ((ml->fontx = (unsigned char *)calloc(p->w_width + 1, 1)) == 0)
+ {
+ ml->fontx = null;
+ break;
+ }
+ }
+ ml->fontx[i] = c >> 16 & 255;
+ }
+ else
+ ml->fontx = null;
+ }
+ }
+ p->w_encoding = encoding;
+ return;
+}
+
+#ifdef DW_CHARS
+/* closed range [first, last] of character codes */
+struct interval {
+  int first;
+  int last;
+};
+
+/*
+ * Binary search in a table of intervals.
+ *
+ * table has to be sorted and must not contain overlapping intervals,
+ * max is the index of its last entry.
+ * Returns 1 if ucs lies in one of the intervals, 0 otherwise.
+ */
+static int bisearch(int ucs, const struct interval *table, int max) {
+  int lo = 0;
+  int hi = max;
+
+  /* quick reject: outside of the range covered by the whole table */
+  if (ucs < table[0].first || ucs > table[max].last)
+    return 0;
+
+  while (lo <= hi) {
+    int mid = (lo + hi) / 2;
+    const struct interval *iv = &table[mid];
+
+    if (ucs < iv->first)
+      hi = mid - 1;
+    else if (ucs > iv->last)
+      lo = mid + 1;
+    else
+      return 1;
+  }
+  return 0;
+}
+
+/*
+ * Tell whether the character c occupies two cells.
+ *
+ * Returns non-zero if c is a code in 0xdf00..0xdfff (double width
+ * combining sequence), if it is listed in the table of wide characters,
+ * or - only if cjkwidth is set - if it is listed in the table of
+ * characters with ambiguous width. Returns 0 otherwise.
+ */
+int
+utf8_isdouble(c)
+int c;
+{
+ /* A sorted list of intervals of ambiguous width characters generated by
+ * https://github.com/GNOME/glib/blob/glib-2-50/glib/gen-unicode-tables.pl */
+ static const struct interval ambiguous[] = {
+ {0x00A1, 0x00A1},
+ {0x00A4, 0x00A4},
+ {0x00A7, 0x00A8},
+ {0x00AA, 0x00AA},
+ {0x00AD, 0x00AE},
+ {0x00B0, 0x00B4},
+ {0x00B6, 0x00BA},
+ {0x00BC, 0x00BF},
+ {0x00C6, 0x00C6},
+ {0x00D0, 0x00D0},
+ {0x00D7, 0x00D8},
+ {0x00DE, 0x00E1},
+ {0x00E6, 0x00E6},
+ {0x00E8, 0x00EA},
+ {0x00EC, 0x00ED},
+ {0x00F0, 0x00F0},
+ {0x00F2, 0x00F3},
+ {0x00F7, 0x00FA},
+ {0x00FC, 0x00FC},
+ {0x00FE, 0x00FE},
+ {0x0101, 0x0101},
+ {0x0111, 0x0111},
+ {0x0113, 0x0113},
+ {0x011B, 0x011B},
+ {0x0126, 0x0127},
+ {0x012B, 0x012B},
+ {0x0131, 0x0133},
+ {0x0138, 0x0138},
+ {0x013F, 0x0142},
+ {0x0144, 0x0144},
+ {0x0148, 0x014B},
+ {0x014D, 0x014D},
+ {0x0152, 0x0153},
+ {0x0166, 0x0167},
+ {0x016B, 0x016B},
+ {0x01CE, 0x01CE},
+ {0x01D0, 0x01D0},
+ {0x01D2, 0x01D2},
+ {0x01D4, 0x01D4},
+ {0x01D6, 0x01D6},
+ {0x01D8, 0x01D8},
+ {0x01DA, 0x01DA},
+ {0x01DC, 0x01DC},
+ {0x0251, 0x0251},
+ {0x0261, 0x0261},
+ {0x02C4, 0x02C4},
+ {0x02C7, 0x02C7},
+ {0x02C9, 0x02CB},
+ {0x02CD, 0x02CD},
+ {0x02D0, 0x02D0},
+ {0x02D8, 0x02DB},
+ {0x02DD, 0x02DD},
+ {0x02DF, 0x02DF},
+ {0x0300, 0x036F},
+ {0x0391, 0x03A1},
+ {0x03A3, 0x03A9},
+ {0x03B1, 0x03C1},
+ {0x03C3, 0x03C9},
+ {0x0401, 0x0401},
+ {0x0410, 0x044F},
+ {0x0451, 0x0451},
+ {0x2010, 0x2010},
+ {0x2013, 0x2016},
+ {0x2018, 0x2019},
+ {0x201C, 0x201D},
+ {0x2020, 0x2022},
+ {0x2024, 0x2027},
+ {0x2030, 0x2030},
+ {0x2032, 0x2033},
+ {0x2035, 0x2035},
+ {0x203B, 0x203B},
+ {0x203E, 0x203E},
+ {0x2074, 0x2074},
+ {0x207F, 0x207F},
+ {0x2081, 0x2084},
+ {0x20AC, 0x20AC},
+ {0x2103, 0x2103},
+ {0x2105, 0x2105},
+ {0x2109, 0x2109},
+ {0x2113, 0x2113},
+ {0x2116, 0x2116},
+ {0x2121, 0x2122},
+ {0x2126, 0x2126},
+ {0x212B, 0x212B},
+ {0x2153, 0x2154},
+ {0x215B, 0x215E},
+ {0x2160, 0x216B},
+ {0x2170, 0x2179},
+ {0x2189, 0x2189},
+ {0x2190, 0x2199},
+ {0x21B8, 0x21B9},
+ {0x21D2, 0x21D2},
+ {0x21D4, 0x21D4},
+ {0x21E7, 0x21E7},
+ {0x2200, 0x2200},
+ {0x2202, 0x2203},
+ {0x2207, 0x2208},
+ {0x220B, 0x220B},
+ {0x220F, 0x220F},
+ {0x2211, 0x2211},
+ {0x2215, 0x2215},
+ {0x221A, 0x221A},
+ {0x221D, 0x2220},
+ {0x2223, 0x2223},
+ {0x2225, 0x2225},
+ {0x2227, 0x222C},
+ {0x222E, 0x222E},
+ {0x2234, 0x2237},
+ {0x223C, 0x223D},
+ {0x2248, 0x2248},
+ {0x224C, 0x224C},
+ {0x2252, 0x2252},
+ {0x2260, 0x2261},
+ {0x2264, 0x2267},
+ {0x226A, 0x226B},
+ {0x226E, 0x226F},
+ {0x2282, 0x2283},
+ {0x2286, 0x2287},
+ {0x2295, 0x2295},
+ {0x2299, 0x2299},
+ {0x22A5, 0x22A5},
+ {0x22BF, 0x22BF},
+ {0x2312, 0x2312},
+ {0x2460, 0x24E9},
+ {0x24EB, 0x254B},
+ {0x2550, 0x2573},
+ {0x2580, 0x258F},
+ {0x2592, 0x2595},
+ {0x25A0, 0x25A1},
+ {0x25A3, 0x25A9},
+ {0x25B2, 0x25B3},
+ {0x25B6, 0x25B7},
+ {0x25BC, 0x25BD},
+ {0x25C0, 0x25C1},
+ {0x25C6, 0x25C8},
+ {0x25CB, 0x25CB},
+ {0x25CE, 0x25D1},
+ {0x25E2, 0x25E5},
+ {0x25EF, 0x25EF},
+ {0x2605, 0x2606},
+ {0x2609, 0x2609},
+ {0x260E, 0x260F},
+ {0x261C, 0x261C},
+ {0x261E, 0x261E},
+ {0x2640, 0x2640},
+ {0x2642, 0x2642},
+ {0x2660, 0x2661},
+ {0x2663, 0x2665},
+ {0x2667, 0x266A},
+ {0x266C, 0x266D},
+ {0x266F, 0x266F},
+ {0x269E, 0x269F},
+ {0x26BF, 0x26BF},
+ {0x26C6, 0x26CD},
+ {0x26CF, 0x26D3},
+ {0x26D5, 0x26E1},
+ {0x26E3, 0x26E3},
+ {0x26E8, 0x26E9},
+ {0x26EB, 0x26F1},
+ {0x26F4, 0x26F4},
+ {0x26F6, 0x26F9},
+ {0x26FB, 0x26FC},
+ {0x26FE, 0x26FF},
+ {0x273D, 0x273D},
+ {0x2776, 0x277F},
+ {0x2B56, 0x2B59},
+ {0x3248, 0x324F},
+ {0xE000, 0xF8FF},
+ {0xFE00, 0xFE0F},
+ {0xFFFD, 0xFFFD},
+ {0x1F100, 0x1F10A},
+ {0x1F110, 0x1F12D},
+ {0x1F130, 0x1F169},
+ {0x1F170, 0x1F18D},
+ {0x1F18F, 0x1F190},
+ {0x1F19B, 0x1F1AC},
+ {0xE0100, 0xE01EF},
+ {0xF0000, 0xFFFFD},
+ {0x100000, 0x10FFFD},
+ };
+ /* A sorted list of intervals of double width characters generated by
+ * https://github.com/GNOME/glib/blob/glib-2-50/glib/gen-unicode-tables.pl */
+ static const struct interval wide[] = {
+ {0x1100, 0x115F},
+ {0x231A, 0x231B},
+ {0x2329, 0x232A},
+ {0x23E9, 0x23EC},
+ {0x23F0, 0x23F0},
+ {0x23F3, 0x23F3},
+ {0x25FD, 0x25FE},
+ {0x2614, 0x2615},
+ {0x2648, 0x2653},
+ {0x267F, 0x267F},
+ {0x2693, 0x2693},
+ {0x26A1, 0x26A1},
+ {0x26AA, 0x26AB},
+ {0x26BD, 0x26BE},
+ {0x26C4, 0x26C5},
+ {0x26CE, 0x26CE},
+ {0x26D4, 0x26D4},
+ {0x26EA, 0x26EA},
+ {0x26F2, 0x26F3},
+ {0x26F5, 0x26F5},
+ {0x26FA, 0x26FA},
+ {0x26FD, 0x26FD},
+ {0x2705, 0x2705},
+ {0x270A, 0x270B},
+ {0x2728, 0x2728},
+ {0x274C, 0x274C},
+ {0x274E, 0x274E},
+ {0x2753, 0x2755},
+ {0x2757, 0x2757},
+ {0x2795, 0x2797},
+ {0x27B0, 0x27B0},
+ {0x27BF, 0x27BF},
+ {0x2B1B, 0x2B1C},
+ {0x2B50, 0x2B50},
+ {0x2B55, 0x2B55},
+ {0x2E80, 0x2E99},
+ {0x2E9B, 0x2EF3},
+ {0x2F00, 0x2FD5},
+ {0x2FF0, 0x2FFB},
+ {0x3000, 0x303E},
+ {0x3041, 0x3096},
+ {0x3099, 0x30FF},
+ {0x3105, 0x312F},
+ {0x3131, 0x318E},
+ {0x3190, 0x31BA},
+ {0x31C0, 0x31E3},
+ {0x31F0, 0x321E},
+ {0x3220, 0x3247},
+ {0x3250, 0x4DBF},
+ {0x4E00, 0xA48C},
+ {0xA490, 0xA4C6},
+ {0xA960, 0xA97C},
+ {0xAC00, 0xD7A3},
+ {0xF900, 0xFAFF},
+ {0xFE10, 0xFE19},
+ {0xFE30, 0xFE52},
+ {0xFE54, 0xFE66},
+ {0xFE68, 0xFE6B},
+ {0xFF01, 0xFF60},
+ {0xFFE0, 0xFFE6},
+ {0x16FE0, 0x16FE3},
+ {0x17000, 0x187F7},
+ {0x18800, 0x18AF2},
+ {0x1B000, 0x1B11E},
+ {0x1B150, 0x1B152},
+ {0x1B164, 0x1B167},
+ {0x1B170, 0x1B2FB},
+ {0x1F004, 0x1F004},
+ {0x1F0CF, 0x1F0CF},
+ {0x1F18E, 0x1F18E},
+ {0x1F191, 0x1F19A},
+ {0x1F200, 0x1F202},
+ {0x1F210, 0x1F23B},
+ {0x1F240, 0x1F248},
+ {0x1F250, 0x1F251},
+ {0x1F260, 0x1F265},
+ {0x1F300, 0x1F320},
+ {0x1F32D, 0x1F335},
+ {0x1F337, 0x1F37C},
+ {0x1F37E, 0x1F393},
+ {0x1F3A0, 0x1F3CA},
+ {0x1F3CF, 0x1F3D3},
+ {0x1F3E0, 0x1F3F0},
+ {0x1F3F4, 0x1F3F4},
+ {0x1F3F8, 0x1F43E},
+ {0x1F440, 0x1F440},
+ {0x1F442, 0x1F4FC},
+ {0x1F4FF, 0x1F53D},
+ {0x1F54B, 0x1F54E},
+ {0x1F550, 0x1F567},
+ {0x1F57A, 0x1F57A},
+ {0x1F595, 0x1F596},
+ {0x1F5A4, 0x1F5A4},
+ {0x1F5FB, 0x1F64F},
+ {0x1F680, 0x1F6C5},
+ {0x1F6CC, 0x1F6CC},
+ {0x1F6D0, 0x1F6D2},
+ {0x1F6D5, 0x1F6D5},
+ {0x1F6EB, 0x1F6EC},
+ {0x1F6F4, 0x1F6FA},
+ {0x1F7E0, 0x1F7EB},
+ {0x1F90D, 0x1F971},
+ {0x1F973, 0x1F976},
+ {0x1F97A, 0x1F9A2},
+ {0x1F9A5, 0x1F9AA},
+ {0x1F9AE, 0x1F9CA},
+ {0x1F9CD, 0x1F9FF},
+ {0x1FA70, 0x1FA73},
+ {0x1FA78, 0x1FA7A},
+ {0x1FA80, 0x1FA82},
+ {0x1FA90, 0x1FA95},
+ {0x20000, 0x2FFFD},
+ {0x30000, 0x3FFFD},
+ };
+
+ if (c >= 0xdf00 && c <= 0xdfff)
+ return 1; /* dw combining sequence */
+ /* bisearch() wants the index of the last table entry, hence the - 1 */
+ return ((bisearch(c, wide, sizeof(wide) / sizeof(struct interval) - 1)) ||
+ (cjkwidth &&
+ bisearch(c, ambiguous,
+ sizeof(ambiguous) / sizeof(struct interval) - 1)));
+}
+#endif
+
+/*
+ * Tell whether c is a combining character.
+ *
+ * Returns 1 if c lies in one of the intervals of the table below,
+ * 0 otherwise.
+ */
+int
+utf8_iscomb(c)
+int c;
+{
+ /* taken from Markus Kuhn's wcwidth */
+ static const struct interval combining[] = {
+ { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
+ { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
+ { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 },
+ { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 },
+ { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
+ { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
+ { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 },
+ { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D },
+ { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 },
+ { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD },
+ { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C },
+ { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
+ { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC },
+ { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD },
+ { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
+ { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
+ { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
+ { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
+ { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC },
+ { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
+ { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D },
+ { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
+ { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
+ { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
+ { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
+ { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E },
+ { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 },
+ { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 },
+ { 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 },
+ { 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F },
+ { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
+ { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
+ { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
+ { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 },
+ { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
+ { 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 },
+ { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 },
+ { 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF },
+ { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 },
+ { 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F },
+ { 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B },
+ { 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F },
+ { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB },
+ { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F },
+ { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x1D167, 0x1D169 },
+ { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD },
+ { 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F },
+ { 0xE0100, 0xE01EF }
+ };
+
+ /* bisearch() wants the index of the last table entry, hence the - 1 */
+ return bisearch(c, combining, sizeof(combining) / sizeof(struct interval) - 1);
+}
+
+/*
+ * Move the combination in slot i to the front of its usage list.
+ *
+ * Slots below 0x700 belong to the list rooted at 0x800, all others to
+ * the list rooted at 0x801.  If the base character of the combination
+ * is a combination itself, it is moved to the front as well, and so on.
+ */
+static void
+comb_tofront(i)
+int i;
+{
+  struct combchar *node, *head;
+  int root;
+
+  for (;;)
+    {
+      root = i < 0x700 ? 0x800 : 0x801;
+      debug1("bring to front: %x\n", i);
+      node = combchars[i];
+
+      /* take the node out of its current position */
+      combchars[node->prev]->next = node->next;
+      combchars[node->next]->prev = node->prev;
+
+      /* and put it in again right behind the root */
+      head = combchars[root];
+      node->next = head->next;
+      node->prev = root;
+      combchars[head->next]->prev = i;
+      head->next = i;
+
+      /* continue with the base character if it is a combination, too */
+      i = node->c1;
+      if (i < 0xd800 || i >= 0xe000)
+        break;
+      i -= 0xd800;
+    }
+}
+
+/*
+ * Attach the combining character c to the character stored in mc.
+ *
+ * The pair (character in mc, c) is looked up in combchars or stored in
+ * a free or recycled slot i, and mc is rewritten to hold the code
+ * 0xd800 + i.  Slots 0x000..0x6ff are used if the character in mc is
+ * single width, slots 0x700..0x7ff if it is double width; 0x800 and
+ * 0x801 are the roots of the two usage lists.
+ *
+ * If memory runs out, mc is left unchanged.  If no slot can be
+ * recycled, mc is turned into a '?'.
+ */
+void
+utf8_handle_comb(c, mc)
+int c;
+struct mchar *mc;
+{
+  int root, i, c1;
+  int isdouble;
+
+  c1 = mc->image | (mc->font << 8) | mc->fontx << 16;
+  isdouble = c1 >= 0x1100 && utf8_isdouble(c1);
+  if (!combchars)
+    {
+      combchars = (struct combchar **)calloc(0x802, sizeof(struct combchar *));
+      if (!combchars)
+        return;
+      combchars[0x800] = (struct combchar *)malloc(sizeof(struct combchar));
+      combchars[0x801] = (struct combchar *)malloc(sizeof(struct combchar));
+      if (!combchars[0x800] || !combchars[0x801])
+        {
+          if (combchars[0x800])
+            free(combchars[0x800]);
+          if (combchars[0x801])
+            free(combchars[0x801]);
+          free(combchars);
+          /*
+           * Reset the pointer: otherwise the next call (and every
+           * other user of combchars) would work on freed memory.
+           */
+          combchars = 0;
+          return;
+        }
+      /* c1/c2 of a root hold the slot range [c1, c2) of its list */
+      combchars[0x800]->c1 = 0x000;
+      combchars[0x800]->c2 = 0x700;
+      combchars[0x800]->next = 0x800;
+      combchars[0x800]->prev = 0x800;
+      combchars[0x801]->c1 = 0x700;
+      combchars[0x801]->c2 = 0x800;
+      combchars[0x801]->next = 0x801;
+      combchars[0x801]->prev = 0x801;
+    }
+  root = isdouble ? 0x801 : 0x800;
+  /* look for the same pair or for the first slot that was never used */
+  for (i = combchars[root]->c1; i < combchars[root]->c2; i++)
+    {
+      if (!combchars[i])
+        break;
+      if (combchars[i]->c1 == c1 && combchars[i]->c2 == c)
+        break;
+    }
+  if (i == combchars[root]->c2)
+    {
+      /* full, recycle old entry */
+      /* protect our own base character from being the one that is recycled */
+      if (c1 >= 0xd800 && c1 < 0xe000)
+        comb_tofront(c1 - 0xd800);
+      i = combchars[root]->prev;
+      if (i == 0x800 || i == 0x801 || c1 == i + 0xd800)
+        {
+          /* completely full, can't recycle */
+          debug("utf8_handle_comp: completely full!\n");
+          mc->image = '?';
+          mc->font = 0;
+          return;
+        }
+      /* FIXME: delete old char from all buffers */
+    }
+  else if (!combchars[i])
+    {
+      combchars[i] = (struct combchar *)malloc(sizeof(struct combchar));
+      if (!combchars[i])
+        return;
+      /* self linked, so that comb_tofront() can unlink the new node */
+      combchars[i]->prev = i;
+      combchars[i]->next = i;
+    }
+  combchars[i]->c1 = c1;
+  combchars[i]->c2 = c;
+  /* store the code 0xd800 + i in the cell */
+  mc->image = i & 0xff;
+  mc->font = (i >> 8) + 0xd8;
+  mc->fontx = 0;
+  debug3("combinig char %x %x -> %x\n", c1, c, i + 0xd800);
+  comb_tofront(i);
+}
+
+#else /* !UTF8 */
+
+/*
+ * Variant used when UTF8 support is not compiled in: switching the
+ * encoding of window p only records the new value.
+ */
+void
+WinSwitchEncoding(p, encoding)
+struct win *p;
+int encoding;
+{
+  p->w_encoding = encoding;
+}
+
+#endif /* UTF8 */
+
+/*
+ * Compare two encoding names, looking only at ASCII letters (case
+ * folded) and digits; every other character is skipped, so "UTF-8",
+ * "utf8" and "Utf_8" are all equal.
+ *
+ * Returns 1 if the names are equal, 0 otherwise.  Both names have to
+ * end together: a name that is merely a prefix of the other one (or
+ * that consists of punctuation only) does not match, and neither
+ * string is read beyond its terminating NUL.
+ */
+static int
+encmatch(s1, s2)
+char *s1;
+char *s2;
+{
+  int c1, c2;
+
+  for (;;)
+    {
+      /* next significant character of s1, or 0 at the end */
+      for (;; s1++)
+        {
+          c1 = (unsigned char)*s1;
+          if (c1 >= 'A' && c1 <= 'Z')
+            c1 += 'a' - 'A';
+          if (c1 == 0 || (c1 >= 'a' && c1 <= 'z') || (c1 >= '0' && c1 <= '9'))
+            break;
+        }
+      /* next significant character of s2, or 0 at the end */
+      for (;; s2++)
+        {
+          c2 = (unsigned char)*s2;
+          if (c2 >= 'A' && c2 <= 'Z')
+            c2 += 'a' - 'A';
+          if (c2 == 0 || (c2 >= 'a' && c2 <= 'z') || (c2 >= '0' && c2 <= '9'))
+            break;
+        }
+      if (c1 != c2)
+        return 0;
+      if (c1 == 0)
+        return 1;
+      s1++;
+      s2++;
+    }
+}
+
+/*
+ * Look up an encoding by name (compared with encmatch()).
+ *
+ * Returns 0 for a missing or empty name and for "off" / "iso8859-1",
+ * the index into encodings[] for a known name, and -1 otherwise.
+ * "euc" is treated as "eucJP".  Without UTF8 support "UTF-8" yields
+ * -1; with it, the font translations of the encoding found are loaded
+ * before returning.
+ */
+int
+FindEncoding(name)
+char *name;
+{
+  int encoding;
+
+  if (name == 0 || *name == 0)
+    return 0;
+  /* log only after the check above: name must not be a null pointer for %s */
+  debug1("FindEncoding %s\n", name);
+  if (encmatch(name, "euc"))
+    name = "eucJP";
+  if (encmatch(name, "off") || encmatch(name, "iso8859-1"))
+    return 0;
+#ifndef UTF8
+  if (encmatch(name, "UTF-8"))
+    return -1;
+#endif
+  for (encoding = 0; encoding < (int)(sizeof(encodings)/sizeof(*encodings)); encoding++)
+    if (encmatch(name, encodings[encoding].name))
+      {
+#ifdef UTF8
+        LoadFontTranslationsForEncoding(encoding);
+#endif
+        return encoding;
+      }
+  return -1;
+}
+
+/*
+ * Return the name of the given encoding, or 0 if encoding is not a
+ * valid index into encodings[] (this includes the -1 that
+ * FindEncoding() returns for an unknown name).
+ */
+char *
+EncodingName(encoding)
+int encoding;
+{
+  if (encoding < 0 || encoding >= (int)(sizeof(encodings)/sizeof(*encodings)))
+    return 0;
+  return encodings[encoding].name;
+}
+
+/*
+ * Return the default font of the given encoding.  An encoding that is
+ * not a valid index into encodings[] (e.g. the -1 FindEncoding()
+ * returns for an unknown name) yields 0 instead of an out-of-bounds
+ * table access.
+ */
+int
+EncodingDefFont(encoding)
+int encoding;
+{
+  if (encoding < 0 || encoding >= (int)(sizeof(encodings)/sizeof(*encodings)))
+    return 0;
+  return encodings[encoding].deffont;
+}
+
+/*
+ * Bring window p back to the initial state of its encoding
+ * (p->w_encoding): apply the encoding's charsets string, if it has
+ * one, via SetCharsets(), load its font translations when UTF8 support
+ * is compiled in, and set w_gr / w_FontE / w_c1 from the usegr and
+ * noc1 entries of the encodings table.
+ */
+void
+ResetEncoding(p)
+struct win *p;
+{
+  int enc = p->w_encoding;
+  char *sets = encodings[enc].charsets;
+
+  if (sets != 0)
+    SetCharsets(p, sets);
+#ifdef UTF8
+  LoadFontTranslationsForEncoding(enc);
+#endif
+  if (!encodings[enc].usegr)
+    p->w_FontE = 0;
+  else
+    {
+      p->w_gr = 2;
+      p->w_FontE = encodings[enc].charsets[1];
+    }
+  if (encodings[enc].noc1)
+    p->w_c1 = 0;
+}
+
+/* decoded char: 32-bit <fontx><font><c2><c>
+ * fontx: non-bmp utf8
+ * c2: multi-byte character
+ * font is always zero for utf8
+ * returns: -1 need more bytes
+ * -2 decode error
+ *
+ * DecodeChar is called with one input byte c at a time.  *statep
+ * carries the bytes seen so far of an unfinished multi-byte character
+ * between calls; it has to be 0 before the first call and is reset to
+ * 0 by the non-UTF8 decoders whenever a character is completed.
+ */
+
+
+int
+DecodeChar(c, encoding, statep)
+int c;
+int encoding;
+int *statep;
+{
+ int t;
+
+ debug2("Decoding char %02x for encoding %d\n", c, encoding);
+#ifdef UTF8
+ if (encoding == UTF8)
+ {
+ c = FromUtf8(c, statep);
+ /* values from 0x10000 up: move bits 16-22 into bits 24-30 (fontx) so that the font byte stays zero */
+ if (c >= 0x10000)
+ c = (c & 0x7f0000) << 8 | (c & 0xffff);
+ return c;
+ }
+#endif
+ if (encoding == SJIS)
+ {
+ if (!*statep)
+ {
+ /* lead byte of a two-byte character: remember it and wait for the second byte */
+ if ((0x81 <= c && c <= 0x9f) || (0xe0 <= c && c <= 0xef))
+ {
+ *statep = c;
+ return -1;
+ }
+ if (c < 0x80)
+ return c;
+ /* every other byte with the high bit set is returned in font KANA */
+ return c | (KANA << 16);
+ }
+ /* second byte: t is the new byte, c the remembered lead byte */
+ t = c;
+ c = *statep;
+ *statep = 0;
+ if (0x40 <= t && t <= 0xfc && t != 0x7f)
+ {
+ if (c <= 0x9f)
+ c = (c - 0x81) * 2 + 0x21;
+ else
+ c = (c - 0xc1) * 2 + 0x21;
+ if (t <= 0x7e)
+ t -= 0x1f;
+ else if (t <= 0x9e)
+ t -= 0x20;
+ else
+ t -= 0x7e, c++;
+ return (c << 8) | t | (KANJI << 16);
+ }
+ /* not a valid second byte: the lead byte is dropped and t returned as is */
+ return t;
+ }
+ if (encoding == EUC_JP || encoding == EUC_KR || encoding == EUC_CN)
+ {
+ if (!*statep)
+ {
+ /* any byte with the high bit set starts a multi-byte character */
+ if (c & 0x80)
+ {
+ *statep = c;
+ return -1;
+ }
+ return c;
+ }
+ t = c;
+ c = *statep;
+ *statep = 0;
+ if (encoding == EUC_JP)
+ {
+ /* 0x8e prefix: the following byte is returned in font KANA */
+ if (c == 0x8e)
+ return t | (KANA << 16);
+ /* 0x8f prefix: three-byte character; keep the second byte together with the KANJI0212 tag and wait for the third */
+ if (c == 0x8f)
+ {
+ *statep = t | (KANJI0212 << 8);
+ return -1;
+ }
+ }
+ /* clear the high bit of both bytes; a KANJI0212 tag stored in the state survives the mask */
+ c &= 0xff7f;
+ t &= 0x7f;
+ c = c << 8 | t;
+ if (encoding == EUC_KR)
+ return c | (3 << 16);
+ if (encoding == EUC_CN)
+ return c | (1 << 16);
+ if (c & (KANJI0212 << 16))
+ return c;
+ else
+ return c | (KANJI << 16);
+ }
+ if (encoding == BIG5 || encoding == GBK)
+ {
+ if (!*statep)
+ {
+ if (c & 0x80)
+ {
+ /* GBK: 0x80 is a single byte, returned as 0xa4 in font 'b'|0x80 (EncodeChar maps 0xa4 back to 0x80 for GBK) */
+ if (encoding == GBK && c == 0x80)
+ return 0xa4 | (('b'|0x80) << 16);
+ *statep = c;
+ return -1;
+ }
+ return c;
+ }
+ t = c;
+ c = *statep;
+ *statep = 0;
+ /* only the lead byte loses its high bit, the second byte is kept unchanged */
+ c &= 0x7f;
+ return c << 8 | t | (encoding == BIG5 ? 030 << 16 : 031 << 16);
+ }
+ /* single-byte encodings: tag the byte with the encoding's default font */
+ return c | (encodings[encoding].deffont << 16);
+}
+
+/*
+ * Encode character c for the given encoding.
+ *
+ * bp        output buffer, or 0 to only compute the length
+ * c         character in the layout produced by DecodeChar (font in
+ *           bits 16-23), or -1 to terminate the output
+ * encoding  target encoding
+ * fontp     font last recorded for the output, may be 0.  On the
+ *           generic path at the end it is compared with the font of c
+ *           and set to it when they differ; the multi-byte shortcuts
+ *           (SJIS, EUC, Big5, GBK) do not touch it.
+ *
+ * For c == -1 (with fontp given) "ESC ( B" is produced if *fontp is
+ * non-zero; *fontp itself is left unchanged.
+ *
+ * Returns the number of bytes written, or that would be written when
+ * bp is 0.
+ */
+int
+EncodeChar(bp, c, encoding, fontp)
+char *bp;
+int c;
+int encoding;
+int *fontp;
+{
+  int t, f, l;
+
+  debug2("Encoding char %02x for encoding %d\n", c, encoding);
+  if (c == -1 && fontp)
+    {
+      if (*fontp == 0)
+        return 0;
+      if (bp)
+        {
+          *bp++ = 033;
+          *bp++ = '(';
+          *bp++ = 'B';
+        }
+      return 3;
+    }
+  f = (c >> 16) & 0xff;
+
+#ifdef UTF8
+  if (encoding == UTF8)
+    {
+      /* a character carrying a font is recoded first, then written with ToUtf8 */
+      if (f)
+        {
+# ifdef DW_CHARS
+          if (is_dw_font(f))
+            {
+              int c2 = c & 0xff;
+              c = (c >> 8 & 0xff) | (f << 8);
+              c = recode_char_dw_to_encoding(c, &c2, encoding);
+            }
+          else
+# endif
+            {
+              c = (c & 0xff) | (f << 8);
+              c = recode_char_to_encoding(c, encoding);
+            }
+        }
+      return ToUtf8(bp, c);
+    }
+  if (f == 0 && (c & 0x7f00ff00) != 0) /* is_utf8? */
+    {
+      /*
+       * Undo the packing done by DecodeChar, which stores bits 16-22
+       * of the value in bits 24-30.  The font byte (bits 16-23) is
+       * zero here, so the bits have to be taken from bits 24-30.
+       */
+      if (c >= 0x10000)
+        c = (c & 0x7f000000) >> 8 | (c & 0xffff);
+# ifdef DW_CHARS
+      if (utf8_isdouble(c))
+        {
+          int c2 = 0xffff;
+          c = recode_char_dw_to_encoding(c, &c2, encoding);
+          c = (c << 8) | (c2 & 0xff);
+        }
+      else
+# endif
+        {
+          c = recode_char_to_encoding(c, encoding);
+          c = ((c & 0xff00) << 8) | (c & 0xff);
+        }
+      debug1("Encode: char mapped from utf8 to %x\n", c);
+      f = c >> 16;
+    }
+#endif
+  if (f & 0x80) /* map special 96-fonts to latin1 */
+    f = 0;
+
+  if (encoding == SJIS)
+    {
+      if (f == KANA)
+        c = (c & 0xff) | 0x80;
+      else if (f == KANJI)
+        {
+          if (!bp)
+            return 2;
+          /* inverse of the two-byte conversion in DecodeChar */
+          t = c & 0xff;
+          c = (c >> 8) & 0xff;
+          t += (c & 1) ? ((t <= 0x5f) ? 0x1f : 0x20) : 0x7e;
+          c = (c - 0x21) / 2 + ((c < 0x5f) ? 0x81 : 0xc1);
+          *bp++ = c;
+          *bp++ = t;
+          return 2;
+        }
+    }
+  if (encoding == EUC)
+    {
+      if (f == KANA)
+        {
+          if (bp)
+            {
+              *bp++ = 0x8e;
+              *bp++ = c;
+            }
+          return 2;
+        }
+      if (f == KANJI)
+        {
+          if (bp)
+            {
+              *bp++ = (c >> 8) | 0x80;
+              *bp++ = c | 0x80;
+            }
+          return 2;
+        }
+      if (f == KANJI0212)
+        {
+          if (bp)
+            {
+              *bp++ = 0x8f;
+              *bp++ = c >> 8;
+              *bp++ = c;
+            }
+          return 3;
+        }
+    }
+  if ((encoding == EUC_KR && f == 3) || (encoding == EUC_CN && f == 1))
+    {
+      if (bp)
+        {
+          *bp++ = (c >> 8) | 0x80;
+          *bp++ = c | 0x80;
+        }
+      return 2;
+    }
+  if ((encoding == BIG5 && f == 030) || (encoding == GBK && f == 031))
+    {
+      if (bp)
+        {
+          *bp++ = (c >> 8) | 0x80;
+          *bp++ = c;
+        }
+      return 2;
+    }
+  /* counterpart of the GBK 0x80 special case in DecodeChar */
+  if (encoding == GBK && f == 0 && c == 0xa4)
+    c = 0x80;
+
+  /* generic path: emit a designation sequence when the font changes */
+  l = 0;
+  if (fontp && f != *fontp)
+    {
+      *fontp = f;
+      if (f && f < ' ')
+        {
+          /* fonts below ' ': "ESC $ F" for f <= 2, "ESC $ ( F" otherwise, with F = '@' + f */
+          if (bp)
+            {
+              *bp++ = 033;
+              *bp++ = '$';
+              if (f > 2)
+                *bp++ = '(';
+              *bp++ = '@' + f;
+            }
+          l += f > 2 ? 4 : 3;
+        }
+      else if (f < 128)
+        {
+          /* "ESC ( F"; font 0 is written as 'B' */
+          if (f == 0)
+            f = 'B';
+          if (bp)
+            {
+              *bp++ = 033;
+              *bp++ = '(';
+              *bp++ = f;
+            }
+          l += 3;
+        }
+    }
+  if (c & 0xff00)
+    {
+      if (bp)
+        *bp++ = c >> 8;
+      l++;
+    }
+  if (bp)
+    *bp++ = c;
+  return l + 1;
+}
+
+/*
+ * Tell whether font f can be expressed directly in the given
+ * encoding.  UTF8 (when compiled in) accepts every font; the
+ * multi-byte encodings accept only their own fonts; every other
+ * encoding yields 0.
+ */
+int
+CanEncodeFont(encoding, f)
+int encoding, f;
+{
+#ifdef UTF8
+  if (encoding == UTF8)
+    return 1;
+#endif
+  if (encoding == SJIS)
+    return f == KANJI || f == KANA;
+  if (encoding == EUC)
+    return f == KANJI || f == KANA || f == KANJI0212;
+  if (encoding == EUC_KR)
+    return f == 3;
+  if (encoding == EUC_CN)
+    return f == 1;
+  if (encoding == BIG5)
+    return f == 030;
+  if (encoding == GBK)
+    return f == 031;
+  return 0;
+}
+
+#ifdef DW_CHARS
+/*
+ * Convert byte c of the character about to be output on the current
+ * display into the display's encoding (D_encoding), based on the
+ * current rendition font (D_rend.font).
+ *
+ * D_mbcs is adjusted alongside where the encoding requires it, in the
+ * same way EncodeChar() treats the low byte of a two-byte character.
+ * For EUC the prefix bytes 0x8e / 0x8f are handed to AddChar().
+ * Returns the converted byte.
+ */
+int
+PrepareEncodedChar(c)
+int c;
+{
+  int enc = D_encoding;
+  int font = D_rend.font;
+  int second = D_mbcs;
+
+  if (enc == SJIS)
+    {
+      if (font == KANA)
+        return c | 0x80;
+      if (font == KANJI)
+        {
+          /* the adjustment of the second byte depends on the still unconverted c */
+          if (c & 1)
+            second += (second <= 0x5f) ? 0x1f : 0x20;
+          else
+            second += 0x7e;
+          c = (c - 0x21) / 2 + ((c < 0x5f) ? 0x81 : 0xc1);
+          D_mbcs = second;
+        }
+      return c;
+    }
+  if (enc == EUC)
+    {
+      if (font == KANA)
+        {
+          AddChar(0x8e);
+          return c | 0x80;
+        }
+      if (font == KANJI)
+        {
+          D_mbcs = second | 0x80;
+          return c | 0x80;
+        }
+      if (font == KANJI0212)
+        {
+          AddChar(0x8f);
+          D_mbcs = second | 0x80;
+          return c | 0x80;
+        }
+    }
+  if ((enc == EUC_KR && font == 3) || (enc == EUC_CN && font == 1))
+    {
+      D_mbcs = second | 0x80;
+      return c | 0x80;
+    }
+  if ((enc == BIG5 && font == 030) || (enc == GBK && font == 031))
+    return c | 0x80;
+  return c;
+}
+#endif
+
+/*
+ * Recode flen bytes from fbuf (encoding fenc) into encoding tenc.
+ * The result is stored in tbuf; with tbuf == 0 nothing is stored and
+ * only the length is computed.  Returns the number of output bytes,
+ * including whatever the final EncodeChar(-1) call adds.
+ */
+int
+RecodeBuf(fbuf, flen, fenc, tenc, tbuf)
+unsigned char *fbuf;
+int flen;
+int fenc, tenc;
+unsigned char *tbuf;
+{
+  int in = 0, out = 0;
+  int state = 0, curfont = 0;
+  int ch;
+
+  while (in < flen)
+    {
+      ch = DecodeChar(fbuf[in], fenc, &state);
+      /* on -2 the same input byte is handed to the decoder again */
+      if (ch != -2)
+        in++;
+      if (ch < 0)
+        continue;
+      out += EncodeChar(tbuf ? (char *)tbuf + out : 0, ch, tenc, &curfont);
+    }
+  out += EncodeChar(tbuf ? (char *)tbuf + out : 0, -1, tenc, &curfont);
+  return out;
+}
+
+#ifdef UTF8
+/*
+ * Check columns xs..xe (inclusive) of line ml for cells without a font
+ * whose character changes when it is recoded to UTF8 under the default
+ * font of the given encoding.
+ *
+ * Returns 1 if such a cell exists, 0 otherwise; always 0 if the
+ * encoding is UTF8 or has no default font.
+ */
+int
+ContainsSpecialDeffont(ml, xs, xe, encoding)
+struct mline *ml;
+int xs, xe;
+int encoding;
+{
+  unsigned char *f, *i;
+  int c, x, dx;
+
+  if (encoding == UTF8 || encodings[encoding].deffont == 0)
+    return 0;
+  i = ml->image + xs;
+  f = ml->font + xs;
+  /* advance image and font together so that both always refer to the same cell */
+  for (dx = xe - xs + 1; dx > 0; dx--, i++, f++)
+    {
+      /* cells with an explicit font are not affected by the default font */
+      if (*f)
+        continue;
+      c = *i;
+      x = recode_char_to_encoding(c | (encodings[encoding].deffont << 8), UTF8);
+      if (c != x)
+        {
+          debug2("ContainsSpecialDeffont: yes %02x != %02x\n", c, x);
+          return 1;
+        }
+    }
+  debug("ContainsSpecialDeffont: no\n");
+  return 0;
+}
+
+
+/*
+ * Load recode tables from a translation file into recodetabs[].
+ *
+ * font  font id to load, or -1 to accept every table in the file
+ * file  file to read; if 0 the name is built as
+ *       "<screenencodings>/<font as two hex digits>"
+ *
+ * The file is a sequence of records, each consisting of the magic
+ * "ScreenI2UTF8", a format byte that must be 0, the font id, a 16-bit
+ * big-endian entry count, the font name, and then one pair of 16-bit
+ * big-endian values per entry.
+ *
+ * Returns 0 if the file ended cleanly right after a complete record,
+ * -1 otherwise (tables installed before the failure stay installed).
+ */
+int
+LoadFontTranslation(font, file)
+int font;
+char *file;
+{
+ char buf[1024], *myfile;
+ FILE *f;
+ int i;
+ int fo;
+ int x, u, c, ok;
+ unsigned short (*p)[2], (*tab)[2];
+
+ myfile = file;
+ if (myfile == 0)
+ {
+ if (font == 0 || screenencodings == 0)
+ return -1;
+ /* leave room in buf for the "/xx" suffix and the terminator */
+ if (strlen(screenencodings) > sizeof(buf) - 10)
+ return -1;
+ sprintf(buf, "%s/%02x", screenencodings, font & 0xff);
+ myfile = buf;
+ }
+ debug1("LoadFontTranslation: trying %s\n", myfile);
+ if ((f = secfopen(myfile, "r")) == 0)
+ return -1;
+ i = ok = 0;
+ /* one iteration per record; every failure leaves the loop with ok still 0 */
+ for (;;)
+ {
+ /* compare the magic, starting at i (1 for follow-up records, see the end of the loop); a mismatch only ends this inner loop */
+ for(; i < 12; i++)
+ if (getc(f) != "ScreenI2UTF8"[i])
+ break;
+ if (getc(f) != 0) /* format */
+ break;
+ fo = getc(f); /* id */
+ if (fo == EOF)
+ break;
+ if (font != -1 && font != fo)
+ break;
+ /* number of entries, high byte first */
+ i = getc(f);
+ x = getc(f);
+ if (x == EOF)
+ break;
+ i = i << 8 | x;
+ getc(f);
+ while ((x = getc(f)) && x != EOF)
+ getc(f); /* skip font name (padded to 2 bytes) */
+ /* one extra element for the terminating 0/0 pair */
+ if ((p = malloc(sizeof(*p) * (i + 1))) == 0)
+ break;
+ tab = p;
+ while(i > 0)
+ {
+ /* each entry: two 16-bit values, high byte first */
+ x = getc(f);
+ x = x << 8 | getc(f);
+ u = getc(f);
+ c = getc(f);
+ u = u << 8 | c;
+ if (c == EOF)
+ break;
+ (*p)[0] = x;
+ (*p)[1] = u;
+ p++;
+ i--;
+ }
+ (*p)[0] = 0;
+ (*p)[1] = 0;
+ /* reject a truncated table (i still > 0) and one whose first entry has bit 0x8000 set */
+ if (i || (tab[0][0] & 0x8000))
+ {
+ free(tab);
+ break;
+ }
+ /* replace the table for this font, releasing a previously loaded one */
+ if (recodetabs[fo].tab && (recodetabs[fo].flags & RECODETAB_ALLOCED) != 0)
+ free(recodetabs[fo].tab);
+ recodetabs[fo].tab = tab;
+ recodetabs[fo].flags = RECODETAB_ALLOCED;
+ debug1("Successful load of recodetab %02x\n", fo);
+ c = getc(f);
+ if (c == EOF)
+ {
+ ok = 1;
+ break;
+ }
+ if (c != 'S')
+ break;
+ /* the 'S' of the next record's magic has been consumed already */
+ i = 1;
+ }
+ fclose(f);
+ /* for the default file name, remember that loading was attempted if no table got installed */
+ if (font != -1 && file == 0 && recodetabs[font].flags == 0)
+ recodetabs[font].flags = RECODETAB_TRIED;
+ return ok ? 0 : -1;
+}
+
+/*
+ * Load the translation tables for every font in the encoding's
+ * fontlist and for its default font, skipping fonts whose recodetabs
+ * flags are already non-zero.
+ */
+void
+LoadFontTranslationsForEncoding(encoding)
+int encoding;
+{
+  char *list;
+  int f;
+
+  debug1("LoadFontTranslationsForEncoding: encoding %d\n", encoding);
+  list = encodings[encoding].fontlist;
+  if (list != 0)
+    {
+      for (; *list != 0; list++)
+        {
+          f = (unsigned char)*list;
+          if (recodetabs[f].flags == 0)
+            LoadFontTranslation(f, 0);
+        }
+    }
+  f = encodings[encoding].deffont;
+  if (f > 0 && recodetabs[f].flags == 0)
+    LoadFontTranslation(f, 0);
+}
+
+#endif /* UTF8 */
+
+#else /* !ENCODINGS */
+
+/*
+ * Reduced EncodeChar for builds without ENCODINGS; it only encodes
+ * font changes, as needed by copy/paste mode.
+ *
+ * The font is taken from c >> 16 (0 for c == -1).  If fontp is given
+ * and the font differs from *fontp, *fontp is updated and a
+ * designation sequence precedes the character.  With c == -1 only that
+ * sequence is produced.  With bp == 0 nothing is stored.  Returns the
+ * number of bytes produced; the encoding argument is not used.
+ */
+int
+EncodeChar(bp, c, encoding, fontp)
+char *bp;
+int c;
+int encoding;
+int *fontp;
+{
+  int font = (c == -1) ? 0 : c >> 16;
+  int len = 0;
+
+  if (fontp && *fontp != font)
+    {
+      *fontp = font;
+      if (font != 0 && font < ' ')
+        {
+          /* "ESC $ F", or "ESC $ ( F" for fonts above 2, with F = '@' + font */
+          int wide = font > 2;
+
+          if (bp)
+            {
+              *bp++ = 033;
+              *bp++ = '$';
+              if (wide)
+                *bp++ = '(';
+              *bp++ = '@' + font;
+            }
+          len += wide ? 4 : 3;
+        }
+      else if (font < 128)
+        {
+          /* "ESC ( F"; font 0 is written as 'B' */
+          if (bp)
+            {
+              *bp++ = 033;
+              *bp++ = '(';
+              *bp++ = font ? font : 'B';
+            }
+          len += 3;
+        }
+    }
+  if (c == -1)
+    return len;
+  if (c & 0xff00)
+    {
+      if (bp)
+        *bp++ = c >> 8;
+      len++;
+    }
+  if (bp)
+    *bp = c;
+  return len + 1;
+}
+
+#endif /* ENCODINGS */