diff options
Diffstat (limited to '')
-rw-r--r-- | encoding.c | 2141 |
1 files changed, 2141 insertions, 0 deletions
diff --git a/encoding.c b/encoding.c new file mode 100644 index 0000000..976978a --- /dev/null +++ b/encoding.c @@ -0,0 +1,2141 @@ +/* Copyright (c) 1993-2003 + * Juergen Weigert (jnweiger@immd4.informatik.uni-erlangen.de) + * Michael Schroeder (mlschroe@immd4.informatik.uni-erlangen.de) + * Copyright (c) 1987 Oliver Laumann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (see the file COPYING); if not, see + * https://www.gnu.org/licenses/, or contact Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA + * + **************************************************************** + */ + +#include <sys/types.h> + +#include "config.h" +#include "screen.h" +#include "extern.h" + +#ifdef ENCODINGS + +extern unsigned char *null; +extern struct display *display, *displays; +extern struct layer *flayer; + +extern char *screenencodings; + +#ifdef DW_CHARS +extern int cjkwidth; +#endif + +static int encmatch __P((char *, char *)); +# ifdef UTF8 +static int recode_char __P((int, int, int)); +static int recode_char_to_encoding __P((int, int)); +static void comb_tofront __P((int)); +# ifdef DW_CHARS +static int recode_char_dw __P((int, int *, int, int)); +static int recode_char_dw_to_encoding __P((int, int *, int)); +# endif +# endif + +struct encoding { + char *name; + char *charsets; + int deffont; + int usegr; + int noc1; + char *fontlist; +}; + +/* big5 font: ^X */ +/* KOI8-R font: 96 ! 
*/ +/* CP1251 font: 96 ? */ + +struct encoding encodings[] = { + { "C", 0, 0, 0, 0, 0 }, + { "eucJP", "B\002I\00401", 0, 1, 0, "\002\004I" }, + { "SJIS", "BIBB01", 0, 1, 1, "\002I" }, + { "eucKR", "B\003BB01", 0, 1, 0, "\003" }, + { "eucCN", "B\001BB01", 0, 1, 0, "\001" }, + { "Big5", "B\030BB01", 0, 1, 0, "\030" }, + { "KOI8-R", 0, 0x80|'!', 0, 1, 0 }, + { "CP1251", 0, 0x80|'?', 0, 1, 0 }, + { "UTF-8", 0, -1, 0, 0, 0 }, + { "ISO8859-2", 0, 0x80|'B', 0, 0, 0 }, + { "ISO8859-3", 0, 0x80|'C', 0, 0, 0 }, + { "ISO8859-4", 0, 0x80|'D', 0, 0, 0 }, + { "ISO8859-5", 0, 0x80|'L', 0, 0, 0 }, + { "ISO8859-6", 0, 0x80|'G', 0, 0, 0 }, + { "ISO8859-7", 0, 0x80|'F', 0, 0, 0 }, + { "ISO8859-8", 0, 0x80|'H', 0, 0, 0 }, + { "ISO8859-9", 0, 0x80|'M', 0, 0, 0 }, + { "ISO8859-10", 0, 0x80|'V', 0, 0, 0 }, + { "ISO8859-15", 0, 0x80|'b', 0, 0, 0 }, + { "jis", 0, 0, 0, 0, "\002\004I" }, + { "GBK", "B\031BB01", 0x80|'b', 1, 1, "\031" } +}; + +#ifdef UTF8 + +static unsigned short builtin_tabs[][2] = { + { 0x30, 0 }, /* 0: special graphics (line drawing) */ + { 0x005f, 0x25AE }, + { 0x0060, 0x25C6 }, + { 0x0061, 0x2592 }, + { 0x0062, 0x2409 }, + { 0x0063, 0x240C }, + { 0x0064, 0x240D }, + { 0x0065, 0x240A }, + { 0x0066, 0x00B0 }, + { 0x0067, 0x00B1 }, + { 0x0068, 0x2424 }, + { 0x0069, 0x240B }, + { 0x006a, 0x2518 }, + { 0x006b, 0x2510 }, + { 0x006c, 0x250C }, + { 0x006d, 0x2514 }, + { 0x006e, 0x253C }, + { 0x006f, 0x23BA }, + { 0x0070, 0x23BB }, + { 0x0071, 0x2500 }, + { 0x0072, 0x23BC }, + { 0x0073, 0x23BD }, + { 0x0074, 0x251C }, + { 0x0075, 0x2524 }, + { 0x0076, 0x2534 }, + { 0x0077, 0x252C }, + { 0x0078, 0x2502 }, + { 0x0079, 0x2264 }, + { 0x007a, 0x2265 }, + { 0x007b, 0x03C0 }, + { 0x007c, 0x2260 }, + { 0x007d, 0x00A3 }, + { 0x007e, 0x00B7 }, + { 0, 0}, + + { 0x34, 0 }, /* 4: Dutch */ + { 0x0023, 0x00a3 }, + { 0x0040, 0x00be }, + { 0x005b, 0x00ff }, + { 0x005c, 0x00bd }, + { 0x005d, 0x007c }, + { 0x007b, 0x00a8 }, + { 0x007c, 0x0066 }, + { 0x007d, 0x00bc }, + { 0x007e, 0x00b4 }, + { 0, 
0}, + + { 0x35, 0 }, /* 5: Finnish */ + { 0x005b, 0x00c4 }, + { 0x005c, 0x00d6 }, + { 0x005d, 0x00c5 }, + { 0x005e, 0x00dc }, + { 0x0060, 0x00e9 }, + { 0x007b, 0x00e4 }, + { 0x007c, 0x00f6 }, + { 0x007d, 0x00e5 }, + { 0x007e, 0x00fc }, + { 0, 0}, + + { 0x36, 0 }, /* 6: Norwegian/Danish */ + { 0x0040, 0x00c4 }, + { 0x005b, 0x00c6 }, + { 0x005c, 0x00d8 }, + { 0x005d, 0x00c5 }, + { 0x005e, 0x00dc }, + { 0x0060, 0x00e4 }, + { 0x007b, 0x00e6 }, + { 0x007c, 0x00f8 }, + { 0x007d, 0x00e5 }, + { 0x007e, 0x00fc }, + { 0, 0}, + + { 0x37, 0 }, /* 7: Swedish */ + { 0x0040, 0x00c9 }, + { 0x005b, 0x00c4 }, + { 0x005c, 0x00d6 }, + { 0x005d, 0x00c5 }, + { 0x005e, 0x00dc }, + { 0x0060, 0x00e9 }, + { 0x007b, 0x00e4 }, + { 0x007c, 0x00f6 }, + { 0x007d, 0x00e5 }, + { 0x007e, 0x00fc }, + { 0, 0}, + + { 0x3d, 0}, /* =: Swiss */ + { 0x0023, 0x00f9 }, + { 0x0040, 0x00e0 }, + { 0x005b, 0x00e9 }, + { 0x005c, 0x00e7 }, + { 0x005d, 0x00ea }, + { 0x005e, 0x00ee }, + { 0x005f, 0x00e8 }, + { 0x0060, 0x00f4 }, + { 0x007b, 0x00e4 }, + { 0x007c, 0x00f6 }, + { 0x007d, 0x00fc }, + { 0x007e, 0x00fb }, + { 0, 0}, + + { 0x41, 0}, /* A: UK */ + { 0x0023, 0x00a3 }, + { 0, 0}, + + { 0x4b, 0}, /* K: German */ + { 0x0040, 0x00a7 }, + { 0x005b, 0x00c4 }, + { 0x005c, 0x00d6 }, + { 0x005d, 0x00dc }, + { 0x007b, 0x00e4 }, + { 0x007c, 0x00f6 }, + { 0x007d, 0x00fc }, + { 0x007e, 0x00df }, + { 0, 0}, + + { 0x51, 0}, /* Q: French Canadian */ + { 0x0040, 0x00e0 }, + { 0x005b, 0x00e2 }, + { 0x005c, 0x00e7 }, + { 0x005d, 0x00ea }, + { 0x005e, 0x00ee }, + { 0x0060, 0x00f4 }, + { 0x007b, 0x00e9 }, + { 0x007c, 0x00f9 }, + { 0x007d, 0x00e8 }, + { 0x007e, 0x00fb }, + { 0, 0}, + + { 0x52, 0}, /* R: French */ + { 0x0023, 0x00a3 }, + { 0x0040, 0x00e0 }, + { 0x005b, 0x00b0 }, + { 0x005c, 0x00e7 }, + { 0x005d, 0x00a7 }, + { 0x007b, 0x00e9 }, + { 0x007c, 0x00f9 }, + { 0x007d, 0x00e8 }, + { 0x007e, 0x00a8 }, + { 0, 0}, + + { 0x59, 0}, /* Y: Italian */ + { 0x0023, 0x00a3 }, + { 0x0040, 0x00a7 }, + { 0x005b, 0x00b0 }, + { 0x005c, 
0x00e7 }, + { 0x005d, 0x00e9 }, + { 0x0060, 0x00f9 }, + { 0x007b, 0x00e0 }, + { 0x007c, 0x00f2 }, + { 0x007d, 0x00e8 }, + { 0x007e, 0x00ec }, + { 0, 0}, + + { 0x5a, 0}, /* Z: Spanish */ + { 0x0023, 0x00a3 }, + { 0x0040, 0x00a7 }, + { 0x005b, 0x00a1 }, + { 0x005c, 0x00d1 }, + { 0x005d, 0x00bf }, + { 0x007b, 0x00b0 }, + { 0x007c, 0x00f1 }, + { 0x007d, 0x00e7 }, + { 0, 0}, + + { 0xe2, 0}, /* 96-b: ISO-8859-15 */ + { 0x00a4, 0x20ac }, + { 0x00a6, 0x0160 }, + { 0x00a8, 0x0161 }, + { 0x00b4, 0x017D }, + { 0x00b8, 0x017E }, + { 0x00bc, 0x0152 }, + { 0x00bd, 0x0153 }, + { 0x00be, 0x0178 }, + { 0, 0}, + + { 0x4a, 0}, /* J: JIS 0201 Roman */ + { 0x005c, 0x00a5 }, + { 0x007e, 0x203e }, + { 0, 0}, + + { 0x49, 0}, /* I: halfwidth katakana */ + { 0x0021, 0xff61 }, + { 0x005f|0x8000, 0xff9f }, + { 0, 0}, + + { 0, 0} +}; + +struct recodetab +{ + unsigned short (*tab)[2]; + int flags; +}; + +#define RECODETAB_ALLOCED 1 +#define RECODETAB_BUILTIN 2 +#define RECODETAB_TRIED 4 + +static struct recodetab recodetabs[256]; + +void +InitBuiltinTabs() +{ + unsigned short (*p)[2]; + for (p = builtin_tabs; (*p)[0]; p++) + { + recodetabs[(*p)[0]].flags = RECODETAB_BUILTIN; + recodetabs[(*p)[0]].tab = p + 1; + p++; + while((*p)[0]) + p++; + } +} + +static int +recode_char(c, to_utf, font) +int c, to_utf, font; +{ + int f; + unsigned short (*p)[2]; + + if (to_utf) + { + if (c < 256) + return c; + f = (c >> 8) & 0xff; + c &= 0xff; + /* map aliases to keep the table small */ + switch (f) + { + case 'C': + f ^= ('C' ^ '5'); + break; + case 'E': + f ^= ('E' ^ '6'); + break; + case 'H': + f ^= ('H' ^ '7'); + break; + default: + break; + } + p = recodetabs[f].tab; + if (p == 0 && recodetabs[f].flags == 0) + { + LoadFontTranslation(f, 0); + p = recodetabs[f].tab; + } + if (p) + for (; (*p)[0]; p++) + { + if ((p[0][0] & 0x8000) && (c <= (p[0][0] & 0x7fff)) && c >= p[-1][0]) + return c - p[-1][0] + p[-1][1]; + if ((*p)[0] == c) + return (*p)[1]; + } + return c & 0xff; /* map to latin1 */ + } + if (font 
== -1) + { + if (c < 256) + return c; /* latin1 */ + for (font = 32; font < 128; font++) + { + p = recodetabs[font].tab; + if (p) + for (; (*p)[1]; p++) + { + if ((p[0][0] & 0x8000) && c <= p[0][1] && c >= p[-1][1]) + return (c - p[-1][1] + p[-1][0]) | (font << 8); + if ((*p)[1] == c) + return (*p)[0] | (font << 8); + } + } + return '?'; + } + if (c < 128 && (font & 128) != 0) + return c; + if (font >= 32) + { + p = recodetabs[font].tab; + if (p == 0 && recodetabs[font].flags == 0) + { + LoadFontTranslation(font, 0); + p = recodetabs[font].tab; + } + if (p) + for (; (*p)[1]; p++) + { + if ((p[0][0] & 0x8000) && c <= p[0][1] && c >= p[-1][1]) + return (c - p[-1][1] + p[-1][0]) | (font & 128 ? 0 : font << 8); + if ((*p)[1] == c) + return (*p)[0] | (font & 128 ? 0 : font << 8); + } + } + return -1; +} + + +#ifdef DW_CHARS +static int +recode_char_dw(c, c2p, to_utf, font) +int c, *c2p, to_utf, font; +{ + int f; + unsigned short (*p)[2]; + + if (to_utf) + { + f = (c >> 8) & 0xff; + c = (c & 255) << 8 | (*c2p & 255); + *c2p = 0xffff; + p = recodetabs[f].tab; + if (p == 0 && recodetabs[f].flags == 0) + { + LoadFontTranslation(f, 0); + p = recodetabs[f].tab; + } + if (p) + for (; (*p)[0]; p++) + if ((*p)[0] == c) + { +#ifdef DW_CHARS + if (!utf8_isdouble((*p)[1])) + *c2p = ' '; +#endif + return (*p)[1]; + } + return UCS_REPL_DW; + } + if (font == -1) + { + for (font = 0; font < 030; font++) + { + p = recodetabs[font].tab; + if (p) + for (; (*p)[1]; p++) + if ((*p)[1] == c) + { + *c2p = ((*p)[0] & 255) | font << 8 | 0x8000; + return ((*p)[0] >> 8) | font << 8; + } + } + *c2p = '?'; + return '?'; + } + if (font < 32) + { + p = recodetabs[font].tab; + if (p == 0 && recodetabs[font].flags == 0) + { + LoadFontTranslation(font, 0); + p = recodetabs[font].tab; + } + if (p) + for (; (*p)[1]; p++) + if ((*p)[1] == c) + { + *c2p = ((*p)[0] & 255) | font << 8 | 0x8000; + return ((*p)[0] >> 8) | font << 8; + } + } + return -1; +} +#endif + +static int +recode_char_to_encoding(c, 
encoding) +int c, encoding; +{ + char *fp; + int x; + + if (encoding == UTF8) + return recode_char(c, 1, -1); + if ((fp = encodings[encoding].fontlist) != 0) + while(*fp) + if ((x = recode_char(c, 0, (unsigned char)*fp++)) != -1) + return x; + if (encodings[encoding].deffont) + if ((x = recode_char(c, 0, encodings[encoding].deffont)) != -1) + return x; + return recode_char(c, 0, -1); +} + +#ifdef DW_CHARS +static int +recode_char_dw_to_encoding(c, c2p, encoding) +int c, *c2p, encoding; +{ + char *fp; + int x; + + if (encoding == UTF8) + return recode_char_dw(c, c2p, 1, -1); + if ((fp = encodings[encoding].fontlist) != 0) + while(*fp) + if ((x = recode_char_dw(c, c2p, 0, (unsigned char)*fp++)) != -1) + return x; + if (encodings[encoding].deffont) + if ((x = recode_char_dw(c, c2p, 0, encodings[encoding].deffont)) != -1) + return x; + return recode_char_dw(c, c2p, 0, -1); +} +#endif + + +struct mchar * +recode_mchar(mc, from, to) +struct mchar *mc; +int from, to; +{ + static struct mchar rmc; + int c; + + debug3("recode_mchar %02x from %d to %d\n", mc->image, from, to); + if (from == to || (from != UTF8 && to != UTF8)) + return mc; + rmc = *mc; + if (rmc.font == 0 && from != UTF8) + rmc.font = encodings[from].deffont; + if (rmc.font == 0) /* latin1 is the same in unicode */ + return mc; + c = rmc.image | (rmc.font << 8); + if (from == UTF8) + c |= rmc.fontx << 16; +#ifdef DW_CHARS + if (rmc.mbcs) + { + int c2 = rmc.mbcs; + c = recode_char_dw_to_encoding(c, &c2, to); + rmc.mbcs = c2; + } + else +#endif + c = recode_char_to_encoding(c, to); + rmc.image = c & 255; + rmc.font = c >> 8 & 255; + if (to == UTF8) + rmc.fontx = c >> 16 & 255; + return &rmc; +} + +struct mline * +recode_mline(ml, w, from, to) +struct mline *ml; +int w; +int from, to; +{ + static int maxlen; + static int last; + static struct mline rml[2], *rl; + int i, c; + + if (from == to || (from != UTF8 && to != UTF8) || w == 0) + return ml; + if (ml->font == null && ml->fontx == null && 
encodings[from].deffont == 0) + return ml; + if (w > maxlen) + { + for (i = 0; i < 2; i++) + { + if (rml[i].image == 0) + rml[i].image = malloc(w); + else + rml[i].image = realloc(rml[i].image, w); + if (rml[i].font == 0) + rml[i].font = malloc(w); + else + rml[i].font = realloc(rml[i].font, w); + if (rml[i].fontx == 0) + rml[i].fontx = malloc(w); + else + rml[i].fontx = realloc(rml[i].fontx, w); + if (rml[i].image == 0 || rml[i].font == 0 || rml[i].fontx == 0) + { + maxlen = 0; + return ml; /* sorry */ + } + } + maxlen = w; + } + + debug("recode_mline: from\n"); + for (i = 0; i < w; i++) + debug1("%c", "0123456789abcdef"[(ml->image[i] >> 4) & 15]); + debug("\n"); + for (i = 0; i < w; i++) + debug1("%c", "0123456789abcdef"[(ml->image[i] ) & 15]); + debug("\n"); + for (i = 0; i < w; i++) + debug1("%c", "0123456789abcdef"[(ml->font[i] >> 4) & 15]); + debug("\n"); + for (i = 0; i < w; i++) + debug1("%c", "0123456789abcdef"[(ml->font[i] ) & 15]); + debug("\n"); + for (i = 0; i < w; i++) + debug1("%c", "0123456789abcdef"[(ml->fontx[i] >> 4) & 15]); + debug("\n"); + for (i = 0; i < w; i++) + debug1("%c", "0123456789abcdef"[(ml->fontx[i] ) & 15]); + debug("\n"); + + rl = rml + last; + rl->attr = ml->attr; +#ifdef COLOR + rl->color = ml->color; +# ifdef COLORS256 + rl->colorx = ml->colorx; +# endif +#endif + for (i = 0; i < w; i++) + { + c = ml->image[i] | (ml->font[i] << 8); + if (from == UTF8) + c |= ml->fontx[i] << 16; + if (from != UTF8 && c < 256) + c |= encodings[from].deffont << 8; +#ifdef DW_CHARS + if ((from != UTF8 && (c & 0x1f00) != 0 && (c & 0xe000) == 0) || (from == UTF8 && utf8_isdouble(c))) + { + if (i + 1 == w) + c = '?'; + else + { + int c2; + i++; + c2 = ml->image[i] | (ml->font[i] << 8); + c = recode_char_dw_to_encoding(c, &c2, to); + if (to == UTF8) + rl->fontx[i - 1] = c >> 16 & 255; + rl->font[i - 1] = c >> 8 & 255; + rl->image[i - 1] = c & 255; + c = c2; + } + } + else +#endif + c = recode_char_to_encoding(c, to); + rl->image[i] = c & 255; + 
rl->font[i] = c >> 8 & 255; + if (to == UTF8) + rl->fontx[i] = c >> 16 & 255; + } + last ^= 1; + debug("recode_mline: to\n"); + for (i = 0; i < w; i++) + debug1("%c", "0123456789abcdef"[(rl->image[i] >> 4) & 15]); + debug("\n"); + for (i = 0; i < w; i++) + debug1("%c", "0123456789abcdef"[(rl->image[i] ) & 15]); + debug("\n"); + for (i = 0; i < w; i++) + debug1("%c", "0123456789abcdef"[(rl->font[i] >> 4) & 15]); + debug("\n"); + for (i = 0; i < w; i++) + debug1("%c", "0123456789abcdef"[(rl->font[i] ) & 15]); + debug("\n"); + for (i = 0; i < w; i++) + debug1("%c", "0123456789abcdef"[(rl->fontx[i] >> 4) & 15]); + debug("\n"); + for (i = 0; i < w; i++) + debug1("%c", "0123456789abcdef"[(rl->fontx[i] ) & 15]); + debug("\n"); + return rl; +} + +struct combchar { + unsigned int c1; + unsigned int c2; + unsigned int next; + unsigned int prev; +}; +struct combchar **combchars; + +void +AddUtf8(c) +int c; +{ + ASSERT(D_encoding == UTF8); + if (c >= 0xd800 && c < 0xe000 && combchars && combchars[c - 0xd800]) + { + AddUtf8(combchars[c - 0xd800]->c1); + c = combchars[c - 0xd800]->c2; + } + + /* replace out of range values with U+FFFD "replacement character" */ + if (c < 0 || c > 0x10ffff) + c = 0xfffd; + + if (c >= 0x10000) + { + AddChar((c & 0x1c0000) >> 18 ^ 0xf0); + c = (c & 0x3ffff) ^ ((0xe0 ^ 0x80) << 12); + } + if (c >= 0x800) + { + AddChar((c & 0x7f000) >> 12 ^ 0xe0); + c = (c & 0x0fff) ^ ((0xc0 ^ 0x80) << 6); + } + if (c >= 0x80) + { + AddChar((c & 0x1fc0) >> 6 ^ 0xc0); + c = (c & 0x3f) | 0x80; + } + AddChar(c); +} + +int +ToUtf8_comb(p, c) +char *p; +int c; +{ + int l; + + if (c >= 0xd800 && c < 0xe000 && combchars && combchars[c - 0xd800]) + { + l = ToUtf8_comb(p, combchars[c - 0xd800]->c1); + return l + ToUtf8(p ? 
/*
 * Encode c as UTF-8.
 *
 * p: destination buffer, or 0 to only compute the length.  Nothing is
 *    written beyond the encoded bytes (no terminating NUL).
 * c: code point; values outside 0..0x10ffff are encoded as U+FFFD.
 * Returns the number of bytes of the encoding (1..4).
 */
int
ToUtf8(p, c)
char *p;
int c;
{
  static const int lead[5] = { 0, 0, 0xc0, 0xe0, 0xf0 };
  int len, k;

  /* replace out of range values with U+FFFD "replacement character" */
  if (c < 0 || c > 0x10ffff)
    c = 0xfffd;

  if (c < 0x80)
    len = 1;
  else if (c < 0x800)
    len = 2;
  else if (c < 0x10000)
    len = 3;
  else
    len = 4;

  if (p == 0)
    return len;

  if (len == 1)
    {
      p[0] = c;
      return len;
    }

  /* continuation bytes carry six bits each; fill them back to front */
  for (k = len - 1; k > 0; k--)
    {
      p[k] = 0x80 | (c & 0x3f);
      c >>= 6;
    }
  /* whatever is left of c fits behind the length marker */
  p[0] = lead[len] | c;
  return len;
}
c : 0; + if (utf8char) + return -1; +#if 0 + if (c & 0xffff0000) + c = UCS_REPL; /* sorry, only know 16bit Unicode */ +#else + if (c & 0xff800000) + c = UCS_REPL; /* sorry, only know 23bit Unicode */ +#endif + if (c >= 0xd800 && (c <= 0xdfff || c == 0xfffe || c == 0xffff)) + c = UCS_REPL; /* illegal code */ + return c; +} + + +void +WinSwitchEncoding(p, encoding) +struct win *p; +int encoding; +{ + int i, j, c; + struct mline *ml; + struct display *d; + struct canvas *cv; + struct layer *oldflayer; + + if ((p->w_encoding == UTF8) == (encoding == UTF8)) + { + p->w_encoding = encoding; + return; + } + oldflayer = flayer; + for (d = displays; d; d = d->d_next) + for (cv = d->d_cvlist; cv; cv = cv->c_next) + if (p == Layer2Window(cv->c_layer)) + { + flayer = cv->c_layer; + while(flayer->l_next) + { + if (oldflayer == flayer) + oldflayer = flayer->l_next; + ExitOverlayPage(); + } + } + flayer = oldflayer; + for (j = 0; j < p->w_height + p->w_histheight; j++) + { +#ifdef COPY_PASTE + ml = j < p->w_height ? 
/* closed range of code points, first <= last */
struct interval {
  int first;
  int last;
};

/*
 * Binary search in a table of intervals sorted in ascending order.
 * `max' is the index of the last table entry (not the entry count).
 * Returns 1 if ucs lies inside one of the intervals, 0 otherwise.
 */
static int bisearch(int ucs, const struct interval *table, int max) {
  int lo = 0;
  int hi = max;
  int mid;

  /* quick reject for values outside the span covered by the table */
  if (ucs < table[0].first || ucs > table[hi].last)
    return 0;

  while (lo <= hi) {
    mid = lo + (hi - lo) / 2;
    if (ucs < table[mid].first)
      hi = mid - 1;
    else if (ucs > table[mid].last)
      lo = mid + 1;
    else
      return 1;
  }

  return 0;
}
0x2122}, + {0x2126, 0x2126}, + {0x212B, 0x212B}, + {0x2153, 0x2154}, + {0x215B, 0x215E}, + {0x2160, 0x216B}, + {0x2170, 0x2179}, + {0x2189, 0x2189}, + {0x2190, 0x2199}, + {0x21B8, 0x21B9}, + {0x21D2, 0x21D2}, + {0x21D4, 0x21D4}, + {0x21E7, 0x21E7}, + {0x2200, 0x2200}, + {0x2202, 0x2203}, + {0x2207, 0x2208}, + {0x220B, 0x220B}, + {0x220F, 0x220F}, + {0x2211, 0x2211}, + {0x2215, 0x2215}, + {0x221A, 0x221A}, + {0x221D, 0x2220}, + {0x2223, 0x2223}, + {0x2225, 0x2225}, + {0x2227, 0x222C}, + {0x222E, 0x222E}, + {0x2234, 0x2237}, + {0x223C, 0x223D}, + {0x2248, 0x2248}, + {0x224C, 0x224C}, + {0x2252, 0x2252}, + {0x2260, 0x2261}, + {0x2264, 0x2267}, + {0x226A, 0x226B}, + {0x226E, 0x226F}, + {0x2282, 0x2283}, + {0x2286, 0x2287}, + {0x2295, 0x2295}, + {0x2299, 0x2299}, + {0x22A5, 0x22A5}, + {0x22BF, 0x22BF}, + {0x2312, 0x2312}, + {0x2460, 0x24E9}, + {0x24EB, 0x254B}, + {0x2550, 0x2573}, + {0x2580, 0x258F}, + {0x2592, 0x2595}, + {0x25A0, 0x25A1}, + {0x25A3, 0x25A9}, + {0x25B2, 0x25B3}, + {0x25B6, 0x25B7}, + {0x25BC, 0x25BD}, + {0x25C0, 0x25C1}, + {0x25C6, 0x25C8}, + {0x25CB, 0x25CB}, + {0x25CE, 0x25D1}, + {0x25E2, 0x25E5}, + {0x25EF, 0x25EF}, + {0x2605, 0x2606}, + {0x2609, 0x2609}, + {0x260E, 0x260F}, + {0x261C, 0x261C}, + {0x261E, 0x261E}, + {0x2640, 0x2640}, + {0x2642, 0x2642}, + {0x2660, 0x2661}, + {0x2663, 0x2665}, + {0x2667, 0x266A}, + {0x266C, 0x266D}, + {0x266F, 0x266F}, + {0x269E, 0x269F}, + {0x26BF, 0x26BF}, + {0x26C6, 0x26CD}, + {0x26CF, 0x26D3}, + {0x26D5, 0x26E1}, + {0x26E3, 0x26E3}, + {0x26E8, 0x26E9}, + {0x26EB, 0x26F1}, + {0x26F4, 0x26F4}, + {0x26F6, 0x26F9}, + {0x26FB, 0x26FC}, + {0x26FE, 0x26FF}, + {0x273D, 0x273D}, + {0x2776, 0x277F}, + {0x2B56, 0x2B59}, + {0x3248, 0x324F}, + {0xE000, 0xF8FF}, + {0xFE00, 0xFE0F}, + {0xFFFD, 0xFFFD}, + {0x1F100, 0x1F10A}, + {0x1F110, 0x1F12D}, + {0x1F130, 0x1F169}, + {0x1F170, 0x1F18D}, + {0x1F18F, 0x1F190}, + {0x1F19B, 0x1F1AC}, + {0xE0100, 0xE01EF}, + {0xF0000, 0xFFFFD}, + {0x100000, 0x10FFFD}, + }; + /* A sorted list of 
intervals of double width characters generated by + * https://github.com/GNOME/glib/blob/glib-2-50/glib/gen-unicode-tables.pl */ + static const struct interval wide[] = { + {0x1100, 0x115F}, + {0x231A, 0x231B}, + {0x2329, 0x232A}, + {0x23E9, 0x23EC}, + {0x23F0, 0x23F0}, + {0x23F3, 0x23F3}, + {0x25FD, 0x25FE}, + {0x2614, 0x2615}, + {0x2648, 0x2653}, + {0x267F, 0x267F}, + {0x2693, 0x2693}, + {0x26A1, 0x26A1}, + {0x26AA, 0x26AB}, + {0x26BD, 0x26BE}, + {0x26C4, 0x26C5}, + {0x26CE, 0x26CE}, + {0x26D4, 0x26D4}, + {0x26EA, 0x26EA}, + {0x26F2, 0x26F3}, + {0x26F5, 0x26F5}, + {0x26FA, 0x26FA}, + {0x26FD, 0x26FD}, + {0x2705, 0x2705}, + {0x270A, 0x270B}, + {0x2728, 0x2728}, + {0x274C, 0x274C}, + {0x274E, 0x274E}, + {0x2753, 0x2755}, + {0x2757, 0x2757}, + {0x2795, 0x2797}, + {0x27B0, 0x27B0}, + {0x27BF, 0x27BF}, + {0x2B1B, 0x2B1C}, + {0x2B50, 0x2B50}, + {0x2B55, 0x2B55}, + {0x2E80, 0x2E99}, + {0x2E9B, 0x2EF3}, + {0x2F00, 0x2FD5}, + {0x2FF0, 0x2FFB}, + {0x3000, 0x303E}, + {0x3041, 0x3096}, + {0x3099, 0x30FF}, + {0x3105, 0x312F}, + {0x3131, 0x318E}, + {0x3190, 0x31BA}, + {0x31C0, 0x31E3}, + {0x31F0, 0x321E}, + {0x3220, 0x3247}, + {0x3250, 0x4DBF}, + {0x4E00, 0xA48C}, + {0xA490, 0xA4C6}, + {0xA960, 0xA97C}, + {0xAC00, 0xD7A3}, + {0xF900, 0xFAFF}, + {0xFE10, 0xFE19}, + {0xFE30, 0xFE52}, + {0xFE54, 0xFE66}, + {0xFE68, 0xFE6B}, + {0xFF01, 0xFF60}, + {0xFFE0, 0xFFE6}, + {0x16FE0, 0x16FE3}, + {0x17000, 0x187F7}, + {0x18800, 0x18AF2}, + {0x1B000, 0x1B11E}, + {0x1B150, 0x1B152}, + {0x1B164, 0x1B167}, + {0x1B170, 0x1B2FB}, + {0x1F004, 0x1F004}, + {0x1F0CF, 0x1F0CF}, + {0x1F18E, 0x1F18E}, + {0x1F191, 0x1F19A}, + {0x1F200, 0x1F202}, + {0x1F210, 0x1F23B}, + {0x1F240, 0x1F248}, + {0x1F250, 0x1F251}, + {0x1F260, 0x1F265}, + {0x1F300, 0x1F320}, + {0x1F32D, 0x1F335}, + {0x1F337, 0x1F37C}, + {0x1F37E, 0x1F393}, + {0x1F3A0, 0x1F3CA}, + {0x1F3CF, 0x1F3D3}, + {0x1F3E0, 0x1F3F0}, + {0x1F3F4, 0x1F3F4}, + {0x1F3F8, 0x1F43E}, + {0x1F440, 0x1F440}, + {0x1F442, 0x1F4FC}, + {0x1F4FF, 0x1F53D}, + {0x1F54B, 
0x1F54E}, + {0x1F550, 0x1F567}, + {0x1F57A, 0x1F57A}, + {0x1F595, 0x1F596}, + {0x1F5A4, 0x1F5A4}, + {0x1F5FB, 0x1F64F}, + {0x1F680, 0x1F6C5}, + {0x1F6CC, 0x1F6CC}, + {0x1F6D0, 0x1F6D2}, + {0x1F6D5, 0x1F6D5}, + {0x1F6EB, 0x1F6EC}, + {0x1F6F4, 0x1F6FA}, + {0x1F7E0, 0x1F7EB}, + {0x1F90D, 0x1F971}, + {0x1F973, 0x1F976}, + {0x1F97A, 0x1F9A2}, + {0x1F9A5, 0x1F9AA}, + {0x1F9AE, 0x1F9CA}, + {0x1F9CD, 0x1F9FF}, + {0x1FA70, 0x1FA73}, + {0x1FA78, 0x1FA7A}, + {0x1FA80, 0x1FA82}, + {0x1FA90, 0x1FA95}, + {0x20000, 0x2FFFD}, + {0x30000, 0x3FFFD}, + }; + + if (c >= 0xdf00 && c <= 0xdfff) + return 1; /* dw combining sequence */ + return ((bisearch(c, wide, sizeof(wide) / sizeof(struct interval) - 1)) || + (cjkwidth && + bisearch(c, ambiguous, + sizeof(ambiguous) / sizeof(struct interval) - 1))); +} +#endif + +int +utf8_iscomb(c) +int c; +{ + /* taken from Markus Kuhn's wcwidth */ + static const struct interval combining[] = { + { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 }, + { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, + { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 }, + { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 }, + { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, + { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A }, + { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 }, + { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D }, + { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, + { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, + { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C }, + { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, + { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC }, + { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD }, + { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C }, + { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D }, + { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 }, + { 0x0BCD, 0x0BCD }, 
{ 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 }, + { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC }, + { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, + { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D }, + { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, + { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E }, + { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC }, + { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 }, + { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E }, + { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 }, + { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 }, + { 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 }, + { 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F }, + { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 }, + { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD }, + { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD }, + { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 }, + { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B }, + { 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 }, + { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 }, + { 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF }, + { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 }, + { 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F }, + { 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, + { 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, + { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB }, + { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F }, + { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x1D167, 0x1D169 }, + { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD }, + { 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F }, + { 0xE0100, 0xE01EF } + }; + + return bisearch(c, combining, sizeof(combining) / sizeof(struct interval) - 1); +} + +static void 
+comb_tofront(i) +int i; +{ + for (;;) + { + int root = i >= 0x700 ? 0x801 : 0x800; + debug1("bring to front: %x\n", i); + combchars[combchars[i]->prev]->next = combchars[i]->next; + combchars[combchars[i]->next]->prev = combchars[i]->prev; + combchars[i]->next = combchars[root]->next; + combchars[i]->prev = root; + combchars[combchars[root]->next]->prev = i; + combchars[root]->next = i; + i = combchars[i]->c1; + if (i < 0xd800 || i >= 0xe000) + return; + i -= 0xd800; + } +} + +void +utf8_handle_comb(c, mc) +int c; +struct mchar *mc; +{ + int root, i, c1; + int isdouble; + + c1 = mc->image | (mc->font << 8) | mc->fontx << 16; + isdouble = c1 >= 0x1100 && utf8_isdouble(c1); + if (!combchars) + { + combchars = (struct combchar **)calloc(0x802, sizeof(struct combchar *)); + if (!combchars) + return; + combchars[0x800] = (struct combchar *)malloc(sizeof(struct combchar)); + combchars[0x801] = (struct combchar *)malloc(sizeof(struct combchar)); + if (!combchars[0x800] || !combchars[0x801]) + { + if (combchars[0x800]) + free(combchars[0x800]); + if (combchars[0x801]) + free(combchars[0x801]); + free(combchars); + return; + } + combchars[0x800]->c1 = 0x000; + combchars[0x800]->c2 = 0x700; + combchars[0x800]->next = 0x800; + combchars[0x800]->prev = 0x800; + combchars[0x801]->c1 = 0x700; + combchars[0x801]->c2 = 0x800; + combchars[0x801]->next = 0x801; + combchars[0x801]->prev = 0x801; + } + root = isdouble ? 
0x801 : 0x800; + for (i = combchars[root]->c1; i < combchars[root]->c2; i++) + { + if (!combchars[i]) + break; + if (combchars[i]->c1 == c1 && combchars[i]->c2 == c) + break; + } + if (i == combchars[root]->c2) + { + /* full, recycle old entry */ + if (c1 >= 0xd800 && c1 < 0xe000) + comb_tofront(c1 - 0xd800); + i = combchars[root]->prev; + if (i == 0x800 || i == 0x801 || c1 == i + 0xd800) + { + /* completely full, can't recycle */ + debug("utf8_handle_comp: completely full!\n"); + mc->image = '?'; + mc->font = 0; + return; + } + /* FIXME: delete old char from all buffers */ + } + else if (!combchars[i]) + { + combchars[i] = (struct combchar *)malloc(sizeof(struct combchar)); + if (!combchars[i]) + return; + combchars[i]->prev = i; + combchars[i]->next = i; + } + combchars[i]->c1 = c1; + combchars[i]->c2 = c; + mc->image = i & 0xff; + mc->font = (i >> 8) + 0xd8; + mc->fontx = 0; + debug3("combinig char %x %x -> %x\n", c1, c, i + 0xd800); + comb_tofront(i); +} + +#else /* !UTF8 */ + +void +WinSwitchEncoding(p, encoding) +struct win *p; +int encoding; +{ + p->w_encoding = encoding; + return; +} + +#endif /* UTF8 */ + +static int +encmatch(s1, s2) +char *s1; +char *s2; +{ + int c1, c2; + do + { + c1 = (unsigned char)*s1; + if (c1 >= 'A' && c1 <= 'Z') + c1 += 'a' - 'A'; + if (!(c1 >= 'a' && c1 <= 'z') && !(c1 >= '0' && c1 <= '9')) + { + s1++; + continue; + } + c2 = (unsigned char)*s2; + if (c2 >= 'A' && c2 <= 'Z') + c2 += 'a' - 'A'; + if (!(c2 >= 'a' && c2 <= 'z') && !(c2 >= '0' && c2 <= '9')) + { + s2++; + continue; + } + if (c1 != c2) + return 0; + s1++; + s2++; + } + while(c1); + return 1; +} + +int +FindEncoding(name) +char *name; +{ + int encoding; + + debug1("FindEncoding %s\n", name); + if (name == 0 || *name == 0) + return 0; + if (encmatch(name, "euc")) + name = "eucJP"; + if (encmatch(name, "off") || encmatch(name, "iso8859-1")) + return 0; +#ifndef UTF8 + if (encmatch(name, "UTF-8")) + return -1; +#endif + for (encoding = 0; encoding < 
(int)(sizeof(encodings)/sizeof(*encodings)); encoding++) + if (encmatch(name, encodings[encoding].name)) + { +#ifdef UTF8 + LoadFontTranslationsForEncoding(encoding); +#endif + return encoding; + } + return -1; +} + +char * +EncodingName(encoding) +int encoding; +{ + if (encoding >= (int)(sizeof(encodings)/sizeof(*encodings))) + return 0; + return encodings[encoding].name; +} + +int +EncodingDefFont(encoding) +int encoding; +{ + return encodings[encoding].deffont; +} + +void +ResetEncoding(p) +struct win *p; +{ + char *c; + int encoding = p->w_encoding; + + c = encodings[encoding].charsets; + if (c) + SetCharsets(p, c); +#ifdef UTF8 + LoadFontTranslationsForEncoding(encoding); +#endif + if (encodings[encoding].usegr) + { + p->w_gr = 2; + p->w_FontE = encodings[encoding].charsets[1]; + } + else + p->w_FontE = 0; + if (encodings[encoding].noc1) + p->w_c1 = 0; +} + +/* decoded char: 32-bit <fontx><font><c2><c> + * fontx: non-bmp utf8 + * c2: multi-byte character + * font is always zero for utf8 + * returns: -1 need more bytes + * -2 decode error + */ + + +int +DecodeChar(c, encoding, statep) +int c; +int encoding; +int *statep; +{ + int t; + + debug2("Decoding char %02x for encoding %d\n", c, encoding); +#ifdef UTF8 + if (encoding == UTF8) + { + c = FromUtf8(c, statep); + if (c >= 0x10000) + c = (c & 0x7f0000) << 8 | (c & 0xffff); + return c; + } +#endif + if (encoding == SJIS) + { + if (!*statep) + { + if ((0x81 <= c && c <= 0x9f) || (0xe0 <= c && c <= 0xef)) + { + *statep = c; + return -1; + } + if (c < 0x80) + return c; + return c | (KANA << 16); + } + t = c; + c = *statep; + *statep = 0; + if (0x40 <= t && t <= 0xfc && t != 0x7f) + { + if (c <= 0x9f) + c = (c - 0x81) * 2 + 0x21; + else + c = (c - 0xc1) * 2 + 0x21; + if (t <= 0x7e) + t -= 0x1f; + else if (t <= 0x9e) + t -= 0x20; + else + t -= 0x7e, c++; + return (c << 8) | t | (KANJI << 16); + } + return t; + } + if (encoding == EUC_JP || encoding == EUC_KR || encoding == EUC_CN) + { + if (!*statep) + { + if (c & 
0x80) + { + *statep = c; + return -1; + } + return c; + } + t = c; + c = *statep; + *statep = 0; + if (encoding == EUC_JP) + { + if (c == 0x8e) + return t | (KANA << 16); + if (c == 0x8f) + { + *statep = t | (KANJI0212 << 8); + return -1; + } + } + c &= 0xff7f; + t &= 0x7f; + c = c << 8 | t; + if (encoding == EUC_KR) + return c | (3 << 16); + if (encoding == EUC_CN) + return c | (1 << 16); + if (c & (KANJI0212 << 16)) + return c; + else + return c | (KANJI << 16); + } + if (encoding == BIG5 || encoding == GBK) + { + if (!*statep) + { + if (c & 0x80) + { + if (encoding == GBK && c == 0x80) + return 0xa4 | (('b'|0x80) << 16); + *statep = c; + return -1; + } + return c; + } + t = c; + c = *statep; + *statep = 0; + c &= 0x7f; + return c << 8 | t | (encoding == BIG5 ? 030 << 16 : 031 << 16); + } + return c | (encodings[encoding].deffont << 16); +} + +int +EncodeChar(bp, c, encoding, fontp) +char *bp; +int c; +int encoding; +int *fontp; +{ + int t, f, l; + + debug2("Encoding char %02x for encoding %d\n", c, encoding); + if (c == -1 && fontp) + { + if (*fontp == 0) + return 0; + if (bp) + { + *bp++ = 033; + *bp++ = '('; + *bp++ = 'B'; + } + return 3; + } + f = (c >> 16) & 0xff; + +#ifdef UTF8 + if (encoding == UTF8) + { + if (f) + { +# ifdef DW_CHARS + if (is_dw_font(f)) + { + int c2 = c & 0xff; + c = (c >> 8 & 0xff) | (f << 8); + c = recode_char_dw_to_encoding(c, &c2, encoding); + } + else +# endif + { + c = (c & 0xff) | (f << 8); + c = recode_char_to_encoding(c, encoding); + } + } + return ToUtf8(bp, c); + } + if (f == 0 && (c & 0x7f00ff00) != 0) /* is_utf8? 
*/ + { + if (c >= 0x10000) + c = (c & 0x7f0000) >> 8 | (c & 0xffff); +# ifdef DW_CHARS + if (utf8_isdouble(c)) + { + int c2 = 0xffff; + c = recode_char_dw_to_encoding(c, &c2, encoding); + c = (c << 8) | (c2 & 0xff); + } + else +# endif + { + c = recode_char_to_encoding(c, encoding); + c = ((c & 0xff00) << 8) | (c & 0xff); + } + debug1("Encode: char mapped from utf8 to %x\n", c); + f = c >> 16; + } +#endif + if (f & 0x80) /* map special 96-fonts to latin1 */ + f = 0; + + if (encoding == SJIS) + { + if (f == KANA) + c = (c & 0xff) | 0x80; + else if (f == KANJI) + { + if (!bp) + return 2; + t = c & 0xff; + c = (c >> 8) & 0xff; + t += (c & 1) ? ((t <= 0x5f) ? 0x1f : 0x20) : 0x7e; + c = (c - 0x21) / 2 + ((c < 0x5f) ? 0x81 : 0xc1); + *bp++ = c; + *bp++ = t; + return 2; + } + } + if (encoding == EUC) + { + if (f == KANA) + { + if (bp) + { + *bp++ = 0x8e; + *bp++ = c; + } + return 2; + } + if (f == KANJI) + { + if (bp) + { + *bp++ = (c >> 8) | 0x80; + *bp++ = c | 0x80; + } + return 2; + } + if (f == KANJI0212) + { + if (bp) + { + *bp++ = 0x8f; + *bp++ = c >> 8; + *bp++ = c; + } + return 3; + } + } + if ((encoding == EUC_KR && f == 3) || (encoding == EUC_CN && f == 1)) + { + if (bp) + { + *bp++ = (c >> 8) | 0x80; + *bp++ = c | 0x80; + } + return 2; + } + if ((encoding == BIG5 && f == 030) || (encoding == GBK && f == 031)) + { + if (bp) + { + *bp++ = (c >> 8) | 0x80; + *bp++ = c; + } + return 2; + } + if (encoding == GBK && f == 0 && c == 0xa4) + c = 0x80; + + l = 0; + if (fontp && f != *fontp) + { + *fontp = f; + if (f && f < ' ') + { + if (bp) + { + *bp++ = 033; + *bp++ = '$'; + if (f > 2) + *bp++ = '('; + *bp++ = '@' + f; + } + l += f > 2 ? 
4 : 3; + } + else if (f < 128) + { + if (f == 0) + f = 'B'; + if (bp) + { + *bp++ = 033; + *bp++ = '('; + *bp++ = f; + } + l += 3; + } + } + if (c & 0xff00) + { + if (bp) + *bp++ = c >> 8; + l++; + } + if (bp) + *bp++ = c; + return l + 1; +} + +int +CanEncodeFont(encoding, f) +int encoding, f; +{ + switch(encoding) + { +#ifdef UTF8 + case UTF8: + return 1; +#endif + case SJIS: + return f == KANJI || f == KANA; + case EUC: + return f == KANJI || f == KANA || f == KANJI0212; + case EUC_KR: + return f == 3; + case EUC_CN: + return f == 1; + case BIG5: + return f == 030; + case GBK: + return f == 031; + default: + break; + } + return 0; +} + +#ifdef DW_CHARS +int +PrepareEncodedChar(c) +int c; +{ + int encoding; + int t = 0; + int f; + + encoding = D_encoding; + f = D_rend.font; + t = D_mbcs; + if (encoding == SJIS) + { + if (f == KANA) + return c | 0x80; + else if (f == KANJI) + { + t += (c & 1) ? ((t <= 0x5f) ? 0x1f : 0x20) : 0x7e; + c = (c - 0x21) / 2 + ((c < 0x5f) ? 0x81 : 0xc1); + D_mbcs = t; + } + return c; + } + if (encoding == EUC) + { + if (f == KANA) + { + AddChar(0x8e); + return c | 0x80; + } + if (f == KANJI) + { + D_mbcs = t | 0x80; + return c | 0x80; + } + if (f == KANJI0212) + { + AddChar(0x8f); + D_mbcs = t | 0x80; + return c | 0x80; + } + } + if ((encoding == EUC_KR && f == 3) || (encoding == EUC_CN && f == 1)) + { + D_mbcs = t | 0x80; + return c | 0x80; + } + if ((encoding == BIG5 && f == 030) || (encoding == GBK && f == 031)) + return c | 0x80; + return c; +} +#endif + +int +RecodeBuf(fbuf, flen, fenc, tenc, tbuf) +unsigned char *fbuf; +int flen; +int fenc, tenc; +unsigned char *tbuf; +{ + int c, i, j; + int decstate = 0, font = 0; + + for (i = j = 0; i < flen; i++) + { + c = fbuf[i]; + c = DecodeChar(c, fenc, &decstate); + if (c == -2) + i--; + if (c < 0) + continue; + j += EncodeChar(tbuf ? (char *)tbuf + j : 0, c, tenc, &font); + } + j += EncodeChar(tbuf ? 
(char *)tbuf + j : 0, -1, tenc, &font); + return j; +} + +#ifdef UTF8 +int +ContainsSpecialDeffont(ml, xs, xe, encoding) +struct mline *ml; +int xs, xe; +int encoding; +{ + unsigned char *f, *i; + int c, x, dx; + + if (encoding == UTF8 || encodings[encoding].deffont == 0) + return 0; + i = ml->image + xs; + f = ml->font + xs; + dx = xe - xs + 1; + while (dx-- > 0) + { + if (*f++) + continue; + c = *i++; + x = recode_char_to_encoding(c | (encodings[encoding].deffont << 8), UTF8); + if (c != x) + { + debug2("ContainsSpecialDeffont: yes %02x != %02x\n", c, x); + return 1; + } + } + debug("ContainsSpecialDeffont: no\n"); + return 0; +} + + +int +LoadFontTranslation(font, file) +int font; +char *file; +{ + char buf[1024], *myfile; + FILE *f; + int i; + int fo; + int x, u, c, ok; + unsigned short (*p)[2], (*tab)[2]; + + myfile = file; + if (myfile == 0) + { + if (font == 0 || screenencodings == 0) + return -1; + if (strlen(screenencodings) > sizeof(buf) - 10) + return -1; + sprintf(buf, "%s/%02x", screenencodings, font & 0xff); + myfile = buf; + } + debug1("LoadFontTranslation: trying %s\n", myfile); + if ((f = secfopen(myfile, "r")) == 0) + return -1; + i = ok = 0; + for (;;) + { + for(; i < 12; i++) + if (getc(f) != "ScreenI2UTF8"[i]) + break; + if (getc(f) != 0) /* format */ + break; + fo = getc(f); /* id */ + if (fo == EOF) + break; + if (font != -1 && font != fo) + break; + i = getc(f); + x = getc(f); + if (x == EOF) + break; + i = i << 8 | x; + getc(f); + while ((x = getc(f)) && x != EOF) + getc(f); /* skip font name (padded to 2 bytes) */ + if ((p = malloc(sizeof(*p) * (i + 1))) == 0) + break; + tab = p; + while(i > 0) + { + x = getc(f); + x = x << 8 | getc(f); + u = getc(f); + c = getc(f); + u = u << 8 | c; + if (c == EOF) + break; + (*p)[0] = x; + (*p)[1] = u; + p++; + i--; + } + (*p)[0] = 0; + (*p)[1] = 0; + if (i || (tab[0][0] & 0x8000)) + { + free(tab); + break; + } + if (recodetabs[fo].tab && (recodetabs[fo].flags & RECODETAB_ALLOCED) != 0) + 
free(recodetabs[fo].tab); + recodetabs[fo].tab = tab; + recodetabs[fo].flags = RECODETAB_ALLOCED; + debug1("Successful load of recodetab %02x\n", fo); + c = getc(f); + if (c == EOF) + { + ok = 1; + break; + } + if (c != 'S') + break; + i = 1; + } + fclose(f); + if (font != -1 && file == 0 && recodetabs[font].flags == 0) + recodetabs[font].flags = RECODETAB_TRIED; + return ok ? 0 : -1; +} + +void +LoadFontTranslationsForEncoding(encoding) +int encoding; +{ + char *c; + int f; + + debug1("LoadFontTranslationsForEncoding: encoding %d\n", encoding); + if ((c = encodings[encoding].fontlist) != 0) + while ((f = (unsigned char)*c++) != 0) + if (recodetabs[f].flags == 0) + LoadFontTranslation(f, 0); + f = encodings[encoding].deffont; + if (f > 0 && recodetabs[f].flags == 0) + LoadFontTranslation(f, 0); +} + +#endif /* UTF8 */ + +#else /* !ENCODINGS */ + +/* Simple version of EncodeChar to encode font changes for + * copy/paste mode + */ +int +EncodeChar(bp, c, encoding, fontp) +char *bp; +int c; +int encoding; +int *fontp; +{ + int f, l; + f = (c == -1) ? 0 : c >> 16; + l = 0; + if (fontp && f != *fontp) + { + *fontp = f; + if (f && f < ' ') + { + if (bp) + { + *bp++ = 033; + *bp++ = '$'; + if (f > 2) + *bp++ = '('; + *bp++ = '@' + f; + } + l += f > 2 ? 4 : 3; + } + else if (f < 128) + { + if (f == 0) + f = 'B'; + if (bp) + { + *bp++ = 033; + *bp++ = '('; + *bp++ = f; + } + l += 3; + } + } + if (c == -1) + return l; + if (c & 0xff00) + { + if (bp) + *bp++ = c >> 8; + l++; + } + if (bp) + *bp++ = c; + return l + 1; +} + +#endif /* ENCODINGS */ |