/* Copyright (c) 1993-2003 * Juergen Weigert (jnweiger@immd4.informatik.uni-erlangen.de) * Michael Schroeder (mlschroe@immd4.informatik.uni-erlangen.de) * Copyright (c) 1987 Oliver Laumann * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program (see the file COPYING); if not, see * https://www.gnu.org/licenses/, or contact Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA * **************************************************************** */ #include #include "config.h" #include "screen.h" #include "extern.h" #ifdef ENCODINGS extern unsigned char *null; extern struct display *display, *displays; extern struct layer *flayer; extern char *screenencodings; #ifdef DW_CHARS extern int cjkwidth; #endif static int encmatch __P((char *, char *)); # ifdef UTF8 static int recode_char __P((int, int, int)); static int recode_char_to_encoding __P((int, int)); static void comb_tofront __P((int)); # ifdef DW_CHARS static int recode_char_dw __P((int, int *, int, int)); static int recode_char_dw_to_encoding __P((int, int *, int)); # endif # endif struct encoding { char *name; char *charsets; int deffont; int usegr; int noc1; char *fontlist; }; /* big5 font: ^X */ /* KOI8-R font: 96 ! */ /* CP1251 font: 96 ? */ struct encoding encodings[] = { { "C", 0, 0, 0, 0, 0 }, { "eucJP", "B\002I\00401", 0, 1, 0, "\002\004I" }, { "SJIS", "BIBB01", 0, 1, 1, "\002I" }, { "eucKR", "B\003BB01", 0, 1, 0, "\003" }, { "eucCN", "B\001BB01", 0, 1, 0, "\001" }, { "Big5", "B\030BB01", 0, 1, 0, "\030" }, { "KOI8-R", 0, 0x80|'!', 0, 1, 0 }, { "CP1251", 0, 0x80|'?', 0, 1, 0 }, { "UTF-8", 0, -1, 0, 0, 0 }, { "ISO8859-2", 0, 0x80|'B', 0, 0, 0 }, { "ISO8859-3", 0, 0x80|'C', 0, 0, 0 }, { "ISO8859-4", 0, 0x80|'D', 0, 0, 0 }, { "ISO8859-5", 0, 0x80|'L', 0, 0, 0 }, { "ISO8859-6", 0, 0x80|'G', 0, 0, 0 }, { "ISO8859-7", 0, 0x80|'F', 0, 0, 0 }, { "ISO8859-8", 0, 0x80|'H', 0, 0, 0 }, { "ISO8859-9", 0, 0x80|'M', 0, 0, 0 }, { "ISO8859-10", 0, 0x80|'V', 0, 0, 0 }, { "ISO8859-15", 0, 0x80|'b', 0, 0, 0 }, { "jis", 0, 0, 0, 0, "\002\004I" }, { "GBK", "B\031BB01", 0x80|'b', 1, 1, "\031" } }; #ifdef UTF8 static unsigned short builtin_tabs[][2] = { { 0x30, 0 }, /* 0: special graphics (line drawing) */ { 0x005f, 0x25AE }, { 0x0060, 0x25C6 }, { 0x0061, 0x2592 }, { 0x0062, 0x2409 }, { 0x0063, 0x240C }, { 0x0064, 0x240D }, { 0x0065, 0x240A }, { 0x0066, 0x00B0 }, { 0x0067, 0x00B1 }, { 0x0068, 0x2424 }, { 0x0069, 0x240B }, { 0x006a, 0x2518 }, { 0x006b, 0x2510 }, { 0x006c, 0x250C }, { 0x006d, 0x2514 }, { 0x006e, 0x253C }, { 0x006f, 0x23BA }, { 0x0070, 0x23BB }, { 0x0071, 0x2500 }, { 0x0072, 0x23BC }, { 0x0073, 0x23BD }, { 0x0074, 0x251C }, { 0x0075, 0x2524 }, { 0x0076, 0x2534 }, { 0x0077, 0x252C }, { 0x0078, 0x2502 }, { 0x0079, 0x2264 }, { 0x007a, 0x2265 }, { 0x007b, 0x03C0 }, { 0x007c, 0x2260 }, { 0x007d, 0x00A3 }, { 0x007e, 0x00B7 }, { 0, 0}, { 0x34, 0 }, /* 4: Dutch */ { 0x0023, 0x00a3 }, { 0x0040, 0x00be }, { 0x005b, 0x00ff }, { 0x005c, 0x00bd }, { 0x005d, 0x007c }, { 0x007b, 0x00a8 }, { 0x007c, 0x0066 }, { 0x007d, 0x00bc }, { 0x007e, 0x00b4 }, { 0, 0}, { 0x35, 0 }, /* 5: Finnish */ { 0x005b, 0x00c4 }, { 0x005c, 0x00d6 }, { 0x005d, 0x00c5 }, { 0x005e, 0x00dc }, { 0x0060, 0x00e9 }, { 0x007b, 0x00e4 }, { 0x007c, 0x00f6 }, { 0x007d, 0x00e5 }, { 0x007e, 0x00fc }, { 0, 0}, { 0x36, 0 }, /* 6: Norwegian/Danish */ { 0x0040, 0x00c4 }, { 0x005b, 0x00c6 }, { 0x005c, 0x00d8 }, { 0x005d, 0x00c5 }, { 0x005e, 0x00dc }, { 0x0060, 0x00e4 }, { 0x007b, 0x00e6 }, { 0x007c, 0x00f8 }, { 0x007d, 0x00e5 }, { 0x007e, 0x00fc }, { 0, 0}, { 0x37, 0 }, /* 7: Swedish */ { 0x0040, 0x00c9 }, { 0x005b, 0x00c4 }, { 0x005c, 0x00d6 }, { 0x005d, 0x00c5 }, { 0x005e, 0x00dc }, { 0x0060, 0x00e9 }, { 0x007b, 0x00e4 }, { 0x007c, 0x00f6 }, { 0x007d, 0x00e5 }, { 0x007e, 0x00fc }, { 0, 0}, { 0x3d, 0}, /* =: Swiss */ { 0x0023, 0x00f9 }, { 0x0040, 0x00e0 }, { 0x005b, 0x00e9 }, { 0x005c, 0x00e7 }, { 0x005d, 0x00ea }, { 0x005e, 0x00ee }, { 0x005f, 0x00e8 }, { 0x0060, 0x00f4 }, { 0x007b, 0x00e4 }, { 0x007c, 0x00f6 }, { 0x007d, 0x00fc }, { 0x007e, 0x00fb }, { 0, 0}, { 0x41, 0}, /* A: UK */ { 0x0023, 0x00a3 }, { 0, 0}, { 0x4b, 0}, /* K: German */ { 0x0040, 0x00a7 }, { 0x005b, 0x00c4 }, { 0x005c, 0x00d6 }, { 0x005d, 0x00dc }, { 0x007b, 0x00e4 }, { 0x007c, 0x00f6 }, { 0x007d, 0x00fc }, { 0x007e, 0x00df }, { 0, 0}, { 0x51, 0}, /* Q: French Canadian */ { 0x0040, 0x00e0 }, { 0x005b, 0x00e2 }, { 0x005c, 0x00e7 }, { 0x005d, 0x00ea }, { 0x005e, 0x00ee }, { 0x0060, 0x00f4 }, { 0x007b, 0x00e9 }, { 0x007c, 0x00f9 }, { 0x007d, 0x00e8 }, { 0x007e, 0x00fb }, { 0, 0}, { 0x52, 0}, /* R: French */ { 0x0023, 0x00a3 }, { 0x0040, 0x00e0 }, { 0x005b, 0x00b0 }, { 0x005c, 0x00e7 }, { 0x005d, 0x00a7 }, { 0x007b, 0x00e9 }, { 0x007c, 0x00f9 }, { 0x007d, 0x00e8 }, { 0x007e, 0x00a8 }, { 0, 0}, { 0x59, 0}, /* Y: Italian */ { 0x0023, 0x00a3 }, { 0x0040, 0x00a7 }, { 0x005b, 0x00b0 }, { 0x005c, 0x00e7 }, { 0x005d, 0x00e9 }, { 0x0060, 0x00f9 }, { 0x007b, 0x00e0 }, { 0x007c, 0x00f2 }, { 0x007d, 0x00e8 }, { 0x007e, 0x00ec }, { 0, 0}, { 0x5a, 0}, /* Z: Spanish */ { 0x0023, 0x00a3 }, { 0x0040, 0x00a7 }, { 0x005b, 0x00a1 }, { 0x005c, 0x00d1 }, { 0x005d, 0x00bf }, { 0x007b, 0x00b0 }, { 0x007c, 0x00f1 }, { 0x007d, 0x00e7 }, { 0, 0}, { 0xe2, 0}, /* 96-b: ISO-8859-15 */ { 0x00a4, 0x20ac }, { 0x00a6, 0x0160 }, { 0x00a8, 0x0161 }, { 0x00b4, 0x017D }, { 0x00b8, 0x017E }, { 0x00bc, 0x0152 }, { 0x00bd, 0x0153 }, { 0x00be, 0x0178 }, { 0, 0}, { 0x4a, 0}, /* J: JIS 0201 Roman */ { 0x005c, 0x00a5 }, { 0x007e, 0x203e }, { 0, 0}, { 0x49, 0}, /* I: halfwidth katakana */ { 0x0021, 0xff61 }, { 0x005f|0x8000, 0xff9f }, { 0, 0}, { 0, 0} }; struct recodetab { unsigned short (*tab)[2]; int flags; }; #define RECODETAB_ALLOCED 1 #define RECODETAB_BUILTIN 2 #define RECODETAB_TRIED 4 static struct recodetab recodetabs[256]; void InitBuiltinTabs() { unsigned short (*p)[2]; for (p = builtin_tabs; (*p)[0]; p++) { recodetabs[(*p)[0]].flags = RECODETAB_BUILTIN; recodetabs[(*p)[0]].tab = p + 1; p++; while((*p)[0]) p++; } } static int recode_char(c, to_utf, font) int c, to_utf, font; { int f; unsigned short (*p)[2]; if (to_utf) { if (c < 256) return c; f = (c >> 8) & 0xff; c &= 0xff; /* map aliases to keep the table small */ switch (f) { case 'C': f ^= ('C' ^ '5'); break; case 'E': f ^= ('E' ^ '6'); break; case 'H': f ^= ('H' ^ '7'); break; default: break; } p = recodetabs[f].tab; if (p == 0 && recodetabs[f].flags == 0) { LoadFontTranslation(f, 0); p = recodetabs[f].tab; } if (p) for (; (*p)[0]; p++) { if ((p[0][0] & 0x8000) && (c <= (p[0][0] & 0x7fff)) && c >= p[-1][0]) return c - p[-1][0] + p[-1][1]; if ((*p)[0] == c) return (*p)[1]; } return c & 0xff; /* map to latin1 */ } if (font == -1) { if (c < 256) return c; /* latin1 */ for (font = 32; font < 128; font++) { p = recodetabs[font].tab; if (p) for (; (*p)[1]; p++) { if ((p[0][0] & 0x8000) && c <= p[0][1] && c >= p[-1][1]) return (c - p[-1][1] + p[-1][0]) | (font << 8); if ((*p)[1] == c) return (*p)[0] | (font << 8); } } return '?'; } if (c < 128 && (font & 128) != 0) return c; if (font >= 32) { p = recodetabs[font].tab; if (p == 0 && recodetabs[font].flags == 0) { LoadFontTranslation(font, 0); p = recodetabs[font].tab; } if (p) for (; (*p)[1]; p++) { if ((p[0][0] & 0x8000) && c <= p[0][1] && c >= p[-1][1]) return (c - p[-1][1] + p[-1][0]) | (font & 128 ? 0 : font << 8); if ((*p)[1] == c) return (*p)[0] | (font & 128 ? 0 : font << 8); } } return -1; } #ifdef DW_CHARS static int recode_char_dw(c, c2p, to_utf, font) int c, *c2p, to_utf, font; { int f; unsigned short (*p)[2]; if (to_utf) { f = (c >> 8) & 0xff; c = (c & 255) << 8 | (*c2p & 255); *c2p = 0xffff; p = recodetabs[f].tab; if (p == 0 && recodetabs[f].flags == 0) { LoadFontTranslation(f, 0); p = recodetabs[f].tab; } if (p) for (; (*p)[0]; p++) if ((*p)[0] == c) { #ifdef DW_CHARS if (!utf8_isdouble((*p)[1])) *c2p = ' '; #endif return (*p)[1]; } return UCS_REPL_DW; } if (font == -1) { for (font = 0; font < 030; font++) { p = recodetabs[font].tab; if (p) for (; (*p)[1]; p++) if ((*p)[1] == c) { *c2p = ((*p)[0] & 255) | font << 8 | 0x8000; return ((*p)[0] >> 8) | font << 8; } } *c2p = '?'; return '?'; } if (font < 32) { p = recodetabs[font].tab; if (p == 0 && recodetabs[font].flags == 0) { LoadFontTranslation(font, 0); p = recodetabs[font].tab; } if (p) for (; (*p)[1]; p++) if ((*p)[1] == c) { *c2p = ((*p)[0] & 255) | font << 8 | 0x8000; return ((*p)[0] >> 8) | font << 8; } } return -1; } #endif static int recode_char_to_encoding(c, encoding) int c, encoding; { char *fp; int x; if (encoding == UTF8) return recode_char(c, 1, -1); if ((fp = encodings[encoding].fontlist) != 0) while(*fp) if ((x = recode_char(c, 0, (unsigned char)*fp++)) != -1) return x; if (encodings[encoding].deffont) if ((x = recode_char(c, 0, encodings[encoding].deffont)) != -1) return x; return recode_char(c, 0, -1); } #ifdef DW_CHARS static int recode_char_dw_to_encoding(c, c2p, encoding) int c, *c2p, encoding; { char *fp; int x; if (encoding == UTF8) return recode_char_dw(c, c2p, 1, -1); if ((fp = encodings[encoding].fontlist) != 0) while(*fp) if ((x = recode_char_dw(c, c2p, 0, (unsigned char)*fp++)) != -1) return x; if (encodings[encoding].deffont) if ((x = recode_char_dw(c, c2p, 0, encodings[encoding].deffont)) != -1) return x; return recode_char_dw(c, c2p, 0, -1); } #endif struct mchar * recode_mchar(mc, from, to) struct mchar *mc; int from, to; { static struct mchar rmc; int c; debug3("recode_mchar %02x from %d to %d\n", mc->image, from, to); if (from == to || (from != UTF8 && to != UTF8)) return mc; rmc = *mc; if (rmc.font == 0 && from != UTF8) rmc.font = encodings[from].deffont; if (rmc.font == 0) /* latin1 is the same in unicode */ return mc; c = rmc.image | (rmc.font << 8); if (from == UTF8) c |= rmc.fontx << 16; #ifdef DW_CHARS if (rmc.mbcs) { int c2 = rmc.mbcs; c = recode_char_dw_to_encoding(c, &c2, to); rmc.mbcs = c2; } else #endif c = recode_char_to_encoding(c, to); rmc.image = c & 255; rmc.font = c >> 8 & 255; if (to == UTF8) rmc.fontx = c >> 16 & 255; return &rmc; } struct mline * recode_mline(ml, w, from, to) struct mline *ml; int w; int from, to; { static int maxlen; static int last; static struct mline rml[2], *rl; int i, c; if (from == to || (from != UTF8 && to != UTF8) || w == 0) return ml; if (ml->font == null && ml->fontx == null && encodings[from].deffont == 0) return ml; if (w > maxlen) { for (i = 0; i < 2; i++) { if (rml[i].image == 0) rml[i].image = malloc(w); else rml[i].image = realloc(rml[i].image, w); if (rml[i].font == 0) rml[i].font = malloc(w); else rml[i].font = realloc(rml[i].font, w); if (rml[i].fontx == 0) rml[i].fontx = malloc(w); else rml[i].fontx = realloc(rml[i].fontx, w); if (rml[i].image == 0 || rml[i].font == 0 || rml[i].fontx == 0) { maxlen = 0; return ml; /* sorry */ } } maxlen = w; } debug("recode_mline: from\n"); for (i = 0; i < w; i++) debug1("%c", "0123456789abcdef"[(ml->image[i] >> 4) & 15]); debug("\n"); for (i = 0; i < w; i++) debug1("%c", "0123456789abcdef"[(ml->image[i] ) & 15]); debug("\n"); for (i = 0; i < w; i++) debug1("%c", "0123456789abcdef"[(ml->font[i] >> 4) & 15]); debug("\n"); for (i = 0; i < w; i++) debug1("%c", "0123456789abcdef"[(ml->font[i] ) & 15]); debug("\n"); for (i = 0; i < w; i++) debug1("%c", "0123456789abcdef"[(ml->fontx[i] >> 4) & 15]); debug("\n"); for (i = 0; i < w; i++) debug1("%c", "0123456789abcdef"[(ml->fontx[i] ) & 15]); debug("\n"); rl = rml + last; rl->attr = ml->attr; #ifdef COLOR rl->color = ml->color; # ifdef COLORS256 rl->colorx = ml->colorx; # endif #endif for (i = 0; i < w; i++) { c = ml->image[i] | (ml->font[i] << 8); if (from == UTF8) c |= ml->fontx[i] << 16; if (from != UTF8 && c < 256) c |= encodings[from].deffont << 8; #ifdef DW_CHARS if ((from != UTF8 && (c & 0x1f00) != 0 && (c & 0xe000) == 0) || (from == UTF8 && utf8_isdouble(c))) { if (i + 1 == w) c = '?'; else { int c2; i++; c2 = ml->image[i] | (ml->font[i] << 8); c = recode_char_dw_to_encoding(c, &c2, to); if (to == UTF8) rl->fontx[i - 1] = c >> 16 & 255; rl->font[i - 1] = c >> 8 & 255; rl->image[i - 1] = c & 255; c = c2; } } else #endif c = recode_char_to_encoding(c, to); rl->image[i] = c & 255; rl->font[i] = c >> 8 & 255; if (to == UTF8) rl->fontx[i] = c >> 16 & 255; } last ^= 1; debug("recode_mline: to\n"); for (i = 0; i < w; i++) debug1("%c", "0123456789abcdef"[(rl->image[i] >> 4) & 15]); debug("\n"); for (i = 0; i < w; i++) debug1("%c", "0123456789abcdef"[(rl->image[i] ) & 15]); debug("\n"); for (i = 0; i < w; i++) debug1("%c", "0123456789abcdef"[(rl->font[i] >> 4) & 15]); debug("\n"); for (i = 0; i < w; i++) debug1("%c", "0123456789abcdef"[(rl->font[i] ) & 15]); debug("\n"); for (i = 0; i < w; i++) debug1("%c", "0123456789abcdef"[(rl->fontx[i] >> 4) & 15]); debug("\n"); for (i = 0; i < w; i++) debug1("%c", "0123456789abcdef"[(rl->fontx[i] ) & 15]); debug("\n"); return rl; } struct combchar { unsigned int c1; unsigned int c2; unsigned int next; unsigned int prev; }; struct combchar **combchars; void AddUtf8(c) int c; { ASSERT(D_encoding == UTF8); if (c >= 0xd800 && c < 0xe000 && combchars && combchars[c - 0xd800]) { AddUtf8(combchars[c - 0xd800]->c1); c = combchars[c - 0xd800]->c2; } if (c >= 0x10000) { if (c >= 0x200000) { AddChar((c & 0x3000000) >> 12 ^ 0xf8); c = (c & 0xffffff) ^ ((0xf0 ^ 0x80) << 18); } AddChar((c & 0x1fc0000) >> 18 ^ 0xf0); c = (c & 0x3ffff) ^ ((0xe0 ^ 0x80) << 12); } if (c >= 0x800) { AddChar((c & 0x7f000) >> 12 ^ 0xe0); c = (c & 0x0fff) ^ ((0xc0 ^ 0x80) << 6); } if (c >= 0x80) { AddChar((c & 0x1fc0) >> 6 ^ 0xc0); c = (c & 0x3f) | 0x80; } AddChar(c); } int ToUtf8_comb(p, c) char *p; int c; { int l; if (c >= 0xd800 && c < 0xe000 && combchars && combchars[c - 0xd800]) { l = ToUtf8_comb(p, combchars[c - 0xd800]->c1); return l + ToUtf8(p ? p + l : 0, combchars[c - 0xd800]->c2); } return ToUtf8(p, c); } int ToUtf8(p, c) char *p; int c; { int l = 1; if (c >= 0x10000) { if (c >= 0x200000) { if (p) *p++ = (c & 0x3000000) >> 12 ^ 0xf8; l++; c = (c & 0xffffff) ^ ((0xf0 ^ 0x80) << 18); } if (p) *p++ = (c & 0x1fc0000) >> 18 ^ 0xf0; l++; c = (c & 0x3ffff) ^ ((0xe0 ^ 0x80) << 12); } if (c >= 0x800) { if (p) *p++ = (c & 0x7f000) >> 12 ^ 0xe0; l++; c = (c & 0x0fff) | 0x1000; } if (c >= 0x80) { if (p) *p++ = (c & 0x1fc0) >> 6 ^ 0xc0; l++; c = (c & 0x3f) | 0x80; } if (p) *p++ = c; return l; } /* * returns: * -1: need more bytes, sequence not finished * -2: corrupt sequence found, redo last char * >= 0: decoded character */ int FromUtf8(c, utf8charp) int c, *utf8charp; { int utf8char = *utf8charp; if (utf8char) { if ((c & 0xc0) != 0x80) { *utf8charp = 0; return -2; /* corrupt sequence! */ } else c = (c & 0x3f) | (utf8char << 6); if (!(utf8char & 0x40000000)) { /* check for overlong sequences */ if ((c & 0x820823e0) == 0x80000000) c = 0xfdffffff; else if ((c & 0x020821f0) == 0x02000000) c = 0xfff7ffff; else if ((c & 0x000820f8) == 0x00080000) c = 0xffffd000; else if ((c & 0x0000207c) == 0x00002000) c = 0xffffff70; } } else { /* new sequence */ if (c >= 0xfe) c = UCS_REPL; else if (c >= 0xfc) c = (c & 0x01) | 0xbffffffc; /* 5 bytes to follow */ else if (c >= 0xf8) c = (c & 0x03) | 0xbfffff00; /* 4 */ else if (c >= 0xf0) c = (c & 0x07) | 0xbfffc000; /* 3 */ else if (c >= 0xe0) c = (c & 0x0f) | 0xbff00000; /* 2 */ else if (c >= 0xc2) c = (c & 0x1f) | 0xfc000000; /* 1 */ else if (c >= 0xc0) c = 0xfdffffff; /* overlong */ else if (c >= 0x80) c = UCS_REPL; } *utf8charp = utf8char = (c & 0x80000000) ? c : 0; if (utf8char) return -1; #if 0 if (c & 0xffff0000) c = UCS_REPL; /* sorry, only know 16bit Unicode */ #else if (c & 0xff800000) c = UCS_REPL; /* sorry, only know 23bit Unicode */ #endif if (c >= 0xd800 && (c <= 0xdfff || c == 0xfffe || c == 0xffff)) c = UCS_REPL; /* illegal code */ return c; } void WinSwitchEncoding(p, encoding) struct win *p; int encoding; { int i, j, c; struct mline *ml; struct display *d; struct canvas *cv; struct layer *oldflayer; if ((p->w_encoding == UTF8) == (encoding == UTF8)) { p->w_encoding = encoding; return; } oldflayer = flayer; for (d = displays; d; d = d->d_next) for (cv = d->d_cvlist; cv; cv = cv->c_next) if (p == Layer2Window(cv->c_layer)) { flayer = cv->c_layer; while(flayer->l_next) { if (oldflayer == flayer) oldflayer = flayer->l_next; ExitOverlayPage(); } } flayer = oldflayer; for (j = 0; j < p->w_height + p->w_histheight; j++) { #ifdef COPY_PASTE ml = j < p->w_height ? &p->w_mlines[j] : &p->w_hlines[j - p->w_height]; #else ml = &p->w_mlines[j]; #endif if (ml->font == null && ml->fontx == 0 && encodings[p->w_encoding].deffont == 0) continue; for (i = 0; i < p->w_width; i++) { c = ml->image[i] | (ml->font[i] << 8); if (p->w_encoding == UTF8) c |= ml->fontx[i] << 16; if (p->w_encoding != UTF8 && c < 256) c |= encodings[p->w_encoding].deffont << 8; if (c < 256) continue; if (ml->font == null) { if ((ml->font = (unsigned char *)calloc(p->w_width + 1, 1)) == 0) { ml->font = null; break; } } #ifdef DW_CHARS if ((p->w_encoding != UTF8 && (c & 0x1f00) != 0 && (c & 0xe000) == 0) || (p->w_encoding == UTF8 && utf8_isdouble(c))) { if (i + 1 == p->w_width) c = '?'; else { int c2; i++; c2 = ml->image[i] | (ml->font[i] << 8) | (ml->fontx[i] << 16); c = recode_char_dw_to_encoding(c, &c2, encoding); if (encoding == UTF8) { if (c > 0x10000 && ml->fontx == null) { if ((ml->fontx = (unsigned char *)calloc(p->w_width + 1, 1)) == 0) { ml->fontx = null; break; } } ml->fontx[i - 1] = c >> 16 & 255; } else ml->fontx = null; ml->font[i - 1] = c >> 8 & 255; ml->image[i - 1] = c & 255; c = c2; } } else #endif c = recode_char_to_encoding(c, encoding); ml->image[i] = c & 255; ml->font[i] = c >> 8 & 255; if (encoding == UTF8) { if (c > 0x10000 && ml->fontx == null) { if ((ml->fontx = (unsigned char *)calloc(p->w_width + 1, 1)) == 0) { ml->fontx = null; break; } } ml->fontx[i] = c >> 16 & 255; } else ml->fontx = null; } } p->w_encoding = encoding; return; } #ifdef DW_CHARS struct interval { int first; int last; }; /* auxiliary function for binary search in interval table */ static int bisearch(int ucs, const struct interval *table, int max) { int min = 0; int mid; if (ucs < table[0].first || ucs > table[max].last) return 0; while (max >= min) { mid = (min + max) / 2; if (ucs > table[mid].last) min = mid + 1; else if (ucs < table[mid].first) max = mid - 1; else return 1; } return 0; } int utf8_isdouble(c) int c; { /* A sorted list of intervals of ambiguous width characters generated by * https://github.com/GNOME/glib/blob/glib-2-50/glib/gen-unicode-tables.pl */ static const struct interval ambiguous[] = { {0x00A1, 0x00A1}, {0x00A4, 0x00A4}, {0x00A7, 0x00A8}, {0x00AA, 0x00AA}, {0x00AD, 0x00AE}, {0x00B0, 0x00B4}, {0x00B6, 0x00BA}, {0x00BC, 0x00BF}, {0x00C6, 0x00C6}, {0x00D0, 0x00D0}, {0x00D7, 0x00D8}, {0x00DE, 0x00E1}, {0x00E6, 0x00E6}, {0x00E8, 0x00EA}, {0x00EC, 0x00ED}, {0x00F0, 0x00F0}, {0x00F2, 0x00F3}, {0x00F7, 0x00FA}, {0x00FC, 0x00FC}, {0x00FE, 0x00FE}, {0x0101, 0x0101}, {0x0111, 0x0111}, {0x0113, 0x0113}, {0x011B, 0x011B}, {0x0126, 0x0127}, {0x012B, 0x012B}, {0x0131, 0x0133}, {0x0138, 0x0138}, {0x013F, 0x0142}, {0x0144, 0x0144}, {0x0148, 0x014B}, {0x014D, 0x014D}, {0x0152, 0x0153}, {0x0166, 0x0167}, {0x016B, 0x016B}, {0x01CE, 0x01CE}, {0x01D0, 0x01D0}, {0x01D2, 0x01D2}, {0x01D4, 0x01D4}, {0x01D6, 0x01D6}, {0x01D8, 0x01D8}, {0x01DA, 0x01DA}, {0x01DC, 0x01DC}, {0x0251, 0x0251}, {0x0261, 0x0261}, {0x02C4, 0x02C4}, {0x02C7, 0x02C7}, {0x02C9, 0x02CB}, {0x02CD, 0x02CD}, {0x02D0, 0x02D0}, {0x02D8, 0x02DB}, {0x02DD, 0x02DD}, {0x02DF, 0x02DF}, {0x0300, 0x036F}, {0x0391, 0x03A1}, {0x03A3, 0x03A9}, {0x03B1, 0x03C1}, {0x03C3, 0x03C9}, {0x0401, 0x0401}, {0x0410, 0x044F}, {0x0451, 0x0451}, {0x2010, 0x2010}, {0x2013, 0x2016}, {0x2018, 0x2019}, {0x201C, 0x201D}, {0x2020, 0x2022}, {0x2024, 0x2027}, {0x2030, 0x2030}, {0x2032, 0x2033}, {0x2035, 0x2035}, {0x203B, 0x203B}, {0x203E, 0x203E}, {0x2074, 0x2074}, {0x207F, 0x207F}, {0x2081, 0x2084}, {0x20AC, 0x20AC}, {0x2103, 0x2103}, {0x2105, 0x2105}, {0x2109, 0x2109}, {0x2113, 0x2113}, {0x2116, 0x2116}, {0x2121, 0x2122}, {0x2126, 0x2126}, {0x212B, 0x212B}, {0x2153, 0x2154}, {0x215B, 0x215E}, {0x2160, 0x216B}, {0x2170, 0x2179}, {0x2189, 0x2189}, {0x2190, 0x2199}, {0x21B8, 0x21B9}, {0x21D2, 0x21D2}, {0x21D4, 0x21D4}, {0x21E7, 0x21E7}, {0x2200, 0x2200}, {0x2202, 0x2203}, {0x2207, 0x2208}, {0x220B, 0x220B}, {0x220F, 0x220F}, {0x2211, 0x2211}, {0x2215, 0x2215}, {0x221A, 0x221A}, {0x221D, 0x2220}, {0x2223, 0x2223}, {0x2225, 0x2225}, {0x2227, 0x222C}, {0x222E, 0x222E}, {0x2234, 0x2237}, {0x223C, 0x223D}, {0x2248, 0x2248}, {0x224C, 0x224C}, {0x2252, 0x2252}, {0x2260, 0x2261}, {0x2264, 0x2267}, {0x226A, 0x226B}, {0x226E, 0x226F}, {0x2282, 0x2283}, {0x2286, 0x2287}, {0x2295, 0x2295}, {0x2299, 0x2299}, {0x22A5, 0x22A5}, {0x22BF, 0x22BF}, {0x2312, 0x2312}, {0x2460, 0x24E9}, {0x24EB, 0x254B}, {0x2550, 0x2573}, {0x2580, 0x258F}, {0x2592, 0x2595}, {0x25A0, 0x25A1}, {0x25A3, 0x25A9}, {0x25B2, 0x25B3}, {0x25B6, 0x25B7}, {0x25BC, 0x25BD}, {0x25C0, 0x25C1}, {0x25C6, 0x25C8}, {0x25CB, 0x25CB}, {0x25CE, 0x25D1}, {0x25E2, 0x25E5}, {0x25EF, 0x25EF}, {0x2605, 0x2606}, {0x2609, 0x2609}, {0x260E, 0x260F}, {0x261C, 0x261C}, {0x261E, 0x261E}, {0x2640, 0x2640}, {0x2642, 0x2642}, {0x2660, 0x2661}, {0x2663, 0x2665}, {0x2667, 0x266A}, {0x266C, 0x266D}, {0x266F, 0x266F}, {0x269E, 0x269F}, {0x26BF, 0x26BF}, {0x26C6, 0x26CD}, {0x26CF, 0x26D3}, {0x26D5, 0x26E1}, {0x26E3, 0x26E3}, {0x26E8, 0x26E9}, {0x26EB, 0x26F1}, {0x26F4, 0x26F4}, {0x26F6, 0x26F9}, {0x26FB, 0x26FC}, {0x26FE, 0x26FF}, {0x273D, 0x273D}, {0x2776, 0x277F}, {0x2B56, 0x2B59}, {0x3248, 0x324F}, {0xE000, 0xF8FF}, {0xFE00, 0xFE0F}, {0xFFFD, 0xFFFD}, {0x1F100, 0x1F10A}, {0x1F110, 0x1F12D}, {0x1F130, 0x1F169}, {0x1F170, 0x1F18D}, {0x1F18F, 0x1F190}, {0x1F19B, 0x1F1AC}, {0xE0100, 0xE01EF}, {0xF0000, 0xFFFFD}, {0x100000, 0x10FFFD}, }; /* A sorted list of intervals of double width characters generated by * https://github.com/GNOME/glib/blob/glib-2-50/glib/gen-unicode-tables.pl */ static const struct interval wide[] = { {0x1100, 0x115F}, {0x231A, 0x231B}, {0x2329, 0x232A}, {0x23E9, 0x23EC}, {0x23F0, 0x23F0}, {0x23F3, 0x23F3}, {0x25FD, 0x25FE}, {0x2614, 0x2615}, {0x2648, 0x2653}, {0x267F, 0x267F}, {0x2693, 0x2693}, {0x26A1, 0x26A1}, {0x26AA, 0x26AB}, {0x26BD, 0x26BE}, {0x26C4, 0x26C5}, {0x26CE, 0x26CE}, {0x26D4, 0x26D4}, {0x26EA, 0x26EA}, {0x26F2, 0x26F3}, {0x26F5, 0x26F5}, {0x26FA, 0x26FA}, {0x26FD, 0x26FD}, {0x2705, 0x2705}, {0x270A, 0x270B}, {0x2728, 0x2728}, {0x274C, 0x274C}, {0x274E, 0x274E}, {0x2753, 0x2755}, {0x2757, 0x2757}, {0x2795, 0x2797}, {0x27B0, 0x27B0}, {0x27BF, 0x27BF}, {0x2B1B, 0x2B1C}, {0x2B50, 0x2B50}, {0x2B55, 0x2B55}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, {0x3000, 0x303E}, {0x3041, 0x3096}, {0x3099, 0x30FF}, {0x3105, 0x312F}, {0x3131, 0x318E}, {0x3190, 0x31BA}, {0x31C0, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0x4DBF}, {0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C}, {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52}, {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6}, {0x16FE0, 0x16FE3}, {0x17000, 0x187F7}, {0x18800, 0x18AF2}, {0x1B000, 0x1B11E}, {0x1B150, 0x1B152}, {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, {0x1F004, 0x1F004}, {0x1F0CF, 0x1F0CF}, {0x1F18E, 0x1F18E}, {0x1F191, 0x1F19A}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23B}, {0x1F240, 0x1F248}, {0x1F250, 0x1F251}, {0x1F260, 0x1F265}, {0x1F300, 0x1F320}, {0x1F32D, 0x1F335}, {0x1F337, 0x1F37C}, {0x1F37E, 0x1F393}, {0x1F3A0, 0x1F3CA}, {0x1F3CF, 0x1F3D3}, {0x1F3E0, 0x1F3F0}, {0x1F3F4, 0x1F3F4}, {0x1F3F8, 0x1F43E}, {0x1F440, 0x1F440}, {0x1F442, 0x1F4FC}, {0x1F4FF, 0x1F53D}, {0x1F54B, 0x1F54E}, {0x1F550, 0x1F567}, {0x1F57A, 0x1F57A}, {0x1F595, 0x1F596}, {0x1F5A4, 0x1F5A4}, {0x1F5FB, 0x1F64F}, {0x1F680, 0x1F6C5}, {0x1F6CC, 0x1F6CC}, {0x1F6D0, 0x1F6D2}, {0x1F6D5, 0x1F6D5}, {0x1F6EB, 0x1F6EC}, {0x1F6F4, 0x1F6FA}, {0x1F7E0, 0x1F7EB}, {0x1F90D, 0x1F971}, {0x1F973, 0x1F976}, {0x1F97A, 0x1F9A2}, {0x1F9A5, 0x1F9AA}, {0x1F9AE, 0x1F9CA}, {0x1F9CD, 0x1F9FF}, {0x1FA70, 0x1FA73}, {0x1FA78, 0x1FA7A}, {0x1FA80, 0x1FA82}, {0x1FA90, 0x1FA95}, {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD}, }; if (c >= 0xdf00 && c <= 0xdfff) return 1; /* dw combining sequence */ return ((bisearch(c, wide, sizeof(wide) / sizeof(struct interval) - 1)) || (cjkwidth && bisearch(c, ambiguous, sizeof(ambiguous) / sizeof(struct interval) - 1))); } #endif int utf8_iscomb(c) int c; { /* taken from Markus Kuhn's wcwidth */ static const struct interval combining[] = { { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 }, { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 }, { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 }, { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD }, { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C }, { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D }, { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 }, { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC }, { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F }, { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 }, { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD }, { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD }, { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 }, { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B }, { 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 }, { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 }, { 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF }, { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 }, { 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F }, { 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, { 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB }, { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F }, { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x1D167, 0x1D169 }, { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD }, { 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F }, { 0xE0100, 0xE01EF } }; return bisearch(c, combining, sizeof(combining) / sizeof(struct interval) - 1); } static void comb_tofront(i) int i; { for (;;) { int root = i >= 0x700 ? 0x801 : 0x800; debug1("bring to front: %x\n", i); combchars[combchars[i]->prev]->next = combchars[i]->next; combchars[combchars[i]->next]->prev = combchars[i]->prev; combchars[i]->next = combchars[root]->next; combchars[i]->prev = root; combchars[combchars[root]->next]->prev = i; combchars[root]->next = i; i = combchars[i]->c1; if (i < 0xd800 || i >= 0xe000) return; i -= 0xd800; } } void utf8_handle_comb(c, mc) int c; struct mchar *mc; { int root, i, c1; int isdouble; c1 = mc->image | (mc->font << 8) | mc->fontx << 16; isdouble = c1 >= 0x1100 && utf8_isdouble(c1); if (!combchars) { combchars = (struct combchar **)calloc(0x802, sizeof(struct combchar *)); if (!combchars) return; combchars[0x800] = (struct combchar *)malloc(sizeof(struct combchar)); combchars[0x801] = (struct combchar *)malloc(sizeof(struct combchar)); if (!combchars[0x800] || !combchars[0x801]) { if (combchars[0x800]) free(combchars[0x800]); if (combchars[0x801]) free(combchars[0x801]); free(combchars); return; } combchars[0x800]->c1 = 0x000; combchars[0x800]->c2 = 0x700; combchars[0x800]->next = 0x800; combchars[0x800]->prev = 0x800; combchars[0x801]->c1 = 0x700; combchars[0x801]->c2 = 0x800; combchars[0x801]->next = 0x801; combchars[0x801]->prev = 0x801; } root = isdouble ? 0x801 : 0x800; for (i = combchars[root]->c1; i < combchars[root]->c2; i++) { if (!combchars[i]) break; if (combchars[i]->c1 == c1 && combchars[i]->c2 == c) break; } if (i == combchars[root]->c2) { /* full, recycle old entry */ if (c1 >= 0xd800 && c1 < 0xe000) comb_tofront(c1 - 0xd800); i = combchars[root]->prev; if (i == 0x800 || i == 0x801 || c1 == i + 0xd800) { /* completely full, can't recycle */ debug("utf8_handle_comp: completely full!\n"); mc->image = '?'; mc->font = 0; return; } /* FIXME: delete old char from all buffers */ } else if (!combchars[i]) { combchars[i] = (struct combchar *)malloc(sizeof(struct combchar)); if (!combchars[i]) return; combchars[i]->prev = i; combchars[i]->next = i; } combchars[i]->c1 = c1; combchars[i]->c2 = c; mc->image = i & 0xff; mc->font = (i >> 8) + 0xd8; mc->fontx = 0; debug3("combinig char %x %x -> %x\n", c1, c, i + 0xd800); comb_tofront(i); } #else /* !UTF8 */ void WinSwitchEncoding(p, encoding) struct win *p; int encoding; { p->w_encoding = encoding; return; } #endif /* UTF8 */ static int encmatch(s1, s2) char *s1; char *s2; { int c1, c2; do { c1 = (unsigned char)*s1; if (c1 >= 'A' && c1 <= 'Z') c1 += 'a' - 'A'; if (!(c1 >= 'a' && c1 <= 'z') && !(c1 >= '0' && c1 <= '9')) { s1++; continue; } c2 = (unsigned char)*s2; if (c2 >= 'A' && c2 <= 'Z') c2 += 'a' - 'A'; if (!(c2 >= 'a' && c2 <= 'z') && !(c2 >= '0' && c2 <= '9')) { s2++; continue; } if (c1 != c2) return 0; s1++; s2++; } while(c1); return 1; } int FindEncoding(name) char *name; { int encoding; debug1("FindEncoding %s\n", name); if (name == 0 || *name == 0) return 0; if (encmatch(name, "euc")) name = "eucJP"; if (encmatch(name, "off") || encmatch(name, "iso8859-1")) return 0; #ifndef UTF8 if (encmatch(name, "UTF-8")) return -1; #endif for (encoding = 0; encoding < (int)(sizeof(encodings)/sizeof(*encodings)); encoding++) if (encmatch(name, encodings[encoding].name)) { #ifdef UTF8 LoadFontTranslationsForEncoding(encoding); #endif return encoding; } return -1; } char * EncodingName(encoding) int encoding; { if (encoding >= (int)(sizeof(encodings)/sizeof(*encodings))) return 0; return encodings[encoding].name; } int EncodingDefFont(encoding) int encoding; { return encodings[encoding].deffont; } void ResetEncoding(p) struct win *p; { char *c; int encoding = p->w_encoding; c = encodings[encoding].charsets; if (c) SetCharsets(p, c); #ifdef UTF8 LoadFontTranslationsForEncoding(encoding); #endif if (encodings[encoding].usegr) { p->w_gr = 2; p->w_FontE = encodings[encoding].charsets[1]; } else p->w_FontE = 0; if (encodings[encoding].noc1) p->w_c1 = 0; } /* decoded char: 32-bit * fontx: non-bmp utf8 * c2: multi-byte character * font is always zero for utf8 * returns: -1 need more bytes * -2 decode error */ int DecodeChar(c, encoding, statep) int c; int encoding; int *statep; { int t; debug2("Decoding char %02x for encoding %d\n", c, encoding); #ifdef UTF8 if (encoding == UTF8) { c = FromUtf8(c, statep); if (c >= 0x10000) c = (c & 0x7f0000) << 8 | (c & 0xffff); return c; } #endif if (encoding == SJIS) { if (!*statep) { if ((0x81 <= c && c <= 0x9f) || (0xe0 <= c && c <= 0xef)) { *statep = c; return -1; } if (c < 0x80) return c; return c | (KANA << 16); } t = c; c = *statep; *statep = 0; if (0x40 <= t && t <= 0xfc && t != 0x7f) { if (c <= 0x9f) c = (c - 0x81) * 2 + 0x21; else c = (c - 0xc1) * 2 + 0x21; if (t <= 0x7e) t -= 0x1f; else if (t <= 0x9e) t -= 0x20; else t -= 0x7e, c++; return (c << 8) | t | (KANJI << 16); } return t; } if (encoding == EUC_JP || encoding == EUC_KR || encoding == EUC_CN) { if (!*statep) { if (c & 0x80) { *statep = c; return -1; } return c; } t = c; c = *statep; *statep = 0; if (encoding == EUC_JP) { if (c == 0x8e) return t | (KANA << 16); if (c == 0x8f) { *statep = t | (KANJI0212 << 8); return -1; } } c &= 0xff7f; t &= 0x7f; c = c << 8 | t; if (encoding == EUC_KR) return c | (3 << 16); if (encoding == EUC_CN) return c | (1 << 16); if (c & (KANJI0212 << 16)) return c; else return c | (KANJI << 16); } if (encoding == BIG5 || encoding == GBK) { if (!*statep) { if (c & 0x80) { if (encoding == GBK && c == 0x80) return 0xa4 | (('b'|0x80) << 16); *statep = c; return -1; } return c; } t = c; c = *statep; *statep = 0; c &= 0x7f; return c << 8 | t | (encoding == BIG5 ? 030 << 16 : 031 << 16); } return c | (encodings[encoding].deffont << 16); } int EncodeChar(bp, c, encoding, fontp) char *bp; int c; int encoding; int *fontp; { int t, f, l; debug2("Encoding char %02x for encoding %d\n", c, encoding); if (c == -1 && fontp) { if (*fontp == 0) return 0; if (bp) { *bp++ = 033; *bp++ = '('; *bp++ = 'B'; } return 3; } f = (c >> 16) & 0xff; #ifdef UTF8 if (encoding == UTF8) { if (f) { # ifdef DW_CHARS if (is_dw_font(f)) { int c2 = c & 0xff; c = (c >> 8 & 0xff) | (f << 8); c = recode_char_dw_to_encoding(c, &c2, encoding); } else # endif { c = (c & 0xff) | (f << 8); c = recode_char_to_encoding(c, encoding); } } return ToUtf8(bp, c); } if (f == 0 && (c & 0x7f00ff00) != 0) /* is_utf8? */ { if (c >= 0x10000) c = (c & 0x7f0000) >> 8 | (c & 0xffff); # ifdef DW_CHARS if (utf8_isdouble(c)) { int c2 = 0xffff; c = recode_char_dw_to_encoding(c, &c2, encoding); c = (c << 8) | (c2 & 0xff); } else # endif { c = recode_char_to_encoding(c, encoding); c = ((c & 0xff00) << 8) | (c & 0xff); } debug1("Encode: char mapped from utf8 to %x\n", c); f = c >> 16; } #endif if (f & 0x80) /* map special 96-fonts to latin1 */ f = 0; if (encoding == SJIS) { if (f == KANA) c = (c & 0xff) | 0x80; else if (f == KANJI) { if (!bp) return 2; t = c & 0xff; c = (c >> 8) & 0xff; t += (c & 1) ? ((t <= 0x5f) ? 0x1f : 0x20) : 0x7e; c = (c - 0x21) / 2 + ((c < 0x5f) ? 0x81 : 0xc1); *bp++ = c; *bp++ = t; return 2; } } if (encoding == EUC) { if (f == KANA) { if (bp) { *bp++ = 0x8e; *bp++ = c; } return 2; } if (f == KANJI) { if (bp) { *bp++ = (c >> 8) | 0x80; *bp++ = c | 0x80; } return 2; } if (f == KANJI0212) { if (bp) { *bp++ = 0x8f; *bp++ = c >> 8; *bp++ = c; } return 3; } } if ((encoding == EUC_KR && f == 3) || (encoding == EUC_CN && f == 1)) { if (bp) { *bp++ = (c >> 8) | 0x80; *bp++ = c | 0x80; } return 2; } if ((encoding == BIG5 && f == 030) || (encoding == GBK && f == 031)) { if (bp) { *bp++ = (c >> 8) | 0x80; *bp++ = c; } return 2; } if (encoding == GBK && f == 0 && c == 0xa4) c = 0x80; l = 0; if (fontp && f != *fontp) { *fontp = f; if (f && f < ' ') { if (bp) { *bp++ = 033; *bp++ = '$'; if (f > 2) *bp++ = '('; *bp++ = '@' + f; } l += f > 2 ? 4 : 3; } else if (f < 128) { if (f == 0) f = 'B'; if (bp) { *bp++ = 033; *bp++ = '('; *bp++ = f; } l += 3; } } if (c & 0xff00) { if (bp) *bp++ = c >> 8; l++; } if (bp) *bp++ = c; return l + 1; } int CanEncodeFont(encoding, f) int encoding, f; { switch(encoding) { #ifdef UTF8 case UTF8: return 1; #endif case SJIS: return f == KANJI || f == KANA; case EUC: return f == KANJI || f == KANA || f == KANJI0212; case EUC_KR: return f == 3; case EUC_CN: return f == 1; case BIG5: return f == 030; case GBK: return f == 031; default: break; } return 0; } #ifdef DW_CHARS int PrepareEncodedChar(c) int c; { int encoding; int t = 0; int f; encoding = D_encoding; f = D_rend.font; t = D_mbcs; if (encoding == SJIS) { if (f == KANA) return c | 0x80; else if (f == KANJI) { t += (c & 1) ? ((t <= 0x5f) ? 0x1f : 0x20) : 0x7e; c = (c - 0x21) / 2 + ((c < 0x5f) ? 0x81 : 0xc1); D_mbcs = t; } return c; } if (encoding == EUC) { if (f == KANA) { AddChar(0x8e); return c | 0x80; } if (f == KANJI) { D_mbcs = t | 0x80; return c | 0x80; } if (f == KANJI0212) { AddChar(0x8f); D_mbcs = t | 0x80; return c | 0x80; } } if ((encoding == EUC_KR && f == 3) || (encoding == EUC_CN && f == 1)) { D_mbcs = t | 0x80; return c | 0x80; } if ((encoding == BIG5 && f == 030) || (encoding == GBK && f == 031)) return c | 0x80; return c; } #endif int RecodeBuf(fbuf, flen, fenc, tenc, tbuf) unsigned char *fbuf; int flen; int fenc, tenc; unsigned char *tbuf; { int c, i, j; int decstate = 0, font = 0; for (i = j = 0; i < flen; i++) { c = fbuf[i]; c = DecodeChar(c, fenc, &decstate); if (c == -2) i--; if (c < 0) continue; j += EncodeChar(tbuf ? (char *)tbuf + j : 0, c, tenc, &font); } j += EncodeChar(tbuf ? (char *)tbuf + j : 0, -1, tenc, &font); return j; } #ifdef UTF8 int ContainsSpecialDeffont(ml, xs, xe, encoding) struct mline *ml; int xs, xe; int encoding; { unsigned char *f, *i; int c, x, dx; if (encoding == UTF8 || encodings[encoding].deffont == 0) return 0; i = ml->image + xs; f = ml->font + xs; dx = xe - xs + 1; while (dx-- > 0) { if (*f++) continue; c = *i++; x = recode_char_to_encoding(c | (encodings[encoding].deffont << 8), UTF8); if (c != x) { debug2("ContainsSpecialDeffont: yes %02x != %02x\n", c, x); return 1; } } debug("ContainsSpecialDeffont: no\n"); return 0; } int LoadFontTranslation(font, file) int font; char *file; { char buf[1024], *myfile; FILE *f; int i; int fo; int x, u, c, ok; unsigned short (*p)[2], (*tab)[2]; myfile = file; if (myfile == 0) { if (font == 0 || screenencodings == 0) return -1; if (strlen(screenencodings) > sizeof(buf) - 10) return -1; sprintf(buf, "%s/%02x", screenencodings, font & 0xff); myfile = buf; } debug1("LoadFontTranslation: trying %s\n", myfile); if ((f = secfopen(myfile, "r")) == 0) return -1; i = ok = 0; for (;;) { for(; i < 12; i++) if (getc(f) != "ScreenI2UTF8"[i]) break; if (getc(f) != 0) /* format */ break; fo = getc(f); /* id */ if (fo == EOF) break; if (font != -1 && font != fo) break; i = getc(f); x = getc(f); if (x == EOF) break; i = i << 8 | x; getc(f); while ((x = getc(f)) && x != EOF) getc(f); /* skip font name (padded to 2 bytes) */ if ((p = malloc(sizeof(*p) * (i + 1))) == 0) break; tab = p; while(i > 0) { x = getc(f); x = x << 8 | getc(f); u = getc(f); c = getc(f); u = u << 8 | c; if (c == EOF) break; (*p)[0] = x; (*p)[1] = u; p++; i--; } (*p)[0] = 0; (*p)[1] = 0; if (i || (tab[0][0] & 0x8000)) { free(tab); break; } if (recodetabs[fo].tab && (recodetabs[fo].flags & RECODETAB_ALLOCED) != 0) free(recodetabs[fo].tab); recodetabs[fo].tab = tab; recodetabs[fo].flags = RECODETAB_ALLOCED; debug1("Successful load of recodetab %02x\n", fo); c = getc(f); if (c == EOF) { ok = 1; break; } if (c != 'S') break; i = 1; } fclose(f); if (font != -1 && file == 0 && recodetabs[font].flags == 0) recodetabs[font].flags = RECODETAB_TRIED; return ok ? 0 : -1; } void LoadFontTranslationsForEncoding(encoding) int encoding; { char *c; int f; debug1("LoadFontTranslationsForEncoding: encoding %d\n", encoding); if ((c = encodings[encoding].fontlist) != 0) while ((f = (unsigned char)*c++) != 0) if (recodetabs[f].flags == 0) LoadFontTranslation(f, 0); f = encodings[encoding].deffont; if (f > 0 && recodetabs[f].flags == 0) LoadFontTranslation(f, 0); } #endif /* UTF8 */ #else /* !ENCODINGS */ /* Simple version of EncodeChar to encode font changes for * copy/paste mode */ int EncodeChar(bp, c, encoding, fontp) char *bp; int c; int encoding; int *fontp; { int f, l; f = (c == -1) ? 0 : c >> 16; l = 0; if (fontp && f != *fontp) { *fontp = f; if (f && f < ' ') { if (bp) { *bp++ = 033; *bp++ = '$'; if (f > 2) *bp++ = '('; *bp++ = '@' + f; } l += f > 2 ? 4 : 3; } else if (f < 128) { if (f == 0) f = 'B'; if (bp) { *bp++ = 033; *bp++ = '('; *bp++ = f; } l += 3; } } if (c == -1) return l; if (c & 0xff00) { if (bp) *bp++ = c >> 8; l++; } if (bp) *bp++ = c; return l + 1; } #endif /* ENCODINGS */