summaryrefslogtreecommitdiffstats
path: root/tools/make_charset_table.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--tools/make_charset_table.c125
1 files changed, 125 insertions, 0 deletions
diff --git a/tools/make_charset_table.c b/tools/make_charset_table.c
new file mode 100644
index 0000000..27d921a
--- /dev/null
+++ b/tools/make_charset_table.c
@@ -0,0 +1,125 @@
+/* make_charset_table.c
+ * sample program to generate tables for charsets.c using iconv
+ *
+ * public domain
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <iconv.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define UNREPL 0xFFFD
+
+/* Write NAME to stdout with every character that cannot appear inside a C
+ * identifier replaced by '_' (e.g. "ISO-8859-2" is written as "ISO_8859_2").
+ * The caller prints a letter-initial prefix first, so a leading digit in
+ * NAME is harmless.
+ */
+static void print_identifier_suffix(const char *name) {
+    for (; *name != '\0'; name++) {
+        /* <ctype.h> functions require a value representable as unsigned char */
+        unsigned char c = (unsigned char) *name;
+
+        putchar((isalnum(c) || c == '_') ? c : '_');
+    }
+}
+
+/* Convert each single byte 0x00..0xFF from the charset named in argv[1] to
+ * UCS-2 with iconv and print the result to stdout as a C array initializer.
+ *
+ * Exit status:
+ *   0  table written
+ *   1  wrong number of arguments
+ *   2  iconv_open() failed (charset unknown to iconv)
+ *   3  a conversion succeeded but did not consume 1 byte / produce 2 bytes
+ *   4  iconv() failed with an error other than EILSEQ
+ */
+int main(int argc, char **argv) {
+    /* for now only UCS-2 */
+    uint16_t table[0x100];
+
+    iconv_t conv;
+    const char *charset;
+    int i, j;
+
+    /* 0x00 ... 0x7F same as ASCII? */
+    int ascii_based = 1;
+    /* 0x00 ... 0x9F same as ISO? */
+    int iso_based = 1;
+
+    if (argc != 2) {
+        /* stdout carries the generated table, so diagnostics go to stderr */
+        fprintf(stderr, "usage: %s <charset>\n", argv[0]);
+        return 1;
+    }
+
+    charset = argv[1];
+
+    /* Probe once up front so an unknown charset is reported before any work */
+    conv = iconv_open("UCS-2", charset);
+    if (conv == (iconv_t) -1) {
+        perror("iconv_open");
+        return 2;
+    }
+    iconv_close(conv);
+
+    for (i = 0x00; i < 0x100; i++) {
+        unsigned char in[1], out[2];
+        size_t inlen = 1, outlen = 2;
+
+        char *inbuf = (char *) in;
+        char *outbuf = (char *) out;
+
+        size_t ret;
+
+        in[0] = (unsigned char) i;
+
+        /* A fresh descriptor per byte: every conversion starts from the
+         * initial state, regardless of how the previous byte ended.
+         * Big-endian output is requested explicitly so that out[0] is the
+         * high byte.
+         */
+        conv = iconv_open("UCS-2BE", charset);
+
+        if (conv == (iconv_t) -1) {
+            /* shouldn't fail now */
+            perror("iconv_open");
+            return 2;
+        }
+
+        ret = iconv(conv, &inbuf, &inlen, &outbuf, &outlen);
+
+        /* Byte has no mapping in this charset: record the placeholder */
+        if (ret == (size_t) -1 && errno == EILSEQ) {
+            table[i] = UNREPL;
+            iconv_close(conv);
+            continue;
+        }
+
+        /* perror() must run before iconv_close(), which may change errno */
+        if (ret == (size_t) -1) {
+            perror("iconv");
+            iconv_close(conv);
+            return 4;
+        }
+
+        iconv_close(conv);
+
+        /* Expect exactly: no irreversible conversions, 1 byte in, 2 bytes out */
+        if (ret != 0 || inlen != 0 || outlen != 0) {
+            fprintf(stderr, "%d: smth went wrong: %zu %zu %zu\n", i, ret, inlen, outlen);
+            return 3;
+        }
+
+        if (i < 0x80 && (out[0] != 0 || out[1] != i))
+            ascii_based = 0;
+
+        if (i < 0xA0 && (out[0] != 0 || out[1] != i))
+            iso_based = 0;
+
+        table[i] = (uint16_t) ((out[0] << 8) | out[1]);
+    }
+
+    /* iso_based not supported */
+    iso_based = 0;
+
+    printf("/* generated by %s %s */\n", argv[0], charset);
+
+    /* Skip the leading range that is identical to ASCII / ISO */
+    if (iso_based)
+        i = 0xA0;
+    else if (ascii_based)
+        i = 0x80;
+    else
+        i = 0;
+
+    /* Charset names such as "ISO-8859-2" are not valid C identifiers, so the
+     * name is sanitized before being used in the array name.
+     */
+    printf("const gunichar2 charset_table_");
+    print_identifier_suffix(charset);
+    printf("[0x%x] = {\n", (unsigned) (0x100 - i));
+
+    /* i starts at 0, 0x80 or 0xA0 (all multiples of 8), so rows of eight
+     * entries end exactly at 0x100 and table[] is never overrun.
+     */
+    while (i < 0x100) {
+        int start = i;
+
+        printf(" ");
+
+        for (j = 0; j < 8; j++, i++) {
+            if (table[i] == UNREPL)
+                printf("UNREPL, ");
+            else
+                printf("0x%.4x, ", (unsigned) table[i]);
+        }
+
+        /* Two rows cover one 16-entry range: the first row is tagged with
+         * the range start, the second with the range end.
+         */
+        if ((start & 0xf) == 0)
+            printf(" /* 0x%.2X - */", (unsigned) start);
+        else
+            printf(" /* - 0x%.2X */", (unsigned) (i - 1));
+
+        printf("\n");
+    }
+    printf("};\n");
+
+    return 0;
+}