summary refs log tree commit diff stats
path: root/src/share/utf8/charset_test.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/share/utf8/charset_test.c')
-rw-r--r-- src/share/utf8/charset_test.c 263
1 files changed, 263 insertions, 0 deletions
diff --git a/src/share/utf8/charset_test.c b/src/share/utf8/charset_test.c
new file mode 100644
index 0000000..6761100
--- /dev/null
+++ b/src/share/utf8/charset_test.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <assert.h>
+#include <string.h>
+
+#include "charset.h"
+
+/*
+ * Checks applied to every charset under test: how the decoder treats null
+ * and zero-length input, and that plain ASCII characters pass through both
+ * the decoder and the encoder as single bytes.
+ *
+ * charset: handle to exercise; must be non-null.
+ */
+void test_any(struct charset *charset)
+{
+    int decoded;
+    char out[2];
+
+    assert(charset);
+
+    /* Decoder: no destination, and either no input or a zero length. */
+    assert(charset_mbtowc(charset, NULL, NULL, 0) == 0);
+    assert(charset_mbtowc(charset, NULL, NULL, 1) == 0);
+    /* A bogus pointer paired with length 0 is still expected to give 0. */
+    assert(charset_mbtowc(charset, NULL, (char *)(-1), 0) == 0);
+
+    /* Decoder: no destination, real input. */
+    assert(charset_mbtowc(charset, NULL, "a", 0) == 0);
+    assert(charset_mbtowc(charset, NULL, "", 1) == 0);
+    assert(charset_mbtowc(charset, NULL, "b", 1) == 1);
+    assert(charset_mbtowc(charset, NULL, "", 2) == 0);
+    assert(charset_mbtowc(charset, NULL, "c", 2) == 1);
+
+    /* Decoder: with a destination; a zero length must leave it untouched. */
+    decoded = 'x';
+    assert(charset_mbtowc(charset, &decoded, "a", 0) == 0);
+    assert(decoded == 'x');
+    assert(charset_mbtowc(charset, &decoded, "", 1) == 0);
+    assert(decoded == 0);
+    assert(charset_mbtowc(charset, &decoded, "b", 1) == 1);
+    assert(decoded == 'b');
+    assert(charset_mbtowc(charset, &decoded, "", 2) == 0);
+    assert(decoded == 0);
+    assert(charset_mbtowc(charset, &decoded, "c", 2) == 1);
+    assert(decoded == 'c');
+
+    /* Encoder: a null output buffer is expected to give 0. */
+    assert(charset_wctomb(charset, NULL, 0) == 0);
+
+    /* Encoder: exactly one byte is written; the second stays a dot. */
+    out[0] = '.';
+    out[1] = '.';
+    assert(charset_wctomb(charset, out, 0) == 1);
+    assert(out[0] == '\0' && out[1] == '.');
+    assert(charset_wctomb(charset, out, 'x') == 1);
+    assert(out[0] == 'x' && out[1] == '.');
+}
+
+/*
+ * Exercises the "UTF-8" charset: the generic checks from test_any(), then
+ * the decoder and encoder at every sequence-length boundary, covering the
+ * 1- to 6-byte forms up to 0x7fffffff.
+ */
+void test_utf8(void)
+{
+    struct charset *charset;
+    int wc;
+    /*
+     * Seven dots plus the terminating NUL fill the array exactly.
+     * Initialised in the declaration so no string-copy helper is needed.
+     */
+    char s[8] = ".......";
+
+    charset = charset_find("UTF-8");
+    test_any(charset);
+
+    /*
+     * Decoder: for each length, the overlong form just below the boundary
+     * must be rejected (-1) and the shortest valid form accepted.
+     */
+    wc = 0;
+    assert(charset_mbtowc(charset, &wc, "\177", 1) == 1 && wc == 127);
+    assert(charset_mbtowc(charset, &wc, "\200", 2) == -1);
+    assert(charset_mbtowc(charset, &wc, "\301\277", 9) == -1);
+    /* A two-byte sequence with only one byte available is rejected. */
+    assert(charset_mbtowc(charset, &wc, "\302\200", 1) == -1);
+    assert(charset_mbtowc(charset, &wc, "\302\200", 2) == 2 && wc == 128);
+    assert(charset_mbtowc(charset, &wc, "\302\200", 3) == 2 && wc == 128);
+    assert(charset_mbtowc(charset, &wc, "\340\237\200", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\340\240\200", 9) == 3 &&
+           wc == 1 << 11);
+    assert(charset_mbtowc(charset, &wc, "\360\217\277\277", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\360\220\200\200", 9) == 4 &&
+           wc == 1 << 16);
+    assert(charset_mbtowc(charset, &wc, "\370\207\277\277\277", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\370\210\200\200\200", 9) == 5 &&
+           wc == 1 << 21);
+    assert(charset_mbtowc(charset, &wc, "\374\203\277\277\277\277", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\374\204\200\200\200\200", 9) == 6 &&
+           wc == 1 << 26);
+    assert(charset_mbtowc(charset, &wc, "\375\277\277\277\277\277", 9) == 6 &&
+           wc == 0x7fffffff);
+
+    /*
+     * Decoder: a trailing byte outside 0x80..0xBF, at any position of the
+     * sequence, must make the whole sequence invalid.
+     */
+    assert(charset_mbtowc(charset, &wc, "\302\000", 2) == -1);
+    assert(charset_mbtowc(charset, &wc, "\302\300", 2) == -1);
+    assert(charset_mbtowc(charset, &wc, "\340\040\200", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\340\340\200", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\340\240\000", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\340\240\300", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\360\020\200\200", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\360\320\200\200", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\360\220\000\200", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\360\220\300\200", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\360\220\200\000", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\360\220\200\300", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\375\077\277\277\277\277", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\375\377\277\277\277\277", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\375\277\077\277\277\277", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\375\277\377\277\277\277", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\375\277\277\277\077\277", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\375\277\277\277\377\277", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\375\277\277\277\277\077", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\375\277\277\277\277\377", 9) == -1);
+
+    /* Decoder: 0xFE and 0xFF are never valid lead bytes. */
+    assert(charset_mbtowc(charset, &wc, "\376\277\277\277\277\277", 9) == -1);
+    assert(charset_mbtowc(charset, &wc, "\377\277\277\277\277\277", 9) == -1);
+
+    /*
+     * Encoder: an out-of-range value must fail without touching the
+     * buffer.  Each later call overwrites only the leading bytes, so the
+     * expected strings keep the bytes left by the previous calls.
+     */
+    assert(charset_wctomb(charset, s, 1u << 31) == -1 &&
+           !strcmp(s, "......."));
+    assert(charset_wctomb(charset, s, 127) == 1 &&
+           !strcmp(s, "\177......"));
+    assert(charset_wctomb(charset, s, 128) == 2 &&
+           !strcmp(s, "\302\200....."));
+    assert(charset_wctomb(charset, s, 0x7ff) == 2 &&
+           !strcmp(s, "\337\277....."));
+    assert(charset_wctomb(charset, s, 0x800) == 3 &&
+           !strcmp(s, "\340\240\200...."));
+    assert(charset_wctomb(charset, s, 0xffff) == 3 &&
+           !strcmp(s, "\357\277\277...."));
+    assert(charset_wctomb(charset, s, 0x10000) == 4 &&
+           !strcmp(s, "\360\220\200\200..."));
+    assert(charset_wctomb(charset, s, 0x1fffff) == 4 &&
+           !strcmp(s, "\367\277\277\277..."));
+    assert(charset_wctomb(charset, s, 0x200000) == 5 &&
+           !strcmp(s, "\370\210\200\200\200.."));
+    assert(charset_wctomb(charset, s, 0x3ffffff) == 5 &&
+           !strcmp(s, "\373\277\277\277\277.."));
+    assert(charset_wctomb(charset, s, 0x4000000) == 6 &&
+           !strcmp(s, "\374\204\200\200\200\200."));
+    assert(charset_wctomb(charset, s, 0x7fffffff) == 6 &&
+           !strcmp(s, "\375\277\277\277\277\277."));
+}
+
+/*
+ * Exercises the "us-ascii" charset: the generic checks from test_any(),
+ * then the 7-bit boundary in both directions.
+ */
+void test_ascii(void)
+{
+    struct charset *charset;
+    int wc;
+    /*
+     * Two dots plus the terminating NUL fill the array exactly.
+     * Initialised in the declaration so no string-copy helper is needed.
+     */
+    char s[3] = "..";
+
+    charset = charset_find("us-ascii");
+    test_any(charset);
+
+    /* Decoder: 0x7F is the last accepted byte, 0x80 is rejected. */
+    wc = 0;
+    assert(charset_mbtowc(charset, &wc, "\177", 2) == 1 && wc == 127);
+    assert(charset_mbtowc(charset, &wc, "\200", 2) == -1);
+
+    /* Encoder: anything above 127 fails; a failure leaves s untouched. */
+    assert(charset_wctomb(charset, s, 256) == -1 && !strcmp(s, ".."));
+    assert(charset_wctomb(charset, s, 255) == -1);
+    assert(charset_wctomb(charset, s, 128) == -1);
+    assert(charset_wctomb(charset, s, 127) == 1 && !strcmp(s, "\177."));
+}
+
+/*
+ * Exercises the "iso-8859-1" charset: the generic checks from test_any(),
+ * then the single-byte decoding of a high byte and the 0..255 encoder
+ * range.
+ */
+void test_iso1(void)
+{
+    struct charset *charset;
+    int wc;
+    /*
+     * Two dots plus the terminating NUL fill the array exactly.
+     * Initialised in the declaration so no string-copy helper is needed.
+     */
+    char s[3] = "..";
+
+    charset = charset_find("iso-8859-1");
+    test_any(charset);
+
+    /* Decoder: only the first byte is consumed and it maps to itself. */
+    wc = 0;
+    assert(charset_mbtowc(charset, &wc, "\302\200", 9) == 1 && wc == 0xc2);
+
+    /* Encoder: 256 is out of range; 255 and 128 encode as one byte. */
+    assert(charset_wctomb(charset, s, 256) == -1 && !strcmp(s, ".."));
+    assert(charset_wctomb(charset, s, 255) == 1 && !strcmp(s, "\377."));
+    assert(charset_wctomb(charset, s, 128) == 1 && !strcmp(s, "\200."));
+}
+
+/*
+ * Exercises the "iso-8859-2" charset: the generic checks from test_any(),
+ * then a few code points whose mapping differs from iso-8859-1.
+ */
+void test_iso2(void)
+{
+    struct charset *charset;
+    int wc;
+    /*
+     * Two dots plus the terminating NUL fill the array exactly.
+     * Initialised in the declaration so no string-copy helper is needed.
+     */
+    char s[3] = "..";
+
+    charset = charset_find("iso-8859-2");
+    test_any(charset);
+
+    /* Decoder: byte 0xC2 maps to itself, byte 0xFF maps to 0x2d9. */
+    wc = 0;
+    assert(charset_mbtowc(charset, &wc, "\302\200", 9) == 1 && wc == 0xc2);
+    assert(charset_mbtowc(charset, &wc, "\377", 2) == 1 && wc == 0x2d9);
+
+    /*
+     * Encoder: 256 and 255 have no encoding here and must leave s
+     * untouched; 258 encodes as byte 0xC3 and 128 as byte 0x80.
+     */
+    assert(charset_wctomb(charset, s, 256) == -1 && !strcmp(s, ".."));
+    assert(charset_wctomb(charset, s, 255) == -1 && !strcmp(s, ".."));
+    assert(charset_wctomb(charset, s, 258) == 1 && !strcmp(s, "\303."));
+    assert(charset_wctomb(charset, s, 128) == 1 && !strcmp(s, "\200."));
+}
+
+/*
+ * Exercises charset_convert() between UTF-8, iso-8859-1 and iso-8859-2.
+ *
+ * Return values these checks expect: 0 for an exact conversion, 1 when a
+ * character had no encoding in the target charset and came out as '?',
+ * 2 when the input held invalid bytes that came out as '#'.
+ *
+ * NOTE(review): the buffers returned through q and r are never released
+ * here; if charset_convert() hands ownership to the caller (confirm
+ * against charset.h) they stay allocated until the process exits.
+ */
+void test_convert(void)
+{
+    const char *p;
+    char *q, *r;
+    char s[256];
+    size_t n, n2;
+    int i;
+
+    /*
+     * The input starts with a NUL byte, so strcmp() would only compare two
+     * empty strings.  Compare all 10 bytes to actually check the output.
+     */
+    p = "\000x\302\200\375\277\277\277\277\277";
+    assert(charset_convert("UTF-8", "UTF-8", p, 10, &q, &n) == 0 &&
+           n == 10 && !memcmp(p, q, 10));
+
+    /* Invalid input bytes; also with a null buffer or length pointer. */
+    assert(charset_convert("UTF-8", "UTF-8", "x\301\277y", 4, &q, &n) == 2 &&
+           n == 4 && !strcmp(q, "x##y"));
+    assert(charset_convert("UTF-8", "UTF-8", "x\301\277y", 4, 0, &n) == 2 &&
+           n == 4);
+    assert(charset_convert("UTF-8", "UTF-8", "x\301\277y", 4, &q, 0) == 2 &&
+           !strcmp(q, "x##y"));
+
+    /* The second input character has no iso-8859-1 encoding. */
+    assert(charset_convert("UTF-8", "iso-8859-1",
+                           "\302\200\304\200x", 5, &q, &n) == 1 &&
+           n == 3 && !strcmp(q, "\200?x"));
+
+    /* Embedded NUL bytes must survive, hence memcmp() over n bytes. */
+    assert(charset_convert("iso-8859-1", "UTF-8",
+                           "\000\200\377", 3, &q, &n) == 0 &&
+           n == 5 && !memcmp(q, "\000\302\200\303\277", 5));
+    assert(charset_convert("iso-8859-1", "iso-8859-1",
+                           "\000\200\377", 3, &q, &n) == 0 &&
+           n == 3 && !memcmp(q, "\000\200\377", 3));
+
+    /* One iso-8859-2 byte to UTF-8 and back. */
+    assert(charset_convert("iso-8859-2", "utf-8", "\300", 1, &q, &n) == 0 &&
+           n == 2 && !strcmp(q, "\305\224"));
+    assert(charset_convert("utf-8", "iso-8859-2", "\305\224", 2, &q, &n) == 0 &&
+           n == 1 && !strcmp(q, "\300"));
+
+    /* Round-trip every byte value through iso-8859-2 -> utf-8 -> back. */
+    for (i = 0; i < 256; i++) {
+        s[i] = (char)i;
+    }
+
+    assert(charset_convert("iso-8859-2", "utf-8", s, 256, &q, &n) == 0);
+    assert(charset_convert("utf-8", "iso-8859-2", q, n, &r, &n2) == 0);
+    assert(n2 == 256 && !memcmp(r, s, n2));
+}
+
+/*
+ * Runs each charset test in turn and returns 0 once all of them have run.
+ *
+ * Every check is an assert(), so a failure aborts the program, and a build
+ * with NDEBUG defined compiles the checks out entirely.
+ */
+int main(void)
+{
+    test_utf8();
+    test_ascii();
+    test_iso1();
+    test_iso2();
+
+    test_convert();
+
+    return 0;
+}